Mercurial > hg > monetdb-java
diff src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java @ 0:a5a898f6886c
Copy of MonetDB java directory changeset e6e32756ad31.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Wed, 21 Sep 2016 09:34:48 +0200 (2016-09-21) |
parents | |
children | e67d58485172 b9b35ca2eec2 |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java @@ -0,0 +1,182 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V. + */ + +package nl.cwi.monetdb.mcl.parser; + + +/** + * The HeaderLineParser is a generic MCLParser that extracts values from + * a metadata header in the MCL protocol either as string or integer + * values. + * + * @author Fabian Groffen <Fabian.Groffen> + */ +public class HeaderLineParser extends MCLParser { + private int type; + + public final static int NAME = 1; + public final static int LENGTH = 2; + public final static int TABLE = 3; + public final static int TYPE = 4; + + /** + * Constructs a HeaderLineParser which expects columncount columns. + * + * @param columncount the number of columns in the to be parsed string + */ + public HeaderLineParser(int columncount) { + super(columncount); + } + + /** + * Parses the given String source as header line. If source cannot + * be parsed, an MCLParseException is thrown. The columncount argument + * given during construction is used for allocation of the backing + * array. Parsing a header line with has more fields will therefore + * result in a crash. While this seems illogical, the caller should + * know this size, since the StartOfHeader contains this + * information. + * + * @param source a String which should be parsed + * @return the type of then parsed header line + * @throws MCLParseException if an error occurs during parsing + */ + @Override + public int parse(String source) throws MCLParseException { + char[] chrLine = source.toCharArray(); + int len = chrLine.length; + int pos = 0; + boolean foundChar = false; + boolean nameFound = false; + // find header name + for (int i = len - 1; i >= 0; i--) { + switch (chrLine[i]) { + case ' ': + case '\n': + case '\t': + case '\r': + if (!foundChar) { + len = i - 1; + } else { + pos = i + 1; + } + break; + case '#': + // found! + nameFound = true; + if (pos == 0) pos = i + 1; + i = 0; // force the loop to terminate + break; + default: + foundChar = true; + pos = 0; + break; + } + } + if (!nameFound) + throw new MCLParseException("invalid header, no header name found", pos); + + // depending on the name of the header, we continue + switch (chrLine[pos]) { + case 'n': + if (len - pos == 4 && + source.regionMatches(pos + 1, "name", 1, 3)) + { + getValues(chrLine, 2, pos - 3); + type = NAME; + } + break; + case 'l': + if (len - pos == 6 && + source.regionMatches(pos + 1, "length", 1, 5)) + { + getIntValues(chrLine, 2, pos - 3); + type = LENGTH; + } + break; + case 't': + if (len - pos == 4 && + source.regionMatches(pos + 1, "type", 1, 3)) + { + getValues(chrLine, 2, pos - 3); + type = TYPE; + } else if (len - pos == 10 && + source.regionMatches(pos + 1, "table_name", 1, 9)) + { + getValues(chrLine, 2, pos - 3); + type = TABLE; + } + break; + default: + throw new MCLParseException("unknown header: " + + (new String(chrLine, pos, len - pos))); + } + + // adjust colno + reset(); + + return type; + } + + /** + * Returns an array of Strings containing the values between + * ',\t' separators. Note that no quoting/dequoting is done in this + * method. + * + * @param chrLine a character array holding the input data + * @param start where the relevant data starts + * @param stop where the relevant data stops + */ + final private void getValues(char[] chrLine, int start, int stop) { + int elem = 0; + + for (int i = start + 1; i < stop; i++) { + if (chrLine[i] == '\t' && chrLine[i - 1] == ',') { + values[elem++] = + new String(chrLine, start, i - 1 - start); + start = i + 1; + } + } + // add the left over part + values[elem++] = new String(chrLine, start, stop - start); + } + + /** + * Returns an array of ints containing the values between + * ',\t' separators. + * + * @param chrLine a character array holding the input data + * @param start where the relevant data starts + * @param stop where the relevant data stops + */ + final private void getIntValues(char[] chrLine, int start, int stop) + throws MCLParseException + { + int elem = 0; + int tmp = 0; + + for (int i = start; i < stop; i++) { + if (chrLine[i] == ',' && chrLine[i + 1] == '\t') { + intValues[elem++] = tmp; + tmp = 0; + start = i++; + } else { + tmp *= 10; + // note: don't use Character.isDigit() here, because + // we only want ISO-LATIN-1 digits + if (chrLine[i] >= '0' && chrLine[i] <= '9') { + tmp += (int)chrLine[i] - (int)'0'; + } else { + throw new MCLParseException("expected a digit in " + new String(chrLine) + " at " + i); + } + } + } + // add the left over part + intValues[elem++] = tmp; + } +}