diff src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java @ 0:a5a898f6886c

Copy of MonetDB java directory changeset e6e32756ad31.
author Sjoerd Mullender <sjoerd@acm.org>
date Wed, 21 Sep 2016 09:34:48 +0200 (2016-09-21)
parents
children e67d58485172 b9b35ca2eec2
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java
@@ -0,0 +1,182 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V.
+ */
+
+package nl.cwi.monetdb.mcl.parser;
+
+
+/**
+ * The HeaderLineParser is a generic MCLParser that extracts values from
+ * a metadata header in the MCL protocol either as string or integer
+ * values.
+ *
+ * @author Fabian Groffen <Fabian.Groffen>
+ */
+public class HeaderLineParser extends MCLParser {
+	/** Type of the most recently successfully parsed header line. */
+	private int type;
+
+	/** Returned by {@link #parse(String)} for a "name" header line. */
+	public final static int NAME       = 1;
+	/** Returned by {@link #parse(String)} for a "length" header line. */
+	public final static int LENGTH     = 2;
+	/** Returned by {@link #parse(String)} for a "table_name" header line. */
+	public final static int TABLE      = 3;
+	/** Returned by {@link #parse(String)} for a "type" header line. */
+	public final static int TYPE       = 4;
+
+	/**
+	 * Index of the first value character; the first two characters of
+	 * a header line are skipped (presumably the "% " line prefix).
+	 */
+	private final static int VALUES_START = 2;
+
+	/**
+	 * Constructs a HeaderLineParser which expects columncount columns.
+	 *
+	 * @param columncount the number of columns in the to be parsed string
+	 */
+	public HeaderLineParser(int columncount) {
+		super(columncount);
+	}
+
+	/**
+	 * Parses the given String source as header line.  The header name
+	 * is the last word(s) of the line, following the last '#'; the
+	 * values are the ',\t' separated fields in front of it.  Trailing
+	 * whitespace after the header name is ignored.  If source cannot
+	 * be parsed, or the header name is not one of "name", "length",
+	 * "type" or "table_name", an MCLParseException is thrown.  The
+	 * columncount argument given during construction is used for
+	 * allocation of the backing array.  Parsing a header line which
+	 * has more fields will therefore result in a crash.  While this
+	 * seems illogical, the caller should know this size, since the
+	 * StartOfHeader contains this information.
+	 *
+	 * @param source a String which should be parsed
+	 * @return the type of the parsed header line: one of NAME, LENGTH,
+	 *         TABLE or TYPE
+	 * @throws MCLParseException if an error occurs during parsing
+	 */
+	@Override
+	public int parse(String source) throws MCLParseException {
+		char[] chrLine = source.toCharArray();
+		int len = chrLine.length;	// exclusive end of the header name
+		int pos = 0;			// start of the header name
+		boolean foundChar = false;
+		boolean nameFound = false;
+		// find header name, scanning backwards from the end of the line
+		for (int i = len - 1; i >= 0; i--) {
+			switch (chrLine[i]) {
+				case ' ':
+				case '\n':
+				case '\t':
+				case '\r':
+					if (!foundChar) {
+						// trailing whitespace: the name ends right
+						// before this character (exclusive end)
+						len = i;
+					} else {
+						// whitespace in front of (part of) the name
+						pos = i + 1;
+					}
+					break;
+				case '#':
+					// found!
+					nameFound = true;
+					if (pos == 0) pos = i + 1;
+					i = 0;	// force the loop to terminate
+					break;
+				default:
+					foundChar = true;
+					pos = 0;
+					break;
+			}
+		}
+		// pos >= len means the '#' is not followed by any name at all
+		if (!nameFound || pos >= len)
+			throw new MCLParseException("invalid header, no header name found", pos);
+
+		// depending on the name of the header, we continue; anything
+		// we do not recognise is an error, never a silent fall-through
+		// that would return the type of a previously parsed line
+		int headerType;
+		if (isHeader(source, pos, len, "name")) {
+			headerType = NAME;
+		} else if (isHeader(source, pos, len, "length")) {
+			headerType = LENGTH;
+		} else if (isHeader(source, pos, len, "type")) {
+			headerType = TYPE;
+		} else if (isHeader(source, pos, len, "table_name")) {
+			headerType = TABLE;
+		} else {
+			throw new MCLParseException("unknown header: " +
+					(new String(chrLine, pos, len - pos)));
+		}
+
+		// the values stop three characters before the header name,
+		// i.e. in front of the " # " that introduces it
+		int stop = pos - 3;
+		if (stop < VALUES_START)
+			throw new MCLParseException("invalid header, no values found", pos);
+
+		if (headerType == LENGTH) {
+			getIntValues(chrLine, VALUES_START, stop);
+		} else {
+			getValues(chrLine, VALUES_START, stop);
+		}
+		type = headerType;
+
+		// adjust colno
+		reset();
+
+		return type;
+	}
+
+	/**
+	 * Tells whether the header name found in source between pos
+	 * (inclusive) and len (exclusive) is exactly the given name.
+	 *
+	 * @param source the header line being parsed
+	 * @param pos index of the first character of the header name
+	 * @param len index just past the last character of the header name
+	 * @param name the header name to compare with
+	 * @return true if the header name equals name, false otherwise
+	 */
+	private static boolean isHeader(String source, int pos, int len, String name) {
+		return len - pos == name.length() &&
+			source.regionMatches(pos, name, 0, name.length());
+	}
+
+	/**
+	 * Splits the data between start and stop on ',\t' separators and
+	 * stores the resulting Strings in the values array.  Note that no
+	 * quoting/dequoting is done in this method.
+	 *
+	 * @param chrLine a character array holding the input data
+	 * @param start where the relevant data starts (inclusive)
+	 * @param stop where the relevant data stops (exclusive)
+	 */
+	final private void getValues(char[] chrLine, int start, int stop) {
+		int elem = 0;
+
+		for (int i = start + 1; i < stop; i++) {
+			if (chrLine[i] == '\t' && chrLine[i - 1] == ',') {
+				// the value ends in front of the ',' at i - 1
+				values[elem++] =
+					new String(chrLine, start, i - 1 - start);
+				start = i + 1;
+			}
+		}
+		// add the left over part
+		values[elem++] = new String(chrLine, start, stop - start);
+	}
+
+	/**
+	 * Splits the data between start and stop on ',\t' separators,
+	 * interprets each field as a non-negative decimal number and stores
+	 * the results in the intValues array.  An empty field yields 0.
+	 *
+	 * @param chrLine a character array holding the input data
+	 * @param start where the relevant data starts (inclusive)
+	 * @param stop where the relevant data stops (exclusive)
+	 * @throws MCLParseException if a field contains a character which
+	 *         is not an ISO-LATIN-1 digit
+	 */
+	final private void getIntValues(char[] chrLine, int start, int stop)
+		throws MCLParseException
+	{
+		int elem = 0;
+		int tmp = 0;
+
+		for (int i = start; i < stop; i++) {
+			if (chrLine[i] == ',' && chrLine[i + 1] == '\t') {
+				intValues[elem++] = tmp;
+				tmp = 0;
+				i++;	// skip the '\t' of the separator as well
+			} else {
+				tmp *= 10;
+				// note: don't use Character.isDigit() here, because
+				// we only want ISO-LATIN-1 digits
+				if (chrLine[i] >= '0' && chrLine[i] <= '9') {
+					tmp += (int)chrLine[i] - (int)'0';
+				} else {
+					throw new MCLParseException("expected a digit in " + new String(chrLine) + " at " + i);
+				}
+			}
+		}
+		// add the left over part
+		intValues[elem++] = tmp;
+	}
+}