Mercurial > hg > monetdb-java
view src/main/java/nl/cwi/monetdb/mcl/parser/TupleLineParser.java @ 0:a5a898f6886c
Copy of MonetDB java directory changeset e6e32756ad31.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Wed, 21 Sep 2016 09:34:48 +0200 (2016-09-21) |
parents | |
children | 57978db4ee57 b9b35ca2eec2 |
line wrap: on
line source
/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V. */ package nl.cwi.monetdb.mcl.parser; /** * The TupleLineParser extracts the values from a given tuple. The * number of values that are expected are known upfront to speed up * allocation and validation. * * @author Fabian Groffen <Fabian.Groffen> */ public class TupleLineParser extends MCLParser { /** * Constructs a TupleLineParser which expects columncount columns. * * @param columncount the number of columns in the to be parsed string */ public TupleLineParser(int columncount) { super(columncount); } /** * Parses the given String source as tuple line. If source cannot * be parsed, a ParseException is thrown. The columncount argument * is used for allocation of the returned array. While this seems * illogical, the caller should know this size, since the * StartOfHeader contains this information. * * @param source a String which should be parsed * @return 0, as there is no 'type' of TupleLine * @throws ParseException if an error occurs during parsing */ @Override public int parse(String source) throws MCLParseException { int len = source.length(); char[] chrLine = new char[len]; source.getChars(0, len, chrLine, 0); // first detect whether this is a single value line (=) or a // real tuple ([) if (chrLine[0] == '=') { if (values.length != 1) throw new MCLParseException(values.length + " columns expected, but only single value found"); // return the whole string but the leading = values[0] = source.substring(1); // reset colnr reset(); return 0; } // extract separate fields by examining string, char for char boolean inString = false, escaped = false; int cursor = 2, column = 0, i = 2; StringBuilder uesc = new StringBuilder(); for (; i < len; i++) { switch(chrLine[i]) { default: escaped = false; break; case '\\': escaped = !escaped; break; case '"': /** * If all strings are wrapped between two quotes, a \" can * never exist outside a string. Thus if we believe that we * are not within a string, we can safely assume we're about * to enter a string if we find a quote. * If we are in a string we should stop being in a string if * we find a quote which is not prefixed by a \, for that * would be an escaped quote. However, a nasty situation can * occur where the string is like "test \\" as obvious, a * test for a \ in front of a " doesn't hold here for all * cases. Because "test \\\"" can exist as well, we need to * know if a quote is prefixed by an escaping slash or not. */ if (!inString) { inString = true; } else if (!escaped) { inString = false; } // reset escaped flag escaped = false; break; case '\t': if (!inString && (i > 0 && chrLine[i - 1] == ',') || (i + 1 == len - 1 && chrLine[++i] == ']')) // dirty { // split! if (chrLine[cursor] == '"' && chrLine[i - 2] == '"') { // reuse the StringBuilder by cleaning it uesc.delete(0, uesc.length()); // prevent capacity increasements uesc.ensureCapacity((i - 2) - (cursor + 1)); for (int pos = cursor + 1; pos < i - 2; pos++) { if (chrLine[pos] == '\\' && pos + 1 < i - 2) { pos++; // strToStr and strFromStr in gdk_atoms.mx only // support \t \n \\ \" and \377 switch (chrLine[pos]) { case '\\': uesc.append('\\'); break; case 'n': uesc.append('\n'); break; case 't': uesc.append('\t'); break; case '"': uesc.append('"'); break; case '0': case '1': case '2': case '3': // this could be an octal number, let's check it out if (pos + 2 < i - 2 && chrLine[pos + 1] >= '0' && chrLine[pos + 1] <= '7' && chrLine[pos + 2] >= '0' && chrLine[pos + 2] <= '7' ) { // we got the number! try { uesc.append((char)(Integer.parseInt("" + chrLine[pos] + chrLine[pos + 1] + chrLine[pos + 2], 8))); pos += 2; } catch (NumberFormatException e) { // hmmm, this point should never be reached actually... throw new AssertionError("Flow error, should never try to parse non-number"); } } else { // do default action if number seems not to be correct uesc.append(chrLine[pos]); } break; default: // this is wrong, just ignore the escape, and print the char uesc.append(chrLine[pos]); break; } } else { uesc.append(chrLine[pos]); } } // put the unescaped string in the right place values[column++] = uesc.toString(); } else if ((i - 1) - cursor == 4 && source.indexOf("NULL", cursor) == cursor) { values[column++] = null; } else { values[column++] = source.substring(cursor, i - 1); } cursor = i + 1; } // reset escaped flag escaped = false; break; } } // check if this result is of the size we expected it to be if (column != values.length) throw new MCLParseException("illegal result length: " + column + "\nlast read: " + (column > 0 ? values[column - 1] : "<none>")); // reset colnr reset(); return 0; } }