Mercurial > hg > monetdb-java
diff src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java @ 119:1ea2ee3b946c
Extend HeaderLineParser to also correctly parse result set header lines for table
and column names which contain special characters such as \", \\, or the comma-tab combination.
The comma-tab combination is the separator for names in the header line. This is now properly parsed.
Also extended the test program BugResultSetMetaData_Bug_6183.java with examples of such column names.
author | Martin van Dinther <martin.van.dinther@monetdbsolutions.com> |
---|---|
date | Thu, 23 Feb 2017 18:29:46 +0100 (2017-02-23) |
parents | a030c3e53cf5 |
children | fdf4c888d5b7 |
line wrap: on
line diff
--- a/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java +++ b/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java @@ -69,7 +69,8 @@ public class HeaderLineParser extends MC case '#': // found! nameFound = true; - if (pos == 0) pos = i + 1; + if (pos == 0) + pos = i + 1; i = 0; // force the loop to terminate break; default: @@ -84,30 +85,23 @@ public class HeaderLineParser extends MC // depending on the name of the header, we continue switch (chrLine[pos]) { case 'n': - if (len - pos == 4 && - source.regionMatches(pos + 1, "name", 1, 3)) - { + if (len - pos == 4 && source.regionMatches(pos + 1, "name", 1, 3)) { getValues(chrLine, 2, pos - 3); type = NAME; } break; case 'l': - if (len - pos == 6 && - source.regionMatches(pos + 1, "length", 1, 5)) - { + if (len - pos == 6 && source.regionMatches(pos + 1, "length", 1, 5)) { getIntValues(chrLine, 2, pos - 3); type = LENGTH; } break; case 't': - if (len - pos == 4 && - source.regionMatches(pos + 1, "type", 1, 3)) - { + if (len - pos == 4 && source.regionMatches(pos + 1, "type", 1, 3)) { getValues(chrLine, 2, pos - 3); type = TYPE; - } else if (len - pos == 10 && - source.regionMatches(pos + 1, "table_name", 1, 9)) - { + } else + if (len - pos == 10 && source.regionMatches(pos + 1, "table_name", 1, 9)) { getValues(chrLine, 2, pos - 3); type = TABLE; } @@ -126,8 +120,9 @@ public class HeaderLineParser extends MC /** * Returns an array of Strings containing the values between * ',\t' separators. + * * As of Oct2014-SP1 release MAPI adds double quotes around names when - * the name contains a comma or a tab or a space or a # or " character. + * the name contains a comma or a tab or a space or a # or " or \ escape character. * See issue: https://www.monetdb.org/bugzilla/show_bug.cgi?id=3616 * If the parsed name string part has a " as first and last character, * we remove those added double quotes here. 
@@ -138,19 +133,59 @@ public class HeaderLineParser extends MC */ final private void getValues(char[] chrLine, int start, int stop) { int elem = 0; + boolean inString = false, escaped = false; - for (int i = start + 1; i < stop; i++) { - if (chrLine[i] == '\t' && chrLine[i - 1] == ',') { - if (chrLine[start] == '"') - start++; // skip leading double quote - values[elem++] = new String(chrLine, start, i - (chrLine[i - 2] == '"' ? 2 : 1) - start); - start = i + 1; + for (int i = start; i < stop; i++) { + switch(chrLine[i]) { + case '\\': + escaped = !escaped; + break; + case '"': + /** + * If all strings are wrapped between two quotes, a \" can + * never exist outside a string. Thus if we believe that we + * are not within a string, we can safely assume we're about + * to enter a string if we find a quote. + * If we are in a string we should stop being in a string if + * we find a quote which is not prefixed by a \, for that + * would be an escaped quote. However, a nasty situation can + * occur where the string is like "test \\" as obvious, a + * test for a \ in front of a " doesn't hold here for all + * cases. Because "test \\\"" can exist as well, we need to + * know if a quote is prefixed by an escaping slash or not. + */ + if (!inString) { + inString = true; + } else if (!escaped) { + inString = false; + } + // reset escaped flag + escaped = false; + break; + case ',': + if (!inString && chrLine[i + 1] == '\t') { + // we found the field separator + if (chrLine[start] == '"') + start++; // skip leading double quote + if (elem < values.length) { + values[elem++] = new String(chrLine, start, i - (chrLine[i - 1] == '"' ? 
1 : 0) - start); + } + i++; + start = i + 1; // reset start for the next name, skipping the field separator (a comma and tab) + } + // reset escaped flag + escaped = false; + break; + default: + escaped = false; + break; } } // add the left over part (last column) if (chrLine[start] == '"') start++; // skip leading double quote - values[elem++] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start); + if (elem < values.length) + values[elem] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start); } /** @@ -159,15 +194,13 @@ public class HeaderLineParser extends MC * * Feb2017 note - This integer parser doesn't have to parse negative * numbers, because it is only used to parse column lengths - * which is always greater than 0. + * which are always greater than 0. * * @param chrLine a character array holding the input data * @param start where the relevant data starts * @param stop where the relevant data stops */ - final private void getIntValues(char[] chrLine, int start, int stop) - throws MCLParseException - { + final private void getIntValues(char[] chrLine, int start, int stop) throws MCLParseException { int elem = 0; int tmp = 0; @@ -177,17 +210,17 @@ public class HeaderLineParser extends MC tmp = 0; start = i++; } else { - tmp *= 10; // note: don't use Character.isDigit() here, because // we only want ISO-LATIN-1 digits if (chrLine[i] >= '0' && chrLine[i] <= '9') { + tmp *= 10; tmp += (int)chrLine[i] - (int)'0'; } else { throw new MCLParseException("expected a digit in " + new String(chrLine) + " at " + i); } } } - // add the left over part - intValues[elem++] = tmp; + // add the left over part (last column) + intValues[elem] = tmp; } }