changeset 119:1ea2ee3b946c

Extend HeaderLineParser to also correctly parse result set header lines for table and column names which contain special characters such as \", \\, comma-tab combination. The comma-tab combination is the separator for names in the header line. This is now properly parsed. Also extended the test program BugResultSetMetaData_Bug_6183.java with examples of such column names.
author Martin van Dinther <martin.van.dinther@monetdbsolutions.com>
date Thu, 23 Feb 2017 18:29:46 +0100 (2017-02-23)
parents a030c3e53cf5
children 65b27b29ca71 890dce2d5305
files src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java tests/BugResultSetMetaData_Bug_6183.java
diffstat 2 files changed, 132 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java
+++ b/src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java
@@ -69,7 +69,8 @@ public class HeaderLineParser extends MC
 				case '#':
 					// found!
 					nameFound = true;
-					if (pos == 0) pos = i + 1;
+					if (pos == 0)
+						pos = i + 1;
 					i = 0;	// force the loop to terminate
 					break;
 				default:
@@ -84,30 +85,23 @@ public class HeaderLineParser extends MC
 		// depending on the name of the header, we continue
 		switch (chrLine[pos]) {
 			case 'n':
-				if (len - pos == 4 &&
-						source.regionMatches(pos + 1, "name", 1, 3))
-				{
+				if (len - pos == 4 && source.regionMatches(pos + 1, "name", 1, 3)) {
 					getValues(chrLine, 2, pos - 3);
 					type = NAME;
 				}
 				break;
 			case 'l':
-				if (len - pos == 6 &&
-						source.regionMatches(pos + 1, "length", 1, 5))
-				{
+				if (len - pos == 6 && source.regionMatches(pos + 1, "length", 1, 5)) {
 					getIntValues(chrLine, 2, pos - 3);
 					type = LENGTH;
 				}
 				break;
 			case 't':
-				if (len - pos == 4 &&
-						source.regionMatches(pos + 1, "type", 1, 3))
-				{
+				if (len - pos == 4 && source.regionMatches(pos + 1, "type", 1, 3)) {
 					getValues(chrLine, 2, pos - 3);
 					type = TYPE;
-				} else if (len - pos == 10 &&
-						source.regionMatches(pos + 1, "table_name", 1, 9))
-				{
+				} else
+				if (len - pos == 10 && source.regionMatches(pos + 1, "table_name", 1, 9)) {
 					getValues(chrLine, 2, pos - 3);
 					type = TABLE;
 				}
@@ -126,8 +120,9 @@ public class HeaderLineParser extends MC
 	/**
 	 * Returns an array of Strings containing the values between
 	 * ',\t' separators.
+	 *
 	 * As of Oct2014-SP1 release MAPI adds double quotes around names when
-	 * the name contains a comma or a tab or a space or a # or " character.
+	 * the name contains a comma or a tab or a space or a # or " or \ escape character.
 	 * See issue: https://www.monetdb.org/bugzilla/show_bug.cgi?id=3616
 	 * If the parsed name string part has a " as first and last character,
 	 * we remove those added double quotes here.
@@ -138,19 +133,59 @@ public class HeaderLineParser extends MC
 	 */
 	final private void getValues(char[] chrLine, int start, int stop) {
 		int elem = 0;
+		boolean inString = false, escaped = false;
 
-		for (int i = start + 1; i < stop; i++) {
-			if (chrLine[i] == '\t' && chrLine[i - 1] == ',') {
-				if (chrLine[start] == '"')
-					start++;  // skip leading double quote
-				values[elem++] = new String(chrLine, start, i - (chrLine[i - 2] == '"' ? 2 : 1) - start);
-				start = i + 1;
+		for (int i = start; i < stop; i++) {
+			switch(chrLine[i]) {
+				case '\\':
+					escaped = !escaped;
+					break;
+				case '"':
+					/**
+					 * If all strings are wrapped between two quotes, a \" can
+					 * never exist outside a string. Thus if we believe that we
+					 * are not within a string, we can safely assume we're about
+					 * to enter a string if we find a quote.
+					 * If we are in a string we should stop being in a string if
+					 * we find a quote which is not prefixed by a \, for that
+					 * would be an escaped quote. However, a nasty situation can
+					 * occur where the string is like "test \\" as obvious, a
+					 * test for a \ in front of a " doesn't hold here for all
+					 * cases. Because "test \\\"" can exist as well, we need to
+					 * know if a quote is prefixed by an escaping slash or not.
+					 */
+					if (!inString) {
+						inString = true;
+					} else if (!escaped) {
+						inString = false;
+					}
+					// reset escaped flag
+					escaped = false;
+					break;
+				case ',':
+					if (!inString && chrLine[i + 1] == '\t') {
+						// we found the field separator
+						if (chrLine[start] == '"')
+							start++;  // skip leading double quote
+						if (elem < values.length) {
+							values[elem++] = new String(chrLine, start, i - (chrLine[i - 1] == '"' ? 1 : 0) - start);
+						}
+						i++;
+						start = i + 1;	// reset start for the next name, skipping the field separator (a comma and tab)
+					}
+					// reset escaped flag
+					escaped = false;
+					break;
+				default:
+					escaped = false;
+					break;
 			}
 		}
 		// add the left over part (last column)
 		if (chrLine[start] == '"')
 			start++;  // skip leading double quote
-		values[elem++] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start);
+		if (elem < values.length)
+			values[elem] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start);
 	}
 
 	/**
@@ -159,15 +194,13 @@ public class HeaderLineParser extends MC
 	 *
 	 * Feb2017 note - This integer parser doesn't have to parse negative
 	 * numbers, because it is only used to parse column lengths
-	 * which is always greater than 0.
+	 * which are always greater than 0.
 	 *
 	 * @param chrLine a character array holding the input data
 	 * @param start where the relevant data starts
 	 * @param stop where the relevant data stops
 	 */
-	final private void getIntValues(char[] chrLine, int start, int stop)
-		throws MCLParseException
-	{
+	final private void getIntValues(char[] chrLine, int start, int stop) throws MCLParseException {
 		int elem = 0;
 		int tmp = 0;
 
@@ -177,17 +210,17 @@ public class HeaderLineParser extends MC
 				tmp = 0;
 				start = i++;
 			} else {
-				tmp *= 10;
 				// note: don't use Character.isDigit() here, because
 				// we only want ISO-LATIN-1 digits
 				if (chrLine[i] >= '0' && chrLine[i] <= '9') {
+					tmp *= 10;
 					tmp += (int)chrLine[i] - (int)'0';
 				} else {
 					throw new MCLParseException("expected a digit in " + new String(chrLine) + " at " + i);
 				}
 			}
 		}
-		// add the left over part
-		intValues[elem++] = tmp;
+		// add the left over part (last column)
+		intValues[elem] = tmp;
 	}
 }
--- a/tests/BugResultSetMetaData_Bug_6183.java
+++ b/tests/BugResultSetMetaData_Bug_6183.java
@@ -9,64 +9,76 @@
 import java.sql.*;
 
 public class BugResultSetMetaData_Bug_6183 {
+	static final String dqTblName = "\"my dq_table\"";
+	static final String[] dqColNames = {"\"my space\"", "\"my, comma_space\"", "\"my$dollar\"", "\"my#hash\"", "\"my	tab\"", "\"my	,tab_comma\"", "\"my,	comma_tab\"", "\"my\\\"backslash_doublequote\""};
+
 	public static void main(String[] args) throws Exception {
 		// Class.forName("nl.cwi.monetdb.jdbc.MonetDriver");	// not needed anymore for self registering JDBC drivers
 		Connection con = DriverManager.getConnection(args[0]);
 		Statement stmt = con.createStatement();
 		ResultSet rs = null;
 		try {
-			System.out.println("1. create table \"my dq_table\"");
-			int ret = stmt.executeUpdate("CREATE TABLE \"my dq_table\" (\"my column\" varchar(30), col2 int, \"my, column\" int, \"my$column\" int, \"my#column\" int, \"my	tcolumn\" int, \"my	,tc column\" int, \"my\\\"column\" int)");
-			System.out.println(" returned: " + ret);
-
-			System.out.println("2. show column names of this new table via sys.columns query");
-			rs = stmt.executeQuery("SELECT name, type, number from sys.columns where table_id in (select id from sys._tables where name = 'my dq_table') order by number");
-			showResultAndClose(rs);
+			System.out.println("1. create table " + dqTblName);
+			StringBuilder sb = new StringBuilder(30 + (dqColNames.length * (30 + 15)));
+			sb.append("CREATE TABLE ").append(dqTblName).append(" (");
+			for (int n = 0; n < dqColNames.length; n++) {
+				sb.append(dqColNames[n]);
+				sb.append(" varchar(").append(31 + n).append(')');
+				if (n < (dqColNames.length -1))
+					sb.append(", ");
+			}
+			sb.append(')');
+			int ret = stmt.executeUpdate(sb.toString());
+			System.out.println(" returned: " + ret + " (expected -2)");
+			System.out.println();
 
-			System.out.println("3. insert 1 row of data");
-			ret = stmt.executeUpdate("INSERT INTO \"my dq_table\" VALUES ('row1', 1,2,3,4,5,6,7)");
-			System.out.println(" returned: " + ret);
-
-			System.out.println("4. show full content of table \"my dq_table\"");
-			rs = stmt.executeQuery("SELECT * from \"my dq_table\"");
-			showResultAndClose(rs);
-
-			System.out.println("5. show content of column \"my column\"");
-			rs = stmt.executeQuery("SELECT \"my column\" from \"my dq_table\"");
-			showResultAndClose(rs);
-
-			System.out.println("6. show content of column \"my, column\"");
-			rs = stmt.executeQuery("SELECT \"my, column\" from \"my dq_table\"");
+			String tblName = dqTblName.substring(1, dqTblName.length() -1);	// trim the leading and trailing double quote characters
+			System.out.println("2. show column names of this new table (" + tblName + ") via sys.columns query");
+			rs = stmt.executeQuery("SELECT number, name, type from sys.columns where table_id in (select id from sys._tables where name = '" + tblName + "') order by number");
 			showResultAndClose(rs);
 
-			System.out.println("7. show content of column \"my$column\"");
-			rs = stmt.executeQuery("SELECT \"my$column\" from \"my dq_table\"");
-			showResultAndClose(rs);
-
-			System.out.println("8. show content of column \"my#column\"");
-			rs = stmt.executeQuery("SELECT \"my#column\" from \"my dq_table\"");
-			showResultAndClose(rs);
-
-			System.out.println("9. show content of column \"my	tcolumn\"");
-			rs = stmt.executeQuery("SELECT \"my	tcolumn\" from \"my dq_table\"");
-			showResultAndClose(rs);
+			System.out.println("3. insert 1 row of data with values same as column names");
+			sb.setLength(0);
+			sb.append("INSERT INTO ").append(dqTblName).append(" VALUES (");
+			for (int n = 0; n < dqColNames.length; n++) {
+				sb.append('\'');
+				sb.append(dqColNames[n]);
+				sb.append('\'');
+				if (n < (dqColNames.length -1))
+					sb.append(", ");
+			}
+			sb.append(')');
+			ret = stmt.executeUpdate(sb.toString());
+			System.out.println(" returned: " + ret + " (expected 1)");
+			System.out.println();
 
-			System.out.println("10. show content of column \"my	,tc column\"");
-			rs = stmt.executeQuery("SELECT \"my	,tc column\" from \"my dq_table\"");
-			showResultAndClose(rs);
-
-			System.out.println("11. show content of column \"my\\\"column\"");
-			rs = stmt.executeQuery("SELECT \"my\\\"column\" from \"my dq_table\"");
-			showResultAndClose(rs);
+			System.out.println("4. insert 1 row of data with values same as column names but without enclosing double quotes");
+			sb.setLength(0);
+			sb.append("INSERT INTO ").append(dqTblName).append(" VALUES (");
+			for (int n = 0; n < dqColNames.length; n++) {
+				sb.append('\'');
+				// remove enclosing double quotes
+				sb.append(dqColNames[n].substring(1, dqColNames[n].length() -1));
+				sb.append('\'');
+				if (n < (dqColNames.length -1))
+					sb.append(", ");
+			}
+			sb.append(')');
+			ret = stmt.executeUpdate(sb.toString());
+			System.out.println(" returned: " + ret + " (expected 1)");
+			System.out.println();
 
-			System.out.println("12. show content of all columns");
-			rs = stmt.executeQuery("select col2, \"my column\", \"my, column\", \"my$column\", \"my#column\", \"my	tcolumn\", \"my	,tc column\", \"my\\\"column\" from \"my dq_table\"");
-			showResultAndClose(rs);
+			// query each column separately
+			for (int n = 0; n < dqColNames.length; n++) {
+				executeQueryAndShowResult(stmt, dqColNames[n], 5 + n);
+			}
+			// query all columns
+			executeQueryAndShowResult(stmt, "*", 5 + dqColNames.length);
 
-			System.out.println("Finally drop table \"my dq_table\"");
-			ret = stmt.executeUpdate("DROP TABLE \"my dq_table\" ");
-			System.out.println(" returned: " + ret);
-
+			System.out.println("Finally drop table " + dqTblName);
+			ret = stmt.executeUpdate("DROP TABLE " + dqTblName);
+			System.out.println(" returned: " + ret + " (expected -2)");
+			System.out.println();
 		} catch (SQLException se) {
 			System.out.println("Failure occurred: " + se);
 		} finally {
@@ -77,29 +89,26 @@ public class BugResultSetMetaData_Bug_61
 		con.close();
 	}
 
+	private static void executeQueryAndShowResult(Statement st, String col_list, int query_count) throws SQLException {
+		System.out.print(query_count);
+		System.out.println(". show content of column(s): " + col_list);
+		ResultSet rs = st.executeQuery("SELECT " + col_list + " from " + dqTblName);
+		showResultAndClose(rs);
+	}
+
 	private static void showResultAndClose(ResultSet rs) throws SQLException {
 		ResultSetMetaData rsmd = rs.getMetaData();
 		int rs_col_count = rsmd.getColumnCount();
 		System.out.println("Resultset with " + rs_col_count + " columns");
-
-		System.out.println("print column names");
+		System.out.println("\tColumn Name, Column Label:");
 		for (int col = 1; col <= rs_col_count; col++) {
-			if (col > 1)
-				System.out.print("\t");
-			System.out.print(rsmd.getColumnName(col));
+			System.out.println(col + "\t" + rsmd.getColumnName(col) + "\t" +rsmd.getColumnLabel(col));
 		}
-		System.out.println();
 
-		System.out.println("print column labels");
-		for (int col = 1; col <= rs_col_count; col++) {
-			if (col > 1)
-				System.out.print("\t");
-			System.out.print(rsmd.getColumnLabel(col));
-		}
-		System.out.println();
-
-		System.out.println("print data rows");
+		System.out.println("Data rows:");
+		long row_count = 0;
 		while (rs.next()) {
+			row_count++;
 			for (int col = 1; col <= rs_col_count; col++) {
 				if (col > 1)
 					System.out.print("\t");
@@ -108,9 +117,7 @@ public class BugResultSetMetaData_Bug_61
 			System.out.println();
 		}
 		rs.close();
-		System.out.println("Completed");
+		System.out.println("Listed " + row_count + " rows");
 		System.out.println();
 	}
-
-
 }