Mercurial > hg > monetdb-java
comparison src/main/java/org/monetdb/mcl/parser/HeaderLineParser.java @ 391:f523727db392
Moved Java classes from packages starting with nl.cwi.monetdb.* to package org.monetdb.*
This naming complies with the Java package naming convention, as MonetDB's main website is www.monetdb.org.
author | Martin van Dinther <martin.van.dinther@monetdbsolutions.com> |
---|---|
date | Thu, 12 Nov 2020 22:02:01 +0100 (2020-11-12) |
parents | src/main/java/nl/cwi/monetdb/mcl/parser/HeaderLineParser.java@f15d2ac35932 |
children | bf9f6b6ecf40 |
comparison
equal
deleted
inserted
replaced
390:6199e0be3c6e | 391:f523727db392 |
---|---|
1 /* | |
2 * This Source Code Form is subject to the terms of the Mozilla Public | |
3 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
5 * | |
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2020 MonetDB B.V. | |
7 */ | |
8 | |
9 package org.monetdb.mcl.parser; | |
10 | |
11 | |
12 /** | |
13 * The HeaderLineParser is a generic MCLParser that extracts values from | |
14 * a metadata header in the MCL protocol either as string or integer | |
15 * values. | |
16 * | |
17 * @author Fabian Groffen | |
18 */ | |
19 public final class HeaderLineParser extends MCLParser { | |
20 /* types of meta data supported by MCL protocol */ | |
21 public final static int NAME = 1; // name of column | |
22 public final static int LENGTH = 2; | |
23 public final static int TABLE = 3; // may include the schema name | |
24 public final static int TYPE = 4; | |
25 | |
26 /** The int values found while parsing. Public, you may touch it. */ | |
27 public final int intValues[]; | |
28 | |
29 /** | |
30 * Constructs a HeaderLineParser which expects columncount columns. | |
31 * | |
32 * @param columncount the number of columns in the to be parsed string | |
33 */ | |
34 public HeaderLineParser(final int columncount) { | |
35 super(columncount); | |
36 intValues = new int[columncount]; | |
37 } | |
38 | |
39 /** | |
40 * Parses the given String source as header line. If source cannot | |
41 * be parsed, an MCLParseException is thrown. The columncount argument | |
42 * given during construction is used for allocation of the backing array. | |
43 * | |
44 * @param source a String which should be parsed | |
45 * @return the type of then parsed header line | |
46 * @throws MCLParseException if an error occurs during parsing | |
47 */ | |
48 @Override | |
49 public int parse(final String source) throws MCLParseException { | |
50 final char[] chrLine = source.toCharArray(); | |
51 int len = chrLine.length; | |
52 int pos = 0; | |
53 boolean foundChar = false; | |
54 boolean nameFound = false; | |
55 int i; | |
56 | |
57 // find header name searching from the end of the line | |
58 for (i = len - 1; i >= 0; i--) { | |
59 switch (chrLine[i]) { | |
60 case ' ': | |
61 case '\n': | |
62 case '\t': | |
63 case '\r': | |
64 if (!foundChar) { | |
65 len = i - 1; | |
66 } else { | |
67 pos = i + 1; | |
68 } | |
69 break; | |
70 case '#': | |
71 // found! | |
72 nameFound = true; | |
73 if (pos == 0) | |
74 pos = i + 1; | |
75 i = 0; // force the loop to terminate | |
76 break; | |
77 default: | |
78 foundChar = true; | |
79 pos = 0; | |
80 break; | |
81 } | |
82 } | |
83 if (!nameFound) | |
84 throw new MCLParseException("invalid header, no header name found", pos); | |
85 | |
86 // depending on the name of the header, we continue | |
87 int type = 0; | |
88 i = pos; | |
89 switch (len - pos) { | |
90 case 4: | |
91 // source.regionMatches(pos + 1, "name", 0, 4) | |
92 if (chrLine[i] == 'n' && chrLine[++i] == 'a' && chrLine[++i] == 'm' && chrLine[++i] == 'e') { | |
93 getValues(chrLine, 2, pos - 3); | |
94 type = NAME; | |
95 } else | |
96 // source.regionMatches(pos + 1, "type", 0, 4) | |
97 if (chrLine[i] == 't' && chrLine[++i] == 'y' && chrLine[++i] == 'p' && chrLine[++i] == 'e') { | |
98 getValues(chrLine, 2, pos - 3); | |
99 type = TYPE; | |
100 } | |
101 break; | |
102 case 6: | |
103 // source.regionMatches(pos + 1, "length", 0, 6) | |
104 if (chrLine[ i ] == 'l' && chrLine[++i] == 'e' && chrLine[++i] == 'n' && chrLine[++i] == 'g' | |
105 && chrLine[++i] == 't' && chrLine[++i] == 'h') { | |
106 getIntValues(chrLine, 2, pos - 3); | |
107 type = LENGTH; | |
108 } | |
109 break; | |
110 case 10: | |
111 // source.regionMatches(pos + 1, "table_name", 0, 10) | |
112 if (chrLine[ i ] == 't' && chrLine[++i] == 'a' && chrLine[++i] == 'b' && chrLine[++i] == 'l' && chrLine[++i] == 'e' | |
113 && chrLine[++i] == '_' && chrLine[++i] == 'n' && chrLine[++i] == 'a' && chrLine[++i] == 'm' && chrLine[++i] == 'e') { | |
114 getValues(chrLine, 2, pos - 3); | |
115 type = TABLE; | |
116 } | |
117 break; | |
118 default: | |
119 throw new MCLParseException("unknown header: " + (new String(chrLine, pos, len - pos))); | |
120 } | |
121 | |
122 // adjust colno | |
123 colnr = 0; | |
124 | |
125 return type; | |
126 } | |
127 | |
128 /** | |
129 * Fills an array of Strings containing the values between | |
130 * ',\t' separators. | |
131 * | |
132 * As of Oct2014-SP1 release MAPI adds double quotes around names when | |
133 * the name contains a comma or a tab or a space or a # or " or \ escape character. | |
134 * See issue: https://www.monetdb.org/bugzilla/show_bug.cgi?id=3616 | |
135 * If the parsed name string part has a " as first and last character, | |
136 * we remove those added double quotes here. | |
137 * | |
138 * @param chrLine a character array holding the input data | |
139 * @param start where the relevant data starts | |
140 * @param stop where the relevant data stops | |
141 */ | |
142 private final void getValues(final char[] chrLine, int start, final int stop) { | |
143 int elem = 0; | |
144 boolean inString = false, escaped = false; | |
145 | |
146 for (int i = start; i < stop; i++) { | |
147 switch(chrLine[i]) { | |
148 case '\\': | |
149 escaped = !escaped; | |
150 break; | |
151 case '"': | |
152 /** | |
153 * If all strings are wrapped between two quotes, a \" can | |
154 * never exist outside a string. Thus if we believe that we | |
155 * are not within a string, we can safely assume we're about | |
156 * to enter a string if we find a quote. | |
157 * If we are in a string we should stop being in a string if | |
158 * we find a quote which is not prefixed by a \, for that | |
159 * would be an escaped quote. However, a nasty situation can | |
160 * occur where the string is like "test \\" as obvious, a | |
161 * test for a \ in front of a " doesn't hold here for all | |
162 * cases. Because "test \\\"" can exist as well, we need to | |
163 * know if a quote is prefixed by an escaping slash or not. | |
164 */ | |
165 if (!inString) { | |
166 inString = true; | |
167 } else if (!escaped) { | |
168 inString = false; | |
169 } | |
170 // reset escaped flag | |
171 escaped = false; | |
172 break; | |
173 case ',': | |
174 if (!inString && chrLine[i + 1] == '\t') { | |
175 // we found the field separator | |
176 if (chrLine[start] == '"') | |
177 start++; // skip leading double quote | |
178 if (elem < values.length) { | |
179 // TODO: also deal with escape characters as done in TupleLineParser.parse() | |
180 values[elem++] = new String(chrLine, start, i - (chrLine[i - 1] == '"' ? 1 : 0) - start); | |
181 } | |
182 i++; | |
183 start = i + 1; // reset start for the next name, skipping the field separator (a comma and tab) | |
184 } | |
185 // reset escaped flag | |
186 escaped = false; | |
187 break; | |
188 default: | |
189 escaped = false; | |
190 break; | |
191 } | |
192 } | |
193 // add the left over part (last column) | |
194 if (chrLine[start] == '"') | |
195 start++; // skip leading double quote | |
196 if (elem < values.length) | |
197 values[elem] = new String(chrLine, start, stop - (chrLine[stop - 1] == '"' ? 1 : 0) - start); | |
198 } | |
199 | |
200 /** | |
201 * Fills an array of ints containing the values between | |
202 * ',\t' separators. | |
203 * | |
204 * Feb2017 note - This integer parser doesn't have to parse negative | |
205 * numbers, because it is only used to parse column lengths | |
206 * which are always greater than 0. | |
207 * | |
208 * @param chrLine a character array holding the input data | |
209 * @param start where the relevant data starts | |
210 * @param stop where the relevant data stops | |
211 */ | |
212 private final void getIntValues(final char[] chrLine, final int start, final int stop) throws MCLParseException { | |
213 int elem = 0; | |
214 int tmp = 0; | |
215 | |
216 for (int i = start; i < stop; i++) { | |
217 if (chrLine[i] == ',' && chrLine[i + 1] == '\t') { | |
218 if (elem < intValues.length) { | |
219 intValues[elem++] = tmp; | |
220 } | |
221 tmp = 0; | |
222 i++; | |
223 } else { | |
224 // note: don't use Character.isDigit() here, because | |
225 // we only want ISO-LATIN-1 digits | |
226 if (chrLine[i] >= '0' && chrLine[i] <= '9') { | |
227 tmp *= 10; | |
228 tmp += (int)chrLine[i] - (int)'0'; | |
229 } else { | |
230 throw new MCLParseException("expected a digit in " + new String(chrLine) + " at " + i); | |
231 } | |
232 } | |
233 } | |
234 // add the left over part (last column) | |
235 if (elem < intValues.length) | |
236 intValues[elem] = tmp; | |
237 } | |
238 } |