Mercurial > hg > monetdb-java
comparison src/main/java/nl/cwi/monetdb/mcl/parser/TupleLineParser.java @ 0:a5a898f6886c
Copy of MonetDB java directory changeset e6e32756ad31.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Wed, 21 Sep 2016 09:34:48 +0200 (2016-09-21) |
parents | |
children | 57978db4ee57 b9b35ca2eec2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a5a898f6886c |
---|---|
1 /* | |
2 * This Source Code Form is subject to the terms of the Mozilla Public | |
3 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
5 * | |
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2016 MonetDB B.V. | |
7 */ | |
8 | |
9 package nl.cwi.monetdb.mcl.parser; | |
10 | |
11 /** | |
12 * The TupleLineParser extracts the values from a given tuple. The | |
13 * number of values that are expected are known upfront to speed up | |
14 * allocation and validation. | |
15 * | |
16 * @author Fabian Groffen <Fabian.Groffen> | |
17 */ | |
18 public class TupleLineParser extends MCLParser { | |
19 /** | |
20 * Constructs a TupleLineParser which expects columncount columns. | |
21 * | |
22 * @param columncount the number of columns in the to be parsed string | |
23 */ | |
24 public TupleLineParser(int columncount) { | |
25 super(columncount); | |
26 } | |
27 | |
28 /** | |
29 * Parses the given String source as tuple line. If source cannot | |
30 * be parsed, a ParseException is thrown. The columncount argument | |
31 * is used for allocation of the returned array. While this seems | |
32 * illogical, the caller should know this size, since the | |
33 * StartOfHeader contains this information. | |
34 * | |
35 * @param source a String which should be parsed | |
36 * @return 0, as there is no 'type' of TupleLine | |
37 * @throws ParseException if an error occurs during parsing | |
38 */ | |
39 @Override | |
40 public int parse(String source) throws MCLParseException { | |
41 int len = source.length(); | |
42 char[] chrLine = new char[len]; | |
43 source.getChars(0, len, chrLine, 0); | |
44 | |
45 // first detect whether this is a single value line (=) or a | |
46 // real tuple ([) | |
47 if (chrLine[0] == '=') { | |
48 if (values.length != 1) | |
49 throw new MCLParseException(values.length + | |
50 " columns expected, but only single value found"); | |
51 | |
52 // return the whole string but the leading = | |
53 values[0] = source.substring(1); | |
54 | |
55 // reset colnr | |
56 reset(); | |
57 | |
58 return 0; | |
59 } | |
60 | |
61 // extract separate fields by examining string, char for char | |
62 boolean inString = false, escaped = false; | |
63 int cursor = 2, column = 0, i = 2; | |
64 StringBuilder uesc = new StringBuilder(); | |
65 for (; i < len; i++) { | |
66 switch(chrLine[i]) { | |
67 default: | |
68 escaped = false; | |
69 break; | |
70 case '\\': | |
71 escaped = !escaped; | |
72 break; | |
73 case '"': | |
74 /** | |
75 * If all strings are wrapped between two quotes, a \" can | |
76 * never exist outside a string. Thus if we believe that we | |
77 * are not within a string, we can safely assume we're about | |
78 * to enter a string if we find a quote. | |
79 * If we are in a string we should stop being in a string if | |
80 * we find a quote which is not prefixed by a \, for that | |
81 * would be an escaped quote. However, a nasty situation can | |
82 * occur where the string is like "test \\" as obvious, a | |
83 * test for a \ in front of a " doesn't hold here for all | |
84 * cases. Because "test \\\"" can exist as well, we need to | |
85 * know if a quote is prefixed by an escaping slash or not. | |
86 */ | |
87 if (!inString) { | |
88 inString = true; | |
89 } else if (!escaped) { | |
90 inString = false; | |
91 } | |
92 | |
93 // reset escaped flag | |
94 escaped = false; | |
95 break; | |
96 case '\t': | |
97 if (!inString && | |
98 (i > 0 && chrLine[i - 1] == ',') || | |
99 (i + 1 == len - 1 && chrLine[++i] == ']')) // dirty | |
100 { | |
101 // split! | |
102 if (chrLine[cursor] == '"' && | |
103 chrLine[i - 2] == '"') | |
104 { | |
105 // reuse the StringBuilder by cleaning it | |
106 uesc.delete(0, uesc.length()); | |
107 // prevent capacity increasements | |
108 uesc.ensureCapacity((i - 2) - (cursor + 1)); | |
109 for (int pos = cursor + 1; pos < i - 2; pos++) { | |
110 if (chrLine[pos] == '\\' && pos + 1 < i - 2) { | |
111 pos++; | |
112 // strToStr and strFromStr in gdk_atoms.mx only | |
113 // support \t \n \\ \" and \377 | |
114 switch (chrLine[pos]) { | |
115 case '\\': | |
116 uesc.append('\\'); | |
117 break; | |
118 case 'n': | |
119 uesc.append('\n'); | |
120 break; | |
121 case 't': | |
122 uesc.append('\t'); | |
123 break; | |
124 case '"': | |
125 uesc.append('"'); | |
126 break; | |
127 case '0': case '1': case '2': case '3': | |
128 // this could be an octal number, let's check it out | |
129 if (pos + 2 < i - 2 && | |
130 chrLine[pos + 1] >= '0' && chrLine[pos + 1] <= '7' && | |
131 chrLine[pos + 2] >= '0' && chrLine[pos + 2] <= '7' | |
132 ) { | |
133 // we got the number! | |
134 try { | |
135 uesc.append((char)(Integer.parseInt("" + chrLine[pos] + chrLine[pos + 1] + chrLine[pos + 2], 8))); | |
136 pos += 2; | |
137 } catch (NumberFormatException e) { | |
138 // hmmm, this point should never be reached actually... | |
139 throw new AssertionError("Flow error, should never try to parse non-number"); | |
140 } | |
141 } else { | |
142 // do default action if number seems not to be correct | |
143 uesc.append(chrLine[pos]); | |
144 } | |
145 break; | |
146 default: | |
147 // this is wrong, just ignore the escape, and print the char | |
148 uesc.append(chrLine[pos]); | |
149 break; | |
150 } | |
151 } else { | |
152 uesc.append(chrLine[pos]); | |
153 } | |
154 } | |
155 | |
156 // put the unescaped string in the right place | |
157 values[column++] = uesc.toString(); | |
158 } else if ((i - 1) - cursor == 4 && | |
159 source.indexOf("NULL", cursor) == cursor) | |
160 { | |
161 values[column++] = null; | |
162 } else { | |
163 values[column++] = | |
164 source.substring(cursor, i - 1); | |
165 } | |
166 cursor = i + 1; | |
167 } | |
168 | |
169 // reset escaped flag | |
170 escaped = false; | |
171 break; | |
172 } | |
173 } | |
174 // check if this result is of the size we expected it to be | |
175 if (column != values.length) | |
176 throw new MCLParseException("illegal result length: " + column + "\nlast read: " + (column > 0 ? values[column - 1] : "<none>")); | |
177 | |
178 // reset colnr | |
179 reset(); | |
180 | |
181 return 0; | |
182 } | |
183 } |