view src/main/java/nl/cwi/monetdb/mcl/protocol/oldmapi/OldMapiTupleLineParser.java @ 67:87ba760038b6 embedded

More cleanup. About to start the tuple conversions.
author Pedro Ferreira <pedro.ferreira@monetdbsolutions.com>
date Tue, 06 Dec 2016 18:13:54 +0100 (2016-12-06)
parents
children 86967be24645
line wrap: on
line source
package nl.cwi.monetdb.mcl.protocol.oldmapi;

import nl.cwi.monetdb.mcl.protocol.MCLParseException;

/**
 * Parser for result lines of the old MAPI text protocol: either a single
 * value line (leading '=') or a tuple line (leading '[').
 *
 * Created by ferreira on 12/6/16.
 */
final class OldMapiTupleLineParser {

    /**
     * Splits one result line into its column values and stores them in
     * {@code values}.
     *
     * <p>A line whose first character is '=' is a single value line: everything
     * after the '=' is stored as a String in {@code values[0]}. Any other line
     * is scanned as a tuple, starting at index 2 (right after the leading
     * "[ "). A field ends at a tab that directly follows a ',' or at a tab that
     * directly precedes the final ']'. Each field is stored as:
     * <ul>
     *   <li>an unescaped String when it is enclosed in double quotes,</li>
     *   <li>{@code null} when it is exactly the four characters NULL,</li>
     *   <li>the raw field text as a String otherwise.</li>
     * </ul>
     *
     * @param line     the line to parse
     * @param values   output array; its length is the expected number of columns
     * @param helper   scratch buffer, cleared and reused while unescaping quoted fields
     * @param typesMap not used by this method
     * @return the number of columns stored in {@code values}
     * @throws MCLParseException if the line is empty, or if the number of
     *         fields found differs from {@code values.length}
     */
    static int OldMapiParseTupleLine(StringBuilder line, Object[] values, StringBuilder helper, int[] typesMap) throws MCLParseException {
        int len = line.length();
        if (len == 0) {
            // report malformed input as a parse error instead of letting charAt(0) blow up
            throw new MCLParseException("illegal result length: 0\nlast read: <none>");
        }

        // first detect whether this is a single value line (=) or a real tuple ([)
        if (line.charAt(0) == '=') {
            if (values.length != 1) {
                throw new MCLParseException(values.length + " columns expected, but only single value found");
            }
            // return the whole string but the leading =
            values[0] = line.substring(1);
            return 1;
        }

        // extract separate fields by examining string, char for char
        boolean inString = false, escaped = false;
        int cursor = 2, column = 0;
        for (int i = 2; i < len; i++) {
            switch (line.charAt(i)) {
                case '\\':
                    escaped = !escaped;
                    break;
                case '"':
                    /*
                     * Strings are always wrapped in double quotes, so a quote seen
                     * outside a string opens one. Inside a string, a quote closes
                     * it unless it is escaped. Looking at the previous character is
                     * not enough ("test \\" ends the string, "test \\\"" does not),
                     * hence the escaped flag that toggles on every backslash.
                     */
                    if (!inString) {
                        inString = true;
                    } else if (!escaped) {
                        inString = false;
                    }
                    // reset escaped flag
                    escaped = false;
                    break;
                case '\t': {
                    // a tab separates fields when it follows a ',' ...
                    boolean afterComma = line.charAt(i - 1) == ',';
                    // ... or terminates the last field when it precedes the closing ']'
                    boolean beforeCloser = !afterComma && i + 2 == len && line.charAt(i + 1) == ']';
                    // the inString guard has to cover both alternatives
                    if (!inString && (afterComma || beforeCloser)) {
                        if (beforeCloser) {
                            // step onto the ']' so the offsets below are the same in both cases
                            i++;
                        }
                        if (column >= values.length) {
                            // more fields than expected: fail as a parse error, not an array overflow
                            throw new MCLParseException("illegal result length: " + (column + 1) + "\nlast read: " + (column > 0 ? values[column - 1] : "<none>"));
                        }
                        // the field occupies [cursor, fieldEnd): skip the tab and the ',' (or the tab before ']')
                        int fieldEnd = i - 1;
                        int lastChar = fieldEnd - 1;
                        if (line.charAt(cursor) == '"' && line.charAt(lastChar) == '"') {
                            // quoted string: strip the quotes and resolve escapes
                            values[column++] = unescape(line, cursor + 1, lastChar, helper);
                        } else if (fieldEnd - cursor == 4
                                && line.charAt(cursor) == 'N' && line.charAt(cursor + 1) == 'U'
                                && line.charAt(cursor + 2) == 'L' && line.charAt(cursor + 3) == 'L') {
                            values[column++] = null;
                        } else {
                            values[column++] = line.substring(cursor, fieldEnd);
                        }
                        cursor = i + 1;
                    }
                    // reset escaped flag
                    escaped = false;
                    break;
                }
                default:
                    escaped = false;
                    break;
            }
        }
        // check if this result is of the size we expected it to be
        if (column != values.length) {
            throw new MCLParseException("illegal result length: " + column + "\nlast read: " + (column > 0 ? values[column - 1] : "<none>"));
        }

        return column;
    }

    /**
     * Resolves the backslash escapes in {@code line[start, end)} and returns the
     * result. Recognised escapes are \\, \", \n, \t and three digit octal codes
     * \000 - \377; for any other escaped character the backslash is dropped and
     * the character is kept as is.
     *
     * @param line   the line holding the quoted field
     * @param start  index of the first character after the opening quote
     * @param end    index of the closing quote (exclusive bound)
     * @param helper scratch buffer, cleared before use
     * @return the unescaped field content
     */
    private static String unescape(StringBuilder line, int start, int end, StringBuilder helper) {
        // reuse the StringBuilder by cleaning it
        helper.setLength(0);
        // prevent capacity increases
        helper.ensureCapacity(end - start);
        for (int pos = start; pos < end; pos++) {
            char current = line.charAt(pos);
            if (current != '\\' || pos + 1 >= end) {
                // ordinary character, or a lone backslash right before the closing quote
                helper.append(current);
                continue;
            }
            char escapedChar = line.charAt(++pos);
            switch (escapedChar) {
                case 'n':
                    helper.append('\n');
                    break;
                case 't':
                    helper.append('\t');
                    break;
                case '0': case '1': case '2': case '3':
                    // possibly a three digit octal code
                    if (pos + 2 < end && isOctalDigit(line.charAt(pos + 1)) && isOctalDigit(line.charAt(pos + 2))) {
                        int code = ((escapedChar - '0') << 6)
                                | ((line.charAt(pos + 1) - '0') << 3)
                                | (line.charAt(pos + 2) - '0');
                        helper.append((char) code);
                        pos += 2;
                    } else {
                        // not a complete octal code, keep the digit itself
                        helper.append(escapedChar);
                    }
                    break;
                default:
                    // \\ and \" map to themselves; unknown escapes just lose the backslash
                    helper.append(escapedChar);
                    break;
            }
        }
        return helper.toString();
    }

    /** Tells whether {@code c} is one of the characters '0' to '7'. */
    private static boolean isOctalDigit(char c) {
        return c >= '0' && c <= '7';
    }
}