Beispiel #1
0
/*
 * Advance the reader's state machine by one input character.
 *
 * self - reader whose `state` field selects the branch taken; its
 *        `dialect` supplies delimiter, quotechar, escapechar,
 *        skipinitialspace, doublequote, quoting and strict.
 * c    - the character to process; '\n' is the only line terminator
 *        this version recognises.
 *
 * Characters are handed to parse_add_char() and field boundaries to
 * parse_save_field() (both defined elsewhere).  Nothing is returned: the
 * one error case sets self->had_parse_error and raises through
 * PyErr_Format().
 *
 * The order of the character tests inside each state is significant and
 * must be kept, because the dialect characters may compare equal to one
 * another.
 */
static void
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dia = self->dialect;
    const int at_newline = (c == '\n');

    switch (self->state) {
    case START_RECORD:
        /* A newline here is an empty line: stay in START_RECORD. */
        if (at_newline)
            break;
        /* Anything else opens the first field of the record. */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        if (at_newline) {
            /* Line ends right where a field was expected. */
            parse_save_field(self);
            self->state = START_RECORD;
            break;
        }
        if (c == dia->quotechar) {
            /* Opening quote. */
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->escapechar) {
            /* The next character decides what the escape means. */
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == ' ' && dia->skipinitialspace) {
            /* Leading blank is dropped; state stays START_FIELD. */
            break;
        }
        if (c == dia->delimiter) {
            /* Delimiter with nothing before it; state stays START_FIELD. */
            parse_save_field(self);
            break;
        }
        /* First character of an unquoted field. */
        parse_add_char(self, c);
        self->state = IN_FIELD;
        break;

    case ESCAPED_CHAR:
        /*
         * Only escapechar, delimiter and quotechar consume the escape;
         * for any other character the escape character itself is added
         * in front of it.
         */
        if (!(c == dia->escapechar ||
              c == dia->delimiter ||
              c == dia->quotechar))
            parse_add_char(self, dia->escapechar);
        parse_add_char(self, c);
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        if (at_newline) {
            /* End of line closes the field and the record. */
            parse_save_field(self);
            self->state = START_RECORD;
            break;
        }
        if (c == dia->escapechar) {
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == dia->delimiter) {
            /* Field finished; another one is expected. */
            parse_save_field(self);
            self->state = START_FIELD;
            break;
        }
        /* Ordinary character. */
        parse_add_char(self, c);
        break;

    case IN_QUOTED_FIELD:
        if (at_newline) {
            /* Inside quotes a newline is added to the field. */
            parse_add_char(self, '\n');
            break;
        }
        if (c == dia->escapechar) {
            self->state = ESCAPE_IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->quotechar) {
            /*
             * With doublequote set, the next character decides whether
             * this quote is half of a "" pair; otherwise the quoted
             * part simply ends here.
             */
            self->state = dia->doublequote ? QUOTE_IN_QUOTED_FIELD
                                           : IN_FIELD;
            break;
        }
        /* Ordinary character. */
        parse_add_char(self, c);
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* Same rule as ESCAPED_CHAR, but we return to the quoted field. */
        if (!(c == dia->escapechar ||
              c == dia->delimiter ||
              c == dia->quotechar))
            parse_add_char(self, dia->escapechar);
        parse_add_char(self, c);
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* A quote was just seen inside a quoted field. */
        if (dia->quoting != QUOTE_NONE && c == dia->quotechar) {
            /* "" collapses to a single quote character. */
            parse_add_char(self, c);
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->delimiter) {
            /* The quote closed the field. */
            parse_save_field(self);
            self->state = START_FIELD;
            break;
        }
        if (at_newline) {
            /* The quote closed the field and the line ends the record. */
            parse_save_field(self);
            self->state = START_RECORD;
            break;
        }
        if (dia->strict) {
            /* Strict dialects reject anything else after a quote. */
            self->had_parse_error = 1;
            PyErr_Format(error_obj, "%c expected after %c",
                         dia->delimiter,
                         dia->quotechar);
            break;
        }
        /* Lenient dialects continue the field as unquoted text. */
        parse_add_char(self, c);
        self->state = IN_FIELD;
        break;

    }
}
Beispiel #2
0
/*
 * Advance the reader's state machine by one input character.
 *
 * self - reader whose `state` field selects the branch taken; its
 *        `dialect` supplies delimiter, quotechar, escapechar,
 *        skipinitialspace, doublequote, quoting and strict.
 * c    - the character to process.  '\n' and '\r' move the machine to
 *        EAT_CRNL; '\0' acts as a terminator that moves it back to
 *        START_RECORD.
 *
 * Returns 0 on success.  Returns -1 when parse_add_char() or
 * parse_save_field() (both defined elsewhere) return a negative value,
 * or after an error has been raised through PyErr_Format().
 *
 * The order of the character tests inside each state is significant and
 * must be kept, because the dialect characters may compare equal to one
 * another.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dia = self->dialect;
    const int is_nul = (c == '\0');
    const int is_crlf = (c == '\n' || c == '\r');

    switch (self->state) {
    case START_RECORD:
        /* A terminator straight away: nothing to do for this record. */
        if (is_nul)
            break;
        if (is_crlf) {
            self->state = EAT_CRNL;
            break;
        }
        /* Anything else opens the first field of the record. */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        if (is_crlf || is_nul) {
            /* Line ends right where a field was expected. */
            if (parse_save_field(self) < 0)
                return -1;
            if (is_nul)
                self->state = START_RECORD;
            else
                self->state = EAT_CRNL;
            break;
        }
        if (c == dia->quotechar && dia->quoting != QUOTE_NONE) {
            /* Opening quote. */
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->escapechar) {
            /* The next character is taken literally. */
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == ' ' && dia->skipinitialspace) {
            /* Leading blank is dropped; state stays START_FIELD. */
            break;
        }
        if (c == dia->delimiter) {
            /* Delimiter with nothing before it; state stays START_FIELD. */
            if (parse_save_field(self) < 0)
                return -1;
            break;
        }
        /* First character of an unquoted field. */
        if (dia->quoting == QUOTE_NONNUMERIC)
            self->numeric_field = 1;
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case ESCAPED_CHAR:
        /* An escaped '\0' is added to the field as '\n'. */
        if (is_nul)
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        if (is_crlf || is_nul) {
            /* End of line closes the field and the record. */
            if (parse_save_field(self) < 0)
                return -1;
            if (is_nul)
                self->state = START_RECORD;
            else
                self->state = EAT_CRNL;
            break;
        }
        if (c == dia->escapechar) {
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == dia->delimiter) {
            /* Field finished; another one is expected. */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
            break;
        }
        /* Ordinary character. */
        if (parse_add_char(self, c) < 0)
            return -1;
        break;

    case IN_QUOTED_FIELD:
        /* '\0' inside quotes is ignored and the state is unchanged. */
        if (is_nul)
            break;
        if (c == dia->escapechar) {
            self->state = ESCAPE_IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->quotechar && dia->quoting != QUOTE_NONE) {
            /*
             * With doublequote set, the next character decides whether
             * this quote is half of a "" pair; otherwise the quoted
             * part simply ends here.
             */
            self->state = dia->doublequote ? QUOTE_IN_QUOTED_FIELD
                                           : IN_FIELD;
            break;
        }
        /* Ordinary character (newlines included). */
        if (parse_add_char(self, c) < 0)
            return -1;
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* Same rule as ESCAPED_CHAR, but we return to the quoted field. */
        if (is_nul)
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* A quote was just seen inside a quoted field. */
        if (dia->quoting != QUOTE_NONE && c == dia->quotechar) {
            /* "" collapses to a single quote character. */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->delimiter) {
            /* The quote closed the field. */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
            break;
        }
        if (is_crlf || is_nul) {
            /* The quote closed the field and the line ends the record. */
            if (parse_save_field(self) < 0)
                return -1;
            if (is_nul)
                self->state = START_RECORD;
            else
                self->state = EAT_CRNL;
            break;
        }
        if (dia->strict) {
            /* Strict dialects reject anything else after a quote. */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                         dia->delimiter,
                         dia->quotechar);
            return -1;
        }
        /* Lenient dialects continue the field as unquoted text. */
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case EAT_CRNL:
        /* Further '\n' / '\r' characters are swallowed. */
        if (is_crlf)
            break;
        if (is_nul) {
            self->state = START_RECORD;
            break;
        }
        /* Any other character after a line terminator is an error. */
        PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
        return -1;

    }
    return 0;
}
Beispiel #3
0
/*
 * Advance the CSV parser's state machine by one input character.
 *
 * self - parser whose `state` field selects the branch taken; it also
 *        carries `quotechar`, `delimiter` and an `engine` whose
 *        `is_fail` flag is consulted after a field is saved in IN_FIELD.
 * c    - the character to process; '\n' and '\r' both end a record.
 *
 * Returns 0 on success and -1 when parse_add_char() or
 * parse_save_field() (both defined elsewhere) return a negative value.
 */
static int
parse_process_char(CSVParser *self, char c)
{
    const int is_eol = (c == '\n' || c == '\r');

    switch (self->state) {
    case START_RECORD:
        if (is_eol) {
            /* Blank line (or second half of CR/LF): remain here. */
            self->state = START_RECORD;
            break;
        }
        /* Anything else opens the first field of the record. */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        if (is_eol) {
            /* Line ends right where a field was expected. */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_RECORD;
            break;
        }
        if (c == self->quotechar) {
            /* Opening quote. */
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == self->delimiter) {
            /* Delimiter with nothing before it; state stays START_FIELD. */
            if (parse_save_field(self) < 0)
                return -1;
            break;
        }
        /* First character of an unquoted field. */
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        if (is_eol) {
            /* End of line closes the field and the record. */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_RECORD;
            break;
        }
        if (c == self->delimiter) {
            if (parse_save_field(self) < 0)
                return -1;
            /*
             * The flag is read only after the field has been saved.
             * When set, the remainder of the line is skipped through
             * QUERY_FAIL.
             */
            self->state = self->engine->is_fail ? QUERY_FAIL
                                                : START_FIELD;
            break;
        }
        /* Ordinary character. */
        if (parse_add_char(self, c) < 0)
            return -1;
        break;

    case IN_QUOTED_FIELD:
        if (c == self->quotechar) {
            /* The next character decides whether this is a "" pair. */
            self->state = QUOTE_IN_QUOTED_FIELD;
            break;
        }
        /* Ordinary character (newlines included). */
        if (parse_add_char(self, c) < 0)
            return -1;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* A quote was just seen inside a quoted field. */
        if (c == self->quotechar) {
            /* "" collapses to a single quote character. */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == self->delimiter) {
            /*
             * The quote closed the field.
             * NOTE(review): unlike IN_FIELD, this path does not look at
             * self->engine->is_fail - confirm that is intended.
             */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
            break;
        }
        if (is_eol) {
            /* The quote closed the field and the line ends the record. */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_RECORD;
            break;
        }
        /* Anything else continues the field as unquoted text. */
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case QUERY_FAIL:
        /* Discard everything up to the end of the current line. */
        if (is_eol)
            self->state = START_RECORD;
        break;
    }
    return 0;
}