/*
 * Feed one input character into the CSV reader state machine.
 *
 * self - reader whose `state` is advanced; characters are appended to the
 *        current field with parse_add_char() and completed fields are
 *        committed with parse_save_field().
 * c    - next input character.  '\n' is the only record terminator this
 *        function recognises.
 *
 * Returns nothing.  In strict mode, a character following a closing quote
 * that is neither a second quote, the delimiter nor '\n' sets
 * self->had_parse_error and reports the problem through
 * PyErr_Format(error_obj, ...).
 */
static void
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\n') {
            /* empty line - return [] */
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n') {
            /* save empty field - return [fields] */
            parse_save_field(self);
            self->state = START_RECORD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /*
             * start quoted field.  With QUOTE_NONE the quote character
             * gets no special treatment and falls through to the
             * ordinary-character branch below, matching the QUOTE_NONE
             * guard already applied in QUOTE_IN_QUOTED_FIELD.
             */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace) {
            /* ignore space at start of field */
        }
        else if (c == dialect->delimiter) {
            /* save empty field; state stays START_FIELD */
            parse_save_field(self);
        }
        else {
            /* begin new unquoted field */
            parse_add_char(self, c);
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /*
         * The escape character is dropped only when it precedes the
         * escape, delimiter or quote character; otherwise it is kept
         * in the field ahead of c.
         */
        if (c != dialect->escapechar &&
            c != dialect->delimiter &&
            c != dialect->quotechar) {
            parse_add_char(self, dialect->escapechar);
        }
        parse_add_char(self, c);
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n') {
            /* end of line - return [fields] */
            parse_save_field(self);
            self->state = START_RECORD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            parse_save_field(self);
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            parse_add_char(self, c);
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\n') {
            /* end of line - save '\n' in field */
            parse_add_char(self, '\n');
        }
        else if (c == dialect->escapechar) {
            /* possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            parse_add_char(self, c);
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* same rule as ESCAPED_CHAR, but parsing resumes inside quotes */
        if (c != dialect->escapechar &&
            c != dialect->delimiter &&
            c != dialect->quotechar) {
            parse_add_char(self, dialect->escapechar);
        }
        parse_add_char(self, c);
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            parse_add_char(self, c);
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            parse_save_field(self);
            self->state = START_FIELD;
        }
        else if (c == '\n') {
            /* end of line - return [fields] */
            parse_save_field(self);
            self->state = START_RECORD;
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the character, continue unquoted */
            parse_add_char(self, c);
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            self->had_parse_error = 1;
            PyErr_Format(error_obj, "%c expected after %c",
                         dialect->delimiter,
                         dialect->quotechar);
        }
        break;
    }
}
/*
 * Advance the CSV reader state machine by a single input character.
 *
 * self - reader whose `state` (and, for QUOTE_NONNUMERIC dialects,
 *        `numeric_field`) is updated.
 * c    - next input character.  '\n' and '\r' are treated as line
 *        terminators; '\0' is handled as an end-of-input marker that
 *        returns the machine to START_RECORD (presumably supplied by the
 *        caller after each line - confirm against the caller).
 *
 * Returns 0 on success.  Returns -1 when parse_add_char() or
 * parse_save_field() returns a negative value, or after reporting a parse
 * error through PyErr_Format(error_obj, ...).
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dia = self->dialect;
    /* Classify the incoming character once; c is never modified below. */
    const int at_nul = (c == '\0');
    const int at_newline = (c == '\n' || c == '\r');

    switch (self->state) {
    case START_RECORD:
        /* beginning of a record */
        if (at_nul) {
            /* empty line - return [] */
            break;
        }
        if (at_newline) {
            self->state = EAT_CRNL;
            break;
        }
        /* anything else is the first character of a field */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* a field is expected here */
        if (at_newline || at_nul) {
            /* commit an empty field - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            if (at_nul) {
                self->state = START_RECORD;
            }
            else {
                self->state = EAT_CRNL;
            }
            break;
        }
        if (c == dia->quotechar && dia->quoting != QUOTE_NONE) {
            /* opening quote */
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == ' ' && dia->skipinitialspace) {
            /* leading space is dropped */
            break;
        }
        if (c == dia->delimiter) {
            /* commit an empty field; remain in START_FIELD */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            break;
        }
        /* first character of an unquoted field */
        if (dia->quoting == QUOTE_NONNUMERIC) {
            self->numeric_field = 1;
        }
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        self->state = IN_FIELD;
        break;

    case ESCAPED_CHAR:
        /* an escaped '\0' is stored as '\n' */
        if (parse_add_char(self, at_nul ? '\n' : c) < 0) {
            return -1;
        }
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* inside an unquoted field */
        if (at_newline || at_nul) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            if (at_nul) {
                self->state = START_RECORD;
            }
            else {
                self->state = EAT_CRNL;
            }
            break;
        }
        if (c == dia->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
            break;
        }
        if (c == dia->delimiter) {
            /* commit the field and wait for the next one */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_FIELD;
            break;
        }
        /* ordinary character - append to the field */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* inside a quoted field */
        if (at_nul) {
            /* nothing to store; stay inside the quotes */
            break;
        }
        if (c == dia->escapechar) {
            /* possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->quotechar && dia->quoting != QUOTE_NONE) {
            /*
             * With doublequote the next character decides whether this
             * was "" or the closing quote; otherwise the quoted part
             * ends right here.
             */
            self->state = dia->doublequote ? QUOTE_IN_QUOTED_FIELD
                                           : IN_FIELD;
            break;
        }
        /* ordinary character (including '\n' and '\r') - append */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* an escaped '\0' is stored as '\n' */
        if (parse_add_char(self, at_nul ? '\n' : c) < 0) {
            return -1;
        }
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote mode - a quote was just seen inside a quoted field */
        if (dia->quoting != QUOTE_NONE && c == dia->quotechar) {
            /* "" collapses to a single " */
            if (parse_add_char(self, c) < 0) {
                return -1;
            }
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == dia->delimiter) {
            /* commit the field and wait for the next one */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_FIELD;
            break;
        }
        if (at_newline || at_nul) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            if (at_nul) {
                self->state = START_RECORD;
            }
            else {
                self->state = EAT_CRNL;
            }
            break;
        }
        if (dia->strict) {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                         dia->delimiter,
                         dia->quotechar);
            return -1;
        }
        /* lenient mode: keep the character and continue unquoted */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        self->state = IN_FIELD;
        break;

    case EAT_CRNL:
        /* after a line terminator: swallow further '\n' / '\r' */
        if (at_newline) {
            break;
        }
        if (at_nul) {
            self->state = START_RECORD;
            break;
        }
        PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
        return -1;
    }

    return 0;
}
/*
 * Advance the CSV parser state machine by a single input character.
 *
 * self - parser whose `state` is updated; self->quotechar and
 *        self->delimiter define the dialect, and self->engine->is_fail is
 *        consulted after an unquoted field is saved at a delimiter.
 * c    - next input character; '\n' and '\r' both terminate a record.
 *
 * Returns 0 on success, or -1 as soon as parse_add_char() or
 * parse_save_field() returns a negative value.
 */
static int
parse_process_char(CSVParser *self, char c)
{
    /* c is never modified, so classify it once up front. */
    const int at_eol = (c == '\n' || c == '\r');

    switch (self->state) {
    case START_RECORD:
        /* beginning of a record */
        if (at_eol) {
            /* blank line: remain at the start of a record */
            self->state = START_RECORD;
            break;
        }
        /* anything else is the first character of a field */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* a field is expected here */
        if (at_eol) {
            /* commit an empty field - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_RECORD;
            break;
        }
        if (c == self->quotechar) {
            /* opening quote */
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == self->delimiter) {
            /* commit an empty field; remain in START_FIELD */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            break;
        }
        /* first character of an unquoted field */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* inside an unquoted field */
        if (at_eol) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_RECORD;
            break;
        }
        if (c == self->delimiter) {
            /* commit the field, then decide where to go next */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            /*
             * NOTE(review): is_fail is only checked on this path, not
             * when an empty or quoted field is saved - confirm that is
             * intended.
             */
            self->state = self->engine->is_fail ? QUERY_FAIL
                                                : START_FIELD;
            break;
        }
        /* ordinary character - append to the field */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* inside a quoted field */
        if (c == self->quotechar) {
            /* either "" or the closing quote; next character decides */
            self->state = QUOTE_IN_QUOTED_FIELD;
            break;
        }
        /* ordinary character (line terminators included) - append */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* a quote was just seen inside a quoted field */
        if (c == self->quotechar) {
            /* "" collapses to a single " */
            if (parse_add_char(self, c) < 0) {
                return -1;
            }
            self->state = IN_QUOTED_FIELD;
            break;
        }
        if (c == self->delimiter) {
            /* commit the field and wait for the next one */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_FIELD;
            break;
        }
        if (at_eol) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0) {
                return -1;
            }
            self->state = START_RECORD;
            break;
        }
        /* text after the closing quote: keep it, continue unquoted */
        if (parse_add_char(self, c) < 0) {
            return -1;
        }
        self->state = IN_FIELD;
        break;

    case QUERY_FAIL:
        /* ignore every character up to the next line terminator */
        if (at_eol) {
            self->state = START_RECORD;
        }
        break;
    }

    return 0;
}