int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) { /* Finalize parsing. Needed, for example, when file does not end in a newline */ int quoted = p->quoted; int pstate = p->pstate; size_t spaces = p->spaces; size_t entry_pos = p->entry_pos; if (p == NULL) return -1; if (p->pstate == FIELD_BEGUN && p->quoted && p->options & CSV_STRICT && p->options & CSV_STRICT_FINI) { /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ p->status = CSV_EPARSE; return -1; } switch (p->pstate) { case FIELD_MIGHT_HAVE_ENDED: p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ /* Fall-through */ case FIELD_NOT_BEGUN: case FIELD_BEGUN: quoted = p->quoted, pstate = p->pstate; spaces = p->spaces, entry_pos = p->entry_pos; SUBMIT_FIELD(p); SUBMIT_ROW(p, -1); case ROW_NOT_BEGUN: /* Already ended properly */ ; } /* Reset parser */ p->spaces = p->quoted = p->entry_pos = p->status = 0; p->pstate = ROW_NOT_BEGUN; return 0; }
size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) { unsigned const char *us = s; /* Access input data as array of unsigned char */ unsigned char c; /* The character we are currently processing */ size_t pos = 0; /* The number of characters we have processed in this call */ /* Store key fields into local variables for performance */ unsigned char delim = p->delim_char; unsigned char quote = p->quote_char; int (*is_space)(unsigned char) = p->is_space; int (*is_term)(unsigned char) = p->is_term; int quoted = p->quoted; int pstate = p->pstate; size_t spaces = p->spaces; size_t entry_pos = p->entry_pos; if (!p->entry_buf && pos < len) { /* Buffer hasn't been allocated yet and len > 0 */ if (csv_increase_buffer(p) != 0) { p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos; } } while (pos < len) { /* Check memory usage, increase buffer if neccessary */ if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { if (csv_increase_buffer(p) != 0) { p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos; } } c = us[pos++]; switch (pstate) { case ROW_NOT_BEGUN: case FIELD_NOT_BEGUN: if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ continue; } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ if (pstate == FIELD_NOT_BEGUN) { SUBMIT_FIELD(p); SUBMIT_ROW(p, (unsigned char)c); } else { /* ROW_NOT_BEGUN */ /* Don't submit empty rows by default */ if (p->options & CSV_REPALL_NL) { SUBMIT_ROW(p, (unsigned char)c); } } continue; } else if (c == delim) { /* Comma */ SUBMIT_FIELD(p); break; } else if (c == quote) { /* Quote */ pstate = FIELD_BEGUN; quoted = 1; } else { /* Anything else */ pstate = FIELD_BEGUN; quoted = 0; SUBMIT_CHAR(p, c); } break; case FIELD_BEGUN: if (c == quote) { /* Quote */ if (quoted) { SUBMIT_CHAR(p, c); pstate = FIELD_MIGHT_HAVE_ENDED; } else { /* STRICT ERROR - double quote inside non-quoted field */ if (p->options & CSV_STRICT) { p->status = CSV_EPARSE; p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos-1; } SUBMIT_CHAR(p, c); spaces = 0; } } else if (c == delim) { /* Comma */ if (quoted) { SUBMIT_CHAR(p, c); } else { SUBMIT_FIELD(p); } } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ if (!quoted) { SUBMIT_FIELD(p); SUBMIT_ROW(p, (unsigned char)c); } else { SUBMIT_CHAR(p, c); } } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ SUBMIT_CHAR(p, c); spaces++; } else { /* Anything else */ SUBMIT_CHAR(p, c); spaces = 0; } break; case FIELD_MIGHT_HAVE_ENDED: /* This only happens when a quote character is encountered in a quoted field */ if (c == delim) { /* Comma */ entry_pos -= spaces + 1; /* get rid of spaces and original quote */ SUBMIT_FIELD(p); } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ entry_pos -= spaces + 1; /* get rid of spaces and original quote */ SUBMIT_FIELD(p); SUBMIT_ROW(p, (unsigned char)c); } else if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ SUBMIT_CHAR(p, c); spaces++; } else if (c == quote) { /* Quote */ if (spaces) { /* STRICT ERROR - unescaped double quote */ if (p->options & CSV_STRICT) { p->status = CSV_EPARSE; p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos-1; } spaces = 0; SUBMIT_CHAR(p, c); } else { /* Two quotes in a row */ pstate = FIELD_BEGUN; } } else { /* Anything else */ /* STRICT ERROR - unescaped double quote */ if (p->options & CSV_STRICT) { p->status = CSV_EPARSE; p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos-1; } pstate = FIELD_BEGUN; spaces = 0; SUBMIT_CHAR(p, c); } break; default: break; } } p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; return pos; }