/*
 * Stream callback: parse one CSV line and emit it downstream as an array.
 *
 * strm - stream whose `data` member holds the parser state (struct csv_data)
 * data - value carrying the input line as a string
 *
 * Behaviour, in order:
 *  - If a fragment is pending in cd->prev, it is joined to this line with a
 *    '\n' in between and the pending fragment is cleared.
 *  - If count_fields() reports -1 the line is stashed in cd->prev and
 *    STRM_NG is returned (presumably an unterminated quoted field - the
 *    exact condition lives in count_fields()).
 *  - A line whose field count differs from cd->n (once cd->n is set) is
 *    rejected with STRM_NG.
 *  - The first line, if every field is a string, is kept as the header row
 *    and not emitted.  The first emitted row fixes the per-column types;
 *    later rows whose column types differ raise "csv type mismatch".
 *
 * Returns STRM_OK when the line was consumed (emitted or kept as headers),
 * STRM_NG when it was stashed, rejected, or an allocation failed.
 */
static int
csv_accept(strm_stream* strm, strm_value data)
{
  struct csv_data *cd = strm->data;
  strm_string line = strm_value_str(data);
  strm_array ary;
  strm_value *bp;
  const char *fbeg;             /* start of the field being scanned */
  const char *ptr;
  const char *pend;
  enum csv_type *types;
  enum csv_type ftype;
  int fieldcnt;
  int in_quote = 0;
  int all_str = 1;
  int i = 0;                    /* index of the field being scanned */

  if (cd->prev) {
    /* join the pending fragment and this line, restoring the newline */
    strm_int prevlen = strm_str_len(cd->prev);
    strm_int len = prevlen + strm_str_len(line) + 1;
    char *tmp = malloc(len);

    if (!tmp) {
      /* out of memory: drop this line; the pending fragment is left as is */
      return STRM_NG;
    }
    memcpy(tmp, strm_str_ptr(cd->prev), prevlen);
    tmp[prevlen] = '\n';
    memcpy(tmp + prevlen + 1, strm_str_ptr(line), strm_str_len(line));
    line = strm_str_new(tmp, len);
    free(tmp);
    cd->prev = strm_str_null;
  }

  fieldcnt = count_fields(line);
  if (fieldcnt == -1) {
    /* cannot be parsed yet; keep it until the next line arrives */
    cd->prev = line;
    return STRM_NG;
  }
  if (cd->n > 0 && fieldcnt != cd->n) {
    return STRM_NG;
  }

  ptr = strm_str_ptr(line);
  pend = ptr + strm_str_len(line);
  ary = strm_ary_new(NULL, fieldcnt);
  if (!ary) {
    return STRM_NG;
  }
  bp = (strm_value*)strm_ary_ptr(ary);
  types = cd->types;
  ftype = types ? types[0] : TYPE_UNSPC;

  for (fbeg = ptr; ptr < pend; ptr++) {
    if (in_quote) {
      if (*ptr == '\"') {
        /* bounds check keeps the look-ahead inside the line buffer */
        if (ptr + 1 < pend && ptr[1] == '\"') {
          /* doubled quote inside a quoted field */
          ptr++;
          ftype = TYPE_ESC;
          continue;
        }
        in_quote = 0;
      }
      continue;
    }

    if (*ptr == '\"') {
      in_quote = 1;
      if (ptr == fbeg) {
        /* quote opens the field: skip it and mark the field as a string */
        ftype = TYPE_STR;
        fbeg = ptr + 1;
      }
      else {
        /* quote in the middle of a field */
        ftype = TYPE_ESC;
      }
    }
    else if (*ptr == ',') {
      /* field separator: convert the finished field and start the next */
      *bp = csv_value(fbeg, ptr - fbeg, ftype);
      if (!strm_string_p(*bp)) all_str = 0;
      bp++;
      fbeg = ptr + 1;
      i++;
      ftype = types ? types[i] : TYPE_UNSPC;
    }
  }

  /*
   * Trim a trailing newline, then a trailing carriage return.  The
   * `ptr > fbeg` guard prevents reading before the buffer on an empty
   * line and stops the trim from crossing the start of the last field.
   */
  if (ptr > fbeg && ptr[-1] == '\n') {
    ptr--;
  }
  if (ptr > fbeg && ptr[-1] == '\r') {
    ptr--;
  }
  *bp = csv_value(fbeg, ptr - fbeg, ftype);
  if (!strm_string_p(*bp)) all_str = 0;

  /* check headers: very first line seen (no headers, no types yet) */
  if (!cd->headers && !cd->types) {
    if (all_str) {
      /* all-string first line is kept as the header row, not emitted */
      cd->headers = ary;
      ary = strm_ary_null;
    }
    cd->n = fieldcnt;
  }

  if (ary) {
    /* set headers if any */
    if (cd->headers) {
      strm_ary_headers(ary) = cd->headers;
    }

    if (!cd->types) {
      /* first data line (after optional header line) */
      if (cd->headers) {
        if (all_str) {
          /* data line is all string; emit header line */
          /* NOTE(review): `ary` keeps the headers reference assigned
             above even though cd->headers is cleared here - confirm
             this is intended. */
          strm_emit(strm, strm_ary_value(cd->headers), NULL);
          cd->headers = strm_ary_null;
        }
        else {
          /* intern header strings */
          strm_array h = cd->headers;
          strm_value *p = (strm_value*)strm_ary_ptr(h);
          int j;

          for (j = 0; j < strm_ary_len(h); j++) {
            strm_string str = strm_value_str(p[j]);

            p[j] = strm_str_value(strm_str_intern_str(str));
          }
        }
      }

      /* initialize types (determined by first data line) */
      cd->types = malloc(sizeof(enum csv_type) * fieldcnt);
      if (!cd->types) {
        return STRM_NG;
      }
      for (i = 0; i < fieldcnt; i++) {
        cd->types[i] = csv_type(strm_ary_ptr(ary)[i]);
      }
    }
    else {
      /* type check */
      for (i = 0; i < fieldcnt; i++) {
        if (cd->types[i] != csv_type(strm_ary_ptr(ary)[i])) {
          /* type mismatch (error); skip this line */
          strm_raise(strm, "csv type mismatch");
          return STRM_NG;
        }
      }
    }

    /* `ary` is an array, so wrap it the same way the header array is */
    strm_emit(strm, strm_ary_value(ary), NULL);
  }
  return STRM_OK;
}
/*
 * Task callback: parse one CSV line and emit it downstream as an array.
 *
 * task - task whose `data` member holds the parser state (struct csv_data)
 * data - value carrying the input line as a string
 *
 * Behaviour, in order:
 *  - If a fragment is pending in cd->prev, it is joined to this line with a
 *    '\n' in between and the pending fragment is cleared.
 *  - If count_fields() reports -1 the line is stashed in cd->prev and
 *    nothing is emitted (presumably an unterminated quoted field - the
 *    exact condition lives in count_fields()).
 *  - A line whose field count differs from cd->n (once cd->n is set) is
 *    silently dropped.
 *  - Field text is copied into a scratch buffer with the surrounding quotes
 *    removed and doubled quotes collapsed to one.  Quoted fields always
 *    become strings; unquoted fields are converted by csv_value().
 *  - The first line, if every field is a string, is kept as the header row
 *    and not emitted.  The first emitted row fixes the per-column types;
 *    on later rows a mismatching value is converted to a string when the
 *    column type is STRING_TYPE, otherwise the whole line is dropped.
 */
static void
csv_accept(strm_task* task, strm_value data)
{
  struct csv_data *cd = task->data;
  strm_string *line = strm_value_str(data);
  strm_array *ary;
  strm_value *bp;
  char *tmp;                    /* scratch buffer for the current field */
  char *tptr;                   /* write position inside tmp */
  const char *ptr;
  const char *pend;
  int fieldcnt;
  int in_quote = 0;
  int quoted = 0;               /* current field contained a quote */
  int all_str = 1;
  int i;

  if (cd->prev) {
    /* join the pending fragment and this line, restoring the newline */
    strm_string *str = strm_str_new(NULL, cd->prev->len+line->len+1);
    char *joined;

    if (!str) {
      /* allocation failed: drop this line; pending fragment is left as is */
      return;
    }
    joined = (char*)str->ptr;
    memcpy(joined, cd->prev->ptr, cd->prev->len);
    joined[cd->prev->len] = '\n';
    memcpy(joined + cd->prev->len + 1, line->ptr, line->len);
    line = str;
    cd->prev = NULL;
  }

  fieldcnt = count_fields(line);
  if (fieldcnt == -1) {
    /* cannot be parsed yet; keep it until the next line arrives */
    cd->prev = line;
    return;
  }
  if (cd->n > 0 && fieldcnt != cd->n) {
    return;
  }

  ary = strm_ary_new(NULL, fieldcnt);
  if (!ary) {
    return;
  }
  bp = (strm_value*)ary->ptr;

  /* a field can never be longer than the whole line */
  tmp = malloc(line->len + 1);
  if (!tmp) {
    return;
  }
  *tmp = '\0';
  tptr = tmp;

  ptr = line->ptr;
  pend = ptr + line->len;
  for (; ptr < pend; ptr++) {
    if (in_quote) {
      if (*ptr != '\"') {
        *tptr++ = *ptr;
      }
      /* bounds check keeps the look-ahead inside the line buffer */
      else if (ptr + 1 < pend && ptr[1] == '\"') {
        /* doubled quote stands for one literal quote */
        *tptr++ = '\"';
        ptr++;
      }
      else {
        in_quote = 0;
      }
      continue;
    }

    if (*ptr == '\"') {
      in_quote = 1;
      quoted = 1;
    }
    else if (*ptr == ',') {
      /* field separator: convert the finished field and reset the buffer */
      if (quoted) {
        *bp = strm_str_value(tmp, tptr-tmp);
      }
      else {
        *bp = csv_value(tmp, tptr-tmp);
      }
      if (!strm_str_p(*bp)) all_str = 0;
      bp++;
      tptr = tmp;
      quoted = 0;
    }
    else {
      *tptr++ = *ptr;
    }
  }

  /* trim newline at the end */
  if (tptr > tmp && tptr[-1] == '\n') {
    tptr--;
  }
  /* trim carriage return at the end */
  if (tptr > tmp && tptr[-1] == '\r') {
    tptr--;
  }
  /* last field: honour `quoted` exactly like comma-terminated fields */
  if (quoted) {
    *bp = strm_str_value(tmp, tptr-tmp);
  }
  else {
    *bp = csv_value(tmp, tptr-tmp);
  }
  if (!strm_str_p(*bp)) all_str = 0;
  free(tmp);

  /* check headers: very first line seen (no headers, no types yet) */
  if (!cd->headers && !cd->types) {
    if (all_str) {
      /* all-string first line is kept as the header row, not emitted */
      cd->headers = ary;
      ary = NULL;
    }
    cd->n = fieldcnt;
  }
  if (!ary) {
    return;
  }

  /* set headers if any */
  if (cd->headers) {
    ary->headers = cd->headers;
  }

  if (!cd->types) {
    /* first data line (after optional header line) */
    if (cd->headers) {
      if (all_str) {
        /* data line is all string; emit header line */
        strm_emit(task, strm_ptr_value(cd->headers), NULL);
        cd->headers = NULL;
      }
      else {
        /* intern header strings */
        strm_array *h = cd->headers;
        strm_value *p = (strm_value*)h->ptr;
        int j;

        for (j = 0; j < h->len; j++) {
          strm_string *str = strm_value_str(p[j]);

          p[j] = strm_ptr_value(strm_str_intern_str(str));
        }
      }
    }

    /* initialize types (determined by first data line) */
    cd->types = malloc(sizeof(enum csv_type)*fieldcnt);
    if (!cd->types) {
      return;
    }
    for (i = 0; i < fieldcnt; i++) {
      cd->types[i] = csv_type(ary->ptr[i]);
    }
  }
  else {
    /* type check */
    for (i = 0; i < fieldcnt; i++) {
      if (cd->types[i] == csv_type(ary->ptr[i])) {
        continue;
      }
      if (cd->types[i] != STRING_TYPE) {
        /* type mismatch (error); skip this line */
        return;
      }
      /* convert value to string */
      ((strm_value*)ary->ptr)[i] = strm_ptr_value(strm_to_str(ary->ptr[i]));
    }
  }
  strm_emit(task, strm_ptr_value(ary), NULL);
}