예제 #1
0
파일: csv.c 프로젝트: applideveloper/streem
/*
 * Parse one line of CSV text and emit the resulting row downstream.
 *
 * strm: stream whose `data` member holds the struct csv_data parser state
 *       (pending partial line, expected field count, header row, column
 *       types).
 * data: value carrying the line; it is read through strm_value_str().
 *
 * The first all-string row seen while neither headers nor types are known
 * is held back as a candidate header row.  The following row decides: if
 * it is all strings too, the held row is emitted as ordinary data;
 * otherwise the held row's strings are interned and attached as headers.
 * Column types are fixed by the first data row and every later row must
 * match them.
 *
 * Returns STRM_OK when the line was handled (emitted, or held back as a
 * candidate header).  Returns STRM_NG when count_fields() reports -1 (the
 * line is stashed in cd->prev and joined with the next one), when the
 * field count differs from cd->n, when an allocation fails, or when a
 * column type does not match (strm_raise() is called in that case).
 */
static int
csv_accept(strm_stream* strm, strm_value data)
{
  strm_array ary;
  strm_string line = strm_value_str(data);
  strm_value *bp;
  const char *fbeg;
  const char *ptr;
  const char *pend;
  int fieldcnt;
  int in_quote = 0, all_str = 1;
  int i = 0;
  enum csv_type ftype;
  enum csv_type* types;
  struct csv_data *cd = strm->data;

  if (cd->prev) {
    /* an earlier line was incomplete; join it with this one using '\n' */
    strm_int plen = strm_str_len(cd->prev);
    strm_int llen = strm_str_len(line);
    strm_int len = plen+llen+1;
    char* tmp = malloc(len);

    if (!tmp) {
      /* out of memory: drop the partial record rather than gluing a
         later line onto it with this one missing in between */
      cd->prev = strm_str_null;
      return STRM_NG;
    }
    memcpy(tmp, strm_str_ptr(cd->prev), plen);
    tmp[plen] = '\n';
    memcpy(tmp+plen+1, strm_str_ptr(line), llen);
    line = strm_str_new(tmp, len);
    free(tmp);
    cd->prev = strm_str_null;
  }
  fieldcnt = count_fields(line);
  if (fieldcnt == -1) {
    /* record is not complete yet (presumably an unterminated quoted
       field); keep it and wait for the next line */
    cd->prev = line;
    return STRM_NG;
  }
  if (cd->n > 0 && fieldcnt != cd->n)
    return STRM_NG;

  ptr = strm_str_ptr(line);
  pend = ptr + strm_str_len(line);
  ary = strm_ary_new(NULL, fieldcnt);
  if (!ary) return STRM_NG;
  bp = (strm_value*)strm_ary_ptr(ary);
  types = cd->types;
  ftype = types ? types[0] : TYPE_UNSPC;

  for (fbeg=ptr; ptr<pend; ptr++) {
    if (in_quote) {
      if (*ptr == '\"') {
        /* look ahead only while the next byte is still inside the line */
        if (ptr+1 < pend && ptr[1] == '\"') {
          /* doubled quote inside a quoted field */
          ptr++;
          ftype = TYPE_ESC;
          continue;
        }
        in_quote = 0;
      }
      continue;
    }

    switch(*ptr) {
    case '\"':
      in_quote = 1;
      if (ptr == fbeg) {
        /* field starts with a quote: skip the opening quote */
        ftype = TYPE_STR;
        fbeg = ptr+1;
      }
      else {
        /* quote appearing in the middle of a field */
        ftype = TYPE_ESC;
      }
      continue;
    case ',':
      /* field separator: convert the finished field and start the next */
      *bp = csv_value(fbeg, ptr-fbeg, ftype);
      if (!strm_string_p(*bp)) all_str = 0;
      bp++;
      fbeg = ptr+1;
      i++;
      ftype = types ? types[i] : TYPE_UNSPC;
      break;

    default:
      continue;
    }
  }
  /* trim newline at the end (never step back before the field start) */
  if (ptr > fbeg && ptr[-1] == '\n') {
    ptr--;
  }
  /* trim carriage return at the end */
  if (ptr > fbeg && ptr[-1] == '\r') {
    ptr--;
  }
  /* last field has no trailing comma; convert it here */
  *bp = csv_value(fbeg, ptr-fbeg, ftype);
  if (!strm_string_p(*bp)) all_str = 0;

  /* check headers */
  if (!cd->headers && !cd->types) {
    if (all_str) {
      /* hold the row back as a candidate header row */
      cd->headers = ary;
      ary = strm_ary_null;
    }
    cd->n = fieldcnt;
  }
  if (ary) {
    /* set headers if any */
    if (cd->headers)
      strm_ary_headers(ary) = cd->headers;
    if (!cd->types) {
      /* first data line (after optional header line) */
      if (cd->headers) {
        if (all_str) {          /* data line is all string; emit header line */
          strm_emit(strm, strm_ary_value(cd->headers), NULL);
          cd->headers = strm_ary_null;
        }
        else {                  /* intern header strings */
          strm_array h = cd->headers;
          strm_value *p = strm_ary_ptr(h);
          int j;

          for (j=0; j<strm_ary_len(h); j++) {
            strm_string str = strm_value_str(p[j]);

            p[j] = strm_str_value(strm_str_intern_str(str));
          }
        }
      }
      /* initialize types (determined by first data line) */
      cd->types = malloc(sizeof(enum csv_type)*fieldcnt);
      if (!cd->types) return STRM_NG;
      for (i=0; i<fieldcnt; i++) {
        cd->types[i] = csv_type(strm_ary_ptr(ary)[i]);
      }
    }
    else {
      /* type check */
      for (i=0; i<fieldcnt; i++) {
        if (cd->types[i] != csv_type(strm_ary_ptr(ary)[i])) {
          /* type mismatch (error); skip this line */
          strm_raise(strm, "csv type mismatch");
          return STRM_NG;
        }
      }
    }
    /* ary is an array, so wrap it with the array conversion (the same
       one used for the header row above) */
    strm_emit(strm, strm_ary_value(ary), NULL);
  }
  return STRM_OK;
}
예제 #2
0
파일: csv.c 프로젝트: fenglyu/streem
/*
 * Parse one line of CSV text and emit the resulting row downstream.
 *
 * task: task whose `data` member holds the struct csv_data parser state
 *       (pending partial line, expected field count, header row, column
 *       types).
 * data: value carrying the line; it is read through strm_value_str().
 *
 * Field text is copied into a scratch buffer with the quoting removed
 * (a doubled quote inside a quoted field becomes a single quote).  A
 * field that contained a quote is always stored as a string; an unquoted
 * field goes through csv_value().
 *
 * The first all-string row seen while neither headers nor types are known
 * is held back as a candidate header row.  The following row decides: if
 * it is all strings too, the held row is emitted as ordinary data;
 * otherwise the held row's strings are interned and attached as headers.
 * Column types are fixed by the first data row; a later value whose type
 * differs is converted when the column is STRING_TYPE, otherwise the
 * whole line is dropped.
 *
 * Nothing is returned; a line that cannot be processed is dropped.
 */
static void
csv_accept(strm_task* task, strm_value data)
{
  strm_array *ary;
  strm_string *line = strm_value_str(data);
  strm_value *bp;
  char *tmp, *tptr;
  const char *ptr;
  const char *pend;
  int fieldcnt;
  int in_quote = 0, quoted = 0, all_str = 1;
  struct csv_data *cd = task->data;

  if (cd->prev) {
    /* an earlier line was incomplete; join it with this one using '\n' */
    strm_string *str = strm_str_new(NULL, cd->prev->len+line->len+1);

    if (!str) {
      /* could not allocate the joined line: drop the partial record */
      cd->prev = NULL;
      return;
    }
    tmp = (char*)str->ptr;
    memcpy(tmp, cd->prev->ptr, cd->prev->len);
    *(tmp+cd->prev->len) = '\n';
    memcpy(tmp+cd->prev->len+1, line->ptr, line->len);
    line = str;
    cd->prev = NULL;
  }
  fieldcnt = count_fields(line);
  if (fieldcnt == -1) {
    /* record is not complete yet (presumably an unterminated quoted
       field); keep it and wait for the next line */
    cd->prev = line;
    return;
  }
  if (cd->n > 0 && fieldcnt != cd->n)
    return;

  ptr = line->ptr;
  pend = ptr + line->len;
  ary = strm_ary_new(NULL, fieldcnt);
  if (!ary) return;
  bp = (strm_value*)ary->ptr;

  /* scratch buffer for one unquoted field; a field is never longer than
     the line itself */
  tmp = malloc(line->len+1);
  if (!tmp) return;

  tptr=tmp;
  for (;ptr<pend; ptr++) {
    if (in_quote) {
      if (*ptr == '\"') {
        /* look ahead only while the next byte is still inside the line */
        if (ptr+1 < pend && ptr[1] == '\"') {
          /* doubled quote stands for one literal quote */
          *tptr++ = '\"';
          ptr++;
          continue;
        }
        in_quote = 0;
      }
      else
        *tptr++ = *ptr;
      continue;
    }

    switch(*ptr) {
    case '\"':
      in_quote = 1;
      quoted = 1;
      continue;
    case ',':
      /* field separator: store the finished field and reset the buffer */
      if (quoted) {
        *bp = strm_str_value(tmp, tptr-tmp);
      }
      else {
        *bp = csv_value(tmp, tptr-tmp);
      }
      if (!strm_str_p(*bp)) all_str = 0;
      bp++;
      tptr = tmp;
      quoted = 0;
      break;

    default:
      *tptr++ = *ptr;
      continue;
    }
  }
  /* trim newline at the end */
  if (tptr > tmp && tptr[-1] == '\n') {
    tptr--;
  }
  /* trim carriage return at the end */
  if (tptr > tmp && tptr[-1] == '\r') {
    tptr--;
  }
  /* last field has no trailing comma; apply the same quoted/unquoted
     rule as for the other fields so a quoted value stays a string */
  if (quoted) {
    *bp = strm_str_value(tmp, tptr-tmp);
  }
  else {
    *bp = csv_value(tmp, tptr-tmp);
  }
  if (!strm_str_p(*bp)) all_str = 0;

  free(tmp);

  /* check headers */
  if (!cd->headers && !cd->types) {
    if (all_str) {
      /* hold the row back as a candidate header row */
      cd->headers = ary;
      ary = NULL;
    }
    cd->n = fieldcnt;
  }
  if (ary) {
    int i;

    /* set headers if any */
    if (cd->headers)
      ary->headers = cd->headers;
    if (!cd->types) {
      /* first data line (after optional header line) */
      if (cd->headers) {
        if (all_str) {          /* data line is all string; emit header line */
          strm_emit(task, strm_ptr_value(cd->headers), NULL);
          cd->headers = NULL;
        }
        else {                  /* intern header strings */
          strm_array *h = cd->headers;
          strm_value *p = (strm_value*)h->ptr;
          int j;

          for (j=0; j<h->len; j++) {
            strm_string *str = strm_value_str(p[j]);

            p[j] = strm_ptr_value(strm_str_intern_str(str));
          }
        }
      }
      /* initialize types (determined by first data line) */
      cd->types = malloc(sizeof(enum csv_type)*fieldcnt);
      if (!cd->types) return;
      for (i=0; i<fieldcnt; i++) {
        cd->types[i] = csv_type(ary->ptr[i]);
      }
    }
    else {
      /* type check */
      for (i=0; i<fieldcnt; i++) {
        if (cd->types[i] != csv_type(ary->ptr[i])) {
          if (cd->types[i] == STRING_TYPE) {
            /* convert value to string */
            ((strm_value*)ary->ptr)[i] = strm_ptr_value(strm_to_str(ary->ptr[i]));
          }
          else {
            /* type mismatch (error); skip this line */
            return;
          }
        }
      }
    }
    strm_emit(task, strm_ptr_value(ary), NULL);
  }
}