Exemplo n.º 1
0
/* Operates on three CHARSXP (internal) arguments. Returns STRSXP vector.
 * Returned strings alternate between "outside" and "inside" blocks.
 * E.g. "Four .(score) and seven .(years)"
 * returns c("Four ", "score", " and seven ", "years")
 */
SEXP find_subst_expressions(SEXP str, SEXP begin_delim, SEXP end_delim) {
  const char *s, *b, *e;
  const char *p, *start;
  const char *before, *begin, *end, *after;
  cetype_t enc;
  step_t step;
  int nBlocks, i;
  SEXP out;

  assert_type(str, CHARSXP);
  assert_type(begin_delim, CHARSXP);
  assert_type(end_delim, CHARSXP);

  step = get_stepper(str, &s, &enc);

  if (getCharCE(begin_delim) != enc) {
    b = Rf_reEnc(CHAR(begin_delim), getCharCE(begin_delim), enc, 0);
  } else {
    b = CHAR(begin_delim);
  }

  if (getCharCE(end_delim) != enc) {
    e = Rf_reEnc(CHAR(end_delim), getCharCE(end_delim), enc, 0);
  } else {
    e = CHAR(end_delim);
  }

  /* Scan once to count how many blocks we need, then scan again (ugh) */
  nBlocks = 0;
  p = s;
  while(p && *p) {
    if ( (p = block_search(p, b, e, NULL, NULL, NULL, NULL, step)) ) {
      nBlocks++;
    }
  }

  out = PROTECT(allocVector(STRSXP, 2*nBlocks+1));
  start = s;
  p = s;
  for (i = 0; i < nBlocks; i++) {
    p = block_search(p, b, e,
                     &before, &begin, &end, &after,
                     step);
    /* extract leading unescaped block, then escaped block */
    if (p) {
      SET_STRING_ELT(out, 2*i, mkCharLenCE(start, before-start, enc));
      SET_STRING_ELT(out, 2*i+1, mkCharLenCE(begin, end-begin, enc));
      start = after;
    }
  }
  /* then the rest of the string. */
  SET_STRING_ELT(out, 2*i, mkCharCE(start, enc));

  UNPROTECT(1);
  return out;
}
Exemplo n.º 2
0
Arquivo: RJSON.c Projeto: cran/RJSONIO
/*
  Convert a JSON object to an R object.
 */ 
SEXP   
convertJSONValueToR(int type, const struct JSON_value_struct *value, cetype_t encoding)
{
    SEXP ans = R_NilValue;
    switch(type) {

      case JSON_T_INTEGER: 
          ans = ScalarInteger((int) ((long) value->vu.integer_value));
	break;
      case JSON_T_FLOAT:
          ans = ScalarReal(value->vu.float_value);
	break;
      case JSON_T_NULL:
          ans = R_NilValue;
	break;
      case JSON_T_FALSE:
          ans = ScalarLogical(0);
	break;
      case JSON_T_TRUE:
          ans = ScalarLogical(1);
	break;
      case JSON_T_STRING:
      case JSON_T_KEY:
	  ans = ScalarString(mkCharLenCE(value->vu.str.value, value->vu.str.length, encoding));
	break;
    }

    return(ans);
}
Exemplo n.º 3
0
SEXP R_mongo_cursor_next_json (SEXP ptr, SEXP n){
  mongoc_cursor_t *c = r2cursor(ptr);
  int len = asInteger(n);
  SEXP out = PROTECT(allocVector(STRSXP, len));
  const bson_t *b = NULL;
  int total = 0;
  bson_error_t err;
  while(total < len){
    if(!mongoc_cursor_next(c, &b)){
      if(mongoc_cursor_error (c, &err))
        stop(err.message);
      else
        //cursor exchausted: done
        break;
    } else {
      size_t jsonlength;
      const char *str = bson_as_json ((bson_t*) b, &jsonlength);
      SET_STRING_ELT(out, total, mkCharLenCE(str, jsonlength, CE_UTF8));
      if(str) bson_free(str);
      total++;
    }
  }
  if(total < len){
    SEXP out2 = PROTECT(allocVector(STRSXP, total));
    for(int i = 0; i < total; i++){
      SET_STRING_ELT(out2, i, STRING_ELT(out, i));
    }
    UNPROTECT(2);
    return out2;
  }
  UNPROTECT(1);
  return out;
}
Exemplo n.º 4
0
SEXP code_allele_observations(SEXP options, SEXP observed)
{
    /* options and observed are both vectors of character arrays where
       each character array is a collection of alleles at that
       position */

    SEXP obs_string, opt_string;
    const char *obs_char, *opt_char;
    SEXP coded = PROTECT(allocVector(STRSXP, length(options)));
    int max_num_alleles = 10;  /* World will end if index more than
                                * one character long */
    int buffer_index = 0;
    char coded_buffer[max_num_alleles * 2]; /* One int char and one
                                             * comma for each
                                             * observation at a
                                             * position */

    for (int i=0; i < length(options); i++) {
        opt_string = VECTOR_ELT(options, i);
        obs_string = VECTOR_ELT(observed, i);
        if (length(obs_string) > 10)
            error("no more than 10 alleles allowed\n");
        buffer_index = 0;
        for (int observed_index=0; observed_index < length(obs_string);
             observed_index++)
        {
            coded_buffer[ buffer_index ] = '.'; /* NA to begin with */
            coded_buffer[buffer_index + 1] = '/';
            obs_char = CHAR(STRING_ELT(obs_string,observed_index));

            for (int option_index=0; option_index < length(opt_string);
                 option_index++)
            {
                opt_char = CHAR(STRING_ELT(opt_string,option_index));
                if (obs_char == opt_char) {
                    sprintf(coded_buffer + buffer_index, "%i/",
                            option_index);
                    break;
                }
            }
            buffer_index += 2;
        }
        SET_STRING_ELT(coded, i,
                       mkCharLenCE(coded_buffer, buffer_index-1,
                                   CE_UTF8));
    }

    UNPROTECT(1);
    return coded;
}
Exemplo n.º 5
0
/* Return the Subject of an X509 Certificate by wrapping the OpenSSL X509_get_subject_name() function. */
SEXP PKI_get_subject(SEXP sCert) {
    SEXP res;
    X509 *cert;
    BIO  *mem = BIO_new(BIO_s_mem());
    long len;
    char *txt = 0;
    PKI_init();
    cert = retrieve_cert(sCert, "");
    if (X509_NAME_print_ex(mem, X509_get_subject_name(cert), 0, (XN_FLAG_ONELINE | ASN1_STRFLGS_UTF8_CONVERT) & ~ASN1_STRFLGS_ESC_MSB) < 0) {
      BIO_free(mem);
	Rf_error("X509_NAME_print_ex failed with %s", ERR_error_string(ERR_get_error(), NULL));
    }
    len = BIO_get_mem_data(mem, &txt);
    if (len < 0) {
      BIO_free(mem);
      Rf_error("cannot get memory buffer, %s", ERR_error_string(ERR_get_error(), NULL));
    }
    res = PROTECT(allocVector(STRSXP, 1));
    SET_STRING_ELT(res, 0, mkCharLenCE(txt, len, CE_UTF8));
    UNPROTECT(1);
    BIO_free(mem);
    return res;
}
Exemplo n.º 6
0
void pushBuffer(ThreadLocalFreadParsingContext *ctx)
{
  const void *buff8 = ctx->buff8;
  const void *buff4 = ctx->buff4;
  const void *buff1 = ctx->buff1;
  const char *anchor = ctx->anchor;
  int nRows = (int) ctx->nRows;
  size_t DTi = ctx->DTi;
  int rowSize8 = (int) ctx->rowSize8;
  int rowSize4 = (int) ctx->rowSize4;
  int rowSize1 = (int) ctx->rowSize1;
  int nStringCols = ctx->nStringCols;
  int nNonStringCols = ctx->nNonStringCols;

  // Do all the string columns first so as to minimize and concentrate the time inside the single critical.
  // While the string columns are happening other threads before me can be copying their non-string buffers to the
  // final DT and other threads after me can be filling their buffers too.
  // rowSize is passed in because it will be different (much smaller) on the reread covering any type exception columns
  // locals passed in on stack so openmp knows that no synchonization is required

  // the byte position of this column in the first row of the row-major buffer
  if (nStringCols) {
    #pragma omp critical
    {
      int off8 = 0;
      int cnt8 = rowSize8 / 8;
      lenOff *buff8_lenoffs = (lenOff*) buff8;
      for (int j=0, resj=-1, done=0; done<nStringCols && j<ncol; j++) {
        if (type[j] == CT_DROP) continue;
        resj++;
        if (type[j] == CT_STRING) {
          SEXP dest = VECTOR_ELT(DT, resj);
          lenOff *source = buff8_lenoffs + off8;
          for (int i=0; i<nRows; i++) {
            int strLen = source->len;
            if (strLen) {
              SEXP thisStr = strLen<0 ? NA_STRING : mkCharLenCE(anchor + source->off, strLen, ienc);
              // stringLen == INT_MIN => NA, otherwise not a NAstring was checked inside fread_mean
              SET_STRING_ELT(dest, DTi+i, thisStr);
            } // else dest was already initialized with R_BlankString by allocVector()
            source += cnt8;
          }
          done++; // if just one string col near the start, don't loop over the other 10,000 cols. TODO? start on first too
        }
        off8 += (size[j] == 8);
      }
    }
  }

  int off1 = 0, off4 = 0, off8 = 0;
  for (int j=0, resj=-1, done=0; done<nNonStringCols && j<ncol; j++) {
    if (type[j]==CT_DROP) continue;
    int thisSize = size[j];
    resj++;
    if (type[j]!=CT_STRING && type[j]>0) {
      if (thisSize == 8) {
        char *dest = (char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*8;
        char *src8 = (char*)buff8 + off8;
        for (int i=0; i<nRows; i++) {
          memcpy(dest, src8, 8);
          src8 += rowSize8;
          dest += 8;
        }
      } else
      if (thisSize == 4) {
        char *dest = (char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*4;
        char *src4 = (char*)buff4 + off4;
        for (int i=0; i<nRows; i++) {
          memcpy(dest, src4, 4);
          src4 += rowSize4;
          dest += 4;
        }
      } else
      if (thisSize == 1) {
        if (type[j] != CT_BOOL8) STOP("Field size is 1 but the field is of type %d\n", type[j]);
        Rboolean *dest = (Rboolean *)((char *)DATAPTR(VECTOR_ELT(DT, resj)) + DTi*sizeof(Rboolean));
        char *src1 = (char*)buff1 + off1;
        for (int i=0; i<nRows; i++) {
          int8_t v = *(int8_t *)src1;
          *dest = (v==INT8_MIN ? NA_INTEGER : v);
          src1 += rowSize1;
          dest++;
        }
      } else STOP("Runtime error: unexpected field of size %d\n", thisSize);
      done++;
    }
    off8 += (size[j] & 8);
    off4 += (size[j] & 4);
    off1 += (size[j] & 1);
  }
}
Exemplo n.º 7
0
_Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol)
{
  // use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
  if (typeSize[CT_BOOL8]!=1) STOP("Internal error: typeSize[CT_BOOL8] != 1");
  if (typeSize[CT_STRING]!=8) STOP("Internal error: typeSize[CT_STRING] != 1");
  colNamesSxp = NULL;
  if (colNames!=NULL) {
    colNamesSxp = PROTECT(allocVector(STRSXP, ncol));
    protecti++;
    for (int i=0; i<ncol; i++) {
      SEXP this;
      if (colNames[i].len<=0) {
        char buff[10];
        sprintf(buff,"V%d",i+1);
        this = mkChar(buff);
      } else {
        this = mkCharLenCE(anchor+colNames[i].off, colNames[i].len, ienc);
      }
      SET_STRING_ELT(colNamesSxp, i, this);
    }
  }
  if (length(colClassesSxp)) {
    SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
    protecti++;
    for (int i=0; i<NUT; i++) SET_STRING_ELT(typeRName_sxp, i, mkChar(typeRName[i]));
    if (isString(colClassesSxp)) {
      SEXP typeEnum_idx = PROTECT(chmatch(colClassesSxp, typeRName_sxp, NUT, FALSE));
      protecti++;
      if (LENGTH(colClassesSxp)==1) {
        signed char newType = typeEnum[INTEGER(typeEnum_idx)[0]-1];
        if (newType == CT_DROP) STOP("colClasses='drop' is not permitted; i.e. to drop all columns and load nothing");
        for (int i=0; i<ncol; i++) type[i]=newType;   // freadMain checks bump up only not down
      } else if (LENGTH(colClassesSxp)==ncol) {
        for (int i=0; i<ncol; i++) {
          if (STRING_ELT(colClassesSxp,i)==NA_STRING) continue; // user is ok with inherent type for this column
          type[i] = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        }
      } else {
        STOP("colClasses is an unnamed character vector but its length is %d. Must be length 1 or ncol (%d in this case) when unnamed. To specify types for a subset of columns you can either name the items with the column names or pass list() format to colClasses using column names or column numbers. See examples in ?fread.",
              LENGTH(colClassesSxp), ncol);
      }
    } else {
      if (!isNewList(colClassesSxp)) STOP("CfreadR: colClasses is not type list");
      if (!length(getAttrib(colClassesSxp, R_NamesSymbol))) STOP("CfreadR: colClasses is type list but has no names");
      SEXP typeEnum_idx = PROTECT(chmatch(getAttrib(colClassesSxp, R_NamesSymbol), typeRName_sxp, NUT, FALSE));
      protecti++;
      for (int i=0; i<LENGTH(colClassesSxp); i++) {
        SEXP items;
        signed char thisType = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        items = VECTOR_ELT(colClassesSxp,i);
        if (thisType == CT_DROP) {
          if (!isNull(dropSxp) || !isNull(selectSxp)) STOP("Can't use NULL in colClasses when select or drop is used as well.");
          dropSxp = items;
          continue;
        }
        SEXP itemsInt;
        if (isString(items)) itemsInt = PROTECT(chmatch(items, colNamesSxp, NA_INTEGER, FALSE));
        else itemsInt = PROTECT(coerceVector(items, INTSXP));
        protecti++;
        for (int j=0; j<LENGTH(items); j++) {
          int k = INTEGER(itemsInt)[j];
          if (k==NA_INTEGER) {
            if (isString(items)) STOP("Column name '%s' in colClasses[[%d]] not found", CHAR(STRING_ELT(items, j)),i+1);
            else STOP("colClasses[[%d]][%d] is NA", i+1, j+1);
          } else {
            if (k<1 || k>ncol) STOP("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]",k,i+1,j+1,ncol);
            k--;
            if (type[k]<0) STOP("Column '%s' appears more than once in colClasses", CHAR(STRING_ELT(colNamesSxp,k)));
            type[k] = -thisType;
            // freadMain checks bump up only not down.  Deliberately don't catch here to test freadMain; e.g. test 959
          }
        }
      }
      for (int i=0; i<ncol; i++) if (type[i]<0) type[i] *= -1;  // undo sign; was used to detect duplicates
    }
  }
  if (readInt64As != CT_INT64) {
    for (int i=0; i<ncol; i++) if (type[i]==CT_INT64) type[i] = readInt64As;
  }
  if (length(dropSxp)) {
    SEXP itemsInt;
    if (isString(dropSxp)) itemsInt = PROTECT(chmatch(dropSxp, colNamesSxp, NA_INTEGER, FALSE));
    else itemsInt = PROTECT(coerceVector(dropSxp, INTSXP));
    protecti++;
    for (int j=0; j<LENGTH(itemsInt); j++) {
      int k = INTEGER(itemsInt)[j];
      if (k==NA_INTEGER) {
        if (isString(dropSxp)) {
          DTWARN("Column name '%s' in 'drop' not found", CHAR(STRING_ELT(dropSxp, j)));
        } else {
          DTWARN("drop[%d] is NA", j+1);
        }
      } else {
        if (k<1 || k>ncol) {
          DTWARN("Column number %d (drop[%d]) is out of range [1,ncol=%d]",k,j+1,ncol);
        } else {
          if (type[k-1] == CT_DROP) STOP("Duplicates detected in drop");
          type[k-1] = CT_DROP;
        }
      }
    }
  } else if (length(selectSxp)) {
    SEXP tt;
    if (isString(selectSxp)) {
      // invalid cols check part of #1445 moved here (makes sense before reading the file)
      tt = PROTECT(chmatch(selectSxp, colNamesSxp, NA_INTEGER, FALSE));
      protecti++;
      for (int i=0; i<length(selectSxp); i++) if (INTEGER(tt)[i]==NA_INTEGER)
        DTWARN("Column name '%s' not found in column name header (case sensitive), skipping.", CHAR(STRING_ELT(selectSxp, i)));
    } else tt = selectSxp;
    for (int i=0; i<LENGTH(tt); i++) {
      int k = isInteger(tt) ? INTEGER(tt)[i] : (int)REAL(tt)[i];
      if (k == NA_INTEGER) continue;
      if (k<1 || k>ncol) STOP("Column number %d (select[%d]) is out of range [1,ncol=%d]",k,i+1,ncol);
      if (type[k-1]<0) STOP("Column number %d ('%s') has been selected twice by select=", k, STRING_ELT(colNames,k-1));
      type[k-1] *= -1; // detect and error on duplicates on all types without calling duplicated() at all
    }
    for (int i=0; i<ncol; i++) {
      if (type[i]<0) type[i] *= -1;
      else type[i]=CT_DROP;
    }
  }
  return TRUE;  // continue
}
Exemplo n.º 8
0
_Bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, int ncol)
{
  // use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
  if (typeSize[CT_BOOL8_N]!=1) STOP("Internal error: typeSize[CT_BOOL8_N] != 1"); // # nocov
  if (typeSize[CT_STRING]!=8) STOP("Internal error: typeSize[CT_STRING] != 1"); // # nocov
  colNamesSxp = R_NilValue;
  if (colNames!=NULL) {
    SET_VECTOR_ELT(RCHK, 1, colNamesSxp=allocVector(STRSXP, ncol));
    for (int i=0; i<ncol; i++) {
      SEXP elem;
      if (colNames[i].len<=0) {
        char buff[12];
        sprintf(buff,"V%d",i+1);
        elem = mkChar(buff);  // no PROTECT as passed immediately to SET_STRING_ELT
      } else {
        elem = mkCharLenCE(anchor+colNames[i].off, colNames[i].len, ienc);  // no PROTECT as passed immediately to SET_STRING_ELT
      }
      SET_STRING_ELT(colNamesSxp, i, elem);
    }
  }
  if (length(colClassesSxp)) {
    SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
    for (int i=0; i<NUT; i++) SET_STRING_ELT(typeRName_sxp, i, mkChar(typeRName[i]));
    if (isString(colClassesSxp)) {
      SEXP typeEnum_idx = PROTECT(chmatch(colClassesSxp, typeRName_sxp, NUT, FALSE));
      if (LENGTH(colClassesSxp)==1) {
        signed char newType = typeEnum[INTEGER(typeEnum_idx)[0]-1];
        if (newType == CT_DROP) STOP("colClasses='NULL' is not permitted; i.e. to drop all columns and load nothing");
        for (int i=0; i<ncol; i++) type[i]=newType;   // freadMain checks bump up only not down
      } else if (LENGTH(colClassesSxp)==ncol) {
        for (int i=0; i<ncol; i++) {
          if (STRING_ELT(colClassesSxp,i)==NA_STRING) continue; // user is ok with inherent type for this column
          type[i] = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        }
      } else {
        STOP("colClasses is an unnamed character vector but its length is %d. Must be length 1 or ncol (%d in this case) when unnamed. To specify types for a subset of columns you can either name the items with the column names or pass list() format to colClasses using column names or column numbers. See examples in ?fread.",
              LENGTH(colClassesSxp), ncol);
      }
      UNPROTECT(1); // typeEnum_idx
    } else {
      if (!isNewList(colClassesSxp)) STOP("CfreadR: colClasses is not type list");
      if (!length(getAttrib(colClassesSxp, R_NamesSymbol))) STOP("CfreadR: colClasses is type list but has no names");
      SEXP typeEnum_idx = PROTECT(chmatch(PROTECT(getAttrib(colClassesSxp, R_NamesSymbol)), typeRName_sxp, NUT, FALSE));
      for (int i=0; i<LENGTH(colClassesSxp); i++) {
        SEXP items;
        signed char thisType = typeEnum[INTEGER(typeEnum_idx)[i]-1];
        items = VECTOR_ELT(colClassesSxp,i);
        if (thisType == CT_DROP) {
          if (!isNull(dropSxp) || !isNull(selectSxp)) {
            if (dropSxp!=items) DTWARN("Ignoring the NULL item in colClasses= because select= or drop= has been used.");
            // package damr has a nice workaround for when NULL didn't work before v1.12.0: it sets drop=col_class$`NULL`. So allow that unambiguous case with no warning.
          } else {
            dropSxp = items;
          }
          continue;
        }
        SEXP itemsInt;
        if (isString(items)) itemsInt = PROTECT(chmatch(items, colNamesSxp, NA_INTEGER, FALSE));
        else                 itemsInt = PROTECT(coerceVector(items, INTSXP));
        // UNPROTECTed directly just after this for loop. No protecti++ here is correct.
        for (int j=0; j<LENGTH(items); j++) {
          int k = INTEGER(itemsInt)[j];
          if (k==NA_INTEGER) {
            if (isString(items)) STOP("Column name '%s' in colClasses[[%d]] not found", CHAR(STRING_ELT(items, j)),i+1);
            else STOP("colClasses[[%d]][%d] is NA", i+1, j+1);
          } else {
            if (k<1 || k>ncol) STOP("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]",k,i+1,j+1,ncol);
            k--;
            if (type[k]<0) STOP("Column '%s' appears more than once in colClasses", CHAR(STRING_ELT(colNamesSxp,k)));
            type[k] = -thisType;
            // freadMain checks bump up only not down.  Deliberately don't catch here to test freadMain; e.g. test 959
          }
        }
        UNPROTECT(1); // UNPROTECTing itemsInt inside loop to save protection stack
      }
      for (int i=0; i<ncol; i++) if (type[i]<0) type[i] *= -1;  // undo sign; was used to detect duplicates
      UNPROTECT(2);  // typeEnum_idx (+1 for its protect of getAttrib)
    }
    UNPROTECT(1);  // typeRName_sxp
  }
  if (readInt64As != CT_INT64) {
    for (int i=0; i<ncol; i++) if (type[i]==CT_INT64) type[i] = readInt64As;
  }
  if (length(dropSxp)) {
    SEXP itemsInt;
    if (isString(dropSxp)) itemsInt = PROTECT(chmatch(dropSxp, colNamesSxp, NA_INTEGER, FALSE));
    else                   itemsInt = PROTECT(coerceVector(dropSxp, INTSXP));
    for (int j=0; j<LENGTH(itemsInt); j++) {
      int k = INTEGER(itemsInt)[j];
      if (k==NA_INTEGER) {
        if (isString(dropSxp)) {
          DTWARN("Column name '%s' in 'drop' not found", CHAR(STRING_ELT(dropSxp, j)));
        } else {
          DTWARN("drop[%d] is NA", j+1);
        }
      } else {
        if (k<1 || k>ncol) {
          DTWARN("Column number %d (drop[%d]) is out of range [1,ncol=%d]",k,j+1,ncol);
        } else {
          // if (type[k-1] == CT_DROP) DTWARN("drop= contains duplicates");
          // NULL in colClasses didn't work between 1.11.0 and 1.11.8 so people have been using drop= to re-specify the NULL columns in colClasses. Now that NULL in colClasses works
          // from v1.12.0 there is no easy way to distinguish dups in drop= from drop overlapping with NULLs in colClasses. But it's unambiguous that it was intended to remove these
          // columns, so no need for warning.
          type[k-1] = CT_DROP;
        }
      }
    }
    UNPROTECT(1); // itemsInt
  } else if (length(selectSxp)) {
    SEXP tt;
    if (isString(selectSxp)) {
      // invalid cols check part of #1445 moved here (makes sense before reading the file)
      tt = PROTECT(chmatch(selectSxp, colNamesSxp, NA_INTEGER, FALSE));
      for (int i=0; i<length(selectSxp); i++) if (INTEGER(tt)[i]==NA_INTEGER)
        DTWARN("Column name '%s' not found in column name header (case sensitive), skipping.", CHAR(STRING_ELT(selectSxp, i)));
    } else {
      tt = PROTECT(selectSxp); // harmless superfluous PROTECT, for ease of balancing
    }
    for (int i=0; i<LENGTH(tt); i++) {
      int k = isInteger(tt) ? INTEGER(tt)[i] : (int)REAL(tt)[i];
      if (k == NA_INTEGER) continue;
      if (k<0) STOP("Column number %d (select[%d]) negative but should be in the range [1,ncol=%d]. Consider drop= for column exclusion.",k,i+1,ncol);
      if (k==0) STOP("select = 0 (select[%d]) has no meaning. All values of select should be in the range [1,ncol=%d].",i+1,ncol);
      if (k>ncol) STOP("Column number %d (select[%d]) is too large for this table, which only has %d columns.",k,i+1,ncol);
      if (type[k-1]<0) STOP("Column number %d ('%s') has been selected twice by select=", k, CHAR(STRING_ELT(colNamesSxp,k-1)));
      type[k-1] *= -1; // detect and error on duplicates on all types without calling duplicated() at all
    }
    for (int i=0; i<ncol; i++) {
      if (type[i]<0) type[i] *= -1;
      else type[i]=CT_DROP;
    }
    UNPROTECT(1); // tt
  }
  return true;
}
Exemplo n.º 9
0
/* iconv(x, from, to, sub, mark) */
SEXP attribute_hidden do_iconv(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP ans, x = CAR(args), si;
    void * obj;
    const char *inbuf;
    char *outbuf;
    const char *sub;
    size_t inb, outb, res;
    R_StringBuffer cbuff = {NULL, 0, MAXELTSIZE};
    Rboolean isRawlist = FALSE;

    checkArity(op, args);
    if(isNull(x)) {  /* list locales */
#ifdef HAVE_ICONVLIST
	cnt = 0;
	iconvlist(count_one, NULL);
	PROTECT(ans = allocVector(STRSXP, cnt));
	cnt = 0;
	iconvlist(write_one, (void *)ans);
#else
	PROTECT(ans = R_NilValue);
#endif
    } else {
	int mark, toRaw;
	const char *from, *to;
	Rboolean isLatin1 = FALSE, isUTF8 = FALSE;

	args = CDR(args);
	if(!isString(CAR(args)) || length(CAR(args)) != 1)
	    error(_("invalid '%s' argument"), "from");
	from = CHAR(STRING_ELT(CAR(args), 0)); /* ASCII */
	args = CDR(args);
	if(!isString(CAR(args)) || length(CAR(args)) != 1)
	    error(_("invalid '%s' argument"), "to");
	to = CHAR(STRING_ELT(CAR(args), 0));
	args = CDR(args);
	if(!isString(CAR(args)) || length(CAR(args)) != 1)
	    error(_("invalid '%s' argument"), "sub");
	if(STRING_ELT(CAR(args), 0) == NA_STRING) sub = NULL;
	else sub = translateChar(STRING_ELT(CAR(args), 0));
	args = CDR(args);
	mark = asLogical(CAR(args));
	if(mark == NA_LOGICAL)
	    error(_("invalid '%s' argument"), "mark");	
	args = CDR(args);
	toRaw = asLogical(CAR(args));
	if(toRaw == NA_LOGICAL)
	    error(_("invalid '%s' argument"), "toRaw");	
	/* some iconv's allow "UTF8", but libiconv does not */
	if(streql(from, "UTF8") || streql(from, "utf8") ) from = "UTF-8";
	if(streql(to, "UTF8") || streql(to, "utf8") ) to = "UTF-8";
	/* Should we do something about marked CHARSXPs in 'from = ""'? */
	if(streql(to, "UTF-8")) isUTF8 = TRUE;
	if(streql(to, "latin1") || streql(to, "ISO_8859-1")
	    || streql(to, "CP1252")) isLatin1 = TRUE;
	if(streql(to, "") && known_to_be_latin1) isLatin1 = TRUE;
	if(streql(to, "") && known_to_be_utf8) isUTF8 = TRUE;
	obj = Riconv_open(to, from);
	if(obj == (iconv_t)(-1))
#ifdef Win32
	    error(_("unsupported conversion from '%s' to '%s' in codepage %d"), 
		  from, to, localeCP);
#else
	    error(_("unsupported conversion from '%s' to '%s'"), from, to);
#endif
	isRawlist = (TYPEOF(x) == VECSXP);
	if(isRawlist) {
	    if(toRaw)
		PROTECT(ans = duplicate(x));
	    else {
		PROTECT(ans = allocVector(STRSXP, LENGTH(x)));
		DUPLICATE_ATTRIB(ans, x);
	    }
	} else {   
	    if(TYPEOF(x) != STRSXP)
		error(_("'x' must be a character vector"));
	    if(toRaw) {
		PROTECT(ans = allocVector(VECSXP, LENGTH(x)));
		DUPLICATE_ATTRIB(ans, x);
	    } else 
		PROTECT(ans = duplicate(x));
	}
	R_AllocStringBuffer(0, &cbuff);  /* 0 -> default */
	for(R_xlen_t i = 0; i < XLENGTH(x); i++) {
	    if (isRawlist) {
		si = VECTOR_ELT(x, i);
		if (TYPEOF(si) == NILSXP) {
		    if (!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
		    continue;
		} else if (TYPEOF(si) != RAWSXP)
		    error(_("'x' must be a list of NULL or raw vectors"));
	    } else {
		si = STRING_ELT(x, i);
		if (si == NA_STRING) {
		    if(!toRaw) SET_STRING_ELT(ans, i, NA_STRING);
		    continue;
		}
	    }
	top_of_loop:
	    inbuf = isRawlist ? (const char *) RAW(si) : CHAR(si); 
	    inb = LENGTH(si);
	    outbuf = cbuff.data; outb = cbuff.bufsize - 1;
	    /* First initialize output */
	    Riconv (obj, NULL, NULL, &outbuf, &outb);
	next_char:
	    /* Then convert input  */
	    res = Riconv(obj, &inbuf , &inb, &outbuf, &outb);
	    *outbuf = '\0';
	    /* other possible error conditions are incomplete
	       and invalid multibyte chars */
	    if(res == -1 && errno == E2BIG) {
		R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
		goto top_of_loop;
	    } else if(res == -1 && sub && 
		      (errno == EILSEQ || errno == EINVAL)) {
		/* it seems this gets thrown for non-convertible input too */
		if(strcmp(sub, "byte") == 0) {
		    if(outb < 5) {
			R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
			goto top_of_loop;
		    }
		    snprintf(outbuf, 5, "<%02x>", (unsigned char)*inbuf);
		    outbuf += 4; outb -= 4;
		} else {
		    size_t j;
		    if(outb < strlen(sub)) {
			R_AllocStringBuffer(2*cbuff.bufsize, &cbuff);
			goto top_of_loop;
		    }
		    memcpy(outbuf, sub, j = strlen(sub));
		    outbuf += j;
		    outb -= j;
		}
		inbuf++; inb--;
		goto next_char;
	    }

	    if(toRaw) {
		if(res != -1 && inb == 0) {
		    size_t nout = cbuff.bufsize - 1 - outb;
		    SEXP el = allocVector(RAWSXP, nout);
		    memcpy(RAW(el), cbuff.data, nout);
		    SET_VECTOR_ELT(ans, i, el);
		} /* otherwise is already NULL */
	    } else {
		if(res != -1 && inb == 0) {
		    cetype_t ienc = CE_NATIVE;
		    
		    size_t nout = cbuff.bufsize - 1 - outb;
		    if(mark) {
			if(isLatin1) ienc = CE_LATIN1;
			else if(isUTF8) ienc = CE_UTF8;
		    }
		    SET_STRING_ELT(ans, i, 
				   mkCharLenCE(cbuff.data, (int) nout, ienc));
		} else SET_STRING_ELT(ans, i, NA_STRING);
	    }
	}
	Riconv_close(obj);
	R_FreeStringBuffer(&cbuff);
    }
    UNPROTECT(1);
    return ans;
}
Exemplo n.º 10
0
SEXP StringValue(void *input, int len)
{
    Rboolean mbcslocale = TRUE;
    SEXP yylval = R_NilValue;
    int pos = 0;
    int c = xxgetc();
    int quote = c;
    char currtext[1010], *ct = currtext;
    char st0[MAXELTSIZE];
    unsigned int nstext = MAXELTSIZE;
    char *stext = st0, *bp = st0;
    int wcnt = 0;
    ucs_t wcs[10001];
    Rboolean oct_or_hex = FALSE, use_wcs = FALSE;

    while (pos < len && (c = xxgetc()) != R_EOF && c != quote) {
	CTEXT_PUSH(c);
	if (c == '\n') {
	    xxungetc(c);
	    /* Fix suggested by Mark Bravington to allow multiline strings
	     * by pretending we've seen a backslash. Was:
	     * return ERROR;
	     */
	    c = '\\';
	}
	if (c == '\\') {
	    c = xxgetc(); CTEXT_PUSH(c);
	    if ('0' <= c && c <= '8') {
		int octal = c - '0';
		if ('0' <= (c = xxgetc()) && c <= '8') {
		    CTEXT_PUSH(c);
		    octal = 8 * octal + c - '0';
		    if ('0' <= (c = xxgetc()) && c <= '8') {
			CTEXT_PUSH(c);
			octal = 8 * octal + c - '0';
		    } else {
			xxungetc(c);
			CTEXT_POP();
		    }
		} else {
		    xxungetc(c);
		    CTEXT_POP();
		}
		c = octal;
		oct_or_hex = TRUE;
	    }
	    else if(c == 'x') {
		int val = 0; int i, ext;
		for(i = 0; i < 2; i++) {
		    c = xxgetc(); CTEXT_PUSH(c);
		    if(c >= '0' && c <= '9') ext = c - '0';
		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
		    else {
			xxungetc(c);
			CTEXT_POP();
			if (i == 0) { /* was just \x */
			    *ct = '\0';
			    PROBLEM "'\\x' used without hex digits in character string starting \"%s\"", currtext
                            ERROR;
			}
			break;
		    }
		    val = 16*val + ext;
		}
		c = val;
		oct_or_hex = TRUE;
	    }
	    else if(c == 'u') {
		unsigned int val = 0; int i, ext; 
		Rboolean delim = FALSE;


		if((c = xxgetc()) == '{') {
		    delim = TRUE;
		    CTEXT_PUSH(c);
		} else xxungetc(c);
		for(i = 0; i < 4; i++) {
		    c = xxgetc(); CTEXT_PUSH(c);
		    if(c >= '0' && c <= '9') ext = c - '0';
		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
		    else {
			xxungetc(c);
			CTEXT_POP();
			if (i == 0) { /* was just \u */
			    *ct = '\0';
			    PROBLEM "'\\u' used without hex digits in character string starting \"%s\"", currtext
				ERROR;
			}
			break;
		    }
		    val = 16*val + ext;
		}
		if(delim) {
		    if((c = xxgetc()) != '}') {
			PROBLEM "invalid \\u{xxxx} sequence"
		        ERROR;
		    } else CTEXT_PUSH(c);
		}
		WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */
		use_wcs = TRUE;
		continue;
	    }
	    else if(c == 'U') {
		unsigned int val = 0; int i, ext;
		Rboolean delim = FALSE;

		if((c = xxgetc()) == '{') {
		    delim = TRUE;
		    CTEXT_PUSH(c);
		} else xxungetc(c);
		for(i = 0; i < 8; i++) {
		    c = xxgetc(); CTEXT_PUSH(c);
		    if(c >= '0' && c <= '9') ext = c - '0';
		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
		    else {
			xxungetc(c);
			CTEXT_POP();
			if (i == 0) { /* was just \U */
			    *ct = '\0';
			    PROBLEM  "'\\U' used without hex digits in character string starting \"%s\"", currtext 
				ERROR;
			}
			break;
		    }
		    val = 16*val + ext;
		}
		if(delim) {
		    if((c = xxgetc()) != '}') {
			PROBLEM "invalid \\U{xxxxxxxx} sequence"
			    ERROR;
		    } else CTEXT_PUSH(c);
		}
		WTEXT_PUSH(val);
		use_wcs = TRUE;
		continue;
	    }
	    else {
		switch (c) {
		case 'a':
		    c = '\a';
		    break;
		case 'b':
		    c = '\b';
		    break;
		case 'f':
		    c = '\f';
		    break;
		case 'n':
		    c = '\n';
		    break;
		case 'r':
		    c = '\r';
		    break;
		case 't':
		    c = '\t';
		    break;
		case 'v':
		    c = '\v';
		    break;
		case '\\':
		    c = '\\';
		    break;
		case '"':
		case '\'':
		case ' ':
		case '\n':
		    break;
		default:
		    *ct = '\0';
		    PROBLEM "'\\%c' is an unrecognized escape in character string starting \"%s\"", c, currtext
												 ERROR;
		}
	    }
	}
    }

    STEXT_PUSH(c);
    if ((unsigned int) c < 0x80) WTEXT_PUSH(c);

    STEXT_PUSH('\0');
    WTEXT_PUSH(0);

    yylval = mkCharLenCE(wcs, wcnt, CE_UTF8); /* include terminator */
    if(stext != st0) free(stext);
    return(yylval);
}