Exemplo n.º 1
0
/* Sets fields of typeInfo, ruling out possible types based on s.
 *
 * The typeInfo struct should be initialized with all fields TRUE.
 */
static void ruleout_types(const char *s, Typecvt_Info *typeInfo, LocalData *data)
{
    int res;
    char *endp;

    if (typeInfo->islogical) {
	if (strcmp(s, "F") == 0 || strcmp(s, "FALSE") == 0
	    || strcmp(s, "T") == 0 || strcmp(s, "TRUE") == 0) {
	    typeInfo->isinteger = FALSE;
	    typeInfo->isreal = FALSE;
	    typeInfo->iscomplex = FALSE;
	} else {
	    typeInfo->islogical = TRUE;
	}
    }

    if (typeInfo->isinteger) {
	res = Strtoi(s, 10);
	if (res == NA_INTEGER)
	    typeInfo->isinteger = FALSE;
    }

    if (typeInfo->isreal) {
	Strtod(s, &endp, TRUE, data);
	if (!isBlankString(endp))
	    typeInfo->isreal = FALSE;
    }

    if (typeInfo->iscomplex) {
	strtoc(s, &endp, TRUE, data);
	if (!isBlankString(endp))
	    typeInfo->iscomplex = FALSE;
    }
}
Exemplo n.º 2
0
SEXP df_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol,
            SEXP sWhat, SEXP sColNames, SEXP sSkip, SEXP sNlines, SEXP sQuote) {
    char sep;
    int nsep, use_ncol, resilient, ncol;
    long i, j, k, m, len, nmsep_flag, skip, quoteLen;
    unsigned long nrow;
    char num_buf[48];
    const char *c, *c2, *sraw = 0, *send = 0, *quoteChars;
    long nlines = asLong(sNlines, -1);

    SEXP sOutput, tmp, sOutputNames, st, clv;

    /* Parse inputs */
    sep = CHAR(STRING_ELT(sSep, 0))[0];
    nsep = (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0) ? ((int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0))) : -1;

    nmsep_flag = (nsep > 0);
    use_ncol = asInteger(sNcol);
    resilient = asInteger(sResilient);
    ncol = use_ncol; /* NOTE: "character" is prepended by the R code if nmsep is TRUE,
                        so ncol *does* include the key column */
    skip = asLong(sSkip, 0);

    /* parse quote information */
    quoteChars = CHAR(STRING_ELT(sQuote, 0));
    quoteLen = strlen(quoteChars);

    /* count non-NA columns */
    for (i = 0; i < use_ncol; i++)
	if (TYPEOF(VECTOR_ELT(sWhat,i)) == NILSXP) ncol--;

    /* check input */
    if (TYPEOF(s) == RAWSXP) {
	nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s);
	sraw = (const char*) RAW(s);
	send = sraw + XLENGTH(s);
	if (nrow >= skip) {
	    unsigned long slen = XLENGTH(s);
	    nrow = nrow - skip;
	    i = 0;
	    while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; }
	} else {
	    nrow = 0;
	    sraw = send;
	}
    } else if (TYPEOF(s) == STRSXP) {
	nrow = XLENGTH(s);
	if (nrow >= skip) {
	    nrow -= skip;
	} else {
	    skip = nrow;
	    nrow = 0;
	}
    } else
	Rf_error("invalid input to split - must be a raw or character vector");

    if (nlines >= 0 && nrow > nlines) nrow = nlines;

    /* allocate result */
    PROTECT(sOutput = allocVector(VECSXP, ncol));

    /* set names */
    setAttrib(sOutput, R_NamesSymbol, sOutputNames = allocVector(STRSXP, ncol));

    if (nrow > INT_MAX)
	Rf_warning("R currently doesn't support large data frames, but we have %lu rows, returning a named list instead", nrow);
    else {
	/* set automatic row names */
	PROTECT(tmp = allocVector(INTSXP, 2));
	INTEGER(tmp)[0] = NA_INTEGER;
	INTEGER(tmp)[1] = -nrow;
	setAttrib(sOutput, R_RowNamesSymbol, tmp);
	UNPROTECT(1);

	/* set class */
	classgets(sOutput, mkString("data.frame"));
    }

    /* Create SEXP for each element of the output */
    j = 0;
    for (i = 0; i < use_ncol; i++) {
      if (TYPEOF(VECTOR_ELT(sWhat,i)) != NILSXP) /* copy col.name */
        SET_STRING_ELT(sOutputNames, j, STRING_ELT(sColNames, i));

      switch (TYPEOF(VECTOR_ELT(sWhat,i))) {
      case LGLSXP:
      case INTSXP:
      case REALSXP:
      case CPLXSXP:
      case STRSXP:
      case RAWSXP:
        SET_VECTOR_ELT(sOutput, j++, allocVector(TYPEOF(VECTOR_ELT(sWhat,i)), nrow));
        break;

      case VECSXP:
        SET_VECTOR_ELT(sOutput, j++, st = allocVector(REALSXP, nrow));
        clv = PROTECT(allocVector(STRSXP, 2));
        SET_STRING_ELT(clv, 0, mkChar("POSIXct"));
        SET_STRING_ELT(clv, 1, mkChar("POSIXt"));
        setAttrib(st, R_ClassSymbol, clv);
        /* this is somewhat a security precaution such that users
           don't get surprised -- if there is no TZ R will
           render it in local time - which is correct but
           may confuse people that didn't use GMT to start with */
        setAttrib(st, install("tzone"), mkString("GMT"));
        UNPROTECT(1);
        break;

      case NILSXP:
        break;

      default:
        Rf_error("Unsupported input to what %u.", TYPEOF(VECTOR_ELT(sWhat,i)));
        break;
      }
    }

    /* Cycle through the rows and extract the data */
    for (k = 0; k < nrow; k++) {
      const char *l = 0, *le;
      if (TYPEOF(s) == RAWSXP) {
          l = sraw;
          le = memchr(l, '\n', send - l);
          if (!le) le = send;
          sraw = le + 1;
          if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */
      } else {
          l = CHAR(STRING_ELT(s, k + skip));
          le = l + strlen(l); /* probably lame, but using strings is way inefficient anyway ;) */
      }
      if (nmsep_flag) {
          c = memchr(l, nsep, le - l);
          if (c) {
            SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, Rf_mkCharLen(l, c - l));
            l = c + 1;
          } else
            SET_STRING_ELT(VECTOR_ELT(sOutput, 0), k, R_BlankString);
      }

      i = nmsep_flag;
      j = nmsep_flag;
      while (l < le) {
        if (!(c = memchr(l, sep, le - l)))
          c = le;

        if (i >= use_ncol) {
          if (resilient) break;
          Rf_error("line %lu: too many input columns (expected %u)", k, use_ncol);
        }

        switch(TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP
        case LGLSXP:
          len = (int) (c - l);
          if (len > sizeof(num_buf) - 1)
              len = sizeof(num_buf) - 1;
          memcpy(num_buf, l, len);
          num_buf[len] = 0;
          int tr = StringTrue(num_buf), fa = StringFalse(num_buf);
          LOGICAL(VECTOR_ELT(sOutput, j))[k] = (tr || fa) ? tr : NA_INTEGER;
          j++;
          break;

        case INTSXP:
          len = (int) (c - l);
          /* watch for overflow and truncate -- should we warn? */
          if (len > sizeof(num_buf) - 1)
              len = sizeof(num_buf) - 1;
          memcpy(num_buf, l, len);
          num_buf[len] = 0;
          INTEGER(VECTOR_ELT(sOutput, j))[k] = Strtoi(num_buf, 10);
          j++;
          break;

        case REALSXP:
          len = (int) (c - l);
          /* watch for overflow and truncate -- should we warn? */
          if (len > sizeof(num_buf) - 1)
              len = sizeof(num_buf) - 1;
          memcpy(num_buf, l, len);
          num_buf[len] = 0;
          REAL(VECTOR_ELT(sOutput, j))[k] = R_atof(num_buf);
          j++;
          break;

        case CPLXSXP:
          len = (int) (c - l);
          /* watch for overflow and truncate -- should we warn? */
          if (len > sizeof(num_buf) - 1)
              len = sizeof(num_buf) - 1;
          memcpy(num_buf, l, len);
          num_buf[len] = 0;
          COMPLEX(VECTOR_ELT(sOutput, j))[k] = strtoc(num_buf, TRUE);
          j++;
          break;

        case STRSXP:
          c2 = c;
          if (quoteLen) {
            for (m = 0; m < quoteLen; m++) {
              if (*l == quoteChars[m]) {
                l++;
                if (!(c2 = memchr(l, quoteChars[m], le - l))) {
                  Rf_error("End of line within quoted string.");
                } else {
                  if (!(c = memchr(c2, (unsigned char) sep, le - c2)))
                    c = le;
                }
              }
            }
          }
          SET_STRING_ELT(VECTOR_ELT(sOutput, j), k, Rf_mkCharLen(l, c2 - l));
          j++;
          break;

        case RAWSXP:
          len = (int) (c - l);
          /* watch for overflow and truncate -- should we warn? */
          if (len > sizeof(num_buf) - 1)
              len = sizeof(num_buf) - 1;
          memcpy(num_buf, l, len);
          num_buf[len] = 0;
          RAW(VECTOR_ELT(sOutput, j))[k] = strtoraw(num_buf);
          j++;
          break;

        case VECSXP:
          REAL(VECTOR_ELT(sOutput, j))[k] = parse_ts(l, c);
          j++;
        }

        l = c + 1;
        i++;
      }

      /* fill-up unused columns */
      while (i < use_ncol) {
          switch (TYPEOF(VECTOR_ELT(sWhat,i))) { // NOTE: no matching case for NILSXP
          case LGLSXP:
            LOGICAL(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER;
            break;

          case INTSXP:
            INTEGER(VECTOR_ELT(sOutput, j++))[k] = NA_INTEGER;
            break;

          case REALSXP:
          case VECSXP:
            REAL(VECTOR_ELT(sOutput, j++))[k] = NA_REAL;
            break;

          case CPLXSXP:
            COMPLEX(VECTOR_ELT(sOutput, j))[k].r = NA_REAL;
            COMPLEX(VECTOR_ELT(sOutput, j++))[k].i = NA_REAL;
            break;

          case STRSXP:
            SET_STRING_ELT(VECTOR_ELT(sOutput, j++), k, R_NaString);
            break;

          case RAWSXP:
            RAW(VECTOR_ELT(sOutput, j))[k] = (Rbyte) 0;
            break;
          }
          i++;
      }
    }

    UNPROTECT(1); /* sOutput */
    return(sOutput);
}
Exemplo n.º 3
0
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol,
               SEXP sWhat, SEXP sSkip, SEXP sNlines, SEXP sQuote) {
  unsigned int ncol = 1, np = 0, resilient = asInteger(sResilient);
  unsigned long nrow, i, k, N, len, quoteLen;
  int use_ncol = asInteger(sNcol);
  int nsep = -1;
  long skip = asLong(sSkip, 0);
  long nlines = asLong(sNlines, -1);
  SEXP res, rnam;
  char sep;
  char num_buf[48];
  /* sraw/send is only used for raw vector parsing, but we have to set it to 0
     to make gcc happy which cannot figure out that it's actually unused */
  const char *c, *c2, *sraw = 0, *send = 0, *l, *le, *quoteChars;

  /* parse sep input */
  if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0)
    nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0));
  if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1)
    Rf_error("invalid separator");
  sep = CHAR(STRING_ELT(sSep, 0))[0];

  /* parse quote information */
  quoteChars = CHAR(STRING_ELT(sQuote, 0));
  quoteLen = strlen(quoteChars);

  /* check the input data */
  if (TYPEOF(s) == RAWSXP) {
    nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s);
    sraw = (const char*) RAW(s);
    send = sraw + XLENGTH(s);
    if (nrow >= skip) {
	unsigned long slen = XLENGTH(s);
	nrow = nrow - skip;
	i = 0;
	while (i < skip && (sraw = memchr(sraw, '\n', slen))) { sraw++; i++; }
    } else {
	nrow = 0;
	sraw = send;
    }
  } else if (TYPEOF(s) == STRSXP) {
    nrow = LENGTH(s);
    if (nrow >= skip) {
      nrow -= skip;
    } else {
      skip = nrow;
      nrow = 0;
    }
  } else {
    Rf_error("invalid input to split - must be a raw or character vector");
  }
  if (nlines >= 0 && nrow > nlines) nrow = nlines;

  /* If no rows left, return an empty matrix */
  if (!nrow) {
    if (np) UNPROTECT(np);
    return allocMatrix(TYPEOF(sWhat), 0, 0);
  }

  /* count number of columns */
  if (use_ncol < 1) {
    if (TYPEOF(s) == RAWSXP) {
      c = sraw;
      le = memchr(sraw, '\n', send - sraw);
      if (TYPEOF(sWhat) == STRSXP && quoteLen) {
        ncol = 0;
        while(1)
        {
          ncol++;
          for (k = 0; k < quoteLen; k++) {
            if (*c == quoteChars[k]) {
              c++;
              if (! (c = memchr(c, quoteChars[k], le - c)))
                Rf_error("End of line within quote string on line 1; cannot determine num columns!");
              break; /* note: breaks inner 'for' loop, not 'if' statement */
            }
          }
          if (!(c = (memchr(c, (unsigned char) sep, le - c))))
            break;
          c++;
        }
      } else {
        ncol = 1;
        while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; }
      }
    } else {
      c = CHAR(STRING_ELT(s, 0));
      if (TYPEOF(sWhat) == STRSXP && quoteLen) {
        ncol = 0;
        while(1)
        {
          ncol++;
          for (k = 0; k < quoteLen; k++) {
            if (*c == quoteChars[k]) {
              c++;
              if (!(c = strchr(c, quoteChars[k])))
                Rf_error("End of line within quote string on line 1; cannot determine num columns!");
              break; /* note: breaks inner 'for' loop, not 'if' statement */
            }
          }
          if (!(c = (strchr(c, sep))))
            break;
          c++;
        }
      } else {
        while ((c = strchr(c, sep))) { ncol++; c++; }
      }
    }
    /* if sep and nsep are the same then the first "column" is the key and not the column */
    if (nsep == (int) (unsigned char) sep) ncol--;
  } else ncol = use_ncol;

  /* allocate space for the result */
  N = ncol * nrow;
  switch(TYPEOF(sWhat)) {
    case LGLSXP:
    case INTSXP:
    case REALSXP:
    case CPLXSXP:
    case STRSXP:
    case RAWSXP:
      res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol));
      break;

    default:
      Rf_error("Unsupported input to what.");
      break;
  }
  if (nsep >= 0) {
    SEXP dn;
    setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2)));
    SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow)));
  }
  np++;

  /* cycle over the rows and parse the data */
  for (i = 0; i < nrow; i++) {
    int j = i;

    /* find the row of data */
    if (TYPEOF(s) == RAWSXP) {
        l = sraw;
        le = memchr(l, '\n', send - l);
        if (!le) le = send;
        sraw = le + 1;
        if (*(le - 1) == '\r' ) le--; /* account for DOS-style '\r\n' */
    } else {
        l = CHAR(STRING_ELT(s, i + skip));
        le = l + strlen(l);
    }

    /* if nsep, load rowname */
    if (nsep >= 0) {
      c = memchr(l, nsep, le - l);
      if (c) {
        SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l));
        l = c + 1;
      } else
        SET_STRING_ELT(rnam, i, R_BlankString);
    }

    /* now split the row into elements */
    while (l < le) {
      if (!(c = memchr(l, sep, le - l)))
        c = le;

      if (j >= N) {
        if (resilient) break;
        Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol);
      }

      switch(TYPEOF(sWhat)) {
      case LGLSXP:
        len = (int) (c - l);
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        int tr = StringTrue(num_buf), fa = StringFalse(num_buf);
        LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER;
        break;

      case INTSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        INTEGER(res)[j] = Strtoi(num_buf, 10);
        break;

      case REALSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        REAL(res)[j] = R_atof(num_buf);
        break;

      case CPLXSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        COMPLEX(res)[j] = strtoc(num_buf, TRUE);
        break;

      case STRSXP:
        c2 = c;
        if (quoteLen) {
          for (k = 0; k < quoteLen; k++) {
            if (*l == quoteChars[k]) {
              l++;
              if (!(c2 = memchr(l, quoteChars[k], le - l))) {
                Rf_warning("End of line within quoted string!");
                c = c2 = le;
              } else {
                if (!(c = memchr(c2, (unsigned char) sep, le - c2)))
                  c = le;
              }
            }
          }
        }
        SET_STRING_ELT(res, j, Rf_mkCharLen(l, c2 - l));
        break;

      case RAWSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        RAW(res)[j] = strtoraw(num_buf);
        break;
      }
      l = c + 1;
      j += nrow;
    }

    /* fill up unused columns with NAs */
    while (j < N) {
      switch (TYPEOF(sWhat)) {
      case LGLSXP:
        LOGICAL(res)[j] = NA_INTEGER;
        break;

      case INTSXP:
        INTEGER(res)[j] = NA_INTEGER;
        break;

      case REALSXP:
        REAL(res)[j] = NA_REAL;
        break;

      case CPLXSXP:
        COMPLEX(res)[j].r = NA_REAL;
        COMPLEX(res)[j].i = NA_REAL;
        break;

      case STRSXP:
        SET_STRING_ELT(res, j, R_NaString);
        break;

      case RAWSXP:
        RAW(res)[j] = (Rbyte) 0;
        break;
      }
      j += nrow;
    }
  }

  UNPROTECT(np);
  return res;
}
Exemplo n.º 4
0
SEXP typeconvert(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP cvec, a, dup, levs, dims, names, dec;
    SEXP rval = R_NilValue; /* -Wall */
    int i, j, len, asIs;
    Rboolean done = FALSE;
    char *endp;
    const char *tmp = NULL;
    LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE,
		      FALSE, 0, FALSE, FALSE};
    Typecvt_Info typeInfo;      /* keep track of possible types of cvec */
    typeInfo.islogical = TRUE;  /* we can't rule anything out initially */
    typeInfo.isinteger = TRUE;
    typeInfo.isreal = TRUE;
    typeInfo.iscomplex = TRUE;
    data.NAstrings = R_NilValue;

    args = CDR(args);

    if (!isString(CAR(args)))
	error(_("the first argument must be of mode character"));

    data.NAstrings = CADR(args);
    if (TYPEOF(data.NAstrings) != STRSXP)
	error(_("invalid '%s' argument"), "na.strings");

    asIs = asLogical(CADDR(args));
    if (asIs == NA_LOGICAL) asIs = 0;

    dec = CADDDR(args);

    if (isString(dec) || isNull(dec)) {
	if (length(dec) == 0)
	    data.decchar = '.';
	else
	    data.decchar = translateChar(STRING_ELT(dec, 0))[0];
    }

    cvec = CAR(args);
    len = length(cvec);

    /* save the dim/dimnames attributes */

    PROTECT(dims = getAttrib(cvec, R_DimSymbol));
    if (isArray(cvec))
	PROTECT(names = getAttrib(cvec, R_DimNamesSymbol));
    else
	PROTECT(names = getAttrib(cvec, R_NamesSymbol));

    /* Use the first non-NA to screen */
    for (i = 0; i < len; i++) {
	tmp = CHAR(STRING_ELT(cvec, i));
	if (!(STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0
	      || isNAstring(tmp, 1, &data) || isBlankString(tmp)))
	    break;
    }
    if (i < len) {  /* not all entries are NA */
	ruleout_types(tmp, &typeInfo, &data);
    }

    if (typeInfo.islogical) {
	PROTECT(rval = allocVector(LGLSXP, len));
	for (i = 0; i < len; i++) {
	    tmp = CHAR(STRING_ELT(cvec, i));
	    if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0
		|| isNAstring(tmp, 1, &data) || isBlankString(tmp))
		LOGICAL(rval)[i] = NA_LOGICAL;
	    else {
		if (strcmp(tmp, "F") == 0 || strcmp(tmp, "FALSE") == 0)
		    LOGICAL(rval)[i] = 0;
		else if(strcmp(tmp, "T") == 0 || strcmp(tmp, "TRUE") == 0)
		    LOGICAL(rval)[i] = 1;
		else {
		    typeInfo.islogical = FALSE;
		    ruleout_types(tmp, &typeInfo, &data);
		    break;
		}
	    }
	}
	if (typeInfo.islogical) done = TRUE; else UNPROTECT(1);
    }

    if (!done && typeInfo.isinteger) {
	PROTECT(rval = allocVector(INTSXP, len));
	for (i = 0; i < len; i++) {
	    tmp = CHAR(STRING_ELT(cvec, i));
	    if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0
		|| isNAstring(tmp, 1, &data) || isBlankString(tmp))
		INTEGER(rval)[i] = NA_INTEGER;
	    else {
		INTEGER(rval)[i] = Strtoi(tmp, 10);
		if (INTEGER(rval)[i] == NA_INTEGER) {
		    typeInfo.isinteger = FALSE;
		    ruleout_types(tmp, &typeInfo, &data);
		    break;
		}
	    }
	}
	if(typeInfo.isinteger) done = TRUE; else UNPROTECT(1);
    }

    if (!done && typeInfo.isreal) {
	PROTECT(rval = allocVector(REALSXP, len));
	for (i = 0; i < len; i++) {
	    tmp = CHAR(STRING_ELT(cvec, i));
	    if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0
		|| isNAstring(tmp, 1, &data) || isBlankString(tmp))
		REAL(rval)[i] = NA_REAL;
	    else {
		REAL(rval)[i] = Strtod(tmp, &endp, FALSE, &data);
		if (!isBlankString(endp)) {
		    typeInfo.isreal = FALSE;
		    ruleout_types(tmp, &typeInfo, &data);
		    break;
		}
	    }
	}
	if(typeInfo.isreal) done = TRUE; else UNPROTECT(1);
    }

    if (!done && typeInfo.iscomplex) {
	PROTECT(rval = allocVector(CPLXSXP, len));
	for (i = 0; i < len; i++) {
	    tmp = CHAR(STRING_ELT(cvec, i));
	    if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0
		|| isNAstring(tmp, 1, &data) || isBlankString(tmp))
		COMPLEX(rval)[i].r = COMPLEX(rval)[i].i = NA_REAL;
	    else {
		COMPLEX(rval)[i] = strtoc(tmp, &endp, FALSE, &data);
		if (!isBlankString(endp)) {
		    typeInfo.iscomplex = FALSE;
		    /* this is not needed, unless other cases are added */
		    ruleout_types(tmp, &typeInfo, &data);
		    break;
		}
	    }
	}
	if(typeInfo.iscomplex) done = TRUE; else UNPROTECT(1);
    }

    if (!done) {
	if (asIs) {
	    PROTECT(rval = duplicate(cvec));
	    for (i = 0; i < len; i++)
		if(isNAstring(CHAR(STRING_ELT(rval, i)), 1, &data))
		    SET_STRING_ELT(rval, i, NA_STRING);
	}
	else {
	    PROTECT(dup = duplicated(cvec, FALSE));
	    j = 0;
	    for (i = 0; i < len; i++) {
		/* <NA> is never to be a level here */
		if (STRING_ELT(cvec, i) == NA_STRING) continue;
		if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data))
		    j++;
	    }

	    PROTECT(levs = allocVector(STRSXP,j));
	    j = 0;
	    for (i = 0; i < len; i++) {
		if (STRING_ELT(cvec, i) == NA_STRING) continue;
		if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data))
		    SET_STRING_ELT(levs, j++, STRING_ELT(cvec, i));
	    }

	    /* We avoid an allocation by reusing dup,
	     * a LGLSXP of the right length
	     */
	    rval = dup;
	    SET_TYPEOF(rval, INTSXP);

	    /* put the levels in lexicographic order */

	    sortVector(levs, FALSE);

	    PROTECT(a = matchE(levs, cvec, NA_INTEGER, env));
	    for (i = 0; i < len; i++)
		INTEGER(rval)[i] = INTEGER(a)[i];

	    setAttrib(rval, R_LevelsSymbol, levs);
	    PROTECT(a = mkString("factor"));
	    setAttrib(rval, R_ClassSymbol, a);
	    UNPROTECT(3);
	}
    }

    setAttrib(rval, R_DimSymbol, dims);
    setAttrib(rval, isArray(cvec) ? R_DimNamesSymbol : R_NamesSymbol, names);
    UNPROTECT(3);
    return rval;
}
Exemplo n.º 5
0
static void extractItem(char *buffer, SEXP ans, int i, LocalData *d)
{
    char *endp;
    switch(TYPEOF(ans)) {
    case NILSXP:
	break;
    case LGLSXP:
	if (isNAstring(buffer, 0, d))
	    LOGICAL(ans)[i] = NA_INTEGER;
	else {
	    int tr = StringTrue(buffer), fa = StringFalse(buffer);
	    if(tr || fa) LOGICAL(ans)[i] = tr;
	    else expected("a logical", buffer, d);
	}
	break;
    case INTSXP:
	if (isNAstring(buffer, 0, d))
	    INTEGER(ans)[i] = NA_INTEGER;
	else {
	    INTEGER(ans)[i] = Strtoi(buffer, 10);
	    if (INTEGER(ans)[i] == NA_INTEGER)
		expected("an integer", buffer, d);
	}
	break;
    case REALSXP:
	if (isNAstring(buffer, 0, d))
	    REAL(ans)[i] = NA_REAL;
	else {
	    REAL(ans)[i] = Strtod(buffer, &endp, TRUE, d);
	    if (!isBlankString(endp))
		expected("a real", buffer, d);
	}
	break;
    case CPLXSXP:
	if (isNAstring(buffer, 0, d))
	    COMPLEX(ans)[i].r = COMPLEX(ans)[i].i = NA_REAL;
	else {
	    COMPLEX(ans)[i] = strtoc(buffer, &endp, TRUE, d);
	    if (!isBlankString(endp))
		expected("a complex", buffer, d);
	}
	break;
    case STRSXP:
	if (isNAstring(buffer, 1, d))
	    SET_STRING_ELT(ans, i, NA_STRING);
	else
	    SET_STRING_ELT(ans, i, insertString(buffer, d));
	break;
    case RAWSXP:
	if (isNAstring(buffer, 0, d))
	    RAW(ans)[i] = 0;
	else {
	    RAW(ans)[i] = strtoraw(buffer, &endp);
	    if (!isBlankString(endp))
		expected("a raw", buffer, d);
	}
	break;
    default:
	UNIMPLEMENTED_TYPE("extractItem", ans);
    }
}
Exemplo n.º 6
0
SEXP mat_split(SEXP s, SEXP sSep, SEXP sNamesSep, SEXP sResilient, SEXP sNcol,
               SEXP sWhat, SEXP sSkip, SEXP sNlines) {
  unsigned int ncol = 1, nrow, np = 0, i, N, resilient = asInteger(sResilient);
  int use_ncol = asInteger(sNcol);
  int nsep = -1;
  int skip = INTEGER(sSkip)[0];
  int nlines = INTEGER(sNlines)[0];
  int len;
  SEXP res, rnam, zerochar = 0;
  char sep;
  char num_buf[48];
  double * res_ptr;
  const char *c, *sraw, *send, *l, *le;;

  /* parse sep input */
  if (TYPEOF(sNamesSep) == STRSXP && LENGTH(sNamesSep) > 0)
    nsep = (int) (unsigned char) *CHAR(STRING_ELT(sNamesSep, 0));
  if (TYPEOF(sSep) != STRSXP || LENGTH(sSep) < 1)
    Rf_error("invalid separator");
  sep = CHAR(STRING_ELT(sSep, 0))[0];

  /* check the input data */
  if (TYPEOF(s) == RAWSXP) {
    nrow = (nlines >= 0) ? count_lines_bounded(s, nlines + skip) : count_lines(s);
    sraw = (const char*) RAW(s);
    send = sraw + XLENGTH(s);
    if (nrow >= skip) {
      nrow = nrow - skip;
      for (i = 0; i < skip; i++) sraw = memchr(sraw,'\n',XLENGTH(s)) + 1;
    } else {
      nrow = 0;
      sraw = send;
    }
  } else if (TYPEOF(s) == STRSXP) {
    nrow = LENGTH(s);
    if (nrow >= skip) {
      nrow -= skip;
    } else {
      skip = nrow;
      nrow = 0;
    }
  } else {
    Rf_error("invalid input to split - must be a raw or character vector");
  }
  if (nlines >= 0 && nrow > nlines) nrow = nlines;

  /* If no rows left, return an empty matrix */
  if (!nrow) {
    if (np) UNPROTECT(np);
    return allocMatrix(TYPEOF(sWhat), 0, 0);
  }

  /* count number of columns */
  if (use_ncol < 1) {
    if (TYPEOF(s) == RAWSXP) {
      ncol = 1;
      c = sraw;
      le = memchr(sraw, '\n', send - sraw);
      while ((c = memchr(c, (unsigned char) sep, le - c))) { ncol++; c++; }
    } else {
      c = CHAR(STRING_ELT(s, 0));
      while ((c = strchr(c, sep))) { ncol++; c++; }
      /* if sep and nsep are the same then the first "column" is the key and not the column */
      if (nsep == (int) (unsigned char) sep) ncol--;
    }
  } else ncol = use_ncol;

  /* allocate space for the result */
  N = ncol * nrow;
  switch(TYPEOF(sWhat)) {
    case LGLSXP:
    case INTSXP:
    case REALSXP:
    case CPLXSXP:
    case STRSXP:
    case RAWSXP:
      res = PROTECT(allocMatrix(TYPEOF(sWhat), nrow, ncol));
      break;

    default:
      Rf_error("Unsupported input to what.");
      break;
  }
  if (nsep >= 0) {
    SEXP dn;
    setAttrib(res, R_DimNamesSymbol, (dn = allocVector(VECSXP, 2)));
    SET_VECTOR_ELT(dn, 0, (rnam = allocVector(STRSXP, nrow)));
  }
  np++;

  /* cycle over the rows and parse the data */
  for (i = 0; i < nrow; i++) {
    int j = i;

    /* find the row of data */
    if (TYPEOF(s) == RAWSXP) {
        l = sraw;
        le = memchr(l, '\n', send - l);
        if (!le) le = send;
        sraw = le + 1;
    } else {
        l = CHAR(STRING_ELT(s, i + skip));
        le = l + strlen(l);
    }

    /* if nsep, load rowname */
    if (nsep >= 0) {
      c = memchr(l, nsep, le - l);
      if (c) {
        SET_STRING_ELT(rnam, i, Rf_mkCharLen(l, c - l));
        l = c + 1;
      } else
        SET_STRING_ELT(rnam, i, R_BlankString);
    }

    /* now split the row into elements */
    while (l < le) {
      if (!(c = memchr(l, sep, le - l)))
        c = le;

      if (j >= N) {
        if (resilient) break;
        Rf_error("line %lu: too many columns (expected %u)", (unsigned long)(i + 1), ncol);
      }

      switch(TYPEOF(sWhat)) {
      case LGLSXP:
        len = (int) (c - l);
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        int tr = StringTrue(num_buf), fa = StringFalse(num_buf);
        LOGICAL(res)[j] = (tr || fa) ? tr : NA_INTEGER;
        break;

      case INTSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        INTEGER(res)[j] = Strtoi(num_buf, 10);
        break;

      case REALSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        REAL(res)[j] = R_atof(num_buf);
        break;

      case CPLXSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        COMPLEX(res)[j] = strtoc(num_buf, TRUE);
        break;

      case STRSXP:
        SET_STRING_ELT(res, j, Rf_mkCharLen(l, c - l));
        break;

      case RAWSXP:
        len = (int) (c - l);
        /* watch for overflow and truncate -- should we warn? */
        if (len > sizeof(num_buf) - 1)
            len = sizeof(num_buf) - 1;
        memcpy(num_buf, l, len);
        num_buf[len] = 0;
        RAW(res)[j] = strtoraw(num_buf);
        break;
      }
      l = c + 1;
      j += nrow;
    }

    /* fill up unused columns with NAs */
    while (j < N) {
      switch (TYPEOF(sWhat)) {
      case LGLSXP:
        LOGICAL(res)[j] = NA_INTEGER;
        break;

      case INTSXP:
        INTEGER(res)[j] = NA_INTEGER;
        break;

      case REALSXP:
        REAL(res)[j] = NA_REAL;
        break;

      case CPLXSXP:
        COMPLEX(res)[j].r = NA_REAL;
        COMPLEX(res)[j].i = NA_REAL;
        break;

      case STRSXP:
        SET_STRING_ELT(res, j, R_NaString);
        break;

      case RAWSXP:
        RAW(res)[j] = (Rbyte) 0;
        break;
      }
      j += nrow;
    }
  }

  UNPROTECT(np);
  return res;
}