Example #1
0
// Allocate the result list DT on the first call, or re-shape its columns on a
// later call, and return an estimate of the bytes used.
//
//   typeArg, sizeArg : per-column arrays, stored in the file-level `type` and
//                      `size` pointers for use by pushBuffer
//   ncolArg          : number of columns; only consulted when DT is created
//   ndrop            : number of columns whose type is CT_DROP
//   allocNrow        : number of rows each (re)allocated column should hold
//
// A call is treated as "first" when the file-level `ncol` is still 0.
// Returns the size of the list and its names plus the size of every column
// that was allocated or grown during this call.
size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow) {
  const int isNewDT = (ncol == 0);
  // save inputs for use by pushBuffer
  type = typeArg;
  size = sizeArg;

  if (isNewDT) {
    ncol = ncolArg;
    dtnrows = allocNrow;
    DT = allocVector(VECSXP, ncol-ndrop);
    SET_VECTOR_ELT(RCHK, 0, DT);  // storing DT inside RCHK is what keeps it alive
    if (ndrop != 0) {
      // Some columns are dropped: build a names vector holding only the kept ones.
      SEXP keptNames = PROTECT(allocVector(STRSXP, ncol-ndrop));
      setAttrib(DT, R_NamesSymbol, keptNames);
      UNPROTECT(1); // keptNames; now that it's safely a member of protected object
      int dst = 0;
      for (int src = 0; src < ncol; src++) {
        if (type[src] == CT_DROP) continue;
        SET_STRING_ELT(keptNames, dst, STRING_ELT(colNamesSxp, src));
        dst++;
      }
    } else {
      setAttrib(DT, R_NamesSymbol, colNamesSxp);  // colNames mkChar'd in userOverride step
    }
  }

  // TODO: move DT size calculation into a separate function (since the final size is different from the initial size anyways)
  size_t DTbytes = SIZEOF(DT)*(ncol-ndrop)*2; // the VECSXP and its column names (exclude global character cache usage)

  // Same for every column, so evaluate once. Always false for a new DT because
  // dtnrows was just set to allocNrow above.
  const int nrowChanged = (allocNrow != dtnrows);

  // Per-column decision:
  //   * new DT                          -> allocate a fresh vector
  //   * column type changed             -> replace with a fresh vector
  //     (a negative type[i] means the column is skipped in the rerun, so its
  //     type is regarded as unchanged)
  //   * only the row count changed      -> re-allocate via growVector
  //   * neither changed                 -> leave the column untouched
  int resi = 0;  // position within DT; dropped columns do not occupy a slot
  for (int i = 0; i < ncol; i++) {
    const int colType = type[i];
    if (colType == CT_DROP) continue;

    SEXP oldCol = VECTOR_ELT(DT, resi);
    const int wasInt64 = isNewDT ? 0 : INHERITS(oldCol, char_integer64);
    const int wantInt64 = (colType == CT_INT64);

    int typeChanged = 0;
    if (colType > 0) {
      typeChanged = isNewDT || TYPEOF(oldCol) != typeSxp[colType] || wasInt64 != wantInt64;
    }

    if (!typeChanged && !nrowChanged) {
      resi++;
      continue;
    }

    SEXP newCol;
    if (typeChanged) {
      // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
      newCol = allocVector(typeSxp[colType], allocNrow);
    } else {
      newCol = growVector(oldCol, allocNrow);
    }
    SET_VECTOR_ELT(DT, resi, newCol);

    if (wantInt64) {
      SEXP cls = PROTECT(ScalarString(char_integer64));
      setAttrib(newCol, R_ClassSymbol, cls);
      UNPROTECT(1);
    }
    SET_TRUELENGTH(newCol, allocNrow);
    DTbytes += SIZEOF(newCol)*allocNrow;
    resi++;
  }

  dtnrows = allocNrow;
  return DTbytes;
}
Example #2
0
// Copy the elements of `source` selected by the 1-based integer positions in
// `idx` into the pre-allocated `target`, dispatching on TYPEOF(source).
//
//   target   : destination vector; returned as-is when its length is 0
//   source   : vector being subset
//   idx      : integer positions into source (element k refers to source[k-1])
//   any0orNA : when true, idx may contain 0 (skipped, produces no output),
//              NA_INTEGER, or positions beyond length(source) (both produce an
//              NA in target); when false none of those are checked for
static SEXP subsetVectorRaw(SEXP target, SEXP source, SEXP idx, Rboolean any0orNA)
// Only for use by subsetDT() or subsetVector() below, hence static
{
    // Nothing to fill in an empty destination.
    if (!length(target)) return target;

    // Largest valid 1-based position; anything above it yields NA on the checked path.
    const int max=length(source);
    switch(TYPEOF(source)) {
    case INTSXP :
    case LGLSXP :
        // Both types are read and written through INTEGER() here.
        if (any0orNA) {
            // any 0 or NA *in idx*; if there's 0 or NA in the data that's just regular data to be copied
            // ansi advances only when something is written, so zeros in idx leave no gap in target.
            for (int i=0, ansi=0; i<LENGTH(idx); i++) {
                int this = INTEGER(idx)[i];
                if (this==0) continue;
                INTEGER(target)[ansi++] = (this==NA_INTEGER || this>max) ? NA_INTEGER : INTEGER(source)[this-1];
                // negatives are checked before (in check_idx()) not to have reached here
                // NA_INTEGER == NA_LOGICAL is checked in init.c
            }
        } else {
            // totally branch free to give optimizer/hardware best chance on all platforms
            // We keep the branchless version together here inside the same switch to keep
            // the code together by type
            // INTEGER and LENGTH are up front to isolate in preparation to stop using USE_RINTERNALS
            int *vd = INTEGER(source);
            int *vi = INTEGER(idx);
            int *p =  INTEGER(target);
            const int upp = LENGTH(idx);
            // No validation on this path: every vi[i] is used directly as a 1-based position.
            for (int i=0; i<upp; i++) *p++ = vd[vi[i]-1];
        }
        break;
    case REALSXP :
        if (any0orNA) {
            // define needed vars just when we need them. To registerize and to limit scope related bugs
            // The union lets the integer64 NA bit pattern (NAINT64) be stored through a double slot:
            // writing .ll and later reading .d reinterprets the same bytes.
            union {
                double d;
                long long ll;
            } naval;
            if (INHERITS(source, char_integer64)) naval.ll = NAINT64;
            else naval.d = NA_REAL;
            // Same skip-zero / NA-for-missing-or-out-of-range rule as the integer case above.
            for (int i=0, ansi=0; i<LENGTH(idx); i++) {
                int this = INTEGER(idx)[i];
                if (this==0) continue;
                REAL(target)[ansi++] = (this==NA_INTEGER || this>max) ? naval.d : REAL(source)[this-1];
            }
        } else {
Example #3
0
// Allocate the result list DT on the first call, or replace columns whose type
// has changed on a later call, and return an estimate of the bytes used.
//
//   typeArg, sizeArg : per-column arrays, stored in the file-level `type` and
//                      `size` pointers for use by pushBuffer
//   ncolArg          : number of columns; only consulted when DT is created
//   ndrop            : number of columns whose type is CT_DROP
//   allocNrow        : number of rows given to each newly allocated column
//
// A call is treated as "first" when the file-level `ncol` is still 0.
// Existing columns are only replaced when their type differs from type[i];
// a column with type[i] <= 0 is never replaced.
// Returns the size of the list and its names plus the size of every column
// allocated during this call.
size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow) {
  int newDT = (ncol == 0);
  // save inputs for use by pushBuffer
  size = sizeArg;
  type = typeArg;
  if (newDT) {
    ncol = ncolArg;
    DT=PROTECT(allocVector(VECSXP,ncol-ndrop));  // safer to leave over allocation to alloc.col on return in fread.R
    protecti++;  // this PROTECT is left for the caller's bookkeeping to release
    if (ndrop==0) {
      setAttrib(DT,R_NamesSymbol,colNamesSxp);  // colNames mkChar'd in userOverride step
    } else {
      // Build the names of the kept columns in a PROTECTed vector and attach it
      // only once it is complete. A fresh allocation must not be handed to
      // setAttrib() unprotected, and filling it first means the result does not
      // depend on whether setAttrib() stores the very same object.
      SEXP tt = PROTECT(allocVector(STRSXP, ncol-ndrop));
      for (int i=0,resi=0; i<ncol; i++) {
        if (type[i]!=CT_DROP) {
          SET_STRING_ELT(tt,resi++,STRING_ELT(colNamesSxp,i));
        }
      }
      setAttrib(DT, R_NamesSymbol, tt);
      UNPROTECT(1); // tt; now reachable through the protected DT
    }
  }
  size_t DTbytes = SIZEOF(DT)*(ncol-ndrop)*2; // the VECSXP and its column names (exclude global character cache usage)
  // resi indexes DT, which has no slot for dropped columns; i indexes type[].
  for (int i=0,resi=0; i<ncol; i++) {
    if (type[i] == CT_DROP) continue;
    // -1 never matches a typeSxp entry, so every positive-typed column of a new DT is allocated.
    int oldSxpType = newDT? -1 : TYPEOF(VECTOR_ELT(DT, resi));
    int oldIsInt64 = newDT? 0 : INHERITS(VECTOR_ELT(DT, resi), char_integer64);
    int newIsInt64 = type[i] == CT_INT64;
    if (type[i] > 0 && (oldSxpType != typeSxp[type[i]] || oldIsInt64 != newIsInt64)) {
      SEXP thiscol = allocVector(typeSxp[type[i]], allocNrow);
      SET_VECTOR_ELT(DT,resi,thiscol);     // no need to PROTECT thiscol, see R-exts 5.9.1
      if (type[i]==CT_INT64) {
        // ScalarString() allocates; protect its result across the setAttrib() call.
        SEXP cls = PROTECT(ScalarString(char_integer64));
        setAttrib(thiscol, R_ClassSymbol, cls);
        UNPROTECT(1); // cls; now an attribute of thiscol, which lives inside DT
      }
      SET_TRUELENGTH(thiscol, allocNrow);
      DTbytes += SIZEOF(thiscol)*allocNrow;
    }
    resi++;
  }
  return DTbytes;
}