size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow) { // save inputs for use by pushBuffer size = sizeArg; type = typeArg; int newDT = (ncol == 0); if (newDT) { ncol = ncolArg; dtnrows = allocNrow; SET_VECTOR_ELT(RCHK, 0, DT=allocVector(VECSXP,ncol-ndrop)); if (ndrop==0) { setAttrib(DT,R_NamesSymbol,colNamesSxp); // colNames mkChar'd in userOverride step } else { SEXP tt = PROTECT(allocVector(STRSXP, ncol-ndrop)); setAttrib(DT, R_NamesSymbol, tt); UNPROTECT(1); // tt; now that it's safely a member of protected object for (int i=0,resi=0; i<ncol; i++) if (type[i]!=CT_DROP) { SET_STRING_ELT(tt,resi++,STRING_ELT(colNamesSxp,i)); } } } // TODO: move DT size calculation into a separate function (since the final size is different from the initial size anyways) size_t DTbytes = SIZEOF(DT)*(ncol-ndrop)*2; // the VECSXP and its column names (exclude global character cache usage) // For each column we could have one of the following cases: // * if the DataTable is "new", then make a new vector // * if the column's type has changed, then replace it with a new vector // (however if column's type[i] is negative, then it means we're skipping // the column in the rerun, and its type hasn't actually changed). // * if dtnrows≠allocNrow and the column's type has not changed, then that // column needs to be re-alloced (using growVector). // * otherwise leave the column as-is. for (int i=0, resi=0; i<ncol; i++) { if (type[i] == CT_DROP) continue; SEXP col = VECTOR_ELT(DT, resi); int oldIsInt64 = newDT? 0 : INHERITS(col, char_integer64); int newIsInt64 = type[i] == CT_INT64; int typeChanged = (type[i] > 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64); int nrowChanged = (allocNrow != dtnrows); if (typeChanged || nrowChanged) { SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 : growVector(col, allocNrow); SET_VECTOR_ELT(DT,resi,thiscol); if (type[i]==CT_INT64) { SEXP tt = PROTECT(ScalarString(char_integer64)); setAttrib(thiscol, R_ClassSymbol, tt); UNPROTECT(1); } SET_TRUELENGTH(thiscol, allocNrow); DTbytes += SIZEOF(thiscol)*allocNrow; } resi++; } dtnrows = allocNrow; return DTbytes; }
static SEXP subsetVectorRaw(SEXP target, SEXP source, SEXP idx, Rboolean any0orNA) // Only for use by subsetDT() or subsetVector() below, hence static { if (!length(target)) return target; const int max=length(source); switch(TYPEOF(source)) { case INTSXP : case LGLSXP : if (any0orNA) { // any 0 or NA *in idx*; if there's 0 or NA in the data that's just regular data to be copied for (int i=0, ansi=0; i<LENGTH(idx); i++) { int this = INTEGER(idx)[i]; if (this==0) continue; INTEGER(target)[ansi++] = (this==NA_INTEGER || this>max) ? NA_INTEGER : INTEGER(source)[this-1]; // negatives are checked before (in check_idx()) not to have reached here // NA_INTEGER == NA_LOGICAL is checked in init.c } } else { // totally branch free to give optimizer/hardware best chance on all platforms // We keep the branchless version together here inside the same switch to keep // the code together by type // INTEGER and LENGTH are up front to isolate in preparation to stop using USE_RINTERNALS int *vd = INTEGER(source); int *vi = INTEGER(idx); int *p = INTEGER(target); const int upp = LENGTH(idx); for (int i=0; i<upp; i++) *p++ = vd[vi[i]-1]; } break; case REALSXP : if (any0orNA) { // define needed vars just when we need them. To registerize and to limit scope related bugs union { double d; long long ll; } naval; if (INHERITS(source, char_integer64)) naval.ll = NAINT64; else naval.d = NA_REAL; for (int i=0, ansi=0; i<LENGTH(idx); i++) { int this = INTEGER(idx)[i]; if (this==0) continue; REAL(target)[ansi++] = (this==NA_INTEGER || this>max) ? naval.d : REAL(source)[this-1]; } } else {
size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size_t allocNrow) { // save inputs for use by pushBuffer int newDT = (ncol == 0); size = sizeArg; type = typeArg; if (newDT) { ncol = ncolArg; DT=PROTECT(allocVector(VECSXP,ncol-ndrop)); // safer to leave over allocation to alloc.col on return in fread.R protecti++; if (ndrop==0) { setAttrib(DT,R_NamesSymbol,colNamesSxp); // colNames mkChar'd in userOverride step } else { SEXP tt; setAttrib(DT, R_NamesSymbol, tt = allocVector(STRSXP, ncol-ndrop)); for (int i=0,resi=0; i<ncol; i++) if (type[i]!=CT_DROP) { SET_STRING_ELT(tt,resi++,STRING_ELT(colNamesSxp,i)); } } } size_t DTbytes = SIZEOF(DT)*(ncol-ndrop)*2; // the VECSXP and its column names (exclude global character cache usage) for (int i=0,resi=0; i<ncol; i++) { if (type[i] == CT_DROP) continue; int oldSxpType = newDT? -1 : TYPEOF(VECTOR_ELT(DT, resi)); int oldIsInt64 = newDT? 0 : INHERITS(VECTOR_ELT(DT, resi), char_integer64); int newIsInt64 = type[i] == CT_INT64; if (type[i] > 0 && (oldSxpType != typeSxp[type[i]] || oldIsInt64 != newIsInt64)) { SEXP thiscol = allocVector(typeSxp[type[i]], allocNrow); SET_VECTOR_ELT(DT,resi,thiscol); // no need to PROTECT thiscol, see R-exts 5.9.1 if (type[i]==CT_INT64) setAttrib(thiscol, R_ClassSymbol, ScalarString(char_integer64)); SET_TRUELENGTH(thiscol, allocNrow); DTbytes += SIZEOF(thiscol)*allocNrow; } resi++; } return DTbytes; }