// Returns the proper integer chrom target (as is or through factor levels) // Negative values should skip further treatments (level not found in factor) int chromTarget(SEXP chrom, SEXP targetChrom) { int output; if(isFactor(chrom)) { // Convert 'targetChrom' to a character vector if(isFactor(targetChrom)) { targetChrom = PROTECT(asCharacterFactor(targetChrom)); } else { targetChrom = PROTECT(coerceVector(targetChrom, STRSXP)); } if(LENGTH(targetChrom) != 1 || STRING_ELT(targetChrom, 0) == NA_STRING) { error("As 'chrom' is factor, target 'chrom' must be a single non-NA character-coercible value"); } // From character to integer position in the index (0+) SEXP levels = PROTECT(getAttrib(chrom, R_LevelsSymbol)); for(int i = 0; i < LENGTH(levels); i++) { if(strcmp(CHAR(STRING_ELT(levels, i)), CHAR(STRING_ELT(targetChrom, 0))) == 0) { // Early exit when found output = i; UNPROTECT(2); return output; } } // Was not found in levels output = -1; UNPROTECT(2); } else { // Integer chrom targetChrom = PROTECT(coerceVector(targetChrom, INTSXP)); if(LENGTH(targetChrom) != 1 || INTEGER(targetChrom)[0] == NA_INTEGER || INTEGER(targetChrom)[0] < 0) { error("As 'chrom' is integer, target 'chrom' must be a single non-NA integer-coercible strictly positive value"); } // Position in the index (0+) output = INTEGER(targetChrom)[0] - 1; UNPROTECT(1); } return output; }
SEXP fmelt(SEXP DT, SEXP id, SEXP measure, SEXP varfactor, SEXP valfactor, SEXP var_name, SEXP val_name, SEXP na_rm, SEXP drop_levels, SEXP print_out) { int i, j, k, nrow, ncol, protecti=0, lids=-1, lvalues=-1, totlen=0, counter=0, thislen=0; SEXP thiscol, ans, dtnames, ansnames, idcols, valuecols, levels, factorLangSxp; SEXP vars, target, idxkeep = R_NilValue, thisidx = R_NilValue; Rboolean isfactor=FALSE, isidentical=TRUE, narm = FALSE, droplevels=FALSE, verbose=FALSE; SEXPTYPE valtype=NILSXP; size_t size; if (TYPEOF(DT) != VECSXP) error("Input is not of type VECSXP, expected a data.table, data.frame or list"); if (TYPEOF(valfactor) != LGLSXP) error("Argument 'value.factor' should be logical TRUE/FALSE"); if (TYPEOF(varfactor) != LGLSXP) error("Argument 'variable.factor' should be logical TRUE/FALSE"); if (TYPEOF(na_rm) != LGLSXP) error("Argument 'na.rm' should be logical TRUE/FALSE"); if (LOGICAL(na_rm)[0] == TRUE) narm = TRUE; if (TYPEOF(print_out) != LGLSXP) error("Argument 'verbose' should be logical TRUE/FALSE"); if (LOGICAL(print_out)[0] == TRUE) verbose = TRUE; // check for var and val names if (TYPEOF(var_name) != STRSXP || length(var_name) != 1) error("Argument 'variable.name' must be a character vector of length 1"); if (TYPEOF(val_name) != STRSXP || length(val_name) != 1) error("Argument 'value.name' must be a character vector of length 1"); // droplevels future feature request, maybe... should ask on data.table-help // if (!isLogical(drop_levels)) error("Argument 'drop.levels' should be logical TRUE/FALSE"); // if (LOGICAL(drop_levels)[0] == TRUE) droplevels = TRUE; // if (droplevels && !narm) warning("Ignoring argument 'drop.levels'. 'drop.levels' should be set to remove any unused levels as a result of setting 'na.rm=TRUE'. Here there is nothing to do because 'na.rm=FALSE'"); ncol = LENGTH(DT); nrow = length(VECTOR_ELT(DT, 0)); if (ncol <= 0) { warning("ncol(data) is 0. Nothing to do, returning original data.table."); return(DT); } PROTECT(dtnames = getAttrib(DT, R_NamesSymbol)); protecti++; if (isNull(dtnames)) error("names(data) is NULL. Please report to data.table-help"); vars = checkVars(DT, id, measure, verbose); PROTECT(idcols = VECTOR_ELT(vars, 0)); protecti++; PROTECT(valuecols = VECTOR_ELT(vars, 1)); protecti++; // <~~~ not protecting vars leads to segfault (on big data) lids = length(idcols); lvalues = length(valuecols); // edgecase where lvalues = 0 and lids > 0 if (lvalues == 0 && lids > 0) { if (verbose) Rprintf("length(measure.var) is 0. Edge case detected. Nothing to melt. Returning data.table with all 'id.vars' which are columns %s\n", CHAR(STRING_ELT(concat(dtnames, idcols), 0))); PROTECT(ansnames = allocVector(STRSXP, lids)); protecti++; PROTECT(ans = allocVector(VECSXP, lids)); protecti++; for (i=0; i<lids; i++) { SET_VECTOR_ELT(ans, i, VECTOR_ELT(DT, INTEGER(idcols)[i]-1)); SET_STRING_ELT(ansnames, i, STRING_ELT(dtnames, INTEGER(idcols)[i]-1)); } setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(protecti); return(ans); } if (lvalues == 0 && lids == 0 && verbose) Rprintf("length(measure.var) and length(id.var) are both 0. Edge case detected. Nothing to melt.\n"); // <~~ don't think this will ever happen though with all the checks // set names for 'ans' - the output list PROTECT(ansnames = allocVector(STRSXP, lids+2)); protecti++; for (i=0; i<lids; i++) { SET_STRING_ELT(ansnames, i, STRING_ELT(dtnames, INTEGER(idcols)[i]-1)); } SET_STRING_ELT(ansnames, lids, mkChar(CHAR(STRING_ELT(var_name, 0)))); // mkChar("variable") SET_STRING_ELT(ansnames, lids+1, mkChar(CHAR(STRING_ELT(val_name, 0)))); // mkChar("value") // get "value" column for (i=0; i<lvalues; i++) { thiscol = VECTOR_ELT(DT, INTEGER(valuecols)[i]-1); if (!isfactor && isFactor(thiscol)) isfactor = TRUE; if (TYPEOF(thiscol) > valtype) valtype = TYPEOF(thiscol); } if (isfactor && valtype != VECSXP) valtype = STRSXP; for (i=0; i<lvalues; i++) { thiscol = VECTOR_ELT(DT, INTEGER(valuecols)[i]-1); if (TYPEOF(thiscol) != valtype && isidentical) { if (!(isFactor(thiscol) && valtype == STRSXP)) { isidentical = FALSE; // for Date like column (not implemented for now) warning("All 'measure.vars are NOT of the SAME type. By order of hierarchy, the molten data value column will be of type '%s'. Therefore all measure variables that are not of type '%s' will be coerced to. Check the DETAILS section of ?melt.data.table for more on coercion.\n", type2char(valtype), type2char(valtype)); break; } } } if (valtype == VECSXP && narm) { narm = FALSE; if (verbose) Rprintf("The molten data value type is a list. 'na.rm=TRUE' is therefore ignored.\n"); } if (narm) { PROTECT(idxkeep = allocVector(VECSXP, lvalues)); protecti++; for (i=0; i<lvalues; i++) { SET_VECTOR_ELT(idxkeep, i, which_notNA(VECTOR_ELT(DT, INTEGER(valuecols)[i]-1))); totlen += length(VECTOR_ELT(idxkeep, i)); } } else totlen = nrow * lvalues; PROTECT(ans = allocVector(VECSXP, lids + 2)); protecti++; target = PROTECT(allocVector(valtype, totlen)); for (i=0; i<lvalues; i++) { thiscol = VECTOR_ELT(DT, INTEGER(valuecols)[i]-1); if (isFactor(thiscol)) thiscol = asCharacterFactor(thiscol); if (TYPEOF(thiscol) != valtype && !isFactor(thiscol)) { // thiscol = valtype == STRSXP ? PROTECT(coerce_to_char(thiscol, R_GlobalEnv)) : PROTECT(coerceVector(thiscol, valtype)); // protecti++; // for now, no preserving of class attributes thiscol = PROTECT(coerceVector(thiscol, valtype)); protecti++; } size = SIZEOF(thiscol); if (narm) { thisidx = VECTOR_ELT(idxkeep, i); thislen = length(thisidx); } switch(valtype) { case VECSXP : if (narm) { for (j=0; j<thislen; j++) SET_VECTOR_ELT(target, counter + j, VECTOR_ELT(thiscol, INTEGER(thisidx)[j]-1)); } else { for (j=0; j<nrow; j++) SET_VECTOR_ELT(target, i*nrow + j, VECTOR_ELT(thiscol, j)); } break; case STRSXP : if (narm) { for (j=0; j<thislen; j++) SET_STRING_ELT(target, counter + j, STRING_ELT(thiscol, INTEGER(thisidx)[j]-1)); } else { for (j=0; j<nrow; j++) SET_STRING_ELT(target, i*nrow + j, STRING_ELT(thiscol, j)); } break; case REALSXP : if (narm) { for (j=0; j<thislen; j++) REAL(target)[counter + j] = REAL(thiscol)[INTEGER(thisidx)[j]-1]; } else { memcpy((char *)DATAPTR(target)+i*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; case INTSXP : if (narm) { for (j=0; j<thislen; j++) INTEGER(target)[counter + j] = INTEGER(thiscol)[INTEGER(thisidx)[j]-1]; } else { memcpy((char *)DATAPTR(target)+i*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; case LGLSXP : if (narm) { for (j=0; j<thislen; j++) LOGICAL(target)[counter + j] = LOGICAL(thiscol)[INTEGER(thisidx)[j]-1]; } else { memcpy((char *)DATAPTR(target)+i*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; default : error("Unknown column type '%s' for column '%s' in 'data'", type2char(TYPEOF(thiscol)), CHAR(STRING_ELT(dtnames, INTEGER(valuecols)[i]-1))); } if (narm) counter += thislen; // if (isidentical && valtype != VECSXP) // for now, no preserving of class attributes // setAttrib(target, R_ClassSymbol, getAttrib(VECTOR_ELT(DT, INTEGER(valuecols)[0]-1), R_ClassSymbol)); // for Date like column } // check for factor if (LOGICAL(valfactor)[0] == TRUE && valtype == VECSXP) warning("argument 'value.factor' ignored because 'value' column is a list\n"); if (LOGICAL(valfactor)[0] == TRUE && valtype != VECSXP) { PROTECT(factorLangSxp = allocList(2)); SET_TYPEOF(factorLangSxp, LANGSXP); SETCAR(factorLangSxp, install("factor")); SETCAR(CDR(factorLangSxp), target); SET_VECTOR_ELT(ans, lids+1, eval(factorLangSxp, R_GlobalEnv)); // last column UNPROTECT(1); // factorLangSxp } else SET_VECTOR_ELT(ans, lids+1, target); UNPROTECT(1); // target // get "variable" column counter = 0, i=0; target = PROTECT(allocVector(INTSXP, totlen)); for (j=0; j<lvalues; j++) { if (narm) { thislen = length(VECTOR_ELT(idxkeep, j)); for (k=0; k<thislen; k++) INTEGER(target)[counter + k] = i+1; counter += thislen; if (thislen > 0 || !droplevels) i++; } else { for (k=0; k<nrow; k++) INTEGER(target)[nrow*j + k] = j+1; } } setAttrib(target, R_ClassSymbol, mkString("factor")); if (narm && droplevels) { counter = 0; for (j=0; j<lvalues; j++) { if (length(VECTOR_ELT(idxkeep, j)) > 0) counter++; } } else counter = lvalues; levels = PROTECT(allocVector(STRSXP, counter)); i = 0; for (j=0; j<lvalues; j++) { if (narm && droplevels) { if (length(VECTOR_ELT(idxkeep, j)) > 0) SET_STRING_ELT(levels, i++, STRING_ELT(dtnames, INTEGER(valuecols)[j]-1)); } else SET_STRING_ELT(levels, j, STRING_ELT(dtnames, INTEGER(valuecols)[j]-1)); } setAttrib(target, R_LevelsSymbol, levels); UNPROTECT(1); // levels if (LOGICAL(varfactor)[0] == FALSE) target = asCharacterFactor(target); SET_VECTOR_ELT(ans, lids, target); UNPROTECT(1); // target // generate idcols (left part) for (i=0; i<lids; i++) { counter = 0; thiscol = VECTOR_ELT(DT, INTEGER(idcols)[i]-1); size = SIZEOF(thiscol); target = PROTECT(allocVector(TYPEOF(thiscol), totlen)); switch(TYPEOF(thiscol)) { case REALSXP : if (narm) { for (j=0; j<lvalues; j++) { thisidx = PROTECT(VECTOR_ELT(idxkeep, j)); thislen = length(thisidx); for (k=0; k<thislen; k++) REAL(target)[counter + k] = REAL(thiscol)[INTEGER(thisidx)[k]-1]; counter += thislen; UNPROTECT(1); // thisidx } } else { for (j=0; j<lvalues; j++) memcpy((char *)DATAPTR(target)+j*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; case INTSXP : if (narm) { for (j=0; j<lvalues; j++) { thisidx = PROTECT(VECTOR_ELT(idxkeep, j)); thislen = length(thisidx); for (k=0; k<thislen; k++) INTEGER(target)[counter + k] = INTEGER(thiscol)[INTEGER(thisidx)[k]-1]; counter += thislen; UNPROTECT(1); // thisidx } } else { for (j=0; j<lvalues; j++) memcpy((char *)DATAPTR(target)+j*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; case LGLSXP : if (narm) { for (j=0; j<lvalues; j++) { thisidx = PROTECT(VECTOR_ELT(idxkeep, j)); thislen = length(thisidx); for (k=0; k<thislen; k++) LOGICAL(target)[counter + k] = LOGICAL(thiscol)[INTEGER(thisidx)[k]-1]; counter += thislen; UNPROTECT(1); // thisidx } } else { for (j=0; j<lvalues; j++) memcpy((char *)DATAPTR(target)+j*nrow*size, (char *)DATAPTR(thiscol), nrow*size); } break; case STRSXP : if (narm) { for (j=0; j<lvalues; j++) { thisidx = PROTECT(VECTOR_ELT(idxkeep, j)); thislen = length(thisidx); for (k=0; k<thislen; k++) SET_STRING_ELT(target, counter + k, STRING_ELT(thiscol, INTEGER(thisidx)[k]-1)); counter += thislen; UNPROTECT(1); // thisidx } } else { // SET_STRING_ELT for j=0 and memcpy for j>0, WHY? // From assign.c's memcrecycle - only one SET_STRING_ELT per RHS item is needed to set generations (overhead) for (k=0; k<nrow; k++) SET_STRING_ELT(target, k, STRING_ELT(thiscol, k)); for (j=1; j<lvalues; j++) memcpy((char *)DATAPTR(target)+j*nrow*size, (char *)DATAPTR(target), nrow*size); } break; case VECSXP : for (j=0; j<lvalues; j++) { for (k=0; k<nrow; k++) { SET_VECTOR_ELT(target, j*nrow + k, VECTOR_ELT(thiscol, k)); } } break; default : error("Unknown column type '%s' for column '%s' in 'data'", type2char(TYPEOF(thiscol)), CHAR(STRING_ELT(dtnames, INTEGER(idcols)[i]-1))); } copyMostAttrib(thiscol, target); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB. SET_VECTOR_ELT(ans, i, target); UNPROTECT(1); // target } setAttrib(ans, R_NamesSymbol, ansnames); UNPROTECT(protecti); return(ans); }
SEXP transpose(SEXP l, SEXP fill, SEXP ignoreArg) { R_len_t i, j, k=0, maxlen=0, zerolen=0, anslen; SEXP li, thisi, ans; SEXPTYPE type, maxtype=0; Rboolean coerce = FALSE; if (!isNewList(l)) error("l must be a list."); if (!length(l)) return(duplicate(l)); if (!isLogical(ignoreArg) || LOGICAL(ignoreArg)[0] == NA_LOGICAL) error("ignore.empty should be logical TRUE/FALSE."); if (length(fill) != 1) error("fill must be NULL or length=1 vector."); R_len_t ln = LENGTH(l); Rboolean ignore = LOGICAL(ignoreArg)[0]; // preprocessing R_len_t *len = (R_len_t *)R_alloc(ln, sizeof(R_len_t)); for (i=0; i<ln; i++) { li = VECTOR_ELT(l, i); if (!isVectorAtomic(li) && !isNull(li)) error("Item %d of list input is not an atomic vector", i+1); len[i] = length(li); if (len[i] > maxlen) maxlen = len[i]; zerolen += (len[i] == 0); if (isFactor(li)) { maxtype = STRSXP; } else { type = TYPEOF(li); if (type > maxtype) maxtype = type; } } // coerce fill to maxtype fill = PROTECT(coerceVector(fill, maxtype)); // allocate 'ans' ans = PROTECT(allocVector(VECSXP, maxlen)); anslen = (!ignore) ? ln : (ln - zerolen); for (i=0; i<maxlen; i++) { SET_VECTOR_ELT(ans, i, thisi=allocVector(maxtype, anslen) ); } // transpose for (i=0; i<ln; i++) { if (ignore && !len[i]) continue; li = VECTOR_ELT(l, i); if (TYPEOF(li) != maxtype) { coerce = TRUE; if (!isFactor(li)) li = PROTECT(coerceVector(li, maxtype)); else li = PROTECT(asCharacterFactor(li)); } switch (maxtype) { case INTSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); INTEGER(thisi)[k] = (j < len[i]) ? INTEGER(li)[j] : INTEGER(fill)[0]; } break; case LGLSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); LOGICAL(thisi)[k] = (j < len[i]) ? LOGICAL(li)[j] : LOGICAL(fill)[0]; } break; case REALSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); REAL(thisi)[k] = (j < len[i]) ? REAL(li)[j] : REAL(fill)[0]; } break; case STRSXP : for (j=0; j<maxlen; j++) { thisi = VECTOR_ELT(ans, j); SET_STRING_ELT(thisi, k, (j < len[i]) ? STRING_ELT(li, j) : STRING_ELT(fill, 0)); } break; default : error("Unsupported column type '%s'", type2char(maxtype)); } if (coerce) { coerce = FALSE; UNPROTECT(1); } k++; } UNPROTECT(2); return(ans); }
SEXP melt_dataframe( SEXP x, SEXP id_ind_, SEXP val_ind_, SEXP variable_name, SEXP value_name ) { if (length(x) == 0) { error("Can't melt a data.frame with 0 columns"); } if (length(VECTOR_ELT(x, 0)) == 0) { error("Can't melt a data.frame with 0 rows"); } int* id_ind = INTEGER(id_ind_); int* val_ind = INTEGER(val_ind_); int nColStack = length(id_ind_); int nColRep = length(val_ind_); int nRow = length( VECTOR_ELT(x, 0) ); int out_nRow = nRow * nColRep; int out_nCol = nColStack + 2; char mt = max_type(x, val_ind_); if (mt > STRSXP) { error("Error: cannot melt data.frames w/ elements of type '%s'", CHAR(type2str(mt))); } if (diff_types(x, val_ind_)) { warning("Coercing type of 'value' variables to '%s'", CHAR(type2str(mt))); } SEXP out; PROTECT(out = allocVector( VECSXP, out_nCol )); // populate the value array SEXP value_SEXP; #define HANDLE_CASE( RTYPE, CTYPE ) \ case RTYPE: { \ PROTECT( value_SEXP = allocVector( RTYPE, value_len ) ); \ SEXP tmp; \ for( int i=0; i < nColRep; ++i ) { \ if (TYPEOF( VECTOR_ELT(x, val_ind[i]) ) != mt) { \ tmp = PROTECT( coerceVector( VECTOR_ELT(x, val_ind[i]), mt ) ); \ } else { \ tmp = VECTOR_ELT(x, val_ind[i]); \ } \ memcpy( \ (char*) DATAPTR(value_SEXP) + (i*nRow*sizeof(CTYPE)), \ (char*) DATAPTR(tmp), \ nRow * sizeof(CTYPE) \ ); \ if (TYPEOF( VECTOR_ELT(x, val_ind[i]) ) != mt) { \ UNPROTECT(1); \ } \ } \ break; \ } \ int value_len = nColRep * nRow; int value_type = mt; switch( value_type ) { HANDLE_CASE( INTSXP, int ); HANDLE_CASE( REALSXP, double ); HANDLE_CASE( LGLSXP, int ); case STRSXP: { int counter = 0; SEXP* curr_str_vec_ptr; SEXP tmp; PROTECT( value_SEXP = allocVector( STRSXP, value_len ) ); for( int i=0; i < nColRep; ++i ) { #define curr_str_vec (VECTOR_ELT(x, val_ind[i])) if (TYPEOF(curr_str_vec) != STRSXP) { if (isFactor(curr_str_vec)) { PROTECT(tmp = asCharacterFactor(curr_str_vec)); } else { PROTECT(tmp = coerceVector(curr_str_vec, STRSXP)); } curr_str_vec_ptr = STRING_PTR(tmp); } else { curr_str_vec_ptr = STRING_PTR(curr_str_vec); } #undef curr_str_vec SEXP* value_SEXP_ptr = STRING_PTR( value_SEXP ); for( int j=0; j < nRow; ++j ) { value_SEXP_ptr[counter] = curr_str_vec_ptr[j]; ++counter; } if (TYPEOF( VECTOR_ELT(x, val_ind[i]) ) != mt) { UNPROTECT(1); } } break; } default: error("Unsupported RTYPE encountered"); } #undef HANDLE_CASE // generate the id variables, and assign them on generation // we need to convert factors if necessary for( int i=0; i < nColStack; ++i ) { SET_VECTOR_ELT( out, i, stack_vector( VECTOR_ELT( x, id_ind[i] ), nColRep )); if (isFactor( VECTOR_ELT(x, id_ind[i]) )) { setAttrib( VECTOR_ELT(out, i), R_ClassSymbol, mkString("factor") ); setAttrib( VECTOR_ELT(out, i), R_LevelsSymbol, getAttrib( VECTOR_ELT(x, id_ind[i]), R_LevelsSymbol ) ); } } // assign the names, values SET_VECTOR_ELT( out, nColStack, rep_each_char( getAttrib( x, R_NamesSymbol ), val_ind_, nRow ) ); SET_VECTOR_ELT( out, nColStack+1, value_SEXP ); UNPROTECT(1); // value_SEXP // set the row names SEXP row_names; PROTECT( row_names = allocVector(INTSXP, out_nRow) ); int* row_names_ptr = INTEGER(row_names); for( int i=0; i < out_nRow; ++i ) { row_names_ptr[i] = i+1; } setAttrib( out, R_RowNamesSymbol, row_names ); UNPROTECT(1); // row_names // set the class to data.frame setAttrib(out, R_ClassSymbol, mkString("data.frame")); // set the names SEXP names = getAttrib(x, R_NamesSymbol); SEXP names_out; PROTECT(names_out = allocVector( STRSXP, out_nCol )); SEXP* names_ptr = STRING_PTR(names); SEXP* names_out_ptr = STRING_PTR(names_out); for (int i=0; i < nColStack; ++i) { names_out_ptr[i] = names_ptr[ id_ind[i] ]; } SET_STRING_ELT( names_out, nColStack, STRING_ELT(variable_name, 0) ); SET_STRING_ELT( names_out, nColStack+1, STRING_ELT(value_name, 0) ); setAttrib( out, R_NamesSymbol, names_out ); UNPROTECT(1); // names_out UNPROTECT(1); // out return out; }