// Assemble the result of a join from the matched rows of `x` and `y`.
//
// Output layout (a list with x.ncol() + aux_y.size() slots):
//   * slots [0, x.ncol()) : the columns of `x`, at their original positions;
//   * remaining slots     : the auxiliary columns of `y`, in the order given
//                           by `aux_y`.
//
// Parameters:
//   x, y        the two data frames being joined.
//   indices_x   row selection used for every column placed in an x slot.
//   indices_y   row selection used for the auxiliary y columns.
//   by_x, by_y  join-key column positions in `x` / `y`; `by_x` is treated as
//               1-based here (1 is subtracted to obtain the output slot).
//   aux_x       positions of the non-key columns of `x`, also treated as
//               1-based.
//   aux_y       non-key columns of `y` to append after the x columns.
//   classes     class vector handed to set_class() for the result.
//
// The result has its row names set from indices_x.size() and is returned
// through its SEXP conversion.
DataFrame subset_join(DataFrame x, DataFrame y, const Index& indices_x, const Index& indices_y, const IntegerVector& by_x, const IntegerVector& by_y, const IntegerVector& aux_x, const IntegerVector& aux_y, CharacterVector classes) {
  // One slot per x column plus one per auxiliary y column.
  List out(x.ncol() + aux_y.size());

  // Key columns: produced by the join visitors and written to the slot the
  // key occupies in `x`.
  DataFrameJoinVisitors join_visitors(x, y, by_x, by_y, true, false);
  for (int key = 0; key < by_x.size(); ++key) {
    JoinVisitor* key_visitor = join_visitors.get(key);
    const int slot = by_x[key] - 1;
    out[slot] = key_visitor->subset(indices_x);
  }

  // Remaining x columns: also written back to their original slot.
  DataFrameSubsetVisitors visitors_x(x, aux_x);
  for (int col = 0; col < aux_x.size(); ++col) {
    SubsetVectorVisitor* const x_visitor = visitors_x.get(col);
    const int slot = aux_x[col] - 1;
    out[slot] = x_visitor->subset(indices_x);
  }

  // Auxiliary y columns: appended after the last x slot, keeping the
  // relative order in which the visitors hold them.
  DataFrameSubsetVisitors visitors_y(y, aux_y);
  int slot = x.ncol();
  for (int col = 0; col < visitors_y.size(); ++col) {
    SubsetVectorVisitor* const y_visitor = visitors_y.get(col);
    out[slot] = y_visitor->subset(indices_y);
    ++slot;
  }

  // Finish the data frame attributes.
  int n_out_rows = indices_x.size();
  set_rownames(out, n_out_rows);
  set_class(out, classes);

  return static_cast<SEXP>(out);
}
// Based on table() // dataframe { // for each column get the num of dims.tfm // get size of the resulting talbe // finally tabulate by the bins meaning how many are there // each value will correspond to its index in the dim array. // this is just indexing by a set of values, then you go to there and find it. // } // [[Rcpp::export]] Rcpp::IntegerVector table_cpp(const RObject & input, const RObject & columns) { if(!is<DataFrame>(input)) stop("Must be a data frame."); DataFrame data = as<DataFrame>(input); if(!is<CharacterVector>(columns)) stop("Must be character vector."); CharacterVector cols = as<CharacterVector>(columns); data = data[cols]; const R_xlen_t ncols = data.ncol(); if (ncols == 0) stop("No columns in data frame."); const IntegerVector & column = data.at(0); // There is a single entry for each row IntegerVector to_tabulate = no_init(column.size()); to_tabulate.fill(1); // The product of dimensions. R_xlen_t pd = 1; IntegerVector dims(ncols); List dimnames(ncols); dimnames.names() = data.names(); for (R_xlen_t i = 0; i < ncols; i++) { const IntegerVector & a = data.at(i); if(!Rf_isFactor(a)) stop("Not a factor."); const CharacterVector & factorLevels = a.attr("levels"); R_xlen_t nl = factorLevels.size(); to_tabulate = to_tabulate + pd * (a - 1L); pd = pd * nl ; dims.at(i) = nl; dimnames.at(i) = factorLevels; } to_tabulate = na_omit(to_tabulate); IntegerVector tbl = tabulate_cpp(to_tabulate, pd); tbl.attr("dim") = dims; tbl.attr("dimnames") = dimnames; tbl.attr("class") = "table"; return tbl; }