Beispiel #1
0
/**
 * Build the joined data frame from precomputed row indices.
 *
 * The result list has one slot per column of `x`, followed by one slot per
 * entry of `aux_y`:
 *   - `by_x` / `aux_x` hold 1-based column positions in `x`; each of those
 *     columns is written back to the same position, subset by `indices_x`.
 *   - the auxiliary `y` columns are appended after the `x` columns, in the
 *     order given by `aux_y`, subset by `indices_y`.
 *
 * Row names are set for `indices_x.size()` rows and the class attribute is
 * set to `classes`. Column names are not assigned here.
 *
 * NOTE(review): the meaning of the two boolean flags passed to
 * DataFrameJoinVisitors is not visible from this function; confirm against
 * its declaration.
 */
DataFrame subset_join(DataFrame x, DataFrame y,
                      const Index& indices_x, const Index& indices_y,
                      const IntegerVector& by_x, const IntegerVector& by_y,
                      const IntegerVector& aux_x, const IntegerVector& aux_y,
                      CharacterVector classes) {
  // One slot for every x column plus one for every auxiliary y column.
  List result(x.ncol() + aux_y.size());

  // Join-key columns: stored at their original (1-based) position in x.
  DataFrameJoinVisitors key_visitors(x, y, by_x, by_y, true, false);
  for (int col = 0; col < by_x.size(); ++col) {
    JoinVisitor* key_visitor = key_visitors.get(col);
    const int slot = by_x[col] - 1;
    result[slot] = key_visitor->subset(indices_x);
  }

  // Remaining x columns: also stored at their original position in x.
  DataFrameSubsetVisitors x_visitors(x, aux_x);
  for (int col = 0; col < aux_x.size(); ++col) {
    SubsetVectorVisitor* const x_visitor = x_visitors.get(col);
    const int slot = aux_x[col] - 1;
    result[slot] = x_visitor->subset(indices_x);
  }

  // Remaining y columns: appended after all x columns, relative order kept.
  DataFrameSubsetVisitors y_visitors(y, aux_y);
  int slot = x.ncol();
  for (int col = 0; col < y_visitors.size(); ++col, ++slot) {
    SubsetVectorVisitor* const y_visitor = y_visitors.get(col);
    result[slot] = y_visitor->subset(indices_y);
  }

  // Finish the data frame: row names sized by the x index, then the class.
  const int n_rows = indices_x.size();
  set_rownames(result, n_rows);
  set_class(result, classes);

  return static_cast<SEXP>(result);
}
Beispiel #2
0
// Cross-tabulates factor columns of a data frame; modelled on R's table().
//
// input:   must be a data frame, otherwise stops with "Must be a data frame.".
// columns: must be a character vector, otherwise stops with
//          "Must be character vector."; it selects the columns of `input`
//          that are tabulated.
//
// Every selected column must be a factor. Each row is turned into a 1-based
// linear cell index: the index starts at 1 and every column adds
// (level code - 1) scaled by the product of the level counts of the columns
// before it, so the first column varies fastest. Indices that come out as NA
// are removed with na_omit() (presumably rows with a missing value in some
// column) and the rest are counted by tabulate_cpp().
//
// Returns an integer vector of counts carrying
//   "dim"      - the number of levels of each column,
//   "dimnames" - the level labels of each column, named by the column names,
//   "class"    - "table".
//
// Stops when no columns are selected, when a column is not a factor, or when
// the table would have more cells than an int can index.
// [[Rcpp::export]]
Rcpp::IntegerVector table_cpp(const RObject & input, const RObject & columns) {
  if(!is<DataFrame>(input)) stop("Must be a data frame.");
  DataFrame data = as<DataFrame>(input);
  if(!is<CharacterVector>(columns)) stop("Must be character vector.");
  CharacterVector cols = as<CharacterVector>(columns);
  data = data[cols];

  const R_xlen_t ncols = data.ncol();
  if (ncols == 0) stop("No columns in data frame.");
  // The first column is only used to learn the number of rows.
  const IntegerVector & column = data.at(0);
  // One cell index per row, starting at cell 1.
  IntegerVector to_tabulate = no_init(column.size());
  to_tabulate.fill(1);
  // Running product of the level counts seen so far (stride of the next column).
  R_xlen_t pd = 1;
  // Cell indices (to_tabulate) and dims are int vectors, so the total number
  // of cells has to stay within the int range.
  const R_xlen_t max_cells = std::numeric_limits<int>::max();
  IntegerVector  dims(ncols);
  List  dimnames(ncols);
  dimnames.names() = data.names();

  for (R_xlen_t i = 0; i < ncols; i++) {
    const IntegerVector & a = data.at(i);
    if(!Rf_isFactor(a)) stop("Not a factor.");
    const CharacterVector & factorLevels = a.attr("levels");
    const R_xlen_t nl = factorLevels.size();
    // Refuse to continue before pd * nl leaves the int range; the division
    // form avoids overflowing pd itself while checking.
    if (nl > 0 && pd > max_cells / nl) {
      stop("attempt to make a table with >= 2^31 elements");
    }
    // Add this column's contribution to every row's linear index.
    to_tabulate = to_tabulate + pd * (a - 1L);
    pd = pd * nl;
    dims.at(i) = nl;
    dimnames.at(i) = factorLevels;
  }

  to_tabulate = na_omit(to_tabulate);
  IntegerVector tbl = tabulate_cpp(to_tabulate, pd);
  tbl.attr("dim") =  dims;
  tbl.attr("dimnames") =  dimnames;
  tbl.attr("class") =  "table";

  return tbl;
}