// Builds one join visitor for every (left column, right column) pair named by
// `names_left` / `names_right`.
//
// left_, right_            the two data frames taking part in the join
// names_left, names_right  join column names, paired by position
// warn_                    stored in `warn` and forwarded to join_visitor()
// na_match                 forwarded to join_visitor()
//
// Calls stop() when the two name vectors resolve to index vectors of different
// lengths, or when a requested column is not found in its data frame.
DataFrameJoinVisitors::DataFrameJoinVisitors(
  const DataFrame& left_,
  const DataFrame& right_,
  const SymbolVector& names_left,
  const SymbolVector& names_right,
  bool warn_,
  bool na_match
) :
  left(left_),
  right(right_),
  visitor_names_left(names_left),
  visitor_names_right(names_right),
  visitors(names_left.size()),
  warn(warn_)
{
  // Match positions of the requested names among each frame's column names.
  // An entry is NA_INTEGER when the name is absent; otherwise it is used
  // below as (position - 1) to index the frame.
  IntegerVector pos_in_left = names_left.match_in_table(RCPP_GET_NAMES(left));
  IntegerVector pos_in_right = names_right.match_in_table(RCPP_GET_NAMES(right));

  const int pair_count = pos_in_left.size();
  if (pos_in_right.size() != pair_count) {
    stop("Different size of join column index vectors");
  }

  for (int k = 0; k < pair_count; ++k) {
    const SymbolString& lhs_name = names_left[k];
    const SymbolString& rhs_name = names_right[k];

    // Validate both sides before touching the columns.
    if (pos_in_left[k] == NA_INTEGER) {
      stop("'%s' column not found in lhs, cannot join", lhs_name.get_utf8_cstring());
    }
    if (pos_in_right[k] == NA_INTEGER) {
      stop("'%s' column not found in rhs, cannot join", rhs_name.get_utf8_cstring());
    }

    visitors[k] = join_visitor(
                    Column(left[pos_in_left[k] - 1], lhs_name),
                    Column(right[pos_in_right[k] - 1], rhs_name),
                    warn,
                    na_match
                  );
  }
}
// Creates a visitor for each column of `data_` named in `names`, in the order
// the names are given. Reports the name through bad_col() when it does not
// match any column of the data frame.
DataFrameVisitors::DataFrameVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  const int requested = names.size();

  // Match positions of the requested names among the frame's column names;
  // NA_INTEGER marks a name with no match.
  CharacterVector column_names = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(column_names);

  for (int k = 0; k < requested; ++k) {
    const int where = positions[k];
    if (where == NA_INTEGER) {
      bad_col(names[k], "is unknown");
    }

    // Match positions are one past the index used to read the column.
    SEXP selected = data[where - 1];
    visitors.push_back(visitor(selected));
  }
}
// Creates a subset visitor for each column of `data_` named in `names`, in the
// order the names are given. Each visitor is built from the column and the
// column's name as stored in the data frame. Reports the name through
// bad_col() when it does not match any column.
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  // Match positions of the requested names among the frame's column names;
  // NA_INTEGER marks a name with no match.
  CharacterVector column_names = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(column_names);

  const int matched = positions.size();
  for (int k = 0; k < matched; ++k) {
    const int where = positions[k];
    if (where == NA_INTEGER) {
      bad_col(names[k], "is unknown");
    }

    // Match positions are one past the index used to read the column and its name.
    visitors.push_back(subset_visitor(data[where - 1], column_names[where - 1]));
  }
}
// Builds the grouping structure of `data` for the grouping variables `vars`.
//
// The result is a list holding one vector per grouping variable (same SEXP
// type as the source column, with attributes copied by copy_most_attributes())
// followed by a final ".rows" element, which is a list filled in by the slicer.
// The result carries "names", "row.names" and "class" attributes; the class
// comes from classes_not_grouped().
//
// Reports through bad_col() a variable that is not a column of `data`, or
// whose column fails white_list() or is a list (VECSXP).
// Emits a warning for every resulting factor column that contains NA.
SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
  const int nvars = vars.size();

  // Match positions of the grouping variables among the frame's column names;
  // NA_INTEGER marks a variable with no match.
  CharacterVector column_names = data.names();
  IntegerVector matched = vars.match_in_table(column_names);

  std::vector<SEXP> group_columns(nvars);
  // One name per grouping variable plus the trailing ".rows" entry.
  CharacterVector out_names(nvars + 1);

  for (int k = 0; k < nvars; ++k) {
    const int where = matched[k];
    if (where == NA_INTEGER) {
      bad_col(vars[k], "is unknown");
    }

    SEXP column = data[where - 1];
    group_columns[k] = column;
    out_names[k] = column_names[where - 1];

    const bool is_list_column = TYPEOF(column) == VECSXP;
    if (!white_list(column) || is_list_column) {
      bad_col(vars[k], "can't be used as a grouping variable because it's a {type}",
              _["type"] = get_single_class(column));
    }
  }

  DataFrameVisitors key_visitors(data, vars);
  boost::shared_ptr<Slicer> root = slicer(std::vector<int>(), 0, group_columns, key_visitors);

  // The slicer's size is used as the length of every output vector.
  int ncases = root->size();

  // Construct the groups data: allocate the output containers, then let the
  // slicer fill them.
  List result(nvars + 1);
  List rows(ncases);
  ListCollecter rows_collecter(rows);

  for (int k = 0; k < nvars; ++k) {
    result[k] = Rf_allocVector(TYPEOF(group_columns[k]), ncases);
    copy_most_attributes(result[k], group_columns[k]);
  }
  result[nvars] = rows;
  out_names[nvars] = ".rows";

  root->make(result, rows_collecter);

  // Warn about NA in factors.
  for (int k = 0; k < nvars; ++k) {
    SEXP filled = result[k];
    if (!Rf_isFactor(filled)) {
      continue;
    }

    IntegerVector codes(filled);
    const bool has_na = std::find(codes.begin(), codes.end(), NA_INTEGER) != codes.end();
    if (has_na) {
      warningcall(R_NilValue, tfm::format("Factor `%s` contains implicit NA, consider using `forcats::fct_explicit_na`", CHAR(out_names[k].get())));
    }
  }

  result.attr("names") = out_names;
  // row.names is stored as the two-element integer vector (NA, -ncases).
  result.attr("row.names") = IntegerVector::create(NA_INTEGER, -ncases);
  result.attr("class") = classes_not_grouped();

  return result;
}