// Builds one join visitor for every pair of join columns.
//
// names_left[i] is looked up among the names of `left` and names_right[i]
// among the names of `right`; the two matched columns are handed to
// join_visitor() together with the `warn` and `na_match` flags.
//
// Calls stop() when the two index vectors differ in length or when a
// requested column is missing on either side.
DataFrameJoinVisitors::DataFrameJoinVisitors(
  const DataFrame& left_,
  const DataFrame& right_,
  const SymbolVector& names_left,
  const SymbolVector& names_right,
  bool warn_,
  bool na_match
) :
  left(left_),
  right(right_),
  visitor_names_left(names_left),
  visitor_names_right(names_right),
  visitors(names_left.size()),
  warn(warn_)
{
  // Positions (NA_INTEGER when absent) of the join columns in each frame.
  IntegerVector indices_left = names_left.match_in_table(RCPP_GET_NAMES(left));
  IntegerVector indices_right = names_right.match_in_table(RCPP_GET_NAMES(right));

  const int nvisitors = indices_left.size();
  if (indices_right.size() != nvisitors) {
    stop("Different size of join column index vectors");
  }

  for (int k = 0; k < nvisitors; k++) {
    const SymbolString& name_left = names_left[k];
    const SymbolString& name_right = names_right[k];

    const int pos_left = indices_left[k];
    if (pos_left == NA_INTEGER) {
      stop("'%s' column not found in lhs, cannot join", name_left.get_utf8_cstring());
    }

    const int pos_right = indices_right[k];
    if (pos_right == NA_INTEGER) {
      stop("'%s' column not found in rhs, cannot join", name_right.get_utf8_cstring());
    }

    // The matched positions are used with a -1 offset to index the frames.
    visitors[k] = join_visitor(
                    Column(left[pos_left - 1], name_left),
                    Column(right[pos_right - 1], name_right),
                    warn,
                    na_match
                  );
  }
}
// Emits one leak report per symbol in LeakedStreams, all attached to ErrNode.
//
// Each leaked symbol is marked as interesting on its own report before the
// report is passed to the checker context.
//
// TODO: Identify the leaked file descriptor.
void SimpleStreamChecker::reportLeaks(SymbolVector LeakedStreams,
                                      CheckerContext &C,
                                      ExplodedNode *ErrNode) const {
  typedef SmallVectorImpl<SymbolRef>::iterator StreamIter;

  StreamIter Cur = LeakedStreams.begin();
  StreamIter End = LeakedStreams.end();
  while (Cur != End) {
    // NOTE(review): the report is heap-allocated and handed to emitReport();
    // presumably the context takes ownership of it - confirm.
    BugReport *Report = new BugReport(
        *LeakBugType,
        "Opened file is never closed; potential resource leak",
        ErrNode);
    Report->markInteresting(*Cur);
    C.emitReport(Report);
    ++Cur;
  }
}
// Looks up the value stored for `ngram` in the `ngrams` structure.
//
// Starting from ngrams.root(), each symbol of the n-gram is used to step to
// the next node via ngrams.find(). Whenever a step lands back on the root
// the lookup is abandoned and 0.0 is returned; an empty n-gram therefore
// also yields 0.0. Otherwise the value stored at the final node is returned.
BleuSScorer::count_type BleuSScorer::find(const SymbolVector& ngram) const
{
  ngram_set_type::id_type node = ngrams.root();

  for (SymbolVector::const_iterator sym = ngram.begin(), last = ngram.end(); sym != last; ++ sym) {
    node = ngrams.find(node, *sym);

    // Landing on the root is treated as "no such n-gram".
    if (ngrams.is_root(node))
      return 0.0;
  }

  if (ngrams.is_root(node))
    return 0.0;

  return ngrams[node];
}
// Collects into `symbols` the symbols matching `name`, searching from this
// symbol.
//
// The name is first split into components with context()->separateName()
// and looked up with qualifiedNameLookup(). If that finds nothing, the
// lookup is repeated with the whole, unsplit name as a single component.
//
// NOTE: this is all very fishy. A tuple symbol such as (int,foo.bar) is
// split into "(int,foo" and "bar)", which is clearly wrong: the parens
// should prevent the dot separation. These names split incorrectly in the
// same way:
//
//      int[foo.bar]
//      [foo.bar]
//      vector foo.bar[3]
//
// This function assumes that such symbols live in the global namespace and
// will therefore be found when the name is not tokenized at all, which is
// what the fallback lookup does.
void Symbol::findSymbols(QualifiedName name, SymbolVector& symbols)
{
    NameVector components;
    context()->separateName(name, components);
    qualifiedNameLookup(components, this, symbols);

    if (!symbols.empty())
    {
        return;
    }

    // Nothing found with the tokenized name: retry with the name as-is.
    NameVector unsplit;
    unsplit.push_back(name);
    qualifiedNameLookup(unsplit, this, symbols);
}
// [[Rcpp::export]] DataFrame grouped_df_impl(DataFrame data, SymbolVector symbols) { assert_all_white_list(data); DataFrame copy(shallow_copy(data)); set_class(copy, classes_grouped<GroupedDataFrame>()); if (!symbols.size()) stop("no variables to group by"); GroupedDataFrame::set_groups(copy, build_index_cpp(copy, symbols)); return copy; }
// Creates one visitor per requested column of `data_`.
//
// Every entry of `names` is matched against the names of the data frame;
// bad_col() is called for a name that cannot be matched. The visitors are
// appended in the order of `names`.
DataFrameVisitors::DataFrameVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  const int n_names = names.size();

  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(data_names);

  for (int k = 0; k < n_names; k++) {
    const int pos = positions[k];
    if (pos == NA_INTEGER) {
      bad_col(names[k], "is unknown");
    }

    // The matched position is used with a -1 offset to index the frame.
    SEXP column = data[pos - 1];
    visitors.push_back(visitor(column));
  }
}
// Handles symbols that have died: collects the tracked streams that
// isLeaked() flags, drops dead symbols from the StreamMap, and reports the
// leaks on the node created for the updated state.
//
// The loop walks the stream map as fetched before the loop, while `State`
// is progressively replaced by versions with dead symbols removed; each
// isLeaked() query sees the state as updated so far.
void SimpleStreamChecker::checkDeadSymbols(SymbolReaper &SymReaper,
                                           CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  SymbolVector LeakedStreams;
  StreamMapTy TrackedStreams = State->get<StreamMap>();

  StreamMapTy::iterator Entry = TrackedStreams.begin();
  StreamMapTy::iterator Last = TrackedStreams.end();
  while (Entry != Last) {
    SymbolRef Sym = Entry->first;
    bool IsSymDead = SymReaper.isDead(Sym);

    // Collect leaked symbols.
    if (isLeaked(Sym, Entry->second, IsSymDead, State)) {
      LeakedStreams.push_back(Sym);
    }

    // Remove the dead symbol from the streams map.
    if (IsSymDead) {
      State = State->remove<StreamMap>(Sym);
    }

    ++Entry;
  }

  ExplodedNode *LeakNode = C.addTransition(State);
  reportLeaks(LeakedStreams, C, LeakNode);
}
// Creates one subset visitor per requested column of `data_`.
//
// Every entry of `names` is matched against the names of the data frame;
// bad_col() is called for a name that cannot be matched. Each visitor is
// built from the matched column and that column's name as stored in the
// data frame, and appended in the order of `names`.
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(data_names);

  const int n_positions = positions.size();
  for (int k = 0; k < n_positions; k++) {
    const int pos = positions[k];
    if (pos == NA_INTEGER) {
      bad_col(names[k], "is unknown");
    }

    // The matched position is used with a -1 offset for both lookups.
    SubsetVectorVisitor* column_visitor = subset_visitor(data[pos - 1], data_names[pos - 1]);
    visitors.push_back(column_visitor);
  }
}
// Stores `vars` as the "vars" attribute of `x` and returns whatever
// Rf_setAttrib() returns.
SEXP set_vars(SEXP x, const SymbolVector& vars) {
  // The attribute symbol is looked up once and reused on later calls.
  static SEXP s_vars_symbol = Rf_install("vars");

  // vars.get_vector() is deliberately kept as a direct call argument so that
  // any temporary it returns stays alive for the duration of the call.
  SEXP result = Rf_setAttrib(x, s_vars_symbol, vars.get_vector());
  return result;
}
// Applies a slice expression group by group and returns the selected rows
// as a grouped data frame.
//
// `dots[0]` holds the (single) slicing expression. For every group the
// expression is evaluated through a GroupedCallProxy and the resulting
// integer vector is interpreted as 1-based positions within the group:
//
//  * positive branch (counter.is_positive()): every value in [1, nr] selects
//    the corresponding row, in the order given; all other values (zero,
//    negative, NA, larger than the group) are ignored.
//  * negative branch (counter.get_n_negative() != 0): every value in
//    [-nr, -1] marks a row to drop; all other values are ignored. The
//    remaining rows of the group are kept in their original order.
//  * otherwise the group contributes no rows.
//
// Compared with the previous implementation, values outside the valid range
// of each branch are now skipped explicitly. Before, a zero or stray negative
// value in the positive branch indexed the group out of bounds, and a zero,
// out-of-range or stray positive value in the negative branch corrupted the
// skip walk (wrong row dropped, or reads past the end of the group).
//
// NOTE(review): CountIndices is presumably responsible for rejecting mixes of
// in-range positive and negative values; that is not visible here - confirm.
SEXP slice_grouped(GroupedDataFrame gdf, const LazyDots& dots) {
  typedef GroupedCallProxy<GroupedDataFrame, LazyGroupedSubsets> Proxy;

  const DataFrame& data = gdf.data();
  const Lazy& lazy = dots[0];
  Environment env = lazy.env();
  SymbolVector names = data.names();

  // we already checked that we have only one expression
  Call call(lazy.expr());

  // Row indices (into `data`) selected across all groups.
  std::vector<int> indx;
  indx.reserve(1000);

  IntegerVector g_test;
  Proxy call_proxy(call, gdf, env);

  const int ngroups = gdf.ngroups();
  GroupedDataFrame::group_iterator git = gdf.group_begin();
  for (int i = 0; i < ngroups; i++, ++git) {
    const SlicingIndex& indices = *git;
    const int nr = indices.size();

    g_test = check_filter_integer_result(call_proxy.get(indices));
    CountIndices counter(nr, g_test);
    const int ntest = g_test.size();

    if (counter.is_positive()) {
      // positive indexing: keep only positions that exist in this group.
      // NA_INTEGER is negative, so the lower bound also filters NA.
      for (int j = 0; j < ntest; j++) {
        const int pos = g_test[j];
        if (pos >= 1 && pos <= nr) {
          indx.push_back(indices[pos - 1]);
        }
      }
    } else if (counter.get_n_negative() != 0) {
      // negative indexing: collect the (sorted, unique) 1-based positions to
      // drop, ignoring NA and anything that is not a valid negative index.
      std::set<int> drop;
      for (int j = 0; j < ntest; j++) {
        const int neg = g_test[j];
        if (neg != NA_INTEGER && neg < 0 && neg >= -nr) {
          drop.insert(-neg);
        }
      }

      // Walk the group once, skipping the dropped positions. `drop` is
      // sorted and entirely within [1, nr], so a single forward iterator
      // suffices.
      std::set<int>::const_iterator drop_it = drop.begin();
      for (int row = 0; row < nr; ++row) {
        if (drop_it != drop.end() && *drop_it == row + 1) {
          ++drop_it;
          continue;
        }
        indx.push_back(indices[row]);
      }
    }
  }

  DataFrame res = subset(data, indx, names, classes_grouped<GroupedDataFrame>());
  set_vars(res, get_vars(data));
  strip_index(res);

  return GroupedDataFrame(res).data();
}
// Builds the groups table for `data` grouped by the columns named in `vars`.
//
// The result is a list with one vector per grouping variable plus a final
// ".rows" element holding the list filled in by the slicer through a
// ListCollecter. It carries "names", "row.names" and "class" attributes
// (the class comes from classes_not_grouped()).
//
// bad_col() is called for a grouping variable that is not a column of
// `data`, that fails white_list(), or that is a list (VECSXP). A warning is
// issued for every factor grouping column of the result that contains NA.
SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
  const int nvars = vars.size();

  CharacterVector names = data.names();
  IntegerVector indx = vars.match_in_table(names);

  std::vector<SEXP> visited_data(nvars);
  CharacterVector groups_names(nvars + 1);

  // Resolve and validate every grouping column.
  for (int k = 0; k < nvars; ++k) {
    const int pos = indx[k];
    if (pos == NA_INTEGER) {
      bad_col(vars[k], "is unknown");
    }

    SEXP column = data[pos - 1];
    visited_data[k] = column;
    groups_names[k] = names[pos - 1];

    const bool can_group = white_list(column) && TYPEOF(column) != VECSXP;
    if (!can_group) {
      bad_col(vars[k], "can't be used as a grouping variable because it's a {type}",
              _["type"] = get_single_class(column));
    }
  }

  DataFrameVisitors visitors(data, vars);

  boost::shared_ptr<Slicer> s = slicer(std::vector<int>(), 0, visited_data, visitors);

  const int ncases = s->size();

  // construct the groups data
  List vec_groups(nvars + 1);
  List indices(ncases);
  ListCollecter indices_collecter(indices);

  // One output vector per grouping variable, same type as the source column.
  for (int k = 0; k < nvars; k++) {
    vec_groups[k] = Rf_allocVector(TYPEOF(visited_data[k]), ncases);
    copy_most_attributes(vec_groups[k], visited_data[k]);
  }
  vec_groups[nvars] = indices;
  groups_names[nvars] = ".rows";

  s->make(vec_groups, indices_collecter);

  // warn about NA in factors
  for (int k = 0; k < nvars; k++) {
    SEXP group_column = vec_groups[k];
    if (!Rf_isFactor(group_column)) {
      continue;
    }

    IntegerVector codes(group_column);
    const bool has_na = std::find(codes.begin(), codes.end(), NA_INTEGER) != codes.end();
    if (has_na) {
      warningcall(R_NilValue, tfm::format("Factor `%s` contains implicit NA, consider using `forcats::fct_explicit_na`", CHAR(groups_names[k].get())));
    }
  }

  vec_groups.attr("names") = groups_names;
  vec_groups.attr("row.names") = IntegerVector::create(NA_INTEGER, -ncases);
  vec_groups.attr("class") = classes_not_grouped();

  return vec_groups;
}