예제 #1
0
파일: api.cpp 프로젝트: blasern/dplyr
// Builds one join visitor for every pair of join columns.
//
// Entry i of `names_left` is paired with entry i of `names_right`. The names are
// resolved against the column names of `left_` and `right_`; stop() is called when
// the two resolved index vectors differ in length or when a name cannot be found
// in its data frame. `warn_` and `na_match` are passed through to join_visitor().
DataFrameJoinVisitors::DataFrameJoinVisitors(const DataFrame& left_, const DataFrame& right_, const SymbolVector& names_left, const SymbolVector& names_right, bool warn_, bool na_match) :
  left(left_), right(right_),
  visitor_names_left(names_left),
  visitor_names_right(names_right),
  visitors(names_left.size()),
  warn(warn_)
{
  // Column positions of the join keys; they are used below with a -1 offset,
  // and NA_INTEGER marks a name that was not matched.
  IntegerVector pos_left  = names_left.match_in_table(RCPP_GET_NAMES(left));
  IntegerVector pos_right = names_right.match_in_table(RCPP_GET_NAMES(right));

  const int npairs = pos_left.size();
  if (pos_right.size() != npairs) {
    stop("Different size of join column index vectors");
  }

  for (int k = 0; k != npairs; ++k) {
    const SymbolString& key_left  = names_left[k];
    const SymbolString& key_right = names_right[k];

    const int at_left = pos_left[k];
    if (at_left == NA_INTEGER) {
      stop("'%s' column not found in lhs, cannot join", key_left.get_utf8_cstring());
    }

    const int at_right = pos_right[k];
    if (at_right == NA_INTEGER) {
      stop("'%s' column not found in rhs, cannot join", key_right.get_utf8_cstring());
    }

    visitors[k] = join_visitor(
                    Column(left[at_left - 1], key_left),
                    Column(right[at_right - 1], key_right),
                    warn, na_match
                  );
  }
}
예제 #2
0
/// Emits one "resource leak" bug report per symbol in \p LeakedStreams.
///
/// Every report is attached to \p ErrNode and marks the corresponding symbol
/// as interesting before being handed to \p C.
void SimpleStreamChecker::reportLeaks(SymbolVector LeakedStreams,
                                      CheckerContext &C,
                                      ExplodedNode *ErrNode) const {
  // TODO: Identify the leaked file descriptor.
  typedef SmallVectorImpl<SymbolRef>::iterator StreamIter;

  StreamIter Cur = LeakedStreams.begin();
  StreamIter End = LeakedStreams.end();
  while (Cur != End) {
    // NOTE(review): the report is heap-allocated and never deleted here;
    // presumably emitReport() takes ownership -- confirm.
    BugReport *Report = new BugReport(*LeakBugType,
        "Opened file is never closed; potential resource leak", ErrNode);
    Report->markInteresting(*Cur);
    C.emitReport(Report);
    ++Cur;
  }
}
예제 #3
0
    // Looks up the count stored for `ngram` in `ngrams`.
    //
    // Starting from ngrams.root(), each symbol of `ngram` advances the current
    // node via ngrams.find(). The walk stops as soon as find() lands back on
    // the root. Returns 0.0 when the final node is the root (this includes an
    // empty `ngram`), otherwise the value stored at ngrams[node].
    BleuSScorer::count_type BleuSScorer::find(const SymbolVector& ngram) const
    {
      typedef SymbolVector ngram_type;

      ngram_set_type::id_type node = ngrams.root();

      ngram_type::const_iterator cur = ngram.begin();
      ngram_type::const_iterator last = ngram.end();
      while (cur != last) {
        node = ngrams.find(node, *cur);

        // Landing on the root ends the lookup early.
        if (ngrams.is_root(node)) break;

        ++ cur;
      }

      const bool missing = ngrams.is_root(node);
      return (missing ? 0.0 : ngrams[node]);
    }
예제 #4
0
파일: Symbol.cpp 프로젝트: jimhourihan/mu
//
//  Collects into `symbols` the symbols matching `name`, looked up from
//  this symbol. The name is first split by context()->separateName() and
//  looked up as a qualified name; if that finds nothing, the whole
//  unsplit name is looked up as a single component.
//
void
Symbol::findSymbols(QualifiedName name, SymbolVector& symbols)
{
    NameVector parts;
    context()->separateName(name, parts);
    qualifiedNameLookup(parts, this, symbols);

    if (!symbols.empty()) return;

    //
    //  NOTE: this fallback is fishy. A tuple symbol such as
    //  (int,foo.bar) is separated into "(int,foo" and "bar)", which
    //  is clearly wrong: the parens should prevent the dot
    //  separation. These names separate incorrectly in the same way:
    //
    //      int[foo.bar]
    //      [foo.bar]
    //      vector foo.bar[3]
    //
    //  The retry below assumes such symbols live in the global
    //  namespace and will therefore be found when the name is not
    //  tokenized at all.
    //

    NameVector whole;
    whole.push_back(name);
    qualifiedNameLookup(whole, this, symbols);
}
예제 #5
0
// Returns a grouped copy of `data`, grouped by the variables in `symbols`.
//
// `data` is first checked with assert_all_white_list(). The result is a
// shallow copy that carries the grouped class vector and the groups built by
// build_index_cpp(). Calls stop() when `symbols` is empty.
// [[Rcpp::export]]
DataFrame grouped_df_impl(DataFrame data, SymbolVector symbols) {
  assert_all_white_list(data);

  DataFrame grouped(shallow_copy(data));
  set_class(grouped, classes_grouped<GroupedDataFrame>());

  if (symbols.size() == 0) {
    stop("no variables to group by");
  }

  GroupedDataFrame::set_groups(grouped, build_index_cpp(grouped, symbols));
  return grouped;
}
예제 #6
0
파일: api.cpp 프로젝트: blasern/dplyr
// Creates one visitor per requested column of `data_`.
//
// `names` lists the columns to visit; each name is resolved against the
// column names of the data frame and reported through bad_col() when it is
// not found.
DataFrameVisitors::DataFrameVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  const int count = names.size();
  CharacterVector columns = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(columns);

  for (int k = 0; k != count; ++k) {
    const int pos = positions[k];
    if (pos == NA_INTEGER) {
      // NOTE(review): presumably bad_col() does not return -- confirm.
      bad_col(names[k], "is unknown");
    }

    // Positions are used with a -1 offset to index the data frame.
    SEXP col = data[pos - 1];
    visitors.push_back(visitor(col));
  }
}
예제 #7
0
/// Walks every tracked stream symbol, collects the ones isLeaked() flags,
/// and removes dead symbols from the StreamMap in the program state. The
/// collected symbols are then reported on the node created for the updated
/// state.
void SimpleStreamChecker::checkDeadSymbols(SymbolReaper &SymReaper,
                                           CheckerContext &C) const {
  ProgramStateRef State = C.getState();
  SymbolVector LeakedStreams;
  StreamMapTy TrackedStreams = State->get<StreamMap>();

  StreamMapTy::iterator It = TrackedStreams.begin();
  StreamMapTy::iterator End = TrackedStreams.end();
  for (; It != End; ++It) {
    SymbolRef Sym = It->first;
    const bool Dead = SymReaper.isDead(Sym);

    // The leak check sees the state as updated by earlier iterations.
    if (isLeaked(Sym, It->second, Dead, State)) {
      LeakedStreams.push_back(Sym);
    }

    // Live symbols stay in the streams map.
    if (!Dead)
      continue;

    State = State->remove<StreamMap>(Sym);
  }

  ExplodedNode *LeakNode = C.addTransition(State);
  reportLeaks(LeakedStreams, C, LeakNode);
}
예제 #8
0
파일: api.cpp 프로젝트: blasern/dplyr
// Creates one subset visitor per requested column of `data_`.
//
// `names` lists the columns to visit; each name is resolved against the
// column names of the data frame and reported through bad_col() when it is
// not found. Each visitor is built from the column and its name as stored in
// the data frame.
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{
  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector positions = names.match_in_table(data_names);

  const int count = positions.size();
  for (int k = 0; k != count; ++k) {
    const int pos = positions[k];
    if (pos == NA_INTEGER) {
      // NOTE(review): presumably bad_col() does not return -- confirm.
      bad_col(names[k], "is unknown");
    }

    // Positions are used with a -1 offset for both the column and its name.
    SubsetVectorVisitor* column_visitor = subset_visitor(data[pos - 1], data_names[pos - 1]);
    visitors.push_back(column_visitor);
  }
}
예제 #9
0
파일: utils.cpp 프로젝트: pachevalier/dplyr
 // Sets the "vars" attribute of `x` to the vector held by `vars` and returns
 // the value produced by Rf_setAttrib().
 SEXP set_vars(SEXP x, const SymbolVector& vars) {
   // The attribute symbol is installed once, on the first call.
   static SEXP symbol_vars = Rf_install("vars");
   SEXP updated = Rf_setAttrib(x, symbol_vars, vars.get_vector());
   return updated;
 }
예제 #10
0
// Applies a single slice expression to every group of `gdf` and returns the
// selected rows as a grouped data frame.
//
// `dots[0]` holds the expression and its environment. For each group the
// expression is evaluated to an integer vector of within-group positions:
//   * positive mode (counter.is_positive()): every value in [1, nr] selects
//     that row of the group; zero, NA and out-of-range values are skipped;
//   * negative mode (counter.get_n_negative() != 0): every value in [-nr, -1]
//     drops that row of the group and all remaining rows are kept in order;
//     zero, NA and out-of-range values are ignored;
//   * otherwise the group contributes no rows.
// `nr` is the number of rows in the group.
SEXP slice_grouped(GroupedDataFrame gdf, const LazyDots& dots) {
  typedef GroupedCallProxy<GroupedDataFrame, LazyGroupedSubsets> Proxy;

  const DataFrame& data = gdf.data();
  const Lazy& lazy = dots[0];
  Environment env = lazy.env();
  SymbolVector names = data.names();

  // we already checked that we have only one expression
  Call call(lazy.expr());

  // Row positions (as provided by each group's SlicingIndex) to keep.
  std::vector<int> indx;
  indx.reserve(1000);

  IntegerVector g_test;
  Proxy call_proxy(call, gdf, env);

  int ngroups = gdf.ngroups();
  GroupedDataFrame::group_iterator git = gdf.group_begin();
  for (int i = 0; i < ngroups; i++, ++git) {
    const SlicingIndex& indices = *git;
    const int nr = indices.size();
    g_test = check_filter_integer_result(call_proxy.get(indices));
    CountIndices counter(nr, g_test);
    const int ntest = g_test.size();

    if (counter.is_positive()) {
      // positive indexing
      for (int j = 0; j < ntest; j++) {
        const int pos = g_test[j];
        // Only positions inside the group may be dereferenced. This also
        // rejects 0 and negative values (NA_INTEGER included), which would
        // otherwise read indices[] before its first element.
        if (pos >= 1 && pos <= nr) {
          indx.push_back(indices[pos - 1]);
        }
      }
    } else if (counter.get_n_negative() != 0) {
      // negative indexing
      // Sorted, de-duplicated 1-based positions to drop. Values outside
      // [-nr, -1] are ignored: 0 must not drop the first row and a position
      // beyond the group must not make the walk below run past its end.
      // NA_INTEGER is the smallest int, so `neg >= -nr` excludes it too and
      // it is never negated.
      std::set<int> drop;
      for (int j = 0; j < ntest; j++) {
        const int neg = g_test[j];
        if (neg < 0 && neg >= -nr) {
          drop.insert(-neg);
        }
      }

      // Walk the group once, skipping the rows listed in `drop`.
      std::set<int>::const_iterator drop_it = drop.begin();
      for (int j = 0; j < nr; j++) {
        if (drop_it != drop.end() && *drop_it == j + 1) {
          ++drop_it;
          continue;
        }
        indx.push_back(indices[j]);
      }
    }
  }

  DataFrame res = subset(data, indx, names, classes_grouped<GroupedDataFrame>());
  set_vars(res, get_vars(data));
  strip_index(res);

  return GroupedDataFrame(res).data();

}
예제 #11
0
// Builds the groups structure for `data` grouped by the columns named in
// `vars`.
//
// The result is a list with one vector per grouping variable followed by a
// ".rows" list, carrying "names", "row.names" and "class" attributes. Each
// name in `vars` must be a column of `data` and must pass white_list()
// without being a list (VECSXP); otherwise bad_col() is called. A warning is
// issued for every resulting factor column that contains NA_INTEGER.
SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
  const int nvars = vars.size();

  CharacterVector col_names = data.names();
  IntegerVector positions = vars.match_in_table(col_names);
  std::vector<SEXP> group_cols(nvars);
  CharacterVector out_names(nvars + 1);

  // Resolve and validate every grouping column.
  for (int k = 0; k != nvars; ++k) {
    const int pos = positions[k];
    if (pos == NA_INTEGER) {
      bad_col(vars[k], "is unknown");
    }

    SEXP col = data[pos - 1];
    group_cols[k] = col;
    out_names[k] = col_names[pos - 1];

    const bool groupable = white_list(col) && TYPEOF(col) != VECSXP;
    if (!groupable) {
      bad_col(vars[k], "can't be used as a grouping variable because it's a {type}",
              _["type"] = get_single_class(col));
    }
  }

  DataFrameVisitors visitors(data, vars);

  boost::shared_ptr<Slicer> tree = slicer(std::vector<int>(), 0, group_cols, visitors);

  const int ngroups = tree->size();

  // construct the groups data: one slot per variable plus the row list
  List groups(nvars + 1);
  List rows(ngroups);
  ListCollecter rows_collecter(rows);

  for (int k = 0; k != nvars; ++k) {
    groups[k] = Rf_allocVector(TYPEOF(group_cols[k]), ngroups);
    copy_most_attributes(groups[k], group_cols[k]);
  }

  groups[nvars] = rows;
  out_names[nvars] = ".rows";
  tree->make(groups, rows_collecter);

  // warn about NA in factors
  for (int k = 0; k != nvars; ++k) {
    SEXP x = groups[k];
    if (!Rf_isFactor(x)) {
      continue;
    }

    IntegerVector codes(x);
    if (std::find(codes.begin(), codes.end(), NA_INTEGER) != codes.end()) {
      warningcall(R_NilValue, tfm::format("Factor `%s` contains implicit NA, consider using `forcats::fct_explicit_na`", CHAR(out_names[k].get())));
    }
  }

  groups.attr("names") = out_names;
  groups.attr("row.names") = IntegerVector::create(NA_INTEGER, -ngroups);
  groups.attr("class") = classes_not_grouped();

  return groups;
}