예제 #1
0
파일: api.cpp 프로젝트: blasern/dplyr
DataFrameJoinVisitors::DataFrameJoinVisitors(const DataFrame& left_, const DataFrame& right_, const SymbolVector& names_left, const SymbolVector& names_right, bool warn_, bool na_match) :
  left(left_), right(right_),
  visitor_names_left(names_left),
  visitor_names_right(names_right),
  visitors(names_left.size()),
  warn(warn_)
{
  IntegerVector indices_left  = names_left.match_in_table(RCPP_GET_NAMES(left));
  IntegerVector indices_right = names_right.match_in_table(RCPP_GET_NAMES(right));

  const int nvisitors = indices_left.size();
  if (indices_right.size() != nvisitors) {
    stop("Different size of join column index vectors");
  }

  for (int i = 0; i < nvisitors; i++) {
    const SymbolString& name_left  = names_left[i];
    const SymbolString& name_right = names_right[i];

    if (indices_left[i] == NA_INTEGER) {
      stop("'%s' column not found in lhs, cannot join", name_left.get_utf8_cstring());
    }
    if (indices_right[i] == NA_INTEGER) {
      stop("'%s' column not found in rhs, cannot join", name_right.get_utf8_cstring());
    }

    visitors[i] =
      join_visitor(
        Column(left[indices_left[i] - 1], name_left),
        Column(right[indices_right[i] - 1], name_right),
        warn, na_match
      );
  }
}
예제 #2
0
파일: api.cpp 프로젝트: blasern/dplyr
DataFrameVisitors::DataFrameVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{

  int n = names.size();
  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector indices = names.match_in_table(data_names);

  for (int i = 0; i < n; i++) {
    if (indices[i] == NA_INTEGER) {
      bad_col(names[i], "is unknown");
    }
    SEXP column = data[indices[i] - 1];
    visitors.push_back(visitor(column));
  }

}
예제 #3
0
파일: api.cpp 프로젝트: blasern/dplyr
DataFrameSubsetVisitors::DataFrameSubsetVisitors(const DataFrame& data_, const SymbolVector& names) :
  data(data_),
  visitors(),
  visitor_names(names)
{

  CharacterVector data_names = vec_names_or_empty(data);
  IntegerVector indices = names.match_in_table(data_names);

  int n = indices.size();
  for (int i = 0; i < n; i++) {

    int pos = indices[i];
    if (pos == NA_INTEGER) {
      bad_col(names[i], "is unknown");
    }

    SubsetVectorVisitor* v = subset_visitor(data[pos - 1], data_names[pos - 1]);
    visitors.push_back(v);

  }

}
예제 #4
0
SEXP build_index_cpp(const DataFrame& data, const SymbolVector& vars) {
  const int nvars = vars.size();

  CharacterVector names = data.names();
  IntegerVector indx = vars.match_in_table(names);
  std::vector<SEXP> visited_data(nvars);
  CharacterVector groups_names(nvars + 1);

  for (int i = 0; i < nvars; ++i) {
    int pos = indx[i];
    if (pos == NA_INTEGER) {
      bad_col(vars[i], "is unknown");
    }

    SEXP v = data[pos - 1];
    visited_data[i] = v;
    groups_names[i] = names[pos - 1];

    if (!white_list(v) || TYPEOF(v) == VECSXP) {
      bad_col(vars[i], "can't be used as a grouping variable because it's a {type}",
              _["type"] = get_single_class(v));
    }
  }

  DataFrameVisitors visitors(data, vars);

  boost::shared_ptr<Slicer> s = slicer(std::vector<int>(), 0, visited_data, visitors);

  int ncases = s->size();

  // construct the groups data
  List vec_groups(nvars + 1);
  List indices(ncases);
  ListCollecter indices_collecter(indices);

  for (int i = 0; i < nvars; i++) {
    vec_groups[i] = Rf_allocVector(TYPEOF(visited_data[i]), ncases);
    copy_most_attributes(vec_groups[i], visited_data[i]);
  }

  vec_groups[nvars] = indices;
  groups_names[nvars] = ".rows";
  s->make(vec_groups, indices_collecter);

  // warn about NA in factors
  for (int i = 0; i < nvars; i++) {
    SEXP x = vec_groups[i];
    if (Rf_isFactor(x)) {
      IntegerVector xi(x);
      if (std::find(xi.begin(), xi.end(), NA_INTEGER) < xi.end()) {
        warningcall(R_NilValue, tfm::format("Factor `%s` contains implicit NA, consider using `forcats::fct_explicit_na`", CHAR(groups_names[i].get())));
      }
    }
  }

  vec_groups.attr("names") = groups_names;
  vec_groups.attr("row.names") = IntegerVector::create(NA_INTEGER, -ncases);
  vec_groups.attr("class") = classes_not_grouped() ;

  return vec_groups;
}