Example #1
0
// Assembles the result of a join from two data frames.
//
// The output holds every column of `x` taken at `indices_x`, followed by the
// columns of `y` that are not listed in `by` taken at `indices_y`. Column
// names are obtained from the JoinColumnSuffixer with the ".x" / ".y"
// suffixes. The result gets `classes` as its class, row names sized after
// `indices_x`, and the "vars" attribute of `x` when that attribute is set.
DataFrame subset( DataFrame x, DataFrame y, const Index& indices_x, const Index& indices_y, CharacterVector by, CharacterVector classes ){
    // left side: all columns of x
    CharacterVector left_names = x.names() ;
    DataFrameVisitors left_visitors(x, left_names) ;

    // right side: columns of y minus the join keys
    CharacterVector right_names_all = y.names() ;
    CharacterVector right_names = setdiff( right_names_all, by ) ;
    JoinColumnSuffixer suffixer(left_names, right_names, by) ;

    DataFrameVisitors right_visitors(y, right_names) ;

    int n_rows = indices_x.size() ;
    const int n_left = left_visitors.size() ;
    const int n_right = right_visitors.size() ;
    const int n_total = n_left + n_right ;

    List out(n_total);
    CharacterVector names(n_total) ;

    for( int j=0; j<n_left; j++){
        out[j] = left_visitors.get(j)->subset(indices_x) ;
        names[j] = suffixer.get( left_names[j], ".x" ) ;
    }
    for( int j=0; j<n_right; j++){
        // right-hand columns are stored after the left-hand ones
        const int slot = n_left + j ;
        out[slot] = right_visitors.get(j)->subset(indices_y) ;
        names[slot] = suffixer.get( right_names[j], ".y" ) ;
    }

    out.attr("class") = classes ;
    set_rownames(out, n_rows) ;
    out.names() = names ;

    // carry over the "vars" attribute of x when it has one
    SEXP vars = x.attr( "vars" ) ;
    if( !Rf_isNull(vars) ){
        out.attr( "vars" ) = vars ;
    }

    return static_cast<SEXP>(out) ;
}
Example #2
0
// Filters an ungrouped data frame with the expressions in `args`.
//
// If all expressions share a single environment they are merged into one
// call by and_calls() and evaluated once. Otherwise each expression is
// evaluated in its own environment and the logical results are merged with
// combine_and(). The rows to keep are then extracted with subset().
SEXP filter_not_grouped( DataFrame df, List args, const DataDots& dots){
    // symbols for the column names of df, passed to and_calls() below
    CharacterVector column_names = df.names() ;
    SymbolSet column_symbols ;
    const int ncolumns = column_names.size() ;
    for( int j=0; j<ncolumns; j++){
        column_symbols.insert( Rf_install( column_names[j] ) ) ;
    }

    if( !dots.single_env() ){
        // one environment per expression: evaluate them one at a time
        const int nexprs = args.size() ;
        CallProxy head_proxy(args[0], df, dots.envir(0) ) ;
        LogicalVector keep = head_proxy.eval() ;
        check_filter_result(keep, df.nrows());

        for( int j=1; j<nexprs; j++){
            LogicalVector current = CallProxy(args[j], df, dots.envir(j) ).eval() ;
            combine_and(keep, current) ;
        }

        DataFrame filtered = subset( df, keep, df.names(), classes_not_grouped() ) ;
        return filtered ;
    }

    // shared environment: a, b, c ->  a & b & c
    Environment env = dots.envir(0) ;
    Shield<SEXP> combined( and_calls( args, column_symbols ) ) ;

    // evaluate the combined expression against the columns of df
    CallProxy proxy( static_cast<SEXP>(combined), df, env ) ;
    LogicalVector keep = proxy.eval() ;
    check_filter_result(keep, df.nrows());

    DataFrame filtered = subset( df, keep, df.names(), classes_not_grouped() ) ;
    return filtered ;
}
Example #3
0
// Filters an ungrouped data frame with the lazily captured expressions in `dots`.
//
// When every expression shares one environment they are merged into a single
// `a & b & c` call and evaluated once. Otherwise each expression is evaluated
// in its own environment and the results are merged with combine_and().
//
// A length-one result of the first (or only) evaluation is treated as a
// scalar condition: TRUE keeps going, anything else (FALSE or NA) yields an
// empty subset. Longer results are validated against the number of rows with
// check_filter_result().
DataFrame filter_not_grouped( DataFrame df, const LazyDots& dots){
    // symbols for the column names of df, passed to and_calls() below
    CharacterVector names = df.names() ;
    SymbolSet set ;
    for( int i=0; i<names.size(); i++){
        set.insert( Rf_installChar( names[i] ) ) ;
    }
    if( dots.single_env() ){
        Environment env = dots[0].env() ;
        // a, b, c ->  a & b & c
        Shield<SEXP> call( and_calls( dots, set, env ) ) ;

        // replace the symbols that are in the data frame by vectors from the data frame
        // and evaluate the expression
        CallProxy proxy( (SEXP)call, df, env ) ;
        LogicalVector test = check_filter_logical_result(proxy.eval()) ;

        if( test.size() == 1){
            if( test[0] == TRUE ){
                return df ;
            } else {
                return empty_subset(df, df.names(), classes_not_grouped()) ;
            }
        } else {
            check_filter_result(test, df.nrows());
            return subset(df, test, classes_not_grouped() ) ;
        }
    } else {
        int nargs = dots.size() ;

        Call call(dots[0].expr());
        CallProxy first_proxy(call, df, dots[0].env() ) ;
        LogicalVector test = check_filter_logical_result(first_proxy.eval()) ;
        if( test.size() == 1 ) {
            // Compare against TRUE rather than negating: NA_LOGICAL is a
            // non-zero int, so `!test[0]` would let a scalar NA pass as if it
            // were TRUE. This matches the single-environment branch above.
            if( test[0] != TRUE ){
                return empty_subset(df, df.names(), classes_not_grouped() ) ;
            }
        } else {
            check_filter_result(test, df.nrows());
        }

        for( int i=1; i<nargs; i++){
            Rcpp::checkUserInterrupt() ;

            Call call( dots[i].expr() ) ;
            CallProxy proxy(call, df, dots[i].env() ) ;
            LogicalVector test2 = check_filter_logical_result(proxy.eval()) ;
            // a true return value from combine_and() short-circuits to an empty result
            if( combine_and(test, test2) ){
                return empty_subset(df, df.names(), classes_not_grouped() ) ;
            }
        }

        DataFrame res = subset( df, test, classes_not_grouped() ) ;
        return res ;
    }
}
Example #4
0
// Semi join: returns the rows of `x` that have a match in `y` on the
// columns `by_x` / `by_y`. Stops when no join column is supplied.
// [[Rcpp::export]]
DataFrame semi_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > RowMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  RowMap x_rows(join_visitors);

  // index the rows of x
  train_push_back(x_rows, x.nrows());

  // rows of x that are matched by some row of y
  std::vector<int> matched;
  const int y_count = y.nrows();
  for (int row = 0; row < y_count; ++row) {
    // row `row` of y is looked up through the key -row - 1
    RowMap::iterator hit = x_rows.find(-row - 1);
    if (hit == x_rows.end()) {
      continue;
    }

    // take the matching x rows and drop the entry so it is only found once
    push_back(matched, hit->second);
    x_rows.erase(hit);
  }

  const DataFrame& out = subset(x, matched, x.names(), get_class(x));
  strip_index(out);
  return out;
}
Example #5
0
// Returns the rows of `df` that are distinct with respect to the columns in
// `vars`, keeping the first occurrence of each combination. The columns of
// the result are those named in `keep`; the class attribute of `df` is
// passed on to the subset.
//
// A data frame without columns, or an empty `vars`, is returned unchanged.
// [[Rcpp::export]]
SEXP distinct_impl(DataFrame df, CharacterVector vars, CharacterVector keep) {
  if (df.size() == 0)
    return df;

  // No vars means ungrouped data with keep_all = TRUE.
  if (vars.size() == 0)
    return df;

  check_valid_colnames(df);
  // `vars` is known to be non-empty here, so no fallback to df.names() is
  // needed (the former fallback branch could never run).
  DataFrameVisitors visitors(df, vars);

  std::vector<int> indices;
  VisitorSetIndexSet<DataFrameVisitors> set(visitors);

  // insert() reports whether the row was new; only first occurrences are kept
  int n = df.nrows();
  for (int i=0; i<n; i++) {
    if (set.insert(i).second) {
      indices.push_back(i);
    }
  }

  return DataFrameSubsetVisitors(df, keep).subset(indices, df.attr("class"));
}
Example #6
0
// Anti join: returns the rows of `x` that have no match in `y` on the
// columns `by_x` / `by_y`. Stops when no join column is supplied.
// [[Rcpp::export]]
DataFrame anti_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > RowMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  RowMap x_rows(join_visitors);

  // index the rows of x
  train_push_back(x_rows, x.nrows());

  // drop every entry of x that is matched by a row of y
  const int y_count = y.nrows();
  for (int row = 0; row < y_count; ++row) {
    RowMap::iterator hit = x_rows.find(-row - 1);
    if (hit == x_rows.end()) {
      continue;
    }
    x_rows.erase(hit);
  }

  // whatever is still in the map had no match
  std::vector<int> unmatched;
  RowMap::iterator entry = x_rows.begin();
  while (entry != x_rows.end()) {
    push_back(unmatched, entry->second);
    ++entry;
  }

  const DataFrame& out = subset(x, unmatched, x.names(), get_class(x));
  strip_index(out);
  return out;
}
Example #7
0
// Semi join: returns the rows of `x` that have a match in `y` on the
// columns named in `by`. The class attribute of `x` is passed on to subset().
// [[Rcpp::export]]
DataFrame semi_join_impl( DataFrame x, DataFrame y, CharacterVector by){
    typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > RowMap ;
    DataFrameJoinVisitors join_visitors(x, y, by) ;
    RowMap x_rows(join_visitors);

    // index the rows of x
    train_push_back( x_rows, x.nrows(), x.nrows() / 10) ;

    // rows of x that are matched by some row of y
    std::vector<int> matched ;
    const int y_count = y.nrows() ;
    for( int row=0; row<y_count; ++row){
        // row `row` of y is looked up through the key -row-1
        RowMap::iterator hit = x_rows.find(-row-1) ;
        if( hit == x_rows.end() ){
            continue ;
        }

        // take the matching x rows and drop the entry so it is only found once
        push_back( matched, hit->second ) ;
        x_rows.erase(hit) ;
    }

    return subset(x, matched, x.names(), x.attr("class") ) ;
}
Example #8
0
// Builds the data frame returned by a mutate operation.
//
// The output has `call_proxy.nsubsets()` slots. The first `df.size()` slots
// hold the current value of each input column, looked up by name through
// `call_proxy`. The remaining slots are filled, in the order of
// `results_names`, with result variables whose name is not already an input
// column. The result gets `classes` as its class and row names sized after
// `df.nrows()`.
SEXP structure_mutate( Proxy& call_proxy, const DataFrame& df, const CharacterVector& results_names, CharacterVector classes){
    int n = call_proxy.nsubsets() ;

    List out(n) ;
    CharacterVector names(n) ;

    CharacterVector input_names = df.names() ;
    int ncolumns = df.size() ;
    int i=0 ;
    for( ; i<ncolumns; i++){
        out[i] = call_proxy.get_variable(input_names[i]) ;
        // mark the column as shared
        SET_NAMED( out[i], 2 );
        names[i] = input_names[i] ;
    }

    // `i` only advances when a new (non-input) name is found, so `k` needs its
    // own bound: without it the loop would read past the end of
    // `results_names` whenever it holds fewer new names than there are slots.
    const int nresults = results_names.size() ;
    for( int k=0; i<n && k<nresults; k++ ){
        String name = results_names[k] ;

        if( ! any( input_names.begin(), input_names.end(), name.get_sexp() ) ){
            SEXP x   = call_proxy.get_variable( name ) ;
            out[i]   = x ;
            SET_NAMED( out[i], 2 );
            names[i] = name ;
            i++ ;
        }
    }

    out.attr("class") = classes ;
    set_rownames( out, df.nrows() ) ;
    out.names() = names;

    return out ;
}
Example #9
0
// Entry point for filter: dispatches on the kind of data frame.
//
// Returns `df` unchanged when it is NULL, has no rows, or when `dots` is
// empty. A single literal logical scalar is handled directly: TRUE returns
// `df`, anything else returns an empty subset with the matching classes.
// Otherwise the work is delegated to filter_grouped() for grouped and
// rowwise data frames and to filter_not_grouped() for everything else.
// [[Rcpp::export]]
SEXP filter_impl( DataFrame df, LazyDots dots){
    // test for NULL before asking the object for its number of rows
    if( Rf_isNull(df) || df.nrows() == 0 ) {
        return df ;
    }
    check_valid_colnames(df) ;
    assert_all_white_list(df) ;

    if( dots.size() == 0 ) return df ;

    // special case: the only expression is a literal logical value
    if( dots.size() == 1 && TYPEOF(dots[0].expr()) == LGLSXP){
        LogicalVector what = dots[0].expr() ;
        if( what.size() == 1 ){
            if( what[0] == TRUE ){
                return df ;
            } else {
                return empty_subset( df, df.names(), is<GroupedDataFrame>(df) ? classes_grouped<GroupedDataFrame>() : classes_not_grouped() ) ;
            }
        }
    }
    if( is<GroupedDataFrame>( df ) ){
        return filter_grouped<GroupedDataFrame, LazyGroupedSubsets>( GroupedDataFrame(df), dots);
    } else if( is<RowwiseDataFrame>(df) ){
        return filter_grouped<RowwiseDataFrame, LazyRowwiseSubsets>( RowwiseDataFrame(df), dots);
    } else {
        return filter_not_grouped( df, dots ) ;
    }
}
// Prints each column name of `input` as "names[i]=<name>" and returns the
// names as a character vector.
// [[Rcpp::export]]
SEXP ex10(DataFrame input){
  CharacterVector names = input.names();
  int i = 0;
  while(i < names.size()){
    cout << "names[" << i << "]=" << names[i] << "\n";
    ++i;
  }
  return names;
}
Example #11
0
// Orders the rows of `data` by the expressions in `args`.
//
// Each expression is evaluated against `data` in its environment from `dots`;
// an expression of the form desc(expr) sorts by `expr` in descending order.
// Every evaluated variable must have one value per row, otherwise the
// function stops with an "incorrect size" error. The result keeps the class
// attribute of `data`.
// [[Rcpp::export]]
DataFrame arrange_impl( DataFrame data, List args, DataDots dots ){
    const int nexprs = args.size() ;
    List variables(nexprs) ;
    LogicalVector ascending(nexprs) ;
    Shelter<SEXP> shelter ;

    for(int j=0; j<nexprs; j++){
        SEXP call = args[j] ;
        // desc(expr): evaluate the inner expression and sort descending
        const bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;

        CallProxy call_proxy( is_desc ? CADR(call) : call, data, dots.envir(j)) ;
        variables[j] = shelter(call_proxy.eval()) ;

        if( Rf_length(variables[j]) != data.nrows() ){
            std::stringstream msg ;
            msg << "incorrect size ("
                << Rf_length(variables[j])
                << "), expecting :"
                << data.nrows() ;
            stop(msg.str()) ;
        }
        ascending[j] = !is_desc ;
    }

    OrderVisitors order(variables, ascending, nexprs) ;
    IntegerVector row_order = order.apply() ;

    DataFrameVisitors visitors( data, data.names() ) ;
    DataFrame sorted = visitors.subset(row_order, data.attr("class") ) ;
    return sorted ;
}
Example #12
0
// Orders the rows of `data` by the expressions captured in `quosures`.
//
// `data` is returned unchanged when it has no columns, no rows, or when no
// quosure is supplied. Each quosure is evaluated against `data` in its own
// environment; an expression of the form desc(expr) sorts by `expr` in
// descending order. An evaluated value is rejected (via stop()) when it is
// not accepted by white_list(), is a matrix, or does not have one value
// (or, for a data frame, one row) per row of `data`.
// [[Rcpp::export]]
List arrange_impl(DataFrame data, QuosureList quosures) {
  if (data.size() == 0) return data;
  check_valid_colnames(data);
  assert_all_white_list(data);

  if (quosures.size() == 0 || data.nrows() == 0) return data;

  int nargs = quosures.size();
  // evaluated sort keys and their direction, one entry per quosure
  List variables(nargs);
  LogicalVector ascending(nargs);

  for (int i = 0; i < nargs; i++) {
    const NamedQuosure& quosure = quosures[i];

    // keep the expression protected while it is inspected and evaluated
    Shield<SEXP> call_(quosure.expr());
    SEXP call = call_;
    // desc(expr): a call whose head is the symbol `desc`
    bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call);

    // for desc(expr) only the inner expression is evaluated
    CallProxy call_proxy(is_desc ? CADR(call) : call, data, quosure.env());

    Shield<SEXP> v(call_proxy.eval());
    if (!white_list(v)) {
      stop("cannot arrange column of class '%s'", get_single_class(v));
    }

    // size validation depends on the kind of value
    if (Rf_inherits(v, "data.frame")) {
      DataFrame df(v);
      int nr = df.nrows();
      if (nr != data.nrows()) {
        stop("data frame column with incompatible number of rows (%d), expecting : %d", nr, data.nrows());
      }
    } else if (Rf_isMatrix(v)) {
      stop("can't arrange by a matrix");
    } else {
      if (Rf_length(v) != data.nrows()) {
        stop("incorrect size (%d), expecting : %d", Rf_length(v), data.nrows());
      }
    }
    variables[i] = v;
    ascending[i] = !is_desc;
  }
  // compute the row permutation and apply it to every column
  OrderVisitors o(variables, ascending, nargs);
  IntegerVector index = o.apply();

  DataFrameSubsetVisitors visitors(data, data.names());
  List res = visitors.subset(index, get_class(data));

  if (is<GroupedDataFrame>(data)) {
    // Drop the "labels" attribute so that all grouping attributes
    // (indices, ...) are recalculated; see the laziness feature in
    // GroupedDataFrame. Without this we would get the values of the
    // un-arranged data set for free from subset (#1064).
    res.attr("labels") = R_NilValue;
    copy_vars(res, data);
    return GroupedDataFrame(res).data();
  }
  // not grouped: replace the attributes with the result of strip_group_attributes()
  SET_ATTRIB(res, strip_group_attributes(res));
  return res;
}
Example #13
0
// Computes the grouping metadata of `data` and stores it on the data frame.
//
// The grouping variables come from get_vars(data). Each one must name an
// existing column that passes white_list() and is not a list (VECSXP);
// otherwise the function stops. Rows are then bucketed by their combination
// of grouping values, the distinct combinations ("labels") are ordered, and
// the following attributes are set on `data`: "indices", "group_sizes",
// "biggest_group_size", "labels" and the grouped_df class vector.
DataFrame build_index_cpp(DataFrame data) {
  SymbolVector group_vars(get_vars(data));
  const int n_group_vars = group_vars.size();

  CharacterVector column_names = data.names();
  IntegerVector positions = group_vars.match_in_table(column_names);

  // validate every grouping variable before doing any work
  for (int j = 0; j < n_group_vars; ++j) {
    const int pos = positions[j];
    if (pos == NA_INTEGER) {
      stop("unknown column '%s' ", group_vars[j].get_utf8_cstring());
    }

    // positions are 1-based
    SEXP column = data[pos - 1];

    const bool groupable = white_list(column) && TYPEOF(column) != VECSXP;
    if (!groupable) {
      stop(
        "cannot group column %s, of class '%s'",
        group_vars[j].get_utf8_cstring(),
        get_single_class(column));
    }
  }

  // bucket the rows by their combination of grouping values
  DataFrameVisitors group_visitors(data, group_vars);
  ChunkIndexMap groups(group_visitors);

  train_push_back(groups, data.nrows());

  // one label row per group, then the order in which groups are reported
  DataFrame labels = DataFrameSubsetVisitors(data, group_vars).subset(groups, "data.frame");
  const int n_groups = labels.nrows();
  IntegerVector label_order = OrderVisitors(labels).apply();

  labels = DataFrameSubsetVisitors(labels).subset(label_order, "data.frame");

  List indices(n_groups);
  IntegerVector group_sizes = no_init(n_groups);
  int largest = 0;

  // remember where each group's row vector lives, in map iteration order
  std::vector<const std::vector<int>* > chunk_ptrs(n_groups);
  ChunkIndexMap::const_iterator entry = groups.begin();
  for (int g = 0; g < n_groups; ++g, ++entry) {
    chunk_ptrs[g] = &entry->second;
  }

  // emit the groups in label order
  for (int g = 0; g < n_groups; ++g) {
    const int source = label_order[g];
    const std::vector<int>& rows = *chunk_ptrs[source];
    indices[g] = rows;
    group_sizes[g] = rows.size();
    largest = std::max(largest, static_cast<int>(rows.size()));
  }

  data.attr("indices") = indices;
  data.attr("group_sizes") = group_sizes;
  data.attr("biggest_group_size") = largest;
  data.attr("labels") = labels;
  set_class(data, CharacterVector::create("grouped_df", "tbl_df", "tbl", "data.frame"));
  return data;
}
// Prints each column name of `input` as "names[i]=<name>", converting every
// name to a string first, and returns the names wrapped as an R object.
// [[Rcpp::export]]
SEXP ex10_2(DataFrame input){
  BEGIN_RCPP
  List names = input.names();
  int i = 0;
  while(i < names.size()){
    cout << "names[" << i << "]=" << as<string>(names[i]) << "\n";
    ++i;
  }
  return wrap(names);
  END_RCPP
}
Example #15
0
// Sorts the rows of `data` using the ordering computed by OrderVisitors over
// the data frame itself. The result is built with class "data.frame".
// [[Rcpp::export]]
DataFrame sort_impl( DataFrame data ){
    // row permutation
    OrderVisitors order(data) ;
    IntegerVector row_order = order.apply() ;

    // apply the permutation to every column
    DataFrameVisitors column_visitors( data, data.names() ) ;
    DataFrame sorted = column_visitors.subset(row_order, "data.frame" ) ;
    return sorted ;
}
// Copies the columns of `input` into a map keyed by column name (each column
// converted to a NumericVector) and returns the map wrapped as an R object.
// [[Rcpp::export]]
SEXP ex11_2(DataFrame input){
  BEGIN_RCPP
  typedef pair<string, NumericVector> Column;
  List names = input.names();
  map<string, NumericVector> mapObj;
  int i = 0;
  while(i < names.size()){
    // insert() keeps the existing entry when a name occurs more than once
    mapObj.insert(Column(as<string>(names[i]), input(i)));
    ++i;
  }
  return wrap(mapObj);
  END_RCPP
}
// Converts the names of `input` to a single string, prints each name as
// "names[i]=<name>" and returns the string wrapped as an R object.
// [[Rcpp::export]]
SEXP ex10_1(DataFrame input){
  BEGIN_RCPP
  CharacterVector names = input.names();
  // as<vector<string> > is not what is requested here: as<string> converts
  // the whole vector to one string, so names must have one element.
  string str_names = as<string>(names);
  int i = 0;
  while(i < names.size()){
    cout << "names[" << i << "]=" << names[i] << "\n";
    ++i;
  }
  return wrap(str_names);
  END_RCPP
}
// Collects the column names of `input` into a vector of strings and returns
// it wrapped as an R object.
// [[Rcpp::export]]
SEXP ex10_3(DataFrame input){
  BEGIN_RCPP
  List names = input.names();
  vector<string> strVec;
  int i = 0;
  while(i < names.size()){
    strVec.push_back(as<string>(names[i]));
    ++i;
  }
  return wrap(strVec);
  END_RCPP
}
// Copies the columns of `input` into a map keyed by column name, passes the
// map to ex12helper() together with the column name, row index and
// replacement value, and returns the map wrapped as an R object.
// [[Rcpp::export]]
SEXP ex12(DataFrame input, CharacterVector columnName, int rowIndex, double replace){
  BEGIN_RCPP
  typedef pair<string, NumericVector> Column;
  List names = input.names();
  map<string, NumericVector> mapObj;
  int i = 0;
  while(i < names.size()){
    // insert() keeps the existing entry when a name occurs more than once
    mapObj.insert(Column(as<string>(names[i]), input(i)));
    ++i;
  }
  ex12helper(mapObj, as<string>(columnName), rowIndex, replace);
  return wrap(mapObj);
  END_RCPP
}
// Views `input` as a list and calls ex13helper() once per row with the
// column name and replacement value, then returns the list wrapped as an
// R object.
// [[Rcpp::export]]
SEXP ex13_2(DataFrame input, CharacterVector columnName, double replace){
  BEGIN_RCPP
  List mapObj = as<List>(input);
  // the column name does not change between rows, so convert it only once
  const string column = as<string>(columnName);
  // all rows
  for(int i=0; i<input.nrows(); i++){
    ex13helper(mapObj, column, i, replace);
  }
  return(wrap(mapObj));
  END_RCPP
}
Example #21
0
// Filters an ungrouped data frame: the expressions in `args` are merged into
// one call by and_calls(), evaluated against `df` in `env`, and the logical
// result is used to subset `df`.
SEXP filter_not_grouped( DataFrame df, List args, Environment env){
    // a, b, c ->  a & b & c
    Language combined = and_calls( args ) ;

    // evaluate the combined expression against the columns of df
    CallProxy evaluator( combined, df, env ) ;
    LogicalVector keep = evaluator.eval() ;

    DataFrame filtered = subset( df, keep, df.names(), classes_not_grouped() ) ;
    return filtered ;
}
Example #22
0
// Set union of the rows of two data frames. Stops with "not compatible" when
// compatible_data_frame() rejects the pair. The class attribute of `x` is
// passed on to the result.
// [[Rcpp::export]]
DataFrame union_data_frame( DataFrame x, DataFrame y){
    const bool compatible = compatible_data_frame(x,y) ;
    if( !compatible ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> RowSet ;
    DataFrameJoinVisitors join_visitors(x, y, x.names() ) ;
    RowSet rows(join_visitors);

    // rows of x first, then rows of y
    train_insert( rows, x.nrows() ) ;
    train_insert_right( rows, y.nrows() ) ;

    return join_visitors.subset( rows, x.attr("class") ) ;
}
// Copies the columns of `input` into a map keyed by column name, calls
// ex13helper() once per row on a wrapped view of the map, and returns the
// map wrapped as an R object.
// [[Rcpp::export]]
SEXP ex13(DataFrame input, CharacterVector columnName, double replace){
  BEGIN_RCPP
  typedef pair<string, NumericVector> Column;
  List names = input.names();
  map<string, NumericVector> mapObj;
  int col = 0;
  while(col < names.size()){
    // insert() keeps the existing entry when a name occurs more than once
    mapObj.insert(Column(as<string>(names[col]), input(col)));
    ++col;
  }
  // all rows
  int row = 0;
  while(row < input.nrows()){
    ex13helper(wrap(mapObj), as<string>(columnName), row, replace);
    ++row;
  }
  return wrap(mapObj);
  END_RCPP
}
Example #24
0
// Reorders `data` so that the rows of each group are contiguous and attaches
// the grouping metadata.
//
// The grouping variables are read from the names of the "vars" attribute.
// Rows are bucketed by their combination of grouping values, the distinct
// combinations ("labels") are ordered, and `data` is rebuilt group by group
// in that order. The returned data frame carries the attributes
// "group_sizes", "biggest_group_size" and "labels".
// [[Rcpp::export]]
DataFrame build_index_cpp( DataFrame data ){
    CharacterVector vars = Rf_getAttrib( data.attr( "vars" ), R_NamesSymbol ) ;
    
    // bucket the rows by their combination of grouping values
    DataFrameVisitors visitors(data, vars) ;
    ChunkIndexMap map( visitors ) ;
    train_push_back( map, data.nrows() ) ;
    
    // one label row per group
    DataFrame labels = visitors.subset( map, "data.frame") ;
    int ngroups = labels.nrows() ;
    
    // order in which the groups are emitted
    OrderVisitors order_labels( labels, vars ) ;
    IntegerVector orders = order_labels.apply() ;
    
    // remember where each group's row vector lives, in map iteration order
    std::vector< const std::vector<int>* > chunks(ngroups) ;
    ChunkIndexMap::const_iterator it = map.begin() ;
    for( int i=0; i<ngroups; i++, ++it){
        chunks[ i ] = &it->second ;
    }
    IntegerVector group_sizes = no_init( ngroups );
    int biggest_group = 0 ;
    // concatenation of the row indices of every group, in label order
    std::vector<int> indices ;
    indices.reserve( data.nrows() );
    for( int i=0; i<ngroups; i++){
        const std::vector<int>& chunk = *chunks[orders[i]] ;
        push_back( indices, chunk ) ;
        biggest_group = std::max( biggest_group, (int)chunk.size() );
        group_sizes[i] = chunk.size() ;
    }
    
    // rebuild the data frame with its rows in group order
    DataFrameVisitors all_variables_visitors(data, data.names() ) ;
    data = all_variables_visitors.subset( indices, classes_grouped() ) ;
    
    // TODO: we own labels, so perhaps we can do an inplace sort, 
    //       to reuse its memory instead of creating a new data frame
    DataFrameVisitors labels_visitors( labels, vars) ;
    
    // put the labels in the same order as the groups and clear their "vars"
    labels = labels_visitors.subset( orders, "data.frame" ) ;
    labels.attr( "vars" ) = R_NilValue ;
    
    data.attr( "group_sizes") = group_sizes ;
    data.attr( "biggest_group_size" ) = biggest_group ;
    data.attr( "labels" ) = labels ;
    return data ;
}
Example #25
0
// For every row of `x`, returns the 1-based value found for it in the set
// built over the rows of `y`, or NA when the row is not found. Stops with
// "not compatible" when compatible_data_frame() rejects the pair.
// [[Rcpp::export]]
IntegerVector match_data_frame( DataFrame x, DataFrame y){
    const bool compatible = compatible_data_frame(x,y) ;
    if( !compatible ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> RowSet ;
    DataFrameJoinVisitors join_visitors(y, x, x.names() ) ;
    RowSet y_rows(join_visitors);

    // index the rows of y
    train_insert( y_rows, y.nrows() ) ;

    const int x_count = x.nrows() ;
    IntegerVector res = no_init( x_count );
    for( int row=0; row<x_count; ++row) {
        // row `row` of x is looked up through the key -row-1
        RowSet::iterator hit = y_rows.find( -row-1 );
        if( hit == y_rows.end() ){
            res[row] = NA_INTEGER ;
        } else {
            res[row] = *hit + 1 ;
        }
    }

    return res ;
}
Example #26
0
File: bind.cpp Project: alyst/dplyr
// Column-binds the data frames in `dots` into a single data frame.
//
// Every data frame must have the same number of rows as the first one;
// otherwise the function stops with an "incompatible number of rows" error.
// Columns and their names are copied over in order, and the result gets
// class "data.frame" with row names sized after the common row count.
List cbind__impl( Dots dots ){
  int n = dots.size() ;
  
  // first check that the number of rows is the same
  DataFrame df = dots[0] ;
  int nrows = df.nrows() ;
  int nv = df.size() ;
  for( int i=1; i<n; i++){
    DataFrame current = dots[i] ;
    if( current.nrows() != nrows ){
      // report the offending row count (not the column count) and close
      // the parenthesis opened in the message
      std::stringstream ss ;
      ss << "incompatible number of rows (" 
         << current.nrows()
         << ", expecting "
         << nrows 
         << ")"
      ;
      stop( ss.str() ) ;
    }
    nv += current.size() ;
  }
  
  // collect columns
  List out(nv) ;
  CharacterVector out_names(nv) ;
  
  // k is the running output position across all data frames
  for( int i=0, k=0 ; i<n; i++){
      Rcpp::checkUserInterrupt() ;
    
      DataFrame current = dots[i] ;
      CharacterVector current_names = current.names() ;
      int nc = current.size() ;
      for( int j=0; j<nc; j++, k++){
          out[k] = shared_SEXP(current[j]) ;
          out_names[k] = current_names[j] ;
      }
  }
  out.names() = out_names ;
  set_rownames( out, nrows ) ;
  out.attr( "class") = "data.frame" ;
  return out ;
}
Example #27
0
// [[Rcpp::export]]
void assert_all_allow_list(const DataFrame& data) {
  // checking variables are on the allow list
  int nc = data.size();
  for (int i = 0; i < nc; i++) {
    if (!allow_list(data[i])) {
      SymbolVector names = data.names();
      const SymbolString& name_i = names[i];
      SEXP v = data[i];

      SEXP klass = Rf_getAttrib(v, R_ClassSymbol);
      if (!Rf_isNull(klass)) {
        bad_col(name_i, "is of unsupported class {type}",
                _["type"] = get_single_class(v));
      }
      else {
        bad_col(name_i, "is of unsupported type {type}", _["type"] = Rf_type2char(TYPEOF(v)));
      }
    }
  }
}
Example #28
0
// [[Rcpp::export]]
void assert_all_white_list(const DataFrame& data) {
  // checking variables are on the white list
  int nc = data.size();
  for (int i=0; i<nc; i++) {
    if (!white_list(data[i])) {
      SymbolVector names = data.names();
      const SymbolString& name_i = names[i];
      SEXP v = data[i];

      SEXP klass = Rf_getAttrib(v, R_ClassSymbol);
      if (!Rf_isNull(klass)) {
        stop("column '%s' has unsupported class : %s",
             name_i.get_utf8_cstring() , get_single_class(v));
      }
      else {
        stop("column '%s' has unsupported type : %s",
             name_i.get_utf8_cstring() , Rf_type2char(TYPEOF(v)));
      }
    }
  }
}
Example #29
0
// Set difference of the rows of two data frames: the distinct rows of `x`
// that do not occur in `y`. Stops with "not compatible" when
// compatible_data_frame() rejects the pair. The class attribute of `x` is
// passed on to the result.
// [[Rcpp::export]]
DataFrame setdiff_data_frame( DataFrame x, DataFrame y){
    const bool compatible = compatible_data_frame(x,y) ;
    if( !compatible ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> RowSet ;
    DataFrameJoinVisitors join_visitors(y, x, y.names() ) ;
    RowSet seen(join_visitors);

    // start with every row of y
    train_insert( seen, y.nrows() ) ;

    std::vector<int> kept ;

    const int x_count = x.nrows() ;
    for( int row=0; row<x_count; ++row) {
        // row `row` of x is addressed through the key -row-1
        const int key = -row-1 ;
        if( seen.count(key) ){
            continue ;
        }
        // first time this row is seen: remember it so duplicates are skipped
        seen.insert(key) ;
        kept.push_back(key) ;
    }

    return join_visitors.subset( kept, x.attr("class") ) ;
}
Example #30
0
// Set intersection of the rows of two data frames: rows of `x` that also
// occur in `y`, each reported once. Stops with "not compatible" when
// compatible_data_frame() rejects the pair. The class attribute of `x` is
// passed on to the result.
// [[Rcpp::export]]
DataFrame intersect_data_frame( DataFrame x, DataFrame y){
    const bool compatible = compatible_data_frame(x,y) ;
    if( !compatible ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> RowSet ;
    DataFrameJoinVisitors join_visitors(x, y, x.names() ) ;
    RowSet x_rows(join_visitors);

    // index the rows of x
    train_insert( x_rows, x.nrows() ) ;

    std::vector<int> common ;
    const int y_count = y.nrows() ;
    for( int row=0; row<y_count; ++row) {
        // row `row` of y is looked up through the key -row-1
        RowSet::iterator hit = x_rows.find( -row-1 ) ;
        if( hit == x_rows.end() ){
            continue ;
        }
        // keep the matching x row and remove it so it is reported only once
        common.push_back(*hit) ;
        x_rows.erase(hit) ;
    }

    return join_visitors.subset( common, x.attr("class") ) ;
}