C++ (Cpp) DataFrame::names Examples

Programming Language: C++ (Cpp)

Class/Type: DataFrame

Method/Function: names

Examples at hotexamples.com: 30

`DataFrame::names` is a C++ member function used to retrieve the names of the columns of a DataFrame object. In the examples below, which use the Rcpp `DataFrame` class, the result is usually stored in a `CharacterVector` (sometimes a `List` or a `SymbolVector`) in which each element is the name of one column. It is commonly used to access column names for further data manipulation or analysis.

C++ (Cpp) DataFrame::names - 30 examples found. These are the top rated real world C++ (Cpp) examples of DataFrame::names extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

attr(30)

nrows(30)

names(30)

getNumDataVectors(5)

setTotalDataIn(4)

setTotalErrorsIn(4)

setTotalDataOut(4)

setTotalPacketsOut(4)

setTotalPacketsIn(4)

setTotalDropsIn(4)

setTotalErrorsOut(4)

getTotalDataOut(3)

getTotalDataIn(3)

getTotalDropsIn(3)

getTotalDropsOut(3)

getTotalErrorsOut(3)

getTotalPacketsIn(3)

getTotalPacketsOut(3)

getTotalErrorsIn(3)

getNumFactors(3)

isNominal(3)

getDataElement(3)

matrix(2)

getLabel(2)

getDataVector(2)

getCMD(2)

findBestFeature(2)

ncol(2)

getTrainingLabel(2)

setHeader(2)

setDeviceName(1)

setFactorLabels(1)

addDataVector(1)

setFooter(1)

setIpV4(1)

setTimeStampMicroseconds(1)

setCMD(1)

setTimeStampSeconds(1)

setValid(1)

sortIndicesOnFactorValue(1)

setData(1)

isDataSetPure(1)

selectRandomFactors(1)

getSim(1)

begin(1)

computeBandwidthByFactor(1)

copy(1)

end(1)

getFactorLabelFromIndex(1)

getInstance(1)

Example #1

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Assemble the output of a join from two sets of matched row indices.
//
// All columns of `x`, and the columns of `y` that remain after removing the
// names in `by`, are subset with `indices_x` / `indices_y` respectively.
// Output column names are obtained from JoinColumnSuffixer::get(), called
// with the ".x" suffix for columns of `x` and ".y" for columns of `y`.
// The result receives `classes` as its class attribute, row names set via
// set_rownames() with indices_x.size(), and the "vars" attribute of `x`
// when that attribute is not NULL.
DataFrame subset( DataFrame x, DataFrame y, const Index& indices_x, const Index& indices_y, CharacterVector by, CharacterVector classes ){
    // left side: every column of x
    CharacterVector x_columns = x.names() ;
    DataFrameVisitors visitors_x(x, x_columns) ;

    // right side: columns of y that are not join keys
    CharacterVector all_y_columns = y.names() ;
    CharacterVector y_columns = setdiff( all_y_columns, by ) ;
    JoinColumnSuffixer suffixer(x_columns, y_columns, by) ;
    DataFrameVisitors visitors_y(y, y_columns) ;

    const int nrows = indices_x.size() ;
    const int n_left = visitors_x.size() ;
    const int n_right = visitors_y.size() ;
    const int n_total = n_left + n_right ;

    List out(n_total) ;
    CharacterVector names(n_total) ;

    // columns coming from x occupy slots [0, n_left)
    for( int j=0; j<n_left; ++j ){
        out[j] = visitors_x.get(j)->subset(indices_x) ;
        names[j] = suffixer.get( x_columns[j], ".x" ) ;
    }

    // columns coming from y occupy slots [n_left, n_total)
    for( int j=0; j<n_right; ++j ){
        const int slot = n_left + j ;
        out[slot] = visitors_y.get(j)->subset(indices_y) ;
        names[slot] = suffixer.get( y_columns[j], ".y" ) ;
    }

    out.attr("class") = classes ;
    set_rownames(out, nrows) ;
    out.names() = names ;

    // carry over the "vars" attribute of x when it exists
    SEXP vars = x.attr( "vars" ) ;
    if( !Rf_isNull(vars) ){
        out.attr( "vars" ) = vars ;
    }

    return (SEXP)out ;
}

Example #2

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Filter an ungrouped data frame with the expressions in `args`.
//
// If dots.single_env() is true, the expressions are merged into one call by
// and_calls() and evaluated once in dots.envir(0). Otherwise each expression
// args[i] is evaluated in dots.envir(i) and the logical results are merged
// with combine_and(). In both cases the rows are selected by subset() over
// all columns of `df`, with the classes_not_grouped() class vector.
SEXP filter_not_grouped( DataFrame df, List args, const DataDots& dots){
    // install each column name as a symbol and collect the symbols
    CharacterVector names = df.names() ;
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_install( names[col] ) ) ;
    }

    if( !dots.single_env() ){
        // one environment per expression: evaluate them one at a time
        int nargs = args.size() ;
        CallProxy first_proxy(args[0], df, dots.envir(0) ) ;
        LogicalVector test = first_proxy.eval() ;
        check_filter_result(test, df.nrows());

        for( int arg=1; arg<nargs; arg++){
            LogicalVector next_test = CallProxy(args[arg], df, dots.envir(arg) ).eval() ;
            combine_and(test, next_test) ;
        }

        DataFrame filtered = subset( df, test, df.names(), classes_not_grouped() ) ;
        return filtered ;
    }

    Environment env = dots.envir(0) ;
    // a, b, c ->  a & b & c
    Shield<SEXP> call( and_calls( args, set ) ) ;

    // replace the symbols that are in the data frame by vectors from the data frame
    // and evaluate the expression
    CallProxy proxy( (SEXP)call, df, env ) ;
    LogicalVector test = proxy.eval() ;
    check_filter_result(test, df.nrows());

    DataFrame filtered = subset( df, test, df.names(), classes_not_grouped() ) ;
    return filtered ;
}

Example #3

Show file

File: filter.cpp Project: regine-adhoc/dplyr

// Filter an ungrouped data frame with the lazy expressions in `dots`.
//
// With a single shared environment the expressions are merged by and_calls()
// and evaluated once; otherwise each expression is evaluated in its own
// environment and the results are merged with combine_and(). A length-one
// logical result is treated as a scalar: in the single-environment path TRUE
// returns `df` unchanged and anything else returns an empty subset; in the
// multi-environment path a false first result returns an empty subset.
DataFrame filter_not_grouped( DataFrame df, const LazyDots& dots){
    // install each column name as a symbol and collect the symbols
    CharacterVector names = df.names() ;
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_installChar( names[col] ) ) ;
    }

    if( !dots.single_env() ){
        int nargs = dots.size() ;

        Call call(dots[0].expr());
        CallProxy first_proxy(call, df, dots[0].env() ) ;
        LogicalVector test = check_filter_logical_result(first_proxy.eval()) ;
        if( test.size() != 1 ){
            check_filter_result(test, df.nrows());
        } else if( !test[0] ){
            return empty_subset(df, df.names(), classes_not_grouped() ) ;
        }

        for( int arg=1; arg<nargs; arg++){
            Rcpp::checkUserInterrupt() ;

            Call next_call( dots[arg].expr() ) ;
            CallProxy next_proxy(next_call, df, dots[arg].env() ) ;
            LogicalVector next_test = check_filter_logical_result(next_proxy.eval()) ;
            // a true return from combine_and() short-circuits to an empty result
            if( combine_and(test, next_test) ){
                return empty_subset(df, df.names(), classes_not_grouped() ) ;
            }
        }

        DataFrame filtered = subset( df, test, classes_not_grouped() ) ;
        return filtered ;
    }

    Environment env = dots[0].env() ;
    // a, b, c ->  a & b & c
    Shield<SEXP> call( and_calls( dots, set, env ) ) ;

    // replace the symbols that are in the data frame by vectors from the data frame
    // and evaluate the expression
    CallProxy proxy( (SEXP)call, df, env ) ;
    LogicalVector test = check_filter_logical_result(proxy.eval()) ;

    if( test.size() != 1 ){
        check_filter_result(test, df.nrows());
        return subset(df, test, classes_not_grouped() ) ;
    }

    // scalar result: keep everything or nothing
    if( test[0] == TRUE ){
        return df ;
    }
    return empty_subset(df, df.names(), classes_not_grouped()) ;
}

Example #4

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Semi join: select the rows of `x` that have a match in `y` on the
// by_x / by_y columns, keeping all columns of `x` and the class of `x`.
// Stops with "no variable to join by" when `by_x` is empty.
// [[Rcpp::export]]
DataFrame semi_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) stop("no variable to join by");

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > Map;
  DataFrameJoinVisitors visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  Map map(visitors);

  // train the map in terms of x
  train_push_back(map, x.nrows());

  // this will collect indices from rows in x that match rows in y
  std::vector<int> indices;
  const int n_y = y.nrows();
  for (int row = 0; row < n_y; row++) {
    // find a row in x that matches row `row` from y
    Map::iterator hit = map.find(-row - 1);
    if (hit == map.end()) {
      continue;
    }

    // collect the indices and remove them from the
    // map so that they are only found once.
    push_back(indices, hit->second);
    map.erase(hit);
  }

  const DataFrame& out = subset(x, indices, x.names(), get_class(x));
  strip_index(out);
  return out;
}

Example #5

Show file

File: distinct.cpp Project: LCHansson/dplyr

// Keep one row per distinct combination of the `vars` columns of `df`.
//
// Rows are visited in order; a row is kept when inserting its index into the
// visitor-backed set succeeds (insert(i).second is true). The kept rows are
// then subset over the `keep` columns with the class attribute of `df`.
//
// `df` is returned unchanged when it has no columns or when `vars` is empty.
// [[Rcpp::export]]
SEXP distinct_impl(DataFrame df, CharacterVector vars, CharacterVector keep) {
  if (df.size() == 0)
    return df;

  // No vars means ungrouped data with keep_all = TRUE.
  if (vars.size() == 0)
    return df;

  // `vars` is known to be non-empty from here on, so the former
  // "default vars to all column names" fallback could never run and
  // has been dropped.
  check_valid_colnames(df);
  DataFrameVisitors visitors(df, vars);

  std::vector<int> indices;
  VisitorSetIndexSet<DataFrameVisitors> set(visitors);

  int n = df.nrows();
  for (int i=0; i<n; i++) {
    if (set.insert(i).second) {
      indices.push_back(i);
    }
  }

  return DataFrameSubsetVisitors(df, keep).subset(indices, df.attr("class"));
}

Example #6

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Anti join: select the rows of `x` that have no match in `y` on the
// by_x / by_y columns, keeping all columns of `x` and the class of `x`.
// Stops with "no variable to join by" when `by_x` is empty.
// [[Rcpp::export]]
DataFrame anti_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) stop("no variable to join by");

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > Map;
  DataFrameJoinVisitors visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  Map map(visitors);

  // train the map in terms of x
  train_push_back(map, x.nrows());

  // remove the rows in x that match
  const int n_y = y.nrows();
  for (int row = 0; row < n_y; row++) {
    Map::iterator hit = map.find(-row - 1);
    if (hit == map.end()) {
      continue;
    }
    map.erase(hit);
  }

  // collect what's left
  std::vector<int> indices;
  for (Map::iterator entry = map.begin(); entry != map.end(); ++entry) {
    push_back(indices, entry->second);
  }

  const DataFrame& out = subset(x, indices, x.names(), get_class(x));
  strip_index(out);
  return out;
}

Example #7

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Semi join: select the rows of `x` that have a match in `y` on the `by`
// columns, keeping all columns of `x` and the class attribute of `x`.
// [[Rcpp::export]]
DataFrame semi_join_impl( DataFrame x, DataFrame y, CharacterVector by){
    typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > Map ;
    DataFrameJoinVisitors visitors(x, y, by) ;
    Map map(visitors);

    // train the map in terms of x
    train_push_back( map, x.nrows(), x.nrows() / 10) ;

    // this will collect indices from rows in x that match rows in y
    std::vector<int> indices ;
    const int n_y = y.nrows() ;
    for( int row=0; row<n_y; row++){
        // find a row in x that matches row `row` from y
        Map::iterator hit = map.find(-row-1) ;
        if( hit == map.end() ){
            continue ;
        }

        // collect the indices and remove them from the
        // map so that they are only found once.
        push_back( indices, hit->second ) ;
        map.erase(hit) ;
    }

    return subset(x, indices, x.names(), x.attr("class") ) ;
}

Example #8

Show file

File: dplyr.cpp Project: jimhester/dplyr

// Build the output list of a mutate from the variables held by `call_proxy`.
//
// The first df.size() slots are filled with the current value of each
// existing column (looked up by name in the proxy). The remaining slots, up
// to call_proxy.nsubsets(), are filled with entries of `results_names` that
// are not already column names of `df`. Every stored value is marked with
// SET_NAMED(..., 2). The result gets `classes`, row names sized by
// df.nrows(), and the collected names.
//
// NOTE(review): the second loop reads results_names[k] until all slots are
// filled; presumably the caller guarantees enough new names - confirm.
SEXP structure_mutate( Proxy& call_proxy, const DataFrame& df, const CharacterVector& results_names, CharacterVector classes){
    const int n = call_proxy.nsubsets() ;

    List out(n) ;
    CharacterVector names(n) ;

    CharacterVector input_names = df.names() ;
    const int ncolumns = df.size() ;

    // existing columns first, in their original order
    int filled = 0 ;
    while( filled < ncolumns ){
        out[filled] = call_proxy.get_variable(input_names[filled]) ;
        SET_NAMED( out[filled], 2 );
        names[filled] = input_names[filled] ;
        ++filled ;
    }

    // then the newly created variables
    for( int candidate=0; filled<n; ++candidate ){
        String name = results_names[candidate] ;

        // names that already exist in df were handled above
        if( any( input_names.begin(), input_names.end(), name.get_sexp() ) ){
            continue ;
        }

        SEXP x = call_proxy.get_variable( name ) ;
        out[filled] = x ;
        SET_NAMED( out[filled], 2 );
        names[filled] = name ;
        ++filled ;
    }

    out.attr("class") = classes ;
    set_rownames( out, df.nrows() ) ;
    out.names() = names;

    return out ;
}

Example #9

Show file

File: filter.cpp Project: regine-adhoc/dplyr

// Entry point for filtering: validates `df`, handles trivial cases, then
// dispatches to the grouped, rowwise or ungrouped implementation.
//
// `df` is returned unchanged when it has no rows, is NULL, when `dots` is
// empty, or when the only expression is a length-one logical equal to TRUE.
// A length-one logical that is not TRUE yields an empty subset.
// [[Rcpp::export]]
SEXP filter_impl( DataFrame df, LazyDots dots){
    if( df.nrows() == 0 || Rf_isNull(df) ) {
        return df ;
    }
    check_valid_colnames(df) ;
    assert_all_white_list(df) ;

    if( dots.size() == 0 ){
        return df ;
    }

    // special case: a single expression that is already a logical vector
    if( dots.size() == 1 && TYPEOF(dots[0].expr()) == LGLSXP){
        LogicalVector what = dots[0].expr() ;
        if( what.size() == 1 ){
            if( what[0] == TRUE ){
                return df ;
            }
            return empty_subset( df, df.names(), is<GroupedDataFrame>(df) ? classes_grouped<GroupedDataFrame>() : classes_not_grouped() ) ;
        }
    }

    if( is<GroupedDataFrame>( df ) ){
        return filter_grouped<GroupedDataFrame, LazyGroupedSubsets>( GroupedDataFrame(df), dots);
    }
    if( is<RowwiseDataFrame>(df) ){
        return filter_grouped<RowwiseDataFrame, LazyRowwiseSubsets>( RowwiseDataFrame(df), dots);
    }
    return filter_not_grouped( df, dots ) ;
}

Example #10

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Print every column name of `input` to standard output, one per line in
// the form names[i]=<name>, and return the names.
// [[Rcpp::export]]
SEXP ex10(DataFrame input){
  CharacterVector names = input.names();
  for(int idx = 0; idx < names.size(); ++idx){
    cout << "names[" << idx << "]=" << names[idx] << "\n";
  }
  return names;
}

Example #11

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Reorder the rows of `data` according to the expressions in `args`.
//
// Each expression is evaluated against `data` in dots.envir(i). A call whose
// head is the symbol `desc` is unwrapped and marks that key as descending.
// Every evaluated key must have length data.nrows(); otherwise the function
// stops with an "incorrect size" message. The resulting order is applied to
// all columns and the class attribute of `data` is kept.
// [[Rcpp::export]]
DataFrame arrange_impl( DataFrame data, List args, DataDots dots ){
    const int nargs = args.size() ;
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;
    Shelter<SEXP> __ ;

    for(int k=0; k<nargs; k++){
        SEXP call = args[k] ;
        const bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;

        // for desc(expr) evaluate the wrapped expression only
        CallProxy call_proxy( is_desc ? CADR(call) : call, data, dots.envir(k)) ;
        variables[k] = __(call_proxy.eval()) ;

        if( Rf_length(variables[k]) != data.nrows() ){
            std::stringstream msg ;
            msg << "incorrect size (" << Rf_length(variables[k])
                << "), expecting :" << data.nrows() ;
            stop(msg.str()) ;
        }
        ascending[k] = !is_desc ;
    }

    OrderVisitors o(variables,ascending, nargs) ;
    IntegerVector index = o.apply() ;

    DataFrameVisitors visitors( data, data.names() ) ;
    DataFrame sorted = visitors.subset(index, data.attr("class") ) ;
    return sorted;
}

Example #12

Show file

File: arrange.cpp Project: Klaus012/dplyr

// Reorder the rows of `data` according to the quosures in `quosures`.
//
// `data` is returned unchanged when it has no columns, no rows, or when no
// quosures are given. Each quosure is evaluated against `data`; a call whose
// head is the symbol `desc` is unwrapped and marks that key as descending.
// Keys must pass white_list(), must not be matrices, and must match
// data.nrows() in length (or in row count for data frame keys); otherwise
// the function stops with the corresponding message.
// [[Rcpp::export]]
List arrange_impl(DataFrame data, QuosureList quosures) {
  if (data.size() == 0) return data;
  check_valid_colnames(data);
  assert_all_white_list(data);

  if (quosures.size() == 0 || data.nrows() == 0) return data;

  const int nargs = quosures.size();
  List variables(nargs);
  LogicalVector ascending(nargs);

  for (int k = 0; k < nargs; k++) {
    const NamedQuosure& quosure = quosures[k];

    Shield<SEXP> call_(quosure.expr());
    SEXP call = call_;
    const bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call);

    // for desc(expr) evaluate the wrapped expression only
    CallProxy call_proxy(is_desc ? CADR(call) : call, data, quosure.env());

    Shield<SEXP> key(call_proxy.eval());
    if (!white_list(key)) {
      stop("cannot arrange column of class '%s'", get_single_class(key));
    }

    if (Rf_inherits(key, "data.frame")) {
      DataFrame df(key);
      const int nr = df.nrows();
      if (nr != data.nrows()) {
        stop("data frame column with incompatible number of rows (%d), expecting : %d", nr, data.nrows());
      }
    } else if (Rf_isMatrix(key)) {
      stop("can't arrange by a matrix");
    } else if (Rf_length(key) != data.nrows()) {
      stop("incorrect size (%d), expecting : %d", Rf_length(key), data.nrows());
    }

    variables[k] = key;
    ascending[k] = !is_desc;
  }

  OrderVisitors o(variables, ascending, nargs);
  IntegerVector index = o.apply();

  DataFrameSubsetVisitors visitors(data, data.names());
  List res = visitors.subset(index, get_class(data));

  if (!is<GroupedDataFrame>(data)) {
    SET_ATTRIB(res, strip_group_attributes(res));
    return res;
  }

  // so that all attributes are recalculated (indices ... )
  // see the lazyness feature in GroupedDataFrame
  // if we don't do that, we get the values of the un-arranged data
  // set for free from subset (#1064)
  res.attr("labels") = R_NilValue;
  copy_vars(res, data);
  return GroupedDataFrame(res).data();
}

Example #13

Show file

File: group_indices.cpp Project: ijlyttle/dplyr

// Compute the grouping metadata of `data` and attach it as attributes.
//
// The grouping variables come from get_vars(data). Each must be a column of
// `data`, pass white_list() and not be a list (VECSXP); otherwise the
// function stops. Rows are bucketed by their grouping-variable values, the
// group labels are ordered, and the attributes "indices", "group_sizes",
// "biggest_group_size" and "labels" are set on `data`, whose class becomes
// grouped_df / tbl_df / tbl / data.frame.
DataFrame build_index_cpp(DataFrame data) {
  SymbolVector vars(get_vars(data));
  const int nvars = vars.size();

  CharacterVector names = data.names();
  IntegerVector indx = vars.match_in_table(names);

  // validate every grouping variable before doing any work
  for (int k = 0; k < nvars; ++k) {
    const int pos = indx[k];
    if (pos == NA_INTEGER) {
      stop("unknown column '%s' ", vars[k].get_utf8_cstring());
    }

    // pos is used as a 1-based position here
    SEXP column = data[pos - 1];

    if (!white_list(column) || TYPEOF(column) == VECSXP) {
      stop(
        "cannot group column %s, of class '%s'",
        vars[k].get_utf8_cstring(),
        get_single_class(column));
    }
  }

  DataFrameVisitors visitors(data, vars);
  ChunkIndexMap map(visitors);

  train_push_back(map, data.nrows());

  DataFrame labels = DataFrameSubsetVisitors(data, vars).subset(map, "data.frame");
  const int ngroups = labels.nrows();
  IntegerVector labels_order = OrderVisitors(labels).apply();

  labels = DataFrameSubsetVisitors(labels).subset(labels_order, "data.frame");

  List indices(ngroups);
  IntegerVector group_sizes = no_init(ngroups);
  int biggest_group = 0;

  // remember each group's row chunk in map iteration order
  std::vector<const std::vector<int>* > chunks(ngroups);
  ChunkIndexMap::const_iterator entry = map.begin();
  for (int g = 0; g < ngroups; ++g, ++entry) {
    chunks[g] = &entry->second;
  }

  // emit the chunks in the order given by labels_order
  for (int g = 0; g < ngroups; ++g) {
    const std::vector<int>& chunk = *chunks[labels_order[g]];
    const int chunk_len = static_cast<int>(chunk.size());
    indices[g] = chunk;
    group_sizes[g] = chunk_len;
    biggest_group = std::max(biggest_group, chunk_len);
  }

  data.attr("indices") = indices;
  data.attr("group_sizes") = group_sizes;
  data.attr("biggest_group_size") = biggest_group;
  data.attr("labels") = labels;
  set_class(data, CharacterVector::create("grouped_df", "tbl_df", "tbl", "data.frame"));
  return data;
}

Example #14

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Same as printing the column names of `input`, but holding them in a List:
// each element is converted to a string before being printed as
// names[i]=<name>. The list of names is returned.
// [[Rcpp::export]]
SEXP ex10_2(DataFrame input){
  BEGIN_RCPP
  List names = input.names();
  for(int idx = 0; idx < names.size(); ++idx){
    cout << "names[" << idx << "]=" << as<string>(names[idx]) << "\n";
  }
  return wrap(names);
  END_RCPP
}

Example #15

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Sort the rows of `data` using the order computed by OrderVisitors over the
// whole data frame. All columns are kept; the result has class "data.frame".
// [[Rcpp::export]]
DataFrame sort_impl( DataFrame data ){
    OrderVisitors ordering(data) ;
    IntegerVector row_order = ordering.apply() ;

    DataFrameVisitors all_columns( data, data.names() ) ;
    DataFrame sorted = all_columns.subset(row_order, "data.frame" ) ;
    return sorted;
}

Example #16

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Convert `input` into a map from column name to column (as NumericVector)
// and return the wrapped map.
// [[Rcpp::export]]
SEXP ex11_2(DataFrame input){
  BEGIN_RCPP
  List names = input.names();
  map<string, NumericVector> mapObj;
  for(int col = 0; col < names.size(); ++col){
    const string key = as<string>(names[col]);
    mapObj.insert(pair<string, NumericVector>(key, input(col)));
  }
  return wrap(mapObj);
  END_RCPP
}

Example #17

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Demonstrates converting the names vector to a single std::string with
// as<string>(); per the inline note this requires `names` to have exactly
// one element. The names are then printed as names[i]=<name> and the
// converted string is returned.
// [[Rcpp::export]]
SEXP ex10_1(DataFrame input){
  BEGIN_RCPP
  CharacterVector names = input.names();
  // vector<string> str_names = as<string>(names); // error
  string str_names = as<string>(names); // names must have one element.
  for(int idx = 0; idx < names.size(); ++idx){
    cout << "names[" << idx << "]=" << names[idx] << "\n";
  }
  return wrap(str_names);
  END_RCPP
}

Example #18

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Copy the column names of `input` into a std::vector<std::string>, one
// element at a time, and return the wrapped vector.
// [[Rcpp::export]]
SEXP ex10_3(DataFrame input){
  BEGIN_RCPP
  List names = input.names();
  vector<string> strVec;
  for(int idx = 0; idx < names.size(); ++idx){
    strVec.push_back(as<string>(names[idx]));
  }
  return wrap(strVec);
  END_RCPP
}

Example #19

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Convert `input` into a map from column name to column (as NumericVector),
// pass it to ex12helper() together with the column name, row index and
// replacement value, and return the wrapped map.
// [[Rcpp::export]]
SEXP ex12(DataFrame input, CharacterVector columnName, int rowIndex, double replace){
  BEGIN_RCPP
  List names = input.names();
  map<string, NumericVector> mapObj;
  for(int col = 0; col < names.size(); ++col){
    const string key = as<string>(names[col]);
    mapObj.insert(pair<string, NumericVector>(key, input(col)));
  }
  ex12helper(mapObj, as<string>(columnName), rowIndex, replace);
  return wrap(mapObj);
  END_RCPP
}

Example #20

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// View `input` as a List and call ex13helper() on it once per row, passing
// the column name, the row index and the replacement value. The list is
// returned after all rows have been processed.
// [[Rcpp::export]]
SEXP ex13_2(DataFrame input, CharacterVector columnName, double replace){
  BEGIN_RCPP
  // the column names are not needed here, so they are no longer fetched
  List mapObj = as<List>(input);
  // all rows
  for(int i=0; i<input.nrows(); i++){
    ex13helper(mapObj, as<string>(columnName), i, replace);
  }
  return(wrap(mapObj));
  END_RCPP
}

Example #21

Show file

File: dplyr.cpp Project: jimhester/dplyr

// Filter an ungrouped data frame: the expressions in `args` are merged into
// a single call by and_calls(), evaluated against `df` in `env`, and the
// logical result selects rows via subset() over all columns of `df` with
// the classes_not_grouped() class vector.
SEXP filter_not_grouped( DataFrame df, List args, Environment env){
    // a, b, c ->  a & b & c
    Language combined = and_calls( args ) ;

    // replace the symbols that are in the data frame by vectors from the data frame
    // and evaluate the expression
    CallProxy evaluator( combined, df, env ) ;
    LogicalVector keep = evaluator.eval() ;

    DataFrame filtered = subset( df, keep, df.names(), classes_not_grouped() ) ;
    return filtered ;
}

Example #22

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Set union of the rows of `x` and `y`.
//
// Stops with "not compatible" unless compatible_data_frame(x, y) holds.
// Row indices of `x` and then of `y` are inserted into a visitor-backed set
// keyed on all columns of `x`; the set is then materialised with the class
// attribute of `x`.
// [[Rcpp::export]]
DataFrame union_data_frame( DataFrame x, DataFrame y){
    if( !compatible_data_frame(x,y) ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> Set ;
    DataFrameJoinVisitors visitors(x, y, x.names() ) ;
    Set rows(visitors);

    train_insert( rows, x.nrows() ) ;
    train_insert_right( rows, y.nrows() ) ;

    return visitors.subset( rows, x.attr("class") ) ;
}

Example #23

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Convert `input` into a map from column name to column (as NumericVector),
// call ex13helper() once per row with a freshly wrapped copy of the map,
// and return the wrapped map.
//
// NOTE(review): ex13helper() receives the temporary produced by
// wrap(mapObj) on every call; whether its changes reach `mapObj` depends on
// how the helper and wrap() share the underlying vectors - confirm.
// [[Rcpp::export]]
SEXP ex13(DataFrame input, CharacterVector columnName, double replace){
  BEGIN_RCPP
  List names = input.names();
  map<string, NumericVector> mapObj;
  for(int col = 0; col < names.size(); ++col){
    const string key = as<string>(names[col]);
    mapObj.insert(pair<string, NumericVector>(key, input(col)));
  }
  // all rows
  for(int row = 0; row < input.nrows(); ++row){
    ex13helper(wrap(mapObj), as<string>(columnName), row, replace);
  }
  return wrap(mapObj);
  END_RCPP
}

Example #24

Show file

File: dplyr.cpp Project: jimhester/dplyr

// Reorder `data` so that the rows of each group are contiguous and attach
// grouping metadata.
//
// The grouping variables are the names of the "vars" attribute of `data`.
// Rows are bucketed by their grouping values, the group labels are ordered,
// and `data` is rebuilt from the row chunks in that order with the
// classes_grouped() class. The attributes "group_sizes",
// "biggest_group_size" and "labels" are then set on the result.
// [[Rcpp::export]]
DataFrame build_index_cpp( DataFrame data ){
    CharacterVector vars = Rf_getAttrib( data.attr( "vars" ), R_NamesSymbol ) ;

    DataFrameVisitors visitors(data, vars) ;
    ChunkIndexMap map( visitors ) ;
    train_push_back( map, data.nrows() ) ;

    DataFrame labels = visitors.subset( map, "data.frame") ;
    const int ngroups = labels.nrows() ;

    OrderVisitors order_labels( labels, vars ) ;
    IntegerVector orders = order_labels.apply() ;

    // remember each group's row chunk in map iteration order
    std::vector< const std::vector<int>* > chunks(ngroups) ;
    ChunkIndexMap::const_iterator entry = map.begin() ;
    for( int g=0; g<ngroups; ++g, ++entry ){
        chunks[g] = &entry->second ;
    }

    // concatenate the chunks in label order, tracking the group sizes
    IntegerVector group_sizes = no_init( ngroups );
    int biggest_group = 0 ;
    std::vector<int> indices ;
    indices.reserve( data.nrows() );
    for( int g=0; g<ngroups; ++g ){
        const std::vector<int>& chunk = *chunks[orders[g]] ;
        const int chunk_len = static_cast<int>( chunk.size() ) ;
        push_back( indices, chunk ) ;
        biggest_group = std::max( biggest_group, chunk_len );
        group_sizes[g] = chunk_len ;
    }

    DataFrameVisitors all_variables_visitors(data, data.names() ) ;
    data = all_variables_visitors.subset( indices, classes_grouped() ) ;

    // TODO: we own labels, so perhaps we can do an inplace sort,
    //       to reuse its memory instead of creating a new data frame
    DataFrameVisitors labels_visitors( labels, vars) ;

    labels = labels_visitors.subset( orders, "data.frame" ) ;
    labels.attr( "vars" ) = R_NilValue ;

    data.attr( "group_sizes") = group_sizes ;
    data.attr( "biggest_group_size" ) = biggest_group ;
    data.attr( "labels" ) = labels ;
    return data ;
}

Example #25

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// For every row of `x`, look up a matching row among the rows of `y`.
//
// Stops with "not compatible" unless compatible_data_frame(x, y) holds.
// The result has one entry per row of `x`: the stored set element plus one
// when the lookup succeeds, NA_INTEGER otherwise.
// NOTE(review): the key -i-1 presumably denotes row i of the second data
// frame given to DataFrameJoinVisitors (here `x`) - confirm.
// [[Rcpp::export]]
IntegerVector match_data_frame( DataFrame x, DataFrame y){
    if( !compatible_data_frame(x,y) ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> Set ;
    DataFrameJoinVisitors visitors(y, x, x.names() ) ;
    Set rows_of_y(visitors);

    train_insert( rows_of_y, y.nrows() ) ;

    const int n_x = x.nrows() ;
    IntegerVector res = no_init( n_x );
    for( int row=0; row<n_x; row++) {
        Set::iterator hit = rows_of_y.find( -row-1 );
        if( hit == rows_of_y.end() ){
            res[row] = NA_INTEGER ;
        } else {
            res[row] = *hit + 1 ;
        }
    }

    return res ;
}

Example #26

Show file

File: bind.cpp Project: alyst/dplyr

// Column-bind all data frames in `dots` into a single list of class
// "data.frame".
//
// Every data frame must have the same number of rows as the first one;
// otherwise the function stops with an "incompatible number of rows"
// message. The output columns and names are those of the inputs, in order;
// row names are set via set_rownames() with the common row count.
//
// NOTE(review): dots[0] is read unconditionally, so `dots` is presumably
// guaranteed to be non-empty by the caller - confirm.
List cbind__impl( Dots dots ){
  int n = dots.size() ;
  
  // first check that the number of rows is the same
  DataFrame df = dots[0] ;
  int nrows = df.nrows() ;
  int nv = df.size() ;
  for( int i=1; i<n; i++){
    DataFrame current = dots[i] ;
    if( current.nrows() != nrows ){
      // report the offending row count (nrows(), not the column count
      // size()) and close the parenthesis opened in the message
      std::stringstream ss ;
      ss << "incompatible number of rows (" 
         << current.nrows()
         << ", expecting "
         << nrows 
         << ")"
      ;
      stop( ss.str() ) ;
    }
    nv += current.size() ;
  }
  
  // collect columns
  List out(nv) ;
  CharacterVector out_names(nv) ;
  
  // then do the subsequent dfs
  for( int i=0, k=0 ; i<n; i++){
      Rcpp::checkUserInterrupt() ;
    
      DataFrame current = dots[i] ;
      CharacterVector current_names = current.names() ;
      int nc = current.size() ;
      for( int j=0; j<nc; j++, k++){
          out[k] = shared_SEXP(current[j]) ;
          out_names[k] = current_names[j] ;
      }
  }
  out.names() = out_names ;
  set_rownames( out, nrows ) ;
  out.attr( "class") = "data.frame" ;
  return out ;
}

Example #27

Show file

File: utils.cpp Project: yutannihilation/dplyr

// [[Rcpp::export]]
void assert_all_allow_list(const DataFrame& data) {
  // checking variables are on the allow list
  int nc = data.size();
  for (int i = 0; i < nc; i++) {
    if (!allow_list(data[i])) {
      SymbolVector names = data.names();
      const SymbolString& name_i = names[i];
      SEXP v = data[i];

      SEXP klass = Rf_getAttrib(v, R_ClassSymbol);
      if (!Rf_isNull(klass)) {
        bad_col(name_i, "is of unsupported class {type}",
                _["type"] = get_single_class(v));
      }
      else {
        bad_col(name_i, "is of unsupported type {type}", _["type"] = Rf_type2char(TYPEOF(v)));
      }
    }
  }
}

Example #28

Show file

File: utils.cpp Project: pachevalier/dplyr

// [[Rcpp::export]]
void assert_all_white_list(const DataFrame& data) {
  // checking variables are on the white list
  int nc = data.size();
  for (int i=0; i<nc; i++) {
    if (!white_list(data[i])) {
      SymbolVector names = data.names();
      const SymbolString& name_i = names[i];
      SEXP v = data[i];

      SEXP klass = Rf_getAttrib(v, R_ClassSymbol);
      if (!Rf_isNull(klass)) {
        stop("column '%s' has unsupported class : %s",
             name_i.get_utf8_cstring() , get_single_class(v));
      }
      else {
        stop("column '%s' has unsupported type : %s",
             name_i.get_utf8_cstring() , Rf_type2char(TYPEOF(v)));
      }
    }
  }
}

Example #29

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Set difference of the rows of `x` and `y`.
//
// Stops with "not compatible" unless compatible_data_frame(x, y) holds.
// A visitor-backed set is trained on the rows of `y`; every row key of `x`
// not already in the set is added to it and recorded, so each distinct
// unmatched row is collected once. The recorded keys are materialised with
// the class attribute of `x`.
// NOTE(review): the key -i-1 presumably denotes row i of the second data
// frame given to DataFrameJoinVisitors (here `x`) - confirm.
// [[Rcpp::export]]
DataFrame setdiff_data_frame( DataFrame x, DataFrame y){
    if( !compatible_data_frame(x,y) ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> Set ;
    DataFrameJoinVisitors visitors(y, x, y.names() ) ;
    Set seen(visitors);

    train_insert( seen, y.nrows() ) ;

    std::vector<int> indices ;

    const int n_x = x.nrows() ;
    for( int row=0; row<n_x; row++) {
        if( seen.count(-row-1) ){
            continue ;
        }
        seen.insert(-row-1) ;
        indices.push_back(-row-1) ;
    }

    return visitors.subset( indices, x.attr("class") ) ;
}

Example #30

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Set intersection of the rows of `x` and `y`.
//
// Stops with "not compatible" unless compatible_data_frame(x, y) holds.
// A visitor-backed set is trained on the rows of `x`; for every row of `y`
// found in the set, the stored element is recorded and erased so that it is
// reported only once. The recorded elements are materialised with the class
// attribute of `x`.
// [[Rcpp::export]]
DataFrame intersect_data_frame( DataFrame x, DataFrame y){
    if( !compatible_data_frame(x,y) ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> Set ;
    DataFrameJoinVisitors visitors(x, y, x.names() ) ;
    Set rows_of_x(visitors);

    train_insert( rows_of_x, x.nrows() ) ;

    std::vector<int> indices ;
    const int n_y = y.nrows() ;
    for( int row=0; row<n_y; row++) {
        Set::iterator hit = rows_of_x.find( -row-1 ) ;
        if( hit == rows_of_x.end() ){
            continue ;
        }
        indices.push_back(*hit) ;
        rows_of_x.erase(hit) ;
    }

    return visitors.subset( indices, x.attr("class") ) ;
}