// [[Rcpp::export]] SEXP combine_all( List data ){ int nv = data.size() ; if( nv == 0 ) stop("combine_all needs at least one vector") ; // get the size of the output int n = 0 ; for( int i=0; i<nv; i++){ n += Rf_length(data[i]) ; } // collect Collecter* coll = collecter( data[0], n ) ; coll->collect( SlicingIndex(0, Rf_length(data[0])), data[0] ) ; int k = Rf_length(data[0]) ; for( int i=1; i<nv; i++){ SEXP current = data[i] ; int n_current= Rf_length(current) ; if( coll->compatible(current) ){ coll->collect( SlicingIndex(k, n_current), current ) ; } else if( coll->can_promote(current) ) { Collecter* new_coll = promote_collecter(current, n, coll) ; new_coll->collect( SlicingIndex(k, n_current), current ) ; new_coll->collect( SlicingIndex(0, k), coll->get() ) ; delete coll ; coll = new_coll ; } else { std::stringstream msg ; msg << "incompatible type at index " << (i+1) << " : " << get_single_class(current) << ", was collecting : " << get_single_class(coll->get()) ; stop( msg.str() ) ; } k += n_current ; } RObject out = coll->get() ; delete coll ; return out ; }
// Row-binds every element of `dots` into a single data frame.
//
// Each input is wrapped in a DataFrameAble. Columns are matched by name (after
// passing the names through R's enc2native()); a column seen for the first
// time gets a collecter sized for the total number of rows. The result carries
// the column names, row names set through set_rownames() and the class
// returned by classes_not_grouped().
//
// Calls stop() when the data of a column can neither be collected nor
// promoted, is not all NA, and the existing collecter is not an all-NA
// logical one.
List rbind__impl( Dots dots ){
  int nchunks = dots.size() ;

  // wrap the inputs and total their rows, so collecters can be sized up front
  int total_rows = 0 ;
  std::vector<DataFrameAble> chunks ;
  for( int i=0; i<nchunks; i++){
    chunks.push_back( DataFrameAble( dots[i] ) ) ;
    total_rows += chunks[i].nrows() ;
  }

  // one collecter per output column, with the matching column name at the
  // same position in `names`.
  // NOTE(review): nothing here deletes the collecters at the end, so
  // pointer_vector presumably owns them - confirm.
  pointer_vector<Collecter> columns ;
  std::vector<String> names ;

  // first output row of the chunk being processed
  int offset = 0 ;

  Function enc2native( "enc2native" ) ;

  for( int i=0; i<nchunks; i++){
    Rcpp::checkUserInterrupt() ;

    const DataFrameAble& chunk = chunks[i] ;
    if( chunk.size() == 0 ) continue ;

    int chunk_rows = chunk.nrows() ;
    CharacterVector chunk_names = enc2native( chunk.names() ) ;

    for( int j=0; j<chunk.size(); j++){
      SEXP source = chunk.get(j) ;
      String name = chunk_names[j] ;

      // position of the column among those already known; ends up equal to
      // names.size() when the name has not been seen yet
      size_t slot = 0 ;
      while( slot < names.size() && !( name == names[slot] ) ){
        ++slot ;
      }

      Collecter* target = 0 ;
      if( slot < names.size() ){
        target = columns[slot] ;
      } else {
        // new column: it lands at position `slot` in both vectors
        target = collecter( source, total_rows ) ;
        columns.push_back( target ) ;
        names.push_back( name ) ;
      }

      // compatible data is collected as is
      if( target->compatible(source) ){
        target->collect( SlicingIndex( offset, chunk_rows ), source ) ;
        continue ;
      }

      // promotion: build a new collecter, import this chunk, then the data
      // of the previous collecter, and swap it in
      if( target->can_promote(source) ){
        Collecter* promoted = promote_collecter( source, total_rows, target ) ;
        promoted->collect( SlicingIndex( offset, chunk_rows ), source ) ;
        promoted->collect( SlicingIndex( 0, offset ), target->get() ) ;
        delete target ;
        columns[slot] = promoted ;
        continue ;
      }

      // all-NA data: nothing to do, the collecter already initialized its
      // data with the right NA
      if( all_na(source) ){
        continue ;
      }

      // the existing collecter is an all-NA logical one: start over with a
      // collecter built from this data
      if( target->is_logical_all_na() ){
        Collecter* replacement = collecter( source, total_rows ) ;
        replacement->collect( SlicingIndex( offset, chunk_rows ), source ) ;
        delete target ;
        columns[slot] = replacement ;
        continue ;
      }

      std::string column_name(name) ;
      stop(
        "incompatible type (data index: %d, column: '%s', was collecting: %s (%s), incompatible with data of type: %s",
        (i+1), column_name, target->describe(), DEMANGLE(*target), get_single_class(source)
      ) ;
    }

    offset += chunk_rows ;
  }

  // assemble the output list from the collecters
  int nc = columns.size() ;
  List out(nc) ;
  CharacterVector out_names(nc) ;
  for( int i=0; i<nc; i++){
    out[i] = columns[i]->get() ;
    out_names[i] = names[i] ;
  }
  out.attr( "names" ) = out_names ;
  set_rownames( out, total_rows ) ;
  out.attr( "class" ) = classes_not_grouped() ;

  return out ;
}
//' @export //' @rdname rbind // [[Rcpp::export]] List rbind_all( ListOf<DataFrame> dots ){ int ndata = dots.size() ; int n = 0 ; for( int i=0; i<ndata; i++) n += dots[i].nrows() ; std::vector<Collecter*> columns ; std::vector<String> names ; int k=0 ; for( int i=0; i<ndata; i++){ DataFrame df = dots[i] ; DataFrameVisitors visitors( df, df.names() ) ; int nrows = df.nrows() ; CharacterVector df_names = df.names() ; for( int j=0; j<df.size(); j++){ SEXP source = df[j] ; String name = df_names[j] ; Collecter* coll = 0; size_t index = 0 ; for( ; index < names.size(); index++){ if( name == names[index] ){ coll = columns[index] ; break ; } } if( ! coll ){ coll = collecter( source, n ) ; columns.push_back( coll ); names.push_back(name) ; } if( coll->compatible(source) ){ // if the current source is compatible, collect coll->collect( SlicingIndex( k, nrows), source ) ; } else if( coll->can_promote(source) ) { // setup a new Collecter Collecter* new_collecter = promote_collecter(source, n, coll ) ; // import data from this chunk new_collecter->collect( SlicingIndex( k, nrows), source ) ; // import data from previous collecter new_collecter->collect( SlicingIndex(0, k), coll->get() ) ; // dispose the previous collecter and keep the new one. delete coll ; columns[index] = new_collecter ; } else { std::stringstream msg ; std::string column_name(name) ; msg << "incompatible type (" << "data index: " << (i+1) << ", column: '" << column_name << "', was collecting: " << coll->describe() << " (" << DEMANGLE(*coll) << ")" << ", incompatible with data of type: " << type_name(source) ; stop( msg.str() ) ; } } k += nrows ; } int nc = columns.size() ; List out(nc) ; CharacterVector out_names(nc) ; for( int i=0; i<nc; i++){ out[i] = columns[i]->get() ; out_names[i] = names[i] ; } out.attr( "names" ) = out_names ; delete_all( columns ) ; set_rownames( out, n ); out.attr( "class" ) = "data.frame" ; return out ; }
// Row-binds the non-NULL elements of `dots` into a single data frame.
//
// dots : the inputs to bind; NULL elements are skipped.
// id   : R_NilValue (the default) for no identifier column. Otherwise it is
//        converted with Rcpp::as<std::string>() and used as the name of a
//        leading character column which holds, for every row, the name
//        (taken from dots.names()) of the input the row came from.
//
// Columns are matched by name (after passing the names through R's
// enc2native()); a column seen for the first time gets a collecter sized for
// the total number of rows.
//
// Calls stop() when the data of a column can neither be collected nor
// promoted, is not all NA, and the existing collecter is not an all-NA
// logical one.
//
// The class of the result is copied from the first retained input when that
// input is a data frame (for a "grouped_df" the "vars" attribute is copied
// too and the result is rebuilt through GroupedDataFrame); otherwise it is
// the class returned by classes_not_grouped().
List rbind__impl( Dots dots, SEXP id = R_NilValue ){
  int ndata = dots.size() ;
  int n = 0 ;
  DataFrameAbleVector chunks ;
  std::vector<int> df_nrows ;

  // position in `dots` of each retained chunk. NULL inputs are dropped from
  // `chunks`, so chunk i is not necessarily dots[i]; the identifier column
  // needs the original position to pick the right name.
  std::vector<int> dots_index ;

  int k=0 ;
  for( int i=0; i<ndata; i++) {
    SEXP obj = dots[i] ;
    if( Rf_isNull(obj) ) continue ;
    chunks.push_back( obj ) ;
    int nrows = chunks[k].nrows() ;
    df_nrows.push_back(nrows) ;
    dots_index.push_back(i) ;
    n += nrows ;
    k++ ;
  }
  // from here on ndata counts the retained chunks only
  ndata = chunks.size() ;

  // one collecter per output column, with the matching column name at the
  // same position in `names`.
  // NOTE(review): nothing here deletes the collecters at the end, so
  // pointer_vector presumably owns them - confirm.
  pointer_vector<Collecter> columns ;
  std::vector<String> names ;

  // k is now the first output row of the chunk being processed
  k=0 ;

  Function enc2native( "enc2native" ) ;
  for( int i=0; i<ndata; i++){
    Rcpp::checkUserInterrupt() ;

    const DataFrameAble& df = chunks[i] ;
    if( !df.size() ) continue ;

    int nrows = df.nrows() ;

    CharacterVector df_names = enc2native(df.names()) ;
    for( int j=0; j<df.size(); j++){
      SEXP source = df.get(j) ;
      String name = df_names[j] ;

      // look for an existing collecter for this column name; when there is
      // none, `index` ends up equal to the position of the one pushed below
      Collecter* coll = 0;
      size_t index = 0 ;
      for( ; index < names.size(); index++){
        if( name == names[index] ){
          coll = columns[index] ;
          break ;
        }
      }
      if( ! coll ){
        coll = collecter( source, n ) ;
        columns.push_back( coll );
        names.push_back(name) ;
      }

      if( coll->compatible(source) ){
        // if the current source is compatible, collect
        coll->collect( SlicingIndex( k, nrows), source ) ;

      } else if( coll->can_promote(source) ) {
        // setup a new Collecter
        Collecter* new_collecter = promote_collecter(source, n, coll ) ;

        // import data from this chunk
        new_collecter->collect( SlicingIndex( k, nrows), source ) ;

        // import data from previous collecter
        new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

        // dispose the previous collecter and keep the new one.
        delete coll ;
        columns[index] = new_collecter ;

      } else if( all_na(source) ) {
        // do nothing, the collecter already initialized data with the
        // right NA
      } else if( coll->is_logical_all_na() ) {
        // the existing collecter is an all-NA logical one: start over with
        // a collecter built from this data
        Collecter* new_collecter = collecter( source, n ) ;
        new_collecter->collect( SlicingIndex(k, nrows), source ) ;
        delete coll ;
        columns[index] = new_collecter ;
      } else {
        std::string column_name(name) ;
        stop(
          "Can not automatically convert from %s to %s in column \"%s\".",
          coll->describe(), get_single_class(source), column_name
        ) ;
      }

    }

    k += nrows ;
  }

  int nc = columns.size() ;
  // when an identifier column is requested it takes slot 0 and the data
  // columns are shifted by one
  int has_id = Rf_isNull(id) ? 0 : 1;

  List out(nc + has_id) ;
  CharacterVector out_names(nc + has_id) ;
  for( int i=0; i<nc; i++){
    out[i + has_id] = columns[i]->get() ;
    out_names[i + has_id] = names[i] ;
  }

  // Add vector of identifiers if .id is supplied
  if( has_id ){
    CharacterVector df_names = dots.names() ;
    CharacterVector id_col = no_init(n) ;

    CharacterVector::iterator it = id_col.begin() ;
    for (int i=0; i<ndata; ++i) {
      // df_names follows the positions of `dots`, not of `chunks`: go
      // through dots_index so skipped NULL inputs do not shift the labels
      std::fill( it, it + df_nrows[i], df_names[ dots_index[i] ] ) ;
      it += df_nrows[i] ;
    }
    out[0] = id_col ;
    out_names[0] = Rcpp::as<std::string>(id) ;
  }
  out.attr( "names" ) = out_names ;
  set_rownames( out, n ) ;

  // infer the classes and extra info (groups, etc ) from the first (#1692)
  if( ndata ){
    const DataFrameAble& first = chunks[0] ;
    if( first.is_dataframe() ){
      DataFrame df = first.get() ;
      out.attr("class") = df.attr("class") ;
      if( df.inherits("grouped_df") ){
        out.attr("vars") = df.attr("vars") ;
        out = GroupedDataFrame(out).data() ;
      }
    } else {
      out.attr( "class" ) = classes_not_grouped() ;
    }
  } else {
    out.attr( "class" ) = classes_not_grouped() ;
  }

  return out ;
}