Пример #1
0
//' @export
//' @rdname rbind
// [[Rcpp::export]]
List rbind_all( ListOf<DataFrame> dots ){
    int ndata = dots.size() ;
    int n = 0 ;
    for( int i=0; i<ndata; i++) n += dots[i].nrows() ;

    std::vector<Collecter*> columns ;
    std::vector<String> names ;
    int k=0 ;
    for( int i=0; i<ndata; i++){
        DataFrame df = dots[i] ;
        DataFrameVisitors visitors( df, df.names() ) ;
        int nrows = df.nrows() ;

        CharacterVector df_names = df.names() ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df[j] ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }

            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else {
                std::stringstream msg ;
                std::string column_name(name) ;
                msg << "incompatible type ("
                    << "data index: "
                    << (i+1)
                    << ", column: '"
                    << column_name
                    << "', was collecting: "
                    << coll->describe()
                    << " ("
                    << DEMANGLE(*coll)
                    << ")"
                    << ", incompatible with data of type: "
                    << type_name(source) ;

                stop( msg.str() ) ;
            }

        }

        k += nrows ;
    }

    int nc = columns.size() ;
    List out(nc) ;
    CharacterVector out_names(nc) ;
    for( int i=0; i<nc; i++){
        out[i] = columns[i]->get() ;
        out_names[i] = names[i] ;
    }
    out.attr( "names" ) = out_names ;
    delete_all( columns ) ;
    set_rownames( out, n );
    out.attr( "class" ) = "data.frame" ;

    return out ;
}
Пример #2
0
List rbind__impl( Dots dots ){
    int ndata = dots.size() ;
    int n = 0 ;
    std::vector<DataFrameAble> chunks ;

    for( int i=0; i<ndata; i++) {
      chunks.push_back( DataFrameAble( dots[i] ) );
      n += chunks[i].nrows() ;
    }
    pointer_vector<Collecter> columns ;

    std::vector<String> names ;
    int k=0 ;

    Function enc2native( "enc2native" ) ;
    for( int i=0; i<ndata; i++){
        Rcpp::checkUserInterrupt() ;

        const DataFrameAble& df = chunks[i] ;
        if( !df.size() ) continue ;

        int nrows = df.nrows() ;

        CharacterVector df_names = enc2native(df.names()) ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df.get(j) ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }
            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else if( all_na(source) ) {
                // do nothing, the collecter already initialized data with the
                // right NA
            } else if( coll->is_logical_all_na()  ) {
                Collecter* new_collecter = collecter( source, n ) ;
                new_collecter->collect( SlicingIndex(k, nrows), source ) ;
                delete coll ;
                columns[index] = new_collecter ;
            } else {
                std::string column_name(name) ;
                stop( "incompatible type (data index: %d, column: '%s', was collecting: %s (%s), incompatible with data of type: %s",
                    (i+1), column_name, coll->describe(), DEMANGLE(*coll), get_single_class(source) );

            }

        }

        k += nrows ;
    }
    int nc = columns.size() ;
    List out(nc) ;
    CharacterVector out_names(nc) ;
    for( int i=0; i<nc; i++){
        out[i] = columns[i]->get() ;
        out_names[i] = names[i] ;
    }
    out.attr( "names" ) = out_names ;
    set_rownames( out, n );
    out.attr( "class" ) = classes_not_grouped() ;
    return out ;
}
Пример #3
0
List rbind__impl( Dots dots, SEXP id = R_NilValue ){
    int ndata = dots.size() ;
    int n = 0 ;
    DataFrameAbleVector chunks ;
    std::vector<int> df_nrows ;

    int k=0 ;
    for( int i=0; i<ndata; i++) {
      SEXP obj = dots[i] ;
      if( Rf_isNull(obj) ) continue ;
      chunks.push_back( obj ) ;
      int nrows = chunks[k].nrows() ;
      df_nrows.push_back(nrows) ;
      n += nrows ;
      k++ ;
    }
    ndata = chunks.size() ;
    pointer_vector<Collecter> columns ;

    std::vector<String> names ;

    k=0 ;
    Function enc2native( "enc2native" ) ;
    for( int i=0; i<ndata; i++){
        Rcpp::checkUserInterrupt() ;

        const DataFrameAble& df = chunks[i] ;
        if( !df.size() ) continue ;

        int nrows = df.nrows() ;

        CharacterVector df_names = enc2native(df.names()) ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df.get(j) ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }
            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else if( all_na(source) ) {
                // do nothing, the collecter already initialized data with the
                // right NA
            } else if( coll->is_logical_all_na()  ) {
                Collecter* new_collecter = collecter( source, n ) ;
                new_collecter->collect( SlicingIndex(k, nrows), source ) ;
                delete coll ;
                columns[index] = new_collecter ;
            } else {
                std::string column_name(name) ;
                stop(
                  "Can not automatically convert from %s to %s in column \"%s\".",
                  coll->describe(), get_single_class(source), column_name
                ) ;
            }

        }

        k += nrows ;
    }

    int nc = columns.size() ;
    int has_id = Rf_isNull(id) ? 0 : 1;

    List out(nc + has_id) ;
    CharacterVector out_names(nc + has_id) ;
    for( int i=0; i<nc; i++){
        out[i + has_id] = columns[i]->get() ;
        out_names[i + has_id] = names[i] ;
    }

    // Add vector of identifiers if .id is supplied
    if (!Rf_isNull(id)) {
      CharacterVector df_names = dots.names() ;
      CharacterVector id_col = no_init(n) ;

      CharacterVector::iterator it = id_col.begin() ;
      for (int i=0; i<ndata; ++i) {
        std::fill( it, it + df_nrows[i], df_names[i] ) ;
        it += df_nrows[i] ;
      }
      out[0] = id_col ;
      out_names[0] = Rcpp::as<std::string>(id) ;
    }
    out.attr( "names" ) = out_names ;
    set_rownames( out, n ) ;

    // infer the classes and extra info (groups, etc ) from the first (#1692)
    if( ndata ){
      const DataFrameAble& first = chunks[0] ;
      if( first.is_dataframe() ){
        DataFrame df = first.get() ;
        out.attr("class") = df.attr("class") ;
        if( df.inherits("grouped_df") ){
          out.attr("vars") = df.attr("vars") ;
          out = GroupedDataFrame(out).data() ;
        }
      } else {
        out.attr( "class" ) = classes_not_grouped() ;
      }
    } else {
      out.attr( "class" ) = classes_not_grouped() ;
    }

    return out ;
}