Esempio n. 1
0
File: bind.cpp Progetto: alyst/dplyr
// [[Rcpp::export]]
SEXP combine_all( List data ){
    int nv = data.size() ;
    if( nv == 0 ) stop("combine_all needs at least one vector") ;
    
    // get the size of the output
    int n = 0 ;
    for( int i=0; i<nv; i++){
        n += Rf_length(data[i]) ;    
    }
    
    // collect
    Collecter* coll = collecter( data[0], n ) ;
    coll->collect( SlicingIndex(0, Rf_length(data[0])), data[0] ) ;
    int k = Rf_length(data[0]) ;
    
    for( int i=1; i<nv; i++){
        SEXP current = data[i] ;
        int n_current= Rf_length(current) ;
        if( coll->compatible(current) ){
            coll->collect( SlicingIndex(k, n_current), current ) ;
        } else if( coll->can_promote(current) ) {
            Collecter* new_coll = promote_collecter(current, n, coll) ;
            new_coll->collect( SlicingIndex(k, n_current), current ) ;
            new_coll->collect( SlicingIndex(0, k), coll->get() ) ;
            delete coll ;
            coll = new_coll ;
        } else {
            std::stringstream msg ;
            msg << "incompatible type at index "
                << (i+1)
                << " : "
                << get_single_class(current)
                << ", was collecting : "
                << get_single_class(coll->get())
            ;
            stop( msg.str() ) ;
        }
        k += n_current ;
    }
    
    RObject out = coll->get() ;
    delete coll ;
    return out ;
}
Esempio n. 2
0
List rbind__impl( Dots dots ){
    int ndata = dots.size() ;
    int n = 0 ;
    std::vector<DataFrameAble> chunks ;

    for( int i=0; i<ndata; i++) {
      chunks.push_back( DataFrameAble( dots[i] ) );
      n += chunks[i].nrows() ;
    }
    pointer_vector<Collecter> columns ;

    std::vector<String> names ;
    int k=0 ;

    Function enc2native( "enc2native" ) ;
    for( int i=0; i<ndata; i++){
        Rcpp::checkUserInterrupt() ;

        const DataFrameAble& df = chunks[i] ;
        if( !df.size() ) continue ;

        int nrows = df.nrows() ;

        CharacterVector df_names = enc2native(df.names()) ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df.get(j) ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }
            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else if( all_na(source) ) {
                // do nothing, the collecter already initialized data with the
                // right NA
            } else if( coll->is_logical_all_na()  ) {
                Collecter* new_collecter = collecter( source, n ) ;
                new_collecter->collect( SlicingIndex(k, nrows), source ) ;
                delete coll ;
                columns[index] = new_collecter ;
            } else {
                std::string column_name(name) ;
                stop( "incompatible type (data index: %d, column: '%s', was collecting: %s (%s), incompatible with data of type: %s",
                    (i+1), column_name, coll->describe(), DEMANGLE(*coll), get_single_class(source) );

            }

        }

        k += nrows ;
    }
    int nc = columns.size() ;
    List out(nc) ;
    CharacterVector out_names(nc) ;
    for( int i=0; i<nc; i++){
        out[i] = columns[i]->get() ;
        out_names[i] = names[i] ;
    }
    out.attr( "names" ) = out_names ;
    set_rownames( out, n );
    out.attr( "class" ) = classes_not_grouped() ;
    return out ;
}
Esempio n. 3
0
//' @export
//' @rdname rbind
// [[Rcpp::export]]
List rbind_all( ListOf<DataFrame> dots ){
    int ndata = dots.size() ;
    int n = 0 ;
    for( int i=0; i<ndata; i++) n += dots[i].nrows() ;

    std::vector<Collecter*> columns ;
    std::vector<String> names ;
    int k=0 ;
    for( int i=0; i<ndata; i++){
        DataFrame df = dots[i] ;
        DataFrameVisitors visitors( df, df.names() ) ;
        int nrows = df.nrows() ;

        CharacterVector df_names = df.names() ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df[j] ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }

            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else {
                std::stringstream msg ;
                std::string column_name(name) ;
                msg << "incompatible type ("
                    << "data index: "
                    << (i+1)
                    << ", column: '"
                    << column_name
                    << "', was collecting: "
                    << coll->describe()
                    << " ("
                    << DEMANGLE(*coll)
                    << ")"
                    << ", incompatible with data of type: "
                    << type_name(source) ;

                stop( msg.str() ) ;
            }

        }

        k += nrows ;
    }

    int nc = columns.size() ;
    List out(nc) ;
    CharacterVector out_names(nc) ;
    for( int i=0; i<nc; i++){
        out[i] = columns[i]->get() ;
        out_names[i] = names[i] ;
    }
    out.attr( "names" ) = out_names ;
    delete_all( columns ) ;
    set_rownames( out, n );
    out.attr( "class" ) = "data.frame" ;

    return out ;
}
Esempio n. 4
0
List rbind__impl( Dots dots, SEXP id = R_NilValue ){
    int ndata = dots.size() ;
    int n = 0 ;
    DataFrameAbleVector chunks ;
    std::vector<int> df_nrows ;

    int k=0 ;
    for( int i=0; i<ndata; i++) {
      SEXP obj = dots[i] ;
      if( Rf_isNull(obj) ) continue ;
      chunks.push_back( obj ) ;
      int nrows = chunks[k].nrows() ;
      df_nrows.push_back(nrows) ;
      n += nrows ;
      k++ ;
    }
    ndata = chunks.size() ;
    pointer_vector<Collecter> columns ;

    std::vector<String> names ;

    k=0 ;
    Function enc2native( "enc2native" ) ;
    for( int i=0; i<ndata; i++){
        Rcpp::checkUserInterrupt() ;

        const DataFrameAble& df = chunks[i] ;
        if( !df.size() ) continue ;

        int nrows = df.nrows() ;

        CharacterVector df_names = enc2native(df.names()) ;
        for( int j=0; j<df.size(); j++){
            SEXP source = df.get(j) ;
            String name = df_names[j] ;

            Collecter* coll = 0;
            size_t index = 0 ;
            for( ; index < names.size(); index++){
                if( name == names[index] ){
                    coll = columns[index] ;
                    break ;
                }
            }
            if( ! coll ){
                coll = collecter( source, n ) ;
                columns.push_back( coll );
                names.push_back(name) ;
            }
            if( coll->compatible(source) ){
                // if the current source is compatible, collect
                coll->collect( SlicingIndex( k, nrows), source ) ;

            } else if( coll->can_promote(source) ) {
                // setup a new Collecter
                Collecter* new_collecter = promote_collecter(source, n, coll ) ;

                // import data from this chunk
                new_collecter->collect( SlicingIndex( k, nrows), source ) ;

                // import data from previous collecter
                new_collecter->collect( SlicingIndex(0, k), coll->get() ) ;

                // dispose the previous collecter and keep the new one.
                delete coll ;
                columns[index] = new_collecter ;

            } else if( all_na(source) ) {
                // do nothing, the collecter already initialized data with the
                // right NA
            } else if( coll->is_logical_all_na()  ) {
                Collecter* new_collecter = collecter( source, n ) ;
                new_collecter->collect( SlicingIndex(k, nrows), source ) ;
                delete coll ;
                columns[index] = new_collecter ;
            } else {
                std::string column_name(name) ;
                stop(
                  "Can not automatically convert from %s to %s in column \"%s\".",
                  coll->describe(), get_single_class(source), column_name
                ) ;
            }

        }

        k += nrows ;
    }

    int nc = columns.size() ;
    int has_id = Rf_isNull(id) ? 0 : 1;

    List out(nc + has_id) ;
    CharacterVector out_names(nc + has_id) ;
    for( int i=0; i<nc; i++){
        out[i + has_id] = columns[i]->get() ;
        out_names[i + has_id] = names[i] ;
    }

    // Add vector of identifiers if .id is supplied
    if (!Rf_isNull(id)) {
      CharacterVector df_names = dots.names() ;
      CharacterVector id_col = no_init(n) ;

      CharacterVector::iterator it = id_col.begin() ;
      for (int i=0; i<ndata; ++i) {
        std::fill( it, it + df_nrows[i], df_names[i] ) ;
        it += df_nrows[i] ;
      }
      out[0] = id_col ;
      out_names[0] = Rcpp::as<std::string>(id) ;
    }
    out.attr( "names" ) = out_names ;
    set_rownames( out, n ) ;

    // infer the classes and extra info (groups, etc ) from the first (#1692)
    if( ndata ){
      const DataFrameAble& first = chunks[0] ;
      if( first.is_dataframe() ){
        DataFrame df = first.get() ;
        out.attr("class") = df.attr("class") ;
        if( df.inherits("grouped_df") ){
          out.attr("vars") = df.attr("vars") ;
          out = GroupedDataFrame(out).data() ;
        }
      } else {
        out.attr( "class" ) = classes_not_grouped() ;
      }
    } else {
      out.attr( "class" ) = classes_not_grouped() ;
    }

    return out ;
}