예제 #1
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
SEXP filter_not_grouped( DataFrame df, List args, const DataDots& dots){
    CharacterVector names = df.names() ;
    SymbolSet set ;
    for( int i=0; i<names.size(); i++){
        set.insert( Rf_install( names[i] ) ) ;
    }

    if( dots.single_env() ){
        Environment env = dots.envir(0) ;
        // a, b, c ->  a & b & c
        Shield<SEXP> call( and_calls( args, set ) ) ;

        // replace the symbols that are in the data frame by vectors from the data frame
        // and evaluate the expression
        CallProxy proxy( (SEXP)call, df, env ) ;
        LogicalVector test = proxy.eval() ;
        check_filter_result(test, df.nrows());
        DataFrame res = subset( df, test, df.names(), classes_not_grouped() ) ;
        return res ;
    } else {
        int nargs = args.size() ;
        CallProxy first_proxy(args[0], df, dots.envir(0) ) ;
        LogicalVector test = first_proxy.eval() ;
        check_filter_result(test, df.nrows());

        for( int i=1; i<nargs; i++){
            LogicalVector test2 = CallProxy(args[i], df, dots.envir(i) ).eval() ;
            combine_and(test, test2) ;
        }

        DataFrame res = subset( df, test, df.names(), classes_not_grouped() ) ;
        return res ;
    }
}
예제 #2
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
DataFrame filter_grouped( const GroupedDataFrame& gdf, List args, const DataDots& dots){
    if( dots.single_env() ){
        return filter_grouped_single_env(gdf, args, dots.envir(0) ) ;
    } else {
        return filter_grouped_multiple_env(gdf,args,dots) ;
    }
}
예제 #3
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){
    int nexpr = args.size() ;
    CharacterVector names = args.names();

    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    std::vector<SEXP> result_names ;
    NamedListAccumulator<SEXP> accumulator ;

    Rcpp::Shelter<SEXP> __ ;
    for( int i=0; i<nexpr; i++){
        SEXP name = names[i] ;
        Environment env = dots.envir(i) ;
        Result* res = get_handler( args[i], subsets, env ) ;
        SEXP result ;
        if(res) {
            result = __(res->process( FullDataFrame(df) )) ;
        } else {
            result = __(CallProxy( args[i], subsets, env).eval()) ;
        }
        delete res ;
        subsets.input( Symbol(name), result ) ;
        accumulator.set(name, result);
    }

    return tbl_cpp( accumulator, 1 ) ;
}
예제 #4
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
// version of grouped filter when contributions to ... come from several environment
DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;
    SymbolSet set ;
    for( int i=0; i<names.size(); i++){
        set.insert( Rf_install( names[i] ) ) ;
    }

    int nrows = data.nrows() ;
    LogicalVector test(nrows, TRUE);

    LogicalVector g_test ;

    for( int k=0; k<args.size(); k++){
        Call call( (SEXP)args[k] ) ;
        GroupedCallProxy call_proxy( call, gdf, dots.envir(k) ) ;
        int ngroups = gdf.ngroups() ;
        GroupedDataFrame::group_iterator git = gdf.group_begin() ;
        for( int i=0; i<ngroups; i++, ++git){
            SlicingIndex indices = *git ;
            int chunk_size = indices.size() ;

            g_test  = call_proxy.get( indices );
            check_filter_result(g_test, chunk_size ) ;
            for( int j=0; j<chunk_size; j++){
                test[ indices[j] ] = test[ indices[j] ] & g_test[j] ;
            }
        }
    }
    DataFrame res = subset( data, test, names, classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}
예제 #5
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
// [[Rcpp::export]]
DataFrame arrange_impl( DataFrame data, List args, DataDots dots ){
    int nargs = args.size() ;
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;
    Shelter<SEXP> __ ;

    for(int i=0; i<nargs; i++){
        SEXP call = args[i] ;
        bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;

        CallProxy call_proxy( is_desc ? CADR(call) : call, data, dots.envir(i)) ;
        variables[i] = __(call_proxy.eval()) ;
        if( Rf_length(variables[i]) != data.nrows() ){
            std::stringstream s ;
            s << "incorrect size ("
              << Rf_length(variables[i])
              << "), expecting :"
              << data.nrows() ;
            stop(s.str()) ;
        }
        ascending[i] = !is_desc ;
    }
    OrderVisitors o(variables,ascending, nargs) ;
    IntegerVector index = o.apply() ;

    DataFrameVisitors visitors( data, data.names() ) ;
    DataFrame res = visitors.subset(index, data.attr("class") ) ;
    return res;
}
예제 #6
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
SEXP summarise_grouped(const GroupedDataFrame& gdf, List args, const DataDots& dots){
    DataFrame df = gdf.data() ;

    int nexpr = args.size() ;
    int nvars = gdf.nvars() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);
    NamedListAccumulator<SEXP> accumulator ;

    int i=0;
    for( ; i<nvars; i++){
        SET_NAMED(gdf.label(i), 2) ;
        accumulator.set( PRINTNAME(gdf.symbol(i)), gdf.label(i) ) ;
    }

    LazyGroupedSubsets subsets(gdf) ;
    Shelter<SEXP> __ ;
    for( int k=0; k<nexpr; k++, i++ ){
        Environment env = dots.envir(k) ;

        Result* res = get_handler( args[k], subsets, env ) ;

        // if we could not find a direct Result
        // we can use a GroupedCalledReducer which will callback to R
        if( !res ) res = new GroupedCalledReducer( args[k], subsets, env) ;

        SEXP result = __( res->process(gdf) ) ;
        SEXP name = results_names[k] ;
        accumulator.set( name, result );
        subsets.input( Symbol(name), SummarisedVariable(result) ) ;
        delete res;
    }

    return summarised_grouped_tbl_cpp(accumulator, gdf );
}
예제 #7
0
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){
    int nexpr = dots.size() ;
    if( nexpr == 0) return DataFrame() ;
    
    CharacterVector names = args.names();

    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    std::vector<SEXP> result_names ;
    NamedListAccumulator<DataFrame> accumulator ;

    Rcpp::Shelter<SEXP> __ ;
    for( int i=0; i<nexpr; i++){
        Rcpp::checkUserInterrupt() ;
        
        SEXP name = names[dots.expr_index(i)] ;
        Environment env = dots.envir(i) ;
        Result* res = get_handler( args[i], subsets, env ) ;
        
        SEXP result ;
        if(res) {
            result = __(res->process( FullDataFrame(df) )) ;
        } else {
            result = __(CallProxy( args[dots.expr_index(i)], subsets, env).eval()) ;
        }
        delete res ;
        if( Rf_length(result) != 1 ){
            std::stringstream s ;
            s << "expecting result of length one, got : "
              << Rf_length(result) ;
            stop(s.str()) ;
        }
        subsets.input( Symbol(name), result ) ;
        accumulator.set(name, result);
    }

    return tbl_cpp( accumulator, 1 ) ;
}
예제 #8
0
SEXP summarise_grouped(const DataFrame& df, List args, const DataDots& dots){
    Data gdf(df) ;
    
    int nexpr = dots.size() ;
    int nvars = gdf.nvars() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);
    NamedListAccumulator<Data> accumulator ;

    int i=0;
    for( ; i<nvars; i++){
        accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ;
    }

    Subsets subsets(gdf) ;
    Shelter<SEXP> __ ;
    for( int k=0; k<nexpr; k++, i++ ){
        Rcpp::checkUserInterrupt() ;
        
        Environment env = dots.envir(k) ;

        Result* res = get_handler( args[dots.expr_index(k)], subsets, env ) ;
        
        // if we could not find a direct Result
        // we can use a GroupedCallReducer which will callback to R
        if( !res ) res = new GroupedCallReducer<Data, Subsets>( args[dots.expr_index(k)], subsets, env) ;
        
        
        SEXP result = __( res->process(gdf) ) ;
        SEXP name = results_names[dots.expr_index(k)] ;
        accumulator.set( name, result );
        subsets.input( Symbol(name), SummarisedVariable(result) ) ;
        delete res;
    }

    return summarised_grouped_tbl_cpp<Data>(accumulator, gdf );
}
예제 #9
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
SEXP mutate_not_grouped(DataFrame df, List args, const DataDots& dots){
    Shelter<SEXP> __ ;

    Environment env = dots.envir(0) ;

    int nexpr = args.size() ;
    CharacterVector results_names = args.names() ;

    NamedListAccumulator<SEXP> accumulator ;
    int nvars = df.size() ;
    CharacterVector df_names = df.names() ;
    for( int i=0; i<nvars; i++){
        accumulator.set( df_names[i], df[i] ) ;
    }

    CallProxy call_proxy(df, env) ;
    for( int i=0; i<nexpr; i++){
        env = dots.envir(i) ;
        call_proxy.set_env(env) ;

        SEXP call = args[i] ;
        SEXP name = results_names[i] ;
        SEXP result = R_NilValue ;
        if( TYPEOF(call) == SYMSXP ){
            if(call_proxy.has_variable(call)){
                result = call_proxy.get_variable(PRINTNAME(call)) ;
            } else {
                result = env.find(CHAR(PRINTNAME(call))) ;
                SET_NAMED(result,2) ;
            }
        } else if( TYPEOF(call) == LANGSXP ){
            call_proxy.set_call( args[i] );

            // we need to protect the SEXP, that's what the Shelter does
            result = __( call_proxy.eval() ) ;

        } else if( Rf_length(call) == 1 ){
            boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, df.nrows() ) );
            result = __( gather->collect() ) ;
        } else {
            stop( "cannot handle" ) ;
        }


        if( Rf_length(result) == df.nrows() ){
            // ok
        } else if( Rf_length(result) == 1 ){
            // recycle
            Gatherer* gather = constant_gatherer( result, df.nrows() ) ;
            result = __( gather->collect() ) ;
            delete gather ;
        } else {
            std::stringstream s ;
            s << "wrong result size ("
              << Rf_length(result)
              << "), expected "
              << df.nrows()
              << " or 1" ;
            stop(s.str()) ;
        }

        call_proxy.input( name, result ) ;
        accumulator.set( name, result );
    }

    List res = structure_mutate(accumulator, df, classes_not_grouped() ) ;

    return res ;
}
예제 #10
0
파일: dplyr.cpp 프로젝트: kevinushey/dplyr
SEXP mutate_grouped(GroupedDataFrame gdf, List args, const DataDots& dots){
    const DataFrame& df = gdf.data() ;
    int nexpr = args.size() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);

    Environment env = dots.envir(0) ;
    GroupedCallProxy proxy(gdf, env) ;
    Shelter<SEXP> __ ;

    NamedListAccumulator<SEXP> accumulator ;
    int ncolumns = df.size() ;
    CharacterVector column_names = df.names() ;
    for( int i=0; i<ncolumns; i++){
        accumulator.set( column_names[i], df[i] ) ;
    }

    for( int i=0; i<nexpr; i++){
        env = dots.envir(i) ;
        proxy.set_env( env ) ;
        SEXP call = args[i] ;
        SEXP name = results_names[i] ;
        SEXP variable = R_NilValue ;
        if( TYPEOF(call) == SYMSXP ){
            if(proxy.has_variable(call)){
                variable = proxy.get_variable( PRINTNAME(call) ) ;
            } else {
                SEXP v = env.find(CHAR(PRINTNAME(call))) ;
                if( Rf_isNull(v) ){
                    std::stringstream s ;
                    s << "unknown variable: " << CHAR(PRINTNAME(call)) ;
                    stop(s.str());
                } else if( Rf_length(v) == 1){
                    Replicator* rep = constant_replicator(v, gdf.nrows() );
                    variable = __( rep->collect() );
                    delete rep ;
                } else {
                    Replicator* rep = replicator(v, gdf) ;
                    variable = __( rep->collect() );
                    delete rep ;
                }
            }

        } else if(TYPEOF(call) == LANGSXP){
            proxy.set_call( call );
            Gatherer* gather = gatherer( proxy, gdf ) ;
            variable = __( gather->collect() ) ;
            delete gather ;
        } else if(Rf_length(call) == 1) {
            boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, gdf.nrows() ) );
            variable = __( gather->collect() ) ;
        } else {
            stop( "cannot handle" ) ;
        }

        proxy.input( name, variable ) ;
        accumulator.set( name, variable) ;
    }

    return structure_mutate(accumulator, df, classes_grouped() );
}
예제 #11
0
파일: arrange.cpp 프로젝트: kireru/dplyr
// [[Rcpp::export]]
List arrange_impl( DataFrame data, List args, DataDots dots ){
    check_valid_colnames(data) ;
    assert_all_white_list(data) ;
    
    // special case arrange() with no arguments for grouped data
    if( dots.size() == 0 && is<GroupedDataFrame>(data) ){
        DataFrame labels( data.attr( "labels" ) );
        OrderVisitors o(labels) ;
        IntegerVector index = o.apply() ;
        
        // reorganize
        labels = DataFrameVisitors( labels, labels.names() ).subset( index, labels.attr("class") );
        
        ListOf<IntegerVector> indices( data.attr("indices") ) ;
        int ngroups = indices.size() ;
        List new_indices(ngroups) ;
        IntegerVector master_index(data.nrows()) ;
        for( int i=0; i<ngroups; i++){
            new_indices[index[i]] = indices[i] ;
        }
        IntegerVector group_sizes = data.attr("group_sizes") ;
        IntegerVector new_group_sizes(ngroups); 
        for( int i=0, k=0; i<ngroups; i++){
            IntegerVector idx = new_indices[i] ;
            IntegerVector new_group_index = seq(k, k + idx.size() - 1 );
            for( int j=0; j<idx.size(); j++, k++){
                master_index[k] = idx[j] ;    
            }
            new_indices[i] = new_group_index ;
            new_group_sizes[i] = idx.size() ;
        }
        
        DataFrame res = DataFrameVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ;
        res.attr( "labels" )  = labels ;
        res.attr( "indices" ) = new_indices ;
        res.attr( "vars"    ) = data.attr("vars" ) ;
        res.attr( "group_sizes" ) = new_group_sizes ;
        res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ;
        res.attr( "drop" ) = data.attr("drop") ;
        return res ;
    }
    
    if( dots.size() == 0 || data.nrows() == 0) return data ;
    
    int nargs = dots.size() ;
    if( is<GroupedDataFrame>(data) ){
        nargs += GroupedDataFrame(data).nvars() ;
    }
    
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;
    
    int k = 0 ;
    if( is<GroupedDataFrame>(data) ){
        GroupedDataFrame gdf(data);
        for( ; k< gdf.nvars(); k++) {
            ascending[k] = true ;
            
            String s = PRINTNAME(gdf.symbol(k));
            variables[k] = data[s] ; 
        }
    }            
    
    for(int i=0; k<nargs; i++, k++){
        Shelter<SEXP> __ ;
    
        SEXP call = args[dots.expr_index(i)] ;
        bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ;
        
        CallProxy call_proxy(is_desc ? CADR(call) : call, data, dots.envir(i)) ;
        
        SEXP v = __(call_proxy.eval()) ;
        if( !white_list(v) || TYPEOF(v) == VECSXP ){
            std::stringstream ss ;
            ss << "cannot arrange column of class '"
               << get_single_class(v) 
               << "'" ;
            stop(ss.str()) ;
        }
        
        if( Rf_length(v) != data.nrows() ){
            std::stringstream s ;
            s << "incorrect size ("
              << Rf_length(v)
              << "), expecting :"
              << data.nrows() ;
            stop(s.str()) ;
        }
        variables[k] = v ;
        ascending[k] = !is_desc ;
    }
    OrderVisitors o(variables, ascending, nargs) ;
    IntegerVector index = o.apply() ;
    
    DataFrameVisitors visitors( data, data.names() ) ;
    List res = visitors.subset(index, data.attr("class") ) ;
    SET_ATTRIB(res, strip_group_attributes(res));
    return res ;
}