SEXP summarise_grouped(const GroupedDataFrame& gdf, List args, const DataDots& dots){ DataFrame df = gdf.data() ; int nexpr = args.size() ; int nvars = gdf.nvars() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); NamedListAccumulator<SEXP> accumulator ; int i=0; for( ; i<nvars; i++){ SET_NAMED(gdf.label(i), 2) ; accumulator.set( PRINTNAME(gdf.symbol(i)), gdf.label(i) ) ; } LazyGroupedSubsets subsets(gdf) ; Shelter<SEXP> __ ; for( int k=0; k<nexpr; k++, i++ ){ Environment env = dots.envir(k) ; Result* res = get_handler( args[k], subsets, env ) ; // if we could not find a direct Result // we can use a GroupedCalledReducer which will callback to R if( !res ) res = new GroupedCalledReducer( args[k], subsets, env) ; SEXP result = __( res->process(gdf) ) ; SEXP name = results_names[k] ; accumulator.set( name, result ); subsets.input( Symbol(name), SummarisedVariable(result) ) ; delete res; } return summarised_grouped_tbl_cpp(accumulator, gdf ); }
SEXP summarise_grouped(const DataFrame& df, const LazyDots& dots){ Data gdf(df) ; int nexpr = dots.size() ; int nvars = gdf.nvars() ; check_not_groups(dots, gdf); NamedListAccumulator<Data> accumulator ; int i=0; for( ; i<nvars; i++){ accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ; } Subsets subsets(gdf) ; for( int k=0; k<nexpr; k++, i++ ){ Rcpp::checkUserInterrupt() ; const Lazy& lazy = dots[k] ; const Environment& env = lazy.env() ; Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ; boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) ); // if we could not find a direct Result // we can use a GroupedCallReducer which will callback to R if( !res ) { res.reset( new GroupedCallReducer<Data, Subsets>( lazy.expr(), subsets, env) ); } Shield<SEXP> result( res->process(gdf) ) ; accumulator.set( lazy.name(), result ); subsets.input( lazy.name(), SummarisedVariable(result) ) ; } return summarised_grouped_tbl_cpp<Data>(accumulator, gdf ); }
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){ int nexpr = args.size() ; CharacterVector names = args.names(); LazySubsets subsets( df ) ; std::vector<SEXP> results ; std::vector<SEXP> result_names ; NamedListAccumulator<SEXP> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ SEXP name = names[i] ; Environment env = dots.envir(i) ; Result* res = get_handler( args[i], subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( args[i], subsets, env).eval()) ; } delete res ; subsets.input( Symbol(name), result ) ; accumulator.set(name, result); } return tbl_cpp( accumulator, 1 ) ; }
SEXP summarise_not_grouped(DataFrame df, const LazyDots& dots){ int nexpr = dots.size() ; if( nexpr == 0) return DataFrame() ; LazySubsets subsets( df ) ; std::vector<SEXP> results ; NamedListAccumulator<DataFrame> accumulator ; for( int i=0; i<nexpr; i++){ Rcpp::checkUserInterrupt() ; const Lazy& lazy = dots[i] ; Environment env = lazy.env() ; Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ; boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) ) ; RObject result ; if(res) { result = res->process( FullDataFrame(df) ) ; } else { result = CallProxy( lazy.expr(), subsets, env).eval() ; } if( Rf_length(result) != 1 ){ stop( "expecting result of length one, got : %d", Rf_length(result) ) ; } accumulator.set(lazy.name(), result); subsets.input( lazy.name(), result ) ; } return tbl_cpp( accumulator, 1 ) ; }
SEXP summarise_not_grouped(DataFrame df, const LazyDots& dots){ int nexpr = dots.size() ; if( nexpr == 0) return DataFrame() ; LazySubsets subsets( df ) ; std::vector<SEXP> results ; NamedListAccumulator<DataFrame> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ Rcpp::checkUserInterrupt() ; const Lazy& lazy = dots[i] ; Environment env = lazy.env() ; Result* res = get_handler( lazy.expr(), subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( lazy.expr(), subsets, env).eval()) ; } delete res ; if( Rf_length(result) != 1 ){ stop( "expecting result of length one, got : %d", Rf_length(result) ) ; } accumulator.set(lazy.name(), result); subsets.input( Symbol(lazy.name()), result ) ; } return tbl_cpp( accumulator, 1 ) ; }
SEXP summarise_grouped(const DataFrame& df, List args, const DataDots& dots){ Data gdf(df) ; int nexpr = dots.size() ; int nvars = gdf.nvars() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); NamedListAccumulator<Data> accumulator ; int i=0; for( ; i<nvars; i++){ accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ; } Subsets subsets(gdf) ; Shelter<SEXP> __ ; for( int k=0; k<nexpr; k++, i++ ){ Rcpp::checkUserInterrupt() ; Environment env = dots.envir(k) ; Result* res = get_handler( args[dots.expr_index(k)], subsets, env ) ; // if we could not find a direct Result // we can use a GroupedCallReducer which will callback to R if( !res ) res = new GroupedCallReducer<Data, Subsets>( args[dots.expr_index(k)], subsets, env) ; SEXP result = __( res->process(gdf) ) ; SEXP name = results_names[dots.expr_index(k)] ; accumulator.set( name, result ); subsets.input( Symbol(name), SummarisedVariable(result) ) ; delete res; } return summarised_grouped_tbl_cpp<Data>(accumulator, gdf ); }
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){ int nexpr = dots.size() ; if( nexpr == 0) return DataFrame() ; CharacterVector names = args.names(); LazySubsets subsets( df ) ; std::vector<SEXP> results ; std::vector<SEXP> result_names ; NamedListAccumulator<DataFrame> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ Rcpp::checkUserInterrupt() ; SEXP name = names[dots.expr_index(i)] ; Environment env = dots.envir(i) ; Result* res = get_handler( args[i], subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( args[dots.expr_index(i)], subsets, env).eval()) ; } delete res ; if( Rf_length(result) != 1 ){ std::stringstream s ; s << "expecting result of length one, got : " << Rf_length(result) ; stop(s.str()) ; } subsets.input( Symbol(name), result ) ; accumulator.set(name, result); } return tbl_cpp( accumulator, 1 ) ; }
SEXP mutate_not_grouped(DataFrame df, List args, const DataDots& dots){ Shelter<SEXP> __ ; Environment env = dots.envir(0) ; int nexpr = args.size() ; CharacterVector results_names = args.names() ; NamedListAccumulator<SEXP> accumulator ; int nvars = df.size() ; CharacterVector df_names = df.names() ; for( int i=0; i<nvars; i++){ accumulator.set( df_names[i], df[i] ) ; } CallProxy call_proxy(df, env) ; for( int i=0; i<nexpr; i++){ env = dots.envir(i) ; call_proxy.set_env(env) ; SEXP call = args[i] ; SEXP name = results_names[i] ; SEXP result = R_NilValue ; if( TYPEOF(call) == SYMSXP ){ if(call_proxy.has_variable(call)){ result = call_proxy.get_variable(PRINTNAME(call)) ; } else { result = env.find(CHAR(PRINTNAME(call))) ; SET_NAMED(result,2) ; } } else if( TYPEOF(call) == LANGSXP ){ call_proxy.set_call( args[i] ); // we need to protect the SEXP, that's what the Shelter does result = __( call_proxy.eval() ) ; } else if( Rf_length(call) == 1 ){ boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, df.nrows() ) ); result = __( gather->collect() ) ; } else { stop( "cannot handle" ) ; } if( Rf_length(result) == df.nrows() ){ // ok } else if( Rf_length(result) == 1 ){ // recycle Gatherer* gather = constant_gatherer( result, df.nrows() ) ; result = __( gather->collect() ) ; delete gather ; } else { std::stringstream s ; s << "wrong result size (" << Rf_length(result) << "), expected " << df.nrows() << " or 1" ; stop(s.str()) ; } call_proxy.input( name, result ) ; accumulator.set( name, result ); } List res = structure_mutate(accumulator, df, classes_not_grouped() ) ; return res ; }
SEXP mutate_grouped(GroupedDataFrame gdf, List args, const DataDots& dots){ const DataFrame& df = gdf.data() ; int nexpr = args.size() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); Environment env = dots.envir(0) ; GroupedCallProxy proxy(gdf, env) ; Shelter<SEXP> __ ; NamedListAccumulator<SEXP> accumulator ; int ncolumns = df.size() ; CharacterVector column_names = df.names() ; for( int i=0; i<ncolumns; i++){ accumulator.set( column_names[i], df[i] ) ; } for( int i=0; i<nexpr; i++){ env = dots.envir(i) ; proxy.set_env( env ) ; SEXP call = args[i] ; SEXP name = results_names[i] ; SEXP variable = R_NilValue ; if( TYPEOF(call) == SYMSXP ){ if(proxy.has_variable(call)){ variable = proxy.get_variable( PRINTNAME(call) ) ; } else { SEXP v = env.find(CHAR(PRINTNAME(call))) ; if( Rf_isNull(v) ){ std::stringstream s ; s << "unknown variable: " << CHAR(PRINTNAME(call)) ; stop(s.str()); } else if( Rf_length(v) == 1){ Replicator* rep = constant_replicator(v, gdf.nrows() ); variable = __( rep->collect() ); delete rep ; } else { Replicator* rep = replicator(v, gdf) ; variable = __( rep->collect() ); delete rep ; } } } else if(TYPEOF(call) == LANGSXP){ proxy.set_call( call ); Gatherer* gather = gatherer( proxy, gdf ) ; variable = __( gather->collect() ) ; delete gather ; } else if(Rf_length(call) == 1) { boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, gdf.nrows() ) ); variable = __( gather->collect() ) ; } else { stop( "cannot handle" ) ; } proxy.input( name, variable ) ; accumulator.set( name, variable) ; } return structure_mutate(accumulator, df, classes_grouped() ); }
SEXP mutate_not_grouped(DataFrame df, const LazyDots& dots) { int nexpr = dots.size(); int nrows = df.nrows(); NamedListAccumulator<DataFrame> accumulator; int nvars = df.size(); if (nvars) { CharacterVector df_names = df.names(); for (int i=0; i<nvars; i++) { accumulator.set(Symbol(df_names[i]), df[i]); } } CallProxy call_proxy(df); List results(nexpr); for (int i=0; i<nexpr; i++) { Rcpp::checkUserInterrupt(); const Lazy& lazy = dots[i]; Shield<SEXP> call_(lazy.expr()); SEXP call = call_; Symbol name = lazy.name(); Environment env = lazy.env(); call_proxy.set_env(env); if (TYPEOF(call) == SYMSXP) { if (call_proxy.has_variable(call)) { results[i] = call_proxy.get_variable(PRINTNAME(call)); } else { results[i] = shared_SEXP(env.find(CHAR(PRINTNAME(call)))); } } else if (TYPEOF(call) == LANGSXP) { call_proxy.set_call(call); results[i] = call_proxy.eval(); } else if (Rf_length(call) == 1) { boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, nrows)); results[i] = gather->collect(); } else if (Rf_isNull(call)) { accumulator.rm(name); continue; } else { stop("cannot handle"); } check_supported_type(results[i], name.c_str()); if (Rf_inherits(results[i], "POSIXlt")) { stop("`mutate` does not support `POSIXlt` results"); } int n_res = Rf_length(results[i]); if (n_res == nrows) { // ok } else if (n_res == 1) { // recycle boost::scoped_ptr<Gatherer> gather(constant_gatherer(results[i] , df.nrows())); results[i] = gather->collect(); } else { stop("wrong result size (%d), expected %d or 1", n_res, nrows); } call_proxy.input(name, results[i]); accumulator.set(name, results[i]); } List res = structure_mutate(accumulator, df, classes_not_grouped()); return res; }
SEXP mutate_grouped(const DataFrame& df, const LazyDots& dots) { LOG_VERBOSE << "checking zero rows"; // special 0 rows case if (df.nrows() == 0) { DataFrame res = mutate_not_grouped(df, dots); res.attr("vars") = df.attr("vars"); res.attr("class") = df.attr("class"); return Data(res).data(); } LOG_VERBOSE << "initializing proxy"; typedef GroupedCallProxy<Data, Subsets> Proxy; Data gdf(df); int nexpr = dots.size(); check_not_groups(dots, gdf); Proxy proxy(gdf); LOG_VERBOSE << "copying data to accumulator"; NamedListAccumulator<Data> accumulator; int ncolumns = df.size(); CharacterVector column_names = df.names(); for (int i=0; i<ncolumns; i++) { accumulator.set(Symbol(column_names[i]), df[i]); } LOG_VERBOSE << "processing " << nexpr << " variables"; List variables(nexpr); for (int i=0; i<nexpr; i++) { Rcpp::checkUserInterrupt(); const Lazy& lazy = dots[i]; Environment env = lazy.env(); Shield<SEXP> call_(lazy.expr()); SEXP call = call_; Symbol name = lazy.name(); proxy.set_env(env); LOG_VERBOSE << "processing " << CharacterVector(name); if (TYPEOF(call) == LANGSXP || TYPEOF(call) == SYMSXP) { proxy.set_call(call); boost::scoped_ptr<Gatherer> gather(gatherer<Data, Subsets>(proxy, gdf, name)); SEXP variable = variables[i] = gather->collect(); proxy.input(name, variable); accumulator.set(name, variable); } else if (Rf_length(call) == 1) { boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, gdf.nrows())); SEXP variable = variables[i] = gather->collect(); proxy.input(name, variable); accumulator.set(name, variable); } else if (Rf_isNull(call)) { accumulator.rm(name); continue; } else { stop("cannot handle"); } } return structure_mutate(accumulator, df, df.attr("class")); }
SEXP mutate_not_grouped(DataFrame df, const QuosureList& dots) { const int nexpr = dots.size(); const int nrows = df.nrows(); NamedListAccumulator<DataFrame> accumulator; const int nvars = df.size(); if (nvars) { CharacterVector df_names = df.names(); for (int i = 0; i < nvars; i++) { accumulator.set(df_names[i], df[i]); } } CallProxy call_proxy(df); List results(nexpr); for (int i = 0; i < nexpr; i++) { Rcpp::checkUserInterrupt(); const NamedQuosure& quosure = dots[i]; Shield<SEXP> call_(quosure.expr()); SEXP call = call_; SymbolString name = quosure.name(); Environment env = quosure.env(); call_proxy.set_env(env); if (TYPEOF(call) == SYMSXP) { SymbolString call_name = SymbolString(Symbol(call)); if (call_proxy.has_variable(call_name)) { results[i] = call_proxy.get_variable(call_name); } else { results[i] = shared_SEXP(env.find(call_name.get_string())); } } else if (TYPEOF(call) == LANGSXP) { call_proxy.set_call(call); results[i] = call_proxy.eval(); } else if (Rf_length(call) == 1) { boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, nrows)); results[i] = gather->collect(); } else if (Rf_isNull(call)) { accumulator.rm(name); continue; } else { stop("cannot handle"); } if (Rf_inherits(results[i], "POSIXlt")) { stop("`mutate` does not support `POSIXlt` results"); } const int n_res = Rf_length(results[i]); check_supported_type(results[i], name); check_length(n_res, nrows, "the number of rows"); if (n_res == 1 && nrows != 1) { // recycle boost::scoped_ptr<Gatherer> gather(constant_gatherer(results[i], nrows)); results[i] = gather->collect(); } call_proxy.input(name, results[i]); accumulator.set(name, results[i]); } List res = structure_mutate(accumulator, df, classes_not_grouped()); return res; }
SEXP summarise_grouped(const DataFrame& df, const LazyDots& dots){ Data gdf(df) ; int nexpr = dots.size() ; int nvars = gdf.nvars() ; check_not_groups(dots, gdf); NamedListAccumulator<Data> accumulator ; int i=0; List results(nvars + nexpr) ; for( ; i<nvars; i++){ results[i] = shared_SEXP(gdf.label(i)) ; accumulator.set( PRINTNAME(gdf.symbol(i)), results[i] ) ; } Subsets subsets(gdf) ; for( int k=0; k<nexpr; k++, i++ ){ Rcpp::checkUserInterrupt() ; const Lazy& lazy = dots[k] ; const Environment& env = lazy.env() ; Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ; boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) ); // if we could not find a direct Result // we can use a GroupedCallReducer which will callback to R if( !res ) { res.reset( new GroupedCallReducer<Data, Subsets>( lazy.expr(), subsets, env) ); } RObject result = res->process(gdf) ; results[i] = result ; accumulator.set( lazy.name(), result ); subsets.input( lazy.name(), SummarisedVariable(result) ) ; } List out = accumulator ; copy_most_attributes( out, df) ; out.names() = accumulator.names() ; int nr = gdf.ngroups() ; set_rownames(out, nr ) ; if( gdf.nvars() > 1){ out.attr( "class" ) = classes_grouped<Data>() ; List vars = gdf.data().attr("vars") ; vars.erase( gdf.nvars() - 1) ; out.attr( "vars") = vars ; out.attr( "labels") = R_NilValue ; out.attr( "indices") = R_NilValue ; out.attr( "group_sizes") = R_NilValue ; out.attr( "biggest_group_size") = R_NilValue ; out.attr( "drop" ) = true ; } else { out.attr( "class" ) = classes_not_grouped() ; SET_ATTRIB( out, strip_group_attributes(out) ) ; } return out ; }