DataFrame filter_grouped( const GroupedDataFrame& gdf, List args, const DataDots& dots){ if( dots.single_env() ){ return filter_grouped_single_env(gdf, args, dots.envir(0) ) ; } else { return filter_grouped_multiple_env(gdf,args,dots) ; } }
SEXP filter_not_grouped( DataFrame df, List args, const DataDots& dots){ CharacterVector names = df.names() ; SymbolSet set ; for( int i=0; i<names.size(); i++){ set.insert( Rf_install( names[i] ) ) ; } if( dots.single_env() ){ Environment env = dots.envir(0) ; // a, b, c -> a & b & c Shield<SEXP> call( and_calls( args, set ) ) ; // replace the symbols that are in the data frame by vectors from the data frame // and evaluate the expression CallProxy proxy( (SEXP)call, df, env ) ; LogicalVector test = proxy.eval() ; check_filter_result(test, df.nrows()); DataFrame res = subset( df, test, df.names(), classes_not_grouped() ) ; return res ; } else { int nargs = args.size() ; CallProxy first_proxy(args[0], df, dots.envir(0) ) ; LogicalVector test = first_proxy.eval() ; check_filter_result(test, df.nrows()); for( int i=1; i<nargs; i++){ LogicalVector test2 = CallProxy(args[i], df, dots.envir(i) ).eval() ; combine_and(test, test2) ; } DataFrame res = subset( df, test, df.names(), classes_not_grouped() ) ; return res ; } }
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){ int nexpr = args.size() ; CharacterVector names = args.names(); LazySubsets subsets( df ) ; std::vector<SEXP> results ; std::vector<SEXP> result_names ; NamedListAccumulator<SEXP> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ SEXP name = names[i] ; Environment env = dots.envir(i) ; Result* res = get_handler( args[i], subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( args[i], subsets, env).eval()) ; } delete res ; subsets.input( Symbol(name), result ) ; accumulator.set(name, result); } return tbl_cpp( accumulator, 1 ) ; }
// version of grouped filter when contributions to ... come from several environment DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){ const DataFrame& data = gdf.data() ; CharacterVector names = data.names() ; SymbolSet set ; for( int i=0; i<names.size(); i++){ set.insert( Rf_install( names[i] ) ) ; } int nrows = data.nrows() ; LogicalVector test(nrows, TRUE); LogicalVector g_test ; for( int k=0; k<args.size(); k++){ Call call( (SEXP)args[k] ) ; GroupedCallProxy call_proxy( call, gdf, dots.envir(k) ) ; int ngroups = gdf.ngroups() ; GroupedDataFrame::group_iterator git = gdf.group_begin() ; for( int i=0; i<ngroups; i++, ++git){ SlicingIndex indices = *git ; int chunk_size = indices.size() ; g_test = call_proxy.get( indices ); check_filter_result(g_test, chunk_size ) ; for( int j=0; j<chunk_size; j++){ test[ indices[j] ] = test[ indices[j] ] & g_test[j] ; } } } DataFrame res = subset( data, test, names, classes_grouped() ) ; res.attr( "vars") = data.attr("vars") ; return res ; }
// [[Rcpp::export]] DataFrame arrange_impl( DataFrame data, List args, DataDots dots ){ int nargs = args.size() ; List variables(nargs) ; LogicalVector ascending(nargs) ; Shelter<SEXP> __ ; for(int i=0; i<nargs; i++){ SEXP call = args[i] ; bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ; CallProxy call_proxy( is_desc ? CADR(call) : call, data, dots.envir(i)) ; variables[i] = __(call_proxy.eval()) ; if( Rf_length(variables[i]) != data.nrows() ){ std::stringstream s ; s << "incorrect size (" << Rf_length(variables[i]) << "), expecting :" << data.nrows() ; stop(s.str()) ; } ascending[i] = !is_desc ; } OrderVisitors o(variables,ascending, nargs) ; IntegerVector index = o.apply() ; DataFrameVisitors visitors( data, data.names() ) ; DataFrame res = visitors.subset(index, data.attr("class") ) ; return res; }
SEXP summarise_grouped(const GroupedDataFrame& gdf, List args, const DataDots& dots){ DataFrame df = gdf.data() ; int nexpr = args.size() ; int nvars = gdf.nvars() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); NamedListAccumulator<SEXP> accumulator ; int i=0; for( ; i<nvars; i++){ SET_NAMED(gdf.label(i), 2) ; accumulator.set( PRINTNAME(gdf.symbol(i)), gdf.label(i) ) ; } LazyGroupedSubsets subsets(gdf) ; Shelter<SEXP> __ ; for( int k=0; k<nexpr; k++, i++ ){ Environment env = dots.envir(k) ; Result* res = get_handler( args[k], subsets, env ) ; // if we could not find a direct Result // we can use a GroupedCalledReducer which will callback to R if( !res ) res = new GroupedCalledReducer( args[k], subsets, env) ; SEXP result = __( res->process(gdf) ) ; SEXP name = results_names[k] ; accumulator.set( name, result ); subsets.input( Symbol(name), SummarisedVariable(result) ) ; delete res; } return summarised_grouped_tbl_cpp(accumulator, gdf ); }
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){ int nexpr = dots.size() ; if( nexpr == 0) return DataFrame() ; CharacterVector names = args.names(); LazySubsets subsets( df ) ; std::vector<SEXP> results ; std::vector<SEXP> result_names ; NamedListAccumulator<DataFrame> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ Rcpp::checkUserInterrupt() ; SEXP name = names[dots.expr_index(i)] ; Environment env = dots.envir(i) ; Result* res = get_handler( args[i], subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( args[dots.expr_index(i)], subsets, env).eval()) ; } delete res ; if( Rf_length(result) != 1 ){ std::stringstream s ; s << "expecting result of length one, got : " << Rf_length(result) ; stop(s.str()) ; } subsets.input( Symbol(name), result ) ; accumulator.set(name, result); } return tbl_cpp( accumulator, 1 ) ; }
SEXP summarise_grouped(const DataFrame& df, List args, const DataDots& dots){ Data gdf(df) ; int nexpr = dots.size() ; int nvars = gdf.nvars() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); NamedListAccumulator<Data> accumulator ; int i=0; for( ; i<nvars; i++){ accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ; } Subsets subsets(gdf) ; Shelter<SEXP> __ ; for( int k=0; k<nexpr; k++, i++ ){ Rcpp::checkUserInterrupt() ; Environment env = dots.envir(k) ; Result* res = get_handler( args[dots.expr_index(k)], subsets, env ) ; // if we could not find a direct Result // we can use a GroupedCallReducer which will callback to R if( !res ) res = new GroupedCallReducer<Data, Subsets>( args[dots.expr_index(k)], subsets, env) ; SEXP result = __( res->process(gdf) ) ; SEXP name = results_names[dots.expr_index(k)] ; accumulator.set( name, result ); subsets.input( Symbol(name), SummarisedVariable(result) ) ; delete res; } return summarised_grouped_tbl_cpp<Data>(accumulator, gdf ); }
SEXP mutate_not_grouped(DataFrame df, List args, const DataDots& dots){ Shelter<SEXP> __ ; Environment env = dots.envir(0) ; int nexpr = args.size() ; CharacterVector results_names = args.names() ; NamedListAccumulator<SEXP> accumulator ; int nvars = df.size() ; CharacterVector df_names = df.names() ; for( int i=0; i<nvars; i++){ accumulator.set( df_names[i], df[i] ) ; } CallProxy call_proxy(df, env) ; for( int i=0; i<nexpr; i++){ env = dots.envir(i) ; call_proxy.set_env(env) ; SEXP call = args[i] ; SEXP name = results_names[i] ; SEXP result = R_NilValue ; if( TYPEOF(call) == SYMSXP ){ if(call_proxy.has_variable(call)){ result = call_proxy.get_variable(PRINTNAME(call)) ; } else { result = env.find(CHAR(PRINTNAME(call))) ; SET_NAMED(result,2) ; } } else if( TYPEOF(call) == LANGSXP ){ call_proxy.set_call( args[i] ); // we need to protect the SEXP, that's what the Shelter does result = __( call_proxy.eval() ) ; } else if( Rf_length(call) == 1 ){ boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, df.nrows() ) ); result = __( gather->collect() ) ; } else { stop( "cannot handle" ) ; } if( Rf_length(result) == df.nrows() ){ // ok } else if( Rf_length(result) == 1 ){ // recycle Gatherer* gather = constant_gatherer( result, df.nrows() ) ; result = __( gather->collect() ) ; delete gather ; } else { std::stringstream s ; s << "wrong result size (" << Rf_length(result) << "), expected " << df.nrows() << " or 1" ; stop(s.str()) ; } call_proxy.input( name, result ) ; accumulator.set( name, result ); } List res = structure_mutate(accumulator, df, classes_not_grouped() ) ; return res ; }
SEXP mutate_grouped(GroupedDataFrame gdf, List args, const DataDots& dots){ const DataFrame& df = gdf.data() ; int nexpr = args.size() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); Environment env = dots.envir(0) ; GroupedCallProxy proxy(gdf, env) ; Shelter<SEXP> __ ; NamedListAccumulator<SEXP> accumulator ; int ncolumns = df.size() ; CharacterVector column_names = df.names() ; for( int i=0; i<ncolumns; i++){ accumulator.set( column_names[i], df[i] ) ; } for( int i=0; i<nexpr; i++){ env = dots.envir(i) ; proxy.set_env( env ) ; SEXP call = args[i] ; SEXP name = results_names[i] ; SEXP variable = R_NilValue ; if( TYPEOF(call) == SYMSXP ){ if(proxy.has_variable(call)){ variable = proxy.get_variable( PRINTNAME(call) ) ; } else { SEXP v = env.find(CHAR(PRINTNAME(call))) ; if( Rf_isNull(v) ){ std::stringstream s ; s << "unknown variable: " << CHAR(PRINTNAME(call)) ; stop(s.str()); } else if( Rf_length(v) == 1){ Replicator* rep = constant_replicator(v, gdf.nrows() ); variable = __( rep->collect() ); delete rep ; } else { Replicator* rep = replicator(v, gdf) ; variable = __( rep->collect() ); delete rep ; } } } else if(TYPEOF(call) == LANGSXP){ proxy.set_call( call ); Gatherer* gather = gatherer( proxy, gdf ) ; variable = __( gather->collect() ) ; delete gather ; } else if(Rf_length(call) == 1) { boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, gdf.nrows() ) ); variable = __( gather->collect() ) ; } else { stop( "cannot handle" ) ; } proxy.input( name, variable ) ; accumulator.set( name, variable) ; } return structure_mutate(accumulator, df, classes_grouped() ); }
// [[Rcpp::export]] List arrange_impl( DataFrame data, List args, DataDots dots ){ check_valid_colnames(data) ; assert_all_white_list(data) ; // special case arrange() with no arguments for grouped data if( dots.size() == 0 && is<GroupedDataFrame>(data) ){ DataFrame labels( data.attr( "labels" ) ); OrderVisitors o(labels) ; IntegerVector index = o.apply() ; // reorganize labels = DataFrameVisitors( labels, labels.names() ).subset( index, labels.attr("class") ); ListOf<IntegerVector> indices( data.attr("indices") ) ; int ngroups = indices.size() ; List new_indices(ngroups) ; IntegerVector master_index(data.nrows()) ; for( int i=0; i<ngroups; i++){ new_indices[index[i]] = indices[i] ; } IntegerVector group_sizes = data.attr("group_sizes") ; IntegerVector new_group_sizes(ngroups); for( int i=0, k=0; i<ngroups; i++){ IntegerVector idx = new_indices[i] ; IntegerVector new_group_index = seq(k, k + idx.size() - 1 ); for( int j=0; j<idx.size(); j++, k++){ master_index[k] = idx[j] ; } new_indices[i] = new_group_index ; new_group_sizes[i] = idx.size() ; } DataFrame res = DataFrameVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ; res.attr( "labels" ) = labels ; res.attr( "indices" ) = new_indices ; res.attr( "vars" ) = data.attr("vars" ) ; res.attr( "group_sizes" ) = new_group_sizes ; res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ; res.attr( "drop" ) = data.attr("drop") ; return res ; } if( dots.size() == 0 || data.nrows() == 0) return data ; int nargs = dots.size() ; if( is<GroupedDataFrame>(data) ){ nargs += GroupedDataFrame(data).nvars() ; } List variables(nargs) ; LogicalVector ascending(nargs) ; int k = 0 ; if( is<GroupedDataFrame>(data) ){ GroupedDataFrame gdf(data); for( ; k< gdf.nvars(); k++) { ascending[k] = true ; String s = PRINTNAME(gdf.symbol(k)); variables[k] = data[s] ; } } for(int i=0; k<nargs; i++, k++){ Shelter<SEXP> __ ; SEXP call = args[dots.expr_index(i)] ; bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ; CallProxy call_proxy(is_desc ? CADR(call) : call, data, dots.envir(i)) ; SEXP v = __(call_proxy.eval()) ; if( !white_list(v) || TYPEOF(v) == VECSXP ){ std::stringstream ss ; ss << "cannot arrange column of class '" << get_single_class(v) << "'" ; stop(ss.str()) ; } if( Rf_length(v) != data.nrows() ){ std::stringstream s ; s << "incorrect size (" << Rf_length(v) << "), expecting :" << data.nrows() ; stop(s.str()) ; } variables[k] = v ; ascending[k] = !is_desc ; } OrderVisitors o(variables, ascending, nargs) ; IntegerVector index = o.apply() ; DataFrameVisitors visitors( data, data.names() ) ; List res = visitors.subset(index, data.attr("class") ) ; SET_ATTRIB(res, strip_group_attributes(res)); return res ; }