SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){ int nexpr = dots.size() ; if( nexpr == 0) return DataFrame() ; CharacterVector names = args.names(); LazySubsets subsets( df ) ; std::vector<SEXP> results ; std::vector<SEXP> result_names ; NamedListAccumulator<DataFrame> accumulator ; Rcpp::Shelter<SEXP> __ ; for( int i=0; i<nexpr; i++){ Rcpp::checkUserInterrupt() ; SEXP name = names[dots.expr_index(i)] ; Environment env = dots.envir(i) ; Result* res = get_handler( args[i], subsets, env ) ; SEXP result ; if(res) { result = __(res->process( FullDataFrame(df) )) ; } else { result = __(CallProxy( args[dots.expr_index(i)], subsets, env).eval()) ; } delete res ; if( Rf_length(result) != 1 ){ std::stringstream s ; s << "expecting result of length one, got : " << Rf_length(result) ; stop(s.str()) ; } subsets.input( Symbol(name), result ) ; accumulator.set(name, result); } return tbl_cpp( accumulator, 1 ) ; }
SEXP summarise_grouped(const DataFrame& df, List args, const DataDots& dots){ Data gdf(df) ; int nexpr = dots.size() ; int nvars = gdf.nvars() ; CharacterVector results_names = args.names() ; check_not_groups(results_names, gdf); NamedListAccumulator<Data> accumulator ; int i=0; for( ; i<nvars; i++){ accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ; } Subsets subsets(gdf) ; Shelter<SEXP> __ ; for( int k=0; k<nexpr; k++, i++ ){ Rcpp::checkUserInterrupt() ; Environment env = dots.envir(k) ; Result* res = get_handler( args[dots.expr_index(k)], subsets, env ) ; // if we could not find a direct Result // we can use a GroupedCallReducer which will callback to R if( !res ) res = new GroupedCallReducer<Data, Subsets>( args[dots.expr_index(k)], subsets, env) ; SEXP result = __( res->process(gdf) ) ; SEXP name = results_names[dots.expr_index(k)] ; accumulator.set( name, result ); subsets.input( Symbol(name), SummarisedVariable(result) ) ; delete res; } return summarised_grouped_tbl_cpp<Data>(accumulator, gdf ); }
// version of grouped filter when contributions to ... come from several environment DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){ const DataFrame& data = gdf.data() ; CharacterVector names = data.names() ; SymbolSet set ; for( int i=0; i<names.size(); i++){ set.insert( Rf_install( names[i] ) ) ; } int nrows = data.nrows() ; LogicalVector test(nrows, TRUE); LogicalVector g_test ; for( int k=0; k<dots.size(); k++){ Rcpp::checkUserInterrupt() ; Call call( (SEXP)args[dots.expr_index(k)] ) ; GroupedCallProxy<GroupedDataFrame> call_proxy( call, gdf, dots.envir(k) ) ; int ngroups = gdf.ngroups() ; GroupedDataFrame::group_iterator git = gdf.group_begin() ; for( int i=0; i<ngroups; i++, ++git){ SlicingIndex indices = *git ; int chunk_size = indices.size() ; g_test = check_filter_logical_result(call_proxy.get( indices )); if( g_test.size() == 1 ){ if( g_test[0] != TRUE ){ for( int j=0; j<chunk_size; j++){ test[indices[j]] = FALSE ; } } } else { check_filter_result(g_test, chunk_size ) ; for( int j=0; j<chunk_size; j++){ if( g_test[j] != TRUE ){ test[ indices[j] ] = FALSE ; } } } } } DataFrame res = subset( data, test, names, classes_grouped<GroupedDataFrame>() ) ; res.attr( "vars") = data.attr("vars") ; return res ; }
// [[Rcpp::export]] List arrange_impl( DataFrame data, List args, DataDots dots ){ check_valid_colnames(data) ; assert_all_white_list(data) ; // special case arrange() with no arguments for grouped data if( dots.size() == 0 && is<GroupedDataFrame>(data) ){ DataFrame labels( data.attr( "labels" ) ); OrderVisitors o(labels) ; IntegerVector index = o.apply() ; // reorganize labels = DataFrameVisitors( labels, labels.names() ).subset( index, labels.attr("class") ); ListOf<IntegerVector> indices( data.attr("indices") ) ; int ngroups = indices.size() ; List new_indices(ngroups) ; IntegerVector master_index(data.nrows()) ; for( int i=0; i<ngroups; i++){ new_indices[index[i]] = indices[i] ; } IntegerVector group_sizes = data.attr("group_sizes") ; IntegerVector new_group_sizes(ngroups); for( int i=0, k=0; i<ngroups; i++){ IntegerVector idx = new_indices[i] ; IntegerVector new_group_index = seq(k, k + idx.size() - 1 ); for( int j=0; j<idx.size(); j++, k++){ master_index[k] = idx[j] ; } new_indices[i] = new_group_index ; new_group_sizes[i] = idx.size() ; } DataFrame res = DataFrameVisitors( data, data.names() ).subset( master_index, data.attr("class" ) ) ; res.attr( "labels" ) = labels ; res.attr( "indices" ) = new_indices ; res.attr( "vars" ) = data.attr("vars" ) ; res.attr( "group_sizes" ) = new_group_sizes ; res.attr( "biggest_group_size" ) = data.attr("biggest_group_size") ; res.attr( "drop" ) = data.attr("drop") ; return res ; } if( dots.size() == 0 || data.nrows() == 0) return data ; int nargs = dots.size() ; if( is<GroupedDataFrame>(data) ){ nargs += GroupedDataFrame(data).nvars() ; } List variables(nargs) ; LogicalVector ascending(nargs) ; int k = 0 ; if( is<GroupedDataFrame>(data) ){ GroupedDataFrame gdf(data); for( ; k< gdf.nvars(); k++) { ascending[k] = true ; String s = PRINTNAME(gdf.symbol(k)); variables[k] = data[s] ; } } for(int i=0; k<nargs; i++, k++){ Shelter<SEXP> __ ; SEXP call = args[dots.expr_index(i)] ; bool is_desc = TYPEOF(call) == LANGSXP && Rf_install("desc") == CAR(call) ; CallProxy call_proxy(is_desc ? CADR(call) : call, data, dots.envir(i)) ; SEXP v = __(call_proxy.eval()) ; if( !white_list(v) || TYPEOF(v) == VECSXP ){ std::stringstream ss ; ss << "cannot arrange column of class '" << get_single_class(v) << "'" ; stop(ss.str()) ; } if( Rf_length(v) != data.nrows() ){ std::stringstream s ; s << "incorrect size (" << Rf_length(v) << "), expecting :" << data.nrows() ; stop(s.str()) ; } variables[k] = v ; ascending[k] = !is_desc ; } OrderVisitors o(variables, ascending, nargs) ; IntegerVector index = o.apply() ; DataFrameVisitors visitors( data, data.names() ) ; List res = visitors.subset(index, data.attr("class") ) ; SET_ATTRIB(res, strip_group_attributes(res)); return res ; }