// version of grouped filter when contributions to ... come from several environment DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){ const DataFrame& data = gdf.data() ; CharacterVector names = data.names() ; SymbolSet set ; for( int i=0; i<names.size(); i++){ set.insert( Rf_install( names[i] ) ) ; } int nrows = data.nrows() ; LogicalVector test(nrows, TRUE); LogicalVector g_test ; for( int k=0; k<args.size(); k++){ Call call( (SEXP)args[k] ) ; GroupedCallProxy call_proxy( call, gdf, dots.envir(k) ) ; int ngroups = gdf.ngroups() ; GroupedDataFrame::group_iterator git = gdf.group_begin() ; for( int i=0; i<ngroups; i++, ++git){ SlicingIndex indices = *git ; int chunk_size = indices.size() ; g_test = call_proxy.get( indices ); check_filter_result(g_test, chunk_size ) ; for( int j=0; j<chunk_size; j++){ test[ indices[j] ] = test[ indices[j] ] & g_test[j] ; } } } DataFrame res = subset( data, test, names, classes_grouped() ) ; res.attr( "vars") = data.attr("vars") ; return res ; }
// Filters a grouped data frame: the conditions in `args` are merged into one
// call (a, b, c -> a & b & c), that call is evaluated once per group in `env`,
// and the per-group results are scattered into a full-length row mask that is
// used to subset the data. The "vars" attribute is copied onto the result.
DataFrame filter_grouped( const GroupedDataFrame& gdf, List args, Environment env){
    Language call = and_calls( args ) ;

    const DataFrame& data = gdf.data() ;
    int nrows = data.nrows() ;

    // Left uninitialised: the loop below writes every position a group covers.
    LogicalVector keep = no_init(nrows);
    LogicalVector group_mask ;

    GroupedCallProxy call_proxy( call, gdf, env ) ;
    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator group_it = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++group_it){
        SlicingIndex rows = *group_it ;
        group_mask = call_proxy.get( rows );

        int n_rows = rows.size() ;
        for( int j=0; j<n_rows; j++){
            const int row = rows[j] ;
            keep[row] = group_mask[j] ;
        }
    }

    DataFrame res = subset( data, keep, data.names(), classes_grouped() ) ;
    res.attr( "vars") = data.attr("vars") ;
    return res ;
}
// Grouped filter for expressions that all share one environment: the
// conditions are merged into a single call (a, b, c -> a & b & c), evaluated
// once per group in `env`, validated with check_filter_result and written into
// a full-length row mask used to subset the data.
DataFrame filter_grouped_single_env( const GroupedDataFrame& gdf, const List& args, const Environment& env){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Symbols of the data columns, handed to and_calls together with the args.
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_install( names[col] ) ) ;
    }

    Call call( and_calls( args, set ) ) ;

    int nrows = data.nrows() ;
    // Left uninitialised: the loop below writes every position a group covers.
    LogicalVector keep = no_init(nrows);
    LogicalVector group_mask ;

    GroupedCallProxy call_proxy( call, gdf, env ) ;
    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator group_it = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++group_it){
        SlicingIndex rows = *group_it ;
        int n_rows = rows.size() ;

        group_mask = call_proxy.get( rows );
        check_filter_result(group_mask, n_rows ) ;

        for( int j=0; j<n_rows; j++){
            const int row = rows[j] ;
            keep[row] = group_mask[j] ;
        }
    }

    DataFrame res = subset( data, keep, names, classes_grouped() ) ;
    res.attr( "vars") = data.attr("vars") ;
    return res ;
}
// [[Rcpp::export]] IntegerVector grouped_indices_grouped_df_impl(GroupedDataFrame gdf) { int n=gdf.nrows(); IntegerVector res = no_init(n); int ngroups = gdf.ngroups(); GroupedDataFrameIndexIterator it = gdf.group_begin(); for (int i=0; i<ngroups; i++, ++it) { SlicingIndex index = *it; int n_index = index.size(); for (int j=0; j<n_index; j++) { res[ index[j] ] = i + 1; } } return res; }
// Grouped filter for lazy expressions that carry their own environments.
// Each expression in `dots` is evaluated group by group in its environment;
// a row stays selected only if no expression yields something other than TRUE
// for it. A length-one result applies to the whole group.
// NOTE(review): `Data` and `Subsets` are presumably template parameters
// declared on a line outside this view - confirm.
DataFrame filter_grouped_multiple_env( const Data& gdf, const LazyDots& dots){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Install one symbol per column name and collect them.
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_installChar( names[col] ) ) ;
    }

    // Every row starts out as kept; expressions can only clear rows.
    int nrows = data.nrows() ;
    LogicalVector keep(nrows, TRUE);
    LogicalVector group_mask ;

    for( int k=0; k<dots.size(); k++){
        // Give the user a chance to interrupt between expressions.
        Rcpp::checkUserInterrupt() ;

        const Lazy& lazy = dots[k] ;
        Call call( lazy.expr() ) ;
        GroupedCallProxy<Data, Subsets> call_proxy( call, gdf, lazy.env() ) ;

        int ngroups = gdf.ngroups() ;
        typename Data::group_iterator group_it = gdf.group_begin() ;
        for( int g=0; g<ngroups; g++, ++group_it){
            SlicingIndex rows = *group_it ;
            int n_rows = rows.size() ;

            group_mask = check_filter_logical_result(call_proxy.get( rows ));

            if( group_mask.size() == 1 ){
                // Scalar result: a non-TRUE value drops the entire group,
                // TRUE leaves the group untouched.
                if( group_mask[0] == TRUE ) continue ;
                for( int j=0; j<n_rows; j++){
                    keep[rows[j]] = FALSE ;
                }
                continue ;
            }

            // Element-wise result: must match the group size, then every
            // non-TRUE element drops its row.
            check_filter_result(group_mask, n_rows ) ;
            for( int j=0; j<n_rows; j++){
                if( group_mask[j] != TRUE ){
                    keep[ rows[j] ] = FALSE ;
                }
            }
        }
    }

    DataFrame res = subset( data, keep, names, classes_grouped<Data>() ) ;
    res.attr( "vars") = data.attr("vars") ;
    return res ;
}
// Grouped filter for lazy expressions evaluated in one environment, taken
// from the first element of `dots`. The conditions are merged into a single
// call (a, b, c -> a & b & c) which is evaluated once per group; the results
// form the row mask used to subset the data.
// NOTE(review): reads dots[0], so `dots` is assumed to be non-empty; `Data`
// and `Subsets` are presumably template parameters declared outside this
// view - confirm both.
DataFrame filter_grouped_single_env( const Data& gdf, const LazyDots& dots){
    typedef GroupedCallProxy<Data, Subsets> Proxy ;

    Environment env = dots[0].env() ;
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Symbols of the data columns, handed to and_calls with the expressions.
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_installChar( names[col] ) ) ;
    }

    Call call( and_calls( dots, set, env ) ) ;

    // Every row starts out as kept.
    int nrows = data.nrows() ;
    LogicalVector keep(nrows, TRUE);
    LogicalVector group_mask ;

    Proxy call_proxy( call, gdf, env ) ;
    int ngroups = gdf.ngroups() ;
    typename Data::group_iterator group_it = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++group_it){
        SlicingIndex rows = *group_it ;
        int n_rows = rows.size() ;

        group_mask = check_filter_logical_result( call_proxy.get( rows ) ) ;

        if( group_mask.size() != 1 ){
            // Element-wise result: must match the group size, then every
            // non-TRUE element drops its row.
            check_filter_result(group_mask, n_rows ) ;
            for( int j=0; j<n_rows; j++){
                if( group_mask[j] != TRUE ) keep[ rows[j] ] = FALSE ;
            }
        } else {
            // Scalar result: the whole group gets 1 if it is TRUE, else 0.
            int whole_group = group_mask[0] == TRUE ;
            for( int j=0; j<n_rows; j++){
                keep[ rows[j] ] = whole_group ;
            }
        }
    }

    DataFrame res = subset( data, keep, names, classes_grouped<Data>() ) ;
    res.attr( "vars") = data.attr("vars") ;
    return res ;
}
// Returns the element of `data` that sits at position `idx` of the slice
// once the slice is ranked by the `order` vector.
//
// `idx` is 1-based when positive and counts from the end of the slice when
// negative (-1 is the last ranked element). The default `def` is returned
// when the slice is empty or `idx` does not designate an element, i.e. when
// idx is 0, idx > n or idx < -n.
inline STORAGE process_chunk(const SlicingIndex& indices) {
    int n = indices.size();

    // idx == 0 must be rejected too: it would otherwise map to position n,
    // one past the last valid element, and read out of bounds below.
    if (n == 0 || idx == 0 || idx > n || idx < -n) return def;

    // Translate the signed 1-based index into a 0-based rank in [0, n).
    int i = idx > 0 ? (idx - 1) : (n + idx);

    typedef VectorSliceVisitor<ORDER_RTYPE> Slice;
    typedef OrderVectorVisitorImpl<ORDER_RTYPE,true,Slice> Visitor;
    typedef Compare_Single_OrderVisitor<Visitor> Comparer;

    Comparer comparer(Visitor(Slice(order, indices)));

    // Partially order the positions 0..n-1 so that the i-th ranked position
    // ends up at sequence[i]; a full sort is not needed.
    IntegerVector sequence = seq(0, n - 1);
    std::nth_element(sequence.begin(), sequence.begin() + i, sequence.end(), comparer);

    return data[ indices[ sequence[i] ] ];
}
// Builds an integer vector with one entry per element of the slice, every
// entry being the slice's group() value plus one (presumably converting a
// 0-based group index into a 1-based group number - confirm).
SEXP process(const SlicingIndex& i) {
    const int slice_length = i.size();
    const int group_number = i.group() + 1;
    return IntegerVector(slice_length, group_number);
}
// Returns the element of `data` at position `idx` within the slice.
//
// `idx` is 1-based when positive and counts from the end of the slice when
// negative (-1 is the last element). The default `def` is returned when the
// slice is empty or `idx` does not designate an element, i.e. when idx is 0,
// idx > n or idx < -n.
inline STORAGE process_chunk(const SlicingIndex& indices) {
    int n = indices.size();

    // idx == 0 must be rejected too: it would otherwise map to position n,
    // one past the last valid element, and read out of bounds below.
    if (n == 0 || idx == 0 || idx > n || idx < -n) return def;

    // Translate the signed 1-based index into a 0-based offset in [0, n).
    int i = idx > 0 ? (idx - 1) : (n + idx);
    return data[indices[i]];
}
//[[Rcpp::export]] DataFrame complement_impl(GroupedDataFrame gdf, DataFrame genome) { genome_map_t chrom_sizes = makeChromSizes(genome) ; DataFrame df = gdf.data() ; IntegerVector starts = df["start"] ; IntegerVector ends = df["end"] ; CharacterVector chroms = df["chrom"] ; std::vector<std::string> chroms_out ; std::vector<int> starts_out ; std::vector<int> ends_out ; int ngroups = gdf.ngroups() ; GroupedDataFrame::group_iterator git = gdf.group_begin() ; for (int i = 0; i < ngroups; ++i, ++git) { SlicingIndex indices = *git ; int ni = indices.size() ; int start, end ; int last_end = 1 ; // get chrom from first index auto chrom = as<std::string>(chroms[indices[0]]) ; for (int j = 0; j < ni; ++j) { start = starts[indices[j]] ; end = ends[indices[j]] ; if (j == 0) { if (start == 1) { last_end = end ; continue ; } else { chroms_out.push_back(chrom) ; starts_out.push_back(1) ; ends_out.push_back(start) ; } } else { chroms_out.push_back(chrom) ; starts_out.push_back(last_end) ; ends_out.push_back(start) ; } last_end = end; } auto chrom_size = chrom_sizes[chrom] ; if (last_end < chrom_size) { chroms_out.push_back(chrom) ; starts_out.push_back(last_end) ; ends_out.push_back(chrom_size) ; } } return DataFrame::create(_("chrom") = chroms_out, _("start") = starts_out, _("end") = ends_out, _("stringsAsFactors") = false) ; }