Esempio n. 1
0
// Grouped filter for the case where the expressions captured in `...` come
// from several environments.
//
// Each expression args[k] is evaluated group by group with its own environment
// (dots.envir(k)). The per-row outcomes of all expressions are AND-ed into one
// row mask, which is handed to subset(). The "vars" attribute of the input data
// is copied onto the result.
DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // NOTE(review): this symbol set is filled but never read in this function.
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_install( names[col] ) ) ;
    }

    // Row mask over the whole data frame; every row starts out as kept.
    int nrows = data.nrows() ;
    LogicalVector test(nrows, TRUE);

    // Holds the result of the current expression for the current group.
    LogicalVector g_test ;

    for( int k=0; k<args.size(); k++){
        Call call( (SEXP)args[k] ) ;
        GroupedCallProxy call_proxy( call, gdf, dots.envir(k) ) ;

        int ngroups = gdf.ngroups() ;
        GroupedDataFrame::group_iterator git = gdf.group_begin() ;
        int g = 0 ;
        while( g < ngroups ){
            SlicingIndex rows = *git ;
            int n_rows = rows.size() ;

            g_test = call_proxy.get( rows );
            // presumably validates g_test against the group size -- defined elsewhere
            check_filter_result(g_test, n_rows ) ;

            // Fold this expression's outcome into the mask built so far.
            for( int j=0; j<n_rows; j++){
                int row = rows[j] ;
                test[ row ] = test[ row ] & g_test[j] ;
            }

            ++g ;
            ++git ;
        }
    }

    DataFrame res = subset( data, test, names, classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}
Esempio n. 2
0
// Filters a grouped data frame with the conditions in `args`, all evaluated in
// the single environment `env`.
//
// The conditions are first combined into one call by and_calls(), that call is
// evaluated once per group, and the group results are scattered into a row
// mask that is handed to subset(). The "vars" attribute of the input data is
// copied onto the result.
DataFrame filter_grouped( const GroupedDataFrame& gdf, List args, Environment env){
    // a, b, c  ->  a & b & c
    Language call = and_calls( args ) ;

    const DataFrame& data = gdf.data() ;
    int nrows = data.nrows() ;

    // Left uninitialised: the loop below writes the rows of every group.
    LogicalVector test = no_init(nrows);

    // Holds the result of the combined call for the current group.
    LogicalVector g_test ;
    GroupedCallProxy call_proxy( call, gdf, env ) ;

    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator git = gdf.group_begin() ;
    int g = 0 ;
    while( g < ngroups ){
        SlicingIndex rows = *git ;
        g_test = call_proxy.get( rows );

        // Copy the group's result to the positions of its rows in the full mask.
        int n_rows = rows.size() ;
        for( int j=0; j<n_rows; j++){
            int row = rows[j] ;
            test[ row ] = g_test[j] ;
        }

        ++g ;
        ++git ;
    }

    DataFrame res = subset( data, test, data.names(), classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}
Esempio n. 3
0
// Grouped filter for the case where all conditions in `args` share the single
// environment `env`.
//
// The conditions are combined into one call by and_calls() (which also receives
// the set of column-name symbols), the call is evaluated once per group, and
// the group results are scattered into a row mask that is handed to subset().
// The "vars" attribute of the input data is copied onto the result.
DataFrame filter_grouped_single_env( const GroupedDataFrame& gdf, const List& args, const Environment& env){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Symbols of all column names, passed on to and_calls().
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_install( names[col] ) ) ;
    }

    // a, b, c  ->  a & b & c
    Call call( and_calls( args, set ) ) ;

    // Left uninitialised: the loop below writes the rows of every group.
    int nrows = data.nrows() ;
    LogicalVector test = no_init(nrows);

    // Holds the result of the combined call for the current group.
    LogicalVector g_test ;
    GroupedCallProxy call_proxy( call, gdf, env ) ;

    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator git = gdf.group_begin() ;
    int g = 0 ;
    while( g < ngroups ){
        SlicingIndex rows = *git ;
        int n_rows = rows.size() ;

        g_test = call_proxy.get( rows );
        // presumably validates g_test against the group size -- defined elsewhere
        check_filter_result(g_test, n_rows ) ;

        // Copy the group's result to the positions of its rows in the full mask.
        for( int j=0; j<n_rows; j++){
            int row = rows[j] ;
            test[ row ] = g_test[j] ;
        }

        ++g ;
        ++git ;
    }

    DataFrame res = subset( data, test, names, classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}
Esempio n. 4
0
// Returns, for every row reached through the group iterator, the 1-based
// position of the group that contains it.
//
// The result has one entry per row of `gdf` and is allocated uninitialised;
// only the positions listed in some group's index are written.
// [[Rcpp::export]]
IntegerVector grouped_indices_grouped_df_impl(GroupedDataFrame gdf) {
  int n_rows = gdf.nrows();
  IntegerVector group_ids = no_init(n_rows);

  int n_groups = gdf.ngroups();
  GroupedDataFrameIndexIterator group_it = gdf.group_begin();
  int g = 0;
  while (g < n_groups) {
    SlicingIndex rows = *group_it;
    int group_size = rows.size();

    // Groups are numbered from 1 in the output.
    int label = g + 1;
    for (int k = 0; k < group_size; k++) {
      group_ids[ rows[k] ] = label;
    }

    ++g;
    ++group_it;
  }
  return group_ids;
}
Esempio n. 5
0
// Grouped filter for lazy expressions that each carry their own environment.
//
// Every expression in `dots` is evaluated group by group in its own
// environment (lazy.env()). A row stays selected only if no expression yields
// something other than TRUE for it. A length-1 group result applies to every
// row of that group. The final row mask is handed to subset() and the "vars"
// attribute of the input data is copied onto the result.
//
// NOTE(review): `Data` and `Subsets` are not declared in this snippet --
// presumably template parameters declared just above; confirm.
DataFrame filter_grouped_multiple_env( const Data& gdf, const LazyDots& dots){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // NOTE(review): this symbol set is filled but never read in this function.
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_installChar( names[col] ) ) ;
    }

    // Row mask over the whole data frame; every row starts out as kept.
    int nrows = data.nrows() ;
    LogicalVector test(nrows, TRUE);

    // Holds the result of the current expression for the current group.
    LogicalVector g_test ;

    for( int k=0; k<dots.size(); k++){
        Rcpp::checkUserInterrupt() ;
        const Lazy& lazy = dots[k] ;

        Call call( lazy.expr() ) ;
        GroupedCallProxy<Data, Subsets> call_proxy( call, gdf, lazy.env() ) ;

        int ngroups = gdf.ngroups() ;
        typename Data::group_iterator git = gdf.group_begin() ;
        for( int g=0; g<ngroups; g++, ++git){
            SlicingIndex rows = *git ;
            int n_rows = rows.size() ;

            g_test = check_filter_logical_result(call_proxy.get( rows ));

            // A single value stands for the whole group; anything else goes
            // through check_filter_result() before being read row by row.
            const bool is_scalar = ( g_test.size() == 1 ) ;
            if( !is_scalar ){
                check_filter_result(g_test, n_rows ) ;
            }

            // Only ever clear rows: a non-TRUE outcome drops the row for good.
            for( int j=0; j<n_rows; j++){
                if( g_test[ is_scalar ? 0 : j ] != TRUE ){
                    test[ rows[j] ] = FALSE ;
                }
            }
        }
    }

    DataFrame res = subset( data, test, names, classes_grouped<Data>() ) ;
    res.attr( "vars") = data.attr("vars") ;

    return res ;
}
Esempio n. 6
0
// Grouped filter for lazy expressions that all share one environment.
//
// The environment is taken from the first entry of `dots`; the expressions are
// combined into one call by and_calls(), which is evaluated once per group.
// A length-1 group result is broadcast to every row of the group; otherwise
// rows whose result is not TRUE are cleared in the mask. The mask is handed to
// subset() and the "vars" attribute of the input data is copied onto the result.
//
// NOTE(review): `Data` and `Subsets` are not declared in this snippet --
// presumably template parameters declared just above; confirm.
DataFrame filter_grouped_single_env( const Data& gdf, const LazyDots& dots){
    // assumes dots holds at least one expression -- TODO confirm with callers
    Environment env = dots[0].env() ;

    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Symbols of all column names, passed on to and_calls().
    SymbolSet set ;
    for( int col=0; col<names.size(); col++){
        set.insert( Rf_installChar( names[col] ) ) ;
    }

    // a, b, c  ->  a & b & c
    Call call( and_calls( dots, set, env ) ) ;

    // Row mask over the whole data frame; every row starts out as kept.
    int nrows = data.nrows() ;
    LogicalVector test(nrows, TRUE);

    // Holds the result of the combined call for the current group.
    LogicalVector g_test ;
    GroupedCallProxy<Data, Subsets> call_proxy( call, gdf, env ) ;

    int ngroups = gdf.ngroups() ;
    typename Data::group_iterator git = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++git){
        SlicingIndex rows = *git ;
        int n_rows = rows.size() ;

        g_test = check_filter_logical_result( call_proxy.get( rows ) ) ;

        if( g_test.size() != 1 ){
            // One result per row: clear every row that is not TRUE.
            check_filter_result(g_test, n_rows ) ;
            for( int j=0; j<n_rows; j++){
                if( g_test[j] != TRUE ){
                    test[ rows[j] ] = FALSE ;
                }
            }
            continue ;
        }

        // Single result: write it (as 1 or 0) to every row of the group.
        int keep = g_test[0] == TRUE ;
        for( int j=0; j<n_rows; j++){
            test[ rows[j] ] = keep ;
        }
    }

    DataFrame res = subset( data, test, names, classes_grouped<Data>() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}
Esempio n. 7
0
    // Returns the value of `data` found at position `idx` of the chunk once the
    // chunk's rows are ranked by the `order` vector.
    //
    // `idx` is 1-based: a positive value counts from the start of the ranking,
    // a negative value counts back from its end. `def` is returned when the
    // chunk is empty or when `idx` is 0 or lies outside [-n, n].
    //
    // @param indices  rows of the current chunk
    // @return the selected element of `data`, or `def`
    inline STORAGE process_chunk(const SlicingIndex& indices) {
      int n = indices.size();
      // idx == 0 names no position; unguarded it would map to offset n below
      // and read one element past the end of `sequence`.
      if (n == 0 || idx == 0 || idx > n || idx < -n) return def;

      // Convert the 1-based / negative position into a 0-based offset.
      int i = idx > 0 ? (idx -1) : (n+idx);

      typedef VectorSliceVisitor<ORDER_RTYPE> Slice;
      typedef OrderVectorVisitorImpl<ORDER_RTYPE,true,Slice> Visitor;
      typedef Compare_Single_OrderVisitor<Visitor> Comparer;

      // Compares chunk-local positions through the `order` values of this chunk.
      Comparer comparer(Visitor(Slice(order, indices)));

      // Partial selection over the chunk-local positions 0..n-1: only the
      // element at offset i needs to end up in its ranked place.
      IntegerVector sequence = seq(0,n-1);
      std::nth_element(sequence.begin(), sequence.begin() + i, sequence.end(), comparer);

      return data[ indices[ sequence[i] ] ];
    }
Esempio n. 8
0
 // Builds the result for one group: an integer vector with one entry per row
 // of the slice, every entry holding the slice's group number plus one.
 SEXP process(const SlicingIndex& i) {
   int n_rows = i.size();
   int label = i.group() + 1;
   return IntegerVector(n_rows, label);
 }
Esempio n. 9
0
 // Returns the element of `data` at position `idx` within the chunk.
 //
 // `idx` is 1-based: a positive value counts from the start of the chunk, a
 // negative value counts back from its end. `def` is returned when the chunk
 // is empty or when `idx` is 0 or lies outside [-n, n].
 //
 // @param indices  rows of the current chunk
 // @return the selected element of `data`, or `def`
 inline STORAGE process_chunk(const SlicingIndex& indices) {
   int n = indices.size();
   // idx == 0 names no position; unguarded it would map to offset n below
   // and index one element past the end of the chunk.
   if (n == 0 || idx == 0 || idx > n || idx < -n) return def;
   // Convert the 1-based / negative position into a 0-based offset.
   int i = idx > 0 ? (idx -1) : (n+idx);
   return data[indices[i]];
 }
Esempio n. 10
0
// Builds the intervals NOT covered by the input, group by group.
//
// For every group of `gdf` the rows are walked in their stored order and an
// output interval is emitted for each gap: from position 1 to the first
// start (unless the first start is 1), between one row's end and the next
// row's start, and from the last end to the chromosome size looked up in
// `genome` (only when the last end is smaller than that size).
//
// The chromosome name of a group is read from its first row only.
// NOTE(review): presumably each group holds one chromosome with sorted,
// non-overlapping intervals -- nothing here checks that; confirm with callers.
//[[Rcpp::export]]
DataFrame complement_impl(GroupedDataFrame gdf, DataFrame genome) {

  genome_map_t chrom_sizes = makeChromSizes(genome) ;

  DataFrame df = gdf.data() ;

  IntegerVector starts = df["start"] ;
  IntegerVector ends = df["end"] ;
  CharacterVector chroms = df["chrom"] ;

  // Output columns, grown one interval at a time.
  std::vector<std::string> chroms_out ;
  std::vector<int> starts_out ;
  std::vector<int> ends_out ;

  // Appends one interval to the three output columns.
  auto append_interval = [&](const std::string& name, int from, int to) {
    chroms_out.push_back(name) ;
    starts_out.push_back(from) ;
    ends_out.push_back(to) ;
  } ;

  int ngroups = gdf.ngroups() ;
  GroupedDataFrame::group_iterator git = gdf.group_begin() ;
  for (int g = 0; g < ngroups; ++g, ++git) {

    SlicingIndex rows = *git ;
    int n_rows = rows.size() ;

    // End of the previously visited interval; 1 before any row is seen.
    int last_end = 1 ;

    // get chrom from first index
    auto chrom = as<std::string>(chroms[rows[0]]) ;

    for (int j = 0; j < n_rows; ++j) {

      int row = rows[j] ;
      int start = starts[row] ;
      int end = ends[row] ;

      bool is_first = (j == 0) ;

      // A first interval that begins at position 1 leaves no leading gap.
      if (!(is_first && start == 1)) {
        append_interval(chrom, is_first ? 1 : last_end, start) ;
      }

      last_end = end ;
    }

    auto chrom_size = chrom_sizes[chrom] ;

    // Trailing gap between the last interval and the end of the chromosome.
    if (last_end < chrom_size) {
      append_interval(chrom, last_end, chrom_size) ;
    }
  }

  return DataFrame::create(_("chrom") = chroms_out,
                           _("start") = starts_out,
                           _("end") = ends_out,
                           _("stringsAsFactors") = false) ;
}