Ejemplo n.º 1
0
SEXP summarise_grouped(const GroupedDataFrame& gdf, List args, const DataDots& dots){
    DataFrame df = gdf.data() ;

    int nexpr = args.size() ;
    int nvars = gdf.nvars() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);
    NamedListAccumulator<SEXP> accumulator ;

    int i=0;
    for( ; i<nvars; i++){
        SET_NAMED(gdf.label(i), 2) ;
        accumulator.set( PRINTNAME(gdf.symbol(i)), gdf.label(i) ) ;
    }

    LazyGroupedSubsets subsets(gdf) ;
    Shelter<SEXP> __ ;
    for( int k=0; k<nexpr; k++, i++ ){
        Environment env = dots.envir(k) ;

        Result* res = get_handler( args[k], subsets, env ) ;

        // if we could not find a direct Result
        // we can use a GroupedCalledReducer which will callback to R
        if( !res ) res = new GroupedCalledReducer( args[k], subsets, env) ;

        SEXP result = __( res->process(gdf) ) ;
        SEXP name = results_names[k] ;
        accumulator.set( name, result );
        subsets.input( Symbol(name), SummarisedVariable(result) ) ;
        delete res;
    }

    return summarised_grouped_tbl_cpp(accumulator, gdf );
}
Ejemplo n.º 2
0
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){
    int nexpr = args.size() ;
    CharacterVector names = args.names();

    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    std::vector<SEXP> result_names ;
    NamedListAccumulator<SEXP> accumulator ;

    Rcpp::Shelter<SEXP> __ ;
    for( int i=0; i<nexpr; i++){
        SEXP name = names[i] ;
        Environment env = dots.envir(i) ;
        Result* res = get_handler( args[i], subsets, env ) ;
        SEXP result ;
        if(res) {
            result = __(res->process( FullDataFrame(df) )) ;
        } else {
            result = __(CallProxy( args[i], subsets, env).eval()) ;
        }
        delete res ;
        subsets.input( Symbol(name), result ) ;
        accumulator.set(name, result);
    }

    return tbl_cpp( accumulator, 1 ) ;
}
Ejemplo n.º 3
0
SEXP summarise_not_grouped(DataFrame df, const LazyDots& dots){
    int nexpr = dots.size() ;
    if( nexpr == 0) return DataFrame() ;
    
    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    NamedListAccumulator<DataFrame> accumulator ;

    for( int i=0; i<nexpr; i++){
        Rcpp::checkUserInterrupt() ;
        
        const Lazy& lazy = dots[i] ;
        Environment env = lazy.env() ;
        Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ;
        boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) ) ;
        RObject result ;
        if(res) {
            result = res->process( FullDataFrame(df) ) ;
        } else {
            result = CallProxy( lazy.expr(), subsets, env).eval() ;
        }
        if( Rf_length(result) != 1 ){
            stop( "expecting result of length one, got : %d", Rf_length(result) ) ;
        }
        accumulator.set(lazy.name(), result);
        subsets.input( lazy.name(), result ) ;
    }

    return tbl_cpp( accumulator, 1 ) ;
}
Ejemplo n.º 4
0
SEXP summarise_grouped(const DataFrame& df, const LazyDots& dots){
    Data gdf(df) ;
    
    int nexpr = dots.size() ;
    int nvars = gdf.nvars() ;
    check_not_groups(dots, gdf);
    NamedListAccumulator<Data> accumulator ;

    int i=0;
    for( ; i<nvars; i++){
        accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ;
    }

    Subsets subsets(gdf) ;
    for( int k=0; k<nexpr; k++, i++ ){
        Rcpp::checkUserInterrupt() ;
        const Lazy& lazy = dots[k] ;
        const Environment& env = lazy.env() ;
        
        Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ;
        boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) );
        
        // if we could not find a direct Result
        // we can use a GroupedCallReducer which will callback to R
        if( !res ) {
            res.reset( new GroupedCallReducer<Data, Subsets>( lazy.expr(), subsets, env) );
        }
        Shield<SEXP> result( res->process(gdf) ) ;
        accumulator.set( lazy.name(), result );
        subsets.input( lazy.name(), SummarisedVariable(result) ) ;
        
    }

    return summarised_grouped_tbl_cpp<Data>(accumulator, gdf );
}
Ejemplo n.º 5
0
SEXP summarise_not_grouped(DataFrame df, const LazyDots& dots){
    int nexpr = dots.size() ;
    if( nexpr == 0) return DataFrame() ;
    
    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    NamedListAccumulator<DataFrame> accumulator ;

    Rcpp::Shelter<SEXP> __ ;
    for( int i=0; i<nexpr; i++){
        Rcpp::checkUserInterrupt() ;
        
        const Lazy& lazy = dots[i] ;
        Environment env = lazy.env() ;
        Result* res = get_handler( lazy.expr(), subsets, env ) ;
        
        SEXP result ;
        if(res) {
            result = __(res->process( FullDataFrame(df) )) ;
        } else {
            result = __(CallProxy( lazy.expr(), subsets, env).eval()) ;
        }
        delete res ;
        if( Rf_length(result) != 1 ){
            stop( "expecting result of length one, got : %d", Rf_length(result) ) ;
        }
        accumulator.set(lazy.name(), result);
        subsets.input( Symbol(lazy.name()), result ) ;
    }

    return tbl_cpp( accumulator, 1 ) ;
}
Ejemplo n.º 6
0
SEXP summarise_not_grouped(DataFrame df, List args, const DataDots& dots){
    int nexpr = dots.size() ;
    if( nexpr == 0) return DataFrame() ;
    
    CharacterVector names = args.names();

    LazySubsets subsets( df ) ;
    std::vector<SEXP> results ;
    std::vector<SEXP> result_names ;
    NamedListAccumulator<DataFrame> accumulator ;

    Rcpp::Shelter<SEXP> __ ;
    for( int i=0; i<nexpr; i++){
        Rcpp::checkUserInterrupt() ;
        
        SEXP name = names[dots.expr_index(i)] ;
        Environment env = dots.envir(i) ;
        Result* res = get_handler( args[i], subsets, env ) ;
        
        SEXP result ;
        if(res) {
            result = __(res->process( FullDataFrame(df) )) ;
        } else {
            result = __(CallProxy( args[dots.expr_index(i)], subsets, env).eval()) ;
        }
        delete res ;
        if( Rf_length(result) != 1 ){
            std::stringstream s ;
            s << "expecting result of length one, got : "
              << Rf_length(result) ;
            stop(s.str()) ;
        }
        subsets.input( Symbol(name), result ) ;
        accumulator.set(name, result);
    }

    return tbl_cpp( accumulator, 1 ) ;
}
Ejemplo n.º 7
0
SEXP summarise_grouped(const DataFrame& df, List args, const DataDots& dots){
    Data gdf(df) ;
    
    int nexpr = dots.size() ;
    int nvars = gdf.nvars() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);
    NamedListAccumulator<Data> accumulator ;

    int i=0;
    for( ; i<nvars; i++){
        accumulator.set( PRINTNAME(gdf.symbol(i)), shared_SEXP(gdf.label(i)) ) ;
    }

    Subsets subsets(gdf) ;
    Shelter<SEXP> __ ;
    for( int k=0; k<nexpr; k++, i++ ){
        Rcpp::checkUserInterrupt() ;
        
        Environment env = dots.envir(k) ;

        Result* res = get_handler( args[dots.expr_index(k)], subsets, env ) ;
        
        // if we could not find a direct Result
        // we can use a GroupedCallReducer which will callback to R
        if( !res ) res = new GroupedCallReducer<Data, Subsets>( args[dots.expr_index(k)], subsets, env) ;
        
        
        SEXP result = __( res->process(gdf) ) ;
        SEXP name = results_names[dots.expr_index(k)] ;
        accumulator.set( name, result );
        subsets.input( Symbol(name), SummarisedVariable(result) ) ;
        delete res;
    }

    return summarised_grouped_tbl_cpp<Data>(accumulator, gdf );
}
Ejemplo n.º 8
0
SEXP mutate_not_grouped(DataFrame df, List args, const DataDots& dots){
    Shelter<SEXP> __ ;

    Environment env = dots.envir(0) ;

    int nexpr = args.size() ;
    CharacterVector results_names = args.names() ;

    NamedListAccumulator<SEXP> accumulator ;
    int nvars = df.size() ;
    CharacterVector df_names = df.names() ;
    for( int i=0; i<nvars; i++){
        accumulator.set( df_names[i], df[i] ) ;
    }

    CallProxy call_proxy(df, env) ;
    for( int i=0; i<nexpr; i++){
        env = dots.envir(i) ;
        call_proxy.set_env(env) ;

        SEXP call = args[i] ;
        SEXP name = results_names[i] ;
        SEXP result = R_NilValue ;
        if( TYPEOF(call) == SYMSXP ){
            if(call_proxy.has_variable(call)){
                result = call_proxy.get_variable(PRINTNAME(call)) ;
            } else {
                result = env.find(CHAR(PRINTNAME(call))) ;
                SET_NAMED(result,2) ;
            }
        } else if( TYPEOF(call) == LANGSXP ){
            call_proxy.set_call( args[i] );

            // we need to protect the SEXP, that's what the Shelter does
            result = __( call_proxy.eval() ) ;

        } else if( Rf_length(call) == 1 ){
            boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, df.nrows() ) );
            result = __( gather->collect() ) ;
        } else {
            stop( "cannot handle" ) ;
        }


        if( Rf_length(result) == df.nrows() ){
            // ok
        } else if( Rf_length(result) == 1 ){
            // recycle
            Gatherer* gather = constant_gatherer( result, df.nrows() ) ;
            result = __( gather->collect() ) ;
            delete gather ;
        } else {
            std::stringstream s ;
            s << "wrong result size ("
              << Rf_length(result)
              << "), expected "
              << df.nrows()
              << " or 1" ;
            stop(s.str()) ;
        }

        call_proxy.input( name, result ) ;
        accumulator.set( name, result );
    }

    List res = structure_mutate(accumulator, df, classes_not_grouped() ) ;

    return res ;
}
Ejemplo n.º 9
0
SEXP mutate_grouped(GroupedDataFrame gdf, List args, const DataDots& dots){
    const DataFrame& df = gdf.data() ;
    int nexpr = args.size() ;
    CharacterVector results_names = args.names() ;
    check_not_groups(results_names, gdf);

    Environment env = dots.envir(0) ;
    GroupedCallProxy proxy(gdf, env) ;
    Shelter<SEXP> __ ;

    NamedListAccumulator<SEXP> accumulator ;
    int ncolumns = df.size() ;
    CharacterVector column_names = df.names() ;
    for( int i=0; i<ncolumns; i++){
        accumulator.set( column_names[i], df[i] ) ;
    }

    for( int i=0; i<nexpr; i++){
        env = dots.envir(i) ;
        proxy.set_env( env ) ;
        SEXP call = args[i] ;
        SEXP name = results_names[i] ;
        SEXP variable = R_NilValue ;
        if( TYPEOF(call) == SYMSXP ){
            if(proxy.has_variable(call)){
                variable = proxy.get_variable( PRINTNAME(call) ) ;
            } else {
                SEXP v = env.find(CHAR(PRINTNAME(call))) ;
                if( Rf_isNull(v) ){
                    std::stringstream s ;
                    s << "unknown variable: " << CHAR(PRINTNAME(call)) ;
                    stop(s.str());
                } else if( Rf_length(v) == 1){
                    Replicator* rep = constant_replicator(v, gdf.nrows() );
                    variable = __( rep->collect() );
                    delete rep ;
                } else {
                    Replicator* rep = replicator(v, gdf) ;
                    variable = __( rep->collect() );
                    delete rep ;
                }
            }

        } else if(TYPEOF(call) == LANGSXP){
            proxy.set_call( call );
            Gatherer* gather = gatherer( proxy, gdf ) ;
            variable = __( gather->collect() ) ;
            delete gather ;
        } else if(Rf_length(call) == 1) {
            boost::scoped_ptr<Gatherer> gather( constant_gatherer( call, gdf.nrows() ) );
            variable = __( gather->collect() ) ;
        } else {
            stop( "cannot handle" ) ;
        }

        proxy.input( name, variable ) ;
        accumulator.set( name, variable) ;
    }

    return structure_mutate(accumulator, df, classes_grouped() );
}
Ejemplo n.º 10
0
SEXP mutate_not_grouped(DataFrame df, const LazyDots& dots) {
  int nexpr = dots.size();
  int nrows = df.nrows();

  NamedListAccumulator<DataFrame> accumulator;
  int nvars = df.size();
  if (nvars) {
    CharacterVector df_names = df.names();
    for (int i=0; i<nvars; i++) {
      accumulator.set(Symbol(df_names[i]), df[i]);
    }
  }

  CallProxy call_proxy(df);
  List results(nexpr);

  for (int i=0; i<nexpr; i++) {
    Rcpp::checkUserInterrupt();
    const Lazy& lazy = dots[i];

    Shield<SEXP> call_(lazy.expr());
    SEXP call = call_;
    Symbol name = lazy.name();
    Environment env = lazy.env();
    call_proxy.set_env(env);

    if (TYPEOF(call) == SYMSXP) {
      if (call_proxy.has_variable(call)) {
        results[i] = call_proxy.get_variable(PRINTNAME(call));
      } else {
        results[i] = shared_SEXP(env.find(CHAR(PRINTNAME(call))));
      }
    } else if (TYPEOF(call) == LANGSXP) {
      call_proxy.set_call(call);
      results[i] = call_proxy.eval();
    } else if (Rf_length(call) == 1) {
      boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, nrows));
      results[i] = gather->collect();
    } else if (Rf_isNull(call)) {
      accumulator.rm(name);
      continue;
    } else {
      stop("cannot handle");
    }

    check_supported_type(results[i], name.c_str());

    if (Rf_inherits(results[i], "POSIXlt")) {
      stop("`mutate` does not support `POSIXlt` results");
    }
    int n_res = Rf_length(results[i]);
    if (n_res == nrows) {
      // ok
    } else if (n_res == 1) {
      // recycle
      boost::scoped_ptr<Gatherer> gather(constant_gatherer(results[i] , df.nrows()));
      results[i] = gather->collect();
    } else {
      stop("wrong result size (%d), expected %d or 1", n_res, nrows);
    }

    call_proxy.input(name, results[i]);
    accumulator.set(name, results[i]);
  }
  List res = structure_mutate(accumulator, df, classes_not_grouped());

  return res;
}
Ejemplo n.º 11
0
SEXP mutate_grouped(const DataFrame& df, const LazyDots& dots) {
  LOG_VERBOSE << "checking zero rows";

  // special 0 rows case
  if (df.nrows() == 0) {
    DataFrame res = mutate_not_grouped(df, dots);
    res.attr("vars") = df.attr("vars");
    res.attr("class") = df.attr("class");
    return Data(res).data();
  }

  LOG_VERBOSE << "initializing proxy";

  typedef GroupedCallProxy<Data, Subsets> Proxy;
  Data gdf(df);
  int nexpr = dots.size();
  check_not_groups(dots, gdf);

  Proxy proxy(gdf);

  LOG_VERBOSE << "copying data to accumulator";

  NamedListAccumulator<Data> accumulator;
  int ncolumns = df.size();
  CharacterVector column_names = df.names();
  for (int i=0; i<ncolumns; i++) {
    accumulator.set(Symbol(column_names[i]), df[i]);
  }

  LOG_VERBOSE << "processing " << nexpr << " variables";

  List variables(nexpr);
  for (int i=0; i<nexpr; i++) {
    Rcpp::checkUserInterrupt();
    const Lazy& lazy = dots[i];

    Environment env = lazy.env();
    Shield<SEXP> call_(lazy.expr());
    SEXP call = call_;
    Symbol name = lazy.name();
    proxy.set_env(env);

    LOG_VERBOSE << "processing " << CharacterVector(name);

    if (TYPEOF(call) == LANGSXP || TYPEOF(call) == SYMSXP) {
      proxy.set_call(call);
      boost::scoped_ptr<Gatherer> gather(gatherer<Data, Subsets>(proxy, gdf, name));
      SEXP variable = variables[i] = gather->collect();
      proxy.input(name, variable);
      accumulator.set(name, variable);
    } else if (Rf_length(call) == 1) {
      boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, gdf.nrows()));
      SEXP variable = variables[i] = gather->collect();
      proxy.input(name, variable);
      accumulator.set(name, variable);
    } else if (Rf_isNull(call)) {
      accumulator.rm(name);
      continue;
    } else {
      stop("cannot handle");
    }
  }

  return structure_mutate(accumulator, df, df.attr("class"));
}
Ejemplo n.º 12
0
SEXP mutate_not_grouped(DataFrame df, const QuosureList& dots) {
  const int nexpr = dots.size();
  const int nrows = df.nrows();

  NamedListAccumulator<DataFrame> accumulator;
  const int nvars = df.size();
  if (nvars) {
    CharacterVector df_names = df.names();
    for (int i = 0; i < nvars; i++) {
      accumulator.set(df_names[i], df[i]);
    }
  }

  CallProxy call_proxy(df);
  List results(nexpr);

  for (int i = 0; i < nexpr; i++) {
    Rcpp::checkUserInterrupt();
    const NamedQuosure& quosure = dots[i];

    Shield<SEXP> call_(quosure.expr());
    SEXP call = call_;
    SymbolString name = quosure.name();
    Environment env = quosure.env();
    call_proxy.set_env(env);

    if (TYPEOF(call) == SYMSXP) {
      SymbolString call_name = SymbolString(Symbol(call));
      if (call_proxy.has_variable(call_name)) {
        results[i] = call_proxy.get_variable(call_name);
      } else {
        results[i] = shared_SEXP(env.find(call_name.get_string()));
      }
    } else if (TYPEOF(call) == LANGSXP) {
      call_proxy.set_call(call);
      results[i] = call_proxy.eval();
    } else if (Rf_length(call) == 1) {
      boost::scoped_ptr<Gatherer> gather(constant_gatherer(call, nrows));
      results[i] = gather->collect();
    } else if (Rf_isNull(call)) {
      accumulator.rm(name);
      continue;
    } else {
      stop("cannot handle");
    }

    if (Rf_inherits(results[i], "POSIXlt")) {
      stop("`mutate` does not support `POSIXlt` results");
    }

    const int n_res = Rf_length(results[i]);
    check_supported_type(results[i], name);
    check_length(n_res, nrows, "the number of rows");

    if (n_res == 1 && nrows != 1) {
      // recycle
      boost::scoped_ptr<Gatherer> gather(constant_gatherer(results[i], nrows));
      results[i] = gather->collect();
    }

    call_proxy.input(name, results[i]);
    accumulator.set(name, results[i]);
  }
  List res = structure_mutate(accumulator, df, classes_not_grouped());

  return res;
}
Ejemplo n.º 13
0
SEXP summarise_grouped(const DataFrame& df, const LazyDots& dots){
    Data gdf(df) ;

    int nexpr = dots.size() ;
    int nvars = gdf.nvars() ;
    check_not_groups(dots, gdf);
    NamedListAccumulator<Data> accumulator ;

    int i=0;
    List results(nvars + nexpr) ;
    for( ; i<nvars; i++){
        results[i] = shared_SEXP(gdf.label(i)) ;
        accumulator.set( PRINTNAME(gdf.symbol(i)), results[i] ) ;
    }

    Subsets subsets(gdf) ;
    for( int k=0; k<nexpr; k++, i++ ){
        Rcpp::checkUserInterrupt() ;
        const Lazy& lazy = dots[k] ;
        const Environment& env = lazy.env() ;

        Shield<SEXP> expr_(lazy.expr()) ; SEXP expr = expr_ ;
        boost::scoped_ptr<Result> res( get_handler( expr, subsets, env ) );
        
        // if we could not find a direct Result
        // we can use a GroupedCallReducer which will callback to R
        if( !res ) {
            res.reset( new GroupedCallReducer<Data, Subsets>( lazy.expr(), subsets, env) );
        }
        RObject result = res->process(gdf)  ;
        results[i] = result ;
        accumulator.set( lazy.name(), result );
        subsets.input( lazy.name(), SummarisedVariable(result) ) ;

    }

    List out = accumulator ;
    copy_most_attributes( out, df) ;
    out.names() = accumulator.names() ;

    int nr = gdf.ngroups() ;
    set_rownames(out, nr ) ;

    if( gdf.nvars() > 1){
        out.attr( "class" ) = classes_grouped<Data>()  ;
        List vars = gdf.data().attr("vars") ;
        vars.erase( gdf.nvars() - 1) ;
        out.attr( "vars") = vars ;
        out.attr( "labels") = R_NilValue ;
        out.attr( "indices") = R_NilValue ;
        out.attr( "group_sizes") = R_NilValue ;
        out.attr( "biggest_group_size") = R_NilValue ;

        out.attr( "drop" ) = true ;
    } else {
        out.attr( "class" ) = classes_not_grouped()  ;
        SET_ATTRIB( out, strip_group_attributes(out) ) ;
    }

    return out ;
}