C++ (Cpp) DataFrame Examples

Programming Language: C++ (Cpp)

Class/Type: DataFrame

Examples at hotexamples.com: 30

Cpp DataFrame is a library that enables the creation, manipulation, and analysis of tabular data in C++. It provides functionalities similar to data frames in other programming languages such as R and Python's pandas library. With Cpp DataFrame, users can easily import data from various sources, perform data transformations and manipulations, apply descriptive and statistical analysis, and export the results. It offers a convenient and efficient way to work with structured data in C++ programming, making it suitable for tasks related to data exploration, data cleaning, data visualization, and data analysis.

C++ (Cpp) DataFrame - 30 examples found. These are the top rated real world C++ (Cpp) examples of DataFrame extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

attr(30)

nrows(30)

names(30)

getNumDataVectors(5)

setTotalDataIn(4)

setTotalErrorsIn(4)

setTotalDataOut(4)

setTotalPacketsOut(4)

setTotalPacketsIn(4)

setTotalDropsIn(4)

setTotalErrorsOut(4)

getTotalDataOut(3)

getTotalDataIn(3)

getTotalDropsIn(3)

getTotalDropsOut(3)

getTotalErrorsOut(3)

getTotalPacketsIn(3)

getTotalPacketsOut(3)

getTotalErrorsIn(3)

getNumFactors(3)

isNominal(3)

getDataElement(3)

matrix(2)

getLabel(2)

getDataVector(2)

getCMD(2)

findBestFeature(2)

ncol(2)

getTrainingLabel(2)

setHeader(2)

setDeviceName(1)

setFactorLabels(1)

addDataVector(1)

setFooter(1)

setIpV4(1)

setTimeStampMicroseconds(1)

setCMD(1)

setTimeStampSeconds(1)

setValid(1)

sortIndicesOnFactorValue(1)

setData(1)

isDataSetPure(1)

selectRandomFactors(1)

getSim(1)

begin(1)

computeBandwidthByFactor(1)

copy(1)

end(1)

getFactorLabelFromIndex(1)

getInstance(1)

Example #1

Show file

File: join_exports.cpp Project: cderv/dplyr

// Inner join keyed on by_x / by_y: collects the row indices of x and y that
// pair up and passes them to subset_join() to assemble the result.
//
// x, y          tables to join
// by_x, by_y    join columns, forwarded to DataFrameJoinVisitors
// aux_x, aux_y  forwarded unchanged to subset_join()
// na_match      forwarded to DataFrameJoinVisitors
//
// The result carries the class reported by get_class(x).
// [[Rcpp::export]]
DataFrame inner_join_impl(DataFrame x, DataFrame y,
                          IntegerVector by_x, IntegerVector by_y,
                          IntegerVector aux_x, IntegerVector aux_y,
                          bool na_match) {
  check_by(by_x);

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, by_x, by_y, false, na_match);
  JoinMap lookup(join_visitors);

  const int rows_in_x = x.nrows();
  const int rows_in_y = y.nrows();

  // The lookup is populated from the n_y rows of the right-hand table.
  train_push_back_right(lookup, rows_in_y);

  std::vector<int> rows_x;
  std::vector<int> rows_y;

  for (int row = 0; row < rows_in_x; ++row) {
    JoinMap::iterator hit = lookup.find(row);
    if (hit == lookup.end()) {
      continue;  // row of x has no partner: an inner join drops it
    }
    // One output row per stored match; x's row index is pushed with the
    // match count so both index vectors stay aligned.
    push_back_right(rows_y, hit->second);
    push_back(rows_x, row, hit->second.size());
  }

  return subset_join(x, y,
                     rows_x, rows_y,
                     by_x, by_y,
                     aux_x, aux_y,
                     get_class(x));
}

Example #2

Show file

File: join_exports.cpp Project: cderv/dplyr

// Right join keyed on by_x / by_y: every row of y appears in the output,
// paired with its matching rows of x when there are any.
//
// x, y          tables to join
// by_x, by_y    join columns, forwarded to DataFrameJoinVisitors
// aux_x, aux_y  forwarded unchanged to subset_join()
// na_match      forwarded to DataFrameJoinVisitors
//
// The result carries the class reported by get_class(x).
// [[Rcpp::export]]
DataFrame right_join_impl(DataFrame x, DataFrame y,
                          IntegerVector by_x, IntegerVector by_y,
                          IntegerVector aux_x, IntegerVector aux_y,
                          bool na_match) {
  check_by(by_x);

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, by_x, by_y, false, na_match);
  JoinMap lookup(join_visitors);

  // The lookup is populated from the rows of x.
  train_push_back(lookup, x.nrows());

  std::vector<int> rows_x;
  std::vector<int> rows_y;

  const int rows_in_y = y.nrows();
  for (int row = 0; row < rows_in_y; ++row) {
    // Row `row` of y is looked up through the negative key -row - 1.
    JoinMap::iterator hit = lookup.find(-row - 1);
    if (hit == lookup.end()) {
      // No partner in x: the x side records the negative key, which points
      // at the row-th row of the right table.
      rows_x.push_back(-row - 1);
      rows_y.push_back(row);
      continue;
    }
    push_back(rows_x, hit->second);
    push_back(rows_y, row, hit->second.size());
  }

  return subset_join(x, y,
                     rows_x, rows_y,
                     by_x, by_y,
                     aux_x, aux_y,
                     get_class(x));
}

Example #3

Show file

File: RandomTree.cpp Project: BSteine/hootenanny

  /**
   * Accumulates pairwise proximity counts for the data vectors of a data frame.
   *
   * Each data vector is classified with classifyDataVector() and the returned
   * node id is remembered. For every pair (j, k) with k >= j whose node ids are
   * equal, the entry proximity[j * dSize + k] is incremented, where dSize is
   * data.getNumDataVectors().
   *
   * @param data      data frame whose vectors are classified
   * @param proximity flattened dSize x dSize counter array; only entries with
   *                  k >= j are touched. This function does not resize it, so
   *                  it must already hold at least dSize * dSize elements.
   */
  void RandomTree::findProximity(DataFrame & data, std::vector<unsigned int> & proximity)
  {
    std::vector<unsigned int> proxVec;

    unsigned int dSize = data.getNumDataVectors();
    proxVec.resize(dSize);

    // Find out which node each vector is classified as
    for(unsigned int i = 0; i < dSize; i++)
    {
      std::string resultClass;
      unsigned int nodeId = classifyDataVector(data.getDataVector(i), resultClass);

      proxVec[i] = nodeId;
    }

    for(unsigned int j = 0; j < dSize; j++)
    {
      unsigned int tempId = proxVec[j];
      // The inner loop must advance k. Advancing j here (as the code
      // previously did) never moves k forward and corrupts the outer index.
      for(unsigned int k = j; k < dSize; k++)
      {
        if(proxVec[k] == tempId)
        {
          proximity[j * dSize + k] += 1;
        }
      }
    }
  }

Example #4

Show file

File: filter.cpp Project: jeevadatascience/dplyr

// Subsets the data behind a grouped wrapper and returns it re-wrapped.
//
// The rows are selected with subset(), the "vars" attribute of the source
// data is copied onto the result, strip_index() is applied, and the result
// is finally passed through a fresh Data wrapper whose data() is returned.
inline DataFrame grouped_subset( const Data& gdf, const LogicalVector& test, const CharacterVector& names, CharacterVector classes){
  DataFrame source = gdf.data() ;
  DataFrame filtered = subset( source, test, names, classes) ;

  // carry the grouping variables over before the index is stripped
  filtered.attr("vars") = source.attr("vars") ;
  strip_index(filtered);

  Data regrouped(filtered) ;
  return regrouped.data() ;
}

Example #5

Show file

File: DataFrameSubsampler.cpp Project: mitulvpatel/hootenanny

  /**
   * Discretizes every non-nominal factor of the given data frame.
   *
   * The frame's address is stored in _df for use by the helper methods.
   * Nominal factors are skipped. For the others, nulls are first replaced
   * via _replaceNulls() when the factor's null treatment is
   * DataFrame::NullAsMissingValue, then _discretizeColumn() is applied.
   *
   * @param df       data frame to process
   * @param progress optional progress sink (may be null); it receives the
   *                 fraction of factors visited and 1.0 on completion
   */
  void DataFrameDiscretizer::discretize(DataFrame& df, TgsProgress* progress)
  {
    _df = &df;

    for (unsigned int factor = 0; factor < df.getNumFactors(); factor++)
    {
      if (progress)
      {
        progress->setProgress((double)factor / (double)df.getNumFactors());
      }

      if (_df->isNominal(factor))
      {
        // nominal factors are left untouched
        continue;
      }

      if (_df->getNullTreatment(factor) == DataFrame::NullAsMissingValue)
      {
        // Impute nulls by random sampling of the data; without this, nulls
        // get put into their own category.
        _replaceNulls(factor);
      }
      _discretizeColumn(factor);
    }

    if (progress)
    {
      progress->setProgress(1.0);
    }
  }

Example #6

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Inner join keyed on the named columns by_x / by_y: collects the matching
// row indices of x and y and passes them to subset_join().
//
// x, y                tables to join
// by_x, by_y          join column names (at least one is required)
// suffix_x, suffix_y  forwarded unchanged to subset_join()
// na_match            forwarded to DataFrameJoinVisitors
//
// Stops with "no variable to join by" when by_x is empty.
// [[Rcpp::export]]
DataFrame inner_join_impl(DataFrame x, DataFrame y,
                          CharacterVector by_x, CharacterVector by_y,
                          std::string& suffix_x, std::string& suffix_y,
                          bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), true, na_match);
  JoinMap lookup(join_visitors);

  const int rows_in_x = x.nrows();
  const int rows_in_y = y.nrows();

  // The lookup is populated from the n_y rows of the right-hand table.
  train_push_back_right(lookup, rows_in_y);

  std::vector<int> rows_x;
  std::vector<int> rows_y;

  for (int row = 0; row < rows_in_x; ++row) {
    JoinMap::iterator hit = lookup.find(row);
    if (hit == lookup.end()) {
      continue;  // unmatched rows of x are dropped
    }
    push_back_right(rows_y, hit->second);
    push_back(rows_x, row, hit->second.size());
  }

  return subset_join(x, y,
                     rows_x, rows_y,
                     by_x, by_y,
                     suffix_x, suffix_y,
                     get_class(x));
}

Example #7

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Anti join: keeps the rows of x that have no match in y on by_x / by_y.
//
// Stops with "no variable to join by" when by_x is empty. The result holds
// all columns of x, has the class reported by get_class(x), and is passed
// through strip_index() before being returned.
// [[Rcpp::export]]
DataFrame anti_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  JoinMap lookup(join_visitors);

  // Populate the lookup from the rows of x ...
  train_push_back(lookup, x.nrows());

  // ... then erase every entry that some row of y (key -row - 1) hits.
  const int rows_in_y = y.nrows();
  for (int row = 0; row < rows_in_y; ++row) {
    JoinMap::iterator hit = lookup.find(-row - 1);
    if (hit == lookup.end()) {
      continue;
    }
    lookup.erase(hit);
  }

  // Whatever survived has no partner in y.
  std::vector<int> kept_rows;
  JoinMap::iterator entry = lookup.begin();
  while (entry != lookup.end()) {
    push_back(kept_rows, entry->second);
    ++entry;
  }

  const DataFrame& result = subset(x, kept_rows, x.names(), get_class(x));
  strip_index(result);
  return result;
}

Example #8

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Right join keyed on the named columns by_x / by_y: every row of y appears
// in the output, paired with its matching rows of x when there are any.
//
// x, y                tables to join
// by_x, by_y          join column names (at least one is required)
// suffix_x, suffix_y  forwarded unchanged to subset_join()
// na_match            forwarded to DataFrameJoinVisitors
//
// Stops with "no variable to join by" when by_x is empty.
// [[Rcpp::export]]
DataFrame right_join_impl(DataFrame x, DataFrame y,
                          CharacterVector by_x, CharacterVector by_y,
                          std::string& suffix_x, std::string& suffix_y,
                          bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), true, na_match);
  JoinMap lookup(join_visitors);

  // The lookup is populated from the rows of x.
  train_push_back(lookup, x.nrows());

  std::vector<int> rows_x;
  std::vector<int> rows_y;

  const int rows_in_y = y.nrows();
  for (int row = 0; row < rows_in_y; ++row) {
    // Row `row` of y is looked up through the negative key -row - 1.
    JoinMap::iterator hit = lookup.find(-row - 1);
    if (hit == lookup.end()) {
      // No partner in x: the x side records the negative key, which points
      // at the row-th row of the right table.
      rows_x.push_back(-row - 1);
      rows_y.push_back(row);
      continue;
    }
    push_back(rows_x, hit->second);
    push_back(rows_y, row, hit->second.size());
  }

  return subset_join(x, y,
                     rows_x, rows_y,
                     by_x, by_y,
                     suffix_x, suffix_y,
                     get_class(x));
}

Example #9

Show file

File: join_exports.cpp Project: Klaus012/dplyr

// Semi join: keeps the rows of x that have a match in y on by_x / by_y.
//
// Stops with "no variable to join by" when by_x is empty. The result holds
// all columns of x, has the class reported by get_class(x), and is passed
// through strip_index() before being returned.
// [[Rcpp::export]]
DataFrame semi_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, bool na_match) {
  if (by_x.size() == 0) {
    stop("no variable to join by");
  }

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;
  DataFrameJoinVisitors join_visitors(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match);
  JoinMap lookup(join_visitors);

  // Populate the lookup from the rows of x.
  train_push_back(lookup, x.nrows());

  // Row indices of x that are matched by at least one row of y.
  std::vector<int> matched_rows;

  const int rows_in_y = y.nrows();
  for (int row = 0; row < rows_in_y; ++row) {
    // Row `row` of y is looked up through the negative key -row - 1.
    JoinMap::iterator hit = lookup.find(-row - 1);
    if (hit == lookup.end()) {
      continue;
    }
    // Record the matched rows, then drop the entry so a later row of y with
    // the same key cannot contribute them a second time.
    push_back(matched_rows, hit->second);
    lookup.erase(hit);
  }

  const DataFrame& result = subset(x, matched_rows, x.names(), get_class(x));
  strip_index(result);
  return result;
}

Example #10

Show file

File: filter.cpp Project: regine-adhoc/dplyr

// Entry point for filtering a data frame by the lazily captured expressions
// in `dots`.
//
// Returns df unchanged when it has no rows (or is NULL) or when no
// expressions are given. A single length-one logical expression is
// short-circuited: TRUE returns df, anything else returns an empty subset.
// Otherwise the work is dispatched on whether df is grouped, rowwise, or
// neither.
// [[Rcpp::export]]
SEXP filter_impl( DataFrame df, LazyDots dots){
    if( df.nrows() == 0 || Rf_isNull(df) ) return df ;

    check_valid_colnames(df) ;
    assert_all_white_list(df) ;

    if( dots.size() == 0 ) return df ;

    // Special case: one expression that is already a logical vector.
    if( dots.size() == 1 && TYPEOF(dots[0].expr()) == LGLSXP){
        LogicalVector flag = dots[0].expr() ;
        if( flag.size() == 1 ){
            if( flag[0] == TRUE ) return df ;
            return empty_subset( df, df.names(), is<GroupedDataFrame>(df) ? classes_grouped<GroupedDataFrame>() : classes_not_grouped() ) ;
        }
    }

    if( is<GroupedDataFrame>( df ) ){
        return filter_grouped<GroupedDataFrame, LazyGroupedSubsets>( GroupedDataFrame(df), dots);
    }
    if( is<RowwiseDataFrame>(df) ){
        return filter_grouped<RowwiseDataFrame, LazyRowwiseSubsets>( RowwiseDataFrame(df), dots);
    }
    return filter_not_grouped( df, dots ) ;
}

Example #11

Show file

File: device.cpp Project: rsaxvc/ioload

// Refreshes the device's statistics and graphs from a newly read data frame.
// An invalid frame resets the statistics and both traffic graphs instead.
void Device::update()
{
    // fetch the current traffic counters
    DataFrame dataFrame = m_devReader.getNewDataFrame();

    if(!dataFrame.isValid())
    {
        m_deviceStatistics.reset();
        m_deviceGraphIn.resetTrafficData();
        m_deviceGraphOut.resetTrafficData();
        return;
    }

    /* The counters can overflow, depending on the CPU architecture and
     * on the OS interface the statistics are read through. Overflows
     * are detected against the previous frame and corrected.
     */
    fixOverflows(dataFrame, m_dataFrameOld);

    m_deviceStatistics.insertDataFrame(dataFrame);

    m_deviceGraphIn.update(m_deviceStatistics.getDataInPerSecond());
    m_deviceGraphOut.update(m_deviceStatistics.getDataOutPerSecond());

    // remember this frame for the next overflow check
    m_dataFrameOld = dataFrame;
}

Example #12

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Filters an ungrouped data frame by the expressions in `args`.
//
// When all expressions share one environment they are combined into a single
// call (a, b, c -> a & b & c) and evaluated once. Otherwise each expression
// is evaluated in its own environment and the logical results are merged
// with combine_and(). The selected rows are returned with all columns of df
// and the classes from classes_not_grouped().
SEXP filter_not_grouped( DataFrame df, List args, const DataDots& dots){
    // Collect the symbols of all column names.
    CharacterVector column_names = df.names() ;
    SymbolSet symbols ;
    for( int col=0; col<column_names.size(); col++){
        symbols.insert( Rf_install( column_names[col] ) ) ;
    }

    if( !dots.single_env() ){
        int nargs = args.size() ;

        // The first expression seeds the mask and is validated against the
        // number of rows.
        CallProxy first_proxy(args[0], df, dots.envir(0) ) ;
        LogicalVector keep = first_proxy.eval() ;
        check_filter_result(keep, df.nrows());

        // The remaining expressions are folded into the mask one by one.
        for( int arg=1; arg<nargs; arg++){
            LogicalVector next = CallProxy(args[arg], df, dots.envir(arg) ).eval() ;
            combine_and(keep, next) ;
        }

        DataFrame res = subset( df, keep, df.names(), classes_not_grouped() ) ;
        return res ;
    }

    Environment env = dots.envir(0) ;
    // a, b, c ->  a & b & c
    Shield<SEXP> combined( and_calls( args, symbols ) ) ;

    // Evaluate the combined expression against the columns of df.
    CallProxy proxy( (SEXP)combined, df, env ) ;
    LogicalVector keep = proxy.eval() ;
    check_filter_result(keep, df.nrows());
    DataFrame res = subset( df, keep, df.names(), classes_not_grouped() ) ;
    return res ;
}

Example #13

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Grouped filter for the case where every expression in `args` is evaluated
// in the same environment.
//
// The expressions are combined into one call (a, b, c -> a & b & c), which is
// evaluated group by group. Each group's logical result is validated against
// the group size and scattered into a full-length mask, which then selects
// the rows. The "vars" attribute of the source data is copied to the result.
DataFrame filter_grouped_single_env( const GroupedDataFrame& gdf, const List& args, const Environment& env){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Collect the symbols of all column names.
    SymbolSet symbols ;
    for( int col=0; col<names.size(); col++){
        symbols.insert( Rf_install( names[col] ) ) ;
    }

    // a, b, c ->  a & b & c
    Call combined( and_calls( args, symbols ) ) ;

    int nrows = data.nrows() ;
    // Left uninitialised: the group loop below writes the entries.
    LogicalVector mask = no_init(nrows);

    LogicalVector group_mask ;
    GroupedCallProxy call_proxy( combined, gdf, env ) ;

    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator git = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++git){
        SlicingIndex rows = *git ;
        int group_size = rows.size() ;

        group_mask = call_proxy.get( rows );
        check_filter_result(group_mask, group_size ) ;

        // copy the group's verdicts to the rows they belong to
        for( int pos=0; pos<group_size; pos++){
            mask[ rows[pos] ] = group_mask[pos] ;
        }
    }

    DataFrame res = subset( data, mask, names, classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}

Example #14

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Semi join: keeps the rows of x that have a match in y on the `by` columns.
// The result holds all columns of x and reuses x's "class" attribute.
// [[Rcpp::export]]
DataFrame semi_join_impl( DataFrame x, DataFrame y, CharacterVector by){
    typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap ;
    DataFrameJoinVisitors join_visitors(x, y, by) ;
    JoinMap lookup(join_visitors);

    // Populate the lookup from the rows of x; the third argument is a tenth
    // of the row count.
    const int rows_in_x = x.nrows() ;
    train_push_back( lookup, rows_in_x, rows_in_x / 10) ;

    // Row indices of x that are matched by at least one row of y.
    std::vector<int> matched_rows ;

    const int rows_in_y = y.nrows() ;
    for( int row=0; row<rows_in_y; ++row){
        // Row `row` of y is looked up through the negative key -row-1.
        JoinMap::iterator hit = lookup.find(-row-1) ;
        if( hit == lookup.end() ) continue ;

        // Record the matched rows, then drop the entry so that they are
        // only collected once.
        push_back( matched_rows, hit->second ) ;
        lookup.erase(hit) ;
    }

    return subset(x, matched_rows, x.names(), x.attr("class") ) ;
}

Example #15

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Right join on the `by` columns: every row of y appears in the output,
// paired with its matching rows of x when there are any. The result reuses
// x's "class" attribute.
// [[Rcpp::export]]
DataFrame right_join_impl( DataFrame x, DataFrame y, CharacterVector by){
    typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap ;
    DataFrameJoinVisitors join_visitors(x, y, by) ;
    JoinMap lookup(join_visitors);

    // Populate the lookup from the rows of x; the third argument is a tenth
    // of the row count.
    const int rows_in_x = x.nrows() ;
    train_push_back( lookup, rows_in_x, rows_in_x / 10 ) ;

    std::vector<int> rows_x ;
    std::vector<int> rows_y ;

    const int rows_in_y = y.nrows() ;
    for( int row=0; row<rows_in_y; ++row){
        // Row `row` of y is looked up through the negative key -row-1.
        JoinMap::iterator hit = lookup.find(-row-1) ;
        if( hit == lookup.end() ){
            rows_x.push_back(-1) ; // no partner in x: mark NA
            rows_y.push_back(row) ;
            continue ;
        }
        push_back( rows_x, hit->second ) ;
        push_back( rows_y, row, hit->second.size() ) ;
    }
    return subset( x, y, rows_x, rows_y, by, x.attr( "class" ) ) ;
}

Example #16

Show file

File: dplyr.cpp Project: jimhester/dplyr

// Filters a grouped data frame by the expressions in `args`, all evaluated
// in `env`.
//
// The expressions are combined into one call (a, b, c -> a & b & c), which is
// evaluated group by group. Each group's logical result is scattered into a
// full-length mask, which then selects the rows. The "vars" attribute of the
// source data is copied to the result.
DataFrame filter_grouped( const GroupedDataFrame& gdf, List args, Environment env){
    // a, b, c ->  a & b & c
    Language combined = and_calls( args ) ;

    const DataFrame& data = gdf.data() ;
    int nrows = data.nrows() ;
    // Left uninitialised: the group loop below writes the entries.
    LogicalVector mask = no_init(nrows);

    LogicalVector group_mask ;
    GroupedCallProxy call_proxy( combined, gdf, env ) ;

    int ngroups = gdf.ngroups() ;
    GroupedDataFrame::group_iterator git = gdf.group_begin() ;
    for( int g=0; g<ngroups; g++, ++git){
        SlicingIndex rows = *git ;
        group_mask = call_proxy.get( rows );

        // copy the group's verdicts to the rows they belong to
        int group_size = rows.size() ;
        for( int pos=0; pos<group_size; pos++){
            mask[ rows[pos] ] = group_mask[pos] ;
        }
    }

    DataFrame res = subset( data, mask, data.names(), classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}

Example #17

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Builds the result of a join from two tables and the row indices selected
// from each.
//
// All columns of x come first, followed by the columns of y that are not in
// `by`. Column names are obtained from a JoinColumnSuffixer with ".x" / ".y"
// as the suffix arguments. The result gets `classes` as its class, row names
// for indices_x.size() rows, and x's "vars" attribute when x has one.
DataFrame subset( DataFrame x, DataFrame y, const Index& indices_x, const Index& indices_y, CharacterVector by, CharacterVector classes ){
    CharacterVector x_columns = x.names() ;
    DataFrameVisitors visitors_x(x, x_columns) ;

    // y contributes only its non-join columns
    CharacterVector all_y_columns = y.names() ;
    CharacterVector y_columns = setdiff( all_y_columns, by ) ;
    JoinColumnSuffixer suffixer(x_columns, y_columns, by) ;

    DataFrameVisitors visitors_y(y, y_columns) ;

    int nrows = indices_x.size() ;
    int nv_x = visitors_x.size(), nv_y = visitors_y.size() ;
    List out(nv_x+nv_y);
    CharacterVector names(nv_x+nv_y) ;

    // columns taken from x occupy the first nv_x slots
    for( int col=0; col<nv_x; col++){
       out[col]   = visitors_x.get(col)->subset(indices_x) ;
       names[col] = suffixer.get( x_columns[col], ".x" ) ;
    }
    // columns taken from y follow directly after
    for( int col=0; col<nv_y; col++){
       const int slot = nv_x + col ;
       out[slot]   = visitors_y.get(col)->subset(indices_y) ;
       names[slot] = suffixer.get(y_columns[col], ".y" ) ;
    }

    out.attr("class") = classes ;
    set_rownames(out, nrows) ;
    out.names() = names ;

    // propagate the grouping variables of x, if present
    SEXP vars = x.attr( "vars" ) ;
    if( !Rf_isNull(vars) ){
        out.attr( "vars" ) = vars ;
    }

    return (SEXP)out ;
}

Example #18

Show file

File: SymmetricUncertaintyCalculator.cpp Project: Nanonid/hootenanny

  double SymmetricUncertaintyCalculator::_calculateEntropy(const DataFrame& df, int factorIndex)
  {
    typedef HashMap<int, int> ClassCounts;
    ClassCounts cc;
    
    for(unsigned int i = 0; i < df.getNumDataVectors(); i++)
    {
      double v = df.getDataElement(i, factorIndex);
      // null values are not supported Use the DataFrameDiscretizer to "fix" nulls
      if (DataFrame::isNull(v) == true)
      {
        throw Tgs::Exception("Null values are not supported by SymmetricUncertaintyCalculator");
      }
      cc[(int)(v + .5)]++;
    }

    double sum = 0.0;
    double totalSize = df.getNumDataVectors();
    for (ClassCounts::const_iterator classIt = cc.begin(); classIt != cc.end(); classIt++)
    {
      double count = classIt->second;
      sum += count / totalSize * log2(count / totalSize);
    }

    return -sum;
  }

Example #19

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Reorders the rows of `data` by the expressions in `args`.
//
// Each expression is evaluated against the data in its own environment from
// `dots`. An expression that is a call to desc(...) is unwrapped and sorted
// in descending order, anything else ascending. Every evaluated variable
// must have as many elements as data has rows, otherwise stop() is called.
// The result reuses the "class" attribute of `data`.
// [[Rcpp::export]]
DataFrame arrange_impl( DataFrame data, List args, DataDots dots ){
    int nargs = args.size() ;
    List variables(nargs) ;
    LogicalVector ascending(nargs) ;
    Shelter<SEXP> shelter ;

    for(int arg=0; arg<nargs; arg++){
        SEXP expr = args[arg] ;
        // desc(x) is recognised by its head symbol; x is then the sort key
        bool descending = TYPEOF(expr) == LANGSXP && Rf_install("desc") == CAR(expr) ;

        CallProxy call_proxy( descending ? CADR(expr) : expr, data, dots.envir(arg)) ;
        variables[arg] = shelter(call_proxy.eval()) ;

        if( Rf_length(variables[arg]) != data.nrows() ){
            std::stringstream message ;
            message << "incorrect size (" ;
            message << Rf_length(variables[arg]) ;
            message << "), expecting :" ;
            message << data.nrows() ;
            stop(message.str()) ;
        }
        ascending[arg] = !descending ;
    }

    OrderVisitors ordering(variables,ascending, nargs) ;
    IntegerVector index = ordering.apply() ;

    DataFrameVisitors visitors( data, data.names() ) ;
    DataFrame res = visitors.subset(index, data.attr("class") ) ;
    return res;
}

Example #20

Show file

File: distinct.cpp Project: LCHansson/dplyr

// Returns the rows of df that are distinct with respect to the columns named
// in `vars`, keeping the first occurrence of each combination.
//
// df    input data frame
// vars  columns that define row identity
// keep  columns present in the result
//
// df is returned unchanged when it has no columns or when `vars` is empty
// (no vars means ungrouped data with keep_all = TRUE). Otherwise the result
// reuses df's "class" attribute.
// [[Rcpp::export]]
SEXP distinct_impl(DataFrame df, CharacterVector vars, CharacterVector keep) {
  // `vars` is guaranteed non-empty past this guard, so no fallback to
  // df.names() is needed (the old one could never run).
  if (df.size() == 0 || vars.size() == 0)
    return df;

  check_valid_colnames(df);
  DataFrameVisitors visitors(df, vars);

  std::vector<int> indices;
  VisitorSetIndexSet<DataFrameVisitors> set(visitors);

  // A row index is recorded only the first time its key enters the set.
  int n = df.nrows();
  for (int i=0; i<n; i++) {
    if (set.insert(i).second) {
      indices.push_back(i);
    }
  }

  return DataFrameSubsetVisitors(df, keep).subset(indices, df.attr("class"));
}

Example #21

Show file

File: dplyr.cpp Project: jimhester/dplyr

// Assembles the data frame produced by a mutate from the variables held by
// `call_proxy`.
//
// The columns of df come first, in their original order, each read back from
// the proxy. They are followed by the entries of `results_names` that are not
// already column names of df, until call_proxy.nsubsets() columns have been
// filled. Every column is marked with SET_NAMED(.., 2). The result gets
// `classes` as its class and row names for df.nrows() rows.
SEXP structure_mutate( Proxy& call_proxy, const DataFrame& df, const CharacterVector& results_names, CharacterVector classes){
    int n = call_proxy.nsubsets() ;

    List out(n) ;
    CharacterVector names(n) ;

    CharacterVector input_names = df.names() ;
    int ncolumns = df.size() ;

    // existing columns, taken from the proxy
    int filled = 0 ;
    while( filled < ncolumns ){
        out[filled] = call_proxy.get_variable(input_names[filled]) ;
        SET_NAMED( out[filled], 2 );
        names[filled] = input_names[filled] ;
        filled++ ;
    }

    // new columns: walk results_names until every remaining slot is filled
    int k = 0 ;
    while( filled < n ){
        String name = results_names[k] ;
        k++ ;

        // names that already exist in df were handled above
        if( any( input_names.begin(), input_names.end(), name.get_sexp() ) ) continue ;

        SEXP x = call_proxy.get_variable( name ) ;
        out[filled] = x ;
        SET_NAMED( out[filled], 2 );
        names[filled] = name ;
        filled++ ;
    }

    out.attr("class") = classes ;
    set_rownames( out, df.nrows() ) ;
    out.names() = names;

    return out ;
}

Example #22

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Grouped filter for the case where the expressions in `args` come from
// several environments.
//
// Each expression is evaluated group by group in its own environment. Each
// group's logical result is validated against the group size and AND-ed into
// a full-length mask that starts out all TRUE. The mask then selects the
// rows, and the "vars" attribute of the source data is copied to the result.
DataFrame filter_grouped_multiple_env( const GroupedDataFrame& gdf, const List& args, const DataDots& dots){
    const DataFrame& data = gdf.data() ;
    CharacterVector names = data.names() ;

    // Collect the symbols of all column names.
    SymbolSet symbols ;
    for( int col=0; col<names.size(); col++){
        symbols.insert( Rf_install( names[col] ) ) ;
    }

    int nrows = data.nrows() ;
    LogicalVector mask(nrows, TRUE);

    LogicalVector group_mask ;

    for( int arg=0; arg<args.size(); arg++){
        Call call( (SEXP)args[arg] ) ;
        GroupedCallProxy call_proxy( call, gdf, dots.envir(arg) ) ;

        int ngroups = gdf.ngroups() ;
        GroupedDataFrame::group_iterator git = gdf.group_begin() ;
        for( int g=0; g<ngroups; g++, ++git){
            SlicingIndex rows = *git ;
            int group_size = rows.size() ;

            group_mask = call_proxy.get( rows );
            check_filter_result(group_mask, group_size ) ;

            // a row stays selected only if every expression accepts it
            for( int pos=0; pos<group_size; pos++){
                mask[ rows[pos] ] = mask[ rows[pos] ] & group_mask[pos] ;
            }
        }
    }

    DataFrame res = subset( data, mask, names, classes_grouped() ) ;
    res.attr( "vars")   = data.attr("vars") ;

    return res ;
}

Example #23

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Returns a shallow copy of df whose class is plain "data.frame".
//
// The copy's attributes are replaced by strip_group_attributes(df), its
// object bit is copied from df, and its class is then set to "data.frame".
// [[Rcpp::export]]
DataFrame as_regular_df(DataFrame df){
  DataFrame plain = shallow_copy(df) ;

  // swap in the attribute list without the grouping attributes
  SET_ATTRIB(plain, strip_group_attributes(df)) ;
  SET_OBJECT(plain, OBJECT(df)) ;

  plain.attr("class") = CharacterVector::create("data.frame") ;
  return plain ;
}

Example #24

Show file

File: HandlingDataFrameExample2.cpp Project: watermouth/RcppExamples

// Applies ex13helper() to every row of one column of a data frame.
//
// input       data frame to process; it is viewed as a List for the helper
// columnName  name of the column handed to ex13helper(); must convert to a
//             single string
// replace     value forwarded to ex13helper() for each row
//
// Returns the List view of `input` after all rows have been visited.
// [[Rcpp::export]]
SEXP ex13_2(DataFrame input, CharacterVector columnName, double replace){
  BEGIN_RCPP
  List mapObj = as<List>(input);

  const int nrow = input.nrows();
  if (nrow > 0) {
    // Convert the column name once instead of once per row. The conversion
    // stays inside the guard so an empty input still skips it.
    const string column = as<string>(columnName);
    for(int i=0; i<nrow; i++){
      ex13helper(mapObj, column, i, replace);
    }
  }
  return(wrap(mapObj));
  END_RCPP
}

Example #25

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Turns the accumulated columns into the result of a mutate.
//
// The accumulator is converted to a List, which gets `classes` as its class,
// row names for df.nrows() rows, and the "vars", "labels", "index" and
// "indices" attributes of df.
SEXP structure_mutate( const NamedListAccumulator<SEXP>& accumulator, const DataFrame& df, CharacterVector classes){
    List columns = accumulator ;

    columns.attr("class") = classes ;
    set_rownames( columns, df.nrows() ) ;

    // grouping metadata is taken over from the input unchanged
    columns.attr( "vars" )    = df.attr( "vars" ) ;
    columns.attr( "labels" )  = df.attr( "labels" ) ;
    columns.attr( "index" )   = df.attr( "index" ) ;
    columns.attr( "indices" ) = df.attr( "indices" ) ;

    return columns ;
}

Example #26

Show file

File: group_indices.cpp Project: HughParsonage/dplyr

// Assigns a 1-based group number to every row of `data`, where groups are
// the distinct value combinations of the columns named by `symbols`.
//
// With no symbols, every row gets group 1. Otherwise each symbol must name a
// column of `data` that passes white_list() and is not a list; stop() is
// called if not. Group numbers follow the order that OrderVisitors produces
// for the group labels.
// [[Rcpp::export]]
IntegerVector grouped_indices_impl(DataFrame data, ListOf<Symbol> symbols) {
  int nsymbols = symbols.size();
  if (nsymbols == 0)
    return rep(1, data.nrows());

  // Validate the grouping columns while collecting their names.
  CharacterVector vars(nsymbols);
  for (int s=0; s<nsymbols; s++) {
    vars[s] = PRINTNAME(symbols[s]);

    const char* name = vars[s];
    SEXP column;
    try {
      column = data[name];
    } catch (...) {
      stop("unknown column '%s'", name);
    }
    const bool groupable = white_list(column) && TYPEOF(column) != VECSXP;
    if (!groupable) {
      stop("cannot group column %s, of class '%s'", name, get_single_class(column));
    }
  }

  // Bucket the row indices by their key in the grouping columns.
  DataFrameVisitors visitors(data, vars);
  ChunkIndexMap map(visitors);
  int n = data.nrows();
  train_push_back(map, n);

  // One label row per bucket, and the order in which the labels sort.
  DataFrame labels = DataFrameSubsetVisitors(data, vars).subset(map, "data.frame");
  IntegerVector labels_order = OrderVisitors(labels).apply();

  labels = DataFrameSubsetVisitors(labels).subset(labels_order, "data.frame");

  int ngroups = map.size();

  IntegerVector res = no_init(n);

  // Gather the buckets in map iteration order so they can be reached by
  // position through labels_order.
  std::vector<const std::vector<int>* > chunks(ngroups);
  ChunkIndexMap::const_iterator bucket = map.begin();
  for (int g=0; g<ngroups; g++, ++bucket) {
    chunks[g] = &bucket->second;
  }

  // The g-th bucket in sorted order becomes group g+1.
  for (int g=0; g<ngroups; g++) {
    const std::vector<int>& rows = *chunks[ labels_order[g] ];

    int n_rows = rows.size();
    for (int pos=0; pos<n_rows; pos++) {
      res[ rows[pos] ] = g+1;
    }
  }

  return res;
}

Example #27

Show file

File: join_exports.cpp Project: cderv/dplyr

// Full join keyed on by_x / by_y: the output holds every row of x (with its
// matches in y, if any) followed by the rows of y that match nothing in x.
//
// x, y          tables to join
// by_x, by_y    join columns, forwarded to DataFrameJoinVisitors
// aux_x, aux_y  forwarded unchanged to subset_join()
// na_match      forwarded to DataFrameJoinVisitors
//
// The result carries the class reported by get_class(x).
// [[Rcpp::export]]
DataFrame full_join_impl(DataFrame x, DataFrame y,
                         IntegerVector by_x, IntegerVector by_y,
                         IntegerVector aux_x, IntegerVector aux_y,
                         bool na_match) {
  check_by(by_x);

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > JoinMap;

  // First pass: a lookup populated from the rows of y, probed with x.
  DataFrameJoinVisitors visitors_yx(y, x, by_y, by_x, false, na_match);
  JoinMap lookup_y(visitors_yx);
  train_push_back(lookup_y, y.nrows());

  std::vector<int> rows_x;
  std::vector<int> rows_y;

  const int rows_in_x = x.nrows();
  const int rows_in_y = y.nrows();

  // Matches, plus the rows of x that have no partner in y.
  for (int row = 0; row < rows_in_x; ++row) {
    // Row `row` of x is looked up through the negative key -row - 1.
    JoinMap::iterator hit = lookup_y.find(-row - 1);
    if (hit == lookup_y.end()) {
      rows_y.push_back(-1); // mark NA
      rows_x.push_back(row);
      continue;
    }
    push_back(rows_y, hit->second);
    push_back(rows_x, row, hit->second.size());
  }

  // Second pass: a lookup populated from the rows of x, probed with y, to
  // find the rows of y that the first pass could not reach.
  DataFrameJoinVisitors visitors_xy(x, y, by_x, by_y, false, na_match);
  JoinMap lookup_x(visitors_xy);
  train_push_back(lookup_x, x.nrows());

  for (int row = 0; row < rows_in_y; ++row) {
    JoinMap::iterator hit = lookup_x.find(-row - 1);
    if (hit != lookup_x.end()) {
      continue;  // already emitted by the first pass
    }
    rows_x.push_back(-row - 1);
    rows_y.push_back(row);
  }

  return subset_join(x, y,
                     rows_x, rows_y,
                     by_x, by_y,
                     aux_x, aux_y,
                     get_class(x));
}

Example #28

Show file

File: InformationGainCalculator.cpp Project: mitulvpatel/hootenanny

  double InformationGainCalculator::calculateInformationGain(const DataFrame& df1, 
    int factorIndex1, const DataFrame& df2, int factorIndex2)
  {
    assert(df1.isNominal(factorIndex1));
    assert(df2.isNominal(factorIndex2));

    double hy = _calculateEntropy(df1, factorIndex1);
    double hyx = _calculateConditionalEntropy(df1, factorIndex1, df2, factorIndex2);

    double gain = hy - hyx;

    return gain;
  }

Example #29

Show file

File: dplyr.cpp Project: kevinushey/dplyr

// Set union of the rows of two data frames.
//
// Stops with "not compatible" when compatible_data_frame(x, y) fails. Rows
// are compared on all columns of x; the rows of x are inserted into the set
// first, then those of y. The result reuses x's "class" attribute.
// [[Rcpp::export]]
DataFrame union_data_frame( DataFrame x, DataFrame y){
    if( !compatible_data_frame(x,y) ){
        stop( "not compatible" );
    }

    typedef VisitorSetIndexSet<DataFrameJoinVisitors> RowSet ;
    DataFrameJoinVisitors join_visitors(x, y, x.names() ) ;
    RowSet rows(join_visitors);

    // left table first, then the right one
    train_insert( rows, x.nrows() ) ;
    train_insert_right( rows, y.nrows() ) ;

    return join_visitors.subset( rows, x.attr("class") ) ;
}

Example #30

Show file

File: distinct.cpp Project: AndreMikulec/dplyr

// Returns the distinct rows of df, keeping the first occurrence of each.
// Rows are compared through DataFrameVisitors built over df; the result
// reuses df's "class" attribute.
// [[Rcpp::export]]
SEXP distinct_impl( DataFrame df ){
    DataFrameVisitors visitors(df) ;

    std::vector<int> first_seen ;
    VisitorSetIndexSet<DataFrameVisitors> seen(visitors) ;

    const int nrows = df.nrows() ;
    for( int row=0; row<nrows; ++row){
        // insert() reports through .second whether the row was new
        const bool is_new = seen.insert(row).second ;
        if( !is_new ) continue ;
        first_seen.push_back(row) ;
    }
    return visitors.subset(first_seen, df.attr("class") );
}