Exemplo n.º 1
0
/**
 * Fills `orders` with one integer per element of `data`.
 *
 * The distinct string pointers of `data` are collected, sorted through R's
 * base `sort`, and each distinct string is associated with the value that
 * `r_match(uniques, sorted_uniques)` yields for it (presumably its 1-based
 * position among the sorted uniques -- confirm against r_match). Every
 * element of `data` then receives the value of its string.
 *
 * Strings are compared by SEXP pointer identity, not by content. Runs of
 * identical consecutive pointers skip the hash lookups entirely.
 *
 * @param data character vector to process; an empty vector leaves `orders`
 *             as the zero-length vector built in the initializer list.
 */
CharacterVectorOrderer::CharacterVectorOrderer(const CharacterVector& data) :
  orders(no_init(data.size()))
{
  const int n = data.size();
  if (n == 0) return;

  // Pass 1: collect every distinct SEXP pointer found in `data`.
  dplyr_hash_set<SEXP> seen(n);
  SEXP* strings = Rcpp::internal::r_vector_start<STRSXP>(data);
  SEXP last = strings[0];
  seen.insert(last);
  for (int i = 1; i < n; ++i) {
    SEXP current = strings[i];
    // Only touch the set when the pointer differs from its predecessor.
    if (current != last) {
      seen.insert(current);
      last = current;
    }
  }

  // Materialise the distinct strings as a character vector.
  const int n_uniques = seen.size();
  LOG_VERBOSE << "Sorting " << n_uniques << " unique character elements";

  CharacterVector uniques(seen.begin(), seen.end());

  // Let R sort the distinct strings, then match each one against the
  // sorted result.
  static Function sort("sort", R_BaseEnv);
  CharacterVector sorted_uniques = Language(sort, uniques).fast_eval();
  IntegerVector positions = r_match(uniques, sorted_uniques);

  // Lookup table: distinct string pointer -> matched position.
  dplyr_hash_map<SEXP, int> position_of(n_uniques);
  for (int i = 0; i < n_uniques; ++i) {
    position_of.insert(std::make_pair(uniques[i], positions[i]));
  }

  // Pass 2: write the position of each element, reusing the previous
  // lookup result while the pointer stays the same.
  strings = Rcpp::internal::r_vector_start<STRSXP>(data);
  last = strings[0];
  int current_position = position_of.find(last)->second;
  orders[0] = current_position;

  for (int i = 1; i < n; ++i) {
    SEXP current = strings[i];
    if (current != last) {
      last = current;
      current_position = position_of.find(current)->second;
    }
    orders[i] = current_position;
  }
}
Exemplo n.º 2
0
    /**
     * Fills `orders` with one integer per element of `data_`.
     *
     * The distinct string pointers of the input are gathered into the
     * member `set`, handed to R's `rank` (ties.method = "min",
     * na.last = "keep"), and each element of the input then receives the
     * rank R returned for its string.
     *
     * Strings are compared by SEXP pointer identity, not by content. Runs
     * of identical consecutive pointers skip the hash lookups entirely.
     *
     * @param data_ character vector to process; when it is empty the
     *              constructor returns right after member initialisation.
     */
    CharacterVectorOrderer::CharacterVectorOrderer( const CharacterVector& data_ ) :
        data(data_),
        set(),
        orders(no_init(data.size()))
    {
        const int n = data.size() ;
        if( n == 0 ) return ;

        // Pass 1: collect every distinct SEXP pointer found in `data`.
        SEXP* strings = Rcpp::internal::r_vector_start<STRSXP>(data);
        SEXP last = strings[0] ;
        set.insert( last ) ;
        for( int i=1; i<n; ++i ){
            SEXP current = strings[i] ;
            // Only touch the set when the pointer differs from its predecessor.
            if( current != last ){
                set.insert( current ) ;
                last = current ;
            }
        }

        // Materialise the distinct strings as a character vector.
        const int n_uniques = set.size() ;
        CharacterVector uniques( set.begin(), set.end() ) ;

        // Ask R to rank the distinct strings.
        IntegerVector ranks = Language( "rank", uniques, _["ties.method"] = "min", _["na.last"] = "keep" ).fast_eval() ;

        // Lookup table: distinct string pointer -> rank.
        dplyr_hash_map<SEXP,int> rank_of ;
        for( int i=0; i<n_uniques; ++i ){
            rank_of.insert( std::make_pair(uniques[i], ranks[i] ) ) ;
        }

        // Pass 2: write the rank of each element, reusing the previous
        // lookup result while the pointer stays the same.
        strings = Rcpp::internal::r_vector_start<STRSXP>(data);
        last = strings[0] ;
        int current_rank = rank_of.find(last)->second ;
        orders[0] = current_rank ;

        for( int i=1; i<n; ++i ){
            SEXP current = strings[i] ;
            if( current != last ){
                last = current ;
                current_rank = rank_of.find(last)->second ;
            }
            orders[i] = current_rank ;
        }

    }