/**
 * Computes one integer key per element of `data` and stores it in `orders`.
 *
 * Steps:
 *   1. collect the distinct SEXP pointers found in `data`;
 *   2. sort those distinct strings through base R's `sort`;
 *   3. look each distinct string up in the sorted vector with `r_match`;
 *   4. write the resulting key of every element of `data` into `orders`.
 *
 * Strings are compared by SEXP pointer identity throughout.
 * An empty input returns immediately and leaves `orders` with length zero.
 *
 * NOTE(review): `r_match` presumably behaves like R's `match()`, i.e. returns
 * the 1-based position of each unique string inside the sorted vector -- confirm.
 *
 * @param data character vector whose elements are to be keyed.
 */
CharacterVectorOrderer::CharacterVectorOrderer(const CharacterVector& data) :
  orders(no_init(data.size()))
{
  const int n = data.size();
  if (n == 0) {
    return;
  }

  // Pass 1: gather the distinct SEXP pointers.
  dplyr_hash_set<SEXP> seen(n);
  SEXP* const first_pass = Rcpp::internal::r_vector_start<STRSXP>(data);
  SEXP last = first_pass[0];
  seen.insert(last);
  for (int i = 1; i < n; ++i) {
    SEXP current = first_pass[i];
    // Only touch the set when the string differs from the one just handled;
    // runs of identical pointers are skipped without hashing.
    if (current != last) {
      seen.insert(current);
      last = current;
    }
  }

  // Materialise the distinct strings as a character vector.
  const int n_uniques = seen.size();
  LOG_VERBOSE << "Sorting " << n_uniques << " unique character elements";
  CharacterVector uniques(seen.begin(), seen.end());

  // Sorting is delegated to R; the function object is created once and reused.
  static Function r_sort("sort", R_BaseEnv);
  CharacterVector sorted_uniques = Language(r_sort, uniques).fast_eval();

  // Key of each unique string, obtained by matching it against the sorted vector.
  IntegerVector keys = r_match(uniques, sorted_uniques);

  // Pointer -> key lookup table.
  dplyr_hash_map<SEXP, int> key_of(n_uniques);
  for (int j = 0; j < n_uniques; ++j) {
    key_of.insert(std::make_pair(uniques[j], keys[j]));
  }

  // Pass 2: write the key of every element, reusing the previous lookup
  // while the same pointer repeats.
  SEXP* const second_pass = Rcpp::internal::r_vector_start<STRSXP>(data);
  last = second_pass[0];
  int key = key_of.find(last)->second;
  orders[0] = key;
  for (int i = 1; i < n; ++i) {
    SEXP current = second_pass[i];
    if (current != last) {
      last = current;
      key = key_of.find(current)->second;
    }
    orders[i] = key;
  }
}
/**
 * Stores `data_` in the `data` member and fills `orders` with one integer
 * rank per element.
 *
 * Steps:
 *   1. collect the distinct SEXP pointers of `data` into the member `set`;
 *   2. rank the distinct strings by calling R's `rank` with
 *      `ties.method = "min"` and `na.last = "keep"`;
 *   3. write the rank of every element of `data` into `orders`.
 *
 * Strings are compared by SEXP pointer identity throughout.
 * An empty input returns immediately, leaving `set` empty and `orders`
 * with length zero.
 *
 * @param data_ character vector whose elements are to be ranked.
 */
CharacterVectorOrderer::CharacterVectorOrderer( const CharacterVector& data_ ) :
  data(data_),
  set(),
  orders(no_init(data.size()))
{
  const int n = data.size() ;
  if( n == 0 ) {
    return ;
  }

  // Pass 1: gather the distinct SEXP pointers.
  SEXP* const first_pass = Rcpp::internal::r_vector_start<STRSXP>(data) ;
  SEXP last = first_pass[0] ;
  set.insert( last ) ;
  for( int i=1; i<n; ++i ){
    SEXP current = first_pass[i] ;
    // Runs of identical pointers are skipped without touching the set.
    if( current != last ){
      set.insert( current ) ;
      last = current ;
    }
  }

  // Materialise the distinct strings as a character vector.
  const int n_uniques = set.size() ;
  CharacterVector uniques( set.begin(), set.end() ) ;

  // Ranking is delegated to R through a call to `rank`.
  IntegerVector ranks = Language(
    "rank", uniques,
    _["ties.method"] = "min",
    _["na.last"] = "keep"
  ).fast_eval() ;

  // Pointer -> rank lookup table.
  dplyr_hash_map<SEXP,int> rank_of ;
  for( int j=0; j<n_uniques; ++j ){
    rank_of.insert( std::make_pair( uniques[j], ranks[j] ) ) ;
  }

  // Pass 2: write the rank of every element, reusing the previous lookup
  // while the same pointer repeats.
  SEXP* const second_pass = Rcpp::internal::r_vector_start<STRSXP>(data) ;
  last = second_pass[0] ;
  int rank = rank_of.find( last )->second ;
  orders[0] = rank ;
  for( int i=1; i<n; ++i ){
    SEXP current = second_pass[i] ;
    if( current != last ){
      last = current ;
      rank = rank_of.find( last )->second ;
    }
    orders[i] = rank ;
  }
}