// Scans `xc` from the front and returns the index of the first element that
// is not NA and is flagged neither ASCII nor UTF-8. Returns xc.length() when
// every element passes.
R_xlen_t get_first_reencode_pos(const CharacterVector& xc) {
  const R_xlen_t n = xc.length();
  R_xlen_t pos = 0;
  while (pos < n) {
    SEXP elt = xc[pos];
    // NA is tested first so the encoding macros are never applied to NA_STRING.
    const bool needs_reencode =
        elt != NA_STRING && !IS_ASCII(elt) && !IS_UTF8(elt);
    if (needs_reencode) {
      break;
    }
    ++pos;
  }
  return pos;
}
// Filters a grouped data frame using the conjunction of all filter
// expressions in `dots`, evaluated once per group in the environment of the
// first expression.
//
// `Data` and `Subsets` are not declared in this block; presumably they are
// template parameters introduced just above it -- TODO confirm.
//
// For each group the combined call yields either a single logical (applied
// to every row of the group) or one logical per row of the group. Rows whose
// value is not TRUE (FALSE or NA) are dropped.
DataFrame filter_grouped_single_env(const Data& gdf, const LazyDots& dots) {
  typedef GroupedCallProxy<Data, Subsets> Proxy;

  Environment env = dots[0].env();
  const DataFrame& data = gdf.data();
  CharacterVector names = data.names();

  // Collect the column names as symbols.
  SymbolSet column_symbols;
  for (int col = 0; col < names.size(); col++) {
    column_symbols.insert(Rf_installChar(names[col]));
  }

  // a, b, c ->  a & b & c
  Call call(and_calls(dots, column_symbols, env));

  int nrows = data.nrows();
  LogicalVector keep(nrows, TRUE);
  Proxy call_proxy(call, gdf, env);

  int ngroups = gdf.ngroups();
  typename Data::group_iterator git = gdf.group_begin();
  for (int g = 0; g < ngroups; g++, ++git) {
    SlicingIndex indices = *git;
    const int chunk_size = indices.size();

    LogicalVector group_result(
        check_filter_logical_result(call_proxy.get(indices)));

    if (group_result.size() == 1) {
      // A scalar result is recycled over the whole group.
      const int value = (group_result[0] == TRUE);
      for (int j = 0; j < chunk_size; j++) {
        keep[indices[j]] = value;
      }
      continue;
    }

    check_filter_result(group_result, chunk_size);
    for (int j = 0; j < chunk_size; j++) {
      if (group_result[j] == TRUE) continue;
      keep[indices[j]] = FALSE;
    }
  }

  return grouped_subset<Data>(gdf, keep, names, classes_grouped<Data>());
}
// Builds simhash fingerprints for the first element of `lhs` and of `rhs`
// through the file-level `hash` object, and returns a list with:
//   - "distance": hash.distances() of the two fingerprints, as a string
//   - "lhs" / "rhs": the keywords reported by hash.make(), each named by its
//     weight formatted with itos()
List distance(CharacterVector& lhs, CharacterVector& rhs, int topn) {
  const char *const lhs_path = lhs[0];
  const char *const rhs_path = rhs[0];

  uint64_t lhsres;
  uint64_t rhsres;
  vector<pair<string, double> > lhsword;
  vector<pair<string, double> > rhsword;
  hash.make(lhs_path, topn, lhsres, lhsword);
  hash.make(rhs_path, topn, rhsres, rhsword);

  // Left-hand keywords, named by weight.
  CharacterVector lhsm(lhsword.size());
  CharacterVector lhsatb(lhsword.size());
  const int n_lhs = static_cast<int>(lhsword.size());
  for (int k = 0; k < n_lhs; ++k) {
    lhsm[k] = lhsword[k].first;
    lhsatb[k] = itos(lhsword[k].second);
  }
  lhsm.attr("names") = lhsatb;

  // Right-hand keywords, named by weight.
  CharacterVector rhsm(rhsword.size());
  CharacterVector rhsatb(rhsword.size());
  const int n_rhs = static_cast<int>(rhsword.size());
  for (int k = 0; k < n_rhs; ++k) {
    rhsm[k] = rhsword[k].first;
    rhsatb[k] = itos(rhsword[k].second);
  }
  rhsm.attr("names") = rhsatb;

  CharacterVector hashvec;
  hashvec.push_back(int64tos(hash.distances(lhsres, rhsres)));

  return List::create(
    Named("distance") = hashvec,
    Named("lhs") = lhsm,
    Named("rhs") = rhsm
  );
}
// Computes a simhash for the strings in `code` through the file-level `hash`
// object and returns a list with:
//   - "simhash": the fingerprint formatted with int64tos()
//   - "keyword": the keywords reported by hash.make_fromvec(), each named by
//     its weight formatted with itos()
List simhash_fromvec(vector<string>& code, int topn) {
  uint64_t hashres;
  vector<pair<string, double> > lhsword;
  hash.make_fromvec(code, topn, hashres, lhsword);

  CharacterVector lhsm(lhsword.size());
  CharacterVector lhsatb(lhsword.size());
  const int n_words = static_cast<int>(lhsword.size());
  for (int k = 0; k < n_words; ++k) {
    lhsm[k] = lhsword[k].first;
    lhsatb[k] = itos(lhsword[k].second);
  }
  lhsm.attr("names") = lhsatb;

  CharacterVector hashvec;
  hashvec.push_back(int64tos(hashres));

  return List::create(
    Named("simhash") = hashvec,
    Named("keyword") = lhsm);
}
// [[Rcpp::export]] CharacterVector connection_escape_identifier(XPtr<PqConnectionPtr> con, CharacterVector xs) { int n = xs.size(); CharacterVector escaped(n); for (int i = 0; i < n; ++i) { std::string x(xs[i]); escaped[i] = (*con)->escapeIdentifier(x); } return escaped; }
bool character_vector_equal(const CharacterVector& x, const CharacterVector& y) { if ((SEXP)x == (SEXP)y) return true; if (x.length() != y.length()) return false; for (R_xlen_t i = 0; i < x.length(); ++i) { SEXP xi = x[i]; SEXP yi = y[i]; // Ideally we'd use Rf_Seql(), but this is not exported. if (Rf_NonNullStringMatch(xi, yi)) continue; if (xi == NA_STRING && yi == NA_STRING) continue; if (xi == NA_STRING || yi == NA_STRING) return false; if (CHAR(xi)[0] == 0 && CHAR(yi)[0] == 0) continue; return false; } return true; }
// Returns the node's name. When `nsMap` is non-empty and the node has a
// namespace, the name is qualified as "<prefix>:<name>", where the prefix is
// looked up in `nsMap` by the namespace href.
// `T` is not declared in this block; presumably a template parameter
// introduced just above it -- TODO confirm.
std::string nodeName(T* node, CharacterVector nsMap) {
  std::string name = Xml2String(node->name).asStdString();

  // Nothing to qualify with: no prefix map, or the node has no namespace.
  if (nsMap.size() == 0 || node->ns == NULL)
    return name;

  xmlNs* ns = node->ns;
  std::string prefix =
      NsMap(nsMap).findPrefix(Xml2String(ns->href).asStdString());
  return prefix + ":" + name;
}
// Registers a string variable called `name` with the readstat writer.
//
// If `x` has class "labelled", a string label set is created first from its
// "labels" attribute (values) and that attribute's names (labels). The
// variable is registered with the length of the longest string in `x`.
void defineVariable(CharacterVector x, std::string name) {
  readstat_label_set_t* labelSet = NULL;

  if (rClass(x) == "labelled") {
    labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_STRING, name.c_str());

    CharacterVector values = as<CharacterVector>(x.attr("labels"));
    CharacterVector labels = as<CharacterVector>(values.attr("names"));

    for (int k = 0; k < values.size(); ++k) {
      readstat_label_string_value(labelSet, values[k],
                                  std::string(labels[k]).c_str());
    }
  }

  // Longest string in the column.
  int max_length = 0;
  for (int row = 0; row < x.size(); ++row) {
    const int width = std::string(x[row]).size();
    max_length = (width > max_length) ? width : max_length;
  }

  readstat_add_variable(writer_, READSTAT_TYPE_STRING, max_length,
                        name.c_str(), var_label(x), NULL, labelSet);
}
// Registers an int32 variable called `name` with the readstat writer.
//
// A "factor" gets a label set mapping 1..nlevels to its levels; a "labelled"
// vector gets a label set built from its "labels" attribute and that
// attribute's names. Any other class is written without a label set.
void defineVariable(IntegerVector x, std::string name) {
  readstat_label_set_t* labelSet = NULL;

  if (rClass(x) == "factor") {
    labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_INT32, name.c_str());

    CharacterVector levels = as<CharacterVector>(x.attr("levels"));
    for (int k = 0; k < levels.size(); ++k) {
      // Factor codes are 1-based.
      const int code = k + 1;
      readstat_label_int32_value(labelSet, code, std::string(levels[k]).c_str());
    }
  } else if (rClass(x) == "labelled") {
    labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_INT32, name.c_str());

    IntegerVector values = as<IntegerVector>(x.attr("labels"));
    CharacterVector labels = as<CharacterVector>(values.attr("names"));
    for (int k = 0; k < values.size(); ++k) {
      readstat_label_int32_value(labelSet, values[k],
                                 std::string(labels[k]).c_str());
    }
  }

  readstat_add_variable(writer_, READSTAT_TYPE_INT32, 0, name.c_str(),
                        var_label(x), NULL, labelSet);
}
void Player::enterGame() { this->sendMsg(Formatter::clearScreen()); map<string, string>::reverse_iterator it; // Greet them. this->sendMsg(Formatter::bold() + "Welcome, " + name + "!\n" + Formatter::reset()); // Load the news. this->sendMsg("#---------------- Global News ----------------#\n"); for (it = Mud::instance().mudNews.rbegin(); it != Mud::instance().mudNews.rend(); ++it) { this->sendMsg("Date :" + it->first + "\n"); this->sendMsg(it->second + "\n"); } this->sendMsg("#---------------------------------------------#\n\n"); this->sendMsg("You walked through the mist and came into the world...\n\n"); // Notice all the players in the same room. if (room != nullptr) { room->addCharacter(this); // Set the list of exceptions. CharacterVector exceptions; exceptions.push_back(this); // Send the message inside the room. room->sendToAll("%s appears.\n", exceptions, name); } else { closeConnection(); } // Set the player as logged in. logged_in = true; // New player looks around. doCommand("look"); }
// Re-assembles one URL per row of `parsed_urls` by passing the row's
// "scheme", "domain", "port", "path", "parameter" and "fragment" entries to
// compose_single(). The number of rows is taken from the "scheme" column.
CharacterVector compose::compose_multiple(DataFrame parsed_urls){

  CharacterVector schemes    = parsed_urls["scheme"];
  CharacterVector domains    = parsed_urls["domain"];
  CharacterVector ports      = parsed_urls["port"];
  CharacterVector paths      = parsed_urls["path"];
  CharacterVector parameters = parsed_urls["parameter"];
  CharacterVector fragments  = parsed_urls["fragment"];

  const unsigned int n_urls = schemes.size();
  CharacterVector output(n_urls);

  for(unsigned int row = 0; row < n_urls; ++row){
    // Give the user a chance to interrupt every 10000 rows.
    if(row % 10000 == 0){
      Rcpp::checkUserInterrupt();
    }
    output[row] = compose_single(schemes[row], domains[row], ports[row],
                                 paths[row], parameters[row], fragments[row]);
  }

  return output;
}
// [[Rcpp::export]] DataFrame full_join_impl(DataFrame x, DataFrame y, CharacterVector by_x, CharacterVector by_y, std::string& suffix_x, std::string& suffix_y, bool na_match) { if (by_x.size() == 0) stop("no variable to join by"); typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > Map; DataFrameJoinVisitors visitors(y, x, SymbolVector(by_y), SymbolVector(by_x), true, na_match); Map map(visitors); // train the map in terms of y train_push_back(map, y.nrows()); std::vector<int> indices_x; std::vector<int> indices_y; int n_x = x.nrows(), n_y = y.nrows(); // get both the matches and the rows from left but not right for (int i = 0; i < n_x; i++) { // find a row in y that matches row i in x Map::iterator it = map.find(-i - 1); if (it != map.end()) { push_back(indices_y, it->second); push_back(indices_x, i, it->second.size()); } else { indices_y.push_back(-1); // mark NA indices_x.push_back(i); } } // train a new map in terms of x this time DataFrameJoinVisitors visitors2(x, y, SymbolVector(by_x), SymbolVector(by_y), false, na_match); Map map2(visitors2); train_push_back(map2, x.nrows()); for (int i = 0; i < n_y; i++) { // try to find row in x that matches this row of y Map::iterator it = map2.find(-i - 1); if (it == map2.end()) { indices_x.push_back(-i - 1); indices_y.push_back(i); } } return subset_join(x, y, indices_x, indices_y, by_x, by_y, suffix_x, suffix_y, get_class(x) ); }
// Returns true when every non-missing, non-empty element of `x` is accepted
// by the predicate `canParse`; stops at the first rejected element.
bool canParse(CharacterVector x, const canParseFun& canParse,
              LocaleInfo* pLocale) {
  for (int i = 0; i < x.size(); ++i) {
    // NA and empty strings carry no information about the column type.
    const bool ignorable = (x[i] == NA_STRING) || (x[i].size() == 0);
    if (ignorable)
      continue;

    const bool ok = canParse(std::string(x[i]), pLocale);
    if (!ok)
      return false;
  }
  return true;
}
//' Finds the integer that represents the nucleotide //' //' Returns an integer for {G,A,T,G} and NA for 'N' //' //' @param letter A single nucleotide, as a character. //' @param alph_vect The alphabet we are using. A dataframe created using //' build_alphabet() //' @return An integer, or NA if the input is 'N' //' @author Tom Mayo \email{t.mayo@@ed.ac.uk} // [[Rcpp::export]] int let2base_c(String letter, CharacterVector alph_vect){ CharacterVector temp = alph_vect; temp.push_back("N"); int len = temp.size(); int i = 0; bool test = true; while (test){ if (temp[i] == letter){ test = false; } if(test){ i++; } if(i == len){ stop("Invalid letter, must be ACGTM or N"); } } if(i == len - 1){ double ret = NA_REAL; return ret; } return i; }
// Create shingled n-grams // [[Rcpp::export]] CharacterVector shingle_ngrams(CharacterVector words, int n) { int out_length = words.size() - n + 1; CharacterVector ngrams(out_length); for(int i = 0; i < out_length; i++) { CharacterVector subset = words[i - 1 + seq_len(n)]; std::string ngram; for(int j = 0; j < n; j++) { ngram += subset[j]; if(j != n - 1) ngram += " "; } ngrams[i] = ngram; } return ngrams; }
// [[Rcpp::export]] RObject type_convert_col(CharacterVector x, List spec, int col, const std::vector<std::string>& na, bool trim_ws) { CollectorPtr collector = Collector::create(spec); collector->resize(x.size()); for (int i = 0; i < x.size(); ++i) { SEXP string = x[i]; Token t; if (string == NA_STRING) { t = Token(TOKEN_MISSING, i - 1, col - 1); } else { const char* begin = CHAR(string); t = Token(begin, begin + Rf_length(string), i - 1, col - 1); if (trim_ws) t.trim(); t.flagNA(na); } collector->setValue(i, t); } return collector->vector(); }
// Opens every file in `filenames` with open_minc2_volume() and returns the
// handles in input order. If any open throws, all handles opened so far are
// closed with miclose_volume() and the exception is rethrown.
vector<mihandle_t> open_minc2_volumes(CharacterVector filenames){
  vector<mihandle_t> volumes;

  for(CharacterVector::iterator file = filenames.begin();
      file != filenames.end();
      ++file){
    mihandle_t handle;
    try {
      handle = open_minc2_volume(wrap(*file));
    } catch(...){
      // Release what was already opened before propagating the failure.
      for(vector<mihandle_t>::iterator opened = volumes.begin();
          opened != volumes.end();
          ++opened){
        miclose_volume(*opened);
      }
      throw;
    }
    volumes.push_back(handle);
  }

  return(volumes);
}
// [[Rcpp::export]] List compute_chaos(List input, CharacterVector dec_val, CharacterVector unique_dec_val) { std::map<String, int> indexes; for (int i=0; i<unique_dec_val.size(); ++i) { indexes[unique_dec_val[i]] = i; } int input_size = input.size(); std::vector<std::vector<int> > result(input_size); int unique_dec_val_size = unique_dec_val.size(); std::vector<int> classCounts(unique_dec_val_size); for (int i=0; i<input_size; ++i) { for (int j=0; j<unique_dec_val_size; ++j) { classCounts[j] = 0; } IntegerVector ind_class = input[i]; int ind_class_size = ind_class.size(); for (int j=0; j<ind_class_size; ++j) { ++classCounts[indexes[dec_val[ind_class[j]-1]]]; } result[i] = classCounts; } return wrap(result); }
// [[Rcpp::export]] CharacterVector connection_quote_string(XPtr<MyConnectionPtr> con, CharacterVector input) { int n = input.size(); CharacterVector output(n); for (int i = 0; i < n; ++i) { if (input[i] == NA_STRING) { output[i] = NA_STRING; } else { String x = input[i]; output[i] = "'" + (*con)->quoteString(x) + "'"; } } return output; }
//' Hash a string to an integer //' @param x A character vector to be hashed. //' @return A vector of integer hashes. //' @examples //' s <- c("How", "many", "roads", "must", "a", "man", "walk", "down") //' hash_string(s) //' @export // [[Rcpp::export]] IntegerVector hash_string(CharacterVector x) { boost::hash<std::string> hash_fn; int length = x.size(); IntegerVector hash_vec(length); std::string str; for(int i = 0; i < length; i++) { str = Rcpp::as<std::string>(x[i]); hash_vec[i] = hash_fn(str); } return hash_vec; }
// Concatenates `left` and `right` and returns the result of calling base
// R's unique() on the combined vector.
CharacterVector get_uniques(const CharacterVector& left, const CharacterVector& right) {
  const int n_left = left.size();
  const int n_right = right.size();

  // Uninitialised storage: every slot is overwritten by the copies below.
  CharacterVector combined(no_init(n_left + n_right));
  CharacterVector::iterator dest = combined.begin();
  std::copy(left.begin(), left.end(), dest);
  std::copy(right.begin(), right.end(), dest + n_left);

  // Looked up once and reused across calls.
  static Function unique("unique", R_BaseEnv);
  return Language(unique, combined).fast_eval();
}
// [[Rcpp::export]] List compute_indiscernibility(List input, CharacterVector attr_val, CharacterVector unique_attr_val) { std::map<String, int> numbers; for (int i=0; i<unique_attr_val.size(); ++i) { numbers[unique_attr_val[i]] = i; } int numbers_size = numbers.size(); int input_size = input.size(); std::vector<std::vector<int> > result(input_size*numbers_size); for (int i=0; i<input_size; ++i) { IntegerVector ind_class = input[i]; int ind_class_size = ind_class.size(); for (int j=0; j<ind_class_size; ++j) { result[i*numbers_size+numbers[attr_val[ind_class[j]-1]]].push_back(ind_class[j]); } } result.erase(std::remove_if(result.begin(), result.end(), filter<std::vector<int> >), result.end()); return wrap(result); }
// Parses every name in `names` with parse_single() and returns a data frame
// with the columns salutation, first_name, middle_name, last_name, suffix
// and full_name (the input). NA inputs give NA in all five parsed columns.
DataFrame human_parse::parse_vector(CharacterVector names){

  // Measure and construct output
  const unsigned int n_names = names.size();
  CharacterVector salutation(n_names);
  CharacterVector first_name(n_names);
  CharacterVector middle_name(n_names);
  CharacterVector last_name(n_names);
  CharacterVector suffix(n_names);
  CharacterVector holding(5);

  for(unsigned int row = 0; row < n_names; ++row){

    // Give the user a chance to interrupt every 10000 rows.
    if(row % 10000 == 0){
      Rcpp::checkUserInterrupt();
    }

    if(names[row] == NA_STRING){
      salutation[row]  = NA_STRING;
      first_name[row]  = NA_STRING;
      middle_name[row] = NA_STRING;
      last_name[row]   = NA_STRING;
      suffix[row]      = NA_STRING;
      continue;
    }

    // parse_single() yields the five components in output-column order.
    holding = parse_single(Rcpp::as<std::string>(names[row]));
    salutation[row]  = holding[0];
    first_name[row]  = holding[1];
    middle_name[row] = holding[2];
    last_name[row]   = holding[3];
    suffix[row]      = holding[4];
  }

  return DataFrame::create(_["salutation"] = salutation,
                           _["first_name"] = first_name,
                           _["middle_name"] = middle_name,
                           _["last_name"] = last_name,
                           _["suffix"] = suffix,
                           _["full_name"] = names,
                           _["stringsAsFactors"] = false);
}
//'@title Get or remove user authentication credentials //'@description authentication credentials appear before the domain //'name and look like \emph{user:password}. Sometimes you want the removed, //'or retrieved; \code{strip_credentials} and \code{get_credentials} do //'precisely that //' //'@aliases creds //'@rdname creds //' //'@param urls a URL, or vector of URLs //' //'@examples //'# Remove credentials //'strip_credentials("http://*****:*****@97.77.104.22:3128") //' //'# Get credentials //'get_credentials("http://*****:*****@97.77.104.22:3128") //'@export //[[Rcpp::export]] CharacterVector strip_credentials(CharacterVector urls){ std::string holding; unsigned int input_size = urls.size(); CharacterVector output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(urls[i] == NA_STRING){ output[i] = NA_STRING; } else { output[i] = strip_single(Rcpp::as<std::string>(urls[i])); } } return output; }
// [[Rcpp::export]] CharacterVector gh_neighbour(CharacterVector hashes, IntegerVector direction){ unsigned int input_size = hashes.size(); int directions [2] = {direction[0], direction[1]}; CharacterVector output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(CharacterVector::is_na(hashes[i])){ output[i] = NA_STRING; } else { output[i] = cgeohash::neighbor(Rcpp::as<std::string>(hashes[i]), directions); } } return output; }
//'@rdname creds //'@export //[[Rcpp::export]] DataFrame get_credentials(CharacterVector urls){ unsigned int input_size = urls.size(); CharacterVector user(input_size); CharacterVector data(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(urls[i] == NA_STRING){ user[i] = NA_STRING; data[i] = NA_STRING; } else { get_single(Rcpp::as<std::string>(urls[i]), user, data, i); } } return DataFrame::create(_["username"] = user, _["authentication"] = data, _["stringsAsFactors"] = false); }
//' Compute string extents. //' //' Determines the width and height of a bounding box that's big enough //' to (just) enclose the provided text. //' //' @param x Character vector of of strings to measure //' @param bold,italic Is text bold/italic? //' @param fontname Font name //' @param fontsize Font size //' @examples //' str_extents(letters) //' str_extents("Hello World!", bold = TRUE, italic = FALSE, //' fontname = "sans", fontsize = 12) //' @export // [[Rcpp::export]] NumericMatrix str_extents(CharacterVector x, std::string fontname = "sans", double fontsize = 12, int bold = false, int italic = false) { int n = x.size(); CairoContext cc; cc.setFont(fontname, fontsize, bold, italic); NumericMatrix out(n, 2); for (int i = 0; i < n; ++i) { if (x[i] == NA_STRING) { out(i, 0) = NA_REAL; out(i, 1) = NA_REAL; } else { std::string str(Rf_translateCharUTF8(x[i])); FontMetric fm = cc.getExtents(str); out(i, 0) = fm.width; out(i, 1) = fm.height; } } return out; }
// [[Rcpp::export]] std::string collectorGuess(CharacterVector input, List locale_) { LocaleInfo locale(locale_); if (input.size() == 0 || allMissing(input)) return "character"; // Work from strictest to most flexible if (canParse(input, isLogical, &locale)) return "logical"; if (canParse(input, isInteger, &locale)) return "integer"; if (canParse(input, isDouble, &locale)) return "double"; if (canParseNumber(input, &locale)) return "number"; if (canParse(input, isDate, &locale)) return "date"; if (canParse(input, isDateTime, &locale)) return "datetime"; // Otherwise can always parse as a character return "character"; }
//'@title Decode Geohashes //'@description \code{gh_decode} takes geohashes and turns them back into //'latitude/longitude pairs, with an associated margin of error for each value. //' //'@param hashes a character vector of geohashes. //' //'@return a data.frame of four columns; "lat", "lng", "lat_error" and "lng_error" //' //'@seealso \code{\link{gh_encode}} for generating geohashes, and //'\code{\link{gh_neighbours}} for identifying the neighbouring hash boxes //'to a geohash. //' //'@examples //'# A simple example: //'gh_encode(lat = 42.60498046875, lng = -5.60302734375, precision = 5) //'#[1] "ezs42" //' //'gh_decode("ezs42") //'# lat lng lat_error lng_error //'# 42.60498 -5.603027 0.02197266 0.02197266 //'@export //[[Rcpp::export]] DataFrame gh_decode(CharacterVector hashes){ unsigned int input_size = hashes.size(); NumericVector lats(input_size); NumericVector lngs(input_size); NumericVector lat_error(input_size); NumericVector lng_error(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(CharacterVector::is_na(hashes[i])){ lats[i] = NA_REAL; lngs[i] = NA_REAL; lat_error[i] = NA_REAL; lng_error[i] = NA_REAL; } else { cgeohash::DecodedHash holding = cgeohash::decode(Rcpp::as<std::string>(hashes[i])); lats[i] = holding.latitude; lngs[i] = holding.longitude; lat_error[i] = holding.latitude_err; lng_error[i] = holding.longitude_err; } } return DataFrame::create(_["gh"] = hashes, _["lat"] = lats, _["lng"] = lngs, _["lat_error"] = lat_error, _["lng_error"] = lng_error, _["stringsAsFactors"] = false); }
//[[Rcpp::export]] DataFrame constant_element_interpolator(CharacterVector data, CharacterVector group, IntegerVector frame, CharacterVector ease) { std::deque<std::string> tweendata; std::deque<std::string> tweengroup; std::deque<int> tweenframe; int i, j, nframes; std::string groupString; std::string currentGroup = as<std::string>(group[0]); for (i = 1; i < data.size(); ++i) { groupString = as<std::string>(group[i]); if (currentGroup == groupString) { nframes = frame[i] - frame[i-1]; std::vector<double> ease_points = easeSeq(as<std::string>(ease[i-1]), nframes); for (j = 0; j < ease_points.size(); ++j) { if (ease_points[j] < 0.5) { tweendata.push_back(as<std::string>(data[i - 1])); } else { tweendata.push_back(as<std::string>(data[i])); } tweengroup.push_back(groupString); tweenframe.push_back(j + frame[i-1]); } } else { tweendata.push_back(as<std::string>(data[i - 1])); tweengroup.push_back(currentGroup); tweenframe.push_back(frame[i-1]); currentGroup = groupString; } } return DataFrame::create( Named("data") = wrap(tweendata), Named("group") = wrap(tweengroup), Named("frame") = wrap(tweenframe) ); }