// [[Rcpp::export]] std::vector<std::string> guess_types_(List sourceSpec, List tokenizerSpec, Rcpp::List locale_, int n = 100) { Warnings warnings; SourcePtr source = Source::create(sourceSpec); TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec); tokenizer->tokenize(source->begin(), source->end()); tokenizer->setWarnings(&warnings); // silence warnings LocaleInfo locale(locale_); std::vector<CollectorPtr> collectors; for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) { if (t.row() >= (size_t) n) break; // Add new collectors, if needed if (t.col() >= collectors.size()) { int p = collectors.size() - t.col() + 1; for (int j = 0; j < p; ++j) { CollectorPtr col = CollectorPtr(new CollectorCharacter(&locale.encoder_)); col->setWarnings(&warnings); col->resize(n); collectors.push_back(col); } } collectors[t.col()]->setValue(t.row(), t); } std::vector<std::string> out; for (size_t j = 0; j < collectors.size(); ++j) { CharacterVector col = as<CharacterVector>(collectors[j]->vector()); out.push_back(collectorGuess(col, locale_)); } return out; }
// [[Rcpp::export]] RObject type_convert_col(CharacterVector x, List spec, int col, const std::vector<std::string>& na, bool trim_ws) { CollectorPtr collector = Collector::create(spec); collector->resize(x.size()); for (int i = 0; i < x.size(); ++i) { SEXP string = x[i]; Token t; if (string == NA_STRING) { t = Token(TOKEN_MISSING, i - 1, col - 1); } else { const char* begin = CHAR(string); t = Token(begin, begin + Rf_length(string), i - 1, col - 1); if (trim_ws) t.trim(); t.flagNA(na); } collector->setValue(i, t); } return collector->vector(); }