Beispiel #1
0
// [[Rcpp::export]]
std::vector<std::string> guess_types_(List sourceSpec, List tokenizerSpec,
                                      Rcpp::List locale_, int n = 100) {
  Warnings warnings;
  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings); // silence warnings

  LocaleInfo locale(locale_);

  std::vector<CollectorPtr> collectors;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (t.row() >= (size_t) n)
      break;

    // Add new collectors, if needed
    if (t.col() >= collectors.size()) {
      int p = collectors.size() - t.col() + 1;
      for (int j = 0; j < p; ++j) {
        CollectorPtr col = CollectorPtr(new CollectorCharacter(&locale.encoder_));
        col->setWarnings(&warnings);
        col->resize(n);
        collectors.push_back(col);
      }
    }

    collectors[t.col()]->setValue(t.row(), t);
  }

  std::vector<std::string> out;
  for (size_t j = 0; j < collectors.size(); ++j) {
    CharacterVector col = as<CharacterVector>(collectors[j]->vector());
    out.push_back(collectorGuess(col, locale_));
  }

  return out;
}
Beispiel #2
0
// [[Rcpp::export]]
RObject type_convert_col(CharacterVector x, List spec, int col,
                         const std::vector<std::string>& na, bool trim_ws) {
  CollectorPtr collector = Collector::create(spec);
  collector->resize(x.size());

  for (int i = 0; i < x.size(); ++i) {
    SEXP string = x[i];
    Token t;

    if (string == NA_STRING) {
      t = Token(TOKEN_MISSING, i - 1, col - 1);
    } else {
      const char* begin = CHAR(string);
      t = Token(begin, begin + Rf_length(string), i - 1, col - 1);
      if (trim_ws)
        t.trim();
      t.flagNA(na);
    }

    collector->setValue(i, t);
  }

  return collector->vector();
}