Exemplo n.º 1
0
// Reads a source line by line into a character vector.
//
// sourceSpec: specification handed to Source::create() to open the input.
// n_max:      maximum number of tokens to read; a negative value means
//             "no limit", in which case the result buffer starts at 1000
//             slots and is enlarged on demand.
//
// Returns a CharacterVector with one element per token consumed. Only tokens
// of type TOKEN_STRING are written; any other non-EOF token still occupies a
// slot but leaves it at whatever value the vector was allocated/resized with.
// [[Rcpp::export]]
CharacterVector read_lines_(List sourceSpec, int n_max = -1) {
  SourcePtr source = Source::create(sourceSpec);
  TokenizerLine tokenizer;
  tokenizer.tokenize(source->begin(), source->end());

  const bool unlimited = n_max < 0;
  int capacity = unlimited ? 1000 : n_max;
  CharacterVector out(capacity);

  int count = 0;
  Token token = tokenizer.nextToken();
  while (token.type() != TOKEN_EOF) {
    if (count >= capacity) {
      // With an explicit limit the buffer is already exactly n_max long,
      // so reaching capacity means we are done.
      if (!unlimited)
        break;

      // No limit: grow the buffer by roughly half and keep reading.
      capacity = (capacity * 3) / 2 + 1;
      out = Rf_lengthgets(out, capacity);
    }

    if (token.type() == TOKEN_STRING)
      out[count] = token.asString();

    ++count;
    token = tokenizer.nextToken();
  }

  // Drop the unused tail of the buffer.
  if (count < capacity)
    out = Rf_lengthgets(out, count);

  return out;
}
Exemplo n.º 2
0
// Reads a source line by line into a character vector, optionally reporting
// progress while doing so.
//
// sourceSpec: specification handed to Source::create() to open the input.
// locale_:    list used to construct a LocaleInfo; its encoder_ is passed to
//             Token::asSEXP() for every string token (presumably to re-encode
//             the text - confirm against Token::asSEXP).
// n_max:      maximum number of tokens to read; a negative value means
//             "no limit", in which case the result buffer starts at 10000
//             slots and is resized from an estimate of the total row count.
// progress:   when true, the progress bar is updated every 25000 tokens and
//             once more after the loop.
//
// Returns a CharacterVector with one element per token consumed. Only tokens
// of type TOKEN_STRING are written; any other non-EOF token still occupies a
// slot but leaves it at whatever value the vector was allocated/resized with.
// [[Rcpp::export]]
CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1,
                            bool progress = true) {

  SourcePtr source = Source::create(sourceSpec);
  TokenizerLine tokenizer;
  tokenizer.tokenize(source->begin(), source->end());
  LocaleInfo locale(locale_);
  Progress progressBar;

  int n = (n_max < 0) ? 10000 : n_max;
  CharacterVector out(n);

  int i = 0;
  for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) {
    if (progress && (i + 1) % 25000 == 0)
      progressBar.show(tokenizer.progress());

    if (i >= n) {
      if (n_max < 0) {
        // Estimate rows in full dataset from the progress made so far, with
        // 20% headroom. Keep the value as a double until it has been checked.
        double estimate = (i / tokenizer.progress().first) * 1.2;

        // An estimate that is NaN or not above i would leave no room for the
        // write to out[i] below; fall back to plain geometric growth.
        if (!(estimate > i))
          estimate = i * 1.5 + 1;

        // Converting a double outside the int range is undefined behaviour,
        // and Rf_lengthgets() takes an int length, so clamp first.
        if (estimate > 2147483647.0)
          estimate = 2147483647.0;

        n = static_cast<int>(estimate);

        // The buffer is already as long as an int length allows; stop
        // reading rather than write past its end.
        if (n <= i)
          break;

        out = Rf_lengthgets(out, n);
      } else {
        break;
      }
    }

    if (t.type() == TOKEN_STRING)
      out[i] = t.asSEXP(&locale.encoder_);

    ++i;
  }

  // Drop the unused tail of the buffer.
  if (i < n) {
    out = Rf_lengthgets(out, i);
  }

  if (progress)
    progressBar.show(tokenizer.progress());
  progressBar.stop();

  return out;
}