// [[Rcpp::export]] CharacterVector read_lines_(List sourceSpec, int n_max = -1) { SourcePtr source = Source::create(sourceSpec); TokenizerLine tokenizer; tokenizer.tokenize(source->begin(), source->end()); int n = (n_max < 0) ? 1000 : n_max; CharacterVector out(n); int i = 0; for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) { if (i >= n) { if (n_max < 0) { n = (n * 3)/2 + 1; out = Rf_lengthgets(out, n); } else { break; } } if (t.type() == TOKEN_STRING) out[i] = t.asString(); ++i; } if (i < n) { out = Rf_lengthgets(out, i); } return out; }
// [[Rcpp::export]] CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1, bool progress = true) { SourcePtr source = Source::create(sourceSpec); TokenizerLine tokenizer; tokenizer.tokenize(source->begin(), source->end()); LocaleInfo locale(locale_); Progress progressBar; int n = (n_max < 0) ? 10000 : n_max; CharacterVector out(n); int i = 0; for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) { if (progress && (i + 1) % 25000 == 0) progressBar.show(tokenizer.progress()); if (i >= n) { if (n_max < 0) { // Estimate rows in full dataset n = (i / tokenizer.progress().first) * 1.2; out = Rf_lengthgets(out, n); } else { break; } } if (t.type() == TOKEN_STRING) out[i] = t.asSEXP(&locale.encoder_); ++i; } if (i < n) { out = Rf_lengthgets(out, i); } if (progress) progressBar.show(tokenizer.progress()); progressBar.stop(); return out; }