Beispiel #1
0
/// Appends to `spans` one TokenSpan per maximal run of non-space characters
/// in s[0..len), in left-to-right order.
///
/// Each appended span is half-open: `first` is the index of the run's first
/// character and `second` is one past its last character. Only the character
/// ' ' is treated as a separator. Leading, trailing and repeated spaces
/// produce no (empty) spans. Existing contents of `spans` are kept.
///
/// Only indices in [0, len) are ever read, so `s` does not need to be
/// NUL-terminated and input ending in spaces is handled correctly.
///
/// \param spans output container; new spans are added with push_back.
/// \param s     pointer to the first character of the text to scan.
/// \param len   number of characters of `s` to scan.
inline void addTokenSpans(TokenSpans &spans, char const* s, Position len) {
  Position i = 0;
  for (;;) {
    // Skip the separator run; the bound is tested before s[i] is read.
    while (i < len && s[i] == ' ') ++i;
    if (!(i < len)) return;  // nothing but spaces (or nothing at all) remained

    // s[i] is a non-space character: scan to the end of this token.
    TokenSpan span;
    span.first = i;
    while (i < len && s[i] != ' ') ++i;
    span.second = i;
    assert(span.first < span.second);  // a token is never empty
    spans.push_back(span);
  }
}
Beispiel #2
0
inline void spansToTokens(std::string const& str, TokenSpans const& spans, Tokens &tokens) {
  if (str.empty()) return;
  char const* s = &str[0];
  unsigned i = 0, n = spans.size();
  tokens.resize(n);
  for (; i < n; ++i) {
    TokenSpan const& span = spans[i];
    assert(span.first < str.size());
    assert(span.second <= str.size());
    tokens[i].assign(s + span.first, s + span.second);
  }

}
Beispiel #3
0
 /// Records one token: appends `span` to the span list when one is attached,
 /// then appends `word` to the token list.
 /// NOTE(review): spans_ and tokens_ are declared outside this view; spans_ is
 /// presumably an optional (nullable) pointer to the span container - confirm.
 void operator()(std::string const& word, TokenSpan span) const {
   // Span collection is optional; it is skipped when spans_ tests false.
   if (spans_) {
     spans_->push_back(span);
   }
   // The token text is always recorded.
   tokens_.push_back(word);
 }
Beispiel #4
0
 /// Records a single character as a token: when a span list is attached,
 /// appends the one-position span [pos, pos + 1), then appends the result of
 /// Util::utf8s(c) to the token list.
 /// NOTE(review): Util::utf8s presumably returns the UTF-8 encoding of `c` as
 /// a string - confirm against its declaration.
 void operator()(Unicode c, Position pos) const {
   if (spans_) {
     // A character token always covers exactly one position.
     TokenSpan const charSpan(pos, pos + 1);
     spans_->push_back(charSpan);
   }
   tokens_.push_back(Util::utf8s(c));
 }
Beispiel #5
0
 /// Records one token given as a slice: appends `span` to the span list when
 /// one is attached, then appends a std::string built from the range
 /// [word.first, word.second) to the token list.
 void operator()(Slice const& word, TokenSpan span) const {
   // Span collection is optional; it is skipped when spans_ tests false.
   if (spans_) {
     spans_->push_back(span);
   }
   // Copy the slice's characters into an owned string for the token list.
   tokens_.push_back(
       std::string(word.first, word.second));
 }