Ejemplo n.º 1
0
// Tokenizes the next sentence starting at the current `text` position.
//
// forms  - optional output; cleared on entry and then filled by the
//          single-argument next_sentence overload. When null, a scratch
//          vector borrowed from `caches` is used instead.
// tokens - optional output; cleared on entry and then given one
//          (start, length) entry per form, where both values are counted in
//          utf8_advance steps using the running `chars` counter.
//
// Returns false immediately when `text` is null or equals `text_end`;
// otherwise returns whatever the single-argument next_sentence returned.
bool utf8_tokenizer::next_sentence(vector<string_piece>* forms, vector<token_range>* tokens) {
  if (forms) forms->clear();
  if (tokens) tokens->clear();
  if (!text || text == text_end) return false;

  // The caller did not ask for forms, but they are still needed to compute
  // the character offsets, so take a scratch holder from the pool (or make one).
  cache* borrowed = nullptr;
  if (!forms) {
    borrowed = caches.pop();
    if (!borrowed) borrowed = new cache();
    forms = &borrowed->forms;
    forms->clear();
  }

  // `cursor` walks from where the text stood before the call up to the new
  // `text`, bumping `chars` once per utf8_advance step.
  const char* cursor = text;
  const bool found = next_sentence(*forms);
  for (auto&& piece : *forms) {
    auto piece_end = piece.str + piece.len;

    // Skip whatever lies between the previous form and this one.
    while (cursor < piece.str) {
      utf8_advance(cursor, piece.str);
      chars++;
    }

    // Walk over the form itself, remembering where it began.
    size_t first_char = chars;
    while (cursor < piece_end) {
      utf8_advance(cursor, piece_end);
      chars++;
    }

    if (tokens) tokens->emplace_back(first_char, chars - first_char);
  }

  // Account for the text consumed after the last form.
  while (cursor < text) {
    utf8_advance(cursor, text);
    chars++;
  }

  // Hand the scratch holder back to the pool for reuse.
  if (borrowed) caches.push(borrowed);
  return found;
}
Ejemplo n.º 2
0
 // Checks whether some tail of `string` compares equal to `suffix` according
 // to stringEqualsI_utf8, trying every position reachable by utf8_advance.
 // Returns false once the terminating zero byte is reached or utf8_advance
 // reports failure; in particular an empty `string` always yields false.
 bool string_has_suffix_i( const char * string, const char * suffix ) {
     while (*string != 0) {
         if (stringEqualsI_utf8( string, suffix )) return true;
         // Move to the next position; stop if the cursor cannot be advanced.
         if (!utf8_advance(string)) break;
     }
     return false;
 }
Ejemplo n.º 3
0
// Shortens the string to at most length_in_chars characters (counted in
// utf8_advance steps) and, when something was actually cut off, appends
// `append` to the shortened string.
//
// length_in_chars - maximum number of characters to keep.
// append          - text passed to add_string() after truncation.
//
// Returns true if the string was truncated and `append` was added, false if
// the string already fit within the limit and was left untouched.
bool string_base::limit_length(t_size length_in_chars,const char * append)
{
	const char * base = get_ptr(), * ptr = base;
	// Step over up to length_in_chars characters; the loop ends early when
	// utf8_advance reports it cannot move any further.
	while(length_in_chars && utf8_advance(ptr)) length_in_chars--;

	// Counter not exhausted: the string is shorter than the limit.
	if (length_in_chars != 0) return false;

	// Counter exhausted exactly at the end of the string: nothing to cut, so
	// do not append either. (Previously a string of exactly the limit length
	// still received `append` and reported true.)
	// NOTE(review): relies on get_ptr() returning a zero-terminated buffer - confirm.
	if (*ptr == 0) return false;

	truncate(ptr-base);
	add_string(append);
	return true;
}