Пример #1
0
 PosibErr<void> Dictionary::add_repl(ParmString mis, ParmString cor) 
 {
   if (!invisible_soundslike) {
     VARARRAY(char, sl, mis.size() + 1);
     lang()->LangImpl::to_soundslike(sl, mis.str(), mis.size());
     return add_repl(mis, cor, sl);
   } else {
Пример #2
0
// Copy 'in' to a new string, keeping single-width bytes and complete
// multi-byte sequences, and substituting 'repl' for everything else.
//
//   in    -- the bytes to clean up.  If 'in' tests false it is returned
//            unchanged.
//   repl  -- replacement emitted (through add_repl ()) once for each
//            rejected sequence or stray byte.  May test false, in which
//            case its length is taken to be 0 for buffer sizing.
//
// Returns the cleaned-up string.
//
// Byte classification is delegated to point_width () and
// is_follow_byte ().  This function itself only checks that a lead byte
// is followed by as many follow bytes as point_width () announced; it
// does not inspect the decoded value.
//
// A sequence is rejected when:
//   - a byte that is not a follow byte shows up before the sequence is
//     complete (the offending byte is then re-examined on its own);
//   - point_width () reports a width below 1 for a byte seen outside of
//     a sequence (bad lead byte or stray follow byte);
//   - the input ends before the sequence is complete.
str
utf8_fix (const str &in, const str &repl)
{
  if (!in) return in;

  // Every input byte yields at most one output byte or one replacement,
  // so in.len () * max (1, repl_len) bounds the output (assuming
  // add_repl () writes at most repl.len () bytes -- TODO confirm).
  size_t repl_len = repl ? repl.len () : size_t (0);
  mstr out (in.len () * max<size_t> (1, repl_len) + 1);
  const char *ip = in.cstr ();
  const char *endp = ip + in.len ();
  char *op = out.cstr ();
  const char *cps = NULL; // start of the code point being buffered, if any
  ssize_t expected_width = 0, tmp = 0;

  for ( ; ip < endp; ip++) {

    // Set once the current byte has been accounted for as part of a
    // buffered code point, so the start-of-point logic below skips it.
    bool consumed = false;

    // A non-NULL 'cps' means an earlier byte opened a code point that
    // is still waiting for follow bytes.
    if (cps) {

      if (!is_follow_byte (*ip)) {
        // Expected a follow byte and did not get one: discard the
        // buffered bytes, emit one replacement for them, and leave
        // 'consumed' false so that this byte is examined below.
        cps = NULL;
        expected_width = 0;
        add_repl (op, repl);

      } else if (ip - cps == expected_width - 1) {
        // This follow byte completes the sequence; copy all of its
        // bytes (lead byte through *ip) to the output.
        while (cps <= ip) {
          *op++ = *cps++;
        }
        cps = NULL;
        expected_width = 0;
        consumed = true;

      } else {
        // Valid follow byte, but the sequence is not complete yet.
        consumed = true;
      }

    }

    // This must come AFTER the in-sequence handling above: when a
    // sequence was just broken by this byte, both branches run for the
    // same byte.
    if (!cps && !consumed) {
      tmp = point_width (*ip);
      if (tmp > 1) {
        // Lead byte of a multi-byte sequence: start buffering.
        cps = ip;
        expected_width = tmp;
      } else if (tmp == 1) {
        // Single-width byte: copy straight through.
        *op++ = *ip;
      } else {
        // Bad starting byte, or a follow byte with no sequence open.
        add_repl (op, repl);
      }
    }

  }

  // The input ran out in the middle of a code point.  Treat it like any
  // other broken sequence and emit one replacement, rather than dropping
  // the buffered bytes without a trace.
  if (cps) {
    add_repl (op, repl);
  }

  out.setlen (op - out.cstr ());
  return out;
}