PosibErr<void> Dictionary::add_repl(ParmString mis, ParmString cor) { if (!invisible_soundslike) { VARARRAY(char, sl, mis.size() + 1); lang()->LangImpl::to_soundslike(sl, mis.str(), mis.size()); return add_repl(mis, cor, sl); } else {
// Return a copy of `in` with malformed byte sequences replaced.
//
// The input is scanned one byte at a time.  point_width () classifies a
// byte that may start a code point: a result > 1 opens a multi-byte code
// point of that many bytes, a result of 1 is copied straight through, and
// anything else is treated as a bad lead byte (or a stray follow byte).
// is_follow_byte () decides whether a byte may continue an open code point.
// Every rejected sequence results in one call to add_repl (op, repl).
// NOTE(review): point_width, is_follow_byte and add_repl are defined
// elsewhere; add_repl presumably appends `repl` at `op` and advances `op`
// (and copes with a null `repl`) -- confirm against its definition.
//
//   in    - the string to sanitize; a null `in` is returned unchanged.
//   repl  - replacement text for each rejected sequence; may be null.
//
// Returns the sanitized string.  A multi-byte code point that is cut off
// by the end of the input is also replaced (previously it was silently
// dropped, unlike the same broken sequence in the middle of the string).
str
utf8_fix (const str &in, const str &repl)
{
  if (!in) return in;

  size_t repl_len = repl ? repl.len () : size_t (0);

  // Upper bound on the output: each input byte yields at most one copied
  // byte or one replacement.  The bytes of a code point that is later
  // rejected produce no output of their own, so the single replacement
  // emitted for it is charged to its lead byte.  The extra byte is
  // presumably room for a terminator.
  mstr out (in.len () * max<size_t> (1, repl_len) + 1);

  const char *ip = in.cstr ();
  const char *endp = ip + in.len ();
  char *op = out.cstr ();

  const char *cps = NULL;       // start of the code point being buffered
  ssize_t expected_width = 0;   // total bytes that code point should span

  for ( ; ip < endp; ip++) {
    bool consumed = false;

    // A non-NULL 'cps' means a multi-byte code point is in progress.
    if (cps) {
      if (!is_follow_byte (*ip)) {
        // Expected a follow byte and did not get one: discard the
        // buffered code point and emit a replacement for it.  The
        // current byte is NOT consumed; it is re-examined below.
        cps = NULL;
        expected_width = 0;
        add_repl (op, repl);
      } else if (ip - cps == expected_width - 1) {
        // This follow byte completes the code point: copy all of it.
        while (cps <= ip) { *op++ = *cps++; }
        cps = NULL;
        expected_width = 0;
        consumed = true;
      } else {
        // Valid follow byte, but the code point is not complete yet.
        consumed = true;
      }
    }

    // This must come AFTER the in-progress case: a byte that breaks a
    // code point falls through to here and is classified on its own.
    if (!cps && !consumed) {
      ssize_t width = point_width (*ip);
      if (width > 1) {
        // Lead byte of a new multi-byte code point.
        cps = ip;
        expected_width = width;
      } else if (width == 1) {
        // Single-byte code point: copy through.
        *op++ = *ip;
      } else {
        // Bad lead byte, or a follow byte with no code point open.
        add_repl (op, repl);
      }
    }
  }

  // The input ended in the middle of a multi-byte code point.  Treat the
  // truncated tail like any other broken sequence instead of dropping it.
  if (cps) {
    add_repl (op, repl);
  }

  out.setlen (op - out.cstr ());
  return out;
}