/*
 * Append one wide character to the word under construction.
 *
 * buf is the start of the word buffer, cap its capacity in wide characters
 * (terminator included) and pos the current write position.  When only the
 * slot reserved for the terminator is left, the text gathered so far is
 * terminated and handed to pack(), and collection restarts at buf, so the
 * buffer can never be overrun.  Returns the updated write position.
 */
static wchar_t *
msplit_put(wchar_t *buf, size_t cap, wchar_t *pos, wchar_t wc)
{
	if ((size_t)(pos - buf) >= cap - 1) {
		*pos = L'\0';
		pack(buf);
		pos = buf;
	}
	*pos++ = wc;
	return (pos);
}

/*
 * Split a NUL-terminated line into words and pass each one to pack().
 *
 * A word ends at white space, or where _wckind() changes between two
 * neighbouring characters and wcsetno() is non-zero for at least one of
 * them.  A backslash immediately followed by white space keeps that white
 * space inside the word.  White space following a word is attached to it,
 * so a line that starts with white space yields a word made only of
 * white space.
 *
 * Words longer than the local buffer are handed to pack() in several
 * pieces instead of being written past the end of the buffer.
 */
static void
msplit(wchar_t line[])
{
	wchar_t *cp, *cp2, prev;
	wchar_t word[BUFSIZ];
	const size_t cap = sizeof (word) / sizeof (word[0]);
	static const wchar_t *srchlist = (const wchar_t *) L".:!?";

	cp = line;
	while (*cp) {
		cp2 = word;
		prev = *cp;

		/*
		 * Collect a 'word,' allowing it to contain escaped
		 * white space.
		 */
		while (*cp) {
			if (iswspace(*cp))
				break;
			if (_wckind(*cp) != _wckind(prev) &&
			    (wcsetno(*cp) != 0 || wcsetno(prev) != 0))
				break;
			if (*cp == '\\' && iswspace(cp[1])) {
				/* Keep the backslash; the space follows below. */
				cp2 = msplit_put(word, cap, cp2, *cp);
				cp++;
			}
			prev = *cp;
			cp2 = msplit_put(word, cap, cp2, *cp);
			cp++;
		}

		/*
		 * Guarantee a space at end of line.
		 * Two spaces after end of sentence punctuation.
		 *
		 * cp[-1] is safe here: the outer loop only runs while *cp is
		 * non-NUL, so reaching the terminator means at least one
		 * character of this line has been consumed.
		 */
		if (*cp == L'\0') {
			cp2 = msplit_put(word, cap, cp2, L' ');
			if (wschr(srchlist, cp[-1]) != NULL)
				cp2 = msplit_put(word, cap, cp2, L' ');
		}
		while (iswspace(*cp)) {
			cp2 = msplit_put(word, cap, cp2, *cp);
			cp++;
		}
		*cp2 = L'\0';
		pack(word);
	}
}
/*
 * Return a kind number for wc based on wcsetno(wc).
 *
 * When wcsetno() reports 1, 2 or 3 the result is that value plus one
 * (2, 3 or 4).  When it reports 0 the result is 1 if wc is an ASCII
 * letter, an ASCII digit or a space, and 0 otherwise.  Any other
 * wcsetno() value yields 0.
 */
static int
wdchkind_C(wchar_t wc)
{
	int codeset = wcsetno(wc);

	if (codeset == 0) {
		if (!isascii(wc))
			return (0);
		return (isalpha(wc) || isdigit(wc) || wc == ' ');
	}

	if (codeset >= 1 && codeset <= 3)
		return (codeset + 1);

	return (0);
}
/*
 * Return a kind number for wc.
 *
 * For characters accepted by iswascii(): 0 when the character is
 * alphanumeric or an underscore, 1 otherwise.  For every other character
 * the result is wcsetno(wc) + 1.
 *
 * DEPEND_ON_ANSIC: the L'x' wide character notation is new in ANSI C;
 * a K&R compiler won't work.
 */
static int
_wckind_c_locale(wchar_t wc)
{
	if (!iswascii(wc))
		return (wcsetno(wc) + 1);

	if (iswalnum(wc) || wc == L'_')
		return (0);

	return (1);
}