/*
 * pg_wc_isupper
 *
 * Report whether the character c counts as upper case, choosing the
 * classification routine according to pg_regex_strategy.
 *
 * Returns nonzero for upper case and 0 otherwise.  Code points that are
 * out of range for the routine selected by the strategy are reported as
 * not upper case.
 */
static int
pg_wc_isupper(pg_wchar c)
{
	switch (pg_regex_strategy)
	{
		case PG_REGEX_LOCALE_C:
			/* Only code points 0..127 have an entry we consult. */
			if (c > (pg_wchar) 127)
				return 0;
			return (pg_char_properties[c] & PG_ISUPPER) ? 1 : 0;

		case PG_REGEX_LOCALE_WIDE:
#ifdef USE_WIDE_UPPER_LOWER
			/* Usable only when the code point fits into wchar_t. */
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return iswupper((wint_t) c);
#endif
			/* FALL THRU */

		case PG_REGEX_LOCALE_1BYTE:
			if (c > (pg_wchar) UCHAR_MAX)
				return 0;
			return isupper((unsigned char) c) ? 1 : 0;

		case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
			/* Same width restriction as above, with an explicit locale. */
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return iswupper_l((wint_t) c, pg_regex_locale);
#endif
			/* FALL THRU */

		case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
			if (c > (pg_wchar) UCHAR_MAX)
				return 0;
			return isupper_l((unsigned char) c, pg_regex_locale) ? 1 : 0;
#endif
			break;
	}

	/* can't get here, but keep compiler quiet */
	return 0;
}
/*
 * Lower-case the wide character wc according to locale.
 *
 * Returns wc unchanged when iswupper_l() does not classify it as upper
 * case in the given locale.  ASCII 'A'..'Z' are converted with the fixed
 * 'a' - 'A' offset.  That offset is not valid for upper-case characters
 * outside ASCII (e.g. U+0100 must map to U+0101, not U+0120), so those
 * are converted through the locale's "tolower" mapping instead.
 */
wint_t towlower_l(wint_t wc, locale_t locale)
{
    wctrans_t to_lower;

    if (!iswupper_l(wc, locale)) {
        return wc;
    }

    if (wc >= (wint_t)L'A' && wc <= (wint_t)L'Z') {
        return wc - (wint_t)L'A' + (wint_t)L'a';
    }

    to_lower = wctrans_l("tolower", locale);
    if (to_lower == (wctrans_t)0) {
        /* No mapping available: leave the character alone rather than guess. */
        return wc;
    }
    return towctrans_l(wc, to_lower, locale);
}
/*
 * Reference every <wctype.h> classification and mapping function, in both
 * its plain and its locale-taking (_l) form, discarding each result.
 *
 * NOTE(review): the name passed to wctrans()/wctype() is a made-up address,
 * not a real string, so this looks like a declaration/link check that is
 * not meant to be executed -- confirm with the build that uses it.
 */
void wctype_check_functions(wint_t i, wctype_t t, wctrans_t tr, locale_t l)
{
    /* Placeholder "name" argument; only its type matters here. */
    const char *const bogus_name = (const char *)1234;

    /* Character classification */
    (void)iswalnum(i);
    (void)iswalnum_l(i, l);
    (void)iswalpha(i);
    (void)iswalpha_l(i, l);
    (void)iswblank(i);
    (void)iswblank_l(i, l);
    (void)iswcntrl(i);
    (void)iswcntrl_l(i, l);
    (void)iswctype(i, t);
    (void)iswctype_l(i, t, l);
    (void)iswdigit(i);
    (void)iswdigit_l(i, l);
    (void)iswgraph(i);
    (void)iswgraph_l(i, l);
    (void)iswlower(i);
    (void)iswlower_l(i, l);
    (void)iswprint(i);
    (void)iswprint_l(i, l);
    (void)iswpunct(i);
    (void)iswpunct_l(i, l);
    (void)iswspace(i);
    (void)iswspace_l(i, l);
    (void)iswupper(i);
    (void)iswupper_l(i, l);
    (void)iswxdigit(i);
    (void)iswxdigit_l(i, l);

    /* Character mapping */
    (void)towctrans(i, tr);
    (void)towctrans_l(i, tr, l);
    (void)towlower(i);
    (void)towlower_l(i, l);
    (void)towupper(i);
    (void)towupper_l(i, l);

    /* Descriptor lookup by name */
    (void)wctrans(bogus_name);
    (void)wctrans_l(bogus_name, l);
    (void)wctype(bogus_name);
    (void)wctype_l(bogus_name, l);
}
void shorten_name(const char *name, char short_name[512], char shortest_name[512]) { wchar_t w_name[512]; wchar_t w_short_name[512]; wchar_t w_shortest_name[512]; const wchar_t *cur_word, *wchar_ptr; wchar_t *cur_short_word, *cur_shortest_word; int unabbrev = 0; int i, len, new_len, capital; if (!name) return; mbsrtowcs_l(w_name, &name, ARRAY_SIZE(w_name), NULL, l); /* TODO: also skip anything in parenthesis from the short names */ /* TODO: instead of calling wcscasecmp_l all the time it'd be more * effective to lower case w_name once and use plain wcscmp or mem * compare since the phrases we search for are all lower case already. * Might also want to take "collation" into account (wcsxfrm_l the * string once and use memcmp instead of wcscoll_l). */ cur_word = w_name; cur_short_word = w_short_name; cur_shortest_word = w_shortest_name; while (1) { while (*cur_word && !iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; if (!*cur_word) break; /* TODO: use a hash of some kind instead of iterating over arrays */ /* Go through possible abbreviations from top to bottom */ for (i = 0; i < ARRAY_SIZE(abbrevs); i += 2) if (!wcsncasecmp_l(abbrevs[i], cur_word, wcslen(abbrevs[i]), l)) { len = wcslen(abbrevs[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; capital = iswupper_l(*cur_word, l); cur_word += len; new_len = wcslen(abbrevs[i + 1]); memcpy(cur_short_word, abbrevs[i + 1], new_len * sizeof(wchar_t)); /* * If original was capitalised then capitalise the abbreviation * as well, if it was lower case. */ if (capital) *cur_short_word = towupper_l(*cur_short_word, l); /* Make sure shortest_word doesn't end up being empty */ if (!*cur_word && !unabbrev) { memcpy(cur_shortest_word, cur_short_word, new_len * sizeof(wchar_t)); cur_shortest_word += new_len; } cur_short_word += new_len; /* * Avoid excess whitespace in short and shortest * when a word is replaced with "". 
* TODO: this may require more complicated logic to get * the corner cases right. */ if (new_len == 0) { if (cur_short_word > w_short_name && iswspace_l(cur_short_word[-1], l)) cur_short_word --; if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; } /*if (new_len != len)*/ break; } if (i < ARRAY_SIZE(abbrevs)) continue; /* Go through possible given names from top to bottom */ for (i = 0; i < ARRAY_SIZE(given_names); i ++) if (!wcsncasecmp_l(given_names[i], cur_word, wcslen(given_names[i]), l)) { len = wcslen(given_names[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; /* * If this is the final part of the name, and it matches a * given name then that's most likely somebody's surname which * happens to also be a possibble given name. In that case * do not abbreviate or omit it. */ if (!cur_word[len]) continue; cur_word += len; *cur_short_word++ = given_names[i][0]; *cur_short_word++ = L'.'; /* * Avoid excess whitespace in shortest when a word is * replaced with "". * TODO: this may require more complicated logic to get * the corner cases right. */ if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; break; } if (i < ARRAY_SIZE(given_names)) continue; /* Nothing matched, copy the current word as-is */ while (iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; unabbrev += 1; } *cur_short_word = 0; *cur_shortest_word = 0; wchar_ptr = w_short_name; wcsrtombs_l(short_name, &wchar_ptr, 512, NULL, l); wchar_ptr = w_shortest_name; wcsrtombs_l(shortest_name, &wchar_ptr, 512, NULL, l); }