/// Convert the UTF-8 byte range [begin, end) according to `how`.
///
/// - upper_case: every wide character is mapped through towupper_l().
/// - lower_case / case_folding: every wide character is mapped through
///   towlower_l() (both conversion types share one implementation here).
/// - anything else: the input bytes are returned unchanged.
///
/// The mapping uses the locale object referenced by `*lc_`.
/// `flags` is part of the interface but is not consulted by this
/// implementation.
virtual std::string convert(converter_base::conversion_type how,char const *begin,char const *end,int flags = 0) const
{
    switch(how) {
    case upper_case:
        {
            std::wstring tmp = conv::to_utf<wchar_t>(begin,end,"UTF-8");
            // Index with size_type: an `unsigned` counter can wrap before it
            // reaches size() on targets where size_t is wider than unsigned.
            for(std::wstring::size_type i=0;i<tmp.size();++i)
                tmp[i]=towupper_l(tmp[i],*lc_);
            return conv::from_utf<wchar_t>(tmp,"UTF-8");
        }
    case lower_case:
    case case_folding:
        {
            std::wstring tmp = conv::to_utf<wchar_t>(begin,end,"UTF-8");
            for(std::wstring::size_type i=0;i<tmp.size();++i)
                tmp[i]=towlower_l(tmp[i],*lc_);
            return conv::from_utf<wchar_t>(tmp,"UTF-8");
        }
    default:
        return std::string(begin,end-begin);
    }
}
/*
 * Locale-aware single-byte upper-casing.
 *
 * Without extended charset support, a character that islower_l() accepts is
 * shifted by the fixed 'a' -> 'A' offset and everything else is returned
 * as-is.
 *
 * With _MB_EXTENDED_CHARSETS_ISO or _MB_EXTENDED_CHARSETS_WINDOWS defined,
 * bytes above 0x7f are additionally handled when the locale's maximum
 * multibyte length is 1: the byte is widened with the locale's mbtowc hook,
 * upper-cased with towupper_l() and narrowed again with the wctomb hook.
 * If any of those steps fails, or the result is not exactly one byte, the
 * input is returned unchanged.
 */
int
toupper_l (int c, struct __locale_t *locale)
{
#if defined (_MB_EXTENDED_CHARSETS_ISO) \
    || defined (_MB_EXTENDED_CHARSETS_WINDOWS)
  /* 7-bit range: fixed-offset mapping. */
  if ((unsigned char) c <= 0x7f)
    {
      if (islower_l (c, locale))
        return c - 'a' + 'A';
      return c;
    }

  /* EOF, multibyte locales and non-lowercase bytes pass through untouched. */
  if (c == EOF)
    return c;
  if (__locale_mb_cur_max_l (locale) != 1)
    return c;
  if (!islower_l (c, locale))
    return c;

  {
    char s[MB_LEN_MAX] = { c, '\0' };
    wchar_t wide;
    mbstate_t ps;

    memset (&ps, 0, sizeof ps);

    /* Round-trip through the wide-character upper-case mapping. */
    if (locale->mbtowc (_REENT, &wide, s, 1, &ps) < 0)
      return c;
    if (locale->wctomb (_REENT, s,
                        (wchar_t) towupper_l ((wint_t) wide, locale),
                        &ps) != 1)
      return c;

    return (unsigned char) s[0];
  }
#else
  if (islower_l (c, locale))
    return c - 'a' + 'A';
  return c;
#endif
}
/**
 * Copy `from` into `to` with its first character converted to upper case.
 *
 * `to` may be the same pointer as `from` (in-place conversion); in that case
 * the call fails with an error message if the upper-case character needs
 * more bytes than the original one.
 *
 * XXX FIXME This works 'most of the time', but is not technically correct:
 * towlower() and towupper() are locale dependent, and the byte counts of the
 * two cases might not match up, e.g. German ß and SS.
 * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
 *
 * @param to     Destination buffer.
 * @param from   NUL-terminated source string.
 * @param usize  Size passed on to safe_strcpy() (minus the bytes already
 *               written for the first character).
 * @param locale Locale used for the upper-case mapping.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize, locale_t locale)
{
	char upper_bytes[MB_LEN_MAX];
	mbstate_t state;
	wchar_t first;
	int src_len, dst_len, k;
	int in_place = (to == from);

	/* Decode the leading character of the source. */
	memset(&state, 0, sizeof(state));
	src_len = mbrtowc (&first, from, MB_CUR_MAX, &state);
	if (src_len < 0)
	{
		prt_error("Error: Invalid UTF-8 string!\n");
		return;
	}

	/* Map it to upper case and encode it again. */
	first = towupper_l(first, locale);
	dst_len = wctomb_check(upper_bytes, first);

	/* An in-place conversion cannot grow the first character: that would
	 * require shifting the rest of the string, which is not implemented. */
	if (in_place && (src_len < dst_len))
	{
		prt_error("Error: can't upcase UTF-8 string!\n");
		return;
	}

	/* Store the upper-case bytes. */
	k = 0;
	while (k < dst_len)
	{
		to[k] = upper_bytes[k];
		k++;
	}

	/* Same length, same buffer: the tail is already where it belongs. */
	if (in_place && (src_len == dst_len)) return;

	/* Append the remainder of the source after the converted character. */
	safe_strcpy(to + dst_len, from + src_len, usize-dst_len);
}
void test_char() { booster::locale::generator gen; std::cout << "- Testing at least C" << std::endl; std::locale l = gen("en_US.UTF-8"); test_one<CharType>(l,"Hello World i","hello world i","HELLO WORLD I"); std::string name = "en_US.UTF-8"; if(have_locale(name)) { std::cout << "- Testing " << name << std::endl; std::locale l=gen(name); test_one<CharType>(l,"Façade","façade","FAÇADE"); } else { std::cout << "- en_US.UTF-8 is not supported, skipping" << std::endl; } name = "en_US.ISO8859-1"; if(have_locale(name)) { std::cout << "Testing " << name << std::endl; std::locale l=gen(name); test_one<CharType>(l,"Hello World","hello world","HELLO WORLD"); #if defined(__APPLE__) || defined(__FreeBSD__) if(sizeof(CharType)!=1) #endif test_one<CharType>(l,"Façade","façade","FAÇADE"); } else { std::cout << "- en_US.ISO8859-1 is not supported, skipping" << std::endl; } name = "tr_TR.UTF-8"; if(have_locale(name)) { std::cout << "Testing " << name << std::endl; locale_t cl = newlocale(LC_ALL_MASK,name.c_str(),0); try { TEST(cl); if(towupper_l(L'i',cl) == 0x130) { test_one<CharType>(gen(name),"i","i","İ"); } else { std::cout <<" Turkish locale is not supported well" << std::endl; } } catch(...) { if(cl) freelocale(cl); throw; } if(cl) freelocale(cl); } else { std::cout << "- tr_TR.UTF-8 is not supported, skipping" << std::endl; } }
/*
 * Apply the character mapping selected by `desc` to `wc` in `locale`.
 *
 * _WCT_TOLOWER and _WCT_TOUPPER delegate to towlower_l() / towupper_l().
 * _WCT_ERROR and any other descriptor set errno to EINVAL and return `wc`
 * unchanged.
 */
wint_t
towctrans_l(wint_t wc, wctrans_t desc, locale_t locale)
{
	if (desc == _WCT_TOLOWER)
		return (towlower_l(wc, locale));

	if (desc == _WCT_TOUPPER)
		return (towupper_l(wc, locale));

	/* Unknown or error descriptor: flag it and hand the input back. */
	errno = EINVAL;
	return (wc);
}
/*
 * Return the upper-case form of c, choosing the conversion routine according
 * to the current pg_regex_strategy.
 *
 * Several cases deliberately fall through into the next one when the wider
 * conversion is not compiled in or does not apply to c; the order of the
 * case labels therefore matters.
 */
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
	switch (pg_regex_strategy)
	{
		case PG_REGEX_LOCALE_C:
			/* only ASCII is converted; everything else is returned as-is */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
			return c;
		case PG_REGEX_LOCALE_WIDE:
			/* force C behavior for ASCII characters, per comments above */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
#ifdef USE_WIDE_UPPER_LOWER
			/*
			 * towupper() is used only when wchar_t is at least 4 bytes or c
			 * is no larger than 0xFFFF (presumably so that c is
			 * representable as a wchar_t).
			 */
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return towupper((wint_t) c);
#endif
			/* FALL THRU */
		case PG_REGEX_LOCALE_1BYTE:
			/* force C behavior for ASCII characters, per comments above */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
			/* values that fit in one byte go through toupper() */
			if (c <= (pg_wchar) UCHAR_MAX)
				return toupper((unsigned char) c);
			return c;
		case PG_REGEX_LOCALE_WIDE_L:
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
			/* same range condition as above, using pg_regex_locale */
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return towupper_l((wint_t) c, pg_regex_locale);
#endif
			/* FALL THRU */
		case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
			/* note: no separate ASCII shortcut is applied in the _L cases */
			if (c <= (pg_wchar) UCHAR_MAX)
				return toupper_l((unsigned char) c, pg_regex_locale);
#endif
			/* without HAVE_LOCALE_T the _L strategies return c unchanged */
			return c;
	}
	return 0;					/* can't get here, but keep compiler quiet */
}
/*
 * References every <wctype.h> classification and mapping function, each
 * paired with its locale-taking `_l` variant, and discards the results.
 *
 * NOTE(review): the name argument handed to wctrans()/wctype() is the
 * arbitrary address 1234, not a string, so this looks like a
 * declaration/link check that is not meant to be executed - confirm with
 * the caller.
 */
void wctype_check_functions(wint_t i, wctype_t t, wctrans_t tr, locale_t l) {
  /* Placeholder "property name"; only its type matters here. */
  const char *bogus_name = (const char *)1234;

  /* Character classification. */
  (void)iswalnum(i);     (void)iswalnum_l(i, l);
  (void)iswalpha(i);     (void)iswalpha_l(i, l);
  (void)iswblank(i);     (void)iswblank_l(i, l);
  (void)iswcntrl(i);     (void)iswcntrl_l(i, l);
  (void)iswctype(i, t);  (void)iswctype_l(i, t, l);
  (void)iswdigit(i);     (void)iswdigit_l(i, l);
  (void)iswgraph(i);     (void)iswgraph_l(i, l);
  (void)iswlower(i);     (void)iswlower_l(i, l);
  (void)iswprint(i);     (void)iswprint_l(i, l);
  (void)iswpunct(i);     (void)iswpunct_l(i, l);
  (void)iswspace(i);     (void)iswspace_l(i, l);
  (void)iswupper(i);     (void)iswupper_l(i, l);
  (void)iswxdigit(i);    (void)iswxdigit_l(i, l);

  /* Character mapping. */
  (void)towctrans(i, tr); (void)towctrans_l(i, tr, l);
  (void)towlower(i);      (void)towlower_l(i, l);
  (void)towupper(i);      (void)towupper_l(i, l);

  /* Descriptor lookup. */
  (void)wctrans(bogus_name);
  (void)wctrans_l(bogus_name, l);
  (void)wctype(bogus_name);
  (void)wctype_l(bogus_name, l);
}
void shorten_name(const char *name, char short_name[512], char shortest_name[512]) { wchar_t w_name[512]; wchar_t w_short_name[512]; wchar_t w_shortest_name[512]; const wchar_t *cur_word, *wchar_ptr; wchar_t *cur_short_word, *cur_shortest_word; int unabbrev = 0; int i, len, new_len, capital; if (!name) return; mbsrtowcs_l(w_name, &name, ARRAY_SIZE(w_name), NULL, l); /* TODO: also skip anything in parenthesis from the short names */ /* TODO: instead of calling wcscasecmp_l all the time it'd be more * effective to lower case w_name once and use plain wcscmp or mem * compare since the phrases we search for are all lower case already. * Might also want to take "collation" into account (wcsxfrm_l the * string once and use memcmp instead of wcscoll_l). */ cur_word = w_name; cur_short_word = w_short_name; cur_shortest_word = w_shortest_name; while (1) { while (*cur_word && !iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; if (!*cur_word) break; /* TODO: use a hash of some kind instead of iterating over arrays */ /* Go through possible abbreviations from top to bottom */ for (i = 0; i < ARRAY_SIZE(abbrevs); i += 2) if (!wcsncasecmp_l(abbrevs[i], cur_word, wcslen(abbrevs[i]), l)) { len = wcslen(abbrevs[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; capital = iswupper_l(*cur_word, l); cur_word += len; new_len = wcslen(abbrevs[i + 1]); memcpy(cur_short_word, abbrevs[i + 1], new_len * sizeof(wchar_t)); /* * If original was capitalised then capitalise the abbreviation * as well, if it was lower case. */ if (capital) *cur_short_word = towupper_l(*cur_short_word, l); /* Make sure shortest_word doesn't end up being empty */ if (!*cur_word && !unabbrev) { memcpy(cur_shortest_word, cur_short_word, new_len * sizeof(wchar_t)); cur_shortest_word += new_len; } cur_short_word += new_len; /* * Avoid excess whitespace in short and shortest * when a word is replaced with "". 
* TODO: this may require more complicated logic to get * the corner cases right. */ if (new_len == 0) { if (cur_short_word > w_short_name && iswspace_l(cur_short_word[-1], l)) cur_short_word --; if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; } /*if (new_len != len)*/ break; } if (i < ARRAY_SIZE(abbrevs)) continue; /* Go through possible given names from top to bottom */ for (i = 0; i < ARRAY_SIZE(given_names); i ++) if (!wcsncasecmp_l(given_names[i], cur_word, wcslen(given_names[i]), l)) { len = wcslen(given_names[i]); /* Check that we matched a full word */ if (iswalnum_l(cur_word[len], l)) continue; /* * If this is the final part of the name, and it matches a * given name then that's most likely somebody's surname which * happens to also be a possibble given name. In that case * do not abbreviate or omit it. */ if (!cur_word[len]) continue; cur_word += len; *cur_short_word++ = given_names[i][0]; *cur_short_word++ = L'.'; /* * Avoid excess whitespace in shortest when a word is * replaced with "". * TODO: this may require more complicated logic to get * the corner cases right. */ if (cur_shortest_word > w_shortest_name && iswspace_l(cur_shortest_word[-1], l)) cur_shortest_word --; break; } if (i < ARRAY_SIZE(given_names)) continue; /* Nothing matched, copy the current word as-is */ while (iswalnum_l(*cur_word, l)) *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++; unabbrev += 1; } *cur_short_word = 0; *cur_shortest_word = 0; wchar_ptr = w_short_name; wcsrtombs_l(short_name, &wchar_ptr, 512, NULL, l); wchar_ptr = w_shortest_name; wcsrtombs_l(shortest_name, &wchar_ptr, 512, NULL, l); }
/// Upper-case a single wide character using the C locale handle `lc`.
static wchar_t upper(wchar_t c,locale_t lc)
{
    wint_t const mapped = towupper_l(c,lc);
    return mapped;
}