Пример #1
0
 /// Case-converts the UTF-8 byte range [begin,end) as requested by \a how.
 ///
 /// - upper_case: every wide character is passed through towupper_l.
 /// - lower_case and case_folding: every wide character is passed through
 ///   towlower_l (both requests share one code path).
 /// - any other value: the input bytes are returned unchanged.
 ///
 /// The text is widened with conv::to_utf<wchar_t>, mapped one wide character
 /// at a time using the locale handle *lc_, and narrowed back to UTF-8 with
 /// conv::from_utf<wchar_t>.  \a flags is not consulted.
 virtual std::string convert(converter_base::conversion_type how,char const *begin,char const *end,int flags = 0) const 
 {
     bool make_upper = false;

     // Decide the mapping direction first; unsupported requests leave early
     // so that no conversion work is done for them.
     switch(how) {
     case upper_case:
         make_upper = true;
         break;
     case lower_case:
     case case_folding:
         break;
     default:
         return std::string(begin,end-begin);
     }

     std::wstring wide = conv::to_utf<wchar_t>(begin,end,"UTF-8");
     std::wstring mapped;
     mapped.reserve(wide.size());

     if(make_upper) {
         for(std::wstring::const_iterator p=wide.begin();p!=wide.end();++p)
             mapped+=towupper_l(*p,*lc_);
     }
     else {
         for(std::wstring::const_iterator p=wide.begin();p!=wide.end();++p)
             mapped+=towlower_l(*p,*lc_);
     }

     return conv::from_utf<wchar_t>(mapped,"UTF-8");
 }
Пример #2
0
/*
 * toupper_l - upper-case conversion of a single character for the given
 * locale.
 *
 * Without extended charset support the result is c - 'a' + 'A' when
 * islower_l() reports c as lower case, otherwise c unchanged.
 *
 * With _MB_EXTENDED_CHARSETS_ISO or _MB_EXTENDED_CHARSETS_WINDOWS defined,
 * values whose low byte is <= 0x7f take that same arithmetic path.  Any other
 * value is converted only if it is not EOF, the locale's maximum multibyte
 * length is 1 and islower_l() accepts it: the byte is widened with the
 * locale's mbtowc hook, mapped with towupper_l(), and narrowed again with the
 * locale's wctomb hook.  If either hook fails, or the upper-case form does
 * not fit in exactly one byte, c is returned unchanged.
 */
int toupper_l(int c, struct __locale_t *locale)
{
#if defined (_MB_EXTENDED_CHARSETS_ISO) \
    || defined (_MB_EXTENDED_CHARSETS_WINDOWS)
  if ((unsigned char) c <= 0x7f)
    {
      if (islower_l (c, locale))
	return c - 'a' + 'A';
      return c;
    }

  /* Nothing to do for EOF, multibyte locales, or non-lower-case input.  */
  if (c == EOF || __locale_mb_cur_max_l (locale) != 1
      || !islower_l (c, locale))
    return c;

  {
    char buf[MB_LEN_MAX] = { c, '\0' };
    wchar_t wide;
    mbstate_t st;

    memset (&st, 0, sizeof st);
    if (locale->mbtowc (_REENT, &wide, buf, 1, &st) < 0)
      return c;
    /* Accept the result only when it narrows back to a single byte.  */
    if (locale->wctomb (_REENT, buf,
			(wchar_t) towupper_l ((wint_t) wide, locale),
			&st) != 1)
      return c;
    return (unsigned char) buf[0];
  }
#else
  if (islower_l (c, locale))
    return c - 'a' + 'A';
  return c;
#endif
}
Пример #3
0
/**
 * Copy the string `from` into `to` with its first character upper-cased.
 *
 * Only the leading multibyte character is converted; the remainder of the
 * string is copied through safe_strcpy(), which is given `usize` minus the
 * number of bytes written for the first character.
 * (`usize` is presumably the capacity of `to` -- confirm against callers.)
 *
 * `to` may be the same pointer as `from`.  In that case the call fails with
 * an error message if the upper-case form needs more bytes than the original
 * character, and returns right after rewriting the first character if both
 * forms have the same length.
 *
 * XXX FIXME This works 'most of the time', but is not technically correct:
 * towlower() and towupper() are locale dependent, and the byte counts of the
 * two case forms might not match up, e.g. German ß and SS.
 * The correct long-term fix is to use ICU or glib g_utf8_strup(), etc.
 */
void upcase_utf8_str(char *to, const char * from, size_t usize, locale_t locale)
{
	char upper_bytes[MB_LEN_MAX];
	mbstate_t state;
	wchar_t first;
	int in_len, out_len, k;

	/* Decode the first character, starting from a fresh shift state. */
	memset(&state, 0, sizeof(state));
	in_len = mbrtowc(&first, from, MB_CUR_MAX, &state);
	if (in_len < 0)
	{
		prt_error("Error: Invalid UTF-8 string!\n");
		return;
	}

	first = towupper_l(first, locale);
	out_len = wctomb_check(upper_bytes, first);

	/* An in-place conversion cannot grow the first character: the tail
	 * is not shifted to make room for it. */
	if (to == from)
	{
		if (in_len < out_len)
		{
			prt_error("Error: can't upcase UTF-8 string!\n");
			return;
		}
	}

	/* Emit the upper-cased first character. */
	k = 0;
	while (k < out_len)
	{
		to[k] = upper_bytes[k];
		k++;
	}

	/* In place with equal lengths: the tail is already where it belongs. */
	if ((to == from) && (in_len == out_len)) return;

	safe_strcpy(to + out_len, from + in_len, usize-out_len);
}
Пример #4
0
/// Runs the case-conversion checks (test_one) for CharType against several
/// named locales, reporting progress on std::cout.
///
/// The en_US.UTF-8 ASCII check always runs.  The remaining checks run only
/// when have_locale() reports the locale as available; otherwise a "skipping"
/// line is printed.  The Turkish check additionally requires that the C
/// library maps L'i' to U+0130 in tr_TR.UTF-8.
///
/// NOTE(review): CharType is not declared in this block; presumably it is a
/// template parameter introduced outside the visible lines -- confirm.
void test_char()
{
    booster::locale::generator gen;

    std::cout << "- Testing at least C" << std::endl;

    std::locale base = gen("en_US.UTF-8");

    test_one<CharType>(base,"Hello World i","hello world i","HELLO WORLD I");

    std::string name = "en_US.UTF-8";
    if(!have_locale(name)) {
        std::cout << "- en_US.UTF-8 is not supported, skipping" << std::endl;
    }
    else {
        std::cout << "- Testing " << name << std::endl;
        std::locale utf8_loc=gen(name);
        test_one<CharType>(utf8_loc,"Façade","façade","FAÇADE");
    }

    name = "en_US.ISO8859-1";
    if(!have_locale(name)) {
        std::cout << "- en_US.ISO8859-1 is not supported, skipping" << std::endl;
    }
    else {
        std::cout << "Testing " << name << std::endl;
        std::locale latin1_loc=gen(name);
        test_one<CharType>(latin1_loc,"Hello World","hello world","HELLO WORLD");
        // On Apple and FreeBSD the non-ASCII check is limited to character
        // types wider than one byte; elsewhere it always runs.
        #if defined(__APPLE__) || defined(__FreeBSD__)
        if(sizeof(CharType)!=1)
        #endif
            test_one<CharType>(latin1_loc,"Façade","façade","FAÇADE");
    }

    name = "tr_TR.UTF-8";
    if(!have_locale(name)) {
        std::cout << "- tr_TR.UTF-8 is not supported, skipping" << std::endl;
    }
    else {
        std::cout << "Testing " << name << std::endl;
        locale_t cl = newlocale(LC_ALL_MASK,name.c_str(),0);
        try {
            TEST(cl);
            // Probe the C library first: only run the check when it knows
            // the dotted capital I mapping.
            bool const has_dotted_capital = (towupper_l(L'i',cl) == 0x130);
            if(!has_dotted_capital) {
                std::cout <<"  Turkish locale is not supported well" << std::endl;
            }
            else {
                test_one<CharType>(gen(name),"i","i","İ");
            }
        }
        catch(...) {
            // Release the C locale handle before letting the failure propagate.
            if(cl) freelocale(cl);
            throw;
        }
        if(cl) freelocale(cl);
    }
}
Пример #5
0
/*
 * Apply the wide-character mapping selected by desc to wc, using the given
 * locale.
 *
 * _WCT_TOLOWER delegates to towlower_l() and _WCT_TOUPPER to towupper_l().
 * For _WCT_ERROR or any other descriptor, errno is set to EINVAL and wc is
 * returned unchanged.
 */
wint_t
towctrans_l(wint_t wc, wctrans_t desc, locale_t locale)
{
	if (desc == _WCT_TOLOWER)
		return (towlower_l(wc, locale));

	if (desc == _WCT_TOUPPER)
		return (towupper_l(wc, locale));

	/* _WCT_ERROR and every unrecognised descriptor end up here. */
	errno = EINVAL;
	return (wc);
}
Пример #6
0
/*
 * pg_wc_toupper - map a pg_wchar to upper case according to the strategy
 * currently stored in pg_regex_strategy.
 *
 * Returns the converted character, or c unchanged when the selected strategy
 * has no conversion path for it (for example a value above UCHAR_MAX in the
 * single-byte strategies).
 */
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
	switch (pg_regex_strategy)
	{
		case PG_REGEX_LOCALE_C:
			/* Only values up to 127 are mapped; all others pass through. */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
			return c;
		case PG_REGEX_LOCALE_WIDE:
			/* force C behavior for ASCII characters, per comments above */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
#ifdef USE_WIDE_UPPER_LOWER
			/*
			 * towupper() is only called when wchar_t is at least 4 bytes or
			 * c is no larger than 0xFFFF; otherwise control falls through to
			 * the single-byte case below.
			 */
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return towupper((wint_t) c);
#endif
			/* FALL THRU */
		case PG_REGEX_LOCALE_1BYTE:
			/* force C behavior for ASCII characters, per comments above */
			if (c <= (pg_wchar) 127)
				return pg_ascii_toupper((unsigned char) c);
			/* toupper() is only given values that fit in an unsigned char. */
			if (c <= (pg_wchar) UCHAR_MAX)
				return toupper((unsigned char) c);
			return c;
		case PG_REGEX_LOCALE_WIDE_L:
			/*
			 * The _L strategies pass pg_regex_locale explicitly and have no
			 * separate ASCII special case.
			 */
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
				return towupper_l((wint_t) c, pg_regex_locale);
#endif
			/* FALL THRU */
		case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
			if (c <= (pg_wchar) UCHAR_MAX)
				return toupper_l((unsigned char) c, pg_regex_locale);
#endif
			/* Without HAVE_LOCALE_T this case returns c unchanged. */
			return c;
	}
	return 0;					/* can't get here, but keep compiler quiet */
}
Пример #7
0
/*
 * Calls every wide-character classification and mapping function once,
 * in both its plain form and its locale-taking "_l" form, and discards each
 * result with a (void) cast.
 *
 * NOTE(review): presumably this exists only to verify that all of these
 * functions are declared (and link) with the expected argument types, and is
 * not meant to be executed -- confirm against the build/test setup.
 *
 * i  - wide character passed to every classification/mapping call
 * t  - character class handle passed to iswctype()/iswctype_l()
 * tr - mapping handle passed to towctrans()/towctrans_l()
 * l  - locale passed to every "_l" variant
 */
void wctype_check_functions(wint_t i, wctype_t t, wctrans_t tr, locale_t l)
{
    /* Classification predicates. */
    (void)iswalnum(i);
    (void)iswalnum_l(i, l);
    (void)iswalpha(i);
    (void)iswalpha_l(i, l);
    (void)iswblank(i);
    (void)iswblank_l(i, l);
    (void)iswcntrl(i);
    (void)iswcntrl_l(i, l);
    (void)iswctype(i, t);
    (void)iswctype_l(i, t, l);
    (void)iswdigit(i);
    (void)iswdigit_l(i, l);
    (void)iswgraph(i);
    (void)iswgraph_l(i, l);
    (void)iswlower(i);
    (void)iswlower_l(i, l);
    (void)iswprint(i);
    (void)iswprint_l(i, l);
    (void)iswpunct(i);
    (void)iswpunct_l(i, l);
    (void)iswspace(i);
    (void)iswspace_l(i, l);
    (void)iswupper(i);
    (void)iswupper_l(i, l);
    (void)iswxdigit(i);
    (void)iswxdigit_l(i, l);
    /* Case and generic mappings. */
    (void)towctrans(i, tr);
    (void)towctrans_l(i, tr, l);
    (void)towlower(i);
    (void)towlower_l(i, l);
    (void)towupper(i);
    (void)towupper_l(i, l);
    /*
     * Name lookups.  The argument is the integer 1234 cast to a pointer, not
     * a real string, so only the parameter type is being exercised here.
     */
    (void)wctrans((const char *)1234);
    (void)wctrans_l((const char *)1234, l);
    (void)wctype((const char *)1234);
    (void)wctype_l((const char *)1234, l);
}
Пример #8
0
/*
 * Produce two abbreviated forms of `name`.
 *
 * The name is converted to wide characters using the file-level locale `l`
 * and processed word by word (a word is a run of iswalnum_l() characters):
 *
 *  - Characters between words are copied to both outputs.
 *  - A word (or phrase) that case-insensitively matches an even-indexed
 *    entry of `abbrevs` is replaced in short_name by the entry that follows
 *    it, capitalised if the original began with an upper-case letter.  In
 *    shortest_name the replacement is written only when it is the last thing
 *    in the name and no word has been copied verbatim so far; otherwise the
 *    word is dropped.
 *  - A word matching an entry of `given_names` that is not the last thing in
 *    the name becomes "<first letter of the entry>." in short_name and is
 *    dropped from shortest_name.
 *  - Any other word is copied to both outputs unchanged.
 *
 * name           multibyte input string; if NULL the function returns
 *                without touching the outputs.
 * short_name     output buffer of 512 bytes; always NUL-terminated on return.
 * shortest_name  output buffer of 512 bytes; always NUL-terminated on return.
 *
 * Input longer than 511 wide characters is truncated.  If the input is not a
 * valid multibyte string both outputs are set to the empty string; if a
 * result cannot be converted back to multibyte form that output is set to
 * the empty string.
 *
 * NOTE(review): the wide scratch buffers for the results are the same size
 * as the input buffer and writes into them are not bounds-checked, so a
 * replacement longer than the text it replaces could overrun them on very
 * long input -- confirm against the contents of `abbrevs`/`given_names`.
 */
void shorten_name(const char *name,
        char short_name[512], char shortest_name[512])
{
    wchar_t w_name[512];
    wchar_t w_short_name[512];
    wchar_t w_shortest_name[512];

    const wchar_t *cur_word, *wchar_ptr;
    wchar_t *cur_short_word, *cur_shortest_word;

    int unabbrev = 0;
    int i, len, new_len, capital;
    size_t converted;

    if (!name)
        return;

    /*
     * mbsrtowcs_l() does not write a terminator when it stops because the
     * length limit was reached, so reserve one slot and terminate by hand.
     * A return of (size_t)-1 means the input was not a valid multibyte
     * string and w_name cannot be relied upon.
     */
    converted = mbsrtowcs_l(w_name, &name, ARRAY_SIZE(w_name) - 1, NULL, l);
    if (converted == (size_t) -1) {
        short_name[0] = 0;
        shortest_name[0] = 0;
        return;
    }
    w_name[converted] = 0;

    /* TODO: also skip anything in parenthesis from the short names */

    /* TODO: instead of calling wcscasecmp_l all the time it'd be more
     * effective to lower case w_name once and use plain wcscmp or mem
     * compare since the phrases we search for are all lower case already.
     * Might also want to take "collation" into account (wcsxfrm_l the
     * string once and use memcmp instead of wcscoll_l).
     */

    cur_word = w_name;
    cur_short_word = w_short_name;
    cur_shortest_word = w_shortest_name;
    while (1) {
        /* Copy everything up to the start of the next word verbatim */
        while (*cur_word && !iswalnum_l(*cur_word, l))
            *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++;

        if (!*cur_word)
            break;

        /* TODO: use a hash of some kind instead of iterating over arrays */

        /* Go through possible abbreviations from top to bottom */
        for (i = 0; i < ARRAY_SIZE(abbrevs); i += 2)
            if (!wcsncasecmp_l(abbrevs[i], cur_word, wcslen(abbrevs[i]), l)) {
                len = wcslen(abbrevs[i]);

                /* Check that we matched a full word */
                if (iswalnum_l(cur_word[len], l))
                    continue;

                capital = iswupper_l(*cur_word, l);
                cur_word += len;

                new_len = wcslen(abbrevs[i + 1]);
                memcpy(cur_short_word, abbrevs[i + 1],
                        new_len * sizeof(wchar_t));
                /*
                 * If original was capitalised then capitalise the abbreviation
                 * as well, if it was lower case.  Skipped for an empty
                 * replacement: nothing was written, so there is no character
                 * to capitalise.
                 */
                if (capital && new_len > 0)
                    *cur_short_word = towupper_l(*cur_short_word, l);

                /* Make sure shortest_word doesn't end up being empty */
                if (!*cur_word && !unabbrev) {
                    memcpy(cur_shortest_word, cur_short_word,
                            new_len * sizeof(wchar_t));
                    cur_shortest_word += new_len;
                }

                cur_short_word += new_len;

                /*
                 * Avoid excess whitespace in short and shortest
                 * when a word is replaced with "".
                 * TODO: this may require more complicated logic to get
                 * the corner cases right.
                 */
                if (new_len == 0) {
                    if (cur_short_word > w_short_name &&
                            iswspace_l(cur_short_word[-1], l))
                        cur_short_word --;
                    if (cur_shortest_word > w_shortest_name &&
                            iswspace_l(cur_shortest_word[-1], l))
                        cur_shortest_word --;
                }

                /*if (new_len != len)*/
                break;
            }
        /* The loop above stopped early, i.e. an abbreviation was applied */
        if (i < ARRAY_SIZE(abbrevs))
            continue;

        /* Go through possible given names from top to bottom */
        for (i = 0; i < ARRAY_SIZE(given_names); i ++)
            if (!wcsncasecmp_l(given_names[i], cur_word,
                    wcslen(given_names[i]), l)) {
                len = wcslen(given_names[i]);

                /* Check that we matched a full word */
                if (iswalnum_l(cur_word[len], l))
                    continue;

                /*
                 * If this is the final part of the name, and it matches a
                 * given name then that's most likely somebody's surname which
                 * happens to also be a possible given name.  In that case
                 * do not abbreviate or omit it.
                 */
                if (!cur_word[len])
                    continue;

                cur_word += len;

                /* The initial is taken from the table entry, not the input */
                *cur_short_word++ = given_names[i][0];
                *cur_short_word++ = L'.';

                /*
                 * Avoid excess whitespace in shortest when a word is
                 * replaced with "".
                 * TODO: this may require more complicated logic to get
                 * the corner cases right.
                 */
                if (cur_shortest_word > w_shortest_name &&
                        iswspace_l(cur_shortest_word[-1], l))
                    cur_shortest_word --;

                break;
            }
        /* The loop above stopped early, i.e. a given name was shortened */
        if (i < ARRAY_SIZE(given_names))
            continue;

        /* Nothing matched, copy the current word as-is */
        while (iswalnum_l(*cur_word, l))
            *cur_short_word ++ = *cur_shortest_word ++ = *cur_word ++;
        unabbrev += 1;
    }

    *cur_short_word = 0;
    *cur_shortest_word = 0;

    /*
     * wcsrtombs_l() likewise omits the terminator when the byte limit is
     * reached, so convert into at most 511 bytes and terminate explicitly.
     * On a conversion error the output is made empty rather than left in an
     * unspecified state.
     */
    wchar_ptr = w_short_name;
    converted = wcsrtombs_l(short_name, &wchar_ptr, 512 - 1, NULL, l);
    if (converted == (size_t) -1)
        converted = 0;
    short_name[converted] = 0;

    wchar_ptr = w_shortest_name;
    converted = wcsrtombs_l(shortest_name, &wchar_ptr, 512 - 1, NULL, l);
    if (converted == (size_t) -1)
        converted = 0;
    shortest_name[converted] = 0;
}
Пример #9
0
 /// Returns the upper-case form of \a c as reported by towupper_l for the
 /// locale handle \a lc.
 static wchar_t upper(wchar_t c,locale_t lc)
 {
     wchar_t const mapped = towupper_l(c,lc);
     return mapped;
 }