Beispiel #1
0
/*
 * Compares two strings using the single value utf8_wide_char() returns for
 * each of them, after folding values in the interval ['A', 'A' + range) by
 * adding ('a' - 'A').
 *
 * A NULL string is ordered before a non-NULL one; two NULL strings are equal.
 *
 * Returns:
 *   -1: string1 < string2
 *    0: string1 == string2
 *    1: string1 > string2
 */

int
utf8_charcasecmp_range (const char *string1, const char *string2, int range)
{
    wint_t first, second, fold_end;

    /* NULL handling: nothing to decode when either pointer is missing */
    if (!string1)
        return (string2) ? -1 : 0;
    if (!string2)
        return 1;

    /* exclusive upper bound of the values that get folded */
    fold_end = (wint_t)('A' + range);

    first = utf8_wide_char (string1);
    second = utf8_wide_char (string2);

    if (!((first < (wint_t)'A') || (first >= fold_end)))
        first += ('a' - 'A');
    if (!((second < (wint_t)'A') || (second >= fold_end)))
        second += ('a' - 'A');

    if (first < second)
        return -1;
    if (first == second)
        return 0;
    return 1;
}
Beispiel #2
0
/*
 * Compares two strings using the single value utf8_wide_char() returns for
 * each of them; values between 'A' and 'Z' (inclusive) are shifted by
 * ('a' - 'A') before the comparison, so only these letters are
 * case-insensitive.
 *
 * A NULL string is ordered before a non-NULL one; two NULL strings are equal.
 *
 * Returns:
 *   -1: string1 < string2
 *    0: string1 == string2
 *    1: string1 > string2
 */

int
utf8_charcasecmp (const char *string1, const char *string2)
{
    wint_t first, second;

    /* NULL handling: nothing to decode when either pointer is missing */
    if (!string1)
        return (string2) ? -1 : 0;
    if (!string2)
        return 1;

    first = utf8_wide_char (string1);
    second = utf8_wide_char (string2);

    if (!((first < 'A') || (first > 'Z')))
        first += ('a' - 'A');
    if (!((second < 'A') || (second > 'Z')))
        second += ('a' - 'A');

    if (first < second)
        return -1;
    if (first == second)
        return 0;
    return 1;
}
Beispiel #3
0
/*
 * Tests functions:
 *   utf8_char_int
 *   utf8_int_string
 *   utf8_wide_char
 *
 * Numeric results are checked with LONGS_EQUAL rather than BYTES_EQUAL:
 * BYTES_EQUAL masks both operands to their low 8 bits, so it cannot detect a
 * wrong value above 0xff (it would accept 0x7000 where 0xe000 is expected).
 */
TEST(CoreUtf8, Convert)
{
    /* room for the longest encoding (4 bytes) plus the final '\0' */
    char result[5];

    /* get UTF-8 char as integer */
    LONGS_EQUAL(0, utf8_char_int (NULL));
    LONGS_EQUAL(0, utf8_char_int (""));
    LONGS_EQUAL(65, utf8_char_int ("ABC"));
    LONGS_EQUAL(235, utf8_char_int ("ë"));
    LONGS_EQUAL(0x20ac, utf8_char_int ("€"));
    LONGS_EQUAL(0x2ee9, utf8_char_int (cjk_yellow));
    LONGS_EQUAL(0x24b62, utf8_char_int (han_char));

    /* 2-byte sequences (the "invalid" ones are overlong encodings) */
    LONGS_EQUAL(0x0, utf8_char_int ("\xc0\x80"));   /* invalid */
    LONGS_EQUAL(0x7f, utf8_char_int ("\xc1\xbf"));  /* invalid */
    LONGS_EQUAL(0x80, utf8_char_int ("\xc2\x80"));
    LONGS_EQUAL(0x7ff, utf8_char_int ("\xdf\xbf"));

    /* 3-byte sequences (invalid: overlong encodings and surrogates) */
    LONGS_EQUAL(0x0, utf8_char_int ("\xe0\x80\x80"));     /* invalid */
    LONGS_EQUAL(0x7ff, utf8_char_int ("\xe0\x9f\xbf"));   /* invalid */
    LONGS_EQUAL(0xd800, utf8_char_int ("\xed\xa0\x80"));  /* invalid */
    LONGS_EQUAL(0xdfff, utf8_char_int ("\xed\xbf\xbf"));  /* invalid */
    LONGS_EQUAL(0x800, utf8_char_int ("\xe0\xa0\x80"));
    LONGS_EQUAL(0xd7ff, utf8_char_int ("\xed\x9f\xbf"));
    /* U+E000 is encoded as EE 80 80 (E7 80 80 would be U+7000) */
    LONGS_EQUAL(0xe000, utf8_char_int ("\xee\x80\x80"));
    LONGS_EQUAL(0xffff, utf8_char_int ("\xef\xbf\xbf"));

    /* 4-byte sequences (the "invalid" ones are overlong encodings) */
    LONGS_EQUAL(0x0, utf8_char_int ("\xf0\x80\x80\x80"));     /* invalid */
    LONGS_EQUAL(0xffff, utf8_char_int ("\xf0\x8f\xbf\xbf"));  /* invalid */
    LONGS_EQUAL(0x10000, utf8_char_int ("\xf0\x90\x80\x80"));
    LONGS_EQUAL(0x1fffff, utf8_char_int ("\xf7\xbf\xbf\xbf"));

    /* convert unicode char to a string */
    utf8_int_string (0, NULL);
    utf8_int_string (0, result);
    STRCMP_EQUAL("", result);
    utf8_int_string (235, result);
    STRCMP_EQUAL("ë", result);
    utf8_int_string (0x20ac, result);
    STRCMP_EQUAL("€", result);
    utf8_int_string (0x2ee9, result);
    STRCMP_EQUAL(cjk_yellow, result);
    utf8_int_string (0x24b62, result);
    STRCMP_EQUAL(han_char, result);

    /* get wide char */
    LONGS_EQUAL(WEOF, utf8_wide_char (NULL));
    LONGS_EQUAL(WEOF, utf8_wide_char (""));
    LONGS_EQUAL(65, utf8_wide_char ("A"));
    LONGS_EQUAL(0xc3ab, utf8_wide_char ("ë"));
    LONGS_EQUAL(0xe282ac, utf8_wide_char ("€"));
    LONGS_EQUAL(0xe2bba9, utf8_wide_char (cjk_yellow));
    LONGS_EQUAL(0xf0a4ada2, utf8_wide_char (han_char));
}