Пример #1
0
/* This function is equivalent to strncasecmp() for multibyte strings.
 * At most n multibyte characters of s1 and s2 are compared, ignoring
 * case.  The result is zero when the compared parts are equal, and
 * nonzero otherwise; for differing characters it is the difference of
 * their lowercased wide values.  A byte sequence that mbtowc() rejects
 * is compared by the value of its first byte, and never matches a
 * valid character.  When ENABLE_UTF8 is not defined or use_utf8 is
 * false, the work is handed to strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	char *s1_mb, *s2_mb;
	    /* Scratch buffers that parse_mbchar() fills in. */
	int result = 0;
	    /* The verdict; it stays zero as long as no mismatch is seen. */

	if (s1 == s2)
	    return 0;

	assert(s1 != NULL && s2 != NULL);

	s1_mb = charalloc(MB_CUR_MAX);
	s2_mb = charalloc(MB_CUR_MAX);

	for (; *s1 != '\0' && *s2 != '\0' && n > 0; s1 +=
		move_mbright(s1, 0), s2 += move_mbright(s2, 0), n--) {
	    bool bad_s1_mb = FALSE, bad_s2_mb = FALSE;
	    wchar_t ws1, ws2;
	    int s1_mb_len, s2_mb_len;

	    s1_mb_len = parse_mbchar(s1, s1_mb, NULL);

	    if (mbtowc(&ws1, s1_mb, s1_mb_len) < 0) {
		mbtowc_reset();
		ws1 = (unsigned char)*s1_mb;
		bad_s1_mb = TRUE;
	    }

	    s2_mb_len = parse_mbchar(s2, s2_mb, NULL);

	    if (mbtowc(&ws2, s2_mb, s2_mb_len) < 0) {
		mbtowc_reset();
		ws2 = (unsigned char)*s2_mb;
		bad_s2_mb = TRUE;
	    }

	    /* Record the verdict at the moment the mismatch is found, so
	     * that the return value never depends on wide characters that
	     * were not set or that belong to an earlier, equal pair. */
	    if (towlower(ws1) != towlower(ws2)) {
		result = towlower(ws1) - towlower(ws2);
		break;
	    }

	    /* An invalid sequence must not compare equal to a valid
	     * character that happens to have the same value. */
	    if (bad_s1_mb != bad_s2_mb) {
		result = bad_s1_mb ? 1 : -1;
		break;
	    }
	}

	/* If no mismatch was seen but characters remained to be compared,
	 * at least one string ended: the terminating byte decides, so a
	 * proper prefix does not compare equal to the longer string. */
	if (result == 0 && n > 0)
	    result = (unsigned char)*s1 - (unsigned char)*s2;

	free(s1_mb);
	free(s2_mb);

	return result;
    } else
#endif
	return strncasecmp(s1, s2, n);
}
Пример #2
0
/* Return the number of columns that the multibyte character at c
 * occupies.  In UTF-8 mode this is the wcwidth() of its wide form; a
 * sequence that mbtowc() rejects, or a character for which wcwidth()
 * reports -1, is measured as bad_wchar instead.  Outside UTF-8 mode
 * every character is one column wide. */
int mbwidth(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;

	/* Fall back on the replacement character when c cannot be
	 * converted. */
	if (mbtowc(&wide, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    wide = bad_wchar;
	}

	columns = wcwidth(wide);

	return (columns != -1) ? columns : wcwidth(bad_wchar);
    }
#endif

    return 1;
}
Пример #3
0
/* c is a multibyte non-control character.  Store its representation in
 * crep, store the number of bytes written in *crep_len, and return
 * crep.  In UTF-8 mode, when c is an invalid multibyte sequence or
 * encodes an invalid Unicode codepoint, bad_mbchar is stored instead;
 * when the wide character cannot be converted back, *crep_len is set
 * to zero.  Outside UTF-8 mode the first byte of c is copied as is. */
char *mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL && crep != NULL && crep_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;

	if (mbtowc(&wide, c, MB_CUR_MAX) >= 0 && is_valid_unicode(wide)) {
	    *crep_len = wctomb(crep, wide);

	    /* A failed conversion yields an empty representation. */
	    if (*crep_len < 0) {
		wctomb_reset();
		*crep_len = 0;
	    }
	} else {
	    /* Substitute the replacement sequence for anything that is
	     * undecodable or not valid Unicode. */
	    mbtowc_reset();
	    *crep_len = bad_mbchar_len;
	    strncpy(crep, bad_mbchar, *crep_len);
	}

	return crep;
    }
#endif

    *crep_len = 1;
    *crep = *c;

    return crep;
}
Пример #4
0
/* c is a multibyte control character.  Store its visible representation
 * in crep, store the number of bytes written in *crep_len, and return
 * crep.  In UTF-8 mode the representation is the multibyte form of
 * control_wrep() applied to the wide character; when c is an invalid
 * multibyte sequence, bad_mbchar is stored instead, and when the
 * conversion back to multibyte fails, *crep_len is set to zero.
 * Outside UTF-8 mode the single byte control_rep(*c) is stored. */
char *control_mbrep(const char *c, char *crep, int *crep_len)
{
    assert(c != NULL && crep != NULL && crep_len != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;

	if (mbtowc(&wide, c, MB_CUR_MAX) >= 0) {
	    *crep_len = wctomb(crep, control_wrep(wide));

	    /* A failed conversion yields an empty representation. */
	    if (*crep_len < 0) {
		wctomb_reset();
		*crep_len = 0;
	    }
	} else {
	    /* Substitute the replacement sequence for an undecodable
	     * input. */
	    mbtowc_reset();
	    *crep_len = bad_mbchar_len;
	    strncpy(crep, bad_mbchar, *crep_len);
	}

	return crep;
    }
#endif

    *crep_len = 1;
    *crep = control_rep(*c);

    return crep;
}
Пример #5
0
/* Determine how many bytes the (multibyte) character at c occupies.
 * In UTF-8 mode: return -1 when the byte sequence is invalid, return
 * the number of bytes minus 8 when the sequence encodes an invalid
 * codepoint, and otherwise return the number of bytes and store the
 * number of columns the character occupies in *width.  Outside UTF-8
 * mode, return 1.  Note that *width is written only for a valid
 * character in UTF-8 mode; on every other path it is left untouched. */
int length_of_char(const char *c, int *width)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;
	int columns;
	int nbytes = mbtowc(&wide, c, MB_CUR_MAX);

	/* An undecodable sequence... */
	if (nbytes < 0) {
	    mbtowc_reset();
	    return -1;
	}

	/* A decodable sequence for a codepoint that is not valid... */
	if (!is_valid_unicode(wide))
	    return nbytes - 8;

	/* When wcwidth() gives a negative answer (an unassigned
	 * codepoint, for example), assume a width of one. */
	columns = wcwidth(wide);
	*width = (columns < 0) ? 1 : columns;

	return nbytes;
    }
#endif

    return 1;
}
Пример #6
0
/* This function is equivalent to strchr() for multibyte strings.
 * Return a pointer to the first occurrence in s of the multibyte
 * character that c points at, or NULL when there is none.  A byte
 * sequence that mbtowc() rejects is represented by the value of its
 * first byte, and an invalid sequence only ever matches another
 * invalid sequence.  Unlike strchr(), the UTF-8 path never matches the
 * terminating null byte.  When ENABLE_UTF8 is not defined or use_utf8
 * is false, the work is handed to strchr() with the first byte of c. */
char *mbstrchr(const char *s, const char *c)
{
    assert(s != NULL && c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	bool bad_c_mb = FALSE;
	char *s_mb = charalloc(MB_CUR_MAX);
	    /* Scratch buffer that parse_mbchar() fills in. */
	wchar_t ws, wc;

	if (mbtowc(&wc, c, MB_CUR_MAX) < 0) {
	    mbtowc_reset();
	    wc = (unsigned char)*c;
	    bad_c_mb = TRUE;
	}

	while (*s != '\0') {
	    /* The validity flag must start afresh for every character;
	     * otherwise a single invalid sequence would mark everything
	     * after it as invalid too, and hide later matches. */
	    bool bad_s_mb = FALSE;
	    int s_mb_len = parse_mbchar(s, s_mb, NULL);

	    if (mbtowc(&ws, s_mb, s_mb_len) < 0) {
		mbtowc_reset();
		ws = (unsigned char)*s;
		bad_s_mb = TRUE;
	    }

	    if (bad_s_mb == bad_c_mb && ws == wc)
		break;

	    s += s_mb_len;
	}

	free(s_mb);

	/* Reaching the end of the string means that nothing matched. */
	return (*s == '\0') ? NULL : (char *)s;
    } else
#endif
	return strchr(s, *c);
}
Пример #7
0
/* This function is equivalent to strncasecmp() for multibyte strings.
 * At most n multibyte characters of s1 and s2 are compared, ignoring
 * case.  When either character is an invalid sequence, the first bytes
 * are compared instead; if those are equal but only one of the two
 * sequences is invalid, the invalid one sorts higher.  When
 * ENABLE_UTF8 is not defined or use_utf8 is false, the work is handed
 * to strncasecmp(). */
int mbstrncasecmp(const char *s1, const char *s2, size_t n)
{
#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide1, wide2;

	for (; *s1 != '\0' && *s2 != '\0' && n > 0; n--) {
	    bool invalid1 = FALSE, invalid2 = FALSE;

	    if (mbtowc(&wide1, s1, MB_CUR_MAX) < 0) {
		mbtowc_reset();
		invalid1 = TRUE;
	    }

	    if (mbtowc(&wide2, s2, MB_CUR_MAX) < 0) {
		mbtowc_reset();
		invalid2 = TRUE;
	    }

	    if (!invalid1 && !invalid2) {
		/* Two valid characters: compare them caselessly. */
		int delta = towlower(wide1) - towlower(wide2);

		if (delta != 0)
		    return delta;
	    } else if (*s1 != *s2)
		/* At least one invalid sequence: the raw bytes decide. */
		return (unsigned char)*s1 - (unsigned char)*s2;
	    else if (invalid1 != invalid2)
		/* Same first byte, but only one sequence is invalid. */
		return (invalid1 ? 1 : -1);

	    s1 += move_mbright(s1, 0);
	    s2 += move_mbright(s2, 0);
	}

	/* When the character budget is used up, the strings are equal;
	 * otherwise at least one string ended and its terminating byte
	 * decides. */
	if (n == 0)
	    return 0;

	return (unsigned char)*s1 - (unsigned char)*s2;
    } else
#endif
	return strncasecmp(s1, s2, n);
}
Пример #8
0
/* This function is equivalent to isalnum() for multibyte characters.
 * In UTF-8 mode the character at c is converted to a wide character
 * and classified with iswalnum(); a sequence that cannot be converted
 * is classified as bad_wchar.  Outside UTF-8 mode the first byte of c
 * is classified with isalnum(). */
bool is_alnum_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;

	if (mbtowc(&wide, c, MB_CUR_MAX) >= 0)
	    return iswalnum(wide);

	/* Undecodable input: judge the replacement character instead. */
	mbtowc_reset();
	wide = bad_wchar;

	return iswalnum(wide);
    }
#endif

    return isalnum((unsigned char)*c);
}
Пример #9
0
/* This function is equivalent to ispunct() for multibyte characters.
 * In UTF-8 mode the character at c is converted to a wide character
 * and classified with iswpunct(); a sequence that cannot be converted
 * is never punctuation.  Outside UTF-8 mode the first byte of c is
 * classified with ispunct(). */
bool is_punct_mbchar(const char *c)
{
    assert(c != NULL);

#ifdef ENABLE_UTF8
    if (use_utf8) {
	wchar_t wide;

	if (mbtowc(&wide, c, MB_CUR_MAX) >= 0)
	    return iswpunct(wide);

	/* Undecodable input is not punctuation. */
	mbtowc_reset();

	return 0;
    }
#endif

    return ispunct((unsigned char)*c);
}