Example #1
0
/* Unit test for the unichar helpers.
 *
 * Three things are checked inside a single "unichars" test section:
 *  - every value from 0 to 0x10ffff survives an encode -> validate -> decode
 *    round trip,
 *  - one fixed input produces the expected decomposed-titlecase output,
 *  - a fixed overlong byte sequence is rejected by both the validator and
 *    the decoder.
 * The two strlen sub-tests are run afterwards.
 */
void test_unichar(void)
{
	static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
	static const char collate_in[] = "\xc3\xbc \xc2\xb3";
	static const char collate_exp[] = "U\xcc\x88 3";
	string_t *str = t_str_new(16);
	buffer_t *collate_out;
	unichar_t chr, chr2;

	test_begin("unichars");

	/* round trip: the encoded form must validate and decode back to
	   the value it was produced from */
	chr = 0;
	while (chr <= 0x10ffff) {
		str_truncate(str, 0);
		uni_ucs4_to_utf8_c(chr, str);
		test_assert(uni_utf8_str_is_valid(str_c(str)));
		test_assert(uni_utf8_get_char(str_c(str), &chr2) > 0);
		test_assert(chr2 == chr);
		chr++;
	}

	/* sizeof() also counts the terminating NUL of collate_in;
	   presumably this is what leaves the output terminated for the
	   strcmp() below - confirm before changing it to strlen() */
	collate_out = buffer_create_dynamic(default_pool, 32);
	uni_utf8_to_decomposed_titlecase(collate_in, sizeof(collate_in),
					 collate_out);
	test_assert(!strcmp(collate_out->data, collate_exp));
	buffer_free(&collate_out);

	/* the overlong sequence must fail validation and decoding */
	test_assert(!uni_utf8_str_is_valid(overlong_utf8));
	test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);

	test_end();

	test_unichar_uni_utf8_strlen();
	test_unichar_uni_utf8_partial_strlen_n();
}
Example #2
0
void test_unichar(void)
{
	static const char overlong_utf8[] = "\xf8\x80\x95\x81\xa1";
	static const char collate_in[] = "\xc3\xbc \xc2\xb3";
	static const char collate_exp[] = "U\xcc\x88 3";
	buffer_t *collate_out;
	unichar_t chr, chr2;
	string_t *str = t_str_new(16);

	test_begin("unichars encode/decode");
	for (chr = 0; chr <= 0x10ffff; chr++) {
		/* The bottom 6 bits should be irrelevant to code coverage,
		   only test 000000, 111111, and something in between. */
		if ((chr & 63) == 1)
			chr += rand() % 62; /* After 0, somewhere between 1 and 62 */
		else if ((chr & 63) > 0 && (chr & 63) < 63)
			chr |= 63; /* After random, straight to 63 */

		str_truncate(str, 0);
		uni_ucs4_to_utf8_c(chr, str);
		test_assert(uni_utf8_str_is_valid(str_c(str)));
		test_assert(uni_utf8_get_char(str_c(str), &chr2) == (int)uni_utf8_char_bytes(*str_data(str)));
		test_assert(chr2 == chr);

		if ((chr & 0x63) == 0) {
			unsigned int utf8len = uni_utf8_char_bytes(*str_c(str));

			/* virtually truncate the byte string */
			while (--utf8len > 0)
				test_assert(uni_utf8_get_char_n(str_c(str), utf8len, &chr2) == 0);

			utf8len = uni_utf8_char_bytes(*str_c(str));

			/* actually truncate the byte stream */
			while (--utf8len > 0) {
				str_truncate(str, utf8len);
				test_assert(!uni_utf8_str_is_valid(str_c(str)));
				test_assert(uni_utf8_get_char(str_c(str), &chr2) == 0);
			}
		}
	}
	test_end();

	test_begin("unichar collation");
	collate_out = buffer_create_dynamic(default_pool, 32);
	uni_utf8_to_decomposed_titlecase(collate_in, sizeof(collate_in),
					 collate_out);
	test_assert(!strcmp(collate_out->data, collate_exp));
	buffer_free(&collate_out);

	test_assert(!uni_utf8_str_is_valid(overlong_utf8));
	test_assert(uni_utf8_get_char(overlong_utf8, &chr2) < 0);
	test_end();

	test_unichar_uni_utf8_strlen();
	test_unichar_uni_utf8_partial_strlen_n();
}
Example #3
0
/* Appends a sanitized copy of src to dest.
 *
 * Bytes that do not start a decodable UTF-8 character, and characters whose
 * first byte is below 32, are replaced with '?'. Every other character is
 * copied in full, i.e. all of its bytes.
 *
 * max_len limits how far into src characters may start: the loop stops once
 * the read offset reaches max_len. A multi-byte character that starts before
 * max_len is still copied completely, so the copied text can be a few bytes
 * longer than max_len.
 *
 * If src continues past the point where copying stopped, the tail of the
 * newly appended text is replaced with "...": at least three bytes are
 * removed, always on a character boundary, and never anything that was in
 * dest before this call. When fewer than three bytes were appended, all of
 * them are removed.
 *
 * If the input ends in the middle of a character (the decoder returns 0),
 * one '?' is appended and the function returns without adding "...".
 */
void str_sanitize_append(string_t *dest, const char *src, size_t max_len)
{
	const size_t initial_pos = str_len(dest);
	/* dest offsets at which the last three appended characters begin
	   (oldest first); each entry starts out as initial_pos */
	size_t char_start[3];
	size_t i, end, cut;
	unsigned int len;
	unichar_t chr;
	int ret;

	char_start[0] = char_start[1] = char_start[2] = initial_pos;

	for (i = 0; i < max_len && src[i] != '\0'; ) {
		len = uni_utf8_char_bytes(src[i]);
		ret = uni_utf8_get_char(src+i, &chr);

		/* exactly one character is appended per iteration;
		   remember where it begins */
		char_start[0] = char_start[1];
		char_start[1] = char_start[2];
		char_start[2] = str_len(dest);

		if (ret <= 0) {
			/* invalid UTF-8 */
			str_append_c(dest, '?');
			if (ret == 0) {
				/* input ended too early */
				return;
			}
			i++;
			continue;
		}
		if ((unsigned char)src[i] < 32)
			str_append_c(dest, '?');
		else {
			/* copy the whole character, not only its first
			   byte, since i advances by len below */
			str_append_n(dest, src+i, len);
		}
		i += len;
	}

	if (src[i] != '\0') {
		/* Cut at the latest character start that drops at least
		   three bytes. Every character is at least one byte, so
		   when three or more were appended char_start[0] always
		   qualifies; otherwise it still equals initial_pos and
		   everything appended by this call is dropped. */
		end = str_len(dest);
		if (end - char_start[2] >= 3)
			cut = char_start[2];
		else if (end - char_start[1] >= 3)
			cut = char_start[1];
		else
			cut = char_start[0];
		str_truncate(dest, cut);
		str_append(dest, "...");
	}
}
/* Steps *end_pos backwards, one byte at a time, until the byte at that
 * offset satisfies UTF8_IS_START_SEQ(), then decodes the character starting
 * there and returns it. *end_pos is left at the offset that was decoded.
 *
 * Asserts that the search does not go below offset 0, and treats a failed
 * decode as unreachable.
 */
static unichar_t get_ending_utf8_char(const char *str, size_t *end_pos)
{
	unichar_t c;
	int ret;

	for (;;) {
		if (UTF8_IS_START_SEQ(str[*end_pos]))
			break;
		i_assert(*end_pos > 0);
		*end_pos -= 1;
	}

	ret = uni_utf8_get_char(str + *end_pos, &c);
	if (ret <= 0)
		i_unreached();
	return c;
}
Example #5
0
/* Returns how many leading bytes of src can be kept unchanged.
 *
 * Scanning proceeds one character at a time and stops at the first position
 * where:
 *  - max_len bytes have been accepted,
 *  - the next character would extend past max_len,
 *  - the bytes at that position do not decode as a UTF-8 character, or
 *  - the character's first byte is below 32 (this includes the terminating
 *    NUL, so the scan never runs past the end of the string).
 *
 * The result is always <= max_len, so callers may safely compute
 * max_len - result.
 */
static size_t str_sanitize_skip_start(const char *src, size_t max_len)
{
	unsigned int len;
	unichar_t chr;
	size_t i;

	for (i = 0; i < max_len; ) {
		len = uni_utf8_char_bytes(src[i]);
		/* i < max_len holds here, so the subtraction cannot wrap.
		   A character crossing max_len is left for the caller
		   instead of pushing the result beyond the limit. */
		if (len > max_len - i)
			break;
		if (uni_utf8_get_char(src+i, &chr) <= 0)
			break;
		if ((unsigned char)src[i] < 32)
			break;
		i += len;
	}
	return i;
}
Example #6
0
/* Returns how many leading bytes of src can be kept unchanged.
 *
 * Characters are accepted one at a time until the string ends, the next
 * character would not fit within max_bytes, the bytes at the current offset
 * do not decode as a UTF-8 character, or the character's first byte is
 * below 32. The result is asserted to be at most max_bytes.
 */
static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
{
    unichar_t decoded;
    unsigned int char_len;
    size_t i = 0;

    while (src[i] != '\0') {
        char_len = uni_utf8_char_bytes(src[i]);
        /* never accept a character that would cross the limit */
        if (i + char_len > max_bytes)
            break;
        if (uni_utf8_get_char(src+i, &decoded) <= 0)
            break;
        if ((unsigned char)src[i] < 32)
            break;
        i += char_len;
    }

    i_assert(i <= max_bytes);
    return i;
}
Example #7
0
/* Appends a sanitized copy of src to dest.
 *
 * Characters are taken from src while the next one still fits within
 * max_bytes of input. Bytes that do not decode as a UTF-8 character, and
 * characters whose first byte is below 32, are replaced with '?'; all other
 * characters are copied whole.
 *
 * If the input ends in the middle of a character (the decoder returns 0),
 * one '?' is appended and the function returns immediately.
 *
 * If src was not consumed to its end, the appended text is shortened and
 * "..." is added: with max_bytes < 3 everything appended by this call is
 * dropped first, otherwise characters are removed via
 * str_sanitize_truncate_char() until at most max_bytes-3 appended bytes
 * remain.
 */
void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
{
    unsigned int start_len = str_len(dest);
    unsigned int char_len;
    unichar_t decoded;
    size_t i = 0;
    int ret;

    while (src[i] != '\0') {
        char_len = uni_utf8_char_bytes(src[i]);
        if (i + char_len > max_bytes)
            break;

        ret = uni_utf8_get_char(src+i, &decoded);
        if (ret > 0) {
            /* decodable character: keep it unless its first byte
               is below 32 */
            if ((unsigned char)src[i] >= 32)
                str_append_n(dest, src+i, char_len);
            else
                str_append_c(dest, '?');
            i += char_len;
            continue;
        }

        /* invalid UTF-8 */
        str_append_c(dest, '?');
        if (ret == 0) {
            /* input ended too early */
            return;
        }
        i++;
    }

    /* everything was consumed: nothing to mark as cut off */
    if (src[i] == '\0')
        return;

    if (max_bytes < 3) {
        str_truncate(dest, start_len);
    } else {
        /* make room for the three dots within the byte budget */
        while (str_len(dest) - start_len > max_bytes-3)
            str_sanitize_truncate_char(dest, start_len);
    }
    str_append(dest, "...");
}