Example #1
0
/* Checks the return value of utf8_encoded_valid_unichar() for a few well-formed
 * and malformed byte sequences: the byte length of the leading sequence is
 * expected for well-formed input, a negative value for malformed input. */
static void test_utf8_encoded_valid_unichar(void) {
        /* bytes E2 84 A2: a complete three-byte sequence */
        assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
        /* bytes C2 AE: a complete two-byte sequence */
        assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
        /* plain ASCII: one byte */
        assert_se(utf8_encoded_valid_unichar("a") == 1);
        /* bytes E1 84: three-byte lead followed by only one continuation byte */
        assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
        /* bytes E1 84 E1 84: the third byte is another lead byte, not a continuation */
        assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
}
Example #2
0
/*
 * Builds an escaped copy of str in a freshly malloc()ed buffer.
 *
 * - Valid sequences that utf8_is_printable() accepts are copied verbatim.
 * - Valid sequences it rejects are emitted byte by byte as "\xHH".
 * - A byte that does not start a valid sequence is replaced by
 *   UTF8_REPLACEMENT_CHARACTER and skipped.
 *
 * Returns NULL if the allocation fails.
 * NOTE(review): the tail of this function (termination / return) is not
 * visible in this excerpt.
 */
char *utf8_escape_non_printable(const char *str) {
    char *p, *s;

    assert(str);

    /* Worst case is one input byte becoming the four bytes of "\xHH". */
    p = s = malloc(strlen(str) * 4 + 1);
    if (!p)
        return NULL;

    while (*str) {
        int len;

        len = utf8_encoded_valid_unichar(str);
        if (len > 0) {
            if (utf8_is_printable(str, len)) {
                /* printable: copy the whole sequence unchanged */
                s = mempcpy(s, str, len);
                str += len;
            } else {
                /* not printable: hex-escape every byte of the sequence */
                while (len > 0) {
                    *(s++) = '\\';
                    *(s++) = 'x';
                    /* presumably hexchar() maps the low nibble of its argument
                     * to a hex digit — TODO confirm */
                    *(s++) = hexchar((int) *str >> 4);
                    *(s++) = hexchar((int) *str);

                    str += 1;
                    len --;
                }
            }
        } else {
            /* invalid sequence start: substitute and resynchronize one byte later */
            s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER);
            str += 1;
        }
    }
Example #3
0
/*
 * Returns a newly malloc()ed copy of str in which every byte that does not
 * begin a valid UTF-8 sequence (as judged by utf8_encoded_valid_unichar())
 * has been replaced by UTF8_REPLACEMENT_CHARACTER. Valid sequences are
 * copied through unchanged.
 *
 * Returns NULL when the allocation fails. The caller owns the result.
 */
char *utf8_escape_invalid(const char *str) {
    const char *in;
    char *result, *out;

    assert(str);

    /* Reserve four output bytes per input byte plus the terminator. */
    result = malloc(strlen(str) * 4 + 1);
    if (result == NULL)
        return NULL;

    out = result;
    for (in = str; *in != '\0'; ) {
        int seqlen = utf8_encoded_valid_unichar(in);

        if (seqlen <= 0) {
            /* Bad byte: emit the replacement and retry from the next byte. */
            out = stpcpy(out, UTF8_REPLACEMENT_CHARACTER);
            in++;
            continue;
        }

        out = mempcpy(out, in, seqlen);
        in += seqlen;
    }

    *out = '\0';
    return result;
}
Example #4
0
/*
 * Reports whether the first `length` bytes of str consist solely of valid
 * UTF-8 sequences whose code points are not control characters (per
 * unichar_is_control()). A '\n' code point is additionally rejected unless
 * `newline` is true.
 *
 * Returns true for a zero-length input.
 */
bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
    const char *cursor = str;
    size_t remaining = length;

    assert(str);

    while (remaining > 0) {
        int seqlen, codepoint;

        seqlen = utf8_encoded_valid_unichar(cursor);
        if (seqlen < 0)
            return false;
        /* The sequence must fit entirely inside the caller-supplied window. */
        if ((size_t) seqlen > remaining)
            return false;

        codepoint = utf8_encoded_to_unichar(cursor);
        if (codepoint < 0)
            return false;
        if (unichar_is_control(codepoint))
            return false;
        if (codepoint == '\n' && !newline)
            return false;

        cursor += seqlen;
        remaining -= seqlen;
    }

    return true;
}
Example #5
0
/*
 * Sanitizes str in place: ASCII letters, digits, the punctuation set
 * " #$%+-./:=?@_," and valid multi-byte UTF-8 sequences are kept; every
 * other byte is overwritten with '_'.
 *
 * Returns the number of bytes that were overwritten.
 */
int replace_untrusted_chars(char *str)
{
    int count = 0;
    char *p = str;

    while (*p != '\0') {
        const char c = *p;
        int seqlen;

        /* whitelisted plain ASCII stays as it is */
        if (('0' <= c && c <= '9') ||
                ('A' <= c && c <= 'Z') ||
                ('a' <= c && c <= 'z') ||
                strchr(" #$%+-./:=?@_,", c)) {
            p++;
            continue;
        }

        /* only multi-byte (length > 1) valid sequences are let through here */
        seqlen = utf8_encoded_valid_unichar(p);
        if (seqlen > 1) {
            p += seqlen;
            continue;
        }

        /* anything left over is treated as garbage */
        *p++ = '_';
        count++;
    }

    return count;
}
Example #6
0
/*
 * Encodes str into str_enc, a buffer of len bytes.
 *
 * - Valid multi-byte UTF-8 sequences are copied unchanged.
 * - A backslash, and any byte whitelisted_char_for_devnode() rejects, is
 *   written as the four characters "\xHH" (lowercase hex).
 * - Every other byte is copied as is.
 *
 * The result is NUL-terminated.
 *
 * Returns 0 on success, or -EINVAL if str or str_enc is NULL or the encoded
 * string (including the terminator) does not fit in len bytes. On failure
 * the contents of str_enc are unspecified and may be unterminated.
 */
int encode_devnode_name(const char *str, char *str_enc, size_t len) {
        static const char hex_digits[] = "0123456789abcdef";
        size_t i, j;

        if (str == NULL || str_enc == NULL)
                return -EINVAL;

        for (i = 0, j = 0; str[i] != '\0'; i++) {
                int seqlen;

                seqlen = utf8_encoded_valid_unichar(&str[i]);
                if (seqlen > 1) {
                        if (len-j < (size_t)seqlen)
                                return -EINVAL;
                        memcpy(&str_enc[j], &str[i], seqlen);
                        j += seqlen;
                        /* the loop increment accounts for the remaining byte */
                        i += (seqlen-1);
                } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
                        unsigned char byte = (unsigned char) str[i];

                        if (len-j < 4)
                                return -EINVAL;
                        /* Write exactly four bytes. sprintf() would append a
                         * NUL as a fifth byte, landing one past the end of the
                         * buffer when only four bytes remain. */
                        str_enc[j++] = '\\';
                        str_enc[j++] = 'x';
                        str_enc[j++] = hex_digits[byte >> 4];
                        str_enc[j++] = hex_digits[byte & 0x0f];
                } else {
                        if (len-j < 1)
                                return -EINVAL;
                        str_enc[j] = str[i];
                        j++;
                }
        }

        /* room for the terminator */
        if (len-j < 1)
                return -EINVAL;
        str_enc[j] = '\0';
        return 0;
}
Example #7
0
/*
 * Sanitizes str in place.
 *
 * Kept unchanged: characters listed in `white` (may be NULL), ASCII letters
 * and digits, the two-character prefix "\x" of a hex escape, and valid
 * multi-byte UTF-8 sequences. Any other whitespace character becomes a plain
 * space if `white` contains ' '; everything else becomes '_'.
 *
 * Returns the number of bytes that were rewritten.
 */
int replace_chars(char *str, const char *white)
{
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		/* accept whitelist */
		if (white != NULL && strchr(white, str[i]) != NULL) {
			i++;
			continue;
		}

		/* accept plain ascii char */
		if ((str[i] >= '0' && str[i] <= '9') ||
		    (str[i] >= 'A' && str[i] <= 'Z') ||
		    (str[i] >= 'a' && str[i] <= 'z')) {
			i++;
			continue;
		}

		/* accept hex encoding; only the "\x" prefix is skipped here, the
		 * digits that follow go through the normal checks */
		if (str[i] == '\\' && str[i+1] == 'x') {
			i += 2;
			continue;
		}

		/* accept valid utf8 */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/* if space is allowed, replace whitespace with ordinary space.
		 * white may be NULL, so guard the strchr(); isspace() requires
		 * a value representable as unsigned char. */
		if (isspace((unsigned char) str[i]) &&
		    white != NULL && strchr(white, ' ') != NULL) {
			str[i] = ' ';
			i++;
			replaced++;
			continue;
		}

		/* everything else is replaced with '_' */
		str[i] = '_';
		i++;
		replaced++;
	}

	return replaced;
}
Example #8
0
/*
 * Walks str one encoded sequence at a time using
 * utf8_encoded_valid_unichar().
 *
 * Returns str itself if every sequence up to the terminating NUL is
 * accepted, or NULL as soon as one is rejected.
 */
const char *utf8_is_valid(const char *str) {
    const uint8_t *cursor;
    int seqlen;

    assert(str);

    cursor = (const uint8_t*) str;
    while (*cursor != 0) {
        seqlen = utf8_encoded_valid_unichar((const char *) cursor);
        if (seqlen < 0)
            return NULL;

        cursor += seqlen;
    }

    return str;
}
Example #9
0
/*
 * Sanitizes str in place.
 *
 * Kept unchanged: characters accepted by whitelisted_char_for_devnode() for
 * the given `white` list, the two-character prefix "\x" of a hex escape, and
 * valid multi-byte UTF-8 sequences. Any other whitespace character becomes a
 * plain space if `white` is non-NULL and contains ' '; everything else
 * becomes '_'.
 *
 * Returns the number of bytes that were rewritten.
 */
size_t util_replace_chars(char *str, const char *white) {
        size_t i = 0, replaced = 0;

        assert(str);

        while (str[i] != '\0') {
                int len;

                if (whitelisted_char_for_devnode(str[i], white)) {
                        i++;
                        continue;
                }

                /* accept hex encoding; only the "\x" prefix is skipped here */
                if (str[i] == '\\' && str[i+1] == 'x') {
                        i += 2;
                        continue;
                }

                /* accept valid utf8 */
                len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                if (len > 1) {
                        i += len;
                        continue;
                }

                /* if space is allowed, replace whitespace with ordinary space.
                 * The cast keeps the isspace() argument in the range the C
                 * standard requires when plain char is signed. */
                if (isspace((unsigned char) str[i]) && white && strchr(white, ' ')) {
                        str[i] = ' ';
                        i++;
                        replaced++;
                        continue;
                }

                /* everything else is replaced with '_' */
                str[i] = '_';
                i++;
                replaced++;
        }
        return replaced;
}
Example #10
0
/*
 * Parses a double-quoted JSON string starting at *p.
 *
 * On success the decoded, NUL-terminated string is stored in *ret (ownership
 * passes to the caller), *p is advanced past the closing quote, and
 * JSON_STRING is returned.
 *
 * On failure a negative value is returned and *p / *ret are left untouched:
 *   -EINVAL  missing opening quote, unterminated string, raw control
 *            character (0x01..0x1f, 0x7f), unknown escape, or a malformed
 *            UTF-16 surrogate pair in \u escapes;
 *   -ENOMEM  allocation failure;
 *   otherwise the negative value returned by unhex_ucs2() or
 *   utf8_encoded_valid_unichar().
 */
static int json_parse_string(const char **p, char **ret) {
        _cleanup_free_ char *s = NULL;
        size_t n = 0, allocated = 0;
        const char *c;

        assert(p);
        assert(*p);
        assert(ret);

        c = *p;

        if (*c != '"')
                return -EINVAL;

        c++;

        for (;;) {
                int len;

                /* Check for EOF */
                if (*c == 0)
                        return -EINVAL;

                /* Check for control characters 0x00..0x1f */
                if (*c > 0 && *c < ' ')
                        return -EINVAL;

                /* Check for control character 0x7f */
                if (*c == 0x7f)
                        return -EINVAL;

                if (*c == '"') {
                        /* Closing quote: terminate and hand the buffer over. */
                        if (!s) {
                                s = strdup("");
                                if (!s)
                                        return -ENOMEM;
                        } else
                                s[n] = 0;

                        *p = c + 1;

                        *ret = s;
                        s = NULL;
                        return JSON_STRING;
                }

                if (*c == '\\') {
                        char ch = 0;
                        c++;

                        if (*c == 0)
                                return -EINVAL;

                        if (IN_SET(*c, '"', '\\', '/'))
                                ch = *c;
                        else if (*c == 'b')
                                ch = '\b';
                        else if (*c == 'f')
                                ch = '\f';
                        else if (*c == 'n')
                                ch = '\n';
                        else if (*c == 'r')
                                ch = '\r';
                        else if (*c == 't')
                                ch = '\t';
                        else if (*c == 'u') {
                                uint16_t x;
                                int r;

                                r = unhex_ucs2(c + 1, &x);
                                if (r < 0)
                                        return r;

                                /* skip the 'u' and its four hex digits */
                                c += 5;

                                /* Up to four UTF-8 bytes may be appended below,
                                 * and the closing-quote branch later writes
                                 * s[n] = 0 without reallocating, so reserve
                                 * the terminator slot here as well. */
                                if (!GREEDY_REALLOC(s, allocated, n + 5))
                                        return -ENOMEM;

                                if (!utf16_is_surrogate(x))
                                        n += utf8_encode_unichar(s + n, x);
                                else if (utf16_is_trailing_surrogate(x))
                                        /* a trailing surrogate may not come first */
                                        return -EINVAL;
                                else {
                                        uint16_t y;

                                        /* a leading surrogate must be followed
                                         * by a second \uXXXX escape */
                                        if (c[0] != '\\' || c[1] != 'u')
                                                return -EINVAL;

                                        r = unhex_ucs2(c + 2, &y);
                                        if (r < 0)
                                                return r;

                                        /* skip "\u" and its four hex digits */
                                        c += 6;

                                        if (!utf16_is_trailing_surrogate(y))
                                                return -EINVAL;

                                        n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
                                }

                                continue;
                        } else
                                return -EINVAL;

                        /* one decoded byte plus room for the terminator */
                        if (!GREEDY_REALLOC(s, allocated, n + 2))
                                return -ENOMEM;

                        s[n++] = ch;
                        c ++;
                        continue;
                }

                /* Ordinary character: copy the whole UTF-8 sequence. */
                len = utf8_encoded_valid_unichar(c);
                if (len < 0)
                        return len;

                if (!GREEDY_REALLOC(s, allocated, n + len + 1))
                        return -ENOMEM;

                memcpy(s + n, c, len);
                n += len;
                c += len;
        }
}