/* Checks utf8_encoded_valid_unichar() against a small table of byte sequences:
 * well-formed ones must yield their length in bytes, malformed ones a
 * negative value. */
static void test_utf8_encoded_valid_unichar(void) {
        static const struct {
                const char *bytes;
                int expected; /* > 0: exact length expected; 0: must be rejected */
        } cases[] = {
                { "\342\204\242",     3 },
                { "\302\256",         2 },
                { "a",                1 },
                { "\341\204",         0 },
                { "\341\204\341\204", 0 },
        };
        unsigned k;

        for (k = 0; k < sizeof(cases) / sizeof(cases[0]); k++) {
                int r = utf8_encoded_valid_unichar(cases[k].bytes);

                if (cases[k].expected > 0)
                        assert_se(r == cases[k].expected);
                else
                        assert_se(r < 0);
        }
}
/* Builds an escaped copy of str in a freshly malloc()ed buffer of
 * strlen(str) * 4 + 1 bytes; returns NULL if the allocation fails.
 *
 * The input is walked one sequence at a time:
 *   - a sequence accepted by utf8_encoded_valid_unichar() and reported
 *     printable by utf8_is_printable() is copied through unchanged;
 *   - an accepted but non-printable sequence is emitted byte by byte as
 *     '\\', 'x' followed by hexchar() of the byte shifted right by four and
 *     hexchar() of the byte itself;
 *   - a byte that does not start a valid sequence is replaced by
 *     UTF8_REPLACEMENT_CHARACTER and the input advances by one byte.
 *
 * NOTE(review): *str is a plain char cast to int, so bytes >= 0x80 may be
 * negative here; this presumably relies on hexchar() masking its argument to
 * the low nibble -- confirm.
 * NOTE(review): the definition appears to be cut off after the main loop in
 * this view (no terminator write, return statement or closing brace visible)
 * -- confirm against the complete source. */
char *utf8_escape_non_printable(const char *str) { char *p, *s; assert(str); p = s = malloc(strlen(str) * 4 + 1); if (!p) return NULL; while (*str) { int len; len = utf8_encoded_valid_unichar(str); if (len > 0) { if (utf8_is_printable(str, len)) { s = mempcpy(s, str, len); str += len; } else { while (len > 0) { *(s++) = '\\'; *(s++) = 'x'; *(s++) = hexchar((int) *str >> 4); *(s++) = hexchar((int) *str); str += 1; len --; } } } else { s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER); str += 1; } }
/* Returns a newly allocated copy of str in which every byte that does not
 * begin a valid UTF-8 sequence is replaced by UTF8_REPLACEMENT_CHARACTER.
 * Valid sequences are copied verbatim. Returns NULL if the allocation fails;
 * the caller owns the returned buffer. */
char *utf8_escape_invalid(const char *str) {
        const char *in;
        char *result, *out;

        assert(str);

        /* Reserve four output bytes per input byte, plus the terminator. */
        result = malloc(strlen(str) * 4 + 1);
        if (!result)
                return NULL;

        in = str;
        out = result;

        while (*in) {
                int seq_len = utf8_encoded_valid_unichar(in);

                if (seq_len <= 0) {
                        /* Not a valid sequence start: substitute and skip one byte. */
                        out = stpcpy(out, UTF8_REPLACEMENT_CHARACTER);
                        in += 1;
                        continue;
                }

                out = mempcpy(out, in, seq_len);
                in += seq_len;
        }

        *out = '\0';
        return result;
}
/* Returns true if the first `length` bytes of str consist solely of valid
 * UTF-8 sequences that decode to non-control characters. A '\n' character is
 * additionally rejected unless `newline` is true. Returns false as soon as a
 * sequence is invalid, would extend past `length`, or fails to decode. */
bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
        const char *cursor;
        size_t remaining;

        assert(str);

        cursor = str;
        remaining = length;

        while (remaining) {
                int seq_len, codepoint;

                seq_len = utf8_encoded_valid_unichar(cursor);
                if (seq_len < 0)
                        return false;

                /* The sequence must fit entirely within the bytes we were given. */
                if ((size_t) seq_len > remaining)
                        return false;

                codepoint = utf8_encoded_to_unichar(cursor);
                if (codepoint < 0)
                        return false;

                if (unichar_is_control(codepoint))
                        return false;

                if (!newline && codepoint == '\n')
                        return false;

                remaining -= seq_len;
                cursor += seq_len;
        }

        return true;
}
/* Sanitizes str in place: ASCII letters, digits, the punctuation set
 * " #$%+-./:=?@_," and valid multi-byte UTF-8 sequences are kept; every other
 * byte is overwritten with '_'. Returns the number of bytes replaced. */
int replace_untrusted_chars(char *str) {
        int count = 0;
        size_t pos = 0;

        while (str[pos] != '\0') {
                const char ch = str[pos];
                int is_alnum, seq_len;

                /* whitelisted printable ASCII is left alone */
                is_alnum = (ch >= '0' && ch <= '9') ||
                           (ch >= 'A' && ch <= 'Z') ||
                           (ch >= 'a' && ch <= 'z');
                if (is_alnum || strchr(" #$%+-./:=?@_,", ch)) {
                        pos++;
                        continue;
                }

                /* a valid multi-byte UTF-8 sequence is skipped as a whole */
                seq_len = utf8_encoded_valid_unichar(&str[pos]);
                if (seq_len > 1) {
                        pos += seq_len;
                        continue;
                }

                /* anything else is considered garbage */
                str[pos++] = '_';
                count++;
        }

        return count;
}
/* Encodes str into str_enc so that it only contains characters suitable for
 * a device node name.
 *
 *   - valid multi-byte UTF-8 sequences are copied unchanged;
 *   - a backslash, and any byte rejected by whitelisted_char_for_devnode(),
 *     is written as the four characters "\xHH" (lowercase hex);
 *   - every other byte is copied unchanged.
 *
 * str      NUL-terminated input string.
 * str_enc  output buffer.
 * len      size of str_enc in bytes, including room for the terminating NUL.
 *
 * Returns 0 on success, or -EINVAL if either pointer is NULL or the encoded
 * result (including its terminator) does not fit into len bytes. On failure
 * the contents of str_enc are unspecified and not NUL-terminated. */
int encode_devnode_name(const char *str, char *str_enc, size_t len) {
        static const char hex_digits[] = "0123456789abcdef";
        size_t i, j;

        if (str == NULL || str_enc == NULL)
                return -EINVAL;

        for (i = 0, j = 0; str[i] != '\0'; i++) {
                int seqlen;

                seqlen = utf8_encoded_valid_unichar(&str[i]);
                if (seqlen > 1) {
                        if (len-j < (size_t)seqlen)
                                goto err;
                        memcpy(&str_enc[j], &str[i], seqlen);
                        j += seqlen;
                        /* the loop's i++ accounts for the final byte of the sequence */
                        i += (seqlen-1);
                } else if (str[i] == '\\' || !whitelisted_char_for_devnode(str[i], NULL)) {
                        unsigned char c = (unsigned char) str[i];

                        if (len-j < 4)
                                goto err;
                        /* Emit exactly four bytes. sprintf() would also store a
                         * terminating NUL, i.e. a fifth byte, which lands one past
                         * the end of the buffer when exactly four bytes remain. */
                        str_enc[j++] = '\\';
                        str_enc[j++] = 'x';
                        str_enc[j++] = hex_digits[c >> 4];
                        str_enc[j++] = hex_digits[c & 0x0f];
                } else {
                        if (len-j < 1)
                                goto err;
                        str_enc[j] = str[i];
                        j++;
                }
        }

        /* room for the terminator */
        if (len-j < 1)
                goto err;
        str_enc[j] = '\0';
        return 0;

err:
        return -EINVAL;
}
/* Sanitizes str in place. The following are kept as they are: characters
 * listed in `white`, ASCII letters and digits, the two-character prefix "\x"
 * of a hex escape, and valid multi-byte UTF-8 sequences. If `white` contains
 * a space, any other whitespace character is turned into an ordinary space.
 * Everything else is replaced with '_'.
 *
 * str    NUL-terminated string, modified in place.
 * white  additional characters to accept; may be NULL for "none".
 *
 * Returns the number of characters that were replaced. */
int replace_chars(char *str, const char *white) {
        size_t i = 0;
        int replaced = 0;

        while (str[i] != '\0') {
                int len;

                /* accept whitelist */
                if (white != NULL && strchr(white, str[i]) != NULL) {
                        i++;
                        continue;
                }

                /* accept plain ascii char */
                if ((str[i] >= '0' && str[i] <= '9') ||
                    (str[i] >= 'A' && str[i] <= 'Z') ||
                    (str[i] >= 'a' && str[i] <= 'z')) {
                        i++;
                        continue;
                }

                /* accept hex encoding */
                if (str[i] == '\\' && str[i+1] == 'x') {
                        i += 2;
                        continue;
                }

                /* accept valid utf8 */
                len = utf8_encoded_valid_unichar(&str[i]);
                if (len > 1) {
                        i += len;
                        continue;
                }

                /* If space is allowed, replace whitespace with ordinary space.
                 * `white` may be NULL, so it must be checked before strchr();
                 * the cast keeps the isspace() argument in the range of
                 * unsigned char, as <ctype.h> requires, for bytes >= 0x80. */
                if (white != NULL &&
                    isspace((unsigned char) str[i]) &&
                    strchr(white, ' ') != NULL) {
                        str[i] = ' ';
                        i++;
                        replaced++;
                        continue;
                }

                /* everything else is replaced with '_' */
                str[i] = '_';
                i++;
                replaced++;
        }

        return replaced;
}
/* Walks str one encoded character at a time. Returns str itself if every
 * sequence up to the terminating NUL is accepted by
 * utf8_encoded_valid_unichar(), or NULL as soon as one is rejected. */
const char *utf8_is_valid(const char *str) {
        const char *cursor;

        assert(str);

        cursor = str;
        while (*cursor != '\0') {
                int seq_len = utf8_encoded_valid_unichar(cursor);

                if (seq_len < 0)
                        return NULL;

                cursor += seq_len;
        }

        return str;
}
/* Sanitizes str in place. The following are kept as they are: characters
 * accepted by whitelisted_char_for_devnode(ch, white), the two-character
 * prefix "\x" of a hex escape, and valid multi-byte UTF-8 sequences. If
 * `white` is non-NULL and contains a space, any other whitespace character is
 * turned into an ordinary space. Everything else is replaced with '_'.
 *
 * str    NUL-terminated string, modified in place; must not be NULL.
 * white  additional characters to accept; may be NULL.
 *
 * Returns the number of characters that were replaced. */
size_t util_replace_chars(char *str, const char *white) {
        size_t i = 0, replaced = 0;

        assert(str);

        while (str[i] != '\0') {
                int len;

                if (whitelisted_char_for_devnode(str[i], white)) {
                        i++;
                        continue;
                }

                /* accept hex encoding */
                if (str[i] == '\\' && str[i+1] == 'x') {
                        i += 2;
                        continue;
                }

                /* accept valid utf8 */
                len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
                if (len > 1) {
                        i += len;
                        continue;
                }

                /* If space is allowed, replace whitespace with ordinary space.
                 * Bytes reaching this point may be >= 0x80 (invalid UTF-8), which
                 * are negative where char is signed; <ctype.h> functions require
                 * a value representable as unsigned char, hence the cast. */
                if (isspace((unsigned char) str[i]) && white && strchr(white, ' ')) {
                        str[i] = ' ';
                        i++;
                        replaced++;
                        continue;
                }

                /* everything else is replaced with '_' */
                str[i] = '_';
                i++;
                replaced++;
        }

        return replaced;
}
static int json_parse_string(const char **p, char **ret) { _cleanup_free_ char *s = NULL; size_t n = 0, allocated = 0; const char *c; assert(p); assert(*p); assert(ret); c = *p; if (*c != '"') return -EINVAL; c++; for (;;) { int len; /* Check for EOF */ if (*c == 0) return -EINVAL; /* Check for control characters 0x00..0x1f */ if (*c > 0 && *c < ' ') return -EINVAL; /* Check for control character 0x7f */ if (*c == 0x7f) return -EINVAL; if (*c == '"') { if (!s) { s = strdup(""); if (!s) return -ENOMEM; } else s[n] = 0; *p = c + 1; *ret = s; s = NULL; return JSON_STRING; } if (*c == '\\') { char ch = 0; c++; if (*c == 0) return -EINVAL; if (IN_SET(*c, '"', '\\', '/')) ch = *c; else if (*c == 'b') ch = '\b'; else if (*c == 'f') ch = '\f'; else if (*c == 'n') ch = '\n'; else if (*c == 'r') ch = '\r'; else if (*c == 't') ch = '\t'; else if (*c == 'u') { uint16_t x; int r; r = unhex_ucs2(c + 1, &x); if (r < 0) return r; c += 5; if (!GREEDY_REALLOC(s, allocated, n + 4)) return -ENOMEM; if (!utf16_is_surrogate(x)) n += utf8_encode_unichar(s + n, x); else if (utf16_is_trailing_surrogate(x)) return -EINVAL; else { uint16_t y; if (c[0] != '\\' || c[1] != 'u') return -EINVAL; r = unhex_ucs2(c + 2, &y); if (r < 0) return r; c += 6; if (!utf16_is_trailing_surrogate(y)) return -EINVAL; n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y)); } continue; } else return -EINVAL; if (!GREEDY_REALLOC(s, allocated, n + 2)) return -ENOMEM; s[n++] = ch; c ++; continue; } len = utf8_encoded_valid_unichar(c); if (len < 0) return len; if (!GREEDY_REALLOC(s, allocated, n + len + 1)) return -ENOMEM; memcpy(s + n, c, len); n += len; c += len; } }