/*
 * Builds an escaped copy of the NUL-terminated string str in a freshly
 * malloc()ed buffer of strlen(str) * 4 + 1 bytes. Returns NULL if that
 * allocation fails; str must not be NULL (asserted).
 *
 * The input is consumed left to right:
 *   - utf8_encoded_valid_unichar() reports a positive length: the sequence is
 *     copied unchanged when utf8_is_printable() accepts it; otherwise each of
 *     its bytes is written as four characters: backslash, 'x', and two
 *     characters produced by hexchar() from the byte shifted right by 4 and
 *     from the byte itself.
 *   - otherwise: UTF8_REPLACEMENT_CHARACTER is appended and exactly one input
 *     byte is skipped.
 *
 * The 4x sizing matches the escape path (four output bytes per input byte).
 * NOTE(review): assumes UTF8_REPLACEMENT_CHARACTER is at most 4 bytes long and
 * that hexchar() only looks at the low nibble of its argument (plain char may
 * be signed, so the shifted value can be negative) - confirm both.
 * NOTE(review): the definition is cut off after the main loop in this view;
 * termination of the output string and the return value are not visible here.
 */
char *utf8_escape_non_printable(const char *str) { char *p, *s; assert(str); p = s = malloc(strlen(str) * 4 + 1); if (!p) return NULL; while (*str) { int len; len = utf8_encoded_valid_unichar(str); if (len > 0) { if (utf8_is_printable(str, len)) { s = mempcpy(s, str, len); str += len; } else { while (len > 0) { *(s++) = '\\'; *(s++) = 'x'; *(s++) = hexchar((int) *str >> 4); *(s++) = hexchar((int) *str); str += 1; len --; } } } else { s = stpcpy(s, UTF8_REPLACEMENT_CHARACTER); str += 1; } }
/*
 * Decides whether the l bytes starting at p should be printed.
 *
 * Returns true unconditionally when OUTPUT_SHOW_ALL is set in flags.
 * Otherwise returns true only if l is below PRINT_CHAR_THRESHOLD and
 * utf8_is_printable(p, l) accepts the data. p must not be NULL (asserted).
 */
static bool shall_print(const char *p, size_t l, OutputFlags flags) {
        assert(p);

        /* An explicit "show all" request overrides every other check. */
        if (flags & OUTPUT_SHOW_ALL)
                return true;

        /* The length test comes first so oversized data is never scanned. */
        return l < PRINT_CHAR_THRESHOLD && utf8_is_printable(p, l);
}
/*
 * Checks utf8_is_printable() against a fixed set of byte strings, each paired
 * with the byte count handed to the function and the expected verdict.
 */
static void test_utf8_is_printable(void) {
        static const struct {
                const char *bytes;
                size_t length;
                bool expected;
        } cases[] = {
                /* ASCII text containing a tab */
                { "ascii is valid\tunicode", 22, true  },
                /* complete three-byte sequence (U+2122) */
                { "\342\204\242",             3, true  },
                /* three-byte lead byte with only one continuation byte */
                { "\341\204",                 2, false },
                /* two two-byte characters */
                { "ąę",                       4, true  },
        };
        size_t i;

        for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
                bool got = utf8_is_printable(cases[i].bytes, cases[i].length);

                assert_se(got == cases[i].expected);
        }
}