/* Cross-check svn_utf__last_valid() against svn_utf__last_valid2() by
   feeding both the same randomly generated byte strings.

   Returns SVN_NO_ERROR when the two implementations agree on every
   generated string, or an SVN_ERR_TEST_FAILED error naming the first
   iteration on which they disagree.  POOL is not used. */
static svn_error_t *
utf_validate2(apr_pool_t *pool)
{
  int iteration;

  seed_val();

  /* The iteration count has to be large enough that most runs see both
     valid and invalid strings, and large enough that a deliberate error
     in one of the implementations triggers a failure.  By experiment the
     second requirement needs a much larger number of iterations than
     the first. */
  for (iteration = 0; iteration < 100000; ++iteration)
    {
      char str[64];
      char *const terminator = str + sizeof(str) - 1;
      char *cursor;
      apr_size_t len;

      /* Fill everything but the last byte with random values.  According
         to experiment such a string is occasionally (less than 1%) valid
         but usually invalid. */
      for (cursor = str; cursor < terminator; ++cursor)
        *cursor = (char)range_rand(0, 255);
      *terminator = 0;

      /* A random zero byte may end the string before the terminator. */
      len = strlen(str);

      if (svn_utf__last_valid(str, len) == svn_utf__last_valid2(str, len))
        continue;

      /* Duplicate calls for easy debugging */
      svn_utf__last_valid(str, len);
      svn_utf__last_valid2(str, len);
      return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
                               "is_valid2 test %d failed", iteration);
    }

  return SVN_NO_ERROR;
}
/* Prepare LINE for display, pruning or extending it to an appropriate
 * display width, and stripping the EOL marker, if any.
 * This function assumes that the data in LINE is encoded in UTF-8.
 *
 * The line is copied into a string buffer created on POOL; the returned
 * pointer is that buffer's data.  On return the computed display width
 * equals LINE_DISPLAY_WIDTH (this is asserted).
 *
 * svn_utf_cstring_utf8_width() returns an int and signals failure with a
 * negative value, so its result is kept in an int until the failure case
 * has been handled and only then converted to size_t. */
static const char *
prepare_line_for_display(const char *line, apr_pool_t *pool)
{
  svn_stringbuf_t *buf = svn_stringbuf_create(line, pool);
  const size_t line_width = LINE_DISPLAY_WIDTH;
  size_t width;
  int measured;

  /* Trim EOL: either a CRLF pair, or a single LF or CR. */
  if (buf->len >= 2 &&
      buf->data[buf->len - 2] == '\r' &&
      buf->data[buf->len - 1] == '\n')
    svn_stringbuf_chop(buf, 2);
  else if (buf->len >= 1 &&
           (buf->data[buf->len - 1] == '\n' ||
            buf->data[buf->len - 1] == '\r'))
    svn_stringbuf_chop(buf, 1);

  /* Determine the on-screen width of the line. */
  measured = svn_utf_cstring_utf8_width(buf->data);
  if (measured < 0)
    {
      /* Determining the width failed. Try to get rid of unprintable
       * characters in the line buffer. */
      buf = svn_stringbuf_create(svn_xml_fuzzy_escape(buf->data, pool),
                                 pool);
      measured = svn_utf_cstring_utf8_width(buf->data);
    }
  /* fallback: buffer length */
  width = (measured < 0) ? buf->len : (size_t)measured;

  /* Trim further in case line is still too long. */
  while (width > line_width)
    {
      const char *last_valid;

      svn_stringbuf_chop(buf, 1);

      /* Be careful not to invalidate the UTF-8 string by trimming
       * just part of a character. */
      last_valid = svn_utf__last_valid(buf->data, buf->len);
      if (last_valid < buf->data + buf->len)
        svn_stringbuf_chop(buf,
                           (apr_size_t)((buf->data + buf->len) - last_valid));

      measured = svn_utf_cstring_utf8_width(buf->data);
      /* fallback: buffer length */
      width = (measured < 0) ? buf->len : (size_t)measured;
    }

  /* Add padding in case the line is too short.  Each appended space is
   * counted as one column. */
  while (width == 0 || width < line_width)
    {
      svn_stringbuf_appendbyte(buf, ' ');
      width++;
    }

  SVN_ERR_ASSERT_NO_RETURN(width == line_width);
  return buf->data;
}
/* Table-driven check of the UTF-8 validators against hand-written valid
   and invalid byte sequences.

   Each table entry pairs a NUL-terminated byte string with the expected
   verdict.  For every entry this verifies that
     - svn_utf__cstring_is_valid() and svn_utf__is_valid() both return the
       expected verdict, and
     - the prefix ending at the pointer returned by svn_utf__last_valid()
       is itself valid, and for a valid string that pointer is at the
       terminating NUL.

   Returns SVN_NO_ERROR if all entries pass, otherwise an
   SVN_ERR_TEST_FAILED error carrying the zero-based index of the first
   failing entry.  POOL is not used. */
static svn_error_t *
utf_validate(apr_pool_t *pool)
{
  struct data {
    svn_boolean_t valid;
    char string[20];
  } tests[] = {
    {TRUE,  {'a', 'b', '\0'}},
    {FALSE, {'a', 'b', '\x80', '\0'}},

    /* Lead byte 0xC0 */
    {FALSE, {'a', 'b', '\xC0', '\0'}},
    {FALSE, {'a', 'b', '\xC0', '\x81', 'x', 'y', '\0'}},

    /* Lead byte 0xC5 */
    {TRUE,  {'a', 'b', '\xC5', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xC5', '\xC0', 'x', 'y', '\0'}},

    /* Lead byte 0xE0 */
    {FALSE, {'a', 'b', '\xE0', '\0'}},
    {FALSE, {'a', 'b', '\xE0', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE0', '\xA0', '\0'}},
    {FALSE, {'a', 'b', '\xE0', '\xA0', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xE0', '\xA0', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE0', '\x9F', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE0', '\xCF', '\x81', 'x', 'y', '\0'}},

    /* Lead byte 0xE5 */
    {FALSE, {'a', 'b', '\xE5', '\0'}},
    {FALSE, {'a', 'b', '\xE5', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE5', '\x81', '\0'}},
    {FALSE, {'a', 'b', '\xE5', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xE5', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE5', '\xE1', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xE5', '\x81', '\xE1', 'x', 'y', '\0'}},

    /* Lead byte 0xED */
    {FALSE, {'a', 'b', '\xED', '\0'}},
    {FALSE, {'a', 'b', '\xED', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xED', '\x81', '\0'}},
    {FALSE, {'a', 'b', '\xED', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xED', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xED', '\xA0', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xED', '\x81', '\xC1', 'x', 'y', '\0'}},

    /* Lead byte 0xEE */
    {FALSE, {'a', 'b', '\xEE', '\0'}},
    {FALSE, {'a', 'b', '\xEE', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xEE', '\x81', '\0'}},
    {FALSE, {'a', 'b', '\xEE', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xEE', '\x81', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xEE', '\xA0', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xEE', '\xC0', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xEE', '\x81', '\xC1', 'x', 'y', '\0'}},

    /* Lead byte 0xF0 */
    {FALSE, {'a', 'b', '\xF0', '\0'}},
    {FALSE, {'a', 'b', '\xF0', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xF0', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF0', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},

    /* Lead byte 0xF2 */
    {FALSE, {'a', 'b', '\xF2', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF2', '\x91', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xF2', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF2', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF2', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},

    /* Lead byte 0xF4 */
    {FALSE, {'a', 'b', '\xF4', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x91', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
    {TRUE,  {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\xC1', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x91', '\xC1', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},

    /* Lead byte 0xF5 */
    {FALSE, {'a', 'b', '\xF5', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF5', '\x81', 'x', 'y', '\0'}},

    /* Two multi-byte sequences in one string */
    {TRUE,  {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
             'a', 'b', '\xF2', '\x91', '\x81', '\x81', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
             'a', 'b', '\xF2', '\x91', '\x81', '\xC1', 'x', 'y', '\0'}},
    {FALSE, {'a', 'b', '\xF4', '\x81', '\x81', '\x81', 'x', 'y',
             'a', 'b', '\xF2', '\x91', '\x81', 'x', 'y', '\0'}},

    /* End-of-table sentinel */
    {-1},
  };
  int idx;

  for (idx = 0; tests[idx].valid != -1; ++idx)
    {
      const char *text = tests[idx].string;
      const svn_boolean_t expected = tests[idx].valid;
      const apr_size_t text_len = strlen(text);
      const char *valid_end = svn_utf__last_valid(text, text_len);

      /* Both whole-string validators must agree with the table. */
      if (svn_utf__cstring_is_valid(text) != expected
          || svn_utf__is_valid(text, text_len) != expected)
        return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
                                 "is_valid test %d failed", idx);

      /* The prefix up to VALID_END must validate, and a string expected
         to be valid must have VALID_END at its terminating NUL. */
      if (!svn_utf__is_valid(text, valid_end - text)
          || (expected && *valid_end))
        return svn_error_createf(SVN_ERR_TEST_FAILED, NULL,
                                 "last_valid test %d failed", idx);
    }

  return SVN_NO_ERROR;
}