static void TestGetChar() { static const uint8_t input[]={ /* code unit,*/ 0x61, 0x7f, 0xe4, 0xba, 0x8c, 0xF0, 0x90, 0x90, 0x81, 0xc0, 0x65, 0x31, 0x9a, 0xc9 }; static const UChar32 result[]={ /*codepoint-unsafe, codepoint-safe(not strict) codepoint-safe(strict)*/ 0x61, 0x61, 0x61, 0x7f, 0x7f, 0x7f, 0x4e8c, 0x4e8c, 0x4e8c, 0x4e8c, 0x4e8c, 0x4e8c , 0x4e8c, 0x4e8c, 0x4e8c, 0x10401, 0x10401, 0x10401 , 0x10401, 0x10401, 0x10401 , 0x10401, 0x10401, 0x10401 , 0x10401, 0x10401, 0x10401, 0x25, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x65, 0x65, 0x65, 0x31, 0x31, 0x31, 0x31, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x240, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1 }; uint16_t i=0; UChar32 c; uint32_t offset=0; for(offset=0; offset<sizeof(input); offset++) { if (offset < sizeof(input) - 1) { UTF8_GET_CHAR_UNSAFE(input, offset, c); if(c != result[i]){ log_err("ERROR: UTF8_GET_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); } U8_GET_UNSAFE(input, offset, c); if(c != result[i]){ log_err("ERROR: U8_GET_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); } } U8_GET(input, 0, offset, sizeof(input), c); if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){ log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); } UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, FALSE); if(c != result[i+1]){ log_err("ERROR: UTF8_GET_CHAR_SAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); } UTF8_GET_CHAR_SAFE(input, 0, offset, sizeof(input), c, TRUE); if(c != result[i+2]){ log_err("ERROR: UTF8_GET_CHAR_SAFE(strict) failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); } i=(uint16_t)(i+3); } }
static void TestNextPrevChar(){ static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00}; static const UChar32 result[]={ /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/ 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000, 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841410, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1050, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61, 0x80, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xc2, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x77e, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0xbe, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x10401, 0x10401, 0x10401, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF_ERROR_VALUE, UTF_ERROR_VALUE, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x0840, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061 }; static const int32_t movedOffset[]={ /*next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s*/ 1, 1, 1, 15, 15, 15, 5, 5, 5, 14, 14 , 14, 3, 3, 3, 9, 13, 13, 4, 4, 4, 9, 12, 12, 5, 5, 5, 9, 11, 11, 7, 7, 7, 10, 10, 10, 7, 7, 7, 9, 9, 9, 8, 9, 9, 7, 7, 7, 9, 9, 9, 7, 7, 7, 11, 10, 10, 5, 5, 5, 11, 11, 11, 5, 5, 5, 12, 12, 12, 1, 1, 1, 13, 13, 13, 1, 1, 1, 14, 14, 14, 1, 1, 1, 14, 15, 15, 1, 1, 1, 14, 16, 16, 0, 0, 0, }; UChar32 c=0x0000; uint32_t i=0; uint32_t offset=0; int32_t setOffset=0; for(offset=0; offset<sizeof(input); offset++){ if (offset < sizeof(input) - 2) { /* Can't have it go off the end of the array based on input */ setOffset=offset; UTF8_NEXT_CHAR_UNSAFE(input, setOffset, c); if(setOffset != movedOffset[i]){ log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i], setOffset); } if(c != result[i]){ log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); } setOffset=offset; U8_NEXT_UNSAFE(input, setOffset, c); if(setOffset != movedOffset[i]){ log_err("ERROR: U8_NEXT_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i], setOffset); } if(c != result[i]){ log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i], c); } } setOffset=offset; UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE); if(setOffset != movedOffset[i+1]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+1], setOffset); } if(c != result[i+1]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); } setOffset=offset; U8_NEXT(input, setOffset, sizeof(input), c); if(setOffset != movedOffset[i+1]){ log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+1], setOffset); } if(UTF_IS_ERROR(result[i+1]) ? c >= 0 : c != result[i+1]){ log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+1], c); } setOffset=offset; UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE); if(setOffset != movedOffset[i+1]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+2], setOffset); } if(c != result[i+2]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); } i=i+6; } i=0; for(offset=sizeof(input); offset > 0; --offset){ setOffset=offset; UTF8_PREV_CHAR_UNSAFE(input, setOffset, c); if(setOffset != movedOffset[i+3]){ log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+3], setOffset); } if(c != result[i+3]){ log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, result[i+3], c); } setOffset=offset; UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE); if(setOffset != movedOffset[i+4]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+4], setOffset); } if(c != result[i+4]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c); } setOffset=offset; U8_PREV(input, 0, setOffset, c); if(setOffset != movedOffset[i+4]){ log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+4], setOffset); } if(UTF_IS_ERROR(result[i+4]) ? c >= 0 : c != result[i+4]){ log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+4], c); } setOffset=offset; UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE); if(setOffset != movedOffset[i+5]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+5], setOffset); } if(c != result[i+5]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c); } i=i+6; } { /* test non-characters */ static const uint8_t nonChars[]={ 0xef, 0xb7, 0x90, /* U+fdd0 */ 0xef, 0xbf, 0xbf, /* U+feff */ 0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */ 0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */ 0xf4, 0x8f, 0xbf, 0xbe /* U+10fffe */ }; UChar32 ch; int32_t idx; for(idx=0; idx<(int32_t)sizeof(nonChars);) { U8_NEXT(nonChars, idx, sizeof(nonChars), ch); if(!U_IS_UNICODE_NONCHAR(ch)) { log_err("U8_NEXT(before %d) failed to read a non-character\n", idx); } } for(idx=(int32_t)sizeof(nonChars); idx>0;) { U8_PREV(nonChars, 0, idx, ch); if(!U_IS_UNICODE_NONCHAR(ch)) { log_err("U8_PREV(at %d) failed to read a non-character\n", idx); } } } }
static void TestNextPrevChar() { static const uint8_t input[]={0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, 0xfd, 0xbe, 0xc2, 0x61, 0x81, 0x90, 0x90, 0xf0, 0x00}; static const UChar32 result[]={ /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */ 0x0061, 0x0061, 0x0061, 0x0000, 0x0000, 0x0000, 0x10401, 0x10401, 0x10401, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841410, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1050, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x2841, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61, 0x80, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xc2, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x77e, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0xbe, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xfd, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xa1, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x00, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x61, 0x61, 0x61, 0xc0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x81, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x10401, 0x10401, 0x10401, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF_ERROR_VALUE, UTF_ERROR_VALUE, 0x90, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x410, UTF8_ERROR_VALUE_2, UTF8_ERROR_VALUE_2, 0x0840, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0xf0, UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_1, 0x0000, 0x0000, 0x0000, 0x0061, 0x0061, 0x0061 }; static const int32_t movedOffset[]={ /* next_unsafe next_safe_ns next_safe_s prev_unsafe prev_safe_ns prev_safe_s */ 1, 1, 1, 15, 15, 15, 5, 5, 5, 14, 14 , 14, 3, 3, 3, 9, 13, 13, 4, 4, 4, 9, 12, 12, 5, 5, 5, 9, 11, 11, 7, 7, 7, 10, 10, 10, 7, 7, 7, 9, 9, 9, 8, 9, 9, 7, 7, 7, 9, 9, 9, 7, 7, 7, 11, 10, 10, 5, 5, 5, 11, 11, 11, 5, 5, 5, 12, 12, 12, 1, 1, 1, 13, 13, 13, 1, 1, 1, 14, 14, 14, 1, 1, 1, 14, 15, 15, 1, 1, 1, 14, 16, 16, 0, 0, 0, }; /* TODO: remove unused columns for next_unsafe & prev_unsafe, and adjust the test code */ UChar32 c, expected; uint32_t i=0; uint32_t offset=0; int32_t setOffset=0; for(offset=0; offset<sizeof(input); offset++){ setOffset=offset; UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, FALSE); if(setOffset != movedOffset[i+1]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+1], setOffset); } expected=result[i+1]; if(c != expected){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; U8_NEXT(input, setOffset, sizeof(input), c); if(setOffset != movedOffset[i+1]){ log_err("ERROR: U8_NEXT failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+1], setOffset); } if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; } if(c != expected){ log_err("ERROR: U8_NEXT failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; U8_NEXT_OR_FFFD(input, setOffset, sizeof(input), c); if(setOffset != movedOffset[i+1]){ log_err("ERROR: U8_NEXT_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+1], setOffset); } if(expected<0) { expected=0xfffd; } if(c != expected){ log_err("ERROR: U8_NEXT_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; UTF8_NEXT_CHAR_SAFE(input, setOffset, sizeof(input), c, TRUE); if(setOffset != movedOffset[i+1]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+2], setOffset); } if(c != result[i+2]){ log_err("ERROR: UTF8_NEXT_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+2], c); } i=i+6; } i=0; for(offset=sizeof(input); offset > 0; --offset){ setOffset=offset; UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, FALSE); if(setOffset != movedOffset[i+4]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+4], setOffset); } expected=result[i+4]; if(c != expected){ log_err("ERROR: UTF8_PREV_CHAR_SAFE failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; U8_PREV(input, 0, setOffset, c); if(setOffset != movedOffset[i+4]){ log_err("ERROR: U8_PREV failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+4], setOffset); } if(UTF_IS_ERROR(expected)) { expected=U_SENTINEL; } if(c != expected){ log_err("ERROR: U8_PREV failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; U8_PREV_OR_FFFD(input, 0, setOffset, c); if(setOffset != movedOffset[i+4]){ log_err("ERROR: U8_PREV_OR_FFFD failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+4], setOffset); } if(expected<0) { expected=0xfffd; } if(c != expected){ log_err("ERROR: U8_PREV_OR_FFFD failed for input=%ld. Expected:%lx Got:%lx\n", offset, expected, c); } setOffset=offset; UTF8_PREV_CHAR_SAFE(input, 0, setOffset, c, TRUE); if(setOffset != movedOffset[i+5]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed to move the offset correctly at %d\n ExpectedOffset:%d Got %d\n", offset, movedOffset[i+5], setOffset); } if(c != result[i+5]){ log_err("ERROR: UTF8_PREV_CHAR_SAFE(strict) failed for input=%ld. Expected:%lx Got:%lx\n", offset, result[i+5], c); } i=i+6; } }