static void TestNextPrevCharUnsafe() { /* * Use a (mostly) well-formed UTF-8 string and test at code point boundaries. * The behavior of _UNSAFE macros for ill-formed strings is undefined. */ static const uint8_t input[]={ 0x61, 0xf0, 0x90, 0x90, 0x81, 0xc0, 0x80, /* non-shortest form */ 0xe2, 0x82, 0xac, 0xc2, 0xa1, 0xf4, 0x8f, 0xbf, 0xbf, 0x00 }; static const UChar32 codePoints[]={ 0x61, 0x10401, 0, 0x20ac, 0xa1, 0x10ffff, 0 }; UChar32 c; int32_t i; uint32_t offset; for(i=0, offset=0; offset<sizeof(input); ++i) { UTF8_NEXT_CHAR_UNSAFE(input, offset, c); if(c != codePoints[i]){ log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, codePoints[i], c); } } for(i=0, offset=0; offset<sizeof(input); ++i) { U8_NEXT_UNSAFE(input, offset, c); if(c != codePoints[i]){ log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, codePoints[i], c); } } for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ UTF8_PREV_CHAR_UNSAFE(input, offset, c); if(c != codePoints[i]){ log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, codePoints[i], c); } } for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){ U8_PREV_UNSAFE(input, offset, c); if(c != codePoints[i]){ log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n", offset, codePoints[i], c); } } }
/*
 * Compares U8_NEXT/U8_PREV and L8_NEXT/L8_PREV against
 * U8_NEXT_UNSAFE/U8_PREV_UNSAFE on a byte string that includes encoded
 * surrogate code points. The string is walked forward to its end and then
 * backward to its start; every disagreement is reported through log_err().
 */
static void TestSurrogates() {
    static const uint8_t b[]={
        0xc3, 0x9f,             /*  00DF */
        0xed, 0x9f, 0xbf,       /*  D7FF */
        0xed, 0xa0, 0x81,       /*  D801 */
        0xed, 0xbf, 0xbe,       /*  DFFE */
        0xee, 0x80, 0x80,       /*  E000 */
        0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
    };
    static const UChar32 cp[]={
        0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
    };

    UChar32 unsafeCp, strictCp, lenientCp;
    int32_t pos, cpIndex, unsafeIdx, strictIdx, lenientIdx, length;
    int strictDisagrees;

    cpIndex=0;  /* index into cp[] */
    length=LENGTHOF(b);

    /* Forward pass: each macro starts from the same position pos. */
    pos=0;
    while(pos<length) {
        unsafeIdx=pos;
        U8_NEXT_UNSAFE(b, unsafeIdx, unsafeCp);

        strictIdx=pos;
        U8_NEXT(b, strictIdx, length, strictCp);

        lenientIdx=pos;
        L8_NEXT(b, lenientIdx, length, lenientCp);

        if(unsafeCp!=cp[cpIndex]) {
            log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)unsafeCp, (long)cp[cpIndex]);
        }

        /* U8_NEXT() returns <0 for surrogate code points */
        if(U_IS_SURROGATE(unsafeCp)) {
            strictDisagrees= strictCp>=0;
        } else {
            strictDisagrees= strictCp!=unsafeCp;
        }
        if(strictDisagrees) {
            log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)strictCp, (long)unsafeCp);
        }

        /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
        if(lenientCp!=unsafeCp) {
            log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)lenientCp, (long)unsafeCp);
        }

        /* All three macros must have consumed the same number of bytes. */
        if(!(strictIdx==unsafeIdx && lenientIdx==unsafeIdx)) {
            log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)pos, (long)pos);
        }

        ++cpIndex;      /* next code point */
        pos=unsafeIdx;  /* advance by one UTF-8 sequence */
    }

    /* Backward pass: continues from where the forward pass stopped. */
    while(pos>0) {
        --cpIndex;  /* previous code point */

        unsafeIdx=pos;
        U8_PREV_UNSAFE(b, unsafeIdx, unsafeCp);

        strictIdx=pos;
        U8_PREV(b, 0, strictIdx, strictCp);

        lenientIdx=pos;
        L8_PREV(b, 0, lenientIdx, lenientCp);

        if(unsafeCp!=cp[cpIndex]) {
            log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)unsafeCp, (long)cp[cpIndex]);
        }

        /* U8_PREV() returns <0 for surrogate code points */
        if(U_IS_SURROGATE(unsafeCp)) {
            strictDisagrees= strictCp>=0;
        } else {
            strictDisagrees= strictCp!=unsafeCp;
        }
        if(strictDisagrees) {
            log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)strictCp, (long)unsafeCp);
        }

        /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
        if(lenientCp!=unsafeCp) {
            log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)pos, (long)lenientCp, (long)unsafeCp);
        }

        /* All three macros must have stepped back by the same number of bytes. */
        if(!(strictIdx==unsafeIdx && lenientIdx==unsafeIdx)) {
            log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)pos, (long)pos);
        }

        pos=unsafeIdx;  /* go back by one UTF-8 sequence */
    }
}