Пример #1
0
static void TestNextPrevCharUnsafe() {
    /*
     * Use a (mostly) well-formed UTF-8 string and test at code point boundaries.
     * The behavior of _UNSAFE macros for ill-formed strings is undefined.
     */
    static const uint8_t input[]={
        0x61,
        0xf0, 0x90, 0x90, 0x81,
        0xc0, 0x80,  /* non-shortest form */
        0xe2, 0x82, 0xac,
        0xc2, 0xa1,
        0xf4, 0x8f, 0xbf, 0xbf,
        0x00
    };
    static const UChar32 codePoints[]={
        0x61,
        0x10401,
        0,
        0x20ac,
        0xa1,
        0x10ffff,
        0
    };

    UChar32 c;
    int32_t i;
    uint32_t offset;
    for(i=0, offset=0; offset<sizeof(input); ++i) {
        UTF8_NEXT_CHAR_UNSAFE(input, offset, c);
        if(c != codePoints[i]){
            log_err("ERROR: UTF8_NEXT_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
                    offset, codePoints[i], c);
        }
    }
    for(i=0, offset=0; offset<sizeof(input); ++i) {
        U8_NEXT_UNSAFE(input, offset, c);
        if(c != codePoints[i]){
            log_err("ERROR: U8_NEXT_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
                    offset, codePoints[i], c);
        }
    }

    for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
         UTF8_PREV_CHAR_UNSAFE(input, offset, c);
         if(c != codePoints[i]){
             log_err("ERROR: UTF8_PREV_CHAR_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
                     offset, codePoints[i], c);
         }
    }
    for(i=LENGTHOF(codePoints)-1, offset=sizeof(input); offset > 0; --i){
         U8_PREV_UNSAFE(input, offset, c);
         if(c != codePoints[i]){
             log_err("ERROR: U8_PREV_UNSAFE failed for offset=%ld. Expected:%lx Got:%lx\n",
                     offset, codePoints[i], c);
         }
    }
}
Пример #2
0
static void
TestSurrogates() {
    static const uint8_t b[]={
        0xc3, 0x9f,             /*  00DF */
        0xed, 0x9f, 0xbf,       /*  D7FF */
        0xed, 0xa0, 0x81,       /*  D801 */
        0xed, 0xbf, 0xbe,       /*  DFFE */
        0xee, 0x80, 0x80,       /*  E000 */
        0xf0, 0x97, 0xbf, 0xbe  /* 17FFE */
    };
    static const UChar32 cp[]={
        0xdf, 0xd7ff, 0xd801, 0xdffe, 0xe000, 0x17ffe
    };

    UChar32 cu, cs, cl;
    int32_t i, j, k, iu, is, il, length;

    k=0; /* index into cp[] */
    length=LENGTHOF(b);
    for(i=0; i<length;) {
        j=i;
        U8_NEXT_UNSAFE(b, j, cu);
        iu=j;

        j=i;
        U8_NEXT(b, j, length, cs);
        is=j;

        j=i;
        L8_NEXT(b, j, length, cl);
        il=j;

        if(cu!=cp[k]) {
            log_err("U8_NEXT_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
        }

        /* U8_NEXT() returns <0 for surrogate code points */
        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
            log_err("U8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
        }

        /* L8_NEXT() returns surrogate code points like U8_NEXT_UNSAFE() */
        if(cl!=cu) {
            log_err("L8_NEXT(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
        }

        if(is!=iu || il!=iu) {
            log_err("U8_NEXT(b[%ld]) or L8_NEXT(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
        }

        ++k;    /* next code point */
        i=iu;   /* advance by one UTF-8 sequence */
    }

    while(i>0) {
        --k; /* previous code point */

        j=i;
        U8_PREV_UNSAFE(b, j, cu);
        iu=j;

        j=i;
        U8_PREV(b, 0, j, cs);
        is=j;

        j=i;
        L8_PREV(b, 0, j, cl);
        il=j;

        if(cu!=cp[k]) {
            log_err("U8_PREV_UNSAFE(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cu, (long)cp[k]);
        }

        /* U8_PREV() returns <0 for surrogate code points */
        if(U_IS_SURROGATE(cu) ? cs>=0 : cs!=cu) {
            log_err("U8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cs, (long)cu);
        }

        /* L8_PREV() returns surrogate code points like U8_PREV_UNSAFE() */
        if(cl!=cu) {
            log_err("L8_PREV(b[%ld])=U+%04lX != U+%04lX\n", (long)i, (long)cl, (long)cu);
        }

        if(is!=iu || il !=iu) {
            log_err("U8_PREV(b[%ld]) or L8_PREV(b[%ld]) did not advance the index correctly\n", (long)i, (long)i);
        }

        i=iu;   /* go back by one UTF-8 sequence */
    }
}