virtual int32_t move32(int32_t delta, CharacterIterator::EOrigin origin){    
        switch(origin) {
        case kStart:
            pos = begin;
            if(delta > 0) {
                U16_FWD_N(text, pos, end, delta);
            }
            break;
        case kCurrent:
            if(delta > 0) {
                U16_FWD_N(text, pos, end, delta);
            } else {
                U16_BACK_N(text, begin, pos, -delta);
            }
            break;
        case kEnd:
            pos = end;
            if(delta < 0) {
                U16_BACK_N(text, begin, pos, -delta);
            }
            break;
        default:
            break;
        }

        return pos;
    };
Beispiel #2
0
int utf16_cp_to_cu(const UChar *ustring, int32_t ustring_len, long cp_offset, int32_t *cu_offset, UErrorCode *status)
{
    if (0 != cp_offset) {
        int32_t _cp_count = u_countChar32(ustring, ustring_len);
        if (cp_offset < 0) {
            if (cp_offset < -_cp_count) {
                *status = U_INDEX_OUTOFBOUNDS_ERROR;
                return FAILURE;
            }
            *cu_offset = ustring_len;
            U16_BACK_N(ustring, 0, *cu_offset, -cp_offset);
        } else {
            if (cp_offset >= _cp_count) {
                *status = U_INDEX_OUTOFBOUNDS_ERROR;
                return FAILURE;
            }
            U16_FWD_N(ustring, *cu_offset, ustring_len, cp_offset);
        }
    }

    return SUCCESS;
}
Beispiel #3
0
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    U_ASSERT(destLength>=0);

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            digit=basicToDigit[(uint8_t)src[in++]];
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;
            /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(t>TMAX) {
                t=TMAX;
            }
            */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || U_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=U16_LENGTH(n);
        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=U16_LEAD(n);
                dest[codeUnitIndex+1]=U16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        destLength+=cpLength;
        U_ASSERT(destLength>=0);
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
Beispiel #4
0
static void TestFwdBack(){ 
    static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000};
    static uint16_t fwd_unsafe[] ={1, 3, 5, 6,  8, 10, 11, 12};
    static uint16_t fwd_safe[]   ={1, 3, 5, 6, 7, 8, 10, 11, 12};
    static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0};
    static uint16_t back_safe[]  ={11, 10, 8, 7, 6, 5, 3, 1, 0};

    static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1};
    static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11};
    static uint16_t fwd_N_safe[]   ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */
    static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3};
    static uint16_t back_N_safe[]  ={12, 11, 8, 5, 3, 0, 0};   

    uint16_t offunsafe=0, offsafe=0;
    uint16_t i=0;
    while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){
        UTF16_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
        }
        i++;
    }

    offunsafe=0, offsafe=0;
    i=0;
    while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){
        U16_FWD_1_UNSAFE(input, offunsafe);
        if(offunsafe != fwd_unsafe[i]){
            log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe);
        }
        i++;
    }

    i=0;
    while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){
        UTF16_FWD_1_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR);
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    i=0;
    while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){
        U16_FWD_1(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR);
        if(offsafe != fwd_safe[i]){
            log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe);
        }
        i++;
    }

    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    i=0;
    while(offunsafe > 0){
        UTF16_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
        }
        i++;
    }

    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    i=0;
    while(offunsafe > 0){
        U16_BACK_1_UNSAFE(input, offunsafe);
        if(offunsafe != back_unsafe[i]){
            log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe);
        }
        i++;
    }

    i=0;
    while(offsafe > 0){
        UTF16_BACK_1_SAFE(input,0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
        }
        i++;
    }

    i=0;
    while(offsafe > 0){
        U16_BACK_1(input,0,  offsafe);
        if(offsafe != back_safe[i]){
            log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe);
        }
        i++;
    }

    offunsafe=0;
    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
        UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
        }
    }

    offunsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){  /*didn't want it to fail(we assume 0<i<length)*/
        U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != fwd_N_unsafe[i]){
            log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe);
        }
    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF16_FWD_N_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
        }
    
    }

    offsafe=0;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U16_FWD_N(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]);
        if(offsafe != fwd_N_safe[i]){
            log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe);
        }
    
    }

    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
        }
    }

    offunsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){
        U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]);
        if(offunsafe != back_N_unsafe[i]){
            log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe);
        }
    }

    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
        }
    }

    offsafe=sizeof(input)/U_SIZEOF_UCHAR;
    for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){
        U16_BACK_N(input, 0, offsafe, Nvalue[i]);
        if(offsafe != back_N_safe[i]){
            log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe);
        }
    }
}
Beispiel #5
0
/* keep this in sync with utf8tst.c's TestNulTerminated() */
static void TestNulTerminated() {
    static const UChar input[]={
        /*  0 */  0x61,
        /*  1 */  0xd801, 0xdc01,
        /*  3 */  0xdc01,
        /*  4 */  0x62,
        /*  5 */  0xd801,
        /*  6 */  0x00
        /*  7 */
    };
    static const UChar32 result[]={
        0x61,
        0x10401,
        0xdc01,
        0x62,
        0xd801,
        0
    };

    UChar32 c, c2;
    int32_t i0, i=0, j, k, expectedIndex;
    int32_t cpIndex=0;
    do {
        i0=i;
        U16_NEXT(input, i, -1, c);
        if(c!=result[cpIndex]) {
            log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", i0, c, result[cpIndex]);
        }
        j=i0;
        U16_FWD_1(input, j, -1);
        if(j!=i) {
            log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", j, i);
        }
        ++cpIndex;
        /*
         * Move by this many code points from the start.
         * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
         */
        expectedIndex= (c==0) ? i-1 : i;
        k=0;
        U16_FWD_N(input, k, -1, cpIndex);
        if(k!=expectedIndex) {
            log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", k, expectedIndex);
        }
    } while(c!=0);

    i=0;
    do {
        j=i0=i;
        U16_NEXT(input, i, -1, c);
        do {
            U16_GET(input, 0, j, -1, c2);
            if(c2!=c) {
                log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", i0, c, c2, j);
            }
            /* U16_SET_CP_LIMIT moves from a non-lead byte to the limit of the code point */
            k=j+1;
            U16_SET_CP_LIMIT(input, 0, k, -1);
            if(k!=i) {
                log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", i, j+1, k);
            }
        } while(++j<i);
    } while(c!=0);
}