コード例 #1
0
ファイル: bocu1tst.c プロジェクト: 00zhengfu00/third_party
/**
 * BOCU-1 encoder function.
 *
 * @param pPrev pointer to the integer that holds
 *        the "previous code point" state;
 *        the initial value should be 0 which
 *        encodeBocu1 will set to the actual BOCU-1 initial state value
 * @param c the code point to encode
 * @return the packed 1/2/3/4-byte encoding, see packDiff(),
 *         or 0 if an error occurs
 *
 * @see packDiff
 */
U_CFUNC int32_t
encodeBocu1(int32_t *pPrev, int32_t c) {
    int32_t prev;

    if(pPrev==NULL || c<0 || c>0x10ffff) {
        /* illegal argument */
        return 0;
    }

    prev=*pPrev;
    if(prev==0) {
        /* lenient handling of initial value 0 */
        prev=*pPrev=BOCU1_ASCII_PREV;
    }

    if(c<=0x20) {
        /*
         * ISO C0 control & space:
         * Encode directly for MIME compatibility,
         * and reset state except for space, to not disrupt compression.
         */
        if(c!=0x20) {
            *pPrev=BOCU1_ASCII_PREV;
        }
        return 0x01000000|c;
    }

    /*
     * all other Unicode code points c==U+0021..U+10ffff
     * are encoded with the difference c-prev
     *
     * a new prev is computed from c,
     * placed in the middle of a 0x80-block (for most small scripts) or
     * in the middle of the Unihan and Hangul blocks
     * to statistically minimize the following difference
     */
    *pPrev=bocu1Prev(c);
    return packDiff(c-prev);
}
コード例 #2
0
ファイル: bocu1tst.c プロジェクト: 00zhengfu00/third_party
/**
 * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
 * preserving lexical order.
 * Also checks for roundtripping of the difference encoding.
 * Test function.
 *
 * @param diff difference value to test, -0x10ffff..0x10ffff
 * @param p pointer to output byte array
 * @return p advanced by number of bytes output
 *
 * @see unpackDiff
 */
static uint8_t *
writeDiff(int32_t diff, uint8_t *p) {
    /* generate the difference as a packed value and serialize it */
    int32_t packed, initialPrev;

    packed=packDiff(diff);

    /*
     * bogus initial "prev" to work around
     * code point range check in decodeBocu1()
     */
    if(diff<=0) {
        initialPrev=0x10ffff;
    } else {
        initialPrev=-1;
    }

    if(diff!=unpackDiff(initialPrev, packed)) {
        log_err("error: unpackDiff(packDiff(diff=%ld)=0x%08lx)=%ld!=diff\n",
                diff, packed, unpackDiff(initialPrev, packed));
    }
    return p+writePacked(packed, p);
}
コード例 #3
0
ファイル: ucnvbocu.cpp プロジェクト: DavidCai1993/node
static void U_CALLCONV
_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                             UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit;
    uint8_t *target;
    int32_t targetCapacity;
    int32_t *offsets;

    int32_t prev, c, diff;

    int32_t sourceIndex, nextSourceIndex;

    /* set up the local pointers */
    cnv=pArgs->converter;
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    offsets=pArgs->offsets;

    /* get the converter state from UConverter */
    c=cnv->fromUChar32;
    prev=(int32_t)cnv->fromUnicodeStatus;
    if(prev==0) {
        prev=BOCU1_ASCII_PREV;
    }

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
    nextSourceIndex=0;

    /* conversion loop */
    if(c!=0 && targetCapacity>0) {
        goto getTrail;
    }

fastSingle:
    /* fast loop for single-byte differences */
    /* use only one loop counter variable, targetCapacity, not also source */
    diff=(int32_t)(sourceLimit-source);
    if(targetCapacity>diff) {
        targetCapacity=diff;
    }
    while(targetCapacity>0 && (c=*source)<0x3000) {
        if(c<=0x20) {
            if(c!=0x20) {
                prev=BOCU1_ASCII_PREV;
            }
            *target++=(uint8_t)c;
            *offsets++=nextSourceIndex++;
            ++source;
            --targetCapacity;
        } else {
            diff=c-prev;
            if(DIFF_IS_SINGLE(diff)) {
                prev=BOCU1_SIMPLE_PREV(c);
                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
                *offsets++=nextSourceIndex++;
                ++source;
                --targetCapacity;
            } else {
                break;
            }
        }
    }
    /* restore real values */
    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */

    /* regular loop for all cases */
    while(source<sourceLimit) {
        if(targetCapacity>0) {
            c=*source++;
            ++nextSourceIndex;

            if(c<=0x20) {
                /*
                 * ISO C0 control & space:
                 * Encode directly for MIME compatibility,
                 * and reset state except for space, to not disrupt compression.
                 */
                if(c!=0x20) {
                    prev=BOCU1_ASCII_PREV;
                }
                *target++=(uint8_t)c;
                *offsets++=sourceIndex;
                --targetCapacity;

                sourceIndex=nextSourceIndex;
                continue;
            }

            if(U16_IS_LEAD(c)) {
getTrail:
                if(source<sourceLimit) {
                    /* test the following code unit */
                    UChar trail=*source;
                    if(U16_IS_TRAIL(trail)) {
                        ++source;
                        ++nextSourceIndex;
                        c=U16_GET_SUPPLEMENTARY(c, trail);
                    }
                } else {
                    /* no more input */
                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
                    break;
                }
            }

            /*
             * all other Unicode code points c==U+0021..U+10ffff
             * are encoded with the difference c-prev
             *
             * a new prev is computed from c,
             * placed in the middle of a 0x80-block (for most small scripts) or
             * in the middle of the Unihan and Hangul blocks
             * to statistically minimize the following difference
             */
            diff=c-prev;
            prev=BOCU1_PREV(c);
            if(DIFF_IS_SINGLE(diff)) {
                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
                *offsets++=sourceIndex;
                --targetCapacity;
                sourceIndex=nextSourceIndex;
                if(c<0x3000) {
                    goto fastSingle;
                }
            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
                /* optimize 2-byte case */
                int32_t m;

                if(diff>=0) {
                    diff-=BOCU1_REACH_POS_1+1;
                    m=diff%BOCU1_TRAIL_COUNT;
                    diff/=BOCU1_TRAIL_COUNT;
                    diff+=BOCU1_START_POS_2;
                } else {
                    diff-=BOCU1_REACH_NEG_1;
                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
                    diff+=BOCU1_START_NEG_2;
                }
                *target++=(uint8_t)diff;
                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
                *offsets++=sourceIndex;
                *offsets++=sourceIndex;
                targetCapacity-=2;
                sourceIndex=nextSourceIndex;
            } else {
                int32_t length; /* will be 2..4 */

                diff=packDiff(diff);
                length=BOCU1_LENGTH_FROM_PACKED(diff);

                /* write the output character bytes from diff and length */
                /* from the first if in the loop we know that targetCapacity>0 */
                if(length<=targetCapacity) {
                    switch(length) {
                        /* each branch falls through to the next one */
                    case 4:
                        *target++=(uint8_t)(diff>>24);
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    case 3:
                        *target++=(uint8_t)(diff>>16);
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    case 2:
                        *target++=(uint8_t)(diff>>8);
                        *offsets++=sourceIndex;
                    /* case 1: handled above */
                        *target++=(uint8_t)diff;
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    default:
                        /* will never occur */
                        break;
                    }
                    targetCapacity-=length;
                    sourceIndex=nextSourceIndex;
                } else {
                    uint8_t *charErrorBuffer;

                    /*
                     * We actually do this backwards here:
                     * In order to save an intermediate variable, we output
                     * first to the overflow buffer what does not fit into the
                     * regular target.
                     */
                    /* we know that 1<=targetCapacity<length<=4 */
                    length-=targetCapacity;
                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
                    switch(length) {
                        /* each branch falls through to the next one */
                    case 3:
                        *charErrorBuffer++=(uint8_t)(diff>>16);
                        U_FALLTHROUGH;
                    case 2:
                        *charErrorBuffer++=(uint8_t)(diff>>8);
                        U_FALLTHROUGH;
                    case 1:
                        *charErrorBuffer=(uint8_t)diff;
                        U_FALLTHROUGH;
                    default:
                        /* will never occur */
                        break;
                    }
                    cnv->charErrorBufferLength=(int8_t)length;

                    /* now output what fits into the regular target */
                    diff>>=8*length; /* length was reduced by targetCapacity */
                    switch(targetCapacity) {
                        /* each branch falls through to the next one */
                    case 3:
                        *target++=(uint8_t)(diff>>16);
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    case 2:
                        *target++=(uint8_t)(diff>>8);
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    case 1:
                        *target++=(uint8_t)diff;
                        *offsets++=sourceIndex;
                        U_FALLTHROUGH;
                    default:
                        /* will never occur */
                        break;
                    }

                    /* target overflow */
                    targetCapacity=0;
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            }
        } else {