/**
 * Store the bytes of a packed BOCU-1 sequence into a byte array.
 * No capacity check is performed on the destination.
 * Test function.
 *
 * The number of bytes is taken from BOCU1_LENGTH_FROM_PACKED(packed).
 * The bytes are emitted starting with the most significant one that is
 * in use and ending with the lowest byte of packed.
 * If the length is outside 1..4 then nothing is written.
 *
 * @param packed packed BOCU-1 byte sequence, see packDiff()
 * @param p pointer to the byte array that receives the bytes
 * @return number of bytes, as computed by BOCU1_LENGTH_FROM_PACKED()
 *
 * @see packDiff
 */
static int32_t
writePacked(int32_t packed, uint8_t *p) {
    int32_t length=BOCU1_LENGTH_FROM_PACKED(packed);

    if(1<=length && length<=4) {
        int32_t shift;

        /* byte i (counted from the end) sits at bit position 8*i */
        for(shift=8*(length-1); shift>=0; shift-=8) {
            *p++=(uint8_t)(packed>>shift);
        }
    }
    return length;
}
/**
 * Decode a packed BOCU-1 non-C0/space byte sequence and return the
 * difference relative to initialPrev.
 * Used only for round-trip testing of the difference encoding and decoding.
 * Test function.
 *
 * A fresh decoder state is set up with prev=initialPrev, then every byte of
 * the packed sequence is passed to decodeBocu1(), most significant used
 * byte first. Only the result for the final (lowest) byte is used;
 * the results for the preceding bytes are discarded.
 *
 * @param initialPrev bogus "previous code point" value to make sure that
 *                    the resulting code point is in the range 0..0x10ffff
 * @param packed packed BOCU-1 byte sequence
 * @return the decodeBocu1() result for the last byte minus initialPrev,
 *         or -0x7fffffff if BOCU1_LENGTH_FROM_PACKED(packed) is not 1..4
 *
 * @see packDiff
 * @see writeDiff
 */
static int32_t
unpackDiff(int32_t initialPrev, int32_t packed) {
    Bocu1Rx rx={ 0, 0, 0 };
    int32_t length, shift;

    rx.prev=initialPrev;

    length=BOCU1_LENGTH_FROM_PACKED(packed);
    if(length<1 || 4<length) {
        /* not a valid packed sequence length */
        return -0x7fffffff;
    }

    /* feed all bytes except the last one; their results are not needed */
    for(shift=8*(length-1); shift>0; shift-=8) {
        decodeBocu1(&rx, (uint8_t)(packed>>shift));
    }

    /* the last byte completes the sequence; subtract initial prev */
    return decodeBocu1(&rx, (uint8_t)packed)-initialPrev;
}
static void U_CALLCONV _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, UErrorCode *pErrorCode) { UConverter *cnv; const UChar *source, *sourceLimit; uint8_t *target; int32_t targetCapacity; int32_t *offsets; int32_t prev, c, diff; int32_t sourceIndex, nextSourceIndex; /* set up the local pointers */ cnv=pArgs->converter; source=pArgs->source; sourceLimit=pArgs->sourceLimit; target=(uint8_t *)pArgs->target; targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; /* get the converter state from UConverter */ c=cnv->fromUChar32; prev=(int32_t)cnv->fromUnicodeStatus; if(prev==0) { prev=BOCU1_ASCII_PREV; } /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex= c==0 ? 0 : -1; nextSourceIndex=0; /* conversion loop */ if(c!=0 && targetCapacity>0) { goto getTrail; } fastSingle: /* fast loop for single-byte differences */ /* use only one loop counter variable, targetCapacity, not also source */ diff=(int32_t)(sourceLimit-source); if(targetCapacity>diff) { targetCapacity=diff; } while(targetCapacity>0 && (c=*source)<0x3000) { if(c<=0x20) { if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { diff=c-prev; if(DIFF_IS_SINGLE(diff)) { prev=BOCU1_SIMPLE_PREV(c); *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { break; } } } /* restore real values */ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ /* regular loop for all cases */ while(source<sourceLimit) { if(targetCapacity>0) { c=*source++; ++nextSourceIndex; if(c<=0x20) { /* * ISO C0 control & space: * Encode directly for MIME compatibility, * and reset state except for space, to not disrupt compression. 
*/ if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; continue; } if(U16_IS_LEAD(c)) { getTrail: if(source<sourceLimit) { /* test the following code unit */ UChar trail=*source; if(U16_IS_TRAIL(trail)) { ++source; ++nextSourceIndex; c=U16_GET_SUPPLEMENTARY(c, trail); } } else { /* no more input */ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ break; } } /* * all other Unicode code points c==U+0021..U+10ffff * are encoded with the difference c-prev * * a new prev is computed from c, * placed in the middle of a 0x80-block (for most small scripts) or * in the middle of the Unihan and Hangul blocks * to statistically minimize the following difference */ diff=c-prev; prev=BOCU1_PREV(c); if(DIFF_IS_SINGLE(diff)) { *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; if(c<0x3000) { goto fastSingle; } } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { /* optimize 2-byte case */ int32_t m; if(diff>=0) { diff-=BOCU1_REACH_POS_1+1; m=diff%BOCU1_TRAIL_COUNT; diff/=BOCU1_TRAIL_COUNT; diff+=BOCU1_START_POS_2; } else { diff-=BOCU1_REACH_NEG_1; NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); diff+=BOCU1_START_NEG_2; } *target++=(uint8_t)diff; *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); *offsets++=sourceIndex; *offsets++=sourceIndex; targetCapacity-=2; sourceIndex=nextSourceIndex; } else { int32_t length; /* will be 2..4 */ diff=packDiff(diff); length=BOCU1_LENGTH_FROM_PACKED(diff); /* write the output character bytes from diff and length */ /* from the first if in the loop we know that targetCapacity>0 */ if(length<=targetCapacity) { switch(length) { /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(diff>>24); *offsets++=sourceIndex; U_FALLTHROUGH; case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; U_FALLTHROUGH; case 2: *target++=(uint8_t)(diff>>8); 
*offsets++=sourceIndex; /* case 1: handled above */ *target++=(uint8_t)diff; *offsets++=sourceIndex; U_FALLTHROUGH; default: /* will never occur */ break; } targetCapacity-=length; sourceIndex=nextSourceIndex; } else { uint8_t *charErrorBuffer; /* * We actually do this backwards here: * In order to save an intermediate variable, we output * first to the overflow buffer what does not fit into the * regular target. */ /* we know that 1<=targetCapacity<length<=4 */ length-=targetCapacity; charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; switch(length) { /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(diff>>16); U_FALLTHROUGH; case 2: *charErrorBuffer++=(uint8_t)(diff>>8); U_FALLTHROUGH; case 1: *charErrorBuffer=(uint8_t)diff; U_FALLTHROUGH; default: /* will never occur */ break; } cnv->charErrorBufferLength=(int8_t)length; /* now output what fits into the regular target */ diff>>=8*length; /* length was reduced by targetCapacity */ switch(targetCapacity) { /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; U_FALLTHROUGH; case 2: *target++=(uint8_t)(diff>>8); *offsets++=sourceIndex; U_FALLTHROUGH; case 1: *target++=(uint8_t)diff; *offsets++=sourceIndex; U_FALLTHROUGH; default: /* will never occur */ break; } /* target overflow */ targetCapacity=0; *pErrorCode=U_BUFFER_OVERFLOW_ERROR; break; } } } else {