/**
 * Compare two strings supplied as char buffers (UTF-8, per the function
 * name) using the given collator.
 *
 * The work is delegated to Collator::internalCompareUTF8() on the C++
 * object obtained through Collator::fromUCollator().
 *
 * @param coll          Collator handle to compare with.
 * @param source        First string.
 * @param sourceLength  Length of source; forwarded unchanged.
 * @param target        Second string.
 * @param targetLength  Length of target; forwarded unchanged.
 * @param status        In/out error code. It is dereferenced
 *                      unconditionally. If it already holds a failure on
 *                      entry, no comparison is performed.
 * @return The value produced by internalCompareUTF8(), or UCOL_EQUAL when
 *         the comparison was skipped because of an incoming failure.
 */
U_CAPI UCollationResult U_EXPORT2
ucol_strcollUTF8(
        const UCollator *coll,
        const char      *source,
        int32_t         sourceLength,
        const char      *target,
        int32_t         targetLength,
        UErrorCode      *status)
{
    U_ALIGN_CODE(16);

    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
    }

    /* Stays UCOL_EQUAL unless the comparison below actually runs. */
    UCollationResult order = UCOL_EQUAL;

    if (!U_FAILURE(*status)) {
        const Collator *collator = Collator::fromUCollator(coll);
        order = collator->internalCompareUTF8(
                source, sourceLength, target, targetLength, *status);
    }

    /* Single exit point: trace the outcome together with the status. */
    UTRACE_EXIT_VALUE_STATUS(order, *status);
    return order;
}
/**
 * Compare two UChar strings using the given collator.
 *
 * The work is delegated to Collator::compare() on the C++ object obtained
 * through Collator::fromUCollator().
 *
 * This entry point has no error-code parameter. A local UErrorCode,
 * initialized to U_ZERO_ERROR, is handed to compare(); its final value is
 * reported only through the trace exit and is otherwise discarded.
 *
 * @param coll          Collator handle to compare with.
 * @param source        First string.
 * @param sourceLength  Length of source; forwarded unchanged.
 * @param target        Second string.
 * @param targetLength  Length of target; forwarded unchanged.
 * @return The value produced by Collator::compare().
 */
U_CAPI UCollationResult U_EXPORT2
ucol_strcoll(
        const UCollator *coll,
        const UChar     *source,
        int32_t         sourceLength,
        const UChar     *target,
        int32_t         targetLength)
{
    U_ALIGN_CODE(16);

    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
    }

    /* Internal error code; callers of this function never see it. */
    UErrorCode errorCode = U_ZERO_ERROR;

    const Collator *collator = Collator::fromUCollator(coll);
    UCollationResult order =
        collator->compare(source, sourceLength, target, targetLength, errorCode);

    UTRACE_EXIT_VALUE_STATUS(order, errorCode);
    return order;
}
static void _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, UErrorCode *pErrorCode) { UConverter *cnv; const UChar *source, *sourceLimit; uint8_t *target; int32_t targetCapacity; int32_t *offsets; int32_t prev, c, diff; int32_t sourceIndex, nextSourceIndex; U_ALIGN_CODE(16) /* set up the local pointers */ cnv=pArgs->converter; source=pArgs->source; sourceLimit=pArgs->sourceLimit; target=(uint8_t *)pArgs->target; targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; /* get the converter state from UConverter */ c=cnv->fromUChar32; prev=(int32_t)cnv->fromUnicodeStatus; if(prev==0) { prev=BOCU1_ASCII_PREV; } /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex= c==0 ? 0 : -1; nextSourceIndex=0; /* conversion loop */ if(c!=0 && targetCapacity>0) { goto getTrail; } fastSingle: /* fast loop for single-byte differences */ /* use only one loop counter variable, targetCapacity, not also source */ diff=(int32_t)(sourceLimit-source); if(targetCapacity>diff) { targetCapacity=diff; } while(targetCapacity>0 && (c=*source)<0x3000) { if(c<=0x20) { if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { diff=c-prev; if(DIFF_IS_SINGLE(diff)) { prev=BOCU1_SIMPLE_PREV(c); *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=nextSourceIndex++; ++source; --targetCapacity; } else { break; } } } /* restore real values */ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ /* regular loop for all cases */ while(source<sourceLimit) { if(targetCapacity>0) { c=*source++; ++nextSourceIndex; if(c<=0x20) { /* * ISO C0 control & space: * Encode directly for MIME compatibility, * and reset state except for space, to not disrupt compression. 
*/ if(c!=0x20) { prev=BOCU1_ASCII_PREV; } *target++=(uint8_t)c; *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; continue; } if(U16_IS_LEAD(c)) { getTrail: if(source<sourceLimit) { /* test the following code unit */ UChar trail=*source; if(U16_IS_TRAIL(trail)) { ++source; ++nextSourceIndex; c=U16_GET_SUPPLEMENTARY(c, trail); } } else { /* no more input */ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ break; } } /* * all other Unicode code points c==U+0021..U+10ffff * are encoded with the difference c-prev * * a new prev is computed from c, * placed in the middle of a 0x80-block (for most small scripts) or * in the middle of the Unihan and Hangul blocks * to statistically minimize the following difference */ diff=c-prev; prev=BOCU1_PREV(c); if(DIFF_IS_SINGLE(diff)) { *target++=(uint8_t)PACK_SINGLE_DIFF(diff); *offsets++=sourceIndex; --targetCapacity; sourceIndex=nextSourceIndex; if(c<0x3000) { goto fastSingle; } } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { /* optimize 2-byte case */ int32_t m; if(diff>=0) { diff-=BOCU1_REACH_POS_1+1; m=diff%BOCU1_TRAIL_COUNT; diff/=BOCU1_TRAIL_COUNT; diff+=BOCU1_START_POS_2; } else { diff-=BOCU1_REACH_NEG_1; NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); diff+=BOCU1_START_NEG_2; } *target++=(uint8_t)diff; *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); *offsets++=sourceIndex; *offsets++=sourceIndex; targetCapacity-=2; sourceIndex=nextSourceIndex; } else { int32_t length; /* will be 2..4 */ diff=packDiff(diff); length=BOCU1_LENGTH_FROM_PACKED(diff); /* write the output character bytes from diff and length */ /* from the first if in the loop we know that targetCapacity>0 */ if(length<=targetCapacity) { switch(length) { /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(diff>>24); *offsets++=sourceIndex; case 3: /*fall through*/ *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; case 2: /*fall through*/ *target++=(uint8_t)(diff>>8); 
*offsets++=sourceIndex; /* case 1: handled above */ *target++=(uint8_t)diff; *offsets++=sourceIndex; default: /* will never occur */ break; } targetCapacity-=length; sourceIndex=nextSourceIndex; } else { uint8_t *charErrorBuffer; /* * We actually do this backwards here: * In order to save an intermediate variable, we output * first to the overflow buffer what does not fit into the * regular target. */ /* we know that 1<=targetCapacity<length<=4 */ length-=targetCapacity; charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; switch(length) { /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(diff>>16); case 2: /*fall through*/ *charErrorBuffer++=(uint8_t)(diff>>8); case 1: /*fall through*/ *charErrorBuffer=(uint8_t)diff; default: /* will never occur */ break; } cnv->charErrorBufferLength=(int8_t)length; /* now output what fits into the regular target */ diff>>=8*length; /* length was reduced by targetCapacity */ switch(targetCapacity) { /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(diff>>16); *offsets++=sourceIndex; case 2: /*fall through*/ *target++=(uint8_t)(diff>>8); *offsets++=sourceIndex; case 1: /*fall through*/ *target++=(uint8_t)diff; *offsets++=sourceIndex; default: /* will never occur */ break; } /* target overflow */ targetCapacity=0; *pErrorCode=U_BUFFER_OVERFLOW_ERROR; break; } } } else {