virtual int32_t move32(int32_t delta, CharacterIterator::EOrigin origin){ switch(origin) { case kStart: pos = begin; if(delta > 0) { U16_FWD_N(text, pos, end, delta); } break; case kCurrent: if(delta > 0) { U16_FWD_N(text, pos, end, delta); } else { U16_BACK_N(text, begin, pos, -delta); } break; case kEnd: pos = end; if(delta < 0) { U16_BACK_N(text, begin, pos, -delta); } break; default: break; } return pos; };
/*
 * Translate a code point offset into a UTF-16 code unit offset.
 *
 * ustring / ustring_len  the UTF-16 text and its length in code units
 * cp_offset              offset in code points; negative values count back
 *                        from the end of the string
 * cu_offset              in/out code unit offset:
 *                          - cp_offset == 0: left untouched
 *                          - cp_offset  > 0: advanced by cp_offset code points
 *                            starting from the value it holds on entry
 *                            (NOTE(review): callers presumably pass 0 - confirm)
 *                          - cp_offset  < 0: set to ustring_len, then moved back
 * status                 receives U_INDEX_OUTOFBOUNDS_ERROR when the offset
 *                        does not fit the string
 *
 * Returns SUCCESS, or FAILURE when cp_offset is out of range.
 */
int utf16_cp_to_cu(const UChar *ustring, int32_t ustring_len, long cp_offset, int32_t *cu_offset, UErrorCode *status)
{
	int32_t total_code_points;

	/* Nothing to convert for a zero offset. */
	if (cp_offset == 0) {
		return SUCCESS;
	}

	total_code_points = u_countChar32(ustring, ustring_len);

	if (cp_offset > 0) {
		/* Forward: the offset must address an existing code point. */
		if (cp_offset >= total_code_points) {
			*status = U_INDEX_OUTOFBOUNDS_ERROR;
			return FAILURE;
		}
		U16_FWD_N(ustring, *cu_offset, ustring_len, cp_offset);
		return SUCCESS;
	}

	/* Backward: at most total_code_points steps from the end are allowed. */
	if (cp_offset < -total_code_points) {
		*status = U_INDEX_OUTOFBOUNDS_ERROR;
		return FAILURE;
	}
	*cu_offset = ustring_len;
	U16_BACK_N(ustring, 0, *cu_offset, -cp_offset);

	return SUCCESS;
}
/**
 * Decodes a Punycode string into UTF-16.
 *
 * @param src          Punycode input; must not be NULL
 * @param srcLength    number of UChars in src, or -1 to use u_strlen(src)
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars
 * @param caseFlags    optional array parallel to dest; when not NULL it receives
 *                     one uppercase flag per output code unit
 * @param pErrorCode   in/out error code; the function returns 0 immediately if
 *                     it is NULL or already indicates a failure
 * @return 0 on error, otherwise the result of u_terminateUChars() for the
 *         decoded length (the length keeps being counted even when the output
 *         no longer fits into dest)
 *
 * Errors set by this function:
 *   U_ILLEGAL_ARGUMENT_ERROR  bad src/srcLength/dest/destCapacity combination
 *   U_INVALID_CHAR_FOUND      a non-basic code unit before the last delimiter,
 *                             or a code unit that is not a Punycode digit
 *   U_ILLEGAL_CHAR_FOUND      truncated input, integer overflow, or a decoded
 *                             value that is not a valid non-surrogate code point
 */
U_CFUNC int32_t
u_strFromPunycode(const UChar *src, int32_t srcLength,
                  UChar *dest, int32_t destCapacity,
                  UBool *caseFlags,
                  UErrorCode *pErrorCode) {
    int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t,
            destCPCount, firstSupplementaryIndex, cpLength;
    UChar b;

    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /*
     * Handle the basic code points:
     * Let basicLength be the number of input code points
     * before the last delimiter, or 0 if there is none,
     * then copy the first basicLength code points to the output.
     *
     * The two following loops iterate backward.
     */
    for(j=srcLength; j>0;) {
        if(src[--j]==DELIMITER) {
            break;
        }
    }
    destLength=basicLength=destCPCount=j;
    U_ASSERT(destLength>=0);

    while(j>0) {
        b=src[--j];
        if(!IS_BASIC(b)) {
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }

        /* Keep validating past the end of dest, but only write what fits. */
        if(j<destCapacity) {
            dest[j]=(UChar)b;

            if(caseFlags!=NULL) {
                caseFlags[j]=IS_BASIC_UPPERCASE(b);
            }
        }
    }

    /* Initialize the state: */
    n=INITIAL_N;
    i=0;
    bias=INITIAL_BIAS;
    firstSupplementaryIndex=1000000000;

    /*
     * Main decoding loop:
     * Start just after the last delimiter if any
     * basic code points were copied; start at the beginning otherwise.
     */
    for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) {
        /*
         * in is the index of the next character to be consumed, and
         * destCPCount is the number of code points in the output array.
         *
         * Decode a generalized variable-length integer into delta,
         * which gets added to i.  The overflow checking is easier
         * if we increase i as we go, then subtract off its starting
         * value at the end to obtain delta.
         */
        for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) {
            if(in>=srcLength) {
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            /*
             * Inspect the full 16-bit code unit before using it as an index.
             * basicToDigit[] is indexed with 8-bit values; narrowing the code
             * unit to uint8_t first would make e.g. U+0161 alias to 'a' (0x61)
             * and be accepted as a digit.
             */
            b=src[in++];
            if(b>0xff) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            digit=basicToDigit[b];
            if(digit<0) {
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
            if(digit>(0x7fffffff-i)/w) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }

            i+=digit*w;
            /*
             * RAM: the plain clamp
             *     t=k-bias; if(t<TMIN) t=TMIN; else if(t>TMAX) t=TMAX;
             * was replaced by the form below for conformance with
             * draft-ietf-idn-punycode-03.txt
             */
            t=k-bias;
            if(t<TMIN) {
                t=TMIN;
            } else if(k>=(bias+TMAX)) {
                t=TMAX;
            }
            if(digit<t) {
                break;
            }

            if(w>0x7fffffff/(BASE-t)) {
                /* integer overflow */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                return 0;
            }
            w*=BASE-t;
        }

        /*
         * Modification from sample code:
         * Increments destCPCount here,
         * where needed instead of in for() loop tail.
         */
        ++destCPCount;
        bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0));

        /*
         * i was supposed to wrap around from (incremented) destCPCount to 0,
         * incrementing n each time, so we'll fix that now:
         */
        if(i/destCPCount>(0x7fffffff-n)) {
            /* integer overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        n+=i/destCPCount;
        i%=destCPCount;
        /* not needed for Punycode: */
        /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

        if(n>0x10ffff || U_IS_SURROGATE(n)) {
            /* Unicode code point overflow */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            return 0;
        }

        /* Insert n at position i of the output: */
        cpLength=U16_LENGTH(n);
        if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) {
            int32_t codeUnitIndex;

            /*
             * Handle indexes when supplementary code points are present.
             *
             * In almost all cases, there will be only BMP code points before i
             * and even in the entire string.
             * This is handled with the same efficiency as with UTF-32.
             *
             * Only the rare cases with supplementary code points are handled
             * more slowly - but not too bad since this is an insertion anyway.
             */
            if(i<=firstSupplementaryIndex) {
                codeUnitIndex=i;
                if(cpLength>1) {
                    firstSupplementaryIndex=codeUnitIndex;
                } else {
                    ++firstSupplementaryIndex;
                }
            } else {
                codeUnitIndex=firstSupplementaryIndex;
                U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex);
            }

            /* use the UChar index codeUnitIndex instead of the code point index i */
            if(codeUnitIndex<destLength) {
                /* Open a gap of cpLength code units for the new code point. */
                uprv_memmove(dest+codeUnitIndex+cpLength,
                             dest+codeUnitIndex,
                             (destLength-codeUnitIndex)*U_SIZEOF_UCHAR);
                if(caseFlags!=NULL) {
                    uprv_memmove(caseFlags+codeUnitIndex+cpLength,
                                 caseFlags+codeUnitIndex,
                                 destLength-codeUnitIndex);
                }
            }
            if(cpLength==1) {
                /* BMP, insert one code unit */
                dest[codeUnitIndex]=(UChar)n;
            } else {
                /* supplementary character, insert two code units */
                dest[codeUnitIndex]=U16_LEAD(n);
                dest[codeUnitIndex+1]=U16_TRAIL(n);
            }
            if(caseFlags!=NULL) {
                /* Case of last character determines uppercase flag: */
                caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]);
                if(cpLength==2) {
                    caseFlags[codeUnitIndex+1]=FALSE;
                }
            }
        }
        /* The length is counted even when nothing was written (preflighting). */
        destLength+=cpLength;
        U_ASSERT(destLength>=0);
        ++i;
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
static void TestFwdBack(){ static UChar input[]={0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062, 0xd841, 0xd7ff, 0xd841, 0xdc41, 0xdc00, 0x0000}; static uint16_t fwd_unsafe[] ={1, 3, 5, 6, 8, 10, 11, 12}; static uint16_t fwd_safe[] ={1, 3, 5, 6, 7, 8, 10, 11, 12}; static uint16_t back_unsafe[]={11, 9, 8, 7, 6, 5, 3, 1, 0}; static uint16_t back_safe[] ={11, 10, 8, 7, 6, 5, 3, 1, 0}; static uint16_t Nvalue[]= {0, 1, 2, 3, 1, 2, 1}; static uint16_t fwd_N_unsafe[] ={0, 1, 5, 10, 11}; static uint16_t fwd_N_safe[] ={0, 1, 5, 8, 10, 12, 12}; /*safe macro keeps it at the end of the string */ static uint16_t back_N_unsafe[]={12, 11, 8, 5, 3}; static uint16_t back_N_safe[] ={12, 11, 8, 5, 3, 0, 0}; uint16_t offunsafe=0, offsafe=0; uint16_t i=0; while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){ UTF16_FWD_1_UNSAFE(input, offunsafe); if(offunsafe != fwd_unsafe[i]){ log_err("ERROR: Forward_unsafe offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe); } i++; } offunsafe=0, offsafe=0; i=0; while(offunsafe < sizeof(input)/U_SIZEOF_UCHAR){ U16_FWD_1_UNSAFE(input, offunsafe); if(offunsafe != fwd_unsafe[i]){ log_err("ERROR: U16_FWD_1_UNSAFE offset expected:%d, Got:%d\n", fwd_unsafe[i], offunsafe); } i++; } i=0; while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){ UTF16_FWD_1_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR); if(offsafe != fwd_safe[i]){ log_err("ERROR: Forward_safe offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); } i++; } i=0; while(offsafe < sizeof(input)/U_SIZEOF_UCHAR){ U16_FWD_1(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR); if(offsafe != fwd_safe[i]){ log_err("ERROR: U16_FWD_1 offset expected:%d, Got:%d\n", fwd_safe[i], offsafe); } i++; } offunsafe=sizeof(input)/U_SIZEOF_UCHAR; offsafe=sizeof(input)/U_SIZEOF_UCHAR; i=0; while(offunsafe > 0){ UTF16_BACK_1_UNSAFE(input, offunsafe); if(offunsafe != back_unsafe[i]){ log_err("ERROR: Backward_unsafe offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe); } i++; } offunsafe=sizeof(input)/U_SIZEOF_UCHAR; 
offsafe=sizeof(input)/U_SIZEOF_UCHAR; i=0; while(offunsafe > 0){ U16_BACK_1_UNSAFE(input, offunsafe); if(offunsafe != back_unsafe[i]){ log_err("ERROR: U16_BACK_1_UNSAFE offset expected:%d, Got:%d\n", back_unsafe[i], offunsafe); } i++; } i=0; while(offsafe > 0){ UTF16_BACK_1_SAFE(input,0, offsafe); if(offsafe != back_safe[i]){ log_err("ERROR: Backward_safe offset expected:%d, Got:%d\n", back_unsafe[i], offsafe); } i++; } i=0; while(offsafe > 0){ U16_BACK_1(input,0, offsafe); if(offsafe != back_safe[i]){ log_err("ERROR: U16_BACK_1 offset expected:%d, Got:%d\n", back_unsafe[i], offsafe); } i++; } offunsafe=0; offsafe=0; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/ UTF16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]); if(offunsafe != fwd_N_unsafe[i]){ log_err("ERROR: Forward_N_unsafe offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe); } } offunsafe=0; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){ /*didn't want it to fail(we assume 0<i<length)*/ U16_FWD_N_UNSAFE(input, offunsafe, Nvalue[i]); if(offunsafe != fwd_N_unsafe[i]){ log_err("ERROR: U16_FWD_N_UNSAFE offset expected:%d, Got:%d\n", fwd_N_unsafe[i], offunsafe); } } offsafe=0; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){ UTF16_FWD_N_SAFE(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]); if(offsafe != fwd_N_safe[i]){ log_err("ERROR: Forward_N_safe offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe); } } offsafe=0; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){ U16_FWD_N(input, offsafe, sizeof(input)/U_SIZEOF_UCHAR, Nvalue[i]); if(offsafe != fwd_N_safe[i]){ log_err("ERROR: U16_FWD_N offset expected:%d, Got:%d\n", fwd_N_safe[i], offsafe); } } offunsafe=sizeof(input)/U_SIZEOF_UCHAR; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){ UTF16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]); if(offunsafe != back_N_unsafe[i]){ log_err("ERROR: backward_N_unsafe offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe); } } 
offunsafe=sizeof(input)/U_SIZEOF_UCHAR; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0])-2; i++){ U16_BACK_N_UNSAFE(input, offunsafe, Nvalue[i]); if(offunsafe != back_N_unsafe[i]){ log_err("ERROR: U16_BACK_N_UNSAFE offset expected:%d, Got:%d\n", back_N_unsafe[i], offunsafe); } } offsafe=sizeof(input)/U_SIZEOF_UCHAR; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){ UTF16_BACK_N_SAFE(input, 0, offsafe, Nvalue[i]); if(offsafe != back_N_safe[i]){ log_err("ERROR: backward_N_safe offset expected:%d, Got:%d\n", back_N_safe[i], offsafe); } } offsafe=sizeof(input)/U_SIZEOF_UCHAR; for(i=0; i<sizeof(Nvalue)/sizeof(Nvalue[0]); i++){ U16_BACK_N(input, 0, offsafe, Nvalue[i]); if(offsafe != back_N_safe[i]){ log_err("ERROR: U16_BACK_N offset expected:%d, Got:%d\n", back_N_safe[i], offsafe); } } }
/* keep this in sync with utf8tst.c's TestNulTerminated() */
/*
 * Exercises the U16_* macros with a negative length, i.e. on a NUL-terminated
 * string: U16_NEXT, U16_FWD_1 and U16_FWD_N must agree with each other and
 * with the expected code points, and U16_GET / U16_SET_CP_LIMIT must agree
 * with U16_NEXT for every code unit of every code point.
 */
static void TestNulTerminated() {
    static const UChar input[]={
        /*  0 */  0x61,
        /*  1 */  0xd801, 0xdc01,
        /*  3 */  0xdc01,
        /*  4 */  0x62,
        /*  5 */  0xd801,
        /*  6 */  0x00
        /*  7 */
    };
    static const UChar32 result[]={
        0x61,
        0x10401,
        0xdc01,
        0x62,
        0xd801,
        0
    };

    UChar32 cp, cpAtUnit;
    int32_t start, next, unit, probe, wanted;
    int32_t cpCount;

    /* Pass 1: walk the string code point by code point. */
    next=0;
    cpCount=0;
    for(;;) {
        start=next;
        U16_NEXT(input, next, -1, cp);
        if(cp!=result[cpCount]) {
            log_err("U16_NEXT(from %d)=U+%04x != U+%04x\n", start, cp, result[cpCount]);
        }

        /* A single forward step must land where U16_NEXT landed. */
        unit=start;
        U16_FWD_1(input, unit, -1);
        if(unit!=next) {
            log_err("U16_FWD_1() moved to index %d but U16_NEXT() moved to %d\n", unit, next);
        }

        ++cpCount;
        /*
         * Move by this many code points from the start.
         * U16_FWD_N() stops at the end of the string, that is, at the NUL if necessary.
         */
        if(cp==0) {
            wanted=next-1;
        } else {
            wanted=next;
        }
        probe=0;
        U16_FWD_N(input, probe, -1, cpCount);
        if(probe!=wanted) {
            log_err("U16_FWD_N(code points from 0) moved to index %d but expected %d\n", probe, wanted);
        }

        if(cp==0) {
            break;
        }
    }

    /* Pass 2: random access to every code unit of every code point. */
    next=0;
    for(;;) {
        unit=start=next;
        U16_NEXT(input, next, -1, cp);
        for(;;) {
            U16_GET(input, 0, unit, -1, cpAtUnit);
            if(cpAtUnit!=cp) {
                log_err("U16_NEXT(from %d)=U+%04x != U+%04x=U16_GET(at %d)\n", start, cp, cpAtUnit, unit);
            }
            /* U16_SET_CP_LIMIT moves from behind a code unit to the limit of its code point */
            probe=unit+1;
            U16_SET_CP_LIMIT(input, 0, probe, -1);
            if(probe!=next) {
                log_err("U16_NEXT() moved to %d but U16_SET_CP_LIMIT(%d) moved to %d\n", next, unit+1, probe);
            }
            if(++unit>=next) {
                break;
            }
        }

        if(cp==0) {
            break;
        }
    }
}