static void addClosureMapping(UChar32 src, UChar32 dest) { uint32_t value; if(beVerbose) { printf("add closure mapping U+%04lx->U+%04lx\n", (unsigned long)src, (unsigned long)dest); } value=upvec_getValue(pv, src, 0); if(value&UCASE_EXCEPTION) { Props *p=excProps+(value>>UGENCASE_EXC_SHIFT); int32_t i; /* append dest to src's closure array */ for(i=0;; ++i) { if(i==LENGTHOF(p->closure)) { fprintf(stderr, "closure[] overflow for U+%04lx->U+%04lx\n", (unsigned long)src, (unsigned long)dest); exit(U_BUFFER_OVERFLOW_ERROR); } else if(p->closure[i]==dest) { break; /* do not store duplicates */ } else if(p->closure[i]==0) { p->closure[i]=dest; break; } } } else {
/* Improve code coverage of UPropsVectors */ static void TestUPropsVector() { UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; UPropsVectors *pv = upvec_open(100, &errorCode); if (pv != NULL) { log_err("Should have returned NULL if UErrorCode is an error."); return; } errorCode = U_ZERO_ERROR; pv = upvec_open(-1, &errorCode); if (pv != NULL || U_SUCCESS(errorCode)) { log_err("Should have returned NULL if column is less than 0.\n"); return; } errorCode = U_ZERO_ERROR; pv = upvec_open(100, &errorCode); if (pv == NULL || U_FAILURE(errorCode)) { log_err("Unable to open UPropsVectors.\n"); return; } if (upvec_getValue(pv, 0, 1) != 0) { log_err("upvec_getValue should return 0.\n"); } if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { log_err("upvec_getRow should not return NULL.\n"); } if (upvec_getArray(pv, NULL, NULL) != NULL) { log_err("upvec_getArray should return NULL.\n"); } upvec_close(pv); }
/*
 * Compute and store the case-properties word for the code point p->code.
 *
 * The word previously stored for p->code (upvec_getValue(pv, p->code, 0)) is
 * the starting point. The data carried in *p is merged into it:
 *   - general category Lt sets the titlecase type;
 *   - a simple upper/lower mapping becomes a delta when the character has the
 *     matching case type, otherwise the character becomes an exception;
 *   - differing title/upper mappings, closure entries, special casing,
 *     case folding data or an out-of-range delta also force an exception;
 *   - a non-zero combining class sets the "above" / "other accent" bits;
 *   - uncased, case-ignorable characters are encoded as delta==1.
 *
 * Exceptions are not encoded here: *p is copied into excProps[] and the slot
 * index is stored in the word for later processing. The word is written back
 * to pv only if it differs from what was stored before. Finally, a
 * multi-character full case folding is passed on to addUnfolding().
 *
 * Every error is fatal: a message goes to stderr and the program exits.
 *
 * @param p properties of a single code point; read, and copied if it is an exception
 */
extern void setProps(Props *p) {
    /* general categories that make an uncased character case-ignorable */
    const uint32_t ignorableGcMask=
        U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK;

    uint32_t storedWord, word;
    int32_t delta=0;            /* default: map to self */
    UBool caseIgnorable=FALSE;

    /* start from the properties that do not come from UnicodeData.txt */
    storedWord=upvec_getValue(pv, p->code, 0);
    word=storedWord;

    /* the Titlecase property is read late, from UnicodeData.txt */
    if(p->gc==U_TITLECASE_LETTER) {
        word|=UCASE_TITLE;
    }

    /* an uppercase mapping is a delta only on a lowercase character */
    if(p->upperCase!=0) {
        if((word&UCASE_TYPE_MASK)==UCASE_LOWER) {
            delta=p->upperCase-p->code;
        } else {
            word|=UCASE_EXCEPTION;
        }
    }

    /* a lowercase mapping is a delta only on an uppercase or titlecase character */
    if(p->lowerCase!=0) {
        if((word&UCASE_TYPE_MASK)>=UCASE_UPPER) {
            delta=p->lowerCase-p->code;
        } else {
            word|=UCASE_EXCEPTION;
        }
    }

    /* anything else that does not fit into a plain delta needs an exception */
    if( p->upperCase!=p->titleCase ||
        p->closure[0]!=0 ||
        p->specialCasing!=NULL ||
        p->caseFolding!=NULL ||
        delta<UCASE_MIN_DELTA || delta>UCASE_MAX_DELTA
    ) {
        word|=UCASE_EXCEPTION;
    }

    /* combining class: the dot bits must still be clear before they are set here */
    if(p->cc!=0) {
        if((word&UCASE_DOT_MASK)!=0) {
            fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
        word|= (p->cc==230) ? UCASE_ABOVE : UCASE_OTHER_ACCENT;
    }

    /* encode case-ignorable as delta==1 on uncased characters */
    if((word&UCASE_TYPE_MASK)==UCASE_NONE) {
        if((U_MASK(p->gc)&ignorableGcMask)!=0) {
            /* Mn, Me, Cf, Lm, Sk: case-ignorable in every supported Unicode version */
            caseIgnorable=TRUE;
        } else if(ucdVersion>=UNI_4_1) {
            /*
             * Unicode 4.1 and up: (D47a) Word_Break=MidLetter or Mn, Me, Cf, Lm, Sk
             * Unicode 5.1 and up: Word_Break=(MidLetter or MidNumLet) or Mn, Me, Cf, Lm, Sk
             * The UGENCASE_IS_MID_LETTER_SHIFT bit is set for both
             * WB=MidLetter and WB=MidNumLet.
             */
            if((upvec_getValue(pv, p->code, 1)&U_MASK(UGENCASE_IS_MID_LETTER_SHIFT))!=0) {
                caseIgnorable=TRUE;
            }
        } else {
            /* before Unicode 4.1: Mn, Me, Cf, Lm, Sk or 0027 or 00AD or 2019 */
            if(p->code==0x27 || p->code==0xad || p->code==0x2019) {
                caseIgnorable=TRUE;
            }
        }
    }

    /*
     * The case-ignorable flag reuses one of the delta/exception bits. That
     * works because the flag is only stored for uncased characters, which
     * have no delta (see the checks above). An uncased, case-ignorable
     * character that also has an exception cannot be encoded this way.
     * U+0307 is the one such character: it is uncased, Mn, has conditional
     * special casing, and is therefore handled in code instead.
     */
    if(caseIgnorable && p->code!=0x307) {
        if((word&UCASE_EXCEPTION)!=0) {
            fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
                    (unsigned long)p->code);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
        delta=1;
    }

    if((word&UCASE_EXCEPTION)==0) {
        /* store the simple case mapping delta */
        word|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    } else {
        /* simply store exceptions for later processing and encoding */
        word|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
        uprv_memcpy(&excProps[exceptionsCount], p, sizeof(*p));
        ++exceptionsCount;
        if(exceptionsCount==MAX_EXC_COUNT) {
            fprintf(stderr, "gencase: too many exceptions\n");
            exit(U_INDEX_OUTOFBOUNDS_ERROR);
        }
    }

    /* write the word back only if something changed */
    if(word!=storedWord) {
        UErrorCode errorCode=U_ZERO_ERROR;

        upvec_setValue(pv, p->code, p->code, 0, word, 0xffffffff, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
                    u_errorName(errorCode));
            exit(errorCode);
        }
    }

    /* add the multi-character case folding to the "unfold" data */
    if(p->caseFolding!=NULL) {
        /* full[0] holds the length; the folded string starts at full+1 */
        int32_t foldLength=p->caseFolding->full[0];

        if(foldLength>1 && u_strHasMoreChar32Than(p->caseFolding->full+1, foldLength, 1)) {
            addUnfolding(p->code, p->caseFolding->full+1, foldLength);
        }
    }
}