/*
 * Decimal digit value lookup.
 *
 * Reads the properties word for c, extracts its numeric-type-value and
 * rebases it so that UPROPS_NTV_DECIMAL_START maps to 0.
 *
 * Returns the rebased value when it is at most 9, otherwise -1.
 * NOTE(review): a numeric-type-value below UPROPS_NTV_DECIMAL_START yields a
 * negative rebased value that is returned unchanged; presumably the only such
 * value is UPROPS_NTV_NONE, giving -1 -- confirm against uprops.h.
 */
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar32 c) {
    uint32_t props;
    int32_t digit;

    GET_PROPS(c, props);
    digit=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;

    /* anything past 9 is not a decimal digit */
    return digit<=9 ? digit : -1;
}
U_CAPI double U_EXPORT2 u_getNumericValue(UChar32 c) { uint32_t props; int32_t ntv; GET_PROPS(c, props); ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); if(ntv==UPROPS_NTV_NONE) { return U_NO_NUMERIC_VALUE; } else if(ntv<UPROPS_NTV_DIGIT_START) { /* decimal digit */ return ntv-UPROPS_NTV_DECIMAL_START; } else if(ntv<UPROPS_NTV_NUMERIC_START) { /* other digit */ return ntv-UPROPS_NTV_DIGIT_START; } else if(ntv<UPROPS_NTV_FRACTION_START) { /* small integer */ return ntv-UPROPS_NTV_NUMERIC_START; } else if(ntv<UPROPS_NTV_LARGE_START) { /* fraction */ int32_t numerator=(ntv>>4)-12; int32_t denominator=(ntv&0xf)+1; return (double)numerator/denominator; } else if(ntv<UPROPS_NTV_BASE60_START) {
U_CAPI int32_t U_EXPORT2 u_getIntPropertyValue(UChar32 c, UProperty which) { UErrorCode errorCode; if(which<UCHAR_BINARY_START) { return 0; /* undefined */ } else if(which<UCHAR_BINARY_LIMIT) { return (int32_t)u_hasBinaryProperty(c, which); } else if(which<UCHAR_INT_START) { return 0; /* undefined */ } else if(which<UCHAR_INT_LIMIT) { switch(which) { case UCHAR_BIDI_CLASS: return (int32_t)u_charDirection(c); case UCHAR_BLOCK: return (int32_t)ublock_getCode(c); #if !UCONFIG_NO_NORMALIZATION case UCHAR_CANONICAL_COMBINING_CLASS: return u_getCombiningClass(c); #endif case UCHAR_DECOMPOSITION_TYPE: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK); case UCHAR_EAST_ASIAN_WIDTH: return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT; case UCHAR_GENERAL_CATEGORY: return (int32_t)u_charType(c); case UCHAR_JOINING_GROUP: return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); case UCHAR_JOINING_TYPE: return ubidi_getJoiningType(GET_BIDI_PROPS(), c); case UCHAR_LINE_BREAK: return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT; case UCHAR_NUMERIC_TYPE: { int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); return UPROPS_NTV_GET_TYPE(ntv); } case UCHAR_SCRIPT: errorCode=U_ZERO_ERROR; return (int32_t)uscript_getScript(c, &errorCode); case UCHAR_HANGUL_SYLLABLE_TYPE: { /* see comments on gcbToHst[] above */ int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; if(gcb<LENGTHOF(gcbToHst)) { return gcbToHst[gcb]; } else { return U_HST_NOT_APPLICABLE; } } #if !UCONFIG_NO_NORMALIZATION case UCHAR_NFD_QUICK_CHECK: case UCHAR_NFKD_QUICK_CHECK: case UCHAR_NFC_QUICK_CHECK: case UCHAR_NFKC_QUICK_CHECK: return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); case UCHAR_LEAD_CANONICAL_COMBINING_CLASS: return getFCD16(c)>>8; case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS: return getFCD16(c)&0xff; #endif case 
UCHAR_GRAPHEME_CLUSTER_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; case UCHAR_SENTENCE_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT; case UCHAR_WORD_BREAK: return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT; default: return 0; /* undefined */ } } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
/*
 * Per-line callback for DerivedNumericValues.txt.
 *
 * fields[0] holds the code point (range), fields[1] the numeric value;
 * each fields[i] is a [start, limit) pair of pointers into the line.
 * context and fieldCount are not used.
 *
 * For every code point in the range this either verifies that the value
 * matches what is already stored, or adds a new numeric value (allowed only
 * for Lo characters). Any inconsistency is reported on stderr and terminates
 * the program via exit().
 */
static void U_CALLCONV
numericLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    Props newProps={ 0 };
    char *text, *limit;
    uint32_t start, end, cp, parsed;
    UBool isFraction, isValid;

    /* field 0: code point or code point range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /*
     * Unicode 5.1's DerivedNumericValues.txt contains the line
     *   # @missing: 0000..10FFFF; NaN
     * Skip it: "NaN" cannot be parsed below, and by now most numeric values
     * have been read from UnicodeData.txt and must not be overwritten for
     * every character.
     */
    if(start==0 && end==0x10ffff) {
        return;
    }

    /*
     * A value counts as a fraction when a non-zero digit follows the
     * decimal point. Fractions are not parsed here, only recognized.
     */
    isFraction=FALSE;
    text=uprv_strchr(fields[1][0], '.');
    if(text!=NULL) {
        for(++text; '0'<=*text && *text<='9'; ++text) {
            if(*text!='0') {
                isFraction=TRUE;
                break;
            }
        }
    }

    parsed=0; /* stays 0 for fractions */
    if(!isFraction) {
        /* field 1: integer value */
        text=(char *)u_skipWhitespace(fields[1][0]);

        if('1'<=text[0] && text[0]<='9' && text[1]=='0' && text[2]=='0') {
            /*
             * One significant digit followed by zeros: such large integers
             * might overflow strtoul(), and they are encoded in a special
             * way (digit + exponent), see store.c.
             */
            uint8_t zeros=0;

            parsed=text[0]-'0';
            for(limit=text+1; *limit=='0'; ++limit) {
                ++zeros;
            }
            newProps.exponent=zeros;
        } else {
            /* ordinary decimal integer */
            parsed=(uint32_t)uprv_strtoul(text, &limit, 10);
        }

        /*
         * Valid only if something was consumed, the number is followed by
         * '.' or by nothing but whitespace up to the field limit, and the
         * value fits into a non-negative int32_t.
         */
        isValid=(UBool)(limit>text);
        if(isValid && *limit!='.') {
            isValid=(UBool)(u_skipWhitespace(limit)==fields[1][1]);
        }
        if(isValid && parsed>=0x80000000) {
            isValid=FALSE;
        }
        if(!isValid) {
            fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
    }

    /*
     * Since Unicode 4.0.1 the file no longer has a third column with the
     * numeric type. The assumption is that the data either agrees with
     * UnicodeData.txt or else has type "numeric". That should hold because
     * new values are expected only for Han characters, for which
     * UnicodeData.txt lists ranges without per-character properties.
     */

    /* the exponent, if any, was already stored above */
    newProps.code=start;
    newProps.numericValue=(int32_t)parsed;

    for(cp=start; cp<=end; ++cp) {
        uint32_t oldProps32=getProps(cp);
        uint32_t newProps32;
        int32_t oldNtv=(int32_t)GET_NUMERIC_TYPE_VALUE(oldProps32);

        if(isFraction) {
            /* fractions must already be known from UnicodeData.txt */
            if(oldNtv<UPROPS_NTV_FRACTION_START || UPROPS_NTV_LARGE_START<=oldNtv) {
                fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields[1][0]);
                exit(U_PARSE_ERROR);
            }
            continue;
        }

        if(oldNtv!=UPROPS_NTV_NONE) {
            /*
             * A value is already stored: re-encode the parsed value with the
             * existing numeric type and make sure nothing would change.
             */
            newProps.numericType=UPROPS_NTV_GET_TYPE(oldNtv);
            newProps32=makeProps(&newProps);
            if(oldNtv!=GET_NUMERIC_TYPE_VALUE(newProps32)) {
                fprintf(stderr, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)cp);
                exit(U_PARSE_ERROR);
            }
            continue;
        }

        /*
         * No value stored yet. To keep things simple, and because new values
         * are expected only for Han characters, only Lo characters may
         * receive one for now.
         */
        if(GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
            fprintf(stderr, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }

        /* assumed numeric type, see the Unicode 4.0.1 note above */
        newProps.numericType=(uint8_t)U_NT_NUMERIC;
        newProps32=makeProps(&newProps);
        if(beVerbose) {
            printf("adding U+%04x numeric type %d encoded-numeric-type-value 0x%03x from %s\n",
                   (int)cp, U_NT_NUMERIC,
                   (int)GET_NUMERIC_TYPE_VALUE(newProps32), fields[0][0]);
        }
        addProps(cp, newProps32|GET_CATEGORY(oldProps32));
    }
}