/*
 * Tests whether c may be ignored inside a Java or Unicode identifier.
 *
 * Code points up to U+009F qualify when they are ISO controls that do not
 * match IS_THAT_ASCII_CONTROL_SPACE(); everything above U+009F qualifies
 * when its general category is U_FORMAT_CHAR.
 */
U_CAPI UBool U_EXPORT2
u_isIDIgnorable(UChar32 c) {
    uint32_t props;

    if(c>0x9f) {
        /* outside the ASCII/C1 control area: decide by general category */
        GET_PROPS(c, props);
        return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    }

    /* low range (includes negative input, which u_isISOControl() sees as-is) */
    return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
}
/**
 * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
 * Implements UCHAR_POSIX_PRINT.
 *
 * TAB is the only cntrl character in graph+blank (it is in blank), so
 * (blank-TAB) is tested directly as the Zs category rather than through
 * u_isblank().
 * @internal
 */
U_CFUNC UBool
u_isprintPOSIX(UChar32 c) {
    uint32_t props;
    UBool isSpaceSeparator;

    GET_PROPS(c, props);
    isSpaceSeparator=(UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);

    /* u_isgraphPOSIX() is consulted only when c is not a space separator */
    return (UBool)(isSpaceSeparator || u_isgraphPOSIX(c));
}
/*
 * Tests whether c is a "blank" character: TAB or SPACE in the range up to
 * U+009F, otherwise any character whose general category is Zs
 * (U_SPACE_SEPARATOR).
 */
U_CAPI UBool U_EXPORT2
u_isblank(UChar32 c) {
    uint32_t props;

    /* the unsigned comparison also sends negative input to the lookup path */
    if((uint32_t)c>0x9f) {
        /* Zs */
        GET_PROPS(c, props);
        return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    }

    /* TAB or SPACE */
    return c==9 || c==0x20;
}
/*
 * Tests whether c is a hexadecimal digit: one of the letters a-f/A-F in
 * ASCII or Fullwidth ASCII, or any character whose general category is
 * U_DECIMAL_DIGIT_NUMBER.
 */
U_CAPI UBool U_EXPORT2
u_isxdigit(UChar32 c) {
    uint32_t props;

    /* ASCII A-F and a-f */
    if((0x41<=c && c<=0x46) || (0x61<=c && c<=0x66)) {
        return TRUE;
    }

    /* Fullwidth ASCII A-F and a-f */
    if((0xff21<=c && c<=0xff26) || (0xff41<=c && c<=0xff46)) {
        return TRUE;
    }

    /* everything else: decimal digits only */
    GET_PROPS(c, props);
    return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
}
/*
 * Tests whether c is a lower case letter, i.e. whether its general
 * category is U_LOWERCASE_LETTER.
 */
U_CAPI UBool U_EXPORT2
u_islower(UChar32 c) {
    uint32_t props;
    uint32_t category;

    GET_PROPS(c, props);
    category=GET_CATEGORY(props);
    return (UBool)(category==U_LOWERCASE_LETTER);
}
/*
 * Value-mapping helper: reduces a properties word to its general category
 * via GET_CATEGORY(). The context argument is not used.
 */
static uint32_t U_CALLCONV
_enumTypeValue(const void *context, uint32_t value) {
    (void)context; /* intentionally unused */

    return GET_CATEGORY(value);
}
/*
 * Returns the general category of c, as extracted from its properties
 * word by GET_CATEGORY(), narrowed to int8_t.
 */
U_CAPI int8_t U_EXPORT2
u_charType(UChar32 c) {
    uint32_t props;
    int8_t category;

    GET_PROPS(c, props);
    category=(int8_t)GET_CATEGORY(props);
    return category;
}
/*
 * Tests whether c has an assigned character type, i.e. whether its
 * general category value is anything other than 0.
 */
U_CAPI UBool U_EXPORT2
u_isdefined(UChar32 c) {
    uint32_t props;
    uint32_t category;

    GET_PROPS(c, props);
    category=GET_CATEGORY(props);
    return (UBool)(category!=0);
}
/*
 * Tests whether c is a decimal digit, i.e. whether its general category
 * is U_DECIMAL_DIGIT_NUMBER.
 */
U_CAPI UBool U_EXPORT2
u_isdigit(UChar32 c) {
    uint32_t props;
    uint32_t category;

    GET_PROPS(c, props);
    category=GET_CATEGORY(props);
    return (UBool)(category==U_DECIMAL_DIGIT_NUMBER);
}
/*
 * Tests whether c is a title case letter, i.e. whether its general
 * category is U_TITLECASE_LETTER.
 */
U_CAPI UBool U_EXPORT2
u_istitle(UChar32 c) {
    uint32_t props;
    uint32_t category;

    GET_PROPS(c, props);
    category=GET_CATEGORY(props);
    return (UBool)(category==U_TITLECASE_LETTER);
}
/*
 * Handles one parsed line of DerivedNumericValues.txt: reads a code point
 * range from field 0 and an integer numeric value from field 1, then
 * verifies or adds that numeric value for every code point in the range.
 *
 * context     not used by this function
 * fields      per-field string pointers; fields[i][0] is read as the start
 *             of field i, and fields[1][1] is compared against the position
 *             after the parsed number (presumably the field limit --
 *             confirm against the caller)
 * fieldCount  not used by this function
 * pErrorCode  receives the result of u_parseCodePointRange()
 *
 * Every error is reported on stderr and terminates the process via exit();
 * nothing is reported back to the caller.
 */
static void U_CALLCONV
numericLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    Props newProps={ 0 };
    char *s, *numberLimit;
    uint32_t start, end, value, oldProps32;
    char c;
    UBool isFraction;

    /* get the code point range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /*
     * Ignore the
     * # @missing: 0000..10FFFF; NaN
     * line from Unicode 5.1's DerivedNumericValues.txt:
     * The following code cannot parse "NaN", and we don't want to overwrite
     * the numeric values for all characters after reading most
     * from UnicodeData.txt already.
     */
    if(start==0 && end==0x10ffff) {
        return;
    }

    /* check if the numeric value is a fraction (this code does not handle any) */
    isFraction=FALSE;
    s=uprv_strchr(fields[1][0], '.');
    if(s!=NULL) {
        /*
         * Scan the digits after the decimal point; any digit other than '0'
         * makes this a real fraction. The scan stops at the first non-digit.
         */
        numberLimit=s+1;
        while('0'<=(c=*numberLimit++) && c<='9') {
            if(c!='0') {
                isFraction=TRUE;
                break;
            }
        }
    }

    if(isFraction) {
        /* fractions are never stored from this file; see the check in the loop below */
        value=0;
    } else {
        /* parse numeric value */
        s=(char *)u_skipWhitespace(fields[1][0]);

        /* try large, single-significant-digit numbers, may otherwise overflow strtoul() */
        if('1'<=s[0] && s[0]<='9' && s[1]=='0' && s[2]=='0') {
            /* large integers are encoded in a special way, see store.c */
            uint8_t exp=0;

            /* value is the leading digit, exp counts the zeros that follow it */
            value=s[0]-'0';
            numberLimit=s;
            while(*(++numberLimit)=='0') {
                ++exp;
            }
            newProps.exponent=exp;
        } else {
            /* normal number parsing */
            value=(uint32_t)uprv_strtoul(s, &numberLimit, 10);
        }

        /*
         * Reject the field if no characters were consumed, if the number is
         * followed by anything other than a '.' or optional whitespace up to
         * fields[1][1], or if the value does not fit into 31 bits.
         */
        if(numberLimit<=s || (*numberLimit!='.' && u_skipWhitespace(numberLimit)!=fields[1][1]) || value>=0x80000000) {
            fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
    }

    /*
     * Unicode 4.0.1 removes the third column that used to list the numeric type.
     * Assume that either the data is the same as in UnicodeData.txt,
     * or else that the numeric type is "numeric".
     * This should work because we only expect to add numeric values for
     * Han characters; for those, UnicodeData.txt lists only ranges without
     * specific properties for single characters.
     */

    /* set the new numeric value */
    newProps.code=start;
    newProps.numericValue=(int32_t)value;       /* newly parsed numeric value */
    /* the exponent may have been set above */

    /* visit each code point of the range; start itself is the loop variable */
    for(; start<=end; ++start) {
        uint32_t newProps32;
        int32_t oldNtv;
        oldProps32=getProps(start);
        oldNtv=(int32_t)GET_NUMERIC_TYPE_VALUE(oldProps32);

        if(isFraction) {
            /* a fraction is tolerated only if an existing fraction-range value is already stored */
            if(UPROPS_NTV_FRACTION_START<=oldNtv && oldNtv<UPROPS_NTV_LARGE_START) {
                /* this code point was already listed with its numeric value in UnicodeData.txt */
                continue;
            } else {
                fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n",
                        fields[1][0]);
                exit(U_PARSE_ERROR);
            }
        }

        /*
         * For simplicity, and because we only expect to set numeric values for Han characters,
         * for now we only allow to set these values for Lo characters.
         */
        if(oldNtv==UPROPS_NTV_NONE && GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
            fprintf(stderr, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n",
                    fields[0][0]);
            exit(U_PARSE_ERROR);
        }

        /* verify that we do not change an existing value (fractions were excluded above) */
        if(oldNtv!=UPROPS_NTV_NONE) {
            /* the code point already has a value stored */
            /* re-encode the new value with the existing numeric type so the two encodings are comparable */
            newProps.numericType=UPROPS_NTV_GET_TYPE(oldNtv);
            newProps32=makeProps(&newProps);
            if(oldNtv!=GET_NUMERIC_TYPE_VALUE(newProps32)) {
                fprintf(stderr, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)start);
                exit(U_PARSE_ERROR);
            }
            /* same value, continue */
        } else {
            /* the code point is getting a new numeric value */
            newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
            newProps32=makeProps(&newProps);
            if(beVerbose) {
                printf("adding U+%04x numeric type %d encoded-numeric-type-value 0x%03x from %s\n",
                       (int)start, U_NT_NUMERIC, (int)GET_NUMERIC_TYPE_VALUE(newProps32), fields[0][0]);
            }

            /* store the new encoding OR-ed with the category bits of the old properties word */
            addProps(start, newProps32|GET_CATEGORY(oldProps32));
        }
    }
}