U_CDECL_BEGIN

/*
 * Per-line callback for a StringPrep profile data file.
 *
 * fields[0] holds a code point or code point range, fields[1] the mapping
 * string and fields[2] the type name of the entry.  Depending on the type
 * name the parsed range or mapping is handed to compareFlagsForRange() or
 * compareMapping().
 *
 * @param fields      [start, limit) pointer pairs, one per field of the line
 * @param fieldCount  number of fields; must be exactly 3 for data lines
 * @param pErrorCode  in/out ICU error code; set to U_INVALID_FORMAT_ERROR for
 *                    a wrong field count, an unparsable code point or an
 *                    unknown type name; parser errors are left as reported
 *
 * The context pointer is not used.  Nothing is compared once parsing of the
 * line has failed, so the comparison helpers never see default/garbage values.
 */
static void U_CALLCONV
strprepProfileLineFn(void * /*context*/,
                     char *fields[][2], int32_t fieldCount,
                     UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
    const char *typeName;
    uint32_t rangeStart=0, rangeEnd=0;
    const char *s;

    s = u_skipWhitespace(fields[0][0]);
    if (*s == '@') {
        /* a special directive introduced in 4.2 */
        return;
    }

    if(fieldCount != 3){
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return;
    }

    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(s, &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the range */
        compareFlagsForRange(rangeStart, rangeEnd, USPREP_UNASSIGNED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(s, &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the range */
        compareFlagsForRange(rangeStart, rangeEnd, USPREP_PROHIBITED);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){

        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(s, &end, 16);
        if(end<=s){
            /* no hexadecimal digits were consumed */
            *pErrorCode = U_INVALID_FORMAT_ERROR;
            return;
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping,
                                 (int32_t)(sizeof(mapping)/sizeof(mapping[0])),
                                 pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the mapping */
        compareMapping(code, mapping, length, USPREP_MAP);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }
}
/*
 * Line callback for DerivedCoreProperties.txt / DerivedNormalizationProps.txt.
 *
 * context is the UnicodeTest instance.  Field 0 is parsed as a code point
 * range and field 1 is looked up in derivedPropsNames; for a known name the
 * range is added to the matching entry of derivedProps.  An unknown name is
 * reported through errln() the first time it is seen and remembered in
 * unknownPropertyNames so that later lines with the same name stay quiet.
 */
U_CFUNC void U_CALLCONV
derivedPropsLineFn(void *context,
                   char *fields[][2], int32_t /* fieldCount */,
                   UErrorCode *pErrorCode)
{
    UnicodeTest *test=(UnicodeTest *)context;
    uint32_t first, last;

    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        test->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]);
        return;
    }

    /* known derived binary property: record the range and be done */
    int32_t propIndex=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]);
    if(propIndex>=0) {
        test->derivedProps[propIndex].add(first, last);
        return;
    }

    /* unknown name: build the trimmed field text to use as the lookup key */
    UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0]));
    propName.trim();
    if(test->unknownPropertyNames->find(propName)!=NULL) {
        /* already reported */
        return;
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    test->unknownPropertyNames->puti(propName, 1, errorCode);
    test->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]);
}
/*
 * Line callback for DerivedBidiClass.txt.
 *
 * Field 0 is a code point range, field 1 a bidi class name.  The class is
 * resolved with u_getPropertyValueEnum(UCHAR_BIDI_CLASS, ...) and written
 * for the whole range via upvec_setValue() on pv (word 0, UBIDI_CLASS_MASK).
 * Any failure prints a message to stderr and terminates the process.
 */
static void U_CALLCONV
bidiClassLineFn(void *context,
                char *fields[][2], int32_t fieldCount,
                UErrorCode *pErrorCode) {
    uint32_t first, last, bidiClass;
    char *className;

    /* field 0: code point range */
    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi: syntax error in DerivedBidiClass.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* field 1: bidi class name */
    className=trimTerminateField(fields[1][0], fields[1][1]);
    bidiClass=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, className);
    if((int32_t)bidiClass<0) {
        fprintf(stderr, "genbidi error: unknown bidi class in DerivedBidiClass.txt field 1 at %s\n", className);
        exit(U_PARSE_ERROR);
    }

    upvec_setValue(pv, first, last, 0, bidiClass, UBIDI_CLASS_MASK, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi error: unable to set derived bidi class for U+%04x..U+%04x - %s\n",
                (int)first, (int)last, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Per-line callback for a StringPrep profile data file; compares the line
 * against the profile passed in as context.
 *
 * fields[0] holds a code point or code point range, fields[1] the mapping
 * string and fields[2] the type name of the entry.  Depending on the type
 * name the parsed range or mapping is handed, together with the profile, to
 * compareFlagsForRange() or compareMapping().
 *
 * @param context     the UStringPrepProfile to compare against
 * @param fields      [start, limit) pointer pairs, one per field of the line
 * @param fieldCount  number of fields; at least 3 are required because
 *                    fields[2] is read
 * @param pErrorCode  in/out ICU error code; set to U_INVALID_FORMAT_ERROR for
 *                    too few fields, an unparsable code point or an unknown
 *                    type name; parser errors are left as reported
 *
 * Nothing is compared once parsing of the line has failed, so the comparison
 * helpers never see default/garbage values.
 */
static void U_CALLCONV
strprepProfileLineFn(void *context,
                     char *fields[][2], int32_t fieldCount,
                     UErrorCode *pErrorCode) {
    uint32_t mapping[40];
    char *end, *map;
    uint32_t code;
    int32_t length;
    UStringPrepProfile *data = (UStringPrepProfile *) context;
    const char *typeName;
    uint32_t rangeStart=0, rangeEnd=0;

    if(fieldCount < 3){
        /* fields[2] would be out of bounds */
        *pErrorCode = U_INVALID_FORMAT_ERROR;
        return;
    }

    typeName = fields[2][0];
    map = fields[1][0];

    if(strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the range */
        compareFlagsForRange(data, rangeStart, rangeEnd, USPREP_UNASSIGNED);

    }else if(strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(fields[0][0], &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the range */
        compareFlagsForRange(data, rangeStart, rangeEnd, USPREP_PROHIBITED);

    }else if(strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){

        /* get the character code, field 0 */
        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
        if(end==fields[0][0]){
            /* no hexadecimal digits were consumed */
            *pErrorCode = U_INVALID_FORMAT_ERROR;
            return;
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping,
                                 (int32_t)(sizeof(mapping)/sizeof(mapping[0])),
                                 pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            return;
        }

        /* compare the mapping */
        compareMapping(data, code, mapping, length, USPREP_MAP);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }
}
/*
 * Line callback for a UCD file that assigns one enumerated property value
 * per code point range.
 *
 * context is a SingleEnum descriptor (file name, property, vector word,
 * shift and mask).  Field 0 is the range, field 1 the value alias.  For
 * UCHAR_BLOCK three older block names are accepted as fallbacks.  The value
 * is shifted into place, checked against the mask and written via
 * upvec_setValue() on pv.  Any failure prints to stderr and exits.
 */
static void U_CALLCONV
singleEnumLineFn(void *context,
                 char *fields[][2], int32_t fieldCount,
                 UErrorCode *pErrorCode) {
    const SingleEnum *desc=(const SingleEnum *)context;
    uint32_t first, last, bits;
    int32_t enumValue;
    char *alias;

    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", desc->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* parse property alias */
    alias=trimTerminateField(fields[1][0], fields[1][1]);
    enumValue=u_getPropertyValueEnum(desc->prop, alias);

    if(enumValue<0 && desc->prop==UCHAR_BLOCK) {
        /* block names that Unicode 3.2 renamed */
        if(isToken("Greek", alias)) {
            enumValue=UBLOCK_GREEK; /* now "Greek and Coptic" */
        } else if(isToken("Combining Marks for Symbols", alias)) {
            enumValue=UBLOCK_COMBINING_MARKS_FOR_SYMBOLS; /* now "Combining Diacritical Marks for Symbols" */
        } else if(isToken("Private Use", alias)) {
            enumValue=UBLOCK_PRIVATE_USE; /* now "Private Use Area" */
        }
    }

    if(enumValue<0) {
        fprintf(stderr, "genprops error: unknown %s name in %s.txt field 1 at %s\n",
                desc->propName, desc->ucdFile, alias);
        exit(U_PARSE_ERROR);
    }

    /* the shifted value must fit entirely inside the property's mask */
    bits=(uint32_t)(enumValue<<desc->vecShift);
    if((bits&desc->vecMask)!=bits) {
        fprintf(stderr, "genprops error: %s value overflow (0x%x) at %s\n",
                desc->propName, (int)bits, alias);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    if(first==0 && last==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        last=UPVEC_MAX_CP;
    }

    upvec_setValue(pv, first, last, desc->vecWord, bits, desc->vecMask, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set %s code: %s\n",
                desc->propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Parses s as a code point range via u_parseCodePointRange().
 * On success stores the bounds in start/end and returns TRUE.
 * On failure prints a message with the current lineNumber to stderr,
 * leaves start/end untouched and returns FALSE; errorCode carries the error.
 */
UBool
PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
    uint32_t first, last;
    u_parseCodePointRange(s, &first, &last, &errorCode);
    if(!U_FAILURE(errorCode)) {
        start=(UChar32)first;
        end=(UChar32)last;
        return TRUE;
    }
    fprintf(stderr,
            "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
            s, (long)lineNumber);
    return FALSE;
}
/*
 * Line callback for DerivedAge.txt.
 *
 * Field 0 is a code point range, field 1 a version "major[.minor]" or the
 * word "unassigned" (which is skipped).  Major must be 1..15 and minor
 * 0..15; they are packed as (major<<4)|minor, shifted by UPROPS_AGE_SHIFT
 * and written via upvec_setValue() on pv under UPROPS_AGE_MASK.
 * Any failure prints to stderr and exits.
 */
static void U_CALLCONV
ageLineFn(void *context,
          char *fields[][2], int32_t fieldCount,
          UErrorCode *pErrorCode) {
    char *cursor, *afterNumber;
    uint32_t first, last, number, age;

    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* ignore "unassigned" (the default is already set to 0.0) */
    cursor=(char *)u_skipWhitespace(fields[1][0]);
    if(uprv_strncmp(cursor, "unassigned", 10)==0) {
        return;
    }

    /* major version: 1..15, followed by '.', blank, tab or end of string */
    number=(uint32_t)uprv_strtoul(cursor, &afterNumber, 10);
    if(cursor==afterNumber || number==0 || number>15 ||
       (*afterNumber!='.' && *afterNumber!=' ' && *afterNumber!='\t' && *afterNumber!=0)
    ) {
        fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    age=number<<4;

    /* optional minor version: 0..15, followed by blank, tab or end of string */
    if(*afterNumber=='.') {
        cursor=(char *)u_skipWhitespace(afterNumber+1);
        number=(uint32_t)uprv_strtoul(cursor, &afterNumber, 10);
        if(cursor==afterNumber || number>15 ||
           (*afterNumber!=' ' && *afterNumber!='\t' && *afterNumber!=0)
        ) {
            fprintf(stderr, "genprops: syntax error in DerivedAge.txt field 1 at %s\n", fields[1][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }
        age|=number;
    }

    if(first==0 && last==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        last=UPVEC_MAX_CP;
    }

    upvec_setValue(pv, first, last, 0, age<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set character age: %s\n", u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Line callback for a UCD file listing binary properties (genprops).
 *
 * context is a Binaries table.  Field 0 is a code point range, field 1 a
 * property name that is searched in the table with isToken().  Names not in
 * the table are ignored (and passed to addIgnoredProp() when beVerbose is
 * set).  For a match, the single bit U_MASK(vecShift) is set for the range
 * in the entry's vector word via upvec_setValue() on pv.
 * Any failure prints to stderr and exits.
 */
static void U_CALLCONV
binariesLineFn(void *context,
               char *fields[][2], int32_t fieldCount,
               UErrorCode *pErrorCode) {
    const Binaries *table=(const Binaries *)context;
    uint32_t first, last, bit;
    char *name;
    int32_t idx;

    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in %s.txt field 0 at %s\n", table->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* parse binary property name: linear search, stop at first match */
    name=(char *)u_skipWhitespace(fields[1][0]);
    for(idx=0; idx!=table->binariesCount && !isToken(table->binaries[idx].propName, name); ++idx) {
    }
    if(idx==table->binariesCount) {
        /* ignore unrecognized properties */
        if(beVerbose) {
            addIgnoredProp(name, fields[1][1]);
        }
        return;
    }

    if(table->binaries[idx].vecShift>=32) {
        fprintf(stderr, "genprops error: shift value %d>=32 for %s %s\n",
                (int)table->binaries[idx].vecShift, table->ucdFile, table->binaries[idx].propName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
    bit=U_MASK(table->binaries[idx].vecShift);

    if(first==0 && last==0x10ffff) {
        /* Also set bits for initialValue and errorValue. */
        last=UPVEC_MAX_CP;
    }

    /* value and mask are the same single bit */
    upvec_setValue(pv, first, last, table->binaries[idx].vecWord, bit, bit, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops error: unable to set %s code: %s\n",
                table->binaries[idx].propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Line callback for a UCD file listing binary properties (genbidi).
 *
 * context is a Binaries table.  Field 0 is a code point range, field 1 a
 * property name that is searched in the table with isToken(); names not in
 * the table are silently ignored.  For a match, the entry's vecValue is
 * written under its vecMask into its vector word for the whole range via
 * upvec_setValue() on pv.  Any failure prints to stderr and exits.
 */
static void U_CALLCONV
binariesLineFn(void *context,
               char *fields[][2], int32_t fieldCount,
               UErrorCode *pErrorCode) {
    const Binaries *table=(const Binaries *)context;
    uint32_t first, last;
    char *name;
    int32_t idx;

    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi: syntax error in %s.txt field 0 at %s\n", table->ucdFile, fields[0][0]);
        exit(*pErrorCode);
    }

    /* parse binary property name: linear search, stop at first match */
    name=(char *)u_skipWhitespace(fields[1][0]);
    for(idx=0; idx!=table->binariesCount && !isToken(table->binaries[idx].propName, name); ++idx) {
    }
    if(idx==table->binariesCount) {
        /* ignore unrecognized properties */
        return;
    }

    /* an empty mask would make the update a no-op: treat as a table error */
    if(table->binaries[idx].vecMask==0) {
        fprintf(stderr, "genbidi error: mask value %d==0 for %s %s\n",
                (int)table->binaries[idx].vecMask, table->ucdFile, table->binaries[idx].propName);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    upvec_setValue(pv, first, last,
                   table->binaries[idx].vecWord,
                   table->binaries[idx].vecValue,
                   table->binaries[idx].vecMask,
                   pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genbidi error: unable to set %s, code: %s\n",
                table->binaries[idx].propName, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Line callback for DerivedCoreProperties.txt.
 *
 * context is the UnicodeTest instance.  Field 0 is parsed as a code point
 * range and field 1 is looked up in derivedCorePropsNames; for a known name
 * the range is added to the matching entry of derivedCoreProps.  A syntax
 * error in field 0 or an unknown property name is reported through errln()
 * and the line is otherwise skipped.
 *
 * @param context     the UnicodeTest that collects the sets and errors
 * @param fields      [start, limit) pointer pairs, one per field of the line
 * @param pErrorCode  in/out ICU error code, set by u_parseCodePointRange()
 */
U_CFUNC void U_CALLCONV
derivedCorePropsLineFn(void *context,
                       char *fields[][2], int32_t /* fieldCount */,
                       UErrorCode *pErrorCode)
{
    UnicodeTest *me=(UnicodeTest *)context;
    uint32_t start, end;
    int32_t i;

    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields[0][0]);
        return;
    }

    /* parse derived binary property name, ignore unknown names */
    i=getTokenIndex(derivedCorePropsNames, LENGTHOF(derivedCorePropsNames), fields[1][0]);
    if(i<0) {
        /* name the data file so the diagnostic is complete */
        me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt\n", fields[1][0]);
        return;
    }

    me->derivedCoreProps[i].add(start, end);
}
/*
 * Line callback for DerivedNormalizationProperties.txt (gennorm).
 *
 * Field 0 is a code point range; the text starting at field 1 selects one of:
 *  - a quick check property NF[K]{C,D}_NO / _MAYBE, or the two-field form
 *    NF[K]{C,D}_QC; N|M  -> setQCFlags() for every code point in the range
 *  - Comp_Ex / Full_Composition_Exclusion -> setCompositionExclusion()
 *  - FNC / FC_NFKC followed by ';' and a string -> setFNC()
 * Anything else, including "Yes" quick check values, is ignored.
 * A range starting at U+AC00 is skipped entirely.
 * Parse failures print to stderr and exit.
 */
static void U_CALLCONV
derivedNormalizationPropertiesLineFn(void *context,
                                     char *fields[][2], int32_t fieldCount,
                                     UErrorCode *pErrorCode) {
    UChar closure[32];
    char *s;
    uint32_t start, end;
    uint8_t qcFlags;

    /* get code point range */
    u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gennorm: error parsing DerivedNormalizationProperties.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* ignore hangul - handle explicitly */
    if(start==0xac00) {
        return;
    }

    /* get property - ignore unrecognized ones */
    s=(char *)u_skipWhitespace(fields[1][0]);

    if(*s=='N' && s[1]=='F') {
        /* quick check flag: start with both bits, narrow down below */
        qcFlags=0x11;
        s+=2;

        if(*s=='K') {
            qcFlags<<=1;
            ++s;
        }

        if(*s=='C' && s[1]=='_') {
            s+=2;
        } else if(*s=='D' && s[1]=='_') {
            qcFlags<<=2;
            s+=2;
        } else {
            return;
        }

        if(0==uprv_memcmp(s, "NO", 2)) {
            qcFlags&=0xf;
        } else if(0==uprv_memcmp(s, "MAYBE", 5)) {
            qcFlags&=0x30;
        } else if(0==uprv_memcmp(s, "QC", 2) && *(s=(char *)u_skipWhitespace(s+2))==';') {
            /*
             * Unicode 4.0.1 replaced the single field "NFD_NO" etc.
             * with two fields, "NFD_QC; N" etc.
             */
            /* start of the value field */
            s=(char *)u_skipWhitespace(s+1);
            if(*s=='N') {
                qcFlags&=0xf;
            } else if(*s=='M') {
                qcFlags&=0x30;
            } else {
                return; /* do nothing for "Yes" because it's the default value */
            }
        } else {
            return; /* do nothing for "Yes" because it's the default value */
        }

        /* set this flag for all code points in this range */
        for(; start<=end; ++start) {
            setQCFlags(start, qcFlags);
        }
        return;
    }

    if(0==uprv_memcmp(s, "Comp_Ex", 7) || 0==uprv_memcmp(s, "Full_Composition_Exclusion", 26)) {
        /* full composition exclusion */
        for(; start<=end; ++start) {
            setCompositionExclusion(start);
        }
        return;
    }

    /* note: each alternative advances s to the expected ';' as a side effect */
    if(
        ((0==uprv_memcmp(s, "FNC", 3) && *(s=(char *)u_skipWhitespace(s+3))==';') ||
        (0==uprv_memcmp(s, "FC_NFKC", 7) && *(s=(char *)u_skipWhitespace(s+7))==';'))
    ) {
        /* FC_NFKC_Closure, parse field 2 to get the string */
        char *limit;

        /* start of the field */
        s=(char *)u_skipWhitespace(s+1);

        /* find the end of the field and terminate it in place */
        limit=s;
        while(*limit!=';' && *limit!='#' && *limit!=0 && *limit!='\n' && *limit!='\r') {
            ++limit;
        }
        *limit=0;

        /* closure[0] receives the length, the string itself follows */
        closure[0]=(UChar)u_parseString(s, closure+1, 31, NULL, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "gennorm error: illegal FNC string at %s\n", fields[0][0]);
            exit(*pErrorCode);
        }

        for(; start<=end; ++start) {
            setFNC(start, closure);
        }
    }
}
/*
 * Per-line callback for a StringPrep profile source file (gensprep).
 *
 * context is the file name, used only in diagnostics.  A first field that
 * starts with '@' is a directive: the NORMALIZE and CHECK_BIDI directives
 * switch on the corresponding entry of options[] and end processing of the
 * line; any other directive is reported on stderr and the line then goes on
 * to be processed like a data line.
 *
 * For data lines field 2 names the type: UNASSIGNED and PROHIBITED ranges
 * go to storeRange(), MAP entries to storeMapping(); any other type sets
 * U_INVALID_FORMAT_ERROR.  A failed range parse prints a message and
 * returns; every other failure prints a message and exits.
 */
static void U_CALLCONV
strprepProfileLineFn(void *context,
                     char *fields[][2], int32_t fieldCount,
                     UErrorCode *pErrorCode) {
    const char *filename = (const char *) context;
    const char *typeName;
    const char *s;
    char *end, *map;
    uint32_t mapping[40];
    uint32_t rangeStart=0, rangeEnd=0;
    uint32_t code;
    int32_t length;

    s = u_skipWhitespace(fields[0][0]);
    if (*s == '@') {
        /* special directive */
        s++;
        length = (int32_t)(fields[0][1] - s);
        if (length >= NORMALIZE_DIRECTIVE_LEN
            && uprv_strncmp(s, NORMALIZE_DIRECTIVE, NORMALIZE_DIRECTIVE_LEN) == 0) {
            options[NORMALIZE].doesOccur = TRUE;
            return;
        }
        if (length >= CHECK_BIDI_DIRECTIVE_LEN
            && uprv_strncmp(s, CHECK_BIDI_DIRECTIVE, CHECK_BIDI_DIRECTIVE_LEN) == 0) {
            options[CHECK_BIDI].doesOccur = TRUE;
            return;
        }
        fprintf(stderr, "gensprep error parsing a directive %s.", fields[0][0]);
    }

    typeName = fields[2][0];
    map = fields[1][0];

    if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){

        u_parseCodePointRange(s, &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart, rangeEnd, USPREP_UNASSIGNED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){

        u_parseCodePointRange(s, &rangeStart, &rangeEnd, pErrorCode);
        if(U_FAILURE(*pErrorCode)){
            fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
            return;
        }

        /* store the range */
        storeRange(rangeStart, rangeEnd, USPREP_PROHIBITED, pErrorCode);

    }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){

        /* field 0 must consist of exactly one hexadecimal code point */
        code=(uint32_t)uprv_strtoul(s, &end, 16);
        if(end<=s || end!=fields[0][1]) {
            fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
            *pErrorCode=U_PARSE_ERROR;
            exit(U_PARSE_ERROR);
        }

        /* parse the mapping string */
        length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);

        /* store the mapping */
        storeMapping(code, mapping, length, USPREP_MAP, pErrorCode);

    }else{
        *pErrorCode = U_INVALID_FORMAT_ERROR;
    }

    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gensprep error parsing  %s line %s at %s. Error: %s\n",filename,
               fields[0][0],fields[2][0],u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
/*
 * Line callback for DerivedNumericValues.txt (genprops).
 *
 * Field 0 is a code point range, field 1 the numeric value.  The catch-all
 * range 0000..10FFFF is skipped.  Values with a non-zero digit after the
 * decimal point are treated as fractions: they are not parsed and are only
 * accepted for code points whose stored numeric-type-value already lies in
 * the fraction range.  Integers of the form d00... are stored as one digit
 * plus an exponent; all others are parsed with uprv_strtoul().
 *
 * For every code point in the range the existing properties word is read
 * with getProps(): an existing numeric value must equal the new one, and a
 * new numeric value is only accepted for general category Lo and is then
 * added with addProps().  Any violation prints to stderr and exits.
 */
static void U_CALLCONV
numericLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    Props newProps={ 0 };
    char *text, *numberLimit;
    uint32_t first, last, cp, value;
    UBool isFraction;

    /* get the code point range */
    u_parseCodePointRange(fields[0][0], &first, &last, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /*
     * Skip the
     *   # @missing: 0000..10FFFF; NaN
     * line from Unicode 5.1's DerivedNumericValues.txt:
     * "NaN" cannot be parsed below, and the numeric values already read
     * from UnicodeData.txt must not be overwritten for all characters.
     */
    if(first==0 && last==0x10ffff) {
        return;
    }

    /* a fraction is a '.' followed by digits of which at least one is not '0' */
    isFraction=FALSE;
    text=uprv_strchr(fields[1][0], '.');
    if(text!=NULL) {
        const char *digit;
        for(digit=text+1; '0'<=*digit && *digit<='9'; ++digit) {
            if(*digit!='0') {
                isFraction=TRUE;
                break;
            }
        }
    }

    if(isFraction) {
        /* fractions are not parsed here */
        value=0;
    } else {
        /* parse numeric value */
        text=(char *)u_skipWhitespace(fields[1][0]);

        /* try large, single-significant-digit numbers, may otherwise overflow strtoul() */
        if('1'<=text[0] && text[0]<='9' && text[1]=='0' && text[2]=='0') {
            /* large integers are encoded in a special way, see store.c */
            uint8_t exp=0;

            value=text[0]-'0';
            for(numberLimit=text+1; *numberLimit=='0'; ++numberLimit) {
                ++exp;
            }
            newProps.exponent=exp;
        } else {
            /* normal number parsing */
            value=(uint32_t)uprv_strtoul(text, &numberLimit, 10);
        }

        if(numberLimit<=text ||
           (*numberLimit!='.' && u_skipWhitespace(numberLimit)!=fields[1][1]) ||
           value>=0x80000000
        ) {
            fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
    }

    /*
     * Unicode 4.0.1 dropped the third column that used to list the numeric
     * type.  Assume that the data either matches UnicodeData.txt or that the
     * numeric type is "numeric".  This should hold because new numeric values
     * are only expected for Han characters, for which UnicodeData.txt lists
     * only ranges without per-character properties.
     */

    /* set the new numeric value; the exponent may have been set above */
    newProps.code=first;
    newProps.numericValue=(int32_t)value;

    for(cp=first; cp<=last; ++cp) {
        uint32_t oldProps32=getProps(cp);
        uint32_t newProps32;
        int32_t oldNtv=(int32_t)GET_NUMERIC_TYPE_VALUE(oldProps32);

        if(isFraction) {
            if(UPROPS_NTV_FRACTION_START<=oldNtv && oldNtv<UPROPS_NTV_LARGE_START) {
                /* this code point was already listed with its numeric value in UnicodeData.txt */
                continue;
            }
            fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields[1][0]);
            exit(U_PARSE_ERROR);
        }

        if(oldNtv==UPROPS_NTV_NONE) {
            /*
             * The code point is getting a new numeric value.
             * For simplicity, and because numeric values are only expected
             * to be added for Han characters, this is only allowed for Lo.
             */
            if(GET_CATEGORY(oldProps32)!=U_OTHER_LETTER) {
                fprintf(stderr, "genprops error: new numeric value for a character other than Lo in DerivedNumericValues.txt at %s\n", fields[0][0]);
                exit(U_PARSE_ERROR);
            }

            newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
            newProps32=makeProps(&newProps);
            if(beVerbose) {
                printf("adding U+%04x numeric type %d encoded-numeric-type-value 0x%03x from %s\n",
                       (int)cp, U_NT_NUMERIC, (int)GET_NUMERIC_TYPE_VALUE(newProps32),
                       fields[0][0]);
            }

            addProps(cp, newProps32|GET_CATEGORY(oldProps32));
        } else {
            /* a value is already stored: the new one must encode identically */
            newProps.numericType=UPROPS_NTV_GET_TYPE(oldNtv);
            newProps32=makeProps(&newProps);
            if(oldNtv!=GET_NUMERIC_TYPE_VALUE(newProps32)) {
                fprintf(stderr, "genprops error: new numeric value differs from old one for U+%04lx\n", (long)cp);
                exit(U_PARSE_ERROR);
            }
            /* same value, nothing to store */
        }
    }
}