/*
 * Parse a semicolon-delimited file with 15 fields per line, handing every
 * line to lineFn together with storeOptions.
 *
 * filename      path passed to u_parseDelimitedFile()
 * storeOptions  forwarded unchanged as the line function's context pointer
 *
 * Exits the process if parsing fails, or if name aliases remain that were
 * not consumed (cpNameAliasesIndex has not reached cpNameAliasesTop).
 * Unless beQuiet is set, prints the lineTop, lineCount and wordCount totals.
 */
static void
parseDB(const char *filename, Options *storeOptions) {
    UErrorCode status=U_ZERO_ERROR;
    char *columns[15][2];

    u_parseDelimitedFile(filename, ';', columns, 15, lineFn, storeOptions, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(status));
        exit(status);
    }

    /* every collected name alias must have been matched during the parse */
    if(cpNameAliasesTop>cpNameAliasesIndex) {
        fprintf(stderr,
                "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
                (unsigned long)cpNameAliases[cpNameAliasesIndex].code);
        exit(U_PARSE_ERROR);
    }

    if(beQuiet) {
        return;
    }

    printf("size of all names in the database: %lu\n",
           (unsigned long)lineTop);
    printf("number of named Unicode characters: %lu\n",
           (unsigned long)lineCount);
    printf("number of words in the dictionary from these names: %lu\n",
           (unsigned long)wordCount);
}
/*
 * Parse one two-field, semicolon-delimited file described by bin and, in
 * verbose mode, list the properties recorded in ignoredProps[] during the
 * parse.
 *
 * filename    full path handed to u_parseDelimitedFile()
 * basename    passed to writeUCDFilename() together with bin->ucdFile and
 *             suffix (presumably the position inside filename where the
 *             file name gets written - confirm against writeUCDFilename)
 * suffix      see basename
 * bin         forwarded to binariesLineFn as its context pointer
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A parse failure is reported on stderr and left in *pErrorCode.
 */
static void
parseBinariesFile(char *filename, char *basename, const char *suffix,
                  const Binaries *bin,
                  UErrorCode *pErrorCode) {
    char *columns[2][2];
    int32_t idx;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, bin->ucdFile, suffix);

    /* start with an empty ignored-property list for this file */
    ignoredPropsCount=0;

    u_parseDelimitedFile(filename, ';', columns, 2, binariesLineFn, (void *)bin, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
    }

    if(!beVerbose) {
        return;
    }

    idx=0;
    while(idx<ignoredPropsCount) {
        printf("genprops: ignoring property %s in %s.txt\n",
               ignoredProps[idx], bin->ucdFile);
        ++idx;
    }
}
/*
 * Parse a three-field, semicolon-delimited file, handing each line to
 * caseFoldingLineFn.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the parse is skipped if it is NULL or
 *             already indicates a failure, and a parse failure is left in it
 */
static void
parseCaseFolding(const char *filename, UErrorCode *pErrorCode) {
    char *columns[3][2];

    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
        u_parseDelimitedFile(filename, ';', columns, 3, caseFoldingLineFn, NULL, pErrorCode);
    }
}
/*
 * Parse a two-field, semicolon-delimited file, handing each line to
 * mirrorLineFn.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the parse is skipped if it is NULL or
 *             already indicates a failure, and a parse failure is left in it
 */
static void
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) {
    char *columns[2][2];

    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
        u_parseDelimitedFile(filename, ';', columns, 2, mirrorLineFn, NULL, pErrorCode);
    }
}
/*
 * Seed the default Bidi classes for unassigned code points in pv, then parse
 * the 15-field, semicolon-delimited file, handing each line to
 * unicodeDataLineFn.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A failure while setting one of the default ranges prints a message and
 * exits the process. A parse failure is left in *pErrorCode for the caller.
 */
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    /* default Bidi classes for unassigned code points */
    static const UChar32 defaultBidi[][3]={ /* { start, end, class } */
        /* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */
        { 0x0590, 0x05FF, U_RIGHT_TO_LEFT },
        { 0x07C0, 0x08FF, U_RIGHT_TO_LEFT },
        { 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT },
        { 0x10800, 0x10FFF, U_RIGHT_TO_LEFT },

        /* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */
        { 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC }

        /* L otherwise */
    };

    char *fields[15][2];
    UChar32 start, end;
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Set default Bidi classes for unassigned code points.
     * See the documentation for Bidi_Class in UCD.html in the Unicode data.
     * http://www.unicode.org/Public/
     *
     * Starting with Unicode 5.0, DerivedBidiClass.txt should (re)set
     * the Bidi_Class values for all code points including unassigned ones
     * and including L values for these.
     * This code becomes unnecessary but harmless. Leave it for now in case
     * someone uses genbidi on pre-Unicode 5.0 data.
     */
    for(i=0; i<LENGTHOF(defaultBidi); ++i) {
        start=defaultBidi[i][0];
        end=defaultBidi[i][1];
        upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            /* %lx requires unsigned long arguments */
            fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
                            (unsigned long)start, (unsigned long)end, u_errorName(*pErrorCode));
            exit(*pErrorCode);
        }
    }

    /* any parse failure is reported to the caller through *pErrorCode */
    u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
}
/*
 * Parse a five-field, semicolon-delimited file via specialCasingLineFn, then
 * collapse multiple specialCasings[] entries for the same code point into a
 * single "complex" entry.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * On return without error, specialCasingCount has been reduced by the number
 * of collapsed duplicates, and U+03C2 has been added to caseSensitive.
 */
static void
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
    char *columns[5][2];
    int32_t idx, removed;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 5, specialCasingLineFn, NULL, pErrorCode);

    /* sort the special casing entries by code point */
    if(specialCasingCount>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* replace multiple entries for any code point by one "complex" one */
    removed=0;
    idx=1;
    while(idx<specialCasingCount) {
        if(specialCasings[idx].code==specialCasings[idx-1].code) {
            /*
             * Duplicate code point: give the earlier entry the sentinel code
             * 0x7fffffff so that the re-sort below takes it out of the live
             * range, and turn the later entry into the complex one.
             */
            specialCasings[idx-1].code=0x7fffffff;
            specialCasings[idx].isComplex=TRUE;
            specialCasings[idx].lowerCase[0]=0;
            specialCasings[idx].upperCase[0]=0;
            specialCasings[idx].titleCase[0]=0;
            ++removed;
        }
        ++idx;
    }

    /* if some entries just were removed, then re-sort and shrink the count */
    if(removed>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
        specialCasingCount-=removed;
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Add one complex mapping to caseSensitive that was filtered out above:
     * Greek final Sigma has a conditional mapping but not locale-sensitive,
     * and it is taken when lowercasing just U+03A3 alone.
     * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
     */
    uset_add(caseSensitive, 0x3c2);
}
/*
 * Parse a two-field, semicolon-delimited file, handing each line to
 * derivedNormalizationPropertiesLineFn.
 *
 * filename     path handed to u_parseDelimitedFile()
 * pErrorCode   in/out ICU error code; the function does nothing if it is
 *              NULL or already indicates a failure
 * reportError  if FALSE, a U_FILE_ACCESS_ERROR is tolerated: it is left in
 *              *pErrorCode and the function returns quietly
 *
 * Any other parse failure (or any failure at all when reportError is set)
 * prints a message and exits the process.
 */
static void
parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError) {
    char *columns[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 2, derivedNormalizationPropertiesLineFn, NULL, pErrorCode);

    if(!U_FAILURE(*pErrorCode)) {
        return;
    }
    if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
        /* an inaccessible file is acceptable to this caller */
        return;
    }

    fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n",
            filename, u_errorName(*pErrorCode));
    exit(*pErrorCode);
}
/*
 * Parse a 15-field, semicolon-delimited file, handing each line to
 * unicodeDataLineFn.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A parse failure prints a message and exits the process.
 */
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    char *columns[15][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 15, unicodeDataLineFn, NULL, pErrorCode);
    if(!U_FAILURE(*pErrorCode)) {
        return;
    }

    fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n",
            filename, u_errorName(*pErrorCode));
    exit(*pErrorCode);
}
/*
 * Parse a three-field, semicolon-delimited file, handing each line to
 * strprepProfileLineFn with data as its context pointer.
 *
 * filename     path handed to u_parseDelimitedFile()
 * data         forwarded unchanged to the line function
 * reportError  if FALSE, a U_FILE_ACCESS_ERROR is not logged
 * pErrorCode   in/out ICU error code; the function does nothing if it is
 *              NULL or already indicates a failure
 *
 * A failure is logged through log_err() (subject to reportError) and is
 * left in *pErrorCode.
 */
static void
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode) {
    char *columns[3][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 3, strprepProfileLineFn, (void*)data, pErrorCode);

    if(!U_FAILURE(*pErrorCode)) {
        return;
    }
    if(!reportError && *pErrorCode==U_FILE_ACCESS_ERROR) {
        /* the caller asked not to hear about an inaccessible file */
        return;
    }

    log_err( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
}
/*
 * Parse one two-field, semicolon-delimited file described by bin, handing
 * each line to binariesLineFn.
 *
 * filename    full path handed to u_parseDelimitedFile()
 * basename    passed to writeUCDFilename() together with bin->ucdFile and
 *             suffix (presumably the position inside filename where the
 *             file name gets written - confirm against writeUCDFilename)
 * suffix      see basename
 * bin         forwarded to binariesLineFn as its context pointer
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A parse failure is reported on stderr and left in *pErrorCode.
 */
static void
parseBinariesFile(char *filename, char *basename, const char *suffix,
                  const Binaries *bin,
                  UErrorCode *pErrorCode) {
    char *columns[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, bin->ucdFile, suffix);
    u_parseDelimitedFile(filename, ';', columns, 2, binariesLineFn, (void *)bin, pErrorCode);

    if(!U_FAILURE(*pErrorCode)) {
        return;
    }
    fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
}
/*
 * Parse a four-field, semicolon-delimited file, handing each line to
 * normalizationCorrectionsLineFn.
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A U_FILE_ACCESS_ERROR is tolerated and left in *pErrorCode. Any other
 * parse failure prints a message and exits the process.
 */
static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
    char *columns[4][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);

    if(!U_FAILURE(*pErrorCode)) {
        return;
    }
    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
        /* an inaccessible file is not fatal here */
        return;
    }

    fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n",
            filename, u_errorName(*pErrorCode));
    exit(*pErrorCode);
}
/*
 * Parse a two-field, semicolon-delimited file, handing each line to
 * nameAliasesLineFn.
 *
 * filename      path handed to u_parseDelimitedFile()
 * storeOptions  only its storeNames flag is read: when it is false the file
 *               is not parsed at all
 *
 * Exits the process if parsing fails. Unless beQuiet is set, prints
 * cpNameAliasesTop as the number of name aliases.
 */
static void
parseNameAliases(const char *filename, Options *storeOptions) {
    UErrorCode status=U_ZERO_ERROR;
    char *columns[2][2];

    if(!storeOptions->storeNames) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 2, nameAliasesLineFn, NULL, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(status));
        exit(status);
    }

    if(beQuiet) {
        return;
    }
    printf("number of name aliases: %lu\n",
           (unsigned long)cpNameAliasesTop);
}
/*
 * Parse a two-field, semicolon-delimited file with a caller-supplied line
 * function.
 *
 * filename    full path handed to u_parseDelimitedFile()
 * basename    passed to writeUCDFilename() together with ucdFile and suffix
 *             (presumably the position inside filename where the file name
 *             gets written - confirm against writeUCDFilename)
 * ucdFile     file name without ".txt", also used in the error message
 * suffix      see basename
 * lineFn      callback invoked by the parser; it receives a NULL context
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * A parse failure is reported on stderr and left in *pErrorCode.
 */
static void
parseTwoFieldFile(char *filename, char *basename,
                  const char *ucdFile, const char *suffix,
                  UParseLineFn *lineFn,
                  UErrorCode *pErrorCode) {
    char *columns[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, ucdFile, suffix);
    u_parseDelimitedFile(filename, ';', columns, 2, lineFn, NULL, pErrorCode);

    if(!U_FAILURE(*pErrorCode)) {
        return;
    }
    fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
}
/*
 * Parse a 15-field, semicolon-delimited file, handing each line to lineFn.
 *
 * filename      path handed to u_parseDelimitedFile()
 * store10Names  the address of this parameter is passed to the line
 *               function as its context pointer
 *
 * Exits the process if parsing fails. Unless beQuiet is set, prints the
 * lineTop, lineCount and wordCount totals.
 */
static void
parseDB(const char *filename, UBool store10Names) {
    UErrorCode status=U_ZERO_ERROR;
    char *columns[15][2];

    u_parseDelimitedFile(filename, ';', columns, 15, lineFn, &store10Names, &status);
    if(U_FAILURE(status)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(status));
        exit(status);
    }

    if(beQuiet) {
        return;
    }

    printf("size of all names in the database: %lu\n",
           (unsigned long)lineTop);
    printf("number of named Unicode characters: %lu\n",
           (unsigned long)lineCount);
    printf("number of words in the dictionary from these names: %lu\n",
           (unsigned long)wordCount);
}
/*
 * Parse a 15-field, semicolon-delimited file via unicodeDataLineFn, verify
 * that the special-casing and case-folding entries were all consumed, and
 * feed every item of the caseSensitive set to addCaseSensitive().
 *
 * filename    path handed to u_parseDelimitedFile()
 * pErrorCode  in/out ICU error code; the function does nothing if it is
 *             NULL or already indicates a failure
 *
 * Exits the process with U_PARSE_ERROR if specialCasingIndex has not reached
 * specialCasingCount or caseFoldingIndex has not reached caseFoldingCount.
 * These checks run before the parse result itself is examined.
 */
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    char *columns[15][2];
    UChar32 rangeStart, rangeEnd;
    int32_t item;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', columns, 15, unicodeDataLineFn, NULL, pErrorCode);

    /* are all sub-properties consumed? */
    if(specialCasingCount>specialCasingIndex) {
        fprintf(stderr, "gencase: error - some code points in SpecialCasing.txt are missing from UnicodeData.txt\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(caseFoldingCount>caseFoldingIndex) {
        fprintf(stderr, "gencase: error - some code points in CaseFolding.txt are missing from UnicodeData.txt\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* walk the set items until uset_getItem() returns non-zero or fails */
    item=0;
    for(;;) {
        if(0!=uset_getItem(caseSensitive, item, &rangeStart, &rangeEnd, NULL, 0, pErrorCode) ||
           U_FAILURE(*pErrorCode)
        ) {
            break;
        }
        addCaseSensitive(rangeStart, rangeEnd);
        ++item;
    }

    /* running off the end of the set is the expected way to stop */
    if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
        *pErrorCode=U_ZERO_ERROR;
    }
}
void UnicodeTest::TestAdditionalProperties() { #if !UCONFIG_NO_NORMALIZATION // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) { errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n", UPRV_LENGTHOF(derivedPropsNames)); return; } if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) { errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)\n"); return; } char path[500]; if(getUnidataPath(path) == NULL) { errln("unable to find path to source/data/unidata/"); return; } char *basename=strchr(path, 0); strcpy(basename, "DerivedCoreProperties.txt"); char *fields[2][2]; UErrorCode errorCode=U_ZERO_ERROR; u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); if(U_FAILURE(errorCode)) { errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode)); return; } strcpy(basename, "DerivedNormalizationProps.txt"); u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); if(U_FAILURE(errorCode)) { errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode)); return; } // now we have all derived core properties in the UnicodeSets // run them all through the API int32_t rangeCount, range; uint32_t i; UChar32 start, end; // test all TRUE properties for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { rangeCount=derivedProps[i].getRangeCount(); for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { start=derivedProps[i].getRangeStart(range); end=derivedProps[i].getRangeEnd(range); for(; start<=end; ++start) { if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]); if(++numErrors[i]>=MAX_ERRORS) { dataerrln("Too many errors, moving to the next test"); break; } } } } } // invert all properties for(i=0; 
i<UPRV_LENGTHOF(derivedPropsNames); ++i) { derivedProps[i].complement(); } // test all FALSE properties for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { rangeCount=derivedProps[i].getRangeCount(); for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { start=derivedProps[i].getRangeStart(range); end=derivedProps[i].getRangeEnd(range); for(; start<=end; ++start) { if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]); if(++numErrors[i]>=MAX_ERRORS) { errln("Too many errors, moving to the next test"); break; } } } } } #endif /* !UCONFIG_NO_NORMALIZATION */ }
void UnicodeTest::TestAdditionalProperties() { #if !UCONFIG_NO_NORMALIZATION // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) { errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n", LENGTHOF(derivedPropsNames)); return; } if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) { errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)\n"); return; } char newPath[256]; char backupPath[256]; char *fields[2][2]; UErrorCode errorCode=U_ZERO_ERROR; /* Look inside ICU_DATA first */ strcpy(newPath, pathToDataDirectory()); strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); // As a fallback, try to guess where the source data was located // at the time ICU was built, and look there. # ifdef U_TOPSRCDIR strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); # else strcpy(backupPath, loadTestData(errorCode)); strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 
U_FILE_SEP_STRING "data"); # endif strcat(backupPath, U_FILE_SEP_STRING); strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); char *path=newPath; u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode); if(errorCode==U_FILE_ACCESS_ERROR) { errorCode=U_ZERO_ERROR; path=backupPath; u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode); } if(U_FAILURE(errorCode)) { errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode)); return; } char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt"); strcpy(basename, "DerivedNormalizationProps.txt"); u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); if(U_FAILURE(errorCode)) { errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode)); return; } // now we have all derived core properties in the UnicodeSets // run them all through the API int32_t rangeCount, range; uint32_t i; UChar32 start, end; // test all TRUE properties for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { rangeCount=derivedProps[i].getRangeCount(); for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { start=derivedProps[i].getRangeStart(range); end=derivedProps[i].getRangeEnd(range); for(; start<=end; ++start) { if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]); if(++numErrors[i]>=MAX_ERRORS) { dataerrln("Too many errors, moving to the next test"); break; } } } } } // invert all properties for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { derivedProps[i].complement(); } // test all FALSE properties for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { rangeCount=derivedProps[i].getRangeCount(); for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { start=derivedProps[i].getRangeStart(range); end=derivedProps[i].getRangeEnd(range); for(; start<=end; ++start) { 
if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]); if(++numErrors[i]>=MAX_ERRORS) { errln("Too many errors, moving to the next test"); break; } } } } } #endif /* !UCONFIG_NO_NORMALIZATION */ }