Example #1
0
static void
parseDB(const char *filename, Options *storeOptions) {
    char *fields[15][2];
    UErrorCode errorCode=U_ZERO_ERROR;

    u_parseDelimitedFile(filename, ';', fields, 15, lineFn, storeOptions, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(errorCode));
        exit(errorCode);
    }
    if(cpNameAliasesIndex<cpNameAliasesTop) {
        fprintf(stderr, "gennames: error - NameAlias but no UnicodeData entry for U+%04lx\n",
                (unsigned long)cpNameAliases[cpNameAliasesIndex].code);
        exit(U_PARSE_ERROR);
    }

    if(!beQuiet) {
        printf("size of all names in the database: %lu\n",
            (unsigned long)lineTop);
        printf("number of named Unicode characters: %lu\n",
            (unsigned long)lineCount);
        printf("number of words in the dictionary from these names: %lu\n",
            (unsigned long)wordCount);
    }
}
Example #2
0
static void
parseBinariesFile(char *filename, char *basename, const char *suffix,
                  const Binaries *bin,
                  UErrorCode *pErrorCode) {
    char *fields[2][2];
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, bin->ucdFile, suffix);

    ignoredPropsCount=0;

    u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
    }

    if(beVerbose) {
        for(i=0; i<ignoredPropsCount; ++i) {
            printf("genprops: ignoring property %s in %s.txt\n", ignoredProps[i], bin->ucdFile);
        }
    }
}
Example #3
0
static void
parseCaseFolding(const char *filename, UErrorCode *pErrorCode) {
    char *fields[3][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 3, caseFoldingLineFn, NULL, pErrorCode);
}
Example #4
0
static void
parseBidiMirroring(const char *filename, UErrorCode *pErrorCode) {
    char *fields[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 2, mirrorLineFn, NULL, pErrorCode);
}
Example #5
0
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    /* default Bidi classes for unassigned code points */
    static const UChar32 defaultBidi[][3]={ /* { start, end, class } */
        /* R: U+0590..U+05FF, U+07C0..U+08FF, U+FB1D..U+FB4F, U+10800..U+10FFF */
        { 0x0590, 0x05FF, U_RIGHT_TO_LEFT },
        { 0x07C0, 0x08FF, U_RIGHT_TO_LEFT },
        { 0xFB1D, 0xFB4F, U_RIGHT_TO_LEFT },
        { 0x10800, 0x10FFF, U_RIGHT_TO_LEFT },

        /* AL: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE */
        { 0x0600, 0x07BF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFB50, 0xFDCF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFDF0, 0xFDFF, U_RIGHT_TO_LEFT_ARABIC },
        { 0xFE70, 0xFEFE, U_RIGHT_TO_LEFT_ARABIC }

        /* L otherwise */
    };

    char *fields[15][2];
    UChar32 start, end;
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Set default Bidi classes for unassigned code points.
     * See the documentation for Bidi_Class in UCD.html in the Unicode data.
     * http://www.unicode.org/Public/
     *
     * Starting with Unicode 5.0, DerivedBidiClass.txt should (re)set
     * the Bidi_Class values for all code points including unassigned ones
     * and including L values for these.
     * This code becomes unnecesary but harmless. Leave it for now in case
     * someone uses genbidi on pre-Unicode 5.0 data.
     */
    for(i=0; i<LENGTHOF(defaultBidi); ++i) {
        start=defaultBidi[i][0];
        end=defaultBidi[i][1];
        upvec_setValue(pv, start, end, 0, (uint32_t)defaultBidi[i][2], UBIDI_CLASS_MASK, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "genbidi error: unable to set default bidi class for U+%04lx..U+%04lx, code: %s\n",
                            (long)start, (long)end, u_errorName(*pErrorCode));
            exit(*pErrorCode);
        }
    }

    u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);

    if(U_FAILURE(*pErrorCode)) {
        return;
    }
}
Example #6
0
static void
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
    char *fields[5][2];
    int32_t i, j;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode);

    /* sort the special casing entries by code point */
    if(specialCasingCount>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /* replace multiple entries for any code point by one "complex" one */
    j=0;
    for(i=1; i<specialCasingCount; ++i) {
        if(specialCasings[i-1].code==specialCasings[i].code) {
            /* there is a duplicate code point */
            specialCasings[i-1].code=0x7fffffff;    /* remove this entry in the following sorting */
            specialCasings[i].isComplex=TRUE;       /* make the following one complex */
            specialCasings[i].lowerCase[0]=0;
            specialCasings[i].upperCase[0]=0;
            specialCasings[i].titleCase[0]=0;
            ++j;
        }
    }

    /* if some entries just were removed, then re-sort */
    if(j>0) {
        uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
                       compareSpecialCasings, NULL, FALSE, pErrorCode);
        specialCasingCount-=j;
    }
    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    /*
     * Add one complex mapping to caseSensitive that was filtered out above:
     * Greek final Sigma has a conditional mapping but not locale-sensitive,
     * and it is taken when lowercasing just U+03A3 alone.
     * 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
     */
    uset_add(caseSensitive, 0x3c2);
}
Example #7
0
static void
parseDerivedNormalizationProperties(const char *filename, UErrorCode *pErrorCode, UBool reportError) {
    char *fields[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 2, derivedNormalizationPropertiesLineFn, NULL, pErrorCode);
    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
        fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
Example #8
0
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    char *fields[15][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gennorm error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
Example #9
0
static void
parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode) {
    char *fields[3][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)data, pErrorCode);

    /*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/

    if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
        log_err( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
    }
}
Example #10
0
static void
parseBinariesFile(char *filename, char *basename, const char *suffix,
                  const Binaries *bin,
                  UErrorCode *pErrorCode) {
    char *fields[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, bin->ucdFile, suffix);

    u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
    }
}
Example #11
0
static void
parseNormalizationCorrections(const char *filename, UErrorCode *pErrorCode) {
    char *fields[4][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 4, normalizationCorrectionsLineFn, NULL, pErrorCode);

    /* fprintf(stdout,"Number of code points that have NormalizationCorrections mapping with length >1 : %i\n",len); */

    if(U_FAILURE(*pErrorCode) && ( *pErrorCode!=U_FILE_ACCESS_ERROR)) {
        fprintf(stderr, "gensprep error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
        exit(*pErrorCode);
    }
}
Example #12
0
static void
parseNameAliases(const char *filename, Options *storeOptions) {
    char *fields[2][2];
    UErrorCode errorCode=U_ZERO_ERROR;

    if(!storeOptions->storeNames) {
        return;
    }
    u_parseDelimitedFile(filename, ';', fields, 2, nameAliasesLineFn, NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    if(!beQuiet) {
        printf("number of name aliases: %lu\n", (unsigned long)cpNameAliasesTop);
    }
}
Example #13
0
static void
parseTwoFieldFile(char *filename, char *basename,
                  const char *ucdFile, const char *suffix,
                  UParseLineFn *lineFn,
                  UErrorCode *pErrorCode) {
    char *fields[2][2];

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    writeUCDFilename(basename, ucdFile, suffix);

    u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
    }
}
static void
parseDB(const char *filename, UBool store10Names) {
    char *fields[15][2];
    UErrorCode errorCode=U_ZERO_ERROR;

    u_parseDelimitedFile(filename, ';', fields, 15, lineFn, &store10Names, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gennames parse error: %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    if(!beQuiet) {
        printf("size of all names in the database: %lu\n",
            (unsigned long)lineTop);
        printf("number of named Unicode characters: %lu\n",
            (unsigned long)lineCount);
        printf("number of words in the dictionary from these names: %lu\n",
            (unsigned long)wordCount);
    }
}
Example #15
0
static void
parseDB(const char *filename, UErrorCode *pErrorCode) {
    char *fields[15][2];
    UChar32 start, end;
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);

    /* are all sub-properties consumed? */
    if(specialCasingIndex<specialCasingCount) {
        fprintf(stderr, "gencase: error - some code points in SpecialCasing.txt are missing from UnicodeData.txt\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    if(caseFoldingIndex<caseFoldingCount) {
        fprintf(stderr, "gencase: error - some code points in CaseFolding.txt are missing from UnicodeData.txt\n");
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    if(U_FAILURE(*pErrorCode)) {
        return;
    }

    for(i=0;
        0==uset_getItem(caseSensitive, i, &start, &end, NULL, 0, pErrorCode) && U_SUCCESS(*pErrorCode);
        ++i
    ) {
        addCaseSensitive(start, end);
    }
    if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
        *pErrorCode=U_ZERO_ERROR;
    }
}
Example #16
0
void UnicodeTest::TestAdditionalProperties() {
#if !UCONFIG_NO_NORMALIZATION
    // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
    if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) {
        errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
              UPRV_LENGTHOF(derivedPropsNames));
        return;
    }
    if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) {
        errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)\n");
        return;
    }

    char path[500];
    if(getUnidataPath(path) == NULL) {
        errln("unable to find path to source/data/unidata/");
        return;
    }
    char *basename=strchr(path, 0);
    strcpy(basename, "DerivedCoreProperties.txt");

    char *fields[2][2];
    UErrorCode errorCode=U_ZERO_ERROR;
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
        return;
    }

    strcpy(basename, "DerivedNormalizationProps.txt");
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
        return;
    }

    // now we have all derived core properties in the UnicodeSets
    // run them all through the API
    int32_t rangeCount, range;
    uint32_t i;
    UChar32 start, end;

    // test all TRUE properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      dataerrln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }

    // invert all properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        derivedProps[i].complement();
    }

    // test all FALSE properties
    for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }
#endif /* !UCONFIG_NO_NORMALIZATION */
}
Example #17
0
void UnicodeTest::TestAdditionalProperties() {
#if !UCONFIG_NO_NORMALIZATION
    // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt
    if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) {
        errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n",
              LENGTHOF(derivedPropsNames));
        return;
    }
    if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) {
        errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)\n");
        return;
    }

    char newPath[256];
    char backupPath[256];
    char *fields[2][2];
    UErrorCode errorCode=U_ZERO_ERROR;

    /* Look inside ICU_DATA first */
    strcpy(newPath, pathToDataDirectory());
    strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

    // As a fallback, try to guess where the source data was located
    // at the time ICU was built, and look there.
#   ifdef U_TOPSRCDIR
        strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
#   else
        strcpy(backupPath, loadTestData(errorCode));
        strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
#   endif
    strcat(backupPath, U_FILE_SEP_STRING);
    strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt");

    char *path=newPath;
    u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);

    if(errorCode==U_FILE_ACCESS_ERROR) {
        errorCode=U_ZERO_ERROR;
        path=backupPath;
        u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    }
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode));
        return;
    }
    char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt");
    strcpy(basename, "DerivedNormalizationProps.txt");
    u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode);
    if(U_FAILURE(errorCode)) {
        errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode));
        return;
    }

    // now we have all derived core properties in the UnicodeSets
    // run them all through the API
    int32_t rangeCount, range;
    uint32_t i;
    UChar32 start, end;

    // test all TRUE properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      dataerrln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }

    // invert all properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        derivedProps[i].complement();
    }

    // test all FALSE properties
    for(i=0; i<LENGTHOF(derivedPropsNames); ++i) {
        rangeCount=derivedProps[i].getRangeCount();
        for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) {
            start=derivedProps[i].getRangeStart(range);
            end=derivedProps[i].getRangeEnd(range);
            for(; start<=end; ++start) {
                if(u_hasBinaryProperty(start, derivedPropsIndex[i])) {
                    errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]);
                    if(++numErrors[i]>=MAX_ERRORS) {
                      errln("Too many errors, moving to the next test");
                      break;
                    }
                }
            }
        }
    }
#endif /* !UCONFIG_NO_NORMALIZATION */
}