static void U_CALLCONV
specialCasingLineFn(void *context,
                    char *fields[][2], int32_t fieldCount,
                    UErrorCode *pErrorCode) {
    char *end;

    /* get code point */
    specialCasings[specialCasingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
    end=(char *)u_skipWhitespace(end);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gencase: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* is this a complex mapping? */
    if(*(end=(char *)u_skipWhitespace(fields[4][0]))!=0 && *end!=';' && *end!='#') {
        /* there is some condition text in the fifth field */
        specialCasings[specialCasingCount].isComplex=TRUE;

        /* do not store any actual mappings for this */
        specialCasings[specialCasingCount].lowerCase[0]=0;
        specialCasings[specialCasingCount].upperCase[0]=0;
        specialCasings[specialCasingCount].titleCase[0]=0;
    } else {
        /* just set the "complex" flag and get the case mappings */
        specialCasings[specialCasingCount].isComplex=FALSE;
        specialCasings[specialCasingCount].lowerCase[0]=
            (UChar)u_parseString(fields[1][0], specialCasings[specialCasingCount].lowerCase+1, 31, NULL, pErrorCode);
        specialCasings[specialCasingCount].upperCase[0]=
            (UChar)u_parseString(fields[3][0], specialCasings[specialCasingCount].upperCase+1, 31, NULL, pErrorCode);
        specialCasings[specialCasingCount].titleCase[0]=
            (UChar)u_parseString(fields[2][0], specialCasings[specialCasingCount].titleCase+1, 31, NULL, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "gencase: error parsing special casing at %s\n", fields[0][0]);
            exit(*pErrorCode);
        }

        uset_add(caseSensitive, (UChar32)specialCasings[specialCasingCount].code);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].lowerCase+1, specialCasings[specialCasingCount].lowerCase[0]);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].upperCase+1, specialCasings[specialCasingCount].upperCase[0]);
        _set_addAll(caseSensitive, specialCasings[specialCasingCount].titleCase+1, specialCasings[specialCasingCount].titleCase[0]);
    }

    if(++specialCasingCount==MAX_SPECIAL_CASING_COUNT) {
        fprintf(stderr, "gencase: too many special casing mappings\n");
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
}
Exemple #2
0
void
PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
    UChar *buffer=uni.getBuffer(-1);
    int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
        errorCode=U_ZERO_ERROR;
        uni.releaseBuffer(0);
        buffer=uni.getBuffer(length);
        length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
    }
    uni.releaseBuffer(length);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr,
                "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
                s, (long)lineNumber);
    }
}
Exemple #3
0
static void U_CALLCONV
derivedNormalizationPropertiesLineFn(void *context,
                                     char *fields[][2], int32_t fieldCount,
                                     UErrorCode *pErrorCode) {
    UChar string[32];
    char *s;
    uint32_t start, end;
    int32_t count;
    uint8_t qcFlags;

    /* get code point range */
    count=u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gennorm: error parsing DerivedNormalizationProperties.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* ignore hangul - handle explicitly */
    if(start==0xac00) {
        return;
    }

    /* get property - ignore unrecognized ones */
    s=(char *)u_skipWhitespace(fields[1][0]);
    if(*s=='N' && s[1]=='F') {
        /* quick check flag */
        qcFlags=0x11;
        s+=2;
        if(*s=='K') {
            qcFlags<<=1;
            ++s;
        }

        if(*s=='C' && s[1]=='_') {
            s+=2;
        } else if(*s=='D' && s[1]=='_') {
            qcFlags<<=2;
            s+=2;
        } else {
            return;
        }

        if(0==uprv_memcmp(s, "NO", 2)) {
            qcFlags&=0xf;
        } else if(0==uprv_memcmp(s, "MAYBE", 5)) {
            qcFlags&=0x30;
        } else if(0==uprv_memcmp(s, "QC", 2) && *(s=(char *)u_skipWhitespace(s+2))==';') {
            /*
             * Unicode 4.0.1:
             * changes single field "NFD_NO" -> two fields "NFD_QC; N" etc.
             */
            /* start of the field */
            s=(char *)u_skipWhitespace(s+1);
            if(*s=='N') {
                qcFlags&=0xf;
            } else if(*s=='M') {
                qcFlags&=0x30;
            } else {
                return; /* do nothing for "Yes" because it's the default value */
            }
        } else {
            return; /* do nothing for "Yes" because it's the default value */
        }

        /* set this flag for all code points in this range */
        while(start<=end) {
            setQCFlags(start++, qcFlags);
        }
    } else if(0==uprv_memcmp(s, "Comp_Ex", 7) || 0==uprv_memcmp(s, "Full_Composition_Exclusion", 26)) {
        /* full composition exclusion */
        while(start<=end) {
            setCompositionExclusion(start++);
        }
    } else if(
        ((0==uprv_memcmp(s, "FNC", 3) && *(s=(char *)u_skipWhitespace(s+3))==';') || 
        (0==uprv_memcmp(s, "FC_NFKC", 7) && *(s=(char *)u_skipWhitespace(s+7))==';'))
        
    ) {
        /* FC_NFKC_Closure, parse field 2 to get the string */
        char *t;

        /* start of the field */
        s=(char *)u_skipWhitespace(s+1);

        /* find the end of the field */
        for(t=s; *t!=';' && *t!='#' && *t!=0 && *t!='\n' && *t!='\r'; ++t) {}
        *t=0;

        string[0]=(UChar)u_parseString(s, string+1, 31, NULL, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "gennorm error: illegal FNC string at %s\n", fields[0][0]);
            exit(*pErrorCode);
        }
        while(start<=end) {
            setFNC(start++, string);
        }
    }
}
void MultithreadTest::TestCollators()
{

    UErrorCode status = U_ZERO_ERROR;
    FILE *testFile = NULL;
    char testDataPath[1024];
    strcpy(testDataPath, IntlTest::getSourceTestData(status));
    if (U_FAILURE(status)) {
        errln("ERROR: could not open test data %s", u_errorName(status));
        return;
    }
    strcat(testDataPath, "CollationTest_");

    const char* type = "NON_IGNORABLE";

    const char *ext = ".txt";
    if(testFile) {
        fclose(testFile);
    }
    char buffer[1024];
    strcpy(buffer, testDataPath);
    strcat(buffer, type);
    size_t bufLen = strlen(buffer);

    // we try to open 3 files:
    // path/CollationTest_type.txt
    // path/CollationTest_type_SHORT.txt
    // path/CollationTest_type_STUB.txt
    // we are going to test with the first one that we manage to open.

    strcpy(buffer+bufLen, ext);

    testFile = fopen(buffer, "rb");

    if(testFile == 0) {
        strcpy(buffer+bufLen, "_SHORT");
        strcat(buffer, ext);
        testFile = fopen(buffer, "rb");

        if(testFile == 0) {
            strcpy(buffer+bufLen, "_STUB");
            strcat(buffer, ext);
            testFile = fopen(buffer, "rb");

            if (testFile == 0) {
                *(buffer+bufLen) = 0;
                dataerrln("could not open any of the conformance test files, tried opening base %s", buffer);
                return;        
            } else {
                infoln(
                    "INFO: Working with the stub file.\n"
                    "If you need the full conformance test, please\n"
                    "download the appropriate data files from:\n"
                    "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
            }
        }
    }

    Line *lines = new Line[200000];
    memset(lines, 0, sizeof(Line)*200000);
    int32_t lineNum = 0;

    UChar bufferU[1024];
    int32_t buflen = 0;
    uint32_t first = 0;
    uint32_t offset = 0;

    while (fgets(buffer, 1024, testFile) != NULL) {
        offset = 0;
        if(*buffer == 0 || strlen(buffer) < 3 || buffer[0] == '#') {
            continue;
        }
        offset = u_parseString(buffer, bufferU, 1024, &first, &status);
        buflen = offset;
        bufferU[offset++] = 0;
        lines[lineNum].buflen = buflen;
        //lines[lineNum].buff = new UChar[buflen+1];
        u_memcpy(lines[lineNum].buff, bufferU, buflen);
        lineNum++;
    }
    fclose(testFile);
    if(U_FAILURE(status)) {
      dataerrln("Couldn't read the test file!");
      return;
    }

    UCollator *coll = ucol_open("root", &status);
    if(U_FAILURE(status)) {
        errcheckln(status, "Couldn't open UCA collator");
        return;
    }
    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
    ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
    ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);

    int32_t noSpawned = 0;
    int32_t spawnResult = 0;
    LocalArray<CollatorThreadTest> tests(new CollatorThreadTest[kCollatorThreadThreads]);

    logln(UnicodeString("Spawning: ") + kCollatorThreadThreads + " threads * " + kFormatThreadIterations + " iterations each.");
    int32_t j = 0;
    for(j = 0; j < kCollatorThreadThreads; j++) {
        //logln("Setting collator %i", j);
        tests[j].setCollator(coll, lines, lineNum);
    }
    for(j = 0; j < kCollatorThreadThreads; j++) {
        log("%i ", j);
        spawnResult = tests[j].start();
        if(spawnResult != 0) {
            infoln("THREAD INFO: Couldn't spawn more than %i threads", noSpawned);
            break;
        }
        noSpawned++;
    }
    logln("Spawned all");
    if (noSpawned == 0) {
        errln("No threads could be spawned.");
        return;
    }

    for(int32_t patience = kCollatorThreadPatience;patience > 0; patience --)
    {
        logln("Waiting...");

        int32_t i;
        int32_t terrs = 0;
        int32_t completed =0;

        for(i=0;i<kCollatorThreadThreads;i++)
        {
            if (tests[i].isRunning() == FALSE)
            {
                completed++;

                //logln(UnicodeString("Test #") + i + " is complete.. ");

                UnicodeString theErr;
                if(tests[i].getError(theErr))
                {
                    terrs++;
                    errln(UnicodeString("#") + i + ": " + theErr);
                }
                // print out the error, too, if any.
            }
        }
        logln("Completed %i tests", completed);

        if(completed == noSpawned)
        {
            logln("Done! All %i tests are finished", noSpawned);

            if(terrs)
            {
                errln("There were errors.");
                SimpleThread::errorFunc();
            }
            ucol_close(coll);
            //for(i = 0; i < lineNum; i++) {
            //delete[] lines[i].buff;
            //}
            delete[] lines;

            return;
        }

        SimpleThread::sleep(900);
    }
    errln("patience exceeded. ");
    SimpleThread::errorFunc();
    ucol_close(coll);
}
Exemple #5
0
void parseFile(FILE *f, Normalizer2DataBuilder &builder) {
    IcuToolErrorCode errorCode("gennorm2/parseFile()");
    char line[300];
    uint32_t startCP, endCP;
    while(NULL!=fgets(line, (int)sizeof(line), f)) {
        char *comment=(char *)strchr(line, '#');
        if(comment!=NULL) {
            *comment=0;
        }
        u_rtrim(line);
        if(line[0]==0) {
            continue;  // skip empty and comment-only lines
        }
        if(line[0]=='*') {
            const char *s=u_skipWhitespace(line+1);
            if(0==strncmp(s, "Unicode", 7)) {
                s=u_skipWhitespace(s+7);
                builder.setUnicodeVersion(s);
            }
            continue;  // reserved syntax
        }
        const char *delimiter;
        int32_t rangeLength=
            u_parseCodePointRangeAnyTerminator(line, &startCP, &endCP, &delimiter, errorCode);
        if(errorCode.isFailure()) {
            fprintf(stderr, "gennorm2 error: parsing code point range from %s\n", line);
            exit(errorCode.reset());
        }
        delimiter=u_skipWhitespace(delimiter);
        if(*delimiter==':') {
            const char *s=u_skipWhitespace(delimiter+1);
            char *end;
            unsigned long value=strtoul(s, &end, 10);
            if(end<=s || *u_skipWhitespace(end)!=0 || value>=0xff) {
                fprintf(stderr, "gennorm2 error: parsing ccc from %s\n", line);
                exit(U_PARSE_ERROR);
            }
            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                builder.setCC(c, (uint8_t)value);
            }
            continue;
        }
        if(*delimiter=='-') {
            if(*u_skipWhitespace(delimiter+1)!=0) {
                fprintf(stderr, "gennorm2 error: parsing remove-mapping %s\n", line);
                exit(U_PARSE_ERROR);
            }
            for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                builder.removeMapping(c);
            }
            continue;
        }
        if(*delimiter=='=' || *delimiter=='>') {
            UChar uchars[Normalizer2Impl::MAPPING_LENGTH_MASK];
            int32_t length=u_parseString(delimiter+1, uchars, LENGTHOF(uchars), NULL, errorCode);
            if(errorCode.isFailure()) {
                fprintf(stderr, "gennorm2 error: parsing mapping string from %s\n", line);
                exit(errorCode.reset());
            }
            UnicodeString mapping(FALSE, uchars, length);
            if(*delimiter=='=') {
                if(rangeLength!=1) {
                    fprintf(stderr,
                            "gennorm2 error: round-trip mapping for more than 1 code point on %s\n",
                            line);
                    exit(U_PARSE_ERROR);
                }
                builder.setRoundTripMapping((UChar32)startCP, mapping);
            } else {
                for(UChar32 c=(UChar32)startCP; c<=(UChar32)endCP; ++c) {
                    builder.setOneWayMapping(c, mapping);
                }
            }
            continue;
        }
        fprintf(stderr, "gennorm2 error: unrecognized data line %s\n", line);
        exit(U_PARSE_ERROR);
    }
}
static void U_CALLCONV
caseFoldingLineFn(void *context,
                  char *fields[][2], int32_t fieldCount,
                  UErrorCode *pErrorCode) {
    char *end;
    static UChar32 prevCode=0;
    int32_t count;
    char status;

    /* get code point */
    caseFoldings[caseFoldingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
    end=(char *)u_skipWhitespace(end);
    if(end<=fields[0][0] || end!=fields[0][1]) {
        fprintf(stderr, "gencase: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* get the status of this mapping */
    caseFoldings[caseFoldingCount].status=status=*u_skipWhitespace(fields[1][0]);
    if(status!='L' && status!='E' && status!='C' && status!='S' && status!='F' && status!='I' && status!='T') {
        fprintf(stderr, "gencase: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }

    /* ignore all case folding mappings that are the same as the UnicodeData.txt lowercase mappings */
    if(status=='L') {
        return;
    }

    /* get the mapping */
    count=caseFoldings[caseFoldingCount].full[0]=
        (UChar)u_parseString(fields[2][0], caseFoldings[caseFoldingCount].full+1, 31, (uint32_t *)&caseFoldings[caseFoldingCount].simple, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "gencase: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }

    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
    if(count==0 || count>2 || (count==2 && UTF_IS_SINGLE(caseFoldings[caseFoldingCount].full[1]))) {
        caseFoldings[caseFoldingCount].simple=0;
    }

    /* update the case-sensitive set */
    if(status!='T') {
        uset_add(caseSensitive, (UChar32)caseFoldings[caseFoldingCount].code);
        _set_addAll(caseSensitive, caseFoldings[caseFoldingCount].full+1, caseFoldings[caseFoldingCount].full[0]);
    }

    /* check the status */
    if(status=='S') {
        /* check if there was a full mapping for this code point before */
        if( caseFoldingCount>0 &&
            caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
            caseFoldings[caseFoldingCount-1].status=='F'
        ) {
            /* merge the two entries */
            caseFoldings[caseFoldingCount-1].simple=caseFoldings[caseFoldingCount].simple;
            return;
        }
    } else if(status=='F') {
        /* check if there was a simple mapping for this code point before */
        if( caseFoldingCount>0 &&
            caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
            caseFoldings[caseFoldingCount-1].status=='S'
        ) {
            /* merge the two entries */
            uprv_memcpy(caseFoldings[caseFoldingCount-1].full, caseFoldings[caseFoldingCount].full, 32*U_SIZEOF_UCHAR);
            return;
        }
    } else if(status=='I' || status=='T') {
        /* check if there was a default mapping for this code point before (remove it) */
        while(caseFoldingCount>0 &&
              caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code
        ) {
            prevCode=0;
            --caseFoldingCount;
        }
        /* store only a marker for special handling for cases like dotless i */
        caseFoldings[caseFoldingCount].simple=0;
        caseFoldings[caseFoldingCount].full[0]=0;
    }

    /* check that the code points (caseFoldings[caseFoldingCount].code) are in ascending order */
    if(caseFoldings[caseFoldingCount].code<=prevCode && caseFoldings[caseFoldingCount].code>0) {
        fprintf(stderr, "gencase: error - CaseFolding entries out of order, U+%04lx after U+%04lx\n",
                (unsigned long)caseFoldings[caseFoldingCount].code,
                (unsigned long)prevCode);
        *pErrorCode=U_PARSE_ERROR;
        exit(U_PARSE_ERROR);
    }
    prevCode=caseFoldings[caseFoldingCount].code;

    if(++caseFoldingCount==MAX_CASE_FOLDING_COUNT) {
        fprintf(stderr, "gencase: too many case folding mappings\n");
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        exit(U_INDEX_OUTOFBOUNDS_ERROR);
    }
}
/*
*******************************************************************************
*
*   created on: 2013jul01
*   created by: Matitiahu Allouche

This function performs a conformance test for implementations of the
Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/

Each test case is represented in a single line which is read from a file
named BidiCharacter.txt.  Empty, blank and comment lines may also appear
in this file.

The format of the test data is specified below.  Note that each test
case constitutes a single line of text; reordering is applied within a
single line and independently of a rendering engine, and rules L3 and L4
are out of scope.

The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line,
Empty lines and lines containing only spaces and/or comments are ignored.

Lines which represent test cases consist of 4 or 5 fields separated by a
semicolon.  Each field consists of tokens separated by whitespace (space
or Tab).  Whitespace before and after semicolons is optional.

Field 0: A sequence of hexadecimal code point values separated by space

Field 1: A value representing the paragraph direction, as follows:
    - 0 represents left-to-right
    - 1 represents right-to-left
    - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
    - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
    - a negative number whose absolute value is taken as paragraph level;
      this may be useful to test cases where the embedding level approaches
      or exceeds the maximum embedding level.

Field 2: The resolved paragraph embedding level.  If the input (field 0)
         includes more than one paragraph, this field represents the
         resolved level of the first paragraph.

Field 3: An ordered list of resulting levels for each token in field 0
         (each token represents one source character).
         The UBA does not assign levels to certain characters (e.g. LRO);
         characters removed in rule X9 are indicated with an 'x'.

Field 4: An ordered list of indices showing the resulting visual ordering
         from left to right; characters with a resolved level of 'x' are
         skipped.  The number are zero-based.  Each index corresponds to
         a character in the reordered (visual) string. It represents the
         index of the source character in the input (field 0).
         This field is optional.  When it is absent, the visual ordering
         is not verified.

Examples:

# This is a comment line.
L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
L L ON R;0;0;0 0 0 1;0 1 2 3

# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
# Note: in the next line, 'b' represents the letter 'b', not a block separator.
a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5

a R R x ; 1 ; 1 ; 2 1 1 2
L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1

*
*******************************************************************************
*/
void BiDiConformanceTest::TestBidiCharacterTest() {
    IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
    const char *sourceTestDataPath=getSourceTestData(errorCode);
    if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
                                      "folder (getSourceTestData())")) {
        return;
    }
    char bidiTestPath[400];
    strcpy(bidiTestPath, sourceTestDataPath);
    strcat(bidiTestPath, "BidiCharacterTest.txt");
    LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
    if(bidiTestFile.isNull()) {
        errln("unable to open %s", bidiTestPath);
        return;
    }
    LocalUBiDiPointer ubidi(ubidi_open());
    lineNumber=0;
    levelsCount=0;
    orderingCount=0;
    errorCount=0;
    while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
        ++lineNumber;
        paraLevelName="N/A";
        inputString="N/A";
        // Remove trailing comments and whitespace.
        char *commentStart=strchr(line, '#');
        if(commentStart!=NULL) {
            *commentStart=0;
        }
        u_rtrim(line);
        const char *start=u_skipWhitespace(line);
        if(*start==0) {
            continue;  // Skip empty and comment-only lines.
        }
        // Parse the code point string in field 0.
        UChar *buffer=inputString.getBuffer(200);
        int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
        if(errorCode.logIfFailureAndReset("Invalid string in field 0")) {
            errln("Input line %d: %s", (int)lineNumber, line);
            inputString.remove();
            continue;
        }
        inputString.releaseBuffer(length);
        start=strchr(start, ';');
        if(start==NULL) {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            continue;
        }
        start=u_skipWhitespace(start+1);
        char *end;
        int32_t paraDirection=(int32_t)strtol(start, &end, 10);
        UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
        if(paraDirection==0) {
            paraLevel=0;
            paraLevelName="LTR";
        }
        else if(paraDirection==1) {
            paraLevel=1;
            paraLevelName="RTL";
        }
        else if(paraDirection==2) {
            paraLevel=UBIDI_DEFAULT_LTR;
            paraLevelName="Auto/LTR";
        }
        else if(paraDirection==3) {
            paraLevel=UBIDI_DEFAULT_RTL;
            paraLevelName="Auto/RTL";
        }
        else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
            paraLevel=(UBiDiLevel)(-paraDirection);
            sprintf(levelNameString, "%d", (int)paraLevel);
            paraLevelName=levelNameString;
        }
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
                         paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
            errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
            printErrorLine();
            continue;
        }
        start=u_skipWhitespace(end);
        if(*start!=';') {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            continue;
        }
        start++;
        uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
        if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
           resolvedParaLevel>1) {
            errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
            printErrorLine();
            continue;
        }
        start=u_skipWhitespace(end);
        if(*start!=';') {
            errorCount++;
            errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
            return;
        }
        start++;
        if(!parseLevels(start)) {
            continue;
        }
        start=u_skipWhitespace(start);
        if(*start==';') {
            if(!parseOrdering(start+1)) {
                continue;
            }
        }
        else
            orderingCount=-1;

        ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
                      paraLevel, NULL, errorCode);
        const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
        if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
            errln("Input line %d: %s", (int)lineNumber, line);
            continue;
        }
        UBiDiLevel actualLevel;
        if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
            printErrorLine();
            errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
                   (int)lineNumber, resolvedParaLevel, actualLevel);
            continue;
        }
        if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
            continue;
        }
        if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
            continue;
        }
    }
}
Exemple #8
0
void UCAConformanceTest::testConformance(const Collator *coll) 
{
    if(testFile == 0) {
        return;
    }
    uint32_t skipFlags = 0;
    if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) {
        skipFlags |= IS_SHIFTED;
    }
    if(coll == rbUCA) {
        skipFlags |= FROM_RULES;
    }

    logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
    UBool withSortKeys = getProperty("ucaconfnosortkeys") == NULL;

    int32_t line = 0;

    UChar b1[1024], b2[1024];
    UChar *buffer = b1, *oldB = NULL;

    char lineB1[1024], lineB2[1024];
    char *lineB = lineB1, *oldLineB = lineB2;

    uint8_t sk1[1024], sk2[1024];
    uint8_t *oldSk = NULL, *newSk = sk1;

    int32_t oldLen = 0;
    int32_t oldBlen = 0;
    uint32_t first = 0;

    while (fgets(lineB, 1024, testFile) != NULL) {
        // remove trailing whitespace
        u_rtrim(lineB);

        line++;
        if(*lineB == 0 || lineB[0] == '#') {
            continue;
        }
        int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status);
        if(U_FAILURE(status)) {
            errln("Error parsing line %ld (%s): %s\n",
                  (long)line, u_errorName(status), lineB);
            status = U_ZERO_ERROR;
        }
        buffer[buflen] = 0;

        if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) {
            logln("Skipping line %i because of a known bug", line);
            continue;
        }

        int32_t resLen = withSortKeys ? coll->getSortKey(buffer, buflen, newSk, 1024) : 0;

        if(oldSk != NULL) {
            UBool ok=TRUE;
            int32_t skres = withSortKeys ? strcmp((char *)oldSk, (char *)newSk) : 0;
            int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status);
            int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status);

            if(cmpres != -cmpres2) {
                errln("Compare result not symmetrical on line %i: "
                      "previous vs. current (%d) / current vs. previous (%d)",
                      line, cmpres, cmpres2);
                ok = FALSE;
            }

            // TODO: Compare with normalization turned off if the input passes the FCD test.

            if(withSortKeys && cmpres != normalizeResult(skres)) {
                errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i",
                      cmpres, skres, line);
                ok = FALSE;
            }

            int32_t res = cmpres;
            if(res == 0 && !isAtLeastUCA62) {
                // Up to UCA 6.1, the collation test files use a custom tie-breaker,
                // comparing the raw input strings.
                res = u_strcmpCodePointOrder(oldB, buffer);
                // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
                // comparing the NFD versions of the input strings,
                // which we do via setting strength=identical.
            }
            if(res > 0) {
                errln("Line %i is not greater or equal than previous line", line);
                ok = FALSE;
            }

            if(!ok) {
                errln("  Previous data line %s", oldLineB);
                errln("  Current data line  %s", lineB);
                if(withSortKeys) {
                    UnicodeString oldS, newS;
                    prettify(CollationKey(oldSk, oldLen), oldS);
                    prettify(CollationKey(newSk, resLen), newS);
                    errln("  Previous key: "+oldS);
                    errln("  Current key:  "+newS);
                }
            }
        }

        // swap buffers
        oldLineB = lineB;
        oldB = buffer;
        oldSk = newSk;
        if(lineB == lineB1) {
            lineB = lineB2;
            buffer = b2;
            newSk = sk2;
        } else {
            lineB = lineB1;
            buffer = b1;
            newSk = sk1;
        }
        oldLen = resLen;
        oldBlen = buflen;
    }
}
Exemple #9
0
void UCAConformanceTest::testConformance(UCollator *coll) 
{
  if(testFile == 0) {
    return;
  }

  int32_t line = 0;

  UChar b1[1024], b2[1024];
  char lineB[1024];
  UChar *buffer = b1, *oldB = NULL;
  uint8_t sk1[1024], sk2[1024];
  uint8_t *oldSk = NULL, *newSk = sk1;
  int32_t resLen = 0, oldLen = 0;
  int32_t buflen = 0, oldBlen = 0;
  uint32_t first = 0;
  uint32_t offset = 0;


  while (fgets(lineB, 1024, testFile) != NULL) {
    offset = 0;

    line++;
    if(*lineB == 0 || lineB[0] == '#') {
      continue;
    }
    offset = u_parseString(lineB, buffer, 1024, &first, &status);
    buflen = offset;
    buffer[offset++] = 0;

    resLen = ucol_getSortKey(coll, buffer, buflen, newSk, 1024);

    int32_t res = 0, cmpres = 0, cmpres2 = 0;

    if(oldSk != NULL) {
      res = strcmp((char *)oldSk, (char *)newSk);
      cmpres = ucol_strcoll(coll, oldB, oldBlen, buffer, buflen);
      cmpres2 = ucol_strcoll(coll, buffer, buflen, oldB, oldBlen);

      if(cmpres != -cmpres2) {
        errln("Compare result not symmetrical on line %i", line);
      }

      if(((res&0x80000000) != (cmpres&0x80000000)) || (res == 0 && cmpres != 0) || (res != 0 && cmpres == 0)) {
        errln("Difference between ucol_strcoll and sortkey compare on line %i", line);
        logln("Data line %s", lineB);
      }

      if(res > 0) {
        errln("Line %i is not greater or equal than previous line", line);
        logln("Data line %s", lineB);
      } else if(res == 0) { /* equal */
        res = u_strcmpCodePointOrder(oldB, buffer);
        if (res == 0) {
          errln("Probable error in test file on line %i (comparing identical strings)", line);
          logln("Data line %s", lineB);
        } else if (res > 0) {
          errln("Sortkeys are identical, but code point comapare gives >0 on line %i", line);
          logln("Data line %s", lineB);
        }
      }
    }

    oldSk = newSk;
    oldLen = resLen;

    newSk = (newSk == sk1)?sk2:sk1;
    oldB = buffer;
    oldBlen = buflen;
    buffer = (buffer == b1)?b2:b1;
  }
}
Exemple #10
0
void MultithreadTest::TestCollators()
{

    UErrorCode status = U_ZERO_ERROR;
    FILE *testFile = NULL;
    char testDataPath[1024];
    strcpy(testDataPath, IntlTest::getSourceTestData(status));
    if (U_FAILURE(status)) {
        errln("ERROR: could not open test data %s", u_errorName(status));
        return;
    }
    strcat(testDataPath, "CollationTest_");

    const char* type = "NON_IGNORABLE";

    const char *ext = ".txt";
    if(testFile) {
        fclose(testFile);
    }
    char buffer[1024];
    strcpy(buffer, testDataPath);
    strcat(buffer, type);
    size_t bufLen = strlen(buffer);

    // we try to open 3 files:
    // path/CollationTest_type.txt
    // path/CollationTest_type_SHORT.txt
    // path/CollationTest_type_STUB.txt
    // we are going to test with the first one that we manage to open.

    strcpy(buffer+bufLen, ext);

    testFile = fopen(buffer, "rb");

    if(testFile == 0) {
        strcpy(buffer+bufLen, "_SHORT");
        strcat(buffer, ext);
        testFile = fopen(buffer, "rb");

        if(testFile == 0) {
            strcpy(buffer+bufLen, "_STUB");
            strcat(buffer, ext);
            testFile = fopen(buffer, "rb");

            if (testFile == 0) {
                *(buffer+bufLen) = 0;
                dataerrln("could not open any of the conformance test files, tried opening base %s", buffer);
                return;
            } else {
                infoln(
                    "INFO: Working with the stub file.\n"
                    "If you need the full conformance test, please\n"
                    "download the appropriate data files from:\n"
                    "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
            }
        }
    }

    LocalArray<Line> lines(new Line[200000]);
    memset(lines.getAlias(), 0, sizeof(Line)*200000);
    int32_t lineNum = 0;

    UChar bufferU[1024];
    uint32_t first = 0;

    while (fgets(buffer, 1024, testFile) != NULL) {
        if(*buffer == 0 || buffer[0] == '#') {
            // Store empty and comment lines so that errors are reported
            // for the real test file lines.
            lines[lineNum].buflen = 0;
            lines[lineNum].buff[0] = 0;
        } else {
            int32_t buflen = u_parseString(buffer, bufferU, 1024, &first, &status);
            lines[lineNum].buflen = buflen;
            u_memcpy(lines[lineNum].buff, bufferU, buflen);
            lines[lineNum].buff[buflen] = 0;
        }
        lineNum++;
    }
    fclose(testFile);
    if(U_FAILURE(status)) {
      dataerrln("Couldn't read the test file!");
      return;
    }

    UVersionInfo uniVersion;
    static const UVersionInfo v62 = { 6, 2, 0, 0 };
    u_getUnicodeVersion(uniVersion);
    UBool isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0;

    LocalPointer<Collator> coll(Collator::createInstance(Locale::getRoot(), status));
    if(U_FAILURE(status)) {
        errcheckln(status, "Couldn't open UCA collator");
        return;
    }
    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
    coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
    coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
    coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status);
    coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status);

    int32_t spawnResult = 0;
    LocalArray<CollatorThreadTest> tests(new CollatorThreadTest[kCollatorThreadThreads]);

    logln(UnicodeString("Spawning: ") + kCollatorThreadThreads + " threads * " + kFormatThreadIterations + " iterations each.");
    int32_t j = 0;
    for(j = 0; j < kCollatorThreadThreads; j++) {
        //logln("Setting collator %i", j);
        tests[j].setCollator(coll.getAlias(), lines.getAlias(), lineNum, isAtLeastUCA62);
    }
    for(j = 0; j < kCollatorThreadThreads; j++) {
        log("%i ", j);
        spawnResult = tests[j].start();
        if(spawnResult != 0) {
            errln("%s:%d THREAD INFO: thread %d failed to start with status %d", __FILE__, __LINE__, j, spawnResult);
            return;
        }
    }
    logln("Spawned all");

    for(int32_t i=0;i<kCollatorThreadThreads;i++) {
        tests[i].join();
        //logln(UnicodeString("Test #") + i + " is complete.. ");
    }
}