コード例 #1
0
void IntlTestSpoof::testSpoofAPI() {

    TEST_SETUP
        UnicodeString s("xyz");  // Many latin ranges are whole-script confusable with other scripts.
                                 // If this test starts failing, consult confusablesWholeScript.txt
        int32_t position = 666;
        int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);
        TEST_ASSERT_EQ(666, position);
    TEST_TEARDOWN;
    
    TEST_SETUP
        UnicodeString s1("cxs");
        UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
        int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;

    TEST_SETUP
        UnicodeString s("I1l0O");
        UnicodeString dest;
        UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(UnicodeString("lllOO") == dest);
        TEST_ASSERT(&dest == &retStr);
    TEST_TEARDOWN;
}
コード例 #2
0
void IntlTestSpoof::testSpoofAPI() {

    TEST_SETUP
    UnicodeString s("uvw");
    int32_t position = 666;
    int32_t checkResults = uspoof_checkUnicodeString(sc, s, &position, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);
    TEST_ASSERT_EQ(666, position);
    TEST_TEARDOWN;

    TEST_SETUP
    UnicodeString s1("cxs");
    UnicodeString s2 = UnicodeString("\\u0441\\u0445\\u0455").unescape();  // Cyrillic "cxs"
    int32_t checkResults = uspoof_areConfusableUnicodeString(sc, s1, s2, &status);
    TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;

    TEST_SETUP
    UnicodeString s("I1l0O");
    UnicodeString dest;
    UnicodeString &retStr = uspoof_getSkeletonUnicodeString(sc, USPOOF_ANY_CASE, s, dest, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT(UnicodeString("11100") == dest);
    TEST_ASSERT(&dest == &retStr);
    TEST_TEARDOWN;
}
コード例 #3
0
void IntlTestSpoof::testBug8654() {
    TEST_SETUP
        UnicodeString s = UnicodeString("B\\u00c1\\u0301").unescape();
        int32_t position = -42;
        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s, &position, &status) & USPOOF_INVISIBLE );
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(3, position);
    TEST_TEARDOWN;
}
コード例 #4
0
ファイル: corelib.c プロジェクト: 8l/awl
/* Tests the `do` form: error results, the value of the last expression,
 * and which bindings are still visible after the form has been evaluated. */
void test_corelib_do(void) {
    awlenv* env = setup_test();

    /* Both of these forms are expected to evaluate to an error value. */
    TEST_ASSERT_TYPE(env, "(do x)", AWLVAL_ERR);
    TEST_ASSERT_TYPE(env, "(do (/ 1 0))", AWLVAL_ERR);

    /* Names bound by `let` inside `do` evaluate to errors afterwards. */
    TEST_ASSERT_EQ(env, "(do (let ((x 5) (y 6)) (+ x y)))", "11");
    TEST_ASSERT_TYPE(env, "x", AWLVAL_ERR);
    TEST_ASSERT_TYPE(env, "y", AWLVAL_ERR);

    /* `do` yields its last expression; names introduced with `define`
     * are still readable once the form has finished. */
    TEST_ASSERT_EQ(env, "(do 1 2 3 4 5)", "5");
    TEST_ASSERT_EQ(env, "(do (define x 5) (define y 6) (+ x y))", "11");
    TEST_ASSERT_EQ(env, "x", "5");
    TEST_ASSERT_EQ(env, "y", "6");

    teardown_test(env);
}
コード例 #5
0
void IntlTestSpoof::testAreConfusable() {
    TEST_SETUP
        UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
                         "A long string that will overflow stack buffers.  A long string that will overflow stack buffers. ");
        UnicodeString s2("A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. "
                         "A long string that wi11 overflow stack buffers.  A long string that will overflow stack buffers. ");
        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, uspoof_areConfusableUnicodeString(sc, s1, s2, &status));
        TEST_ASSERT_SUCCESS(status);

    TEST_TEARDOWN;
}
コード例 #6
0
ファイル: spooftest.c プロジェクト: icu-project/icu4c
// Test open from source rules.
// Run this in isolation to verify initialization.
//
// Reads confusables.txt and confusablesWholeScript.txt from the "unidata"
// directory under ctest_dataSrcDir(), builds a spoof checker from them with
// uspoof_openFromSource(), and runs one check on the result.
static void TestOpenFromSource() {
    // No TEST_SETUP because that calls uspoof_open().
    UErrorCode status = U_ZERO_ERROR;
    const char *dataSrcDir;
    char       *fileName;
    char       *confusables;
    int         confusablesLength = 0;
    char       *confusablesWholeScript;
    int         confusablesWholeScriptLength = 0;
    FILE       *f;
    UParseError pe;
    int32_t     errType;
    int32_t     checkResults;
    USpoofChecker *rsc;

    dataSrcDir = ctest_dataSrcDir();

    // Allocate every buffer up front so that an allocation failure is reported
    // and handled once, instead of crashing later in strcpy() or fread().
    fileName = malloc(strlen(dataSrcDir) + 100);
    confusables = malloc(3000000);
    confusablesWholeScript = malloc(1000000);
    TEST_ASSERT_NE(fileName, NULL);
    TEST_ASSERT_NE(confusables, NULL);
    TEST_ASSERT_NE(confusablesWholeScript, NULL);
    if (fileName == NULL || confusables == NULL || confusablesWholeScript == NULL) {
        // free(NULL) is a no-op, so releasing all three unconditionally is safe.
        free(confusablesWholeScript);
        free(confusables);
        free(fileName);
        return;
    }

    // Load confusables.txt. If the file cannot be opened the failure is
    // reported and the length stays 0.
    strcpy(fileName, dataSrcDir);
    strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
    f = fopen(fileName, "rb");
    TEST_ASSERT_NE(f, NULL);
    if (f != NULL) {
        confusablesLength = fread(confusables, 1, 3000000, f);
        fclose(f);
    }

    // Load confusablesWholeScript.txt the same way.
    strcpy(fileName, dataSrcDir);
    strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
    f = fopen(fileName, "rb");
    TEST_ASSERT_NE(f, NULL);
    if (f != NULL) {
        confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);
        fclose(f);
    }

    rsc = uspoof_openFromSource(confusables, confusablesLength,
                                confusablesWholeScript, confusablesWholeScriptLength,
                                &errType, &pe, &status);
    TEST_ASSERT_SUCCESS(status);

    // Ticket #11860: uspoof_openFromSource() did not initialize for use.
    // Verify that the spoof checker does not crash.
    checkResults = uspoof_check(rsc, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    free(confusablesWholeScript);
    free(confusables);
    free(fileName);
    uspoof_close(rsc);
    /*  printf("ParseError Line is %d\n", pe.line);  */
}
コード例 #7
0
void IntlTestSpoof::testInvisible() {
    TEST_SETUP
        UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
        int32_t position = -42;
        TEST_ASSERT_EQ(0, uspoof_checkUnicodeString(sc, s, &position, &status));
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(position == -42);

        UnicodeString  s2 = UnicodeString("abcd\\u0301\\u0302\\u0301ef").unescape();
        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s2, &position, &status));
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(7, position);

        // Two acute accents, one from the composed a with acute accent, \u00e1,
        // and one separate.
        position = -42;
        UnicodeString  s3 = UnicodeString("abcd\\u00e1\\u0301xyz").unescape();
        TEST_ASSERT_EQ(USPOOF_INVISIBLE, uspoof_checkUnicodeString(sc, s3, &position, &status));
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(7, position);
    TEST_TEARDOWN;
}
コード例 #8
0
/*
 *  Spoof Detection C API tests.
 *
 *  Each section below is bracketed by TEST_SETUP / TEST_TEARDOWN. Within a
 *  section the code uses the names `sc` (the spoof checker under test) and
 *  `status` (the running UErrorCode), neither of which is declared locally.
 */
static void TestUSpoofCAPI(void) {

    /*
     *  basic uspoof_open().
     */
    {
        USpoofChecker *sc;
        UErrorCode  status = U_ZERO_ERROR;
        sc = uspoof_open(&status);
        TEST_ASSERT_SUCCESS(status);
        if (U_FAILURE(status)) {
            /* If things are so broken that we can't even open a default spoof checker,  */
            /*   don't even try the rest of the tests.  They would all fail.             */
            return;
        }
        uspoof_close(sc);
    }

    /*
     *  Test Open from source rules.
     */
    TEST_SETUP
        const char *dataSrcDir;
        char       *fileName;
        char       *confusables;
        int         confusablesLength = 0;
        char       *confusablesWholeScript;
        int         confusablesWholeScriptLength = 0;
        FILE       *f;
        UParseError pe;
        int32_t     errType;
        USpoofChecker *rsc;

        dataSrcDir = ctest_dataSrcDir();
        fileName = malloc(strlen(dataSrcDir) + 100);
        strcpy(fileName, dataSrcDir);
        strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusables.txt");
        f = fopen(fileName, "r");
        TEST_ASSERT_NE(f, NULL);
        confusables = malloc(3000000);
        /* Only touch the stream if it was actually opened; calling fread()/fclose()
         * on a NULL FILE* is undefined.  On failure the length stays 0. */
        if (f != NULL) {
            confusablesLength = fread(confusables, 1, 3000000, f);
            fclose(f);
        }

        strcpy(fileName, dataSrcDir);
        strcat(fileName, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "confusablesWholeScript.txt");
        f = fopen(fileName, "r");
        TEST_ASSERT_NE(f, NULL);
        confusablesWholeScript = malloc(1000000);
        if (f != NULL) {
            confusablesWholeScriptLength = fread(confusablesWholeScript, 1, 1000000, f);
            fclose(f);
        }

        rsc = uspoof_openFromSource(confusables, confusablesLength,
                                    confusablesWholeScript, confusablesWholeScriptLength,
                                    &errType, &pe, &status);
        TEST_ASSERT_SUCCESS(status);

        free(confusablesWholeScript);
        free(confusables);
        free(fileName);
        uspoof_close(rsc);
        /*  printf("ParseError Line is %d\n", pe.line);  */
    TEST_TEARDOWN;


    /*
     * openFromSerialized and serialize
     */
    TEST_SETUP
        int32_t        serializedSize = 0;
        int32_t        actualLength = 0;
        char           *buf;
        USpoofChecker  *sc2;
        int32_t         checkResults;

        /* Preflight with a NULL buffer: expect an overflow status and the required size. */
        serializedSize = uspoof_serialize(sc, NULL, 0, &status);
        TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);
        TEST_ASSERT(serializedSize > 0);

        /* Serialize the default spoof checker */
        status = U_ZERO_ERROR;
        buf = (char *)malloc(serializedSize + 10);
        TEST_ASSERT(buf != NULL);
        /* Sentinel byte just past the declared capacity; it must survive serialization. */
        buf[serializedSize] = 42;
        uspoof_serialize(sc, buf, serializedSize, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(42, buf[serializedSize]);

        /* Create a new spoof checker from the freshly serialized data */
        sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_NE(NULL, sc2);
        TEST_ASSERT_EQ(serializedSize, actualLength);

        /* Verify that the new spoof checker at least wiggles */
        checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

        uspoof_close(sc2);
        free(buf);
    TEST_TEARDOWN;


    /*
     * Set & Get Check Flags
     */
    TEST_SETUP
        int32_t t;
        uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);
        TEST_ASSERT_SUCCESS(status);
        t = uspoof_getChecks(sc, &status);
        TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);

        uspoof_setChecks(sc, 0, &status);
        TEST_ASSERT_SUCCESS(status);
        t = uspoof_getChecks(sc, &status);
        TEST_ASSERT_EQ(0, t);

        uspoof_setChecks(sc,
                         USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
                         &status);
        TEST_ASSERT_SUCCESS(status);
        t = uspoof_getChecks(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);
    TEST_TEARDOWN;

    /*
     * get & setAllowedChars
     */
    TEST_SETUP
        USet *us;
        const USet *uset;

        uset = uspoof_getAllowedChars(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(uset_isFrozen(uset));
        us = uset_open((UChar32)0x41, (UChar32)0x5A);   /*  [A-Z]  */
        uspoof_setAllowedChars(sc, us, &status);
        TEST_ASSERT_SUCCESS(status);
        /* The getter must return an equal set, but not the caller's own object. */
        TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));
        TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));
        TEST_ASSERT_SUCCESS(status);
        uset_close(us);
    TEST_TEARDOWN;

    /*
     *  clone()
     */
    TEST_SETUP
        USpoofChecker *clone1 = NULL;
        USpoofChecker *clone2 = NULL;
        int32_t        checkResults = 0;

        clone1 = uspoof_clone(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_NE(clone1, sc);

        clone2 = uspoof_clone(clone1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_NE(clone2, clone1);

        /* Close the intermediate clone first; clone2 is exercised after it is gone. */
        uspoof_close(clone1);

        /* Verify that the cloned spoof checker is alive */
        checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
        uspoof_close(clone2);
    TEST_TEARDOWN;

    /*
     *  get & set Checks
     */
    TEST_SETUP
        int32_t   checks;
        int32_t   checks2;
        int32_t   checkResults;

        checks = uspoof_getChecks(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);

        checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);
        uspoof_setChecks(sc, checks, &status);
        TEST_ASSERT_SUCCESS(status);
        checks2 = uspoof_getChecks(sc, &status);
        TEST_ASSERT_EQ(checks, checks2);

        /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
            So with those tests gone checking that Identifier should now succeed */
        checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);
    TEST_TEARDOWN;

    /*
     *  AllowedLocales
     */
    TEST_SETUP
        const char  *allowedLocales;
        int32_t  checkResults;

        /* Default allowed locales list should be empty */
        allowedLocales = uspoof_getAllowedLocales(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(strcmp("", allowedLocales) == 0);

        /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
        uspoof_setAllowedLocales(sc, "en, ru_RU", &status);
        TEST_ASSERT_SUCCESS(status);
        allowedLocales = uspoof_getAllowedLocales(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(strstr(allowedLocales, "en") != NULL);
        TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);

        /* Limit checks to USPOOF_CHAR_LIMIT.  Some of the test data has whole script confusables also,
         * which we don't want to see in this test. */
        uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);
        TEST_ASSERT_SUCCESS(status);

        checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);

        checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        /* Reset with an empty locale list, which should allow all characters to pass */
        uspoof_setAllowedLocales(sc, " ", &status);
        TEST_ASSERT_SUCCESS(status);

        checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);
    TEST_TEARDOWN;

    /*
     * AllowedChars   set/get the USet of allowed characters.
     */
    TEST_SETUP
        const USet  *set;
        USet        *tmpSet;
        int32_t      checkResults;

        /* By default, we should see no restriction; the USet should allow all characters. */
        set = uspoof_getAllowedChars(sc, &status);
        TEST_ASSERT_SUCCESS(status);
        tmpSet = uset_open(0, 0x10ffff);
        TEST_ASSERT(uset_equals(tmpSet, set));

        /* Setting the allowed chars should enable the check. */
        uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);
        TEST_ASSERT_SUCCESS(status);

        /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
        uset_remove(tmpSet, goodLatin[1]);
        uspoof_setAllowedChars(sc, tmpSet, &status);
        TEST_ASSERT_SUCCESS(status);
        uset_close(tmpSet);

        /* Latin Identifier should now fail; other non-latin test cases should still be OK */
        checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);

        checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
    TEST_TEARDOWN;

    /*
     * check UTF-8
     */
    TEST_SETUP
        char    utf8buf[200];
        int32_t checkResults;
        int32_t position;

        u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        /* Seed the position with a marker value; a passing check is expected to leave it alone. */
        position = 666;
        checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);
        TEST_ASSERT_EQ(666, position);

        u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        position = 666;
        checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);
        TEST_ASSERT_EQ(2, position);

    TEST_TEARDOWN;

    /*
     * uspoof_areConfusable()
     */
    TEST_SETUP
        int32_t  checkResults;

        checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

        checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;

    /*
     * areConfusableUTF8
     */
    TEST_SETUP
        int32_t checkResults;
        char s1[200];
        char s2[200];

        u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
        u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

        u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
        u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, checkResults);

        u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
        u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;


    /*
     * getSkeleton
     */
    TEST_SETUP
        UChar dest[100];
        int32_t   skelLength;

        skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, sizeof(dest)/sizeof(UChar), &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
        TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);

        /* The UTF-8 variant writes char data.  The UChar array is reused as raw storage,
         * so it is cast explicitly; the capacity passed is the element count, which is
         * never more than the array's size in bytes. */
        skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char *)dest, sizeof(dest)/sizeof(UChar), &status);
        TEST_ASSERT_SUCCESS(status);

        /* Preflight with no buffer: expect an overflow status and the required length. */
        skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);
        TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
        TEST_ASSERT_EQ(3, skelLength);
        /* Clear the expected overflow so that it is not seen as a failure afterwards. */
        status = U_ZERO_ERROR;

    TEST_TEARDOWN;
}
コード例 #9
0
//  testConfData - Check each data item from the Unicode confusables.txt file,
//                 verify that it transforms correctly in a skeleton.
//
void IntlTestSpoof::testConfData() {
    UErrorCode status = U_ZERO_ERROR;

    const char *testDataDir = IntlTest::getSourceTestData(status);
    TEST_ASSERT_SUCCESS(status);
    char buffer[2000];
    uprv_strcpy(buffer, testDataDir);
    uprv_strcat(buffer, "confusables.txt");

    LocalStdioFilePointer f(fopen(buffer, "rb"));
    if (f.isNull()) {
        errln("Skipping test spoof/testConfData.  File confusables.txt not accessible.");
        return;
    }
    fseek(f.getAlias(), 0, SEEK_END);
    int32_t  fileSize = ftell(f.getAlias());
    LocalArray<char> fileBuf(new char[fileSize]);
    fseek(f.getAlias(), 0, SEEK_SET);
    int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());
    TEST_ASSERT_EQ(amt_read, fileSize);
    TEST_ASSERT(fileSize>0);
    if (amt_read != fileSize || fileSize <=0) {
        return;
    }
    UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize));

    LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    TEST_ASSERT_SUCCESS(status);

    // Parse lines from the confusables.txt file.  Example Line:
    // FF44 ;	0064 ;	SL	# ( d -> d ) FULLWIDTH ....
    // Three fields.  The hex fields can contain more than one character,
    //                and each character may be more than 4 digits (for supplemntals)
    // This regular expression matches lines and splits the fields into capture groups.
    RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt, 0, status);
    TEST_ASSERT_SUCCESS(status);
    while (parseLine.find()) {
        UnicodeString from = parseHex(parseLine.group(1, status));
        if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {
            // The source character was not NFD.
            // Skip this case; the first step in obtaining a skeleton is to NFD the input,
            //  so the mapping in this line of confusables.txt will never be applied.
            continue;
        }

        UnicodeString rawExpected = parseHex(parseLine.group(2, status));
        UnicodeString expected;
        Normalizer::decompose(rawExpected, FALSE /*NFD*/, 0, expected, status);
        TEST_ASSERT_SUCCESS(status);

        int32_t skeletonType = 0;
        UnicodeString tableType = parseLine.group(3, status);
        TEST_ASSERT_SUCCESS(status);
        if (tableType.indexOf("SL") >= 0) {
            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
        } else if (tableType.indexOf("SA") >= 0) {
            skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
        } else if (tableType.indexOf("ML") >= 0) {
            skeletonType = 0;
        } else if (tableType.indexOf("MA") >= 0) {
            skeletonType = USPOOF_ANY_CASE;
        }

        UnicodeString actual;
        uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status);
        TEST_ASSERT_SUCCESS(status);
        TEST_ASSERT(actual == expected);
        if (actual != expected) {
            errln(parseLine.group(0, status));
            UnicodeString line = "Actual: ";
            int i = 0;
            while (i < actual.length()) {
                appendHexUChar(line, actual.char32At(i));
                i = actual.moveIndex32(i, 1);
            }
            errln(line);
        }
        if (U_FAILURE(status)) {
            break;
        }
    }
}
コード例 #10
0
ファイル: spooftest.c プロジェクト: icu-project/icu4c
/*
 *   Spoof Detection C API Tests
 */
static void TestUSpoofCAPI(void) {

    /*
     *  basic uspoof_open().
     */
    {
        USpoofChecker *sc;
        UErrorCode  status = U_ZERO_ERROR;
        sc = uspoof_open(&status);
        TEST_ASSERT_SUCCESS(status);
        if (U_FAILURE(status)) {
            /* If things are so broken that we can't even open a default spoof checker,  */
            /*   don't even try the rest of the tests.  They would all fail.             */
            return;
        }
        uspoof_close(sc);
    }

    /*
     * openFromSerialized and serialize
    */
    TEST_SETUP
    int32_t        serializedSize = 0;
    int32_t        actualLength = 0;
    char           *buf;
    USpoofChecker  *sc2;
    int32_t         checkResults;


    serializedSize = uspoof_serialize(sc, NULL, 0, &status);
    TEST_ASSERT_EQ(status, U_BUFFER_OVERFLOW_ERROR);
    TEST_ASSERT(serializedSize > 0);

    /* Serialize the default spoof checker */
    status = U_ZERO_ERROR;
    buf = (char *)malloc(serializedSize + 10);
    TEST_ASSERT(buf != NULL);
    buf[serializedSize] = 42;
    uspoof_serialize(sc, buf, serializedSize, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(42, buf[serializedSize]);

    /* Create a new spoof checker from the freshly serialized data */
    sc2 = uspoof_openFromSerialized(buf, serializedSize+10, &actualLength, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_NE(NULL, sc2);
    TEST_ASSERT_EQ(serializedSize, actualLength);

    /* Verify that the new spoof checker at least wiggles */
    checkResults = uspoof_check(sc2, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    checkResults = uspoof_check(sc2, scMixed, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

    uspoof_close(sc2);
    free(buf);
    TEST_TEARDOWN;



    /*
     * Set & Get Check Flags
    */
    TEST_SETUP
    int32_t t;
    uspoof_setChecks(sc, USPOOF_ALL_CHECKS, &status);
    TEST_ASSERT_SUCCESS(status);
    t = uspoof_getChecks(sc, &status);
    TEST_ASSERT_EQ(t, USPOOF_ALL_CHECKS);

    uspoof_setChecks(sc, 0, &status);
    TEST_ASSERT_SUCCESS(status);
    t = uspoof_getChecks(sc, &status);
    TEST_ASSERT_EQ(0, t);

    uspoof_setChecks(sc,
                     USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
                     &status);
    TEST_ASSERT_SUCCESS(status);
    t = uspoof_getChecks(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE, t);
    TEST_TEARDOWN;

    /*
    * get & setAllowedChars
    */
    TEST_SETUP
    USet *us;
    const USet *uset;

    uset = uspoof_getAllowedChars(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT(uset_isFrozen(uset));
    us = uset_open((UChar32)0x41, (UChar32)0x5A);   /*  [A-Z]  */
    uspoof_setAllowedChars(sc, us, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_NE(us, uspoof_getAllowedChars(sc, &status));
    TEST_ASSERT(uset_equals(us, uspoof_getAllowedChars(sc, &status)));
    TEST_ASSERT_SUCCESS(status);
    uset_close(us);
    TEST_TEARDOWN;

    /*
    *  clone()
    */

    TEST_SETUP
    USpoofChecker *clone1 = NULL;
    USpoofChecker *clone2 = NULL;
    int32_t        checkResults = 0;

    clone1 = uspoof_clone(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_NE(clone1, sc);

    clone2 = uspoof_clone(clone1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_NE(clone2, clone1);

    uspoof_close(clone1);

    /* Verify that the cloned spoof checker is alive */
    checkResults = uspoof_check(clone2, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    checkResults = uspoof_check(clone2, scMixed, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
    uspoof_close(clone2);
    TEST_TEARDOWN;

    /*
    *  basic uspoof_check()
    */
    TEST_SETUP
    int32_t result;
    result = uspoof_check(sc, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, result);

    result = uspoof_check(sc, han_Hiragana, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, result);

    result = uspoof_check(sc, scMixed, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE, result);
    TEST_TEARDOWN


    /*
     *  get & set Checks
    */
    TEST_SETUP
    int32_t   checks;
    int32_t   checks2;
    int32_t   checkResults;

    checks = uspoof_getChecks(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_ALL_CHECKS, checks);

    checks &= ~(USPOOF_SINGLE_SCRIPT | USPOOF_MIXED_SCRIPT_CONFUSABLE);
    uspoof_setChecks(sc, checks, &status);
    TEST_ASSERT_SUCCESS(status);
    checks2 = uspoof_getChecks(sc, &status);
    TEST_ASSERT_EQ(checks, checks2);

    /* The checks that were disabled just above are the same ones that the "scMixed" test fails.
        So with those tests gone checking that Identifier should now succeed */
    checkResults = uspoof_check(sc, scMixed, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);
    TEST_TEARDOWN;

    /*
     *  AllowedLoacles
     */

    TEST_SETUP
    const char  *allowedLocales;
    int32_t  checkResults;

    /* Default allowed locales list should be empty */
    allowedLocales = uspoof_getAllowedLocales(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT(strcmp("", allowedLocales) == 0)

    /* Allow en and ru, which should enable Latin and Cyrillic only to pass */
    uspoof_setAllowedLocales(sc, "en, ru_RU", &status);
    TEST_ASSERT_SUCCESS(status);
    allowedLocales = uspoof_getAllowedLocales(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT(strstr(allowedLocales, "en") != NULL);
    TEST_ASSERT(strstr(allowedLocales, "ru") != NULL);

    /* Limit checks to USPOOF_CHAR_LIMIT.  Some of the test data has whole script confusables also,
     * which we don't want to see in this test. */
    uspoof_setChecks(sc, USPOOF_CHAR_LIMIT, &status);
    TEST_ASSERT_SUCCESS(status);

    checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT, checkResults);

    checkResults = uspoof_check(sc, goodCyrl, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    /* Reset with an empty locale list, which should allow all characters to pass */
    uspoof_setAllowedLocales(sc, " ", &status);
    TEST_ASSERT_SUCCESS(status);

    checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);
    TEST_TEARDOWN;

    /*
     * AllowedChars   set/get the USet of allowed characters.
     */
    TEST_SETUP
    const USet  *set;
    USet        *tmpSet;
    int32_t      checkResults;

    /* By default, we should see no restriction; the USet should allow all characters. */
    set = uspoof_getAllowedChars(sc, &status);
    TEST_ASSERT_SUCCESS(status);
    tmpSet = uset_open(0, 0x10ffff);
    TEST_ASSERT(uset_equals(tmpSet, set));

    /* Setting the allowed chars should enable the check. */
    uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CHAR_LIMIT, &status);
    TEST_ASSERT_SUCCESS(status);

    /* Remove a character that is in our good Latin test identifier from the allowed chars set. */
    uset_remove(tmpSet, goodLatin[1]);
    uspoof_setAllowedChars(sc, tmpSet, &status);
    TEST_ASSERT_SUCCESS(status);
    uset_close(tmpSet);

    /* The Latin identifier should now fail; the non-Latin test cases should still pass the
     *  character limit check.
     *  Note: a CHAR_LIMIT failure also causes the restriction level to be USPOOF_UNRESTRICTIVE,
     *        which will give us a USPOOF_RESTRICTION_LEVEL failure.
     */
    checkResults = uspoof_check(sc, goodLatin, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_CHAR_LIMIT | USPOOF_RESTRICTION_LEVEL, checkResults);

    checkResults = uspoof_check(sc, goodGreek, -1, NULL, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_WHOLE_SCRIPT_CONFUSABLE, checkResults);
    TEST_TEARDOWN;

    /*
     * check UTF-8
     */
    TEST_SETUP
    char    utf8buf[200];
    int32_t checkResults;
    int32_t position;

    u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodLatin, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    position = 666;
    checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);
    TEST_ASSERT_EQ(0, position);

    u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, goodCyrl, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    u_strToUTF8(utf8buf, sizeof(utf8buf), NULL, scMixed, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    position = 666;
    checkResults = uspoof_checkUTF8(sc, utf8buf, -1, &position, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_SINGLE_SCRIPT , checkResults);
    TEST_ASSERT_EQ(0, position);

    TEST_TEARDOWN;

    /*
     * uspoof_areConfusable()
     */
    TEST_SETUP
    int32_t  checkResults;

    checkResults = uspoof_areConfusable(sc, scLatin, -1, scMixed, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

    checkResults = uspoof_areConfusable(sc, goodGreek, -1, scLatin, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    checkResults = uspoof_areConfusable(sc, lll_Latin_a, -1, lll_Latin_b, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;

    /*
     * areConfusableUTF8
     */
    TEST_SETUP
    int32_t checkResults;
    char s1[200];
    char s2[200];


    u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
    u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);

    u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
    u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, checkResults);

    u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
    u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    checkResults = uspoof_areConfusableUTF8(sc, s1, -1, s2, -1, &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);

    TEST_TEARDOWN;


    /*
     * getSkeleton
     */

    TEST_SETUP
    UChar dest[100];
    int32_t   skelLength;

    skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, dest, UPRV_LENGTHOF(dest), &status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
    TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);

    skelLength = uspoof_getSkeletonUTF8(sc, USPOOF_ANY_CASE, goodLatinUTF8, -1, (char*)dest,
                                        UPRV_LENGTHOF(dest), &status);
    TEST_ASSERT_SUCCESS(status);

    skelLength = uspoof_getSkeleton(sc, USPOOF_ANY_CASE, lll_Latin_a, -1, NULL, 0, &status);
    TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
    TEST_ASSERT_EQ(3, skelLength);
    status = U_ZERO_ERROR;

    TEST_TEARDOWN;

    /*
     * get Inclusion and Recommended sets
     */
    TEST_SETUP
    const USet *inclusions = NULL;
    const USet *recommended = NULL;

    inclusions = uspoof_getInclusionSet(&status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(TRUE, uset_isFrozen(inclusions));

    status = U_ZERO_ERROR;
    recommended = uspoof_getRecommendedSet(&status);
    TEST_ASSERT_SUCCESS(status);
    TEST_ASSERT_EQ(TRUE, uset_isFrozen(recommended));
    TEST_TEARDOWN;

}