Пример #1
0
/**
 * Debug helper: writes a human-readable dump of a compiled RegexPattern
 * through REGEX_DUMP_DEBUG_PRINTF.
 *
 * The dump contains, in order:
 *   - the original pattern text,
 *   - the minimum match length,
 *   - the start-of-match type and its associated data (initial literal
 *     string, initial character set, or initial character),
 *   - one line per compiled operation, produced by RegexPattern::dumpOp().
 *
 * Every value written with "%c" is first restricted to printable ASCII
 * (0x20..0x7e); anything else is shown as '.'.  "%c" converts its argument
 * to unsigned char, so an unfiltered code point such as U+0100 would be
 * written as a NUL byte, and 0x7f..0xff as raw control / non-ASCII bytes.
 *
 * @param This  the pattern to dump.  Dereferenced unconditionally, so it
 *              must not be NULL.
 */
U_CAPI void  U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int      index;
    int      i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
    UChar32 c = utext_next32From(This->fPattern, 0);
    while (c != U_SENTINEL) {
        // Only printable ASCII is echoed verbatim; see the function comment.
        if (c < 0x20 || c > 0x7e) {
            c = '.';
        }
        REGEX_DUMP_DEBUG_PRINTF(("%c", c));

        c = UTEXT_NEXT32(This->fPattern);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
        const int32_t limit = This->fInitialStringIdx + This->fInitialStringLen;
        for (i = This->fInitialStringIdx; i < limit; i++) {
            // The literal text is walked one code unit at a time, so a
            // surrogate pair shows up as two '.' placeholders.
            UChar32 unit = This->fLiteralText[i];
            if (unit < 0x20 || unit > 0x7e) {
                unit = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", unit));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 members of the set; " ..." marks truncation.
        const int32_t totalSetChars = This->fInitialChars->size();
        const int32_t numSetChars   = (totalSetChars > 20) ? 20 : totalSetChars;
        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
        for (i = 0; i < numSetChars; i++) {
            const UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20 < setChar && setChar < 0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < totalSetChars) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar < 0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n"
           "-------------------------------------------\n"));
    for (index = 0; index < This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
Пример #2
0
/**
 * Debug-only dump of this compiled pattern to stdout.
 *
 * Prints the original pattern text, the minimum match length, the
 * start-of-match type with its associated data, the named capture groups,
 * and finally one line per compiled operation via dumpOp().
 *
 * The body is compiled only when REGEX_DEBUG is defined; otherwise the
 * function does nothing.
 */
void RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
    // Collect the pattern source, code point by code point, into a string.
    UnicodeString patternText;
    UChar32 cp = utext_next32From(fPattern, 0);
    while (cp != U_SENTINEL) {
        patternText.append(cp);
        cp = utext_next32(fPattern);
    }
    printf("Original Pattern:  \"%s\"\n", CStr(patternText)());
    printf("   Min Match Length:  %d\n", fMinMatchLen);
    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));

    // Data that accompanies the start-of-match type, if any.
    if (fStartType == START_STRING) {
        UnicodeString literalPrefix(fLiteralText, fInitialStringIdx, fInitialStringLen);
        printf("   Initial match string: \"%s\"\n", CStr(literalPrefix)());
    } else if (fStartType == START_SET) {
        UnicodeString setPattern;
        fInitialChars->toPattern(setPattern, TRUE);
        printf("    Match First Chars: %s\n", CStr(setPattern)());

    } else if (fStartType == START_CHAR) {
        printf("    First char of Match: ");
        // Values up to and including space are shown numerically.
        if (fInitialChar <= 0x20) {
            printf("%#x\n", fInitialChar);
        } else {
            printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
        }
    }

    printf("Named Capture Groups:\n");
    if (uhash_count(fNamedCaptureMap) != 0) {
        // Walk every (name -> group number) entry of the hash table.
        int32_t iterPos = UHASH_FIRST;
        for (const UHashElement *entry = uhash_nextElement(fNamedCaptureMap, &iterPos);
             entry != NULL;
             entry = uhash_nextElement(fNamedCaptureMap, &iterPos)) {
            const UnicodeString *groupName = (const UnicodeString *)entry->key.pointer;
            int32_t groupNumber = entry->value.integer;
            printf("   %d\t%s\n", groupNumber, CStr(*groupName)());
        }
    } else {
        printf("   None\n");
    }

    printf("\nIndex   Binary     Type             Operand\n"
           "-------------------------------------------\n");
    for (int opIndex = 0; opIndex < fCompiledPat->size(); opIndex++) {
        dumpOp(opIndex);
    }
    printf("\n\n");
#endif
}
Пример #3
0
/**
 * Debug-only dump of this compiled pattern to stdout.
 *
 * Prints, in order: the original pattern text, the minimum match length,
 * the start-of-match type with its associated data (initial literal string,
 * initial character set, or initial character), the named capture groups,
 * and one line per compiled operation via dumpOp().
 *
 * Every value written with "%c" is first restricted to printable ASCII
 * (0x20..0x7e); anything else is shown as '.'.  "%c" converts its argument
 * to unsigned char, so an unfiltered code point such as U+0100 would be
 * written as a NUL byte, and 0x7f..0xff as raw control / non-ASCII bytes.
 *
 * The body is compiled only when REGEX_DEBUG is defined; otherwise the
 * function does nothing.
 */
void RegexPattern::dumpPattern() const {
#if defined(REGEX_DEBUG)
    // TODO: This function assumes an ASCII based charset.
    int      index;
    int      i;

    printf("Original Pattern:  ");
    UChar32 c = utext_next32From(fPattern, 0);
    while (c != U_SENTINEL) {
        // Only printable ASCII is echoed verbatim; see the function comment.
        if (c < 0x20 || c > 0x7e) {
            c = '.';
        }
        printf("%c", c);

        c = UTEXT_NEXT32(fPattern);
    }
    printf("\n");
    printf("   Min Match Length:  %d\n", fMinMatchLen);
    printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
    if (fStartType == START_STRING) {
        printf("    Initial match string: \"");
        const int32_t limit = fInitialStringIdx + fInitialStringLen;
        for (i = fInitialStringIdx; i < limit; i++) {
            // The literal text is walked one code unit at a time, so a
            // surrogate pair shows up as two '.' placeholders.
            UChar32 unit = fLiteralText[i];
            if (unit < 0x20 || unit > 0x7e) {
                unit = '.';
            }
            printf("%c", unit);
        }
        printf("\"\n");

    } else if (fStartType == START_SET) {
        // Show at most the first 20 members of the set; " ..." marks truncation.
        const int32_t totalSetChars = fInitialChars->size();
        const int32_t numSetChars   = (totalSetChars > 20) ? 20 : totalSetChars;
        printf("     Match First Chars : ");
        for (i = 0; i < numSetChars; i++) {
            const UChar32 setChar = fInitialChars->charAt(i);
            if (0x20 < setChar && setChar < 0x7e) {
                printf("%c ", setChar);
            } else {
                printf("%#x ", setChar);
            }
        }
        if (numSetChars < totalSetChars) {
            printf(" ...");
        }
        printf("\n");

    } else if (fStartType == START_CHAR) {
        printf("    First char of Match : ");
        if (0x20 < fInitialChar && fInitialChar < 0x7e) {
            printf("%c\n", fInitialChar);
        } else {
            printf("%#x\n", fInitialChar);
        }
    }

    printf("Named Capture Groups:\n");
    if (uhash_count(fNamedCaptureMap) == 0) {
        printf("   None\n");
    } else {
        // Walk every (name -> group number) entry of the hash table.
        int32_t pos = UHASH_FIRST;
        const UHashElement *el = NULL;
        while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
            const UnicodeString *name = (const UnicodeString *)el->key.pointer;
            char s[100];
            // At most 99 characters are extracted into the 100-byte buffer.
            name->extract(0, 99, s, sizeof(s), US_INV);  // capture group names are invariant.
            int32_t number = el->value.integer;
            printf("   %d\t%s\n", number, s);
        }
    }

    printf("\nIndex   Binary     Type             Operand\n"
           "-------------------------------------------\n");
    for (index = 0; index < fCompiledPat->size(); index++) {
        dumpOp(index);
    }
    printf("\n\n");
#endif
}
Пример #4
0
/*
 * TestAPI
 *   Exercises the plain C UText API on small UTF-16 and UTF-8 inputs:
 *   open/close, clone, the basic access and iteration functions, extract,
 *   and the error returns of the write functions on a non-writable text.
 *   Each section is an independent scope; results are checked with the
 *   TEST_SUCCESS / TEST_ASSERT macros.
 *
 *   Within a section the statement order matters: most checks depend on the
 *   iteration position left behind by the previous call.
 */
static void TestAPI(void) {
    UErrorCode      status = U_ZERO_ERROR;
    UBool           gFailed = FALSE;
    (void)gFailed;   /* Suppress set but not used warning. */

    /* Open    */
    {
        UText           utLoc = UTEXT_INITIALIZER;
        const char *    cString = "\x61\x62\x63\x64";
        UChar           uString[]  = {0x41, 0x42, 0x43, 0};
        UText          *uta;
        UText          *utb;
        UChar32         c;   /* UChar32, like every other section that stores utext_next32() results. */

        /* Opened with a NULL UText argument: close is expected to return NULL. */
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);
        c = utext_next32(uta);
        TEST_ASSERT(c == 0x41);
        utb = utext_close(uta);
        TEST_ASSERT(utb == NULL);

        /* Opened into the caller-supplied utLoc: both open and close are
         * expected to hand back the address of that same struct. */
        uta = utext_openUTF8(&utLoc, cString, -1, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(uta == &utLoc);

        uta = utext_close(&utLoc);
        TEST_ASSERT(uta == &utLoc);
    }

    /* utext_clone()  */
    {
        UChar   uString[]  = {0x41, 0x42, 0x43, 0};
        int64_t len;
        UText   *uta;
        UText   *utb;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);
        /* The clone must be a distinct, non-NULL UText. */
        utb = utext_clone(NULL, uta, FALSE, FALSE, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(utb != NULL);
        TEST_ASSERT(utb != uta);
        len = utext_nativeLength(uta);
        TEST_ASSERT(len == u_strlen(uString));
        utext_close(uta);
        utext_close(utb);
    }

    /* basic access functions  */
    {
        UChar     uString[]  = {0x41, 0x42, 0x43, 0};
        UText     *uta;
        UChar32   c;
        int64_t   len;
        UBool     b;
        int64_t   i;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_ASSERT(uta!=NULL);
        TEST_SUCCESS(status);
        /* Opened with length -1: the length is expected to be reported as
         * expensive until utext_nativeLength() has been called once. */
        b = utext_isLengthExpensive(uta);
        TEST_ASSERT(b==TRUE);
        len = utext_nativeLength(uta);
        TEST_ASSERT(len == u_strlen(uString));
        b = utext_isLengthExpensive(uta);
        TEST_ASSERT(b==FALSE);

        c = utext_char32At(uta, 0);
        TEST_ASSERT(c==uString[0]);

        c = utext_current32(uta);
        TEST_ASSERT(c==uString[0]);

        /* next32 returns the current character and advances past it. */
        c = utext_next32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[1]);

        /* previous32 steps back and returns the character stepped over. */
        c = utext_previous32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[0]);

        c = utext_next32From(uta, 1);
        TEST_ASSERT(c==uString[1]);
        /* Starting at the end of the text yields U_SENTINEL. */
        c = utext_next32From(uta, u_strlen(uString));
        TEST_ASSERT(c==U_SENTINEL);

        /* previous32From(2) returns the character at index 1 and leaves the
         * index there. */
        c = utext_previous32From(uta, 2);
        TEST_ASSERT(c==uString[1]);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i == 1);

        utext_setNativeIndex(uta, 0);
        b = utext_moveIndex32(uta, 1);
        TEST_ASSERT(b==TRUE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==1);

        /* Move over the remaining characters: index ends at the text length. */
        b = utext_moveIndex32(uta, u_strlen(uString)-1);
        TEST_ASSERT(b==TRUE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==u_strlen(uString));

        /* Moving past the end fails and leaves the index unchanged. */
        b = utext_moveIndex32(uta, 1);
        TEST_ASSERT(b==FALSE);
        i = utext_getNativeIndex(uta);
        TEST_ASSERT(i==u_strlen(uString));

        /* Same forward/backward checks through the macro forms. */
        utext_setNativeIndex(uta, 0);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c==uString[0]);
        c = utext_current32(uta);
        TEST_ASSERT(c==uString[1]);

        c = UTEXT_PREVIOUS32(uta);
        TEST_ASSERT(c==uString[0]);
        /* Already at index 0: stepping back again yields U_SENTINEL. */
        c = UTEXT_PREVIOUS32(uta);
        TEST_ASSERT(c==U_SENTINEL);


        utext_close(uta);
    }

    {
        /*
         * UText opened on a NULL string with zero length
         */
        UText    *uta;
        UChar32   c;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, NULL, 0, &status);
        TEST_SUCCESS(status);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c == U_SENTINEL);
        utext_close(uta);

        uta = utext_openUTF8(NULL, NULL, 0, &status);
        TEST_SUCCESS(status);
        c = UTEXT_NEXT32(uta);
        TEST_ASSERT(c == U_SENTINEL);
        utext_close(uta);
    }


    {
        /*
         * extract
         */
        UText     *uta;
        UChar     uString[]  = {0x41, 0x42, 0x43, 0};
        UChar     buf[100];
        int32_t   i;
        /* Test pinning of input bounds */
        UChar     uString2[]  = {0x41, 0x42, 0x43, 0x44, 0x45,
                                 0x46, 0x47, 0x48, 0x49, 0x4A, 0};
        /* Points at the sixth element (0x46), so the text under test starts
         * in the middle of the array. */
        UChar *   uString2Ptr = uString2 + 5;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);

        /* NULL destination with capacity 0: expects U_BUFFER_OVERFLOW_ERROR
         * and the full text length as the return value. */
        status = U_ZERO_ERROR;
        i = utext_extract(uta, 0, 100, NULL, 0, &status);
        TEST_ASSERT(status==U_BUFFER_OVERFLOW_ERROR);
        TEST_ASSERT(i == u_strlen(uString));

        status = U_ZERO_ERROR;
        memset(buf, 0, sizeof(buf));
        i = utext_extract(uta, 0, 100, buf, 100, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(i == u_strlen(uString));
        i = u_strcmp(uString, buf);
        TEST_ASSERT(i == 0);
        utext_close(uta);

        /* Test pinning of input bounds */
        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString2Ptr, -1, &status);
        TEST_SUCCESS(status);

        /* Start -3 and limit 20 both lie outside the text; the whole text of
         * uString2Ptr, and nothing before it, is expected back. */
        status = U_ZERO_ERROR;
        memset(buf, 0, sizeof(buf));
        i = utext_extract(uta, -3, 20, buf, 100, &status);
        TEST_SUCCESS(status);
        TEST_ASSERT(i == u_strlen(uString2Ptr));
        i = u_strcmp(uString2Ptr, buf);
        TEST_ASSERT(i == 0);
        utext_close(uta);
    }

    {
        /*
         *  Copy, Replace, isWritable
         *    Can't create an editable UText from plain C, so all we
         *    can easily do is check that errors returned.
         */
        UText     *uta;
        UChar     uString[]  = {0x41, 0x42, 0x43, 0};
        UBool     b;

        status = U_ZERO_ERROR;
        uta = utext_openUChars(NULL, uString, -1, &status);
        TEST_SUCCESS(status);

        b = utext_isWritable(uta);
        TEST_ASSERT(b == FALSE);

        b = utext_hasMetaData(uta);
        TEST_ASSERT(b == FALSE);

        utext_replace(uta,
                      0, 1,     /* start, limit */
                      uString, -1,  /* replacement, replacement length */
                      &status);
        TEST_ASSERT(status == U_NO_WRITE_PERMISSION);

        /* Reset the status before the next call.  ICU functions return
         * immediately when handed a failure code, so without the reset the
         * check below would pass on the stale error left by utext_replace()
         * and would not exercise utext_copy() at all. */
        status = U_ZERO_ERROR;
        utext_copy(uta,
                   0, 1,         /* start, limit      */
                   2,            /* destination index */
                   FALSE,        /* move flag         */
                   &status);
        TEST_ASSERT(status == U_NO_WRITE_PERMISSION);

        utext_close(uta);
    }


}