Пример #1
0
TEST(Pattern_RegexPattern, Match)
{
    RegexPattern const pattern { ".*adblock.*"_r };
    auto const& uri = "http://adblock.org"_u;

    EXPECT_TRUE(pattern.match(uri));
}
static jint Matcher_openImpl(JNIEnv* env, jclass, jint patternAddr) {
    RegexPattern* pattern = reinterpret_cast<RegexPattern*>(static_cast<uintptr_t>(patternAddr));
    UErrorCode status = U_ZERO_ERROR;
    RegexMatcher* result = pattern->matcher(status);
    maybeThrowIcuException(env, status);
    return static_cast<jint>(reinterpret_cast<uintptr_t>(result));
}
Пример #3
0
U_CFUNC int32_t 
getText(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UnicodeString patternString, 
        UErrorCode* status){
    
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
    UnicodeString src (source,srcLen);
    
    if (U_FAILURE(*status)) {
        return 0;
    }
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            UnicodeString out = matcher.group(1, *status);

            return out.extract(*dest, destCapacity,*status);
        }
    }
    return 0;
}
Пример #4
0
TEST(Pattern_RegexPattern, MatchWithDifferentCase2)
{
    RegexPattern const pattern { ".*AdBlock.*"_r };
    auto const& uri = "http://adblock.org"_u;

    EXPECT_TRUE(pattern.match(uri));
}
Пример #5
0
U_CFUNC int32_t
getCount(const UChar* source, int32_t srcLen, 
         UParseCommentsOption option, UErrorCode *status){
    
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);


    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    } 
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            count++;
        }
    }
    if(option == UPC_TRANSLATE && count > 1){
        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
        exit(U_UNSUPPORTED_ERROR);
    }
    return count;
}
Пример #6
0
TEST(Pattern_RegexPattern, CaseSensitiveMatch)
{
    RegexPattern const pattern { ".*AdBlock.*"_r };
    auto const& uri1 = "http://AdBlock.org"_u;
    auto const& uri2 = "http://adblock.org"_u;

    EXPECT_TRUE(pattern.match(uri1, true));
    EXPECT_TRUE(pattern.match(uri2, false));
}
Пример #7
0
//---------------------------------------------------------------------
//
//   matches        Convenience function to test for a match, starting
//                  with a pattern string and a data string.
//
//---------------------------------------------------------------------
UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
              const UnicodeString   &input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal;
    RegexPattern *pat     = NULL;
    RegexMatcher *matcher = NULL;

    pat     = RegexPattern::compile(regex, 0, pe, status);
    matcher = pat->matcher(input, status);
    retVal  = matcher->matches(status);

    delete matcher;
    delete pat;
    return retVal;
}
Пример #8
0
//
//   matches, UText mode
//
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                    UText           *input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal;
    RegexPattern *pat     = NULL;
    RegexMatcher *matcher = NULL;

    pat     = RegexPattern::compile(regex, 0, pe, status);
    matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
    retVal  = matcher->matches(status);

    delete matcher;
    delete pat;
    return retVal;
}
Пример #9
0
U_CFUNC int32_t 
getAt(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        int32_t index,
        UParseCommentsOption option,
        UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);


    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    
    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    } 
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            if(count == index){
                UnicodeString out = matcher.group(1, *status);
                return out.extract(*dest, destCapacity,*status);
            }
            count++;
            
        }
    }
    return 0;

}
Пример #10
0
//
//   matches, UText mode
//
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                    UText           *input,
                    UParseError     &pe,
                    UErrorCode      &status) {

    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal  = FALSE;
    RegexPattern *pat     = NULL;
    RegexMatcher *matcher = NULL;

    pat     = RegexPattern::compile(regex, 0, pe, status);
    matcher = pat->matcher(status);
    if (U_SUCCESS(status)) {
        matcher->reset(input);
        retVal  = matcher->matches(status);
    }

    delete matcher;
    delete pat;
    return retVal;
}
Пример #11
0
U_CFUNC int32_t
getDescription( const UChar* source, int32_t srcLen,
                UChar** dest, int32_t destCapacity,
                UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src(source, srcLen);
    
    if (U_FAILURE(*status)) {
        return 0;
    }
    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);

    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
        return trim(*dest, destLen, status);
    }
    return 0;
}
Пример #12
0
//------------------------------------------------------------------------------------------
//
//   main          for ugrep
//
//           Structurally, all use of the ICU Regular Expression API is in main(),
//           and all of the supporting stuff necessary to make a running program, but
//           not directly related to regular expressions, is factored out into these other
//           functions.
//
//------------------------------------------------------------------------------------------
int main(int argc, const char** argv) {
    UBool     matchFound = FALSE;

    //
    //  Process the commmand line options.
    //
    processOptions(argc, argv);

    //
    // Create a RegexPattern object from the user supplied pattern string.
    //
    UErrorCode status = U_ZERO_ERROR;   // All ICU operations report success or failure
                                        //   in a status variable.

    UParseError    parseErr;            // In the event of a syntax error in the regex pattern,
                                        //   this struct will contain the position of the
                                        //   error.

    RegexPattern  *rePat = RegexPattern::compile(pattern, parseErr, status);
                                        // Note that C++ is doing an automatic conversion
                                        //  of the (char *) pattern to a temporary
                                        //  UnicodeString object.
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in pattern: \"%s\" at position %d\n",
            u_errorName(status), parseErr.offset);
        exit(-1);
    }

    //
    // Create a RegexMatcher from the newly created pattern.
    //
    UnicodeString empty;
    RegexMatcher *matcher = rePat->matcher(empty, status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "ugrep:  error in creating RegexMatcher: \"%s\"\n",
            u_errorName(status));
        exit(-1);
    }

    //
    // Loop, processing each of the input files.
    //
    for (int fileNum=firstFileNum; fileNum < argc; fileNum++) {
        readFile(argv[fileNum]);

        //
        //  Loop through the lines of a file, trying to match the regex pattern on each.
        //
        for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) {
            UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart);
            matcher->reset(s);
            if (matcher->find()) {
                matchFound = TRUE;
                printMatch();
            }
        }
    }

    //
    //  Clean up
    //
    delete matcher;
    delete rePat;
    free(ucharBuf);
    free(charBuf);
    ucnv_close(outConverter);
    
    u_cleanup();       // shut down ICU, release any cached data it owns.

    return matchFound? 0: 1;
}
Пример #13
0
static PyObject *t_regexmatcher_pattern(t_regexmatcher *self)
{
    const RegexPattern pattern = self->object->pattern();
    return wrap_RegexPattern(pattern.clone(), T_OWNED);
}