// Checks that a ".*adblock.*" pattern matches a URI that contains "adblock".
TEST(Pattern_RegexPattern, Match)
{
    // `_r` and `_u` are project-defined literal suffixes (presumably building
    // a regex and a URI respectively).
    const auto& target = "http://adblock.org"_u;
    const RegexPattern regex { ".*adblock.*"_r };

    EXPECT_TRUE(regex.match(target));
}
// JNI entry point: creates a RegexMatcher from an existing RegexPattern.
//
// patternAddr is reinterpreted as the address of a RegexPattern. The matcher
// pointer produced by RegexPattern::matcher() is returned to the caller
// converted to a jint. Any ICU status is handed to maybeThrowIcuException()
// (presumably raising a Java exception on failure) before returning.
static jint Matcher_openImpl(JNIEnv* env, jclass, jint patternAddr) {
    const uintptr_t rawAddress = static_cast<uintptr_t>(patternAddr);
    RegexPattern* compiled = reinterpret_cast<RegexPattern*>(rawAddress);

    UErrorCode icuStatus = U_ZERO_ERROR;
    RegexMatcher* created = compiled->matcher(icuStatus);
    maybeThrowIcuException(env, icuStatus);

    const uintptr_t handle = reinterpret_cast<uintptr_t>(created);
    return static_cast<jint>(handle);
}
/**
 * Splits the source text on '@' and, for the first resulting segment that
 * patternString matches at its start, extracts capture group 1 into *dest.
 *
 * @param source         text to examine
 * @param srcLen         length of source, passed to the UnicodeString constructor
 * @param dest           *dest is the buffer that receives the extracted text
 * @param destCapacity   capacity of *dest
 * @param patternString  regular expression applied (with UREGEX_DOTALL) to each segment
 * @param status         ICU error code; if NULL or already a failure, nothing is done
 * @return the value returned by UnicodeString::extract() for the captured
 *         group, or 0 when status is bad or no segment matches
 */
U_CFUNC int32_t
getText(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        UnicodeString patternString,
        UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
    UnicodeString src (source,srcLen);

    if (U_FAILURE(*status)) {
        // compile() hands back a heap object we own; release it on failure too.
        delete pattern;
        return 0;
    }
    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The split pieces now live in stringArray, so the compiled pattern is no
    // longer needed. The original code never freed it (memory leak).
    delete pattern;

    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            UnicodeString out = matcher.group(1, *status);

            return out.extract(*dest, destCapacity,*status);
        }
    }
    return 0;
}
// Checks that a mixed-case pattern (".*AdBlock.*") still matches an
// all-lowercase URI when match() is called with its default arguments.
TEST(Pattern_RegexPattern, MatchWithDifferentCase2)
{
    const auto& target = "http://adblock.org"_u;
    const RegexPattern regex { ".*AdBlock.*"_r };

    EXPECT_TRUE(regex.match(target));
}
/**
 * Counts how many '@'-separated segments of the source text are matched, at
 * their start, by the pattern selected through `option`.
 *
 * @param source  text to examine
 * @param srcLen  length of source, passed to the UnicodeString constructor
 * @param option  index into patternStrings selecting the regular expression
 * @param status  ICU error code; if NULL or already a failure, nothing is done
 * @return number of matching segments, or 0 when status is bad
 *
 * Terminates the process with exit(U_UNSUPPORTED_ERROR) when option is
 * UPC_TRANSLATE and more than one segment matches.
 */
U_CFUNC int32_t
getCount(const UChar* source, int32_t srcLen,
         UParseCommentsOption option, UErrorCode *status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);

    if (U_FAILURE(*status)) {
        // compile() hands back a heap object we own; release it on failure too.
        delete pattern;
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The split pieces now live in stringArray, so the compiled pattern is no
    // longer needed. The original code never freed it (memory leak).
    delete pattern;

    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            count++;
        }
    }
    if(option == UPC_TRANSLATE && count > 1){
        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
        exit(U_UNSUPPORTED_ERROR);
    }
    return count;
}
// Exercises the two-argument match() overload with pattern ".*AdBlock.*":
//  - an identically-cased URI must match when the flag is true;
//  - a lowercase URI must match when the flag is false.
// The flag presumably toggles case-sensitive matching (see test name).
TEST(Pattern_RegexPattern, CaseSensitiveMatch)
{
    const RegexPattern regex { ".*AdBlock.*"_r };

    const auto& sameCaseUri = "http://AdBlock.org"_u;
    const auto& lowerCaseUri = "http://adblock.org"_u;

    EXPECT_TRUE(regex.match(sameCaseUri, true));
    EXPECT_TRUE(regex.match(lowerCaseUri, false));
}
//--------------------------------------------------------------------- // // matches Convenience function to test for a match, starting // with a pattern string and a data string. // //--------------------------------------------------------------------- UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, const UnicodeString &input, UParseError &pe, UErrorCode &status) { if (U_FAILURE(status)) {return FALSE;} UBool retVal; RegexPattern *pat = NULL; RegexMatcher *matcher = NULL; pat = RegexPattern::compile(regex, 0, pe, status); matcher = pat->matcher(input, status); retVal = matcher->matches(status); delete matcher; delete pat; return retVal; }
// // matches, UText mode // UBool U_EXPORT2 RegexPattern::matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status) { if (U_FAILURE(status)) {return FALSE;} UBool retVal; RegexPattern *pat = NULL; RegexMatcher *matcher = NULL; pat = RegexPattern::compile(regex, 0, pe, status); matcher = pat->matcher(input, PATTERN_IS_UTEXT, status); retVal = matcher->matches(status); delete matcher; delete pat; return retVal; }
/**
 * Splits the source text on '@' and extracts capture group 1 of the
 * index-th (0-based) segment that is matched, at its start, by the pattern
 * selected through `option`.
 *
 * @param source        text to examine
 * @param srcLen        length of source, passed to the UnicodeString constructor
 * @param dest          *dest is the buffer that receives the extracted text
 * @param destCapacity  capacity of *dest
 * @param index         0-based ordinal of the matching segment wanted
 * @param option        index into patternStrings selecting the regular expression
 * @param status        ICU error code; if NULL or already a failure, nothing is done
 * @return the value returned by UnicodeString::extract() for the captured
 *         group, or 0 when status is bad or fewer than index+1 segments match
 */
U_CFUNC int32_t
getAt(const UChar* source, int32_t srcLen,
        UChar** dest, int32_t destCapacity,
        int32_t index,
        UParseCommentsOption option,
        UErrorCode* status){

    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src (source, srcLen);

    if (U_FAILURE(*status)) {
        // compile() hands back a heap object we own; release it on failure too.
        delete pattern;
        return 0;
    }
    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
    // The split pieces now live in stringArray, so the compiled pattern is no
    // longer needed. The original code never freed it (memory leak).
    delete pattern;

    UnicodeString patternString(patternStrings[option]);
    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    int32_t count = 0;
    for(int32_t i=0; i<retLen; i++){
        matcher.reset(stringArray[i]);
        if(matcher.lookingAt(*status)){
            if(count == index){
                UnicodeString out = matcher.group(1, *status);
                return out.extract(*dest, destCapacity,*status);
            }
            count++;
        }
    }
    return 0;
}
// // matches, UText mode // UBool U_EXPORT2 RegexPattern::matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status) { if (U_FAILURE(status)) {return FALSE;} UBool retVal = FALSE; RegexPattern *pat = NULL; RegexMatcher *matcher = NULL; pat = RegexPattern::compile(regex, 0, pe, status); matcher = pat->matcher(status); if (U_SUCCESS(status)) { matcher->reset(input); retVal = matcher->matches(status); } delete matcher; delete pat; return retVal; }
/**
 * Splits the source text on '@' and, if the first segment does not contain
 * AT_SIGN, copies that segment into *dest and passes it through trim().
 *
 * @param source        text to examine
 * @param srcLen        length of source, passed to the UnicodeString constructor
 * @param dest          *dest is the buffer that receives the first segment
 * @param destCapacity  capacity of *dest
 * @param status        ICU error code; if NULL or already a failure, nothing is done
 * @return the value returned by trim() for the extracted segment, or 0 when
 *         status is bad or the first segment contains AT_SIGN
 */
U_CFUNC int32_t
getDescription( const UChar* source, int32_t srcLen,
                UChar** dest, int32_t destCapacity,
                UErrorCode* status){
    if(status == NULL || U_FAILURE(*status)){
        return 0;
    }

    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
    RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
    UnicodeString src(source, srcLen);

    if (U_FAILURE(*status)) {
        // compile() hands back a heap object we own; release it on failure too.
        delete pattern;
        return 0;
    }
    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
    // The split pieces now live in stringArray, so the compiled pattern is no
    // longer needed. The original code never freed it (memory leak).
    delete pattern;

    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
        return trim(*dest, destLen, status);
    }
    return 0;
}
//------------------------------------------------------------------------------------------ // // main for ugrep // // Structurally, all use of the ICU Regular Expression API is in main(), // and all of the supporting stuff necessary to make a running program, but // not directly related to regular expressions, is factored out into these other // functions. // //------------------------------------------------------------------------------------------ int main(int argc, const char** argv) { UBool matchFound = FALSE; // // Process the commmand line options. // processOptions(argc, argv); // // Create a RegexPattern object from the user supplied pattern string. // UErrorCode status = U_ZERO_ERROR; // All ICU operations report success or failure // in a status variable. UParseError parseErr; // In the event of a syntax error in the regex pattern, // this struct will contain the position of the // error. RegexPattern *rePat = RegexPattern::compile(pattern, parseErr, status); // Note that C++ is doing an automatic conversion // of the (char *) pattern to a temporary // UnicodeString object. if (U_FAILURE(status)) { fprintf(stderr, "ugrep: error in pattern: \"%s\" at position %d\n", u_errorName(status), parseErr.offset); exit(-1); } // // Create a RegexMatcher from the newly created pattern. // UnicodeString empty; RegexMatcher *matcher = rePat->matcher(empty, status); if (U_FAILURE(status)) { fprintf(stderr, "ugrep: error in creating RegexMatcher: \"%s\"\n", u_errorName(status)); exit(-1); } // // Loop, processing each of the input files. // for (int fileNum=firstFileNum; fileNum < argc; fileNum++) { readFile(argv[fileNum]); // // Loop through the lines of a file, trying to match the regex pattern on each. 
// for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) { UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart); matcher->reset(s); if (matcher->find()) { matchFound = TRUE; printMatch(); } } } // // Clean up // delete matcher; delete rePat; free(ucharBuf); free(charBuf); ucnv_close(outConverter); u_cleanup(); // shut down ICU, release any cached data it owns. return matchFound? 0: 1; }
/* Returns a new Python wrapper around a clone of the matcher's pattern.
 * The clone is passed to wrap_RegexPattern() with T_OWNED (presumably making
 * the wrapper responsible for deleting it). */
static PyObject *t_regexmatcher_pattern(t_regexmatcher *self)
{
    /* Bind by const reference: the pattern is cloned on the next line, so
     * copy-constructing a temporary RegexPattern first was redundant work. */
    const RegexPattern &pattern = self->object->pattern();
    return wrap_RegexPattern(pattern.clone(), T_OWNED);
}