/*
 * Run the configured ackmate directory filter against dir_name.
 *
 * Returns 0 when no filter is configured (opts.ackmate_dir_filter is NULL).
 * Otherwise the raw pcre16_exec() result code is handed back unchanged.
 */
static int ackmate_dir_match(const wchar_t *dir_name) {
    int rc = 0;

    if (opts.ackmate_dir_filter != NULL) {
        /* Only the outcome of the match is of interest, not the positions,
         * so no offset vector is supplied. */
        rc = pcre16_exec(opts.ackmate_dir_filter, NULL, (PCRE_SPTR16)dir_name,
                         wcslen(dir_name), 0, 0, NULL, 0);
    }

    return rc;
}
/**
 * Matches the expression against `string`.
 *
 * The subject length handed to PCRE is `pos+len` and matching starts at
 * offset `pos`; capture offsets are written to `matchs` (room for
 * `nmatchs * 3` ints).
 *
 * With HX_SMART_STRINGS the UTF-16 and UTF-8 variants of the pattern are
 * compiled lazily on first use and cached in `rUtf16` / `rUtf8`.
 *
 * @return true when PCRE reports a non-negative result, false when there is
 *         no match, an error, or the pattern could not be compiled.
 */
bool run(String string,int pos,int len)
{
   #ifdef HX_SMART_STRINGS
   if (string.isUTF16Encoded())
   {
      if (!rUtf16)
      {
         const char *error = 0;
         int err_offset = 0;
         hx::strbuf buf;
         rUtf16 = pcre16_compile((PCRE_SPTR16)expr.wc_str(&buf),flags|PCRE_UTF16,&error,&err_offset,NULL);
         if (!rUtf16)
         {
            return false;
         }
      }

      int n = pcre16_exec(rUtf16,NULL,(const unsigned short *)string.raw_wptr(),pos+len,pos,0,matchs,nmatchs * 3);
      return n>=0;
   }

   if (!rUtf8)
   {
      // pcre_compile() returns NULL immediately when its error pointer is
      // NULL, so real output locations must be supplied or the pattern can
      // never be compiled here.
      const char *error = 0;
      int err_offset = 0;
      rUtf8 = pcre_compile(expr.utf8_str(),flags|PCRE_UTF8,&error,&err_offset,NULL);
      if (!rUtf8)
         return false;
   }
   #endif

   return pcre_exec(rUtf8,NULL,string.utf8_str(),pos+len,pos,0,matchs,nmatchs * 3) >= 0;
}
bool RegExp::execute( const U16String& str, int offset, std::vector<int>& captures) const { #ifdef LIBJ_USE_PCRE16 pcre16* code = static_cast<pcre16*>(code_); captures.clear(); # define DEFAULT_OVECTOR_SIZE (32) // XXX captures.reserve(DEFAULT_OVECTOR_SIZE * 3); # undef DEFAULT_OVECTOR_SIZE size_t n = captures.capacity(); for (size_t i = 0; i < n; i++) captures.push_back(-1); int res = pcre16_exec(code, NULL, str.c_str(), str.length(), offset, 0, captures.data(), n); return (res > 0); #else static iv::aero::VM vm; iv::aero::Code* code = static_cast<iv::aero::Code*>(code_); captures.clear(); size_t n = code->captures() * 2; for (size_t i = 0; i < n; i++) captures.push_back(-1); int res = vm.Execute(code, str, captures.data(), offset); return res == iv::aero::AERO_SUCCESS; #endif }
// ECMA262: 15.10.6.3 static ejsval _ejs_RegExp_prototype_test (ejsval env, ejsval _this, uint32_t argc, ejsval *args) { if (!EJSVAL_IS_REGEXP(_this)) EJS_NOT_IMPLEMENTED(); EJSRegExp* re = (EJSRegExp*)EJSVAL_TO_OBJECT(_this); ejsval subject = _ejs_undefined; if (argc > 0) subject = args[0]; pcre16_extra extra; memset (&extra, 0, sizeof(extra)); EJSPrimString *flat_subject = _ejs_string_flatten (subject); jschar* subject_chars = flat_subject->data.flat; int ovec[3]; int rv = pcre16_exec((pcre16*)re->compiled_pattern, &extra, subject_chars, flat_subject->length, 0, PCRE_NO_UTF16_CHECK, ovec, 3); return rv == PCRE_ERROR_NOMATCH ? _ejs_false : _ejs_true; }
/*
	Runs the compiled expression e->p on s, using pos+len as the subject
	length and pos as the start offset. Capture offsets go to e->matches and
	e->matched records whether the call succeeded. Any failure other than
	"no match" is reported through hl_error.
*/
HL_PRIM bool regexp_regexp_match( ereg *e, vbyte *s, int pos, int len ) {
	int rc = pcre16_exec(e->p,&limit,(PCRE_SPTR16)s,pos+len,pos,0,e->matches,e->nmatches * 3);
	bool found = rc >= 0;
	e->matched = found;
	if( !found && rc != PCRE_ERROR_NOMATCH )
		hl_error("An error occured while running pcre_exec");
	return found;
}
// Checks Str against the entries of PcreCompileData and returns the ID of the
// first entry that matches, or -1 when none does.
// StartingID selects where the scan begins: -1 starts at the first entry,
// otherwise the scan starts right after the entry whose ID == StartingID
// (and nothing is checked if no such entry exists).
int PcreCheck(TCString Str, int StartingID)
{
	int Idx = 0;
	if (StartingID != -1)
	{
		// Step past every entry up to and including the one with StartingID.
		bool Found = false;
		while (!Found && Idx < PcreCompileData.GetSize())
		{
			Found = (PcreCompileData[Idx].ID == StartingID);
			Idx++;
		}
	}

	for (; Idx < PcreCompileData.GetSize(); Idx++)
	{
		if (!PcreCompileData[Idx].pPcre)
		{
			// No compiled expression: fall back to a lowercase substring search.
			if (_tcsstr(Str.ToLower(), PcreCompileData[Idx].Pattern.ToLower()))
			{
				return PcreCompileData[Idx].ID;
			}
			continue;
		}

		int Res = pcre16_exec(PcreCompileData[Idx].pPcre, PcreCompileData[Idx].pExtra, Str, Str.GetLen() - 1, 0, PCRE_NOTEMPTY | PCRE_NO_UTF8_CHECK, NULL, 0);
		if (Res >= 0)
		{
			return PcreCompileData[Idx].ID;
		}
	}
	return -1;
}
/*
** Substitutes part of the text.
**
** m_Substitute is read as consecutive pairs: the entry at an even index is
** the text/pattern to look for and the following entry is its replacement.
** Returns 'buffer' itself when no substitutions are configured; otherwise a
** pointer into a function-local static string, which the next call
** overwrites.
*/
const WCHAR* Measure::CheckSubstitute(const WCHAR* buffer)
{
	static std::wstring str;

	if (m_Substitute.empty())
	{
		return buffer;
	}

	str = buffer;
	if (!m_RegExpSubstitute)
	{
		for (size_t i = 0, isize = m_Substitute.size(); i < isize; i += 2)
		{
			if (!m_Substitute[i].empty())
			{
				MakePlainSubstitute(str, i);
			}
			else if (str.empty())
			{
				// Empty result and empty substitute -> use second
				str = m_Substitute[i + 1];
			}
		}
	}
	else
	{
		int ovector[300];
		for (size_t i = 0, isize = m_Substitute.size(); i < isize; i += 2)
		{
			const char* error;
			int errorOffset;
			int offset = 0;
			pcre16* re = pcre16_compile(
				(PCRE_SPTR16)m_Substitute[i].c_str(),
				PCRE_UTF16,
				&error,
				&errorOffset,
				nullptr);  // Use default character tables.
			if (!re)
			{
				// Not a valid expression: fall back to plain substitution.
				MakePlainSubstitute(str, i);
				LogNoticeF(this, L"Substitute: %S", error);
			}
			else
			{
				do
				{
					// An empty match is only acceptable when the subject itself is empty.
					const int options = str.empty() ? 0 : PCRE_NOTEMPTY;
					const int rc = pcre16_exec(
						re,
						nullptr,
						(PCRE_SPTR16)str.c_str(),
						(int)str.length(),
						offset,
						options,  // Empty string is not a valid match
						ovector,
						(int)_countof(ovector));
					if (rc <= 0)
					{
						break;
					}

					std::wstring result = m_Substitute[i + 1];

					if (rc > 1)
					{
						// Expand \N tokens, highest index first so that e.g. "\10" is
						// handled before "\1".
						for (int j = rc - 1 ; j >= 0 ; --j)
						{
							int newStart = ovector[2 * j];
							size_t inLength = ovector[2 * j + 1] - ovector[2 * j];

							// Group did not take part in the match: skip it, but keep
							// expanding the remaining (lower numbered) groups.
							if (newStart < 0) continue;

							WCHAR tmpName[64];
							size_t cutLength = _snwprintf_s(tmpName, _TRUNCATE, L"\\%i", j);
							size_t start = 0, pos;
							do
							{
								pos = result.find(tmpName, start, cutLength);
								if (pos != std::string::npos)
								{
									result.replace(pos, cutLength, str, (size_t)newStart, inLength);
									start = pos + inLength;
								}
							}
							while (pos != std::string::npos);
						}
					}

					const int start = ovector[0];
					const int length = ovector[1] - ovector[0];
					str.replace(start, length, result);
					offset = start + (int)result.length();

					// An empty match replaced by an empty string changes nothing and
					// leaves the offset where it was; stop instead of looping forever.
					if (length == 0 && result.empty())
					{
						break;
					}
				}
				while (true);

				pcre16_free(re);
			}
		}
	}

	return str.c_str();
}
static int regression_tests(void) { struct regression_test_case *current = regression_test_cases; const char *error; const char *cpu_info; int i, err_offs; int is_successful, is_ascii_pattern, is_ascii_input; int total = 0; int successful = 0; int counter = 0; #ifdef SUPPORT_PCRE8 pcre *re8; pcre_extra *extra8; int ovector8_1[32]; int ovector8_2[32]; int return_value8_1, return_value8_2; int utf8 = 0, ucp8 = 0; int disabled_flags8 = 0; #endif #ifdef SUPPORT_PCRE16 pcre16 *re16; pcre16_extra *extra16; int ovector16_1[32]; int ovector16_2[32]; int return_value16_1, return_value16_2; int utf16 = 0, ucp16 = 0; int disabled_flags16 = 0; int length16; #endif /* This test compares the behaviour of interpreter and JIT. Although disabling utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is still considered successful from pcre_jit_test point of view. */ #ifdef SUPPORT_PCRE8 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info); #else pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info); #endif printf("Running JIT regression tests\n"); printf(" target CPU of SLJIT compiler: %s\n", cpu_info); #ifdef SUPPORT_PCRE8 pcre_config(PCRE_CONFIG_UTF8, &utf8); pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8); if (!utf8) disabled_flags8 |= PCRE_UTF8; if (!ucp8) disabled_flags8 |= PCRE_UCP; printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled"); #endif #ifdef SUPPORT_PCRE16 pcre16_config(PCRE_CONFIG_UTF16, &utf16); pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16); if (!utf16) disabled_flags16 |= PCRE_UTF8; if (!ucp16) disabled_flags16 |= PCRE_UCP; printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? 
"enabled" : "disabled"); #endif while (current->pattern) { /* printf("\nPattern: %s :\n", current->pattern); */ total++; if (current->start_offset & F_PROPERTY) { is_ascii_pattern = 0; is_ascii_input = 0; } else { is_ascii_pattern = check_ascii(current->pattern); is_ascii_input = check_ascii(current->input); } error = NULL; #ifdef SUPPORT_PCRE8 re8 = NULL; if (!(current->start_offset & F_NO8)) re8 = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8), &error, &err_offs, tables(0)); extra8 = NULL; if (re8) { error = NULL; extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error); if (!extra8) { printf("\n8 bit: Cannot study pattern: %s\n", current->pattern); pcre_free(re8); re8 = NULL; } if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern); pcre_free_study(extra8); pcre_free(re8); re8 = NULL; } } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8)) printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern); #endif #ifdef SUPPORT_PCRE16 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH); else copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH); re16 = NULL; if (!(current->start_offset & F_NO16)) re16 = pcre16_compile(regtest_buf, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16), &error, &err_offs, tables(0)); extra16 = NULL; if (re16) { error = NULL; extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error); if (!extra16) { printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); pcre16_free(re16); re16 = NULL; } if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern); pcre16_free_study(extra16); pcre16_free(re16); re16 = 
NULL; } } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16)) printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern); #endif counter++; if ((counter & 0x3) != 0) { #ifdef SUPPORT_PCRE8 setstack8(NULL); #endif #ifdef SUPPORT_PCRE16 setstack16(NULL); #endif } #ifdef SUPPORT_PCRE8 return_value8_1 = -1000; return_value8_2 = -1000; for (i = 0; i < 32; ++i) ovector8_1[i] = -2; for (i = 0; i < 32; ++i) ovector8_2[i] = -2; if (re8) { setstack8(extra8); return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32); return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32); } #endif #ifdef SUPPORT_PCRE16 return_value16_1 = -1000; return_value16_2 = -1000; for (i = 0; i < 32; ++i) ovector16_1[i] = -2; for (i = 0; i < 32; ++i) ovector16_2[i] = -2; if (re16) { setstack16(extra16); if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH); else length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH); return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32); return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32); } #endif /* If F_DIFF is set, just run the test, but do not compare the results. Segfaults can still be captured. 
*/ is_successful = 1; if (!(current->start_offset & F_DIFF)) { #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) { /* All results must be the same. */ if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) { printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; /* Transform back the results. */ if (current->flags & PCRE_UTF8) { for (i = 0; i < return_value8_1; ++i) { if (ovector16_1[i] >= 0) ovector16_1[i] = regtest_offsetmap[ovector16_1[i]]; if (ovector16_2[i] >= 0) ovector16_2[i] = regtest_offsetmap[ovector16_2[i]]; } } for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n", i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); is_successful = 0; } } } else { #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ /* Only the 8 bit and 16 bit results must be equal. 
*/ #ifdef SUPPORT_PCRE8 if (return_value8_1 != return_value8_2) { printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i]) { printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input); is_successful = 0; } } #endif #ifdef SUPPORT_PCRE16 if (return_value16_1 != return_value16_2) { printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value16_1 >= 0) { return_value16_1 *= 2; for (i = 0; i < return_value16_1; ++i) if (ovector16_1[i] != ovector16_2[i]) { printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); is_successful = 0; } } #endif #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 } #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ } if (is_successful) { #ifdef SUPPORT_PCRE8 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) { if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) { printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } } #endif #ifdef SUPPORT_PCRE16 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) { if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } if 
(return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) { printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } } #endif } if (is_successful) successful++; #ifdef SUPPORT_PCRE8 if (re8) { pcre_free_study(extra8); pcre_free(re8); } #endif #ifdef SUPPORT_PCRE16 if (re16) { pcre16_free_study(extra16); pcre16_free(re16); } #endif /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ printf("."); fflush(stdout); current++; } tables(1); #ifdef SUPPORT_PCRE8 setstack8(NULL); #endif #ifdef SUPPORT_PCRE16 setstack16(NULL); #endif if (total == successful) { printf("\nAll JIT regression tests are successfully passed.\n"); return 0; } else { printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); return 1; } }
/*
 * POSIX-style front end for pcre16_exec().
 *
 * Matches the first `len` units of `string` against the pattern stored in
 * preg->re_pcre. On success (return value 0) the first entries of pmatch
 * receive the start/end offsets of the match and its captures, and the
 * remaining entries up to nmatch are set to -1. On failure the PCRE error
 * code is translated into one of the REG_* codes.
 */
int regexec(regex_t *preg, const unichar_t *string, int len, size_t nmatch, regmatch_t pmatch[], int eflags)
{
	int *offsets = NULL;
	int exec_flags = 0;
	int result;
	int slot;

	if (eflags & REG_NOTBOL)
		exec_flags |= PCRE_NOTBOL;
	if (eflags & REG_NOTEOL)
		exec_flags |= PCRE_NOTEOL;

	preg->re_erroffset = (size_t)(-1);	/* Only has meaning after compile */

	if (nmatch > 0)
	{
		/* Three ints per requested match are handed to PCRE. */
		offsets = (int *)malloc(sizeof(int) * nmatch * 3);
		if (offsets == NULL)
			return REG_ESPACE;
	}

	// [[ libprce update ]] SN-2014-01-14: now handles unicode-encoded input
	result = pcre16_exec((const pcre16 *)preg->re_pcre, NULL, (PCRE_SPTR16)string, len, 0, exec_flags, offsets, nmatch * 3);

	if (result == 0)
		result = nmatch;	/* All captured slots were filled in */

	if (result < 0)
	{
		if (offsets != NULL)
			free(offsets);

		switch (result)
		{
		case PCRE_ERROR_NOMATCH:
			return REG_NOMATCH;
		case PCRE_ERROR_NULL:
		case PCRE_ERROR_BADOPTION:
		case PCRE_ERROR_BADMAGIC:
			return REG_INVARG;
		case PCRE_ERROR_NOMEMORY:
			return REG_ESPACE;
		case PCRE_ERROR_UNKNOWN_NODE:
		default:
			return REG_ASSERT;
		}
	}

	/* Copy out the offsets that were set ... */
	for (slot = 0; slot < result; slot++)
	{
		pmatch[slot].rm_so = offsets[slot * 2];
		pmatch[slot].rm_eo = offsets[slot * 2 + 1];
	}
	if (offsets != NULL)
		free(offsets);

	/* ... and mark the rest as unused. */
	for (; slot < (int)nmatch; slot++)
		pmatch[slot].rm_so = pmatch[slot].rm_eo = -1;

	return 0;
}
/*
** Evaluates all configured If-actions for 'measure' and executes the ones
** whose state calls for it.
**
** IfEqual/IfAbove/IfBelow compare 'value' against the stored thresholds and
** fire once each time the comparison becomes true (the *Committed flags are
** reset as soon as it is false again). IfCondition entries are evaluated with
** MathParser, IfMatch entries with a regular expression run against the
** measure's string value; their true/false actions are likewise fired once
** per state change, or on every call when m_ConditionMode / m_MatchMode is set.
*/
void IfActions::DoIfActions(Measure& measure, double value)
{
	// IfEqual
	if (!m_EqualAction.empty())
	{
		if ((int64_t)value == m_EqualValue)
		{
			if (!m_EqualCommitted)
			{
				m_EqualCommitted = true;	// To avoid infinite loop from !Update
				GetRainmeter().ExecuteCommand(m_EqualAction.c_str(), measure.GetSkin());
			}
		}
		else
		{
			m_EqualCommitted = false;
		}
	}

	// IfAbove
	if (!m_AboveAction.empty())
	{
		if (value > m_AboveValue)
		{
			if (!m_AboveCommitted)
			{
				m_AboveCommitted = true;	// To avoid infinite loop from !Update
				GetRainmeter().ExecuteCommand(m_AboveAction.c_str(), measure.GetSkin());
			}
		}
		else
		{
			m_AboveCommitted = false;
		}
	}

	// IfBelow
	if (!m_BelowAction.empty())
	{
		if (value < m_BelowValue)
		{
			if (!m_BelowCommitted)
			{
				m_BelowCommitted = true;	// To avoid infinite loop from !Update
				GetRainmeter().ExecuteCommand(m_BelowAction.c_str(), measure.GetSkin());
			}
		}
		else
		{
			m_BelowCommitted = false;
		}
	}

	// IfCondition
	int i = 0;
	for (auto& item : m_Conditions)
	{
		++i;
		if (!item.value.empty() && (!item.tAction.empty() || !item.fAction.empty()))
		{
			double result = 0.0;
			const WCHAR* errMsg = MathParser::Parse(
				item.value.c_str(), &result, measure.GetCurrentMeasureValue, &measure);
			if (errMsg != nullptr)
			{
				// Log a parse error only once until the condition parses again.
				if (!item.parseError)
				{
					if (i == 1)
					{
						LogErrorF(&measure, L"%s: IfCondition=%s", errMsg, item.value.c_str());
					}
					else
					{
						LogErrorF(&measure, L"%s: IfCondition%i=%s", errMsg, i, item.value.c_str());
					}
					item.parseError = true;
				}
			}
			else
			{
				item.parseError = false;

				if (result == 1.0)			// "True"
				{
					item.fCommitted = false;

					if (m_ConditionMode || !item.tCommitted)
					{
						item.tCommitted = true;
						GetRainmeter().ExecuteCommand(item.tAction.c_str(), measure.GetSkin());
					}
				}
				else if (result == 0.0)		// "False"
				{
					item.tCommitted = false;

					if (m_ConditionMode || !item.fCommitted)
					{
						item.fCommitted = true;
						GetRainmeter().ExecuteCommand(item.fAction.c_str(), measure.GetSkin());
					}
				}
			}
		}
	}

	// IfMatch
	i = 0;
	for (auto& item : m_Matches)
	{
		++i;
		if (!item.value.empty() && (!item.tAction.empty() || !item.fAction.empty()))
		{
			const char* error;
			int errorOffset;

			pcre16* re = pcre16_compile(
				(PCRE_SPTR16)item.value.c_str(),
				PCRE_UTF16,
				&error,
				&errorOffset,
				nullptr);

			if (!re)
			{
				// Log a compile error only once until the pattern compiles again.
				if (!item.parseError)
				{
					if (i == 1)
					{
						LogErrorF(&measure, L"Error: \"%S\" in IfMatch=%s", error, item.value.c_str());
					}
					else
					{
						LogErrorF(&measure, L"Error: \"%S\" in IfMatch%i=%s", error, i, item.value.c_str());
					}

					item.parseError = true;
				}
			}
			else
			{
				item.parseError = false;

				// pcre16_exec rejects a null subject with PCRE_ERROR_NULL, so a
				// missing string value is matched as the empty string instead.
				const WCHAR* str = measure.GetStringValue();
				if (!str)
				{
					str = L"";
				}
				const int strLen = (int)wcslen(str);

				int ovector[300];
				int rc = pcre16_exec(
					re,
					nullptr,
					(PCRE_SPTR16)str,
					strLen,
					0,
					0,
					ovector,
					(int)_countof(ovector));

				// rc == 0 still means "matched" (only the offset vector was too
				// small to hold every capture); negative values are no-match/errors.
				if (rc >= 0)		// Match
				{
					item.fCommitted = false;

					if (m_MatchMode || !item.tCommitted)
					{
						item.tCommitted = true;
						GetRainmeter().ExecuteCommand(item.tAction.c_str(), measure.GetSkin());
					}
				}
				else			// Not Match
				{
					item.tCommitted = false;

					if (m_MatchMode || !item.fCommitted)
					{
						item.fCommitted = true;
						GetRainmeter().ExecuteCommand(item.fAction.c_str(), measure.GetSkin());
					}
				}
			}

			// Release memory used for the compiled pattern
			pcre16_free(re);
		}
	}
}
void ParseData(MeasureData* measure, const BYTE* rawData, DWORD rawSize, bool utf16Data) { const int UTF16_CODEPAGE = 1200; if (measure->codepage == UTF16_CODEPAGE) { utf16Data = true; } const char* error; int erroffset; int ovector[OVECCOUNT]; int rc; bool doErrorAction = false; // Compile the regular expression in the first argument pcre16* re = pcre16_compile( (PCRE_SPTR16)measure->regExp.c_str(), PCRE_UTF16, &error, &erroffset, nullptr); if (re != nullptr) { // Compilation succeeded: match the subject in the second argument std::wstring buffer; auto data = (const WCHAR*)rawData; DWORD dataLength = rawSize / 2; if (!utf16Data) { buffer = StringUtil::Widen((LPCSTR)rawData, rawSize, measure->codepage); data = buffer.c_str(); dataLength = (DWORD)buffer.length(); } rc = pcre16_exec(re, nullptr, (PCRE_SPTR16)data, dataLength, 0, 0, ovector, OVECCOUNT); if (rc >= 0) { if (rc == 0) { // The output vector wasn't big enough RmLog(measure->rm, LOG_ERROR, L"WebParser: Too many substrings"); } else { if (measure->stringIndex < rc) { if (measure->debug != 0) { for (int i = 0; i < rc; ++i) { const WCHAR* match = data + ovector[2 * i]; const int matchLen = min(ovector[2 * i + 1] - ovector[2 * i], 256); RmLogF(measure->rm, LOG_DEBUG, L"WebParser: Index %2d: %.*s", i, matchLen, match); } } const WCHAR* match = data + ovector[2 * measure->stringIndex]; int matchLen = ovector[2 * measure->stringIndex + 1] - ovector[2 * measure->stringIndex]; EnterCriticalSection(&g_CriticalSection); measure->resultString.assign(match, matchLen); DecodeReferences(measure->resultString, measure->decodeCharacterReference); LeaveCriticalSection(&g_CriticalSection); } else { RmLog(measure->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); measure->resultString.clear(); if (measure->download) { if (measure->downloadFile.empty()) // cache mode { if (!measure->downloadedFile.empty()) { // Delete old downloaded file 
DeleteFile(measure->downloadedFile.c_str()); } } measure->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if (measure->skin == (*i)->skin && StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos) { if ((*i)->stringIndex < rc) { const WCHAR* match = data + ovector[2 * (*i)->stringIndex]; int matchLen = ovector[2 * (*i)->stringIndex + 1] - ovector[2 * (*i)->stringIndex]; if (!(*i)->regExp.empty()) { // Change the index and parse the substring int index = (*i)->stringIndex; (*i)->stringIndex = (*i)->stringIndex2; ParseData((*i), (BYTE*)match, matchLen * 2, true); (*i)->stringIndex = index; } else { // Set the result EnterCriticalSection(&g_CriticalSection); // Substitude the [measure] with result (*i)->resultString = (*i)->url; (*i)->resultString.replace( StringUtil::CaseInsensitiveFind((*i)->resultString, compareStr), compareStr.size(), match, matchLen); DecodeReferences((*i)->resultString, (*i)->decodeCharacterReference); // Start download threads for the references if ((*i)->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, (*i), 0, &id); if (threadHandle) { (*i)->dlThreadHandle = threadHandle; } } LeaveCriticalSection(&g_CriticalSection); } } else { RmLog((*i)->rm, LOG_WARNING, L"WebParser: Not enough substrings"); // Clear the old result EnterCriticalSection(&g_CriticalSection); (*i)->resultString.clear(); if ((*i)->download) { if ((*i)->downloadFile.empty()) // cache mode { if (!(*i)->downloadedFile.empty()) { // Delete old downloaded file DeleteFile((*i)->downloadedFile.c_str()); } } (*i)->downloadedFile.clear(); } LeaveCriticalSection(&g_CriticalSection); } } } } } else { // Matching failed: handle error cases 
RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp matching error (%d)", rc); doErrorAction = true; EnterCriticalSection(&g_CriticalSection); measure->resultString = measure->errorString; // Update the references std::vector<MeasureData*>::iterator i = g_Measures.begin(); std::wstring compareStr = L"["; compareStr += RmGetMeasureName(measure->rm); compareStr += L']'; for ( ; i != g_Measures.end(); ++i) { if ((StringUtil::CaseInsensitiveFind((*i)->url, compareStr) != std::wstring::npos) && (measure->skin == (*i)->skin)) { (*i)->resultString = (*i)->errorString; } } LeaveCriticalSection(&g_CriticalSection); } // Release memory used for the compiled pattern pcre16_free(re); } else { // Compilation failed. RmLogF(measure->rm, LOG_ERROR, L"WebParser: RegExp error at offset %d: %S", erroffset, error); doErrorAction = true; } if (measure->download) { // Start the download thread unsigned int id; HANDLE threadHandle = (HANDLE)_beginthreadex(nullptr, 0, NetworkDownloadThreadProc, measure, 0, &id); if (threadHandle) { measure->dlThreadHandle = threadHandle; } } if (doErrorAction && !measure->onRegExpErrAction.empty()) { RmExecute(measure->skin, measure->onRegExpErrAction.c_str()); } else if (!measure->download && !measure->finishAction.empty()) { RmExecute(measure->skin, measure->finishAction.c_str()); } }
void match_command(wchar_t *arg) { wchar_t pattern[BUFFER_SIZE], strng[BUFFER_SIZE], if_then[BUFFER_SIZE], if_else[BUFFER_SIZE], temp[BUFFER_SIZE]; CPCRE re; arg = get_arg_in_braces(arg,temp,STOP_SPACES,sizeof(temp)/sizeof(wchar_t)-1); prepare_actionalias(temp, pattern, sizeof(pattern)/sizeof(wchar_t)-1); BOOL i_flag = FALSE, m_flag = FALSE, g_flag = FALSE; std::wstring regexp = pattern; if ( *pattern == L'/' ) { regexp = (wchar_t*)pattern + 1; int size = regexp.size(); for (int i = size - 1; i >= 0; i--) { if (regexp[i] == L'i') { size--; i_flag = TRUE; } else if (regexp[i] == L'm') { size--; m_flag = TRUE; } else if (regexp[i] == L'g') { size--; g_flag = TRUE; } else if (regexp[i] == L'/') { size--; break; } else { size = regexp.size(); i_flag = m_flag = g_flag = FALSE; break; } } regexp.resize(size); } if (!re.SetSource(regexp, m_flag, i_flag)) { return; } arg = get_arg_in_braces(arg,temp,STOP_SPACES,sizeof(temp)/sizeof(wchar_t)-1); prepare_actionalias(temp, strng, sizeof(strng)/sizeof(wchar_t)-1); arg = get_arg_in_braces(arg,if_then,WITH_SPACES,sizeof(if_then)/sizeof(wchar_t)-1); arg = get_arg_in_braces(arg,if_else,WITH_SPACES,sizeof(if_else)/sizeof(wchar_t)-1); int offset = 0; int offsets[33]; bool no_match = true; wchar_t *test = (wchar_t*)strng; int test_len = wcslen(test); for (;;) { int captured = pcre16_exec(re.m_pPcre, re.m_pExtra, test, test_len, offset, 0, offsets, 33); if (captured <= 0) break; no_match = false; int i; for (i = 0; i < 10; i++) vars[i][0] = 0; for (i = 1; i < captured; i++) { if (offsets[i*2] >= 0) { int size = offsets[i*2 + 1] - offsets[i*2]; wcsncpy(vars[i-1], test + offsets[i*2], size); *(vars[i-1]+size) = L'\0'; } } prepare_actionalias(if_then, temp, sizeof(temp)/sizeof(wchar_t)-1); parse_input(temp, g_flag); offset = offsets[1]; if (!g_flag) break; } if (no_match) parse_input(if_else); }
// Replaces the part(s) of str matched by search_re.
//
// 'replace' is either a function, which is invoked with the matched substring
// and the first capture (undefined when there is none), or any other value,
// which is converted with ToString. Only the first match is replaced unless
// the RegExp has its 'global' flag set. Returns the resulting string; str is
// returned unchanged when nothing matches or the offset vector cannot be
// allocated.
ejsval
_ejs_regexp_replace(ejsval str, ejsval search_re, ejsval replace)
{
    EJSRegExp* re = (EJSRegExp*)EJSVAL_TO_OBJECT(search_re);

    pcre16_extra extra;
    memset (&extra, 0, sizeof(extra));

    pcre16* code = (pcre16*)re->compiled_pattern;

    // Stays 0 should the query fail, which still yields a valid vector size.
    int capture_count = 0;
    pcre16_fullinfo (code, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);

    int ovec_count = 3 * (1 + capture_count);
    int* ovec = malloc(sizeof(int) * ovec_count);
    if (ovec == NULL)
        return str;

    int cur_off = 0;

    do {
        EJSPrimString *flat_str = _ejs_string_flatten (str);
        jschar *chars_str = flat_str->data.flat;
        int subject_len = flat_str->length;

        int rv = pcre16_exec(code, &extra,
                             chars_str, subject_len, cur_off,
                             PCRE_NO_UTF16_CHECK, ovec, ovec_count);

        if (rv < 0)
            break;

        int match_start = ovec[0];
        int match_end = ovec[1];

        ejsval replaceval;

        if (EJSVAL_IS_FUNCTION(replace)) {
            ejsval substr_match = _ejs_string_new_substring (str, match_start, match_end - match_start);

            // ovec[2]/ovec[3] only exist (and are only set) when the pattern has
            // a first capture group that took part in the match.
            int has_capture = (rv > 1 && ovec[2] >= 0);
            ejsval capture = _ejs_undefined;
            if (has_capture)
                capture = _ejs_string_new_substring (str, ovec[2], ovec[3] - ovec[2]);

            _ejs_log ("substring match is %s\n", ucs2_to_utf8(_ejs_string_flatten(substr_match)->data.flat));
            if (has_capture)
                _ejs_log ("capture is %s\n", ucs2_to_utf8(_ejs_string_flatten(capture)->data.flat));

            int argc = 3;
            ejsval args[3];

            args[0] = substr_match;
            args[1] = capture;
            args[2] = _ejs_undefined;

            replaceval = ToString(_ejs_invoke_closure (replace, _ejs_undefined, argc, args));
        }
        else {
            replaceval = ToString(replace);
        }

        int replace_len = _ejs_string_flatten(replaceval)->length;

        if (match_start == 0) {
            // we matched from the beginning of the string, so nothing from there to prepend
            str = _ejs_string_concat (replaceval, _ejs_string_new_substring (str, match_end, subject_len - match_end));
        }
        else {
            str = _ejs_string_concatv (_ejs_string_new_substring (str, 0, match_start),
                                       replaceval,
                                       _ejs_string_new_substring (str, match_end, subject_len - match_end),
                                       _ejs_null);
        }

        // if the RegExp object was created without a 'g' flag, only replace the first match
        if (!re->global)
            break;

        // Resume right after the inserted replacement: offsets refer to the new
        // string, and the replacement itself must not be scanned again.
        cur_off = match_start + replace_len;

        if (match_end == match_start) {
            // An empty match would be found again at the same place; step over
            // one character and stop once the end of the new string is passed.
            cur_off++;
            if (cur_off > subject_len + replace_len)
                break;
        }
    } while (EJS_TRUE);

    free (ovec);
    return str;
}