TCString CompileRegexp(TCString Regexp, int bAddAsUsualSubstring, int ID) { TCString Result(_T("")); sPcreCompileData s = {0}; int NewID = PcreCompileData.AddElem(s); PcreCompileData[NewID].ID = ID; if (!bAddAsUsualSubstring) { const char *Err; int ErrOffs; int Flags = PCRE_CASELESS; if (Regexp[0] == '/') { TCString OrigRegexp = Regexp; Regexp = Regexp.Right(Regexp.GetLen() - 1); TCHAR *pRegexpEnd = (TCHAR*)Regexp + Regexp.GetLen(); TCHAR *p = _tcsrchr(Regexp.GetBuffer(), '/'); if (!p) { Regexp = OrigRegexp; } else { *p = 0; Flags = 0; while (++p < pRegexpEnd) { switch (*p) { case 'i': Flags |= PCRE_CASELESS; break; case 'm': Flags |= PCRE_MULTILINE; break; case 's': Flags |= PCRE_DOTALL; break; case 'x': Flags |= PCRE_EXTENDED; break; case 'A': Flags |= PCRE_ANCHORED; break; case 'f': Flags |= PCRE_FIRSTLINE; break; case 'D': Flags |= PCRE_DOLLAR_ENDONLY; break; case 'U': Flags |= PCRE_UNGREEDY; break; case 'X': Flags |= PCRE_EXTRA; break; default: // Result += LogMessage(Translate("Warning, unknown pattern modifier '%c':\n"), *p ); break; } } } Regexp.ReleaseBuffer(); } PcreCompileData[NewID].pPcre = pcre16_compile(Regexp, PCRE_UTF8 | PCRE_NO_UTF8_CHECK | Flags, &Err, &ErrOffs, NULL); if (PcreCompileData[NewID].pPcre) { PcreCompileData[NewID].pExtra = NULL; PcreCompileData[NewID].pExtra = pcre16_study(PcreCompileData[NewID].pPcre, 0, &Err); } else { // Result += LogMessage(TranslateT("Syntax error in regexp\n%s\nat offset %d: %s."), (TCHAR*)Regexp, ErrOffs, (TCHAR*)ANSI2TCHAR(Err)) + _T("\n\n"); PcreCompileData[NewID].Pattern = Regexp; } } else PcreCompileData[NewID].Pattern = Regexp; return Result; }
static int regression_tests(void) { struct regression_test_case *current = regression_test_cases; const char *error; const char *cpu_info; int i, err_offs; int is_successful, is_ascii_pattern, is_ascii_input; int total = 0; int successful = 0; int counter = 0; #ifdef SUPPORT_PCRE8 pcre *re8; pcre_extra *extra8; int ovector8_1[32]; int ovector8_2[32]; int return_value8_1, return_value8_2; int utf8 = 0, ucp8 = 0; int disabled_flags8 = 0; #endif #ifdef SUPPORT_PCRE16 pcre16 *re16; pcre16_extra *extra16; int ovector16_1[32]; int ovector16_2[32]; int return_value16_1, return_value16_2; int utf16 = 0, ucp16 = 0; int disabled_flags16 = 0; int length16; #endif /* This test compares the behaviour of interpreter and JIT. Although disabling utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is still considered successful from pcre_jit_test point of view. */ #ifdef SUPPORT_PCRE8 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info); #else pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info); #endif printf("Running JIT regression tests\n"); printf(" target CPU of SLJIT compiler: %s\n", cpu_info); #ifdef SUPPORT_PCRE8 pcre_config(PCRE_CONFIG_UTF8, &utf8); pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8); if (!utf8) disabled_flags8 |= PCRE_UTF8; if (!ucp8) disabled_flags8 |= PCRE_UCP; printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled"); #endif #ifdef SUPPORT_PCRE16 pcre16_config(PCRE_CONFIG_UTF16, &utf16); pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16); if (!utf16) disabled_flags16 |= PCRE_UTF8; if (!ucp16) disabled_flags16 |= PCRE_UCP; printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled"); #endif while (current->pattern) { /* printf("\nPattern: %s :\n", current->pattern); */ total++; if (current->start_offset & F_PROPERTY) { is_ascii_pattern = 0; is_ascii_input = 0; } else { is_ascii_pattern = check_ascii(current->pattern); is_ascii_input = check_ascii(current->input); } error = NULL; #ifdef SUPPORT_PCRE8 re8 = NULL; if (!(current->start_offset & F_NO8)) re8 = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8), &error, &err_offs, tables(0)); extra8 = NULL; if (re8) { error = NULL; extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error); if (!extra8) { printf("\n8 bit: Cannot study pattern: %s\n", current->pattern); pcre_free(re8); re8 = NULL; } if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern); pcre_free_study(extra8); pcre_free(re8); re8 = NULL; } } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8)) printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern); #endif #ifdef SUPPORT_PCRE16 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH); else copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH); re16 = NULL; if (!(current->start_offset & F_NO16)) re16 = pcre16_compile(regtest_buf, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16), &error, &err_offs, tables(0)); extra16 = NULL; if (re16) { error = NULL; extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error); if (!extra16) { printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); pcre16_free(re16); re16 = NULL; } if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern); pcre16_free_study(extra16); pcre16_free(re16); re16 = NULL; } } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16)) printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern); #endif counter++; if ((counter & 0x3) != 0) { #ifdef SUPPORT_PCRE8 setstack8(NULL); #endif #ifdef SUPPORT_PCRE16 setstack16(NULL); #endif } #ifdef SUPPORT_PCRE8 return_value8_1 = -1000; return_value8_2 = -1000; for (i = 0; i < 32; ++i) ovector8_1[i] = -2; for (i = 0; i < 32; ++i) ovector8_2[i] = -2; if (re8) { setstack8(extra8); return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32); return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32); } #endif #ifdef SUPPORT_PCRE16 return_value16_1 = -1000; return_value16_2 = -1000; for (i = 0; i < 32; ++i) ovector16_1[i] = -2; for (i = 0; i < 32; ++i) ovector16_2[i] = -2; if (re16) { setstack16(extra16); if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV)) length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH); else length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH); return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32); return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32); } #endif /* If F_DIFF is set, just run the test, but do not compare the results. Segfaults can still be captured. */ is_successful = 1; if (!(current->start_offset & F_DIFF)) { #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) { /* All results must be the same. */ if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) { printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; /* Transform back the results. */ if (current->flags & PCRE_UTF8) { for (i = 0; i < return_value8_1; ++i) { if (ovector16_1[i] >= 0) ovector16_1[i] = regtest_offsetmap[ovector16_1[i]]; if (ovector16_2[i] >= 0) ovector16_2[i] = regtest_offsetmap[ovector16_2[i]]; } } for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n", i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); is_successful = 0; } } } else { #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ /* Only the 8 bit and 16 bit results must be equal. */ #ifdef SUPPORT_PCRE8 if (return_value8_1 != return_value8_2) { printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i]) { printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input); is_successful = 0; } } #endif #ifdef SUPPORT_PCRE16 if (return_value16_1 != return_value16_2) { printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value16_1, return_value16_2, total, current->pattern, current->input); is_successful = 0; } else if (return_value16_1 >= 0) { return_value16_1 *= 2; for (i = 0; i < return_value16_1; ++i) if (ovector16_1[i] != ovector16_2[i]) { printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); is_successful = 0; } } #endif #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 } #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ } if (is_successful) { #ifdef SUPPORT_PCRE8 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) { if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) { printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } } #endif #ifdef SUPPORT_PCRE16 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) { if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) { printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); is_successful = 0; } } #endif } if (is_successful) successful++; #ifdef SUPPORT_PCRE8 if (re8) { pcre_free_study(extra8); pcre_free(re8); } #endif #ifdef SUPPORT_PCRE16 if (re16) { pcre16_free_study(extra16); pcre16_free(re16); } #endif /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ printf("."); fflush(stdout); current++; } tables(1); #ifdef SUPPORT_PCRE8 setstack8(NULL); #endif #ifdef SUPPORT_PCRE16 setstack16(NULL); #endif if (total == successful) { printf("\nAll JIT regression tests are successfully passed.\n"); return 0; } else { printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); return 1; } }