int pcre_get_stringnumber(const pcre *code, const pcre_char *stringname) { /* FIXME: This doesn't work for UTF-16 because the name table has 8-bit characters in it! */ #if !PCRE_UTF16 int rc; int entrysize; int top, bot; uschar *nametable; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; bot = 0; while (top > bot) { int mid = (top + bot) / 2; uschar *entry = nametable + entrysize*mid; int c = strcmp(stringname, (char *)(entry + 2)); if (c == 0) return (entry[0] << 8) + entry[1]; if (c > 0) bot = mid + 1; else top = mid; } #else UNUSED_PARAM(code); UNUSED_PARAM(stringname); #endif return PCRE_ERROR_NOSUBSTRING; }
/*
 * Store every named capture group of a match on `msg` as a name/value pair.
 *
 * s        matcher; cast to LogMatcherPcreRe to reach the compiled pattern
 * msg      message that receives the values via log_msg_set_value_by_name()
 * matches  offset vector indexed as [2*n] (start) and [2*n+1] (end) per
 *          group n
 * value    subject string the offsets refer to
 *
 * Each name-table entry is a two-byte, most-significant-first group number
 * followed by the NUL-terminated group name. Nothing is stored when the
 * pattern has no names or when any pcre_fullinfo() query fails.
 */
static void
log_matcher_pcre_re_feed_named_substrings(LogMatcher *s, LogMessage *msg, int *matches, const gchar *value)
{
  LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;
  gchar *name_table = NULL;
  gchar *tabptr;
  gint namecount = 0;
  gint name_entry_size = 0;
  gint i;

  if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMECOUNT, &namecount) != 0 || namecount <= 0)
    return;

  /* Before we can access the substrings, we must extract the table for
     translating names to numbers, and the size of each entry in the table. */
  if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMETABLE, &name_table) != 0 ||
      pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size) != 0 ||
      name_table == NULL)
    return;

  tabptr = name_table;
  for (i = 0; i < namecount; i++, tabptr += name_entry_size)
    {
      /* Decode through unsigned bytes: with a signed gchar a byte >= 0x80
         would sign-extend and corrupt the group number. */
      const guchar *number = (const guchar *) tabptr;
      gint n = (number[0] << 8) | number[1];
      gint begin_index = matches[2 * n];
      gint end_index = matches[2 * n + 1];

      /* A negative offset marks a group that did not take part in the
         match; there is no substring to store for it. */
      if (begin_index < 0 || end_index < 0)
        continue;

      log_msg_set_value_by_name(msg, tabptr + 2, value + begin_index, end_index - begin_index);
    }
}
/*
 * Record, for every named group of grok->re, which PCRE capture number it
 * was assigned.
 *
 * Each name-table entry holds a two-byte, most-significant-first group
 * number followed by the group name. The name is parsed with CAPTURE_FORMAT
 * to recover the capture id, the matching grok_capture is fetched with
 * grok_capture_get_by_id(), its pcre_capture_number is set, and the capture
 * is stored back with grok_capture_add().
 *
 * Nothing is done when any pcre_fullinfo() query fails.
 */
static void grok_study_capture_map(grok_t *grok)
{
  char *nametable = NULL;
  int nametable_size = 0;
  int nametable_entrysize = 0;
  int i;

  if (pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMECOUNT, &nametable_size) != 0
      || pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nametable_entrysize) != 0
      || pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMETABLE, &nametable) != 0
      || nametable == NULL) {
    return;
  }

  for (i = 0; i < nametable_size; i++) {
    const char *entry = nametable + i * nametable_entrysize;
    /* Read the number through unsigned bytes so values >= 0x80 are not
     * sign-extended on platforms where plain char is signed. */
    const unsigned char *number = (const unsigned char *)entry;
    int stringnum = (number[0] << 8) + number[1];
    int capture_id = 0;
    grok_capture *gct;

    /* NOTE(review): assumes CAPTURE_FORMAT contains exactly one storing
     * conversion, as only one output argument is passed - confirm. A name
     * that does not parse carries no capture id, so it is skipped instead
     * of being looked up with an indeterminate id. */
    if (sscanf(entry + 2, CAPTURE_FORMAT, &capture_id) != 1) {
      continue;
    }

    grok_log(grok, LOG_COMPILE, "Studying capture %d", capture_id);
    gct = (grok_capture *)grok_capture_get_by_id(grok, capture_id);
    assert(gct != NULL);
    gct->pcre_capture_number = stringnum;

    /* update the database with the new data */
    grok_capture_add(grok, gct);
  }
}
/*
 * Run the compiled pattern in `reg` against `str` and fill `p` with the
 * match offsets and the names of named groups.
 *
 * str     subject string; an empty subject is rejected with -1
 * nmatch  number of offset pairs to report; sizes the internal vector
 * p       output array; p[i].rm_so / rm_eo receive the offsets of group i,
 *         and the slot after the last reported group gets rm_so = -1 as a
 *         terminator
 *         NOTE(review): the terminator may land on p[nmatch], so the caller
 *         presumably provides nmatch + 1 slots - confirm.
 * eflags  options passed straight to pcre_exec()
 *
 * Returns 0 when pcre_exec() reports a match, otherwise its negative code.
 */
static int pcre_regexec(KonohaContext *kctx, kregexp_t *reg, const char *str, size_t nmatch, kregmatch_t p[], int eflags)
{
	PCRE_regexp_t *preg = (PCRE_regexp_t *)reg;
	int res, nvector[nmatch*3];
	size_t idx, matched;
	size_t len;
	/* PCRE_INFO_NAMECOUNT stores an int; receiving it in a size_t leaves
	 * part of the object unwritten (and is wrong on big-endian targets). */
	int nm_count = 0;

	nvector[0] = 0;
	len = strlen(str);
	if(len == 0) return -1;

	res = pcre_exec(preg->re, NULL, str, len, 0, eflags, nvector, nmatch*3);
	if(res < 0) return res;

	/* res == 0 means the vector was too small: report all nmatch pairs. */
	matched = (res > 0 && (size_t)res < nmatch) ? (size_t)res : nmatch;
	for(idx = 0; idx < matched; idx++) {
		p[idx].rm_so = nvector[2*idx];
		p[idx].rm_eo = nvector[2*idx+1];
	}
	p[idx].rm_so = -1;

	if(pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMECOUNT, &nm_count) == 0 && nm_count > 0) {
		unsigned char *nm_table = NULL;
		int nm_entry_size = 0;
		if(pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMETABLE, &nm_table) == 0 &&
		   pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nm_entry_size) == 0 &&
		   nm_table != NULL) {
			unsigned char *tbl_ptr = nm_table;
			int i;
			for(i = 0; i < nm_count; i++, tbl_ptr += nm_entry_size) {
				/* entry layout: 2-byte group number, then the name */
				size_t n_idx = ((size_t)tbl_ptr[0] << 8) | tbl_ptr[1];
				unsigned char *n_name = tbl_ptr + 2;
				/* Never write outside the slots the caller asked for. */
				if(n_idx >= nmatch) continue;
				p[n_idx].rm_name = n_name;
				p[n_idx].rm_namelen = strlen((char *)n_name);
			}
		}
	}
	return 0;
}
/*
 * Copy the names of all named capture groups of a compiled pattern into the
 * Java array `ret`.
 *
 * code   compiled pattern, passed as an integer handle
 * extra  study data handle, passed the same way
 * ret    array that receives the names; the name of group N is stored at
 *        index N - 1
 *
 * Returns the first negative pcre_fullinfo() result, -1 if a Java string
 * could not be created, otherwise the result of the last pcre_fullinfo()
 * call.
 */
JNIEXPORT jint JNICALL
Java_com_oracle_truffle_r_runtime_ffi_jni_JNI_1PCRE_nativeGetCaptureNames(JNIEnv *env, jclass c, jlong code, jlong extra, jobjectArray ret)
{
    /* The handles are integers on the Java side; convert them explicitly
     * rather than relying on an implicit integer-to-pointer conversion. */
    void *re = (void *)code;
    void *study = (void *)extra;
    int nameCount;
    int nameEntrySize;
    char *nameTable;
    int res;
    int i;

    res = pcre_fullinfo(re, study, PCRE_INFO_NAMECOUNT, &nameCount);
    if (res < 0) {
        return res;
    }
    res = pcre_fullinfo(re, study, PCRE_INFO_NAMEENTRYSIZE, &nameEntrySize);
    if (res < 0) {
        return res;
    }
    res = pcre_fullinfo(re, study, PCRE_INFO_NAMETABLE, &nameTable);
    if (res < 0) {
        return res;
    }

    // from GNU R's grep.c
    for (i = 0; i < nameCount; i++) {
        char *entry = nameTable + nameEntrySize * i;
        /* Decode the two-byte group number through unsigned bytes so a byte
         * >= 0x80 is not sign-extended where plain char is signed. */
        const unsigned char *number = (const unsigned char *)entry;
        int captureNum = (number[0] << 8) + number[1] - 1;
        jstring name = (*env)->NewStringUTF(env, entry + 2);

        if (name == NULL) {
            /* String creation failed; stop issuing further JNI calls. */
            return -1;
        }
        (*env)->SetObjectArrayElement(env, ret, captureNum, name);
        /* Release the local reference now instead of accumulating one per
         * named group until the native method returns. */
        (*env)->DeleteLocalRef(env, name);
    }
    return res;
}
/*
 * Ported from get_first_set() in pcre_get.c in pcre source.
 *
 * Resolve `group_name` to a capture-group number for the match result `mr`.
 *
 * When the pattern was compiled with PCRE_DUPNAMES, or PCRE_INFO_JCHANGED is
 * set, several groups may share the name. In that case the table entries
 * for the name are scanned in order and the first group whose start offset
 * in mr->ovector is non-negative is returned. If none is set, the number of
 * the first entry with that name is returned.
 *
 * Otherwise the lookup is delegated to pcre_get_stringnumber().
 *
 * Returns the group number, or the negative code produced by the name
 * lookup.
 */
static int matchres_first_set(cs_matchres_t *mr, const char *group_name)
{
    cs_regexp_t *regexp = mr->regexp;
    pcre *re = regexp->re;
    pcre_extra *extra = regexp->extra;
    /* Initialized so a failed query falls through to the plain lookup. */
    unsigned long options = 0;
    int jchanged = 0;
    char *first;
    char *last;
    uchar *entry;
    int entry_len;

    pcre_fullinfo(re, extra, PCRE_INFO_OPTIONS, &options);
    pcre_fullinfo(re, extra, PCRE_INFO_JCHANGED, &jchanged);

    if (!((options & PCRE_DUPNAMES) || jchanged)) {
        return pcre_get_stringnumber(re, group_name);
    }

    entry_len = pcre_get_stringtable_entries(re, group_name, &first, &last);
    if (entry_len < 0) {
        return entry_len;
    }

    for (entry = (uchar *)first; entry <= (uchar *)last; entry += entry_len) {
        int n = entry[0] << 8 | entry[1];
        if (mr->ovector[n * 2] >= 0) {
            return n;
        }
    }

    /* No group with this name is set. After the loop `entry` points one
     * entry past `last`, so decode the number from `first` instead of
     * reading an unrelated (or out-of-table) entry. */
    entry = (uchar *)first;
    return entry[0] << 8 | entry[1];
}
/*
 * Find the number of the capture group called `stringname`.
 *
 * The pattern's name table is bisected. Every entry is `width` bytes long:
 * two bytes of group number (high byte first) and then the NUL-terminated
 * name.
 *
 * Returns the group number, the non-zero code of a failed pcre_fullinfo()
 * query, or PCRE_ERROR_NOSUBSTRING when the pattern has no names or the
 * name is absent.
 */
int pcre_get_stringnumber(const pcre *code, const char *stringname)
{
    int err;
    int width;
    int upper;
    int lower = 0;
    uschar *names;

    err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &upper);
    if (err != 0) {
        return err;
    }
    if (upper <= 0) {
        return PCRE_ERROR_NOSUBSTRING;
    }

    err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &width);
    if (err != 0) {
        return err;
    }

    err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &names);
    if (err != 0) {
        return err;
    }

    while (lower < upper) {
        const int middle = (upper + lower) / 2;
        uschar *candidate = names + width * middle;
        const int cmp = strcmp(stringname, (char *)(candidate + 2));

        if (cmp == 0) {
            return (candidate[0] << 8) + candidate[1];
        }
        if (cmp < 0) {
            upper = middle;         /* continue in the lower half */
        } else {
            lower = middle + 1;     /* continue in the upper half */
        }
    }

    return PCRE_ERROR_NOSUBSTRING;
}
/* TODO: audit this function */ static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector) { int i, j, length, namecount, trunc; unsigned char *tabptr; int name_entry_size; unsigned char *name_table; const char *start; char outstr[2*MATCH_MAXLEN+1]; /* determine if there are named substrings */ #if USING_PCRE2 (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount); #else (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount); #endif if (namecount <= 0) { cli_dbgmsg("cli_pcre_report: no named substrings\n"); } else { cli_dbgmsg("cli_pcre_report: named substrings\n"); /* extract named substring translation table */ #if USING_PCRE2 (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table); (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); #else (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table); (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size); #endif /* print named substring information */ tabptr = name_table; for (i = 0; i < namecount; i++) { int n = (tabptr[0] << 8) | tabptr[1]; start = buffer + ovector[2*n]; length = ovector[2*n+1] - ovector[2*n]; trunc = 0; if (length > MATCH_MAXLEN) { trunc = 1; length = MATCH_MAXLEN; } for (j = 0; j < length; ++j) snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)*(start+j)); cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2, outstr, trunc ? " (trunc)":""); /* cli_dbgmsg("named_substr: (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2, length, start, trunc ? " (trunc)":""); */ tabptr += name_entry_size; } } }
/*
 * Match self->re against `subject`, starting at byte offset `start`.
 *
 * Returns UpNull() when pcre_exec() reports any negative code (no match and
 * genuine errors are not distinguished). Otherwise returns a list:
 *
 *   capture == false: [matchStart, matchEnd]
 *   capture == true : [rc, rc*2 offsets..., nameCount,
 *                      (groupNumber, groupName) per named group...]
 *
 * where rc is the value returned by pcre_exec().
 */
UpObject* UpRegexMatch(UpRegex* self, const char* subject, int start, bool capture) {
    enum { OVECTOR_SIZE = 999 };
    int ovector[OVECTOR_SIZE];
    int rc = pcre_exec(self->re, NULL, subject, strlen(subject), start, 0,
                       ovector, OVECTOR_SIZE);
    if (rc < 0) {
        return UpNull();
    }

    UpList* results = UpListCreate();
    if (!capture) {
        for (int i = 0; i < 2; ++i) {
            UpInteger* value = UpIntegerCreate(ovector[i]);
            UpListAppend(results, (UpObject*)value);
        }
        return (UpObject*)results;
    }

    UpListAppend(results, (UpObject*)UpIntegerCreate(rc));
    for (int i = 0; i < rc*2; ++i) {
        UpInteger* value = UpIntegerCreate(ovector[i]);
        UpListAppend(results, (UpObject*)value);
    }

    int nameCount = 0;
    int entrySize = 0;
    char* entry = NULL;
    // If any query fails the outputs are not trustworthy; report no names
    // rather than walking an indeterminate table.
    if (pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMECOUNT, &nameCount) != 0
        || pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrySize) != 0
        || pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMETABLE, &entry) != 0
        || entry == NULL) {
        nameCount = 0;
    }

    UpListAppend(results, (UpObject*)UpIntegerCreate(nameCount));
    for (int i = 0; i < nameCount; ++i) {
        // Read the two-byte group number through unsigned bytes; a signed
        // char >= 0x80 would sign-extend and corrupt the index.
        const unsigned char* number = (const unsigned char*)entry;
        uint16_t captureIndex = (number[0] << 8) | number[1];
        const char* captureName = (const char*)(entry + 2);
        entry += entrySize;

        UpListAppend(results, (UpObject*)UpIntegerCreate(captureIndex));
        UpListAppend(results, (UpObject*)UpStringCreate(captureName));
    }
    return (UpObject*)results;
}
/**
 * Compile and study `regex`, then size the match vector.
 *
 * @param regex     pattern text handed to pcre_compile()
 * @param options   pcre_compile() option bits
 * @param maxDepth  stored as match_limit_recursion in the pcre_extra block
 *
 * @throws const char*  a message describing the compile error and its
 *                      offset when pcre_compile() fails
 */
RegEx::RegEx(const char * regex, int options, unsigned long int maxDepth)
{
   const char* pcre_error;
   int erroffset;

   // compile and study the expression
   re = pcre_compile(regex, options, &pcre_error, &erroffset, NULL);
   if (re == NULL)
   {
      // The thrown pointer must stay valid after this frame is unwound, so
      // the text lives in static storage; a pointer into a local string
      // object would dangle by the time a handler reads it.
      // NOTE(review): the buffer is shared between threads and very long
      // patterns are truncated to fit.
      static char errorMsg[1024];
      snprintf(errorMsg, sizeof(errorMsg),
               "Regular Expression compile error: %s at offset %9d in expression '%s'",
               pcre_error, erroffset, regex);
      throw (const char*)errorMsg;
   }

   pe = pcre_study(re, 0, &pcre_error);
   if ( pcre_error == NULL )
   {
      // save the compilation block sizes for the copy constructor.
      pcre_fullinfo(re, pe, PCRE_INFO_SIZE, &re_size);
      pcre_fullinfo(re, pe, PCRE_INFO_STUDYSIZE, &study_size);
      allocated_study = false;
   }
   else
   {
      re_size = 0;
      study_size = 0;
   }

   if (!pe)
   {
      // pcre_study didn't return any study data,
      // but we need the pcre_extra block anyway for the recursion limit,
      // so get one
      pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
      memset(pe, 0, sizeof(pcre_extra));
   }

   // set the maximum recursion depth option in the pcre_extra (pe) block
   pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
   pe->match_limit_recursion = maxDepth;

   // allocate space for match results based on how many substrings
   // there are in the expression (+1 for the entire match)
   pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount);
   substrcount++;
   ovector = new int[3*substrcount];
   matchlist = NULL;
}
int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex, int unused __attribute__((unused)), bool *regex_compiled) { int rc; uint32_t entry_len; size_t info_len; rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); if (rc < 0 || !entry_len) return -1; *regex = regex_data_create(); if (!(*regex)) return -1; (*regex)->owned = 0; (*regex)->regex = (pcre *)mmap_area->next_addr; rc = next_entry(NULL, mmap_area, entry_len); if (rc < 0) goto err; /* * Check that regex lengths match. pcre_fullinfo() * also validates its magic number. */ rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); if (rc < 0 || info_len != entry_len) goto err; rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); if (rc < 0 || !entry_len) goto err; if (entry_len) { (*regex)->lsd.study_data = (void *)mmap_area->next_addr; (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; rc = next_entry(NULL, mmap_area, entry_len); if (rc < 0) goto err; /* Check that study data lengths match. */ rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, PCRE_INFO_STUDYSIZE, &info_len); if (rc < 0 || info_len != entry_len) goto err; } *regex_compiled = true; return 0; err: regex_data_free(*regex); *regex = NULL; return -1; }
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname) #endif { int rc; int entrysize; int top, bot; pcre_uchar *nametable; #ifdef COMPILE_PCRE8 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; #endif #ifdef COMPILE_PCRE16 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; #endif #ifdef COMPILE_PCRE32 if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; #endif bot = 0; while (top > bot) { int mid = (top + bot) / 2; pcre_uchar *entry = nametable + entrysize*mid; int c = STRCMP_UC_UC((pcre_uchar *)stringname, (pcre_uchar *)(entry + IMM2_SIZE)); if (c == 0) return GET2(entry, 0); if (c > 0) bot = mid + 1; else top = mid; } return PCRE_ERROR_NOSUBSTRING; }
/* * TODO Replace this compare function with something that actually compares the * regular expressions. * This compare function basically just compares the binary representations of * the automatons, and because this representation contains pointers and * metadata, it can only return a match if regex1 == regex2. * Preferably, this function would be replaced with an algorithm that computes * the equivalence of the automatons systematically. */ int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) { int rc; size_t len1, len2; rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); assert(rc == 0); rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); assert(rc == 0); if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) return SELABEL_INCOMPARABLE; return SELABEL_EQUAL; }
/*
 * re.info(regexp, symbol): query one integer property of a compiled regexp.
 *
 * The symbol selects the property: s_capturecount, s_backrefmax or
 * s_firstchar, mapped to PCRE_INFO_CAPTURECOUNT, PCRE_INFO_BACKREFMAX and
 * PCRE_INFO_FIRSTCHAR respectively.
 *
 * Returns the property as an integer object, the error produced by argument
 * parsing, an EcReError when pcre_fullinfo() fails, or Ec_ERROR (after
 * raising a type error) for an unknown symbol.
 */
static EC_OBJ EcLibRe_Info( EC_OBJ stack, EcAny userdata )
{
	EC_OBJ      regexp;
	EcUInt      infosym;
	EC_OBJ      res;
	pcre       *code;
	pcre_extra *extra;
	int         what;
	int         value = 0;

	res = EcParseStackFunction( "re.info", TRUE, stack, "O!k", tc_regexp, &regexp, &infosym );
	if (EC_ERRORP(res))
		return res;

	code  = EC_PCRE(regexp);
	extra = EC_PCREXTRA(regexp);

	/* All supported properties are plain ints, so only the request code
	 * differs between them. */
	if (infosym == s_capturecount)
		what = PCRE_INFO_CAPTURECOUNT;
	else if (infosym == s_backrefmax)
		what = PCRE_INFO_BACKREFMAX;
	else if (infosym == s_firstchar)
		what = PCRE_INFO_FIRSTCHAR;
	else
	{
		EC_TYPEERROR_F( "re.info", 2, tc_symbol, EcMakeSymbolFromId( infosym ), "expected an info option symbol" );
		return Ec_ERROR;
	}

	if (pcre_fullinfo( code, extra, what, &value ) < 0)
		return EcReError( "internal error: pcre_fullinfo", -1 );

	return EcMakeInt( value );
}
int regex_writef(struct regex_data *regex, FILE *fp, int unused __attribute__((unused))) { int rc; size_t len; uint32_t to_write; size_t size; pcre_extra *sd = get_pcre_extra(regex); /* determine the size of the pcre data in bytes */ rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); if (rc < 0) return -1; /* write the number of bytes in the pcre data */ to_write = size; len = fwrite(&to_write, sizeof(uint32_t), 1, fp); if (len != 1) return -1; /* write the actual pcre data as a char array */ len = fwrite(regex->regex, 1, to_write, fp); if (len != to_write) return -1; if (sd) { /* determine the size of the pcre study info */ rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size); if (rc < 0) return -1; } else size = 0; /* write the number of bytes in the pcre study data */ to_write = size; len = fwrite(&to_write, sizeof(uint32_t), 1, fp); if (len != 1) return -1; if (sd) { /* write the actual pcre study data as a char array */ len = fwrite(sd->study_data, 1, to_write, fp); if (len != to_write) return -1; } return 0; }
int pcre_get_stringtable_entries(const pcre *code, const char *stringname, char **firstptr, char **lastptr) { int rc; int entrysize; int top, bot; uschar *nametable, *lastentry; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; lastentry = nametable + entrysize * (top - 1); bot = 0; while (top > bot) { int mid = (top + bot) / 2; uschar *entry = nametable + entrysize*mid; int c = strcmp(stringname, (char *)(entry + 2)); if (c == 0) { uschar *first = entry; uschar *last = entry; while (first > nametable) { if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; first -= entrysize; } while (last < lastentry) { if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; last += entrysize; } *firstptr = (char *)first; *lastptr = (char *)last; return entrysize; } if (c > 0) bot = mid + 1; else top = mid; } return PCRE_ERROR_NOSUBSTRING; }
bool Regex::match(const MyString & string, ExtArray<MyString> * groups) { if ( ! this->isInitialized() ) { return false; } int group_count; pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &group_count); int oveccount = 3 * (group_count + 1); // +1 for the string itself int * ovector = (int *) malloc(oveccount * sizeof(int)); if (!ovector) { // XXX: EXCEPTing sucks EXCEPT("No memory to allocate data for re match"); } int rc = pcre_exec(re, NULL, string.Value(), string.Length(), 0, // Index in string from which to start matching options, ovector, oveccount); if (NULL != groups) { for (int i = 0; i < rc; i++) { (*groups)[i] = string.Substr(ovector[i * 2], ovector[i * 2 + 1] - 1); } } free(ovector); return rc > 0; }
int PME::match(const std::string & s, ///< s String to match against unsigned offset ///< offset Offset at which to start matching ) { size_t msize; pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &msize); msize = 3*(msize+1); int *m = new int[msize]; vector<markers> marks; // if we got a new string, reset the global position counter if ( addressoflaststring != (void *) &s ) { // fprintf ( stderr, "PME RESETTING: new string\n" ); lastglobalposition = 0; } if ( m_isglobal ) { offset += lastglobalposition; } //check that the offset isn't at the last position in the string if( offset == s.length() ) return 0; nMatches = pcre_exec(re, extra, s.c_str(), s.length(), offset, 0, m, msize); for ( int i = 0, *p = m ; i < nMatches ; i++, p+=2 ) { marks.push_back(markers(p[0], p[1])); } delete[] m; // store the last set of results locally, as well as returning them m_marks = marks; laststringmatched = s; addressoflaststring = (void *) &s; if ( m_isglobal ) { if ( nMatches == PCRE_ERROR_NOMATCH ) { // fprintf ( stderr, "PME RESETTING: reset for no match\n" ); lastglobalposition = 0; // reset the position for next match (perl does this) } else if ( nMatches > 0 ) { // fprintf ( stderr, "PME RESETTING: setting to %d\n", marks[0].second ); lastglobalposition = marks[0].second; // increment by the end of the match } else { // fprintf ( stderr, "PME RESETTING: reset for no unknown\n" ); lastglobalposition = 0; } } int returnvalue = 0; if ( nMatches > 0 ) returnvalue = nMatches; return returnvalue; }
bool RegExp::Impl::compile () { if (_isReady) return true; int compile_options = _lineBreak | PCRE_UTF8 // Run in UTF-8 mode | PCRE_NO_UTF8_CHECK // Do not check the pattern for UTF-8 validity (only relevant if PCRE_UTF8 is set) | PCRE_UCP; // Use Unicode properties for \d, \w, etc. const char * errsstr; _re = pcre_compile (_pattern.c_str(), compile_options, & errsstr, & _erroffset, nullptr/*tables*/); if (!_re) { _errstr = String::fromUtf8(errsstr); return false; } _extra = pcre_study(_re, 0, & errsstr); if (!_extra) { if (errsstr) { _errstr = String::fromUtf8(errsstr); return false; } } int rc; CWT_VERIFY((rc = pcre_fullinfo(_re, _extra, PCRE_INFO_CAPTURECOUNT, & _nsubpatterns)) == 0); if (rc) return false; _isReady = true; return true; }
void Pattern::compile (void) { // Compile the pattern int offset; const char * error; _re = pcre_compile (_pattern.c_str(), 0, &error, &offset, NULL); if (_re == NULL) { std::string offsetStr; std::stringstream ss; ss << offset; offsetStr = ss.str(); std::string msg ("PCRE compiling failed at offset " + offsetStr); throw compile_error (msg); } // Allocate an appropriate amount // of memory for the output vector. int captureCount; pcre_fullinfo (_re, NULL, PCRE_INFO_CAPTURECOUNT, &captureCount); delete[] _ovector; _ovector = new int[ (captureCount + 1) *3]; _ovectorSize = (captureCount + 1) * 3; }
static int cond_pcre_match(char **a, int id) { pcre *pcre_pat; const char *pcre_err; char *lhstr, *rhre; int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize; lhstr = cond_str(a,0,0); rhre = cond_str(a,1,0); switch(id) { case CPCRE_PLAIN: pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL); pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt); ovsize = (capcnt+1)*3; ov = zalloc(ovsize*sizeof(int)); r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize); if (r==0) return 1; else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */ else if (r>0) { zpcre_get_substrings(lhstr, ov, r, NULL); return 1; } break; } return 0; }
/*
 * Regexp#match(string[, index]): run the receiver against argv[0].
 *
 * The optional second argument is an index into the string; it is turned
 * into a byte offset with sl_string_byte_offset_for_index(). Returns nil
 * when that offset is negative or when pcre_exec() reports no match. Other
 * pcre_exec() results are passed to check_pcre_error(). On success a
 * Regexp_Match object is returned that references the regexp, the subject
 * string and the captured offsets.
 */
SLVAL
sl_regexp_match(sl_vm_t* vm, SLVAL self, size_t argc, SLVAL* argv)
{
    sl_regexp_t* re = get_regexp_check(vm, self);
    sl_string_t* str = sl_get_string(vm, argv[0]);
    sl_regexp_match_t* match;
    int* caps;
    int start = 0;
    int slots;
    int status;

    if(argc > 1) {
        start = sl_get_int(sl_expect(vm, argv[1], vm->lib.Int));
    }
    start = sl_string_byte_offset_for_index(vm, argv[0], start);
    if(start < 0) {
        return vm->lib.nil;
    }

    /* Three ints per capture group, plus one triple for the whole match. */
    pcre_fullinfo(re->re, re->study, PCRE_INFO_CAPTURECOUNT, &slots);
    slots = (slots + 1) * 3;
    caps = sl_alloc(vm->arena, sizeof(int) * slots);

    status = pcre_exec(re->re, re->study, (char*)str->buff, str->buff_len,
                       start, PCRE_NEWLINE_LF, caps, slots);
    if(status == PCRE_ERROR_NOMATCH) {
        return vm->lib.nil;
    }
    check_pcre_error(vm, status);

    match = (sl_regexp_match_t*)sl_get_ptr(sl_allocate(vm, vm->lib.Regexp_Match));
    match->re = re;
    match->match_string = argv[0];
    match->capture_count = slots / 3;
    match->captures = caps;
    return sl_make_ptr((sl_object_t*)match);
}
/*
 * Arguments:
 *  preg        points to a structure for recording the compiled expression
 *  pattern     the pattern to compile
 *  cflags      compilation flags
 *
 * Returns:      0 on success
 *               various non-zero codes on failure
 *
 * The pattern is always compiled with PCRE_DUPNAMES. AP_REG_ICASE,
 * AP_REG_NEWLINE and AP_REG_DOTALL add PCRE_CASELESS, PCRE_MULTILINE and
 * PCRE_DOTALL. On success preg->re_nsub holds the capture-group count.
 */
AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
{
    const char *errorptr;
    int erroffset = 0;
    int errcode = 0;
    int options = PCRE_DUPNAMES;
    int nsub = 0;

    if ((cflags & AP_REG_ICASE) != 0)
        options |= PCRE_CASELESS;
    if ((cflags & AP_REG_NEWLINE) != 0)
        options |= PCRE_MULTILINE;
    if ((cflags & AP_REG_DOTALL) != 0)
        options |= PCRE_DOTALL;

    preg->re_pcre =
        pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
    preg->re_erroffset = erroffset;

    if (preg->re_pcre == NULL) {
        /*
         * There doesn't seem to be constants defined for compile time error
         * codes. 21 is "failed to get memory" according to pcreapi(3).
         */
        if (errcode == 21)
            return AP_REG_ESPACE;
        return AP_REG_INVARG;
    }

    /*
     * PCRE_INFO_CAPTURECOUNT stores an int. Receive it in an int temporary
     * and assign it afterwards, so the result is correct whatever integer
     * type preg->re_nsub has.
     */
    pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
                  PCRE_INFO_CAPTURECOUNT, &nsub);
    preg->re_nsub = nsub;
    return 0;
}
/*
 * POSIX-style wrapper: compile `pattern` into `preg`.
 *
 * Each REG_* bit present in cflags switches on the PCRE option listed next
 * to it in flag_map. On success preg->re_nsub receives the capture-group
 * count and 0 is returned. On failure the PCRE error code is translated
 * through the eint table; codes beyond the table yield REG_BADPAT.
 * preg->re_erroffset is set from the offset reported by pcre_compile2() in
 * both cases.
 */
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
static const struct {
  int posix_flag;
  int pcre_option;
} flag_map[] = {
  { REG_ICASE,    PCRE_CASELESS },
  { REG_NEWLINE,  PCRE_MULTILINE },
  { REG_DOTALL,   PCRE_DOTALL },
  { REG_NOSUB,    PCRE_NO_AUTO_CAPTURE },
  { REG_UTF8,     PCRE_UTF8 },
  { REG_UCP,      PCRE_UCP },
  { REG_UNGREEDY, PCRE_UNGREEDY }
};

const char *message;
int where;
int code;
int pcre_options = 0;
int groups = 0;
unsigned int k;

for (k = 0; k < sizeof(flag_map)/sizeof(flag_map[0]); k++)
  {
  if ((cflags & flag_map[k].posix_flag) != 0)
    pcre_options |= flag_map[k].pcre_option;
  }

preg->re_pcre = pcre_compile2(pattern, pcre_options, &code, &message,
  &where, NULL);
preg->re_erroffset = where;

/* Safety: if the error code is too big for the translation vector (which
should not happen, but we all make mistakes), return REG_BADPAT. */

if (preg->re_pcre == NULL)
  {
  if (code < (int)(sizeof(eint)/sizeof(const int)))
    return eint[code];
  return REG_BADPAT;
  }

(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
  &groups);
preg->re_nsub = (size_t)groups;
return 0;
}
int RegEx::Compile(const char *pattern, int iFlags) { if (!mFree) Clear(); re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL); if (re == NULL) { return 0; } mFree = false; /** * Retrieve the number of captured groups * including the full match. */ pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns); ++mNumSubpatterns; /** * Build the table with the named groups, * which contain an index and a name per group. */ MakeSubpatternsTable(mNumSubpatterns); return 1; }
/*
 * Execute p->re against p->subject and keep the result in `p`.
 *
 * A fresh offset vector replaces any previous p->ovector. It is sized for
 * (capture count + 2) triples and allocated with one spare element;
 * p->ovecsize records the size handed to pcre_exec(). UTF-8 validation of
 * the subject is skipped unless USE_ICONV is defined.
 *
 * Returns the pcre_exec() result, which is also stored in p->rc.
 */
static int pcre_local_exec(pcre_t *p)
{
#ifndef USE_ICONV
	const int exec_options = PCRE_NO_UTF8_CHECK;
#else
	const int exec_options = 0;
#endif
	int slots;

	pcre_fullinfo(p->re, NULL, PCRE_INFO_CAPTURECOUNT, &slots);
	slots = (slots + 2) * 3;

	if (p->ovector)
		FREE(p->ovector);

	/* too much, but who cares */
	p->ovector = CALLOCATE(slots+1, int, TAG_TEMPORARY, "pcre_local_exec");
	p->ovecsize = slots;

	p->rc = pcre_exec(p->re, NULL, p->subject, p->s_length, 0,
			  exec_options, p->ovector, slots);
	return p->rc;
}
/*
 * Lua: compile(pattern [, options [, study_options]]) -> regexp userdata
 *
 * pattern        argument 1, the pattern text
 * options        argument 2, pcre_compile2() option bits (default 0);
 *                PCRE_UTF8 is always added
 * study_options  argument 3; when it is an explicit nil the pattern is not
 *                studied, otherwise it is studied with the given options
 *                (default PCRE_STUDY_JIT_COMPILE)
 *
 * Leaves one value on the stack: a cs_regexp_t userdata carrying the
 * RE_MTBL_NAME metatable, with the capture count stored in capture_cnt.
 * Raises a Lua error when compiling or studying fails.
 */
static int regexp_compile(lua_State *L)
{
    const char *pattern = luaL_checkstring(L, 1);
    int options = luaL_optint(L, 2, 0) | PCRE_UTF8;
    int study_options_type = lua_type(L, 3);
    int study_options = 0;
    int err_code;
    const char *err_text;
    int err_offset;
    cs_regexp_t *regexp;

    if (study_options_type != LUA_TNIL) {
        study_options = luaL_optint(L, 3, PCRE_STUDY_JIT_COMPILE);
    }

    /* Create and zero the userdata before compiling, and give it its
     * metatable right away. */
    regexp = lua_newuserdata(L, sizeof(cs_regexp_t));
    memset(regexp, 0, sizeof(cs_regexp_t));
    luaL_getmetatable(L, RE_MTBL_NAME);
    lua_setmetatable(L, -2);

    regexp->re = pcre_compile2(pattern, options, &err_code, &err_text, &err_offset, NULL);
    if (!regexp->re) {
        return luaL_error(L, "%s (pattern offset: %d)", err_text, err_offset + 1);
    }

    if (study_options_type != LUA_TNIL) {
        regexp->extra = pcre_study(regexp->re, study_options, &err_text);
        if (err_text) {
            return luaL_error(L, "%s", err_text);
        }
    }

    pcre_fullinfo(regexp->re, regexp->extra, PCRE_INFO_CAPTURECOUNT, &regexp->capture_cnt);
    return 1;
}
/*
 * Initializer: compile the source string (with optional options) and attach
 * the result to `self`.
 *
 * Any regexp data already attached to `self` is freed first. The source
 * string is copied and stored in @source, the options (converted back with
 * mrb_pcre_to_mruby_options()) in @options. For every named group the
 * method name_push(name, number) is called on `self`.
 *
 * Raises E_ARGUMENT_ERROR when the pattern does not compile.
 * Returns self.
 */
mrb_value
regexp_pcre_initialize(mrb_state *mrb, mrb_value self)
{
  int err_offset = 0;
  int pcre_opts;
  const char *err_text = NULL;
  struct mrb_regexp_pcre *reg = NULL;
  mrb_value source;
  mrb_value opt = mrb_nil_value();

  /* Drop whatever a previous initialization attached. */
  reg = (struct mrb_regexp_pcre *)DATA_PTR(self);
  if (reg != NULL) {
    mrb_regexp_free(mrb, reg);
  }
  DATA_TYPE(self) = &mrb_regexp_type;
  DATA_PTR(self) = NULL;

  mrb_get_args(mrb, "S|o", &source, &opt);

  reg = mrb_malloc(mrb, sizeof(struct mrb_regexp_pcre));
  reg->re = NULL;
  DATA_PTR(self) = reg;

  pcre_opts = mrb_mruby_to_pcre_options(opt);
  source = mrb_str_new(mrb, RSTRING_PTR(source), RSTRING_LEN(source));
  reg->re = pcre_compile(RSTRING_PTR(source), pcre_opts, &err_text, &err_offset, NULL);
  if (reg->re == NULL) {
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid regular expression");
  }

  mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), source);
  mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@options"),
             mrb_fixnum_value(mrb_pcre_to_mruby_options(pcre_opts)));

  unsigned char *names;
  int idx, total, stride;

  pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMECOUNT, &total);
  if (total <= 0) {
    return self;
  }

  pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMETABLE, &names);
  pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &stride);

  /* Entry layout: two-byte group number (high byte first), then the name. */
  unsigned char *slot = names;
  for (idx = 0; idx < total; idx++, slot += stride) {
    int number = (slot[0] << 8) | slot[1];
    const char *name = (const char *)(slot + 2);

    mrb_funcall(mrb, self, "name_push", 2,
                mrb_str_new(mrb, name, strlen(name)),
                mrb_fixnum_value(number));
  }

  return self;
}
/*
 * Configuration handler: register one URL rewrite rule.
 *
 * context  must equal the configured forum name ("FORUM_NAME" from
 *          GlobalValues); otherwise the directive is ignored
 * args     args[0] = regular expression, args[1] = replacement,
 *          args[2] = macro text parsed by flt_urlrewrite_parse_macro()
 * argnum   must be 3
 *
 * The rule (compiled and studied pattern, replacement copy, macro tree and
 * a match vector sized for the pattern's capture groups) is appended to
 * flt_urlrewrite_rules, which is created on first use.
 *
 * Returns 0 when the rule was added or the context does not apply, -1 on a
 * wrong argument count or any failure; everything acquired so far is
 * released before -1 is returned.
 */
int flt_urlrewrite_handle(cf_configfile_t *cfile,cf_conf_opt_t *opt,const u_char *context,u_char **args,size_t argnum) {
  flt_urlrewrite_rule_t n_rewrite;
  const u_char *error;
  int err_offset;

  if(flt_urlrewrite_fname == NULL) flt_urlrewrite_fname = cf_hash_get(GlobalValues,"FORUM_NAME",10);
  if(!context || cf_strcmp(context,flt_urlrewrite_fname) != 0) return 0;

  if(argnum != 3) {
    return -1;
  }

  if(!flt_urlrewrite_rules) {
    flt_urlrewrite_rules = cf_alloc(NULL,sizeof(cf_array_t),1,CF_ALLOC_MALLOC);
    cf_array_init(flt_urlrewrite_rules,sizeof(flt_urlrewrite_rule_t),(void(*)(void *))flt_urlrewrite_destroy);
  }

  n_rewrite.macro_tree = flt_urlrewrite_parse_macro(args[2]);
  if(!n_rewrite.macro_tree) {
    return -1;
  }

  n_rewrite.replacement = strdup(args[1]);
  if(!n_rewrite.replacement) {
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    /* Stop here: continuing would register a rule without a replacement
     * and could free the macro tree a second time on a later error. */
    return -1;
  }

  n_rewrite.regexp = pcre_compile(args[0], 0, (const char **)&error, &err_offset, NULL);
  if(!n_rewrite.regexp) {
    fprintf(stderr,"flt_urlrewrite: Regexp error with \"%s\": %s\n", args[0], error);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }

  n_rewrite.regexp_extra = pcre_study(n_rewrite.regexp, 0, (const char **)&error);
  if(error) {
    fprintf(stderr,"Regexp study error with \"%s\": %s\n", args[0], error);
    pcre_free(n_rewrite.regexp);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }

  /* Three ints per capture group, plus one triple for the whole match. */
  n_rewrite.match_count = 0;
  pcre_fullinfo(n_rewrite.regexp, n_rewrite.regexp_extra, PCRE_INFO_CAPTURECOUNT, &(n_rewrite.match_count));
  n_rewrite.match_arr = (int *)cf_alloc(NULL, sizeof(int),(n_rewrite.match_count + 1) * 3, CF_ALLOC_MALLOC);
  if(!n_rewrite.match_arr) {
    pcre_free(n_rewrite.regexp_extra);
    pcre_free(n_rewrite.regexp);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }

  cf_array_push(flt_urlrewrite_rules,&n_rewrite);
  return 0;
}
/*
 * Number of match slots needed for `reg`: its capture-group count plus one
 * for the whole match. Falls back to KREGEXP_MATCHSIZE when the count
 * cannot be queried.
 */
static int pcre_nmatchsize(KonohaContext *kctx, kregexp_t *reg)
{
	PCRE_regexp_t *preg = (PCRE_regexp_t *)reg;
	int groups = 0;
	int status = pcre_fullinfo(preg->re, NULL, PCRE_INFO_CAPTURECOUNT, &groups);

	return (status == 0) ? groups + 1 : KREGEXP_MATCHSIZE;
}