/*
 * Put a grok_t into its empty state and allocate its lookup trees.
 *
 * The first call in the process also performs the global setup: it compiles
 * PATTERN_REGEX into g_pattern_re, records how many capture groups that
 * regex has (group 0 included) and caches the capture numbers of its
 * "name", "pattern", "subname" and "predicate" groups.
 *
 * A compile failure is reported on stderr; the function does not return an
 * error indication.
 */
void grok_init(grok_t *grok) {
  /* libpcre has a single global callout hook; route it to grok's handler */
  pcre_callout = grok_pcre_callout;

  /* logging state */
  grok->logmask = 0;
  grok->logdepth = 0;

  /* no pattern has been compiled yet */
  grok->pattern = NULL;
  grok->full_pattern = NULL;
  grok->re = NULL;

  /* capture bookkeeping */
  grok->pcre_capture_vector = NULL;
  grok->pcre_num_captures = 0;
  grok->max_capture_num = 0;

  /* last compile error reported by pcre */
  grok->pcre_errptr = NULL;
  grok->pcre_erroffset = 0;

#ifndef GROK_TEST_NO_PATTERNS
  grok->patterns = tctreenew();
#endif /* GROK_TEST_NO_PATTERNS */

#ifndef GROK_TEST_NO_CAPTURE
  grok->captures_by_id = tctreenew();
  grok->captures_by_name = tctreenew();
  grok->captures_by_subname = tctreenew();
  grok->captures_by_capture_number = tctreenew();
#endif /* GROK_TEST_NO_CAPTURE */

  /* Everything below is one-time, process-wide initialization. */
  if (g_grok_global_initialized != 0) {
    return;
  }
  g_grok_global_initialized = 1;

  /* VALGRIND NOTE: valgrind complains about this call, but the result is
   * stored in a global variable; the complaint can be ignored. */
  g_pattern_re = pcre_compile(PATTERN_REGEX, 0,
                              &grok->pcre_errptr, &grok->pcre_erroffset,
                              NULL);
  if (!g_pattern_re) {
    fprintf(stderr, "Internal compiler error: %s\n", grok->pcre_errptr);
    fprintf(stderr, "Regexp: %s\n", PATTERN_REGEX);
    fprintf(stderr, "Position: %d\n", grok->pcre_erroffset);
  }

  pcre_fullinfo(g_pattern_re, NULL, PCRE_INFO_CAPTURECOUNT,
                &g_pattern_num_captures);
  g_pattern_num_captures += 1; /* the 0th (whole match) group counts too */

  g_cap_name = pcre_get_stringnumber(g_pattern_re, "name");
  g_cap_pattern = pcre_get_stringnumber(g_pattern_re, "pattern");
  g_cap_subname = pcre_get_stringnumber(g_pattern_re, "subname");
  g_cap_predicate = pcre_get_stringnumber(g_pattern_re, "predicate");
}
/*
 * get_first_set (16-bit signature; the #endif below closes a conditional
 * that is opened before this point).
 *
 * Maps a group name to the capture number whose substring should be used.
 *
 *   code        compiled pattern
 *   stringname  name of the group to look up
 *   ovector     offset vector filled in by a previous match
 *
 * When neither PCRE_DUPNAMES is set in re->options nor PCRE_JCHANGED in
 * re->flags, the result of pcre_get_stringnumber() / pcre16_get_stringnumber()
 * is returned directly.  Otherwise the name-table entries for the name are
 * fetched; a non-positive entry size is returned as is.  The entries from
 * `first` to `last` are then walked in steps of `entrysize`, the group number
 * is read from each with GET2(entry, 0), and the first number n for which
 * ovector[n*2] >= 0 is returned.
 *
 * NOTE(review): when no group in the range is set, the final GET2(entry, 0)
 * runs after the loop has advanced `entry` beyond `last`, so it reads the
 * entry following the matching range - confirm this is intended.
 * NOTE(review): n is not checked against the size of ovector here; callers
 * presumably guarantee the vector is large enough - confirm.
 */
static int get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) #endif { const REAL_PCRE *re = (const REAL_PCRE *)code; int entrysize; pcre_uchar *entry; #ifdef COMPILE_PCRE8 char *first, *last; #else PCRE_UCHAR16 *first, *last; #endif #ifdef COMPILE_PCRE8 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) return pcre_get_stringnumber(code, stringname); entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); #else if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) return pcre16_get_stringnumber(code, stringname); entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last); #endif if (entrysize <= 0) return entrysize; for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) { int n = GET2(entry, 0); if (ovector[n*2] >= 0) return n; } return GET2(entry, 0); }
/*
 * Ported from get_first_set() in pcre_get.c in pcre source.
 *
 * Maps a group name to the capture number that should be read from the
 * match result.
 *
 * If the pattern neither allows duplicate names (PCRE_DUPNAMES) nor changed
 * that setting inside the pattern (PCRE_INFO_JCHANGED), the name is unique
 * and pcre_get_stringnumber() answers directly.  Otherwise all name-table
 * entries carrying the name are scanned and the first group that actually
 * took part in the match (ovector start offset >= 0) is returned.
 *
 * Returns a capture number, or a negative PCRE error code when the name is
 * unknown.  When none of the same-named groups is set, the number of the
 * first such group is returned; its ovector slots are unset like all others.
 */
static int matchres_first_set(cs_matchres_t *mr, const char *group_name)
{
    cs_regexp_t *regexp = mr->regexp;
    pcre *re = regexp->re;
    pcre_extra *extra = regexp->extra;
    /* Initialised so that a failing pcre_fullinfo() leaves defined values
       (treated as "no duplicate names") instead of indeterminate ones. */
    unsigned long options = 0;
    int jchanged = 0;
    char *first;
    char *last;
    const uchar *head;
    uchar *entry;
    int entry_len;

    pcre_fullinfo(re, extra, PCRE_INFO_OPTIONS, &options);
    pcre_fullinfo(re, extra, PCRE_INFO_JCHANGED, &jchanged);

    if (!(options & PCRE_DUPNAMES) && !jchanged) {
        return pcre_get_stringnumber(re, group_name);
    }

    entry_len = pcre_get_stringtable_entries(re, group_name, &first, &last);
    if (entry_len < 0) {
        return entry_len;
    }
    if (entry_len == 0) {
        /* A zero stride would never advance the scan below. */
        return PCRE_ERROR_NOSUBSTRING;
    }

    /* Each entry starts with the group number, most significant byte first. */
    for (entry = (uchar *)first; entry <= (uchar *)last; entry += entry_len) {
        int n = entry[0] << 8 | entry[1];
        if (mr->ovector[n * 2] >= 0) {
            return n;
        }
    }

    /* No group with this name is set.  Report the first one rather than
       decoding the entry after `last`, which belongs to a different name
       (or lies beyond the name table). */
    head = (const uchar *)first;
    return head[0] << 8 | head[1];
}
static int get_first_set(const pcre *code, const char *stringname, int *ovector) { const real_pcre *re = (const real_pcre *)code; int entrysize; char *first, *last; uschar *entry; if( (re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0 ) return pcre_get_stringnumber(code, stringname); entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); if( entrysize <= 0 ) return entrysize; for( entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize ) { int n = (entry[0] << 8) + entry[1]; if (ovector[n*2] >= 0) return n; } return (first[0] << 8) + first[1]; }
/*
 * Extract the substring captured by a named group.
 *
 * The name is translated to a capture number with pcre_get_stringnumber();
 * a positive number is forwarded to pcre_get_substring(), whose result is
 * returned.  Any other lookup result (zero or a negative value) is returned
 * to the caller untouched and *stringptr is not written by this function.
 */
int
pcre_get_named_substring(const pcre *code, const pcre_char *subject, int *ovector, int stringcount, const pcre_char *stringname, const pcre_char **stringptr)
{
    const int group = pcre_get_stringnumber(code, stringname);

    if (group > 0) {
        return pcre_get_substring(subject, ovector, stringcount, group, stringptr);
    }

    /* lookup did not produce a usable capture number */
    return group;
}
/*
 * Copy the substring captured by a named group into a caller buffer.
 *
 * The name is translated to a capture number with pcre_get_stringnumber();
 * a positive number is forwarded to pcre_copy_substring() together with
 * `buffer` and `size`, and that call's result is returned.  Any other lookup
 * result (zero or a negative value) is returned untouched and `buffer` is
 * not written by this function.
 */
int
pcre_copy_named_substring(const pcre *code, const pcre_char *subject, int *ovector, int stringcount, const pcre_char *stringname, pcre_char *buffer, int size)
{
    const int group = pcre_get_stringnumber(code, stringname);

    if (group > 0) {
        return pcre_copy_substring(subject, ovector, stringcount, group, buffer, size);
    }

    /* lookup did not produce a usable capture number */
    return group;
}
/** Try every pattern of the current state and remember the best match.

    For each pattern in context->state the regex is run on context->line
    starting at context->match->match_start. A match whose end offset lies
    beyond context->best_end becomes the new context->best. Patterns without
    a compiled regex are either references to another state (next_state >= 0),
    which is searched recursively, or end patterns using the dynamic regex of
    the current mapping entry.

    When the winning pattern names a dynamic group, the offsets of that group
    are stored in context->extract_start / context->extract_end. If the name
    cannot be resolved or the group number is larger than 10, both are set
    to 0.
*/
static void match_internal(match_context_t *context) {
	size_t j;

	for (j = 0; j < context->state->patterns.used; j++) {
		full_pcre_t *regex;
		int options = PCRE_NO_UTF8_CHECK;
		int result;

		if (context->state->patterns.data[j].regex.regex == NULL) {
			/* No regex of its own: a "use" of another state, or an end
			   pattern with a dynamic back reference. */
			if (context->state->patterns.data[j].next_state >= 0) {
				state_t *save_state = context->state;

				context->state = &context->match->highlight->states.data[save_state->patterns.data[j].next_state];
				match_internal(context);
				context->state = save_state;
				continue;
			}
			regex = &context->match->mapping.data[context->match->state].dynamic->regex;
		} else {
			regex = &context->state->patterns.data[j].regex;
			if (context->state->patterns.data[j].next_state == NO_CHANGE) {
				/* Items that do not change state must never match empty,
				   because that would make no progress. */
				options |= PCRE_NOTEMPTY;
			} else if (context->state->patterns.data[j].next_state > NO_CHANGE &&
					!(context->match->highlight->flags & T3_HIGHLIGHT_ALLOW_EMPTY_START)) {
				/* By default start patterns may not be empty either, so
				   that progress is guaranteed. */
				options |= PCRE_NOTEMPTY;
			}
		}

		result = pcre_exec(regex->regex, regex->extra, context->line, context->size,
			context->match->match_start, options, context->ovector,
			sizeof(context->ovector) / sizeof(context->ovector[0]));
		if (result < 0 || context->ovector[1] <= context->best_end)
			continue;

		context->best = &context->state->patterns.data[j];
		context->best_end = context->ovector[1];

		if (context->best->extra != NULL && context->best->extra->dynamic_name != NULL) {
			int string_number = pcre_get_stringnumber(context->best->regex.regex,
				context->best->extra->dynamic_name);

			/* Reject every negative result, not only PCRE_ERROR_NOSUBSTRING:
			   any other error code would otherwise be used as a negative
			   index into ovector. */
			if (string_number < 0 || string_number > 10) {
				context->extract_start = 0;
				context->extract_end = 0;
			} else {
				context->extract_start = context->ovector[string_number * 2];
				context->extract_end = context->ovector[string_number * 2 + 1];
			}
		}
	}
}
/*
 * Translate a capture designator into an offset into the match's offset
 * vector.
 *
 * `i` is either a String naming a group, resolved with
 * pcre_get_stringnumber(), or a value that sl_expect() accepts as an Int and
 * that is used as the group number directly.
 *
 * Returns group * 2, or -1 when the name lookup yields a negative value or
 * the group number is outside [0, match->capture_count).
 */
static int
cap_index(sl_vm_t* vm, SLVAL regexp_match, SLVAL i)
{
    sl_regexp_match_t* match = get_regexp_match(vm, regexp_match);
    int group;

    if(!sl_is_a(vm, i, vm->lib.String)) {
        /* numeric designator */
        group = sl_get_int(sl_expect(vm, i, vm->lib.Int));
    } else {
        /* named designator; a failed lookup is negative and rejected below */
        group = pcre_get_stringnumber(match->re->re, sl_to_cstr(vm, i));
    }

    if(group >= 0 && group < match->capture_count) {
        return group * 2;
    }
    return -1;
}
/*
 * pcre_private: match the delimited pattern INPUT_PAT against INPUT_LINE.
 *
 * Parameters
 *   INPUT_LINE               subject text; it is first passed through
 *                            strsub(INPUT_LINE, "\\", "\\\\") into `buffer`
 *                            (presumably doubling every backslash so that the
 *                            escape decoder below restores them - strsub is
 *                            defined elsewhere, confirm).
 *   INPUT_PAT                pattern in the form <delim>regex<delim>options;
 *                            leading white space is skipped.
 *   Output_Start, Output_End receive the offsets of the first offset pair
 *                            whose start is non-negative.
 *   _pstCapturedString,
 *   _piCapturedStringCount   optional (both must be non-NULL to be used):
 *                            receive a MALLOC'ed array of strdup'ed captured
 *                            substrings and the capture count reported by
 *                            PCRE_INFO_CAPTURECOUNT.  Filled only on the
 *                            plain pcre_exec path.
 *
 * Return values visible in this function
 *   PCRE_FINISHED_OK                     a match was found
 *   NO_MATCH                             PCRE_ERROR_NOMATCH on the first try
 *   MATCH_LIMIT                          PCRE_ERROR_MATCHLIMIT
 *   UTF8_NOT_SUPPORTED                   pcre_config reports no UTF-8 support
 *   NOT_ENOUGH_MEMORY_FOR_VECTOR         the offsets vector allocation failed
 *   DELIMITER_NOT_ALPHANUMERIC           the delimiter is alphanumeric or '\\'
 *   CAN_NOT_COMPILE_PATTERN              missing closing delimiter, unknown
 *                                        option letter, failed setlocale, or
 *                                        pcre_compile failure
 *   TOO_BIG_FOR_OFFSET_SIZE              pcre_exec returned more pairs than
 *                                        use_size_offsets / 3
 *   LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING  DFA together with find_match_limit
 *   PCRE_EXIT                            the main loop ended without a result
 *
 * Outline: PCRE_UTF8 is always part of the compile options; the option
 * letters after the closing delimiter add further compile options.  After
 * compiling, `options` is reset to 0 and the subject is decoded in place:
 * backslash escapes either produce a character or set run-time options,
 * start offset, offset-vector size, match limits and callout data.  The
 * match is then executed, in a loop when /g or /G was given.
 *
 * NOTE(review): `buffer` is not declared in this function; it is presumably
 * a file-scope variable, which would make the function non-reentrant.
 * NOTE(review): the `continue` taken for an empty pattern and the
 * UTF8_NOT_SUPPORTED return under case '8' do not release back_p.
 * NOTE(review): MALLOC results for `extra`, the enlarged offsets vector and
 * *_pstCapturedString are used without a NULL check.
 * NOTE(review): entries of *_pstCapturedString whose substring comes back
 * NULL are left unassigned.
 * NOTE(review): the loops `while (*pp++ != '>')` / `while (*p++ != '>')` do
 * not test for the end of the string.
 * NOTE(review): all_use_dfa and use_dfa are never set to non-zero in this
 * function, so the LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING branch looks
 * unreachable; it would FREE(p), which at that point points into `buffer`.
 * NOTE(review): memory obtained from strdup() is released with FREE();
 * confirm that FREE is compatible with the C library allocator.
 */
pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount) { /* ALL strings are managed as UTF-8 by default */ int options = PCRE_UTF8; int size_offsets = 45; int size_offsets_max; int *offsets = NULL; int all_use_dfa = 0; BOOL LOOP_PCRE_TST = FALSE; /* These vectors store, end-to-end, a list of captured substring names. Assume that 1024 is plenty long enough for the few names we'll be testing. */ char copynames[1024]; char getnames[1024]; char *copynamesptr = NULL; char *getnamesptr = NULL; int rc = 0; (void)pcre_config(PCRE_CONFIG_UTF8, &rc); if (rc != 1) { return UTF8_NOT_SUPPORTED; } /* bug 3891 */ /* backslash characters are not interpreted for input */ buffer = strsub(INPUT_LINE, "\\", "\\\\"); size_offsets_max = size_offsets; offsets = (int *)MALLOC(size_offsets_max * sizeof(int)); if (offsets == NULL) { if (buffer) { FREE(buffer); buffer = NULL; } return NOT_ENOUGH_MEMORY_FOR_VECTOR; } /* Main loop */ LOOP_PCRE_TST = FALSE; while (!LOOP_PCRE_TST) { pcre *re = NULL; pcre_extra *extra = NULL; const char *error = NULL; char *back_p = NULL; char *p = NULL; char *pp = NULL; char *ppp = NULL; const unsigned char *tables = NULL; int do_G = 0; int do_g = 0; int erroroffset = 0, len = 0, delimiter; LOOP_PCRE_TST = TRUE; p = strdup(INPUT_PAT); back_p = p; while (isspace(*p)) { p++; } if (*p == 0) { continue; } /* In-line pattern (the usual case). Get the delimiter and seek the end of the pattern; if is isn't complete, read more. 
*/ delimiter = *p++; if (isalnum(delimiter) || delimiter == '\\') { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return DELIMITER_NOT_ALPHANUMERIC; } pp = p; while (*pp != 0) { if (*pp == '\\' && pp[1] != 0) { pp++; } else if (*pp == delimiter) { break; } pp++; } /* If the delimiter can't be found, it's a syntax error */ if (*pp == 0) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } if (offsets) { FREE(offsets); } return CAN_NOT_COMPILE_PATTERN; } /* If the first character after the delimiter is backslash, make the pattern end with backslash. This is purely to provide a way of testing for the error message when a pattern ends with backslash. */ if (pp[1] == '\\') { *pp++ = '\\'; } /* Terminate the pattern at the delimiter, and save a copy of the pattern for callouts. */ *pp++ = 0; /* Look for options after final delimiter */ //options = 8192; while (*pp != 0) { switch (*pp++) { case 'f': options |= PCRE_FIRSTLINE; break; case 'g': do_g = 1; break; case 'i': options |= PCRE_CASELESS; break; case 'm': options |= PCRE_MULTILINE; break; case 's': options |= PCRE_DOTALL; break; case 'x': options |= PCRE_EXTENDED; break; case '+': break; case 'A': options |= PCRE_ANCHORED; break; case 'B': break; case 'C': options |= PCRE_AUTO_CALLOUT; break; case 'D': break; case 'E': options |= PCRE_DOLLAR_ENDONLY; break; case 'F': break; case 'G': do_G = 1; break; case 'I': break; case 'J': options |= PCRE_DUPNAMES; break; case 'M': break; case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; case 'S': break; case 'U': options |= PCRE_UNGREEDY; break; case 'X': options |= PCRE_EXTRA; break; case 'Z': break; case '8': { int rc = 0; (void)pcre_config(PCRE_CONFIG_UTF8, &rc); if (rc != 1) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); } return UTF8_NOT_SUPPORTED; } options |= 
PCRE_UTF8; } break; case '?': options |= PCRE_NO_UTF8_CHECK; break; case 'L': ppp = pp; /* The '\r' test here is so that it works on Windows. */ /* The '0' test is just in case this is an unterminated line. */ while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') { ppp++; } *ppp = 0; if (setlocale(LC_CTYPE, (const char *)pp) == NULL) { goto SKIP_DATA; } tables = pcre_maketables(); pp = ppp; break; case '>': while (*pp != 0) { pp++; } while (isspace(pp[-1])) { pp--; } *pp = 0; break; case '<': { while (*pp++ != '>') { ; } } break; case '\r': /* So that it works in Windows */ case '\n': case ' ': break; default: goto SKIP_DATA; } } /* Handle compiling via the POSIX interface, which doesn't support the timing, showing, or debugging options, nor the ability to pass over local character tables. */ { re = pcre_compile((char *)p, options, &error, &erroroffset, tables); /* Compilation failed; go back for another re, skipping to blank line if non-interactive. */ if (re == NULL) { SKIP_DATA: if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return CAN_NOT_COMPILE_PATTERN; } } /* End of non-POSIX compile */ /* Read data lines and test them */ { char *q = NULL; char *bptr = NULL; int *use_offsets = offsets; int use_size_offsets = size_offsets; int callout_data = 0; int callout_data_set = 0; int count = 0; int c = 0; int copystrings = 0; int find_match_limit = 0; int getstrings = 0; int gmatched = 0; int start_offset = 0; int g_notempty = 0; int use_dfa = 0; options = 0; *copynames = 0; *getnames = 0; copynamesptr = copynames; getnamesptr = getnames; callout_count = 0; callout_fail_count = 999999; callout_fail_id = -1; if (extra != NULL) { extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION); } p = buffer; bptr = q = buffer; while ((c = *p++) != 0) { int i 
= 0; int n = 0; if (c == '\\') switch ((c = *p++)) { case 'a': c = 7; break; case 'b': c = '\b'; break; case 'e': c = 27; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c -= '0'; while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') { c = c * 8 + *p++ - '0'; } break; case 'x': /* Ordinary \x */ c = 0; while (i++ < 2 && isxdigit(*p)) { c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W'); p++; } break; case 0: /* \ followed by EOF allows for an empty line */ p--; continue; case '>': while (isdigit(*p)) { start_offset = start_offset * 10 + *p++ - '0'; } continue; case 'A': /* Option setting */ options |= PCRE_ANCHORED; continue; case 'B': options |= PCRE_NOTBOL; continue; case 'C': if (isdigit(*p)) /* Set copy string */ { while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } copystrings |= 1 << n; } else if (isalnum(*p)) { char *npp = copynamesptr; while (isalnum(*p)) { *npp++ = *p++; } *npp++ = 0; *npp = 0; pcre_get_stringnumber(re, (char *)copynamesptr); copynamesptr = npp; } else if (*p == '+') { p++; } else if (*p == '-') { p++; } else if (*p == '!') { callout_fail_id = 0; p++; while (isdigit(*p)) { callout_fail_id = callout_fail_id * 10 + *p++ - '0'; } callout_fail_count = 0; if (*p == '!') { p++; while (isdigit(*p)) { callout_fail_count = callout_fail_count * 10 + *p++ - '0'; } } } else if (*p == '*') { int sign = 1; callout_data = 0; if (*(++p) == '-') { sign = -1; p++; } while (isdigit(*p)) { callout_data = callout_data * 10 + *p++ - '0'; } callout_data *= sign; callout_data_set = 1; } continue; case 'G': if (isdigit(*p)) { while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } getstrings |= 1 << n; } else if (isalnum(*p)) { char *npp = getnamesptr; while (isalnum(*p)) { *npp++ = *p++; } *npp++ = 0; *npp = 0; pcre_get_stringnumber(re, (char *)getnamesptr); getnamesptr = npp; } continue; case 
'L': continue; case 'M': find_match_limit = 1; continue; case 'N': options |= PCRE_NOTEMPTY; continue; case 'O': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (n > size_offsets_max) { size_offsets_max = n; if (offsets) { FREE(offsets); } use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int)); } use_size_offsets = n; if (n == 0) { use_offsets = NULL; /* Ensures it can't write to it */ } continue; case 'P': options |= PCRE_PARTIAL; continue; case 'Q': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra->match_limit_recursion = n; continue; case 'q': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_MATCH_LIMIT; extra->match_limit = n; continue; #if !defined NODFA case 'R': options |= PCRE_DFA_RESTART; continue; #endif case 'S': continue; case 'Z': options |= PCRE_NOTEOL; continue; case '?': options |= PCRE_NO_UTF8_CHECK; continue; case '<': { while (*p++ != '>') { ; } } continue; } *q++ = (char)c; } *q = 0; len = (int)(q - buffer); if ((all_use_dfa || use_dfa) && find_match_limit) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (p) { FREE(p); p = NULL; } if (re) { (*pcre_free)(re); re = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING; } /* Handle matching via the POSIX interface, which does not support timing or playing with the match limit or callout data. */ for (;; gmatched++) /* Loop for /g or /G */ { /* If find_match_limit is set, we want to do repeated matches with varying limits in order to find the minimum value for the match limit and for the recursion limit. 
*/ if (find_match_limit) { if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } (void)check_match_limit(re, extra, bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets, PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), PCRE_ERROR_MATCHLIMIT); count = check_match_limit(re, extra, bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets, PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion), PCRE_ERROR_RECURSIONLIMIT); } /* If callout_data is set, use the interface with additional data */ else if (callout_data_set) { if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_CALLOUT_DATA; extra->callout_data = &callout_data; count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets); extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; } /* The normal case is just to do the match once, with the default value of match_limit. 
*/ else { count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets); if (count == 0) { count = use_size_offsets / 3; } //to retrieve backref count and values if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL) { int i = 0; int iErr = 0; iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount); //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount); if (*_piCapturedStringCount > 0) { *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount); for (i = 0 ; i < *_piCapturedStringCount ; i++) { char* pstSubstring = NULL; pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring); if (pstSubstring != NULL) { (*_pstCapturedString)[i] = strdup(pstSubstring); } pcre_free_substring(pstSubstring); } } } } /* Matched */ if (count >= 0) { int i, maxcount; maxcount = use_size_offsets / 3; /* This is a check against a lunatic return value. */ if (count > maxcount) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (re) { (*pcre_free)(re); re = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return TOO_BIG_FOR_OFFSET_SIZE; } for (i = 0; i < count * 2; i += 2) { if (use_offsets[i] >= 0) { *Output_Start = use_offsets[i]; *Output_End = use_offsets[i + 1]; if (buffer) { FREE(buffer); } /* use_offsets = offsets no need to free use_offsets if we free offsets */ if (offsets) { FREE(offsets); } /* "re" allocated by pcre_compile (better to use free function associated)*/ if (re) { (*pcre_free)(re); } if (extra) { FREE(extra); } if (tables) { /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/ (*pcre_free)((void *)tables); tables = NULL; setlocale(LC_CTYPE, "C"); } if (back_p) { FREE(back_p); back_p = NULL; } return PCRE_FINISHED_OK; } } for (copynamesptr 
= copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1) { char copybuffer[256]; pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer)); } for (i = 0; i < 32; i++) { if ((getstrings & (1 << i)) != 0) { const char *substring; pcre_get_substring((char *)bptr, use_offsets, count, i, &substring); } } for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1) { const char *substring; pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring); } } /* Failed to match. If this is a /g or /G loop and we previously set g_notempty after a null match, this is not necessarily the end. We want to advance the start offset, and continue. We won't be at the end of the string - that was checked before setting g_notempty. Complication arises in the case when the newline option is "any" or "anycrlf". If the previous match was at the end of a line terminated by CRLF, an advance of one character just passes the \r, whereas we should prefer the longer newline sequence, as does the code in pcre_exec(). Fudge the offset value to achieve this. Otherwise, in the case of UTF-8 matching, the advance must be one character, not one byte. 
*/ else { if (count == PCRE_ERROR_NOMATCH) { if (gmatched == 0) { if (tables) { (*pcre_free)((void *)tables); tables = NULL; } if (re) { (*pcre_free)((void *)re); re = NULL; } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); } if (p) { FREE(back_p); back_p = NULL; } return NO_MATCH; } } if (count == PCRE_ERROR_MATCHLIMIT ) { if (tables) { (*pcre_free)((void *)tables); tables = NULL; } if (re) { (*pcre_free)((void *)re); re = NULL; } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return MATCH_LIMIT; } break; /* Out of loop */ } /* If not /g or /G we are done */ if (!do_g && !do_G) { break; } /* If we have matched an empty string, first check to see if we are at the end of the subject. If so, the /g loop is over. Otherwise, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = 0; if (use_offsets[0] == use_offsets[1]) { if (use_offsets[0] == len) { break; } g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; } /* For /g, update the start offset, leaving the rest alone */ if (do_g) { start_offset = use_offsets[1]; } /* For /G, update the pointer and length */ else { bptr += use_offsets[1]; len -= use_offsets[1]; } } /* End of loop for /g and /G */ if (re) { (*pcre_free)(re); re = NULL; } if (extra) { FREE(extra); extra = NULL; } if (tables) { (*pcre_free)((void *)tables); tables = NULL; } FREE(back_p); back_p = NULL; continue; } /* End of loop for data lines */ } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } return PCRE_EXIT; }
/*
 * OCaml stubs around pcre_exec / pcre_dfa_exec and a few PCRE helpers.
 *
 * pcre_exec_stub0
 *   Runs a match of v_rex on v_subj.  DFA matching is selected when
 *   v_workspace is not (value) NULL.  v_pos must lie in
 *   [v_subj_start, length of v_subj] and v_subj_start in
 *   [0, length of v_subj]; otherwise caml_invalid_argument is called.
 *   pos and len are made relative to v_subj_start before matching.
 *   - Without callout (v_maybe_cof == None) PCRE works directly on the
 *     OCaml subject and on the fields of v_ovec.
 *   - With a callout function the subject is copied into a buffer obtained
 *     from caml_stat_alloc, a separate C ovec is allocated, and a
 *     `struct cod` carrying the callout closure and the (subject, ovec)
 *     pair is passed to PCRE through new_extra.callout_data.
 *   A negative result goes to handle_exec_error, except PCRE_ERROR_CALLOUT
 *   on the callout path, which raises cod.v_exn; a non-negative result goes
 *   to handle_pcre_exec_result.  Always returns Val_unit.
 *
 *   NOTE(review): on the callout + DFA path pcre_dfa_exec is given `extra`
 *   rather than &new_extra, and the OCaml workspace rather than the freshly
 *   allocated `workspace`, whose contents are nevertheless copied back into
 *   v_workspace on success - confirm this is intended.
 *   NOTE(review): v_substrings is registered with Begin_roots4 before it is
 *   assigned - confirm this is safe for the GC.
 *
 * pcre_exec_stub            native entry point, no DFA workspace.
 * pcre_exec_stub_bc         byte-code entry point, no DFA workspace.
 * pcre_dfa_exec_stub_bc     byte-code entry point, argv[7] is the workspace.
 * tables_ops                custom-block operations for character tables;
 *                           the finalizer is pcre_dealloc_tables.
 * pcre_maketables_stub      wraps pcre_maketables() in a custom block.
 * pcre_isspace_stub         Val_bool(isspace(c)).
 * pcre_get_stringnumber_stub(_bc)
 *                           capture number for a group name; calls
 *                           caml_invalid_argument on
 *                           PCRE_ERROR_NOSUBSTRING.
 * pcre_names_stub           builds an OCaml array with one string per
 *                           name-table entry, read at offset 2 of each
 *                           entry and stepping by the entry size.
 * pcre_config_int           integer result of pcre_config(what, ...).
 * pcre_config_long          reads a long from pcre_config.
 *   NOTE(review): pcre_config_long is declared to return int, so the long
 *   value is converted to int on return.
 */
CAMLprim value pcre_exec_stub0( intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj, value v_ovec, value v_maybe_cof, value v_workspace) { int ret; int is_dfa = v_workspace != (value) NULL; long pos = v_pos, len = caml_string_length(v_subj), subj_start = v_subj_start; long ovec_len = Wosize_val(v_ovec); if (pos > len || pos < subj_start) caml_invalid_argument("Pcre.pcre_exec_stub: illegal position"); if (subj_start > len || subj_start < 0) caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start"); pos -= subj_start; len -= subj_start; { const pcre *code = get_rex(v_rex); /* Compiled pattern */ const pcre_extra *extra = get_extra(v_rex); /* Extra info */ const char *ocaml_subj = String_val(v_subj) + subj_start; /* Subject string */ const int opt = v_opt; /* Runtime options */ /* Special case when no callout functions specified */ if (v_maybe_cof == None) { int *ovec = (int *) &Field(v_ovec, 0); /* Performs the match */ if (is_dfa) ret = pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len, (int *) &Field(v_workspace, 0), Wosize_val(v_workspace)); else ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len); if (ret < 0) handle_exec_error("pcre_exec_stub", ret); else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); } /* There are callout functions */ else { value v_cof = Field(v_maybe_cof, 0); value v_substrings; char *subj = caml_stat_alloc(sizeof(char) * len); int *ovec = caml_stat_alloc(sizeof(int) * ovec_len); int workspace_len; int *workspace; struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL }; struct pcre_extra new_extra = #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION # ifdef PCRE_EXTRA_MARK # ifdef PCRE_EXTRA_EXECUTABLE_JIT { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL }; # else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL }; # endif # else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 }; # endif #else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, 
NULL, NULL }; #endif cod.subj_start = subj_start; memcpy(subj, ocaml_subj, len); Begin_roots4(v_rex, v_cof, v_substrings, v_ovec); Begin_roots1(v_subj); v_substrings = caml_alloc_small(2, 0); End_roots(); Field(v_substrings, 0) = v_subj; Field(v_substrings, 1) = v_ovec; cod.v_substrings_p = &v_substrings; cod.v_cof_p = &v_cof; new_extra.callout_data = &cod; if (extra != NULL) { new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags; new_extra.study_data = extra->study_data; new_extra.match_limit = extra->match_limit; new_extra.tables = extra->tables; #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION new_extra.match_limit_recursion = extra->match_limit_recursion; #endif } if (is_dfa) { workspace_len = Wosize_val(v_workspace); workspace = caml_stat_alloc(sizeof(int) * workspace_len); ret = pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len, (int *) &Field(v_workspace, 0), workspace_len); } else ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len); caml_stat_free(subj); End_roots(); if (ret < 0) { if (is_dfa) caml_stat_free(workspace); caml_stat_free(ovec); if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn); else handle_exec_error("pcre_exec_stub(callout)", ret); } else { handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret); if (is_dfa) { caml_int_ptr ocaml_workspace_dst = (caml_int_ptr) &Field(v_workspace, 0); const int *workspace_src = workspace; const int *workspace_src_stop = workspace + workspace_len; while (workspace_src != workspace_src_stop) { *ocaml_workspace_dst = *workspace_src; ocaml_workspace_dst++; workspace_src++; } caml_stat_free(workspace); } caml_stat_free(ovec); } } } return Val_unit; } CAMLprim value pcre_exec_stub( intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj, value v_ovec, value v_maybe_cof) { return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj, v_ovec, v_maybe_cof, (value) NULL); } /* Byte-code hook for pcre_exec_stub Needed, because there are more than 5 arguments */ 
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub0( Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]), argv[4], argv[5], argv[6], (value) NULL); } /* Byte-code hook for pcre_dfa_exec_stub Needed, because there are more than 5 arguments */ CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub0( Int_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]), argv[4], argv[5], argv[6], argv[7]); } static struct custom_operations tables_ops = { "pcre_ocaml_tables", pcre_dealloc_tables, custom_compare_default, custom_hash_default, custom_serialize_default, custom_deserialize_default, custom_compare_ext_default }; /* Generates a new set of chartables for the current locale (see man page of PCRE */ CAMLprim value pcre_maketables_stub(value __unused v_unit) { /* GC will do a full cycle every 1_000_000 table set allocations (one table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed table sets) */ const value v_tables = caml_alloc_custom( &tables_ops, sizeof(struct pcre_ocaml_tables), 1, 1000000); set_tables(v_tables, pcre_maketables()); return v_tables; } /* Wraps around the isspace-function */ CAMLprim value pcre_isspace_stub(value v_c) { return Val_bool(isspace(Int_val(v_c))); } /* Returns number of substring associated with a name */ CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name) { const int ret = pcre_get_stringnumber(get_rex(v_rex), String_val(v_name)); if (ret == PCRE_ERROR_NOSUBSTRING) caml_invalid_argument("Named string not found"); return ret; } CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name) { return Val_int(pcre_get_stringnumber_stub(v_rex, v_name)); } /* Returns array of names of named substrings in a regexp */ CAMLprim value pcre_names_stub(value v_rex) { CAMLparam0(); CAMLlocal1(v_res); int name_count; int entry_size; const char *tbl_ptr; int i; int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, 
&name_count); if (ret != 0) raise_internal_error("pcre_names_stub: namecount"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size); if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr); if (ret != 0) raise_internal_error("pcre_names_stub: nametable"); v_res = caml_alloc(name_count, 0); for (i = 0; i < name_count; ++i) { value v_name = caml_copy_string(tbl_ptr + 2); Store_field(v_res, i, v_name); tbl_ptr += entry_size; } CAMLreturn(v_res); } /* Generic stub for getting integer results from pcre_config */ static inline int pcre_config_int(int what) { int ret; pcre_config(what, (void *) &ret); return ret; } /* Generic stub for getting long integer results from pcre_config */ static inline int pcre_config_long(int what) { long ret; pcre_config(what, (void *) &ret); return ret; }
// Named-group overload: translates groupName to its capture number with
// pcre_get_stringnumber() and forwards to the index-based end() overload.
// The lookup result is forwarded as is, including a negative value for an
// unknown name.
void
Pattern::end (const std::string& groupName) const
{
    const char* name = groupName.c_str();
    end (pcre_get_stringnumber (_re, name));
}
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags) { char *output = text; /** * Retrieve all matches and store them in * mSubStrings list. */ if (MatchAll(output) == -1) { return -1; } size_t subjectLen = strlen(subject); size_t total = 0; size_t baseIndex = 0; size_t diffLength = 0; char *toReplace = new char[textMaxLen + 1]; char *toSearch = NULL; /** * All characters which is not matched are not copied when replacing matches. * Then original text (output buffer) should be considerated as empty. */ if (flags & REGEX_FORMAT_NOCOPY) { *output = '\0'; } else { /** * This is used only when we do replace matches. */ toSearch = new char[textMaxLen + 1]; } /** * Loop over all matches found. */ for (size_t i = 0; i < mMatchesSubs.length(); ++i) { char *ptr = toReplace; size_t browsed = 0; size_t searchLen = 0; size_t length = 0; /** * Build the replace string as it can contain backreference * and this needs to be parsed. */ for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed) { unsigned int c = *s; /** * Supported format specifiers: * * $number : Substitutes the substring matched by group number. * n must be an integer value designating a valid backreference, greater than 0, and of two digits at most. * ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters). * $& : Substitutes a copy of the whole match. * $` : Substitutes all the text of the input string before the match. * $' : Substitutes all the text of the input string after the match. * $+ : Substitutes the last group that was captured. * $_ : Substitutes the entire input string. * $$ : Substitutes a literal "$". */ if (c == '$' || c == '\\') { switch (*++s) { case '\0': { /** * End of string. * Copy one character. */ *(ptr + browsed) = c; break; } case '&': { /** * Concatenate retrieved full match sub-string. * length - 1 to overwrite EOS. 
*/ GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length); browsed += length - 1; break; } case '`': { /** * Concatenate part of original text up to * first sub-string position. */ length = mSubStrings.at(baseIndex).start; memcpy(ptr + browsed, subject, length); browsed += length - 1; break; } case '\'': { /** * Concatenate part of original text from * last sub-string end position to EOS. */ length = mSubStrings.at(baseIndex).end; memcpy(ptr + browsed, subject + length, subjectLen - length); browsed += (subjectLen - length) - 1; break; } case '+': { /** * Copy the last group that was captured. */ GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length); browsed += length - 1; break; } case '_': { /** * Copy the entire input string. */ memcpy(ptr + browsed, subject, subjectLen); browsed += (subjectLen - 1); break; } case '$': case '\\': { /** * Copy the single character $ or \. */ *(ptr + browsed) = c; break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '{': { /** * Checking backreference. * Which can be either $n, ${n} or ${name}. */ int backref = -1; const char *walk = s; bool inBrace = false; bool nameCheck = false; /** * ${nn}. * ^ */ if (*walk == '{') { inBrace = true; ++walk; } /** * Valid number. * $nn or ${nn} * ^ ^ */ if (*walk >= '0' && *walk <= '9') { backref = *walk - '0'; ++walk; } else if (inBrace) { nameCheck = true; /** * Not a valid number. * Checking as string. * ${name} * ^ */ if (*walk) { const char *pch = strchr(walk, '}'); if (pch != NULL) { /** * A named group maximum character is 32 (PCRE). */ char name[32]; size_t nameLength = strncopy(name, walk, pch - walk + 1); int flags, num = 0; pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags); /** * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used * as pcre_get_stringnumber output order is not defined. 
*/ if (flags & PCRE_DUPNAMES) { memset(ovector, 0, REGEX_MAX_SUBPATTERNS); /** * pcre_copy_named_substring needs a vector containing sub-patterns ranges * for a given match. */ for (size_t j = 0; j < mMatchesSubs.at(i); ++j) { ovector[2 * j] = mSubStrings.at(baseIndex + j).start; ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end; } num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen); if (num != PCRE_ERROR_NOSUBSTRING) { browsed += num - 1; s = pch; break; } ++pch; } else { /** * Retrieve sub-pattern index from a give name. */ num = pcre_get_stringnumber(re, name); if (num != PCRE_ERROR_NOSUBSTRING) { backref = num; walk = ++pch; } } if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i)) { /** * If a sub-string for a given match is not found, or if > to * number of sub-patterns we still need to check if this * group name is a valid one because if so we want to escape it. * Looking at the name table. */ bool found = false; for (size_t i = 0; i < mSubsNameTable.length(); ++i) { if (!mSubsNameTable.at(i).name.compare(name)) { --browsed; s = --pch; found = true; break; } } if (found) { continue; } } } } } if (!nameCheck) { /** * Valid second number. * $nn or ${nn} * ^ ^ */ if (*walk && *walk >= '0' && *walk <= '9') { backref = backref * 10 + *walk - '0'; ++walk; } if (inBrace) { /** * Invalid specifier * Either hit EOS or missing }. * ${n or ${nn or ${nx or ${nnx * ^ ^ ^ ^ */ if (*walk == '\0' || *walk != '}') { backref = -1; } else { ++walk; } } } length = walk - s; s = --walk; /** * We can't provide a capture number >= to total that pcre_exec has found. * 0 is implicitly accepted, same behavior as $&. */ if (backref >= 0 && backref < mNumSubpatterns) { /** * Valid available index for a given match. */ if ((size_t)backref < mMatchesSubs.at(i)) { /** * Concatenate retrieved sub-string. * length - 1 to overwrite EOS. 
*/ GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length); browsed += length - 1; } else { /** * Valid unavailable index for a given match. */ --browsed; } } else { /** * If we here it means the syntax is valid but sub-pattern doesn't exist. * So, copy as it is, including $. */ memcpy(ptr + browsed, s - length, length + 1); browsed += length; } break; } default: { /** * Not a valid format modifier. * So we copy characters as it is. */ *(ptr + browsed) = *s; break; } } } else { /** * At this point, direct copy. */ *(ptr + browsed) = c; } } *(ptr + browsed) = '\0'; /** * Concatenate only replace string of each match, * as we don't want to copy unmatched characters. */ if (flags & REGEX_FORMAT_NOCOPY) { /** * We want just the first occurrence. */ if (total++ && (flags & REGEX_FORMAT_FIRSTONLY)) { break; } strncat(output, toReplace, textMaxLen + 1); } else { /** * Retrieves full string of a given match. */ const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen); /** * We get something to replace, but the sub-pattern to search is empty. * We insert replacement either a the start end or string. */ if (*toReplace && !searchLen) { if (output - text > 0) { strncat(output, toReplace, textMaxLen); } else { strncat(toReplace, text, textMaxLen); strncopy(text, toReplace, strlen(toReplace) + 1); } ++total; } else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL) { /** * Then we simply do a replace. * Probably not the most efficient, but this should be at least safe. * To avoid issue where the function could find a string which is not at the expected index, * We force the input string to start from index of the full match. */ ++total; } if (total && (flags & REGEX_FORMAT_FIRSTONLY)) { break; } } /** * mMatchesSubs is a flat list containing all sub-patterns of all matches. * A number of sub-patterns can vary per match. 
So we calculate the position in the list, * from where the first sub-pattern result of current match starts. */ baseIndex += mMatchesSubs.at(i); diffLength += browsed - searchLen; } delete[] toReplace; if (toSearch != NULL) { delete[] toSearch; } /** * Return the number of successful replacements. */ return total; }
// named subpatterns int get_stringnumber(const char *stringname) const throw(std::exception) { _ASSERTE(compiled()); check_stringname(stringname); return pcre_get_stringnumber(_Code::get(), stringname); }
/* PCRE2-style entry point implemented on top of PCRE1: looks up the capture
   group called `name` in the wrapped pattern (code->regex) and returns
   whatever pcre_get_stringnumber returns for it, unchanged. */
int pcre2_substring_number_from_name_8(const pcre2_code_8 *code, PCRE2_SPTR8 name)
{
    const char *group_name = (const char *)name;

    return pcre_get_stringnumber(code->regex, group_name);
}
/* Executes a pattern match with runtime options, a regular expression, a string offset, a string length, a subject string, a number of subgroup offsets, an offset vector and an optional callout function */ CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_ofs, value v_subj, value v_subgroups2, value v_ovec, value v_maybe_cof) { const int ofs = Int_val(v_ofs), len = caml_string_length(v_subj); if (ofs > len || ofs < 0) caml_invalid_argument("Pcre.pcre_exec_stub: illegal offset"); { const pcre *code = (pcre *) Field(v_rex, 1); /* Compiled pattern */ const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2); /* Extra info */ const char *ocaml_subj = String_val(v_subj); /* Subject string */ const int opt = Int_val(v_opt); /* Runtime options */ int subgroups2 = Int_val(v_subgroups2); const int subgroups2_1 = subgroups2 - 1; const int subgroups3 = (subgroups2 >> 1) + subgroups2; /* Special case when no callout functions specified */ if (v_maybe_cof == None) { int *ovec = (int *) &Field(v_ovec, 0); /* Performs the match */ const int ret = pcre_exec(code, extra, ocaml_subj, len, ofs, opt, ovec, subgroups3); if (ret < 0) { switch(ret) { case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found); case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial); case PCRE_ERROR_MATCHLIMIT : caml_raise_constant(*pcre_exc_MatchLimit); case PCRE_ERROR_BADPARTIAL : caml_raise_constant(*pcre_exc_BadPartial); case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8); case PCRE_ERROR_BADUTF8_OFFSET : caml_raise_constant(*pcre_exc_BadUTF8Offset); default : caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub"); } } else { const int *ovec_src = ovec + subgroups2_1; long int *ovec_dst = (long int *) ovec + subgroups2_1; /* Converts offsets from C-integers to OCaml-Integers This is a bit tricky, because there are 32- and 64-bit platforms around and OCaml chooses the larger possibility for representing integers when available (also in arrays) - not 
so the PCRE */ while (subgroups2--) { *ovec_dst = Val_int(*ovec_src); --ovec_src; --ovec_dst; } } } /* There are callout functions */ else { value v_cof = Field(v_maybe_cof, 0); value v_substrings; char *subj = caml_stat_alloc(sizeof(char) * len); int *ovec = caml_stat_alloc(sizeof(int) * subgroups3); int ret; struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL }; struct pcre_extra new_extra = #ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 }; #else { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL }; #endif memcpy(subj, ocaml_subj, len); Begin_roots3(v_rex, v_cof, v_substrings); Begin_roots2(v_subj, v_ovec); v_substrings = caml_alloc_small(2, 0); End_roots(); Field(v_substrings, 0) = v_subj; Field(v_substrings, 1) = v_ovec; cod.v_substrings_p = &v_substrings; cod.v_cof_p = &v_cof; new_extra.callout_data = &cod; if (extra == NULL) { ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec, subgroups3); } else { new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags; new_extra.study_data = extra->study_data; new_extra.match_limit = extra->match_limit; new_extra.tables = extra->tables; #ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION new_extra.match_limit_recursion = extra->match_limit_recursion; #endif ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec, subgroups3); } free(subj); End_roots(); if (ret < 0) { free(ovec); switch(ret) { case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found); case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial); case PCRE_ERROR_MATCHLIMIT : caml_raise_constant(*pcre_exc_MatchLimit); case PCRE_ERROR_BADPARTIAL : caml_raise_constant(*pcre_exc_BadPartial); case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8); case PCRE_ERROR_BADUTF8_OFFSET : caml_raise_constant(*pcre_exc_BadUTF8Offset); case PCRE_ERROR_CALLOUT : caml_raise(cod.v_exn); default : caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub"); } } else { int *ovec_src = ovec + 
subgroups2_1; long int *ovec_dst = &Field(v_ovec, 0) + subgroups2_1; while (subgroups2--) { *ovec_dst = Val_int(*ovec_src); --ovec_src; --ovec_dst; } free(ovec); } } } return Val_unit; } /* Byte-code hook for pcre_exec_stub Needed, because there are more than 5 arguments */ CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn) { return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6]); } /* Generates a new set of chartables for the current locale (see man page of PCRE */ CAMLprim value pcre_maketables_stub(value __unused v_unit) { /* GC will do a full cycle every 100 table set allocations (one table set consumes 864 bytes -> maximum of 86400 bytes unreclaimed table sets) */ const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 864, 86400); Field(v_res, 1) = (value) pcre_maketables(); return v_res; } /* Wraps around the isspace-function */ CAMLprim value pcre_isspace_stub(value v_c) { return Val_bool(isspace(Int_val(v_c))); } /* Returns number of substring associated with a name */ CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name) { const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1), String_val(v_name)); if (ret == PCRE_ERROR_NOSUBSTRING) caml_invalid_argument("Named string not found"); return Val_int(ret); } /* Returns array of names of named substrings in a regexp */ CAMLprim value pcre_names_stub(value v_rex) { CAMLparam0(); CAMLlocal1(v_res); int name_count; int entry_size; const char *tbl_ptr; int i; int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr); if (ret != 0) caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub"); v_res = caml_alloc(name_count, 0); for (i 
= 0; i < name_count; ++i) { value v_name = caml_copy_string(tbl_ptr + 2); Store_field(v_res, i, v_name); tbl_ptr += entry_size; } CAMLreturn(v_res); }
// Returns the result of pcre_get_stringnumber for the capture group named
// `substring` in this object's compiled pattern (mPattern), unchanged.
int cPCRE::GeStringNumber(const string &substring)
{
	const char *groupName = substring.c_str();

	return pcre_get_stringnumber(this->mPattern, groupName);
}