/*
 * Convert the outcome of a finished external check into a stats record and
 * hand it to noit_check_set_stats().
 *
 * self:  the module; its userdata supplies the debug/error log streams.
 * check: the check that ran; check->closure is read as the struct check_info
 *        holding the timeout flag, wait status, stdout and stderr.
 *
 * State mapping:
 *   - timed out                      -> unavailable / bad
 *   - exited normally with status 3  -> unknown / unknown
 *   - exited normally with status 0  -> available / good
 *   - anything else                  -> available / bad
 *
 * When both output and a compiled matcher are present, every match that
 * yields non-empty named groups "key" and "value" is recorded as a metric.
 */
static void external_log_results(noit_module_t *self, noit_check_t *check) {
  external_data_t *data;
  struct check_info *ci;
  stats_t current;
  struct timeval duration;

  noit_check_stats_clear(&current);

  data = noit_module_get_userdata(self);
  ci = (struct check_info *)check->closure;

  noitL(data->nldeb, "external(%s) (timeout: %d, exit: %x)\n",
        check->target, ci->timedout, ci->exit_code);

  gettimeofday(&current.whence, NULL);
  sub_timeval(current.whence, check->last_fire_time, &duration);
  current.duration = duration.tv_sec * 1000 + duration.tv_usec / 1000;

  if(ci->timedout) {
    current.available = NP_UNAVAILABLE;
    current.state = NP_BAD;
  }
  else if(WIFEXITED(ci->exit_code) && WEXITSTATUS(ci->exit_code) == 3) {
    current.available = NP_UNKNOWN;
    current.state = NP_UNKNOWN;
  }
  else {
    current.available = NP_AVAILABLE;
    /* WEXITSTATUS is only meaningful after a normal exit; without the
     * WIFEXITED test a process killed by a signal reads as status 0 and
     * would be reported as good. */
    current.state = (WIFEXITED(ci->exit_code) &&
                     WEXITSTATUS(ci->exit_code) == 0) ? NP_GOOD : NP_BAD;
  }

  /* Hack the output into metrics */
  if(ci->output && ci->matcher) {
    int rc = 0, len, startoffset = 0;
    int ovector[30];
    len = (int)strlen(ci->output);
    noitL(data->nldeb, "going to match output at %d/%d\n", startoffset, len);
    while(startoffset <= len &&
          (rc = pcre_exec(ci->matcher, NULL, ci->output, len, startoffset, 0,
                          ovector, sizeof(ovector)/sizeof(*ovector))) > 0) {
      char metric[128];
      char value[128];
      startoffset = ovector[1];
      /* A zero-length match would be found again at the same offset on the
       * next pass; step over it so the loop always makes progress. */
      if(ovector[0] == ovector[1]) startoffset++;
      noitL(data->nldeb, "matched at offset %d\n", rc);
      if(pcre_copy_named_substring(ci->matcher, ci->output, ovector, rc,
                                   "key", metric, sizeof(metric)) > 0 &&
         pcre_copy_named_substring(ci->matcher, ci->output, ovector, rc,
                                   "value", value, sizeof(value)) > 0) {
        /* We're able to extract something... */
        noit_stats_set_metric(&current, metric, METRIC_GUESS, value);
      }
      noitL(data->nldeb, "going to match output at %d/%d\n", startoffset, len);
    }
    noitL(data->nldeb, "match failed.... %d\n", rc);
  }

  current.status = ci->output;
  noit_check_set_stats(self, check, &current);

  /* If we didn't exit normally, or we core, or we have stderr to report...
   * provide a full report.
   */
  if((WTERMSIG(ci->exit_code) != SIGQUIT && WTERMSIG(ci->exit_code) != 0) ||
     WCOREDUMP(ci->exit_code) ||
     (ci->error && *ci->error)) {
    char uuid_str[37];
    uuid_unparse_lower(check->checkid, uuid_str);
    noitL(data->nlerr, "external/%s: (sig:%d%s) [%s]\n", uuid_str,
          WTERMSIG(ci->exit_code), WCOREDUMP(ci->exit_code)?", cored":"",
          ci->error ? ci->error : "");
  }
}
pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount) { /* ALL strings are managed as UTF-8 by default */ int options = PCRE_UTF8; int size_offsets = 45; int size_offsets_max; int *offsets = NULL; int all_use_dfa = 0; BOOL LOOP_PCRE_TST = FALSE; /* These vectors store, end-to-end, a list of captured substring names. Assume that 1024 is plenty long enough for the few names we'll be testing. */ char copynames[1024]; char getnames[1024]; char *copynamesptr = NULL; char *getnamesptr = NULL; int rc = 0; (void)pcre_config(PCRE_CONFIG_UTF8, &rc); if (rc != 1) { return UTF8_NOT_SUPPORTED; } /* bug 3891 */ /* backslash characters are not interpreted for input */ buffer = strsub(INPUT_LINE, "\\", "\\\\"); size_offsets_max = size_offsets; offsets = (int *)MALLOC(size_offsets_max * sizeof(int)); if (offsets == NULL) { if (buffer) { FREE(buffer); buffer = NULL; } return NOT_ENOUGH_MEMORY_FOR_VECTOR; } /* Main loop */ LOOP_PCRE_TST = FALSE; while (!LOOP_PCRE_TST) { pcre *re = NULL; pcre_extra *extra = NULL; const char *error = NULL; char *back_p = NULL; char *p = NULL; char *pp = NULL; char *ppp = NULL; const unsigned char *tables = NULL; int do_G = 0; int do_g = 0; int erroroffset = 0, len = 0, delimiter; LOOP_PCRE_TST = TRUE; p = strdup(INPUT_PAT); back_p = p; while (isspace(*p)) { p++; } if (*p == 0) { continue; } /* In-line pattern (the usual case). Get the delimiter and seek the end of the pattern; if is isn't complete, read more. 
*/ delimiter = *p++; if (isalnum(delimiter) || delimiter == '\\') { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return DELIMITER_NOT_ALPHANUMERIC; } pp = p; while (*pp != 0) { if (*pp == '\\' && pp[1] != 0) { pp++; } else if (*pp == delimiter) { break; } pp++; } /* If the delimiter can't be found, it's a syntax error */ if (*pp == 0) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } if (offsets) { FREE(offsets); } return CAN_NOT_COMPILE_PATTERN; } /* If the first character after the delimiter is backslash, make the pattern end with backslash. This is purely to provide a way of testing for the error message when a pattern ends with backslash. */ if (pp[1] == '\\') { *pp++ = '\\'; } /* Terminate the pattern at the delimiter, and save a copy of the pattern for callouts. */ *pp++ = 0; /* Look for options after final delimiter */ //options = 8192; while (*pp != 0) { switch (*pp++) { case 'f': options |= PCRE_FIRSTLINE; break; case 'g': do_g = 1; break; case 'i': options |= PCRE_CASELESS; break; case 'm': options |= PCRE_MULTILINE; break; case 's': options |= PCRE_DOTALL; break; case 'x': options |= PCRE_EXTENDED; break; case '+': break; case 'A': options |= PCRE_ANCHORED; break; case 'B': break; case 'C': options |= PCRE_AUTO_CALLOUT; break; case 'D': break; case 'E': options |= PCRE_DOLLAR_ENDONLY; break; case 'F': break; case 'G': do_G = 1; break; case 'I': break; case 'J': options |= PCRE_DUPNAMES; break; case 'M': break; case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; case 'S': break; case 'U': options |= PCRE_UNGREEDY; break; case 'X': options |= PCRE_EXTRA; break; case 'Z': break; case '8': { int rc = 0; (void)pcre_config(PCRE_CONFIG_UTF8, &rc); if (rc != 1) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); } return UTF8_NOT_SUPPORTED; } options |= 
PCRE_UTF8; } break; case '?': options |= PCRE_NO_UTF8_CHECK; break; case 'L': ppp = pp; /* The '\r' test here is so that it works on Windows. */ /* The '0' test is just in case this is an unterminated line. */ while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') { ppp++; } *ppp = 0; if (setlocale(LC_CTYPE, (const char *)pp) == NULL) { goto SKIP_DATA; } tables = pcre_maketables(); pp = ppp; break; case '>': while (*pp != 0) { pp++; } while (isspace(pp[-1])) { pp--; } *pp = 0; break; case '<': { while (*pp++ != '>') { ; } } break; case '\r': /* So that it works in Windows */ case '\n': case ' ': break; default: goto SKIP_DATA; } } /* Handle compiling via the POSIX interface, which doesn't support the timing, showing, or debugging options, nor the ability to pass over local character tables. */ { re = pcre_compile((char *)p, options, &error, &erroroffset, tables); /* Compilation failed; go back for another re, skipping to blank line if non-interactive. */ if (re == NULL) { SKIP_DATA: if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return CAN_NOT_COMPILE_PATTERN; } } /* End of non-POSIX compile */ /* Read data lines and test them */ { char *q = NULL; char *bptr = NULL; int *use_offsets = offsets; int use_size_offsets = size_offsets; int callout_data = 0; int callout_data_set = 0; int count = 0; int c = 0; int copystrings = 0; int find_match_limit = 0; int getstrings = 0; int gmatched = 0; int start_offset = 0; int g_notempty = 0; int use_dfa = 0; options = 0; *copynames = 0; *getnames = 0; copynamesptr = copynames; getnamesptr = getnames; callout_count = 0; callout_fail_count = 999999; callout_fail_id = -1; if (extra != NULL) { extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION); } p = buffer; bptr = q = buffer; while ((c = *p++) != 0) { int i 
= 0; int n = 0; if (c == '\\') switch ((c = *p++)) { case 'a': c = 7; break; case 'b': c = '\b'; break; case 'e': c = 27; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': c -= '0'; while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9') { c = c * 8 + *p++ - '0'; } break; case 'x': /* Ordinary \x */ c = 0; while (i++ < 2 && isxdigit(*p)) { c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W'); p++; } break; case 0: /* \ followed by EOF allows for an empty line */ p--; continue; case '>': while (isdigit(*p)) { start_offset = start_offset * 10 + *p++ - '0'; } continue; case 'A': /* Option setting */ options |= PCRE_ANCHORED; continue; case 'B': options |= PCRE_NOTBOL; continue; case 'C': if (isdigit(*p)) /* Set copy string */ { while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } copystrings |= 1 << n; } else if (isalnum(*p)) { char *npp = copynamesptr; while (isalnum(*p)) { *npp++ = *p++; } *npp++ = 0; *npp = 0; pcre_get_stringnumber(re, (char *)copynamesptr); copynamesptr = npp; } else if (*p == '+') { p++; } else if (*p == '-') { p++; } else if (*p == '!') { callout_fail_id = 0; p++; while (isdigit(*p)) { callout_fail_id = callout_fail_id * 10 + *p++ - '0'; } callout_fail_count = 0; if (*p == '!') { p++; while (isdigit(*p)) { callout_fail_count = callout_fail_count * 10 + *p++ - '0'; } } } else if (*p == '*') { int sign = 1; callout_data = 0; if (*(++p) == '-') { sign = -1; p++; } while (isdigit(*p)) { callout_data = callout_data * 10 + *p++ - '0'; } callout_data *= sign; callout_data_set = 1; } continue; case 'G': if (isdigit(*p)) { while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } getstrings |= 1 << n; } else if (isalnum(*p)) { char *npp = getnamesptr; while (isalnum(*p)) { *npp++ = *p++; } *npp++ = 0; *npp = 0; pcre_get_stringnumber(re, (char *)getnamesptr); getnamesptr = npp; } continue; case 
'L': continue; case 'M': find_match_limit = 1; continue; case 'N': options |= PCRE_NOTEMPTY; continue; case 'O': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (n > size_offsets_max) { size_offsets_max = n; if (offsets) { FREE(offsets); } use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int)); } use_size_offsets = n; if (n == 0) { use_offsets = NULL; /* Ensures it can't write to it */ } continue; case 'P': options |= PCRE_PARTIAL; continue; case 'Q': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; extra->match_limit_recursion = n; continue; case 'q': while (isdigit(*p)) { n = n * 10 + *p++ - '0'; } if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_MATCH_LIMIT; extra->match_limit = n; continue; #if !defined NODFA case 'R': options |= PCRE_DFA_RESTART; continue; #endif case 'S': continue; case 'Z': options |= PCRE_NOTEOL; continue; case '?': options |= PCRE_NO_UTF8_CHECK; continue; case '<': { while (*p++ != '>') { ; } } continue; } *q++ = (char)c; } *q = 0; len = (int)(q - buffer); if ((all_use_dfa || use_dfa) && find_match_limit) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (p) { FREE(p); p = NULL; } if (re) { (*pcre_free)(re); re = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING; } /* Handle matching via the POSIX interface, which does not support timing or playing with the match limit or callout data. */ for (;; gmatched++) /* Loop for /g or /G */ { /* If find_match_limit is set, we want to do repeated matches with varying limits in order to find the minimum value for the match limit and for the recursion limit. 
*/ if (find_match_limit) { if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } (void)check_match_limit(re, extra, bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets, PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit), PCRE_ERROR_MATCHLIMIT); count = check_match_limit(re, extra, bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets, PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion), PCRE_ERROR_RECURSIONLIMIT); } /* If callout_data is set, use the interface with additional data */ else if (callout_data_set) { if (extra == NULL) { extra = (pcre_extra *)MALLOC(sizeof(pcre_extra)); extra->flags = 0; } extra->flags |= PCRE_EXTRA_CALLOUT_DATA; extra->callout_data = &callout_data; count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets); extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA; } /* The normal case is just to do the match once, with the default value of match_limit. 
*/ else { count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options | g_notempty, use_offsets, use_size_offsets); if (count == 0) { count = use_size_offsets / 3; } //to retrieve backref count and values if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL) { int i = 0; int iErr = 0; iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount); //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount); if (*_piCapturedStringCount > 0) { *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount); for (i = 0 ; i < *_piCapturedStringCount ; i++) { char* pstSubstring = NULL; pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring); if (pstSubstring != NULL) { (*_pstCapturedString)[i] = strdup(pstSubstring); } pcre_free_substring(pstSubstring); } } } } /* Matched */ if (count >= 0) { int i, maxcount; maxcount = use_size_offsets / 3; /* This is a check against a lunatic return value. */ if (count > maxcount) { if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (re) { (*pcre_free)(re); re = NULL; } if (tables) { (*pcre_free)((void*)tables); tables = NULL; } if (extra) { FREE(extra); extra = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return TOO_BIG_FOR_OFFSET_SIZE; } for (i = 0; i < count * 2; i += 2) { if (use_offsets[i] >= 0) { *Output_Start = use_offsets[i]; *Output_End = use_offsets[i + 1]; if (buffer) { FREE(buffer); } /* use_offsets = offsets no need to free use_offsets if we free offsets */ if (offsets) { FREE(offsets); } /* "re" allocated by pcre_compile (better to use free function associated)*/ if (re) { (*pcre_free)(re); } if (extra) { FREE(extra); } if (tables) { /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/ (*pcre_free)((void *)tables); tables = NULL; setlocale(LC_CTYPE, "C"); } if (back_p) { FREE(back_p); back_p = NULL; } return PCRE_FINISHED_OK; } } for (copynamesptr 
= copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1) { char copybuffer[256]; pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer)); } for (i = 0; i < 32; i++) { if ((getstrings & (1 << i)) != 0) { const char *substring; pcre_get_substring((char *)bptr, use_offsets, count, i, &substring); } } for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1) { const char *substring; pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring); } } /* Failed to match. If this is a /g or /G loop and we previously set g_notempty after a null match, this is not necessarily the end. We want to advance the start offset, and continue. We won't be at the end of the string - that was checked before setting g_notempty. Complication arises in the case when the newline option is "any" or "anycrlf". If the previous match was at the end of a line terminated by CRLF, an advance of one character just passes the \r, whereas we should prefer the longer newline sequence, as does the code in pcre_exec(). Fudge the offset value to achieve this. Otherwise, in the case of UTF-8 matching, the advance must be one character, not one byte. 
*/ else { if (count == PCRE_ERROR_NOMATCH) { if (gmatched == 0) { if (tables) { (*pcre_free)((void *)tables); tables = NULL; } if (re) { (*pcre_free)((void *)re); re = NULL; } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); } if (p) { FREE(back_p); back_p = NULL; } return NO_MATCH; } } if (count == PCRE_ERROR_MATCHLIMIT ) { if (tables) { (*pcre_free)((void *)tables); tables = NULL; } if (re) { (*pcre_free)((void *)re); re = NULL; } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } if (back_p) { FREE(back_p); back_p = NULL; } return MATCH_LIMIT; } break; /* Out of loop */ } /* If not /g or /G we are done */ if (!do_g && !do_G) { break; } /* If we have matched an empty string, first check to see if we are at the end of the subject. If so, the /g loop is over. Otherwise, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ g_notempty = 0; if (use_offsets[0] == use_offsets[1]) { if (use_offsets[0] == len) { break; } g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; } /* For /g, update the start offset, leaving the rest alone */ if (do_g) { start_offset = use_offsets[1]; } /* For /G, update the pointer and length */ else { bptr += use_offsets[1]; len -= use_offsets[1]; } } /* End of loop for /g and /G */ if (re) { (*pcre_free)(re); re = NULL; } if (extra) { FREE(extra); extra = NULL; } if (tables) { (*pcre_free)((void *)tables); tables = NULL; } FREE(back_p); back_p = NULL; continue; } /* End of loop for data lines */ } if (buffer) { FREE(buffer); buffer = NULL; } if (offsets) { FREE(offsets); offsets = NULL; } return PCRE_EXIT; }
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags) { char *output = text; /** * Retrieve all matches and store them in * mSubStrings list. */ if (MatchAll(output) == -1) { return -1; } size_t subjectLen = strlen(subject); size_t total = 0; size_t baseIndex = 0; size_t diffLength = 0; char *toReplace = new char[textMaxLen + 1]; char *toSearch = NULL; /** * All characters which is not matched are not copied when replacing matches. * Then original text (output buffer) should be considerated as empty. */ if (flags & REGEX_FORMAT_NOCOPY) { *output = '\0'; } else { /** * This is used only when we do replace matches. */ toSearch = new char[textMaxLen + 1]; } /** * Loop over all matches found. */ for (size_t i = 0; i < mMatchesSubs.length(); ++i) { char *ptr = toReplace; size_t browsed = 0; size_t searchLen = 0; size_t length = 0; /** * Build the replace string as it can contain backreference * and this needs to be parsed. */ for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed) { unsigned int c = *s; /** * Supported format specifiers: * * $number : Substitutes the substring matched by group number. * n must be an integer value designating a valid backreference, greater than 0, and of two digits at most. * ${name} : Substitutes the substring matched by the named group name (a maximum of 32 characters). * $& : Substitutes a copy of the whole match. * $` : Substitutes all the text of the input string before the match. * $' : Substitutes all the text of the input string after the match. * $+ : Substitutes the last group that was captured. * $_ : Substitutes the entire input string. * $$ : Substitutes a literal "$". */ if (c == '$' || c == '\\') { switch (*++s) { case '\0': { /** * End of string. * Copy one character. */ *(ptr + browsed) = c; break; } case '&': { /** * Concatenate retrieved full match sub-string. * length - 1 to overwrite EOS. 
*/ GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length); browsed += length - 1; break; } case '`': { /** * Concatenate part of original text up to * first sub-string position. */ length = mSubStrings.at(baseIndex).start; memcpy(ptr + browsed, subject, length); browsed += length - 1; break; } case '\'': { /** * Concatenate part of original text from * last sub-string end position to EOS. */ length = mSubStrings.at(baseIndex).end; memcpy(ptr + browsed, subject + length, subjectLen - length); browsed += (subjectLen - length) - 1; break; } case '+': { /** * Copy the last group that was captured. */ GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length); browsed += length - 1; break; } case '_': { /** * Copy the entire input string. */ memcpy(ptr + browsed, subject, subjectLen); browsed += (subjectLen - 1); break; } case '$': case '\\': { /** * Copy the single character $ or \. */ *(ptr + browsed) = c; break; } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '{': { /** * Checking backreference. * Which can be either $n, ${n} or ${name}. */ int backref = -1; const char *walk = s; bool inBrace = false; bool nameCheck = false; /** * ${nn}. * ^ */ if (*walk == '{') { inBrace = true; ++walk; } /** * Valid number. * $nn or ${nn} * ^ ^ */ if (*walk >= '0' && *walk <= '9') { backref = *walk - '0'; ++walk; } else if (inBrace) { nameCheck = true; /** * Not a valid number. * Checking as string. * ${name} * ^ */ if (*walk) { const char *pch = strchr(walk, '}'); if (pch != NULL) { /** * A named group maximum character is 32 (PCRE). */ char name[32]; size_t nameLength = strncopy(name, walk, pch - walk + 1); int flags, num = 0; pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags); /** * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used * as pcre_get_stringnumber output order is not defined. 
*/ if (flags & PCRE_DUPNAMES) { memset(ovector, 0, REGEX_MAX_SUBPATTERNS); /** * pcre_copy_named_substring needs a vector containing sub-patterns ranges * for a given match. */ for (size_t j = 0; j < mMatchesSubs.at(i); ++j) { ovector[2 * j] = mSubStrings.at(baseIndex + j).start; ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end; } num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen); if (num != PCRE_ERROR_NOSUBSTRING) { browsed += num - 1; s = pch; break; } ++pch; } else { /** * Retrieve sub-pattern index from a give name. */ num = pcre_get_stringnumber(re, name); if (num != PCRE_ERROR_NOSUBSTRING) { backref = num; walk = ++pch; } } if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i)) { /** * If a sub-string for a given match is not found, or if > to * number of sub-patterns we still need to check if this * group name is a valid one because if so we want to escape it. * Looking at the name table. */ bool found = false; for (size_t i = 0; i < mSubsNameTable.length(); ++i) { if (!mSubsNameTable.at(i).name.compare(name)) { --browsed; s = --pch; found = true; break; } } if (found) { continue; } } } } } if (!nameCheck) { /** * Valid second number. * $nn or ${nn} * ^ ^ */ if (*walk && *walk >= '0' && *walk <= '9') { backref = backref * 10 + *walk - '0'; ++walk; } if (inBrace) { /** * Invalid specifier * Either hit EOS or missing }. * ${n or ${nn or ${nx or ${nnx * ^ ^ ^ ^ */ if (*walk == '\0' || *walk != '}') { backref = -1; } else { ++walk; } } } length = walk - s; s = --walk; /** * We can't provide a capture number >= to total that pcre_exec has found. * 0 is implicitly accepted, same behavior as $&. */ if (backref >= 0 && backref < mNumSubpatterns) { /** * Valid available index for a given match. */ if ((size_t)backref < mMatchesSubs.at(i)) { /** * Concatenate retrieved sub-string. * length - 1 to overwrite EOS. 
*/ GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length); browsed += length - 1; } else { /** * Valid unavailable index for a given match. */ --browsed; } } else { /** * If we here it means the syntax is valid but sub-pattern doesn't exist. * So, copy as it is, including $. */ memcpy(ptr + browsed, s - length, length + 1); browsed += length; } break; } default: { /** * Not a valid format modifier. * So we copy characters as it is. */ *(ptr + browsed) = *s; break; } } } else { /** * At this point, direct copy. */ *(ptr + browsed) = c; } } *(ptr + browsed) = '\0'; /** * Concatenate only replace string of each match, * as we don't want to copy unmatched characters. */ if (flags & REGEX_FORMAT_NOCOPY) { /** * We want just the first occurrence. */ if (total++ && (flags & REGEX_FORMAT_FIRSTONLY)) { break; } strncat(output, toReplace, textMaxLen + 1); } else { /** * Retrieves full string of a given match. */ const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen); /** * We get something to replace, but the sub-pattern to search is empty. * We insert replacement either a the start end or string. */ if (*toReplace && !searchLen) { if (output - text > 0) { strncat(output, toReplace, textMaxLen); } else { strncat(toReplace, text, textMaxLen); strncopy(text, toReplace, strlen(toReplace) + 1); } ++total; } else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL) { /** * Then we simply do a replace. * Probably not the most efficient, but this should be at least safe. * To avoid issue where the function could find a string which is not at the expected index, * We force the input string to start from index of the full match. */ ++total; } if (total && (flags & REGEX_FORMAT_FIRSTONLY)) { break; } } /** * mMatchesSubs is a flat list containing all sub-patterns of all matches. * A number of sub-patterns can vary per match. 
So we calculate the position in the list, * from where the first sub-pattern result of current match starts. */ baseIndex += mMatchesSubs.at(i); diffLength += browsed - searchLen; } delete[] toReplace; if (toSearch != NULL) { delete[] toSearch; } /** * Return the number of successful replacements. */ return total; }