Ejemplo n.º 1
0
static void external_log_results(noit_module_t *self, noit_check_t *check) {
  external_data_t *data;
  struct check_info *ci;
  stats_t current;
  struct timeval duration;

  noit_check_stats_clear(&current);

  data = noit_module_get_userdata(self);
  ci = (struct check_info *)check->closure;

  noitL(data->nldeb, "external(%s) (timeout: %d, exit: %x)\n",
        check->target, ci->timedout, ci->exit_code);

  gettimeofday(&current.whence, NULL);
  sub_timeval(current.whence, check->last_fire_time, &duration);
  current.duration = duration.tv_sec * 1000 + duration.tv_usec / 1000;
  if(ci->timedout) {
    current.available = NP_UNAVAILABLE;
    current.state = NP_BAD;
  }
  else if(WEXITSTATUS(ci->exit_code) == 3) {
    current.available = NP_UNKNOWN;
    current.state = NP_UNKNOWN;
  }
  else {
    current.available = NP_AVAILABLE;
    current.state = (WEXITSTATUS(ci->exit_code) == 0) ? NP_GOOD : NP_BAD;
  }

  /* Hack the output into metrics */
  if(ci->output && ci->matcher) {
    int rc, len, startoffset = 0;
    int ovector[30];
    len = strlen(ci->output);
    noitL(data->nldeb, "going to match output at %d/%d\n", startoffset, len);
    while((rc = pcre_exec(ci->matcher, NULL, ci->output, len, startoffset, 0,
                          ovector, sizeof(ovector)/sizeof(*ovector))) > 0) {
      char metric[128];
      char value[128];
      startoffset = ovector[1];
      noitL(data->nldeb, "matched at offset %d\n", rc);
      if(pcre_copy_named_substring(ci->matcher, ci->output, ovector, rc,
                                   "key", metric, sizeof(metric)) > 0 &&
         pcre_copy_named_substring(ci->matcher, ci->output, ovector, rc,
                                   "value", value, sizeof(value)) > 0) {
        /* We're able to extract something... */
        noit_stats_set_metric(&current, metric, METRIC_GUESS, value);
      }
      noitL(data->nldeb, "going to match output at %d/%d\n", startoffset, len);
    }
    noitL(data->nldeb, "match failed.... %d\n", rc);
  }

  current.status = ci->output;
  noit_check_set_stats(self, check, &current);

  /* If we didn't exit normally, or we core, or we have stderr to report...
   * provide a full report.
   */
  if((WTERMSIG(ci->exit_code) != SIGQUIT && WTERMSIG(ci->exit_code) != 0) ||
     WCOREDUMP(ci->exit_code) ||
     (ci->error && *ci->error)) {
    char uuid_str[37];
    uuid_unparse_lower(check->checkid, uuid_str);
    noitL(data->nlerr, "external/%s: (sig:%d%s) [%s]\n", uuid_str,
          WTERMSIG(ci->exit_code), WCOREDUMP(ci->exit_code)?", cored":"",
          ci->error ? ci->error : "");
  }
}
Ejemplo n.º 2
0
pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
{
    /* ALL strings are managed as UTF-8 by default */
    int options = PCRE_UTF8;
    int size_offsets = 45;
    int size_offsets_max;
    int *offsets = NULL;
    int all_use_dfa = 0;
    BOOL LOOP_PCRE_TST = FALSE;

    /* These vectors store, end-to-end, a list of captured substring names. Assume
    that 1024 is plenty long enough for the few names we'll be testing. */

    char copynames[1024];
    char getnames[1024];

    char *copynamesptr = NULL;
    char *getnamesptr = NULL;

    int rc = 0;
    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
    if (rc != 1)
    {
        return UTF8_NOT_SUPPORTED;
    }

    /* bug 3891 */
    /* backslash characters are not interpreted for input */
    buffer = strsub(INPUT_LINE, "\\", "\\\\");

    size_offsets_max = size_offsets;
    offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
    if (offsets == NULL)
    {
        if (buffer)
        {
            FREE(buffer);
            buffer = NULL;
        }
        return NOT_ENOUGH_MEMORY_FOR_VECTOR;
    }
    /* Main loop */
    LOOP_PCRE_TST = FALSE;
    while (!LOOP_PCRE_TST)
    {
        pcre *re = NULL;
        pcre_extra *extra = NULL;
        const char *error = NULL;
        char *back_p = NULL;
        char *p = NULL;
        char *pp = NULL;
        char *ppp = NULL;
        const unsigned char *tables = NULL;
        int do_G = 0;
        int do_g = 0;
        int erroroffset = 0, len = 0, delimiter;

        LOOP_PCRE_TST = TRUE;
        p = strdup(INPUT_PAT);
        back_p = p;
        while (isspace(*p))
        {
            p++;
        }
        if (*p == 0)
        {
            continue;
        }
        /* In-line pattern (the usual case). Get the delimiter and seek the end of
        the pattern; if is isn't complete, read more. */

        delimiter = *p++;

        if (isalnum(delimiter) || delimiter == '\\')
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            return DELIMITER_NOT_ALPHANUMERIC;
        }

        pp = p;

        while (*pp != 0)
        {
            if (*pp == '\\' && pp[1] != 0)
            {
                pp++;
            }
            else if (*pp == delimiter)
            {
                break;
            }
            pp++;
        }

        /* If the delimiter can't be found, it's a syntax error */
        if (*pp == 0)
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
            }
            return CAN_NOT_COMPILE_PATTERN;
        }

        /* If the first character after the delimiter is backslash, make
        the pattern end with backslash. This is purely to provide a way
        of testing for the error message when a pattern ends with backslash. */

        if (pp[1] == '\\')
        {
            *pp++ = '\\';
        }

        /* Terminate the pattern at the delimiter, and save a copy of the pattern
        for callouts. */

        *pp++ = 0;

        /* Look for options after final delimiter */

        //options = 8192;

        while (*pp != 0)
        {
            switch (*pp++)
            {
                case 'f':
                    options |= PCRE_FIRSTLINE;
                    break;
                case 'g':
                    do_g = 1;
                    break;
                case 'i':
                    options |= PCRE_CASELESS;
                    break;
                case 'm':
                    options |= PCRE_MULTILINE;
                    break;
                case 's':
                    options |= PCRE_DOTALL;
                    break;
                case 'x':
                    options |= PCRE_EXTENDED;
                    break;
                case '+':
                    break;
                case 'A':
                    options |= PCRE_ANCHORED;
                    break;
                case 'B':
                    break;
                case 'C':
                    options |= PCRE_AUTO_CALLOUT;
                    break;
                case 'D':
                    break;
                case 'E':
                    options |= PCRE_DOLLAR_ENDONLY;
                    break;
                case 'F':
                    break;
                case 'G':
                    do_G = 1;
                    break;
                case 'I':
                    break;
                case 'J':
                    options |= PCRE_DUPNAMES;
                    break;
                case 'M':
                    break;
                case 'N':
                    options |= PCRE_NO_AUTO_CAPTURE;
                    break;
                case 'S':
                    break;
                case 'U':
                    options |= PCRE_UNGREEDY;
                    break;
                case 'X':
                    options |= PCRE_EXTRA;
                    break;
                case 'Z':
                    break;
                case '8':
                {
                    int rc = 0;
                    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                    if (rc != 1)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                        }
                        return UTF8_NOT_SUPPORTED;
                    }
                    options |= PCRE_UTF8;
                }
                break;
                case '?':
                    options |= PCRE_NO_UTF8_CHECK;
                    break;
                case 'L':
                    ppp = pp;
                    /* The '\r' test here is so that it works on Windows. */
                    /* The '0' test is just in case this is an unterminated line. */
                    while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
                    {
                        ppp++;
                    }
                    *ppp = 0;
                    if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
                    {
                        goto SKIP_DATA;
                    }

                    tables = pcre_maketables();
                    pp = ppp;
                    break;
                case '>':
                    while (*pp != 0)
                    {
                        pp++;
                    }
                    while (isspace(pp[-1]))
                    {
                        pp--;
                    }
                    *pp = 0;
                    break;
                case '<':
                {
                    while (*pp++ != '>')
                    {
                        ;
                    }
                }
                break;
                case '\r':                      /* So that it works in Windows */
                case '\n':
                case ' ':
                    break;

                default:
                    goto SKIP_DATA;
            }
        }

        /* Handle compiling via the POSIX interface, which doesn't support the
        timing, showing, or debugging options, nor the ability to pass over
        local character tables. */


        {
            re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
            /* Compilation failed; go back for another re, skipping to blank line
            if non-interactive. */
            if (re == NULL)
            {
SKIP_DATA:
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                if (back_p)
                {
                    FREE(back_p);
                    back_p = NULL;
                }
                return CAN_NOT_COMPILE_PATTERN;
            }

        }        /* End of non-POSIX compile */

        /* Read data lines and test them */
        {
            char *q = NULL;
            char *bptr = NULL;
            int *use_offsets = offsets;
            int use_size_offsets = size_offsets;
            int callout_data = 0;
            int callout_data_set = 0;
            int count = 0;
            int c = 0;
            int copystrings = 0;
            int find_match_limit = 0;
            int getstrings = 0;
            int gmatched = 0;
            int start_offset = 0;
            int g_notempty = 0;
            int use_dfa = 0;

            options = 0;
            *copynames = 0;
            *getnames = 0;

            copynamesptr = copynames;
            getnamesptr = getnames;

            callout_count = 0;
            callout_fail_count = 999999;
            callout_fail_id = -1;

            if (extra != NULL)
            {
                extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
            }
            p = buffer;
            bptr = q = buffer;
            while ((c = *p++) != 0)
            {
                int i = 0;
                int n = 0;

                if (c == '\\') switch ((c = *p++))
                    {
                        case 'a':
                            c =    7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'e':
                            c =   27;
                            break;
                        case 'f':
                            c = '\f';
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = '\v';
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                            c -= '0';
                            while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
                            {
                                c = c * 8 + *p++ - '0';
                            }
                            break;
                        case 'x':
                            /* Ordinary \x */
                            c = 0;
                            while (i++ < 2 && isxdigit(*p))
                            {
                                c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
                                p++;
                            }
                            break;
                        case 0:   /* \ followed by EOF allows for an empty line */
                            p--;
                            continue;
                        case '>':
                            while (isdigit(*p))
                            {
                                start_offset = start_offset * 10 + *p++ - '0';
                            }
                            continue;
                        case 'A':  /* Option setting */
                            options |= PCRE_ANCHORED;
                            continue;
                        case 'B':
                            options |= PCRE_NOTBOL;
                            continue;
                        case 'C':
                            if (isdigit(*p))    /* Set copy string */
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                copystrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = copynamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)copynamesptr);
                                copynamesptr = npp;
                            }
                            else if (*p == '+')
                            {
                                p++;
                            }
                            else if (*p == '-')
                            {
                                p++;
                            }
                            else if (*p == '!')
                            {
                                callout_fail_id = 0;
                                p++;
                                while (isdigit(*p))
                                {
                                    callout_fail_id = callout_fail_id * 10 + *p++ - '0';
                                }
                                callout_fail_count = 0;
                                if (*p == '!')
                                {
                                    p++;
                                    while (isdigit(*p))
                                    {
                                        callout_fail_count = callout_fail_count * 10 + *p++ - '0';
                                    }
                                }
                            }
                            else if (*p == '*')
                            {
                                int sign = 1;
                                callout_data = 0;
                                if (*(++p) == '-')
                                {
                                    sign = -1;
                                    p++;
                                }
                                while (isdigit(*p))
                                {
                                    callout_data = callout_data * 10 + *p++ - '0';
                                }
                                callout_data *= sign;
                                callout_data_set = 1;
                            }
                            continue;
                        case 'G':
                            if (isdigit(*p))
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                getstrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = getnamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)getnamesptr);
                                getnamesptr = npp;
                            }
                            continue;
                        case 'L':
                            continue;
                        case 'M':
                            find_match_limit = 1;
                            continue;
                        case 'N':
                            options |= PCRE_NOTEMPTY;
                            continue;
                        case 'O':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (n > size_offsets_max)
                            {
                                size_offsets_max = n;
                                if (offsets)
                                {
                                    FREE(offsets);
                                }
                                use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
                            }
                            use_size_offsets = n;
                            if (n == 0)
                            {
                                use_offsets = NULL;    /* Ensures it can't write to it */
                            }
                            continue;
                        case 'P':
                            options |= PCRE_PARTIAL;
                            continue;
                        case 'Q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                            extra->match_limit_recursion = n;
                            continue;
                        case 'q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
                            extra->match_limit = n;
                            continue;
#if !defined NODFA
                        case 'R':
                            options |= PCRE_DFA_RESTART;
                            continue;
#endif
                        case 'S':

                            continue;
                        case 'Z':
                            options |= PCRE_NOTEOL;
                            continue;
                        case '?':
                            options |= PCRE_NO_UTF8_CHECK;
                            continue;
                        case '<':
                        {
                            while (*p++ != '>')
                            {
                                ;
                            }
                        }
                        continue;
                    }
                *q++ = (char)c;
            }
            *q = 0;
            len = (int)(q - buffer);
            if ((all_use_dfa || use_dfa) && find_match_limit)
            {
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (p)
                {
                    FREE(p);
                    p = NULL;
                }
                if (re)
                {
                    (*pcre_free)(re);
                    re = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
            }
            /* Handle matching via the POSIX interface, which does not
            support timing or playing with the match limit or callout data. */
            for (;; gmatched++)    /* Loop for /g or /G */
            {

                /* If find_match_limit is set, we want to do repeated matches with
                varying limits in order to find the minimum value for the match limit and
                for the recursion limit. */

                if (find_match_limit)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }

                    (void)check_match_limit(re, extra, bptr, len, start_offset,
                                            options | g_notempty, use_offsets, use_size_offsets,
                                            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
                                            PCRE_ERROR_MATCHLIMIT);

                    count = check_match_limit(re, extra, bptr, len, start_offset,
                                              options | g_notempty, use_offsets, use_size_offsets,
                                              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
                                              PCRE_ERROR_RECURSIONLIMIT);
                }
                /* If callout_data is set, use the interface with additional data */
                else if (callout_data_set)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }
                    extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
                    extra->callout_data = &callout_data;
                    count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
                                      options | g_notempty, use_offsets, use_size_offsets);

                    extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
                }
                /* The normal case is just to do the match once, with the default
                value of match_limit. */
                else
                {
                    count = pcre_exec(re, extra, (char *)bptr, len,
                                      start_offset, options | g_notempty, use_offsets, use_size_offsets);
                    if (count == 0)
                    {
                        count = use_size_offsets / 3;
                    }

                    //to retrieve backref count and values
                    if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
                    {
                        int i = 0;
                        int iErr = 0;

                        iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
                        //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);

                        if (*_piCapturedStringCount > 0)
                        {
                            *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
                            for (i = 0 ; i < *_piCapturedStringCount ; i++)
                            {
                                char* pstSubstring = NULL;
                                pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
                                if (pstSubstring != NULL)
                                {
                                    (*_pstCapturedString)[i] = strdup(pstSubstring);
                                }
                                pcre_free_substring(pstSubstring);
                            }
                        }
                    }
                }
                /* Matched */
                if (count >= 0)
                {
                    int i, maxcount;
                    maxcount = use_size_offsets / 3;
                    /* This is a check against a lunatic return value. */
                    if (count > maxcount)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)(re);
                            re = NULL;
                        }
                        if (tables)
                        {
                            (*pcre_free)((void*)tables);
                            tables = NULL;
                        }
                        if (extra)
                        {
                            FREE(extra);
                            extra = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return TOO_BIG_FOR_OFFSET_SIZE;
                    }

                    for (i = 0; i < count * 2; i += 2)
                    {
                        if (use_offsets[i] >= 0)
                        {
                            *Output_Start = use_offsets[i];
                            *Output_End = use_offsets[i + 1];
                            if (buffer)
                            {
                                FREE(buffer);
                            }

                            /* use_offsets = offsets no need to free use_offsets if we free offsets */
                            if (offsets)
                            {
                                FREE(offsets);
                            }

                            /* "re" allocated by pcre_compile (better to use free function associated)*/
                            if (re)
                            {
                                (*pcre_free)(re);
                            }

                            if (extra)
                            {
                                FREE(extra);
                            }
                            if (tables)
                            {
                                /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                                setlocale(LC_CTYPE, "C");
                            }

                            if (back_p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return PCRE_FINISHED_OK;
                        }
                    }

                    for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
                    {
                        char copybuffer[256];
                        pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
                    }

                    for (i = 0; i < 32; i++)
                    {
                        if ((getstrings & (1 << i)) != 0)
                        {
                            const char *substring;
                            pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
                        }
                    }

                    for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
                    {
                        const char *substring;
                        pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
                    }

                }
                /* Failed to match. If this is a /g or /G loop and we previously set
                g_notempty after a null match, this is not necessarily the end. We want
                to advance the start offset, and continue. We won't be at the end of the
                string - that was checked before setting g_notempty.
                Complication arises in the case when the newline option is "any" or
                "anycrlf". If the previous match was at the end of a line terminated by
                CRLF, an advance of one character just passes the \r, whereas we should
                prefer the longer newline sequence, as does the code in pcre_exec().
                Fudge the offset value to achieve this.

                Otherwise, in the case of UTF-8 matching, the advance must be one
                character, not one byte. */
                else
                {
                    if (count == PCRE_ERROR_NOMATCH)
                    {
                        if (gmatched == 0)
                        {
                            if (tables)
                            {
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                            }
                            if (re)
                            {
                                (*pcre_free)((void *)re);
                                re = NULL;
                            }
                            if (buffer)
                            {
                                FREE(buffer);
                                buffer = NULL;
                            }
                            if (offsets)
                            {
                                FREE(offsets);
                            }
                            if (p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return NO_MATCH;
                        }
                    }

                    if (count == PCRE_ERROR_MATCHLIMIT )
                    {
                        if (tables)
                        {
                            (*pcre_free)((void *)tables);
                            tables = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)((void *)re);
                            re = NULL;
                        }
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return MATCH_LIMIT;
                    }
                    break;  /* Out of loop */
                }

                /* If not /g or /G we are done */
                if (!do_g && !do_G)
                {
                    break;
                }

                /* If we have matched an empty string, first check to see if we are at
                the end of the subject. If so, the /g loop is over. Otherwise, mimic
                what Perl's /g options does. This turns out to be rather cunning. First
                we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
                same point. If this fails (picked up above) we advance to the next
                character. */

                g_notempty = 0;

                if (use_offsets[0] == use_offsets[1])
                {
                    if (use_offsets[0] == len)
                    {
                        break;
                    }
                    g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
                }

                /* For /g, update the start offset, leaving the rest alone */

                if (do_g)
                {
                    start_offset = use_offsets[1];
                }
                /* For /G, update the pointer and length */
                else
                {
                    bptr += use_offsets[1];
                    len -= use_offsets[1];
                }
            }  /* End of loop for /g and /G */

            if (re)
            {
                (*pcre_free)(re);
                re = NULL;
            }
            if (extra)
            {
                FREE(extra);
                extra = NULL;
            }
            if (tables)
            {
                (*pcre_free)((void *)tables);
                tables = NULL;
            }

            FREE(back_p);
            back_p = NULL;
            continue;
        }    /* End of loop for data lines */
    }

    if (buffer)
    {
        FREE(buffer);
        buffer = NULL;
    }
    if (offsets)
    {
        FREE(offsets);
        offsets = NULL;
    }

    return PCRE_EXIT;
}
Ejemplo n.º 3
0
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
{
	char *output = text;

	/**
	 * Retrieve all matches and store them in 
	 * mSubStrings list.
	 */
	if (MatchAll(output) == -1)
	{
		return -1;
	}

	size_t subjectLen = strlen(subject);
	size_t total = 0;
	size_t baseIndex = 0;
	size_t diffLength = 0;

	char *toReplace = new char[textMaxLen + 1];
	char *toSearch = NULL;

	/**
	 * All characters which is not matched are not copied when replacing matches.
	 * Then original text (output buffer) should be considerated as empty.
	 */
	if (flags & REGEX_FORMAT_NOCOPY)
	{
		*output = '\0';
	}
	else
	{
		/**
		 * This is used only when we do replace matches.
		 */
		toSearch  = new char[textMaxLen + 1];
	}

	/** 
	 * Loop over all matches found.
	 */
	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
	{
		char *ptr = toReplace;

		size_t browsed = 0;
		size_t searchLen = 0;
		size_t length = 0;
	
		/**
		 * Build the replace string as it can contain backreference
		 * and this needs to be parsed.
		 */
		for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
		{
			unsigned int c = *s;

			/**
			 * Supported format specifiers:
			 *
			 *   $number  : Substitutes the substring matched by group number.
			 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
			 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
			 *   $&       : Substitutes a copy of the whole match.
			 *   $`       : Substitutes all the text of the input string before the match.
			 *   $'       : Substitutes all the text of the input string after the match.
			 *   $+       : Substitutes the last group that was captured.
			 *   $_       : Substitutes the entire input string.
			 *   $$       : Substitutes a literal "$".
			 */
			if (c == '$' || c == '\\')
			{
				switch (*++s)
				{
					case '\0':
					{
						/**
						 * End of string.
						 * Copy one character.
						 */
						 *(ptr + browsed) = c;
						 break;
					}
					case '&':
					{
						/**
						 * Concatenate retrieved full match sub-string.
						 * length - 1 to overwrite EOS.
						 */
						GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '`':
					{
						/**
						 * Concatenate part of original text up to
						 * first sub-string position.
						 */
						length = mSubStrings.at(baseIndex).start;
						memcpy(ptr + browsed, subject, length);
						browsed += length - 1;
						break;
					}
					case '\'':
					{
						/**
						 * Concatenate part of original text from
						 * last sub-string end position to EOS.
						 */
						length = mSubStrings.at(baseIndex).end;
						memcpy(ptr + browsed, subject + length, subjectLen - length);
						browsed += (subjectLen - length) - 1;
						break;
					}
					case '+':
					{
						/**
						 * Copy the last group that was captured.
						 */
						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '_':
					{
						/**
						 * Copy the entire input string.
						 */
						memcpy(ptr + browsed, subject, subjectLen);
						browsed += (subjectLen - 1);
						break;
					}
					case '$':
					case '\\':
					{
						/**
						 * Copy the single character $ or \.
						 */
						*(ptr + browsed) = c;
						break;
					}
					case '0': case '1':	case '2': case '3':	case '4': 
					case '5': case '6': case '7': case '8': case '9':
					case '{':
					{
						/**
						 * Checking backreference.
						 * Which can be either $n, ${n} or ${name}.
						 */
						int backref = -1;
						const char *walk = s;
						bool inBrace = false;
						bool nameCheck = false;

						/**
						 * ${nn}.
						 *  ^
						 */
						if (*walk == '{') 
						{
							inBrace = true;
							++walk;
						}

						/**
						 * Valid number.
						 * $nn or ${nn}
						 *  ^       ^
						 */
						if (*walk >= '0' && *walk <= '9')
						{
							backref = *walk - '0';
							++walk;
						}
						else if (inBrace)
						{
							nameCheck = true;

							/**
							 * Not a valid number.
							 * Checking as string.
							 * ${name}
							 *   ^
							 */
							if (*walk)
							{
								const char *pch = strchr(walk, '}');

								if (pch != NULL)
								{
									/**
									 * A named group maximum character is 32 (PCRE).
									 */
									char name[32];
									size_t nameLength = strncopy(name, walk, pch - walk + 1);

									int flags, num = 0;
									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &flags);

									/**
									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
									 * as pcre_get_stringnumber output order is not defined.
									 */
									if (flags & PCRE_DUPNAMES)
									{
										memset(ovector, 0, REGEX_MAX_SUBPATTERNS);

										/**
										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
										 * for a given match.
										 */
										for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
										{
											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
										}

										num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);

										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											browsed += num - 1;
											s = pch;
											break;
										}
										++pch;
									}
									else
									{
										/**
										 * Retrieve sub-pattern index from a give name.
										 */
										num = pcre_get_stringnumber(re, name);
										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											backref = num;
											walk = ++pch;
										}
									}

									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
									{
										/**
										 * If a sub-string for a given match is not found,  or if > to
										 * number of sub-patterns we still need to check if this 
										 * group name is a valid one because if so we want to escape it. 
										 * Looking at the name table.
										 */
										bool found = false;
										for (size_t i = 0; i < mSubsNameTable.length(); ++i)
										{
											if (!mSubsNameTable.at(i).name.compare(name))
											{
												--browsed;
												s = --pch;
												found = true;
												break;
											}
										}

										if (found)
										{
											continue;
										}
									}
								}
							}
						}

						if (!nameCheck)
						{
							/**
							 * Valid second number.
							 * $nn or ${nn}
							 *   ^       ^
							 */
							if (*walk && *walk >= '0' && *walk <= '9')
							{
								backref = backref * 10 + *walk - '0';
								++walk;
							}

							if (inBrace)
							{
								/**
								 * Invalid specifier
								 * Either hit EOS or missing }.
								 * ${n  or ${nn  or ${nx or ${nnx
								 *    ^        ^       ^        ^
								 */
								if (*walk == '\0' || *walk != '}')
								{
									backref = -1;
								}
								else
								{
									++walk;
								}
							}
						}

						length = walk - s;
						s = --walk;

						/**
						 * We can't provide a capture number >= to total that pcre_exec has found.
						 * 0 is implicitly accepted, same behavior as $&.
						 */
						if (backref >= 0 && backref < mNumSubpatterns)
						{
							/**
							 * Valid available index for a given match.
							 */
							if ((size_t)backref < mMatchesSubs.at(i))
							{
								/**
								 * Concatenate retrieved sub-string.
								 * length - 1 to overwrite EOS.
								 */
								GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
								browsed += length - 1;
							}
							else
							{
								/**
								 * Valid unavailable index for a given match.
								 */
								--browsed;
							}
						}
						else
						{
							/**
							 * If we here it means the syntax is valid but sub-pattern doesn't exist. 
							 * So, copy as it is, including $.
							 */
							memcpy(ptr + browsed, s - length, length + 1);
							browsed += length;
						}

						break;
					}
					default:
					{
						/**
						 * Not a valid format modifier.
						 * So we copy characters as it is.
						 */
						*(ptr + browsed) = *s;
						break;
					}
				}
			}
			else
			{
				/**
				 * At this point, direct copy.
				 */
				*(ptr + browsed) = c;
			}
		}

		*(ptr + browsed) = '\0';

		/**
		 * Concatenate only replace string of each match, 
		 * as we don't want to copy unmatched characters.
		 */
		if (flags & REGEX_FORMAT_NOCOPY)
		{
			/**
			 * We want just the first occurrence.
			 */
			if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}

			strncat(output, toReplace, textMaxLen + 1);
		}
		else
		{
			/**
			 * Retrieves full string of a given match.
			 */
			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);

			/**
			 * We get something to replace, but the sub-pattern to search is empty.
			 * We insert replacement either a the start end or string.
			 */
			if (*toReplace && !searchLen)
			{
				if (output - text > 0)
				{
					strncat(output, toReplace, textMaxLen);
				}
				else
				{
					strncat(toReplace, text, textMaxLen);
					strncopy(text, toReplace, strlen(toReplace) + 1);
				}

				++total;
			}
			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
			{
				/**
				 * Then we simply do a replace.
				 * Probably not the most efficient, but this should be at least safe.
				 * To avoid issue where the function could find a string which is not at the expected index,
				 * We force the input string to start from index of the full match.
				 */
				++total;
			}

			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}
		}

		/**
		 * mMatchesSubs is a flat list containing all sub-patterns of all matches.
		 * A number of sub-patterns can vary per match. So we calculate the position in the list, 
		 * from where the first sub-pattern result of current match starts.
		 */
		baseIndex  += mMatchesSubs.at(i);
		diffLength += browsed - searchLen;
	}

	delete[] toReplace;
	
	if (toSearch != NULL)
	{
		delete[] toSearch;
	}

	/**
	 * Return the number of successful replacements.
	 */
	return total;
}