示例#1
0
/**
 * Reset a grok_t to its empty state and, on the first call, perform the
 * one-time global setup shared by all grok instances.
 *
 * Per-instance work: clears the regex/pattern pointers, capture bookkeeping
 * and logging fields, and (unless compiled out by GROK_TEST_NO_PATTERNS /
 * GROK_TEST_NO_CAPTURE) creates fresh trees for patterns and captures.
 *
 * Global work (only while g_grok_global_initialized is 0): compiles
 * PATTERN_REGEX into g_pattern_re and caches its capture count and the
 * group numbers of the "name", "pattern", "subname" and "predicate" groups.
 *
 * @param grok  instance to initialize; must not be NULL.
 */
void grok_init(grok_t *grok) {
  /* set global pcre_callout for libpcre */
  pcre_callout = grok_pcre_callout;

  grok->re = NULL;
  grok->pattern = NULL;
  grok->full_pattern = NULL;
  grok->pcre_capture_vector = NULL;
  grok->pcre_num_captures = 0;
  grok->max_capture_num = 0;
  grok->pcre_errptr = NULL;
  grok->pcre_erroffset = 0;
  grok->logmask = 0;
  grok->logdepth = 0;

#ifndef GROK_TEST_NO_PATTERNS
  grok->patterns = tctreenew();
#endif /* GROK_TEST_NO_PATTERNS */

#ifndef GROK_TEST_NO_CAPTURE
  grok->captures_by_id = tctreenew();
  grok->captures_by_name = tctreenew();
  grok->captures_by_subname = tctreenew();
  grok->captures_by_capture_number = tctreenew();
#endif /* GROK_TEST_NO_CAPTURE */

  if (g_grok_global_initialized == 0) {
    /* do first initialization */
    g_grok_global_initialized = 1;

    /* VALGRIND NOTE: Valgrind complains here, but this is a global variable.
     * Ignore valgrind here. */
    g_pattern_re = pcre_compile(PATTERN_REGEX, 0,
                                &grok->pcre_errptr,
                                &grok->pcre_erroffset,
                                NULL);
    if (g_pattern_re == NULL) {
      fprintf(stderr, "Internal compiler error: %s\n", grok->pcre_errptr);
      fprintf(stderr, "Regexp: %s\n", PATTERN_REGEX);
      fprintf(stderr, "Position: %d\n", grok->pcre_erroffset);
      /* Without a compiled regex there is nothing to query: stop here
       * instead of handing a NULL handle to pcre_fullinfo() /
       * pcre_get_stringnumber() and bumping g_pattern_num_captures
       * without a real capture count behind it. */
      return;
    }

    pcre_fullinfo(g_pattern_re, NULL, PCRE_INFO_CAPTURECOUNT,
                  &g_pattern_num_captures);
    g_pattern_num_captures++; /* include the 0th group */
    g_cap_name = pcre_get_stringnumber(g_pattern_re, "name");
    g_cap_pattern = pcre_get_stringnumber(g_pattern_re, "pattern");
    g_cap_subname = pcre_get_stringnumber(g_pattern_re, "subname");
    g_cap_predicate = pcre_get_stringnumber(g_pattern_re, "predicate");
  }
}
示例#2
0
文件: pcre_get.c 项目: 3rdpaw/MdCharm
/* Return the number of the first capture group called `stringname` that is
set in `ovector`. When the pattern cannot contain duplicate names (neither
PCRE_DUPNAMES in the options nor PCRE_JCHANGED in the flags) this is a plain
name-to-number lookup. A non-positive result from the name-table lookup is
returned unchanged. */
static int
get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
#endif
{
const REAL_PCRE *re = (const REAL_PCRE *)code;
const int names_are_unique =
  (re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0;
int stride;
pcre_uchar *cursor;
#ifdef COMPILE_PCRE8
char *first, *last;
#else
PCRE_UCHAR16 *first, *last;
#endif

#ifdef COMPILE_PCRE8
if (names_are_unique)
  return pcre_get_stringnumber(code, stringname);
stride = pcre_get_stringtable_entries(code, stringname, &first, &last);
#else
if (names_are_unique)
  return pcre16_get_stringnumber(code, stringname);
stride = pcre16_get_stringtable_entries(code, stringname, &first, &last);
#endif
if (stride <= 0) return stride;

/* Walk the name-table entries from `first` to `last` inclusive; each entry
starts with its group number. */
cursor = (pcre_uchar *)first;
while (cursor <= (pcre_uchar *)last)
  {
  int group = GET2(cursor, 0);
  if (ovector[group*2] >= 0) return group;
  cursor += stride;
  }

/* No candidate group was set. NOTE(review): at this point `cursor` has
already advanced one stride past `last`; confirm that reading the group
number from there is intended. */
return GET2(cursor, 0);
}
示例#3
0
/*
 * Ported from get_first_set() in pcre_get.c in pcre source.
 *
 * Resolve `group_name` to a capture group number for the match result `mr`.
 * If the pattern may contain duplicate names (PCRE_DUPNAMES option or the
 * JCHANGED info flag), the first same-named group that is set in
 * mr->ovector is returned. Otherwise this is a plain
 * pcre_get_stringnumber() lookup. A non-positive result from the name-table
 * lookup is returned unchanged.
 */
static int matchres_first_set(cs_matchres_t *mr, const char *group_name) {
  cs_regexp_t *regexp = mr->regexp;
  pcre *re = regexp->re;
  pcre_extra *extra = regexp->extra;
  /* Zero-initialised so that a failing pcre_fullinfo() call (whose return
   * value is not inspected) leaves well-defined values and we fall through
   * to the simple lookup instead of reading indeterminate memory. */
  unsigned long options = 0;
  int jchanged = 0;
  pcre_fullinfo(re, extra, PCRE_INFO_OPTIONS, &options);
  pcre_fullinfo(re, extra, PCRE_INFO_JCHANGED, &jchanged);
  if ((options & PCRE_DUPNAMES) || jchanged) {
    char *first;
    char *last;
    uchar *entry;
    int entry_len = pcre_get_stringtable_entries(re, group_name, &first, &last);
    /* <= 0 rather than < 0: a zero stride would never advance the loop
     * below. This matches the guard in the pcre original. */
    if (entry_len <= 0) {
      return entry_len;
    }
    for (entry = (uchar *)first; entry <= (uchar *)last; entry += entry_len) {
      /* The first two bytes of an entry hold the group number, high byte
       * first. */
      int n = entry[0] << 8 | entry[1];
      if (mr->ovector[n * 2] >= 0) {
        return n;
      }
    }
    /* NOTE(review): `entry` is one stride past `last` here, as in the
     * code this was ported from; confirm that is the intended fallback. */
    return entry[0] << 8 | entry[1];
  } else {
    return pcre_get_stringnumber(re, group_name);
  }
}
示例#4
0
文件: pcre_get.c 项目: semenovf/cwt
/*
 * Return the number of the first capture group called `stringname` that is
 * set in `ovector`.
 *
 * If neither PCRE_DUPNAMES nor PCRE_JCHANGED is present in the pattern's
 * options, names are unique and the result is a plain
 * pcre_get_stringnumber() lookup. Otherwise every name-table entry for the
 * name is checked in order; if none is set, the number of the first entry
 * is returned. A non-positive result from the table lookup is returned
 * unchanged.
 */
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
{
	const real_pcre *re = (const real_pcre *)code;
	int entrysize;
	char *first, *last;
	uschar *entry;

	if( (re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0 )
		return pcre_get_stringnumber(code, stringname);

	entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);

	if( entrysize <= 0 )
		return entrysize;

	for( entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize ) {
		/* Group number is stored in the first two bytes, high byte first. */
		int n = (entry[0] << 8) + entry[1];

		if (ovector[n*2] >= 0)
			return n;
	}

	/* Nothing was set: fall back to the first entry's group number. Read it
	 * through uschar (presumably unsigned char), exactly as the loop does;
	 * reading through plain `char` would sign-extend any byte >= 0x80 where
	 * char is signed and yield a wrong or negative group number. */
	entry = (uschar *)first;
	return (entry[0] << 8) + entry[1];
}
示例#5
0
/* Fetch a captured substring by group name. The name is first translated to
a group number with pcre_get_stringnumber(); a result that is zero or
negative is handed straight back to the caller, otherwise the work is
delegated to pcre_get_substring() and its result is returned. */
int
pcre_get_named_substring(const pcre *code, const pcre_char *subject, int *ovector,
  int stringcount, const pcre_char *stringname, const pcre_char **stringptr)
{
const int group = pcre_get_stringnumber(code, stringname);
return (group > 0)
  ? pcre_get_substring(subject, ovector, stringcount, group, stringptr)
  : group;
}
示例#6
0
/* Copy a captured substring, selected by group name, into `buffer`. The name
is first translated to a group number with pcre_get_stringnumber(); a result
that is zero or negative is handed straight back to the caller, otherwise the
work is delegated to pcre_copy_substring() and its result is returned. */
int
pcre_copy_named_substring(const pcre *code, const pcre_char *subject, int *ovector,
  int stringcount, const pcre_char *stringname, pcre_char *buffer, int size)
{
const int group = pcre_get_stringnumber(code, stringname);
return (group > 0)
  ? pcre_copy_substring(subject, ovector, stringcount, group, buffer, size)
  : group;
}
示例#7
0
/* Try every pattern of context->state against the current line, starting at
   context->match->match_start, and remember in context->best / best_end the
   pattern whose match ends furthest. Patterns that merely refer to another
   state are handled by recursing with that state temporarily installed.
   When the winning pattern names a dynamic group, the span of that group is
   stored in context->extract_start / extract_end. */
static void match_internal(match_context_t *context) {
    size_t idx;

    for (idx = 0; idx < context->state->patterns.used; idx++) {
        full_pcre_t *regex;
        int options = PCRE_NO_UTF8_CHECK;
        int rc;
        int string_number;

        if (context->state->patterns.data[idx].regex.regex != NULL) {
            regex = &context->state->patterns.data[idx].regex;
            /* Never accept an empty match for items that keep the current
               state (it would make no progress), nor, unless the highlight
               explicitly allows empty start patterns, for items that enter
               a new state. */
            if (context->state->patterns.data[idx].next_state == NO_CHANGE ||
                    (context->state->patterns.data[idx].next_state > NO_CHANGE &&
                     !(context->match->highlight->flags & T3_HIGHLIGHT_ALLOW_EMPTY_START)))
                options |= PCRE_NOTEMPTY;
        } else if (context->state->patterns.data[idx].next_state >= 0) {
            /* No regex and a valid target state: this entry is a "use" of
               another state, so search that state's patterns in place. */
            state_t *caller_state = context->state;
            context->state = &context->match->highlight->states.data[caller_state->patterns.data[idx].next_state];
            match_internal(context);
            context->state = caller_state;
            continue;
        } else {
            /* No regex and no target state: an end pattern with a dynamic
               back reference; its regex lives in the current mapping. */
            regex = &context->match->mapping.data[context->match->state].dynamic->regex;
        }

        rc = pcre_exec(regex->regex, regex->extra,
                       context->line, context->size, context->match->match_start, options, context->ovector,
                       sizeof(context->ovector) / sizeof(context->ovector[0]));
        /* Only a successful match that ends beyond the best so far counts. */
        if (rc < 0 || context->ovector[1] <= context->best_end)
            continue;

        context->best = &context->state->patterns.data[idx];
        context->best_end = context->ovector[1];

        if (context->best->extra == NULL || context->best->extra->dynamic_name == NULL)
            continue;

        string_number = pcre_get_stringnumber(context->best->regex.regex, context->best->extra->dynamic_name);
        if (string_number == PCRE_ERROR_NOSUBSTRING || string_number > 10) {
            context->extract_start = 0;
            context->extract_end = 0;
        } else {
            context->extract_start = context->ovector[string_number * 2];
            context->extract_end = context->ovector[string_number * 2 + 1];
        }
    }
}
示例#8
0
/* Translate a capture specifier `i` into twice its capture group number.
   A String is resolved as a group name via pcre_get_stringnumber(); anything
   else is passed through sl_expect() as an Int and used as the group number
   directly. Returns -1 when the group number is negative (which includes a
   failed name lookup) or not below match->capture_count. */
static int
cap_index(sl_vm_t* vm, SLVAL regexp_match, SLVAL i)
{
    sl_regexp_match_t* match = get_regexp_match(vm, regexp_match);
    int group;

    if(!sl_is_a(vm, i, vm->lib.String)) {
        group = sl_get_int(sl_expect(vm, i, vm->lib.Int));
    } else {
        group = pcre_get_stringnumber(match->re->re, sl_to_cstr(vm, i));
    }

    return (group < 0 || group >= match->capture_count) ? -1 : group * 2;
}
示例#9
0
pcre_error_code pcre_private(char *INPUT_LINE, char *INPUT_PAT, int *Output_Start, int *Output_End, char*** _pstCapturedString, int* _piCapturedStringCount)
{
    /* ALL strings are managed as UTF-8 by default */
    int options = PCRE_UTF8;
    int size_offsets = 45;
    int size_offsets_max;
    int *offsets = NULL;
    int all_use_dfa = 0;
    BOOL LOOP_PCRE_TST = FALSE;

    /* These vectors store, end-to-end, a list of captured substring names. Assume
    that 1024 is plenty long enough for the few names we'll be testing. */

    char copynames[1024];
    char getnames[1024];

    char *copynamesptr = NULL;
    char *getnamesptr = NULL;

    int rc = 0;
    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
    if (rc != 1)
    {
        return UTF8_NOT_SUPPORTED;
    }

    /* bug 3891 */
    /* backslash characters are not interpreted for input */
    buffer = strsub(INPUT_LINE, "\\", "\\\\");

    size_offsets_max = size_offsets;
    offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
    if (offsets == NULL)
    {
        if (buffer)
        {
            FREE(buffer);
            buffer = NULL;
        }
        return NOT_ENOUGH_MEMORY_FOR_VECTOR;
    }
    /* Main loop */
    LOOP_PCRE_TST = FALSE;
    while (!LOOP_PCRE_TST)
    {
        pcre *re = NULL;
        pcre_extra *extra = NULL;
        const char *error = NULL;
        char *back_p = NULL;
        char *p = NULL;
        char *pp = NULL;
        char *ppp = NULL;
        const unsigned char *tables = NULL;
        int do_G = 0;
        int do_g = 0;
        int erroroffset = 0, len = 0, delimiter;

        LOOP_PCRE_TST = TRUE;
        p = strdup(INPUT_PAT);
        back_p = p;
        while (isspace(*p))
        {
            p++;
        }
        if (*p == 0)
        {
            continue;
        }
        /* In-line pattern (the usual case). Get the delimiter and seek the end of
        the pattern; if is isn't complete, read more. */

        delimiter = *p++;

        if (isalnum(delimiter) || delimiter == '\\')
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            return DELIMITER_NOT_ALPHANUMERIC;
        }

        pp = p;

        while (*pp != 0)
        {
            if (*pp == '\\' && pp[1] != 0)
            {
                pp++;
            }
            else if (*pp == delimiter)
            {
                break;
            }
            pp++;
        }

        /* If the delimiter can't be found, it's a syntax error */
        if (*pp == 0)
        {
            if (buffer)
            {
                FREE(buffer);
                buffer = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
                offsets = NULL;
            }
            if (back_p)
            {
                FREE(back_p);
                back_p = NULL;
            }
            if (offsets)
            {
                FREE(offsets);
            }
            return CAN_NOT_COMPILE_PATTERN;
        }

        /* If the first character after the delimiter is backslash, make
        the pattern end with backslash. This is purely to provide a way
        of testing for the error message when a pattern ends with backslash. */

        if (pp[1] == '\\')
        {
            *pp++ = '\\';
        }

        /* Terminate the pattern at the delimiter, and save a copy of the pattern
        for callouts. */

        *pp++ = 0;

        /* Look for options after final delimiter */

        //options = 8192;

        while (*pp != 0)
        {
            switch (*pp++)
            {
                case 'f':
                    options |= PCRE_FIRSTLINE;
                    break;
                case 'g':
                    do_g = 1;
                    break;
                case 'i':
                    options |= PCRE_CASELESS;
                    break;
                case 'm':
                    options |= PCRE_MULTILINE;
                    break;
                case 's':
                    options |= PCRE_DOTALL;
                    break;
                case 'x':
                    options |= PCRE_EXTENDED;
                    break;
                case '+':
                    break;
                case 'A':
                    options |= PCRE_ANCHORED;
                    break;
                case 'B':
                    break;
                case 'C':
                    options |= PCRE_AUTO_CALLOUT;
                    break;
                case 'D':
                    break;
                case 'E':
                    options |= PCRE_DOLLAR_ENDONLY;
                    break;
                case 'F':
                    break;
                case 'G':
                    do_G = 1;
                    break;
                case 'I':
                    break;
                case 'J':
                    options |= PCRE_DUPNAMES;
                    break;
                case 'M':
                    break;
                case 'N':
                    options |= PCRE_NO_AUTO_CAPTURE;
                    break;
                case 'S':
                    break;
                case 'U':
                    options |= PCRE_UNGREEDY;
                    break;
                case 'X':
                    options |= PCRE_EXTRA;
                    break;
                case 'Z':
                    break;
                case '8':
                {
                    int rc = 0;
                    (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
                    if (rc != 1)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                        }
                        return UTF8_NOT_SUPPORTED;
                    }
                    options |= PCRE_UTF8;
                }
                break;
                case '?':
                    options |= PCRE_NO_UTF8_CHECK;
                    break;
                case 'L':
                    ppp = pp;
                    /* The '\r' test here is so that it works on Windows. */
                    /* The '0' test is just in case this is an unterminated line. */
                    while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ')
                    {
                        ppp++;
                    }
                    *ppp = 0;
                    if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
                    {
                        goto SKIP_DATA;
                    }

                    tables = pcre_maketables();
                    pp = ppp;
                    break;
                case '>':
                    while (*pp != 0)
                    {
                        pp++;
                    }
                    while (isspace(pp[-1]))
                    {
                        pp--;
                    }
                    *pp = 0;
                    break;
                case '<':
                {
                    while (*pp++ != '>')
                    {
                        ;
                    }
                }
                break;
                case '\r':                      /* So that it works in Windows */
                case '\n':
                case ' ':
                    break;

                default:
                    goto SKIP_DATA;
            }
        }

        /* Handle compiling via the POSIX interface, which doesn't support the
        timing, showing, or debugging options, nor the ability to pass over
        local character tables. */


        {
            re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
            /* Compilation failed; go back for another re, skipping to blank line
            if non-interactive. */
            if (re == NULL)
            {
SKIP_DATA:
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                if (back_p)
                {
                    FREE(back_p);
                    back_p = NULL;
                }
                return CAN_NOT_COMPILE_PATTERN;
            }

        }        /* End of non-POSIX compile */

        /* Read data lines and test them */
        {
            char *q = NULL;
            char *bptr = NULL;
            int *use_offsets = offsets;
            int use_size_offsets = size_offsets;
            int callout_data = 0;
            int callout_data_set = 0;
            int count = 0;
            int c = 0;
            int copystrings = 0;
            int find_match_limit = 0;
            int getstrings = 0;
            int gmatched = 0;
            int start_offset = 0;
            int g_notempty = 0;
            int use_dfa = 0;

            options = 0;
            *copynames = 0;
            *getnames = 0;

            copynamesptr = copynames;
            getnamesptr = getnames;

            callout_count = 0;
            callout_fail_count = 999999;
            callout_fail_id = -1;

            if (extra != NULL)
            {
                extra->flags &= ~(PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
            }
            p = buffer;
            bptr = q = buffer;
            while ((c = *p++) != 0)
            {
                int i = 0;
                int n = 0;

                if (c == '\\') switch ((c = *p++))
                    {
                        case 'a':
                            c =    7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'e':
                            c =   27;
                            break;
                        case 'f':
                            c = '\f';
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = '\v';
                            break;
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                            c -= '0';
                            while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
                            {
                                c = c * 8 + *p++ - '0';
                            }
                            break;
                        case 'x':
                            /* Ordinary \x */
                            c = 0;
                            while (i++ < 2 && isxdigit(*p))
                            {
                                c = c * 16 + tolower(*p) - ((isdigit(*p)) ? '0' : 'W');
                                p++;
                            }
                            break;
                        case 0:   /* \ followed by EOF allows for an empty line */
                            p--;
                            continue;
                        case '>':
                            while (isdigit(*p))
                            {
                                start_offset = start_offset * 10 + *p++ - '0';
                            }
                            continue;
                        case 'A':  /* Option setting */
                            options |= PCRE_ANCHORED;
                            continue;
                        case 'B':
                            options |= PCRE_NOTBOL;
                            continue;
                        case 'C':
                            if (isdigit(*p))    /* Set copy string */
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                copystrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = copynamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)copynamesptr);
                                copynamesptr = npp;
                            }
                            else if (*p == '+')
                            {
                                p++;
                            }
                            else if (*p == '-')
                            {
                                p++;
                            }
                            else if (*p == '!')
                            {
                                callout_fail_id = 0;
                                p++;
                                while (isdigit(*p))
                                {
                                    callout_fail_id = callout_fail_id * 10 + *p++ - '0';
                                }
                                callout_fail_count = 0;
                                if (*p == '!')
                                {
                                    p++;
                                    while (isdigit(*p))
                                    {
                                        callout_fail_count = callout_fail_count * 10 + *p++ - '0';
                                    }
                                }
                            }
                            else if (*p == '*')
                            {
                                int sign = 1;
                                callout_data = 0;
                                if (*(++p) == '-')
                                {
                                    sign = -1;
                                    p++;
                                }
                                while (isdigit(*p))
                                {
                                    callout_data = callout_data * 10 + *p++ - '0';
                                }
                                callout_data *= sign;
                                callout_data_set = 1;
                            }
                            continue;
                        case 'G':
                            if (isdigit(*p))
                            {
                                while (isdigit(*p))
                                {
                                    n = n * 10 + *p++ - '0';
                                }
                                getstrings |= 1 << n;
                            }
                            else if (isalnum(*p))
                            {
                                char *npp = getnamesptr;
                                while (isalnum(*p))
                                {
                                    *npp++ = *p++;
                                }
                                *npp++ = 0;
                                *npp = 0;
                                pcre_get_stringnumber(re, (char *)getnamesptr);
                                getnamesptr = npp;
                            }
                            continue;
                        case 'L':
                            continue;
                        case 'M':
                            find_match_limit = 1;
                            continue;
                        case 'N':
                            options |= PCRE_NOTEMPTY;
                            continue;
                        case 'O':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (n > size_offsets_max)
                            {
                                size_offsets_max = n;
                                if (offsets)
                                {
                                    FREE(offsets);
                                }
                                use_offsets = offsets = (int *)MALLOC(size_offsets_max * sizeof(int));
                            }
                            use_size_offsets = n;
                            if (n == 0)
                            {
                                use_offsets = NULL;    /* Ensures it can't write to it */
                            }
                            continue;
                        case 'P':
                            options |= PCRE_PARTIAL;
                            continue;
                        case 'Q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
                            extra->match_limit_recursion = n;
                            continue;
                        case 'q':
                            while (isdigit(*p))
                            {
                                n = n * 10 + *p++ - '0';
                            }
                            if (extra == NULL)
                            {
                                extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                                extra->flags = 0;
                            }
                            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
                            extra->match_limit = n;
                            continue;
#if !defined NODFA
                        case 'R':
                            options |= PCRE_DFA_RESTART;
                            continue;
#endif
                        case 'S':

                            continue;
                        case 'Z':
                            options |= PCRE_NOTEOL;
                            continue;
                        case '?':
                            options |= PCRE_NO_UTF8_CHECK;
                            continue;
                        case '<':
                        {
                            while (*p++ != '>')
                            {
                                ;
                            }
                        }
                        continue;
                    }
                *q++ = (char)c;
            }
            *q = 0;
            len = (int)(q - buffer);
            if ((all_use_dfa || use_dfa) && find_match_limit)
            {
                if (buffer)
                {
                    FREE(buffer);
                    buffer = NULL;
                }
                if (offsets)
                {
                    FREE(offsets);
                    offsets = NULL;
                }
                if (p)
                {
                    FREE(p);
                    p = NULL;
                }
                if (re)
                {
                    (*pcre_free)(re);
                    re = NULL;
                }
                if (tables)
                {
                    (*pcre_free)((void*)tables);
                    tables = NULL;
                }
                if (extra)
                {
                    FREE(extra);
                    extra = NULL;
                }
                return LIMIT_NOT_RELEVANT_FOR_DFA_MATCHING;
            }
            /* Handle matching via the POSIX interface, which does not
            support timing or playing with the match limit or callout data. */
            for (;; gmatched++)    /* Loop for /g or /G */
            {

                /* If find_match_limit is set, we want to do repeated matches with
                varying limits in order to find the minimum value for the match limit and
                for the recursion limit. */

                if (find_match_limit)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }

                    (void)check_match_limit(re, extra, bptr, len, start_offset,
                                            options | g_notempty, use_offsets, use_size_offsets,
                                            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
                                            PCRE_ERROR_MATCHLIMIT);

                    count = check_match_limit(re, extra, bptr, len, start_offset,
                                              options | g_notempty, use_offsets, use_size_offsets,
                                              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
                                              PCRE_ERROR_RECURSIONLIMIT);
                }
                /* If callout_data is set, use the interface with additional data */
                else if (callout_data_set)
                {
                    if (extra == NULL)
                    {
                        extra = (pcre_extra *)MALLOC(sizeof(pcre_extra));
                        extra->flags = 0;
                    }
                    extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
                    extra->callout_data = &callout_data;
                    count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
                                      options | g_notempty, use_offsets, use_size_offsets);

                    extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
                }
                /* The normal case is just to do the match once, with the default
                value of match_limit. */
                else
                {
                    count = pcre_exec(re, extra, (char *)bptr, len,
                                      start_offset, options | g_notempty, use_offsets, use_size_offsets);
                    if (count == 0)
                    {
                        count = use_size_offsets / 3;
                    }

                    //to retrieve backref count and values
                    if (count > 0 && _pstCapturedString != NULL && _piCapturedStringCount != NULL)
                    {
                        int i = 0;
                        int iErr = 0;

                        iErr = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, _piCapturedStringCount);
                        //sciprint("PCRE_INFO_CAPTURECOUNT %d\n", *_piCapturedStringCount);

                        if (*_piCapturedStringCount > 0)
                        {
                            *_pstCapturedString = (char**)MALLOC(sizeof(char*) * *_piCapturedStringCount);
                            for (i = 0 ; i < *_piCapturedStringCount ; i++)
                            {
                                char* pstSubstring = NULL;
                                pcre_get_substring(bptr, use_offsets, count, i + 1, &pstSubstring);
                                if (pstSubstring != NULL)
                                {
                                    (*_pstCapturedString)[i] = strdup(pstSubstring);
                                }
                                pcre_free_substring(pstSubstring);
                            }
                        }
                    }
                }
                /* Matched */
                if (count >= 0)
                {
                    int i, maxcount;
                    maxcount = use_size_offsets / 3;
                    /* This is a check against a lunatic return value. */
                    if (count > maxcount)
                    {
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)(re);
                            re = NULL;
                        }
                        if (tables)
                        {
                            (*pcre_free)((void*)tables);
                            tables = NULL;
                        }
                        if (extra)
                        {
                            FREE(extra);
                            extra = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return TOO_BIG_FOR_OFFSET_SIZE;
                    }

                    for (i = 0; i < count * 2; i += 2)
                    {
                        if (use_offsets[i] >= 0)
                        {
                            *Output_Start = use_offsets[i];
                            *Output_End = use_offsets[i + 1];
                            if (buffer)
                            {
                                FREE(buffer);
                            }

                            /* use_offsets = offsets no need to free use_offsets if we free offsets */
                            if (offsets)
                            {
                                FREE(offsets);
                            }

                            /* "re" allocated by pcre_compile (better to use free function associated)*/
                            if (re)
                            {
                                (*pcre_free)(re);
                            }

                            if (extra)
                            {
                                FREE(extra);
                            }
                            if (tables)
                            {
                                /* "tables" allocated by pcre_maketables (better to use free function associated to pcre)*/
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                                setlocale(LC_CTYPE, "C");
                            }

                            if (back_p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return PCRE_FINISHED_OK;
                        }
                    }

                    for (copynamesptr = copynames; *copynamesptr != 0; copynamesptr += (int)strlen((char*)copynamesptr) + 1)
                    {
                        char copybuffer[256];
                        pcre_copy_named_substring(re, (char *)bptr, use_offsets, count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
                    }

                    for (i = 0; i < 32; i++)
                    {
                        if ((getstrings & (1 << i)) != 0)
                        {
                            const char *substring;
                            pcre_get_substring((char *)bptr, use_offsets, count, i, &substring);
                        }
                    }

                    for (getnamesptr = getnames; *getnamesptr != 0; getnamesptr += (int)strlen((char*)getnamesptr) + 1)
                    {
                        const char *substring;
                        pcre_get_named_substring(re, (char *)bptr, use_offsets, count, (char *)getnamesptr, &substring);
                    }

                }
                /* Failed to match. If this is a /g or /G loop and we previously set
                g_notempty after a null match, this is not necessarily the end. We want
                to advance the start offset, and continue. We won't be at the end of the
                string - that was checked before setting g_notempty.
                Complication arises in the case when the newline option is "any" or
                "anycrlf". If the previous match was at the end of a line terminated by
                CRLF, an advance of one character just passes the \r, whereas we should
                prefer the longer newline sequence, as does the code in pcre_exec().
                Fudge the offset value to achieve this.

                Otherwise, in the case of UTF-8 matching, the advance must be one
                character, not one byte. */
                else
                {
                    if (count == PCRE_ERROR_NOMATCH)
                    {
                        if (gmatched == 0)
                        {
                            if (tables)
                            {
                                (*pcre_free)((void *)tables);
                                tables = NULL;
                            }
                            if (re)
                            {
                                (*pcre_free)((void *)re);
                                re = NULL;
                            }
                            if (buffer)
                            {
                                FREE(buffer);
                                buffer = NULL;
                            }
                            if (offsets)
                            {
                                FREE(offsets);
                            }
                            if (p)
                            {
                                FREE(back_p);
                                back_p = NULL;
                            }
                            return NO_MATCH;
                        }
                    }

                    if (count == PCRE_ERROR_MATCHLIMIT )
                    {
                        if (tables)
                        {
                            (*pcre_free)((void *)tables);
                            tables = NULL;
                        }
                        if (re)
                        {
                            (*pcre_free)((void *)re);
                            re = NULL;
                        }
                        if (buffer)
                        {
                            FREE(buffer);
                            buffer = NULL;
                        }
                        if (offsets)
                        {
                            FREE(offsets);
                            offsets = NULL;
                        }
                        if (back_p)
                        {
                            FREE(back_p);
                            back_p = NULL;
                        }
                        return MATCH_LIMIT;
                    }
                    break;  /* Out of loop */
                }

                /* If not /g or /G we are done */
                if (!do_g && !do_G)
                {
                    break;
                }

                /* If we have matched an empty string, first check to see if we are at
                the end of the subject. If so, the /g loop is over. Otherwise, mimic
                what Perl's /g options does. This turns out to be rather cunning. First
                we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
                same point. If this fails (picked up above) we advance to the next
                character. */

                g_notempty = 0;

                if (use_offsets[0] == use_offsets[1])
                {
                    if (use_offsets[0] == len)
                    {
                        break;
                    }
                    g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
                }

                /* For /g, update the start offset, leaving the rest alone */

                if (do_g)
                {
                    start_offset = use_offsets[1];
                }
                /* For /G, update the pointer and length */
                else
                {
                    bptr += use_offsets[1];
                    len -= use_offsets[1];
                }
            }  /* End of loop for /g and /G */

            if (re)
            {
                (*pcre_free)(re);
                re = NULL;
            }
            if (extra)
            {
                FREE(extra);
                extra = NULL;
            }
            if (tables)
            {
                (*pcre_free)((void *)tables);
                tables = NULL;
            }

            FREE(back_p);
            back_p = NULL;
            continue;
        }    /* End of loop for data lines */
    }

    if (buffer)
    {
        FREE(buffer);
        buffer = NULL;
    }
    if (offsets)
    {
        FREE(offsets);
        offsets = NULL;
    }

    return PCRE_EXIT;
}
示例#10
0
/* Shared implementation behind the exec stubs.
 *
 * Matches the compiled pattern held by v_rex against the part of v_subj that
 * starts at v_subj_start, beginning the search at v_pos.  When v_workspace is
 * non-NULL the DFA matcher (pcre_dfa_exec) is used, otherwise pcre_exec.
 *
 * v_maybe_cof is an OCaml option: None selects the direct path that lets PCRE
 * write straight into v_ovec; otherwise the subject and offset vector are
 * copied into C-allocated buffers for the duration of the match.
 *
 * Raises Invalid_argument when v_pos or v_subj_start is out of range.  Match
 * failures are reported through handle_exec_error (or caml_raise of the
 * callout's exception on PCRE_ERROR_CALLOUT).  Always returns Val_unit on
 * success; the match result is delivered through handle_pcre_exec_result. */
CAMLprim value pcre_exec_stub0(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof, value v_workspace)
{
  int ret;
  /* A non-NULL workspace is what selects DFA matching */
  int is_dfa = v_workspace != (value) NULL;
  long
    pos = v_pos,
    len = caml_string_length(v_subj),
    subj_start = v_subj_start;
  long ovec_len = Wosize_val(v_ovec);

  if (pos > len || pos < subj_start)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");

  if (subj_start > len || subj_start < 0)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");

  /* From here on, pos and len are relative to subj_start */
  pos -= subj_start;
  len -= subj_start;

  {
    const pcre *code = get_rex(v_rex);  /* Compiled pattern */
    const pcre_extra *extra = get_extra(v_rex);  /* Extra info */
    const char *ocaml_subj =
      String_val(v_subj) + subj_start;  /* Subject string */
    const int opt = v_opt;  /* Runtime options */

    /* Special case when no callout functions specified */
    if (v_maybe_cof == None) {
      /* PCRE writes its offsets directly into the OCaml block here */
      int *ovec = (int *) &Field(v_ovec, 0);

      /* Performs the match */
      if (is_dfa)
        ret =
          pcre_dfa_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len,
              (int *) &Field(v_workspace, 0), Wosize_val(v_workspace));
      else
        ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);

      if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
      else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
    }

    /* There are callout functions */
    else {
      value v_cof = Field(v_maybe_cof, 0);
      /* NOTE(review): v_substrings is registered with Begin_roots4 below
         before it is assigned - confirm this is acceptable to the runtime */
      value v_substrings;
      /* C-side copies of the subject and offset vector; presumably needed
         because callouts may run OCaml code while PCRE holds these pointers
         - TODO confirm */
      char *subj = caml_stat_alloc(sizeof(char) * len);
      int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
      int workspace_len;
      int *workspace;
      struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
      /* The initializer length follows the fields the installed PCRE
         headers advertise through these feature macros */
      struct pcre_extra new_extra =
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
# ifdef PCRE_EXTRA_MARK
#  ifdef PCRE_EXTRA_EXECUTABLE_JIT
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
#  else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
#  endif
# else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
# endif
#else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
#endif

      cod.subj_start = subj_start;
      memcpy(subj, ocaml_subj, len);

      Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
        /* v_subj only needs protecting across this one allocation */
        Begin_roots1(v_subj);
          v_substrings = caml_alloc_small(2, 0);
        End_roots();

        Field(v_substrings, 0) = v_subj;
        Field(v_substrings, 1) = v_ovec;

        /* The callout data points at the rooted locals, not at copies */
        cod.v_substrings_p = &v_substrings;
        cod.v_cof_p = &v_cof;
        new_extra.callout_data = &cod;

        /* Carry the caller's settings over into the callout-enabled copy */
        if (extra != NULL) {
          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
          new_extra.study_data = extra->study_data;
          new_extra.match_limit = extra->match_limit;
          new_extra.tables = extra->tables;
#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
          new_extra.match_limit_recursion = extra->match_limit_recursion;
#endif
        }

        if (is_dfa) {
          workspace_len = Wosize_val(v_workspace);
          workspace = caml_stat_alloc(sizeof(int) * workspace_len);
          /* NOTE(review): this call passes `extra`, not `&new_extra`, so the
             callout data set up above is not handed to the DFA matcher; it
             also passes the OCaml block as workspace rather than the buffer
             allocated on the previous line.  `workspace` is therefore never
             written before it is copied into v_workspace further down, and
             v_workspace is not among the registered roots - confirm intent */
          ret =
            pcre_dfa_exec(code, extra, subj, len, pos, opt, ovec, ovec_len,
                (int *) &Field(v_workspace, 0), workspace_len);
        } else
          ret =
            pcre_exec(code, &new_extra, subj, len, pos, opt, ovec, ovec_len);

        caml_stat_free(subj);
      End_roots();

      if (ret < 0) {
        /* Release the C buffers before raising: the calls below do not
           return on these paths (presumably - TODO confirm for
           handle_exec_error) */
        if (is_dfa) caml_stat_free(workspace);
        caml_stat_free(ovec);
        if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
        else handle_exec_error("pcre_exec_stub(callout)", ret);
      } else {
        handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
        if (is_dfa) {
          /* Copy the C workspace buffer back into the OCaml block, one
             element at a time */
          caml_int_ptr ocaml_workspace_dst =
            (caml_int_ptr) &Field(v_workspace, 0);
          const int *workspace_src = workspace;
          const int *workspace_src_stop = workspace + workspace_len;
          while (workspace_src != workspace_src_stop) {
            *ocaml_workspace_dst = *workspace_src;
            ocaml_workspace_dst++;
            workspace_src++;
          }
          caml_stat_free(workspace);
        }
        caml_stat_free(ovec);
      }
    }
  }

  return Val_unit;
}

/* Native-code entry point for ordinary (non-DFA) matching: delegates to
   pcre_exec_stub0 with a NULL workspace. */
CAMLprim value pcre_exec_stub(
    intnat v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
    value v_ovec, value v_maybe_cof)
{
  const value v_no_workspace = (value) NULL;

  return pcre_exec_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj, v_ovec,
                         v_maybe_cof, v_no_workspace);
}

/* Byte-code hook for pcre_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
{
  /* The three integer arguments arrive as OCaml values and are unboxed
     here; the remaining arguments are forwarded untouched. */
  const intnat opt = Int_val(argv[0]);
  const intnat pos = Int_val(argv[2]);
  const intnat subj_start = Int_val(argv[3]);

  /* A NULL workspace makes pcre_exec_stub0 take the non-DFA path. */
  return pcre_exec_stub0(opt, argv[1], pos, subj_start, argv[4], argv[5],
                         argv[6], (value) NULL);
}

/* Byte-code hook for pcre_dfa_exec_stub
   Needed, because there are more than 5 arguments */
CAMLprim value pcre_dfa_exec_stub_bc(value *argv, int __unused argn)
{
  /* The three integer arguments arrive as OCaml values and are unboxed
     here; the remaining arguments are forwarded untouched. */
  const intnat opt = Int_val(argv[0]);
  const intnat pos = Int_val(argv[2]);
  const intnat subj_start = Int_val(argv[3]);

  /* argv[7] is the workspace; pcre_exec_stub0 treats a non-NULL workspace
     as a request for DFA matching. */
  return pcre_exec_stub0(opt, argv[1], pos, subj_start, argv[4], argv[5],
                         argv[6], argv[7]);
}

/* Custom-block operations for the character-table values created by
   pcre_maketables_stub.  Only the second slot is specific to this file
   (pcre_dealloc_tables); every other handler is a runtime default.
   NOTE(review): the slots are positional - presumably identifier, finalize,
   compare, hash, serialize, deserialize, compare_ext; confirm against the
   struct custom_operations definition in use. */
static struct custom_operations tables_ops = {
  "pcre_ocaml_tables",
  pcre_dealloc_tables,
  custom_compare_default,
  custom_hash_default,
  custom_serialize_default,
  custom_deserialize_default,
  custom_compare_ext_default
};

/* Generates a new set of chartables for the current locale (see man
   page of PCRE */
CAMLprim value pcre_maketables_stub(value __unused v_unit)
{
  /* Allocate the custom block first, then fill it with freshly generated
     tables.  The mem/max pair (1, 1000000) passed to caml_alloc_custom
     controls how often table allocations push the GC towards a full cycle. */
  value v_result =
    caml_alloc_custom(
        &tables_ops, sizeof(struct pcre_ocaml_tables), 1, 1000000);

  set_tables(v_result, pcre_maketables());

  return v_result;
}

/* Wraps around the isspace-function */
CAMLprim value pcre_isspace_stub(value v_c)
{
  /* Classify the unboxed character code and return the answer as an OCaml bool */
  const int is_space = isspace(Int_val(v_c));

  return Val_bool(is_space);
}


/* Returns number of substring associated with a name */

CAMLprim intnat pcre_get_stringnumber_stub(value v_rex, value v_name)
{
  const char *group_name = String_val(v_name);
  const int group_no = pcre_get_stringnumber(get_rex(v_rex), group_name);

  /* An unknown name is reported to OCaml as Invalid_argument */
  if (group_no == PCRE_ERROR_NOSUBSTRING) {
    caml_invalid_argument("Named string not found");
  }

  return group_no;
}

/* Byte-code variant: same lookup as pcre_get_stringnumber_stub, with the
   result converted to an OCaml int */
CAMLprim value pcre_get_stringnumber_stub_bc(value v_rex, value v_name)
{
  const intnat group_no = pcre_get_stringnumber_stub(v_rex, v_name);

  return Val_int(group_no);
}


/* Returns array of names of named substrings in a regexp */
CAMLprim value pcre_names_stub(value v_rex)
{
  /* v_rex must be registered: this function allocates (caml_alloc,
     caml_copy_string) while tbl_ptr still points into the name table that
     belongs to the compiled pattern held by v_rex.  Registering the
     parameter keeps the pattern reachable for the whole call. */
  CAMLparam1(v_rex);
  CAMLlocal1(v_res);
  int name_count;
  int entry_size;
  const char *tbl_ptr;
  int i;

  /* Query the three pieces of name-table metadata; any failure is treated
     as an internal error. */
  int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  if (ret != 0) raise_internal_error("pcre_names_stub: namecount");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");

  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  if (ret != 0) raise_internal_error("pcre_names_stub: nametable");

  v_res = caml_alloc(name_count, 0);

  /* Entries are entry_size bytes apart; the name starts two bytes into each
     entry (the leading bytes hold the group number in PCRE's table layout). */
  for (i = 0; i < name_count; ++i) {
    value v_name = caml_copy_string(tbl_ptr + 2);
    Store_field(v_res, i, v_name);
    tbl_ptr += entry_size;
  }

  CAMLreturn(v_res);
}

/* Generic stub for getting integer results from pcre_config */
static inline int pcre_config_int(int what)
{
  /* pcre_config stores the answer through the pointer it is given */
  int result;

  pcre_config(what, &result);

  return result;
}

/* Generic stub for getting long integer results from pcre_config */
/* Returns the value pcre_config stores for `what` as a long.  The return
   type is long (not int) so that the value read is handed back without
   narrowing; callers that assign it to an int see the same conversion as
   before. */
static inline long pcre_config_long(int what)
{
  /* Initialised so that a failing pcre_config call yields 0 rather than an
     indeterminate value. */
  long ret = 0;
  pcre_config(what, (void *) &ret);
  return ret;
}
示例#11
0
// Resolves the named group to its number and forwards to the
// index-based overload of end().
void Pattern::end (const std::string& groupName) const
{
    end (pcre_get_stringnumber (_re, groupName.c_str()));
}
示例#12
0
/**
 * Replaces the matches found in `text` with the expansion of `replace`.
 *
 * @param text          Buffer holding the input; it is modified in place and receives the result.
 * @param textMaxLen    Capacity figure used to bound the working buffers and the copies into `text`.
 * @param replace       Replacement template; may contain the format specifiers listed below.
 * @param replaceLen    Number of characters of `replace` to process.
 * @param flags         REGEX_FORMAT_NOCOPY keeps only the expanded replacements (unmatched text is dropped);
 *                      REGEX_FORMAT_FIRSTONLY stops after the first replacement.
 *
 * @return              -1 if MatchAll() fails, otherwise the number of successful replacements.
 */
int RegEx::Replace(char *text, size_t textMaxLen, const char *replace, size_t replaceLen, int flags)
{
	char *output = text;

	/**
	 * Retrieve all matches and store them in
	 * mSubStrings list.
	 */
	if (MatchAll(output) == -1)
	{
		return -1;
	}

	size_t subjectLen = strlen(subject);
	size_t total = 0;
	size_t baseIndex = 0;
	size_t diffLength = 0;

	char *toReplace = new char[textMaxLen + 1];
	char *toSearch = NULL;

	/**
	 * All characters which are not matched are not copied when replacing matches.
	 * The original text (output buffer) should then be considered empty.
	 */
	if (flags & REGEX_FORMAT_NOCOPY)
	{
		*output = '\0';
	}
	else
	{
		/**
		 * This is used only when we do replace matches.
		 */
		toSearch  = new char[textMaxLen + 1];
	}

	/**
	 * Loop over all matches found.
	 */
	for (size_t i = 0; i < mMatchesSubs.length(); ++i)
	{
		char *ptr = toReplace;

		size_t browsed = 0;
		size_t searchLen = 0;
		size_t length = 0;

		/**
		 * Build the replace string as it can contain backreference
		 * and this needs to be parsed.
		 */
		for (const char *s = replace, *end = s + replaceLen; s < end && browsed <= textMaxLen; ++s, ++browsed)
		{
			unsigned int c = *s;

			/**
			 * Supported format specifiers:
			 *
			 *   $number  : Substitutes the substring matched by group number.
			 *              n must be an integer value designating a valid backreference, greater than 0, and of two digits at most.
			 *   ${name}  : Substitutes the substring matched by the named group name (a maximum of 32 characters).
			 *   $&       : Substitutes a copy of the whole match.
			 *   $`       : Substitutes all the text of the input string before the match.
			 *   $'       : Substitutes all the text of the input string after the match.
			 *   $+       : Substitutes the last group that was captured.
			 *   $_       : Substitutes the entire input string.
			 *   $$       : Substitutes a literal "$".
			 */
			if (c == '$' || c == '\\')
			{
				switch (*++s)
				{
					case '\0':
					{
						/**
						 * End of string.
						 * Copy one character.
						 */
						 *(ptr + browsed) = c;
						 break;
					}
					case '&':
					{
						/**
						 * Concatenate retrieved full match sub-string.
						 * length - 1 to overwrite EOS.
						 */
						GetSubstring(baseIndex, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '`':
					{
						/**
						 * Concatenate part of original text up to
						 * first sub-string position.
						 */
						length = mSubStrings.at(baseIndex).start;
						memcpy(ptr + browsed, subject, length);
						browsed += length - 1;
						break;
					}
					case '\'':
					{
						/**
						 * Concatenate part of original text from
						 * last sub-string end position to EOS.
						 */
						length = mSubStrings.at(baseIndex).end;
						memcpy(ptr + browsed, subject + length, subjectLen - length);
						browsed += (subjectLen - length) - 1;
						break;
					}
					case '+':
					{
						/**
						 * Copy the last group that was captured.
						 */
						GetSubstring(baseIndex + mMatchesSubs.at(i) - 1, ptr + browsed, textMaxLen, &length);
						browsed += length - 1;
						break;
					}
					case '_':
					{
						/**
						 * Copy the entire input string.
						 */
						memcpy(ptr + browsed, subject, subjectLen);
						browsed += (subjectLen - 1);
						break;
					}
					case '$':
					case '\\':
					{
						/**
						 * Copy the single character $ or \.
						 */
						*(ptr + browsed) = c;
						break;
					}
					case '0': case '1':	case '2': case '3':	case '4':
					case '5': case '6': case '7': case '8': case '9':
					case '{':
					{
						/**
						 * Checking backreference.
						 * Which can be either $n, ${n} or ${name}.
						 */
						int backref = -1;
						const char *walk = s;
						bool inBrace = false;
						bool nameCheck = false;

						/**
						 * ${nn}.
						 *  ^
						 */
						if (*walk == '{')
						{
							inBrace = true;
							++walk;
						}

						/**
						 * Valid number.
						 * $nn or ${nn}
						 *  ^       ^
						 */
						if (*walk >= '0' && *walk <= '9')
						{
							backref = *walk - '0';
							++walk;
						}
						else if (inBrace)
						{
							nameCheck = true;

							/**
							 * Not a valid number.
							 * Checking as string.
							 * ${name}
							 *   ^
							 */
							if (*walk)
							{
								const char *pch = strchr(walk, '}');

								/**
								 * A named group maximum character is 32 (PCRE), so the buffer
								 * holds 32 characters plus the terminator.
								 * Anything longer cannot be a valid group name: the lookup is
								 * skipped so the buffer is never overrun, and the text ends up
								 * copied as it is, exactly like a "${" without closing brace.
								 */
								char name[32 + 1];

								if (pch != NULL && (size_t)(pch - walk) < sizeof(name))
								{
									strncopy(name, walk, pch - walk + 1);

									/**
									 * PCRE_INFO_OPTIONS stores an unsigned long, so the receiving
									 * variable must have that exact type.
									 */
									unsigned long options = 0;
									int num = 0;
									pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &options);

									/**
									 * If PCRE_DUPNAMES is set, the pcre_copy_named_substring function should be used
									 * as pcre_get_stringnumber output order is not defined.
									 */
									if (options & PCRE_DUPNAMES)
									{
										memset(ovector, 0, REGEX_MAX_SUBPATTERNS);

										/**
										 * pcre_copy_named_substring needs a vector containing sub-patterns ranges
										 * for a given match.
										 */
										for (size_t j = 0; j < mMatchesSubs.at(i); ++j)
										{
											ovector[2 * j] = mSubStrings.at(baseIndex + j).start;
											ovector[2 * j + 1] = mSubStrings.at(baseIndex + j).end;
										}

										num = pcre_copy_named_substring(re, subject, ovector, mMatchesSubs.at(i), name, ptr + browsed, (int)textMaxLen);

										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											browsed += num - 1;
											s = pch;
											break;
										}
										++pch;
									}
									else
									{
										/**
										 * Retrieve sub-pattern index from a given name.
										 */
										num = pcre_get_stringnumber(re, name);
										if (num != PCRE_ERROR_NOSUBSTRING)
										{
											backref = num;
											walk = ++pch;
										}
									}

									if (num == PCRE_ERROR_NOSUBSTRING || num >= (int)mMatchesSubs.at(i))
									{
										/**
										 * If a sub-string for a given match is not found, or if > to
										 * number of sub-patterns we still need to check if this
										 * group name is a valid one because if so we want to escape it.
										 * Looking at the name table.
										 */
										bool found = false;
										for (size_t k = 0; k < mSubsNameTable.length(); ++k)
										{
											if (!mSubsNameTable.at(k).name.compare(name))
											{
												--browsed;
												s = --pch;
												found = true;
												break;
											}
										}

										if (found)
										{
											/* Resumes the loop over the replace string. */
											continue;
										}
									}
								}
							}
						}

						if (!nameCheck)
						{
							/**
							 * Valid second number.
							 * $nn or ${nn}
							 *   ^       ^
							 */
							if (*walk && *walk >= '0' && *walk <= '9')
							{
								backref = backref * 10 + *walk - '0';
								++walk;
							}

							if (inBrace)
							{
								/**
								 * Invalid specifier
								 * Either hit EOS or missing }.
								 * ${n  or ${nn  or ${nx or ${nnx
								 *    ^        ^       ^        ^
								 */
								if (*walk == '\0' || *walk != '}')
								{
									backref = -1;
								}
								else
								{
									++walk;
								}
							}
						}

						length = walk - s;
						s = --walk;

						/**
						 * We can't provide a capture number >= to total that pcre_exec has found.
						 * 0 is implicitly accepted, same behavior as $&.
						 */
						if (backref >= 0 && backref < mNumSubpatterns)
						{
							/**
							 * Valid available index for a given match.
							 */
							if ((size_t)backref < mMatchesSubs.at(i))
							{
								/**
								 * Concatenate retrieved sub-string.
								 * length - 1 to overwrite EOS.
								 */
								GetSubstring(baseIndex + backref, ptr + browsed, textMaxLen, &length);
								browsed += length - 1;
							}
							else
							{
								/**
								 * Valid unavailable index for a given match.
								 */
								--browsed;
							}
						}
						else
						{
							/**
							 * If we are here it means the syntax is valid but sub-pattern doesn't exist.
							 * So, copy as it is, including $.
							 */
							memcpy(ptr + browsed, s - length, length + 1);
							browsed += length;
						}

						break;
					}
					default:
					{
						/**
						 * Not a valid format modifier.
						 * So we copy characters as it is.
						 */
						*(ptr + browsed) = *s;
						break;
					}
				}
			}
			else
			{
				/**
				 * At this point, direct copy.
				 */
				*(ptr + browsed) = c;
			}
		}

		*(ptr + browsed) = '\0';

		/**
		 * Concatenate only replace string of each match,
		 * as we don't want to copy unmatched characters.
		 */
		if (flags & REGEX_FORMAT_NOCOPY)
		{
			/**
			 * We want just the first occurrence.
			 */
			if (total++ && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}

			strncat(output, toReplace, textMaxLen + 1);
		}
		else
		{
			/**
			 * Retrieves full string of a given match.
			 */
			const char *search = GetSubstring(baseIndex, toSearch, textMaxLen, &searchLen);

			/**
			 * We get something to replace, but the sub-pattern to search is empty.
			 * We insert replacement either at the start or the end of the string.
			 */
			if (*toReplace && !searchLen)
			{
				if (output - text > 0)
				{
					strncat(output, toReplace, textMaxLen);
				}
				else
				{
					strncat(toReplace, text, textMaxLen);
					strncopy(text, toReplace, strlen(toReplace) + 1);
				}

				++total;
			}
			else if ((output = UTIL_ReplaceEx(text + mSubStrings.at(baseIndex).start + diffLength, textMaxLen, search, searchLen, toReplace, browsed, false)) != NULL)
			{
				/**
				 * Then we simply do a replace.
				 * Probably not the most efficient, but this should be at least safe.
				 * To avoid issue where the function could find a string which is not at the expected index,
				 * We force the input string to start from index of the full match.
				 */
				++total;
			}

			if (total && (flags & REGEX_FORMAT_FIRSTONLY))
			{
				break;
			}
		}

		/**
		 * mMatchesSubs is a flat list containing all sub-patterns of all matches.
		 * A number of sub-patterns can vary per match. So we calculate the position in the list,
		 * from where the first sub-pattern result of current match starts.
		 */
		baseIndex  += mMatchesSubs.at(i);
		diffLength += browsed - searchLen;
	}

	delete[] toReplace;

	if (toSearch != NULL)
	{
		delete[] toSearch;
	}

	/**
	 * Return the number of successful replacements.
	 */
	return total;
}
示例#13
0
	// named subpatterns
	// Looks up the capture-group number for `stringname` in the compiled
	// pattern. The name is validated by check_stringname() first.
	int get_stringnumber(const char *stringname) const throw(std::exception) {
		_ASSERTE(compiled());
		check_stringname(stringname);

		const int number = pcre_get_stringnumber(_Code::get(), stringname);
		return number;
	}
示例#14
0
/* PCRE2-named entry point implemented on top of pcre_get_stringnumber():
   looks up the named group `name` in the regex held in code->regex and
   returns that call's result unchanged. */
int pcre2_substring_number_from_name_8(const pcre2_code_8 *code, PCRE2_SPTR8 name) {
  const char *group_name = (const char *)name;

  return pcre_get_stringnumber(code->regex, group_name);
}
示例#15
0
/* Executes a pattern match.

   Arguments (all OCaml values):
     v_opt        - runtime options passed to pcre_exec
     v_rex        - regular expression; field 1 holds the compiled pattern,
                    field 2 the (possibly NULL) pcre_extra block
     v_ofs        - start offset into the subject
     v_subj       - subject string (its length is taken from the value)
     v_subgroups2 - number of offset slots to convert back to OCaml integers
                    (presumably twice the number of subgroups - confirm
                    against the OCaml caller)
     v_ovec       - offset vector receiving the match offsets
     v_maybe_cof  - optional callout function

   On success the first v_subgroups2 elements of v_ovec hold the offsets as
   OCaml integers and unit is returned.

   Raises Invalid_argument when the offset lies outside the subject, and one
   of the exceptions registered in pcre_exc_* when pcre_exec fails.  When a
   callout aborts the match with PCRE_ERROR_CALLOUT, the exception stored in
   the callout data is raised. */
CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_ofs,
                              value v_subj, value v_subgroups2, value v_ovec,
                              value v_maybe_cof)
{
  const int ofs = Int_val(v_ofs), len = caml_string_length(v_subj);

  if (ofs > len || ofs < 0)
    caml_invalid_argument("Pcre.pcre_exec_stub: illegal offset");

  {
    const pcre *code = (pcre *) Field(v_rex, 1);  /* Compiled pattern */
    const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);  /* Extra info */
    const char *ocaml_subj = String_val(v_subj);  /* Subject string */
    const int opt = Int_val(v_opt);  /* Runtime options */
    int subgroups2 = Int_val(v_subgroups2);
    const int subgroups2_1 = subgroups2 - 1;
    /* Size of the vector handed to pcre_exec: 1.5 times the number of
       offset slots that are reported back */
    const int subgroups3 = (subgroups2 >> 1) + subgroups2;

    /* Special case when no callout functions specified */
    if (v_maybe_cof == None) {
      /* The storage of the OCaml array itself is used as the int work
         vector for pcre_exec */
      int *ovec = (int *) &Field(v_ovec, 0);

      /* Performs the match */
      const int ret =
        pcre_exec(code, extra, ocaml_subj, len, ofs, opt, ovec, subgroups3);

      if (ret < 0) {
        /* The raise functions do not return, hence no break statements */
        switch(ret) {
          case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found);
          case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial);
          case PCRE_ERROR_MATCHLIMIT :
            caml_raise_constant(*pcre_exc_MatchLimit);
          case PCRE_ERROR_BADPARTIAL :
            caml_raise_constant(*pcre_exc_BadPartial);
          case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8);
          case PCRE_ERROR_BADUTF8_OFFSET :
            caml_raise_constant(*pcre_exc_BadUTF8Offset);
          default :
            caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
        }
      }

      else {
        const int *ovec_src = ovec + subgroups2_1;
        /* Destination slots are OCaml values; "value" (not "long int") is
           used so the slot width is right on every platform */
        value *ovec_dst = (value *) ovec + subgroups2_1;

        /* Converts offsets from C-integers to OCaml-Integers
           This is a bit tricky, because there are 32- and 64-bit platforms
           around and OCaml chooses the larger possibility for representing
           integers when available (also in arrays) - not so the PCRE.
           Source and destination share the same memory, so the conversion
           walks from the last slot down to the first: every int is read
           before the wider destination slot covering it is written. */
        while (subgroups2--) {
          *ovec_dst = Val_int(*ovec_src);
          --ovec_src; --ovec_dst;
        }
      }
    }

    /* There are callout functions */
    else {
      value v_cof = Field(v_maybe_cof, 0);
      /* Initialised because it is registered as a GC root below before it
         receives its real contents */
      value v_substrings = Val_unit;
      /* The match runs on copies outside the OCaml heap.
         NOTE(review): presumably because the callout can run OCaml code
         during the match - confirm against the callout handler */
      char *subj = caml_stat_alloc(sizeof(char) * len);
      int *ovec = caml_stat_alloc(sizeof(int) * subgroups3);
      int ret;
      struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL };
      struct pcre_extra new_extra =
#ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
#else
        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
#endif

      memcpy(subj, ocaml_subj, len);

      Begin_roots3(v_rex, v_cof, v_substrings);
        Begin_roots2(v_subj, v_ovec);
          v_substrings = caml_alloc_small(2, 0);
        End_roots();

        Field(v_substrings, 0) = v_subj;
        Field(v_substrings, 1) = v_ovec;

        /* Callout data: pointers to the rooted values */
        cod.v_substrings_p = &v_substrings;
        cod.v_cof_p = &v_cof;
        new_extra.callout_data = &cod;

        if (extra == NULL) {
          ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
                          subgroups3);
        }
        else {
          /* Carries the settings of the existing extra block over */
          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
          new_extra.study_data = extra->study_data;
          new_extra.match_limit = extra->match_limit;
          new_extra.tables = extra->tables;
#ifdef PCRE_CONFIG_MATCH_LIMIT_RECURSION
          new_extra.match_limit_recursion = extra->match_limit_recursion;
#endif

          ret = pcre_exec(code, &new_extra, subj, len, ofs, opt, ovec,
                          subgroups3);
        }

        /* Memory from caml_stat_alloc must be released with caml_stat_free */
        caml_stat_free(subj);
      End_roots();

      if (ret < 0) {
        /* Released before raising: the raise functions do not return */
        caml_stat_free(ovec);
        switch(ret) {
          case PCRE_ERROR_NOMATCH : caml_raise_constant(*pcre_exc_Not_found);
          case PCRE_ERROR_PARTIAL : caml_raise_constant(*pcre_exc_Partial);
          case PCRE_ERROR_MATCHLIMIT :
            caml_raise_constant(*pcre_exc_MatchLimit);
          case PCRE_ERROR_BADPARTIAL :
            caml_raise_constant(*pcre_exc_BadPartial);
          case PCRE_ERROR_BADUTF8 : caml_raise_constant(*pcre_exc_BadUTF8);
          case PCRE_ERROR_BADUTF8_OFFSET :
            caml_raise_constant(*pcre_exc_BadUTF8Offset);
          case PCRE_ERROR_CALLOUT : caml_raise(cod.v_exn);
          default :
            caml_raise_with_string(*pcre_exc_InternalError, "pcre_exec_stub");
        }
      }

      else {
        int *ovec_src = ovec + subgroups2_1;
        value *ovec_dst = &Field(v_ovec, 0) + subgroups2_1;

        /* Copies the offsets from the C work vector into the OCaml array,
           converting them to OCaml integers */
        while (subgroups2--) {
          *ovec_dst = Val_int(*ovec_src);
          --ovec_src; --ovec_dst;
        }

        caml_stat_free(ovec);
      }
    }
  }

  return Val_unit;
}

/* Byte-code entry point for pcre_exec_stub.
   Required because the primitive takes more than 5 arguments: they arrive
   as the array argv and are forwarded, in order, to pcre_exec_stub.
   argn is not used. */
CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
{
  value v_opt = argv[0];
  value v_rex = argv[1];
  value v_ofs = argv[2];
  value v_subj = argv[3];
  value v_subgroups2 = argv[4];
  value v_ovec = argv[5];
  value v_maybe_cof = argv[6];

  return pcre_exec_stub(v_opt, v_rex, v_ofs, v_subj,
                        v_subgroups2, v_ovec, v_maybe_cof);
}

/* Generates a new set of chartables for the current locale (see the PCRE
   man page).  The result is a finalized block (finalizer:
   pcre_dealloc_tables) whose field 1 holds the pointer returned by
   pcre_maketables().  The unit argument is ignored. */
CAMLprim value pcre_maketables_stub(value __unused v_unit)
{
  /* Resource accounting passed to caml_alloc_final: one table set is
     counted as 864 bytes out of a maximum of 86400 bytes of unreclaimed
     table sets, i.e. the GC does a full cycle every 100 allocations */
  const value v_tables =
    caml_alloc_final(2, pcre_dealloc_tables, 864, 86400);

  Field(v_tables, 1) = (value) pcre_maketables();

  return v_tables;
}

/* OCaml binding for isspace(): converts the OCaml integer v_c to a C int,
   tests it with isspace() and returns the outcome as an OCaml boolean. */
CAMLprim value pcre_isspace_stub(value v_c)
{
  const int ch = Int_val(v_c);

  return Val_bool(isspace(ch));
}

/* Returns the number of the substring associated with the name v_name in
   the regular expression v_rex (compiled pattern in field 1).
   Raises Invalid_argument "Named string not found" when PCRE reports
   PCRE_ERROR_NOSUBSTRING. */
CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
{
  const pcre *code = (pcre *) Field(v_rex, 1);
  const int group_number = pcre_get_stringnumber(code, String_val(v_name));

  if (group_number == PCRE_ERROR_NOSUBSTRING)
    caml_invalid_argument("Named string not found");

  return Val_int(group_number);
}

/* Returns an OCaml array holding the names of the named substrings of the
   regular expression v_rex.
   Raises the exception registered in pcre_exc_InternalError (with the
   string "pcre_names_stub") if any of the three pcre_fullinfo_stub queries
   returns a non-zero result. */
CAMLprim value pcre_names_stub(value v_rex)
{
  CAMLparam0();
  CAMLlocal1(v_res);
  int name_count;
  int entry_size;
  const char *tbl_ptr;
  int i;

  /* Number of entries in the name table */
  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count) != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  /* Size of one name table entry */
  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size) != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  /* Start of the name table */
  if (pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr) != 0)
    caml_raise_with_string(*pcre_exc_InternalError, "pcre_names_stub");

  v_res = caml_alloc(name_count, 0);

  i = 0;
  while (i < name_count) {
    /* The name is read from offset 2 of the entry; per the PCRE
       documentation the two leading bytes hold the group number */
    value v_entry_name = caml_copy_string(tbl_ptr + 2);
    Store_field(v_res, i, v_entry_name);
    tbl_ptr += entry_size;
    ++i;
  }

  CAMLreturn(v_res);
}
示例#16
0
// Looks up the named group `substring` in this object's pattern (mPattern)
// and returns the result of pcre_get_stringnumber() unchanged.
int cPCRE::GeStringNumber(const string &substring)
{
	const char *groupName = substring.c_str();

	return pcre_get_stringnumber(this->mPattern, groupName);
}