Esempio n. 1
0
/*
 * Translate a named subpattern into its group number.
 *
 * Each name-table entry is a 2-byte big-endian group number followed by the
 * name; the binary search below relies on the entries being ordered by name.
 *
 * Returns the group number on a hit, the non-zero pcre_fullinfo() status if a
 * table query fails, or PCRE_ERROR_NOSUBSTRING when the name is absent (always
 * the case when PCRE_UTF16 is enabled, where the lookup is compiled out).
 */
int
pcre_get_stringnumber(const pcre *code, const pcre_char *stringname)
{
/* FIXME: This doesn't work for UTF-16 because the name table has 8-bit characters in it! */
#if !PCRE_UTF16
int status;
int stride;
int hi, lo;
uschar *table;

status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &hi);
if (status != 0) return status;
if (hi <= 0) return PCRE_ERROR_NOSUBSTRING;

status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &stride);
if (status != 0) return status;
status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &table);
if (status != 0) return status;

/* Half-open search window [lo, hi). */
for (lo = 0; lo < hi; )
  {
  int probe = (hi + lo) / 2;
  uschar *slot = table + stride*probe;
  int order = strcmp(stringname, (char *)(slot + 2));
  if (order == 0) return (slot[0] << 8) + slot[1];
  if (order > 0)
    lo = probe + 1;
  else
    hi = probe;
  }
#else
 UNUSED_PARAM(code);
 UNUSED_PARAM(stringname);
#endif

return PCRE_ERROR_NOSUBSTRING;
}
Esempio n. 2
0
/*
 * Store the text captured by every named group of the compiled pattern into
 * msg, using the group's name as the value name.
 *
 *   s        matcher; cast to LogMatcherPcreRe to reach pattern/extra
 *   msg      message that receives the named values
 *   matches  offset vector filled by the match (pairs of start/end offsets)
 *   value    subject string the offsets refer to
 *
 * Each name-table entry is a 2-byte big-endian group number followed by the
 * NUL-terminated name.  Groups that did not take part in the match (negative
 * offsets) are skipped instead of producing a pointer before `value`.
 */
static void
log_matcher_pcre_re_feed_named_substrings(LogMatcher *s, LogMessage *msg, int *matches, const gchar *value)
{
   gchar *name_table = NULL;
   gint i = 0;
   gint namecount = 0;
   gint name_entry_size = 0;
   LogMatcherPcreRe *self = (LogMatcherPcreRe *) s;

   if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMECOUNT, &namecount) != 0 || namecount <= 0)
     return;

   /* Before we can access the substrings, we must extract the table for
      translating names to numbers, and the size of each entry in the table.
    */
   if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMETABLE, &name_table) != 0 || name_table == NULL)
     return;
   if (pcre_fullinfo(self->pattern, self->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size) != 0 || name_entry_size <= 0)
     return;

   for (i = 0; i < namecount; i++)
     {
       /* Read the number through unsigned bytes: plain gchar may be signed,
          which would sign-extend group numbers whose low byte is >= 0x80. */
       const guchar *entry = (const guchar *) name_table + i * name_entry_size;
       gint n = (entry[0] << 8) | entry[1];
       gint begin_index = matches[2*n];
       gint end_index = matches[2*n+1];

       if (begin_index < 0 || end_index < 0)
         continue;

       log_msg_set_value_by_name(msg, (const gchar *) (entry + 2), value + begin_index, end_index - begin_index);
     }
}
Esempio n. 3
0
/*
 * Walk the compiled pattern's name table and record, for every named group
 * whose name parses with CAPTURE_FORMAT, the PCRE group number on the matching
 * grok_capture (looked up by the parsed capture id), then re-add the capture
 * so the stored copy is updated.
 *
 * Nothing is done if any of the name-table queries fails.  Entries whose name
 * does not parse are skipped rather than looked up with an indeterminate id.
 */
static void grok_study_capture_map(grok_t *grok) {
  char *nametable = NULL;
  grok_capture *gct;
  int nametable_size = 0;
  int nametable_entrysize = 0;
  int i = 0;

  if (pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMECOUNT, &nametable_size) != 0
      || pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nametable_entrysize) != 0
      || pcre_fullinfo(grok->re, NULL, PCRE_INFO_NAMETABLE, &nametable) != 0
      || nametable == NULL) {
    return;
  }

  for (i = 0; i < nametable_size; i++) {
    /* Entry layout: 2-byte big-endian group number, then the name.  The bytes
     * are read as unsigned so numbers >= 128 are not sign-extended. */
    const unsigned char *entry =
        (const unsigned char *)nametable + i * nametable_entrysize;
    int stringnum = (entry[0] << 8) + entry[1];
    int capture_id = 0;

    if (sscanf((const char *)entry + 2, CAPTURE_FORMAT, &capture_id) < 1) {
      continue;
    }
    grok_log(grok, LOG_COMPILE, "Studying capture %d", capture_id);
    gct = (grok_capture *)grok_capture_get_by_id(grok, capture_id);
    assert(gct != NULL);
    gct->pcre_capture_number = stringnum;

    /* update the database with the new data */
    grok_capture_add(grok, gct);
  }
}
Esempio n. 4
0
/*
 * Run the compiled pattern against str and fill p[] regexec-style.
 *
 *   reg     compiled pattern (cast to PCRE_regexp_t)
 *   str     NUL-terminated subject; an empty subject returns -1 immediately
 *   nmatch  number of match slots requested; also sizes the offset vector
 *   p       output array; p[0..matched-1] get start/end offsets and
 *           p[matched].rm_so is set to -1 as a terminator, so the array needs
 *           room for that extra element
 *   eflags  passed through to pcre_exec() as its options
 *
 * Returns 0 on a match, otherwise the negative pcre_exec() result.
 * For named groups, rm_name/rm_namelen of the slot with the group's number are
 * pointed at the name stored inside the compiled pattern's name table.
 */
static int pcre_regexec(KonohaContext *kctx, kregexp_t *reg, const char *str, size_t nmatch, kregmatch_t p[], int eflags)
{
	PCRE_regexp_t *preg = (PCRE_regexp_t *)reg;
	int res, nvector[nmatch*3];
	nvector[0] = 0;
	size_t idx, matched = nmatch;
	size_t len = strlen(str);
	if(len == 0) return -1;
	if((res = pcre_exec(preg->re, NULL, str, len, 0, eflags, nvector, nmatch*3)) >= 0) {
		/* PCRE_INFO_NAMECOUNT stores an int; receiving it in a size_t would
		 * only fill part of the object. */
		int nm_count = 0;
		matched = (res > 0 && (size_t)res < nmatch) ? (size_t)res : nmatch;
		res = 0;
		for (idx = 0; idx < matched; idx++) {
			p[idx].rm_so = nvector[2*idx];
			p[idx].rm_eo = nvector[2*idx+1];
		}
		p[idx].rm_so = -1;
		if(pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMECOUNT, &nm_count) == 0 && nm_count > 0) {
			unsigned char *nm_table = NULL;
			int nm_entry_size = 0;
			pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMETABLE, &nm_table);
			pcre_fullinfo(preg->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nm_entry_size);
			if(nm_table != NULL && nm_entry_size > 0) {
				unsigned char *tbl_ptr = nm_table;
				for (idx = 0; idx < (size_t)nm_count; idx++, tbl_ptr += nm_entry_size) {
					/* entry = 2-byte big-endian group number + name */
					size_t n_idx = (size_t)((tbl_ptr[0] << 8) | tbl_ptr[1]);
					unsigned char *n_name = tbl_ptr + 2;
					/* never write past the slots the caller asked for */
					if(n_idx >= nmatch) continue;
					p[n_idx].rm_name = n_name;
					p[n_idx].rm_namelen = strlen((char *)n_name);
				}
			}
		}
	}
	return res;
}
Esempio n. 5
0
/*
 * Fill the Java array `ret` with the names of the pattern's named capture
 * groups, storing each name at index (group number - 1).
 *
 *   code, extra  handles passed straight to pcre_fullinfo()
 *   ret          object array that receives one Java string per named group
 *
 * Returns the first negative pcre_fullinfo() status on failure, otherwise the
 * status of the last query.
 */
JNIEXPORT jint JNICALL
Java_com_oracle_truffle_r_runtime_ffi_jni_JNI_1PCRE_nativeGetCaptureNames(JNIEnv *env, jclass c, jlong code, jlong extra, jobjectArray ret) {
    int nameCount;
    int nameEntrySize;
    char* nameTable;
    int res;
    res = pcre_fullinfo(code, extra, PCRE_INFO_NAMECOUNT, &nameCount);
    if (res < 0) {
        return res;
    }
    res = pcre_fullinfo(code, extra, PCRE_INFO_NAMEENTRYSIZE, &nameEntrySize);
    if (res < 0) {
        return res;
    }
    res = pcre_fullinfo(code, extra, PCRE_INFO_NAMETABLE, &nameTable);
    if (res < 0) {
        return res;
    }
    // from GNU R's grep.c
    for (int i = 0; i < nameCount; i++) {
        // Entry layout: 2-byte big-endian group number, then the name.
        // Decode through unsigned bytes; with a signed char a group number
        // >= 128 would sign-extend and yield a wrong (even negative) index.
        const unsigned char* entry = (const unsigned char*) nameTable + nameEntrySize * i;
        int captureNum = ((entry[0] << 8) | entry[1]) - 1;
        (*env)->SetObjectArrayElement(env, ret, captureNum, (*env)->NewStringUTF(env, (const char*) (entry + 2)));
    }
    return res;
}
Esempio n. 6
0
/*
 * Ported from get_first_set() in pcre_get.c in pcre source.
 *
 * Resolve group_name to a group number for the match result mr.
 *
 * When the pattern allows duplicate names (PCRE_DUPNAMES set, or the JCHANGED
 * info flag is non-zero), every table entry carrying the name is examined and
 * the number of the first one whose start offset in mr->ovector is set
 * (>= 0) is returned.  If none of them is set, the number of the first entry
 * for the name is returned.  Otherwise the lookup is delegated to
 * pcre_get_stringnumber().
 *
 * Returns a group number, or a negative PCRE error code if the name is unknown.
 */
static int matchres_first_set(cs_matchres_t *mr, const char *group_name) {
  cs_regexp_t *regexp = mr->regexp;
  pcre *re = regexp->re;
  pcre_extra *extra = regexp->extra;
  /* Initialised so a failing pcre_fullinfo() falls through to the simple path. */
  unsigned long options = 0;
  int jchanged = 0;
  pcre_fullinfo(re, extra, PCRE_INFO_OPTIONS, &options);
  pcre_fullinfo(re, extra, PCRE_INFO_JCHANGED, &jchanged);
  if (options & PCRE_DUPNAMES || jchanged) {
    char *first;
    char *last;
    uchar *entry;
    int entry_len = pcre_get_stringtable_entries(re, group_name, &first, &last);
    if (entry_len < 0) {
      return entry_len;
    }
    for (entry = (uchar *)first; entry <= (uchar *)last; entry += entry_len) {
      int n = entry[0] << 8 | entry[1];
      if (mr->ovector[n * 2] >= 0) {
        return n;
      }
    }
    /* The loop leaves `entry` one element past `last`; decoding it there would
     * read beyond the entries that belong to this name, so fall back to the
     * first matching entry instead. */
    entry = (uchar *)first;
    return entry[0] << 8 | entry[1];
  } else {
    return pcre_get_stringnumber(re, group_name);
  }
}
Esempio n. 7
0
/*
 * Find the number of the capturing group called stringname.
 *
 * The name table is queried from the compiled pattern; every entry holds the
 * group number in its first two bytes (high byte first) followed by the name.
 * The lookup is a binary search, so it depends on the entries being ordered
 * by name.
 *
 * Returns the group number, a non-zero pcre_fullinfo() status if a query
 * fails, or PCRE_ERROR_NOSUBSTRING if there is no such name.
 */
int
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
int err;
int entry_bytes;
int upper, lower = 0;
uschar *names;

err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &upper);
if (err != 0)
  return err;
if (upper <= 0)
  return PCRE_ERROR_NOSUBSTRING;

err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entry_bytes);
if (err != 0)
  return err;
err = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &names);
if (err != 0)
  return err;

while (lower < upper)
  {
  int middle = (upper + lower) / 2;
  uschar *candidate = names + entry_bytes*middle;
  int diff = strcmp(stringname, (char *)(candidate + 2));

  if (diff > 0)
    lower = middle + 1;
  else if (diff < 0)
    upper = middle;
  else
    return (candidate[0] << 8) + candidate[1];
  }

return PCRE_ERROR_NOSUBSTRING;
}
Esempio n. 8
0
/* TODO: audit this function */
/*
 * Debug-log every named capture group of pd->re: its number, its name and a
 * hex dump of the bytes it captured (at most MATCH_MAXLEN bytes, flagged with
 * " (trunc)" when cut short).
 *
 *   pd       compiled pattern data (re, and ex for the PCRE1 build)
 *   buffer   subject the offsets in ovector refer to
 *   ovector  offset pairs produced by the match
 *
 * A group that did not take part in the match (negative start, or end before
 * start) is logged with an empty dump.
 */
static void named_substr_print(const struct cli_pcre_data *pd, const unsigned char *buffer, int *ovector)
{
    int i, j, length, namecount = 0, trunc;
    unsigned char *tabptr;
    int name_entry_size = 0;
    unsigned char *name_table = NULL;
    const unsigned char *start;
    char outstr[2*MATCH_MAXLEN+1];

    /* determine if there are named substrings */
#if USING_PCRE2
    (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMECOUNT, &namecount);
#else
    (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMECOUNT, &namecount);
#endif
    if (namecount <= 0) {
        cli_dbgmsg("cli_pcre_report: no named substrings\n");
    }
    else {
        cli_dbgmsg("cli_pcre_report: named substrings\n");

        /* extract named substring translation table */
#if USING_PCRE2
        (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMETABLE, &name_table);
        (void)pcre2_pattern_info(pd->re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
#else
        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMETABLE, &name_table);
        (void)pcre_fullinfo(pd->re, pd->ex, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
#endif
        /* the info calls are best-effort; bail out if they produced nothing */
        if (name_table == NULL || name_entry_size <= 0)
            return;

        /* print named substring information */
        tabptr = name_table;
        for (i = 0; i < namecount; i++) {
            /* entry = 2-byte big-endian group number + name */
            int n = (tabptr[0] << 8) | tabptr[1];
            int so = ovector[2*n];
            int eo = ovector[2*n+1];

            if (so < 0 || eo < so) {
                /* unset group: nothing to dump, and no pointer before buffer */
                start = buffer;
                length = 0;
            } else {
                start = buffer + so;
                length = eo - so;
            }

            trunc = 0;
            if (length > MATCH_MAXLEN) {
                trunc = 1;
                length = MATCH_MAXLEN;
            }

            /* start empty so a zero-length capture never prints stale data */
            outstr[0] = '\0';
            /* bytes are read as unsigned so "%02x" always yields two digits */
            for (j = 0; j < length; ++j)
                snprintf(outstr+(2*j), sizeof(outstr)-(2*j), "%02x", (unsigned int)start[j]);

            cli_dbgmsg("cli_pcre_report: (%d) %*s: %s%s\n", n, name_entry_size - 3, tabptr + 2,
                       outstr, trunc ? " (trunc)":"");
            /*
            cli_dbgmsg("named_substr:  (%d) %*s: %.*s%s\n", n, name_entry_size - 3, tabptr + 2,
                       length, start, trunc ? " (trunc)":"");
            */
            tabptr += name_entry_size;
        }
    }
}
Esempio n. 9
0
/*
 * Match `subject` against the compiled pattern in `self`, starting at byte
 * offset `start`.
 *
 * Returns UpNull() when pcre_exec() reports an error or no match.  Otherwise
 * returns a list:
 *   capture == false: the start and end offset of the whole match;
 *   capture == true : the pcre_exec() result rc, then 2*rc offsets, then the
 *                     number of named groups, then an (index, name) pair for
 *                     each named group.
 * If the name table cannot be queried, zero named groups are reported.
 */
UpObject* UpRegexMatch(UpRegex* self, const char* subject, int start, bool capture) {
    int ovector[999];
    int rc = pcre_exec(self->re, NULL, subject, strlen(subject), start, 0, ovector, 999);
    if (rc < 0) {
        // switch(rc) {
        //   case PCRE_ERROR_NOMATCH      : printf("String did not match the pattern\n");        break;
        //   case PCRE_ERROR_NULL         : printf("Something was null\n");                      break;
        //   case PCRE_ERROR_BADOPTION    : printf("A bad option was passed\n");                 break;
        //   case PCRE_ERROR_BADMAGIC     : printf("Magic number bad (compiled re corrupt?)\n"); break;
        //   case PCRE_ERROR_UNKNOWN_NODE : printf("Something kooky in the compiled re\n");      break;
        //   case PCRE_ERROR_NOMEMORY     : printf("Ran out of memory\n");                       break;
        //   default                      : printf("Unknown error\n");                           break;
        //   }

        return UpNull();
    } else {
        UpList* results = UpListCreate();

        if (capture) {
            UpListAppend(results, (UpObject*)UpIntegerCreate(rc));

            for (int i = 0; i < rc*2; ++i) {
                UpInteger* value = UpIntegerCreate(ovector[i]);
                UpListAppend(results, (UpObject*)value);
            }

            int nameCount = 0;
            int entrySize = 0;
            char* table = NULL;

            // Any failed query leaves us without a usable table: report no names
            // rather than walking uninitialised data.
            if (pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMECOUNT, &nameCount) != 0
                || pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrySize) != 0
                || pcre_fullinfo(self->re, NULL, PCRE_INFO_NAMETABLE, &table) != 0
                || table == NULL || nameCount < 0) {
                nameCount = 0;
            }

            UpListAppend(results, (UpObject*)UpIntegerCreate(nameCount));

            // Entry layout: 2-byte big-endian group number, then the name.
            // Decode via unsigned bytes so numbers >= 128 are not sign-extended.
            const unsigned char* entry = (const unsigned char*)table;
            for (int i = 0; i < nameCount; ++i) {
                uint16_t captureIndex = (uint16_t)((entry[0] << 8) | entry[1]);
                const char* captureName = (const char*)(entry + 2);
                entry += entrySize;

                UpListAppend(results, (UpObject*)UpIntegerCreate(captureIndex));
                UpListAppend(results, (UpObject*)UpStringCreate(captureName));
            }
        } else {
            for (int i = 0; i < 2; ++i) {
                UpInteger* value = UpIntegerCreate(ovector[i]);
                UpListAppend(results, (UpObject*)value);
            }

        }

        return (UpObject*)results;
    }
}
Esempio n. 10
0
// Compile and study `regex` with the given PCRE compile options, cap the
// matcher's recursion depth at maxDepth, and allocate the offset vector
// (three ints per capturing group, plus one group for the whole match).
//
// Throws a const char* describing the error if the pattern fails to compile.
RegEx::RegEx(const char * regex, int options, unsigned long int maxDepth)
{
   const char*  pcre_error;
   int          erroffset;

   // compile and study the expression
   re = pcre_compile(regex, options, &pcre_error, &erroffset, NULL);
   if (re == NULL)
   {
      UtlString errorMsg("Regular Expression compile error: ");
      errorMsg.append(pcre_error);
      errorMsg.append(" at offset ");
      // Bounded formatting: "%9d" can need more than 9 characters for large
      // or negative values, so size the buffer for any int and let snprintf
      // enforce the limit.
      char offsetStr[16];
      snprintf(offsetStr, sizeof(offsetStr), "%9d", erroffset);
      errorMsg.append(offsetStr);
      errorMsg.append(" in expression '");
      errorMsg.append(regex);
      errorMsg.append("'");

      // NOTE(review): this throws a pointer into errorMsg, a local that is
      // destroyed during unwinding; presumably the handler then reads freed
      // memory - confirm against UtlString and the catch sites before relying
      // on the message text.
      throw errorMsg.data();
      assert(FALSE); // regex failed to compile
   }
   pe = pcre_study(re, 0, &pcre_error);
   if ( pcre_error == NULL )
   {
      // save the compilation block sizes for the copy constructor.
      pcre_fullinfo(re, pe, PCRE_INFO_SIZE, &re_size);
      pcre_fullinfo(re, pe, PCRE_INFO_STUDYSIZE, &study_size);
      allocated_study = false;
   }
   else
   {
      re_size = 0;
      study_size = 0;
   }

   if (!pe)
   {
      // pcre_study didn't return any study data,
      // but we need the pcre_extra block anyway for the recursion limit,
      // so get one
      pe = (pcre_extra*)pcre_malloc(sizeof(pcre_extra));
      memset(pe, 0, sizeof(pcre_extra));
   }
   // set the maximum recursion depth option in the pcre_extra (pe) block
   pe->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
   pe->match_limit_recursion = maxDepth;

   // allocate space for match results based on how many substrings
   // there are in the expression (+1 for the entire match)
   pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount);
   substrcount++;
   ovector = new int[3*substrcount];
   matchlist = NULL;
};
Esempio n. 11
0
/*
 * Build a regex_data that points directly at a serialized PCRE pattern (and
 * optional study data) inside mmap_area, advancing the area past what was
 * consumed.
 *
 * Layout read here: uint32 pattern length, pattern bytes, uint32 study-data
 * length, study-data bytes (absent when the length is 0).
 *
 *   mmap_area       area being parsed; entries are consumed via next_entry()
 *   regex           receives the new regex_data, or NULL on failure
 *   regex_compiled  set to true on success
 *
 * Returns 0 on success, -1 on a short/invalid area, a pattern length of zero,
 * or a length that disagrees with what pcre_fullinfo() reports.
 */
int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
		    int unused __attribute__((unused)), bool *regex_compiled)
{
	int rc;
	uint32_t entry_len;
	size_t info_len;

	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
	if (rc < 0 || !entry_len)
		return -1;

	*regex = regex_data_create();
	if (!(*regex))
		return -1;

	/* the pattern lives in the mapping, not in memory owned by this object */
	(*regex)->owned = 0;
	(*regex)->regex = (pcre *)mmap_area->next_addr;
	rc = next_entry(NULL, mmap_area, entry_len);
	if (rc < 0)
		goto err;

	/*
	 * Check that regex lengths match. pcre_fullinfo()
	 * also validates its magic number.
	 */
	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
	if (rc < 0 || info_len != entry_len)
		goto err;

	/*
	 * A study-data length of zero is valid: regex_writef() writes 0 when the
	 * pattern has no study data.  Only a failed read is an error here.
	 */
	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
	if (rc < 0)
		goto err;

	if (entry_len) {
		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
		rc = next_entry(NULL, mmap_area, entry_len);
		if (rc < 0)
			goto err;

		/* Check that study data lengths match. */
		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
				   PCRE_INFO_STUDYSIZE, &info_len);
		if (rc < 0 || info_len != entry_len)
			goto err;
	}

	*regex_compiled = true;
	return 0;

err:
	regex_data_free(*regex);
	*regex = NULL;
	return -1;
}
Esempio n. 12
0
/*
 * Look up the capturing-group number that belongs to the name `stringname`.
 *
 * NOTE(review): this excerpt begins part-way through a preprocessor
 * conditional - the #endif right after the signature closes an #if that is not
 * visible here, so other signatures for the same body presumably precede it.
 *
 * The name count, entry size and table pointer are fetched with the
 * pcre_fullinfo() flavour selected by COMPILE_PCRE8 / COMPILE_PCRE16 /
 * COMPILE_PCRE32, then the table is binary-searched.
 *
 * Returns the value GET2() reads from the start of the matching entry, the
 * non-zero status of a failed info query, or PCRE_ERROR_NOSUBSTRING when there
 * are no names or the name is not found.
 */
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname)
#endif
{
int rc;
int entrysize;
int top, bot;
pcre_uchar *nametable;

/* 8-bit library build */
#ifdef COMPILE_PCRE8
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;

if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  return rc;
#endif
/* 16-bit library build */
#ifdef COMPILE_PCRE16
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;

if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  return rc;
if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  return rc;
#endif
/* 32-bit library build */
#ifdef COMPILE_PCRE32
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
  return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;

if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
  return rc;
if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
  return rc;
#endif

/* Binary search over the half-open range [bot, top); `top` starts out as the
number of entries. The name inside an entry starts IMM2_SIZE units in. */

bot = 0;
while (top > bot)
  {
  int mid = (top + bot) / 2;
  pcre_uchar *entry = nametable + entrysize*mid;
  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
    (pcre_uchar *)(entry + IMM2_SIZE));
  if (c == 0) return GET2(entry, 0);
  if (c > 0) bot = mid + 1; else top = mid;
  }

return PCRE_ERROR_NOSUBSTRING;
}
Esempio n. 13
0
/*
 * Compare two compiled regular expressions.
 *
 * TODO: replace this with a comparison of the expressions themselves.  What is
 * compared here is the raw compiled representation; since that contains
 * pointers and metadata, a match can only be reported when regex1 == regex2.
 * Ideally an algorithm that systematically decides the equivalence of the two
 * automatons would be used instead.
 *
 * Returns SELABEL_EQUAL when both compiled blocks have the same size and
 * identical bytes, SELABEL_INCOMPARABLE otherwise.
 */
int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
{
	size_t first_size, second_size;
	int rc;

	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &first_size);
	assert(rc == 0);
	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &second_size);
	assert(rc == 0);

	if (first_size == second_size &&
	    memcmp(regex1->regex, regex2->regex, first_size) == 0)
		return SELABEL_EQUAL;

	return SELABEL_INCOMPARABLE;
}
Esempio n. 14
0
/*
 * re.info(regexp, symbol): report one integer property of a compiled regexp.
 *
 * Accepted symbols are s_capturecount, s_backrefmax and s_firstchar, mapped to
 * the PCRE_INFO_CAPTURECOUNT, PCRE_INFO_BACKREFMAX and PCRE_INFO_FIRSTCHAR
 * queries.  Any other symbol raises a type error on argument 2.
 *
 * Returns the property as an integer object, the argument-parsing error, or
 * an "internal error" object when pcre_fullinfo() fails.
 */
static EC_OBJ EcLibRe_Info( EC_OBJ stack, EcAny userdata )
{
	EC_OBJ  regexp;
	EcUInt  infosym;
	EC_OBJ  parsed;
	int     what;
	int     value = 0;

	parsed = EcParseStackFunction( "re.info", TRUE, stack, "O!k", tc_regexp, &regexp, &infosym );
	if (EC_ERRORP(parsed))
		return parsed;

	/* Translate the option symbol into the matching PCRE query. */
	if (infosym == s_capturecount)
		what = PCRE_INFO_CAPTURECOUNT;
	else if (infosym == s_backrefmax)
		what = PCRE_INFO_BACKREFMAX;
	else if (infosym == s_firstchar)
		what = PCRE_INFO_FIRSTCHAR;
	else
	{
		EC_TYPEERROR_F( "re.info", 2, tc_symbol,
					    EcMakeSymbolFromId( infosym ), "expected an info option symbol" );
		return Ec_ERROR;
	}

	if (pcre_fullinfo( EC_PCRE(regexp), EC_PCREXTRA(regexp), what, &value ) < 0)
		return EcReError( "internal error: pcre_fullinfo", -1 );

	return EcMakeInt( value );
}
Esempio n. 15
0
/*
 * Serialize a compiled pattern to fp.
 *
 * Output layout: uint32 size of the compiled pattern, the pattern bytes,
 * uint32 size of the study data (0 when get_pcre_extra() yields none), then
 * the study-data bytes when present.
 *
 * Returns 0 on success, -1 if a size query fails or a write comes up short.
 */
int regex_writef(struct regex_data *regex, FILE *fp,
		 int unused __attribute__((unused)))
{
	pcre_extra *study = get_pcre_extra(regex);
	size_t nbytes;
	uint32_t field;

	/* compiled pattern: length prefix followed by the raw bytes */
	if (pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &nbytes) < 0)
		return -1;

	field = nbytes;
	if (fwrite(&field, sizeof(uint32_t), 1, fp) != 1)
		return -1;
	if (fwrite(regex->regex, 1, field, fp) != field)
		return -1;

	/* study data: length prefix (0 if there is none), then the bytes */
	nbytes = 0;
	if (study &&
	    pcre_fullinfo(regex->regex, study, PCRE_INFO_STUDYSIZE, &nbytes) < 0)
		return -1;

	field = nbytes;
	if (fwrite(&field, sizeof(uint32_t), 1, fp) != 1)
		return -1;
	if (study && fwrite(study->study_data, 1, field, fp) != field)
		return -1;

	return 0;
}
Esempio n. 16
0
/*
 * Locate the run of name-table entries that carry the name `stringname`.
 *
 * One matching entry is found by binary search (which depends on the table
 * being ordered by name); the run is then widened in both directions while
 * the neighbouring entries have the same name.
 *
 * On success *firstptr and *lastptr point at the first and last matching
 * entry and the size of one entry is returned.  Otherwise the return value is
 * the non-zero status of a failed pcre_fullinfo() query, or
 * PCRE_ERROR_NOSUBSTRING when the name does not occur.
 */
int
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
  char **firstptr, char **lastptr)
{
int status;
int stride;
int hi, lo;
uschar *table, *table_end;

status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &hi);
if (status != 0) return status;
if (hi <= 0) return PCRE_ERROR_NOSUBSTRING;

status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &stride);
if (status != 0) return status;
status = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &table);
if (status != 0) return status;

/* Address of the final entry; the upward scan must not step beyond it. */
table_end = table + stride * (hi - 1);

for (lo = 0; lo < hi; )
  {
  int probe = (hi + lo) / 2;
  uschar *hit = table + stride*probe;
  int order = strcmp(stringname, (char *)(hit + 2));

  if (order > 0) { lo = probe + 1; continue; }
  if (order < 0) { hi = probe; continue; }

  /* Found one entry: extend to every adjacent entry with the same name. */
    {
    uschar *head = hit;
    uschar *tail = hit;
    while (head > table &&
           strcmp(stringname, (char *)(head - stride + 2)) == 0)
      head -= stride;
    while (tail < table_end &&
           strcmp(stringname, (char *)(tail + stride + 2)) == 0)
      tail += stride;
    *firstptr = (char *)head;
    *lastptr = (char *)tail;
    return stride;
    }
  }

return PCRE_ERROR_NOSUBSTRING;
}
Esempio n. 17
0
// Match `string` against the compiled expression.
//
// When `groups` is non-NULL, element i receives the text of group i for every
// group pcre_exec() reported (element 0 being the whole match).
// Returns false if the object is not initialized or pcre_exec() returns a
// value that is not positive; true otherwise.
bool
Regex::match(const MyString & string,
			 ExtArray<MyString> * groups)
{
	if (!this->isInitialized())
		return false;

	int captures;
	pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &captures);

	// Three ints per group, with one extra group for the whole match.
	int slots = 3 * (captures + 1);
	int * offsets = (int *) malloc(slots * sizeof(int));
	if (offsets == NULL) {
			// XXX: EXCEPTing sucks
		EXCEPT("No memory to allocate data for re match");
	}

	int found = pcre_exec(re, NULL, string.Value(), string.Length(),
						  0 /* start matching at the beginning */,
						  options, offsets, slots);

	if (groups != NULL) {
		int idx = 0;
		while (idx < found) {
			const int from = offsets[idx * 2];
			const int to = offsets[idx * 2 + 1] - 1;
			(*groups)[idx] = string.Substr(from, to);
			++idx;
		}
	}

	free(offsets);
	return found > 0;
}
Esempio n. 18
0
// Run the compiled expression against `s` beginning at `offset` (in global
// mode the offset is taken relative to the end of the previous match on the
// same string object).
//
// The start/end pairs of the whole match and of each reported group are kept
// in m_marks, and the subject is remembered for later calls.
// Returns the pcre_exec() result when it is positive, 0 otherwise (including
// when the effective offset equals the length of the string).
int
PME::match(const std::string & s, ///< s String to match against
			 unsigned offset ///< offset Offset at which to start matching
			 )
{
	// if we got a new string, reset the global position counter
	if ( addressoflaststring != (void *) &s ) {
//		fprintf ( stderr, "PME RESETTING: new string\n" );
		lastglobalposition = 0;
	}

	if ( m_isglobal ) {
		offset += lastglobalposition;
	}

	//check that the offset isn't at the last position in the string
	// (done before anything is allocated, so this exit cannot leak)
	if( offset == s.length() )
		return 0;

	// PCRE_INFO_CAPTURECOUNT stores an int; receiving it in a wider,
	// uninitialised object would leave part of the value indeterminate.
	int capturecount = 0;
	pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &capturecount);
	int msize = 3*(capturecount+1);
	int *m = new int[msize];

	vector<markers> marks;

	nMatches = pcre_exec(re, extra, s.c_str(), s.length(), offset, 0, m, msize);
	
	for ( int i = 0, *p = m ; i < nMatches ; i++, p+=2 ) {
		marks.push_back(markers(p[0], p[1]));
	}

	delete[] m;

	// store the last set of results locally, as well as returning them
	m_marks = marks;
	laststringmatched = s;
	addressoflaststring = (void *) &s;

	if ( m_isglobal ) {

		if ( nMatches == PCRE_ERROR_NOMATCH ) {
//			fprintf ( stderr, "PME RESETTING: reset for no match\n" );
			lastglobalposition = 0; // reset the position for next match (perl does this)
		} else if ( nMatches > 0 ) {
//			fprintf ( stderr, "PME RESETTING: setting to %d\n", marks[0].second );
			lastglobalposition = marks[0].second; // increment by the end of the match
		} else {
//			fprintf ( stderr, "PME RESETTING: reset for no unknown\n" );
			lastglobalposition = 0;
		}
	}


	int returnvalue = 0;
	if ( nMatches > 0 )
		returnvalue = nMatches;

	return returnvalue;
}
Esempio n. 19
0
// Compile and study _pattern once; later calls return true immediately.
//
// The pattern is compiled in UTF-8 mode with Unicode properties enabled and
// without a UTF-8 validity check, combined with the _lineBreak option bits.
// On a compile or study error the message is stored in _errstr (and the
// compile error offset in _erroffset) and false is returned.  On success the
// number of capturing groups is stored in _nsubpatterns.
bool RegExp::Impl::compile ()
{
	if (_isReady)
		return true;

	int compile_options = _lineBreak
			| PCRE_UTF8           // Run in UTF-8 mode
			| PCRE_NO_UTF8_CHECK  // Do not check the pattern for UTF-8 validity (only relevant if PCRE_UTF8 is set)
			| PCRE_UCP;           // Use Unicode properties for \d, \w, etc.

	const char * errsstr;
	_re = pcre_compile (_pattern.c_str(), compile_options, & errsstr, & _erroffset, nullptr/*tables*/);

	if (!_re) {
		_errstr = String::fromUtf8(errsstr);
		return false;
	}

	_extra = pcre_study(_re, 0, & errsstr);

	// A NULL result is an error only when a message was set as well.
	if (!_extra) {
		if (errsstr) {
			_errstr = String::fromUtf8(errsstr);
			return false;
		}
	}

	// Make the call outside the verification macro: if CWT_VERIFY is ever
	// compiled out, the query must still run and rc must still be assigned.
	int rc = pcre_fullinfo(_re, _extra, PCRE_INFO_CAPTURECOUNT, & _nsubpatterns);
	CWT_VERIFY(rc == 0);
	if (rc)
		return false;

	_isReady = true;
	return true;
}
Esempio n. 20
0
// Compile _pattern with default options and size the offset vector for it.
//
// Throws compile_error carrying the failing offset when the pattern is
// rejected.  Afterwards _ovector holds three ints per capturing group plus
// three for the whole match, and _ovectorSize holds that element count.
void Pattern::compile (void)
{
    const char * errText;
    int errPos;

    _re = pcre_compile (_pattern.c_str(), 0, &errText, &errPos, NULL);

    if (_re == NULL) {
        // Render the offset as text for the exception message.
        std::stringstream digits;
        digits << errPos;

        std::string msg ("PCRE compiling failed at offset " + digits.str());

        throw compile_error (msg);
    }

    // Ask how many capturing groups there are and replace the old vector.
    int groups;

    pcre_fullinfo (_re, NULL, PCRE_INFO_CAPTURECOUNT, &groups);

    delete[] _ovector;

    _ovector = new int[ (groups + 1) *3];

    _ovectorSize = (groups + 1) * 3;
}
Esempio n. 21
0
/*
 * Condition handler: match the string from cond_str(a,0,0) against the regular
 * expression from cond_str(a,1,0).
 *
 * Only id == CPCRE_PLAIN is handled.  Returns 1 when pcre_exec() returns 0 or
 * a positive count (for a positive count the captured substrings are handed
 * to zpcre_get_substrings() first), and 0 for no match, for any other
 * pcre_exec() error, for an unknown id, and when the expression cannot be
 * compiled or queried.
 */
static int
cond_pcre_match(char **a, int id)
{
    pcre *pcre_pat;
    const char *pcre_err;
    char *lhstr, *rhre;
    int r = 0, pcre_opts = 0, pcre_errptr, capcnt = 0, *ov, ovsize;
    int matched = 0;

    lhstr = cond_str(a,0,0);
    rhre = cond_str(a,1,0);

    switch(id) {
    case CPCRE_PLAIN:
	pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
	/* An invalid expression yields NULL; nothing can match it. */
	if (pcre_pat == NULL)
	    break;
	/* Without a valid capture count the offset vector cannot be sized. */
	if (pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt) != 0) {
	    pcre_free(pcre_pat);
	    break;
	}
	ovsize = (capcnt+1)*3;
	/* NOTE(review): ov is never released in this function; presumably it
	 * needs the counterpart of zalloc() - confirm which call that is. */
	ov = zalloc(ovsize*sizeof(int));
	r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
	if (r == 0) {
	    matched = 1;
	} else if (r > 0) {
	    zpcre_get_substrings(lhstr, ov, r, NULL);
	    matched = 1;
	}
	/* r == PCRE_ERROR_NOMATCH or any other error: no match */
	pcre_free(pcre_pat);
	break;
    }

    return matched;
}
Esempio n. 22
0
File: regexp.c Progetto: Hmaal/slash
/*
 * Regexp#match(string [, index]): match the receiver against argv[0],
 * optionally starting at the index given in argv[1].
 *
 * Returns nil when the index cannot be converted to a byte offset or when
 * there is no match.  Other pcre_exec() results are passed to
 * check_pcre_error().  On a match, a Regexp_Match object is returned that
 * references the regexp, the subject string and the offset vector.
 */
SLVAL
sl_regexp_match(sl_vm_t* vm, SLVAL self, size_t argc, SLVAL* argv)
{
    sl_regexp_t* re = get_regexp_check(vm, self);
    sl_string_t* str = sl_get_string(vm, argv[0]);
    sl_regexp_match_t* match;
    int* caps;
    int ncaps, rc;
    int offset = 0;

    if(argc > 1) {
        offset = sl_get_int(sl_expect(vm, argv[1], vm->lib.Int));
    }
    offset = sl_string_byte_offset_for_index(vm, argv[0], offset);
    if(offset < 0) {
        return vm->lib.nil;
    }

    /* three ints per group, with one extra group for the whole match */
    pcre_fullinfo(re->re, re->study, PCRE_INFO_CAPTURECOUNT, &ncaps);
    ncaps = (ncaps + 1) * 3;
    caps = sl_alloc(vm->arena, sizeof(int) * ncaps);

    rc = pcre_exec(re->re, re->study, (char*)str->buff, str->buff_len, offset, PCRE_NEWLINE_LF, caps, ncaps);
    if(rc == PCRE_ERROR_NOMATCH) {
        return vm->lib.nil;
    }
    check_pcre_error(vm, rc);

    match = (sl_regexp_match_t*)sl_get_ptr(sl_allocate(vm, vm->lib.Regexp_Match));
    match->captures = caps;
    match->capture_count = ncaps / 3;
    match->match_string = argv[0];
    match->re = re;
    return sl_make_ptr((sl_object_t*)match);
}
/*
 * Compile a regular expression.
 *
 * Arguments:
 *  preg        points to a structure for recording the compiled expression
 *  pattern     the pattern to compile
 *  cflags      compilation flags (AP_REG_ICASE, AP_REG_NEWLINE and
 *              AP_REG_DOTALL are translated to PCRE options; PCRE_DUPNAMES
 *              is always enabled)
 *
 * Returns:      0 on success
 *               various non-zero codes on failure
 *
 * On success preg->re_nsub holds the number of capturing groups.
 */
AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags)
{
    const char *errorptr;
    int erroffset;
    int errcode = 0;
    int options = PCRE_DUPNAMES;
    int nsub = 0;

    if ((cflags & AP_REG_ICASE) != 0)
        options |= PCRE_CASELESS;
    if ((cflags & AP_REG_NEWLINE) != 0)
        options |= PCRE_MULTILINE;
    if ((cflags & AP_REG_DOTALL) != 0)
        options |= PCRE_DOTALL;

    preg->re_pcre =
        pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL);
    preg->re_erroffset = erroffset;

    if (preg->re_pcre == NULL) {
        /*
         * There doesn't seem to be constants defined for compile time error
         * codes. 21 is "failed to get memory" according to pcreapi(3).
         */
        if (errcode == 21)
            return AP_REG_ESPACE;
        return AP_REG_INVARG;
    }

    /*
     * PCRE_INFO_CAPTURECOUNT stores an int.  Receive it in an int and assign
     * it afterwards, so the result is right whatever integer type re_nsub
     * has, and is 0 rather than indeterminate if the query fails.
     */
    (void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL,
                        PCRE_INFO_CAPTURECOUNT, &nsub);
    preg->re_nsub = nsub;
    return 0;
}
Esempio n. 24
0
/*
 * POSIX-style wrapper: compile `pattern` into preg.
 *
 * The REG_* bits in cflags are translated to their PCRE option counterparts.
 * preg->re_erroffset always receives the offset reported by the compiler.
 * On failure the PCRE error code is mapped through eint[] (REG_BADPAT if it
 * is beyond the end of that table); on success preg->re_nsub is set to the
 * number of capturing groups and 0 is returned.
 */
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
const char *errtext;
int errpos;
int errnum;
int groups = 0;
int options = 0;

options |= (cflags & REG_ICASE)    ? PCRE_CASELESS        : 0;
options |= (cflags & REG_NEWLINE)  ? PCRE_MULTILINE       : 0;
options |= (cflags & REG_DOTALL)   ? PCRE_DOTALL          : 0;
options |= (cflags & REG_NOSUB)    ? PCRE_NO_AUTO_CAPTURE : 0;
options |= (cflags & REG_UTF8)     ? PCRE_UTF8            : 0;
options |= (cflags & REG_UCP)      ? PCRE_UCP             : 0;
options |= (cflags & REG_UNGREEDY) ? PCRE_UNGREEDY        : 0;

preg->re_pcre = pcre_compile2(pattern, options, &errnum, &errtext,
  &errpos, NULL);
preg->re_erroffset = errpos;

/* Safety: an error code too big for the translation vector (which should not
happen, but we all make mistakes) is reported as REG_BADPAT. */

if (preg->re_pcre == NULL)
  {
  if (errnum < (int)(sizeof(eint)/sizeof(const int)))
    return eint[errnum];
  return REG_BADPAT;
  }

(void)pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
  &groups);
preg->re_nsub = (size_t)groups;
return 0;
}
Esempio n. 25
0
/**
 * Compiles a pattern with the given PCRE flags, replacing any expression
 * this object currently holds.
 *
 * @return 1 on success, 0 if the pattern failed to compile (the error text
 *         and offset are left in mError / mErrorOffset).
 */
int RegEx::Compile(const char *pattern, int iFlags)
{
	if (!mFree)
	{
		Clear();
	}

	re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL);
	if (!re)
	{
		return 0;
	}

	mFree = false;

	/**
	 * Number of captured groups; one is added
	 * to account for the full match.
	 */
	pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &mNumSubpatterns);
	mNumSubpatterns += 1;

	/**
	 * Build the named-group table, which holds
	 * an index and a name for each group.
	 */
	MakeSubpatternsTable(mNumSubpatterns);

	return 1;
}
Esempio n. 26
0
/*
 * Execute p->re against p->subject (p->s_length bytes) from offset 0.
 *
 * A fresh offset vector is allocated on every call, replacing any previous
 * one; p->ovecsize is set to 3 * (capture count + 2).  The pcre_exec() result
 * is stored in p->rc and returned.  UTF-8 validation of the subject is
 * skipped unless USE_ICONV is defined.
 */
static int pcre_local_exec(pcre_t *p)
{
#ifndef USE_ICONV
	const int exec_options = PCRE_NO_UTF8_CHECK;
#else
	const int exec_options = 0;
#endif
	int slots;

	pcre_fullinfo(p->re, NULL, PCRE_INFO_CAPTURECOUNT, &slots);
	slots = (slots + 2) * 3;

	if(p->ovector)
		FREE(p->ovector);
	p->ovector = CALLOCATE(slots+1, int, TAG_TEMPORARY, "pcre_local_exec"); //too much, but who cares
	p->ovecsize = slots;

	p->rc = pcre_exec(p->re, NULL, p->subject, p->s_length, 0,
			exec_options, p->ovector, slots);

	return p->rc;
}
Esempio n. 27
0
/*
 * Lua: compile(pattern [, options [, study_options]]) -> regexp userdata
 *
 * PCRE_UTF8 is always added to the compile options.  The pattern is studied
 * unless argument 3 is an explicit nil; a missing argument 3 selects
 * PCRE_STUDY_JIT_COMPILE.  Compile and study failures raise a Lua error.
 * The capture count is stored in the userdata, which is returned.
 */
static int regexp_compile(lua_State *L) {
  const char *pattern = luaL_checkstring(L, 1);
  int options = luaL_optint(L, 2, 0) | PCRE_UTF8;
  const int want_study = (lua_type(L, 3) != LUA_TNIL);
  int study_flags = 0;
  int err_code;
  int err_offset;
  const char *err_text;
  cs_regexp_t *regexp;

  if (want_study)
    study_flags = luaL_optint(L, 3, PCRE_STUDY_JIT_COMPILE);

  /* Zeroed userdata with the regexp metatable attached. */
  regexp = lua_newuserdata(L, sizeof(cs_regexp_t));
  memset(regexp, 0, sizeof(cs_regexp_t));
  luaL_getmetatable(L, RE_MTBL_NAME);
  lua_setmetatable(L, -2);

  regexp->re = pcre_compile2(pattern, options, &err_code, &err_text,
      &err_offset, NULL);
  if (regexp->re == NULL)
    return luaL_error(L, "%s (pattern offset: %d)", err_text, err_offset + 1);

  if (want_study) {
    regexp->extra = pcre_study(regexp->re, study_flags, &err_text);
    if (err_text != NULL)
      return luaL_error(L, "%s", err_text);
  }

  pcre_fullinfo(regexp->re, regexp->extra, PCRE_INFO_CAPTURECOUNT,
      &regexp->capture_cnt);

  return 1;
}
/*
 * Regexp#initialize(source [, options])
 *
 * Frees any previously attached regexp data, compiles a copy of `source` with
 * the options translated by mrb_mruby_to_pcre_options(), and stores the copy
 * and the options in @source / @options.  Raises E_ARGUMENT_ERROR when the
 * pattern does not compile.  Each named group is then reported to the object
 * through its "name_push" method as (name, group number).
 *
 * Returns self.
 */
mrb_value
regexp_pcre_initialize(mrb_state *mrb, mrb_value self)
{
  struct mrb_regexp_pcre *reg = (struct mrb_regexp_pcre *)DATA_PTR(self);
  mrb_value source, opt = mrb_nil_value();
  const char *errstr = NULL;
  int erroff = 0, coptions;

  /* Drop whatever a previous initialize left behind. */
  if (reg) {
    mrb_regexp_free(mrb, reg);
  }
  DATA_TYPE(self) = &mrb_regexp_type;
  DATA_PTR(self) = NULL;

  mrb_get_args(mrb, "S|o", &source, &opt);

  reg = mrb_malloc(mrb, sizeof(struct mrb_regexp_pcre));
  reg->re = NULL;
  DATA_PTR(self) = reg;

  coptions = mrb_mruby_to_pcre_options(opt);
  source = mrb_str_new(mrb, RSTRING_PTR(source), RSTRING_LEN(source));
  reg->re = pcre_compile(RSTRING_PTR(source), coptions, &errstr, &erroff, NULL);
  if (reg->re == NULL) {
    mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid regular expression");
  }
  mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), source);
  mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@options"), mrb_fixnum_value(mrb_pcre_to_mruby_options(coptions)));

  int namecount, name_entry_size, i;
  unsigned char *name_table;

  pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
  if (namecount > 0) {
    pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMETABLE, &name_table);
    pcre_fullinfo(reg->re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);

    /* entry = 2-byte big-endian group number + NUL-terminated name */
    for (i = 0; i < namecount; i++) {
      unsigned char *slot = name_table + i * name_entry_size;
      const char *label = (const char *)(slot + 2);
      int number = (slot[0] << 8) | slot[1];
      mrb_funcall(mrb, self, "name_push", 2, mrb_str_new(mrb, label, strlen(label)), mrb_fixnum_value(number));
    }
  }

  return self;
}
Esempio n. 29
0
/*
 * Configuration handler: add one URL rewrite rule.
 *
 * The directive is ignored (return 0) unless `context` equals the forum name.
 * It must have exactly three arguments: args[0] the regular expression,
 * args[1] the replacement string, args[2] the macro expression.
 *
 * Returns 0 on success or when ignored, -1 on a wrong argument count, macro
 * parse failure, out-of-memory, or a regexp compile/study error.  On failure
 * everything acquired up to that point is released.
 */
int flt_urlrewrite_handle(cf_configfile_t *cfile,cf_conf_opt_t *opt,const u_char *context,u_char **args,size_t argnum) {
  flt_urlrewrite_rule_t n_rewrite;
  const u_char *error;
  int err_offset;
  
  if(flt_urlrewrite_fname == NULL) flt_urlrewrite_fname = cf_hash_get(GlobalValues,"FORUM_NAME",10);
  if(!context || cf_strcmp(context,flt_urlrewrite_fname) != 0) return 0;
  
  if(argnum != 3) {
    return -1;
  }
  
  /* the rule array is created when the first rule arrives */
  if(!flt_urlrewrite_rules) {
    flt_urlrewrite_rules = cf_alloc(NULL,sizeof(cf_array_t),1,CF_ALLOC_MALLOC);
    cf_array_init(flt_urlrewrite_rules,sizeof(flt_urlrewrite_rule_t),(void(*)(void *))flt_urlrewrite_destroy);
  }
  
  n_rewrite.macro_tree = flt_urlrewrite_parse_macro(args[2]);
  if(!n_rewrite.macro_tree) {
    return -1;
  }

  n_rewrite.replacement = strdup(args[1]);
  if(!n_rewrite.replacement) {
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    /* stop here: going on would use, and later free again, the tree that was
       just released, and would store a NULL replacement */
    return -1;
  }

  n_rewrite.regexp = pcre_compile(args[0], 0, (const char **)&error, &err_offset, NULL);
  if(!n_rewrite.regexp) {
    fprintf(stderr,"flt_urlrewrite: Regexp error with \"%s\": %s\n", args[0], error);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }
  n_rewrite.regexp_extra = pcre_study(n_rewrite.regexp, 0, (const char **)&error);
  if(error) {
    fprintf(stderr,"Regexp study error with \"%s\": %s\n", args[0], error);
    pcre_free(n_rewrite.regexp);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }
  /* offset vector: three ints per capturing group plus the whole match */
  n_rewrite.match_count = 0;
  pcre_fullinfo(n_rewrite.regexp, n_rewrite.regexp_extra, PCRE_INFO_CAPTURECOUNT, &(n_rewrite.match_count));
  n_rewrite.match_arr = (int *)cf_alloc(NULL, sizeof(int),(n_rewrite.match_count + 1) * 3, CF_ALLOC_MALLOC);
  if(!n_rewrite.match_arr) {
    pcre_free(n_rewrite.regexp_extra);
    pcre_free(n_rewrite.regexp);
    flt_urlrewrite_free_macro_tree(n_rewrite.macro_tree);
    free((void *)n_rewrite.replacement);
    return -1;
  }

  cf_array_push(flt_urlrewrite_rules,&n_rewrite);

  return 0;
}
Esempio n. 30
0
/*
 * Number of match slots needed for `reg`: its capture count plus one for the
 * whole match.  Falls back to KREGEXP_MATCHSIZE when the capture count cannot
 * be queried.
 */
static int pcre_nmatchsize(KonohaContext *kctx, kregexp_t *reg)
{
	int groups = 0;
	PCRE_regexp_t *compiled = (PCRE_regexp_t *)reg;
	int status = pcre_fullinfo(compiled->re, NULL, PCRE_INFO_CAPTURECOUNT, &groups);

	return (status == 0) ? groups + 1 : KREGEXP_MATCHSIZE;
}