예제 #1
0
/* Create a match data block whose ovector pair count is the compiled
pattern's top_bracket value plus one.

Arguments:
  code      the compiled pattern
  gcontext  general context for the allocation, or NULL

When gcontext is NULL the compiled pattern pointer itself is passed on, cast
to a general context pointer (presumably the compiled code starts with its
own memory-control data - confirm against the pcre2_real_code layout).

Returns:    whatever pcre2_match_data_create() returns
*/

PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(const pcre2_code *code,
  pcre2_general_context *gcontext)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
pcre2_general_context *memctl =
  (gcontext != NULL)? gcontext : (pcre2_general_context *)code;

return pcre2_match_data_create(re->top_bracket + 1, memctl);
}
예제 #2
0
/* Compile a pattern through the POSIX-style wrapper.

Arguments:
  preg      regex_t block that receives the compiled pattern, the match data
            block, the capture count, the error offset and a copy of cflags
  pattern   the pattern text; zero-terminated unless REG_PEND is set, in
            which case it ends at preg->re_endp
  cflags    REG_xxx flags; ICASE, NEWLINE, DOTALL, NOSPEC, UTF, UCP and
            UNGREEDY are translated to the corresponding PCRE2 options

Returns:    0 on success
            REG_ESPACE if the match data block cannot be allocated
            otherwise a REG_xxx code derived from the PCRE2 compile error

On every failure return both preg->re_pcre2_code and preg->re_match_data are
NULL, so nothing in the block is left dangling or uninitialized.
*/

PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
PCRE2_SIZE erroffset;
PCRE2_SIZE patlen;
int errorcode;
int options = 0;
int re_nsub = 0;

patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
  PCRE2_ZERO_TERMINATED;

if ((cflags & REG_ICASE) != 0)    options |= PCRE2_CASELESS;
if ((cflags & REG_NEWLINE) != 0)  options |= PCRE2_MULTILINE;
if ((cflags & REG_DOTALL) != 0)   options |= PCRE2_DOTALL;
if ((cflags & REG_NOSPEC) != 0)   options |= PCRE2_LITERAL;
if ((cflags & REG_UTF) != 0)      options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0)      options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;

preg->re_cflags = cflags;

/* Give the match data field a defined value before anything can fail. */

preg->re_match_data = NULL;
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, patlen, options,
  &errorcode, &erroffset, NULL);
preg->re_erroffset = erroffset;

if (preg->re_pcre2_code == NULL)
  {
  unsigned int i;

  /* A negative value is a UTF error; otherwise all error codes are greater
  than COMPILE_ERROR_BASE, but check, just in case. */

  if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
  errorcode -= COMPILE_ERROR_BASE;

  /* eint1 is indexed directly; eint2 holds (PCRE2 code, POSIX code) pairs. */

  if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
    return eint1[errorcode];
  for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
    if (errorcode == eint2[i]) return eint2[i+1];
  return REG_BADPAT;
  }

(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
  PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);

if (preg->re_match_data == NULL)
  {
  /* Do not leave a pointer to the freed code block behind. */

  pcre2_code_free(preg->re_pcre2_code);
  preg->re_pcre2_code = NULL;
  return REG_ESPACE;
  }

return 0;
}
예제 #3
0
/*
 * Report whether the compiled pattern `re` matches anywhere in the
 * zero-terminated string `inputString`.
 *
 * Returns `match` when pcre2_match() reports success, `notMatch` when it
 * reports no match or an error, or when the match data block cannot be
 * allocated.
 */
static matchOrNot findResult(pcre2_code* re, char inputString[])
{
	pcre2_match_data *match_data = pcre2_match_data_create(20, NULL);
	int rc;

	if (match_data == NULL)
		return notMatch;

	rc = pcre2_match(re, (PCRE2_SPTR)inputString, PCRE2_ZERO_TERMINATED, 0, 0,
	                 match_data, NULL);
	pcre2_match_data_free(match_data);

	/*
	 * Failures (including "no match") are negative. A return of 0 is still a
	 * successful match: it only means the ovector was too small to hold every
	 * captured substring, which does not matter for a yes/no answer.
	 */
	return rc < 0 ? notMatch : match;
}
예제 #4
0
/**
 * Compile all the given regexs.
 *
 * Walks the list starting at rn. A node whose rn->re is already non-NULL is
 * assumed to be compiled and is skipped. On any failure the node's rn->re is
 * reset to NULL and everything allocated for that node is released, so the
 * list never holds a half-initialized regex.
 *
 * If dict is non-NULL, a message is printed for every regex name that is not
 * found in the dictionary; this does not stop the compilation.
 *
 * Return 0 on success, else an error code: the compiler's error code when a
 * pattern fails to compile, or -1 when memory cannot be allocated.
 */
int compile_regexs(Regex_node *rn, Dictionary dict)
{
	while (rn != NULL)
	{
		/* If rn->re non-null, assume compiled already. */
		if(rn->re == NULL)
		{
			int rc;
			regex_t *re = malloc(sizeof(regex_t));

			if (NULL == re) return -1;
			rn->re = re;

#if HAVE_PCRE2_H
			PCRE2_SIZE erroffset;
			re->re_md = NULL;
			re->re_code =
				pcre2_compile((PCRE2_SPTR)rn->pattern, PCRE2_ZERO_TERMINATED,
				              PCRE2_UTF|PCRE2_UCP, &rc, &erroffset, NULL);
			if (NULL != re->re_code)
			{
				rc = 0;
				re->re_md = pcre2_match_data_create(0, NULL);
				if (NULL == re->re_md)
				{
					/* Out of memory: undo the work done for this node. */
					pcre2_code_free(re->re_code);
					free(re);
					rn->re = NULL;
					return -1;
				}
			}
#else
			const int erroffset = -1;

			/* REG_ENHANCED is needed for macOS to support \w etc. */
#ifndef REG_ENHANCED
#define REG_ENHANCED 0
#endif
			rc = regcomp(re, rn->pattern, REG_NOSUB|REG_EXTENDED|REG_ENHANCED);
#endif

			if (rc)
			{
				/* Report while rn->re is still set, in case the reporting
				 * function reads it; only then release the regex. */
				prt_regerror("Failed to compile regex", rn, rc ,erroffset);
				free(re);
				rn->re = NULL;
				return rc;
			}

			/* Check that the regex name is defined in the dictionary. */
			if ((NULL != dict) && !boolean_dictionary_lookup(dict, rn->name))
			{
				/* TODO: better error handing. Maybe remove the regex? */
				prt_error("Error: Regex name %s not found in dictionary!\n",
				       rn->name);
			}
		}
		rn = rn->next;
	}
	return 0;
}
예제 #5
0
파일: hbregexc.c 프로젝트: emazv72/core
/* Run pRegEx against the first nLen bytes of szString and report whether it
   matched. When fFull is set the match additionally has to start at offset 0
   and end exactly at nLen. In PCRE2 builds a one-pair match data block is
   created for the call and released before returning. */
HB_BOOL hb_regexMatch( PHB_REGEX pRegEx, const char * szString, HB_SIZE nLen, HB_BOOL fFull )
{
#if defined( HB_HAS_PCRE2 )
   HB_REGMATCH * aMatches = pcre2_match_data_create( 1, NULL );
#else
   HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( 1 ) ];
#endif
   HB_BOOL fResult = 0 < ( s_reg_exec )( pRegEx, szString, nLen, 1, aMatches );

   /* a full match narrows a positive result to whole-string coverage */
   if( fResult && fFull )
      fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
                ( HB_SIZE ) HB_REGMATCH_EO( aMatches, 0 ) == nLen;

#if defined( HB_HAS_PCRE2 )
   pcre2_match_data_free( aMatches );
#endif

   return fResult;
}
예제 #6
0
/* Compile a zero-terminated pattern through the POSIX-style wrapper.

Arguments:
  preg      regex_t block that receives the compiled pattern, the match data
            block, the capture count and the error offset
  pattern   the zero-terminated pattern text
  cflags    REG_xxx flags; ICASE, NEWLINE, DOTALL, NOSUB, UTF, UCP and
            UNGREEDY are translated to the corresponding PCRE2 options

Returns:    0 on success
            REG_ESPACE if the match data block cannot be allocated
            otherwise a REG_xxx code derived from the PCRE2 compile error
*/

PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
PCRE2_SIZE erroffset;
int errorcode;
int options = 0;
int re_nsub = 0;

if ((cflags & REG_ICASE) != 0)    options |= PCRE2_CASELESS;
if ((cflags & REG_NEWLINE) != 0)  options |= PCRE2_MULTILINE;
if ((cflags & REG_DOTALL) != 0)   options |= PCRE2_DOTALL;
if ((cflags & REG_NOSUB) != 0)    options |= PCRE2_NO_AUTO_CAPTURE;
if ((cflags & REG_UTF) != 0)      options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0)      options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;

/* Give the match data field a defined value before anything can fail. */

preg->re_match_data = NULL;
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
   options, &errorcode, &erroffset, NULL);
preg->re_erroffset = erroffset;

if (preg->re_pcre2_code == NULL)
  {
  unsigned int i;

  /* A negative value is a UTF error. Anything else below COMPILE_ERROR_BASE
  is not expected, but must not be turned into a negative table index. */

  if (errorcode < COMPILE_ERROR_BASE) return REG_BADPAT;
  errorcode -= COMPILE_ERROR_BASE;

  if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
    return eint1[errorcode];

  /* eint2 holds (PCRE2 code, POSIX code) pairs, so the index advances by two
  and has to be bounded by the number of ints, not the number of pairs. */

  for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
    if (errorcode == eint2[i]) return eint2[i+1];
  return REG_BADPAT;
  }

(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
  PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;

/* With REG_NOSUB no capture information is wanted, so ask for a match data
block with a zero pair count. */

if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);

if (preg->re_match_data == NULL)
  {
  pcre2_code_free(preg->re_pcre2_code);
  preg->re_pcre2_code = NULL;
  return REG_ESPACE;
  }

return 0;
}
예제 #7
0
파일: matrix.c 프로젝트: cjjavellana/cs50
/**
 * An internal function for checking cyclic reference dependency error.
 *
 * Fetches the value of the cell addressed by cellRef (via
 * convertToMatrixLocation and getValue2), splits it on spaces and, for every
 * token that matches the cell-reference pattern:
 *   - reports a cycle if the token is found in visitedCells;
 *   - otherwise recurses into that token with cellRef->cellReference
 *     appended to the visited list.
 *
 * The fetched cell value and the pattern returned by
 * getCellReferencePattern() are released before returning. PCRE2 objects are
 * released with the PCRE2 free functions rather than free().
 *
 * A memory allocation failure is reported as an error (1), because the
 * absence of a cycle can no longer be established.
 *
 * ==========
 * Returns 1 if cyclic dependency is found, 0 if otherwise
 */
static int isCyclicError(const Worksheet *worksheet, const char *visitedCells, CellReference *cellRef)
{
    int cyclic = 0;
    char *saveptr = NULL;
    char *cellValue = NULL;

    MatrixLocation *m = convertToMatrixLocation(cellRef);
    getValue2(worksheet, &cellValue, m->row, m->col);
    free(m);

    if (cellValue == NULL)
    {
        return 0;
    }

    pcre2_code *re = getCellReferencePattern();

    // one match data block is enough; it is reused for every pcre2_match call
    pcre2_match_data *match_data = pcre2_match_data_create(20, NULL);
    if (match_data == NULL)
    {
        cyclic = 1;
        goto cleanup;
    }

    const size_t visitedLen = strlen(visitedCells);

    // strtok_r cuts cellValue in place; tokens point into that buffer
    for (char *token = strtok_r(cellValue, " ", &saveptr);
         token != NULL;
         token = strtok_r(NULL, " ", &saveptr))
    {
        int rc = pcre2_match(re, (PCRE2_SPTR) token, strlen(token), 0, 0, match_data, NULL);
        if (rc <= 0)
        {
            // not a cell reference
            continue;
        }

        // search if current cellref is in the visited cells
        pcre2_code *searchVal = compilePattern(token);
        int isCyclicDependency = pcre2_match(searchVal, (PCRE2_SPTR) visitedCells, visitedLen, 0, 0, match_data, NULL);
        pcre2_code_free(searchVal);
        if (isCyclicDependency > 0)
        {
            cyclic = 1;
            break;
        }

        // length of existing visitedCells + space character + length of cellRef to be appended + null terminator
        size_t refLen = strlen(cellRef->cellReference);
        char *newVisitedCells = malloc(visitedLen + 1 + refLen + 1);
        CellReference *tokenCellRef = calloc(1, sizeof *tokenCellRef);
        if (newVisitedCells == NULL || tokenCellRef == NULL)
        {
            free(newVisitedCells);
            free(tokenCellRef);
            cyclic = 1;
            break;
        }

        memcpy(newVisitedCells, visitedCells, visitedLen);
        newVisitedCells[visitedLen] = ' ';
        memcpy(newVisitedCells + visitedLen + 1, cellRef->cellReference, refLen + 1);
        tokenCellRef->cellReference = token;

        int nested = isCyclicError(worksheet, (const char *) newVisitedCells, tokenCellRef);

        // release the temporaries on both outcomes of the recursive check
        free(newVisitedCells);
        free(tokenCellRef);

        if (nested)
        {
            cyclic = 1;
            break;
        }
    }

cleanup:
    pcre2_match_data_free(match_data);
    pcre2_code_free(re);
    free(cellValue);
    return cyclic;
}
예제 #8
0
/* Fuzzer entry point. The input bytes are used both as the pattern and as
the subject. The pattern is compiled twice: first with pseudo-random options
derived from the input, then with only PCRE2_NEVER_BACKSLASH_C. After each
successful compile the subject (truncated to MAX_MATCH_SIZE) is matched twice
with pcre2_match() and twice with pcre2_dfa_match(), with and without the
pseudo-random match options.

Arguments:
  data      the input bytes
  size      number of input bytes

Returns:    always 0; in non-STANDALONE builds abort() is called when a
            compile error message contains "internal error"
*/

int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
{
uint32_t compile_options;
uint32_t match_options;
pcre2_match_data *match_data = NULL;
pcre2_match_context *match_context = NULL;
size_t match_size;
int dfa_workspace[DFA_WORKSPACE_COUNT];
int r1, r2;
int i;

/* The address of this counter is registered in the match context, which
outlives a single pass of the compile loop, so the counter must live at
function scope. */

uint32_t callout_count = 0;

if (size < 1) return 0;

/* Limiting the length of the subject for matching stops fruitless searches
in large trees taking too much time. */

match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;

/* Figure out some options to use. Initialize the random number to ensure
repeatability. Ensure that we get a 32-bit unsigned random number for testing
options. (RAND_MAX is required to be at least 32767, but is commonly
2147483647, which excludes the top bit.) */

srand((unsigned int)(data[size/2]));
r1 = rand();
r2 = rand();

/* Ensure that all undefined option bits are zero (waste of time trying them)
and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
\C in random patterns is highly likely to cause a crash. */

compile_options =
  ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
  PCRE2_NEVER_BACKSLASH_C;

match_options =
  ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);

/* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
allowed together and just give an immediate error return. */

if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
  match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);

/* Do the compile with and without the options, and after a successful compile,
likewise do the match with and without the options. */

for (i = 0; i < 2; i++)
  {
  int errorcode;
  PCRE2_SIZE erroroffset;
  pcre2_code *code;

#ifdef STANDALONE
  printf("Compile options %.8x never_backslash_c", compile_options);
  printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
    ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
    ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
    ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
    ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
    ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
    ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
    ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
    ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
    ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
    ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
    ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
    ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
    ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
    ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
    ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
    ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
    ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
    ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
    ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
    ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
    ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
    ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
    ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
    ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
    ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
    ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
#endif

  code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
    &errorcode, &erroroffset, NULL);

  /* Compilation succeeded */

  if (code != NULL)
    {
    int j;
    uint32_t save_match_options = match_options;

    /* Create match data and context blocks only when we first need them. Set
    low match and depth limits to avoid wasting too much searching large
    pattern trees. Almost all matches are going to fail. On an allocation
    failure release everything acquired so far before giving up. */

    if (match_data == NULL)
      {
      match_data = pcre2_match_data_create(32, NULL);
      if (match_data == NULL)
        {
#ifdef STANDALONE
        printf("** Failed to create match data block\n");
#endif
        pcre2_code_free(code);
        return 0;
        }
      }

    if (match_context == NULL)
      {
      match_context = pcre2_match_context_create(NULL);
      if (match_context == NULL)
        {
#ifdef STANDALONE
        printf("** Failed to create match context block\n");
#endif
        pcre2_code_free(code);
        pcre2_match_data_free(match_data);
        return 0;
        }
      (void)pcre2_set_match_limit(match_context, 100);
      (void)pcre2_set_depth_limit(match_context, 100);
      (void)pcre2_set_callout(match_context, callout_function, &callout_count);
      }

    /* Match twice, with and without options. */

    for (j = 0; j < 2; j++)
      {
#ifdef STANDALONE
      printf("Match options %.8x", match_options);
      printf("%s%s%s%s%s%s%s%s%s%s\n",
        ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
        ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
        ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
        ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
        ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
        ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
        ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
        ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
        ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
        ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif

      callout_count = 0;
      errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
        match_options, match_data, match_context);

#ifdef STANDALONE
      if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
        {
        unsigned char buffer[256];
        pcre2_get_error_message(errorcode, buffer, 256);
        printf("Match failed: error %d: %s\n", errorcode, buffer);
        }
#endif

      match_options = 0;  /* For second time */
      }

    /* Match with DFA twice, with and without options. */

    match_options = save_match_options & ~PCRE2_NO_JIT;  /* Not valid for DFA */

    for (j = 0; j < 2; j++)
      {
#ifdef STANDALONE
      printf("DFA match options %.8x", match_options);
      printf("%s%s%s%s%s%s%s%s%s\n",
        ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
        ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
        ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
        ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
        ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
        ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
        ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
        ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
        ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif

      callout_count = 0;
      errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
        (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
        dfa_workspace, DFA_WORKSPACE_COUNT);

#ifdef STANDALONE
      if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
        {
        unsigned char buffer[256];
        pcre2_get_error_message(errorcode, buffer, 256);
        printf("Match failed: error %d: %s\n", errorcode, buffer);
        }
#endif

      match_options = 0;  /* For second time */
      }

    match_options = save_match_options;  /* Reset for the second compile */
    pcre2_code_free(code);
    }

  /* Compilation failed */

  else
    {
    unsigned char buffer[256];
    pcre2_get_error_message(errorcode, buffer, 256);
#ifdef STANDALONE
    /* PCRE2_SIZE need not be unsigned long; convert to match the format. */
    printf("Error %d at offset %lu: %s\n", errorcode,
      (unsigned long)erroroffset, buffer);
#else
    if (strstr((const char *)buffer, "internal error") != NULL) abort();
#endif
    }

  compile_options = PCRE2_NEVER_BACKSLASH_C;  /* For second time */
  }

if (match_data != NULL) pcre2_match_data_free(match_data);
if (match_context != NULL) pcre2_match_context_free(match_context);

return 0;
}
예제 #9
0
/* Shared worker for the regex requests. The subject string is taken from
   parameter 2 and the regex from parameter 1; HBREG_ICASE is requested when
   parameter 3 (default HB_TRUE) is false and HBREG_NEWLINE when parameter 4
   is true.

   iRequest selects what is produced:
      0 - array with the matched string and its sub-matches
      1 - LIKE:  HB_TRUE only if the match covers the whole string
      2 - HAS:   HB_TRUE if there is any match
      3 - SPLIT: array of the pieces between matches (parameter 5 limits the
                 number of splits, 0 = unlimited)
      4 - array of { string, start, end } for the match and its sub-matches
      5 - like 4 but for all successive matches, tuned by parameters 5..7

   Returns HB_TRUE when the request succeeded; array results are returned
   through hb_itemReturnRelease(). Returns HB_FALSE when parameter 2 is not
   a string (after raising a runtime error), when no regex could be obtained,
   or when nothing matched (except for SPLIT, which then returns the whole
   string as a single element). */
static HB_BOOL hb_regex( int iRequest )
{
#if defined( HB_HAS_PCRE2 )
   HB_REGMATCH * aMatches;
#else
   HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( REGEX_MAX_GROUPS ) ];
#endif
   PHB_ITEM pRetArray, pString;
   int iMatches, iMaxMatch;
   HB_BOOL fResult = HB_FALSE;
   PHB_REGEX pRegEx;
   const char * pszString;
   HB_SIZE nLen;

   pString = hb_param( 2, HB_IT_STRING );
   if( ! pString )
   {
      hb_errRT_BASE_SubstR( EG_ARG, 3014, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return HB_FALSE;
   }
   pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
                         ( ! hb_parldef( 3, HB_TRUE ) ? HBREG_ICASE : 0 ) |
                         ( hb_parl( 4 ) ? HBREG_NEWLINE : 0 ) );
   if( ! pRegEx )
      return HB_FALSE;

#if defined( HB_HAS_PCRE2 )
   /* NOTE(review): the block is stored in pRegEx and is not released in this
      function; presumably hb_regexFree() releases it - confirm. The result
      of pcre2_match_data_create() is not checked for NULL. */
   aMatches = pRegEx->re_match_data = pcre2_match_data_create( REGEX_MAX_GROUPS, NULL );
#endif

   pszString = hb_itemGetCPtr( pString );
   nLen      = hb_itemGetCLen( pString );
   /* only the requests that report sub-matches ask for all groups */
   iMaxMatch = iRequest == 0 || iRequest == 4 || iRequest == 5 ?
               REGEX_MAX_GROUPS : 1;
   iMatches = hb_regexec( pRegEx, pszString, nLen, iMaxMatch, aMatches );
   if( iMatches > 0 )
   {
      PHB_ITEM pMatch;
      int i;

      switch( iRequest )
      {
         case 0:
            /* one array element per group; unset groups become empty strings */
            pRetArray = hb_itemArrayNew( iMatches );
            for( i = 0; i < iMatches; i++ )
            {
               if( HB_REGMATCH_EO( aMatches, i ) != HB_REGMATCH_UNSET )
                  hb_arraySetCL( pRetArray, i + 1,
                                 pszString + HB_REGMATCH_SO( aMatches, i ),
                                 HB_REGMATCH_EO( aMatches, i ) -
                                 HB_REGMATCH_SO( aMatches, i ) );
               else
                  hb_arraySetCL( pRetArray, i + 1, NULL, 0 );
            }
            hb_itemReturnRelease( pRetArray );
            fResult = HB_TRUE;
            break;

         case 1: /* LIKE */
            /* the match has to start at offset 0 and end at nLen */
            fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
                      ( HB_SIZE ) HB_REGMATCH_EO( aMatches, 0 ) == nLen;
            break;

         case 2: /* HAS */
            fResult = HB_TRUE;
            break;

         case 3: /* SPLIT */
            iMaxMatch = hb_parni( 5 );
            pRetArray = hb_itemArrayNew( 0 );
            pMatch = hb_itemNew( NULL );
            iMatches = 0;
            /* each pass stores the text before the current match and then
               moves the subject past the end of that match; the loop stops
               on an empty match end, an exhausted subject, the split limit
               or a failed search */
            do
            {
               hb_itemPutCL( pMatch, pszString, HB_REGMATCH_SO( aMatches, 0 ) );
               hb_arrayAddForward( pRetArray, pMatch );
               nLen -= HB_REGMATCH_EO( aMatches, 0 );
               pszString += HB_REGMATCH_EO( aMatches, 0 );
               iMatches++;
            }
            while( HB_REGMATCH_EO( aMatches, 0 ) > 0 && nLen &&
                   ( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
                   hb_regexec( pRegEx, pszString, nLen, 1, aMatches ) > 0 );

            /* last match must be done also in case that pszString is empty;
               this would mean an empty split field at the end of the string */
            /* if( nLen ) */
            {
               hb_itemPutCL( pMatch, pszString, nLen );
               hb_arrayAddForward( pRetArray, pMatch );
            }
            hb_itemRelease( pMatch );

            hb_itemReturnRelease( pRetArray );
            fResult = HB_TRUE;
            break;

         case 4: /* results AND positions */
            pRetArray = hb_itemArrayNew( iMatches );

            for( i = 0; i < iMatches; i++ )
            {
               int iSO = HB_REGMATCH_SO( aMatches, i ),
                   iEO = HB_REGMATCH_EO( aMatches, i );
               pMatch = hb_arrayGetItemPtr( pRetArray, i + 1 );
               hb_arrayNew( pMatch, 3 );
               if( iEO != ( int ) HB_REGMATCH_UNSET )
               {
                  /* matched string */
                  hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
                  /* begin of match */
                  hb_arraySetNS( pMatch, 2, iSO + 1 );
                  /* End of match */
                  hb_arraySetNS( pMatch, 3, iEO );
               }
               else
               {
                  hb_arraySetCL( pMatch, 1, NULL, 0 );
                  hb_arraySetNS( pMatch, 2, 0 );
                  hb_arraySetNS( pMatch, 3, 0 );
               }
            }
            hb_itemReturnRelease( pRetArray );
            fResult = HB_TRUE;
            break;

         case 5: /* _ALL_ results AND positions */
         {
            PHB_ITEM pAtxArray;
            int      iMax       = hb_parni( 5 );   /* max number of matches I want, 0 = unlimited */
            int      iGetMatch  = hb_parni( 6 );   /* Gets if want only one single match or a sub-match */
            HB_BOOL  fOnlyMatch = hb_parldef( 7, HB_TRUE ); /* if HB_TRUE returns only matches and sub-matches, not positions */
            HB_SIZE  nOffset    = 0;               /* how far pszString has been advanced from the original start */
            int      iCount     = 0;
            int      iSO, iEO;

            /* Set new array */
            pRetArray = hb_itemArrayNew( 0 );
            do
            {
               /* If I want all matches */
               if( iGetMatch == 0 || /* Check boundaries */
                   ( iGetMatch < 0 || iGetMatch > iMatches ) )
               {
                  pAtxArray = hb_itemArrayNew( iMatches );
                  for( i = 0; i < iMatches; i++ )
                  {
                     iSO = HB_REGMATCH_SO( aMatches, i );
                     iEO = HB_REGMATCH_EO( aMatches, i );
                     pMatch = hb_arrayGetItemPtr( pAtxArray, i + 1 );
                     if( ! fOnlyMatch )
                     {
                        hb_arrayNew( pMatch, 3 );
                        if( iEO != ( int ) HB_REGMATCH_UNSET )
                        {
                           /* matched string */
                           hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
                           /* begin of match */
                           hb_arraySetNS( pMatch, 2, nOffset + iSO + 1 );
                           /* End of match */
                           hb_arraySetNS( pMatch, 3, nOffset + iEO );
                        }
                        else
                        {
                           hb_arraySetCL( pMatch, 1, NULL, 0 );
                           hb_arraySetNS( pMatch, 2, 0 );
                           hb_arraySetNS( pMatch, 3, 0 );
                        }
                     }
                     else
                     {
                        if( iEO != ( int ) HB_REGMATCH_UNSET )
                           /* matched string */
                           hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
                        else
                           hb_itemPutC( pMatch, NULL );
                     }
                  }
                  hb_arrayAddForward( pRetArray, pAtxArray );
                  hb_itemRelease( pAtxArray );
               }
               else /* Here I get only single matches */
               {
                  i = iGetMatch - 1;
                  iSO = HB_REGMATCH_SO( aMatches, i );
                  iEO = HB_REGMATCH_EO( aMatches, i );
                  pMatch = hb_itemNew( NULL );
                  if( ! fOnlyMatch )
                  {
                     hb_arrayNew( pMatch, 3 );
                     if( iEO != ( int ) HB_REGMATCH_UNSET )
                     {
                        /* matched string */
                        hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
                        /* begin of match */
                        hb_arraySetNS( pMatch, 2, nOffset + iSO + 1 );
                        /* End of match */
                        hb_arraySetNS( pMatch, 3, nOffset + iEO );
                     }
                     else
                     {
                        hb_arraySetCL( pMatch, 1, NULL, 0 );
                        hb_arraySetNS( pMatch, 2, 0 );
                        hb_arraySetNS( pMatch, 3, 0 );
                     }
                  }
                  else
                  {
                     if( iEO != ( int ) HB_REGMATCH_UNSET )
                        /* matched string */
                        hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
                     else
                        hb_itemPutC( pMatch, NULL );
                  }
                  hb_arrayAddForward( pRetArray, pMatch );
                  hb_itemRelease( pMatch );
               }

               /* continue the search right after the end of the whole match */
               iEO = HB_REGMATCH_EO( aMatches, 0 );
               if( iEO == ( int ) HB_REGMATCH_UNSET )
                  break;
               nLen -= iEO;
               pszString += iEO;
               nOffset += iEO;
               iCount++;
            }
            while( iEO && nLen && ( iMax == 0 || iCount < iMax ) &&
                   ( iMatches = hb_regexec( pRegEx, pszString, nLen, iMaxMatch, aMatches ) ) > 0 );
            hb_itemReturnRelease( pRetArray );
            fResult = HB_TRUE;
            break;
         }
      }
   }
   else if( iRequest == 3 )
   {
      /* SPLIT without any match: the result is the whole string as the
         only array element */
      pRetArray = hb_itemArrayNew( 1 );
      hb_arraySet( pRetArray, 1, pString );
      hb_itemReturnRelease( pRetArray );
      fResult = HB_TRUE;
   }

   hb_regexFree( pRegEx );
   return fResult;
}
예제 #10
0
/*
 * Compile re_str into regex.
 *
 * Which engine is used depends on the build and on flags:
 *  - without ENABLE_PCRE / ENABLE_PCRE2, or when TVHREGEX_POSIX is set in
 *    flags, the POSIX regcomp() is used with REG_EXTENDED;
 *  - otherwise PCRE (ENABLE_PCRE) or PCRE2 (ENABLE_PCRE2) is used in UTF mode.
 * TVHREGEX_CASELESS in flags selects case-insensitive matching for every
 * engine.
 *
 * subsys is only passed to tvherror() when a failure is logged.
 *
 * Returns 0 on success and -1 on failure.
 */
int regex_compile(tvh_regex_t *regex, const char *re_str, int flags, int subsys)
{
#if ENABLE_PCRE || ENABLE_PCRE2
  regex->is_posix = 0;
  if (flags & TVHREGEX_POSIX) {
    regex->is_posix = 1;
#endif
    /* POSIX path: the only path in builds without PCRE support */
    int options = REG_EXTENDED;
    if (flags & TVHREGEX_CASELESS)
      options |= REG_ICASE;
    if (!regcomp(&regex->re_posix_code, re_str, options))
      return 0;
    tvherror(subsys, "Unable to compile regex '%s'", re_str);
    return -1;
#if ENABLE_PCRE || ENABLE_PCRE2
  } else {
#if ENABLE_PCRE
    const char *estr;
    int eoff;
    int options = PCRE_UTF8;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE_CASELESS;
#if PCRE_STUDY_JIT_COMPILE
    regex->re_jit_stack = NULL;
#endif
    regex->re_extra = NULL;
    regex->re_code = pcre_compile(re_str, options, &estr, &eoff, NULL);
    if (regex->re_code == NULL) {
      tvherror(subsys, "Unable to compile PCRE '%s': %s", re_str, estr);
    } else {
      regex->re_extra = pcre_study(regex->re_code,
                                   PCRE_STUDY_JIT_COMPILE, &estr);
      /* a NULL study result is only treated as an error when an error
         string was set */
      if (regex->re_extra == NULL && estr)
        tvherror(subsys, "Unable to study PCRE '%s': %s", re_str, estr);
      else {
#if PCRE_STUDY_JIT_COMPILE
        /* a failed JIT stack allocation is tolerated; success is still
           returned */
        regex->re_jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
        if (regex->re_jit_stack)
          pcre_assign_jit_stack(regex->re_extra, NULL, regex->re_jit_stack);
#endif
        return 0;
      }
    }
    /* NOTE(review): on the study-failure path re_code stays set and is not
       released here; presumably the caller frees the regex - confirm. */
    return -1;
#elif ENABLE_PCRE2
    PCRE2_UCHAR8 ebuf[128];
    int ecode;
    PCRE2_SIZE eoff;
    size_t jsz;
    uint32_t options;
    /* the caller is expected to pass a regex without a JIT stack */
    assert(regex->re_jit_stack == NULL);
    regex->re_jit_stack = NULL;
    regex->re_match = NULL;
    /* NOTE(review): the match context is created before the compile and is
       not released here when the compile fails; its result is not checked
       for NULL either - confirm how the caller cleans up. */
    regex->re_mcontext = pcre2_match_context_create(NULL);
    options = PCRE2_UTF;
    if (flags & TVHREGEX_CASELESS)
      options |= PCRE2_CASELESS;
    /* a length of -1 converts to the all-ones PCRE2_SIZE value, i.e. the
       pattern is treated as zero-terminated */
    regex->re_code = pcre2_compile((PCRE2_SPTR8)re_str, -1, options,
                                   &ecode, &eoff, NULL);
    if (regex->re_code == NULL) {
      (void)pcre2_get_error_message(ecode, ebuf, 120);
      tvherror(subsys, "Unable to compile PCRE2 '%s': %s", re_str, ebuf);
    } else {
      /* NOTE(review): the match data result is not checked for NULL */
      regex->re_match = pcre2_match_data_create(TVHREGEX_MAX_MATCHES, NULL);
      /* JIT is attempted only for a non-empty pattern; a JIT stack is set up
         only when JIT compilation worked and reports a non-zero JIT size.
         Any failure here is tolerated and success is still returned. */
      if (re_str[0] && pcre2_jit_compile(regex->re_code, PCRE2_JIT_COMPLETE) >= 0) {
        jsz = 0;
        if (pcre2_pattern_info(regex->re_code, PCRE2_INFO_JITSIZE, &jsz) >= 0 && jsz > 0) {
          regex->re_jit_stack = pcre2_jit_stack_create(32 * 1024, 512 * 1024, NULL);
          if (regex->re_jit_stack)
            pcre2_jit_stack_assign(regex->re_mcontext, NULL, regex->re_jit_stack);
        }
      }
      return 0;
    }
    return -1;
#endif
  }
#endif
}
예제 #11
0
파일: regexp.c 프로젝트: smfreegard/rspamd
/* PCRE 2 version */
gboolean
rspamd_regexp_search (rspamd_regexp_t *re, const gchar *text, gsize len,
		const gchar **start, const gchar **end, gboolean raw,
		GArray *captures)
{
	pcre2_match_data *match_data;
	pcre2_match_context *mcontext;
	PCRE_T *r;
	const gchar *mt;
	gsize remain = 0, *ovec;
	gint rc, match_flags, novec, i;
	gboolean ret = FALSE;

	g_assert (re != NULL);
	g_assert (text != NULL);

	if (len == 0) {
		len = strlen (text);
	}

	if (end != NULL && *end != NULL) {
		/* Incremental search */
		mt = (*end);

		if ((gint)len > (mt - text)) {
			remain = len - (mt - text);
		}
	}
	else {
		mt = text;
		remain = len;
	}

	if (remain == 0) {
		return FALSE;
	}

	match_flags = 0;

	if (raw || re->re == re->raw_re) {
		r = re->raw_re;
		mcontext = re->raw_mcontext;
	}
	else {
		r = re->re;
		mcontext = re->mcontext;
	}

	match_data = pcre2_match_data_create (re->ncaptures + 1, NULL);

#ifdef HAVE_PCRE_JIT
	if (!(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) {
		if (re->re != re->raw_re && !g_utf8_validate (mt, remain, NULL)) {
			msg_err ("bad utf8 input for JIT re");
			return FALSE;
		}

		rc = pcre2_jit_match (r,  mt, remain, 0, match_flags, match_data,
				mcontext);
	}
	else {
		rc = pcre2_match (r,  mt, remain, 0, match_flags, match_data,
				mcontext);
	}
#else
	rc = pcre2_match (r,  mt, remain, 0, match_flags, match_data,
					mcontext);
#endif

	if (rc >= 0) {
		novec = pcre2_get_ovector_count (match_data);
		ovec = pcre2_get_ovector_pointer (match_data);

		if (start) {
			*start = mt + ovec[0];
		}
		if (end) {
			*end = mt + ovec[1];
		}

		if (captures != NULL && novec > 1) {
			struct rspamd_re_capture *elt;

			g_assert (g_array_get_element_size (captures) ==
					sizeof (struct rspamd_re_capture));
			g_array_set_size (captures, novec);

			for (i = 0; i < novec; i ++) {
				elt = &g_array_index (captures, struct rspamd_re_capture, i);
				elt->p = mt + ovec[i * 2];
				elt->len = (mt + ovec[i * 2 + 1]) - elt->p;

			}
		}

		ret = TRUE;

		if (re->flags & RSPAMD_REGEXP_FLAG_FULL_MATCH) {
			/* We also ensure that the match is full */
			if (ovec[0] != 0 || (guint)ovec[1] < len) {
				ret = FALSE;
			}
		}
	}