Пример #1
0
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
  PCRE2_SIZE **lengthsptr)
{
int i, count, count2;
PCRE2_SIZE size;
PCRE2_SIZE *lensp;
pcre2_memctl *memp;
PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp;
PCRE2_SIZE *ovector;

if ((count = match_data->rc) < 0) return count;   /* Match failed */
if (count == 0) count = match_data->oveccount;    /* Ovector too small */

count2 = 2*count;
ovector = match_data->ovector;
size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *);      /* For final NULL */
if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count;  /* For lengths */

for (i = 0; i < count2; i += 2)
  {
  size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
  if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
  }

memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;

*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));

if (lengthsptr == NULL)
  {
  sp = (PCRE2_UCHAR *)lensp;
  lensp = NULL;
  }
else
  {
  *lengthsptr = lensp;
  sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
  }

for (i = 0; i < count2; i += 2)
  {
  size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
  memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
  *listp++ = sp;
  if (lensp != NULL) *lensp++ = size;
  sp += size;
  *sp++ = 0;
  }

*listp = NULL;
return 0;
}
Пример #2
0
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
  uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
  CU2BYTES(size));
buffer[size] = 0;
*sizeptr = size;
return 0;
}
Пример #3
0
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
  uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
int rc;
PCRE2_SIZE size;
PCRE2_UCHAR *yield;
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
if (rc < 0) return rc;
yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
  (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
  CU2BYTES(size));
yield[size] = 0;
*stringptr = yield;
*sizeptr = size;
return 0;
}
Пример #4
0
static int
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
char *s;
PCRE2_SPTR posix = pattern;
PCRE2_UCHAR *p = use_buffer;
PCRE2_UCHAR *pp = p;
PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
PCRE2_SIZE convlength = 0;

uint32_t bracount = 0;
uint32_t posix_state = POSIX_START_REGEX;
uint32_t lastspecial = 0;
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
BOOL nextisliteral = FALSE;

(void)utf;       /* Not used when Unicode not supported */
(void)ccontext;  /* Not currently used */

/* Initialize default for error offset as end of input. */

*bufflenptr = plength;
PUTCHARS(STR_STAR_NUL);

/* Now scan the input. */

while (plength > 0)
  {
  uint32_t c, sc;
  int clength = 1;

  /* Add in the length of the last item, then, if in the dummy run, pull the
  pointer back to the start of the (temporary) buffer and then remember the
  start of the next item. */

  convlength += p - pp;
  if (dummyrun) p = use_buffer;
  pp = p;

  /* Pick up the next character */

#ifndef SUPPORT_UNICODE
  c = *posix;
#else
  GETCHARLENTEST(c, posix, clength);
#endif
  posix += clength;
  plength -= clength;

  sc = nextisliteral? 0 : c;
  nextisliteral = FALSE;

  /* Handle a character within a class. */

  if (posix_state >= POSIX_CLASS_NOT_STARTED)
    {
    if (c == CHAR_RIGHT_SQUARE_BRACKET)
      {
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
      posix_state = POSIX_NOT_BRACKET;
      }

    /* Not the end of the class */

    else
      {
      switch (posix_state)
        {
        case POSIX_CLASS_STARTED:
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
        posix_state = POSIX_CLASS_NOT_STARTED;
        if (c == CHAR_COLON  && plength > 0 &&
            *posix == CHAR_RIGHT_SQUARE_BRACKET)
          {
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
          plength--;
          posix++;
          continue;    /* With next character after :] */
          }
        /* Fall through */

        case POSIX_CLASS_NOT_STARTED:
        if (c == CHAR_LEFT_SQUARE_BRACKET)
          posix_state = POSIX_CLASS_STARTING;
        break;

        case POSIX_CLASS_STARTING:
        if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
        break;
        }

      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
      memcpy(p, posix - clength, CU2BYTES(clength));
      p += clength;
      }
    }

  /* Handle a character not within a class. */

  else switch(sc)
    {
    case CHAR_LEFT_SQUARE_BRACKET:
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);

#ifdef NEVER
    /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
    support) but they are not part of POSIX 1003.1. */

    if (plength >= 6)
      {
      if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
          posix[1] == CHAR_COLON &&
          (posix[2] == CHAR_LESS_THAN_SIGN ||
           posix[2] == CHAR_GREATER_THAN_SIGN) &&
          posix[3] == CHAR_COLON &&
          posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
          posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
        {
        if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
        memcpy(p, posix, CU2BYTES(6));
        p += 6;
        posix += 6;
        plength -= 6;
        continue;  /* With next character */
        }
      }
#endif

    /* Handle start of "normal" character classes */

    posix_state = POSIX_CLASS_NOT_STARTED;

    /* Handle ^ and ] as first characters */

    if (plength > 0)
      {
      if (*posix == CHAR_CIRCUMFLEX_ACCENT)
        {
        posix++;
        plength--;
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
        }
      if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
        {
        posix++;
        plength--;
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
        }
      }
    break;

    case CHAR_BACKSLASH:
    if (plength <= 0) return PCRE2_ERROR_END_BACKSLASH;
    if (extended) nextisliteral = TRUE; else
      {
      if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
        {
        if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
        if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
        lastspecial = *p++ = *posix++;
        plength--;
        }
      else nextisliteral = TRUE;
      }
    break;

    case CHAR_RIGHT_PARENTHESIS:
    if (!extended || bracount == 0) goto ESCAPE_LITERAL;
    bracount--;
    goto COPY_SPECIAL;

    case CHAR_LEFT_PARENTHESIS:
    bracount++;
    /* Fall through */

    case CHAR_QUESTION_MARK:
    case CHAR_PLUS:
    case CHAR_LEFT_CURLY_BRACKET:
    case CHAR_RIGHT_CURLY_BRACKET:
    case CHAR_VERTICAL_LINE:
    if (!extended) goto ESCAPE_LITERAL;
    /* Fall through */

    case CHAR_DOT:
    case CHAR_DOLLAR_SIGN:
    posix_state = POSIX_NOT_BRACKET;
    COPY_SPECIAL:
    lastspecial = c;
    if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
    *p++ = c;
    break;

    case CHAR_ASTERISK:
    if (lastspecial != CHAR_ASTERISK)
      {
      if (!extended && (posix_state < POSIX_NOT_BRACKET ||
          lastspecial == CHAR_LEFT_PARENTHESIS))
        goto ESCAPE_LITERAL;
      goto COPY_SPECIAL;
      }
    break;   /* Ignore second and subsequent asterisks */

    case CHAR_CIRCUMFLEX_ACCENT:
    if (extended) goto COPY_SPECIAL;
    if (posix_state == POSIX_START_REGEX ||
        lastspecial == CHAR_LEFT_PARENTHESIS)
      {
      posix_state = POSIX_ANCHORED;
      goto COPY_SPECIAL;
      }
    /* Fall through */

    default:
    if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
      {
      ESCAPE_LITERAL:
      PUTCHARS(STR_BACKSLASH);
      }
    lastspecial = 0xff;  /* Indicates nothing special */
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    memcpy(p, posix - clength, CU2BYTES(clength));
    p += clength;
    posix_state = POSIX_NOT_BRACKET;
    break;
    }
  }

if (posix_state >= POSIX_CLASS_NOT_STARTED)
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
convlength += p - pp;        /* Final segment */
*bufflenptr = convlength;
*p++ = 0;
return 0;
}