Example #1
0
/* Convert the body of a glob bracket expression into a PCRE2 character class.

Scanning starts at *from. A leading '!' or '^' negates the class. A ']' that
appears as the first class member is taken literally. POSIX-style "[:" items
are passed to convert_glob_parse_class().

Arguments:
  from           in: position where scanning starts; out: position reached
                   when the function returns (success or error)
  pattern_end    end of the glob pattern
  out            output context; out->out_str is used as scratch space
  utf            not used in this function
  separator      the path separator character
  with_escape    TRUE if the separator must be preceded by a backslash when
                   it is written to the output
  escape         the glob escape character, or 0 if there is none
  no_wildsep     TRUE if no special treatment of the separator is wanted

Returns:         0 when the closing ']' has been processed
                 PCRE2_ERROR_MISSING_SQUARE_BRACKET if the pattern ends first
                 PCRE2_ERROR_CONVERT_SYNTAX for a range that ends in "[:" or
                   whose end point is less than its start point
*/

static int
convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
  pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
  BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
{
BOOL is_negative = FALSE;
BOOL separator_seen = FALSE;
BOOL has_prev_c;
PCRE2_SPTR pattern = *from;
PCRE2_SPTR char_start = NULL;
uint32_t c, prev_c;
int len, class_index;

(void)utf; /* Avoid compiler warning. */

if (pattern >= pattern_end)
  {
  *from = pattern;
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
  }

if (*pattern == CHAR_EXCLAMATION_MARK
    || *pattern == CHAR_CIRCUMFLEX_ACCENT)
  {
  pattern++;

  if (pattern >= pattern_end)
    {
    *from = pattern;
    return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
    }

  is_negative = TRUE;

  out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
  out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
  len = 2;

  /* Unless the separator is to be treated as an ordinary character, add it to
  the negated class so that the class cannot match it. The length is advanced
  only when the separator is actually stored; otherwise a stale code unit left
  in out_str by an earlier write would be copied to the output. */

  if (!no_wildsep)
    {
    if (with_escape)
      {
      out->out_str[len] = CHAR_BACKSLASH;
      len++;
      }
    out->out_str[len] = (uint8_t) separator;
    len++;
    }

  convert_glob_write_str(out, len);
  }
else
  convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);

has_prev_c = FALSE;
prev_c = 0;

/* A ']' in the first position is a literal class member, and it may be the
start of a range. */

if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
  {
  out->out_str[0] = CHAR_BACKSLASH;
  out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
  convert_glob_write_str(out, 2);
  has_prev_c = TRUE;
  prev_c = CHAR_RIGHT_SQUARE_BRACKET;
  pattern++;
  }

while (pattern < pattern_end)
  {
  char_start = pattern;
  GETCHARINCTEST(c, pattern);

  if (c == CHAR_RIGHT_SQUARE_BRACKET)
    {
    convert_glob_write(out, c);

    /* A positive class that is able to match the separator is followed by
    the negative lookbehind (?<!separator), which rejects such a match. */

    if (!is_negative && !no_wildsep && separator_seen)
      {
      out->out_str[0] = CHAR_LEFT_PARENTHESIS;
      out->out_str[1] = CHAR_QUESTION_MARK;
      out->out_str[2] = CHAR_LESS_THAN_SIGN;
      out->out_str[3] = CHAR_EXCLAMATION_MARK;
      convert_glob_write_str(out, 4);

      convert_glob_print_separator(out, separator, with_escape);
      convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
      }

    *from = pattern;
    return 0;
    }

  /* Everything below looks at the next code unit, so stop if there is none. */

  if (pattern >= pattern_end) break;

  if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
    {
    *from = pattern;
    class_index = convert_glob_parse_class(from, pattern_end, out);

    /* A non-zero index means the class item was handled by the helper. When
    zero is returned, the '[' falls through and is written as an escaped
    literal below. */

    if (class_index != 0)
      {
      pattern = *from;

      has_prev_c = FALSE;
      prev_c = 0;

      if (!is_negative &&
          convert_glob_char_in_class (class_index, separator))
        separator_seen = TRUE;
      continue;
      }
    }
  else if (c == CHAR_MINUS && has_prev_c &&
           *pattern != CHAR_RIGHT_SQUARE_BRACKET)
    {
    /* A '-' that follows a single character and does not precede ']' makes a
    range; read its end point. */

    convert_glob_write(out, CHAR_MINUS);

    char_start = pattern;
    GETCHARINCTEST(c, pattern);

    if (pattern >= pattern_end) break;

    if (escape != 0 && c == escape)
      {
      char_start = pattern;
      GETCHARINCTEST(c, pattern);
      }
    else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
      {
      *from = pattern;
      return PCRE2_ERROR_CONVERT_SYNTAX;
      }

    if (prev_c > c)
      {
      *from = pattern;
      return PCRE2_ERROR_CONVERT_SYNTAX;
      }

    /* The end points themselves are compared with the separator where they
    are written; this catches a separator strictly inside the range. */

    if (prev_c < separator && separator < c) separator_seen = TRUE;

    has_prev_c = FALSE;
    prev_c = 0;
    }
  else
    {
    if (escape != 0 && c == escape)
      {
      char_start = pattern;
      GETCHARINCTEST(c, pattern);

      if (pattern >= pattern_end) break;
      }

    has_prev_c = TRUE;
    prev_c = c;
    }

  /* These characters are written with a preceding backslash so that they are
  literal in the output class. */

  if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
      c == CHAR_BACKSLASH || c == CHAR_MINUS)
    convert_glob_write(out, CHAR_BACKSLASH);

  if (c == separator) separator_seen = TRUE;

  /* Copy the code units of the current character unchanged. */

  do convert_glob_write(out, *char_start++); while (char_start < pattern);
  }

*from = pattern;
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
}
Example #2
0
/* Match a compiled pattern against a subject and build, in the caller's
buffer, a copy of the subject in which the matched part (or every matched part,
when PCRE2_SUBSTITUTE_GLOBAL is set) is replaced by the processed replacement
string.

Arguments:
  code          points to the compiled pattern
  subject       points to the subject string
  length        length of the subject, or PCRE2_ZERO_TERMINATED
  start_offset  offset in the subject at which matching starts; the text
                  before it is copied to the output unchanged
  options       match options combined with substitute options; the partial
                  matching options are rejected
  match_data    points to a match data block, or NULL, in which case one is
                  created here and freed before returning
  mcontext      points to a match context, or NULL
  replacement   points to the replacement string
  rlength       length of the replacement, or PCRE2_ZERO_TERMINATED
  buffer        where the output is written
  blength       on entry, the size of buffer; on exit:
                  on success, the length used, excluding the trailing zero;
                  if "overflowed" was set, the buffer size plus the extra
                    amount that was needed;
                  for errors that exit via PTREXIT, the offset in the
                    replacement at which the error was detected;
                  otherwise PCRE2_UNSET

Returns:        the number of substitutions made (>= 0) on success, or a
                  negative error code

CHECKMEMCPY is a macro defined outside this function. NOTE(review): the NOROOM
label is not referenced directly below, so the macro presumably jumps to it
when the buffer is full and the overflow length is not being computed; it
presumably also maintains buff_offset, lengthleft, extra_needed and
overflowed - confirm against the macro definition. */

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
  pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
  PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
{
int rc;
int subs;
int forcecase = 0;
int forcecasereset = 0;
uint32_t ovector_count;
uint32_t goptions = 0;
uint32_t suboptions;
BOOL match_data_created = FALSE;
BOOL literal = FALSE;
BOOL overflowed = FALSE;
#ifdef SUPPORT_UNICODE
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
#endif
PCRE2_UCHAR temp[6];
PCRE2_SPTR ptr;
PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;

/* Remember the buffer size, and mark the returned length as unset until a
definite value is known. */

buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;

/* Partial matching is not valid. */

if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
  return PCRE2_ERROR_BADOPTION;

/* If no match data block is provided, create one. */

if (match_data == NULL)
  {
  pcre2_general_context *gcontext = (mcontext == NULL)?
    (pcre2_general_context *)code :
    (pcre2_general_context *)mcontext;
  match_data = pcre2_match_data_create_from_pattern(code, gcontext);
  if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
  match_data_created = TRUE;
  }
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);

/* Find lengths of zero-terminated strings and the end of the replacement. */

if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
repend = replacement + rlength;

/* Check UTF replacement string if necessary. */

#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
  {
  rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
  if (rc != 0)
    {
    match_data->leftchar = 0;
    goto EXIT;
    }
  }
#endif  /* SUPPORT_UNICODE */

/* Save the substitute options and remove them from the match options. */

suboptions = options & SUBSTITUTE_OPTIONS;
options &= ~SUBSTITUTE_OPTIONS;

/* Copy up to the start offset */

if (start_offset > length)
  {
  match_data->leftchar = 0;
  rc = PCRE2_ERROR_BADOFFSET;
  goto EXIT;
  }
CHECKMEMCPY(subject, start_offset);

/* Loop for global substituting. */

subs = 0;
do
  {
  /* Stack of saved (ptr, repend) pairs, pushed when a nested replacement
  substring is entered and popped when its end is reached. */

  PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
  uint32_t ptrstackptr = 0;

  rc = pcre2_match(code, subject, length, start_offset, options|goptions,
    match_data, mcontext);

#ifdef SUPPORT_UNICODE
  if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
#endif

  /* Any error other than no match returns the error code. No match when not
  doing the special after-empty-match global rematch, or when at the end of the
  subject, breaks the global loop. Otherwise, advance the starting point by one
  character, copying it to the output, and try again. */

  if (rc < 0)
    {
    PCRE2_SIZE save_start;

    if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
    if (goptions == 0 || start_offset >= length) break;

    /* Advance by one code point. Then, if CRLF is a valid newline sequence and
    we have advanced into the middle of it, advance one more code point. In
    other words, do not start in the middle of CRLF, even if CR and LF on their
    own are valid newlines. */

    save_start = start_offset++;
    if (subject[start_offset-1] == CHAR_CR &&
        code->newline_convention != PCRE2_NEWLINE_CR &&
        code->newline_convention != PCRE2_NEWLINE_LF &&
        start_offset < length &&
        subject[start_offset] == CHAR_LF)
      start_offset++;

    /* Otherwise, in UTF mode, advance past any secondary code points. */

    else if ((code->overall_options & PCRE2_UTF) != 0)
      {
#if PCRE2_CODE_UNIT_WIDTH == 8
      while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
        start_offset++;
#elif PCRE2_CODE_UNIT_WIDTH == 16
      while (start_offset < length &&
            (subject[start_offset] & 0xfc00) == 0xdc00)
        start_offset++;
#endif
      }

    /* Copy what we have advanced past, reset the special global options, and
    continue to the next match. */

    fraglength = start_offset - save_start;
    CHECKMEMCPY(subject + save_start, fraglength);
    goptions = 0;
    continue;
    }

  /* Handle a successful match. Matches that use \K to end before they start
  are not supported. */

  if (ovector[1] < ovector[0])
    {
    rc = PCRE2_ERROR_BADSUBSPATTERN;
    goto EXIT;
    }

  /* Count substitutions with a paranoid check for integer overflow; surely no
  real call to this function would ever hit this! */

  if (subs == INT_MAX)
    {
    rc = PCRE2_ERROR_TOOMANYREPLACE;
    goto EXIT;
    }
  subs++;

  /* Copy the text leading up to the match. */

  if (rc == 0) rc = ovector_count;
  fraglength = ovector[0] - start_offset;
  CHECKMEMCPY(subject + start_offset, fraglength);

  /* Process the replacement string. Literal mode is set by \Q, but only in
  extended mode when backslashes are being interpreted. In extended mode we
  must handle nested substrings that are to be reprocessed. */

  ptr = replacement;
  for (;;)
    {
    uint32_t ch;
    unsigned int chlen;

    /* If at the end of a nested substring, pop the stack. */

    if (ptr >= repend)
      {
      if (ptrstackptr <= 0) break;       /* End of replacement string */
      repend = ptrstack[--ptrstackptr];
      ptr = ptrstack[--ptrstackptr];
      continue;
      }

    /* Handle the next character */

    if (literal)
      {
      if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
        {
        literal = FALSE;
        ptr += 2;
        continue;
        }
      goto LOADLITERAL;
      }

    /* Not in literal mode. */

    if (*ptr == CHAR_DOLLAR_SIGN)
      {
      int group, n;
      uint32_t special = 0;
      BOOL inparens;
      BOOL star;
      PCRE2_SIZE sublength;
      PCRE2_SPTR text1_start = NULL;
      PCRE2_SPTR text1_end = NULL;
      PCRE2_SPTR text2_start = NULL;
      PCRE2_SPTR text2_end = NULL;
      PCRE2_UCHAR next;
      PCRE2_UCHAR name[33];

      /* A doubled dollar is a literal dollar; a dollar at the very end is an
      error. */

      if (++ptr >= repend) goto BAD;
      if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;

      group = -1;
      n = 0;
      inparens = FALSE;
      star = FALSE;

      if (next == CHAR_LEFT_CURLY_BRACKET)
        {
        if (++ptr >= repend) goto BAD;
        next = *ptr;
        inparens = TRUE;
        }

      if (next == CHAR_ASTERISK)
        {
        if (++ptr >= repend) goto BAD;
        next = *ptr;
        star = TRUE;
        }

      /* A decimal group number, unless '*' was seen. */

      if (!star && next >= CHAR_0 && next <= CHAR_9)
        {
        group = next - CHAR_0;
        while (++ptr < repend)
          {
          next = *ptr;
          if (next < CHAR_0 || next > CHAR_9) break;
          group = group * 10 + next - CHAR_0;

          /* A check for a number greater than the highest captured group
          is sufficient here; no need for a separate overflow check. If unknown
          groups are to be treated as unset, just skip over any remaining
          digits and carry on. */

          if (group > code->top_bracket)
            {
            if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
              {
              while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
              break;
              }
            else
              {
              rc = PCRE2_ERROR_NOSUBSTRING;
              goto PTREXIT;
              }
            }
          }
        }

      /* Otherwise a name made of word characters, at most 32 code units long,
      which is stored zero-terminated in name[]. */

      else
        {
        const uint8_t *ctypes = code->tables + ctypes_offset;
        while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
          {
          name[n++] = next;
          if (n > 32) goto BAD;
          if (++ptr >= repend) break;
          next = *ptr;
          }
        if (n == 0) goto BAD;
        name[n] = 0;
        }

      /* In extended mode we recognize ${name:+set text:unset text} and
      ${name:-default text}. */

      if (inparens)
        {
        if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
             !star && ptr < repend - 2 && next == CHAR_COLON)
          {
          special = *(++ptr);
          if (special != CHAR_PLUS && special != CHAR_MINUS)
            {
            rc = PCRE2_ERROR_BADSUBSTITUTION;
            goto PTREXIT;
            }

          text1_start = ++ptr;
          rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
          if (rc != 0) goto PTREXIT;
          text1_end = ptr;

          if (special == CHAR_PLUS && *ptr == CHAR_COLON)
            {
            text2_start = ++ptr;
            rc = find_text_end(code, &ptr, repend, TRUE);
            if (rc != 0) goto PTREXIT;
            text2_end = ptr;
            }
          }

        else
          {
          if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
            {
            rc = PCRE2_ERROR_REPMISSINGBRACE;
            goto PTREXIT;
            }
          }

        ptr++;
        }

      /* Have found a syntactically correct group number or name, or *name.
      Only *MARK is currently recognized. */

      if (star)
        {
        if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
          {
          PCRE2_SPTR mark = pcre2_get_mark(match_data);
          if (mark != NULL)
            {
            PCRE2_SPTR mark_start = mark;
            while (*mark != 0) mark++;
            fraglength = mark - mark_start;
            CHECKMEMCPY(mark_start, fraglength);
            }
          }
        else goto BAD;
        }

      /* Substitute the contents of a group. We don't use substring_copy
      functions any more, in order to support case forcing. */

      else
        {
        PCRE2_SPTR subptr, subptrend;

        /* Find a number for a named group. In case there are duplicate names,
        search for the first one that is set. If the name is not found when
        PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set, set the group number to a
        non-existent group. */

        if (group < 0)
          {
          PCRE2_SPTR first, last, entry;
          rc = pcre2_substring_nametable_scan(code, name, &first, &last);
          if (rc == PCRE2_ERROR_NOSUBSTRING &&
              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
            {
            group = code->top_bracket + 1;
            }
          else
            {
            if (rc < 0) goto PTREXIT;
            for (entry = first; entry <= last; entry += rc)
              {
              uint32_t ng = GET2(entry, 0);
              if (ng < ovector_count)
                {
                if (group < 0) group = ng;          /* First in ovector */
                if (ovector[ng*2] != PCRE2_UNSET)
                  {
                  group = ng;                       /* First that is set */
                  break;
                  }
                }
              }

            /* If group is still negative, it means we did not find a group
            that is in the ovector. Just set the first group. */

            if (group < 0) group = GET2(first, 0);
            }
          }

        /* We now have a group that is identified by number. Find the length of
        the captured string. If a group in a non-special substitution is unset
        when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */

        rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
        if (rc < 0)
          {
          if (rc == PCRE2_ERROR_NOSUBSTRING &&
              (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
            {
            rc = PCRE2_ERROR_UNSET;
            }
          if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
          if (special == 0)                           /* Plain substitution */
            {
            if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
            goto PTREXIT;                             /* Else error */
            }
          }

        /* If special is '+' we have a 'set' and possibly an 'unset' text,
        both of which are reprocessed when used. If special is '-' we have a
        default text for when the group is unset; it must be reprocessed. */

        if (special != 0)
          {
          if (special == CHAR_MINUS)
            {
            if (rc == 0) goto LITERAL_SUBSTITUTE;
            text2_start = text1_start;
            text2_end = text1_end;
            }

          /* Save the current position and end, then switch to scanning the
          selected nested text; the saved pair is restored when that text is
          exhausted. */

          if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
          ptrstack[ptrstackptr++] = ptr;
          ptrstack[ptrstackptr++] = repend;

          if (rc == 0)
            {
            ptr = text1_start;
            repend = text1_end;
            }
          else
            {
            ptr = text2_start;
            repend = text2_end;
            }
          continue;
          }

        /* Otherwise we have a literal substitution of a group's contents. */

        LITERAL_SUBSTITUTE:
        subptr = subject + ovector[group*2];
        subptrend = subject + ovector[group*2 + 1];

        /* Substitute a literal string, possibly forcing alphabetic case. */

        while (subptr < subptrend)
          {
          GETCHARINCTEST(ch, subptr);
          if (forcecase != 0)
            {
#ifdef SUPPORT_UNICODE
            if (utf)
              {
              uint32_t type = UCD_CHARTYPE(ch);
              if (PRIV(ucp_gentype)[type] == ucp_L &&
                  type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
                ch = UCD_OTHERCASE(ch);
              }
            else
#endif
              {
              /* NOTE(review): ch indexes the cbits and fcc tables directly
              here. If code units wider than 8 bits are in use, values above
              255 would presumably index past the end of these tables -
              confirm the table sizes and whether a range check is needed. */

              if (((code->tables + cbits_offset +
                  ((forcecase > 0)? cbit_upper:cbit_lower)
                  )[ch/8] & (1 << (ch%8))) == 0)
                ch = (code->tables + fcc_offset)[ch];
              }
            forcecase = forcecasereset;
            }

#ifdef SUPPORT_UNICODE
          if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
            {
            temp[0] = ch;
            chlen = 1;
            }
          CHECKMEMCPY(temp, chlen);
          }
        }
      }

    /* Handle an escape sequence in extended mode. We can use check_escape()
    to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
    the case-forcing escapes are not supported in pcre2_compile() so must be
    recognized here. */

    else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
              *ptr == CHAR_BACKSLASH)
      {
      int errorcode;

      /* forcecase is the case to force on the next character (negative for
      lower, positive for upper); forcecasereset is the value it takes after
      each character, so \l and \u affect one character only. */

      if (ptr < repend - 1) switch (ptr[1])
        {
        case CHAR_L:
        forcecase = forcecasereset = -1;
        ptr += 2;
        continue;

        case CHAR_l:
        forcecase = -1;
        forcecasereset = 0;
        ptr += 2;
        continue;

        case CHAR_U:
        forcecase = forcecasereset = 1;
        ptr += 2;
        continue;

        case CHAR_u:
        forcecase = 1;
        forcecasereset = 0;
        ptr += 2;
        continue;

        default:
        break;
        }

      ptr++;  /* Point after \ */
      rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
        code->overall_options, FALSE, NULL);
      if (errorcode != 0) goto BADESCAPE;

      switch(rc)
        {
        case ESC_E:
        forcecase = forcecasereset = 0;
        continue;

        case ESC_Q:
        literal = TRUE;
        continue;

        case 0:      /* Data character */
        goto LITERAL;

        default:
        goto BADESCAPE;
        }
      }

    /* Handle a literal code unit */

    else
      {
      LOADLITERAL:
      GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */

      LITERAL:
      if (forcecase != 0)
        {
#ifdef SUPPORT_UNICODE
        if (utf)
          {
          uint32_t type = UCD_CHARTYPE(ch);
          if (PRIV(ucp_gentype)[type] == ucp_L &&
              type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
            ch = UCD_OTHERCASE(ch);
          }
        else
#endif
          {
          /* NOTE(review): the same direct table indexing by ch as in the
          group substitution code above - confirm behaviour for values above
          255. */

          if (((code->tables + cbits_offset +
              ((forcecase > 0)? cbit_upper:cbit_lower)
              )[ch/8] & (1 << (ch%8))) == 0)
            ch = (code->tables + fcc_offset)[ch];
          }
        forcecase = forcecasereset;
        }

#ifdef SUPPORT_UNICODE
      if (utf) chlen = PRIV(ord2utf)(ch, temp); else
#endif
        {
        temp[0] = ch;
        chlen = 1;
        }
      CHECKMEMCPY(temp, chlen);
      } /* End handling a literal code unit */
    }   /* End of loop for scanning the replacement. */

  /* The replacement has been copied to the output. Update the start offset to
  point to the rest of the subject string. If we matched an empty string,
  do the magic for global matches. */

  start_offset = ovector[1];
  goptions = (ovector[0] != ovector[1])? 0 :
    PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
  } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */

/* Copy the rest of the subject. */

fraglength = length - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
temp[0] = 0;
CHECKMEMCPY(temp , 1);

/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
and matching has carried on after a full buffer, in order to compute the length
needed. Otherwise, an overflow generates an immediate error return. */

if (overflowed)
  {
  rc = PCRE2_ERROR_NOMEMORY;
  *blength = buff_length + extra_needed;
  }

/* After a successful execution, return the number of substitutions and set the
length of buffer used, excluding the trailing zero. */

else
  {
  rc = subs;
  *blength = buff_offset - 1;
  }

/* Common exit: free a match data block that was created here; otherwise record
the return code in the caller's block. */

EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
  else match_data->rc = rc;
return rc;

NOROOM:
rc = PCRE2_ERROR_NOMEMORY;
goto EXIT;

BAD:
rc = PCRE2_ERROR_BADREPLACEMENT;
goto PTREXIT;

BADESCAPE:
rc = PCRE2_ERROR_BADREPESCAPE;

/* Errors in the replacement string return the offset at which scanning
stopped. */

PTREXIT:
*blength = (PCRE2_SIZE)(ptr - replacement);
goto EXIT;
}
Example #3
0
/* Fill in a property list describing the single-character item at "code".

Arguments:
  code        points to the opcode of the item
  utf         TRUE in UTF mode (used only when SUPPORT_UNICODE is defined)
  fcc         points to the case-flipping table
  list        points to the output list; this function writes no further
                than list[7]

On return:
  list[0]     the opcode, with repeated items reduced to their base opcode
                (OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, or the repeated type) and
                caseless character opcodes and converted caseless sets
                expressed as OP_CHAR or OP_NOT
  list[1]     TRUE for repeats other than the plus and exact forms, and for
                class repeats of the star or query kind or a range whose first
                count is zero; otherwise FALSE
  list[2...]  for OP_CHAR and OP_NOT, the character(s), ended by NOTACHAR;
                for OP_PROP and OP_NOTPROP, the property type and value;
                for classes, the distance from the code unit after the opcode
                  to the end of the item, including any repeat

Returns:      pointer to the code that follows the item, or NULL if the opcode
                is not accepted
*/

static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
  uint32_t *list)
{
PCRE2_UCHAR c = *code;
PCRE2_UCHAR base;
PCRE2_SPTR end;
uint32_t chr;

#ifdef SUPPORT_UNICODE
uint32_t *clist_dest;
const uint32_t *clist_src;
#else
(void)utf;    /* Suppress "unused parameter" compiler warning */
#endif

list[0] = c;
list[1] = FALSE;
code++;

/* For a repeated single-character item, reduce the opcode to the OP_STAR
family equivalent, skip any repeat count, and replace list[0] with the opcode
of the item that is repeated. */

if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
  {
  base = get_repeat_base(c);
  c -= (base - OP_STAR);

  if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
    code += IMM2_SIZE;

  list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
             c != OP_POSPLUS);

  switch(base)
    {
    case OP_STAR:
    list[0] = OP_CHAR;
    break;

    case OP_STARI:
    list[0] = OP_CHARI;
    break;

    case OP_NOTSTAR:
    list[0] = OP_NOT;
    break;

    case OP_NOTSTARI:
    list[0] = OP_NOTI;
    break;

    case OP_TYPESTAR:
    list[0] = *code;
    code++;
    break;
    }
  c = list[0];
  }

switch(c)
  {
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_DOLL:
  case OP_DOLLM:
  return code;

  case OP_CHAR:
  case OP_NOT:
  GETCHARINCTEST(chr, code);
  list[2] = chr;
  list[3] = NOTACHAR;
  return code;

  /* A caseless character becomes a caseful list holding the character and,
  if it is different, its other case. */

  case OP_CHARI:
  case OP_NOTI:
  list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
  GETCHARINCTEST(chr, code);
  list[2] = chr;

#ifdef SUPPORT_UNICODE
  if (chr < 128 || (chr < 256 && !utf))
    list[3] = fcc[chr];
  else
    list[3] = UCD_OTHERCASE(chr);
#elif defined SUPPORT_WIDE_CHARS
  list[3] = (chr < 256) ? fcc[chr] : chr;
#else
  list[3] = fcc[chr];
#endif

  /* The othercase might be the same value. */

  if (chr == list[3])
    list[3] = NOTACHAR;
  else
    list[4] = NOTACHAR;
  return code;

#ifdef SUPPORT_UNICODE
  case OP_PROP:
  case OP_NOTPROP:
  if (code[0] != PT_CLIST)
    {
    list[2] = code[0];
    list[3] = code[1];
    return code + 2;
    }

  /* Convert only if we have enough space. The code pointer is left at the
  property operands until the outcome is known, so that the fallback below can
  store the real property type and value instead of the code units that follow
  them. */

  clist_src = PRIV(ucd_caseless_sets) + code[1];
  clist_dest = list + 2;

  do {
     if (clist_dest >= list + 8)
       {
       /* Early return if there is not enough space. This should never
       happen, since all clists are shorter than 5 characters now. Leave the
       item described as a property, exactly as for a non-CLIST property. */
       list[2] = code[0];
       list[3] = code[1];
       return code + 2;
       }
     *clist_dest++ = *clist_src;
     }
  while(*clist_src++ != NOTACHAR);

  /* All characters are stored. The terminating NOTACHAR is copied from the
  clist itself. */

  list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
  return code + 2;
#endif

  /* For a class, find the end of the class data, then take account of any
  repeat that follows it. */

  case OP_NCLASS:
  case OP_CLASS:
#ifdef SUPPORT_WIDE_CHARS
  case OP_XCLASS:
  if (c == OP_XCLASS)
    end = code + GET(code, 0) - 1;
  else
#endif
    end = code + 32 / sizeof(PCRE2_UCHAR);

  switch(*end)
    {
    case OP_CRSTAR:
    case OP_CRMINSTAR:
    case OP_CRQUERY:
    case OP_CRMINQUERY:
    case OP_CRPOSSTAR:
    case OP_CRPOSQUERY:
    list[1] = TRUE;
    end++;
    break;

    case OP_CRPLUS:
    case OP_CRMINPLUS:
    case OP_CRPOSPLUS:
    end++;
    break;

    case OP_CRRANGE:
    case OP_CRMINRANGE:
    case OP_CRPOSRANGE:
    list[1] = (GET2(end, 1) == 0);
    end += 1 + 2 * IMM2_SIZE;
    break;
    }
  list[2] = (uint32_t)(end - code);
  return end;
  }
return NULL;    /* Opcode not accepted */
}