Esempio n. 1
0
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware,
                      const char** colon)
{
  int result = MOZ_EXPAT_VALID_QNAME;
  int nmstrt = 1;
  *colon = 0;
  if (ptr == end) {
    return MOZ_EXPAT_EMPTY_QNAME;
  }
  do {
    switch (BYTE_TYPE(ptr)) {
    case BT_COLON:
       /* We're namespace-aware and either first or last character is a colon
          or we've already seen a colon. */
      if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) {
        return MOZ_EXPAT_MALFORMED;
      }
      *colon = ptr;
      nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */
      break;
    case BT_NONASCII:
      if (nmstrt && !IS_NMSTRT_CHAR_MINBPC(ptr)) {
        /* If this is a valid name character and we're namespace-aware, the
           QName is malformed.  Otherwise, this character's invalid at the
           start of a name (or, if we're namespace-aware, at the start of a
           localpart). */
        return (IS_NAME_CHAR_MINBPC(ptr) && ns_aware) ?
               MOZ_EXPAT_MALFORMED :
               MOZ_EXPAT_INVALID_CHARACTER;
      }
      if (!IS_NAME_CHAR_MINBPC(ptr)) {
        return MOZ_EXPAT_INVALID_CHARACTER;
      }
      nmstrt = 0;
      break;
    case BT_NMSTRT:
    case BT_HEX:
      nmstrt = 0;
      break;
    case BT_DIGIT:
    case BT_NAME:
    case BT_MINUS:
      if (nmstrt) {
        return MOZ_EXPAT_INVALID_CHARACTER;
      }
      break;
    default:
      return MOZ_EXPAT_INVALID_CHARACTER;
    }
    ptr += 2;
  } while (ptr != end);
  return result;
}
Esempio n. 2
0
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware,
                      const char** colon)
{
  int result = MOZ_EXPAT_VALID_QNAME;
  int nmstrt = 1;
  *colon = 0;
  if (ptr == end) {
    return MOZ_EXPAT_EMPTY_QNAME;
  }
  do {
    switch (BYTE_TYPE(ptr)) {
    case BT_COLON:
       /* We're namespace-aware and either first or last character is a colon
          or we've already seen a colon. */
      if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) {
        return MOZ_EXPAT_MALFORMED;
      }
      *colon = ptr;
      nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */
      break;
    case BT_NONASCII:
      if (!IS_NAME_CHAR_MINBPC(ptr) ||
          (nmstrt && !*colon && !IS_NMSTRT_CHAR_MINBPC(ptr))) {
        return MOZ_EXPAT_INVALID_CHARACTER;
      }
      if (nmstrt && *colon && !IS_NMSTRT_CHAR_MINBPC(ptr)) {
        /* If a non-starting character like a number is right after the colon,
           this is a namespace error, not invalid character */
        return MOZ_EXPAT_MALFORMED;
      }
      nmstrt = 0;
      break;
    case BT_NMSTRT:
    case BT_HEX:
      nmstrt = 0;
      break;
    case BT_DIGIT:
    case BT_NAME:
    case BT_MINUS:
      if (nmstrt) {
        return MOZ_EXPAT_INVALID_CHARACTER;
      }
      break;
    default:
      return MOZ_EXPAT_INVALID_CHARACTER;
    }
    ptr += 2;
  } while (ptr != end);
  return result;
}
Esempio n. 3
0
int MOZ_XMLIsNCNameChar(const char* ptr)
{
  switch (BYTE_TYPE(ptr)) {
  case BT_NONASCII:
    if (!IS_NAME_CHAR_MINBPC(ptr)) {
      return 0;
    }
  case BT_NMSTRT:
  case BT_HEX:
  case BT_DIGIT:
  case BT_NAME:
  case BT_MINUS:
    return 1;
  default:
    return 0;
  }
}
Esempio n. 4
0
static int PTRCALL
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
                  const char **nextTokPtr)
{
  int tok;
  if (ptr == end)
    return XML_TOK_NONE;
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
        return XML_TOK_PARTIAL;
      end = ptr + n;
    }
  }
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_QUOT:
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_APOS:
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_LT:
    {
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_EXCL:
        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      case BT_QUEST:
        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      case BT_NMSTRT:
      case BT_HEX:
      case BT_NONASCII:
      case BT_LEAD2:
      case BT_LEAD3:
      case BT_LEAD4:
        *nextTokPtr = ptr - MINBPC(enc);
        return XML_TOK_INSTANCE_START;
      }
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  case BT_CR:
    if (ptr + MINBPC(enc) == end) {
      *nextTokPtr = end;
      /* indicate that this might be part of a CR/LF pair */
      return -XML_TOK_PROLOG_S;
    }
    /* fall through */
  case BT_S: case BT_LF:
    for (;;) {
      ptr += MINBPC(enc);
      if (ptr == end)
        break;
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_S: case BT_LF:
        break;
      case BT_CR:
        /* don't split CR/LF pair */
        if (ptr + MINBPC(enc) != end)
          break;
        /* fall through */
      default:
        *nextTokPtr = ptr;
        return XML_TOK_PROLOG_S;
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_PROLOG_S;
  case BT_PERCNT:
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_COMMA:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_COMMA;
  case BT_LSQB:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_BRACKET;
  case BT_RSQB:
    ptr += MINBPC(enc);
    if (ptr == end)
      return -XML_TOK_CLOSE_BRACKET;
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
      if (ptr + MINBPC(enc) == end)
        return XML_TOK_PARTIAL;
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
        *nextTokPtr = ptr + 2*MINBPC(enc);
        return XML_TOK_COND_SECT_CLOSE;
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_CLOSE_BRACKET;
  case BT_LPAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_PAREN;
  case BT_RPAR:
    ptr += MINBPC(enc);
    if (ptr == end)
      return -XML_TOK_CLOSE_PAREN;
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_AST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_ASTERISK;
    case BT_QUEST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_QUESTION;
    case BT_PLUS:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_PLUS;
    case BT_CR: case BT_LF: case BT_S:
    case BT_GT: case BT_COMMA: case BT_VERBAR:
    case BT_RPAR:
      *nextTokPtr = ptr;
      return XML_TOK_CLOSE_PAREN;
    }
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  case BT_VERBAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OR;
  case BT_GT:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DECL_CLOSE;
  case BT_NUM:
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
#define LEAD_CASE(n) \
  case BT_LEAD ## n: \
    if (end - ptr < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NAME; \
      break; \
    } \
    if (IS_NAME_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NMTOKEN; \
      break; \
    } \
    *nextTokPtr = ptr; \
    return XML_TOK_INVALID;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
  case BT_NMSTRT:
  case BT_HEX:
    tok = XML_TOK_NAME;
    ptr += MINBPC(enc);
    break;
  case BT_DIGIT:
  case BT_NAME:
  case BT_MINUS:
#ifdef XML_NS
  case BT_COLON:
#endif
    tok = XML_TOK_NMTOKEN;
    ptr += MINBPC(enc);
    break;
  case BT_NONASCII:
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NAME;
      break;
    }
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NMTOKEN;
      break;
    }
    /* fall through */
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_GT: case BT_RPAR: case BT_COMMA:
    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
    case BT_S: case BT_CR: case BT_LF:
      *nextTokPtr = ptr;
      return tok;
#ifdef XML_NS
    case BT_COLON:
      ptr += MINBPC(enc);
      switch (tok) {
      case XML_TOK_NAME:
        if (ptr == end)
          return XML_TOK_PARTIAL;
        tok = XML_TOK_PREFIXED_NAME;
        switch (BYTE_TYPE(enc, ptr)) {
        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
        default:
          tok = XML_TOK_NMTOKEN;
          break;
        }
        break;
      case XML_TOK_PREFIXED_NAME:
        tok = XML_TOK_NMTOKEN;
        break;
      }
      break;
#endif
    case BT_PLUS:
      if (tok == XML_TOK_NMTOKEN)  {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_PLUS;
    case BT_AST:
      if (tok == XML_TOK_NMTOKEN)  {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_ASTERISK;
    case BT_QUEST:
      if (tok == XML_TOK_NMTOKEN)  {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_QUESTION;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  return -tok;
}
Esempio n. 5
0
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware,
                      const char** colon)
{
  int result = MOZ_EXPAT_VALID_QNAME;
  int nmstrt = 1;
  *colon = 0;
  if (ptr == end) {
    return MOZ_EXPAT_EMPTY_QNAME;
  }
  do {
    switch (BYTE_TYPE(ptr)) {
    case BT_COLON:
      if (ns_aware) {
        if (*colon != 0 || nmstrt || ptr + 2 == end) {
          /* We already encountered a colon or this is the first or the last
             character so the QName is malformed. */
          result |= MOZ_EXPAT_MALFORMED;
        }
        *colon = ptr;
        nmstrt = 1;
      }
      else if (nmstrt) {
        /* This is the first character so the QName is malformed. */
        result |= MOZ_EXPAT_MALFORMED;
        nmstrt = 0;
      }
      break;
    case BT_NONASCII:
      if (nmstrt) {
        if (!IS_NMSTRT_CHAR_MINBPC(ptr)) {
          /* If this is a valid name character the QName is malformed, 
             otherwise it contains an invalid character. */
          result |= IS_NAME_CHAR_MINBPC(ptr) ? MOZ_EXPAT_MALFORMED :
                                               MOZ_EXPAT_INVALID_CHARACTER;
        }
      }
      else if (!IS_NAME_CHAR_MINBPC(ptr)) {
        result |= MOZ_EXPAT_INVALID_CHARACTER;
      }
      nmstrt = 0;
      break;
    case BT_NMSTRT:
    case BT_HEX:
      nmstrt = 0;
      break;
    case BT_DIGIT:
    case BT_NAME:
    case BT_MINUS:
      if (nmstrt) {
        result |= MOZ_EXPAT_MALFORMED;
        nmstrt = 0;
      }
      break;
    default:
      result |= MOZ_EXPAT_INVALID_CHARACTER;
      nmstrt = 0;
    }
    ptr += 2;
  } while (ptr != end);
  return result;
}