int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware, const char** colon) { int result = MOZ_EXPAT_VALID_QNAME; int nmstrt = 1; *colon = 0; if (ptr == end) { return MOZ_EXPAT_EMPTY_QNAME; } do { switch (BYTE_TYPE(ptr)) { case BT_COLON: /* We're namespace-aware and either first or last character is a colon or we've already seen a colon. */ if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) { return MOZ_EXPAT_MALFORMED; } *colon = ptr; nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */ break; case BT_NONASCII: if (nmstrt && !IS_NMSTRT_CHAR_MINBPC(ptr)) { /* If this is a valid name character and we're namespace-aware, the QName is malformed. Otherwise, this character's invalid at the start of a name (or, if we're namespace-aware, at the start of a localpart). */ return (IS_NAME_CHAR_MINBPC(ptr) && ns_aware) ? MOZ_EXPAT_MALFORMED : MOZ_EXPAT_INVALID_CHARACTER; } if (!IS_NAME_CHAR_MINBPC(ptr)) { return MOZ_EXPAT_INVALID_CHARACTER; } nmstrt = 0; break; case BT_NMSTRT: case BT_HEX: nmstrt = 0; break; case BT_DIGIT: case BT_NAME: case BT_MINUS: if (nmstrt) { return MOZ_EXPAT_INVALID_CHARACTER; } break; default: return MOZ_EXPAT_INVALID_CHARACTER; } ptr += 2; } while (ptr != end); return result; }
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware, const char** colon) { int result = MOZ_EXPAT_VALID_QNAME; int nmstrt = 1; *colon = 0; if (ptr == end) { return MOZ_EXPAT_EMPTY_QNAME; } do { switch (BYTE_TYPE(ptr)) { case BT_COLON: /* We're namespace-aware and either first or last character is a colon or we've already seen a colon. */ if (ns_aware && (nmstrt || *colon || ptr + 2 == end)) { return MOZ_EXPAT_MALFORMED; } *colon = ptr; nmstrt = ns_aware; /* e.g. "a:0" should be valid if !ns_aware */ break; case BT_NONASCII: if (!IS_NAME_CHAR_MINBPC(ptr) || (nmstrt && !*colon && !IS_NMSTRT_CHAR_MINBPC(ptr))) { return MOZ_EXPAT_INVALID_CHARACTER; } if (nmstrt && *colon && !IS_NMSTRT_CHAR_MINBPC(ptr)) { /* If a non-starting character like a number is right after the colon, this is a namespace error, not invalid character */ return MOZ_EXPAT_MALFORMED; } nmstrt = 0; break; case BT_NMSTRT: case BT_HEX: nmstrt = 0; break; case BT_DIGIT: case BT_NAME: case BT_MINUS: if (nmstrt) { return MOZ_EXPAT_INVALID_CHARACTER; } break; default: return MOZ_EXPAT_INVALID_CHARACTER; } ptr += 2; } while (ptr != end); return result; }
/*
 * Report whether the character at ptr may appear inside an NCName.
 *
 * Returns 1 for the name-start, hex, digit, name and minus byte types, and
 * for a non-ASCII character that IS_NAME_CHAR_MINBPC accepts; returns 0 for
 * everything else. BT_COLON is not listed, so a colon yields 0.
 */
int MOZ_XMLIsNCNameChar(const char* ptr)
{
  int is_ncname_char;

  switch (BYTE_TYPE(ptr)) {
  case BT_NMSTRT:
  case BT_HEX:
  case BT_DIGIT:
  case BT_NAME:
  case BT_MINUS:
    is_ncname_char = 1;
    break;
  case BT_NONASCII:
    /* Non-ASCII characters need the explicit name-character lookup. */
    is_ncname_char = IS_NAME_CHAR_MINBPC(ptr) ? 1 : 0;
    break;
  default:
    is_ncname_char = 0;
    break;
  }

  return is_ncname_char;
}
/*
 * Scan one prolog token starting at ptr and return its XML_TOK_* code.
 *
 *   enc         - encoding descriptor handed to the MINBPC / BYTE_TYPE /
 *                 CHAR_MATCHES / IS_*_CHAR macros.
 *   ptr, end    - start and one-past-the-end of the available input.
 *   nextTokPtr  - out parameter; on most returns it is set to where scanning
 *                 stopped (for XML_TOK_INVALID, the offending position).
 *                 Several returns below leave it unwritten: XML_TOK_NONE,
 *                 the XML_TOK_PARTIAL / XML_TOK_PARTIAL_CHAR returns made
 *                 directly in this function, -XML_TOK_CLOSE_BRACKET,
 *                 -XML_TOK_CLOSE_PAREN and the final -tok.
 *
 * Quoted literals, "<!" / "<?" constructs, '%' and '#' names are delegated
 * to the scanLit / scanDecl / scanPi / scanPercent / scanPoundName helpers.
 *
 * A negated token code is returned when the input ended right after (or
 * inside) something that would otherwise be that token.
 * NOTE(review): presumably callers treat a negative value as "valid only if
 * no more input follows" -- confirm against the callers.
 */
static int PTRCALL
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
                  const char **nextTokPtr)
{
  int tok;
  if (ptr == end)
    return XML_TOK_NONE;
  /* For multi-byte code units, ignore a trailing incomplete unit; if that
     leaves nothing, more input is needed. The mask arithmetic assumes
     MINBPC(enc) is a power of two. */
  if (MINBPC(enc) > 1) {
    size_t n = end - ptr;
    if (n & (MINBPC(enc) - 1)) {
      n &= ~(MINBPC(enc) - 1);
      if (n == 0)
        return XML_TOK_PARTIAL;
      end = ptr + n;
    }
  }
  /* Dispatch on the first character. Cases that `break` out of this switch
     have started a name (tok == XML_TOK_NAME) or a name token
     (tok == XML_TOK_NMTOKEN) and continue in the loop below. */
  switch (BYTE_TYPE(enc, ptr)) {
  case BT_QUOT:
    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_APOS:
    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_LT:
    {
      ptr += MINBPC(enc);
      if (ptr == end)
        return XML_TOK_PARTIAL;
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_EXCL:
        return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      case BT_QUEST:
        return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
      case BT_NMSTRT:
      case BT_HEX:
      case BT_NONASCII:
      case BT_LEAD2:
      case BT_LEAD3:
      case BT_LEAD4:
        /* BT_LT followed by a possible name start: report the start of the
           instance and point back at the BT_LT character so it is not
           consumed. */
        *nextTokPtr = ptr - MINBPC(enc);
        return XML_TOK_INSTANCE_START;
      }
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  case BT_CR:
    if (ptr + MINBPC(enc) == end) {
      *nextTokPtr = end;
      /* indicate that this might be part of a CR/LF pair */
      return -XML_TOK_PROLOG_S;
    }
    /* fall through */
  case BT_S:
  case BT_LF:
    /* Consume a run of whitespace as a single XML_TOK_PROLOG_S token. */
    for (;;) {
      ptr += MINBPC(enc);
      if (ptr == end)
        break;
      switch (BYTE_TYPE(enc, ptr)) {
      case BT_S:
      case BT_LF:
        break;
      case BT_CR:
        /* don't split CR/LF pair */
        if (ptr + MINBPC(enc) != end)
          break;
        /* fall through */
      default:
        *nextTokPtr = ptr;
        return XML_TOK_PROLOG_S;
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_PROLOG_S;
  case BT_PERCNT:
    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
  case BT_COMMA:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_COMMA;
  case BT_LSQB:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_BRACKET;
  case BT_RSQB:
    /* Either a single close bracket, or ASCII_RSQB ASCII_RSQB ASCII_GT,
       which closes a conditional section. */
    ptr += MINBPC(enc);
    if (ptr == end)
      return -XML_TOK_CLOSE_BRACKET;
    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
      if (ptr + MINBPC(enc) == end)
        return XML_TOK_PARTIAL;
      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
        *nextTokPtr = ptr + 2*MINBPC(enc);
        return XML_TOK_COND_SECT_CLOSE;
      }
    }
    *nextTokPtr = ptr;
    return XML_TOK_CLOSE_BRACKET;
  case BT_LPAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OPEN_PAREN;
  case BT_RPAR:
    /* A close paren may carry an occurrence suffix (BT_AST, BT_QUEST,
       BT_PLUS); otherwise it must be followed by one of the delimiters
       listed below, which is left unconsumed. */
    ptr += MINBPC(enc);
    if (ptr == end)
      return -XML_TOK_CLOSE_PAREN;
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_AST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_ASTERISK;
    case BT_QUEST:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_QUESTION;
    case BT_PLUS:
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_CLOSE_PAREN_PLUS;
    case BT_CR:
    case BT_LF:
    case BT_S:
    case BT_GT:
    case BT_COMMA:
    case BT_VERBAR:
    case BT_RPAR:
      *nextTokPtr = ptr;
      return XML_TOK_CLOSE_PAREN;
    }
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  case BT_VERBAR:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_OR;
  case BT_GT:
    *nextTokPtr = ptr + MINBPC(enc);
    return XML_TOK_DECL_CLOSE;
  case BT_NUM:
    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* LEAD_CASE(n) expands to the case for an n-byte lead character: it needs
   at least n bytes of input, then classifies the character as the start of
   a name, the start of a name token, or invalid. */
#define LEAD_CASE(n) \
  case BT_LEAD ## n: \
    if (end - ptr < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NAME; \
      break; \
    } \
    if (IS_NAME_CHAR(enc, ptr, n)) { \
      ptr += n; \
      tok = XML_TOK_NMTOKEN; \
      break; \
    } \
    *nextTokPtr = ptr; \
    return XML_TOK_INVALID;
    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
  case BT_NMSTRT:
  case BT_HEX:
    tok = XML_TOK_NAME;
    ptr += MINBPC(enc);
    break;
  case BT_DIGIT:
  case BT_NAME:
  case BT_MINUS:
#ifdef XML_NS
  case BT_COLON:
#endif
    /* A name character that cannot start a name begins a name token. */
    tok = XML_TOK_NMTOKEN;
    ptr += MINBPC(enc);
    break;
  case BT_NONASCII:
    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NAME;
      break;
    }
    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
      ptr += MINBPC(enc);
      tok = XML_TOK_NMTOKEN;
      break;
    }
    /* fall through */
  default:
    *nextTokPtr = ptr;
    return XML_TOK_INVALID;
  }
  /* Scan the rest of the name / name token until a delimiter, an occurrence
     suffix, or the end of the input. */
  while (ptr != end) {
    switch (BYTE_TYPE(enc, ptr)) {
    /* NOTE(review): CHECK_NAME_CASES is defined elsewhere; presumably it
       supplies the cases that consume further name characters -- confirm. */
    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
    case BT_GT:
    case BT_RPAR:
    case BT_COMMA:
    case BT_VERBAR:
    case BT_LSQB:
    case BT_PERCNT:
    case BT_S:
    case BT_CR:
    case BT_LF:
      /* Delimiter: the token ends here and the delimiter is not consumed. */
      *nextTokPtr = ptr;
      return tok;
#ifdef XML_NS
    case BT_COLON:
      /* The first colon in a name makes it a prefixed name, provided a name
         character follows; anything else (including a second colon)
         demotes the token to a name token. */
      ptr += MINBPC(enc);
      switch (tok) {
      case XML_TOK_NAME:
        if (ptr == end)
          return XML_TOK_PARTIAL;
        tok = XML_TOK_PREFIXED_NAME;
        switch (BYTE_TYPE(enc, ptr)) {
        CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
        default:
          tok = XML_TOK_NMTOKEN;
          break;
        }
        break;
      case XML_TOK_PREFIXED_NAME:
        tok = XML_TOK_NMTOKEN;
        break;
      }
      break;
#endif
    /* Occurrence suffixes are consumed as part of the token, and are
       rejected after a name token. */
    case BT_PLUS:
      if (tok == XML_TOK_NMTOKEN) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_PLUS;
    case BT_AST:
      if (tok == XML_TOK_NMTOKEN) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_ASTERISK;
    case BT_QUEST:
      if (tok == XML_TOK_NMTOKEN) {
        *nextTokPtr = ptr;
        return XML_TOK_INVALID;
      }
      *nextTokPtr = ptr + MINBPC(enc);
      return XML_TOK_NAME_QUESTION;
    default:
      *nextTokPtr = ptr;
      return XML_TOK_INVALID;
    }
  }
  /* Input ended while still inside the name: return the negated token. */
  return -tok;
}
int MOZ_XMLCheckQName(const char* ptr, const char* end, int ns_aware, const char** colon) { int result = MOZ_EXPAT_VALID_QNAME; int nmstrt = 1; *colon = 0; if (ptr == end) { return MOZ_EXPAT_EMPTY_QNAME; } do { switch (BYTE_TYPE(ptr)) { case BT_COLON: if (ns_aware) { if (*colon != 0 || nmstrt || ptr + 2 == end) { /* We already encountered a colon or this is the first or the last character so the QName is malformed. */ result |= MOZ_EXPAT_MALFORMED; } *colon = ptr; nmstrt = 1; } else if (nmstrt) { /* This is the first character so the QName is malformed. */ result |= MOZ_EXPAT_MALFORMED; nmstrt = 0; } break; case BT_NONASCII: if (nmstrt) { if (!IS_NMSTRT_CHAR_MINBPC(ptr)) { /* If this is a valid name character the QName is malformed, otherwise it contains an invalid character. */ result |= IS_NAME_CHAR_MINBPC(ptr) ? MOZ_EXPAT_MALFORMED : MOZ_EXPAT_INVALID_CHARACTER; } } else if (!IS_NAME_CHAR_MINBPC(ptr)) { result |= MOZ_EXPAT_INVALID_CHARACTER; } nmstrt = 0; break; case BT_NMSTRT: case BT_HEX: nmstrt = 0; break; case BT_DIGIT: case BT_NAME: case BT_MINUS: if (nmstrt) { result |= MOZ_EXPAT_MALFORMED; nmstrt = 0; } break; default: result |= MOZ_EXPAT_INVALID_CHARACTER; nmstrt = 0; } ptr += 2; } while (ptr != end); return result; }