static int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } switch (BYTE_TYPE(enc, ptr1)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; } }
static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return -XML_TOK_PERCENT; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRFASTCALL PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: ptr += MINBPC(enc); break; default: return (int)(ptr - start); } } }
static int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } } *nextTokPtr = ptr; return XML_TOK_CDATA_SECT_OPEN; }
static int PTRCALL PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; #ifdef XML_NS case BT_COLON: /* no need to check qname syntax here, since end-tag must match exactly */ ptr += MINBPC(enc); break; #endif case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_END_TAG; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRFASTCALL PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; UNUSED(enc); /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result); }
static int PTRCALL PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) { int upper = 0; UNUSED(enc); *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: break; case ASCII_X: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_m: break; case ASCII_M: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: break; case ASCII_L: upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1; }
static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (end1 - ptr1 < MINBPC(enc)) { /* This line cannot be executed. The incoming data has already * been tokenized once, so incomplete characters like this have * already been eliminated from the input. Retaining the * paranoia check is still valuable, however. */ return 0; /* LCOV_EXCL_LINE */ } if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; }
static int PTRCALL PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_COND_SECT_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC(enc); break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_PERCNT: REQUIRE_CHARS(enc, ptr, end, 2); /* don't allow <!ENTITY% foo "whatever"> */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC(enc); break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRCALL PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ if (end - ptr < 6 * MINBPC(enc)) return XML_TOK_PARTIAL; for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; return XML_TOK_INVALID; } } *nextTokPtr = ptr; return XML_TOK_CDATA_SECT_OPEN; }
static int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) { (void)enc; int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case 'x': break; case 'X': upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case 'm': break; case 'M': upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': break; case 'L': upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1; }
static int PTRCALL PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (ptr1 == end1) return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; }
static int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { (void)enc; int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, 'x')) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result <<= 4; result |= (c - '0'); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': result <<= 4; result += 10 + (c - 'A'); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': result <<= 4; result += 10 + (c - 'a'); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - '0'); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result); }
static int PTRCALL PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; const char *target = ptr; REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } break; default: ptr += MINBPC(enc); break; } } return XML_TOK_PARTIAL; case BT_QUEST: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } ptr += MINBPC(enc); REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRCALL PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: /* this is for inside entity references */ *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
static const char * PTRFASTCALL PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) { switch (BYTE_TYPE(enc, ptr)) { case BT_LF: case BT_CR: case BT_S: ptr += MINBPC(enc); break; default: return ptr; } } }
static void PTRCALL PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos) { while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n) { \ return; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_LF: pos->columnNumber = (XML_Size)-1; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; default: ptr += MINBPC(enc); break; } pos->columnNumber++; } }
static int PTRCALL PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return ((tok == XML_TOK_PERCENT || tok == -XML_TOK_PERCENT) ? XML_TOK_INVALID : tok); } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
static int PTRCALL PREFIX (getAtts) (const ENCODING * enc, const char *ptr, int attsMax, ATTRIBUTE * atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; initialization just to shut up compilers */ for (ptr += MINBPC (enc);; ptr += MINBPC (enc)) { switch (BYTE_TYPE (enc, ptr)) { #define START_NAME \ if (state == other) { \ if (nAtts < attsMax) { \ atts[nAtts].name = ptr; \ atts[nAtts].normalized = 1; \ } \ state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; LEAD_CASE (2) LEAD_CASE (3) LEAD_CASE (4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; #undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC (enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_APOS: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC (enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_AMP: if (nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII (enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII (enc, ptr + MINBPC (enc)) == ASCII_SPACE || BYTE_TYPE (enc, ptr + MINBPC (enc)) == open)) atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) return nAtts; break; default: break; } } /* not reached */ }
static int PTRCALL PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; if (ptr == end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_QUOT: return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LT: { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_QUEST: return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_NMSTRT: case BT_HEX: case BT_NONASCII: case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: *nextTokPtr = ptr - MINBPC(enc); return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } case BT_CR: if (ptr + MINBPC(enc) == end) { *nextTokPtr = end; /* indicate that this might be part of a CR/LF pair */ return -XML_TOK_PROLOG_S; } /* fall through */ case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); if (ptr == end) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ if (ptr + MINBPC(enc) != end) break; /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_PROLOG_S; } } *nextTokPtr = ptr; return XML_TOK_PROLOG_S; case BT_PERCNT: return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_COMMA: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_COMMA; case BT_LSQB: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } *nextTokPtr = ptr; return XML_TOK_CLOSE_BRACKET; case BT_LPAR: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); if (ptr == end) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_ASTERISK; case BT_QUEST: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_QUESTION; case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; case BT_CR: case BT_LF: case BT_S: case BT_GT: case BT_COMMA: case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; } *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_VERBAR: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OR; case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ break; \ } \ if (IS_NAME_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NMTOKEN; \ break; \ } \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; ptr += MINBPC(enc); break; case BT_DIGIT: case BT_NAME: case BT_MINUS: #ifdef XML_NS case BT_COLON: #endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; case BT_NONASCII: if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC(enc); tok = XML_TOK_NAME; break; } if (IS_NAME_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC(enc); tok = XML_TOK_NMTOKEN; break; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: case BT_VERBAR: case BT_LSQB: case BT_PERCNT: case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return tok; #ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: if (ptr == end) return XML_TOK_PARTIAL; tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; } break; case XML_TOK_PREFIXED_NAME: tok = XML_TOK_NMTOKEN; break; } break; #endif case BT_PLUS: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_QUESTION; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return -tok; }
static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_LT: return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_AMP: return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } if (ptr + 2*MINBPC(enc) != end) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; } *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_INVALID; } } /* fall through */ case BT_AMP: case BT_LT: case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
static int PTRCALL PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { #ifdef XML_NS int hadColon; #endif if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LSQB: return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); } *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_QUEST: return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_SOL: return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); default: *nextTokPtr = ptr; return XML_TOK_INVALID; } #ifdef XML_NS hadColon = 0; #endif /* we have a start-tag */ while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS case BT_COLON: if (hadColon) { *nextTokPtr = ptr; return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; #endif case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: goto gt; case BT_SOL: goto sol; case BT_S: case BT_CR: case BT_LF: ptr += MINBPC(enc); continue; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); } return XML_TOK_PARTIAL; } case BT_GT: gt: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_START_TAG_NO_ATTS; case BT_SOL: sol: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_EMPTY_ELEMENT_NO_ATTS; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRCALL PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { #ifdef XML_NS int hadColon = 0; #endif while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS case BT_COLON: if (hadColon) { *nextTokPtr = ptr; return XML_TOK_INVALID; } hadColon = 1; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; #endif case BT_S: case BT_CR: case BT_LF: for (;;) { int t; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; switch (t) { case BT_S: case BT_LF: case BT_CR: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } /* fall through */ case BT_EQUALS: { int open; #ifdef XML_NS hadColon = 0; #endif for (;;) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; switch (open) { case BT_S: case BT_LF: case BT_CR: break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } ptr += MINBPC(enc); /* in attribute value */ for (;;) { int t; if (ptr == end) return XML_TOK_PARTIAL; t = BYTE_TYPE(enc, ptr); if (t == open) break; switch (t) { INVALID_CASES(ptr, nextTokPtr) case BT_AMP: { int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); if (tok <= 0) { if (tok == XML_TOK_INVALID) *nextTokPtr = ptr; return tok; } break; } case BT_LT: *nextTokPtr = ptr; return XML_TOK_INVALID; default: ptr += MINBPC(enc); break; } } ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; case BT_SOL: goto sol; case BT_GT: goto gt; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* ptr points to closing quote */ for (;;) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: continue; case BT_GT: gt: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_START_TAG_WITH_ATTS; case BT_SOL: sol: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } break; } break; } default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; }
static int PTRCALL PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; if (ptr >= end) return XML_TOK_NONE; else if (! HAS_CHAR(enc, ptr, end)) { /* This line cannot be executed. The incoming data has already * been tokenized once, so incomplete characters like this have * already been eliminated from the input. Retaining the paranoia * check is still valuable, however. */ return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ } start = ptr; while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) { int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: case BT_RSQB: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
static int PTRCALL PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { for (;;) { switch (BYTE_TYPE(enc, ptr1)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) #undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) return 0; break; case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 1) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 2) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 3) { if (*ptr2++ != *ptr1++) return 0; } } } break; default: if (MINBPC(enc) == 1 && *ptr1 == *ptr2) return 1; switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; } } } /* not reached */ }