static int PTRCALL PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) { UNUSED(enc); switch ((end - ptr)/MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: return ASCII_LT; case ASCII_g: return ASCII_GT; } } break; case 3: if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_m)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_p)) return ASCII_AMP; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_q: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_u)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_t)) return ASCII_QUOT; } } break; case ASCII_a: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_p)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_s)) return ASCII_APOS; } } break; } } return 0; }
static int PTRCALL PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: case BT_MINUS: case BT_APOS: case BT_LPAR: case BT_RPAR: case BT_PLUS: case BT_COMMA: case BT_SOL: case BT_EQUALS: case BT_QUEST: case BT_CR: case BT_LF: case BT_SEMI: case BT_EXCL: case BT_AST: case BT_PERCNT: case BT_NUM: #ifdef XML_NS case BT_COLON: #endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { *badPtr = ptr; return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; /* fall through */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ break; default: *badPtr = ptr; return 0; } break; } } return 1; }
static int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) { (void)enc; switch (end - ptr) { case 2 * MINBPC: if (CHAR_MATCHES(enc, ptr + MINBPC, 't')) { switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': return '<'; case 'g': return '>'; } } break; case 3 * MINBPC: if (CHAR_MATCHES(enc, ptr, 'a')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'm')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'p')) return '&'; } } break; case 4 * MINBPC: switch (BYTE_TO_ASCII(enc, ptr)) { case 'q': ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'u')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 't')) return '"'; } } break; case 'a': ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'p')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 'o')) { ptr += MINBPC; if (CHAR_MATCHES(enc, ptr, 's')) return '\''; } } break; } } return 0; }
static int PTRFASTCALL PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); if (result >= 0x110000) return -1; } } // Some faggots compose mails with ancient control symbols in headers. // Libexpat tokenizer breaks on them (so far 'Form Feed' and 'DC2' were found). if (result == 0x0C) { result = 0x0A; } else if (result == 0x12) { result = ' '; } return checkCharRefNumber(result); }
static int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr) { ptr += MINBPC; end -= MINBPC; for (; ptr != end; ptr += MINBPC) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: case BT_MINUS: case BT_APOS: case BT_LPAR: case BT_RPAR: case BT_PLUS: case BT_COMMA: case BT_SOL: case BT_EQUALS: case BT_QUEST: case BT_CR: case BT_LF: case BT_SEMI: case BT_EXCL: case BT_AST: case BT_PERCNT: case BT_NUM: break; case BT_S: if (CHAR_MATCHES(enc, ptr, '\t')) { *badPtr = ptr; return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ break; default: *badPtr = ptr; return 0; } break; } } return 1; }
static int PTRFASTCALL PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; UNUSED(enc); /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result); }
static int PTRCALL PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) { int upper = 0; UNUSED(enc); *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: break; case ASCII_X: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_m: break; case ASCII_M: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: break; case ASCII_L: upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1; }
static int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) { (void)enc; int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case 'x': break; case 'X': upper = 1; break; default: return 1; } ptr += MINBPC; switch (BYTE_TO_ASCII(enc, ptr)) { case 'm': break; case 'M': upper = 1; break; default: return 1; } ptr += MINBPC; switch (BYTE_TO_ASCII(enc, ptr)) { case 'l': break; case 'L': upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1; }
static int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; (void)enc; /* skip &# */ ptr += 2*MINBPC; if (CHAR_MATCHES(enc, ptr, 'x')) { for (ptr += MINBPC; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result <<= 4; result |= (c - '0'); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': result <<= 4; result += 10 + (c - 'A'); break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': result <<= 4; result += 10 + (c - 'a'); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - '0'); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result); }
static int PTRCALL PREFIX (getAtts) (const ENCODING * enc, const char *ptr, int attsMax, ATTRIBUTE * atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; initialization just to shut up compilers */ for (ptr += MINBPC (enc);; ptr += MINBPC (enc)) { switch (BYTE_TYPE (enc, ptr)) { #define START_NAME \ if (state == other) { \ if (nAtts < attsMax) { \ atts[nAtts].name = ptr; \ atts[nAtts].normalized = 1; \ } \ state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; LEAD_CASE (2) LEAD_CASE (3) LEAD_CASE (4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; #undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC (enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_APOS: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC (enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_AMP: if (nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII (enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII (enc, ptr + MINBPC (enc)) == ASCII_SPACE || BYTE_TYPE (enc, ptr + MINBPC (enc)) == open)) atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. */ if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) return nAtts; break; default: break; } } /* not reached */ }