Ejemplo n.º 1
0
static int PTRCALL
PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
                             const char *end)
{
  UNUSED(enc);

  switch ((end - ptr)/MINBPC(enc)) {
  case 2:
    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case ASCII_l:
        return ASCII_LT;
      case ASCII_g:
        return ASCII_GT;
      }
    }
    break;
  case 3:
    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_p))
          return ASCII_AMP;
      }
    }
    break;
  case 4:
    switch (BYTE_TO_ASCII(enc, ptr)) {
    case ASCII_q:
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
          ptr += MINBPC(enc);
          if (CHAR_MATCHES(enc, ptr, ASCII_t))
            return ASCII_QUOT;
        }
      }
      break;
    case ASCII_a:
      ptr += MINBPC(enc);
      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
        ptr += MINBPC(enc);
        if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
          ptr += MINBPC(enc);
          if (CHAR_MATCHES(enc, ptr, ASCII_s))
            return ASCII_APOS;
        }
      }
      break;
    }
  }
  return 0;
}
Ejemplo n.º 2
0
static int PTRCALL
PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
                   const char **badPtr)
{
  ptr += MINBPC(enc);
  end -= MINBPC(enc);
  for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) {
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_DIGIT:
    case BT_HEX:
    case BT_MINUS:
    case BT_APOS:
    case BT_LPAR:
    case BT_RPAR:
    case BT_PLUS:
    case BT_COMMA:
    case BT_SOL:
    case BT_EQUALS:
    case BT_QUEST:
    case BT_CR:
    case BT_LF:
    case BT_SEMI:
    case BT_EXCL:
    case BT_AST:
    case BT_PERCNT:
    case BT_NUM:
#ifdef XML_NS
    case BT_COLON:
#endif
      break;
    case BT_S:
      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
        *badPtr = ptr;
        return 0;
      }
      break;
    case BT_NAME:
    case BT_NMSTRT:
      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
        break;
      /* fall through */
    default:
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case 0x24: /* $ */
      case 0x40: /* @ */
        break;
      default:
        *badPtr = ptr;
        return 0;
      }
      break;
    }
  }
  return 1;
}
Ejemplo n.º 3
0
static
int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
{
	(void)enc;
  switch (end - ptr) {
  case 2 * MINBPC:
    if (CHAR_MATCHES(enc, ptr + MINBPC, 't')) {
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case 'l':
	return '<';
      case 'g':
	return '>';
      }
    }
    break;
  case 3 * MINBPC:
    if (CHAR_MATCHES(enc, ptr, 'a')) {
      ptr += MINBPC;
      if (CHAR_MATCHES(enc, ptr, 'm')) {
	ptr += MINBPC;
	if (CHAR_MATCHES(enc, ptr, 'p'))
	  return '&';
      }
    }
    break;
  case 4 * MINBPC:
    switch (BYTE_TO_ASCII(enc, ptr)) {
    case 'q':
      ptr += MINBPC;
      if (CHAR_MATCHES(enc, ptr, 'u')) {
	ptr += MINBPC;
	if (CHAR_MATCHES(enc, ptr, 'o')) {
	  ptr += MINBPC;
  	  if (CHAR_MATCHES(enc, ptr, 't'))
	    return '"';
	}
      }
      break;
    case 'a':
      ptr += MINBPC;
      if (CHAR_MATCHES(enc, ptr, 'p')) {
	ptr += MINBPC;
	if (CHAR_MATCHES(enc, ptr, 'o')) {
	  ptr += MINBPC;
  	  if (CHAR_MATCHES(enc, ptr, 's'))
	    return '\'';
	}
      }
      break;
    }
  }
  return 0;
}
Ejemplo n.º 4
0
static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{
  int result = 0;
  /* skip &# */
  ptr += 2*MINBPC(enc);
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
    for (ptr += MINBPC(enc);
         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
         ptr += MINBPC(enc)) {
      int c = BYTE_TO_ASCII(enc, ptr);
      switch (c) {
      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
        result <<= 4;
        result |= (c - ASCII_0);
        break;
      case ASCII_A: case ASCII_B: case ASCII_C:
      case ASCII_D: case ASCII_E: case ASCII_F:
        result <<= 4;
        result += 10 + (c - ASCII_A);
        break;
      case ASCII_a: case ASCII_b: case ASCII_c:
      case ASCII_d: case ASCII_e: case ASCII_f:
        result <<= 4;
        result += 10 + (c - ASCII_a);
        break;
      }
      if (result >= 0x110000)
        return -1;
    }
  }
  else {
    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
      int c = BYTE_TO_ASCII(enc, ptr);
      result *= 10;
      result += (c - ASCII_0);
      if (result >= 0x110000)
        return -1;
    }
  }
  
// Some faggots compose mails with ancient control symbols in headers.
// Libexpat tokenizer breaks on them (so far 'Form Feed' and 'DC2' were found).
  if (result == 0x0C) {
    result = 0x0A;
  }
  else if (result == 0x12) {
    result = ' ';
  }

  return checkCharRefNumber(result);
}
Ejemplo n.º 5
0
static
int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
		       const char **badPtr)
{
  ptr += MINBPC;
  end -= MINBPC;
  for (; ptr != end; ptr += MINBPC) {
    switch (BYTE_TYPE(enc, ptr)) {
    case BT_DIGIT:
    case BT_HEX:
    case BT_MINUS:
    case BT_APOS:
    case BT_LPAR:
    case BT_RPAR:
    case BT_PLUS:
    case BT_COMMA:
    case BT_SOL:
    case BT_EQUALS:
    case BT_QUEST:
    case BT_CR:
    case BT_LF:
    case BT_SEMI:
    case BT_EXCL:
    case BT_AST:
    case BT_PERCNT:
    case BT_NUM:
      break;
    case BT_S:
      if (CHAR_MATCHES(enc, ptr, '\t')) {
	*badPtr = ptr;
	return 0;
      }
      break;
    case BT_NAME:
    case BT_NMSTRT:
      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
	break;
    default:
      switch (BYTE_TO_ASCII(enc, ptr)) {
      case 0x24: /* $ */
      case 0x40: /* @ */
	break;
      default:
	*badPtr = ptr;
	return 0;
      }
      break;
    }
  }
  return 1;
}
Ejemplo n.º 6
0
static int PTRFASTCALL
PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{
  int result = 0;

  UNUSED(enc);

  /* skip &# */
  ptr += 2*MINBPC(enc);
  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
    for (ptr += MINBPC(enc);
         !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
         ptr += MINBPC(enc)) {
      int c = BYTE_TO_ASCII(enc, ptr);
      switch (c) {
      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
        result <<= 4;
        result |= (c - ASCII_0);
        break;
      case ASCII_A: case ASCII_B: case ASCII_C:
      case ASCII_D: case ASCII_E: case ASCII_F:
        result <<= 4;
        result += 10 + (c - ASCII_A);
        break;
      case ASCII_a: case ASCII_b: case ASCII_c:
      case ASCII_d: case ASCII_e: case ASCII_f:
        result <<= 4;
        result += 10 + (c - ASCII_a);
        break;
      }
      if (result >= 0x110000)
        return -1;
    }
  }
  else {
    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
      int c = BYTE_TO_ASCII(enc, ptr);
      result *= 10;
      result += (c - ASCII_0);
      if (result >= 0x110000)
        return -1;
    }
  }
  return checkCharRefNumber(result);
}
Ejemplo n.º 7
0
static int PTRCALL
PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
                      const char *end, int *tokPtr)
{
  int upper = 0;

  UNUSED(enc);

  *tokPtr = XML_TOK_PI;
  if (end - ptr != MINBPC(enc)*3)
    return 1;
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_x:
    break;
  case ASCII_X:
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC(enc);
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_m:
    break;
  case ASCII_M:
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC(enc);
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case ASCII_l:
    break;
  case ASCII_L:
    upper = 1;
    break;
  default:
    return 1;
  }
  if (upper)
    return 0;
  *tokPtr = XML_TOK_XML_DECL;
  return 1;
}
Ejemplo n.º 8
0
static
int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
{
	(void)enc;

  int upper = 0;
  *tokPtr = XML_TOK_PI;
  if (end - ptr != MINBPC*3)
    return 1;
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case 'x':
    break;
  case 'X':
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC;
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case 'm':
    break;
  case 'M':
    upper = 1;
    break;
  default:
    return 1;
  }
  ptr += MINBPC;
  switch (BYTE_TO_ASCII(enc, ptr)) {
  case 'l':
    break;
  case 'L':
    upper = 1;
    break;
  default:
    return 1;
  }
  if (upper)
    return 0;
  *tokPtr = XML_TOK_XML_DECL;
  return 1;
}
Ejemplo n.º 9
0
static
int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
{
  int result = 0;
	(void)enc;
  /* skip &# */
  ptr += 2*MINBPC;
  if (CHAR_MATCHES(enc, ptr, 'x')) {
    for (ptr += MINBPC; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) {
      int c = BYTE_TO_ASCII(enc, ptr);
      switch (c) {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
	result <<= 4;
	result |= (c - '0');
	break;
      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
	result <<= 4;
	result += 10 + (c - 'A');
	break;
      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
	result <<= 4;
	result += 10 + (c - 'a');
	break;
      }
      if (result >= 0x110000)
	return -1;
    }
  }
  else {
    for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) {
      int c = BYTE_TO_ASCII(enc, ptr);
      result *= 10;
      result += (c - '0');
      if (result >= 0x110000)
	return -1;
    }
  }
  return checkCharRefNumber(result);
}
Ejemplo n.º 10
0
static int PTRCALL
PREFIX (getAtts) (const ENCODING * enc, const char *ptr, int attsMax, ATTRIBUTE * atts) {
    enum { other, inName, inValue } state = inName;
    int nAtts = 0;
    int open = 0;               /* defined when state == inValue;
                                   initialization just to shut up compilers */

    for (ptr += MINBPC (enc);; ptr += MINBPC (enc)) {
        switch (BYTE_TYPE (enc, ptr)) {

#define START_NAME \
      if (state == other) { \
        if (nAtts < attsMax) { \
          atts[nAtts].name = ptr; \
          atts[nAtts].normalized = 1; \
        } \
        state = inName; \
      }

#define LEAD_CASE(n) \
    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
            LEAD_CASE (2) LEAD_CASE (3) LEAD_CASE (4)
#undef LEAD_CASE
        case BT_NONASCII:
        case BT_NMSTRT:
        case BT_HEX:
            START_NAME break;
#undef START_NAME
        case BT_QUOT:
            if (state != inValue) {
                if (nAtts < attsMax)
                    atts[nAtts].valuePtr = ptr + MINBPC (enc);
                state = inValue;
                open = BT_QUOT;
            } else if (open == BT_QUOT) {
                state = other;
                if (nAtts < attsMax)
                    atts[nAtts].valueEnd = ptr;
                nAtts++;
            }
            break;
        case BT_APOS:
            if (state != inValue) {
                if (nAtts < attsMax)
                    atts[nAtts].valuePtr = ptr + MINBPC (enc);
                state = inValue;
                open = BT_APOS;
            } else if (open == BT_APOS) {
                state = other;
                if (nAtts < attsMax)
                    atts[nAtts].valueEnd = ptr;
                nAtts++;
            }
            break;
        case BT_AMP:
            if (nAtts < attsMax)
                atts[nAtts].normalized = 0;
            break;
        case BT_S:
            if (state == inName)
                state = other;
            else if (state == inValue
                     && nAtts < attsMax
                     && atts[nAtts].normalized
                     && (ptr == atts[nAtts].valuePtr
                         || BYTE_TO_ASCII (enc, ptr) != ASCII_SPACE
                         || BYTE_TO_ASCII (enc, ptr + MINBPC (enc)) == ASCII_SPACE
                         || BYTE_TYPE (enc, ptr + MINBPC (enc)) == open))
                atts[nAtts].normalized = 0;
            break;
        case BT_CR:
        case BT_LF:
            /* This case ensures that the first attribute name is counted
               Apart from that we could just change state on the quote. */
            if (state == inName)
                state = other;
            else if (state == inValue && nAtts < attsMax)
                atts[nAtts].normalized = 0;
            break;
        case BT_GT:
        case BT_SOL:
            if (state != inValue)
                return nAtts;
            break;
        default:
            break;
        }
    }
    /* not reached */
}