Beispiel #1
0
/**
 * Convert the UTF-8 buffer `s` (length `len`) into the encoding named by
 * `encoding` and return it as a String.
 *
 * When xml_get_encoding() yields no entry, or the entry carries no
 * decoding_function, the input bytes are copied through unchanged.
 * Otherwise each value produced by UTF8To16Decoder is mapped to one output
 * byte through the entry's decoding_function.
 */
String xml_utf8_decode(const XML_Char *s, int len,
                       const XML_Char *encoding) {
  String result = String(len, ReserveString);
  char *out = result.mutableData();

  xml_encoding *target = xml_get_encoding(encoding);
  char (*convert)(unsigned short) =
    target ? target->decoding_function : nullptr;

  if (!convert) {
    /* Unknown target encoding, or one without a decoder: hand the
     * UTF-8 bytes back untouched.
     */
    memcpy(out, s, len);
    result.setSize(len);
    return result;
  }

  /* One output byte is written per value the decoder hands back. */
  int written = 0;
  UTF8To16Decoder utf8(s, len, true);
  while (true) {
    int unit = utf8.decode();
    if (unit == UTF8_END) {
      break;
    }
    out[written++] = convert(unit);
  }

  /* The reserved buffer holds `len` bytes; the output must fit in it. */
  assert(written <= len);
  result.shrink(written);
  return result;
}
Beispiel #2
0
/**
 * Set one option on an XML parser object.
 *
 * @param parser  object wrapping an XmlParser
 * @param option  one of the PHP_XML_OPTION_* constants
 * @param value   new value; read as an integer for the numeric options and
 *                as a string for PHP_XML_OPTION_TARGET_ENCODING
 * @return true when the option was applied; false (after raising a
 *         warning) for an unknown option or an unsupported encoding name
 */
bool f_xml_parser_set_option(CObjRef parser, int option, CVarRef value) {
  XmlParser *xp = parser.getTyped<XmlParser>();

  if (option == PHP_XML_OPTION_CASE_FOLDING) {
    xp->case_folding = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_SKIP_TAGSTART) {
    xp->toffset = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_SKIP_WHITE) {
    xp->skipwhite = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_TARGET_ENCODING) {
    /* Only encodings known to xml_get_encoding() are accepted. */
    xml_encoding *found =
      xml_get_encoding((const XML_Char*)value.toString().data());
    if (found == NULL) {
      raise_warning("Unsupported target encoding \"%s\"",
                    value.toString().data());
      return false;
    }
    /* Store the table entry's own name rather than the caller's string. */
    xp->target_encoding = found->name;
    return true;
  }

  raise_warning("Unknown option");
  return false;
}
Beispiel #3
0
/**
 * Convert the UTF-8 buffer `s` (length `len`) into the single-byte encoding
 * named by `encoding`.
 *
 * @param s         input bytes
 * @param len       number of bytes in `s`
 * @param newlen    out: number of bytes written to the result, excluding
 *                  the terminating NUL
 * @param encoding  target encoding name, looked up via xml_get_encoding()
 * @return a malloc()ed, NUL-terminated buffer the caller must free(), or
 *         NULL (with *newlen == 0) if the allocation fails
 *
 * If the encoding is unknown or has no decoding_function, the input is
 * copied through unchanged. Otherwise every decoded code point is passed to
 * the decoding_function and produces exactly one output byte. Code points
 * above 0xFFFF and sequences truncated by the end of the input are replaced
 * by '?'.
 */
char *xml_utf8_decode(const XML_Char *s, int len, int *newlen,
                      const XML_Char *encoding) {
  int pos = len;
  char *newbuf = (char*)malloc(len+1);
  char (*decoder)(unsigned short) = NULL;
  xml_encoding *enc = xml_get_encoding(encoding);

  *newlen = 0;
  if (newbuf == NULL) {
    return NULL;
  }
  if (enc) {
    decoder = enc->decoding_function;
  }
  if (decoder == NULL) {
    /* If the target encoding was unknown, or no decoder function
     * was specified, return the UTF-8-encoded data as-is.
     */
    memcpy(newbuf, s, len);
    *newlen = len;
    newbuf[*newlen] = '\0';
    return newbuf;
  }
  while (pos > 0) {
    /* Wide enough for the 21 bits a four-byte sequence can carry; a
     * 16-bit accumulator would silently drop the high bits.
     */
    unsigned int c = (unsigned char)(*s);
    int seqlen = 1;

    /* The lead byte alone determines how many bytes are consumed. */
    if (c >= 0xf0) {
      seqlen = 4;
    } else if (c >= 0xe0) {
      seqlen = 3;
    } else if (c >= 0xc0) {
      seqlen = 2;
    }

    if (seqlen > pos) {
      /* Sequence runs past the end of the input: emit a placeholder and
       * consume only what is actually there, so `s` never leaves the
       * buffer.
       */
      c = '?';
      seqlen = pos;
    } else if (seqlen == 4) { /* four bytes encoded, 21 bits */
      c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
    } else if (seqlen == 3) { /* three bytes encoded, 16 bits */
      c = ((s[0]&15)<<12) | ((s[1]&63)<<6) | (s[2]&63);
    } else if (seqlen == 2) { /* two bytes encoded, 11 bits */
      c = ((s[0]&31)<<6) | (s[1]&63);
    }

    /* The decoder takes an unsigned short; anything wider cannot be
     * represented and must not be truncated into an unrelated character.
     */
    if (c > 0xffff) {
      c = '?';
    }

    s += seqlen;
    pos -= seqlen;

    newbuf[*newlen] = decoder((unsigned short)c);
    ++*newlen;
  }
  if (*newlen < len) {
    /* Shrink to fit. If realloc fails the original block is still valid,
     * so keep using it instead of losing the pointer.
     */
    char *shrunk = (char*)realloc(newbuf, *newlen + 1);
    if (shrunk != NULL) {
      newbuf = shrunk;
    }
  }
  newbuf[*newlen] = '\0';
  return newbuf;
}
Beispiel #4
0
/* {{{ xml_utf8_encode() */
PHPAPI zend_string *xml_utf8_encode(const char *s, size_t len, const XML_Char *encoding)
{
	size_t pos = len;
	zend_string *str;
	unsigned int c;
	unsigned short (*encoder)(unsigned char) = NULL;
	xml_encoding *enc = xml_get_encoding(encoding);

	if (enc) {
		encoder = enc->encoding_function;
	} else {
		/* If the target encoding was unknown, fail */
		return NULL;
	}
	if (encoder == NULL) {
		/* If no encoder function was specified, return the data as-is.
		 */
		str = zend_string_init(s, len, 0);
		return str;
	}
	/* This is the theoretical max (will never get beyond len * 2 as long
	 * as we are converting from single-byte characters, though) */
	str = zend_string_alloc(len * 4, 0);
	str->len = 0;
	while (pos > 0) {
		c = encoder ? encoder((unsigned char)(*s)) : (unsigned short)(*s);
		if (c < 0x80) {
			str->val[str->len++] = (char) c;
		} else if (c < 0x800) {
			str->val[str->len++] = (0xc0 | (c >> 6));
			str->val[str->len++] = (0x80 | (c & 0x3f));
		} else if (c < 0x10000) {
Beispiel #5
0
/**
 * Convert the UTF-8 buffer `s` (length `len`) into the single-byte encoding
 * named by `encoding` and return it as a String.
 *
 * If xml_get_encoding() finds no entry, or the entry has no
 * decoding_function, the input is copied through unchanged. Otherwise every
 * decoded code point is passed to the decoding_function and produces exactly
 * one output byte. Code points above 0xFFFF and sequences truncated by the
 * end of the input are replaced by '?'.
 */
String xml_utf8_decode(const XML_Char *s, int len,
                       const XML_Char *encoding) {
  int pos = len;
  String str = String(len, ReserveString);
  char *newbuf = str.bufferSlice().ptr;
  char (*decoder)(unsigned short) = NULL;
  xml_encoding *enc = xml_get_encoding(encoding);

  int newlen = 0;
  if (enc) {
    decoder = enc->decoding_function;
  }
  if (decoder == NULL) {
    /* If the target encoding was unknown, or no decoder function
     * was specified, return the UTF-8-encoded data as-is.
     */
    memcpy(newbuf, s, len);
    str.setSize(len);
    return str;
  }
  while (pos > 0) {
    /* Wide enough for the 21 bits a four-byte sequence can carry; a
     * 16-bit accumulator would silently drop the high bits.
     */
    unsigned int c = (unsigned char)(*s);
    int seqlen = 1;

    /* The lead byte alone determines how many bytes are consumed. */
    if (c >= 0xf0) {
      seqlen = 4;
    } else if (c >= 0xe0) {
      seqlen = 3;
    } else if (c >= 0xc0) {
      seqlen = 2;
    }

    if (seqlen > pos) {
      /* Sequence runs past the end of the input: emit a placeholder and
       * consume only what is actually there, so `s` never leaves the
       * buffer.
       */
      c = '?';
      seqlen = pos;
    } else if (seqlen == 4) { /* four bytes encoded, 21 bits */
      c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
    } else if (seqlen == 3) { /* three bytes encoded, 16 bits */
      c = ((s[0]&15)<<12) | ((s[1]&63)<<6) | (s[2]&63);
    } else if (seqlen == 2) { /* two bytes encoded, 11 bits */
      c = ((s[0]&31)<<6) | (s[1]&63);
    }

    /* The decoder takes an unsigned short; anything wider cannot be
     * represented and must not be truncated into an unrelated character.
     */
    if (c > 0xffff) {
      c = '?';
    }

    s += seqlen;
    pos -= seqlen;

    newbuf[newlen] = decoder((unsigned short)c);
    ++newlen;
  }

  /* One output byte per consumed sequence, so the result always fits. */
  assert(newlen <= len);
  str.shrink(newlen);
  return str;
}