/*
 * Convert `len` bytes of UTF-8 starting at `s` into the single-byte encoding
 * named by `encoding`.
 *
 * The encoding is looked up with xml_get_encoding(). When the lookup fails,
 * or the encoding entry carries no decoding function, the input bytes are
 * copied to the result unchanged. Otherwise every 16-bit unit produced by
 * UTF8To16Decoder is passed through the encoding's decoding function and the
 * resulting byte is appended to the output.
 */
String xml_utf8_decode(const XML_Char *s, int len, const XML_Char *encoding) {
  /* One output byte per decoded unit, so `len` bytes is always enough. */
  String result(len, ReserveString);
  char *out = result.mutableData();

  xml_encoding *target = xml_get_encoding(encoding);
  char (*to_target)(unsigned short) =
      target ? target->decoding_function : nullptr;

  if (!to_target) {
    /* Unknown encoding, or nothing to convert with: hand back the raw
     * UTF-8 bytes. */
    memcpy(out, s, len);
    result.setSize(len);
    return result;
  }

  UTF8To16Decoder reader(s, len, true);
  int written = 0;
  int unit;
  while ((unit = reader.decode()) != UTF8_END) {
    out[written] = to_target(unit);
    written++;
  }

  assert(written <= len);
  result.shrink(written);
  return result;
}
/*
 * Set one option on an XML parser object.
 *
 * Recognised options:
 *   PHP_XML_OPTION_CASE_FOLDING    - stores value.toInt64() in case_folding
 *   PHP_XML_OPTION_SKIP_TAGSTART   - stores value.toInt64() in toffset
 *   PHP_XML_OPTION_SKIP_WHITE      - stores value.toInt64() in skipwhite
 *   PHP_XML_OPTION_TARGET_ENCODING - looks the name up via xml_get_encoding()
 *                                    and stores the entry's name
 *
 * Returns true when the option was applied. Raises a warning and returns
 * false for an unsupported target encoding or an unrecognised option.
 */
bool f_xml_parser_set_option(CObjRef parser, int option, CVarRef value) {
  XmlParser *xml = parser.getTyped<XmlParser>();

  if (option == PHP_XML_OPTION_CASE_FOLDING) {
    xml->case_folding = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_SKIP_TAGSTART) {
    xml->toffset = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_SKIP_WHITE) {
    xml->skipwhite = value.toInt64();
    return true;
  }

  if (option == PHP_XML_OPTION_TARGET_ENCODING) {
    xml_encoding *found =
        xml_get_encoding((const XML_Char*)value.toString().data());
    if (found == NULL) {
      raise_warning("Unsupported target encoding \"%s\"",
                    value.toString().data());
      return false;
    }
    /* Store the encoding table's own name rather than the caller's string. */
    xml->target_encoding = found->name;
    return true;
  }

  raise_warning("Unknown option");
  return false;
}
/*
 * Convert `len` bytes of UTF-8 starting at `s` into the single-byte encoding
 * named by `encoding`.
 *
 * Parameters:
 *   s        - input bytes (need not be NUL-terminated)
 *   len      - number of input bytes; a negative value is treated as 0
 *   newlen   - out: number of bytes in the returned buffer, excluding the
 *              terminating NUL
 *   encoding - name passed to xml_get_encoding()
 *
 * Returns a malloc()ed, NUL-terminated buffer that the caller must free(),
 * or NULL (with *newlen == 0) if the buffer could not be allocated.
 *
 * When the encoding is unknown, or has no decoding function, the input is
 * copied unchanged. Otherwise each decoded code point is passed through the
 * encoding's decoding function. A sequence cut short by the end of the input,
 * and any code point above U+FFFF (which does not fit the decoder's
 * unsigned short argument), is passed to the decoder as '?'.
 */
char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{
	char (*decoder)(unsigned short) = NULL;
	xml_encoding *enc = xml_get_encoding(encoding);
	char *newbuf;
	int pos;

	*newlen = 0;
	if (len < 0) {
		len = 0;
	}

	/* One output byte per input sequence, so len + 1 always suffices. */
	newbuf = (char*)malloc((size_t)len + 1);
	if (newbuf == NULL) {
		return NULL;
	}

	if (enc) {
		decoder = enc->decoding_function;
	}
	if (decoder == NULL) {
		/* If the target encoding was unknown, or no decoder function
		 * was specified, return the UTF-8-encoded data as-is.
		 */
		memcpy(newbuf, s, (size_t)len);
		*newlen = len;
		newbuf[*newlen] = '\0';
		return newbuf;
	}

	pos = len;
	while (pos > 0) {
		const unsigned char lead = (unsigned char)(*s);
		unsigned int cp;
		int seqlen;

		if (lead >= 0xf0) {
			seqlen = 4;	/* four bytes encoded, 21 bits */
		} else if (lead >= 0xe0) {
			seqlen = 3;	/* three bytes encoded, 16 bits */
		} else if (lead >= 0xc0) {
			seqlen = 2;	/* two bytes encoded, 11 bits */
		} else {
			seqlen = 1;
		}

		if (seqlen > pos) {
			/* Truncated trailing sequence: emit one '?' and consume only
			 * the bytes that really exist, so `s` never moves past the
			 * end of the input.
			 */
			cp = '?';
			seqlen = pos;
		} else if (seqlen == 4) {
			cp = ((unsigned int)(s[0] & 0x07) << 18)
			   | ((unsigned int)(s[1] & 0x3f) << 12)
			   | ((unsigned int)(s[2] & 0x3f) << 6)
			   |  (unsigned int)(s[3] & 0x3f);
		} else if (seqlen == 3) {
			cp = ((unsigned int)(s[0] & 0x0f) << 12)
			   | ((unsigned int)(s[1] & 0x3f) << 6)
			   |  (unsigned int)(s[2] & 0x3f);
		} else if (seqlen == 2) {
			cp = ((unsigned int)(s[0] & 0x1f) << 6)
			   |  (unsigned int)(s[1] & 0x3f);
		} else {
			cp = lead;
		}

		/* The decoder takes an unsigned short. Truncating a larger code
		 * point would silently turn it into an unrelated character.
		 */
		if (cp > 0xffff) {
			cp = '?';
		}

		newbuf[*newlen] = decoder((unsigned short)cp);
		++*newlen;

		s += seqlen;
		pos -= seqlen;
	}

	if (*newlen < len) {
		/* Shrink to fit. If realloc fails the original block is still
		 * valid and large enough, so keep using it.
		 */
		char *shrunk = (char*)realloc(newbuf, (size_t)*newlen + 1);
		if (shrunk != NULL) {
			newbuf = shrunk;
		}
	}
	newbuf[*newlen] = '\0';
	return newbuf;
}
/* {{{ xml_utf8_encode() */ PHPAPI zend_string *xml_utf8_encode(const char *s, size_t len, const XML_Char *encoding) { size_t pos = len; zend_string *str; unsigned int c; unsigned short (*encoder)(unsigned char) = NULL; xml_encoding *enc = xml_get_encoding(encoding); if (enc) { encoder = enc->encoding_function; } else { /* If the target encoding was unknown, fail */ return NULL; } if (encoder == NULL) { /* If no encoder function was specified, return the data as-is. */ str = zend_string_init(s, len, 0); return str; } /* This is the theoretical max (will never get beyond len * 2 as long * as we are converting from single-byte characters, though) */ str = zend_string_alloc(len * 4, 0); str->len = 0; while (pos > 0) { c = encoder ? encoder((unsigned char)(*s)) : (unsigned short)(*s); if (c < 0x80) { str->val[str->len++] = (char) c; } else if (c < 0x800) { str->val[str->len++] = (0xc0 | (c >> 6)); str->val[str->len++] = (0x80 | (c & 0x3f)); } else if (c < 0x10000) {
/*
 * Convert `len` bytes of UTF-8 starting at `s` into the single-byte encoding
 * named by `encoding`.
 *
 * The encoding is looked up with xml_get_encoding(). When the lookup fails,
 * or the encoding entry has no decoding function, the input bytes are
 * returned unchanged. Otherwise each decoded code point is passed through
 * the encoding's decoding function and the resulting byte is appended to the
 * output.
 *
 * A sequence cut short by the end of the input, and any code point above
 * U+FFFF (which does not fit the decoder's unsigned short argument), is
 * passed to the decoder as '?'.
 */
String xml_utf8_decode(const XML_Char *s, int len, const XML_Char *encoding) {
  /* One output byte per input sequence, so `len` bytes is always enough. */
  String str = String(len, ReserveString);
  char *newbuf = str.bufferSlice().ptr;
  char (*decoder)(unsigned short) = NULL;
  xml_encoding *enc = xml_get_encoding(encoding);

  if (enc) {
    decoder = enc->decoding_function;
  }
  if (decoder == NULL) {
    /* If the target encoding was unknown, or no decoder function
     * was specified, return the UTF-8-encoded data as-is.
     */
    memcpy(newbuf, s, len);
    str.setSize(len);
    return str;
  }

  int pos = len;
  int newlen = 0;
  while (pos > 0) {
    const unsigned char lead = (unsigned char)(*s);
    unsigned int cp;
    int seqlen;

    if (lead >= 0xf0) {
      seqlen = 4;  /* four bytes encoded, 21 bits */
    } else if (lead >= 0xe0) {
      seqlen = 3;  /* three bytes encoded, 16 bits */
    } else if (lead >= 0xc0) {
      seqlen = 2;  /* two bytes encoded, 11 bits */
    } else {
      seqlen = 1;
    }

    if (seqlen > pos) {
      /* Truncated trailing sequence: emit one '?' and consume only the
       * bytes that really exist, so `s` never moves past the end of the
       * input.
       */
      cp = '?';
      seqlen = pos;
    } else if (seqlen == 4) {
      cp = ((unsigned int)(s[0] & 0x07) << 18)
         | ((unsigned int)(s[1] & 0x3f) << 12)
         | ((unsigned int)(s[2] & 0x3f) << 6)
         |  (unsigned int)(s[3] & 0x3f);
    } else if (seqlen == 3) {
      cp = ((unsigned int)(s[0] & 0x0f) << 12)
         | ((unsigned int)(s[1] & 0x3f) << 6)
         |  (unsigned int)(s[2] & 0x3f);
    } else if (seqlen == 2) {
      cp = ((unsigned int)(s[0] & 0x1f) << 6)
         |  (unsigned int)(s[1] & 0x3f);
    } else {
      cp = lead;
    }

    /* The decoder takes an unsigned short. Truncating a larger code point
     * would silently turn it into an unrelated character.
     */
    if (cp > 0xffff) {
      cp = '?';
    }

    newbuf[newlen] = decoder((unsigned short)cp);
    ++newlen;

    s += seqlen;
    pos -= seqlen;
  }

  assert(newlen <= len);
  str.shrink(newlen);
  return str;
}