/*
 * Decide which libxml2 character encoding to use for the buffer `cbuf`.
 *
 * Sources are consulted in this order; the first usable answer wins:
 *   1. a "charset=" parameter in the request's Content-Type;
 *   2. xmlDetectCharEncoding() on the first `bytes` bytes of the buffer;
 *   3. a charset picked out of the buffer by the seek_meta_ctype and
 *      seek_charset regexes (an HTML META declaration);
 *   4. cfg->default_encoding, or ISO-8859-1 when that is
 *      XML_CHAR_ENCODING_NONE.
 *
 * When the Content-Type names a charset that xmlParseCharEncoding() does
 * not accept, steps 2 and 3 are skipped and the result comes from step 4.
 *
 * NOTE(review): ap_regexec() receives cbuf without a length, so the caller
 * presumably NUL-terminates the buffer -- confirm.
 */
static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf, size_t bytes)
{
  request_rec* req = ctx->f->r;
  const char* charset = NULL;
  xmlCharEncoding enc;
  ap_regmatch_t hit[2];
  char* cursor;

  /* If we've got it in the HTTP headers, there's nothing more to do */
  if (req->content_type != NULL) {
    cursor = ap_strcasestr(req->content_type, "charset=");
    if (cursor != NULL) {
      cursor += 8;  /* step over "charset=" */
      /* the value runs up to the next space or ';' */
      charset = apr_pstrndup(req->pool, cursor, strcspn(cursor, " ;"));
      if (charset != NULL) {
        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, req->server,
                     "sniff_encoding: found charset %s in Content-Type", charset);
        enc = xmlParseCharEncoding(charset);
        if (enc != XML_CHAR_ENCODING_ERROR && enc != XML_CHAR_ENCODING_NONE) {
          return enc;
        }
      }
    }
  }

  /* Nothing in the headers: sniff the content itself */
  if (charset == NULL) {
    /* first let libxml2 inspect the leading bytes */
    enc = xmlDetectCharEncoding((const xmlChar*)cbuf, bytes);
    if (enc != XML_CHAR_ENCODING_NONE) {
      ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, req->server,
                   "sniff_encoding: got charset from XML rules");
      return enc;
    }

    /* failing that, look for a META declaration ... */
    if (ap_regexec(seek_meta_ctype, cbuf, 1, hit, 0) == 0) {
      char* meta = apr_pstrndup(req->pool, cbuf + hit[0].rm_so,
                                hit[0].rm_eo - hit[0].rm_so);
      /* ... and take the first capture group of seek_charset as the name */
      if (ap_regexec(seek_charset, meta, 2, hit, 0) == 0) {
        charset = apr_pstrndup(req->pool, meta + hit[1].rm_so,
                               hit[1].rm_eo - hit[1].rm_so);
      }
    }
  }

  /*
   * A charset name was found somewhere; see whether libxml2 knows it.
   * NOTE(review): this branch is also reached with a Content-Type charset
   * that was rejected above, so the "HTML META" wording can be misleading.
   */
  if (charset != NULL) {
    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, req->server,
                 "sniff_encoding: got charset %s from HTML META", charset);
    enc = xmlParseCharEncoding(charset);
    if (!(enc == XML_CHAR_ENCODING_ERROR || enc == XML_CHAR_ENCODING_NONE)) {
      return enc;
    }
    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, req->server,
                 "sniff_encoding: charset %s not supported", charset);
  }

  /* Use configuration default as a last resort */
  ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, req->server,
               "sniff_encoding: no suitable charset information");
  if (ctx->cfg->default_encoding == XML_CHAR_ENCODING_NONE) {
    return XML_CHAR_ENCODING_8859_1;
  }
  return ctx->cfg->default_encoding;
}
/*
 * IXMLHTTPRequest::get_responseText implementation.
 *
 * Copies the response stream into a newly allocated BSTR stored in *body.
 * The first four bytes are passed to xmlDetectCharEncoding(); data reported
 * as UTF-8, or with no detectable encoding, is converted from UTF-8, while
 * UTF-16LE data is copied as raw bytes.
 *
 * Returns:
 *   E_INVALIDARG   body is NULL
 *   E_FAIL         the request is not in READYSTATE_COMPLETE, or the
 *                  detected encoding is not one of the supported ones
 *   E_OUTOFMEMORY  the BSTR could not be allocated
 *   otherwise the result of GetHGlobalFromStream()
 */
static HRESULT WINAPI httprequest_get_responseText(IXMLHTTPRequest *iface, BSTR *body)
{
    httprequest *This = impl_from_IXMLHTTPRequest( iface );
    xmlCharEncoding encoding = XML_CHAR_ENCODING_UTF8;
    HGLOBAL hglobal;
    xmlChar *data;
    DWORD size;
    HRESULT hr;

    TRACE("(%p)->(%p)\n", This, body);

    if (!body) return E_INVALIDARG;
    if (This->state != READYSTATE_COMPLETE) return E_FAIL;

    hr = GetHGlobalFromStream(This->bsc->stream, &hglobal);
    if (hr != S_OK) return hr;

    data = GlobalLock(hglobal);
    size = GlobalSize(hglobal);

    /* try to determine data encoding; needs at least four bytes to look at */
    if (size >= 4)
    {
        encoding = xmlDetectCharEncoding(data, 4);
        TRACE("detected encoding: %s\n", debugstr_a(xmlGetCharEncodingName(encoding)));

        switch (encoding)
        {
        case XML_CHAR_ENCODING_UTF8:
        case XML_CHAR_ENCODING_UTF16LE:
        case XML_CHAR_ENCODING_NONE:
            break;
        default:
            FIXME("unsupported encoding: %s\n", debugstr_a(xmlGetCharEncodingName(encoding)));
            GlobalUnlock(hglobal);
            return E_FAIL;
        }
    }

    if (encoding == XML_CHAR_ENCODING_UTF16LE)
    {
        /* already wide characters: copy all bytes verbatim */
        *body = SysAllocStringByteLen((LPCSTR)data, size);
    }
    else
    {
        /* without BOM assume UTF-8: measure first, then convert in place */
        DWORD length = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)data, size, NULL, 0);

        *body = SysAllocStringLen(NULL, length);
        if (*body)
            MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)data, size, *body, length);
    }

    if (!*body)
        hr = E_OUTOFMEMORY;

    GlobalUnlock(hglobal);
    return hr;
}
/*
 * Feed one chunk of input to the underlying libxml2 push parser.
 *
 * Returns 1 when xmlParseChunk() reports no error, or when the parser's
 * last recorded error is no more severe than a warning; returns 0 otherwise.
 */
PHPAPI int XML_Parse(XML_Parser parser, const XML_Char *data, int data_len, int is_final)
{
	int status;

/* The following is a hack to keep BC with PHP 4 while avoiding
   the infinite loop in libxml <= 2.6.17 which occurs when no encoding
   has been defined and none can be detected */
#if LIBXML_VERSION <= 20617
	if (parser->parser->charset == XML_CHAR_ENCODING_NONE
		&& (data_len >= 4 || (parser->parser->input->buf->buffer->use + data_len >= 4))) {
		xmlChar head[4];
		int buffered;

		/* take up to four bytes already buffered, then top up from data */
		buffered = parser->parser->input->buf->buffer->use;
		buffered = (buffered > 4) ? 4 : buffered;

		memcpy(head, parser->parser->input->buf->buffer->content, (size_t)buffered);
		memcpy(head + buffered, data, (size_t)(4 - buffered));

		/* nothing detectable in the first four bytes: force UTF-8 */
		if (xmlDetectCharEncoding(&head[0], 4) == XML_CHAR_ENCODING_NONE) {
			parser->parser->charset = XML_CHAR_ENCODING_UTF8;
		}
	}
#endif

	status = xmlParseChunk(parser->parser, data, data_len, is_final);

	/* only something worse than a warning counts as failure */
	if (status && parser->parser->lastError.level > XML_ERR_WARNING) {
		return 0;
	}
	return 1;
}
/*
 * Convert the children of `xml_node` into plist nodes.
 *
 * For every child element a new plist node is created.  It is attached to
 * *plist_node when that is already set; otherwise it becomes *plist_node.
 * Children named XPLIST_TEXT or "comment" are skipped.  Arrays and dicts
 * recurse with the new node as parent.  A dict whose only entry is "CF$UID"
 * is rewritten in place as a PLIST_UID node.
 *
 * Element content that xmlNodeGetContent() cannot provide (NULL) is treated
 * like empty content.  A string or key whose content is detected as an
 * encoding other than UTF-8/ASCII/none, or whose copy cannot be allocated,
 * leaves the new node without a type.
 */
static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node)
{
    xmlNodePtr node = NULL;
    plist_data_t data = NULL;
    plist_t subnode = NULL;

    /* used by the string/key branches */
    long len = 0;
    int type = 0;

    if (!xml_node)
        return;

    for (node = xml_node->children; node; node = node->next) {

        /* text and comment nodes carry no plist value */
        if (!xmlStrcmp(node->name, XPLIST_TEXT) || !xmlStrcmp(node->name, BAD_CAST("comment")))
            continue;

        data = plist_new_plist_data();
        subnode = plist_new_node(data);
        if (*plist_node)
            node_attach(*plist_node, subnode);
        else
            *plist_node = subnode;

        if (!xmlStrcmp(node->name, XPLIST_TRUE)) {
            data->boolval = TRUE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_FALSE)) {
            data->boolval = FALSE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_INT)) {
            xmlChar *strval = xmlNodeGetContent(node);
            const char *str = strval ? (const char *)strval : "";
            char *endp = NULL;
            int is_negative = 0;
            uint64_t magnitude;

            /* strip an explicit sign so the digits can be parsed unsigned */
            if ((str[0] == '-') || (str[0] == '+')) {
                is_negative = (str[0] == '-');
                str++;
            }
            magnitude = strtoull(str, &endp, 0);
            if ((endp != NULL) && (*endp != '\0')) {
                fprintf(stderr, "%s: integer parse error: string contains invalid characters: '%s'\n", __func__, endp);
            }
            if (is_negative) {
                /* negate in unsigned arithmetic: yields the two's-complement
                   bit pattern without signed overflow (e.g. for -2^63) */
                data->intval = (uint64_t)0 - magnitude;
                data->length = 8;
            } else {
                data->intval = magnitude;
                /* values that do not fit a signed 64-bit integer get length 16 */
                data->length = (magnitude <= INT64_MAX) ? 8 : 16;
            }
            data->type = PLIST_UINT;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_REAL)) {
            xmlChar *strval = xmlNodeGetContent(node);
            data->realval = atof(strval ? (const char *)strval : "");
            data->type = PLIST_REAL;
            data->length = 8;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATE)) {
            xmlChar *strval = xmlNodeGetContent(node);
            time_t timev = 0;
            /* anything shorter than 11 characters is not parsed and stays 0 */
            if (strval && strlen((const char*)strval) >= 11) {
                struct tm btime;
                struct tm* tm_utc;
                parse_date((const char*)strval, &btime);
                timev = mktime(&btime);
                /* mktime() interprets btime as local time; shift the result
                   by the local/UTC difference at that instant */
                tm_utc = gmtime(&timev);
                timev -= (mktime(tm_utc) - timev);
            }
            data->timeval.tv_sec = (long)(timev - MAC_EPOCH);
            data->timeval.tv_usec = 0;
            data->type = PLIST_DATE;
            data->length = sizeof(struct timeval);
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_STRING)) {
            xmlChar *strval = xmlNodeGetContent(node);
            const char *text = strval ? (const char *)strval : "";
            len = strlen(text);
            type = xmlDetectCharEncoding((const xmlChar *)text, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) {
                data->strval = strdup(text);
                /* only mark the node as a string once the copy exists */
                if (data->strval) {
                    data->type = PLIST_STRING;
                    data->length = len;
                }
            }
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_KEY)) {
            xmlChar *strval = xmlNodeGetContent(node);
            const char *text = strval ? (const char *)strval : "";
            len = strlen(text);
            type = xmlDetectCharEncoding((const xmlChar *)text, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) {
                data->strval = strdup(text);
                /* only mark the node as a key once the copy exists */
                if (data->strval) {
                    data->type = PLIST_KEY;
                    data->length = len;
                }
            }
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATA)) {
            xmlChar *strval = xmlNodeGetContent(node);
            size_t size = 0;
            unsigned char *dec = strval ? base64decode((char*)strval, &size) : NULL;
            uint8_t *copy = NULL;

            /* copy only when there is decoded data and room to hold it */
            if (dec && size > 0) {
                copy = (uint8_t *) malloc(size);
                if (copy)
                    memcpy(copy, dec, size);
            }
            free(dec);
            data->buff = copy;
            data->length = copy ? size : 0;
            data->type = PLIST_DATA;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_ARRAY)) {
            data->type = PLIST_ARRAY;
            xml_to_node(node, &subnode);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DICT)) {
            data->type = PLIST_DICT;
            xml_to_node(node, &subnode);
            if (plist_get_node_type(subnode) == PLIST_DICT) {
                if (plist_dict_get_size(subnode) == 1) {
                    /* a dict holding nothing but "CF$UID" becomes a UID node */
                    plist_t uid = plist_dict_get_item(subnode, "CF$UID");
                    if (uid) {
                        uint64_t val = 0;
                        plist_get_uint_val(uid, &val);
                        plist_dict_remove_item(subnode, "CF$UID");
                        plist_data_t nodedata = plist_get_data((node_t*)subnode);
                        free(nodedata->buff);
                        nodedata->type = PLIST_UID;
                        nodedata->length = sizeof(uint64_t);
                        nodedata->intval = val;
                    }
                }
            }
            continue;
        }
    }
}
/*
 * Read an HTML document from `input` and hand each top-level node of the
 * parsed tree to html_search_for_tables().
 *
 * The first four bytes are sniffed with xmlDetectCharEncoding() to choose
 * the parser encoding and to skip a leading byte-order mark; the rest of the
 * input is pushed to the parser in chunks of at most 4096 bytes.
 *
 * An error is recorded on `io_context` when no document was produced: the
 * input is shorter than four bytes, the first read fails, the parser context
 * cannot be created, or the parser yields no document.
 */
void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				else if (buf[0] == 0x3c)
					/* all four sniffed bytes are dropped;
					 * presumably the start of an XML
					 * declaration -- confirm */
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc = XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}

			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			/* Context creation can fail; leaving doc NULL routes
			 * that case to the error report below. */
			if (ctxt != NULL) {
				for (; size > 0 ; size -= len) {
					len = MIN (4096, size);
					buf = gsf_input_read (input, len, NULL);
					if (buf == NULL)
						break;
					htmlParseChunk (
						ctxt, (char const *)buf, len, 0);
				}

				/* zero-length terminating chunk */
				htmlParseChunk (ctxt, (char const *)buf, 0, 1);
				doc = ctxt->myDoc;
				htmlFreeParserCtxt (ctxt);
			}
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}
/*
 * Convert the children of `xml_node` into plist nodes.
 *
 * For every child element a new plist node is created.  It is attached to
 * *plist_node when that is already set; otherwise it becomes *plist_node.
 * Children named XPLIST_TEXT or "comment" are skipped.  Arrays and dicts
 * recurse with the new node as parent.  A dict whose only entry is "CF$UID"
 * is rewritten in place as a PLIST_UID node.
 *
 * Element content that xmlNodeGetContent() cannot provide (NULL) is treated
 * like empty content.  A string whose content is detected as an encoding
 * other than UTF-8/ASCII/none, or a string/key whose copy cannot be
 * allocated, leaves the new node without a type.
 */
static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node)
{
    xmlNodePtr node = NULL;
    plist_data_t data = NULL;
    plist_t subnode = NULL;

    /* used by the string branch */
    long len = 0;
    int type = 0;

    if (!xml_node)
        return;

    for (node = xml_node->children; node; node = node->next) {

        /* text and comment nodes carry no plist value */
        if (!xmlStrcmp(node->name, XPLIST_TEXT) || !xmlStrcmp(node->name, BAD_CAST("comment")))
            continue;

        data = plist_new_plist_data();
        subnode = plist_new_node(data);
        if (*plist_node)
            node_attach(*plist_node, subnode);
        else
            *plist_node = subnode;

        if (!xmlStrcmp(node->name, XPLIST_TRUE)) {
            data->boolval = TRUE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_FALSE)) {
            data->boolval = FALSE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_INT)) {
            xmlChar *strval = xmlNodeGetContent(node);
            data->intval = strtoull(strval ? (const char *)strval : "", NULL, 0);
            data->type = PLIST_UINT;
            data->length = 8;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_REAL)) {
            xmlChar *strval = xmlNodeGetContent(node);
            data->realval = atof(strval ? (const char *)strval : "");
            data->type = PLIST_REAL;
            data->length = 8;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATE)) {
            xmlChar *strval = xmlNodeGetContent(node);
            time_t timev = 0;
            /* anything shorter than 11 characters is not parsed and stays 0 */
            if (strval && strlen((const char*)strval) >= 11) {
                struct tm btime;
                parse_date((const char*)strval, &btime);
                timev = mktime(&btime);
            }
            data->timeval.tv_sec = (long)timev;
            data->timeval.tv_usec = 0;
            data->type = PLIST_DATE;
            data->length = sizeof(struct timeval);
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_STRING)) {
            xmlChar *strval = xmlNodeGetContent(node);
            const char *text = strval ? (const char *)strval : "";
            len = strlen(text);
            type = xmlDetectCharEncoding((const xmlChar *)text, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type) {
                data->strval = strdup(text);
                /* only mark the node as a string once the copy exists */
                if (data->strval) {
                    data->type = PLIST_STRING;
                    data->length = len;
                }
            }
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_KEY)) {
            xmlChar *strval = xmlNodeGetContent(node);
            data->strval = strdup(strval ? (const char *)strval : "");
            /* only mark the node as a key once the copy exists */
            if (data->strval) {
                data->type = PLIST_KEY;
                data->length = strlen(data->strval);
            }
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATA)) {
            xmlChar *strval = xmlNodeGetContent(node);
            size_t size = 0;
            unsigned char *dec = strval ? base64decode((char*)strval, &size) : NULL;
            uint8_t *copy = NULL;

            /* copy only when there is decoded data and room to hold it */
            if (dec && size > 0) {
                copy = (uint8_t *) malloc(size);
                if (copy)
                    memcpy(copy, dec, size);
            }
            free(dec);
            data->buff = copy;
            data->length = copy ? size : 0;
            data->type = PLIST_DATA;
            if (strval)
                xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_ARRAY)) {
            data->type = PLIST_ARRAY;
            xml_to_node(node, &subnode);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DICT)) {
            data->type = PLIST_DICT;
            xml_to_node(node, &subnode);
            if (plist_get_node_type(subnode) == PLIST_DICT) {
                if (plist_dict_get_size(subnode) == 1) {
                    /* a dict holding nothing but "CF$UID" becomes a UID node */
                    plist_t uid = plist_dict_get_item(subnode, "CF$UID");
                    if (uid) {
                        uint64_t val = 0;
                        plist_get_uint_val(uid, &val);
                        plist_dict_remove_item(subnode, "CF$UID");
                        plist_data_t nodedata = plist_get_data((node_t*)subnode);
                        free(nodedata->buff);
                        nodedata->type = PLIST_UID;
                        nodedata->length = sizeof(uint64_t);
                        nodedata->intval = val;
                    }
                }
            }
            continue;
        }
    }
}