C++ (Cpp) xmlDetectCharEncoding示例

示例#1

0

显示文件

文件： mod_cdn.c 项目： carriercomm/mod_cdn

static xmlCharEncoding sniff_encoding(saxctxt* ctx, const char* cbuf, size_t bytes)
{
  request_rec* r = ctx->f->r;
  cdn_conf* cfg = ctx->cfg;
  xmlCharEncoding ret;
  char* p;
  ap_regmatch_t match[2];
  char* buf = (char*)cbuf;
  const char *encoding = 0;

  /* If we've got it in the HTTP headers, there's nothing to do */
  if(r->content_type && (p = ap_strcasestr(r->content_type, "charset=")) && p != NULL) {
    p += 8;
    if((encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;")))) {
      ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
                   "sniff_encoding: found charset %s in Content-Type", encoding);
      ret = xmlParseCharEncoding(encoding);
      if(((ret != XML_CHAR_ENCODING_ERROR) && (ret != XML_CHAR_ENCODING_NONE)))
        return ret;
    }
  }

  /* to sniff, first we look for BOM */
  if(encoding == NULL) {

    if((ret = xmlDetectCharEncoding((const xmlChar*)buf, bytes)) != XML_CHAR_ENCODING_NONE) {
      ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
                   "sniff_encoding: got charset from XML rules");
      return ret;
    }

    /* If none of the above, look for a META-thingey */
    if(ap_regexec(seek_meta_ctype, buf, 1, match, 0) == 0) {
      p = apr_pstrndup(r->pool, buf + match[0].rm_so, match[0].rm_eo - match[0].rm_so);
      if(ap_regexec(seek_charset, p, 2, match, 0) == 0)
        encoding = apr_pstrndup(r->pool, p+match[1].rm_so, match[1].rm_eo - match[1].rm_so);
    }

  }

  /* either it's set to something we found or it's still the default */
  if(encoding) {
    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
                 "sniff_encoding: got charset %s from HTML META", encoding);
    ret = xmlParseCharEncoding(encoding);
    if(ret != XML_CHAR_ENCODING_ERROR && ret != XML_CHAR_ENCODING_NONE)
      return ret;

    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
                 "sniff_encoding: charset %s not supported", encoding);
  }

  /* Use configuration default as a last resort */
  ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
               "sniff_encoding: no suitable charset information");
  return (cfg->default_encoding == XML_CHAR_ENCODING_NONE)
    ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ;
}

示例#2

0

显示文件

文件： httprequest.c 项目： YokoZar/wine

static HRESULT WINAPI httprequest_get_responseText(IXMLHTTPRequest *iface, BSTR *body)
{
    httprequest *This = impl_from_IXMLHTTPRequest( iface );
    HGLOBAL hglobal;
    HRESULT hr;

    TRACE("(%p)->(%p)\n", This, body);

    if (!body) return E_INVALIDARG;
    if (This->state != READYSTATE_COMPLETE) return E_FAIL;

    hr = GetHGlobalFromStream(This->bsc->stream, &hglobal);
    if (hr == S_OK)
    {
        xmlChar *ptr = GlobalLock(hglobal);
        DWORD size = GlobalSize(hglobal);
        xmlCharEncoding encoding = XML_CHAR_ENCODING_UTF8;

        /* try to determine data encoding */
        if (size >= 4)
        {
            encoding = xmlDetectCharEncoding(ptr, 4);
            TRACE("detected encoding: %s\n", debugstr_a(xmlGetCharEncodingName(encoding)));
            if ( encoding != XML_CHAR_ENCODING_UTF8 &&
                 encoding != XML_CHAR_ENCODING_UTF16LE &&
                 encoding != XML_CHAR_ENCODING_NONE )
            {
                FIXME("unsupported encoding: %s\n", debugstr_a(xmlGetCharEncodingName(encoding)));
                GlobalUnlock(hglobal);
                return E_FAIL;
            }
        }

        /* without BOM assume UTF-8 */
        if (encoding == XML_CHAR_ENCODING_UTF8 ||
            encoding == XML_CHAR_ENCODING_NONE )
        {
            DWORD length = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)ptr, size, NULL, 0);

            *body = SysAllocStringLen(NULL, length);
            if (*body)
                MultiByteToWideChar( CP_UTF8, 0, (LPCSTR)ptr, size, *body, length);
        }
        else
            *body = SysAllocStringByteLen((LPCSTR)ptr, size);

        if (!*body) hr = E_OUTOFMEMORY;
        GlobalUnlock(hglobal);
    }

    return hr;
}

示例#3

0

显示文件

文件： compat.c 项目： practicalweb/php-src

PHPAPI int
XML_Parse(XML_Parser parser, const XML_Char *data, int data_len, int is_final)
{
	int error;

/* The following is a hack to keep BC with PHP 4 while avoiding 
the inifite loop in libxml <= 2.6.17 which occurs when no encoding 
has been defined and none can be detected */
#if LIBXML_VERSION <= 20617
	if (parser->parser->charset == XML_CHAR_ENCODING_NONE) {
		if (data_len >= 4 || (parser->parser->input->buf->buffer->use + data_len >= 4)) {
			xmlChar start[4];
			int char_count;

			char_count = parser->parser->input->buf->buffer->use;
			if (char_count > 4) {
				char_count = 4;
			}

			memcpy(start, parser->parser->input->buf->buffer->content, (size_t)char_count);
			memcpy(start + char_count, data, (size_t)(4 - char_count));

			if (xmlDetectCharEncoding(&start[0], 4) == XML_CHAR_ENCODING_NONE) {
				parser->parser->charset = XML_CHAR_ENCODING_UTF8;
			}
		}
	}
#endif

	error = xmlParseChunk(parser->parser, data, data_len, is_final);
	if (!error) {
		return 1;
	} else if (parser->parser->lastError.level > XML_ERR_WARNING ){
		return 0;
	} else {
		return 1;
	}
}

示例#4

0

显示文件

文件： xplist.c 项目： EchoLiao/libplist

static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node)
{
    xmlNodePtr node = NULL;
    plist_data_t data = NULL;
    plist_t subnode = NULL;

    //for string
    long len = 0;
    int type = 0;

    if (!xml_node)
        return;

    for (node = xml_node->children; node; node = node->next)
    {

        while (node && !xmlStrcmp(node->name, XPLIST_TEXT))
            node = node->next;
        if (!node)
            break;

        if (!xmlStrcmp(node->name, BAD_CAST("comment"))) {
            continue;
        }

        data = plist_new_plist_data();
        subnode = plist_new_node(data);
        if (*plist_node)
            node_attach(*plist_node, subnode);
        else
            *plist_node = subnode;

        if (!xmlStrcmp(node->name, XPLIST_TRUE))
        {
            data->boolval = TRUE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_FALSE))
        {
            data->boolval = FALSE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_INT))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            int is_negative = 0;
            char *str = (char*)strval;
            if ((str[0] == '-') || (str[0] == '+')) {
                if (str[0] == '-') {
                    is_negative = 1;
                }
                str++;
            }
            char* endp = NULL;
            data->intval = strtoull((char*)str, &endp, 0);
            if ((endp != NULL) && (strlen(endp) > 0)) {
                fprintf(stderr, "%s: integer parse error: string contains invalid characters: '%s'\n", __func__, endp);
            }
            if (is_negative || (data->intval <= INT64_MAX)) {
                int64_t v = data->intval;
                if (is_negative) {
                    v = -v;
                }
                data->intval = (uint64_t)v;
                data->length = 8;
            } else {
                data->length = 16;
            }
            data->type = PLIST_UINT;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_REAL))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            data->realval = atof((char *) strval);
            data->type = PLIST_REAL;
            data->length = 8;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATE))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            time_t timev = 0;
            if (strlen((const char*)strval) >= 11) {
                struct tm btime;
                struct tm* tm_utc;
                parse_date((const char*)strval, &btime);
                timev = mktime(&btime);
                tm_utc = gmtime(&timev);
                timev -= (mktime(tm_utc) - timev);
            }
            data->timeval.tv_sec = (long)(timev - MAC_EPOCH);
            data->timeval.tv_usec = 0;
            data->type = PLIST_DATE;
            data->length = sizeof(struct timeval);
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_STRING))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            len = strlen((char *) strval);
            type = xmlDetectCharEncoding(strval, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type)
            {
                data->strval = strdup((char *) strval);
                data->type = PLIST_STRING;
                data->length = strlen(data->strval);
            }
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_KEY))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            len = strlen((char *) strval);
            type = xmlDetectCharEncoding(strval, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type)
            {
                data->strval = strdup((char *) strval);
                data->type = PLIST_KEY;
                data->length = strlen(data->strval);
            }
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATA))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            size_t size = 0;
            unsigned char *dec = base64decode((char*)strval, &size);
            data->buff = (uint8_t *) malloc(size * sizeof(uint8_t));
            memcpy(data->buff, dec, size * sizeof(uint8_t));
            free(dec);
            data->length = size;
            data->type = PLIST_DATA;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_ARRAY))
        {
            data->type = PLIST_ARRAY;
            xml_to_node(node, &subnode);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DICT))
        {
            data->type = PLIST_DICT;
            xml_to_node(node, &subnode);
            if (plist_get_node_type(subnode) == PLIST_DICT) {
                if (plist_dict_get_size(subnode) == 1) {
                    plist_t uid = plist_dict_get_item(subnode, "CF$UID");
                    if (uid) {
                        uint64_t val = 0;
                        plist_get_uint_val(uid, &val);
                        plist_dict_remove_item(subnode, "CF$UID");
                        plist_data_t nodedata = plist_get_data((node_t*)subnode);
                        free(nodedata->buff);
                        nodedata->type = PLIST_UID;
                        nodedata->length = sizeof(uint64_t);
                        nodedata->intval = val;
                    } 
                }
            }
            continue;
        }
    }
}

示例#5

0

显示文件

文件： html_read.c 项目： GNOME/gnumeric

void
html_file_open (G_GNUC_UNUSED GOFileOpener const *fo, GOIOContext *io_context,
		WorkbookView *wb_view, GsfInput *input)
{
	guint8 const *buf;
	gsf_off_t size;
	int len, bomlen;
	htmlParserCtxtPtr ctxt;
	htmlDocPtr doc = NULL;
	xmlCharEncoding enc;
	GnmHtmlTableCtxt tc;

	g_return_if_fail (input != NULL);

	if (gsf_input_seek (input, 0, G_SEEK_SET))
		return;

	size = gsf_input_size (input);
	if (size >= 4) {
		size -= 4;
		buf = gsf_input_read (input, 4, NULL);
		if (buf != NULL) {
			enc = xmlDetectCharEncoding(buf, 4);
			switch (enc) {	/* Skip byte order mark */
			case XML_CHAR_ENCODING_UCS4BE:
			case XML_CHAR_ENCODING_UCS4LE:
			case XML_CHAR_ENCODING_UCS4_2143:
			case XML_CHAR_ENCODING_UCS4_3412:
			case XML_CHAR_ENCODING_EBCDIC:
				bomlen = 4;
				break;
			case XML_CHAR_ENCODING_UTF16BE:
			case XML_CHAR_ENCODING_UTF16LE:
				bomlen = 2;
				break;
			case XML_CHAR_ENCODING_UTF8:
				if (buf[0] == 0xef)
					bomlen = 3;
				else if (buf[0] == 0x3c)
					bomlen = 4;
				else
					bomlen = 0;
				break;
			case XML_CHAR_ENCODING_NONE:
				bomlen = 0;
				/* Try to detect unmarked UTF16LE
				   (Firefox Windows clipboard, drag data all platforms) */
				if ((buf[0] >= 0x20 || g_ascii_isspace(buf[0])) &&
				    buf[1] == 0 &&
				    (buf[2] >= 0x20 || g_ascii_isspace(buf[2])) &&
				    buf[3] == 0)
					enc =  XML_CHAR_ENCODING_UTF16LE;
				break;
			default:
				bomlen = 0;
			}
			ctxt = htmlCreatePushParserCtxt (
				NULL, NULL, (char const *)(buf + bomlen),
				4 - bomlen, gsf_input_name (input), enc);

			for (; size > 0 ; size -= len) {
				len = MIN (4096, size);
				buf = gsf_input_read (input, len, NULL);
				if (buf == NULL)
					break;
				htmlParseChunk (
					ctxt, (char const *)buf, len, 0);
			}

			htmlParseChunk (ctxt, (char const *)buf, 0, 1);
			doc = ctxt->myDoc;
			htmlFreeParserCtxt (ctxt);
		}
	}

	if (doc != NULL) {
		xmlNodePtr ptr;
		tc.sheet = NULL;
		tc.row   = -1;
		tc.wb_view = wb_view;
		for (ptr = doc->children; ptr != NULL ; ptr = ptr->next)
			html_search_for_tables (ptr, doc, wb_view, &tc);
		xmlFreeDoc (doc);
	} else
		go_io_error_info_set (io_context,
			go_error_info_new_str (_("Unable to parse the html.")));
}

示例#6

0

显示文件

文件： xplist.c 项目： Clstroud/libplist

static void xml_to_node(xmlNodePtr xml_node, plist_t * plist_node)
{
    xmlNodePtr node = NULL;
    plist_data_t data = NULL;
    plist_t subnode = NULL;

    //for string
    long len = 0;
    int type = 0;

    if (!xml_node)
        return;

    for (node = xml_node->children; node; node = node->next)
    {

        while (node && !xmlStrcmp(node->name, XPLIST_TEXT))
            node = node->next;
        if (!node)
            break;

        if (!xmlStrcmp(node->name, BAD_CAST("comment"))) {
            continue;
        }

        data = plist_new_plist_data();
        subnode = plist_new_node(data);
        if (*plist_node)
            node_attach(*plist_node, subnode);
        else
            *plist_node = subnode;

        if (!xmlStrcmp(node->name, XPLIST_TRUE))
        {
            data->boolval = TRUE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_FALSE))
        {
            data->boolval = FALSE;
            data->type = PLIST_BOOLEAN;
            data->length = 1;
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_INT))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            data->intval = strtoull((char*)strval, NULL, 0);
            data->type = PLIST_UINT;
            data->length = 8;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_REAL))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            data->realval = atof((char *) strval);
            data->type = PLIST_REAL;
            data->length = 8;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATE))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            time_t time = 0;
            if (strlen((const char*)strval) >= 11) {
                struct tm btime;
                parse_date((const char*)strval, &btime);
                time = mktime(&btime);
            }
            data->timeval.tv_sec = (long)time;
            data->timeval.tv_usec = 0;
            data->type = PLIST_DATE;
            data->length = sizeof(struct timeval);
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_STRING))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            len = strlen((char *) strval);
            type = xmlDetectCharEncoding(strval, len);

            if (XML_CHAR_ENCODING_UTF8 == type || XML_CHAR_ENCODING_ASCII == type || XML_CHAR_ENCODING_NONE == type)
            {
                data->strval = strdup((char *) strval);
                data->type = PLIST_STRING;
                data->length = strlen(data->strval);
            }
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_KEY))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            data->strval = strdup((char *) strval);
            data->type = PLIST_KEY;
            data->length = strlen(data->strval);
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DATA))
        {
            xmlChar *strval = xmlNodeGetContent(node);
            size_t size = 0;
            unsigned char *dec = base64decode((char*)strval, &size);
            data->buff = (uint8_t *) malloc(size * sizeof(uint8_t));
            memcpy(data->buff, dec, size * sizeof(uint8_t));
            free(dec);
            data->length = size;
            data->type = PLIST_DATA;
            xmlFree(strval);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_ARRAY))
        {
            data->type = PLIST_ARRAY;
            xml_to_node(node, &subnode);
            continue;
        }

        if (!xmlStrcmp(node->name, XPLIST_DICT))
        {
            data->type = PLIST_DICT;
            xml_to_node(node, &subnode);
            if (plist_get_node_type(subnode) == PLIST_DICT) {
                if (plist_dict_get_size(subnode) == 1) {
                    plist_t uid = plist_dict_get_item(subnode, "CF$UID");
                    if (uid) {
                        uint64_t val = 0;
                        plist_get_uint_val(uid, &val);
                        plist_dict_remove_item(subnode, "CF$UID");
                        plist_data_t nodedata = plist_get_data((node_t*)subnode);
                        free(nodedata->buff);
                        nodedata->type = PLIST_UID;
                        nodedata->length = sizeof(uint64_t);
                        nodedata->intval = val;
                    } 
                }
            }
            continue;
        }
    }
}