Пример #1
0
/*
 * Decode the NUL-terminated string `in` into `out` using the file-level
 * token tree, and return how many tokens were matched and substituted.
 *
 * in   source text; scanned until its terminating NUL or until the token
 *      matcher reports no further match.
 * out  destination buffer; data is appended, then the buffer is terminated
 *      with ACL_VSTRING_TERMINATE before returning.
 *
 * The token tree is initialised at most once via acl_pthread_once();
 * acl_msg_fatal() is called if the tree is still NULL afterwards.
 */
int acl_xml_decode(const char *in, ACL_VSTRING *out)
{
    const char *cursor = in;
    int   replaced = 0;

    acl_pthread_once(&__token_once, xml_decode_init);
    if (__token_tree == NULL)
        acl_msg_fatal("__token_tree null");

    for (;;) {
        const char *start;
        const ACL_TOKEN *hit;
        const XML_SPEC *entity;
        int   plain;

        if (*cursor == 0)
            break;

        /* Remember where this scan began; the matcher receives &cursor
         * and is presumably the one that moves it forward. */
        start = cursor;
        hit = acl_token_tree_match(__token_tree, &cursor, NULL, NULL);

        if (hit == NULL) {
            /* No further token: let markup_unescape() process the tail
             * (NOTE(review): its exact contract is not visible here),
             * then append whatever lies between its return value and the
             * matcher's final position. */
            start = markup_unescape(start, out);
            plain = (int) (cursor - start);
            if (plain > 0)
                acl_vstring_memcat(out, start, plain);
            break;
        }

        entity = (const XML_SPEC*) hit->ctx;
        acl_assert(entity != NULL);

        /* Text scanned before the token: total distance advanced minus
         * the length recorded for the matched token. */
        plain = (int) (cursor - start - entity->len);
        if (plain > 0)
            acl_vstring_memcat(out, start, plain);

        /* Emit the replacement string associated with the token. */
        acl_vstring_strcat(out, entity->str);
        replaced++;
    }

    ACL_VSTRING_TERMINATE(out);
    return (replaced);
}
Пример #2
0
/**
 * Feed one chunk of input to the streaming converter and append everything
 * that could be converted to *out.
 *
 * @param in   input bytes in the source charset; a NULL pointer is reported
 *             through logger_fatal()
 * @param len  number of bytes available at `in`
 * @param out  string that receives the converted bytes (appended); a NULL
 *             pointer is reported through logger_fatal()
 * @return false when the iconv handle is invalid or when the build has no
 *         iconv support (m_errmsg is set in both cases); true otherwise,
 *         including when input was only buffered or passed through raw.
 *
 * Behaviour visible in this function:
 *  - identical source/target charsets: the input is appended unchanged;
 *  - while m_pUtf8Pre still points inside UTF8_HEADER, leading input bytes
 *    are compared against the header so that a UTF-8 byte-order mark is
 *    dropped; the comparison state survives across calls;
 *  - E2BIG  : flush converted output, enlarge the output space, retry;
 *  - EILSEQ : flush converted output, copy the offending byte verbatim,
 *             continue with the remaining input;
 *  - EINVAL : flush converted output and keep the incomplete tail in
 *             m_pInBuf for the next call;
 *  - other  : copy the buffered input verbatim only if m_addInvalid is set.
 */
bool charset_conv::update(const char* in, size_t len, acl::string* out)
{
#ifdef  HAVE_H_ICONV
	if (in == NULL)
		logger_fatal("in null");
	if (out == NULL)
		logger_fatal("out null");

	if (EQ(m_fromCharset, m_toCharset))
	{
		out->append(in, len);
		return (true);
	}

	if (m_iconv == (iconv_t) -1)
	{
		logger_error("m_iconv invalid");
		m_errmsg = "m_iconv invalid";
		return (false);
	}

	// Number of header bytes that were consumed as a byte-order-mark
	// candidate (possibly during earlier calls) but turned out to be
	// ordinary data; they must be handed back to the converter.
	size_t nPending = 0;

	// Strip the UTF-8 byte-order mark that some UTF-8 documents start with
	if (*m_pUtf8Pre)
	{
		while (len > 0)
		{
			if (*m_pUtf8Pre == 0x00)
				break;
			else if (*m_pUtf8Pre != *in)
			{
				// Not a byte-order mark after all.  Remember how
				// many header bytes had already been swallowed
				// BEFORE disabling the check: once m_pUtf8Pre
				// points at the header terminator that count can
				// no longer be recovered, and the swallowed bytes
				// (e.g. the 0xEF lead byte of a fullwidth
				// character) would be lost.
				nPending = (size_t) (m_pUtf8Pre - UTF8_HEADER);
				// The UTF-8 prefix check must be disabled from
				// now on
				m_pUtf8Pre = &UTF8_HEADER[3];
				break;
			}
			m_pUtf8Pre++;
			in++;
			len--;
		}
	}

	// Everything supplied so far matched the header (or nothing was
	// supplied); wait for more input.
	if (len == 0)
		return (true);

	if (m_pInBuf == NULL)
		m_pInBuf = acl_vstring_alloc(len);

	if (m_pOutBuf == NULL)
		m_pOutBuf = acl_vstring_alloc(len);
	else
		ACL_VSTRING_SPACE(m_pOutBuf, len);

	// Buffer the input first, putting back any header bytes that were
	// swallowed while probing for a byte-order mark.  The bytes are taken
	// from UTF8_HEADER because they may have arrived in a previous call
	// whose `in` buffer is no longer available.
	if (nPending > 0)
		acl_vstring_memcat(m_pInBuf, UTF8_HEADER, nPending);
	acl_vstring_memcat(m_pInBuf, in, len);
	ACL_VSTRING_TERMINATE(m_pInBuf);

	char  *pIn, *pOut;
	size_t ret, nIn, nOut;

	while (true)
	{
		nIn  = LEN(m_pInBuf);
		if (nIn == 0)
			break;
		pIn  = STR(m_pInBuf);
		pOut = STR(m_pOutBuf);
		nOut = SIZE(m_pOutBuf);

#ifdef	WIN32
# ifdef USE_WIN_ICONV
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn,
				&pOut, &nOut);
# else
		int   err;
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn,
				&pOut, &nOut, &err);
		errno = err;
# endif // USE_WIN_ICONV
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut);
#else
		ret = __iconv(m_iconv, &pIn, &nIn, &pOut, &nOut);
#endif


		if (ret != (size_t) -1)
		{
			// Whole buffer converted: emit the produced bytes, or
			// fall back to the raw input when nothing was produced.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);
			else  // xxx
				out->append(in, len);
			ACL_VSTRING_RESET(m_pInBuf);
			break;
		}
		else if (errno == E2BIG)
		{
			// Output space exhausted: flush what was produced and
			// keep only the unconverted remainder of the input.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);
			if (pIn > STR(m_pInBuf) && nIn < LEN(m_pInBuf))
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			// Enlarge the output space
			ACL_VSTRING_SPACE(m_pOutBuf, SIZE(m_pOutBuf) * 2);
			continue;
		}
		else if (errno == EILSEQ)
		{
			char *pNil = NULL;
			size_t zero = 0;

			// Reset the conversion state; seems to be of little use
#ifdef	WIN32
# ifdef USE_WIN_ICONV
			__iconv(m_iconv, (const char**) &pNil,
				&zero, &pNil, &zero);
# else
			__iconv(m_iconv, (const char**) &pNil,
				&zero, &pNil, &zero, NULL);
# endif
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
			__iconv(m_iconv, (const char**) &pNil,
				&zero, &pNil, &zero);
#else
			__iconv(m_iconv, &pNil, &zero, &pNil, &zero);
#endif

			// An invalid multibyte sequence was met; pIn points at
			// the first invalid position

			// First copy the data that has already been converted
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);

			if (nIn == 0)
			{
				ACL_VSTRING_RESET(m_pInBuf);
				break;
			}

			acl_assert(pIn >= STR(m_pInBuf));

			// Skip the invalid byte
			(*out) += (char)(*pIn); // copy the invalid byte verbatim
			nIn--;
			pIn++;
			if (nIn > 0)
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			else
				ACL_VSTRING_RESET(m_pInBuf);
		}
		else if (errno == EINVAL)
		{
			char *pNil = NULL;
			size_t zero = 0;

			// Reset the conversion state; seems to be of little use
#ifdef	WIN32
# ifdef USE_WIN_ICONV
			__iconv(m_iconv, (const char**) &pNil,
				&zero, &pNil, &zero);
# else
			__iconv(m_iconv, (const char**) &pNil,
				&zero, &pNil, &zero, NULL);
# endif // USE_WIN_ICONV
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero);
#else
			__iconv(m_iconv, &pNil, &zero, &pNil, &zero);
#endif

			// The input ends in an incomplete multibyte sequence;
			// pIn points at the start of that incomplete part

			// First copy the data that has already been converted
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);

			// Move the unconverted data to the start of the buffer
			// so the next call can complete it
			if (nIn > 0)
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			else
				ACL_VSTRING_RESET(m_pInBuf);
			break;
		}
		else if (LEN(m_pInBuf) > 0)
		{
			// Some other conversion failure: the m_addInvalid flag
			// decides whether the buffered data is copied through
			// unchanged
			if (m_addInvalid)
			{
				out->append(STR(m_pInBuf), LEN(m_pInBuf));
				ACL_VSTRING_RESET(m_pInBuf);
			}
			break;
		}
		else
			break;
	}

	return (true);
#else
	(void) in;
	(void) len;
	(void) out;
	logger_error("no iconv lib");
	m_errmsg = "no iconv lib";
	return (false);
#endif
}