int acl_xml_decode(const char *in, ACL_VSTRING *out) { int n = 0, len; const char *ptr = in, *pre; const ACL_TOKEN *token; const XML_SPEC *spec; acl_pthread_once(&__token_once, xml_decode_init); if (__token_tree == NULL) acl_msg_fatal("__token_tree null"); while (*ptr != 0) { pre = ptr; token = acl_token_tree_match(__token_tree, &ptr, NULL, NULL); if (token == NULL) { pre = markup_unescape(pre, out); len = (int) (ptr - pre); if (len > 0) acl_vstring_memcat(out, pre, len); break; } spec = (const XML_SPEC*) token->ctx; acl_assert(spec != NULL); len = (int) (ptr - pre - spec->len); if (len > 0) acl_vstring_memcat(out, pre, len); acl_vstring_strcat(out, spec->str); n++; } ACL_VSTRING_TERMINATE(out); return (n); }
/**
 * Streaming charset conversion step: feed `len` bytes of `in` through the
 * iconv handle m_iconv and append whatever could be converted to `out`.
 *
 * Input is first appended to the internal buffer m_pInBuf, so bytes left
 * unconverted by an earlier call (see the EINVAL branch) are retried together
 * with the new data. Converted bytes are staged in m_pOutBuf before being
 * appended to `out`.
 *
 * @param in   input bytes; logger_fatal() is called when NULL
 * @param len  number of bytes available at `in`
 * @param out  destination string; logger_fatal() is called when NULL
 * @return false when m_iconv is invalid or the library was built without
 *         iconv support (m_errmsg is set in both cases); true on every other
 *         path, including those where invalid or incomplete input was met
 */
bool charset_conv::update(const char* in, size_t len, acl::string* out)
{
#ifdef HAVE_H_ICONV
	if (in == NULL)
		logger_fatal("in null");
	if (out == NULL)
		logger_fatal("out null");

	// Source and target charset compare equal (via EQ): nothing to
	// convert, pass the bytes straight through.
	if (EQ(m_fromCharset, m_toCharset)) {
		out->append(in, len);
		return (true);
	}

	if (m_iconv == (iconv_t) -1) {
		logger_error("m_iconv invalid");
		m_errmsg = "m_iconv invalid";
		return (false);
	}

	// Strip the UTF-8 leading marker that some UTF-8 documents start with.
	// m_pUtf8Pre walks through UTF8_HEADER and persists across calls, so
	// the marker may be split over several update() invocations.
	if (*m_pUtf8Pre) {
		while (len > 0) {
			if (*m_pUtf8Pre == 0x00)
				break;
			else if (*m_pUtf8Pre != *in) {
				// Mismatch: the UTF-8 prefix check must be disabled
				// (index 3 is presumably the terminating NUL of
				// UTF8_HEADER -- confirm).
				m_pUtf8Pre = &UTF8_HEADER[3];
				break;
			}
			m_pUtf8Pre++;
			in++;
			len--;
		}
	}

	// Everything in this chunk was consumed as prefix bytes.
	if (len == 0)
		return (true);

	if (m_pInBuf == NULL)
		m_pInBuf = acl_vstring_alloc(len);

	if (m_pOutBuf == NULL)
		m_pOutBuf = acl_vstring_alloc(len);
	else
		ACL_VSTRING_SPACE(m_pOutBuf, len);

	// Buffer the input data first.
	// NOTE(review): if UTF8_HEADER[3] is the NUL terminator, this condition
	// looks unable to hold at this point, and prefix bytes consumed before
	// a mismatch above would not be restored -- confirm.
	if (*m_pUtf8Pre && m_pUtf8Pre - UTF8_HEADER > 0)
		acl_vstring_memcpy(m_pInBuf, UTF8_HEADER, m_pUtf8Pre - UTF8_HEADER);
	acl_vstring_memcat(m_pInBuf, in, len);
	ACL_VSTRING_TERMINATE(m_pInBuf);

	char *pIn, *pOut;
	size_t ret, nIn, nOut;

	while (true) {
		nIn = LEN(m_pInBuf);
		if (nIn == 0)
			break;
		pIn = STR(m_pInBuf);
		pOut = STR(m_pOutBuf);
		nOut = SIZE(m_pOutBuf);

		// The __iconv signature differs per platform; the non-USE_WIN_ICONV
		// Windows variant reports its error code through an extra argument,
		// which is copied into errno so the checks below are uniform.
#ifdef WIN32
# ifdef USE_WIN_ICONV
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut);
# else
		int err;
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut, &err);
		errno = err;
# endif // USE_WIN_ICONV
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
		ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut);
#else
		ret = __iconv(m_iconv, &pIn, &nIn, &pOut, &nOut);
#endif

		if (ret != (size_t) -1) {
			// Success: SIZE - nOut is the number of bytes written into
			// m_pOutBuf by this call.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);
			else // Nothing was produced: fall back to the raw input of this call.
				out->append(in, len);
			ACL_VSTRING_RESET(m_pInBuf);
			break;
		} else if (errno == E2BIG) {
			// Output buffer too small: flush what was converted so far.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);
			// Only shift the input when some of it was actually consumed.
			if (pIn > STR(m_pInBuf) && nIn < LEN(m_pInBuf))
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			// Enlarge the output buffer and retry.
			ACL_VSTRING_SPACE(m_pOutBuf, SIZE(m_pOutBuf) * 2);
			continue;
		} else if (errno == EILSEQ) {
			char *pNil = NULL;
			size_t zero = 0;

			// Reset the conversion state; this does not seem to help much.
#ifdef WIN32
# ifdef USE_WIN_ICONV
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero);
# else
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero, NULL);
# endif
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero);
#else
			__iconv(m_iconv, &pNil, &zero, &pNil, &zero);
#endif

			// An invalid multibyte sequence was met; pIn points at the
			// first invalid position.

			// First copy out the data that was already converted.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);

			if (nIn == 0) {
				ACL_VSTRING_RESET(m_pInBuf);
				break;
			}

			acl_assert(pIn >= STR(m_pInBuf));

			// Skip the invalid byte,
			(*out) += (char)(*pIn); // copying it through to the output verbatim.
			nIn--;
			pIn++;
			// No break here: the loop retries with the remaining input.
			if (nIn > 0)
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			else
				ACL_VSTRING_RESET(m_pInBuf);
		} else if (errno == EINVAL) {
			char *pNil = NULL;
			size_t zero = 0;

			// Reset the conversion state; this does not seem to help much.
#ifdef WIN32
# ifdef USE_WIN_ICONV
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero);
# else
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero, NULL);
# endif // USE_WIN_ICONV
#elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD)
			__iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero);
#else
			__iconv(m_iconv, &pNil, &zero, &pNil, &zero);
#endif

			// The input multibyte sequence is incomplete; pIn points at
			// the incomplete position.

			// First copy out the data that was already converted.
			if ((ret = SIZE(m_pOutBuf) - nOut) > 0)
				out->append(STR(m_pOutBuf), ret);

			// Move the unconverted data to the start of the buffer so the
			// next update() call can complete the sequence.
			if (nIn > 0)
				acl_vstring_memmove(m_pInBuf, pIn, nIn);
			else
				ACL_VSTRING_RESET(m_pInBuf);
			break;
		} else if (LEN(m_pInBuf) > 0) {
			// Some other failure (e.g. an invalid charset): the
			// m_addInvalid flag decides whether the buffered input is
			// copied through unchanged.
			// NOTE(review): when m_addInvalid is false the buffered input
			// is not reset here, so it stays in m_pInBuf for the next
			// call -- confirm this is intended.
			if (m_addInvalid) {
				out->append(STR(m_pInBuf), LEN(m_pInBuf));
				ACL_VSTRING_RESET(m_pInBuf);
			}
			break;
		} else
			break;
	}

	return (true);
#else
	// Built without iconv: report the missing library; the arguments are
	// referenced only to silence unused-parameter warnings.
	(void) in;
	(void) len;
	(void) out;
	logger_error("no iconv lib");
	m_errmsg = "no iconv lib";
	return (false);
#endif
}