/**
 * Copy the next '\n'-terminated line from the scan position into out and
 * advance past it.
 * @param out  receives the line (appended, existing content is kept)
 * @param nonl when true the trailing "\n" (and a "\r" right before it)
 *             is not appended to out
 * @param n    if not NULL, receives the number of bytes appended to out;
 *             set to 0 when no line was found
 * @param move when true the unread remainder is shifted to the start of
 *             the internal buffer instead of just advancing scan_ptr_
 * @return true if a complete line was found, false otherwise (nothing is
 *         consumed in that case)
 */
bool string::scan_line(string& out, bool nonl /* = true */,
	size_t* n /* = NULL */, bool move /* = false */)
{
	if (n != NULL) {
		*n = 0;
	}

	char* tail = buf_end();
	if (tail == NULL) {
		return false;
	}

	char* eol = (char*) memchr(scan_ptr_, '\n', tail - scan_ptr_);
	if (eol == NULL) {
		return false;
	}

	// First byte after the line terminator.
	char* rest = eol + 1;

	// Number of bytes to hand out; starts out including the '\n'.
	size_t count = rest - scan_ptr_;
	if (nonl) {
		// Drop the '\n', and a directly preceding '\r' if there is one.
		count--;
		if (count > 0 && *(eol - 1) == '\r') {
			count--;
		}
	}

	// With nonl == false count is always >= 1, so this only skips the
	// append for an empty line whose terminator was stripped.
	if (count > 0) {
		out.append(scan_ptr_, count);
	}

	if (rest >= tail) {
		// The line was the last thing in the buffer.
		clear();
	} else if (move) {
		acl_vstring_memmove(vbf_, rest, tail - rest);
		TERM(vbf_);
		scan_ptr_ = STR(vbf_);
	} else {
		scan_ptr_ = rest;
	}

	if (n != NULL) {
		*n = count;
	}
	return true;
}
/**
 * Shift the not-yet-scanned data to the start of the internal buffer.
 * @return number of unread bytes now at the start of the buffer; 0 when
 *         scanning has not started (scan_ptr_ is NULL) or when nothing is
 *         left, in which case clear() is called
 */
size_t string::scan_move()
{
	if (scan_ptr_ == NULL) {
		return 0;
	}

	char* tail = acl_vstring_end(vbf_);
	if (scan_ptr_ < tail) {
		const size_t remain = tail - scan_ptr_;

		acl_vstring_memmove(vbf_, scan_ptr_, remain);
		TERM(vbf_);
		scan_ptr_ = STR(vbf_);
		return remain;
	}

	// Everything has been consumed already.
	clear();
	return 0;
}
size_t string::scan_buf(void* pbuf, size_t n, bool move /* = false */) { if (pbuf == NULL || n == 0) return 0; const char *pEnd = buf_end(); if (pEnd == NULL) return 0; size_t len = pEnd - scan_ptr_; if (len > n) len = n; memcpy(pbuf, scan_ptr_, len); if (move) { acl_vstring_memmove(vbf_, scan_ptr_, len); TERM(vbf_); scan_ptr_ = STR(vbf_); } else scan_ptr_ += len; return len; }
bool charset_conv::update(const char* in, size_t len, acl::string* out) { #ifdef HAVE_H_ICONV if (in == NULL) logger_fatal("in null"); if (out == NULL) logger_fatal("out null"); if (EQ(m_fromCharset, m_toCharset)) { out->append(in, len); return (true); } if (m_iconv == (iconv_t) -1) { logger_error("m_iconv invalid"); m_errmsg = "m_iconv invalid"; return (false); } // 去掉有些 UTF-8 文档中开始的 UTF-8 引导符 if (*m_pUtf8Pre) { while (len > 0) { if (*m_pUtf8Pre == 0x00) break; else if (*m_pUtf8Pre != *in) { // 必须使 UTF-8 前缀失效 m_pUtf8Pre = &UTF8_HEADER[3]; break; } m_pUtf8Pre++; in++; len--; } } if (len == 0) return (true); if (m_pInBuf == NULL) m_pInBuf = acl_vstring_alloc(len); if (m_pOutBuf == NULL) m_pOutBuf = acl_vstring_alloc(len); else ACL_VSTRING_SPACE(m_pOutBuf, len); // 先将输入数据进行缓冲 if (*m_pUtf8Pre && m_pUtf8Pre - UTF8_HEADER > 0) acl_vstring_memcpy(m_pInBuf, UTF8_HEADER, m_pUtf8Pre - UTF8_HEADER); acl_vstring_memcat(m_pInBuf, in, len); ACL_VSTRING_TERMINATE(m_pInBuf); char *pIn, *pOut; size_t ret, nIn, nOut; while (true) { nIn = LEN(m_pInBuf); if (nIn == 0) break; pIn = STR(m_pInBuf); pOut = STR(m_pOutBuf); nOut = SIZE(m_pOutBuf); #ifdef WIN32 # ifdef USE_WIN_ICONV ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut); # else int err; ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut, &err); errno = err; # endif // USE_WIN_ICONV #elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD) ret = __iconv(m_iconv, (const char**) &pIn, &nIn, &pOut, &nOut); #else ret = __iconv(m_iconv, &pIn, &nIn, &pOut, &nOut); #endif if (ret != (size_t) -1) { if ((ret = SIZE(m_pOutBuf) - nOut) > 0) out->append(STR(m_pOutBuf), ret); else // xxx out->append(in, len); ACL_VSTRING_RESET(m_pInBuf); break; } else if (errno == E2BIG) { if ((ret = SIZE(m_pOutBuf) - nOut) > 0) out->append(STR(m_pOutBuf), ret); if (pIn > STR(m_pInBuf) && nIn < LEN(m_pInBuf)) acl_vstring_memmove(m_pInBuf, pIn, nIn); // 扩大内存空间 ACL_VSTRING_SPACE(m_pOutBuf, SIZE(m_pOutBuf) * 2); continue; } else if (errno == 
EILSEQ) { char *pNil = NULL; size_t zero = 0; // 重置状态, 似乎也没啥用处 #ifdef WIN32 # ifdef USE_WIN_ICONV __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero); # else __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero, NULL); # endif #elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD) __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero); #else __iconv(m_iconv, &pNil, &zero, &pNil, &zero); #endif // 遇到无效的多字节序列,pIn 指向第一个无效的位置 // 先拷贝已经转换的数据 if ((ret = SIZE(m_pOutBuf) - nOut) > 0) out->append(STR(m_pOutBuf), ret); if (nIn == 0) { ACL_VSTRING_RESET(m_pInBuf); break; } acl_assert(pIn >= STR(m_pInBuf)); // 跳过无效字节 (*out) += (char)(*pIn); // 直接拷贝无效字节 nIn--; pIn++; if (nIn > 0) acl_vstring_memmove(m_pInBuf, pIn, nIn); else ACL_VSTRING_RESET(m_pInBuf); } else if (errno == EINVAL) { char *pNil = NULL; size_t zero = 0; // 重置状态, 似乎也没啥用处 #ifdef WIN32 # ifdef USE_WIN_ICONV __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero); # else __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero, NULL); # endif // USE_WIN_ICONV #elif defined(ACL_SUNOS5) || defined(ACL_FREEBSD) __iconv(m_iconv, (const char**) &pNil, &zero, &pNil, &zero); #else __iconv(m_iconv, &pNil, &zero, &pNil, &zero); #endif // 输入的多字节序列不完整,pIn 指向该不完整的位置 // 先拷贝已经转换的数据 if ((ret = SIZE(m_pOutBuf) - nOut) > 0) out->append(STR(m_pOutBuf), ret); // 移动数据,将未转换的数据移至缓冲区起始位置 if (nIn > 0) acl_vstring_memmove(m_pInBuf, pIn, nIn); else ACL_VSTRING_RESET(m_pInBuf); break; } else if (LEN(m_pInBuf) > 0) { // 如果遇到了无效的字符集,根据设置的标志位 // 决定是否直接拷贝 if (m_addInvalid) { out->append(STR(m_pInBuf), LEN(m_pInBuf)); ACL_VSTRING_RESET(m_pInBuf); } break; } else break; } return (true); #else (void) in; (void) len; (void) out; logger_error("no iconv lib"); m_errmsg = "no iconv lib"; return (false); #endif }