/*
 * Test helper: converts the hex string `in` through hex_to_decimal() using a
 * 20-byte scratch buffer and compares the produced text with `expected`.
 *
 * Returns 1 (after printing a diagnostic to stderr) when hex_to_decimal()
 * yields NULL; otherwise returns whatever check_str() reports for the
 * converted text versus `expected`.
 */
int check_hex_to_decimal(char *in, char *expected)
{
    char scratch[20];
    char *converted = hex_to_decimal(in, strlen(in), scratch, 20);

    if (converted != NULL) {
        return check_str(converted, expected);
    }

    fprintf(stderr, "NULL returned from hex_to_decimal\n");
    return 1;
}
std::string HtmlStrDecode(const std::string& htmlStr, const char* encTo, bool decodeChar){ if(htmlStr.empty() || encTo == NULL) return ""; std::string destStr = htmlStr; std::string tmpStr = destStr; boost::smatch what; // dec unicode convert 年 boost::regex unicodeDecReg("&#\\d{1,6};"); std::string::const_iterator start = tmpStr.begin(); std::string::const_iterator end = tmpStr.end(); while ( boost::regex_search(start, end, what, unicodeDecReg) ) { // split unicode number std::string unicode(what[0].str(), 2, what[0].str().size() - 3); // convert unicode to char std::string dest = ConvChar(atoi(unicode.c_str()), encTo, "UTF-16"); // find next unicode number start = what[0].second; // replace html unicode to char boost::replace_all(destStr, what[0].str(), dest); } // standard unicode convert \u5e74 boost::regex unicodeReg("\\\\u\\w{1,4}"); tmpStr = destStr; start = tmpStr.begin(); end = tmpStr.end(); while ( boost::regex_search(start, end, what, unicodeReg) ) { std::string str(what[0].str(), 2, what[0].str().size() - 2); uint32_t unicode = hex_to_decimal(str.c_str(), str.size()); std::string dest = ConvChar(unicode, encTo, "UTF-16"); boost::replace_all(destStr, what[0].str(), dest); start = what[0].second; } // Hex unicode convert 年 boost::regex unicodeHexReg("&#x\\w{1,4}"); tmpStr = destStr; start = tmpStr.begin(); end = tmpStr.end(); while ( boost::regex_search(start, end, what, unicodeHexReg) ) { // split unicode number std::string str(what[0].str(), 3, what[0].str().size() - 3); uint32_t unicode = hex_to_decimal(str.c_str(), str.size()); std::string dest = ConvChar(unicode, encTo, "UTF-16"); boost::replace_all(destStr, what[0].str(), dest); start = what[0].second; } // special char convert boost::regex spCharReg("&[A-Za-z0-9]{2,8};"); tmpStr = destStr; start = tmpStr.begin(); end = tmpStr.end(); while ( boost::regex_search(start, end, what, spCharReg) ) { std::string spstr(what[0].str(), 1, what[0].str().size() - 2); std::map<std::string, 
uint32_t>::const_iterator it = HtmlSpMap.find(spstr); if(HtmlSpMap.end() != it){ std::string dest = ConvChar(it->second, encTo, "UTF-16"); boost::replace_all(destStr, what[0].str(), dest); } start = what[0].second; } return destStr; }