Esempio n. 1
0
// Encoding normalization: returns the document text for UTF-8 consumers.
//
// Extracts the charset name from the first `charset=<name>"` declaration that
// the pattern below matches in `content` (the name must be followed by a
// closing double quote), then:
//   - "utf-8" / "utf8"  -> returns `content` unchanged;
//   - "gbk" / "gb2312"  -> returns `content` run through Convert("gbk", "utf8");
//   - any other value   -> returns "" so the caller can skip the document.
// The charset name is compared case-insensitively (ASCII), because pages
// commonly declare it as "UTF-8", "GBK" or "GB2312".
string Doc::charset_convert()
{
    string pattern = "charset\\s?=\\s?(.*?)\"";

    // Automatic storage: the helper is destroyed on every return path, so it
    // is no longer leaked as it was with a bare `new`.
    Regex regex(pattern);
    string charset = regex.match_one(content, 1);

    // Lower-case ASCII letters only; charset labels are plain ASCII tokens.
    for (string::size_type i = 0; i < charset.size(); ++i)
    {
        if (charset[i] >= 'A' && charset[i] <= 'Z')
            charset[i] = static_cast<char>(charset[i] - 'A' + 'a');
    }

    // Already UTF-8: nothing to do.
    if (charset == "utf-8" || charset == "utf8")
        return content;

    // GBK family: gb2312 documents are handled with the same gbk converter.
    if (charset == "gbk" || charset == "gb2312")
    {
        Convert converter("gbk", "utf8");
        return converter.exec(content);
    }

    // Any other (or undetected) encoding is not supported; skip the document.
    return "";
}