// applyXSLT.hpp
#include <libxslt/xslt.h>
#include <libxslt/xsltutils.h>
#include <libxslt/xsltInternals.h>
#include <libxml/HTMLparser.h>
#include <libxslt/transform.h>
#include <algorithm>
// Error callback with a printf-style signature.
//
// ctx is interpreted as a FILE*; when it is non-null the formatted message
// is written to that stream. When ctx is null the message is discarded.
static void XMLCDECL xml_error(void *ctx, const char *msg, ...)
{
    if (!ctx) {
        return;
    }

    FILE *stream = (FILE *)ctx;
    va_list args;
    va_start(args, msg);
    vfprintf(stream, msg, args);
    va_end(args);
}
// Removes leading and trailing whitespace (space, \f, \n, \r, \t, \v) from
// str in place.
//
// A string that is empty or consists only of whitespace ends up empty.
void string_trim(std::string& str)
{
    static const char kWhitespace[] = " \f\n\r\t\v";

    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        // No non-whitespace character anywhere: the whole string is padding.
        str.clear();
        return;
    }

    // A non-whitespace character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of(kWhitespace);

    // Cut the tail first so that `first` still indexes the same character.
    str.erase(last + 1);
    str.erase(0, first);
}
// Extracts the media type from the Content-Type line of a raw HTTP header
// block.
//
// The field is located by searching for the literal "\nContent-Type: "
// (case-sensitive, and it must be preceded by a newline, i.e. it cannot be
// the very first line). The value runs to the end of that line, or to the end
// of http_header when the field is the last line and has no trailing newline.
// Parameters are discarded: everything from "charset=" onwards, and
// everything from the first ';' onwards. The remainder is trimmed and
// lower-cased.
//
// Returns the media type (e.g. "text/xml"), or an empty string when no
// Content-Type line is found.
static std::string getMIMETypeFromHttpHeader(const std::string& http_header)
{
    static const char kField[] = "\nContent-Type: ";
    const std::string::size_type kFieldLen = sizeof(kField) - 1;

    std::string mimetype;

    const std::string::size_type field_pos = http_header.find(kField);
    if (field_pos == std::string::npos) {
        return mimetype;
    }

    const std::string::size_type value_pos = field_pos + kFieldLen;
    const std::string::size_type eol = http_header.find('\n', value_pos);
    if (eol == std::string::npos) {
        // Last header line without a terminating newline: take the rest.
        mimetype = http_header.substr(value_pos);
    } else {
        mimetype = http_header.substr(value_pos, eol - value_pos);
    }

    // The media type is reported whether or not a charset parameter follows.
    const std::string::size_type charset_pos = mimetype.find("charset=");
    if (charset_pos != std::string::npos) {
        mimetype.erase(charset_pos);
    }
    const std::string::size_type param_pos = mimetype.find(';');
    if (param_pos != std::string::npos) {
        mimetype.erase(param_pos);
    }

    string_trim(mimetype);

    // tolower() requires a value representable as unsigned char (or EOF), so
    // convert explicitly instead of passing a possibly negative char.
    for (std::string::size_type i = 0; i < mimetype.size(); ++i) {
        mimetype[i] = static_cast<char>(tolower(static_cast<unsigned char>(mimetype[i])));
    }
    return mimetype;
}
// Returns the media type announced by http_header, or an empty string when
// http_header is empty or yields no media type.
//
// The html argument is accepted but not consulted by this implementation.
std::string getMIMEType(const std::string& http_header, const std::string& html)
{
    if (http_header.empty()) {
        return std::string();
    }
    return getMIMETypeFromHttpHeader(http_header);
}
class applyXSLT {
public:
applyXSLT() {
//³õʼ»¯
xmlSubstituteEntitiesDefault(1);
xmlLoadExtDtdDefaultValue = 1;
xmlSetGenericErrorFunc(NULL, &xml_error);
}
~applyXSLT() {
xsltCleanupGlobals();
xmlCleanupParser();
}
std::string parse(const std::string& xml, const std::string& name, const std::string& url, const std::string& html, const std::string& htmlheader, std::vector<std::pair<std::string, std::string> >& attaches) {
std::string ret("");
std::string pp = getParserPath(url);
xsltStylesheetPtr xslt = xsltParseStylesheetFile(BAD_CAST pp.c_str());
htmlDocPtr doc = NULL;
static std::string encoding("gb18030");
std::string mimetype = getMIMEType(htmlheader, html);
if (!mimetype.empty() && mimetype == "text/xml") {
doc = html.empty() ? NULL : xmlReadDoc(BAD_CAST html.c_str(), NULL, encoding.c_str(), XML_PARSE_RECOVER);
} else {
doc = html.empty() ? NULL : htmlParseDoc(BAD_CAST html.c_str(), encoding.c_str());
}
if (doc != NULL) {
const char *params[7] = {0};
size_t n_param = 0;
params[n_param] = NULL;
xmlDocPtr res = xsltApplyStylesheet(xslt, doc, params);
//free_xslt_params(params, n_param);
if (res != NULL) {
xmlChar *s = NULL;
int len = 0;
if (xsltSaveResultToString(&s, &len, res, xslt) >= 0) {
ret.assign((const char *)s, len);
xmlFree(s);
}
xmlFreeDoc(res);
}
xmlFreeDoc(doc);
}
return ret;
}
private:
std::string getParserPath(const std::string& url) {
return std::string("./DoubanBook_20130513.xsl");
}
};