bool xml_open_file(char *path) { int sz; FILE *file; struct stat sb; // get file size if (stat(path,&sb)!=0) return(FALSE); sz=(int)sb.st_size; // open file file=fopen(path,"rb"); if (file==NULL) return(FALSE); xml_dataptr=(char*)malloc(sz); if (xml_dataptr==NULL) { fclose(file); return(FALSE); } fread(xml_dataptr,1,sz,file); fclose(file); xml_filesz=sz; xml_buffersz=xml_buffer_chunk_sz; xml_taglistptr=NULL; return(xml_decode()); }
/*
 * Runs xml_decode() on `probe` and compares the output with `exp`.
 * Returns true when they are equal; otherwise emits a g_warning with
 * both strings and returns false.
 */
static bool decode(const char *probe, const char *exp)
{
	std::string decoded;
	xml_decode(probe, decoded);

	if (decoded == exp)
		return true;

	g_warning("want: %s, got %s", exp, decoded.c_str());
	return false;
}
static void html2result(const char *p, ParseResult &result) { LinksPosList links_list; std::string res; const char *tag, *next; std::string name; std::string::size_type cur_pos; int i; struct ReplaceTag { const char *match_; int match_len_; const char *replace_; int char_len_; }; static const ReplaceTag replace_arr[] = { { "b>", 2, "<b>", 0 }, { "/b>", 3, "</b>", 0 }, { "big>", 4, "<big>", 0}, { "/big>", 5, "</big>", 0}, { "i>", 2, "<i>", 0 }, { "/i>", 3, "</i>", 0 }, { "s>", 2, "<s>", 0 }, { "/s>", 3, "</s>", 0 }, { "sub>", 4, "<sub>", 0 }, { "/sub>", 5, "</sub>", 0}, { "sup>", 4, "<sup>", 0}, { "/sup>", 5, "</sup>", 0}, { "small>", 6, "<small>", 0}, { "/small>", 7, "</small>", 0}, { "tt>", 3, "<tt>", 0}, { "/tt>", 4, "</tt>", 0}, { "u>", 2, "<u>", 0 }, { "/u>", 3, "</u>", 0 }, { "br>", 3, "\n", 1 }, { "nl>", 3, "", 0 }, { "hr>", 3, "\n<span foreground=\"gray\"><s> </s></span>\n", 7 }, { "/font>", 6, "</span>", 0 }, { NULL, 0, NULL }, }; for (cur_pos = 0; *p && (tag = strchr(p, '<')) != NULL;) { std::string chunk(p, tag - p); size_t pango_len; std::string pango; html_topango(chunk, pango, pango_len); res += pango; cur_pos += pango_len; p = tag; for (i = 0; replace_arr[i].match_; ++i) if (strncasecmp(replace_arr[i].match_, p + 1, replace_arr[i].match_len_) == 0) { res += replace_arr[i].replace_; p += 1 + replace_arr[i].match_len_; cur_pos += replace_arr[i].char_len_; goto cycle_end; } if (strncasecmp(p+1, "font ", 5)==0) { next = strchr(p, '>'); if (!next) { ++p; continue; } res += "<span"; name.assign(p + 6, next - (p + 6)); const char *p1 = strcasestr(name.c_str(), "face="); if (p1) { p1 += sizeof("face=") -1 +1; const char *p2 = p1; while (true) { if (*p2 == '\0') { p2 = NULL; break; } if (*p2 == '\'' || *p2 == '"') break; p2++; } if (p2) { std::string face(p1, p2-p1); res += " face=\""; res += face; res += "\""; } } p1 = strcasestr(name.c_str(), "color="); if (p1) { p1 += sizeof("color=") -1; if (*p1 == '\'' || *p1 == '\"') p1++; const char *p2 = p1; while 
(true) { if (*p2 == '\0') { p2 = NULL; break; } if (*p2 == '\'' || *p2 == '"' || *p2 == ' ' || *p2 == '>') break; p2++; } if (p2) { std::string color(p1, p2-p1); if (pango_color_parse(NULL, color.c_str())) { res += " foreground=\""; res += color; res += "\""; } } } res += ">"; p = next + 1; } else if ((*(p + 1) == 'a' || *(p + 1) == 'A') && *(p + 2) == ' ') { next = strchr(p, '>'); if (!next) { p++; continue; } p+=3; name.assign(p, next - p); const char *p1 = strcasestr(name.c_str(), "href="); std::string link; if (p1) { p1 += sizeof("href=") -1 +1; const char *p2 = p1; while (true) { if (*p2 == '\0') { p2 = NULL; break; } if (*p2 == '\'' || *p2 == '"') break; p2++; } if (p2) { link.assign(p1, p2-p1); } } p = next + 1; next = strcasestr(p, "</a>"); if (!next) { continue; } res += "<span foreground=\"lightblue\" underline=\"single\">"; std::string::size_type link_len = next - p; std::string chunk(p, link_len); html_topango(chunk, pango, pango_len); links_list.push_back(LinkDesc(cur_pos, pango_len, link)); res += pango; cur_pos += pango_len; res += "</span>"; p = next + sizeof("</a>") - 1; } else if (strncasecmp(p+1, "ref>", 4)==0) { next = strcasestr(p, "</ref>"); if (!next) { p++; continue; } p+=5; res += "<span foreground=\"lightblue\" underline=\"single\">"; std::string::size_type link_len = next - p; std::string chunk(p, link_len); html_topango(chunk, pango, pango_len); std::string xml_enc; xml_decode(chunk.c_str(), xml_enc); std::string link; link = "query://"; link += xml_enc; links_list.push_back(LinkDesc(cur_pos, pango_len, link)); res += pango; cur_pos += pango_len; res += "</span>"; p = next + sizeof("</ref>") - 1; } else if (strncasecmp(p+1, "img ", 4)==0) { next = strchr(p+5, '>'); if (!next) { p++; continue; } name.assign(p+5, next - (p+5)); p = next + 1; const char *p1 = strcasestr(name.c_str(), "src="); std::string src; if (p1) { p1 += sizeof("src=") -1 +1; const char *p2 = p1; while (true) { if (*p2 == '\0') { p2 = NULL; break; } if (*p2 == '\'' || 
*p2 == '"') break; p2++; } if (p2) { src.assign(p1, p2-p1); } } if (!src.empty()) { ParseResultItem item; item.type = ParseResultItemType_link; item.link = new ParseResultLinkItem; item.link->pango = res; item.link->links_list = links_list; result.item_list.push_back(item); res.clear(); cur_pos = 0; links_list.clear(); item.type = ParseResultItemType_res; item.res = new ParseResultResItem; item.res->type = "image"; int n = src.length(); if (src[0]==0x1e && src[n-1]==0x1f) { item.res->key.assign(src.c_str()+1, n-2); } else { item.res->key = src; } result.item_list.push_back(item); } } else { next = strchr(p+1, '>'); if (!next) { p++; res += "<"; cur_pos++; continue; } p = next + 1; } cycle_end: ; } res += p; ParseResultItem item; item.type = ParseResultItemType_link; item.link = new ParseResultLinkItem; item.link->pango = res; item.link->links_list = links_list; result.item_list.push_back(item); }