std::string render(const row_type &headers, std::vector<std::size_t> &widths, wmi_impl::row_enumerator e) { std::list<row_type> rows; std::size_t count = widths.size(); while (e.has_next()) { wmi_impl::row wmi_row = e.get_next(); row_type row; for (std::size_t i = 0; i < count; i++) { std::string c = wmi_row.get_string(headers[i]); widths[i] = (std::max)(c.size(), widths[i]); row.push_back(c); } rows.push_back(row); } return render_table(widths, headers, rows); }
void htmlrenderer::render(std::istream& input, std::vector<std::string>& lines, std::vector<linkpair>& links, const std::string& url) { unsigned int image_count = 0; std::string curline; int indent_level = 0; bool inside_li = false, is_ol = false, inside_pre = false; bool itunes_hack = false; size_t inside_script = 0; size_t inside_style = 0; std::vector<unsigned int> ol_counts; std::vector<char> ol_types; htmltag current_tag; int link_num = -1; std::vector<Table> tables; /* * to render the HTML, we use a self-developed "XML" pull parser. * * A pull parser works like this: * - we feed it with an XML stream * - we then gather an iterator * - we then can iterate over all continuous elements, such as start tag, close tag, text element, ... */ tagsouppullparser xpp; xpp.setInput(input); for (tagsouppullparser::event e = xpp.next(); e != tagsouppullparser::END_DOCUMENT; e = xpp.next()) { std::string tagname; switch (e) { case tagsouppullparser::START_TAG: tagname = xpp.getText(); std::transform(tagname.begin(), tagname.end(), tagname.begin(), ::tolower); current_tag = tags[tagname]; switch (current_tag) { case TAG_A: { std::string link; try { link = xpp.getAttributeValue("href"); } catch (const std::invalid_argument& ) { LOG(LOG_WARN,"htmlrenderer::render: found a tag with no href attribute"); link = ""; } if (link.length() > 0) { link_num = add_link(links,utils::censor_url(utils::absolute_url(url,link)), LINK_HREF); if (!raw_) curline.append("<u>"); } } break; case TAG_STRONG: if (!raw_) curline.append("<b>"); break; case TAG_UNDERLINE: if (!raw_) curline.append("<u>"); break; case TAG_QUOTATION: if (!raw_) curline.append("\""); break; case TAG_EMBED: { std::string type; try { type = xpp.getAttributeValue("type"); } catch (const std::invalid_argument& ) { LOG(LOG_WARN, "htmlrenderer::render: found embed object without type attribute"); type = ""; } if (type == "application/x-shockwave-flash") { std::string link; try { link = xpp.getAttributeValue("src"); } catch (const std::invalid_argument& ) { LOG(LOG_WARN, "htmlrenderer::render: found embed object without src attribute"); link = ""; } if (link.length() > 0) { link_num = add_link(links,utils::censor_url(utils::absolute_url(url,link)), LINK_EMBED); curline.append(utils::strprintf("[%s %u]", _("embedded flash:"), link_num)); } } } break; case TAG_BR: add_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_PRE: inside_pre = true; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_ITUNESHACK: itunes_hack = true; break; case TAG_IMG: { std::string imgurl; std::string imgtitle; try { imgurl = xpp.getAttributeValue("src"); } catch (const std::invalid_argument& ) { LOG(LOG_WARN,"htmlrenderer::render: found img tag with no src attribute"); imgurl = ""; } try { imgtitle = xpp.getAttributeValue("title"); } catch (const std::invalid_argument& ) { imgtitle = ""; } if (imgurl.length() > 0) { if (imgurl.substr(0,5) == "data:") { link_num = add_link(links, "inline image", LINK_IMG); } else { link_num = add_link(links,utils::censor_url(utils::absolute_url(url,imgurl)), LINK_IMG); } if (imgtitle != "") { curline.append(utils::strprintf("[%s %u: %s]", _("image"), link_num, imgtitle.c_str())); } else { curline.append(utils::strprintf("[%s %u]", _("image"), link_num)); } image_count++; } } break; case TAG_BLOCKQUOTE: ++indent_level; add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_H1: case TAG_H2: case TAG_H3: case TAG_H4: case TAG_P: add_nonempty_line(curline, tables, lines); if (lines.size() > 0 && lines[lines.size()-1].length() > static_cast<unsigned int>(indent_level*2)) add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_OL: is_ol = true; { unsigned int ol_count = 1; try { std::string ol_count_str = xpp.getAttributeValue("start"); std::istringstream is(ol_count_str); is >> ol_count; } catch (const std::invalid_argument& ) { ol_count = 1; } ol_counts.push_back(ol_count); std::string ol_type; try { ol_type = xpp.getAttributeValue("type"); if (ol_type != "1" && ol_type != "a" && ol_type != "A" && ol_type != "i" && ol_type != "I") { ol_type = "1"; } } catch (const std::invalid_argument& ) { ol_type = "1"; } ol_types.push_back(ol_type[0]); } add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_UL: is_ol = false; add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_LI: if (inside_li) { indent_level-=2; if (indent_level < 0) indent_level = 0; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); } inside_li = true; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); indent_level+=2; if (is_ol && ol_counts.size() != 0) { curline.append(utils::strprintf("%s.", format_ol_count(ol_counts[ol_counts.size()-1], ol_types[ol_types.size()-1]).c_str())); ++ol_counts[ol_counts.size()-1]; } else { curline.append(" * "); } break; case TAG_DT: add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_DD: indent_level+=4; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_DL: // ignore tag break; case TAG_SUP: curline.append("^"); break; case TAG_SUB: curline.append("["); break; case TAG_HR: add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); add_line(std::string(" ") + std::string(w - 2, '-') + std::string(" "), tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_SCRIPT: add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); // don't render scripts, ignore current line inside_script++; break; case TAG_STYLE: inside_style++; break; case TAG_TABLE: { add_nonempty_line(curline, tables, lines); prepare_newline(curline, 0); // no indent in tables bool border = false; try { std::string b = xpp.getAttributeValue("border"); border = (utils::to_u(b) > 0); } catch (const std::invalid_argument& ) { // is ok, no border than } tables.push_back(Table(border)); break; } case TAG_TR: if (!tables.empty()) tables.back().start_row(); break; case TAG_TH: { size_t span = 1; try { span = utils::to_u(xpp.getAttributeValue("colspan")); } catch (const std::invalid_argument& ) { // is ok, span 1 than } if (!tables.empty()) tables.back().start_cell(span); curline.append("<b>"); break; } case TAG_TD: { size_t span = 1; try { span = utils::to_u(xpp.getAttributeValue("colspan")); } catch (const std::invalid_argument& ) { // is ok, span 1 than } if (!tables.empty()) tables.back().start_cell(span); break; } } break; case tagsouppullparser::END_TAG: tagname = xpp.getText(); std::transform(tagname.begin(), tagname.end(), tagname.begin(), ::tolower); current_tag = tags[tagname]; switch (current_tag) { case TAG_BLOCKQUOTE: --indent_level; if (indent_level < 0) indent_level = 0; add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_OL: ol_types.pop_back(); ol_counts.pop_back(); // fall-through case TAG_UL: if (inside_li) { indent_level-=2; if (indent_level < 0) indent_level = 0; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); } add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_DT: add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_DD: indent_level-=4; if (indent_level < 0) indent_level = 0; add_nonempty_line(curline, tables, lines); add_line("", tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_DL: // ignore tag break; case TAG_LI: indent_level-=2; if (indent_level < 0) indent_level = 0; inside_li = false; add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_H1: if (line_is_nonempty(curline)) { add_line(curline, tables, lines); size_t llen = utils::strwidth_stfl(curline); prepare_newline(curline, tables.size() ? 0 : indent_level); add_line(std::string(llen, '-'), tables, lines); } prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_H2: case TAG_H3: case TAG_H4: case TAG_P: add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_PRE: add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); inside_pre = false; break; case TAG_SUB: curline.append("]"); break; case TAG_SUP: // has closing tag, but we render nothing. break; case TAG_A: if (link_num != -1) { if (!raw_) curline.append("</>"); curline.append(utils::strprintf("[%d]", link_num)); link_num = -1; } break; case TAG_UNDERLINE: if (!raw_) curline.append("</>"); break; case TAG_STRONG: if (!raw_) curline.append("</>"); break; case TAG_QUOTATION: if (!raw_) curline.append("\""); break; case TAG_EMBED: case TAG_BR: case TAG_ITUNESHACK: case TAG_IMG: case TAG_HR: // ignore closing tags break; case TAG_SCRIPT: // don't render scripts, ignore current line if (inside_script) inside_script--; prepare_newline(curline, tables.size() ? 0 : indent_level); break; case TAG_STYLE: if (inside_style) inside_style--; break; case TAG_TABLE: add_nonempty_line(curline, tables, lines); prepare_newline(curline, 0); // no indent in tables if (!tables.empty()) { std::vector<std::string> table_text; tables.back().complete_cell(); tables.back().complete_row(); render_table(tables.back(), table_text); tables.pop_back(); if (!tables.empty()) { // still a table on the outside? for(size_t idx=0; idx < table_text.size(); ++idx) tables.back().add_text(table_text[idx]); // add rendered table to current cell } else { for(size_t idx=0; idx < table_text.size(); ++idx) { std::string s = table_text[idx]; while (s.length() > 0 && s[0] == '\n') s.erase(0, 1); add_line(s, tables, lines); } } } prepare_newline(curline, tables.size() ? 0: indent_level); break; case TAG_TR: add_nonempty_line(curline, tables, lines); prepare_newline(curline, 0); // no indent in tables if (!tables.empty()) tables.back().complete_row(); break; case TAG_TH: if (!tables.empty()) { curline.append("</>"); } add_nonempty_line(curline, tables, lines); prepare_newline(curline, 0); // no indent in tables if (!tables.empty()) { tables.back().complete_cell(); } break; case TAG_TD: add_nonempty_line(curline, tables, lines); prepare_newline(curline, 0); // no indent in tables if (!tables.empty()) tables.back().complete_cell(); break; } break; case tagsouppullparser::TEXT: { if (itunes_hack) { std::vector<std::string> words = utils::tokenize_nl(utils::quote_for_stfl(xpp.getText())); for (auto word : words) { if (word == "\n") { add_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); } else { std::vector<std::string> words2 = utils::tokenize_spaced(word); unsigned int i=0; bool new_line = false; for (auto word2 : words2) { if ((utils::strwidth_stfl(curline) + utils::strwidth_stfl(word2)) >= w) { add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); new_line = true; } if (new_line) { if (word2 != " ") curline.append(word2); new_line = false; } else { curline.append(word2); } i++; } } } } else if (inside_pre) { std::vector<std::string> words = utils::tokenize_nl(utils::quote_for_stfl(xpp.getText())); for (auto word : words) { if (word == "\n") { add_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); } else { curline.append(word); } } } else if (inside_script || inside_style) { // skip scripts and CSS styles } else { std::string s = utils::quote_for_stfl(xpp.getText()); while (s.length() > 0 && s[0] == '\n') s.erase(0, 1); std::vector<std::string> words = utils::tokenize_spaced(s); bool new_line = false; if (!line_is_nonempty(curline) && !words.empty() && words[0] == " ") { words.erase(words.begin()); } for (auto word : words) { if ((utils::strwidth_stfl(curline) + utils::strwidth_stfl(word)) >= w) { add_nonempty_line(curline, tables, lines); prepare_newline(curline, tables.size() ? 0 : indent_level); new_line = true; } if (new_line) { if (word != " ") curline.append(word); new_line = false; } else { curline.append(word); } } } } break; default: /* do nothing */ break; } } // and the rest add_nonempty_line(curline, tables, lines); // force all tables to be closed and rendered while (!tables.empty()) { std::vector<std::string> table_text; render_table(tables.back(), table_text); tables.pop_back(); for(size_t idx=0; idx < table_text.size(); ++idx) { std::string s = table_text[idx]; while (s.length() > 0 && s[0] == '\n') s.erase(0, 1); add_line(s, tables, lines); } } // add link list if (links.size() > 0) { lines.push_back(""); lines.push_back(_("Links: ")); for (unsigned int i=0; i<links.size(); ++i) { lines.push_back(utils::strprintf("[%u]: %s (%s)", i+1, links[i].first.c_str(), type2str(links[i].second).c_str())); } } }