Exemplo n.º 1
0
std::string render(const row_type &headers, std::vector<std::size_t> &widths, wmi_impl::row_enumerator e) {
	std::list<row_type> rows;
	std::size_t count = widths.size();
	while (e.has_next()) {
		wmi_impl::row wmi_row = e.get_next();
		row_type row;
		for (std::size_t i = 0; i < count; i++) {
			std::string c = wmi_row.get_string(headers[i]);
			widths[i] = (std::max)(c.size(), widths[i]);
			row.push_back(c);
		}
		rows.push_back(row);
	}
	return render_table(widths, headers, rows);
}
Exemplo n.º 2
0
/*
 * Renders the HTML read from `input` as text lines.
 *
 * The markup is walked with a tagsouppullparser. Start tags, end tags and
 * text events are translated into text that is accumulated in a "current
 * line" and flushed through add_line()/add_nonempty_line(), which receive
 * both the stack of currently open tables and `lines`.
 *
 * Parameters:
 *   input - stream the pull parser reads the markup from.
 *   lines - output; rendered lines are added here. When `links` is non-empty
 *           at the end, an empty line, a "Links: " heading and one line per
 *           entry of `links` are appended as well.
 *   links - link targets found in <a href>, flash <embed src> and <img src>
 *           are registered here through add_link().
 *   url   - base URL handed to utils::absolute_url() to resolve href/src
 *           values.
 *
 * Unless raw_ is set, inline markup such as "<b>", "<u>" and "</>" is emitted
 * for links, strong and underlined text.
 */
void htmlrenderer::render(std::istream& input, std::vector<std::string>& lines, std::vector<linkpair>& links, const std::string& url) {
	std::string curline; // line currently being assembled
	int indent_level = 0;
	bool inside_li = false, is_ol = false, inside_pre = false;
	bool itunes_hack = false;
	size_t inside_script = 0; // nesting depth of <script>
	size_t inside_style = 0; // nesting depth of <style>
	std::vector<unsigned int> ol_counts; // next item number, one entry per open <ol>
	std::vector<char> ol_types; // numbering type, one entry per open <ol>
	htmltag current_tag;
	int link_num = -1; // -1 means: no link reference pending for a closing </a>
	std::vector<Table> tables; // stack of open tables, innermost last

	/*
	 * to render the HTML, we use a self-developed "XML" pull parser.
	 *
	 * A pull parser works like this:
	 *   - we feed it with an XML stream
	 *   - we then gather an iterator
	 *   - we then can iterate over all continuous elements, such as start tag, close tag, text element, ...
	 */
	tagsouppullparser xpp;
	xpp.setInput(input);

	for (tagsouppullparser::event e = xpp.next(); e != tagsouppullparser::END_DOCUMENT; e = xpp.next()) {
		std::string tagname;
		switch (e) {
		case tagsouppullparser::START_TAG:
			// tag names are looked up case-insensitively
			tagname = xpp.getText();
			std::transform(tagname.begin(), tagname.end(), tagname.begin(), ::tolower);
			current_tag = tags[tagname];

			switch (current_tag) {
			case TAG_A: {
				std::string link;
				try {
					link = xpp.getAttributeValue("href");
				} catch (const std::invalid_argument& ) {
					LOG(LOG_WARN,"htmlrenderer::render: found a tag with no href attribute");
					link = "";
				}
				if (link.length() > 0) {
					// remember the link number so that </a> can print the "[n]" reference
					link_num = add_link(links,utils::censor_url(utils::absolute_url(url,link)), LINK_HREF);
					if (!raw_)
						curline.append("<u>");
				}
			}
			break;
			case TAG_STRONG:
				if (!raw_)
					curline.append("<b>");
				break;
			case TAG_UNDERLINE:
				if (!raw_)
					curline.append("<u>");
				break;
			case TAG_QUOTATION:
				if (!raw_)
					curline.append("\"");
				break;

			case TAG_EMBED: {
				std::string type;
				try {
					type = xpp.getAttributeValue("type");
				} catch (const std::invalid_argument& ) {
					LOG(LOG_WARN, "htmlrenderer::render: found embed object without type attribute");
					type = "";
				}
				// only flash embeds are turned into a link
				if (type == "application/x-shockwave-flash") {
					std::string link;
					try {
						link = xpp.getAttributeValue("src");
					} catch (const std::invalid_argument& ) {
						LOG(LOG_WARN, "htmlrenderer::render: found embed object without src attribute");
						link = "";
					}
					if (link.length() > 0) {
						link_num = add_link(links,utils::censor_url(utils::absolute_url(url,link)), LINK_EMBED);
						curline.append(utils::strprintf("[%s %u]", _("embedded flash:"), link_num));
					}
				}
			}
			break;

			case TAG_BR:
				add_line(curline, tables, lines);
				prepare_newline(curline, tables.size() ? 0 : indent_level);
				break;

			case TAG_PRE:
				inside_pre = true;
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_ITUNESHACK:
				itunes_hack = true;
				break;

			case TAG_IMG: {
				std::string imgurl;
				std::string imgtitle;
				try {
					imgurl = xpp.getAttributeValue("src");
				} catch (const std::invalid_argument& ) {
					LOG(LOG_WARN,"htmlrenderer::render: found img tag with no src attribute");
					imgurl = "";
				}
				try {
					imgtitle = xpp.getAttributeValue("title");
				} catch (const std::invalid_argument& ) {
					imgtitle = "";
				}
				if (imgurl.length() > 0) {
					// data: URLs are not listed verbatim, only a placeholder is recorded
					if (imgurl.substr(0,5) == "data:") {
						link_num = add_link(links, "inline image", LINK_IMG);
					} else {
						link_num = add_link(links,utils::censor_url(utils::absolute_url(url,imgurl)), LINK_IMG);
					}
					if (imgtitle != "") {
						curline.append(utils::strprintf("[%s %u: %s]", _("image"), link_num, imgtitle.c_str()));
					} else {
						curline.append(utils::strprintf("[%s %u]", _("image"), link_num));
					}
				}
			}
			break;

			case TAG_BLOCKQUOTE:
				++indent_level;
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline, tables.size() ? 0 : indent_level);
				break;

			case TAG_H1:
			case TAG_H2:
			case TAG_H3:
			case TAG_H4:
			case TAG_P:
				add_nonempty_line(curline, tables, lines);
				// add a separating empty line only if the last line is longer than indent_level*2 characters
				if (lines.size() > 0 && lines.back().length() > static_cast<unsigned int>(indent_level*2))
					add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_OL:
				is_ol = true;
				{
					// numbering starts at the "start" attribute, or at 1 if it is absent
					unsigned int ol_count = 1;
					try {
						std::string ol_count_str = xpp.getAttributeValue("start");
						std::istringstream is(ol_count_str);
						is >> ol_count;
					} catch (const std::invalid_argument& ) {
						ol_count = 1;
					}
					ol_counts.push_back(ol_count);

					// only the types 1, a, A, i and I are accepted; anything else becomes "1"
					std::string ol_type;
					try {
						ol_type = xpp.getAttributeValue("type");
						if (ol_type != "1" && ol_type != "a" && ol_type != "A" && ol_type != "i" && ol_type != "I") {
							ol_type = "1";
						}
					} catch (const std::invalid_argument& ) {
						ol_type = "1";
					}
					ol_types.push_back(ol_type[0]);
				}
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_UL:
				is_ol = false;
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_LI:
				// a new <li> while the previous one is still open implicitly closes the previous one
				if (inside_li) {
					indent_level-=2;
					if (indent_level < 0) indent_level = 0;
					add_nonempty_line(curline, tables, lines);
					prepare_newline(curline,  tables.size() ? 0 : indent_level);
				}
				inside_li = true;
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				indent_level+=2;
				if (is_ol && ol_counts.size() != 0) {
					curline.append(utils::strprintf("%s.", format_ol_count(ol_counts.back(), ol_types.back()).c_str()));
					++ol_counts.back();
				} else {
					curline.append("  * ");
				}
				break;

			case TAG_DT:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_DD:
				indent_level+=4;
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_DL:
				// ignore tag
				break;

			case TAG_SUP:
				curline.append("^");
				break;

			case TAG_SUB:
				curline.append("[");
				break;

			case TAG_HR: {
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				// never evaluate w - 2 for w < 2: it would yield a negative or wrapped-around length
				const size_t rule_len = (w > 2) ? static_cast<size_t>(w - 2) : 0;
				add_line(std::string(" ") + std::string(rule_len, '-') + std::string(" "), tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;
			}

			case TAG_SCRIPT:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);

				// don't render scripts, ignore current line
				inside_script++;
				break;

			case TAG_STYLE:
				inside_style++;
				break;

			case TAG_TABLE: {
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline, 0); // no indent in tables

				bool border = false;
				try {
					std::string b = xpp.getAttributeValue("border");
					border = (utils::to_u(b) > 0);
				} catch (const std::invalid_argument& ) {
					// is ok, no border then
				}
				tables.push_back(Table(border));
				break;
			}

			case TAG_TR:
				if (!tables.empty())
					tables.back().start_row();
				break;

			case TAG_TH: {
				size_t span = 1;
				try {
					span = utils::to_u(xpp.getAttributeValue("colspan"));
				} catch (const std::invalid_argument& ) {
					// is ok, span 1 then
				}
				if (!tables.empty())
					tables.back().start_cell(span);
				// NOTE(review): "<b>" is appended even outside of a table and even when raw_ is set,
				// while </th> only appends "</>" inside a table - confirm whether this is intended.
				curline.append("<b>");
				break;
			}

			case TAG_TD: {
				size_t span = 1;
				try {
					span = utils::to_u(xpp.getAttributeValue("colspan"));
				} catch (const std::invalid_argument& ) {
					// is ok, span 1 then
				}
				if (!tables.empty())
					tables.back().start_cell(span);
				break;
			}
			}
			break;

		case tagsouppullparser::END_TAG:
			tagname = xpp.getText();
			std::transform(tagname.begin(), tagname.end(), tagname.begin(), ::tolower);
			current_tag = tags[tagname];

			switch (current_tag) {
			case TAG_BLOCKQUOTE:
				--indent_level;
				if (indent_level < 0) indent_level = 0;
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_OL:
				// a stray </ol> without a matching <ol> must not pop from an
				// empty vector, which would be undefined behaviour
				if (!ol_types.empty())
					ol_types.pop_back();
				if (!ol_counts.empty())
					ol_counts.pop_back();
			// fall-through
			case TAG_UL:
				// the list ends while an item is still open: close that item first
				if (inside_li) {
					indent_level-=2;
					if (indent_level < 0) indent_level = 0;
					add_nonempty_line(curline, tables, lines);
					prepare_newline(curline,  tables.size() ? 0 : indent_level);
				}
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_DT:
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_DD:
				indent_level-=4;
				if (indent_level < 0) indent_level = 0;
				add_nonempty_line(curline, tables, lines);
				add_line("", tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_DL:
				// ignore tag
				break;

			case TAG_LI:
				indent_level-=2;
				if (indent_level < 0) indent_level = 0;
				inside_li = false;
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_H1:
				// a non-empty top-level heading gets underlined with dashes of the same display width
				if (line_is_nonempty(curline)) {
					add_line(curline, tables, lines);
					size_t llen = utils::strwidth_stfl(curline);
					prepare_newline(curline,  tables.size() ? 0 : indent_level);
					add_line(std::string(llen, '-'), tables, lines);
				}
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_H2:
			case TAG_H3:
			case TAG_H4:
			case TAG_P:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_PRE:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				inside_pre = false;
				break;

			case TAG_SUB:
				curline.append("]");
				break;

			case TAG_SUP:
				// has closing tag, but we render nothing.
				break;

			case TAG_A:
				// only links that had a usable href get a "[n]" reference
				if (link_num != -1) {
					if (!raw_)
						curline.append("</>");
					curline.append(utils::strprintf("[%d]", link_num));
					link_num = -1;
				}
				break;

			case TAG_UNDERLINE:
				if (!raw_)
					curline.append("</>");
				break;

			case TAG_STRONG:
				if (!raw_)
					curline.append("</>");
				break;

			case TAG_QUOTATION:
				if (!raw_)
					curline.append("\"");
				break;

			case TAG_EMBED:
			case TAG_BR:
			case TAG_ITUNESHACK:
			case TAG_IMG:
			case TAG_HR:
				// ignore closing tags
				break;

			case TAG_SCRIPT:
				// don't render scripts, ignore current line
				if (inside_script)
					inside_script--;
				prepare_newline(curline,  tables.size() ? 0 : indent_level);
				break;

			case TAG_STYLE:
				if (inside_style)
					inside_style--;
				break;

			case TAG_TABLE:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline, 0); // no indent in tables

				if (!tables.empty()) {
					std::vector<std::string> table_text;
					// finish whatever cell/row is still open before rendering
					tables.back().complete_cell();
					tables.back().complete_row();
					render_table(tables.back(), table_text);
					tables.pop_back();

					if (!tables.empty()) { // still a table on the outside?
						for(size_t idx=0; idx < table_text.size(); ++idx)
							tables.back().add_text(table_text[idx]); // add rendered table to current cell
					} else {
						for(size_t idx=0; idx < table_text.size(); ++idx) {
							// strip leading newlines from each rendered table line
							std::string s = table_text[idx];
							while (s.length() > 0 && s[0] == '\n')
								s.erase(0, 1);
							add_line(s, tables, lines);
						}
					}
				}
				prepare_newline(curline, tables.size() ? 0: indent_level);
				break;


			case TAG_TR:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline, 0); // no indent in tables

				if (!tables.empty())
					tables.back().complete_row();
				break;

			case TAG_TH:
				if (!tables.empty()) {
					curline.append("</>");
				}

				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline, 0); // no indent in tables

				if (!tables.empty()) {
					tables.back().complete_cell();
				}
				break;

			case TAG_TD:
				add_nonempty_line(curline, tables, lines);
				prepare_newline(curline, 0); // no indent in tables

				if (!tables.empty())
					tables.back().complete_cell();
				break;
			}
			break;

		case tagsouppullparser::TEXT: {
			if (itunes_hack) {
				// newlines in the text are kept as line breaks, and each line is additionally wrapped at width w
				std::vector<std::string> words = utils::tokenize_nl(utils::quote_for_stfl(xpp.getText()));
				for (const auto& word : words) {
					if (word == "\n") {
						add_line(curline, tables, lines);
						prepare_newline(curline,  tables.size() ? 0 : indent_level);
					} else {
						std::vector<std::string> words2 = utils::tokenize_spaced(word);
						bool new_line = false;
						for (const auto& word2 : words2) {
							if ((utils::strwidth_stfl(curline) + utils::strwidth_stfl(word2)) >= w) {
								add_nonempty_line(curline, tables, lines);
								prepare_newline(curline,  tables.size() ? 0 : indent_level);
								new_line = true;
							}
							if (new_line) {
								// don't start a wrapped line with a single space
								if (word2 != " ")
									curline.append(word2);
								new_line = false;
							} else {
								curline.append(word2);
							}
						}
					}
				}
			} else if (inside_pre) {
				// preformatted text: keep line breaks, no wrapping
				std::vector<std::string> words = utils::tokenize_nl(utils::quote_for_stfl(xpp.getText()));
				for (const auto& word : words) {
					if (word == "\n") {
						add_line(curline, tables, lines);
						prepare_newline(curline,  tables.size() ? 0 : indent_level);
					} else {
						curline.append(word);
					}
				}
			} else if (inside_script || inside_style) {
				// skip scripts and CSS styles
			} else {
				// regular text: drop leading newlines and wrap at width w
				std::string s = utils::quote_for_stfl(xpp.getText());
				while (s.length() > 0 && s[0] == '\n')
					s.erase(0, 1);
				std::vector<std::string> words = utils::tokenize_spaced(s);

				bool new_line = false;

				// don't begin an otherwise empty line with a single space
				if (!line_is_nonempty(curline) && !words.empty() && words[0] == " ") {
					words.erase(words.begin());
				}

				for (const auto& word : words) {
					if ((utils::strwidth_stfl(curline) + utils::strwidth_stfl(word)) >= w) {
						add_nonempty_line(curline, tables, lines);
						prepare_newline(curline, tables.size() ? 0 : indent_level);
						new_line = true;
					}
					if (new_line) {
						// don't start a wrapped line with a single space
						if (word != " ")
							curline.append(word);
						new_line = false;
					} else {
						curline.append(word);
					}
				}
			}
		}
		break;
		default:
			/* do nothing */
			break;
		}
	}

	// and the rest
	add_nonempty_line(curline, tables, lines);

	// force all tables to be closed and rendered
	while (!tables.empty()) {
		std::vector<std::string> table_text;
		render_table(tables.back(), table_text);
		tables.pop_back();
		for(size_t idx=0; idx < table_text.size(); ++idx) {
			std::string s = table_text[idx];
			while (s.length() > 0 && s[0] == '\n')
				s.erase(0, 1);
			add_line(s, tables, lines);
		}
	}

	// add link list
	if (links.size() > 0) {
		lines.push_back("");
		lines.push_back(_("Links: "));
		for (unsigned int i=0; i<links.size(); ++i) {
			lines.push_back(utils::strprintf("[%u]: %s (%s)", i+1, links[i].first.c_str(), type2str(links[i].second).c_str()));
		}
	}
}