std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) { std::string contents = ""; std::string tagname = get_tag_name(node); bool no_entity_substitution = in_set(no_entity_sub, tagname); bool keep_whitespace = in_set(preserve_whitespace, tagname); bool is_inline = in_set(nonbreaking_inline, tagname); bool is_structural = in_set(structural_tags, tagname); char c = indent_chars.at(0); int n = indent_chars.length(); std::string indent_space = std::string((lvl-1)*n,c); char last_char = 'x'; bool contains_block_tags = false; GumboVector* children = &node->v.element.children; if (is_structural || (tagname == "#document")) last_char = '\n'; bool in_head_without_title = (tagname == "head"); for (unsigned int i = 0; i < children->length; ++i) { GumboNode* child = static_cast<GumboNode*> (children->data[i]); if (child->type == GUMBO_NODE_TEXT) { std::string val; if (no_entity_substitution) { val = std::string(child->v.text.text); } else { val = substitute_xml_entities_into_text(std::string(child->v.text.text)); } // if child of a structual element is text and follows a newline, indent it properly if (is_structural && last_char == '\n') { contents.append(indent_space); ltrim(val); } if (!keep_whitespace && !is_structural) { // okay to condense whitespace condense_whitespace(val); } contents.append(val); } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) { std::string val = prettyprint(child, lvl, indent_chars); std::string childname = get_tag_name(child); if (in_head_without_title && (childname == "title")) in_head_without_title = false; if (!in_set(nonbreaking_inline, childname)) { contains_block_tags = true; if (last_char != '\n') { contents.append("\n"); if (tagname != "head" && tagname != "html") contents.append("\n"); last_char='\n'; } } // if child of a structual element is inline and follows a newline, indent it properly if (is_structural && in_set(nonbreaking_inline, childname) && (last_char == '\n')) { contents.append(indent_space); ltrim(val); } contents.append(val); } else if (child->type == GUMBO_NODE_WHITESPACE) { if (keep_whitespace) { std::string wspace = std::string(child->v.text.text); contents.append(wspace); } else if (is_inline || in_set(other_text_holders, tagname)) { if (std::string(" \t\v\f\r\n").find(last_char) == std::string::npos) { contents.append(std::string(" ")); } } } else if (child->type == GUMBO_NODE_CDATA) { contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>"); } else if (child->type == GUMBO_NODE_COMMENT) { contents.append("<!--" + std::string(child->v.text.text) + "-->"); } else { fprintf(stderr, "unknown element of type: %d\n", child->type); } // update last character of current contents if (!contents.empty()) { last_char = contents.at(contents.length()-1); } } // inject epmpty title into head if one is missing if (in_head_without_title) { if (last_char != '\n') contents.append("\n"); contents.append(indent_space + "<title></title>\n"); last_char = '\n'; } // treat inline tags containing block tags like a block tag if (is_inline && contains_block_tags) { if (last_char != '\n') contents.append("\n\n"); contents.append(indent_space); } return contents; }
std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) { std::string contents = ""; std::string tagname = get_tag_name(node); bool no_entity_substitution = in_set(no_entity_sub, tagname); bool keep_whitespace = in_set(preserve_whitespace, tagname); bool is_inline = in_set(nonbreaking_inline, tagname); bool is_structural = in_set(structural_tags, tagname); // bool pp_okay = !is_inline && !keep_whitespace; char c = indent_chars.at(0); int n = indent_chars.length(); GumboVector* children = &node->v.element.children; for (unsigned int i = 0; i < children->length; ++i) { GumboNode* child = static_cast<GumboNode*> (children->data[i]); if (child->type == GUMBO_NODE_TEXT) { std::string val; if (no_entity_substitution) { val = std::string(child->v.text.text); } else { val = substitute_xml_entities_into_text(std::string(child->v.text.text)); } // if child of a structual element is text, indent it properly if (is_structural) { std::string indent_space = std::string((lvl-1)*n,c); contents.append(indent_space); ltrim(val); } else if (!keep_whitespace && !is_structural) { // okay to condense whitespace condense_whitespace(val); } contents.append(val); } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) { std::string val = prettyprint(child, lvl, indent_chars); contents.append(val); } else if (child->type == GUMBO_NODE_WHITESPACE) { if (keep_whitespace) { std::string wspace = std::string(child->v.text.text); contents.append(wspace); } else if (is_inline || in_set(other_text_holders, tagname)) { char last_char = 'x'; if (!contents.empty()) { last_char = contents.at(contents.length()-1); } if (std::string(" \t\v\f\r\n").find(last_char) == std::string::npos) { contents.append(std::string(" ")); } } } else if (child->type == GUMBO_NODE_CDATA) { contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>"); } else if (child->type == GUMBO_NODE_COMMENT) { contents.append("<!--" + std::string(child->v.text.text) + "-->"); } else { fprintf(stderr, "unknown element of type: %d\n", child->type); } } return contents; }