예제 #1
0
// Serializes the children of `node` into pretty-printed markup and returns it.
//
// Behavior is driven by which tag sets the parent's tag name belongs to:
//   - no_entity_sub:       text children are emitted without entity substitution.
//   - preserve_whitespace: text is not condensed and whitespace nodes are kept as-is.
//   - nonbreaking_inline:  the parent is inline; whitespace-only children collapse
//                          to at most one space.
//   - structural_tags:     text and inline children that start a new line are
//                          left-trimmed and prefixed with the indentation.
// A "head" parent with no "title" child gets an empty <title></title> appended.
//
// @param node          node whose children are serialized; its children are read
//                      through v.element.children.
//                      NOTE(review): the "#document" check below suggests document
//                      nodes are passed here too, which would rely on the document
//                      and element variants sharing the children layout - confirm.
// @param lvl           nesting level; the indentation is (lvl-1) steps deep.
// @param indent_chars  one indentation step. Only its first character is used,
//                      repeated length() times per step. An empty string (or
//                      lvl < 1) now yields no indentation instead of throwing.
// @return the concatenated, formatted contents of all children.
std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) 
{
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    bool keep_whitespace        = in_set(preserve_whitespace, tagname);
    bool is_inline              = in_set(nonbreaking_inline, tagname);
    bool is_structural          = in_set(structural_tags, tagname);
    bool contains_block_tags    = false;

    // Loop-invariant facts about the parent, computed once instead of per child.
    const bool collapses_whitespace   = is_inline || in_set(other_text_holders, tagname);
    const bool blank_line_before_block = (tagname != "head") && (tagname != "html");
    const std::string whitespace_chars(" \t\v\f\r\n");

    // Build the indentation defensively: at(0) on an empty string throws
    // std::out_of_range, and a negative repeat count would wrap to a huge
    // size_type and throw std::length_error.
    std::string indent_space;
    if (!indent_chars.empty() && lvl > 1) {
        indent_space.assign(static_cast<std::string::size_type>(lvl - 1) * indent_chars.length(),
                            indent_chars.at(0));
    }

    GumboVector* children = &node->v.element.children;

    // last_char tracks the final character emitted so far. 'x' is a neutral
    // placeholder (neither newline nor whitespace); structural parents and the
    // document start out as if a newline had just been written.
    char last_char = 'x';
    if (is_structural || (tagname == "#document")) last_char = '\n';
    bool in_head_without_title = (tagname == "head");

    for (unsigned int i = 0; i < children->length; ++i) {

        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            std::string val;

            if (no_entity_substitution) {
                val = std::string(child->v.text.text);
            } else {
                val = substitute_xml_entities_into_text(std::string(child->v.text.text));
            }

            // if child of a structural element is text and follows a newline, indent it properly
            if (is_structural && last_char == '\n') {
                contents.append(indent_space);
                ltrim(val);
            }
            if (!keep_whitespace && !is_structural) {
                // okay to condense whitespace
                condense_whitespace(val);
            }
            contents.append(val);

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {

            std::string val = prettyprint(child, lvl, indent_chars);
            std::string childname = get_tag_name(child);
            const bool child_is_inline = in_set(nonbreaking_inline, childname);

            if (in_head_without_title && (childname == "title")) in_head_without_title = false;

            if (!child_is_inline) {
                contains_block_tags = true;
                // A non-inline child always starts on its own line; outside of
                // head/html it is additionally preceded by a blank line.
                if (last_char != '\n') {
                    contents.append("\n");
                    if (blank_line_before_block) contents.append("\n");
                    last_char = '\n';
                }
            }
            // if child of a structural element is inline and follows a newline, indent it properly
            if (is_structural && child_is_inline && (last_char == '\n')) {
                contents.append(indent_space);
                ltrim(val);
            }
            contents.append(val);

        } else if (child->type == GUMBO_NODE_WHITESPACE) {

            if (keep_whitespace) {
                contents.append(child->v.text.text);
            } else if (collapses_whitespace) {
                // Collapse the run to one space unless the output already ends in whitespace.
                if (whitespace_chars.find(last_char) == std::string::npos) {
                    contents.append(" ");
                }
            }
            // Any other parent: whitespace-only nodes are dropped.

        } else if (child->type == GUMBO_NODE_CDATA) {
            contents.append("<![CDATA[").append(child->v.text.text).append("]]>");

        } else if (child->type == GUMBO_NODE_COMMENT) {
            contents.append("<!--").append(child->v.text.text).append("-->");

        } else {
            fprintf(stderr, "unknown element of type: %d\n", static_cast<int>(child->type));
        }

        // update last character of current contents
        if (!contents.empty()) {
            last_char = contents[contents.length() - 1];
        }

    }

    // inject empty title into head if one is missing
    if (in_head_without_title) {
        if (last_char != '\n') contents.append("\n");
        contents.append(indent_space + "<title></title>\n");
        last_char = '\n';
    }

    // treat inline tags containing block tags like a block tag
    if (is_inline && contains_block_tags) {
        if (last_char != '\n') contents.append("\n\n");
        contents.append(indent_space);
    }

    return contents;
}
예제 #2
0
// Serializes the children of `node` into pretty-printed markup and returns it.
//
// Behavior is driven by which tag sets the parent's tag name belongs to:
//   - no_entity_sub:       text children are emitted without entity substitution.
//   - preserve_whitespace: text is not condensed and whitespace nodes are kept as-is.
//   - nonbreaking_inline / other_text_holders: whitespace-only children collapse
//                          to at most one space.
//   - structural_tags:     every text child is left-trimmed and prefixed with
//                          the indentation.
//
// @param node          node whose children (v.element.children) are serialized.
// @param lvl           nesting level; the indentation is (lvl-1) steps deep.
// @param indent_chars  one indentation step. Only its first character is used,
//                      repeated length() times per step. An empty string (or
//                      lvl < 1) now yields no indentation instead of throwing.
// @return the concatenated, formatted contents of all children.
std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) 
{
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    bool keep_whitespace        = in_set(preserve_whitespace, tagname);
    bool is_inline              = in_set(nonbreaking_inline, tagname);
    bool is_structural          = in_set(structural_tags, tagname);

    // Loop-invariant values, computed once instead of per child.
    const bool collapses_whitespace = is_inline || in_set(other_text_holders, tagname);
    const std::string whitespace_chars(" \t\v\f\r\n");

    // Build the indentation once and defensively: at(0) on an empty string
    // throws std::out_of_range, and a negative repeat count would wrap to a
    // huge size_type and throw std::length_error.
    std::string indent_space;
    if (!indent_chars.empty() && lvl > 1) {
        indent_space.assign(static_cast<std::string::size_type>(lvl - 1) * indent_chars.length(),
                            indent_chars.at(0));
    }

    GumboVector* children = &node->v.element.children;

    for (unsigned int i = 0; i < children->length; ++i) {

        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            std::string val;

            if (no_entity_substitution) {
                val = std::string(child->v.text.text);
            } else {
                val = substitute_xml_entities_into_text(std::string(child->v.text.text));
            }

            // if child of a structural element is text, indent it properly
            if (is_structural) {
                contents.append(indent_space);
                ltrim(val);
            } else if (!keep_whitespace) {
                // okay to condense whitespace
                condense_whitespace(val);
            }
            contents.append(val);

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {

            contents.append(prettyprint(child, lvl, indent_chars));

        } else if (child->type == GUMBO_NODE_WHITESPACE) {

            if (keep_whitespace) {
                contents.append(child->v.text.text);
            } else if (collapses_whitespace) {
                // Collapse the run to one space unless the output already ends
                // in whitespace; 'x' stands in for "nothing emitted yet".
                char last_char = 'x';
                if (!contents.empty()) {
                    last_char = contents[contents.length() - 1];
                }
                if (whitespace_chars.find(last_char) == std::string::npos) {
                    contents.append(" ");
                }
            }
            // Any other parent: whitespace-only nodes are dropped.

        } else if (child->type == GUMBO_NODE_CDATA) {
            contents.append("<![CDATA[").append(child->v.text.text).append("]]>");

        } else if (child->type == GUMBO_NODE_COMMENT) {
            contents.append("<!--").append(child->v.text.text).append("-->");

        } else {
            fprintf(stderr, "unknown element of type: %d\n", static_cast<int>(child->type));
        }

    }

    return contents;
}