示例#1
0
/// Serialize `node` and everything below it as indented markup text.
///
/// A document node yields build_doctype(node) followed by its contents printed
/// one level deeper. Every other node is read through node->v.element, so it is
/// presumably an element node -- TODO confirm against callers. The layout of an
/// element depends on which tag sets its name belongs to:
///   - void_tags:          emitted as a self-closed "<tag .../>"
///   - structural_tags:    start tag, contents and end tag on separate lines
///   - nonbreaking_inline: emitted in place, with no indent and no line breaks
///   - anything else:      indented start tag directly followed by the contents
///                         and the end tag
///
/// @param node          node to serialize; dereferenced unconditionally, so it
///                      must not be null.
/// @param lvl           nesting depth; the tag is indented by
///                      (lvl - 1) * indent_chars.length() copies of the first
///                      character of indent_chars. Depths below 1 get no indent.
/// @param indent_chars  indent unit; may be empty, which disables indentation.
/// @return the serialized text, including its trailing line breaks.
std::string GumboInterface::prettyprint(GumboNode* node, int lvl, const std::string indent_chars)
{

    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
        std::string results = build_doctype(node);
        results.append(prettyprint_contents(node, lvl+1, indent_chars));
        return results;
    }

    std::string tagname = get_tag_name(node);
    std::string parentname = get_tag_name(node->parent);
    bool in_head = (parentname == "head");

    bool is_structural = in_set(structural_tags, tagname);
    bool is_inline = in_set(nonbreaking_inline, tagname);

    // build attr string
    std::string atts = "";
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    const GumboVector * attribs = &node->v.element.attributes;
    for (unsigned int i = 0; i < attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution));
    }

    bool is_void_tag = in_set(void_tags, tagname);

    // get tag contents; children of structural tags other than html are
    // printed one level deeper, everything else stays at the current level
    std::string contents = "";
    if (!is_void_tag) {
        if (is_structural && tagname != "html") {
            contents = prettyprint_contents(node, lvl+1, indent_chars);
        } else {
            contents = prettyprint_contents(node, lvl, indent_chars);
        }
    }

    bool keep_whitespace = in_set(preserve_whitespace, tagname);
    if (!keep_whitespace && !is_inline) {
        rtrim(contents);
    }

    bool single = is_void_tag;
    // for xhtml serialization that allows non-void tags to be self-closing
    // uncomment the following line
    // single = single || contents.empty();

    // Indent for this tag: the first character of indent_chars repeated
    // (lvl-1) * indent_chars.length() times. An empty indent string or a depth
    // below 1 yields no indent; previously those inputs threw
    // std::out_of_range (from at(0)) or std::length_error (negative count
    // converted to a huge size_type).
    std::string indent_space;
    if (lvl > 1 && !indent_chars.empty()) {
        indent_space.assign(static_cast<std::string::size_type>(lvl - 1) * indent_chars.length(),
                            indent_chars[0]);
    }

    // Tags outside <head>, other than html itself, are followed by an extra
    // newline (i.e. a blank line) wherever a line break is emitted below.
    bool blank_line_after = !in_head && (tagname != "html");

    // handle self-closed tags with no contents first
    if (single) {
        std::string selfclosetag = "<" + tagname + atts + "/>";
        if (is_inline) {
            // always add newline after br tags when they are children of structural tags
            if ((tagname == "br") && in_set(structural_tags, parentname)) {
                selfclosetag.append("\n");
                if (blank_line_after) selfclosetag.append("\n");
            }
            return selfclosetag;
        }
        if (blank_line_after) selfclosetag.append("\n");
        return indent_space + selfclosetag + "\n";
    }

    // Handle the general case
    std::string results;
    std::string starttag = "<" + tagname + atts + ">";
    std::string closetag = "</" + tagname + ">";

    if (is_structural) {
        // start tag, contents and close tag each on their own line(s); an
        // empty element collapses to "<tag></tag>" on a single line
        results = indent_space + starttag;
        if (!contents.empty()) {
            results.append("\n" + contents + "\n" + indent_space);
        }
        results.append(closetag + "\n");
        if (blank_line_after) results.append("\n");
    } else if (is_inline) {
        // inline tags are emitted verbatim with no indent or line breaks
        results = starttag;
        results.append(contents);
        results.append(closetag);
    } else /** all others */ {
        results = indent_space + starttag;
        if (!keep_whitespace) {
            ltrim(contents);
        }
        results.append(contents);
        results.append(closetag + "\n");
        if (blank_line_after) results.append("\n");
    }
    return results;
}
示例#2
0
/// Serialize `node` and everything below it as indented markup text.
///
/// A document node yields build_doctype(node) followed by its contents printed
/// one level deeper. Every other node is read through node->v.element, so it is
/// presumably an element node -- TODO confirm against callers. For elements:
///   - tags in empty_tags are written as "<tag .../>" with no end tag;
///   - a tag counts as inline only if it is in nonbreaking_inline AND its
///     parent is not "body";
///   - structural tags put their contents on separate lines and indent the
///     end tag;
///   - the contents of "p" are trimmed on both sides;
///   - the indent is skipped for inline tags and for children of inline tags.
///
/// @param node          node to serialize; dereferenced unconditionally, so it
///                      must not be null.
/// @param lvl           nesting depth; the tag is indented by
///                      (lvl - 1) * indent_chars.length() copies of the first
///                      character of indent_chars. Depths below 1 get no indent.
/// @param indent_chars  indent unit; may be empty, which disables indentation.
/// @return the serialized text, including its trailing line breaks.
std::string GumboInterface::prettyprint(GumboNode* node, int lvl, const std::string indent_chars)
{

    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
        std::string results = build_doctype(node);
        results.append(prettyprint_contents(node, lvl+1, indent_chars));
        return results;
    }

    std::string close              = "";
    std::string closeTag           = "";
    std::string atts               = "";
    std::string tagname            = get_tag_name(node);
    std::string parentname         = get_tag_name(node->parent);
    bool in_head                   = (parentname == "head");
    // bool need_special_handling     = in_set(special_handling, tagname);
    bool is_empty_tag              = in_set(empty_tags, tagname);
    bool no_entity_substitution    = in_set(no_entity_sub, tagname);
    bool keep_whitespace           = in_set(preserve_whitespace, tagname);
    // inline tags that are direct children of body are laid out like block tags
    bool is_inline                 = in_set(nonbreaking_inline, tagname) && (parentname != "body");
    bool is_structural             = in_set(structural_tags, tagname);
    // line breaks may only be inserted where whitespace is not significant
    bool pp_okay                   = !is_inline && !keep_whitespace;

    // build attr string
    const GumboVector * attribs = &node->v.element.attributes;
    for (unsigned int i = 0; i < attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution));
    }

    // determine closing tag type: "<tag/>" for empty tags, "</tag>" otherwise
    if (is_empty_tag) {
        close = "/";
    } else {
        closeTag = "</" + tagname + ">";
    }

    // Indent for this tag: the first character of indent_chars repeated
    // (lvl-1) * indent_chars.length() times. An empty indent string or a depth
    // below 1 yields no indent; previously those inputs threw
    // std::out_of_range (from at(0)) or std::length_error (negative count
    // converted to a huge size_type).
    std::string indent_space;
    if (lvl > 1 && !indent_chars.empty()) {
        indent_space.assign(static_cast<std::string::size_type>(lvl - 1) * indent_chars.length(),
                            indent_chars[0]);
    }
    std::string contents;

    // prettyprint your contents; children of structural tags other than html
    // are printed one level deeper
    if (is_structural && tagname != "html") {
        contents = prettyprint_contents(node, lvl+1, indent_chars);
    } else {
        contents = prettyprint_contents(node, lvl, indent_chars);
    }

    // non-empty structural contents always end in exactly one appended newline
    if (is_structural) {
        rtrim(contents);
        if (!contents.empty()) contents.append("\n");
    }

    // remove any leading or trailing whitespace from within paragraphs
    if (tagname == "p") {
        ltrim(contents);
        rtrim(contents);
    }

    char last_char = ' ';
    if (!contents.empty()) {
        last_char = contents.at(contents.length()-1);
    }

    // build results
    std::string results;

    if (!is_inline && !in_set(nonbreaking_inline, parentname)) {
        results.append(indent_space);
    }

    results.append("<" + tagname + atts + close + ">");

    // structural contents start on the line after the start tag
    if (pp_okay && is_structural && !contents.empty()) {
        results.append("\n");
    }

    results.append(contents);

    // Make sure the end tag of a structural element starts on its own line.
    // Structural contents already end in '\n' unless the "p" trim above
    // (presumably) stripped it again.
    if (pp_okay && (last_char != '\n') && !contents.empty() && is_structural) {
        results.append("\n");
    }

    // handle any indent before structural close tags
    if (!is_inline && is_structural && !closeTag.empty() && !contents.empty()) {
        results.append(indent_space);
    }

    results.append(closeTag);

    // Line break after the element (and after br), unless the parent is an
    // inline tag; outside <head>, and for tags other than html, a blank line.
    if ((pp_okay || tagname == "br") && !in_set(nonbreaking_inline, parentname)) {
        if (!in_head && tagname != "html") {
            results.append("\n\n");
        } else {
            results.append("\n");
        }
    }

    return results;
}