Exemplo n.º 1
0
std::string GumboInterface::prettyprint(GumboNode* node, int lvl, const std::string indent_chars)
{

    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
      std::string results = build_doctype(node);
      results.append(prettyprint_contents(node,lvl+1,indent_chars));
      return results;
    }

    std::string tagname = get_tag_name(node);
    std::string parentname = get_tag_name(node->parent);
    bool in_head = (parentname == "head");

    bool is_structural = in_set(structural_tags, tagname);
    bool is_inline = in_set(nonbreaking_inline, tagname);

    // build attr string
    std::string atts = "";
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    const GumboVector * attribs = &node->v.element.attributes;
    for (unsigned int i=0; i< attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution));
    }

    bool is_void_tag = in_set(void_tags, tagname);

    // get tag contents
    std::string contents = "";
    if (!is_void_tag) {
        if (is_structural && tagname != "html") {
            contents = prettyprint_contents(node, lvl+1, indent_chars);
        } else {
            contents = prettyprint_contents(node, lvl, indent_chars);
        }
    }

    bool keep_whitespace = in_set(preserve_whitespace, tagname);
    if (!keep_whitespace && !is_inline) {
        rtrim(contents);
    }

    bool single = is_void_tag;
    // for xhtml serialization that allows non-void tags to be self-closing
    // uncomment the following line
    // single = single || contents.empty();

    char c = indent_chars.at(0);
    int  n = indent_chars.length(); 
    std::string indent_space = std::string((lvl-1)*n,c);

    // handle self-closed tags with no contents first
    if (single) {
        std::string selfclosetag = "<" + tagname + atts + "/>";
        if (is_inline) {
            // always add newline after br tags when they are children of structural tags
            if ((tagname == "br") && in_set(structural_tags, parentname)) {
              selfclosetag.append("\n");
              if (!in_head && (tagname != "html")) selfclosetag.append("\n");
            }
            return selfclosetag;
        }
        if (!in_head && (tagname != "html")) selfclosetag.append("\n");
        return indent_space + selfclosetag + "\n";
    } 

    // Handle the general case
    std::string results;
    std::string starttag = "<" + tagname +  atts + ">";
    std::string closetag = "</" + tagname + ">";

    if (is_structural) {
        results = indent_space + starttag;
        if (!contents.empty()) {
            results.append("\n" + contents + "\n" + indent_space);
        }  
        results.append(closetag + "\n");
        if (!in_head && (tagname != "html")) results.append("\n");
    } else if (is_inline) {
        results = starttag;
        results.append(contents);
        results.append(closetag);
    } else /** all others */ {
        results = indent_space + starttag;
        if (!keep_whitespace) {
            ltrim(contents);
        }
        results.append(contents);
        results.append(closetag + "\n");
        if (!in_head && (tagname != "html")) results.append("\n");
    }
    return results;
}
Exemplo n.º 2
0
std::string GumboInterface::prettyprint(GumboNode* node, int lvl, const std::string indent_chars)
{

    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
      std::string results = build_doctype(node);
      results.append(prettyprint_contents(node,lvl+1,indent_chars));
      return results;
    }

    std::string close              = "";
    std::string closeTag           = "";
    std::string atts               = "";
    std::string tagname            = get_tag_name(node);
    std::string parentname         = get_tag_name(node->parent);
    bool in_head                   = (parentname == "head");
    // bool need_special_handling     = in_set(special_handling, tagname);
    bool is_empty_tag              = in_set(empty_tags, tagname);
    bool no_entity_substitution    = in_set(no_entity_sub, tagname);
    bool keep_whitespace           = in_set(preserve_whitespace, tagname);
    bool is_inline                 = in_set(nonbreaking_inline, tagname) && (parentname != "body");
    bool is_structural             = in_set(structural_tags, tagname);
    bool pp_okay                   = !is_inline && !keep_whitespace;
    char c                         = indent_chars.at(0);
    int  n                         = indent_chars.length(); 

    // build attr string
    const GumboVector * attribs = &node->v.element.attributes;
    for (unsigned int i=0; i< attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution));
    }

    // determine closing tag type
    if (is_empty_tag) {
        close = "/";
    } else {
        closeTag = "</" + tagname + ">";
    }

    std::string indent_space = std::string((lvl-1)*n,c);
    std::string contents;

    // prettyprint your contents
    if (is_structural && tagname != "html") {
        contents = prettyprint_contents(node, lvl+1, indent_chars);
    } else {
        contents = prettyprint_contents(node, lvl, indent_chars);
    }

    if (is_structural) {
        rtrim(contents);
        if (!contents.empty()) contents.append("\n");
    }

    // remove any leading or trailing whitespace form within paragraphs
    if (tagname == "p") {
        ltrim(contents);
        rtrim(contents);
    }

    char last_char = ' ';
    if (!contents.empty()) {
        last_char = contents.at(contents.length()-1);
    } 

    // build results
    std::string results;

    if (!is_inline && !in_set(nonbreaking_inline, parentname)) {
      results.append(indent_space);
    }

    results.append("<"+tagname+atts+close+">");

    if (pp_okay && is_structural && !contents.empty()) {
        results.append("\n");
    }

    results.append(contents);

    if (pp_okay && (last_char != '\n') && !contents.empty() && is_structural) {
        results.append("\n");
    }

    // handle any indent before structural close tags
    if (!is_inline && is_structural && !closeTag.empty() && !contents.empty()) {
        results.append(indent_space);
    }

    results.append(closeTag);

    if ((pp_okay || tagname =="br") && !in_set(nonbreaking_inline, parentname)) {
        if (!in_head  && tagname != "html") {
            results.append("\n\n");
        } else {
            results.append("\n");
        }
    }

    return results;
}
Exemplo n.º 3
0
std::string GumboInterface::serialize(GumboNode* node, enum UpdateTypes doupdates) {
    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
        std::string results = build_doctype(node);
        results.append(serialize_contents(node, doupdates));
        return results;
    }

    std::string close = "";
    std::string closeTag = "";
    std::string atts = "";
    std::string tagname            = get_tag_name(node);
    bool need_special_handling     = in_set(special_handling, tagname);
    bool is_void_tag              = in_set(void_tags, tagname);
    bool no_entity_substitution    = in_set(no_entity_sub, tagname);
    // bool is_inline                 = in_set(nonbreaking_inline, tagname);
    bool is_href_src_tag           = in_set(href_src_tags, tagname);

    // build attr string  
    const GumboVector * attribs = &node->v.element.attributes;
    for (unsigned int i=0; i< attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution, ((doupdates & SourceUpdates) && is_href_src_tag), (doupdates & StyleUpdates)));
    }

    // Make sure that the xmlns attribute exists as an html tag attribute
    if (tagname == "html") {
      if (atts.find("xmlns=") == std::string::npos) {
        atts.append(" xmlns=\"http://www.w3.org/1999/xhtml\"");
      }
    }

    // determine closing tag type
    if (is_void_tag) {
        close = "/";
    } else {
        closeTag = "</" + tagname + ">";
    }

    std::string contents;

    if ((tagname == "body") && (doupdates & BodyUpdates)) {
        contents = m_newbody;
    } else {
        // serialize your contents
        contents = serialize_contents(node, doupdates);
    }

    if ((doupdates & StyleUpdates) && (tagname == "style") && 
        (node->parent->type == GUMBO_NODE_ELEMENT) && 
        (node->parent->v.element.tag == GUMBO_TAG_HEAD)) {
        contents = update_style_urls(contents);
    }

    if (need_special_handling) {
        ltrimnewlines(contents);
        rtrim(contents);
        contents.append("\n");
    }

    // build results
    std::string results;


    if ((doupdates & LinkUpdates) && (tagname == "link") && 
        (node->parent->type == GUMBO_NODE_ELEMENT) && 
        (node->parent->v.element.tag == GUMBO_TAG_HEAD)) {
      return "";
    }

    results.append("<"+tagname+atts+close+">");
    if (need_special_handling) results.append("\n");
    results.append(contents);

    if ((doupdates & LinkUpdates) && (tagname == "head")) {
        results.append(m_newcsslinks);
    }

    results.append(closeTag);
    if (need_special_handling) results.append("\n");
    return results;
}
Exemplo n.º 4
0
std::string GumboInterface::serialize(GumboNode* node, enum UpdateTypes doupdates) {
    // special case the document node
    if (node->type == GUMBO_NODE_DOCUMENT) {
        std::string results = build_doctype(node);
        results.append(serialize_contents(node, doupdates));
        return results;
    }

    std::string close = "";
    std::string closeTag = "";
    std::string atts = "";
    std::string tagname            = get_tag_name(node);
    std::string key                = "|" + tagname + "|";
    bool need_special_handling     = special_handling.find(key) != std::string::npos;
    bool is_empty_tag              = empty_tags.find(key) != std::string::npos;
    bool no_entity_substitution    = no_entity_sub.find(key) != std::string::npos;
    bool is_inline                 = nonbreaking_inline.find(key) != std::string::npos;
    bool is_href_src_tag           = href_src_tags.find(key) != std::string::npos;

    // build attr string  
    const GumboVector * attribs = &node->v.element.attributes;
    for (int i=0; i< attribs->length; ++i) {
        GumboAttribute* at = static_cast<GumboAttribute*>(attribs->data[i]);
        atts.append(build_attributes(at, no_entity_substitution, ((doupdates & SourceUpdates) && is_href_src_tag) ));
    }

    // determine closing tag type
    if (is_empty_tag) {
        close = "/";
    } else {
        closeTag = "</" + tagname + ">";
    }

    std::string contents;

    if ((tagname == "body") && (doupdates & BodyUpdates)) {
        contents = m_newbody;
    } else {
        // serialize your contents
        contents = serialize_contents(node, doupdates);
    }

    if (need_special_handling) {
        ltrimnewlines(contents);
        rtrim(contents);
        contents.append("\n");
    }

    // build results
    std::string results;

    if ((doupdates & LinkUpdates) && (tagname == "link") && 
        (node->parent->type == GUMBO_NODE_ELEMENT) && 
        (node->parent->v.element.tag == GUMBO_TAG_HEAD)) {
      return "";
    }

    results.append("<"+tagname+atts+close+">");
    if (need_special_handling) results.append("\n");
    results.append(contents);

    if ((doupdates & LinkUpdates) && (tagname == "head")) {
        results.append(m_newcsslinks);
    }

    results.append(closeTag);
    if (need_special_handling) results.append("\n");
    return results;
}