Esempio n. 1
0
std::string GumboInterface::serialize_contents(GumboNode* node, enum UpdateTypes doupdates) {
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    bool keep_whitespace        = in_set(preserve_whitespace, tagname);
    bool is_inline              = in_set(nonbreaking_inline, tagname);

    // build up result for each child, recursively if need be
    GumboVector* children = &node->v.element.children;

    bool inject_newline = false;

    for (unsigned int i = 0; i < children->length; ++i) {
        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            inject_newline = false;
            if (no_entity_substitution) {
                contents.append(std::string(child->v.text.text));
            } else {
                contents.append(substitute_xml_entities_into_text(std::string(child->v.text.text)));
            }

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {
            contents.append(serialize(child, doupdates));
            inject_newline = false;
            std::string childname = get_tag_name(child);
            if (!is_inline && !keep_whitespace && !in_set(nonbreaking_inline,childname)) {
                contents.append("\n");
                inject_newline = true;
            }

        } else if (child->type == GUMBO_NODE_WHITESPACE) {
            // try to keep all whitespace to keep as close to original as possible
            std::string wspace = std::string(child->v.text.text);
            if (inject_newline) {
                newlinetrim(wspace);
                inject_newline = false;
            }
            contents.append(wspace);
            inject_newline = false;

        } else if (child->type == GUMBO_NODE_CDATA) {
            contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>");
            inject_newline = false;

        } else if (child->type == GUMBO_NODE_COMMENT) {
            contents.append("<!--" + std::string(child->v.text.text) + "-->");
 
        } else {
            fprintf(stderr, "unknown element of type: %d\n", child->type); 
            inject_newline = false;
        }

    }

    return contents;
}
Esempio n. 2
0
std::string GumboInterface::substitute_xml_entities_into_attributes(char quote, const std::string &text)
{
    std::string result = substitute_xml_entities_into_text(text);
    if (quote == '"') {
        replace_all(result,"\"","&quot;");
    } else if (quote == '\'') {
        replace_all(result,"'","&apos;");
    }
    return result;
}
Esempio n. 3
0
std::string GumboInterface::serialize_contents(GumboNode* node, enum UpdateTypes doupdates) {
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    std::string key             = "|" + tagname + "|";
    bool no_entity_substitution = no_entity_sub.find(key) != std::string::npos;
    bool keep_whitespace        = preserve_whitespace.find(key) != std::string::npos;
    bool is_inline              = nonbreaking_inline.find(key) != std::string::npos;

    // build up result for each child, recursively if need be
    GumboVector* children = &node->v.element.children;

    for (unsigned int i = 0; i < children->length; ++i) {
        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            if (no_entity_substitution) {
                contents.append(std::string(child->v.text.text));
            } else {
                contents.append(substitute_xml_entities_into_text(std::string(child->v.text.text)));
            }

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {
          contents.append(serialize(child, doupdates));

        } else if (child->type == GUMBO_NODE_WHITESPACE) {
          // keep all whitespace to keep as close to original as possible
          contents.append(std::string(child->v.text.text));

        } else if (child->type == GUMBO_NODE_CDATA) {
          contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>");

        } else if (child->type == GUMBO_NODE_COMMENT) {
          contents.append("<!--" + std::string(child->v.text.text) + "-->");
 
        } else {
          fprintf(stderr, "unknown element of type: %d\n", child->type); 
        }

    }

    return contents;
}
Esempio n. 4
0
std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) 
{
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    bool keep_whitespace        = in_set(preserve_whitespace, tagname);
    bool is_inline              = in_set(nonbreaking_inline, tagname);
    bool is_structural          = in_set(structural_tags, tagname);
    char c                      = indent_chars.at(0);
    int  n                      = indent_chars.length(); 
    std::string indent_space    = std::string((lvl-1)*n,c);
    char last_char              = 'x';
    bool contains_block_tags    = false;

    GumboVector* children = &node->v.element.children;

    if (is_structural || (tagname == "#document")) last_char = '\n';
    bool in_head_without_title = (tagname == "head");

    for (unsigned int i = 0; i < children->length; ++i) {

        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            std::string val;

            if (no_entity_substitution) {
                val = std::string(child->v.text.text);
            } else {
                val = substitute_xml_entities_into_text(std::string(child->v.text.text));
            }

            // if child of a structual element is text and follows a newline, indent it properly
            if (is_structural && last_char == '\n') {
                contents.append(indent_space);
                ltrim(val);
            }
            if (!keep_whitespace && !is_structural) {
                // okay to condense whitespace
                condense_whitespace(val);
            }
            contents.append(val);

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {

            std::string val = prettyprint(child, lvl, indent_chars);
            std::string childname = get_tag_name(child);
            if (in_head_without_title && (childname == "title")) in_head_without_title = false;
            if (!in_set(nonbreaking_inline, childname)) {
                contains_block_tags = true;
                if (last_char != '\n') {
                    contents.append("\n");
                    if (tagname != "head" && tagname != "html") contents.append("\n");
                    last_char='\n';
                }
            }
            // if child of a structual element is inline and follows a newline, indent it properly
            if (is_structural && in_set(nonbreaking_inline, childname) && (last_char == '\n')) {
                contents.append(indent_space);
                ltrim(val);
            }    
            contents.append(val);

        } else if (child->type == GUMBO_NODE_WHITESPACE) {

            if (keep_whitespace) {
                std::string wspace = std::string(child->v.text.text);
                contents.append(wspace);
            } else if (is_inline || in_set(other_text_holders, tagname)) {
                if (std::string(" \t\v\f\r\n").find(last_char) == std::string::npos) {
                    contents.append(std::string(" "));
                }
            }

        } else if (child->type == GUMBO_NODE_CDATA) {
            contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>");

        } else if (child->type == GUMBO_NODE_COMMENT) {
            contents.append("<!--" + std::string(child->v.text.text) + "-->");
 
        } else {
            fprintf(stderr, "unknown element of type: %d\n", child->type); 
        }

        // update last character of current contents
        if (!contents.empty()) {
            last_char = contents.at(contents.length()-1);
        }

    }

    // inject epmpty title into head if one is missing
    if (in_head_without_title) {
        if (last_char != '\n') contents.append("\n");
        contents.append(indent_space + "<title></title>\n");
        last_char = '\n';
    }

    // treat inline tags containing block tags like a block tag
    if (is_inline && contains_block_tags) {
      if (last_char != '\n') contents.append("\n\n");
      contents.append(indent_space);
    }

    return contents;
}
Esempio n. 5
0
std::string GumboInterface::prettyprint_contents(GumboNode* node, int lvl, const std::string indent_chars) 
{
    std::string contents        = "";
    std::string tagname         = get_tag_name(node);
    bool no_entity_substitution = in_set(no_entity_sub, tagname);
    bool keep_whitespace        = in_set(preserve_whitespace, tagname);
    bool is_inline              = in_set(nonbreaking_inline, tagname);
    bool is_structural          = in_set(structural_tags, tagname);
    // bool pp_okay                = !is_inline && !keep_whitespace;
    char c                      = indent_chars.at(0);
    int  n                      = indent_chars.length(); 

    GumboVector* children = &node->v.element.children;

    for (unsigned int i = 0; i < children->length; ++i) {

        GumboNode* child = static_cast<GumboNode*> (children->data[i]);

        if (child->type == GUMBO_NODE_TEXT) {
            std::string val;

            if (no_entity_substitution) {
                val = std::string(child->v.text.text);
            } else {
                val = substitute_xml_entities_into_text(std::string(child->v.text.text));
            }

            // if child of a structual element is text, indent it properly
            if (is_structural) {
              std::string indent_space = std::string((lvl-1)*n,c);
              contents.append(indent_space);
              ltrim(val);
            } else if (!keep_whitespace && !is_structural) {
                // okay to condense whitespace
                condense_whitespace(val);
            }
            contents.append(val);

        } else if (child->type == GUMBO_NODE_ELEMENT || child->type == GUMBO_NODE_TEMPLATE) {

            std::string val = prettyprint(child, lvl, indent_chars);
            contents.append(val);

        } else if (child->type == GUMBO_NODE_WHITESPACE) {

            if (keep_whitespace) {
                std::string wspace = std::string(child->v.text.text);
                contents.append(wspace);
            } else if (is_inline || in_set(other_text_holders, tagname)) {
                char last_char = 'x';
                if (!contents.empty()) {
                    last_char = contents.at(contents.length()-1);
                }
                if (std::string(" \t\v\f\r\n").find(last_char) == std::string::npos) {
                    contents.append(std::string(" "));
                }
            }

        } else if (child->type == GUMBO_NODE_CDATA) {
            contents.append("<![CDATA[" + std::string(child->v.text.text) + "]]>");

        } else if (child->type == GUMBO_NODE_COMMENT) {
            contents.append("<!--" + std::string(child->v.text.text) + "-->");
 
        } else {
            fprintf(stderr, "unknown element of type: %d\n", child->type); 
        }

    }

    return contents;
}