Ejemplo n.º 1
0
// Mends the given XHTML and returns it in pretty-printed form.
//   source  - XHTML text to process
//   version - version string handed to GumboInterface
// The text goes through four stages in order: special-case
// preprocessing, Gumbo pretty-printing, character-to-entity
// conversion and DOCTYPE header prettifying.
QString CleanSource::MendPrettify(const QString &source, const QString &version)
{
    QString working = PreprocessSpecialCases(source);

    GumboInterface gumbo(working, version);
    working = gumbo.prettyprint();

    working = CharToEntity(working);
    return PrettifyDOCTYPEHeader(working);
}
Ejemplo n.º 2
0
// Performs general cleaning (and improving)
// of provided book XHTML source code.
//   source  - XHTML text to clean
//   version - version string handed to GumboInterface
// Returns the repaired text after special-case preprocessing,
// Gumbo repair, character-to-entity conversion and DOCTYPE
// header prettifying, applied in that order.
QString CleanSource::Mend(const QString &source, const QString &version)
{
    QString newsource = PreprocessSpecialCases(source);
    GumboInterface gp(newsource, version);
    newsource = gp.repair();
    newsource = CharToEntity(newsource);
    return PrettifyDOCTYPEHeader(newsource);
}
Ejemplo n.º 3
0
// Applies the stored HTML updates to the stored source and returns the
// result after character-to-entity conversion.
// When CSS updates are present they are applied to the HTML-updated text
// before the entity conversion.
QString PerformHTMLUpdates::operator()()
{
    QString updated = m_source;

    GumboInterface parser(updated);
    parser.parse();
    updated = parser.perform_source_updates(m_HTMLUpdates, m_CurrentPath);

    // Nothing CSS-related to rewrite: convert and hand back right away.
    if (m_CSSUpdates.isEmpty()) {
        return CleanSource::CharToEntity(updated);
    }

    updated = PerformCSSUpdates(updated, m_CSSUpdates)();
    return CleanSource::CharToEntity(updated);
}
Ejemplo n.º 4
0
// Wraps the given nav resource. If its current text does not contain a
// <nav> element whose epub:type attribute equals "toc", the resource text
// is replaced with a skeleton navigation document that uses the default
// metadata language from the settings for lang and xml:lang.
//   nav_resource - resource whose text is inspected and possibly rewritten
NavProcessor::NavProcessor(HTMLResource * nav_resource)
  : m_NavResource(nav_resource)
{
    bool has_toc_nav = false;
    {
        // The read lock lives only inside this scope so it is released
        // before the write lock below is taken.
        QReadLocker read_guard(&m_NavResource->GetLock());
        QString current_text = m_NavResource->GetText();
        GumboInterface gi(current_text, "3.0");
        gi.parse();
        const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);

        // Stop at the first nav whose epub:type is exactly "toc".
        for (int idx = 0; idx < nav_nodes.length() && !has_toc_nav; ++idx) {
            GumboNode* nav = nav_nodes.at(idx);
            GumboAttribute* type_attr = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
            if (type_attr && (QString::fromUtf8(type_attr->value) == "toc")) {
                has_toc_nav = true;
            }
        }
    }

    // With no nav elements at all the flag stays false, so this single
    // test also covers the "no nav present" case.
    if (has_toc_nav) {
        return;
    }

    SettingsStore settings;
    QString lang = settings.defaultMetadataLang();
    QString nav_template =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<!DOCTYPE html>\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" "
        "lang=\"%1\" xml:lang=\"%2\">\n"
        "<head>\n"
        "  <meta charset=\"utf-8\" />\n"
        "  <style type=\"text/css\">\n"
        "    nav#landmarks, nav#page-list { display:none; }\n"
        "    ol { list-style-type: none; }\n"
        "  </style>\n"
        "</head>\n"
        "<body epub:type=\"frontmatter\">\n"
        "  <nav epub:type=\"toc\" id=\"toc\">\n"
        "  </nav>\n"
        "  <nav epub:type=\"landmarks\" id=\"landmarks\" hidden=\"\">\n"
        "  </nav>\n"
        "</body>\n"
        "</html>";
    QString replacement = nav_template.arg(lang).arg(lang);

    QWriteLocker write_guard(&m_NavResource->GetLock());
    m_NavResource->SetText(replacement);
}
Ejemplo n.º 5
0
// Collects TOC entries from an <ol> node, depth first.
//   gi   - interface used to read the local text of anchor nodes
//   node - node to inspect; anything other than an <ol> element yields
//          an empty list
//   lvl  - level recorded on entries found directly under this list
// For every <li> element child of the list, each <a> element child
// produces one entry (decoded href when present, decoded local text as
// title) and each <ol> element child is processed recursively at lvl+1.
// Entries are returned in document order.
QList<NavTOCEntry> NavProcessor::GetNodeTOC(GumboInterface & gi, const GumboNode * node, int lvl)
{
    QList<NavTOCEntry> entries;

    const bool is_ordered_list = (node->type == GUMBO_NODE_ELEMENT) &&
                                 (node->v.element.tag == GUMBO_TAG_OL);
    if (!is_ordered_list) {
        return entries;
    }

    const GumboVector& items = node->v.element.children;
    for (unsigned int item_idx = 0; item_idx < items.length; ++item_idx) {
        GumboNode* item = static_cast<GumboNode*>(items.data[item_idx]);
        if ((item->type != GUMBO_NODE_ELEMENT) || (item->v.element.tag != GUMBO_TAG_LI)) {
            continue;
        }

        const GumboVector& parts = item->v.element.children;
        for (unsigned int part_idx = 0; part_idx < parts.length; ++part_idx) {
            GumboNode* part = static_cast<GumboNode*>(parts.data[part_idx]);
            if (part->type != GUMBO_NODE_ELEMENT) {
                continue;
            }

            switch (part->v.element.tag) {
                case GUMBO_TAG_A: {
                    NavTOCEntry entry;
                    entry.lvl = lvl;
                    GumboAttribute* href = gumbo_get_attribute(&part->v.element.attributes, "href");
                    if (href) {
                        entry.href = Utility::URLDecodePath(QString::fromUtf8(href->value));
                    }
                    entry.title = Utility::DecodeXML(gi.get_local_text_of_node(part));
                    entries.append(entry);
                    break;
                }
                case GUMBO_TAG_OL:
                    // Nested list: its entries sit one level deeper.
                    entries.append(GetNodeTOC(gi, part, lvl + 1));
                    break;
                default:
                    break;
            }
        }
    }

    return entries;
}
Ejemplo n.º 6
0
// Processes the nodes returned by XhtmlDoc::GetIDNodes for one HTML
// resource: removes id attributes that start with SIGIL_INDEX_ID_PREFIX,
// then calls CreateIndexEntry for each node, writing a newly numbered id
// onto the node when CreateIndexEntry returns true for a node without an id.
//   html_resource - resource whose text is parsed; its lock is write-locked
//                   for the lifetime of `locker`
void Index::AddIndexIDsOneFile(HTMLResource *html_resource)
{
    QWriteLocker locker(&html_resource->GetLock());
    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi = GumboInterface(source, version);
    QList<GumboNode*> nodes = XhtmlDoc::GetIDNodes(gi, gi.get_root_node());
    // Set whenever an id attribute is removed from or added to a node.
    bool resource_updated = false;
    // Numeric suffix for the next generated id; only advances when an id is actually written.
    int index_id_number = 1;
    foreach(GumboNode * node, nodes) {
        QString index_id_value;

        // Get the text of all sub-nodes.
        QString text_node_text = XhtmlDoc::GetIDElementText(gi, node);
        // Convert &nbsp; to space since Index Editor unfortunately does the same.
        text_node_text.replace(QChar(160), " ");

        // Drop ids carrying the index prefix; any other id is left in place
        // and its value is kept in index_id_value.
        GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "id");
        if (attr) {
            index_id_value = QString::fromUtf8(attr->value);
            if (index_id_value.startsWith(SIGIL_INDEX_ID_PREFIX)) {
                GumboElement* element = &node->v.element;
                gumbo_element_remove_attribute(element, attr);
                resource_updated = true;
            }
        }

        // If this node is a custom index entry make sure it gets included
        bool is_custom_index_entry = false;
        // Defaults to the node text; replaced by a non-empty title attribute
        // when the node carries SIGIL_INDEX_CLASS.
        QString custom_index_value = text_node_text;

        attr = gumbo_get_attribute(&node->v.element.attributes, "class");
        if (attr) {
            QString class_names = QString::fromUtf8(attr->value);

            // Class names are compared as whole space-separated tokens.
            if (class_names.split(" ").contains(SIGIL_INDEX_CLASS)) {
                is_custom_index_entry = true;
                
                GumboAttribute* titleattr = gumbo_get_attribute(&node->v.element.attributes, "title");
                if (titleattr) {
                    QString title = QString::fromUtf8(titleattr->value);
                    if (!title.isEmpty()) {
                        custom_index_value = title;
                    }
                }
            }

        }

        // Use the existing id if there is one, else add one if node contains index item
        // The id is looked up again because it may have been removed above.
        attr = gumbo_get_attribute(&node->v.element.attributes, "id");
        if (attr) {
            CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value);
        } else {
            index_id_value = SIGIL_INDEX_ID_PREFIX + QString::number(index_id_number);

            // The generated id is written to the node only when an entry was created.
            if (CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value)) {
                GumboElement* element = &node->v.element;
                gumbo_element_set_attribute(element, "id", index_id_value.toUtf8().constData()); 
                resource_updated = true;
                index_id_number++;
            }
        }
    }
Ejemplo n.º 7
0
// Builds the list of headings found in one HTML resource.
//   html_resource             - resource to scan (asserted non-null)
//   include_unwanted_headings - when true, headings marked as excluded
//                               from the TOC are returned as well
// Each heading records its resource, node path, title attribute, display
// text (title attribute if non-empty, otherwise the node's local text),
// level taken from the digit in the tag name, and whether it is included
// in the TOC (no SIGIL_NOT_IN_TOC_CLASS / OLD_SIGIL_NOT_IN_TOC_CLASS in
// its class attribute).
QList<Headings::Heading> Headings::GetHeadingListForOneFile(HTMLResource *html_resource,
        bool include_unwanted_headings)
{
    Q_ASSERT(html_resource);
    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi(source, version);
    gi.parse();

    // Original source line of the first body element; stays 0 when none is found.
    unsigned int body_line = 0;
    QList<GumboNode*> body_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY);
    if (body_nodes.count() > 0) {
        body_line = body_nodes.first()->v.element.start_pos.line;
    }

    QList<GumboNode*> heading_nodes = gi.get_all_nodes_with_tags(GHEADING_TAGS);
    const int total = heading_nodes.count();
    QList<Headings::Heading> result;

    for (int idx = 0; idx < total; ++idx) {
        GumboNode* node = heading_nodes.at(idx);
        const GumboVector* attributes = &node->v.element.attributes;
        GumboAttribute* title_attr = gumbo_get_attribute(attributes, "title");
        GumboAttribute* class_attr = gumbo_get_attribute(attributes, "class");

        Heading heading;
        heading.resource_file = html_resource;
        heading.path_to_node = gi.get_path_to_node(node);

        heading.title = title_attr ? QString::fromUtf8(title_attr->value) : QString();
        heading.orig_title = heading.title;

        // A non-empty title attribute overrides the node's own text.
        if (heading.title.isEmpty()) {
            heading.text = gi.get_local_text_of_node(node);
        } else {
            heading.text = heading.title;
        }

        // The second character of the tag name is read as the level digit.
        const QString tag_name = QString::fromStdString(gi.get_tag_name(node));
        heading.level = QString(tag_name.at(1)).toInt();
        heading.orig_level = heading.level;

        QString class_value;
        if (class_attr) {
            class_value = QString::fromUtf8(class_attr->value);
        }
        heading.include_in_toc = !class_value.contains(SIGIL_NOT_IN_TOC_CLASS) &&
                                 !class_value.contains(OLD_SIGIL_NOT_IN_TOC_CLASS);

        // Only the first heading node can be "at file start"; the line
        // distance from the body is computed in unsigned arithmetic.
        const unsigned int node_line = node->v.element.start_pos.line;
        heading.at_file_start = (idx == 0) && ((node_line - body_line) < ALLOWED_HEADING_DISTANCE);
        heading.is_changed = false;

        if (!heading.include_in_toc && !include_unwanted_headings) {
            continue;
        }
        result.append(heading);
    }

    return result;
}