Пример #1
0
// Runs the source updates held in m_HTMLUpdates over m_source and then,
// only if m_CSSUpdates is non-empty, the style updates as a second pass.
// The final text is returned after being passed through
// CleanSource::CharToEntity.
QString PerformHTMLUpdates::operator()()
{
    QString updated = m_source;

    // First pass: the source updates are always applied. The parser lives in
    // its own scope so it is destroyed before a second one is created.
    {
        GumboInterface html_pass(updated, m_version, m_HTMLUpdates);
        html_pass.parse();
        updated = html_pass.perform_source_updates(m_CurrentPath);
    }

    // Nothing more to do when no style updates were requested.
    if (m_CSSUpdates.isEmpty()) {
        return CleanSource::CharToEntity(updated);
    }

    // Second pass: re-parse the already updated text and apply style updates.
    GumboInterface css_pass(updated, m_version, m_CSSUpdates);
    css_pass.parse();
    updated = css_pass.perform_style_updates(m_CurrentPath);

    return CleanSource::CharToEntity(updated);
}
Пример #2
0
// Builds the search helper data for the page currently loaded in the view.
//
// The returned SearchTools holds:
//   - fulltext:     the text of all visible text nodes concatenated, with a
//                   newline inserted whenever the block-level ancestor changes
//   - node_offsets: for every text node, the offset in fulltext at which its
//                   text starts, mapped to the web path of that node
//   - textlen:      the length of fulltext
//
// If the parsed document yields no body element, an empty result
// (fulltext "", no offsets, textlen 0) is returned.
BookViewPreview::SearchTools BookViewPreview::GetSearchTools() const
{
    SearchTools search_tools;
    search_tools.fulltext = "";
    QString source = page()->mainFrame()->toHtml();
    QString version = "any_version";
    GumboInterface gi = GumboInterface(source, version);
    gi.parse();

    // start with body node
    // Gumbo adds body tag if missing (unless parsing a fragment which we are not doing)
    // Still, never index into the list unchecked: an empty list would make
    // at(0) an out-of-range access.
    const QList<GumboNode *> body_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY);
    if (body_nodes.isEmpty()) {
        search_tools.textlen = search_tools.fulltext.length();
        return search_tools;
    }
    GumboNode *node = body_nodes.at(0);
    QList<GumboNode *> text_nodes = XhtmlDoc::GetVisibleTextNodes(gi, node);
    GumboNode *current_block_ancestor = NULL;

    // We concatenate all text nodes that have the same
    // block level ancestor element. A newline is added
    // when a new block element starts.
    // We also record the starting offset of every text node.
    for (int i = 0; i < text_nodes.count(); ++i) {
        GumboNode *anode = text_nodes.at(i);
        GumboNode *new_block_ancestor = XhtmlDoc::GetAncestorBlockElement(gi, anode);
        QString webpath = gi.get_qwebpath_to_node(anode);
        if (new_block_ancestor != current_block_ancestor) {
            current_block_ancestor = new_block_ancestor;
            search_tools.fulltext.append("\n");
        }
        // The offset is taken after the optional newline so that it points
        // at the first character of this node's own text.
        search_tools.node_offsets[ search_tools.fulltext.length() ] = webpath;
        search_tools.fulltext.append(QString::fromUtf8(anode->v.text.text));
    }
    search_tools.textlen = search_tools.fulltext.length();
    return search_tools;
}
Пример #3
0
// Collects the landmark entries of the nav document.
//
// Returns an empty list when there is no nav resource or when no <nav>
// element has epub:type="landmarks". Only the first matching <nav> is
// examined; each anchor below it yields one entry with its epub:type,
// URL-decoded href and XML-decoded local text.
QList<NavLandmarkEntry> NavProcessor::GetLandmarks()
{
    QList<NavLandmarkEntry> entries;
    if (!m_NavResource) {
        return entries;
    }

    // Hold the resource lock for reading while its text is fetched and walked.
    QReadLocker locker(&m_NavResource->GetLock());

    GumboInterface gi(m_NavResource->GetText(), "3.0");
    gi.parse();
    const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);

    for (QList<GumboNode*>::const_iterator nav_it = nav_nodes.constBegin();
         nav_it != nav_nodes.constEnd(); ++nav_it) {
        GumboNode* nav = *nav_it;
        GumboAttribute* nav_type = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
        if (!nav_type || QString::fromUtf8(nav_type->value) != "landmarks") {
            continue;
        }

        QList<GumboTag> anchor_tags;
        anchor_tags << GUMBO_TAG_A;
        const QList<GumboNode*> anchors = gi.get_nodes_with_tags(nav, anchor_tags);

        for (QList<GumboNode*>::const_iterator a_it = anchors.constBegin();
             a_it != anchors.constEnd(); ++a_it) {
            GumboNode* anchor = *a_it;
            GumboAttribute* type_attr = gumbo_get_attribute(&anchor->v.element.attributes, "epub:type");
            GumboAttribute* href_attr = gumbo_get_attribute(&anchor->v.element.attributes, "href");

            // Missing attributes leave the corresponding field default-constructed.
            NavLandmarkEntry entry;
            if (type_attr) {
                entry.etype = QString::fromUtf8(type_attr->value);
            }
            if (href_attr) {
                entry.href = Utility::URLDecodePath(QString::fromUtf8(href_attr->value));
            }
            entry.title = Utility::DecodeXML(gi.get_local_text_of_node(anchor));
            entries.append(entry);
        }

        // The first landmarks nav is the only one processed.
        break;
    }
    return entries;
}
Пример #4
0
// Collects the table-of-contents entries of the nav document.
//
// Returns an empty list when there is no nav resource or when no <nav>
// element has epub:type="toc". Only the first matching <nav> is examined;
// the result of GetNodeTOC() for every <ol> found below it is appended.
QList<NavTOCEntry> NavProcessor::GetTOC()
{
    QList<NavTOCEntry> entries;
    if (!m_NavResource) {
        return entries;
    }

    // Hold the resource lock for reading while its text is fetched and walked.
    QReadLocker locker(&m_NavResource->GetLock());

    GumboInterface gi(m_NavResource->GetText(), "3.0");
    gi.parse();
    const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);

    for (QList<GumboNode*>::const_iterator nav_it = nav_nodes.constBegin();
         nav_it != nav_nodes.constEnd(); ++nav_it) {
        GumboNode* nav = *nav_it;
        GumboAttribute* nav_type = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
        if (!nav_type || QString::fromUtf8(nav_type->value) != "toc") {
            continue;
        }

        QList<GumboTag> list_tags;
        list_tags << GUMBO_TAG_OL;
        const QList<GumboNode*> ol_nodes = gi.get_nodes_with_tags(nav, list_tags);

        for (QList<GumboNode*>::const_iterator ol_it = ol_nodes.constBegin();
             ol_it != ol_nodes.constEnd(); ++ol_it) {
            // NOTE(review): the literal 1 is presumably the starting nesting
            // level expected by GetNodeTOC - confirm against its definition.
            entries.append(GetNodeTOC(gi, *ol_it, 1));
        }

        // The first toc nav is the only one processed.
        break;
    }
    return entries;
}
Пример #5
0
// Mends the given XHTML source and pretty-prints it.
// The source is run through PreprocessSpecialCases(), pretty-printed by
// GumboInterface, then passed through CharToEntity() and
// PrettifyDOCTYPEHeader(), in that order.
QString CleanSource::MendPrettify(const QString &source, const QString &version)
{
    QString preprocessed = PreprocessSpecialCases(source);

    GumboInterface gi(preprocessed, version);
    QString pretty = gi.prettyprint();

    QString with_entities = CharToEntity(pretty);
    return PrettifyDOCTYPEHeader(with_entities);
}
Пример #6
0
// Performs general cleaning (and improving) of the provided book XHTML
// source code.
// The source is run through PreprocessSpecialCases(), repaired by
// GumboInterface, then passed through CharToEntity() and
// PrettifyDOCTYPEHeader(), in that order.
QString CleanSource::Mend(const QString &source, const QString &version)
{
    // NOTE(review): `settings` is never read in this function; presumably it
    // can be dropped if constructing a SettingsStore has no needed side
    // effects - confirm before removing.
    SettingsStore settings;

    QString preprocessed = PreprocessSpecialCases(source);

    GumboInterface gi(preprocessed, version);
    QString repaired = gi.repair();

    QString with_entities = CharToEntity(repaired);
    return PrettifyDOCTYPEHeader(with_entities);
}
Пример #7
0
// Wraps the given nav resource and makes sure it holds a usable nav document.
//
// The resource text is parsed and considered valid when it contains at least
// one <nav> element whose epub:type attribute equals "toc". If it does not,
// the resource text is replaced by a minimal skeleton that contains an empty
// toc nav and an empty landmarks nav, with lang/xml:lang set to the default
// metadata language from SettingsStore.
//
// nav_resource may be null; in that case nothing is inspected or written.
NavProcessor::NavProcessor(HTMLResource * nav_resource)
  : m_NavResource(nav_resource)
{
    // The accessors of this class tolerate a null resource, so the
    // constructor must not dereference one either.
    if (!m_NavResource) {
        return;
    }

    // Valid means: some <nav> carries epub:type="toc". That implies there
    // is at least one <nav>, so no separate count check is needed.
    bool valid = false;
    {
        QReadLocker locker(&m_NavResource->GetLock());
        QString source = m_NavResource->GetText();
        GumboInterface gi = GumboInterface(source, "3.0");
        gi.parse();
        const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);
        for (int i = 0; i < nav_nodes.length(); ++i) {
            GumboNode* node = nav_nodes.at(i);
            GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "epub:type");
            if (attr && (QString::fromUtf8(attr->value) == "toc")) {
                valid = true;
                break;
            }
        }
    }   // the read lock is released here, before the write lock below is taken

    if (!valid) {
        SettingsStore ss;
        QString lang = ss.defaultMetadataLang();
        QString newsource =
            "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
            "<!DOCTYPE html>\n"
            "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" "
            "lang=\"%1\" xml:lang=\"%2\">\n"
            "<head>\n"
            "  <meta charset=\"utf-8\" />\n"
            "  <style type=\"text/css\">\n"
            "    nav#landmarks, nav#page-list { display:none; }\n"
            "    ol { list-style-type: none; }\n"
            "  </style>\n"
            "</head>\n"
            "<body epub:type=\"frontmatter\">\n"
            "  <nav epub:type=\"toc\" id=\"toc\">\n"
            "  </nav>\n"
            "  <nav epub:type=\"landmarks\" id=\"landmarks\" hidden=\"\">\n"
            "  </nav>\n"
            "</body>\n"
            "</html>";
        // Substitute both markers in a single pass so that the text inserted
        // for %1 can never be mistaken for the %2 marker.
        newsource = newsource.arg(lang, lang);
        QWriteLocker locker(&m_NavResource->GetLock());
        m_NavResource->SetText(newsource);
    }
}
Пример #8
0
void Index::AddIndexIDsOneFile(HTMLResource *html_resource)
{
    QWriteLocker locker(&html_resource->GetLock());
    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi = GumboInterface(source, version);
    QList<GumboNode*> nodes = XhtmlDoc::GetIDNodes(gi, gi.get_root_node());
    bool resource_updated = false;
    int index_id_number = 1;
    foreach(GumboNode * node, nodes) {
        QString index_id_value;

        // Get the text of all sub-nodes.
        QString text_node_text = XhtmlDoc::GetIDElementText(gi, node);
        // Convert &nbsp; to space since Index Editor unfortunately does the same.
        text_node_text.replace(QChar(160), " ");

        GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "id");
        if (attr) {
            index_id_value = QString::fromUtf8(attr->value);
            if (index_id_value.startsWith(SIGIL_INDEX_ID_PREFIX)) {
                GumboElement* element = &node->v.element;
                gumbo_element_remove_attribute(element, attr);
                resource_updated = true;
            }
        }

        // If this node is a custom index entry make sure it gets included
        bool is_custom_index_entry = false;
        QString custom_index_value = text_node_text;

        attr = gumbo_get_attribute(&node->v.element.attributes, "class");
        if (attr) {
            QString class_names = QString::fromUtf8(attr->value);

            if (class_names.split(" ").contains(SIGIL_INDEX_CLASS)) {
                is_custom_index_entry = true;
                
                GumboAttribute* titleattr = gumbo_get_attribute(&node->v.element.attributes, "title");
                if (titleattr) {
                    QString title = QString::fromUtf8(titleattr->value);
                    if (!title.isEmpty()) {
                        custom_index_value = title;
                    }
                }
            }

        }

        // Use the existing id if there is one, else add one if node contains index item
        attr = gumbo_get_attribute(&node->v.element.attributes, "id");
        if (attr) {
            CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value);
        } else {
            index_id_value = SIGIL_INDEX_ID_PREFIX + QString::number(index_id_number);

            if (CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value)) {
                GumboElement* element = &node->v.element;
                gumbo_element_set_attribute(element, "id", index_id_value.toUtf8().constData()); 
                resource_updated = true;
                index_id_number++;
            }
        }
    }
Пример #9
0
// Returns the headings found in one HTML resource, in the order reported by
// the parser for the tags in GHEADING_TAGS.
//
// html_resource             the file to scan (asserted to be non-null)
// include_unwanted_headings when true, headings whose class attribute marks
//                           them as excluded from the TOC are returned too
//
// For every heading the title attribute, when non-empty, is used as its
// text; otherwise the node's local text is used.
QList<Headings::Heading> Headings::GetHeadingListForOneFile(HTMLResource *html_resource,
        bool include_unwanted_headings)
{
    Q_ASSERT(html_resource);
    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi(source, version);
    gi.parse();

    // Original source line of the body element; stays 0 if none was found.
    QList<GumboNode*> body_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY);
    unsigned int body_line = 0;
    if (!body_nodes.isEmpty()) {
        body_line = body_nodes.at(0)->v.element.start_pos.line;
    }

    QList<GumboNode*> heading_nodes = gi.get_all_nodes_with_tags(GHEADING_TAGS);
    QList<Headings::Heading> result;

    for (int idx = 0, total = heading_nodes.count(); idx < total; ++idx) {
        GumboNode* hnode = heading_nodes.at(idx);

        Heading entry;
        entry.resource_file = html_resource;
        entry.path_to_node = gi.get_path_to_node(hnode);

        // Title attribute (empty string when absent).
        GumboAttribute* title_attr = gumbo_get_attribute(&hnode->v.element.attributes, "title");
        entry.title = title_attr ? QString::fromUtf8(title_attr->value) : QString();
        entry.orig_title = entry.title;

        if (entry.title.isEmpty()) {
            entry.text = gi.get_local_text_of_node(hnode);
        } else {
            entry.text = entry.title;
        }

        // The level is the second character of the tag name read as a number.
        const QString tag_name = QString::fromStdString(gi.get_tag_name(hnode));
        entry.level = QString(tag_name.at(1)).toInt();
        entry.orig_level = entry.level;

        // A heading is kept in the TOC unless its class attribute contains
        // one of the two "not in TOC" marker class names.
        GumboAttribute* class_attr = gumbo_get_attribute(&hnode->v.element.attributes, "class");
        const QString classes = class_attr ? QString::fromUtf8(class_attr->value) : QString();
        entry.include_in_toc = !classes.contains(SIGIL_NOT_IN_TOC_CLASS) &&
                               !classes.contains(OLD_SIGIL_NOT_IN_TOC_CLASS);

        // Only the first heading can be "at file start", and only when it is
        // fewer than ALLOWED_HEADING_DISTANCE lines after the body element.
        const unsigned int node_line = hnode->v.element.start_pos.line;
        entry.at_file_start = (idx == 0) && ((node_line - body_line) < ALLOWED_HEADING_DISTANCE);
        entry.is_changed = false;

        if (entry.include_in_toc || include_unwanted_headings) {
            result.append(entry);
        }
    }

    return result;
}