// Runs the stored source through GumboInterface::perform_source_updates
// using m_HTMLUpdates and then, only when m_CSSUpdates is non-empty,
// through GumboInterface::perform_style_updates using m_CSSUpdates.
// The final text is passed through CleanSource::CharToEntity and returned.
QString PerformHTMLUpdates::operator()()
{
    QString updated = m_source;

    // First pass: source updates. The parser lives in its own scope so it
    // is destroyed before the optional second pass begins.
    {
        GumboInterface html_gi(updated, m_version, m_HTMLUpdates);
        html_gi.parse();
        updated = html_gi.perform_source_updates(m_CurrentPath);
    }

    // Second pass: style updates, skipped entirely when there are none.
    if (!m_CSSUpdates.isEmpty()) {
        GumboInterface css_gi(updated, m_version, m_CSSUpdates);
        css_gi.parse();
        updated = css_gi.perform_style_updates(m_CurrentPath);
    }

    return CleanSource::CharToEntity(updated);
}
// Builds the search data for the page currently held by this view:
//   fulltext     - the visible text nodes concatenated together,
//   node_offsets - offset in fulltext -> web path of the text node that
//                  starts at that offset,
//   textlen      - length of fulltext.
BookViewPreview::SearchTools BookViewPreview::GetSearchTools() const
{
    SearchTools tools;
    tools.fulltext = "";

    QString html = page()->mainFrame()->toHtml();
    QString version = "any_version";
    GumboInterface gi(html, version);
    gi.parse();

    // Begin at the body element. Gumbo inserts a body tag when the source
    // has none (unless parsing a fragment, which is not done here), so the
    // first entry is expected to exist.
    GumboNode *body = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY).at(0);
    QList<GumboNode *> visible_text = XhtmlDoc::GetVisibleTextNodes(gi, body);

    // Text nodes that share a block-level ancestor are joined directly.
    // Whenever the block ancestor changes a newline is emitted first.
    // The offset at which every text node starts is recorded as we go.
    GumboNode *previous_block = NULL;
    for (QList<GumboNode *>::const_iterator it = visible_text.constBegin();
         it != visible_text.constEnd(); ++it) {
        GumboNode *text_node = *it;
        GumboNode *owning_block = XhtmlDoc::GetAncestorBlockElement(gi, text_node);
        QString node_path = gi.get_qwebpath_to_node(text_node);

        if (owning_block != previous_block) {
            previous_block = owning_block;
            tools.fulltext.append("\n");
        }

        tools.node_offsets[ tools.fulltext.length() ] = node_path;
        tools.fulltext.append(QString::fromUtf8(text_node->v.text.text));
    }

    tools.textlen = tools.fulltext.length();
    return tools;
}
// Collects the landmark entries from the nav resource.
// Only the first <nav> whose epub:type attribute equals "landmarks" is
// examined; every <a> found beneath it yields one entry carrying its
// epub:type, its URL-decoded href and its XML-decoded local text.
// Returns an empty list when there is no nav resource.
QList<NavLandmarkEntry> NavProcessor::GetLandmarks()
{
    QList<NavLandmarkEntry> landmarks;
    if (!m_NavResource) {
        return landmarks;
    }

    QReadLocker locker(&m_NavResource->GetLock());
    GumboInterface gi(m_NavResource->GetText(), "3.0");
    gi.parse();

    const QList<GumboNode*> navs = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);
    for (QList<GumboNode*>::const_iterator nav_it = navs.constBegin();
         nav_it != navs.constEnd(); ++nav_it) {
        GumboNode* nav = *nav_it;
        GumboAttribute* nav_type = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
        const bool is_landmarks = nav_type && (QString::fromUtf8(nav_type->value) == "landmarks");
        if (!is_landmarks) {
            continue;
        }

        QList<GumboTag> wanted;
        wanted << GUMBO_TAG_A;
        const QList<GumboNode*> anchors = gi.get_nodes_with_tags(nav, wanted);

        for (QList<GumboNode*>::const_iterator a_it = anchors.constBegin();
             a_it != anchors.constEnd(); ++a_it) {
            GumboNode* anchor = *a_it;
            GumboAttribute* type_attr = gumbo_get_attribute(&anchor->v.element.attributes, "epub:type");
            GumboAttribute* href_attr = gumbo_get_attribute(&anchor->v.element.attributes, "href");

            NavLandmarkEntry entry;
            if (type_attr) {
                entry.etype = QString::fromUtf8(type_attr->value);
            }
            if (href_attr) {
                entry.href = Utility::URLDecodePath(QString::fromUtf8(href_attr->value));
            }
            entry.title = Utility::DecodeXML(gi.get_local_text_of_node(anchor));
            landmarks.append(entry);
        }

        // Only the first landmarks nav is processed.
        break;
    }

    return landmarks;
}
// Collects the table-of-contents entries from the nav resource.
// Only the first <nav> whose epub:type attribute equals "toc" is examined;
// each <ol> returned for it by get_nodes_with_tags is handed to GetNodeTOC
// with a level argument of 1 and the results are appended in order.
// Returns an empty list when there is no nav resource.
QList<NavTOCEntry> NavProcessor::GetTOC()
{
    QList<NavTOCEntry> entries;
    if (!m_NavResource) {
        return entries;
    }

    QReadLocker locker(&m_NavResource->GetLock());
    GumboInterface gi(m_NavResource->GetText(), "3.0");
    gi.parse();

    const QList<GumboNode*> navs = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);
    for (QList<GumboNode*>::const_iterator nav_it = navs.constBegin();
         nav_it != navs.constEnd(); ++nav_it) {
        GumboNode* nav = *nav_it;
        GumboAttribute* nav_type = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
        const bool is_toc = nav_type && (QString::fromUtf8(nav_type->value) == "toc");
        if (!is_toc) {
            continue;
        }

        QList<GumboTag> wanted;
        wanted << GUMBO_TAG_OL;
        const QList<GumboNode*> lists = gi.get_nodes_with_tags(nav, wanted);

        for (QList<GumboNode*>::const_iterator ol_it = lists.constBegin();
             ol_it != lists.constEnd(); ++ol_it) {
            GumboNode * ordered_list = *ol_it;
            entries.append(GetNodeTOC(gi, ordered_list, 1));
        }

        // Only the first toc nav is processed.
        break;
    }

    return entries;
}
// Mend and Prettify XHTML QString CleanSource::MendPrettify(const QString &source, const QString &version) { QString newsource = PreprocessSpecialCases(source); GumboInterface gi = GumboInterface(newsource, version); newsource = gi.prettyprint(); newsource = CharToEntity(newsource); newsource = PrettifyDOCTYPEHeader(newsource); return newsource; }
// Performs general cleaning (and improving) // of provided book XHTML source code QString CleanSource::Mend(const QString &source, const QString &version) { SettingsStore settings; QString newsource = PreprocessSpecialCases(source); GumboInterface gp = GumboInterface(newsource, version); newsource = gp.repair(); newsource = CharToEntity(newsource); newsource = PrettifyDOCTYPEHeader(newsource); return newsource; }
// Wraps the given nav resource.
//
// The resource's current text is parsed (under a read lock) and searched
// for a <nav> element whose epub:type attribute is exactly "toc". If none
// is found, the text is replaced (under a write lock) with a minimal
// skeleton nav document whose lang / xml:lang attributes are filled from
// SettingsStore::defaultMetadataLang().
//
// A null nav_resource is tolerated: nothing is inspected or rewritten.
// This matches GetLandmarks() and GetTOC(), which both return early when
// m_NavResource is null.
NavProcessor::NavProcessor(HTMLResource * nav_resource)
    : m_NavResource(nav_resource)
{
    if (!m_NavResource) {
        return;
    }

    // A document is considered valid when at least one nav element is
    // typed "toc" (which also implies that a nav element exists at all).
    bool has_toc = false;
    {
        QReadLocker locker(&m_NavResource->GetLock());
        QString source = m_NavResource->GetText();
        GumboInterface gi = GumboInterface(source, "3.0");
        gi.parse();
        const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);
        for (int i = 0; i < nav_nodes.length(); ++i) {
            GumboNode* node = nav_nodes.at(i);
            GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "epub:type");
            if (attr && (QString::fromUtf8(attr->value) == "toc")) {
                has_toc = true;
                break;
            }
        }
    }

    if (has_toc) {
        return;
    }

    // No usable toc nav: install the skeleton document. The read lock was
    // released above, so the write lock can be taken here.
    SettingsStore ss;
    QString lang = ss.defaultMetadataLang();
    QString newsource =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<!DOCTYPE html>\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" "
        "lang=\"%1\" xml:lang=\"%2\">\n"
        "<head>\n"
        " <meta charset=\"utf-8\" />\n"
        " <style type=\"text/css\">\n"
        " nav#landmarks, nav#page-list { display:none; }\n"
        " ol { list-style-type: none; }\n"
        " </style>\n"
        "</head>\n"
        "<body epub:type=\"frontmatter\">\n"
        " <nav epub:type=\"toc\" id=\"toc\">\n"
        " </nav>\n"
        " <nav epub:type=\"landmarks\" id=\"landmarks\" hidden=\"\">\n"
        " </nav>\n"
        "</body>\n"
        "</html>";
    newsource = newsource.arg(lang).arg(lang);
    QWriteLocker locker(&m_NavResource->GetLock());
    m_NavResource->SetText(newsource);
}
// Walks the ID nodes of one HTML resource (as returned by
// XhtmlDoc::GetIDNodes from the parsed root) and, for each node:
//   - removes an existing id attribute that starts with SIGIL_INDEX_ID_PREFIX,
//   - detects custom index entries via the SIGIL_INDEX_CLASS class name,
//   - calls CreateIndexEntry, assigning a freshly numbered id when the node
//     has no id left and CreateIndexEntry returns true.
// The resource's lock is held for writing for the duration of the call.
void Index::AddIndexIDsOneFile(HTMLResource *html_resource)
{
    QWriteLocker locker(&html_resource->GetLock());
    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi = GumboInterface(source, version);
    QList<GumboNode*> nodes = XhtmlDoc::GetIDNodes(gi, gi.get_root_node());
    // Set whenever an id attribute is removed from or added to a node.
    // NOTE(review): the code that consumes this flag is not visible here — confirm.
    bool resource_updated = false;
    // Counter used to build new ids: SIGIL_INDEX_ID_PREFIX + number.
    int index_id_number = 1;
    foreach(GumboNode * node, nodes) {
        QString index_id_value;
        // Get the text of all sub-nodes.
        QString text_node_text = XhtmlDoc::GetIDElementText(gi, node);
        // Convert character 160 (non-breaking space) to a plain space
        // since Index Editor unfortunately does the same.
        text_node_text.replace(QChar(160), " ");
        // Remember the current id; drop it from the element if it is one
        // of our own generated ids (it starts with SIGIL_INDEX_ID_PREFIX).
        GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "id");

        if (attr) {
            index_id_value = QString::fromUtf8(attr->value);

            if (index_id_value.startsWith(SIGIL_INDEX_ID_PREFIX)) {
                GumboElement* element = &node->v.element;
                gumbo_element_remove_attribute(element, attr);
                resource_updated = true;
            }
        }

        // If this node is a custom index entry make sure it gets included.
        // The entry value defaults to the node text and is overridden by a
        // non-empty title attribute.
        bool is_custom_index_entry = false;
        QString custom_index_value = text_node_text;
        attr = gumbo_get_attribute(&node->v.element.attributes, "class");

        if (attr) {
            QString class_names = QString::fromUtf8(attr->value);

            // The class attribute is split on single spaces and searched
            // for an exact SIGIL_INDEX_CLASS entry.
            if (class_names.split(" ").contains(SIGIL_INDEX_CLASS)) {
                is_custom_index_entry = true;
                GumboAttribute* titleattr = gumbo_get_attribute(&node->v.element.attributes, "title");

                if (titleattr) {
                    QString title = QString::fromUtf8(titleattr->value);

                    if (!title.isEmpty()) {
                        custom_index_value = title;
                    }
                }
            }
        }

        // Use the existing id if there is one, else add one if node contains index item.
        // The id is looked up again because it may have been removed above.
        attr = gumbo_get_attribute(&node->v.element.attributes, "id");

        if (attr) {
            // An id is still present: index_id_value holds it.
            CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value);
        } else {
            // No id on the element: propose the next generated id and keep
            // it only when CreateIndexEntry returns true.
            index_id_value = SIGIL_INDEX_ID_PREFIX + QString::number(index_id_number);

            if (CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry,
                                 custom_index_value)) {
                GumboElement* element = &node->v.element;
                gumbo_element_set_attribute(element, "id", index_id_value.toUtf8().constData());
                resource_updated = true;
                index_id_number++;
            }
        }
    }
// Returns the headings found in one HTML resource, in the order returned
// by get_all_nodes_with_tags(GHEADING_TAGS).
//   html_resource             - resource to scan (asserted non-null)
//   include_unwanted_headings - when true, headings carrying one of the
//                               "not in TOC" class markers are returned too
// For each heading the title attribute, if non-empty, is used as its text;
// otherwise the node's local text is used. The level is taken from the
// second character of the tag name.
QList<Headings::Heading> Headings::GetHeadingListForOneFile(HTMLResource *html_resource,
        bool include_unwanted_headings)
{
    Q_ASSERT(html_resource);

    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi(source, version);
    gi.parse();

    // Original source line of the body element; stays 0 if none is found.
    unsigned int body_line = 0;
    QList<GumboNode*> bodies = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY);
    if (!bodies.isEmpty()) {
        body_line = bodies.at(0)->v.element.start_pos.line;
    }

    QList<GumboNode*> heading_nodes = gi.get_all_nodes_with_tags(GHEADING_TAGS);
    QList<Headings::Heading> collected;
    bool is_first_heading = true;

    for (QList<GumboNode*>::const_iterator it = heading_nodes.constBegin();
         it != heading_nodes.constEnd(); ++it) {
        GumboNode* hnode = *it;

        Heading entry;
        entry.resource_file = html_resource;
        entry.path_to_node = gi.get_path_to_node(hnode);

        // Title attribute, empty when absent.
        entry.title = QString();
        GumboAttribute* title_attr = gumbo_get_attribute(&hnode->v.element.attributes,"title");
        if (title_attr) {
            entry.title = QString::fromUtf8(title_attr->value);
        }
        entry.orig_title = entry.title;

        // Fall back to the node's own text only when there is no title.
        if (entry.title.isEmpty()) {
            entry.text = gi.get_local_text_of_node(hnode);
        } else {
            entry.text = entry.title;
        }

        // Level digit is the character at index 1 of the tag name.
        const QString tag_name = QString::fromStdString(gi.get_tag_name(hnode));
        entry.level = QString(tag_name.at(1)).toInt();
        entry.orig_level = entry.level;

        QString class_value = QString();
        GumboAttribute* class_attr = gumbo_get_attribute(&hnode->v.element.attributes,"class");
        if (class_attr) {
            class_value = QString::fromUtf8(class_attr->value);
        }
        entry.include_in_toc = !class_value.contains(SIGIL_NOT_IN_TOC_CLASS)
                               && !class_value.contains(OLD_SIGIL_NOT_IN_TOC_CLASS);

        // Only the first heading can be "at file start", and only when it
        // lies within ALLOWED_HEADING_DISTANCE lines of the body element.
        unsigned int node_line = hnode->v.element.start_pos.line;
        entry.at_file_start = is_first_heading && ((node_line - body_line) < ALLOWED_HEADING_DISTANCE);
        entry.is_changed = false;

        if (entry.include_in_toc || include_unwanted_headings) {
            collected.append(entry);
        }

        is_first_heading = false;
    }

    return collected;
}