// Mend and Prettify XHTML QString CleanSource::MendPrettify(const QString &source, const QString &version) { QString newsource = PreprocessSpecialCases(source); GumboInterface gi = GumboInterface(newsource, version); newsource = gi.prettyprint(); newsource = CharToEntity(newsource); newsource = PrettifyDOCTYPEHeader(newsource); return newsource; }
// Performs general cleaning (and improving) // of provided book XHTML source code QString CleanSource::Mend(const QString &source, const QString &version) { SettingsStore settings; QString newsource = PreprocessSpecialCases(source); GumboInterface gp = GumboInterface(newsource, version); newsource = gp.repair(); newsource = CharToEntity(newsource); newsource = PrettifyDOCTYPEHeader(newsource); return newsource; }
// Applies the stored HTML updates (and, when any are present, the
// stored CSS updates) to a copy of m_source and returns the result
// after passing it through CleanSource::CharToEntity().
QString PerformHTMLUpdates::operator()()
{
    QString working_copy = m_source;

    GumboInterface gi(working_copy);
    gi.parse();
    QString updated = gi.perform_source_updates(m_HTMLUpdates, m_CurrentPath);

    // CSS updates are only run when there is something to apply.
    const bool have_css_updates = !m_CSSUpdates.isEmpty();
    if (have_css_updates) {
        updated = PerformCSSUpdates(updated, m_CSSUpdates)();
    }

    return CleanSource::CharToEntity(updated);
}
// Wraps the given nav resource and makes sure it holds a usable nav document.
//
// The resource text is parsed and considered valid only when it contains
// at least one <nav> element and at least one <nav> whose epub:type
// attribute is exactly "toc". When that is not the case the resource text
// is replaced by an empty nav skeleton whose lang / xml:lang are filled in
// from SettingsStore::defaultMetadataLang().
NavProcessor::NavProcessor(HTMLResource * nav_resource)
    : m_NavResource(nav_resource)
{
    bool found_nav = false;
    bool found_toc = false;

    // Inspect the current text under a read lock; the lock is released at
    // the end of this scope, before any write lock is taken below.
    {
        QReadLocker locker(&m_NavResource->GetLock());
        QString source = m_NavResource->GetText();
        GumboInterface gi(source, "3.0");
        gi.parse();

        const QList<GumboNode*> nav_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_NAV);
        found_nav = nav_nodes.length() > 0;

        for (GumboNode* nav : nav_nodes) {
            GumboAttribute* type_attr = gumbo_get_attribute(&nav->v.element.attributes, "epub:type");
            if (!type_attr) {
                continue;
            }
            if (QString::fromUtf8(type_attr->value) == "toc") {
                found_toc = true;
            }
        }
    }

    if (found_nav && found_toc) {
        return;
    }

    // No usable toc nav: write out a fresh skeleton document.
    SettingsStore ss;
    const QString lang = ss.defaultMetadataLang();
    QString skeleton =
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
        "<!DOCTYPE html>\n"
        "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" "
        "lang=\"%1\" xml:lang=\"%2\">\n"
        "<head>\n"
        " <meta charset=\"utf-8\" />\n"
        " <style type=\"text/css\">\n"
        " nav#landmarks, nav#page-list { display:none; }\n"
        " ol { list-style-type: none; }\n"
        " </style>\n"
        "</head>\n"
        "<body epub:type=\"frontmatter\">\n"
        " <nav epub:type=\"toc\" id=\"toc\">\n"
        " </nav>\n"
        " <nav epub:type=\"landmarks\" id=\"landmarks\" hidden=\"\">\n"
        " </nav>\n"
        "</body>\n"
        "</html>";
    skeleton = skeleton.arg(lang).arg(lang);

    QWriteLocker locker(&m_NavResource->GetLock());
    m_NavResource->SetText(skeleton);
}
// Collects the TOC entries found under an <ol> node.
//
// For every <li> element child of the list, each direct <a> element child
// yields one entry at level `lvl` (href decoded with
// Utility::URLDecodePath when the attribute exists, title taken from the
// anchor's local text and decoded with Utility::DecodeXML), and each
// direct <ol> element child is processed recursively at level `lvl + 1`.
// Entries are returned in document order.
//
// Returns an empty list when `node` is not an <ol> element.
QList<NavTOCEntry> NavProcessor::GetNodeTOC(GumboInterface & gi, const GumboNode * node, int lvl)
{
    QList<NavTOCEntry> entries;

    const bool is_ordered_list = (node->type == GUMBO_NODE_ELEMENT) &&
                                 (node->v.element.tag == GUMBO_TAG_OL);
    if (!is_ordered_list) {
        return entries;
    }

    const GumboVector& list_items = node->v.element.children;
    for (unsigned int item_idx = 0; item_idx < list_items.length; ++item_idx) {
        GumboNode* item = static_cast<GumboNode*>(list_items.data[item_idx]);
        // Only <li> elements contribute entries.
        if (item->type != GUMBO_NODE_ELEMENT || item->v.element.tag != GUMBO_TAG_LI) {
            continue;
        }

        const GumboVector& item_parts = item->v.element.children;
        for (unsigned int part_idx = 0; part_idx < item_parts.length; ++part_idx) {
            GumboNode* part = static_cast<GumboNode*>(item_parts.data[part_idx]);
            if (part->type != GUMBO_NODE_ELEMENT) {
                continue;
            }

            const GumboTag part_tag = part->v.element.tag;
            if (part_tag == GUMBO_TAG_A) {
                NavTOCEntry entry;
                entry.lvl = lvl;
                GumboAttribute* href = gumbo_get_attribute(&part->v.element.attributes, "href");
                if (href) {
                    entry.href = Utility::URLDecodePath(QString::fromUtf8(href->value));
                }
                entry.title = Utility::DecodeXML(gi.get_local_text_of_node(part));
                entries.append(entry);
            } else if (part_tag == GUMBO_TAG_OL) {
                // Nested list: its entries sit one level deeper.
                entries.append(GetNodeTOC(gi, part, lvl + 1));
            }
        }
    }

    return entries;
}
void Index::AddIndexIDsOneFile(HTMLResource *html_resource) { QWriteLocker locker(&html_resource->GetLock()); QString source = html_resource->GetText(); QString version = html_resource->GetEpubVersion(); GumboInterface gi = GumboInterface(source, version); QList<GumboNode*> nodes = XhtmlDoc::GetIDNodes(gi, gi.get_root_node()); bool resource_updated = false; int index_id_number = 1; foreach(GumboNode * node, nodes) { QString index_id_value; // Get the text of all sub-nodes. QString text_node_text = XhtmlDoc::GetIDElementText(gi, node); // Convert to space since Index Editor unfortunately does the same. text_node_text.replace(QChar(160), " "); GumboAttribute* attr = gumbo_get_attribute(&node->v.element.attributes, "id"); if (attr) { index_id_value = QString::fromUtf8(attr->value); if (index_id_value.startsWith(SIGIL_INDEX_ID_PREFIX)) { GumboElement* element = &node->v.element; gumbo_element_remove_attribute(element, attr); resource_updated = true; } } // If this node is a custom index entry make sure it gets included bool is_custom_index_entry = false; QString custom_index_value = text_node_text; attr = gumbo_get_attribute(&node->v.element.attributes, "class"); if (attr) { QString class_names = QString::fromUtf8(attr->value); if (class_names.split(" ").contains(SIGIL_INDEX_CLASS)) { is_custom_index_entry = true; GumboAttribute* titleattr = gumbo_get_attribute(&node->v.element.attributes, "title"); if (titleattr) { QString title = QString::fromUtf8(titleattr->value); if (!title.isEmpty()) { custom_index_value = title; } } } } // Use the existing id if there is one, else add one if node contains index item attr = gumbo_get_attribute(&node->v.element.attributes, "id"); if (attr) { CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, custom_index_value); } else { index_id_value = SIGIL_INDEX_ID_PREFIX + QString::number(index_id_number); if (CreateIndexEntry(text_node_text, html_resource, index_id_value, is_custom_index_entry, 
custom_index_value)) { GumboElement* element = &node->v.element; gumbo_element_set_attribute(element, "id", index_id_value.toUtf8().constData()); resource_updated = true; index_id_number++; } } }
// Builds the list of headings found in one HTML resource.
//
// Each heading node (tags in GHEADING_TAGS) becomes a Heading record with
// its path, title attribute, display text (the title attribute when
// non-empty, otherwise the node's local text), level (second character of
// the tag name as a number), TOC inclusion flag (false when the class
// attribute contains SIGIL_NOT_IN_TOC_CLASS or OLD_SIGIL_NOT_IN_TOC_CLASS)
// and whether it sits at the start of the file.
//
//   html_resource             - resource to scan; must not be null
//   include_unwanted_headings - when true, headings excluded from the TOC
//                               are returned as well
QList<Headings::Heading> Headings::GetHeadingListForOneFile(HTMLResource *html_resource,
                                                            bool include_unwanted_headings)
{
    Q_ASSERT(html_resource);

    QString source = html_resource->GetText();
    QString version = html_resource->GetEpubVersion();
    GumboInterface gi(source, version);
    gi.parse();

    // Original source line number of the body element (0 if there is none).
    unsigned int body_line = 0;
    const QList<GumboNode*> body_nodes = gi.get_all_nodes_with_tag(GUMBO_TAG_BODY);
    if (!body_nodes.isEmpty()) {
        body_line = body_nodes.at(0)->v.element.start_pos.line;
    }

    const QList<GumboNode*> heading_nodes = gi.get_all_nodes_with_tags(GHEADING_TAGS);

    QList<Headings::Heading> result;
    bool is_first_heading = true;
    for (GumboNode* node : heading_nodes) {
        const bool first = is_first_heading;
        is_first_heading = false;

        Heading heading;
        heading.resource_file = html_resource;
        heading.path_to_node = gi.get_path_to_node(node);

        // Title comes from the "title" attribute, empty when absent.
        GumboAttribute* title_attr = gumbo_get_attribute(&node->v.element.attributes, "title");
        heading.title = title_attr ? QString::fromUtf8(title_attr->value) : QString();
        heading.orig_title = heading.title;

        // A non-empty title attribute overrides the visible heading text.
        if (heading.title.isEmpty()) {
            heading.text = gi.get_local_text_of_node(node);
        } else {
            heading.text = heading.title;
        }

        // Level is the character after the leading letter of the tag name.
        const QString tag_name = QString::fromStdString(gi.get_tag_name(node));
        heading.level = QString(tag_name.at(1)).toInt();
        heading.orig_level = heading.level;

        QString classes;
        GumboAttribute* class_attr = gumbo_get_attribute(&node->v.element.attributes, "class");
        if (class_attr) {
            classes = QString::fromUtf8(class_attr->value);
        }
        const bool excluded_from_toc = classes.contains(SIGIL_NOT_IN_TOC_CLASS) ||
                                       classes.contains(OLD_SIGIL_NOT_IN_TOC_CLASS);
        heading.include_in_toc = !excluded_from_toc;

        // Only the first heading can be "at file start", and only when it
        // is within ALLOWED_HEADING_DISTANCE lines of the body element.
        unsigned int node_line = node->v.element.start_pos.line;
        heading.at_file_start = first && ((node_line - body_line) < ALLOWED_HEADING_DISTANCE);
        heading.is_changed = false;

        if (heading.include_in_toc || include_unwanted_headings) {
            result.append(heading);
        }
    }

    return result;
}