void Page::parseTags() { // Check redirection QUrl redir = m_replyTags->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl(); if (!redir.isEmpty()) { m_urlRegex = m_site->fixUrl(redir.toString(), m_urlRegex); loadTags(); return; } QString source = QString::fromUtf8(m_replyTags->readAll()); m_tags.clear(); if (m_site->contains("Regex/Tags")) { QRegExp rxtags(m_site->value("Regex/Tags")); rxtags.setMinimal(true); int p = 0; QStringList order = m_site->value("Regex/TagsOrder").split('|', QString::SkipEmptyParts); while ((p = rxtags.indexIn(source, p)) != -1) { p += rxtags.matchedLength(); QString type = "", tag = ""; int count = 1; if (order.empty()) { switch (rxtags.captureCount()) { case 4: order << "type" << "" << "count" << "tag"; break; case 3: order << "type" << "tag" << "count"; break; case 2: order << "type" << "tag"; break; case 1: order << "tag"; break; } } for (int o = 0; o < order.size(); o++) { if (order.at(o) == "tag" && tag.isEmpty()) { tag = rxtags.cap(o + 1).replace(" ", "_").replace("&", "&").trimmed(); } else if (order.at(o) == "type" && type.isEmpty()) { type = rxtags.cap(o + 1).toLower().trimmed(); if (type.contains(", ")) { type = type.split(", ").at(0).trimmed(); } if (type == "series") { type = "copyright"; } else if (type == "mangaka") { type = "artist"; } else if (type == "game") { type = "copyright"; } else if (type == "studio") { type = "circle"; } else if (type == "source") { type = "general"; } else if (type == "character group") { type = "general"; } else if (type.length() == 1) { int tpe = type.toInt(); if (tpe >= 0 && tpe <= 4) { QStringList types = QStringList() << "general" << "artist" << "unknown" << "copyright" << "character"; type = types[tpe]; } } } else if (order.at(o) == "count" && count != 0) { count = rxtags.cap(o + 1).toLower().endsWith('k') ? 
rxtags.cap(3).left(rxtags.cap(3).length() - 1).toInt() * 1000 : rxtags.cap(3).toInt(); } } if (type.isEmpty()) { type = "unknown"; } m_tags.append(Tag(tag, type, count)); } } // Getting last page if (m_site->contains("Regex/Count") && m_imagesCount < 1) { QRegExp rxlast(m_site->value("Regex/Count")); rxlast.indexIn(source, 0); m_imagesCount = rxlast.cap(1).remove(",").toInt(); } if (m_imagesCount < 1) { for (Tag tag : m_tags) { if (tag.text() == m_search.join(" ")) { m_imagesCount = tag.count(); if (m_pagesCount < 0) m_pagesCount = (int)ceil((m_imagesCount * 1.) / m_imagesPerPage); } } } if (m_site->contains("Regex/LastPage") && (m_imagesCount < 1 || m_imagesCount % 1000 == 0)) { QRegExp rxlast(m_site->value("Regex/LastPage")); rxlast.indexIn(source, 0); m_pagesCount = rxlast.cap(1).remove(",").toInt(); if (m_pagesCount != 0) m_imagesCount = m_pagesCount * m_imagesPerPage; } // Wiki m_wiki.clear(); if (m_site->contains("Regex/Wiki")) { QRegExp rxwiki(m_site->value("Regex/Wiki")); rxwiki.setMinimal(true); if (rxwiki.indexIn(source) != -1) { m_wiki = rxwiki.cap(1); m_wiki.remove("/wiki/show?title=").remove(QRegExp("<p><a href=\"([^\"]+)\">Full entry »</a></p>")).replace("<h6>", "<span class=\"title\">").replace("</h6>", "</span>"); } } m_replyTags->deleteLater(); m_replyTagsExists = false; emit finishedLoadingTags(this); }
/**
 * Parses the source of a page using the regexes configured for this API.
 *
 * @param parentPage Forwarded as-is to parseImage() for every image match.
 * @param source Text on which all the configured regexes are run.
 * @param first Offset added to the zero-based index of each image match
 *              before it is passed to parseImage().
 * @param limit Page size used to turn a "{pid}" offset into a page number
 *              when forcedLimit() does not return a positive value.
 * @return A ParsedPage whose tags, images, next/previous URLs, page count,
 *         image count and wiki are set only when the matching setting exists
 *         and yields a usable value.
 */
ParsedPage HtmlApi::parsePage(Page *parentPage, const QString &source, int first, int limit) const
{
	ParsedPage ret;

	// Getting tags
	if (contains("Regex/Tags"))
	{
		const QList<Tag> tgs = Tag::FromRegexp(value("Regex/Tags"), source);
		if (!tgs.isEmpty())
		{ ret.tags = tgs; }
	}

	// Getting images
	QRegularExpression rxImages(value("Regex/Image"), QRegularExpression::DotMatchesEverythingOption);
	const QStringList groups = rxImages.namedCaptureGroups();
	auto matches = rxImages.globalMatch(source);
	int id = 0;
	while (matches.hasNext())
	{
		auto match = matches.next();
		QMap<QString, QString> d = multiMatchToMap(match, groups);

		// JSON elements: every key of a captured "json" object is merged into the map
		QString json = d.value("json");
		if (!json.isEmpty())
		{
			QVariant src = Json::parse(json);
			if (!src.isNull())
			{
				const QMap<QString, QVariant> map = src.toMap();
				for (auto it = map.constBegin(); it != map.constEnd(); ++it)
				{ d[it.key()] = it.value().toString(); }
			}
		}

		QSharedPointer<Image> img = parseImage(parentPage, d, id + first);
		if (!img.isNull())
		{ ret.images.append(img); }

		// The index counts every match, including those that produced no image
		id++;
	}

	// Navigation
	if (contains("Regex/NextPage"))
	{
		QRegularExpression rxNextPage(value("Regex/NextPage"));
		auto match = rxNextPage.match(source);
		if (match.hasMatch())
		{ ret.urlNextPage = QUrl(match.captured(1)); }
	}
	if (contains("Regex/PrevPage"))
	{
		QRegularExpression rxPrevPage(value("Regex/PrevPage"));
		auto match = rxPrevPage.match(source);
		if (match.hasMatch())
		{ ret.urlPrevPage = QUrl(match.captured(1)); }
	}

	// Last page: a fixed "LastPage" setting takes precedence over the regex
	if (contains("LastPage"))
	{
		ret.pageCount = value("LastPage").toInt();
	}
	else if (contains("Regex/LastPage"))
	{
		QRegularExpression rxlast(value("Regex/LastPage"));
		auto match = rxlast.match(source);
		const int cnt = match.hasMatch() ? match.captured(1).remove(",").toInt() : 0;
		if (cnt > 0)
		{
			int pagesCount = cnt;
			bool valid = true;

			// With "{pid}" URLs the captured value is treated as an offset
			// and has to be divided by the page size
			const bool pidBased = value("Urls/Tags").contains("{pid}")
				|| (contains("Urls/PagePart") && value("Urls/PagePart").contains("{pid}"));
			if (pidBased)
			{
				const int forced = forcedLimit();
				const int ppid = forced > 0 ? forced : limit;

				// Without a positive page size the offset cannot be converted
				// (it would divide by zero), so the page count is left unset
				if (ppid > 0)
				{ pagesCount = cnt / ppid + 1; }
				else
				{ valid = false; }
			}

			if (valid)
			{ ret.pageCount = pagesCount; }
		}
	}

	// Count images
	if (contains("Regex/Count"))
	{
		QRegularExpression rxcount(value("Regex/Count"));
		auto match = rxcount.match(source);
		const int cnt = match.hasMatch() ? match.captured(1).remove(",").toInt() : 0;
		if (cnt > 0)
		{ ret.imageCount = cnt; }
	}

	// Wiki
	if (contains("Regex/Wiki"))
	{
		QRegularExpression rxwiki(value("Regex/Wiki"), QRegularExpression::DotMatchesEverythingOption);
		auto match = rxwiki.match(source);
		if (match.hasMatch())
		{
			QString wiki = match.captured(1);
			wiki.remove("/wiki/show?title=");
			wiki.remove(QRegularExpression("<p><a href=\"([^\"]+)\">Full entry »</a></p>"));
			wiki.replace("<h6>", "<span class=\"title\">").replace("</h6>", "</span>");
			ret.wiki = wiki;
		}
	}

	return ret;
}