Example #1
0
void Page::parseTags()
{
	// Check redirection
	QUrl redir = m_replyTags->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl();
	if (!redir.isEmpty())
	{
		m_urlRegex = m_site->fixUrl(redir.toString(), m_urlRegex);
		loadTags();
		return;
	}

	QString source = QString::fromUtf8(m_replyTags->readAll());
	m_tags.clear();

	if (m_site->contains("Regex/Tags"))
	{
		QRegExp rxtags(m_site->value("Regex/Tags"));
		rxtags.setMinimal(true);
		int p = 0;
		QStringList order = m_site->value("Regex/TagsOrder").split('|', QString::SkipEmptyParts);
		while ((p = rxtags.indexIn(source, p)) != -1)
		{
			p += rxtags.matchedLength();
			QString type = "", tag = "";
			int count = 1;
			if (order.empty())
			{
				switch (rxtags.captureCount())
				{
					case 4:	order << "type" << "" << "count" << "tag";	break;
					case 3:	order << "type" << "tag" << "count";		break;
					case 2:	order << "type" << "tag";					break;
					case 1:	order << "tag";								break;
				}
			}
			for (int o = 0; o < order.size(); o++)
			{
				if (order.at(o) == "tag" && tag.isEmpty())
				{ tag = rxtags.cap(o + 1).replace(" ", "_").replace("&amp;", "&").trimmed(); }
				else if (order.at(o) == "type" && type.isEmpty())
				{
					type = rxtags.cap(o + 1).toLower().trimmed();
					if (type.contains(", "))
					{ type = type.split(", ").at(0).trimmed(); }
					if (type == "series")
					{ type = "copyright"; }
					else if (type == "mangaka")
					{ type = "artist"; }
					else if (type == "game")
					{ type = "copyright"; }
					else if (type == "studio")
					{ type = "circle"; }
					else if (type == "source")
					{ type = "general"; }
					else if (type == "character group")
					{ type = "general"; }
					else if (type.length() == 1)
					{
						int tpe = type.toInt();
						if (tpe >= 0 && tpe <= 4)
						{
							QStringList types = QStringList() << "general" << "artist" << "unknown" << "copyright" << "character";
							type = types[tpe];
						}
					}
				}
				else if (order.at(o) == "count" && count != 0)
				{ count = rxtags.cap(o + 1).toLower().endsWith('k') ? rxtags.cap(3).left(rxtags.cap(3).length() - 1).toInt() * 1000 : rxtags.cap(3).toInt(); }
			}
			if (type.isEmpty())
			{ type = "unknown"; }
			m_tags.append(Tag(tag, type, count));
		}
	}

	// Getting last page
	if (m_site->contains("Regex/Count") && m_imagesCount < 1)
	{
		QRegExp rxlast(m_site->value("Regex/Count"));
		rxlast.indexIn(source, 0);
		m_imagesCount = rxlast.cap(1).remove(",").toInt();
	}
	if (m_imagesCount < 1)
	{
		for (Tag tag : m_tags)
		{
			if (tag.text() == m_search.join(" "))
			{
				m_imagesCount = tag.count();
				if (m_pagesCount < 0)
					m_pagesCount = (int)ceil((m_imagesCount * 1.) / m_imagesPerPage);
			}
		}
	}
	if (m_site->contains("Regex/LastPage") && (m_imagesCount < 1 || m_imagesCount % 1000 == 0))
	{
		QRegExp rxlast(m_site->value("Regex/LastPage"));
		rxlast.indexIn(source, 0);
		m_pagesCount = rxlast.cap(1).remove(",").toInt();
		if (m_pagesCount != 0)
			m_imagesCount = m_pagesCount * m_imagesPerPage;
	}

	// Wiki
	m_wiki.clear();
	if (m_site->contains("Regex/Wiki"))
	{
		QRegExp rxwiki(m_site->value("Regex/Wiki"));
		rxwiki.setMinimal(true);
		if (rxwiki.indexIn(source) != -1)
		{
			m_wiki = rxwiki.cap(1);
			m_wiki.remove("/wiki/show?title=").remove(QRegExp("<p><a href=\"([^\"]+)\">Full entry &raquo;</a></p>")).replace("<h6>", "<span class=\"title\">").replace("</h6>", "</span>");
		}
	}

	m_replyTags->deleteLater();
	m_replyTagsExists = false;

	emit finishedLoadingTags(this);
}
Example #2
0
ParsedPage HtmlApi::parsePage(Page *parentPage, const QString &source, int first, int limit) const
{
	ParsedPage ret;

	// Getting tags
	if (contains("Regex/Tags"))
	{
		QList<Tag> tgs = Tag::FromRegexp(value("Regex/Tags"), source);
		if (!tgs.isEmpty())
		{ ret.tags = tgs; }
	}

	// Getting images
	QRegularExpression rxImages(value("Regex/Image"), QRegularExpression::DotMatchesEverythingOption);
	auto matches = rxImages.globalMatch(source);
	int id = 0;
	while (matches.hasNext())
	{
		auto match = matches.next();
		QMap<QString, QString> d = multiMatchToMap(match, rxImages.namedCaptureGroups());

		// JSON elements
		if (d.contains("json") && !d["json"].isEmpty())
		{
			QVariant src = Json::parse(d["json"]);
			if (!src.isNull())
			{
				QMap<QString, QVariant> map = src.toMap();
				for (auto it = map.begin(); it != map.end(); ++it)
				{ d[it.key()] = it.value().toString(); }
			}
		}

		QSharedPointer<Image> img = parseImage(parentPage, d, id + first);
		if (!img.isNull())
		{ ret.images.append(img); }

		id++;
	}

	// Navigation
	if (contains("Regex/NextPage"))
	{
		QRegularExpression rxNextPage(value("Regex/NextPage"));
		auto match = rxNextPage.match(source);
		if (match.hasMatch())
		{ ret.urlNextPage = QUrl(match.captured(1)); }
	}
	if (contains("Regex/PrevPage"))
	{
		QRegularExpression rxPrevPage(value("Regex/PrevPage"));
		auto match = rxPrevPage.match(source);
		if (match.hasMatch())
		{ ret.urlPrevPage = QUrl(match.captured(1)); }
	}

	// Last page
	if (contains("LastPage"))
	{ ret.pageCount = value("LastPage").toInt(); }
	else if (contains("Regex/LastPage"))
	{
		QRegularExpression rxlast(value("Regex/LastPage"));
		auto match = rxlast.match(source);
		int cnt = match.hasMatch() ? match.captured(1).remove(",").toInt() : 0;
		if (cnt > 0)
		{
			int pagesCount = cnt;
			if (value("Urls/Tags").contains("{pid}") || (contains("Urls/PagePart") && value("Urls/PagePart").contains("{pid}")))
			{
				int forced = forcedLimit();
				int ppid = forced > 0 ? forced : limit;
				pagesCount = qFloor(static_cast<qreal>(pagesCount) / static_cast<qreal>(ppid)) + 1;
			}
			ret.pageCount = pagesCount;
		}
	}

	// Count images
	if (contains("Regex/Count"))
	{
		QRegularExpression rxlast(value("Regex/Count"));
		auto match = rxlast.match(source);
		int cnt = match.hasMatch() ? match.captured(1).remove(",").toInt() : 0;
		if (cnt > 0)
		{ ret.imageCount = cnt; }
	}

	// Wiki
	if (contains("Regex/Wiki"))
	{
		QRegularExpression rxwiki(value("Regex/Wiki"), QRegularExpression::DotMatchesEverythingOption);
		auto match = rxwiki.match(source);
		if (match.hasMatch())
		{
			QString wiki = match.captured(1);
			wiki.remove("/wiki/show?title=");
			wiki.remove(QRegularExpression("<p><a href=\"([^\"]+)\">Full entry &raquo;</a></p>"));
			wiki.replace("<h6>", "<span class=\"title\">").replace("</h6>", "</span>");
			ret.wiki = wiki;
		}
	}

	return ret;
}