// Picks one wallpaper at random from a wallpaperswide.com listing page.
//
// The page is scanned for thumbnail containers, one of them is selected with
// _randomGenerator, and the first detail-page link found after that container is
// handed to GetBestImageUrl() to obtain the actual image URL.
//
// parameters: forwarded unchanged to GetBestImageUrl().
// page:       HTML text of the listing page.
//
// Returns a result with name, url and image filled in. A default-constructed
// WallpaperResult is returned when the page has no thumbnails, when no detail link
// follows the chosen thumbnail, when that link does not end in "wallpapers", or when
// GetBestImageUrl() yields an empty URL.
WallpaperResult WallpapersWideProvider::GetRandomWallpaper(WallpaperParameters parameters, const QString &page)
{
    WallpaperResult result;

    // Opening markup of every thumbnail. It contains no regular-expression
    // metacharacters, so a plain substring search finds exactly the same positions.
    const QString thumbMarker("<div class=\"thumb\">");

    // count the thumbnails on the page
    int numberOfImages = 0;
    int pos = 0;
    while ((pos = page.indexOf(thumbMarker, pos)) != -1)
    {
        numberOfImages++;
        pos += thumbMarker.length();
    }

    if (numberOfImages == 0)
    {
        return result;
    }

    // 1-based index of the thumbnail to use
    int randomImageNumber = (_randomGenerator.get() % numberOfImages) + 1;

    // walk the thumbnails again until the chosen one is reached
    pos = 0;
    int n = 0;
    while ((pos = page.indexOf(thumbMarker, pos)) != -1)
    {
        n++;
        if (n == randomImageNumber) break;
        pos += thumbMarker.length();
    }

    // Only reachable if the generator produced an index outside 1..numberOfImages
    // (e.g. a negative value). A negative offset must not be passed on to the
    // searches below, where -1 means "start at the last character".
    if (pos == -1)
    {
        return result;
    }

    // read image data: capture 1 is the detail page path, capture 2 the link title
    QRegExp rxImageData("<a href=\"/([^\\.]*)\\.html\" title=\"([^\"]*)\"");
    if (rxImageData.indexIn(page, pos) == -1)
    {
        return result;
    }

    const QString href = rxImageData.cap(1);
    QString name = rxImageData.cap(2);

    // better name: use the first <h1> heading after the thumbnail when there is one
    QRegExp rxName("<h1>([^<]*)<");
    if (rxName.indexIn(page, pos) != -1)
    {
        name = rxName.cap(1);
    }

    // only links whose path ends in "wallpapers" are accepted
    if (!href.endsWith("wallpapers"))
    {
        return result;
    }

    const QString bestResolutionUrl = GetBestImageUrl(parameters, href);
    if (bestResolutionUrl.isEmpty())
    {
        return result;
    }

    result.name = Utils::UnescapeHtml(name);
    result.url = QString("http://wallpaperswide.com/") +
            href + QString(".html");
    result.image = Utils::GetDataFromUrl(bestResolutionUrl, "wallpaperswide.com", "http://wallpaperswide.com");

    return result;
}
Пример #2
0
void CDownload::retrieveCaptchaUrl()
{
	if(m_reply->attribute(QNetworkRequest::RedirectionTargetAttribute).isValid())
	{
		m_reply->deleteLater();

		QNetworkRequest request;
		request.setUrl(m_reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl());
		request.setRawHeader("User-Agent", m_data.userAgent.toAscii());

		m_reply = m_manager->get(request);
		connect(m_reply, SIGNAL(finished()), this, SLOT(retrieveCaptchaUrl()));
		connect(m_reply, SIGNAL(uploadProgress(qint64, qint64)), &m_timeoutTimer, SLOT(start()));
		connect(m_reply, SIGNAL(finished()), &m_timeoutTimer, SLOT(stop()));
	}
	else if(m_reply->error() == QNetworkReply::NoError)
	{
		QByteArray data = m_reply->readAll();
		QRegExp rxSubmitUrl("form name=\"dwn\" action=\"([^\"]+)\"");
		QRegExp rxCaptchaId("name=\"captcha_nb\" value=\"([0-9]+)\"");

		if(m_data.name.isEmpty()) // parse file name from html
		{
			QRegExp rxName("<h2[^>]*>[^<]*<a href=[^>]*>([^<]+)</a>[^<]*</h2>");
			if(rxName.indexIn(data) != -1)
				m_data.name = rxName.cap(1);
		}

		m_reply->deleteLater();
		m_reply = 0;

		if(rxSubmitUrl.indexIn(data, 0) == -1)
		{
			handleError(ERROR_PARSE_SUBMIT_URL);
		}
		else if(rxCaptchaId.indexIn(data, 0) == -1)
		{
			handleError(ERROR_PARSE_CAPTCHA_ID);
		}
		else
		{
			bool ok;
			m_captchaId = rxCaptchaId.cap(1).toInt(&ok);
			if(!ok)
			{
				handleError(ERROR_PARSE_CAPTCHA_ID);
				return;
			}

			m_submitUrl = rxSubmitUrl.cap(1);

			QNetworkRequest request;
			request.setUrl(QUrl(QString("http://img.uloz.to/captcha/sound/%1.mp3").arg(m_captchaId)));
			request.setRawHeader("User-Agent", m_data.userAgent.toAscii());

			m_reply = m_manager->get(request);

			connect(m_reply, SIGNAL(finished()), this, SLOT(downloadAndSolveCaptcha()));
			connect(m_reply, SIGNAL(uploadProgress(qint64, qint64)), &m_timeoutTimer, SLOT(start()));
			connect(m_reply, SIGNAL(finished()), &m_timeoutTimer, SLOT(stop()));

			m_data.state = STATE_CAPTCHA;
			m_data.update = true;
		}
	}
	else
	{
		handleNetError();
	}
}
Пример #3
0
void IMDB::parseAndAssignInfos(QString html, Movie *movie, QList<int> infos)
{
    QRegExp rx;
    rx.setMinimal(true);

    rx.setPattern("<h1 class=\"header\">[^<]*<span class=\"itemprop\" itemprop=\"name\">([^<]*)</span>");
    if (infos.contains(MovieScraperInfos::Title) && rx.indexIn(html) != -1)
        movie->setName(rx.cap(1));

    rx.setPattern("<h1 itemprop=\"name\" class=\"\">(.*)&nbsp;<span id=\"titleYear\">");
    if (infos.contains(MovieScraperInfos::Title) && rx.indexIn(html) != -1)
        movie->setName(rx.cap(1));

    if (infos.contains(MovieScraperInfos::Director)) {
        rx.setPattern("<div class=\"txt-block\" itemprop=\"director\" itemscope itemtype=\"http://schema.org/Person\">(.*)</div>");
        QString directorsBlock;
        if (rx.indexIn(html) != -1) {
            directorsBlock = rx.cap(1);
        } else {
            rx.setPattern("<div class=\"credit_summary_item\">[^<]*<h4 class=\"inline\">Director:</h4>(.*)</div>");
            if (rx.indexIn(html) != -1)
                directorsBlock = rx.cap(1);
        }

        if (!directorsBlock.isEmpty()) {
            QStringList directors;
            rx.setPattern("<a href=\"[^\"]*\"(.*)itemprop='url'><span class=\"itemprop\" itemprop=\"name\">([^<]*)</span></a>");
            int pos = 0;
            while ((pos = rx.indexIn(directorsBlock, pos)) != -1) {
                directors << rx.cap(2);
                pos += rx.matchedLength();
            }
            movie->setDirector(directors.join(", "));
        }
    }

    if (infos.contains(MovieScraperInfos::Writer)) {
        rx.setPattern("<div class=\"txt-block\" itemprop=\"creator\" itemscope itemtype=\"http://schema.org/Person\">(.*)</div>");
        QString writersBlock;
        if (rx.indexIn(html) != -1) {
            writersBlock = rx.cap(1);
        } else {
            rx.setPattern("<div class=\"credit_summary_item\">[^<]*<h4 class=\"inline\">Writers:</h4>(.*)</div>");
            if (rx.indexIn(html) != -1)
                writersBlock = rx.cap(1);
        }

        if (!writersBlock.isEmpty()) {
            QStringList writers;
            rx.setPattern("<a href=\"[^\"]*\"(.*)itemprop='url'><span class=\"itemprop\" itemprop=\"name\">([^<]*)</span></a>");
            int pos = 0;
            while ((pos = rx.indexIn(writersBlock, pos)) != -1) {
                writers << rx.cap(2);
                pos += rx.matchedLength();
            }
            movie->setWriter(writers.join(", "));
        }
    }

    rx.setPattern("<div class=\"see-more inline canwrap\" itemprop=\"genre\">[^<]*<h4 class=\"inline\">Genres:</h4>(.*)</div>");
    if (infos.contains(MovieScraperInfos::Genres) && rx.indexIn(html) != -1) {
        QString genres = rx.cap(1);
        rx.setPattern("<a href=\"[^\"]*\"[^>]*>([^<]*)</a>");
        int pos = 0;
        while ((pos = rx.indexIn(genres, pos)) != -1) {
            movie->addGenre(Helper::instance()->mapGenre(rx.cap(1).trimmed()));
            pos += rx.matchedLength();
        }
    }

    rx.setPattern("<div class=\"txt-block\">.*<h4 class=\"inline\">Taglines:</h4>(.*)</div>");
    if (infos.contains(MovieScraperInfos::Tagline) && rx.indexIn(html) != -1) {
        QString tagline = rx.cap(1);
        QRegExp rxMore("<span class=\"see-more inline\">.*</span>");
        rxMore.setMinimal(true);
        tagline.remove(rxMore);
        movie->setTagline(tagline.trimmed());
    }

    rx.setPattern("<div class=\"see-more inline canwrap\" itemprop=\"keywords\">(.*)</div>");
    if (infos.contains(MovieScraperInfos::Tags) && rx.indexIn(html) != -1) {
        QString keywords = rx.cap(1);
        rx.setPattern("<span class=\"itemprop\" itemprop=\"keywords\">([^<]*)</span>");
        int pos = 0;
        while ((pos = rx.indexIn(keywords, pos)) != -1) {
            movie->addTag(rx.cap(1).trimmed());
            pos += rx.matchedLength();
        }
    }

    if (infos.contains(MovieScraperInfos::Released)) {
        rx.setPattern("<a href=\"[^\"]*\"(.*)title=\"See all release dates\" >[^<]*<meta itemprop=\"datePublished\" content=\"([^\"]*)\" />");
        if (rx.indexIn(html) != -1) {
            movie->setReleased(QDate::fromString(rx.cap(2), "yyyy-MM-dd"));
        } else {
            rx.setPattern("<h4 class=\"inline\">Release Date:</h4> ([0-9]+) ([A-z]*) ([0-9]{4})");
            if (rx.indexIn(html) != -1) {
                int day = rx.cap(1).trimmed().toInt();
                int month = -1;
                QString monthName = rx.cap(2).trimmed();
                int year = rx.cap(3).trimmed().toInt();
                if (monthName.contains("January", Qt::CaseInsensitive))
                    month = 1;
                else if (monthName.contains("February", Qt::CaseInsensitive))
                    month = 2;
                else if (monthName.contains("March", Qt::CaseInsensitive))
                    month = 3;
                else if (monthName.contains("April", Qt::CaseInsensitive))
                    month = 4;
                else if (monthName.contains("May", Qt::CaseInsensitive))
                    month = 5;
                else if (monthName.contains("June", Qt::CaseInsensitive))
                    month = 6;
                else if (monthName.contains("July", Qt::CaseInsensitive))
                    month = 7;
                else if (monthName.contains("August", Qt::CaseInsensitive))
                    month = 8;
                else if (monthName.contains("September", Qt::CaseInsensitive))
                    month = 9;
                else if (monthName.contains("October", Qt::CaseInsensitive))
                    month = 10;
                else if (monthName.contains("November", Qt::CaseInsensitive))
                    month = 11;
                else if (monthName.contains("December", Qt::CaseInsensitive))
                    month = 12;

                if (day != 0 && month != -1 && year != 0)
                    movie->setReleased(QDate(year, month, day));
            }
        }
    }


    rx.setPattern("<meta itemprop=\"contentRating\" content=\"([^\"]*)\">");
    if (infos.contains(MovieScraperInfos::Certification) && rx.indexIn(html) != -1)
        movie->setCertification(Helper::instance()->mapCertification(rx.cap(1)));

    rx.setPattern("<time itemprop=\"duration\" datetime=\"PT([0-9]+)M\" >");
    if (infos.contains(MovieScraperInfos::Runtime) && rx.indexIn(html) != -1)
        movie->setRuntime(rx.cap(1).toInt());

    rx.setPattern("<h4 class=\"inline\">Runtime:</h4>[^<]*<time itemprop=\"duration\" datetime=\"PT([0-9]+)M\">");
    if (infos.contains(MovieScraperInfos::Runtime) && rx.indexIn(html) != -1)
        movie->setRuntime(rx.cap(1).toInt());

    rx.setPattern("<p itemprop=\"description\">(.*)</p>");
    if (infos.contains(MovieScraperInfos::Overview) && rx.indexIn(html) != -1) {
        QString outline = rx.cap(1).remove(QRegExp("<[^>]*>"));
        outline = outline.remove("See full summary&nbsp;&raquo;").trimmed();
        movie->setOutline(outline);
    }

    rx.setPattern("<div class=\"summary_text\" itemprop=\"description\">(.*)</div>");
    if (infos.contains(MovieScraperInfos::Overview) && rx.indexIn(html) != -1) {
        QString outline = rx.cap(1).remove(QRegExp("<[^>]*>")).trimmed();
        movie->setOutline(outline);
    }

    rx.setPattern("<div class=\"inline canwrap\" itemprop=\"description\">(.*)</div>");
    if (infos.contains(MovieScraperInfos::Overview) && rx.indexIn(html) != -1) {
        QString overview = rx.cap(1).trimmed();
        QRegExp rxWrittenBy("<em class=\"nobr\">.*</em>");
        rxWrittenBy.setMinimal(true);
        overview.remove(rxWrittenBy).remove(QRegExp("<[^>]*>"));
        movie->setOverview(overview.trimmed());
    }

    if (infos.contains(MovieScraperInfos::Rating)) {
        rx.setPattern("<div class=\"star-box-details\" itemtype=\"http://schema.org/AggregateRating\" itemscope itemprop=\"aggregateRating\">(.*)</div>");
        if (rx.indexIn(html) != -1) {
            QString content = rx.cap(1);
            rx.setPattern("<span itemprop=\"ratingValue\">(.*)</span>");
            if (rx.indexIn(content) != -1)
                movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat());

            rx.setPattern("<span itemprop=\"ratingCount\">(.*)</span>");
            if (rx.indexIn(content) != -1)
                movie->setVotes(rx.cap(1).replace(",", "").replace(".", "").toInt());
        } else {
            rx.setPattern("<div class=\"imdbRating\" itemtype=\"http://schema.org/AggregateRating\" itemscope=\"\" itemprop=\"aggregateRating\">(.*)</div>");
            if (rx.indexIn(html) != -1) {
                QString content = rx.cap(1);
                rx.setPattern("([0-9]\\.[0-9]) based on ([0-9\\,]*) ");
                if (rx.indexIn(content) != -1) {
                    movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat());
                    movie->setVotes(rx.cap(2).replace(",", "").replace(".", "").toInt());
                }
                rx.setPattern("([0-9]\\,[0-9]) based on ([0-9\\.]*) ");
                if (rx.indexIn(content) != -1) {
                    movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat());
                    movie->setVotes(rx.cap(2).replace(",", "").replace(".", "").toInt());
                }
            }
        }
    }

    rx.setPattern("<strong>Top 250 #([0-9]+)</strong>");
    if (infos.contains(MovieScraperInfos::Rating) && rx.indexIn(html) != -1)
        movie->setTop250(rx.cap(1).toInt());

    if (infos.contains(MovieScraperInfos::Studios)) {
        rx.setPattern("<span itemprop=\"creator\" itemscope itemtype=\"http://schema.org/Organization\">.*<span class=\"itemprop\" itemprop=\"name\">([^<]*)</span>.*</span>");
        int pos = 0;
        while ((pos = rx.indexIn(html, pos)) != -1) {
            movie->addStudio(Helper::instance()->mapStudio(rx.cap(1).trimmed()));
            pos += rx.matchedLength();
        }
    }

    rx.setPattern("<div class=\"txt-block\">[^<]*<h4 class=\"inline\">Country:</h4>(.*)</div>");
    if (infos.contains(MovieScraperInfos::Countries) && rx.indexIn(html) != -1) {
        QString content = rx.cap(1);
        rx.setPattern("<a href=\"[^\"]*\"[\\n\\s]*itemprop='url'>([^<]*)</a>");
        int pos = 0;
        while ((pos = rx.indexIn(content, pos)) != -1) {
            movie->addCountry(Helper::instance()->mapCountry(rx.cap(1).trimmed()));
            pos += rx.matchedLength();
        }
    }

    rx.setPattern("<table class=\"cast_list\">(.*)</table>");
    if (infos.contains(MovieScraperInfos::Actors) && rx.indexIn(html) != -1) {
        QString content = rx.cap(1);
        rx.setPattern("<tr class=\"[^\"]*\">(.*)</tr>");
        int pos = 0;
        while ((pos = rx.indexIn(content, pos)) != -1) {
            QString actor = rx.cap(1);
            pos += rx.matchedLength();

            Actor a;

            QRegExp rxName("<span class=\"itemprop\" itemprop=\"name\">(.*)</span>");
            rxName.setMinimal(true);
            if (rxName.indexIn(actor) != -1)
                a.name = rxName.cap(1).trimmed();

            QRegExp rxRole("<td class=\"character\">[\\s\\n]*<div>[\\s\\n](.*)[\\s\\n]*</div>");
            rxRole.setMinimal(true);
            if (rxRole.indexIn(actor) != -1) {
                QString role = rxRole.cap(1);
                rxRole.setPattern("<a href=\"[^\"]*\" >(.*)</a>");
                if (rxRole.indexIn(role) != -1)
                    role = rxRole.cap(1);
                a.role = role.trimmed().replace(QRegExp("[\\s\\n]+"), " ");
            }

            QRegExp rxImg("<img [^<]*loadlate=\"([^\"]*)\"[^<]* />");
            rxImg.setMinimal(true);
            if (rxImg.indexIn(actor) != -1) {
                QString img = rxImg.cap(1);
                QRegExp aRx1("http://ia.media-imdb.com/images/(.*)/(.*)._V(.*).jpg");
                aRx1.setMinimal(true);
                if (aRx1.indexIn(img) != -1)
                    a.thumb = "http://ia.media-imdb.com/images/" + aRx1.cap(1) + "/" + aRx1.cap(2) + ".jpg";
                else
                    a.thumb = rxImg.cap(1);
            }

            movie->addActor(a);
        }
    }
}