/**
 * Picks one thumbnail entry at random from a listing page and fetches its image.
 *
 * The page is scanned for `<div class="thumb">` markers; one of them is chosen
 * using _randomGenerator, and the first link/title pair found at or after that
 * marker is used to build the result.
 *
 * @param parameters Forwarded unchanged to GetBestImageUrl().
 * @param page       HTML of the listing page to scan.
 * @return A result with name, url and image filled in, or a default-constructed
 *         WallpaperResult when the page has no thumbnails, the link cannot be
 *         parsed, the link does not end with "wallpapers", or no image URL
 *         could be determined.
 */
WallpaperResult WallpapersWideProvider::GetRandomWallpaper(WallpaperParameters parameters, const QString &page)
{
    WallpaperResult result;

    QRegExp rxBeginImage("<div class=\"thumb\">");

    // First pass: count the thumbnails on the page.
    int numberOfImages = 0;
    int pos = 0;
    while ((pos = rxBeginImage.indexIn(page, pos)) != -1) {
        ++numberOfImages;
        pos += rxBeginImage.matchedLength();
    }
    if (numberOfImages == 0) {
        return result;
    }

    // 1-based index of the thumbnail to use.
    // NOTE(review): assumes _randomGenerator.get() is non-negative; a negative
    // value would yield an index that never matches below - confirm.
    const int randomImageNumber = (_randomGenerator.get() % numberOfImages) + 1;

    // Second pass: stop with `pos` on the start of the chosen thumbnail marker.
    pos = 0;
    int seen = 0;
    while ((pos = rxBeginImage.indexIn(page, pos)) != -1) {
        ++seen;
        if (seen == randomImageNumber) {
            break;
        }
        pos += rxBeginImage.matchedLength();
    }

    // Link target (without the ".html" suffix) and title of the chosen entry.
    QRegExp rxImageData("<a href=\"/([^\\.]*)\\.html\" title=\"([^\"]*)\"");
    if (rxImageData.indexIn(page, pos) == -1) {
        return result;
    }
    const QString href = rxImageData.cap(1);
    QString name = rxImageData.cap(2);

    // Prefer the heading text following the marker over the link title.
    QRegExp rxName("<h1>([^<]*)<");
    if (rxName.indexIn(page, pos) != -1) {
        name = rxName.cap(1);
    }

    if (!href.endsWith("wallpapers")) {
        return result;
    }

    const QString bestResolutionUrl = GetBestImageUrl(parameters, href);
    // isEmpty() covers both null and empty strings, which is what the former
    // comparison against a literal 0 evaluated to, without relying on the
    // implicit const char* conversion.
    if (bestResolutionUrl.isEmpty()) {
        return result;
    }

    result.name = Utils::UnescapeHtml(name);
    result.url = QString("http://wallpaperswide.com/") + href + QString(".html");
    result.image = Utils::GetDataFromUrl(bestResolutionUrl, "wallpaperswide.com", "http://wallpaperswide.com");
    return result;
}
void CDownload::retrieveCaptchaUrl() { if(m_reply->attribute(QNetworkRequest::RedirectionTargetAttribute).isValid()) { m_reply->deleteLater(); QNetworkRequest request; request.setUrl(m_reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl()); request.setRawHeader("User-Agent", m_data.userAgent.toAscii()); m_reply = m_manager->get(request); connect(m_reply, SIGNAL(finished()), this, SLOT(retrieveCaptchaUrl())); connect(m_reply, SIGNAL(uploadProgress(qint64, qint64)), &m_timeoutTimer, SLOT(start())); connect(m_reply, SIGNAL(finished()), &m_timeoutTimer, SLOT(stop())); } else if(m_reply->error() == QNetworkReply::NoError) { QByteArray data = m_reply->readAll(); QRegExp rxSubmitUrl("form name=\"dwn\" action=\"([^\"]+)\""); QRegExp rxCaptchaId("name=\"captcha_nb\" value=\"([0-9]+)\""); if(m_data.name.isEmpty()) // parse file name from html { QRegExp rxName("<h2[^>]*>[^<]*<a href=[^>]*>([^<]+)</a>[^<]*</h2>"); if(rxName.indexIn(data) != -1) m_data.name = rxName.cap(1); } m_reply->deleteLater(); m_reply = 0; if(rxSubmitUrl.indexIn(data, 0) == -1) { handleError(ERROR_PARSE_SUBMIT_URL); } else if(rxCaptchaId.indexIn(data, 0) == -1) { handleError(ERROR_PARSE_CAPTCHA_ID); } else { bool ok; m_captchaId = rxCaptchaId.cap(1).toInt(&ok); if(!ok) { handleError(ERROR_PARSE_CAPTCHA_ID); return; } m_submitUrl = rxSubmitUrl.cap(1); QNetworkRequest request; request.setUrl(QUrl(QString("http://img.uloz.to/captcha/sound/%1.mp3").arg(m_captchaId))); request.setRawHeader("User-Agent", m_data.userAgent.toAscii()); m_reply = m_manager->get(request); connect(m_reply, SIGNAL(finished()), this, SLOT(downloadAndSolveCaptcha())); connect(m_reply, SIGNAL(uploadProgress(qint64, qint64)), &m_timeoutTimer, SLOT(start())); connect(m_reply, SIGNAL(finished()), &m_timeoutTimer, SLOT(stop())); m_data.state = STATE_CAPTCHA; m_data.update = true; } } else { handleNetError(); } }
void IMDB::parseAndAssignInfos(QString html, Movie *movie, QList<int> infos) { QRegExp rx; rx.setMinimal(true); rx.setPattern("<h1 class=\"header\">[^<]*<span class=\"itemprop\" itemprop=\"name\">([^<]*)</span>"); if (infos.contains(MovieScraperInfos::Title) && rx.indexIn(html) != -1) movie->setName(rx.cap(1)); rx.setPattern("<h1 itemprop=\"name\" class=\"\">(.*) <span id=\"titleYear\">"); if (infos.contains(MovieScraperInfos::Title) && rx.indexIn(html) != -1) movie->setName(rx.cap(1)); if (infos.contains(MovieScraperInfos::Director)) { rx.setPattern("<div class=\"txt-block\" itemprop=\"director\" itemscope itemtype=\"http://schema.org/Person\">(.*)</div>"); QString directorsBlock; if (rx.indexIn(html) != -1) { directorsBlock = rx.cap(1); } else { rx.setPattern("<div class=\"credit_summary_item\">[^<]*<h4 class=\"inline\">Director:</h4>(.*)</div>"); if (rx.indexIn(html) != -1) directorsBlock = rx.cap(1); } if (!directorsBlock.isEmpty()) { QStringList directors; rx.setPattern("<a href=\"[^\"]*\"(.*)itemprop='url'><span class=\"itemprop\" itemprop=\"name\">([^<]*)</span></a>"); int pos = 0; while ((pos = rx.indexIn(directorsBlock, pos)) != -1) { directors << rx.cap(2); pos += rx.matchedLength(); } movie->setDirector(directors.join(", ")); } } if (infos.contains(MovieScraperInfos::Writer)) { rx.setPattern("<div class=\"txt-block\" itemprop=\"creator\" itemscope itemtype=\"http://schema.org/Person\">(.*)</div>"); QString writersBlock; if (rx.indexIn(html) != -1) { writersBlock = rx.cap(1); } else { rx.setPattern("<div class=\"credit_summary_item\">[^<]*<h4 class=\"inline\">Writers:</h4>(.*)</div>"); if (rx.indexIn(html) != -1) writersBlock = rx.cap(1); } if (!writersBlock.isEmpty()) { QStringList writers; rx.setPattern("<a href=\"[^\"]*\"(.*)itemprop='url'><span class=\"itemprop\" itemprop=\"name\">([^<]*)</span></a>"); int pos = 0; while ((pos = rx.indexIn(writersBlock, pos)) != -1) { writers << rx.cap(2); pos += rx.matchedLength(); } movie->setWriter(writers.join(", 
")); } } rx.setPattern("<div class=\"see-more inline canwrap\" itemprop=\"genre\">[^<]*<h4 class=\"inline\">Genres:</h4>(.*)</div>"); if (infos.contains(MovieScraperInfos::Genres) && rx.indexIn(html) != -1) { QString genres = rx.cap(1); rx.setPattern("<a href=\"[^\"]*\"[^>]*>([^<]*)</a>"); int pos = 0; while ((pos = rx.indexIn(genres, pos)) != -1) { movie->addGenre(Helper::instance()->mapGenre(rx.cap(1).trimmed())); pos += rx.matchedLength(); } } rx.setPattern("<div class=\"txt-block\">.*<h4 class=\"inline\">Taglines:</h4>(.*)</div>"); if (infos.contains(MovieScraperInfos::Tagline) && rx.indexIn(html) != -1) { QString tagline = rx.cap(1); QRegExp rxMore("<span class=\"see-more inline\">.*</span>"); rxMore.setMinimal(true); tagline.remove(rxMore); movie->setTagline(tagline.trimmed()); } rx.setPattern("<div class=\"see-more inline canwrap\" itemprop=\"keywords\">(.*)</div>"); if (infos.contains(MovieScraperInfos::Tags) && rx.indexIn(html) != -1) { QString keywords = rx.cap(1); rx.setPattern("<span class=\"itemprop\" itemprop=\"keywords\">([^<]*)</span>"); int pos = 0; while ((pos = rx.indexIn(keywords, pos)) != -1) { movie->addTag(rx.cap(1).trimmed()); pos += rx.matchedLength(); } } if (infos.contains(MovieScraperInfos::Released)) { rx.setPattern("<a href=\"[^\"]*\"(.*)title=\"See all release dates\" >[^<]*<meta itemprop=\"datePublished\" content=\"([^\"]*)\" />"); if (rx.indexIn(html) != -1) { movie->setReleased(QDate::fromString(rx.cap(2), "yyyy-MM-dd")); } else { rx.setPattern("<h4 class=\"inline\">Release Date:</h4> ([0-9]+) ([A-z]*) ([0-9]{4})"); if (rx.indexIn(html) != -1) { int day = rx.cap(1).trimmed().toInt(); int month = -1; QString monthName = rx.cap(2).trimmed(); int year = rx.cap(3).trimmed().toInt(); if (monthName.contains("January", Qt::CaseInsensitive)) month = 1; else if (monthName.contains("February", Qt::CaseInsensitive)) month = 2; else if (monthName.contains("March", Qt::CaseInsensitive)) month = 3; else if (monthName.contains("April", 
Qt::CaseInsensitive)) month = 4; else if (monthName.contains("May", Qt::CaseInsensitive)) month = 5; else if (monthName.contains("June", Qt::CaseInsensitive)) month = 6; else if (monthName.contains("July", Qt::CaseInsensitive)) month = 7; else if (monthName.contains("August", Qt::CaseInsensitive)) month = 8; else if (monthName.contains("September", Qt::CaseInsensitive)) month = 9; else if (monthName.contains("October", Qt::CaseInsensitive)) month = 10; else if (monthName.contains("November", Qt::CaseInsensitive)) month = 11; else if (monthName.contains("December", Qt::CaseInsensitive)) month = 12; if (day != 0 && month != -1 && year != 0) movie->setReleased(QDate(year, month, day)); } } } rx.setPattern("<meta itemprop=\"contentRating\" content=\"([^\"]*)\">"); if (infos.contains(MovieScraperInfos::Certification) && rx.indexIn(html) != -1) movie->setCertification(Helper::instance()->mapCertification(rx.cap(1))); rx.setPattern("<time itemprop=\"duration\" datetime=\"PT([0-9]+)M\" >"); if (infos.contains(MovieScraperInfos::Runtime) && rx.indexIn(html) != -1) movie->setRuntime(rx.cap(1).toInt()); rx.setPattern("<h4 class=\"inline\">Runtime:</h4>[^<]*<time itemprop=\"duration\" datetime=\"PT([0-9]+)M\">"); if (infos.contains(MovieScraperInfos::Runtime) && rx.indexIn(html) != -1) movie->setRuntime(rx.cap(1).toInt()); rx.setPattern("<p itemprop=\"description\">(.*)</p>"); if (infos.contains(MovieScraperInfos::Overview) && rx.indexIn(html) != -1) { QString outline = rx.cap(1).remove(QRegExp("<[^>]*>")); outline = outline.remove("See full summary »").trimmed(); movie->setOutline(outline); } rx.setPattern("<div class=\"summary_text\" itemprop=\"description\">(.*)</div>"); if (infos.contains(MovieScraperInfos::Overview) && rx.indexIn(html) != -1) { QString outline = rx.cap(1).remove(QRegExp("<[^>]*>")).trimmed(); movie->setOutline(outline); } rx.setPattern("<div class=\"inline canwrap\" itemprop=\"description\">(.*)</div>"); if (infos.contains(MovieScraperInfos::Overview) && 
rx.indexIn(html) != -1) { QString overview = rx.cap(1).trimmed(); QRegExp rxWrittenBy("<em class=\"nobr\">.*</em>"); rxWrittenBy.setMinimal(true); overview.remove(rxWrittenBy).remove(QRegExp("<[^>]*>")); movie->setOverview(overview.trimmed()); } if (infos.contains(MovieScraperInfos::Rating)) { rx.setPattern("<div class=\"star-box-details\" itemtype=\"http://schema.org/AggregateRating\" itemscope itemprop=\"aggregateRating\">(.*)</div>"); if (rx.indexIn(html) != -1) { QString content = rx.cap(1); rx.setPattern("<span itemprop=\"ratingValue\">(.*)</span>"); if (rx.indexIn(content) != -1) movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat()); rx.setPattern("<span itemprop=\"ratingCount\">(.*)</span>"); if (rx.indexIn(content) != -1) movie->setVotes(rx.cap(1).replace(",", "").replace(".", "").toInt()); } else { rx.setPattern("<div class=\"imdbRating\" itemtype=\"http://schema.org/AggregateRating\" itemscope=\"\" itemprop=\"aggregateRating\">(.*)</div>"); if (rx.indexIn(html) != -1) { QString content = rx.cap(1); rx.setPattern("([0-9]\\.[0-9]) based on ([0-9\\,]*) "); if (rx.indexIn(content) != -1) { movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat()); movie->setVotes(rx.cap(2).replace(",", "").replace(".", "").toInt()); } rx.setPattern("([0-9]\\,[0-9]) based on ([0-9\\.]*) "); if (rx.indexIn(content) != -1) { movie->setRating(rx.cap(1).trimmed().replace(",", ".").toFloat()); movie->setVotes(rx.cap(2).replace(",", "").replace(".", "").toInt()); } } } } rx.setPattern("<strong>Top 250 #([0-9]+)</strong>"); if (infos.contains(MovieScraperInfos::Rating) && rx.indexIn(html) != -1) movie->setTop250(rx.cap(1).toInt()); if (infos.contains(MovieScraperInfos::Studios)) { rx.setPattern("<span itemprop=\"creator\" itemscope itemtype=\"http://schema.org/Organization\">.*<span class=\"itemprop\" itemprop=\"name\">([^<]*)</span>.*</span>"); int pos = 0; while ((pos = rx.indexIn(html, pos)) != -1) { 
movie->addStudio(Helper::instance()->mapStudio(rx.cap(1).trimmed())); pos += rx.matchedLength(); } } rx.setPattern("<div class=\"txt-block\">[^<]*<h4 class=\"inline\">Country:</h4>(.*)</div>"); if (infos.contains(MovieScraperInfos::Countries) && rx.indexIn(html) != -1) { QString content = rx.cap(1); rx.setPattern("<a href=\"[^\"]*\"[\\n\\s]*itemprop='url'>([^<]*)</a>"); int pos = 0; while ((pos = rx.indexIn(content, pos)) != -1) { movie->addCountry(Helper::instance()->mapCountry(rx.cap(1).trimmed())); pos += rx.matchedLength(); } } rx.setPattern("<table class=\"cast_list\">(.*)</table>"); if (infos.contains(MovieScraperInfos::Actors) && rx.indexIn(html) != -1) { QString content = rx.cap(1); rx.setPattern("<tr class=\"[^\"]*\">(.*)</tr>"); int pos = 0; while ((pos = rx.indexIn(content, pos)) != -1) { QString actor = rx.cap(1); pos += rx.matchedLength(); Actor a; QRegExp rxName("<span class=\"itemprop\" itemprop=\"name\">(.*)</span>"); rxName.setMinimal(true); if (rxName.indexIn(actor) != -1) a.name = rxName.cap(1).trimmed(); QRegExp rxRole("<td class=\"character\">[\\s\\n]*<div>[\\s\\n](.*)[\\s\\n]*</div>"); rxRole.setMinimal(true); if (rxRole.indexIn(actor) != -1) { QString role = rxRole.cap(1); rxRole.setPattern("<a href=\"[^\"]*\" >(.*)</a>"); if (rxRole.indexIn(role) != -1) role = rxRole.cap(1); a.role = role.trimmed().replace(QRegExp("[\\s\\n]+"), " "); } QRegExp rxImg("<img [^<]*loadlate=\"([^\"]*)\"[^<]* />"); rxImg.setMinimal(true); if (rxImg.indexIn(actor) != -1) { QString img = rxImg.cap(1); QRegExp aRx1("http://ia.media-imdb.com/images/(.*)/(.*)._V(.*).jpg"); aRx1.setMinimal(true); if (aRx1.indexIn(img) != -1) a.thumb = "http://ia.media-imdb.com/images/" + aRx1.cap(1) + "/" + aRx1.cap(2) + ".jpg"; else a.thumb = rxImg.cap(1); } movie->addActor(a); } } }