// Builds a QNetworkRequest for the given URL string.
//
// The URL string is run through processUrl() and the resulting `url` member
// is used as the request target. A "User-Agent" header is always set from
// userAgent(). When `referer` is non-null, the encoded URL of that reply is
// sent as the "Referer" header; otherwise no Referer header is added.
QNetworkRequest getRequest(const QString& urlString, QNetworkReply* referer)
{
    ProcessedUrl processed = processUrl(urlString);

    QNetworkRequest request(processed.url);
    request.setRawHeader("User-Agent", userAgent().toUtf8());

    // Nothing more to add when there is no originating reply.
    if (!referer) {
        return request;
    }

    request.setRawHeader("Referer", referer->url().toEncoded());
    return request;
}
// adapted from afisha-cinema/httphelpers.cpp int httpGet(QHttp* http, const QString& urlString) { ProcessedUrl url = processUrl(urlString); QHttpRequestHeader header("GET", url.fullUri); header.setValue("User-Agent", userAgent()); header.setValue("Host", QString("%1:%2").arg(url.url.host(), url.url.port())); header.setValue("Accept-Language", "en-us"); header.setValue("Accept", "*/*"); QByteArray content; header.setContentLength(content.length()); Q_ASSERT(url.url.scheme() == "https"); http->setHost(url.url.host(), QHttp::ConnectionModeHttps, 443); return http->request(header, content); }
void WebCrawler::crawler(const std::string &url, const size_t depth) { std::string page; std::string newUrl; // Filter output if (matchOutputFilter(url)) { m_filteredUrls.insert(url); } // End if reach depth if (depth > m_depth) return; // Check whether page already crawled if (m_searchedUrls.find(url) != m_searchedUrls.end()) { std::cout << ">>> Omit duplicate URL: " << url << std::endl; return; } // Add paged to crawled pages m_searchedUrls.insert(url); std::cout << ">>> Searching [depth = " << depth << "]: " << url << std::endl; // Fetch page page = m_cw->fetch(url); // Return if page is empty if (page.size() == 0) return; // Iterate all hrefs on page auto hrefs = getHrefs(page); for (auto it = hrefs.rbegin(); it != hrefs.rend(); ++it) { // Omit invalid url if (it->size() == 0 || it->at(0) == '#') continue; // Process Url newUrl = processUrl(url, *it); // std::cout << '\t' << url << " >> " << *it << " >> " << newUrl << std::endl; crawler(newUrl, depth + 1); } }
// Builds a multipart/form-data POST request that uploads a single file.
//
// Parameters:
//   urlString - target URL; run through processUrl() before use.
//   fieldName - form field name, escaped with TextUtil::escape().
//   fileName  - file name reported in Content-Disposition, escaped likewise.
//   fileData  - raw file contents, sent as application/octet-stream.
//   postData  - out-parameter; the encoded multipart body is APPENDED to it.
//               It is not cleared first, and Content-Length is computed from
//               its total length afterwards, so callers presumably pass an
//               empty array (TODO confirm against callers). Must not be null.
//
// Returns the request with User-Agent, Content-Type and Content-Length set.
// The caller is responsible for sending `postData` as the request body.
QNetworkRequest postFileRequest(const QString& urlString, const QString& fieldName,
                                const QString& fileName, const QByteArray& fileData,
                                QByteArray* postData)
{
    // The boundary is derived from the file data by getBoundaryString().
    QString boundary = getBoundaryString(fileData);

    // Single part: headers, blank line, payload, closing delimiter.
    postData->append("--" + boundary + "\r\n");
    postData->append("Content-Disposition: form-data; name=\"" + TextUtil::escape(fieldName) +
                     "\"; filename=\"" + TextUtil::escape(fileName.toUtf8()) + "\"\r\n");
    postData->append("Content-Type: application/octet-stream\r\n");
    postData->append("\r\n");
    postData->append(fileData);
    postData->append("\r\n--" + boundary + "--\r\n");

    ProcessedUrl url = processUrl(urlString);
    QNetworkRequest result(url.url);
    result.setRawHeader("User-Agent", userAgent().toUtf8());

    // Media-type parameters are separated from the type by ';' (RFC 2045,
    // RFC 7578); a ',' here is not valid and strict servers fail to find
    // the boundary.
    result.setRawHeader("Content-Type", "multipart/form-data; boundary=" + boundary.toLatin1());
    result.setRawHeader("Content-Length", QString::number(postData->length()).toUtf8());
    return result;
}