/**
 * Stage 3: request the file itself from the address selected earlier.
 *
 * Resets the progress bookkeeping in rs_info, marks the transfer as
 * RS::Downloading and issues the request. The raw response header is dumped
 * to the session file with the "-head3.http" suffix.
 *
 * @param url  address to request (only read here)
 * @throws DBreak  when the response code is not HTTP_OK, or when an Exception
 *                 is raised while performing the request
 */
void stage_3(string &url)
{
    rsprintf("Lacze z '%s' (3)", url.c_str());

    // Restart the progress counters for the new transfer.
    rs_progress_start = Time::in_usec();
    rs_info.status = RS::Downloading;
    rs_info.bytes = 0;
    rs_info.tmpspeed = 0;

    off_t length = 0;
    try {
        // TODO
        // NOTE(review): the first argument is presumably the local target
        // path and "mirror=" the request payload -- confirm against Http::get.
        int code = http.get(rs_download_get_path(rs_info.url.c_str()).string().c_str(),
                            length, url.c_str(), "mirror=", NULL,
                            rs_progress_callback, NULL);

        // Keep a copy of the raw response header for later inspection.
        filesystem::ofstream head(rs_sessions_get_path(url, "-head3.http"));
        const char *raw_header = http.get_recv_header();
        if (raw_header)
            head.write(raw_header, strlen(raw_header));

        // off_t is not guaranteed to be long long; convert explicitly so the
        // value matches the %lld conversion in the messages below.
        const long long reported_length = static_cast<long long>(length);

        if (code == HTTP_OK) // we expect status 200
            rsprintf("Polaczono, code = %d, length = %lld", code, reported_length);
        else {
            rsprintf("Polaczono, brak strony? code = %d, length = %lld, try again",
                     code, reported_length);
            throw DBreak();
        }

        //string clen = http.get_recv_header("content-length");
        //rsprintf("");
    } catch (Exception &e) {
        rsprintf("Nastapil wyjatek: %s", e.what());
        throw DBreak();
    }
}
int main(int argc, char **argv) { Http http; char *page = NULL; size_t size = 0; for (int i = 1; i < argc; ++i) { cerr << "== GET '" << argv[i] << "' ==" << endl; try { //http.set_verbose(true); int code = http.get(page, size, argv[i]); if (page) delete[] page; const Http::Headers &hdrs = http.get_headers(); Http::Headers::const_iterator it; const char *loc = http.get_recv_header("location"); const char *coo = http.get_recv_header("set-cookie"); const char *len = http.get_recv_header("content-length"); cerr << "== code = " << code << endl; cerr << "== headers size = " << hdrs.size() << endl; cerr << "== location = " << (loc?loc:"?") << endl; cerr << "== set-cookie = " << (coo?coo:"?") << endl; cerr << "== content-length = " << (len?len:"?") << " (strona: " << size << ")"<< endl; } catch (Exception &e) { cerr << "== exception: " << e.what() << endl; } } return 0; }
/**
 * Stage 1: fetch the landing page and extract the address of the next page.
 *
 * The raw response header and the page body are dumped to the session files
 * with the "-head1.http" and "-page1.html" suffixes.
 *
 * @param url  in: address to fetch; out: replaced by the first capture of
 *             rs_regex_url found in the page
 * @throws DBreak  when the response code is not HTTP_OK, an Exception is
 *                 raised, the body is empty, or no next address is found
 * @throws DAbort  with RS::NotFound when the page matches one of the
 *                 "file unavailable" patterns
 */
void stage_1(string &url)
{
    rsprintf("Lacze z '%s'... (1)", url.c_str());

    char *buffer = NULL;
    size_t buflen = 0;

    // Releases the page on every exit path, including the exception paths
    // that previously leaked it.
    // NOTE(review): assumes http.get() leaves `buffer` either NULL or pointing
    // at a new[] allocation when it throws -- confirm.
    struct PageGuard {
        char *&page;
        explicit PageGuard(char *&p) : page(p) {}
        ~PageGuard() { delete[] page; }
    } guard(buffer);

    try {
        int code = http.get(buffer, buflen, url.c_str());

        // Keep a copy of the raw response header for later inspection.
        filesystem::ofstream head(rs_sessions_get_path(url, "-head1.http"));
        const char *raw_header = http.get_recv_header();
        if (raw_header)
            head.write(raw_header, strlen(raw_header));

        if (code == HTTP_OK) // we expect status 200
            rsprintf("Polaczono, code = %d, page = %p,%zd", code, buffer, buflen);
        else {
            rsprintf("Polaczono, brak strony? code = %d, page = %p,%zd, sprobuje za chwile...",
                     code, buffer, buflen);
            throw DBreak();
        }

        if (buffer) {
            filesystem::ofstream page(rs_sessions_get_path(url, "-page1.html"));
            page.write(buffer, buflen);
        }
    } catch (Exception &e) {
        rsprintf("Nastapil wyjatek: %s", e.what());
        throw DBreak();
    }

    // A 200 answer without a body would otherwise hand a null pointer to
    // regex_search() below.
    if (buffer == NULL) {
        rsprintf("Pusta odpowiedz serwera, sprobuje jeszcze raz...");
        throw DBreak();
    }

    // Check whether the file is available at all.
    // NOTE(review): the pointer-only regex_search overload needs a
    // NUL-terminated page -- confirm that http.get() guarantees this.
    if (regex_search(buffer, rs_regex_illegal_file)
        || regex_search(buffer, rs_regex_illegal_file2)
        || regex_search(buffer, rs_regex_not_available)
        || regex_search(buffer, rs_regex_not_found)) {
        rsprintf("Plik '%s' jest niedostepny", url.c_str());
        //rs_info.status = RS::NotFound;
        throw DAbort(RS::NotFound);
    }

    // Look for the address of the next page.
    cmatch what;
    if (!regex_search(buffer, what, rs_regex_url)) {
        rsprintf("Brak odnosnika do nastepnej strony, sprobuje jeszcze raz...");
        throw DBreak();
    }

    // Copy the match out before the guard releases the page it points into.
    url = what[1]; // set the new url
}
/**
 * Demo driver: fetches www.google.com through the Http class and stores the
 * received data in "http.txt".
 *
 * @return 0 when the file was written and closed successfully, 1 when the
 *         file could not be opened, written completely, or closed
 */
int main(int argc, char *argv[])
{
    Main app(APP_SOCKETS);
    Http http;

    app.open(argc, argv);

    http.setUserAgent("Http Class User-Agent, v.0.1");
    http.get("www.google.com", "");

    const char *fn = "http.txt";
    FILE *fp = fopen(fn, "wb");
    if (!fp) {
        perror(fn);
        return 1; // previously reported success although nothing was saved
    }

    int status = 0;

    // Write byte-wise (element size 1) so a short write is detectable from
    // fwrite()'s return value.
    const size_t size = static_cast<size_t>(http.getFileSize());
    if (size != 0 && fwrite(http.getFile(), 1, size, fp) != size) {
        perror(fn);
        status = 1;
    }

    // fclose() flushes buffered data, so a failure here also means data loss.
    if (fclose(fp) != 0) {
        perror(fn);
        status = 1;
    }

    return status;
}
int HttpPlugin::get_info(Task *task) { Http http; http.set_timeout(task->timeout); http.set_log(&debug_log); #ifdef HAVE_SSL if (task->url.get_protocol() == HTTPS) { http.set_use_ssl(true); } #endif if (task->url.get_user() != NULL) { http.auth(task->url.get_user(), task->url.get_password() ? task->url.get_password() : ""); } if (task->get_referer() != NULL) { http.header("Referer", task->get_referer()); } else { http.header("Referer", task->url.get_url()); } if (task->fileSize > 0) { // test the Range http.set_range(1); } if (task->proxy.get_type() == HTTP_PROXY) { if (task->proxy.get_host() == NULL) { return -1; } if (http.connect(task->proxy.get_host(), task->proxy.get_port()) < 0) { return -2; } http.set_host(task->url.get_host(), task->url.get_port()); if (task->proxy.get_user() != NULL) { http.proxy_auth(task->proxy.get_user(), task->proxy.get_password() ? task->proxy.get_password() : ""); } if (http.get(task->url.get_url()) < 0) { return -2; } } else { if (http.connect(task->url.get_host(), task->url.get_port()) < 0) { return -2; } if (http.get(task->url.get_encoded_path()) < 0) { return -2; } } if (http.parse_header() < 0) return -2; switch (http.get_status_code()) { case 200: // HTTP_STATUS_OK case 206: // HTTP_STATUS_PARTIAL_CONTENTS case 300: // HTTP_STATUS_MULTIPLE_CHOICES case 304: // HTTP_STATUS_NOT_MODIFIED break; case 301: // HTTP_STATUS_MOVED_PERMANENTLY case 302: // HTTP_STATUS_MOVED_TEMPORARILY case 303: // HTTP_SEE_OTHER case 307: // HTTP_STATUS_TEMPORARY_REDIRECT { // redirect task->fileSize = -1; // if not, the new location's filesize is wrong const char *location = http.get_header("Location"); if (location == NULL) { // I do not know when this will happen, but no harm location = http.get_header("Content-Location"); if (location == NULL) return -1; } if (strcmp(location, task->url.get_url()) == 0) break; if (task->url.reset_url(location) < 0) return -2; return S_REDIRECT; } case 305: // HTTP_USE_PROXY { // get the content through the proxy 
task->fileSize = -1; // if not, the new location's filesize is wrong return S_REDIRECT; } case 408: // HTTP_CLIENT_TIMEOUT case 504: // HTTP_GATEWAY_TIMEOUT case 503: // HTTP_UNAVAILABLE case 502: // HTTP_BAD_GATEWAY { // these errors can retry later return -2; } default: return -1; } // if the page is an active page, we maybe can not get the filesize if (task->fileSize < 0) { task->fileSize = http.get_file_size(); if (task->fileSize > 1) { // we need test whether the Range header is supported or not return -2; } } else { // IIS never return the Accept-Ranges header // We need check the Content-Range header for the resuming const char *ptr = http.get_header("Content-Range"); if (ptr) { while (*ptr != '\0' && !ISDIGIT(*ptr)) ptr++; if (*ptr++ == '1' && *ptr == '-') { // get the filesize again for ensure the size task->fileSize = 1 + http.get_file_size(); task->resumeSupported = true; } } } const char *filename; filename = http.get_header("Content-Disposition"); if (filename) { filename = strstr(filename, "filename="); if (filename) { filename += strlen("filename="); if (task->get_local_file() == NULL) { task->set_local_file(filename); } } } if (task->get_local_file() == NULL && task->url.get_file() == NULL) { task->set_local_file("index.html"); } return 0; };
int HttpPlugin::download(Task& task, Block *block) { block->state = STOP; if (task.resumeSupported) { if (block->downloaded >= block->size) { block->state = EXIT; return 0; } else { block->bufferFile.seek(block->startPoint + block->downloaded); } } else { block->bufferFile.seek(0); block->downloaded = 0; } Http http; http.set_timeout(task.timeout); http.set_log(&debug_log); #ifdef HAVE_SSL if (task.url.get_protocol() == HTTPS) { http.set_use_ssl(true); } #endif if (task.resumeSupported) { // the end is not set for the schedule purpose http.set_range(block->startPoint + block->downloaded); } if (task.url.get_user() != NULL) { http.auth(task.url.get_user(), task.url.get_password() ? task.url.get_password() : ""); } if (task.get_referer() != NULL) { http.header("Referer", task.get_referer()); } else { http.header("Referer", task.url.get_url()); } if (task.proxy.get_type() == HTTP_PROXY) { if (http.connect(task.proxy.get_host(), task.proxy.get_port()) < 0) { return -2; } http.set_host(task.url.get_host(), task.url.get_port()); if (task.proxy.get_user() != NULL) { http.proxy_auth(task.proxy.get_user(), task.proxy.get_password() ? task.proxy.get_password() : ""); } if (http.get(task.url.get_url()) < 0) { return -2; } } else { if (http.connect(task.url.get_host(), task.url.get_port()) < 0) { return -2; } if (http.get(task.url.get_encoded_path()) < 0) { return -2; } } if (http.parse_header() < 0) return -2; // the service maybe unreachable temply // some servers alway return 302, so nasty if (http.get_status_code() >= 400) return -2; _re_retr: block->state = RETR; if (block->bufferFile.retr_data_from(&http, &block->downloaded, block->size - block->downloaded) < 0) { block->state = STOP; return -2; } if (task.resumeSupported && block->downloaded < block->size) { block->state = STOP; return -2; } block->state = WAIT; usleep(500000); if (block->state == WAKEUP) goto _re_retr; block->state = EXIT; return 0; };
void stage_2(string &url) { rsprintf("Lacze z '%s' (2)", url.c_str()); char *buffer = NULL; size_t buflen = 0; try { int code = http.get(buffer, buflen, url.c_str(), "dl.start=Free"); filesystem::ofstream head(rs_sessions_get_path(url, "-head2.http")); head.write(http.get_recv_header(), strlen(http.get_recv_header())); if (code == HTTP_OK) // Oczekujemy kodu 200 rsprintf("Polaczono, code = %d, page = %p,%zd", code, buffer, buflen); else { rsprintf("Polaczono, brak strony? code = %d, page = %p,%zd, try again", code, buffer, buflen); if (buffer) delete[] buffer; throw DBreak(); } filesystem::ofstream page(rs_sessions_get_path(url, "-page2.html")); page.write(buffer, buflen); } catch (Exception &e) { rsprintf("Nastapil wyjatek: %s", e.what()); throw DBreak(); } if (regex_search(buffer, rs_regex_try_later)) { rsprintf("Nie mozna teraz pobierac pliku, sprobuje za chwile, czekam..."); rs_wait(RS::Waiting, RS::Preparing, rs_waiting_try_later); delete[] buffer; throw DAgain(); } if (regex_search(buffer, rs_regex_reached_limit)) { rsprintf("Wyczerpal sie limit pobran, czekam..."); rs_wait(RS::Limit, RS::Preparing, rs_waiting_limit); delete[] buffer; throw DAgain(); } if (regex_search(buffer, rs_regex_server_busy)) { rsprintf("Serwery sa przeciazone, czekam..."); rs_wait(RS::Busy, RS::Preparing, rs_waiting_busy); delete[] buffer; throw DAgain(); } if (regex_search(buffer, rs_regex_server_busy)) { rsprintf("Brak wolnych slotow, czekam..."); rs_wait(RS::Busy, RS::Preparing, rs_waiting_busy); delete[] buffer; throw DAgain(); } if (regex_search(buffer, rs_regex_already_downloading)) { rsprintf("Ktos o tym samym IP pobiera juz jakis plik, czekam..."); rs_wait(RS::Rivalry, RS::Preparing, rs_waiting_rivalry); delete[] buffer; throw DAgain(); } size_t wait_for = 0; string ssize; cmatch what; if (regex_search(buffer, what, rs_regex_time)) { wait_for = strtol(what[1].str().c_str(), 0, 10) + 5; rsprintf("Czekam %zd sek...", wait_for); } else { wait_for = 5; rsprintf("Nieznany czas 
oczekiwania, sprobuje %zd sek...", wait_for); } if (!regex_search(buffer, what, rs_regex_size)) { rsprintf("Nieznany rozmiar pliku, jeszcze raz..."); delete[] buffer; throw DBreak(); } ssize = what[1]; rs_info.all_bytes = 1000*strtol(ssize.c_str(), 0, 10); try { vector<pair<string, string> > srvs; for (cregex_iterator it(buffer, buffer+buflen, rs_regex_server), end; it != end; ++it) srvs.push_back(make_pair<string, string>((*it)[2], (*it)[1])); if (!srvs.size()) { rsprintf("Brak serwerow, przerywam..."); throw DBreak(); } size_t i = 0; while (rs_favorites_servers[i]) { size_t found = 0, end = srvs.size(); while (found < end) { if (srvs[found].first == rs_favorites_servers[i]) break; ++found; } if (found == end) { rsprintf("Brak serwera '%s', szukam dalej...", rs_favorites_servers[i]); ++i; continue; } rsprintf("Wybieram serwer '%s'...", rs_favorites_servers[i]); url = srvs[found].second; break; } if (!rs_favorites_servers[i]) { url = srvs[0].second; rsprintf("Brak jakiegokolwiek ulubionego serwera, wybieram pierwszy z brzegu '%s'", srvs[0].first.c_str()); } } catch (DBreak) { delete[] buffer; throw; } catch (DExc &e) { delete[] buffer; throw e; } rs_wait(RS::Waiting, RS::Preparing, wait_for); delete[] buffer; }