Пример #1
0
  // Stage 3: issues the final request against `url`, marking the transfer as
  // RS::Downloading and resetting the progress counters first.
  //
  // The response header is dumped to the "-head3.http" session file.
  // Any status other than HTTP_OK, and any Exception raised while talking to
  // the server, is reported through rsprintf and converted into DBreak.
  //
  // NOTE(review): the first argument of http.get() looks like the destination
  // path of the downloaded file and `length` like the received size - confirm
  // against the Http class.
  void stage_3(string &url)
  {
    rsprintf("Lacze z '%s' (3)", url.c_str());

    rs_progress_start = Time::in_usec();
    rs_info.status = RS::Downloading;
    rs_info.bytes = 0;
    rs_info.tmpspeed = 0;
    off_t length = 0;

    try { // TODO
      int code = http.get(rs_download_get_path(rs_info.url.c_str()).string().c_str(), length, url.c_str(),
          "mirror=", NULL, rs_progress_callback, NULL);

      // Keep a copy of the response header for diagnostics. The pointer is
      // fetched once and checked, so a missing header cannot crash strlen().
      const char *header = http.get_recv_header();
      if (header) {
        filesystem::ofstream head(rs_sessions_get_path(url, "-head3.http"));
        head.write(header, strlen(header));
      }

      // off_t is not guaranteed to be `long long`; cast explicitly so the
      // argument matches the %lld conversion.
      const long long reported_length = static_cast<long long>(length);

      if (code == HTTP_OK) // We expect status 200
        rsprintf("Polaczono, code = %d, length = %lld", code, reported_length);
      else {
        rsprintf("Polaczono, brak strony? code = %d, length = %lld, try again", code, reported_length);
        throw DBreak();
      }

      //string clen = http.get_recv_header("content-length");
      //rsprintf("");
    }
    catch (Exception &e) {
      rsprintf("Nastapil wyjatek: %s", e.what());
      throw DBreak();
    }
  }
Пример #2
0
// Command-line probe: performs a GET for every URL given as an argument and
// prints the status code, the number of headers and a few selected response
// headers to stderr. Exceptions from the Http class are reported and the loop
// continues with the next URL.
int main(int argc, char **argv)
{
  Http http;
  char *page = NULL;
  size_t size = 0;

  for (int i = 1; i < argc; ++i) {
    cerr << "== GET '" << argv[i] << "' ==" << endl;

    try {
      //http.set_verbose(true);
      int code = http.get(page, size, argv[i]);

      // Only the size of the body is reported below, so release it right away
      // and clear the pointer: the next http.get() must not be handed a
      // dangling address.
      delete[] page;
      page = NULL;

      const Http::Headers &hdrs = http.get_headers();

      const char *loc = http.get_recv_header("location");
      const char *coo = http.get_recv_header("set-cookie");
      const char *len = http.get_recv_header("content-length");

      cerr << "== code = " << code << endl;
      cerr << "== headers size = " << hdrs.size() << endl;
      cerr << "== location = " << (loc?loc:"?") << endl;
      cerr << "== set-cookie = " << (coo?coo:"?") << endl;
      cerr << "== content-length = " << (len?len:"?") << " (strona: " << size << ")"<< endl;
    }
    catch (Exception &e) {
      cerr << "== exception: " << e.what() << endl;
      // If get() assigned a buffer before throwing, free it here. `page` is
      // NULL or a live allocation at this point, never a stale pointer.
      delete[] page;
      page = NULL;
    }
  }

  return 0;
}
Пример #3
0
  // Stage 1: fetches the page at `url`, stores the response header and body in
  // the "-head1.http" / "-page1.html" session files, and replaces `url` with
  // the first capture of rs_regex_url found in the page.
  //
  // Throws DBreak when the status is not HTTP_OK, when an Exception is raised
  // during the request, or when no follow-up link is found.
  // Throws DAbort(RS::NotFound) when the page matches one of the
  // "file unavailable" patterns.
  void stage_1(string &url)
  {
    rsprintf("Lacze z '%s'... (1)", url.c_str());
    char *buffer = NULL;
    size_t buflen = 0;

    // Releases the page buffer on every exit path, including the exception
    // paths that previously leaked it.
    struct PageBufferGuard {
      char *&ptr;
      explicit PageBufferGuard(char *&p) : ptr(p) {}
      ~PageBufferGuard() { delete[] ptr; }
    } buffer_guard(buffer);

    try {
      int code = http.get(buffer, buflen, url.c_str());

      // Dump the response header; fetch the pointer once and check it.
      const char *header = http.get_recv_header();
      if (header) {
        filesystem::ofstream head(rs_sessions_get_path(url, "-head1.http"));
        head.write(header, strlen(header));
      }

      if (code == HTTP_OK) // We expect status 200
        rsprintf("Polaczono, code = %d, page = %p,%zd", code, buffer, buflen);
      else {
        rsprintf("Polaczono, brak strony? code = %d, page = %p,%zd, sprobuje za chwile...", code, buffer, buflen);
        throw DBreak();
      }

      filesystem::ofstream page(rs_sessions_get_path(url, "-page1.html"));
      if (buffer)
        page.write(buffer, buflen);
    }
    catch (Exception &e) {
      rsprintf("Nastapil wyjatek: %s", e.what());
      throw DBreak();
    }

    // Search the explicit [first, last) range instead of relying on the
    // buffer being non-NULL and NUL-terminated. An absent body is treated as
    // an empty page, which falls through to the "no link" retry below.
    const char *first = buffer ? buffer : "";
    const char *last = first + (buffer ? buflen : 0);

    // Check whether the file is available...
    if (regex_search(first, last, rs_regex_illegal_file) ||
        regex_search(first, last, rs_regex_illegal_file2) ||
        regex_search(first, last, rs_regex_not_available) ||
        regex_search(first, last, rs_regex_not_found)) {
      rsprintf("Plik '%s' jest niedostepny", url.c_str());
      //rs_info.status = RS::NotFound;
      throw DAbort(RS::NotFound);
    }

    // Look for the next URL...
    cmatch what;
    if (!regex_search(first, last, what, rs_regex_url)) {
      rsprintf("Brak odnosnika do nastepnej strony, sprobuje jeszcze raz...");
      throw DBreak();
    }

    url = what[1]; // set the new url (copied out before the buffer is freed)
  }
Пример #4
0
// Minimal demo: fetches "www.google.com" with the Http class and writes the
// received data to "http.txt". A failure to open the output file is reported
// with perror(); the exit status is 0 either way.
int main(int argc, char *argv[]) {
	Main app(APP_SOCKETS);
	Http http;
	app.open(argc,argv);
	http.setUserAgent("Http Class User-Agent, v.0.1");

	http.get("www.google.com","");

	const char *out_path = "http.txt";
	FILE *out = fopen(out_path,"wb");
	if(!out) {
		perror(out_path);
		return 0;
	}

	// Write the whole response as a single record.
	fwrite(http.getFile(),http.getFileSize(),1,out);
	fclose(out);
	return 0;
}
Пример #5
0
// Sends one HTTP request for the task's URL and records in `task` what the
// response headers reveal: the file size, whether resuming via Range works,
// and a local file name.
//
// Returns 0 on success and S_REDIRECT when a redirect status was handled.
// Returns -2 for connection/request/header failures, for the timeout and
// gateway statuses, and when the size was just learned and the Range test is
// still pending. Returns -1 for a missing proxy host, a redirect without a
// target, and any other status.
int HttpPlugin::get_info(Task *task) {
    Http http;

    http.set_timeout(task->timeout);
    http.set_log(&debug_log);
#ifdef HAVE_SSL
    if (task->url.get_protocol() == HTTPS) {
        http.set_use_ssl(true);
    }
#endif

    // Credentials carried in the URL; a missing password is sent as "".
    if (task->url.get_user() != NULL) {
        http.auth(task->url.get_user(),
                task->url.get_password() ? task->url.get_password() : "");
    }

    // Without an explicit referer, the URL itself is used.
    if (task->get_referer() == NULL) {
        http.header("Referer", task->url.get_url());
    } else {
        http.header("Referer", task->get_referer());
    }

    // With a known size, ask for the range starting at byte 1 to test Range.
    if (task->fileSize > 0) {
        http.set_range(1);
    }

    if (task->proxy.get_type() != HTTP_PROXY) {
        // Direct connection: talk to the origin host, request the path.
        if (http.connect(task->url.get_host(), task->url.get_port()) < 0) {
            return -2;
        }
        if (http.get(task->url.get_encoded_path()) < 0) {
            return -2;
        }
    } else {
        // Through an HTTP proxy: connect to the proxy, request the full URL.
        if (task->proxy.get_host() == NULL) {
            return -1;
        }
        if (http.connect(task->proxy.get_host(), task->proxy.get_port()) < 0) {
            return -2;
        }
        http.set_host(task->url.get_host(), task->url.get_port());
        if (task->proxy.get_user() != NULL) {
            http.proxy_auth(task->proxy.get_user(),
                    task->proxy.get_password() ? task->proxy.get_password() : "");
        }
        if (http.get(task->url.get_url()) < 0) {
            return -2;
        }
    }

    if (http.parse_header() < 0) {
        return -2;
    }

    switch (http.get_status_code()) {
        case 200:  // HTTP_STATUS_OK
        case 206:  // HTTP_STATUS_PARTIAL_CONTENTS
        case 300:  // HTTP_STATUS_MULTIPLE_CHOICES
        case 304:  // HTTP_STATUS_NOT_MODIFIED
            break;

        case 301:  // HTTP_STATUS_MOVED_PERMANENTLY
        case 302:  // HTTP_STATUS_MOVED_TEMPORARILY
        case 303:  // HTTP_SEE_OTHER
        case 307:  // HTTP_STATUS_TEMPORARY_REDIRECT
            {
                // Forget the size, otherwise the new location inherits it.
                task->fileSize = -1;

                // Prefer Location; fall back to Content-Location.
                const char *target = http.get_header("Location");
                if (target == NULL) {
                    target = http.get_header("Content-Location");
                }
                if (target == NULL) {
                    return -1;
                }

                // A redirect to the same URL is handled like a normal reply.
                if (strcmp(target, task->url.get_url()) == 0) {
                    break;
                }
                if (task->url.reset_url(target) < 0) {
                    return -2;
                }
                return S_REDIRECT;
            }

        case 305:  // HTTP_USE_PROXY
            // The content has to be fetched through the proxy.
            task->fileSize = -1;
            return S_REDIRECT;

        case 408:  // HTTP_CLIENT_TIMEOUT
        case 504:  // HTTP_GATEWAY_TIMEOUT
        case 503:  // HTTP_UNAVAILABLE
        case 502:  // HTTP_BAD_GATEWAY
            // These errors can be retried later.
            return -2;

        default:
            return -1;
    }

    if (task->fileSize >= 0) {
        // IIS never returns Accept-Ranges, so resuming is detected from the
        // Content-Range header instead.
        const char *range = http.get_header("Content-Range");
        if (range != NULL) {
            // Skip to the first digit; the range must start with "1-".
            while (*range != '\0' && !ISDIGIT(*range)) {
                ++range;
            }
            if (range[0] == '1' && range[1] == '-') {
                // The reply starts at byte 1, so add one to the reported size.
                task->fileSize = 1 + http.get_file_size();
                task->resumeSupported = true;
            }
        }
    } else {
        // Size unknown so far (dynamic pages may not report one at all).
        task->fileSize = http.get_file_size();
        if (task->fileSize > 1) {
            // Range support still has to be tested with the size known.
            return -2;
        }
    }

    // Take the local file name from Content-Disposition unless one is set.
    const char *disposition = http.get_header("Content-Disposition");
    const char *name = disposition ? strstr(disposition, "filename=") : NULL;
    if (name != NULL && task->get_local_file() == NULL) {
        task->set_local_file(name + strlen("filename="));
    }

    // Last resort when neither the task nor the URL names a file.
    if (task->get_local_file() == NULL && task->url.get_file() == NULL) {
        task->set_local_file("index.html");
    }

    return 0;
}
Пример #6
0
// Downloads the data of one block of `task` into block->bufferFile.
//
// When the task supports resuming, the transfer continues at
// startPoint + downloaded; otherwise the block is restarted from offset 0.
// block->state is updated along the way (STOP, RETR, WAIT, EXIT).
//
// Returns 0 when the block is finished. Returns -2 on connection, request,
// header or transfer failures, on a status code >= 400, and when a resumable
// block ends short. Returns -1 when an HTTP proxy is selected but has no host,
// the same result get_info() gives for that case.
int HttpPlugin::download(Task& task, Block *block) {
    block->state = STOP;
    if (task.resumeSupported) {
        if (block->downloaded >= block->size) {
            // Nothing left to fetch for this block.
            block->state = EXIT;
            return 0;
        }
        block->bufferFile.seek(block->startPoint + block->downloaded);
    } else {
        block->bufferFile.seek(0);
        block->downloaded = 0;
    }

    Http http;
    http.set_timeout(task.timeout);
    http.set_log(&debug_log);
#ifdef HAVE_SSL
    if (task.url.get_protocol() == HTTPS) {
        http.set_use_ssl(true);
    }
#endif

    if (task.resumeSupported) {
        // the end is not set for the schedule purpose
        http.set_range(block->startPoint + block->downloaded);
    }

    if (task.url.get_user() != NULL) {
        http.auth(task.url.get_user(),
                task.url.get_password() ? task.url.get_password() : "");
    }

    if (task.get_referer() != NULL) {
        http.header("Referer", task.get_referer());
    } else {
        http.header("Referer", task.url.get_url());
    }

    if (task.proxy.get_type() == HTTP_PROXY) {
        // Refuse a proxy without a host instead of passing NULL to connect().
        if (task.proxy.get_host() == NULL) {
            return -1;
        }
        if (http.connect(task.proxy.get_host(), task.proxy.get_port()) < 0) {
            return -2;
        }
        http.set_host(task.url.get_host(), task.url.get_port());
        if (task.proxy.get_user() != NULL) {
            http.proxy_auth(task.proxy.get_user(),
                    task.proxy.get_password() ? task.proxy.get_password() : "");
        }
        if (http.get(task.url.get_url()) < 0) {
            return -2;
        }
    } else {
        if (http.connect(task.url.get_host(), task.url.get_port()) < 0) {
            return -2;
        }
        if (http.get(task.url.get_encoded_path()) < 0) {
            return -2;
        }
    }

    if (http.parse_header() < 0) return -2;
    // The service may be temporarily unreachable;
    // some servers always return 302, so only >= 400 counts as a failure.
    if (http.get_status_code() >= 400) return -2;

    // Receive data, then wait briefly; if the state has become WAKEUP in the
    // meantime, receive again.
    // NOTE(review): block->state is presumably changed by another thread
    // during the sleep - confirm that this access is properly synchronized.
    for (;;) {
        block->state = RETR;
        if (block->bufferFile.retr_data_from(&http, &block->downloaded,
                    block->size - block->downloaded) < 0) {
            block->state = STOP;
            return -2;
        }

        if (task.resumeSupported && block->downloaded < block->size) {
            block->state = STOP;
            return -2;
        }

        block->state = WAIT;
        usleep(500000);
        if (block->state != WAKEUP) break;
    }

    block->state = EXIT;
    return 0;
}
Пример #7
0
  // Stage 2: posts "dl.start=Free" to `url`, stores the response header and
  // body in the "-head2.http" / "-page2.html" session files, and parses the
  // page for the waiting time, the file size and the list of servers.
  //
  // On success `url` is replaced by the address of the chosen server: the
  // first entry of rs_favorites_servers present on the page, otherwise the
  // first server listed. The function then waits for the announced time.
  //
  // Throws DAgain (after waiting) when the page matches one of the
  // "try later / limit / busy / already downloading" patterns.
  // Throws DBreak when the status is not HTTP_OK, an Exception is raised
  // during the request, the size is unknown or no server is listed.
  void stage_2(string &url)
  {
    rsprintf("Lacze z '%s' (2)", url.c_str());
    char *buffer = NULL;
    size_t buflen = 0;

    // Releases the page buffer on every exit path. This replaces the manual
    // delete[] calls and the catch blocks that re-threw with `throw e;`,
    // which sliced exceptions derived from DExc.
    struct PageBufferGuard {
      char *&ptr;
      explicit PageBufferGuard(char *&p) : ptr(p) {}
      ~PageBufferGuard() { delete[] ptr; }
    } buffer_guard(buffer);

    try {
      int code = http.get(buffer, buflen, url.c_str(), "dl.start=Free");

      // Dump the response header; fetch the pointer once and check it.
      const char *header = http.get_recv_header();
      if (header) {
        filesystem::ofstream head(rs_sessions_get_path(url, "-head2.http"));
        head.write(header, strlen(header));
      }

      if (code == HTTP_OK) // We expect status 200
        rsprintf("Polaczono, code = %d, page = %p,%zd", code, buffer, buflen);
      else {
        rsprintf("Polaczono, brak strony? code = %d, page = %p,%zd, try again", code, buffer, buflen);
        throw DBreak();
      }

      filesystem::ofstream page(rs_sessions_get_path(url, "-page2.html"));
      if (buffer)
        page.write(buffer, buflen);
    }
    catch (Exception &e) {
      rsprintf("Nastapil wyjatek: %s", e.what());
      throw DBreak();
    }

    // Search the explicit [first, last) range (as the server iterator below
    // already did) instead of relying on a non-NULL, NUL-terminated buffer.
    // An absent body is treated as an empty page.
    const char *first = buffer ? buffer : "";
    const char *last = first + (buffer ? buflen : 0);

    if (regex_search(first, last, rs_regex_try_later)) {
      rsprintf("Nie mozna teraz pobierac pliku, sprobuje za chwile, czekam...");
      rs_wait(RS::Waiting, RS::Preparing, rs_waiting_try_later);
      throw DAgain();
    }

    if (regex_search(first, last, rs_regex_reached_limit)) {
      rsprintf("Wyczerpal sie limit pobran, czekam...");
      rs_wait(RS::Limit, RS::Preparing, rs_waiting_limit);
      throw DAgain();
    }

    if (regex_search(first, last, rs_regex_server_busy)) {
      rsprintf("Serwery sa przeciazone, czekam...");
      rs_wait(RS::Busy, RS::Preparing, rs_waiting_busy);
      throw DAgain();
    }

    // NOTE(review): this repeats the rs_regex_server_busy test above, so it
    // can never fire. The "no free slots" message suggests a different
    // pattern was intended - confirm which regex belongs here.
    if (regex_search(first, last, rs_regex_server_busy)) {
      rsprintf("Brak wolnych slotow, czekam...");
      rs_wait(RS::Busy, RS::Preparing, rs_waiting_busy);
      throw DAgain();
    }

    if (regex_search(first, last, rs_regex_already_downloading)) {
      rsprintf("Ktos o tym samym IP pobiera juz jakis plik, czekam...");
      rs_wait(RS::Rivalry, RS::Preparing, rs_waiting_rivalry);
      throw DAgain();
    }

    size_t wait_for = 0;
    string ssize;
    cmatch what;

    // Waiting time announced by the page plus 5 seconds; 5 when unknown.
    if (regex_search(first, last, what, rs_regex_time)) {
      wait_for = strtol(what[1].str().c_str(), 0, 10) + 5;
      rsprintf("Czekam %zd sek...", wait_for);
    } else {
      wait_for = 5;
      rsprintf("Nieznany czas oczekiwania, sprobuje %zd sek...", wait_for);
    }

    if (!regex_search(first, last, what, rs_regex_size)) {
      rsprintf("Nieznany rozmiar pliku, jeszcze raz...");
      throw DBreak();
    }
    ssize = what[1];
    // The matched number is multiplied by 1000 to obtain the byte count.
    rs_info.all_bytes = 1000*strtol(ssize.c_str(), 0, 10);

    // Collect (capture 2, capture 1) pairs for every server on the page;
    // `first` is compared with the favourites, `second` becomes the new url.
    vector<pair<string, string> > srvs;
    for (cregex_iterator it(first, last, rs_regex_server), end; it != end; ++it)
      srvs.push_back(make_pair<string, string>((*it)[2], (*it)[1]));

    if (srvs.empty()) {
      rsprintf("Brak serwerow, przerywam...");
      throw DBreak();
    }

    // Walk the NULL-terminated list of favourites in order of preference.
    size_t i = 0;
    while (rs_favorites_servers[i]) {
      size_t found = 0, end = srvs.size();

      while (found < end) {
        if (srvs[found].first == rs_favorites_servers[i]) break;
        ++found;
      }

      if (found == end) {
        rsprintf("Brak serwera '%s', szukam dalej...", rs_favorites_servers[i]);
        ++i;
        continue;
      }

      rsprintf("Wybieram serwer '%s'...", rs_favorites_servers[i]);
      url = srvs[found].second;
      break;
    }

    // No favourite matched: take the first server listed.
    if (!rs_favorites_servers[i]) {
      url = srvs[0].second;
      rsprintf("Brak jakiegokolwiek ulubionego serwera, wybieram pierwszy z brzegu '%s'",
          srvs[0].first.c_str());
    }

    rs_wait(RS::Waiting, RS::Preparing, wait_for);
  }