/*
 * Fetch the document at <url> from <host>:<portnum> and write its body
 * (headers stripped) into <filename>.
 *
 * On return:
 *   *filetype -- detected type of the stored file (via file_type());
 *   *encoding -- freshly malloc'd empty string the caller may free()
 *                unconditionally (encoding detection is disabled, see the
 *                commented-out block below).
 *
 * NOTE(review): assumes webget() never returns NULL here -- the sibling
 * websearch() does check it; confirm webget()'s failure contract.
 */
void webget_file(const char *url, const char *host, short int portnum, const char *filename, char **filetype, char **encoding)
{
    char *ref;
    char *realdoc;
    int size;
    FILE *f;

    INFO("Récupération de %s depuis %s:%d", url, host, portnum);
    ref = webget(host, portnum, url, &size);
    INFO("Récupération réussie");
    /*
    *filetype = extract_filetype(ref);
    INFO("Type : %s", *filetype);
    *encoding = extract_fileencoding(ref);
    INFO("Encodage : %s", *encoding);
    */
    INFO("Stockage dans le fichier : %s", filename);
    realdoc = skip_header(ref, &size);

    /* "wb", not "w": the body may be binary; text mode would corrupt it on
     * platforms that translate newlines. */
    f = fopen(filename, "wb");
    if (f == NULL) {
        /* Previously unchecked: fwrite on a NULL stream is undefined
         * behaviour.  Fail hard, matching the file's FATAL/exit style. */
        FATAL("Erreur d'ouverture du fichier : %s", strerror(errno));
        exit(100);
    }
    fwrite(realdoc, sizeof(unsigned char), size, f);
    fclose(f);

    *filetype = file_type(filename);

    /* Hand back an empty string so callers can treat *encoding uniformly. */
    *encoding = malloc(sizeof(char));
    if (*encoding == NULL) {
        FATAL("Erreur d'allocation mémoire : %s", strerror(errno));
        exit(100);
    }
    (*encoding)[0] = '\0';   /* same as the original *encoding[0], spelled clearly */

    free(ref);
}
bool qdbbot::refreshCache() { std::cout << "qdbbot: refreshing cache" << std::endl; bashBuffer* buffer = webget(baseURL + "/random"); //dump html doc into string std::string stuff(buffer->begin(), buffer->end()); //dump string into string stream; std::stringstream html(stuff); // std::cout << "this is teh massive html: " << html.str(); delete buffer; std::cout << "qdbbot: parsing quotes" << std::endl; while(!html.eof()) { std::cout << "qdbbot: parsed bash "<< cache->size() << std::endl; cacheMutex->lock(); cache->push_back(parseQuote(&html)); cacheMutex->unlock(); } std::cout << "qdbbot: cache refreshed" << std::endl; std::cout << "qdbbot: cache size now " << cache->size() << std::endl; return true; }
/**
 * Fetch and parse a single quote identified by its bash number.
 *
 * @param number  quote id, already formatted as a string
 * @return pointer to the parsed quote -- presumably caller-owned;
 *         parseQuote()'s allocation contract is not visible here, confirm.
 */
qdbbot::bashQuote* qdbbot::bashNum(std::string number)
{
    std::cout << "qdbbot: bashnum url: " << baseURL << "/" << number << std::endl;

    bashBuffer* page = webget(baseURL + "/" + number);

    // Copy the raw bytes into a stream the parser can walk.
    std::string raw(page->begin(), page->end());
    std::stringstream doc(raw);

    std::cout << "qdbbot: bashnum is parsing the quote" << std::endl;
    qdbbot::bashQuote* result = parseQuote(&doc);

    delete page;
    return result;
}
std::string qdbbot::search(std::string searchString) { std::cout << "qdbbot: serch url: " << baseURL << "/?search=" << searchString << "&sort=0&show=25" << std::endl; //use libcurl to grap the web page std::cout << "qdbbot: search: fetching the page" << std::endl; bashBuffer* buffer = webget(baseURL + "/search?q=" + searchString); //lib curl gave us the buffer, now lets dump it into a string std::cout << "qdbbot: search: putting buffer contents into a string" << std::endl; std::string stuff(buffer->begin(), buffer->end()); //dump string into string stream; std::cout << "qdbbot: search: building a string stream" << std::endl; std::stringstream html(stuff); //strip the numbers out of the search std::cout << "qdbbot: search: stripping the numbers" << std::endl; std::vector<std::string> nums = getBashNums(html); //build the string std::cout << "qdbbot: search: building the bash string" << std::endl; std::string line = "| QDB |: "; for(unsigned int i = 0; i < nums.size(); ++i ) { line += nums[i] + " "; } //delete the bash buffer now that we're done with it delete buffer; //return the string of bash numbers std::cout << "qdbbot: search: returned" << std::endl; return line; }
/*
 * Run the web search described by <websearch> for <request> and fill
 * <websites> with up to <nbr> freshly malloc'd result links (each
 * NUL-terminated; ownership passes to the caller).
 *
 * @return the number of links actually stored in <websites>.
 */
int websearch(t_websearch websearch, const char *request, char *websites[], int nbr)
{
    /* Initialized so the post-loop check never reads an indeterminate
     * value if the loop body is skipped entirely. */
    int match = REG_NOMATCH;
    size_t nmatch = 0;
    int count = 0;
    regmatch_t *pmatch = NULL;

    DEBUG("Allocation de la mémoire pour la réponse de la regexp");
    /* One slot per capture group, plus slot 0 for the whole match. */
    nmatch = (websearch->preg).re_nsub + 1;
    pmatch = malloc (sizeof (*pmatch) * nmatch);
    if (pmatch == NULL) {
        FATAL("Erreur d'allocation mémoire : %s", strerror(errno));
        exit(100);
    } else {
        char *text;
        char *http_request;
        int size;

        DEBUG("Allocation de mémoire pour la requête HTTP");
        /* The "%s" inside the format string is at least as long as the NUL
         * terminator it is replaced by, so fmt+request+1 is always enough. */
        http_request = malloc((strlen(websearch->http_request) + strlen(request) + 1) * sizeof(char));
        if (http_request == NULL) {
            FATAL("Erreur d'allocation mémoire : %s", strerror(errno));
            exit(100);
        }
        sprintf(http_request, websearch->http_request, request);

        INFO("Récupération du résultat de la recherche");
        text = webget(websearch->host, websearch->portnum, http_request, &size);
        INFO("Libération de la mémoire de la requête HTTP");
        free(http_request);

        if (text != NULL) {
            char *current;

            DEBUG("Extraction des liens contenus dans la réponse: %s", text);
            current = text;
            count = 0;
            while (count < nbr
                   && (match = regexec(&(websearch->preg), current, nmatch, pmatch, 0)) == 0) {
                INFO("Nouveau lien trouvé");
                /* Capture group 1 holds the sub-part to extract. */
                int start = pmatch[1].rm_so;
                int end = pmatch[1].rm_eo;
                size_t linklen = end - start;   /* renamed: no longer shadows the outer `size` */

                /* Reserve the space needed for the link plus its NUL. */
                websites[count] = malloc (sizeof (char) * (linklen + 1));
                if (websites[count] == NULL) {
                    ERROR("Erreur d'allocation mémoire : %s", strerror(errno));
                } else {
                    /* BUGFIX: the source read "¤t[start]" -- an HTML-entity
                     * mangling ("&curren;" -> '¤') of "&current[start]" --
                     * which does not compile. */
                    strncpy(websites[count], &current[start], linklen);
                    websites[count][linklen] = '\0';
                    INFO("Ajout du lien : %s", websites[count]);
                    count += 1;
                }
                /* Skip past the whole zone matched by the regexp. */
                current = current + pmatch[0].rm_eo;
            }
            INFO("Fin de l'extraction des liens");
            if (count < nbr && match != REG_NOMATCH) {
                WARNING("Erreur lors de l'extraction des liens : %d", match);
            }
            /* Free the downloaded page text. */
            DEBUG("Libération buffer du texte");
            free(text);
        }
    }
    DEBUG("Libération réponse au filtrage");
    free(pmatch);
    return count;
}