Ejemplo n.º 1
0
/*
 * Download `url` from `host:portnum` and store the response body in
 * `filename`.
 *
 * The raw response returned by webget() is passed through skip_header(),
 * and the `size` bytes it leaves are written to the file.
 *
 * Out-parameters:
 *   *filetype  on success, the value returned by file_type(filename);
 *   *encoding  on success, a newly malloc'ed empty string (the encoding is
 *              not extracted from the response; see the disabled code
 *              below).
 * Both are set to NULL when the download fails, the header cannot be
 * skipped, or the output file cannot be opened.  *encoding is also NULL
 * if its allocation fails.
 */
void webget_file(const char *url, const char *host, short int portnum,
	const char *filename, char **filetype, char **encoding)
{
    char *ref;
    char *realdoc;
    int size;
    FILE *f;

    /* Give the out-parameters a defined value on every early-return path. */
    *filetype = NULL;
    *encoding = NULL;

    INFO("Récupération de %s depuis %s:%d", url, host, portnum);
    ref = webget(host, portnum, url, &size);
    if (ref == NULL) {
        INFO("Échec de la récupération de %s", url);
        return;
    }
    INFO("Récupération réussie");

    /*
     *filetype = extract_filetype(ref);
     INFO("Type : %s", *filetype);

     *encoding = extract_fileencoding(ref);
     INFO("Encodage : %s", *encoding);
     */

    INFO("Stockage dans le fichier : %s", filename);
    realdoc = skip_header(ref, &size);
    /* Defensive: skip_header()'s contract is not visible from here. */
    if (realdoc == NULL) {
        INFO("Impossible de localiser le corps de la réponse");
        free(ref);
        return;
    }

    /* Binary mode: the body is written as a raw byte count, not as text. */
    f = fopen(filename, "wb");
    if (f == NULL) {
        INFO("Impossible d'ouvrir le fichier : %s", filename);
        free(ref);
        return;
    }
    if (size > 0 &&
            fwrite(realdoc, sizeof(unsigned char), (size_t)size, f)
            != (size_t)size) {
        INFO("Écriture incomplète dans le fichier : %s", filename);
    }
    fclose(f);

    *filetype = file_type(filename);

    /*
     * `[]` binds tighter than unary `*`, so the target is spelled
     * (*encoding)[0] to make it explicit that the new buffer is written.
     */
    *encoding = malloc(sizeof(char));
    if (*encoding != NULL) {
        (*encoding)[0] = '\0';
    }

    /* Only `ref` is released here; `realdoc` is never freed on its own. */
    free(ref);
}
Ejemplo n.º 2
0
bool qdbbot::refreshCache()
{

	std::cout << "qdbbot: refreshing cache" << std::endl;
	bashBuffer* buffer = webget(baseURL + "/random");

	//dump html doc into string
	std::string stuff(buffer->begin(), buffer->end());

	//dump string into string stream;
	std::stringstream html(stuff);

//	std::cout << "this is teh massive html: " << html.str();

	delete buffer; 

	std::cout << "qdbbot: parsing quotes" << std::endl;
	
	while(!html.eof())
	{
		std::cout << "qdbbot: parsed bash "<< cache->size() << std::endl;
		cacheMutex->lock();
		cache->push_back(parseQuote(&html));
		cacheMutex->unlock();
	}

	std::cout << "qdbbot: cache refreshed" << std::endl;
	std::cout << "qdbbot: cache size now " << cache->size() << std::endl;

	return true;
}
Ejemplo n.º 3
0
/**
 * Fetch the page for one quote number and parse it.
 *
 * Downloads baseURL + "/" + number with webget(), copies the bytes into a
 * string stream, and hands that stream to parseQuote(). The download
 * buffer is deleted before returning.
 *
 * @param number  quote identifier appended to the base URL.
 * @return the pointer produced by parseQuote(). This function does not
 *         free it, so presumably the caller owns it (TODO confirm).
 */
qdbbot::bashQuote* qdbbot::bashNum(std::string number)
{
	std::cout << "qdbbot: bashnum url: " << baseURL << "/" << number << std::endl;

	bashBuffer* page = webget(baseURL + "/" + number);

	// Load the downloaded bytes into a stream for the parser.
	std::stringstream html;
	html.str(std::string(page->begin(), page->end()));

	std::cout << "qdbbot: bashnum is parsing the quote" << std::endl;
	qdbbot::bashQuote* parsed = parseQuote(&html);

	delete page;

	return parsed;
}
Ejemplo n.º 4
0
/**
 * Search the quote site and return the matching quote numbers as one line.
 *
 * Downloads baseURL + "/search?q=" + searchString with webget(), extracts
 * the quote numbers from the page with getBashNums(), and joins them,
 * each followed by a space, after the prefix "| QDB |: ".
 *
 * @param searchString  text appended verbatim to the query URL.
 * @return "| QDB |: " followed by the space-separated quote numbers; just
 *         the prefix when getBashNums() yields nothing.
 *
 * NOTE(review): searchString is not URL-encoded before being appended to
 * the URL. Confirm whether callers sanitise it.
 */
std::string qdbbot::search(std::string searchString)
{
	// Build the URL once so the logged URL is exactly the one requested.
	const std::string url = baseURL + "/search?q=" + searchString;
	std::cout << "qdbbot: search url: " << url << std::endl;

	// Fetch the result page.
	std::cout << "qdbbot: search: fetching the page" << std::endl;
	bashBuffer* buffer = webget(url);

	// Copy the downloaded bytes into a string.
	std::cout << "qdbbot: search: putting buffer contents into a string" << std::endl;
	std::string stuff(buffer->begin(), buffer->end());

	// The string holds its own copy; release the buffer now so it cannot
	// leak if a later step throws.
	delete buffer;

	// Wrap the page in a stream for the number extractor.
	std::cout << "qdbbot: search: building a string stream" << std::endl;
	std::stringstream html(stuff);

	// Pull the quote numbers out of the page.
	std::cout << "qdbbot: search: stripping the numbers" << std::endl;
	std::vector<std::string> nums = getBashNums(html);

	// Join the numbers into the reply line.
	std::cout << "qdbbot: search: building the bash string" << std::endl;
	std::string line = "| QDB |: ";
	for (std::vector<std::string>::size_type i = 0; i < nums.size(); ++i)
	{
		line += nums[i];
		line += " ";
	}

	std::cout << "qdbbot: search: returned" << std::endl;
	return line;
}
Ejemplo n.º 5
0
/*
 * Run a web search and extract up to `nbr` links from the response.
 *
 * The HTTP request is produced by formatting `request` into
 * websearch->http_request, sent with webget() to
 * websearch->host:websearch->portnum, and the response text is scanned
 * repeatedly with the compiled regex websearch->preg.  For each match,
 * sub-expression 1 is copied into a newly malloc'ed, NUL-terminated string
 * stored in websites[].  This function never frees those strings, so
 * presumably the caller does (TODO confirm).
 *
 * Returns the number of entries written to websites[].  Returns 0 without
 * issuing the request when the regex has no sub-expression to extract or
 * when the formatted request does not fit its buffer.  Calls exit(100) when
 * a working buffer cannot be allocated.
 */
int websearch(t_websearch websearch, const char *request,
        char *websites[], int nbr)
{
    int match = REG_NOMATCH;
    size_t nmatch;
    int count = 0;
    regmatch_t *pmatch;
    char *text;
    char *http_request;
    size_t request_len;
    int written;
    int response_size;

    /*
     * Slot 0 is the whole match and slot 1 the link to extract; without at
     * least one sub-expression, reading pmatch[1] would be out of bounds.
     */
    nmatch = (websearch->preg).re_nsub + 1;
    if (nmatch < 2) {
        WARNING("L'expression régulière ne contient aucune sous-expression");
        return 0;
    }

    DEBUG("Allocation de la mémoire pour la réponse de la regexp");
    pmatch = malloc (sizeof (*pmatch) * nmatch);
    if (pmatch == NULL) {
        FATAL("Erreur d'allocation mémoire : %s", strerror(errno));
        exit(100);
    }

    DEBUG("Allocation de mémoire pour la requête HTTP");
    request_len = strlen(websearch->http_request) + strlen(request) + 1;
    http_request = malloc(request_len * sizeof(char));
    if (http_request == NULL) {
        FATAL("Erreur d'allocation mémoire : %s", strerror(errno));
        exit(100);
    }

    /*
     * Bounded formatting: the buffer is sized for a template holding a
     * single "%s"; anything longer is rejected instead of overflowing.
     */
    written = snprintf(http_request, request_len, websearch->http_request,
            request);
    if (written < 0 || (size_t)written >= request_len) {
        ERROR("Requête HTTP trop longue pour le tampon alloué");
        free(http_request);
        free(pmatch);
        return 0;
    }

    INFO("Récupération du résultat de la recherche");
    text = webget(websearch->host, websearch->portnum, http_request,
            &response_size);

    INFO("Libération de la mémoire de la requête HTTP");
    free(http_request);

    if (text != NULL) {
        char *current = text;

        DEBUG("Extraction des liens contenus dans la réponse: %s", text);
        while (count < nbr &&
                (match = regexec(&(websearch->preg), current,
                                 nmatch, pmatch, 0)) == 0) {
            /* Sub-expression 1 delimits the part to extract. */
            regoff_t start = pmatch[1].rm_so;
            regoff_t end = pmatch[1].rm_eo;

            INFO("Nouveau lien trouvé");

            /* rm_so is -1 when the sub-expression took no part in the match. */
            if (start >= 0 && end >= start) {
                size_t length = (size_t)(end - start);

                /* Room for the link plus its terminating NUL. */
                websites[count] = malloc (sizeof (char) * (length + 1));
                if (websites[count] == NULL) {
                    ERROR("Erreur d'allocation mémoire : %s",
                            strerror(errno));
                } else {
                    memcpy(websites[count], &current[start], length);
                    websites[count][length] = '\0';
                    INFO("Ajout du lien : %s", websites[count]);
                    count += 1;
                }
            }

            /*
             * Skip everything the regex consumed.  A zero-length match
             * would leave `current` in place, so step one character
             * forward instead, or stop at the end of the text.
             */
            if (pmatch[0].rm_eo > 0) {
                current = current + pmatch[0].rm_eo;
            } else if (*current != '\0') {
                current = current + 1;
            } else {
                match = REG_NOMATCH;
                break;
            }
        }
        INFO("Fin de l'extraction des liens");

        /* Leaving the loop early for any reason but "no match" is an error. */
        if (count < nbr && match != REG_NOMATCH) {
            WARNING("Erreur lors de l'extraction des liens : %d",
                    match);
        }

        DEBUG("Libération buffer du texte");
        free(text);
    }

    DEBUG("Libération réponse au filtrage");
    free(pmatch);

    return count;
}