Exemple #1
0
/*
 * Parser worker thread entry point.
 *
 * Repeatedly takes a page from in_args->page_queue and scans its content for
 * "link:" tokens. For every link found, the edge callback is invoked with
 * (page URL, link URL) and a copy of the link URL made by str_duplicate() is
 * put on in_args->url_queue. The loop ends when the page queue returns NULL.
 *
 * arg: pointer to the shared struct input_args.
 * Returns NULL.
 */
void *parser(void *arg) {
    struct input_args *ctx = (struct input_args *)arg;

    for (;;) {
        struct page *pg = (struct page *)unbounded_buffer_get(ctx->page_queue);
        if (pg == NULL)
            return NULL;

        char *cursor = pg->content;
        char *hit;
        while ((hit = strstr(cursor, "link:")) != NULL) {
            char *target = hit + 5; /* first character after "link:" */

            /* A token only counts at the very start of the content or
             * directly after a space or newline. */
            int at_boundary = hit == pg->content || hit[-1] == ' ' || hit[-1] == '\n';
            if (!at_boundary) {
                cursor = target;
                continue;
            }

            /* The URL extends to the next space, newline, or end of content. */
            char *stop = target + strcspn(target, " \n");
            char delim = *stop;
            if (delim != '\0')
                *stop = '\0'; /* terminate in place so the URL can be copied */

            char *url = str_duplicate(target);
            ctx->edge(pg->url, url);
            bounded_buffer_put(ctx->url_queue, (void *)url);

            if (delim == '\0')
                break; /* the URL ran to the end of the content */

            *stop = delim; /* put back the character that was overwritten */
            cursor = stop + 1;
        }

        /* The page is fully processed: release it and mark the work as done. */
        mem_free(pg->url);
        mem_free(pg->content);
        mem_free(pg);
        unbounded_buffer_done(ctx->page_queue);

        /* Wake whoever is waiting on done_cond so it can re-check progress. */
        mutex_lock(ctx->done_mutex);
        cond_signal(ctx->done_cond);
        mutex_unlock(ctx->done_mutex);
    }
}
Exemple #2
0
/*
 * Producer thread entry point: reads entries from the file opened for
 * p->dictionary and puts them into p->buf, one per iteration, until
 * p->found becomes true.
 *
 * p->found is set here when get_next() returns a negative value
 * (presumably end of dictionary -- confirm against get_next); it may also
 * be set elsewhere, which likewise ends the loop.
 *
 * arg: pointer to a params structure.
 * Does not return normally; the thread terminates via pthread_exit(NULL).
 */
void*
fill_buffer(void* arg){
	params* p;
	buffer* buf;
	char temp[BUFFER_LENGTH];
	FILE* file;

	p=(params*)arg;
	buf=p->buf;
	/* temp is handed to bounded_buffer_put() even when get_next() fails, so
	 * make sure it holds a valid empty string before the first read instead
	 * of indeterminate bytes. */
	temp[0]='\0';
	file = open_file(p->dictionary);    /* Open file */
	do{
		if(sem_wait(p->empty)==-1)
			perror("error on sem empty");
		if(buf->nb_elem < buf->size){
			if(get_next(file, temp)<0){
				p->found = true;
				printf("Password not found\n");
			}
			bounded_buffer_put(buf, temp);
		}
		sem_post(p->full);
	/* The former "temp!=NULL" test compared an array address with NULL and
	 * was always true, so only the found flag controls the loop. */
	}while(!p->found);
	close_file(file);
	pthread_exit(NULL);
}
Exemple #3
0
/*
 * Runs a crawl that starts at start_url using two pools of worker threads.
 *
 * Sets up a bounded URL queue (capacity queue_size), an unbounded page queue
 * and a URL hash set, seeds the URL queue with a copy of start_url, starts
 * download_workers threads running downloader() and parse_workers threads
 * running parser(), then waits until both queues are empty with no active
 * workers. At that point both queues are flagged done, all waiters are woken,
 * the threads are joined and the queues and set are destroyed.
 *
 * start_url:        URL to begin from; must not be NULL.
 * download_workers: number of downloader threads; must be >= 1.
 * parse_workers:    number of parser threads; must be >= 1.
 * queue_size:       capacity of the URL queue; must be >= 1.
 * _fetch_fn:        callback stored as in_args.fetch for the workers.
 * _edge_fn:         callback stored as in_args.edge for the workers.
 *
 * Returns 0 after all workers have been joined, or -1 if an argument is
 * unusable (nothing is created in that case).
 */
int crawl(char *start_url, int download_workers, int parse_workers, int queue_size,
    char *(*_fetch_fn)(char *url), void (*_edge_fn)(char *from, char *to)) {
    int i;

    /* A non-positive worker count would give the thread arrays below an
     * invalid variable-length size, and without workers on both sides the
     * crawl could not make progress. A queue without capacity presumably
     * could never accept the start URL. */
    if (start_url == NULL || _fetch_fn == NULL || _edge_fn == NULL ||
        download_workers < 1 || parse_workers < 1 || queue_size < 1)
        return -1;

    bounded_buffer_t url_queue;
    unbounded_buffer_t page_queue;
    hashset_t url_set;
    bounded_buffer_init(&url_queue, queue_size);
    unbounded_buffer_init(&page_queue);
    hashset_init(&url_set, HASHSET_BUCKETS);

    /* Seed the crawl with a private copy of the start URL. */
    bounded_buffer_put(&url_queue, (void *)str_duplicate(start_url));

    mutex_t done_mutex;
    cond_t done_cond;

    mutex_init(&done_mutex);
    cond_init(&done_cond);

    /* One argument block shared by every worker thread. */
    struct input_args in_args;
    in_args.url_queue = &url_queue;
    in_args.page_queue = &page_queue;
    in_args.url_set = &url_set;
    in_args.fetch = _fetch_fn;
    in_args.edge = _edge_fn;
    in_args.done_mutex = &done_mutex;
    in_args.done_cond = &done_cond;

    thread_t downloaders[download_workers];
    thread_t parsers[parse_workers];
    for (i = 0; i < download_workers; i++)
        thread_create(&downloaders[i], downloader, (void *)&in_args);
    for (i = 0; i < parse_workers; i++)
        thread_create(&parsers[i], parser, (void *)&in_args);

    /* Wait for the whole pipeline to go idle. done_mutex is held from the
     * check until cond_wait() so a signal sent under done_mutex cannot slip
     * in between the check and the wait. */
    while (1) {
        mutex_lock(&done_mutex);
        mutex_lock(&url_queue.mutex);
        mutex_lock(&url_queue.worker_mutex);
        mutex_lock(&page_queue.mutex);
        mutex_lock(&page_queue.worker_mutex);
        if (url_queue.count == 0 && url_queue.workers == 0 &&
            page_queue.count == 0 && page_queue.workers == 0) {
            /* Nothing queued and nobody working: flag both queues as done
             * and wake every thread blocked on them. */
            url_queue.done = 1;
            page_queue.done = 1;
            cond_broadcast(&url_queue.empty);
            cond_broadcast(&url_queue.fill);
            cond_broadcast(&page_queue.fill);
            mutex_unlock(&url_queue.mutex);
            mutex_unlock(&url_queue.worker_mutex);
            mutex_unlock(&page_queue.mutex);
            mutex_unlock(&page_queue.worker_mutex);
            mutex_unlock(&done_mutex);
            break;
        } else {
            /* Still busy: release the queue locks and sleep until some
             * thread signals done_cond, then check again. */
            mutex_unlock(&url_queue.mutex);
            mutex_unlock(&url_queue.worker_mutex);
            mutex_unlock(&page_queue.mutex);
            mutex_unlock(&page_queue.worker_mutex);
            cond_wait(&done_cond, &done_mutex);
            mutex_unlock(&done_mutex);
        }
    }

    for (i = 0; i < download_workers; i++)
        thread_join(downloaders[i], NULL);
    for (i = 0; i < parse_workers; i++)
        thread_join(parsers[i], NULL);

    bounded_buffer_destroy(&url_queue);
    unbounded_buffer_destroy(&page_queue);
    hashset_destroy(&url_set);

    return 0;
}