Code Example #1
/* Created thread: all these calls run in the thread context */
void *hammer_cpu_worker_loop(void *context)
{
	hammer_cpu_worker_context_t *my_context = (hammer_cpu_worker_context_t *)context;
	hammer_sched_t *sched = my_context->sched;
	hammer_batch_t *batch = my_context->batch;
	int core_id = my_context->core_id;
	hammer_epoll_handlers_t *handler;
	unsigned long mask = 0;

	/* Set affinity of this cpu worker */
	mask = 1 << core_id;
	if (sched_setaffinity(0, sizeof(unsigned long), (cpu_set_t *)&mask) < 0) {
		hammer_err("Err set affinity in CPU worker\n");
		exit(0);
	}

	if (config->gpu) {
		handler = hammer_epoll_set_handlers((void *) hammer_handler_read,
						    (void *) hammer_batch_handler_read,
						    (void *) hammer_handler_write, 
						    (void *) hammer_handler_write, // write directly, we have already encrypted the message
						    (void *) hammer_handler_error,
						    (void *) hammer_handler_close,
						    (void *) hammer_handler_close);
	} else {
		/* This is just used for forwarding */
		handler = hammer_epoll_set_handlers((void *) hammer_handler_read,
						    (void *) hammer_handler_read,
						    (void *) hammer_handler_write, 
						    (void *) hammer_handler_write,
						    (void *) hammer_handler_error,
						    (void *) hammer_handler_close,
						    (void *) hammer_handler_close);
	}

	/* Export known scheduler node to context thread */
	pthread_setspecific(worker_sched_struct, (void *)sched);
	__builtin_prefetch(sched);
	__builtin_prefetch(&worker_sched_struct);

	pthread_setspecific(worker_batch_struct, (void *)batch);
	__builtin_prefetch(batch);
	__builtin_prefetch(&worker_batch_struct);

	if (config->gpu) {
		/* Allocate the batch buffers: each CPU worker has a set of buffers,
		 * two used as input buffers and two as output buffers. */
		hammer_batch_init();
	}

	/* Notify the dispatcher and the GPU worker that this thread has been created */
	pthread_mutex_lock(&mutex_worker_init);
	sched->initialized = 1;
	pthread_mutex_unlock(&mutex_worker_init);

	/* Init epoll_wait() loop */
	hammer_epoll_start(sched->epoll_fd, handler, sched->epoll_max_events);

	return 0;
}
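For context, the handler table built above is consumed by the epoll loop in Code Example #11. The definition of hammer_epoll_handlers_t is not included in these snippets; the following is only a minimal sketch of what it plausibly looks like, with field names taken from the accesses in hammer_epoll_start() and the mapping of the seven arguments (in particular the two close callbacks) assumed.

/* Sketch only: the real hammer_epoll_handlers_t is defined in the hammer
 * project and may differ. hammer_connection_t comes from the project headers. */
typedef int (*hammer_conn_cb_t)(hammer_connection_t *c);

typedef struct {
	hammer_conn_cb_t client_read;   /* plain read handler */
	hammer_conn_cb_t server_read;   /* batched read variant when config->gpu is set (assumed mapping) */
	hammer_conn_cb_t client_write;
	hammer_conn_cb_t server_write;
	hammer_conn_cb_t error;
	hammer_conn_cb_t close;         /* the second close argument is assumed to mirror this */
} hammer_epoll_handlers_sketch_t;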
Code Example #2
File: hammer_dispatcher.c Project: tmallory/hammer
int hammer_dispatcher_loop(int server_fd)
{
	int worker_id = 0;
	hammer_sched_t *sched;
	hammer_connection_t *c;

	/* Activate TCP_DEFER_ACCEPT */
	if (hammer_socket_set_tcp_defer_accept(server_fd) != 0) {
		hammer_warn("TCP_DEFER_ACCEPT failed\n");
	}

	/* Accept new connections */
	while (1) {
		/* accept first; hammer_handler_accept() may fail transiently */
		c = hammer_handler_accept(server_fd);
		if (hammer_unlikely(c == NULL)) {
			continue;
		}

		/* Next worker target */
		worker_id = hammer_dispatcher_next_worker_id();
		if (hammer_unlikely(worker_id == -1)) {
			hammer_err("no worker available\n");
			exit(0);
		}
		sched = &(sched_set[worker_id]);

		/* Assign connection to worker thread */
		hammer_sched_add_connection(c, sched, NULL);
	}

	return 0;
}
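hammer_dispatcher_next_worker_id() is referenced here but not shown. Below is a hedged sketch of a plain round-robin version; it assumes config->cpu_worker_num holds the number of CPU workers, and the real implementation may additionally skip workers that are not yet initialized or are overloaded.

/* Hypothetical round-robin worker selection; sketch only, not the project's code. */
static int sketch_next_worker_id(void)
{
	static int next = 0;
	int id;

	if (config->cpu_worker_num <= 0)
		return -1;                 /* no worker available */

	id = next;
	next = (next + 1) % config->cpu_worker_num;

	return id;
}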
Code Example #3
File: hammer_sched.c Project: tmallory/hammer
void hammer_sched_add_connection(hammer_connection_t *c, hammer_sched_t *sched, hammer_connection_t *rc)
{
	int ret;

	ret = hammer_epoll_add(sched->epoll_fd, c->socket, HAMMER_EPOLL_READ,
			HAMMER_EPOLL_LEVEL_TRIGGERED, (void *)c);
	if (hammer_likely(ret == 0)) {
		if (rc != NULL) {
			/* rc != NULL: this connection was added by connect(), to the server */
			c->r_conn = rc;
			rc->r_conn = c;

			sched->connected_connections ++;
		} else {
			/* rc == NULL: this connection was added by accept(), from a client */
			sched->accepted_connections ++;
		}
	} else {
		/* fails, close the connection */
		hammer_close_connection(c);
		hammer_err("epoll add fails\n");
		exit(0);
	}

	return;
}
Code Example #4
/* Get the buffer of each CPU worker at each time interval I */
void hammer_gpu_get_batch(hammer_gpu_worker_t *g, hammer_batch_t *batch_set)
{
	int i, id;
	hammer_batch_t *batch;

	/* Get next Batch */
	if (g->buf_set_id == 0) {
		g->cur_buf_set = g->buf_set_B;
		g->buf_set_id = 1;
	} else if (g->buf_set_id == 1) {
		g->cur_buf_set = g->buf_set_A;
		g->buf_set_id = 0;
	}

	/* Tell the CPU worker we are taking the batch */
	for (i = 0; i < config->cpu_worker_num; i ++) {
		batch = &(batch_set[i]);

		if (batch->buf_has_been_taken == -1) {
			pthread_mutex_lock(&(batch->mutex_batch_launch));
			id = batch->buf_has_been_taken = batch->cur_buf_id;
			pthread_mutex_unlock(&(batch->mutex_batch_launch));
			
			assert(id == g->buf_set_id);
		} else {
			hammer_err("error in hammer_gpu_get_batch\n");
			exit(0);
		}

		/* For statistics */
		g->total_bytes += g->cur_buf_set[i]->buf_length;
	}

	return ;
}
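The CPU-worker side of this handshake is not part of these snippets. The sketch below guesses at how a worker might flip to its other buffer once the GPU has taken the current one; it assumes each hammer_batch_t owns two buffer sets indexed 0 and 1 and reuses the field names that appear above, but the actual protocol may differ.

/* Sketch only: flip the CPU worker's current buffer after the GPU has taken it. */
static void sketch_cpu_worker_switch_buf(hammer_batch_t *batch)
{
	pthread_mutex_lock(&(batch->mutex_batch_launch));
	if (batch->buf_has_been_taken == batch->cur_buf_id) {
		batch->cur_buf_id = 1 - batch->cur_buf_id;  /* switch between buffer 0 and 1 */
		batch->buf_has_been_taken = -1;             /* re-arm for the next interval */
	}
	pthread_mutex_unlock(&(batch->mutex_batch_launch));
}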
Code Example #5
File: hammer_handler.c Project: tmallory/hammer
/* Write to the server. This is also used for writing to clients when we
 * accelerate encryption and HMAC with the GPU; in that case the whole
 * (already encrypted) packet is sent here as-is. */
int hammer_handler_write(hammer_connection_t *c)
{
	int sent;
	hammer_connection_t *rc;

	hammer_job_t *this_job;
	struct hammer_list *job_list, *job_head;

	if (c->ssl) {
		hammer_err("this should not be an SSL connection\n");
		exit(0);
	}

	/* c is the connection we write to; rc is the paired connection
	 * that has read the data */
	rc = c->r_conn;

	job_list = rc->job_list;
	hammer_list_foreach(job_head, job_list) {
		this_job = hammer_list_entry(job_head, hammer_job_t, _head);

		sent = hammer_socket_write(
				c->socket,
				this_job->job_body_ptr,
				this_job->job_body_length);

		if (sent != this_job->job_body_length) {
			printf("Not all data was sent\n");
			return -1;
		}

		hammer_conn_job_del(this_job);
	}

	return 0;
}
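On a non-blocking socket, hammer_socket_write() may legitimately write fewer bytes than requested, which the handler above treats as a hard error. A hedged sketch of a retry loop, assuming hammer_socket_write() has write(2)-like semantics (bytes written on success, negative on error):

/* Sketch only: keep writing until the whole job body is out or an error occurs. */
static int sketch_write_full(int sock, char *buf, int len)
{
	int written = 0, n;

	while (written < len) {
		n = hammer_socket_write(sock, buf + written, len - written);
		if (n <= 0)
			return -1;   /* error, or EAGAIN on a non-blocking socket */
		written += n;
	}

	return written;
}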
Code Example #6
File: hammer_epoll.c Project: tmallory/hammer
int hammer_epoll_create(int max_events)
{
	int efd;

	efd = epoll_create(max_events);
	if (efd == -1) {
		perror("epoll_create");
		hammer_err("epoll_create() failed");
	}

	return efd;
}
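The hammer_epoll_add() wrapper used in Code Examples #3 and #8 is not included in these snippets. As a companion to hammer_epoll_create(), here is a sketch of what such a wrapper usually looks like on top of epoll_ctl(2); mapping HAMMER_EPOLL_READ and HAMMER_EPOLL_LEVEL_TRIGGERED to EPOLLIN and epoll's default level-triggered mode is an assumption.

/* Sketch only: not the project's actual hammer_epoll_add(). */
#include <stdint.h>
#include <sys/epoll.h>

static int sketch_epoll_add(int efd, int fd, uint32_t events, void *data)
{
	struct epoll_event ev;

	ev.events = events;   /* e.g. EPOLLIN; level-triggered is epoll's default */
	ev.data.ptr = data;   /* the hammer_connection_t, as the loop in Code Example #11 expects */

	return epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);
}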
Code Example #7
File: hammer_connection.c Project: tmallory/hammer
int hammer_close_connection(hammer_connection_t *c)
{
	hammer_job_t *this_job;
	struct hammer_list *job_list, *job_head;

	if (c == NULL) {
		hammer_err("c is null\n");
		return 0;
	}

	hammer_socket_close(c->socket);

	job_list = c->job_list;
	hammer_list_foreach(job_head, job_list) {
		this_job = hammer_list_entry(job_head, hammer_job_t, _head);
		hammer_conn_job_del(this_job);
	}
Code Example #8
File: hammer_sched.c Project: OlegJakushkin/hammer
void hammer_sched_add_connection(hammer_connection_t *c, hammer_sched_t *sched)
{
	int ret;

	ret = hammer_epoll_add(sched->epoll_fd, c->socket, HAMMER_EPOLL_READ,
			HAMMER_EPOLL_LEVEL_TRIGGERED, (void *)c);
	if (hammer_likely(ret == 0)) {
		if (c->type == HAMMER_CONN_CLIENT) {
			sched->client_connections ++;
		} else { /* HAMMER_CONN_SERVER */
			sched->server_connections ++;
		}
	} else {
		/* fails, close the connection */
		hammer_close_connection(c);
		hammer_err("epoll add fails\n");
		exit(0);
	}

	return;
}
Code Example #9
/* Tell the CPU worker that this batch has been completed */
void hammer_gpu_give_result(hammer_gpu_worker_t *g, hammer_batch_t *batch_set)
{
	int i;
	hammer_batch_t *batch;

	for (i = 0; i < config->cpu_worker_num; i ++) {
		batch = &(batch_set[i]);

		if (batch->processed_buf_id == -1) {
			/* just mark there is a buf been processed */
			pthread_mutex_lock(&(batch->mutex_batch_complete));
			batch->processed_buf_id = g->buf_set_id;
			pthread_mutex_unlock(&(batch->mutex_batch_complete));
		} else {
			hammer_err("error in hammer_gpu_give_result\n");
			exit(0);
		}
	}

	return ;
}
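On the CPU-worker side, hammer_batch_if_gpu_processed_new() (called in Code Example #11) consumes this flag, but its implementation is not shown; the real function takes no arguments and presumably looks up the worker's own batch via thread-local storage. A per-batch sketch of just the flag handling, assuming a result is acknowledged by resetting processed_buf_id to -1:

/* Sketch only: check and acknowledge a completed buffer for one batch. */
static int sketch_batch_if_gpu_processed_new(hammer_batch_t *batch)
{
	int id;

	pthread_mutex_lock(&(batch->mutex_batch_complete));
	id = batch->processed_buf_id;
	if (id != -1)
		batch->processed_buf_id = -1;   /* acknowledge, so the GPU worker can set it again */
	pthread_mutex_unlock(&(batch->mutex_batch_complete));

	return id;   /* -1: nothing new; 0 or 1: that buffer set has been processed */
}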
Code Example #10
/* Created thread: all these calls run in the thread context */
void *hammer_gpu_worker_loop(void *c)
{
	hammer_timer_t t, counter, loopcounter;
	hammer_log_t log;
	hammer_gpu_worker_context_t *context = c;
	hammer_batch_t *batch_set = context->cpu_batch_set;
	hammer_sched_t *sched_set = context->sched_set;
	int i, first, ready, core_id = context->core_id;
	unsigned long mask = 0;
	double elapsed_time;
	int cuda_stream_id;
	hammer_batch_buf_t *buf;

	/* Set affinity of this gpu worker */
	mask = 1 << core_id;
	if (sched_setaffinity(0, sizeof(unsigned long), (cpu_set_t *)&mask) < 0) {
		hammer_err("Err set affinity in GPU worker\n");
		exit(0);
	}

	/* Init timers */
	hammer_timer_init(&t);
	hammer_timer_init(&counter);
	hammer_timer_init(&loopcounter);
	hammer_log_init(&log);

	/* Synchronization, Wait for CPU workers */
	while (1) {
		ready = 0;

		pthread_mutex_lock(&mutex_worker_init);
		for (i = 0; i < config->cpu_worker_num; i++) {
			if (sched_set[i].initialized)	ready++;
		}
		pthread_mutex_unlock(&mutex_worker_init);

		if (ready == config->cpu_worker_num) break;
		usleep(5000);
	}

	/* Initialize the GPU worker: we wait until all CPU workers have been
	 * initialized so the GPU worker can be set up with their batches */
	hammer_gpu_worker_t g;
	hammer_gpu_worker_init(&g, batch_set, sched_set);

	/* Timers for each kernel launch */
	hammer_timer_restart(&loopcounter);
	
	for (i = 0; i < config->iterations; i ++) {
		hammer_log_loop_marker(&log);

		/* Counter for the whole loop, excluding the first two warm-up iterations */
		if (i == 2)	hammer_timer_restart(&counter);

		// Wait for 'I', synchronization point
		//////////////////////////////////////////
		/* This is a CPU/GPU synchronization point, as all commands in the
		 * in-order queue before the preceding cl*Unmap() are now finished.
		 * We can accurately sample the per-loop timer here.
		 */
		first = 1;
		do {
			elapsed_time = hammer_timer_get_elapsed_time(&loopcounter);
			if (first) {
				hammer_log_msg(&log, "\n%s %f\n", "<<<<<<<<Elapsed Time : ", elapsed_time);
				first = 0;
			}

			if (elapsed_time - config->I > 1) { /* missed the time point by more than 1 ms */
				hammer_log_msg(&log, "\n%s %f\n", ">>>>>>>>Time point lost!!!! : ", elapsed_time);
				break;
			}
		} while (fabs(elapsed_time - config->I) > 1);

		hammer_log_msg(&log, "%s %f\n", ">>>>>>>>Time point arrived : ", elapsed_time);
		hammer_timer_restart(&loopcounter);


		/* Get Input Buffer from CPU Workers */
		//////////////////////////////////////////
		hammer_timer_restart(&t);

		hammer_gpu_get_batch(&g, batch_set);

		hammer_timer_stop(&t);
		hammer_log_msg(&log, "\n%s\n", "---------------------------", 0);
		hammer_log_timer(&log, "%s %f ms\n", "Get Input Time",
			hammer_timer_get_total_time(&t), 10, 1);


		//Enqueue a kernel run call.
		//////////////////////////////////////////
		hammer_timer_restart(&t);

		/* We launch each CPU worker's batch as a separate CUDA stream */
		for (cuda_stream_id = 0; cuda_stream_id < config->cpu_worker_num; cuda_stream_id ++) {
			buf = g.cur_buf_set[cuda_stream_id];

			// FIXME:
			crypto_context_aes_sha1_encrypt (
				&(g.cry_ctx),
				buf->input_buf,
				buf->output_buf,
				0, // in_pos
				buf->aes_key_pos,
				buf->ivs_pos,
				buf->hmac_key_pos,
				buf->pkt_offset_pos,
				buf->length_pos,
				buf->buf_size, // input buffer size
				buf->buf_length, // output buffer size FIXME ???
				buf->job_num,
				cuda_stream_id,
				128);

			/* Wait for transfer completion */
			crypto_context_sync(&(g.cry_ctx), cuda_stream_id, buf->output_buf, 1, 1);
		}

		hammer_timer_stop(&t);
		hammer_log_timer(&log, "%s %f ms\n", "Execution Time",
			hammer_timer_get_total_time(&t), 10, 1);
		
		/* Tell the CPU workers that this batch has been processed */
		hammer_gpu_give_result(&g, batch_set);

		hammer_log_msg(&log, "%s %dth iteration\n", "This is", i);
		//if (i > 1)	timeLog->Msg( "%s %f ms\n", "Time after is", counter.GetElapsedTime());
	}

	hammer_timer_stop(&counter);
	printf("End of execution, total time: %f ms\n", hammer_timer_get_total_time(&counter));
	//FIXME:printf("Processing speed is %.2f Mbps\n", (bytes * 8) / (1e3 * hammer_timer_get_total_time(&counter)));

	return 0;
}
Code Example #11
void *hammer_epoll_start(int efd, hammer_epoll_handlers_t *handler, int max_events)
{
	int i, ret = -1;
	int num_events;
	struct epoll_event *events;
	hammer_connection_t *c;
	// int fds_timeout;

	//fds_timeout = log_current_utime + config->timeout;
	events = hammer_mem_malloc(max_events * sizeof(struct epoll_event));
	
	while (1) {

		if (config->gpu) {
			/* On each pass, first check whether the GPU has indicated
			   1) which buffer has been taken, and
			   2) which buffer has been processed */
			if (hammer_batch_if_gpu_processed_new()) {
				hammer_batch_forwarding();
			}
		}

		//FIXME: maybe problems in pointer &events
		num_events = hammer_epoll_wait(efd, &events, max_events);

		for (i = 0; i < num_events; i ++) {
			c = (hammer_connection_t *) events[i].data.ptr;

			if (events[i].events & EPOLLIN) {
				if (c->type == HAMMER_CONN_CLIENT) {
					ret = (*handler->client_read) (c);
				} else {
					if (c->type != HAMMER_CONN_SERVER) {
						hammer_err("this connection is not a server conn?\n");
						exit(0);
					}
					ret = (*handler->server_read) (c);
				}
			}
			else if (events[i].events & EPOLLOUT) {
				if (c->type == HAMMER_CONN_CLIENT) {
					ret = (*handler->client_write) (c);
				} else {
					if (c->type != HAMMER_CONN_SERVER) {
						hammer_err("this connection is not a server conn?\n");
						exit(0);
					}
					ret = (*handler->server_write) (c);
				}
			}
			else if (events[i].events & (EPOLLHUP | EPOLLERR | EPOLLRDHUP)) {
				ret = (*handler->error) (c);
			} else {
				hammer_err("unexpected epoll event\n");
				exit(0);
			}

			if (ret < 0) {
				HAMMER_TRACE("[FD %i] Epoll Event FORCE CLOSE | ret = %i", c->socket, ret);
				(*handler->close) (c);
			}
		}

		// FIXME: enable timeout
		/* Check timeouts and update next one 
		   if (log_current_utime >= fds_timeout) {
		   hammer_sched_check_timeouts(sched);
		   fds_timeout = log_current_utime + config->timeout;
		   }*/
	}

	return NULL;
}
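The FIXME about &events above suggests uncertainty over hammer_epoll_wait()'s signature, which is not part of these snippets. If it is a thin wrapper over epoll_wait(2) that takes the pre-allocated array directly, a sketch would look like the following, and the loop above would pass events rather than &events.

/* Sketch only: a thin blocking wrapper over epoll_wait(2). */
#include <sys/epoll.h>

static int sketch_epoll_wait(int efd, struct epoll_event *events, int max_events)
{
	return epoll_wait(efd, events, max_events, -1);   /* -1: block until an event arrives */
}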