Example No. 1
0
void on_init1_finished()
{
    long i;
    for(i = 0; i < N/PSIZE * N_LONG_PAGE; i = i + N_LONG_PAGE)
    {
        if(trigger[i] != 1)
        {
            abort;
        }
    }

    // mark this watcher committed
    trigger[0] = 3;

    for(i = 0; i < K; i = i + 1)
    {
#ifdef INPUT_PMEM
        cx[i] = pmem_in[4*i];
        cy[i] = pmem_in[4*i+1];
        cz[i] = pmem_in[4*i+2];
        cw[i] = pmem_in[4*i+3];
#else
        cx[i] = x[i];
        cy[i] = y[i];
        cz[i] = z[i];
        cw[i] = w[i];
#endif
    }

    // number of iterations
    iter = 0;

#ifdef DEBUG
    output_char('i');
    output_char('t');
    output_char('e');
    output_char('r');
    output_char(':');

    output_q(-1);
    output_char(':');
    output_centroids();
#endif

    start_iteration();

    commitd;
}
Example No. 2
0
/*
 * Send a status update to all connected clients
 */
void broadcast_status() {
    struct cl_entry *current;
    struct net_status *status;
    uint8_t dcount;
    uint16_t scount;
    uint8_t tcount;
    uint8_t permission;
    int ret;
    
    /*
     * Start iteration over complete client list
     * This locks the mutex for the client list
     */
    current = start_iteration();

    /* Get number of users */
    dcount = docent_exists();
    tcount = tutor_exists();
    scount = get_client_count() - dcount - tcount;

    while (current != NULL) {
        if (current == get_write_user()) {
            permission = 1;
        } else {
            permission = 0;
        }

        /* Build the status message */
        status = build_status(current->cdata->role, current->cdata->cid,
                permission, dcount, tcount, scount);
        ret = send(current->cdata->sfd, status, sizeof(struct net_status), 0);
        if (ret < 0) {
            perror("send");
        }
        free(status);
        /* Select next client in list */
        current = iteration_next();
    }

    log_debug("broadcasting agent: status sent to all connected clients");
    /*
     * End of iteration
     * This unlocks the mutex of the client list
     */
    end_iteration();
}
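
Examples No. 2 and No. 4 (below) both walk the client list through start_iteration(), iteration_next(), and end_iteration(), which the comments describe as locking and unlocking the client-list mutex. The following is a minimal sketch of how such helpers could look, assuming a singly linked list guarded by a pthread mutex; the clients head pointer, the cursor variable, and client_mutex are illustrative assumptions, not the project's actual code.

/* Sketch (assumed, not the project's implementation): a mutex-guarded
 * cursor over a singly linked client list, matching the calls used above. */
#include <pthread.h>
#include <stddef.h>

struct cl_entry {
    struct cl_data *cdata;       /* connection data, as dereferenced above */
    struct cl_entry *next;
};

static struct cl_entry *clients;     /* list head (assumed global) */
static struct cl_entry *cursor;      /* current position of the iteration */
static pthread_mutex_t client_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Lock the client list and return its first entry (NULL if empty). */
struct cl_entry *start_iteration(void) {
    pthread_mutex_lock(&client_mutex);
    cursor = clients;
    return cursor;
}

/* Advance to the next entry; returns NULL at the end of the list. */
struct cl_entry *iteration_next(void) {
    if (cursor != NULL)
        cursor = cursor->next;
    return cursor;
}

/* Finish the iteration and release the client-list mutex. */
void end_iteration(void) {
    cursor = NULL;
    pthread_mutex_unlock(&client_mutex);
}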
Example No. 3
0
void *monitor_run(void *ptr)
{
        struct mt_context *thread_context = (struct mt_context *) ptr;
        struct monitor_ctx *monitor_context = (struct monitor_ctx *) thread_context->param;

        struct rtgtargets *targets = monitor_context->targets;
        struct rtgconf *config = monitor_context->config;
        unsigned poll_interval = config->interval;

        /* Use the specified next iteration value.
         * If it is set to zero, we'll run as soon as possible. */
        curms_t next_iteration = monitor_context->next_iteration;
        curms_t this_iteration = 0;

        struct timespec loopdelay = { 0, 250 * 1000 * 1000 };
        nanosleep(&loopdelay, NULL);

        while (!thread_stop_requested) {
                curms_t now = curms();

                /* If there are no threads running and it's time for a new iteration,
                 * start a new iteration. */
                if (active_threads == 0 && now >= next_iteration) {
                        start_iteration(targets);
                        this_iteration = now;
                        next_iteration = next_interval(now, poll_interval);
                }

                /* Give the threads a chance to start, so we don't
                 * mistakenly think they're all done already. */
                nanosleep(&loopdelay, NULL);

                /* If there are no threads running and we have started an iteration,
                 * then we are done. End the iteration. */
                if (active_threads == 0 && this_iteration) {
                        end_iteration(next_iteration, this_iteration);
                        this_iteration = 0;
                }
        }

        /* Save next iteration value, so we can reuse it if necessary. */
        monitor_context->next_iteration = next_iteration;

        return NULL;
}
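
In Example No. 3 the next poll time comes from next_interval(now, poll_interval). As a rough illustration of the scheduling idea only, here is one plausible way such a helper could round the current time up to the next interval boundary; the millisecond typedef and the assumption that config->interval is given in seconds are guesses for the sketch, not taken from the source.

/* Sketch (assumed): align the next iteration to the next whole multiple
 * of the poll interval, with times expressed in milliseconds. */
typedef unsigned long long curms_t;

curms_t next_interval(curms_t now, unsigned poll_interval)
{
    curms_t interval_ms = (curms_t) poll_interval * 1000;  /* assumed: interval in seconds */
    if (interval_ms == 0)
        return now;                 /* degenerate config: run again as soon as possible */
    return ((now / interval_ms) + 1) * interval_ms;
}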
Example No. 4
0
/*
 * Send a board update message to all connected clients
 *
 * Excludes the user with write access if the parameter
 * excl_w is set to 1. If the blackboard should be broadcast to all
 * connected clients, excl_w should be 0.
 */
void broadcast_blackboard(char *blackboard, int bsem_id, int excl_w) {
    struct cl_entry *current;
    struct net_board *board;
    int ret;
    int length;

    /*
     * Prepare the blackboard message
     */
    lock_sem(bsem_id);
    length = strlen(blackboard);
    board = build_board(blackboard, length);
    unlock_sem(bsem_id);
    
    /*
     * Iterate over the complete client list
     * This locks the mutex of the client list
     */
    current = start_iteration();

    while (current != NULL) {
        /* Don't broadcast to the user with write access */
        if ((current == get_write_user()) && excl_w) {
            current = iteration_next();
            continue;
        }
        ret = send(current->cdata->sfd, board, sizeof(struct net_header) + length, 0);
        if (ret < 0) {
            perror("send");
        }
        /* Select next client in list */
        current = iteration_next();
    }
    
    /* Unlock mutex of client list */
    end_iteration();
    free(board);
    log_debug("broadcasting agent: blackboard sent to all connected clients");
}
Example No. 5
0
// Main ------------------------------------------------------------------------------------------
int main(int argc, char **argv) {

	const Params p(argc, argv);
	CUDASetup setcuda(p.device);
	Timer timer;
	cudaError_t cudaStatus;
	int it_cpu = 0;
	int it_gpu = 0;
	int err = 0;
#ifdef LOGS
	set_iter_interval_print(10);
	char test_info[500];
	snprintf(test_info, 500, "-i %d -g %d -t %d -f %s -l %d\n",p.n_gpu_threads, p.n_gpu_blocks,p.n_threads, p.file_name,p.switching_limit);
	start_log_file("cudaSingleSourceShortestPath", test_info);
	//printf("Com LOG\n");
#endif

	// Allocate
	int n_nodes, n_edges;
//	int n_nodes_o;
	read_input_size(n_nodes, n_edges, p);
	timer.start("Allocation");
	Node * h_nodes = (Node *) malloc(sizeof(Node) * n_nodes);

	//*************************** Allocating memory for the gold ***********************************
	Gold * gold = (Gold *) malloc(sizeof(Gold) * n_nodes);
	if (p.mode == 1) {
		// **********************  Reading the gold *********************************
		read_gold(gold, p);
		// **********************************************************************
	}

	//***********************************************************************************************
	Node * d_nodes;
	cudaStatus = cudaMalloc((void**) &d_nodes, sizeof(Node) * n_nodes);
	Edge * h_edges = (Edge *) malloc(sizeof(Edge) * n_edges);
	Edge * d_edges;
	cudaStatus = cudaMalloc((void**) &d_edges, sizeof(Edge) * n_edges);
	std::atomic_int *h_color = (std::atomic_int *) malloc(
			sizeof(std::atomic_int) * n_nodes);
	int * d_color;
	cudaStatus = cudaMalloc((void**) &d_color, sizeof(int) * n_nodes);
	std::atomic_int *h_cost = (std::atomic_int *) malloc(
			sizeof(std::atomic_int) * n_nodes);
	int * d_cost;
	cudaStatus = cudaMalloc((void**) &d_cost, sizeof(int) * n_nodes);
	int * h_q1 = (int *) malloc(n_nodes * sizeof(int));
	int * d_q1;
	cudaStatus = cudaMalloc((void**) &d_q1, sizeof(int) * n_nodes);
	int * h_q2 = (int *) malloc(n_nodes * sizeof(int));
	int * d_q2;
	cudaStatus = cudaMalloc((void**) &d_q2, sizeof(int) * n_nodes);
	std::atomic_int h_head[1];
	int * d_head;
	cudaStatus = cudaMalloc((void**) &d_head, sizeof(int));
	std::atomic_int h_tail[1];
	int * d_tail;
	cudaStatus = cudaMalloc((void**) &d_tail, sizeof(int));
	std::atomic_int h_threads_end[1];
	int * d_threads_end;
	cudaStatus = cudaMalloc((void**) &d_threads_end, sizeof(int));
	std::atomic_int h_threads_run[1];
	int * d_threads_run;
	cudaStatus = cudaMalloc((void**) &d_threads_run, sizeof(int));
	int h_num_t[1];
	int * d_num_t;
	cudaStatus = cudaMalloc((void**) &d_num_t, sizeof(int));
	int h_overflow[1];
	int * d_overflow;
	cudaStatus = cudaMalloc((void**) &d_overflow, sizeof(int));
	std::atomic_int h_gray_shade[1];
	int * d_gray_shade;
	cudaStatus = cudaMalloc((void**) &d_gray_shade, sizeof(int));
	std::atomic_int h_iter[1];
	int * d_iter;
	cudaStatus = cudaMalloc((void**) &d_iter, sizeof(int));
	cudaDeviceSynchronize();
	CUDA_ERR();
	ALLOC_ERR(h_nodes, h_edges, h_color, h_cost, h_q1, h_q2);
	timer.stop("Allocation");

	// Initialize
	timer.start("Initialization");
	const int max_gpu_threads = setcuda.max_gpu_threads();
	int source;
	read_input(source, h_nodes, h_edges, p);

	for (int i = 0; i < n_nodes; i++) {
		h_cost[i].store(INF);
	}
	h_cost[source].store(0);
	for (int i = 0; i < n_nodes; i++) {
		h_color[i].store(WHITE);
	}
	h_tail[0].store(0);
	h_head[0].store(0);
	h_threads_end[0].store(0);
	h_threads_run[0].store(0);
	h_q1[0] = source;
	h_iter[0].store(0);
	h_overflow[0] = 0;
	h_gray_shade[0].store(GRAY0);
	timer.stop("Initialization");
	//timer.print("Initialization", 1);

	// Copy to device
	timer.start("Copy To Device");
	cudaStatus = cudaMemcpy(d_nodes, h_nodes, sizeof(Node) * n_nodes,
			cudaMemcpyHostToDevice);
	cudaStatus = cudaMemcpy(d_edges, h_edges, sizeof(Edge) * n_edges,
			cudaMemcpyHostToDevice);
	cudaDeviceSynchronize();
	CUDA_ERR();
	timer.stop("Copy To Device");

	for (int rep = 0; rep < p.n_reps; rep++) {

		// Reset
		for (int i = 0; i < n_nodes; i++) {
			h_cost[i].store(INF);
		}
		h_cost[source].store(0);
		for (int i = 0; i < n_nodes; i++) {
			h_color[i].store(WHITE);
		}

		it_cpu = 0;
		it_gpu = 0;

		h_tail[0].store(0);
		h_head[0].store(0);
		h_threads_end[0].store(0);
		h_threads_run[0].store(0);
		h_q1[0] = source;
		h_iter[0].store(0);
		h_overflow[0] = 0;
		h_gray_shade[0].store(GRAY0);

		// if(rep >= p.n_warmup)
		timer.start("Kernel");

#ifdef LOGS
		start_iteration();
#endif

		// Run first iteration in master CPU thread
		h_num_t[0] = 1;
		int pid;
		int index_i, index_o;
		for (index_i = 0; index_i < h_num_t[0]; index_i++) {
			pid = h_q1[index_i];
			h_color[pid].store(BLACK);
			int cur_cost = h_cost[pid].load();
			for (int i = h_nodes[pid].x; i < (h_nodes[pid].y + h_nodes[pid].x);
					i++) {
				int id = h_edges[i].x;
				int cost = h_edges[i].y;
				cost += cur_cost;
				h_cost[id].store(cost);
				h_color[id].store(GRAY0);
				index_o = h_tail[0].fetch_add(1);
				h_q2[index_o] = id;
			}
		}
		h_num_t[0] = h_tail[0].load();
		h_tail[0].store(0);
		h_threads_run[0].fetch_add(1);
		h_gray_shade[0].store(GRAY1);
		h_iter[0].fetch_add(1);
		// if(rep >= p.n_warmup)
		timer.stop("Kernel");

		// Pointers to input and output queues
		int * h_qin = h_q2;
		int * h_qout = h_q1;
		int * d_qin = d_q2;
		int * d_qout = d_q1;

		const int CPU_EXEC = (p.n_threads > 0) ? 1 : 0;
		const int GPU_EXEC =
				(p.n_gpu_blocks > 0 && p.n_gpu_threads > 0) ? 1 : 0;

		// Run subsequent iterations on CPU or GPU until number of input queue elements is 0
		while (*h_num_t != 0) {

			if ((*h_num_t < p.switching_limit || GPU_EXEC == 0)
					&& CPU_EXEC == 1) { // If the number of input queue elements is lower than switching_limit
				it_cpu = it_cpu + 1;
				//   if(rep >= p.n_warmup)
				timer.start("Kernel");

				// Continue until switching_limit condition is not satisfied
				while ((*h_num_t != 0)
						&& (*h_num_t < p.switching_limit || GPU_EXEC == 0)
						&& CPU_EXEC == 1) {

					// Swap queues
					if (h_iter[0] % 2 == 0) {
						h_qin = h_q1;
						h_qout = h_q2;
					} else {
						h_qin = h_q2;
						h_qout = h_q1;
					}

					std::thread main_thread(run_cpu_threads, h_nodes, h_edges,
							h_cost, h_color, h_qin, h_qout, h_num_t, h_head,
							h_tail, h_threads_end, h_threads_run, h_gray_shade,
							h_iter, p.n_threads, p.switching_limit, GPU_EXEC);
					main_thread.join();

					h_num_t[0] = h_tail[0].load(); // Number of elements in output queue
					h_tail[0].store(0);
					h_head[0].store(0);
					if (h_iter[0].load() % 2 == 0)
						h_gray_shade[0].store(GRAY0);
					else
						h_gray_shade[0].store(GRAY1);
				}

				//  if(rep >= p.n_warmup)
				timer.stop("Kernel");

			} else if ((*h_num_t >= p.switching_limit || CPU_EXEC == 0)
					&& GPU_EXEC == 1) { // If the number of input queue elements is higher than or equal to switching_limit
				it_gpu = it_gpu + 1;
				//  if(rep >= p.n_warmup)
				timer.start("Copy To Device");
				cudaStatus = cudaMemcpy(d_cost, h_cost, sizeof(int) * n_nodes,
						cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_color, h_color, sizeof(int) * n_nodes,
						cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_threads_run, h_threads_run,
						sizeof(int), cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_threads_end, h_threads_end,
						sizeof(int), cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_overflow, h_overflow, sizeof(int),
						cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_q1, h_q1, sizeof(int) * n_nodes,
						cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_q2, h_q2, sizeof(int) * n_nodes,
						cudaMemcpyHostToDevice);
				cudaStatus = cudaMemcpy(d_iter, h_iter, sizeof(int),
						cudaMemcpyHostToDevice);
				cudaDeviceSynchronize();
				CUDA_ERR();
				//  if(rep >= p.n_warmup)
				timer.stop("Copy To Device");

				// Continue until switching_limit condition is not satisfied
				while ((*h_num_t != 0)
						&& (*h_num_t >= p.switching_limit || CPU_EXEC == 0)
						&& GPU_EXEC == 1) {

					// Swap queues
					if (h_iter[0] % 2 == 0) {
						d_qin = d_q1;
						d_qout = d_q2;
					} else {
						d_qin = d_q2;
						d_qout = d_q1;
					}

					//  if(rep >= p.n_warmup)
					timer.start("Copy To Device");
					cudaStatus = cudaMemcpy(d_num_t, h_num_t, sizeof(int),
							cudaMemcpyHostToDevice);
					cudaStatus = cudaMemcpy(d_tail, h_tail, sizeof(int),
							cudaMemcpyHostToDevice);
					cudaStatus = cudaMemcpy(d_head, h_head, sizeof(int),
							cudaMemcpyHostToDevice);
					cudaStatus = cudaMemcpy(d_gray_shade, h_gray_shade,
							sizeof(int), cudaMemcpyHostToDevice);
					cudaDeviceSynchronize();
					CUDA_ERR();
					//  if(rep >= p.n_warmup)
					timer.stop("Copy To Device");

					//  if(rep >= p.n_warmup)
					timer.start("Kernel");
					assert(
							p.n_gpu_threads <= max_gpu_threads
									&& "The thread block size is greater than the maximum thread block size that can be used on this device");
					cudaStatus = call_SSSP_gpu(p.n_gpu_blocks, p.n_gpu_threads,
							d_nodes, d_edges, d_cost, d_color, d_qin, d_qout,
							d_num_t, d_head, d_tail, d_threads_end,
							d_threads_run, d_overflow, d_gray_shade, d_iter,
							p.switching_limit, CPU_EXEC,
							sizeof(int) * (W_QUEUE_SIZE + 3));
					cudaDeviceSynchronize();
					CUDA_ERR();
					//  if(rep >= p.n_warmup)
					timer.stop("Kernel");

					// if(rep >= p.n_warmup)
					timer.start("Copy Back and Merge");
					cudaStatus = cudaMemcpy(h_tail, d_tail, sizeof(int),
							cudaMemcpyDeviceToHost);
					cudaStatus = cudaMemcpy(h_iter, d_iter, sizeof(int),
							cudaMemcpyDeviceToHost);
					cudaDeviceSynchronize();
					CUDA_ERR();
					//  if(rep >= p.n_warmup)
					timer.stop("Copy Back and Merge");

					h_num_t[0] = h_tail[0].load(); // Number of elements in output queue
					h_tail[0].store(0);
					h_head[0].store(0);
					if (h_iter[0].load() % 2 == 0)
						h_gray_shade[0].store(GRAY0);
					else
						h_gray_shade[0].store(GRAY1);
				}

				// if(rep >= p.n_warmup)
				timer.start("Copy Back and Merge");
				cudaStatus = cudaMemcpy(h_cost, d_cost, sizeof(int) * n_nodes,
						cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_color, d_color, sizeof(int) * n_nodes,
						cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_threads_run, d_threads_run,
						sizeof(int), cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_threads_end, d_threads_end,
						sizeof(int), cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_overflow, d_overflow, sizeof(int),
						cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_q1, d_q1, sizeof(int) * n_nodes,
						cudaMemcpyDeviceToHost);
				cudaStatus = cudaMemcpy(h_q2, d_q2, sizeof(int) * n_nodes,
						cudaMemcpyDeviceToHost);
				cudaDeviceSynchronize();
				CUDA_ERR();
				// if(rep >= p.n_warmup)
				timer.stop("Copy Back and Merge");
			}
		}

#ifdef LOGS
		end_iteration();
#endif

		// printf("IT CPU:%d\t",it_cpu);
		//printf("IT GPU:%d\n",it_gpu);
		if (p.mode == 1) {
			err = newest_verify(h_cost, n_nodes, n_nodes, gold, it_cpu, it_gpu);
		}		//err=new_verify(h_cost, n_nodes,,it_cpu,it_gpu);

		if (err > 0) {
			printf("Errors: %d\n", err);
			read_input(source, h_nodes, h_edges, p);
			read_gold(gold, p);
		} else {
			printf(".ITERATION %d\n", rep);
		}
#ifdef LOGS
		log_error_count(err);
#endif
	// Read the input again
		//read_input(source, h_nodes, h_edges, p);
		//read_gold(gold,p);

	} // end of iteration
#ifdef LOGS
	end_log_file();
#endif
	// timer.print("Allocation", 1);
	//timer.print("Copy To Device", p.n_reps);
	// timer.print("Kernel", p.n_reps);
	// timer.print("Copy Back and Merge", p.n_reps);
	if (p.mode == 0) {
		create_output(h_cost, n_nodes, n_edges, std::string(p.comparison_file));
	}
	// Verify answer
	verify(h_cost, n_nodes, p.comparison_file);

	// Free memory
	timer.start("Deallocation");
	free(h_nodes);
	free(h_edges);
	free(h_color);
	free(h_cost);
	free(h_q1);
	free(h_q2);
	cudaStatus = cudaFree(d_nodes);
	cudaStatus = cudaFree(d_edges);
	cudaStatus = cudaFree(d_cost);
	cudaStatus = cudaFree(d_color);
	cudaStatus = cudaFree(d_q1);
	cudaStatus = cudaFree(d_q2);
	cudaStatus = cudaFree(d_num_t);
	cudaStatus = cudaFree(d_head);
	cudaStatus = cudaFree(d_tail);
	cudaStatus = cudaFree(d_threads_end);
	cudaStatus = cudaFree(d_threads_run);
	cudaStatus = cudaFree(d_overflow);
	cudaStatus = cudaFree(d_iter);
	cudaStatus = cudaFree(d_gray_shade);
	CUDA_ERR();
	cudaDeviceSynchronize();
	timer.stop("Deallocation");
	//timer.print("Deallocation", 1);

	// Release timers
	timer.release("Allocation");
	timer.release("Initialization");
	timer.release("Copy To Device");
	timer.release("Kernel");
	timer.release("Copy Back and Merge");
	timer.release("Deallocation");

	printf("Test Passed\n");
	return 0;
}