int main(void) { struct process_arguments x = { 0 }; pid_t app_listener; pid_t web_listener; pid_t bank_listener; start_log_file(); do { app_listener = create_process(communication_app, &x); web_listener = create_process(communication_web, &x); bank_listener = create_process(communication_bank, &x); } while (true); kill_process(app_listener); kill_process(web_listener); kill_process(bank_listener); return EXIT_SUCCESS; }
int main(int argc, char *argv[]) { int notdone=1; char buf[1024]; int val[9], i, ch, code; time_t starttime, curtime; struct timeval tv; struct timezone tz; struct tm *ltime; int interval = 1; setup_vars(); printf("Hello, World!\n"); if (get_args(argc, argv) != 0) { show_usage(); exit(1); } for(ch=0;ch<CHMAX;ch++) { clear_reading_storage(ch); } clear_inputs(interval); make_line(); if ((fd = open(dsrc, O_RDWR|O_NONBLOCK)) < 0) { printf("couldn't open data source file\n"); exit(1); } start_serial_io(fd, baud); my_fgets(buf, 1000, fd); notdone = 0; printf("syncing time...\n"); while (notdone < 2) { starttime = time(NULL); ltime = localtime(&starttime); if ((ltime->tm_sec % interval) == 0) { notdone++; } else { /* sleep(1); */ my_fgets(buf, 1000, fd); } curday = ltime->tm_mday; set_curdatestring(ltime); } if (log_file_name != NULL) { start_log_file(); } printf("starting at %010d\n", (int) starttime); notdone = 1; while (notdone) { /* take care of time first */ curtime = time(NULL); if (curtime >= (starttime + interval)) { if (logfp != NULL) { fprintf(logfp, "%010d (%s)", (int) starttime, safe_ctime(&starttime)); } else { printf("%010d (%s)", (int) starttime, safe_ctime(&starttime)); } for (ch=0;ch<CHMAX;ch++) { dump_reading(ch); clear_reading_storage(ch); } /* dump_inputs(); */ if (logfp != NULL) { fprintf(logfp, "\n"); } else { printf("\n"); } starttime = curtime; ltime = localtime(&curtime); /* ltime is needed here and ctime messes it up */ /* check for the day change here so that the last second of yesterday ends up in *yesterday's* log and this days data ends up in this days log */ if (ltime->tm_mday != curday) { set_curdatestring(ltime); if (log_file_name != NULL) { fclose(logfp); start_log_file(); } curday = ltime->tm_mday; } else { /* flush whenever a line is written */ if (logfp != NULL) { fflush(logfp); } else { fflush(stdout); } } } if (my_fgets(buf, 1000, fd) == 1) { if (strlen(buf) > 4) { /* printf("%s", buf); */ buf[strlen(buf) - 1] = 0; chop_input(buf, val, &code); /* test_code_change(~code, curtime); */ for (ch=0;ch<CHMAX;ch++) { add_reading(ch, val[ch+1]); } } } else { /* don't consume 100% of the processor. */ usleep(1000); } fflush(stdout); } exit(0); }
// Main ------------------------------------------------------------------------------------------ int main(int argc, char **argv) { const Params p(argc, argv); CUDASetup setcuda(p.device); Timer timer; cudaError_t cudaStatus; int it_cpu = 0; int it_gpu = 0; int err = 0; #ifdef LOGS set_iter_interval_print(10); char test_info[500]; snprintf(test_info, 500, "-i %d -g %d -t %d -f %s -l %d\n",p.n_gpu_threads, p.n_gpu_blocks,p.n_threads, p.file_name,p.switching_limit); start_log_file("cudaSingleSourceShortestPath", test_info); //printf("Com LOG\n"); #endif // Allocate int n_nodes, n_edges; // int n_nodes_o; read_input_size(n_nodes, n_edges, p); timer.start("Allocation"); Node * h_nodes = (Node *) malloc(sizeof(Node) * n_nodes); //*************************** Alocando Memoria para o Gold ************************************* Gold * gold = (Gold *) malloc(sizeof(Gold) * n_nodes); if (p.mode == 1) { // ********************** Lendo O gold ********************************* read_gold(gold, p); // ********************************************************************** } //*********************************************************************************************** Node * d_nodes; cudaStatus = cudaMalloc((void**) &d_nodes, sizeof(Node) * n_nodes); Edge * h_edges = (Edge *) malloc(sizeof(Edge) * n_edges); Edge * d_edges; cudaStatus = cudaMalloc((void**) &d_edges, sizeof(Edge) * n_edges); std::atomic_int *h_color = (std::atomic_int *) malloc( sizeof(std::atomic_int) * n_nodes); int * d_color; cudaStatus = cudaMalloc((void**) &d_color, sizeof(int) * n_nodes); std::atomic_int *h_cost = (std::atomic_int *) malloc( sizeof(std::atomic_int) * n_nodes); int * d_cost; cudaStatus = cudaMalloc((void**) &d_cost, sizeof(int) * n_nodes); int * h_q1 = (int *) malloc(n_nodes * sizeof(int)); int * d_q1; cudaStatus = cudaMalloc((void**) &d_q1, sizeof(int) * n_nodes); int * h_q2 = (int *) malloc(n_nodes * sizeof(int)); int * d_q2; cudaStatus = cudaMalloc((void**) &d_q2, sizeof(int) * n_nodes); std::atomic_int h_head[1]; int * d_head; cudaStatus = cudaMalloc((void**) &d_head, sizeof(int)); std::atomic_int h_tail[1]; int * d_tail; cudaStatus = cudaMalloc((void**) &d_tail, sizeof(int)); std::atomic_int h_threads_end[1]; int * d_threads_end; cudaStatus = cudaMalloc((void**) &d_threads_end, sizeof(int)); std::atomic_int h_threads_run[1]; int * d_threads_run; cudaStatus = cudaMalloc((void**) &d_threads_run, sizeof(int)); int h_num_t[1]; int * d_num_t; cudaStatus = cudaMalloc((void**) &d_num_t, sizeof(int)); int h_overflow[1]; int * d_overflow; cudaStatus = cudaMalloc((void**) &d_overflow, sizeof(int)); std::atomic_int h_gray_shade[1]; int * d_gray_shade; cudaStatus = cudaMalloc((void**) &d_gray_shade, sizeof(int)); std::atomic_int h_iter[1]; int * d_iter; cudaStatus = cudaMalloc((void**) &d_iter, sizeof(int)); cudaDeviceSynchronize(); CUDA_ERR(); ALLOC_ERR(h_nodes, h_edges, h_color, h_cost, h_q1, h_q2); timer.stop("Allocation"); // Initialize timer.start("Initialization"); const int max_gpu_threads = setcuda.max_gpu_threads(); int source; read_input(source, h_nodes, h_edges, p); for (int i = 0; i < n_nodes; i++) { h_cost[i].store(INF); } h_cost[source].store(0); for (int i = 0; i < n_nodes; i++) { h_color[i].store(WHITE); } h_tail[0].store(0); h_head[0].store(0); h_threads_end[0].store(0); h_threads_run[0].store(0); h_q1[0] = source; h_iter[0].store(0); h_overflow[0] = 0; h_gray_shade[0].store(GRAY0); timer.stop("Initialization"); //timer.print("Initialization", 1); // Copy to device timer.start("Copy To Device"); cudaStatus = cudaMemcpy(d_nodes, h_nodes, sizeof(Node) * n_nodes, cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_edges, h_edges, sizeof(Edge) * n_edges, cudaMemcpyHostToDevice); cudaDeviceSynchronize(); CUDA_ERR(); timer.stop("Copy To Device"); for (int rep = 0; rep < p.n_reps; rep++) { // Reset for (int i = 0; i < n_nodes; i++) { h_cost[i].store(INF); } h_cost[source].store(0); for (int i = 0; i < n_nodes; i++) { h_color[i].store(WHITE); } it_cpu = 0; it_gpu = 0; h_tail[0].store(0); h_head[0].store(0); h_threads_end[0].store(0); h_threads_run[0].store(0); h_q1[0] = source; h_iter[0].store(0); h_overflow[0] = 0; h_gray_shade[0].store(GRAY0); // if(rep >= p.n_warmup) timer.start("Kernel"); #ifdef LOGS start_iteration(); #endif // Run first iteration in master CPU thread h_num_t[0] = 1; int pid; int index_i, index_o; for (index_i = 0; index_i < h_num_t[0]; index_i++) { pid = h_q1[index_i]; h_color[pid].store(BLACK); int cur_cost = h_cost[pid].load(); for (int i = h_nodes[pid].x; i < (h_nodes[pid].y + h_nodes[pid].x); i++) { int id = h_edges[i].x; int cost = h_edges[i].y; cost += cur_cost; h_cost[id].store(cost); h_color[id].store(GRAY0); index_o = h_tail[0].fetch_add(1); h_q2[index_o] = id; } } h_num_t[0] = h_tail[0].load(); h_tail[0].store(0); h_threads_run[0].fetch_add(1); h_gray_shade[0].store(GRAY1); h_iter[0].fetch_add(1); // if(rep >= p.n_warmup) timer.stop("Kernel"); // Pointers to input and output queues int * h_qin = h_q2; int * h_qout = h_q1; int * d_qin = d_q2; int * d_qout = d_q1; const int CPU_EXEC = (p.n_threads > 0) ? 1 : 0; const int GPU_EXEC = (p.n_gpu_blocks > 0 && p.n_gpu_threads > 0) ? 1 : 0; // Run subsequent iterations on CPU or GPU until number of input queue elements is 0 while (*h_num_t != 0) { if ((*h_num_t < p.switching_limit || GPU_EXEC == 0) && CPU_EXEC == 1) { // If the number of input queue elements is lower than switching_limit it_cpu = it_cpu + 1; // if(rep >= p.n_warmup) timer.start("Kernel"); // Continue until switching_limit condition is not satisfied while ((*h_num_t != 0) && (*h_num_t < p.switching_limit || GPU_EXEC == 0) && CPU_EXEC == 1) { // Swap queues if (h_iter[0] % 2 == 0) { h_qin = h_q1; h_qout = h_q2; } else { h_qin = h_q2; h_qout = h_q1; } std::thread main_thread(run_cpu_threads, h_nodes, h_edges, h_cost, h_color, h_qin, h_qout, h_num_t, h_head, h_tail, h_threads_end, h_threads_run, h_gray_shade, h_iter, p.n_threads, p.switching_limit, GPU_EXEC); main_thread.join(); h_num_t[0] = h_tail[0].load(); // Number of elements in output queue h_tail[0].store(0); h_head[0].store(0); if (h_iter[0].load() % 2 == 0) h_gray_shade[0].store(GRAY0); else h_gray_shade[0].store(GRAY1); } // if(rep >= p.n_warmup) timer.stop("Kernel"); } else if ((*h_num_t >= p.switching_limit || CPU_EXEC == 0) && GPU_EXEC == 1) { // If the number of input queue elements is higher than or equal to switching_limit it_gpu = it_gpu + 1; // if(rep >= p.n_warmup) timer.start("Copy To Device"); cudaStatus = cudaMemcpy(d_cost, h_cost, sizeof(int) * n_nodes, cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_color, h_color, sizeof(int) * n_nodes, cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_threads_run, h_threads_run, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_threads_end, h_threads_end, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_overflow, h_overflow, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_q1, h_q1, sizeof(int) * n_nodes, cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_q2, h_q2, sizeof(int) * n_nodes, cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_iter, h_iter, sizeof(int), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); CUDA_ERR(); // if(rep >= p.n_warmup) timer.stop("Copy To Device"); // Continue until switching_limit condition is not satisfied while ((*h_num_t != 0) && (*h_num_t >= p.switching_limit || CPU_EXEC == 0) && GPU_EXEC == 1) { // Swap queues if (h_iter[0] % 2 == 0) { d_qin = d_q1; d_qout = d_q2; } else { d_qin = d_q2; d_qout = d_q1; } // if(rep >= p.n_warmup) timer.start("Copy To Device"); cudaStatus = cudaMemcpy(d_num_t, h_num_t, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_tail, h_tail, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_head, h_head, sizeof(int), cudaMemcpyHostToDevice); cudaStatus = cudaMemcpy(d_gray_shade, h_gray_shade, sizeof(int), cudaMemcpyHostToDevice); cudaDeviceSynchronize(); CUDA_ERR(); // if(rep >= p.n_warmup) timer.stop("Copy To Device"); // if(rep >= p.n_warmup) timer.start("Kernel"); assert( p.n_gpu_threads <= max_gpu_threads && "The thread block size is greater than the maximum thread block size that can be used on this device"); cudaStatus = call_SSSP_gpu(p.n_gpu_blocks, p.n_gpu_threads, d_nodes, d_edges, d_cost, d_color, d_qin, d_qout, d_num_t, d_head, d_tail, d_threads_end, d_threads_run, d_overflow, d_gray_shade, d_iter, p.switching_limit, CPU_EXEC, sizeof(int) * (W_QUEUE_SIZE + 3)); cudaDeviceSynchronize(); CUDA_ERR(); // if(rep >= p.n_warmup) timer.stop("Kernel"); // if(rep >= p.n_warmup) timer.start("Copy Back and Merge"); cudaStatus = cudaMemcpy(h_tail, d_tail, sizeof(int), cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_iter, d_iter, sizeof(int), cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); CUDA_ERR(); // if(rep >= p.n_warmup) timer.stop("Copy Back and Merge"); h_num_t[0] = h_tail[0].load(); // Number of elements in output queue h_tail[0].store(0); h_head[0].store(0); if (h_iter[0].load() % 2 == 0) h_gray_shade[0].store(GRAY0); else h_gray_shade[0].store(GRAY1); } // if(rep >= p.n_warmup) timer.start("Copy Back and Merge"); cudaStatus = cudaMemcpy(h_cost, d_cost, sizeof(int) * n_nodes, cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_color, d_color, sizeof(int) * n_nodes, cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_threads_run, d_threads_run, sizeof(int), cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_threads_end, d_threads_end, sizeof(int), cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_overflow, d_overflow, sizeof(int), cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_q1, d_q1, sizeof(int) * n_nodes, cudaMemcpyDeviceToHost); cudaStatus = cudaMemcpy(h_q2, d_q2, sizeof(int) * n_nodes, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); CUDA_ERR(); // if(rep >= p.n_warmup) timer.stop("Copy Back and Merge"); } } #ifdef LOGS end_iteration(); #endif // printf("IT CPU:%d\t",it_cpu); //printf("IT GPU:%d\n",it_gpu); if (p.mode == 1) { err = newest_verify(h_cost, n_nodes, n_nodes, gold, it_cpu, it_gpu); } //err=new_verify(h_cost, n_nodes,,it_cpu,it_gpu); if (err > 0) { printf("Errors: %d\n", err); read_input(source, h_nodes, h_edges, p); read_gold(gold, p); } else { printf(".ITERATION %d\n", rep); } #ifdef LOGS log_error_count(err); #endif // Ler a entrada novamente //read_input(source, h_nodes, h_edges, p); //read_gold(gold,p); } // end of iteration #ifdef LOGS end_log_file(); #endif // timer.print("Allocation", 1); //timer.print("Copy To Device", p.n_reps); // timer.print("Kernel", p.n_reps); // timer.print("Copy Back and Merge", p.n_reps); if (p.mode == 0) { create_output(h_cost, n_nodes, n_edges, std::string(p.comparison_file)); } // Verify answer verify(h_cost, n_nodes, p.comparison_file); // Free memory timer.start("Deallocation"); free(h_nodes); free(h_edges); free(h_color); free(h_cost); free(h_q1); free(h_q2); cudaStatus = cudaFree(d_nodes); cudaStatus = cudaFree(d_edges); cudaStatus = cudaFree(d_cost); cudaStatus = cudaFree(d_color); cudaStatus = cudaFree(d_q1); cudaStatus = cudaFree(d_q2); cudaStatus = cudaFree(d_num_t); cudaStatus = cudaFree(d_head); cudaStatus = cudaFree(d_tail); cudaStatus = cudaFree(d_threads_end); cudaStatus = cudaFree(d_threads_run); cudaStatus = cudaFree(d_overflow); cudaStatus = cudaFree(d_iter); cudaStatus = cudaFree(d_gray_shade); CUDA_ERR(); cudaDeviceSynchronize(); timer.stop("Deallocation"); //timer.print("Deallocation", 1); // Release timers timer.release("Allocation"); timer.release("Initialization"); timer.release("Copy To Device"); timer.release("Kernel"); timer.release("Copy Back and Merge"); timer.release("Deallocation"); printf("Test Passed\n"); return 0; }