/*
** Emulator entry point.
**
** Publishes the stack-allocated t_gameboy through the global g_gb, installs
** the SIGINT handler, zeroes the structure and runs the default initializers.
** Only afterwards are the command-line arguments parsed, so that they can
** override those defaults.
**
** Returns EXIT_FAILURE as soon as get_args(), init_gameboy() or start_gpu()
** reports a non-zero result (later steps are then skipped); otherwise runs
** the emulator, calls SDL_Quit() and returns EXIT_SUCCESS.
*/
int main(int argc, char *argv[])
{
	t_gameboy	gb;

	g_gb = &gb;
	signal(SIGINT, sighandler);

	/* Defaults first ... */
	memset(&gb, 0, sizeof(gb));
	init_debug(&gb);
	init_timing(&gb);
	init_gpu(&gb);

	/* ... then the steps that may fail, each one guarding the next. */
	if (get_args(argc, argv, &gb))
	{
		return (EXIT_FAILURE);
	}
	if (init_gameboy(&gb))
	{
		return (EXIT_FAILURE);
	}
	if (start_gpu(&gb))
	{
		return (EXIT_FAILURE);
	}

	run_gameboy(&gb);
	SDL_Quit();
	return (EXIT_SUCCESS);
}
jacobi_precond(MatrixType const & mat, jacobi_tag const & tag) : system_matrix(mat), diag_A_inv(mat.size1()) { assert(system_matrix.size1() == system_matrix.size2()); init_gpu(); }
/**
 * Calculates the coverage prediction for one transmitter, using the E/// model.
 *
 * The line-of-sight terrain profile and the path loss are computed either on
 * the GPU or on the CPU, depending on params->use_gpu. The antenna influence
 * is then applied on top of the isotropic path loss. When the GPU path was
 * taken, the path-loss matrix is finally read back into host memory.
 *
 * params       a structure holding configuration parameters which are
 *              common to all transmitters;
 * tx_params    a structure holding transmitter-specific configuration
 *              parameters;
 * rank         only used on the GPU path, where `rank % 2` is forwarded as
 *              the third argument of init_gpu (presumably a device selector
 *              -- TODO confirm against init_gpu).
 *
 */
void
coverage (Parameters    *params,
          Tx_parameters *tx_params,
          const int      rank)
{
    //
    // execute the path-loss calculation on CPU or GPU?
    //
    if (params->use_gpu)
    {
        //
        // initialize the OpenCL environment
        //
        init_gpu (params,
                  tx_params,
                  rank % 2);
        //
        // SIMULATE the LOS calculation on GPU
        //
        DoProfile_gpu (tx_params->m_obst_height,
                       tx_params->m_obst_dist,
                       tx_params->m_obst_offset,
                       1.0,
                       tx_params->m_dem,
                       tx_params->tx_north_coord_idx,
                       tx_params->tx_east_coord_idx,
                       tx_params->total_tx_height,
                       tx_params->nrows,
                       tx_params->ncols,
                       params->map_ew_res,
                       params->radius);
#ifdef _PERFORMANCE_METRICS_
        measure_time ("E/// on GPU");
#endif
        eric_pathloss_on_gpu (params,
                              tx_params);
    }
    else
    {
        //
        // calculate the terrain profile from the top of the transmitter,
        // i.e. line-of-sight, only once per transmitter
        //
        DoProfile (tx_params->m_obst_height,
                   tx_params->m_obst_dist,
                   tx_params->m_obst_offset,
                   1.0,
                   tx_params->m_dem,
                   tx_params->tx_north_coord_idx,
                   tx_params->tx_east_coord_idx,
                   tx_params->total_tx_height,
                   tx_params->nrows,
                   tx_params->ncols,
                   params->map_ew_res,
                   params->radius);
#ifdef _PERFORMANCE_METRICS_
        measure_time ("E/// on CPU");
#endif
        eric_pathloss_on_cpu (params,
                              tx_params);
    }
#ifdef _PERFORMANCE_METRICS_
    measure_time (NULL);
#endif

    //
    // calculate the antenna influence,
    // overwriting the isotropic path-loss
    //
#ifdef _PERFORMANCE_METRICS_
    measure_time ("Antenna influence");
#endif
    calculate_antenna_influence (params,
                                 tx_params);
#ifdef _PERFORMANCE_METRICS_
    measure_time (NULL);
#endif

    //
    // if the coverage calculation happened on the GPU,
    // we need to refresh the memory buffers on the host
    //
    if (params->use_gpu)
    {
        //
        // widen both dimensions to size_t BEFORE multiplying, so that the
        // cell count cannot overflow the (narrower) type of nrows/ncols
        //
        const size_t cell_count = (size_t) tx_params->nrows *
                                  (size_t) tx_params->ncols;
        const size_t buff_size  = cell_count *
                                  sizeof (tx_params->m_loss[0][0]);

        read_buffer_blocking (tx_params->ocl_obj,
                              0,
                              tx_params->m_loss_dev,
                              buff_size,
                              tx_params->m_loss[0]);
    }
}
int main(int argc, char *argv[]) { int num_elem, num_sides; int n_threads, n_blocks_elem, n_blocks_reduction, n_blocks_sides; int i, n, n_p, timesteps, n_quad, n_quad1d; double dt, t, endtime; double *min_radius; double min_r; double *V1x, *V1y, *V2x, *V2y, *V3x, *V3y; double *sides_x1, *sides_x2; double *sides_y1, *sides_y2; double *r1_local, *r2_local, *w_local; double *s_r, *oned_w_local; int *left_elem, *right_elem; int *elem_s1, *elem_s2, *elem_s3; int *left_side_number, *right_side_number; FILE *mesh_file, *out_file; char line[100]; char *mesh_filename; char *out_filename; char *rho_out_filename; char *u_out_filename; char *v_out_filename; char *E_out_filename; char *outfile_base; int outfile_len; double *Uu1, *Uu2, *Uu3; double *Uv1, *Uv2, *Uv3; // get input endtime = -1; if (get_input(argc, argv, &n, ×teps, &endtime, &mesh_filename, &out_filename)) { return 1; } // TODO: this should be cleaner, obviously rho_out_filename = "output/uniform_rho.out"; u_out_filename = "output/uniform_u.out"; v_out_filename = "output/uniform_v.out"; E_out_filename = "output/uniform_E.out"; // set the order of the approximation & timestep n_p = (n + 1) * (n + 2) / 2; // open the mesh to get num_elem for allocations mesh_file = fopen(mesh_filename, "r"); if (!mesh_file) { printf("\nERROR: mesh file not found.\n"); return 1; } fgets(line, 100, mesh_file); sscanf(line, "%i", &num_elem); // allocate vertex points V1x = (double *) malloc(num_elem * sizeof(double)); V1y = (double *) malloc(num_elem * sizeof(double)); V2x = (double *) malloc(num_elem * sizeof(double)); V2y = (double *) malloc(num_elem * sizeof(double)); V3x = (double *) malloc(num_elem * sizeof(double)); V3y = (double *) malloc(num_elem * sizeof(double)); elem_s1 = (int *) malloc(num_elem * sizeof(int)); elem_s2 = (int *) malloc(num_elem * sizeof(int)); elem_s3 = (int *) malloc(num_elem * sizeof(int)); // TODO: these are too big; should be a way to figure out how many we actually need left_side_number = (int *) 
malloc(3*num_elem * sizeof(int)); right_side_number = (int *) malloc(3*num_elem * sizeof(int)); sides_x1 = (double *) malloc(3*num_elem * sizeof(double)); sides_x2 = (double *) malloc(3*num_elem * sizeof(double)); sides_y1 = (double *) malloc(3*num_elem * sizeof(double)); sides_y2 = (double *) malloc(3*num_elem * sizeof(double)); left_elem = (int *) malloc(3*num_elem * sizeof(int)); right_elem = (int *) malloc(3*num_elem * sizeof(int)); for (i = 0; i < 3*num_elem; i++) { right_elem[i] = -1; } // read in the mesh and make all the mappings read_mesh(mesh_file, &num_sides, num_elem, V1x, V1y, V2x, V2y, V3x, V3y, left_side_number, right_side_number, sides_x1, sides_y1, sides_x2, sides_y2, elem_s1, elem_s2, elem_s3, left_elem, right_elem); // close the file fclose(mesh_file); // initialize the gpu init_gpu(num_elem, num_sides, n_p, V1x, V1y, V2x, V2y, V3x, V3y, left_side_number, right_side_number, sides_x1, sides_y1, sides_x2, sides_y2, elem_s1, elem_s2, elem_s3, left_elem, right_elem); n_threads = 256; n_blocks_elem = (num_elem / n_threads) + ((num_elem % n_threads) ? 1 : 0); n_blocks_sides = (num_sides / n_threads) + ((num_sides % n_threads) ? 1 : 0); n_blocks_reduction = (num_elem / 256) + ((num_elem % 256) ? 1 : 0); // find the min inscribed circle preval_inscribed_circles(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, num_elem); min_radius = (double *) malloc(num_elem * sizeof(double)); /* // find the min inscribed circle. 
do it on the gpu if there are at least 256 elements if (num_elem >= 256) { //min_reduction<<<n_blocks_reduction, 256>>>(d_J, d_reduction, num_elem); cudaThreadSynchronize(); checkCudaError("error after min_jacobian."); // each block finds the smallest value, so need to sort through n_blocks_reduction min_radius = (double *) malloc(n_blocks_reduction * sizeof(double)); cudaMemcpy(min_radius, d_reduction, n_blocks_reduction * sizeof(double), cudaMemcpyDeviceToHost); min_r = min_radius[0]; for (i = 1; i < n_blocks_reduction; i++) { min_r = (min_radius[i] < min_r) ? min_radius[i] : min_r; } free(min_radius); } else { */ // just grab all the radii and sort them since there are so few of them min_radius = (double *) malloc(num_elem * sizeof(double)); memcpy(min_radius, d_J, num_elem * sizeof(double)); min_r = min_radius[0]; for (i = 1; i < num_elem; i++) { min_r = (min_radius[i] < min_r) ? min_radius[i] : min_r; } free(min_radius); //} // pre computations preval_jacobian(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, num_elem); preval_side_length(d_s_length, d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y, num_sides); //cudaThreadSynchronize(); preval_normals(d_Nx, d_Ny, d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_left_side_number, num_sides); preval_normals_direction(d_Nx, d_Ny, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_left_elem, d_left_side_number, num_sides); preval_partials(d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_xr, d_yr, d_xs, d_ys, num_elem); // get the correct quadrature rules for this scheme set_quadrature(n, &r1_local, &r2_local, &w_local, &s_r, &oned_w_local, &n_quad, &n_quad1d); // evaluate the basis functions at those points and store on GPU preval_basis(r1_local, r2_local, s_r, w_local, oned_w_local, n_quad, n_quad1d, n_p); // initial conditions init_conditions(d_c, d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, n_quad, n_p, num_elem); printf("Computing...\n"); printf(" ? 
%i degree polynomial interpolation (n_p = %i)\n", n, n_p); printf(" ? %i precomputed basis points\n", n_quad * n_p); printf(" ? %i elements\n", num_elem); printf(" ? %i sides\n", num_sides); printf(" ? min radius = %lf\n", min_r); printf(" ? endtime = %lf\n", endtime); time_integrate_rk4(n_quad, n_quad1d, n_p, n, num_elem, num_sides, endtime, min_r); // evaluate at the vertex points and copy over data Uu1 = (double *) malloc(num_elem * sizeof(double)); Uu2 = (double *) malloc(num_elem * sizeof(double)); Uu3 = (double *) malloc(num_elem * sizeof(double)); Uv1 = (double *) malloc(num_elem * sizeof(double)); Uv2 = (double *) malloc(num_elem * sizeof(double)); Uv3 = (double *) malloc(num_elem * sizeof(double)); // evaluate rho and write to file eval_u(d_c, d_Uv1, d_Uv2, d_Uv3, num_elem, n_p, 0); memcpy(Uv1, d_Uv1, num_elem * sizeof(double)); memcpy(Uv2, d_Uv2, num_elem * sizeof(double)); memcpy(Uv3, d_Uv3, num_elem * sizeof(double)); out_file = fopen(rho_out_filename , "w"); fprintf(out_file, "View \"Density \" {\n"); for (i = 0; i < num_elem; i++) { fprintf(out_file, "ST (%lf,%lf,0,%lf,%lf,0,%lf,%lf,0) {%lf,%lf,%lf};\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i], d_Uv1[i], d_Uv2[i], d_Uv3[i]); } fprintf(out_file,"};"); fclose(out_file); // evaluate the u and v vectors and write to file eval_u_velocity(d_c, d_Uv1, d_Uv2, d_Uv3, num_elem, n_p, 1); memcpy(Uu1, d_Uv1, num_elem * sizeof(double)); memcpy(Uu2, d_Uv2, num_elem * sizeof(double)); memcpy(Uu3, d_Uv3, num_elem * sizeof(double)); eval_u_velocity(d_c, d_Uv1, d_Uv2, d_Uv3, num_elem, n_p, 2); memcpy(Uv1, d_Uv1, num_elem * sizeof(double)); memcpy(Uv2, d_Uv2, num_elem * sizeof(double)); memcpy(Uv3, d_Uv3, num_elem * sizeof(double)); out_file = fopen(u_out_filename , "w"); fprintf(out_file, "View \"u \" {\n"); for (i = 0; i < num_elem; i++) { fprintf(out_file, "VT (%lf,%lf,0,%lf,%lf,0,%lf,%lf,0) {%lf,%lf,0,%lf,%lf,0,%lf,%lf,0};\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i], Uu1[i], Uv1[i], Uu2[i], Uv2[i], 
Uu3[i], Uv3[i]); } fprintf(out_file,"};"); fclose(out_file); // evaluate E and write to file eval_u(d_c, d_Uv1, d_Uv2, d_Uv3, num_elem, n_p, 3); memcpy(Uv1, d_Uv1, num_elem * sizeof(double)); memcpy(Uv2, d_Uv2, num_elem * sizeof(double)); memcpy(Uv3, d_Uv3, num_elem * sizeof(double)); out_file = fopen(E_out_filename , "w"); fprintf(out_file, "View \"E \" {\n"); for (i = 0; i < num_elem; i++) { fprintf(out_file, "ST (%lf,%lf,0,%lf,%lf,0,%lf,%lf,0) {%lf,%lf,%lf};\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i], Uv1[i], Uv2[i], Uv3[i]); } fprintf(out_file,"};"); fclose(out_file); // plot pressure eval_p(d_c, d_Uv1, d_Uv2, d_Uv3, num_elem, n_p, 3); memcpy(Uv1, d_Uv1, num_elem * sizeof(double)); memcpy(Uv2, d_Uv2, num_elem * sizeof(double)); memcpy(Uv3, d_Uv3, num_elem * sizeof(double)); out_file = fopen("output/p.out" , "w"); fprintf(out_file, "View \"E \" {\n"); for (i = 0; i < num_elem; i++) { fprintf(out_file, "ST (%lf,%lf,0,%lf,%lf,0,%lf,%lf,0) {%lf,%lf,%lf};\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i], Uv1[i], Uv2[i], Uv3[i]); } fprintf(out_file,"};"); fclose(out_file); measure_error(d_c, d_Uv1, d_Uv2, d_Uv3, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, num_elem, n_p); memcpy(Uv1, d_Uv1, num_elem * sizeof(double)); memcpy(Uv2, d_Uv2, num_elem * sizeof(double)); memcpy(Uv3, d_Uv3, num_elem * sizeof(double)); out_file = fopen("output/p_error.out" , "w"); fprintf(out_file, "View \"p \" {\n"); for (i = 0; i < num_elem; i++) { fprintf(out_file, "ST (%lf,%lf,0,%lf,%lf,0,%lf,%lf,0) {%lf,%lf,%lf};\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i], Uv1[i], Uv2[i], Uv3[i]); } fprintf(out_file,"};"); fclose(out_file); // free variables free_gpu(); free(Uu1); free(Uu2); free(Uu3); free(Uv1); free(Uv2); free(Uv3); free(V1x); free(V1y); free(V2x); free(V2y); free(V3x); free(V3y); free(elem_s1); free(elem_s2); free(elem_s3); free(sides_x1); free(sides_x2); free(sides_y1); free(sides_y2); free(left_elem); free(right_elem); free(left_side_number); free(right_side_number); 
free(r1_local); free(r2_local); free(w_local); free(s_r); free(oned_w_local); return 0; }
void vegas_pfb_thread(void *_args) { /* Get args */ struct guppi_thread_args *args = (struct guppi_thread_args *)_args; int rv; /* Set cpu affinity */ cpu_set_t cpuset, cpuset_orig; sched_getaffinity(0, sizeof(cpu_set_t), &cpuset_orig); //CPU_ZERO(&cpuset); CPU_CLR(13, &cpuset); CPU_SET(11, &cpuset); rv = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); if (rv<0) { guppi_error("vegas_pfb_thread", "Error setting cpu affinity."); perror("sched_setaffinity"); } /* Set priority */ rv = setpriority(PRIO_PROCESS, 0, args->priority); if (rv<0) { guppi_error("vegas_pfb_thread", "Error setting priority level."); perror("set_priority"); } /* Attach to status shared mem area */ struct guppi_status st; rv = guppi_status_attach(&st); if (rv!=GUPPI_OK) { guppi_error("vegas_pfb_thread", "Error attaching to status shared memory."); pthread_exit(NULL); } pthread_cleanup_push((void *)guppi_status_detach, &st); pthread_cleanup_push((void *)set_exit_status, &st); pthread_cleanup_push((void *)guppi_thread_set_finished, args); /* Init status */ guppi_status_lock_safe(&st); hputs(st.buf, STATUS_KEY, "init"); guppi_status_unlock_safe(&st); /* Init structs */ struct guppi_params gp; struct sdfits sf; pthread_cleanup_push((void *)guppi_free_sdfits, &sf); /* Attach to databuf shared mem */ struct guppi_databuf *db_in, *db_out; db_in = guppi_databuf_attach(args->input_buffer); if (db_in==NULL) { char msg[256]; sprintf(msg, "Error attaching to databuf(%d) shared memory.", args->input_buffer); guppi_error("vegas_pfb_thread", msg); pthread_exit(NULL); } pthread_cleanup_push((void *)guppi_databuf_detach, db_in); db_out = guppi_databuf_attach(args->output_buffer); if (db_out==NULL) { char msg[256]; sprintf(msg, "Error attaching to databuf(%d) shared memory.", args->output_buffer); guppi_error("vegas_pfb_thread", msg); pthread_exit(NULL); } pthread_cleanup_push((void *)guppi_databuf_detach, db_out); /* Loop */ char *hdr_in = NULL; int curblock_in=0; int first=1; int acc_len = 0; int nchan = 0; int 
nsubband = 0; signal(SIGINT,cc); guppi_status_lock_safe(&st); if (hgeti4(st.buf, "NCHAN", &nchan)==0) { fprintf(stderr, "ERROR: %s not in status shm!\n", "NCHAN"); } if (hgeti4(st.buf, "NSUBBAND", &nsubband)==0) { fprintf(stderr, "ERROR: %s not in status shm!\n", "NSUBBAND"); } guppi_status_unlock_safe(&st); if (EXIT_SUCCESS != init_gpu(db_in->block_size, db_out->block_size, nsubband, nchan)) { (void) fprintf(stderr, "ERROR: GPU initialisation failed!\n"); run = 0; } while (run) { /* Note waiting status */ guppi_status_lock_safe(&st); hputs(st.buf, STATUS_KEY, "waiting"); guppi_status_unlock_safe(&st); /* Wait for buf to have data */ rv = guppi_databuf_wait_filled(db_in, curblock_in); if (rv!=0) continue; /* Note waiting status, current input block */ guppi_status_lock_safe(&st); hputs(st.buf, STATUS_KEY, "processing"); hputi4(st.buf, "PFBBLKIN", curblock_in); guppi_status_unlock_safe(&st); hdr_in = guppi_databuf_header(db_in, curblock_in); /* Get params */ if (first) { guppi_read_obs_params(hdr_in, &gp, &sf); /* Read required exposure from status shared memory, and calculate corresponding accumulation length */ acc_len = (sf.hdr.chan_bw * sf.hdr.hwexposr); } guppi_read_subint_params(hdr_in, &gp, &sf); /* Call PFB function */ do_pfb(db_in, curblock_in, db_out, first, st, acc_len); /* Mark input block as free */ guppi_databuf_set_free(db_in, curblock_in); /* Go to next input block */ curblock_in = (curblock_in + 1) % db_in->n_block; /* Check for cancel */ pthread_testcancel(); if (first) { first=0; } } run=0; //cudaThreadExit(); pthread_exit(NULL); cleanup_gpu(); pthread_cleanup_pop(0); /* Closes guppi_databuf_detach(out) */ pthread_cleanup_pop(0); /* Closes guppi_databuf_detach(in) */ pthread_cleanup_pop(0); /* Closes guppi_free_sdfits */ pthread_cleanup_pop(0); /* Closes guppi_thread_set_finished */ pthread_cleanup_pop(0); /* Closes set_exit_status */ pthread_cleanup_pop(0); /* Closes guppi_status_detach */ }
int main(int argc, char **argv) { char *iFuncName; int m, info; int maxEvals; int iFuncNumb, n, numIter; int contSucess = 0; int type; real *x; real epsg, epsf, epsx; real maxiters = 0; long fnEvals, mediaFnEvals = 0; long gradEvals, mediaGradEvals = 0; bool sucess; int64_t initialTime, finalTime; int64_t deltaTime, mediaTime = 0; ap::real_1d_array xBFGS; MGrasp *mgrasp; LBFGS *lbfgs; Funcao *func; real *gaps; real mediaGaps[7]; if (argc < 7) { usage(); return 1; } epsg = 0.000001; epsf = 0.000001; epsx = 0.000001; maxiters = 0; iFuncName = argv[1]; iFuncNumb = getFuncNumb(iFuncName); n = atoi(argv[2]); maxEvals = atoi(argv[3]); numIter = atoi(argv[4]); type = getType(argv[5]); m = atoi(argv[6]); if (iFuncNumb == -1) { printf("Função %s não existe... \n\n", iFuncName); return 1; } if (type == -1) { printf("Tipo %s não existe... \n\n", argv[4]); return 1; } if (m > n) { printf("'m' deve ser menor ou igual a 'n' \n\n"); return 1; } for (int i = 0; i < 7; i++) { mediaGaps[i] = 0.0; } if (iFuncNumb == Funcao::PAR_SPHERE) { if (!init_gpu(n)) { fprintf(stderr, "Erro inicializando GPU\n"); return 2; } } srand(time(NULL)); for (int i = 1; i <= numIter; i++) { sucess = false; printf("[%d]Iteracao \n", i); initialTime = getMilisegundos(); mgrasp = initMGrasp(iFuncNumb, n, &func); if (type == PURO) { printf("Puro... \n"); sucess = mgrasp->start(false, m, maxEvals); } else if (type == HIBRIDO) { printf("Hibrido... \n"); sucess = mgrasp->start(true, m, maxEvals); } else { printf("BFGS... \n"); x = new real[n]; mgrasp->unifRandom(x); xBFGS.setbounds(1, n); for (int j = 0; j < n; j++) { xBFGS(j+1) = x[j]; } lbfgs = new LBFGS(func, false); lbfgs->minimize(n, m, xBFGS, epsg, epsf, epsx, maxiters, info); sucess = true; printf("Info = %d \n", info); } finalTime = getMilisegundos(); printf("\tTime = %lld \n", finalTime - initialTime); if (maxEvals) { gaps = mgrasp->getGaps(); for (int j = 0; j < 7; j++){ mediaGaps[j] += gaps[j]; printf("[%d]Gap[%d] = %lf (%lf)... 
\n", i, j, mediaGaps[j], gaps[j]); } } else if (sucess) { contSucess++; fnEvals = func->getFnEvals(); gradEvals = func->getGradEvals(); deltaTime = finalTime - initialTime; mediaFnEvals += fnEvals; mediaGradEvals += gradEvals; mediaTime += deltaTime; printf("[%d]Sucesso(%d) = %ld (%d)... \n", i, contSucess, mediaFnEvals, fnEvals); printf("[%d]Grad(%d) = %ld (%d)... \n", i, contSucess, mediaGradEvals, gradEvals); } printf("\n"); delete mgrasp; delete func; } if (maxEvals) { saveGaps(mediaGaps, numIter); } else { printf("Num execucoes com sucesso... = %d \n", contSucess); printf("Media de avaliacao da funcao... = %d \n", (long)((real)mediaFnEvals/contSucess)); printf("Media de avaliacao do gradiente... = %d \n", (long)((real)mediaGradEvals/contSucess)); printf("Media de tempo... = %d \n", (long)((real)mediaTime/contSucess)); } if (iFuncNumb == Funcao::PAR_SPHERE) finalize_gpu(); return 0; }