/**
 * Smoke-test driver for the L7 communication library.
 *
 * Initializes L7, runs the broadcast, reduction, and update test
 * suites, then terminates L7.  PASS/FAIL status is printed from
 * rank 0 only.  The L7_Address check is compiled out (#ifdef XXX).
 */
int main(int argc, char *argv[])
{
   int mype = 0, numpes = 0, ierr;
   long long location;   /* address value reported by L7_Address */
   double xarray[10];    /* scratch array for the address test */

   ierr = L7_Init(&mype, &numpes, &argc, argv);

   if (mype == 0) printf("\n\t\tStarting the L7 tests\n\n");
   if (mype == 0){
      if (ierr != L7_OK){
         printf(" Error with L7_Init\n");
      }
      else{
         printf(" PASSED L7_Init\n");
      }
   }

   broadcast_test();
   reduction_test();
   update_test();

#ifdef XXX
   location = L7_Address(xarray);
   if (mype == 0){
      /* BUG FIX: the original compared a long long to a double*,
         which fails to compile when XXX is defined; compare the
         address values with an explicit pointer-to-integer cast. */
      if (location != (long long)xarray){
         printf(" Error with L7_Address\n");
      }
      else{
         printf(" PASSED L7_Address\n");
      }
   }
#endif

   if (mype == 0) printf("\n\t\tRunning the L7_Terminate test\n\n");
   ierr = L7_Terminate();
   if (mype == 0){
      if (ierr != L7_OK){
         printf(" Error with L7_Terminate\n");
      }
      else{
         printf(" PASSED L7_Terminate\n");
      }
   }

   if (mype == 0) printf("\n\t\tFinished the L7 tests\n\n");
   exit(0);
}
int main (int argc, char **argv) { // Process command-line arguments, if any. int mype=0; int numpe=0; int do_quo_setup=0; int lttrace_on=0; L7_Init(&mype, &numpe, &argc, argv, do_quo_setup, lttrace_on); int nx = 4; int ny = 4; int levmx = 2; int ndim = 2; int boundary = 1; int parallel_in = 1; int do_gpu_calc = 0; double circ_radius = 6.0; circ_radius *= (double)nx / 128.0; // scaling radius for problem size mesh = new Mesh(nx, ny, levmx, ndim, boundary, parallel_in, do_gpu_calc); mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc); MallocPlus state_memory; real_t *density = (real_t *)state_memory.memory_malloc(mesh->ncells, sizeof(real_t), "density"); for (uint ic=0; ic<mesh->ncells; ic++){ density[ic]=1.0; } mesh->do_load_balance_local(mesh->ncells, NULL, state_memory); int ncells_test = mesh->ncells_global/mesh->numpe; if ((int)mesh->ncells_global%mesh->numpe > mype) ncells_test++; int ierr = 0; if (ncells_test != (int)mesh->ncells) ierr = 1; int ierr_global = 0; MPI_Allreduce(&ierr, &ierr_global, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); //printf("%d: DEBUG -- ncells %ld ncells_global %ld ncells_test %d ierr %d ierr_global %d\n",mype,mesh->ncells,mesh->ncells_global,ncells_test,ierr,ierr_global); if (mype == 0){ if (ierr_global){ printf(" Error with load balance\n"); } else{ printf(" PASSED load balance\n"); } } L7_Terminate(); exit(0); }
int main(int argc, char **argv) { // Process command-line arguments, if any. int mype=0; int numpe=0; parseInput(argc, argv); L7_Init(&mype, &numpe, &argc, argv); #if 1 // SKG make things sane for debugging signal(SIGSEGV, SIG_DFL); #endif struct timeval tstart_setup; cpu_timer_start(&tstart_setup); real_t circ_radius = 6.0; // Scale the circle appropriately for the mesh size. circ_radius = circ_radius * (real_t) nx / 128.0; int boundary = 1; int parallel_in = 1; // figure out the max number of threads that can be spawned if (0 == mype) { int nt = omp_get_max_threads(); printf("--- num openmp threads: %d\n", nt); fflush(stdout); } mesh = new Mesh(nx, ny, levmx, ndim, boundary, parallel_in, do_gpu_calc); if (DEBUG) { //if (mype == 0) mesh->print(); char filename[10]; sprintf(filename,"out%1d",mype); mesh->fp=fopen(filename,"w"); //mesh->print_local(); } mesh->init(nx, ny, circ_radius, initial_order, do_gpu_calc); size_t &ncells = mesh->ncells; size_t &ncells_global = mesh->ncells_global; int &noffset = mesh->noffset; state = new State(mesh); state->init(do_gpu_calc); vector<int> &nsizes = mesh->nsizes; vector<int> &ndispl = mesh->ndispl; vector<spatial_t> &x = mesh->x; vector<spatial_t> &dx = mesh->dx; vector<spatial_t> &y = mesh->y; vector<spatial_t> &dy = mesh->dy; nsizes.resize(numpe); ndispl.resize(numpe); int ncells_int = ncells; MPI_Allgather(&ncells_int, 1, MPI_INT, &nsizes[0], 1, MPI_INT, MPI_COMM_WORLD); ndispl[0]=0; for (int ip=1; ip<numpe; ip++){ ndispl[ip] = ndispl[ip-1] + nsizes[ip-1]; } noffset = ndispl[mype]; state->resize(ncells); state->fill_circle(circ_radius, 100.0, 7.0); x.clear(); dx.clear(); y.clear(); dy.clear(); // Kahan-type enhanced precision sum implementation. 
double H_sum = state->mass_sum(enhanced_precision_sum); if (mype == 0) printf ("Mass of initialized cells equal to %14.12lg\n", H_sum); H_sum_initial = H_sum; double cpu_time_main_setup = cpu_timer_stop(tstart_setup); mesh->parallel_timer_output("CPU: setup time time was",cpu_time_main_setup, 0); long long mem_used = memstats_memused(); if (mem_used > 0) { mesh->parallel_memory_output("Memory used in startup ",mem_used, 0); mesh->parallel_memory_output("Memory peak in startup ",memstats_mempeak(), 0); mesh->parallel_memory_output("Memory free at startup ",memstats_memfree(), 0); mesh->parallel_memory_output("Memory available at startup ",memstats_memtotal(), 0); } if (mype == 0) { printf("Iteration 0 timestep n/a Sim Time 0.0 cells %ld Mass Sum %14.12lg\n", ncells_global, H_sum); } for (int i = 0; i < MESH_COUNTER_SIZE; i++){ mesh->cpu_counters[i]=0; } for (int i = 0; i < MESH_TIMER_SIZE; i++){ mesh->cpu_timers[i]=0.0; } #ifdef HAVE_GRAPHICS #ifdef HAVE_OPENGL set_mysize(ncells_global); //vector<state_t> H_global; //vector<spatial_t> x_global; //vector<spatial_t> dx_global; //vector<spatial_t> y_global; //vector<spatial_t> dy_global; //vector<int> proc_global; if (mype == 0){ H_global.resize(ncells_global); x_global.resize(ncells_global); dx_global.resize(ncells_global); y_global.resize(ncells_global); dy_global.resize(ncells_global); proc_global.resize(ncells_global); } MPI_Gatherv(&x[0], nsizes[mype], MPI_SPATIAL_T, &x_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, 0, MPI_COMM_WORLD); MPI_Gatherv(&dx[0], nsizes[mype], MPI_SPATIAL_T, &dx_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, 0, MPI_COMM_WORLD); MPI_Gatherv(&y[0], nsizes[mype], MPI_SPATIAL_T, &y_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, 0, MPI_COMM_WORLD); MPI_Gatherv(&dy[0], nsizes[mype], MPI_SPATIAL_T, &dy_global[0], &nsizes[0], &ndispl[0], MPI_SPATIAL_T, 0, MPI_COMM_WORLD); MPI_Gatherv(&state->H[0], nsizes[mype], MPI_STATE_T, &H_global[0], &nsizes[0], &ndispl[0], MPI_STATE_T, 0, 
MPI_COMM_WORLD); set_cell_data(&H_global[0]); set_cell_coordinates(&x_global[0], &dx_global[0], &y_global[0], &dy_global[0]); if (view_mode == 0) { mesh->proc.resize(ncells); for (size_t ii = 0; ii<ncells; ii++){ mesh->proc[ii] = mesh->mype; } MPI_Gatherv(&mesh->proc[0], nsizes[mype], MPI_INT, &proc_global[0], &nsizes[0], &ndispl[0], MPI_C_REAL, 0, MPI_COMM_WORLD); } set_cell_proc(&proc_global[0]); #endif #ifdef HAVE_MPE set_mysize(ncells); set_cell_data(&state->H[0]); set_cell_coordinates(&mesh->x[0], &mesh->dx[0], &mesh->y[0], &mesh->dy[0]); set_cell_proc(&mesh->proc[0]); #endif set_window((float)mesh->xmin, (float)mesh->xmax, (float)mesh->ymin, (float)mesh->ymax); set_viewmode(view_mode); set_outline((int)outline); init_display(&argc, argv, "Shallow Water"); set_circle_radius(circle_radius); draw_scene(); if (verbose) sleep(5); sleep(2); // Set flag to show mesh results rather than domain decomposition. view_mode = 1; // Clear superposition of circle on grid output. circle_radius = -1.0; MPI_Barrier(MPI_COMM_WORLD); cpu_timer_start(&tstart); set_idle_function(&do_calc); start_main_loop(); #else MPI_Barrier(MPI_COMM_WORLD); cpu_timer_start(&tstart); for (int it = 0; it < 10000000; it++) { do_calc(); } #endif return 0; }