int main(int argc, char *argv[]) { if ((argc < 3) || (strncmp(argv[1],"-h",strlen("-h")) == 0) || (strncmp(argv[1],"-h",strlen("-H")) == 0)){ printf("usage: VOLREND num_processes input_file\n"); exit(-1); } MAIN_INITENV(, SH_MEM_AMT); THREAD_INIT_FREE(); num_nodes = atol(argv[1]); strcpy(filename,argv[2]); if (argc == 4) { if (strncmp(argv[3],"-a",strlen("-a")) == 0) adaptive = YES; else { printf("usage: VOLREND num_processes input_file [-a] \n"); exit(-1); } } Frame(); /* if (num_nodes > 1) WAIT_FOR_END(num_nodes-1);*/ if (num_nodes > 1) WAIT_FOR_END(num_nodes); MAIN_END; }
/*
 * Program entry point: parses the command line, initialises the run,
 * launches NPROC instances of SlaveStart, waits for them, and prints the
 * timing counters kept in Global.
 *
 * NOTE(review): the "#endif" right after the signature closes a
 * preprocessor conditional that opens above this block (not visible here);
 * presumably an alternative signature is selected there.
 */
int main (int argc, string argv[])
#endif
{
#ifndef SIM_SOCLIB
   long c;

   /* The only accepted option is -h; both it and any other option
      terminate the program with status -1. */
   while ((c = getopt(argc, argv, "h")) != -1) {
      switch(c) {
       case 'h':
          Help();
          exit(-1);
          break;
       default:
          fprintf(stderr, "Only valid option is \"-h\".\n");
          exit(-1);
          break;
      }
   }
#endif

   /* Global is cleared here and dereferenced below, so one of the
      following init calls presumably allocates it -- TODO confirm. */
   Global = NULL;
   initparam(defv);
   startrun();
   initoutput();
   tab_init();

   /* Reset the accumulated timing counters and the id counter. */
   Global->tracktime = 0;
   Global->partitiontime = 0;
   Global->treebuildtime = 0;
   Global->forcecalctime = 0;
   Global->current_id = 0;

   CLOCK(Global->computestart);

   printf("COMPUTESTART = %12lu\n",Global->computestart);

   /* Run the workers and wait for all of them to finish. */
   CREATE(SlaveStart, NPROC);

   WAIT_FOR_END(NPROC);

   CLOCK(Global->computeend);

   printf("COMPUTEEND = %12lu\n",Global->computeend);
   printf("COMPUTETIME = %12lu\n",Global->computeend - Global->computestart);
   printf("TRACKTIME = %12lu\n",Global->tracktime);
   /* Each phase is printed as an absolute value and as a fraction of
      tracktime. tracktime was zeroed above, so the ratios are only
      meaningful if the workers update it (not visible here). */
   printf("PARTITIONTIME = %12lu\t%5.2f\n",Global->partitiontime,
          ((float)Global->partitiontime)/Global->tracktime);
   printf("TREEBUILDTIME = %12lu\t%5.2f\n",Global->treebuildtime,
          ((float)Global->treebuildtime)/Global->tracktime);
   printf("FORCECALCTIME = %12lu\t%5.2f\n",Global->forcecalctime,
          ((float)Global->forcecalctime)/Global->tracktime);
   /* Remainder: tracked time not attributed to any of the three phases. */
   printf("RESTTIME = %12lu\t%5.2f\n",
          Global->tracktime - Global->partitiontime -
          Global->treebuildtime - Global->forcecalctime,
          ((float)(Global->tracktime-Global->partitiontime-
                   Global->treebuildtime-Global->forcecalctime))/
          Global->tracktime);
   MAIN_END;
}
int main(int argc,char **argv) { int i,j,p,n; double **a,*b, count=1.0; unsigned int t1,t2; MAIN_INITENV if (argc!=3) { printf("Usage: pbksb P N\nAborting...\n"); exit(0); } gm = (GM*)G_MALLOC(sizeof(GM)); p = gm->p = atoi(argv[1]); gm->n = atoi(argv[2]); assert(p > 0); assert(p <= 8); n = gm->n; a = gm->a = (double**)G_MALLOC(n*sizeof(double*)); for(i = 0; i < n; i++) { a[i] = (double*)G_MALLOC(n*sizeof(double)); for(j = i;j < n;j++){ a[i][j] = count; count++; } } //----------------------------------------------- // Create 1D array a_prime and map a to a_prime //----------------------------------------------- gm->a_prime = (double*)G_MALLOC((n+1)*n/2*sizeof(double)) mapping(); b = gm->b = (double*)G_MALLOC(n*sizeof(double)); for(i = 0; i < n; i++) { b[i] = count; count++; } gm->pse = (char*)G_MALLOC(n*sizeof(char)); for(i = 0; i < n; i++) CLEARPAUSE(gm->pse[i]) for(i = 0; i < p-1; i++) CREATE(pbksb) CLOCK(t1) pbksb(); WAIT_FOR_END(p-1) CLOCK(t2) printf("Elapsed: %u us\n",t2-t1); for(i = 0; i < n; i++) printf("%lf ", gm->b[i]); printf("\n"); for(i = 0; i < n; i++) G_FREE(a[i],n*sizeof(double)) G_FREE(a,n*sizeof(double*)) G_FREE(b,n*sizeof(double)) G_FREE(gm->a_prime, (n+1)*n/2*sizeof(double)) MAIN_END return 0; }
int main (int argc, char **argv) { int i, j, p, n; int total; char **maxBoard; char **initialBoard; unsigned int t1, t2, t3; MAIN_INITENV //Enforce arguments if (argc != 2) { printf("Usage: nqueens-seq <N>\nAborting.\n"); exit(0); } gm = (GM*)G_MALLOC(sizeof(GM)); gm->p = 8; gm->n = atoi(argv[1]); n = gm->n; gm->total = 0; gm->maxBoard = (char**)G_MALLOC(n*sizeof(char*)); gm->initialBoard = (char**)G_MALLOC(n*sizeof(char*)); gm->global_max_profit = 0; for (i = 0; i < n; i++) { gm->maxBoard[i] = (char*)G_MALLOC(n*sizeof(char)); gm->initialBoard[i] = (char*)G_MALLOC(n*sizeof(char)); for (j = i; j < n; j++) { gm->maxBoard[i][j] = 0; gm->initialBoard[i][j] = 0; } } CLOCK(t1) for(i = 0; i < n; i++) CREATE(nqueens_wrapper) WAIT_FOR_END(n); CLOCK(t2) printf("Printing maximum profit board\n"); printBoard(gm->maxBoard, gm->global_max_profit); CLOCK(t3) printf("Computation time: %u microseconds\n", t2-t1); printf("Printing time: %u microseconds\n", t3-t2); MAIN_END return 0; }
int main(int argc, char *argv[]) { #ifdef ENABLE_PARSEC_HOOKS __parsec_bench_begin (__splash2_volrend); #endif if ((argc < 4) || (strncmp(argv[1],"-h",strlen("-h")) == 0) || (strncmp(argv[1],"-h",strlen("-H")) == 0)) { printf("usage: VOLREND num_processes input_file ROTATE_STEPS\n"); exit(-1); } MAIN_INITENV(, SH_MEM_AMT); num_nodes = atol(argv[1]); ROTATE_STEPS = atoi(argv[3]); strcpy(filename,argv[2]); if (argc == 5) { if (strncmp(argv[4],"-a",strlen("-a")) == 0) adaptive = YES; else { printf("usage: VOLREND num_processes input_file ROTATE_STEPS [-a] \n"); exit(-1); } } Frame(); /* if (num_nodes > 1) WAIT_FOR_END(num_nodes-1);*/ if (num_nodes > 1) { WAIT_FOR_END(num_nodes); #ifdef ENABLE_PARSEC_HOOKS __parsec_roi_end(); #endif } MAIN_END; #ifdef ENABLE_PARSEC_HOOKS __parsec_bench_end(); #endif }
int main (int argc, char *argv[]) { long c; extern char *optarg; CLOCK(starttime); while ((c = getopt(argc, argv, "osh")) != -1) { switch(c) { case 'o': do_output = 1; break; case 's': do_stats = 1; break; case 'h': Help(); break; } } MAIN_INITENV(,40000000); GetArguments(); printf("Number of processors: %d\n", Number_Of_Processors); THREAD_INIT(Number_Of_Processors); InitGlobalMemory(); InitExpTables(); CreateDistribution(Cluster, Model); /* for (i = 1; i < Number_Of_Processors; i++) { CREATE(ParallelExecute); } ParallelExecute(); WAIT_FOR_END(Number_Of_Processors - 1);*/ CREATE(ParallelExecute, Number_Of_Processors); WAIT_FOR_END(Number_Of_Processors); printf("Finished FMM\n"); PrintTimes(); if (do_output) { PrintAllParticles(); } MAIN_END; }
/*
 * OCEAN entry point.
 *
 * Steps performed, in order:
 *   1. parse options (-n grid size, -p processors, -e tolerance,
 *      -r resolution, -t timestep, -s stats toggle, -o output toggle,
 *      -h help);
 *   2. derive the number of multigrid levels from the grid size;
 *   3. allocate the shared structures and initialise locks and barriers;
 *   4. factor nprocs into an xprocs x yprocs process grid and compute, for
 *      every process and level, the sub-block of the grid it owns;
 *   5. initialise constants, start nprocs instances of slave, wait for
 *      them, and print per-process and overall timing.
 */
int main(int argc, char *argv[])
{
   long i;
   long j;
   long xextra;
   long xportion;
   long yextra;
   long yportion;
   long lower;
   double procsqrt;
   long k;
   long logtest;
   long my_num;
   unsigned long computeend;
   double min_total;
   double max_total;
   double avg_total;
   double min_multi;
   double max_multi;
   double avg_multi;
   double min_frac;
   double max_frac;
   double avg_frac;
   extern char *optarg;
   long ch;
   unsigned long start;

   /* Start-of-run time stamp; copied to global->starttime near the end. */
   CLOCK(start)

   while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) {
     switch(ch) {
     /* -n: grid size; must not exceed IMAX and log_2(im-2) must succeed */
     case 'n': im = atoi(optarg);
               if (im > IMAX) {
                 printerr("Max grid size exceeded\n");
                 exit(-1);
               }
               if (log_2(im-2) == -1) {
                 printerr("Grid must be ((power of 2)+2) in each dimension\n");
                 exit(-1);
               }
               break;
     /* -p: processor count; must be >= 1 and log_2(nprocs) must succeed */
     case 'p': nprocs = atoi(optarg);
               if (nprocs < 1) {
                 printerr("P must be >= 1\n");
                 exit(-1);
               }
               if (log_2(nprocs) == -1) {
                 printerr("P must be a power of 2\n");
                 exit(-1);
               }
               break;
     case 'e': tolerance = atof(optarg); break;
     case 'r': res = atof(optarg); break;
     case 't': dtau = atof(optarg); break;
     /* -s and -o toggle their flags rather than setting them */
     case 's': do_stats = !do_stats; break;
     case 'o': do_output = !do_output; break;
     case 'h': printf("Usage: OCEAN <options>\n\n");
               printf("options:\n");
               printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n");
               printf(" -pP : P = number of processors. P must be power of 2.\n");
               printf(" -eE : E = error tolerance for iterative relaxation.\n");
               printf(" -rR : R = distance between grid points in meters.\n");
               printf(" -tT : T = timestep in seconds.\n");
               printf(" -s : Print timing statistics.\n");
               printf(" -o : Print out relaxation residual values.\n");
               printf(" -h : Print out command line options.\n\n");
               printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n",
                      DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T);
               exit(0);
               break;
     }
   }

   MAIN_INITENV(,60000000)

   /* Count multigrid levels: halve im-2 until it reaches 1; an odd
      intermediate value means the size is not a power of two. */
   logtest = im-2;
   numlev = 1;
   while (logtest != 1) {
     if (logtest%2 != 0) {
       printerr("Cannot determine number of multigrid levels\n");
       exit(-1);
     }
     logtest = logtest / 2;
     numlev++;
   }

   if (numlev > MAX_LEVELS) {
     printerr("Max grid levels exceeded for multigrid\n");
     exit(-1);
   }

   /* The grid is square: jm always equals im. */
   jm = im;

   printf("\n");
   printf("Ocean simulation with W-cycle multigrid solver\n");
   printf(" Processors : %1ld\n",nprocs);
   printf(" Grid size : %1ld x %1ld\n",im,jm);
   printf(" Grid resolution (meters) : %0.2f\n",res);
   printf(" Time between relaxations (seconds) : %0.0f\n",dtau);
   printf(" Error tolerance : %0.7g\n",tolerance);
   printf("\n");

   /* Per-process private data; nprocs+1 entries are allocated but only
      the first nprocs are initialised here. */
   gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private));
   for (i=0;i<nprocs;i++) {
     gp[i].multi_time = 0;
     gp[i].total_time = 0;
   }

   /* Shared structures. None of the G_MALLOC results is checked. */
   global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct));
   fields = (struct fields_struct *) G_MALLOC(sizeof(struct fields_struct));
   fields2 = (struct fields2_struct *) G_MALLOC(sizeof(struct fields2_struct));
   wrk1 = (struct wrk1_struct *) G_MALLOC(sizeof(struct wrk1_struct));
   wrk3 = (struct wrk3_struct *) G_MALLOC(sizeof(struct wrk3_struct));
   wrk2 = (struct wrk2_struct *) G_MALLOC(sizeof(struct wrk2_struct));
   wrk4 = (struct wrk4_struct *) G_MALLOC(sizeof(struct wrk4_struct));
   wrk6 = (struct wrk6_struct *) G_MALLOC(sizeof(struct wrk6_struct));
   wrk5 = (struct wrk5_struct *) G_MALLOC(sizeof(struct wrk5_struct));
   frcng = (struct frcng_struct *) G_MALLOC(sizeof(struct frcng_struct));
   iter = (struct iter_struct *) G_MALLOC(sizeof(struct iter_struct));
   guess = (struct guess_struct *) G_MALLOC(sizeof(struct guess_struct));
   multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct));
   locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct));
   bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct));

   /* Initialise every lock in locks and every barrier in bars. */
   LOCKINIT(locks->idlock)
   LOCKINIT(locks->psiailock)
   LOCKINIT(locks->psibilock)
   LOCKINIT(locks->donelock)
   LOCKINIT(locks->error_lock)
   LOCKINIT(locks->bar_lock)

   BARINIT(bars->iteration)
   BARINIT(bars->gsudn)
   BARINIT(bars->p_setup)
   BARINIT(bars->p_redph)
   BARINIT(bars->p_soln)
   BARINIT(bars->p_subph)
   BARINIT(bars->sl_prini)
   BARINIT(bars->sl_psini)
   BARINIT(bars->sl_onetime)
   BARINIT(bars->sl_phase_1)
   BARINIT(bars->sl_phase_2)
   BARINIT(bars->sl_phase_3)
   BARINIT(bars->sl_phase_4)
   BARINIT(bars->sl_phase_5)
   BARINIT(bars->sl_phase_6)
   BARINIT(bars->sl_phase_7)
   BARINIT(bars->sl_phase_8)
   BARINIT(bars->sl_phase_9)
   BARINIT(bars->sl_phase_10)
   BARINIT(bars->error_barrier)

   /* The finest level (index numlev-1) uses the full grid size, the
      requested resolution and the requested tolerance. */
   imx[numlev-1] = im;
   jmx[numlev-1] = jm;
   lev_res[numlev-1] = res;
   lev_tol[numlev-1] = tolerance;
   multi->err_multi = 0.0;
   multi->numspin = 0;
   for (i=0;i<nprocs;i++) {
     multi->spinflag[i] = 0;
   }

   /* Each coarser level halves the interior size (size minus the two
      border points) and doubles the grid spacing. */
   for (i=numlev-2;i>=0;i--) {
     imx[i] = ((imx[i+1] - 2) / 2) + 2;
     jmx[i] = ((jmx[i+1] - 2) / 2) + 2;
     lev_res[i] = lev_res[i+1] * 2;
   }

   /* Factor nprocs as xprocs * yprocs, searching downward from
      floor(sqrt(nprocs)); the smaller factor becomes xprocs. */
   xprocs = 0;
   yprocs = 0;
   procsqrt = sqrt((double) nprocs);
   j = (long) procsqrt;
   while ((xprocs == 0) && (j > 0)) {
     k = nprocs / j;
     if (k * j == nprocs) {
       if (k > j) {
         xprocs = j;
         yprocs = k;
       } else {
         xprocs = k;
         yprocs = j;
       }
     }
     j--;
   }
   if (xprocs == 0) {
     printerr("Could not find factors for subblocking\n");
     exit(-1);
   }

/* Determine starting coord and number of points to process in */
/* each direction */

   /* Process (row r, column c) of the process grid is gp[r*xprocs+c].
      When the interior does not divide evenly, the first "extra"
      columns/rows each receive one additional point. */
   for (i=0;i<numlev;i++) {
     xportion = (jmx[i] - 2) / xprocs;
     xextra = (jmx[i] - 2) % xprocs;
     for (j=0;j<xprocs;j++) {
       if (xextra == 0) {
         for (k=0;k<yprocs;k++) {
           gp[k*xprocs+j].rel_start_x[i] = j * xportion + 1;
           gp[k*xprocs+j].rel_num_x[i] = xportion;
         }
       } else {
         if (j + 1 > xextra) {
           /* column past the ones that got an extra point */
           for (k=0;k<yprocs;k++) {
             lower = xextra * (xportion + 1);
             gp[k*xprocs+j].rel_start_x[i] = lower + (j - xextra) * xportion + 1;
             gp[k*xprocs+j].rel_num_x[i] = xportion;
           }
         } else {
           for (k=0;k<yprocs;k++) {
             gp[k*xprocs+j].rel_start_x[i] = j * (xportion + 1) + 1;
             gp[k*xprocs+j].rel_num_x[i] = xportion + 1;
           }
         }
       }
     }
     /* Same split in the y direction, over rows of the process grid. */
     yportion = (imx[i] - 2) / yprocs;
     yextra = (imx[i] - 2) % yprocs;
     for (j=0;j<yprocs;j++) {
       if (yextra == 0) {
         for (k=0;k<xprocs;k++) {
           gp[j*xprocs+k].rel_start_y[i] = j * yportion + 1;
           gp[j*xprocs+k].rel_num_y[i] = yportion;
         }
       } else {
         if (j + 1 > yextra) {
           for (k=0;k<xprocs;k++) {
             lower = yextra * (yportion + 1);
             gp[j*xprocs+k].rel_start_y[i] = lower + (j - yextra) * yportion + 1;
             gp[j*xprocs+k].rel_num_y[i] = yportion;
           }
         } else {
           for (k=0;k<xprocs;k++) {
             gp[j*xprocs+k].rel_start_y[i] = j * (yportion + 1) + 1;
             gp[j*xprocs+k].rel_num_y[i] = yportion + 1;
           }
         }
       }
     }
   }

   /* Element 0 is assigned here and then overwritten by the loop below. */
   i_int_coeff[0] = 0.0;
   j_int_coeff[0] = 0.0;
   for (i=0;i<numlev;i++) {
     i_int_coeff[i] = 1.0/(imx[i]-1);
     j_int_coeff[i] = 1.0/(jmx[i]-1);
   }

   /* For every process and level derive the index ranges used later:
      rl* (start/end from rel_start/rel_num), i* (same values), and p*
      (same, but widened to include the border when the block touches
      it), plus the even/odd start indices e*st / o*st. */
   for (my_num=0;my_num<nprocs;my_num++) {
     for (i=0;i<numlev;i++) {
       gp[my_num].rlist[i] = gp[my_num].rel_start_y[i];
       gp[my_num].rljst[i] = gp[my_num].rel_start_x[i];
       gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i] - 1;
       gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i] - 1;
       gp[my_num].iist[i] = gp[my_num].rel_start_y[i];
       gp[my_num].ijst[i] = gp[my_num].rel_start_x[i];
       gp[my_num].iien[i] = gp[my_num].iist[i] + gp[my_num].rel_num_y[i] - 1;
       gp[my_num].ijen[i] = gp[my_num].ijst[i] + gp[my_num].rel_num_x[i] - 1;
       gp[my_num].pist[i] = gp[my_num].rel_start_y[i];
       gp[my_num].pjst[i] = gp[my_num].rel_start_x[i];
       gp[my_num].pien[i] = gp[my_num].pist[i] + gp[my_num].rel_num_y[i] - 1;
       gp[my_num].pjen[i] = gp[my_num].pjst[i] + gp[my_num].rel_num_x[i] - 1;

       /* Widen the p* range to index 0 / size-1 at the grid edges. */
       if (gp[my_num].pist[i] == 1) {
         gp[my_num].pist[i] = 0;
       }
       if (gp[my_num].pjst[i] == 1) {
         gp[my_num].pjst[i] = 0;
       }
       if (gp[my_num].pien[i] == imx[i] - 2) {
         gp[my_num].pien[i] = imx[i]-1;
       }
       if (gp[my_num].pjen[i] == jmx[i] - 2) {
         gp[my_num].pjen[i] = jmx[i]-1;
       }

       /* First even and first odd row index of the block. */
       if (gp[my_num].rlist[i] % 2 == 0) {
         gp[my_num].eist[i] = gp[my_num].rlist[i];
         gp[my_num].oist[i] = gp[my_num].rlist[i] + 1;
       } else {
         gp[my_num].eist[i] = gp[my_num].rlist[i] + 1;
         gp[my_num].oist[i] = gp[my_num].rlist[i];
       }
       /* First even and first odd column index of the block. */
       if (gp[my_num].rljst[i] % 2 == 0) {
         gp[my_num].ejst[i] = gp[my_num].rljst[i];
         gp[my_num].ojst[i] = gp[my_num].rljst[i] + 1;
       } else {
         gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1;
         gp[my_num].ojst[i] = gp[my_num].rljst[i];
       }

       /* Blocks ending on the last interior row/column have their rl*
          end pulled in by one; ojest/ejest (resp. oiest/eiest) are then
          chosen according to the parity of the new end index. They are
          only assigned inside these branches. */
       if (gp[my_num].rlien[i] == imx[i]-2) {
         gp[my_num].rlien[i] = gp[my_num].rlien[i] - 1;
         if (gp[my_num].rlien[i] % 2 == 0) {
           gp[my_num].ojest[i] = gp[my_num].ojst[i];
           gp[my_num].ejest[i] = gp[my_num].ejst[i];
         } else {
           gp[my_num].ojest[i] = gp[my_num].ejst[i];
           gp[my_num].ejest[i] = gp[my_num].ojst[i];
         }
       }
       if (gp[my_num].rljen[i] == jmx[i]-2) {
         gp[my_num].rljen[i] = gp[my_num].rljen[i] - 1;
         if (gp[my_num].rljen[i] % 2 == 0) {
           gp[my_num].oiest[i] = gp[my_num].oist[i];
           gp[my_num].eiest[i] = gp[my_num].eist[i];
         } else {
           gp[my_num].oiest[i] = gp[my_num].eist[i];
           gp[my_num].eiest[i] = gp[my_num].oist[i];
         }
       }
     }
   }

/* initialize constants and variables

   id is a global shared variable that has fetch-and-add operations
   performed on it by processes to obtain their pids. */

   global->id = 0;
   global->psibi = 0.0;
   /* pi = 4 * atan(1) */
   pi = atan(1.0);
   pi = 4.*pi;

   factjacob = -1./(12.*res*res);
   factlap = 1./(res*res);
   eig2 = -h*f0*f0/(h1*h3*gpr);

   jmm1 = jm-1 ;
   ysca = ((double) jmm1)*res ;

   for (i=0;i<im;i++) {
     for (j=0;j<jm;j++) {
       guess->oldga[i][j] = 0.0;
       guess->oldgb[i][j] = 0.0;
     }
   }

   if (do_output) {
     printf(" MULTIGRID OUTPUTS\n");
   }

   /* Run slave on nprocs processes and wait for all of them. */
   CREATE(slave, nprocs);
   WAIT_FOR_END(nprocs);
   CLOCK(computeend)

   /* The line for process 0 is always printed; the remaining processes
      and the avg/min/max summary only when do_stats is set. */
   printf("\n");
   printf(" PROCESS STATISTICS\n");
   printf(" Total Multigrid Multigrid\n");
   printf(" Proc Time Time Fraction\n");
   printf(" 0 %15.0f %15.0f %10.3f\n",
          gp[0].total_time,gp[0].multi_time,
          gp[0].multi_time/gp[0].total_time);

   if (do_stats) {
     min_total = max_total = avg_total = gp[0].total_time;
     min_multi = max_multi = avg_multi = gp[0].multi_time;
     min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time;
     for (i=1;i<nprocs;i++) {
       if (gp[i].total_time > max_total) {
         max_total = gp[i].total_time;
       }
       if (gp[i].total_time < min_total) {
         min_total = gp[i].total_time;
       }
       if (gp[i].multi_time > max_multi) {
         max_multi = gp[i].multi_time;
       }
       if (gp[i].multi_time < min_multi) {
         min_multi = gp[i].multi_time;
       }
       if (gp[i].multi_time/gp[i].total_time > max_frac) {
         max_frac = gp[i].multi_time/gp[i].total_time;
       }
       if (gp[i].multi_time/gp[i].total_time < min_frac) {
         min_frac = gp[i].multi_time/gp[i].total_time;
       }
       avg_total += gp[i].total_time;
       avg_multi += gp[i].multi_time;
       avg_frac += gp[i].multi_time/gp[i].total_time;
     }
     /* The avg_* variables held running sums up to here. */
     avg_total = avg_total / nprocs;
     avg_multi = avg_multi / nprocs;
     avg_frac = avg_frac / nprocs;
     for (i=1;i<nprocs;i++) {
       printf(" %3ld %15.0f %15.0f %10.3f\n",
              i, gp[i].total_time, gp[i].multi_time,
              gp[i].multi_time/gp[i].total_time);
     }
     printf(" Avg %15.0f %15.0f %10.3f\n",
            avg_total,avg_multi,avg_frac);
     printf(" Min %15.0f %15.0f %10.3f\n",
            min_total,min_multi,min_frac);
     printf(" Max %15.0f %15.0f %10.3f\n",
            max_total,max_multi,max_frac);
   }
   printf("\n");

   /* global->trackstart is not assigned in this function; presumably
      the slave processes set it -- TODO confirm. */
   global->starttime = start;
   printf(" TIMING INFORMATION\n");
   printf("Start time : %16lu\n",
          global->starttime);
   printf("Initialization finish time : %16lu\n",
          global->trackstart);
   printf("Overall finish time : %16lu\n",
          computeend);
   printf("Total time with initialization : %16lu\n",
          computeend-global->starttime);
   printf("Total time without initialization : %16lu\n",
          computeend-global->trackstart);
   printf(" (excludes first timestep)\n");
   printf("\n");

   MAIN_END
}
int main(int argc, CHAR *argv[]) { INT i; UINT begin; UINT end; UINT lapsed; MATRIX vtrans, Vinv; /* View transformation and inverse. */ /* * First, process command line arguments. */ i = 1; while ((i < argc) && (argv[i][0] == '-')) { switch (argv[i][1]) { case '?': case 'h': case 'H': Usage(); exit(1); case 'a': case 'A': AntiAlias = TRUE; if (argv[i][2] != '\0') { NumSubRays = atoi(&argv[i][2]); } else { NumSubRays = atoi(&argv[++i][0]); } break; case 'm': if (argv[i][2] != '\0') { MaxGlobMem = atoi(&argv[i][2]); } else { MaxGlobMem = atoi(&argv[++i][0]); } break; case 'p': if (argv[i][2] != '\0') { nprocs = atoi(&argv[i][2]); } else { nprocs = atoi(&argv[++i][0]); } break; case 's': case 'S': dostats = TRUE; break; default: fprintf(stderr, "%s: Invalid option \'%c\'.\n", ProgName, argv[i][0]); exit(1); } i++; } if (i == argc) { Usage(); exit(1); } /* * Make sure nprocs is within valid range. */ if (nprocs < 1 || nprocs > MAX_PROCS) { fprintf(stderr, "%s: Valid range for #processors is [1, %d].\n", ProgName, MAX_PROCS); exit(1); } /* * Print command line parameters. */ printf("\n"); printf("Number of processors: \t%ld\n", nprocs); printf("Global shared memory size:\t%ld MB\n", MaxGlobMem); printf("Samples per pixel: \t%ld\n", NumSubRays); printf("\n"); /* * Initialize the shared memory environment and request the total * amount of amount of shared memory we might need. This * includes memory for the database, grid, and framebuffer. */ MaxGlobMem <<= 20; /* Convert MB to bytes. */ MAIN_INITENV(,MaxGlobMem + 512*1024) THREAD_INIT_FREE(); gm = (GMEM *)G_MALLOC(sizeof(GMEM)); /* * Perform shared environment initializations. */ gm->nprocs = nprocs; gm->pid = 0; gm->rid = 1; BARINIT(gm->start, nprocs) LOCKINIT(gm->pidlock) LOCKINIT(gm->ridlock) LOCKINIT(gm->memlock) ALOCKINIT(gm->wplock, nprocs) /* POSSIBLE ENHANCEMENT: Here is where one might distribute the raystruct data structure across physically distributed memories as desired. 
*/ if (!GlobalHeapInit(MaxGlobMem)) { fprintf(stderr, "%s: Cannot initialize global heap.\n", ProgName); exit(1); } /* * Initialize HUG parameters, read environment and geometry files. */ Huniform_defaults(); ReadEnvFile(/* *argv*/argv[i]); ReadGeoFile(GeoFileName); OpenFrameBuffer(); /* * Compute view transform and its inverse. */ CreateViewMatrix(); MatrixCopy(vtrans, View.vtrans); MatrixInverse(Vinv, vtrans); MatrixCopy(View.vtransInv, Vinv); /* * Print out what we have so far. */ printf("Number of primitive objects: \t%ld\n", prim_obj_cnt); printf("Number of primitive elements:\t%ld\n", prim_elem_cnt); /* * Preprocess database into hierarchical uniform grid. */ if (TraversalType == TT_HUG) BuildHierarchy_Uniform(); /* * Now create slave processes. */ CLOCK(begin) CREATE(StartRayTrace, gm->nprocs); WAIT_FOR_END(gm->nprocs); CLOCK(end) /* * We are finished. Clean up, print statistics and run time. */ CloseFrameBuffer(PicFileName); PrintStatistics(); lapsed = (end - begin) & 0x7FFFFFFF; printf("TIMING STATISTICS MEASURED BY MAIN PROCESS:\n"); printf(" Overall start time %20lu\n", begin); printf(" Overall end time %20lu\n", end); printf(" Total time with initialization %20lu\n", lapsed); printf(" Total time without initialization %20lu\n", end - gm->par_start_time); if (dostats) { unsigned totalproctime, maxproctime, minproctime; printf("\n\n\nPER-PROCESS STATISTICS:\n"); printf("%20s%20s\n","Proc","Time"); printf("%20s%20s\n\n","","Tracing Rays"); for (i = 0; i < gm->nprocs; i++) printf("%20ld%20ld\n",i,gm->partime[i]); totalproctime = gm->partime[0]; minproctime = gm->partime[0]; maxproctime = gm->partime[0]; for (i = 1; i < gm->nprocs; i++) { totalproctime += gm->partime[i]; if (gm->partime[i] > maxproctime) maxproctime = gm->partime[i]; if (gm->partime[i] < minproctime) minproctime = gm->partime[i]; } printf("\n\n%20s%20d\n","Max = ",maxproctime); printf("%20s%20d\n","Min = ",minproctime); printf("%20s%20d\n","Avg = ",(int) (((double) totalproctime) / ((double) 
(1.0 * gm->nprocs)))); } MAIN_END }
/*
 * Handler for the radiosity run/step/reset choices.
 *
 * val selects the action:
 *   CHOICE_RAD_RUN    run the whole computation on n_processors
 *                     processes, print timing, display the result;
 *   CHOICE_RAD_STEP   advance one step of the step-wise execution;
 *   CHOICE_RAD_RESET  re-initialise the globals and allow a new run.
 *
 * The static variable state tracks progress: 0 = ready, 1 = stepping
 * through ray tasks, 2 or more = final averaging step pending, -1 = a run
 * has finished and a reset is required before anything else is accepted.
 *
 * NOTE(review): the "#endif" after the signature closes a preprocessor
 * conditional that opens above this block (not visible here); under
 * SGI_GL && GL_NASA val is a local obtained from g_get_choice_val().
 */
void start_radiosity(long val)
#endif
{
    static long state = 0 ;
    long i;
    long total_rad_time, max_rad_time, min_rad_time;
    long total_refine_time, max_refine_time, min_refine_time;
    long total_wait_time, max_wait_time, min_wait_time;
    long total_vertex_time, max_vertex_time, min_vertex_time;

#if defined(SGI_GL) && defined(GL_NASA)
    long val ;

    val = g_get_choice_val( ap, &choices[0] ) ;
#endif
    if( val == CHOICE_RAD_RUN )
        {
            /* A finished run must be reset before running again */
            if( state == -1 )
                {
                    printf( "Please reset first\007\n" ) ;
                    return ;
                }

            /* Time stamp */
            CLOCK( time_rad_start ) ;

            global->index = 0;

            /* Assign a task queue to every process */
            for (i = 0 ; i < n_processors ; i++ )
                {
                    taskqueue_id[i] = assign_taskq(0) ;
                }

            /* And start processing */
            CREATE(radiosity, n_processors);
            WAIT_FOR_END(n_processors);

            /* Time stamp */
            CLOCK( time_rad_end );

            /* Print out running time */
            printf("TIMING STATISTICS MEASURED BY MAIN PROCESS:\n");

            print_running_time(0);

            if (dostats) {
                printf("\n\n\nPER-PROCESS STATISTICS:\n");

                printf("%8s%20s%20s%12s%12s\n","Proc","Total","Refine","Wait","Smooth");
                printf("%8s%20s%20s%12s%12s\n\n","","Time","Time","Time","Time") ;
                for (i = 0; i < n_processors; i++)
                    printf("%8ld%20lu%20lu%12lu%12lu\n",i,timing[i]->rad_time, timing[i]->refine_time, timing[i]->wait_time, timing[i]->vertex_time);

                /* Seed total/max/min with process 0, then fold in the rest */
                total_rad_time = timing[0]->rad_time;
                max_rad_time = timing[0]->rad_time;
                min_rad_time = timing[0]->rad_time;

                total_refine_time = timing[0]->refine_time;
                max_refine_time = timing[0]->refine_time;
                min_refine_time = timing[0]->refine_time;

                total_wait_time = timing[0]->wait_time;
                max_wait_time = timing[0]->wait_time;
                min_wait_time = timing[0]->wait_time;

                total_vertex_time = timing[0]->vertex_time;
                max_vertex_time = timing[0]->vertex_time;
                min_vertex_time = timing[0]->vertex_time;

                for (i = 1; i < n_processors; i++) {
                    total_rad_time += timing[i]->rad_time;
                    if (timing[i]->rad_time > max_rad_time)
                        max_rad_time = timing[i]->rad_time;
                    if (timing[i]->rad_time < min_rad_time)
                        min_rad_time = timing[i]->rad_time;

                    total_refine_time += timing[i]->refine_time;
                    if (timing[i]->refine_time > max_refine_time)
                        max_refine_time = timing[i]->refine_time;
                    if (timing[i]->refine_time < min_refine_time)
                        min_refine_time = timing[i]->refine_time;

                    total_wait_time += timing[i]->wait_time;
                    if (timing[i]->wait_time > max_wait_time)
                        max_wait_time = timing[i]->wait_time;
                    if (timing[i]->wait_time < min_wait_time)
                        min_wait_time = timing[i]->wait_time;

                    total_vertex_time += timing[i]->vertex_time;
                    if (timing[i]->vertex_time > max_vertex_time)
                        max_vertex_time = timing[i]->vertex_time;
                    if (timing[i]->vertex_time < min_vertex_time)
                        min_vertex_time = timing[i]->vertex_time;
                }

                printf("\n\n%8s%20lu%20lu%12lu%12lu\n","Max", max_rad_time, max_refine_time, max_wait_time, max_vertex_time);
                printf("\n%8s%20lu%20lu%12lu%12lu\n","Min", min_rad_time, min_refine_time, min_wait_time, min_vertex_time);
                /* Averages are computed in double and truncated to long */
                printf("\n%8s%20lu%20lu%12lu%12lu\n","Avg",
                       (long) (((double) total_rad_time) / ((double) (1.0 * n_processors))),
                       (long) (((double) total_refine_time) / ((double) (1.0 * n_processors))),
                       (long) (((double) total_wait_time) / ((double) (1.0 * n_processors))),
                       (long) (((double) total_vertex_time) / ((double) (1.0 * n_processors))));
                printf("\n\n");

            }

            /*      print_fork_time(0) ; */

            print_statistics( stdout, 0 ) ;

            /* Display image */
            display_scene( disp_fill_mode, disp_patch_switch,
                          disp_mesh_switch, disp_interaction_switch, 0) ;

            /* Mark the run as finished; a reset is now required */
            state = -1 ;
        }

    else if( val == CHOICE_RAD_STEP )
        {
            if( state == -1 )
                {
                    printf( "Please reset first\007\n" ) ;
                    return ;
                }

            /* Step execution */
            switch( state )
                {
                case 0:
                    /* First step: start the processes and run the
                       modeling tasks */
                    global->index = 1;

                    /* Assign a task queue to every process */
                    for ( i = 0 ; i < n_processors ; i++ )
                        {
                            taskqueue_id[i] = assign_taskq(0) ;
                        }

                    CREATE(radiosity, n_processors/* - 1*/);

                    /* Decompose model objects into patches and build
                       the BSP tree */
                    /* Create the first tasks (MASTER only) */
                    init_modeling_tasks(0) ;
                    process_tasks(0) ;
                    state ++ ;
                    break ;

                case 1:
                    /* Repeated until init_ray_tasks() returns zero;
                       only then does state advance */
                    if( init_ray_tasks(0) )
                        {
                            BARRIER(global->barrier, n_processors);
                            process_tasks(0) ;
                        }
                    else
                        state++ ;
                    break ;
                default:
                    /* Last step: averaging pass, normalizing pass, then
                       wait for the processes created in step 0 */
                    BARRIER(global->barrier, n_processors);
                    init_radavg_tasks( RAD_AVERAGING_MODE, 0 ) ;
                    process_tasks(0) ;
                    init_radavg_tasks( RAD_NORMALIZING_MODE, 0 ) ;
                    process_tasks(0) ;

                    WAIT_FOR_END(n_processors/* - 1*/)
                    state = -1 ;
                }

            /* Display image */
            display_scene( disp_fill_mode, disp_patch_switch,
                          disp_mesh_switch, disp_interaction_switch, 0) ;
        }

    else if( val == CHOICE_RAD_RESET )
        {
            /* Initialize global variables again */
            init_global(0) ;
            init_visibility_module(0) ;
            g_clear() ;
            state = 0 ;
        }
}
int main(int argc, char *argv[]) { long i; long total_rad_time, max_rad_time, min_rad_time; long total_refine_time, max_refine_time, min_refine_time; long total_wait_time, max_wait_time, min_wait_time; long total_vertex_time, max_vertex_time, min_vertex_time; /* Parse arguments */ parse_args(argc, argv) ; choices[2].init_value = model_selector ; /* Initialize graphic device */ if( batch_mode == 0 ) { g_init(argc, argv) ; setup_view( DFLT_VIEW_ROT_X, DFLT_VIEW_ROT_Y, DFLT_VIEW_DIST, DFLT_VIEW_ZOOM,0 ) ; } /* Initialize ANL macro */ MAIN_INITENV(,60000000) ; THREAD_INIT_FREE(); /* Allocate global shared memory and initialize */ global = (Global *) G_MALLOC(sizeof(Global)) ; if( global == 0 ) { printf( "Can't allocate memory\n" ) ; exit(1) ; } init_global(0) ; timing = (Timing **) G_MALLOC(n_processors * sizeof(Timing *)); for (i = 0; i < n_processors; i++) timing[i] = (Timing *) G_MALLOC(sizeof(Timing)); /* Initialize shared lock */ init_sharedlock(0) ; /* Initial random testing rays array for visibility test. */ init_visibility_module(0) ; /* POSSIBLE ENHANCEMENT: Here is where one might distribute the sobj_struct, task_struct, and vis_struct data structures across physically distributed memories as desired. 
One way to place data is as follows: long i; for (i=0;i<n_processors;i++) { Place all addresses x such that &(sobj_struct[i]) <= x < &(sobj_struct[i+1]) on node i Place all addresses x such that &(task_struct[i]) <= x < &(task_struct[i+1]) on node i Place all addresses x such that &(vis_struct[i]) <= x < &(vis_struct[i+1]) on node i } */ if( batch_mode ) { /* In batch mode, create child processes and start immediately */ /* Time stamp */ CLOCK( time_rad_start ); global->index = 0; for( i = 0 ; i < n_processors ; i++ ) { taskqueue_id[i] = assign_taskq(0) ; } /* And start processing */ CREATE(radiosity, n_processors); WAIT_FOR_END(n_processors); /* Time stamp */ CLOCK( time_rad_end ); /* Print out running time */ printf("TIMING STATISTICS MEASURED BY MAIN PROCESS:\n"); print_running_time(0); if (dostats) { printf("\n\n\nPER-PROCESS STATISTICS:\n"); printf("%8s%20s%20s%12s%12s\n","Proc","Total","Refine","Wait","Smooth"); printf("%8s%20s%20s%12s%12s\n\n","","Time","Time","Time","Time"); for (i = 0; i < n_processors; i++) printf("%8ld%20lu%20lu%12lu%12lu\n",i,timing[i]->rad_time, timing[i]->refine_time, timing[i]->wait_time, timing[i]->vertex_time); total_rad_time = timing[0]->rad_time; max_rad_time = timing[0]->rad_time; min_rad_time = timing[0]->rad_time; total_refine_time = timing[0]->refine_time; max_refine_time = timing[0]->refine_time; min_refine_time = timing[0]->refine_time; total_wait_time = timing[0]->wait_time; max_wait_time = timing[0]->wait_time; min_wait_time = timing[0]->wait_time; total_vertex_time = timing[0]->vertex_time; max_vertex_time = timing[0]->vertex_time; min_vertex_time = timing[0]->vertex_time; for (i = 1; i < n_processors; i++) { total_rad_time += timing[i]->rad_time; if (timing[i]->rad_time > max_rad_time) max_rad_time = timing[i]->rad_time; if (timing[i]->rad_time < min_rad_time) min_rad_time = timing[i]->rad_time; total_refine_time += timing[i]->refine_time; if (timing[i]->refine_time > max_refine_time) max_refine_time = 
timing[i]->refine_time; if (timing[i]->refine_time < min_refine_time) min_refine_time = timing[i]->refine_time; total_wait_time += timing[i]->wait_time; if (timing[i]->wait_time > max_wait_time) max_wait_time = timing[i]->wait_time; if (timing[i]->wait_time < min_wait_time) min_wait_time = timing[i]->wait_time; total_vertex_time += timing[i]->vertex_time; if (timing[i]->vertex_time > max_vertex_time) max_vertex_time = timing[i]->vertex_time; if (timing[i]->vertex_time < min_vertex_time) min_vertex_time = timing[i]->vertex_time; } printf("\n\n%8s%20lu%20lu%12lu%12lu\n","Max", max_rad_time, max_refine_time, max_wait_time, max_vertex_time); printf("\n%8s%20lu%20lu%12lu%12lu\n","Min", min_rad_time, min_refine_time, min_wait_time, min_vertex_time); printf("\n%8s%20lu%20lu%12lu%12lu\n","Avg", (long) (((double) total_rad_time) / ((double) (1.0 * n_processors))), (long) (((double) total_refine_time) / ((double) (1.0 * n_processors))), (long) (((double) total_wait_time) / ((double) (1.0 * n_processors))), (long) (((double) total_vertex_time) / ((double) (1.0 * n_processors)))); printf("\n\n"); } /* print_fork_time(0) ; */ print_statistics( stdout, 0 ) ; } else { /* In interactive mode, start workers, and the master starts notification loop */ /* Start notification loop */ g_start( expose_callback, N_SLIDERS, sliders, N_CHOICES, choices ) ; } MAIN_END; exit(0) ; }
int main(int argc, char *argv[]) { long i; long c; extern char *optarg; long m1; long factor; long pages; unsigned long start; CLOCK(start); while ((c = getopt(argc, argv, "p:m:n:l:stoh")) != -1) { switch(c) { case 'p': P = atoi(optarg); if (P < 1) { printerr("P must be >= 1\n"); exit(-1); } if (log_2(P) == -1) { printerr("P must be a power of 2\n"); exit(-1); } break; case 'm': M = atoi(optarg); m1 = M/2; if (2*m1 != M) { printerr("M must be even\n"); exit(-1); } break; case 'n': num_cache_lines = atoi(optarg); orig_num_lines = num_cache_lines; if (num_cache_lines < 1) { printerr("Number of cache lines must be >= 1\n"); exit(-1); } break; case 'l': log2_line_size = atoi(optarg); if (log2_line_size < 0) { printerr("Log base 2 of cache line length in bytes must be >= 0\n"); exit(-1); } break; case 's': dostats = !dostats; break; case 't': test_result = !test_result; break; case 'o': doprint = !doprint; break; case 'h': printf("Usage: FFT <options>\n\n"); printf("options:\n"); printf(" -mM : M = even integer; 2**M total complex data points transformed.\n"); printf(" -pP : P = number of processors; Must be a power of 2.\n"); printf(" -nN : N = number of cache lines.\n"); printf(" -lL : L = Log base 2 of cache line length in bytes.\n"); printf(" -s : Print individual processor timing statistics.\n"); printf(" -t : Perform FFT and inverse FFT. Test output by comparing the\n"); printf(" integral of the original data to the integral of the data that\n"); printf(" results from performing the FFT and inverse FFT.\n"); printf(" -o : Print out complex data points.\n"); printf(" -h : Print out command line options.\n\n"); printf("Default: FFT -m%1d -p%1d -n%1d -l%1d\n", DEFAULT_M,DEFAULT_P,NUM_CACHE_LINES,LOG2_LINE_SIZE); exit(0); break; } } MAIN_INITENV(,80000000); N = 1<<M; rootN = 1<<(M/2); rowsperproc = rootN/P; if (rowsperproc == 0) { printerr("Matrix not large enough. 
2**(M/2) must be >= P\n"); exit(-1); } line_size = 1 << log2_line_size; if (line_size < 2*sizeof(double)) { printf("WARNING: Each element is a complex double (%ld bytes)\n",2*sizeof(double)); printf(" => Less than one element per cache line\n"); printf(" Computing transpose blocking factor\n"); factor = (2*sizeof(double)) / line_size; num_cache_lines = orig_num_lines / factor; } if (line_size <= 2*sizeof(double)) { pad_length = 1; } else { pad_length = line_size / (2*sizeof(double)); } if (rowsperproc * rootN * 2 * sizeof(double) >= PAGE_SIZE) { pages = (2 * pad_length * sizeof(double) * rowsperproc) / PAGE_SIZE; if (pages * PAGE_SIZE != 2 * pad_length * sizeof(double) * rowsperproc) { pages ++; } pad_length = (pages * PAGE_SIZE) / (2 * sizeof(double) * rowsperproc); } else { pad_length = (PAGE_SIZE - (rowsperproc * rootN * 2 * sizeof(double))) / (2 * sizeof(double) * rowsperproc); if (pad_length * (2 * sizeof(double) * rowsperproc) != (PAGE_SIZE - (rowsperproc * rootN * 2 * sizeof(double)))) { printerr("Padding algorithm unsuccessful\n"); exit(-1); } } Global = (struct GlobalMemory *) G_MALLOC(sizeof(struct GlobalMemory)); x = (double *) G_MALLOC(2*(N+rootN*pad_length)*sizeof(double)+PAGE_SIZE); trans = (double *) G_MALLOC(2*(N+rootN*pad_length)*sizeof(double)+PAGE_SIZE); umain = (double *) G_MALLOC(2*rootN*sizeof(double)); umain2 = (double *) G_MALLOC(2*(N+rootN*pad_length)*sizeof(double)+PAGE_SIZE); Global->transtimes = (long *) G_MALLOC(P*sizeof(long)); Global->totaltimes = (long *) G_MALLOC(P*sizeof(long)); if (Global == NULL) { printerr("Could not malloc memory for Global\n"); exit(-1); } else if (x == NULL) { printerr("Could not malloc memory for x\n"); exit(-1); } else if (trans == NULL) { printerr("Could not malloc memory for trans\n"); exit(-1); } else if (umain == NULL) { printerr("Could not malloc memory for umain\n"); exit(-1); } else if (umain2 == NULL) { printerr("Could not malloc memory for umain2\n"); exit(-1); } x = (double *) (((unsigned long) x) 
+ PAGE_SIZE - ((unsigned long) x) % PAGE_SIZE); trans = (double *) (((unsigned long) trans) + PAGE_SIZE - ((unsigned long) trans) % PAGE_SIZE); umain2 = (double *) (((unsigned long) umain2) + PAGE_SIZE - ((unsigned long) umain2) % PAGE_SIZE); /* In order to optimize data distribution, the data structures x, trans, and umain2 have been aligned so that each begins on a page boundary. This ensures that the amount of padding calculated by the program is such that each processor's partition ends on a page boundary, thus ensuring that all data from these structures that are needed by a processor can be allocated to its local memory */ /* POSSIBLE ENHANCEMENT: Here is where one might distribute the x, trans, and umain2 data structures across physically distributed memories as desired. One way to place data is as follows: double *base; long i; i = ((N/P)+(rootN/P)*pad_length)*2; base = &(x[0]); for (j=0;j<P;j++) { Place all addresses x such that (base <= x < base+i) on node j base += i; } The trans and umain2 data structures can be placed in a similar manner. 
*/ printf("\n"); printf("FFT with Blocking Transpose\n"); printf(" %ld Complex Doubles\n",N); printf(" %ld Processors\n",P); if (num_cache_lines != orig_num_lines) { printf(" %ld Cache lines\n",orig_num_lines); printf(" %ld Cache lines for blocking transpose\n",num_cache_lines); } else { printf(" %ld Cache lines\n",num_cache_lines); } printf(" %d Byte line size\n",(1 << log2_line_size)); printf(" %d Bytes per page\n",PAGE_SIZE); printf("\n"); BARINIT(Global->start, P); LOCKINIT(Global->idlock); Global->id = 0; InitX(x); /* place random values in x */ if (test_result) { ck1 = CheckSum(x); } if (doprint) { printf("Original data values:\n"); PrintArray(N, x); } InitU(N,umain); /* initialize u arrays*/ InitU2(N,umain2,rootN); /* fire off P processes */ CREATE(SlaveStart, P); WAIT_FOR_END(P); if (doprint) { if (test_result) { printf("Data values after inverse FFT:\n"); } else { printf("Data values after FFT:\n"); } PrintArray(N, x); } transtime = Global->transtimes[0]; printf("\n"); printf(" PROCESS STATISTICS\n"); printf(" Computation Transpose Transpose\n"); printf(" Proc Time Time Fraction\n"); printf(" 0 %10ld %10ld %8.5f\n", Global->totaltimes[0],Global->transtimes[0], ((double)Global->transtimes[0])/Global->totaltimes[0]); if (dostats) { transtime2 = Global->transtimes[0]; avgtranstime = Global->transtimes[0]; avgcomptime = Global->totaltimes[0]; maxtotal = Global->totaltimes[0]; mintotal = Global->totaltimes[0]; maxfrac = ((double)Global->transtimes[0])/Global->totaltimes[0]; minfrac = ((double)Global->transtimes[0])/Global->totaltimes[0]; avgfractime = ((double)Global->transtimes[0])/Global->totaltimes[0]; for (i=1;i<P;i++) { if (Global->transtimes[i] > transtime) { transtime = Global->transtimes[i]; } if (Global->transtimes[i] < transtime2) { transtime2 = Global->transtimes[i]; } if (Global->totaltimes[i] > maxtotal) { maxtotal = Global->totaltimes[i]; } if (Global->totaltimes[i] < mintotal) { mintotal = Global->totaltimes[i]; } if 
(((double)Global->transtimes[i])/Global->totaltimes[i] > maxfrac) { maxfrac = ((double)Global->transtimes[i])/Global->totaltimes[i]; } if (((double)Global->transtimes[i])/Global->totaltimes[i] < minfrac) { minfrac = ((double)Global->transtimes[i])/Global->totaltimes[i]; } printf(" %3ld %10ld %10ld %8.5f\n", i,Global->totaltimes[i],Global->transtimes[i], ((double)Global->transtimes[i])/Global->totaltimes[i]); avgtranstime += Global->transtimes[i]; avgcomptime += Global->totaltimes[i]; avgfractime += ((double)Global->transtimes[i])/Global->totaltimes[i]; } printf(" Avg %10.0f %10.0f %8.5f\n", ((double) avgcomptime)/P,((double) avgtranstime)/P,avgfractime/P); printf(" Max %10ld %10ld %8.5f\n", maxtotal,transtime,maxfrac); printf(" Min %10ld %10ld %8.5f\n", mintotal,transtime2,minfrac); } Global->starttime = start; printf("\n"); printf(" TIMING INFORMATION\n"); printf("Start time : %16lu\n", Global->starttime); printf("Initialization finish time : %16lu\n", Global->initdonetime); printf("Overall finish time : %16lu\n", Global->finishtime); printf("Total time with initialization : %16lu\n", Global->finishtime-Global->starttime); printf("Total time without initialization : %16lu\n", Global->finishtime-Global->initdonetime); printf("Overall transpose time : %16ld\n", transtime); printf("Overall transpose fraction : %16.5f\n", ((double) transtime)/(Global->finishtime-Global->initdonetime)); printf("\n"); if (test_result) { ck3 = CheckSum(x); printf(" INVERSE FFT TEST RESULTS\n"); printf("Checksum difference is %.3f (%.3f, %.3f)\n", ck1-ck3, ck1, ck3); if (fabs(ck1-ck3) < 0.001) { printf("TEST PASSED\n"); } else { printf("TEST FAILED\n"); } } MAIN_END; }
int main(int argc, char **argv) { /* default values for the control parameters of the driver */ /* are in parameters.h */ if ((argc == 2) && ((strncmp(argv[1],"-h",strlen("-h")) == 0) || (strncmp(argv[1],"-H",strlen("-H")) == 0))) { printf("Usage: WATER-SPATIAL < infile, where the contents of infile can be\nobtained from the comments at the top of water.C and the first scanf \nin main() in water.C\n\n"); exit(0); } #else int main(void) { #endif /* POSSIBLE ENHANCEMENT: One might bind the first process to a processor here, even before the other (child) processes are bound later in mdmain(). */ six = stdout; TEMP =298.0; RHO =0.9980; /* read input */ #ifndef SIM_SOCLIB if (scanf("%lf%ld%ld%ld%ld%ld%ld%ld%ld%lf",&TSTEP, &NMOL, &NSTEP, &NORDER, &NSAVE, &NRST, &NPRINT, &NFMC,&NumProcs, &CUTOFF) != 10) fprintf(stderr,"ERROR: Usage: water < infile, which must have 10 fields, see SPLASH documentation or comment at top of water.C\n"); #else TSTEP = 1.5e-16; NMOL = NMOLS; NSTEP = 3; NORDER = 6; NSAVE = -1 ; NRST = 3000 ; NPRINT = 3 ; NFMC = 0; NumProcs = NB_P; CUTOFF = 6.212752; #endif printf("Using %ld procs on %ld steps of %ld mols\n", NumProcs, NSTEP, NMOL); printf("Other parameters:\n\tTSTEP = %8.2e\n\tNORDER = %ld\n\tNSAVE = %ld\n",TSTEP,NORDER,NSAVE); printf("\tNRST = %ld\n\tNPRINT = %ld\n\tNFMC = %ld\n\tCUTOFF = %lf\n\n",NRST,NPRINT,NFMC,CUTOFF); /* set up scaling factors and constants */ NORD1=NORDER+1; CNSTNT(NORD1,TLC); /* sub. call to set up constants */ SYSCNS(); /* sub. 
call to initialize system constants */ printf("%ld boxes with %ld processors\n\n", BOX_PER_SIDE * BOX_PER_SIDE * BOX_PER_SIDE, NumProcs); if (NumProcs > (BOX_PER_SIDE * BOX_PER_SIDE * BOX_PER_SIDE)) { fprintf(stderr,"ERROR: less boxes (%ld) than processors (%ld)\n", BOX_PER_SIDE * BOX_PER_SIDE * BOX_PER_SIDE, NumProcs); fflush(stderr); exit(-1); } fprintf(six,"\nTEMPERATURE = %8.2f K\n",TEMP); fprintf(six,"DENSITY = %8.5f G/C.C.\n",RHO); fprintf(six,"NUMBER OF MOLECULES = %8ld\n",NMOL); fprintf(six,"NUMBER OF PROCESSORS = %8ld\n",NumProcs); fprintf(six,"TIME STEP = %8.2e SEC\n",TSTEP); fprintf(six,"ORDER USED TO SOLVE F=MA = %8ld \n",NORDER); fprintf(six,"NO. OF TIME STEPS = %8ld \n",NSTEP); fprintf(six,"FREQUENCY OF DATA SAVING = %8ld \n",NSAVE); fprintf(six,"FREQUENCY TO WRITE RST FILE= %8ld \n",NRST); fflush(six); { /* do memory initializations */ long procnum, i, j, k, l; struct list_of_boxes *temp_box; long xprocs, yprocs, zprocs; long x_inc, y_inc, z_inc; long x_ct, y_ct, z_ct; long x_left, y_left, z_left; long x_first, y_first, z_first; long x_last, y_last, z_last; double proccbrt; long gmem_size = sizeof(struct GlobalMemory); MAIN_INITENV((NumProcs),40000000,); /* macro call to initialize shared memory etc. 
*/ /* Allocate space for main (BOX) data structure as well as * synchronization variables */ start_end = (first_last_array **) G_MALLOC(sizeof(first_last_array *) * NumProcs); for (i=0; i < NumProcs; i++) { start_end[i] = (first_last_array *) G_MALLOC(sizeof(first_last_array)); } /* Calculate start and finish box numbers for processors */ xprocs = 0; yprocs = 0; proccbrt = (double) pow((double) NumProcs, 1.0/3.0) + 0.00000000000001; j = (long) proccbrt; if (j<1) j = 1; while ((xprocs == 0) && (j>0)) { k = (long) sqrt((double) (NumProcs / j)); if (k<1) k=1; while ((yprocs == 0) && (k>0)) { l = NumProcs/(j*k); if ((j*k*l) == NumProcs) { xprocs = j; yprocs = k; zprocs = l; } /* if */ k--; } /* while yprocs && k */ j--; } /* while xprocs && j */ printf("xprocs = %ld\typrocs = %ld\tzprocs = %ld\n", xprocs, yprocs, zprocs); fflush(stdout); /* Fill in start_end array values */ procnum = 0; x_inc = BOX_PER_SIDE/xprocs; y_inc = BOX_PER_SIDE/yprocs; z_inc = BOX_PER_SIDE/zprocs; x_left = BOX_PER_SIDE - (xprocs*x_inc); y_left = BOX_PER_SIDE - (yprocs*y_inc); z_left = BOX_PER_SIDE - (zprocs*z_inc); printf("x_inc = %ld\t y_inc = %ld\t z_inc = %ld\n",x_inc,y_inc,z_inc); printf("x_left = %ld\t y_left = %ld\t z_left = %ld\n",x_left,y_left,z_left); fflush(stdout); x_first = 0; x_ct = x_left; x_last = -1; x_inc++; for (i=0; i<xprocs; i++) { y_ct = y_left; if (x_ct == 0) x_inc--; x_last += x_inc; y_first = 0; y_last = -1; y_inc++; for (j=0; j<yprocs; j++) { z_ct = z_left; if (y_ct == 0) y_inc--; y_last += y_inc; z_first = 0; z_last = -1; z_inc++; for (k=0; k<zprocs; k++) { if (z_ct == 0) z_inc--; z_last += z_inc; start_end[procnum]->box[XDIR][FIRST] = x_first; start_end[procnum]->box[XDIR][LAST] = min(x_last, BOX_PER_SIDE - 1); start_end[procnum]->box[YDIR][FIRST] = y_first; start_end[procnum]->box[YDIR][LAST] = min(y_last, BOX_PER_SIDE - 1); start_end[procnum]->box[ZDIR][FIRST] = z_first; start_end[procnum]->box[ZDIR][LAST] = min(z_last, BOX_PER_SIDE - 1); z_first = z_last + 1; 
z_ct--; procnum++; } y_first = y_last + 1; y_ct--; } x_first = x_last + 1; x_ct--; } /* Allocate space for my_boxes array */ my_boxes = (box_list **) G_MALLOC(NumProcs * sizeof(box_list *)); /* Set all box ptrs to null */ for (i=0; i<NumProcs; i++) my_boxes[i] = NULL; /* Set up links for all boxes for initial interf and intraf */ temp_box = my_boxes[0]; while (temp_box) { temp_box = temp_box->next_box; } /* Allocate space for BOX array */ BOX = (box_type ***) G_MALLOC(BOX_PER_SIDE * sizeof(box_type **)); for (i=0; i < BOX_PER_SIDE; i++) { BOX[i] = (box_type **) G_MALLOC( BOX_PER_SIDE * sizeof(box_type *)); for (j=0; j < BOX_PER_SIDE; j++) { BOX[i][j] = (box_type *) G_MALLOC(BOX_PER_SIDE * sizeof(box_type)); for (k=0; k < BOX_PER_SIDE; k++) { BOX[i][j][k].list = NULL; LOCKINIT(BOX[i][j][k].boxlock); } } } /* for i */ gl = (struct GlobalMemory *) G_MALLOC(gmem_size); /* macro calls to initialize synch variables */ BARINIT(gl->start, NumProcs); BARINIT(gl->InterfBar, NumProcs); BARINIT(gl->PotengBar, NumProcs); LOCKINIT(gl->IOLock); LOCKINIT(gl->IndexLock); LOCKINIT(gl->IntrafVirLock); LOCKINIT(gl->InterfVirLock); LOCKINIT(gl->KinetiSumLock); LOCKINIT(gl->PotengSumLock); } fprintf(six,"SPHERICAL CUTOFF RADIUS = %8.4f ANGSTROM\n",CUTOFF); fflush(six); IRST=0; /* call initialization routine */ INITIA(); gl->tracktime = 0; gl->intratime = 0; gl->intertime = 0; /* initialize Index to 1 so that the first created child gets id 1, not 0 */ gl->Index = 1; if (NSAVE > 0) { /* not true for input decks provided */ fprintf(six,"COLLECTING X AND V DATA AT EVERY %4ld TIME STEPS \n",NSAVE); } /* spawn helper processes */ CLOCK(gl->computestart); CREATE(WorkStart, NumProcs); /* macro to make main process wait for all others to finish */ WAIT_FOR_END(NumProcs); CLOCK(gl->computeend); printf("COMPUTESTART (after initialization) = %lu\n",gl->computestart); printf("COMPUTEEND = %lu\n",gl->computeend); printf("COMPUTETIME (after initialization) = %lu\n",gl->computeend-gl->computestart); 
printf("Measured Time (2nd timestep onward) = %lu\n",gl->tracktime); printf("Intramolecular time only (2nd timestep onward) = %lu\n",gl->intratime); printf("Intermolecular time only (2nd timestep onward) = %lu\n",gl->intertime); printf("Other time (2nd timestep onward) = %lu\n",gl->tracktime - gl->intratime - gl->intertime); printf("\nExited Happily with XTT = %g (note: XTT value is garbage if NPRINT > NSTEP)\n", XTT); MAIN_END; } /* main.c */
int main(int argc, char **argv) { /* default values for the control parameters of the driver */ /* are in parameters.h */ if ((argc == 2) &&((strncmp(argv[1],"-h",strlen("-h")) == 0) || (strncmp(argv[1],"-H",strlen("-H")) == 0))) { printf("Usage: WATER-NSQUARED < infile, where the contents of infile can be\nobtained from the comments at the top of water.C and the first scanf \nin main() in water.C\n\n"); exit(0); } /* POSSIBLE ENHANCEMENT: Here's where one might bind the main process (process 0) to a processor if one wanted to. Others can be bound in the WorkStart routine. */ six = stdout; /* output file */ TEMP =298.0; RHO =0.9980; CUTOFF=0.0; /* read input */ if (scanf("%lf%ld%ld%ld%ld%ld%ld%ld%ld%lf",&TSTEP, &NMOL, &NSTEP, &NORDER, &NSAVE, &NRST, &NPRINT, &NFMC,&NumProcs, &CUTOFF) != 10) fprintf(stderr,"ERROR: Usage: water < infile, which must have 10 fields, see SPLASH documentation or comment at top of water.C\n"); if (NMOL > MAXLCKS) { fprintf(stderr, "Just so you know ... Lock array in global.H has size %ld < %ld (NMOL)\n code will still run correctly but there may be lock contention\n\n", MAXLCKS, NMOL); } printf("Using %ld procs on %ld steps of %ld mols\n", NumProcs, NSTEP, NMOL); printf("Other parameters:\n\tTSTEP = %8.2e\n\tNORDER = %ld\n\tNSAVE = %ld\n",TSTEP,NORDER,NSAVE); printf("\tNRST = %ld\n\tNPRINT = %ld\n\tNFMC = %ld\n\tCUTOFF = %lf\n\n",NRST,NPRINT,NFMC,CUTOFF); /* SET UP SCALING FACTORS AND CONSTANTS */ NORD1=NORDER+1; CNSTNT(NORD1,TLC); /* sub. call to set up constants */ { /* Do memory initializations */ long pid; long mol_size = sizeof(molecule_type) * NMOL; long gmem_size = sizeof(struct GlobalMemory); /* POSSIBLE ENHANCEMENT: One might bind the first process to a processor here, even before the other (child) processes are bound later in mdmain(). */ MAIN_INITENV(,70000000,); /* macro call to initialize shared memory etc. 
*/ THREAD_INIT_FREE(); /* allocate space for main (VAR) data structure as well as synchronization variables */ /* POSSIBLE ENHANCEMENT: One might want to allocate a process's portion of the VAR array and what it points to in its local memory */ VAR = (molecule_type *) G_MALLOC(mol_size); gl = (struct GlobalMemory *) G_MALLOC(gmem_size); /* POSSIBLE ENHANCEMENT: One might want to allocate process i's PFORCES[i] array in its local memory */ PFORCES = (double ****) G_MALLOC(NumProcs * sizeof (double ***)); { long i,j,k; for (i = 0; i < NumProcs; i++) { PFORCES[i] = (double ***) G_MALLOC(NMOL * sizeof (double **)); for (j = 0; j < NMOL; j++) { PFORCES[i][j] = (double **) G_MALLOC(NDIR * sizeof (double *)); for (k = 0; k < NDIR; k++) { PFORCES[i][j][k] = (double *) G_MALLOC(NATOM * sizeof (double)); } } } } /* macro calls to initialize synch varibles */ BARINIT(gl->start, NumProcs); BARINIT(gl->InterfBar, NumProcs); BARINIT(gl->PotengBar, NumProcs); LOCKINIT(gl->IOLock); LOCKINIT(gl->IndexLock); LOCKINIT(gl->IntrafVirLock); LOCKINIT(gl->InterfVirLock); LOCKINIT(gl->FXLock); LOCKINIT(gl->FYLock); LOCKINIT(gl->FZLock); if (NMOL < MAXLCKS) { ALOCKINIT(gl->MolLock, NMOL); } else { ALOCKINIT(gl->MolLock, MAXLCKS); } LOCKINIT(gl->KinetiSumLock); LOCKINIT(gl->PotengSumLock); /* set up control for static scheduling */ MolsPerProc = NMOL/NumProcs; StartMol[0] = 0; for (pid = 1; pid < NumProcs; pid += 1) { StartMol[pid] = StartMol[pid-1] + MolsPerProc; } StartMol[NumProcs] = NMOL; } SYSCNS(); /* sub. call to initialize system constants */ fprintf(six,"\nTEMPERATURE = %8.2f K\n",TEMP); fprintf(six,"DENSITY = %8.5f G/C.C.\n",RHO); fprintf(six,"NUMBER OF MOLECULES = %8ld\n",NMOL); fprintf(six,"NUMBER OF PROCESSORS = %8ld\n",NumProcs); fprintf(six,"TIME STEP = %8.2e SEC\n",TSTEP); fprintf(six,"ORDER USED TO SOLVE F=MA = %8ld \n",NORDER); fprintf(six,"NO. 
OF TIME STEPS = %8ld \n",NSTEP); fprintf(six,"FREQUENCY OF DATA SAVING = %8ld \n",NSAVE); fprintf(six,"FREQUENCY TO WRITE RST FILE= %8ld \n",NRST); fprintf(six,"SPHERICAL CUTOFF RADIUS = %8.4f ANGSTROM\n",CUTOFF); fflush(six); /* initialization routine; also reads displacements and sets up random velocities*/ INITIA(); /*.....start molecular dynamic loop */ gl->tracktime = 0; gl->intratime = 0; gl->intertime = 0; /* initialize Index to 1 so that the first created child gets id 1, not 0 */ gl->Index = 1; if (NSAVE > 0) /* not true for input decks provided */ fprintf(six,"COLLECTING X AND V DATA AT EVERY %4ld TIME STEPS \n",NSAVE); /* spawn helper processes, each getting its unique process id */ CLOCK(gl->computestart); CREATE(WorkStart, NumProcs); /* macro to make main process wait for all others to finish */ WAIT_FOR_END(NumProcs); CLOCK(gl->computeend); printf("COMPUTESTART (after initialization) = %lu\n",gl->computestart); printf("COMPUTEEND = %lu\n",gl->computeend); printf("COMPUTETIME (after initialization) = %lu\n",gl->computeend-gl->computestart); printf("Measured Time (2nd timestep onward) = %lu\n",gl->tracktime); printf("Intramolecular time only (2nd timestep onward) = %lu\n",gl->intratime); printf("Intermolecular time only (2nd timestep onward) = %lu\n",gl->intertime); printf("Other time (2nd timestep onward) = %lu\n",gl->tracktime - gl->intratime - gl->intertime); printf("\nExited Happily with XTT = %g (note: XTT value is garbage if NPRINT > NSTEP)\n", XTT); MAIN_END; } /* main.c */
int main(int argc, char *argv[]) { long i; long j; long k; long x_part; long y_part; long d_size; long itemp; long jtemp; double procsqrt; long temp = 0; double min_total; double max_total; double avg_total; double min_multi; double max_multi; double avg_multi; double min_frac; double max_frac; double avg_frac; long ch; extern char *optarg; unsigned long computeend; unsigned long start; CLOCK(start) while ((ch = getopt(argc, argv, "n:p:e:r:t:soh")) != -1) { switch(ch) { case 'n': im = atoi(optarg); if (log_2(im-2) == -1) { printerr("Grid must be ((power of 2)+2) in each dimension\n"); exit(-1); } break; case 'p': nprocs = atoi(optarg); if (nprocs < 1) { printerr("P must be >= 1\n"); exit(-1); } if (log_2(nprocs) == -1) { printerr("P must be a power of 2\n"); exit(-1); } break; case 'e': tolerance = atof(optarg); break; case 'r': res = atof(optarg); break; case 't': dtau = atof(optarg); break; case 's': do_stats = !do_stats; break; case 'o': do_output = !do_output; break; case 'h': printf("Usage: OCEAN <options>\n\n"); printf("options:\n"); printf(" -nN : Simulate NxN ocean. N must be (power of 2)+2.\n"); printf(" -pP : P = number of processors. 
P must be power of 2.\n"); printf(" -eE : E = error tolerance for iterative relaxation.\n"); printf(" -rR : R = distance between grid points in meters.\n"); printf(" -tT : T = timestep in seconds.\n"); printf(" -s : Print timing statistics.\n"); printf(" -o : Print out relaxation residual values.\n"); printf(" -h : Print out command line options.\n\n"); printf("Default: OCEAN -n%1d -p%1d -e%1g -r%1g -t%1g\n", DEFAULT_N,DEFAULT_P,DEFAULT_E,DEFAULT_R,DEFAULT_T); exit(0); break; } } MAIN_INITENV(,60000000) THREAD_INIT_FREE(); jm = im; printf("\n"); printf("Ocean simulation with W-cycle multigrid solver\n"); printf(" Processors : %1ld\n",nprocs); printf(" Grid size : %1ld x %1ld\n",im,jm); printf(" Grid resolution (meters) : %0.2f\n",res); printf(" Time between relaxations (seconds) : %0.0f\n",dtau); printf(" Error tolerance : %0.7g\n",tolerance); printf("\n"); xprocs = 0; yprocs = 0; procsqrt = sqrt((double) nprocs); j = (long) procsqrt; while ((xprocs == 0) && (j > 0)) { k = nprocs / j; if (k * j == nprocs) { if (k > j) { xprocs = j; yprocs = k; } else { xprocs = k; yprocs = j; } } j--; } if (xprocs == 0) { printerr("Could not find factors for subblocking\n"); exit(-1); } minlevel = 0; itemp = 1; jtemp = 1; numlev = 0; minlevel = 0; while (itemp < (im-2)) { itemp = itemp*2; jtemp = jtemp*2; if ((itemp/yprocs > 1) && (jtemp/xprocs > 1)) { numlev++; } } if (numlev == 0) { printerr("Must have at least 2 grid points per processor in each dimension\n"); exit(-1); } imx = (long *) G_MALLOC(numlev*sizeof(long)); jmx = (long *) G_MALLOC(numlev*sizeof(long)); lev_res = (double *) G_MALLOC(numlev*sizeof(double)); lev_tol = (double *) G_MALLOC(numlev*sizeof(double)); i_int_coeff = (double *) G_MALLOC(numlev*sizeof(double)); j_int_coeff = (double *) G_MALLOC(numlev*sizeof(double)); xpts_per_proc = (long *) G_MALLOC(numlev*sizeof(long)); ypts_per_proc = (long *) G_MALLOC(numlev*sizeof(long)); imx[numlev-1] = im; jmx[numlev-1] = jm; lev_res[numlev-1] = res; lev_tol[numlev-1] = 
tolerance; for (i=numlev-2;i>=0;i--) { imx[i] = ((imx[i+1] - 2) / 2) + 2; jmx[i] = ((jmx[i+1] - 2) / 2) + 2; lev_res[i] = lev_res[i+1] * 2; } for (i=0;i<numlev;i++) { xpts_per_proc[i] = (jmx[i]-2) / xprocs; ypts_per_proc[i] = (imx[i]-2) / yprocs; } for (i=numlev-1;i>=0;i--) { if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) { minlevel = i+1; break; } } for (i=0;i<numlev;i++) { temp += imx[i]; } temp = 0; j = 0; for (k=0;k<numlev;k++) { for (i=0;i<imx[k];i++) { j++; temp += jmx[k]; } } d_size = nprocs*sizeof(double ***); psi = (double ****) G_MALLOC(d_size); psim = (double ****) G_MALLOC(d_size); work1 = (double ****) G_MALLOC(d_size); work4 = (double ****) G_MALLOC(d_size); work5 = (double ****) G_MALLOC(d_size); work7 = (double ****) G_MALLOC(d_size); temparray = (double ****) G_MALLOC(d_size); d_size = 2*sizeof(double **); for (i=0;i<nprocs;i++) { psi[i] = (double ***) G_MALLOC(d_size); psim[i] = (double ***) G_MALLOC(d_size); work1[i] = (double ***) G_MALLOC(d_size); work4[i] = (double ***) G_MALLOC(d_size); work5[i] = (double ***) G_MALLOC(d_size); work7[i] = (double ***) G_MALLOC(d_size); temparray[i] = (double ***) G_MALLOC(d_size); } d_size = nprocs*sizeof(double **); psium = (double ***) G_MALLOC(d_size); psilm = (double ***) G_MALLOC(d_size); psib = (double ***) G_MALLOC(d_size); ga = (double ***) G_MALLOC(d_size); gb = (double ***) G_MALLOC(d_size); work2 = (double ***) G_MALLOC(d_size); work3 = (double ***) G_MALLOC(d_size); work6 = (double ***) G_MALLOC(d_size); tauz = (double ***) G_MALLOC(d_size); oldga = (double ***) G_MALLOC(d_size); oldgb = (double ***) G_MALLOC(d_size); gp = (struct Global_Private *) G_MALLOC((nprocs+1)*sizeof(struct Global_Private)); for (i=0;i<nprocs;i++) { gp[i].rel_num_x = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].rel_num_y = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].eist = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].ejst = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].oist = (long *) 
G_MALLOC(numlev*sizeof(long)); gp[i].ojst = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].rlist = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].rljst = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].rlien = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].rljen = (long *) G_MALLOC(numlev*sizeof(long)); gp[i].multi_time = 0; gp[i].total_time = 0; } subblock(); x_part = (jm - 2)/xprocs + 2; y_part = (im - 2)/yprocs + 2; d_size = x_part*y_part*sizeof(double) + y_part*sizeof(double *); global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct)); for (i=0;i<nprocs;i++) { psi[i][0] = (double **) G_MALLOC(d_size); psi[i][1] = (double **) G_MALLOC(d_size); psim[i][0] = (double **) G_MALLOC(d_size); psim[i][1] = (double **) G_MALLOC(d_size); psium[i] = (double **) G_MALLOC(d_size); psilm[i] = (double **) G_MALLOC(d_size); psib[i] = (double **) G_MALLOC(d_size); ga[i] = (double **) G_MALLOC(d_size); gb[i] = (double **) G_MALLOC(d_size); work1[i][0] = (double **) G_MALLOC(d_size); work1[i][1] = (double **) G_MALLOC(d_size); work2[i] = (double **) G_MALLOC(d_size); work3[i] = (double **) G_MALLOC(d_size); work4[i][0] = (double **) G_MALLOC(d_size); work4[i][1] = (double **) G_MALLOC(d_size); work5[i][0] = (double **) G_MALLOC(d_size); work5[i][1] = (double **) G_MALLOC(d_size); work6[i] = (double **) G_MALLOC(d_size); work7[i][0] = (double **) G_MALLOC(d_size); work7[i][1] = (double **) G_MALLOC(d_size); temparray[i][0] = (double **) G_MALLOC(d_size); temparray[i][1] = (double **) G_MALLOC(d_size); tauz[i] = (double **) G_MALLOC(d_size); oldga[i] = (double **) G_MALLOC(d_size); oldgb[i] = (double **) G_MALLOC(d_size); } f = (double *) G_MALLOC(im*sizeof(double)); multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct)); d_size = numlev*sizeof(double **); if (numlev%2 == 1) { /* To make sure that the actual data starts double word aligned, add an extra pointer */ d_size += sizeof(double **); } for (i=0;i<numlev;i++) { d_size += 
((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+ ((imx[i]-2)/yprocs+2)*sizeof(double *); } d_size *= nprocs; if (nprocs%2 == 1) { /* To make sure that the actual data starts double word aligned, add an extra pointer */ d_size += sizeof(double ***); } d_size += nprocs*sizeof(double ***); q_multi = (double ****) G_MALLOC(d_size); rhs_multi = (double ****) G_MALLOC(d_size); locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct)); bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct)); LOCKINIT(locks->idlock) LOCKINIT(locks->psiailock) LOCKINIT(locks->psibilock) LOCKINIT(locks->donelock) LOCKINIT(locks->error_lock) LOCKINIT(locks->bar_lock) #if defined(MULTIPLE_BARRIERS) BARINIT(bars->iteration, nprocs) BARINIT(bars->gsudn, nprocs) BARINIT(bars->p_setup, nprocs) BARINIT(bars->p_redph, nprocs) BARINIT(bars->p_soln, nprocs) BARINIT(bars->p_subph, nprocs) BARINIT(bars->sl_prini, nprocs) BARINIT(bars->sl_psini, nprocs) BARINIT(bars->sl_onetime, nprocs) BARINIT(bars->sl_phase_1, nprocs) BARINIT(bars->sl_phase_2, nprocs) BARINIT(bars->sl_phase_3, nprocs) BARINIT(bars->sl_phase_4, nprocs) BARINIT(bars->sl_phase_5, nprocs) BARINIT(bars->sl_phase_6, nprocs) BARINIT(bars->sl_phase_7, nprocs) BARINIT(bars->sl_phase_8, nprocs) BARINIT(bars->sl_phase_9, nprocs) BARINIT(bars->sl_phase_10, nprocs) BARINIT(bars->error_barrier, nprocs) #else BARINIT(bars->barrier, nprocs) #endif link_all(); multi->err_multi = 0.0; i_int_coeff[0] = 0.0; j_int_coeff[0] = 0.0; for (i=0;i<numlev;i++) { i_int_coeff[i] = 1.0/(imx[i]-1); j_int_coeff[i] = 1.0/(jmx[i]-1); } /* initialize constants and variables id is a global shared variable that has fetch-and-add operations performed on it by processes to obtain their pids. 
*/ global->id = 0; global->psibi = 0.0; pi = atan(1.0); pi = 4.*pi; factjacob = -1./(12.*res*res); factlap = 1./(res*res); eig2 = -h*f0*f0/(h1*h3*gpr); jmm1 = jm-1 ; ysca = ((double) jmm1)*res ; im = (imx[numlev-1]-2)/yprocs + 2; jm = (jmx[numlev-1]-2)/xprocs + 2; if (do_output) { printf(" MULTIGRID OUTPUTS\n"); } CREATE(slave, nprocs); WAIT_FOR_END(nprocs); CLOCK(computeend) printf("\n"); printf(" PROCESS STATISTICS\n"); printf(" Total Multigrid Multigrid\n"); printf(" Proc Time Time Fraction\n"); printf(" 0 %15.0f %15.0f %10.3f\n", gp[0].total_time,gp[0].multi_time, gp[0].multi_time/gp[0].total_time); if (do_stats) { min_total = max_total = avg_total = gp[0].total_time; min_multi = max_multi = avg_multi = gp[0].multi_time; min_frac = max_frac = avg_frac = gp[0].multi_time/gp[0].total_time; for (i=1;i<nprocs;i++) { if (gp[i].total_time > max_total) { max_total = gp[i].total_time; } if (gp[i].total_time < min_total) { min_total = gp[i].total_time; } if (gp[i].multi_time > max_multi) { max_multi = gp[i].multi_time; } if (gp[i].multi_time < min_multi) { min_multi = gp[i].multi_time; } if (gp[i].multi_time/gp[i].total_time > max_frac) { max_frac = gp[i].multi_time/gp[i].total_time; } if (gp[i].multi_time/gp[i].total_time < min_frac) { min_frac = gp[i].multi_time/gp[i].total_time; } avg_total += gp[i].total_time; avg_multi += gp[i].multi_time; avg_frac += gp[i].multi_time/gp[i].total_time; } avg_total = avg_total / nprocs; avg_multi = avg_multi / nprocs; avg_frac = avg_frac / nprocs; for (i=1;i<nprocs;i++) { printf(" %3ld %15.0f %15.0f %10.3f\n", i,gp[i].total_time,gp[i].multi_time, gp[i].multi_time/gp[i].total_time); } printf(" Avg %15.0f %15.0f %10.3f\n", avg_total,avg_multi,avg_frac); printf(" Min %15.0f %15.0f %10.3f\n", min_total,min_multi,min_frac); printf(" Max %15.0f %15.0f %10.3f\n", max_total,max_multi,max_frac); } printf("\n"); global->starttime = start; printf(" TIMING INFORMATION\n"); printf("Start time : %16lu\n", global->starttime); 
printf("Initialization finish time : %16lu\n", global->trackstart); printf("Overall finish time : %16lu\n", computeend); printf("Total time with initialization : %16lu\n", computeend-global->starttime); printf("Total time without initialization : %16lu\n", computeend-global->trackstart); printf(" (excludes first timestep)\n"); printf("\n"); MAIN_END }