/* * InitGlobalMemory () * * Args : none. * * Returns : nothing. * * Side Effects : Allocates all the global storage for G_Memory. * */ void InitGlobalMemory () { int i; G_Memory = (g_mem *) G_MALLOC(sizeof(g_mem)); G_Memory->i_array = (int *) G_MALLOC(Number_Of_Processors * sizeof(int)); G_Memory->d_array = (double *) G_MALLOC(Number_Of_Processors * sizeof(double)); if (G_Memory == NULL) { printf("Ran out of global memory in InitGlobalMemory\n"); exit(-1); } G_Memory->count = 0; G_Memory->id = 0; LOCKINIT(G_Memory->io_lock); LOCKINIT(G_Memory->mal_lock); LOCKINIT(G_Memory->single_lock); LOCKINIT(G_Memory->count_lock); ALOCKINIT(G_Memory->lock_array, MAX_LOCKS); BARINIT(G_Memory->synch); BARINIT(G_Memory->done_barrier); G_Memory->max_x = -MAX_REAL; G_Memory->min_x = MAX_REAL; G_Memory->max_y = -MAX_REAL; G_Memory->min_y = MAX_REAL; }
int main(int argc, CHAR *argv[]) { INT i; UINT begin; UINT end; UINT lapsed; MATRIX vtrans, Vinv; /* View transformation and inverse. */ /* * First, process command line arguments. */ i = 1; while ((i < argc) && (argv[i][0] == '-')) { switch (argv[i][1]) { case '?': case 'h': case 'H': Usage(); exit(1); case 'a': case 'A': AntiAlias = TRUE; if (argv[i][2] != '\0') { NumSubRays = atoi(&argv[i][2]); } else { NumSubRays = atoi(&argv[++i][0]); } break; case 'm': if (argv[i][2] != '\0') { MaxGlobMem = atoi(&argv[i][2]); } else { MaxGlobMem = atoi(&argv[++i][0]); } break; case 'p': if (argv[i][2] != '\0') { nprocs = atoi(&argv[i][2]); } else { nprocs = atoi(&argv[++i][0]); } break; case 's': case 'S': dostats = TRUE; break; default: fprintf(stderr, "%s: Invalid option \'%c\'.\n", ProgName, argv[i][0]); exit(1); } i++; } if (i == argc) { Usage(); exit(1); } /* * Make sure nprocs is within valid range. */ if (nprocs < 1 || nprocs > MAX_PROCS) { fprintf(stderr, "%s: Valid range for #processors is [1, %d].\n", ProgName, MAX_PROCS); exit(1); } /* * Print command line parameters. */ printf("\n"); printf("Number of processors: \t%ld\n", nprocs); printf("Global shared memory size:\t%ld MB\n", MaxGlobMem); printf("Samples per pixel: \t%ld\n", NumSubRays); printf("\n"); /* * Initialize the shared memory environment and request the total * amount of amount of shared memory we might need. This * includes memory for the database, grid, and framebuffer. */ MaxGlobMem <<= 20; /* Convert MB to bytes. */ MAIN_INITENV(,MaxGlobMem + 512*1024) THREAD_INIT_FREE(); gm = (GMEM *)G_MALLOC(sizeof(GMEM)); /* * Perform shared environment initializations. */ gm->nprocs = nprocs; gm->pid = 0; gm->rid = 1; BARINIT(gm->start, nprocs) LOCKINIT(gm->pidlock) LOCKINIT(gm->ridlock) LOCKINIT(gm->memlock) ALOCKINIT(gm->wplock, nprocs) /* POSSIBLE ENHANCEMENT: Here is where one might distribute the raystruct data structure across physically distributed memories as desired. */ if (!GlobalHeapInit(MaxGlobMem)) { fprintf(stderr, "%s: Cannot initialize global heap.\n", ProgName); exit(1); } /* * Initialize HUG parameters, read environment and geometry files. */ Huniform_defaults(); ReadEnvFile(/* *argv*/argv[i]); ReadGeoFile(GeoFileName); OpenFrameBuffer(); /* * Compute view transform and its inverse. */ CreateViewMatrix(); MatrixCopy(vtrans, View.vtrans); MatrixInverse(Vinv, vtrans); MatrixCopy(View.vtransInv, Vinv); /* * Print out what we have so far. */ printf("Number of primitive objects: \t%ld\n", prim_obj_cnt); printf("Number of primitive elements:\t%ld\n", prim_elem_cnt); /* * Preprocess database into hierarchical uniform grid. */ if (TraversalType == TT_HUG) BuildHierarchy_Uniform(); /* * Now create slave processes. */ CLOCK(begin) CREATE(StartRayTrace, gm->nprocs); WAIT_FOR_END(gm->nprocs); CLOCK(end) /* * We are finished. Clean up, print statistics and run time. */ CloseFrameBuffer(PicFileName); PrintStatistics(); lapsed = (end - begin) & 0x7FFFFFFF; printf("TIMING STATISTICS MEASURED BY MAIN PROCESS:\n"); printf(" Overall start time %20lu\n", begin); printf(" Overall end time %20lu\n", end); printf(" Total time with initialization %20lu\n", lapsed); printf(" Total time without initialization %20lu\n", end - gm->par_start_time); if (dostats) { unsigned totalproctime, maxproctime, minproctime; printf("\n\n\nPER-PROCESS STATISTICS:\n"); printf("%20s%20s\n","Proc","Time"); printf("%20s%20s\n\n","","Tracing Rays"); for (i = 0; i < gm->nprocs; i++) printf("%20ld%20ld\n",i,gm->partime[i]); totalproctime = gm->partime[0]; minproctime = gm->partime[0]; maxproctime = gm->partime[0]; for (i = 1; i < gm->nprocs; i++) { totalproctime += gm->partime[i]; if (gm->partime[i] > maxproctime) maxproctime = gm->partime[i]; if (gm->partime[i] < minproctime) minproctime = gm->partime[i]; } printf("\n\n%20s%20d\n","Max = ",maxproctime); printf("%20s%20d\n","Min = ",minproctime); printf("%20s%20d\n","Avg = ",(int) (((double) totalproctime) / ((double) (1.0 * gm->nprocs)))); } MAIN_END }
void Frame() { long starttime,stoptime,exectime,i; Init_Options(); printf("*****Entering init_decomposition with num_nodes = %ld\n",num_nodes); fflush(stdout); Init_Decomposition(); printf("*****Exited init_decomposition with num_nodes = %ld\n",num_nodes); fflush(stdout); Global = (struct GlobalMemory *)NU_MALLOC(sizeof(struct GlobalMemory),0); BARINIT(Global->SlaveBarrier, num_nodes); BARINIT(Global->TimeBarrier, num_nodes); LOCKINIT(Global->IndexLock); LOCKINIT(Global->CountLock); ALOCKINIT(Global->QLock,MAX_NUMPROC+1); /* load dataset from file to each node */ #ifndef RENDER_ONLY CLOCK(starttime); Load_Map(filename); CLOCK(stoptime); mclock(stoptime,starttime,&exectime); printf("wall clock execution time to load map: %lu ms\n", exectime); #endif CLOCK(starttime); #ifndef RENDER_ONLY Compute_Normal(); #ifdef PREPROCESS Store_Normal(filename); #endif #else Load_Normal(filename); #endif CLOCK(stoptime); mclock(stoptime,starttime,&exectime); printf("wall clock execution time to compute normal: %lu ms\n", exectime); CLOCK(starttime); #ifndef RENDER_ONLY Compute_Opacity(); #ifdef PREPROCESS Store_Opacity(filename); #endif #else Load_Opacity(filename); #endif CLOCK(stoptime); mclock(stoptime,starttime,&exectime); printf("wall clock execution time to compute opacity: %lu ms\n", exectime); Compute_Pre_View(); shd_length = LOOKUP_SIZE; Allocate_Shading_Table(&shd_address,shd_length); /* allocate space for image */ image_len[X] = frust_len; image_len[Y] = frust_len; image_length = image_len[X] * image_len[Y]; Allocate_Image(&image_address,image_length); if (num_nodes == 1) { block_xlen = image_len[X]; block_ylen = image_len[Y]; num_blocks = 1; num_xblocks = 1; num_yblocks = 1; image_block = image_address; } else { num_xblocks = ROUNDUP((float)image_len[X]/(float)block_xlen); num_yblocks = ROUNDUP((float)image_len[Y]/(float)block_ylen); num_blocks = num_xblocks * num_yblocks; Lallocate_Image(&image_block,block_xlen*block_ylen); } CLOCK(starttime); #ifndef RENDER_ONLY Compute_Octree(); #ifdef PREPROCESS Store_Octree(filename); #endif #else Load_Octree(filename); #endif CLOCK(stoptime); mclock(stoptime,starttime,&exectime); printf("wall clock execution time to compute octree: %lu ms\n", exectime); #ifdef PREPROCESS return; #endif if (adaptive) { printf("1.\n"); for (i=0; i<NI; i++) { mask_image_len[i] = image_len[i]; } mask_image_length = image_length; Allocate_MImage(&mask_image_address, mask_image_length); if (num_nodes == 1) mask_image_block = (PIXEL *)mask_image_address; else Lallocate_Image(&mask_image_block, block_xlen*block_ylen); printf("2.\n"); } #ifndef RENDER_ONLY Deallocate_Map(&map_address); #endif Global->Index = NODE0; printf("\nRendering...\n"); printf("node\tframe\ttime\titime\trays\thrays\tsamples trilirped\n"); CREATE(Render_Loop, num_nodes); }
int main(int argc, char **argv) { /* default values for the control parameters of the driver */ /* are in parameters.h */ if ((argc == 2) &&((strncmp(argv[1],"-h",strlen("-h")) == 0) || (strncmp(argv[1],"-H",strlen("-H")) == 0))) { printf("Usage: WATER-NSQUARED < infile, where the contents of infile can be\nobtained from the comments at the top of water.C and the first scanf \nin main() in water.C\n\n"); exit(0); } /* POSSIBLE ENHANCEMENT: Here's where one might bind the main process (process 0) to a processor if one wanted to. Others can be bound in the WorkStart routine. */ six = stdout; /* output file */ TEMP =298.0; RHO =0.9980; CUTOFF=0.0; /* read input */ if (scanf("%lf%ld%ld%ld%ld%ld%ld%ld%ld%lf",&TSTEP, &NMOL, &NSTEP, &NORDER, &NSAVE, &NRST, &NPRINT, &NFMC,&NumProcs, &CUTOFF) != 10) fprintf(stderr,"ERROR: Usage: water < infile, which must have 10 fields, see SPLASH documentation or comment at top of water.C\n"); if (NMOL > MAXLCKS) { fprintf(stderr, "Just so you know ... Lock array in global.H has size %ld < %ld (NMOL)\n code will still run correctly but there may be lock contention\n\n", MAXLCKS, NMOL); } printf("Using %ld procs on %ld steps of %ld mols\n", NumProcs, NSTEP, NMOL); printf("Other parameters:\n\tTSTEP = %8.2e\n\tNORDER = %ld\n\tNSAVE = %ld\n",TSTEP,NORDER,NSAVE); printf("\tNRST = %ld\n\tNPRINT = %ld\n\tNFMC = %ld\n\tCUTOFF = %lf\n\n",NRST,NPRINT,NFMC,CUTOFF); /* SET UP SCALING FACTORS AND CONSTANTS */ NORD1=NORDER+1; CNSTNT(NORD1,TLC); /* sub. call to set up constants */ { /* Do memory initializations */ long pid; long mol_size = sizeof(molecule_type) * NMOL; long gmem_size = sizeof(struct GlobalMemory); /* POSSIBLE ENHANCEMENT: One might bind the first process to a processor here, even before the other (child) processes are bound later in mdmain(). */ MAIN_INITENV(,70000000,); /* macro call to initialize shared memory etc. */ THREAD_INIT_FREE(); /* allocate space for main (VAR) data structure as well as synchronization variables */ /* POSSIBLE ENHANCEMENT: One might want to allocate a process's portion of the VAR array and what it points to in its local memory */ VAR = (molecule_type *) G_MALLOC(mol_size); gl = (struct GlobalMemory *) G_MALLOC(gmem_size); /* POSSIBLE ENHANCEMENT: One might want to allocate process i's PFORCES[i] array in its local memory */ PFORCES = (double ****) G_MALLOC(NumProcs * sizeof (double ***)); { long i,j,k; for (i = 0; i < NumProcs; i++) { PFORCES[i] = (double ***) G_MALLOC(NMOL * sizeof (double **)); for (j = 0; j < NMOL; j++) { PFORCES[i][j] = (double **) G_MALLOC(NDIR * sizeof (double *)); for (k = 0; k < NDIR; k++) { PFORCES[i][j][k] = (double *) G_MALLOC(NATOM * sizeof (double)); } } } } /* macro calls to initialize synch varibles */ BARINIT(gl->start, NumProcs); BARINIT(gl->InterfBar, NumProcs); BARINIT(gl->PotengBar, NumProcs); LOCKINIT(gl->IOLock); LOCKINIT(gl->IndexLock); LOCKINIT(gl->IntrafVirLock); LOCKINIT(gl->InterfVirLock); LOCKINIT(gl->FXLock); LOCKINIT(gl->FYLock); LOCKINIT(gl->FZLock); if (NMOL < MAXLCKS) { ALOCKINIT(gl->MolLock, NMOL); } else { ALOCKINIT(gl->MolLock, MAXLCKS); } LOCKINIT(gl->KinetiSumLock); LOCKINIT(gl->PotengSumLock); /* set up control for static scheduling */ MolsPerProc = NMOL/NumProcs; StartMol[0] = 0; for (pid = 1; pid < NumProcs; pid += 1) { StartMol[pid] = StartMol[pid-1] + MolsPerProc; } StartMol[NumProcs] = NMOL; } SYSCNS(); /* sub. call to initialize system constants */ fprintf(six,"\nTEMPERATURE = %8.2f K\n",TEMP); fprintf(six,"DENSITY = %8.5f G/C.C.\n",RHO); fprintf(six,"NUMBER OF MOLECULES = %8ld\n",NMOL); fprintf(six,"NUMBER OF PROCESSORS = %8ld\n",NumProcs); fprintf(six,"TIME STEP = %8.2e SEC\n",TSTEP); fprintf(six,"ORDER USED TO SOLVE F=MA = %8ld \n",NORDER); fprintf(six,"NO. OF TIME STEPS = %8ld \n",NSTEP); fprintf(six,"FREQUENCY OF DATA SAVING = %8ld \n",NSAVE); fprintf(six,"FREQUENCY TO WRITE RST FILE= %8ld \n",NRST); fprintf(six,"SPHERICAL CUTOFF RADIUS = %8.4f ANGSTROM\n",CUTOFF); fflush(six); /* initialization routine; also reads displacements and sets up random velocities*/ INITIA(); /*.....start molecular dynamic loop */ gl->tracktime = 0; gl->intratime = 0; gl->intertime = 0; /* initialize Index to 1 so that the first created child gets id 1, not 0 */ gl->Index = 1; if (NSAVE > 0) /* not true for input decks provided */ fprintf(six,"COLLECTING X AND V DATA AT EVERY %4ld TIME STEPS \n",NSAVE); /* spawn helper processes, each getting its unique process id */ CLOCK(gl->computestart); CREATE(WorkStart, NumProcs); /* macro to make main process wait for all others to finish */ WAIT_FOR_END(NumProcs); CLOCK(gl->computeend); printf("COMPUTESTART (after initialization) = %lu\n",gl->computestart); printf("COMPUTEEND = %lu\n",gl->computeend); printf("COMPUTETIME (after initialization) = %lu\n",gl->computeend-gl->computestart); printf("Measured Time (2nd timestep onward) = %lu\n",gl->tracktime); printf("Intramolecular time only (2nd timestep onward) = %lu\n",gl->intratime); printf("Intermolecular time only (2nd timestep onward) = %lu\n",gl->intertime); printf("Other time (2nd timestep onward) = %lu\n",gl->tracktime - gl->intratime - gl->intertime); printf("\nExited Happily with XTT = %g (note: XTT value is garbage if NPRINT > NSTEP)\n", XTT); MAIN_END; } /* main.c */