void WorkStart() /* routine that each created process starts at; it simply calls the timestep routine */ { long ProcID; double LocalXTT; LOCK(gl->IndexLock); ProcID = gl->Index++; UNLOCK(gl->IndexLock); BARINCLUDE(gl->start); BARINCLUDE(gl->InterfBar); BARINCLUDE(gl->PotengBar); ProcID = ProcID % NumProcs; /* POSSIBLE ENHANCEMENT: Here's where one might bind processes to processors if one wanted to. */ LocalXTT = MDMAIN(NSTEP,NPRINT,NSAVE,NORD1,ProcID); if (ProcID == 0) { XTT = LocalXTT; } }
VOID StartRayTrace() { INT pid; /* Our internal process id number. */ UINT begin; UINT end; THREAD_INIT_FREE(); LOCK(gm->pidlock) pid = gm->pid++; UNLOCK(gm->pidlock) BARINCLUDE(gm->start); if ((pid == 0) || (dostats)) CLOCK(begin); /* POSSIBLE ENHANCEMENT: Here's where one might lock processes down to processors if need be */ InitWorkPool(pid); InitRayTreeStack(Display.maxlevel, pid); /* * Wait for all processes to be created, initialize their work * pools, and arrive at this point; then proceed. This BARRIER * is absolutely required. Read comments in PutJob before * moving this barrier. */ BARRIER(gm->start, gm->nprocs) /* POSSIBLE ENHANCEMENT: Here's where one would RESET STATISTICS and TIMING if one wanted to measure only the parallel part */ // Reset Models CarbonEnableModels(); RayTrace(pid); if ((pid == 0) || (dostats)) { CLOCK(end); gm->partime[pid] = (end - begin) & 0x7FFFFFFF; if (pid == 0) gm->par_start_time = begin; } }
void radiosity() { long process_id; long rad_start, refine_done, vertex_start, vertex_done; THREAD_INIT_FREE(); LOCK(global->index_lock); process_id = global->index++; UNLOCK(global->index_lock); process_id = process_id % n_processors; BARINCLUDE(global->barrier); if ((process_id == 0) || (dostats)) CLOCK(rad_start); /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to processors to avoid migration */ /* POSSIBLE ENHANCEMENT: Here is where one might reset the statistics that one is measuring about the parallel execution */ // Enable Modeling CarbonEnableModels(); /* Decompose model objects into patches and build the BSP tree */ /* Create the initial tasks */ init_modeling_tasks(process_id) ; process_tasks(process_id) ; /* Gather rays & do BF refinement */ while( init_ray_tasks(process_id) ) { /* Wait till tasks are put in the queue */ BARRIER(global->barrier, n_processors); /* Then perform ray-gathering and BF-refinement till the solution converges */ process_tasks(process_id) ; } if ((process_id == 0) || (dostats)) CLOCK(refine_done); BARRIER(global->barrier, n_processors); if ((process_id == 0) || (dostats)) CLOCK(vertex_start); /* Compute area-weighted radiosity value at each vertex */ init_radavg_tasks( RAD_AVERAGING_MODE, process_id ) ; process_tasks(process_id) ; /* Then normalize the radiosity at vertices */ init_radavg_tasks( RAD_NORMALIZING_MODE, process_id ) ; process_tasks(process_id) ; if ((process_id == 0) || (dostats)) CLOCK(vertex_done); if ((process_id == 0) || (dostats)) { timing[process_id]->rad_start = rad_start; timing[process_id]->rad_time = vertex_done - rad_start; timing[process_id]->refine_time = refine_done - rad_start; timing[process_id]->vertex_time = vertex_done - vertex_start; timing[process_id]->wait_time = vertex_start - refine_done; } // Disable Models CarbonDisableModels(); }
void SlaveStart() { long i; long MyNum; double *upriv; long initdone; long finish; long l_transtime=0; long MyFirst; long MyLast; LOCK(Global->idlock); MyNum = Global->id; Global->id++; UNLOCK(Global->idlock); BARINCLUDE(Global->start); /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to processors to avoid migration */ BARRIER(Global->start, P); upriv = (double *) malloc(2*(rootN-1)*sizeof(double)); if (upriv == NULL) { fprintf(stderr,"Proc %ld could not malloc memory for upriv\n",MyNum); exit(-1); } for (i=0;i<2*(rootN-1);i++) { upriv[i] = umain[i]; } MyFirst = rootN*MyNum/P; MyLast = rootN*(MyNum+1)/P; TouchArray(x, trans, umain2, upriv, MyFirst, MyLast); BARRIER(Global->start, P); /* POSSIBLE ENHANCEMENT: Here is where one might reset the statistics that one is measuring about the parallel execution */ if ((MyNum == 0) || (dostats)) { CLOCK(initdone); } /* perform forward FFT */ FFT1D(1, M, N, x, trans, upriv, umain2, MyNum, &l_transtime, MyFirst, MyLast, pad_length, test_result, dostats); /* perform backward FFT */ if (test_result) { FFT1D(-1, M, N, x, trans, upriv, umain2, MyNum, &l_transtime, MyFirst, MyLast, pad_length, test_result, dostats); } if ((MyNum == 0) || (dostats)) { CLOCK(finish); Global->transtimes[MyNum] = l_transtime; Global->totaltimes[MyNum] = finish-initdone; } if (MyNum == 0) { Global->finishtime = finish; Global->initdonetime = initdone; } }
void ParallelExecute () { long my_id; long num_boxes; unsigned long start, finish = 0; time_info *local_time; long time_all = 0; time_info *timing; unsigned long local_init_done = 0; BARINCLUDE(G_Memory->synch); local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS); BARRIER(G_Memory->synch, Number_Of_Processors); LOCK(G_Memory->count_lock); my_id = G_Memory->id; G_Memory->id++; UNLOCK(G_Memory->count_lock); /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to processors to avoid migration */ if (my_id == 0) { time_all = 1; } else if (do_stats) { time_all = 1; } if (my_id == 0) { /* have to allocate extra space since it will construct the grid by * itself for the first time step */ CreateParticleList(my_id, Total_Particles); InitParticleList(my_id, Total_Particles, 0); } else { CreateParticleList(my_id, ((Total_Particles * PDF) / Number_Of_Processors)); InitParticleList(my_id, 0, 0); } // num_boxes = 1333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX)) /1000; num_boxes = 1333 * 4 * Total_Particles / (3 * MAX_PARTICLES_PER_BOX * 1000 ); if (my_id == 0) CreateBoxes(my_id, TOLERANCE * num_boxes); else CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors); if (my_id == 0) { LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors, (Number_Of_Processors == 1) ? 
"" : "s"); } BARRIER(G_Memory->synch, Number_Of_Processors); Local[my_id].Time = 0.0; for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) { if (MY_TIME_STEP == 2) { /* POSSIBLE ENHANCEMENT: Here is where one might reset the statistics that one is measuring about the parallel execution */ } if (MY_TIME_STEP == 2) { if (do_stats || my_id == 0) { CLOCK(local_init_done); } } if (MY_TIME_STEP == 0) { CLOCK(start); } else start = finish; ConstructGrid(my_id,local_time,time_all); ConstructLists(my_id,local_time,time_all); PartitionGrid(my_id,local_time,time_all); StepSimulation(my_id,local_time,time_all); DestroyGrid(my_id,local_time,time_all); CLOCK(finish); Local[my_id].Time += Timestep_Dur; MY_TIMING[MY_TIME_STEP].total_time = finish - start; } if (my_id == 0) { CLOCK(endtime); } BARRIER(G_Memory->synch, Number_Of_Processors); for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) { timing = &(MY_TIMING[MY_TIME_STEP]); timing->other_time = local_time[MY_TIME_STEP].other_time; timing->construct_time = local_time[MY_TIME_STEP].construct_time; timing->list_time = local_time[MY_TIME_STEP].list_time; timing->partition_time = local_time[MY_TIME_STEP].partition_time; timing->pass_time = local_time[MY_TIME_STEP].pass_time; timing->inter_time = local_time[MY_TIME_STEP].inter_time; timing->barrier_time = local_time[MY_TIME_STEP].barrier_time; timing->intra_time = local_time[MY_TIME_STEP].intra_time; } Local[my_id].init_done_times = local_init_done; BARRIER(G_Memory->synch, Number_Of_Processors); }
/*
 * Per-node rendering loop for the volume renderer.
 *
 * Each node claims an id, then for every rotation step: clears its
 * partition of the output image (the last node also clears any remainder),
 * lets the ROOT node advance the view, synchronizes, resets the shared
 * work-stealing queues, renders, and (ROOT only) writes the frame to a
 * grayscale TIFF file.
 */
void Render_Loop()
{
  long step,i;
  PIXEL *local_image_address;         /* start of this node's image slice */
  MPIXEL *local_mask_image_address;   /* start of this node's mask slice */
  char outfile[FILENAME_STRING_SIZE];
  long image_partition,mask_image_partition;  /* per-node slice sizes */
  float inv_num_nodes;
  long my_node;

  THREAD_INIT_FREE();

  /* Claim a unique node id. */
  LOCK(Global->IndexLock);
  my_node = Global->Index++;
  UNLOCK(Global->IndexLock);
  my_node = my_node%num_nodes;

  BARINCLUDE(Global->TimeBarrier);
  BARINCLUDE(Global->SlaveBarrier);

  /* POSSIBLE ENHANCEMENT:  Here's where one might bind the process to a
     processor, if one wanted to. */

  // Reset Models Here
  CarbonEnableModels();

  /* Slice sizes rounded up so num_nodes slices cover the whole image. */
  inv_num_nodes = 1.0/(float)num_nodes;
  image_partition = ROUNDUP(image_length*inv_num_nodes);
  mask_image_partition = ROUNDUP(mask_image_length*inv_num_nodes);

#ifdef DIM
  for (dim=0; dim<NM; dim++) {
#endif
  for (step=0; step<ROTATE_STEPS; step++) {  /* do rotation sequence */

    /* POSSIBLE ENHANCEMENT:  Here is where one might reset statistics, if
       one wanted to. */

    frame = step;

    /* initialize images here */
    local_image_address = image_address + image_partition * my_node;
    local_mask_image_address = mask_image_address + mask_image_partition * my_node;

    BARRIER(Global->SlaveBarrier,num_nodes);

    /* The last node clears from its slice start to the true end of the
       image (the rounded-up partition may overshoot); all other nodes
       clear exactly one partition. */
    if (my_node == num_nodes-1) {
      for (i=image_partition*my_node; i<image_length; i++)
        *local_image_address++ = background;
      if (adaptive)
        for (i=mask_image_partition*my_node; i<mask_image_length; i++)
          *local_mask_image_address++ = NULL_PIXEL;
    }
    else {
      for (i=0; i<image_partition; i++)
        *local_image_address++ = background;
      if (adaptive)
        for (i=0; i<mask_image_partition; i++)
          *local_mask_image_address++ = NULL_PIXEL;
    }

    /* Only ROOT advances the viewing transform for this step. */
    if (my_node == ROOT) {
#ifdef DIM
      Select_View((float)STEP_SIZE, dim);
#else
      Select_View((float)STEP_SIZE, Y);
#endif
    }

    BARRIER(Global->SlaveBarrier,num_nodes);

    /* Reset the shared work queues before rendering this frame. */
    Global->Counter = num_nodes;
    Global->Queue[num_nodes][0] = num_nodes;
    Global->Queue[my_node][0] = 0;

    Render(my_node);

    if (my_node == ROOT) {
      if (ROTATE_STEPS > 1) {
        /* NOTE(review): snprintf would bound this write; outfile size is
           FILENAME_STRING_SIZE -- confirm filename lengths before changing. */
#ifdef DIM
        sprintf(outfile, "%s_%ld",filename, 1000+dim*ROTATE_STEPS+step);
#else
        sprintf(outfile, "%s_%ld.tiff",filename, 1000+step);
#endif
        /* Store_Image(outfile);
           p = image_address;
           for (zz = 0;zz < image_length;zz++) {
             tiff_image[zz] = (long) ((*p)*256*256*256 + (*p)*256*256 + (*p)*256 + (*p));
             p++;
           }
           tiff_save_rgba(outfile,tiff_image,image_len[X],image_len[Y]); */
        WriteGrayscaleTIFF(outfile, image_len[X],image_len[Y],image_len[X], image_address);
      }
      else {
        /* Store_Image(filename);
           p = image_address;
           for (zz = 0;zz < image_length;zz++) {
             tiff_image[zz] = (long) ((*p)*256*256*256 + (*p)*256*256 + (*p)*256 + (*p));
             p++;
           }
           tiff_save_rgba(filename,tiff_image,image_len[X],image_len[Y]); */
        /* NOTE(review): strcat mutates the global filename; reached only
           when ROTATE_STEPS <= 1, but with DIM defined the outer dim loop
           would append ".tiff" repeatedly -- verify before enabling DIM. */
        strcat(filename,".tiff");
        WriteGrayscaleTIFF(filename, image_len[X],image_len[Y],image_len[X], image_address);
      }
    }
  }
#ifdef DIM
  }
#endif
}
void SlaveStart() { long i; long MyNum; double *upriv; //int a = 2*(rootN-1)*sizeof(double); //double upriv[a]; long initdone; long finish; long l_transtime=0; long MyFirst; long MyLast; BARRIER(Global->start, P); LOCK(Global->idlock); MyNum = Global->id; Global->id++; UNLOCK(Global->idlock); BARINCLUDE(Global->start); BARRIER(Global->start, P); //upriv = (double *) malloc(2*(rootN-1)*sizeof(double)); upriv = (double *) our_malloc(2*(rootN-1)*sizeof(double)); if (upriv == NULL) { fprintf(stderr,"Proc %ld could not malloc memory for upriv\n",MyNum); exit(-1); } for (i=0;i<2*(rootN-1);i++) { upriv[i] = umain[i]; } MyFirst = rootN*MyNum/P; MyLast = rootN*(MyNum+1)/P; TouchArray(x, trans, umain2, upriv, MyFirst, MyLast); BARRIER(Global->start, P); if ((MyNum == 0) || (dostats)) { CLOCK(initdone); } //printf("\nentrando em forward FFT\n"); /* perform forward FFT */ FFT1D(1, M, N, x, trans, upriv, umain2, MyNum, &l_transtime, MyFirst, MyLast, pad_length, test_result, dostats); /* perform backward FFT */ if (test_result) { FFT1D(-1, M, N, x, trans, upriv, umain2, MyNum, &l_transtime, MyFirst, MyLast, pad_length, test_result, dostats); } if ((MyNum == 0) || (dostats)) { CLOCK(finish); Global->transtimes[MyNum] = l_transtime; Global->totaltimes[MyNum] = finish-initdone; } if (MyNum == 0) { Global->finishtime = finish; Global->initdonetime = initdone; } join_point(&myJoinPoint); }