void DestroyGrid (long my_id, time_info *local_time, long time_all)
{
   box *b_scan, *tb;
   particle *p;
   long i;
   long particle_cost;
   unsigned long start = 0, finish;

   if (time_all)
      CLOCK(start);

   b_scan = Local[my_id].Childless_Partition;
   MY_NUM_PARTICLES = 0;
   while (b_scan != NULL) {
      tb = b_scan;
      b_scan = b_scan->next;
      particle_cost = tb->cost / tb->num_particles;
      for (i = 0; i < tb->num_particles; i++) {
         if (MY_MAX_PARTICLES <= MY_NUM_PARTICLES) {
            LockedPrint("ERROR (P%ld) : Too many particles in local array\n",
                        my_id);
            exit(-1);
         }
         p = tb->particles[i];
         p->cost = particle_cost;
         MY_PARTICLES[MY_NUM_PARTICLES++] = p;
      }
   }
   if (my_id == 0)
      Grid = NULL;
   if (time_all) {
      CLOCK(finish);
      local_time[MY_TIME_STEP].other_time += finish - start;
   }
}
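/*
 * Illustrative sketch (not part of the benchmark source): DestroyGrid
 * flattens each childless box in this process's partition back into the
 * flat MY_PARTICLES array and charges every particle an equal share of
 * its box's cost.  The division is integer, so a box with cost 10 and
 * 4 particles leaves each particle with a cost of 2:
 *
 *    long box_cost = 10, n = 4;
 *    long particle_cost = box_cost / n;   // 2, remainder discarded
 *
 * That per-particle cost is presumably what later cost-based partitioning
 * uses as its load estimate.
 */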
void ListIterate (long my_id, box *b, box **list, long length, list_function function)
{
   long i;

   for (i = 0; i < length; i++) {
      if (list[i] == NULL) {
         LockedPrint("ERROR (P%ld) : NULL list entry\n", my_id);
         exit(-1);
      }
      (*function)(my_id, list[i], b);
   }
}
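/*
 * Usage sketch (hypothetical, not part of the benchmark): a list_function
 * callback receives the caller's id, the box taken from the list, and the
 * box that owns the list.  Something like the following could count list
 * entries:
 *
 *    void CountEntry (long my_id, box *entry, box *owner)
 *    {
 *       Local[my_id].num_entries++;   // hypothetical counter field
 *    }
 *
 *    ListIterate(my_id, b, b->some_list, b->some_list_length, CountEntry);
 *
 * The names some_list, some_list_length, and num_entries are assumptions
 * for illustration only; the real box structure defines its own lists.
 */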
/*
 * InitBox (long my_id, real x_center, real y_center, real length, box *parent)
 *
 * Args : the x_center and y_center of the center of the box;
 *        the length of the box;
 *        the address of b's parent (NULL for the root box).
 *
 * Returns : the address of the newly created box.
 *
 * Side Effects : Takes the next free box from this process's box heap and
 *    sets its center, length, and parent.  The box's level is derived from
 *    the parent (0 for the root, otherwise parent->level + 1).
 *
 */
box *
InitBox (long my_id, real x_center, real y_center, real length, box *parent)
{
   box *b;

   if (Local[my_id].Index_B_Heap == Local[my_id].Max_B_Heap) {
      LockedPrint("ERROR (P%ld) : Ran out of boxes\n", my_id);
      exit(-1);
   }
   b = &Local[my_id].B_Heap[Local[my_id].Index_B_Heap++];
   b->x_center = x_center;
   b->y_center = y_center;
   b->length = length;
   b->parent = parent;
   if (parent == NULL)
      b->level = 0;
   else
      b->level = parent->level + 1;
   return b;
}
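/*
 * Usage sketch (illustrative, not part of the benchmark): creating a root
 * box and one of its children.  Because the level is derived from the
 * parent, the child automatically ends up one level deeper:
 *
 *    box *root  = InitBox(my_id, 0.0, 0.0, 1.0, NULL);      // level 0
 *    box *child = InitBox(my_id, -0.25, 0.25, 0.5, root);   // level 1
 *
 * The coordinates and lengths here are made up; in the benchmark the
 * grid-construction code chooses them from the particle distribution.
 */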
void ParallelExecute ()
{
   long my_id;
   long num_boxes;
   unsigned long start, finish = 0;
   time_info *local_time;
   long time_all = 0;
   time_info *timing;
   unsigned long local_init_done = 0;

   BARINCLUDE(G_Memory->synch);
   local_time = (time_info *) malloc(sizeof(struct _Time_Info) * MAX_TIME_STEPS);
   BARRIER(G_Memory->synch, Number_Of_Processors);
   LOCK(G_Memory->count_lock);
   my_id = G_Memory->id;
   G_Memory->id++;
   UNLOCK(G_Memory->count_lock);

   /* POSSIBLE ENHANCEMENT:  Here is where one might pin processes to
      processors to avoid migration */

   if (my_id == 0) {
      time_all = 1;
   }
   else if (do_stats) {
      time_all = 1;
   }

   if (my_id == 0) {
      /* have to allocate extra space since it will construct the grid by
       * itself for the first time step */
      CreateParticleList(my_id, Total_Particles);
      InitParticleList(my_id, Total_Particles, 0);
   }
   else {
      CreateParticleList(my_id, ((Total_Particles * PDF)
                                 / Number_Of_Processors));
      InitParticleList(my_id, 0, 0);
   }

   // num_boxes = 1333 * (Total_Particles / (OCCUPANCY * MAX_PARTICLES_PER_BOX)) / 1000;
   num_boxes = 1333 * 4 * Total_Particles / (3 * MAX_PARTICLES_PER_BOX * 1000);
   if (my_id == 0)
      CreateBoxes(my_id, TOLERANCE * num_boxes);
   else
      CreateBoxes(my_id, TOLERANCE * num_boxes * BDF / Number_Of_Processors);

   if (my_id == 0) {
      LockedPrint("Starting FMM with %d processor%s\n", Number_Of_Processors,
                  (Number_Of_Processors == 1) ? "" : "s");
   }

   BARRIER(G_Memory->synch, Number_Of_Processors);

   Local[my_id].Time = 0.0;
   for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {

      if (MY_TIME_STEP == 2) {
         /* POSSIBLE ENHANCEMENT:  Here is where one might reset the
            statistics that one is measuring about the parallel execution */
      }

      if (MY_TIME_STEP == 2) {
         if (do_stats || my_id == 0) {
            CLOCK(local_init_done);
         }
      }

      if (MY_TIME_STEP == 0) {
         CLOCK(start);
      }
      else
         start = finish;
      ConstructGrid(my_id, local_time, time_all);
      ConstructLists(my_id, local_time, time_all);
      PartitionGrid(my_id, local_time, time_all);
      StepSimulation(my_id, local_time, time_all);
      DestroyGrid(my_id, local_time, time_all);
      CLOCK(finish);
      Local[my_id].Time += Timestep_Dur;
      MY_TIMING[MY_TIME_STEP].total_time = finish - start;
   }

   if (my_id == 0) {
      CLOCK(endtime);
   }

   BARRIER(G_Memory->synch, Number_Of_Processors);
   for (MY_TIME_STEP = 0; MY_TIME_STEP < Time_Steps; MY_TIME_STEP++) {
      timing = &(MY_TIMING[MY_TIME_STEP]);
      timing->other_time = local_time[MY_TIME_STEP].other_time;
      timing->construct_time = local_time[MY_TIME_STEP].construct_time;
      timing->list_time = local_time[MY_TIME_STEP].list_time;
      timing->partition_time = local_time[MY_TIME_STEP].partition_time;
      timing->pass_time = local_time[MY_TIME_STEP].pass_time;
      timing->inter_time = local_time[MY_TIME_STEP].inter_time;
      timing->barrier_time = local_time[MY_TIME_STEP].barrier_time;
      timing->intra_time = local_time[MY_TIME_STEP].intra_time;
   }
   Local[my_id].init_done_times = local_init_done;
   BARRIER(G_Memory->synch, Number_Of_Processors);
}
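/*
 * Spawning sketch (assumption, based on the usual parmacs pattern rather
 * than on anything in this file): each worker runs ParallelExecute and
 * claims its id from the shared G_Memory->id counter under count_lock, so
 * a driver only needs to create Number_Of_Processors instances and wait
 * for them, roughly:
 *
 *    CREATE(ParallelExecute, Number_Of_Processors);
 *    WAIT_FOR_END(Number_Of_Processors);
 *
 * The exact CREATE/WAIT_FOR_END invocation depends on the parmacs macro
 * package in use.
 */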