// Calls apply_force for every ordered pair (a, b) in which a is a member of
// grid[center] and b is a member of grid[nbr]. The first argument is always
// the particle from the center cell.
void interact_cells( int center, int nbr )
{
    for ( int a = 0; a < grid[center].num_particles; ++a )
    {
        int b = 0;
        while ( b < grid[nbr].num_particles )
        {
            // The original author tagged this call "BAD" without recording why.
            apply_force( *grid[center].members[a], *grid[nbr].members[b] );
            ++b;
        }
    }
}
// // benchmarking program // int main( int argc, char **argv ) { if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set the number of particles\n" ); printf( "-o <filename> to specify the output file name\n" ); return 0; } int n = read_int( argc, argv, "-n", 1000 ); char *savename = read_string( argc, argv, "-o", NULL ); FILE *fsave = savename ? fopen( savename, "w" ) : NULL; particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) ); set_size( n ); init_particles( n, particles ); // // simulate a number of time steps // double simulation_time = read_timer( ); for( int step = 0; step < NSTEPS; step++ ) { // // compute forces // for( int i = 0; i < n; i++ ) { particles[i].ax = particles[i].ay = 0; for (int j = 0; j < n; j++ ) apply_force( particles[i], particles[j] ); } // // move particles // for( int i = 0; i < n; i++ ) move( particles[i] ); // // save if necessary // if( fsave && (step%SAVEFREQ) == 0 ) save( fsave, n, particles ); } simulation_time = read_timer( ) - simulation_time; printf( "n = %d, simulation time = %g seconds\n", n, simulation_time ); free( particles ); if( fsave ) fclose( fsave ); return 0; }
void pd::player::move_right() { m_stoped = false; m_ticks_until_stop = SDL_GetTicks(); flipped(false); pd::vec2 vec = linear_velocity(); if (vec.x < (airborne() ? max_airborne_velocity : max_velocity)) apply_force(movement_force, 0.0f); }
// Advances the rocket by one gene. Does nothing once the rocket has hit an
// obstacle or the target. Otherwise it applies the force stored in the
// current gene, steps gene_counter (wrapping at the DNA's gene count), calls
// update(), checks the given obstacles, and sets theta from the direction of
// the velocity vector via toDegrees(atan2f(y, x)).
void Rocket::run( const vector< shared_ptr< Obstacle > >& obstacles )
{
    if ( hit_obstacle || hit_target )
    {
        return;
    }

    apply_force( dna->get_gene( gene_counter ) );
    gene_counter = ( gene_counter + 1 ) % dna->get_genes_length();

    update();
    check_obstacles( obstacles );

    theta = toDegrees( atan2f( velocity.y, velocity.x ) );
}
// Steers toward `target`.
//   target - point to steer toward
//   force  - factor applied to the steering vector before it is limited
//   dt     - forwarded unchanged to apply_force
// The desired velocity is the offset from `location` to `target`, limited to
// maxspeed. The steering vector is (desired - velocity) * force, limited to
// maxforce.
void seek(PVector target, float force, float dt)
{
    PVector to_target = sub(target, location);
    PVector desired = limit(to_target, maxspeed);

    PVector correction = sub(desired, velocity);
    PVector scaled = mult(correction, force);
    PVector steer = limit(scaled, maxforce);

    apply_force(steer, dt);
}
// Per-frame player update.
//   dt - time step forwarded to the animations and the weapon test
// Always applies the constant force (0, 800) and advances both animations.
// While shooting, it also runs the weapon damage test and drains energy at
// 0.35 per unit of dt, clamped at zero. Shooting stops when energy reaches 0.
void pd::player::update(float dt)
{
    apply_force(0.0f, 800.0f);

    m_thermal_idle_anim.update(dt);
    m_flamethrower_anim.update(dt);

    if (!m_shooting)
        return;

    weapon_damage_test(dt);

    m_energy = std::max(0.0f, m_energy - dt * 0.35f);
    if (m_energy == 0.0f)
    {
        m_shooting = false;
    }
}
// // This is where the action happens // void *thread_routine( void *pthread_id ) { int thread_id = *(int*)pthread_id; int particles_per_thread = (n + n_threads - 1) / n_threads; int first = min( thread_id * particles_per_thread, n ); int last = min( (thread_id+1) * particles_per_thread, n ); // // simulate a number of time steps // for( int step = 0; step < NSTEPS; step++ ) { // // compute forces // for( int i = first; i < last; i++ ) { particles[i].ax = particles[i].ay = 0; for (int j = 0; j < n; j++ ) apply_force( particles[i], particles[j] ); } pthread_barrier_wait( &barrier ); // // move particles // for( int i = first; i < last; i++ ) move( particles[i] ); pthread_barrier_wait( &barrier ); // // save if necessary // if( thread_id == 0 && fsave && (step%SAVEFREQ) == 0 ) save( fsave, n, particles ); } return NULL; }
void Step(float dt) { if (impulse[0]) force[0]+=impulse[0]; if (impulse[1]) force[1]+=impulse[1]; if (impulse[2]) force[2]+=impulse[2]; float weight; weight = mass * gravity[1]; impulse[0] = impulse[0] * 0.9f; if (abs(impulse[0])<0.001f) impulse[0] = 0.0f; impulse[1] = impulse[1] * 0.9f; if (abs(impulse[1])<0.001f) impulse[1] = 0.0f; impulse[2] = impulse[2] * 0.9f; if (abs(impulse[2])<0.001f) impulse[2] = 0.0f; if (!active) { return; } apply_force(&accel[0], &force[0], mass); if (active) accel[0] += gravity[0]; if (active) accel[1] += gravity[1]; if (active) accel[2] += gravity[2]; apply_acceleration(&vel[0], &accel[0], dt); vel[0]*= friction; vel[1]*= friction; vel[2]*= friction; apply_velocity(&pos[0], &vel[0], dt); momentum[0] = mass * vel[0]; momentum[1] = mass * vel[1]; momentum[2] = mass * vel[2]; }
// // benchmarking program // int main( int argc, char **argv ) { if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set the number of particles\n" ); printf( "-s <int> to set the number of steps in the simulation\n"); printf( "-o <filename> to specify the output file name\n" ); printf( "-f <int> to set the frequency of saving particle coordinates (e.g. each ten's step)"); return 0; } int n = read_int(argc, argv, "-n", 1000); int s = read_int(argc, argv, "-s", NSTEPS); int f = read_int(argc, argv, "-f", SAVEFREQ); char *savename = read_string( argc, argv, "-o", NULL ); FILE *fsave = savename ? fopen( savename, "w" ) : NULL; particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) ); set_size( n ); init_particles( n, particles ); prtcl::GridHashSet* grid = new prtcl::GridHashSet(n, size, cutoff); insert_into_grid(n, particles, grid); // // simulate a number of time steps // double simulation_time = read_timer( ); for( int step = 0; step < s; step++ ) { // // compute forces // for (int i = 0; i < n; ++i) { particles[i].ax = particles[i].ay = 0; // Iterate over all neighbors in the surrounding of current particle. // This should be constant w.r.t. n. prtcl::GridHashSet::surr_iterator neighbors_it; prtcl::GridHashSet::surr_iterator neighbors_it_end = grid->surr_end( particles[i]); for (neighbors_it = grid->surr_begin(particles[i]); neighbors_it != neighbors_it_end; ++neighbors_it) { apply_force(particles[i], **neighbors_it); } } // // move particles // for( int i = 0; i < n; i++ ) { move( particles[i] ); } // Update grid hash set. 
grid->clear(); insert_into_grid(n, particles, grid); // // save if necessary // if( fsave && (step % f) == 0 ) save( fsave, n, particles ); } simulation_time = read_timer( ) - simulation_time; printf("n = %d, steps = %d, savefreq = %d, simulation time = %g seconds\n", n, s, f, simulation_time); delete grid; free( particles ); if( fsave ) fclose( fsave ); return 0; }
// Adds new_guy to this bin and exchanges forces between it and the particles
// already known to this bin.
//
// Steps, in order:
//   1. For each particle already in this bin, apply the force in both
//      directions (resident <- new_guy, then new_guy <- resident).
//   2. Append new_guy to this bin's particle list.
//   3. Call Mark(this) on the neighbours in slots 0..8 except slot 4.
//      NOTE(review): slot 4 is presumably this bin itself - confirm.
//   4. For every bin in `markings`, accumulate forces on new_guy from all of
//      that bin's particles. Then, in a second pass over the same bins,
//      accumulate the force from new_guy on each of those particles.
//
// Returns 0. The -1 failure path is a placeholder that is never taken.
//
// Locking: the original author's notes describe locks that should be held
// around these steps (this bin's particles and markings, and each marked
// bin's particles). No locking is performed by this function.
int BinArray::Bin::Assign (particle_t& new_guy)
{
    typedef std::list<particle_t*>::iterator ParticleIter;
    typedef std::list<Bin*>::iterator        BinIter;

    /* Placeholder for "lock unavailable": always false for now. */
    if ( 0 /* Fail */ )
        return -1;

    // Step 1: mutual forces with particles already assigned to this bin.
    for ( ParticleIter resident = particles.begin();
          resident != particles.end(); ++resident )
    {
        apply_force ( **resident, new_guy );
        apply_force ( new_guy, **resident );
    }

    // Step 2: the new particle now belongs to this bin.
    particles.push_back(&new_guy);

    // Step 3: mark every neighbour slot except slot 4.
    for (int slot = 0; slot < 9; ++slot)
    {
        if (slot == 4)
            continue;
        neighbors[slot]->Mark(this);
    }

    // Step 4a: forces _from_ particles in marked bins, acting on new_guy.
    for ( BinIter marked = markings.begin(); marked != markings.end(); ++marked )
    {
        for ( ParticleIter other = (*marked)->particles.begin();
              other != (*marked)->particles.end(); ++other )
        {
            apply_force ( new_guy, **other );
        }
    }

    // Step 4b: forces _on_ particles in marked bins, caused by new_guy.
    for ( BinIter marked = markings.begin(); marked != markings.end(); ++marked )
    {
        for ( ParticleIter other = (*marked)->particles.begin();
              other != (*marked)->particles.end(); ++other )
        {
            apply_force ( **other, new_guy );
        }
    }

    return 0;
}
// // This is the subblock force computing routine // void compute_sub_forces( int blockId, int bIdx, int bIdy ) { // Compute Forces double leftBnd, rightBnd, topBnd, botBnd; double leftDist, rightDist, topDist, botDist; int bLeft, bRight, bBottom, bTop, bTopLeft, bTopRight, bBotLeft, bBotRight; for (int i = 0; i<bin_part_num[blockId]; ++i) { for (int j = 0; j<bin_part_num[blockId]; ++j) { // The jth particle in bth apply_force(*bins[blockId*MAX_PART_SUBB + i], *bins[blockId*MAX_PART_SUBB + j]); } leftBnd = bIdx*subblks_size; rightBnd = (bIdx*subblks_size) + subblks_size; topBnd = bIdy*subblks_size; botBnd = bIdy*subblks_size + subblks_size; leftDist = bins[blockId*MAX_PART_SUBB + i]->x - leftBnd; rightDist = rightBnd - bins[blockId*MAX_PART_SUBB + i]->x; topDist = bins[blockId*MAX_PART_SUBB + i]->y - topBnd; botDist = botBnd - bins[blockId*MAX_PART_SUBB + i]->y; // Consider 8 different adjacent subBlocks if (leftDist<=cutoff && bIdx != 0) { bLeft = blockId - totblks_num; for (int k=0; k<bin_part_num[bLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bLeft*MAX_PART_SUBB + k]); } } //2 if (rightDist<=cutoff && bIdx != totblks_num-1) { bRight = blockId + totblks_num; for (int k=0; k<bin_part_num[bRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bRight*MAX_PART_SUBB + k]); } } //3 if (topDist<=cutoff && bIdy != 0) { bTop = blockId - 1; for (int k=0; k<bin_part_num[bTop]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTop*MAX_PART_SUBB + k]); } } //4 if (botDist<=cutoff && bIdy != totblks_num-1) { bBottom = blockId + 1; for (int k=0; k<bin_part_num[bBottom]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBottom*MAX_PART_SUBB + k]); } } //5 if (topDist<=cutoff && leftDist<=cutoff && bIdy != 0 && bIdx !=0) { bTopLeft = blockId - totblks_num - 1; for (int k=0; k<bin_part_num[bTopLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTopLeft*MAX_PART_SUBB + k]); } } //6 if (botDist<=cutoff && 
leftDist<=cutoff && bIdy != totblks_num-1 && bIdx != 0) { bBotLeft = blockId-totblks_num+1; for (int k=0; k<bin_part_num[bBotLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBotLeft*MAX_PART_SUBB + k]); } } //7 if (topDist<=cutoff && rightDist<=cutoff && bIdy != 0 && bIdx != totblks_num-1) { bTopRight = blockId+totblks_num-1; for (int k=0; k<bin_part_num[bTopRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTopRight*MAX_PART_SUBB + k]); } } //8 if (botDist<=cutoff && rightDist<=cutoff && bIdy!=totblks_num-1 && bIdx!=totblks_num-1) { bBotRight = blockId+totblks_num+1; for (int k=0; k<bin_part_num[bBotRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBotRight*MAX_PART_SUBB + k]); } } } }
// // This is where we compute forces for a given block // void compute_sup_forces( int sup_blockId, int step ) { int xIdx = (sup_blockId / blks_num) * subblks_num; int xIdy = (sup_blockId % blks_num) * subblks_num; int bIdx, bIdy, blockId; double leftBnd, rightBnd, topBnd, botBnd; double leftDist, rightDist, topDist, botDist; int bLeft, bRight, bBottom, bTop, bTopLeft, bTopRight, bBotLeft, bBotRight; //handle the middle for (int xsub = 0; xsub < subblks_num; ++xsub) { bIdx = xIdx + xsub; for (int ysub = 0; ysub < subblks_num; ++ysub) { bIdy = xIdy + ysub; blockId = bIdx * totblks_num + bIdy; //compute_sub_forces(blockId, bIdx, bIdy); for (int i = 0; i<bin_part_num[blockId]; ++i) { for (int j = 0; j<bin_part_num[blockId]; ++j) { apply_force(*bins[blockId*MAX_PART_SUBB + i], *bins[blockId*MAX_PART_SUBB + j]); } leftBnd = bIdx*subblks_size; rightBnd = (bIdx*subblks_size) + subblks_size; topBnd = bIdy*subblks_size; botBnd = bIdy*subblks_size + subblks_size; leftDist = bins[blockId*MAX_PART_SUBB + i]->x - leftBnd; rightDist = rightBnd - bins[blockId*MAX_PART_SUBB + i]->x; topDist = bins[blockId*MAX_PART_SUBB + i]->y - topBnd; botDist = botBnd - bins[blockId*MAX_PART_SUBB + i]->y; if (leftDist < 0) printf("leftDist <0 \n"); if (rightDist < 0) printf("rightDist <0 \n"); if (botDist < 0) printf("botDist <0 \n"); if (topDist < 0) printf("topDist <0 \n"); // Consider 8 different adjacent subBlocks if (leftDist<=cutoff && bIdx != 0) { bLeft = blockId - totblks_num; for (int k=0; k<bin_part_num[bLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bLeft*MAX_PART_SUBB + k]); } } //2 if (rightDist<=cutoff && bIdx != totblks_num-1) { bRight = blockId + totblks_num; for (int k=0; k<bin_part_num[bRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bRight*MAX_PART_SUBB + k]); } } //3 if (topDist<=cutoff && bIdy != 0) { bTop = blockId - 1; for (int k=0; k<bin_part_num[bTop]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTop*MAX_PART_SUBB + k]); 
} } //4 if (botDist<=cutoff && bIdy != totblks_num-1) { bBottom = blockId + 1; for (int k=0; k<bin_part_num[bBottom]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBottom*MAX_PART_SUBB + k]); } } //5 if (topDist<=cutoff && leftDist<=cutoff && bIdy != 0 && bIdx !=0) { bTopLeft = blockId - totblks_num - 1; for (int k=0; k<bin_part_num[bTopLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTopLeft*MAX_PART_SUBB + k]); } } //6 if (botDist<=cutoff && leftDist<=cutoff && bIdy != totblks_num-1 && bIdx != 0) { bBotLeft = blockId-totblks_num+1; for (int k=0; k<bin_part_num[bBotLeft]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBotLeft*MAX_PART_SUBB + k]); } } //7 if (topDist<=cutoff && rightDist<=cutoff && bIdy != 0 && bIdx != totblks_num-1) { bTopRight = blockId+totblks_num-1; for (int k=0; k<bin_part_num[bTopRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bTopRight*MAX_PART_SUBB + k]); } } //8 if (botDist<=cutoff && rightDist<=cutoff && bIdy!=totblks_num-1 && bIdx!=totblks_num-1) { bBotRight = blockId+totblks_num+1; for (int k=0; k<bin_part_num[bBotRight]; ++k) { apply_force(*bins[blockId*MAX_PART_SUBB + i],*bins[bBotRight*MAX_PART_SUBB + k]); } } } } } }
// // benchmarking program // int main( int argc, char **argv ) { int navg, nabsavg=0; double dmin, absmin=1.0,davg,absavg=0.0; double rdavg,rdmin; int rnavg; // // process command line parameters // if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set the number of particles\n" ); printf( "-o <filename> to specify the output file name\n" ); printf( "-s <filename> to specify a summary file name\n" ); printf( "-no turns off all correctness checks and particle output\n"); return 0; } int n = read_int( argc, argv, "-n", 1000 ); char *savename = read_string( argc, argv, "-o", NULL ); char *sumname = read_string( argc, argv, "-s", NULL ); // // set up MPI // int n_proc, rank; MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &n_proc ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); // // allocate generic resources // FILE *fsave = savename && rank == 0 ? fopen( savename, "w" ) : NULL; FILE *fsum = sumname && rank == 0 ? 
fopen ( sumname, "a" ) : NULL; particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) ); MPI_Datatype PARTICLE; MPI_Type_contiguous( 6, MPI_DOUBLE, &PARTICLE ); MPI_Type_commit( &PARTICLE ); // // set up the data partitioning across processors // int particle_per_proc = (n + n_proc - 1) / n_proc; int *partition_offsets = (int*) malloc( (n_proc+1) * sizeof(int) ); for( int i = 0; i < n_proc+1; i++ ) partition_offsets[i] = min( i * particle_per_proc, n ); int *partition_sizes = (int*) malloc( n_proc * sizeof(int) ); for( int i = 0; i < n_proc; i++ ) partition_sizes[i] = partition_offsets[i+1] - partition_offsets[i]; // // allocate storage for local partition // int nlocal = partition_sizes[rank]; particle_t *local = (particle_t*) malloc( nlocal * sizeof(particle_t) ); // // initialize and distribute the particles (that's fine to leave it unoptimized) // set_size( n ); if( rank == 0 ) init_particles( n, particles ); MPI_Scatterv( particles, partition_sizes, partition_offsets, PARTICLE, local, nlocal, PARTICLE, 0, MPI_COMM_WORLD ); // // simulate a number of time steps // double simulation_time = read_timer( ); for( int step = 0; step < NSTEPS; step++ ) { navg = 0; dmin = 1.0; davg = 0.0; // // collect all global data locally (not good idea to do) // MPI_Allgatherv( local, nlocal, PARTICLE, particles, partition_sizes, partition_offsets, PARTICLE, MPI_COMM_WORLD ); // // save current step if necessary (slightly different semantics than in other codes) // if( find_option( argc, argv, "-no" ) == -1 ) if( fsave && (step%SAVEFREQ) == 0 ) save( fsave, n, particles ); // // compute all forces // for( int i = 0; i < nlocal; i++ ) { local[i].ax = local[i].ay = 0; for (int j = 0; j < n; j++ ) apply_force( local[i], particles[j], &dmin, &davg, &navg ); } if( find_option( argc, argv, "-no" ) == -1 ) { MPI_Reduce(&davg,&rdavg,1,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); MPI_Reduce(&navg,&rnavg,1,MPI_INT,MPI_SUM,0,MPI_COMM_WORLD); 
MPI_Reduce(&dmin,&rdmin,1,MPI_DOUBLE,MPI_MIN,0,MPI_COMM_WORLD); if (rank == 0){ // // Computing statistical data // if (rnavg) { absavg += rdavg/rnavg; nabsavg++; } if (rdmin < absmin) absmin = rdmin; } } // // move particles // for( int i = 0; i < nlocal; i++ ) move( local[i] ); } simulation_time = read_timer( ) - simulation_time; if (rank == 0) { printf( "n = %d, simulation time = %g seconds", n, simulation_time); if( find_option( argc, argv, "-no" ) == -1 ) { if (nabsavg) absavg /= nabsavg; // // -the minimum distance absmin between 2 particles during the run of the simulation // -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8 // -A simulation were particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05 // // -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting // printf( ", absmin = %lf, absavg = %lf", absmin, absavg); if (absmin < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting"); if (absavg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting"); } printf("\n"); // // Printing summary data // if( fsum) fprintf(fsum,"%d %d %g\n",n,n_proc,simulation_time); } // // release resources // if ( fsum ) fclose( fsum ); free( partition_offsets ); free( partition_sizes ); free( local ); free( particles ); if( fsave ) fclose( fsave ); MPI_Finalize( ); return 0; }
// An old main(), including a serial bottleneck. I've left it here for // now for benchmarking purposes. int bottlenecked_main(int argc, char **argv) { int numthreads; if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set number of particles\n" ); printf( "-o <filename> to specify the output file name\n" ); printf( "-s <filename> to specify a summary file name\n" ); printf( "-no turns off all correctness checks and particle output\n"); printf( "-p <int> to set the (maximum) number of threads used\n"); return 0; } const int n = read_int( argc, argv, "-n", 1000 ); const bool fast = (find_option( argc, argv, "-no" ) != -1); const char *savename = read_string( argc, argv, "-o", NULL ); const char *sumname = read_string( argc, argv, "-s", NULL ); const int num_threads_override = read_int( argc, argv, "-p", 0); FILE *fsave = savename ? fopen( savename, "w" ) : NULL; FILE *fsum = sumname ? fopen ( sumname, "a" ) : NULL; const double size = set_size( n ); // We need to set the size of a grid square so that the average number of // particles per grid square is constant. The simulation already ensures // that the average number of particles in an arbitrary region is constant // and proportional to the area. So this is just a constant. const double grid_square_size = sqrt(0.0005) + 0.000001; const int num_grid_squares_per_side = size / grid_square_size; printf("Using %d grid squares of side-length %f for %d particles.\n", num_grid_squares_per_side*num_grid_squares_per_side, grid_square_size, n); std::unique_ptr<std::vector<particle_t> > particles = init_particles(n); if (num_threads_override > 0) { omp_set_dynamic(0); omp_set_num_threads(num_threads_override); } // // simulate a number of time steps // double simulation_time = read_timer( ); int max_num_threads = omp_get_max_threads(); // User-defined reductions aren't available in the version of OMP we're // using. 
Instead, we accumulate per-thread stats in this global array // and reduce manually when we're done. Stats per_thread_stats[max_num_threads]; // Shared across threads. std::unique_ptr<Grid> g(new Grid(size, num_grid_squares_per_side)); #pragma omp parallel { numthreads = omp_get_num_threads(); for (int step = 0; step < 1000; step++) { //TODO: Does this need to be declared private? int thread_idx; #pragma omp single g.reset(new Grid(size, num_grid_squares_per_side, *particles)); //TODO: Could improve data locality by blocking according to the block // structure of the grid. That would require keeping track, dynamically, // of the locations of each particle. It would be interesting to test // whether manually allocating sub-blocks (as in the distributed memory // code) to threads improves things further. #pragma omp for for (int i = 0; i < n; i++) { thread_idx = omp_get_thread_num(); particle_t& p = (*particles)[i]; p.ax = p.ay = 0; std::unique_ptr<SimpleIterator<particle_t&> > neighbors = (*g).neighbor_iterator(p); while (neighbors->hasNext()) { particle_t& neighbor = neighbors->next(); apply_force(p, neighbor, per_thread_stats[thread_idx]); } } // There is an implicit barrier here, which is important for correctness. // (Technically, some asynchrony could be allowed: A thread's sub-block // can be moved once it receives force messages from its neighboring // sub-blocks.) // // move particles // #pragma omp for for (int i = 0; i < n; i++) { move((*particles)[i]); } if (!fast) { // // save if necessary // #pragma omp master if( fsave && (step%SAVEFREQ) == 0 ) { save( fsave, n, (*particles).data() ); } } } } simulation_time = read_timer( ) - simulation_time; // Could do a tree reduce here, but it seems unnecessary. 
Stats overall_stats; for (int thread_idx = 0; thread_idx < max_num_threads; thread_idx++) { overall_stats.aggregate_left(per_thread_stats[thread_idx]); } printf( "n = %d,threads = %d, simulation time = %g seconds", n,numthreads, simulation_time); if (!fast) { // // -the minimum distance absmin between 2 particles during the run of the simulation // -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8 // -A simulation were particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05 // // -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting // printf( ", absmin = %lf, absavg = %lf", overall_stats.min, overall_stats.avg); if (overall_stats.min < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting"); if (overall_stats.avg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting"); } printf("\n"); // // Printing summary data // if( fsum) fprintf(fsum,"%d %d %g\n",n,numthreads,simulation_time); // // Clearing space // if( fsum ) fclose( fsum ); if( fsave ) fclose( fsave ); return 0; }
// // benchmarking program // int main(int argc, char **argv) { if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set number of particles\n" ); printf( "-o <filename> to specify the output file name\n" ); printf( "-s <filename> to specify a summary file name\n" ); printf( "-no turns off all correctness checks and particle output\n"); printf( "-p <int> to set the (maximum) number of threads used\n"); return 0; } const int n = read_int( argc, argv, "-n", 1000 ); const bool fast = (find_option( argc, argv, "-no" ) != -1); const char *savename = read_string( argc, argv, "-o", NULL ); const char *sumname = read_string( argc, argv, "-s", NULL ); const int num_threads_override = read_int( argc, argv, "-p", 0); FILE *fsave = ((!fast) && savename) ? fopen( savename, "w" ) : NULL; FILE *fsum = sumname ? fopen ( sumname, "a" ) : NULL; const double size = set_size( n ); // We need to set the size of a grid square so that the average number of // particles per grid square is constant. The simulation already ensures // that the average number of particles in an arbitrary region is constant // and proportional to the area. So this is just a constant. const double grid_square_size = sqrt(0.0005) + 0.000001; const int num_grid_squares_per_side = size / grid_square_size; printf("Using %d grid squares of side-length %f for %d particles.\n", num_grid_squares_per_side*num_grid_squares_per_side, grid_square_size, n); std::unique_ptr<std::vector<particle_t> > particles = init_particles(n); if (num_threads_override > 0) { omp_set_dynamic(0); // fixed number of threads omp_set_num_threads(num_threads_override); // assign number of threads } // // simulate a number of time steps // double simulation_time = read_timer( ); int max_num_threads = omp_get_max_threads(); int num_actual_threads; // User-defined reductions aren't available in the version of OMP we're // using. 
Instead, we accumulate per-thread stats in this global array // and reduce manually when we're done. Stats per_thread_stats[max_num_threads]; // Shared across threads. std::unique_ptr<OmpThreadsafeGrid> old_grid(new OmpThreadsafeGrid(size, num_grid_squares_per_side)); std::unique_ptr<OmpThreadsafeGrid> next_grid(new OmpThreadsafeGrid(size, num_grid_squares_per_side)); #pragma omp parallel { #pragma omp atomic write num_actual_threads = omp_get_num_threads(); //get number of actual threads int thread_idx = omp_get_thread_num(); Stats thread_stats; for (int step = 0; step < 1000; step++) { // If this is the first step, we must initialize the grid here // without respecting cache locality. Since we cannot use the existing // grid, we have to just divide the particles arbitrarily. This // means that the subsequent code for simulating forces and movement // will have almost no cache locality on the first iteration: Each thread // has picked up an arbitrary subset of the particles to insert into the // grid, and then the threads are responsible for simulating a different, // mostly-disjoint subset of the particles. On subsequent iterations, // only the particles that have moved will cause cache misses, so we // should have much better locality. If we want to really optimize, // it may be worth rethinking how we store particles and communicate among // threads. But at that point we might as well write distributed-memory // code. if (step == 0) { #pragma omp for for (int i = 0; i < n; i++) { next_grid->add((*particles)[i]); } } // Here we are building the grid that maps locations to sets of // particles. This step does O(n) work, so it is a bottleneck if done // serially. For performance comparisons, we have two versions of the // grid-formation code. The second simply forms the grid serially, in a // single arbitrary thread. The first is parallel and attempts // some cache locality. 
Each thread is responsible for re-inserting // the grid elements that previously lay in its subgrid. For that reason // we need to keep around the old grid while we are building the new one; // this is why we have old_grid and next_grid. // NOTE: We could instead re-insert each particle right after moving it. // This would be faster, but it would require us to think about // simultaneous parallel delete and add, while the current scheme needs // only support parallel add. (Deleting the entire grid at once is an // O(1) operation, so we can do it in one thread with a barrier.) // (The actual simulation operations are read-only on the grid structure // and write to each particle only once, so we can simply use two // barriers to protect them. #pragma omp single { old_grid.swap(next_grid); next_grid.reset(new OmpThreadsafeGrid(size, num_grid_squares_per_side)); } // Now insert each particle into the new grid. { std::unique_ptr<SimpleIterator<particle_t&> > particles_to_insert = old_grid->subgrid(thread_idx, num_actual_threads); while (particles_to_insert->hasNext()) { particle_t& p = particles_to_insert->next(); next_grid->add(p); } } // Now we compute forces for particles. Each thread handles its assigned // subgrid. We first need a barrier to ensure that everyone sees all // the particles in next_grid. #pragma omp barrier { std::unique_ptr<SimpleIterator<particle_t&> > particles_to_force = next_grid->subgrid(thread_idx, num_actual_threads); while (particles_to_force->hasNext()) { particle_t& p = particles_to_force->next(); p.ax = p.ay = 0; std::unique_ptr<SimpleIterator<particle_t&> > neighbors = next_grid->neighbor_iterator(p); while (neighbors->hasNext()) { particle_t& neighbor = neighbors->next(); apply_force(p, neighbor, thread_stats); } } } // The barrier here ensures that no particle is moved before it is used // in apply_force above. #pragma omp barrier // Now we move each particle. 
std::unique_ptr<SimpleIterator<particle_t&> > particles_to_move = next_grid->subgrid(thread_idx, num_actual_threads); while (particles_to_move->hasNext()) { particle_t& p = particles_to_move->next(); move(p); } // This barrier is probably unnecessary unless save() is going to happen. #pragma omp barrier if (!fast) { // // save if necessary // #pragma omp master if( fsave && (step%SAVEFREQ) == 0 ) { save( fsave, n, (*particles).data() ); } } // This barrier is probably unnecessary unless save() happened. #pragma omp barrier } #pragma omp critical per_thread_stats[thread_idx] = thread_stats; } simulation_time = read_timer( ) - simulation_time; // Could do a tree reduce here, but it seems unnecessary. Stats overall_stats; for (int thread_idx = 0; thread_idx < max_num_threads; thread_idx++) { overall_stats.aggregate_left(per_thread_stats[thread_idx]); } printf( "n = %d,threads = %d, simulation time = %g seconds", n,num_actual_threads, simulation_time); if (!fast) { // // -the minimum distance absmin between 2 particles during the run of the simulation // -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8 // -A simulation were particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05 // // -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting // printf( ", absmin = %lf, absavg = %lf", overall_stats.min, overall_stats.avg); if (overall_stats.min < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting"); if (overall_stats.avg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting"); } printf("\n"); // // Printing summary data // if( fsum) fprintf(fsum,"%d %d %g\n",n,num_actual_threads, simulation_time); // // Clearing space // if( fsum ) fclose( fsum ); if( fsave ) fclose( fsave ); return 0; }
int main (int argc, char* args[]) { //SDL Window setup if (init(SCREEN_WIDTH, SCREEN_HEIGHT) == 1) { return 0; } int i = 0; int j = 0; int offset = 0; struct vector2d translation = {-SCREEN_WIDTH / 2, -SCREEN_HEIGHT / 2}; //set up icons used to represent player lives for (i = 0; i < LIVES; i++) { init_player(&lives[i]); lives[i].lives = 1; //shrink lives for (j = 0; j < P_VERTS; j++) { divide_vector(&lives[i].obj_vert[j], 2); } //convert screen space vector into world space struct vector2d top_left = {20 + offset, 20}; add_vector(&top_left, &translation); lives[i].location = top_left; update_player(&lives[i]); offset += 20; } //set up player and asteroids in world space init_player(&p); init_asteroids(asteroids, ASTEROIDS); int sleep = 0; int quit = 0; SDL_Event event; Uint32 next_game_tick = SDL_GetTicks(); //render loop while(quit == 0) { //check for new events every frame SDL_PumpEvents(); const Uint8 *state = SDL_GetKeyboardState(NULL); if (state[SDL_SCANCODE_ESCAPE]) { quit = 1; } if (state[SDL_SCANCODE_UP]) { struct vector2d thrust = get_direction(&p); multiply_vector(&thrust, .06); apply_force(&p.velocity, thrust); } if (state[SDL_SCANCODE_LEFT]) { rotate_player(&p, -4); } if (state[SDL_SCANCODE_RIGHT]) { rotate_player(&p, 4); } while (SDL_PollEvent(&event)) { switch(event.type) { case SDL_KEYDOWN: switch( event.key.keysym.sym ) { case SDLK_SPACE: if (p.lives > 0) { shoot_bullet(&p); } break; } } } //draw to the pixel buffer clear_pixels(pixels, 0x00000000); draw_player(pixels, &p); draw_player(pixels, &lives[0]); draw_player(pixels, &lives[1]); draw_player(pixels, &lives[2]); draw_asteroids(pixels, asteroids, ASTEROIDS); update_player(&p); bounds_player(&p); bounds_asteroids(asteroids, ASTEROIDS); int res = collision_asteroids(asteroids, ASTEROIDS, &p.location, p.hit_radius); if (res != -1) { p.lives--; p.location.x = 0; p.location.y = 0; p.velocity.x = 0; p.velocity.y = 0; int i = LIVES - 1; for ( i = LIVES; i >= 0; i--) { if(lives[i].lives > 0) { 
lives[i].lives = 0; break; } } } int i = 0; struct vector2d translation = {-SCREEN_WIDTH / 2, -SCREEN_HEIGHT / 2}; for (i = 0; i < BULLETS; i++) { //only check for collision for bullets that are shown on screen if (p.bullets[i].alive == TRUE) { //convert bullet screen space location to world space to compare //with asteroids world space to detect a collision struct vector2d world = add_vector_new(&p.bullets[i].location, &translation); int index = collision_asteroids(asteroids, ASTEROIDS, &world, 1); //collision occured if (index != -1) { asteroids[index].alive = 0; p.bullets[i].alive = FALSE; if (asteroids[index].size != SMALL) { spawn_asteroids(asteroids, ASTEROIDS, asteroids[index].size, asteroids[index].location); } } } } update_asteroids(asteroids, ASTEROIDS); //draw buffer to the texture representing the screen SDL_UpdateTexture(screen, NULL, pixels, SCREEN_WIDTH * sizeof (Uint32)); //draw to the screen SDL_RenderClear(renderer); SDL_RenderCopy(renderer, screen, NULL, NULL); SDL_RenderPresent(renderer); //time it takes to render 1 frame in milliseconds next_game_tick += 1000 / 60; sleep = next_game_tick - SDL_GetTicks(); if( sleep >= 0 ) { SDL_Delay(sleep); } } //free the screen buffer free(pixels); //Destroy window SDL_DestroyWindow(window); //Quit SDL subsystems SDL_Quit(); return 0; }
// // benchmarking program // int main( int argc, char **argv ) { int navg,nabsavg=0,numthreads; double dmin, absmin=1.0,davg,absavg=0.0; if( find_option( argc, argv, "-h" ) >= 0 ) { printf( "Options:\n" ); printf( "-h to see this help\n" ); printf( "-n <int> to set number of particles\n" ); printf( "-o <filename> to specify the output file name\n" ); printf( "-s <filename> to specify a summary file name\n" ); printf( "-no turns off all correctness checks and particle output\n"); return 0; } int n = read_int( argc, argv, "-n", 1000 ); char *savename = read_string( argc, argv, "-o", NULL ); char *sumname = read_string( argc, argv, "-s", NULL ); FILE *fsave = savename ? fopen( savename, "w" ) : NULL; FILE *fsum = sumname ? fopen ( sumname, "a" ) : NULL; particle_t *particles = (particle_t*) malloc( n * sizeof(particle_t) ); set_size( n ); init_particles( n, particles ); // // simulate a number of time steps // double simulation_time = read_timer( ); #pragma omp parallel private(dmin) { numthreads = omp_get_num_threads(); for( int step = 0; step < NSTEPS; step++ ) { navg = 0; davg = 0.0; dmin = 1.0; // // compute all forces // #pragma omp for reduction (+:navg) reduction(+:davg) for( int i = 0; i < n; i++ ) { particles[i].ax = particles[i].ay = 0; for (int j = 0; j < n; j++ ) apply_force( particles[i], particles[j],&dmin,&davg,&navg); } // // move particles // #pragma omp for for( int i = 0; i < n; i++ ) move( particles[i] ); if( find_option( argc, argv, "-no" ) == -1 ) { // // compute statistical data // #pragma omp master if (navg) { absavg += davg/navg; nabsavg++; } #pragma omp critical if (dmin < absmin) absmin = dmin; // // save if necessary // #pragma omp master if( fsave && (step%SAVEFREQ) == 0 ) save( fsave, n, particles ); } } } simulation_time = read_timer( ) - simulation_time; printf( "n = %d,threads = %d, simulation time = %g seconds", n,numthreads, simulation_time); if( find_option( argc, argv, "-no" ) == -1 ) { if (nabsavg) absavg /= nabsavg; // // -The 
minimum distance absmin between 2 particles during the run of the simulation // -A Correct simulation will have particles stay at greater than 0.4 (of cutoff) with typical values between .7-.8 // -A simulation where particles don't interact correctly will be less than 0.4 (of cutoff) with typical values between .01-.05 // // -The average distance absavg is ~.95 when most particles are interacting correctly and ~.66 when no particles are interacting // printf( ", absmin = %lf, absavg = %lf", absmin, absavg); if (absmin < 0.4) printf ("\nThe minimum distance is below 0.4 meaning that some particle is not interacting"); if (absavg < 0.8) printf ("\nThe average distance is below 0.8 meaning that most particles are not interacting"); } printf("\n"); // // Printing summary data // if( fsum) fprintf(fsum,"%d %d %g\n",n,numthreads,simulation_time); // // Clearing space // if( fsum ) fclose( fsum ); free( particles ); if( fsave ) fclose( fsave ); return 0; }
// // This is where the action happens // void *thread_routine( void *pthread_id ) { int navg,nabsavg=0; double dmin,absmin=1.0,davg,absavg=0.0; int thread_id = *(int*)pthread_id; int particles_per_thread = (n + n_threads - 1) / n_threads; int first = min( thread_id * particles_per_thread, n ); int last = min( (thread_id+1) * particles_per_thread, n ); // // simulate a number of time steps // for( int step = 0; step < NSTEPS; step++ ) { dmin = 1.0; navg = 0; davg = 0.0; // // compute forces // for( int i = first; i < last; i++ ) { particles[i].ax = particles[i].ay = 0; for (int j = 0; j < n; j++ ) apply_force( particles[i], particles[j], &dmin, &davg, &navg ); } pthread_barrier_wait( &barrier ); if( no_output == 0 ) { // // Computing statistical data // if (navg) { absavg += davg/navg; nabsavg++; } if (dmin < absmin) absmin = dmin; } // // move particles // for( int i = first; i < last; i++ ) move( particles[i] ); pthread_barrier_wait( &barrier ); // // save if necessary // if (no_output == 0) if( thread_id == 0 && fsave && (step%SAVEFREQ) == 0 ) save( fsave, n, particles ); } if (no_output == 0 ) { absavg /= nabsavg; //printf("Thread %d has absmin = %lf and absavg = %lf\n",thread_id,absmin,absavg); pthread_mutex_lock(&mutex); gabsavg += absavg; if (absmin < gabsmin) gabsmin = absmin; pthread_mutex_unlock(&mutex); } return NULL; }