// Recursive coordinate bisection (2D) over the element range [start, end).
//
// At every level the range is split along whichever of the first two axes has
// the larger span (ties go to axis 0), the coordinate/map/part arrays are
// reordered by partition(), and both halves are processed with the depth
// reduced by one.
//
//   p_coord    interleaved coordinates, `dims` values per element
//   p_map      element index map, reordered together with p_coord
//   p_part     per-element partition bits, updated by partition()
//   start,end  half-open element range handled by this call
//   dims       number of coordinate values stored per element
//   cur_depth  remaining bisection levels; 0 stops the recursion
//   ttl_depth  only forwarded to the recursive calls, not otherwise used here
void rcb_rec2D(vector<T> *p_coord, vector<int> *p_map, vector<int> *p_part, int start, int end, int dims, int cur_depth, int ttl_depth){
    // Nothing left to split.
    if(cur_depth == 0){
        return;
    }

    // Measure the extent of the current range along axis 0 and axis 1.
    tmr_span.start();
    double span_axis0 = calc_span(p_coord, start, end, dims, 0);
    double span_axis1 = calc_span(p_coord, start, end, dims, 1);
    tmr_span.stop_and_add_to_total();

    // Bisect across the longer extent.
    int split_axis = (span_axis0 >= span_axis1) ? 0 : 1;

    // Value on the chosen axis that separates the two halves.
    tmr_pivot1.start();
    T pivot = find_pivot(p_coord, start, end, dims, split_axis);
    tmr_pivot1.stop_and_add_to_total();

    // Reorder the range; the level passed down is the depth remaining below
    // this split, and the return value is the size of the first group.
    int next_depth = cur_depth - 1;
    int first_count = partition(p_coord, p_map, p_part, start, end, dims, split_axis, pivot, next_depth);

    // Recurse into both sub-ranges.
    int split_pos = start + first_count;
    rcb_rec2D(p_coord, p_map, p_part, start, split_pos, dims, next_depth, ttl_depth);
    rcb_rec2D(p_coord, p_map, p_part, split_pos, end, dims, next_depth, ttl_depth);
}
void part_rcb(int numoflevel, int dims, int nnode, int nedge, int nbedge, int ncell, point *partnode, point *partedge, point *partbedge, point *partcell, int *cell, int *ecell, int *becell, int *edge, float *x){ // calc coordinate of center of gravity in each cell vector<float> coord_cell(ncell*dims); calc_cellcentre(ncell, dims, cell, x, &coord_cell); // initialize map and partition data vector<int> map, part; for(int i=0; i<ncell; i++){ map.push_back(i); part.push_back(0); } // call recursive coordinate bisection algorithm rcb_rec2D(&coord_cell, &map, &part, 0, ncell, dims, numoflevel, numoflevel); // Debug Print //part_rcb_DebugPrint(ncell, dims, &coord_cell, &map, &part); // output partition data tmr_out.start(); /* if(check_partdata(ncell, &map, &part, numoflevel)){ generate_partdata(nnode, nedge, nbedge, ncell, &map, &part, partnode, partedge, partbedge, partcell, cell, ecell, becell); }else{ cout << "partition data is invalid" << endl; exit(-1); } */ tmr_out.stop_and_add_to_total(); printf("span =%lf\n", tmr_span.total_time()); printf("pivot =%lf\n", tmr_pivot1.total_time()); printf("part1 =%lf\n", tmr_part1.total_time()); printf("part2 =%lf\n", tmr_part2.total_time()); printf("part3 =%lf\n", tmr_part3.total_time()); printf("out =%lf\n", tmr_out.total_time()); }
// this function expects that simulation params be updated beforehand int launchExpt(){ SimpleTimer clockExpt; clockExpt.reset(); clockExpt.start(); cout << "\n> Launch Experiment: " << exptDesc << "_runSet(" << iEns << ")\n"; cout << " > Simulate " << genMax << " generations, plot after every " << plotStep << " steps.\n > "; cout.flush(); // start run // int k=0, g=0; while(1){ // infinite loop needed to poll anim_on signal. if (graphicsQual > 0) glutMainLoopEvent(); // animate particles if (b_anim_on) { animateParticles(); ++stepNum; if (b_displayEveryStep && stepNum % (-dispInterval) == 0) displayDevArrays(); // update display if every step update is on } // if indefinite number of steps desired, skip gen-advance check if (moveStepsPerGen < 0) continue; // check if generation is to be advanced. if (stepNum >= moveStepsPerGen){ stepNum = 0; advanceGen(); // Note: CudaMemCpy at start and end of advanceGen() ensure that all movement kernel calls are done ++genNum; if (genNum % plotStep == 0) { cout << "."; cout.flush(); } } // when genMax genenrations are done, end run if (genNum >= genMax) break; } // end run, reset counters cout << " > DONE. "; // << "Return to main().\n\n"; printTime_hhmm(clockExpt.getTime()); stepNum = genNum = 0; ++iExpt; return 0; }
int main() { SelSorter dateObjectI1; MrgSorter dateObjectM1; QkSorter dateObjectQ1; DateType dateValue; SimpleTimer timer; char tempString[ SMALL_STR_LEN ], insTime[ SMALL_STR_LEN ]; char qkTime[ SMALL_STR_LEN ], mrgTime[ SMALL_STR_LEN ]; bool qSortGood = false, mSortGood = false, iSortGood = false; // load dates //////////////////////////////////////////////////////////// cout << endl << "Enter list of dates: " << endl; while( getALine( cin, tempString ) ) { dateObjectI1.add( tempString ); } // assign dates to other objects ///////////////////////////////////////// dateObjectM1 = dateObjectI1; dateObjectQ1 = dateObjectM1; // display lists, unsorted /////////////////////////////////////////////// displayList( dateObjectI1, 'S', UNSORTED ); displayList( dateObjectM1, 'M', UNSORTED ); displayList( dateObjectQ1, 'Q', UNSORTED ); // Selection sort operation ////////////////////////////////////////////// timer.start(); if( dateObjectI1.sort() ) { timer.stop(); timer.getElapsedTime( insTime ); displayList( dateObjectI1, 'S', SORTED ); iSortGood = true; } // stop timer in case of failure timer.stop(); // Merge sort operation ////////////////////////////////////////////////// timer.start(); if( dateObjectM1.sort() ) { timer.stop(); timer.getElapsedTime( mrgTime ); displayList( dateObjectM1, 'M', SORTED ); mSortGood = true; } // stop timer in case of failure timer.stop(); // Quick sort operation ////////////////////////////////////////////////// timer.start(); if( dateObjectQ1.sort() ) { timer.stop(); timer.getElapsedTime( qkTime ); displayList( dateObjectQ1, 'Q', SORTED ); qSortGood = true; } // stop timer in case of failure timer.stop(); // Results displayed ///////////////////////////////////////////////////// if( iSortGood ) { cout << "Elapsed Time for Selection Sort: " << insTime << " seconds." 
<< endl; } else { cout << "ERROR: Failure of Selection sort due to bad input" << endl << endl; } if( mSortGood ) { cout << endl << "Elapsed Time for Merge Sort: " << mrgTime << " seconds." << endl; } else { cout << "ERROR: Failure of merge sort due to bad input" << endl << endl; } if( qSortGood ) { cout << endl << "Elapsed Time for Quick Sort: " << qkTime << " seconds." << endl << endl; } else { cout << "ERROR: Failure of quick sort due to bad input" << endl << endl; } return 0; }
int partition(vector<T> *p_in, vector<int> *p_map, vector<int> *p_part, int start, int end, int dims, int axis, T pivot, int level){ /* tmr_part1.start(); // Buffer vector<T> out1, out2; vector<int> map1, map2; int cnt1=0, cnt2=0; for(int i=start; i < end; i++){ if(p_in->at(i*dims+axis)> pivot){ for(int d_index=0; d_index<dims; d_index++) out1.push_back(p_in->at(i*dims+d_index)); map1.push_back(p_map->at(i)); cnt1+=1; }else{ for(int d_index=0; d_index<dims; d_index++) out2.push_back(p_in->at(i*dims+d_index)); map2.push_back(p_map->at(i)); cnt2+=1; } } tmr_part1.stop_and_add_to_total(); */ tmr_part1.start(); // Buffer vector<T> out1, out2; vector<int> map1, map2; int cnt1=0, cnt2=0; int halfcnt = (end-start)/2; map1.reserve(halfcnt); map2.reserve(halfcnt); out1.reserve(halfcnt*dims); out2.reserve(halfcnt*dims); T tmpcoord; for(int i=start; i < end; i++){ tmpcoord = p_in->at(i*dims+axis); // - balance load when multiple node has same coordinate value if(tmpcoord == pivot){ // if cnt1 is less than half size, data belong to map1 if(halfcnt>cnt1){ for(int d_index=0; d_index<dims; d_index++) out1.push_back(p_in->at(i*dims+d_index)); map1.push_back(p_map->at(i)); cnt1+=1; // if cnt1 is more than half size, data belong to map2 }else{ for(int d_index=0; d_index<dims; d_index++) out2.push_back(p_in->at(i*dims+d_index)); map2.push_back(p_map->at(i)); cnt2+=1; } }else if(tmpcoord > pivot){ for(int d_index=0; d_index<dims; d_index++) out1.push_back(p_in->at(i*dims+d_index)); map1.push_back(p_map->at(i)); cnt1+=1; }else{ for(int d_index=0; d_index<dims; d_index++) out2.push_back(p_in->at(i*dims+d_index)); map2.push_back(p_map->at(i)); cnt2+=1; } } tmr_part1.stop_and_add_to_total();; //cout << " out1.size =" << out1.size()/dims << " out2.size =" << out2.size()/dims << endl; //cout << " level =" << level << endl; // Replace to original coord data <--- can be replaced with this loop to memcpy but may not safe.. 
tmr_part2.start(); for(int i=0; i < (int)out1.size(); i++){ p_in->at(start*dims+i) = out1.at(i); } for(int i=0; i < (int)out2.size(); i++){ p_in->at(start*dims+out1.size()+i) = out2.at(i); } tmr_part2.stop_and_add_to_total();; tmr_part3.start(); for(int i=0; i<cnt2; i++){ p_part->at(start+cnt1+i) |= (1 << level); } // Replace to original map/part data for(int i=0; i < (int)map1.size(); i++) p_map->at(start+i) = map1.at(i); for(int i=0; i < (int)map2.size(); i++) p_map->at(start+map1.size()+i) = map2.at(i); tmr_part3.stop_and_add_to_total();; return cnt1; }
int runKDTreePerformanceTests() { std::cout << "Setup..." << std::endl; // DEBUG. SimpleTimer timer; mesh *obj_mesh = new mesh( "meshes\\bunny_small_0.obj" ); float fovy = 45.0f; glm::vec2 reso( 256, 256 ); glm::vec3 eyep( 0.5f, 0.5f, 1.0f ); glm::vec3 vdir( 0.0f, 0.0f, -1.0f ); glm::vec3 uvec( 0.0f, 1.0f, 0.0f ); Camera *camera = new Camera( fovy, reso, eyep, vdir, uvec ); BMP output_img; output_img.SetSize( ( int )reso.x, ( int )reso.y ); output_img.SetBitDepth( 24 ); float time_elapsed; std::string output_img_path; //////////////////////////////////////////////////// // CPU brute force. //////////////////////////////////////////////////// std::cout << "CPU brute force..." << std::endl; // DEBUG. timer.start(); // Iterate through all pixels. for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { Ray ray = camera->computeRayThroughPixel( x, y ); glm::vec3 pixel_color = bruteForceMeshTraversal( obj_mesh, ray ); // Write pixel. output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } time_elapsed = timer.stop(); std::cout << "TRAVERSAL - CPU brute force: " << time_elapsed << std::endl; output_img_path = "ray_casting_output\\cpu_brute_force.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // CPU stack. //////////////////////////////////////////////////// std::cout << "CPU stack..." << std::endl; // DEBUG. timer.start(); KDTreeCPU *kd_tree = new KDTreeCPU( obj_mesh->numTris, obj_mesh->tris, obj_mesh->numVerts, obj_mesh->verts ); time_elapsed = timer.stop(); std::cout << "CONSTRUCTION - CPU kd-tree: " << time_elapsed << std::endl; timer.start(); // Iterate through all pixels. 
for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { Ray ray = camera->computeRayThroughPixel( x, y ); glm::vec3 pixel_color = kdTreeMeshTraversal( kd_tree, ray ); // Write pixel. output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } time_elapsed = timer.stop(); std::cout << "TRAVERSAL - CPU stack: " << time_elapsed << std::endl; output_img_path = "ray_casting_output\\cpu_stack.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // CPU stackless with CPU structs. //////////////////////////////////////////////////// std::cout << "CPU stackless with CPU structs..." << std::endl; // DEBUG. timer.start(); kd_tree->buildRopeStructure(); time_elapsed = timer.stop(); std::cout << "CONSTRUCTION - CPU kd-tree rope structure: " << time_elapsed << std::endl; timer.start(); // Iterate through all pixels. for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { Ray ray = camera->computeRayThroughPixel( x, y ); glm::vec3 pixel_color = kdTreeMeshStacklessTraversal( kd_tree, ray ); // Write pixel. output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } time_elapsed = timer.stop(); std::cout << "TRAVERSAL - CPU stackless with CPU structs: " << time_elapsed << std::endl; output_img_path = "ray_casting_output\\cpu_stackless_with_cpu_structs.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // CPU stackless with GPU structs. //////////////////////////////////////////////////// std::cout << "CPU stackless with GPU structs..." << std::endl; // DEBUG. 
timer.start(); KDTreeGPU *kd_tree_gpu = new KDTreeGPU( kd_tree ); // Create list of triangle indices for GPU kd-tree. std::vector<int> tri_index_vector = kd_tree_gpu->getTriIndexList(); int *tri_index_array = new int[tri_index_vector.size()]; for ( int i = 0; i < tri_index_vector.size(); ++i ) { tri_index_array[i] = tri_index_vector[i]; } time_elapsed = timer.stop(); std::cout << "CONSTRUCTION - GPU kd-tree: " << time_elapsed << std::endl; timer.start(); // Iterate through all pixels. for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { Ray ray = camera->computeRayThroughPixel( x, y ); glm::vec3 pixel_color = kdTreeGPUMeshStacklessTraversal( kd_tree_gpu, tri_index_array, ray ); // Write pixel. output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } time_elapsed = timer.stop(); std::cout << "TRAVERSAL - CPU stackless with GPU structs: " << time_elapsed << std::endl; output_img_path = "ray_casting_output\\cpu_stackless_with_gpu_structs.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // GPU brute force. //////////////////////////////////////////////////// std::cout << "GPU brute force..." << std::endl; // DEBUG. timer.start(); glm::vec3 *ray_cast_image_brute_force = cudaRayCastObj( camera, obj_mesh, kd_tree_gpu, true ); time_elapsed = timer.stop(); std::cout << "TRAVERSAL - GPU brute force: " << time_elapsed << std::endl; // Iterate through all pixels. for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { int index = ( y * ( int )reso.x ) + x; glm::vec3 pixel_color = ray_cast_image_brute_force[index];; // Write pixel. 
output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } output_img_path = "ray_casting_output\\gpu_brute_force.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // GPU stackless. //////////////////////////////////////////////////// std::cout << "GPU stackless..." << std::endl; // DEBUG. timer.start(); glm::vec3 *ray_cast_image_stackless = cudaRayCastObj( camera, obj_mesh, kd_tree_gpu, false ); time_elapsed = timer.stop(); std::cout << "TRAVERSAL - GPU stackless: " << time_elapsed << std::endl; // Iterate through all pixels. for ( int y = 0; y < reso.y; ++y ) { for ( int x = 0; x < reso.x; ++x ) { int index = ( y * ( int )reso.x ) + x; glm::vec3 pixel_color = ray_cast_image_stackless[index];; // Write pixel. output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f ); output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f ); output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f ); } } output_img_path = "ray_casting_output\\gpu_stackless.bmp"; output_img.WriteToFile( output_img_path.c_str() ); //////////////////////////////////////////////////// // Cleanup. //////////////////////////////////////////////////// delete camera; delete obj_mesh; delete kd_tree; delete kd_tree_gpu; delete[] tri_index_array; delete[] ray_cast_image_brute_force; delete[] ray_cast_image_stackless; return 0; }
int main (int argc, char *argv[]){ srand(time(NULL)); //seeding rand with time time_t t; time(&t); const float systemSize = 16.0; const int particles = 128; const int nPred = 1; const int wait = 1; const float maxeta = 5; //maximum noise parameter float predNoise = 0.0; const int realisations = 1; //number of realisations const int iterations = 2000000; //number of time steps const int last = 100; //number of last steps over which order parameter would be averaged int c; //int *gsd; //pointer to initialise array that stores different group size float timeElapsed; Store store(particles); //Store class object store.fileOpen(); Swarm swarm(particles, systemSize, nPred); swarm.allocate(); swarm.launchRandInit((unsigned long) t); SimpleTimer time; time.reset(); time.start(); float avgEta = 0.0; for (float eta = maxeta; eta <= maxeta; eta = eta + 0.2){ //loop to iterate over different noise values store.orientationParam = 0.0; //initialize OP to zero before each round of replication for (int rep = 0; rep < realisations; rep++){ //loop to perform more number of realizations swarm.init(eta); swarm.initPredator(predNoise); swarm.initid(); swarm.initAttack(); swarm.cudaCopy(); Screen screen; if (screen.init() == false){ cout << "error initialising SDL." 
<< endl; } for (int i = 0; i < iterations; i++){ //loop to run the simulations for number of timesteps screen.clear(); swarm.update(); const Particle * const pParticles = swarm.returnParticles(); //store the particle for (int p = 0; p < particles; p++){ Particle particle = pParticles[p]; avgEta = avgEta + particle.eta; } avgEta = avgEta / particles; for (int p = 0; p < particles; p++){ Particle particle = pParticles[p]; int x = particle.coord.x * Screen::SCREEN_WIDTH / systemSize; int y = particle.coord.y * Screen::SCREEN_HEIGHT / systemSize; //store.printCoord(x,y); screen.setPixel(x, y, int(255 * particle.eta / maxeta), 0, int(255 * abs(maxeta - particle.eta) / maxeta)); } const Predator * const pPredators = swarm.returnPredators(); for (int p = 0; p < nPred; p++){ Predator predator = pPredators[p]; int x = predator.coord.x * Screen::SCREEN_WIDTH / systemSize; int y = predator.coord.y * Screen::SCREEN_HEIGHT / systemSize; //store.printCoord(x,y); screen.setPixel(x, y, 0, 0, 0); } screen.update(); if (i >= iterations - last){ swarm.cudaBackCopy(); store.orientationParam += swarm.calcOrderparam(); } if (screen.processEvents() == false){ break; } if (i%10000 == 0){ for (int p = 0; p < particles; p++){ Particle particle = pParticles[p]; store.eta[p] = particle.eta; store.print(p); } store.endl(); } if (i%wait == 0) swarm.initAttack(); } screen.close(); /*if (cudaDeviceSynchronize() != cudaSuccess) cout << "Device synchronisation failed \n"; swarm.cudaUniteIdBackCopy(); swarm.grouping(); c = swarm.findgroups(); cout << "number of independent groups are " << c << "\n"; gsd = new int[c]; swarm.calcgsd(gsd); for (int i = 0; i < c; i++){ store.printGroupSize(gsd[i]); } store.endl();*/ } /*store.endl(); store.orientationParam = store.orientationParam / realisations / last; //cout << store.orientationParam << "\n"; store.print(eta); store.endl();*/ } time.stop(); timeElapsed = time.printTime(); store.printTime(timeElapsed); store.fileClose(); //delete []gsd; return 0; }