/**
 * Recursive coordinate bisection (2D) of the entries in [start, end).
 *
 * For each level the extent of the range along axis 0 and axis 1 is measured
 * with calc_span(); the axis with the larger extent is selected (axis 0 wins
 * ties).  find_pivot() supplies the split value on that axis and partition()
 * reorders the range around it, returning how many entries went into the
 * first group.  Both resulting sub-ranges are then processed with
 * cur_depth - 1.
 *
 * @param p_coord    coordinate buffer, forwarded to calc_span/find_pivot/partition
 * @param p_map      index map, forwarded to partition()
 * @param p_part     partition ids, forwarded to partition()
 * @param start      first entry of the range (inclusive)
 * @param end        end of the range (exclusive)
 * @param dims       number of coordinate values per entry, forwarded to the helpers
 * @param cur_depth  remaining bisection levels; 0 terminates the recursion
 * @param ttl_depth  total depth; only forwarded to the recursive calls
 */
void rcb_rec2D(vector<T> *p_coord,
               vector<int> *p_map,
               vector<int> *p_part,
               int start,
               int end,
               int dims, 
               int cur_depth,
               int ttl_depth){

	// end of partitioning
	if(cur_depth == 0) return;

	// Nothing to bisect: an empty range can be produced when partition() puts
	// every entry on one side (or when there are more levels than entries).
	// Skipping it cannot change the result - partition() would neither move
	// nor tag anything - and it keeps calc_span()/find_pivot() from being
	// called on a range without data.
	if(start >= end) return;

	//cout << "-------------------------------------------------------------" << endl;
	//cout << "    start=" << start << " end=" << end << " cur_depth=" << cur_depth << endl;

tmr_span.start();
	// calculate max distance on each axis
	double x_span = calc_span(p_coord, start, end, dims, 0);
	double y_span = calc_span(p_coord, start, end, dims, 1);
tmr_span.stop_and_add_to_total();

	// choose the axis with the larger extent (axis 0 on ties)
	int axis = (x_span >= y_span) ? 0 : 1;

	//cout << "    x_span=" << x_span << " y_span=" << y_span << " axis=" << axis << endl;

	// find the split value on the chosen axis
tmr_pivot1.start();
	T pivot = find_pivot(p_coord, start, end, dims, axis);
tmr_pivot1.stop_and_add_to_total();
	//cout << "    pivot=" << pivot <<endl;

	// partition into two; "level" is the bit position partition() uses to tag
	// the second group in p_part
	int level= cur_depth-1;
	int part_index = partition(p_coord, p_map, p_part, start, end, dims, axis, pivot, level);

	//cout << "    part_index=" << part_index << endl;

	// next partitioning: first group is [start, start+part_index),
	// second group is [start+part_index, end)
	rcb_rec2D(p_coord, p_map, p_part, start, start+part_index, dims, cur_depth-1, ttl_depth);
	rcb_rec2D(p_coord, p_map, p_part, start+part_index, end, dims, cur_depth-1, ttl_depth);

}
/**
 * Driver for the 2D recursive coordinate bisection of the mesh cells.
 *
 * Builds the per-cell coordinate buffer through calc_cellcentre(), sets up an
 * identity cell map and an all-zero partition vector, runs rcb_rec2D() with
 * numoflevel levels and finally prints the accumulated timer totals.
 *
 * The step that would validate the result and write it into
 * partnode/partedge/partbedge/partcell is currently disabled, so within this
 * function nnode, nedge, nbedge, ecell, becell, edge and the part* outputs
 * are not touched.
 *
 * @param numoflevel  number of bisection levels passed to rcb_rec2D()
 * @param dims        coordinate values per cell
 * @param ncell       number of cells
 * @param cell, x     forwarded to calc_cellcentre()
 */
void part_rcb(int numoflevel, int dims,
              int nnode, int nedge, int nbedge, int ncell,
              point *partnode, point *partedge, point *partbedge, point *partcell,
              int *cell, int *ecell, int *becell, int *edge,
              float *x){

	// one coordinate tuple (dims values) per cell, filled by calc_cellcentre()
	vector<float> coord_cell(ncell*dims);
	calc_cellcentre(ncell, dims, cell, x, &coord_cell);

	// identity map (entry i refers to cell i) and partition id 0 for every cell
	vector<int> map;
	vector<int> part;
	int cell_id = 0;
	while(cell_id < ncell){
		map.push_back(cell_id);
		part.push_back(0);
		++cell_id;
	}

	// recursive coordinate bisection over all cells
	rcb_rec2D(&coord_cell, &map, &part, 0, ncell, dims, numoflevel, numoflevel);

	// Debug Print
	//part_rcb_DebugPrint(ncell, dims, &coord_cell, &map, &part);

	// output of the partition data (disabled; only the timer is exercised)
tmr_out.start();
/*
	if(check_partdata(ncell, &map, &part, numoflevel)){
		generate_partdata(nnode, nedge, nbedge, ncell,
                          &map, &part, 
                          partnode, partedge, partbedge, partcell, 
                          cell, ecell, becell);
	}else{
		cout << "partition data is invalid" << endl;
		exit(-1);
	}
*/
tmr_out.stop_and_add_to_total();

	// report the accumulated totals of every timer
	printf("span  =%lf\n", tmr_span.total_time());
	printf("pivot =%lf\n", tmr_pivot1.total_time());
	printf("part1 =%lf\n", tmr_part1.total_time());
	printf("part2 =%lf\n", tmr_part2.total_time());
	printf("part3 =%lf\n", tmr_part3.total_time());
	printf("out   =%lf\n", tmr_out.total_time());

}
Beispiel #3
0
// Runs one experiment until genMax generations have been simulated.
// The simulation parameters must have been updated before calling this.
//
// Each pass of the loop optionally services GLUT events, advances the
// particles by one step while b_anim_on is set, and - once stepNum reaches
// moveStepsPerGen - advances the generation.  When moveStepsPerGen is
// negative the generation check (and with it the exit test) is skipped, so
// the loop does not terminate on its own.
// On exit stepNum and genNum are reset, iExpt is incremented and 0 is returned.
int launchExpt(){

	SimpleTimer clockExpt;
	clockExpt.reset();
	clockExpt.start();

	cout << "\n> Launch Experiment: " << exptDesc << "_runSet(" << iEns << ")\n";
	cout << "   > Simulate " << genMax << " generations, plot after every " << plotStep << " steps.\n   > ";
	cout.flush();

	// endless loop: it has to keep polling the anim_on signal
	for(;;){
		if (graphicsQual > 0){
			glutMainLoopEvent();
		}

		// animate particles
		if (b_anim_on){
			animateParticles();
			++stepNum;
			// refresh the display when per-step updates are enabled
			if (b_displayEveryStep && stepNum % (-dispInterval) == 0){
				displayDevArrays();
			}
		}

		// generation bookkeeping only applies to a bounded number of steps
		if (!(moveStepsPerGen < 0)){

			// advance the generation once enough steps were taken
			if (stepNum >= moveStepsPerGen){
				stepNum = 0;
				advanceGen();	// Note: CudaMemCpy at start and end of advanceGen() ensure that all movement kernel calls are done
				++genNum;
				if (genNum % plotStep == 0){
					(cout << ".").flush();
				}
			}

			// all requested generations done -> leave the run loop
			if (genNum >= genMax){
				break;
			}
		}
	}

	// end run, report elapsed time and reset counters
	cout << " > DONE. ";
	printTime_hhmm(clockExpt.getTime());
	genNum = 0;
	stepNum = genNum;
	++iExpt;
	return 0;
}
int  main()
   {
    SelSorter dateObjectI1;
    MrgSorter dateObjectM1;
    QkSorter dateObjectQ1;
    DateType dateValue;
    SimpleTimer timer;
    char tempString[ SMALL_STR_LEN ], insTime[ SMALL_STR_LEN ];
    char qkTime[ SMALL_STR_LEN ], mrgTime[ SMALL_STR_LEN ]; 
    bool qSortGood = false, mSortGood = false, iSortGood = false;

    // load dates ////////////////////////////////////////////////////////////
    cout << endl << "Enter list of dates: " << endl;

    while( getALine( cin, tempString ) )
       {
        dateObjectI1.add( tempString );
       }

    // assign dates to other objects /////////////////////////////////////////
    dateObjectM1 = dateObjectI1;

    dateObjectQ1 = dateObjectM1;

    // display lists, unsorted ///////////////////////////////////////////////
    displayList( dateObjectI1, 'S', UNSORTED );

    displayList( dateObjectM1, 'M', UNSORTED );

    displayList( dateObjectQ1, 'Q', UNSORTED );

    // Selection sort operation //////////////////////////////////////////////
    timer.start();

    if( dateObjectI1.sort() )
       {
        timer.stop();
        timer.getElapsedTime( insTime );
        displayList( dateObjectI1, 'S', SORTED );
        iSortGood = true;
       }

    // stop timer in case of failure
    timer.stop();

    // Merge sort operation //////////////////////////////////////////////////
    timer.start();

    if( dateObjectM1.sort() )
       {
        timer.stop();
        timer.getElapsedTime( mrgTime );
        displayList( dateObjectM1, 'M', SORTED );
        mSortGood = true;
       }

    // stop timer in case of failure
    timer.stop();

    // Quick sort operation //////////////////////////////////////////////////
    timer.start();

    if( dateObjectQ1.sort() )
       {
        timer.stop();
        timer.getElapsedTime( qkTime );
        displayList( dateObjectQ1, 'Q', SORTED );
        qSortGood = true;
       }

    // stop timer in case of failure
    timer.stop();

    // Results displayed /////////////////////////////////////////////////////
    if( iSortGood )
       {
        cout << "Elapsed Time for Selection Sort: " 
             << insTime << " seconds." << endl;
       }
    else
       {
        cout << "ERROR: Failure of Selection sort due to bad input" 
             << endl << endl;
       }

    if( mSortGood )
       {
        cout << endl << "Elapsed Time for Merge Sort: " 
             << mrgTime << " seconds." << endl;
       }
    else
       {
        cout << "ERROR: Failure of merge sort due to bad input" 
             << endl << endl;
       }


    if( qSortGood )
       {
        cout << endl << "Elapsed Time for Quick Sort: " 
             << qkTime << " seconds." << endl << endl;
       }
    else
       {
        cout << "ERROR: Failure of quick sort due to bad input" 
             << endl << endl;
       }

    return 0;
   }
/**
 * Splits the entries of [start, end) into two groups around `pivot` on the
 * given axis and writes them back so the first group precedes the second.
 *
 * An entry goes to the first group when its coordinate on `axis` is greater
 * than `pivot`.  Entries equal to `pivot` are used to balance the load: they
 * go to the first group while it holds fewer than (end-start)/2 entries and
 * to the second group afterwards.  Everything else goes to the second group.
 * The relative order inside each group is the input order.
 *
 * After the split, bit `level` is set in p_part for every position occupied
 * by the second group, and p_map is reordered the same way as the
 * coordinates.
 *
 * @param p_in    coordinate buffer, dims values per entry; reordered in place
 * @param p_map   per-entry map values; reordered in place
 * @param p_part  per-position partition ids; bit `level` is OR-ed in
 * @param start   first entry (inclusive)
 * @param end     last entry (exclusive)
 * @param dims    coordinate values per entry
 * @param axis    coordinate component compared against pivot
 * @param pivot   split value
 * @param level   bit position marking the second group
 * @return number of entries placed in the first group
 */
int partition(vector<T> *p_in, 
              vector<int> *p_map,
              vector<int> *p_part,
              int start, 
              int end, 
              int dims, 
              int axis, 
              T pivot,
              int level){

tmr_part1.start();

	// scratch buffers for the two groups
	vector<T> upper_coord, lower_coord;
	vector<int> upper_map, lower_map;
	int n_upper = 0;
	int n_lower = 0;
	const int half = (end-start)/2;
	upper_map.reserve(half);
	lower_map.reserve(half);
	upper_coord.reserve(half*dims);
	lower_coord.reserve(half*dims);

	for(int src=start; src < end; ++src){

		const T key = p_in->at(src*dims+axis);

		// ties with the pivot fill the first group up to half the range,
		// otherwise the comparison with the pivot decides
		const bool to_upper = (key == pivot) ? (half > n_upper) : (key > pivot);

		vector<T>   &coord_dst = to_upper ? upper_coord : lower_coord;
		vector<int> &map_dst   = to_upper ? upper_map   : lower_map;

		for(int d=0; d<dims; ++d)
			coord_dst.push_back(p_in->at(src*dims+d));
		map_dst.push_back(p_map->at(src));

		if(to_upper)
			++n_upper;
		else
			++n_lower;
	}
tmr_part1.stop_and_add_to_total();

	//cout << "    out1.size =" << upper_coord.size()/dims << " out2.size =" << lower_coord.size()/dims << endl;
	//cout << "    level =" << level  << endl;

	// copy the coordinates back: first group, then second group
	// (element-wise with bounds checks; a raw memcpy was considered but not used)
tmr_part2.start();

	for(int k=0; k < (int)upper_coord.size(); ++k){
		p_in->at(start*dims+k) = upper_coord.at(k);
	}

	for(int k=0; k < (int)lower_coord.size(); ++k){
		p_in->at(start*dims+upper_coord.size()+k) = lower_coord.at(k);
	}
tmr_part2.stop_and_add_to_total();


tmr_part3.start();
	// tag the positions now holding the second group
	for(int k=0; k<n_lower; ++k){
		p_part->at(start+n_upper+k) |= (1 << level);
	}

	// copy the map values back in the same group order
	for(int k=0; k < (int)upper_map.size(); ++k)
		p_map->at(start+k) = upper_map.at(k);

	for(int k=0; k < (int)lower_map.size(); ++k)
		p_map->at(start+upper_map.size()+k) = lower_map.at(k);
tmr_part3.stop_and_add_to_total();

	return n_upper;
}
Beispiel #6
0
int runKDTreePerformanceTests()
{
	std::cout << "Setup..." << std::endl; // DEBUG.

	SimpleTimer timer;

	mesh *obj_mesh = new mesh( "meshes\\bunny_small_0.obj" );

	float fovy = 45.0f;
	glm::vec2 reso( 256, 256 );
	glm::vec3 eyep( 0.5f, 0.5f, 1.0f );
	glm::vec3 vdir( 0.0f, 0.0f, -1.0f );
	glm::vec3 uvec( 0.0f, 1.0f, 0.0f );
	Camera *camera = new Camera( fovy, reso, eyep, vdir, uvec );

	BMP output_img;
	output_img.SetSize( ( int )reso.x, ( int )reso.y );
	output_img.SetBitDepth( 24 );

	float time_elapsed;
	std::string output_img_path;


	////////////////////////////////////////////////////
	// CPU brute force.
	////////////////////////////////////////////////////

	std::cout << "CPU brute force..." << std::endl; // DEBUG.

	timer.start();

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			Ray ray = camera->computeRayThroughPixel( x, y );
			glm::vec3 pixel_color = bruteForceMeshTraversal( obj_mesh, ray );

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - CPU brute force: " << time_elapsed << std::endl;

	output_img_path = "ray_casting_output\\cpu_brute_force.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// CPU stack.
	////////////////////////////////////////////////////

	std::cout << "CPU stack..." << std::endl; // DEBUG.

	timer.start();
	KDTreeCPU *kd_tree = new KDTreeCPU( obj_mesh->numTris, obj_mesh->tris, obj_mesh->numVerts, obj_mesh->verts );
	time_elapsed = timer.stop();
	std::cout << "CONSTRUCTION - CPU kd-tree: " << time_elapsed << std::endl;

	timer.start();

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			Ray ray = camera->computeRayThroughPixel( x, y );
			glm::vec3 pixel_color = kdTreeMeshTraversal( kd_tree, ray );

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - CPU stack: " << time_elapsed << std::endl;

	output_img_path = "ray_casting_output\\cpu_stack.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// CPU stackless with CPU structs.
	////////////////////////////////////////////////////

	std::cout << "CPU stackless with CPU structs..." << std::endl; // DEBUG.

	timer.start();
	kd_tree->buildRopeStructure();
	time_elapsed = timer.stop();
	std::cout << "CONSTRUCTION - CPU kd-tree rope structure: " << time_elapsed << std::endl;

	timer.start();

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			Ray ray = camera->computeRayThroughPixel( x, y );
			glm::vec3 pixel_color = kdTreeMeshStacklessTraversal( kd_tree, ray );

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - CPU stackless with CPU structs: " << time_elapsed << std::endl;

	output_img_path = "ray_casting_output\\cpu_stackless_with_cpu_structs.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// CPU stackless with GPU structs.
	////////////////////////////////////////////////////

	std::cout << "CPU stackless with GPU structs..." << std::endl; // DEBUG.

	timer.start();
	KDTreeGPU *kd_tree_gpu = new KDTreeGPU( kd_tree );

	// Create list of triangle indices for GPU kd-tree.
	std::vector<int> tri_index_vector = kd_tree_gpu->getTriIndexList();
	int *tri_index_array = new int[tri_index_vector.size()];
	for ( int i = 0; i < tri_index_vector.size(); ++i ) {
		tri_index_array[i] = tri_index_vector[i];
	}

	time_elapsed = timer.stop();
	std::cout << "CONSTRUCTION - GPU kd-tree: " << time_elapsed << std::endl;

	timer.start();

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			Ray ray = camera->computeRayThroughPixel( x, y );
			glm::vec3 pixel_color = kdTreeGPUMeshStacklessTraversal( kd_tree_gpu, tri_index_array, ray );

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - CPU stackless with GPU structs: " << time_elapsed << std::endl;

	output_img_path = "ray_casting_output\\cpu_stackless_with_gpu_structs.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// GPU brute force.
	////////////////////////////////////////////////////

	std::cout << "GPU brute force..." << std::endl; // DEBUG.

	timer.start();
	glm::vec3 *ray_cast_image_brute_force = cudaRayCastObj( camera, obj_mesh, kd_tree_gpu, true );
	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - GPU brute force: " << time_elapsed << std::endl;

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			int index = ( y * ( int )reso.x ) + x;
			glm::vec3 pixel_color = ray_cast_image_brute_force[index];;

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	output_img_path = "ray_casting_output\\gpu_brute_force.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// GPU stackless.
	////////////////////////////////////////////////////

	std::cout << "GPU stackless..." << std::endl; // DEBUG.

	timer.start();
	glm::vec3 *ray_cast_image_stackless = cudaRayCastObj( camera, obj_mesh, kd_tree_gpu, false );
	time_elapsed = timer.stop();
	std::cout << "TRAVERSAL - GPU stackless: " << time_elapsed << std::endl;

	// Iterate through all pixels.
	for ( int y = 0; y < reso.y; ++y ) {
		for ( int x = 0; x < reso.x; ++x ) {
			int index = ( y * ( int )reso.x ) + x;
			glm::vec3 pixel_color = ray_cast_image_stackless[index];;

			// Write pixel.
			output_img( x, y )->Red = ( ebmpBYTE )( pixel_color.x * 255.0f );
			output_img( x, y )->Green = ( ebmpBYTE )( pixel_color.y * 255.0f );
			output_img( x, y )->Blue = ( ebmpBYTE )( pixel_color.z * 255.0f );
		}
	}

	output_img_path = "ray_casting_output\\gpu_stackless.bmp";
	output_img.WriteToFile( output_img_path.c_str() );


	////////////////////////////////////////////////////
	// Cleanup.
	////////////////////////////////////////////////////

	delete camera;
	delete obj_mesh;
	delete kd_tree;
	delete kd_tree_gpu;
	delete[] tri_index_array;
	delete[] ray_cast_image_brute_force;
	delete[] ray_cast_image_stackless;

	return 0;
}
Beispiel #7
0
// Entry point of the swarm simulation.
//
// Sets up a Store (output file) and a Swarm of `particles` particles with
// `nPred` predators, then for every noise value and every realisation runs
// `iterations` time steps.  Each step updates the swarm, draws particles and
// predators on an SDL screen, accumulates the order parameter over the last
// `last` steps and, every 10000 steps, writes the per-particle eta values to
// the store.  The total elapsed time is written to the store at the end.
// Always returns 0.
int main (int argc, char *argv[]){
	srand(time(NULL));		//seeding rand with time
	time_t t;				
	time(&t);
	const float systemSize = 16.0;		
	const int particles = 128;
	const int nPred = 1;
	const int wait = 1;				//swarm.initAttack() is re-issued every `wait` steps
	const float maxeta = 5;		//maximum noise parameter
	float predNoise = 0.0;
	const int realisations = 1;	//number of realisations
	const int iterations = 2000000;	//number of time steps
	const int last = 100;			//number of last steps over which order parameter would be averaged
	int c;							//only used by the disabled group-size analysis below
	//int *gsd;						//pointer to initialise array that stores different group size 
	float timeElapsed;
	Store store(particles);			//Store class object 
	store.fileOpen();
	Swarm swarm(particles, systemSize, nPred);
	swarm.allocate();
	swarm.launchRandInit((unsigned long) t);
	// NOTE: from here on the name `time` refers to this timer, not to ::time().
	SimpleTimer time; time.reset();
	time.start();
	float avgEta = 0.0;				//mean eta of the current time step
	// With the start value equal to maxeta this loop body runs exactly once.
	for (float eta = maxeta; eta <= maxeta; eta = eta + 0.2){		//loop to iterate over different noise values
		store.orientationParam = 0.0;				//initialize OP to zero before each round of replication
		for (int rep = 0; rep < realisations; rep++){		//loop to perform more number of realizations
			swarm.init(eta);
			swarm.initPredator(predNoise);
			swarm.initid();
			swarm.initAttack();
			swarm.cudaCopy();
			Screen screen;
			// A failed screen initialisation is only reported; the run continues.
			if (screen.init() == false){
				cout << "error initialising SDL." << endl;
			}
			for (int i = 0; i < iterations; i++){		//loop to run the simulations for number of timesteps
				screen.clear();
				swarm.update();
				const Particle * const pParticles = swarm.returnParticles();	//store the particle

				// Mean eta of this step.  The accumulator is reset first so that
				// the previous step's mean does not leak into the current sum.
				avgEta = 0.0;
				for (int p = 0; p < particles; p++){
					Particle particle = pParticles[p];
					avgEta = avgEta + particle.eta;
				}
				avgEta = avgEta / particles;

				// Draw every particle; red/blue encode eta relative to maxeta.
				// NOTE(review): unqualified abs() on a float - confirm a
				// floating-point overload is in scope.
				for (int p = 0; p < particles; p++){
					Particle particle = pParticles[p];

					int x = particle.coord.x * Screen::SCREEN_WIDTH / systemSize;
					int y = particle.coord.y * Screen::SCREEN_HEIGHT / systemSize;
					//store.printCoord(x,y);
					screen.setPixel(x, y, int(255 * particle.eta / maxeta), 0, int(255 * abs(maxeta - particle.eta) / maxeta));
				}

				// Draw the predators in black.
				const Predator * const pPredators = swarm.returnPredators();
				for (int p = 0; p < nPred; p++){
					Predator predator = pPredators[p];

					int x = predator.coord.x * Screen::SCREEN_WIDTH / systemSize;
					int y = predator.coord.y * Screen::SCREEN_HEIGHT / systemSize;
					//store.printCoord(x,y);
					screen.setPixel(x, y, 0, 0, 0);
				}
				screen.update();	

				// Accumulate the order parameter over the final `last` steps.
				if (i >= iterations - last){
					swarm.cudaBackCopy();
					store.orientationParam += swarm.calcOrderparam();
				}

				// Stop this realisation when the screen reports that it should end.
				if (screen.processEvents() == false){
					break;
				}

				// Periodic dump of the per-particle eta values.
				if (i%10000 == 0){
					for (int p = 0; p < particles; p++){
						Particle particle = pParticles[p];
						store.eta[p] = particle.eta;
						store.print(p);
					}
					store.endl();
				}
				if (i%wait == 0) swarm.initAttack();
			}
			screen.close();
			/*if (cudaDeviceSynchronize() != cudaSuccess)
				cout << "Device synchronisation failed \n";
			swarm.cudaUniteIdBackCopy();
			swarm.grouping();
			c = swarm.findgroups();
			cout << "number of independent groups are " << c << "\n";
			gsd = new int[c];
			swarm.calcgsd(gsd);
			for (int i = 0; i < c; i++){
				store.printGroupSize(gsd[i]);
			}
			store.endl();*/	
		}
		/*store.endl();
		store.orientationParam = store.orientationParam / realisations / last;
		//cout << store.orientationParam << "\n";
		store.print(eta);
		store.endl();*/
	}
	time.stop();
	timeElapsed = time.printTime();
	store.printTime(timeElapsed);
	store.fileClose();
	
	//delete []gsd;
	return 0;
}