Exemplo n.º 1
0
/*
 * N-body example driver built on the stdcl calls used below.
 *
 * Allocates two position buffers and one velocity buffer, initialises them
 * with nbody_init(), loads the "nbody_kern" kernel from nbody_kern.cl and
 * launches it nstep times in bursts of nburst launches.  Each launch swaps
 * the roles of pos1 and pos2 (kernel arguments 2 and 3), so after an even
 * number of launches the most recent positions are in pos1.
 *
 * Returns 0 on success, 1 if a buffer allocation or the kernel load fails.
 */
int main(int argc, char** argv)
{
   int status = 1; /* pessimistic default; cleared once the run completes */
   int step,burst;

   int nparticle = 8192; /* MUST be a nice power of two for simplicity */
   int nstep = 500;
   /* MUST divide nstep without remainder, and MUST be even because the
      inner loop below issues two kernel launches per iteration */
   int nburst = 20;
   int nthread = 64; /* chosen for ATI Radeon HD 5870 */

   float dt = 0.0001f;
   float eps = 0.0001f;

   /* Declared ahead of the first goto so no jump skips an initialisation. */
   clndrange_t ndr = clndrange_init1d(0,nparticle,nthread);
   void* h = 0;
   cl_kernel krn = 0;

   cl_float4* pos1 = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);
   cl_float4* pos2 = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);
   cl_float4* vel = (cl_float4*)clmalloc(stdgpu,nparticle*sizeof(cl_float4),0);

   /* Do not hand failed allocations to nbody_init() or the kernel. */
   if (!pos1 || !pos2 || !vel)
      goto free_buffers;

   nbody_init(nparticle,pos1,vel);

   h = clopen(stdgpu,"nbody_kern.cl",CLLD_NOW);
   if (!h)
      goto free_buffers;

   krn = clsym(stdgpu,h,"nbody_kern",CLLD_NOW);
   if (!krn)
      goto close_handle;

   /* Arguments that stay fixed for every launch; 2 and 3 are set per launch. */
   clarg_set(stdgpu,krn,0,dt);
   clarg_set(stdgpu,krn,1,eps);
   clarg_set_global(stdgpu,krn,4,vel);
   clarg_set_local(stdgpu,krn,5,nthread*sizeof(cl_float4));

   /* Presumably queues the host-to-device copy of the initial state without
      blocking (CL_MEM_DEVICE|CL_EVENT_NOWAIT) -- confirm against stdcl docs. */
   clmsync(stdgpu,0,pos1,CL_MEM_DEVICE|CL_EVENT_NOWAIT);
   clmsync(stdgpu,0,vel,CL_MEM_DEVICE|CL_EVENT_NOWAIT);

   for(step=0; step<nstep; step+=nburst) {

      for(burst=0; burst<nburst; burst+=2) {

         /* first launch: arg 2 = pos1, arg 3 = pos2 */
         clarg_set_global(stdgpu,krn,2,pos1);
         clarg_set_global(stdgpu,krn,3,pos2);
         clfork(stdgpu,0,krn,&ndr,CL_EVENT_NOWAIT);

         /* second launch: roles swapped */
         clarg_set_global(stdgpu,krn,2,pos2);
         clarg_set_global(stdgpu,krn,3,pos1);
         clfork(stdgpu,0,krn,&ndr,CL_EVENT_NOWAIT);

      }

      /* Request pos1 on the host, then wait for the kernel and memory
         events queued during this burst. */
      clmsync(stdgpu,0,pos1,CL_MEM_HOST|CL_EVENT_NOWAIT);

      clwait(stdgpu,0,CL_KERNEL_EVENT|CL_MEM_EVENT);

   }

   /* NOTE(review): only pos1 is synced with CL_MEM_HOST above; vel is passed
      here without a matching sync -- confirm nbody_output() does not depend
      on updated velocities. */
   nbody_output(nparticle,pos1,vel);

   status = 0;

close_handle:
   clclose(stdgpu,h);

free_buffers:
   /* Guarded because clfree()'s handling of a null pointer is not visible here. */
   if (pos1) clfree(pos1);
   if (pos2) clfree(pos2);
   if (vel) clfree(vel);

   return status;
}
Exemplo n.º 2
0
Arquivo: nbody.c Projeto: Hkau/kth
/*
 * Entry point of the GUI-driven n-body simulation.
 *
 * Parses the command line, initialises the bodies and the GUI, then runs the
 * simulation for as long as gui_update() returns non-zero, or until the
 * num_steps countdown (when positive) reaches zero.  Afterwards it echoes the
 * command line and prints the iteration count, the elapsed wall-clock time
 * and the average time per iteration.
 *
 * Always returns 0.
 */
int main(int argc, char* argv[])
{
	parse_args(argc, argv); // Done differently depending on application

	nbody_init(num_bodies);

	gui_init("nbody1");

/*	// Make some clumped bodies with initial velocity for a more interesting simulation
	for(i = 0; i < num_bodies/4; ++i)
	{
		body[i].pos.x = -50 + rand() % 20;
		body[i].pos.y = -50 + rand() % 20;
		body[i].pos.z = -50 + rand() % 20;
		body[i].vel.x = 0.05;
	}
	for(i = num_bodies/4; i < num_bodies/2; ++i)
	{
		body[i].pos.x = 30 + rand() % 20;
		body[i].pos.y = 30 + rand() % 20;
		body[i].pos.z = 30 + rand() % 20;
		body[i].vel.x = -0.05;
	}*/

	// Accumulated simulation "credit"; every whole unit buys one step.
	float delta = 0.f;

	// POSIX requires a valid buffer for times(); a NULL argument is only
	// tolerated on Linux. Only the return value is used here.
	struct tms tms_buf;
	clock_t start = times(&tms_buf);

	client_start();

	int iterations = 0;
	while(gui_update())
	{
		// simulate stuff: turbo is added once per GUI update, so values
		// above 1 run several steps per update and values below 1 skip some
		delta += turbo;
		while(delta >= 1.f)
		{
			// random mass flip
			//if(allow_negative)
			//		body[rand() % num_bodies].mass *= -1.f;

			calc_forces();

			add_velocity();

			delta -= 1.f;
			++iterations;
		}

		// if there's a limit, count down and break if reached
		if(num_steps > 0 && (--num_steps == 0))
			break;
	}

	clock_t stop = times(&tms_buf);

	// Echo the command line so the timing output is self-describing.
	fputs(argv[0], stdout);
	for(int i = 1; i < argc; ++i)
	{
		fputc(' ', stdout);
		fputs(argv[i], stdout);
	}

	long ticks_per_sec = sysconf(_SC_CLK_TCK);
	printf("\n%d iterations.\n", iterations);

	// Named "elapsed" rather than "time" to avoid shadowing time().
	double elapsed = (double)(stop - start) / (double)ticks_per_sec;
	// Report 0 instead of inf/nan when no simulation step was executed.
	double per_iteration = (iterations > 0) ? elapsed / iterations : 0.0;
	printf("elapsed: %f seconds.\navg: %f seconds per iteration.\n", elapsed, per_iteration);

	client_exit();
	gui_quit();

	free(body);

	return 0;
}