Exemple #1
0
int main(int argc, char **argv)
{
	srand((unsigned)time(NULL));

	Kernel kernel;
	CommandQueue queue;
	Context context;

	{
		std::vector<Platform> platformList;
		Platform::get(&platformList);

		clog << "Platform number is: " << platformList.size() << endl;

		std::string platformVendor;
		platformList[0].getInfo((cl_platform_info)CL_PLATFORM_VENDOR, &platformVendor);
		clog << "Platform is by: " << platformVendor << "\n";

		cl_context_properties cprops[] = {
			CL_CONTEXT_PLATFORM, (cl_context_properties) platformList[0](),
			0
		};
		context = Context(GET_TARGET_PLATFORM, cprops);

		std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
		queue = CommandQueue(context, devices[0]);

		std::string sourceCode = "#include \"es.cl\"\n";
		Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));
		Program program = Program(context, source);

		try
		{
			program.build(devices, "-I.");
		}
		catch (Error &)
		{
			std::string errors;
			program.getBuildInfo(devices[0], CL_PROGRAM_BUILD_LOG, &errors);
			std::cerr << "Build log: " << endl << errors << endl;
			return 1;
		}

		kernel = Kernel(program, "es");
	}

	individual *individuals = new individual[LAMBDA];
	for (int i = 0; i < LAMBDA; i++)
	{
		for (int j = 0; j < DIM; ++j)
		{
			individuals[i].x[j] = (rand()/((float)RAND_MAX)) * (XMAX-XMIN) + XMIN;
			individuals[i].s[j] = (XMAX-XMIN) / 6.f;
		}
		for (int j = 0; j < DIM_A; ++j)
		{
			individuals[i].a[j] = (rand()/((float)RAND_MAX)) * (2*PI) - PI;
		}
		
		individuals[i].fitness = 0;
	}

	float gbest = std::numeric_limits<float>::infinity(), xbest[DIM];
	
	Buffer esBuffer = Buffer(context, 0, INDIVIDUALS_SIZE);
	Event ev;
	queue.enqueueMapBuffer(esBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, INDIVIDUALS_SIZE);
	
	for (int i = 0; i < 1000; i++)
	{
		queue.enqueueWriteBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals);
		kernel.setArg(1, (cl_ulong)rand());
		kernel.setArg(0, esBuffer);
		queue.enqueueNDRangeKernel(kernel, NullRange, NDRange(LAMBDA), NDRange(1), NULL, &ev);
		ev.wait();
		queue.enqueueReadBuffer(esBuffer, CL_TRUE, 0, INDIVIDUALS_SIZE, individuals);
		
		std::sort(individuals, individuals + LAMBDA, individual_comp);
		individual mean = get_mean(individuals);
		for (int j = 0; j < LAMBDA; ++j)
		{
			individuals[j] = mean;
		}
	}
	gbest = individuals[0].fitness;
	for (int i = 0; i < DIM; ++i) xbest[i] = individuals[0].x[i];
	clog << "Best value " << gbest << " found at (";
	for (int i = 0; i < DIM; ++i) clog << xbest[i] << (i == DIM-1 ? ")" : ", ");
	clog << "\n";
	clog << "Our computation estemates it: f(" << xbest[0] << ", ..., " << xbest[DIM-1] << ") = " << es_f(xbest) << endl;

	delete[] individuals;

	return 0;
}
Exemple #2
0
/******************************************************************************
* main
******************************************************************************/
int main(int argc, char *argv[])
{
   /*-------------------------------------------------------------------------
   * Catch ctrl-c so we ensure that we call dtors and the dsp is reset properly
   *------------------------------------------------------------------------*/
   signal(SIGABRT, exit);
   signal(SIGTERM, exit);

   struct timespec tp_start, tp_end;

   try 
   {
     /*------------------------------------------------------------------------
     * One time OpenCL Setup
     *-----------------------------------------------------------------------*/
     Context             context(CL_DEVICE_TYPE_ALL); 
     std::vector<Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
 
     CommandQueue        *QcpuIO = NULL;
     CommandQueue        *QcpuOO = NULL;
     CommandQueue        *QdspOO = NULL;

     std::vector<Device> dspDevices;
     for (int d = 0; d < devices.size(); d++)
     {
	cl_device_type type;
	devices[d].getInfo(CL_DEVICE_TYPE, &type);

	if (type == CL_DEVICE_TYPE_CPU)
	{
	   QcpuIO = new CommandQueue(context, devices[d], PROFILE);
	   QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
	}
	else if (type == CL_DEVICE_TYPE_ACCELERATOR)
        {
	   QdspOO  = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
           dspDevices.push_back(devices[d]);
        }
     }

     if (QcpuIO == NULL)
     {
	std::cout << 
	"CPU devices are not fully supported in the current" << std::endl <<
	"OpenCL implementation (native kernel support only)." << std::endl << 
	"As a result, CPU devices are not enabled by " << std::endl <<
	"default.  This example uses OpenCL CPU native" << std::endl <<
	"kernels and can be run with the CPU device enabled." << std::endl << 
        "To enable a CPU device define the environment variable" << std::endl <<
        "'TI_OCL_CPU_DEVICE_ENABLE' before running the example." << std::endl;
	 exit(-1);
     }

     assert(QdspOO != NULL);

     Program::Sources    source (1, std::make_pair(kernStr, strlen(kernStr)));
     Program             program(Program(context, source));

     program.build(dspDevices);
     Kernel K(program, "compute");
     K.setArg(1, elements);

     /*------------------------------------------------------------------------
     * Define a Buffer for each possible in flight task
     *-----------------------------------------------------------------------*/
     std::vector<BufUP> bufs;
     for (int i = 0; i < inflight; ++i) 
         bufs.push_back(BufUP(new Buffer(context, CL_MEM_READ_WRITE, size)));

     /*------------------------------------------------------------------------
     * Define a 3-D vector of OpenCL Events.  1st dim is for the number of 
     * in flight tasks, the second dim is for the processing stages of a single
     * task.  The 3rd dim is an artifact of the c++ binding for event wait 
     * lists.  All enqueue API's take a wait list which is a vector<Event>*, and
     * they take an Event*.  All events in the wait list vector must complete,
     * before this event will execute.  The single event argument is for the 
     * event that will be set as a result of this enqueue. 
     *-----------------------------------------------------------------------*/
     vecVecVecEv evt(inflight, vecVecEv(STAGES, vecEv(1)));

     clock_gettime(CLOCK_MONOTONIC, &tp_start);

     /*------------------------------------------------------------------------
     * Iterate for as many tasks as there are
     *-----------------------------------------------------------------------*/
     for (int i = 0; i < tasks; ++i)
     {
        /*---------------------------------------------------------------------
        * Variables to ensure that this iteration is using the correct circular
        * resources: i.e. buffers and arrays.
        *--------------------------------------------------------------------*/
        int     circIdx = i % inflight;
        Buffer &buf(*bufs[circIdx]);
        int    *ary(arys [circIdx]);
        Event   nullEv;

        K.setArg(0, buf);

        /*---------------------------------------------------------------------
        * Since we are reusing N sets of buffers in this loop, we need to make
        * sure than iteration I does not start until after iteration I-N 
        * completes. Iterations < N can start immediately.
        *--------------------------------------------------------------------*/
        int    eIdx = circIdx;
        vecEv *start_waits = (i < inflight) ? 0 : &evt[eIdx][RUM];

        evt[circIdx][WMP][0] = nullEv;
        evt[circIdx][PRD][0] = nullEv;
        evt[circIdx][WUM][0] = nullEv;
        evt[circIdx][CMP][0] = nullEv;
        evt[circIdx][RMP][0] = nullEv;
        evt[circIdx][CNS][0] = nullEv;

        int *p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_WRITE, 
                                  0, size, start_waits,  &evt[eIdx][WMP][0]);

        evt[circIdx][RUM][0] = nullEv;

        /*---------------------------------------------------------------------
        * Native kernels are only passed a single pointer, so define a structure
        * that contains the actual arguments, populate that and then create 
        * a C++ binding native argument class that has the pointer and a size.
        *--------------------------------------------------------------------*/
        arguments_t proArgs = { p, elements, i,   i };
        native_arg_t proNargs(&proArgs, sizeof(proArgs));

        QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,          
                &evt[eIdx][WMP], &evt[eIdx][PRD][0]);

        QdspOO->enqueueUnmapMemObject(buf, p,                               
                &evt[eIdx][PRD], &evt[eIdx][WUM][0]);

        QdspOO->enqueueTask(K,                                    
                &evt[eIdx][WUM], &evt[eIdx][CMP][0]);

        p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_READ, 0, size,
                &evt[eIdx][CMP], &evt[eIdx][RMP][0]);

        arguments_t conArgs = { p, elements, i+1, i };
        native_arg_t conNargs(&conArgs, sizeof(conArgs));

        QcpuIO->enqueueNativeKernel (cpu_consume, conNargs, 0, 0,          
                &evt[eIdx][RMP], &evt[eIdx][CNS][0]);

        QdspOO->enqueueUnmapMemObject (buf, p,                               
                &evt[eIdx][CNS], &evt[eIdx][RUM][0]);
     }

     /*------------------------------------------------------------------------
     * Only need to wait for the CPU In Order queue to finish, since all all
     * other enqueue events must finish before the CPU IO queue can finish
     *-----------------------------------------------------------------------*/
     // QcpuIO.finish();
     QdspOO->finish();

     delete QcpuIO;
     delete QcpuOO;
     delete QdspOO;

     clock_gettime(CLOCK_MONOTONIC, &tp_end);
     double elapsed = clock_diff (&tp_start, &tp_end);
     printf("Elapsed : %8.4f secs\n", elapsed);

     /*------------------------------------------------------------------------
     * After the running is complete, report timing for each step
     *-----------------------------------------------------------------------*/
#if PROFILE 
     cl_ulong ref;
     evt[0][0][0].getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &ref);

     for (int i = 0; i < tasks; ++i)
     {
          for (int s = 0; s < STAGES; ++s)
              ocl_relative_times(evt[i][s][0], stage_names[s], ref);
          cout << endl;
     }
#endif
   }

   catch (Error err)
   {
       cerr << "ERROR: " << err.what() << "("
            << ocl_decode_error(err.err()) << ")"
            << endl;
       incorrect_results = true;
   }

   if (incorrect_results) return -1;
}