예제 #1
0
파일: ooo.cpp 프로젝트: rcn-ee/ti-opencl
/******************************************************************************
* main
******************************************************************************/
int main(int argc, char *argv[])
{
   /*-------------------------------------------------------------------------
   * Catch ctrl-c so we ensure that we call dtors and the dsp is reset properly
   *------------------------------------------------------------------------*/
   signal(SIGABRT, exit);
   signal(SIGTERM, exit);

   struct timespec tp_start, tp_end;

   try 
   {
     /*------------------------------------------------------------------------
     * One time OpenCL Setup
     *-----------------------------------------------------------------------*/
     Context             context(CL_DEVICE_TYPE_ALL); 
     std::vector<Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
 
     CommandQueue        *QcpuIO = NULL;
     CommandQueue        *QcpuOO = NULL;
     CommandQueue        *QdspOO = NULL;

     std::vector<Device> dspDevices;
     for (int d = 0; d < devices.size(); d++)
     {
	cl_device_type type;
	devices[d].getInfo(CL_DEVICE_TYPE, &type);

	if (type == CL_DEVICE_TYPE_CPU)
	{
	   QcpuIO = new CommandQueue(context, devices[d], PROFILE);
	   QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
	}
	else if (type == CL_DEVICE_TYPE_ACCELERATOR)
        {
	   QdspOO  = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
           dspDevices.push_back(devices[d]);
        }
     }

     if (QcpuIO == NULL)
     {
	std::cout << 
	"CPU devices are not fully supported in the current" << std::endl <<
	"OpenCL implementation (native kernel support only)." << std::endl << 
	"As a result, CPU devices are not enabled by " << std::endl <<
	"default.  This example uses OpenCL CPU native" << std::endl <<
	"kernels and can be run with the CPU device enabled." << std::endl << 
        "To enable a CPU device define the environment variable" << std::endl <<
        "'TI_OCL_CPU_DEVICE_ENABLE' before running the example." << std::endl;
	 exit(-1);
     }

     assert(QdspOO != NULL);

     Program::Sources    source (1, std::make_pair(kernStr, strlen(kernStr)));
     Program             program(Program(context, source));

     program.build(dspDevices);
     Kernel K(program, "compute");
     K.setArg(1, elements);

     /*------------------------------------------------------------------------
     * Define a Buffer for each possible in flight task
     *-----------------------------------------------------------------------*/
     std::vector<BufUP> bufs;
     for (int i = 0; i < inflight; ++i) 
         bufs.push_back(BufUP(new Buffer(context, CL_MEM_READ_WRITE, size)));

     /*------------------------------------------------------------------------
     * Define a 3-D vector of OpenCL Events.  1st dim is for the number of 
     * in flight tasks, the second dim is for the processing stages of a single
     * task.  The 3rd dim is an artifact of the c++ binding for event wait 
     * lists.  All enqueue API's take a wait list which is a vector<Event>*, and
     * they take an Event*.  All events in the wait list vector must complete,
     * before this event will execute.  The single event argument is for the 
     * event that will be set as a result of this enqueue. 
     *-----------------------------------------------------------------------*/
     vecVecVecEv evt(inflight, vecVecEv(STAGES, vecEv(1)));

     clock_gettime(CLOCK_MONOTONIC, &tp_start);

     /*------------------------------------------------------------------------
     * Iterate for as many tasks as there are
     *-----------------------------------------------------------------------*/
     for (int i = 0; i < tasks; ++i)
     {
        /*---------------------------------------------------------------------
        * Variables to ensure that this iteration is using the correct circular
        * resources: i.e. buffers and arrays.
        *--------------------------------------------------------------------*/
        int     circIdx = i % inflight;
        Buffer &buf(*bufs[circIdx]);
        int    *ary(arys [circIdx]);
        Event   nullEv;

        K.setArg(0, buf);

        /*---------------------------------------------------------------------
        * Since we are reusing N sets of buffers in this loop, we need to make
        * sure than iteration I does not start until after iteration I-N 
        * completes. Iterations < N can start immediately.
        *--------------------------------------------------------------------*/
        int    eIdx = circIdx;
        vecEv *start_waits = (i < inflight) ? 0 : &evt[eIdx][RUM];

        evt[circIdx][WMP][0] = nullEv;
        evt[circIdx][PRD][0] = nullEv;
        evt[circIdx][WUM][0] = nullEv;
        evt[circIdx][CMP][0] = nullEv;
        evt[circIdx][RMP][0] = nullEv;
        evt[circIdx][CNS][0] = nullEv;

        int *p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_WRITE, 
                                  0, size, start_waits,  &evt[eIdx][WMP][0]);

        evt[circIdx][RUM][0] = nullEv;

        /*---------------------------------------------------------------------
        * Native kernels are only passed a single pointer, so define a structure
        * that contains the actual arguments, populate that and then create 
        * a C++ binding native argument class that has the pointer and a size.
        *--------------------------------------------------------------------*/
        arguments_t proArgs = { p, elements, i,   i };
        native_arg_t proNargs(&proArgs, sizeof(proArgs));

        QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,          
                &evt[eIdx][WMP], &evt[eIdx][PRD][0]);

        QdspOO->enqueueUnmapMemObject(buf, p,                               
                &evt[eIdx][PRD], &evt[eIdx][WUM][0]);

        QdspOO->enqueueTask(K,                                    
                &evt[eIdx][WUM], &evt[eIdx][CMP][0]);

        p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_READ, 0, size,
                &evt[eIdx][CMP], &evt[eIdx][RMP][0]);

        arguments_t conArgs = { p, elements, i+1, i };
        native_arg_t conNargs(&conArgs, sizeof(conArgs));

        QcpuIO->enqueueNativeKernel (cpu_consume, conNargs, 0, 0,          
                &evt[eIdx][RMP], &evt[eIdx][CNS][0]);

        QdspOO->enqueueUnmapMemObject (buf, p,                               
                &evt[eIdx][CNS], &evt[eIdx][RUM][0]);
     }

     /*------------------------------------------------------------------------
     * Only need to wait for the CPU In Order queue to finish, since all all
     * other enqueue events must finish before the CPU IO queue can finish
     *-----------------------------------------------------------------------*/
     // QcpuIO.finish();
     QdspOO->finish();

     delete QcpuIO;
     delete QcpuOO;
     delete QdspOO;

     clock_gettime(CLOCK_MONOTONIC, &tp_end);
     double elapsed = clock_diff (&tp_start, &tp_end);
     printf("Elapsed : %8.4f secs\n", elapsed);

     /*------------------------------------------------------------------------
     * After the running is complete, report timing for each step
     *-----------------------------------------------------------------------*/
#if PROFILE 
     cl_ulong ref;
     evt[0][0][0].getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &ref);

     for (int i = 0; i < tasks; ++i)
     {
          for (int s = 0; s < STAGES; ++s)
              ocl_relative_times(evt[i][s][0], stage_names[s], ref);
          cout << endl;
     }
#endif
   }

   catch (Error err)
   {
       cerr << "ERROR: " << err.what() << "("
            << ocl_decode_error(err.err()) << ")"
            << endl;
       incorrect_results = true;
   }

   if (incorrect_results) return -1;
}
예제 #2
-1
/**
* Prepare and execute the OpenCL Kernel
*/
void executeCL(void) {
	Event wait;

	try {
		// Stage
		stageExecuteCL();

		// Execute
		cl_int err = m_queue.enqueueNDRangeKernel(m_kernel, NullRange, m_global, m_local, NULL, &wait);
		clPrintErr(err, "Execute Error -> ", stdout);
		wait.wait();
		m_queue.finish();

		// Collate
		collateExecuteCL();

	} catch(Error error) {
		std::cout << std::endl << error.what() << "(" << error.err() << ")" << std::endl;
		fail("OpenCL Error");
	}
}