Beispiel #1
0
 /* Lua C function: looks up the cl_context of the event object at stack
  * index 1, hands it to luacl_object<cl_context>::Wrap and reports one
  * result to Lua. */
 static int GetContext(lua_State *L) {
     cl_event ev = *traits::CheckObject(L, 1);

     /* Ask only for the value size first so it can be validated. */
     size_t reportedSize = 0;
     cl_int status = clGetEventInfo(ev, CL_EVENT_CONTEXT, 0, NULL, &reportedSize);
     CheckCLError(L, status, "Failed requesting size of context from event: %s.");
     if (LUACL_UNLIKELY(reportedSize != sizeof(cl_context))) {
         return luaL_error(L, "Failed requesting context from event: value size mismatch.");
     }

     /* Size matches a cl_context handle: fetch the handle itself. */
     cl_context ctx = NULL;
     status = clGetEventInfo(ev, CL_EVENT_CONTEXT, reportedSize, &ctx, NULL);
     CheckCLError(L, status, "Failed requesting context from event: %s.");

     luacl_object<cl_context>::Wrap(L, ctx);
     return 1;
 }
Beispiel #2
0
 /* Lua C function: looks up the cl_command_queue of the event object at
  * stack index 1, hands it to luacl_object<cl_command_queue>::Wrap and
  * reports one result to Lua. */
 static int GetCommandQueue(lua_State *L) {
     cl_event ev = *traits::CheckObject(L, 1);

     /* Ask only for the value size first so it can be validated. */
     size_t reportedSize = 0;
     cl_int status = clGetEventInfo(ev, CL_EVENT_COMMAND_QUEUE, 0, NULL, &reportedSize);
     CheckCLError(L, status, "Failed requesting size of command queue from event: %s.");
     if (LUACL_UNLIKELY(reportedSize != sizeof(cl_command_queue))) {
         return luaL_error(L, "Failed requesting command queue from event: value size mismatch.");
     }

     /* Size matches a cl_command_queue handle: fetch the handle itself. */
     cl_command_queue queue = NULL;
     status = clGetEventInfo(ev, CL_EVENT_COMMAND_QUEUE, reportedSize, &queue, NULL);
     CheckCLError(L, status, "Failed requesting command queue from event: %s.");

     luacl_object<cl_command_queue>::Wrap(L, queue);
     return 1;
 }
Beispiel #3
0
/**
 * Queries one property of the wrapped OpenCL event.
 *
 * @param param_name One of WebCL::EVENT_COMMAND_EXECUTION_STATUS,
 *                   WebCL::EVENT_COMMAND_TYPE or WebCL::EVENT_COMMAND_QUEUE.
 * @param es         Receives a WebCL exception when the event is invalid or
 *                   when clGetEventInfo reports an error.
 * @return The queried value boxed in a WebCLGetInfo; a default-constructed
 *         WebCLGetInfo on any failure or for an unsupported param_name.
 */
WebCLGetInfo WebCLEvent::getInfo(int param_name, ExceptionState& es) {
    printf("getInfo Called = %d\n", param_name);
    cl_int err = 0;
    cl_uint uint_units = 0;
    cl_command_type command_type = 0;
    cl_command_queue command_queue = 0;
    RefPtr<WebCLCommandQueue> cqObj = nullptr;
    if (m_cl_Event == NULL) {
        printf("Error: Invalid CL Event\n");
        es.throwWebCLException(
                WebCLException::INVALID_EVENT,
                WebCLException::invalidEventMessage);
        return WebCLGetInfo();
    }

    switch(param_name) {
        case WebCL::EVENT_COMMAND_EXECUTION_STATUS :
            err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                                 sizeof(cl_uint), &uint_units, NULL);
            if (err == CL_SUCCESS)
                return WebCLGetInfo(static_cast<unsigned int>(uint_units));
            break;
        case WebCL::EVENT_COMMAND_TYPE:
            err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_TYPE,
                                 sizeof(cl_command_type), &command_type, NULL);
            if (err == CL_SUCCESS)
                return WebCLGetInfo(static_cast<unsigned int>(command_type));
            break;
        case WebCL::EVENT_COMMAND_QUEUE:
            err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_QUEUE,
                                 sizeof(cl_command_queue), &command_queue,
                                 NULL);
            // Only wrap the handle once the query is known to have succeeded;
            // on failure fall through to the common exception path below so
            // the OpenCL error is reported rather than silently dropped.
            if (err != CL_SUCCESS)
                break;
            cqObj = WebCLCommandQueue::create(command_queue, m_context,
                                              NULL);
            if(cqObj == NULL) {
                // This is a failure path: the wrapper could not be created.
                printf("Error: Failed to wrap CL Event Command Queue\n");
                return WebCLGetInfo();
            }
            return WebCLGetInfo(PassRefPtr<WebCLCommandQueue>(cqObj));
        default:
            printf("Error: Unsupported Event Info type\n");
            return WebCLGetInfo();
    }
    // Reached only when a clGetEventInfo call above failed.
    WebCLException::throwException(err, es);
    return WebCLGetInfo();
}
Beispiel #4
0
/* static */
/* Forwards an info query to clGetEventInfo for the event wrapped by
 * aInstance. aInstance is down-cast to EventWrapper and the cast result is
 * passed through VALIDATE_ARG_POINTER before use (presumably bailing out
 * with an error when the cast yields NULL -- confirm against the macro). */
cl_int EventWrapper::eventInfoHelper (Wrapper const* aInstance, int aName,
                                      size_t aSize, void* aValueOut, size_t* aSizeOut) {
    cl_int status = CL_SUCCESS;
    EventWrapper const* eventWrapper = dynamic_cast<EventWrapper const*>(aInstance);
    VALIDATE_ARG_POINTER (eventWrapper, &status, status);

    return clGetEventInfo (eventWrapper->getWrapped (),
                           aName, aSize, aValueOut, aSizeOut);
}
Beispiel #5
0
/*
 * Finishes every command queue and, where an event is supplied for a queue,
 * checks that the command behind that event did not fail.
 *
 * numCommandQueues - number of entries in commandQueues (and in events,
 *                    when events is not NULL)
 * commandQueues    - queues to clFinish()
 * events           - optional per-queue events; may be NULL, and individual
 *                    entries may be NULL. Every non-NULL entry is released.
 *
 * Returns CL_SUCCESS, or the first error encountered: either the error
 * returned by clFinish()/clGetEventInfo(), or the (negative) execution
 * status of a failed command.
 */
cl_int
waitForSuccessfulFinish(
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_event *events)
{
    cl_int err = CL_SUCCESS;
    cl_uint i;

    for (i = 0; i < numCommandQueues; i++) {
        cl_int e;
        cl_int status;

        e = clFinish(commandQueues[i]);
        if ((events != NULL) && (events[i] != NULL)) {
            if (e == CL_SUCCESS) {
                status = CL_COMPLETE;
                e = clGetEventInfo(events[i], CL_EVENT_COMMAND_EXECUTION_STATUS,
                                   sizeof(status), &status, NULL);
                if ((e == CL_SUCCESS) && (status < 0)) {
                    /* A negative execution status already IS the OpenCL
                     * error code; negating it would yield a positive value
                     * that matches no error constant. */
                    e = status;
                }
            }
            clReleaseEvent(events[i]);
        }

        /* Keep only the first failure, but still process remaining queues. */
        if (err == CL_SUCCESS) {
            err = e;
        }
    }

    return err;
}
Beispiel #6
0
/*
 * Event callback that adds a kernel's execution time to its statistics.
 *
 * event                     - the event being reported; it is released by
 *                             this callback once the timestamps are read
 * event_command_exec_status - unused
 * user_data                 - the struct ld_kernel_s whose exec_span_ns is
 *                             incremented
 *
 * The statistics update is performed while holding stats_lock.
 */
static void CL_CALLBACK  kernel_profiler_cb (cl_event event,
                                             cl_int event_command_exec_status,
                                             void *user_data)
{
  cl_ulong tstart = 0, tstop = 0, len;
  struct ld_kernel_s *ldKernel = (struct ld_kernel_s *) user_data;

  pthread_mutex_lock(&stats_lock);
  clCheck(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(tstop), &tstop, NULL));
  clCheck(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(tstart), &tstart, NULL));

  if (tstart == 0ul || tstop == 0ul) {
    // invalid timestamps
    len = 0;
  } else if (tstart > tstop) {
    // tolerate swapped timestamps: always use the absolute difference
    len = tstart - tstop;
  } else {
    len = tstop - tstart;
  }

  ldKernel->exec_span_ns += len;
  pthread_mutex_unlock(&stats_lock);

  // Release only after the last query: dropping the reference first could
  // destroy the event while it is still being inspected.
  clReleaseEvent(event);
}
/* Non-blocking test of the asynchronous request tracked by async_channel.
 * Returns non-zero when the request has completed, 0 otherwise. Which
 * backend is consulted depends on async_channel->type; CPU RAM and unknown
 * kinds abort. */
unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel)
{
#ifdef STARPU_SIMGRID
	unsigned finished;
	/* The simulated event carries its own flag, guarded by its mutex. */
	STARPU_PTHREAD_MUTEX_LOCK(&async_channel->event.mutex);
	finished = async_channel->event.finished;
	STARPU_PTHREAD_MUTEX_UNLOCK(&async_channel->event.mutex);
	return finished;
#else /* !SIMGRID */
	enum starpu_node_kind node_kind = async_channel->type;
	unsigned completed = 0;

	switch (node_kind)
	{
#ifdef STARPU_USE_CUDA
	case STARPU_CUDA_RAM:
	{
		cudaEvent_t cuda_event = async_channel->event.cuda_event;
		cudaError_t query_res = cudaEventQuery(cuda_event);

		if (query_res == cudaSuccess)
		{
			/* Finished: the event is no longer needed. */
			completed = 1;
			cudaEventDestroy(cuda_event);
		}
		else if (query_res != cudaErrorNotReady)
			STARPU_CUDA_REPORT_ERROR(query_res);
		break;
	}
#endif
#ifdef STARPU_USE_OPENCL
	case STARPU_OPENCL_RAM:
	{
		cl_int exec_status;
		cl_int err;
		cl_event opencl_event = async_channel->event.opencl_event;

		if (opencl_event == NULL)
			STARPU_ABORT();
		err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(exec_status), &exec_status, NULL);
		if (STARPU_UNLIKELY(err != CL_SUCCESS))
			STARPU_OPENCL_REPORT_ERROR(err);
		/* A negative execution status is reported as an error. */
		if (exec_status < 0)
			STARPU_OPENCL_REPORT_ERROR(exec_status);
		completed = (exec_status == CL_COMPLETE);
		break;
	}
#endif
#ifdef STARPU_USE_MIC
	case STARPU_MIC_RAM:
		completed = _starpu_mic_request_is_complete(&(async_channel->event.mic_event));
		break;
#endif
	case STARPU_DISK_RAM:
		completed = starpu_disk_test_request(async_channel);
		break;
	case STARPU_CPU_RAM:
	default:
		STARPU_ABORT();
	}

	return completed;
#endif /* !SIMGRID */
}
/*! \brief Returns the reference count of this Event object.
  *
  * Queries CL_EVENT_REFERENCE_COUNT through clGetEventInfo.
  * Throws std::runtime_error when the event id is null.
  */
size_t ocl::Event::reference_count() const
{
    if (_id == nullptr) {
        throw std::runtime_error("id not valid");
    }

    cl_uint info;
    OPENCL_SAFE_CALL( clGetEventInfo (_id, CL_EVENT_REFERENCE_COUNT, sizeof(info), &info, NULL)) ;
    return static_cast<size_t>(info);
}
Beispiel #9
0
// Drives one multi-queue test: prepares the data, launches the method under
// test, then finishes every queue and asserts that the matching output event
// reports CL_COMPLETE.
template <typename M> void
MQueueClass<M>::run()
{
    bool b = metod.prepareDataToRun();
    ASSERT_EQ(b, true);

    const int queueCount = metod.qnum;
    metod.initOutEvent();

    cl_int err = metod.run();
    ASSERT_EQ(err, CL_SUCCESS);

    cl_int ret = CL_SUCCESS;
    int q = 0;
    while (q < queueCount) {
        // Drain the queue before inspecting its output event.
        err = clFinish(metod.queues[q]);
        ASSERT_EQ(err, CL_SUCCESS) << "clFinish()";

        err = clGetEventInfo(metod.outEvent[q], CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int), &ret, NULL);
        ASSERT_EQ(err, CL_SUCCESS) << "clGetEventInfo()";
        ASSERT_EQ(ret, CL_COMPLETE) << "clGetEventInfo()";

        ++q;
    }
}
// Busy-waits until every event in event_list has finished, then releases
// all of them.
//
// num_events - number of entries in event_list
// event_list - events to wait on; each one is released before returning
//
// Returns CL_SUCCESS when every event reached CL_COMPLETE. Otherwise returns
// the first failure seen: the error from clGetEventInfo, or the negative
// execution status of a command that terminated abnormally.
cl_int spinForEventsComplete( cl_uint num_events, cl_event *event_list )
{
    cl_int ret = CL_SUCCESS;
#if 0
    ret = clWaitForEvents( num_events, event_list );
#else
    for( cl_uint e = 0; e < num_events; e++ )
    {
        for( ;; )
        {
            cl_int exec_status = CL_QUEUED;
            const cl_int err = clGetEventInfo( event_list[ e ],
                                               CL_EVENT_COMMAND_EXECUTION_STATUS,
                                               sizeof( cl_int ),
                                               &exec_status,
                                               NULL );

            // Stop spinning on this event if it can no longer be queried or
            // if it failed: a negative status never becomes CL_COMPLETE.
            if( err != CL_SUCCESS || exec_status < 0 )
            {
                if( ret == CL_SUCCESS )
                    ret = ( err != CL_SUCCESS ) ? err : exec_status;
                break;
            }

            if( exec_status == CL_COMPLETE )
                break;
        }
    }
#endif

    for( cl_uint e = 0; e < num_events; e++ )
        clReleaseEvent( event_list[e] );

    return ret;
}
Beispiel #11
0
/*! \brief Returns the reference count of this Event object.
  *
  * Queries CL_EVENT_REFERENCE_COUNT through clGetEventInfo after asserting
  * that the event id is non-zero.
  */
size_t ocl::Event::reference_count() const
{
    TRUE_ASSERT(_id != 0, "Cannot get reference count for this. Not yet created.");

    cl_uint info;
    OPENCL_SAFE_CALL( clGetEventInfo (_id, CL_EVENT_REFERENCE_COUNT, sizeof(info), &info, NULL)) ;

    return static_cast<size_t>(info);
}
Beispiel #12
0
/* Thunk for clGetEventInfo: emits a trace line, then forwards every argument
 * unchanged to clGetEventInfo and returns its result. */
cl_int WINAPI wine_clGetEventInfo(cl_event event, cl_event_info param_name, size_t param_value_size,
                                  void * param_value, size_t * param_value_size_ret)
{
    TRACE("\n");
    return clGetEventInfo(event, param_name, param_value_size,
                          param_value, param_value_size_ret);
}
Beispiel #13
0
int WebCLEvent::getStatus()
{
    cl_int intUnits = 0;
    cl_int err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &intUnits, nullptr);
    if (err == CL_SUCCESS)
        return static_cast<int>(intUnits);
    return CL_INVALID_VALUE;
}
/*! \brief Instantiates an Event returned by a command Queue instruction.
  *
  * Do not instantiate user events with this constructor.
  *
  * Throws std::runtime_error when either argument is null or when the
  * event's OpenCL context differs from the one wrapped by \p ctxt.
  *
  * \param id is an OpenCL event id provided by the creating command Queue instruction.
  * \param ctxt is a valid Context which is the same as the command queue Context.
  */
ocl::Event::Event(cl_event id, ocl::Context* ctxt) : _id(id), _ctxt(ctxt)
{
    if (_id == nullptr) {
        throw std::runtime_error("Event not valid");
    }
    if (_ctxt == nullptr) {
        throw std::runtime_error("Context not valid");
    }

    // Ask OpenCL which context the event belongs to and compare it with ours.
    cl_context cl_ctxt = 0;
    OPENCL_SAFE_CALL( clGetEventInfo (this->id(), CL_EVENT_CONTEXT , sizeof(cl_ctxt), &cl_ctxt, NULL));
    if (_ctxt->id() != cl_ctxt) {
        throw std::runtime_error("Contexts must be the same");
    }
}
Beispiel #15
0
/*! \brief Instantiates an Event returned by a command Queue instruction.
  *
  * Do not instantiate user events with this constructor.
  *
  * Asserts that both arguments are non-zero and that the event's OpenCL
  * context equals the one wrapped by \p ctxt.
  *
  * \param id is an OpenCL event id provided by the creating command Queue instruction.
  * \param ctxt is a valid Context which is the same as the command queue Context.
  */
ocl::Event::Event(cl_event id, ocl::Context* ctxt)
    : _id(id)
    , _ctxt(ctxt)
{
    TRUE_ASSERT(id != 0, "Event not valid.");
    TRUE_ASSERT(ctxt != 0, "Context not valid");

    // Ask OpenCL which context the event belongs to and compare it with ours.
    cl_context cl_ctxt = 0;
    OPENCL_SAFE_CALL( clGetEventInfo (this->id(), CL_EVENT_CONTEXT , sizeof(cl_ctxt), &cl_ctxt, NULL));

    TRUE_ASSERT(_ctxt->id() == cl_ctxt, "Context must be the same");
}
/*
 * Busy-waits for *event to finish and then releases it.
 *
 * Returns 0 (CL_SUCCESS) when the event reached CL_COMPLETE. If the status
 * query fails, or the command terminated abnormally (negative execution
 * status), polling stops and that error code is returned instead. The event
 * is released in every case.
 */
int
decipherEvent(cl_event* event) {
    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE) {
        status = clGetEventInfo(*event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL);
        if (status != CL_SUCCESS) {
            /* The event cannot be queried; polling again would never end. */
            break;
        }
        if (eventStatus < 0) {
            /* A failed command never reaches CL_COMPLETE. */
            status = eventStatus;
            break;
        }
    }
    clReleaseEvent(*event);
    return status;
}
// Returns the command execution status of this event as reported by
// clGetEventInfo; a failed query is passed to ThrowIf.
cl_int CLWEvent::GetCommandExecutionStatus() const
{
    cl_int execStatus;
    const cl_int queryResult = clGetEventInfo(*this,
                                              CL_EVENT_COMMAND_EXECUTION_STATUS,
                                              sizeof(cl_int),
                                              &execStatus,
                                              nullptr);

    ThrowIf(queryResult != CL_SUCCESS, queryResult, "clGetEventInfo failed");

    return execStatus;
}
 /**
  * waitForEventAndRelease
  * Polls an event until it completes and releases the event afterwards.
  * If the command behind the event terminates abnormally (negative
  * execution status) polling stops, the event is released and that
  * negative status is returned.
  * @param event cl_event object
  * @return 0 if success else nonzero
  */
 static int waitForEventAndRelease(cl_event *event) {
   cl_int status = CL_SUCCESS;
   cl_int eventStatus = CL_QUEUED;
   while (eventStatus != CL_COMPLETE) {
     status = clGetEventInfo(*event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int), &eventStatus, NULL);
     CHECK_BOLT_ERROR(status, "clGetEventEventInfo Failed with Error Code:");
     if (eventStatus < 0) {
       // A failed command never reaches CL_COMPLETE; without this check the
       // loop would spin forever.
       clReleaseEvent(*event);
       return eventStatus;
     }
   }
   status = clReleaseEvent(*event);
   CHECK_BOLT_ERROR(status, "clReleaseEvent Failed with Error Code:");
   return SDK_SUCCESS;
 }
Beispiel #19
0
/* Returns the version reported by clGetCommandQueueVersion for the command
 * queue that owns the event. When the queue cannot be queried, a version
 * with major and minor set to 0 is returned. */
struct _cl_version clGetEventVersion(cl_event event)
{
    struct _cl_version unknown;
    cl_command_queue queue = NULL;
    cl_int status;

    unknown.major = 0;
    unknown.minor = 0;

    status = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE,
                            sizeof(cl_command_queue), &queue, NULL);
    if (status == CL_SUCCESS)
        return clGetCommandQueueVersion(queue);

    return unknown;
}
Beispiel #20
0
 // Fetches one clGetEventInfo property of `id` as a Value.
 // On a null event or a failed query the error is passed to reportError and
 // Value(-1) is returned.
 Value eventInfo(cl_event id, cl_event_info info)
 {
     cl_int error = CL_SUCCESS;
     Value value;

     if (id)
     {
         error = clGetEventInfo(id, info, sizeof(Value), &value, nullptr);
         if (error == CL_SUCCESS)
             return value;
     }

     // Either there was no event (error is still CL_SUCCESS) or the query failed.
     reportError("eventInfo(): ", error);
     return Value(-1);
 }
Beispiel #21
0
// Returns the requested event property as a script value.
// Execution status and command type are queried from OpenCL; context and
// command queue come from the objects held by this wrapper. A released
// event, an unknown paramName or a failed query raises a WebCL exception on
// `es` and yields a null script value.
ScriptValue WebCLEvent::getInfo(ScriptState* scriptState, unsigned paramName, ExceptionState& es)
{
    v8::Handle<v8::Object> creationContext = scriptState->context()->Global();
    v8::Isolate* isolate = scriptState->isolate();

    if (isReleased()) {
        es.throwWebCLException(WebCLException::INVALID_EVENT, WebCLException::invalidEventMessage);
        return ScriptValue(scriptState, v8::Null(isolate));
    }

    cl_int err = CL_SUCCESS;
    switch (paramName) {
    case CL_EVENT_COMMAND_EXECUTION_STATUS: {
        cl_int executionStatus = 0;
        err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &executionStatus, nullptr);
        if (err != CL_SUCCESS)
            break;
        return ScriptValue(scriptState, v8::Integer::New(isolate, static_cast<int>(executionStatus)));
    }
    case CL_EVENT_COMMAND_TYPE: {
        cl_command_type commandType = 0;
        err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, nullptr);
        if (err != CL_SUCCESS)
            break;
        return ScriptValue(scriptState, v8::Integer::NewFromUnsigned(isolate, static_cast<unsigned>(commandType)));
    }
    case CL_EVENT_CONTEXT:
        ASSERT(!isUserEvent());
        return ScriptValue(scriptState, toV8(context(), creationContext, isolate));
    case CL_EVENT_COMMAND_QUEUE:
        ASSERT(m_commandQueue);
        ASSERT(!isUserEvent());
        return ScriptValue(scriptState, toV8(m_commandQueue, creationContext, isolate));
    default:
        es.throwWebCLException(WebCLException::INVALID_VALUE, WebCLException::invalidValueMessage);
        return ScriptValue(scriptState, v8::Null(isolate));
    }

    // Only reached when one of the clGetEventInfo queries above failed.
    WebCLException::throwException(err, es);
    return ScriptValue(scriptState, v8::Null(isolate));
}
/* Optionally manually poll for event completion.  This is because some
   versions of both the Nvidia and AMD drivers developed an issue
   where clWaitForEvents/clFinish would cause 100% CPU usage.

   ci->pollingMode selects the strategy:
     <= MW_POLL_CL_WAIT_FOR_EVENTS       plain clWaitForEvents()
     == MW_POLL_SLEEP_CL_WAIT_FOR_EVENTS flush, sleep initialWait ms, then
                                         clWaitForEvents()
     otherwise                           flush, sleep initialWait ms, then
                                         poll the event status every
                                         pollingMode milliseconds

   Returns CL_SUCCESS once the event has completed, otherwise the OpenCL
   error. When polling, a command that terminated abnormally is reported by
   returning its negative execution status.
*/
cl_int mwCLWaitForEvent(CLInfo* ci, cl_event ev, cl_uint initialWait)
{
    cl_int err;
    cl_int execStatus;
    cl_int pollingMode = ci->pollingMode;

    if (pollingMode <= MW_POLL_CL_WAIT_FOR_EVENTS)
    {
        return clWaitForEvents(1, &ev);
    }

    /* Both remaining modes start the same way: make sure the task is
       submitted before we wait for it, then give it a head start. */
    err = clFlush(ci->queue);
    if (err != CL_SUCCESS)
        return err;

    mwMilliSleep(initialWait);

    if (pollingMode == MW_POLL_SLEEP_CL_WAIT_FOR_EVENTS)
    {
        return clWaitForEvents(1, &ev);
    }

    /* Manually poll every pollingMode milliseconds */
    for (;;)
    {
        err = clGetEventInfo(ev,
                             CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int),
                             &execStatus,
                             NULL);
        if (err != CL_SUCCESS)
            return err;

        /* Check before sleeping so a finished event is reported at once. */
        if (execStatus == CL_COMPLETE)
            return CL_SUCCESS;

        /* A negative status means the command failed and will never reach
           CL_COMPLETE; report it instead of polling forever. */
        if (execStatus < 0)
            return execStatus;

        mwMilliSleep(pollingMode);
    }
}
Beispiel #23
0
    void wait_and_check( )
    {
        cl_int wait_status = ::clWaitForEvents( 1, &event );

        if( wait_status != CL_SUCCESS )
        {
            if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
            {
                cl_int err;
                clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                    sizeof( cl_int ), &err, NULL );
                std::cout << "blas function execution status error: " << err << std::endl;
            }
            else
            {
                std::cout << "blas function wait status error: " << wait_status << std::endl;
            }
        }
    }
Beispiel #24
0
// Issues `apiCallCount` clblasSgemm calls using the buffers and queues held
// by this object.
//
// flush == true  : only the kernel time (library call) is timed, so the
//                  queues are flushed and every valid returned event is
//                  waited on before returning.
// flush == false : memory time is also timed; nothing is waited on here.
void 
xGemm<cl_float>::
xGemm_Function(bool flush, cl_uint apiCallCount )
{
    for (unsigned int i = 0; i < numQueues; i++) {
        events_[i] = NULL;
    }

    for (unsigned int i = 0; i < apiCallCount; i++)
    {
        clblasSgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
            buffer_.m_, buffer_.n_, buffer_.k_, buffer_.alpha_,
            buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
            buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
            buffer_.beta_, buffer_.buf_c_, buffer_.offC_,
            buffer_.ldc_, numQueuesToUse, queues_, 0, NULL, events_);
    }

    if (flush)
    {
        for (unsigned int i = 0; i < numQueuesToUse; i++) {
            clFlush(queues_[i]);
        }

        // Wait on each valid event individually. Waiting on the first N
        // entries of events_ (N = number of valid events) is only correct
        // when the valid events happen to be contiguous at the front, and
        // passes a count of 0 when no event was returned at all.
        for (unsigned int i = 0; i < numQueuesToUse; i++) {
            if (events_[i] == NULL) {
                continue;
            }
            // An event counts as valid when its reference count can be queried.
            cl_uint clReferenceCount;
            const cl_int err = clGetEventInfo(events_[i], CL_EVENT_REFERENCE_COUNT,
                                              sizeof(clReferenceCount), &clReferenceCount, NULL);
            if (err == CL_SUCCESS) {
                clWaitForEvents(1, &events_[i]);
            }
        }
    }
}
Beispiel #25
0
/* Non-blocking query of an OpenCL event.
 *
 * event       - pointer to a cl_event, passed as void*
 * has_occured - set to 1 when the event's execution status is CL_COMPLETE,
 *               to 0 otherwise
 *
 * Returns 0 on success, -1 when acc_opencl_error_check flags the
 * clGetEventInfo result. */
int acc_event_query (void *event, int *has_occured){
  cl_int exec_status;
  cl_event *clevent = (cl_event *) event;

  if (verbose_print){
    fprintf(stdout, "\n ... EVENT QUERYING ... \n");
    fprintf(stdout, " ---> Entering: acc_event_query.\n");
  }

  /* Fetch the execution status without blocking. */
  cl_error = clGetEventInfo(*clevent, CL_EVENT_COMMAND_EXECUTION_STATUS,
                            (size_t) sizeof(cl_int), &exec_status, NULL);
  if (acc_opencl_error_check(cl_error, __LINE__))
    return -1;

  *has_occured = (exec_status == CL_COMPLETE) ? 1 : 0;

  if (verbose_print){
    fprintf(stdout, "Leaving: acc_event_query.\n");
    fprintf(stdout, "      Result: %d\n", *has_occured);
    fprintf(stdout, " ---> Entering: acc_event_query.\n");
  }

  return 0;
}
Beispiel #26
0
// Queries one property of the wrapped OpenCL event via clGetEventInfo.
//
// param_name     - property to query
// param_size     - size in bytes of the buffer behind `param`
// param          - receives the property value
// param_size_ret - optional; receives the actual size of the value
//
// Returns true on success; false when there is no event id or when
// clGetEventInfo reports an error.
bool
CLEvent::get_cl_event_info (
    cl_event_info param_name, size_t param_size,
    void *param, size_t *param_size_ret)
{
    cl_int error_code = CL_SUCCESS;

    XCAM_FAIL_RETURN (
        DEBUG,
        _event_id,
        false,
        "cl event wait failed, there's no event id");

    // Capture the result: without the assignment error_code stays CL_SUCCESS
    // and the failure check below can never trigger.
    error_code = clGetEventInfo (_event_id, param_name, param_size, param, param_size_ret);

    XCAM_FAIL_RETURN(
        WARNING,
        error_code == CL_SUCCESS,
        false,
        "clGetEventInfo failed on param:%d, errno:%d", param_name, error_code);
    return true;
}
//-----------------------------------------------------------------------------
// Name: ReleaseTexturesFromOpenCL()
// Desc: Release Textures from OpenCL
//-----------------------------------------------------------------------------
void ReleaseTexturesFromOpenCL()
{
	cl_event event;
	cl_mem memToAcquire[6+1+1];
	memToAcquire[0] = g_texture_2d.clTexture;
	memToAcquire[1] = g_texture_vol.clTexture;
	memToAcquire[2] = g_texture_cube.clTexture[0];
	memToAcquire[3] = g_texture_cube.clTexture[1];
	memToAcquire[4] = g_texture_cube.clTexture[2];
	memToAcquire[5] = g_texture_cube.clTexture[3];
	memToAcquire[6] = g_texture_cube.clTexture[4];
	memToAcquire[7] = g_texture_cube.clTexture[5];
    // do the acquire
    ciErrNum = clEnqueueReleaseD3D9ObjectsNV(
        cqCommandQueue,
        6 + 1 + 1, //cube map + tex2d + volume texture
        memToAcquire,
        0,
        NULL,
        &event);
	oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);

    // make sure the event type is correct
    cl_uint eventType = 0;
    ciErrNum = clGetEventInfo(
        event,
        CL_EVENT_COMMAND_TYPE,
        sizeof(eventType),
        &eventType,
        NULL);
	oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
    if(eventType != CL_COMMAND_RELEASE_D3D9_OBJECTS_NV)
	{
		shrLog("event type is not CL_COMMAND_RELEASE_D3D9_OBJECTS_NV !\n");
	}
    ciErrNum = clReleaseEvent(event);
	oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup);
}
// Runs the global-atomics kernel once:
//   1. writes initValue into globalOutBuf,
//   2. launches globalKernel over `length` work items,
//   3. adds the kernel's profiled execution time (seconds) to kTimeAtomGlobal,
//   4. reads the result back into globalOut.
// Returns SDK_SUCCESS at the end; each CHECK_OPENCL_ERROR call site handles
// a failing OpenCL status.
int AtomicCounters::runGlobalAtomicKernel() {
  cl_int status = CL_SUCCESS;
  // Set Global and Local work items
  size_t globalWorkItems = length;
  size_t localWorkItems = globalWorkGroupSize;
  // Initialize the counter value
  cl_event writeEvt;
  status =
      clEnqueueWriteBuffer(commandQueue, globalOutBuf, CL_FALSE, 0,
                           sizeof(cl_uint), &initValue, 0, NULL, &writeEvt);
  CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer(globalOutBuf) failed.");
  status = clFlush(commandQueue);
  CHECK_OPENCL_ERROR(status, "clFlush() failed.");
  // Wait for event and release event
  status = waitForEventAndRelease(&writeEvt);
  CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(writeEvt) failed.");
  // Set kernel arguments
  status = clSetKernelArg(globalKernel, 0, sizeof(cl_mem), &inBuf);
  CHECK_OPENCL_ERROR(status, "clSetKernelArg(inBuf) failed.");
  status = clSetKernelArg(globalKernel, 1, sizeof(cl_uint), &value);
  CHECK_OPENCL_ERROR(status, "clSetKernelArg(value) failed.");
  status = clSetKernelArg(globalKernel, 2, sizeof(cl_mem), &globalOutBuf);
  CHECK_OPENCL_ERROR(status, "clSetKernelArg(globalOutBuf) failed.");
  // Run Kernel
  cl_event ndrEvt;
  status = clEnqueueNDRangeKernel(commandQueue, globalKernel, 1, NULL,
                                  &globalWorkItems, &localWorkItems, 0, NULL,
                                  &ndrEvt);
  CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel(globalKernel) failed.");
  status = clFlush(commandQueue);
  CHECK_OPENCL_ERROR(status, "clFlush(commandQueue) failed.");
  // Poll until the kernel has finished
  cl_int eventStatus = CL_QUEUED;
  while (eventStatus != CL_COMPLETE) {
    status = clGetEventInfo(ndrEvt, CL_EVENT_COMMAND_EXECUTION_STATUS,
                            sizeof(cl_int), &eventStatus, NULL);
    CHECK_OPENCL_ERROR(status, "clGetEventInfo(ndrEvt) failed.");
    if (eventStatus < 0) {
      // A negative status is the error code of a command that terminated
      // abnormally; it never becomes CL_COMPLETE. Route it through the same
      // error macro as every other failure here (the macro is assumed to
      // leave the function on a non-success code, as at the other call
      // sites -- confirm).
      CHECK_OPENCL_ERROR(eventStatus, "Kernel execution (ndrEvt) failed.");
    }
  }
  cl_ulong startTime;
  cl_ulong endTime;
  // Get profiling information
  status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_START,
                                   sizeof(cl_ulong), &startTime, NULL);
  CHECK_OPENCL_ERROR(
      status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_START) failed.");
  status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_END,
                                   sizeof(cl_ulong), &endTime, NULL);
  CHECK_OPENCL_ERROR(
      status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_END) failed.");
  // Profiling timestamps are converted to seconds with the 1e-9 factor
  double sec = 1e-9 * (endTime - startTime);
  kTimeAtomGlobal += sec;
  status = clReleaseEvent(ndrEvt);
  CHECK_OPENCL_ERROR(status, "clReleaseEvent(ndrEvt) failed.");
  // Get the occurrences of Value from atomicKernel
  cl_event readEvt;
  status = clEnqueueReadBuffer(commandQueue, globalOutBuf, CL_FALSE, 0,
                               sizeof(cl_uint), &globalOut, 0, NULL, &readEvt);
  CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer(globalOutBuf) failed.");
  status = clFlush(commandQueue);
  CHECK_OPENCL_ERROR(status, "clFlush() failed.");
  // Wait for event and release event
  status = waitForEventAndRelease(&readEvt);
  CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(readEvt) failed.");
  return SDK_SUCCESS;
}
int 
MatrixMulImage::runCLKernels(void)
{
    cl_int   status;

    /* 
     * Kernel runs over complete output matrix with blocks of blockSize x blockSize 
     * running concurrently
     */
    size_t globalThreads[2]= {width1 / 4, height0 / 8};
    size_t localThreads[2] = {blockSize, blockSize};

    status = kernelInfo.setKernelWorkGroupInfo(kernel, devices[deviceId]);
    CHECK_OPENCL_ERROR(status, "kernelInfo.setKernelWorkGroupInfo failed");
    
    availableLocalMemory = deviceInfo.localMemSize - kernelInfo.localMemoryUsed;
    neededLocalMemory = 2 * blockSize * blockSize * sizeof(cl_float); 
    if(neededLocalMemory > availableLocalMemory)
    {
        std::cout << "Unsupported: Insufficient local memory on device." << std::endl;
        return SDK_SUCCESS;
    }

    if((cl_uint)(localThreads[0]*localThreads[1]) > kernelInfo.kernelWorkGroupSize)
    {
       if(kernelInfo.kernelWorkGroupSize >= 64)
        {
            blockSize = 8; 
            localThreads[0] = blockSize;
            localThreads[1] = blockSize;
        }
        else if(kernelInfo.kernelWorkGroupSize >= 32)
        {
            blockSize = 4; 
            localThreads[0] = blockSize;
            localThreads[1] = blockSize;
        }
        else
        {
            std::cout << "Out of Resources!" << std::endl;
            std::cout << "Group Size specified : " << localThreads[0] * localThreads[1] << std::endl;
            std::cout << "Max Group Size supported on the kernel : " 
                      << kernelInfo.kernelWorkGroupSize<<std::endl;
            return SDK_FAILURE;
        }
    }

    if(localThreads[0] > deviceInfo.maxWorkItemSizes[0] ||
       localThreads[1] > deviceInfo.maxWorkItemSizes[1] ||
       localThreads[0]*localThreads[1] > deviceInfo.maxWorkGroupSize)
    {
        std::cout << "Unsupported: Device does not support requested number of work items." << std::endl;
        return SDK_FAILURE;
    }

    //For small matrix sizes
    while(globalThreads[0] % localThreads[0])
        localThreads[0] /= 2;

    while(globalThreads[1] % localThreads[1])
        localThreads[1] /= 2;

    // Set appropriate arguments to the kernel
    
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer0);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (outputBuffer)");
   
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&inputBuffer1);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer0)");
    
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&outputBuffer);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer1)");
   
    status = clSetKernelArg(kernel, 3, sizeof(cl_int),(void*)&width0);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width0)");
    
    status = clSetKernelArg(kernel, 4, sizeof(cl_int), &width1);
    CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width1)");
   
    // Enqueue a kernel run call
    cl_event ndrEvt;
    status = clEnqueueNDRangeKernel(
                 commandQueue,
                 kernel,
                 2,
                 NULL,
                 globalThreads,
                 localThreads,
                 0,
                 NULL,
                 &ndrEvt);
    CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed.");
   
    status = clFlush(commandQueue);
    CHECK_OPENCL_ERROR(status, "clFlush failed.");
   
    cl_int eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = clGetEventInfo(
                        ndrEvt, 
                        CL_EVENT_COMMAND_EXECUTION_STATUS, 
                        sizeof(cl_int),
                        &eventStatus,
                        NULL);
        CHECK_OPENCL_ERROR(status, "clGetEventInfo failed.");
        
    }

    // Calculate performance
    cl_ulong startTime;
    cl_ulong endTime;
    
    // Get kernel profiling info
    status = clGetEventProfilingInfo(ndrEvt,
                                     CL_PROFILING_COMMAND_START,
                                     sizeof(cl_ulong),
                                     &startTime,
                                     0);
    CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(startTime)");
   

    status = clGetEventProfilingInfo(ndrEvt,
                                     CL_PROFILING_COMMAND_END,
                                     sizeof(cl_ulong),
                                     &endTime,
                                     0);
    CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(endTime)");

    status = clReleaseEvent(ndrEvt);
    CHECK_OPENCL_ERROR(status, "clReleaseEvent failed.(ndrEvt)");
    
    // Print performance numbers
    double sec = 1e-9 * (endTime - startTime);
    std::cout << "KernelTime (ms) : " << sec * 1000 << std::endl;

    double flops = 2 * width0 * width1;
    double perf = (flops / sec) * height0 * 1e-9;
    
    std::cout << "GFlops achieved : " << perf << std::endl << std::endl;

    size_t origin[] = {0, 0, 0};
    size_t region[] = {width1 / 4, height0, 1};
    cl_event readEvt;
    status = clEnqueueReadImage(commandQueue,
                                outputBuffer,
                                CL_FALSE,
                                origin,
                                region,
                                0,
                                0,
                                output,
                                0,
                                NULL,
                                &readEvt);
    CHECK_OPENCL_ERROR(status, "outputBuffer failed.(clEnqueueReadImage)");
    
    status = clFlush(commandQueue);
    CHECK_OPENCL_ERROR(status, "clFlush failed.(commandQueue)");
    
    status = sampleCommon->waitForEventAndRelease(&readEvt);
    CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(readEvt) Failed");

    return SDK_SUCCESS;
}
Beispiel #30
0
/*! \brief Returns true if the executing command is submitted.
  *
  * Compares the event's CL_EVENT_COMMAND_EXECUTION_STATUS with CL_SUBMITTED.
  */
bool ocl::Event::isSubmitted () const
{
    cl_int info;
    OPENCL_SAFE_CALL(  clGetEventInfo (this->id(),CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(info), &info, NULL));

    const bool submitted = (info == CL_SUBMITTED);
    return submitted;
}