/// Lua-callable accessor: looks up the cl_context of the event found at
/// stack index 1, hands it to luacl_object<cl_context>::Wrap and reports one
/// return value to Lua.
static int GetContext(lua_State *L) {
    const cl_event evt = *traits::CheckObject(L, 1);

    // Step 1: ask the runtime how many bytes the value occupies.
    size_t reportedSize = 0;
    cl_int status = clGetEventInfo(evt, CL_EVENT_CONTEXT, 0, NULL, &reportedSize);
    CheckCLError(L, status, "Failed requesting size of context from event: %s.");

    // The value must be exactly one handle wide, otherwise the read below
    // would not fit the local variable.
    if (LUACL_UNLIKELY(reportedSize != sizeof(cl_context))) {
        return luaL_error(L, "Failed requesting context from event: value size mismatch.");
    }

    // Step 2: fetch the handle itself.
    cl_context ctx = NULL;
    status = clGetEventInfo(evt, CL_EVENT_CONTEXT, reportedSize, &ctx, NULL);
    CheckCLError(L, status, "Failed requesting context from event: %s.");

    luacl_object<cl_context>::Wrap(L, ctx);
    return 1;
}
/// Lua-callable accessor: looks up the cl_command_queue of the event found at
/// stack index 1, hands it to luacl_object<cl_command_queue>::Wrap and
/// reports one return value to Lua.
static int GetCommandQueue(lua_State *L) {
    const cl_event evt = *traits::CheckObject(L, 1);

    // Step 1: ask the runtime how many bytes the value occupies.
    size_t reportedSize = 0;
    cl_int status = clGetEventInfo(evt, CL_EVENT_COMMAND_QUEUE, 0, NULL, &reportedSize);
    CheckCLError(L, status, "Failed requesting size of command queue from event: %s.");

    // Reject anything that is not exactly one handle wide.
    if (LUACL_UNLIKELY(reportedSize != sizeof(cl_command_queue))) {
        return luaL_error(L, "Failed requesting command queue from event: value size mismatch.");
    }

    // Step 2: fetch the handle itself.
    cl_command_queue queue = NULL;
    status = clGetEventInfo(evt, CL_EVENT_COMMAND_QUEUE, reportedSize, &queue, NULL);
    CheckCLError(L, status, "Failed requesting command queue from event: %s.");

    luacl_object<cl_command_queue>::Wrap(L, queue);
    return 1;
}
/**
 * Queries one property of the wrapped OpenCL event.
 *
 * Supported values of @p param_name are WebCL::EVENT_COMMAND_EXECUTION_STATUS,
 * WebCL::EVENT_COMMAND_TYPE and WebCL::EVENT_COMMAND_QUEUE.
 *
 * @param param_name  WebCL event-info selector.
 * @param es          Receives an exception when the event is invalid or the
 *                    OpenCL query fails.
 * @return The requested value, or an empty WebCLGetInfo on any failure or for
 *         an unsupported selector.
 */
WebCLGetInfo WebCLEvent::getInfo(int param_name, ExceptionState& es)
{
    printf("getInfo Called = %d\n", param_name);

    if (m_cl_Event == NULL) {
        printf("Error: Invalid CL Event\n");
        es.throwWebCLException(
            WebCLException::INVALID_EVENT,
            WebCLException::invalidEventMessage);
        return WebCLGetInfo();
    }

    cl_int err = 0;
    switch (param_name) {
    case WebCL::EVENT_COMMAND_EXECUTION_STATUS: {
        cl_uint uint_units = 0;
        err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_uint), &uint_units, NULL);
        if (err == CL_SUCCESS)
            return WebCLGetInfo(static_cast<unsigned int>(uint_units));
        break;
    }
    case WebCL::EVENT_COMMAND_TYPE: {
        cl_command_type command_type = 0;
        err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_TYPE,
                             sizeof(cl_command_type), &command_type, NULL);
        if (err == CL_SUCCESS)
            return WebCLGetInfo(static_cast<unsigned int>(command_type));
        break;
    }
    case WebCL::EVENT_COMMAND_QUEUE: {
        cl_command_queue command_queue = 0;
        err = clGetEventInfo(m_cl_Event, CL_EVENT_COMMAND_QUEUE,
                             sizeof(cl_command_queue), &command_queue, NULL);
        // Only wrap the handle once the query is known to have succeeded;
        // otherwise command_queue is still the null placeholder.
        if (err != CL_SUCCESS)
            break;

        RefPtr<WebCLCommandQueue> cqObj = WebCLCommandQueue::create(command_queue, m_context, NULL);
        if (cqObj == NULL) {
            printf("Error: Failed to wrap CL Event Command Queue\n");
            return WebCLGetInfo();
        }
        return WebCLGetInfo(PassRefPtr<WebCLCommandQueue>(cqObj));
    }
    default:
        printf("Error: Unsupported Event Info type\n");
        return WebCLGetInfo();
    }

    // Reached only when one of the clGetEventInfo calls above failed.
    WebCLException::throwException(err, es);
    return WebCLGetInfo();
}
/**
 * Static helper that forwards an info query to clGetEventInfo for the event
 * wrapped by @p aInstance.
 *
 * @param aInstance  Generic wrapper; must actually be an EventWrapper.
 * @param aName      Info selector passed through to clGetEventInfo.
 * @param aSize      Size in bytes of the buffer behind @p aValueOut.
 * @param aValueOut  Destination buffer (passed through unchanged).
 * @param aSizeOut   Receives the value size (passed through unchanged).
 * @return The result of clGetEventInfo, or whatever VALIDATE_ARG_POINTER
 *         yields when the downcast fails.
 */
/* static */
cl_int EventWrapper::eventInfoHelper (Wrapper const* aInstance,
                                      int aName,
                                      size_t aSize,
                                      void* aValueOut,
                                      size_t* aSizeOut)
{
    cl_int status = CL_SUCCESS;

    // A failed dynamic_cast yields a null pointer, which the validation
    // macro is expected to catch.
    EventWrapper const* eventWrapper = dynamic_cast<EventWrapper const*>(aInstance);
    VALIDATE_ARG_POINTER (eventWrapper, &status, status);

    return clGetEventInfo (eventWrapper->getWrapped (),
                           aName,
                           aSize,
                           aValueOut,
                           aSizeOut);
}
/*
 * Finishes every command queue, checks the execution status of the matching
 * event (when one is supplied) and releases that event.
 *
 * numCommandQueues  number of entries in commandQueues (and events, if given)
 * commandQueues     queues to drain with clFinish
 * events            optional array of events, one per queue; entries may be
 *                   NULL; every non-NULL entry is released
 *
 * Returns CL_SUCCESS when everything succeeded, otherwise the first failure
 * encountered.
 *
 * NOTE(review): a negative execution status is returned negated (i.e. as a
 * positive number), which does not match the sign of regular OpenCL error
 * codes - confirm whether callers rely on this.
 */
cl_int
waitForSuccessfulFinish(
    cl_uint numCommandQueues,
    cl_command_queue *commandQueues,
    cl_event *events)
{
    cl_int firstFailure = CL_SUCCESS;
    cl_uint q;

    for (q = 0; q < numCommandQueues; q++) {
        cl_int result = clFinish(commandQueues[q]);
        const int haveEvent = (events != NULL) && (events[q] != NULL);

        if (haveEvent) {
            if (result == CL_SUCCESS) {
                cl_int execStatus = CL_COMPLETE;

                result = clGetEventInfo(events[q],
                                        CL_EVENT_COMMAND_EXECUTION_STATUS,
                                        sizeof(execStatus), &execStatus, NULL);
                if ((result == CL_SUCCESS) && (execStatus < 0)) {
                    result = -execStatus;
                }
            }
            /* The event is released whether or not the queue finished cleanly. */
            clReleaseEvent(events[q]);
        }

        /* Remember only the first failure; later queues are still drained. */
        if (firstFailure == CL_SUCCESS) {
            firstFailure = result;
        }
    }

    return firstFailure;
}
/*
 * Event callback that accumulates a kernel's execution time.
 *
 * Reads the start/end profiling timestamps of the event, adds their distance
 * to ldKernel->exec_span_ns and then drops the reference held on the event.
 * All of this happens while holding stats_lock.
 *
 * event                      event whose profiling data is read
 * event_command_exec_status  unused
 * user_data                  struct ld_kernel_s * whose exec_span_ns is updated
 */
static void CL_CALLBACK kernel_profiler_cb (cl_event event,
                                            cl_int event_command_exec_status,
                                            void *user_data)
{
    struct ld_kernel_s *ldKernel = (struct ld_kernel_s *) user_data;
    /* Plain locals: there is no reason to keep these values between calls. */
    cl_ulong tstart = 0;
    cl_ulong tstop = 0;
    cl_ulong len = 0;
    /* CL_EVENT_REFERENCE_COUNT is reported as a cl_uint. */
    cl_uint refcnt = 0;

    (void) event_command_exec_status;

    pthread_mutex_lock(&stats_lock);

    clCheck(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                    sizeof(tstop), &tstop, NULL));
    clCheck(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                                    sizeof(tstart), &tstart, NULL));
    clCheck(clGetEventInfo(event, CL_EVENT_REFERENCE_COUNT,
                           sizeof(refcnt), &refcnt, NULL));
    (void) refcnt;

    /* Use the absolute distance so swapped timestamps cannot wrap around. */
    len = tstop - tstart;
    if (tstart > tstop) {
        len = tstart - tstop;
    }
    if (tstart == 0ul || tstop == 0ul) {
        /* invalid timestamps */
        len = 0;
    }
    ldKernel->exec_span_ns += len;

    /*
     * Release only after the last query: releasing first could destroy the
     * event object while it is still being read.
     */
    clReleaseEvent(event);

    pthread_mutex_unlock(&stats_lock);
}
unsigned _starpu_driver_test_request_completion(struct _starpu_async_channel *async_channel) { #ifdef STARPU_SIMGRID unsigned ret; STARPU_PTHREAD_MUTEX_LOCK(&async_channel->event.mutex); ret = async_channel->event.finished; STARPU_PTHREAD_MUTEX_UNLOCK(&async_channel->event.mutex); return ret; #else /* !SIMGRID */ enum starpu_node_kind kind = async_channel->type; unsigned success = 0; #ifdef STARPU_USE_CUDA cudaEvent_t event; #endif switch (kind) { #ifdef STARPU_USE_CUDA case STARPU_CUDA_RAM: event = (*async_channel).event.cuda_event; cudaError_t cures = cudaEventQuery(event); success = (cures == cudaSuccess); if (success) cudaEventDestroy(event); else if (cures != cudaErrorNotReady) STARPU_CUDA_REPORT_ERROR(cures); break; #endif #ifdef STARPU_USE_OPENCL case STARPU_OPENCL_RAM: { cl_int event_status; cl_event opencl_event = (*async_channel).event.opencl_event; if (opencl_event == NULL) STARPU_ABORT(); cl_int err = clGetEventInfo(opencl_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); if (STARPU_UNLIKELY(err != CL_SUCCESS)) STARPU_OPENCL_REPORT_ERROR(err); if (event_status < 0) STARPU_OPENCL_REPORT_ERROR(event_status); success = (event_status == CL_COMPLETE); break; } #endif #ifdef STARPU_USE_MIC case STARPU_MIC_RAM: success = _starpu_mic_request_is_complete(&(async_channel->event.mic_event)); break; #endif case STARPU_DISK_RAM: success = starpu_disk_test_request(async_channel); break; case STARPU_CPU_RAM: default: STARPU_ABORT(); } return success; #endif /* !SIMGRID */ }
/*! \brief Queries OpenCL for the current reference count of this Event.
  *
  * \return The value reported for CL_EVENT_REFERENCE_COUNT.
  * \throws std::runtime_error if this Event has no underlying OpenCL id.
  */
size_t ocl::Event::reference_count() const
{
    if (_id == nullptr) {
        throw std::runtime_error("id not valid");
    }

    cl_uint info;
    OPENCL_SAFE_CALL( clGetEventInfo (_id, CL_EVENT_REFERENCE_COUNT, sizeof(info), &info, NULL)) ;

    return static_cast<size_t>(info);
}
/// Prepares and runs the method under test, then asserts for every queue that
/// clFinish succeeds and the queue's output event reports CL_COMPLETE.
template <typename M>
void MQueueClass<M>::run()
{
    cl_int err;
    cl_int ret = CL_SUCCESS;

    bool b = metod.prepareDataToRun();
    ASSERT_EQ(b, true);

    // Capture the queue count before the output events are initialised,
    // exactly as the sequence requires.
    const int queueCount = metod.qnum;
    metod.initOutEvent();

    err = metod.run();
    ASSERT_EQ(err, CL_SUCCESS);

    int q = 0;
    while (q < queueCount) {
        err = clFinish(metod.queues[q]);
        ASSERT_EQ(err, CL_SUCCESS) << "clFinish()";

        err = clGetEventInfo(metod.outEvent[q],
                             CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int), &ret, NULL);
        ASSERT_EQ(err, CL_SUCCESS) << "clGetEventInfo()";
        ASSERT_EQ(ret, CL_COMPLETE) << "clGetEventInfo()";

        ++q;
    }
}
/**
 * Busy-waits until every event in @p event_list has finished, then releases
 * all of them. (A blocking clWaitForEvents call would be the non-spinning
 * alternative.)
 *
 * Spinning on an event stops as soon as it reports CL_COMPLETE, the status
 * query itself fails, or the event reports a negative (error) execution
 * status - the latter two would otherwise spin forever.
 *
 * @param num_events  Number of entries in @p event_list.
 * @param event_list  Events to wait for; every entry is released before
 *                    returning.
 * @return CL_SUCCESS when all events completed normally, otherwise the first
 *         error code observed (query failure or negative execution status).
 */
cl_int spinForEventsComplete( cl_uint num_events, cl_event *event_list )
{
    cl_int ret = CL_SUCCESS;

    for( cl_uint e = 0; e < num_events; e++ )
    {
        cl_int exec_status = CL_QUEUED;

        while( exec_status != CL_COMPLETE )
        {
            const cl_int query_err = clGetEventInfo( event_list[ e ],
                                                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                                                     sizeof( cl_int ),
                                                     &exec_status,
                                                     NULL );
            if( query_err != CL_SUCCESS )
            {
                // The status is unknown from here on; do not keep spinning.
                if( ret == CL_SUCCESS )
                    ret = query_err;
                break;
            }
            if( exec_status < 0 )
            {
                // The command terminated abnormally and will never complete.
                if( ret == CL_SUCCESS )
                    ret = exec_status;
                break;
            }
        }
    }

    for( cl_uint e = 0; e < num_events; e++ )
        clReleaseEvent( event_list[ e ] );

    return ret;
}
/*! \brief Queries OpenCL for the current reference count of this Event.
  *
  * Asserts that the Event has an underlying OpenCL id before querying.
  *
  * \return The value reported for CL_EVENT_REFERENCE_COUNT.
  */
size_t ocl::Event::reference_count() const
{
    TRUE_ASSERT(_id != 0, "Cannot get reference count for this. Not yet created.");

    cl_uint info;
    OPENCL_SAFE_CALL( clGetEventInfo (_id, CL_EVENT_REFERENCE_COUNT, sizeof(info), &info, NULL)) ;

    return static_cast<size_t>(info);
}
/* WINAPI thunk: traces the call and forwards every argument unchanged to the
 * host clGetEventInfo, returning its result. */
cl_int WINAPI wine_clGetEventInfo(cl_event event,
                                  cl_event_info param_name,
                                  size_t param_value_size,
                                  void * param_value,
                                  size_t * param_value_size_ret)
{
    TRACE("\n");
    return clGetEventInfo(event,
                          param_name,
                          param_value_size,
                          param_value,
                          param_value_size_ret);
}
int WebCLEvent::getStatus() { cl_int intUnits = 0; cl_int err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &intUnits, nullptr); if (err == CL_SUCCESS) return static_cast<int>(intUnits); return CL_INVALID_VALUE; }
/*! \brief Wraps an event handed out by a command Queue instruction.
  *
  * User events must not be created through this constructor.
  *
  * \param id is the OpenCL event id produced by the command Queue instruction.
  * \param ctxt is the Context the command queue belongs to; it has to match
  *        the context OpenCL reports for the event.
  * \throws std::runtime_error if \p id or \p ctxt is null, or if the event's
  *         context differs from \p ctxt.
  */
ocl::Event::Event(cl_event id, ocl::Context* ctxt) :
    _id(id), _ctxt(ctxt)
{
    if (_id == nullptr) {
        throw std::runtime_error("Event not valid");
    }
    if (_ctxt == nullptr) {
        throw std::runtime_error("Context not valid");
    }

    // Ask OpenCL which context the event really belongs to.
    cl_context cl_ctxt = 0;
    OPENCL_SAFE_CALL( clGetEventInfo (this->id(), CL_EVENT_CONTEXT , sizeof(cl_ctxt), &cl_ctxt, NULL));

    const bool sameContext = (_ctxt->id() == cl_ctxt);
    if (!sameContext) {
        throw std::runtime_error("Contexts must be the same");
    }
}
/*! \brief Wraps an event handed out by a command Queue instruction.
  *
  * User events must not be created through this constructor.
  *
  * \param id is the OpenCL event id produced by the command Queue instruction.
  * \param ctxt is the Context the command queue belongs to; it is asserted to
  *        match the context OpenCL reports for the event.
  */
ocl::Event::Event(cl_event id, ocl::Context* ctxt) :
    _id(id), _ctxt(ctxt)
{
    TRUE_ASSERT(id != 0, "Event not valid.");
    TRUE_ASSERT(ctxt != 0, "Context not valid");

    // Ask OpenCL which context the event really belongs to.
    cl_context cl_ctxt = 0;
    OPENCL_SAFE_CALL( clGetEventInfo (this->id(), CL_EVENT_CONTEXT , sizeof(cl_ctxt), &cl_ctxt, NULL));

    TRUE_ASSERT(_ctxt->id() == cl_ctxt, "Context must be the same");
}
/**
 * Polls an event until its command has finished and then releases it.
 *
 * Polling stops when the event reports CL_COMPLETE, when the status query
 * fails, or when the event reports a negative (error) execution status; the
 * latter two cases would otherwise never terminate.
 *
 * @param event  Pointer to the event to wait for; the event is released
 *               before returning (unless the pointer or handle is null).
 * @return 0 on normal completion, otherwise the non-zero OpenCL error code.
 */
int decipherEvent(cl_event* event)
{
    if (event == NULL || *event == NULL)
    {
        return CL_INVALID_EVENT;
    }

    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;

    while (eventStatus != CL_COMPLETE)
    {
        status = clGetEventInfo(*event,
                                CL_EVENT_COMMAND_EXECUTION_STATUS,
                                sizeof(cl_int),
                                &eventStatus,
                                NULL);
        if (status != CL_SUCCESS)
        {
            // The query itself failed; eventStatus cannot be trusted.
            break;
        }
        if (eventStatus < 0)
        {
            // Abnormal termination: the status carries the error code.
            status = eventStatus;
            break;
        }
    }

    const cl_int releaseStatus = clReleaseEvent(*event);
    if (status == CL_SUCCESS)
    {
        status = releaseStatus;
    }

    return status;
}
/// Returns the command execution status of this event.
/// A failing clGetEventInfo call is reported through ThrowIf.
cl_int CLWEvent::GetCommandExecutionStatus() const
{
    cl_int executionStatus;
    const cl_int queryResult = clGetEventInfo(*this,
                                              CL_EVENT_COMMAND_EXECUTION_STATUS,
                                              sizeof(cl_int),
                                              &executionStatus,
                                              nullptr);

    ThrowIf(queryResult != CL_SUCCESS, queryResult, "clGetEventInfo failed");

    return executionStatus;
}
/**
 * waitForEventAndRelease
 * Polls an event until its command has finished and releases the event
 * afterwards. Polling also stops when the event reports a negative
 * execution status (abnormal termination), which would otherwise never
 * reach CL_COMPLETE; that status is then reported as an error after the
 * event has been released.
 * @param event cl_event object
 * @return 0 if success else nonzero
 */
static int waitForEventAndRelease(cl_event *event)
{
    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;

    while (eventStatus != CL_COMPLETE)
    {
        status = clGetEventInfo(*event,
                                CL_EVENT_COMMAND_EXECUTION_STATUS,
                                sizeof(cl_int),
                                &eventStatus,
                                NULL);
        CHECK_BOLT_ERROR(status, "clGetEventInfo Failed with Error Code:");

        if (eventStatus < 0)
        {
            // The command failed; its status will never become CL_COMPLETE.
            break;
        }
    }

    status = clReleaseEvent(*event);
    CHECK_BOLT_ERROR(status, "clReleaseEvent Failed with Error Code:");

    if (eventStatus < 0)
    {
        CHECK_BOLT_ERROR(eventStatus, "Event terminated abnormally with Error Code:");
    }

    return SDK_SUCCESS;
}
/*
 * Returns the version associated with an event by looking up the event's
 * command queue and delegating to clGetCommandQueueVersion.
 * When the command queue cannot be queried, version 0.0 is returned.
 */
struct _cl_version clGetEventVersion(cl_event event)
{
    cl_command_queue queue = NULL;
    struct _cl_version fallback;

    fallback.major = 0;
    fallback.minor = 0;

    if (clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE,
                       sizeof(cl_command_queue), &queue, NULL) != CL_SUCCESS)
    {
        return fallback;
    }

    return clGetCommandQueueVersion(queue);
}
/// Reads one event property of type Value via clGetEventInfo.
///
/// @param id    Event to query; a null handle is reported as CL_INVALID_EVENT
///              (rather than with the misleading code CL_SUCCESS).
/// @param info  Property selector passed to clGetEventInfo.
/// @return The queried value, or Value(-1) after reporting the error when the
///         handle is null or the query fails.
Value eventInfo(cl_event id, cl_event_info info)
{
    if(!id)
    {
        reportError("eventInfo(): ", CL_INVALID_EVENT);
        return Value(-1);
    }

    Value value;
    const cl_int error = clGetEventInfo(id, info, sizeof(Value), &value, nullptr);
    if(error != CL_SUCCESS)
    {
        reportError("eventInfo(): ", error);
        return Value(-1);
    }

    return value;
}
/**
 * Script-facing query of one property of this event.
 *
 * Execution status and command type are read from OpenCL; the context and
 * command queue are answered from the objects this wrapper already holds.
 *
 * @param scriptState  Script state used to build the returned value.
 * @param paramName    One of the CL_EVENT_* info selectors handled below.
 * @param es           Receives an exception for a released event, an
 *                     unsupported selector or a failed OpenCL query.
 * @return The requested value, or a null script value on failure.
 */
ScriptValue WebCLEvent::getInfo(ScriptState* scriptState, unsigned paramName, ExceptionState& es)
{
    v8::Handle<v8::Object> creationContext = scriptState->context()->Global();
    v8::Isolate* isolate = scriptState->isolate();

    if (isReleased()) {
        es.throwWebCLException(WebCLException::INVALID_EVENT, WebCLException::invalidEventMessage);
        return ScriptValue(scriptState, v8::Null(isolate));
    }

    cl_int err = CL_SUCCESS;

    switch (paramName) {
    case CL_EVENT_COMMAND_EXECUTION_STATUS: {
        cl_int executionStatus = 0;
        err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
                             sizeof(cl_int), &executionStatus, nullptr);
        if (err != CL_SUCCESS)
            break;
        return ScriptValue(scriptState,
                           v8::Integer::New(isolate, static_cast<int>(executionStatus)));
    }
    case CL_EVENT_COMMAND_TYPE: {
        cl_command_type type = 0;
        err = clGetEventInfo(m_clEvent, CL_EVENT_COMMAND_TYPE,
                             sizeof(cl_command_type), &type, nullptr);
        if (err != CL_SUCCESS)
            break;
        return ScriptValue(scriptState,
                           v8::Integer::NewFromUnsigned(isolate, static_cast<unsigned>(type)));
    }
    case CL_EVENT_CONTEXT:
        ASSERT(!isUserEvent());
        return ScriptValue(scriptState, toV8(context(), creationContext, isolate));
    case CL_EVENT_COMMAND_QUEUE:
        ASSERT(m_commandQueue);
        ASSERT(!isUserEvent());
        return ScriptValue(scriptState, toV8(m_commandQueue, creationContext, isolate));
    default:
        es.throwWebCLException(WebCLException::INVALID_VALUE, WebCLException::invalidValueMessage);
        return ScriptValue(scriptState, v8::Null(isolate));
    }

    // Only a failed clGetEventInfo call falls out of the switch.
    WebCLException::throwException(err, es);
    return ScriptValue(scriptState, v8::Null(isolate));
}
/* Optionally manually poll for event completion. This is because some
   versions of both the Nvidia and AMD drivers developed an issue where
   clWaitForEvents/clFinish would cause 100% CPU usage.

   ci           provides the polling mode and the queue to flush
   ev           event to wait for
   initialWait  milliseconds to sleep before the first check (sleeping modes)

   Behaviour by ci->pollingMode:
     <= MW_POLL_CL_WAIT_FOR_EVENTS        plain clWaitForEvents
     == MW_POLL_SLEEP_CL_WAIT_FOR_EVENTS  flush, sleep initialWait, then
                                          clWaitForEvents
     otherwise                            flush, sleep initialWait, then poll
                                          the execution status every
                                          pollingMode milliseconds

   Returns CL_SUCCESS once the event completed, the error of a failing OpenCL
   call, or the (negative) execution status if the command terminated
   abnormally. */
cl_int mwCLWaitForEvent(CLInfo* ci, cl_event ev, cl_uint initialWait)
{
    cl_int err;
    cl_int pollingMode = ci->pollingMode;

    if (pollingMode <= MW_POLL_CL_WAIT_FOR_EVENTS)
    {
        return clWaitForEvents(1, &ev);
    }
    else if (pollingMode == MW_POLL_SLEEP_CL_WAIT_FOR_EVENTS)
    {
        err = clFlush(ci->queue);
        if (err != CL_SUCCESS)
            return err;

        mwMilliSleep(initialWait);
        return clWaitForEvents(1, &ev);
    }
    else /* Manually poll for pollingMode milliseconds */
    {
        cl_int execStatus;

        err = clFlush(ci->queue); /* Make sure the task is submitted before we wait for it */
        if (err != CL_SUCCESS)
            return err;

        mwMilliSleep(initialWait);

        for (;;)
        {
            err = clGetEventInfo(ev,
                                 CL_EVENT_COMMAND_EXECUTION_STATUS,
                                 sizeof(cl_int),
                                 &execStatus,
                                 NULL);
            if (err != CL_SUCCESS)
                return err;

            /* A negative status is an error code: the command was terminated
               abnormally and will never reach CL_COMPLETE. */
            if (execStatus < 0)
                return execStatus;

            /* Do not sleep another interval once the work is already done. */
            if (execStatus == CL_COMPLETE)
                return CL_SUCCESS;

            mwMilliSleep(pollingMode);
        }
    }
}
void wait_and_check( ) { cl_int wait_status = ::clWaitForEvents( 1, &event ); if( wait_status != CL_SUCCESS ) { if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ) { cl_int err; clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( cl_int ), &err, NULL ); std::cout << "blas function execution status error: " << err << std::endl; } else { std::cout << "blas function wait status error: " << wait_status << std::endl; } } }
/// Issues `apiCallCount` clblasSgemm calls and, when requested, waits for the
/// events they returned.
///
/// @param flush         true if only the kernel time (library call) is
///                      timed, in which case the queues are flushed and the
///                      returned events are waited on here; false if memory
///                      time is also timed (no wait is performed here).
/// @param apiCallCount  Number of times clblasSgemm is invoked.
void xGemm<cl_float>::
xGemm_Function(bool flush, cl_uint apiCallCount )
{
    for (unsigned int i = 0; i < numQueues; i++)
    {
        events_[i] = NULL;
    }

    for (unsigned int i = 0; i < apiCallCount; i++)
    {
        clblasSgemm(order_, buffer_.trans_a_, buffer_.trans_b_,
                    buffer_.m_, buffer_.n_, buffer_.k_, buffer_.alpha_,
                    buffer_.buf_a_, buffer_.offA_, buffer_.lda_,
                    buffer_.buf_b_, buffer_.offB_, buffer_.ldb_,
                    buffer_.beta_,
                    buffer_.buf_c_, buffer_.offC_, buffer_.ldc_,
                    numQueuesToUse, queues_, 0, NULL, events_);
    }

    if (!flush)
    {
        return;
    }

    for (unsigned int i = 0; i < numQueuesToUse; i++)
    {
        clFlush(queues_[i]);
    }

    // Wait on each valid event individually. Waiting on the first N array
    // slots (N = number of valid events) would pick the wrong entries when
    // valid events are not contiguous, and would pass a count of 0 to
    // clWaitForEvents when no event was returned at all.
    for (unsigned int i = 0; i < numQueuesToUse; i++)
    {
        if (events_[i] == NULL)
        {
            continue;
        }

        // Probe the handle first; an event that cannot be queried is skipped.
        cl_uint clReferenceCount = 0;
        const cl_int err = clGetEventInfo(events_[i],
                                          CL_EVENT_REFERENCE_COUNT,
                                          sizeof(clReferenceCount),
                                          &clReferenceCount,
                                          NULL);
        if (err != CL_SUCCESS)
        {
            continue;
        }

        clWaitForEvents(1, &events_[i]);
    }
}
int acc_event_query (void *event, int *has_occured){ //declarations cl_int param_value; // debug info if (verbose_print){ fprintf(stdout, "\n ... EVENT QUERYING ... \n"); fprintf(stdout, " ---> Entering: acc_event_query.\n"); } // local event pointer cl_event *clevent = (cl_event *) event; // get event status cl_error = clGetEventInfo( *clevent, // cl_event event CL_EVENT_COMMAND_EXECUTION_STATUS, // cl_event_info param_name (size_t) sizeof(cl_int), // size_t param_value_size ¶m_value, // void *param_value NULL); // size_t *param_value_size_ret if (acc_opencl_error_check(cl_error, __LINE__)) return -1; // check event status if (param_value == CL_COMPLETE){ *has_occured = 1; } else { *has_occured = 0; } // debug info if (verbose_print) fprintf(stdout, "Leaving: acc_event_query.\n"); if (verbose_print){ fprintf(stdout, " Result: %d\n", *has_occured); fprintf(stdout, " ---> Entering: acc_event_query.\n"); } // assign return value return 0; }
/**
 * Reads one property of the wrapped OpenCL event via clGetEventInfo.
 *
 * @param param_name      Property selector.
 * @param param_size      Size in bytes of the buffer behind @p param.
 * @param param           Destination buffer.
 * @param param_size_ret  Receives the size of the value; passed through to
 *                        clGetEventInfo unchanged.
 * @return true when the query succeeded; false when there is no event id or
 *         clGetEventInfo reported an error.
 */
bool
CLEvent::get_cl_event_info (
    cl_event_info param_name, size_t param_size,
    void *param, size_t *param_size_ret)
{
    cl_int error_code = CL_SUCCESS;

    XCAM_FAIL_RETURN (
        DEBUG,
        _event_id,
        false,
        "cl event get info failed, there's no event id");

    // Capture the result: without the assignment error_code would stay
    // CL_SUCCESS and the failure check below could never trigger.
    error_code = clGetEventInfo (_event_id, param_name, param_size, param, param_size_ret);

    XCAM_FAIL_RETURN (
        WARNING,
        error_code == CL_SUCCESS,
        false,
        "clGetEventInfo failed on param:%d, errno:%d", param_name, error_code);

    return true;
}
//----------------------------------------------------------------------------- // Name: ReleaseTexturesFromOpenCL() // Desc: Release Textures from OpenCL //----------------------------------------------------------------------------- void ReleaseTexturesFromOpenCL() { cl_event event; cl_mem memToAcquire[6+1+1]; memToAcquire[0] = g_texture_2d.clTexture; memToAcquire[1] = g_texture_vol.clTexture; memToAcquire[2] = g_texture_cube.clTexture[0]; memToAcquire[3] = g_texture_cube.clTexture[1]; memToAcquire[4] = g_texture_cube.clTexture[2]; memToAcquire[5] = g_texture_cube.clTexture[3]; memToAcquire[6] = g_texture_cube.clTexture[4]; memToAcquire[7] = g_texture_cube.clTexture[5]; // do the acquire ciErrNum = clEnqueueReleaseD3D9ObjectsNV( cqCommandQueue, 6 + 1 + 1, //cube map + tex2d + volume texture memToAcquire, 0, NULL, &event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // make sure the event type is correct cl_uint eventType = 0; ciErrNum = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); if(eventType != CL_COMMAND_RELEASE_D3D9_OBJECTS_NV) { shrLog("event type is not CL_COMMAND_RELEASE_D3D9_OBJECTS_NV !\n"); } ciErrNum = clReleaseEvent(event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); }
int AtomicCounters::runGlobalAtomicKernel() { cl_int status = CL_SUCCESS; // Set Global and Local work items size_t globalWorkItems = length; size_t localWorkItems = globalWorkGroupSize; // Initialize the counter value cl_event writeEvt; status = clEnqueueWriteBuffer(commandQueue, globalOutBuf, CL_FALSE, 0, sizeof(cl_uint), &initValue, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer(globalOutBuf) failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush() failed."); // Wait for event and release event status = waitForEventAndRelease(&writeEvt); CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(writeEvt) failed."); // Set kernel arguments status = clSetKernelArg(globalKernel, 0, sizeof(cl_mem), &inBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg(inBuf) failed."); status = clSetKernelArg(globalKernel, 1, sizeof(cl_uint), &value); CHECK_OPENCL_ERROR(status, "clSetKernelArg(value) failed."); status = clSetKernelArg(globalKernel, 2, sizeof(cl_mem), &globalOutBuf); CHECK_OPENCL_ERROR(status, "clSetKernelArg(globalOutBuf) failed."); // Run Kernel cl_event ndrEvt; status = clEnqueueNDRangeKernel(commandQueue, globalKernel, 1, NULL, &globalWorkItems, &localWorkItems, 0, NULL, &ndrEvt); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel(globalKernel) failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush(commandQueue) failed."); cl_int eventStatus = CL_QUEUED; while (eventStatus != CL_COMPLETE) { status = clGetEventInfo(ndrEvt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); CHECK_OPENCL_ERROR(status, "clGetEventInfo(ndrEvt) failed."); } cl_ulong startTime; cl_ulong endTime; // Get profiling information status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, NULL); CHECK_OPENCL_ERROR( status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_START) failed."); status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), 
&endTime, NULL); CHECK_OPENCL_ERROR( status, "clGetEventProfilingInfo(CL_PROFILING_COMMAND_END) failed."); double sec = 1e-9 * (endTime - startTime); kTimeAtomGlobal += sec; status = clReleaseEvent(ndrEvt); CHECK_OPENCL_ERROR(status, "clReleaseEvent(ndrEvt) failed."); // Get the occurrences of Value from atomicKernel cl_event readEvt; status = clEnqueueReadBuffer(commandQueue, globalOutBuf, CL_FALSE, 0, sizeof(cl_uint), &globalOut, 0, NULL, &readEvt); CHECK_OPENCL_ERROR(status, "clEnqueueReadBuffer(globalOutBuf) failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush() failed."); // Wait for event and release event status = waitForEventAndRelease(&readEvt); CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(readEvt) failed."); return SDK_SUCCESS; }
int MatrixMulImage::runCLKernels(void) { cl_int status; /* * Kernel runs over complete output matrix with blocks of blockSize x blockSize * running concurrently */ size_t globalThreads[2]= {width1 / 4, height0 / 8}; size_t localThreads[2] = {blockSize, blockSize}; status = kernelInfo.setKernelWorkGroupInfo(kernel, devices[deviceId]); CHECK_OPENCL_ERROR(status, "kernelInfo.setKernelWorkGroupInfo failed"); availableLocalMemory = deviceInfo.localMemSize - kernelInfo.localMemoryUsed; neededLocalMemory = 2 * blockSize * blockSize * sizeof(cl_float); if(neededLocalMemory > availableLocalMemory) { std::cout << "Unsupported: Insufficient local memory on device." << std::endl; return SDK_SUCCESS; } if((cl_uint)(localThreads[0]*localThreads[1]) > kernelInfo.kernelWorkGroupSize) { if(kernelInfo.kernelWorkGroupSize >= 64) { blockSize = 8; localThreads[0] = blockSize; localThreads[1] = blockSize; } else if(kernelInfo.kernelWorkGroupSize >= 32) { blockSize = 4; localThreads[0] = blockSize; localThreads[1] = blockSize; } else { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << localThreads[0] * localThreads[1] << std::endl; std::cout << "Max Group Size supported on the kernel : " << kernelInfo.kernelWorkGroupSize<<std::endl; return SDK_FAILURE; } } if(localThreads[0] > deviceInfo.maxWorkItemSizes[0] || localThreads[1] > deviceInfo.maxWorkItemSizes[1] || localThreads[0]*localThreads[1] > deviceInfo.maxWorkGroupSize) { std::cout << "Unsupported: Device does not support requested number of work items." << std::endl; return SDK_FAILURE; } //For small matrix sizes while(globalThreads[0] % localThreads[0]) localThreads[0] /= 2; while(globalThreads[1] % localThreads[1]) localThreads[1] /= 2; // Set appropriate arguments to the kernel status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&inputBuffer0); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. 
(outputBuffer)"); status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&inputBuffer1); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer0)"); status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&outputBuffer); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (inputBuffer1)"); status = clSetKernelArg(kernel, 3, sizeof(cl_int),(void*)&width0); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width0)"); status = clSetKernelArg(kernel, 4, sizeof(cl_int), &width1); CHECK_OPENCL_ERROR(status, "clSetKernelArg failed. (width1)"); // Enqueue a kernel run call cl_event ndrEvt; status = clEnqueueNDRangeKernel( commandQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, &ndrEvt); CHECK_OPENCL_ERROR(status, "clEnqueueNDRangeKernel failed."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); cl_int eventStatus = CL_QUEUED; while(eventStatus != CL_COMPLETE) { status = clGetEventInfo( ndrEvt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, NULL); CHECK_OPENCL_ERROR(status, "clGetEventInfo failed."); } // Calculate performance cl_ulong startTime; cl_ulong endTime; // Get kernel profiling info status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &startTime, 0); CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(startTime)"); status = clGetEventProfilingInfo(ndrEvt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endTime, 0); CHECK_OPENCL_ERROR(status, "clGetEventProfilingInfo failed.(endTime)"); status = clReleaseEvent(ndrEvt); CHECK_OPENCL_ERROR(status, "clReleaseEvent failed.(ndrEvt)"); // Print performance numbers double sec = 1e-9 * (endTime - startTime); std::cout << "KernelTime (ms) : " << sec * 1000 << std::endl; double flops = 2 * width0 * width1; double perf = (flops / sec) * height0 * 1e-9; std::cout << "GFlops achieved : " << perf << std::endl << std::endl; size_t origin[] = {0, 0, 0}; size_t region[] = {width1 / 4, height0, 1}; cl_event readEvt; 
status = clEnqueueReadImage(commandQueue, outputBuffer, CL_FALSE, origin, region, 0, 0, output, 0, NULL, &readEvt); CHECK_OPENCL_ERROR(status, "outputBuffer failed.(clEnqueueReadImage)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed.(commandQueue)"); status = sampleCommon->waitForEventAndRelease(&readEvt); CHECK_ERROR(status, SDK_SUCCESS, "WaitForEventAndRelease(readEvt) Failed"); return SDK_SUCCESS; }
/*! \brief Tells whether the command behind this Event is in the submitted state.
  *
  * \return true exactly when the event's command execution status equals
  *         CL_SUBMITTED.
  */
bool ocl::Event::isSubmitted () const
{
    cl_int info;
    OPENCL_SAFE_CALL( clGetEventInfo (this->id(),CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(info), &info, NULL));

    const bool submitted = (info == CL_SUBMITTED);
    return submitted;
}