/*
 * JNI entry point backing CL20.nclSetKernelArgSVMPointer.
 *
 * The Java side passes native handles as jlong values; they are converted back
 * to pointers here and the call is forwarded through the function pointer
 * supplied in __functionAddress.
 *
 * kernelAddress      - jlong holding the cl_kernel handle.
 * arg_index          - kernel argument index, forwarded unchanged.
 * arg_valueAddress   - jlong holding the pointer passed as the argument value.
 * __functionAddress  - jlong holding the address of the function to invoke.
 *
 * Returns the callee's result cast to jint.
 */
JNIEXPORT jint JNICALL Java_org_lwjgl_opencl_CL20_nclSetKernelArgSVMPointer(
    JNIEnv *__env,
    jclass clazz,
    jlong kernelAddress,
    jint arg_index,
    jlong arg_valueAddress,
    jlong __functionAddress)
{
    /* Recover the callable from the address handed over by the Java caller. */
    clSetKernelArgSVMPointerPROC pfnSetSvmArg =
        (clSetKernelArgSVMPointerPROC)(intptr_t)__functionAddress;

    /* The JNI environment and class are not needed by this trampoline. */
    UNUSED_PARAMS(__env, clazz)

    return (jint)pfnSetSvmArg(
        (cl_kernel)(intptr_t)kernelAddress,
        arg_index,
        (const void *)(intptr_t)arg_valueAddress);
}
/// Sets kernel argument `argpos` of `k` to the pointer held in `arg.ptr`
/// via clSetKernelArgSVMPointer.
///
/// @param k       Kernel wrapper; its underlying handle is obtained with k().
/// @param argpos  Index of the kernel argument to set.
/// @param arg     Wrapper whose `ptr` member is passed as the argument value.
/// @throws cl::Error carrying the returned code when the call does not
///         return CL_SUCCESS.
static void set(cl::Kernel &k, unsigned argpos, const svm_ptr<T> &arg)
{
    const cl_int status = clSetKernelArgSVMPointer(k(), argpos, arg.ptr);
    if (status == CL_SUCCESS)
        return;

    throw cl::Error(status, "clSetKernelArgSVMPointer");
}
/**
 * Front-end service loop.
 *
 * Repeats until tCloseThreadEvent is observed as signalled (polled with a
 * zero timeout, so the loop never blocks on it). Each pass:
 *   1. pulls up to OCL_REQ_QUEUE_SIZE requests from tReqQueue into
 *      pOclReqQueue,
 *   2. if any were pulled, launches oclKernel over them and waits for the
 *      command queue to finish,
 *   3. copies each request's status and flags back into the owning
 *      TFid's pThreadRequest slot, decrements that owner's uiReqCount and
 *      sets its hBlockEvent.
 *
 * @return HTS_OK once the loop exits.
 *
 * NOTE(review): the return values of clSetKernelArgSVMPointer,
 * clSetKernelArg, clEnqueueNDRangeKernel and clFinish are not checked, so a
 * failed launch still publishes whatever status is in pOclReqQueue.
 */
DWORD CFrontEnd::dwCFrontThread()
{
    size_t pLocalSize[1]  = {OCL_WG_SIZE};
    size_t pGlobalSize[1] = {OCL_WG_SIZE};

    for (DWORD dwWaitStatus = WaitForSingleObject(tCloseThreadEvent, 0);
         dwWaitStatus == WAIT_TIMEOUT;
         dwWaitStatus = WaitForSingleObject(tCloseThreadEvent, 0))
    {
        // Stage queued requests for this pass.
        TQueuedRequest cQReq;
        UINT uiReqCount = 0;

        // Stop at the batch capacity, or as soon as uiGet reports anything
        // other than HTS_OK. The capacity test comes first so uiGet is not
        // called once the batch is full.
        while (uiReqCount < OCL_REQ_QUEUE_SIZE &&
               tReqQueue.uiGet(&cQReq) == HTS_OK)
        {
            pOclReqQueue[uiReqCount].tFid     = cQReq.tFid;
            pOclReqQueue[uiReqCount].uiReqId  = cQReq.uiReqId;
            pOclReqQueue[uiReqCount].uiType   = cQReq.uiType;
            pOclReqQueue[uiReqCount].uiKey    = cQReq.uiKey;
            pOclReqQueue[uiReqCount].uiFlags  = cQReq.uiFlags;
            pOclReqQueue[uiReqCount].uiStatus = cQReq.uiStatus;
            ++uiReqCount;
        }

        // Hand the whole batch to the device in a single launch.
        if (uiReqCount != 0)
        {
            // One work-group of OCL_WG_SIZE work-items per staged request.
            UINT uiTotalItems = uiReqCount * OCL_WG_SIZE;
            pGlobalSize[0] = (size_t)(uiTotalItems);
            pLocalSize[0]  = OCL_WG_SIZE;

            clSetKernelArgSVMPointer(oclKernel, 0, (void *)(pOclReqQueue));
            clSetKernelArgSVMPointer(oclKernel, 1, (void *)(pNodePool));
            clSetKernelArgSVMPointer(oclKernel, 2, (void *)(pMiscData));
            clSetKernelArg(oclKernel, 3, sizeof(cl_uint), (void *)(&uiReqCount));

            clEnqueueNDRangeKernel(pOclContext->oclCommandQueue,
                                   oclKernel,
                                   1,
                                   NULL,
                                   pGlobalSize,
                                   pLocalSize,
                                   0,
                                   NULL,
                                   NULL);

            // Block until the queue has drained before reading results back.
            clFinish(pOclContext->oclCommandQueue);
        }

        // Publish results to each requesting thread and signal it.
        for (unsigned int uiIdx = 0; uiIdx < uiReqCount; ++uiIdx)
        {
            TFid tOwner    = pOclReqQueue[uiIdx].tFid;
            cl_uint uiSlot = pOclReqQueue[uiIdx].uiReqId;

            // Status is written before flags, matching the original order.
            (tOwner->pThreadRequest[uiSlot]).uiStatus = pOclReqQueue[uiIdx].uiStatus;
            (tOwner->pThreadRequest[uiSlot]).uiFlags  = pOclReqQueue[uiIdx].uiFlags;

            tOwner->uiReqCount--;
            SetEvent(tOwner->hBlockEvent);
        }
    }

    return HTS_OK;
}