void OpenClKernel::work(void) { const auto &inputs = this->inputs(); const auto &outputs = this->outputs(); std::vector<cl_mem> inputBuffs(inputs.size()); std::vector<cl_mem> outputBuffs(outputs.size()); cl_int err = 0; if (this->workInfo().minElements == 0) return; //calculate number of elements size_t inputElems = this->workInfo().minInElements; size_t outputElems = this->workInfo().minOutElements; if (_productionFactor > 1.0) { outputElems = std::min<size_t>(inputElems*_productionFactor, outputElems); inputElems = outputElems/_productionFactor; } else { inputElems = std::min<size_t>(outputElems/_productionFactor, inputElems); outputElems = inputElems*_productionFactor; } size_t globalSize = inputElems*_globalFactor; /* Create data buffer */ size_t argNo = 0; for (size_t i = 0; i < inputs.size(); i++) { inputBuffs[i] = getClBufferFromManaged(inputs[i]->buffer().getManagedBuffer()); err = clSetKernelArg(*_kernel, argNo++, sizeof(cl_mem), &inputBuffs[i]); if (err < 0) throw Pothos::Exception("OpenClKernel::work::clSetKernelArg()", clErrToStr(err)); } for (size_t i = 0; i < outputs.size(); i++) { outputBuffs[i] = getClBufferFromManaged(outputs[i]->buffer().getManagedBuffer()); err = clSetKernelArg(*_kernel, argNo++, sizeof(cl_mem), &outputBuffs[i]); if (err < 0) throw Pothos::Exception("OpenClKernel::work::clSetKernelArg()", clErrToStr(err)); } /* Enqueue kernel */ err = clEnqueueNDRangeKernel(*_queue, *_kernel, 1, nullptr, &globalSize, &_localSize, 0, nullptr, nullptr); if (err < 0) throw Pothos::Exception("OpenClKernel::work::enqueueKernel()", clErrToStr(err)); /* Read the kernel's output */ for (size_t i = 0; i < inputs.size(); i++) { inputs[i]->consume(inputElems); } for (size_t i = 0; i < outputs.size(); i++) { err = clEnqueueReadBuffer(*_queue, outputBuffs[i], CL_TRUE, 0, outputElems*outputs[i]->dtype().size(), outputs[i]->buffer().as<void *>(), 0, nullptr, nullptr); if (err < 0) throw Pothos::Exception("OpenClKernel::work::clEnqueueReadBuffer()", clErrToStr(err)); outputs[i]->produce(outputElems); } }
OpenClKernel::OpenClKernel(const std::string &deviceId, const std::string &portMarkup): _localSize(1), _globalFactor(1.0), _productionFactor(1.0) { const auto colon = deviceId.find(":"); const auto platformIndex = Poco::NumberParser::parseUnsigned(deviceId.substr(0, colon)); const auto deviceIndex = Poco::NumberParser::parseUnsigned(deviceId.substr(colon+1)); /* Identify a platform */ cl_int err = 0; cl_uint num_platforms = 0; cl_platform_id platforms[64]; err = clGetPlatformIDs(64, platforms, &num_platforms); if (err < 0) throw Pothos::Exception("OpenClKernel::clGetPlatformIDs()", clErrToStr(err)); if (platformIndex >= num_platforms) throw Pothos::Exception("OpenClKernel()", "platform index does not exist"); _platform = platforms[platformIndex]; /* Access a device */ cl_uint num_devices = 0; cl_device_id devices[64]; err = clGetDeviceIDs(_platform, CL_DEVICE_TYPE_ALL, 64, devices, &num_devices); if (err < 0) throw Pothos::Exception("OpenClKernel::clGetDeviceIDs()", clErrToStr(err)); if (deviceIndex >= num_devices) throw Pothos::Exception("OpenClKernel()", "device index does not exist"); _device = devices[deviceIndex]; /* Create context */ _context = lookupContextCache(_device); /* Create ports */ _myDomain = "OpenCl_"+std::to_string(size_t(_device)); Poco::JSON::Parser p; p.parse(portMarkup); const auto ports = p.getHandler()->asVar().extract<Poco::JSON::Array::Ptr>(); const auto inputs = ports->getArray(0); const auto outputs = ports->getArray(1); for (size_t i = 0; i < inputs->size(); i++) { this->setupInput(i, Pothos::DType("custom", inputs->getElement<int>(i)), _myDomain); } for (size_t i = 0; i < outputs->size(); i++) { this->setupOutput(i, Pothos::DType("custom", outputs->getElement<int>(i)), _myDomain); } this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, setSource)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, setLocalSize)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, getLocalSize)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, setGlobalFactor)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, getGlobalFactor)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, setProductionFactor)); this->registerCall(POTHOS_FCN_TUPLE(OpenClKernel, getProductionFactor)); }
void OpenClKernel::setSource(const std::string &kernelName, const std::string &kernelSource) { cl_int err = 0; /* Create program from source */ if (kernelSource.empty()) throw Pothos::Exception("OpenClKernel::activate::createProgram()", "no source specified"); const char *sourcePtr = kernelSource.data(); const size_t sourceSize = kernelSource.size(); auto program = clCreateProgramWithSource(*_context, 1, &sourcePtr, &sourceSize, &err); if(err < 0) throw Pothos::Exception("OpenClKernel::clCreateProgramWithSource()", clErrToStr(err)); _program.reset(new cl_program(program), clReleaseProgramPtr); /* Build program */ err = clBuildProgram(*_program, 0, nullptr, nullptr, nullptr, nullptr); if (err < 0) { /* Find size of log and print to std output */ size_t logSize = 0; clGetProgramBuildInfo(*_program, _device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize); std::vector<char> errorLog(logSize); clGetProgramBuildInfo(*_program, _device, CL_PROGRAM_BUILD_LOG, logSize, errorLog.data(), nullptr); std::string errorString(errorLog.begin(), errorLog.end()); throw Pothos::Exception("OpenClKernel::clBuildProgram()", errorString); } /* Create a command queue */ auto queue = clCreateCommandQueue(*_context, _device, 0, &err); if (err < 0) throw Pothos::Exception("OpenClKernel::clCreateCommandQueue()", clErrToStr(err)); _queue.reset(new cl_command_queue(queue), clReleaseCommandQueuePtr); /* Create a kernel */ auto kernel = clCreateKernel(*_program, kernelName.c_str(), &err); if (err < 0) throw Pothos::Exception("OpenClKernel::clCreateKernel()", clErrToStr(err)); _kernel.reset(new cl_kernel(kernel), clReleaseKernelPtr); }
void pop(const size_t numBytes) { assert(not _readyBuffs.empty()); auto buff = _readyBuffs.front(); _readyBuffs.pop_front(); if (_readyBuffs.empty()) this->setFrontBuffer(Pothos::BufferChunk::null()); else this->setFrontBuffer(_readyBuffs.front()); auto container = std::static_pointer_cast<OpenClBufferContainer>(buff.getBuffer().getContainer()); assert(container); //perform non blocking write //kernel will be enqueued after this if (_clArgs.map_flags == CL_MAP_WRITE) { const cl_int err = clEnqueueWriteBuffer( *_clArgs.queue, container->memobj, CL_FALSE, 0, numBytes, container->mapped_ptr, 0, nullptr, nullptr ); if (err < 0) throw Pothos::Exception("OpenClBufferManager::clEnqueueWriteBuffer()", clErrToStr(err)); } //perform blocking read //must block before giving downstream memory if (_clArgs.map_flags == CL_MAP_READ) { const cl_int err = clEnqueueReadBuffer( *_clArgs.queue, container->memobj, CL_TRUE, 0, numBytes, container->mapped_ptr, 0, nullptr, nullptr ); if (err < 0) throw Pothos::Exception("OpenClBufferManager::clEnqueueReadBuffer()", clErrToStr(err)); } }
OpenClBufferContainer(const OpenClBufferContainerArgs &clArgs, const size_t bufferSize): _clArgs(clArgs) { cl_int err = 0; memobj = clCreateBuffer(*_clArgs.context, _clArgs.mem_flags, bufferSize, nullptr, &err); if (err < 0) throw Pothos::Exception("OpenClBufferContainer::clCreateBuffer()", clErrToStr(err)); mapped_ptr = clEnqueueMapBuffer( *_clArgs.queue, memobj, CL_TRUE, /*blocking map*/ _clArgs.map_flags, 0, //offset bufferSize, 0, nullptr, nullptr, &err); if (err < 0) throw Pothos::Exception("OpenClBufferContainer::clEnqueueMapBuffer()", clErrToStr(err)); }