/**
 * (Re)builds the "seededRegionGrowing" OpenCL kernel for the given input image.
 *
 * The kernel source file is chosen from the image dimensionality and the
 * build options from its data type. The dimensionality and data type the
 * kernel was last built for are remembered in mDimensionCLCodeCompiledFor and
 * mTypeCLCodeCompiledFor, so a call with a matching image returns immediately.
 *
 * @param input image whose dimensions and data type select the kernel variant
 */
void SeededRegionGrowing::recompileOpenCLCode(Image::pointer input) {
    // Skip the rebuild when the current kernel already matches this input
    const bool needsRecompile =
            input->getDimensions() != mDimensionCLCodeCompiledFor ||
            input->getDataType() != mTypeCLCodeCompiledFor;
    if(!needsRecompile)
        return;

    // Select the type define handed to the OpenCL compiler
    std::string buildOptions;
    switch(input->getDataType()) {
        case TYPE_FLOAT:
            buildOptions = "-DTYPE_FLOAT";
            break;
        case TYPE_INT8:
        case TYPE_INT16:
            buildOptions = "-DTYPE_INT";
            break;
        default:
            // Every remaining data type is handled as unsigned integer
            buildOptions = "-DTYPE_UINT";
            break;
    }

    // 2D images get the 2D kernel source, everything else the 3D one
    const std::string filename = (input->getDimensions() == 2)
            ? std::string("Algorithms/SeededRegionGrowing/SeededRegionGrowing2D.cl")
            : std::string("Algorithms/SeededRegionGrowing/SeededRegionGrowing3D.cl");

    OpenCLDevice::pointer device = getMainDevice();
    const std::string sourcePath = std::string(FAST_SOURCE_DIR) + filename;
    int programNr = device->createProgramFromSource(sourcePath, buildOptions);
    mKernel = cl::Kernel(device->getProgram(programNr), "seededRegionGrowing");

    // Remember what the kernel was built for
    mDimensionCLCodeCompiledFor = input->getDimensions();
    mTypeCLCodeCompiledFor = input->getDataType();
}
void DoubleFilter::execute() { if(!mInput.isValid()) { throw Exception("No input supplied to GaussianSmoothingFilter"); } Image::pointer input = mInput; Image::pointer output = mOutput; // Initialize output image output->createFromImage(input, mDevice); if(mDevice->isHost()) { // Execution device is Host, use the executeAlgorithmOnHost function with the given data type switch(input->getDataType()) { // This macro creates a case statement for each data type and sets FAST_TYPE to the correct C++ data type fastSwitchTypeMacro(executeAlgorithmOnHost<FAST_TYPE>(input, output)); } } else { // Execution device is an OpenCL device OpenCLDevice::pointer device = boost::static_pointer_cast<OpenCLDevice>(mDevice); // Set build options based on the data type of the data std::string buildOptions = ""; switch(input->getDataType()) { case TYPE_FLOAT: buildOptions = "-DTYPE=float"; break; case TYPE_INT8: buildOptions = "-DTYPE=char"; break; case TYPE_UINT8: buildOptions = "-DTYPE=uchar"; break; case TYPE_INT16: buildOptions = "-DTYPE=short"; break; case TYPE_UINT16: buildOptions = "-DTYPE=ushort"; break; } // Compile the code int programNr = device->createProgramFromSource(std::string(FAST_SOURCE_DIR) + "Tests/Algorithms/DoubleFilter.cl", buildOptions); cl::Kernel kernel = cl::Kernel(device->getProgram(programNr), "doubleFilter"); // Get global size for the kernel cl::NDRange globalSize(input->getWidth()*input->getHeight()*input->getDepth()*input->getNrOfComponents()); // Set the arguments for the kernel OpenCLBufferAccess inputAccess = input->getOpenCLBufferAccess(ACCESS_READ, device); OpenCLBufferAccess outputAccess = output->getOpenCLBufferAccess(ACCESS_READ_WRITE, device); kernel.setArg(0, *inputAccess.get()); kernel.setArg(1, *outputAccess.get()); // Execute the kernel device->getCommandQueue().enqueueNDRangeKernel( kernel, cl::NullRange, globalSize, cl::NullRange ); } // Update timestamp of the output data output->updateModifiedTimestamp(); }
void getIntensitySumFromOpenCLImage(OpenCLDevice::pointer device, cl::Image2D image, DataType type, float* sum) { // Get power of two size unsigned int powerOfTwoSize = getPowerOfTwoSize(std::max(image.getImageInfo<CL_IMAGE_WIDTH>(), image.getImageInfo<CL_IMAGE_HEIGHT>())); // Create image levels unsigned int size = powerOfTwoSize; size /= 2; std::vector<cl::Image2D> levels; while(size >= 4) { cl::Image2D level = cl::Image2D(device->getContext(), CL_MEM_READ_WRITE, getOpenCLImageFormat(device, CL_MEM_OBJECT_IMAGE2D, TYPE_FLOAT, 1), size, size); levels.push_back(level); size /= 2; } // Compile OpenCL code std::string buildOptions = ""; switch(type) { case TYPE_FLOAT: buildOptions = "-DTYPE_FLOAT"; break; case TYPE_UINT8: buildOptions = "-DTYPE_UINT8"; break; case TYPE_INT8: buildOptions = "-DTYPE_INT8"; break; case TYPE_UINT16: buildOptions = "-DTYPE_UINT16"; break; case TYPE_INT16: buildOptions = "-DTYPE_INT16"; break; } std::string sourceFilename = std::string(FAST_SOURCE_DIR) + "/ImageSum.cl"; std::string programName = sourceFilename + buildOptions; // Only create program if it doesn't exist for this device from before if(!device->hasProgram(programName)) device->createProgramFromSourceWithName(programName, sourceFilename, buildOptions); cl::Program program = device->getProgram(programName); cl::CommandQueue queue = device->getCommandQueue(); // Fill first level size = powerOfTwoSize/2; cl::Kernel firstLevel(program, "createFirstSumImage2DLevel"); firstLevel.setArg(0, image); firstLevel.setArg(1, levels[0]); queue.enqueueNDRangeKernel( firstLevel, cl::NullRange, cl::NDRange(size,size), cl::NullRange ); // Fill all other levels cl::Kernel createLevel(program, "createSumImage2DLevel"); int i = 0; size /= 2; while(size >= 4) { createLevel.setArg(0, levels[i]); createLevel.setArg(1, levels[i+1]); queue.enqueueNDRangeKernel( createLevel, cl::NullRange, cl::NDRange(size,size), cl::NullRange ); i++; size /= 2; } // Get result from the last level unsigned int 
nrOfElements = 4*4; unsigned int nrOfComponents = getOpenCLImageFormat(device, CL_MEM_OBJECT_IMAGE2D, TYPE_FLOAT, 1).image_channel_order == CL_RGBA ? 4 : 1; float* result = (float*)allocateDataArray(nrOfElements,TYPE_FLOAT,nrOfComponents); queue.enqueueReadImage(levels[levels.size()-1],CL_TRUE,createOrigoRegion(),createRegion(4,4,1),0,0,result); *sum = getSumFromOpenCLImageResult<float>(result, nrOfElements, nrOfComponents); delete[] result; }
void getMaxAndMinFromOpenCLBuffer(OpenCLDevice::pointer device, cl::Buffer buffer, unsigned int size, DataType type, float* min, float* max) { // Compile OpenCL code std::string buildOptions = ""; switch(type) { case TYPE_FLOAT: buildOptions = "-DTYPE_FLOAT"; break; case TYPE_UINT8: buildOptions = "-DTYPE_UINT8"; break; case TYPE_INT8: buildOptions = "-DTYPE_INT8"; break; case TYPE_UINT16: buildOptions = "-DTYPE_UINT16"; break; case TYPE_INT16: buildOptions = "-DTYPE_INT16"; break; } std::string sourceFilename = std::string(FAST_SOURCE_DIR) + "/ImageMinMax.cl"; std::string programName = sourceFilename + buildOptions; // Only create program if it doesn't exist for this device from before if(!device->hasProgram(programName)) device->createProgramFromSourceWithName(programName, sourceFilename, buildOptions); cl::Program program = device->getProgram(programName); cl::CommandQueue queue = device->getCommandQueue(); // Nr of work groups must be set so that work-group size does not exceed max work-group size (256 on AMD) int length = size; cl::Kernel reduce(program, "reduce"); cl::Buffer current = buffer; cl::Buffer clResult; int workGroupSize = 256; int workGroups = 256; int X = ceil((float)length / (workGroups*workGroupSize)); clResult = cl::Buffer(device->getContext(), CL_MEM_READ_WRITE, getSizeOfDataType(type,1)*workGroups*2); reduce.setArg(0, current); reduce.setArg(1, workGroupSize * getSizeOfDataType(type,1), NULL); reduce.setArg(2, workGroupSize * getSizeOfDataType(type,1), NULL); reduce.setArg(3, size); reduce.setArg(4, X); reduce.setArg(5, clResult); queue.enqueueNDRangeKernel( reduce, cl::NullRange, cl::NDRange(workGroups*workGroupSize), cl::NDRange(workGroupSize) ); length = workGroups; void* result = allocateDataArray(length, type, 2); unsigned int nrOfElements = length; queue.enqueueReadBuffer(clResult,CL_TRUE,0,getSizeOfDataType(type,1)*workGroups*2,result); switch(type) { case TYPE_FLOAT: getMaxAndMinFromOpenCLImageResult<float>(result, nrOfElements, 2, 
min, max); break; case TYPE_INT8: getMaxAndMinFromOpenCLImageResult<char>(result, nrOfElements, 2, min, max); break; case TYPE_UINT8: getMaxAndMinFromOpenCLImageResult<uchar>(result, nrOfElements, 2, min, max); break; case TYPE_INT16: getMaxAndMinFromOpenCLImageResult<short>(result, nrOfElements, 2, min, max); break; case TYPE_UINT16: getMaxAndMinFromOpenCLImageResult<ushort>(result, nrOfElements, 2, min, max); break; } deleteArray(result, type); }