cl_mem piglit_cl_create_image(piglit_cl_context context, cl_mem_flags flags, const cl_image_format *format, const piglit_image_desc *desc) { cl_int errNo; cl_mem image = NULL; #ifdef CL_VERSION_1_2 if (piglit_cl_get_platform_version(context->platform_id) >= 12) { image = clCreateImage(context->cl_ctx, flags, format, desc, NULL, &errNo); } else #endif if (desc->image_type == CL_MEM_OBJECT_IMAGE2D) { image = clCreateImage2D(context->cl_ctx, flags, format, desc->image_width, desc->image_height, 0, NULL, &errNo); } else if (desc->image_type == CL_MEM_OBJECT_IMAGE3D) { image = clCreateImage3D(context->cl_ctx, flags, format, desc->image_width, desc->image_height, desc->image_depth, 0, 0, NULL, &errNo); } else { fprintf(stderr, "Invalid image mem object type: %s\n", piglit_cl_get_enum_name(desc->image_type)); } if(!piglit_cl_check_error(errNo, CL_SUCCESS)) { fprintf(stderr, "Could not create image: %s\n", piglit_cl_get_error_name(errNo)); } return image; }
/** * \brief ocl::Image::create Creates cl_mem for this Image. * * Note that no Memory is allocated. Allocation takes place when data is transfered. * It is assumed that an active Queue exists. * * \param width Width of the image. * \param height Height of the image. * \param depth Depth of the image. * \param type Channeltype of the image. * \param order Channelorder of the image. */ void ocl::Image::create(size_t width, size_t height, size_t depth, ChannelType type, ChannelOrder order, Access access) { TRUE_ASSERT(this->_context != 0, "Context not valid - cannot create Image"); cl_mem_flags flags = access; cl_image_format format; format.image_channel_order = order; format.image_channel_data_type = type; cl_int status; #if defined(OPENCL_V1_0) || defined(OPENCL_V1_1) this->_id = clCreateImage3D(this->_context->id(), flags, &format, width, height, depth, 0, 0, NULL, &status); #else _cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_height = height; desc.image_width = width; desc.image_depth = depth; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = NULL; this->_id = clCreateImage(this->_context->id(), flags, &format, &desc, NULL, &status); #endif OPENCL_SAFE_CALL(status); TRUE_ASSERT(this->_id != 0, "Could not create 3D image."); }
OpenCLHandle FilterEffect::createOpenCLImageResult(uint8_t* source) { FilterContextOpenCL* context = FilterContextOpenCL::context(); ASSERT(context); if (context->inError()) return 0; ASSERT(!hasResult()); cl_image_format clImageFormat; clImageFormat.image_channel_order = CL_RGBA; clImageFormat.image_channel_data_type = CL_UNORM_INT8; int errorCode = 0; #ifdef CL_API_SUFFIX__VERSION_1_2 cl_image_desc imageDescriptor = { CL_MEM_OBJECT_IMAGE2D, m_absolutePaintRect.width(), m_absolutePaintRect.height(), 0, 0, 0, 0, 0, 0, 0}; m_openCLImageResult = clCreateImage(context->deviceContext(), CL_MEM_READ_WRITE | (source ? CL_MEM_COPY_HOST_PTR : 0), &clImageFormat, &imageDescriptor, source, &errorCode); #else m_openCLImageResult = clCreateImage2D(context->deviceContext(), CL_MEM_READ_WRITE | (source ? CL_MEM_COPY_HOST_PTR : 0), &clImageFormat, m_absolutePaintRect.width(), m_absolutePaintRect.height(), 0, source, &errorCode); #endif if (context->isFailed(errorCode)) return 0; return m_openCLImageResult; }
JNIEXPORT jlong JNICALL Java_org_lwjgl_opencl_CL12_nclCreateImage(JNIEnv *env, jclass clazz, jlong context, jlong flags, jlong image_format, jlong image_desc, jlong host_ptr, jlong errcode_ret, jlong function_pointer) { const cl_image_format *image_format_address = (const cl_image_format *)(intptr_t)image_format; const cl_image_desc *image_desc_address = (const cl_image_desc *)(intptr_t)image_desc; cl_void *host_ptr_address = (cl_void *)(intptr_t)host_ptr; cl_int *errcode_ret_address = (cl_int *)(intptr_t)errcode_ret; clCreateImagePROC clCreateImage = (clCreateImagePROC)((intptr_t)function_pointer); cl_mem __result = clCreateImage((cl_context)(intptr_t)context, flags, image_format_address, image_desc_address, host_ptr_address, errcode_ret_address); return (intptr_t)__result; }
ImageBuffer(CLcontext context, unsigned short width, unsigned short height, void* data) { cl_int err; cl_image_format format; format.image_channel_order = CL_RGBA; format.image_channel_data_type = CL_FLOAT; cl_image_desc desc; desc.image_width = width; desc.image_height = height; desc.image_type = CL_MEM_OBJECT_IMAGE2D; image = clCreateImage(context.context, T, &format, &desc, data, &err); if(err != CL_SUCCESS) { THROW_EXCEPTION("Failed to create memory object"); } }
cl_mem CLContext::create_image ( cl_mem_flags flags, const cl_image_format& format, const cl_image_desc &image_info, void *host_ptr) { cl_mem mem_id = NULL; cl_int errcode = CL_SUCCESS; mem_id = clCreateImage ( _context_id, flags, &format, &image_info, host_ptr, &errcode); XCAM_FAIL_RETURN ( WARNING, errcode == CL_SUCCESS, NULL, "create cl image failed"); return mem_id; }
cl_mem bindTexture(const oclMat &mat) { cl_mem texture; cl_image_format format; int err; int depth = mat.depth(); int channels = mat.channels(); switch(depth) { case CV_8U: format.image_channel_data_type = CL_UNSIGNED_INT8; break; case CV_32S: format.image_channel_data_type = CL_UNSIGNED_INT32; break; case CV_32F: format.image_channel_data_type = CL_FLOAT; break; default: throw std::exception(); break; } switch(channels) { case 1: format.image_channel_order = CL_R; break; case 3: format.image_channel_order = CL_RGB; break; case 4: format.image_channel_order = CL_RGBA; break; default: throw std::exception(); break; } #if CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = mat.cols; desc.image_height = mat.rows; desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else texture = clCreateImage2D( mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, mat.cols, mat.rows, 0, NULL, &err); #endif size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); } else { devData = (cl_mem)mat.data; } clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { clFinish(mat.clCxt->impl->clCmdQueue); clReleaseMemObject(devData); } openCLSafeCall(err); return texture; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_image_query_funcs"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; size_t source_size, source_read; char const *sources[1]; char *filename = NULL; char *source = NULL; FILE *source_file = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image2_desc, image3_desc; printf("Running test %s...\n", name); memset(&image2_desc, 0, sizeof(cl_image_desc)); image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image2_desc.image_width = 2; image2_desc.image_height = 4; memset(&image3_desc, 0, sizeof(cl_image_desc)); image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image3_desc.image_width = 2; image3_desc.image_height = 4; image3_desc.image_depth = 8; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); if (imageData == NULL) { puts("out of host memory\n"); goto error; } memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source_file = fopen(filename, "r"); if (!source_file) { puts("source file not found\n"); goto error; } fseek(source_file, 0, SEEK_END); source_size = ftell(source_file); fseek(source_file, 0, SEEK_SET); source = (char *)malloc(source_size + 1); if (!source) { puts("out of memory\n"); goto error; } source_read = fread(source, 1, source_size, source_file); if (source_read != source_size) { puts("error reading from file\n"); goto error; } source[source_size] = '\0'; fclose(source_file); source_file = NULL; /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* Create image */ cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image2_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image2 creation failed\n"); goto error; } cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image3_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image3 creation failed\n"); goto error; } /* create and build program */ sources[0] = source; program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image2); if (result) { puts("clSetKernelArg 0 failed\n"); goto error; } result = clSetKernelArg( kernel, 1, sizeof(cl_mem), &image3); if (result) { puts("clSetKernelArg 1 failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clReleaseContext(context); } if (source_file) { fclose(source_file); } if (source) { free(source); } if (filename) { free(filename); } if (imageData) { free(imageData); } if (retval) { printf("FAIL\n"); return 1; } printf("OK\n"); return 0; }
int main(int argc, char **argv) { /* Host data */ float *hInputImage = NULL; float *hOutputImage = NULL; /* Angle for rotation (degrees) */ const float theta = 45.0f; /* Allocate space for the input image and read the * data from disk */ int imageRows; int imageCols; hInputImage = readBmpFloat("../../Images/cat-face.bmp", &imageRows, &imageCols); const int imageElements = imageRows*imageCols; const size_t imageSize = imageElements*sizeof(float); /* Allocate space for the output image */ hOutputImage = (float*)malloc(imageSize); if (!hOutputImage) { exit(-1); } /* Use this to check the output of each API call */ cl_int status; /* Get the first platform */ cl_platform_id platform; status = clGetPlatformIDs(1, &platform, NULL); check(status); /* Get the first device */ cl_device_id device; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); check(status); /* Create a context and associate it with the device */ cl_context context; context = clCreateContext(NULL, 1, &device, NULL, NULL, &status); check(status); /* Create a command queue and associate it with the device */ cl_command_queue cmdQueue; cmdQueue = clCreateCommandQueue(context, device, 0, &status); check(status); /* The image descriptor describes how the data will be stored * in memory. This descriptor initializes a 2D image with no pitch */ cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = imageCols; desc.image_height = imageRows; desc.image_depth = 0; desc.image_array_size = 0; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = NULL; /* The image format describes the properties of each pixel */ cl_image_format format; format.image_channel_order = CL_R; // single channel format.image_channel_data_type = CL_FLOAT; /* Create the input image and initialize it using a * pointer to the image data on the host. */ cl_mem inputImage = clCreateImage(context, CL_MEM_READ_ONLY, &format, &desc, NULL, NULL); /* Create the output image */ cl_mem outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY, &format, &desc, NULL, NULL); /* Copy the host image data to the device */ size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from size_t region[3] = {imageCols, imageRows, 1}; // Elements to per dimension clEnqueueWriteImage(cmdQueue, inputImage, CL_TRUE, origin, region, 0 /* row-pitch */, 0 /* slice-pitch */, hInputImage, 0, NULL, NULL); /* Create a program with source code */ char *programSource = readFile("image-rotation.cl"); size_t programSourceLen = strlen(programSource); cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, &programSourceLen, &status); check(status); /* Build (compile) the program for the device */ status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); if (status != CL_SUCCESS) { printCompilerError(program, device); exit(-1); } /* Create the kernel */ cl_kernel kernel; kernel = clCreateKernel(program, "rotation", &status); check(status); /* Set the kernel arguments */ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage); status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage); status |= clSetKernelArg(kernel, 2, sizeof(int), &imageCols); status |= clSetKernelArg(kernel, 3, sizeof(int), &imageRows); status |= clSetKernelArg(kernel, 4, sizeof(float), &theta); check(status); /* Define the index space and work-group size */ size_t globalWorkSize[2]; globalWorkSize[0] = imageCols; globalWorkSize[1] = imageRows; size_t localWorkSize[2]; localWorkSize[0] = 8; localWorkSize[1] = 8; /* Enqueue the kernel for execution */ status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); check(status); /* Read the output image buffer to the host */ status = clEnqueueReadImage(cmdQueue, outputImage, CL_TRUE, origin, region, 0 /* row-pitch */, 0 /* slice-pitch */, hOutputImage, 0, NULL, NULL); check(status); /* Write the output image to file */ writeBmpFloat(hOutputImage, "rotated-cat.bmp", imageRows, imageCols, "../../Images/cat-face.bmp"); /* Free OpenCL resources */ clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmdQueue); clReleaseMemObject(inputImage); clReleaseMemObject(outputImage); clReleaseContext(context); /* Free host resources */ free(hInputImage); free(hOutputImage); free(programSource); return 0; }
void clInvert3D(CL* cl, VglImage* img){ cl_int err; cl_image_desc desc = getDesc(img->shape[0], img->shape[1], 3, img->shape[2]); cl_image_desc descOut = getDesc(img->shape[0], img->shape[1], 3, img->shape[2]); cl_image_format src; cl_image_format out; switch(img->nChannels){ case 1: src.image_channel_order = CL_A; out.image_channel_order = CL_A; break; case 3: rgb2rgba(NULL, img); src.image_channel_order = CL_RGBA; out.image_channel_order = CL_RGBA; break; case 4: src.image_channel_order = CL_RGBA; out.image_channel_order = CL_RGBA; break; default: printf("Numero de canais não suportado\n"); exit; } src.image_channel_data_type = CL_UNORM_INT8; out.image_channel_data_type = CL_UNORM_INT8; cl_mem src_mem = clCreateImage(cl->context, CL_MEM_READ_ONLY, &src, &desc, NULL, &err); printf("IMAGE STATUS SOURCE\t"); cl_error(err); cl_mem out_mem = clCreateImage(cl->context, CL_MEM_WRITE_ONLY, &out, &descOut, NULL, &err); printf("IMAGE STATUS OUT\t"); cl_error(err); clGetMemObjectInfo(src_mem, CL_MEM_TYPE, sizeof(cl_int), &err, NULL); if(err == CL_MEM_OBJECT_IMAGE3D) printf("IMAGE TYPE:\t\tCL_MEM_OBJECT_IMAGE3D\n"); size_t *src_origin=(size_t*)malloc(sizeof(size_t)*3); src_origin[0] = 0; src_origin[1] = 0; src_origin[2] = 0; size_t *src_region=(size_t*)malloc(sizeof(size_t)*3); src_region[0] = img->shape[0]; src_region[1] = img->shape[1]; src_region[2] = img->shape[2]; err = clEnqueueWriteImage(cl->queue, src_mem, CL_TRUE, src_origin, src_region, 0, 0, (void*)img->ndarray, 0, 0, NULL); printf("ENQUEUE IMAGE STATUS "); cl_error(err); cl_program program; cl_kernel kernel; const char* k = "./CLdemos/CL/Invert3D_RGBA.cl"; const char* k2 = "./CLdemos/CL/Invert3D_A.cl"; char** fonte; if(img->nChannels==1) fonte = (char**)getKernelPtr(k2); if(img->nChannels==4) fonte = (char**)getKernelPtr(k); program = clCreateProgramWithSource(cl->context, 1, (const char**)fonte, NULL, &err); printf("CREATE PROGRAM STATUS: "); cl_error(err); clBuildProgram(program, 0, NULL, NULL, NULL, &err); printf("BUILD PROGRAM STATUS: "); cl_error(err); kernel = clCreateKernel(program, "invert", &err); printf("KERNEL STATUS "); cl_error(err); err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &src_mem); printf("SET 1 KERNEL ARG "); cl_error(err); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &out_mem); printf("SET 2 KERNEL ARG "); cl_error(err); size_t worksize[] = { img->shape[0], img->shape[1], img->shape[2]}; err = clEnqueueNDRangeKernel(cl->queue, kernel, 2, NULL, worksize, 0, 0, 0, 0); printf("ENQUEUE ND KERNEL STATUS "); cl_error(err); clFinish(cl->queue); char* auxout = (char*)malloc(img->shape[0]*img->shape[1]*img->shape[2]*img->nChannels); err = clEnqueueReadImage(cl->queue, out_mem, CL_TRUE, src_origin, src_region, 0, 0, auxout, 0, NULL, NULL); printf("READ NEW IMAGE STATUS "); cl_error(err); for(int i=0; i<(img->shape[0]*img->nChannels*img->shape[1]*img->shape[2]); i++) img->ndarray[i] = auxout[i]; free(auxout); clReleaseKernel(kernel); clReleaseProgram(program); }
int MatrixMulImage::setupCL(void) { cl_int status = 0; cl_device_type dType; if(deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed"); // Display available devices. retValue = sampleCommon->displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled()); CHECK_ERROR(status, 0, "sampleCommon::getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "deviceInfo.setDeviceInfo. failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; prop |= CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue( context, devices[deviceId], prop, &status); CHECK_ERROR(retValue, SDK_SUCCESS, "clCreateCommandQueue. failed"); } cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_FLOAT; imageFormat.image_channel_order = CL_RGBA; if(!deviceInfo.imageSupport) { std::cout << "Expected Error: Image is not supported on the Device" << std::endl; return SDK_EXPECTED_FAILURE; } cl_image_desc imageDesc; memset(&imageDesc, '\0', sizeof(cl_image_desc)); imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; // Create image for matrix A imageDesc.image_width = width0 / 4; imageDesc.image_height = height0; inputBuffer0 = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, input0, &status); CHECK_OPENCL_ERROR(status, "clCreateImage failed. (inputBuffer0)"); // Create image for matrix B imageDesc.image_width = width1 / 4; imageDesc.image_height = height1; inputBuffer1 = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, &imageDesc, input1, &status); CHECK_OPENCL_ERROR(status, "clCreateImage failed. (inputBuffer1)"); // Create image for matrix C imageDesc.image_width = width1 / 4; imageDesc.image_height = height0; outputBuffer = clCreateImage(context, CL_MEM_WRITE_ONLY, &imageFormat, &imageDesc, 0, &status); CHECK_OPENCL_ERROR(status, "clCreateImage failed. (outputBuffer)"); // create a CL program using the kernel source streamsdk::buildProgramData buildData; buildData.kernelName = std::string("MatrixMulImage_Kernels.cl"); buildData.devices = devices; buildData.deviceId = deviceId; buildData.flagsStr = std::string(""); if(isLoadBinaryEnabled()) buildData.binaryName = std::string(loadBinary.c_str()); if(isComplierFlagsSpecified()) buildData.flagsFileName = std::string(flags.c_str()); retValue = sampleCommon->buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::buildOpenCLProgram() failed"); kernel = clCreateKernel(program, "mmmKernel3", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(kernel)"); return SDK_SUCCESS; }
bool CL_Image3D::Create(cl_uint uWidth, cl_uint uHeight, cl_uint uDepth, cl_uint uRowPitch, void* pImgInput, CL_ImageOrder OrderType, CL_ImageChannel ChannelType, CL_MemAccess AccessType, CL_MemStorage StorageType) { CL_CPP_CONDITIONAL_RETURN_FALSE(m_Image); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef); CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef->IsValid()); // CL_CPP_CONDITIONAL_RETURN_FALSE((StorageType == CL_MemStorage_UseHostInput || StorageType == CL_MemStorage_CopyInputToDevice) && !pImgInput); cl_mem_flags uMemFlags = 0; cl_image_format ImgFormat; // Determine the access flags. switch(AccessType) { case CL_MemAccess_ReadOnly: uMemFlags = CL_MEM_READ_ONLY; break; case CL_MemAccess_WriteOnly: uMemFlags = CL_MEM_WRITE_ONLY; break; case CL_MemAccess_ReadWrite: uMemFlags = CL_MEM_READ_WRITE; break; default: return false; } // Determine the storage flags. switch(StorageType) { case CL_MemStorage_AllocateOnDevice: /* default setting, do nothing */ break; case CL_MemStorage_AllocateOnHost: uMemFlags |= CL_MEM_ALLOC_HOST_PTR; break; case CL_MemStorage_UseHostInput: uMemFlags |= CL_MEM_USE_HOST_PTR; break; case CL_MemStorage_CopyInputToDevice: uMemFlags |= CL_MEM_COPY_HOST_PTR; break; default: return false; } // Determine the image channel order. switch(OrderType) { case CL_ImageOrder_R: ImgFormat.image_channel_order = CL_R; break; case CL_ImageOrder_A: ImgFormat.image_channel_order = CL_A; break; case CL_ImageOrder_RG: ImgFormat.image_channel_order = CL_RG; break; case CL_ImageOrder_RA: ImgFormat.image_channel_order = CL_RA; break; case CL_ImageOrder_RGB: ImgFormat.image_channel_order = CL_RGB; break; case CL_ImageOrder_RGBA: ImgFormat.image_channel_order = CL_RGBA; break; case CL_ImageOrder_BGRA: ImgFormat.image_channel_order = CL_BGRA; break; case CL_ImageOrder_ARGB: ImgFormat.image_channel_order = CL_ARGB; break; case CL_ImageOrder_Intensity: ImgFormat.image_channel_order = CL_INTENSITY; break; case CL_ImageOrder_Luminance: ImgFormat.image_channel_order = CL_LUMINANCE; break; #ifdef CL_VERSION_1_1 case CL_ImageOrder_Rx: ImgFormat.image_channel_order = CL_Rx; break; case CL_ImageOrder_RGx: ImgFormat.image_channel_order = CL_RGx; break; case CL_ImageOrder_RGBx: ImgFormat.image_channel_order = CL_RGBx; break; #endif } // Determine the image channel data type. switch(ChannelType) { case CL_ImageChannel_Norm_Int8: ImgFormat.image_channel_data_type = CL_SNORM_INT8; break; case CL_ImageChannel_Norm_Int16: ImgFormat.image_channel_data_type = CL_SNORM_INT16; break; case CL_ImageChannel_Norm_UInt8: ImgFormat.image_channel_data_type = CL_UNORM_INT8; break; case CL_ImageChannel_Norm_UInt16: ImgFormat.image_channel_data_type = CL_UNORM_INT16; break; case CL_ImageChannel_Norm_UShort_555: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_555; break; case CL_ImageChannel_Norm_UShort_565: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_565; break; case CL_ImageChannel_Norm_UInt_101010: ImgFormat.image_channel_data_type = CL_UNORM_INT_101010; break; case CL_ImageChannel_Int8: ImgFormat.image_channel_data_type = CL_SIGNED_INT8; break; case CL_ImageChannel_Int16: ImgFormat.image_channel_data_type = CL_SIGNED_INT16; break; case CL_ImageChannel_Int32: ImgFormat.image_channel_data_type = CL_SIGNED_INT32; break; case CL_ImageChannel_UInt8: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT8; break; case CL_ImageChannel_UInt16: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT16; break; case CL_ImageChannel_UInt32: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT32; break; case CL_ImageChannel_Float16: ImgFormat.image_channel_data_type = CL_HALF_FLOAT; break; case CL_ImageChannel_Float32: ImgFormat.image_channel_data_type = CL_FLOAT; break; } cl_context Context = m_pContextRef->GetContext(); cl_int iErrorCode = CL_SUCCESS; cl_uint uSlicePitch = uRowPitch * uHeight; // Create the image object. #if defined(CL_VERSION_1_2) cl_image_desc ImgDesc; ImgDesc.image_width = uWidth; ImgDesc.image_height = uHeight; ImgDesc.image_depth = uDepth; ImgDesc.image_array_size = 1; ImgDesc.image_row_pitch = (pImgInput) ? uRowPitch : 0; ImgDesc.image_slice_pitch = (pImgInput) ? uSlicePitch : 0; ImgDesc.num_mip_levels = 0; ImgDesc.num_samples = 0; ImgDesc.buffer = NULL; m_Image = clCreateImage(Context, uMemFlags, &ImgFormat, &ImgDesc, pImgInput, &iErrorCode); #else m_Image = clCreateImage3D(Context, uMemFlags, &ImgFormat, uWidth, uHeight, uDepth, (pImgInput) ? uRowPitch : 0, (pImgInput) ? uSlicePitch : 0, pImgInput, &iErrorCode); #endif CL_CPP_CATCH_ERROR(iErrorCode); CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode); m_uWidth = uWidth; m_uHeight = uHeight; m_uDepth = uDepth; m_uRowPitch = uRowPitch; m_uSlicePitch = uSlicePitch; m_uTotalSize = m_uSlicePitch * m_uHeight; // Ask OpenCL for the sizeo of each individual element in this image. clGetImageInfo(m_Image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &m_uElementSize, NULL); m_ImageOrder = OrderType; m_ImageChannel = ChannelType; m_MemAccess = AccessType; m_MemStorage = StorageType; return true; }
/*! * @function clut_blurImage_local_unlimited * Blurs the image at [filename] with a filter of size [filter_size], and saves the result * to the file "output_unlimited.png". This function should be optimized to run on * local memory. * @param filename * The name of the file. * @param filter_size * The size of the blur filter. * @return * 0 on success, non-0 on failure. */ int clut_blurImage_local_unlimited(const cl_device_id device, const char * const filename, const unsigned int filter_size) { const char * const fname = "clut_blurImage_local"; int return_value = 1; cl_int ret; if (NULL == filename) { Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname); goto error1; } /* compute work group size */ size_t local_width, local_height; if (0 != clut_getMaxWGSize(device, &local_width, &local_height)) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to get work group sizes.\n", fname); goto error1; } Debug_out(DEBUG_HOMEWORK, "%s: Max work group size is [%zu]x[%zu].\n", fname, local_width, local_height); /* openCL wants to know the size of __local statically allocated arrays at compile time, * so the local size must be set with a #define */ char *flags = calloc(128, sizeof(char)); if (NULL == flags) { Debug_out(DEBUG_HOMEWORK, "%s: A calloc failed.\n", fname); goto error1; } sprintf(flags, "-D LOCAL_WIDTH=%zu -D LOCAL_HEIGHT=%zu -D FILTER_SIZE=%d", local_width, local_height, filter_size); Debug_out(DEBUG_HOMEWORK, "%s: Local flags are: '%s'.\n", fname, flags); /* Create context */ cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage_local_unlimited", &ret); CLUT_CHECK_ERROR(ret, "Unable to create context", error2); Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname); /* Create program */ cl_program program = clut_createProgramFromFile(context, "homework_unlimited.cl", flags); if (NULL == program) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname); goto error3; } Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname); /* Create kernel */ cl_kernel kernel = clCreateKernel(program, "blurImage_local_unlimited", &ret); CLUT_CHECK_ERROR(ret, "Unable to create kernel", error4); Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname); /* Create command_queue */ cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret); CLUT_CHECK_ERROR(ret, "Unable to create command queue", error5); Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname); /* open source image */ int width, height; cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height); if (NULL == source_image) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname); goto error6; } if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) { Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname); goto error7; } /* crate destination image */ cl_image_format image_format = {0, 0}; cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; // image_desc.image_width = 0; // image_desc.image_height = 0; // image_desc.image_depth = 0; /* only for 3D images */ // image_desc.image_array_size = 0; /* only for image arrays */ // image_desc.image_row_pitch = 0; // image_desc.image_slice_pitch = 0; /* only for 3D images */ // image_desc.num_mip_levels = 0; /* mandatory */ // image_desc.num_samples = 0; /* mandatory */ // image_desc.buffer = NULL; /* only for 1D image buffers */ ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL); CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error7); int components = clut_getImageFormatComponents(image_format); if (0 > components) { Debug_out(DEBUG_HOMEWORK, "%s: Unknown components for source image.\n", fname); goto error7; } Debug_out(DEBUG_HOMEWORK, "%s: Source image has %d components.\n", fname, components); image_desc.image_width = width - filter_size + 1; image_desc.image_height = height - filter_size + 1; image_desc.image_row_pitch = image_desc.image_width * components; cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret); CLUT_CHECK_ERROR(ret, "Unable to create second image", error7); /* fill result image with black */ const unsigned int fill_color[4] = { 0, 0, 0, 255 }; const size_t fill_origin[3] = { 0, 0, 0 }; const size_t fill_region[3] = { width - filter_size + 1, height - filter_size + 1, 1 }; ret = clEnqueueFillImage(command_queue, result_image, fill_color, fill_origin, fill_region, 0, NULL, NULL); CLUT_CHECK_ERROR(ret, "Unable to fill result image", error8); Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname); /* create filter matrix */ unsigned char *filter_matrix = createFilterMatrix(filter_size); if (NULL == filter_matrix) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname); goto error8; } Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname); // printFilterMatrix(filter_matrix, filter_size); /* copy filter matrix to device */ cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret); CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error9); /* set kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image); CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname); ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image); CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname); ret = clSetKernelArg(kernel, 2, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer); CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error10); Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname); Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname); const size_t work_size[2] = { COMPUTE_GLOBAL_SIZE(height - filter_size + 1, local_height), COMPUTE_GLOBAL_SIZE(width - filter_size + 1, local_width) }; const size_t wg_size[2] = { local_height, local_width }; Debug_out(DEBUG_HOMEWORK, "%s: work size is [%zu]x[%zu].\n", fname, work_size[0], work_size[1]); /* run kernel */ cl_event kernel_event; ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, wg_size, 0, NULL, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error10); ret = clFinish(command_queue); CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error10); Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname); ret = clWaitForEvents(1, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error10); /* check that kernel executed correctly */ cl_int kernel_ret; ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error10); Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret); if (CL_COMPLETE != kernel_ret) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret)); goto error10; } cl_ulong end_time; ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error10); if (0 == end_time) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname); goto error10; } cl_double time_double = clut_getEventDuration(kernel_event); cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event); Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong); /* save image back to file */ clut_saveImageToFile("output_unlimited.png", command_queue, result_image); /* output filter size, local width, local height, and duration in nanoseconds for profiling */ printf("%d,%zu,%zu,%lld\n", filter_size, local_width, local_height, clut_getEventDuration_ns(kernel_event)); return_value = 0; error10: clReleaseMemObject(filter_matrix_buffer); error9: free(filter_matrix); error8: clReleaseMemObject(result_image); error7: clReleaseMemObject(source_image); error6: clReleaseCommandQueue(command_queue); error5: clReleaseKernel(kernel); error4: clReleaseProgram(program); error3: clReleaseContext(context); error2: free(flags); error1: return return_value; }
/*! * @function clut_blurImage * Blurs the image at [filename] with a filter of size [filter_size], and saves the result * to the file "output.png". * @param filename * The name of the file. * @param filter_size * The size of the blur filter. * @return * 0 on success, non-0 on failure. */ int clut_blurImage(const cl_device_id device, const char * const filename, const unsigned int filter_size) { const char * const fname = "clut_blurImage"; int return_value = 1; cl_int ret; if (NULL == filename) { Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname); goto error1; } /* Create context */ cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage", &ret); CLUT_CHECK_ERROR(ret, "Unable to create context", error1); Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname); /* Create program */ cl_program program = clut_createProgramFromFile(context, "homework_global.cl", NULL); if (NULL == program) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname); goto error3; } Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname); /* Create kernel */ cl_kernel kernel = clCreateKernel(program, "blurImage", &ret); CLUT_CHECK_ERROR(ret, "Unable to create kernel", error3); Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname); /* Create command_queue */ cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret); CLUT_CHECK_ERROR(ret, "Unable to create command queue", error4); Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname); /* load source image */ int width, height; cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height); if (NULL == source_image) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname); goto error5; } if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) { Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname); goto error6; } /* create destination image */ cl_image_format image_format = {0, 0}; cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; // image_desc.image_width = 0; // image_desc.image_height = 0; // image_desc.image_depth = 0; /* only for 3D images */ // image_desc.image_array_size = 0; /* only for image arrays */ // image_desc.image_row_pitch = 0; // image_desc.image_slice_pitch = 0; /* only for 3D images */ // image_desc.num_mip_levels = 0; /* mandatory */ // image_desc.num_samples = 0; /* mandatory */ // image_desc.buffer = NULL; /* only for 1D image buffers */ image_desc.image_width = width - filter_size + 1; image_desc.image_height = height - filter_size + 1; ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL); CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error6); cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret); CLUT_CHECK_ERROR(ret, "Unable to create second image", error6); Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname); /* create filter matrix */ unsigned char *filter_matrix = createFilterMatrix(filter_size); if (NULL == filter_matrix) { Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname); goto error7; } Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname); // printFilterMatrix(filter_matrix, filter_size); /* copy filter matrix to device */ cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret); CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error8); /* set kernel arguments */ ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image); CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname); ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image); CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname); ret = clSetKernelArg(kernel, 2, sizeof(filter_size), (void *) &filter_size); CLUT_CHECK_ERROR(ret, "Unable to set filter size argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Filter size argument set.\n", fname); ret = clSetKernelArg(kernel, 3, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer); CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error9); Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname); Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname); /* run kernel */ cl_event kernel_event; const size_t work_size[2] = { height - filter_size + 1, width - filter_size + 1}; ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error9); ret = clFinish(command_queue); CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error9); Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname); ret = clWaitForEvents(1, &kernel_event); CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error9); /* check that kernel executed correctly */ cl_int kernel_ret; ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error9); Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret); if (CL_COMPLETE != kernel_ret) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret)); goto error9; } cl_ulong end_time; ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL); CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error9); if (0 == end_time) { Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname); goto error9; } cl_double time_double = clut_getEventDuration(kernel_event); cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event); Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong); /* save image */ clut_saveImageToFile("output.png", command_queue, result_image); /* print filter size and duration in nanoseconds for profiling */ printf("%d,%llu\n", filter_size, clut_getEventDuration_ns(kernel_event)); return_value = 0; error9: clReleaseMemObject(filter_matrix_buffer); error8: free(filter_matrix); error7: clReleaseMemObject(result_image); error6: clReleaseMemObject(source_image); error5: clReleaseCommandQueue(command_queue); error4: clReleaseKernel(kernel); error3: clReleaseProgram(program); error2: clReleaseContext(context); error1: return return_value; }
int main(int argc, char *argv[]) { cl_int ret; /* get platform ID */ cl_platform_id platform_id; ret = clGetPlatformIDs(1, &platform_id, NULL); assert(CL_SUCCESS == ret); /* get device IDs */ cl_device_id device_id; ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, &device_id, NULL); assert(CL_SUCCESS == ret); /* create context */ cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); assert(CL_SUCCESS == ret); /* create command queue */ cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); assert(CL_SUCCESS == ret); /* create image object */ cl_image_format format; format.image_channel_order = CL_R; format.image_channel_data_type = CL_UNSIGNED_INT8; cl_image_desc desc; memset(&desc, 0, sizeof(desc)); desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = IMAGE_W; desc.image_height = IMAGE_H; cl_mem image = clCreateImage(context, 0, &format, &desc, NULL, &ret); assert(CL_SUCCESS == ret); /* filling background image */ { const size_t origin[] = {0, 0, 0}; const size_t region[] = {IMAGE_W, IMAGE_H, 1}; cl_uchar4 fill_color; fill_color.s[0] = 0; fill_color.s[1] = 0; fill_color.s[2] = 0; fill_color.s[3] = 0; ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL); assert(CL_SUCCESS == ret); } /* filling front image */ { const size_t origin[] = {(IMAGE_W*1)/4, (IMAGE_H*1)/4, 0}; const size_t region[] = {(IMAGE_W*2)/4, (IMAGE_H*2)/4, 1}; cl_uchar4 fill_color; fill_color.s[0] = 255; fill_color.s[1] = 0; fill_color.s[2] = 0; fill_color.s[3] = 0; ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL); assert(CL_SUCCESS == ret); } /* reading image */ cl_uchar *data = NULL; { size_t num_channels = 1; data = static_cast<cl_uchar*>(ALIGNED_MALLOC(IMAGE_W*IMAGE_H*sizeof(cl_uchar), num_channels*sizeof(cl_uchar))); assert(NULL != data); std::fill(&data[0], &data[IMAGE_W*IMAGE_H], 128); const size_t origin[] = {0, 0, 0}; const size_t region[] = {IMAGE_W, IMAGE_H, 1}; ret = clEnqueueReadImage(command_queue, image, CL_TRUE, origin, region, IMAGE_W*sizeof(cl_uchar), 0, data, 0, NULL, NULL); assert(CL_SUCCESS == ret); } /* print image */ for (unsigned int h=0; h<IMAGE_H; ++h) { for (unsigned int w=0; w<IMAGE_W; ++w) { std::cout << std::setw(5) << std::right << static_cast<int>(data[h*IMAGE_W+w]); } std::cout << std::endl; } /* finalizing */ ALIGNED_FREE(data); clReleaseMemObject(image); clReleaseCommandQueue(command_queue); clReleaseContext(context); return 0; }
void createCLImages() { // CL images cl_image_desc pixelDesc; memset(&pixelDesc, '\0', sizeof(cl_image_desc)); pixelDesc.image_type = CL_MEM_OBJECT_IMAGE2D; pixelDesc.image_width = imageWidth; pixelDesc.image_height = imageHeight; cl_int ret; void *hostPtr = NULL; if( inFlags & CL_MEM_USE_HOST_PTR || inFlags & CL_MEM_COPY_HOST_PTR ) hostPtr = memIn; inputImage = clCreateImage ( context, inFlags, &pixelFormat, &pixelDesc, hostPtr, &ret ); ASSERT_CL_RETURN( ret ); hostPtr = NULL; if( outFlags & CL_MEM_USE_HOST_PTR || outFlags & CL_MEM_COPY_HOST_PTR ) hostPtr = memOut; outputImage = clCreateImage ( context, outFlags, &pixelFormat, &pixelDesc, hostPtr, &ret ); ASSERT_CL_RETURN( ret ); hostPtr = NULL; if( copyFlags & CL_MEM_USE_HOST_PTR || copyFlags & CL_MEM_COPY_HOST_PTR ) hostPtr = memIn; if( whichTest == 2 ) { inputCopyImage = clCreateImage ( context, copyFlags, &pixelFormat, &pixelDesc, hostPtr, &ret ); ASSERT_CL_RETURN( ret ); } hostPtr = NULL; if( copyFlags & CL_MEM_USE_HOST_PTR || copyFlags & CL_MEM_COPY_HOST_PTR ) hostPtr = memOut; if( whichTest == 2 ) { outputCopyImage = clCreateImage ( context, copyFlags, &pixelFormat, &pixelDesc, hostPtr, &ret ); ASSERT_CL_RETURN( ret ); } }
/** * \related cl_Mem_Object_t * * This function allocates memory for Memory Object with OpenCL image & sets * function pointers. * * @param[in] parent_thread parent Steel Thread, which gives OpenCL context, etc * @param[in] mem_flags OpenCL memory flags, which will be used for OpenCL * memory objects creation * @param[in] image_format OpenCL image format, that describe characteristics * @param[in] width image width * @param[in] height image height * @param[in] host_ptr pointer to Host-side memory region (if any). This argument * is optional. If not needed - provide null pointer instead. * * @return pointer to allocated structure in case of success, * \ref VOID_MEM_OBJ_PTR otherwise * * @warning always use 'Destroy' function pointer to free memory, allocated * by this function. */ scow_Mem_Object* Make_Image( scow_Steel_Thread *parent_thread, const cl_mem_flags mem_flags, const cl_image_format *image_format, const size_t width, const size_t height, const size_t row_pitch, void *host_ptr) { cl_int ret; scow_Mem_Object* self; OCL_CHECK_EXISTENCE(parent_thread, VOID_MEM_OBJ_PTR); OCL_CHECK_EXISTENCE(image_format, VOID_MEM_OBJ_PTR); self = (scow_Mem_Object*) calloc(1, sizeof(*self)); OCL_CHECK_EXISTENCE(self, VOID_MEM_OBJ_PTR); self->obj_mem_type = IMAGE; self->parent_thread = parent_thread; self->host_ptr = host_ptr; self->mem_flags = mem_flags; self->width = width; self->height = height; self->row_pitch = 0; self->error = Make_Error(); self->timer = Make_Timer(VOID_KERNEL_PTR); self->Get_Mem_Obj = Mem_Object_Get_Mem_Obj; self->Destroy = Mem_Object_Destroy; self->Swap = Mem_Object_Swap; self->Unmap = Mem_Object_Unmap; self->Map = Image_Map; self->Write = Image_Send_To_Device; self->Read = Image_Get_From_Device; self->Copy = Image_Copy; self->Erase = NULL; self->Sync = Mem_Object_Sync; self->Get_Height = Image_Get_Height; self->Get_Width = Image_Get_Width; self->Get_Row_Pitch = Image_Get_Row_Pitch; self->Make_Child = NULL; #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS self->cl_mem_object = clCreateImage2D(self->parent_thread->context, mem_flags, image_format, self->width, self->height, self->row_pitch, host_ptr, &ret); #else cl_image_desc image_desc = { .image_type = CL_MEM_OBJECT_IMAGE2D, .image_width = self->width, .image_height = self->height, .image_array_size = 1, .image_row_pitch = self->row_pitch, .image_slice_pitch = host_ptr ? (self->row_pitch * self->height) : 0, .num_mip_levels = 0, .num_samples = 0, .buffer = NULL }; self->cl_mem_object = clCreateImage(self->parent_thread->context, mem_flags, image_format, &image_desc, host_ptr, &ret); #endif OCL_DIE_ON_ERROR(ret, CL_SUCCESS, self->Destroy(self), VOID_MEM_OBJ_PTR); return self; }
cl_int Simulator_3D<T>::build_kernel(const char * kernel_file) { cl_int err; cl_program program; // Build program with source (filename) on device+context CHECK_RETURN_N(program, CreateProgram(Simulator<T>::context, Simulator<T>::device, kernel_file, err), err); CHECK_RETURN_N(_kernel_brac_3d, clCreateKernel(program, "brac_3d", &err), err); CHECK_RETURN_N(_kernel_step_3d, clCreateKernel(program, "step_3d", &err), err); //Create Images; cl_image_desc r_desc; r_desc.image_type=CL_MEM_OBJECT_IMAGE3D; r_desc.image_width=Simulator<T>::_dim.x; r_desc.image_height=Simulator<T>::_dim.y; r_desc.image_depth=Simulator<T>::_dim.z; r_desc.image_row_pitch=0; r_desc.image_slice_pitch=0; r_desc.num_mip_levels=0; r_desc.num_samples=0; r_desc.buffer=NULL; cl_image_format fmt; fmt.image_channel_data_type=CL_FLOAT; /* DOES NOT WORK WITH DOUBLE */ fmt.image_channel_order=CL_R; T *v=(T *)calloc(Simulator<T>::_size,sizeof(T)); for(int i=0;i<Simulator<T>::_size;i++) { v[i]=rand()%101/(T)100; } CHECK_RETURN_N(_img_Phi,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err) CHECK_RETURN_N(_img_Bracket,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err) CHECK_RETURN_N(_img_PhiNext,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 0, sizeof(cl_mem), &_img_Phi)) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 1, sizeof(cl_mem), &_img_Bracket)) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 2, sizeof(T), &_a_2)) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 3, sizeof(T), &_a_4)) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 4, sizeof(T), &_K)) CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 5, sizeof(T), &_dx)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 0, sizeof(cl_mem), &_img_Phi)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 1, sizeof(cl_mem), &_img_Bracket)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 2, sizeof(cl_mem), &_img_PhiNext)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 3, sizeof(T), &_M)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 4, sizeof(T), &_dx)) CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 5, sizeof(T), &_dt)) free(v); _local_size[0]=std::min((unsigned int)8,Simulator<T>::_dim.x); _local_size[1]=std::min((unsigned int)8,Simulator<T>::_dim.y); _local_size[2]=std::min((unsigned int)4,Simulator<T>::_dim.z); _global_size[0]=Simulator<T>::_dim.x; _global_size[1]=Simulator<T>::_dim.y; _global_size[2]=Simulator<T>::_dim.z; Simulator<T>::_cl_initialized = true; return CL_SUCCESS; }
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; size_t el, row, col; CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms)); for (i = 0; i < nplatforms; i++) { CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices)); /* Only test the devices we actually have room for */ if (ndevices > MAX_DEVICES) ndevices = MAX_DEVICES; for (j = 0; j < ndevices; j++) { /* skip devices that do not support images */ cl_bool has_img; CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL)); if (!has_img) continue; cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err); CHECK_OPENCL_ERROR_IN("clCreateCommandQueue"); cl_ulong alloc; size_t max_height; size_t max_width; #define MAXALLOC (1024U*1024U) CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc), &alloc, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_width), &max_width, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_height), &max_height, NULL)); while (alloc > MAXALLOC) alloc /= 2; // fit at least one max_width inside the alloc (shrink max_width for this) while (max_width*pixel_size > alloc) max_width /= 2; // round number of elements to next multiple of max_width elements const size_t nels = (alloc/pixel_size/max_width)*max_width; const size_t buf_size = nels*pixel_size; cl_image_desc img_desc; memset(&img_desc, 0, sizeof(img_desc)); img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = max_width; img_desc.image_height = nels/max_width; img_desc.image_depth = 1; cl_ushort null_pixel[4] = {0, 0, 0, 0}; cl_ushort *host_buf = malloc(buf_size); TEST_ASSERT(host_buf); for (el = 0; el < nels; el+=4) { host_buf[el] = el & CHANNEL_MAX; host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX; host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX; host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX; } cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateImage"); CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL)); const size_t offset = 0; const size_t origin[] = {0, 0, 0}; const size_t region[] = {img_desc.image_width, img_desc.image_height, 1}; CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL)); size_t row_pitch, slice_pitch; cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region, &row_pitch, &slice_pitch, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapImage"); CHECK_CL_ERROR(clFinish(queue)); for (row = 0; row < img_desc.image_height; ++row) { for (col = 0; col < img_desc.image_width; ++col) { cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4; cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4; if (memcmp(img_pixel, buf_pixel, pixel_size) != 0) printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n", row, col, (size_t)(buf_pixel - host_buf), buf_pixel[0], buf_pixel[1], buf_pixel[2], buf_pixel[3], img_pixel[0], img_pixel[1], img_pixel[2], img_pixel[3]); TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0); } } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL)); /* Clear the buffer, and ensure it has been cleared */ CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL)); cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer"); CHECK_CL_ERROR(clFinish(queue)); for (el = 0; el < nels; ++el) { #if 0 // debug if (buf_map[el] != 0) { printf("%zu/%zu => %u\n", el, nels, buf_map[el]); } #endif TEST_ASSERT(buf_map[el] == 0); } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL)); /* Copy data from image to buffer, and check that it's again equal to the original buffer */ CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL)); buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err); CHECK_CL_ERROR(clFinish(queue)); TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0); CHECK_CL_ERROR ( clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL)); CHECK_CL_ERROR (clFinish (queue)); free(host_buf); CHECK_CL_ERROR (clReleaseMemObject (img)); CHECK_CL_ERROR (clReleaseMemObject (buf)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (context)); } } return EXIT_SUCCESS; }
int ImageOverlap::setupCL() { cl_int status = CL_SUCCESS; cl_device_type dType; if(deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed"); // Display available devices. retValue = sampleCommon->displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed"); // If we could find our platform, use it. Otherwise use just available platform. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed"); status = deviceInfo.setDeviceInfo(devices[deviceId]); CHECK_OPENCL_ERROR(status, "deviceInfo.setDeviceInfo failed"); if(!deviceInfo.imageSupport) { OPENCL_EXPECTED_ERROR(" Expected Error: Device does not support Images"); } blockSizeX = deviceInfo.maxWorkGroupSize<GROUP_SIZE?deviceInfo.maxWorkGroupSize:GROUP_SIZE; // Create command queue cl_command_queue_properties prop = 0; for(int i=0;i<3;i++) { commandQueue[i] = clCreateCommandQueue( context, devices[deviceId], prop, &status); CHECK_OPENCL_ERROR(status,"clCreateCommandQueuefailed."); } // Create and initialize image objects // Create map image mapImage = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, &image_desc, mapImageData, &status); CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (mapImage)"); int color[4] = {0,0,80,255}; size_t origin[3] = {300,300,0}; size_t region[3] = {100,100,1}; status = clEnqueueFillImage(commandQueue[0], mapImage, color, origin, region, NULL, NULL, &eventlist[0]); // Create fill image fillImage = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &imageFormat, &image_desc, fillImageData, &status); CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (fillImage)"); color[0] = 80; color[1] = 0; color[2] = 0; color[3] = 0; origin[0] = 50; origin[1] = 50; status = clEnqueueFillImage(commandQueue[1], fillImage, color, origin, region, NULL, NULL, &eventlist[1]); //Create output image outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, &imageFormat, &image_desc, NULL, &status); CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (outputImage)"); // create a CL program using the kernel source streamsdk::buildProgramData buildData; buildData.kernelName = std::string("ImageOverlap_Kernels.cl"); buildData.devices = devices; buildData.deviceId = deviceId; buildData.flagsStr = std::string(""); if(isLoadBinaryEnabled()) buildData.binaryName = std::string(loadBinary.c_str()); if(isComplierFlagsSpecified()) buildData.flagsFileName = std::string(flags.c_str()); retValue = sampleCommon->buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernelOverLap = clCreateKernel(program, "OverLap", &status); CHECK_OPENCL_ERROR(status,"clCreateKernel failed.(OverLap)"); return SDK_SUCCESS; }
int main() { cl_platform_id platform = NULL; cl_device_id device = NULL; cl_context context = NULL; cl_command_queue command_queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int status = 0; cl_event task_event, map_event; cl_device_type dType = CL_DEVICE_TYPE_GPU; cl_int image_width, image_height; cl_float4 *result; int i, j; cl_mem clImage, out; cl_bool support; int pixels_read = 8; //Setup the OpenCL Platform, //Get the first available platform. Use it as the default platform status = clGetPlatformIDs(1, &platform, NULL); LOG_OCL_ERROR(status, "clGetPlatformIDs Failed" ); //Get the first available device status = clGetDeviceIDs (platform, dType, 1, &device, NULL); LOG_OCL_ERROR(status, "clGetDeviceIDs Failed" ); /*Check if the device support images */ clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(support), &support, NULL); if (support != CL_TRUE) { std::cout <<"IMAGES not supported\n"; return 1; } //Create an execution context for the selected platform and device. cl_context_properties contextProperty[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( contextProperty, dType, NULL, NULL, &status); LOG_OCL_ERROR(status, "clCreateContextFromType Failed" ); /*Create command queue*/ command_queue = clCreateCommandQueue(context, device, 0, &status); LOG_OCL_ERROR(status, "clCreateCommandQueue Failed" ); /* Create Image Object */ //Create OpenCL device input image with the format and descriptor as below cl_image_format image_format; image_format.image_channel_data_type = CL_FLOAT; image_format.image_channel_order = CL_R; //We create a 5 X 5 2D image image_width = 5; image_height = 5; cl_image_desc image_desc; image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image_desc.image_width = image_width; image_desc.image_height = image_height; image_desc.image_depth = 1; image_desc.image_array_size = 1; image_desc.image_row_pitch = image_width*sizeof(float); image_desc.image_slice_pitch = 25*sizeof(float); image_desc.num_mip_levels = 0; image_desc.num_samples = 0; image_desc.buffer = NULL; /* Create output buffer */ out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4)*pixels_read, NULL, &status); LOG_OCL_ERROR(status, "clCreateBuffer Failed" ); size_t origin[] = {0,0,0}; /* Transfer target coordinate*/ size_t region[] = {image_width,image_height,1}; /* Size of object to be transferred */ float *data = (float *)malloc(image_width*image_height*sizeof(float)); float pixels[] = { /* Transfer Data */ 10, 20, 10, 40, 50, 10, 20, 20, 40, 50, 10, 20, 30, 40, 50, 10, 20, 40, 40, 50, 10, 20, 50, 40, 50 }; memcpy(data, pixels, image_width*image_height*sizeof(float)); clImage = clCreateImage(context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &image_format, &image_desc, pixels, &status); LOG_OCL_ERROR(status, "clCreateImage Failed" ); /* If the image was not created using CL_MEM_USE_HOST_PTR, then you can write the image data to the device using the clEnqueueWriteImage function. */ //status = clEnqueueWriteImage(command_queue, clImage, CL_TRUE, origin, region, 5*sizeof(float), 25*sizeof(float), data, 0, NULL, NULL); //LOG_OCL_ERROR(status, "clCreateBuffer Failed" ); /* Build program */ program = clCreateProgramWithSource(context, 1, (const char **)&sample_image_kernel, NULL, &status); LOG_OCL_ERROR(status, "clCreateProgramWithSource Failed" ); // Build the program status = clBuildProgram(program, 1, &device, "", NULL, NULL); LOG_OCL_ERROR(status, "clBuildProgram Failed" ); if(status != CL_SUCCESS) { if(status == CL_BUILD_PROGRAM_FAILURE) LOG_OCL_COMPILER_ERROR(program, device); LOG_OCL_ERROR(status, "clBuildProgram Failed" ); } printf("Printing the image pixels\n"); for (i=0; i<image_height; i++) { for (j=0; j<image_width; j++) { printf("%f ",data[i*image_width +j]); } printf("\n"); } //Create kernel and set the kernel arguments kernel = clCreateKernel(program, "image_test", &status); clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clImage); clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out); /*********Image sampler with image repeated at every 1.0 normalized coordinate***********/ /*If host side sampler is not required the sampler objects can also be created on the kernel code. Don't pass the thirsd argument to the kernel and create a sample object as shown below in the kernel code*/ //const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; cl_sampler sampler = clCreateSampler (context, CL_TRUE, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST, &status); clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler); //Enqueue the kernel status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event); LOG_OCL_ERROR(status, "clEnqueueTask Failed" ); /* Map the result back to host address */ result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status); printf(" SAMPLER mode set to CL_ADDRESS_REPEAT | CL_FILTER_NEAREST\n"); printf("\nPixel values retreived based on the filter and Addressing mode selected\n"); printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]); printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]); printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]); printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]); printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]); printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]); printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]); printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]); clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL); clReleaseSampler(sampler); /*********Image sampler with image mirrored at every 1.0 normalized coordinate***********/ sampler = clCreateSampler (context, CL_TRUE, CL_ADDRESS_MIRRORED_REPEAT, CL_FILTER_LINEAR, &status); clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler); //Enqueue the kernel status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event); LOG_OCL_ERROR(status, "clEnqueueTask Failed" ); /* Map the result back to host address */ result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status); printf(" SAMPLER mode set to CL_ADDRESS_MIRRORED_REPEAT | CL_FILTER_LINEAR\n"); printf("\nPixel values retreived based on the filter and Addressing mode selected\n"); printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]); printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]); printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]); printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]); printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]); printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]); printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]); printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]); clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL); clReleaseSampler(sampler); /********************/ //Free All OpenCL objects. clReleaseMemObject(out); clReleaseMemObject(clImage); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(command_queue); clReleaseContext(context); return 0; }
enum piglit_result piglit_cl_test(const int argc, const char **argv, const struct piglit_cl_api_test_config* config, const struct piglit_cl_api_test_env* env) { #if defined(CL_VERSION_1_2) enum piglit_result result = PIGLIT_PASS; cl_int err; #define IMG_WIDTH 4 #define IMG_HEIGHT 4 #define IMG_DATA_SIZE 4 #define IMG_BUFFER_SIZE IMG_WIDTH * IMG_HEIGHT * IMG_DATA_SIZE unsigned char img_buf[IMG_BUFFER_SIZE] = {0}; unsigned char dst_buf[IMG_BUFFER_SIZE] = {0}; unsigned char exp_buf[IMG_BUFFER_SIZE] = {0}; int pattern[4] = {129, 33, 77, 255}; size_t origin[3] = {0, 0, 0}; size_t region[3] = {2, 2, 1}; size_t tmp; cl_event event; cl_mem image; cl_image_format img_format; cl_image_desc img_desc = {0}; cl_command_queue queue = env->context->command_queues[0]; int i; cl_bool *image_support = piglit_cl_get_device_info(env->context->device_ids[0], CL_DEVICE_IMAGE_SUPPORT); if (!*image_support) { fprintf(stderr, "No image support\n"); free(image_support); return PIGLIT_SKIP; } img_format.image_channel_order = CL_RGBA; img_format.image_channel_data_type = CL_UNSIGNED_INT8; img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = IMG_WIDTH; img_desc.image_height = IMG_HEIGHT; img_desc.buffer = NULL; /*** Normal usage ***/ image = clCreateImage(env->context->cl_ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &img_format, &img_desc, &img_buf, &err); if(!piglit_cl_check_error(err, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Creating an image\n", piglit_cl_get_error_name(err)); return PIGLIT_FAIL; } if (!test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_SUCCESS, &result, "Enqueuing the image to be filled")) { return PIGLIT_FAIL; } region[0] = IMG_WIDTH; region[1] = IMG_HEIGHT; err = clEnqueueReadImage(queue, image, 1, origin, region, 0, 0, dst_buf, 0, NULL, NULL); if(!piglit_cl_check_error(err, CL_SUCCESS)) { fprintf(stderr, "Failed (error code: %s): Reading image\n", piglit_cl_get_error_name(err)); return PIGLIT_FAIL; } /* * fill the host buffer with the pattern * for exemple : pattern == 1234 * * 12341234abcdabcd * 12341234abcdabcd * abcdabcdabcdabcd * abcdabcdabcdabcd */ exp_buf[0] = pattern[0]; exp_buf[1] = pattern[1]; exp_buf[2] = pattern[2]; exp_buf[3] = pattern[3]; memcpy(exp_buf + (IMG_DATA_SIZE * 1), exp_buf, IMG_DATA_SIZE); memcpy(exp_buf + (IMG_DATA_SIZE * 4), exp_buf, IMG_DATA_SIZE); memcpy(exp_buf + (IMG_DATA_SIZE * 5), exp_buf, IMG_DATA_SIZE); for (i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i) { if (!piglit_cl_probe_integer(dst_buf[i], exp_buf[i], 0)) { fprintf(stderr, "Error at %d: got %d, expected %d\n", i, dst_buf[i], exp_buf[i]); return PIGLIT_FAIL; } } /*** Errors ***/ /* * CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue. */ test(NULL, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_COMMAND_QUEUE, &result, "CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue"); /* * CL_INVALID_CONTEXT if the context associated with command_queue and * image are not the same or if the context associated with command_queue * and events in event_wait_list are not the same. */ { piglit_cl_context context; cl_int err; context = piglit_cl_create_context(env->platform_id, env->context->device_ids, 1); if (context) { event = clCreateUserEvent(context->cl_ctx, &err); if (err == CL_SUCCESS) { err = clSetUserEventStatus(event, CL_COMPLETE); if (err == CL_SUCCESS) { test(context->command_queues[0], image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_CONTEXT, &result, "CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same"); test(queue, image, pattern, origin, region, 1, &event, NULL, CL_INVALID_CONTEXT, &result, "CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same"); } else { fprintf(stderr, "Could not set event status.\n"); piglit_merge_result(&result, PIGLIT_WARN); } clReleaseEvent(event); } else { fprintf(stderr, "Could not create user event.\n"); piglit_merge_result(&result, PIGLIT_WARN); } piglit_cl_release_context(context); } else { fprintf(stderr, "Could not test triggering CL_INVALID_CONTEXT.\n"); piglit_merge_result(&result, PIGLIT_WARN); } } /* * CL_INVALID_MEM_OBJECT if image is not a valid buffer object. */ test(queue, NULL, pattern, origin, region, 0, NULL, NULL, CL_INVALID_MEM_OBJECT, &result, "CL_INVALID_MEM_OBJECT if image is not a valid buffer object"); /* * CL_INVALID_VALUE if fill_color is NULL. */ test(queue, image, NULL, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if fill_color is NULL"); /* * CL_INVALID_VALUE if the region being written specified by origin and * region is out of bounds or if ptr is a NULL value. */ tmp = origin[0]; origin[0] = IMG_WIDTH + 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (origin)"); origin[0] = tmp; tmp = region[0]; region[0] = IMG_WIDTH + 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (region)"); region[0] = tmp; test(queue, image, pattern, NULL, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if ptr is a NULL value (origin)"); test(queue, image, pattern, origin, NULL, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if ptr is a NULL value (region)"); /* * CL_INVALID_VALUE if values in origin and region do not follow rules * described in the argument description for origin and region. */ tmp = origin[2]; origin[2] = 1; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if values in origin do not follow rules described in the argument description for origin"); origin[2] = tmp; tmp = region[2]; region[2] = 0; test(queue, image, pattern, origin, region, 0, NULL, NULL, CL_INVALID_VALUE, &result, "CL_INVALID_VALUE if values in region do not follow rules described in the argument description for region"); region[2] = tmp; /* * CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and * num_events_in_wait_list > 0, or event_wait_list is not NULL and * num_events_in_wait_list is 0, or if event objects in event_wait_list * are not valid events. */ event = NULL; test(queue, image, pattern, origin, region, 1, NULL, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0"); test(queue, image, pattern, origin, region, 0, &event, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is not NULL and num_events_in_wait_list is 0"); test(queue, image, pattern, origin, region, 1, &event, NULL, CL_INVALID_EVENT_WAIT_LIST, &result, "CL_INVALID_EVENT_WAIT_LIST if event objects in event_wait_list are not valid events"); /* * CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified * or compute row and/or slice pitch) for image are not supported by device * associated with queue. */ /* This is a per device test, clCreateImage would have failed before */ /* * CL_INVALID_IMAGE_FORMAT if image format (image channel order and data type) * for image are not supported by device associated with queue. */ /* This is a per device test, clCreateImage would have failed before */ free(image_support); clReleaseMemObject(image); return result; #else return PIGLIT_SKIP; #endif }
int main(int argc, char **argv) { /* test name */ char name[] = "test_image_query_funcs"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int err; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image2_desc, image3_desc; printf("Running test %s...\n", name); memset(&image2_desc, 0, sizeof(cl_image_desc)); image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image2_desc.image_width = 2; image2_desc.image_height = 4; memset(&image3_desc, 0, sizeof(cl_image_desc)); image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image3_desc.image_width = 2; image3_desc.image_height = 4; image3_desc.image_depth = 8; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); TEST_ASSERT (imageData != NULL && "out of host memory\n"); memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); TEST_ASSERT (filename != NULL && "out of host memory\n"); snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n"); cl_sampler external_sampler = clCreateSampler ( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err); CHECK_OPENCL_ERROR_IN ("clCreateSampler"); CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES, sizeof (cl_device_id), devices, NULL)); queue = clCreateCommandQueue (context, devices[0], 0, &err); CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue"); /* Create image */ cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image2_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image2"); cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image3_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); unsigned color[4] = { 2, 9, 11, 7 }; size_t orig[3] = { 0, 0, 0 }; size_t reg[3] = { 2, 4, 1 }; err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, &err); CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource"); err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clBuildProgram"); /* execute the kernel with give name */ kernel = clCreateKernel (program, name, NULL); CHECK_OPENCL_ERROR_IN ("clCreateKernel"); err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0"); err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1"); err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2"); err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel"); err = clFinish (queue); CHECK_OPENCL_ERROR_IN ("clFinish"); clReleaseMemObject (image2); clReleaseMemObject (image3); clReleaseKernel (kernel); clReleaseProgram (program); clReleaseCommandQueue (queue); clReleaseSampler (external_sampler); clUnloadCompiler (); clReleaseContext (context); free (source); free (filename); free (imageData); printf("OK\n"); return 0; }
cl_mem bindTexture(const oclMat &mat) { cl_mem texture; cl_image_format format; int err; int depth = mat.depth(); int channels = mat.oclchannels(); switch(depth) { case CV_8U: format.image_channel_data_type = CL_UNSIGNED_INT8; break; case CV_32S: format.image_channel_data_type = CL_UNSIGNED_INT32; break; case CV_32F: format.image_channel_data_type = CL_FLOAT; break; default: CV_Error(-1, "Image forma is not supported"); break; } switch(channels) { case 1: format.image_channel_order = CL_R; break; case 3: format.image_channel_order = CL_RGB; break; case 4: format.image_channel_order = CL_RGBA; break; default: CV_Error(-1, "Image format is not supported"); break; } #ifdef CL_VERSION_1_2 //this enables backwards portability to //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2)) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = mat.cols; desc.image_height = mat.rows; desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err); } else #endif { texture = clCreateImage2D( *(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, mat.cols, mat.rows, 0, NULL, &err); } size_t origin[] = { 0, 0, 0 }; size_t region[] = { mat.cols, mat.rows, 1 }; cl_mem devData; if (mat.cols * mat.elemSize() != mat.step) { devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows * mat.elemSize(), NULL, NULL); const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); } else { devData = (cl_mem)mat.data; } clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr()); clReleaseMemObject(devData); } openCLSafeCall(err); return texture; }
int main(int argc, char** argv) { GLFWwindow* glfwwindow; {//OpenGL/GLFW Init if (!glfwInit()) { std::cerr << "Error: GLFW init failed" << std::endl; exit(EXIT_FAILURE); } glfwwindow = glfwCreateWindow(200, 200, "Nvidia interop bug demo", nullptr, nullptr); glfwMakeContextCurrent(glfwwindow); glewInit(); std::cout << "OpenGL Info: " << (char*)glGetString(GL_VENDOR) << " " << (char*)glGetString(GL_RENDERER) << std::endl; } cl_context clcontext; cl_command_queue clqueue; {//OpenCL init cl_platform_id platform = nullptr; {//Platform init cl_uint numPlatforms; clGetPlatformIDs(0, nullptr, &numPlatforms); if (numPlatforms == 0) { std::cerr << "Error: No OpenCL platforms available" << std::endl; return EXIT_FAILURE; } cl_platform_id* all_platforms = new cl_platform_id[numPlatforms]; clGetPlatformIDs(numPlatforms, all_platforms, nullptr); for (size_t i = 0; i < numPlatforms; i++) //Select Nvidia out of the platforms { char name[300]; clGetPlatformInfo(all_platforms[i], CL_PLATFORM_NAME, sizeof(name), &name, nullptr); std::string namestring(name); if (namestring.find("NVIDIA") != std::string::npos || namestring.find("Nvidia") != std::string::npos) platform = all_platforms[i]; } if (platform == nullptr) { std::cerr << "No Nvidia OpenCL platform found, will default to platform 0 "; } delete[] all_platforms; } { //Create shared context cl_context_properties properties[7]; properties[0] = CL_CONTEXT_PLATFORM; //This is different for other operating systems than Windows properties[1] = (cl_context_properties)platform; properties[2] = CL_GL_CONTEXT_KHR; properties[3] = (cl_context_properties)wglGetCurrentContext(); properties[4] = CL_WGL_HDC_KHR; properties[5] = (cl_context_properties)wglGetCurrentDC(); properties[6] = 0; clcontext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr); } cl_device_id cldevice; { //Create cldevice cl_device_id* devices; cl_command_queue commandQueue = nullptr; size_t numDevices = 0; // First get the size of the devices buffer clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, 0, nullptr, &numDevices); if (numDevices == 0) { std::cerr << "Error: No OpenCL devices available" << std::endl; return EXIT_FAILURE; } devices = new cl_device_id[numDevices]; clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, numDevices, devices, nullptr); cldevice = devices[0]; delete[] devices; } { //Create CL command queue clqueue = clCreateCommandQueue(clcontext, cldevice, 0, nullptr); } char platformname[300]; char devicename[300]; clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platformname), &platformname, nullptr); clGetDeviceInfo(cldevice, CL_DEVICE_NAME, sizeof(devicename), &devicename, nullptr); std::cout << "OpenCL platform " << platformname << " device " << devicename << std::endl; } size_t size = 200 * 200 * 4; //w=200, h=200, 4 bytes per channel char* databuffer = new char[size]; GLuint glbuffer, gltexture; cl_mem unsharedbuffer, sharedbuffer, unsharedtexture, sharedtexture; { //Init test data glGenBuffers(1, &glbuffer); glBindBuffer(GL_ARRAY_BUFFER, glbuffer); glBufferData(GL_ARRAY_BUFFER, size, databuffer, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, GL_NONE); glGenTextures(1, &gltexture); glBindTexture(GL_TEXTURE_2D, gltexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 200, 200, 0, GL_RGBA, GL_UNSIGNED_BYTE, databuffer); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); //Intel needs this for shared textures glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); //Intel needs this for shared textures glBindTexture(GL_TEXTURE_2D, GL_NONE); sharedtexture = clCreateFromGLTexture(clcontext, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, gltexture, nullptr); sharedbuffer = clCreateFromGLBuffer(clcontext, CL_MEM_READ_WRITE, glbuffer, nullptr); unsharedbuffer = clCreateBuffer(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, databuffer, nullptr); cl_image_format imgformat; cl_image_desc desc; imgformat.image_channel_data_type = CL_UNSIGNED_INT8; imgformat.image_channel_order = CL_RGBA; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = 200; desc.image_height = 200; desc.image_depth = 1; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = nullptr; unsharedtexture = clCreateImage(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imgformat, &desc, databuffer, nullptr); } { const size_t origin[3] = { 0, 0, 0 }; const size_t region[3] = { 200, 200, 1 }; size_t pitch; // //MAIN PART BEGINS HERE // { //OpenGL buffer std::cout << "Mapping buffer with OpenGL: "; glBindBuffer(GL_ARRAY_BUFFER, glbuffer); void* glmapptr = glMapBuffer(GL_ARRAY_BUFFER, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT); glUnmapBuffer(GL_ARRAY_BUFFER); glBindBuffer(GL_ARRAY_BUFFER, GL_NONE); std::cout << "OK" << std::endl; glFinish(); } { //OpenCL unshared texture std::cout << "Mapping unshared texture with OpenCL: "; void* unsimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared objects clEnqueueUnmapMemObject(clqueue, unsharedtexture, unsimgptr, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL shared texture std::cout << "Mapping shared texture with OpenCL: "; clEnqueueAcquireGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr); void* shdimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine shared objects clEnqueueUnmapMemObject(clqueue, unsharedtexture, shdimgptr, 0, nullptr, nullptr); clEnqueueReleaseGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL unshared buffer std::cout << "Mapping unshared buffer with OpenCL: "; void* unsbufptr = clEnqueueMapBuffer(clqueue, unsharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared buffers clEnqueueUnmapMemObject(clqueue, unsharedbuffer, unsbufptr, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } { //OpenCL shared buffer std::cout << "Mapping shared buffer with OpenCL (EXPECTING CRASH ON NVIDIA SYSTEMS): " << std::endl; clEnqueueAcquireGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr); // //CRITICAL PART BEGINS HERE // void* shdbufptr = clEnqueueMapBuffer(clqueue, sharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //On Nvidia systems when using shared objects, error 0xC0000005 occurs in ntdll.dll: write access violation at position 0xSOMETHING //This leaves my application in an unusable state //But it works fine everywhere else (tested on ARM, AMD, Intel systems) // //CRITICAL PART ENDS HERE // std::cout << "did not fail" << std::endl; clEnqueueUnmapMemObject(clqueue, sharedbuffer, shdbufptr, 0, nullptr, nullptr); clEnqueueReleaseGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr); std::cout << "OK" << std::endl; } // //MAIN PART ENDS HERE // } clFinish(clqueue); delete[] databuffer; clReleaseMemObject(sharedbuffer); clReleaseMemObject(unsharedbuffer); clReleaseMemObject(sharedtexture); clReleaseMemObject(unsharedtexture); clReleaseCommandQueue(clqueue); clReleaseContext(clcontext); glDeleteTextures(1, &gltexture); glDeleteBuffers(1, &glbuffer); glfwDestroyWindow(glfwwindow); glfwTerminate(); return EXIT_SUCCESS; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_sampler_address_clamp"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image_desc; printf("Running test %s...\n", name); memset(&image_desc, 0, sizeof(cl_image_desc)); image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image_desc.image_width = 4; image_desc.image_height = 4; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); if (imageData == NULL) { puts("out of host memory\n"); goto error; } memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* Create image */ cl_mem image = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image creation failed\n"); goto error; } /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image); if (result) { puts("clSetKernelArg failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (image) { clReleaseMemObject (image); } if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clUnloadCompiler (); clReleaseContext (context); } if (source) { free(source); } if (filename) { free(filename); } if (imageData) { free(imageData); } if (retval) { printf("FAIL\n"); return 1; } printf("OK\n"); return 0; }