void vglClToGl(VglImage* img) { //vglDownload(img); if (!vglIsInContext(img, VGL_CL_CONTEXT)) { //vglGlToCl(img); //fprintf(stderr, "vglClToGl: Error: image context = %d not in VGL_CL_CONTEXT\n", img->inContext); return; } cl_int err_cl; //printf("==========RELEASE: vgl = %p, ocl = %d\n", img, img->oclPtr); err_cl = clEnqueueReleaseGLObjects(cl.commandQueue, 1 , (cl_mem*) &img->oclPtr, 0 , NULL, NULL); vglClCheckError(err_cl, (char*) "clEnqueueReleaseGLObjects"); err_cl = clFlush(cl.commandQueue); vglClCheckError(err_cl, (char*) "clFlush"); err_cl = clFinish(cl.commandQueue); vglClCheckError(err_cl, (char*) "clFinish"); vglSetContext(img, VGL_GL_CONTEXT); //printf("Vai sair de %s\n", __FUNCTION__); }
void vglGlToCl(VglImage* img) { glFlush(); glFinish(); if (img->oclPtr == NULL) { vglClAlloc(img); } if (!vglIsInContext(img, VGL_CL_CONTEXT)) { //printf("==========ACQUIRE: vgl = %p, ocl = %d\n", img, img->oclPtr); cl_int err_cl = clFlush(cl.commandQueue); vglClCheckError(err_cl, (char*) "clFlush"); err_cl = clFinish(cl.commandQueue); vglClCheckError(err_cl, (char*) "clFinish"); err_cl = clEnqueueAcquireGLObjects(cl.commandQueue, 1 , (cl_mem*) &img->oclPtr, 0 , NULL, NULL); vglClCheckError(err_cl, (char*) "clEnqueueAcquireGLObjects"); vglSetContext(img, VGL_CL_CONTEXT); } //printf("Vai sair de %s\n", __FUNCTION__); }
float vglCl9root(float number) { cl_int err; static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL/roottest.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { std::string str("File not found: "); str.append(file_path); vglClCheckError(-1, (char*)str.c_str()); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglCl9Root", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } cl_mem input = clCreateBuffer(cl.context,CL_MEM_READ_ONLY,sizeof(float),NULL,&err); clEnqueueWriteBuffer(cl.commandQueue,input,true,0,sizeof(float),&number,0,0,0); cl_mem output = clCreateBuffer(cl.context,CL_MEM_READ_ONLY,sizeof(float),NULL,&err); err = clSetKernelArg( kernel, 0, sizeof(cl_mem ), &input ); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &output ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); size_t worksize[] = { 1, 1, 1 }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 1, NULL, worksize, 0, 0, 0, 0 ); float ret; clEnqueueReadBuffer(cl.commandQueue,output,true,0,sizeof(float),&ret,0,0,0); clReleaseMemObject(input); clReleaseMemObject(output); return ret; }
void vglClFlush() { cl_int err; err = clFlush( cl.commandQueue ); vglClCheckError(err, (char*) "clFlush command_queue"); err = clFinish( cl.commandQueue ); vglClCheckError(err, (char*) "clFinish command_queue"); }
int* vglClCumulativeSum(int* arr, int size) { cl_int err; int nsize = floor(log10(size)/log10(2)); nsize = pow(2,nsize+1); cl_mem mobj_arr = NULL; mobj_arr = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, size*sizeof(int), NULL, &err); vglClCheckError(err, (char*) "clCreateBuffer mobj_arr" ); err = clEnqueueWriteBuffer(cl.commandQueue,mobj_arr,CL_TRUE, 0, size*sizeof(int), arr, 0, NULL, NULL); vglClCheckError(err, (char*) "clEnqueueWriteBuffer mobj_arr"); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_UTIL/vglClMath.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglClCumSum", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &mobj_arr); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( int ), &size); vglClCheckError( err, (char*) "clSetKernelArg 1" ); size_t worksize[] = { nsize/2, 1, 1 }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 1, NULL, worksize, 0, 0, 0, 0 ); vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); int* cumsum = (int*) malloc(size*sizeof(int)); err = clEnqueueReadBuffer(cl.commandQueue,mobj_arr,CL_TRUE, 0, size*sizeof(int), cumsum, 0, NULL, NULL); vglClCheckError(err, (char*) "ReadBuffer histogram"); err = clReleaseMemObject( mobj_arr ); vglClCheckError(err, (char*) "clReleaseMemObject mobj_arr"); return cumsum; }
void vglClDownload(VglImage* img) { if (Interop && img->nChannels > 1) { vglClDownloadInterop(img); } else { if (img->nChannels == 3) { fprintf(stderr, "%s: %s: Error: ipl image field with 3 channels not supported. Please convert to 4 channels.\n", __FILE__, __FUNCTION__); exit(1); } if (!vglIsInContext(img, VGL_CL_CONTEXT)) { fprintf(stderr, "vglClDownload: Error: image context = %d not in VGL_CL_CONTEXT\n", img->inContext); return; } size_t Origin[3] = { 0, 0, 0}; if(img->ndim == 2) { size_t Size3d[3] = { img->shape[VGL_WIDTH], img->shape[VGL_HEIGHT], 1 }; cl_int err_cl = clEnqueueReadImage( cl.commandQueue, img->oclPtr, CL_TRUE, Origin, Size3d, 0, 0, img->ipl->imageData, 0, NULL, NULL ); vglClCheckError( err_cl, (char*) "clEnqueueReadImage2D" ); //cvCvtColor(img->iplRGBA, img->ipl, CV_RGBA2BGR); } else if(img->ndim == 3) { size_t Size3d[3] = { img->shape[VGL_WIDTH], img->shape[VGL_HEIGHT], img->shape[VGL_LENGTH] }; cl_int err_cl = clEnqueueReadImage( cl.commandQueue, img->oclPtr, CL_TRUE, Origin, Size3d, 0, 0,(char*) img->ndarray, 0, NULL, NULL ); vglClCheckError( err_cl, (char*) "clEnqueueReadImage3D" ); } else { void* imageData = img->getImageData(); if (!imageData) { fprintf(stderr, "%s: %s: Error: both ipl and ndarray are NULL.\n", __FILE__, __FUNCTION__); exit(1); } cl_int err = clEnqueueReadBuffer(cl.commandQueue, img->oclPtr, CL_TRUE, 0, img->getTotalSizeInBytes(), imageData, 0, NULL, NULL); vglClCheckError( err, (char*) "clEnqueueReadNDImage" ); } vglAddContext(img, VGL_RAM_CONTEXT); } }
void vglClAlloc(VglImage* img) { glFlush(); glFinish(); cl_int err_cl; if (img->oclPtr == NULL) { if (img->ndim == 2) { img->oclPtr = clCreateFromGLTexture2D(cl.context, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, img->tex, &err_cl); vglClCheckError(err_cl, (char*) "clCreateFromGLTexture"); } else if(img->ndim == 3) { img->oclPtr = clCreateFromGLTexture3D(cl.context, CL_MEM_READ_WRITE, GL_TEXTURE_3D, 0, img->tex, &err_cl); vglClCheckError(err_cl, (char*) "clCreateFromGLTexture"); } } }
int* vglClHistogram(VglImage* img_input){ cl_mem partial_hist; if (img_input->ndim == 2) { partial_hist = vglClPartialHistogram(img_input); } else if (img_input->ndim == 3) { partial_hist = vglCl3dPartialHistogram(img_input); } int* hist = vglClSumPartialHistogram(partial_hist,img_input->shape[VGL_WIDTH], img_input->nChannels); cl_int err = clReleaseMemObject( partial_hist ); vglClCheckError(err, (char*) "clReleaseMemObject partial_hist"); return hist; }
void vglCl3dMergeZByMean(VglImage* img_input, VglImage* img_output, int number_of_merges){ vglCheckContext(img_input, VGL_CL_CONTEXT); vglCheckContext(img_output, VGL_CL_CONTEXT); cl_int err; static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL/vglCl3dMergeZByMean.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { std::string str("File not found: "); str.append(file_path); vglClCheckError(-1, (char*)str.c_str()); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglCl3dMergeZByMean", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &img_input->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &img_output->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); err = clSetKernelArg( kernel, 2, sizeof( int ), (void*) &number_of_merges ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); if (img_input->ndim <= 2){ printf("2D images not supported by this operation\n"); } else if (img_input->ndim == 3){ size_t worksize[] = { img_input->getWidth(), img_input->getHeight(), img_input->getLength() }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 3, NULL, worksize, 0, 0, 0, 0 ); } else{ printf("More than 3 dimensions not yet supported\n"); } vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); vglSetContext(img_output, VGL_CL_CONTEXT); }
/** Calculate sum a = a + b and save carry */ int vglClMpIsZero(VglImage* num_a){ int isZero = 200; cl_mem isZero_oclPtr = NULL; vglCheckContext(num_a, VGL_CL_CONTEXT); cl_int err; isZero_oclPtr = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, sizeof(isZero), NULL, &err); vglClCheckError( err, (char*) "clCreateNDImage" ); err = clEnqueueWriteBuffer(cl.commandQueue, isZero_oclPtr, CL_TRUE, 0, sizeof(isZero), &isZero, 0, NULL, NULL); vglClCheckError( err, (char*) "clEnqueueWriteBuffer" ); clFinish(cl.commandQueue); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_MP/vglClMpIsZero.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglClMpIsZero", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &num_a->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &isZero_oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); if (num_a->ndim <= 2){ size_t worksize[] = { num_a->shape[VGL_WIDTH], num_a->shape[VGL_HEIGHT], 1 }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 2, NULL, worksize, 0, 0, 0, 0 ); } else if (num_a->ndim == 3){ size_t worksize[] = { num_a->shape[VGL_WIDTH], num_a->shape[VGL_HEIGHT], num_a->shape[VGL_LENGTH] }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 3, NULL, worksize, 0, 0, 0, 0 ); } else{ printf("More than 3 dimensions not yet supported\n"); } vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); err = clEnqueueReadBuffer(cl.commandQueue, isZero_oclPtr, CL_TRUE, 0, sizeof(isZero), &isZero, 0, NULL, NULL); vglClCheckError( err, (char*) "clEnqueueReadNDImage" ); vglSetContext(num_a, VGL_CL_CONTEXT); return isZero; }
/** vglClUpload branch3d */ void vglClUpload(VglImage* img) { if (Interop && img->nChannels > 1) { vglClUploadInterop(img); } else { if (img->nChannels == 3) { fprintf(stderr, "%s: %s: Error: image with 3 channels not supported. Please convert to 4 channels.\n", __FILE__, __FUNCTION__); exit(1); } cl_int err; if ( !vglIsInContext(img, VGL_RAM_CONTEXT) && !vglIsInContext(img, VGL_BLANK_CONTEXT) ) { fprintf(stderr, "vglClUpload: Error: image context = %d not in VGL_RAM_CONTEXT or VGL_BLANK_CONTEXT\n", img->inContext); return; } if (img->oclPtr == NULL) { /*if (img->fbo != -1) { img->oclPtr = clCreateFromGLTexture2D(cl.context,CL_MEM_READ_WRITE,GL_TEXTURE_2D,0,img->fbo,&err); vglClCheckError( err, (char*) "clCreateFromGlTexture2D interop" ); clEnqueueAcquireGLObjects(cl.commandQueue, 1, &img->oclPtr, 0,0,0); } else {*/ cl_image_format format; if (img->nChannels == 1) { format.image_channel_order = CL_R; } else if (img->nChannels == 4) { format.image_channel_order = CL_RGBA; } if (img->depth == IPL_DEPTH_8U) { format.image_channel_data_type = CL_UNORM_INT8; } else if (img->depth == IPL_DEPTH_16U) { format.image_channel_data_type = CL_UNORM_INT16; } else if (img->depth == IPL_DEPTH_32S) { format.image_channel_data_type = CL_SIGNED_INT32; } else { fprintf(stderr, "%s: %s: Error: Unsupported image depth = %d.\n", __FILE__, __FUNCTION__, img->depth); format.image_channel_data_type = CL_UNORM_INT8; } if (img->ndim == 2) { img->oclPtr = clCreateImage2D(cl.context, CL_MEM_READ_WRITE, &format, img->shape[VGL_WIDTH], img->shape[VGL_HEIGHT], 0, NULL, &err); vglClCheckError( err, (char*) "clCreateImage2D" ); } else if(img->ndim == 3) { img->oclPtr = clCreateImage3D(cl.context, CL_MEM_READ_WRITE, &format, img->shape[VGL_WIDTH], img->shape[VGL_HEIGHT], img->shape[VGL_LENGTH], 0, 0, NULL, &err); vglClCheckError( err, (char*) "clCreateImage3D" ); } else { img->oclPtr = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, img->getTotalSizeInBytes(), NULL, &err); vglClCheckError( err, (char*) "clCreateNDImage" ); } /* cl_image_desc desc; if (img->ndim == 2) { desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = img->shape[VGL_WIDTH]; desc.image_height = img->shape[VGL_HEIGHT]; desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = NULL; } else { desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = img->shape[VGL_WIDTH]; desc.image_height = img->shape[VGL_HEIGHT]; desc.image_depth = img->shape[VGL_LENGTH]; desc.image_array_size = 0; desc.image_row_pitch = 0; desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; desc.buffer = NULL; } img->oclPtr = clCreateImage(cl.context,CL_MEM_READ_WRITE, &format, &desc,NULL,&err); vglClCheckError(err, (char*) "clCreateImage"); */ } if (vglIsInContext(img, VGL_RAM_CONTEXT)) { size_t Origin[3] = { 0, 0, 0}; int nFrames = 1; if(img->ndim == 3) { nFrames = img->shape[VGL_LENGTH]; } void* imageData = img->getImageData(); if (!imageData) { fprintf(stderr, "%s: %s: Error: both ipl and ndarray are NULL.\n", __FILE__, __FUNCTION__); exit(1); } if ( (img->ndim == 2) || (img->ndim == 3) ) { size_t Size3d[3] = {img->shape[VGL_WIDTH], img->shape[VGL_HEIGHT], nFrames}; err = clEnqueueWriteImage( cl.commandQueue, img->oclPtr, CL_TRUE, Origin, Size3d, 0, 0, (char*)imageData, 0, NULL, NULL ); vglClCheckError( err, (char*) "clEnqueueWriteImage" ); clFinish(cl.commandQueue); } else { err = clEnqueueWriteBuffer(cl.commandQueue, img->oclPtr, CL_TRUE, 0, img->getTotalSizeInBytes(), imageData, 0, NULL, NULL); vglClCheckError( err, (char*) "clEnqueueWriteBuffer" ); clFinish(cl.commandQueue); } } vglAddContext(img, VGL_CL_CONTEXT); } }
void vglClInit() { cl_int err; cl_uint num_platforms, num_devices; cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; err = clGetPlatformIDs(0, NULL, &num_platforms); vglClCheckError(err, (char*) "clGetPlatformIDs get number of platforms"); cl.platformId = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platforms); err = clGetPlatformIDs(num_platforms, cl.platformId, NULL); vglClCheckError(err, (char*) "clGetPlatformIDs get platforms id"); if (num_platforms == 0) printf("found no platform for opencl\n\n"); else if (num_platforms >= 1) printf("found %d platform(s) for opencl\n\n", num_platforms); err = clGetDeviceIDs(*cl.platformId, device_type, 0, NULL, &num_devices); vglClCheckError(err, (char*) "clGetDeviceIDs get number of devices"); if (num_devices == 0) { printf("unable to find OpenCL devices, halting the program"); exit(1); } else printf("found %d device(s)\n\n",num_devices); cl.deviceId = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); err = clGetDeviceIDs(*cl.platformId, device_type, num_devices, cl.deviceId, NULL); vglClCheckError(err, (char*) "clGetDeviceIDs get devices id"); // To add CL_KHR_gl_sharing property to context, window id is needed. //cl_context_properties props[] = {CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext()}; const int msgLen = 2048; char msg[msgLen]; err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_EXTENSIONS, msgLen, msg, NULL); vglClCheckError(err, (char*) "clGetDeviceIDs get device info about extensions"); cl_device_type actual_device_type; err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_TYPE, sizeof(cl_device_type), &actual_device_type, NULL); vglClCheckError(err, (char*) "clGetDeviceIDs get device info about type"); char* search = strtok (msg, " "); bool found = false; if (actual_device_type != CL_DEVICE_TYPE_CPU) // Interoperability does not work on CPU { while (search != NULL && !found) { if (strcmp(search, "cl_khr_gl_sharing") == 0) { printf("FOUND INTEROPERABILITY\n"); Interop = true; } search = strtok(NULL, " "); } } #ifdef __linux__ cl_context_properties properties1[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties) cl.platformId[0], 0 }; cl_context_properties properties2[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) cl.platformId[0], 0 }; cl_context_properties* properties; if (Interop) properties = properties1; else properties = properties2; #elif defined WIN32 cl_context_properties properties[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties) cl.platformId[0], 0 }; #endif cl.context = clCreateContext(properties,1,cl.deviceId,NULL, NULL, &err ); //cl.context = clCreateContext(NULL,1,cl.deviceId,NULL, NULL, &err ); vglClCheckError(err, (char*) "clCreateContext GPU"); cl.commandQueue = clCreateCommandQueue( cl.context, *cl.deviceId, 0, &err ); vglClCheckError( err, (char*) "clCreateCommandQueue" ); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_EXTENSIONS, msgLen, msg, NULL); printf("%s: %s: CL_DEVICE_EXTENSIONS:\n%s\n", __FILE__, __FUNCTION__, msg); cl_ulong vlong; err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &vlong, NULL); printf("%s: %s: CL_DEVICE_MAX_MEM_ALLOC_SIZE: %5.2f mb\n", __FILE__, __FUNCTION__, vlong/(1024.0f*1024)); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &vlong, NULL); printf("%s: %s: CL_DEVICE_GLOBAL_MEM_SIZE: %5.2f mb\n", __FILE__, __FUNCTION__, vlong/(1024.0f*1024)); size_t val; err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &val, NULL); printf("%s: %s: CL_DEVICE_IMAGE3D_MAX_DEPTH: %ld px\n", __FILE__, __FUNCTION__, val); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &val, NULL); printf("%s: %s: CL_DEVICE_IMAGE3D_MAX_HEIGHT: %ld px\n", __FILE__, __FUNCTION__, val); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &val, NULL); printf("%s: %s: CL_DEVICE_IMAGE3D_MAX_WIDTH: %ld px\n", __FILE__, __FUNCTION__, val); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), &val, NULL); printf("%s: %s: CL_DEVICE_MAX_PARAMETER_SIZE: %ld bytes\n", __FILE__, __FUNCTION__, val); err = clGetDeviceInfo(cl.deviceId[0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &val, NULL); printf("%s: %s: CL_DEVICE_MAX_WORK_GROUP_SIZE: %ld bytes\n", __FILE__, __FUNCTION__, val); //vglClPrintSupportedImageFormats(); }
bool vglClEqual(VglImage* input1, VglImage* input2) { if(input1->ndim > 2) { fprintf(stderr, "%s: %s: Error: image with more then 2 dimensions not supported. Use vglCl3dEqual instead.\n", __FILE__, __FUNCTION__); return false; } vglCheckContext(input1,VGL_CL_CONTEXT); vglCheckContext(input2,VGL_CL_CONTEXT); cl_int err; cl_mem mobj_equal = NULL; mobj_equal = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, sizeof(char), NULL, &err); vglClCheckError( err, (char*) "clCreateBuffer histogram" ); char e = 200; err = clEnqueueWriteBuffer(cl.commandQueue,mobj_equal,CL_TRUE,0,sizeof(char),&e,0,NULL,NULL); vglClCheckError(err, (char*) "clEnqueueWriteBuffer mobj_arr"); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_UTIL/vglClEqual.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglCl3dEqual", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &input1->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &input2->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); err = clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void*) &mobj_equal ); vglClCheckError( err, (char*) "clSetKernelArg 2" ); size_t worksize[] = { input1->shape[VGL_WIDTH], input1->shape[VGL_HEIGHT], 0 }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 2, NULL, worksize, 0, 0, 0, 0 ); vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); err = clEnqueueReadBuffer(cl.commandQueue,mobj_equal,CL_TRUE,0,sizeof(char),&e,0,NULL,NULL); vglClCheckError( err, (char*) "clEnqueueReadBuffer" ); return e != 1; }
void vglCl3dGrayLevelTransform(VglImage* input, VglImage* output, int* transformation) { if (input->nChannels > 1 || output->nChannels > 1) { fprintf(stderr, "%s: %s: Error: image with more then 1 channel not supported. Please convert to 1 channel.\n", __FILE__, __FUNCTION__); return; } vglCheckContext(input,VGL_CL_CONTEXT); vglCheckContext(output,VGL_CL_CONTEXT); cl_int err; cl_mem mobj_arr = NULL; mobj_arr = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, 256*sizeof(int), NULL, &err); vglClCheckError(err, (char*) "clCreateBuffer mobj_arr" ); err = clEnqueueWriteBuffer(cl.commandQueue,mobj_arr,CL_TRUE, 0, 256*sizeof(int), transformation, 0, NULL, NULL); vglClCheckError(err, (char*) "clEnqueueWriteBuffer mobj_arr"); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_UTIL/vglClHistogramEq.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglCl3dGrayLevelTransform", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &input->oclPtr); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &output->oclPtr); vglClCheckError( err, (char*) "clSetKernelArg 1" ); err = clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void*) &mobj_arr); vglClCheckError( err, (char*) "clSetKernelArg 2" ); size_t worksize[] = { input->shape[VGL_WIDTH], input->shape[VGL_HEIGHT], input->shape[VGL_LENGTH] }; err = clEnqueueNDRangeKernel( cl.commandQueue, kernel, 3, NULL, worksize, 0, 0, 0, 0 ); vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); err = clReleaseMemObject( mobj_arr ); vglClCheckError(err, (char*) "clReleaseMemObject mobj_arr"); }
int* vglClSumPartialHistogram(cl_mem partial_hist, int size, int nchannels) { cl_int err; cl_mem mobj_histogram = NULL; mobj_histogram = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, nchannels*256*sizeof(int), NULL, &err); vglClCheckError(err, (char*) "clCreateBuffer mobj_histogram" ); float l = log10(size)/log10(2); int nsize; if (l - floor(log10(size)/log10(2)) > 0) nsize = l+1; else nsize = l; nsize = pow(2,nsize); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_UTIL/vglClHistogram.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglClSumPartialHistogram", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &partial_hist); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &mobj_histogram ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); err = clSetKernelArg( kernel, 2, sizeof( unsigned int ), &size ); vglClCheckError( err, (char*) "clSetKernelArg 2" ); err = clSetKernelArg( kernel, 3, sizeof( int ), &nchannels ); vglClCheckError( err, (char*) "clSetKernelArg 2" ); size_t worksize[] = { 256, size, nchannels }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 2, NULL, worksize, 0, 0, 0, 0 ); vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); int* histogram = (int*) malloc(nchannels*256*sizeof(int)); err = clEnqueueReadBuffer(cl.commandQueue,mobj_histogram,CL_TRUE, 0, nchannels*256*sizeof(int), histogram, 0, NULL, NULL); vglClCheckError(err, (char*) "ReadBuffer histogram"); err = clReleaseMemObject( mobj_histogram ); vglClCheckError(err, (char*) "clReleaseMemObject mobj_histogram"); return histogram; }
cl_mem vglCl3dPartialHistogram(VglImage* img_input) { vglCheckContext(img_input, VGL_CL_CONTEXT); cl_int err; cl_mem mobj_histogram = NULL; mobj_histogram = clCreateBuffer(cl.context, CL_MEM_READ_WRITE, img_input->nChannels*256*img_input->shape[VGL_WIDTH]*sizeof(unsigned int), NULL, &err); vglClCheckError( err, (char*) "clCreateBuffer histogram" ); static cl_program program = NULL; if (program == NULL) { char* file_path = (char*) "CL_UTIL/vglClHistogram.cl"; printf("Compiling %s\n", file_path); std::ifstream file(file_path); if(file.fail()) { fprintf(stderr, "%s:%s: Error: File %s not found.\n", __FILE__, __FUNCTION__, file_path); exit(1); } std::string prog( std::istreambuf_iterator<char>( file ), ( std::istreambuf_iterator<char>() ) ); const char *source_str = prog.c_str(); #ifdef __DEBUG__ printf("Kernel to be compiled:\n%s\n", source_str); #endif program = clCreateProgramWithSource(cl.context, 1, (const char **) &source_str, 0, &err ); vglClCheckError(err, (char*) "clCreateProgramWithSource" ); err = clBuildProgram(program, 1, cl.deviceId, NULL, NULL, NULL ); vglClBuildDebug(err, program); } static cl_kernel kernel = NULL; if (kernel == NULL) { kernel = clCreateKernel( program, "vglCl3dPartialHistogram", &err ); vglClCheckError(err, (char*) "clCreateKernel" ); } err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void*) &img_input->oclPtr ); vglClCheckError( err, (char*) "clSetKernelArg 0" ); err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void*) &mobj_histogram ); vglClCheckError( err, (char*) "clSetKernelArg 1" ); err = clSetKernelArg( kernel, 2, sizeof( int ), &img_input->nChannels ); vglClCheckError( err, (char*) "clSetKernelArg 2" ); if (img_input->ndim == 3){ size_t worksize[] = { img_input->shape[VGL_WIDTH], 1, 1 }; clEnqueueNDRangeKernel( cl.commandQueue, kernel, 1, NULL, worksize, 0, 0, 0, 0 ); vglClCheckError( err, (char*) "clEnqueueNDRangeKernel" ); } else{ printf("dimension different of 3 not supported by this function, try to use another version of the function\n"); } return mobj_histogram; }