//! Print event information for all entries in kernel_events vector void EventList::printKernelEvents() { int numEvents = this->kernel_events.size(); cl_int status; cl_ulong timer; for(int i = 0; i < numEvents; i++) { printf("Kernel event %d: %s\n", i, kernel_events[i].second); status = clGetEventProfilingInfo(this->kernel_events[i].first, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &timer, NULL); cl_errChk(status, "profiling", true); printf("\tENQUEUE: %lu\n", timer); status = clGetEventProfilingInfo(this->kernel_events[i].first, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &timer, NULL); cl_errChk(status, "profiling", true); printf("\tSUBMIT: %lu\n", timer); status = clGetEventProfilingInfo(this->kernel_events[i].first, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &timer, NULL); cl_errChk(status, "profiling", true); printf("\tSTART: %lu\n", timer); status = clGetEventProfilingInfo(this->kernel_events[i].first, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &timer, NULL); cl_errChk(status, "profiling", true); printf("\tEND: %lu\n", timer); } }
int SetProgKernel(cl_program *prog, cl_kernel *ker, cl_context context, char *source_str, size_t source_size, cl_device_id* Devices, int dev_sel, char *kername) { cl_int ret; // Create a program form the Kernel source (string from .cl) *prog = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); cl_errChk(ret,"Error >> clCreateProgramWithSource"); // Build Program // ret = clBuildProgram(*prog, dev_sel, Devices, NULL, NULL, NULL); ret = clBuildProgram(*prog, 0, NULL, NULL, NULL, NULL); //ret = clBuildProgram(*prog, 0, NULL, "-cl-std=CL2.0", NULL, NULL); if (ret != CL_SUCCESS) { size_t len; char buffer[2048]; clGetProgramBuildInfo(*prog, Devices[dev_sel], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); printf("Error Build Prog.\n"); printf("Error code = %d\n", ret); exit(ret); } // Create the OpenCL kernel *ker = clCreateKernel(*prog, kername, &ret); cl_errChk(ret,"Error >> clCreateKernel"); return 0; }
float eventTime(cl_event event,cl_command_queue command_queue){ cl_int error=0; cl_ulong eventStart,eventEnd; DIVIDEND_CL_WRAP(clFinish)(command_queue); error = clGetEventProfilingInfo(event,CL_PROFILING_COMMAND_START, sizeof(cl_ulong),&eventStart,NULL); cl_errChk(error,"ERROR in Event Profiling.",true); error = clGetEventProfilingInfo(event,CL_PROFILING_COMMAND_END, sizeof(cl_ulong),&eventEnd,NULL); cl_errChk(error,"ERROR in Event Profiling.",true); return (float)((eventEnd-eventStart)/1e9); }
/*! \param mem cl_mem object \param mem_size Size of memory in bytes \param flags Optional cl_mem_flags \return Returns a host pointer that points to the mapped region */ void *cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags) { cl_int status; void *ptr; static int eventCnt = 0; cl_event* eventPtr = NULL, event; if(eventsEnabled) { eventPtr = &event; } ptr = (void *)clEnqueueMapBuffer(commandQueue, mem, CL_TRUE, flags, 0, mem_size, 0, NULL, eventPtr, &status); cl_errChk(status, "Error mapping a buffer", true); if(eventsEnabled) { char* eventStr = catStringWithInt("MapBuffer", eventCnt++); events->newIOEvent(*eventPtr, eventStr); } return ptr; }
void cl_executeKernel(cl_kernel kernel, cl_uint work_dim, const size_t* global_work_size, const size_t* local_work_size, const char* description, int identifier) { cl_int status; cl_event* eventPtr = NULL, event; // eventsEnabled = phasechecker(description, identifier, granularity); if(eventsEnabled) { eventPtr = &event; } status = clEnqueueNDRangeKernel(commandQueue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, eventPtr); cl_errChk(status, "Executing kernel", true); if(eventsEnabled) { char* eventString = catStringWithInt(description, identifier); events->newKernelEvent(*eventPtr, eventString); } }
cl_mem *cl_allocTexture(int width, int height, void *data, size_t elementSize, cl_channel_type type) { cl_int status; cl_mem *mem; mem = (cl_mem *) malloc(sizeof(cl_mem)); cl_image_format image_format; image_format.image_channel_order = CL_R; image_format.image_channel_data_type = type; *mem = clCreateImage2D(clGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format, width, height, width*elementSize, // TODO make sure this is good data, &status); if(cl_errChk(status, "creating texture")) { exit(1); } return mem; }
/*! Set an argument for a OpenCL kernel \param kernel The kernel for which the argument is being set \param index The argument index \param size The size of the argument \param data A pointer to the argument */ void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size, void* data) { cl_int status; status = clSetKernelArg(kernel, index, size, data); cl_errChk(status, "Setting kernel arg", true); }
int SetCont(int plt_sel, int dev_sel, cl_context *cont) { // check if the environment is valid // gain the device numbers cl_int ret = clGetDeviceIDs(platforms[plt_sel], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); cl_errChk(ret,"Error >> clGetDeviceIDs"); if (dev_sel > numDevices) { printf("Invalid Device Number\n"); exit(1); } // get the select platform & device ret = clGetDeviceIDs(platforms[plt_sel], CL_DEVICE_TYPE_ALL, numDevices, Devices, NULL); // check the device is a CPU or GPU cl_device_type DeviceTyep; //cl_device_id DeviceID; //DeviceID = Devices[dev_sel]; ret = clGetDeviceInfo(Devices[dev_sel], CL_DEVICE_TYPE, sizeof(DeviceTyep),(void *)&DeviceTyep,NULL); cl_errChk(ret,"Error >> clGetDeviceInfo_DeviceTyep"); if(DeviceTyep == CL_DEVICE_TYPE_GPU) printf("Creating GPU Context\n"); else if (DeviceTyep == CL_DEVICE_TYPE_CPU) printf("Creating CPU Context\n"); else printf("This Context Type not Supported.\n"); // Create a context cl_context_properties prop[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[plt_sel], 0 }; *cont = clCreateContextFromType(prop, (cl_device_type)DeviceTyep, NULL, NULL, &ret); if (*cont == 0) { printf("Cannot create OpenCL context\n"); return 0; } return 0; }
int GetHW() { char local_plat_buf[100]; int i; // Get & Set OpenCL Platforms // get Platform numbers cl_int ret = clGetPlatformIDs(1, NULL, &numPlatforms); cl_errChk(ret,"Error 0>> clGetPlatformIDs"); // get memory to store platform IDs platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id)); // store IDs into memory ret = clGetPlatformIDs(numPlatforms, platforms, NULL); cl_errChk(ret,"Error 1>> clGetPlatformIDs"); // Get OpenCL Platforms & Devices Info. for (i = 0; i < numPlatforms; i++) { // Get Platform Info. ret = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(local_plat_buf), local_plat_buf, NULL); cl_errChk(ret,"Error >> clGetPlatformInfo"); // get Devices numbers ret = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); cl_errChk(ret,"Error >> clGetDeviceIDs"); // get memory to store device IDs Devices = (cl_device_id*)malloc(sizeof(cl_device_id)* numDevices); if (numDevices == 0) { printf("!! There is no device in platform #%d\n", i); exit(0); } else { ret = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices, Devices, NULL); } } return 0; }
/*! \param mem The device pointer to release */ void cl_freeMem(cl_mem mem) { cl_int status; if(mem != NULL) { status = clReleaseMemObject(mem); cl_errChk(status, "Releasing mem object", true); } }
/*! \param mem The program object to release */ void cl_freeProgram(cl_program program) { cl_int status; if(program != NULL) { status = clReleaseProgram(program); cl_errChk(status, "Releasing program object", true); } }
/*! \param mem The kernel object to release */ void cl_freeKernel(cl_kernel kernel) { cl_int status; if(kernel != NULL) { status = clReleaseKernel(kernel); cl_errChk(status, "Releasing kernel object", true); } }
void cl_copyToHost(void* dst, cl_mem src, unsigned mem_size) { cl_int status; status = clEnqueueReadBuffer(clCommandQueue, src, CL_TRUE, 0, mem_size, dst, 0, NULL, NULL); cl_sync(); if(cl_errChk(status, "read buffer")) { exit(1); } }
void cl_freeDevice(cl_mem *mem) { cl_int status; printf("cl_mem:%p\n",*mem); status = clReleaseMemObject(*mem); if(cl_errChk(status, "releasing mem object")) { exit(1); } free(mem); }
/*! Create a kernel from compiled source \param program Compiled OpenCL program \param kernel_name Name of the kernel in the program \return Returns a cl_kernel object for the specified kernel */ cl_kernel cl_createKernel(cl_program program, const char* kernel_name) { cl_kernel kernel; cl_int status; kernel = clCreateKernel(program, kernel_name, &status); cl_errChk(status, "Creating kernel", true); return kernel; }
//! The the name of the device as supplied by the OpenCL implementation char* cl_getPlatformVendor(cl_platform_id platform) { cl_int status; size_t platformInfoSize; char* platformInfoStr = NULL; // Print the name status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0, NULL, &platformInfoSize); cl_errChk(status, "Getting platform name", true); platformInfoStr = (char*)alloc(platformInfoSize); status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, platformInfoSize, platformInfoStr, NULL); cl_errChk(status, "Getting platform name", true); return(platformInfoStr); }
/*! Prints out the time taken between the start and end of an event \param event_time */ void cl_KernelTime(cl_event event_time) { cl_int kerneltimer; cl_ulong starttime; cl_ulong endtime; kerneltimer = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &starttime, NULL); if(cl_errChk(kerneltimer, "Error in Start Time \n"))exit(1); kerneltimer = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endtime, NULL); if(cl_errChk(kerneltimer, "Error in End Time \n"))exit(1); unsigned long elapsed = (unsigned long)(endtime - starttime); printf("\tKernel Execution\t%ld ns\n",elapsed); }
/*! \param mem_size Size of memory in bytes \param host_ptr Host pointer that contains the data \return Returns a cl_mem object that points to device memory */ cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr) { cl_mem mem; cl_int status; mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, host_ptr, &status); cl_errChk(status, "Error creating const mem buffer", true); return mem; }
void cl_copyTextureToDevice(cl_mem dst, void* src, int width, int height) { cl_int status; const size_t szTexOrigin[3] = {0, 0, 0}; const size_t szTexRegion[3] = {height, width, 1}; status = clEnqueueWriteImage(clCommandQueue, dst, CL_TRUE, szTexOrigin, szTexRegion, 0, 0, src, 0, NULL, NULL); if(cl_errChk(status, "write buffer texture")) { exit(1); } }
/*! \param mem_size Size of memory in bytes \return Returns a cl_mem object that points to pinned memory on the host */ cl_mem cl_allocBufferPinned(size_t mem_size) { cl_mem mem; cl_int status; mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mem_size, NULL, &status); cl_errChk(status, "Error allocating pinned memory", true); return mem; }
// Queries the supported image formats for the device and prints // them to the screen void printSupportedImageFormats() { cl_uint numFormats; cl_int status; status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numFormats); cl_errChk(status, "getting supported image formats", true); cl_image_format* imageFormats = NULL; imageFormats = (cl_image_format*)alloc(sizeof(cl_image_format)*numFormats); status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D, numFormats, imageFormats, NULL); printf("There are %d supported image formats\n", numFormats); cl_uint orders[]={CL_R, CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_ARGB, CL_BGRA}; char *orderstr[]={"CL_R", "CL_A","CL_INTENSITY", "CL_LUMINANCE", "CL_RG", "CL_RA", "CL_RGB", "CL_RGBA", "CL_ARGB", "CL_BGRA"}; cl_uint types[]={ CL_SNORM_INT8 , CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16, CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT}; char * typesstr[]={ "CL_SNORM_INT8" ,"CL_SNORM_INT16","CL_UNORM_INT8","CL_UNORM_INT16", "CL_UNORM_SHORT_565","CL_UNORM_SHORT_555","CL_UNORM_INT_101010", "CL_SIGNED_INT8","CL_SIGNED_INT16","CL_SIGNED_INT32","CL_UNSIGNED_INT8", "CL_UNSIGNED_INT16","CL_UNSIGNED_INT32","CL_HALF_FLOAT","CL_FLOAT"}; printf("Supported Formats:\n"); for(int i = 0; i < (int)numFormats; i++) { printf("\tFormat %d: ", i); for(int j = 0; j < (int)(sizeof(orders)/sizeof(cl_int)); j++) { if(imageFormats[i].image_channel_order == orders[j]) { printf("%s, ", orderstr[j]); } } for(int j = 0; j < (int)(sizeof(types)/sizeof(cl_int)); j++) { if(imageFormats[i].image_channel_data_type == types[j]) { printf("%s, ", typesstr[j]); } } printf("\n"); } free(imageFormats); }
//! This function will recieve information of where in the program we are as "event_id" void cl_copyToDevice(cl_mem dst, void* src, unsigned mem_size, unsigned int event_id ) { cl_int status; if(event_id == 999) { status = clEnqueueWriteBuffer(clCommandQueue, dst, CL_TRUE, 0, mem_size, src, 0, NULL, NULL); if(cl_errChk(status, "write buffer")) { exit(1); } } }
/*! \param mem cl_mem object \param ptr A host pointer that points to the mapped region */ void cl_unmapBuffer(cl_mem mem, void *ptr) { // TODO It looks like AMD doesn't support profiling unmapping yet. Leaving the // commented code here until it's supported cl_int status; status = clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL); cl_errChk(status, "Error unmapping a buffer or image", true); }
/*! Prints out the time taken between the start and end of an event \param event_time */ double cl_computeExecTime(cl_event event_time) { cl_int status; cl_ulong starttime; cl_ulong endtime; double elapsed; status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &starttime, NULL); cl_errChk(status, "profiling start", true); status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &endtime, NULL); cl_errChk(status, "profiling end", true); // Convert to ms elapsed = (double)(endtime-starttime)/1000000.0; return elapsed; }
void cl_TimeStop(cl_event event_time, cl_profiling_info profile_mode, char * event_name) { cl_int kerneltimer; cl_ulong endTime; kerneltimer = clGetEventProfilingInfo(event_time, profile_mode, sizeof(cl_ulong), &endTime, NULL); if(cl_errChk(kerneltimer, "Error in Profiling\n"))exit(1); printf("%s\t%lu\n",event_name,(unsigned long)endTime); }
/*! Prints out the time taken between the start and end of an event.\n Adds synchronization in order to be sure that events have occured otherwise profiling calls will fail \n Shouldnt be used on critical path due to the necessary flushing of the queue \param event_time */ void cl_KernelTimeSync(cl_event event_time) { cl_int kerneltimer; clFlush(cl_getCommandQueue()); clFinish(cl_getCommandQueue()); cl_ulong starttime; cl_ulong endtime; kerneltimer = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &starttime, NULL); if(cl_errChk(kerneltimer, "Error in Start Time \n"))exit(1); kerneltimer = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END , sizeof(cl_ulong), &endtime, NULL); if(cl_errChk(kerneltimer, "Error in Start Time \n"))exit(1); unsigned long elapsed = (unsigned long)(endtime - starttime); printf("\tTime Elapsed in Kernel is %ld ns\n",elapsed); }
//! The the name of the device as supplied by the OpenCL implementation char* cl_getDeviceName(cl_device_id dev) { cl_int status; size_t devInfoSize; char* devInfoStr = NULL; // If dev is NULL, set it to the default device if(dev == NULL) { dev = device; } // Print the name status = clGetDeviceInfo(dev, CL_DEVICE_NAME, 0, NULL, &devInfoSize); cl_errChk(status, "Getting device name", true); devInfoStr = (char*)alloc(devInfoSize); status = clGetDeviceInfo(dev, CL_DEVICE_NAME, devInfoSize, devInfoStr, NULL); cl_errChk(status, "Getting device name", true); return(devInfoStr); }
//! Get the name of the vendor for a device char* cl_getDeviceVersion(cl_device_id dev) { cl_int status; size_t devInfoSize; char* devInfoStr = NULL; // If dev is NULL, set it to the default device if(dev == NULL) { dev = device; } // Print the vendor status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0, NULL, &devInfoSize); cl_errChk(status, "Getting vendor name", true); devInfoStr = (char*)alloc(devInfoSize); status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devInfoSize, devInfoStr, NULL); cl_errChk(status, "Getting vendor name", true); return devInfoStr; }
cl_mem * cl_allocDeviceConst(unsigned mem_size, void * host_ptr) { cl_mem * mem; mem = (cl_mem *) malloc(sizeof(cl_mem)); cl_int status; *mem = clCreateBuffer(clGPUContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, mem_size, host_ptr, &status); if(cl_errChk(status, "Error creating Const Mem buffer")) { printf("Error Allocating %u BYTES in Const Memory\n", mem_size); exit(1); } return mem; }
/*! \param dst Valid host pointer \param src Device pointer that contains the data \param mem_size Size of data to copy \param blocking Blocking or non-blocking operation */ void cl_copyBufferToHost(void* dst, cl_mem src, size_t mem_size, cl_bool blocking) { static int eventCnt = 0; cl_event* eventPtr = NULL, event; if(eventsEnabled) { eventPtr = &event; } cl_int status; status = clEnqueueReadBuffer(commandQueue, src, blocking, 0, mem_size, dst, 0, NULL, eventPtr); cl_errChk(status, "Reading buffer", true); if(eventsEnabled) { char* eventStr = catStringWithInt("copyBufferToHost", eventCnt++); events->newIOEvent(*eventPtr, eventStr); } }