/*
 * Calls clFinish on the given command queue and terminates the process
 * with exit(1) when checkVal() does not accept the returned status.
 *
 * command_queue : the queue handed to clFinish.
 */
void CLContext::finish( cl_command_queue& command_queue )
{
	// Finish the queue the caller passed in; the previous code ignored the
	// parameter and always finished the member commandQueue instead.
	cl_int status = clFinish( command_queue );

	// The message names the call that was actually made (clFinish, not clFlush).
	if ( !checkVal( status, CL_SUCCESS, "clFinish failed." ) )
		exit(1);
}
void CLContext::setKernelArg( const cl_kernel& kernel, int argnum, int size, void* arg ) { cl_int status; status = clSetKernelArg( kernel, argnum, size, arg); if(!checkVal( status, CL_SUCCESS, "clSetKernelArg failed. (updatedPos)")) exit(1); }
/*
 * Creates a kernel object named `kname` from the member `program` and
 * stores it in `kernel`. Terminates the process with exit(1) when
 * checkVal() does not accept the status reported by clCreateKernel.
 */
void CLContext::createKernel( cl_kernel& kernel, const char* kname )
{
	cl_int rc;
	kernel = clCreateKernel( program, kname, &rc );

	if ( checkVal( rc, CL_SUCCESS, "clCreateKernel failed." ) )
		return;

	exit(1);
}
// API void CLContext::createBuffer( cl_mem& target, int size, cl_float* floatP, int flags ) { cl_int status; target = clCreateBuffer( context, flags, size, floatP, &status); if (!checkVal( status, CL_SUCCESS, "clCreateBuffer failed. (updatePos)")) exit(1); }
void CLContext::enqueueReadBuffer( cl_command_queue& command_queue, cl_mem& buffer, cl_bool blocking_read, size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) { cl_int status; status = clEnqueueReadBuffer( command_queue, buffer, blocking_read, offset, cb, ptr, num_events_in_wait_list, event_wait_list, event); if(!checkVal( status, CL_SUCCESS, "clEnqueueReadBuffer failed.")) exit(1); }
void CLContext::enqueueNDRangeKernel( cl_command_queue& command_queue, const cl_kernel& kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event ) { cl_int status; status = clEnqueueNDRangeKernel( command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event); if(!checkVal( status, CL_SUCCESS, "clEnqueueNDRangeKernel failed.")) exit(1); }
/*
 * Waits on the given event list via clWaitForEvents, terminating the
 * process with exit(1) when checkVal() does not accept the returned
 * status, and afterwards releases events[0].
 *
 * num_events : number of entries in event_list
 * event_list : events handed to clWaitForEvents
 */
void CLContext::waitForEvents( cl_uint num_events, const cl_event *event_list )
{
	cl_int rc = clWaitForEvents( num_events, event_list );
	if ( !checkVal( rc, CL_SUCCESS, "clWaitForEvents failed." ) )
		exit(1);

	// NOTE(review): this releases `events[0]`, which is not the `event_list`
	// parameter and is not declared in this function — confirm that a member
	// named `events` is really the intended target.
	clReleaseEvent( events[0] );
}
void CLContext::checkLocalMemsize( const cl_kernel& kernel ) { cl_int status; cl_ulong usedLocalMemory; // Used local memory status = clGetKernelWorkGroupInfo(kernel, devices[0], CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &usedLocalMemory, NULL); if( !checkVal( status, CL_SUCCESS, "clGetKernelWorkGroupInfo CL_KERNEL_LOCAL_MEM_SIZE failed." ) ) exit(1); if( usedLocalMemory > totalLocalMemory ) { std::cout << "Unsupported: Insufficient local memory on device." << std::endl; exit(1); } }
void CLContext::checkWorkGroupsize( const cl_kernel& kernel ) { cl_int status; size_t kernelWorkGroupSize; // Group size returned by kernel status = clGetKernelWorkGroupInfo(kernel, devices[0], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0); if(!checkVal( status, CL_SUCCESS, "clGetKernelWorkGroupInfo CL_KERNEL_COMPILE_WORK_GROUP_SIZE failed.")) exit(1); if(groupSize > kernelWorkGroupSize) { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << groupSize << std::endl; std::cout << "Max Group Size supported on the kernel : " << kernelWorkGroupSize<<std::endl; std::cout << "Falling back to " << kernelWorkGroupSize << std::endl; groupSize = kernelWorkGroupSize; } }
int MersenneTwister::setupCL(void) { cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); if(checkVal(status, CL_SUCCESS, "clCreateContextFromType failed.")) { return SDK_FAILURE; } // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, 0, "getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], prop, &status); if(checkVal(status, 0, "clCreateCommandQueue failed.")) { return SDK_FAILURE; } } // Set Persistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if(sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } seedsBuf = clCreateBuffer(context, inMemFlags, width * height * sizeof(cl_float4), 
0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (seedsBuf)"); resultBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(cl_float4) * mulFactor, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (resultBuf)"); cl_event writeEvt; // Enqueue write to seedsBuf status = clEnqueueWriteBuffer(commandQueue, seedsBuf, CL_FALSE, 0, width * height * sizeof(cl_float4), seeds, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer failed. (seedsBuf)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = waitForEventAndRelease(&writeEvt); CHECK_ERROR(status,SDK_SUCCESS, "WaitForEventAndRelease(inMapEvt1) Failed"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("MersenneTwister_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string("-x clc++ "); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernel = clCreateKernel(program, "gaussianRand", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); return SDK_SUCCESS; }
// SETUP int CLContext::setupCL() { cl_int status = CL_SUCCESS; cl_device_type dType; int gpu = 1; if(gpu == 0) dType = CL_DEVICE_TYPE_CPU; else //deviceType = "gpu" dType = CL_DEVICE_TYPE_GPU; /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. <----- LOL check out the amd propaganda */ cl_uint numPlatforms; cl_platform_id platform = NULL; status = clGetPlatformIDs(0, NULL, &numPlatforms); if(!checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed.")) return CL_FAILURE; if (0 < numPlatforms) { cl_platform_id* platforms = new cl_platform_id[numPlatforms]; status = clGetPlatformIDs(numPlatforms, platforms, NULL); if(!checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed.")) return CL_FAILURE; for (unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); if(!checkVal(status, CL_SUCCESS, "clGetPlatformInfo failed.")) return CL_FAILURE; platform = platforms[i]; if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) break; } delete[] platforms; } /* * If we could find our platform, use it. Otherwise pass a NULL and get whatever the * implementation thinks we should be using. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; /* Use NULL for backward compatibility */ cl_context_properties* cprops = (NULL == platform) ? 
NULL : cps; context = clCreateContextFromType( cprops, dType, NULL, NULL, &status); if(!checkVal( status, CL_SUCCESS, "clCreateContextFromType failed.")) return CL_FAILURE; size_t deviceListSize; /* First, get the size of device list data */ status = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize); if(!checkVal( status, CL_SUCCESS, "clGetContextInfo failed.")) return CL_FAILURE; /* Now allocate memory for device list based on the size we got earlier */ devices = (cl_device_id*)malloc(deviceListSize); if(devices==NULL) { cout << "Failed to allocate memory (devices)." << endl; return CL_FAILURE; } /* Now, get the device list data */ status = clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, devices, NULL); if(!checkVal( status, CL_SUCCESS, "clGetContextInfo failed.")) return CL_FAILURE; /* Create command queue */ commandQueue = clCreateCommandQueue( context, devices[0], 0, &status); if(!checkVal( status, CL_SUCCESS, "clCreateCommandQueue failed.")) return CL_FAILURE; /* Get Device specific Information */ status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void*)&maxWorkGroupSize, NULL); if(!checkVal( status, CL_SUCCESS, "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed.")) return CL_FAILURE; status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void*)&maxDimensions, NULL); if(!checkVal( status, CL_SUCCESS, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed.")) return CL_FAILURE; maxWorkItemSizes = (size_t *)malloc(maxDimensions * sizeof(unsigned int)); status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, (void*)maxWorkItemSizes, NULL); if(!checkVal( status, CL_SUCCESS, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed.")) return CL_FAILURE; status = clGetDeviceInfo( devices[0], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), (void *)&totalLocalMemory, NULL); if(!checkVal( status, CL_SUCCESS, 
"clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZE failed.")) return CL_FAILURE; /* * Create and initialize memory objects */ /* create a CL program using the kernel source */ string content; fileH.open( "critterding.cl", content ); const char * source = content.c_str(); size_t sourceSize[] = { strlen(source) }; program = clCreateProgramWithSource( context, 1, &source, sourceSize, &status); if(!checkVal( status, CL_SUCCESS, "clCreateProgramWithSource failed.")) return CL_FAILURE; /* create a cl program executable for all the devices specified */ status = clBuildProgram( program, 1, &devices[0], NULL, NULL, NULL); if(status != CL_SUCCESS) { if(status == CL_BUILD_PROGRAM_FAILURE) { cl_int logStatus; char * buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize); if(!checkVal( logStatus, CL_SUCCESS, "clGetProgramBuildInfo failed.")) return CL_FAILURE; buildLog = (char*)malloc(buildLogSize); if(buildLog == NULL) { cout << "Failed to allocate host memory. (buildLog)" << endl; return CL_FAILURE; } memset(buildLog, 0, buildLogSize); logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); if(!checkVal( logStatus, CL_SUCCESS, "clGetProgramBuildInfo failed.")) { free(buildLog); return CL_FAILURE; } std::cout << " \n\t\t\tBUILD LOG\n"; std::cout << " ************************************************\n"; std::cout << buildLog << std::endl; std::cout << " ************************************************\n"; free(buildLog); } if(!checkVal( status, CL_SUCCESS, "clBuildProgram failed.")) return CL_FAILURE; } return CL_SUCCESS; }