int MotionDetector::setupKernel(std::string name){ cl_int status = CL_SUCCESS; // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string(name+"_Kernel.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if (sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if (sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } int retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, 0, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name char* charname = &name[0]; kernl = clCreateKernel( program, charname, &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); status = kernelInfo.setKernelWorkGroupInfo(kernl, devices[sampleArgs->deviceId]); CHECK_ERROR(status, SDK_SUCCESS, "kernelInfo.setKernelWorkGroupInfo() failed"); return SDK_SUCCESS; }
int AtomicCounters::setupCL(void) { cl_int status = 0; cl_device_type dType; if (sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else // deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if (sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed."); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed."); cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0}; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed "); // Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); // Check device extensions if (!strstr(deviceInfo.extensions, "cl_ext_atomic_counters_32")) { OPENCL_EXPECTED_ERROR( "Device does not support cl_ext_atomic_counters_32 extension!"); } if (!strstr(deviceInfo.extensions, "cl_khr_local_int32_base_atomics")) { OPENCL_EXPECTED_ERROR( "Device does not support cl_khr_local_int32_base_atomics extension!"); } // Get OpenCL device version std::string deviceVersionStr = std::string(deviceInfo.deviceVersion); size_t vStart = deviceVersionStr.find(" ", 0); size_t vEnd = deviceVersionStr.find(" ", vStart + 1); std::string vStrVal = deviceVersionStr.substr(vStart + 1, vEnd - vStart - 1); // Check of OPENCL_C_VERSION if device version is 1.1 or later #ifdef CL_VERSION_1_1 if (deviceInfo.openclCVersion) { // Exit if OpenCL C device version is 1.0 deviceVersionStr = std::string(deviceInfo.openclCVersion); vStart = deviceVersionStr.find(" ", 0); vStart = deviceVersionStr.find(" ", vStart + 1); vEnd = deviceVersionStr.find(" ", vStart + 1); vStrVal = deviceVersionStr.substr(vStart + 1, vEnd - vStart - 1); if (vStrVal.compare("1.0") <= 0) { OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1"); } } else { OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1"); } #else OPENCL_EXPECTED_ERROR( "Unsupported device! Required CL_DEVICE_OPENCL_C_VERSION as 1.1"); #endif // Setup application data if (setupAtomicCounters() != SDK_SUCCESS) { return SDK_FAILURE; } cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], props, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed(commandQueue)"); // Set Persistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if (sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } // Create buffer for input array inBuf = clCreateBuffer(context, inMemFlags, length * sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.(inBuf)"); // Set up data for input array cl_event writeEvt; status = clEnqueueWriteBuffer(commandQueue, inBuf, CL_FALSE, 0, length * sizeof(cl_uint), input, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer(inBuf) failed.."); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush(commandQueue) failed."); counterOutBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.(counterOutBuf)."); globalOutBuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed.(globalOutBuf)."); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("AtomicCounters_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if (sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if (sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // ConstantBuffer bandwidth from single access counterKernel = clCreateKernel(program, "atomicCounters", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(counterKernel)."); globalKernel = clCreateKernel(program, "globalAtomics", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel(globalKernel) failed."); status = kernelInfoC.setKernelWorkGroupInfo(counterKernel, devices[sampleArgs->deviceId]); CHECK_OPENCL_ERROR(status, "kernelInfo.setKernelWorkGroupInfo failed"); status = kernelInfoG.setKernelWorkGroupInfo(globalKernel, devices[sampleArgs->deviceId]); CHECK_OPENCL_ERROR(status, "kernelInfo.setKernelWorkGroupInfo failed"); if (counterWorkGroupSize > kernelInfoC.kernelWorkGroupSize) { if (!sampleArgs->quiet) { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << counterWorkGroupSize << std::endl; std::cout << "Max Group Size supported on the kernel(readKernel) : " << kernelInfoC.kernelWorkGroupSize << std::endl; std::cout << "Falling back to " << kernelInfoC.kernelWorkGroupSize << std::endl; } counterWorkGroupSize = kernelInfoC.kernelWorkGroupSize; } if (globalWorkGroupSize > kernelInfoG.kernelWorkGroupSize) { if (!sampleArgs->quiet) { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << globalWorkGroupSize << std::endl; std::cout << "Max Group Size supported on the kernel(writeKernel) : " << kernelInfoG.kernelWorkGroupSize << std::endl; std::cout << "Falling back to " << kernelInfoG.kernelWorkGroupSize << std::endl; } globalWorkGroupSize = kernelInfoG.kernelWorkGroupSize; } // Wait for event and release event status = waitForEventAndRelease(&writeEvt); CHECK_OPENCL_ERROR(status, "waitForEventAndRelease(writeEvt) failed."); return SDK_SUCCESS; }
int DwtHaar1D::setupCL(void) { if(iterations < 1) { std::cout<<"Error, iterations cannot be 0 or negative. Exiting..\n"; exit(0); } cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //sampleArgs->deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); // If we could find our platform, use it. Otherwise use just available platform. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], 0, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed"); // Set Presistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if(sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } inDataBuf = clCreateBuffer(context, inMemFlags, sizeof(cl_float) * signalLength, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (inDataBuf)"); dOutDataBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, signalLength * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (dOutDataBuf)"); dPartialOutDataBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, signalLength * sizeof(cl_float), NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (dPartialOutDataBuf)"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("DwtHaar1D_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, 0, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernel = clCreateKernel(program, "dwtHaar1D", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); status = kernelInfo.setKernelWorkGroupInfo(kernel, devices[sampleArgs->deviceId]); CHECK_ERROR(status, SDK_SUCCESS, " setKernelWorkGroupInfo() failed"); return SDK_SUCCESS; }
int ComputeBench::setupCL(void) { cl_int status = 0; cl_device_type dType; if (sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if (sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); std::string deviceStr(deviceInfo.deviceVersion); size_t vStart = deviceStr.find(" ", 0); size_t vEnd = deviceStr.find(" ", vStart + 1); std::string vStrVal = deviceStr.substr(vStart + 1, vEnd - vStart - 1); // OpenCL 1.1 has inbuilt support for vec3 data types if (vec3 == true) { OPENCL_EXPECTED_ERROR("Device doesn't support built-in 3 component vectors!"); } // The block is to move the declaration of prop closer to its use /* Note: Using deprecated clCreateCommandQueue as CL_QUEUE_PROFILING_ENABLE flag not currently working ***with clCreateCommandQueueWithProperties*/ cl_command_queue_properties prop = 0; prop |= CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], prop, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); if (sampleArgs->isLoadBinaryEnabled()) { // Always assuming kernel was dumped for vector-width 1 if (vectorSize != 0) { std::cout << "Ignoring specified vector-width. Assuming kernel was dumped for vector-width 1" << std::endl; } vectorSize = 1; } else { // If vector-size is not specified in the command-line, choose the preferred size for the device if (vectorSize == 0) { vectorSize = deviceInfo.preferredFloatVecWidth; } else if (vectorSize == 3) { //Make vectorSize as 4 if -v option is 3. //This memory alignment is required as per OpenCL for type3 vectors vec3 = true; vectorSize = 4; } else if ((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) && (8 != vectorSize) && (16 != vectorSize)) { std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl; return SDK_FAILURE; } } outputKadd = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof (cl_float) * vectorSize * length, 0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (outputKadd)"); // create a CL program using the kernel source char buildOption[512]; if (vectorSize == 1) { sprintf(buildOption, "-D DATATYPE=uint -D DATATYPE2=uint4 "); //sprintf(buildOption, "-D DATATYPE=float -D DATATYPE2=float4 "); } else { sprintf(buildOption, "-D DATATYPE=uint%d -D DATATYPE2=uint%d ", (vec3 == true) ? 3 : vectorSize, (vec3 == true) ? 3 : vectorSize); //sprintf(buildOption, "-D DATATYPE=float%d -D DATATYPE2=float%d ", (vec3 == true) ? 3 : vectorSize, (vec3 == true) ? 3 : vectorSize); } strcat(buildOption, "-D IDXTYPE=uint "); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("ComputeBench.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(buildOption); if (sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if (sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // Global memory bandwidth from read-single access kernel[0] = clCreateKernel(program, "Kadd", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(Kadd)"); return SDK_SUCCESS; }
int URNG::setupCL() { cl_int status = CL_SUCCESS; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); // If we could find our platform, use it. Otherwise use just available platform. cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR( status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; commandQueue = clCreateCommandQueue( context, devices[sampleArgs->deviceId], prop, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); } //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); // Create and initialize memory objects // Set Presistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if(sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } // Create memory object for input Image inputImageBuffer = clCreateBuffer( context, inMemFlags, width * height * pixelSize, 0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (inputImageBuffer)"); // Create memory objects for output Image outputImageBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, width * height * pixelSize, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (outputImageBuffer)"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("URNG_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernel = clCreateKernel( program, "noise_uniform", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); status = kernelInfo.setKernelWorkGroupInfo(kernel, devices[sampleArgs->deviceId]); CHECK_ERROR(status, SDK_SUCCESS, "setKernelWorkGroupInfo() failed"); if((blockSizeX * blockSizeY) > kernelInfo.kernelWorkGroupSize) { if(!sampleArgs->quiet) { std::cout << "Out of Resources!" << std::endl; std::cout << "Group Size specified : " << blockSizeX * blockSizeY << std::endl; std::cout << "Max Group Size supported on the kernel : " << kernelInfo.kernelWorkGroupSize << std::endl; std::cout << "Falling back to " << kernelInfo.kernelWorkGroupSize << std::endl; } // Three possible cases if(blockSizeX > kernelInfo.kernelWorkGroupSize) { blockSizeX = kernelInfo.kernelWorkGroupSize; blockSizeY = 1; } } return SDK_SUCCESS; }
int Histogram::setupCL(void) { cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //sampleArgs->deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); // Create command queue commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], 0, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); if(scalar && vector)//if both options are specified { std::cout<<"Ignoring --scalar and --vector option and using the default vector width of the device"<<std::endl; vectorWidth = deviceInfo.preferredFloatVecWidth; } else if(scalar) { vectorWidth = 1; } else if(vector) { vectorWidth = 4; } else //if no option is specified. { vectorWidth = deviceInfo.preferredFloatVecWidth; } if(!sampleArgs->quiet) { if(vectorWidth == 1) { std::cout<<"Selecting scalar kernel\n"<<std::endl; } else { std::cout<<"Selecting vector kernel\n"<<std::endl; } } subHistgCnt = (width * height) / (groupSize * groupIterations); // Check if byte-addressable store is supported if(!strstr(deviceInfo.extensions, "cl_khr_byte_addressable_store")) { byteRWSupport = false; OPENCL_EXPECTED_ERROR("Device does not support cl_khr_byte_addressable_store extension!"); } dataBuf = clCreateBuffer( context, CL_MEM_READ_ONLY, sizeof(cl_uint) * width * height, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (dataBuf)"); midDeviceBinBuf = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof(cl_uint) * binSize * subHistgCnt, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (midDeviceBinBuf)"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("Histogram_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, 0, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name const char *kernelName = (vectorWidth == 4)? "histogram256_vector": "histogram256_scalar"; kernel = clCreateKernel(program, kernelName, &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); return SDK_SUCCESS; }
int LDSBandwidth::setupCL(void) { cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); std::string deviceStr(deviceInfo.deviceVersion); size_t vStart = deviceStr.find(" ", 0); size_t vEnd = deviceStr.find(" ", vStart + 1); std::string vStrVal = deviceStr.substr(vStart + 1, vEnd - vStart - 1); #ifdef CL_VERSION_1_1 if(vStrVal.compare("1.0") > 0) { char openclVersion[1024]; status = clGetDeviceInfo(devices[sampleArgs->deviceId], CL_DEVICE_OPENCL_C_VERSION, sizeof(openclVersion), openclVersion, 0); CHECK_OPENCL_ERROR(status, "clGetDeviceInfo failed."); std::string tempStr(openclVersion); size_t dotPos = tempStr.find_first_of("."); size_t spacePos = tempStr.find_last_of(" "); tempStr = tempStr.substr(dotPos + 1, spacePos - dotPos); int minorVersion = atoi(tempStr.c_str()); // OpenCL 1.1 has inbuilt support for vec3 data types if(minorVersion < 1 && vec3 == true) { OPENCL_EXPECTED_ERROR("Device doesn't support built-in 3 component vectors!"); } } else { // OpenCL 1.1 has inbuilt support for vec3 data types if(vec3 == true) { OPENCL_EXPECTED_ERROR("Device doesn't support built-in 3 component vectors!"); } } #else // OpenCL 1.1 has inbuilt support for vec3 data types if(vec3 == true) { OPENCL_EXPECTED_ERROR("Device doesn't support built-in 3 component vectors!"); } #endif { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; prop |= CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], prop, &status); CHECK_OPENCL_ERROR(status, "clCreateCommandQueue failed."); } outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * vectorSize * length, 0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (outputBuffer)"); // create a CL program using the kernel source char buildOption[64]; if(vectorSize == 1) { sprintf(buildOption, "-D DATATYPE=float "); } else { sprintf(buildOption, "-D DATATYPE=float%d ", vec3 == true ? 3 : vectorSize); } buildProgramData buildData; buildData.kernelName = std::string("LDSBandwidth_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(buildOption); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // ConstantBuffer bandwidth from single access kernel[0] = clCreateKernel(program, "LDSBandwidth_single", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_single)"); // ConstantBuffer bandwidth from linear access kernel[1] = clCreateKernel(program, "LDSBandwidth_linear", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_linear)"); kernel[2] = clCreateKernel(program, "LDSBandwidth_single_verify", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_single_verify)"); kernel[3] = clCreateKernel(program, "LDSBandwidth_linear_verify", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_linear_verify)"); kernel[4] = clCreateKernel(program, "LDSBandwidth_write_linear", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_linear_verify)"); kernel[5] = clCreateKernel(program, "LDSBandwidth_write_linear_verify", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(LDSBandwidth_linear_verify)"); return SDK_SUCCESS; }
int MersenneTwister::setupCL(void) { cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType(cps, dType, NULL, NULL, &status); if(checkVal(status, CL_SUCCESS, "clCreateContextFromType failed.")) { return SDK_FAILURE; } // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, 0, "getDevices() failed"); //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; commandQueue = clCreateCommandQueue(context, devices[sampleArgs->deviceId], prop, &status); if(checkVal(status, 0, "clCreateCommandQueue failed.")) { return SDK_FAILURE; } } // Set Persistent memory only for AMD platform cl_mem_flags inMemFlags = CL_MEM_READ_ONLY; if(sampleArgs->isAmdPlatform()) { inMemFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; } seedsBuf = clCreateBuffer(context, inMemFlags, width * height * sizeof(cl_float4), 0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (seedsBuf)"); resultBuf = clCreateBuffer(context, CL_MEM_WRITE_ONLY, width * height * sizeof(cl_float4) * mulFactor, NULL, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (resultBuf)"); cl_event writeEvt; // Enqueue write to seedsBuf status = clEnqueueWriteBuffer(commandQueue, seedsBuf, CL_FALSE, 0, width * height * sizeof(cl_float4), seeds, 0, NULL, &writeEvt); CHECK_OPENCL_ERROR(status, "clEnqueueWriteBuffer failed. (seedsBuf)"); status = clFlush(commandQueue); CHECK_OPENCL_ERROR(status, "clFlush failed."); status = waitForEventAndRelease(&writeEvt); CHECK_ERROR(status,SDK_SUCCESS, "WaitForEventAndRelease(inMapEvt1) Failed"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("MersenneTwister_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string("-x clc++ "); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernel = clCreateKernel(program, "gaussianRand", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); return SDK_SUCCESS; }
int FastWalshTransform::setupCL(void) { cl_int status = 0; cl_device_type dType; if(sampleArgs->deviceType.compare("cpu") == 0) { dType = CL_DEVICE_TYPE_CPU; } else //sampleArgs->deviceType = "gpu" { dType = CL_DEVICE_TYPE_GPU; if(sampleArgs->isThereGPU() == false) { std::cout << "GPU not found. Falling back to CPU device" << std::endl; dType = CL_DEVICE_TYPE_CPU; } } /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = getPlatform(platform, sampleArgs->platformId, sampleArgs->isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform() failed"); // Display available devices. retValue = displayDevices(platform, dType); CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices() failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; context = clCreateContextFromType( cps, dType, NULL, NULL, &status); CHECK_OPENCL_ERROR( status, "clCreateContextFromType failed."); // getting device on which to run the sample status = getDevices(context, &devices, sampleArgs->deviceId, sampleArgs->isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "getDevices() failed"); { // The block is to move the declaration of prop closer to its use cl_command_queue_properties prop = 0; commandQueue = clCreateCommandQueue( context, devices[sampleArgs->deviceId], prop, &status); CHECK_OPENCL_ERROR( status, "clCreateCommandQueue failed."); } //Set device info of given cl_device_id retValue = deviceInfo.setDeviceInfo(devices[sampleArgs->deviceId]); CHECK_ERROR(retValue, SDK_SUCCESS, "SDKDeviceInfo::setDeviceInfo() failed"); inputBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_float) * length, 0, &status); CHECK_OPENCL_ERROR(status, "clCreateBuffer failed. (inputBuffer)"); // create a CL program using the kernel source buildProgramData buildData; buildData.kernelName = std::string("FastWalshTransform_Kernels.cl"); buildData.devices = devices; buildData.deviceId = sampleArgs->deviceId; buildData.flagsStr = std::string(""); if(sampleArgs->isLoadBinaryEnabled()) { buildData.binaryName = std::string(sampleArgs->loadBinary.c_str()); } if(sampleArgs->isComplierFlagsSpecified()) { buildData.flagsFileName = std::string(sampleArgs->flags.c_str()); } retValue = buildOpenCLProgram(program, context, buildData); CHECK_ERROR(retValue, SDK_SUCCESS, "buildOpenCLProgram() failed"); // get a kernel object handle for a kernel with the given name kernel = clCreateKernel(program, "fastWalshTransform", &status); CHECK_OPENCL_ERROR(status, "clCreateKernel failed."); return SDK_SUCCESS; }