/*
 * Estimate the GFLOPs figure for a device.
 *
 * di        - device description; devType, vendor and vendorID are read here
 * useDouble - forwarded unchanged to the vendor-specific estimators
 *
 * GPU devices are dispatched to the Nvidia or AMD estimator. A GPU from any
 * other vendor gets a fixed placeholder of 100.0, and any non-GPU device type
 * gets 1.0; both fallbacks print a diagnostic.
 */
cl_double mwDeviceEstimateGFLOPs(const DevInfo* di, cl_bool useDouble)
{
    /* Only GPUs have an estimator */
    if (di->devType != CL_DEVICE_TYPE_GPU)
    {
        mw_printf("Missing flops estimate for device type %s\n", showCLDeviceType(di->devType));
        return 1.0;
    }

    if (mwIsNvidiaGPUDevice(di))
    {
        return mwCUDAEstimateGFLOPs(di, useDouble);
    }

    if (mwIsAMDGPUDevice(di))
    {
        return mwAMDEstimateGFLOPs(di, useDouble);
    }

    /* A GPU, but from a vendor without an estimator */
    mw_printf("Unhandled GPU vendor '%s' (0x%x)\n", di->vendor, di->vendorID);
    return 100.0;
}
/*
 * Decide from the driver version string whether the driver is one of the
 * versions flagged here for the high CPU wait issue.
 *
 * ci - CL info whose embedded DevInfo (ci->di) supplies the driver string
 *
 * Returns CL_TRUE only when the driver string parses and falls in a flagged
 * range. Returns CL_FALSE for an unparseable string and for any device that
 * is neither an Nvidia nor an AMD GPU.
 */
cl_bool mwDriverHasHighCPUWaitIssue(CLInfo* ci)
{
    const DevInfo* di = &ci->di;
    int major = 0, minor = 0, patchLevel = 0;

    if (mwIsNvidiaGPUDevice(di))
    {
        /* The destinations are ints, so the conversions must be %d;
         * minor is parsed only to validate the "<major>.<minor>" shape. */
        if (sscanf(di->driver, "%d.%d", &major, &minor) != 2)
        {
            return CL_FALSE;
        }

        /* Issue started around 270.xx and hasn't been fixed yet. */
        return (major >= 270);
    }
    else if (mwIsAMDGPUDevice(di))
    {
        /* Sometimes it has other stuff like (VM) after. Not sure what that means. */
        if (sscanf(di->driver, "CAL %d.%d.%d", &major, &minor, &patchLevel) != 3)
        {
            return CL_FALSE;
        }

        /* I think it happened in 11.7 and 11.8 */
        return (major == 1 && minor == 4 && patchLevel >= 1457 && patchLevel < 1546);
    }
    else
    {
        return CL_FALSE;
    }
}
/*
 * Test whether an AMD device's "CAL <major>.<minor>.<patch>" driver string is
 * at least the given version.
 *
 * di            - device description; must be an AMD GPU (asserted)
 * minMajor      - minimum major version
 * minMinor      - minimum minor version
 * minPatchLevel - minimum patch level
 *
 * The three components are compared in order, most significant first.
 * Returns CL_FALSE if the driver string does not match the CAL format.
 */
cl_bool mwCALVersionMin(const DevInfo* di, int minMajor, int minMinor, int minPatchLevel)
{
    int verMajor = 0;
    int verMinor = 0;
    int verPatch = 0;

    assert(mwIsAMDGPUDevice(di));

    if (sscanf(di->driver, "CAL %d.%d.%d", &verMajor, &verMinor, &verPatch) != 3)
    {
        return CL_FALSE;
    }

    /* The first component that differs decides the result */
    if (verMajor != minMajor)
    {
        return (verMajor > minMajor) ? CL_TRUE : CL_FALSE;
    }

    if (verMinor != minMinor)
    {
        return (verMinor > minMinor) ? CL_TRUE : CL_FALSE;
    }

    return (verPatch >= minPatchLevel);
}
// The driver version format changed to stop using the CAL version cl_bool mwAMDCLVersionMin(const DevInfo* di, int minMajor, int minPatchLevel) { int major = 0, patchLevel = 0; int matches = sscanf(di->driver, "%d.%d%*s", &major, &patchLevel); assert(mwIsAMDGPUDevice(di)); /* <number>.<number> (VM) where the (VM) is optional. I have no idea what the minor part is. */ if (matches != 2) { return CL_FALSE; } if (major > minMajor) { return CL_TRUE; } else if (major < minMajor) { return CL_FALSE; } else { return (patchLevel > minPatchLevel); } }
static cl_bool usingILKernelIsAcceptable(const CLInfo* ci, const AstronomyParameters* ap, const CLRequest* clr) { const DevInfo* di = &ci->di; static const cl_int maxILKernelStreams = 4; if (!DOUBLEPREC || clr->forceNoILKernel) return CL_FALSE; /* Supporting these unused options with the IL kernel is too much work */ if (ap->number_streams > maxILKernelStreams || ap->aux_bg_profile || ap->number_streams == 0) return CL_FALSE; /* Make sure an acceptable device */ return (mwIsAMDGPUDevice(di) && isILKernelTarget(di) && mwPlatformSupportsAMDOfflineDevices(ci)); }
/*
 * Fill in a DevInfo by querying an OpenCL device.
 *
 * di  - structure to populate
 * dev - device to query; also stored in di->devID
 *
 * All the basic clGetDeviceInfo queries are issued unconditionally and their
 * status codes are OR-ed together into err. The result is therefore only
 * meaningful as "CL_SUCCESS or not"; when several queries fail, the combined
 * value need not be any single OpenCL error code.
 *
 * After the basic queries, the warp size and compute capability are filled in
 * (from the cl_nv_device_attribute_query extension when advertised, otherwise
 * from defaults), followed by the vendor-specific fields.
 *
 * Returns the accumulated status. On failure an error is printed and
 * di->doubleExts is not set.
 */
cl_int mwGetDevInfo(DevInfo* di, cl_device_id dev)
{
    const AMDGPUData* amdData;
    cl_int err = CL_SUCCESS;

    di->devID = dev;

    err |= clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(di->devType), &di->devType, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(di->devName), di->devName, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VENDOR, sizeof(di->vendor), di->vendor, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &di->vendorID, NULL);
    err |= clGetDeviceInfo(dev, CL_DRIVER_VERSION, sizeof(di->driver), di->driver, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(di->version), di->version, NULL);
    /* Disabled: */
    /* err |= clGetDeviceInfo(dev, CL_DEVICE_OPENCL_C_VERSION, sizeof(di->clCVer), di->clCVer, NULL); */
    err |= clGetDeviceInfo(dev, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), &di->littleEndian, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(cl_bool), &di->errCorrect, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &di->imgSupport, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &di->addrBits, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &di->maxCompUnits, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &di->clockFreq, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &di->memSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &di->maxMemAlloc, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), &di->gMemCache, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), &di->cachelineSize, NULL);
    /* Disabled: */
    /* err |= clGetDeviceInfo(dev, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(cl_ulong), &unifiedMem, NULL); */
    err |= clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type), &di->localMemType, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->doubleFPConfig, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->floatFPConfig, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &di->localMemSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint), &di->maxConstArgs, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &di->maxConstBufSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), &di->maxParamSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &di->maxWorkGroupSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &di->maxWorkItemDim, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(di->maxWorkItemSizes), di->maxWorkItemSizes, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &di->memBaseAddrAlign, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, sizeof(cl_uint), &di->minAlignSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t), &di->timerRes, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, sizeof(di->exts), &di->exts, NULL);

    di->computeCapabilityMajor = di->computeCapabilityMinor = 0;
    di->warpSize = 0;

    /* di->exts is only inspected once every query above has succeeded,
     * so it is never read when the extensions query may not have written it. */
    if (err == CL_SUCCESS)
    {
        if (strstr(di->exts, "cl_nv_device_attribute_query") != NULL)
        {
            /* di->devID was set to dev above, so dev is used for all three queries */
            err |= clGetDeviceInfo(dev, CL_DEVICE_WARP_SIZE_NV,
                                   sizeof(di->warpSize), &di->warpSize, NULL);
            err |= clGetDeviceInfo(dev, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
                                   sizeof(cl_uint), &di->computeCapabilityMajor, NULL);
            err |= clGetDeviceInfo(dev, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
                                   sizeof(cl_uint), &di->computeCapabilityMinor, NULL);
        }
        else
        {
            if (di->devType == CL_DEVICE_TYPE_CPU)
            {
                di->warpSize = 1;
            }
            else if (di->devType == CL_DEVICE_TYPE_GPU)
            {
                /* FIXME: How do I get this on AMD? It's 64 for all of
                 * the high end stuff, but 32 for lower. I think it's
                 * 64 for all the GPUs that do have doubles */
                di->warpSize = 64;
            }
            else
            {
                mw_printf("Unknown device type, using warp size = 1\n");
                di->warpSize = 1;
            }
        }
    }

    di->nonOutput = mwDeviceIsNonOutput(di);
    di->hasGraphicsQOS = mwDeviceHasGraphicsQOS(di);

    if (mwIsNvidiaGPUDevice(di))
    {
        di->aluPerCU = mwCUDACoresPerComputeUnit(di);
        di->doubleFrac = mwCUDAEstimateDoubleFrac(di);
        di->calTarget = MW_CAL_TARGET_INVALID;
        /* The NV warp size / compute capability attributes were already
         * fetched above when the extension is advertised; they are not
         * queried a second time here. */
    }
    else if (mwIsAMDGPUDevice(di))
    {
        amdData = mwLookupAMDGPUInfo(di);

        di->aluPerCU   = amdData->aluPerCU;
        di->doubleFrac = amdData->doubleFrac;
        di->calTarget  = amdData->target;
        di->warpSize   = amdData->wavefrontSize;
    }

    /* Still unset if the queries failed, or if a lookup reported 0 */
    if (di->warpSize == 0)
    {
        mw_printf("Unknown device type, using warp size = 1\n");
        di->warpSize = 1;
    }

    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error getting device information");
    }
    else
    {
        di->doubleExts = mwGetDoubleExts(di->exts);
    }

    return err;
}