/*
 * Estimate the peak GFLOP/s of a device.
 *
 * di        - device description; devType, vendor and vendorID are read here.
 * useDouble - forwarded unchanged to the vendor-specific estimator.
 *
 * Returns the vendor-specific estimate for NVIDIA and AMD GPUs. For a GPU
 * from any other vendor a message is printed and 100.0 is returned; for
 * any non-GPU device type a message is printed and 1.0 is returned.
 */
cl_double mwDeviceEstimateGFLOPs(const DevInfo* di, cl_bool useDouble)
{
    /* Only GPUs have an estimator; everything else gets a token value. */
    if (di->devType != CL_DEVICE_TYPE_GPU)
    {
        mw_printf("Missing flops estimate for device type %s\n", showCLDeviceType(di->devType));
        return 1.0;
    }

    if (mwIsNvidiaGPUDevice(di))
    {
        return mwCUDAEstimateGFLOPs(di, useDouble);
    }

    if (mwIsAMDGPUDevice(di))
    {
        return mwAMDEstimateGFLOPs(di, useDouble);
    }

    /* A GPU, but from a vendor without a dedicated estimator. */
    mw_printf("Unhandled GPU vendor '%s' (0x%x)\n", di->vendor, di->vendorID);
    return 100.0;
}
/*
 * Report whether the device's driver version falls in a range flagged as
 * having the high CPU usage while waiting issue.
 *
 * ci - OpenCL info whose embedded device description (ci->di) supplies the
 *      driver version string.
 *
 * Returns CL_TRUE when the parsed driver version is in an affected range.
 * Returns CL_FALSE for unaffected versions, for version strings that do not
 * parse, and for devices that are neither NVIDIA nor AMD GPUs.
 */
cl_bool mwDriverHasHighCPUWaitIssue(CLInfo* ci)
{
    const DevInfo* di = &ci->di;
    int major = 0, minor = 0, patchLevel = 0;

    if (mwIsNvidiaGPUDevice(di))
    {
        /* The destinations are int, so the conversion must be %d; %u
         * requires a pointer to unsigned int. */
        if (sscanf(di->driver, "%d.%d", &major, &minor) != 2)
        {
            return CL_FALSE;
        }

        /* Issue started around 270.xx and hasn't been fixed yet. */
        return (major >= 270) ? CL_TRUE : CL_FALSE;
    }
    else if (mwIsAMDGPUDevice(di))
    {
        /* Sometimes it has other stuff like (VM) after. Not sure what that means. */
        if (sscanf(di->driver, "CAL %d.%d.%d", &major, &minor, &patchLevel) != 3)
        {
            return CL_FALSE;
        }

        /* I think it happened in 11.7 and 11.8 */
        return (major == 1 && minor == 4 && patchLevel >= 1457 && patchLevel < 1546) ? CL_TRUE : CL_FALSE;
    }
    else
    {
        return CL_FALSE;
    }
}
/*
 * Check that an AMD driver's CAL version is at least the requested one.
 *
 * di            - device description; must be an AMD GPU (asserted). Its
 *                 driver string is parsed as "CAL <major>.<minor>.<patch>".
 * minMajor      - required major version.
 * minMinor      - required minor version.
 * minPatchLevel - required patch level.
 *
 * Returns CL_TRUE when the parsed (major, minor, patch) triple is greater
 * than or equal to the requested one, compared component by component.
 * Returns CL_FALSE otherwise, including when the string does not parse.
 */
cl_bool mwCALVersionMin(const DevInfo* di, int minMajor, int minMinor, int minPatchLevel)
{
    int verMajor = 0;
    int verMinor = 0;
    int verPatch = 0;

    assert(mwIsAMDGPUDevice(di));

    if (sscanf(di->driver, "CAL %d.%d.%d", &verMajor, &verMinor, &verPatch) != 3)
    {
        return CL_FALSE;
    }

    /* The most significant differing component decides the result. */
    if (verMajor != minMajor)
    {
        return (verMajor > minMajor) ? CL_TRUE : CL_FALSE;
    }

    if (verMinor != minMinor)
    {
        return (verMinor > minMinor) ? CL_TRUE : CL_FALSE;
    }

    return (verPatch >= minPatchLevel) ? CL_TRUE : CL_FALSE;
}
/*
 * The driver version format changed to stop using the CAL version.
 *
 * Check that an AMD driver's version is at least the requested one.
 *
 * di            - device description; must be an AMD GPU (asserted). Its
 *                 driver string is parsed as "<number>.<number> (VM)",
 *                 where the trailing "(VM)" is optional.
 * minMajor      - required first number.
 * minPatchLevel - required second number.
 *
 * Returns CL_TRUE when the parsed version is greater than or equal to the
 * requested one. Returns CL_FALSE otherwise, including when the string does
 * not parse.
 */
cl_bool mwAMDCLVersionMin(const DevInfo* di, int minMajor, int minPatchLevel)
{
    int major = 0, patchLevel = 0;
    int matches;

    assert(mwIsAMDGPUDevice(di));

    /* <number>.<number> (VM)
       where the (VM) is optional.
       I have no idea what the minor part is.
    */
    matches = sscanf(di->driver, "%d.%d%*s", &major, &patchLevel);
    if (matches != 2)
    {
        return CL_FALSE;
    }

    if (major > minMajor)
    {
        return CL_TRUE;
    }
    else if (major < minMajor)
    {
        return CL_FALSE;
    }
    else
    {
        /* Inclusive: a driver exactly at the minimum satisfies it, matching
         * mwCALVersionMin. */
        return (patchLevel >= minPatchLevel) ? CL_TRUE : CL_FALSE;
    }
}
/* Example #5 */
/* 0 */
static cl_bool usingILKernelIsAcceptable(const CLInfo* ci, const AstronomyParameters* ap, const CLRequest* clr)
{
    const DevInfo* di = &ci->di;
    static const cl_int maxILKernelStreams = 4;

    if (!DOUBLEPREC || clr->forceNoILKernel)
        return CL_FALSE;

    /* Supporting these unused options with the IL kernel is too much work */
    if (ap->number_streams > maxILKernelStreams || ap->aux_bg_profile || ap->number_streams == 0)
        return CL_FALSE;

    /* Make sure an acceptable device */
    return (mwIsAMDGPUDevice(di) && isILKernelTarget(di) && mwPlatformSupportsAMDOfflineDevices(ci));
}
/*
 * Fold a new OpenCL status into an accumulated one, keeping the first
 * failure. OpenCL status codes are enumerated values rather than bit flags,
 * so OR-ing two different failures would yield a meaningless code.
 */
static cl_int mwFirstCLError(cl_int soFar, cl_int latest)
{
    return (soFar != CL_SUCCESS) ? soFar : latest;
}

/*
 * Query the attributes exposed through cl_nv_device_attribute_query (warp
 * size and compute capability) for di->devID and store them in di.
 * Returns CL_SUCCESS or the first failing status.
 */
static cl_int mwQueryNvidiaDeviceAttributes(DevInfo* di)
{
    cl_int err = CL_SUCCESS;

    err = mwFirstCLError(err, clGetDeviceInfo(di->devID, CL_DEVICE_WARP_SIZE_NV,
                                              sizeof(di->warpSize), &di->warpSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(di->devID, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
                                              sizeof(cl_uint), &di->computeCapabilityMajor, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(di->devID, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
                                              sizeof(cl_uint), &di->computeCapabilityMinor, NULL));

    return err;
}

/*
 * Fill a DevInfo with the properties of an OpenCL device.
 *
 * di  - destination; every field queried below is overwritten.
 * dev - device to describe; also stored in di->devID.
 *
 * All generic clGetDeviceInfo queries are attempted even if an earlier one
 * fails. The NVIDIA-specific attributes are queried once, and only when the
 * generic queries succeeded and the extension string advertises
 * cl_nv_device_attribute_query. Vendor-derived fields (aluPerCU, doubleFrac,
 * calTarget, and for AMD the warp size) are then filled from the
 * vendor-specific helpers.
 *
 * Returns CL_SUCCESS, or the status of the first query that failed (which is
 * also reported through mwPerrorCL). di->doubleExts is only set on success.
 */
cl_int mwGetDevInfo(DevInfo* di, cl_device_id dev)
{
    cl_int err = CL_SUCCESS;

    di->devID = dev;

    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(di->devType), &di->devType, NULL));

    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_NAME, sizeof(di->devName), di->devName, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_VENDOR, sizeof(di->vendor), di->vendor, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_VENDOR_ID, sizeof(cl_uint), &di->vendorID, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DRIVER_VERSION, sizeof(di->driver), di->driver, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(di->version), di->version, NULL));
    /* CL_DEVICE_OPENCL_C_VERSION (di->clCVer) is currently not queried. */
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_ENDIAN_LITTLE, sizeof(cl_bool), &di->littleEndian, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(cl_bool), &di->errCorrect, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &di->imgSupport, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &di->addrBits, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &di->maxCompUnits, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), &di->clockFreq, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &di->memSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &di->maxMemAlloc, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(cl_ulong), &di->gMemCache, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), &di->cachelineSize, NULL));

    /* CL_DEVICE_HOST_UNIFIED_MEMORY is currently not queried. */
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type), &di->localMemType, NULL));

    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->doubleFPConfig, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->floatFPConfig, NULL));

    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &di->localMemSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(cl_uint), &di->maxConstArgs, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &di->maxConstBufSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), &di->maxParamSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &di->maxWorkGroupSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &di->maxWorkItemDim, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(di->maxWorkItemSizes), di->maxWorkItemSizes, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &di->memBaseAddrAlign, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, sizeof(cl_uint), &di->minAlignSize, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t), &di->timerRes, NULL));
    err = mwFirstCLError(err, clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, sizeof(di->exts), &di->exts, NULL));

    /* Defaults, in case the vendor-specific queries below are skipped. */
    di->computeCapabilityMajor = di->computeCapabilityMinor = 0;
    di->warpSize = 0;

    /* The extension string is only inspected when every query above,
     * including the one that fills it, has succeeded. */
    if (err == CL_SUCCESS)
    {
        if (strstr(di->exts, "cl_nv_device_attribute_query") != NULL)
        {
            err = mwQueryNvidiaDeviceAttributes(di);
        }
        else if (di->devType == CL_DEVICE_TYPE_CPU)
        {
            di->warpSize = 1;
        }
        else if (di->devType == CL_DEVICE_TYPE_GPU)
        {
            /* FIXME: How do I get this on AMD? It's 64 for all of
             * the high end stuff, but 32 for lower. I think it's
             * 64 for all the GPUs that do have doubles */
            di->warpSize = 64;
        }
        else
        {
            mw_printf("Unknown device type, using warp size = 1\n");
            di->warpSize = 1;
        }
    }

    di->nonOutput = mwDeviceIsNonOutput(di);
    di->hasGraphicsQOS = mwDeviceHasGraphicsQOS(di);

    if (mwIsNvidiaGPUDevice(di))
    {
        /* The NVIDIA attributes were already queried above; repeating the
         * query here would only re-read the same values. */
        di->aluPerCU = mwCUDACoresPerComputeUnit(di);
        di->doubleFrac = mwCUDAEstimateDoubleFrac(di);
        di->calTarget = MW_CAL_TARGET_INVALID;
    }
    else if (mwIsAMDGPUDevice(di))
    {
        /* NOTE(review): the result is dereferenced unchecked; presumably
         * mwLookupAMDGPUInfo never returns NULL - confirm. */
        const AMDGPUData* amdData = mwLookupAMDGPUInfo(di);

        di->aluPerCU   = amdData->aluPerCU;
        di->doubleFrac = amdData->doubleFrac;
        di->calTarget  = amdData->target;
        di->warpSize   = amdData->wavefrontSize;
    }

    /* Nothing above produced a usable warp size; fall back to 1. */
    if (di->warpSize == 0)
    {
        mw_printf("Unknown device type, using warp size = 1\n");
        di->warpSize = 1;
    }

    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error getting device information");
    }
    else
    {
        di->doubleExts = mwGetDoubleExts(di->exts);
    }

    return err;
}