コード例 #1
0
/* Estimate the GFLOP/s of a device.
 *
 * di:        device description to estimate for.
 * useDouble: forwarded unchanged to the vendor-specific estimator.
 *
 * GPUs recognized as Nvidia or AMD are delegated to the matching
 * estimator. A GPU from any other vendor is reported and given a
 * placeholder of 100.0; any non-GPU device type is reported and
 * given a placeholder of 1.0.
 */
cl_double mwDeviceEstimateGFLOPs(const DevInfo* di, cl_bool useDouble)
{
    /* Only GPUs have a real estimate */
    if (di->devType != CL_DEVICE_TYPE_GPU)
    {
        mw_printf("Missing flops estimate for device type %s\n", showCLDeviceType(di->devType));
        return 1.0;
    }

    if (mwIsNvidiaGPUDevice(di))
    {
        return mwCUDAEstimateGFLOPs(di, useDouble);
    }

    if (mwIsAMDGPUDevice(di))
    {
        return mwAMDEstimateGFLOPs(di, useDouble);
    }

    /* A GPU, but not from a vendor we know how to estimate */
    mw_printf("Unhandled GPU vendor '%s' (0x%x)\n", di->vendor, di->vendorID);
    return 100.0;
}
コード例 #2
0
/* Decide from the driver version string whether the driver is one of
 * the versions believed to have the high CPU usage while waiting issue.
 *
 * ci: CL info whose embedded DevInfo (ci->di) supplies the vendor
 *     checks and the driver version string.
 *
 * Returns CL_FALSE when the device is neither an Nvidia nor an AMD GPU,
 * or when the driver string cannot be parsed in the expected format.
 */
cl_bool mwDriverHasHighCPUWaitIssue(CLInfo* ci)
{
    const DevInfo* di = &ci->di;
    int major = 0, minor = 0, patchLevel = 0;

    if (mwIsNvidiaGPUDevice(di))
    {
        /* The destinations are int, so the conversions must be %d;
         * %u requires pointers to unsigned int. */
        if (sscanf(di->driver, "%d.%d", &major, &minor) != 2)
        {
            return CL_FALSE;
        }

        /* Issue started around 270.xx and hasn't been fixed yet. */
        return (major >= 270);
    }
    else if (mwIsAMDGPUDevice(di))
    {
        /* Sometimes it has other stuff like (VM) after. Not sure what that means.
         * Trailing text after the three numbers is ignored by sscanf. */
        if (sscanf(di->driver, "CAL %d.%d.%d", &major, &minor, &patchLevel) != 3)
        {
            return CL_FALSE;
        }

        /* I think it happened in 11.7 and 11.8.
         * Affected range is CAL 1.4.x with 1457 <= x < 1546. */
        return (major == 1 && minor == 4 && patchLevel >= 1457 && patchLevel < 1546);
    }
    else
    {
        return CL_FALSE;
    }
}
コード例 #3
0
/* Test whether an Nvidia GPU's driver is at least version
 * minMajor.minMinor.
 *
 * di:       device whose driver string ("<major>.<minor>...") is parsed.
 * minMajor: required major version.
 * minMinor: required minor version when the major versions are equal.
 *
 * Returns CL_FALSE if the device is not an Nvidia GPU or the driver
 * string does not start with two '.'-separated unsigned numbers.
 */
cl_bool mwNvidiaDriverVersionGreaterEqual(const DevInfo* di, cl_uint minMajor, cl_uint minMinor)
{
    cl_uint drvMajor = 0;
    cl_uint drvMinor = 0;

    if (!mwIsNvidiaGPUDevice(di))
    {
        return CL_FALSE;
    }

    if (sscanf(di->driver, "%u.%u", &drvMajor, &drvMinor) != 2)
    {
        return CL_FALSE;
    }

    /* The major version decides unless it ties */
    if (drvMajor != minMajor)
    {
        return drvMajor > minMajor;
    }

    return drvMinor >= minMinor;
}
コード例 #4
0
/* Prepare the OpenCL side of an NBodyState.
 *
 * st:  state to initialize; must already be flagged as using CL
 *      (st->usesCL) and must already have its bodies set (st->bodytab).
 * ctx: simulation context; a custom Lua potential is rejected.
 *
 * In order: checks device capabilities, chooses thread counts and work
 * sizes, derives effNBody / maxDepth / usesConsistentMemory, loads the
 * kernels, creates buffers, sets the initial tree status, sets kernel
 * arguments, and marshals the bodies.
 *
 * Returns NBODY_SUCCESS, or the status of the first step that fails:
 * NBODY_CONSISTENCY_ERROR, NBODY_UNSUPPORTED, NBODY_CAPABILITY_ERROR,
 * NBODY_ERROR or NBODY_CL_ERROR.
 */
NBodyStatus nbInitNBodyStateCL(NBodyState* st, const NBodyCtx* ctx)
{
    const DevInfo* dev;

    if (!st->usesCL)
    {
        mw_printf("CL not setup for CL state initialization\n");
        return NBODY_CONSISTENCY_ERROR;
    }

    /* The body table has to exist before it can be sent anywhere */
    if (!st->bodytab)
    {
        mw_printf("Bodies not set for CL state initialization\n");
        return NBODY_CONSISTENCY_ERROR;
    }

    if (ctx->potentialType == EXTERNAL_POTENTIAL_CUSTOM_LUA)
    {
        mw_printf("Cannot use Lua potential with OpenCL\n");
        return NBODY_UNSUPPORTED;
    }

    dev = &st->ci->di;

    if (!nbCheckDevCapabilities(dev, ctx, st->nbody))
    {
        return NBODY_CAPABILITY_ERROR;
    }

    /* Work sizes are only chosen once thread counts were set successfully */
    if (nbSetThreadCounts(st->workSizes, dev, ctx))
    {
        return NBODY_ERROR;
    }

    if (nbSetWorkSizes(st->workSizes, dev, st->nbody, st->ignoreResponsive))
    {
        return NBODY_ERROR;
    }

    st->effNBody = nbFindEffectiveNBody(st->workSizes, st->usesExact, st->nbody);
    st->maxDepth = nbFindMaxDepthForDevice(dev, st->workSizes, ctx->useQuad);

    st->usesConsistentMemory =
           (mwIsNvidiaGPUDevice(dev) && mwNvidiaInlinePTXAvailable(st->ci->plat))
        || mwDeviceHasConsistentMemory(dev);

    if (nbLoadKernels(ctx, st))
    {
        return NBODY_CL_ERROR;
    }

    if (nbCreateBuffers(ctx, st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbSetInitialTreeStatus(st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbSetAllKernelArguments(st) != CL_SUCCESS)
    {
        return NBODY_CL_ERROR;
    }

    if (nbMarshalBodies(st, CL_TRUE) != CL_SUCCESS)
    {
        mw_printf("Error marshalling initial bodies\n");
        return NBODY_CL_ERROR;
    }

    return NBODY_SUCCESS;
}
コード例 #5
0
/* Fill in a DevInfo by querying an OpenCL device.
 *
 * di:  structure to populate; di->devID is set to dev.
 * dev: device to query.
 *
 * The status of every clGetDeviceInfo() call is OR-ed into one value,
 * so the result is CL_SUCCESS only if every query succeeded. When
 * several queries fail the combined value is non-zero but is not
 * necessarily a single meaningful CL error code.
 *
 * After the generic queries, the warp/wavefront size, compute
 * capability, and the vendor-specific fields (aluPerCU, doubleFrac,
 * calTarget) are derived. di->doubleExts is only set on success.
 *
 * Returns the accumulated status; a failure is also reported through
 * mwPerrorCL().
 */
cl_int mwGetDevInfo(DevInfo* di, cl_device_id dev)
{
    const AMDGPUData* amdData;
    cl_int err = CL_SUCCESS;

    di->devID = dev;

    err |= clGetDeviceInfo(dev, CL_DEVICE_TYPE,                     sizeof(di->devType),  &di->devType, NULL);

    err |= clGetDeviceInfo(dev, CL_DEVICE_NAME,                     sizeof(di->devName),  di->devName, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VENDOR,                   sizeof(di->vendor),   di->vendor, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VENDOR_ID,                sizeof(cl_uint),  &di->vendorID, NULL);
    err |= clGetDeviceInfo(dev, CL_DRIVER_VERSION,                  sizeof(di->driver),   di->driver, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_VERSION,                  sizeof(di->version),  di->version, NULL);
  //err |= clGetDeviceInfo(dev, CL_DEVICE_OPENCL_C_VERSION,         sizeof(di->clCVer),   di->clCVer, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_ENDIAN_LITTLE,            sizeof(cl_bool),  &di->littleEndian, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(cl_bool),  &di->errCorrect, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),  &di->imgSupport, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_ADDRESS_BITS,             sizeof(cl_uint),  &di->addrBits, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,        sizeof(cl_uint),  &di->maxCompUnits, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CLOCK_FREQUENCY,      sizeof(cl_uint),  &di->clockFreq, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE,          sizeof(cl_ulong), &di->memSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_MEM_ALLOC_SIZE,       sizeof(cl_ulong), &di->maxMemAlloc, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,    sizeof(cl_ulong), &di->gMemCache, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(cl_uint), &di->cachelineSize, NULL);

  //err |= clGetDeviceInfo(dev, CL_DEVICE_HOST_UNIFIED_MEMORY,      sizeof(cl_ulong), &unifiedMem, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(cl_device_local_mem_type), &di->localMemType, NULL);

    err |= clGetDeviceInfo(dev, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->doubleFPConfig, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &di->floatFPConfig, NULL);

    err |= clGetDeviceInfo(dev, CL_DEVICE_LOCAL_MEM_SIZE,           sizeof(cl_ulong), &di->localMemSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_ARGS,        sizeof(cl_uint),  &di->maxConstArgs, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &di->maxConstBufSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t), &di->maxParamSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &di->maxWorkGroupSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &di->maxWorkItemDim, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(di->maxWorkItemSizes), di->maxWorkItemSizes, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &di->memBaseAddrAlign, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, sizeof(cl_uint), &di->minAlignSize, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(size_t), &di->timerRes, NULL);
    err |= clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, sizeof(di->exts), &di->exts, NULL);

    /* Defaults for when the values below cannot be determined */
    di->computeCapabilityMajor = di->computeCapabilityMinor = 0;
    di->warpSize = 0;

    /* Only inspect the extension string if every query so far worked */
    if (err == CL_SUCCESS)
    {
        if (strstr(di->exts, "cl_nv_device_attribute_query") != NULL)
        {
            /* The Nvidia extension reports warp size and compute capability directly */
            err |= clGetDeviceInfo(dev, CL_DEVICE_WARP_SIZE_NV,
                                   sizeof(di->warpSize), &di->warpSize, NULL);
            err |= clGetDeviceInfo(di->devID, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,
                                   sizeof(cl_uint), &di->computeCapabilityMajor, NULL);
            err |= clGetDeviceInfo(di->devID, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,
                                   sizeof(cl_uint), &di->computeCapabilityMinor, NULL);
        }
        else
        {
            if (di->devType == CL_DEVICE_TYPE_CPU)
            {
                di->warpSize = 1;
            }
            else if (di->devType == CL_DEVICE_TYPE_GPU)
            {
                /* FIXME: How do I get this on AMD? It's 64 for all of
                 * the high end stuff, but 32 for lower. I think it's
                 * 64 for all the GPUs that do have doubles */
                di->warpSize = 64;
            }
            else
            {
                mw_printf("Unknown device type, using warp size = 1\n");
                di->warpSize = 1;
            }
        }
    }

    di->nonOutput = mwDeviceIsNonOutput(di);
    di->hasGraphicsQOS = mwDeviceHasGraphicsQOS(di);


    if (mwIsNvidiaGPUDevice(di))
    {
        /* The Nvidia extension attributes were already queried above
         * when the extension is present, so they are not queried again
         * here. That also avoids scanning di->exts after a failed
         * query. */
        di->aluPerCU = mwCUDACoresPerComputeUnit(di);
        di->doubleFrac = mwCUDAEstimateDoubleFrac(di);
        di->calTarget = MW_CAL_TARGET_INVALID;
    }
    else if (mwIsAMDGPUDevice(di))
    {
        /* NOTE(review): assumes the lookup never returns NULL - confirm */
        amdData = mwLookupAMDGPUInfo(di);

        di->aluPerCU   = amdData->aluPerCU;
        di->doubleFrac = amdData->doubleFrac;
        di->calTarget  = amdData->target;
        /* The table value replaces the generic GPU guess made above */
        di->warpSize   = amdData->wavefrontSize;
    }

    /* Still unset if a query failed or a lookup reported 0 */
    if (di->warpSize == 0)
    {
        mw_printf("Unknown device type, using warp size = 1\n");
        di->warpSize = 1;
    }

    if (err != CL_SUCCESS)
    {
        mwPerrorCL(err, "Error getting device information");
    }
    else
    {
        di->doubleExts = mwGetDoubleExts(di->exts);
    }

    return err;
}