Пример #1
0
/*! \brief Clears nonbonded shift force output array and energy outputs on the GPU.
 */
static void
nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb)
{

    cl_int               cl_error;
    cl_atomdata_t *      adat     = nb->atdat;
    cl_command_queue     ls       = nb->stream[eintLocal];

    size_t               local_work_size[3]   = {1, 1, 1};
    size_t               global_work_size[3]  = {1, 1, 1};

    cl_int               shifts   = SHIFTS*3;

    cl_int               arg_no;

    cl_kernel            zero_e_fshift = nb->kernel_zero_e_fshift;

    local_work_size[0]   = 64;
    // Round the total number of threads up from the array size
    global_work_size[0]  = ((shifts + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];

    arg_no    = 0;
    cl_error  = clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->fshift));
    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_lj));
    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->e_el));
    cl_error |= clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_uint), &shifts);
    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());

    cl_error = clEnqueueNDRangeKernel(ls, zero_e_fshift, 3, nullptr, global_work_size, local_work_size, 0, nullptr, nullptr);
    GMX_ASSERT(cl_error == CL_SUCCESS, ocl_get_error_string(cl_error).c_str());
}
Пример #2
0
//! This function is documented in the header file
bool canDetectGpus(std::string *errorMessage)
{
    cl_uint numPlatforms;
    cl_int  status       = clGetPlatformIDs(0, nullptr, &numPlatforms);
    GMX_ASSERT(status != CL_INVALID_VALUE, "Incorrect call of clGetPlatformIDs detected");
#ifdef cl_khr_icd
    if (status == CL_PLATFORM_NOT_FOUND_KHR)
    {
        // No valid ICDs found
        if (errorMessage != nullptr)
        {
            errorMessage->assign("No valid OpenCL driver found");
        }
        return false;
    }
#endif
    GMX_RELEASE_ASSERT(status == CL_SUCCESS,
                       gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
                                         status, ocl_get_error_string(status).c_str()).c_str());
    bool foundPlatform = (numPlatforms > 0);
    if (!foundPlatform && errorMessage != nullptr)
    {
        errorMessage->assign("No OpenCL platforms found even though the driver was valid");
    }
    return foundPlatform;
}
Пример #3
0
/*! \brief Creates context for OpenCL GPU given by \p mygpu
 *
 * A fatal error results if creation fails.
 *
 * \param[inout] runtimeData runtime data including program and context
 * \param[in]    devInfo     device info struct
 * \param[in]    rank        MPI rank (for error reporting)
 */
static void
nbnxn_gpu_create_context(gmx_device_runtime_data_t *runtimeData,
                         const gmx_device_info_t   *devInfo,
                         int                        rank)
{
    cl_context_properties     context_properties[3];
    cl_platform_id            platform_id;
    cl_device_id              device_id;
    cl_context                context;
    cl_int                    cl_error;

    assert(runtimeData != nullptr);
    assert(devInfo != nullptr);

    platform_id      = devInfo->ocl_gpu_id.ocl_platform_id;
    device_id        = devInfo->ocl_gpu_id.ocl_device_id;

    context_properties[0] = CL_CONTEXT_PLATFORM;
    context_properties[1] = reinterpret_cast<cl_context_properties>(platform_id);
    context_properties[2] = 0; /* Terminates the list of properties */

    context = clCreateContext(context_properties, 1, &device_id, nullptr, nullptr, &cl_error);
    if (CL_SUCCESS != cl_error)
    {
        gmx_fatal(FARGS, "On rank %d failed to create context for GPU #%s:\n OpenCL error %d: %s",
                  rank,
                  devInfo->device_name,
                  cl_error, ocl_get_error_string(cl_error).c_str());
    }

    runtimeData->context = context;
}
Пример #4
0
/*!  \brief Get the warp size reported by device
 *
 *  This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
 *  Nvidia reports 32, AMD for GPU 64. Ignore the rest
 *
 *  \param  context   Current OpenCL context
 *  \param  deviceId OpenCL device with the context
 *  \return cl_int value of the warp size
 *
 * \throws InternalError if an OpenCL error was encountered
 */
static size_t
getWarpSize(cl_context context, cl_device_id deviceId)
{
    cl_int      cl_error;
    const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
    cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
    }

    cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
    }

    cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
    }

    size_t warpSize = 0;
    cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
                                        sizeof(warpSize), &warpSize, NULL);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
    }
    if (warpSize == 0)
    {
        GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
    }

    cl_error = clReleaseKernel(kernel);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
    }
    cl_error = clReleaseProgram(program);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
    }

    return warpSize;
}
Пример #5
0
/*! \brief Clears the first natoms_clear elements of the GPU nonbonded force output array.
 */
static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
{
    if (natoms_clear == 0)
    {
        return;
    }

    cl_int gmx_used_in_debug cl_error;

    cl_atomdata_t           *atomData = nb->atdat;
    cl_command_queue         ls       = nb->stream[eintLocal];
    cl_float                 value    = 0.0f;

    cl_error = clEnqueueFillBuffer(ls, atomData->f, &value, sizeof(cl_float),
                                   0, natoms_clear*sizeof(rvec), 0, nullptr, nullptr);
    GMX_RELEASE_ASSERT(cl_error == CL_SUCCESS, ("nbnxn_ocl_clear_f failed: " +
                                                ocl_get_error_string(cl_error)).c_str());
}
Пример #6
0
/*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
 *
 * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
 * variable is set or the compilation failed, then the OpenCL
 * compilation log is written.
 *
 * \param fplog               Open file pointer to log file
 * \param program             OpenCL program that was compiled
 * \param deviceId            Id of the device for which compilation took place
 * \param kernelFilename      File name containing the kernel
 * \param preprocessorOptions String containing the preprocessor command-line options used for the build
 * \param buildFailed         Whether the OpenCL build succeeded
 *
 * \throws std::bad_alloc if out of memory */
static void
writeOclBuildLog(FILE              *fplog,
                 cl_program         program,
                 cl_device_id       deviceId,
                 const std::string &kernelFilename,
                 const std::string &preprocessorOptions,
                 bool               buildFailed)
{
    bool writeOutput = ((fplog != nullptr) &&
                        (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));

    if (!writeOutput)
    {
        return;
    }

    // Get build log string size
    size_t buildLogSize;
    cl_int cl_error = clGetProgramBuildInfo(program,
                                            deviceId,
                                            CL_PROGRAM_BUILD_LOG,
                                            0,
                                            NULL,
                                            &buildLogSize);
    if (cl_error != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
    }

    char             *buildLog = nullptr;
    scoped_cptr<char> buildLogGuard;
    if (buildLogSize != 0)
    {
        /* Allocate memory to fit the build log,
           it can be very large in case of errors */
        snew(buildLog, buildLogSize);
        buildLogGuard.reset(buildLog);

        /* Get the actual compilation log */
        cl_error = clGetProgramBuildInfo(program,
                                         deviceId,
                                         CL_PROGRAM_BUILD_LOG,
                                         buildLogSize,
                                         buildLog,
                                         NULL);
        if (cl_error != CL_SUCCESS)
        {
            GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
        }
    }

    std::string message;
    if (buildFailed)
    {
        message += "Compilation of source file " + kernelFilename + " failed!\n";
    }
    else
    {
        message += "Compilation of source file " + kernelFilename + " was successful!\n";
    }
    message += "-- Used build options: " + preprocessorOptions + "\n";
    message += "--------------LOG START---------------\n";
    message += buildLog;
    message += "---------------LOG END----------------\n";;

    fputs(message.c_str(), fplog);
}
Пример #7
0
cl_program
compileProgram(FILE              *fplog,
               const std::string &kernelBaseFilename,
               const std::string &extraDefines,
               cl_context         context,
               cl_device_id       deviceId,
               ocl_vendor_id_t    deviceVendorId)
{
    cl_int      cl_error;
    std::string kernelRootPath = getKernelRootPath();

    GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");

    /* Load OpenCL source files */
    std::string kernelFilename = Path::join(kernelRootPath,
                                            kernelBaseFilename);

    /* Make the build options */
    std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
                                                              getWarpSize(context, deviceId),
                                                              deviceVendorId,
                                                              extraDefines);

    bool        buildCacheWasRead = false;

    std::string cacheFilename;
    if (useBuildCache)
    {
        cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
    }

    /* Create OpenCL program */
    cl_program program = nullptr;
    if (useBuildCache)
    {
        if (File::exists(cacheFilename, File::returnFalseOnError))
        {
            /* Check if there's a valid cache available */
            try
            {
                program           = makeProgramFromCache(cacheFilename, context, deviceId);
                buildCacheWasRead = true;
            }
            catch (FileIOError &e)
            {
                // Failing to read from the cache is not a critical error
                formatExceptionMessageToFile(fplog, e);
            }
        }
        else
        {
            fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
        }
    }
    if (program == nullptr)
    {
        // Compile OpenCL program from source
        std::string kernelSource = TextReader::readFileToString(kernelFilename);
        if (kernelSource.empty())
        {
            GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
        }
        const char *kernelSourcePtr  = kernelSource.c_str();
        size_t      kernelSourceSize = kernelSource.size();
        /* Create program from source code */
        program = clCreateProgramWithSource(context,
                                            1,
                                            &kernelSourcePtr,
                                            &kernelSourceSize,
                                            &cl_error);
        if (cl_error != CL_SUCCESS)
        {
            GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
        }
    }

    /* Build the OpenCL program, keeping the status to potentially
       write to the simulation log file. */
    cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);

    /* Write log first, and then throw exception that the user know what is
       the issue even if the build fails. */
    writeOclBuildLog(fplog,
                     program,
                     deviceId,
                     kernelFilename,
                     preprocessorOptions,
                     buildStatus != CL_SUCCESS);

    if (buildStatus != CL_SUCCESS)
    {
        GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
    }

    if (useBuildCache)
    {
        if (!buildCacheWasRead)
        {
            /* If OpenCL caching is ON, but the current cache is not
               valid => update it */
            try
            {
                writeBinaryToCache(program, cacheFilename);
            }
            catch (GromacsException &e)
            {
                // Failing to write the cache is not a critical error
                formatExceptionMessageToFile(fplog, e);
            }
        }
    }
    if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
    {
        /* If dumping intermediate files has been requested and this is an NVIDIA card
           => write PTX to file */
        char buffer[STRLEN];

        cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
        if (cl_error != CL_SUCCESS)
        {
            GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
        }
        std::string ptxFilename = buffer;
        ptxFilename += ".ptx";

        try
        {
            writeBinaryToCache(program, ptxFilename);
        }
        catch (GromacsException &e)
        {
            // Failing to write the cache is not a critical error
            formatExceptionMessageToFile(fplog, e);
        }
    }

    return program;
}
Пример #8
0
//! This function is documented in the header file
void findGpus(gmx_gpu_info_t *gpu_info)
{
    cl_uint         ocl_platform_count;
    cl_platform_id *ocl_platform_ids;
    cl_device_type  req_dev_type = CL_DEVICE_TYPE_GPU;

    ocl_platform_ids = nullptr;

    if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
    {
        req_dev_type = CL_DEVICE_TYPE_CPU;
    }

    while (true)
    {
        cl_int status = clGetPlatformIDs(0, nullptr, &ocl_platform_count);
        if (CL_SUCCESS != status)
        {
            GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
                                                           status) + ocl_get_error_string(status)));
        }

        if (1 > ocl_platform_count)
        {
            // TODO this should have a descriptive error message that we only support one OpenCL platform
            break;
        }

        snew(ocl_platform_ids, ocl_platform_count);

        status = clGetPlatformIDs(ocl_platform_count, ocl_platform_ids, nullptr);
        if (CL_SUCCESS != status)
        {
            GMX_THROW(gmx::InternalError(gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ",
                                                           status) + ocl_get_error_string(status)));
        }

        for (unsigned int i = 0; i < ocl_platform_count; i++)
        {
            cl_uint ocl_device_count;

            /* If requesting req_dev_type devices fails, just go to the next platform */
            if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, 0, nullptr, &ocl_device_count))
            {
                continue;
            }

            if (1 <= ocl_device_count)
            {
                gpu_info->n_dev += ocl_device_count;
            }
        }

        if (1 > gpu_info->n_dev)
        {
            break;
        }

        snew(gpu_info->gpu_dev, gpu_info->n_dev);

        {
            int           device_index;
            cl_device_id *ocl_device_ids;

            snew(ocl_device_ids, gpu_info->n_dev);
            device_index = 0;

            for (unsigned int i = 0; i < ocl_platform_count; i++)
            {
                cl_uint ocl_device_count;

                /* If requesting req_dev_type devices fails, just go to the next platform */
                if (CL_SUCCESS != clGetDeviceIDs(ocl_platform_ids[i], req_dev_type, gpu_info->n_dev, ocl_device_ids, &ocl_device_count))
                {
                    continue;
                }

                if (1 > ocl_device_count)
                {
                    break;
                }

                for (unsigned int j = 0; j < ocl_device_count; j++)
                {
                    gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_platform_id = ocl_platform_ids[i];
                    gpu_info->gpu_dev[device_index].ocl_gpu_id.ocl_device_id   = ocl_device_ids[j];

                    gpu_info->gpu_dev[device_index].device_name[0] = 0;
                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_NAME, sizeof(gpu_info->gpu_dev[device_index].device_name), gpu_info->gpu_dev[device_index].device_name, nullptr);

                    gpu_info->gpu_dev[device_index].device_version[0] = 0;
                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VERSION, sizeof(gpu_info->gpu_dev[device_index].device_version), gpu_info->gpu_dev[device_index].device_version, nullptr);

                    gpu_info->gpu_dev[device_index].device_vendor[0] = 0;
                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_VENDOR, sizeof(gpu_info->gpu_dev[device_index].device_vendor), gpu_info->gpu_dev[device_index].device_vendor, nullptr);

                    gpu_info->gpu_dev[device_index].compute_units = 0;
                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(gpu_info->gpu_dev[device_index].compute_units), &(gpu_info->gpu_dev[device_index].compute_units), nullptr);

                    gpu_info->gpu_dev[device_index].adress_bits = 0;
                    clGetDeviceInfo(ocl_device_ids[j], CL_DEVICE_ADDRESS_BITS, sizeof(gpu_info->gpu_dev[device_index].adress_bits), &(gpu_info->gpu_dev[device_index].adress_bits), nullptr);

                    gpu_info->gpu_dev[device_index].vendor_e = get_vendor_id(gpu_info->gpu_dev[device_index].device_vendor);

                    gpu_info->gpu_dev[device_index].stat = is_gmx_supported_gpu_id(gpu_info->gpu_dev + device_index);

                    if (egpuCompatible == gpu_info->gpu_dev[device_index].stat)
                    {
                        gpu_info->n_dev_compatible++;
                    }

                    device_index++;
                }
            }

            gpu_info->n_dev = device_index;

            /* Dummy sort of devices -  AMD first, then NVIDIA, then Intel */
            // TODO: Sort devices based on performance.
            if (0 < gpu_info->n_dev)
            {
                int last = -1;
                for (int i = 0; i < gpu_info->n_dev; i++)
                {
                    if (OCL_VENDOR_AMD == gpu_info->gpu_dev[i].vendor_e)
                    {
                        last++;

                        if (last < i)
                        {
                            gmx_device_info_t ocl_gpu_info;
                            ocl_gpu_info            = gpu_info->gpu_dev[i];
                            gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
                            gpu_info->gpu_dev[last] = ocl_gpu_info;
                        }
                    }
                }

                /* if more than 1 device left to be sorted */
                if ((gpu_info->n_dev - 1 - last) > 1)
                {
                    for (int i = 0; i < gpu_info->n_dev; i++)
                    {
                        if (OCL_VENDOR_NVIDIA == gpu_info->gpu_dev[i].vendor_e)
                        {
                            last++;

                            if (last < i)
                            {
                                gmx_device_info_t ocl_gpu_info;
                                ocl_gpu_info            = gpu_info->gpu_dev[i];
                                gpu_info->gpu_dev[i]    = gpu_info->gpu_dev[last];
                                gpu_info->gpu_dev[last] = ocl_gpu_info;
                            }
                        }
                    }
                }
            }

            sfree(ocl_device_ids);
        }

        break;
    }

    sfree(ocl_platform_ids);
}