/**
 * Builds this program and prints the per-device build log to standard output.
 *
 * @param devices Devices to build for. When the list is empty the program is
 *                built with a null device list (num_devices == 0), and the
 *                status/log are then reported for every device returned by
 *                this->devices().
 * @param options Compiler options string forwarded to clBuildProgram.
 * @return true when every inspected device reports CL_BUILD_SUCCESS,
 *         false otherwise.
 * @throws OpenCLException when clBuildProgram fails with anything other than
 *         CL_BUILD_PROGRAM_FAILURE (that one is reported through the return
 *         value and the printed log instead).
 */
bool Program::build(const Epic::Core::Array<Device>& devices, const Epic::Core::ASCIIString& options) const
{
    Array<cl_device_id> deviceArray;
    size_t numDevices = 0;

    // Only translate the device list when the caller actually supplied one.
    // (The previous condition was inverted, so an explicit device list was
    // never forwarded to clBuildProgram.)
    if(!devices.isEmpty())
    {
        numDevices = devices.count();
        deviceArray = deviceIDHelper(devices);
    }

    // clBuildProgram requires device_list to be NULL exactly when
    // num_devices is 0, so never hand it a non-null pointer with a zero count.
    const cl_device_id* deviceList = (numDevices > 0) ? deviceArray.data() : nullptr;

    cl_int err = clBuildProgram(programHandle(), static_cast<cl_uint>(numDevices), deviceList, options.data(), nullptr, nullptr);

    /* Skip CL_BUILD_PROGRAM_FAILURE error, will be handled later in this code. */
    if((err != CL_BUILD_PROGRAM_FAILURE) && (err != CL_SUCCESS))
    {
        throw OpenCLException(err);
    }

    // Select the devices whose build status is inspected below.
    Epic::Core::Array<Device> devs;
    if(devices.isEmpty())
    {
        devs = this->devices();
    }
    else
    {
        devs = devices;
    }

    bool ret = true;
    for(size_t i = 0; i < devs.count(); i++)
    {
        // Wait until the build has finished for this device before reading
        // the log, so that the printed log is the final one.
        cl_build_status status = buildStatus(devs[i]);
        while(status == CL_BUILD_IN_PROGRESS)
        {
            status = buildStatus(devs[i]);
        }

        cout << "Build log for device " << devs[i].deviceID() << endl;
        cout << buildLog(devs[i]) << endl;

        if(status == CL_BUILD_SUCCESS)
        {
            cout << "Program compilation successful" << endl;
        }
        else
        {
            cout << "Program compilation failed" << endl;
        }

        ret &= (status == CL_BUILD_SUCCESS);
    }

    return ret;
}
/*!
 * \brief Checks whether the build process was successful or not.
 *
 * Returns immediately when \p buildErr is CL_SUCCESS. Otherwise the build
 * status of every device of the program's context is queried and the build
 * log of each device that did not build successfully is collected into one
 * message.
 *
 * \param buildErr Error code to check (expected to be the result of the
 *                 program build call).
 * \throws std::runtime_error whenever \p buildErr is not CL_SUCCESS. The
 *         message contains the collected per-device build logs, or the raw
 *         error code when no device reported a failed build.
 */
void ocl::Program::checkBuild(cl_int buildErr) const
{
    // Exiting the program is not acceptable.
    if ( buildErr == CL_SUCCESS ) return;

    std::ostringstream oss;
    bool headerSet = false;

    for ( auto const& device : _context->devices() )
    {
        cl_build_status buildStatus = CL_SUCCESS;
        const cl_int statusErr = clGetProgramBuildInfo( _id, device.id(), CL_PROGRAM_BUILD_STATUS,
                                                        sizeof buildStatus, &buildStatus, nullptr );

        // A device whose status cannot be queried, or that built fine,
        // contributes nothing to the report.
        if ( statusErr != CL_SUCCESS || buildStatus == CL_SUCCESS ) continue;

        if ( !headerSet )
        {
            oss << "Program failed to build.\n";
            headerSet = true;
        }

        // Two-step query: first the log size, then the log itself. The log is
        // left empty when either query fails or the reported size is zero, so
        // no zero-length buffer is ever read as a C string.
        size_t size = 0u;
        std::string buildLog;
        if ( clGetProgramBuildInfo( _id, device.id(), CL_PROGRAM_BUILD_LOG, 0u, nullptr, &size ) == CL_SUCCESS
             && size > 0u )
        {
            buildLog.resize( size );
            if ( clGetProgramBuildInfo( _id, device.id(), CL_PROGRAM_BUILD_LOG, size, &buildLog[0], nullptr ) != CL_SUCCESS )
            {
                buildLog.clear();
            }
        }

        // c_str() stops at the first terminator, like streaming the raw
        // character buffer did.
        oss << "Device " << device.name() << " Build Log:\n" << buildLog.c_str() << '\n';
    }

    // buildErr signals a failure even when no device reported one; do not
    // swallow it silently.
    if ( !headerSet )
    {
        oss << "Program failed to build.\n"
            << "OpenCL error code " << buildErr << " (no per-device build log available).\n";
    }

    throw std::runtime_error( oss.str() );
}
// Compile the kernels that this plan uses, and store into the plan clfftStatus FFTAction::compileKernels( const cl_command_queue commQueueFFT, const clfftPlanHandle plHandle, FFTPlan* fftPlan ) { cl_int status = 0; size_t deviceListSize = 0; FFTRepo& fftRepo = FFTRepo::getInstance( ); // create a cl program executable for the device associated with command queue // Get the device cl_device_id &q_device = fftPlan->bakeDevice; cl_program program; if( fftRepo.getclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context ) == CLFFT_INVALID_PROGRAM ) { FFTBinaryLookup lookup (this->getGenerator(), plHandle, fftPlan->context, q_device); lookup.variantRaw(this->getSignatureData(), this->getSignatureData()->datasize); if (lookup.found()) { #if FFT_CACHE_DEBUG // debug message in debug mode to ensure that the cache is used fprintf(stderr, "Kernel loaded from cache\n"); #endif program = lookup.getProgram(); } else { #if FFT_CACHE_DEBUG fprintf(stderr, "Kernel built from source\n"); #endif // If the user wishes us to write the kernels out to disk, we do so if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS ) { OPENCL_V( writeKernel( plHandle, this->getGenerator(), this->getSignatureData(), fftPlan->context, fftPlan->bakeDevice ), _T( "writeKernel failed." ) ); } std::string programCode; OPENCL_V( fftRepo.getProgramCode( this->getGenerator(), this->getSignatureData(), programCode, q_device, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) ); const char* source = programCode.c_str(); program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status ); OPENCL_V( status, _T( "clCreateProgramWithSource failed." 
) ); // create a cl program executable for the device associated with command queue #if defined(DEBUGGING) status = clBuildProgram( program, 1, &q_device, "-g -cl-opt-disable", NULL, NULL); // good for debugging kernels // if you have trouble creating smbols that GDB can pick up to set a breakpoint after kernels are loaded into memory // this can be used to stop execution to allow you to set a breakpoint in a kernel after kernel symbols are in memory. #ifdef DEBUG_BREAK_GDB __debugbreak(); #endif #else status = clBuildProgram( program, 1, &q_device, "", NULL, NULL); #endif if( status != CL_SUCCESS ) { if( status == CL_BUILD_PROGRAM_FAILURE ) { size_t buildLogSize = 0; OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ), _T( "clGetProgramBuildInfo failed" ) ); std::vector< char > buildLog( buildLogSize ); ::memset( &buildLog[ 0 ], 0x0, buildLogSize ); OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ), _T( "clGetProgramBuildInfo failed" ) ); std::cerr << "\n\t\t\tBUILD LOG\n"; std::cerr << "************************************************\n"; std::cerr << &buildLog[ 0 ] << std::endl; std::cerr << "************************************************\n"; } OPENCL_V( status, _T( "clBuildProgram failed" ) ); } lookup.setProgram(program, source); lookup.populateCache(); } fftRepo.setclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context ); // For real transforms we compile either forward or backward kernel bool buildFwdKernel = buildForwardKernel(); bool buildBwdKernel = buildBackwardKernel(); // get a kernel object handle for a kernel with the given name cl_kernel kernel; if( buildFwdKernel ) { if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_FORWARD, entryPoint, q_device, 
fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); fftRepo.setclKernel( program, CLFFT_FORWARD, kernel ); } } if( buildBwdKernel ) { if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_BACKWARD, entryPoint, q_device, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); fftRepo.setclKernel( program, CLFFT_BACKWARD, kernel ); } } } return CLFFT_SUCCESS; }
int Parallel::setup() { /** * OpenCL initialization. */ cl_int status = Simulator::setup(); CheckStatus(status, "Simulator::setup() failed."); cl_uint numPlatforms; status = clGetPlatformIDs(0, NULL, &numPlatforms); CheckStatus(status, "clGetPlatformIDs, fetching number"); DEBUG_STDOUT("Number of platforms: " << numPlatforms); cl_platform_id platform = NULL; if (numPlatforms > 0) { std::unique_ptr<cl_platform_id[]> platforms (new cl_platform_id[numPlatforms]); status = clGetPlatformIDs(numPlatforms, platforms.get(), NULL); CheckStatus(status, "clGetPlatformIDs, fetching platforms"); for (unsigned i = 0; i < numPlatforms; ++i) { char pbuf[100]; status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); CheckStatus(status, "clGetPlatformInfo"); } // Just grab the first platform. platform = platforms[0]; } CheckConditional(platform != NULL, "platform == NULL"); cl_uint numDevices; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); CheckStatus(status, "clGetDeviceIDs: fetching number"); DEBUG_STDOUT("Number of devices: " << numDevices); cl_device_id *devices = new cl_device_id[numDevices]; status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, numDevices, devices, NULL); CheckStatus(status, "clGetDeviceIDs: fetching devices"); int deviceIndex = 0; for (unsigned i = 0; i < numDevices; ++i) { char pbuf[100]; status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL); if (!strncmp(pbuf, "ATI", 3)) { deviceIndex = i; } } /* Create the context. 
*/ context = clCreateContext(0, numDevices, devices, NULL, NULL, &status); CheckConditional(context != NULL, "clCreateContextFromType"); /* Create command queue */ cl_command_queue_properties prop = CL_QUEUE_PROFILING_ENABLE; commandQueue = clCreateCommandQueue(context, devices[deviceIndex], prop, &status); CheckStatus(status, "clCreateCommandQueue"); /* Create a CL program using the kernel source */ SDKFile kernelFile; std::string kernelPath = getenv("HOME") + std::string("/md-simulator/src/TestKernel.cl"); if(!kernelFile.open(kernelPath.c_str())) { DEBUG_STDERR("Failed to load kernel file : " << kernelPath); return MD_FAILURE; } const char *source = kernelFile.source().c_str(); size_t sourceSize[] = {strlen(source)}; program = clCreateProgramWithSource(context, 1, &source, sourceSize, &status); CheckStatus(status, "clCreateProgramWithSource"); /* Create a cl program executable for all the devices specified */ status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL); if (status != CL_SUCCESS) { if (status == CL_BUILD_PROGRAM_FAILURE) { cl_int logStatus; std::unique_ptr<char[]> buildLog (nullptr); //char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(program, devices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog.get(), &buildLogSize); CheckStatus(logStatus, "clGetProgramBuildInfo"); buildLog = std::unique_ptr<char[]>(new char[buildLogSize]); if(!buildLog) { return MD_FAILURE; } std::fill_n(buildLog.get(), buildLogSize, 0); logStatus = clGetProgramBuildInfo(program, devices[deviceIndex], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog.get(), NULL); CheckStatus(logStatus, "clGetProgramBuildInfo (2)"); DEBUG_STDERR("\n\t\t\tBUILD LOG\n"); DEBUG_STDERR("************************************************\n"); DEBUG_STDERR(buildLog.get()); DEBUG_STDERR("************************************************\n"); } } CheckStatus(status, "clBuildProgram"); /* Get a kernel object handle for a kernel with the given name */ 
kernel = clCreateKernel(program, "computeAccelerations", &status); CheckStatus(status, "clCreateKernel"); /* Check group size against group size returned by kernel */ status = clGetKernelWorkGroupInfo(kernel, devices[deviceIndex], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0); CheckStatus(status, "clGetKernelWorkGroupInfo"); DEBUG_STDOUT("kernelWorkGroupSize: " << kernelWorkGroupSize); /** * Initialize some simulator data structures. */ global = particleCount * particleCount; local = particleCount; if (global * local > kernelWorkGroupSize) { DEBUG_STDERR("WARNING - global * local > kernelWorkGroupSize; global: " << global << ", local: " << local << ", kernelWorkGroupSize: " << kernelWorkGroupSize); return MD_FAILURE; } // Data holds the molecule positions. data = std::unique_ptr<float[]> (new float[particleCount * 3]); // Constants holds simulator constants. constants = std::unique_ptr<float[]> (new float[NUM_CONSTANTS]); // Copy constants to buffer; constants[0] = epsilon; constants[1] = sigma; constants[2] = negForceCutoffMinusHalf; constants[3] = forceCutoffMinusHalf; constants[4] = wallStiffness; // Results holds pairwise forces. results = std::unique_ptr<float[]> (new float[particleCount * particleCount * 3]); return MD_SUCCESS; }