/**
 * Builds the OpenCL program stored in `kernelFile` for a single device and
 * returns the first kernel the program defines.
 *
 * @param kernelFile File name of the kernel source, appended to
 *                   LSS_KERNEL_DIRECTORY to form the full path.
 * @param device     Container whose `device` member is dereferenced twice to
 *                   obtain the cl::Device to build for.
 * @return The first kernel created from the built program.
 * @throws int (-1) when the source file cannot be opened, when the build or
 *         kernel creation fails, or when the program defines no kernels.
 */
cl::Kernel SpatialSEIR::OCLProvider::buildProgramForKernel(std::string kernelFile, DeviceContainer* device)
{
    int err = 1;
    std::vector<cl::Device> devices;
    devices.push_back(**(device -> device));
    std::string log;

    // LSS_KERNEL_DIRECTORY is set at compile time: install directory of the OpenCL kernels.
    const std::string kernelPath = std::string(LSS_KERNEL_DIRECTORY) + kernelFile;

    std::ifstream programFile(kernelPath.c_str());
    if (!programFile.is_open())
    {
        // Fail early with a precise message instead of handing an empty
        // source string to the OpenCL compiler.
        std::cerr << "Error building OpenCL Kernel, unable to open kernel file: "
                  << kernelPath << "\n";
        throw(-1);
    }

    std::string programString(std::istreambuf_iterator<char>(programFile),
                              (std::istreambuf_iterator<char>()));
    cl::Program::Sources source(1, std::make_pair(programString.c_str(),
                                                  programString.length() + 1));
    cl::Program program(**currentContext, source);
    std::vector<cl::Kernel> kernels;

    try
    {
        err = program.build(devices);
        log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
        if (log.find("warning") != std::string::npos)
        {
            lssCout << "Warnings generated while building kernel.\n";
            lssCout << "CL_PROGRAM_BUILD_LOG: \n" << log << "\n";
        }
        program.createKernels(&kernels);
    }
    catch (const cl::Error& e)
    {
        lssCout << "CL Error in: " << e.what() << "\n";
        lssCout << "CL Error: " << e.err() << "\n";
        log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
        err = e.err();
    }

    // A clean build that produced no kernels is still a failure for the
    // caller: there is nothing valid to return.
    const bool noKernels = (err == 0 && kernels.empty());
    if (err != 0 || noKernels)
    {
        std::cerr << "Error building OpenCL Kernel, code: " << err << "\n";
        if (noKernels)
        {
            std::cerr << "Program built successfully but defines no kernels.\n";
        }
        std::cerr << "Looking for kernel file here: " << kernelPath << "\n";
        std::cerr << "Build Log: \n" << log << "\n";
        std::cerr << "Kernel Source: \n" << programString.c_str() << "\n";
        throw(-1);
    }
    return(kernels[0]);
}
int main(void) { cl::vector<cl::Platform> platforms; cl::vector<cl::Device> devices; cl::Event profileEvent; cl_ulong start, end; int data[10]; try { // Place the GPU devices of the first platform into a context cl::Platform::get(&platforms); platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); cl::Context context(devices); // Create kernel std::ifstream programFile(PROGRAM_FILE); std::string programString(std::istreambuf_iterator<char>(programFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(programString.c_str(), programString.length()+1)); cl::Program program(context, source); program.build(devices); cl::Kernel kernel(program, KERNEL_FUNC); // Create buffer and make it a kernel argument cl::Buffer buffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(data), data); kernel.setArg(0, buffer); // Enqueue kernel-execution command with profiling event cl::CommandQueue queue(context, devices[0], CL_QUEUE_PROFILING_ENABLE); queue.enqueueTask(kernel, NULL, &profileEvent); queue.finish(); // Configure event processing start = profileEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>(); end = profileEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>(); std::cout << "Elapsed time: " << (end - start) << " ns." << std::endl; } catch(cl::Error e) { std::cout << e.what() << ": Error code " << e.err() << std::endl; } return 0; }
int main(void) { cl::vector<cl::Platform> platforms; cl::vector<cl::Device> devices; cl::vector<cl::Kernel> allKernels; std::string kernelName; try { // Place the GPU devices of the first platform into a context cl::Platform::get(&platforms); platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); cl::Context context(devices); // Create and build program std::ifstream programFile("kernels.cl"); std::string programString(std::istreambuf_iterator<char>(programFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(programString.c_str(), programString.length()+1)); cl::Program program(context, source); program.build(devices); // Create individual kernels cl::Kernel addKernel(program, "add"); cl::Kernel subKernel(program, "subtract"); cl::Kernel multKernel(program, "multiply"); // Create all kernels in program program.createKernels(&allKernels); for(unsigned int i=0; i<allKernels.size(); i++) { kernelName = allKernels[i].getInfo<CL_KERNEL_FUNCTION_NAME>(); std::cout << "Kernel: " << kernelName << std::endl; } } catch(cl::Error e) { std::cout << e.what() << ": Error code " << e.err() << std::endl; } return 0; }
/**
 * Loads an ASCII ARB program from `filename` and submits it to OpenGL for
 * `target` via glProgramStringARB.
 *
 * @param target   Program target passed to glProgramStringARB and
 *                 glGetProgramivARB.
 * @param filename Path of the program text file.
 * @return true when the file was read and OpenGL reported no error position;
 *         false when the file cannot be opened, sized or read, or when
 *         GL_PROGRAM_ERROR_POSITION_ARB is not -1.
 */
bool LoadProgram(GLenum target, char * filename)
{
    //Open file
    std::ifstream programFile(filename, std::ios::in | std::ios::binary);
    if(programFile.fail())
    {
        printf("Unable to open %s\n", filename);
        return false;
    }

    //calculate the size of the file
    programFile.seekg(0, std::ios::end);
    const std::streamoff programSize=programFile.tellg();
    programFile.seekg(0, std::ios::beg);
    if(programSize<0 || programFile.fail())
    {
        // tellg() reports failure as -1; never use that as a buffer size.
        printf("Unable to determine size of %s\n", filename);
        return false;
    }

    //read file into a buffer that releases itself on every return path
    std::vector<char> programText(static_cast<std::size_t>(programSize));
    if(!programText.empty())
        programFile.read(&programText[0], programSize);
    if(programFile.fail())
    {
        printf("Unable to read %s\n", filename);
        return false;
    }
    programFile.close();

    //Send program string to OpenGL
    glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB,
                       static_cast<GLsizei>(programText.size()),
                       programText.empty() ? "" : &programText[0]);

    //Output position of any error
    //Starts at -1 ("no error") so a failed query does not leave it indeterminate
    int programErrorPos=-1;
    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &programErrorPos);
    if(programErrorPos!=-1)
        LOG::Instance()->OutputError("Program error at position %d in %s", programErrorPos, filename);

    //Output error/warning messages if any
    //The string pointer is checked before use because glGetString may return NULL
    const GLubyte * programErrorString=glGetString(GL_PROGRAM_ERROR_STRING_ARB);
    if(programErrorString && programErrorString[0]!='\0')
    {
        LOG::Instance()->OutputMisc("Program Error String for %s:\n%s", filename, programErrorString);
    }

    //Is the program under native limits? (Not supported by NV_fragment_program)
    if(target!=GL_FRAGMENT_PROGRAM_NV)
    {
        GLint underNativeLimits;
        glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &underNativeLimits);
        if(underNativeLimits==0)
            LOG::Instance()->OutputError("%s exceeds native limits", filename);
    }

    //Return false in case of error
    if(programErrorPos!=-1)
        return false;

    LOG::Instance()->OutputSuccess("%s loaded successfully", filename);
    return true;
}
void NASM::parseLstFile(QFile &lst, QVector<Assembler::LineNum> &lines, quint64 offset) { bool inTextSection = false; QRegExp sectionTextRegExp("SECTION\\s+\\.?(text|code)"); sectionTextRegExp.setCaseSensitivity(Qt::CaseInsensitive); QRegExp sectionRegExp("SECTION"); sectionRegExp.setCaseSensitivity(Qt::CaseInsensitive); QList<QPair<quint64, QString> > instrList; QTextStream lstStream(&lst); lstStream.seek(0); while (!lstStream.atEnd()) { QString line = lstStream.readLine(); if (line.indexOf(QRegExp("^ +[0-9]+ +<[0-9]+>")) != -1) { //macro continue; } if (line.indexOf(sectionTextRegExp) != -1) { inTextSection = true; } else if (line.indexOf(sectionRegExp) != -1) { inTextSection = false; } //! omit strings with data only //! if in list : line number, address, data and it is all (without instruction) - //! omit this string and subtract 1 from offset if (line.indexOf(QRegExp("^(\\s+[^\\s]+){4}")) == -1) { continue; } if (inTextSection) { QRegExp lineRegExp("^\\s+[0-9]+\\s+([0-9a-fA-F]+)\\s+\\S+\\s+(.*)"); lineRegExp.setMinimal(false); if (lineRegExp.indexIn(line) == 0) { quint64 address = lineRegExp.cap(1).toULongLong(0, 16); QString instruction = lineRegExp.cap(2).trimmed(); instrList.append(QPair<quint64, QString>(address + offset, instruction)); } } } QFile programFile(Common::pathInTemp("program.asm")); programFile.open(QFile::ReadOnly); QTextStream programStream(&programFile); //! Offset in list int i = 0; int numInCode = 0; while (!programStream.atEnd()) { if (i >= instrList.size()) { break; } QString line = programStream.readLine(); numInCode++; line = line.trimmed(); if (line == instrList[i].second) { LineNum l; l.numInCode = numInCode; l.numInMem = instrList[i].first; lines.append(l); i++; } } programFile.close(); }
int main(int argc, char** argv) { try { std::vector<cl::Platform> platforms; cl::Platform::get(&platforms); std::vector<cl::Device> platformDevices; platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &platformDevices); cl::Context context(platformDevices); auto contextDevices = context.getInfo<CL_CONTEXT_DEVICES>(); for (auto dev : contextDevices) { std::cout << "Using " << dev.getInfo<CL_DEVICE_NAME>() << std::endl; } std::ifstream programFile("mandelbrot.cl"); std::string programString(std::istreambuf_iterator<char>(programFile), (std::istreambuf_iterator<char>())); cl::Program::Sources source(1, std::make_pair(programString.c_str(), programString.length()+1)); cl::Program program(context, source); try { program.build(contextDevices); } catch (cl::Error e) { // FIXME may not be the device that failed std::cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(contextDevices[0]) << std::endl; } cl::Kernel mandelbrot(program, "mandelbrot"); // command queues std::vector<cl::CommandQueue> queues; for (auto device : contextDevices) { cl::CommandQueue queue(context, device, CL_QUEUE_PROFILING_ENABLE); queues.push_back(queue); } unsigned char* iteration_counts = new unsigned char[3500*2500]; auto start = std::chrono::high_resolution_clock::now(); // partition the "y" dimension int i = 0; int workItemsPerQueue = 2500/queues.size(); // FIXME requires work size to be evenly divisible by number of queues std::vector<cl::Buffer> outputs; for (auto queue : queues) { cl::NDRange offset(0, 0); //i*workItemsPerQueue); cl::NDRange global_size(3500, workItemsPerQueue); // storage for results per device cl_int err = CL_SUCCESS; cl::Buffer output(context, CL_MEM_WRITE_ONLY, (size_t)3500*workItemsPerQueue, (void*)NULL, &err); mandelbrot.setArg(0, output); mandelbrot.setArg(1, i*workItemsPerQueue); outputs.push_back(output); queue.enqueueNDRangeKernel(mandelbrot, offset, global_size); queue.enqueueBarrierWithWaitList(); std::cout << "enqueued range " << i*workItemsPerQueue << " of length " << 
workItemsPerQueue << std::endl; i++; } // read results unsigned char* results = new unsigned char[3500*2500]; std::vector<cl::Event> readWaitList; i = 0; for (auto queue : queues) { size_t offset = i*3500*workItemsPerQueue; cl::Event readDoneEvent; queue.enqueueReadBuffer(outputs[i], CL_FALSE, 0, 3500*workItemsPerQueue, &(results[offset]), NULL, &readDoneEvent); // NOTE: can't push onto vector until the event is valid, since it will be copied readWaitList.push_back(readDoneEvent); i++; } cl::Event::waitForEvents(readWaitList); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double> elapsed_seconds = end - start; std::cout << "computation took " << elapsed_seconds.count() << "s" << std::endl; stbi_write_png("mandelbrot_cl.png", 3500, 2500, 1, results, 3500); } catch (cl::Error e) { std::cout << e.what() << ": Error code " << e.err() << std::endl; } return 0; }