/**
 * Builds the shader program made of the "sphere.vert", "sphere.geom" and
 * "sphere.frag" sources.
 *
 * The geometry stage is configured to take points as input and to emit
 * triangle strips of at most 4 vertices.
 *
 * @return the assembled (not yet built) shader program.
 */
vtkSmartPointer<vtkShaderProgram2> createSphereShadingProgram()
{
    // vtkSmartPointer<T>::New() hands the initial reference over to the smart
    // pointer. Assigning the raw result of T::New() instead makes the smart
    // pointer take a second reference that is never released, which leaks
    // the object.
    vtkSmartPointer<vtkShaderProgram2> program = vtkSmartPointer<vtkShaderProgram2>::New();
    std::string code;

    // Vertex stage
    vtkSmartPointer<vtkShader2> vertex = vtkSmartPointer<vtkShader2>::New();
    vertex->SetType(VTK_SHADER_TYPE_VERTEX);
    code = readSource("sphere.vert");
    vertex->SetSourceCode(code.c_str());
    program->GetShaders()->AddItem(vertex);

    // Geometry stage
    vtkSmartPointer<vtkShader2> geometry = vtkSmartPointer<vtkShader2>::New();
    geometry->SetType(VTK_SHADER_TYPE_GEOMETRY);
    code = readSource("sphere.geom");
    geometry->SetSourceCode(code.c_str());
    program->GetShaders()->AddItem(geometry);

    // Fragment stage
    vtkSmartPointer<vtkShader2> fragment = vtkSmartPointer<vtkShader2>::New();
    fragment->SetType(VTK_SHADER_TYPE_FRAGMENT);
    code = readSource("sphere.frag");
    fragment->SetSourceCode(code.c_str());
    program->GetShaders()->AddItem(fragment);

    // Geometry shader I/O layout: points in, triangle strips (4 vertices) out
    program->SetGeometryTypeIn(VTK_GEOMETRY_SHADER_IN_TYPE_POINTS);
    program->SetGeometryTypeOut(VTK_GEOMETRY_SHADER_OUT_TYPE_TRIANGLE_STRIP);
    program->SetGeometryVerticesOut(4);

    return program;
}
// Create a GLSL program object from vertex and fragment shader files GLuint InitShader(const char* vShaderFile, const char* fShaderFile) { std::string sources[2] = { readSource(vShaderFile), readSource(fShaderFile) }; GLenum types[2] = { GL_VERTEX_SHADER, GL_FRAGMENT_SHADER }; GLuint program = glCreateProgram(); for ( int i = 0; i < 2; ++i ) { if ( sources[i].c_str() == NULL ) { std::cerr << "Failed to read " << i << std::endl; exit( EXIT_FAILURE ); } GLuint shader = glCreateShader( types[i] ); const char *src = sources[i].c_str(); glShaderSource( shader, 1, (const GLchar**) &src, NULL ); glCompileShader( shader ); GLint compiled; glGetShaderiv( shader, GL_COMPILE_STATUS, &compiled ); if ( !compiled ) { std::cerr << " failed to compile:" << std::endl; GLint logSize; glGetShaderiv( shader, GL_INFO_LOG_LENGTH, &logSize ); char* logMsg = new char[logSize]; glGetShaderInfoLog( shader, logSize, NULL, logMsg ); std::cerr << logMsg << std::endl; delete [] logMsg; exit( EXIT_FAILURE ); } glAttachShader( program, shader ); } /* link and error check */ glLinkProgram(program); GLint linked; glGetProgramiv( program, GL_LINK_STATUS, &linked ); if ( !linked ) { std::cerr << "Shader program failed to link" << std::endl; GLint logSize; glGetProgramiv( program, GL_INFO_LOG_LENGTH, &logSize); char* logMsg = new char[logSize]; glGetProgramInfoLog( program, logSize, NULL, logMsg ); std::cerr << logMsg << std::endl; delete [] logMsg; exit( EXIT_FAILURE ); } /* use program object */ glUseProgram(program); return program; }
bool CineUnpacker::unpack(const byte *src, unsigned int srcLen, byte *dst, unsigned int dstLen) { // Initialize variables used for detecting errors during unpacking _error = false; _srcBegin = src; _srcEnd = src + srcLen; _dstBegin = dst; _dstEnd = dst + dstLen; // Initialize other variables _src = _srcBegin + srcLen - 4; uint32 unpackedLength = readSource(); // Unpacked length in bytes _dst = _dstBegin + unpackedLength - 1; _crc = readSource(); _chunk32b = readSource(); _crc ^= _chunk32b; while (_dst >= _dstBegin && !_error) { /* Bits => Action: 0 0 => unpackRawBytes(3 bits + 1) i.e. unpackRawBytes(1..8) 1 1 1 => unpackRawBytes(8 bits + 9) i.e. unpackRawBytes(9..264) 0 1 => copyRelocatedBytes(8 bits, 2) i.e. copyRelocatedBytes(0..255, 2) 1 0 0 => copyRelocatedBytes(9 bits, 3) i.e. copyRelocatedBytes(0..511, 3) 1 0 1 => copyRelocatedBytes(10 bits, 4) i.e. copyRelocatedBytes(0..1023, 4) 1 1 0 => copyRelocatedBytes(12 bits, 8 bits + 1) i.e. copyRelocatedBytes(0..4095, 1..256) */ if (!nextBit()) { // 0... if (!nextBit()) { // 0 0 unsigned int numBytes = getBits(3) + 1; unpackRawBytes(numBytes); } else { // 0 1 unsigned int numBytes = 2; unsigned int offset = getBits(8); copyRelocatedBytes(offset, numBytes); } } else { // 1... unsigned int c = getBits(2); if (c == 3) { // 1 1 1 unsigned int numBytes = getBits(8) + 9; unpackRawBytes(numBytes); } else if (c < 2) { // 1 0 x unsigned int numBytes = c + 3; unsigned int offset = getBits(c + 9); copyRelocatedBytes(offset, numBytes); } else { // 1 1 0 unsigned int numBytes = getBits(8) + 1; unsigned int offset = getBits(12); copyRelocatedBytes(offset, numBytes); } } } return !_error && (_crc == 0); }
/**
 * Loads, compiles and links the shader pair "<path>.frag.glsl" /
 * "<path>.vert.glsl".
 *
 * After construction loaded_ tells whether linking succeeded. On failure the
 * fragment, vertex and linker info logs are appended to errors_.
 *
 * @param path common path prefix of the two shader source files
 */
Shader::Shader(const std::string& path)
    : path_(path), vertexShaderID_(0), fragmentShaderID_(0), programID_(0), loaded_(false)
{
    const GLchar* source[1];
    int length = 0;

    // Load the fragment shader and compile.
    // The length excludes the last element of the buffer.
    // NOTE(review): assumes readSource() appends a terminating '\0' - confirm.
    // An empty buffer is passed on as an empty source instead of being dereferenced.
    std::vector<char> fragmentSource = readSource(path + ".frag.glsl");
    source[0] = fragmentSource.empty() ? "" : &fragmentSource.front();
    length = fragmentSource.empty() ? 0 : static_cast<int>(fragmentSource.size()) - 1;
    fragmentShaderID_ = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fragmentShaderID_, 1, source, &length);
    glCompileShader(fragmentShaderID_);

    // Load the vertex shader and compile
    std::vector<char> vertexSource = readSource(path + ".vert.glsl");
    source[0] = vertexSource.empty() ? "" : &vertexSource.front();
    length = vertexSource.empty() ? 0 : static_cast<int>(vertexSource.size()) - 1;
    vertexShaderID_ = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertexShaderID_, 1, source, &length);
    glCompileShader(vertexShaderID_);

    // Create the program and link both stages
    programID_ = glCreateProgram();
    glAttachShader(programID_, fragmentShaderID_);
    glAttachShader(programID_, vertexShaderID_);
    glLinkProgram(programID_);

    // Error checking: query into a real GLint. Writing through a casted
    // pointer to loaded_ would store sizeof(GLint) bytes into a member that
    // may be smaller and corrupt whatever follows it.
    GLint linkStatus = GL_FALSE;
    glGetProgramiv(programID_, GL_LINK_STATUS, &linkStatus);
    loaded_ = (linkStatus == GL_TRUE);

    if (!loaded_)
    {
        GLchar tempErrorLog[ERROR_BUFSIZE];
        GLsizei logLength = 0;

        glGetShaderInfoLog(fragmentShaderID_, ERROR_BUFSIZE, &logLength, tempErrorLog);
        errors_ += "Fragment shader errors:\n";
        errors_ += std::string(tempErrorLog, logLength) + "\n";

        logLength = 0;
        glGetShaderInfoLog(vertexShaderID_, ERROR_BUFSIZE, &logLength, tempErrorLog);
        errors_ += "Vertex shader errors:\n";
        errors_ += std::string(tempErrorLog, logLength) + "\n";

        logLength = 0;
        glGetProgramInfoLog(programID_, ERROR_BUFSIZE, &logLength, tempErrorLog);
        errors_ += "Linker errors:\n";
        errors_ += std::string(tempErrorLog, logLength) + "\n";
    }
}
int main(int argc, char *argv[]) { int error = theConfiguration.init(argc,argv); if (EXIT_SUCCESS == error) { SourceDevice *device = 0; const char *source = theConfiguration.getSource(); error = SourcesClassFactory(source,&device); if (EXIT_SUCCESS == error) { error = startServants(argc,argv); if (EXIT_SUCCESS == error) { // start the Msg parsing & database thread pthread_t parser; error = pthread_create(&parser, NULL,internalDatabaseWriter,NULL); if (0 == error) { // then the reader loops error = readSource(device); // wait for the end of the parser DEBUG_MSG("waiting for the parser ending"); error = pthread_join(parser,NULL); } else { ERROR_MSG("pthread_create internalDatabaseWriter error %d",error); } } // (EXIT_SUCCESS == startServants ) error already printed } // !(EXIT_SUCCESS == SourcesClassFactory ) error already printed } // !(EXIT_SUCCESS == theConfiguration.init) error already printed DEBUG_MSG("Program ended"); return error; }
/**
 * Reads the kernel source from `file` and sets up the OpenCL platform,
 * device, context and command queue.
 *
 * `failed` is set when no platform or no device could be obtained. The flag
 * is sticky: a later successful step does not clear an earlier failure.
 *
 * @param file path of the OpenCL source file handed to readSource()
 */
OCLWrapper::OCLWrapper (char* file)
{
    cl_int result;
    platform_ids = NULL;
    device_id = NULL;
    arguments = vector<cl_mem > ();
    failed = false;
    printDebug = true;
    ran = false;
    built = false;
    // Start the counts at zero so a failing query cannot leave them indeterminate
    num_platforms = 0;
    num_devices = 0;

    readSource(file);

    /* Setting up opencl */
    // Get an available platform
    result = clGetPlatformIDs(1, &platform_ids, &num_platforms);
    if (result != CL_SUCCESS || num_platforms == 0)
    {
        failed = true;
    }

    // Get an available device on the platform
    result = clGetDeviceIDs(platform_ids, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &num_devices);
    if (result != CL_SUCCESS || num_devices == 0)
    {
        failed = true;
    }

    // Retrieve a context for the device
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &result);
    handleError(result, __LINE__);

    // Retrieve a command queue from the context
    command_queue = clCreateCommandQueue(context, device_id, 0, &result);
    handleError(result, __LINE__);
}
/**
 * Reads the kernel source from a file and passes it to getProgram().
 *
 * @param program         forwarded unchanged to getProgram()
 * @param context         forwarded unchanged to getProgram()
 * @param sourceFilename  path handed to readSource()
 * @return whatever getProgram() returns
 *
 * NOTE(review): the buffer returned by readSource() is not released here;
 * confirm who owns it.
 */
int BasicCL::getProgramFromFile(cl_program *program, cl_context context, const char *sourceFilename)
{
    char *sourceText = readSource(sourceFilename);
    return getProgram(program, context, sourceText);
}
void CTextViewerWindow::asSystemDefault() { QTextCodec * codec = QTextCodec::codecForLocale(); if (!codec) return; ui->textBrowser->setPlainText(codec->toUnicode(readSource())); ui->actionSystemLocale->setChecked(true); }
bool System::loadSource(const char *fileName) { // loads _programSrc char *source = readSource(fileName); if (source != NULL) { free(source); return true; } return false; }
void ReadMatlabSettings:: checkIfReady() { bool finished = function_->hasProcessCrashed() || function_->hasProcessEnded(); string file = function_->isReady(); if (file.empty() && !finished) { QTimer::singleShot(100, this, SLOT(checkIfReady())); return; } QByteArray ba; if (function_->getProcess()) ba = function_->getProcess()->readAllStandardOutput(); QString s( ba ); s = s.trimmed(); if (s.isEmpty()) TaskInfo("ReadMatlabSettings %s: no output", function_->matlabFunction().c_str()); else TaskInfo("ReadMatlabSettings %s: output: %s", function_->matlabFunction().c_str(), s.toLatin1().data()); bool success = false; if (!file.empty()) try { switch (type_) { case MetaData_Settings: readSettings(file); break; case MetaData_Source: readSource(file); break; default: EXCEPTION_ASSERT( false ); break; } success = true; } catch (const runtime_error& x) { TaskInfo("ReadMatlabSettings::%s %s", vartype(x).c_str(), x.what()); s += "\n"; s += vartype(x).c_str(); s += ": "; s += x.what(); } if (!success) emit failed( settings.scriptname().c_str(), s ); if (deletethis_) delete this; }
/**
 * Creates a shader object of the given type and attaches the source read
 * from `filename` to it. The shader is not compiled here.
 *
 * @param type      shader type passed to glCreateShader()
 * @param filename  path handed to readSource()
 * @throws Exception when glCreateShader() fails to create a shader object
 */
Shader::Shader(GLenum type, const char* filename)
{
    m_compiled = false;

    m_id = glCreateShader(type);
    if (m_id == 0)
    {
        // glGetString() only accepts names such as GL_VENDOR or GL_VERSION;
        // given an error code it returns NULL. Translate the error code into
        // a message here instead.
        const char* reason;
        switch (glGetError())
        {
            case GL_INVALID_ENUM:
                reason = "glCreateShader failed: GL_INVALID_ENUM (unsupported shader type)";
                break;
            case GL_OUT_OF_MEMORY:
                reason = "glCreateShader failed: GL_OUT_OF_MEMORY";
                break;
            case GL_INVALID_OPERATION:
                reason = "glCreateShader failed: GL_INVALID_OPERATION";
                break;
            default:
                reason = "glCreateShader failed";
                break;
        }
        throw Exception(reason);
    }

    // NOTE(review): presumably readSource() fills m_source - confirm
    readSource(filename);
    glShaderSource(m_id, 1, (const GLchar**) &m_source, NULL);
}
unsigned int CineUnpacker::nextBit() { unsigned int carry = rcr(false); // Normally if the chunk becomes zero then the carry is one as // the end of chunk marker is always the last to be shifted out. if (_chunk32b == 0) { _chunk32b = readSource(); _crc ^= _chunk32b; carry = rcr(true); // Put the end of chunk marker in the most significant bit } return carry; }
int main(int argc, char *argv[]) { QString inputFileName = "input.pas"; if (argc > 1) inputFileName = argv[1]; QString result = QString("\nResults for file: \"%1\"\n").arg(getRelativeFileName(inputFileName) ); QString source = readSource(inputFileName); if (source.isEmpty() ) { printf("File not found or empty!"); system("pause"); return 0; } cleanSource(source); uint practicalGlobalCalls = 0, // Aup availableGlobalCalls = 0, // Pup blocksCount = 0, varsCount = 0; countGlobalVars(source, practicalGlobalCalls, availableGlobalCalls, blocksCount, varsCount); result += QString("\nVariables: %1\nBlocks: %2\n\n").arg(varsCount).arg(blocksCount); result += QString("\n[Aup/Pup]: %1/%2\n\n").arg(practicalGlobalCalls).arg(availableGlobalCalls); result += "Rup = "; if (availableGlobalCalls != 0) { result += (QString::number( (float)practicalGlobalCalls/availableGlobalCalls, 'g', 10) ); } else { result += "N/A"; } result += "\n\n"; writeResult(result, "output.txt"); system("pause"); return 0; }
/*
 * Reads one item from a ClosingSource.
 *
 * Returns EOF when the source is closed or has already ended. Otherwise the
 * read is forwarded to the primary source; the first EOF coming from it
 * marks this source as ended.
 */
int readClosingSource(Source * that) {
  ClosingSource * self = (ClosingSource *)that;
  int result;

  if (self->closed || self->ended) {
    return EOF;
  }

  result = readSource(self->primary);
  if (result == EOF) {
    /* Remember that the primary source is exhausted. */
    self->ended = !0;
  }
  return result;
}
/**
 * Reads a graph file (.gr) and a source specification file (.ss).
 *
 * Usage: serial [filename (.gr)] [filename (.ss)]
 *
 * @return 0 on success; exits with -1 when arguments are missing
 */
int main(int argc, char** argv)
{
    if (argc < 3) {
        printf("Usage: serial [filename (.gr)] [filename (.ss)]\n");
        exit(-1);
    }

    // Counts filled in by readGraph()
    long long vNum = 0;
    long long eNum = 0;
    char* graphfile = argv[1];
    char* specfile = argv[2];

    List *edgelist, *sourcelist;
    edgelist = readGraph(graphfile, &vNum, &eNum);
    // Store into the declared local; the original assigned to the
    // undeclared name "sourceList".
    sourcelist = readSource(specfile);

    return 0;
}
int main() { BNFInstance bnf; { ifstream fi("syntax.y"); bnf.parse(fi); } { ofstream fo("syntax2.y"); bnf.dump(fo); } string types[] = {"SLR", "LALR", "CLR"}; for (auto type : types) { LROutput_Print printer(type, "o_" + type + ".txt"); LRParser parser(type, bnf, &printer); parser.parse(readSource("source.txt")); } }
void ShaderProgram::init(const char *vsFile, const char *fsFile){ // Create the shaders std::cout << "using vertex shader: " << vsFile << std::endl; std::cout << "using fragment shader: " << fsFile << std::endl; shaderVP = glCreateShader(GL_VERTEX_SHADER); shaderFP = glCreateShader(GL_FRAGMENT_SHADER); const char *vp = readSource(vsFile); const char *fp = readSource(fsFile); if (vp == NULL || fp == NULL) { std::cerr << "ShaderProgram::Init ERROR: ONE OR BOTH FILES NOT FOUND!" << std::endl; exit(EXIT_FAILURE); } // Set the source codes glShaderSource(shaderVP, 1, &vp, 0); glShaderSource(shaderFP, 1, &fp, 0); delete [] vp; delete [] fp; // Compile the shader source glCompileShader(shaderVP); // Check for errors GLint isCompiled = 0; glGetShaderiv(shaderVP, GL_COMPILE_STATUS, &isCompiled); if(isCompiled == GL_FALSE) { GLint maxLength = 0; glGetShaderiv(shaderVP, GL_INFO_LOG_LENGTH, &maxLength); // The maxLength includes the NULL character std::vector<GLchar> errorLog(maxLength); glGetShaderInfoLog(shaderVP, maxLength, &maxLength, &errorLog[0]); // Provide the infolog in whatever manor you deem best. std::cout << "Vertex Shader Compilation Error:" << std::endl; for (std::vector<GLchar>::iterator it = errorLog.begin(); it != errorLog.end(); it++) { std::cout << *it; } std::cout << std::endl; // Exit with failure. glDeleteShader(shaderVP); // Don't leak the shader. exit(EXIT_FAILURE); } glCompileShader(shaderFP); isCompiled = 0; glGetShaderiv(shaderFP, GL_COMPILE_STATUS, &isCompiled); if(isCompiled == GL_FALSE) { GLint maxLength = 0; glGetShaderiv(shaderFP, GL_INFO_LOG_LENGTH, &maxLength); // The maxLength includes the NULL character std::vector<GLchar> errorLog(maxLength); glGetShaderInfoLog(shaderFP, maxLength, &maxLength, &errorLog[0]); // Provide the infolog in whatever manor you deem best. 
std::cout << "Fragment Shader Compilation Error:" << std::endl; for (std::vector<GLchar>::iterator it = errorLog.begin(); it != errorLog.end(); it++) { std::cout << *it; } std::cout << std::endl; // Exit with failure. glDeleteShader(shaderVP); glDeleteShader(shaderFP); // Don't leak the shader. exit(EXIT_FAILURE); } // Create the shader program shaderID = glCreateProgram(); // Attach the shaders to the program glAttachShader(shaderID, shaderVP); glAttachShader(shaderID, shaderFP); glLinkProgram(shaderID); GLint isLinked; glGetProgramiv(shaderID, GL_LINK_STATUS, &isLinked); if (isLinked == GL_FALSE) { GLint maxLength; glGetProgramiv(shaderID, GL_INFO_LOG_LENGTH, &maxLength); //The maxLength includes the NULL character std::vector<GLchar> infoLog(maxLength); glGetProgramInfoLog(shaderID, maxLength, &maxLength, &infoLog[0]); //The program is useless now. So delete it. glDeleteProgram(shaderID); //Provide the infolog in whatever manner you deem best. for (std::vector<GLchar>::iterator it = infoLog.begin(); it != infoLog.end(); it++) { std::cout << *it; } std::cout << std::endl; //Exit with failure. glDeleteShader(shaderVP); glDeleteShader(shaderFP); exit(EXIT_FAILURE); } glDetachShader(shaderID, shaderVP); glDetachShader(shaderID, shaderFP); glDeleteShader(shaderVP); glDeleteShader(shaderFP); glCheckErrors("After Linking"); glBindAttribLocation(shaderID, 0, "vert_position"); glCheckErrors("Bind Attrib"); std::cout << "Shader Program " << shaderID << ": " << vsFile << " + " << fsFile << std::endl; }
int main() { // Set the image rotation (in degrees) float theta = 3.14159/6; float cos_theta = cosf(theta); float sin_theta = sinf(theta); printf("theta = %f (cos theta = %f, sin theta = %f)\n", theta, cos_theta, sin_theta); // Rows and columns in the input image int imageHeight; int imageWidth; const char* inputFile = "input.bmp"; const char* outputFile = "output.bmp"; // Homegrown function to read a BMP from file float* inputImage = readImage(inputFile, &imageWidth, &imageHeight); // Size of the input and output images on the host int dataSize = imageHeight*imageWidth*sizeof(float); // Output image on the host float* outputImage = NULL; outputImage = (float*)malloc(dataSize); // Set up the OpenCL environment cl_int status; // Discovery platform cl_platform_id platforms[2]; cl_platform_id platform; status = clGetPlatformIDs(2, platforms, NULL); chk(status, "clGetPlatformIDs"); platform = platforms[PLATFORM_TO_USE]; // Discover device cl_device_id device; clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL); chk(status, "clGetDeviceIDs"); // Create context cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0}; cl_context context; context = clCreateContext(props, 1, &device, NULL, NULL, &status); chk(status, "clCreateContext"); // Create command queue cl_command_queue queue; queue = clCreateCommandQueue(context, device, 0, &status); chk(status, "clCreateCommandQueue"); // Create the input and output buffers cl_mem d_input; d_input = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &status); chk(status, "clCreateBuffer"); cl_mem d_output; d_output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status); chk(status, "clCreateBuffer"); // Copy the input image to the device status = clEnqueueWriteBuffer(queue, d_input, CL_TRUE, 0, dataSize, inputImage, 0, NULL, NULL); chk(status, "clEnqueueWriteBuffer"); const char* source = readSource("rotation.cl"); // Create a program object with source and build 
it cl_program program; program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); chk(status, "clCreateProgramWithSource"); status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); chk(status, "clBuildProgram"); // Create the kernel object cl_kernel kernel; kernel = clCreateKernel(program, "img_rotate", &status); chk(status, "clCreateKernel"); // Set the kernel arguments status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_output); status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_input); status |= clSetKernelArg(kernel, 2, sizeof(int), &imageWidth); status |= clSetKernelArg(kernel, 3, sizeof(int), &imageHeight); status |= clSetKernelArg(kernel, 4, sizeof(float), &sin_theta); status |= clSetKernelArg(kernel, 5, sizeof(float), &cos_theta); chk(status, "clSetKernelArg"); // Set the work item dimensions size_t globalSize[2] = {imageWidth, imageHeight}; status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); chk(status, "clEnqueueNDRange"); // Read the image back to the host status = clEnqueueReadBuffer(queue, d_output, CL_TRUE, 0, dataSize, outputImage, 0, NULL, NULL); chk(status, "clEnqueueReadBuffer"); // Write the output image to file storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); return 0; }
int main(int argc, char **argv) { if(argc < 2) { usage(); return -1; } //init the filter array float filter[49] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 49, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; //operate the params of cmd const char* inputFileName; const char* outputFileName; inputFileName = (argv[1]); outputFileName = (argv[2]); //the image height and width int imageHeight, imageWidth; int filterWidth = 7; //read the bmp image to the memory float* inputImage = readBmpImage(inputFileName, &imageWidth, &imageHeight); //to check the read is succ printf("the width of the image is %d, the height of the image is %d\n", imageWidth, imageHeight); //calculate the datasize int dataSize = imageHeight * imageWidth * sizeof(float); int filterSize = sizeof(float) * filterWidth * filterWidth; //output image float *outputImage = NULL; outputImage = (float*)malloc(dataSize); //set up the OpenCL environment cl_int status; //Discovery platform cl_platform_id platforms[2]; cl_platform_id platform; status = clGetPlatformIDs(2, platforms, NULL); check(status, "clGetPlatformIDs"); platform = platforms[PLATFORM_TO_USE]; //Discovery device cl_device_id device; clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); check(status, "clGetDeviceIDs"); //create context cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0}; cl_context context; context = clCreateContext(props, 1, &device, NULL, NULL, &status); check(status, "clCreateContext"); //create command queue cl_command_queue queue; queue = clCreateCommandQueue(context, device, 0, &status); check(status, "clCreateCommandQueue"); //create the input and output buffers cl_mem d_input, d_output, d_filter; d_input = clCreateBuffer(context, CL_MEM_READ_ONLY, dataSize, NULL, &status); check(status, "clCreateBuffer"); d_filter = clCreateBuffer(context, CL_MEM_READ_ONLY, filterSize, 
NULL, &status); check(status, "clCreateBuffer"); // Copy the input image to the device d_output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status); check(status, "clCreateBuffer"); status = clEnqueueWriteBuffer(queue, d_input, CL_TRUE, 0, dataSize, inputImage, 0, NULL, NULL); check(status, "clEnqueueWriteBuffer"); status = clEnqueueWriteBuffer(queue, d_filter, CL_TRUE, 0, filterSize, filter, 0, NULL, NULL); check(status, "clEnqueueWriteBuffer"); const char* source = readSource(kernelPath); //create a program object with source and build it cl_program program; program = clCreateProgramWithSource(context, 1, &source, NULL, NULL); check(status, "clCreateProgramWithSource"); status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); size_t log_size; char *program_log; if(status < 0) { clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char*)malloc(log_size + 1); program_log[log_size] = '\0'; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); printf("%s\n", program_log); free(program_log); exit(1); } check(status, "clBuildProgram"); //create the kernel object cl_kernel kernel; kernel = clCreateKernel(program, "sharpen", &status); check(status, "clCreateKernel"); //set the kernel arguments status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_output); status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_input); status |= clSetKernelArg(kernel, 2, sizeof(int), &imageWidth); status |= clSetKernelArg(kernel, 3, sizeof(int), &imageHeight); status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &d_filter); status |= clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); check(status, "clSetKernelArg"); // Set the work item dimensions size_t globalSize[2] = {imageWidth, imageHeight}; status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL); check(status, "clEnqueueNDRange"); // Read the image back to the host status = 
clEnqueueReadBuffer(queue, d_output, CL_TRUE, 0, dataSize, outputImage, 0, NULL, NULL); check(status, "clEnqueueReadBuffer"); // Write the output image to file storeBmpImage(outputImage, outputFileName, imageHeight, imageWidth, inputFileName); //free opencl resources clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseMemObject(d_input); clReleaseMemObject(d_output); clReleaseMemObject(d_filter); clReleaseContext(context); //free host resources free(inputImage); free(outputImage); }
/** * @param argc コマンドライン引数の数 * @param argv コマンドライン引数 */ int main(int argc, const char * const *argv) { Array source = {0}; Array bitArray1 = {0}; Array bitArray2 = {0}; Array *curBits = &bitArray1; Array *oldBits = &bitArray2; HashEntryCache cache = {0}; Hash hash = {0}; int result = EXIT_SUCCESS; size_t len = 0; size_t i = 0; size_t j = 0; /* 入力ファイルを読み込む */ if(readSource(&source, stdin) != R_OK) { perror("error at readSource"); result = EXIT_FAILURE; goto Lerror; } /* ハッシュを初期化する */ if(HashEntryCache_initialize(&cache, 65536) != R_OK) { perror("error at HashEntryCache_initialize"); goto Lerror; } if(Hash_initialize(&hash, 65536, &cache) != R_OK) { perror("error at Hash_initialize"); goto Lerror; } /* ビット配列を確保する */ if(Array_resize(&bitArray1, source.length) != R_OK) { perror("error at Array_resize(bitArray1)"); result = EXIT_FAILURE; goto Lerror; } if(Array_resize(&bitArray2, bitArray1.length) != R_OK) { perror("error at Array_resize(bitArray1)"); result = EXIT_FAILURE; goto Lerror; } /* 最初は全文字位置をチェックするので、以前のビット列は全て1で初期化 */ memset(oldBits->p, 0x1, oldBits->length); /* 長さが入力全体-1までの部分文字列全てについて、重複が見つからなくなるまでループ */ /* ループ終了時点で、oldBitsには最長の重複部分文字列が存在する位置が格納されている */ for(len = 1; len < source.length; ++len) { memset(curBits->p, 0x00, curBits->length); if(scanSameSubstrings(&source, curBits, oldBits, len, &hash) == R_NOTFOUND) { --len; break; } /* ビット配列を入れ替え */ if(curBits == &bitArray1) { curBits = &bitArray2; oldBits = &bitArray1; } else { curBits = &bitArray1; oldBits = &bitArray2; } } /* 最初の重複位置を探す */ i = Array_findFirstBit(oldBits, 0); /* 最初の重複位置の部分文字列と同じ部分文字列を探す */ for(j = i + 1; j < oldBits->length; ++j) { j = Array_findFirstBit(oldBits, j); if(memcmp(Array_pointer(&source, i), Array_pointer(&source, j), len) == 0) { const char * const cp = (const char *)source.p; printf("length: %ld, %.*s[%ld] == %.*s[%ld]\n", len, (int)len, &cp[i], i, (int)len, &cp[j], j); } } Lerror: Array_free(&bitArray2); Array_free(&bitArray1); Array_free(&source); return result; }
/** * Loads the OpenCL program by loading the source code, setting up devices and context, * as well as building the actual kernel */ void CL_Program::loadProgram() { const std::string hw("Hello World\n"); std::vector<cl::Platform> platforms; error = cl::Platform::get(&platforms); print_errors("cl::Platform::get", error); std::cout << "Available platforms: " << platforms.size() << std::endl; if (platforms.size() == 0) { std::cout << "-OpenCL: There are no available platforms. This probably means proper GPU drivers aren't installed." << std::endl; } std::string platformVendor; if (app.getApplicationFlags()->opencl_devices_debug) { std::remove("gpu_debug.txt"); for (auto iter : platforms) { printPlatformInfo(iter); } } device_used = 0; cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; if (app.getApplicationFlags()->use_GPU) { std::cout << "+OpenCL: Attempting to use GPU as OpenCL device" << std::endl; std::cout << "+OpenCL: If this causes errors, switch to CPU by changing \"use_GPU\" to \"no\" in config.json" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_GPU, properties); } catch (cl::Error e) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Could not use GPU as OpenCL device. Most of the time this is due to GPU drivers not having the required functionality." << std::endl; std::cout << "-OpenCL: I'm switching to CPU OpenCL. This is slower, but should work" << std::endl; std::cout << "----------------------------------------" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_CPU, properties); std::cout << "+OpenCL: I was able to create a backup context using the CPU as OpenCL device" << std::endl; std::cout << "+OpenCL: Consider tweaking your GPU drivers later so that the program runs faster." 
<< std::endl; app.getApplicationFlags()->use_GPU = false; } catch (cl::Error e2) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: I was not able to use CPU as a backup OpenCL device. Something real bad is going on.\nAborting.\nContact the software author!" << std::endl; std::cout << "----------------------------------------" << std::endl; app.exit(); return; } } } else { std::cout << "+OpenCL: Attempting to use CPU as OpenCL device" << std::endl; std::cout << "+OpenCL: If you have modern GPU drivers, please switch to GPU for better performance" << std::endl; std::cout << "+OpenCL: This can be done by changing \"use_GPU\" to \"yes\" in config.json" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_CPU, properties); } catch (cl::Error e) { std::cout << "----------------------------------------" << std::endl; std::cout << e.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Error at creating context with CPU as OpenCL device" << std::endl; std::cout << "-OpenCL: This should not happen, but it did. Trying GPU as a backup device" << std::endl; std::cout << "----------------------------------------" << std::endl; try { context = cl::Context(CL_DEVICE_TYPE_GPU, properties); } catch (cl::Error e2) { std::cout << "----------------------------------------" << std::endl; std::cout << e2.what() << ", " << e.err() << std::endl; std::cout << "-OpenCL: Using GPU as a backup device failed. This is probably due to problems with the GPU drivers" << std::endl; std::cout << "-OpenCL: There were no OpenCL capable devices. 
The program cannot continue :(" << std::endl; std::cout << "----------------------------------------" << std::endl; app.exit(); return; } } } devices = context.getInfo<CL_CONTEXT_DEVICES>(); std::cout << "+OpenCL: Devices available: " << devices.size() << std::endl; commandQueue = cl::CommandQueue(context, devices[device_used], 0, &error); print_errors("cl::CommandQueue", error); programSourceRaw = readSource(sourcepath); if (app.getApplicationFlags()->print_cl_programs) { std::cout << "+OpenCL: Kernel size: " << programSourceRaw.size() << std::endl; std::cout << "+OpenCL: Kernel: " << programSourceRaw << std::endl; } try { programSource = cl::Program::Sources(1, std::make_pair(programSourceRaw.c_str(), programSourceRaw.size())); program = cl::Program(context, programSource); } catch (cl::Error er) { std::cout << "-OpenCL Exception: " << er.what() << ", " << er.err() << std::endl; } try { error = program.build(devices); } catch (cl::Error err) { std::cout << "-OpenCL Exception: " << err.what() << ", " << err.err() << std::endl; print_errors("program.build()", error); } std::cout << "Build status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]) << std::endl; std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]) << std::endl; std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl; }
int main(int argc, char** argv) { // Set up the data on the host clock_t start, start0; start0 = clock(); start = clock(); // Rows and columns in the input image int imageHeight; int imageWidth; const char* inputFile = "input.bmp"; const char* outputFile = "output.bmp"; // Homegrown function to read a BMP from file float* inputImage = readImage(inputFile, &imageWidth, &imageHeight); // Size of the input and output images on the host int dataSize = imageHeight*imageWidth*sizeof(float); // Pad the number of columns #ifdef NON_OPTIMIZED int deviceWidth = imageWidth; #else // READ_ALIGNED || READ4 int deviceWidth = roundUp(imageWidth, WGX); #endif int deviceHeight = imageHeight; // Size of the input and output images on the device int deviceDataSize = imageHeight*deviceWidth*sizeof(float); // Output image on the host float* outputImage = NULL; outputImage = (float*)malloc(dataSize); int i, j; for(i = 0; i < imageHeight; i++) { for(j = 0; j < imageWidth; j++) { outputImage[i*imageWidth+j] = 0; } } // 45 degree motion blur float filter[49] = {0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0, 0.0376, 0.1283, 0.0145, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0145, 0.1283, 0.0376, 0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0}; int filterWidth = 7; int paddingPixels = (int)(filterWidth/2) * 2; stoptime(start, "set up input, output."); start = clock(); // Set up the OpenCL environment // Discovery platform cl_platform_id platform; clGetPlatformIDs(1, &platform, NULL); // Discover device cl_device_id device; clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); size_t time_res; clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(time_res), &time_res, NULL); printf("Device profiling timer resolution: %zu ns.\n", time_res); // Create context cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0}; cl_context context; context = clCreateContext(props, 1, &device, NULL, 
NULL, NULL); // Create command queue cl_ulong time_start, time_end, exec_time; cl_event timing_event; cl_command_queue queue; queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL); // Create memory buffers cl_mem d_inputImage; cl_mem d_outputImage; cl_mem d_filter; d_inputImage = clCreateBuffer(context, CL_MEM_READ_ONLY, deviceDataSize, NULL, NULL); d_outputImage = clCreateBuffer(context, CL_MEM_WRITE_ONLY, deviceDataSize, NULL, NULL); d_filter = clCreateBuffer(context, CL_MEM_READ_ONLY, 49*sizeof(float),NULL, NULL); // Write input data to the device #ifdef NON_OPTIMIZED clEnqueueWriteBuffer(queue, d_inputImage, CL_TRUE, 0, deviceDataSize, inputImage, 0, NULL, NULL); #else // READ_ALIGNED || READ4 size_t buffer_origin[3] = {0,0,0}; size_t host_origin[3] = {0,0,0}; size_t region[3] = {deviceWidth*sizeof(float), imageHeight, 1}; clEnqueueWriteBufferRect(queue, d_inputImage, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0, inputImage, 0, NULL, NULL); #endif // Write the filter to the device clEnqueueWriteBuffer(queue, d_filter, CL_TRUE, 0, 49*sizeof(float), filter, 0, NULL, NULL); // Read in the program from file char* source = readSource("convolution.cl"); // Create the program cl_program program; // Create and compile the program program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, NULL); cl_int build_status; build_status = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // Create the kernel cl_kernel kernel; #if defined NON_OPTIMIZED || defined READ_ALIGNED // Only the host-side code differs for the aligned reads kernel = clCreateKernel(program, "convolution", NULL); #else // READ4 kernel = clCreateKernel(program, "convolution_read4", NULL); #endif // Selected work group size is 16x16 int wgWidth = WGX; int wgHeight = WGY; // When computing the total number of work items, the // padding work items do not need to be considered int totalWorkItemsX = 
roundUp(imageWidth-paddingPixels, wgWidth); int totalWorkItemsY = roundUp(imageHeight-paddingPixels, wgHeight); // Size of a work group size_t localSize[2] = {wgWidth, wgHeight}; // Size of the NDRange size_t globalSize[2] = {totalWorkItemsX, totalWorkItemsY}; // The amount of local data that is cached is the size of the // work groups plus the padding pixels #if defined NON_OPTIMIZED || defined READ_ALIGNED int localWidth = localSize[0] + paddingPixels; #else // READ4 // Round the local width up to 4 for the read4 kernel int localWidth = roundUp(localSize[0]+paddingPixels, 4); #endif int localHeight = localSize[1] + paddingPixels; // Compute the size of local memory (needed for dynamic // allocation) size_t localMemSize = (localWidth * localHeight * sizeof(float)); // Set the kernel arguments clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_inputImage); clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_outputImage); clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_filter); clSetKernelArg(kernel, 3, sizeof(int), &deviceHeight); clSetKernelArg(kernel, 4, sizeof(int), &deviceWidth); clSetKernelArg(kernel, 5, sizeof(int), &filterWidth); clSetKernelArg(kernel, 6, localMemSize, NULL); clSetKernelArg(kernel, 7, sizeof(int), &localHeight); clSetKernelArg(kernel, 8, sizeof(int), &localWidth); stoptime(start, "set up kernel"); start = clock(); // Execute the kernel clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, localSize, 0, NULL, &timing_event); // Wait for kernel to complete clFinish(queue); stoptime(start, "run kernel"); clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL); clGetEventProfilingInfo(timing_event, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL); exec_time = time_end-time_start; printf("Profile execution time = %.3lf sec.\n", (double) exec_time/1000000000); // Read back the output image #ifdef NON_OPTIMIZED clEnqueueReadBuffer(queue, d_outputImage, CL_TRUE, 0, deviceDataSize, outputImage, 0, 
NULL, NULL); #else // READ_ALIGNED || READ4 // Begin reading output from (3,3) on the device // (for 7x7 filter with radius 3) buffer_origin[0] = 3*sizeof(float); buffer_origin[1] = 3; buffer_origin[2] = 0; // Read data into (3,3) on the host host_origin[0] = 3*sizeof(float); host_origin[1] = 3; host_origin[2] = 0; // Region is image size minus padding pixels region[0] = (imageWidth-paddingPixels)*sizeof(float); region[1] = (imageHeight-paddingPixels); region[2] = 1; // Perform the read clEnqueueReadBufferRect(queue, d_outputImage, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, imageWidth*sizeof(float), 0, outputImage, 0, NULL, NULL); #endif // Homegrown function to write the image to file storeImage(outputImage, outputFile, imageHeight, imageWidth, inputFile); // Free OpenCL objects clReleaseMemObject(d_inputImage); clReleaseMemObject(d_outputImage); clReleaseMemObject(d_filter); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; }
void CTextViewerWindow::asUtf8() { ui->textBrowser->setPlainText(QString::fromUtf8(readSource())); ui->actionUTF_8->setChecked(true); }
void ColladaInterface::readGeometries(std::vector<ColGeom>* v, const char* file_name) { pugi::xml_document doc; pugi::xml_node geometry, mesh, vertices, input, source, primitive; std::string source_name; int prim_count, num_indices; // Create document, load COLLADA file, and access <geometry> element pugi::xml_parse_result result = doc.load_file(file_name); qDebug() << "File name:" << file_name; geometry = doc.child("COLLADA").child("library_geometries").child("geometry"); // Iterate through geometry elements while(geometry != NULL) { // Create new geometry ColGeom data; // Set the geometry name data.name = geometry.attribute("id").value(); // Iterate through mesh elements mesh = geometry.child("mesh"); while(mesh != NULL) { vertices = mesh.child("vertices"); input = vertices.child("input"); // Iterate through input elements while(input != NULL) { source_name = std::string(input.attribute("source").value()); source_name = source_name.erase(0, 1); source = mesh.child("source"); // Iterate through source elements while(source != NULL) { if(std::string(source.attribute("id").value()) == source_name) { data.map[std::string(input.attribute("semantic").value())] = readSource(source); } source = source.next_sibling("source"); } input = input.next_sibling("input"); } // Determine primitive type for(int i=0; i<7; i++) { primitive = mesh.child(primitive_types[i]); if(primitive != NULL) { // Determine number of primitives prim_count = primitive.attribute("count").as_int(); // Determine primitive type and set count switch(i) { case 0: data.primitive = GL_LINES; num_indices = prim_count * 2; break; case 1: data.primitive = GL_LINE_STRIP; num_indices = prim_count + 1; break; case 4: data.primitive = GL_TRIANGLES; num_indices = prim_count * 3; break; case 5: data.primitive = GL_TRIANGLE_FAN; num_indices = prim_count + 2; break; case 6: data.primitive = GL_TRIANGLE_STRIP; num_indices = prim_count + 2; break; default: std::cout << "Primitive " << primitive_types[i] << " not 
supported" << std::endl; } data.index_count = num_indices; // Allocate memory for indices data.indices = (unsigned short*)malloc(num_indices * sizeof(unsigned short)); // Read the index values char* text = (char*)(primitive.child("p").child_value()); data.indices[0] = (unsigned short)atoi(strtok(text, " ")); for(int index=1; index<num_indices; index++) { data.indices[index] = (unsigned short)atoi(strtok(NULL, " ")); } } } mesh = mesh.next_sibling("mesh"); } v->push_back(data); geometry = geometry.next_sibling("geometry"); } }
int main(int argc, char ** argv) { printf("Running Vector Addition program\n\n"); size_t datasize = sizeof(int)*ELEMENTS; int *A, *B; // Input arrays int *C; // Output array // Allocate space for input/output data A = (int*)malloc(datasize); B = (int*)malloc(datasize); C = (int*)malloc(datasize); if(A == NULL || B == NULL || C == NULL) { perror("malloc"); exit(-1); } // Initialize the input data for(int i = 0; i < ELEMENTS; i++) { A[i] = i; B[i] = i; } cl_int status; // use as return value for most OpenCL functions cl_uint numPlatforms = 0; cl_platform_id *platforms; // Query for the number of recongnized platforms status = clGetPlatformIDs(0, NULL, &numPlatforms); if(status != CL_SUCCESS) { printf("clGetPlatformIDs failed\n"); exit(-1); } // Make sure some platforms were found if(numPlatforms == 0) { printf("No platforms detected.\n"); exit(-1); } // Allocate enough space for each platform platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id)); if(platforms == NULL) { perror("malloc"); exit(-1); } // Fill in platforms clGetPlatformIDs(numPlatforms, platforms, NULL); if(status != CL_SUCCESS) { printf("clGetPlatformIDs failed\n"); exit(-1); } // Print out some basic information about each platform printf("%u platforms detected\n", numPlatforms); for(unsigned int i = 0; i < numPlatforms; i++) { char buf[100]; printf("Platform %u: \n", i); status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL); printf("\tVendor: %s\n", buf); status |= clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(buf), buf, NULL); printf("\tName: %s\n", buf); if(status != CL_SUCCESS) { printf("clGetPlatformInfo failed\n"); exit(-1); } } printf("\n"); cl_uint numDevices = 0; cl_device_id *devices; // Retrive the number of devices present status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); if(status != CL_SUCCESS) { printf("clGetDeviceIDs failed\n"); exit(-1); } // Make sure some devices were found if(numDevices == 
0) { printf("No devices detected.\n"); exit(-1); } // Allocate enough space for each device devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id)); if(devices == NULL) { perror("malloc"); exit(-1); } // Fill in devices status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); if(status != CL_SUCCESS) { printf("clGetDeviceIDs failed\n"); exit(-1); } // Print out some basic information about each device printf("%u devices detected\n", numDevices); for(unsigned int i = 0; i < numDevices; i++) { char buf[100]; printf("Device %u: \n", i); status = clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buf), buf, NULL); printf("\tDevice: %s\n", buf); status |= clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buf), buf, NULL); printf("\tName: %s\n", buf); if(status != CL_SUCCESS) { printf("clGetDeviceInfo failed\n"); exit(-1); } } printf("\n"); cl_context context; // Create a context and associate it with the devices context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status); if(status != CL_SUCCESS || context == NULL) { printf("clCreateContext failed\n"); exit(-1); } cl_command_queue cmdQueue; // Create a command queue and associate it with the device you // want to execute on cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status); if(status != CL_SUCCESS || cmdQueue == NULL) { printf("clCreateCommandQueue failed\n"); exit(-1); } cl_mem d_A, d_B; // Input buffers on device cl_mem d_C; // Output buffer on device // Create a buffer object (d_A) that contains the data from the host ptr A d_A = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize, A, &status); if(status != CL_SUCCESS || d_A == NULL) { printf("clCreateBuffer failed\n"); exit(-1); } // Create a buffer object (d_B) that contains the data from the host ptr B d_B = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize, B, &status); if(status != CL_SUCCESS || d_B == NULL) { printf("clCreateBuffer failed\n"); 
exit(-1); } // Create a buffer object (d_C) with enough space to hold the output data d_C = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &status); if(status != CL_SUCCESS || d_C == NULL) { printf("clCreateBuffer failed\n"); exit(-1); } cl_program program; char *source; const char *sourceFile = "vectoradd.cl"; // This function reads in the source code of the program source = readSource(sourceFile); //printf("Program source is:\n%s\n", source); // Create a program. The 'source' string is the code from the // vectoradd.cl file. program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status); if(status != CL_SUCCESS) { printf("clCreateProgramWithSource failed\n"); exit(-1); } cl_int buildErr; // Build (compile & link) the program for the devices. // Save the return value in 'buildErr' (the following // code will print any compilation errors to the screen) buildErr = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL); // If there are build errors, print them to the screen if(buildErr != CL_SUCCESS) { printf("Program failed to build.\n"); cl_build_status buildStatus; for(unsigned int i = 0; i < numDevices; i++) { clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &buildStatus, NULL); if(buildStatus == CL_SUCCESS) { continue; } char *buildLog; size_t buildLogSize; clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize); buildLog = (char*)malloc(buildLogSize); if(buildLog == NULL) { perror("malloc"); exit(-1); } clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); buildLog[buildLogSize-1] = '\0'; printf("Device %u Build Log:\n%s\n", i, buildLog); free(buildLog); } exit(0); } else { printf("No build errors\n"); } cl_kernel kernel; // Create a kernel from the vector addition function (named "vecadd") kernel = clCreateKernel(program, "vecadd", &status); if(status != CL_SUCCESS) { printf("clCreateKernel 
failed\n"); exit(-1); } // Associate the input and output buffers with the kernel status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_A); status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_B); status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_C); if(status != CL_SUCCESS) { printf("clSetKernelArg failed\n"); exit(-1); } // Define an index space (global work size) of threads for execution. // A workgroup size (local work size) is not required, but can be used. size_t globalWorkSize[1]; // There are ELEMENTS threads globalWorkSize[0] = ELEMENTS; // Execute the kernel. // 'globalWorkSize' is the 1D dimension of the work-items status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL); if(status != CL_SUCCESS) { printf("clEnqueueNDRangeKernel failed\n"); exit(-1); } // Read the OpenCL output buffer (d_C) to the host output array (C) clEnqueueReadBuffer(cmdQueue, d_C, CL_TRUE, 0, datasize, C, 0, NULL, NULL); // Verify correctness bool result = true; for(int i = 0; i < ELEMENTS; i++) { if(C[i] != i+i) { result = false; break; } } if(result) { printf("Output is correct\n"); } else { printf("Output is incorrect\n"); } clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmdQueue); clReleaseMemObject(d_A); clReleaseMemObject(d_B); clReleaseMemObject(d_C); clReleaseContext(context); free(A); free(B); free(C); free(source); free(platforms); free(devices); }
int main(int argc, char **argv ) { /* This is the shortest path project for CPSC424/524. Author: Bo Song, Yale University Date: 4/25/2016 Credits: This program is based on the description provided by Andrew Sherman */ double wct0, wct1, total_time, cput; char* sourceFile, * graphFile; int count[8]; #pragma omp parallel printf("num of threads = %d\n", omp_get_num_threads()); for(int i = 0; i < 8; i++) count[i] = 0; for(int i = 0; i < 8; i++) loopCount[i] = 0; for(int i = 0; i < 8; i++) updateCount[i] = 0; if(argc != 3) { printf("serial <graphfile> <sourcefile>\n"); return -1; } graphFile = argv[1]; sourceFile = argv[2]; timing(&wct0, &cput); printf("reading graph...\n"); readGraph(graphFile); printf("reading source...\n"); readSource(sourceFile); // print_adj_list(adj_listhead, N); #pragma omp parallel #pragma omp for schedule(static, 1) for(int i = 0; i < num_sources; i++) { count[omp_get_thread_num()]++; moore(sources[i]); } timing(&wct1, &cput); //get the end time total_time = wct1 - wct0; printf("Message printed by master: Total elapsed time is %f seconds.\n",total_time); // free resources for(int i = 1; i <= N; i++) { adj_node* node = adj_listhead[i]; while(node) { adj_node* next = node->next; free(node); node = next; } } printf("Load balance among threads: "); long long sumLoop = 0, sumUpdate = 0; for(int i = 0; i < 8; i++) { printf("%d ", count[i]); sumLoop += loopCount[i]; sumUpdate += updateCount[i]; } printf("portion = %f", (float)sumUpdate / sumLoop); printf("\n"); free(sources); }
void* run(void* arg) { thread_info* ti = (thread_info*)arg; int tid = ti->tid; cl_int status; // use as return value for most OpenCL functions cl_uint numPlatforms = 0; cl_platform_id *platforms; // AMD's OpenCL implementation doesn't seem to like when two threads // concurrently access clGetPlatformIDs pthread_mutex_lock(&lock); // Query for the number of recongnized platforms status = clGetPlatformIDs(0, NULL, &numPlatforms); if(status != CL_SUCCESS) { printf("clGetPlatformIDs failed\n"); exit(-1); } pthread_mutex_unlock(&lock); // Make sure some platforms were found if(numPlatforms == 0) { printf("No platforms detected.\n"); exit(-1); } // Allocate enough space for each platform platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id)); if(platforms == NULL) { perror("malloc"); exit(-1); } pthread_mutex_lock(&lock); // Fill in platforms clGetPlatformIDs(numPlatforms, platforms, NULL); if(status != CL_SUCCESS) { printf("clGetPlatformIDs failed\n"); exit(-1); } pthread_mutex_unlock(&lock); // Print out some basic information about each platform if(tid == 0) { printf("%u platforms detected\n", numPlatforms); for(unsigned int i = 0; i < numPlatforms; i++) { char buf[100]; printf("Platform %u: \n", i); status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL); printf("\tVendor: %s\n", buf); status |= clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(buf), buf, NULL); printf("\tName: %s\n", buf); if(status != CL_SUCCESS) { printf("clGetPlatformInfo failed\n"); exit(-1); } } printf("\n"); } cl_uint numDevices = 0; cl_device_id *devices; // Retrive the number of devices present status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices); if(status != CL_SUCCESS) { printf("clGetDeviceIDs failed\n"); exit(-1); } // Make sure some devices were found if(numDevices < tid) { printf("Not enough devices found.\n"); exit(-1); } // Allocate enough space for each device devices = 
(cl_device_id*)malloc(numDevices*sizeof(cl_device_id)); if(devices == NULL) { perror("malloc"); exit(-1); } // Fill in devices status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL); if(status != CL_SUCCESS) { printf("clGetDeviceIDs failed\n"); exit(-1); } // Print out some basic information about each device if(tid == 0) { printf("%u devices detected\n", numDevices); for(unsigned int i = 0; i < numDevices; i++) { char buf[100]; printf("Device %u: \n", i); status = clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buf), buf, NULL); printf("\tDevice: %s\n", buf); status |= clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buf), buf, NULL); printf("\tName: %s\n", buf); if(status != CL_SUCCESS) { printf("clGetDeviceInfo failed\n"); exit(-1); } } printf("\n"); } cl_context context; // Create a context and associate it with the devices context = clCreateContext(NULL, 1, &devices[tid], NULL, NULL, &status); if(status != CL_SUCCESS || context == NULL) { printf("clCreateContext failed\n"); exit(-1); } cl_command_queue cmdQueue; // Create a command queue and associate it with the device you // want to execute on cmdQueue = clCreateCommandQueue(context, devices[tid], 0, &status); if(status != CL_SUCCESS || cmdQueue == NULL) { printf("clCreateCommandQueue failed\n"); exit(-1); } cl_mem d_A, d_B; // Input buffers on device cl_mem d_C; // Output buffer on device int myDatasize = datasize/NUMTHREADS; int myOffset = tid*(ELEMENTS/NUMTHREADS); printf("T%d: myOffset = %d, myDatasize = %d\n", tid, myOffset, myDatasize); // Create a buffer object (d_A) that contains the data from the host ptr A d_A = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, myDatasize, &A[myOffset], &status); if(status != CL_SUCCESS || d_A == NULL) { printf("clCreateBuffer failed\n"); exit(-1); } // Create a buffer object (d_B) that contains the data from the host ptr B d_B = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, myDatasize, 
&B[myOffset], &status); if(status != CL_SUCCESS || d_B == NULL) { printf("clCreateBuffer failed\n"); exit(-1); } // Create a buffer object (d_C) with enough space to hold the output data d_C = clCreateBuffer(context, CL_MEM_READ_WRITE, myDatasize, NULL, &status); if(status != CL_SUCCESS || d_C == NULL) { printf("clCreateBuffer failed\n"); exit(-1); } cl_program program; char *source; const char *sourceFile = "vectoradd.cl"; // This function reads in the source code of the program source = readSource(sourceFile); //printf("Program source is:\n%s\n", source); // Create a program. The 'source' string is the code from the // vectoradd.cl file. program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status); if(status != CL_SUCCESS) { printf("clCreateProgramWithSource failed\n"); exit(-1); } cl_int buildErr; // Build (compile & link) the program for the devices. // Save the return value in 'buildErr' (the following // code will print any compilation errors to the screen) buildErr = clBuildProgram(program, 1, &devices[tid], NULL, NULL, NULL); // If there are build errors, print them to the screen if(buildErr != CL_SUCCESS) { printf("Program failed to build.\n"); cl_build_status buildStatus; clGetProgramBuildInfo(program, devices[tid], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &buildStatus, NULL); char *buildLog; size_t buildLogSize; clGetProgramBuildInfo(program, devices[tid], CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize); buildLog = (char*)malloc(buildLogSize); if(buildLog == NULL) { perror("malloc"); exit(-1); } clGetProgramBuildInfo(program, devices[tid], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL); buildLog[buildLogSize-1] = '\0'; printf("Device %u Build Log:\n%s\n", tid, buildLog); free(buildLog); exit(0); } else { printf("No build errors\n"); } cl_kernel kernel; // Create a kernel from the vector addition function (named "vecadd") kernel = clCreateKernel(program, "vecadd", &status); if(status != CL_SUCCESS) { 
printf("clCreateKernel failed\n"); exit(-1); } // Associate the input and output buffers with the kernel status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_A); status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_B); status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_C); if(status != CL_SUCCESS) { printf("clSetKernelArg failed\n"); exit(-1); } // Define an index space (global work size) of threads for execution. // A workgroup size (local work size) is not required, but can be used. size_t globalWorkSize[1]; // There are ELEMENTS threads globalWorkSize[0] = ELEMENTS/NUMTHREADS; // Execute the kernel. // 'globalWorkSize' is the 1D dimension of the work-items status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL); if(status != CL_SUCCESS) { printf("clEnqueueNDRangeKernel failed\n"); exit(-1); } clFinish(cmdQueue); // Read the OpenCL output buffer (d_C) to the host output array (C) clEnqueueReadBuffer(cmdQueue, d_C, CL_TRUE, 0, myDatasize, &C[myOffset], 0, NULL, NULL); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmdQueue); clReleaseMemObject(d_A); clReleaseMemObject(d_B); clReleaseMemObject(d_C); clReleaseContext(context); free(source); free(platforms); free(devices); return NULL; }