int getOclSoftware(oclSoftware &soft, const oclHardware &hardware) { cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; cl_int err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, 0); if ( err != CL_SUCCESS) { std::cout << oclErrorCode(err) << "\n"; return -1; } unsigned char *kernelCode = 0; std::cout << "Loading " << soft.mFileName << "\n"; int size = loadFile2Memory(soft.mFileName, (char **) &kernelCode); if (size < 0) { std::cout << "Failed to load kernel\n"; return -2; } if (deviceType == CL_DEVICE_TYPE_ACCELERATOR) { size_t n = size; soft.mProgram = clCreateProgramWithBinary(hardware.mContext, 1, &hardware.mDevice, &n, (const unsigned char **) &kernelCode, 0, &err); } else { soft.mProgram = clCreateProgramWithSource(hardware.mContext, 1, (const char **)&kernelCode, 0, &err); } if (!soft.mProgram || (err != CL_SUCCESS)) { std::cout << oclErrorCode(err) << "\n"; return -3; } int status = compileProgram(hardware, soft); delete [] kernelCode; return status; }
GLSLProgram::GLSLProgram(const char *vsource, const char *fsource) { curVSName = NULL; curFSName = NULL; curGSName = NULL; compileProgram(vsource, 0, fsource); }
void SXFunctionInternal::allocOpenCL() { // OpenCL return flag cl_int ret; // Generate the kernel source code stringstream ss; // Add kernel prefix ss << "__kernel "; // Generate the function CodeGenerator gen; generateFunction(ss, "evaluate", "__global const double*", "__global double*", "double", gen); // Form c-string std::string s = ss.str(); if (verbose()) { userOut() << "Kernel source code for numerical evaluation:" << endl; userOut() << " ***** " << endl; userOut() << s; userOut() << " ***** " << endl; } const char* cstr = s.c_str(); // Parse kernel source code program_ = clCreateProgramWithSource(sparsity_propagation_kernel_.context, 1, static_cast<const char **>(&cstr), 0, &ret); casadi_assert(ret == CL_SUCCESS); casadi_assert(program_ != 0); // Build Kernel Program compileProgram(program_); // Create OpenCL kernel for forward propatation kernel_ = clCreateKernel(program_, "evaluate", &ret); casadi_assert(ret == CL_SUCCESS); // Memory buffer for each of the input arrays input_memobj_.resize(nIn(), static_cast<cl_mem>(0)); for (int i=0; i<input_memobj_.size(); ++i) { input_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, inputNoCheck(i).size() * sizeof(cl_double), static_cast<void*>(inputNoCheck(i).ptr()), &ret); casadi_assert(ret == CL_SUCCESS); } // Memory buffer for each of the output arrays output_memobj_.resize(nOut(), static_cast<cl_mem>(0)); for (int i=0; i<output_memobj_.size(); ++i) { output_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, outputNoCheck(i).size() * sizeof(cl_double), static_cast<void*>(outputNoCheck(i).ptr()), &ret); casadi_assert(ret == CL_SUCCESS); } }
bool GLSLProgram::setSourceFromStrings(const char *vertSrc, const char *fragSrc) { if (m_program) { glDeleteProgram(m_program); m_program = 0; } m_program = compileProgram(vertSrc, fragSrc); return m_program != 0; }
Shader::Shader(const char* vsource, const char* fsource) { m_Program = glCreateProgram(); addVertexShader(vsource); addFragmentShader(fsource); bindAttributeLocations(); compileProgram(); }
GL3ShaderProgram::GL3ShaderProgram(FileLoader *loader, const GL3ShaderDefinition &definition) { _handle = compileProgram(loader, definition); // Now get the locations of the uniforms for (auto &mapping : definition.uniforms) { auto loc = glGetUniformLocation(_handle, mapping.name); _uniformLocations[static_cast<size_t>(mapping.parameter)] = loc; } }
Shader::Shader(const char* vsource, const char* fsource, const char* gsource) { m_Program = glCreateProgram(); addVertexShader(vsource); addFragmentShader(fsource); if(gsource!=NULL) { addGeometryShader(gsource); } bindAttributeLocations(); compileProgram(); }
GLuint GL3ShaderManager::getProgram(ShaderType type, ShaderFlags flags) { auto iter = _programCache.find(std::make_pair(type, flags)); if (iter != _programCache.end()) { return iter->second; } auto definition = getShaderDefinition(type); auto prog = compileProgram(_fileLoader, definition, getHeader(flags)); bindLocations(prog, definition); _programCache.insert(std::make_pair(std::make_pair(type, flags), prog)); return prog; }
int compile(char *fileName) { if (openInputStream(fileName) == IO_ERROR) return IO_ERROR; currentToken = NULL; lookAhead = getValidToken(); compileProgram(); free(currentToken); free(lookAhead); closeInputStream(); return IO_SUCCESS; }
void GLSLProgram::loadFromFiles(const char *vFilename, const char *gFilename, const char *fFilename, GLenum gsInput, GLenum gsOutput, int maxVerts) { char *vsource = readTextFile(vFilename); char *gsource = 0; if (gFilename) { gsource = readTextFile(gFilename); } char *fsource = readTextFile(fFilename); mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts); delete [] vsource; if (gsource) delete [] gsource; delete [] fsource; }
JNIEXPORT void JNICALL Java_com_bullet_DemoLib_change(JNIEnv * env, jobject obj, jint width, jint height) { static bool first = true; // init shoot box //box_body.clear(); stepFront(width,height); LOGI("View Port and frustrum set."); if(first){ LOGI("m_screenSpaceRenderer creation start"); m_screenSpaceRenderer = new ScreenSpaceFluidRendererGL(m_glutScreenWidth, m_glutScreenHeight); basicShader = compileProgram(vBasicShader, fBasicShader); if(m_screenSpaceRenderer) LOGI("m_screenSpaceRenderer is created"); first = false; } //updateCamera(); }
int compile(char *fileName) { if (openInputStream(fileName) == IO_ERROR) return IO_ERROR; currentToken = NULL; lookAhead = getValidToken(); initSymTab(); compileProgram(); printObject(symtab->program,0); cleanSymTab(); free(currentToken); free(lookAhead); closeInputStream(); return IO_SUCCESS; }
GLuint GLSLProgram::compileProgramFromFiles(const char *vFilename, const char *gFilename, const char *fFilename, GLenum gsInput, GLenum gsOutput, int maxVerts) { char *vsource = readTextFile(vFilename); char *gsource = 0; if (gFilename) { gsource = readTextFile(gFilename); } char *fsource = readTextFile(fFilename); if(vsource) { GLSLProgram::setShaderNames(NULL, vFilename, gFilename, fFilename); mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts); delete [] vsource; if (gsource) delete [] gsource; delete [] fsource; return mProg; } return 0; }
virtual void SetUp() { ANGLETest::SetUp(); const std::string testVertexShaderSource = SHADER_SOURCE ( attribute highp vec4 aPosition; void main(void) { gl_Position = aPosition; } ); const std::string testFragmentShaderSource = SHADER_SOURCE ( uniform highp vec4 color; void main(void) { gl_FragColor = color; } ); mProgram = compileProgram(testVertexShaderSource, testFragmentShaderSource); if (mProgram == 0) { FAIL() << "shader compilation failed."; } mColorLocation = glGetUniformLocation(mProgram, "color"); glUseProgram(mProgram); glClearColor(0, 0, 0, 0); glClearDepthf(0.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glEnable(GL_BLEND); glDisable(GL_DEPTH_TEST); }
Program::Program(Shader *shaders[], int shaderCount, bool vertices, bool uvs, bool normals) : vertices(vertices), uvs(uvs), normals(normals) { this->programID = compileProgram(shaders, shaderCount); linked = (programID > 0); }
bool Shader::compile() { int max_log = 2048; compiled = true; if (vertexLog != NULL) { free(vertexLog); vertexLog = NULL; } if (fragmentLog != NULL) { free(fragmentLog); fragmentLog = NULL; } if (linkLog != NULL) { free(linkLog); linkLog = NULL; } vertexShader = compileProgram(GL_VERTEX_SHADER, vertexSource); GLsizei logLen; GLint vertResult; GLint fragResult; GLchar tmpChar; glGetShaderiv(vertexShader, GL_COMPILE_STATUS, &vertResult); if (vertResult == GL_FALSE) { glGetShaderInfoLog(vertexShader, 1, &logLen, &tmpChar); max_log = logLen; vertexLog = (GLchar *)malloc(max_log); glGetShaderInfoLog(vertexShader, max_log, &logLen, vertexLog); compiled = false; } fragmentShader = compileProgram(GL_FRAGMENT_SHADER, fragmentSource); glGetShaderiv(fragmentShader, GL_COMPILE_STATUS, &fragResult); if (fragResult == GL_FALSE) { glGetShaderInfoLog(fragmentShader, 1, &logLen, &tmpChar); max_log = logLen; fragmentLog = (GLchar *)malloc(max_log); glGetShaderInfoLog(fragmentShader, max_log, &logLen, fragmentLog); compiled = false; } if (!compiled) { return false; } program = glCreateProgram(); glAttachShader(program, vertexShader); glAttachShader(program, fragmentShader); glLinkProgram(program); GLint linkResult; glGetProgramiv(program, GL_LINK_STATUS, &linkResult); if (linkResult == GL_FALSE) { glGetProgramInfoLog(program, 1, &logLen, &tmpChar); max_log = logLen; linkLog = (GLchar *)malloc(max_log); glGetProgramInfoLog(program, max_log, &logLen, linkLog); compiled = false; return false; } if (!glIsProgram(program)) { cout << "Program is not valid.\n" << endl; } return compiled; }
static bool init_rc5_72_il4a_nand(u32 Device) { if(CContext[Device].coreID!=CORE_IL4NA) AMDStreamReinitializeDevice(Device); if(!CContext[Device].active) { Log("Thread %u: Device is not supported\n", Device); return false; } else{ switch(CContext[Device].attribs.target) { case CAL_TARGET_600: CContext[Device].domainSizeX=56; CContext[Device].domainSizeY=56; CContext[Device].maxIters=128; break; case CAL_TARGET_610: CContext[Device].domainSizeX=40; CContext[Device].domainSizeY=40; CContext[Device].maxIters=10; break; case CAL_TARGET_630: CContext[Device].domainSizeX=24; CContext[Device].domainSizeY=24; CContext[Device].maxIters=350; break; case CAL_TARGET_670: CContext[Device].domainSizeX=32; CContext[Device].domainSizeY=32; CContext[Device].maxIters=300; break; case CAL_TARGET_7XX: //TODO:domainSize break; case CAL_TARGET_770: CContext[Device].domainSizeX=600; CContext[Device].domainSizeY=600; CContext[Device].maxIters=5; break; case CAL_TARGET_710: //TODO:domainSize CContext[Device].domainSizeX=80; CContext[Device].domainSizeY=80; CContext[Device].maxIters=128; break; case CAL_TARGET_730: CContext[Device].domainSizeX=120; CContext[Device].domainSizeY=120; CContext[Device].maxIters=30; break; case 8: //RV870 CContext[Device].domainSizeX=728; CContext[Device].domainSizeY=728; CContext[Device].maxIters=4; break; case 9: //RV840 CContext[Device].domainSizeX=656; CContext[Device].domainSizeY=656; CContext[Device].maxIters=3; break; case 17://Barts CContext[Device].domainSizeX=904; CContext[Device].domainSizeY=904; CContext[Device].maxIters=3; default: break; } } CALresult result; result=calCtxCreate(&CContext[Device].ctx, CContext[Device].device); if(result!=CAL_RESULT_OK) { Log("Thread %u: creating context failed! Reason:%u\n",Device,result); return false; } CContext[Device].globalRes0=0; if(CContext[Device].attribs.target<20) if(CContext[Device].attribs.memExport) { calResAllocRemote2D(&CContext[Device].globalRes0, &CContext[Device].device, 1, 64, 1, CAL_FORMAT_UINT_1, CAL_RESALLOC_GLOBAL_BUFFER); } //------------------------------------------------------------------------- // Compiling Device Program //------------------------------------------------------------------------- result=compileProgram(&CContext[Device].ctx,&CContext[Device].image,&CContext[Device].module0, (CALchar *)il4ag_nand_src,CContext[Device].attribs.target,CContext[Device].globalRes0); if ( result!= CAL_RESULT_OK) { Log("Core compilation failed. Exiting.\n"); return false; } //------------------------------------------------------------------------- // Allocating and initializing resources //------------------------------------------------------------------------- // Input and output resources CContext[Device].outputRes0=0; if(CContext[Device].attribs.cachedRemoteRAM>0) calResAllocRemote2D(&CContext[Device].outputRes0, &CContext[Device].device, 1, CContext[Device].domainSizeX, CContext[Device].domainSizeY, CAL_FORMAT_UINT_1, CAL_RESALLOC_CACHEABLE); if(!CContext[Device].outputRes0) { if(calResAllocRemote2D(&CContext[Device].outputRes0, &CContext[Device].device, 1, CContext[Device].domainSizeX, CContext[Device].domainSizeY, CAL_FORMAT_UINT_1, 0)!=CAL_RESULT_OK) { Log("Failed to allocate output buffer\n"); return false; } } // Constant resource if(calResAllocRemote1D(&CContext[Device].constRes0, &CContext[Device].device, 1, 3, CAL_FORMAT_UINT_4, 0)!=CAL_RESULT_OK) { Log("Failed to allocate constants buffer\n"); return false; } // Mapping output resource to CPU and initializing values // Getting memory handle from resources result=calCtxGetMem(&CContext[Device].outputMem0, CContext[Device].ctx, CContext[Device].outputRes0); if(result==CAL_RESULT_OK) result=calCtxGetMem(&CContext[Device].constMem0, CContext[Device].ctx, CContext[Device].constRes0); if(result!=CAL_RESULT_OK) { Log("Failed to map resources!\n"); return false; } // Defining entry point for the module result=calModuleGetEntry(&CContext[Device].func0, CContext[Device].ctx, CContext[Device].module0, "main"); if(result==CAL_RESULT_OK) { result=calModuleGetName(&CContext[Device].outName0, CContext[Device].ctx, CContext[Device].module0, "o0"); if(result==CAL_RESULT_OK) result=calModuleGetName(&CContext[Device].constName0, CContext[Device].ctx, CContext[Device].module0, "cb0"); } if(result!=CAL_RESULT_OK) { Log("Failed to get entry points!\n"); return false; } if(CContext[Device].globalRes0) { result=calCtxGetMem(&CContext[Device].globalMem0, CContext[Device].ctx, CContext[Device].globalRes0); if(result==CAL_RESULT_OK) { result=calModuleGetName(&CContext[Device].globalName0, CContext[Device].ctx, CContext[Device].module0, "g[]"); if(result==CAL_RESULT_OK) result=calCtxSetMem(CContext[Device].ctx, CContext[Device].globalName0, CContext[Device].globalMem0); } if(result!=CAL_RESULT_OK) { Log("Failed to allocate global buffer!\n"); return false; } } // Setting input and output buffers // used in the kernel result=calCtxSetMem(CContext[Device].ctx, CContext[Device].outName0, CContext[Device].outputMem0); if(result==CAL_RESULT_OK) result=calCtxSetMem(CContext[Device].ctx, CContext[Device].constName0, CContext[Device].constMem0); if(result!=CAL_RESULT_OK) { Log("Failed to set buffers!\n"); return false; } CContext[Device].coreID=CORE_IL4NA; return true; }
std::unique_ptr<Renderer::ShaderProgram> OpenGLRenderer::createShader(const std::string& vert, const std::string& frag) { return std::make_unique<OpenGLShaderProgram>(compileProgram(vert.c_str(), frag.c_str())); }
void ShaderEditOverlay::handleKeyDown(SDL_KeyboardEvent& event) { if (event.keysym.sym=='s' && isModEnabled(KMOD_CTRL, event.keysym.mod)) { saveShaderSource(); } if ('1'<=event.keysym.sym && event.keysym.sym<='3' && isModEnabled(KMOD_ALT, event.keysym.mod)) { mActiveEditor->Command(SCI_SETFOCUS, false); switch(event.keysym.sym) { case '1': mActiveEditor=&mSelectionList; break; case '2': mActiveEditor=&mShaderEditor; break; case '3': mActiveEditor=&mDebugOutputView; break; } mActiveEditor->Command(SCI_SETFOCUS, true); } if (event.keysym.sym==SDLK_F7 && isModEnabled(0, event.keysym.mod)&&mSelectedShader&&mSelectedProgram) { compileProgram(); mRequireReset = true; } else if (mActiveEditor==&mSelectionList) { switch (event.keysym.sym) { case SDLK_UP: mSelectionList.Command(SCI_LINEUP); break; case SDLK_DOWN: mSelectionList.Command(SCI_LINEDOWN); break; case SDLK_RETURN: if (mSelectionMode==SELMODE_PROGRAM_LIST) { fillListWithShaders(); mSelectionMode=SELMODE_SHADER_LIST; } else { loadShaderSource(); } break; case SDLK_BACKSPACE: if (mSelectionMode==SELMODE_SHADER_LIST) { fillListWithPrograms(); mSelectionMode=SELMODE_PROGRAM_LIST; } break; } } else { int sciKey; switch(event.keysym.sym) { case SDLK_DOWN: sciKey = SCK_DOWN; break; case SDLK_UP: sciKey = SCK_UP; break; case SDLK_LEFT: sciKey = SCK_LEFT; break; case SDLK_RIGHT: sciKey = SCK_RIGHT; break; case SDLK_HOME: sciKey = SCK_HOME; break; case SDLK_END: sciKey = SCK_END; break; case SDLK_PAGEUP: sciKey = SCK_PRIOR; break; case SDLK_PAGEDOWN: sciKey = SCK_NEXT; break; case SDLK_DELETE: sciKey = SCK_DELETE; break; case SDLK_INSERT: sciKey = SCK_INSERT; break; case SDLK_ESCAPE: sciKey = SCK_ESCAPE; break; case SDLK_BACKSPACE: sciKey = SCK_BACK; break; case SDLK_TAB: sciKey = SCK_TAB; break; case SDLK_RETURN: sciKey = SCK_RETURN; break; case SDLK_KP_PLUS: sciKey = SCK_ADD; break; case SDLK_KP_MINUS: sciKey = SCK_SUBTRACT; break; case SDLK_KP_DIVIDE: sciKey = SCK_DIVIDE; break; case SDLK_LGUI: sciKey = SCK_WIN; break; case SDLK_RGUI: sciKey = SCK_RWIN; break; case SDLK_MENU: sciKey = SCK_MENU; break; case SDLK_SLASH: sciKey = '/'; break; case SDLK_ASTERISK: sciKey = '`'; break; case SDLK_LEFTBRACKET: sciKey = '['; break; case SDLK_BACKSLASH: sciKey = '\\'; break; case SDLK_RIGHTBRACKET: sciKey = ']'; break; case SDLK_LSHIFT: case SDLK_RSHIFT: case SDLK_LALT: case SDLK_RALT: case SDLK_LCTRL: case SDLK_RCTRL: sciKey = 0; break; default: sciKey = event.keysym.sym; } if (sciKey) { bool consumed; bool ctrlPressed = event.keysym.mod&KMOD_LCTRL || event.keysym.mod&KMOD_RCTRL; bool altPressed = event.keysym.mod&KMOD_LALT || event.keysym.mod&KMOD_RALT; bool shiftPressed = event.keysym.mod&KMOD_LSHIFT || event.keysym.mod&KMOD_RSHIFT; mActiveEditor->KeyDown((SDLK_a<=sciKey && sciKey<=SDLK_z)?sciKey-'a'+'A':sciKey, shiftPressed, ctrlPressed, altPressed, &consumed ); } } }
GLSLProgram::GLSLProgram(const char *vsource, const char *fsource) { mProg = compileProgram(vsource, 0, fsource); }
void SXFunctionInternal::spAllocOpenCL() { // OpenCL return flag cl_int ret; // Generate the kernel source code stringstream ss; const char* fcn_name[2] = {"sp_evaluate_fwd", "sp_evaluate_adj"}; for (int kernel=0; kernel<2; ++kernel) { bool use_fwd = kernel==0; ss << "__kernel void " << fcn_name[kernel] << "("; bool first=true; for (int i=0; i<nIn(); ++i) { if (first) first=false; else ss << ", "; ss << "__global unsigned long *x" << i; } for (int i=0; i<nOut(); ++i) { if (first) first=false; else ss << ", "; ss << "__global unsigned long *r" << i; } ss << ") { " << endl; if (use_fwd) { // Which variables have been declared vector<bool> declared(n_w_, false); // Propagate sparsity forward for (vector<AlgEl>::iterator it=algorithm_.begin(); it!=algorithm_.end(); ++it) { if (it->op==OP_OUTPUT) { ss << "if (r" << it->i0 << "!=0) r" << it->i0 << "[" << it->i2 << "]=" << "a" << it->i1; } else { // Declare result if not already declared if (!declared[it->i0]) { ss << "ulong "; declared[it->i0]=true; } // Where to store the result ss << "a" << it->i0 << "="; // What to store if (it->op==OP_CONST || it->op==OP_PARAMETER) { ss << "0"; } else if (it->op==OP_INPUT) { ss << "x" << it->i1 << "[" << it->i2 << "]"; } else { int ndep = casadi_math<double>::ndeps(it->op); for (int c=0; c<ndep; ++c) { if (c==0) { ss << "a" << it->i1; } else { ss << "|"; ss << "a" << it->i2; } } } } ss << ";" << endl; } } else { // Backward propagation // Temporary variable ss << "ulong t;" << endl; // Declare and initialize work vector for (int i=0; i<n_w_; ++i) { ss << "ulong a" << i << "=0;"<< endl; } // Propagate sparsity backward for (vector<AlgEl>::reverse_iterator it=algorithm_.rbegin(); it!=algorithm_.rend(); ++it) { if (it->op==OP_OUTPUT) { ss << "if (r" << it->i0 << "!=0) a" << it->i1 << "|=r" << it->i0 << "[" << it->i2 << "];" << endl; } else { if (it->op==OP_INPUT) { ss << "x" << it->i1 << "[" << it->i2 << "]=a" << it->i0 << "; "; ss << "a" << it->i0 << "=0;" << endl; } else if (it->op==OP_CONST || it->op==OP_PARAMETER) { ss << "a" << it->i0 << "=0;" << endl; } else { int ndep = casadi_math<double>::ndeps(it->op); ss << "t=a" << it->i0 << "; "; ss << "a" << it->i0 << "=0; "; ss << "a" << it->i1 << "|=" << "t" << "; "; if (ndep>1) { ss << "a" << it->i2 << "|=" << "t" << "; "; } ss << endl; } } } } ss << "}" << endl << endl; } // Form c-string std::string s = ss.str(); if (verbose()) { userOut() << "Kernel source code for sparsity propagation:" << endl; userOut() << " ***** " << endl; userOut() << s; userOut() << " ***** " << endl; } const char* cstr = s.c_str(); // Parse kernel source code sp_program_ = clCreateProgramWithSource(sparsity_propagation_kernel_.context, 1, static_cast<const char **>(&cstr), 0, &ret); casadi_assert(ret == CL_SUCCESS); casadi_assert(sp_program_ != 0); // Build Kernel Program compileProgram(sp_program_); // Create OpenCL kernel for forward propatation sp_fwd_kernel_ = clCreateKernel(sp_program_, fcn_name[0], &ret); casadi_assert(ret == CL_SUCCESS); // Create OpenCL kernel for backward propatation sp_adj_kernel_ = clCreateKernel(sp_program_, fcn_name[1], &ret); casadi_assert(ret == CL_SUCCESS); // Memory buffer for each of the input arrays sp_input_memobj_.resize(nIn(), static_cast<cl_mem>(0)); for (int i=0; i<sp_input_memobj_.size(); ++i) { sp_input_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, inputNoCheck(i).size() * sizeof(cl_ulong), reinterpret_cast<void*>(inputNoCheck(i).ptr()), &ret); casadi_assert(ret == CL_SUCCESS); } // Memory buffer for each of the output arrays sp_output_memobj_.resize(nOut(), static_cast<cl_mem>(0)); for (int i=0; i<sp_output_memobj_.size(); ++i) { sp_output_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, outputNoCheck(i).size() * sizeof(cl_ulong), reinterpret_cast<void*>(outputNoCheck(i).ptr()), &ret); casadi_assert(ret == CL_SUCCESS); } }
GameRenderer::GameRenderer(Logger* log, GameData* _data) : data(_data) , logger(log) , renderer(new OpenGLRenderer) , _renderAlpha(0.f) , _renderWorld(nullptr) , cullOverride(false) , map(renderer, _data) , water(this) , text(this) { logger->info("Renderer", renderer->getIDString()); worldProg = renderer->createShader( GameShaders::WorldObject::VertexShader, GameShaders::WorldObject::FragmentShader); renderer->setUniformTexture(worldProg, "texture", 0); renderer->setProgramBlockBinding(worldProg, "SceneData", 1); renderer->setProgramBlockBinding(worldProg, "ObjectData", 2); particleProg = renderer->createShader( GameShaders::WorldObject::VertexShader, GameShaders::Particle::FragmentShader); renderer->setUniformTexture(particleProg, "texture", 0); renderer->setProgramBlockBinding(particleProg, "SceneData", 1); renderer->setProgramBlockBinding(particleProg, "ObjectData", 2); skyProg = renderer->createShader( GameShaders::Sky::VertexShader, GameShaders::Sky::FragmentShader); renderer->setProgramBlockBinding(skyProg, "SceneData", 1); postProg = renderer->createShader( GameShaders::DefaultPostProcess::VertexShader, GameShaders::DefaultPostProcess::FragmentShader); glGenVertexArrays( 1, &vao ); glGenTextures(1, &m_missingTexture); glBindTexture(GL_TEXTURE_2D, m_missingTexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, kMissingTextureBytes); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glGenFramebuffers(1, &framebufferName); glBindFramebuffer(GL_FRAMEBUFFER, framebufferName); glGenTextures(2, fbTextures); glBindTexture(GL_TEXTURE_2D, fbTextures[0]); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 128, 128, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glBindTexture(GL_TEXTURE_2D, fbTextures[1]); glTexImage2D(GL_TEXTURE_2D, 0, GL_R16F, 128, 128, 0, GL_RED, GL_FLOAT, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fbTextures[0], 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, fbTextures[1], 0); // Give water renderer the data texture water.setDataTexture(1, fbTextures[1]); glGenRenderbuffers(1, fbRenderBuffers); glBindRenderbuffer(GL_RENDERBUFFER, fbRenderBuffers[0]); glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, 128, 128); glFramebufferRenderbuffer( GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, fbRenderBuffers[0] ); // Create the skydome size_t segments = skydomeSegments, rows = skydomeRows; float R = 1.f/(float)(rows-1); float S = 1.f/(float)(segments-1); std::vector<VertexP3> skydomeVerts; skydomeVerts.resize(rows * segments); for( size_t r = 0, i = 0; r < rows; ++r) { for( size_t s = 0; s < segments; ++s) { skydomeVerts[i++].position = glm::vec3( cos(2.f * M_PI * s * S) * cos(M_PI_2 * r * R), sin(2.f * M_PI * s * S) * cos(M_PI_2 * r * R), sin(M_PI_2 * r * R) ); } } skyGbuff.uploadVertices(skydomeVerts); skyDbuff.addGeometry(&skyGbuff); skyDbuff.setFaceType(GL_TRIANGLES); glGenBuffers(1, &skydomeIBO); std::vector<GLuint> skydomeIndBuff; skydomeIndBuff.resize(rows*segments*6); for( size_t r = 0, i = 0; r < (rows-1); ++r ) { for( size_t s = 0; s < (segments-1); ++s ) { skydomeIndBuff[i++] = r * segments + s; skydomeIndBuff[i++] = r * segments + (s+1); skydomeIndBuff[i++] = (r+1) * segments + (s+1); skydomeIndBuff[i++] = r * segments + s; skydomeIndBuff[i++] = (r+1) * segments + (s+1); skydomeIndBuff[i++] = (r+1) * segments + s; } } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, skydomeIBO); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint) * skydomeIndBuff.size(), skydomeIndBuff.data(), GL_STATIC_DRAW); glBindVertexArray(0); glGenBuffers(1, &debugVBO); glGenTextures(1, &debugTex); glGenVertexArrays(1, &debugVAO); particleGeom.uploadVertices<ParticleVert>( { { 0.5f, 0.5f, 1.f, 1.f, 1.f, 1.f, 1.f}, {-0.5f, 0.5f, 0.f, 1.f, 1.f, 1.f, 1.f}, { 0.5f,-0.5f, 1.f, 0.f, 1.f, 1.f, 1.f}, {-0.5f,-0.5f, 0.f, 0.f, 1.f, 1.f, 1.f} }); particleDraw.addGeometry(&particleGeom); particleDraw.setFaceType(GL_TRIANGLE_STRIP); ssRectGeom.uploadVertices(sspaceRect); ssRectDraw.addGeometry(&ssRectGeom); ssRectDraw.setFaceType(GL_TRIANGLE_STRIP); ssRectProgram = compileProgram(GameShaders::ScreenSpaceRect::VertexShader, GameShaders::ScreenSpaceRect::FragmentShader); ssRectTexture = glGetUniformLocation(ssRectProgram, "texture"); ssRectColour = glGetUniformLocation(ssRectProgram, "colour"); ssRectSize = glGetUniformLocation(ssRectProgram, "size"); ssRectOffset = glGetUniformLocation(ssRectProgram, "offset"); const static int cylsegments = 16; std::vector<Model::GeometryVertex> cylverts; for(int s = 0; s < cylsegments; ++s) { float theta = (2.f*glm::pi<float>()/cylsegments) * (s+0); float gamma = (2.f*glm::pi<float>()/cylsegments) * (s+1); glm::vec2 p0( glm::sin(theta), glm::cos(theta) ); glm::vec2 p1( glm::sin(gamma), glm::cos(gamma) ); p0 *= 0.5f; p1 *= 0.5f; cylverts.push_back({glm::vec3(p0, 2.f), glm::vec3(), glm::vec2(0.45f,0.6f), glm::u8vec4(255, 255, 255, 50)}); cylverts.push_back({glm::vec3(p0,-1.f), glm::vec3(), glm::vec2(0.45f,0.4f), glm::u8vec4(255, 255, 255, 150)}); cylverts.push_back({glm::vec3(p1, 2.f), glm::vec3(), glm::vec2(0.55f,0.6f), glm::u8vec4(255, 255, 255, 50)}); cylverts.push_back({glm::vec3(p0,-1.f), glm::vec3(), glm::vec2(0.45f,0.4f), glm::u8vec4(255, 255, 255, 150)}); cylverts.push_back({glm::vec3(p1,-1.f), glm::vec3(), glm::vec2(0.55f,0.4f), glm::u8vec4(255, 255, 255, 150)}); cylverts.push_back({glm::vec3(p1, 2.f), glm::vec3(), glm::vec2(0.55f,0.6f), glm::u8vec4(255, 255, 255, 50)}); } cylinderGeometry.uploadVertices<Model::GeometryVertex>(cylverts); cylinderBuffer.addGeometry(&cylinderGeometry); cylinderBuffer.setFaceType(GL_TRIANGLES); }
GLSLProgram::GLSLProgram(const char *vsource, const char *gsource, const char *fsource, GLenum gsInput, GLenum gsOutput, int maxVerts) { mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts); }
int gpuFFTFour(CLEnv *cl, int N, float *data0){ int Ns, Is; int even_odd = 0; size_t globalws[1]; size_t localws[1]; cl_event myevent; cl_ulong task_queued, task_start, task_end; double total_time = 0; int err; if(compileProgram(cl, "gpuFFTFour.cl", &program) < 0){ printf("Runtime Error: gpuFFTFour. Failed to call compileProgram.\n"); releaseStuff(); return -1; } kernel_gpuFFTFour = clCreateKernel(program, "gpuFFTFour", &err); if(err < 0){ printf("Runtime Error: gpuFFTFour. Calling clCreateKernel failed with code %d.\n", err); releaseStuff(); return err; } buffer_data0 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE, sizeof(float) * N * 2, NULL, &err); if(err < 0){ printf("Runtime Error: gpuFFTFour. Calling clCreateBuffer failed with code %d.\n", err); releaseStuff(); return err; } buffer_data1 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE, sizeof(float) * N * 2, NULL, &err); if(err < 0){ printf("Runtime Error: gpuFFTFour. Calling clCreateBuffer failed with code %d.\n", err); releaseStuff(); return err; } err = clEnqueueWriteBuffer(cl->queue, buffer_data0, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL); if(err < 0){ printf("Runtime Error: gpuFFTFour. Calling clEnqueueWriteBUffer failed with code %d.\n", err); releaseStuff(); return err; } for(Ns = 1, Is = 0; Ns < N; Ns *= 4, Is += 2){ err += clSetKernelArg(kernel_gpuFFTFour, even_odd, sizeof(cl_mem), &buffer_data0); err += clSetKernelArg(kernel_gpuFFTFour, 1 - even_odd, sizeof(cl_mem), &buffer_data1); err += clSetKernelArg(kernel_gpuFFTFour, 2, sizeof(int), &N); err += clSetKernelArg(kernel_gpuFFTFour, 3, sizeof(int), &Ns); even_odd = 1 - even_odd; if(err < 0){ printf("Runtime Error: gpuFFTFour. Setting args of kernel_gpuFFTFour failed with code %d.\n", err); releaseStuff(); return err; } globalws[0] = N / 4; if(N / 4 < 256) localws[0] = N / 4; else localws[0] = 256; err = clEnqueueNDRangeKernel(cl->queue, kernel_gpuFFTFour, 1, NULL, globalws, localws, 0, NULL, &myevent); if(err < 0){ printf("Runtime Error: gpuFFTFour. Enqueuing kernel_gpuFFTFour failed with code %d.\n", err); releaseStuff(); return err; } clFinish(cl->queue); clGetEventProfilingInfo(myevent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &task_start, NULL); clGetEventProfilingInfo(myevent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &task_end, NULL); total_time += (double)(task_end - task_start) / 1000.0; //us clReleaseEvent(myevent); } if(even_odd == 1) err = clEnqueueReadBuffer(cl->queue, buffer_data1, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL); else err = clEnqueueReadBuffer(cl->queue, buffer_data0, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL); if(err < 0){ printf("Runtime Error: gpuFFTFour. Calling clEnqueueReadBuffer failed with code %d.\n", err); releaseStuff(); return err; } clFinish(cl->queue); releaseStuff(); return (int)total_time; }
void bluesteinsFFTGpu(const char* const argv[],const unsigned n, const unsigned orign,const unsigned size) { const unsigned powM = (unsigned) log2(n); printf("Compiling Bluesteins Program..\n"); compileProgram(argv, "fft.h", "kernels/bluesteins.cl"); printf("Creating Kernel\n"); for (unsigned i = 0; i < deviceCount; ++i) { createKernel(i, "bluesteins"); } const unsigned sizePerGPU = size / deviceCount; for (unsigned i = 0; i < deviceCount; ++i) { workSize[i] = (i != (deviceCount - 1)) ? sizePerGPU : (size - workOffset[i]); allocateDeviceMemoryBS(i , workSize[i], workOffset[i]); clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void*) &d_Hreal[i]); clSetKernelArg(kernel[i], 1, sizeof(cl_mem), (void*) &d_Himag[i]); clSetKernelArg(kernel[i], 2, sizeof(cl_mem), (void*) &d_Yreal[i]); clSetKernelArg(kernel[i], 3, sizeof(cl_mem), (void*) &d_Yimag[i]); clSetKernelArg(kernel[i], 4, sizeof(cl_mem), (void*) &d_Zreal[i]); clSetKernelArg(kernel[i], 5, sizeof(cl_mem), (void*) &d_Zimag[i]); clSetKernelArg(kernel[i], 6, sizeof(unsigned), &n); clSetKernelArg(kernel[i], 7, sizeof(unsigned), &orign); clSetKernelArg(kernel[i], 8, sizeof(unsigned), &powM); clSetKernelArg(kernel[i], 9, sizeof(unsigned), &blockSize); if ((i + 1) < deviceCount) { workOffset[i + 1] = workOffset[i] + workSize[i]; } } size_t localWorkSize[] = {blockSize}; for (unsigned i = 0; i < deviceCount; ++i) { size_t globalWorkSize[] = {shrRoundUp(blockSize, workSize[i])}; // kernel non blocking execution runKernel(i, localWorkSize, globalWorkSize); } h_Rreal = h_Hreal; h_Rimag = h_Himag; for (unsigned i = 0; i < deviceCount; ++i) { copyFromDevice(i, d_Hreal[i], h_Rreal + workOffset[i], workSize[i]); copyFromDevice(i, d_Himag[i], h_Rimag + workOffset[i], workSize[i]); } // wait for copy event const cl_int ciErrNum = clWaitForEvents(deviceCount, gpuDone); checkError(ciErrNum, CL_SUCCESS, "clWaitForEvents"); printGpuTime(); }