Exemplo n.º 1
0
int getOclSoftware(oclSoftware &soft, const oclHardware &hardware)
{
    cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT;
    cl_int err = clGetDeviceInfo(hardware.mDevice, CL_DEVICE_TYPE, sizeof(deviceType), &deviceType, 0);
    if ( err != CL_SUCCESS) {
        std::cout << oclErrorCode(err) << "\n";
        return -1;
    }

    unsigned char *kernelCode = 0;
    std::cout << "Loading " << soft.mFileName << "\n";

    int size = loadFile2Memory(soft.mFileName, (char **) &kernelCode);
    if (size < 0) {
        std::cout << "Failed to load kernel\n";
        return -2;
    }

    if (deviceType == CL_DEVICE_TYPE_ACCELERATOR) {
        size_t n = size;
        soft.mProgram = clCreateProgramWithBinary(hardware.mContext, 1, &hardware.mDevice, &n,
                                                  (const unsigned char **) &kernelCode, 0, &err);
    }
    else {
        soft.mProgram = clCreateProgramWithSource(hardware.mContext, 1, (const char **)&kernelCode, 0, &err);
    }
    if (!soft.mProgram || (err != CL_SUCCESS)) {
        std::cout << oclErrorCode(err) << "\n";
        return -3;
    }

    int status = compileProgram(hardware, soft);
    delete [] kernelCode;
    return status;
}
Exemplo n.º 2
0
GLSLProgram::GLSLProgram(const char *vsource, const char *fsource)
{
    curVSName = NULL;
    curFSName = NULL;
    curGSName = NULL;
    compileProgram(vsource, 0, fsource);
}
Exemplo n.º 3
0
  void SXFunctionInternal::allocOpenCL() {
    // OpenCL return flag
    cl_int ret;

    // Generate the kernel source code
    stringstream ss;

    // Add kernel prefix
    ss << "__kernel ";

    // Generate the function
    CodeGenerator gen;
    generateFunction(ss, "evaluate", "__global const double*", "__global double*", "double", gen);

    // Form c-string
    std::string s = ss.str();
    if (verbose()) {
      userOut() << "Kernel source code for numerical evaluation:" << endl;
      userOut() << " ***** " << endl;
      userOut() << s;
      userOut() << " ***** " << endl;
    }
    const char* cstr = s.c_str();

    // Parse kernel source code
    program_ = clCreateProgramWithSource(sparsity_propagation_kernel_.context, 1,
                                         static_cast<const char **>(&cstr), 0, &ret);
    casadi_assert(ret == CL_SUCCESS);
    casadi_assert(program_ != 0);

    // Build Kernel Program
    compileProgram(program_);

    // Create OpenCL kernel for forward propatation
    kernel_ = clCreateKernel(program_, "evaluate", &ret);
    casadi_assert(ret == CL_SUCCESS);

    // Memory buffer for each of the input arrays
    input_memobj_.resize(nIn(), static_cast<cl_mem>(0));
    for (int i=0; i<input_memobj_.size(); ++i) {
      input_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context,
                                        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                        inputNoCheck(i).size() * sizeof(cl_double),
                                        static_cast<void*>(inputNoCheck(i).ptr()), &ret);
      casadi_assert(ret == CL_SUCCESS);
    }

    // Memory buffer for each of the output arrays
    output_memobj_.resize(nOut(), static_cast<cl_mem>(0));
    for (int i=0; i<output_memobj_.size(); ++i) {
      output_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context,
                                         CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
                                         outputNoCheck(i).size() * sizeof(cl_double),
                                         static_cast<void*>(outputNoCheck(i).ptr()), &ret);
      casadi_assert(ret == CL_SUCCESS);
    }


  }
Exemplo n.º 4
0
bool GLSLProgram::setSourceFromStrings(const char *vertSrc, const char *fragSrc) {
    if (m_program) {
        glDeleteProgram(m_program);
        m_program = 0;
    }
    m_program = compileProgram(vertSrc, fragSrc);
    return m_program != 0;
}
Exemplo n.º 5
0
Shader::Shader(const char* vsource, const char* fsource)
{
	m_Program = glCreateProgram();
	
	addVertexShader(vsource);
	addFragmentShader(fsource);
	bindAttributeLocations();
	compileProgram();
}
Exemplo n.º 6
0
GL3ShaderProgram::GL3ShaderProgram(FileLoader *loader, const GL3ShaderDefinition &definition) {
    _handle = compileProgram(loader, definition);

    // Now get the locations of the uniforms
    for (auto &mapping : definition.uniforms) {
        auto loc = glGetUniformLocation(_handle, mapping.name);

        _uniformLocations[static_cast<size_t>(mapping.parameter)] = loc;
    }
}
Exemplo n.º 7
0
Shader::Shader(const char* vsource, const char* fsource, const char* gsource)
{
	m_Program = glCreateProgram();

	addVertexShader(vsource);
	addFragmentShader(fsource);
	if(gsource!=NULL)
	{
		addGeometryShader(gsource);
	}
	bindAttributeLocations();
	compileProgram();
}
Exemplo n.º 8
0
GLuint GL3ShaderManager::getProgram(ShaderType type, ShaderFlags flags) {
    auto iter = _programCache.find(std::make_pair(type, flags));
    if (iter != _programCache.end()) {
        return iter->second;
    }

    auto definition = getShaderDefinition(type);

    auto prog = compileProgram(_fileLoader, definition, getHeader(flags));
    bindLocations(prog, definition);
    _programCache.insert(std::make_pair(std::make_pair(type, flags), prog));

    return prog;
}
Exemplo n.º 9
0
int compile(char *fileName) {
  if (openInputStream(fileName) == IO_ERROR)
    return IO_ERROR;

  currentToken = NULL;
  lookAhead = getValidToken();

  compileProgram();

  free(currentToken);
  free(lookAhead);
  closeInputStream();
  return IO_SUCCESS;
}
Exemplo n.º 10
0
void
GLSLProgram::loadFromFiles(const char *vFilename,  const char *gFilename, const char *fFilename,
                           GLenum gsInput, GLenum gsOutput, int maxVerts)
{
    char *vsource = readTextFile(vFilename);
    char *gsource = 0;
    if (gFilename) {
        gsource = readTextFile(gFilename);
    }
    char *fsource = readTextFile(fFilename);

    mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts);

    delete [] vsource;
    if (gsource) delete [] gsource;
    delete [] fsource;
}
Exemplo n.º 11
0
JNIEXPORT void JNICALL Java_com_bullet_DemoLib_change(JNIEnv * env, jobject obj,  jint width, jint height)
{
	static bool first = true;
	// init shoot box
	//box_body.clear();

	stepFront(width,height);
	LOGI("View Port and frustrum set.");

	if(first){
		LOGI("m_screenSpaceRenderer creation start");
		m_screenSpaceRenderer = new ScreenSpaceFluidRendererGL(m_glutScreenWidth, m_glutScreenHeight);
		basicShader = compileProgram(vBasicShader, fBasicShader);
		if(m_screenSpaceRenderer)
			LOGI("m_screenSpaceRenderer is created");
		first = false;
	}
	//updateCamera();
}
Exemplo n.º 12
0
int compile(char *fileName) {
  if (openInputStream(fileName) == IO_ERROR)
    return IO_ERROR;

  currentToken = NULL;
  lookAhead = getValidToken();

  initSymTab();

  compileProgram();

  printObject(symtab->program,0);

  cleanSymTab();
  free(currentToken);
  free(lookAhead);
  closeInputStream();
  return IO_SUCCESS;

}
Exemplo n.º 13
0
GLuint
GLSLProgram::compileProgramFromFiles(const char *vFilename,  const char *gFilename, const char *fFilename,
                           GLenum gsInput, GLenum gsOutput, int maxVerts)
{
    char *vsource = readTextFile(vFilename);
    char *gsource = 0;
    if (gFilename) {
        gsource = readTextFile(gFilename);
    }
    char *fsource = readTextFile(fFilename);
    if(vsource)
    {
        GLSLProgram::setShaderNames(NULL, vFilename, gFilename, fFilename);
        mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts);
        delete [] vsource;
        if (gsource) delete [] gsource;
        delete [] fsource;
        return mProg;
    }
    return 0;
}
    virtual void SetUp()
    {
        ANGLETest::SetUp();

        const std::string testVertexShaderSource = SHADER_SOURCE
        (
            attribute highp vec4 aPosition;

            void main(void)
            {
                gl_Position = aPosition;
            }
        );

        const std::string testFragmentShaderSource = SHADER_SOURCE
        (
            uniform highp vec4 color;
            void main(void)
            {
                gl_FragColor = color;
            }
        );

        mProgram = compileProgram(testVertexShaderSource, testFragmentShaderSource);
        if (mProgram == 0)
        {
            FAIL() << "shader compilation failed.";
        }

        mColorLocation = glGetUniformLocation(mProgram, "color");

        glUseProgram(mProgram);

        glClearColor(0, 0, 0, 0);
        glClearDepthf(0.0);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

        glEnable(GL_BLEND);
        glDisable(GL_DEPTH_TEST);
    }
Exemplo n.º 15
0
 Program::Program(Shader *shaders[], int shaderCount,
                  bool vertices, bool uvs, bool normals)
                : vertices(vertices), uvs(uvs), normals(normals) {
     this->programID = compileProgram(shaders, shaderCount);
     linked = (programID > 0);
 }
Exemplo n.º 16
0
    bool Shader::compile() {
        int max_log = 2048;
        
        compiled = true;
        
        if (vertexLog != NULL) {
            free(vertexLog);
            vertexLog = NULL;
        }
        if (fragmentLog != NULL) {
            free(fragmentLog);
            fragmentLog = NULL;
        }
        if (linkLog != NULL) {
            free(linkLog);
            linkLog = NULL;
        }
        
        vertexShader = compileProgram(GL_VERTEX_SHADER, vertexSource);
        
        GLsizei logLen;
        GLint vertResult;
        GLint fragResult;
        GLchar tmpChar;
        
        glGetShaderiv(vertexShader, GL_COMPILE_STATUS, &vertResult);
        
        if (vertResult == GL_FALSE) {
            glGetShaderInfoLog(vertexShader, 1, &logLen, &tmpChar);
            max_log = logLen;
            vertexLog = (GLchar *)malloc(max_log);
            glGetShaderInfoLog(vertexShader, max_log, &logLen, vertexLog);
            compiled = false;
        }
        
        fragmentShader = compileProgram(GL_FRAGMENT_SHADER, fragmentSource);
        
        glGetShaderiv(fragmentShader, GL_COMPILE_STATUS, &fragResult);
        
        if (fragResult == GL_FALSE) {
            glGetShaderInfoLog(fragmentShader, 1, &logLen, &tmpChar);
            max_log = logLen;
            fragmentLog = (GLchar *)malloc(max_log);
            glGetShaderInfoLog(fragmentShader, max_log, &logLen, fragmentLog);
            compiled = false;
        }
        
        if (!compiled) {
            return false;
        }
        
        program = glCreateProgram();
        
        glAttachShader(program, vertexShader);
        glAttachShader(program, fragmentShader);
        glLinkProgram(program);
        
        GLint linkResult;
        glGetProgramiv(program, GL_LINK_STATUS, &linkResult);
        
        if (linkResult == GL_FALSE) {
            glGetProgramInfoLog(program, 1, &logLen, &tmpChar);
            max_log = logLen;
            linkLog = (GLchar *)malloc(max_log);
            glGetProgramInfoLog(program, max_log, &logLen, linkLog);
            compiled = false;
            return false;
        }
        
        if (!glIsProgram(program)) {
            cout << "Program is not valid.\n" << endl;
        }

        
        return compiled;
    }
Exemplo n.º 17
0
static bool init_rc5_72_il4a_nand(u32 Device)
{
  if(CContext[Device].coreID!=CORE_IL4NA)
    AMDStreamReinitializeDevice(Device);

  if(!CContext[Device].active)
  {
    Log("Thread %u: Device is not supported\n", Device);
    return false;
  } else{
    switch(CContext[Device].attribs.target) {
    case CAL_TARGET_600:
      CContext[Device].domainSizeX=56;
      CContext[Device].domainSizeY=56;
      CContext[Device].maxIters=128;
      break;
    case CAL_TARGET_610:
      CContext[Device].domainSizeX=40;
      CContext[Device].domainSizeY=40;
      CContext[Device].maxIters=10;
      break;
    case CAL_TARGET_630:
      CContext[Device].domainSizeX=24;
      CContext[Device].domainSizeY=24;
      CContext[Device].maxIters=350;
      break;
    case CAL_TARGET_670:
      CContext[Device].domainSizeX=32;
      CContext[Device].domainSizeY=32;
      CContext[Device].maxIters=300;
      break;
    case CAL_TARGET_7XX:
      //TODO:domainSize
      break;
    case CAL_TARGET_770:
      CContext[Device].domainSizeX=600;
      CContext[Device].domainSizeY=600;
      CContext[Device].maxIters=5;
      break;
    case CAL_TARGET_710:
      //TODO:domainSize
      CContext[Device].domainSizeX=80;
      CContext[Device].domainSizeY=80;
      CContext[Device].maxIters=128;
      break;
    case CAL_TARGET_730:
      CContext[Device].domainSizeX=120;
      CContext[Device].domainSizeY=120;
      CContext[Device].maxIters=30;
      break;
    case 8: //RV870
      CContext[Device].domainSizeX=728;
      CContext[Device].domainSizeY=728;
      CContext[Device].maxIters=4;
      break;
    case 9: //RV840
      CContext[Device].domainSizeX=656;
      CContext[Device].domainSizeY=656;
      CContext[Device].maxIters=3;
      break;
    case 17://Barts
      CContext[Device].domainSizeX=904;
      CContext[Device].domainSizeY=904;
      CContext[Device].maxIters=3;
    default:
      break;
    }
  }

  CALresult result;
  result=calCtxCreate(&CContext[Device].ctx, CContext[Device].device);
  if(result!=CAL_RESULT_OK)
  {
    Log("Thread %u: creating context failed! Reason:%u\n",Device,result);
    return false;
  }

  CContext[Device].globalRes0=0;
  if(CContext[Device].attribs.target<20)
    if(CContext[Device].attribs.memExport) {
      calResAllocRemote2D(&CContext[Device].globalRes0, &CContext[Device].device, 1, 64,
                        1, CAL_FORMAT_UINT_1, CAL_RESALLOC_GLOBAL_BUFFER);
    }

  //-------------------------------------------------------------------------
  // Compiling Device Program
  //-------------------------------------------------------------------------
  result=compileProgram(&CContext[Device].ctx,&CContext[Device].image,&CContext[Device].module0,
                        (CALchar *)il4ag_nand_src,CContext[Device].attribs.target,CContext[Device].globalRes0);

  if ( result!= CAL_RESULT_OK)
  {
    Log("Core compilation failed. Exiting.\n");
    return false;
  }

  //-------------------------------------------------------------------------
  // Allocating and initializing resources
  //-------------------------------------------------------------------------

  // Input and output resources
  CContext[Device].outputRes0=0;
  if(CContext[Device].attribs.cachedRemoteRAM>0)
    calResAllocRemote2D(&CContext[Device].outputRes0, &CContext[Device].device, 1, CContext[Device].domainSizeX,
                        CContext[Device].domainSizeY, CAL_FORMAT_UINT_1, CAL_RESALLOC_CACHEABLE);

  if(!CContext[Device].outputRes0) {
    if(calResAllocRemote2D(&CContext[Device].outputRes0, &CContext[Device].device, 1, CContext[Device].domainSizeX,
                           CContext[Device].domainSizeY, CAL_FORMAT_UINT_1, 0)!=CAL_RESULT_OK)
    {
      Log("Failed to allocate output buffer\n");
      return false;
    }
  }

  // Constant resource
  if(calResAllocRemote1D(&CContext[Device].constRes0, &CContext[Device].device, 1, 3, CAL_FORMAT_UINT_4, 0)!=CAL_RESULT_OK)
  {
    Log("Failed to allocate constants buffer\n");
    return false;
  }

  // Mapping output resource to CPU and initializing values
  // Getting memory handle from resources
  result=calCtxGetMem(&CContext[Device].outputMem0, CContext[Device].ctx, CContext[Device].outputRes0);
  if(result==CAL_RESULT_OK)
    result=calCtxGetMem(&CContext[Device].constMem0, CContext[Device].ctx, CContext[Device].constRes0);
  if(result!=CAL_RESULT_OK)
  {
    Log("Failed to map resources!\n");
    return false;
  }

  // Defining entry point for the module
  result=calModuleGetEntry(&CContext[Device].func0, CContext[Device].ctx, CContext[Device].module0, "main");
  if(result==CAL_RESULT_OK) {
    result=calModuleGetName(&CContext[Device].outName0, CContext[Device].ctx, CContext[Device].module0, "o0");
    if(result==CAL_RESULT_OK)
      result=calModuleGetName(&CContext[Device].constName0, CContext[Device].ctx, CContext[Device].module0, "cb0");
  }
  if(result!=CAL_RESULT_OK)
  {
    Log("Failed to get entry points!\n");
    return false;
  }

  if(CContext[Device].globalRes0) {
    result=calCtxGetMem(&CContext[Device].globalMem0, CContext[Device].ctx, CContext[Device].globalRes0);
    if(result==CAL_RESULT_OK) {
      result=calModuleGetName(&CContext[Device].globalName0, CContext[Device].ctx, CContext[Device].module0, "g[]");
      if(result==CAL_RESULT_OK)
        result=calCtxSetMem(CContext[Device].ctx, CContext[Device].globalName0, CContext[Device].globalMem0);
    }
    if(result!=CAL_RESULT_OK)
    {
      Log("Failed to allocate global buffer!\n");
      return false;
    }
  }

  // Setting input and output buffers
  // used in the kernel
  result=calCtxSetMem(CContext[Device].ctx, CContext[Device].outName0, CContext[Device].outputMem0);
  if(result==CAL_RESULT_OK)
    result=calCtxSetMem(CContext[Device].ctx, CContext[Device].constName0, CContext[Device].constMem0);
  if(result!=CAL_RESULT_OK)
  {
    Log("Failed to set buffers!\n");
    return false;
  }

  CContext[Device].coreID=CORE_IL4NA;

  return true;
}
Exemplo n.º 18
0
std::unique_ptr<Renderer::ShaderProgram> OpenGLRenderer::createShader(const std::string& vert,
                                                      const std::string& frag) {
    return std::make_unique<OpenGLShaderProgram>(compileProgram(vert.c_str(), frag.c_str()));
}
Exemplo n.º 19
0
void ShaderEditOverlay::handleKeyDown(SDL_KeyboardEvent& event)
{
    if (event.keysym.sym=='s' && isModEnabled(KMOD_CTRL, event.keysym.mod))
    {
        saveShaderSource();
    }
    if ('1'<=event.keysym.sym && event.keysym.sym<='3' && isModEnabled(KMOD_ALT, event.keysym.mod))
    {
        mActiveEditor->Command(SCI_SETFOCUS, false);
        switch(event.keysym.sym)
        {
            case '1': mActiveEditor=&mSelectionList; break;
            case '2': mActiveEditor=&mShaderEditor; break;
            case '3': mActiveEditor=&mDebugOutputView; break;
        }
        mActiveEditor->Command(SCI_SETFOCUS, true);
    }
    if (event.keysym.sym==SDLK_F7 && isModEnabled(0, event.keysym.mod)&&mSelectedShader&&mSelectedProgram)
    {
        compileProgram();
        mRequireReset = true;
    }
    else if (mActiveEditor==&mSelectionList)
    {
        switch (event.keysym.sym)
        {
            case SDLK_UP:
                mSelectionList.Command(SCI_LINEUP);
                break;
            case SDLK_DOWN:
                mSelectionList.Command(SCI_LINEDOWN);
                break;
            case SDLK_RETURN:
                if (mSelectionMode==SELMODE_PROGRAM_LIST)
                {
                    fillListWithShaders();
                    mSelectionMode=SELMODE_SHADER_LIST;
                }
                else
                {
                    loadShaderSource();
                }
                break;
            case SDLK_BACKSPACE:
                if (mSelectionMode==SELMODE_SHADER_LIST)
                {
                    fillListWithPrograms();
                    mSelectionMode=SELMODE_PROGRAM_LIST;
                }
                break;
        }

    }
    else
    {
        int sciKey;
        switch(event.keysym.sym)
        {
            case SDLK_DOWN:             sciKey = SCK_DOWN;          break;
            case SDLK_UP:               sciKey = SCK_UP;            break;
            case SDLK_LEFT:             sciKey = SCK_LEFT;          break;
            case SDLK_RIGHT:            sciKey = SCK_RIGHT;         break;
            case SDLK_HOME:             sciKey = SCK_HOME;          break;
            case SDLK_END:              sciKey = SCK_END;           break;
            case SDLK_PAGEUP:           sciKey = SCK_PRIOR;         break;
            case SDLK_PAGEDOWN:         sciKey = SCK_NEXT;	        break;
            case SDLK_DELETE:           sciKey = SCK_DELETE;        break;
            case SDLK_INSERT:           sciKey = SCK_INSERT;        break;
            case SDLK_ESCAPE:           sciKey = SCK_ESCAPE;        break;
            case SDLK_BACKSPACE:        sciKey = SCK_BACK;          break;
            case SDLK_TAB:              sciKey = SCK_TAB;           break;
            case SDLK_RETURN:           sciKey = SCK_RETURN;        break;
            case SDLK_KP_PLUS:          sciKey = SCK_ADD;           break;
            case SDLK_KP_MINUS:         sciKey = SCK_SUBTRACT;      break;
            case SDLK_KP_DIVIDE:        sciKey = SCK_DIVIDE;        break;
            case SDLK_LGUI:             sciKey = SCK_WIN;           break;
            case SDLK_RGUI:             sciKey = SCK_RWIN;          break;
            case SDLK_MENU:             sciKey = SCK_MENU;          break;
            case SDLK_SLASH:            sciKey = '/';               break;
            case SDLK_ASTERISK:         sciKey = '`';               break;
            case SDLK_LEFTBRACKET:      sciKey = '[';               break;
            case SDLK_BACKSLASH:        sciKey = '\\';              break;
            case SDLK_RIGHTBRACKET:     sciKey = ']';               break;
            case SDLK_LSHIFT:
            case SDLK_RSHIFT:
            case SDLK_LALT:
            case SDLK_RALT:
            case SDLK_LCTRL:
            case SDLK_RCTRL:
                sciKey = 0;
                break;
            default:
                sciKey = event.keysym.sym;
        }

        if (sciKey)
        {
            bool consumed;
            bool ctrlPressed  = event.keysym.mod&KMOD_LCTRL  || event.keysym.mod&KMOD_RCTRL;
            bool altPressed   = event.keysym.mod&KMOD_LALT   || event.keysym.mod&KMOD_RALT;
            bool shiftPressed = event.keysym.mod&KMOD_LSHIFT || event.keysym.mod&KMOD_RSHIFT;
            mActiveEditor->KeyDown((SDLK_a<=sciKey && sciKey<=SDLK_z)?sciKey-'a'+'A':sciKey,
                shiftPressed, ctrlPressed, altPressed,
                &consumed
                );
        }
    }
}
Exemplo n.º 20
0
GLSLProgram::GLSLProgram(const char *vsource, const char *fsource)
{
	mProg = compileProgram(vsource, 0, fsource);
}
Exemplo n.º 21
0
  void SXFunctionInternal::spAllocOpenCL() {
    // OpenCL return flag
    cl_int ret;

    // Generate the kernel source code
    stringstream ss;

    const char* fcn_name[2] = {"sp_evaluate_fwd", "sp_evaluate_adj"};
    for (int kernel=0; kernel<2; ++kernel) {
      bool use_fwd = kernel==0;
      ss << "__kernel void " << fcn_name[kernel] << "(";
      bool first=true;
      for (int i=0; i<nIn(); ++i) {
        if (first) first=false;
        else      ss << ", ";
        ss << "__global unsigned long *x" << i;
      }
      for (int i=0; i<nOut(); ++i) {
        if (first) first=false;
        else      ss << ", ";
        ss << "__global unsigned long *r" << i;
      }
      ss << ") { " << endl;

      if (use_fwd) {
        // Which variables have been declared
        vector<bool> declared(n_w_, false);

        // Propagate sparsity forward
        for (vector<AlgEl>::iterator it=algorithm_.begin(); it!=algorithm_.end(); ++it) {
          if (it->op==OP_OUTPUT) {
            ss << "if (r" << it->i0 << "!=0) r" << it->i0 << "[" << it->i2 << "]=" << "a" << it->i1;
          } else {
            // Declare result if not already declared
            if (!declared[it->i0]) {
              ss << "ulong ";
              declared[it->i0]=true;
            }

            // Where to store the result
            ss << "a" << it->i0 << "=";

            // What to store
            if (it->op==OP_CONST || it->op==OP_PARAMETER) {
              ss << "0";
            } else if (it->op==OP_INPUT) {
              ss << "x" << it->i1 << "[" << it->i2 << "]";
            } else {
              int ndep = casadi_math<double>::ndeps(it->op);
              for (int c=0; c<ndep; ++c) {
                if (c==0) {
                  ss << "a" << it->i1;
                } else {
                  ss << "|";
                  ss << "a" << it->i2;
                }
              }
            }
          }
          ss  << ";" << endl;
        }

      } else { // Backward propagation
        // Temporary variable
        ss << "ulong t;" << endl;

        // Declare and initialize work vector
        for (int i=0; i<n_w_; ++i) {
          ss << "ulong a" << i << "=0;"<< endl;
        }

        // Propagate sparsity backward
        for (vector<AlgEl>::reverse_iterator it=algorithm_.rbegin(); it!=algorithm_.rend(); ++it) {
          if (it->op==OP_OUTPUT) {
            ss << "if (r" << it->i0 << "!=0) a" << it->i1
               << "|=r" << it->i0 << "[" << it->i2 << "];" << endl;
          } else {
            if (it->op==OP_INPUT) {
              ss << "x" << it->i1 << "[" << it->i2 << "]=a" << it->i0 << "; ";
              ss << "a" << it->i0 << "=0;" << endl;
            } else if (it->op==OP_CONST || it->op==OP_PARAMETER) {
              ss << "a" << it->i0 << "=0;" << endl;
            } else {
              int ndep = casadi_math<double>::ndeps(it->op);
              ss << "t=a" << it->i0 << "; ";
              ss << "a" << it->i0 << "=0; ";
              ss << "a" << it->i1 << "|=" << "t" << "; ";
              if (ndep>1) {
                ss << "a" << it->i2 << "|=" << "t" << "; ";
              }
              ss << endl;
            }
          }
        }
      }
      ss << "}" << endl << endl;
    }

    // Form c-string
    std::string s = ss.str();
    if (verbose()) {
      userOut() << "Kernel source code for sparsity propagation:" << endl;
      userOut() << " ***** " << endl;
      userOut() << s;
      userOut() << " ***** " << endl;
    }
    const char* cstr = s.c_str();

    // Parse kernel source code
    sp_program_ = clCreateProgramWithSource(sparsity_propagation_kernel_.context,
                                            1, static_cast<const char **>(&cstr), 0, &ret);
    casadi_assert(ret == CL_SUCCESS);
    casadi_assert(sp_program_ != 0);

    // Build Kernel Program
    compileProgram(sp_program_);

    // Create OpenCL kernel for forward propatation
    sp_fwd_kernel_ = clCreateKernel(sp_program_, fcn_name[0], &ret);
    casadi_assert(ret == CL_SUCCESS);

    // Create OpenCL kernel for backward propatation
    sp_adj_kernel_ = clCreateKernel(sp_program_, fcn_name[1], &ret);
    casadi_assert(ret == CL_SUCCESS);

    // Memory buffer for each of the input arrays
    sp_input_memobj_.resize(nIn(), static_cast<cl_mem>(0));
    for (int i=0; i<sp_input_memobj_.size(); ++i) {
      sp_input_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context,
                                           CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                                           inputNoCheck(i).size() * sizeof(cl_ulong),
                                           reinterpret_cast<void*>(inputNoCheck(i).ptr()), &ret);
      casadi_assert(ret == CL_SUCCESS);
    }

    // Memory buffer for each of the output arrays
    sp_output_memobj_.resize(nOut(), static_cast<cl_mem>(0));
    for (int i=0; i<sp_output_memobj_.size(); ++i) {
      sp_output_memobj_[i] = clCreateBuffer(sparsity_propagation_kernel_.context,
                                            CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                                            outputNoCheck(i).size() * sizeof(cl_ulong),
                                            reinterpret_cast<void*>(outputNoCheck(i).ptr()), &ret);
      casadi_assert(ret == CL_SUCCESS);
    }
  }
Exemplo n.º 22
0
GameRenderer::GameRenderer(Logger* log, GameData* _data)
	: data(_data)
	, logger(log)
	, renderer(new OpenGLRenderer)
	, _renderAlpha(0.f)
	, _renderWorld(nullptr)
	, cullOverride(false)
	, map(renderer, _data)
	, water(this)
	, text(this)
{
	logger->info("Renderer", renderer->getIDString());

	worldProg = renderer->createShader(
				GameShaders::WorldObject::VertexShader,
				GameShaders::WorldObject::FragmentShader);

	renderer->setUniformTexture(worldProg, "texture", 0);
	renderer->setProgramBlockBinding(worldProg, "SceneData", 1);
	renderer->setProgramBlockBinding(worldProg, "ObjectData", 2);
	
	particleProg = renderer->createShader(
		GameShaders::WorldObject::VertexShader,
		GameShaders::Particle::FragmentShader);
	
	renderer->setUniformTexture(particleProg, "texture", 0);
	renderer->setProgramBlockBinding(particleProg, "SceneData", 1);
	renderer->setProgramBlockBinding(particleProg, "ObjectData", 2);

	skyProg = renderer->createShader(
		GameShaders::Sky::VertexShader,
		GameShaders::Sky::FragmentShader);

	renderer->setProgramBlockBinding(skyProg, "SceneData", 1);

	postProg = renderer->createShader(
		GameShaders::DefaultPostProcess::VertexShader,
		GameShaders::DefaultPostProcess::FragmentShader);

	glGenVertexArrays( 1, &vao );

	glGenTextures(1, &m_missingTexture);
	glBindTexture(GL_TEXTURE_2D, m_missingTexture);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, kMissingTextureBytes);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

	glGenFramebuffers(1, &framebufferName);
	glBindFramebuffer(GL_FRAMEBUFFER, framebufferName);
	glGenTextures(2, fbTextures);
	
	glBindTexture(GL_TEXTURE_2D, fbTextures[0]);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 128, 128, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	
	glBindTexture(GL_TEXTURE_2D, fbTextures[1]);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_R16F, 128, 128, 0, GL_RED, GL_FLOAT, NULL);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

	glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fbTextures[0], 0);
	glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, fbTextures[1], 0);
	
	// Give water renderer the data texture
	water.setDataTexture(1, fbTextures[1]);
	
	glGenRenderbuffers(1, fbRenderBuffers);
	glBindRenderbuffer(GL_RENDERBUFFER, fbRenderBuffers[0]);
	glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, 128, 128);
	glFramebufferRenderbuffer(
		GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, fbRenderBuffers[0]
	);
	
	// Create the skydome

	size_t segments = skydomeSegments, rows = skydomeRows;

    float R = 1.f/(float)(rows-1);
    float S = 1.f/(float)(segments-1);
	std::vector<VertexP3> skydomeVerts;
	skydomeVerts.resize(rows * segments);
    for( size_t r = 0, i = 0; r < rows; ++r) {
        for( size_t s = 0; s < segments; ++s) {
			skydomeVerts[i++].position = glm::vec3(
                        cos(2.f * M_PI * s * S) * cos(M_PI_2 * r * R),
                        sin(2.f * M_PI * s * S) * cos(M_PI_2 * r * R),
                        sin(M_PI_2 * r * R)
                        );
		}
	}
	skyGbuff.uploadVertices(skydomeVerts);
	skyDbuff.addGeometry(&skyGbuff);
	skyDbuff.setFaceType(GL_TRIANGLES);

    glGenBuffers(1, &skydomeIBO);
	std::vector<GLuint> skydomeIndBuff;
	skydomeIndBuff.resize(rows*segments*6);
    for( size_t r = 0, i = 0; r < (rows-1); ++r ) {
        for( size_t s = 0; s < (segments-1); ++s ) {
            skydomeIndBuff[i++] = r * segments + s;
            skydomeIndBuff[i++] = r * segments + (s+1);
            skydomeIndBuff[i++] = (r+1) * segments + (s+1);
            skydomeIndBuff[i++] = r * segments + s;
            skydomeIndBuff[i++] = (r+1) * segments + (s+1);
            skydomeIndBuff[i++] = (r+1) * segments + s;
        }
    }
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, skydomeIBO);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint) * skydomeIndBuff.size(), skydomeIndBuff.data(), GL_STATIC_DRAW);

	glBindVertexArray(0);

    glGenBuffers(1, &debugVBO);
    glGenTextures(1, &debugTex);
    glGenVertexArrays(1, &debugVAO);

	particleGeom.uploadVertices<ParticleVert>(
	{
					{ 0.5f, 0.5f, 1.f, 1.f, 1.f, 1.f, 1.f},
					{-0.5f, 0.5f, 0.f, 1.f, 1.f, 1.f, 1.f},
					{ 0.5f,-0.5f, 1.f, 0.f, 1.f, 1.f, 1.f},
					{-0.5f,-0.5f, 0.f, 0.f, 1.f, 1.f, 1.f}
	});
	particleDraw.addGeometry(&particleGeom);
	particleDraw.setFaceType(GL_TRIANGLE_STRIP);

	ssRectGeom.uploadVertices(sspaceRect);
	ssRectDraw.addGeometry(&ssRectGeom);
	ssRectDraw.setFaceType(GL_TRIANGLE_STRIP);

	ssRectProgram = compileProgram(GameShaders::ScreenSpaceRect::VertexShader,
								  GameShaders::ScreenSpaceRect::FragmentShader);

	ssRectTexture = glGetUniformLocation(ssRectProgram, "texture");
	ssRectColour = glGetUniformLocation(ssRectProgram, "colour");
	ssRectSize = glGetUniformLocation(ssRectProgram, "size");
	ssRectOffset = glGetUniformLocation(ssRectProgram, "offset");
	
	const static int cylsegments = 16;
	std::vector<Model::GeometryVertex> cylverts;
	for(int s = 0; s < cylsegments; ++s)
	{
		float theta = (2.f*glm::pi<float>()/cylsegments) * (s+0);
		float gamma = (2.f*glm::pi<float>()/cylsegments) * (s+1);
		glm::vec2 p0( glm::sin(theta), glm::cos(theta) );
		glm::vec2 p1( glm::sin(gamma), glm::cos(gamma) );
		
		p0 *= 0.5f;
		p1 *= 0.5f;
		
		cylverts.push_back({glm::vec3(p0, 2.f), glm::vec3(), glm::vec2(0.45f,0.6f), glm::u8vec4(255, 255, 255, 50)});
		cylverts.push_back({glm::vec3(p0,-1.f), glm::vec3(), glm::vec2(0.45f,0.4f), glm::u8vec4(255, 255, 255, 150)});
		cylverts.push_back({glm::vec3(p1, 2.f), glm::vec3(), glm::vec2(0.55f,0.6f), glm::u8vec4(255, 255, 255, 50)});
		
		cylverts.push_back({glm::vec3(p0,-1.f), glm::vec3(), glm::vec2(0.45f,0.4f), glm::u8vec4(255, 255, 255, 150)});
		cylverts.push_back({glm::vec3(p1,-1.f), glm::vec3(), glm::vec2(0.55f,0.4f), glm::u8vec4(255, 255, 255, 150)});
		cylverts.push_back({glm::vec3(p1, 2.f), glm::vec3(), glm::vec2(0.55f,0.6f), glm::u8vec4(255, 255, 255, 50)});
	}
	cylinderGeometry.uploadVertices<Model::GeometryVertex>(cylverts);
	cylinderBuffer.addGeometry(&cylinderGeometry);
	cylinderBuffer.setFaceType(GL_TRIANGLES);
}
Exemplo n.º 23
0
GLSLProgram::GLSLProgram(const char *vsource, const char *gsource, const char *fsource,
                         GLenum gsInput, GLenum gsOutput, int maxVerts)
{
    mProg = compileProgram(vsource, gsource, fsource, gsInput, gsOutput, maxVerts);
}
Exemplo n.º 24
0
int gpuFFTFour(CLEnv *cl, int N, float *data0){
	int Ns, Is;
	int even_odd = 0;
	size_t globalws[1];
	size_t localws[1];
	cl_event myevent;
	cl_ulong task_queued, task_start, task_end;
	double total_time = 0;
	int err;

	if(compileProgram(cl, "gpuFFTFour.cl", &program) < 0){
		printf("Runtime Error: gpuFFTFour. Failed to call compileProgram.\n");
		releaseStuff();
		return -1;
	}

	kernel_gpuFFTFour = clCreateKernel(program, "gpuFFTFour", &err);
	if(err < 0){
		printf("Runtime Error: gpuFFTFour. Calling clCreateKernel failed with code %d.\n", err);
		releaseStuff();
		return err;
	}

	buffer_data0 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE, sizeof(float) * N * 2, NULL, &err);
	if(err < 0){
		printf("Runtime Error: gpuFFTFour. Calling clCreateBuffer failed with code %d.\n", err);
		releaseStuff();
		return err;
	}
	buffer_data1 = clCreateBuffer(cl->context, CL_MEM_READ_WRITE, sizeof(float) * N * 2, NULL, &err);
	if(err < 0){
		printf("Runtime Error: gpuFFTFour. Calling clCreateBuffer failed with code %d.\n", err);
		releaseStuff();
		return err;
	}

	err = clEnqueueWriteBuffer(cl->queue, buffer_data0, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL);
	if(err < 0){
		printf("Runtime Error: gpuFFTFour. Calling clEnqueueWriteBUffer failed with code %d.\n", err);
		releaseStuff();
		return err;
	}

	for(Ns = 1, Is = 0; Ns < N; Ns *= 4, Is += 2){
		err += clSetKernelArg(kernel_gpuFFTFour, even_odd, sizeof(cl_mem), &buffer_data0);
		err += clSetKernelArg(kernel_gpuFFTFour, 1 - even_odd, sizeof(cl_mem), &buffer_data1);
		err += clSetKernelArg(kernel_gpuFFTFour, 2, sizeof(int), &N);
		err += clSetKernelArg(kernel_gpuFFTFour, 3, sizeof(int), &Ns);
		even_odd = 1 - even_odd;
		if(err < 0){
			printf("Runtime Error: gpuFFTFour. Setting args of kernel_gpuFFTFour failed with code %d.\n", err);
			releaseStuff();
			return err;
		}

		globalws[0] = N / 4;
		if(N / 4 < 256)
			localws[0] = N / 4;
		else
			localws[0] = 256;
		err = clEnqueueNDRangeKernel(cl->queue, kernel_gpuFFTFour, 1, NULL, globalws, localws, 0, NULL, &myevent);
		if(err < 0){
			printf("Runtime Error: gpuFFTFour. Enqueuing kernel_gpuFFTFour failed with code %d.\n", err);
			releaseStuff();
			return err;
		}
		clFinish(cl->queue);
		clGetEventProfilingInfo(myevent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &task_start, NULL);
		clGetEventProfilingInfo(myevent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &task_end, NULL);
		total_time += (double)(task_end - task_start) / 1000.0;	//us
		clReleaseEvent(myevent);
	}

	if(even_odd == 1)
		err = clEnqueueReadBuffer(cl->queue, buffer_data1, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL);
	else
		err = clEnqueueReadBuffer(cl->queue, buffer_data0, CL_FALSE, 0, sizeof(float) * N * 2, data0, 0, NULL, NULL);
	if(err < 0){
		printf("Runtime Error: gpuFFTFour. Calling clEnqueueReadBuffer failed with code %d.\n", err);
		releaseStuff();
		return err;
	}

	clFinish(cl->queue);
	releaseStuff();
	return (int)total_time;
}
Exemplo n.º 25
0
void
bluesteinsFFTGpu(const char* const argv[],const unsigned n, 
		 const unsigned orign,const unsigned size)
{
  const unsigned powM = (unsigned) log2(n);
  printf("Compiling Bluesteins Program..\n");

  compileProgram(argv, "fft.h", "kernels/bluesteins.cl");

    printf("Creating Kernel\n");
    for (unsigned i = 0; i < deviceCount; ++i) {
        createKernel(i, "bluesteins");
    }

    const unsigned sizePerGPU = size / deviceCount;
    for (unsigned i = 0; i < deviceCount; ++i) {
        workSize[i] = (i != (deviceCount - 1)) ? sizePerGPU 
                                               : (size - workOffset[i]);       
        
        allocateDeviceMemoryBS(i , workSize[i], workOffset[i]);
        
        clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void*) &d_Hreal[i]);
        clSetKernelArg(kernel[i], 1, sizeof(cl_mem), (void*) &d_Himag[i]);
	clSetKernelArg(kernel[i], 2, sizeof(cl_mem), (void*) &d_Yreal[i]);
        clSetKernelArg(kernel[i], 3, sizeof(cl_mem), (void*) &d_Yimag[i]);
	clSetKernelArg(kernel[i], 4, sizeof(cl_mem), (void*) &d_Zreal[i]);
        clSetKernelArg(kernel[i], 5, sizeof(cl_mem), (void*) &d_Zimag[i]);
	clSetKernelArg(kernel[i], 6, sizeof(unsigned), &n);
	clSetKernelArg(kernel[i], 7, sizeof(unsigned), &orign);
	clSetKernelArg(kernel[i], 8, sizeof(unsigned), &powM);
	clSetKernelArg(kernel[i], 9, sizeof(unsigned), &blockSize);
       

        if ((i + 1) < deviceCount) {
            workOffset[i + 1] = workOffset[i] + workSize[i];
        } 

    }

    size_t localWorkSize[] = {blockSize};
    for (unsigned i = 0; i < deviceCount; ++i) {
        size_t globalWorkSize[] = {shrRoundUp(blockSize, workSize[i])}; 
        // kernel non blocking execution 
        runKernel(i, localWorkSize, globalWorkSize);
    }

    h_Rreal = h_Hreal;
    h_Rimag = h_Himag;
    
    for (unsigned i = 0; i < deviceCount; ++i) {
        copyFromDevice(i, d_Hreal[i], h_Rreal + workOffset[i],
                                                workSize[i]); 
        copyFromDevice(i, d_Himag[i], h_Rimag + workOffset[i],
                                                 workSize[i]);
    }

    // wait for copy event
    const cl_int ciErrNum = clWaitForEvents(deviceCount, gpuDone);
    checkError(ciErrNum, CL_SUCCESS, "clWaitForEvents");
    printGpuTime();
}