void RGBDOdometry::initICP(GPUTexture * filteredDepth, const float depthCutoff) { cudaArray * textPtr; cudaGraphicsMapResources(1, &filteredDepth->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, filteredDepth->cudaRes, 0, 0); cudaMemcpy2DFromArray(depth_tmp[0].ptr(0), depth_tmp[0].step(), textPtr, 0, 0, depth_tmp[0].colsBytes(), depth_tmp[0].rows(), cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &filteredDepth->cudaRes); for(int i = 1; i < NUM_PYRS; ++i) { pyrDown(depth_tmp[i - 1], depth_tmp[i]); } for(int i = 0; i < NUM_PYRS; ++i) { createVMap(intr(i), depth_tmp[i], vmaps_curr_[i], depthCutoff); createNMap(vmaps_curr_[i], nmaps_curr_[i]); } cudaDeviceSynchronize(); }
bool WorkingBuffers::copyToGL(struct cudaGraphicsResource* destination, TsOutputType outputType, int level, int styleIndex, float vizGain, bool vizNormalize, int vizMode) { if (!_is_initialized) return false; cudaArray* d_result; cudaError_t error = cudaGraphicsMapResources(1, &destination, 0); if (error != cudaSuccess) {std::cerr << "ERROR! GraphicsMapResources" << std::endl; } assert(error == cudaSuccess); error = cudaGraphicsSubResourceGetMappedArray(&d_result, destination, 0, 0); if (error != cudaSuccess) {std::cerr << "ERROR! GraphicsSubResourceGetMappedArray" << std::endl; } assert(error == cudaSuccess); if (_history_position > 0) level = _history_params[_history_position].level; copyOutputArray(outputType, level, d_result, styleIndex, vizGain, vizNormalize, vizMode); error = cudaGraphicsUnmapResources(1, &destination, 0); if (error != cudaSuccess) {std::cerr << "ERROR! GraphicsUnmapResources" << std::endl; } assert(error == cudaSuccess); return true; }
//////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void runCuda() { cudaStream_t stream = 0; const int nbResources = 2; cudaGraphicsResource *ppResources[nbResources] = { g_histogram.cudaResource, g_color.cudaResource, }; // Map resources for Cuda checkCudaErrors(cudaGraphicsMapResources(nbResources, ppResources, stream)); getLastCudaError("cudaGraphicsMapResources(2) failed"); // Get pointers checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&g_histogram.cudaBuffer, &g_histogram.size, g_histogram.cudaResource)); getLastCudaError("cudaGraphicsResourceGetMappedPointer (g_color.pBuffer) failed"); cudaGraphicsSubResourceGetMappedArray(&g_color.pCudaArray, g_color.cudaResource, 0, 0); getLastCudaError("cudaGraphicsSubResourceGetMappedArray (g_color.pBuffer) failed"); // Execute kernel createHistogramTex(g_histogram.cudaBuffer, g_WindowWidth, g_WindowHeight, g_color.pCudaArray); checkCudaError(); // // unmap the resources // checkCudaErrors(cudaGraphicsUnmapResources(nbResources, ppResources, stream)); getLastCudaError("cudaGraphicsUnmapResources(2) failed"); }
/** Object-specific part of map action. */ void mapInternal() { CUDA_CHECK(cudaGraphicsSubResourceGetMappedArray(&this->array, resource, 0, 0)); if(this->array == 0) CUDA_ERROR("map image object failed"); }
void RGBDOdometry::populateRGBDData(GPUTexture * rgb, DeviceArray2D<float> * destDepths, DeviceArray2D<unsigned char> * destImages) { verticesToDepth(vmaps_tmp, destDepths[0], maxDepthRGB); for(int i = 0; i + 1 < NUM_PYRS; i++) { pyrDownGaussF(destDepths[i], destDepths[i + 1]); } cudaArray * textPtr; cudaGraphicsMapResources(1, &rgb->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, rgb->cudaRes, 0, 0); imageBGRToIntensity(textPtr, destImages[0]); cudaGraphicsUnmapResources(1, &rgb->cudaRes); for(int i = 0; i + 1 < NUM_PYRS; i++) { pyrDownUcharGauss(destImages[i], destImages[i + 1]); } cudaDeviceSynchronize(); }
CudaGraphicsResourceMappedArray(CudaGraphicsResource* resource) : CudaGraphicsResourceMapped(resource) { memset(&m_resDesc, 0, sizeof(m_resDesc)); m_resDesc.resType = cudaResourceTypeArray; CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&m_resDesc.res.array.array, m_resource->getResource(), 0, 0)); }
void CudaTexture::copyFrom(void * src, unsigned size) { std::cout<<" cu tex cpy "<<size<<"\n"; cudaArray * arr; cutilSafeCall(cudaGraphicsMapResources(1, &_cuda_tex_resource, 0)); cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&arr, _cuda_tex_resource, 0, 0)); cutilSafeCall(cudaMemcpyToArray(arr, 0, 0, src, size, cudaMemcpyDeviceToDevice)); cudaGraphicsUnmapResources(1, &_cuda_tex_resource, 0); }
void CUDARenderer::draw(const glm::mat4 &View, const glm::mat4 &Projection) { setCameraInfo(camera.cam); gpuErrchk(cudaGraphicsMapResources(1, &cudaSurfRes)); { cudaArray *viewCudaArray; gpuErrchk(cudaGraphicsSubResourceGetMappedArray(&viewCudaArray, cudaSurfRes, 0, 0)); renderToTexture(dimBlock, dimGrid, d_scene, viewCudaArray); } gpuErrchk(cudaGraphicsUnmapResources(1, &cudaSurfRes)); glDrawArrays(GL_TRIANGLES, 0, 6); }
void VolRenRaycastCuda::raycast(const QMatrix4x4&, const QMatrix4x4& matView, const QMatrix4x4&) { cc(cudaGraphicsMapResources(1, &entryRes, 0)); cc(cudaGraphicsMapResources(1, &exitRes, 0)); cc(cudaGraphicsMapResources(1, &outRes, 0)); cc(cudaGraphicsMapResources(1, &volRes, 0)); cc(cudaGraphicsMapResources(1, &tfFullRes, 0)); cc(cudaGraphicsMapResources(1, &tfBackRes, 0)); cudaArray *entryArr, *exitArr, *volArr, *tfFullArr, *tfBackArr; float *outPtr; size_t nBytes; cc(cudaGraphicsSubResourceGetMappedArray(&entryArr, entryRes, 0, 0)); cc(cudaGraphicsSubResourceGetMappedArray(&exitArr, exitRes, 0, 0)); cc(cudaGraphicsResourceGetMappedPointer((void**)&outPtr, &nBytes, outRes)); cc(cudaGraphicsSubResourceGetMappedArray(&volArr, volRes, 0, 0)); cc(cudaGraphicsSubResourceGetMappedArray(&tfFullArr, tfFullRes, 0, 0)); cc(cudaGraphicsSubResourceGetMappedArray(&tfBackArr, tfBackRes, 0, 0)); static std::map<Filter, cudaTextureFilterMode> vr2cu = { { Filter_Linear, cudaFilterModeLinear } , { Filter_Nearest, cudaFilterModePoint } }; assert(vr2cu.count(tfFilter) > 0); updateCUDALights(matView); cudacast(vol()->w(), vol()->h(), vol()->d(), volArr, tfInteg->getTexFull()->width(), tfInteg->getTexFull()->height(), stepsize, vr2cu[tfFilter], tfFullArr, tfBackArr, scalarMin, scalarMax, frustum.getTextureWidth(), frustum.getTextureHeight(), entryArr, exitArr, outPtr); cc(cudaGraphicsUnmapResources(1, &tfBackRes, 0)); cc(cudaGraphicsUnmapResources(1, &tfFullRes, 0)); cc(cudaGraphicsUnmapResources(1, &volRes, 0)); cc(cudaGraphicsUnmapResources(1, &entryRes, 0)); cc(cudaGraphicsUnmapResources(1, &exitRes, 0)); cc(cudaGraphicsUnmapResources(1, &outRes, 0)); }
void RGBDOdometry::initICPModel(GPUTexture * predictedVertices, GPUTexture * predictedNormals, const float depthCutoff, const Eigen::Matrix4f & modelPose) { cudaArray * textPtr; cudaGraphicsMapResources(1, &predictedVertices->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, predictedVertices->cudaRes, 0, 0); cudaMemcpyFromArray(vmaps_tmp.ptr(), textPtr, 0, 0, vmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &predictedVertices->cudaRes); cudaGraphicsMapResources(1, &predictedNormals->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, predictedNormals->cudaRes, 0, 0); cudaMemcpyFromArray(nmaps_tmp.ptr(), textPtr, 0, 0, nmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &predictedNormals->cudaRes); copyMaps(vmaps_tmp, nmaps_tmp, vmaps_g_prev_[0], nmaps_g_prev_[0]); for(int i = 1; i < NUM_PYRS; ++i) { resizeVMap(vmaps_g_prev_[i - 1], vmaps_g_prev_[i]); resizeNMap(nmaps_g_prev_[i - 1], nmaps_g_prev_[i]); } Eigen::Matrix<float, 3, 3, Eigen::RowMajor> Rcam = modelPose.topLeftCorner(3, 3); Eigen::Vector3f tcam = modelPose.topRightCorner(3, 1); mat33 device_Rcam = Rcam; float3 device_tcam = *reinterpret_cast<float3*>(tcam.data()); for(int i = 0; i < NUM_PYRS; ++i) { tranformMaps(vmaps_g_prev_[i], nmaps_g_prev_[i], device_Rcam, device_tcam, vmaps_g_prev_[i], nmaps_g_prev_[i]); } cudaDeviceSynchronize(); }
void generateCUDAImage() { unsigned int* out_data = cuda_dest_resource; dim3 block(16, 16, 1); dim3 grid(image_width / block.x, image_height / block.y, 1); launch_cudaProcess(grid, block, 0, out_data, image_width); cudaArray *texture_ptr; cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0); cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0); int num_texels = image_width * image_height; int num_values = num_texels * 4; int size_tex_data = sizeof(GLubyte) * num_values; cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice); }
void RGBDOdometry::initICP(GPUTexture * predictedVertices, GPUTexture * predictedNormals, const float depthCutoff) { cudaArray * textPtr; cudaGraphicsMapResources(1, &predictedVertices->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, predictedVertices->cudaRes, 0, 0); cudaMemcpyFromArray(vmaps_tmp.ptr(), textPtr, 0, 0, vmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &predictedVertices->cudaRes); cudaGraphicsMapResources(1, &predictedNormals->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, predictedNormals->cudaRes, 0, 0); cudaMemcpyFromArray(nmaps_tmp.ptr(), textPtr, 0, 0, nmaps_tmp.sizeBytes(), cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &predictedNormals->cudaRes); copyMaps(vmaps_tmp, nmaps_tmp, vmaps_curr_[0], nmaps_curr_[0]); for(int i = 1; i < NUM_PYRS; ++i) { resizeVMap(vmaps_curr_[i - 1], vmaps_curr_[i]); resizeNMap(nmaps_curr_[i - 1], nmaps_curr_[i]); } cudaDeviceSynchronize(); }
void Renderer::render_disparity(const uint16_t* d_disp, int disp_size) { glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); // cuda-gl interop cudaGraphicsResource_t cuda_gl_tex_resource; cudaGraphicsGLRegisterImage(&cuda_gl_tex_resource, disp_texture_, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsSurfaceLoadStore); cudaGraphicsMapResources(1, &cuda_gl_tex_resource); cudaArray_t texture_array; cudaGraphicsSubResourceGetMappedArray(&texture_array, cuda_gl_tex_resource, 0, 0); cudaResourceDesc desc; desc.resType = cudaResourceTypeArray; desc.res.array.array = texture_array; cudaSurfaceObject_t write_surface; cudaCreateSurfaceObject(&write_surface, &desc); write_surface_U16_with_multiplication(write_surface, d_disp, width_, height_, 256); cudaDestroySurfaceObject(write_surface); cudaGraphicsUnmapResources(1, &cuda_gl_tex_resource); cudaGraphicsUnregisterResource(cuda_gl_tex_resource); // end cuda-gl interop glUseProgram(program_disp_); glBindTexture(GL_TEXTURE_2D, disp_texture_); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); glBindBuffer(GL_ARRAY_BUFFER, vert_buffer_); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 5, (void*)0); glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 5, (float*)0 + 3); GLint loc; loc = glGetUniformLocation(program_disp_, "tex_sampler"); if (loc != -1) { glUniform1i(loc, 0); } loc = glGetUniformLocation(program_cdisp_, "inv_disp_size"); if (loc != -1) { glUniform1i(loc, 256 / disp_size); } glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); glBindTexture(GL_TEXTURE_2D, 0); }
void processImage() { processLayer(sim_width, sim_height, cuda_dest_resource); cudaArray *texture_ptr; cutilSafeCall(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0)); cutilSafeCall(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0)); int num_texels = sim_width * sim_height; int num_values = num_texels * 4; int size_tex_data = sizeof(GLubyte) * num_values; cutilSafeCall(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice)); cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0)); }
void RGBDOdometry::initFirstRGB(GPUTexture * rgb) { cudaArray * textPtr; cudaGraphicsMapResources(1, &rgb->cudaRes); cudaGraphicsSubResourceGetMappedArray(&textPtr, rgb->cudaRes, 0, 0); imageBGRToIntensity(textPtr, lastNextImage[0]); cudaGraphicsUnmapResources(1, &rgb->cudaRes); for(int i = 0; i + 1 < NUM_PYRS; i++) { pyrDownUcharGauss(lastNextImage[i], lastNextImage[i + 1]); } }
//////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void process(int width, int height, int radius) { cudaArray *in_array; unsigned int *out_data; #ifdef USE_TEXSUBIMAGE2D checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_dest_resource, 0)); size_t num_bytes; checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&out_data, &num_bytes, cuda_pbo_dest_resource)); //printf("CUDA mapped pointer of pbo_out: May access %ld bytes, expected %d\n", num_bytes, size_tex_data); #else out_data = cuda_dest_resource; #endif // map buffer objects to get CUDA device pointers checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_screen_resource, 0)); //printf("Mapping tex_in\n"); checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&in_array, cuda_tex_screen_resource, 0, 0)); // calculate grid size dim3 block(16, 16, 1); //dim3 block(16, 16, 1); dim3 grid(width / block.x, height / block.y, 1); int sbytes = (block.x+(2*radius))*(block.y+(2*radius))*sizeof(unsigned int); // execute CUDA kernel launch_cudaProcess(grid, block, sbytes, in_array, out_data, width, height, block.x+(2*radius), radius, 0.8f, 4.0f); checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_screen_resource, 0)); #ifdef USE_TEXSUBIMAGE2D checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_dest_resource, 0)); #endif }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { unsigned int cmd, glhandle, gltarget, direction, flags, keepmapped, ispbo; unsigned long nrbytes; void *gpuptr; int slot = 0; cudaGraphicsResource_t resource = NULL; struct cudaArray *mappedArray = NULL; void* mappedPtr = NULL; size_t mappedSize = 0; /* Be optimistic, assume success unless told otherwise: */ cudastatus = cudaSuccess; if (firsttime) { firsttime = 0; mexPrintf("\n%s: A simple CUDA <=> OpenGL interoperation interface.\n", mexFunctionName()); mexPrintf("(c) 2013 by Mario Kleiner. Licensed to you under the MIT license.\n\n"); /* Reset cache clock to zero and clear the cache: */ cacheclock = 0; memset(resourceCache, 0, sizeof(resourceCache[0]) * MAX_CACHE_SLOTS); /* Start off with an effective cache capacity of 8 slots (1 slot is blocked from use): */ cachesize = 8 + 1; firstLRUCycle = 1; /* Make sure the cache is flushed at mex file shutdown time: */ mexAtExit(mexExit); } /* Retrieve command code: Give usage info if none given. */ if (nrhs < 1) { usageExit(0); return; } cmd = (unsigned int) mxGetScalar(prhs[0]); /* Change of verbosity? */ if (cmd == 6) { if (nrhs < 2) usageExit(1); verbose = (unsigned int) mxGetScalar(prhs[1]); if (verbose) mexPrintf("\n%s: Verbose tracing of operations enabled.\n", mexFunctionName()); return; } /* Resizing the LRU cache requested? */ if (cmd == 5) { if (nrhs < 2) usageExit(1); /* Reset LRU cache full warning: */ firstLRUCycle = 1; slot = (unsigned int) mxGetScalar(prhs[1]); /* Increment request by 1 to compensate for the "lost" slot 0: */ slot = slot + 1; /* Child protections: */ if (slot > MAX_CACHE_SLOTS) { mexPrintf("%s: Requested new softlimit %i for cache exceeds compiled in maximum %i. Will clamp to maximum.\n", mexFunctionName(), slot - 1, MAX_CACHE_SLOTS - 1); cachesize = MAX_CACHE_SLOTS; return; } if (slot < cachesize) { /* Shrinking the cache requested. This implies a full cache flush: */ mexPrintf("%s: Requested new softlimit %i for cache is smaller than old softlimit %i. Will flush the cache before shrinking it.\n", mexFunctionName(), slot - 1, cachesize - 1); cacheFlush(); } /* Set new softlimit: */ cachesize = slot; mexPrintf("%s: New softlimit for LRU cache set to %i slots.\n", mexFunctionName(), cachesize - 1); return; } if (cmd == 0) { /* Cache flush requested: */ cacheFlush(); return; } /* Following ops require at least object handle and target type: */ if (nrhs < 3) usageExit(1); /* Time to increment the age of our cached items by a clock tick: */ ageCache(); /* Retrieve OpenGL object handle to our image buffer: */ glhandle = (unsigned int) mxGetScalar(prhs[1]); /* Get GLEnum target: */ gltarget = (unsigned int) mxGetScalar(prhs[2]); if (cmd == 1) { /* Unmap resource if it is mapped: */ unmapResource(glhandle, gltarget); return; } if (cmd == 2) { /* Unmap and unregister resource if it is mapped and/or registered: */ unregisterResource(glhandle, gltarget); return; } if (nrhs < 6) usageExit(1); /* Retrieve CUDA memory pointer to source/destination CUDA memory buffer: */ gpuptr = (void*) (unsigned long) mxGetScalar(prhs[3]); /* Retrieve number of bytes to copy: */ nrbytes = (unsigned long) mxGetScalar(prhs[4]); /* Retrieve direction: 0 = OpenGL -> CUDA, 1 = CUDA -> OpenGL : */ direction = (unsigned int) mxGetScalar(prhs[5]); /* Retrieve optional 'keepmapped' flag. */ keepmapped = 0; if (nrhs >= 7) keepmapped = (unsigned int) mxGetScalar(prhs[6]); /* Define CUDA optimization flags, depending if this is a OpenGL->CUDA or * CUDA->OpenGL copy operation. */ if ((nrhs >= 8) && (mxGetScalar(prhs[7]) >= 0)) { /* Override map flags provided. Use them: */ flags = (unsigned int) mxGetScalar(prhs[7]); } else { /* Use auto-selected map flags: */ flags = (direction) ? cudaGraphicsRegisterFlagsWriteDiscard : cudaGraphicsRegisterFlagsReadOnly; } /* Is gltarget a OpenGL pixelbuffer object? Check for gltarget == GL_PACK_BUFFER or GL_UNPACK_BUFFER. */ ispbo = (gltarget == 35051 || gltarget == 35052) ? 1 : 0; /* Copy of data or mapped resource access pointer requested? */ if (cmd == 3 || cmd == 4) { /* Register OpenGL object with CUDA as 'resource': */ /* Already in cache? This would mean it is registered already with compatible mapping flags: */ slot = cacheInsert(glhandle, gltarget, flags); if (slot < 0) { /* Not yet in cache. This means it is not registered at this time, either because it wasn't registered at all, or because it was registered with incompatible 'flags', so it just got unregistered and expelled from the cache. In any case, we need to insert it into the cache and register it. -slot is the free target slot for this purpose. */ /* Turn slot into something useful: */ slot = -slot; if (ispbo) { /* OpenGL Pixelbuffer object (GL_PACK_BUFFER or GL_UNPACK_BUFFER): */ cudastatus = cudaGraphicsGLRegisterBuffer(&(resourceCache[slot].resource), glhandle, flags); } else { /* OpenGL texture or renderbuffer object: */ cudastatus = cudaGraphicsGLRegisterImage(&(resourceCache[slot].resource), glhandle, gltarget, flags); } if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in %s(): %s\n", (ispbo) ? "cudaGraphicsGLRegisterBuffer" : "cudaGraphicsGLRegisterImage", cudaGetErrorString(cudastatus)); resourceCache[slot].resource = NULL; goto err_final; } if (verbose) mexPrintf("\n%s: cacheInsert(%i): CUDA resource registered (globject %i, gltarget %i, flags %i).\n", mexFunctionName(), slot, glhandle, gltarget, flags); /* Fill cache slot: */ resourceCache[slot].glhandle = glhandle; resourceCache[slot].gltarget = gltarget; resourceCache[slot].mapflags = flags; resourceCache[slot].lastaccess = cacheclock; resourceCache[slot].ismapped = 0; } /* At this point, the resource is stored in slot 'slot' of the cache and registered in a compatible way: */ /* Map the 'resource', unless it is already mapped: */ if (!resourceCache[slot].ismapped) { /* Map it: */ cudastatus = cudaGraphicsMapResources(1, &(resourceCache[slot].resource), 0); if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaGraphicsMapResources(): %s\n", cudaGetErrorString(cudastatus)); goto err_unregister; } if (verbose) mexPrintf("\n%s: CUDA resource %i mapped (globject %i, gltarget %i, flags %i).\n", mexFunctionName(), slot, glhandle, gltarget, flags); /* Successfully mapped: */ resourceCache[slot].ismapped = 1; } /* Get simpler handle: */ resource = resourceCache[slot].resource; /* Get mapped resource image array handle or PBO pointer: */ if (ispbo) { cudastatus = cudaGraphicsResourceGetMappedPointer(&mappedPtr, &mappedSize, resource); } else { cudastatus = cudaGraphicsSubResourceGetMappedArray(&mappedArray, resource, 0, 0); } if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in %s(): %s\n", (ispbo) ? "cudaGraphicsResourceGetMappedPointer" : "cudaGraphicsSubResourceGetMappedArray", cudaGetErrorString(cudastatus)); goto err_unmap; } } /* Copy of PBO data between CUDA and OpenGL requested? */ if (cmd == 3 && ispbo) { /* Copy from OpenGL PBO to CUDA buffer? */ if (direction == 0) { /* OpenGL -> CUDA copy: */ cudastatus = cudaMemcpyAsync(gpuptr, (const void*) mappedPtr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0); if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyAsync(): %s\n", cudaGetErrorString(cudastatus)); goto err_unmap; } } if (direction == 1) { /* CUDA -> OpenGL copy: */ cudastatus = cudaMemcpyAsync(mappedPtr, (const void*) gpuptr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0); if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyAsync(): %s\n", cudaGetErrorString(cudastatus)); goto err_unmap; } } } /* Copy of texture or renderbuffer data between CUDA and OpenGL requested? */ if (cmd == 3 && !ispbo) { /* Copy from OpenGL object to CUDA buffer? */ if (direction == 0) { /* OpenGL -> CUDA copy: */ cudastatus = cudaMemcpyFromArrayAsync(gpuptr, (const struct cudaArray*) mappedArray, 0, 0, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0); if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyFromArray(): %s\n", cudaGetErrorString(cudastatus)); goto err_unmap; } } if (direction == 1) { /* CUDA -> OpenGL copy: */ cudastatus = cudaMemcpyToArrayAsync((struct cudaArray*) mappedArray, 0, 0, (const void*) gpuptr, (size_t) nrbytes, cudaMemcpyDeviceToDevice, 0); if (cudastatus != cudaSuccess) { mexPrintf("\nmemcpyCudaOpenGL: ERROR in cudaMemcpyToArray(): %s\n", cudaGetErrorString(cudastatus)); goto err_unmap; } } } /* Return of pointers to mapped resource requested? */ if (cmd == 4) { /* Yes: This implies we must not unmap the resource now, as otherwise the * returned pointers would be dead on arrival. */ keepmapped = 1; /* Cast pointer to void* then store it in a 64-Bit unsigned integer return value: */ plhs[0] = mxCreateNumericMatrix(1, 1, mxUINT64_CLASS, mxREAL); *((unsigned long long*) mxGetData(plhs[0])) = (unsigned long long) (void*) ((ispbo) ? mappedPtr : mappedArray); } /* Keep resource mapped? */ if (slot && !keepmapped) doCudaUnmap(slot); /* Successfully completed: */ return; /* Error handling -- Unwind in reverse order: */ err_unmap: /* Unmap the 'resource': */ unmapResource(glhandle, gltarget); err_unregister: /* Unregister the 'resource': */ unregisterResource(glhandle, gltarget); err_final: if (cudastatus != cudaSuccess) mexErrMsgTxt("Error in memcpyCudaOpenGL(), reason see above."); }
void RenderTarget::Map(void) { glBindTexture(GL_TEXTURE_2D, 0); CUDA_CALL(cudaGraphicsMapResources(1, &_resource, 0)); CUDA_CALL(cudaGraphicsSubResourceGetMappedArray(&_array, _resource, 0, 0)); }
cudaError_t WINAPI wine_cudaGraphicsSubResourceGetMappedArray( struct cudaArray **arrayPtr, struct cudaGraphicsResource *resource, unsigned int arrayIndex, unsigned int mipLevel) { WINE_TRACE("\n"); return cudaGraphicsSubResourceGetMappedArray( arrayPtr, resource, arrayIndex, mipLevel ); }
int main(int argc, char **argv) { // Initialize SDL2's context SDL_Init(SDL_INIT_VIDEO); // Initialize Oculus' context ovrResult result = ovr_Initialize(nullptr); if (OVR_FAILURE(result)) { std::cout << "ERROR: Failed to initialize libOVR" << std::endl; SDL_Quit(); return -1; } ovrSession session; ovrGraphicsLuid luid; // Connect to the Oculus headset result = ovr_Create(&session, &luid); if (OVR_FAILURE(result)) { std::cout << "ERROR: Oculus Rift not detected" << std::endl; ovr_Shutdown(); SDL_Quit(); return -1; } int x = SDL_WINDOWPOS_CENTERED, y = SDL_WINDOWPOS_CENTERED; int winWidth = 1280; int winHeight = 720; Uint32 flags = SDL_WINDOW_OPENGL | SDL_WINDOW_SHOWN; // Create SDL2 Window SDL_Window* window = SDL_CreateWindow("OVR ZED App", x, y, winWidth, winHeight, flags); // Create OpenGL context SDL_GLContext glContext = SDL_GL_CreateContext(window); // Initialize GLEW glewInit(); // Turn off vsync to let the compositor do its magic SDL_GL_SetSwapInterval(0); // Initialize the ZED Camera sl::zed::Camera* zed = 0; zed = new sl::zed::Camera(sl::zed::HD720); sl::zed::ERRCODE zederr = zed->init(sl::zed::MODE::PERFORMANCE, 0); int zedWidth = zed->getImageSize().width; int zedHeight = zed->getImageSize().height; if (zederr != sl::zed::SUCCESS) { std::cout << "ERROR: " << sl::zed::errcode2str(zederr) << std::endl; ovr_Destroy(session); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } GLuint zedTextureID_L, zedTextureID_R; // Generate OpenGL texture for left images of the ZED camera glGenTextures(1, &zedTextureID_L); glBindTexture(GL_TEXTURE_2D, zedTextureID_L); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); // Generate OpenGL texture for right images of the ZED camera glGenTextures(1, &zedTextureID_R); glBindTexture(GL_TEXTURE_2D, zedTextureID_R); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glBindTexture(GL_TEXTURE_2D, 0); #if OPENGL_GPU_INTEROP cudaGraphicsResource* cimg_L; cudaGraphicsResource* cimg_R; cudaError_t errL, errR; errL = cudaGraphicsGLRegisterImage(&cimg_L, zedTextureID_L, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone); errR = cudaGraphicsGLRegisterImage(&cimg_R, zedTextureID_R, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone); if (errL != cudaSuccess || errR != cudaSuccess) { std::cout << "ERROR: cannot create CUDA texture : " << errL << "|" << errR << std::endl; } #endif ovrHmdDesc hmdDesc = ovr_GetHmdDesc(session); // Get the texture sizes of Oculus eyes ovrSizei textureSize0 = ovr_GetFovTextureSize(session, ovrEye_Left, hmdDesc.DefaultEyeFov[0], 1.0f); ovrSizei textureSize1 = ovr_GetFovTextureSize(session, ovrEye_Right, hmdDesc.DefaultEyeFov[1], 1.0f); // Compute the final size of the render buffer ovrSizei bufferSize; bufferSize.w = textureSize0.w + textureSize1.w; bufferSize.h = std::max(textureSize0.h, textureSize1.h); // Initialize OpenGL swap textures to render ovrTextureSwapChain textureChain = nullptr; // Description of the swap chain ovrTextureSwapChainDesc descTextureSwap = {}; descTextureSwap.Type = ovrTexture_2D; descTextureSwap.ArraySize = 1; descTextureSwap.Width = bufferSize.w; descTextureSwap.Height = bufferSize.h; descTextureSwap.MipLevels = 1; descTextureSwap.Format = OVR_FORMAT_R8G8B8A8_UNORM_SRGB; descTextureSwap.SampleCount = 1; descTextureSwap.StaticImage = ovrFalse; // Create the OpenGL texture swap chain result = ovr_CreateTextureSwapChainGL(session, &descTextureSwap, &textureChain); int length = 0; ovr_GetTextureSwapChainLength(session, textureChain, &length); if (OVR_SUCCESS(result)) { for (int i = 0; i < length; ++i) { GLuint chainTexId; ovr_GetTextureSwapChainBufferGL(session, textureChain, i, &chainTexId); glBindTexture(GL_TEXTURE_2D, chainTexId); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } } else { std::cout << "ERROR: failed creating swap texture" << std::endl; ovr_Destroy(session); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } // Generate frame buffer to render GLuint fboID; glGenFramebuffers(1, &fboID); // Generate depth buffer of the frame buffer GLuint depthBuffID; glGenTextures(1, &depthBuffID); glBindTexture(GL_TEXTURE_2D, depthBuffID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); GLenum internalFormat = GL_DEPTH_COMPONENT24; GLenum type = GL_UNSIGNED_INT; glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, bufferSize.w, bufferSize.h, 0, GL_DEPTH_COMPONENT, type, NULL); // Create a mirror texture to display the render result in the SDL2 window ovrMirrorTextureDesc descMirrorTexture; memset(&descMirrorTexture, 0, sizeof(descMirrorTexture)); descMirrorTexture.Width = winWidth; descMirrorTexture.Height = winHeight; descMirrorTexture.Format = OVR_FORMAT_R8G8B8A8_UNORM_SRGB; ovrMirrorTexture mirrorTexture = nullptr; result = ovr_CreateMirrorTextureGL(session, &descMirrorTexture, &mirrorTexture); if (!OVR_SUCCESS(result)) { std::cout << "ERROR: Failed to create mirror texture" << std::endl; } GLuint mirrorTextureId; ovr_GetMirrorTextureBufferGL(session, mirrorTexture, &mirrorTextureId); GLuint mirrorFBOID; glGenFramebuffers(1, &mirrorFBOID); glBindFramebuffer(GL_READ_FRAMEBUFFER, mirrorFBOID); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mirrorTextureId, 0); glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Frame index used by the compositor // it needs to be updated each new frame long long frameIndex = 0; // FloorLevel will give tracking poses where the floor height is 0 ovr_SetTrackingOriginType(session, ovrTrackingOrigin_FloorLevel); // Initialize a default Pose ovrPosef eyeRenderPose[2]; // Get the render description of the left and right "eyes" of the Oculus headset ovrEyeRenderDesc eyeRenderDesc[2]; eyeRenderDesc[0] = ovr_GetRenderDesc(session, ovrEye_Left, hmdDesc.DefaultEyeFov[0]); eyeRenderDesc[1] = ovr_GetRenderDesc(session, ovrEye_Right, hmdDesc.DefaultEyeFov[1]); // Get the Oculus view scale description ovrVector3f hmdToEyeOffset[2]; double sensorSampleTime; // Create and compile the shader's sources Shader shader(OVR_ZED_VS, OVR_ZED_FS); // Compute the ZED image field of view with the ZED parameters float zedFovH = atanf(zed->getImageSize().width / (zed->getParameters()->LeftCam.fx *2.f)) * 2.f; // Compute the Horizontal Oculus' field of view with its parameters float ovrFovH = (atanf(hmdDesc.DefaultEyeFov[0].LeftTan) + atanf(hmdDesc.DefaultEyeFov[0].RightTan)); // Compute the useful part of the ZED image unsigned int usefulWidth = zed->getImageSize().width * ovrFovH / zedFovH; // Compute the size of the final image displayed in the headset with the ZED image's aspect-ratio kept unsigned int widthFinal = bufferSize.w / 2; float heightGL = 1.f; float widthGL = 1.f; if (usefulWidth > 0.f) { unsigned int heightFinal = zed->getImageSize().height * widthFinal / usefulWidth; // Convert this size to OpenGL viewport's frame's coordinates heightGL = (heightFinal) / (float)(bufferSize.h); widthGL = ((zed->getImageSize().width * (heightFinal / (float)zed->getImageSize().height)) / (float)widthFinal); } else { std::cout << "WARNING: ZED parameters got wrong values." "Default vertical and horizontal FOV are used.\n" "Check your calibration file or check if your ZED is not too close to a surface or an object." << std::endl; } // Compute the Vertical Oculus' field of view with its parameters float ovrFovV = (atanf(hmdDesc.DefaultEyeFov[0].UpTan) + atanf(hmdDesc.DefaultEyeFov[0].DownTan)); // Compute the center of the optical lenses of the headset float offsetLensCenterX = ((atanf(hmdDesc.DefaultEyeFov[0].LeftTan)) / ovrFovH) * 2.f - 1.f; float offsetLensCenterY = ((atanf(hmdDesc.DefaultEyeFov[0].UpTan)) / ovrFovV) * 2.f - 1.f; // Create a rectangle with the computed coordinates and push it in GPU memory. struct GLScreenCoordinates { float left, up, right, down; } screenCoord; screenCoord.up = heightGL + offsetLensCenterY; screenCoord.down = heightGL - offsetLensCenterY; screenCoord.right = widthGL + offsetLensCenterX; screenCoord.left = widthGL - offsetLensCenterX; float rectVertices[12] = { -screenCoord.left, -screenCoord.up, 0, screenCoord.right, -screenCoord.up, 0, screenCoord.right, screenCoord.down, 0, -screenCoord.left, screenCoord.down, 0 }; GLuint rectVBO[3]; glGenBuffers(1, &rectVBO[0]); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[0]); glBufferData(GL_ARRAY_BUFFER, sizeof(rectVertices), rectVertices, GL_STATIC_DRAW); float rectTexCoord[8] = { 0, 1, 1, 1, 1, 0, 0, 0 }; glGenBuffers(1, &rectVBO[1]); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[1]); glBufferData(GL_ARRAY_BUFFER, sizeof(rectTexCoord), rectTexCoord, GL_STATIC_DRAW); unsigned int rectIndices[6] = { 0, 1, 2, 0, 2, 3 }; glGenBuffers(1, &rectVBO[2]); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, rectVBO[2]); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(rectIndices), rectIndices, GL_STATIC_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); // Initialize hit value float hit = 0.02f; // Initialize a boolean that will be used to stop the application’s loop and another one to pause/unpause rendering bool end = false; bool refresh = true; // SDL variable that will be used to store input events SDL_Event events; // Initialize time variables. They will be used to limit the number of frames rendered per second. // Frame counter unsigned int riftc = 0, zedc = 1; // Chronometer unsigned int rifttime = 0, zedtime = 0, zedFPS = 0; int time1 = 0, timePerFrame = 0; int frameRate = (int)(1000 / MAX_FPS); // This boolean is used to test if the application is focused bool isVisible = true; // Enable the shader glUseProgram(shader.getProgramId()); // Bind the Vertex Buffer Objects of the rectangle that displays ZED images // vertices glEnableVertexAttribArray(Shader::ATTRIB_VERTICES_POS); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[0]); glVertexAttribPointer(Shader::ATTRIB_VERTICES_POS, 3, GL_FLOAT, GL_FALSE, 0, 0); // indices glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, rectVBO[2]); // texture coordinates glEnableVertexAttribArray(Shader::ATTRIB_TEXTURE2D_POS); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[1]); glVertexAttribPointer(Shader::ATTRIB_TEXTURE2D_POS, 2, GL_FLOAT, GL_FALSE, 0, 0); // Main loop while (!end) { // Compute the time used to render the previous frame timePerFrame = SDL_GetTicks() - time1; // If the previous frame has been rendered too fast if (timePerFrame < frameRate) { // Pause the loop to have a max FPS equal to MAX_FPS SDL_Delay(frameRate - timePerFrame); timePerFrame = frameRate; } // Increment the ZED chronometer zedtime += timePerFrame; // If ZED chronometer reached 1 second if (zedtime > 1000) { zedFPS = zedc; zedc = 0; zedtime = 0; } // Increment the Rift chronometer and the Rift frame counter rifttime += timePerFrame; riftc++; // If Rift chronometer reached 200 milliseconds if (rifttime > 200) { // Display FPS std::cout << "\rRIFT FPS: " << 1000 / (rifttime / riftc) << " | ZED FPS: " << zedFPS; // Reset Rift chronometer rifttime = 0; // Reset Rift frame counter riftc = 0; } // Start frame chronometer time1 = SDL_GetTicks(); // While there is an event catched and not tested while (SDL_PollEvent(&events)) { // If a key is released if (events.type == SDL_KEYUP) { // If Q quit the application if (events.key.keysym.scancode == SDL_SCANCODE_Q) end = true; // If R reset the hit value else if (events.key.keysym.scancode == SDL_SCANCODE_R) hit = 0.0f; // If C pause/unpause rendering else if (events.key.keysym.scancode == SDL_SCANCODE_C) refresh = !refresh; } // If the mouse wheel is used if (events.type == SDL_MOUSEWHEEL) { // Increase or decrease hit value float s; events.wheel.y > 0 ? s = 1.0f : s = -1.0f; hit += 0.005f * s; } } // Get texture swap index where we must draw our frame GLuint curTexId; int curIndex; ovr_GetTextureSwapChainCurrentIndex(session, textureChain, &curIndex); ovr_GetTextureSwapChainBufferGL(session, textureChain, curIndex, &curTexId); // Call ovr_GetRenderDesc each frame to get the ovrEyeRenderDesc, as the returned values (e.g. HmdToEyeOffset) may change at runtime. eyeRenderDesc[0] = ovr_GetRenderDesc(session, ovrEye_Left, hmdDesc.DefaultEyeFov[0]); eyeRenderDesc[1] = ovr_GetRenderDesc(session, ovrEye_Right, hmdDesc.DefaultEyeFov[1]); hmdToEyeOffset[0] = eyeRenderDesc[0].HmdToEyeOffset; hmdToEyeOffset[1] = eyeRenderDesc[1].HmdToEyeOffset; // Get eye poses, feeding in correct IPD offset ovr_GetEyePoses(session, frameIndex, ovrTrue, hmdToEyeOffset, eyeRenderPose, &sensorSampleTime); // If the application is focused if (isVisible) { // If successful grab a new ZED image if (!zed->grab(sl::zed::SENSING_MODE::RAW, false, false)) { // Update the ZED frame counter zedc++; if (refresh) { #if OPENGL_GPU_INTEROP sl::zed::Mat m = zed->retrieveImage_gpu(sl::zed::SIDE::LEFT); cudaArray_t arrIm; cudaGraphicsMapResources(1, &cimg_L, 0); cudaGraphicsSubResourceGetMappedArray(&arrIm, cimg_L, 0, 0); cudaMemcpy2DToArray(arrIm, 0, 0, m.data, m.step, zedWidth * 4, zedHeight, cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &cimg_L, 0); m = zed->retrieveImage_gpu(sl::zed::SIDE::RIGHT); cudaGraphicsMapResources(1, &cimg_R, 0); cudaGraphicsSubResourceGetMappedArray(&arrIm, cimg_R, 0, 0); cudaMemcpy2DToArray(arrIm, 0, 0, m.data, m.step, zedWidth * 4, zedHeight, cudaMemcpyDeviceToDevice); // *4 = 4 channels * 1 bytes (uint) cudaGraphicsUnmapResources(1, &cimg_R, 0); #endif // Bind the frame buffer glBindFramebuffer(GL_FRAMEBUFFER, fboID); // Set its color layer 0 as the current swap texture glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, curTexId, 0); // Set its depth layer as our depth buffer glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthBuffID, 0); // Clear the frame buffer glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glClearColor(0, 0, 0, 1); // Render for each Oculus eye the equivalent ZED image for (int eye = 0; eye < 2; eye++) { // Set the left or right vertical half of the buffer as the viewport glViewport(eye == ovrEye_Left ? 0 : bufferSize.w / 2, 0, bufferSize.w / 2, bufferSize.h); // Bind the left or right ZED image glBindTexture(GL_TEXTURE_2D, eye == ovrEye_Left ? zedTextureID_L : zedTextureID_R); #if !OPENGL_GPU_INTEROP glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, zed->retrieveImage(eye == ovrEye_Left ? sl::zed::SIDE::LEFT : sl::zed::SIDE::RIGHT).data); #endif // Bind the hit value glUniform1f(glGetUniformLocation(shader.getProgramId(), "hit"), eye == ovrEye_Left ? hit : -hit); // Bind the isLeft value glUniform1ui(glGetUniformLocation(shader.getProgramId(), "isLeft"), eye == ovrEye_Left ? 1U : 0U); // Draw the ZED image glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); } // Avoids an error when calling SetAndClearRenderSurface during next iteration. // Without this, during the next while loop iteration SetAndClearRenderSurface // would bind a framebuffer with an invalid COLOR_ATTACHMENT0 because the texture ID // associated with COLOR_ATTACHMENT0 had been unlocked by calling wglDXUnlockObjectsNV. glBindFramebuffer(GL_FRAMEBUFFER, fboID); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, 0, 0); // Commit changes to the textures so they get picked up frame ovr_CommitTextureSwapChain(session, textureChain); } // Do not forget to increment the frameIndex! frameIndex++; } } /* Note: Even if we don't ask to refresh the framebuffer or if the Camera::grab() doesn't catch a new frame, we have to submit an image to the Rift; it needs 75Hz refresh. Else there will be jumbs, black frames and/or glitches in the headset. */ ovrLayerEyeFov ld; ld.Header.Type = ovrLayerType_EyeFov; // Tell to the Oculus compositor that our texture origin is at the bottom left ld.Header.Flags = ovrLayerFlag_TextureOriginAtBottomLeft; // Because OpenGL | Disable head tracking // Set the Oculus layer eye field of view for each view for (int eye = 0; eye < 2; ++eye) { // Set the color texture as the current swap texture ld.ColorTexture[eye] = textureChain; // Set the viewport as the right or left vertical half part of the color texture ld.Viewport[eye] = OVR::Recti(eye == ovrEye_Left ? 0 : bufferSize.w / 2, 0, bufferSize.w / 2, bufferSize.h); // Set the field of view ld.Fov[eye] = hmdDesc.DefaultEyeFov[eye]; // Set the pose matrix ld.RenderPose[eye] = eyeRenderPose[eye]; } ld.SensorSampleTime = sensorSampleTime; ovrLayerHeader* layers = &ld.Header; // Submit the frame to the Oculus compositor // which will display the frame in the Oculus headset result = ovr_SubmitFrame(session, frameIndex, nullptr, &layers, 1); if (!OVR_SUCCESS(result)) { std::cout << "ERROR: failed to submit frame" << std::endl; glDeleteBuffers(3, rectVBO); ovr_DestroyTextureSwapChain(session, textureChain); ovr_DestroyMirrorTexture(session, mirrorTexture); ovr_Destroy(session); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } if (result == ovrSuccess && !isVisible) { std::cout << "The application is now shown in the headset." << std::endl; } isVisible = (result == ovrSuccess); // This is not really needed for this application but it may be usefull for an more advanced application ovrSessionStatus sessionStatus; ovr_GetSessionStatus(session, &sessionStatus); if (sessionStatus.ShouldRecenter) { std::cout << "Recenter Tracking asked by Session" << std::endl; ovr_RecenterTrackingOrigin(session); } // Copy the frame to the mirror buffer // which will be drawn in the SDL2 image glBindFramebuffer(GL_READ_FRAMEBUFFER, mirrorFBOID); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); GLint w = winWidth; GLint h = winHeight; glBlitFramebuffer(0, h, w, 0, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_NEAREST); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Swap the SDL2 window SDL_GL_SwapWindow(window); } // Disable all OpenGL buffer glDisableVertexAttribArray(Shader::ATTRIB_TEXTURE2D_POS); glDisableVertexAttribArray(Shader::ATTRIB_VERTICES_POS); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindTexture(GL_TEXTURE_2D, 0); glUseProgram(0); glBindVertexArray(0); // Delete the Vertex Buffer Objects of the rectangle glDeleteBuffers(3, rectVBO); // Delete SDL, OpenGL, Oculus and ZED context ovr_DestroyTextureSwapChain(session, textureChain); ovr_DestroyMirrorTexture(session, mirrorTexture); ovr_Destroy(session); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; // quit return 0; }
int main() { //Checks for memory leaks in debug mode _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); glfwInit(); glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 4); glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); glfwWindowHint(GLFW_RESIZABLE, GL_FALSE); GLFWwindow* window = glfwCreateWindow(width, height, "Hikari", nullptr, nullptr); glfwMakeContextCurrent(window); //Set callbacks for keyboard and mouse glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_DISABLED); glewExperimental = GL_TRUE; glewInit(); glGetError(); //Define the viewport dimensions glViewport(0, 0, width, height); //Initialize cuda->opengl context cudaCheck(cudaGLSetGLDevice(0)); cudaGraphicsResource *resource; //Create a texture to store ray tracing result GLuint tex; glActiveTexture(GL_TEXTURE0); glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D, tex); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, width, height, 0, GL_RGBA, GL_FLOAT, NULL); cudaCheck(cudaGraphicsGLRegisterImage(&resource, tex, GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard)); glBindTexture(GL_TEXTURE_2D, 0); Shader final = Shader("fsQuad.vert", "fsQuad.frag"); FullscreenQuad fsQuad = FullscreenQuad(); float4* buffer; cudaCheck(cudaMalloc((void**)&buffer, width * height * sizeof(float4))); cudaCheck(cudaMemset(buffer, 0, width * height * sizeof(float4))); //Mesh float3 offset = make_float3(0); float3 scale = make_float3(15); Mesh cBox("objs/Avent", 0, scale, offset); offset = make_float3(0, 55, 0); scale = make_float3(100); Mesh light("objs/plane", (int)cBox.triangles.size(), scale, offset); cBox.triangles.insert(cBox.triangles.end(), light.triangles.begin(), light.triangles.end()); cBox.aabbs.insert(cBox.aabbs.end(), light.aabbs.begin(), light.aabbs.end()); std::cout << "Num triangles: " << cBox.triangles.size() << std::endl; cBox.root = AABB(fminf(cBox.root.minBounds, light.root.minBounds), fmaxf(cBox.root.maxBounds, light.root.maxBounds)); BVH bvh(cBox.aabbs, cBox.triangles, cBox.root); Camera cam(make_float3(14, 15, 80), make_int2(width, height), 45.0f, 0.04f, 80.0f); Camera* dCam; cudaCheck(cudaMalloc((void**)&dCam, sizeof(Camera))); cudaCheck(cudaMemcpy(dCam, &cam, sizeof(Camera), cudaMemcpyHostToDevice)); cudaCheck(cudaGraphicsMapResources(1, &resource, 0)); cudaArray* pixels; cudaCheck(cudaGraphicsSubResourceGetMappedArray(&pixels, resource, 0, 0)); cudaResourceDesc viewCudaArrayResourceDesc; viewCudaArrayResourceDesc.resType = cudaResourceTypeArray; viewCudaArrayResourceDesc.res.array.array = pixels; cudaSurfaceObject_t viewCudaSurfaceObject; cudaCheck(cudaCreateSurfaceObject(&viewCudaSurfaceObject, &viewCudaArrayResourceDesc)); cudaCheck(cudaGraphicsUnmapResources(1, &resource, 0)); while (!glfwWindowShouldClose(window)) { float currentFrame = float(glfwGetTime()); deltaTime = currentFrame - lastFrame; lastFrame = currentFrame; //Check and call events glfwPollEvents(); handleInput(window, cam); if (cam.moved) { frameNumber = 0; cudaCheck(cudaMemset(buffer, 0, width * height * sizeof(float4))); } cam.rebuildCamera(); cudaCheck(cudaMemcpy(dCam, &cam, sizeof(Camera), cudaMemcpyHostToDevice)); frameNumber++; if (frameNumber < 20000) { cudaCheck(cudaGraphicsMapResources(1, &resource, 0)); std::chrono::time_point<std::chrono::system_clock> start, end; start = std::chrono::system_clock::now(); render(cam, dCam, viewCudaSurfaceObject, buffer, bvh.dTriangles, bvh.dNodes, frameNumber, cam.moved); end = std::chrono::system_clock::now(); std::chrono::duration<double> elapsed = end - start; std::cout << "Frame: " << frameNumber << " --- Elapsed time: " << elapsed.count() << "s\n"; cudaCheck(cudaGraphicsUnmapResources(1, &resource, 0)); } cam.moved = false; glUseProgram(final.program); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, tex); glClear(GL_COLOR_BUFFER_BIT); final.setUniformi("tRender", 0); fsQuad.render(); //std::cout << glGetError() << std::endl; //Swap the buffers glfwSwapBuffers(window); glfwSetCursorPos(window, lastX, lastY); }
int main(int argc, char **argv) { // Initialize SDL2's context SDL_Init(SDL_INIT_VIDEO); // Initialize Oculus' context ovrResult result = ovr_Initialize(nullptr); if (OVR_FAILURE(result)) { std::cout << "ERROR: Failed to initialize libOVR" << std::endl; SDL_Quit(); return -1; } ovrSession hmd; ovrGraphicsLuid luid; // Connect to the Oculus headset result = ovr_Create(&hmd, &luid); if (OVR_FAILURE(result)) { std::cout << "ERROR: Oculus Rift not detected" << std::endl; ovr_Shutdown(); SDL_Quit(); return -1; } int x = SDL_WINDOWPOS_CENTERED, y = SDL_WINDOWPOS_CENTERED; int winWidth = 1280; int winHeight = 720; Uint32 flags = SDL_WINDOW_OPENGL | SDL_WINDOW_SHOWN; // Create SDL2 Window SDL_Window* window = SDL_CreateWindow("OVR ZED App", x, y, winWidth, winHeight, flags); // Create OpenGL context SDL_GLContext glContext = SDL_GL_CreateContext(window); // Initialize GLEW glewInit(); // Turn off vsync to let the compositor do its magic SDL_GL_SetSwapInterval(0); // Initialize the ZED Camera sl::zed::Camera* zed = 0; zed = new sl::zed::Camera(sl::zed::HD720); sl::zed::ERRCODE zederr = zed->init(sl::zed::MODE::PERFORMANCE, 0); int zedWidth = zed->getImageSize().width; int zedHeight = zed->getImageSize().height; if (zederr != sl::zed::SUCCESS) { std::cout << "ERROR: " << sl::zed::errcode2str(zederr) << std::endl; ovr_Destroy(hmd); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } GLuint zedTextureID_L, zedTextureID_R; // Generate OpenGL texture for left images of the ZED camera glGenTextures(1, &zedTextureID_L); glBindTexture(GL_TEXTURE_2D, zedTextureID_L); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); // Generate OpenGL texture for right images of the ZED camera glGenTextures(1, &zedTextureID_R); glBindTexture(GL_TEXTURE_2D, zedTextureID_R); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glBindTexture(GL_TEXTURE_2D, 0); #if OPENGL_GPU_INTEROP cudaGraphicsResource* cimg_L; cudaGraphicsResource* cimg_R; cudaError_t errL, errR; errL = cudaGraphicsGLRegisterImage(&cimg_L, zedTextureID_L, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone); errR = cudaGraphicsGLRegisterImage(&cimg_R, zedTextureID_R, GL_TEXTURE_2D, cudaGraphicsMapFlagsNone); if (errL != cudaSuccess || errR != cudaSuccess) { std::cout << "ERROR: cannot create CUDA texture : " << errL << "|" << errR << std::endl; } #endif ovrHmdDesc hmdDesc = ovr_GetHmdDesc(hmd); // Get the texture sizes of Oculus eyes ovrSizei textureSize0 = ovr_GetFovTextureSize(hmd, ovrEye_Left, hmdDesc.DefaultEyeFov[0], 1.0f); ovrSizei textureSize1 = ovr_GetFovTextureSize(hmd, ovrEye_Right, hmdDesc.DefaultEyeFov[1], 1.0f); // Compute the final size of the render buffer ovrSizei bufferSize; bufferSize.w = textureSize0.w + textureSize1.w; bufferSize.h = std::max(textureSize0.h, textureSize1.h); // Initialize OpenGL swap textures to render ovrSwapTextureSet* ptextureSet = 0; if (OVR_SUCCESS(ovr_CreateSwapTextureSetGL(hmd, GL_SRGB8_ALPHA8, bufferSize.w, bufferSize.h, &ptextureSet))) { for (int i = 0; i < ptextureSet->TextureCount; ++i) { ovrGLTexture* tex = (ovrGLTexture*)&ptextureSet->Textures[i]; glBindTexture(GL_TEXTURE_2D, tex->OGL.TexId); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } } else { std::cout << "ERROR: failed creating swap texture" << std::endl; ovr_Destroy(hmd); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } // Generate frame buffer to render GLuint fboID; glGenFramebuffers(1, &fboID); // Generate depth buffer of the frame buffer GLuint depthBuffID; glGenTextures(1, &depthBuffID); glBindTexture(GL_TEXTURE_2D, depthBuffID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); GLenum internalFormat = GL_DEPTH_COMPONENT24; GLenum type = GL_UNSIGNED_INT; glTexImage2D(GL_TEXTURE_2D, 0, internalFormat, bufferSize.w, bufferSize.h, 0, GL_DEPTH_COMPONENT, type, NULL); // Create a mirror texture to display the render result in the SDL2 window ovrGLTexture* mirrorTexture = nullptr; result = ovr_CreateMirrorTextureGL(hmd, GL_SRGB8_ALPHA8, winWidth, winHeight, reinterpret_cast<ovrTexture**>(&mirrorTexture)); if (!OVR_SUCCESS(result)) { std::cout << "ERROR: Failed to create mirror texture" << std::endl; } GLuint mirrorFBOID; glGenFramebuffers(1, &mirrorFBOID); glBindFramebuffer(GL_READ_FRAMEBUFFER, mirrorFBOID); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mirrorTexture->OGL.TexId, 0); glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, 0); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Initialize a default Pose ovrPosef eyeRenderPose; // Set Identity quaternion eyeRenderPose.Orientation.x = 0; eyeRenderPose.Orientation.y = 0; eyeRenderPose.Orientation.z = 0; eyeRenderPose.Orientation.w = 1; // Set World's origin position eyeRenderPose.Position.x = 0.f; eyeRenderPose.Position.y = 0.f; eyeRenderPose.Position.z = 0; ovrLayerEyeFov ld; ld.Header.Type = ovrLayerType_EyeFov; // Tell to the Oculus compositor that our texture origin is at the bottom left ld.Header.Flags = ovrLayerFlag_TextureOriginAtBottomLeft | ovrLayerFlag_HeadLocked; // Because OpenGL | Disable head tracking // Set the Oculus layer eye field of view for each view for (int eye = 0; eye < 2; ++eye) { // Set the color texture as the current swap texture ld.ColorTexture[eye] = ptextureSet; // Set the viewport as the right or left vertical half part of the color texture ld.Viewport[eye] = OVR::Recti(eye == ovrEye_Left ? 0 : bufferSize.w / 2, 0, bufferSize.w / 2, bufferSize.h); // Set the field of view ld.Fov[eye] = hmdDesc.DefaultEyeFov[eye]; // Set the pose matrix ld.RenderPose[eye] = eyeRenderPose; } double sensorSampleTime = ovr_GetTimeInSeconds(); ld.SensorSampleTime = sensorSampleTime; // Get the render description of the left and right "eyes" of the Oculus headset ovrEyeRenderDesc eyeRenderDesc[2]; eyeRenderDesc[0] = ovr_GetRenderDesc(hmd, ovrEye_Left, hmdDesc.DefaultEyeFov[0]); eyeRenderDesc[1] = ovr_GetRenderDesc(hmd, ovrEye_Right, hmdDesc.DefaultEyeFov[1]); // Get the Oculus view scale description ovrVector3f viewOffset[2] = { eyeRenderDesc[0].HmdToEyeViewOffset, eyeRenderDesc[1].HmdToEyeViewOffset }; ovrViewScaleDesc viewScaleDesc; viewScaleDesc.HmdSpaceToWorldScaleInMeters = 1.0f; viewScaleDesc.HmdToEyeViewOffset[0] = viewOffset[0]; viewScaleDesc.HmdToEyeViewOffset[1] = viewOffset[1]; // Create and compile the shader's sources Shader shader(OVR_ZED_VS, OVR_ZED_FS); // Compute the ZED image field of view with the ZED parameters float zedFovH = atanf(zed->getImageSize().width / (zed->getParameters()->LeftCam.fx *2.f)) * 2.f; // Compute the Oculus' field of view with its parameters float ovrFovH = (atanf(hmdDesc.DefaultEyeFov[0].LeftTan) + atanf(hmdDesc.DefaultEyeFov[0].RightTan)); // Compute the useful part of the ZED image unsigned int usefulWidth = zed->getImageSize().width * ovrFovH / zedFovH; // Compute the size of the final image displayed in the headset with the ZED image's aspect-ratio kept unsigned int widthFinal = bufferSize.w / 2; unsigned int heightFinal = zed->getImageSize().height * widthFinal / usefulWidth; // Convert this size to OpenGL viewport's frame's coordinates float heightGL = (heightFinal) / (float)(bufferSize.h); float widthGL = ((zed->getImageSize().width * (heightFinal / (float)zed->getImageSize().height)) / (float)widthFinal); // Create a rectangle with the coordonates computed and push it in GPU memory. float rectVertices[12] = { -widthGL, -heightGL, 0, widthGL, -heightGL, 0, widthGL, heightGL, 0, -widthGL, heightGL, 0 }; GLuint rectVBO[3]; glGenBuffers(1, &rectVBO[0]); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[0]); glBufferData(GL_ARRAY_BUFFER, sizeof(rectVertices), rectVertices, GL_STATIC_DRAW); float rectTexCoord[8] = { 0, 1, 1, 1, 1, 0, 0, 0 }; glGenBuffers(1, &rectVBO[1]); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[1]); glBufferData(GL_ARRAY_BUFFER, sizeof(rectTexCoord), rectTexCoord, GL_STATIC_DRAW); unsigned int rectIndices[6] = { 0, 1, 2, 0, 2, 3 }; glGenBuffers(1, &rectVBO[2]); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, rectVBO[2]); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(rectIndices), rectIndices, GL_STATIC_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); // Initialize hit value float hit = 0.02f; // Initialize a boolean that will be used to stop the application’s loop and another one to pause/unpause rendering bool end = false; bool refresh = true; // SDL variable that will be used to store input events SDL_Event events; // Initialize time variables. They will be used to limit the number of frames rendered per second. // Frame counter unsigned int riftc = 0, zedc = 1; // Chronometer unsigned int rifttime = 0, zedtime = 0, zedFPS = 0; int time1 = 0, timePerFrame = 0; int frameRate = (int)(1000 / MAX_FPS); // Enable the shader glUseProgram(shader.getProgramId()); // Bind the Vertex Buffer Objects of the rectangle that displays ZED images // vertices glEnableVertexAttribArray(Shader::ATTRIB_VERTICES_POS); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[0]); glVertexAttribPointer(Shader::ATTRIB_VERTICES_POS, 3, GL_FLOAT, GL_FALSE, 0, 0); // indices glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, rectVBO[2]); // texture coordinates glEnableVertexAttribArray(Shader::ATTRIB_TEXTURE2D_POS); glBindBuffer(GL_ARRAY_BUFFER, rectVBO[1]); glVertexAttribPointer(Shader::ATTRIB_TEXTURE2D_POS, 2, GL_FLOAT, GL_FALSE, 0, 0); // Main loop while (!end) { // Compute the time used to render the previous frame timePerFrame = SDL_GetTicks() - time1; // If the previous frame has been rendered too fast if (timePerFrame < frameRate) { // Pause the loop to have a max FPS equal to MAX_FPS SDL_Delay(frameRate - timePerFrame); timePerFrame = frameRate; } // Increment the ZED chronometer zedtime += timePerFrame; // If ZED chronometer reached 1 second if (zedtime > 1000) { zedFPS = zedc; zedc = 0; zedtime = 0; } // Increment the Rift chronometer and the Rift frame counter rifttime += timePerFrame; riftc++; // If Rift chronometer reached 200 milliseconds if (rifttime > 200) { // Display FPS std::cout << "\rRIFT FPS: " << 1000 / (rifttime / riftc) << " | ZED FPS: " << zedFPS; // Reset Rift chronometer rifttime = 0; // Reset Rift frame counter riftc = 0; } // Start frame chronometer time1 = SDL_GetTicks(); // While there is an event catched and not tested while (SDL_PollEvent(&events)) { // If a key is released if (events.type == SDL_KEYUP) { // If Q quit the application if (events.key.keysym.scancode == SDL_SCANCODE_Q) end = true; // If R reset the hit value else if (events.key.keysym.scancode == SDL_SCANCODE_R) hit = 0.0f; // If C pause/unpause rendering else if (events.key.keysym.scancode == SDL_SCANCODE_C) refresh = !refresh; } // If the mouse wheel is used if (events.type == SDL_MOUSEWHEEL) { // Increase or decrease hit value float s; events.wheel.y > 0 ? s = 1.0f : s = -1.0f; hit += 0.005f * s; } } // If rendering is unpaused and // successful grab ZED image if (!zed->grab(sl::zed::SENSING_MODE::RAW, false, false)) { // Update the ZED frame counter zedc++; if (refresh) { #if OPENGL_GPU_INTEROP sl::zed::Mat m = zed->retrieveImage_gpu(sl::zed::SIDE::LEFT); cudaArray_t arrIm; cudaGraphicsMapResources(1, &cimg_L, 0); cudaGraphicsSubResourceGetMappedArray(&arrIm, cimg_L, 0, 0); cudaMemcpy2DToArray(arrIm, 0, 0, m.data, m.step, zedWidth * 4, zedHeight, cudaMemcpyDeviceToDevice); cudaGraphicsUnmapResources(1, &cimg_L, 0); m = zed->retrieveImage_gpu(sl::zed::SIDE::RIGHT); cudaGraphicsMapResources(1, &cimg_R, 0); cudaGraphicsSubResourceGetMappedArray(&arrIm, cimg_R, 0, 0); cudaMemcpy2DToArray(arrIm, 0, 0, m.data, m.step, zedWidth * 4, zedHeight, cudaMemcpyDeviceToDevice); // *4 = 4 channels * 1 bytes (uint) cudaGraphicsUnmapResources(1, &cimg_R, 0); #endif // Increment the CurrentIndex to point to the next texture within the output swap texture set. // CurrentIndex must be advanced round-robin fashion every time we draw a new frame ptextureSet->CurrentIndex = (ptextureSet->CurrentIndex + 1) % ptextureSet->TextureCount; // Get the current swap texture pointer auto tex = reinterpret_cast<ovrGLTexture*>(&ptextureSet->Textures[ptextureSet->CurrentIndex]); // Bind the frame buffer glBindFramebuffer(GL_FRAMEBUFFER, fboID); // Set its color layer 0 as the current swap texture glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex->OGL.TexId, 0); // Set its depth layer as our depth buffer glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthBuffID, 0); // Clear the frame buffer glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glClearColor(0, 0, 0, 1); // Render for each Oculus eye the equivalent ZED image for (int eye = 0; eye < 2; eye++) { // Set the left or right vertical half of the buffer as the viewport glViewport(ld.Viewport[eye].Pos.x, ld.Viewport[eye].Pos.y, ld.Viewport[eye].Size.w, ld.Viewport[eye].Size.h); // Bind the left or right ZED image glBindTexture(GL_TEXTURE_2D, eye == ovrEye_Left ? zedTextureID_L : zedTextureID_R); #if !OPENGL_GPU_INTEROP glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, zedWidth, zedHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, zed->retrieveImage(eye == ovrEye_Left ? sl::zed::SIDE::LEFT : sl::zed::SIDE::RIGHT).data); #endif // Bind the hit value glUniform1f(glGetUniformLocation(shader.getProgramId(), "hit"), eye == ovrEye_Left ? hit : -hit); // Draw the ZED image glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0); } } } /* Note: Even if we don't ask to refresh the framebuffer or if the Camera::grab() doesn't catch a new frame, we have to submit an image to the Rift; it needs 75Hz refresh. Else there will be jumbs, black frames and/or glitches in the headset. */ ovrLayerHeader* layers = &ld.Header; // Submit the frame to the Oculus compositor // which will display the frame in the Oculus headset result = ovr_SubmitFrame(hmd, 0, &viewScaleDesc, &layers, 1); if (!OVR_SUCCESS(result)) { std::cout << "ERROR: failed to submit frame" << std::endl; glDeleteBuffers(3, rectVBO); ovr_DestroySwapTextureSet(hmd, ptextureSet); ovr_DestroyMirrorTexture(hmd, &mirrorTexture->Texture); ovr_Destroy(hmd); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; return -1; } // Copy the frame to the mirror buffer // which will be drawn in the SDL2 image glBindFramebuffer(GL_READ_FRAMEBUFFER, mirrorFBOID); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); GLint w = mirrorTexture->OGL.Header.TextureSize.w; GLint h = mirrorTexture->OGL.Header.TextureSize.h; glBlitFramebuffer(0, h, w, 0, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_NEAREST); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); // Swap the SDL2 window SDL_GL_SwapWindow(window); } // Disable all OpenGL buffer glDisableVertexAttribArray(Shader::ATTRIB_TEXTURE2D_POS); glDisableVertexAttribArray(Shader::ATTRIB_VERTICES_POS); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glBindBuffer(GL_ARRAY_BUFFER, 0); glBindTexture(GL_TEXTURE_2D, 0); glUseProgram(0); glBindVertexArray(0); // Delete the Vertex Buffer Objects of the rectangle glDeleteBuffers(3, rectVBO); // Delete SDL, OpenGL, Oculus and ZED context ovr_DestroySwapTextureSet(hmd, ptextureSet); ovr_DestroyMirrorTexture(hmd, &mirrorTexture->Texture); ovr_Destroy(hmd); ovr_Shutdown(); SDL_GL_DeleteContext(glContext); SDL_DestroyWindow(window); SDL_Quit(); delete zed; // quit return 0; }
int window_loop() { GLFWwindow* window; window = glfwCreateWindow(640, 480, "Shader test", NULL, NULL); if (!window) { glfwTerminate(); return 0; } glfwMakeContextCurrent(window); //glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_DISABLED); glfwSetKeyCallback(window, key_callback); //glfwSetCursorPosCallback(window, cursor_callback); //glEnable(GL_CULL_FACE); glEnable(GL_LIGHT0); //glEnable(GL_DEPTH_TEST); // glEnable(GL_LIGHTING); // glEnable(GL_BLEND); // glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_ADD); // glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_ZERO); // glEnable(GL_NORMALIZE); //glEnable(GL_NORMALIZE); glEnable(GL_TEXTURE_2D); //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); // glPolygonMode( GL_FRONT, GL_LINE ); // glPolygonMode( GL_BACK, GL_POINT ); // glEnable(GL_COLOR_MATERIAL); cudaGLSetGLDevice(0); double ot, nt = glfwGetTime(); GLuint textureID[6]; glGenTextures(1, textureID); png_bytep* tex1; int lw, lh; printf("Laddar PNG\n"); read_png_file("/srv/texturer/Slate Tiles - (Normal Map).png", &tex1, &lw, &lh); printf("Laddade textur som är %i x %i pixelitaz stor.\n", lw, lh); float3* normal_map = NULL; size_t normal_map_bufferSize = 1024 * 1024 * sizeof(float3); cudaMalloc( &normal_map, normal_map_bufferSize ); float3* host_normal_map = calloc(1024*1024, sizeof(float3)); glBindTexture(GL_TEXTURE_2D, textureID[0]); for (int y=0; y<1024; y++) { for (int x=0; x<1024; x++) { host_normal_map[y*1024+x].x = (float)(tex1[y][x*3+0]-127) / 127; host_normal_map[y*1024+x].y = (float)(tex1[y][x*3+1]-127) / 127; host_normal_map[y*1024+x].z = (float)(tex1[y][x*3+2]-127) / 127; } } cudaMemcpy(normal_map, host_normal_map, normal_map_bufferSize, cudaMemcpyHostToDevice); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA, 1024, 1024, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); double cx, cy; glfwGetCursorPos(window, &cx, &cy); struct cudaGraphicsResource *test1; int r1=cudaGraphicsGLRegisterImage(&test1, textureID[0], GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard); printf("r1=%i\n"); uchar4* g_dstBuffer = NULL; size_t bufferSize = 1024 * 1024 * sizeof(uchar4); cudaMalloc( &g_dstBuffer, bufferSize ); cudaMemset(g_dstBuffer, 0x7F, bufferSize); //Make texture gray to start with printf("cuda alloc: %p\n", g_dstBuffer); double fps_time =0 ; int fps_count=0; while (!glfwWindowShouldClose(window)) { ot=nt; nt =glfwGetTime(); float dt = nt - ot; fps_time += dt; fps_count++; if (fps_time > 1) { printf("FPS: %f\n", fps_count/fps_time); fps_time=0; fps_count =0; } int width, height; glfwGetFramebufferSize(window, &width, &height); glClearColor(0.0, 0.0, 0.1, 1.0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glViewport(0, 0, width-1, height-1); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0, width-1, height-1, 0,0,1); glMatrixMode(GL_MODELVIEW); for (int testa_flera=0; testa_flera<16; testa_flera++) { glLoadIdentity(); glTranslatef(testa_flera*150, testa_flera*50+100, 0); glRotatef(testa_flera*10, 0,0,1); glTranslatef(0, testa_flera*50, 0); glScalef(0.5, 0.5, 0.5); float ta = fmod(nt+testa_flera*0.2, M_PI*2.0); float tb = fmod(nt*0.7+testa_flera*0.4, M_PI*2.0); float tc = fmod(nt*0.3+testa_flera*0.1, M_PI*2.0); float3 cam_vec = {sin(ta), sin(tb), sin(tc)}; int res=cudaGraphicsMapResources(1, &test1, 0); //printf("res: %i (succ=%i)\n", res, cudaSuccess); struct cudaArray* dstArray = 0; int r2 = cudaGraphicsSubResourceGetMappedArray( &dstArray, test1, 0, 0 ); //printf("r2: %i array: %p\n", r2, dstArray); first_test(g_dstBuffer, normal_map, cam_vec, 1024, 1024); cudaMemcpyToArray( dstArray, 0, 0, g_dstBuffer, bufferSize, cudaMemcpyDeviceToDevice ); cudaGraphicsUnmapResources(1, &test1, 0); glColor3f(1,1,1); glBegin(GL_QUADS); glTexCoord2f(0,0); glVertex3f(0,0,0); glTexCoord2f(1,0); glVertex3f(511,0,0); glTexCoord2f(1,1); glVertex3f(511,511,0); glTexCoord2f(0,1); glVertex3f(0,511,0); glEnd(); } glfwSwapBuffers(window); glfwPollEvents(); } glfwDestroyWindow(window); glfwTerminate(); return(EXIT_SUCCESS); }
cudaArray* bind() { checkCudaErrors(cudaGraphicsMapResources(1, &resouce, 0)); checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&array, resouce, 0, 0)); return array; }
//////////////////////////////////////////////////////////////////////////////// //! Run the Cuda part of the computation //////////////////////////////////////////////////////////////////////////////// void RunKernels() { static float t = 0.0f; // populate the 2d texture { cudaArray *cuArray; cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_2d.cudaResource, 0, 0); getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_2d) failed"); // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it cuda_texture_2d(g_texture_2d.cudaLinearMemory, g_texture_2d.width, g_texture_2d.height, g_texture_2d.pitch, t); getLastCudaError("cuda_texture_2d failed"); // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray cudaMemcpy2DToArray( cuArray, // dst array 0, 0, // offset g_texture_2d.cudaLinearMemory, g_texture_2d.pitch, // src g_texture_2d.width*4*sizeof(float), g_texture_2d.height, // extent cudaMemcpyDeviceToDevice); // kind getLastCudaError("cudaMemcpy2DToArray failed"); } // populate the volume texture { size_t pitchSlice = g_texture_vol.pitch * g_texture_vol.height; cudaArray *cuArray; cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_vol.cudaResource, 0, 0); getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_3d) failed"); // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it cuda_texture_volume(g_texture_vol.cudaLinearMemory, g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth, g_texture_vol.pitch, pitchSlice, t); getLastCudaError("cuda_texture_3d failed"); // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray struct cudaMemcpy3DParms memcpyParams = {0}; memcpyParams.dstArray = cuArray; memcpyParams.srcPtr.ptr = g_texture_vol.cudaLinearMemory; memcpyParams.srcPtr.pitch = g_texture_vol.pitch; memcpyParams.srcPtr.xsize = g_texture_vol.width; memcpyParams.srcPtr.ysize = g_texture_vol.height; memcpyParams.extent.width = g_texture_vol.width; memcpyParams.extent.height = g_texture_vol.height; memcpyParams.extent.depth = g_texture_vol.depth; memcpyParams.kind = cudaMemcpyDeviceToDevice; cudaMemcpy3D(&memcpyParams); getLastCudaError("cudaMemcpy3D failed"); } // populate the faces of the cube map for (int face = 0; face < 6; ++face) { cudaArray *cuArray; cudaGraphicsSubResourceGetMappedArray(&cuArray, g_texture_cube.cudaResource, face, 0); getLastCudaError("cudaGraphicsSubResourceGetMappedArray (cuda_texture_cube) failed"); // kick off the kernel and send the staging buffer cudaLinearMemory as an argument to allow the kernel to write to it cuda_texture_cube(g_texture_cube.cudaLinearMemory, g_texture_cube.size, g_texture_cube.size, g_texture_cube.pitch, face, t); getLastCudaError("cuda_texture_cube failed"); // then we want to copy cudaLinearMemory to the D3D texture, via its mapped form : cudaArray cudaMemcpy2DToArray( cuArray, // dst array 0, 0, // offset g_texture_cube.cudaLinearMemory, g_texture_cube.pitch, // src g_texture_cube.size*4, g_texture_cube.size, // extent cudaMemcpyDeviceToDevice); // kind getLastCudaError("cudaMemcpy2DToArray failed"); } t += 0.1f; }