/*
 * Thunk for cudaGraphicsGLRegisterBuffer: emits a trace line and then
 * forwards all three arguments unchanged to the native CUDA runtime call,
 * returning whatever status that call reports.
 */
cudaError_t WINAPI wine_cudaGraphicsGLRegisterBuffer( struct cudaGraphicsResource **resource, GLuint buffer, unsigned int Flags )
{
    cudaError_t status;

    WINE_TRACE("\n");

    status = cudaGraphicsGLRegisterBuffer( resource, buffer, Flags );
    return status;
}
bool CudaGLVertexBuffer::allocate() { int size = _numElements * _numVertices * sizeof(float); glGenBuffers(1, &_vbo); #if defined(GL_EXT_direct_state_access) if (glNamedBufferDataEXT) { glNamedBufferDataEXT(_vbo, size, 0, GL_DYNAMIC_DRAW); } else { #else { #endif glBindBuffer(GL_ARRAY_BUFFER, _vbo); glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } // register vbo as cuda resource cudaError_t err = cudaGraphicsGLRegisterBuffer( &_cudaResource, _vbo, cudaGraphicsMapFlagsWriteDiscard); if (err != cudaSuccess) return false; return true; } void CudaGLVertexBuffer::map() { if (_devicePtr) return; size_t num_bytes; void *ptr; cudaError_t err = cudaGraphicsMapResources(1, &_cudaResource, 0); if (err != cudaSuccess) Far::Error(Far::FAR_RUNTIME_ERROR, "CudaGLVertexBuffer::map failed.\n%s\n", cudaGetErrorString(err)); err = cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource); if (err != cudaSuccess) Far::Error(Far::FAR_RUNTIME_ERROR, "CudaGLVertexBuffer::map failed.\n%s\n", cudaGetErrorString(err)); _devicePtr = ptr; } void CudaGLVertexBuffer::unmap() { if (_devicePtr == NULL) return; cudaError_t err = cudaGraphicsUnmapResources(1, &_cudaResource, 0); if (err != cudaSuccess) Far::Error(Far::FAR_RUNTIME_ERROR, "CudaGLVertexBuffer::unmap failed.\n%s\n", cudaGetErrorString(err)); _devicePtr = NULL; } } // end namespace Osd
void shutDown(unsigned char k, int /*x*/, int /*y*/) { switch (k){ case '\033': case 'q': case 'Q': printf("Shutting down...\n"); cutilCheckError( cutStopTimer(hTimer) ); cutilCheckError( cutDeleteTimer(hTimer) ); // DEPRECATED: cutilSafeCall( cudaGLRegisterBufferObject(gl_PBO) ); cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, cudaGraphicsMapFlagsWriteDiscard)); glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0); glDeleteBuffers(1, &gl_PBO); glDeleteTextures(1, &gl_Tex); cutilSafeCall( CUDA_FreeArray() ); free(h_Src); printf("Shutdown done.\n"); cutilDeviceReset(); exit(0); break; case '1': printf("Passthrough.\n"); g_Kernel = 0; break; case '2': printf("KNN method \n"); g_Kernel = 1; break; case '3': printf("NLM method\n"); g_Kernel = 2; break; case '4': printf("Quick NLM(NLM2) method\n"); g_Kernel = 3; break; case ' ': printf(g_Diag ? "LERP highlighting mode.\n" : "Normal mode.\n"); g_Diag = !g_Diag; break; case 'n': printf("Decrease noise level.\n"); knnNoise -= noiseStep; nlmNoise -= noiseStep; break; case 'N': printf("Increase noise level.\n"); knnNoise += noiseStep; nlmNoise += noiseStep; break; case 'l': printf("Decrease LERP quotent.\n"); lerpC = MAX(lerpC - lerpStep, 0.0f); break; case 'L': printf("Increase LERP quotent.\n"); lerpC = MIN(lerpC + lerpStep, 1.0f); break; case 'f' : case 'F': g_FPS = true; break; case '?': printf("lerpC = %5.5f\n", lerpC); printf("knnNoise = %5.5f\n", knnNoise); printf("nlmNoise = %5.5f\n", nlmNoise); break; } }
int main() { int width = 800; int height = 600; int mesh_width = 256; int mesh_height = 256; // Creation du device cutilSafeCall( cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ) ); // Creation d'une fenetre C3::Window OpenGLWin; OpenGLWin.Create(C3::WindowMode(width,height),"CudaC3"); // Glew init GLenum err = glewInit(); if(err != GLEW_OK) std::cout << "Error on GLEW initialization.\n"; // Configuration OpenGL glClearColor(0.0, 0.0, 0.0, 1.0); glDisable(GL_DEPTH_TEST); glViewport(0, 0, width, height); glMatrixMode(GL_PROJECTION); glLoadIdentity(); gluPerspective(60.0, (GLfloat)width / (GLfloat) height, 0.1, 10.0); // VBO // *** Create GLuint vbo; glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); // *** Initialize unsigned int size = mesh_width * mesh_height * 4 * sizeof(float); glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); // *** Register in CUDA cudaGraphicsResource *cuda_vbo_resource = NULL; cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard)); float g_fAnim = 0.f; int nbFrame = 0; float timeFPS = 0.f; while(OpenGLWin.IsOpened()) { // Events C3::Event event; while(OpenGLWin.PoolEvent(event)) { //std::cout << "Event !" << std::endl; if(event.Type == C3::Event::Closed) { std::cout << "Close ... " << std::endl; OpenGLWin.Close(); } else if(event.Type == C3::Event::KeyPressed) { if(event.Key.Code == C3::Key::Escape) { std::cout << "Close ... 
" << std::endl; OpenGLWin.Close(); } } } // Mise a jour du temps g_fAnim += OpenGLWin.GetFrameTime() / 1000.f; timeFPS += OpenGLWin.GetFrameTime() / 1000.f; nbFrame++; if(timeFPS > 1.0f) { std::stringstream ss; ss << "CudaC3 [" << (int)ceil( nbFrame / timeFPS ) << " FPS]"; OpenGLWin.SetTitle(ss.str()); timeFPS = 0.f; nbFrame = 0; } // Draw the scene glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); // Lancer le calcul CUDA // *** map OpenGL buffer object for writing from CUDA float4 *dptr; cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource, 0)); size_t num_bytes; cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes, cuda_vbo_resource)); // *** Run kernel runKernel(dptr, mesh_width, mesh_height,g_fAnim); cutilSafeCall( cutilDeviceSynchronize() ); // *** Unmap cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0)); // OpenGL // *** Make some transformation glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glTranslatef(0.0, 0.0, -3.0); glRotatef(0.0, 1.0, 0.0, 0.0); glRotatef(0.0, 0.0, 1.0, 0.0); // *** Render VBO // --- Bind glBindBuffer(GL_ARRAY_BUFFER, vbo); glVertexPointer(4, GL_FLOAT, 0, 0); // --- Draw glEnableClientState(GL_VERTEX_ARRAY); glColor3f(1.0, 0.0, 0.0); glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height); glDisableClientState(GL_VERTEX_ARRAY); // Swap buffers OpenGLWin.Display(); } // Liberation des ressources cudaGraphicsUnregisterResource(cuda_vbo_resource); glBindBuffer(1, vbo); glDeleteBuffers(1, &vbo); // Close device cutilDeviceReset(); return 0; }
// Allocates all host/device storage for `numBodies` bodies and splits the
// bodies across the m_numDevices devices.
//
// Work split: each device gets a weight of (SM count) x (4 for compute
// capability major >= 2, else 1). Its share of the bodies is rounded up to a
// multiple of (SM count * 256) and clamped to what is still unassigned.
// Any bodies left over after the last device are added to that last device.
//
// Storage:
//   * m_bUseSysMem: three mapped, portable pinned host buffers (two position
//     buffers, one velocity buffer), zero-filled; every device gets an event
//     and device pointers aliasing those host buffers.
//   * otherwise: zero-filled host arrays for m_hPos[0] and m_hVel, one event
//     on device slot 0, and either two GL buffers registered with CUDA
//     (m_bUsePBO) or two cudaMalloc'ed position buffers, plus a cudaMalloc'ed
//     velocity buffer.
//
// Must only be called once (asserts !m_bInitialized).
//
// NOTE(review): the `template <typename T>` header for this definition is not
// visible in this chunk; it is expected immediately above.
void BodySystemGPU<T>::_initialize(unsigned numBodies)
{
    assert(!m_bInitialized);

    m_numBodies = numBodies;

    // 4 components of type T per body
    unsigned int memSize = sizeof(T) * 4 * numBodies;

    m_deviceData = new DeviceData<T>[m_numDevices];

    // divide up the workload amongst Devices
    float *weights = new float[m_numDevices];
    int *numSms = new int[m_numDevices];
    float total = 0;

    for (unsigned int i = 0; i < m_numDevices; i++)
    {
        cudaDeviceProp props;
        checkCudaErrors(cudaGetDeviceProperties(&props, i));

        // Choose the weight based on the Compute Capability
        // We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for
        // this application (since a 15-SM GF100 is about 2X faster than a 30-SM GT200).
        numSms[i] = props.multiProcessorCount;
        weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f);
        total += weights[i];
    }

    unsigned int offset = 0;
    unsigned int remaining = m_numBodies;

    for (unsigned int i = 0; i < m_numDevices; i++)
    {
        unsigned int count = (int)((weights[i] / total) * m_numBodies);
        // Round the share up to a multiple of (SMs * 256).
        unsigned int round = numSms[i] * 256;
        count = round * ((count + round - 1) / round);

        if (count > remaining)
        {
            count = remaining;
        }

        remaining -= count;
        m_deviceData[i].offset = offset;
        m_deviceData[i].numBodies = count;
        offset += count;

        // Hand any unassigned tail to the last device. The comparison used to
        // be `offset < m_numBodies-1`, which skipped the case of exactly one
        // leftover body (and underflowed for m_numBodies == 0).
        if ((i == m_numDevices - 1) && (offset < m_numBodies))
        {
            m_deviceData[i].numBodies += m_numBodies - offset;
        }
    }

    delete [] weights;
    delete [] numSms;

    if (m_bUseSysMem)
    {
        checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize, cudaHostAllocMapped | cudaHostAllocPortable));
        checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize, cudaHostAllocMapped | cudaHostAllocPortable));
        checkCudaErrors(cudaHostAlloc((void **)&m_hVel, memSize, cudaHostAllocMapped | cudaHostAllocPortable));

        memset(m_hPos[0], 0, memSize);
        memset(m_hPos[1], 0, memSize);
        memset(m_hVel, 0, memSize);

        for (unsigned int i = 0; i < m_numDevices; i++)
        {
            if (m_numDevices > 1)
            {
                checkCudaErrors(cudaSetDevice(i));
            }

            checkCudaErrors(cudaEventCreate(&m_deviceData[i].event));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel, (void *)m_hVel, 0));
        }
    }
    else
    {
        m_hPos[0] = new T[m_numBodies*4];
        m_hVel = new T[m_numBodies*4];

        memset(m_hPos[0], 0, memSize);
        memset(m_hVel, 0, memSize);

        checkCudaErrors(cudaEventCreate(&m_deviceData[0].event));

        if (m_bUsePBO)
        {
            // create the position pixel buffer objects for rendering
            // we will actually compute directly from this memory in CUDA too
            glGenBuffers(2, (GLuint *)m_pbo);

            for (int i = 0; i < 2; ++i)
            {
                glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]);
                glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW);

                // Read the size back to verify the allocation.
                int size = 0;
                glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);

                if ((unsigned)size != memSize)
                {
                    // Fixed: the message previously ended in a literal 'n'
                    // instead of a newline escape.
                    fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!\n");
                }

                glBindBuffer(GL_ARRAY_BUFFER, 0);
                checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i], m_pbo[i], cudaGraphicsMapFlagsNone));
            }
        }
        else
        {
            checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize));
            checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize));
        }

        checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize));
    }

    m_bInitialized = true;
}
/*
    GL interop test.
    Julia.

    Renders a Julia set with CUDA into a pixel unpack buffer that is shared
    with OpenGL, uploads the buffer into a texture each frame and draws it on
    a full-screen quad. The loop stops when the window stops processing
    events or after 1000 ms, whichever comes first. Any GLException fails
    the test.
*/
TEST(GLInteropTest, Julia)
{
    try
    {
        const int width = 1280;
        const int height = 720;
        const int bufWidth = 1920;
        const int bufHeight = 1080;

        // ------------------------------------------------------------

        GLTestWindow window(width, height, true);
        GLContextParam param;
        param.DebugMode = true;
        param.Multisample = 8;
        GLContext context(window.Handle(), param);
        GLUtil::EnableDebugOutput(GLUtil::DebugOutputFrequencyLow);

        // ------------------------------------------------------------

        // Choose device (request compute capability 2.0)
        cudaDeviceProp prop;
        memset(&prop, 0, sizeof(cudaDeviceProp));
        prop.major = 2;
        prop.minor = 0;

        int devID;
        HandleCudaError(cudaChooseDevice(&devID, &prop));
        HandleCudaError(cudaGLSetGLDevice(devID));

        // Get properties
        HandleCudaError(cudaGetDeviceProperties(&prop, devID));

        // Create texture and PBO
        GLTexture2D texture;
        texture.SetMagFilter(GL_LINEAR);
        texture.SetMinFilter(GL_LINEAR);
        texture.SetWrap(GL_CLAMP_TO_EDGE);
        texture.Allocate(bufWidth, bufHeight, GL_RGBA8);

        // 4 bytes per pixel
        GLPixelUnpackBuffer pbo;
        pbo.Allocate(bufWidth * bufHeight * 4, NULL, GL_DYNAMIC_DRAW);

        // Register
        cudaGraphicsResource* cudaPbo;
        HandleCudaError(cudaGraphicsGLRegisterBuffer(&cudaPbo, pbo.ID(), cudaGraphicsMapFlagsWriteDiscard));

        // ------------------------------------------------------------

        GLShader shader;
        shader.Compile("../resources/texturetest_simple2d.vert");
        shader.Compile("../resources/texturetest_simple2d.frag");
        shader.Link();

        GLVertexArray vao;
        GLVertexBuffer positionVbo;
        GLIndexBuffer ibo;

        // Full-screen quad as two triangles
        glm::vec3 v[] =
        {
            glm::vec3( 1.0f,  1.0f, 0.0f),
            glm::vec3(-1.0f,  1.0f, 0.0f),
            glm::vec3(-1.0f, -1.0f, 0.0f),
            glm::vec3( 1.0f, -1.0f, 0.0f)
        };

        GLuint i[] =
        {
            0, 1, 2,
            2, 3, 0
        };

        positionVbo.AddStatic(12, &v[0].x);
        vao.Add(GLDefaultVertexAttribute::Position, &positionVbo);
        ibo.AddStatic(6, i);

        // ------------------------------------------------------------

        double fps = 0.0;
        double timeSum = 0.0;
        double prevTime = GLTestUtil::CurrentTimeMilli();
        int frameCount = 0;
        double start = GLTestUtil::CurrentTimeMilli();

        float xcparam = -0.8f;
        float ycparam = 0.165f;
        float inc = 0.001f;

        while (window.ProcessEvent())
        {
            // ------------------------------------------------------------

            // FPS is averaged over windows of 13 frames
            double currentTime = GLTestUtil::CurrentTimeMilli();
            double elapsedTime = currentTime - prevTime;
            timeSum += elapsedTime;
            frameCount++;
            if (frameCount >= 13)
            {
                fps = 1000.0 * 13.0 / timeSum;
                timeSum = 0.0;
                frameCount = 0;
            }
            prevTime = currentTime;

            window.SetTitle((boost::format("GLInteropTest_Julia [FPS %.1f]") % fps).str());

            // ------------------------------------------------------------

            // Stop after one second of wall-clock time
            double elapsed = GLTestUtil::CurrentTimeMilli() - start;
            if (elapsed >= 1000.0)
            {
                break;
            }

            // Sweep the x parameter back and forth inside [-0.811, -0.799]
            xcparam += inc;
            if (xcparam > -0.799f || xcparam < -0.811f)
            {
                inc *= -1.0f;
            }

            // ------------------------------------------------------------

            HandleCudaError(cudaGraphicsMapResources(1, &cudaPbo, NULL));

            // Get device pointer
            uchar4* devPtr;
            size_t bufferSize;
            HandleCudaError(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &bufferSize, cudaPbo));

            Run_GLInteropTestJuliaKernel(bufWidth, bufHeight, prop.multiProcessorCount, xcparam, ycparam, devPtr);

            HandleCudaError(cudaGraphicsUnmapResources(1, &cudaPbo, NULL));

            texture.Replace(&pbo, glm::ivec4(0, 0, bufWidth, bufHeight), GL_RGBA, GL_UNSIGNED_BYTE);

            // ------------------------------------------------------------

            glClearBufferfv(GL_COLOR, 0, glm::value_ptr(glm::vec4(0.0f)));
            glViewportIndexedfv(0, glm::value_ptr(glm::vec4(0, 0, width, height)));

            shader.Begin();
            shader.SetUniform("tex", 0);
            texture.Bind();
            vao.Draw(GL_TRIANGLES, &ibo);
            texture.Unbind();
            shader.End();

            context.SwapBuffers();
        }

        // Drop the CUDA registration while the GL buffer still exists
        // ('pbo' is a local that outlives this point), then reset the
        // device. Previously the resource was never unregistered and the
        // reset result was ignored.
        HandleCudaError(cudaGraphicsUnregisterResource(cudaPbo));
        HandleCudaError(cudaDeviceReset());
    }
    catch (const GLException& e)
    {
        FAIL() << GLTestUtil::PrintGLException(e);
    }
}
/// Constructs the vertex buffer through the OsdGpuVertexBuffer base class and
/// registers its GL buffer (_vbo) with CUDA.
///
/// A constructor cannot return a status, so when registration fails
/// _cudaResource is set to NULL. This gives later code a well-defined value
/// to test instead of whatever the failed call left behind.
///
/// @param numElements  forwarded to OsdGpuVertexBuffer.
/// @param numVertices  forwarded to OsdGpuVertexBuffer.
OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices)
    : OsdGpuVertexBuffer(numElements, numVertices) {

    // register vbo as cuda resource
    cudaError_t err = cudaGraphicsGLRegisterBuffer(
        &_cudaResource, _vbo, cudaGraphicsMapFlagsNone);

    if (err != cudaSuccess) {
        // Do not keep a handle from a failed registration.
        _cudaResource = NULL;
    }
}