Esempio n. 1
0
cudaError_t WINAPI wine_cudaGraphicsGLRegisterBuffer( struct cudaGraphicsResource **resource, GLuint buffer, unsigned int Flags ) {
    WINE_TRACE("\n");
    return cudaGraphicsGLRegisterBuffer( resource, buffer, Flags );
}
Esempio n. 2
0
bool
CudaGLVertexBuffer::allocate() {

    int size = _numElements * _numVertices * sizeof(float);

    glGenBuffers(1, &_vbo);

#if defined(GL_EXT_direct_state_access)
    if (glNamedBufferDataEXT) {
        glNamedBufferDataEXT(_vbo, size, 0, GL_DYNAMIC_DRAW);
    } else {
#else
    {
#endif
        glBindBuffer(GL_ARRAY_BUFFER, _vbo);
        glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, 0);
    }

    // register vbo as cuda resource
    cudaError_t err = cudaGraphicsGLRegisterBuffer(
        &_cudaResource, _vbo, cudaGraphicsMapFlagsWriteDiscard);

    if (err != cudaSuccess) return false;
    return true;
}

void
CudaGLVertexBuffer::map() {

    if (_devicePtr) return;
    size_t num_bytes;
    void *ptr;

    cudaError_t err = cudaGraphicsMapResources(1, &_cudaResource, 0);
    if (err != cudaSuccess)
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "CudaGLVertexBuffer::map failed.\n%s\n",
                   cudaGetErrorString(err));
    err = cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource);
    if (err != cudaSuccess)
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "CudaGLVertexBuffer::map failed.\n%s\n",
                   cudaGetErrorString(err));
    _devicePtr = ptr;
}

void
CudaGLVertexBuffer::unmap() {

    if (_devicePtr == NULL) return;
    cudaError_t err = cudaGraphicsUnmapResources(1, &_cudaResource, 0);
    if (err != cudaSuccess)
       Far::Error(Far::FAR_RUNTIME_ERROR,
                  "CudaGLVertexBuffer::unmap failed.\n%s\n",
                  cudaGetErrorString(err));
    _devicePtr = NULL;
}


}  // end namespace Osd
void shutDown(unsigned char k, int /*x*/, int /*y*/)
{
    switch (k){
        case '\033':
        case 'q':
        case 'Q':
            printf("Shutting down...\n");
                cutilCheckError( cutStopTimer(hTimer)   );
                cutilCheckError( cutDeleteTimer(hTimer) );
				// DEPRECATED: cutilSafeCall( cudaGLRegisterBufferObject(gl_PBO) );
				cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO, 
									   cudaGraphicsMapFlagsWriteDiscard));
                glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
                glDeleteBuffers(1, &gl_PBO);
                glDeleteTextures(1, &gl_Tex);

                cutilSafeCall( CUDA_FreeArray() );
                free(h_Src);
            printf("Shutdown done.\n");

            cutilDeviceReset();
            exit(0);
        break;

        case '1':
            printf("Passthrough.\n");
            g_Kernel = 0;
        break;

        case '2':
            printf("KNN method \n");
            g_Kernel = 1;
        break;

        case '3':
            printf("NLM method\n");
            g_Kernel = 2;
        break;

        case '4':
            printf("Quick NLM(NLM2) method\n");
            g_Kernel = 3;
        break;

        case ' ':
            printf(g_Diag ? "LERP highlighting mode.\n" : "Normal mode.\n");
            g_Diag = !g_Diag;
        break;

        case 'n':
            printf("Decrease noise level.\n");
            knnNoise -= noiseStep;
            nlmNoise -= noiseStep;
        break;

        case 'N':
            printf("Increase noise level.\n");
            knnNoise += noiseStep;
            nlmNoise += noiseStep;
        break;

        case 'l':
            printf("Decrease LERP quotent.\n");
            lerpC = MAX(lerpC - lerpStep, 0.0f);
        break;

        case 'L':
            printf("Increase LERP quotent.\n");
            lerpC = MIN(lerpC + lerpStep, 1.0f);
        break;

        case 'f' : case 'F':
            g_FPS = true;
        break;

        case '?':
            printf("lerpC = %5.5f\n", lerpC);
            printf("knnNoise = %5.5f\n", knnNoise);
            printf("nlmNoise = %5.5f\n", nlmNoise);
        break;
    }
}
Esempio n. 4
0
int main()
{
	int width = 800;
	int height = 600;

	int mesh_width = 256;
	int mesh_height = 256;

	// Creation du device
	cutilSafeCall( cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ) );

	// Creation d'une fenetre
	C3::Window OpenGLWin;
	OpenGLWin.Create(C3::WindowMode(width,height),"CudaC3");
	
	// Glew init
	GLenum err = glewInit();
	if(err != GLEW_OK)
		std::cout << "Error on GLEW initialization.\n";


	// Configuration OpenGL
	glClearColor(0.0, 0.0, 0.0, 1.0);
	glDisable(GL_DEPTH_TEST);
	glViewport(0, 0, width, height);
	glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    gluPerspective(60.0, (GLfloat)width / (GLfloat) height, 0.1, 10.0);

	// VBO
	// *** Create
	GLuint vbo;
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	// *** Initialize
	unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
	glBufferData(GL_ARRAY_BUFFER, size, 0, GL_DYNAMIC_DRAW);
	glBindBuffer(GL_ARRAY_BUFFER, 0);
	// *** Register in CUDA
	cudaGraphicsResource *cuda_vbo_resource = NULL;
	cutilSafeCall(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));


	float g_fAnim = 0.f;
	int nbFrame = 0;
	float timeFPS = 0.f;
	while(OpenGLWin.IsOpened())
	{
		// Events
		C3::Event event;
		while(OpenGLWin.PoolEvent(event))
		{
                        //std::cout << "Event !" << std::endl;
			if(event.Type == C3::Event::Closed)
			{
				std::cout << "Close ... " << std::endl;
				OpenGLWin.Close();
			}
			else if(event.Type == C3::Event::KeyPressed)
			{
				if(event.Key.Code == C3::Key::Escape)
				{
					std::cout << "Close ... " << std::endl;
					OpenGLWin.Close();
				}
			}
		}

		// Mise a jour du temps
		g_fAnim += OpenGLWin.GetFrameTime() / 1000.f;
		timeFPS += OpenGLWin.GetFrameTime() / 1000.f;
		nbFrame++;
		if(timeFPS > 1.0f)
		{
			std::stringstream ss;
			ss << "CudaC3 [" << (int)ceil( nbFrame / timeFPS ) << " FPS]";
			OpenGLWin.SetTitle(ss.str());
			timeFPS = 0.f;
			nbFrame = 0;
		}
		
		// Draw the scene
		glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

		// Lancer le calcul CUDA
		// *** map OpenGL buffer object for writing from CUDA
		float4 *dptr;
		cutilSafeCall(cudaGraphicsMapResources(1, &cuda_vbo_resource, 0));
		size_t num_bytes; 
		cutilSafeCall(cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes, cuda_vbo_resource));
		// *** Run kernel
		runKernel(dptr, mesh_width, mesh_height,g_fAnim);
		cutilSafeCall( cutilDeviceSynchronize() );
		// *** Unmap
		cutilSafeCall(cudaGraphicsUnmapResources(1, &cuda_vbo_resource, 0));
		
		// OpenGL
		// *** Make some transformation
		glMatrixMode(GL_MODELVIEW);
		glLoadIdentity();
		glTranslatef(0.0, 0.0, -3.0);
		glRotatef(0.0, 1.0, 0.0, 0.0);
		glRotatef(0.0, 0.0, 1.0, 0.0);
		// *** Render VBO
		// --- Bind
		glBindBuffer(GL_ARRAY_BUFFER, vbo);
		glVertexPointer(4, GL_FLOAT, 0, 0);
		// --- Draw
		glEnableClientState(GL_VERTEX_ARRAY);
		glColor3f(1.0, 0.0, 0.0);
		glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
		glDisableClientState(GL_VERTEX_ARRAY);

		// Swap buffers
		OpenGLWin.Display();
	}

	// Liberation des ressources
	cudaGraphicsUnregisterResource(cuda_vbo_resource);
	
	glBindBuffer(1, vbo);
	glDeleteBuffers(1, &vbo);

	// Close device
	cutilDeviceReset();

	return 0;
}
Esempio n. 5
0
void BodySystemGPU<T>::_initialize(unsigned numBodies)
{
    assert(!m_bInitialized);

    m_numBodies = numBodies;

    unsigned int memSize = sizeof(T) * 4 * numBodies;

    m_deviceData = new DeviceData<T>[m_numDevices];

    // divide up the workload amongst Devices
    float *weights = new float[m_numDevices];
    int *numSms = new int[m_numDevices];
    float total = 0;

    for (unsigned int i = 0; i < m_numDevices; i++)
    {
        cudaDeviceProp props;
        checkCudaErrors(cudaGetDeviceProperties(&props, i));

        // Choose the weight based on the Compute Capability
        // We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for
        // this application (since a 15-SM GF100 is about 2X faster than a 30-SM GT200).
        numSms[i] = props.multiProcessorCount;
        weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f);
        total += weights[i];

    }

    unsigned int offset = 0;
    unsigned int remaining = m_numBodies;

    for (unsigned int i = 0; i < m_numDevices; i++)
    {
        unsigned int count = (int)((weights[i] / total) * m_numBodies);
        unsigned int round = numSms[i] * 256;
        count = round * ((count + round - 1) / round);

        if (count > remaining)
        {
            count = remaining;
        }

        remaining -= count;
        m_deviceData[i].offset = offset;
        m_deviceData[i].numBodies = count;
        offset += count;

        if ((i == m_numDevices - 1) && (offset < m_numBodies-1))
        {
            m_deviceData[i].numBodies += m_numBodies - offset;
        }
    }

    delete [] weights;
    delete [] numSms;

    if (m_bUseSysMem)
    {
        checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize, cudaHostAllocMapped | cudaHostAllocPortable));
        checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize, cudaHostAllocMapped | cudaHostAllocPortable));
        checkCudaErrors(cudaHostAlloc((void **)&m_hVel,    memSize, cudaHostAllocMapped | cudaHostAllocPortable));

        memset(m_hPos[0], 0, memSize);
        memset(m_hPos[1], 0, memSize);
        memset(m_hVel, 0, memSize);

        for (unsigned int i = 0; i < m_numDevices; i++)
        {
            if (m_numDevices > 1)
            {
                checkCudaErrors(cudaSetDevice(i));
            }

            checkCudaErrors(cudaEventCreate(&m_deviceData[i].event));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0));
            checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel, (void *)m_hVel, 0));
        }
    }
    else
    {
        m_hPos[0] = new T[m_numBodies*4];
        m_hVel = new T[m_numBodies*4];

        memset(m_hPos[0], 0, memSize);
        memset(m_hVel, 0, memSize);

        checkCudaErrors(cudaEventCreate(&m_deviceData[0].event));

        if (m_bUsePBO)
        {
            // create the position pixel buffer objects for rendering
            // we will actually compute directly from this memory in CUDA too
            glGenBuffers(2, (GLuint *)m_pbo);

            for (int i = 0; i < 2; ++i)
            {
                glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]);
                glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW);

                int size = 0;
                glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);

                if ((unsigned)size != memSize)
                {
                    fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n");
                }

                glBindBuffer(GL_ARRAY_BUFFER, 0);
                checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i],
                                                             m_pbo[i],
                                                             cudaGraphicsMapFlagsNone));
            }
        }
        else
        {
            checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize));
            checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize));
        }

        checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize));
    }

    m_bInitialized = true;
}
Esempio n. 6
0
/*
	GL interop test.
	Julia.
*/
TEST(GLInteropTest, Julia)
{
	try
	{
		const int width = 1280;
		const int height = 720;
		const int bufWidth = 1920;
		const int bufHeight = 1080;

		// ------------------------------------------------------------

		GLTestWindow window(width, height, true);
		GLContextParam param;

		param.DebugMode = true;
		param.Multisample = 8;

		GLContext context(window.Handle(), param);
		GLUtil::EnableDebugOutput(GLUtil::DebugOutputFrequencyLow);

		// ------------------------------------------------------------

		// Choose device
		cudaDeviceProp prop;
		memset(&prop, 0, sizeof(cudaDeviceProp));
		prop.major = 2;
		prop.minor = 0;

		int devID;
		HandleCudaError(cudaChooseDevice(&devID, &prop));
		HandleCudaError(cudaGLSetGLDevice(devID));

		// Get properties
		HandleCudaError(cudaGetDeviceProperties(&prop, devID));

		// Create texture and PBO
		GLTexture2D texture;
		texture.SetMagFilter(GL_LINEAR);
		texture.SetMinFilter(GL_LINEAR);
		texture.SetWrap(GL_CLAMP_TO_EDGE);
		texture.Allocate(bufWidth, bufHeight, GL_RGBA8);
		
		GLPixelUnpackBuffer pbo;
		pbo.Allocate(bufWidth * bufHeight * 4, NULL, GL_DYNAMIC_DRAW);

		// Register
		cudaGraphicsResource* cudaPbo;
		HandleCudaError(cudaGraphicsGLRegisterBuffer(&cudaPbo, pbo.ID(), cudaGraphicsMapFlagsWriteDiscard));

		// ------------------------------------------------------------

		GLShader shader;
		shader.Compile("../resources/texturetest_simple2d.vert");
		shader.Compile("../resources/texturetest_simple2d.frag");
		shader.Link();

		GLVertexArray vao;
		GLVertexBuffer positionVbo;
		GLIndexBuffer ibo;

		glm::vec3 v[] =
		{
			glm::vec3( 1.0f,  1.0f, 0.0f),
			glm::vec3(-1.0f,  1.0f, 0.0f),
			glm::vec3(-1.0f, -1.0f, 0.0f),
			glm::vec3( 1.0f, -1.0f, 0.0f)
		};

		GLuint i[] =
		{
			0, 1, 2,
			2, 3, 0
		};

		positionVbo.AddStatic(12, &v[0].x);
		vao.Add(GLDefaultVertexAttribute::Position, &positionVbo);
		ibo.AddStatic(6, i);

		// ------------------------------------------------------------

		double fps = 0.0;
		double timeSum = 0.0;
		double prevTime = GLTestUtil::CurrentTimeMilli();
		int frameCount = 0;

		double start = GLTestUtil::CurrentTimeMilli();
		float xcparam = -0.8f;
		float ycparam = 0.165f;
		float inc = 0.001f;

		while (window.ProcessEvent())
		{
			// ------------------------------------------------------------

			double currentTime = GLTestUtil::CurrentTimeMilli();
			double elapsedTime = currentTime - prevTime;

			timeSum += elapsedTime;
			frameCount++;

			if (frameCount >= 13)
			{
				fps = 1000.0 * 13.0 / timeSum;
				timeSum = 0.0;
				frameCount = 0;
			}

			prevTime = currentTime;
			window.SetTitle((boost::format("GLInteropTest_Julia [FPS %.1f]") % fps).str());

			// ------------------------------------------------------------

			double elapsed = GLTestUtil::CurrentTimeMilli() - start;
			if (elapsed >= 1000.0)
			{
				break;
			}

			xcparam += inc;
			if (xcparam > -0.799f || xcparam < -0.811f) 
			{
				inc *= -1.0f;
			}

			// ------------------------------------------------------------

			HandleCudaError(cudaGraphicsMapResources(1, &cudaPbo, NULL));
			
			// Get device pointer
			uchar4* devPtr;
			size_t bufferSize;
			HandleCudaError(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &bufferSize, cudaPbo));
			Run_GLInteropTestJuliaKernel(bufWidth, bufHeight, prop.multiProcessorCount, xcparam, ycparam, devPtr);
			HandleCudaError(cudaGraphicsUnmapResources(1, &cudaPbo, NULL));

			texture.Replace(&pbo, glm::ivec4(0, 0, bufWidth, bufHeight), GL_RGBA, GL_UNSIGNED_BYTE);

			// ------------------------------------------------------------

			glClearBufferfv(GL_COLOR, 0, glm::value_ptr(glm::vec4(0.0f)));
			glViewportIndexedfv(0, glm::value_ptr(glm::vec4(0, 0, width, height)));

			shader.Begin();
			shader.SetUniform("tex", 0);
			texture.Bind();
			vao.Draw(GL_TRIANGLES, &ibo);
			texture.Unbind();
			shader.End();

			context.SwapBuffers();
		}

		cudaDeviceReset();
	}
	catch (const GLException& e)
	{
		FAIL() << GLTestUtil::PrintGLException(e);
	}
}
Esempio n. 7
0
OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) :
    OsdGpuVertexBuffer(numElements, numVertices) {

    // register vbo as cuda resource
    cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone);
}