// Tears down the current resources and rebuilds the video source and the
// CUDA video objects for m_sFileName.
//
// Returns S_OK unconditionally; no result of the calls below is inspected.
HRESULT CudaVideoRender::reinitCudaResources()
	{
		// Step 1: release what is currently held (cleanup is invoked with false).
		cleanup(false);

		// Step 2: reload the video source. The value returned by
		// loadVideoSource is kept in m_bIsProgressive.
		// (An earlier variant of this call passed m_sFileName.c_str().)
		m_bIsProgressive = loadVideoSource(m_sFileName,
		                                   m_nVideoWidth,  m_nVideoHeight,
		                                   m_nWindowWidth, m_nWindowHeight);

		// Step 3: recreate the CUDA video objects.
		initCudaVideo();

		return S_OK;
	}
Exemplo n.º 2
0
// Tears down the current resources, reloads the video named by sFileName and
// rebuilds the CUDA video objects together with the D3D9 surface.
//
// Returns S_OK unconditionally; no result of the calls below is inspected.
HRESULT reinitCudaResources()
{
    // Step 1: release what is currently held (cleanup is invoked with false).
    cleanup(false);

    // Step 2: reload the video source. The value returned by loadVideoSource
    // is kept in g_bIsProgressive.
    g_bIsProgressive = loadVideoSource(sFileName.c_str(), g_nVideoWidth, g_nVideoHeight, g_nWindowWidth, g_nWindowHeight);

    // Step 3: recreate the CUDA video objects first, then size the D3D9
    // surface from the decoder's target dimensions.
    initCudaVideo();
    initD3D9Surface(g_pVideoDecoder->targetWidth(), g_pVideoDecoder->targetHeight());

    return S_OK;
}
Exemplo n.º 3
0
/**
 * Selects a CUDA device, creates the CUDA context, loads the NV12->ARGB PTX
 * module and creates the video decoding resources.
 *
 * @param argc        Command-line argument count, forwarded to the device
 *                    selection helpers.
 * @param argv        Command-line arguments; the presence of a "device" flag
 *                    switches to the find*DeviceDRV selection helpers.
 * @param bUseInterop Non-zero: the context is created with cuD3D9CtxCreate and
 *                    a D3D9 surface is initialised. Zero: a plain context is
 *                    created and two device buffers are allocated into
 *                    g_pInteropFrame.
 * @param bTCC        Only consulted when the "device" flag is present, where
 *                    it takes part in choosing the device-selection helper.
 *
 * @return S_OK when the module, the decoder and at least one output target
 *         (g_pImageDX or g_pInteropFrame[0]) are non-null; E_FAIL when the
 *         PTX module cannot be loaded, when the context cannot be popped, or
 *         when any of those objects is null.
 *
 * The process exits with EXIT_SUCCESS if the "device" path yields a negative
 * device id.
 */
HRESULT initCudaResources(int argc, char **argv, int bUseInterop, int bTCC)
{
    CUdevice cuda_device;

    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
    {
        // The helpers below receive argc/argv directly, so the numeric value
        // of the flag is not read separately here (it used to be fetched and
        // then immediately overwritten).
        //
        // NOTE(review): the original comment read "If interop is disabled,
        // then we need to create a CUDA context w/o the GL context", yet the
        // non-GL helper is chosen when interop is ENABLED (and TCC is off).
        // The condition is kept as-is; confirm which one is intended.
        if (bUseInterop && !bTCC)
        {
            cuda_device = findCudaDeviceDRV(argc, (const char **)argv);
        }
        else
        {
            cuda_device = findCudaGLDeviceDRV(argc, (const char **)argv);
        }

        if (cuda_device < 0)
        {
            printf("No CUDA Capable devices found, exiting...\n");
            exit(EXIT_SUCCESS);
        }
    }
    else
    {
        // If we want to use Graphics Interop, then choose the GPU that is capable
        if (bUseInterop)
        {
            cuda_device = gpuGetMaxGflopsGLDeviceIdDRV();
        }
        else
        {
            cuda_device = gpuGetMaxGflopsDeviceIdDRV();
        }
    }

    // Every selection path ends by resolving the ordinal to a device handle.
    checkCudaErrors(cuDeviceGet(&g_oDevice, cuda_device));

    // get compute capabilities and the devicename
    int major, minor;
    size_t totalGlobalMem;
    char deviceName[256];
    checkCudaErrors(cuDeviceComputeCapability(&major, &minor, g_oDevice));
    checkCudaErrors(cuDeviceGetName(deviceName, 256, g_oDevice));
    printf("> Using GPU Device %d: %s has SM %d.%d compute capability\n", cuda_device, deviceName, major, minor);

    checkCudaErrors(cuDeviceTotalMem(&totalGlobalMem, g_oDevice));
    printf("  Total amount of global memory:     %4.4f MB\n", (float)totalGlobalMem/(1024*1024));

    // Create CUDA Device w/ D3D9 interop (if WDDM), otherwise CUDA w/o interop (if TCC)
    // (use CU_CTX_BLOCKING_SYNC for better CPU synchronization)
    if (bUseInterop)
    {
        checkCudaErrors(cuD3D9CtxCreate(&g_oContext, &g_oDevice, CU_CTX_BLOCKING_SYNC, g_pD3DDevice));
    }
    else
    {
        checkCudaErrors(cuCtxCreate(&g_oContext, CU_CTX_BLOCKING_SYNC, g_oDevice));
    }

    try
    {
        // Pick the PTX image matching the pointer width of this build
        // (32-bit or 64-bit).
        if (sizeof(void *) == 4)
        {
            g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_Win32.ptx", exec_path, 2, 2, 2);
        }
        else
        {
            g_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_x64.ptx", exec_path, 2, 2, 2);
        }
    }
    catch (char const *p_file)
    {
        // If the CUmoduleManager constructor fails to load the PTX file, it will throw an exception
        printf("\n>> CUmoduleManager::Exception!  %s not found!\n", p_file);
        printf(">> Please rebuild NV12ToARGB_drvapi.cu or re-install this sample.\n");
        return E_FAIL;
    }

    g_pCudaModule->GetCudaFunction("NV12ToARGB_drvapi",   &gfpNV12toARGB);
    g_pCudaModule->GetCudaFunction("Passthru_drvapi",     &gfpPassthru);

    // Now we create the CUDA resources and the CUDA decoder context
    initCudaVideo();

    if (bUseInterop)
    {
        initD3D9Surface(g_pVideoDecoder->targetWidth(),
                        g_pVideoDecoder->targetHeight());
    }
    else
    {
        // Two device buffers of width * height * 2 bytes each. The product is
        // formed in size_t so it cannot wrap in a narrower integer type.
        const size_t nFrameBytes = static_cast<size_t>(g_pVideoDecoder->targetWidth()) *
                                   static_cast<size_t>(g_pVideoDecoder->targetHeight()) * 2;
        checkCudaErrors(cuMemAlloc(&g_pInteropFrame[0], nFrameBytes));
        checkCudaErrors(cuMemAlloc(&g_pInteropFrame[1], nFrameBytes));
    }

    // Detach the context from the calling thread.
    CUcontext cuCurrent = NULL;
    CUresult result = cuCtxPopCurrent(&cuCurrent);

    if (result != CUDA_SUCCESS)
    {
        printf("cuCtxPopCurrent: %d\n", result);
        assert(0);
        // assert() is compiled out under NDEBUG; without this return a
        // release build would carry on and could report S_OK.
        return E_FAIL;
    }

    return ((g_pCudaModule && g_pVideoDecoder && (g_pImageDX || g_pInteropFrame[0])) ? S_OK : E_FAIL);
}
	/**
	 * Selects the CUDA device, creates the CUDA context, loads the NV12->ARGB
	 * PTX module and creates the video decoding resources for this renderer.
	 *
	 * @param bUseInterop Non-zero: the graphics-capable device is selected and
	 *                    the context is created with cuD3D9CtxCreate on the
	 *                    renderer's D3D9 device. Zero: a plain context is
	 *                    created and two device buffers are allocated into
	 *                    m_pInteropFrame.
	 * @param bTCC        Not read by this method.
	 *
	 * @return S_OK when both m_pCudaModule and m_pVideoDecoder are non-null;
	 *         E_FAIL when the PTX module cannot be loaded, when the context
	 *         cannot be popped, or when either object is null.
	 */
	HRESULT CudaVideoRender::initCudaResources(int bUseInterop, int bTCC)
	{
		// If we want to use Graphics Interop, then choose the GPU that is capable
		CUdevice cuda_device;
		if (bUseInterop) {
			cuda_device = cutilDrvGetMaxGflopsGraphicsDeviceId();
		} else {
			cuda_device = cutilDrvGetMaxGflopsDeviceId();
		}
		// Both selection paths resolve the ordinal to a device handle.
		cutilDrvSafeCallNoSync( cuDeviceGet(&m_cuDevice, cuda_device) );

		// get compute capabilities and the devicename
		int major, minor;
		size_t totalGlobalMem;
		char deviceName[256];
		cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, m_cuDevice) );
		cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, m_cuDevice) );
		printf("> Using GPU Device %d: %s has SM %d.%d compute capability\n", cuda_device, deviceName, major, minor);

		cutilDrvSafeCallNoSync( cuDeviceTotalMem(&totalGlobalMem, m_cuDevice) );
		printf("  Total amount of global memory:     %4.4f MB\n", (float)totalGlobalMem/(1024*1024) );

		// Create CUDA Device w/ D3D9 interop (if WDDM), otherwise CUDA w/o interop (if TCC)
		// (use CU_CTX_BLOCKING_SYNC for better CPU synchronization)
		if (bUseInterop) {
			cutilDrvSafeCallNoSync( cuD3D9CtxCreate(&m_cuContext, &m_cuDevice, CU_CTX_BLOCKING_SYNC, m_pRenderer9->getDevice()) );
		} else {
			cutilDrvSafeCallNoSync( cuCtxCreate(&m_cuContext, CU_CTX_BLOCKING_SYNC, m_cuDevice) );
		}

		try {
			// Pick the PTX image matching the pointer width of this build
			// (32-bit or 64-bit).
			if (sizeof(void *) == 4) {
				m_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_Win32.ptx", "./", 2, 2, 2);
			} else {
				m_pCudaModule = new CUmoduleManager("NV12ToARGB_drvapi_x64.ptx", "./", 2, 2, 2);
			}
		} catch (char const *p_file) {
			// The CUmoduleManager constructor throws the file name when the PTX
			// file cannot be loaded; report it instead of letting the exception
			// escape an HRESULT-returning method.
			printf("\n>> CUmoduleManager::Exception!  %s not found!\n", p_file);
			printf(">> Please rebuild NV12ToARGB_drvapi.cu or re-install this sample.\n");
			return E_FAIL;
		}

		m_pCudaModule->GetCudaFunction("NV12ToARGB_drvapi",   &m_fpNV12toARGB);
		m_pCudaModule->GetCudaFunction("Passthru_drvapi",     &m_fpPassthru);

		// Now we create the CUDA resources and the CUDA decoder context
		initCudaVideo();

		if (bUseInterop) {
			// The D3D9 surface is intentionally not created here; the call is
			// left disabled as in the original:
			//initD3D9Surface   ( m_pVideoDecoder->targetWidth(), 
			//					m_pVideoDecoder->targetHeight() );
		} else {
			// Two device buffers of width * height * 2 bytes each. The product
			// is formed in size_t so it cannot wrap in a narrower integer type.
			const size_t nFrameBytes = static_cast<size_t>(m_pVideoDecoder->targetWidth()) *
			                           static_cast<size_t>(m_pVideoDecoder->targetHeight()) * 2;
			cutilDrvSafeCallNoSync( cuMemAlloc(&m_pInteropFrame[0], nFrameBytes) );
			cutilDrvSafeCallNoSync( cuMemAlloc(&m_pInteropFrame[1], nFrameBytes) );
		}

		// Detach the context from the calling thread.
		CUcontext cuCurrent = NULL;
		CUresult result = cuCtxPopCurrent(&cuCurrent);
		if (result != CUDA_SUCCESS) {
			printf("cuCtxPopCurrent: %d\n", result);
			assert(0);
			// assert() is compiled out under NDEBUG; without this return a
			// release build would carry on and could report S_OK.
			return E_FAIL;
		}

		return ((m_pCudaModule && m_pVideoDecoder) ? S_OK : E_FAIL);
	}