Ejemplo n.º 1
0
Archivo: glcu.cpp Proyecto: Answeror/cg
void glcu::init_cuda()
{
    cudaDeviceProp prop = {0};
    int dev;
    prop.major = 1;
    prop.minor = 0;
    HANDLE_ERROR(cudaChooseDevice(&dev, &prop));
    HANDLE_ERROR(cudaGLSetGLDevice(dev));
}
	void CGLUtil::setCudaDeviceForGLInteroperation() {
		cudaDeviceProp  sProp;
		memset( &sProp, 0, sizeof( cudaDeviceProp ) );
		sProp.major = 1;
		sProp.minor = 0;
		int nDev;
		cudaSafeCall( cudaChooseDevice( &nDev, &sProp ) );
		// tell CUDA which nDev we will be using for graphic interop
		// from the programming guide:  Interoperability with OpenGL
		//     requires that the CUDA nDeviceNO_ be specified by
		//     cudaGLSetGLDevice() before any other runtime calls.
		//cudaSafeCall( cudaGLSetGLDevice( nDev ) ;

		return;
	}//setCudaDeviceForGLInteroperation()
Ejemplo n.º 3
0
int main(int argc, char **argv)
{
    uchar *h_Data;
    uint  *h_HistogramCPU, *h_HistogramGPU;
    uchar *d_Data;
    uint  *d_Histogram;
    uint hTimer;
    int PassFailFlag = 1;
    uint byteCount = 64 * 1048576;
    uint uiSizeMult = 1;

    cudaDeviceProp deviceProp;
    deviceProp.major = 0;
    deviceProp.minor = 0;
    int dev;

	shrQAStart(argc, argv);

	// set logfile name and start logs
    shrSetLogFileName ("histogram.txt");

    //Use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if( shrCheckCmdLineFlag(argc, (const char**)argv, "device") ) {
        dev = cutilDeviceInit(argc, argv);
        if (dev < 0) {
           printf("No CUDA Capable Devices found, exiting...\n");
           shrQAFinishExit(argc, (const char **)argv, QA_WAIVED);
        }
    } else {
        cudaSetDevice( dev = cutGetMaxGflopsDeviceId() );
        cutilSafeCall( cudaChooseDevice(&dev, &deviceProp) );
    }
    cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev) );

	printf("CUDA device [%s] has %d Multi-Processors, Compute %d.%d\n", 
		deviceProp.name, deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);

	int version = deviceProp.major * 0x10 + deviceProp.minor;

	if(version < 0x11) 
    {
        printf("There is no device supporting a minimum of CUDA compute capability 1.1 for this SDK sample\n");
        cutilDeviceReset();
		shrQAFinishExit(argc, (const char **)argv, QA_WAIVED);
    }

    cutilCheckError(cutCreateTimer(&hTimer));

    // Optional Command-line multiplier to increase size of array to histogram
    if (shrGetCmdLineArgumentu(argc, (const char**)argv, "sizemult", &uiSizeMult))
    {
        uiSizeMult = CLAMP(uiSizeMult, 1, 10);
        byteCount *= uiSizeMult;
    }

    shrLog("Initializing data...\n");
        shrLog("...allocating CPU memory.\n");
            h_Data         = (uchar *)malloc(byteCount);
            h_HistogramCPU = (uint  *)malloc(HISTOGRAM256_BIN_COUNT * sizeof(uint));
            h_HistogramGPU = (uint  *)malloc(HISTOGRAM256_BIN_COUNT * sizeof(uint));

        shrLog("...generating input data\n");
            srand(2009);
            for(uint i = 0; i < byteCount; i++) 
                h_Data[i] = rand() % 256;

        shrLog("...allocating GPU memory and copying input data\n\n");
            cutilSafeCall( cudaMalloc((void **)&d_Data, byteCount  ) );
            cutilSafeCall( cudaMalloc((void **)&d_Histogram, HISTOGRAM256_BIN_COUNT * sizeof(uint)  ) );
            cutilSafeCall( cudaMemcpy(d_Data, h_Data, byteCount, cudaMemcpyHostToDevice) );

    {
        shrLog("Starting up 64-bin histogram...\n\n");
            initHistogram64();

        shrLog("Running 64-bin GPU histogram for %u bytes (%u runs)...\n\n", byteCount, numRuns);
            for(int iter = -1; iter < numRuns; iter++){
                //iter == -1 -- warmup iteration
                if(iter == 0){
                    cutilSafeCall( cutilDeviceSynchronize() );
                    cutilCheckError( cutResetTimer(hTimer) );
                    cutilCheckError( cutStartTimer(hTimer) );
                }

                histogram64(d_Histogram, d_Data, byteCount);
            }

            cutilSafeCall( cutilDeviceSynchronize() );
            cutilCheckError(  cutStopTimer(hTimer));
            double dAvgSecs = 1.0e-3 * (double)cutGetTimerValue(hTimer) / (double)numRuns;
        shrLog("histogram64() time (average) : %.5f sec, %.4f MB/sec\n\n", dAvgSecs, ((double)byteCount * 1.0e-6) / dAvgSecs);
        shrLogEx(LOGBOTH | MASTER, 0, "histogram64, Throughput = %.4f MB/s, Time = %.5f s, Size = %u Bytes, NumDevsUsed = %u, Workgroup = %u\n", 
                (1.0e-6 * (double)byteCount / dAvgSecs), dAvgSecs, byteCount, 1, HISTOGRAM64_THREADBLOCK_SIZE); 

        shrLog("\nValidating GPU results...\n");
            shrLog(" ...reading back GPU results\n");
                cutilSafeCall( cudaMemcpy(h_HistogramGPU, d_Histogram, HISTOGRAM64_BIN_COUNT * sizeof(uint), cudaMemcpyDeviceToHost) );

            shrLog(" ...histogram64CPU()\n");
               histogram64CPU(
                    h_HistogramCPU,
                    h_Data,
                    byteCount
                );

            shrLog(" ...comparing the results...\n");
                for(uint i = 0; i < HISTOGRAM64_BIN_COUNT; i++)
                    if(h_HistogramGPU[i] != h_HistogramCPU[i]) PassFailFlag = 0;
            shrLog(PassFailFlag ? " ...64-bin histograms match\n\n" : " ***64-bin histograms do not match!!!***\n\n" );

        shrLog("Shutting down 64-bin histogram...\n\n\n");
            closeHistogram64();
    }

    {
        shrLog("Initializing 256-bin histogram...\n");
            initHistogram256();

        shrLog("Running 256-bin GPU histogram for %u bytes (%u runs)...\n\n", byteCount, numRuns);
            for(int iter = -1; iter < numRuns; iter++){
                //iter == -1 -- warmup iteration
                if(iter == 0){
                    cutilSafeCall( cutilDeviceSynchronize() );
                    cutilCheckError( cutResetTimer(hTimer) );
                    cutilCheckError( cutStartTimer(hTimer) );
                }

                histogram256(d_Histogram, d_Data, byteCount);
            }

            cutilSafeCall( cutilDeviceSynchronize() );
            cutilCheckError(  cutStopTimer(hTimer));
            double dAvgSecs = 1.0e-3 * (double)cutGetTimerValue(hTimer) / (double)numRuns;
        shrLog("histogram256() time (average) : %.5f sec, %.4f MB/sec\n\n", dAvgSecs, ((double)byteCount * 1.0e-6) / dAvgSecs);
        shrLogEx(LOGBOTH | MASTER, 0, "histogram256, Throughput = %.4f MB/s, Time = %.5f s, Size = %u Bytes, NumDevsUsed = %u, Workgroup = %u\n", 
                (1.0e-6 * (double)byteCount / dAvgSecs), dAvgSecs, byteCount, 1, HISTOGRAM256_THREADBLOCK_SIZE); 
                
        shrLog("\nValidating GPU results...\n");
            shrLog(" ...reading back GPU results\n");
                cutilSafeCall( cudaMemcpy(h_HistogramGPU, d_Histogram, HISTOGRAM256_BIN_COUNT * sizeof(uint), cudaMemcpyDeviceToHost) );

            shrLog(" ...histogram256CPU()\n");
                histogram256CPU(
                    h_HistogramCPU,
                    h_Data,
                    byteCount
                );

            shrLog(" ...comparing the results\n");
                for(uint i = 0; i < HISTOGRAM256_BIN_COUNT; i++)
                    if(h_HistogramGPU[i] != h_HistogramCPU[i]) PassFailFlag = 0;
            shrLog(PassFailFlag ? " ...256-bin histograms match\n\n" : " ***256-bin histograms do not match!!!***\n\n" );

        shrLog("Shutting down 256-bin histogram...\n\n\n");
            closeHistogram256();
    }

    shrLog("Shutting down...\n");
        cutilCheckError(cutDeleteTimer(hTimer));
        cutilSafeCall( cudaFree(d_Histogram) );
        cutilSafeCall( cudaFree(d_Data) );
        free(h_HistogramGPU);
        free(h_HistogramCPU);
        free(h_Data);

    cutilDeviceReset();
	shrLog("%s - Test Summary\n", sSDKsample);
    // pass or fail (for both 64 bit and 256 bit histograms)
    shrQAFinishExit(argc, (const char **)argv, (PassFailFlag ? QA_PASSED : QA_FAILED));
}
Ejemplo n.º 4
0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int
main( int argc, char** argv) 
{
    shrQAStart( argc, argv );
    shrSetLogFileName ("reduction.txt");

		char *reduceMethod;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "method", &reduceMethod);
    
    char *typeChoice;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "type", &typeChoice);

    if (0 == typeChoice)
    {
        typeChoice = (char*)malloc(4 * sizeof(char));
        strcpy(typeChoice, "int");
    }

    ReduceType datatype = REDUCE_INT;

    if (!strcasecmp(typeChoice, "float"))
        datatype = REDUCE_FLOAT;
    else if (!strcasecmp(typeChoice, "double"))
        datatype = REDUCE_DOUBLE;
    else
        datatype = REDUCE_INT;

    cudaDeviceProp deviceProp;
    deviceProp.major = 1;
    deviceProp.minor = 0;
    int minimumComputeVersion = 10;

    if (datatype == REDUCE_DOUBLE)
    {
        deviceProp.minor = 3;
        minimumComputeVersion = 13;
    }

    int dev;

		if(!cutCheckCmdLineFlag(argc, (const char**)argv, "method") )
		{
				fprintf(stderr, "MISSING --method FLAG.\nYou must provide --method={ SUM | MIN | MAX }.\n");
				exit(1);
		}

    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) 
    {
        cutilDeviceInit(argc, argv);
        cutilSafeCallNoSync(cudaGetDevice(&dev));
    } 
    else
    {
        cutilSafeCallNoSync(cudaChooseDevice(&dev, &deviceProp));
        
    }

    cutilSafeCallNoSync(cudaGetDeviceProperties(&deviceProp, dev));

    if((deviceProp.major * 10 + deviceProp.minor) >= minimumComputeVersion)
    {
        shrLog("Using Device %d: %s\n\n", dev, deviceProp.name);
        cutilSafeCallNoSync(cudaSetDevice(dev));
    }
    else 
    {
        shrLog("Error: the selected device does not support the minimum compute capability of %d.%d.\n\n",
            minimumComputeVersion / 10, minimumComputeVersion % 10);

        cutilDeviceReset();
        shrQAFinishExit(argc, (const char **)argv, QA_WAIVED);
    }   

    shrLog("Reducing array of type %s\n\n", typeChoice);

	bool bResult = false;

    switch (datatype)
    {
    default:
    case REDUCE_INT:
				if (strcmp("SUM", reduceMethod) == 0) {
					bResult = runTestSum<int>( argc, argv, datatype);
				} else if ( strcmp("MAX", reduceMethod) == 0 ) {
					bResult = runTestMax<int>( argc, argv, datatype);
				} else if ( strcmp("MIN", reduceMethod) == 0 ) {
					bResult = runTestMin<int>( argc, argv, datatype);
				} else {
					fprintf(stderr, "No --method specified!\n");
					exit(1);
				}
        break;
    case REDUCE_FLOAT:
				if (strcmp("SUM", reduceMethod) == 0) {
					bResult = runTestSum<float>( argc, argv, datatype);
				} else if ( strcmp("MAX", reduceMethod) == 0 ) {
					bResult = runTestMax<float>( argc, argv, datatype);
				} else if ( strcmp("MIN", reduceMethod) == 0 ) {
					bResult = runTestMin<float>( argc, argv, datatype);
				} else {
					fprintf(stderr, "No --method specified!\n");
					exit(1);
				}
        break;
    case REDUCE_DOUBLE:
				if (strcmp("SUM", reduceMethod) == 0) {
					bResult = runTestSum<double>( argc, argv, datatype);
				} else if ( strcmp("MAX", reduceMethod) == 0 ) {
					bResult = runTestMax<double>( argc, argv, datatype);
				} else if ( strcmp("MIN", reduceMethod) == 0 ) {
					bResult = runTestMin<double>( argc, argv, datatype);
				} else {
					fprintf(stderr, "No --method specified!\n");
					exit(1);
				}
        break;
    }
    
    cutilDeviceReset();
	shrQAFinishExit(argc, (const char**)argv, (bResult ? QA_PASSED : QA_FAILED));
}
Ejemplo n.º 5
0
cudaError_t WINAPI wine_cudaChooseDevice( int *device, const struct cudaDeviceProp *prop ) {
    WINE_TRACE("\n");
    return cudaChooseDevice( device, prop );
}
Ejemplo n.º 6
0
void cuda_choose_device( int *device, const struct cudaDeviceProp *prop)
{
    check_cuda_error( cudaChooseDevice( device, prop));
}
Ejemplo n.º 7
0
/*
	GL interop test.
	Julia.
*/
TEST(GLInteropTest, Julia)
{
	try
	{
		const int width = 1280;
		const int height = 720;
		const int bufWidth = 1920;
		const int bufHeight = 1080;

		// ------------------------------------------------------------

		GLTestWindow window(width, height, true);
		GLContextParam param;

		param.DebugMode = true;
		param.Multisample = 8;

		GLContext context(window.Handle(), param);
		GLUtil::EnableDebugOutput(GLUtil::DebugOutputFrequencyLow);

		// ------------------------------------------------------------

		// Choose device
		cudaDeviceProp prop;
		memset(&prop, 0, sizeof(cudaDeviceProp));
		prop.major = 2;
		prop.minor = 0;

		int devID;
		HandleCudaError(cudaChooseDevice(&devID, &prop));
		HandleCudaError(cudaGLSetGLDevice(devID));

		// Get properties
		HandleCudaError(cudaGetDeviceProperties(&prop, devID));

		// Create texture and PBO
		GLTexture2D texture;
		texture.SetMagFilter(GL_LINEAR);
		texture.SetMinFilter(GL_LINEAR);
		texture.SetWrap(GL_CLAMP_TO_EDGE);
		texture.Allocate(bufWidth, bufHeight, GL_RGBA8);
		
		GLPixelUnpackBuffer pbo;
		pbo.Allocate(bufWidth * bufHeight * 4, NULL, GL_DYNAMIC_DRAW);

		// Register
		cudaGraphicsResource* cudaPbo;
		HandleCudaError(cudaGraphicsGLRegisterBuffer(&cudaPbo, pbo.ID(), cudaGraphicsMapFlagsWriteDiscard));

		// ------------------------------------------------------------

		GLShader shader;
		shader.Compile("../resources/texturetest_simple2d.vert");
		shader.Compile("../resources/texturetest_simple2d.frag");
		shader.Link();

		GLVertexArray vao;
		GLVertexBuffer positionVbo;
		GLIndexBuffer ibo;

		glm::vec3 v[] =
		{
			glm::vec3( 1.0f,  1.0f, 0.0f),
			glm::vec3(-1.0f,  1.0f, 0.0f),
			glm::vec3(-1.0f, -1.0f, 0.0f),
			glm::vec3( 1.0f, -1.0f, 0.0f)
		};

		GLuint i[] =
		{
			0, 1, 2,
			2, 3, 0
		};

		positionVbo.AddStatic(12, &v[0].x);
		vao.Add(GLDefaultVertexAttribute::Position, &positionVbo);
		ibo.AddStatic(6, i);

		// ------------------------------------------------------------

		double fps = 0.0;
		double timeSum = 0.0;
		double prevTime = GLTestUtil::CurrentTimeMilli();
		int frameCount = 0;

		double start = GLTestUtil::CurrentTimeMilli();
		float xcparam = -0.8f;
		float ycparam = 0.165f;
		float inc = 0.001f;

		while (window.ProcessEvent())
		{
			// ------------------------------------------------------------

			double currentTime = GLTestUtil::CurrentTimeMilli();
			double elapsedTime = currentTime - prevTime;

			timeSum += elapsedTime;
			frameCount++;

			if (frameCount >= 13)
			{
				fps = 1000.0 * 13.0 / timeSum;
				timeSum = 0.0;
				frameCount = 0;
			}

			prevTime = currentTime;
			window.SetTitle((boost::format("GLInteropTest_Julia [FPS %.1f]") % fps).str());

			// ------------------------------------------------------------

			double elapsed = GLTestUtil::CurrentTimeMilli() - start;
			if (elapsed >= 1000.0)
			{
				break;
			}

			xcparam += inc;
			if (xcparam > -0.799f || xcparam < -0.811f) 
			{
				inc *= -1.0f;
			}

			// ------------------------------------------------------------

			HandleCudaError(cudaGraphicsMapResources(1, &cudaPbo, NULL));
			
			// Get device pointer
			uchar4* devPtr;
			size_t bufferSize;
			HandleCudaError(cudaGraphicsResourceGetMappedPointer((void**)&devPtr, &bufferSize, cudaPbo));
			Run_GLInteropTestJuliaKernel(bufWidth, bufHeight, prop.multiProcessorCount, xcparam, ycparam, devPtr);
			HandleCudaError(cudaGraphicsUnmapResources(1, &cudaPbo, NULL));

			texture.Replace(&pbo, glm::ivec4(0, 0, bufWidth, bufHeight), GL_RGBA, GL_UNSIGNED_BYTE);

			// ------------------------------------------------------------

			glClearBufferfv(GL_COLOR, 0, glm::value_ptr(glm::vec4(0.0f)));
			glViewportIndexedfv(0, glm::value_ptr(glm::vec4(0, 0, width, height)));

			shader.Begin();
			shader.SetUniform("tex", 0);
			texture.Bind();
			vao.Draw(GL_TRIANGLES, &ibo);
			texture.Unbind();
			shader.End();

			context.SwapBuffers();
		}

		cudaDeviceReset();
	}
	catch (const GLException& e)
	{
		FAIL() << GLTestUtil::PrintGLException(e);
	}
}