Example #1
0
extern "C" void
magma_finalize( magma_context *cntxt)
{
/*  -- MAGMA (version 1.6.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    This function finalizes the MAGMA hardware context.

    Arguments
    =========
    CNTXT  (input) MAGMA_CONTEXT
           Pointer to the MAGMA hardware context to be closed
    ===================================================================== */

  if (cntxt->num_cores > 1)
    /* Shut down the QUARK scheduler */
    QUARK_Delete(cntxt->quark);

  if (cntxt->num_gpus == 1)
    {
      /* Shutdown CUDA and CUBLAS*/
      cuCtxDetach( cntxt->gpu_context[0] );
      cublasShutdown();

      free(cntxt->gpu_context);
    }

  free(cntxt);
}
GPUInterface::~GPUInterface() {
#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr,"\t\t\tEntering GPUInterface::~GPUInterface\n");
#endif

    if (cudaContext != NULL) {
        SAFE_CUDA(cuCtxPushCurrent(cudaContext));
        SAFE_CUDA(cuCtxDetach(cudaContext));
    }

    if (kernelResource != NULL) {
        delete kernelResource;
    }

    if (resourceMap) {
        delete resourceMap;
    }

#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr,"\t\t\tLeaving  GPUInterface::~GPUInterface\n");
#endif

}
Example #3
0
/**
 * PPU program entry point.
 */
int main(int argc, char** argv)
{
    /* Get global memory pointer */
    fixedgrid_t* const G = &G_GLOBAL;
    
    /* Iterators */
    uint32_t k, iter;
    
    /* Start wall clock timer */
    timer_start(&G->metrics.wallclock);

    /* Check dimensions */
    if(NX % BLOCK_X != 0)
    {
        fprintf(stderr, "NX must be a multiple of %d\n", BLOCK_X);
        exit(1);
    }    
    if(NY % BLOCK_Y != 0)
    {
        fprintf(stderr, "NY must be a multiple of %d\n", BLOCK_Y);
        exit(1);
    }
    if(NZ % BLOCK_Z != 0)
    {
        fprintf(stderr, "NZ must be a multiple of %d\n", BLOCK_Z);
        exit(1);
    }
    
    /* Initialize the model parameters */
    init_model(G);
    
    /* Add emissions */
    process_emissions(G);
    
    /* Print startup banner */
    print_start_banner(G);
    
    /* Store initial concentration */
    printf("Writing initial concentration data... ");
    write_conc(G, 0, 0);
    printf("done.\n");    
        
    printf("\n!!!!FIXME: Report # FPEs\n");
        
    /* BEGIN CALCULATIONS */
    for(iter=1, G->time = G->tstart; G->time < G->tend; G->time += G->dt, ++iter)
    {
        start_saprc99(G);
        
        for(k=0; k<NLOOKAT; k++)
        {
            // Copy concentration data to device
            CU_SAFE_CALL(cuMemcpyHtoD(G->dev_conc, &G->conc(0, 0, 0, MONITOR[k]), NX*NY*NZ*sizeof(real_t)));
            
            discretize_all_x(G, G->dt*0.5);
            
            discretize_all_y(G, G->dt*0.5);
            
            discretize_all_z(G, G->dt);
            
            discretize_all_y(G, G->dt*0.5);
            
            discretize_all_x(G, G->dt*0.5);
            
            // Copy updated concentrations back to host
            CU_SAFE_CALL(cuMemcpyDtoH((void*)&G->conc(0, 0, 0, MONITOR[k]), G->dev_conc_out, NX*NY*NZ*sizeof(real_t)));            
        }

        update_model(G);
        
        #if WRITE_EACH_ITER == 1
        write_conc(G, iter, 0);
        #endif

        printf("  After iteration %02d: Model time = %07.2f sec.\n", iter, iter*G->dt);
    }
    /* END CALCULATIONS */
    
    /* Store concentration */
    #if WRITE_EACH_ITER != 1
    write_conc(G, iter-1, 0);
    #endif
    
    /* Show final time */
    printf("\nFinal time: %f seconds.\n", (iter-1)*G->dt);
    
    timer_stop(&G->metrics.wallclock);
    
    /* Write metrics to CSV file */
    write_metrics_as_csv(G, "NVidia CUDA");
    
    /* Cleanup and exit */

    CU_SAFE_CALL(cuMemFree(G->dev_conc));
    CU_SAFE_CALL(cuMemFree(G->dev_wind));
    CU_SAFE_CALL(cuMemFree(G->dev_diff));
    CU_SAFE_CALL(cuMemFree(G->dev_buff));
    CU_SAFE_CALL(cuMemFree(G->dev_conc_out));
    CU_SAFE_CALL_NO_SYNC(cuCtxDetach(cu_context_global));
    
    return 0;
}
Example #4
0
void WaterPlaneCUDA::configure(Vector upperLeft, Vector lowerRight, float dampFactor, float resolution)
{

	cudaSetDevice(cutGetMaxGflopsDeviceId());
	cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());
	
	timeSinceLast = timePassed = 0;

	unsigned int free, total;
	int gpuCount, i;
	CUresult res;
	CUdevice dev;
	CUcontext ctx;

	cuInit(0);

	cuDeviceGetCount(&gpuCount);
	printf("Detected %d GPU\n",gpuCount);

	for (i=0; i<gpuCount; i++)
	{
		cuDeviceGet(&dev,i);
		cuCtxCreate(&ctx, 0, dev);
		res = cuMemGetInfo(&free, &total);
		if(res != CUDA_SUCCESS)
			printf("!!!! cuMemGetInfo failed! (status = %x)", res);
		printf("^^^^ Device: %d\n",i);
		printf("^^^^ Free : %lu bytes (%lu KB) (%lu MB)\n", free, inKB(free), inMB(free));
		printf("^^^^ Total: %lu bytes (%lu KB) (%lu MB)\n", total, inKB(total), inMB(total));
		printf("^^^^ %f%% free, %f%% used\n", 100.0*free/(double)total, 100.0*(total - free)/(double)total);
		cuCtxDetach(ctx);
	}


	this->stepSize = 1.0f/resolution;

	this->resolutionFactor = resolution;

	//reale Z - Achse ist x - Achse der WaterPlaneCUDA
	this->sizeX = (unsigned int) abs(upperLeft.z - lowerRight.z);

	//reale X -Achse ist y- Achse der WaterPlaneCUDA
	this->sizeY = (unsigned int) abs(upperLeft.x - lowerRight.x);

	//Anzahl der Netzpunkte in X -Richtung
	this->pointsX = (unsigned int)(sizeX * resolution);

	//Anzahl der Netzpunkte in Y -Richtung
	pointsY = (unsigned int)(sizeY * resolution);

	uLeft = upperLeft;

	lRight = lowerRight;

	//Der "Meeresspiegel"
	baseHeight = lRight.y;

	//Das Höhenfeld der WaterPlaneCUDA
	waveMap = NULL;

	initBuffer();

	gpu_newVertices = new float3[pointsX*pointsY];
	gpu_oldVertices = new float3[pointsX*pointsY];
	gpu_normals = new float3[pointsX*pointsY];

	for (int i=0;i<pointsX*pointsY;i++) {
		gpu_newVertices[i]=make_float3(0,0,0);
		gpu_oldVertices[i]=make_float3(0,0,0);
		gpu_normals[i]=make_float3(0,1.0,0);
	}
	cutilSafeCall(cudaMalloc((void**)&gpu_newVertices,pointsX*pointsY*sizeof(float3)));
	cutilSafeCall(cudaMalloc((void**)&gpu_oldVertices,pointsX*pointsY*sizeof(float3)));
	cutilSafeCall(cudaMalloc((void**)&gpu_normals,pointsX*pointsY*sizeof(float3)));

	drawMesh();
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest(int argc, char** argv)
{
    CUcontext cuContext;

    // initialize CUDA
    CUfunction pk = NULL;
    const char cubin_name [] = "pass_kernel.cubin";
    const char kernel_name [] = "pass_kernel";

    CU_SAFE_CALL(initCuda(cuContext, argv[0], &pk, argc, argv, cubin_name, kernel_name));
    printf("initCuda-returned CUfunction:\n");

    // cuParamSetx, x=i f v
    // http://visionexperts.blogspot.com/2010/07/cuda-parameter-alignment.html - check alignment
    #define ALIGN_UP(offset, alignment)					\
        (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1)

    size_t offset = 0;

    // input integers
    // CU paramset i.
    for(int i = 0 ; i < NUM_ARG ; i++) 
    {
 	int align = __alignof(int);
	ALIGN_UP(offset, align);
	cuParamSeti(pk, offset, i);
	printf ("offset %d = %d\n", i, offset);
	offset += sizeof(int);
    }

    // return array for updated inputs
    int size_int = sizeof(int);

    int size_array = size_int * NUM_ARG;
    CUdeviceptr d_return_values;
    cuMemAlloc (&d_return_values, size_array);
    void* ptr = (void*)(size_t)d_return_values;
    int align = __alignof(ptr);
    ALIGN_UP(offset, align);
    cuParamSetv(pk, offset, &ptr, sizeof(ptr));
    printf("return values offset:%d\n", offset);
    offset += sizeof(ptr);

    CUdeviceptr d_return_N;
    cuMemAlloc(&d_return_N, size_int);
    void* ptrN = (void*)(size_t)d_return_N;
    int alignN = __alignof(ptrN);
    ALIGN_UP(offset, alignN);
    cuParamSetv(pk, offset, &ptrN, sizeof(ptr));
    printf("return int offset:%d\n", offset);
    offset += sizeof(ptrN);

    // Calling kernel
    int BLOCK_SIZE_X = NUM_ARG;
    int BLOCK_SIZE_Y = 1;
    int BLOCK_SIZE_Z = 1;
    int GRID_SIZE = 1;
    cutilDrvSafeCallNoSync(cuFuncSetBlockShape(pk, BLOCK_SIZE_X, BLOCK_SIZE_Y, BLOCK_SIZE_Z));
 
    printf("paramsetsize:%d\n", offset);
    CU_SAFE_CALL(cuParamSetSize(pk, offset));
    CU_SAFE_CALL(cuLaunchGrid(pk, GRID_SIZE, GRID_SIZE));

    int* h_return_values = (int*)malloc(NUM_ARG * sizeof(int));
    CU_SAFE_CALL(cuMemcpyDtoH((void*)h_return_values, d_return_values, size_array));
    CU_SAFE_CALL(cuMemFree(d_return_values));

    for(int i=0;i<NUM_ARG;i++)
        printf("%dth value = %d\n", i, h_return_values[i]);
    free(h_return_values);

    int* h_return_N = (int*)malloc(sizeof(int));
    CU_SAFE_CALL(cuMemcpyDtoH((void*)h_return_N, d_return_N, size_int));
    CU_SAFE_CALL(cuMemFree(d_return_N));

    printf("%d sizeof array\n", *h_return_N);

    if(cuContext !=NULL) cuCtxDetach(cuContext);
}
CUresult Error(CUcontext ctx, CUresult status)
{
    printf("initCuda is n't SUCESS, code=%d\n", status);
    cuCtxDetach(ctx);
    return status;
}