/*  -- MAGMA (version 1.6.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    This function finalizes the MAGMA hardware context: it deletes the
    QUARK scheduler when more than one core was configured, detaches the
    CUDA context and shuts down CUBLAS when exactly one GPU was
    configured, and finally frees the context structure itself.

    Arguments
    =========
    CNTXT  (input) MAGMA_CONTEXT
           Pointer to the MAGMA hardware context to be closed.
           A NULL pointer is accepted and ignored.

    ===================================================================== */
extern "C" void
magma_finalize( magma_context *cntxt )
{
    /* Nothing to tear down; also avoids dereferencing a null pointer below */
    if (cntxt == NULL)
        return;

    if (cntxt->num_cores > 1) {
        /* Shut down the QUARK scheduler */
        QUARK_Delete(cntxt->quark);
    }

    /* The GPU resources are only released for the single-GPU configuration */
    if (cntxt->num_gpus == 1) {
        /* Shutdown CUDA and CUBLAS */
        cuCtxDetach( cntxt->gpu_context[0] );
        cublasShutdown();
        free(cntxt->gpu_context);
    }

    free(cntxt);
}
/**
 * Destructor.
 *
 * When a CUDA context is held, it is pushed as the current context and then
 * detached. Afterwards the kernel resource and the resource map are deleted
 * if they are set. Entry and exit are traced to stderr when
 * BEAGLE_DEBUG_FLOW is defined.
 */
GPUInterface::~GPUInterface()
{
#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr,"\t\t\tEntering GPUInterface::~GPUInterface\n");
#endif

    // Release the driver context first: push it as current, then detach it.
    if (cudaContext != NULL)
    {
        SAFE_CUDA(cuCtxPushCurrent(cudaContext));
        SAFE_CUDA(cuCtxDetach(cudaContext));
    }

    // Dispose of the owned helper objects, if any.
    if (kernelResource != NULL)
        delete kernelResource;

    if (resourceMap)
        delete resourceMap;

#ifdef BEAGLE_DEBUG_FLOW
    fprintf(stderr,"\t\t\tLeaving GPUInterface::~GPUInterface\n");
#endif
}
/** * PPU program entry point. */ int main(int argc, char** argv) { /* Get global memory pointer */ fixedgrid_t* const G = &G_GLOBAL; /* Iterators */ uint32_t k, iter; /* Start wall clock timer */ timer_start(&G->metrics.wallclock); /* Check dimensions */ if(NX % BLOCK_X != 0) { fprintf(stderr, "NX must be a multiple of %d\n", BLOCK_X); exit(1); } if(NY % BLOCK_Y != 0) { fprintf(stderr, "NY must be a multiple of %d\n", BLOCK_Y); exit(1); } if(NZ % BLOCK_Z != 0) { fprintf(stderr, "NZ must be a multiple of %d\n", BLOCK_Z); exit(1); } /* Initialize the model parameters */ init_model(G); /* Add emissions */ process_emissions(G); /* Print startup banner */ print_start_banner(G); /* Store initial concentration */ printf("Writing initial concentration data... "); write_conc(G, 0, 0); printf("done.\n"); printf("\n!!!!FIXME: Report # FPEs\n"); /* BEGIN CALCULATIONS */ for(iter=1, G->time = G->tstart; G->time < G->tend; G->time += G->dt, ++iter) { start_saprc99(G); for(k=0; k<NLOOKAT; k++) { // Copy concentration data to device CU_SAFE_CALL(cuMemcpyHtoD(G->dev_conc, &G->conc(0, 0, 0, MONITOR[k]), NX*NY*NZ*sizeof(real_t))); discretize_all_x(G, G->dt*0.5); discretize_all_y(G, G->dt*0.5); discretize_all_z(G, G->dt); discretize_all_y(G, G->dt*0.5); discretize_all_x(G, G->dt*0.5); // Copy updated concentrations back to host CU_SAFE_CALL(cuMemcpyDtoH((void*)&G->conc(0, 0, 0, MONITOR[k]), G->dev_conc_out, NX*NY*NZ*sizeof(real_t))); } update_model(G); #if WRITE_EACH_ITER == 1 write_conc(G, iter, 0); #endif printf(" After iteration %02d: Model time = %07.2f sec.\n", iter, iter*G->dt); } /* END CALCULATIONS */ /* Store concentration */ #if WRITE_EACH_ITER != 1 write_conc(G, iter-1, 0); #endif /* Show final time */ printf("\nFinal time: %f seconds.\n", (iter-1)*G->dt); timer_stop(&G->metrics.wallclock); /* Write metrics to CSV file */ write_metrics_as_csv(G, "NVidia CUDA"); /* Cleanup and exit */ CU_SAFE_CALL(cuMemFree(G->dev_conc)); CU_SAFE_CALL(cuMemFree(G->dev_wind)); 
CU_SAFE_CALL(cuMemFree(G->dev_diff)); CU_SAFE_CALL(cuMemFree(G->dev_buff)); CU_SAFE_CALL(cuMemFree(G->dev_conc_out)); CU_SAFE_CALL_NO_SYNC(cuCtxDetach(cu_context_global)); return 0; }
/**
 * Configures the water plane: selects the CUDA device, prints a memory
 * report for every detected GPU, derives the grid dimensions from the two
 * corner points and the resolution, and allocates and initialises the
 * device-side vertex and normal buffers before drawing the mesh.
 *
 * @param upperLeft   one corner of the plane
 * @param lowerRight  opposite corner of the plane; its y component becomes the base height
 * @param dampFactor  not used by this function
 * @param resolution  grid points per unit length; the step size is 1/resolution
 */
void WaterPlaneCUDA::configure(Vector upperLeft, Vector lowerRight, float dampFactor, float resolution)
{
    cudaSetDevice(cutGetMaxGflopsDeviceId());
    cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());

    timeSinceLast = timePassed = 0;

    // Zero-initialised so that nothing indeterminate is ever read below.
    unsigned int freeBytes = 0, totalBytes = 0;
    int gpuCount = 0;
    CUresult res;
    CUdevice dev;
    CUcontext ctx;

    cuInit(0);
    cuDeviceGetCount(&gpuCount);
    printf("Detected %d GPU\n",gpuCount);

    // Per-device memory report; each device gets a temporary context.
    for (int device = 0; device < gpuCount; device++)
    {
        cuDeviceGet(&dev, device);
        cuCtxCreate(&ctx, 0, dev);

        res = cuMemGetInfo(&freeBytes, &totalBytes);
        const bool haveInfo = (res == CUDA_SUCCESS);
        if (!haveInfo)
            printf("!!!! cuMemGetInfo failed! (status = %x)", res);

        printf("^^^^ Device: %d\n", device);

        // Only print the figures when the query actually filled them in.
        if (haveInfo)
        {
            // Casts make the arguments match the %lu conversions.
            printf("^^^^ Free : %lu bytes (%lu KB) (%lu MB)\n",
                   (unsigned long)freeBytes,
                   (unsigned long)inKB(freeBytes),
                   (unsigned long)inMB(freeBytes));
            printf("^^^^ Total: %lu bytes (%lu KB) (%lu MB)\n",
                   (unsigned long)totalBytes,
                   (unsigned long)inKB(totalBytes),
                   (unsigned long)inMB(totalBytes));
            printf("^^^^ %f%% free, %f%% used\n",
                   100.0*freeBytes/(double)totalBytes,
                   100.0*(totalBytes - freeBytes)/(double)totalBytes);
        }

        cuCtxDetach(ctx);
    }

    this->stepSize = 1.0f/resolution;
    this->resolutionFactor = resolution;

    // NOTE(review): if the Vector components are floating point, confirm that
    // this abs() resolves to a floating-point overload and not the int one.

    // The real Z axis is the x axis of the WaterPlaneCUDA
    this->sizeX = (unsigned int) abs(upperLeft.z - lowerRight.z);

    // The real X axis is the y axis of the WaterPlaneCUDA
    this->sizeY = (unsigned int) abs(upperLeft.x - lowerRight.x);

    // Number of grid points in X direction
    this->pointsX = (unsigned int)(sizeX * resolution);

    // Number of grid points in Y direction
    pointsY = (unsigned int)(sizeY * resolution);

    uLeft = upperLeft;
    lRight = lowerRight;

    // The "sea level"
    baseHeight = lRight.y;

    // The height field of the WaterPlaneCUDA
    waveMap = NULL;

    initBuffer();

    // Build the initial contents in temporary host arrays. The gpu_* members
    // must only ever hold device pointers: previously they were first pointed
    // at new[] host arrays and then overwritten by cudaMalloc, which leaked
    // the host arrays and left the device buffers uninitialised.
    const size_t pointCount = (size_t)pointsX * (size_t)pointsY;
    const size_t bufferBytes = pointCount * sizeof(float3);

    float3* hostVertices = new float3[pointCount];
    float3* hostNormals = new float3[pointCount];
    for (size_t p = 0; p < pointCount; ++p)
    {
        hostVertices[p] = make_float3(0.0f, 0.0f, 0.0f);
        hostNormals[p] = make_float3(0.0f, 1.0f, 0.0f);
    }

    cutilSafeCall(cudaMalloc((void**)&gpu_newVertices, bufferBytes));
    cutilSafeCall(cudaMalloc((void**)&gpu_oldVertices, bufferBytes));
    cutilSafeCall(cudaMalloc((void**)&gpu_normals, bufferBytes));

    // Upload the initial state: zero vertices and normals pointing along +y.
    cutilSafeCall(cudaMemcpy(gpu_newVertices, hostVertices, bufferBytes, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(gpu_oldVertices, hostVertices, bufferBytes, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(gpu_normals, hostNormals, bufferBytes, cudaMemcpyHostToDevice));

    delete[] hostVertices;
    delete[] hostNormals;

    drawMesh();
}
//////////////////////////////////////////////////////////////////////////////// //! Run a simple test for CUDA //////////////////////////////////////////////////////////////////////////////// void runTest(int argc, char** argv) { CUcontext cuContext; // initialize CUDA CUfunction pk = NULL; const char cubin_name [] = "pass_kernel.cubin"; const char kernel_name [] = "pass_kernel"; CU_SAFE_CALL(initCuda(cuContext, argv[0], &pk, argc, argv, cubin_name, kernel_name)); printf("initCuda-returned CUfunction:\n"); // cuParamSetx, x=i f v // http://visionexperts.blogspot.com/2010/07/cuda-parameter-alignment.html - check alignment #define ALIGN_UP(offset, alignment) \ (offset) = ((offset) + (alignment) - 1) & ~((alignment) - 1) size_t offset = 0; // input integers // CU paramset i. for(int i = 0 ; i < NUM_ARG ; i++) { int align = __alignof(int); ALIGN_UP(offset, align); cuParamSeti(pk, offset, i); printf ("offset %d = %d\n", i, offset); offset += sizeof(int); } // return array for updated inputs int size_int = sizeof(int); int size_array = size_int * NUM_ARG; CUdeviceptr d_return_values; cuMemAlloc (&d_return_values, size_array); void* ptr = (void*)(size_t)d_return_values; int align = __alignof(ptr); ALIGN_UP(offset, align); cuParamSetv(pk, offset, &ptr, sizeof(ptr)); printf("return values offset:%d\n", offset); offset += sizeof(ptr); CUdeviceptr d_return_N; cuMemAlloc(&d_return_N, size_int); void* ptrN = (void*)(size_t)d_return_N; int alignN = __alignof(ptrN); ALIGN_UP(offset, alignN); cuParamSetv(pk, offset, &ptrN, sizeof(ptr)); printf("return int offset:%d\n", offset); offset += sizeof(ptrN); // Calling kernel int BLOCK_SIZE_X = NUM_ARG; int BLOCK_SIZE_Y = 1; int BLOCK_SIZE_Z = 1; int GRID_SIZE = 1; cutilDrvSafeCallNoSync(cuFuncSetBlockShape(pk, BLOCK_SIZE_X, BLOCK_SIZE_Y, BLOCK_SIZE_Z)); printf("paramsetsize:%d\n", offset); CU_SAFE_CALL(cuParamSetSize(pk, offset)); CU_SAFE_CALL(cuLaunchGrid(pk, GRID_SIZE, GRID_SIZE)); int* h_return_values = (int*)malloc(NUM_ARG * 
sizeof(int)); CU_SAFE_CALL(cuMemcpyDtoH((void*)h_return_values, d_return_values, size_array)); CU_SAFE_CALL(cuMemFree(d_return_values)); for(int i=0;i<NUM_ARG;i++) printf("%dth value = %d\n", i, h_return_values[i]); free(h_return_values); int* h_return_N = (int*)malloc(sizeof(int)); CU_SAFE_CALL(cuMemcpyDtoH((void*)h_return_N, d_return_N, size_int)); CU_SAFE_CALL(cuMemFree(d_return_N)); printf("%d sizeof array\n", *h_return_N); if(cuContext !=NULL) cuCtxDetach(cuContext); }
CUresult Error(CUcontext ctx, CUresult status) { printf("initCuda is n't SUCESS, code=%d\n", status); cuCtxDetach(ctx); return status; }