unsigned int GPUInterface::GetAvailableMemory() {
#if CUDA_VERSION >= 3020
    size_t availableMem = 0;
    size_t totalMem = 0;
    SAFE_CUPP(cuMemGetInfo(&availableMem, &totalMem));
#else
    unsigned int availableMem = 0;
    unsigned int totalMem = 0;
    SAFE_CUPP(cuMemGetInfo(&availableMem, &totalMem));
#endif
    return availableMem;
}
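/*
 * For reference: a minimal, self-contained sketch of the same query against
 * the CUDA 3.2+ driver API, where cuMemGetInfo takes size_t pointers.
 * SAFE_CUPP and the CUDA_VERSION guard above belong to that project; this
 * sketch assumes nothing beyond the standard driver API and plain error checks.
 */
#include <cuda.h>
#include <stdio.h>

int main(void) {
    CUdevice dev;
    CUcontext ctx;
    size_t freeMem = 0, totalMem = 0;

    if (cuInit(0) != CUDA_SUCCESS ||
        cuDeviceGet(&dev, 0) != CUDA_SUCCESS ||
        cuCtxCreate(&ctx, 0, dev) != CUDA_SUCCESS) {
        fprintf(stderr, "driver initialization failed\n");
        return 1;
    }

    if (cuMemGetInfo(&freeMem, &totalMem) == CUDA_SUCCESS)
        printf("available: %zu of %zu bytes\n", freeMem, totalMem);

    cuCtxDestroy(ctx); /* cuMemGetInfo needs a current context, so query before destroying it */
    return 0;
}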
int main() {
    int ngpu;
    CUdevice cuDevice;
    CUcontext cuContext;

    cuInit(0);
    cuDeviceGetCount(&ngpu);
    //printf("ngpu = %d\n", ngpu);

    size_t *totals, *frees;
    totals = (size_t *) calloc(ngpu, sizeof(size_t));
    frees  = (size_t *) calloc(ngpu, sizeof(size_t));

    int tid;
    omp_set_num_threads(ngpu);
    /* One host thread per device: each thread creates its own context and
     * queries that device's memory. */
    #pragma omp parallel private(tid, cuDevice, cuContext) shared(frees, totals)
    {
        tid = omp_get_thread_num();
        //printf("nthreads = %d, tid = %d\n", omp_get_num_threads(), tid);
        cuDeviceGet(&cuDevice, tid);
        cuCtxCreate(&cuContext, 0, cuDevice); /* second argument is the flags word, not the device index */
        cuMemGetInfo(&frees[tid], &totals[tid]);
    }

    printf("\ttotal\t\tfree\t\tused\n");
    for (int i = 0; i < ngpu; i++) {
        printf("GPU %d\t%zu\t%zu\t%zu\n", i, totals[i], frees[i], totals[i] - frees[i]);
    }
    return 0;
}
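/*
 * The OpenMP example above leaves one context alive per device. A sequential
 * variant (a sketch of ours, not the original author's code) that queries each
 * device in turn and destroys its context right after the query:
 */
#include <cuda.h>
#include <stdio.h>

int main(void) {
    int ndev = 0;
    cuInit(0);
    cuDeviceGetCount(&ndev);

    printf("\ttotal\t\tfree\t\tused\n");
    for (int i = 0; i < ndev; i++) {
        CUdevice dev;
        CUcontext ctx;
        size_t freeMem = 0, totalMem = 0;

        cuDeviceGet(&dev, i);
        cuCtxCreate(&ctx, 0, dev);
        cuMemGetInfo(&freeMem, &totalMem);
        cuCtxDestroy(ctx); /* release the context before moving to the next device */

        printf("GPU %d\t%zu\t%zu\t%zu\n", i, totalMem, freeMem, totalMem - freeMem);
    }
    return 0;
}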
size_t initContext(JNIEnv * env, jint max_blocks_per_proc, jint max_threads_per_block) { size_t to_space_size; int status; int deviceCount = 0; size_t f_mem; size_t t_mem; jint num_blocks; status = cuDeviceGetCount(&deviceCount); CHECK_STATUS_RTN(env,"error in cuDeviceGetCount",status, 0); getBestDevice(env); status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice); CHECK_STATUS_RTN(env,"error in cuCtxCreate",status, 0) status = cuMemGetInfo (&f_mem, &t_mem); CHECK_STATUS_RTN(env,"error in cuMemGetInfo",status, 0) to_space_size = f_mem; //space for 100 types in the scene classMemSize = sizeof(jint)*100; num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc; gc_space_size = 1024; to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= gc_space_size; to_space_size -= classMemSize; return to_space_size; }
candidate* immaculate_conception (CUZMEM_CONTEXT ctx) { unsigned long long DNA; unsigned int loc, gpu_mem_free, gpu_mem_total, gpu_mem_req; unsigned int creating = 1; cuzmem_plan* entry = ctx->plan; candidate* c = (candidate*)malloc (sizeof(candidate)); cuMemGetInfo (&gpu_mem_free, &gpu_mem_total); while (creating) { c->DNA = rand(); c->DNA = c->DNA << 32; c->DNA = c->DNA + rand(); c->DNA &= generate_mask(ctx->num_knobs); // gpu memory utilization gpu_mem_req = 0; entry = ctx->plan; while (entry != NULL) { if (entry->gold_member) { loc = (c->DNA >> entry->id) & 0x0001; gpu_mem_req += entry->size * loc; } entry = entry->next; } // check constraint if (gpu_mem_req > gpu_mem_free * MIN_GPU_MEM) { creating = 0; } }
int main(int argc, char* argv[]) {
    cuInit(0);
    int devs = 0;
    cuDeviceGetCount(&devs);
    assert(devs > 0);

    CUdevice dev;
    CUresult status;
    CUcontext ctx = 0;
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);

    {
        size_t f = 0, t = 0;
        CUresult r = cuMemGetInfo(&f, &t);
        fprintf(stderr, "Do cuMemGetInfo: %d, %zu/%zu\n", r, f, t);
    }

    __init("\n");

    printf("\nPress any key to exit...");
    char c;
    scanf("%c", &c);
    return 0;
}
/* ========================================================================== */
int sci_gpuDeviceMemInfo(char *fname)
{
#ifdef WITH_CUDA
    if (isGpuInit())
    {
        if (useCuda())
        {
            size_t free = 0, total = 0;
            cuMemGetInfo(&free, &total);
            double freeMem = (double)free;
            createScalarDouble(pvApiCtx, Rhs + 1, freeMem);
        }
        else
        {
            double zero = 0.;
            createScalarDouble(pvApiCtx, Rhs + 1, zero);
            sciprint("not implemented with OpenCL.\n");
        }
        LhsVar(1) = Rhs + 1;
        PutLhsVar();
    }
    else
    {
        Scierror(999, "%s", "gpu is not initialised. Please launch gpuInit() before using this function.\n");
    }
#else
    sciprint("not implemented with OpenCL.\n");
#endif
    return 0;
}
size_t swanMemAvailable( void ) {
    size_t free, total;
    try_init();
    cuMemGetInfo( &free, &total );
    return free;
}
void checkCUDAmemory(char* t)
{
    //cudaDeviceSynchronize();
    size_t free, total;
    cuMemGetInfo(&free, &total);
    /* values are reported in MiB; %zu is the correct format for size_t */
    fprintf(stderr, "%s mem %zu total %zu\n", t, free / 1024 / 1024, total / 1024 / 1024);
}
/**
 * Returns the memory usage statistics.
 *
 * @param usedMem receives the current used memory.
 * @param totalMem receives the total memory of the device.
 */
void getMemoryUsage(size_t* usedMem, size_t* totalMem)
{
    size_t myFreeMem;
    size_t myTotalMem;

    cudaFree(0); // Ensures the CUDA context creation. Otherwise cuMemGetInfo (driver API) will return zero.
    cuMemGetInfo(&myFreeMem, &myTotalMem);

    if (usedMem != NULL)
    {
        *usedMem = myTotalMem - myFreeMem;
    }

    if (totalMem != NULL)
    {
        *totalMem = myTotalMem;
    }
}
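/*
 * The helper above mixes the runtime API (cudaFree) with the driver API
 * (cuMemGetInfo) purely to force context creation. A sketch of the same
 * statistics using only the runtime API, whose cudaMemGetInfo initializes
 * the context implicitly; the function name here is ours, not the original's.
 */
#include <cuda_runtime.h>
#include <stddef.h>

void getMemoryUsageRuntime(size_t* usedMem, size_t* totalMem)
{
    size_t freeMem = 0;
    size_t total = 0;

    // No cudaFree(0) trick needed: cudaMemGetInfo creates the context on first use.
    if (cudaMemGetInfo(&freeMem, &total) != cudaSuccess)
    {
        freeMem = 0;
        total = 0;
    }

    if (usedMem != NULL)
    {
        *usedMem = total - freeMem;
    }

    if (totalMem != NULL)
    {
        *totalMem = total;
    }
}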
S64 CudaModule::getMemoryUsed(void)
{
    staticInit();
    if (!s_available)
    {
        return 0;
    }

    size_t free = 0;
    size_t total = 0;
    cuMemGetInfo(&free, &total);
    return total - free;
}
int main(int argc, char *argv[]) {
    char c;
    CUcontext ctx;
    CUdevice dev = 0;
    void *toSpace;
    int status;
    size_t free, total;                 /* cuMemGetInfo expects size_t, not int */
    CUdeviceptr ptr = (CUdeviceptr)NULL;
    int size;

    if (argc != 2) {
        fprintf(stderr, "Usage: mem_alloc.exe [MEMORY TO ALLOCATE IN MB]\n");
        exit(1);
    }

    printf("All status results should be 0, if not an error has occurred.\n"
           "If 2 is reported an out of memory error has occurred for\n"
           "which you should decrease the memory input\n");

    size = atoi(argv[1]);
    printf("\nTrying to allocate %iMB of memory on host and GPU\n", size);
    if (size <= 0) {
        fprintf(stderr, "\nERROR: Memory must be greater than 0\n");
        exit(1);
    }

    status = cuInit(0);
    printf("Init status: %i\n", status);

    status = cuCtxCreate(&ctx, 0, dev);
    printf("Context creation status: %i\n", status);

    status = cuMemGetInfo(&free, &total);   /* capture the result so the printed status is meaningful */
    printf("Get memory info status: %i\n", status);
    printf("\n%.1f/%.1f (Free/Total) MB\n", free / 1024.0 / 1024.0, total / 1024.0 / 1024.0);

    status = cuMemHostAlloc(&toSpace, (size_t)size * 1024 * 1024, 0);   /* cast avoids int overflow for sizes >= 2048 MB */
    printf("Host allocation status: %i %s\n", status, (status == CUDA_SUCCESS) ? "SUCCESS" : "FAILED");

    status = cuMemAlloc(&ptr, (size_t)size * 1024 * 1024);
    printf("GPU allocation status: %i %s\n", status, (status == CUDA_SUCCESS) ? "SUCCESS" : "FAILED");

    printf("\nPress any key to exit...");
    scanf("%c", &c);

    status = cuCtxDestroy(ctx);
    printf("Context destroy status: %i\n", status);
    return 0;
}
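/*
 * The test program above simply lets cuMemAlloc fail and prints the status
 * code. A small helper (a sketch of ours, not part of the original tool) that
 * refuses an allocation up front when it clearly exceeds the free memory
 * reported by cuMemGetInfo. The report is only a snapshot: other contexts can
 * allocate between the query and the cuMemAlloc call, and fragmentation can
 * still make a "fitting" request fail.
 */
#include <cuda.h>

CUresult allocIfItFits(CUdeviceptr* out, size_t bytes)
{
    size_t freeMem = 0, totalMem = 0;
    CUresult rc = cuMemGetInfo(&freeMem, &totalMem); /* requires a current context */
    if (rc != CUDA_SUCCESS)
        return rc;

    if (bytes > freeMem)
        return CUDA_ERROR_OUT_OF_MEMORY; /* would certainly fail; skip the attempt */

    return cuMemAlloc(out, bytes); /* may still fail despite the check */
}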
/*===========================================================================*/
void Device::update()
{
    kvs::cuda::DriverAPI::Context context( *this );

#if defined( cuMemGetInfo )
    /* In this case, the function 'cuMemGetInfo' is defined to 'cuMemGetInfo_v2'
     * as "#define cuMemGetInfo cuMemGetInfo_v2". And then, the function
     * 'cuMemGetInfo_v2' is defined as follows:
     *     CUresult cuMemGetInfo_v2( size_t * free, size_t * total )
     */
    KVS_CU_CALL( cuMemGetInfo( &m_free_memory, &m_total_memory ) );
#else
    /* The function 'cuMemGetInfo' is defined as follows:
     *     CUresult cuMemGetInfo( unsigned int * free, unsigned int * total )
     * Therefore, the temporary parameters defined as unsigned int are used
     * to obtain the memory information.
     */
    unsigned int free_memory = 0;
    unsigned int total_memory = 0;
    KVS_CU_CALL( cuMemGetInfo( &free_memory, &total_memory ) );
    m_free_memory = static_cast<size_t>( free_memory );
    m_total_memory = static_cast<size_t>( total_memory );
#endif
}
SEXP R_auto_cuMemGetInfo()
{
    SEXP r_ans = R_NilValue;
    size_t free;
    size_t total;
    CUresult ans;

    ans = cuMemGetInfo(&free, &total);
    if(ans)
        return(R_cudaErrorInfo(ans));

    PROTECT(r_ans = NEW_LIST(2));
    SEXP r_names;
    PROTECT(r_names = NEW_CHARACTER(2));
    SET_VECTOR_ELT(r_ans, 0, ScalarReal(free));
    SET_VECTOR_ELT(r_ans, 1, ScalarReal(total));
    SET_STRING_ELT(r_names, 0, mkChar("free"));
    SET_STRING_ELT(r_names, 1, mkChar("total"));
    SET_NAMES(r_ans, r_names);
    UNPROTECT(2);
    return(r_ans);
}
static int cuda_property(void *c, gpudata *buf, gpukernel *k, int prop_id, void *res) { cuda_context *ctx = NULL; if (c != NULL) { ctx = (cuda_context *)c; ASSERT_CTX(ctx); } else if (buf != NULL) { ASSERT_BUF(buf); ctx = buf->ctx; } else if (k != NULL) { ASSERT_KER(k); ctx = k->ctx; } /* I know that 512 and 1024 are magic numbers. There is an indication in buffer.h, though. */ if (prop_id < 512) { if (ctx == NULL) return GA_VALUE_ERROR; } else if (prop_id < 1024) { if (buf == NULL) return GA_VALUE_ERROR; } else { if (k == NULL) return GA_VALUE_ERROR; } switch (prop_id) { char *s; CUdevice id; int i; size_t sz; case GA_CTX_PROP_DEVNAME: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } /* 256 is what the CUDA API uses so it's good enough for me */ s = malloc(256); if (s == NULL) { cuda_exit(ctx); return GA_MEMORY_ERROR; } ctx->err = cuDeviceGetName(s, 256, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } *((char **)res) = s; cuda_exit(ctx); return GA_NO_ERROR; case GA_CTX_PROP_MAXLSIZE: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } ctx->err = cuDeviceGetAttribute(&i, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } *((size_t *)res) = i; cuda_exit(ctx); return GA_NO_ERROR; case GA_CTX_PROP_LMEMSIZE: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } ctx->err = cuDeviceGetAttribute(&i, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } *((size_t *)res) = i; cuda_exit(ctx); return GA_NO_ERROR; case GA_CTX_PROP_NUMPROCS: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } ctx->err = cuDeviceGetAttribute(&i, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } *((unsigned int *)res) = i; cuda_exit(ctx); return GA_NO_ERROR; case GA_CTX_PROP_MAXGSIZE: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } ctx->err = cuDeviceGetAttribute(&i, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } *((size_t *)res) = i; cuda_exit(ctx); return GA_NO_ERROR; case GA_CTX_PROP_BLAS_OPS: #ifdef WITH_CUDA_CUBLAS *((gpuarray_blas_ops **)res) = &cublas_ops; return GA_NO_ERROR; #else *((void **)res) = NULL; return GA_DEVSUP_ERROR; #endif case GA_CTX_PROP_BIN_ID: *((const char **)res) = ctx->bin_id; return GA_NO_ERROR; case GA_CTX_PROP_ERRBUF: *((gpudata **)res) = ctx->errbuf; return GA_NO_ERROR; case GA_CTX_PROP_TOTAL_GMEM: cuda_enter(ctx); ctx->err = cuMemGetInfo(&sz, (size_t *)res); cuda_exit(ctx); return ctx->err == CUDA_SUCCESS ? GA_NO_ERROR : GA_IMPL_ERROR; case GA_CTX_PROP_FREE_GMEM: cuda_enter(ctx); ctx->err = cuMemGetInfo((size_t *)res, &sz); cuda_exit(ctx); return ctx->err == CUDA_SUCCESS ? 
GA_NO_ERROR : GA_IMPL_ERROR; case GA_BUFFER_PROP_REFCNT: *((unsigned int *)res) = buf->refcnt; return GA_NO_ERROR; case GA_BUFFER_PROP_SIZE: *((size_t *)res) = buf->sz; return GA_NO_ERROR; case GA_BUFFER_PROP_CTX: case GA_KERNEL_PROP_CTX: *((void **)res) = (void *)ctx; return GA_NO_ERROR; case GA_KERNEL_PROP_MAXLSIZE: cuda_enter(ctx); ctx->err = cuFuncGetAttribute(&i, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, k->k); cuda_exit(ctx); if (ctx->err != CUDA_SUCCESS) return GA_IMPL_ERROR; *((size_t *)res) = i; return GA_NO_ERROR; case GA_KERNEL_PROP_PREFLSIZE: cuda_enter(ctx); ctx->err = cuCtxGetDevice(&id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } ctx->err = cuDeviceGetAttribute(&i, CU_DEVICE_ATTRIBUTE_WARP_SIZE, id); if (ctx->err != CUDA_SUCCESS) { cuda_exit(ctx); return GA_IMPL_ERROR; } cuda_exit(ctx); *((size_t *)res) = i; return GA_NO_ERROR; case GA_KERNEL_PROP_NUMARGS: *((unsigned int *)res) = k->argcount; return GA_NO_ERROR; case GA_KERNEL_PROP_TYPES: *((const int **)res) = k->types; return GA_NO_ERROR; default: return GA_INVALID_ERROR; } }
void WaterPlaneCUDA::configure(Vector upperLeft, Vector lowerRight, float dampFactor, float resolution)
{
    cudaSetDevice(cutGetMaxGflopsDeviceId());
    cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());

    timeSinceLast = timePassed = 0;

    size_t free, total;   /* cuMemGetInfo expects size_t, not unsigned int */
    int gpuCount, i;
    CUresult res;
    CUdevice dev;
    CUcontext ctx;

    cuInit(0);
    cuDeviceGetCount(&gpuCount);
    printf("Detected %d GPU\n", gpuCount);

    for (i = 0; i < gpuCount; i++)
    {
        cuDeviceGet(&dev, i);
        cuCtxCreate(&ctx, 0, dev);
        res = cuMemGetInfo(&free, &total);
        if (res != CUDA_SUCCESS)
            printf("!!!! cuMemGetInfo failed! (status = %x)", res);
        printf("^^^^ Device: %d\n", i);
        printf("^^^^ Free : %zu bytes (%zu KB) (%zu MB)\n", free, inKB(free), inMB(free));
        printf("^^^^ Total: %zu bytes (%zu KB) (%zu MB)\n", total, inKB(total), inMB(total));
        printf("^^^^ %f%% free, %f%% used\n", 100.0*free/(double)total, 100.0*(total - free)/(double)total);
        cuCtxDetach(ctx);
    }

    this->stepSize = 1.0f / resolution;
    this->resolutionFactor = resolution;

    // the real Z axis is the x axis of the WaterPlaneCUDA
    this->sizeX = (unsigned int) abs(upperLeft.z - lowerRight.z);
    // the real X axis is the y axis of the WaterPlaneCUDA
    this->sizeY = (unsigned int) abs(upperLeft.x - lowerRight.x);
    // number of grid points in the X direction
    this->pointsX = (unsigned int)(sizeX * resolution);
    // number of grid points in the Y direction
    pointsY = (unsigned int)(sizeY * resolution);

    uLeft = upperLeft;
    lRight = lowerRight;

    // the "sea level"
    baseHeight = lRight.y;

    // the height field of the WaterPlaneCUDA
    waveMap = NULL;

    initBuffer();

    // note: these host-side arrays are never copied to the device; their
    // pointers are overwritten by the cudaMalloc calls below, so the host
    // allocations (and the initialization loop) are effectively lost
    gpu_newVertices = new float3[pointsX*pointsY];
    gpu_oldVertices = new float3[pointsX*pointsY];
    gpu_normals = new float3[pointsX*pointsY];
    for (int i = 0; i < pointsX*pointsY; i++)
    {
        gpu_newVertices[i] = make_float3(0, 0, 0);
        gpu_oldVertices[i] = make_float3(0, 0, 0);
        gpu_normals[i] = make_float3(0, 1.0, 0);
    }
    cutilSafeCall(cudaMalloc((void**)&gpu_newVertices, pointsX*pointsY*sizeof(float3)));
    cutilSafeCall(cudaMalloc((void**)&gpu_oldVertices, pointsX*pointsY*sizeof(float3)));
    cutilSafeCall(cudaMalloc((void**)&gpu_normals, pointsX*pointsY*sizeof(float3)));

    drawMesh();
}
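/*
 * The inKB/inMB helpers used above are not part of this excerpt and are
 * presumably defined elsewhere in that project. A plausible stand-in (our
 * assumption, not the original definition) is simply:
 */
#define inKB(x) ((x) / 1024)
#define inMB(x) ((x) / (1024 * 1024))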
void pocl_cuda_init (cl_device_id device, const char *parameters) { CUresult result; result = cuInit (0); CUDA_CHECK (result, "cuInit"); if (device->data) return; pocl_cuda_device_data_t *data = malloc (sizeof (pocl_cuda_device_data_t)); result = cuDeviceGet (&data->device, 0); CUDA_CHECK (result, "cuDeviceGet"); // Get specific device name device->long_name = device->short_name = malloc (256 * sizeof (char)); cuDeviceGetName (device->long_name, 256, data->device); // Get other device properties cuDeviceGetAttribute ((int *)&device->max_work_group_size, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, data->device); cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 0), CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, data->device); cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 1), CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, data->device); cuDeviceGetAttribute ((int *)(device->max_work_item_sizes + 2), CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, data->device); cuDeviceGetAttribute ( (int *)&device->local_mem_size, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, data->device); cuDeviceGetAttribute ((int *)&device->max_compute_units, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, data->device); cuDeviceGetAttribute ((int *)&device->max_clock_frequency, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, data->device); cuDeviceGetAttribute ((int *)&device->error_correction_support, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, data->device); cuDeviceGetAttribute ((int *)&device->host_unified_memory, CU_DEVICE_ATTRIBUTE_INTEGRATED, data->device); cuDeviceGetAttribute ((int *)&device->max_constant_buffer_size, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, data->device); device->preferred_vector_width_char = 1; device->preferred_vector_width_short = 1; device->preferred_vector_width_int = 1; device->preferred_vector_width_long = 1; device->preferred_vector_width_float = 1; device->preferred_vector_width_double = 1; device->preferred_vector_width_half = 0; device->native_vector_width_char = 1; device->native_vector_width_short = 1; device->native_vector_width_int = 1; device->native_vector_width_long = 1; device->native_vector_width_float = 1; device->native_vector_width_double = 1; device->native_vector_width_half = 0; device->single_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA | CL_FP_INF_NAN | CL_FP_DENORM; device->double_fp_config = CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA | CL_FP_INF_NAN | CL_FP_DENORM; device->local_mem_type = CL_LOCAL; device->host_unified_memory = 0; // Get GPU architecture name int sm_maj, sm_min; cuDeviceGetAttribute (&sm_maj, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, data->device); cuDeviceGetAttribute (&sm_min, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, data->device); char *gpu_arch = malloc (16 * sizeof (char)); snprintf (gpu_arch, 16, "sm_%d%d", sm_maj, sm_min); device->llvm_cpu = pocl_get_string_option ("POCL_CUDA_GPU_ARCH", gpu_arch); POCL_MSG_PRINT_INFO ("[CUDA] GPU architecture = %s\n", device->llvm_cpu); // Create context result = cuCtxCreate (&data->context, CU_CTX_MAP_HOST, data->device); CUDA_CHECK (result, "cuCtxCreate"); // Get global memory size size_t memfree, memtotal; result = cuMemGetInfo (&memfree, &memtotal); device->max_mem_alloc_size = max (memtotal / 4, 128 * 1024 * 1024); device->global_mem_size = memtotal; device->data = data; }
/* * Class: edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2 * Method: setup * Signature: ()V */ JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_setup (JNIEnv *env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jint free_space) { int status; jint num_blocks; int deviceCount = 0; size_t f_mem; size_t t_mem; size_t to_space_size; //size_t free_space = 1530L*1024L*1024L; textureMemSize = 1; status = cuInit(0); if (CUDA_SUCCESS != status) { printf("error in cuInit\n"); } status = cuDeviceGetCount(&deviceCount); if (CUDA_SUCCESS != status) { printf("error in cuDeviceGet\n"); } getBestDevice(); status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice); if (CUDA_SUCCESS != status) { printf("error in cuCtxCreate %d\n", status); } // ddb - not using this as this returns the total memory not the free memory //to_space_size = memSize(); cuMemGetInfo(&f_mem, &t_mem); to_space_size = f_mem; num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc; #if DEBUG printf("Memory: %i(MB)/%i(MB) (Free/Total)\n",f_mem/1024/1024, t_mem/1024/1024); printf("num_blocks = %i\n",num_blocks); printf("numMultiProcessors = %i\n",numMultiProcessors); printf("max_threads_per_block = %i\n",max_threads_per_block); printf("max_blocks_per_proc = %i\n",max_blocks_per_proc); fflush(stdout); #endif gc_space_size = 1024; to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= gc_space_size; to_space_size -= free_space; //to_space_size -= textureMemSize; bufferSize = to_space_size; status = cuMemHostAlloc(&toSpace, to_space_size, 0); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "toSpace memory allocation failed", status); return; } status = cuMemAlloc(&gpuToSpace, to_space_size); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gpuToSpace memory allocation failed", status); return; } /* status = cuMemHostAlloc(&textureMemory, textureMemSize, 0); if (CUDA_SUCCESS != status) { printf("error in cuMemHostAlloc textureMemory %d\n", status); } status = cuMemAlloc(&gpuTexture, textureMemSize); if (CUDA_SUCCESS != status) { printf("error in cuMemAlloc gpuTexture %d\n", status); } */ status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "handlesMemory memory allocation failed", status); return; } status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong)); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gpuHandlesMemory memory allocation failed", status); return; } status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "exceptionsMemory memory allocation failed", status); return; } status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong)); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gpuExceptionsMemory memory allocation failed", status); return; } status = cuMemAlloc(&gcInfoSpace, gc_space_size); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gcInfoSpace memory allocation failed", status); return; } status = cuMemAlloc(&gpuHeapEndPtr, 8); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gpuHeapEndPtr memory allocation failed", status); return; } status = cuMemAlloc(&gpuBufferSize, 8); if (CUDA_SUCCESS != status) { throw_cuda_errror_exception(env, "gpuBufferSize memory allocation failed", status); 
return; } thisRefClass = (*env)->GetObjectClass(env, this_ref); setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace); setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace); setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory); setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture); setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory); setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory); setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory); setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory); setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize); setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim); setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors); }
value spoc_getCudaDevice(value i) { CAMLparam1(i); CAMLlocal4(general_info, cuda_info, specific_info, gc_info); CAMLlocal3(device, maxT, maxG); int nb_devices; CUdevprop dev_infos; CUdevice dev; CUcontext ctx; CUstream queue[2]; spoc_cu_context *spoc_ctx; //CUcontext gl_ctx; char infoStr[1024]; int infoInt; size_t infoUInt; int major, minor; enum cudaError_enum cuda_error; cuDeviceGetCount (&nb_devices); if ((Int_val(i)) > nb_devices) raise_constant(*caml_named_value("no_cuda_device")) ; CUDA_CHECK_CALL(cuDeviceGet(&dev, Int_val(i))); CUDA_CHECK_CALL(cuDeviceGetProperties(&dev_infos, dev)); general_info = caml_alloc (9, 0); CUDA_CHECK_CALL(cuDeviceGetName(infoStr, sizeof(infoStr), dev)); Store_field(general_info,0, copy_string(infoStr));// CUDA_CHECK_CALL(cuDeviceTotalMem(&infoUInt, dev)); Store_field(general_info,1, Val_int(infoUInt));// Store_field(general_info,2, Val_int(dev_infos.sharedMemPerBlock));// Store_field(general_info,3, Val_int(dev_infos.clockRate));// Store_field(general_info,4, Val_int(dev_infos.totalConstantMemory));// CUDA_CHECK_CALL(cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev)); Store_field(general_info,5, Val_int(infoInt));// CUDA_CHECK_CALL(cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev)); Store_field(general_info,6, Val_bool(infoInt));// Store_field(general_info,7, i); CUDA_CHECK_CALL(cuCtxCreate (&ctx, CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST, dev)); spoc_ctx = malloc(sizeof(spoc_cl_context)); spoc_ctx->ctx = ctx; CUDA_CHECK_CALL(cuStreamCreate(&queue[0], 0)); CUDA_CHECK_CALL(cuStreamCreate(&queue[1], 0)); spoc_ctx->queue[0] = queue[0]; spoc_ctx->queue[1] = queue[1]; Store_field(general_info,8, (value)spoc_ctx); CUDA_CHECK_CALL(cuCtxSetCurrent(ctx)); cuda_info = caml_alloc(1, 0); //0 -> Cuda specific_info = caml_alloc(18, 0); cuDeviceComputeCapability(&major, &minor, dev); Store_field(specific_info,0, Val_int(major));// Store_field(specific_info,1, Val_int(minor));// Store_field(specific_info,2, Val_int(dev_infos.regsPerBlock));// Store_field(specific_info,3, Val_int(dev_infos.SIMDWidth));// Store_field(specific_info,4, Val_int(dev_infos.memPitch));// Store_field(specific_info,5, Val_int(dev_infos.maxThreadsPerBlock));// maxT = caml_alloc(3, 0); Store_field(maxT,0, Val_int(dev_infos.maxThreadsDim[0]));// Store_field(maxT,1, Val_int(dev_infos.maxThreadsDim[1]));// Store_field(maxT,2, Val_int(dev_infos.maxThreadsDim[2]));// Store_field(specific_info,6, maxT); maxG = caml_alloc(3, 0); Store_field(maxG,0, Val_int(dev_infos.maxGridSize[0]));// Store_field(maxG,1, Val_int(dev_infos.maxGridSize[1]));// Store_field(maxG,2, Val_int(dev_infos.maxGridSize[2]));// Store_field(specific_info,7, maxG); Store_field(specific_info,8, Val_int(dev_infos.textureAlign));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev); Store_field(specific_info,9, Val_bool(infoInt));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, dev); Store_field(specific_info,10, Val_bool(infoInt));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev); Store_field(specific_info,11, Val_bool(infoInt));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev); Store_field(specific_info,12, Val_bool(infoInt));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev); Store_field(specific_info,13, Val_int(infoInt));// cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev); Store_field(specific_info,14, Val_bool(infoInt));// 
cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, dev); Store_field(specific_info,15, Val_int(infoInt)); cuDeviceGetAttribute(&infoInt, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, dev); Store_field(specific_info,16, Val_int(infoInt)); cuDriverGetVersion(&infoInt); Store_field(specific_info, 17, Val_int(infoInt)); Store_field(cuda_info, 0, specific_info); device = caml_alloc(4, 0); Store_field(device, 0, general_info); Store_field(device, 1, cuda_info); {spoc_cuda_gc_info* gcInfo = (spoc_cuda_gc_info*)malloc(sizeof(spoc_cuda_gc_info)); CUDA_CHECK_CALL(cuMemGetInfo(&infoUInt, NULL)); infoUInt -= (32*1024*1024); Store_field(device, 2, (value)gcInfo); {cuda_event_list* events = NULL; Store_field(device, 3, (value)events); CAMLreturn(device);}} }
JNIEXPORT jobject JNICALL Java_org_trifort_rootbeer_runtime_CUDARuntime_loadGpuDevices
  (JNIEnv * env, jobject this_ref)
{
  int i;
  int status;
  int num_devices;
  CUdevice device;
  CUcontext context;

  jclass array_list_class;
  jmethodID array_list_init;
  jmethodID array_list_add;
  jobject ret;
  jclass gpu_device_class;
  jmethodID gpu_device_init;
  jobject gpu_device;

  int major_version;
  int minor_version;
  char device_name[4096];
  size_t free_mem;
  size_t total_mem;
  int registers_per_block;
  int warp_size;
  int pitch;
  int threads_per_block;
  int shared_mem_per_block;
  int clock_rate;
  int mem_clock_rate;
  int const_mem;
  int integrated;
  int threads_per_multiprocessor;
  int multiprocessor_count;
  int max_block_dim_x;
  int max_block_dim_y;
  int max_block_dim_z;
  int max_grid_dim_x;
  int max_grid_dim_y;
  int max_grid_dim_z;

  array_list_class = (*env)->FindClass(env, "java/util/ArrayList");
  array_list_init = (*env)->GetMethodID(env, array_list_class, "<init>", "()V");
  array_list_add = (*env)->GetMethodID(env, array_list_class, "add", "(Ljava/lang/Object;)Z");
  ret = (*env)->NewObject(env, array_list_class, array_list_init);

  gpu_device_class = (*env)->FindClass(env, "org/trifort/rootbeer/runtime/GpuDevice");
  gpu_device_init = (*env)->GetStaticMethodID(env, gpu_device_class, "newCudaDevice",
    "(IIILjava/lang/String;JJIIIIIIIIZIIIIIIII)Lorg/trifort/rootbeer/runtime/GpuDevice;");

  status = cuInit(0);
  if(status != CUDA_SUCCESS){
    return ret;
  }

  cuDeviceGetCount(&num_devices);
  for(i = 0; i < num_devices; ++i){
    status = cuDeviceGet(&device, i);
    if(status != CUDA_SUCCESS){
      continue;
    }

    cuDeviceComputeCapability(&major_version, &minor_version, device);
    cuDeviceGetName(device_name, 4096, device);

    /* a context must be current for cuMemGetInfo to report this device */
    cuCtxCreate(&context, CU_CTX_MAP_HOST, device);
    cuMemGetInfo(&free_mem, &total_mem);
    cuCtxDestroy(context);

    cuDeviceGetAttribute(&registers_per_block, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, device);
    cuDeviceGetAttribute(&warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device);
    cuDeviceGetAttribute(&pitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, device);
    cuDeviceGetAttribute(&threads_per_block, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
    cuDeviceGetAttribute(&shared_mem_per_block, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, device);
    cuDeviceGetAttribute(&clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device);
    cuDeviceGetAttribute(&mem_clock_rate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device);
    cuDeviceGetAttribute(&const_mem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, device);
    cuDeviceGetAttribute(&integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, device);
    cuDeviceGetAttribute(&threads_per_multiprocessor, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, device);
    cuDeviceGetAttribute(&multiprocessor_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
    cuDeviceGetAttribute(&max_block_dim_x, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, device);
    cuDeviceGetAttribute(&max_block_dim_y, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, device);
    cuDeviceGetAttribute(&max_block_dim_z, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, device);
    cuDeviceGetAttribute(&max_grid_dim_x, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, device);
    cuDeviceGetAttribute(&max_grid_dim_y, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, device);
    cuDeviceGetAttribute(&max_grid_dim_z, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, device);

    /* newCudaDevice is a static factory, so call it with CallStaticObjectMethod */
    gpu_device = (*env)->CallStaticObjectMethod(env, gpu_device_class, gpu_device_init,
      i, major_version, minor_version, (*env)->NewStringUTF(env, device_name),
      (jlong) free_mem, (jlong) total_mem, registers_per_block, warp_size, pitch,
      threads_per_block, shared_mem_per_block, clock_rate, mem_clock_rate, const_mem,
      integrated, threads_per_multiprocessor, multiprocessor_count,
      max_block_dim_x, max_block_dim_y, max_block_dim_z,
      max_grid_dim_x, max_grid_dim_y, max_grid_dim_z);
    (*env)->CallBooleanMethod(env, ret, array_list_add, gpu_device);
  }

  return ret;
}
/** * initialization after the GL context is set */ void GLCUPixelBuffer::init(int w, int h) { width = w+BLOCKSZ-(w%BLOCKSZ); height = h+BLOCKSZ-(h%BLOCKSZ); Q_ASSERT(pixelBuffer==0); int size_tex_data = sizeof(GLubyte) * width * height * 4; /* * there will be in and out here, * take care to the usage pattern. * the blockop we will ll be pointing at * will show the desired level of op on the framebuffer * side note : working on ultra high resolution, * will require a downsampling before rendering */ size_t free, total; CUDCHK(cuMemGetInfo(&free,&total)) ; qDebug() << "GPU memory : free="<<free<< " /"<< total; pixelBuffer = new QGLBuffer(QGLBuffer::PixelUnpackBuffer); pixelBuffer->setUsagePattern(QGLBuffer::StreamDraw); pixelBuffer->create(); pixelBuffer->bind(); pixelBuffer->allocate(size_tex_data); pixelBuffer->release(); // cudaGLRegisterBufferObject( pixelBuffer->bufferId() ) ); // deprecated as of cuda 3.0 CUDCHK ( cuGraphicsGLRegisterBuffer(&cudaResource, pixelBuffer->bufferId(), CU_GRAPHICS_REGISTER_FLAGS_NONE //CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD ) ); //cudaGraphicsRegisterFlagsWriteDiscard) ); /* * create textures * delete texture object if necessary * for reallocating tex mem, e.g. at different size */ deleteTexture(); textureID = new GLuint[1]; // Generate a texture id glGenTextures(1, textureID); glBindTexture( GL_TEXTURE_2D, textureID[0]); // Allocate the texture memory. The last parameter is NULL since we only // want to allocate memory, not initialize it glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA,GL_UNSIGNED_BYTE, NULL); // Must set the filter mode, GL_LINEAR enables interpolation when scaling /*glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);*/ /* Note: GL_TEXTURE_RECTANGLE_ARB may be used instead of GL_TEXTURE_2D for improved performance if linear interpolation is not desired. Replace GL_LINEAR with GL_NEAREST in the glTexParameteri() call */ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glBindTexture(GL_TEXTURE_2D, 0); //CUDCHK(cuMemGetInfo(&free,&total)) ; //qDebug() << "memory : free="<<free<< " /"<< total; }
void initDevice(JNIEnv * env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jlong free_space) { int status; jint num_blocks; int deviceCount = 0; size_t f_mem; size_t t_mem; size_t to_space_size; textureMemSize = 1; status = cuDeviceGetCount(&deviceCount); CHECK_STATUS(env,"error in cuDeviceGetCount",status) getBestDevice(env); status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice); CHECK_STATUS(env,"error in cuCtxCreate",status) status = cuMemGetInfo (&f_mem, &t_mem); CHECK_STATUS(env,"error in cuMemGetInfo",status) to_space_size = f_mem; num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc; #if DEBUG printf("Memory: %i(MB)/%i(MB) (Free/Total)\n",f_mem/1024/1024, t_mem/1024/1024); printf("num_blocks = %i\n",num_blocks); printf("numMultiProcessors = %i\n",numMultiProcessors); printf("max_threads_per_block = %i\n",max_threads_per_block); printf("max_blocks_per_proc = %i\n",max_blocks_per_proc); fflush(stdout); #endif //space for 100 types in the scene classMemSize = sizeof(jint)*100; gc_space_size = 1024; to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= gc_space_size; to_space_size -= free_space; to_space_size -= classMemSize; //to_space_size -= textureMemSize; bufferSize = to_space_size; status = cuMemHostAlloc(&toSpace, to_space_size, 0); CHECK_STATUS(env,"toSpace memory allocation failed",status) status = cuMemAlloc(&gpuToSpace, to_space_size); CHECK_STATUS(env,"gpuToSpace memory allocation failed",status) status = cuMemAlloc(&gpuClassMemory, classMemSize); CHECK_STATUS(env,"gpuClassMemory memory allocation failed",status) /* status = cuMemHostAlloc(&textureMemory, textureMemSize, 0); if (CUDA_SUCCESS != status) { printf("error in cuMemHostAlloc textureMemory %d\n", status); } status = cuMemAlloc(&gpuTexture, textureMemSize); if (CUDA_SUCCESS != status) { printf("error in cuMemAlloc gpuTexture %d\n", status); } */ status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED); CHECK_STATUS(env,"handlesMemory memory allocation failed",status) status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong)); CHECK_STATUS(env,"gpuHandlesMemory memory allocation failed",status) status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0); CHECK_STATUS(env,"exceptionsMemory memory allocation failed",status) status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong)); CHECK_STATUS(env,"gpuExceptionsMemory memory allocation failed",status) status = cuMemAlloc(&gcInfoSpace, gc_space_size); CHECK_STATUS(env,"gcInfoSpace memory allocation failed",status) status = cuMemAlloc(&gpuHeapEndPtr, 8); CHECK_STATUS(env,"gpuHeapEndPtr memory allocation failed",status) status = cuMemAlloc(&gpuBufferSize, 8); CHECK_STATUS(env,"gpuBufferSize memory allocation failed",status) thisRefClass = (*env)->GetObjectClass(env, this_ref); setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace); setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace); setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory); setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture); setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory); setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory); setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory); setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory); setLongField(env, this_ref, 
"m_ToSpaceSize", (jlong) bufferSize); setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim); setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors); }
/* * Class: edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2 * Method: printDeviceInfo * Signature: ()V */ JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_printDeviceInfo (JNIEnv *env, jclass cls) { int i, a=0, b=0, status; int num_devices = 0; char str[1024]; size_t free_mem, total_mem; status = cuInit(0); CHECK_STATUS(env,"error in cuInit",status) cuDeviceGetCount(&num_devices); printf("%d cuda gpus found\n", num_devices); for (i = 0; i < num_devices; ++i) { CUdevice dev; status = cuDeviceGet(&dev, i); CHECK_STATUS(env,"error in cuDeviceGet",status) status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, dev); CHECK_STATUS(env,"error in cuCtxCreate",status) printf("\nGPU:%d\n", i); if(cuDeviceComputeCapability(&a, &b, dev) == CUDA_SUCCESS) printf("Version: %i.%i\n", a, b); if(cuDeviceGetName(str,1024,dev) == CUDA_SUCCESS) printf("Name: %s\n", str); if(cuMemGetInfo(&free_mem, &total_mem) == CUDA_SUCCESS){ #if (defined linux || defined __APPLE_CC__) printf("Total global memory: %zu/%zu (Free/Total) MBytes\n", free_mem/1024/1024, total_mem/1024/1024); #else printf("Total global memory: %Iu/%Iu (Free/Total) MBytes\n", free_mem/1024/1024, total_mem/1024/1024); #endif } if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,dev) == CUDA_SUCCESS) printf("Total registers per block: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_WARP_SIZE,dev) == CUDA_SUCCESS) printf("Warp size: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_PITCH,dev) == CUDA_SUCCESS) printf("Maximum memory pitch: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,dev) == CUDA_SUCCESS) printf("Maximum threads per block: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,dev) == CUDA_SUCCESS) printf("Total shared memory per block %.2f KB\n", a/1024.0); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_CLOCK_RATE,dev) == CUDA_SUCCESS) printf("Clock rate: %.2f MHz\n", a/1000000.0); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,dev) == CUDA_SUCCESS) printf("Memory Clock rate: %.2f\n", a/1000000.0); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY,dev) == CUDA_SUCCESS) printf("Total constant memory: %.2f MB\n", a/1024.0/1024.0); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_INTEGRATED,dev) == CUDA_SUCCESS) printf("Integrated: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,dev) == CUDA_SUCCESS) printf("Max threads per multiprocessor:%i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,dev) == CUDA_SUCCESS) printf("Number of multiprocessors: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X,dev) == CUDA_SUCCESS) printf("Maximum dimension x of block: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y,dev) == CUDA_SUCCESS) printf("Maximum dimension y of block: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z,dev) == CUDA_SUCCESS) printf("Maximum dimension z of block: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,dev) == CUDA_SUCCESS) printf("Maximum dimension x of grid: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y,dev) == CUDA_SUCCESS) printf("Maximum dimension y of grid: %i\n", a); if(cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z,dev) == CUDA_SUCCESS) printf("Maximum dimension z of grid: %i\n", a); cuCtxDestroy(cuContext); } }
IMT_MRGui::IMT_MRGui(QWidget *parent) : QMainWindow(parent), ui(new Ui::IMT_MRGui) { // Initialize the first GPU GPU0_.allocateGPU(0); GPU0_.printInformation(std::cout); size_t free; size_t total; cuMemGetInfo(&free, &total); std::cout << "free memory: " << free / 1024 / 1024 << "mb, total memory: " << total / 1024 / 1024 << "mb" << std::endl; ui->setupUi(this); read_siemens = 0; fftobj_ = new agile::FFT<TType>(); kspacefovobj_ = new agile::KSpaceFOV<TType>(); open_irgnpara_window = new IRGN_para(); // Be sure to destroy you window somewhere open_irgnpara_window->hide(); _pathsetting = new PathSetting(); // Be sure to destroy you window somewhere _pathsetting->hide(); _specialsetting = new SpecialSetting(); // Be sure to destroy you window somewhere _specialsetting->hide(); setStatusBar(); setTitleText(""); future = new QFuture<void>; watcher = new QFutureWatcher<void>; _cycleread_thread = new CycleRead; _cycleread_thread->set_max_acq_time(_specialsetting->get_autoload_timeout()); _infotext=""; QObject::connect(this,SIGNAL(sendInfo(QString, bool)),this,SLOT(set_Info(QString, bool))); QObject::connect(this,SIGNAL(sendWarning(QString)),this,SLOT(set_Warning(QString))); QObject::connect(this,SIGNAL(sendSaveFile(QString,QString)),this,SLOT(set_SaveFile(QString,QString)),Qt::BlockingQueuedConnection); QObject::connect(_cycleread_thread, SIGNAL(send_filenames(QStringList, QStringList)), this, SLOT(startauto(QStringList, QStringList))); _act_index = 0; _automatic_on = false; _write_file_index=0; ui->cb_savingtype->setCurrentIndex(2); ui->pb_automatic->setText("Auto Off"); _postprocess = new agile::PostProcess<TType, TType_real>(); QLabel* imageLabel = new QLabel(this); imageLabel->setBackgroundRole(QPalette::Base); imageLabel->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored); imageLabel->setScaledContents(true); QImage image(":/images/wappen.png"); imageLabel->setPixmap(QPixmap::fromImage(image)); imageLabel->resize(imageLabel->pixmap()->size()); imageLabel->setGeometry((this->width() - imageLabel->sizeHint().width()), 40, imageLabel->sizeHint().width(), imageLabel->sizeHint().height()); imageLabel->show(); }