/*
 * Creates the CUDA context on the device chosen by getBestDevice() and
 * computes how many bytes of the currently free device memory remain once
 * the runtime's fixed reservations have been set aside.
 *
 * Reservations taken out of the free-memory figure reported by cuMemGetInfo():
 *   - two arrays of one jlong per thread, where the thread count is
 *     numMultiProcessors * max_threads_per_block * max_blocks_per_proc
 *   - gc_space_size  (assigned 1024 here)
 *   - classMemSize   (assigned room for 100 jint entries here)
 *
 * Side effects: assigns the file-level cuContext, classMemSize and
 * gc_space_size.
 *
 * Parameters:
 *   env                   - JNI environment, forwarded to getBestDevice() and
 *                           to CHECK_STATUS_RTN
 *   max_blocks_per_proc   - factor of the thread count
 *   max_threads_per_block - factor of the thread count
 *
 * Returns the remaining size in bytes. 0 is handed to CHECK_STATUS_RTN as the
 * failure value for the driver calls, and 0 is also returned when the thread
 * count is negative or the reservations do not fit into free memory (the
 * original let the unsigned subtraction wrap around to a huge bogus size).
 * NOTE(review): no Java exception is raised on those two new 0 paths - confirm
 * the caller treats 0 as "no usable space".
 */
size_t initContext(JNIEnv * env, jint max_blocks_per_proc, jint max_threads_per_block)
{
  int status;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  size_t fixed_bytes;
  size_t to_space_size;
  jlong num_threads;

  status = cuDeviceGetCount(&deviceCount);
  CHECK_STATUS_RTN(env,"error in cuDeviceGetCount",status, 0);

  getBestDevice(env);

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  CHECK_STATUS_RTN(env,"error in cuCtxCreate",status, 0);

  status = cuMemGetInfo (&f_mem, &t_mem);
  CHECK_STATUS_RTN(env,"error in cuMemGetInfo",status, 0);

  //space for 100 types in the scene
  classMemSize = sizeof(jint)*100;
  gc_space_size = 1024;

  // Multiply in 64 bits so the product of three 32-bit values cannot overflow.
  num_threads = (jlong) numMultiProcessors
              * (jlong) max_threads_per_block
              * (jlong) max_blocks_per_proc;
  if (num_threads < 0) {
    return 0;
  }

  // Fixed-size reservations first; bail out instead of wrapping below zero.
  fixed_bytes = (size_t) gc_space_size + (size_t) classMemSize;
  if (f_mem <= fixed_bytes) {
    return 0;
  }
  to_space_size = f_mem - fixed_bytes;

  // Two per-thread jlong arrays; check by division so the byte count itself
  // cannot overflow before it is compared.
  if ((unsigned long long) num_threads >
      (unsigned long long) (to_space_size / (2 * sizeof(jlong)))) {
    return 0;
  }
  to_space_size -= 2 * (size_t) num_threads * sizeof(jlong);

  return to_space_size;
}
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    setup
 * Signature: (III)V
 *
 * Initialises the CUDA driver, creates the context on the device chosen by
 * getBestDevice(), allocates the runtime's host and device buffers and
 * publishes their addresses and sizes into long fields of this_ref.
 *
 * The to-space size is the free device memory reported by cuMemGetInfo()
 * minus: two arrays of one jlong per thread, gc_space_size (1024) and the
 * caller-supplied free_space.
 *
 * Parameters:
 *   env                   - JNI environment
 *   this_ref              - the Java object whose long fields are filled in
 *   max_blocks_per_proc   - factor of the per-thread array length
 *   max_threads_per_block - factor of the per-thread array length
 *   free_space            - bytes to leave out of the to-space allocation
 *
 * Every failing driver call raises a Java exception through
 * throw_cuda_errror_exception() and returns immediately. Previously the
 * cuInit / cuDeviceGetCount / cuCtxCreate failures were only printed and the
 * cuMemGetInfo status was ignored, so execution carried on with an unusable
 * context or uninitialised memory figures.
 *
 * NOTE(review): buffers allocated before a failing step are not released on
 * the early-return paths.
 * NOTE(review): if the reservations exceed free memory the unsigned
 * subtraction wraps; the resulting oversized request is then presumably
 * rejected by the cuMemHostAlloc call below - confirm.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_setup
  (JNIEnv *env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jint free_space)
{
  int status;
  jint num_blocks;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  size_t to_space_size;
  size_t per_thread_bytes;

  //size_t free_space = 1530L*1024L*1024L;
  textureMemSize = 1;

  status = cuInit(0);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuInit", status);
    return;
  }

  status = cuDeviceGetCount(&deviceCount);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuDeviceGetCount", status);
    return;
  }

  getBestDevice();

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuCtxCreate", status);
    return;
  }

  // ddb - not using this as this returns the total memory not the free memory
  //to_space_size = memSize();
  status = cuMemGetInfo(&f_mem, &t_mem);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuMemGetInfo", status);
    return;
  }

  to_space_size = f_mem;

  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;
  // Size in bytes of each one-jlong-per-thread array (handles, exceptions).
  per_thread_bytes = (size_t) num_blocks * sizeof(jlong);

#if DEBUG
  // size_t must not be passed to %i; narrow the megabyte counts explicitly.
  printf("Memory: %i(MB)/%i(MB) (Free/Total)\n",(int)(f_mem/1024/1024), (int)(t_mem/1024/1024));

  printf("num_blocks = %i\n",(int) num_blocks);
  printf("numMultiProcessors = %i\n",numMultiProcessors);
  printf("max_threads_per_block = %i\n",(int) max_threads_per_block);
  printf("max_blocks_per_proc = %i\n",(int) max_blocks_per_proc);
  fflush(stdout);
#endif

  gc_space_size = 1024;
  to_space_size -= per_thread_bytes;
  to_space_size -= per_thread_bytes;
  to_space_size -= gc_space_size;
  to_space_size -= free_space;
  //to_space_size -= textureMemSize;
  bufferSize = to_space_size;

  status = cuMemHostAlloc(&toSpace, to_space_size, 0);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "toSpace memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuToSpace, to_space_size);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuToSpace memory allocation failed", status);
    return;
  }

/*
  status = cuMemHostAlloc(&textureMemory, textureMemSize, 0);
  if (CUDA_SUCCESS != status)
  {
    printf("error in cuMemHostAlloc textureMemory %d\n", status);
  }

  status = cuMemAlloc(&gpuTexture, textureMemSize);
  if (CUDA_SUCCESS != status)
  {
    printf("error in cuMemAlloc gpuTexture %d\n", status);
  }
*/

  status = cuMemHostAlloc(&handlesMemory, per_thread_bytes, CU_MEMHOSTALLOC_WRITECOMBINED);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "handlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuHandlesMemory, per_thread_bytes);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHandlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemHostAlloc(&exceptionsMemory, per_thread_bytes, 0);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "exceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuExceptionsMemory, per_thread_bytes);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuExceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gcInfoSpace, gc_space_size);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gcInfoSpace memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuHeapEndPtr, 8);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHeapEndPtr memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuBufferSize, 8);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuBufferSize memory allocation failed", status);
    return;
  }

  // Publish addresses and sizes to the Java object.
  thisRefClass = (*env)->GetObjectClass(env, this_ref);
  setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace);
  setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace);
  // The texture allocations above are commented out, so these two fields
  // receive whatever textureMemory / gpuTexture currently hold.
  setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory);
  setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture);
  setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory);
  setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory);
  setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory);
  setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory);
  setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize);
  setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim);
  setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors);
}
void initDevice(JNIEnv * env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jlong free_space) { int status; jint num_blocks; int deviceCount = 0; size_t f_mem; size_t t_mem; size_t to_space_size; textureMemSize = 1; status = cuDeviceGetCount(&deviceCount); CHECK_STATUS(env,"error in cuDeviceGetCount",status) getBestDevice(env); status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice); CHECK_STATUS(env,"error in cuCtxCreate",status) status = cuMemGetInfo (&f_mem, &t_mem); CHECK_STATUS(env,"error in cuMemGetInfo",status) to_space_size = f_mem; num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc; #if DEBUG printf("Memory: %i(MB)/%i(MB) (Free/Total)\n",f_mem/1024/1024, t_mem/1024/1024); printf("num_blocks = %i\n",num_blocks); printf("numMultiProcessors = %i\n",numMultiProcessors); printf("max_threads_per_block = %i\n",max_threads_per_block); printf("max_blocks_per_proc = %i\n",max_blocks_per_proc); fflush(stdout); #endif //space for 100 types in the scene classMemSize = sizeof(jint)*100; gc_space_size = 1024; to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= (num_blocks * sizeof(jlong)); to_space_size -= gc_space_size; to_space_size -= free_space; to_space_size -= classMemSize; //to_space_size -= textureMemSize; bufferSize = to_space_size; status = cuMemHostAlloc(&toSpace, to_space_size, 0); CHECK_STATUS(env,"toSpace memory allocation failed",status) status = cuMemAlloc(&gpuToSpace, to_space_size); CHECK_STATUS(env,"gpuToSpace memory allocation failed",status) status = cuMemAlloc(&gpuClassMemory, classMemSize); CHECK_STATUS(env,"gpuClassMemory memory allocation failed",status) /* status = cuMemHostAlloc(&textureMemory, textureMemSize, 0); if (CUDA_SUCCESS != status) { printf("error in cuMemHostAlloc textureMemory %d\n", status); } status = cuMemAlloc(&gpuTexture, textureMemSize); if (CUDA_SUCCESS != status) { printf("error in cuMemAlloc gpuTexture %d\n", status); } */ status = 
cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED); CHECK_STATUS(env,"handlesMemory memory allocation failed",status) status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong)); CHECK_STATUS(env,"gpuHandlesMemory memory allocation failed",status) status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0); CHECK_STATUS(env,"exceptionsMemory memory allocation failed",status) status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong)); CHECK_STATUS(env,"gpuExceptionsMemory memory allocation failed",status) status = cuMemAlloc(&gcInfoSpace, gc_space_size); CHECK_STATUS(env,"gcInfoSpace memory allocation failed",status) status = cuMemAlloc(&gpuHeapEndPtr, 8); CHECK_STATUS(env,"gpuHeapEndPtr memory allocation failed",status) status = cuMemAlloc(&gpuBufferSize, 8); CHECK_STATUS(env,"gpuBufferSize memory allocation failed",status) thisRefClass = (*env)->GetObjectClass(env, this_ref); setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace); setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace); setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory); setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture); setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory); setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory); setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory); setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory); setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize); setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim); setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors); }
// Default constructor: builds the whole compute context in the initializer list.
//   device        <- result of getBestDevice()
//   context       <- constructed from device
//   transfer_q    <- constructed from (context, device)
//   computation_q <- constructed from (context, device)
// The body is empty; all work happens in the member constructors.
// NOTE(review): C++ initializes members in class-declaration order, not in
// the order written here. Each initializer depends on the ones before it, so
// the class (not visible here) must declare device, context and the two
// queues in this order - confirm.
compute_context::compute_context() : device(getBestDevice()), context(device), transfer_q(context, device), computation_q(context, device) { }