SEXP R_cuCtxGetApiVersion(SEXP r_ctx) { SEXP r_ans = R_NilValue; unsigned int version; CUcontext ctx = (CUcontext) getRReference(r_ctx); CUresult ans; ans = cuCtxGetApiVersion( ctx, & version); if(ans) return(R_cudaErrorInfo(ans)); r_ans = ScalarReal(version) ; return(r_ans); }
static CUresult create_context(void *user_context, CUcontext *ctx) { // Initialize CUDA CUresult err = cuInit(0); if (err != CUDA_SUCCESS) { halide_error_varargs(user_context, "CUDA: cuInit failed (%s)", _get_error_name(err)); return err; } // Make sure we have a device int deviceCount = 0; err = cuDeviceGetCount(&deviceCount); if (err != CUDA_SUCCESS) { halide_error_varargs(user_context, "CUDA: cuGetDeviceCount failed (%s)", _get_error_name(err)); return err; } if (deviceCount <= 0) { halide_error(user_context, "CUDA: No devices available"); return CUDA_ERROR_NO_DEVICE; } int device = halide_get_gpu_device(user_context); if (device == -1) { device = deviceCount - 1; } // Get device CUdevice dev; CUresult status = cuDeviceGet(&dev, device); if (status != CUDA_SUCCESS) { halide_error(user_context, "CUDA: Failed to get device\n"); return status; } DEBUG_PRINTF( user_context, " Got device %d\n", dev ); // Dump device attributes #ifdef DEBUG { char name[256]; name[0] = 0; err = cuDeviceGetName(name, 256, dev); DEBUG_PRINTF(user_context, " %s\n", name); if (err != CUDA_SUCCESS) { halide_error_varargs(user_context, "CUDA: cuDeviceGetName failed (%s)", _get_error_name(err)); return err; } size_t memory = 0; err = cuDeviceTotalMem(&memory, dev); DEBUG_PRINTF(user_context, " total memory: %d MB\n", (int)(memory >> 20)); if (err != CUDA_SUCCESS) { halide_error_varargs(user_context, "CUDA: cuDeviceTotalMem failed (%s)", _get_error_name(err)); return err; } // Declare variables for other state we want to query. int max_threads_per_block = 0, warp_size = 0, num_cores = 0; int max_block_size[] = {0, 0, 0}; int max_grid_size[] = {0, 0, 0}; int max_shared_mem = 0, max_constant_mem = 0; int cc_major = 0, cc_minor = 0; struct {int *dst; CUdevice_attribute attr;} attrs[] = { {&max_threads_per_block, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK}, {&warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE}, {&num_cores, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT}, {&max_block_size[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X}, {&max_block_size[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y}, {&max_block_size[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z}, {&max_grid_size[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X}, {&max_grid_size[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y}, {&max_grid_size[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z}, {&max_shared_mem, CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK}, {&max_constant_mem, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY}, {&cc_major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR}, {&cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR}, {NULL, CU_DEVICE_ATTRIBUTE_MAX}}; // Do all the queries. for (int i = 0; attrs[i].dst; i++) { err = cuDeviceGetAttribute(attrs[i].dst, attrs[i].attr, dev); if (err != CUDA_SUCCESS) { halide_error_varargs(user_context, "CUDA: cuDeviceGetAttribute failed (%s) for attribute %d", _get_error_name(err), (int)attrs[i].attr); return err; } } // threads per core is a function of the compute capability int threads_per_core = (cc_major == 1 ? 8 : cc_major == 2 ? (cc_minor == 0 ? 32 : 48) : cc_major == 3 ? 192 : cc_major == 5 ? 128 : 0); DEBUG_PRINTF(user_context, " max threads per block: %d\n" " warp size: %d\n" " max block size: %d %d %d\n" " max grid size: %d %d %d\n" " max shared memory per block: %d\n" " max constant memory per block: %d\n" " compute capability %d.%d\n" " cuda cores: %d x %d = %d\n", max_threads_per_block, warp_size, max_block_size[0], max_block_size[1], max_block_size[2], max_grid_size[0], max_grid_size[1], max_grid_size[2], max_shared_mem, max_constant_mem, cc_major, cc_minor, num_cores, threads_per_core, num_cores * threads_per_core); } #endif // Create context DEBUG_PRINTF( user_context, " cuCtxCreate %d -> ", dev ); err = cuCtxCreate(ctx, 0, dev); if (err != CUDA_SUCCESS) { DEBUG_PRINTF( user_context, "%s\n", _get_error_name(err) ); halide_error_varargs(user_context, "CUDA: cuCtxCreate failed (%s)", _get_error_name(err)); return err; } else { unsigned int version = 0; cuCtxGetApiVersion(*ctx, &version); DEBUG_PRINTF( user_context, "%p (%d)\n", *ctx, version); } return CUDA_SUCCESS; }