int main(int argc, char **argv) { //----------------------- // Input pointers float *h_Volume; void* allMemoryPointers[500]; int numberOfMemoryPointers = 0; nifti_image* allNiftiImages[500]; int numberOfNiftiImages = 0; int OPENCL_PLATFORM = 0; int OPENCL_DEVICE = 0; // Size parameters int DATA_H, DATA_W, DATA_D; float VOXEL_SIZE_X, VOXEL_SIZE_Y, VOXEL_SIZE_Z; //--------------------- /* Input arguments */ FILE *fp = NULL; // No inputs, so print help text if (argc == 1) { printf("\nThe function renders a volume using direct volume rendering.\n\n"); printf("Usage:\n\n"); printf("RenderVolume volume.nii [options]\n\n"); printf("Options:\n\n"); printf(" -platform The OpenCL platform to use (default 0) \n"); printf(" -device The OpenCL device to use for the specificed platform (default 0) \n"); printf(" -verbose Print extra stuff (default false) \n"); printf(" -debug Get additional debug information saved as nifti files (default no). Warning: This will use a lot of extra memory! \n"); printf("\n\n"); return EXIT_SUCCESS; } // Try to open files else if (argc > 1) { fp = fopen(argv[1],"r"); if (fp == NULL) { printf("Could not open file %s !\n",argv[1]); return EXIT_FAILURE; } fclose(fp); } // Loop over additional inputs int i = 2; while (i < argc) { char *input = argv[i]; char *p; if (strcmp(input,"-platform") == 0) { if ( (i+1) >= argc ) { printf("Unable to read value after -platform !\n"); return EXIT_FAILURE; } OPENCL_PLATFORM = (int)strtol(argv[i+1], &p, 10); if (!isspace(*p) && *p != 0) { printf("OpenCL platform must be an integer! You provided %s \n",argv[i+1]); return EXIT_FAILURE; } else if (OPENCL_PLATFORM < 0) { printf("OpenCL platform must be >= 0!\n"); return EXIT_FAILURE; } i += 2; } else if (strcmp(input,"-device") == 0) { if ( (i+1) >= argc ) { printf("Unable to read value after -device !\n"); return EXIT_FAILURE; } OPENCL_DEVICE = (int)strtol(argv[i+1], &p, 10); if (!isspace(*p) && *p != 0) { printf("OpenCL device must be an integer! You provided %s \n",argv[i+1]); return EXIT_FAILURE; } else if (OPENCL_DEVICE < 0) { printf("OpenCL device must be >= 0!\n"); return EXIT_FAILURE; } i += 2; } else { printf("Unrecognized option! %s \n",argv[i]); return EXIT_FAILURE; } } // Read first volume // ----------------------------------- nifti_image *inputVolume = nifti_image_read(argv[1],1); if (inputVolume == NULL) { printf("Could not open volume to render!\n"); return EXIT_FAILURE; } allNiftiImages[numberOfNiftiImages] = inputVolume; numberOfNiftiImages++; // ----------------------------------- // Get data dimensions from input data DATA_W = inputVolume->nx; DATA_H = inputVolume->ny; DATA_D = inputVolume->nz; // Get voxel sizes from input data VOXEL_SIZE_X = inputVolume->dx; VOXEL_SIZE_Y = inputVolume->dy; VOXEL_SIZE_Z = inputVolume->dz; int VOLUME_SIZE = DATA_W * DATA_H * DATA_D * sizeof(float); // Print some info printf("Authored by K.A. Eklund \n"); printf("Volume size: %i x %i x %i \n", DATA_W, DATA_H, DATA_D); printf("Volume voxel size: %f x %f x %f mm \n", VOXEL_SIZE_X, VOXEL_SIZE_Y, VOXEL_SIZE_Z); // ------------------------------------------------ // Allocate memory on the host AllocateMemory(h_Volume, VOLUME_SIZE, allMemoryPointers, numberOfMemoryPointers, allNiftiImages, numberOfNiftiImages, "INPUT_VOLUME"); // Convert data to floats if ( inputVolume->datatype == DT_SIGNED_SHORT ) { short int *p = (short int*)inputVolume->data; for (int i = 0; i < DATA_W * DATA_H * DATA_D; i++) { h_Volume[i] = (float)p[i]; } } else if ( inputVolume->datatype == DT_UINT8 ) { unsigned char *p = (unsigned char*)inputVolume->data; for (int i = 0; i < DATA_W * DATA_H * DATA_D; i++) { h_Volume[i] = (float)p[i]; } } else if ( inputVolume->datatype == DT_FLOAT ) { float *p = (float*)inputVolume->data; for (int i = 0; i < DATA_W * DATA_H * DATA_D; i++) { h_Volume[i] = p[i]; } } else { printf("Unknown data type in input volume, aborting!\n"); FreeAllMemory(allMemoryPointers,numberOfMemoryPointers); FreeAllNiftiImages(allNiftiImages,numberOfNiftiImages); return EXIT_FAILURE; } //------------------------ // First initialize OpenGL context, so we can properly setup the OpenGL / OpenCL interop. InitGL(&argc, argv); // Create OpenCL context, get device info, select device, select options for image/texture and CL-GL interop //createCLContext(argc, (const char**)argv); createCLContext(OPENCL_PLATFORM, OPENCL_DEVICE); cl_int error; // create a command-queue cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[uiDeviceUsed], 0, &ciErrNum); //oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); //clGetDeviceInfo(cdDevices[uiDeviceUsed], CL_DEVICE_IMAGE_SUPPORT, sizeof(g_bImageSupport), &g_bImageSupport, NULL); g_bImageSupport = true; // Read the kernel code from file std::fstream kernelFile("volumeRender.cl",std::ios::in); std::ostringstream oss; oss << kernelFile.rdbuf(); std::string src = oss.str(); const char *srcstr = src.c_str(); // Create program and build the code for the selected device cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char**)&srcstr, NULL, &error); printf("Create program with source error is %i \n",error); // build the program std::string buildOpts = "-cl-fast-relaxed-math"; buildOpts += g_bImageSupport ? " -DIMAGE_SUPPORT" : ""; ciErrNum = clBuildProgram(cpProgram, 0, NULL, buildOpts.c_str(), NULL, NULL); printf("Build program error is %i \n",error); if (ciErrNum != CL_SUCCESS) { printf("Building failed!\n"); // write out standard error, Build Log and PTX, then cleanup and return error //shrlogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); //oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext)); //oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), "oclVolumeRender.ptx"); Cleanup(EXIT_FAILURE); } // create the kernel ckKernel = clCreateKernel(cpProgram, "d_render", &error); printf("Create kernel error is %i \n",error); //oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Init OpenCL initCLVolume(h_Volume, DATA_W, DATA_H, DATA_D); // init timer 1 for fps measurement //shrDeltaT(1); // Create buffers and textures, // and then start main GLUT rendering loop for processing and rendering, // or otherwise run No-GL Q/A test sequence initPixelBuffer(); glutMainLoop(); // Normally unused return path Cleanup(EXIT_SUCCESS); // Free all memory FreeAllMemory(allMemoryPointers,numberOfMemoryPointers); FreeAllNiftiImages(allNiftiImages,numberOfNiftiImages); return EXIT_SUCCESS; }
int InitOpenCLContext() { // start logs shrSetLogFileName ("oclVolumeRender.txt"); // get command line arg for quick test, if provided // process command line arguments // First initialize OpenGL context, so we can properly setup the OpenGL / OpenCL interop. // glewInit(); // GLboolean bGLEW = glewIsSupported("GL_VERSION_2_0 GL_ARB_pixel_buffer_object"); // oclCheckErrorEX(bGLEW, shrTRUE, pCleanup); g_glInterop = true; // Create OpenCL context, get device info, select device, select options for image/texture and CL-GL interop createCLContext(); // Print device info clGetDeviceInfo(cdDevices[uiDeviceUsed], CL_DEVICE_IMAGE_SUPPORT, sizeof(g_bImageSupport), &g_bImageSupport, NULL); //shrLog("%s...\n\n", g_bImageSupport ? "Using Image (Texture)" : "No Image (Texuture) Support"); // shrLog("Detailed Device info:\n\n"); oclPrintDevInfo(LOGBOTH, cdDevices[uiDeviceUsed]); // create a command-queue cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevices[uiDeviceUsed], 0, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Program Setup size_t program_length; cPathAndName = shrFindFilePath("Transform.cl", "."); oclCheckErrorEX(cPathAndName != NULL, shrTRUE, pCleanup); cSourceCL = oclLoadProgSource(cPathAndName, "", &program_length); oclCheckErrorEX(cSourceCL != NULL, shrTRUE, pCleanup); // create the program cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&cSourceCL, &program_length, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // build the program std::string buildOpts = "-cl-single-precision_constant"; // buildOpts += g_bImageSupport ? " -DIMAGE_SUPPORT" : ""; // ciErrNum = clBuildProgram(cpProgram, 1, &cdDevices[uiDeviceUsed],"-cl-fast-relaxed-math", NULL, NULL); ciErrNum = clBuildProgram(cpProgram, 1, &cdDevices[uiDeviceUsed],NULL, NULL, NULL); if (ciErrNum != CL_SUCCESS) { // write out standard error, Build Log and PTX, then cleanup and return error shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext)); oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), "oclVolumeRender.ptx"); Cleanup(EXIT_FAILURE); } // create the kernel ScalseKernel = clCreateKernel(cpProgram, "d_render", &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); TransformKernel = clCreateKernel(cpProgram, "angle", &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); LongToShortKernel = clCreateKernel(cpProgram, "transfer", &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); return TRUE; }
int main(int argc, char **argv) { const char *kernelname = "counthits"; unsigned count =10000; cl_int err; cl_context cl_context; cl_program program; cl_kernel cl_kernel; cl_mem cl_out; cl_command_queue cl_queue; size_t i, nthreads, hits_sz; size_t cores, work_group_size; cl_uint2 * hits_host; double d = 0.; // timer d = timer(&d); progname = argv[0]; CHECK(cl::Platform::get(&platformList)); CHECKERR( cl_context = createCLContext(CL_DEVICE_TYPE_GPU,cl_vendor::VENDOR_AMD, &err) ); std::vector<cl::Device> devices; CHECKERR( devices = cl_context.getInfo<CL_CONTEXT_DEVICES>(&err) ); size_t length = 0; const char * sourceStr = loadFileToString("pi_opencl_kernel.ocl","",&length); cl::Program::Sources sources(1, std::make_pair(sourceStr, length)); program = cl::Program(cl_context, sources); CHECK( program.build(devices,"-I ..\\include") ); CHECKERR(work_group_size = devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(&err) ); CHECKERR(cores = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(&err) ); cores *= 16*4; //Tahiti. if (work_group_size > 64) work_group_size /= 2; nthreads = cores * work_group_size*32; //2048*128 = 262144 if (count == 0) count = NTRIES/nthreads; //38 printf("Count: %lu\n",count); hits_sz = nthreads * sizeof(hits_host[0]);//2097152 CHECKNOTZERO(hits_host = (cl_uint2 *)malloc(hits_sz)); CHECKERR ( cl_out = cl::Buffer( cl_context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, hits_sz, hits_host, &err)); CHECKERR ( cl_kernel = cl::Kernel(program,kernelname,&err) ); CHECK ( cl_kernel.setArg( 0, count) ); CHECK ( cl_kernel.setArg( 1, cl_out) ); CHECKERR (cl_queue = cl::CommandQueue(cl_context, devices[0], 0, &err) ); cl::Event event; CHECK( cl_queue.enqueueNDRangeKernel(cl_kernel,cl::NullRange,cl::NDRange(nthreads), cl::NDRange(work_group_size), NULL, &event) ); event.wait(); CHECK( cl_queue.enqueueReadBuffer(cl_out, CL_TRUE, 0,hits_sz, hits_host) ); unsigned long hits = 0, tries = 0; for (i = 0; i < nthreads; i++) { #ifdef _DEBUG printf("%lu %u %u\n", (unsigned long)i, hits_host[i].s[0], hits_host[i].s[1]); #endif hits += hits_host[i].s[0]; tries += hits_host[i].s[1]; } return pi_check(hits, tries); }