bool Cssao::Init(unsigned int WindowWidth, unsigned int WindowHeight){ m_noiseScale[0] = WindowWidth / m_noise_size; m_noiseScale[1] = WindowHeight / m_noise_size; CreateKernel(); CreateNoise(); //Create FBO glGenFramebuffers(1, &m_fbo); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fbo); glGenTextures(1, &m_ssaoTexture); glBindTexture(GL_TEXTURE_2D, m_ssaoTexture); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, WindowWidth, WindowHeight, 0, GL_RGBA, GL_FLOAT, NULL); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_ssaoTexture, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLenum DrawBuffers[] = {GL_COLOR_ATTACHMENT0}; glDrawBuffers(1, DrawBuffers); GLenum Status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if(Status != GL_FRAMEBUFFER_COMPLETE){ printf("FB error, status 0x%04x", Status); return false; } //Restore default framebuffer glBindFramebuffer(GL_FRAMEBUFFER, 0); return true; }
static cl_kernel GetKernel(cl_context p_Context) { static std::map<cl_context, cl_kernel> contextKernelMap; cl_kernel kernel; std::map<cl_context, cl_kernel>::iterator iter = contextKernelMap.find(p_Context); if (iter == contextKernelMap.end()) { kernel = CreateKernel(p_Context); contextKernelMap[p_Context] = kernel; } else { kernel = iter->second; } return kernel; }
int main( int argc, char* argv[] ) { if( argc != 3 ) { std::cerr << "Usage: "<< std::endl; std::cerr << argv[0]; std::cerr << " <InputFileName> n"; std::cerr << std::endl; return EXIT_FAILURE; } int operations = atoi(argv[2]); //sscanf(&operations,"%d",argv[2]); //printf("%d\n", operations); itk::TimeProbe itkClock; double t0 = 0.0; double tf = 0.0; itk::MultiThreader::SetGlobalDefaultNumberOfThreads(1); // Loading file ReaderType::Pointer reader = ReaderType::New(); reader->SetFileName( argv[1] ); reader->Update(); ImageType::Pointer image = reader->GetOutput(); #ifdef GPU GPUReaderType::Pointer gpureader = GPUReaderType::New(); gpureader->SetFileName( argv[1] ); gpureader->Update(); GPUImageType::Pointer gpuImage = gpureader->GetOutput(); #endif saveFile((char*) "/tmp/itk_input.dcm", image); // Allocate output image ImageType::Pointer output = ImageType::New(); ImageType::RegionType region = image->GetBufferedRegion(); output->SetRegions( region ); output->SetOrigin( image->GetOrigin() ); output->SetSpacing( image->GetSpacing() ); output->Allocate(); // Negative typedef itk::UnaryFunctorImageFilter<ImageType,ImageType, Negate<ImageType::PixelType,ImageType::PixelType> > NegateImageFilterType; NegateImageFilterType::Pointer negateFilter = NegateImageFilterType::New(); negateFilter = NegateImageFilterType::New(); negateFilter->SetInput(image); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { negateFilter->Modified(); negateFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d negative: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; #endif // Saving Not result saveFile((char*) "/tmp/itk_not.dcm", negateFilter->GetOutput()); #ifdef GPU // GPU Negative typedef itk::GPUUnaryFunctorImageFilter<ImageType,ImageType, Negate<ImageType::PixelType,ImageType::PixelType> > GPUNegateImageFilterType; GPUNegateImageFilterType::Pointer gpuNegateFilter = GPUNegateImageFilterType::New(); gpuNegateFilter->SetInput(gpureader->GetOutput()); gpuNegateFilter->Update(); // Saving Not result //saveFile("/tmp/itk_gpu_not.dcm", gpuNegateFilter->GetOutput()); #endif // Common Threshold int lowerThreshold = 100; int upperThreshold = 200; // Threshold typedef itk::BinaryThresholdImageFilter <ImageType, ImageType> BinaryThresholdImageFilterType; BinaryThresholdImageFilterType::Pointer thresholdFilter = BinaryThresholdImageFilterType::New(); thresholdFilter = BinaryThresholdImageFilterType::New(); thresholdFilter->SetInput(reader->GetOutput()); thresholdFilter->SetLowerThreshold(lowerThreshold); thresholdFilter->SetUpperThreshold(upperThreshold); thresholdFilter->SetInsideValue(255); thresholdFilter->SetOutsideValue(0); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { thresholdFilter->Modified(); thresholdFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d threshold: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Threshold result saveFile((char*) "/tmp/itk_thresh.dcm", thresholdFilter->GetOutput()); #endif #ifdef GPU // GPU Threshold typedef itk::GPUBinaryThresholdImageFilter <GPUImageType, GPUImageType> GPUBinaryThresholdImageFilterType; GPUBinaryThresholdImageFilterType::Pointer gpuThresholdFilter = GPUBinaryThresholdImageFilterType::New(); gpuThresholdFilter->SetInput(gpureader->GetOutput()); gpuThresholdFilter->SetLowerThreshold(lowerThreshold); gpuThresholdFilter->SetUpperThreshold(upperThreshold); gpuThresholdFilter->SetInsideValue(255); gpuThresholdFilter->SetOutsideValue(0); itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { gpuThresholdFilter->Modified(); gpuThresholdFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d GPU threshold: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving GPU Threshold result gpuThresholdFilter->GetOutput()->UpdateBuffers(); saveFile((char*) "/tmp/itk_gpu_thresh.dcm", gpuThresholdFilter->GetOutput()); #endif // Mean typedef itk::MeanImageFilter< ImageType, ImageType > MeanFilterType; MeanFilterType::Pointer meanFilter = MeanFilterType::New(); meanFilter = MeanFilterType::New(); meanFilter->SetInput( image ); meanFilter->SetRadius( 1 ); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { meanFilter->Modified(); meanFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d mean blur: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Convolution result saveFile((char*) "/tmp/itk_mean3x3.dcm", meanFilter->GetOutput()); #endif // Binomial Blur (aproximation of gaussian blur) typedef itk::BinomialBlurImageFilter<ImageType, ImageType> BinomialBlurImageFilterType; int repetitions = 1; BinomialBlurImageFilterType::Pointer blurFilter = BinomialBlurImageFilterType::New(); blurFilter = BinomialBlurImageFilterType::New(); blurFilter->SetInput( reader->GetOutput() ); blurFilter->SetRepetitions( repetitions ); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { blurFilter->Modified(); blurFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d blur: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Blur result saveFile((char*) "/tmp/itk_blur.dcm", blurFilter->GetOutput()); #endif #ifdef GPU // GPU Blur typedef itk::BoxImageFilter< GPUImageType, GPUImageType > BoxImageFilterType; typedef itk::GPUBoxImageFilter< GPUImageType, GPUImageType, BoxImageFilterType > GPUBoxImageFilterType; GPUBoxImageFilterType::Pointer GPUBlurFilter = GPUBoxImageFilterType::New(); //ImageType::SizeType indexRadius; //indexRadius[0] = 2; //indexRadius[1] = 2; //indexRadius[2] = 2; GPUBlurFilter->SetInput(gpureader->GetOutput()); //GPUBlurFilter->SetRadius(indexRadius); itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { GPUBlurFilter->Update(); GPUBlurFilter->Modified(); } itkClock.Stop(); printf("Tempo gasto para fazer %d gpu blur: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; GPUBlurFilter->GetOutput()->UpdateBuffers(); // Saving GPU Blur result saveFile((char*) "/tmp/itk_gpu_blur.dcm", GPUBlurFilter->GetOutput()); #endif //Erosion Common typedef itk::BinaryBallStructuringElement< ImageType::PixelType, 3> StructuringElementType; typedef itk::GrayscaleErodeImageFilter <ImageType, ImageType, StructuringElementType> GrayscaleErodeImageFilterType; unsigned int radius; // Erosion 3x3 StructuringElementType structuringElement3x3; radius = 1; structuringElement3x3.SetRadius(radius); structuringElement3x3.CreateStructuringElement(); GrayscaleErodeImageFilterType::Pointer erodeFilter3x3; erodeFilter3x3= GrayscaleErodeImageFilterType::New(); erodeFilter3x3->SetInput(reader->GetOutput()); erodeFilter3x3->SetKernel(structuringElement3x3); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { erodeFilter3x3->Modified(); erodeFilter3x3->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d erosion 3x3: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Erosion result saveFile((char*) "/tmp/itk_erode3x3.dcm", erodeFilter3x3->GetOutput()); #endif // Erosion 5x5 StructuringElementType structuringElement5x5; radius = 2; structuringElement5x5.SetRadius(radius); structuringElement5x5.CreateStructuringElement(); GrayscaleErodeImageFilterType::Pointer erodeFilter5x5; erodeFilter5x5 = GrayscaleErodeImageFilterType::New(); erodeFilter5x5->SetInput(reader->GetOutput()); erodeFilter5x5->SetKernel(structuringElement5x5); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { erodeFilter5x5->Modified(); erodeFilter5x5->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d erosion 5x5: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Erosion result saveFile((char*) "/tmp/itk_erode5x5.dcm", erodeFilter5x5->GetOutput()); #endif // Copy typedef itk::ImageDuplicator< ImageType > DuplicatorType; DuplicatorType::Pointer duplicator; duplicator = DuplicatorType::New(); duplicator->SetInputImage(image); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { duplicator->Modified(); duplicator->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d copias cpu: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Copy result saveFile((char*) "/tmp/itk_copy.dcm", duplicator->GetOutput()); #endif // Convolution common typedef itk::ConvolutionImageFilter<ImageType> ConvolutionImageFilterType; ConvolutionImageFilterType::Pointer convolutionFilter; convolutionFilter = ConvolutionImageFilterType::New(); convolutionFilter->SetInput(reader->GetOutput()); int convWidth; // Convolution 3x3 ImageType::Pointer kernel3x3 = ImageType::New(); convWidth = 3; CreateKernel(kernel3x3, convWidth); convolutionFilter->SetKernelImage(kernel3x3); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { convolutionFilter->Modified(); convolutionFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d convolucoes 3x3 cpu: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Convolution result saveFile((char*) "/tmp/itk_convolution3x3.dcm", convolutionFilter->GetOutput()); #endif // Convolution 5x5 ImageType::Pointer kernel5x5 = ImageType::New(); convWidth = 5; CreateKernel(kernel5x5, convWidth); convolutionFilter->SetKernelImage(kernel5x5); #ifndef GPU_only itkClock.Start(); TimerStart(); for(int n = 0; n < operations; n++) { convolutionFilter->Modified(); convolutionFilter->Update(); } itkClock.Stop(); printf("Tempo gasto para fazer %d convolucoes 5x5 cpu: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; // Saving Convolution result saveFile((char*) "/tmp/itk_convolution5x5.dcm", convolutionFilter->GetOutput()); #endif #ifdef GPU // GPU Mean typedef itk::GPUMeanImageFilter<GPUImageType, GPUImageType> GPUMeanFilterType; GPUMeanFilterType::Pointer GPUMean = GPUMeanFilterType::New(); GPUMean->SetInput(gpureader->GetOutput()); GPUMean->SetRadius( 1 ); TimerStart(); for(int n = 0; n < operations; n++) { GPUMean->Update(); GPUMean->Modified(); } itkClock.Stop(); printf("Tempo gasto para fazer %d GPU mean blur: %s\n",operations, getTimeElapsedInSeconds()); tf = itkClock.GetTotal(); std::cout << "My: " << (tf - t0) << std::endl; t0 = tf; GPUMean->GetOutput()->UpdateBuffers(); saveFile((char*) "/tmp/itk_gpu_blurmean.dcm", GPUMean->GetOutput()); #endif // Visualize /* QuickView viewer; viewer.AddImage<ImageType>( image,true, itksys::SystemTools::GetFilenameName(argv[1])); std::stringstream desc; desc << "ITK QuickView: " << argv[1]; viewer.Visualize(); */ // Saving input image as is saveFile((char*) "/tmp/itk_input.dcm", image); return EXIT_SUCCESS; }
int main(int argc, char* argv[]) { cl_int cl_error; /** Init **/ unsigned int matrix_size = MATRIX_SIZE; unsigned int matrix_total_size = matrix_size*matrix_size; size_t cl_buff_size = matrix_total_size * sizeof(MATRIX_TYPE); printf("Matrix size : %d (%d length)\t |\t sous bloc de %d\n",matrix_size,matrix_total_size,LOCAL_DIM_KERNEL); MATRIX_TYPE * matA = malloc(matrix_total_size * sizeof(*matA)); MATRIX_TYPE * matB = malloc(matrix_total_size * sizeof(*matB)); MATRIX_TYPE * matC = malloc(matrix_total_size * sizeof(*matC)); MATRIX_TYPE * matD = malloc(matrix_total_size * sizeof(*matD)); // Init matA InitMatrix2(matA, matrix_size); // InitMatrix(matB, matrix_size); printf("Matrix A:\n"); DisplayMatrix(matA,matrix_size); // Init GPU cl_uint nb_platf; clGetPlatformIDs(0, NULL, &nb_platf); printf("Nombre de plateformes: %d\t", nb_platf); cl_platform_id platfs[nb_platf]; clGetPlatformIDs(nb_platf, platfs, NULL); size_t plat_name_size; clGetPlatformInfo(platfs[0], CL_PLATFORM_NAME, 0, NULL, &plat_name_size); char plat_name[plat_name_size]; clGetPlatformInfo(platfs[0], CL_PLATFORM_NAME, plat_name_size, &plat_name, NULL); printf("( %s ", plat_name); size_t plat_vendor_size; clGetPlatformInfo(platfs[0], CL_PLATFORM_VENDOR, 0, NULL, &plat_vendor_size); char plat_vendor[plat_vendor_size]; clGetPlatformInfo(platfs[0], CL_PLATFORM_VENDOR, plat_vendor_size, &plat_vendor, NULL); printf("| %s)\n", plat_vendor); cl_uint nb_devs; clGetDeviceIDs(platfs[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nb_devs); cl_device_id devs[nb_devs]; clGetDeviceIDs(platfs[0], CL_DEVICE_TYPE_ALL, nb_devs, devs, NULL); cl_context ctx = clCreateContext(NULL, nb_devs, devs, NULL, NULL, NULL); cl_command_queue command_queue = clCreateCommandQueue(ctx, devs[0], CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, NULL); /** Chargement des kernels **/ cl_kernel ker1, ker2, ker3; bool verbose = false; CreateKernel(&ker1, ctx, devs, nb_devs, "./cholesky_kernel1.cl", "cholesky_diag", verbose); CreateKernel(&ker2, ctx, devs, nb_devs, "./cholesky_kernel2.cl", "cholesky_inf", verbose); CreateKernel(&ker3, ctx, devs, nb_devs, "./cholesky_kernel3.cl", "cholesky_subdiag", verbose); int nombre_de_kernel = 3; cl_event ev_ker[nombre_de_kernel], ev_readA; // Création/Préparation du buffer GPU cl_mem bufA = clCreateBuffer(ctx, CL_MEM_READ_WRITE, cl_buff_size, NULL, &cl_error); CHECK_ERROR(cl_error,"clCreateBuffer"); clEnqueueWriteBuffer(command_queue, bufA, CL_TRUE, 0, cl_buff_size, matA, 0, NULL, &ev_ker[nombre_de_kernel-1]); // ev_ker[n-1] pour préparer la boucle. size_t globalDim[] = {matrix_size, matrix_size}; size_t localDim[] = {LOCAL_DIM_KERNEL, LOCAL_DIM_KERNEL}; // GPU Calculs int i; for (i=0 ; i<matrix_size/LOCAL_DIM_KERNEL/**/ ; i++) { // Kernel 1 : Bloc diagonal clSetKernelArg(ker1, 0, sizeof(bufA), &bufA); clSetKernelArg(ker1, 1, sizeof(i), &i); clEnqueueNDRangeKernel(command_queue, ker1, 2, NULL, globalDim, localDim, 1, &ev_ker[nombre_de_kernel-1], &ev_ker[0]); // Kernel 2 : Blocs sous-diagonaux inferieur clSetKernelArg(ker2, 0, sizeof(bufA), &bufA); clSetKernelArg(ker2, 1, sizeof(i), &i); clEnqueueNDRangeKernel(command_queue, ker2, 2, NULL, globalDim, localDim, 1, &ev_ker[0], &ev_ker[1]); // Kernel 3 : Blocs sous-diagonaux clSetKernelArg(ker3, 0, sizeof(bufA), &bufA); clSetKernelArg(ker3, 1, sizeof(i), &i); clEnqueueNDRangeKernel(command_queue, ker3, 2, NULL, globalDim, localDim, 1, &ev_ker[1], &ev_ker[2]); } clEnqueueReadBuffer(command_queue, bufA, CL_TRUE, 0, cl_buff_size, matB, 1, &ev_ker[nombre_de_kernel-1], &ev_readA); clFinish(command_queue); // clGetEvenProfilingInfo clReleaseMemObject(bufA); /********************/ /*** CHECK RESULT ***/ /********************/ // ClearUpMatrix(matB,matrix_size); printf("\nMatrix B:\n"); DisplayMatrix(matB,matrix_size); /**/ ClearUpMatrix(matB,matrix_size); TransposeMatrix(matB,matC,matrix_size); MullMatrix(matB,matC,matD,matrix_size); MinusMatrix(matD,matA,matrix_size); printf("\nMatrix D - A:\n"); DisplayMatrix(matD,matrix_size); //*/ // Free free(matA); free(matB); free(matC); free(matD); return 0; }
void Cssao::setSamples(unsigned int num_samples){ m_kernel_size = num_samples; delete[] m_kernel; CreateKernel(); m_update_kernel = true; }