void CMeanFilter_GPU::Apply(const Image& input, Image& output) { const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius")); const int width = input.Width(); const int height = input.Height(); const int NbPixel = width * height; const int depth = input.Depth(); const unsigned char *image_data = input.Data(); unsigned char *output_data = output.Data(); //int *TempPix = new int[ NbPixel * 2 ]; unsigned char *TempMask = new unsigned char[NbPixel]; //memset( TempPix, 0, NbPixel * 2 * sizeof( int ) ); memset(TempMask, 255, NbPixel * sizeof(unsigned char)); auto current_device = OpenCLUtils::Instance()->GetCurrentDevice(); const cl::Context Context = current_device.Context(); cl_int Error; cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE; cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY; // If you ever change the local size, don't forget to change the size in the kernel // Global size must be a multiple of local size unsigned int GlobalSizeY = height; unsigned int GlobalSizeX = width; cl::Buffer ImBuffer(Context, InMemFlags, NbPixel * depth * sizeof(unsigned char), (void*)image_data, &Error); cl::Buffer MaskBuffer(Context, InMemFlags, NbPixel * sizeof(unsigned char), (void*)TempMask, &Error); cl::Buffer OutBuffer(Context, OutMemFlags, NbPixel * depth * sizeof(unsigned char), 0, &Error); cl::CommandQueue Queue = current_device.CommandQueue(); cl::Event Event; // Horizontal pass int Index = 0; m_KernelH.setArg(Index++, ImBuffer); m_KernelH.setArg(Index++, MaskBuffer); m_KernelH.setArg(Index++, OutBuffer); m_KernelH.setArg(Index++, height); m_KernelH.setArg(Index++, width); m_KernelH.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NullRange, 0, &Event); Event.wait(); Queue.enqueueReadBuffer(OutBuffer, true, 0, NbPixel * depth * sizeof(unsigned char), (void*)output_data, 0, &Event); Event.wait(); 
Queue.finish(); delete[] TempMask; }
/// Formats a printf-style message and writes it to this handle's `avio` context, then flushes.
///
/// @param fmt  printf-style format string, followed by its arguments.
/// @return `avio->error` when it is negative after the flush; otherwise the number of bytes
///         written, or -1 when the message could not be formatted or would need a buffer larger
///         than 1 MiB (in which case nothing is written).
int FileHandle::Printf(const char *fmt, ...)
{
    const size_t MaxBufferSize = 1024 * 1024;

    va_list args;
    va_start(args, fmt);

    std::vector<char> OutBuffer(100);
    int ret = -1;
    bool Formatted = false;

    while (!Formatted && OutBuffer.size() <= MaxBufferSize)
    {
        // A va_list is indeterminate after being consumed by vsnprintf, so every attempt
        // must work on its own copy.
        va_list attempt;
        va_copy(attempt, args);
        ret = vsnprintf(OutBuffer.data(), OutBuffer.size(), fmt, attempt);
        va_end(attempt);

        if (ret >= 0 && static_cast<size_t>(ret) < OutBuffer.size())
        {
            // Fits, terminator included. ret == 0 (empty output) is a valid result.
            Formatted = true;
        }
        else if (ret >= 0)
        {
            // Conforming vsnprintf reported the required length: grow to exactly that.
            const size_t Needed = static_cast<size_t>(ret) + 1;
            if (Needed > MaxBufferSize)
                break;
            OutBuffer.resize(Needed);
        }
        else
        {
            // Negative result: either a real formatting error or a legacy runtime that returns
            // -1 on truncation. Keep doubling until the size limit ends the loop.
            OutBuffer.resize(OutBuffer.size() * 2);
        }
    }
    va_end(args);

    if (!Formatted)
        ret = -1; // never hand avio_write a length that the buffer does not actually hold

    if (ret > 0)
        avio_write(avio, reinterpret_cast<const unsigned char *>(OutBuffer.data()), ret);
    avio_flush(avio);

    return avio->error < 0 ? avio->error : ret;
}
void CMeanFilter_GPUPad::Apply(const Image& input, Image& output) { const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius")); const int GroupSize = 32; const int width = input.Width(); const int height = input.Height(); const int depth = input.Depth(); const int NbPixel = width * height; const uchar *image_data = input.Data(); uchar *output_data = output.Data(); const int NewHeight = Utils::GetNextMultipleOf(height, GroupSize) + radius * 2 + GroupSize; const int NewWidth = Utils::GetNextMultipleOf(width, GroupSize) + radius * 2 + GroupSize; const int NewNbPixel = NewHeight * NewWidth; Image NewImage(NewWidth, NewHeight, depth); int *TempPix = new int[NewNbPixel * 2 * depth]; unsigned char *TempMask = new unsigned char[NewNbPixel]; Image NewOut(NewWidth, NewHeight, depth); memset(TempPix, 0, NewNbPixel * 2 * depth * sizeof(int)); memset(TempMask, 0, NewNbPixel * sizeof(unsigned char)); // Copy image into subrect int NewImageIndex = 0; int OldImageIndex = 0; int temp_val = 0; for (int i = 0; i < height; ++i) { for (int j = 0; j < width; ++j) { for (int k = 0; k < depth; ++k) { NewImage(i + radius,j + radius,k) = input(i,j,k); } TempMask[(i+radius) * NewWidth + j + radius] = 1; } } auto current_device = OpenCLUtils::Instance()->GetCurrentDevice(); const cl::Context Context = current_device.Context(); cl_int Error; cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE; cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY; // If you ever change the local size, don't forget to change the size in the kernel unsigned int LocalSizeY = 1; unsigned int LocalSizeX = GroupSize; // Global size must be a multiple of local size unsigned int GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY; GlobalSizeY *= LocalSizeY; unsigned int GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX; GlobalSizeX *= LocalSizeX; cl::Buffer ImBuffer(Context, InMemFlags, NewNbPixel * depth * sizeof(unsigned 
char), (void*)NewImage.Data(), &Error); cl::Buffer MaskBuffer(Context, InMemFlags, NewNbPixel * sizeof(unsigned char), (void*)TempMask, &Error); cl::Buffer ImTempBuffer(Context, InOutMemFlags, NewNbPixel * 2 * depth * sizeof(int), 0, &Error); cl::Buffer OutBuffer(Context, OutMemFlags, NewNbPixel * depth * sizeof(unsigned char), 0, &Error); cl::CommandQueue Queue = current_device.CommandQueue(); cl::Event Event; // Zero out temp buffer m_KernelZero.setArg(0, ImTempBuffer); Queue.enqueueNDRangeKernel(m_KernelZero, cl::NullRange, cl::NDRange(NewNbPixel), cl::NullRange, 0, &Event); Event.wait(); // Horizontal pass int Index = 0; m_KernelH.setArg(Index++, ImBuffer); m_KernelH.setArg(Index++, MaskBuffer); m_KernelH.setArg(Index++, ImTempBuffer); m_KernelH.setArg(Index++, NewHeight); m_KernelH.setArg(Index++, NewWidth); m_KernelH.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NDRange(LocalSizeX, LocalSizeY), 0, &Event); Event.wait(); // Vertical pass LocalSizeY = GroupSize; LocalSizeX = 1; // Global size must be a multiple of local size GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY; GlobalSizeY *= LocalSizeY; GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX; GlobalSizeX *= LocalSizeX; Index = 0; m_KernelV.setArg(Index++, ImTempBuffer); m_KernelV.setArg(Index++, OutBuffer); m_KernelV.setArg(Index++, NewHeight); m_KernelV.setArg(Index++, NewWidth); m_KernelV.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelV, cl::NullRange, cl::NDRange(GlobalSizeY, GlobalSizeX), cl::NDRange(LocalSizeY, LocalSizeX), 0, &Event); Event.wait(); Queue.enqueueReadBuffer(OutBuffer, true, 0, NewNbPixel * depth * sizeof(unsigned char), (void*)NewOut.Data(), 0, &Event); Event.wait(); Queue.finish(); //PrintToFile( NewOut, (size_t)NewWidth, (size_t)NewHeight, "GPUPad.txt" ); // Read Sub rect for (int i = 0; i < height; ++i) { for (int j = 0; j < width; ++j) { for (int k = 0; k < depth; ++k) { output(i,j,k) = 
NewOut(i+radius, j+radius,k); } } } delete[] TempPix; delete[] TempMask; }