void CMeanFilter_GPU::Apply(const Image& input, Image& output) { const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius")); const int width = input.Width(); const int height = input.Height(); const int NbPixel = width * height; const int depth = input.Depth(); const unsigned char *image_data = input.Data(); unsigned char *output_data = output.Data(); //int *TempPix = new int[ NbPixel * 2 ]; unsigned char *TempMask = new unsigned char[NbPixel]; //memset( TempPix, 0, NbPixel * 2 * sizeof( int ) ); memset(TempMask, 255, NbPixel * sizeof(unsigned char)); auto current_device = OpenCLUtils::Instance()->GetCurrentDevice(); const cl::Context Context = current_device.Context(); cl_int Error; cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE; cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY; // If you ever change the local size, don't forget to change the size in the kernel // Global size must be a multiple of local size unsigned int GlobalSizeY = height; unsigned int GlobalSizeX = width; cl::Buffer ImBuffer(Context, InMemFlags, NbPixel * depth * sizeof(unsigned char), (void*)image_data, &Error); cl::Buffer MaskBuffer(Context, InMemFlags, NbPixel * sizeof(unsigned char), (void*)TempMask, &Error); cl::Buffer OutBuffer(Context, OutMemFlags, NbPixel * depth * sizeof(unsigned char), 0, &Error); cl::CommandQueue Queue = current_device.CommandQueue(); cl::Event Event; // Horizontal pass int Index = 0; m_KernelH.setArg(Index++, ImBuffer); m_KernelH.setArg(Index++, MaskBuffer); m_KernelH.setArg(Index++, OutBuffer); m_KernelH.setArg(Index++, height); m_KernelH.setArg(Index++, width); m_KernelH.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NullRange, 0, &Event); Event.wait(); Queue.enqueueReadBuffer(OutBuffer, true, 0, NbPixel * depth * sizeof(unsigned char), (void*)output_data, 0, &Event); Event.wait(); Queue.finish(); delete[] TempMask; }
bool Texture::BeginLoad(Stream& source) { loadImages.Clear(); loadImages.Push(new Image()); if (!loadImages[0]->Load(source)) { loadImages.Clear(); return false; } // If image uses unsupported format, decompress to RGBA now if (loadImages[0]->Format() >= FMT_ETC1) { Image* rgbaImage = new Image(); rgbaImage->SetSize(loadImages[0]->Size(), FMT_RGBA8); loadImages[0]->DecompressLevel(rgbaImage->Data(), 0); loadImages[0] = rgbaImage; // This destroys the original compressed image } // Construct mip levels now if image is uncompressed if (!loadImages[0]->IsCompressed()) { Image* mipImage = loadImages[0]; while (mipImage->Width() > 1 || mipImage->Height() > 1) { loadImages.Push(new Image()); mipImage->GenerateMipImage(*loadImages.Back()); mipImage = loadImages.Back(); } } return true; }
void CMeanFilter_GPUPad::Apply(const Image& input, Image& output) { const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius")); const int GroupSize = 32; const int width = input.Width(); const int height = input.Height(); const int depth = input.Depth(); const int NbPixel = width * height; const uchar *image_data = input.Data(); uchar *output_data = output.Data(); const int NewHeight = Utils::GetNextMultipleOf(height, GroupSize) + radius * 2 + GroupSize; const int NewWidth = Utils::GetNextMultipleOf(width, GroupSize) + radius * 2 + GroupSize; const int NewNbPixel = NewHeight * NewWidth; Image NewImage(NewWidth, NewHeight, depth); int *TempPix = new int[NewNbPixel * 2 * depth]; unsigned char *TempMask = new unsigned char[NewNbPixel]; Image NewOut(NewWidth, NewHeight, depth); memset(TempPix, 0, NewNbPixel * 2 * depth * sizeof(int)); memset(TempMask, 0, NewNbPixel * sizeof(unsigned char)); // Copy image into subrect int NewImageIndex = 0; int OldImageIndex = 0; int temp_val = 0; for (int i = 0; i < height; ++i) { for (int j = 0; j < width; ++j) { for (int k = 0; k < depth; ++k) { NewImage(i + radius,j + radius,k) = input(i,j,k); } TempMask[(i+radius) * NewWidth + j + radius] = 1; } } auto current_device = OpenCLUtils::Instance()->GetCurrentDevice(); const cl::Context Context = current_device.Context(); cl_int Error; cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE; cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR; cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY; // If you ever change the local size, don't forget to change the size in the kernel unsigned int LocalSizeY = 1; unsigned int LocalSizeX = GroupSize; // Global size must be a multiple of local size unsigned int GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY; GlobalSizeY *= LocalSizeY; unsigned int GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX; GlobalSizeX *= LocalSizeX; cl::Buffer ImBuffer(Context, InMemFlags, NewNbPixel * depth * sizeof(unsigned char), (void*)NewImage.Data(), &Error); cl::Buffer MaskBuffer(Context, InMemFlags, NewNbPixel * sizeof(unsigned char), (void*)TempMask, &Error); cl::Buffer ImTempBuffer(Context, InOutMemFlags, NewNbPixel * 2 * depth * sizeof(int), 0, &Error); cl::Buffer OutBuffer(Context, OutMemFlags, NewNbPixel * depth * sizeof(unsigned char), 0, &Error); cl::CommandQueue Queue = current_device.CommandQueue(); cl::Event Event; // Zero out temp buffer m_KernelZero.setArg(0, ImTempBuffer); Queue.enqueueNDRangeKernel(m_KernelZero, cl::NullRange, cl::NDRange(NewNbPixel), cl::NullRange, 0, &Event); Event.wait(); // Horizontal pass int Index = 0; m_KernelH.setArg(Index++, ImBuffer); m_KernelH.setArg(Index++, MaskBuffer); m_KernelH.setArg(Index++, ImTempBuffer); m_KernelH.setArg(Index++, NewHeight); m_KernelH.setArg(Index++, NewWidth); m_KernelH.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NDRange(LocalSizeX, LocalSizeY), 0, &Event); Event.wait(); // Vertical pass LocalSizeY = GroupSize; LocalSizeX = 1; // Global size must be a multiple of local size GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY; GlobalSizeY *= LocalSizeY; GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX; GlobalSizeX *= LocalSizeX; Index = 0; m_KernelV.setArg(Index++, ImTempBuffer); m_KernelV.setArg(Index++, OutBuffer); m_KernelV.setArg(Index++, NewHeight); m_KernelV.setArg(Index++, NewWidth); m_KernelV.setArg(Index++, radius); Queue.enqueueNDRangeKernel(m_KernelV, cl::NullRange, cl::NDRange(GlobalSizeY, GlobalSizeX), cl::NDRange(LocalSizeY, LocalSizeX), 0, &Event); Event.wait(); Queue.enqueueReadBuffer(OutBuffer, true, 0, NewNbPixel * depth * sizeof(unsigned char), (void*)NewOut.Data(), 0, &Event); Event.wait(); Queue.finish(); //PrintToFile( NewOut, (size_t)NewWidth, (size_t)NewHeight, "GPUPad.txt" ); // Read Sub rect for (int i = 0; i < height; ++i) { for (int j = 0; j < width; ++j) { for (int k = 0; k < depth; ++k) { output(i,j,k) = NewOut(i+radius, j+radius,k); } } } delete[] TempPix; delete[] TempMask; }
void MeanFilter_CPU::Apply(const Image &input, Image &output) { const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius")); const int width = input.Width(); const int height = input.Height(); const int depth = input.Depth(); const unsigned char *data = input.Data(); unsigned char *out_data = output.Data(); int i = 0, j = 0, ii = 0, jj = 0; int borneInfx = 0, borneSupx = radius + 1; std::vector<int> accuYMemoryR; accuYMemoryR.resize(radius + 1, 0); std::vector<int> compteurYMemoryR; compteurYMemoryR.resize(radius + 1, 0); std::vector<int> accuYMemoryG; accuYMemoryG.resize(radius + 1, 0); std::vector<int> compteurYMemoryG; compteurYMemoryG.resize(radius + 1, 0); std::vector<int> accuYMemoryB; accuYMemoryB.resize(radius + 1, 0); std::vector<int> compteurYMemoryB; compteurYMemoryB.resize(radius + 1, 0); int index = 0; int nbpt_R = 0; int nbpttemp_R = 0; int sommetemp_R = 0; int somme_R = 0; int compteurligneY_R = 0; int sommeligneY_R = 0; int valtemp_R = 0; int nbpt_G = 0; int nbpttemp_G = 0; int sommetemp_G = 0; int somme_G = 0; int compteurligneY_G = 0; int sommeligneY_G = 0; int valtemp_G = 0; int nbpt_B = 0; int nbpttemp_B = 0; int sommetemp_B = 0; int somme_B = 0; int compteurligneY_B = 0; int sommeligneY_B = 0; int valtemp_B = 0; // Initialisation for (jj = 0; jj < radius + 1; jj++){ for (ii = 0; ii < radius + 1; ii++){ index = (jj * width + ii) * depth; valtemp_R = int(data[index]); accuYMemoryR[jj] = accuYMemoryR[jj] + valtemp_R; // Somme des valeurs horizontalement compteurYMemoryR[jj] = compteurYMemoryR[jj] + 1; somme_R += valtemp_R; nbpt_R += 1; valtemp_G = int(data[index + 1]); accuYMemoryG[jj] = accuYMemoryG[jj] + valtemp_G; // Somme des valeurs horizontalement compteurYMemoryG[jj] = compteurYMemoryG[jj] + 1; somme_G += valtemp_G; nbpt_G += 1; valtemp_B = int(data[index + 2]); accuYMemoryB[jj] = accuYMemoryB[jj] + valtemp_B; // Somme des valeurs horizontalement compteurYMemoryB[jj] = compteurYMemoryB[jj] + 1; somme_B += valtemp_B; nbpt_B += 1; } } std::vector<int> compteurY_R; compteurY_R.resize(radius + 1, 0); std::vector<int> accuY_R; accuY_R.resize(radius + 1, 0); std::vector<int> compteurY_G; compteurY_G.resize(radius + 1, 0); std::vector<int> accuY_G; accuY_G.resize(radius + 1, 0); std::vector<int> compteurY_B; compteurY_B.resize(radius + 1, 0); std::vector<int> accuY_B; accuY_B.resize(radius + 1, 0); // Boucle principale for (i = 0; i < width; ++i){ accuY_R = accuYMemoryR; compteurY_R = compteurYMemoryR; accuY_G = accuYMemoryG; compteurY_G = compteurYMemoryG; accuY_B = accuYMemoryB; compteurY_B = compteurYMemoryB; sommetemp_R = somme_R; nbpttemp_R = nbpt_R; sommetemp_G = somme_G; nbpttemp_G = nbpt_G; sommetemp_B = somme_B; nbpttemp_B = nbpt_B; index = 0; if (nbpttemp_R != 0){ out_data[i * depth] = (unsigned char)((double)sommetemp_R / (double)nbpttemp_R); } if (nbpttemp_G != 0){ out_data[i * depth + 1] = (unsigned char)((double)sommetemp_G / (double)nbpttemp_G); } if (nbpttemp_B != 0){ out_data[i * depth + 2] = (unsigned char)((double)sommetemp_B / (double)nbpttemp_B); } borneInfx = (i - radius > 0) ? (i - radius) : 0; // ATTENTION borneSupx = (i + radius + 1 < width) ? (i + radius + 1) : width; for (j = 1; j < height; ++j){ jj = j + radius; // Si on peut entrer une nouvelle ligne if (jj < height){ for (ii = borneInfx; ii < borneSupx; ii++){ sommeligneY_R += int(data[(jj * width + ii) * depth]); compteurligneY_R++; sommeligneY_G += int(data[(jj * width + ii) * depth + 1]); compteurligneY_G++; sommeligneY_B += int(data[(jj * width + ii) * depth + 2]); compteurligneY_B++; } sommetemp_R += sommeligneY_R; nbpttemp_R += compteurligneY_R; accuY_R.push_back(sommeligneY_R); compteurY_R.push_back(compteurligneY_R); sommeligneY_R = 0; compteurligneY_R = 0; sommetemp_G += sommeligneY_G; nbpttemp_G += compteurligneY_G; accuY_G.push_back(sommeligneY_G); compteurY_G.push_back(compteurligneY_G); sommeligneY_G = 0; compteurligneY_G = 0; sommetemp_B += sommeligneY_B; nbpttemp_B += compteurligneY_B; accuY_B.push_back(sommeligneY_B); compteurY_B.push_back(compteurligneY_B); sommeligneY_B = 0; compteurligneY_B = 0; } // Si on ne touche plus le bord if ((j - radius) > 0){ sommetemp_R -= accuY_R[index]; nbpttemp_R -= compteurY_R[index]; sommetemp_G -= accuY_G[index]; nbpttemp_G -= compteurY_G[index]; sommetemp_B -= accuY_B[index]; nbpttemp_B -= compteurY_B[index]; index++; } if (nbpttemp_R != 0){ out_data[(j * width + i) * depth] = (unsigned char)((double)sommetemp_R / (double)nbpttemp_R); } if (nbpttemp_G != 0){ out_data[(j * width + i) * depth + 1] = (unsigned char)((double)sommetemp_G / (double)nbpttemp_G); } if (nbpttemp_B != 0){ out_data[(j * width + i) * depth + 2] = (unsigned char)((double)sommetemp_B / (double)nbpttemp_B); } } // Reinitialisation accuY_R.resize(radius + 1, 0); compteurY_R.resize(radius + 1, 0); accuY_G.resize(radius + 1, 0); compteurY_G.resize(radius + 1, 0); accuY_B.resize(radius + 1, 0); compteurY_B.resize(radius + 1, 0); index = 0; if (i - radius >= 0){ for (jj = 0; jj < (radius + 1) && jj < height; jj++){ valtemp_R = int(data[(jj * width + borneInfx) * depth]); accuYMemoryR[jj] -= valtemp_R;// accuYMemory[jj] - valtemp; compteurYMemoryR[jj]--;// = compteurYMemory[jj] - 1; somme_R -= valtemp_R; nbpt_R--; valtemp_G = int(data[(jj * width + borneInfx) * depth + 1]); accuYMemoryG[jj] -= valtemp_G;// accuYMemory[jj] - valtemp; compteurYMemoryG[jj]--;// = compteurYMemory[jj] - 1; somme_G -= valtemp_G; nbpt_G--; valtemp_B = int(data[(jj * width + borneInfx) * depth + 2]); accuYMemoryB[jj] -= valtemp_B;// accuYMemory[jj] - valtemp; compteurYMemoryB[jj]--;// = compteurYMemory[jj] - 1; somme_B -= valtemp_B; nbpt_B--; } } if (borneSupx < width){ for (jj = 0; jj < (radius + 1) && jj < height; jj++){ valtemp_R = int(data[(jj * width + borneSupx) * depth]); accuYMemoryR[jj] += valtemp_R;// accuYMemory[jj] + valtemp; compteurYMemoryR[jj]++;// = compteurYMemory[jj] + 1; somme_R += valtemp_R; nbpt_R++; valtemp_G = int(data[(jj * width + borneSupx) * depth + 1]); accuYMemoryG[jj] += valtemp_G;// accuYMemory[jj] + valtemp; compteurYMemoryG[jj]++;// = compteurYMemory[jj] + 1; somme_G += valtemp_G; nbpt_G++; valtemp_B = int(data[(jj * width + borneSupx) * depth + 2]); accuYMemoryB[jj] += valtemp_B;// accuYMemory[jj] + valtemp; compteurYMemoryB[jj]++;// = compteurYMemory[jj] + 1; somme_B += valtemp_B; nbpt_B++; } } } }