コード例 #1
0
void CMeanFilter_GPU::Apply(const Image& input, Image& output)
{
	const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius"));
	const int width = input.Width();
	const int height = input.Height();
	const int NbPixel = width * height;
	const int depth = input.Depth();
	const unsigned char *image_data = input.Data();
	unsigned char *output_data = output.Data();

	//int *TempPix = new int[ NbPixel * 2 ];
	unsigned char *TempMask = new unsigned char[NbPixel];
	//memset( TempPix, 0, NbPixel * 2 * sizeof( int ) );
	memset(TempMask, 255, NbPixel * sizeof(unsigned char));

	auto current_device = OpenCLUtils::Instance()->GetCurrentDevice();
	const cl::Context Context = current_device.Context();

	cl_int Error;
	cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE;
	cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
	cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY;

	// If you ever change the local size, don't forget to change the size in the kernel
	// Global size must be a multiple of local size
	unsigned int GlobalSizeY = height;
	unsigned int GlobalSizeX = width;

	cl::Buffer ImBuffer(Context, InMemFlags, NbPixel * depth * sizeof(unsigned char), (void*)image_data, &Error);
	cl::Buffer MaskBuffer(Context, InMemFlags, NbPixel * sizeof(unsigned char), (void*)TempMask, &Error);
	cl::Buffer OutBuffer(Context, OutMemFlags, NbPixel * depth * sizeof(unsigned char), 0, &Error);

	cl::CommandQueue Queue = current_device.CommandQueue();
	cl::Event Event;

	// Horizontal pass
	int Index = 0;
	m_KernelH.setArg(Index++, ImBuffer);
	m_KernelH.setArg(Index++, MaskBuffer);
	m_KernelH.setArg(Index++, OutBuffer);
	m_KernelH.setArg(Index++, height);
	m_KernelH.setArg(Index++, width);
	m_KernelH.setArg(Index++, radius);

	Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NullRange, 0, &Event);
	Event.wait();

	Queue.enqueueReadBuffer(OutBuffer, true, 0, NbPixel * depth * sizeof(unsigned char), (void*)output_data, 0, &Event);
	Event.wait();

	Queue.finish();

	delete[] TempMask;
}
コード例 #2
0
ファイル: filehandle.cpp プロジェクト: KonDiter42/ffms2
// Formats `fmt` with the variadic arguments (printf-style), writes the resulting
// bytes to `avio` and flushes it.
//
// Returns avio->error when it is negative after the write; otherwise the number of
// bytes that were formatted and written (0 for an empty result). Returns -1 without
// writing anything when formatting fails or the formatted text does not fit below
// the 1 MiB buffer cap.
int FileHandle::Printf(const char *fmt, ...) {
	// Upper bound on the formatting buffer, in bytes.
	const size_t MaxBufferSize = 1024 * 1024;

	va_list args;
	va_start(args, fmt);

	std::vector<char> OutBuffer(100);
	int ret = -1;
	bool formatted = false;
	while (OutBuffer.size() < MaxBufferSize) {
		// vsnprintf leaves the va_list it consumed in an indeterminate state, so
		// every attempt must format from a fresh copy of the original arguments.
		va_list attempt;
		va_copy(attempt, args);
		ret = vsnprintf(OutBuffer.data(), OutBuffer.size(), fmt, attempt);
		va_end(attempt);

		// ret == 0 is a valid (empty) result; ret >= size means the output was truncated.
		if (ret >= 0 && static_cast<size_t>(ret) < OutBuffer.size()) {
			formatted = true;
			break;
		}

		// A conforming vsnprintf reports the required length on truncation, so grow
		// straight to it. Implementations that return a negative value instead get
		// the doubling fallback.
		const size_t wanted = ret >= 0 ? static_cast<size_t>(ret) + 1 : OutBuffer.size() * 2;
		if (wanted >= MaxBufferSize)
			break;
		OutBuffer.resize(wanted);
	}

	va_end(args);

	// Never hand avio_write a negative length or one that exceeds the buffer.
	if (!formatted)
		return -1;

	avio_write(avio, reinterpret_cast<const unsigned char *>(OutBuffer.data()), ret);
	avio_flush(avio);

	return avio->error < 0 ? avio->error : ret;
}
コード例 #3
0
void CMeanFilter_GPUPad::Apply(const Image& input, Image& output)
{
	const int radius = CFilterParameterInterpreter<int>::Convert(_parameters->GetParameter("Radius"));
	const int GroupSize = 32;
	const int width = input.Width();
	const int height = input.Height();
	const int depth = input.Depth();
	const int NbPixel = width * height;
	const uchar *image_data = input.Data();
	uchar *output_data = output.Data();
	const int NewHeight = Utils::GetNextMultipleOf(height, GroupSize) + radius * 2 + GroupSize;
	const int NewWidth = Utils::GetNextMultipleOf(width, GroupSize) + radius * 2 + GroupSize;
	const int NewNbPixel = NewHeight * NewWidth;

	Image NewImage(NewWidth, NewHeight, depth);
	int *TempPix = new int[NewNbPixel * 2 * depth];
	unsigned char *TempMask = new unsigned char[NewNbPixel];
	Image NewOut(NewWidth, NewHeight, depth);
	memset(TempPix, 0, NewNbPixel * 2 * depth * sizeof(int));
	memset(TempMask, 0, NewNbPixel * sizeof(unsigned char));

	// Copy image into subrect
	int NewImageIndex = 0;
	int OldImageIndex = 0;
	int temp_val = 0;
	for (int i = 0; i < height; ++i)
	{
		for (int j = 0; j < width; ++j)
		{
			for (int k = 0; k < depth; ++k)
			{
				NewImage(i + radius,j + radius,k) = input(i,j,k);
			}
			TempMask[(i+radius) * NewWidth + j + radius] = 1;
		}
	}

	auto current_device = OpenCLUtils::Instance()->GetCurrentDevice();
	const cl::Context Context = current_device.Context();

	cl_int Error;
	cl_mem_flags InOutMemFlags = CL_MEM_READ_WRITE;
	cl_mem_flags InMemFlags = CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR;
	cl_mem_flags OutMemFlags = CL_MEM_WRITE_ONLY;

	// If you ever change the local size, don't forget to change the size in the kernel
	unsigned int LocalSizeY = 1;
	unsigned int LocalSizeX = GroupSize;
	// Global size must be a multiple of local size
	unsigned int GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY;
	GlobalSizeY *= LocalSizeY;
	unsigned int GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX;
	GlobalSizeX *= LocalSizeX;

	cl::Buffer ImBuffer(Context, InMemFlags, NewNbPixel * depth * sizeof(unsigned char), (void*)NewImage.Data(), &Error);
	cl::Buffer MaskBuffer(Context, InMemFlags, NewNbPixel * sizeof(unsigned char), (void*)TempMask, &Error);
	cl::Buffer ImTempBuffer(Context,
		InOutMemFlags, NewNbPixel * 2 * depth * sizeof(int),
		0, &Error);
	cl::Buffer OutBuffer(Context, OutMemFlags, NewNbPixel * depth * sizeof(unsigned char), 0, &Error);

	cl::CommandQueue Queue = current_device.CommandQueue();
	cl::Event Event;

	// Zero out temp buffer
	m_KernelZero.setArg(0, ImTempBuffer);
	Queue.enqueueNDRangeKernel(m_KernelZero, cl::NullRange, cl::NDRange(NewNbPixel), cl::NullRange, 0, &Event);
	Event.wait();

	// Horizontal pass
	int Index = 0;
	m_KernelH.setArg(Index++, ImBuffer);
	m_KernelH.setArg(Index++, MaskBuffer);
	m_KernelH.setArg(Index++, ImTempBuffer);
	m_KernelH.setArg(Index++, NewHeight);
	m_KernelH.setArg(Index++, NewWidth);
	m_KernelH.setArg(Index++, radius);

	Queue.enqueueNDRangeKernel(m_KernelH, cl::NullRange, cl::NDRange(GlobalSizeX, GlobalSizeY), cl::NDRange(LocalSizeX, LocalSizeY), 0, &Event);
	Event.wait();

	// Vertical pass
	LocalSizeY = GroupSize;
	LocalSizeX = 1;
	// Global size must be a multiple of local size
	GlobalSizeY = (height + LocalSizeY - 1) / LocalSizeY;
	GlobalSizeY *= LocalSizeY;
	GlobalSizeX = (width + LocalSizeX - 1) / LocalSizeX;
	GlobalSizeX *= LocalSizeX;

	Index = 0;
	m_KernelV.setArg(Index++, ImTempBuffer);
	m_KernelV.setArg(Index++, OutBuffer);
	m_KernelV.setArg(Index++, NewHeight);
	m_KernelV.setArg(Index++, NewWidth);
	m_KernelV.setArg(Index++, radius);

	Queue.enqueueNDRangeKernel(m_KernelV, cl::NullRange, cl::NDRange(GlobalSizeY, GlobalSizeX), cl::NDRange(LocalSizeY, LocalSizeX), 0, &Event);
	Event.wait();

	Queue.enqueueReadBuffer(OutBuffer, true, 0, NewNbPixel * depth * sizeof(unsigned char), (void*)NewOut.Data(), 0, &Event);
	Event.wait();

	Queue.finish();

	//PrintToFile( NewOut, (size_t)NewWidth, (size_t)NewHeight, "GPUPad.txt" );

	// Read Sub rect
	for (int i = 0; i < height; ++i)
	{
		for (int j = 0; j < width; ++j)
		{
			for (int k = 0; k < depth; ++k)
			{
				output(i,j,k) = NewOut(i+radius, j+radius,k);
			}
		}
	}

	delete[] TempPix;
	delete[] TempMask;
}