void APawnWithCamera::UpdateTextureRegions(UTexture2D* Texture, int32 MipIndex, uint32 NumRegions, FUpdateTextureRegion2D* Regions,
	uint32 SrcPitch, uint32 SrcBpp, uint8* SrcData, bool bFreeData)
{

	if (Texture->Resource)
	{
		struct FUpdateTextureRegionsData
		{
			FTexture2DResource* Texture2DResource;
			int32 MipIndex;
			uint32 NumRegions;
			FUpdateTextureRegion2D* Regions;
			uint32 SrcPitch;
			uint32 SrcBpp;
			uint8* SrcData;
		};

		FUpdateTextureRegionsData* RegionData = new FUpdateTextureRegionsData;

		RegionData->Texture2DResource = (FTexture2DResource*)Texture->Resource;
		RegionData->MipIndex = MipIndex;
		RegionData->NumRegions = NumRegions;
		RegionData->Regions = Regions;
		RegionData->SrcPitch = SrcPitch;
		RegionData->SrcBpp = SrcBpp;
		RegionData->SrcData = SrcData;


		ENQUEUE_UNIQUE_RENDER_COMMAND_TWOPARAMETER(
			UpdateTextureRegionsData,
			FUpdateTextureRegionsData*, RegionData, RegionData,
			bool, bFreeData, bFreeData,
			{
				for (uint32 RegionIndex = 0; RegionIndex < RegionData->NumRegions; ++RegionIndex)
				{
					int32 CurrentFirstMip = RegionData->Texture2DResource->GetCurrentFirstMip();
					if (RegionData->MipIndex >= CurrentFirstMip)
					{
						RHIUpdateTexture2D(
							RegionData->Texture2DResource->GetTexture2DRHI(),
							RegionData->MipIndex - CurrentFirstMip,
							RegionData->Regions[RegionIndex],
							RegionData->SrcPitch,
							RegionData->SrcData
							+ RegionData->Regions[RegionIndex].SrcY * RegionData->SrcPitch
							+ RegionData->Regions[RegionIndex].SrcX * RegionData->SrcBpp
							);
					}
				}
		if (bFreeData)
		{
			FMemory::Free(RegionData->Regions);
			FMemory::Free(RegionData->SrcData);
		}
		delete RegionData;
			});


	}
// Called when the game starts
void UOpenCVComponent::BeginPlay()
{
	Super::BeginPlay();

	// ...
	
	if (nullptr != Texture2D)
	{
		if (cv::ocl::haveOpenCL())
		{
			cv::ocl::Context Context;
			if (Context.create(cv::ocl::Device::TYPE_GPU))
			{
				cv::ocl::Device(Context.device(0));

				//!< Simple OpenCL Code
				const cv::String Code = "__kernel void main(__global uchar* dst, const int pitch, const int offset, const int rows, const int cols) {"
					"const int2 uv = { get_global_id(0), get_global_id(1) };"
					"const int2 dim = { rows, cols };"
					"const int2 grid = { 16, 16 };"
					"const int2 repeate = dim / grid;"
					"const int index = mad24(uv.y, pitch, uv.x + offset);"
					"dst[index] = ((uv.x % grid.x) * repeate.x >> 1) + ((uv.y % grid.y) * repeate.y >> 1);"
					"}";
				const cv::ocl::ProgramSource ProgramSource(Code);

				//!< Build OpenCL
				const cv::String Buildopt = "";
				cv::String Errmsg;
				const auto Program = Context.getProg(ProgramSource, Buildopt, Errmsg);

				const auto Width = Texture2D->GetSizeX();
				const auto Height = Texture2D->GetSizeY();
				//!< Result destination
				const auto Mat = cv::UMat(Height, Width, CV_8U, cv::ACCESS_WRITE, cv::USAGE_ALLOCATE_DEVICE_MEMORY);

				//!< OpenCL function name and arguments
				cv::ocl::Kernel Kernel("main", Program);
				Kernel.args(cv::ocl::KernelArg::ReadWrite(Mat));

				//!< Execute OpenCL
				size_t Threads[] = { Width, Height, 1 };
				if (!Kernel.run(ARRAY_COUNT(Threads), Threads, nullptr, true))
				{
					GEngine->AddOnScreenDebugMessage(-1, 5.0f, FColor::Red, TEXT("OpenCL run failed"));
				}

				const auto Result = Mat.getMat(cv::ACCESS_READ);

				//!< Add some OpenCV operations
				cv::putText(Mat, cv::String("Hello OpenCV"), cv::Point(0, 255), CV_FONT_HERSHEY_PLAIN, 2.0, cv::Scalar(0, 127, 127));
				cv::rectangle(Mat, cv::Point(64 + 5, 64 + 5), cv::Point(96 + 5, 96 + 5), cv::Scalar(0, 255, 0));
				cv::circle(Mat, cv::Point(128, 128), 32, cv::Scalar(0, 0, 255));

				//cv::imshow("Result", Result);

				//!< cv::Mat -> TArray<FColor>
				Colors.Empty();
				Colors.Reserve(Width * Height);
				for (auto i = 0; i < Height; ++i)
				{
					for (auto j = 0; j < Width; ++j)
					{
						const auto Value = Result.data[i * Width + j];
						Colors.Add(FColor(Value, Value, Value));
					}
				}

				//!< Update UTexture2D
				ENQUEUE_RENDER_COMMAND(UpdateTexture2D)(
					[Tex = Texture2D, this](FRHICommandListImmediate& RHICmdList)
					{
						const auto TexWidth = Tex->GetSizeX();
						const auto TexHeight = Tex->GetSizeY();
						const auto Pitch = GPixelFormats[Tex->GetPixelFormat()].BlockBytes * TexWidth;
						RHIUpdateTexture2D(Tex->Resource->TextureRHI->GetTexture2D(), 0, FUpdateTextureRegion2D(0, 0, 0, 0, TexWidth, TexHeight), Pitch, reinterpret_cast<const uint8*>(&Colors[0]));
					});
			}
		}
	}
}
void FHZBOcclusionTester::Submit(FRHICommandListImmediate& RHICmdList, const FViewInfo& View)
{
	SCOPED_DRAW_EVENT(RHICmdList, SubmitHZB);

	FSceneViewState* ViewState = (FSceneViewState*)View.State;
	if( !ViewState )
	{
		return;
	}

	TRefCountPtr< IPooledRenderTarget >	BoundsCenterTexture;
	TRefCountPtr< IPooledRenderTarget >	BoundsExtentTexture;
	{
		uint32 Flags = TexCreate_ShaderResource | TexCreate_Dynamic;
		FPooledRenderTargetDesc Desc( FPooledRenderTargetDesc::Create2DDesc( FIntPoint( SizeX, SizeY ), PF_A32B32G32R32F, FClearValueBinding::None, Flags, TexCreate_None, false ) );

		GRenderTargetPool.FindFreeElement(RHICmdList, Desc, BoundsCenterTexture, TEXT("HZBBoundsCenter") );
		GRenderTargetPool.FindFreeElement(RHICmdList, Desc, BoundsExtentTexture, TEXT("HZBBoundsExtent") );
	}

	TRefCountPtr< IPooledRenderTarget >	ResultsTextureGPU;
	{
		FPooledRenderTargetDesc Desc( FPooledRenderTargetDesc::Create2DDesc( FIntPoint( SizeX, SizeY ), PF_B8G8R8A8, FClearValueBinding::None, TexCreate_None, TexCreate_RenderTargetable, false ) );
		GRenderTargetPool.FindFreeElement(RHICmdList, Desc, ResultsTextureGPU, TEXT("HZBResultsGPU") );
	}

	{
#if 0
		static float CenterBuffer[ SizeX * SizeY ][4];
		static float ExtentBuffer[ SizeX * SizeY ][4];

		FMemory::Memset( CenterBuffer, 0, sizeof( CenterBuffer ) );
		FMemory::Memset( ExtentBuffer, 0, sizeof( ExtentBuffer ) );

		const uint32 NumPrimitives = Primitives.Num();
		for( uint32 i = 0; i < NumPrimitives; i++ )
		{
			const FOcclusionPrimitive& Primitive = Primitives[i];

			CenterBuffer[i][0] = Primitive.Center.X;
			CenterBuffer[i][1] = Primitive.Center.Y;
			CenterBuffer[i][2] = Primitive.Center.Z;
			CenterBuffer[i][3] = 0.0f;

			ExtentBuffer[i][0] = Primitive.Extent.X;
			ExtentBuffer[i][1] = Primitive.Extent.Y;
			ExtentBuffer[i][2] = Primitive.Extent.Z;
			ExtentBuffer[i][3] = 1.0f;
		}

		FUpdateTextureRegion2D Region( 0, 0, 0, 0, SizeX, SizeY );
		RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsCenterTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, SizeX * 4 * sizeof( float ), (uint8*)CenterBuffer );
		RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsExtentTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, SizeX * 4 * sizeof( float ), (uint8*)ExtentBuffer );
#elif 0
		static float CenterBuffer[ SizeX * SizeY ][4];
		static float ExtentBuffer[ SizeX * SizeY ][4];

		{
			QUICK_SCOPE_CYCLE_COUNTER(STAT_HZBPackPrimitiveData);
			
			FMemory::Memset( CenterBuffer, 0, sizeof( CenterBuffer ) );
			FMemory::Memset( ExtentBuffer, 0, sizeof( ExtentBuffer ) );

			const uint32 NumPrimitives = Primitives.Num();
			for( uint32 i = 0; i < NumPrimitives; i++ )
			{
				const FOcclusionPrimitive& Primitive = Primitives[i];

				uint32 x = FMath::ReverseMortonCode2( i >> 0 );
				uint32 y = FMath::ReverseMortonCode2( i >> 1 );
				uint32 m = x + y * SizeX;

				CenterBuffer[m][0] = Primitive.Center.X;
				CenterBuffer[m][1] = Primitive.Center.Y;
				CenterBuffer[m][2] = Primitive.Center.Z;
				CenterBuffer[m][3] = 0.0f;

				ExtentBuffer[m][0] = Primitive.Extent.X;
				ExtentBuffer[m][1] = Primitive.Extent.Y;
				ExtentBuffer[m][2] = Primitive.Extent.Z;
				ExtentBuffer[m][3] = 1.0f;
			}
		}
		
		QUICK_SCOPE_CYCLE_COUNTER(STAT_HZBUpdateTextures);
		FUpdateTextureRegion2D Region( 0, 0, 0, 0, SizeX, SizeY );
		RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsCenterTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, SizeX * 4 * sizeof( float ), (uint8*)CenterBuffer );
		RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsExtentTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, SizeX * 4 * sizeof( float ), (uint8*)ExtentBuffer );
#else
		// Update in blocks to avoid large update
		const uint32 BlockSize = 8;
		const uint32 SizeInBlocksX = SizeX / BlockSize;
		const uint32 SizeInBlocksY = SizeY / BlockSize;
		const uint32 BlockStride = BlockSize * 4 * sizeof( float );

		float CenterBuffer[ BlockSize * BlockSize ][4];
		float ExtentBuffer[ BlockSize * BlockSize ][4];

		const uint32 NumPrimitives = Primitives.Num();
		for( uint32 i = 0; i < NumPrimitives; i += BlockSize * BlockSize )
		{
			const uint32 BlockEnd = FMath::Min( BlockSize * BlockSize, NumPrimitives - i );
			for( uint32 b = 0; b < BlockEnd; b++ )
			{
				const FOcclusionPrimitive& Primitive = Primitives[ i + b ];

				CenterBuffer[b][0] = Primitive.Center.X;
				CenterBuffer[b][1] = Primitive.Center.Y;
				CenterBuffer[b][2] = Primitive.Center.Z;
				CenterBuffer[b][3] = 0.0f;

				ExtentBuffer[b][0] = Primitive.Extent.X;
				ExtentBuffer[b][1] = Primitive.Extent.Y;
				ExtentBuffer[b][2] = Primitive.Extent.Z;
				ExtentBuffer[b][3] = 1.0f;
			}

			// Clear rest of block
			if( BlockEnd < BlockSize * BlockSize )
			{
				FMemory::Memset( (float*)CenterBuffer + BlockEnd * 4, 0, sizeof( CenterBuffer ) - BlockEnd * 4 * sizeof(float) );
				FMemory::Memset( (float*)ExtentBuffer + BlockEnd * 4, 0, sizeof( ExtentBuffer ) - BlockEnd * 4 * sizeof(float) );
			}

			const int32 BlockIndex = i / (BlockSize * BlockSize);
			const int32 BlockX = BlockIndex % SizeInBlocksX;
			const int32 BlockY = BlockIndex / SizeInBlocksY;

			FUpdateTextureRegion2D Region( BlockX * BlockSize, BlockY * BlockSize, 0, 0, BlockSize, BlockSize );
			RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsCenterTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, BlockStride, (uint8*)CenterBuffer );
			RHIUpdateTexture2D( (FTexture2DRHIRef&)BoundsExtentTexture->GetRenderTargetItem().ShaderResourceTexture, 0, Region, BlockStride, (uint8*)ExtentBuffer );
		}
#endif
		Primitives.Empty();
	}

	// Draw test
	{
		SCOPED_DRAW_EVENT(RHICmdList, TestHZB);

		SetRenderTarget(RHICmdList, ResultsTextureGPU->GetRenderTargetItem().TargetableTexture, NULL);

		RHICmdList.SetBlendState(TStaticBlendState<>::GetRHI());
		RHICmdList.SetRasterizerState(TStaticRasterizerState<>::GetRHI());
		RHICmdList.SetDepthStencilState(TStaticDepthStencilState< false, CF_Always >::GetRHI());

		TShaderMapRef< FScreenVS >	VertexShader(View.ShaderMap);
		TShaderMapRef< FHZBTestPS >	PixelShader(View.ShaderMap);

		static FGlobalBoundShaderState BoundShaderState;
		SetGlobalBoundShaderState(RHICmdList, View.GetFeatureLevel(), BoundShaderState, GFilterVertexDeclaration.VertexDeclarationRHI, *VertexShader, *PixelShader);

		PixelShader->SetParameters(RHICmdList, View, BoundsCenterTexture->GetRenderTargetItem().ShaderResourceTexture, BoundsExtentTexture->GetRenderTargetItem().ShaderResourceTexture );

		RHICmdList.SetViewport(0, 0, 0.0f, SizeX, SizeY, 1.0f);

		// TODO draw quads covering blocks added above
		DrawRectangle(
			RHICmdList,
			0, 0,
			SizeX, SizeY,
			0, 0,
			SizeX, SizeY,
			FIntPoint( SizeX, SizeY ),
			FIntPoint( SizeX, SizeY ),
			*VertexShader,
			EDRF_UseTriangleOptimization);
	}

	GRenderTargetPool.VisualizeTexture.SetCheckPoint(RHICmdList, ResultsTextureGPU);

	// Transfer memory GPU -> CPU
	RHICmdList.CopyToResolveTarget(ResultsTextureGPU->GetRenderTargetItem().TargetableTexture, ResultsTextureCPU->GetRenderTargetItem().ShaderResourceTexture, false, FResolveParams());
}