PixelData D3D11TextureCore::lockImpl(GpuLockOptions options, UINT32 mipLevel, UINT32 face)
	{
		if (mProperties.getMultisampleCount() > 1)
			BS_EXCEPT(InvalidStateException, "Multisampled textures cannot be accessed from the CPU directly.");

#if BS_PROFILING_ENABLED
		if (options == GBL_READ_ONLY || options == GBL_READ_WRITE)
		{
			BS_INC_RENDER_STAT_CAT(ResRead, RenderStatObject_Texture);
		}

		if (options == GBL_READ_WRITE || options == GBL_WRITE_ONLY || options == GBL_WRITE_ONLY_DISCARD || options == GBL_WRITE_ONLY_NO_OVERWRITE)
		{
			BS_INC_RENDER_STAT_CAT(ResWrite, RenderStatObject_Texture);
		}
#endif

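		// Compute the dimensions of the requested mip level; each successive level halves the previous one, clamped to 1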
		UINT32 mipWidth = std::max(1u, mProperties.getWidth() >> mipLevel);
		UINT32 mipHeight = std::max(1u, mProperties.getHeight() >> mipLevel);
		UINT32 mipDepth = std::max(1u, mProperties.getDepth() >> mipLevel);

		PixelData lockedArea(mipWidth, mipHeight, mipDepth, mProperties.getFormat());

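		// Translate the engine lock options into the corresponding D3D11 map type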
		D3D11_MAP flags = D3D11Mappings::getLockOptions(options);

		UINT32 rowPitch, slicePitch;

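		// Reads (and read-writes) must go through a staging texture, since default-usage D3D11 resources cannot be
		// mapped for CPU access directly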
		if(flags == D3D11_MAP_READ || flags == D3D11_MAP_READ_WRITE)
		{
			UINT8* data = (UINT8*)mapstagingbuffer(flags, face, mipLevel, rowPitch, slicePitch);
			lockedArea.setExternalBuffer(data);
			lockedArea.setRowPitch(rowPitch);
			lockedArea.setSlicePitch(slicePitch);
			mLockedForReading = true;
		}
		else
		{
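			// Write-only locks: dynamic textures can map the GPU resource directly, while other textures write into a
			// temporary system-memory buffer that gets copied to the GPU resource when the texture is unlocked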
			if (mProperties.getUsage() == TU_DYNAMIC)
			{
				UINT8* data = (UINT8*)map(mTex, flags, face, mipLevel, rowPitch, slicePitch);
				lockedArea.setExternalBuffer(data);
				lockedArea.setRowPitch(rowPitch);
				lockedArea.setSlicePitch(slicePitch);
			}
			else
				lockedArea.setExternalBuffer((UINT8*)mapstaticbuffer(lockedArea, mipLevel, face));

			mLockedForReading = false;
		}

		return lockedArea;
	}

	PixelData VulkanTexture::lockImpl(GpuLockOptions options, UINT32 mipLevel, UINT32 face, UINT32 deviceIdx,
										  UINT32 queueIdx)
	{
		const TextureProperties& props = getProperties();

		if (props.getNumSamples() > 1)
		{
			LOGERR("Multisampled textures cannot be accessed from the CPU directly.");
			return PixelData();
		}

#if BS_PROFILING_ENABLED
		if (options == GBL_READ_ONLY || options == GBL_READ_WRITE)
		{
			BS_INC_RENDER_STAT_CAT(ResRead, RenderStatObject_Texture);
		}

		if (options == GBL_READ_WRITE || options == GBL_WRITE_ONLY || options == GBL_WRITE_ONLY_DISCARD || options == GBL_WRITE_ONLY_NO_OVERWRITE)
		{
			BS_INC_RENDER_STAT_CAT(ResWrite, RenderStatObject_Texture);
		}
#endif

		UINT32 mipWidth = std::max(1u, props.getWidth() >> mipLevel);
		UINT32 mipHeight = std::max(1u, props.getHeight() >> mipLevel);
		UINT32 mipDepth = std::max(1u, props.getDepth() >> mipLevel);

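		// Describes the locked mip level; the backing memory is assigned below, depending on which mapping path is taken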
		PixelData lockedArea(mipWidth, mipHeight, mipDepth, mInternalFormats[deviceIdx]);

		VulkanImage* image = mImages[deviceIdx];

		if (image == nullptr)
			return PixelData();

		mIsMapped = true;
		mMappedDeviceIdx = deviceIdx;
		mMappedGlobalQueueIdx = queueIdx;
		mMappedFace = face;
		mMappedMip = mipLevel;
		mMappedLockOptions = options;

		VulkanRenderAPI& rapi = static_cast<VulkanRenderAPI&>(RenderAPI::instance());
		VulkanDevice& device = *rapi._getDevice(deviceIdx);

		VulkanCommandBufferManager& cbManager = gVulkanCBManager();
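		// Resolve the global queue index into a queue type and a per-type (local) index, used for picking the transfer
		// command buffer below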
		GpuQueueType queueType;
		UINT32 localQueueIdx = CommandSyncMask::getQueueIdxAndType(queueIdx, queueType);

		VulkanImageSubresource* subresource = image->getSubresource(face, mipLevel);

		// If the memory is host-visible, try mapping it directly
		if (mDirectlyMappable)
		{
			// Initially the texture will be in preinitialized layout, and it will transition to general layout on first
			// use in shader. No further transitions are allowed for directly mappable textures.
			assert(subresource->getLayout() == VK_IMAGE_LAYOUT_PREINITIALIZED || 
				   subresource->getLayout() == VK_IMAGE_LAYOUT_GENERAL);

			// GPU should never be allowed to write to a directly mappable texture, since only linear tiling is supported
			// for direct mapping, and we don't support using it with either storage textures or render targets.
			assert(!mSupportsGPUWrites);

			// Check if the GPU is currently reading from the image
			UINT32 useMask = subresource->getUseInfo(VulkanUseFlag::Read);
			bool isUsedOnGPU = useMask != 0;

			// We're safe to map directly since the GPU isn't using the subresource
			if (!isUsedOnGPU)
			{
				// If some CB has an operation queued that will be using the current contents of the image, create a new 
				// image so we don't modify the previous use of the image
				if (subresource->isBound())
				{
					VulkanImage* newImage = createImage(device, mInternalFormats[deviceIdx]);

					// Copy the contents of the current image to the new one, unless the caller explicitly specifies they
					// don't care about the current contents
					if (options != GBL_WRITE_ONLY_DISCARD)
					{
						VkMemoryRequirements memReqs;
						vkGetImageMemoryRequirements(device.getLogical(), image->getHandle(), &memReqs);

						UINT8* src = image->map(0, (UINT32)memReqs.size);
						UINT8* dst = newImage->map(0, (UINT32)memReqs.size);

						memcpy(dst, src, memReqs.size);

						image->unmap();
						newImage->unmap();
					}

					image->destroy();
					image = newImage;
					mImages[deviceIdx] = image;
				}

				image->map(face, mipLevel, lockedArea);
				return lockedArea;
			}

			// The caller guarantees they won't touch the same data as the GPU, so just map even though the GPU is using
			// the subresource
			if (options == GBL_WRITE_ONLY_NO_OVERWRITE)
			{
				image->map(face, mipLevel, lockedArea);
				return lockedArea;
			}

			// Caller doesn't care about the current contents, so just discard the existing image and create a new one
			if (options == GBL_WRITE_ONLY_DISCARD)
			{
				// We need to discard the entire image, even though we're only writing to a single sub-resource
				image->destroy();

				image = createImage(device, mInternalFormats[deviceIdx]);
				mImages[deviceIdx] = image;

				image->map(face, mipLevel, lockedArea);
				return lockedArea;
			}

			// We need to read the current contents of the image
			if (options == GBL_READ_ONLY || options == GBL_READ_WRITE)
			{
				VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

				// Ensure flush() will wait for all queues currently using the texture (if any) to finish
				// If only reading, wait for all writes to complete, otherwise wait on both writes and reads
				if (options == GBL_READ_ONLY)
					useMask = subresource->getUseInfo(VulkanUseFlag::Write);
				else
					useMask = subresource->getUseInfo(VulkanUseFlag::Read | VulkanUseFlag::Write);

				transferCB->appendMask(useMask);

				// Submit the command buffer and wait until it finishes
				transferCB->flush(true);

				// If writing and some CB has an operation queued that will be using the current contents of the image, 
				// create a new image so we don't modify the previous use of the image
				if (options == GBL_READ_WRITE && subresource->isBound())
				{
					VulkanImage* newImage = createImage(device, mInternalFormats[deviceIdx]);

					VkMemoryRequirements memReqs;
					vkGetImageMemoryRequirements(device.getLogical(), image->getHandle(), &memReqs);

					UINT8* src = image->map(0, (UINT32)memReqs.size);
					UINT8* dst = newImage->map(0, (UINT32)memReqs.size);

					memcpy(dst, src, memReqs.size);

					image->unmap();
					newImage->unmap();

					image->destroy();
					image = newImage;
					mImages[deviceIdx] = image;
				}

				image->map(face, mipLevel, lockedArea);
				return lockedArea;
			}

			// Otherwise, we're doing write only, in which case it's best to use the staging buffer to avoid waiting
			// and blocking, so fall through
		}

		// Can't use direct mapping, so use a staging buffer

		// We might need to copy the current contents of the image to the staging buffer. Even if the user doesn't plan on
		// reading, it is still required as we will eventually copy all of the contents back to the original image,
		// and we can't write potentially uninitialized data. The only exception is when the caller specifies the image
		// contents should be discarded, in which case the caller guarantees to overwrite the entire locked area with
		// their own contents.
		bool needRead = options != GBL_WRITE_ONLY_DISCARD_RANGE && options != GBL_WRITE_ONLY_DISCARD;

		// Allocate a staging buffer
		mStagingBuffer = createStaging(device, lockedArea, needRead);

		if (needRead) // If reading, we need to copy the current contents of the image to the staging buffer
		{
			VulkanTransferBuffer* transferCB = cbManager.getTransferBuffer(deviceIdx, queueType, localQueueIdx);

			// Similar to above, if the image supports GPU writes or is currently being written to, we need to wait on any
			// potential writes to complete
			UINT32 writeUseMask = subresource->getUseInfo(VulkanUseFlag::Write);

			if (mSupportsGPUWrites || writeUseMask != 0)
			{
				// Ensure flush() will wait for all queues currently writing to the image (if any) to finish
				transferCB->appendMask(writeUseMask);
			}

			VkImageSubresourceRange range;
			range.aspectMask = image->getAspectFlags();
			range.baseArrayLayer = face;
			range.layerCount = 1;
			range.baseMipLevel = mipLevel;
			range.levelCount = 1;

			VkImageSubresourceLayers rangeLayers;
			if ((props.getUsage() & TU_DEPTHSTENCIL) != 0)
				rangeLayers.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
			else
				rangeLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;

			rangeLayers.baseArrayLayer = range.baseArrayLayer;
			rangeLayers.layerCount = range.layerCount;
			rangeLayers.mipLevel = range.baseMipLevel;

			VkExtent3D extent;
			PixelUtil::getSizeForMipLevel(props.getWidth(), props.getHeight(), props.getDepth(), mMappedMip,
										  extent.width, extent.height, extent.depth);

			// Transition the image to a layout valid for the transfer
			VkAccessFlags currentAccessMask = image->getAccessFlags(subresource->getLayout());
			transferCB->setLayout(image->getHandle(), currentAccessMask, VK_ACCESS_TRANSFER_READ_BIT, subresource->getLayout(),
								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, range);

			// Queue copy command
			image->copy(transferCB, mStagingBuffer, extent, rangeLayers, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			// Transition back to the original layout
			VkImageLayout dstLayout = image->getOptimalLayout();
			currentAccessMask = image->getAccessFlags(dstLayout);

			transferCB->setLayout(image->getHandle(), VK_ACCESS_TRANSFER_READ_BIT, currentAccessMask,
								  VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstLayout, range);
			transferCB->getCB()->registerResource(image, range, VulkanUseFlag::Read, ResourceUsage::Transfer);

			// Ensure data written to the staging buffer by the transfer is visible to the host
			VkAccessFlags stagingAccessFlags;
			if (options == GBL_READ_ONLY)
				stagingAccessFlags = VK_ACCESS_HOST_READ_BIT;
			else // Must be read/write
				stagingAccessFlags = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT;

			transferCB->memoryBarrier(mStagingBuffer->getHandle(),
									  VK_ACCESS_TRANSFER_WRITE_BIT,
									  stagingAccessFlags,
									  VK_PIPELINE_STAGE_TRANSFER_BIT,
									  VK_PIPELINE_STAGE_HOST_BIT);

			// Submit the command buffer and wait until it finishes
			transferCB->flush(true);
		}

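		// Hand the mapped staging memory to the caller; any writes are copied back to the image when the texture is
		// unlocked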
		UINT8* data = mStagingBuffer->map(0, lockedArea.getSize());
		lockedArea.setExternalBuffer(data);

		return lockedArea;
	}
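
	// A minimal usage sketch (not part of the original source), assuming the core-thread texture exposes lock()/unlock()
	// wrappers around lockImpl()/unlockImpl() and that PixelData provides getData()/getSize():
	//
	//     PixelData area = texture->lock(GBL_WRITE_ONLY_DISCARD, 0, 0);
	//     memcpy(area.getData(), srcPixels, area.getSize());
	//     texture->unlock();
	//
	// Which of the paths above is taken (direct map, staging buffer, or image recreation on discard) depends on the
	// requested lock options and on where the image memory lives.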