Example #1
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

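   /* Zero the new table, then re-insert every live entry;
    * tu_pipeline_cache_set_entry rebuilds kernel_count and total_size,
    * which were reset above.
    */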
   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}
Example #2
VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_alloc2(&device->alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

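   /* Use the caller-supplied allocator if provided, otherwise fall back to
    * the device's allocator.
    */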
   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
Example #3
VkResult radv_CreatePipelineLayout(
	VkDevice                                    _device,
	const VkPipelineLayoutCreateInfo*           pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineLayout*                           pPipelineLayout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_layout *layout;
	struct mesa_sha1 ctx;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);

	layout = vk_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8,
			     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (layout == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	layout->num_sets = pCreateInfo->setLayoutCount;

	unsigned dynamic_offset_count = 0;


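	/* Hash the set layouts and the push-constant size; the digest stored in
	 * layout->sha1 identifies this layout, e.g. as a pipeline-cache key.
	 */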
	_mesa_sha1_init(&ctx);
	for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
		RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
				 pCreateInfo->pSetLayouts[set]);
		layout->set[set].layout = set_layout;

		layout->set[set].dynamic_offset_start = dynamic_offset_count;
		for (uint32_t b = 0; b < set_layout->binding_count; b++) {
			dynamic_offset_count += set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
			if (set_layout->binding[b].immutable_samplers_offset)
				_mesa_sha1_update(&ctx, radv_immutable_samplers(set_layout, set_layout->binding + b),
				                  set_layout->binding[b].array_size * 4 * sizeof(uint32_t));
		}
		_mesa_sha1_update(&ctx, set_layout->binding,
				  sizeof(set_layout->binding[0]) * set_layout->binding_count);
	}

	layout->dynamic_offset_count = dynamic_offset_count;
	layout->push_constant_size = 0;

	for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
		const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
		layout->push_constant_size = MAX2(layout->push_constant_size,
						  range->offset + range->size);
	}

	layout->push_constant_size = align(layout->push_constant_size, 16);
	_mesa_sha1_update(&ctx, &layout->push_constant_size,
			  sizeof(layout->push_constant_size));
	_mesa_sha1_final(&ctx, layout->sha1);
	*pPipelineLayout = radv_pipeline_layout_to_handle(layout);

	return VK_SUCCESS;
}
Example #4
File: anv_query.c  Project: etnaviv/mesa
VkResult anv_CreateQueryPool(
    VkDevice                                    _device,
    const VkQueryPoolCreateInfo*                pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkQueryPool*                                pQueryPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_query_pool *pool;
   VkResult result;
   uint32_t slot_size;
   uint64_t size;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);

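   /* Only occlusion and timestamp queries are supported; pipeline
    * statistics queries are rejected outright.
    */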
   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      return VK_ERROR_INCOMPATIBLE_DRIVER;
   default:
      assert(!"Invalid query type");
   }

   slot_size = sizeof(struct anv_query_pool_slot);
   pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->type = pCreateInfo->queryType;
   pool->slots = pCreateInfo->queryCount;

   size = pCreateInfo->queryCount * slot_size;
   result = anv_bo_init_new(&pool->bo, device, size);
   if (result != VK_SUCCESS)
      goto fail;

   pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0, size, 0);

   *pQueryPool = anv_query_pool_to_handle(pool);

   return VK_SUCCESS;

 fail:
   vk_free2(&device->alloc, pAllocator, pool);

   return result;
}
Example #5
File: anv_intel.c  Project: chemecse/mesa
VkResult anv_CreateDmaBufImageINTEL(
    VkDevice                                    _device,
    const VkDmaBufImageCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkDeviceMemory*                             pMem,
    VkImage*                                    pImage)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_device_memory *mem;
   struct anv_image *image;
   VkResult result;
   VkImage image_h;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DMA_BUF_IMAGE_CREATE_INFO_INTEL);

   mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_image_create(_device,
      &(struct anv_image_create_info) {
         .isl_tiling_flags = ISL_TILING_X_BIT,
         .stride = pCreateInfo->strideInBytes,
         .vk_info =
      &(VkImageCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = pCreateInfo->format,
         .extent = pCreateInfo->extent,
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = 1,
         /* FIXME: Need a way to use X tiling to allow scanout */
         .tiling = VK_IMAGE_TILING_OPTIMAL,
         .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
         .flags = 0,
      }},
Example #6
static VkResult
compute_pipeline_create(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   const struct anv_physical_device *physical_device =
      &device->instance->physicalDevice;
   const struct gen_device_info *devinfo = &physical_device->info;
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
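   /* The pipeline's batch is backed by inline storage in the pipeline
    * object itself.
    */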
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;

   pipeline->needs_data_cache = false;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module,  pCreateInfo->stage.module);
   result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                                    pCreateInfo->stage.pName,
                                    pCreateInfo->stage.pSpecializationInfo);
   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(pipeline, cs_prog_data->base.total_shared > 0);

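   /* The right mask enables the lanes of the final, possibly partial,
    * SIMD group; simd_size is a power of two.
    */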
   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);

   *pPipeline = anv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}
Example #7
VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice _device,
                                                const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo,
                                                const VkAllocationCallbacks *pAllocator,
                                                VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
	const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
	const size_t size = sizeof(struct radv_descriptor_update_template) +
		sizeof(struct radv_descriptor_update_template_entry) * entry_count;
	struct radv_descriptor_update_template *templ;
	uint32_t i;

	templ = vk_alloc2(&device->alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!templ)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	templ->entry_count = entry_count;

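	/* Translate each VkDescriptorUpdateTemplateEntryKHR into the driver's
	 * internal entry, precomputing destination offsets and strides.
	 */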
	for (i = 0; i < entry_count; i++) {
		const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
		const struct radv_descriptor_set_binding_layout *binding_layout =
			set_layout->binding + entry->dstBinding;
		const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
		const uint32_t *immutable_samplers = NULL;
		uint32_t dst_offset;
		uint32_t dst_stride;

		/* dst_offset is an offset into dynamic_descriptors when the descriptor
		   is dynamic, and an offset into mapped_ptr otherwise */
		switch (entry->descriptorType) {
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
			assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR);
			dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
			dst_stride = 0; /* Not used */
			break;
		default:
			switch (entry->descriptorType) {
			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			case VK_DESCRIPTOR_TYPE_SAMPLER:
				/* Immutable samplers are copied into push descriptors when they are pushed */
				if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
				    binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
					immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
				}
				break;
			default:
				break;
			}
			dst_offset = binding_layout->offset / 4 + binding_layout->size * entry->dstArrayElement / 4;
			dst_stride = binding_layout->size / 4;
			break;
		}

		templ->entry[i] = (struct radv_descriptor_update_template_entry) {
			.descriptor_type = entry->descriptorType,
			.descriptor_count = entry->descriptorCount,
			.src_offset = entry->offset,
			.src_stride = entry->stride,
			.dst_offset = dst_offset,
			.dst_stride = dst_stride,
			.buffer_offset = buffer_offset,
			.has_sampler = !binding_layout->immutable_samplers_offset,
			.immutable_samplers = immutable_samplers
		};
	}

	*pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
	return VK_SUCCESS;
}

void radv_DestroyDescriptorUpdateTemplateKHR(VkDevice _device,
                                             VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
                                             const VkAllocationCallbacks *pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);

	if (!templ)
		return;

	vk_free2(&device->alloc, pAllocator, templ);
}
Example #8
VkResult radv_CreateDescriptorPool(
	VkDevice                                    _device,
	const VkDescriptorPoolCreateInfo*           pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDescriptorPool*                           pDescriptorPool)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_descriptor_pool *pool;
	int size = sizeof(struct radv_descriptor_pool);
	uint64_t bo_size = 0, bo_count = 0, range_count = 0;


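	/* Size the backing BO from the pool sizes: each descriptor type has a
	 * fixed GPU footprint (32, 64 or 96 bytes).
	 */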
	for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
		if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
			bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;

		switch(pCreateInfo->pPoolSizes[i].type) {
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
			range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_SAMPLER:
			/* 32 as we may need to align for images */
			bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
			bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		default:
			unreachable("unknown descriptor type\n");
			break;
		}
	}

	if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
		uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
		host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
		host_size += sizeof(struct radv_descriptor_range) * range_count;
		size += host_size;
	} else {
		size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
	}

	pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
	                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!pool)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(pool, 0, sizeof(*pool));

	if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
		pool->host_memory_base = (uint8_t*)pool + sizeof(struct radv_descriptor_pool);
		pool->host_memory_ptr = pool->host_memory_base;
		pool->host_memory_end = (uint8_t*)pool + size;
	}

	if (bo_size) {
		pool->bo = device->ws->buffer_create(device->ws, bo_size, 32,
						     RADEON_DOMAIN_VRAM,
						     RADEON_FLAG_NO_INTERPROCESS_SHARING |
						     RADEON_FLAG_READ_ONLY);
		pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
	}
	pool->size = bo_size;
	pool->max_entry_count = pCreateInfo->maxSets;

	*pDescriptorPool = radv_descriptor_pool_to_handle(pool);
	return VK_SUCCESS;
}
Example #9
VkResult radv_CreateDescriptorSetLayout(
	VkDevice                                    _device,
	const VkDescriptorSetLayoutCreateInfo*      pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDescriptorSetLayout*                      pSetLayout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_descriptor_set_layout *set_layout;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);

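	/* First pass: find the highest binding number and count immutable
	 * samplers so the whole layout can be allocated in one block.
	 */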
	uint32_t max_binding = 0;
	uint32_t immutable_sampler_count = 0;
	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
		max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
		if (pCreateInfo->pBindings[j].pImmutableSamplers)
			immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
	}

	uint32_t samplers_offset = sizeof(struct radv_descriptor_set_layout) +
		(max_binding + 1) * sizeof(set_layout->binding[0]);
	size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);

	set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8,
				 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!set_layout)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	set_layout->flags = pCreateInfo->flags;

	/* We just allocate all the samplers at the end of the struct */
	uint32_t *samplers = (uint32_t*)&set_layout->binding[max_binding + 1];

	set_layout->binding_count = max_binding + 1;
	set_layout->shader_stages = 0;
	set_layout->dynamic_shader_stages = 0;
	set_layout->has_immutable_samplers = false;
	set_layout->size = 0;

	memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));

	uint32_t buffer_count = 0;
	uint32_t dynamic_offset_count = 0;

	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
		const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
		uint32_t b = binding->binding;
		uint32_t alignment;
		unsigned binding_buffer_count = 0;

		switch (binding->descriptorType) {
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
			assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
			set_layout->binding[b].dynamic_offset_count = 1;
			set_layout->dynamic_shader_stages |= binding->stageFlags;
			set_layout->binding[b].size = 0;
			binding_buffer_count = 1;
			alignment = 1;
			break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
			set_layout->binding[b].size = 16;
			binding_buffer_count = 1;
			alignment = 16;
			break;
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
			/* main descriptor + fmask descriptor */
			set_layout->binding[b].size = 64;
			binding_buffer_count = 1;
			alignment = 32;
			break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			/* main descriptor + fmask descriptor + sampler */
			set_layout->binding[b].size = 96;
			binding_buffer_count = 1;
			alignment = 32;
			break;
		case VK_DESCRIPTOR_TYPE_SAMPLER:
			set_layout->binding[b].size = 16;
			alignment = 16;
			break;
		default:
			unreachable("unknown descriptor type\n");
			break;
		}

		set_layout->size = align(set_layout->size, alignment);
		assert(binding->descriptorCount > 0);
		set_layout->binding[b].type = binding->descriptorType;
		set_layout->binding[b].array_size = binding->descriptorCount;
		set_layout->binding[b].offset = set_layout->size;
		set_layout->binding[b].buffer_offset = buffer_count;
		set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;

		if (binding->pImmutableSamplers) {
			set_layout->binding[b].immutable_samplers_offset = samplers_offset;
			set_layout->binding[b].immutable_samplers_equal = true;
			set_layout->has_immutable_samplers = true;


			for (uint32_t i = 0; i < binding->descriptorCount; i++)
				memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
			for (uint32_t i = 1; i < binding->descriptorCount; i++)
				if (memcmp(samplers + 4 * i, samplers, 16) != 0)
					set_layout->binding[b].immutable_samplers_equal = false;

			/* Don't reserve space for the samplers if they're not accessed. */
			if (set_layout->binding[b].immutable_samplers_equal) {
				if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
					set_layout->binding[b].size -= 32;
				else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
					set_layout->binding[b].size -= 16;
			}
			samplers += 4 * binding->descriptorCount;
			samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
		}

		set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
		buffer_count += binding->descriptorCount * binding_buffer_count;
		dynamic_offset_count += binding->descriptorCount *
			set_layout->binding[b].dynamic_offset_count;
		set_layout->shader_stages |= binding->stageFlags;
	}

	set_layout->buffer_count = buffer_count;
	set_layout->dynamic_offset_count = dynamic_offset_count;

	*pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);

	return VK_SUCCESS;
}
Example #10
static VkResult
radv_descriptor_set_create(struct radv_device *device,
			   struct radv_descriptor_pool *pool,
			   const struct radv_descriptor_set_layout *layout,
			   struct radv_descriptor_set **out_set)
{
	struct radv_descriptor_set *set;
	unsigned range_offset = sizeof(struct radv_descriptor_set) +
		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
	unsigned mem_size = range_offset +
		sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;

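	/* Pools created without VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
	 * carve set storage out of a preallocated host block; otherwise each set
	 * gets its own allocation.
	 */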
	if (pool->host_memory_base) {
		if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);

		set = (struct radv_descriptor_set*)pool->host_memory_ptr;
		pool->host_memory_ptr += mem_size;
	} else {
		set = vk_alloc2(&device->alloc, NULL, mem_size, 8,
		                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if (!set)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	memset(set, 0, mem_size);

	if (layout->dynamic_offset_count) {
		set->dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset);
	}

	set->layout = layout;
	if (layout->size) {
		uint32_t layout_size = align_u32(layout->size, 32);
		set->size = layout->size;

		if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
			vk_free2(&device->alloc, NULL, set);
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
		}

		/* try to allocate linearly first, so that we don't spend
		 * time looking for gaps if the app only allocates &
		 * resets via the pool. */
		if (pool->current_offset + layout_size <= pool->size) {
			set->bo = pool->bo;
			set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
			set->va = radv_buffer_get_va(set->bo) + pool->current_offset;
			if (!pool->host_memory_base) {
				pool->entries[pool->entry_count].offset = pool->current_offset;
				pool->entries[pool->entry_count].size = layout_size;
				pool->entries[pool->entry_count].set = set;
				pool->entry_count++;
			}
			pool->current_offset += layout_size;
		} else if (!pool->host_memory_base) {
			uint64_t offset = 0;
			int index;

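			/* First-fit scan of the offset-sorted entry list for a gap
			 * large enough to hold this set. */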
			for (index = 0; index < pool->entry_count; ++index) {
				if (pool->entries[index].offset - offset >= layout_size)
					break;
				offset = pool->entries[index].offset + pool->entries[index].size;
			}

			if (pool->size - offset < layout_size) {
				vk_free2(&device->alloc, NULL, set);
				return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
			}
			set->bo = pool->bo;
			set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
			set->va = radv_buffer_get_va(set->bo) + offset;
			memmove(&pool->entries[index + 1], &pool->entries[index],
				sizeof(pool->entries[0]) * (pool->entry_count - index));
			pool->entries[index].offset = offset;
			pool->entries[index].size = layout_size;
			pool->entries[index].set = set;
			pool->entry_count++;
		} else
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
	}

	if (layout->has_immutable_samplers) {
		for (unsigned i = 0; i < layout->binding_count; ++i) {
			if (!layout->binding[i].immutable_samplers_offset ||
			layout->binding[i].immutable_samplers_equal)
				continue;

			unsigned offset = layout->binding[i].offset / 4;
			if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
				offset += 16;

			const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
			for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
				memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
				offset += layout->binding[i].size / 4;
			}

		}
	}
	*out_set = set;
	return VK_SUCCESS;
}
Example #11
VkResult radv_CreateDescriptorPool(
	VkDevice                                    _device,
	const VkDescriptorPoolCreateInfo*           pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkDescriptorPool*                           pDescriptorPool)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_descriptor_pool *pool;
	unsigned max_sets = pCreateInfo->maxSets * 2;
	int size = sizeof(struct radv_descriptor_pool) +
	           max_sets * sizeof(struct radv_descriptor_pool_free_node);
	uint64_t bo_size = 0;
	pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!pool)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(pool, 0, sizeof(*pool));

	pool->free_list = -1;
	pool->full_list = 0;
	pool->free_nodes[max_sets - 1].next = -1;
	pool->max_sets = max_sets;
	pool->allocated_sets = 0;

	for (int i = 0; i  + 1 < max_sets; ++i)
		pool->free_nodes[i].next = i + 1;

	for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
		switch(pCreateInfo->pPoolSizes[i].type) {
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
			break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
		case VK_DESCRIPTOR_TYPE_SAMPLER:
			/* 32 as we may need to align for images */
			bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
			bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
			break;
		default:
			unreachable("unknown descriptor type\n");
			break;
		}
	}

	if (bo_size) {
		pool->bo = device->ws->buffer_create(device->ws, bo_size,
							32, RADEON_DOMAIN_VRAM, 0);
		pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
	}
	pool->size = bo_size;

	list_inithead(&pool->descriptor_sets);
	*pDescriptorPool = radv_descriptor_pool_to_handle(pool);
	return VK_SUCCESS;
}
Example #12
static VkResult
radv_descriptor_set_create(struct radv_device *device,
			   struct radv_descriptor_pool *pool,
			   struct radv_cmd_buffer *cmd_buffer,
			   const struct radv_descriptor_set_layout *layout,
			   struct radv_descriptor_set **out_set)
{
	struct radv_descriptor_set *set;
	unsigned mem_size = sizeof(struct radv_descriptor_set) +
		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
	set = vk_alloc2(&device->alloc, NULL, mem_size, 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!set)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	memset(set, 0, mem_size);

	if (layout->dynamic_offset_count) {
		unsigned size = sizeof(struct radv_descriptor_range) *
		                layout->dynamic_offset_count;
		set->dynamic_descriptors = vk_alloc2(&device->alloc, NULL, size, 8,
			                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if (!set->dynamic_descriptors) {
			vk_free2(&device->alloc, NULL, set);
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	}

	set->layout = layout;
	if (layout->size) {
		uint32_t layout_size = align_u32(layout->size, 32);
		set->size = layout->size;
		if (!cmd_buffer) {
			if (pool->current_offset + layout_size <= pool->size &&
			    pool->allocated_sets < pool->max_sets) {
				set->bo = pool->bo;
				set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
				set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
				pool->current_offset += layout_size;
				++pool->allocated_sets;
			} else {
				int entry = pool->free_list, prev_entry = -1;
				uint32_t offset;
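				/* Linear allocation failed: first-fit search of the free list. */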
				while (entry >= 0) {
					if (pool->free_nodes[entry].size >= layout_size) {
						if (prev_entry >= 0)
							pool->free_nodes[prev_entry].next = pool->free_nodes[entry].next;
						else
							pool->free_list = pool->free_nodes[entry].next;
						break;
					}
					prev_entry = entry;
					entry = pool->free_nodes[entry].next;
				}

				if (entry < 0) {
					vk_free2(&device->alloc, NULL, set);
					return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
				}
				offset = pool->free_nodes[entry].offset;
				pool->free_nodes[entry].next = pool->full_list;
				pool->full_list = entry;

				set->bo = pool->bo;
				set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
				set->va = device->ws->buffer_get_va(set->bo) + offset;
			}
		} else {
			unsigned bo_offset;
			if (!radv_cmd_buffer_upload_alloc(cmd_buffer, set->size, 32,
							  &bo_offset,
							  (void**)&set->mapped_ptr)) {
				vk_free2(&device->alloc, NULL, set->dynamic_descriptors);
				vk_free2(&device->alloc, NULL, set);
				return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
			}

			set->va = device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
			set->va += bo_offset;
		}
	}

	if (pool)
		list_add(&set->descriptor_pool, &pool->descriptor_sets);
	else
		list_inithead(&set->descriptor_pool);

	for (unsigned i = 0; i < layout->binding_count; ++i) {
		if (!layout->binding[i].immutable_samplers)
			continue;

		unsigned offset = layout->binding[i].offset / 4;
		if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
			offset += 16;

		for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
			struct radv_sampler* sampler = layout->binding[i].immutable_samplers[j];

			memcpy(set->mapped_ptr + offset, &sampler->state, 16);
			offset += layout->binding[i].size / 4;
		}

	}
	*out_set = set;
	return VK_SUCCESS;
}
Example #13
File: genX_pipeline.c  Project: phomes/mesa
VkResult
genX(compute_pipeline_create)(
    VkDevice                                    _device,
    struct anv_pipeline_cache *                 cache,
    const VkComputePipelineCreateInfo*          pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkPipeline*                                 pPipeline)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = anv_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   pipeline->device = device;
   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

   pipeline->blend_state.map = NULL;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pAllocator ? pAllocator : &device->alloc);
   if (result != VK_SUCCESS) {
      anv_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }
   pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
   pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
   pipeline->batch.relocs = &pipeline->batch_relocs;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers.  Make them NULL by default.
    */
   memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
   memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
   memset(pipeline->bindings, 0, sizeof(pipeline->bindings));

   pipeline->vs_simd8 = NO_KERNEL;
   pipeline->vs_vec4 = NO_KERNEL;
   pipeline->gs_kernel = NO_KERNEL;

   pipeline->active_stages = 0;
   pipeline->total_scratch = 0;

   assert(pCreateInfo->stage.stage == VK_SHADER_STAGE_COMPUTE_BIT);
   ANV_FROM_HANDLE(anv_shader_module, module,  pCreateInfo->stage.module);
   anv_pipeline_compile_cs(pipeline, cache, pCreateInfo, module,
                           pCreateInfo->stage.pName,
                           pCreateInfo->stage.pSpecializationInfo);

   pipeline->use_repclear = false;

   const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
   const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;

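   /* Each GRF register holds 8 dwords (32 bytes); push-constant space is
    * allocated in whole registers.
    */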
   unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
   unsigned push_constant_data_size =
      (prog_data->nr_params + local_id_dwords) * 4;
   unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
   unsigned push_constant_regs = reg_aligned_constant_size / 32;

   uint32_t group_size = cs_prog_data->local_size[0] *
      cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
   pipeline->cs_thread_width_max =
      DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);

   if (remainder > 0)
      pipeline->cs_right_mask = ~0u >> (32 - remainder);
   else
      pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);