예제 #1
0
static void write_buffer_descriptor(struct radv_device *device,
                                    struct radv_cmd_buffer *cmd_buffer,
                                    unsigned *dst,
                                    struct radeon_winsys_bo **buffer_list,
                                    const VkDescriptorBufferInfo *buffer_info)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
	uint64_t va = radv_buffer_get_va(buffer->bo);
	uint32_t range = buffer_info->range;

	if (buffer_info->range == VK_WHOLE_SIZE)
		range = buffer->size - buffer_info->offset;

	va += buffer_info->offset + buffer->offset;
	dst[0] = va;
	dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
	dst[2] = range;
	dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
		S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
		S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
		S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
		S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
		S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

	if (cmd_buffer)
		radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 7);
	else
		*buffer_list = buffer->bo;
}
예제 #2
0
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
			       struct radv_image *image,
			       const struct legacy_surf_level *base_level_info,
			       unsigned base_level, unsigned first_level,
			       unsigned block_width, bool is_stencil,
			       bool is_storage_image, uint32_t *state)
{
	uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
	uint64_t va = gpu_address;
	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
	uint64_t meta_va = 0;
	if (chip_class >= GFX9) {
		if (is_stencil)
			va += image->surface.u.gfx9.stencil_offset;
		else
			va += image->surface.u.gfx9.surf_offset;
	} else
		va += base_level_info->offset;

	state[0] = va >> 8;
	if (chip_class >= GFX9 ||
	    base_level_info->mode == RADEON_SURF_MODE_2D)
		state[0] |= image->surface.tile_swizzle;
	state[1] &= C_008F14_BASE_ADDRESS_HI;
	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

	if (chip_class >= VI) {
		state[6] &= C_008F28_COMPRESSION_EN;
		state[7] = 0;
		if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
			meta_va = gpu_address + image->dcc_offset;
			if (chip_class <= VI)
				meta_va += base_level_info->dcc_offset;
		} else if (!is_storage_image &&
			   radv_image_is_tc_compat_htile(image)) {
			meta_va = gpu_address + image->htile_offset;
		}

		if (meta_va) {
			state[6] |= S_008F28_COMPRESSION_EN(1);
			state[7] = meta_va >> 8;
			state[7] |= image->surface.tile_swizzle;
		}
	}
예제 #3
0
static void write_dynamic_buffer_descriptor(struct radv_device *device,
                                            struct radv_descriptor_range *range,
                                            struct radeon_winsys_bo **buffer_list,
                                            const VkDescriptorBufferInfo *buffer_info)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
	uint64_t va = radv_buffer_get_va(buffer->bo);
	unsigned size = buffer_info->range;

	if (buffer_info->range == VK_WHOLE_SIZE)
		size = buffer->size - buffer_info->offset;

	va += buffer_info->offset + buffer->offset;
	range->va = va;
	range->size = size;

	*buffer_list = buffer->bo;
}
예제 #4
0
static void
radv_make_buffer_descriptor(struct radv_device *device,
			    struct radv_buffer *buffer,
			    VkFormat vk_format,
			    unsigned offset,
			    unsigned range,
			    uint32_t *state)
{
	const struct vk_format_description *desc;
	unsigned stride;
	uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
	uint64_t va = gpu_address + buffer->offset;
	unsigned num_format, data_format;
	int first_non_void;
	desc = vk_format_description(vk_format);
	first_non_void = vk_format_get_first_non_void_channel(vk_format);
	stride = desc->block.bits / 8;

	num_format = radv_translate_buffer_numformat(desc, first_non_void);
	data_format = radv_translate_buffer_dataformat(desc, first_non_void);

	va += offset;
	state[0] = va;
	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
		S_008F04_STRIDE(stride);

	if (device->physical_device->rad_info.chip_class != VI && stride) {
		range /= stride;
	}

	state[2] = range;
	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
		   S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) |
		   S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) |
		   S_008F0C_NUM_FORMAT(num_format) |
		   S_008F0C_DATA_FORMAT(data_format);
}
예제 #5
0
static VkResult
radv_descriptor_set_create(struct radv_device *device,
			   struct radv_descriptor_pool *pool,
			   const struct radv_descriptor_set_layout *layout,
			   struct radv_descriptor_set **out_set)
{
	struct radv_descriptor_set *set;
	unsigned range_offset = sizeof(struct radv_descriptor_set) +
		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
	unsigned mem_size = range_offset +
		sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;

	if (pool->host_memory_base) {
		if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);

		set = (struct radv_descriptor_set*)pool->host_memory_ptr;
		pool->host_memory_ptr += mem_size;
	} else {
		set = vk_alloc2(&device->alloc, NULL, mem_size, 8,
		                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

		if (!set)
			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
	}

	memset(set, 0, mem_size);

	if (layout->dynamic_offset_count) {
		set->dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset);
	}

	set->layout = layout;
	if (layout->size) {
		uint32_t layout_size = align_u32(layout->size, 32);
		set->size = layout->size;

		if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
			vk_free2(&device->alloc, NULL, set);
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
		}

		/* try to allocate linearly first, so that we don't spend
		 * time looking for gaps if the app only allocates &
		 * resets via the pool. */
		if (pool->current_offset + layout_size <= pool->size) {
			set->bo = pool->bo;
			set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
			set->va = radv_buffer_get_va(set->bo) + pool->current_offset;
			if (!pool->host_memory_base) {
				pool->entries[pool->entry_count].offset = pool->current_offset;
				pool->entries[pool->entry_count].size = layout_size;
				pool->entries[pool->entry_count].set = set;
				pool->entry_count++;
			}
			pool->current_offset += layout_size;
		} else if (!pool->host_memory_base) {
			uint64_t offset = 0;
			int index;

			for (index = 0; index < pool->entry_count; ++index) {
				if (pool->entries[index].offset - offset >= layout_size)
					break;
				offset = pool->entries[index].offset + pool->entries[index].size;
			}

			if (pool->size - offset < layout_size) {
				vk_free2(&device->alloc, NULL, set);
				return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
			}
			set->bo = pool->bo;
			set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
			set->va = radv_buffer_get_va(set->bo) + offset;
			memmove(&pool->entries[index + 1], &pool->entries[index],
				sizeof(pool->entries[0]) * (pool->entry_count - index));
			pool->entries[index].offset = offset;
			pool->entries[index].size = layout_size;
			pool->entries[index].set = set;
			pool->entry_count++;
		} else
			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
	}

	if (layout->has_immutable_samplers) {
		for (unsigned i = 0; i < layout->binding_count; ++i) {
			if (!layout->binding[i].immutable_samplers_offset ||
			layout->binding[i].immutable_samplers_equal)
				continue;

			unsigned offset = layout->binding[i].offset / 4;
			if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
				offset += 16;

			const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
			for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
				memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16);
				offset += layout->binding[i].size / 4;
			}

		}
	}
	*out_set = set;
	return VK_SUCCESS;
}
예제 #6
0
파일: radv_debug.c 프로젝트: FireBurn/mesa
static void
radv_dump_annotated_shader(struct radv_shader_variant *shader,
			   gl_shader_stage stage, struct ac_wave_info *waves,
			   unsigned num_waves, FILE *f)
{
	uint64_t start_addr, end_addr;
	unsigned i;

	if (!shader)
		return;

	start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
	end_addr = start_addr + shader->code_size;

	/* See if any wave executes the shader. */
	for (i = 0; i < num_waves; i++) {
		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
			break;
	}

	if (i == num_waves)
		return; /* the shader is not being executed */

	/* Remember the first found wave. The waves are sorted according to PC. */
	waves = &waves[i];
	num_waves -= i;

	/* Get the list of instructions.
	 * Buffer size / 4 is the upper bound of the instruction count.
	 */
	unsigned num_inst = 0;
	struct radv_shader_inst *instructions =
		calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));

	si_add_split_disasm(shader->disasm_string,
			    start_addr, &num_inst, instructions);

	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
		radv_get_shader_name(shader, stage));

	/* Print instructions with annotations. */
	for (i = 0; i < num_inst; i++) {
		struct radv_shader_inst *inst = &instructions[i];

		fprintf(f, "%s\n", inst->text);

		/* Print which waves execute the instruction right now. */
		while (num_waves && start_addr + inst->offset == waves->pc) {
			fprintf(f,
				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
				waves->se, waves->sh, waves->cu, waves->simd,
				waves->wave, waves->exec);

			if (inst->size == 4) {
				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
					waves->inst_dw0);
			} else {
				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
					waves->inst_dw0, waves->inst_dw1);
			}

			waves->matched = true;
			waves = &waves[1];
			num_waves--;
		}
	}

	fprintf(f, "\n\n");
	free(instructions);
}