static void write_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, unsigned *dst, struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info) { RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); uint64_t va = radv_buffer_get_va(buffer->bo); uint32_t range = buffer_info->range; if (buffer_info->range == VK_WHOLE_SIZE) range = buffer->size - buffer_info->offset; va += buffer_info->offset + buffer->offset; dst[0] = va; dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); dst[2] = range; dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); if (cmd_buffer) radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo, 7); else *buffer_list = buffer->bo; }
static void si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image, const struct legacy_surf_level *base_level_info, unsigned base_level, unsigned first_level, unsigned block_width, bool is_stencil, bool is_storage_image, uint32_t *state) { uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0; uint64_t va = gpu_address; enum chip_class chip_class = device->physical_device->rad_info.chip_class; uint64_t meta_va = 0; if (chip_class >= GFX9) { if (is_stencil) va += image->surface.u.gfx9.stencil_offset; else va += image->surface.u.gfx9.surf_offset; } else va += base_level_info->offset; state[0] = va >> 8; if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D) state[0] |= image->surface.tile_swizzle; state[1] &= C_008F14_BASE_ADDRESS_HI; state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); if (chip_class >= VI) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; if (!is_storage_image && radv_dcc_enabled(image, first_level)) { meta_va = gpu_address + image->dcc_offset; if (chip_class <= VI) meta_va += base_level_info->dcc_offset; } else if (!is_storage_image && radv_image_is_tc_compat_htile(image)) { meta_va = gpu_address + image->htile_offset; } if (meta_va) { state[6] |= S_008F28_COMPRESSION_EN(1); state[7] = meta_va >> 8; state[7] |= image->surface.tile_swizzle; } }
static void write_dynamic_buffer_descriptor(struct radv_device *device, struct radv_descriptor_range *range, struct radeon_winsys_bo **buffer_list, const VkDescriptorBufferInfo *buffer_info) { RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer); uint64_t va = radv_buffer_get_va(buffer->bo); unsigned size = buffer_info->range; if (buffer_info->range == VK_WHOLE_SIZE) size = buffer->size - buffer_info->offset; va += buffer_info->offset + buffer->offset; range->va = va; range->size = size; *buffer_list = buffer->bo; }
static void radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer, VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state) { const struct vk_format_description *desc; unsigned stride; uint64_t gpu_address = radv_buffer_get_va(buffer->bo); uint64_t va = gpu_address + buffer->offset; unsigned num_format, data_format; int first_non_void; desc = vk_format_description(vk_format); first_non_void = vk_format_get_first_non_void_channel(vk_format); stride = desc->block.bits / 8; num_format = radv_translate_buffer_numformat(desc, first_non_void); data_format = radv_translate_buffer_dataformat(desc, first_non_void); va += offset; state[0] = va; state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); if (device->physical_device->rad_info.chip_class != VI && stride) { range /= stride; } state[2] = range; state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) | S_008F0C_DST_SEL_Z(radv_map_swizzle(desc->swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); }
static VkResult radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_pool *pool, const struct radv_descriptor_set_layout *layout, struct radv_descriptor_set **out_set) { struct radv_descriptor_set *set; unsigned range_offset = sizeof(struct radv_descriptor_set) + sizeof(struct radeon_winsys_bo *) * layout->buffer_count; unsigned mem_size = range_offset + sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count; if (pool->host_memory_base) { if (pool->host_memory_end - pool->host_memory_ptr < mem_size) return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); set = (struct radv_descriptor_set*)pool->host_memory_ptr; pool->host_memory_ptr += mem_size; } else { set = vk_alloc2(&device->alloc, NULL, mem_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!set) return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } memset(set, 0, mem_size); if (layout->dynamic_offset_count) { set->dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset); } set->layout = layout; if (layout->size) { uint32_t layout_size = align_u32(layout->size, 32); set->size = layout->size; if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) { vk_free2(&device->alloc, NULL, set); return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); } /* try to allocate linearly first, so that we don't spend * time looking for gaps if the app only allocates & * resets via the pool. */ if (pool->current_offset + layout_size <= pool->size) { set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset); set->va = radv_buffer_get_va(set->bo) + pool->current_offset; if (!pool->host_memory_base) { pool->entries[pool->entry_count].offset = pool->current_offset; pool->entries[pool->entry_count].size = layout_size; pool->entries[pool->entry_count].set = set; pool->entry_count++; } pool->current_offset += layout_size; } else if (!pool->host_memory_base) { uint64_t offset = 0; int index; for (index = 0; index < pool->entry_count; ++index) { if (pool->entries[index].offset - offset >= layout_size) break; offset = pool->entries[index].offset + pool->entries[index].size; } if (pool->size - offset < layout_size) { vk_free2(&device->alloc, NULL, set); return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); } set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); set->va = radv_buffer_get_va(set->bo) + offset; memmove(&pool->entries[index + 1], &pool->entries[index], sizeof(pool->entries[0]) * (pool->entry_count - index)); pool->entries[index].offset = offset; pool->entries[index].size = layout_size; pool->entries[index].set = set; pool->entry_count++; } else return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); } if (layout->has_immutable_samplers) { for (unsigned i = 0; i < layout->binding_count; ++i) { if (!layout->binding[i].immutable_samplers_offset || layout->binding[i].immutable_samplers_equal) continue; unsigned offset = layout->binding[i].offset / 4; if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) offset += 16; const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset); for (unsigned j = 0; j < layout->binding[i].array_size; ++j) { memcpy(set->mapped_ptr + offset, samplers + 4 * j, 16); offset += layout->binding[i].size / 4; } } } *out_set = set; return VK_SUCCESS; }
static void radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage, struct ac_wave_info *waves, unsigned num_waves, FILE *f) { uint64_t start_addr, end_addr; unsigned i; if (!shader) return; start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset; end_addr = start_addr + shader->code_size; /* See if any wave executes the shader. */ for (i = 0; i < num_waves; i++) { if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) break; } if (i == num_waves) return; /* the shader is not being executed */ /* Remember the first found wave. The waves are sorted according to PC. */ waves = &waves[i]; num_waves -= i; /* Get the list of instructions. * Buffer size / 4 is the upper bound of the instruction count. */ unsigned num_inst = 0; struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(shader, stage)); /* Print instructions with annotations. */ for (i = 0; i < num_inst; i++) { struct radv_shader_inst *inst = &instructions[i]; fprintf(f, "%s\n", inst->text); /* Print which waves execute the instruction right now. */ while (num_waves && start_addr + inst->offset == waves->pc) { fprintf(f, " " COLOR_GREEN "^ SE%u SH%u CU%u " "SIMD%u WAVE%u EXEC=%016"PRIx64 " ", waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); if (inst->size == 4) { fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); } else { fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); } waves->matched = true; waves = &waves[1]; num_waves--; } } fprintf(f, "\n\n"); free(instructions); }