static void si_blit_decompress_depth_in_place(struct si_context *sctx,
                                              struct r600_texture *texture,
                                              bool is_stencil_sampler,
                                              unsigned first_level, unsigned last_level,
                                              unsigned first_layer, unsigned last_layer)
{
   struct pipe_surface *zsurf, surf_tmpl = {{0}};
   unsigned layer, max_layer, checked_last_layer, level;
   unsigned *dirty_level_mask;

   if (is_stencil_sampler) {
      sctx->db_flush_stencil_inplace = true;
      dirty_level_mask = &texture->stencil_dirty_level_mask;
   } else {
      sctx->db_flush_depth_inplace = true;
      dirty_level_mask = &texture->dirty_level_mask;
   }
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   surf_tmpl.format = texture->resource.b.b.format;

   for (level = first_level; level <= last_level; level++) {
      if (!(*dirty_level_mask & (1 << level)))
         continue;

      surf_tmpl.u.tex.level = level;

      /* The smaller the mipmap level, the fewer layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->resource.b.b, level);
      checked_last_layer = last_layer < max_layer ? last_layer : max_layer;

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b,
                                          &surf_tmpl);

         si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
         util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
                                           sctx->custom_dsa_flush, 1.0f);
         si_blitter_end(&sctx->b.b);

         pipe_surface_reference(&zsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer == max_layer) {
         *dirty_level_mask &= ~(1 << level);
      }
   }

   sctx->db_flush_depth_inplace = false;
   sctx->db_flush_stencil_inplace = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static bool si_upload_descriptors(struct si_context *sctx,
                                  struct si_descriptors *desc)
{
   unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
   void *ptr;

   if (!desc->list_dirty)
      return true;

   u_upload_alloc(sctx->b.uploader, 0, list_size,
                  &desc->buffer_offset,
                  (struct pipe_resource**)&desc->buffer, &ptr);
   if (!desc->buffer)
      return false; /* skip the draw call */

   util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

   r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
                         RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

   desc->list_dirty = false;
   desc->pointer_dirty = true;
   si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
   return true;
}
/* Upload the descriptor list: write dirty ranges through CE RAM when a
 * constant engine IB is available, otherwise do a plain staging-buffer upload. */
static bool si_upload_descriptors(struct si_context *sctx,
                                  struct si_descriptors *desc,
                                  struct r600_atom *atom)
{
   unsigned list_size = desc->num_elements * desc->element_dw_size * 4;

   if (!desc->dirty_mask)
      return true;

   if (sctx->ce_ib) {
      uint32_t const *list = (uint32_t const*)desc->list;

      if (desc->ce_ram_dirty)
         si_reinitialize_ce_ram(sctx, desc);

      while (desc->dirty_mask) {
         int begin, count;
         u_bit_scan_consecutive_range(&desc->dirty_mask, &begin, &count);

         begin *= desc->element_dw_size;
         count *= desc->element_dw_size;

         radeon_emit(sctx->ce_ib, PKT3(PKT3_WRITE_CONST_RAM, count, 0));
         radeon_emit(sctx->ce_ib, desc->ce_offset + begin * 4);
         radeon_emit_array(sctx->ce_ib, list + begin, count);
      }

      if (!si_ce_upload(sctx, desc->ce_offset, list_size,
                        &desc->buffer_offset, &desc->buffer))
         return false;
   } else {
      void *ptr;

      u_upload_alloc(sctx->b.uploader, 0, list_size, 256,
                     &desc->buffer_offset,
                     (struct pipe_resource**)&desc->buffer, &ptr);
      if (!desc->buffer)
         return false; /* skip the draw call */

      util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

      radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
                                RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
   }
   desc->pointer_dirty = true;
   desc->dirty_mask = 0;

   if (atom)
      si_mark_atom_dirty(sctx, atom);

   return true;
}
static void si_set_scissor_states(struct pipe_context *pctx,
                                  unsigned start_slot, unsigned num_scissors,
                                  const struct pipe_scissor_state *state)
{
   struct si_context *ctx = (struct si_context *)pctx;
   int i;

   for (i = 0; i < num_scissors; i++)
      ctx->scissors.states[start_slot + i] = state[i];

   if (!ctx->queued.named.rasterizer ||
       !ctx->queued.named.rasterizer->scissor_enable)
      return;

   ctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot;
   si_mark_atom_dirty(ctx, &ctx->scissors.atom);
}
/* Build and upload the T# buffer descriptors for the bound vertex buffers. */
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
   struct si_descriptors *desc = &sctx->vertex_buffers;
   bool bound[SI_NUM_VERTEX_BUFFERS] = {};
   unsigned i, count;
   uint64_t va;
   uint32_t *ptr;

   if (!sctx->vertex_buffers_dirty)
      return true;
   /* Check for NULL before reading the element count. */
   if (!sctx->vertex_elements)
      return true;

   count = sctx->vertex_elements->count;
   if (!count)
      return true;

   /* Vertex buffer descriptors are the only ones which are uploaded
    * directly through a staging buffer and don't go through
    * the fine-grained upload path.
    */
   u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
                  (struct pipe_resource**)&desc->buffer, (void**)&ptr);
   if (!desc->buffer)
      return false;

   r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
                         RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

   assert(count <= SI_NUM_VERTEX_BUFFERS);

   for (i = 0; i < count; i++) {
      struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
      struct pipe_vertex_buffer *vb;
      struct r600_resource *rbuffer;
      unsigned offset;
      uint32_t *desc = &ptr[i*4];

      if (ve->vertex_buffer_index >= Elements(sctx->vertex_buffer)) {
         memset(desc, 0, 16);
         continue;
      }

      vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
      rbuffer = (struct r600_resource*)vb->buffer;
      if (rbuffer == NULL) {
         memset(desc, 0, 16);
         continue;
      }

      offset = vb->buffer_offset + ve->src_offset;
      va = rbuffer->gpu_address + offset;

      /* Fill in T# buffer resource description */
      desc[0] = va & 0xFFFFFFFF;
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                S_008F04_STRIDE(vb->stride);

      if (sctx->b.chip_class <= CIK && vb->stride)
         /* Round up by rounding down and adding 1 */
         desc[2] = (vb->buffer->width0 - offset -
                    sctx->vertex_elements->format_size[i]) /
                   vb->stride + 1;
      else
         desc[2] = vb->buffer->width0 - offset;

      desc[3] = sctx->vertex_elements->rsrc_word3[i];

      if (!bound[ve->vertex_buffer_index]) {
         r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                               (struct r600_resource*)vb->buffer,
                               RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
         bound[ve->vertex_buffer_index] = true;
      }
   }

   /* Don't flush the const cache. It would have a very negative effect
    * on performance (confirmed by testing). New descriptors are always
    * uploaded to a fresh new buffer, so I don't think flushing the const
    * cache is needed. */
   desc->pointer_dirty = true;
   si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
   sctx->vertex_buffers_dirty = false;
   return true;
}
void si_begin_new_cs(struct si_context *ctx)
{
   if (ctx->is_debug) {
      uint32_t zero = 0;

      /* Create a buffer used for writing trace IDs and initialize it to 0. */
      assert(!ctx->trace_buf);
      ctx->trace_buf = (struct r600_resource*)
                       pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
                                          PIPE_USAGE_STAGING, 4);
      if (ctx->trace_buf)
         pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
                                     0, sizeof(zero), &zero);
      ctx->trace_id = 0;
   }

   if (ctx->trace_buf)
      si_trace_emit(ctx);

   /* Flush read caches at the beginning of CS. */
   ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
                   SI_CONTEXT_INV_VMEM_L1 |
                   SI_CONTEXT_INV_GLOBAL_L2 |
                   SI_CONTEXT_INV_SMEM_L1 |
                   SI_CONTEXT_INV_ICACHE;

   /* Set all valid PM4 groups as dirty so they get re-emitted on
    * the next draw command. */
   si_pm4_reset_emitted(ctx);

   /* The CS initialization should be emitted before everything else. */
   si_pm4_emit(ctx, ctx->init_config);
   if (ctx->init_config_gs_rings)
      si_pm4_emit(ctx, ctx->init_config_gs_rings);

   ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
   ctx->framebuffer.dirty_zsbuf = true;
   si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
   si_mark_atom_dirty(ctx, &ctx->clip_regs);
   si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
   si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
   si_mark_atom_dirty(ctx, &ctx->msaa_config);
   si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
   si_mark_atom_dirty(ctx, &ctx->cb_render_state);
   si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
   si_mark_atom_dirty(ctx, &ctx->db_render_state);
   si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
   si_mark_atom_dirty(ctx, &ctx->spi_map);
   si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
   si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
   si_all_descriptors_begin_new_cs(ctx);

   ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
   ctx->viewports.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
   si_mark_atom_dirty(ctx, &ctx->scissors.atom);
   si_mark_atom_dirty(ctx, &ctx->viewports.atom);

   r600_postflush_resume_features(&ctx->b);

   ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;

   /* Invalidate various draw states so that they are emitted before
    * the first draw call. */
   si_invalidate_draw_sh_constants(ctx);
   ctx->last_primitive_restart_en = -1;
   ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
   ctx->last_gs_out_prim = -1;
   ctx->last_prim = -1;
   ctx->last_multi_vgt_param = -1;
   ctx->last_ls_hs_config = -1;
   ctx->last_rast_prim = -1;
   ctx->last_sc_line_stipple = ~0;
   ctx->emit_scratch_reloc = true;
   ctx->last_ls = NULL;
   ctx->last_tcs = NULL;
   ctx->last_tes_sh_base = -1;
   ctx->last_num_tcs_input_cp = -1;
}
static void si_clear(struct pipe_context *ctx, unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
   struct pipe_surface *zsbuf = fb->zsbuf;
   struct r600_texture *zstex =
      zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;

   if (buffers & PIPE_CLEAR_COLOR) {
      evergreen_do_fast_color_clear(&sctx->b, fb, &sctx->framebuffer.atom,
                                    &buffers, &sctx->framebuffer.dirty_cbufs,
                                    color);
      if (!buffers)
         return; /* all buffers have been fast cleared */
   }

   if (buffers & PIPE_CLEAR_COLOR) {
      int i;

      /* These buffers cannot use fast clear, make sure to disable expansion. */
      for (i = 0; i < fb->nr_cbufs; i++) {
         struct r600_texture *tex;

         /* If not clearing this buffer, skip. */
         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         if (!fb->cbufs[i])
            continue;

         tex = (struct r600_texture *)fb->cbufs[i]->texture;
         if (tex->fmask.size == 0)
            tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
      }
   }

   if (zstex && zstex->htile_buffer &&
       zsbuf->u.tex.level == 0 &&
       zsbuf->u.tex.first_layer == 0 &&
       zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
      if (buffers & PIPE_CLEAR_DEPTH) {
         /* Need to disable EXPCLEAR temporarily if clearing
          * to a new value. */
         if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
            sctx->db_depth_disable_expclear = true;
         }

         zstex->depth_clear_value = depth;
         sctx->framebuffer.dirty_zsbuf = true;
         si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
         sctx->db_depth_clear = true;
         si_mark_atom_dirty(sctx, &sctx->db_render_state);
      }

      if (buffers & PIPE_CLEAR_STENCIL) {
         stencil &= 0xff;

         /* Need to disable EXPCLEAR temporarily if clearing
          * to a new value. */
         if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
            sctx->db_stencil_disable_expclear = true;
         }

         zstex->stencil_clear_value = stencil;
         sctx->framebuffer.dirty_zsbuf = true;
         si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
         sctx->db_stencil_clear = true;
         si_mark_atom_dirty(sctx, &sctx->db_render_state);
      }
   }

   si_blitter_begin(ctx, SI_CLEAR);
   util_blitter_clear(sctx->blitter, fb->width, fb->height,
                      util_framebuffer_get_num_layers(fb),
                      buffers, color, depth, stencil);
   si_blitter_end(ctx);

   if (sctx->db_depth_clear) {
      sctx->db_depth_clear = false;
      sctx->db_depth_disable_expclear = false;
      zstex->depth_cleared = true;
      si_mark_atom_dirty(sctx, &sctx->db_render_state);
   }

   if (sctx->db_stencil_clear) {
      sctx->db_stencil_clear = false;
      sctx->db_stencil_disable_expclear = false;
      zstex->stencil_cleared = true;
      si_mark_atom_dirty(sctx, &sctx->db_render_state);
   }
}
/* Helper function for si_blit_decompress_zs_in_place. */
static void si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
                                                  struct r600_texture *texture,
                                                  unsigned planes, unsigned level_mask,
                                                  unsigned first_layer,
                                                  unsigned last_layer)
{
   struct pipe_surface *zsurf, surf_tmpl = {{0}};
   unsigned layer, max_layer, checked_last_layer;
   unsigned fully_decompressed_mask = 0;

   if (!level_mask)
      return;

   if (planes & PIPE_MASK_S)
      sctx->db_flush_stencil_inplace = true;
   if (planes & PIPE_MASK_Z)
      sctx->db_flush_depth_inplace = true;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   surf_tmpl.format = texture->resource.b.b.format;

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      surf_tmpl.u.tex.level = level;

      /* The smaller the mipmap level, the fewer layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b,
                                          &surf_tmpl);

         si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
         util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
                                           sctx->custom_dsa_flush, 1.0f);
         si_blitter_end(&sctx->b.b);

         pipe_surface_reference(&zsurf, NULL);
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case occurs often though. */
      if (first_layer == 0 && last_layer == max_layer) {
         fully_decompressed_mask |= 1u << level;
      }
   }

   if (planes & PIPE_MASK_Z)
      texture->dirty_level_mask &= ~fully_decompressed_mask;
   if (planes & PIPE_MASK_S)
      texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

   sctx->db_flush_depth_inplace = false;
   sctx->db_flush_stencil_inplace = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_blit_decompress_depth(struct pipe_context *ctx,
                                     struct r600_texture *texture,
                                     struct r600_texture *staging,
                                     unsigned first_level, unsigned last_level,
                                     unsigned first_layer, unsigned last_layer,
                                     unsigned first_sample, unsigned last_sample)
{
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned layer, level, sample, checked_last_layer, max_layer;
   float depth = 1.0f;
   const struct util_format_description *desc;

   assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");

   desc = util_format_description(staging->resource.b.b.format);

   if (util_format_has_depth(desc))
      sctx->dbcb_depth_copy_enabled = true;
   if (util_format_has_stencil(desc))
      sctx->dbcb_stencil_copy_enabled = true;

   assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

   for (level = first_level; level <= last_level; level++) {
      /* The smaller the mipmap level, the fewer layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         for (sample = first_sample; sample <= last_sample; sample++) {
            struct pipe_surface *zsurf, *cbsurf, surf_tmpl;

            sctx->dbcb_copy_sample = sample;
            si_mark_atom_dirty(sctx, &sctx->db_render_state);

            surf_tmpl.format = texture->resource.b.b.format;
            surf_tmpl.u.tex.level = level;
            surf_tmpl.u.tex.first_layer = layer;
            surf_tmpl.u.tex.last_layer = layer;

            zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);

            surf_tmpl.format = staging->resource.b.b.format;
            cbsurf = ctx->create_surface(ctx,
                                         (struct pipe_resource*)staging,
                                         &surf_tmpl);

            si_blitter_begin(ctx, SI_DECOMPRESS);
            util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf,
                                              1 << sample,
                                              sctx->custom_dsa_flush, depth);
            si_blitter_end(ctx);

            pipe_surface_reference(&zsurf, NULL);
            pipe_surface_reference(&cbsurf, NULL);
         }
      }
   }

   sctx->dbcb_depth_copy_enabled = false;
   sctx->dbcb_stencil_copy_enabled = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static void si_blit_decompress_depth(struct pipe_context *ctx,
                                     struct r600_texture *texture,
                                     struct r600_texture *staging,
                                     unsigned first_level, unsigned last_level,
                                     unsigned first_layer, unsigned last_layer,
                                     unsigned first_sample, unsigned last_sample)
{
   struct si_context *sctx = (struct si_context *)ctx;
   unsigned layer, level, sample, checked_last_layer, max_layer, max_sample;
   float depth = 1.0f;
   const struct util_format_description *desc;
   struct r600_texture *flushed_depth_texture = staging ?
         staging : texture->flushed_depth_texture;

   if (!staging && !texture->dirty_level_mask)
      return;

   max_sample = u_max_sample(&texture->resource.b.b);

   desc = util_format_description(flushed_depth_texture->resource.b.b.format);

   if (util_format_has_depth(desc))
      sctx->dbcb_depth_copy_enabled = true;
   if (util_format_has_stencil(desc))
      sctx->dbcb_stencil_copy_enabled = true;

   assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

   for (level = first_level; level <= last_level; level++) {
      if (!staging && !(texture->dirty_level_mask & (1 << level)))
         continue;

      /* The smaller the mipmap level, the fewer layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&texture->resource.b.b, level);
      checked_last_layer = last_layer < max_layer ? last_layer : max_layer;

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         for (sample = first_sample; sample <= last_sample; sample++) {
            struct pipe_surface *zsurf, *cbsurf, surf_tmpl;

            sctx->dbcb_copy_sample = sample;
            si_mark_atom_dirty(sctx, &sctx->db_render_state);

            surf_tmpl.format = texture->resource.b.b.format;
            surf_tmpl.u.tex.level = level;
            surf_tmpl.u.tex.first_layer = layer;
            surf_tmpl.u.tex.last_layer = layer;

            zsurf = ctx->create_surface(ctx, &texture->resource.b.b, &surf_tmpl);

            surf_tmpl.format = flushed_depth_texture->resource.b.b.format;
            cbsurf = ctx->create_surface(ctx,
                  (struct pipe_resource*)flushed_depth_texture, &surf_tmpl);

            si_blitter_begin(ctx, SI_DECOMPRESS);
            util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf,
                                              1 << sample,
                                              sctx->custom_dsa_flush, depth);
            si_blitter_end(ctx);

            pipe_surface_reference(&zsurf, NULL);
            pipe_surface_reference(&cbsurf, NULL);
         }
      }

      /* The texture will always be dirty if some layers aren't flushed.
       * I don't think this case can occur though. */
      if (!staging &&
          first_layer == 0 && last_layer == max_layer &&
          first_sample == 0 && last_sample == max_sample) {
         texture->dirty_level_mask &= ~(1 << level);
      }
   }

   sctx->dbcb_depth_copy_enabled = false;
   sctx->dbcb_stencil_copy_enabled = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);
}
static unsigned si_blit_dbcb_copy(struct si_context *sctx,
                                  struct r600_texture *src,
                                  struct r600_texture *dst,
                                  unsigned planes, unsigned level_mask,
                                  unsigned first_layer, unsigned last_layer,
                                  unsigned first_sample, unsigned last_sample)
{
   struct pipe_surface surf_tmpl = {{0}};
   unsigned layer, sample, checked_last_layer, max_layer;
   unsigned fully_copied_levels = 0;

   if (planes & PIPE_MASK_Z)
      sctx->dbcb_depth_copy_enabled = true;
   if (planes & PIPE_MASK_S)
      sctx->dbcb_stencil_copy_enabled = true;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

   while (level_mask) {
      unsigned level = u_bit_scan(&level_mask);

      /* The smaller the mipmap level, the fewer layers there are
       * as far as 3D textures are concerned. */
      max_layer = util_max_layer(&src->resource.b.b, level);
      checked_last_layer = MIN2(last_layer, max_layer);

      surf_tmpl.u.tex.level = level;

      for (layer = first_layer; layer <= checked_last_layer; layer++) {
         struct pipe_surface *zsurf, *cbsurf;

         surf_tmpl.format = src->resource.b.b.format;
         surf_tmpl.u.tex.first_layer = layer;
         surf_tmpl.u.tex.last_layer = layer;

         zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b,
                                          &surf_tmpl);

         surf_tmpl.format = dst->resource.b.b.format;
         cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b,
                                           &surf_tmpl);

         for (sample = first_sample; sample <= last_sample; sample++) {
            if (sample != sctx->dbcb_copy_sample) {
               sctx->dbcb_copy_sample = sample;
               si_mark_atom_dirty(sctx, &sctx->db_render_state);
            }

            si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
            util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf,
                                              1 << sample,
                                              sctx->custom_dsa_flush, 1.0f);
            si_blitter_end(&sctx->b.b);
         }

         pipe_surface_reference(&zsurf, NULL);
         pipe_surface_reference(&cbsurf, NULL);
      }

      if (first_layer == 0 && last_layer >= max_layer &&
          first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
         fully_copied_levels |= 1u << level;
   }

   sctx->dbcb_depth_copy_enabled = false;
   sctx->dbcb_stencil_copy_enabled = false;
   si_mark_atom_dirty(sctx, &sctx->db_render_state);

   return fully_copied_levels;
}
static void si_clear(struct pipe_context *ctx, unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
   struct pipe_surface *zsbuf = fb->zsbuf;
   struct si_texture *zstex =
      zsbuf ? (struct si_texture*)zsbuf->texture : NULL;

   if (buffers & PIPE_CLEAR_COLOR) {
      si_do_fast_color_clear(sctx, &buffers, color);
      if (!buffers)
         return; /* all buffers have been fast cleared */

      /* These buffers cannot use fast clear, make sure to disable expansion. */
      for (unsigned i = 0; i < fb->nr_cbufs; i++) {
         struct si_texture *tex;

         /* If not clearing this buffer, skip. */
         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)) || !fb->cbufs[i])
            continue;

         tex = (struct si_texture *)fb->cbufs[i]->texture;
         if (tex->surface.fmask_size == 0)
            tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
      }
   }

   if (zstex &&
       si_htile_enabled(zstex, zsbuf->u.tex.level) &&
       zsbuf->u.tex.first_layer == 0 &&
       zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
      /* TC-compatible HTILE only supports depth clears to 0 or 1. */
      if (buffers & PIPE_CLEAR_DEPTH &&
          (!zstex->tc_compatible_htile ||
           depth == 0 || depth == 1)) {
         /* Need to disable EXPCLEAR temporarily if clearing
          * to a new value. */
         if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
            sctx->db_depth_disable_expclear = true;
         }

         if (zstex->depth_clear_value != (float)depth) {
            /* Update DB_DEPTH_CLEAR. */
            zstex->depth_clear_value = depth;
            sctx->framebuffer.dirty_zsbuf = true;
            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
         }
         sctx->db_depth_clear = true;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
      }

      /* TC-compatible HTILE only supports stencil clears to 0. */
      if (buffers & PIPE_CLEAR_STENCIL &&
          (!zstex->tc_compatible_htile || stencil == 0)) {
         stencil &= 0xff;

         /* Need to disable EXPCLEAR temporarily if clearing
          * to a new value. */
         if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
            sctx->db_stencil_disable_expclear = true;
         }

         if (zstex->stencil_clear_value != (uint8_t)stencil) {
            /* Update DB_STENCIL_CLEAR. */
            zstex->stencil_clear_value = stencil;
            sctx->framebuffer.dirty_zsbuf = true;
            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
         }
         sctx->db_stencil_clear = true;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
      }

      /* TODO: Find out what's wrong here. Fast depth clear leads to
       * corruption in ARK: Survival Evolved, but that may just be
       * a coincidence and the root cause is elsewhere.
       *
       * The corruption can be fixed by putting the DB flush before
       * or after the depth clear. (surprisingly)
       *
       * https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
       *
       * This hack decreases back-to-back ClearDepth performance.
       */
      if ((sctx->db_depth_clear || sctx->db_stencil_clear) &&
          sctx->screen->options.clear_db_cache_before_clear)
         sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
   }

   si_blitter_begin(sctx, SI_CLEAR);
   util_blitter_clear(sctx->blitter, fb->width, fb->height,
                      util_framebuffer_get_num_layers(fb),
                      buffers, color, depth, stencil);
   si_blitter_end(sctx);

   if (sctx->db_depth_clear) {
      sctx->db_depth_clear = false;
      sctx->db_depth_disable_expclear = false;
      zstex->depth_cleared = true;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
   }

   if (sctx->db_stencil_clear) {
      sctx->db_stencil_clear = false;
      sctx->db_stencil_disable_expclear = false;
      zstex->stencil_cleared = true;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
   }
}
/* Try to fast-clear each bound color buffer via DCC or CMASK instead of a
 * draw-based clear; buffers that were fast cleared are removed from *buffers. */
static void si_do_fast_color_clear(struct si_context *sctx,
                                   unsigned *buffers,
                                   const union pipe_color_union *color)
{
   struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
   int i;

   /* This function is broken in BE, so just disable this path for now */
#ifdef PIPE_ARCH_BIG_ENDIAN
   return;
#endif

   if (sctx->render_cond)
      return;

   for (i = 0; i < fb->nr_cbufs; i++) {
      struct si_texture *tex;
      unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;

      if (!fb->cbufs[i])
         continue;

      /* if this colorbuffer is not being cleared */
      if (!(*buffers & clear_bit))
         continue;

      unsigned level = fb->cbufs[i]->u.tex.level;
      if (level > 0)
         continue;

      tex = (struct si_texture *)fb->cbufs[i]->texture;

      /* TODO: GFX9: Implement DCC fast clear for level 0 of
       * mipmapped textures. Mipmapped DCC has to clear a rectangular
       * area of DCC for level 0 (because the whole miptree is
       * organized in a 2D plane).
       */
      if (sctx->chip_class >= GFX9 &&
          tex->buffer.b.b.last_level > 0)
         continue;

      /* the clear is allowed if all layers are bound */
      if (fb->cbufs[i]->u.tex.first_layer != 0 ||
          fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->buffer.b.b, 0)) {
         continue;
      }

      /* only supported on tiled surfaces */
      if (tex->surface.is_linear) {
         continue;
      }

      /* shared textures can't use fast clear without an explicit flush,
       * because there is no way to communicate the clear color among
       * all clients
       */
      if (tex->buffer.b.is_shared &&
          !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
         continue;

      if (sctx->chip_class <= GFX8 &&
          tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
          !sctx->screen->info.htile_cmask_support_1d_tiling)
         continue;

      /* Use a slow clear for small surfaces where the cost of
       * the eliminate pass can be higher than the benefit of fast
       * clear. The closed driver does this, but the numbers may differ.
       *
       * This helps on both dGPUs and APUs, even small APUs like Mullins.
       */
      bool too_small = tex->buffer.b.b.nr_samples <= 1 &&
                       tex->buffer.b.b.width0 *
                       tex->buffer.b.b.height0 <= 512 * 512;
      bool eliminate_needed = false;
      bool fmask_decompress_needed = false;

      /* Fast clear is the most appropriate place to enable DCC for
       * displayable surfaces.
       */
      if (sctx->family == CHIP_STONEY && !too_small) {
         vi_separate_dcc_try_enable(sctx, tex);

         /* RB+ isn't supported with a CMASK clear only on Stoney,
          * so all clears are considered to be hypothetically slow
          * clears, which is weighed when determining whether to
          * enable separate DCC.
          */
         if (tex->dcc_gather_statistics) /* only for Stoney */
            tex->num_slow_clears++;
      }

      /* Try to clear DCC first, otherwise try CMASK. */
      if (vi_dcc_enabled(tex, 0)) {
         uint32_t reset_value;

         if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
            continue;

         /* This can happen with mipmapping or MSAA. */
         if (sctx->chip_class == GFX8 &&
             !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
            continue;

         if (!vi_get_fast_clear_parameters(tex->buffer.b.b.format,
                                           fb->cbufs[i]->format,
                                           color, &reset_value,
                                           &eliminate_needed))
            continue;

         if (eliminate_needed && too_small)
            continue;

         /* DCC fast clear with MSAA should clear CMASK to 0xC. */
         if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
            /* TODO: This doesn't work with MSAA. */
            if (eliminate_needed)
               continue;

            uint32_t clear_value = 0xCCCCCCCC;
            si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                            tex->cmask_offset, tex->surface.cmask_size,
                            &clear_value, 4, SI_COHERENCY_CB_META, false);
            fmask_decompress_needed = true;
         }

         vi_dcc_clear_level(sctx, tex, 0, reset_value);
         tex->separate_dcc_dirty = true;
      } else {
         if (too_small)
            continue;

         /* 128-bit formats are unsupported */
         if (tex->surface.bpe > 8) {
            continue;
         }

         /* RB+ doesn't work with CMASK fast clear on Stoney. */
         if (sctx->family == CHIP_STONEY)
            continue;

         /* ensure CMASK is enabled */
         si_alloc_separate_cmask(sctx->screen, tex);
         if (!tex->cmask_buffer)
            continue;

         /* Do the fast clear. */
         uint32_t clear_value = 0;
         si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
                         tex->cmask_offset, tex->surface.cmask_size,
                         &clear_value, 4, SI_COHERENCY_CB_META, false);
         eliminate_needed = true;
      }

      if ((eliminate_needed || fmask_decompress_needed) &&
          !(tex->dirty_level_mask & (1 << level))) {
         tex->dirty_level_mask |= 1 << level;
         p_atomic_inc(&sctx->screen->compressed_colortex_counter);
      }

      /* We can change the micro tile mode before a full clear. */
      si_set_optimal_micro_tile_mode(sctx->screen, tex);

      *buffers &= ~clear_bit;

      /* Chips with DCC constant encoding don't need to set the clear
       * color registers for DCC clear values 0 and 1.
       */
      if (sctx->screen->has_dcc_constant_encode && !eliminate_needed)
         continue;

      if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {
         sctx->framebuffer.dirty_cbufs |= 1 << i;
         si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
      }
   }
}