static void si_blit_decompress_color(struct pipe_context *ctx, struct r600_texture *rtex, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer, bool need_dcc_decompress) { struct si_context *sctx = (struct si_context *)ctx; void* custom_blend; unsigned layer, checked_last_layer, max_layer; unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); if (!need_dcc_decompress) level_mask &= rtex->dirty_level_mask; if (!level_mask) return; if (rtex->dcc_offset && need_dcc_decompress) { custom_blend = sctx->custom_blend_dcc_decompress; } else if (rtex->fmask.size) { custom_blend = sctx->custom_blend_decompress; } else { custom_blend = sctx->custom_blend_fastclear; } while (level_mask) { unsigned level = u_bit_scan(&level_mask); /* The smaller the mipmap level, the less layers there are * as far as 3D textures are concerned. */ max_layer = util_max_layer(&rtex->resource.b.b, level); checked_last_layer = MIN2(last_layer, max_layer); for (layer = first_layer; layer <= checked_last_layer; layer++) { struct pipe_surface *cbsurf, surf_tmpl; surf_tmpl.format = rtex->resource.b.b.format; surf_tmpl.u.tex.level = level; surf_tmpl.u.tex.first_layer = layer; surf_tmpl.u.tex.last_layer = layer; cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl); si_blitter_begin(ctx, SI_DECOMPRESS); util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend); si_blitter_end(ctx); pipe_surface_reference(&cbsurf, NULL); } /* The texture will always be dirty if some layers aren't flushed. * I don't think this case occurs often though. */ if (first_layer == 0 && last_layer == max_layer) { rtex->dirty_level_mask &= ~(1 << level); } } }
/* Decompress Z and/or S planes in place, depending on mask. */ static void si_blit_decompress_zs_in_place(struct si_context *sctx, struct r600_texture *texture, unsigned planes, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); unsigned cur_level_mask; /* First, do combined Z & S decompresses for levels that need it. */ if (planes == (PIPE_MASK_Z | PIPE_MASK_S)) { cur_level_mask = level_mask & texture->dirty_level_mask & texture->stencil_dirty_level_mask; si_blit_decompress_zs_planes_in_place( sctx, texture, PIPE_MASK_Z | PIPE_MASK_S, cur_level_mask, first_layer, last_layer); level_mask &= ~cur_level_mask; } /* Now do separate Z and S decompresses. */ if (planes & PIPE_MASK_Z) { cur_level_mask = level_mask & texture->dirty_level_mask; si_blit_decompress_zs_planes_in_place( sctx, texture, PIPE_MASK_Z, cur_level_mask, first_layer, last_layer); level_mask &= ~cur_level_mask; } if (planes & PIPE_MASK_S) { cur_level_mask = level_mask & texture->stencil_dirty_level_mask; si_blit_decompress_zs_planes_in_place( sctx, texture, PIPE_MASK_S, cur_level_mask, first_layer, last_layer); } }
static void si_blit_decompress_depth(struct pipe_context *ctx, struct r600_texture *texture, struct r600_texture *staging, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned first_sample, unsigned last_sample) { const struct util_format_description *desc; unsigned planes = 0; assert(staging != NULL && "use si_blit_decompress_zs_in_place instead"); desc = util_format_description(staging->resource.b.b.format); if (util_format_has_depth(desc)) planes |= PIPE_MASK_Z; if (util_format_has_stencil(desc)) planes |= PIPE_MASK_S; si_blit_dbcb_copy( (struct si_context *)ctx, texture, staging, planes, u_bit_consecutive(first_level, last_level - first_level + 1), first_layer, last_layer, first_sample, last_sample); }
/* Create a compute shader implementing clear_buffer or copy_buffer.
 *
 * Each thread moves \p num_dwords_per_thread dwords (must be a power of two),
 * split into up to 4-dword vector memory ops.  For clears (is_copy == false),
 * the clear pattern is read from CS user data; for copies, loads from a
 * second buffer are pipelined ahead of the stores for latency hiding.
 *
 * Returns a pipe compute state object created via ctx->create_compute_state,
 * or NULL on ureg allocation failure.  The caller owns the returned CSO.
 */
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy)
{
   assert(util_is_power_of_two_nonzero(num_dwords_per_thread));

   unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT;
   if (dst_stream_cache_policy)
      store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY;

   /* Don't cache loads, because there is no reuse. */
   unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY;

   /* One memory op per 4 dwords, minimum one op for sub-dword4 sizes. */
   unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4);
   unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned));

   /* inst_dwords[i] = number of dwords the i-th memory op moves (1..4). */
   for (unsigned i = 0; i < num_mem_ops; i++) {
      if (i*4 < num_dwords_per_thread)
         inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4);
   }

   struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE);
   if (!ureg)
      return NULL;

   /* Fixed 64x1x1 block size (one wave per block). */
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1);

   struct ureg_src value;
   if (!is_copy) {
      /* Clears: the clear pattern arrives via CS user-data SGPRs. */
      ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]);
      value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0);
   }

   struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0);
   struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0);
   struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
   struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X);
   struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false));
   struct ureg_src srcbuf;
   struct ureg_src *values = NULL;

   if (is_copy) {
      srcbuf = ureg_DECL_buffer(ureg, 1, false);
      /* Temporaries holding in-flight loaded data, one per memory op. */
      values = malloc(num_mem_ops * sizeof(struct ureg_src));
   }

   /* If there are multiple stores, the first store writes into 0+tid,
    * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc.
    */
   ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid);
   /* Convert from a "store size unit" into bytes. */
   ureg_UMUL(ureg, store_addr, ureg_src(store_addr),
             ureg_imm1u(ureg, 4 * inst_dwords[0]));
   ureg_MOV(ureg, load_addr, ureg_src(store_addr));

   /* Distance between a load and a store for latency hiding. */
   unsigned load_store_distance = is_copy ? 8 : 0;

   /* Software-pipelined loop: iteration i issues load i (copies only) and
    * store d = i - load_store_distance, so each load runs
    * load_store_distance iterations ahead of its matching store.
    */
   for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) {
      int d = i - load_store_distance;

      if (is_copy && i < num_mem_ops) {
         if (i) {
            /* Advance the load address by one op's stride (64 threads). */
            ureg_UADD(ureg, load_addr, ureg_src(load_addr),
                      ureg_imm1u(ureg, 4 * inst_dwords[i] * 64));
         }

         values[i] = ureg_src(ureg_DECL_temporary(ureg));
         struct ureg_dst dst =
            ureg_writemask(ureg_dst(values[i]),
                           u_bit_consecutive(0, inst_dwords[i]));
         struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)};
         ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2,
                          load_qualifier, TGSI_TEXTURE_BUFFER, 0);
      }

      if (d >= 0) {
         if (d) {
            /* Advance the store address by one op's stride (64 threads). */
            ureg_UADD(ureg, store_addr, ureg_src(store_addr),
                      ureg_imm1u(ureg, 4 * inst_dwords[d] * 64));
         }

         struct ureg_dst dst =
            ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d]));
         struct ureg_src srcs[] =
            {ureg_src(store_addr), is_copy ? values[d] : value};
         ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2,
                          store_qualifier, TGSI_TEXTURE_BUFFER, 0);
      }
   }
   ureg_END(ureg);

   struct pipe_compute_state state = {};
   state.ir_type = PIPE_SHADER_IR_TGSI;
   state.prog = ureg_get_tokens(ureg, NULL);

   void *cs = ctx->create_compute_state(ctx, &state);
   ureg_destroy(ureg);
   free(values);
   return cs;
}
/* Forget which PM4 states have been emitted and flag every tracked state
 * dirty, so all of them are re-emitted on the next draw.
 */
void si_pm4_reset_emitted(struct si_context *sctx)
{
   const unsigned all_states = u_bit_consecutive(0, SI_NUM_STATES);

   memset(&sctx->emitted, 0, sizeof(sctx->emitted));
   sctx->dirty_states |= all_states;
}
static void si_flush_depth_texture(struct si_context *sctx, struct r600_texture *tex, unsigned required_planes, unsigned first_level, unsigned last_level, unsigned first_layer, unsigned last_layer) { unsigned inplace_planes = 0; unsigned copy_planes = 0; unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1); unsigned levels_z = 0; unsigned levels_s = 0; if (required_planes & PIPE_MASK_Z) { levels_z = level_mask & tex->dirty_level_mask; if (levels_z) { if (r600_can_sample_zs(tex, false)) inplace_planes |= PIPE_MASK_Z; else copy_planes |= PIPE_MASK_Z; } } if (required_planes & PIPE_MASK_S) { levels_s = level_mask & tex->stencil_dirty_level_mask; if (levels_s) { if (r600_can_sample_zs(tex, true)) inplace_planes |= PIPE_MASK_S; else copy_planes |= PIPE_MASK_S; } } /* We may have to allocate the flushed texture here when called from * si_decompress_subresource. */ if (copy_planes && (tex->flushed_depth_texture || r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) { struct r600_texture *dst = tex->flushed_depth_texture; unsigned fully_copied_levels; unsigned levels = 0; assert(tex->flushed_depth_texture); if (util_format_is_depth_and_stencil(dst->resource.b.b.format)) copy_planes = PIPE_MASK_Z | PIPE_MASK_S; if (copy_planes & PIPE_MASK_Z) { levels |= levels_z; levels_z = 0; } if (copy_planes & PIPE_MASK_S) { levels |= levels_s; levels_s = 0; } fully_copied_levels = si_blit_dbcb_copy( sctx, tex, dst, copy_planes, levels, first_layer, last_layer, 0, u_max_sample(&tex->resource.b.b)); if (copy_planes & PIPE_MASK_Z) tex->dirty_level_mask &= ~fully_copied_levels; if (copy_planes & PIPE_MASK_S) tex->stencil_dirty_level_mask &= ~fully_copied_levels; } if (inplace_planes) { si_blit_decompress_zs_in_place( sctx, tex, levels_z, levels_s, first_layer, last_layer); } }
/* Too complex to figure out, just check every time: */ static void check_program_state( struct st_context *st ) { struct gl_context *ctx = st->ctx; struct st_vertex_program *old_vp = st->vp; struct st_common_program *old_tcp = st->tcp; struct st_common_program *old_tep = st->tep; struct st_common_program *old_gp = st->gp; struct st_fragment_program *old_fp = st->fp; struct gl_program *new_vp = ctx->VertexProgram._Current; struct gl_program *new_tcp = ctx->TessCtrlProgram._Current; struct gl_program *new_tep = ctx->TessEvalProgram._Current; struct gl_program *new_gp = ctx->GeometryProgram._Current; struct gl_program *new_fp = ctx->FragmentProgram._Current; uint64_t dirty = 0; unsigned num_viewports = 1; /* Flag states used by both new and old shaders to unbind shader resources * properly when transitioning to shaders that don't use them. */ if (unlikely(new_vp != &old_vp->Base)) { if (old_vp) dirty |= old_vp->affected_states; if (new_vp) dirty |= ST_NEW_VERTEX_PROGRAM(st, st_vertex_program(new_vp)); } if (unlikely(new_tcp != &old_tcp->Base)) { if (old_tcp) dirty |= old_tcp->affected_states; if (new_tcp) dirty |= st_common_program(new_tcp)->affected_states; } if (unlikely(new_tep != &old_tep->Base)) { if (old_tep) dirty |= old_tep->affected_states; if (new_tep) dirty |= st_common_program(new_tep)->affected_states; } if (unlikely(new_gp != &old_gp->Base)) { if (old_gp) dirty |= old_gp->affected_states; if (new_gp) dirty |= st_common_program(new_gp)->affected_states; } if (unlikely(new_fp != &old_fp->Base)) { if (old_fp) dirty |= old_fp->affected_states; if (new_fp) dirty |= st_fragment_program(new_fp)->affected_states; } /* Find out the number of viewports. This determines how many scissors * and viewport states we need to update. */ struct gl_program *last_prim_shader = new_gp ? new_gp : new_tep ? 
new_tep : new_vp; if (last_prim_shader && last_prim_shader->info.outputs_written & VARYING_BIT_VIEWPORT) num_viewports = ctx->Const.MaxViewports; if (st->state.num_viewports != num_viewports) { st->state.num_viewports = num_viewports; dirty |= ST_NEW_VIEWPORT; if (ctx->Scissor.EnableFlags & u_bit_consecutive(0, num_viewports)) dirty |= ST_NEW_SCISSOR; } st->dirty |= dirty; }
/* Determine the DCC clear code for a fast color clear and whether a later
 * FCE (fast-clear eliminate) pass is required.
 *
 * Returns false if the clear cannot be done as a DCC fast clear at all
 * (128-bit formats with differing R,G,B).  Otherwise returns true and sets:
 *   *clear_value      - one of the DCC_CLEAR_COLOR_* codes
 *   *eliminate_needed - whether ELIMINATE_FAST_CLEAR must run before the
 *                       surface can be read
 */
static bool vi_get_fast_clear_parameters(enum pipe_format base_format, enum pipe_format surface_format, const union pipe_color_union *color, uint32_t* clear_value, bool *eliminate_needed)
{
   /* If we want to clear without needing a fast clear eliminate step, we
    * can set color and alpha independently to 0 or 1 (or 0/max for integer
    * formats).
    */
   bool values[4] = {}; /* whether to clear to 0 or 1 */
   bool color_value = false; /* clear color to 0 or 1 */
   bool alpha_value = false; /* clear alpha to 0 or 1 */
   int alpha_channel; /* index of the alpha component */
   bool has_color = false;
   bool has_alpha = false;

   const struct util_format_description *desc = util_format_description(si_simplify_cb_format(surface_format));

   /* 128-bit fast clear with different R,G,B values is unsupported. */
   if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2]))
      return false;

   /* Conservative defaults: reg-based clear color, FCE required. */
   *eliminate_needed = true;
   *clear_value = DCC_CLEAR_COLOR_REG;

   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
      return true; /* need ELIMINATE_FAST_CLEAR */

   bool base_alpha_is_on_msb = vi_alpha_is_on_msb(base_format);
   bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(surface_format);

   /* Formats with 3 channels can't have alpha. */
   if (desc->nr_channels == 3)
      alpha_channel = -1;
   else if (surf_alpha_is_on_msb)
      alpha_channel = desc->nr_channels - 1;
   else
      alpha_channel = 0;

   /* Classify each present component as clearing to 0 or 1 (0/max for
    * integers); bail out to FCE if the value is neither.
    */
   for (int i = 0; i < 4; ++i) {
      /* Skip components whose swizzle is a constant 0/1/NONE. */
      if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
         continue;

      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
         /* Use the maximum value for clamping the clear color. */
         int max = u_bit_consecutive(0, desc->channel[i].size - 1);

         values[i] = color->i[i] != 0;
         if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
            return true; /* need ELIMINATE_FAST_CLEAR */
      } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         /* Use the maximum value for clamping the clear color.
          */
         unsigned max = u_bit_consecutive(0, desc->channel[i].size);

         values[i] = color->ui[i] != 0U;
         if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
            return true; /* need ELIMINATE_FAST_CLEAR */
      } else {
         values[i] = color->f[i] != 0.0F;
         if (color->f[i] != 0.0F && color->f[i] != 1.0F)
            return true; /* need ELIMINATE_FAST_CLEAR */
      }

      /* NOTE(review): this compares the swizzle value (PIPE_SWIZZLE_X..W,
       * numerically 0..3) against the alpha channel index — presumably
       * intentional since both identify a storage channel; confirm against
       * util_format swizzle conventions.
       */
      if (desc->swizzle[i] == alpha_channel) {
         alpha_value = values[i];
         has_alpha = true;
      } else {
         color_value = values[i];
         has_color = true;
      }
   }

   /* If alpha isn't present, make it the same as color, and vice versa. */
   if (!has_alpha)
      alpha_value = color_value;
   else if (!has_color)
      color_value = alpha_value;

   /* If the alpha channel moves between base and surface format, a mixed
    * 0/1 clear code would land on the wrong component. */
   if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb)
      return true; /* require ELIMINATE_FAST_CLEAR */

   /* Check if all color values are equal if they are present. */
   for (int i = 0; i < 4; ++i) {
      if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel && values[i] != color_value)
         return true; /* require ELIMINATE_FAST_CLEAR */
   }

   /* This doesn't need ELIMINATE_FAST_CLEAR.
    * On chips predating Raven2, the DCC clear codes and the CB clear
    * color registers must match.
    */
   *eliminate_needed = false;

   if (color_value) {
      if (alpha_value)
         *clear_value = DCC_CLEAR_COLOR_1111;
      else
         *clear_value = DCC_CLEAR_COLOR_1110;
   } else {
      if (alpha_value)
         *clear_value = DCC_CLEAR_COLOR_0001;
      else
         *clear_value = DCC_CLEAR_COLOR_0000;
   }
   return true;
}