static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
	nir_builder b;
	char name[64];
	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
	const struct glsl_type *vec4 = glsl_vec4_type();
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);

	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, name);

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *fs_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "fs_pos_in");
	fs_pos_in->data.location = VARYING_SLOT_POS;

	nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
						      vec4, "f_color");
	color_out->data.location = FRAG_RESULT_DATA0;

	nir_ssa_def *pos_in = nir_load_var(&b, fs_pos_in);
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 8);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);

	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
	                                    color, img_coord);

	nir_ssa_def *outval = nir_load_var(&b, color);
	nir_store_var(&b, color_out, outval, 0xf);
	return b.shader;
}
static nir_ssa_def *
get_transform(lower_wpos_ytransform_state *state)
{
    if (state->transform == NULL) {
        /* NOTE: name must be prefixed w/ "gl_" to trigger slot based
         * special handling in uniform setup:
         */
        nir_variable *var = nir_variable_create(state->shader,
                                                nir_var_uniform,
                                                glsl_vec4_type(),
                                                "gl_FbWposYTransform");

        var->num_state_slots = 1;
        var->state_slots = ralloc_array(var, nir_state_slot, 1);
        var->state_slots[0].swizzle = SWIZZLE_XYZW;
        memcpy(var->state_slots[0].tokens, state->options->state_tokens,
               sizeof(var->state_slots[0].tokens));

        state->transform = var;
    }
    return nir_load_var(&state->b, state->transform);
}
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples)
{
	nir_builder b;
	char name[64];
	nir_if *outer_if = NULL;
	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info->name = ralloc_strdup(b.shader, name);
	b.shader->info->cs.local_size[0] = 16;
	b.shader->info->cs.local_size[1] = 16;
	b.shader->info->cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info->cs.local_size[0],
						b.shader->info->cs.local_size[1],
						b.shader->info->cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	dst_offset->num_components = 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	/* do a txf_ms on each sample */
	nir_ssa_def *tmp;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 2;
	tex->texture = nir_deref_var_create(tex, input_img);
	tex->sampler = NULL;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	tmp = &tex->dest.ssa;
	nir_variable *color =
		nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

	if (!is_integer && samples > 1) {
		nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1);
		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
		tex_all_same->op = nir_texop_samples_identical;
		tex_all_same->src[0].src_type = nir_tex_src_coord;
		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
		tex_all_same->dest_type = nir_type_float;
		tex_all_same->is_array = false;
		tex_all_same->coord_components = 2;
		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
		tex_all_same->sampler = NULL;

		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
		nir_builder_instr_insert(&b, &tex_all_same->instr);

		nir_ssa_def *all_same = nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0));
		nir_if *if_stmt = nir_if_create(b.shader);
		if_stmt->condition = nir_src_for_ssa(all_same);
		nir_cf_node_insert(b.cursor, &if_stmt->cf_node);

		b.cursor = nir_after_cf_list(&if_stmt->then_list);
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
			tex_add->dest_type = nir_type_float;
			tex_add->is_array = false;
			tex_add->coord_components = 2;
			tex_add->texture = nir_deref_var_create(tex_add, input_img);
			tex_add->sampler = NULL;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(&b, &tex_add->instr);

			tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa);
		}

		tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples));
		nir_store_var(&b, color, tmp, 0xf);
		b.cursor = nir_after_cf_list(&if_stmt->else_list);
		outer_if = if_stmt;
	}
	nir_store_var(&b, color, &tex->dest.ssa, 0xf);

	if (outer_if)
		b.cursor = nir_after_cf_node(&outer_if->cf_node);

	nir_ssa_def *newv = nir_load_var(&b, color);
	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
	store->src[0] = nir_src_for_ssa(coord);
	store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[2] = nir_src_for_ssa(newv);
	store->variables[0] = nir_deref_var_create(store, output_img);
	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
/* see emit_wpos_adjustment() in st_mesa_to_tgsi.c */
static void
emit_wpos_adjustment(lower_wpos_ytransform_state *state,
                     nir_intrinsic_instr *intr,
                     bool invert, float adjX, float adjY[2])
{
    nir_builder *b = &state->b;
    nir_variable *fragcoord = intr->variables[0]->var;
    nir_ssa_def *wpostrans, *wpos_temp, *wpos_temp_y, *wpos_input;

    assert(intr->dest.is_ssa);

    b->cursor = nir_before_instr(&intr->instr);

    wpostrans = get_transform(state);
    wpos_input = nir_load_var(b, fragcoord);

    /* First, apply the coordinate shift: */
    if (adjX || adjY[0] || adjY[1]) {
        if (adjY[0] != adjY[1]) {
            /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
             * depending on whether inversion is actually going to be applied
             * or not, which is determined by testing against the inversion
             * state variable used below, which will be either +1 or -1.
             */
            nir_ssa_def *adj_temp;

            adj_temp = nir_cmp(b,
                               nir_channel(b, wpostrans, invert ? 2 : 0),
                               nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f),
                               nir_imm_vec4(b, adjX, adjY[1], 0.0f, 0.0f));

            wpos_temp = nir_fadd(b, wpos_input, adj_temp);
        } else {
            wpos_temp = nir_fadd(b,
                                 wpos_input,
                                 nir_imm_vec4(b, adjX, adjY[0], 0.0f, 0.0f));
        }
        wpos_input = wpos_temp;
    } else {
        /* MOV wpos_temp, input[wpos]
         */
        wpos_temp = wpos_input;
    }

    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
     * inversion/identity, or the other way around if we're drawing to an FBO.
     */
    if (invert) {
        /* wpos_temp.y = wpos_input * wpostrans.xxxx + wpostrans.yyyy */
        wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
                                           nir_channel(b, wpostrans, 0)),
                               nir_channel(b, wpostrans, 1));
    } else {
        /* wpos_temp.y = wpos_input * wpostrans.zzzz + wpostrans.wwww */
        wpos_temp_y = nir_fadd(b, nir_fmul(b, nir_channel(b, wpos_temp, 1),
                                           nir_channel(b, wpostrans, 2)),
                               nir_channel(b, wpostrans, 3));
    }

    wpos_temp = nir_vec4(b,
                         nir_channel(b, wpos_temp, 0),
                         wpos_temp_y,
                         nir_channel(b, wpos_temp, 2),
                         nir_channel(b, wpos_temp, 3));

    nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(wpos_temp));
}
Exemple #5
0
static void *vc4_get_yuv_vs(struct pipe_context *pctx)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;

   if (vc4->yuv_linear_blit_vs)
           return vc4->yuv_linear_blit_vs;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_VERTEX);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "linear_blit_vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");

   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
                                               vec4, "gl_Position");
   pos_out->data.location = VARYING_SLOT_POS;

   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

   return vc4->yuv_linear_blit_vs;
}

static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
{
   struct vc4_context *vc4 = vc4_context(pctx);
   struct pipe_screen *pscreen = pctx->screen;
   struct pipe_shader_state **cached_shader;
   const char *name;

   if (cpp == 1) {
           cached_shader = &vc4->yuv_linear_blit_fs_8bit;
           name = "linear_blit_8bit_fs";
   } else {
           cached_shader = &vc4->yuv_linear_blit_fs_16bit;
           name = "linear_blit_16bit_fs";
   }

   if (*cached_shader)
           return *cached_shader;

   const struct nir_shader_compiler_options *options =
           pscreen->get_compiler_options(pscreen,
                                         PIPE_SHADER_IR_NIR,
                                         PIPE_SHADER_FRAGMENT);

   nir_builder b;
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, name);

   const struct glsl_type *vec4 = glsl_vec4_type();
   const struct glsl_type *glsl_int = glsl_int_type();

   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
                                                 vec4, "f_color");
   color_out->data.location = FRAG_RESULT_COLOR;

   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
                                              vec4, "pos");
   pos_in->data.location = VARYING_SLOT_POS;
   nir_ssa_def *pos = nir_load_var(&b, pos_in);

   nir_ssa_def *one = nir_imm_int(&b, 1);
   nir_ssa_def *two = nir_imm_int(&b, 2);

   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
                                                 glsl_int, "stride");
   nir_ssa_def *stride = nir_load_var(&b, stride_in);

   nir_ssa_def *x_offset;
   nir_ssa_def *y_offset;
   if (cpp == 1) {
           nir_ssa_def *intra_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, one), two);
           nir_ssa_def *inter_utile_x_offset =
                   nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);

           x_offset = nir_iadd(&b,
                               intra_utile_x_offset,
                               inter_utile_x_offset);
           y_offset = nir_imul(&b,
                               nir_iadd(&b,
                                        nir_ishl(&b, y, one),
                                        nir_ushr(&b, nir_iand(&b, x, two), one)),
                               stride);
   } else {
           x_offset = nir_ishl(&b, x, two);
           y_offset = nir_imul(&b, y, stride);
   }

   nir_intrinsic_instr *load =
           nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, 32, NULL);
   load->src[0] = nir_src_for_ssa(one);
   load->src[1] = nir_src_for_ssa(nir_iadd(&b, x_offset, y_offset));
   nir_builder_instr_insert(&b, &load->instr);

   nir_store_var(&b, color_out,
                 nir_unpack_unorm_4x8(&b, &load->dest.ssa),
                 0xf);

   struct pipe_shader_state shader_tmpl = {
           .type = PIPE_SHADER_IR_NIR,
           .ir.nir = b.shader,
   };

   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

   return *cached_shader;
}

static bool
vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_resource *src = vc4_resource(info->src.resource);
        struct vc4_resource *dst = vc4_resource(info->dst.resource);
        bool ok;

        if (src->tiled)
                return false;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return false;

        /* YUV blits always turn raster-order to tiled */
        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        /* Always 1:1 and at the origin */
        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        if ((src->slices[info->src.level].offset & 3) ||
            (src->slices[info->src.level].stride & 3)) {
                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
                           src->slices[info->src.level].offset,
                           src->slices[info->src.level].stride);
                goto fallback;
        }

        vc4_blitter_save(vc4);

        /* Create a renderable surface mapping the T-tiled shadow buffer.
         */
        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(vc4->blitter);
                return false;
        }
        dst_surf->width /= 2;
        if (dst->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        uint32_t stride = src->slices[info->src.level].stride;
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &stride,
                .buffer_size = sizeof(stride),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, &cb_uniforms);
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_src);

        /* Unbind the textures, to make sure we don't try to recurse into the
         * shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(vc4->blitter, dst_surf,
                                   vc4_get_yuv_vs(pctx),
                                   vc4_get_yuv_fs(pctx, src->cpp));

        util_blitter_restore_textures(vc4->blitter);
        util_blitter_restore_constant_buffer_state(vc4->blitter);
        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        return true;

fallback:
        /* Do an immediate SW fallback, since the render blit path
         * would just recurse.
         */
        ok = util_try_blit_via_copy_region(pctx, info);
        assert(ok); (void)ok;

        return true;
}

static bool
vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct vc4_context *vc4 = vc4_context(ctx);

        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
                fprintf(stderr, "blit unsupported %s -> %s\n",
                    util_format_short_name(info->src.resource->format),
                    util_format_short_name(info->dst.resource->format));
                return false;
        }

        /* Enable the scissor, so we get a minimal set of tiles rendered. */
        if (!info->scissor_enable) {
                info->scissor_enable = true;
                info->scissor.minx = info->dst.box.x;
                info->scissor.miny = info->dst.box.y;
                info->scissor.maxx = info->dst.box.x + info->dst.box.width;
                info->scissor.maxy = info->dst.box.y + info->dst.box.height;
        }

        vc4_blitter_save(vc4);
        util_blitter_blit(vc4->blitter, info);

        return true;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
void
vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
        struct pipe_blit_info info = *blit_info;

        if (vc4_yuv_blit(pctx, blit_info))
                return;

        if (vc4_tile_blit(pctx, blit_info))
                return;

        if (info.mask & PIPE_MASK_S) {
                if (util_try_blit_via_copy_region(pctx, &info))
                        return;

                info.mask &= ~PIPE_MASK_S;
                fprintf(stderr, "cannot blit stencil, skipping\n");
        }

        if (vc4_render_blit(pctx, &info))
                return;

        fprintf(stderr, "Unsupported blit\n");
}