Beispiel #1
0
static bool si_set_clear_color(struct si_texture *tex,
			       enum pipe_format surface_format,
			       const union pipe_color_union *color)
{
	union util_color uc;

	memset(&uc, 0, sizeof(uc));

	if (tex->surface.bpe == 16) {
		/* DCC fast clear only:
		 *   CLEAR_WORD0 = R = G = B
		 *   CLEAR_WORD1 = A
		 */
		assert(color->ui[0] == color->ui[1] &&
		       color->ui[0] == color->ui[2]);
		uc.ui[0] = color->ui[0];
		uc.ui[1] = color->ui[3];
	} else if (util_format_is_pure_uint(surface_format)) {
		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
	} else if (util_format_is_pure_sint(surface_format)) {
		util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
	} else {
		util_pack_color(color->f, surface_format, &uc);
	}

	if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
		return false;

	memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));
	return true;
}
Beispiel #2
0
/**
 * Bind the blit fragment shader matching the texture target, writemask
 * and format class (pure uint / pure sint / everything else).
 *
 * The shader is created on first use and cached in ctx->fs.
 */
static inline void
set_fragment_shader(struct blit_state *ctx, uint writemask,
                    enum pipe_format format,
                    enum pipe_texture_target pipe_tex)
{
   /* Default to the float variant; integer formats override below. */
   enum tgsi_return_type stype = TGSI_RETURN_TYPE_FLOAT;
   unsigned idx = 2;

   if (util_format_is_pure_uint(format)) {
      stype = TGSI_RETURN_TYPE_UINT;
      idx = 0;
   } else if (util_format_is_pure_sint(format)) {
      stype = TGSI_RETURN_TYPE_SINT;
      idx = 1;
   }

   /* Lazily build the shader variant the first time it is requested. */
   if (!ctx->fs[pipe_tex][writemask][idx]) {
      unsigned tgsi_tex = util_pipe_tex_to_tgsi_tex(pipe_tex, 0);

      /* OpenGL does not allow blits from signed to unsigned integer
       * or vice versa, so source and destination share one type. */
      ctx->fs[pipe_tex][writemask][idx] =
         util_make_fragment_tex_shader_writemask(ctx->pipe, tgsi_tex,
                                                 TGSI_INTERPOLATE_LINEAR,
                                                 writemask,
                                                 stype, stype);
   }

   cso_set_fragment_shader_handle(ctx->cso, ctx->fs[pipe_tex][writemask][idx]);
}
Beispiel #3
0
/**
 * Decide whether an MCS buffer is wanted for the resource described by
 * params->templ with the given layout.
 *
 * \return true when MCS should be used, false otherwise
 */
static bool
layout_want_mcs(struct ilo_layout *layout,
                struct ilo_layout_params *params)
{
   const struct pipe_resource *templ = params->templ;

   /* MCS is for RT on GEN7+ */
   if (ilo_dev_gen(params->dev) < ILO_GEN(7))
      return false;

   if (templ->target != PIPE_TEXTURE_2D)
      return false;

   if (!(templ->bind & PIPE_BIND_RENDER_TARGET))
      return false;

   /*
    * From the Ivy Bridge PRM, volume 4 part 1, page 77:
    *
    *     "For Render Target and Sampling Engine Surfaces:If the surface is
    *      multisampled (Number of Multisamples any value other than
    *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
    *
    *     "This field must be set to 0 for all SINT MSRTs when all RT channels
    *      are not written"
    */
   if (templ->nr_samples > 1) {
      return !layout->interleaved_samples &&
             !util_format_is_pure_sint(templ->format);
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 326:
    *
    *     "When MCS is buffer is used for color clear of non-multisampler
    *      render target, the following restrictions apply.
    *      - Support is limited to tiled render targets.
    *      - Support is for non-mip-mapped and non-array surface types
    *        only.
    *      - Clear is supported only on the full RT; i.e., no partial clear
    *        or overlapping clears.
    *      - MCS buffer for non-MSRT is supported only for RT formats
    *        32bpp, 64bpp and 128bpp.
    *      ..."
    */
   if (layout->tiling == INTEL_TILING_NONE)
      return false;

   if (templ->last_level != 0 || templ->array_size != 1)
      return false;

   /* block sizes of 4, 8 and 16 bytes are the 32/64/128bpp formats */
   return layout->block_size == 4 ||
          layout->block_size == 8 ||
          layout->block_size == 16;
}
Beispiel #4
0
static void evergreen_set_clear_color(struct pipe_surface *cbuf,
				      const union pipe_color_union *color)
{
	unsigned *clear_value = ((struct r600_texture *)cbuf->texture)->color_clear_value;
	union util_color uc;

	memset(&uc, 0, sizeof(uc));

	if (util_format_is_pure_uint(cbuf->format)) {
		util_format_write_4ui(cbuf->format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
	} else if (util_format_is_pure_sint(cbuf->format)) {
		util_format_write_4i(cbuf->format, color->i, 0, &uc, 0, 0, 0, 1, 1);
	} else {
		util_pack_color(color->f, cbuf->format, &uc);
	}

	memcpy(clear_value, &uc, 2 * sizeof(uint32_t));
}
static void evergreen_set_clear_color(struct r600_texture *rtex,
				      enum pipe_format surface_format,
				      const union pipe_color_union *color)
{
	union util_color uc;

	memset(&uc, 0, sizeof(uc));

	if (util_format_is_pure_uint(surface_format)) {
		util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
	} else if (util_format_is_pure_sint(surface_format)) {
		util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
	} else {
		util_pack_color(color->f, surface_format, &uc);
	}

	memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
Beispiel #6
0
/**
 * Set every pixel in a cached tile to the given clear color.
 *
 * The color is written through the tile view that matches the format
 * class: unsigned words for pure-uint formats, signed words for
 * pure-sint formats, floats otherwise.
 *
 * \param tile         tile whose color data is overwritten
 * \param format       surface format; selects which union view is used
 * \param clear_value  clear color
 */
static void
clear_tile_rgba(struct softpipe_cached_tile *tile,
                enum pipe_format format,
                const union pipe_color_union *clear_value)
{
    uint i, j;

    /* Fast path: an all-zero-bits color can be written with memset.
     *
     * The raw words are compared rather than the float view, because a
     * float compare treats -0.0f (bit pattern 0x80000000) as equal to
     * 0.0.  For pure-integer formats that pattern is a non-zero value
     * (INT_MIN for sint) and must not be cleared to 0.
     */
    if (clear_value->ui[0] == 0 &&
            clear_value->ui[1] == 0 &&
            clear_value->ui[2] == 0 &&
            clear_value->ui[3] == 0) {
        memset(tile->data.color, 0, sizeof(tile->data.color));
        return;
    }

    if (util_format_is_pure_uint(format)) {
        for (i = 0; i < TILE_SIZE; i++) {
            for (j = 0; j < TILE_SIZE; j++) {
                tile->data.colorui128[i][j][0] = clear_value->ui[0];
                tile->data.colorui128[i][j][1] = clear_value->ui[1];
                tile->data.colorui128[i][j][2] = clear_value->ui[2];
                tile->data.colorui128[i][j][3] = clear_value->ui[3];
            }
        }
    } else if (util_format_is_pure_sint(format)) {
        for (i = 0; i < TILE_SIZE; i++) {
            for (j = 0; j < TILE_SIZE; j++) {
                tile->data.colori128[i][j][0] = clear_value->i[0];
                tile->data.colori128[i][j][1] = clear_value->i[1];
                tile->data.colori128[i][j][2] = clear_value->i[2];
                tile->data.colori128[i][j][3] = clear_value->i[3];
            }
        }
    } else {
        for (i = 0; i < TILE_SIZE; i++) {
            for (j = 0; j < TILE_SIZE; j++) {
                tile->data.color[i][j][0] = clear_value->f[0];
                tile->data.color[i][j][1] = clear_value->f[1];
                tile->data.color[i][j][2] = clear_value->f[2];
                tile->data.color[i][j][3] = clear_value->f[3];
            }
        }
    }
}
Beispiel #7
0
/**
 * Write the cached tile at slot \p pos back through the cache's transfer
 * and mark the slot as invalid.  Does nothing if the slot is already
 * invalid.
 */
static void
sp_flush_tile(struct softpipe_tile_cache* tc, unsigned pos)
{
    if (tc->tile_addrs[pos].bits.invalid)
        return;

    if (tc->depth_stencil) {
        /* depth/stencil tiles are written back as raw data */
        pipe_put_tile_raw(tc->pipe, tc->transfer,
                          tc->tile_addrs[pos].bits.x * TILE_SIZE,
                          tc->tile_addrs[pos].bits.y * TILE_SIZE,
                          TILE_SIZE, TILE_SIZE,
                          tc->entries[pos]->data.depth32, 0/*STRIDE*/);
    }
    else if (util_format_is_pure_uint(tc->surface->format)) {
        pipe_put_tile_ui_format(tc->pipe, tc->transfer,
                                tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                TILE_SIZE, TILE_SIZE,
                                tc->surface->format,
                                (unsigned *) tc->entries[pos]->data.colorui128);
    }
    else if (util_format_is_pure_sint(tc->surface->format)) {
        pipe_put_tile_i_format(tc->pipe, tc->transfer,
                               tc->tile_addrs[pos].bits.x * TILE_SIZE,
                               tc->tile_addrs[pos].bits.y * TILE_SIZE,
                               TILE_SIZE, TILE_SIZE,
                               tc->surface->format,
                               (int *) tc->entries[pos]->data.colori128);
    }
    else {
        pipe_put_tile_rgba_format(tc->pipe, tc->transfer,
                                  tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                  tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                  TILE_SIZE, TILE_SIZE,
                                  tc->surface->format,
                                  (float *) tc->entries[pos]->data.color);
    }

    /* mark as empty */
    tc->tile_addrs[pos].bits.invalid = 1;
}
Beispiel #8
0
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
				 int nr, struct pipe_surface **bufs)
{
	const struct ir3_shader_variant *vp, *fp;
	const struct ir3_info *vsi, *fsi;
	enum a3xx_instrbuffermode fpbuffer, vpbuffer;
	uint32_t fpbuffersz, vpbuffersz, fsoff;
	uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
	int constmode;
	int i, j, k;

	debug_assert(nr <= ARRAY_SIZE(color_regid));

	vp = fd3_emit_get_vp(emit);
	fp = fd3_emit_get_fp(emit);

	vsi = &vp->info;
	fsi = &fp->info;

	fpbuffer = BUFFER;
	vpbuffer = BUFFER;
	fpbuffersz = fp->instrlen;
	vpbuffersz = vp->instrlen;

	/*
	 * Decide whether to use BUFFER or CACHE mode for VS and FS.  It
	 * appears like 256 is the hard limit, but when the combined size
	 * exceeds 128 then blob will try to keep FS in BUFFER mode and
	 * switch to CACHE for VS until VS is too large.  The blob seems
	 * to switch FS out of BUFFER mode at slightly under 128.  But
	 * a bit fuzzy on the decision tree, so use slightly conservative
	 * limits.
	 *
	 * TODO check if these thresholds for BUFFER vs CACHE mode are the
	 *      same for all a3xx or whether we need to consider the gpuid
	 */

	if ((fpbuffersz + vpbuffersz) > 128) {
		if (fpbuffersz < 112) {
			/* FP:BUFFER   VP:CACHE  */
			vpbuffer = CACHE;
			vpbuffersz = 256 - fpbuffersz;
		} else if (vpbuffersz < 112) {
			/* FP:CACHE    VP:BUFFER */
			fpbuffer = CACHE;
			fpbuffersz = 256 - vpbuffersz;
		} else {
			/* FP:CACHE    VP:CACHE  */
			vpbuffer = fpbuffer = CACHE;
			vpbuffersz = fpbuffersz = 192;
		}
	}

	if (fpbuffer == BUFFER) {
		fsoff = 128 - fpbuffersz;
	} else {
		fsoff = 256 - fpbuffersz;
	}

	/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

	pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
	posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
	psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
	if (fp->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
			ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
	}

	/* adjust regids for alpha output formats. there is no alpha render
	 * format, so it's just treated like red
	 */
	for (i = 0; i < nr; i++)
		if (util_format_is_alpha(pipe_surface_format(bufs[i])))
			color_regid[i] += 3;

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
			A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
			 * flush some caches? I think we only need to set those
			 * bits if we have updated const or shader..
			 */
			A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
			COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) |
					A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2))));
	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
	OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
	OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
			A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
			A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
	OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
			A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
			A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));

	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
			COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
			A3XX_SP_SP_CTRL_REG_L0MODE(0));

	OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
	OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));

	OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
	OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
			A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
			COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
			A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
			A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
			A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
	OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
			A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
			A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
	OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
			A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
			A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));

	for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);

		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count) {
			k = ir3_find_output(vp, fp->inputs[j].slot);
			reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
			reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
		}

		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count) {
			k = ir3_find_output(vp, fp->inputs[j].slot);
			reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
			reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
		}

		OUT_RING(ring, reg);
	}

	for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);

		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count)
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc);
		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count)
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc);
		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count)
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc);
		j = ir3_next_varying(fp, j);
		if (j < fp->inputs_count)
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc);

		OUT_RING(ring, reg);
	}

	OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
	OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
			A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
	OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

	if (emit->key.binning_pass) {
		OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, 0x00000000);

		OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
		OUT_RING(ring, 0x00000000);

		OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
				A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
	} else {
		OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));

		OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
				COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
				A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
				A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
				A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
				A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
				A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
				COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
				A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
				A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
				A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
				A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));

		OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
					MAX2(128, vp->constlen)) |
				A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
		OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
	}

	OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
	OUT_RING(ring,
			 COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
			 A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
			 A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));

	OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
	for (i = 0; i < 4; i++) {
		uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
			COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);

		if (i < nr) {
			enum pipe_format fmt = pipe_surface_format(bufs[i]);
			mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
				COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
		}
		OUT_RING(ring, mrt_reg);
	}

	if (emit->key.binning_pass) {
		OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
		OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
				A3XX_VPC_ATTR_LMSIZE(1) |
				COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
		OUT_RING(ring, 0x00000000);
	} else {
		uint32_t vinterp[4], flatshade[2], vpsrepl[4];

		memset(vinterp, 0, sizeof(vinterp));
		memset(flatshade, 0, sizeof(flatshade));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = fp->inputs[j].compmask;

			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
			 * instead.. rather than -8 everywhere else..
			 */
			uint32_t inloc = fp->inputs[j].inloc - 8;

			if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
					(fp->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
						flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = fp->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x4) {
						/* .z <- 0.0f */
						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x8) {
						/* .w <- 1.0f */
						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
						loc++;
					}
				}
Beispiel #9
0
static void si_clear_texture(struct pipe_context *pipe,
			     struct pipe_resource *tex,
			     unsigned level,
			     const struct pipe_box *box,
			     const void *data)
{
	struct pipe_screen *screen = pipe->screen;
	struct si_texture *stex = (struct si_texture*)tex;
	struct pipe_surface tmpl = {{0}};
	struct pipe_surface *sf;
	const struct util_format_description *desc =
		util_format_description(tex->format);

	tmpl.format = tex->format;
	tmpl.u.tex.first_layer = box->z;
	tmpl.u.tex.last_layer = box->z + box->depth - 1;
	tmpl.u.tex.level = level;
	sf = pipe->create_surface(pipe, tex, &tmpl);
	if (!sf)
		return;

	if (stex->is_depth) {
		unsigned clear;
		float depth;
		uint8_t stencil = 0;

		/* Depth is always present. */
		clear = PIPE_CLEAR_DEPTH;
		desc->unpack_z_float(&depth, 0, data, 0, 1, 1);

		if (stex->surface.has_stencil) {
			clear |= PIPE_CLEAR_STENCIL;
			desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
		}

		si_clear_depth_stencil(pipe, sf, clear, depth, stencil,
				       box->x, box->y,
				       box->width, box->height, false);
	} else {
		union pipe_color_union color;

		/* pipe_color_union requires the full vec4 representation. */
		if (util_format_is_pure_uint(tex->format))
			desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
		else if (util_format_is_pure_sint(tex->format))
			desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
		else
			desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);

		if (screen->is_format_supported(screen, tex->format,
						tex->target, 0, 0,
						PIPE_BIND_RENDER_TARGET)) {
			si_clear_render_target(pipe, sf, &color,
					       box->x, box->y,
					       box->width, box->height, false);
		} else {
			/* Software fallback - just for R9G9B9E5_FLOAT */
			util_clear_render_target(pipe, sf, &color,
						 box->x, box->y,
						 box->width, box->height);
		}
	}
	pipe_surface_reference(&sf, NULL);
}
/**
 * Work out whether a clear color can be expressed purely through the
 * reset value, without clear words.
 *
 * \param surface_format      format of the surface being cleared
 * \param color               requested clear color
 * \param reset_value         out: starts as 0x20202020; 0x80808080 is
 *                            OR-ed in when the main channels are
 *                            non-zero, 0x40404040 when the extra channel
 *                            is non-zero
 * \param clear_words_needed  out: true unless every used channel is 0 or
 *                            the maximum (1.0 / INT32_MAX / UINT32_MAX)
 *                            and all main channels agree
 */
static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
					 const union pipe_color_union *color,
					 uint32_t* reset_value,
					 bool* clear_words_needed)
{
	const struct util_format_description *desc = util_format_description(surface_format);
	bool nonzero[4] = {false, false, false, false};
	bool main_value = false;
	bool extra_value = false;
	int extra_channel;
	int chan;

	/* Pessimistic defaults, kept on every early return below. */
	*clear_words_needed = true;
	*reset_value = 0x20202020U;

	/* If we want to clear without needing a fast clear eliminate step, we
	 * can set each channel to 0 or 1 (or 0/max for integer formats). We
	 * have two sets of flags, one for the last or first channel(extra) and
	 * one for the other channels(main).
	 */

	if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
	    surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
	    surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
		/* no extra channel: every channel counts as "main" */
		extra_channel = -1;
	} else if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
		return;
	} else if (r600_translate_colorswap(surface_format) <= 1) {
		extra_channel = desc->nr_channels - 1;
	} else {
		extra_channel = 0;
	}

	for (chan = 0; chan < 4; ++chan) {
		int index;

		/* skip components that do not map to a stored channel */
		if (desc->swizzle[chan] < UTIL_FORMAT_SWIZZLE_X ||
		    desc->swizzle[chan] > UTIL_FORMAT_SWIZZLE_W)
			continue;

		index = desc->swizzle[chan] - UTIL_FORMAT_SWIZZLE_X;

		if (util_format_is_pure_sint(surface_format)) {
			if (color->i[chan] != 0 && color->i[chan] != INT32_MAX)
				return;
			nonzero[chan] = color->i[chan] != 0;
		} else if (util_format_is_pure_uint(surface_format)) {
			if (color->ui[chan] != 0U && color->ui[chan] != UINT32_MAX)
				return;
			nonzero[chan] = color->ui[chan] != 0U;
		} else {
			if (color->f[chan] != 0.0F && color->f[chan] != 1.0F)
				return;
			nonzero[chan] = color->f[chan] != 0.0F;
		}

		if (index == extra_channel)
			extra_value = nonzero[chan];
		else
			main_value = nonzero[chan];
	}

	/* All main channels must agree, since they share one flag. */
	for (chan = 0; chan < 4; ++chan) {
		if (nonzero[chan] == main_value)
			continue;
		if (desc->swizzle[chan] - UTIL_FORMAT_SWIZZLE_X == extra_channel)
			continue;
		if (desc->swizzle[chan] < UTIL_FORMAT_SWIZZLE_X ||
		    desc->swizzle[chan] > UTIL_FORMAT_SWIZZLE_W)
			continue;
		return;
	}

	*clear_words_needed = false;

	if (main_value)
		*reset_value |= 0x80808080U;

	if (extra_value)
		*reset_value |= 0x40404040U;
}
Beispiel #11
0
/**
 * Get a tile from the cache.
 *
 * The cache slot is selected with CACHE_POS() from the tile address.  If
 * the slot currently holds a different address, its contents are written
 * back (unless the slot is marked invalid) and the slot is then refilled:
 * either cleared, when the clear flag for \p addr is set, or read from
 * the transfer.  The returned tile is also recorded in tc->last_tile and
 * tc->last_tile_addr.
 *
 * \param tc    the tile cache
 * \param addr  address of the requested tile
 * \return the cached tile for \p addr
 */
struct softpipe_cached_tile *
sp_find_cached_tile(struct softpipe_tile_cache *tc,
                    union tile_address addr )
{
    struct pipe_transfer *pt = tc->transfer;
    /* cache pos/entry: */
    const int pos = CACHE_POS(addr.bits.x,
                              addr.bits.y);
    struct softpipe_cached_tile *tile = tc->entries[pos];

    /* allocate the slot's tile storage on first use */
    if (!tile) {
        tile = sp_alloc_tile(tc);
        tc->entries[pos] = tile;
    }

    /* cache miss: the slot holds a different tile address */
    if (addr.value != tc->tile_addrs[pos].value) {

        assert(pt->resource);
        if (tc->tile_addrs[pos].bits.invalid == 0) {
            /* put dirty tile back in framebuffer */
            /* NOTE: this must use the OLD address still stored in
             * tc->tile_addrs[pos]; it is overwritten with addr below.
             */
            if (tc->depth_stencil) {
                pipe_put_tile_raw(tc->pipe, pt,
                                  tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                  tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                  TILE_SIZE, TILE_SIZE,
                                  tile->data.depth32, 0/*STRIDE*/);
            }
            else {
                /* pick the put helper matching the format class */
                if (util_format_is_pure_uint(tc->surface->format)) {
                    pipe_put_tile_ui_format(tc->pipe, pt,
                                            tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                            tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                            TILE_SIZE, TILE_SIZE,
                                            tc->surface->format,
                                            (unsigned *) tile->data.colorui128);
                } else if (util_format_is_pure_sint(tc->surface->format)) {
                    pipe_put_tile_i_format(tc->pipe, pt,
                                           tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                           tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                           TILE_SIZE, TILE_SIZE,
                                           tc->surface->format,
                                           (int *) tile->data.colori128);
                } else {
                    pipe_put_tile_rgba_format(tc->pipe, pt,
                                              tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                              tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                              TILE_SIZE, TILE_SIZE,
                                              tc->surface->format,
                                              (float *) tile->data.color);
                }
            }
        }

        /* the slot now belongs to the requested address */
        tc->tile_addrs[pos] = addr;

        if (is_clear_flag_set(tc->clear_flags, addr)) {
            /* don't get tile from framebuffer, just clear it */
            if (tc->depth_stencil) {
                clear_tile(tile, pt->resource->format, tc->clear_val);
            }
            else {
                clear_tile_rgba(tile, pt->resource->format, &tc->clear_color);
            }
            clear_clear_flag(tc->clear_flags, addr);
        }
        else {
            /* get new tile data from transfer */
            if (tc->depth_stencil) {
                pipe_get_tile_raw(tc->pipe, pt,
                                  tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                  tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                  TILE_SIZE, TILE_SIZE,
                                  tile->data.depth32, 0/*STRIDE*/);
            }
            else {
                /* pick the get helper matching the format class */
                if (util_format_is_pure_uint(tc->surface->format)) {
                    pipe_get_tile_ui_format(tc->pipe, pt,
                                            tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                            tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                            TILE_SIZE, TILE_SIZE,
                                            tc->surface->format,
                                            (unsigned *) tile->data.colorui128);
                } else if (util_format_is_pure_sint(tc->surface->format)) {
                    pipe_get_tile_i_format(tc->pipe, pt,
                                           tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                           tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                           TILE_SIZE, TILE_SIZE,
                                           tc->surface->format,
                                           (int *) tile->data.colori128);
                } else {
                    pipe_get_tile_rgba_format(tc->pipe, pt,
                                              tc->tile_addrs[pos].bits.x * TILE_SIZE,
                                              tc->tile_addrs[pos].bits.y * TILE_SIZE,
                                              TILE_SIZE, TILE_SIZE,
                                              tc->surface->format,
                                              (float *) tile->data.color);
                }
            }
        }
    }

    /* remember the most recently returned tile and its address */
    tc->last_tile = tile;
    tc->last_tile_addr = addr;
    return tile;
}
Beispiel #12
0
/**
 * Write the clear value to every tile position that is still flagged as
 * cleared, then reset all clear flags.
 *
 * A scratch tile (tc->tile, allocated on first use) is filled with the
 * clear value once and pushed to each flagged position.
 */
static void
sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
{
    struct pipe_transfer *pt = tc->transfer;
    const uint width = tc->transfer->box.width;
    const uint height = tc->transfer->box.height;
    uint numCleared = 0;
    uint tx, ty;

    assert(pt->resource);
    if (!tc->tile)
        tc->tile = sp_alloc_tile(tc);

    /* clear the scratch tile to the clear value */
    if (tc->depth_stencil)
        clear_tile(tc->tile, pt->resource->format, tc->clear_val);
    else
        clear_tile_rgba(tc->tile, pt->resource->format, &tc->clear_color);

    /* push the tile to all positions marked as clear */
    for (ty = 0; ty < height; ty += TILE_SIZE) {
        for (tx = 0; tx < width; tx += TILE_SIZE) {
            union tile_address addr = tile_address(tx, ty);

            if (!is_clear_flag_set(tc->clear_flags, addr))
                continue;

            /* write the scratch tile to the surface */
            if (tc->depth_stencil) {
                pipe_put_tile_raw(tc->pipe,
                                  pt,
                                  tx, ty, TILE_SIZE, TILE_SIZE,
                                  tc->tile->data.any, 0/*STRIDE*/);
            }
            else if (util_format_is_pure_uint(tc->surface->format)) {
                pipe_put_tile_ui_format(tc->pipe, pt,
                                        tx, ty, TILE_SIZE, TILE_SIZE,
                                        pt->resource->format,
                                        (unsigned *) tc->tile->data.colorui128);
            }
            else if (util_format_is_pure_sint(tc->surface->format)) {
                pipe_put_tile_i_format(tc->pipe, pt,
                                       tx, ty, TILE_SIZE, TILE_SIZE,
                                       pt->resource->format,
                                       (int *) tc->tile->data.colori128);
            }
            else {
                pipe_put_tile_rgba(tc->pipe, pt,
                                   tx, ty, TILE_SIZE, TILE_SIZE,
                                   (float *) tc->tile->data.color);
            }

            numCleared++;
        }
    }

    /* reset all clear flags to zero */
    memset(tc->clear_flags, 0, sizeof(tc->clear_flags));

#if 0
    debug_printf("num cleared: %u\n", numCleared);
#endif
}
/**
 * Similar to sp_get_cached_tile() but for textures.
 * Tiles are read-only and indexed with more params.
 */
const struct softpipe_tex_cached_tile *
sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc, 
                        union tex_tile_address addr )
{
   struct softpipe_tex_cached_tile *tile;
   boolean zs = util_format_is_depth_or_stencil(tc->format);

   tile = tc->entries + tex_cache_pos( addr );

   if (addr.value != tile->addr.value) {

      /* cache miss.  Most misses are because we've invaldiated the
       * texture cache previously -- most commonly on binding a new
       * texture.  Currently we effectively flush the cache on texture
       * bind.
       */
#if 0
      _debug_printf("miss at %u:  x=%d y=%d z=%d face=%d level=%d\n"
                    "   tile %u:  x=%d y=%d z=%d face=%d level=%d\n",
                    pos, x/TILE_SIZE, y/TILE_SIZE, z, face, level,
                    pos, tile->addr.bits.x, tile->addr.bits.y, tile->z, tile->face, tile->level);
#endif

      /* check if we need to get a new transfer */
      if (!tc->tex_trans ||
          tc->tex_face != addr.bits.face ||
          tc->tex_level != addr.bits.level ||
          tc->tex_z != addr.bits.z) {
         /* get new transfer (view into texture) */
         unsigned width, height, layer;

         if (tc->tex_trans) {
            if (tc->tex_trans_map) {
               tc->pipe->transfer_unmap(tc->pipe, tc->tex_trans);
               tc->tex_trans_map = NULL;
            }

            tc->pipe->transfer_destroy(tc->pipe, tc->tex_trans);
            tc->tex_trans = NULL;
         }

         width = u_minify(tc->texture->width0, addr.bits.level);
         if (tc->texture->target == PIPE_TEXTURE_1D_ARRAY) {
            height = tc->texture->array_size;
            layer = 0;
         }
         else {
            height = u_minify(tc->texture->height0, addr.bits.level);
            layer = addr.bits.face + addr.bits.z;
         }

         tc->tex_trans = 
            pipe_get_transfer(tc->pipe, tc->texture,
                              addr.bits.level,
                              layer,
                              PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED,
                              0, 0, width, height);

         tc->tex_trans_map = tc->pipe->transfer_map(tc->pipe, tc->tex_trans);

         tc->tex_face = addr.bits.face;
         tc->tex_level = addr.bits.level;
         tc->tex_z = addr.bits.z;
      }

      /* Get tile from the transfer (view into texture), explicitly passing
       * the image format.
       */
      if (!zs && util_format_is_pure_uint(tc->format)) {
         pipe_get_tile_ui_format(tc->pipe,
                                 tc->tex_trans,
                                 addr.bits.x * TILE_SIZE,
                                 addr.bits.y * TILE_SIZE,
                                 TILE_SIZE,
                                 TILE_SIZE,
                                 tc->format,
                                 (unsigned *) tile->data.colorui);
      } else if (!zs && util_format_is_pure_sint(tc->format)) {
         pipe_get_tile_i_format(tc->pipe,
                                tc->tex_trans,
                                addr.bits.x * TILE_SIZE,
                                addr.bits.y * TILE_SIZE,
                                TILE_SIZE,
                                 TILE_SIZE,
                                tc->format,
                                (int *) tile->data.colori);
      } else {
         pipe_get_tile_rgba_format(tc->pipe,
                                   tc->tex_trans,
                                   addr.bits.x * TILE_SIZE,
                                   addr.bits.y * TILE_SIZE,
                                   TILE_SIZE,
                                   TILE_SIZE,
                                   tc->format,
                                   (float *) tile->data.color);
      }
      tile->addr = addr;
   }

   tc->last_tile = tile;
   return tile;
}
Beispiel #14
0
static bool
fd6_clear(struct fd_context *ctx, unsigned buffers,
		const union pipe_color_union *color, double depth, unsigned stencil)
{
	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
	struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
	struct fd_ringbuffer *ring = ctx->batch->draw;

	if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
			is_z32(pfb->zsbuf->format))
		return false;

	fd6_emit_render_cntl(ctx, true, false);

	OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
	OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(scissor->minx) |
			 A6XX_RB_BLIT_SCISSOR_TL_Y(scissor->miny));
	OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(scissor->maxx - 1) |
			 A6XX_RB_BLIT_SCISSOR_BR_Y(scissor->maxy - 1));

	if (buffers & PIPE_CLEAR_COLOR) {
		for (int i = 0; i < pfb->nr_cbufs; i++) {
			union util_color uc = {0};

			if (!pfb->cbufs[i])
				continue;

			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			enum pipe_format pfmt = pfb->cbufs[i]->format;

			// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
			union pipe_color_union swapped;
			switch (fd6_pipe2swap(pfmt)) {
			case WZYX:
				swapped.ui[0] = color->ui[0];
				swapped.ui[1] = color->ui[1];
				swapped.ui[2] = color->ui[2];
				swapped.ui[3] = color->ui[3];
				break;
			case WXYZ:
				swapped.ui[2] = color->ui[0];
				swapped.ui[1] = color->ui[1];
				swapped.ui[0] = color->ui[2];
				swapped.ui[3] = color->ui[3];
				break;
			case ZYXW:
				swapped.ui[3] = color->ui[0];
				swapped.ui[0] = color->ui[1];
				swapped.ui[1] = color->ui[2];
				swapped.ui[2] = color->ui[3];
				break;
			case XYZW:
				swapped.ui[3] = color->ui[0];
				swapped.ui[2] = color->ui[1];
				swapped.ui[1] = color->ui[2];
				swapped.ui[0] = color->ui[3];
				break;
			}

			if (util_format_is_pure_uint(pfmt)) {
				util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
			} else if (util_format_is_pure_sint(pfmt)) {
				util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
			} else {
				util_pack_color(swapped.f, pfmt, &uc);
			}

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
			OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
				A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
			OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
				A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
			OUT_RINGP(ring, i, &ctx->batch->gmem_patches);

			OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
			OUT_RING(ring, 0);

			OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
			OUT_RING(ring, uc.ui[0]);
			OUT_RING(ring, uc.ui[1]);
			OUT_RING(ring, uc.ui[2]);
			OUT_RING(ring, uc.ui[3]);

			fd6_emit_blit(ctx->batch, ring);
		}
	}

	if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
		enum pipe_format pfmt = pfb->zsbuf->format;
		uint32_t clear = util_pack_z_stencil(pfmt, depth, stencil);
		uint32_t mask = 0;

		if (buffers & PIPE_CLEAR_DEPTH)
			mask |= 0x1;

		if (buffers & PIPE_CLEAR_STENCIL)
			mask |= 0x2;

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
			A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_pipe2color(pfmt)));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
		OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
			// XXX UNK0 for separate stencil ??
			A6XX_RB_BLIT_INFO_DEPTH |
			A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
		OUT_RINGP(ring, MAX_RENDER_TARGETS, &ctx->batch->gmem_patches);

		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
		OUT_RING(ring, 0);

		OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
		OUT_RING(ring, clear);

		fd6_emit_blit(ctx->batch, ring);

		if (pfb->zsbuf && (buffers & PIPE_CLEAR_DEPTH)) {
			struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
			if (zsbuf->lrz) {
				zsbuf->lrz_valid = true;
				fd6_clear_lrz(ctx->batch, zsbuf, depth);
			}
		}
	}

	return true;
}
Beispiel #15
0
/**
 * Emit the a3xx register state (and, for stages in BUFFER mode, the shader
 * instructions) for the vertex/fragment shader pair selected by \p emit.
 *
 * \param ring  ringbuffer the packets are written to
 * \param emit  emit state; supplies the shader variants, the binning_pass
 *              key, rasterflat and the sprite_coord_* settings
 * \param nr    number of entries in \p bufs (asserted to be at most 4)
 * \param bufs  render target surfaces; their formats are consulted for the
 *              alpha regid adjustment and the per-MRT UINT/SINT bits
 */
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
                 int nr, struct pipe_surface **bufs)
{
    const struct ir3_shader_variant *vp, *fp;
    const struct ir3_info *vsi, *fsi;
    enum a3xx_instrbuffermode fpbuffer, vpbuffer;
    uint32_t fpbuffersz, vpbuffersz, fsoff;
    uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
    int constmode;
    int i, j, k;

    debug_assert(nr <= ARRAY_SIZE(color_regid));

    vp = fd3_emit_get_vp(emit);

    if (emit->key.binning_pass) {
        /* use dummy stateobj to simplify binning vs non-binning: */
        static const struct ir3_shader_variant binning_fp = {};
        fp = &binning_fp;
    } else {
        fp = fd3_emit_get_fp(emit);
    }

    vsi = &vp->info;
    fsi = &fp->info;

    /* start with both stages in BUFFER mode, sized by their instruction length */
    fpbuffer = BUFFER;
    vpbuffer = BUFFER;
    fpbuffersz = fp->instrlen;
    vpbuffersz = vp->instrlen;

    /*
     * Decide whether to use BUFFER or CACHE mode for VS and FS.  It
     * appears like 256 is the hard limit, but when the combined size
     * exceeds 128 then blob will try to keep FS in BUFFER mode and
     * switch to CACHE for VS until VS is too large.  The blob seems
     * to switch FS out of BUFFER mode at slightly under 128.  But
     * a bit fuzzy on the decision tree, so use slightly conservative
     * limits.
     *
     * TODO check if these thresholds for BUFFER vs CACHE mode are the
     *      same for all a3xx or whether we need to consider the gpuid
     */

    if ((fpbuffersz + vpbuffersz) > 128) {
        if (fpbuffersz < 112) {
            /* FP:BUFFER   VP:CACHE  */
            vpbuffer = CACHE;
            vpbuffersz = 256 - fpbuffersz;
        } else if (vpbuffersz < 112) {
            /* FP:CACHE    VP:BUFFER */
            fpbuffer = CACHE;
            fpbuffersz = 256 - vpbuffersz;
        } else {
            /* FP:CACHE    VP:CACHE  */
            vpbuffer = fpbuffer = CACHE;
            vpbuffersz = fpbuffersz = 192;
        }
    }

    /* fsoff is later written as SHADEROBJOFFSET of SP_FS_OBJ_OFFSET_REG */
    if (fpbuffer == BUFFER) {
        fsoff = 128 - fpbuffersz;
    } else {
        fsoff = 256 - fpbuffersz;
    }

    /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
    constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

    /* look up the output registers: position/psize from the VS, depth
     * (the FS's POSITION output) and color(s) from the FS
     */
    pos_regid = ir3_find_output_regid(vp,
                                      ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
    posz_regid = ir3_find_output_regid(fp,
                                       ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
    psize_regid = ir3_find_output_regid(vp,
                                        ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
    if (fp->color0_mrt) {
        /* color output 0 feeds all four MRT slots */
        color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
                                              ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
    } else {
        /* each COLOR output goes to the MRT slot given by its semantic index */
        for (i = 0; i < fp->outputs_count; i++) {
            ir3_semantic sem = fp->outputs[i].semantic;
            unsigned idx = sem2idx(sem);
            if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
                continue;
            debug_assert(idx < ARRAY_SIZE(color_regid));
            color_regid[idx] = fp->outputs[i].regid;
        }
    }

    /* adjust regids for alpha output formats. there is no alpha render
     * format, so it's just treated like red
     */
    for (i = 0; i < nr; i++)
        if (util_format_is_alpha(pipe_surface_format(bufs[i])))
            color_regid[i] += 3;

    /* we could probably divide this up into things that need to be
     * emitted if frag-prog is dirty vs if vert-prog is dirty..
     */

    /* six consecutive registers starting at HLSQ_CONTROL_0_REG */
    OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
    OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
             A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
             /* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
              * flush some caches? I think we only need to set those
              * bits if we have updated const or shader..
              */
             A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
             A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
    OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
             A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
             COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
    OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
    OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
    OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
             A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
             A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
    OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
             A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
             A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));

    OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
    OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
             COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
             A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
             A3XX_SP_SP_CTRL_REG_L0MODE(0));

    OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
    OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));

    /* SP_VS_CTRL_REG0, SP_VS_CTRL_REG1 and SP_VS_PARAM_REG */
    OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
    OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
             A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
             COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
             A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
             A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
             A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
             A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
             A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
             COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
             A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
    OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
             A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
             A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
    OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
             A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
             A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));

    /* Link VS outputs to FS inputs: each SP_VS_OUT_REG holds two varyings
     * (A and B).  j starts at -1 and is advanced with ir3_next_varying()
     * before each use; the matching VS output is found by semantic.
     */
    for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
        uint32_t reg = 0;

        OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count) {
            k = ir3_find_output(vp, fp->inputs[j].semantic);
            reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
            reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
        }

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count) {
            k = ir3_find_output(vp, fp->inputs[j].semantic);
            reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
            reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
        }

        OUT_RING(ring, reg);
    }

    /* Same walk over the FS inputs, four per SP_VS_VPC_DST_REG, recording
     * each varying's inloc.
     */
    for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
        uint32_t reg = 0;

        OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc);

        OUT_RING(ring, reg);
    }

    OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
    OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
             A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
    OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

    if (emit->key.binning_pass) {
        /* binning pass: zero FS length and a minimal FS control setup;
         * no FS object start address is emitted
         */
        OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
        OUT_RING(ring, 0x00000000);

        OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
                 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
        OUT_RING(ring, 0x00000000);

        OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
        OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
                 A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
    } else {
        /* full FS state, mirroring the VS setup above */
        OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
        OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));

        OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
                 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
                 COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
                 A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
                 A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
                 A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
                 A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
                 A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
                 COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
                 A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
                 A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
                 A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
                 A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));

        OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
        OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
                     MAX2(128, vp->constlen)) |
                 A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
        OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
    }

    /* the MRT field is written as (number of render targets - 1), min 0 */
    OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
    OUT_RING(ring,
             COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
             A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
             A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));

    /* all four MRT registers are always written; the UINT/SINT bits are
     * only set for slots that have a bound surface (i < nr)
     */
    OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
    for (i = 0; i < 4; i++) {
        uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
                           COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);

        if (i < nr) {
            enum pipe_format fmt = pipe_surface_format(bufs[i]);
            mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
                       COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
        }
        OUT_RING(ring, mrt_reg);
    }

    if (emit->key.binning_pass) {
        /* binning pass: no TOTALATTR and a zero VPC_PACK value */
        OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
        OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
                 A3XX_VPC_ATTR_LMSIZE(1) |
                 COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
        OUT_RING(ring, 0x00000000);
    } else {
        /* vinterp/vpsrepl use 2 bits per varying component (16 per word),
         * flatshade uses 1 bit per component (32 per word)
         */
        uint32_t vinterp[4], flatshade[2], vpsrepl[4];

        memset(vinterp, 0, sizeof(vinterp));
        memset(flatshade, 0, sizeof(flatshade));
        memset(vpsrepl, 0, sizeof(vpsrepl));

        /* figure out VARYING_INTERP / FLAT_SHAD register values: */
        for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
            uint32_t interp = fp->inputs[j].interpolate;

            /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
             * instead.. rather than -8 everywhere else..
             */
            uint32_t inloc = fp->inputs[j].inloc - 8;

            /* currently assuming varyings aligned to 4 (not
             * packed):
             */
            debug_assert((inloc % 4) == 0);

            /* constant-interpolated inputs, and color inputs when flat
             * shading is on, get all four components marked FLAT
             */
            if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
                    ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
                uint32_t loc = inloc;
                for (i = 0; i < 4; i++, loc++) {
                    vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
                    flatshade[loc / 32] |= 1 << (loc % 32);
                }
            }

            /* Replace the .xy coordinates with S/T from the point sprite. Set
             * interpolation bits for .zw such that they become .01
             */
            if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
                vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
                                       << ((inloc % 16) * 2);
                vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
                vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
            }
        }

        OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
        OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
                 A3XX_VPC_ATTR_THRDASSIGN(1) |
                 A3XX_VPC_ATTR_LMSIZE(1) |
                 COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
        OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
                 A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));

        OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
        OUT_RING(ring, vinterp[0]);    /* VPC_VARYING_INTERP[0].MODE */
        OUT_RING(ring, vinterp[1]);    /* VPC_VARYING_INTERP[1].MODE */
        OUT_RING(ring, vinterp[2]);    /* VPC_VARYING_INTERP[2].MODE */
        OUT_RING(ring, vinterp[3]);    /* VPC_VARYING_INTERP[3].MODE */

        OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
        OUT_RING(ring, vpsrepl[0]);    /* VPC_VARYING_PS_REPL[0].MODE */
        OUT_RING(ring, vpsrepl[1]);    /* VPC_VARYING_PS_REPL[1].MODE */
        OUT_RING(ring, vpsrepl[2]);    /* VPC_VARYING_PS_REPL[2].MODE */
        OUT_RING(ring, vpsrepl[3]);    /* VPC_VARYING_PS_REPL[3].MODE */

        OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
        OUT_RING(ring, flatshade[0]);        /* SP_FS_FLAT_SHAD_MODE_REG_0 */
        OUT_RING(ring, flatshade[1]);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */
    }

    /* shader instructions are only emitted here for stages in BUFFER mode */
    if (vpbuffer == BUFFER)
        emit_shader(ring, vp);

    /* NOTE(review): the VFD_PERFCOUNTER0_SELECT write after each shader
     * upload looks like a deliberate dummy register write -- confirm intent.
     */
    OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
    OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */

    if (!emit->key.binning_pass) {
        if (fpbuffer == BUFFER)
            emit_shader(ring, fp);

        OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
        OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
    }
}
Beispiel #16
0
static void util_blitter_clear_custom(struct blitter_context *blitter,
                                      unsigned width, unsigned height,
                                      unsigned num_cbufs,
                                      unsigned clear_buffers,
                                      enum pipe_format cbuf_format,
                                      const union pipe_color_union *color,
                                      double depth, unsigned stencil,
                                      void *custom_blend, void *custom_dsa)
{
   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
   struct pipe_context *pipe = ctx->base.pipe;
   struct pipe_stencil_ref sr = { { 0 } };
   boolean int_format = util_format_is_pure_integer(cbuf_format);
   assert(num_cbufs <= PIPE_MAX_COLOR_BUFS);

   blitter_set_running_flag(ctx);
   blitter_check_saved_vertex_states(ctx);
   blitter_check_saved_fragment_states(ctx);

   /* bind states */
   if (custom_blend) {
      pipe->bind_blend_state(pipe, custom_blend);
   } else if (clear_buffers & PIPE_CLEAR_COLOR) {
      pipe->bind_blend_state(pipe, ctx->blend_write_color);
   } else {
      pipe->bind_blend_state(pipe, ctx->blend_keep_color);
   }

   if (custom_dsa) {
      pipe->bind_depth_stencil_alpha_state(pipe, custom_dsa);
   } else if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_stencil);
   } else if (clear_buffers & PIPE_CLEAR_DEPTH) {
      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_write_depth_keep_stencil);
   } else if (clear_buffers & PIPE_CLEAR_STENCIL) {
      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_write_stencil);
   } else {
      pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
   }

   sr.ref_value[0] = stencil & 0xff;
   pipe->set_stencil_ref(pipe, &sr);

   pipe->bind_rasterizer_state(pipe, ctx->rs_state);
   if (util_format_is_pure_sint(cbuf_format)) {
      pipe->bind_vertex_elements_state(pipe, ctx->velem_sint_state);
   } else if (util_format_is_pure_uint(cbuf_format)) {
      pipe->bind_vertex_elements_state(pipe, ctx->velem_uint_state);
   } else {
      pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
   }
   pipe->bind_fs_state(pipe, blitter_get_fs_col(ctx, num_cbufs, int_format));
   pipe->bind_vs_state(pipe, ctx->vs);
   if (ctx->has_geometry_shader)
      pipe->bind_gs_state(pipe, NULL);

   blitter_set_dst_dimensions(ctx, width, height);
   blitter->draw_rectangle(blitter, 0, 0, width, height, depth,
                           UTIL_BLITTER_ATTRIB_COLOR, color);

   blitter_restore_vertex_states(ctx);
   blitter_restore_fragment_states(ctx);
   blitter_unset_running_flag(ctx);
}
Beispiel #17
0
/**
 * Clear the rasterizer's current color tile.
 * This is a bin command called during bin processing.
 */
static void
lp_rast_clear_color(struct lp_rasterizer_task *task,
                    const union lp_rast_cmd_arg arg)
{
   const struct lp_scene *scene = task->scene;

   if (scene->fb.nr_cbufs) {
      unsigned i;
      union util_color uc;

      if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) {
         /*
          * We expect int/uint clear values here, though some APIs
          * might disagree (but in any case util_pack_color()
          * couldn't handle it)...
          */
         LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
                    arg.clear_color.ui[0],
                    arg.clear_color.ui[1],
                    arg.clear_color.ui[2],
                    arg.clear_color.ui[3]);

         for (i = 0; i < scene->fb.nr_cbufs; i++) {
            enum pipe_format format = scene->fb.cbufs[i]->format;

            if (util_format_is_pure_sint(format)) {
               util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
            }
            else {
               assert(util_format_is_pure_uint(format));
               util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
            }

            util_fill_rect(scene->cbufs[i].map,
                           scene->fb.cbufs[i]->format,
                           scene->cbufs[i].stride,
                           task->x,
                           task->y,
                           TILE_SIZE,
                           TILE_SIZE,
                           &uc);
         }
      }
      else {
         uint8_t clear_color[4];

         for (i = 0; i < 4; ++i) {
            clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
         }

         LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
                    clear_color[0],
                    clear_color[1],
                    clear_color[2],
                    clear_color[3]);

         for (i = 0; i < scene->fb.nr_cbufs; i++) {

            util_pack_color(arg.clear_color.f,
                            scene->fb.cbufs[i]->format, &uc);

            util_fill_rect(scene->cbufs[i].map,
                           scene->fb.cbufs[i]->format,
                           scene->cbufs[i].stride,
                           task->x,
                           task->y,
                           TILE_SIZE,
                           TILE_SIZE,
                           &uc);
         }
      }
   }

   LP_COUNT(nr_color_tile_clear);
}
Beispiel #18
0
/**
 * Fallback for pipe->clear_render_target() function.
 * XXX this looks too hackish to be really useful.
 * cpp > 4 looks like a gross hack at best...
 * Plus can't use these transfer fallbacks when clearing
 * multisampled surfaces for instance.
 * Clears all bound layers.
 */
void
util_clear_render_target(struct pipe_context *pipe,
                         struct pipe_surface *dst,
                         const union pipe_color_union *color,
                         unsigned dstx, unsigned dsty,
                         unsigned width, unsigned height)
{
   struct pipe_transfer *dst_trans;
   ubyte *dst_map;
   union util_color uc;
   unsigned max_layer;

   assert(dst->texture);
   if (!dst->texture)
      return;

   if (dst->texture->target == PIPE_BUFFER) {
      /*
       * The fill naturally works on the surface format, however
       * the transfer uses resource format which is just bytes for buffers.
       */
      unsigned dx, w;
      unsigned pixstride = util_format_get_blocksize(dst->format);
      dx = (dst->u.buf.first_element + dstx) * pixstride;
      w = width * pixstride;
      max_layer = 0;
      dst_map = pipe_transfer_map(pipe,
                                  dst->texture,
                                  0, 0,
                                  PIPE_TRANSFER_WRITE,
                                  dx, 0, w, 1,
                                  &dst_trans);
   }
   else {
      max_layer = dst->u.tex.last_layer - dst->u.tex.first_layer;
      dst_map = pipe_transfer_map_3d(pipe,
                                     dst->texture,
                                     dst->u.tex.level,
                                     PIPE_TRANSFER_WRITE,
                                     dstx, dsty, dst->u.tex.first_layer,
                                     width, height, max_layer + 1, &dst_trans);
   }

   assert(dst_map);

   if (dst_map) {
      enum pipe_format format = dst->format;
      assert(dst_trans->stride > 0);

      if (util_format_is_pure_integer(format)) {
         /*
          * We expect int/uint clear values here, though some APIs
          * might disagree (but in any case util_pack_color()
          * couldn't handle it)...
          */
         if (util_format_is_pure_sint(format)) {
            util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
         }
         else {
            assert(util_format_is_pure_uint(format));
            util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
         }
      }
      else {
         util_pack_color(color->f, format, &uc);
      }

      util_fill_box(dst_map, dst->format,
                    dst_trans->stride, dst_trans->layer_stride,
                    0, 0, 0, width, height, max_layer + 1, &uc);

      pipe->transfer_unmap(pipe, dst_trans);
   }
}
Beispiel #19
0
/**
 * Perform the fetch from API vertex elements & vertex buffers, to a
 * contiguous set of float[4] attributes as required for the
 * vertex_shader->run_linear() method.
 *
 * This is used in all cases except pure passthrough
 * (draw_pt_fetch_emit.c) which has its own version to translate
 * directly to hw vertices.
 *
 * Builds the translate key describing that conversion and looks up the
 * matching translate object, storing it in fetch->translate.
 *
 * \param fetch              fetch state to prepare
 * \param vs_input_count     number of vertex shader inputs
 * \param vertex_size        size in bytes of one output vertex (used as
 *                           the output stride)
 * \param instance_id_index  shader input slot fed with the instance id,
 *                           or ~0 if there is none
 */
void
draw_pt_fetch_prepare(struct pt_fetch *fetch,
                      unsigned vs_input_count,
                      unsigned vertex_size,
                      unsigned instance_id_index)
{
   struct draw_context *draw = fetch->draw;
   unsigned nr_inputs;
   /* i: shader input slot, nr: translate key element, ei: vertex element */
   unsigned i, nr = 0, ei = 0;
   unsigned dst_offset = 0;
   unsigned num_extra_inputs = 0;
   struct translate_key key;

   fetch->vertex_size = vertex_size;

   /* Leave the clipmask/edgeflags/pad/vertex_id untouched
    */
   dst_offset += 1 * sizeof(float);
   /* Just leave the clip[] and pre_clip_pos[] array untouched.
    */
   dst_offset += 8 * sizeof(float);

   if (instance_id_index != ~0) {
      num_extra_inputs++;
   }

   assert(draw->pt.nr_vertex_elements + num_extra_inputs >= vs_input_count);

   nr_inputs = MIN2(vs_input_count, draw->pt.nr_vertex_elements + num_extra_inputs);

   for (i = 0; i < nr_inputs; i++) {
      if (i == instance_id_index) {
         /* NOTE(review): input_buffer/input_offset/instance_divisor are left
          * unset for this element while the key is later compared as a
          * whole -- confirm this cannot cause spurious key mismatches.
          */
         key.element[nr].type = TRANSLATE_ELEMENT_INSTANCE_ID;
         key.element[nr].input_format = PIPE_FORMAT_R32_USCALED;
         key.element[nr].output_format = PIPE_FORMAT_R32_USCALED;
         key.element[nr].output_offset = dst_offset;

         dst_offset += sizeof(uint);
      } else {
         /* The instance-id slot does not consume a vertex element, so the
          * element for shader input i is vertex_element[ei], not [i].  The
          * output format must be chosen from that same element; indexing
          * with i would classify the wrong element (and could read past
          * the valid ones) once the instance-id slot has been passed.
          */
         key.element[nr].type = TRANSLATE_ELEMENT_NORMAL;
         key.element[nr].input_format = draw->pt.vertex_element[ei].src_format;
         key.element[nr].input_buffer = draw->pt.vertex_element[ei].vertex_buffer_index;
         key.element[nr].input_offset = draw->pt.vertex_element[ei].src_offset;
         key.element[nr].instance_divisor = draw->pt.vertex_element[ei].instance_divisor;

         /* pure-integer inputs stay integers; everything else becomes float */
         if (util_format_is_pure_sint(key.element[nr].input_format)) {
            key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_SINT;
         } else if (util_format_is_pure_uint(key.element[nr].input_format)) {
            key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_UINT;
         } else {
            key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
         }
         key.element[nr].output_offset = dst_offset;

         ei++;
         /* every output format above is four 32-bit channels */
         dst_offset += 4 * sizeof(float);
      }

      nr++;
   }

   assert(dst_offset <= vertex_size);

   key.nr_elements = nr;
   key.output_stride = vertex_size;

   /* only look up a new translate object when the key actually changed */
   if (!fetch->translate ||
       translate_key_compare(&fetch->translate->key, &key) != 0)
   {
      translate_key_sanitize(&key);
      fetch->translate = translate_cache_find(fetch->cache, &key);
   }
}
Beispiel #20
0
/*
 * Try to clear one color buffer of the attached fb, either by binning a clear
 * command or queuing up the clear for later (when binning is started).
 */
static boolean
lp_setup_try_clear_color_buffer(struct lp_setup_context *setup,
                                const union pipe_color_union *color,
                                unsigned cbuf)
{
   union lp_rast_cmd_arg clearrb_arg;
   union util_color uc;
   enum pipe_format format = setup->fb.cbufs[cbuf]->format;

   LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);

   if (util_format_is_pure_integer(format)) {
      /*
       * We expect int/uint clear values here, though some APIs
       * might disagree (but in any case util_pack_color()
       * couldn't handle it)...
       */
      if (util_format_is_pure_sint(format)) {
         util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
      }
      else {
         assert(util_format_is_pure_uint(format));
         util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
      }
   }
   else {
      util_pack_color(color->f, format, &uc);
   }

   if (setup->state == SETUP_ACTIVE) {
      struct lp_scene *scene = setup->scene;

      /* Add the clear to existing scene.  In the unusual case where
       * both color and depth-stencil are being cleared when there's
       * already been some rendering, we could discard the currently
       * binned scene and start again, but I don't see that as being
       * a common usage.
       */
      struct lp_rast_clear_rb *cc_scene =
         (struct lp_rast_clear_rb *)
            lp_scene_alloc_aligned(scene, sizeof(struct lp_rast_clear_rb), 8);

      if (!cc_scene) {
         return FALSE;
      }

      cc_scene->cbuf = cbuf;
      cc_scene->color_val = uc;
      clearrb_arg.clear_rb = cc_scene;

      if (!lp_scene_bin_everywhere(scene,
                                   LP_RAST_OP_CLEAR_COLOR,
                                   clearrb_arg))
         return FALSE;
   }
   else {
      /* Put ourselves into the 'pre-clear' state, specifically to try
       * and accumulate multiple clears to color and depth_stencil
       * buffers which the app or state-tracker might issue
       * separately.
       */
      set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );

      assert(PIPE_CLEAR_COLOR0 == (1 << 2));
      setup->clear.flags |= 1 << (cbuf + 2);
      setup->clear.color_val[cbuf] = uc;
   }

   return TRUE;
}
/**
 * Clear a box-shaped region of one mipmap level of a texture.
 *
 * A temporary surface covering layers box->z .. box->z + box->depth - 1 of
 * the given level is created, cleared and released again before returning.
 *
 * \param pipe   the context; also used to create the temporary surface
 * \param res    the texture to clear
 * \param level  the mipmap level to clear
 * \param box    region to clear: x/y/width/height give the 2D area,
 *               z/depth the layer range
 * \param data   a single pixel holding the clear value, unpacked with the
 *               surface format's description, or NULL to clear to zero
 *
 * Nothing is returned: if the surface or its view cannot be created the
 * function returns without clearing.
 */
static void
svga_clear_texture(struct pipe_context *pipe,
                   struct pipe_resource *res,
                   unsigned level,
                   const struct pipe_box *box,
                   const void *data)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_surface *svga_surface_dst;
   enum pipe_error ret;
   struct pipe_surface tmpl;
   struct pipe_surface *surface;

   /* Describe a surface spanning the requested layers of the given level. */
   memset(&tmpl, 0, sizeof(tmpl));
   tmpl.format = res->format;
   tmpl.u.tex.first_layer = box->z;
   tmpl.u.tex.last_layer = box->z + box->depth - 1;
   tmpl.u.tex.level = level;

   surface = pipe->create_surface(pipe, res, &tmpl);
   if (surface == NULL) {
      debug_printf("failed to create surface\n");
      return;
   }
   svga_surface_dst = svga_surface(surface);

   union pipe_color_union color;
   const struct util_format_description *desc =
      util_format_description(surface->format);

   if (util_format_is_depth_or_stencil(surface->format)) {
      /* Values used when data is NULL, and for any component the format
       * does not have.
       */
      float depth = 0.0f;
      uint8_t stencil = 0;
      unsigned clear_flags = 0;

      /* Only unpack the components the format actually contains.  The
       * format description is expected to provide unpack_z_float /
       * unpack_s_8uint only for formats that have depth / stencil
       * respectively (TODO confirm against the u_format tables), so calling
       * both unconditionally could call through a NULL pointer for
       * depth-only or stencil-only formats.
       */
      if (util_format_has_depth(desc)) {
         clear_flags |= PIPE_CLEAR_DEPTH;
         if (data != NULL) {
            desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
         }
      }
      if (util_format_has_stencil(desc)) {
         clear_flags |= PIPE_CLEAR_STENCIL;
         if (data != NULL) {
            desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
         }
      }

      /* Setup depth stencil view */
      struct pipe_surface *dsv =
         svga_validate_surface_view(svga, svga_surface_dst);

      if (!dsv) {
         pipe_surface_reference(&surface, NULL);
         return;
      }

      if (box->x == 0 && box->y == 0 && box->width == surface->width &&
          box->height == surface->height) {
         /* clearing whole surface, use direct VGPU10 command */
         ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
                                                   clear_flags,
                                                   stencil, depth);
         if (ret != PIPE_OK) {
            /* flush and try again */
            svga_context_flush(svga, NULL);
            ret = SVGA3D_vgpu10_ClearDepthStencilView(svga->swc, dsv,
                                                      clear_flags,
                                                      stencil, depth);
            assert(ret == PIPE_OK);
         }
      }
      else {
         /* Partial clear: go through the blitter */

         util_blitter_save_framebuffer(svga->blitter,
                                       &svga->curr.framebuffer);
         begin_blit(svga);
         util_blitter_clear_depth_stencil(svga->blitter,
                                          dsv, clear_flags,
                                          depth,stencil,
                                          box->x, box->y,
                                          box->width, box->height);
      }
   }
   else {
      /* non depth-stencil formats */

      if (data == NULL) {
         /* If data is NULL, the texture image is filled with zeros */
         color.f[0] = color.f[1] = color.f[2] = color.f[3] = 0;
      }
      else {
         /* Unpack the pixel into the union member matching the format. */
         if (util_format_is_pure_sint(surface->format)) {
            /* signed integer */
            desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
         }
         else if (util_format_is_pure_uint(surface->format)) {
            /* unsigned integer */
            desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
         }
         else {
            /* floating point */
            desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
         }
      }

      /* Setup render target view */
      struct pipe_surface *rtv =
         svga_validate_surface_view(svga, svga_surface_dst);

      if (!rtv) {
         pipe_surface_reference(&surface, NULL);
         return;
      }

      if (box->x == 0 && box->y == 0 && box->width == surface->width &&
          box->height == surface->height) {
         struct pipe_framebuffer_state *curr =  &svga->curr.framebuffer;

         /* NOTE(review): this tests the currently bound framebuffer rather
          * than rtv, and clear_buffers_with_quad() is not handed rtv either
          * -- confirm that this path really clears the requested texture.
          */
         if (is_integer_target(curr, PIPE_CLEAR_COLOR) &&
             !ints_fit_in_floats(&color)) {
            /* To clear full texture with integer format */
            clear_buffers_with_quad(svga, PIPE_CLEAR_COLOR, &color, 0.0, 0);
         }
         else {
            /* clearing whole surface using VGPU10 command */
            ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
                                                      color.f);
            if (ret != PIPE_OK) {
               /* flush and try again */
               svga_context_flush(svga,NULL);
               ret = SVGA3D_vgpu10_ClearRenderTargetView(svga->swc, rtv,
                                                         color.f);
               assert(ret == PIPE_OK);
            }
         }
      }
      else {
         /* Partial clear: use one of the fallbacks below */

         /**
          * util_blitter_clear_render_target doesn't support PIPE_TEXTURE_3D
          * It tries to draw quad with depth 0 for PIPE_TEXTURE_3D so use
          * util_clear_render_target() for PIPE_TEXTURE_3D.
          */
         if (rtv->texture->target != PIPE_TEXTURE_3D &&
             pipe->screen->is_format_supported(pipe->screen, rtv->format,
                                               rtv->texture->target,
                                               rtv->texture->nr_samples,
                                               PIPE_BIND_RENDER_TARGET)) {
            /* clear with quad drawing */
            util_blitter_save_framebuffer(svga->blitter,
                                          &svga->curr.framebuffer);
            begin_blit(svga);
            util_blitter_clear_render_target(svga->blitter,
                                             rtv,
                                             &color,
                                             box->x, box->y,
                                             box->width, box->height);
         }
         else {
            /* clear with map/write/unmap */

            /* store layer values */
            unsigned first_layer = rtv->u.tex.first_layer;
            unsigned last_layer = rtv->u.tex.last_layer;
            unsigned box_depth = last_layer - first_layer + 1;

            /* Clear one layer at a time by temporarily narrowing the view
             * to a single layer.
             */
            for (unsigned i = 0; i < box_depth; i++) {
               rtv->u.tex.first_layer = rtv->u.tex.last_layer =
                  first_layer + i;
               util_clear_render_target(pipe, rtv, &color, box->x, box->y,
                                        box->width, box->height);
            }
            /* restore layer values */
            rtv->u.tex.first_layer = first_layer;
            rtv->u.tex.last_layer = last_layer;
         }
      }
   }

   /* Drop the reference to the temporary surface. */
   pipe_surface_reference(&surface, NULL);
}