Example #1
static void si_dma_copy_buffer(struct si_context *ctx,
				struct pipe_resource *dst,
				struct pipe_resource *src,
				uint64_t dst_offset,
				uint64_t src_offset,
				uint64_t size)
{
	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
	unsigned i, ncopy, csize, max_csize, sub_cmd, shift;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	/* Mark the buffer range of destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

	dst_offset += r600_resource_va(&ctx->screen->b.b, dst);
	src_offset += r600_resource_va(&ctx->screen->b.b, src);

	/* see if we use dword or byte copy */
	if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
		size >>= 2;
		sub_cmd = SI_DMA_COPY_DWORD_ALIGNED;
		shift = 2;
		max_csize = SI_DMA_COPY_MAX_SIZE_DW;
	} else {
Example #2
bool r600_init_resource(struct r600_common_screen *rscreen,
			struct r600_resource *res,
			unsigned size, unsigned alignment,
			bool use_reusable_pool, unsigned usage)
{
	uint32_t initial_domain, domains;

	switch(usage) {
	case PIPE_USAGE_STAGING:
		/* Staging resources participate in transfers, i.e. are used
		 * for uploads and downloads from regular resources.
		 * We generate them internally for some transfers.
		 */
		initial_domain = RADEON_DOMAIN_GTT;
		domains = RADEON_DOMAIN_GTT;
		break;
	case PIPE_USAGE_DYNAMIC:
	case PIPE_USAGE_STREAM:
		/* Default to GTT, but allow the memory manager to move it to VRAM. */
		initial_domain = RADEON_DOMAIN_GTT;
		domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
		break;
	case PIPE_USAGE_DEFAULT:
	case PIPE_USAGE_STATIC:
	case PIPE_USAGE_IMMUTABLE:
	default:
		/* Don't list GTT here, because the memory manager would put some
		 * resources into GTT no matter what the initial domain is.
		 * Not listing GTT in the domains improves performance a lot. */
		initial_domain = RADEON_DOMAIN_VRAM;
		domains = RADEON_DOMAIN_VRAM;
		break;
	}

	res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
                                              use_reusable_pool,
                                              initial_domain);
	if (!res->buf) {
		return false;
	}

	res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
	res->domains = domains;
	util_range_set_empty(&res->valid_buffer_range);

	if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
		fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %u bytes\n",
			r600_resource_va(&rscreen->b, &res->b.b),
			r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
			res->buf->size);
	}
	return true;
}
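
Examples #1 and #2 both maintain valid_buffer_range, the part of a buffer that holds initialized data: the DMA copy grows it with util_range_add() so that transfer_map() knows when it must wait for the GPU, and r600_init_resource() resets it with util_range_set_empty() after allocation. Below is a minimal, self-contained sketch of such a range tracker; the struct and function names are illustrative and are not Mesa's util_range API.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative range tracker, not Mesa's util_range implementation. */
struct valid_range {
	uint64_t start;	/* inclusive */
	uint64_t end;	/* exclusive; start >= end means "empty" */
};

static void range_set_empty(struct valid_range *r)
{
	r->start = UINT64_MAX;
	r->end = 0;
}

static void range_add(struct valid_range *r, uint64_t start, uint64_t end)
{
	/* Grow the tracked region to cover the newly initialized interval. */
	if (start < r->start)
		r->start = start;
	if (end > r->end)
		r->end = end;
}

static bool range_overlaps(const struct valid_range *r,
			   uint64_t start, uint64_t end)
{
	/* transfer_map only needs to wait for the GPU when the mapped
	 * interval intersects data the GPU may have written. */
	return r->start < r->end && start < r->end && end > r->start;
}
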
Example #3
/**
 * Emit function for r600_cs_shader_state atom
 */
void evergreen_emit_cs_shader(
		struct r600_context *rctx,
		struct r600_atom *atom)
{
	struct r600_cs_shader_state *state =
					(struct r600_cs_shader_state*)atom;
	struct r600_pipe_compute *shader = state->shader;
	struct radeon_winsys_cs *cs = rctx->cs;
	uint64_t va;

	va = r600_resource_va(&rctx->screen->screen, &shader->shader_code_bo->b.b);

	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
	r600_write_value(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
	r600_write_value(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
			S_0288D4_NUM_GPRS(shader->bc.ngpr)
			| S_0288D4_STACK_SIZE(shader->bc.nstack));
	r600_write_value(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

	r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0));
	r600_write_value(cs, r600_context_bo_reloc(rctx, shader->shader_code_bo,
							RADEON_USAGE_READ));

	rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH;
}
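
The dword written right after the SQ_PGM_START_LS register header is the shader-code GPU virtual address shifted right by 8 bits, i.e. the code buffer is assumed to be 256-byte aligned. A tiny standalone illustration of that encoding (hypothetical helper, not driver code):

#include <assert.h>
#include <stdint.h>

/* Illustrative helper: SQ_PGM_START_* holds the code address >> 8. */
static uint32_t pgm_start_dword(uint64_t va)
{
	assert((va & 0xff) == 0);	/* code BO assumed 256-byte aligned */
	return (uint32_t)(va >> 8);	/* dword emitted after the reg header */
}
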
Example #4
bool r600_init_resource(struct r600_common_screen *rscreen,
                        struct r600_resource *res,
                        unsigned size, unsigned alignment,
                        bool use_reusable_pool)
{
    struct r600_texture *rtex = (struct r600_texture*)res;

    switch (res->b.b.usage) {
    case PIPE_USAGE_STAGING:
    case PIPE_USAGE_DYNAMIC:
    case PIPE_USAGE_STREAM:
        /* Transfers are likely to occur more often with these resources. */
        res->domains = RADEON_DOMAIN_GTT;
        break;
    case PIPE_USAGE_DEFAULT:
    case PIPE_USAGE_IMMUTABLE:
    default:
        /* Not listing GTT here improves performance in some apps. */
        res->domains = RADEON_DOMAIN_VRAM;
        break;
    }

    /* Tiled textures are unmappable. Always put them in VRAM. */
    if (res->b.b.target != PIPE_BUFFER &&
            rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
        res->domains = RADEON_DOMAIN_VRAM;
    }

    /* Allocate the resource. */
    res->buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
                                          use_reusable_pool,
                                          res->domains);
    if (!res->buf) {
        return false;
    }

    res->cs_buf = rscreen->ws->buffer_get_cs_handle(res->buf);
    util_range_set_empty(&res->valid_buffer_range);

    if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
        fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %u bytes\n",
                r600_resource_va(&rscreen->b, &res->b.b),
                r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
                res->buf->size);
    }
    return true;
}
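
The usage-based placement above (and in Examples #2 and #13) boils down to a pure decision: frequently-mapped resources go to GTT, everything else to VRAM, and tiled textures are forced into VRAM because they cannot be mapped directly. A hedged restatement with placeholder enums standing in for the PIPE_USAGE_* and RADEON_DOMAIN_* values:

#include <stdbool.h>

/* Placeholder enums; the driver uses PIPE_USAGE_* and RADEON_DOMAIN_*. */
enum buf_usage { USAGE_STAGING, USAGE_DYNAMIC, USAGE_STREAM, USAGE_DEFAULT, USAGE_IMMUTABLE };
enum buf_domain { DOMAIN_GTT = 1 << 0, DOMAIN_VRAM = 1 << 1 };

static unsigned pick_domains(enum buf_usage usage, bool is_tiled_texture)
{
	unsigned domains;

	switch (usage) {
	case USAGE_STAGING:
	case USAGE_DYNAMIC:
	case USAGE_STREAM:
		/* CPU transfers are frequent: keep the storage in GTT. */
		domains = DOMAIN_GTT;
		break;
	default:
		/* Not listing GTT keeps static data in VRAM. */
		domains = DOMAIN_VRAM;
		break;
	}

	/* Tiled textures are unmappable, so VRAM always wins for them. */
	if (is_tiled_texture)
		domains = DOMAIN_VRAM;
	return domains;
}
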
Example #5
void evergreen_dma_copy(struct r600_context *rctx,
		struct pipe_resource *dst,
		struct pipe_resource *src,
		uint64_t dst_offset,
		uint64_t src_offset,
		uint64_t size)
{
	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
	unsigned i, ncopy, csize, sub_cmd, shift;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	/* Make sure the DMA ring is the only one active: flush the gfx ring. */
	rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
	dst_offset += r600_resource_va(&rctx->screen->screen, dst);
	src_offset += r600_resource_va(&rctx->screen->screen, src);

	/* see if we use dword or byte copy */
	if (!(dst_offset & 0x3) && !(src_offset & 0x3) && !(size & 0x3)) {
		size >>= 2;
		sub_cmd = 0x00;
		shift = 2;
	} else {
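
Examples #1 and #5 choose between a dword copy and a byte copy from the alignment of both offsets and the size, then split the transfer into as many DMA packets as needed. The sketch below restates that planning step; MAX_COPY_UNITS_PER_PACKET is an illustrative stand-in for the real per-packet limit (SI_DMA_COPY_MAX_SIZE_DW in Example #1).

#include <stdint.h>

/* Illustrative per-packet limit; Example #1 uses SI_DMA_COPY_MAX_SIZE_DW. */
#define MAX_COPY_UNITS_PER_PACKET 0xffff

static unsigned plan_dma_copy(uint64_t dst_offset, uint64_t src_offset,
			      uint64_t size, unsigned *shift)
{
	uint64_t units;

	/* A dword copy needs both offsets and the size 4-byte aligned. */
	if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4))
		*shift = 2;	/* copy units are dwords */
	else
		*shift = 0;	/* fall back to a byte copy */

	units = size >> *shift;

	/* Each packet moves at most MAX_COPY_UNITS_PER_PACKET units, so the
	 * copy is emitted as a series of packets; return how many. */
	return (unsigned)((units + MAX_COPY_UNITS_PER_PACKET - 1) /
			  MAX_COPY_UNITS_PER_PACKET);
}
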
Example #6
void si_trace_emit(struct si_context *sctx)
{
	struct si_screen *sscreen = sctx->screen;
	struct radeon_winsys_cs *cs = sctx->cs;
	uint64_t va;

	va = r600_resource_va(&sscreen->screen, (void*)sscreen->b.trace_bo);
	r600_context_bo_reloc(sctx, sscreen->b.trace_bo, RADEON_USAGE_READWRITE);
	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
				PKT3_WRITE_DATA_WR_CONFIRM |
				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = cs->cdw;
	cs->buf[cs->cdw++] = sscreen->b.cs_count;
}
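
The WRITE_DATA packet above carries the destination GPU address as two consecutive 32-bit dwords, low half first. As a standalone helper (hypothetical name, not part of the driver):

#include <stdint.h>

/* Hypothetical helper: split a 64-bit GPU VA into the two packet dwords. */
static void split_gpu_va(uint64_t va, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)(va & 0xffffffffull);	/* ADDR_LO, emitted first */
	*hi = (uint32_t)(va >> 32);		/* ADDR_HI, emitted second */
}
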
Example #7
static void si_pipe_shader_es(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned vgpr_comp_cnt;
	uint64_t va;

	si_pm4_delete_state(sctx, es, shader->pm4);
	pm4 = shader->pm4 = si_pm4_alloc_state(sctx);

	if (pm4 == NULL)
		return;

	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);

	vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0;

	num_user_sgprs = SI_VS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with es2gs_offset */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
		       S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt));
	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
		       S_00B32C_USER_SGPR(num_user_sgprs));

	sctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE;
}
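
This and the following shader-state examples share one piece of SGPR bookkeeping: a few system SGPRs are pre-loaded right after the user SGPRs, the last two SGPRs are reserved for VCC, and the declared count is raised if the compiler's number is too small. A restatement of that arithmetic, with the 104-SGPR limit taken from the asserts in these examples:

#include <assert.h>

/* Restated SGPR accounting; the 104 limit comes from the asserts above. */
static unsigned adjust_num_sgprs(unsigned compiled_num_sgprs,
				 unsigned num_user_sgprs,
				 unsigned num_preloaded_sgprs)
{
	unsigned num_sgprs = compiled_num_sgprs;

	/* Reserve room for the pre-loaded system SGPRs plus the last
	 * 2 SGPRs, which are used for VCC. */
	if (num_user_sgprs + num_preloaded_sgprs > num_sgprs)
		num_sgprs = num_user_sgprs + num_preloaded_sgprs + 2;

	assert(num_sgprs <= 104);
	return num_sgprs;
}

With num_preloaded_sgprs = 1 this reproduces the ES case above and the PS case in Example #11; the GS path in Example #10 passes 2, and the VS path in Example #12 passes 0.
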
Example #8
/**
 * Emit function for r600_cs_shader_state atom
 */
void evergreen_emit_cs_shader(
		struct r600_context *rctx,
		struct r600_atom *atom)
{
	struct r600_cs_shader_state *state =
					(struct r600_cs_shader_state*)atom;
	struct r600_pipe_compute *shader = state->shader;
	struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
	uint64_t va;

	va = r600_resource_va(&rctx->screen->b.b, &kernel->code_bo->b.b);

	r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
	radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
	radeon_emit(cs,           /* R_0288D4_SQ_PGM_RESOURCES_LS */
			S_0288D4_NUM_GPRS(kernel->bc.ngpr)
			| S_0288D4_STACK_SIZE(kernel->bc.nstack));
	radeon_emit(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

	radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
	radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
							kernel->code_bo, RADEON_USAGE_READ));
}
Example #9
	for (i = 0; i < rctx->streamout.num_targets; i++) {
		if (!t[i])
			continue;

		t[i]->stride_in_dw = stride_in_dw[i];

		if (rctx->chip_class >= SI) {
			/* SI binds streamout buffers as shader resources.
			 * VGT only counts primitives and tells the shader
			 * through SGPRs what to do. */
			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
		} else {
			uint64_t va = r600_resource_va(rctx->b.screen,
						       (void*)t[i]->b.buffer);

			update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);

			r600_write_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
			radeon_emit(cs, (t[i]->b.buffer_offset +
					 t[i]->b.buffer_size) >> 2);	/* BUFFER_SIZE (in DW) */
			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
			radeon_emit(cs, va >> 8);			/* BUFFER_BASE */

			r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
					RADEON_USAGE_WRITE);

			/* R7xx requires this packet after updating BUFFER_BASE.
			 * Without this, R7xx locks up. */
			if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
Example #10
static void si_pipe_shader_gs(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct si_context *sctx = (struct si_context *)ctx;
	unsigned gs_vert_itemsize = shader->shader.noutput * (16 >> 2);
	unsigned gs_max_vert_out = shader->shader.gs_max_out_vertices;
	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
	unsigned cut_mode;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	uint64_t va;

	/* The GSVS_RING_ITEMSIZE register takes 15 bits */
	assert(gsvs_itemsize < (1 << 15));

	si_pm4_delete_state(sctx, gs, shader->pm4);
	pm4 = shader->pm4 = si_pm4_alloc_state(sctx);

	if (pm4 == NULL)
		return;

	if (gs_max_vert_out <= 128) {
		cut_mode = V_028A40_GS_CUT_128;
	} else if (gs_max_vert_out <= 256) {
		cut_mode = V_028A40_GS_CUT_256;
	} else if (gs_max_vert_out <= 512) {
		cut_mode = V_028A40_GS_CUT_512;
	} else {
		assert(gs_max_vert_out <= 1024);
		cut_mode = V_028A40_GS_CUT_1024;
	}

	si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
		       S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
		       S_028A40_CUT_MODE(cut_mode)|
		       S_028A40_ES_WRITE_OPTIMIZE(1) |
		       S_028A40_GS_WRITE_OPTIMIZE(1));

	si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);

	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
		       shader->shader.nparam * (16 >> 2));
	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);

	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);

	si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);

	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
	si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
	si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40);

	num_user_sgprs = SI_GS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* Two SGPRs after user SGPRs are pre-loaded with gs2vs_offset, gs_wave_id */
	if ((num_user_sgprs + 2) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2 + 2;
	}
	assert(num_sgprs <= 104);

	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
		       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B228_SGPRS((num_sgprs - 1) / 8));
	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
		       S_00B22C_USER_SGPR(num_user_sgprs));

	sctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE;
}
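
The ring item sizes at the top of Example #10 are in dwords: each GS output is sized as a 16-byte vec4 (4 dwords) and the GSVS ring item covers all gs_max_vert_out vertices, which is why the result must fit the 15-bit GSVS_RING_ITEMSIZE field noted in the comment. A minimal sketch of that computation (illustrative, not driver code):

#include <assert.h>

/* Illustrative computation; each output is sized as a 16-byte vec4. */
static unsigned gsvs_ring_itemsize_dw(unsigned gs_num_outputs,
				      unsigned gs_max_vert_out)
{
	unsigned gs_vert_itemsize = gs_num_outputs * (16 >> 2);	/* dwords per vertex */
	unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;

	/* The GSVS_RING_ITEMSIZE register field takes 15 bits. */
	assert(gsvs_itemsize < (1 << 15));
	return gsvs_itemsize;
}
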
Example #11
static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_pm4_state *pm4;
	unsigned i, exports_ps, spi_ps_in_control, db_shader_control;
	unsigned num_sgprs, num_user_sgprs;
	unsigned spi_baryc_cntl = 0, spi_ps_input_ena, spi_shader_z_format;
	uint64_t va;

	si_pm4_delete_state(sctx, ps, shader->pm4);
	pm4 = shader->pm4 = si_pm4_alloc_state(sctx);

	if (pm4 == NULL)
		return;

	db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
			    S_02880C_ALPHA_TO_MASK_DISABLE(sctx->fb_cb0_is_integer);

	for (i = 0; i < shader->shader.ninput; i++) {
		switch (shader->shader.input[i].name) {
		case TGSI_SEMANTIC_POSITION:
			if (shader->shader.input[i].centroid) {
				/* SPI_BARYC_CNTL.POS_FLOAT_LOCATION
				 * Possible values:
				 * 0 -> Position = pixel center (default)
				 * 1 -> Position = pixel centroid
				 * 2 -> Position = iterated sample number XXX:
				 *                        What does this mean?
			 	 */
				spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(1);
			}
			/* Fall through */
		case TGSI_SEMANTIC_FACE:
			continue;
		}
	}

	for (i = 0; i < shader->shader.noutput; i++) {
		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION)
			db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
		if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
			db_shader_control |= S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
	}
	if (shader->shader.uses_kill || shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
		db_shader_control |= S_02880C_KILL_ENABLE(1);

	exports_ps = 0;
	for (i = 0; i < shader->shader.noutput; i++) {
		if (shader->shader.output[i].name == TGSI_SEMANTIC_POSITION ||
		    shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL)
			exports_ps |= 1;
	}
	if (!exports_ps) {
		/* always at least export 1 component per pixel */
		exports_ps = 2;
	}

	spi_ps_in_control = S_0286D8_NUM_INTERP(shader->shader.nparam) |
		S_0286D8_BC_OPTIMIZE_DISABLE(1);

	si_pm4_set_reg(pm4, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
	spi_ps_input_ena = shader->spi_ps_input_ena;
	/* we need to enable at least one of them, otherwise we hang the GPU */
	assert(G_0286CC_PERSP_SAMPLE_ENA(spi_ps_input_ena) ||
	    G_0286CC_PERSP_CENTER_ENA(spi_ps_input_ena) ||
	    G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena) ||
	    G_0286CC_PERSP_PULL_MODEL_ENA(spi_ps_input_ena) ||
	    G_0286CC_LINEAR_SAMPLE_ENA(spi_ps_input_ena) ||
	    G_0286CC_LINEAR_CENTER_ENA(spi_ps_input_ena) ||
	    G_0286CC_LINEAR_CENTROID_ENA(spi_ps_input_ena) ||
	    G_0286CC_LINE_STIPPLE_TEX_ENA(spi_ps_input_ena));

	si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, spi_ps_input_ena);
	si_pm4_set_reg(pm4, R_0286D0_SPI_PS_INPUT_ADDR, spi_ps_input_ena);
	si_pm4_set_reg(pm4, R_0286D8_SPI_PS_IN_CONTROL, spi_ps_in_control);

	if (G_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(db_shader_control))
		spi_shader_z_format = V_028710_SPI_SHADER_32_GR;
	else if (G_02880C_Z_EXPORT_ENABLE(db_shader_control))
		spi_shader_z_format = V_028710_SPI_SHADER_32_R;
	else
		spi_shader_z_format = 0;
	si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, spi_shader_z_format);
	si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT,
		       shader->spi_shader_col_format);
	si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask);

	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);
	si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
	si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40);

	num_user_sgprs = SI_PS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	/* One SGPR after user SGPRs is pre-loaded with {prim_mask, lds_offset} */
	if ((num_user_sgprs + 1) > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 1 + 2;
	}
	assert(num_sgprs <= 104);

	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
		       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B028_SGPRS((num_sgprs - 1) / 8));
	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
		       S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
		       S_00B02C_USER_SGPR(num_user_sgprs));

	si_pm4_set_reg(pm4, R_02880C_DB_SHADER_CONTROL, db_shader_control);

	shader->cb0_is_integer = sctx->fb_cb0_is_integer;
	shader->sprite_coord_enable = sctx->sprite_coord_enable;
	sctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE;
}
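
The SPI_SHADER_Z_FORMAT selection near the end of Example #11 depends only on which Z/stencil exports the shader enables: stencil needs the two-channel 32_GR format, depth alone needs 32_R, otherwise the format is zero. Restated with placeholder enum values standing in for the V_028710_* macros:

#include <stdbool.h>

/* Placeholder values standing in for the V_028710_SPI_SHADER_* macros. */
enum z_export_format {
	Z_FORMAT_ZERO = 0,	/* nothing exported */
	Z_FORMAT_32_R,		/* depth only */
	Z_FORMAT_32_GR		/* depth + stencil */
};

static enum z_export_format pick_z_format(bool exports_z, bool exports_stencil)
{
	if (exports_stencil)
		return Z_FORMAT_32_GR;	/* stencil needs the second channel */
	if (exports_z)
		return Z_FORMAT_32_R;
	return Z_FORMAT_ZERO;
}
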
Example #12
static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_pm4_state *pm4;
	unsigned num_sgprs, num_user_sgprs;
	unsigned nparams, i, vgpr_comp_cnt;
	uint64_t va;

	si_pm4_delete_state(sctx, vs, shader->pm4);
	pm4 = shader->pm4 = si_pm4_alloc_state(sctx);

	if (pm4 == NULL)
		return;

	va = r600_resource_va(ctx->screen, (void *)shader->bo);
	si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ);

	vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0;

	num_user_sgprs = SI_VS_NUM_USER_SGPR;
	num_sgprs = shader->num_sgprs;
	if (num_user_sgprs > num_sgprs) {
		/* Last 2 reserved SGPRs are used for VCC */
		num_sgprs = num_user_sgprs + 2;
	}
	assert(num_sgprs <= 104);

	/* Certain attributes (position, psize, etc.) don't count as params.
	 * VS is required to export at least one param and r600_shader_from_tgsi()
	 * takes care of adding a dummy export.
	 */
	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
		switch (shader->shader.output[i].name) {
		case TGSI_SEMANTIC_CLIPVERTEX:
		case TGSI_SEMANTIC_POSITION:
		case TGSI_SEMANTIC_PSIZE:
			break;
		default:
			nparams++;
		}
	}
	if (nparams < 1)
		nparams = 1;

	si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG,
		       S_0286C4_VS_EXPORT_COUNT(nparams - 1));

	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
		       S_02870C_POS1_EXPORT_FORMAT(shader->shader.nr_pos_exports > 1 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS2_EXPORT_FORMAT(shader->shader.nr_pos_exports > 2 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE) |
		       S_02870C_POS3_EXPORT_FORMAT(shader->shader.nr_pos_exports > 3 ?
						   V_02870C_SPI_SHADER_4COMP :
						   V_02870C_SPI_SHADER_NONE));

	si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
	si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, va >> 40);
	si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS,
		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt));
	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
		       S_00B12C_USER_SGPR(num_user_sgprs) |
		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
		       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
		       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
		       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
		       S_00B12C_SO_EN(!!shader->selector->so.num_outputs));

	sctx->b.flags |= R600_CONTEXT_INV_SHADER_CACHE;
}
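
VS_EXPORT_COUNT in Example #12 counts only generic parameters: position, point size and clip-vertex outputs don't count as params, and the result is clamped to at least 1 because the VS must export at least one parameter. A sketch under those assumptions, with a placeholder semantic enum:

/* Placeholder semantics; the driver checks TGSI_SEMANTIC_* names. */
enum out_semantic { SEM_POSITION, SEM_PSIZE, SEM_CLIPVERTEX, SEM_GENERIC };

static unsigned count_vs_params(const enum out_semantic *outputs, unsigned noutput)
{
	unsigned i, nparams = 0;

	for (i = 0; i < noutput; i++) {
		switch (outputs[i]) {
		case SEM_POSITION:
		case SEM_PSIZE:
		case SEM_CLIPVERTEX:
			break;		/* handled by dedicated exports */
		default:
			nparams++;
		}
	}
	/* The VS must export at least one param (SPI_VS_OUT_CONFIG is
	 * programmed with nparams - 1). */
	return nparams < 1 ? 1 : nparams;
}
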
Example #13
bool r600_init_resource(struct r600_common_screen *rscreen,
			struct r600_resource *res,
			unsigned size, unsigned alignment,
			bool use_reusable_pool)
{
	struct r600_texture *rtex = (struct r600_texture*)res;
	struct pb_buffer *old_buf, *new_buf;

	switch (res->b.b.usage) {
	case PIPE_USAGE_STAGING:
	case PIPE_USAGE_DYNAMIC:
	case PIPE_USAGE_STREAM:
		/* Transfers are likely to occur more often with these resources. */
		res->domains = RADEON_DOMAIN_GTT;
		break;
	case PIPE_USAGE_DEFAULT:
	case PIPE_USAGE_IMMUTABLE:
	default:
		/* Not listing GTT here improves performance in some apps. */
		res->domains = RADEON_DOMAIN_VRAM;
		break;
	}

	/* Use GTT for all persistent mappings, because they are
	 * always cached and coherent. */
	if (res->b.b.target == PIPE_BUFFER &&
	    res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
			      PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
		res->domains = RADEON_DOMAIN_GTT;
	}

	/* Tiled textures are unmappable. Always put them in VRAM. */
	if (res->b.b.target != PIPE_BUFFER &&
	    rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
		res->domains = RADEON_DOMAIN_VRAM;
	}

	/* Allocate a new resource. */
	new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment,
					     use_reusable_pool,
					     res->domains);
	if (!new_buf) {
		return false;
	}

	/* Replace the pointer such that if res->buf wasn't NULL, it won't be
	 * NULL. This should prevent crashes with multiple contexts using
	 * the same buffer where one of the contexts invalidates it while
	 * the others are using it. */
	old_buf = res->buf;
	res->cs_buf = rscreen->ws->buffer_get_cs_handle(new_buf); /* should be atomic */
	res->buf = new_buf; /* should be atomic */
	pb_reference(&old_buf, NULL);

	util_range_set_empty(&res->valid_buffer_range);

	if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) {
		fprintf(stderr, "VM start=0x%"PRIX64"  end=0x%"PRIX64" | Buffer %u bytes\n",
			r600_resource_va(&rscreen->b, &res->b.b),
			r600_resource_va(&rscreen->b, &res->b.b) + res->buf->size,
			res->buf->size);
	}
	return true;
}
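
Example #13 reallocates the storage but keeps the ordering careful: the new buffer and its CS handle are stored before the old buffer is released, so other contexts that still dereference res->buf never see a NULL pointer. A minimal sketch of that swap-then-release pattern with a toy refcounted buffer (pb_reference() is the real helper used above; everything here is illustrative):

#include <stdlib.h>

/* Toy refcounted buffer; stands in for struct pb_buffer. */
struct buffer {
	int refcount;
};

static void buffer_unref(struct buffer **buf)
{
	if (*buf && --(*buf)->refcount == 0)
		free(*buf);
	*buf = NULL;
}

static void replace_storage(struct buffer **slot, struct buffer *new_buf)
{
	struct buffer *old_buf = *slot;

	/* Publish the new buffer first; a reader racing with this code sees
	 * either the old or the new pointer, never NULL. */
	*slot = new_buf;
	buffer_unref(&old_buf);
}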