Example #1
static void *
nvfx_sampler_state_create(struct pipe_context *pipe,
			  const struct pipe_sampler_state *cso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_sampler_state *ps;

	ps = MALLOC(sizeof(struct nvfx_sampler_state));
	if (!ps)
		return NULL;

	/* on nv30, we use this as an internal flag */
	ps->fmt = cso->normalized_coords ? 0 : NV40_3D_TEX_FORMAT_RECT;
	ps->en = 0;
	ps->filt = nvfx_tex_filter(cso) | 0x2000; /*voodoo*/
	ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV30_3D_TEX_WRAP_S__SHIFT) |
		    (nvfx_tex_wrap_mode(cso->wrap_t) << NV30_3D_TEX_WRAP_T__SHIFT) |
		    (nvfx_tex_wrap_mode(cso->wrap_r) << NV30_3D_TEX_WRAP_R__SHIFT);
	ps->compare = FALSE;

	if(cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
	{
		ps->wrap |= nvfx_tex_wrap_compare_mode(cso->compare_func);
		ps->compare = TRUE;
	}
	ps->bcol = nvfx_tex_border_color(cso->border_color.f);

	if(nvfx->is_nv4x)
		nv40_sampler_state_init(pipe, ps, cso);
	else
		nv30_sampler_state_init(pipe, ps, cso);

	return (void *)ps;
}
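For context, CSO hooks like the create function above are registered on the pipe_context when the driver context is created. A minimal sketch of that wiring, assuming the 2010-era Gallium interface these examples target; the init helper name is illustrative, not taken from the driver:

static void
nvfx_init_sampler_hooks(struct pipe_context *pipe)
{
	/* hook up the sampler CSO and view callbacks shown in these examples */
	pipe->create_sampler_state = nvfx_sampler_state_create;
	pipe->bind_fragment_sampler_states = nvfx_sampler_state_bind;
	pipe->set_fragment_sampler_views = nvfx_set_fragment_sampler_views;
	pipe->create_sampler_view = nvfx_create_sampler_view;
}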
void
nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	unsigned i;
	void *map;

	if (!nvfx_state_validate_swtnl(nvfx))
		return;

	nvfx_state_emit(nvfx);

	/* these must be passed without adding the offsets */
	for (i = 0; i < nvfx->vtxbuf_nr; i++) {
		map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
		draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
	}

	map = NULL;
	if (info->indexed && nvfx->idxbuf.buffer)
		map = nvfx_buffer(nvfx->idxbuf.buffer)->data;
	draw_set_mapped_index_buffer(nvfx->draw, map);

	if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
		const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];

		map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
		draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0,
                                                map, nr);
	}

	draw_vbo(nvfx->draw, info);

	draw_flush(nvfx->draw);
}
static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	nvfx->vtxelt = hwcso;
	nvfx->use_vertex_buffers = -1; /* -1 = undecided; recomputed on the next draw */
	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
void nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	unsigned upload_mode = 0;

	if (!nvfx->vtxelt->needs_translate)
		upload_mode = nvfx_decide_upload_mode(pipe, info);

	nvfx->use_index_buffer = upload_mode > 1;

	if ((upload_mode > 0) != nvfx->use_vertex_buffers)
	{
		nvfx->use_vertex_buffers = (upload_mode > 0);
		nvfx->dirty |= NVFX_NEW_ARRAYS;
		nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
	}

	if (upload_mode > 0)
	{
		for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
		{
			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
			nvfx_buffer_upload(nvfx_buffer(vb->buffer));
		}

		if (upload_mode > 1)
		{
			nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

			if (unlikely(info->index_bias != nvfx->base_vertex))
			{
				nvfx->base_vertex = info->index_bias;
				nvfx->dirty |= NVFX_NEW_ARRAYS;
			}
		}
		else
		{
			if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex))
			{
				nvfx->base_vertex = 0;
				nvfx->dirty |= NVFX_NEW_ARRAYS;
			}
		}
	}

	if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
		nvfx_draw_vbo_swtnl(pipe, info);
	else
		nvfx_push_vbo(pipe, info);
}
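The upload_mode values follow from how they are consumed above (use_vertex_buffers = mode > 0, use_index_buffer = mode > 1). The driver uses bare integers; this enum is just an illustrative restatement of the three paths:

enum upload_mode_sketch {
	UPLOAD_INLINE = 0,	/* push vertex data inline on the FIFO */
	UPLOAD_HW_VERTICES = 1,	/* hardware vertex buffers, inline indices */
	UPLOAD_HW_INDICES = 2	/* hardware vertex and index buffers */
};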
Example #5
static void
nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	unsigned unit;

	for (unit = 0; unit < nr; unit++) {
		nvfx->tex_sampler[unit] = sampler[unit];
		nvfx->dirty_samplers |= (1 << unit);
	}

	for (unit = nr; unit < nvfx->nr_samplers; unit++) {
		nvfx->tex_sampler[unit] = NULL;
		nvfx->dirty_samplers |= (1 << unit);
	}

	nvfx->nr_samplers = nr;
	nvfx->dirty |= NVFX_NEW_SAMPLER;
}
static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
			const struct pipe_vertex_buffer *vb)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	for(unsigned i = 0; i < count; ++i)
	{
		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
		nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
		nvfx->vtxbuf[i].max_index = vb[i].max_index;
		nvfx->vtxbuf[i].stride = vb[i].stride;
	}

	for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
		pipe_resource_reference(&nvfx->vtxbuf[i].buffer, NULL);

	nvfx->vtxbuf_nr = count;
	nvfx->use_vertex_buffers = -1; /* -1 = undecided; recomputed on the next draw */
	nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
static void
nvfx_set_index_buffer(struct pipe_context *pipe,
		      const struct pipe_index_buffer *ib)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);

	if(ib)
	{
		pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
		nvfx->idxbuf.index_size = ib->index_size;
		nvfx->idxbuf.offset = ib->offset;
	}
	else
	{
		pipe_resource_reference(&nvfx->idxbuf.buffer, NULL);
		nvfx->idxbuf.index_size = 0;
		nvfx->idxbuf.offset = 0;
	}

	nvfx->dirty |= NVFX_NEW_INDEX;
	nvfx->draw_dirty |= NVFX_NEW_INDEX;
}
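Both setters rely on pipe_resource_reference() being NULL-safe on both sides: passing NULL as the new resource simply drops the reference held in the destination pointer. A simplified sketch of the retain/release pattern it implements (not Mesa's actual implementation; the destroy hook is hypothetical, and p_atomic_* are the u_atomic.h helpers):

static inline void
resource_reference_sketch(struct pipe_resource **ptr, struct pipe_resource *res)
{
	if (res)
		p_atomic_inc(&res->reference.count);		/* retain the new resource */
	if (*ptr && p_atomic_dec_zero(&(*ptr)->reference.count))
		resource_destroy_sketch(*ptr);			/* hypothetical destroy hook */
	*ptr = res;
}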
Example #8
static void
nvfx_set_fragment_sampler_views(struct pipe_context *pipe,
				unsigned nr,
				struct pipe_sampler_view **views)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	unsigned unit;

	for (unit = 0; unit < nr; unit++) {
		pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
                                            views[unit]);
		nvfx->dirty_samplers |= (1 << unit);
	}

	for (unit = nr; unit < nvfx->nr_textures; unit++) {
		pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit],
                                            NULL);
		nvfx->dirty_samplers |= (1 << unit);
	}

	nvfx->nr_textures = nr;
	nvfx->dirty |= NVFX_NEW_SAMPLER;
}
Example #9
static struct pipe_sampler_view *
nvfx_create_sampler_view(struct pipe_context *pipe,
			 struct pipe_resource *pt,
			 const struct pipe_sampler_view *templ)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nvfx_sampler_view *sv = CALLOC_STRUCT(nvfx_sampler_view);
	struct nvfx_texture_format *tf = &nvfx_texture_formats[templ->format];
	unsigned txf;

	if (!sv)
		return NULL;

	sv->base = *templ;
	sv->base.reference.count = 1;
	sv->base.texture = NULL;
	pipe_resource_reference(&sv->base.texture, pt);
	sv->base.context = pipe;

	txf = NV30_3D_TEX_FORMAT_NO_BORDER;

	switch (pt->target) {
	case PIPE_TEXTURE_CUBE:
		txf |= NV30_3D_TEX_FORMAT_CUBIC;
		/* fall-through */
	case PIPE_TEXTURE_2D:
	case PIPE_TEXTURE_RECT:
		txf |= NV30_3D_TEX_FORMAT_DIMS_2D;
		break;
	case PIPE_TEXTURE_3D:
		txf |= NV30_3D_TEX_FORMAT_DIMS_3D;
		break;
	case PIPE_TEXTURE_1D:
		txf |= NV30_3D_TEX_FORMAT_DIMS_1D;
		break;
	default:
		assert(0);
	}
	sv->u.init_fmt = txf;

	sv->swizzle = 0
			| (tf->src[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S0_Z__SHIFT)
			| (tf->src[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S0_Y__SHIFT)
			| (tf->src[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S0_X__SHIFT)
			| (tf->src[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S0_W__SHIFT)
			| (tf->comp[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S1_Z__SHIFT)
			| (tf->comp[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S1_Y__SHIFT)
			| (tf->comp[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S1_X__SHIFT)
			| (tf->comp[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S1_W__SHIFT);

	sv->filt = tf->sign;
	sv->wrap = tf->wrap;
	sv->wrap_mask = ~0;

	if (pt->target == PIPE_TEXTURE_CUBE)
	{
		sv->offset = 0;
		sv->npot_size = (pt->width0 << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | pt->height0;
	}
	else
	{
		sv->offset = nvfx_subresource_offset(pt, 0, sv->base.u.tex.first_level, 0);
		sv->npot_size = (u_minify(pt->width0, sv->base.u.tex.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | u_minify(pt->height0, sv->base.u.tex.first_level);

		/* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */
		if(pt->target == PIPE_TEXTURE_1D)
		{
			sv->wrap_mask &=~ NV30_3D_TEX_WRAP_T__MASK;
			sv->wrap |= NV30_3D_TEX_WRAP_T_REPEAT;
		}
	}

	if(nvfx->is_nv4x)
		nv40_sampler_view_init(pipe, sv);
	else
		nv30_sampler_view_init(pipe, sv);

	return &sv->base;
}
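A small worked example of the npot_size packing above, assuming NV30_3D_TEX_NPOT_SIZE_W__SHIFT is 16 as in the nv30 register headers (width in the high half, height in the low half):

	/* a 512x256 2D view with first_level 0 packs as
	 *   npot_size = (512 << 16) | 256 = 0x02000100
	 * while first_level 1 halves both: (256 << 16) | 128 */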
void
nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
	struct nvfx_context *nvfx = nvfx_context(pipe);
	struct nouveau_channel *chan = nvfx->screen->base.channel;
	struct push_context ctx;
	struct util_split_prim s;
	unsigned instances_left = info->instance_count;
	int vtx_value;
	unsigned hw_mode = nvgl_primitive(info->mode);
	int i;
	struct
	{
		uint8_t* map;
		unsigned step;
	} per_instance[16];
	unsigned p_overhead = 64 /* magic fix */
			+ 4 /* begin/end */
			+ 4; /* potential edgeflag enable/disable */

	ctx.chan = nvfx->screen->base.channel;
	ctx.translate = nvfx->vtxelt->translate;
	ctx.idxbuf = NULL;
	ctx.vertex_length = nvfx->vtxelt->vertex_length;
	ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
	ctx.edgeflag = 0.5f;
	// TODO: figure out if we really want to handle this, and do so in that case
	ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;

	if(!nvfx->use_vertex_buffers)
	{
		for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
		{
			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
			uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
			if(info->indexed)
				data += info->index_bias * vb->stride;
			ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
		}

		if(ctx.edgeflag_attr < 16)
			vtx_value = -(ctx.vertex_length + 3);  /* vertex data and edgeflag header and value */
		else
		{
			p_overhead += 1; /* initial vertex_data header */
			vtx_value = -ctx.vertex_length;  /* vertex data only, no edgeflag */
		}

		if (info->indexed) {
			// XXX: this case is broken and probably needs a new VTX_ATTR push path
			if (nvfx->idxbuf.index_size == 1)
				s.emit = emit_vertices_lookup8;
			else if (nvfx->idxbuf.index_size == 2)
				s.emit = emit_vertices_lookup16;
			else
				s.emit = emit_vertices_lookup32;
		} else
			s.emit = emit_vertices;
	}
	else
	{
		if(!info->indexed || nvfx->use_index_buffer)
		{
			s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
			p_overhead += 3;
			vtx_value = 0;
		}
		else if (nvfx->idxbuf.index_size == 4)
		{
			s.emit = emit_elt32;
			p_overhead += 1;
			vtx_value = 8;
		}
		else
		{
			s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
			p_overhead += 3;
			vtx_value = 7;
		}
	}

	ctx.idxbias = info->index_bias;
	if(nvfx->use_vertex_buffers)
		ctx.idxbias -= nvfx->base_vertex;

	/* map index buffer, if present */
	if (info->indexed && !nvfx->use_index_buffer)
		ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;

	s.priv = &ctx;
	s.edge = emit_edgeflag;

	for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
	{
		struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
		float v[4];
		per_instance[i].step = info->start_instance % ve->instance_divisor;
		per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;

		nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);

		WAIT_RING(chan, 5);
		nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v, nvfx->vtxelt->per_instance[i].base.ncomp);
	}

	/* per-instance loop */
	while (instances_left--) {
		int max_verts;
		boolean done;

		util_split_prim_init(&s, info->mode, info->start, info->count);
		nvfx_state_emit(nvfx);
		for(;;) {
			max_verts  = AVAIL_RING(chan);
			max_verts -= p_overhead;

			/* if vtx_value < 0, each vertex is -vtx_value words long
			 * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
			 */
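			/* e.g. the emit_elt32 path sets vtx_value = 8, so an index
			 * costs 2^8 / 255 ~= 1 word; the elt16/elt8 paths set 7,
			 * i.e. ~0.5 words per index */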
			if(vtx_value < 0)
			{
				max_verts /= -vtx_value;
				max_verts -= (max_verts >> 10); /* vertex data headers */
			}
			else
			{
				if(max_verts >= (1 << 23)) /* avoid overflow here */
					max_verts = (1 << 23);
				max_verts = (max_verts * 255) >> vtx_value;
			}

			//printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);

			if(max_verts >= 16)
			{
				/* XXX: any command a lot of times seems to (mostly) fix corruption that would otherwise happen */
				/* this seems to cause issues on nv3x, and also be unneeded there */
				if(nvfx->is_nv4x)
				{
					int i;
					for(i = 0; i < 32; ++i)
					{
						OUT_RING(chan, RING_3D(0x1dac, 1));
						OUT_RING(chan, 0);
					}
				}

				OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
				OUT_RING(chan, hw_mode);
				done = util_split_prim_next(&s, max_verts);
				OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
				OUT_RING(chan, 0);

				if(done)
					break;
			}

			FIRE_RING(chan);
			nvfx_state_emit(nvfx);
		}

		/* remainder of the per-instance loop (stepping the per-instance
		 * attribute streams for the next instance) is truncated in this
		 * excerpt */
	}
}
static unsigned nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
	struct nvfx_context* nvfx = nvfx_context(pipe);
	unsigned hardware_cost = 0;
	unsigned inline_cost = 0;
	unsigned unique_vertices;
	unsigned upload_mode;
	float best_index_cost_for_hardware_vertices_as_inline_cost;
	boolean prefer_hardware_indices;
	unsigned index_inline_cost;
	unsigned index_hardware_cost;
	if (info->indexed)
		unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
	else
		unique_vertices = info->count;

	/* Here we try to figure out if we are better off writing vertex data directly on the FIFO,
	 * or create hardware buffer objects and pointing the hardware to them.
	 *
	 * This is done by computing the total memcpy cost of each option, ignoring uploads
	 * if we think that the buffer is static and thus the upload cost will be amortized over
	 * future draw calls.
	 *
	 * For instance, if everything looks static, we will always create buffer objects, while if
	 * everything is a user buffer and we are not doing indexed drawing, we never do.
	 *
	 * Another interesting case is a small user vertex buffer combined with a
	 * huge user index buffer, where we upload the vertex buffer so that we can
	 * use hardware index lookup; in the opposite case we instead do index
	 * lookup in software to avoid uploading a huge amount of vertex data that
	 * is not going to be used.
	 *
	 * Otherwise, we generally move a buffer to the GPU after it has been
	 * pushed NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times without having been
	 * updated with a transfer (or the buffer having been destroyed).
	 *
	 * There is no special handling for user buffers, since applications can use
	 * OpenGL VBOs in a one-shot fashion. OpenGL 3/4 core profile forces this
	 * by the way.
	 *
	 * Note that currently we don't support putting only some data on the FIFO
	 * and some in vertex buffers (constant and instanced data is independent
	 * of this).
	 *
	 * nVidia doesn't seem to do this either, even though it should be at least
	 * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
	 */

	for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
	{
		struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
		struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
		buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;
		if (!nvfx_buffer_seems_static(buffer))
		{
			hardware_cost += buffer->dirty_end - buffer->dirty_begin;
			if (!buffer->base.bo)
				hardware_cost += nvfx->screen->buffer_allocation_cost;
		}
		inline_cost += vbi->per_vertex_size * info->count;
	}

	best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
	prefer_hardware_indices = FALSE;
	index_inline_cost = 0;
	index_hardware_cost = 0;

	if (info->indexed)
	{
		index_inline_cost = nvfx->idxbuf.index_size * info->count;
		if (nvfx->screen->index_buffer_reloc_flags
			&& (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
			&& !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
		{
			struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
			buffer->bytes_to_draw_until_static -= index_inline_cost;

			if (!nvfx_buffer_seems_static(buffer))
			{
				index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
				if (!buffer->base.bo)
					index_hardware_cost += nvfx->screen->buffer_allocation_cost;
			}

			if ((float) index_inline_cost < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
			{
				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
				prefer_hardware_indices = FALSE; /* inline indices are cheaper */
			}
			else
			{
				best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
				prefer_hardware_indices = TRUE;
			}
		}
	}

	/* let's finally figure out which of the 3 paths we want to take */
	if ((float) (inline_cost + index_inline_cost) > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost + best_index_cost_for_hardware_vertices_as_inline_cost))
		upload_mode = 1 + prefer_hardware_indices;
	else
		upload_mode = 0;

#ifdef DEBUG
	if (unlikely(nvfx->screen->trace_draw))
	{
		fprintf(stderr, "DRAW");
		if (info->indexed)
		{
			fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
			if (info->index_bias)
				fprintf(stderr, " biased %i", info->index_bias);
			fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
		}
		if (info->instance_count > 1)
			fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
		fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
		if (!upload_mode)
			fprintf(stderr, " -> inline vertex data");
		else if (upload_mode == 2 || !info->indexed)
			fprintf(stderr, " -> buffer range");
		else
			fprintf(stderr, " -> inline indices");
		fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <", inline_cost, hardware_cost, index_inline_cost, index_hardware_cost, best_index_cost_for_hardware_vertices_as_inline_cost);
		for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
		{
			struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
			struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
			struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
			if (i)
				fprintf(stderr, ", ");
			fprintf(stderr, "%p%s left %lli", (void*)buffer, buffer->last_update_static ? " static" : "", (long long)buffer->bytes_to_draw_until_static);
		}
		fprintf(stderr, ">\n");
	}
#endif

	return upload_mode;
}
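To make the cost model concrete, a worked example with made-up numbers (every value below is hypothetical):

	/* Indexed draw with count = 300, 16 bytes per vertex, 2-byte
	 * indices, ~100 unique vertices, both buffers dynamic and fully
	 * dirty, inline_cost_per_hardware_cost = 1.0:
	 *
	 *   inline_cost         = 16 * 300 = 4800
	 *   hardware_cost       = 16 * 100 = 1600   (dirty vertex bytes)
	 *   index_inline_cost   =  2 * 300 =  600
	 *   index_hardware_cost =  2 * 300 =  600   (dirty index bytes)
	 *
	 * The index comparison takes the else branch (600 < 600 * 1.0 is
	 * false), so best_index_cost_... = 600 with hardware indices
	 * preferred; then 4800 + 600 > 1600 * 1.0 + 600, giving
	 * upload_mode = 2: hardware vertex and index buffers. */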