static void *
nvfx_sampler_state_create(struct pipe_context *pipe,
                          const struct pipe_sampler_state *cso)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   struct nvfx_sampler_state *ps;

   ps = MALLOC(sizeof(struct nvfx_sampler_state));

   /* on nv30, we use this as an internal flag */
   ps->fmt = cso->normalized_coords ? 0 : NV40_3D_TEX_FORMAT_RECT;
   ps->en = 0;
   ps->filt = nvfx_tex_filter(cso) | 0x2000; /* voodoo */
   ps->wrap = (nvfx_tex_wrap_mode(cso->wrap_s) << NV30_3D_TEX_WRAP_S__SHIFT) |
              (nvfx_tex_wrap_mode(cso->wrap_t) << NV30_3D_TEX_WRAP_T__SHIFT) |
              (nvfx_tex_wrap_mode(cso->wrap_r) << NV30_3D_TEX_WRAP_R__SHIFT);
   ps->compare = FALSE;

   if(cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
   {
      ps->wrap |= nvfx_tex_wrap_compare_mode(cso->compare_func);
      ps->compare = TRUE;
   }
   ps->bcol = nvfx_tex_border_color(cso->border_color.f);

   if(nvfx->is_nv4x)
      nv40_sampler_state_init(pipe, ps, cso);
   else
      nv30_sampler_state_init(pipe, ps, cso);

   return (void *)ps;
}
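/* Software TNL fallback: map the vertex, index and constant buffers and hand
 * the draw over to the draw module, which generates the commands for us. */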
void
nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   unsigned i;
   void *map;

   if (!nvfx_state_validate_swtnl(nvfx))
      return;

   nvfx_state_emit(nvfx);

   /* these must be passed without adding the offsets */
   for (i = 0; i < nvfx->vtxbuf_nr; i++) {
      map = nvfx_buffer(nvfx->vtxbuf[i].buffer)->data;
      draw_set_mapped_vertex_buffer(nvfx->draw, i, map);
   }

   map = NULL;
   if (info->indexed && nvfx->idxbuf.buffer)
      map = nvfx_buffer(nvfx->idxbuf.buffer)->data;
   draw_set_mapped_index_buffer(nvfx->draw, map);

   if (nvfx->constbuf[PIPE_SHADER_VERTEX]) {
      const unsigned nr = nvfx->constbuf_nr[PIPE_SHADER_VERTEX];
      map = nvfx_buffer(nvfx->constbuf[PIPE_SHADER_VERTEX])->data;
      draw_set_mapped_constant_buffer(nvfx->draw, PIPE_SHADER_VERTEX, 0, map, nr);
   }

   draw_vbo(nvfx->draw, info);
   draw_flush(nvfx->draw);
}
static void
nvfx_vtxelts_state_bind(struct pipe_context *pipe, void *hwcso)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);

   nvfx->vtxelt = hwcso;
   nvfx->use_vertex_buffers = -1;
   nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
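/* Main draw entry point: pick an upload mode, upload buffers that look worth
 * keeping in hardware, then dispatch to the push path or the swtnl fallback. */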
void
nvfx_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   unsigned upload_mode = 0;

   if (!nvfx->vtxelt->needs_translate)
      upload_mode = nvfx_decide_upload_mode(pipe, info);

   nvfx->use_index_buffer = upload_mode > 1;

   if ((upload_mode > 0) != nvfx->use_vertex_buffers) {
      nvfx->use_vertex_buffers = (upload_mode > 0);
      nvfx->dirty |= NVFX_NEW_ARRAYS;
      nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
   }

   if (upload_mode > 0) {
      for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++) {
         struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
         struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
         nvfx_buffer_upload(nvfx_buffer(vb->buffer));
      }

      if (upload_mode > 1) {
         nvfx_buffer_upload(nvfx_buffer(nvfx->idxbuf.buffer));

         if (unlikely(info->index_bias != nvfx->base_vertex)) {
            nvfx->base_vertex = info->index_bias;
            nvfx->dirty |= NVFX_NEW_ARRAYS;
         }
      } else {
         if (unlikely(info->start < nvfx->base_vertex && nvfx->base_vertex)) {
            nvfx->base_vertex = 0;
            nvfx->dirty |= NVFX_NEW_ARRAYS;
         }
      }
   }

   if (nvfx->screen->force_swtnl || !nvfx_state_validate(nvfx))
      nvfx_draw_vbo_swtnl(pipe, info);
   else
      nvfx_push_vbo(pipe, info);
}
static void
nvfx_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   unsigned unit;

   for (unit = 0; unit < nr; unit++) {
      nvfx->tex_sampler[unit] = sampler[unit];
      nvfx->dirty_samplers |= (1 << unit);
   }

   for (unit = nr; unit < nvfx->nr_samplers; unit++) {
      nvfx->tex_sampler[unit] = NULL;
      nvfx->dirty_samplers |= (1 << unit);
   }

   nvfx->nr_samplers = nr;
   nvfx->dirty |= NVFX_NEW_SAMPLER;
}
static void
nvfx_set_vertex_buffers(struct pipe_context *pipe, unsigned count,
                        const struct pipe_vertex_buffer *vb)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);

   for(unsigned i = 0; i < count; ++i) {
      pipe_resource_reference(&nvfx->vtxbuf[i].buffer, vb[i].buffer);
      nvfx->vtxbuf[i].buffer_offset = vb[i].buffer_offset;
      nvfx->vtxbuf[i].max_index = vb[i].max_index;
      nvfx->vtxbuf[i].stride = vb[i].stride;
   }

   for(unsigned i = count; i < nvfx->vtxbuf_nr; ++i)
      pipe_resource_reference(&nvfx->vtxbuf[i].buffer, 0);

   nvfx->vtxbuf_nr = count;
   nvfx->use_vertex_buffers = -1;
   nvfx->draw_dirty |= NVFX_NEW_ARRAYS;
}
static void
nvfx_set_index_buffer(struct pipe_context *pipe, const struct pipe_index_buffer *ib)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);

   if(ib) {
      pipe_resource_reference(&nvfx->idxbuf.buffer, ib->buffer);
      nvfx->idxbuf.index_size = ib->index_size;
      nvfx->idxbuf.offset = ib->offset;
   } else {
      pipe_resource_reference(&nvfx->idxbuf.buffer, 0);
      nvfx->idxbuf.index_size = 0;
      nvfx->idxbuf.offset = 0;
   }

   nvfx->dirty |= NVFX_NEW_INDEX;
   nvfx->draw_dirty |= NVFX_NEW_INDEX;
}
static void
nvfx_set_fragment_sampler_views(struct pipe_context *pipe, unsigned nr,
                                struct pipe_sampler_view **views)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   unsigned unit;

   for (unit = 0; unit < nr; unit++) {
      pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], views[unit]);
      nvfx->dirty_samplers |= (1 << unit);
   }

   for (unit = nr; unit < nvfx->nr_textures; unit++) {
      pipe_sampler_view_reference(&nvfx->fragment_sampler_views[unit], NULL);
      nvfx->dirty_samplers |= (1 << unit);
   }

   nvfx->nr_textures = nr;
   nvfx->dirty |= NVFX_NEW_SAMPLER;
}
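/* Create a sampler view: translate the resource target and format into NV30
 * TEX_FORMAT bits and precompute the swizzle, wrap and size words used when
 * the view is bound. */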
static struct pipe_sampler_view *
nvfx_create_sampler_view(struct pipe_context *pipe,
                         struct pipe_resource *pt,
                         const struct pipe_sampler_view *templ)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   struct nvfx_sampler_view *sv = CALLOC_STRUCT(nvfx_sampler_view);
   struct nvfx_texture_format *tf = &nvfx_texture_formats[templ->format];
   unsigned txf;

   if (!sv)
      return NULL;

   sv->base = *templ;
   sv->base.reference.count = 1;
   sv->base.texture = NULL;
   pipe_resource_reference(&sv->base.texture, pt);
   sv->base.context = pipe;

   txf = NV30_3D_TEX_FORMAT_NO_BORDER;

   switch (pt->target) {
   case PIPE_TEXTURE_CUBE:
      txf |= NV30_3D_TEX_FORMAT_CUBIC;
      /* fall-through */
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      txf |= NV30_3D_TEX_FORMAT_DIMS_2D;
      break;
   case PIPE_TEXTURE_3D:
      txf |= NV30_3D_TEX_FORMAT_DIMS_3D;
      break;
   case PIPE_TEXTURE_1D:
      txf |= NV30_3D_TEX_FORMAT_DIMS_1D;
      break;
   default:
      assert(0);
   }

   sv->u.init_fmt = txf;

   sv->swizzle = 0
      | (tf->src[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S0_Z__SHIFT)
      | (tf->src[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S0_Y__SHIFT)
      | (tf->src[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S0_X__SHIFT)
      | (tf->src[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S0_W__SHIFT)
      | (tf->comp[sv->base.swizzle_r] << NV30_3D_TEX_SWIZZLE_S1_Z__SHIFT)
      | (tf->comp[sv->base.swizzle_g] << NV30_3D_TEX_SWIZZLE_S1_Y__SHIFT)
      | (tf->comp[sv->base.swizzle_b] << NV30_3D_TEX_SWIZZLE_S1_X__SHIFT)
      | (tf->comp[sv->base.swizzle_a] << NV30_3D_TEX_SWIZZLE_S1_W__SHIFT);

   sv->filt = tf->sign;
   sv->wrap = tf->wrap;
   sv->wrap_mask = ~0;

   if (pt->target == PIPE_TEXTURE_CUBE)
   {
      sv->offset = 0;
      sv->npot_size = (pt->width0 << NV30_3D_TEX_NPOT_SIZE_W__SHIFT) | pt->height0;
   }
   else
   {
      sv->offset = nvfx_subresource_offset(pt, 0, sv->base.u.tex.first_level, 0);
      sv->npot_size = (u_minify(pt->width0, sv->base.u.tex.first_level) << NV30_3D_TEX_NPOT_SIZE_W__SHIFT)
                    | u_minify(pt->height0, sv->base.u.tex.first_level);

      /* apparently, we need to ignore the t coordinate for 1D textures to fix piglit tex1d-2dborder */
      if(pt->target == PIPE_TEXTURE_1D)
      {
         sv->wrap_mask &= ~NV30_3D_TEX_WRAP_T__MASK;
         sv->wrap |= NV30_3D_TEX_WRAP_T_REPEAT;
      }
   }

   if(nvfx->is_nv4x)
      nv40_sampler_view_init(pipe, sv);
   else
      nv30_sampler_view_init(pipe, sv);

   return &sv->base;
}
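/* Emit a draw through the FIFO: depending on the chosen upload mode, vertex
 * data is translated and written inline, or drawn as ranges/elements out of
 * previously uploaded hardware buffers. */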
void
nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct nvfx_context *nvfx = nvfx_context(pipe);
   struct nouveau_channel *chan = nvfx->screen->base.channel;
   struct push_context ctx;
   struct util_split_prim s;
   unsigned instances_left = info->instance_count;
   int vtx_value;
   unsigned hw_mode = nvgl_primitive(info->mode);
   int i;
   struct
   {
      uint8_t* map;
      unsigned step;
   } per_instance[16];
   unsigned p_overhead = 64 /* magic fix */
         + 4 /* begin/end */
         + 4; /* potential edgeflag enable/disable */

   ctx.chan = nvfx->screen->base.channel;
   ctx.translate = nvfx->vtxelt->translate;
   ctx.idxbuf = NULL;
   ctx.vertex_length = nvfx->vtxelt->vertex_length;
   ctx.max_vertices_per_packet = nvfx->vtxelt->max_vertices_per_packet;
   ctx.edgeflag = 0.5f;
   // TODO: figure out if we really want to handle this, and do so in that case
   ctx.edgeflag_attr = 0xff; // nvfx->vertprog->cfg.edgeflag_in;

   if(!nvfx->use_vertex_buffers)
   {
      for(i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
      {
         struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
         struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
         uint8_t* data = nvfx_buffer(vb->buffer)->data + vb->buffer_offset;
         if(info->indexed)
            data += info->index_bias * vb->stride;
         ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
      }

      if(ctx.edgeflag_attr < 16)
         vtx_value = -(ctx.vertex_length + 3); /* vertex data and edgeflag header and value */
      else
      {
         p_overhead += 1; /* initial vertex_data header */
         vtx_value = -ctx.vertex_length; /* vertex data only */
      }

      if (info->indexed) {
         // XXX: this case is broken and probably needs a new VTX_ATTR push path
         if (nvfx->idxbuf.index_size == 1)
            s.emit = emit_vertices_lookup8;
         else if (nvfx->idxbuf.index_size == 2)
            s.emit = emit_vertices_lookup16;
         else
            s.emit = emit_vertices_lookup32;
      } else
         s.emit = emit_vertices;
   }
   else
   {
      if(!info->indexed || nvfx->use_index_buffer)
      {
         s.emit = info->indexed ? emit_ib_ranges : emit_vb_ranges;
         p_overhead += 3;
         vtx_value = 0;
      }
      else if (nvfx->idxbuf.index_size == 4)
      {
         s.emit = emit_elt32;
         p_overhead += 1;
         vtx_value = 8;
      }
      else
      {
         s.emit = (nvfx->idxbuf.index_size == 2) ? emit_elt16 : emit_elt8;
         p_overhead += 3;
         vtx_value = 7;
      }
   }

   ctx.idxbias = info->index_bias;
   if(nvfx->use_vertex_buffers)
      ctx.idxbias -= nvfx->base_vertex;

   /* map index buffer, if present */
   if (info->indexed && !nvfx->use_index_buffer)
      ctx.idxbuf = nvfx_buffer(nvfx->idxbuf.buffer)->data + nvfx->idxbuf.offset;

   s.priv = &ctx;
   s.edge = emit_edgeflag;

   for (i = 0; i < nvfx->vtxelt->num_per_instance; ++i)
   {
      struct nvfx_per_instance_element *ve = &nvfx->vtxelt->per_instance[i];
      struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[ve->base.vertex_buffer_index];
      float v[4];
      per_instance[i].step = info->start_instance % ve->instance_divisor;
      per_instance[i].map = nvfx_buffer(vb->buffer)->data + vb->buffer_offset + ve->base.src_offset;

      nvfx->vtxelt->per_instance[i].base.fetch_rgba_float(v, per_instance[i].map, 0, 0);

      WAIT_RING(chan, 5);
      nvfx_emit_vtx_attr(chan, nvfx->vtxelt->per_instance[i].base.idx, v,
                         nvfx->vtxelt->per_instance[i].base.ncomp);
   }

   /* per-instance loop */
   while (instances_left--) {
      int max_verts;
      boolean done;

      util_split_prim_init(&s, info->mode, info->start, info->count);
      nvfx_state_emit(nvfx);

      for(;;) {
         max_verts  = AVAIL_RING(chan);
         max_verts -= p_overhead;

         /* if vtx_value < 0, each vertex is -vtx_value words long
          * otherwise, each vertex is 2^(vtx_value) / 255 words long (this is an approximation)
          */
         if(vtx_value < 0)
         {
            max_verts /= -vtx_value;
            max_verts -= (max_verts >> 10); /* vertex data headers */
         }
         else
         {
            if(max_verts >= (1 << 23)) /* avoid overflow here */
               max_verts = (1 << 23);
            max_verts = (max_verts * 255) >> vtx_value;
         }

         //printf("avail %u max_verts %u\n", AVAIL_RING(chan), max_verts);

         if(max_verts >= 16)
         {
            /* XXX: emitting any command a lot of times seems to (mostly) fix corruption that would otherwise happen */
            /* this seems to cause issues on nv3x, and also be unneeded there */
            if(nvfx->is_nv4x)
            {
               int i;
               for(i = 0; i < 32; ++i)
               {
                  OUT_RING(chan, RING_3D(0x1dac, 1));
                  OUT_RING(chan, 0);
               }
            }

            OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
            OUT_RING(chan, hw_mode);

            done = util_split_prim_next(&s, max_verts);

            OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
            OUT_RING(chan, 0);

            if(done)
               break;
         }

         FIRE_RING(chan);
         nvfx_state_emit(nvfx);
      }
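/* Decide how the draw data should reach the GPU:
 *   0 - push all vertex data inline on the FIFO
 *   1 - use hardware vertex buffers, with indices (if any) pushed inline
 *   2 - use hardware vertex and index buffers
 * The decision is based on the estimated memcpy costs computed below. */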
static unsigned
nvfx_decide_upload_mode(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
   struct nvfx_context* nvfx = nvfx_context(pipe);
   unsigned hardware_cost = 0;
   unsigned inline_cost = 0;
   unsigned unique_vertices;
   unsigned upload_mode;
   float best_index_cost_for_hardware_vertices_as_inline_cost;
   boolean prefer_hardware_indices;
   unsigned index_inline_cost;
   unsigned index_hardware_cost;

   if (info->indexed)
      unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
   else
      unique_vertices = info->count;

   /* Here we try to figure out if we are better off writing vertex data directly
    * on the FIFO, or creating hardware buffer objects and pointing the hardware
    * to them.
    *
    * This is done by computing the total memcpy cost of each option, ignoring
    * uploads if we think that the buffer is static and thus the upload cost will
    * be amortized over future draw calls.
    *
    * For instance, if everything looks static, we will always create buffer
    * objects, while if everything is a user buffer and we are not doing indexed
    * drawing, we never do.
    *
    * Another interesting case is a small user vertex buffer combined with a huge
    * user index buffer: there we upload the vertex buffer so that we can use
    * hardware index lookup. In the opposite case we instead do the index lookup
    * in software, to avoid uploading a huge amount of vertex data that is not
    * going to be used.
    *
    * Otherwise, we generally move a buffer to the GPU after it has been pushed
    * NVFX_STATIC_BUFFER_MIN_REUSE_TIMES times to the GPU without having been
    * updated with a transfer (or just the buffer having been destroyed).
    *
    * There is no special handling for user buffers, since applications can also
    * use OpenGL VBOs in a one-shot fashion (OpenGL 3/4 core profile in fact
    * forces this).
    *
    * Note that currently we don't support putting only some data on the FIFO and
    * some in vertex buffers (constant and instanced data are independent of this).
    *
    * nVidia doesn't seem to do this either, even though it should be at least
    * doable with VTX_ATTR and possibly with VERTEX_DATA too if not indexed.
    */

   for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; i++)
   {
      struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
      struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
      struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
      buffer->bytes_to_draw_until_static -= vbi->per_vertex_size * unique_vertices;

      if (!nvfx_buffer_seems_static(buffer))
      {
         hardware_cost += buffer->dirty_end - buffer->dirty_begin;
         if (!buffer->base.bo)
            hardware_cost += nvfx->screen->buffer_allocation_cost;
      }

      inline_cost += vbi->per_vertex_size * info->count;
   }

   best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
   prefer_hardware_indices = FALSE;
   index_inline_cost = 0;
   index_hardware_cost = 0;

   if (info->indexed)
   {
      index_inline_cost = nvfx->idxbuf.index_size * info->count;
      if (nvfx->screen->index_buffer_reloc_flags
            && (nvfx->idxbuf.index_size == 2 || nvfx->idxbuf.index_size == 4)
            && !(nvfx->idxbuf.offset & (nvfx->idxbuf.index_size - 1)))
      {
         struct nvfx_buffer* buffer = nvfx_buffer(nvfx->idxbuf.buffer);
         buffer->bytes_to_draw_until_static -= index_inline_cost;

         prefer_hardware_indices = TRUE;

         if (!nvfx_buffer_seems_static(buffer))
         {
            index_hardware_cost = buffer->dirty_end - buffer->dirty_begin;
            if (!buffer->base.bo)
               index_hardware_cost += nvfx->screen->buffer_allocation_cost;
         }

         if ((float) index_inline_cost
               < (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost)
         {
            best_index_cost_for_hardware_vertices_as_inline_cost = (float) index_inline_cost;
         }
         else
         {
            best_index_cost_for_hardware_vertices_as_inline_cost =
                  (float) index_hardware_cost * nvfx->screen->inline_cost_per_hardware_cost;
            prefer_hardware_indices = TRUE;
         }
      }
   }

   /* let's finally figure out which of the 3 paths we want to take */
   if ((float) (inline_cost + index_inline_cost)
         > ((float) hardware_cost * nvfx->screen->inline_cost_per_hardware_cost
            + best_index_cost_for_hardware_vertices_as_inline_cost))
      upload_mode = 1 + prefer_hardware_indices;
   else
      upload_mode = 0;

#ifdef DEBUG
   if (unlikely(nvfx->screen->trace_draw))
   {
      fprintf(stderr, "DRAW");
      if (info->indexed)
      {
         fprintf(stderr, "_IDX%u", nvfx->idxbuf.index_size);
         if (info->index_bias)
            fprintf(stderr, " biased %u", info->index_bias);
         fprintf(stderr, " idxrange %u -> %u", info->min_index, info->max_index);
      }
      if (info->instance_count > 1)
         fprintf(stderr, " %u instances from %u", info->instance_count, info->start_instance);
      fprintf(stderr, " start %u count %u prim %u", info->start, info->count, info->mode);
      if (!upload_mode)
         fprintf(stderr, " -> inline vertex data");
      else if (upload_mode == 2 || !info->indexed)
         fprintf(stderr, " -> buffer range");
      else
         fprintf(stderr, " -> inline indices");
      fprintf(stderr, " [ivtx %u hvtx %u iidx %u hidx %u bidx %f] <",
              inline_cost, hardware_cost, index_inline_cost, index_hardware_cost,
              best_index_cost_for_hardware_vertices_as_inline_cost);

      for (unsigned i = 0; i < nvfx->vtxelt->num_per_vertex_buffer_infos; ++i)
      {
         struct nvfx_per_vertex_buffer_info* vbi = &nvfx->vtxelt->per_vertex_buffer_info[i];
         struct pipe_vertex_buffer *vb = &nvfx->vtxbuf[vbi->vertex_buffer_index];
         struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
         if (i)
            fprintf(stderr, ", ");
         fprintf(stderr, "%p%s left %lli", buffer,
                 buffer->last_update_static ? " static" : "",
                 (long long)buffer->bytes_to_draw_until_static);
      }
      fprintf(stderr, ">\n");
   }
#endif

   return upload_mode;
}