/**
 * Translate a gallium vertex element into the hardware vertex attribute
 * format word (hw type bits ORed with hw size bits).
 *
 * Aborts (after logging) on formats the hardware cannot fetch directly;
 * the return after abort() is unreachable but kept for compilers that
 * warn about missing returns.
 */
static INLINE uint32_t
nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
{
   uint32_t hw_type, hw_size;
   enum pipe_format pf = ve->src_format;
   const struct util_format_description *desc;
   unsigned size, nr_components;

   desc = util_format_description(pf);
   assert(desc);

   size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
   nr_components = util_format_get_nr_components(pf);

   hw_type = nv50_vbo_type_to_hw(pf);
   hw_size = nv50_vbo_size_to_hw(size, nr_components);

   if (!hw_type || !hw_size) {
      NOUVEAU_ERR("unsupported vbo format: %s\n", util_format_name(pf));
      abort();
      return 0x24e80000; /* not reached */
   }

   if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
      hw_size |= (1U << 31); /* no real swizzle bits :-( */
      /* fix: 1U, not 1 — left-shifting signed 1 into the sign bit is UB */

   return (hw_type | hw_size);
}
static void * nv30_vertex_state_create(struct pipe_context *pipe, unsigned num_elements, const struct pipe_vertex_element *elements) { struct nv30_vertex_stateobj *so; struct translate_key transkey; unsigned i; assert(num_elements); so = MALLOC(sizeof(*so) + sizeof(*so->element) * num_elements); if (!so) return NULL; memcpy(so->pipe, elements, sizeof(*elements) * num_elements); so->num_elements = num_elements; so->need_conversion = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; for (i = 0; i < num_elements; i++) { const struct pipe_vertex_element *ve = &elements[i]; const unsigned vbi = ve->vertex_buffer_index; enum pipe_format fmt = ve->src_format; so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw; if (!so->element[i].state) { switch (util_format_get_nr_components(fmt)) { case 1: fmt = PIPE_FORMAT_R32_FLOAT; break; case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break; case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break; case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break; default: assert(0); return NULL; } so->element[i].state = nv30_vtxfmt(pipe->screen, fmt)->hw; so->need_conversion = TRUE; } if (1) { unsigned j = transkey.nr_elements++; transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL; transkey.element[j].input_format = ve->src_format; transkey.element[j].input_buffer = vbi; transkey.element[j].input_offset = ve->src_offset; transkey.element[j].instance_divisor = ve->instance_divisor; transkey.element[j].output_format = fmt; transkey.element[j].output_offset = transkey.output_stride; transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3; } } so->translate = translate_create(&transkey); so->vtx_size = transkey.output_stride / 4; so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1); return so; }
static void nv30_emit_vtxattr(struct nv30_context *nv30, struct pipe_vertex_buffer *vb, struct pipe_vertex_element *ve, unsigned attr) { const unsigned nc = util_format_get_nr_components(ve->src_format); struct nouveau_pushbuf *push = nv30->base.pushbuf; struct nv04_resource *res = nv04_resource(vb->buffer); const struct util_format_description *desc = util_format_description(ve->src_format); const void *data; float v[4]; data = nouveau_resource_map_offset(&nv30->base, res, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_RD); desc->unpack_rgba_float(v, 0, data, 0, 1, 1); switch (nc) { case 4: BEGIN_NV04(push, NV30_3D(VTX_ATTR_4F(attr)), 4); PUSH_DATAf(push, v[0]); PUSH_DATAf(push, v[1]); PUSH_DATAf(push, v[2]); PUSH_DATAf(push, v[3]); break; case 3: BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(attr)), 3); PUSH_DATAf(push, v[0]); PUSH_DATAf(push, v[1]); PUSH_DATAf(push, v[2]); break; case 2: BEGIN_NV04(push, NV30_3D(VTX_ATTR_2F(attr)), 2); PUSH_DATAf(push, v[0]); PUSH_DATAf(push, v[1]); break; case 1: BEGIN_NV04(push, NV30_3D(VTX_ATTR_1F(attr)), 1); PUSH_DATAf(push, v[0]); break; default: assert(0); break; } }
/**
 * Given a user-specified texture base format, the actual gallium texture
 * format and the current GL_DEPTH_MODE, return a texture swizzle.
 *
 * When the driver backs a texture with a format that has more channels
 * than the GL base format requests (e.g. GL_RGB stored as RGBA), the
 * extra channels must read back as the GL-mandated constants — sampling
 * with a ".xyz1" style swizzle achieves that even if rendering wrote
 * other values into them.  For depth textures, GL_DEPTH_MODE selects
 * the swizzle instead.
 *
 * The result must be composed with the user-specified swizzle to get
 * the final swizzle.
 */
static unsigned
compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
                               enum pipe_format actualFormat)
{
   const unsigned nr = util_format_get_nr_components(actualFormat);

   switch (baseFormat) {
   case GL_RGBA:
      return SWIZZLE_XYZW;

   case GL_RGB:
      /* force A = 1 if the backing format carries an alpha channel */
      return util_format_has_alpha(actualFormat)
         ? MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE)
         : SWIZZLE_XYZW;

   case GL_RG:
      return (nr > 2)
         ? MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE)
         : SWIZZLE_XYZW;

   case GL_RED:
      return (nr > 1)
         ? MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE)
         : SWIZZLE_XYZW;

   case GL_ALPHA:
      return (nr > 1)
         ? MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_W)
         : SWIZZLE_XYZW;

   case GL_LUMINANCE:
      return (nr > 1)
         ? MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE)
         : SWIZZLE_XYZW;

   case GL_LUMINANCE_ALPHA:
      return (nr > 2)
         ? MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_W)
         : SWIZZLE_XYZW;

   case GL_INTENSITY:
      return (nr > 1) ? SWIZZLE_XXXX : SWIZZLE_XYZW;

   case GL_STENCIL_INDEX:
      return SWIZZLE_XYZW;

   case GL_DEPTH_STENCIL: /* fall-through */
   case GL_DEPTH_COMPONENT:
      /* the depth mode determines how the single depth value fans out */
      switch (depthMode) {
      case GL_LUMINANCE:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      case GL_INTENSITY:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
      case GL_ALPHA:
         return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
                              SWIZZLE_X);
      case GL_RED:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO,
                              SWIZZLE_ONE);
      default:
         assert(!"Unexpected depthMode");
         return SWIZZLE_XYZW;
      }

   default:
      assert(!"Unexpected baseFormat");
      return SWIZZLE_XYZW;
   }
}
/**
 * Run the deinterlacing filter: for each plane of the destination video
 * buffer, copy the current field unchanged and reconstruct the other
 * field (by copy or motion-adaptive interpolation in the fragment
 * shaders) from the four input frames.
 *
 * field == 0 means the top field is current, field == 1 the bottom.
 */
void
vl_deint_filter_render(struct vl_deint_filter *filter,
                       struct pipe_video_buffer *prevprev,
                       struct pipe_video_buffer *prev,
                       struct pipe_video_buffer *cur,
                       struct pipe_video_buffer *next,
                       unsigned field)
{
   struct pipe_viewport_state viewport;
   struct pipe_framebuffer_state fb_state;
   struct pipe_sampler_view **cur_sv;
   struct pipe_sampler_view **prevprev_sv;
   struct pipe_sampler_view **prev_sv;
   struct pipe_sampler_view **next_sv;
   struct pipe_sampler_view *sampler_views[4];
   struct pipe_surface **dst_surfaces;
   const unsigned *plane_order;
   int i;
   unsigned j;

   assert(filter && prevprev && prev && cur && next && field <= 1);

   /* set up destination and source */
   dst_surfaces = filter->video_buffer->get_surfaces(filter->video_buffer);
   plane_order = vl_video_buffer_plane_order(filter->video_buffer->buffer_format);
   cur_sv = cur->get_sampler_view_components(cur);
   prevprev_sv = prevprev->get_sampler_view_components(prevprev);
   prev_sv = prev->get_sampler_view_components(prev);
   next_sv = next->get_sampler_view_components(next);

   /* set up pipe state */
   filter->pipe->bind_rasterizer_state(filter->pipe, filter->rs_state);
   filter->pipe->set_vertex_buffers(filter->pipe, 0, 1, &filter->quad);
   filter->pipe->bind_vertex_elements_state(filter->pipe, filter->ves);
   filter->pipe->bind_vs_state(filter->pipe, filter->vs);
   filter->pipe->bind_sampler_states(filter->pipe, PIPE_SHADER_FRAGMENT,
                                     0, 4, filter->sampler);

   /* prepare viewport */
   memset(&viewport, 0, sizeof(viewport));
   viewport.scale[2] = 1;

   /* prepare framebuffer */
   memset(&fb_state, 0, sizeof(fb_state));
   fb_state.nr_cbufs = 1;

   /* process each plane separately */
   for (i = 0, j = 0; i < VL_NUM_COMPONENTS; ++i) {
      /* the destination surface pair holds one surface per field */
      struct pipe_surface *blit_surf = dst_surfaces[field];
      struct pipe_surface *dst_surf = dst_surfaces[1 - field];
      /* map component index to the buffer format's plane ordering */
      int k = plane_order[i];

      /* bind blend state for this component in the plane */
      filter->pipe->bind_blend_state(filter->pipe, filter->blend[j]);

      /* update render target state */
      viewport.scale[0] = blit_surf->texture->width0;
      viewport.scale[1] = blit_surf->texture->height0;
      fb_state.width = blit_surf->texture->width0;
      fb_state.height = blit_surf->texture->height0;

      /* update sampler view sources: the four temporal reference fields */
      sampler_views[0] = prevprev_sv[k];
      sampler_views[1] = prev_sv[k];
      sampler_views[2] = cur_sv[k];
      sampler_views[3] = next_sv[k];
      filter->pipe->set_sampler_views(filter->pipe, PIPE_SHADER_FRAGMENT,
                                      0, 4, sampler_views);

      /* blit current field */
      fb_state.cbufs[0] = blit_surf;
      filter->pipe->bind_fs_state(filter->pipe, field ? filter->fs_copy_bottom
                                                      : filter->fs_copy_top);
      filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
      filter->pipe->set_viewport_states(filter->pipe, 0, 1, &viewport);
      util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);

      /* blit or interpolate other field */
      fb_state.cbufs[0] = dst_surf;
      filter->pipe->set_framebuffer_state(filter->pipe, &fb_state);
      if (i > 0 && filter->skip_chroma) {
         /* chroma planes reuse the copy shader when chroma deint is skipped */
         util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
      } else {
         /* the opposite field gets deinterlaced: top shader when the
          * bottom field is current, and vice versa */
         filter->pipe->bind_fs_state(filter->pipe, field ? filter->fs_deint_top
                                                         : filter->fs_deint_bottom);
         util_draw_arrays(filter->pipe, PIPE_PRIM_QUADS, 0, 4);
      }

      /* advance to the next destination plane once all of its components
       * have been processed (each plane covers two surfaces: one per field) */
      if (++j >= util_format_get_nr_components(dst_surf->format)) {
         dst_surfaces += 2;
         j = 0;
      }
   }
}
/**
 * Given a user-specified texture base format, the actual gallium texture
 * format and the current GL_DEPTH_MODE, return a texture swizzle.
 *
 * Consider the case where the user requests a GL_RGB internal texture
 * format the driver actually uses an RGBA format. The A component should
 * be ignored and sampling from the texture should always return (r,g,b,1).
 * But if we rendered to the texture we might have written A values != 1.
 * By sampling the texture with a ".xyz1" swizzle we'll get the expected A=1.
 * This function computes the texture swizzle needed to get the expected
 * values.
 *
 * In the case of depth textures, the GL_DEPTH_MODE state determines the
 * texture swizzle.
 *
 * This result must be composed with the user-specified swizzle to get
 * the final swizzle.
 *
 * \param glsl_version  GLSL version of the currently bound shader, or 0;
 *                      only consulted for the GL_ALPHA depth mode (see
 *                      the comment in the depth-mode switch below).
 */
static unsigned
compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
                               enum pipe_format actualFormat,
                               unsigned glsl_version)
{
   switch (baseFormat) {
   case GL_RGBA:
      return SWIZZLE_XYZW;
   case GL_RGB:
      /* force A = 1 when the backing format has an alpha channel */
      if (util_format_has_alpha(actualFormat))
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
      else
         return SWIZZLE_XYZW;
   case GL_RG:
      if (util_format_get_nr_components(actualFormat) > 2)
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
      else
         return SWIZZLE_XYZW;
   case GL_RED:
      if (util_format_get_nr_components(actualFormat) > 1)
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO,
                              SWIZZLE_ZERO, SWIZZLE_ONE);
      else
         return SWIZZLE_XYZW;
   case GL_ALPHA:
      if (util_format_get_nr_components(actualFormat) > 1)
         return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
                              SWIZZLE_ZERO, SWIZZLE_W);
      else
         return SWIZZLE_XYZW;
   case GL_LUMINANCE:
      if (util_format_get_nr_components(actualFormat) > 1)
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      else
         return SWIZZLE_XYZW;
   case GL_LUMINANCE_ALPHA:
      if (util_format_get_nr_components(actualFormat) > 2)
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_W);
      else
         return SWIZZLE_XYZW;
   case GL_INTENSITY:
      if (util_format_get_nr_components(actualFormat) > 1)
         return SWIZZLE_XXXX;
      else
         return SWIZZLE_XYZW;
   case GL_STENCIL_INDEX:
   case GL_DEPTH_STENCIL:
   case GL_DEPTH_COMPONENT:
      /* Now examine the depth mode */
      switch (depthMode) {
      case GL_LUMINANCE:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
      case GL_INTENSITY:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
      case GL_ALPHA:
         /* The texture(sampler*Shadow) functions from GLSL 1.30 ignore
          * the depth mode and return float, while older shadow* functions
          * and ARB_fp instructions return vec4 according to the depth mode.
          *
          * The problem with the GLSL 1.30 functions is that GL_ALPHA forces
          * them to return 0, breaking them completely.
          *
          * A proper fix would increase code complexity and that's not worth
          * it for a rarely used feature such as the GL_ALPHA depth mode
          * in GL3. Therefore, change GL_ALPHA to GL_INTENSITY for all
          * shaders that use GLSL 1.30 or later.
          *
          * BTW, it's required that sampler views are updated when
          * shaders change (check_sampler_swizzle takes care of that).
          */
         if (glsl_version && glsl_version >= 130)
            return SWIZZLE_XXXX;
         else
            return MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
                                 SWIZZLE_ZERO, SWIZZLE_X);
      case GL_RED:
         return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO,
                              SWIZZLE_ZERO, SWIZZLE_ONE);
      default:
         assert(!"Unexpected depthMode");
         return SWIZZLE_XYZW;
      }
   default:
      assert(!"Unexpected baseFormat");
      return SWIZZLE_XYZW;
   }
}
/**
 * Push vertex data inline through the FIFO instead of letting the GPU
 * fetch it, for all vertex elements flagged in nvc0->vbo_fifo.
 *
 * For each flagged attribute a per-size/per-component emit callback is
 * chosen; the vertices are then emitted once per instance, optionally
 * driven by a mapped index buffer.
 */
void
nvc0_push_vbo2(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, n;
   unsigned inst = info->instance_count;
   unsigned prim = nvc0_prim_gl(info->mode);

   ctx.nvc0 = nvc0;
   ctx.vertex_size = nvc0->vertex->vtx_size;
   ctx.idxbuf = NULL;
   ctx.num_attrs = 0;
   ctx.edgeflag = 0.5f;
   ctx.edgeflag_input = 32;

   for (i = 0; i < nvc0->vertex->num_elements; ++i) {
      struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
      struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
      struct nouveau_bo *bo = nvc0_resource(vb->buffer)->bo;
      unsigned nr_components;

      /* only attributes marked for FIFO push are emitted inline */
      if (!(nvc0->vbo_fifo & (1 << i)))
         continue;
      n = ctx.num_attrs++;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD))
         return;
      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;

      /* NOTE(review): map pointer is kept after unmap — assumes the bo
       * mapping stays valid for the duration of the draw; confirm. */
      nouveau_bo_unmap(bo);

      ctx.attr[n].stride = vb->stride;
      ctx.attr[n].divisor = ve->instance_divisor;

      nr_components = util_format_get_nr_components(ve->src_format);
      /* select an emit callback by component size and count; smaller
       * components are packed into fewer 32-bit words where possible */
      switch (util_format_get_component_bits(ve->src_format,
                                             UTIL_FORMAT_COLORSPACE_RGB, 0)) {
      case 8:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b08_1; break;
         case 2: ctx.attr[n].push = emit_b16_1; break;
         case 3: ctx.attr[n].push = emit_b08_3; break;
         case 4: ctx.attr[n].push = emit_b32_1; break;
         }
         break;
      case 16:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b16_1; break;
         case 2: ctx.attr[n].push = emit_b32_1; break;
         case 3: ctx.attr[n].push = emit_b16_3; break;
         case 4: ctx.attr[n].push = emit_b32_2; break;
         }
         break;
      case 32:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b32_1; break;
         case 2: ctx.attr[n].push = emit_b32_2; break;
         case 3: ctx.attr[n].push = emit_b32_3; break;
         case 4: ctx.attr[n].push = emit_b32_4; break;
         }
         break;
      default:
         assert(0);
         break;
      }
   }

   if (info->indexed) {
      /* map the index buffer for CPU-side index fetch */
      struct nvc0_resource *res = nvc0_resource(nvc0->idxbuf.buffer);
      if (!res || nouveau_bo_map(res->bo, NOUVEAU_BO_RD))
         return;
      ctx.idxbuf = (uint8_t *)res->bo->map + nvc0->idxbuf.offset + res->offset;
      nouveau_bo_unmap(res->bo);
      ctx.idxsize = nvc0->idxbuf.index_size;
   } else {
      ctx.idxsize = 0;
   }

   /* emit one BEGIN/END pair per instance; after the first instance the
    * INSTANCE_NEXT bit makes the hw advance the instance counter */
   while (inst--) {
      BEGIN_RING(nvc0->screen->base.channel, RING_3D(VERTEX_BEGIN_GL), 1);
      OUT_RING  (nvc0->screen->base.channel, prim);
      switch (ctx.idxsize) {
      case 0:
         emit_seq(&ctx, info->start, info->count);
         break;
      case 1:
         emit_elt08(&ctx, info->start, info->count);
         break;
      case 2:
         emit_elt16(&ctx, info->start, info->count);
         break;
      case 4:
         emit_elt32(&ctx, info->start, info->count);
         break;
      }
      IMMED_RING(nvc0->screen->base.channel, RING_3D(VERTEX_END_GL), 0);

      prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
   }
}
/** Create vertex element states, which define a layout for fetching * vertices for rendering. */ static void * etna_vertex_elements_state_create(struct pipe_context *pctx, unsigned num_elements, const struct pipe_vertex_element *elements) { struct etna_context *ctx = etna_context(pctx); struct compiled_vertex_elements_state *cs = CALLOC_STRUCT(compiled_vertex_elements_state); if (!cs) return NULL; if (num_elements > ctx->specs.vertex_max_elements) { BUG("number of elements (%u) exceeds chip maximum (%u)", num_elements, ctx->specs.vertex_max_elements); return NULL; } /* XXX could minimize number of consecutive stretches here by sorting, and * permuting the inputs in shader or does Mesa do this already? */ /* Check that vertex element binding is compatible with hardware; thus * elements[idx].vertex_buffer_index are < stream_count. If not, the binding * uses more streams than is supported, and u_vbuf should have done some * reorganization for compatibility. */ /* TODO: does mesa this for us? 
*/ bool incompatible = false; for (unsigned idx = 0; idx < num_elements; ++idx) { if (elements[idx].vertex_buffer_index >= ctx->specs.stream_count || elements[idx].instance_divisor > 0) incompatible = true; } cs->num_elements = num_elements; if (incompatible || num_elements == 0) { DBG("Error: zero vertex elements, or more vertex buffers used than supported"); FREE(cs); return NULL; } unsigned start_offset = 0; /* start of current consecutive stretch */ bool nonconsecutive = true; /* previous value of nonconsecutive */ for (unsigned idx = 0; idx < num_elements; ++idx) { unsigned element_size = util_format_get_blocksize(elements[idx].src_format); unsigned end_offset = elements[idx].src_offset + element_size; uint32_t format_type, normalize; if (nonconsecutive) start_offset = elements[idx].src_offset; /* maximum vertex size is 256 bytes */ assert(element_size != 0 && end_offset <= 256); /* check whether next element is consecutive to this one */ nonconsecutive = (idx == (num_elements - 1)) || elements[idx + 1].vertex_buffer_index != elements[idx].vertex_buffer_index || end_offset != elements[idx + 1].src_offset; format_type = translate_vertex_format_type(elements[idx].src_format); normalize = translate_vertex_format_normalize(elements[idx].src_format); assert(format_type != ETNA_NO_MATCH); assert(normalize != ETNA_NO_MATCH); if (ctx->specs.halti < 5) { cs->FE_VERTEX_ELEMENT_CONFIG[idx] = COND(nonconsecutive, VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE) | format_type | VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) | normalize | VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) | VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) | VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) | VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset); } else { /* HALTI5 spread vertex attrib config over two registers */ cs->NFE_GENERIC_ATTRIB_CONFIG0[idx] = format_type | 
VIVS_NFE_GENERIC_ATTRIB_CONFIG0_NUM(util_format_get_nr_components(elements[idx].src_format)) | normalize | VIVS_NFE_GENERIC_ATTRIB_CONFIG0_ENDIAN(ENDIAN_MODE_NO_SWAP) | VIVS_NFE_GENERIC_ATTRIB_CONFIG0_STREAM(elements[idx].vertex_buffer_index) | VIVS_NFE_GENERIC_ATTRIB_CONFIG0_START(elements[idx].src_offset); cs->NFE_GENERIC_ATTRIB_CONFIG1[idx] = COND(nonconsecutive, VIVS_NFE_GENERIC_ATTRIB_CONFIG1_NONCONSECUTIVE) | VIVS_NFE_GENERIC_ATTRIB_CONFIG1_END(end_offset - start_offset); } cs->NFE_GENERIC_ATTRIB_SCALE[idx] = 0x3f800000; /* 1 for integer, 1.0 for float */ } return cs; }
/**
 * Push vertex data inline through the FIFO for all vertex elements
 * flagged in nv50->vbo_fifo, splitting primitives as needed to fit the
 * available ring space, once per instance.
 *
 * \param idxbuf   optional index buffer resource (NULL for non-indexed)
 * \param idxsize  index size in bytes (1/2/4)
 * \param idxbias  value added to each fetched index
 */
void
nv50_push_elements_instanced(struct pipe_context *pipe,
                             struct pipe_resource *idxbuf,
                             unsigned idxsize, int idxbias,
                             unsigned mode, unsigned start, unsigned count,
                             unsigned i_start, unsigned i_count)
{
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_grobj *tesla = nv50->screen->tesla;
   struct nouveau_channel *chan = tesla->channel;
   struct push_context ctx;
   const unsigned p_overhead = 4 + /* begin/end */
                               4; /* potential edgeflag enable/disable */
   const unsigned v_overhead = 1 + /* VERTEX_DATA packet header */
                               2; /* potential edgeflag modification */
   struct util_split_prim s;
   unsigned vtx_size;
   boolean nzi = FALSE;
   int i;

   ctx.nv50 = nv50;
   ctx.attr_nr = 0;
   ctx.idxbuf = NULL;
   ctx.vtx_size = 0;
   ctx.edgeflag = 0.5f;
   ctx.edgeflag_attr = nv50->vertprog->vp.edgeflag;

   /* map vertex buffers, determine vertex size */
   for (i = 0; i < nv50->vtxelt->num_elements; i++) {
      struct pipe_vertex_element *ve = &nv50->vtxelt->pipe[i];
      struct pipe_vertex_buffer *vb = &nv50->vtxbuf[ve->vertex_buffer_index];
      struct nouveau_bo *bo = nv50_resource(vb->buffer)->bo;
      unsigned size, nr_components, n;

      /* only attributes marked for FIFO push are emitted inline */
      if (!(nv50->vbo_fifo & (1 << i)))
         continue;
      n = ctx.attr_nr++;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.attr[n].map = (uint8_t *)bo->map + vb->buffer_offset + ve->src_offset;
      /* NOTE(review): map pointer is kept after unmap — assumes the bo
       * mapping stays valid for the duration of the draw; confirm. */
      nouveau_bo_unmap(bo);

      ctx.attr[n].stride = vb->stride;
      ctx.attr[n].divisor = ve->instance_divisor;
      if (ctx.attr[n].divisor) {
         /* start at the attribute value matching the first instance */
         ctx.attr[n].step = i_start % ve->instance_divisor;
         ctx.attr[n].map = (uint8_t *)ctx.attr[n].map + i_start * vb->stride;
      }

      /* pick an emit callback by component size/count and accumulate the
       * number of 32-bit words one pushed vertex occupies */
      size = util_format_get_component_bits(ve->src_format,
                                            UTIL_FORMAT_COLORSPACE_RGB, 0);
      nr_components = util_format_get_nr_components(ve->src_format);
      switch (size) {
      case 8:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b08_1; break;
         case 2: ctx.attr[n].push = emit_b16_1; break;
         case 3: ctx.attr[n].push = emit_b08_3; break;
         case 4: ctx.attr[n].push = emit_b32_1; break;
         }
         ctx.vtx_size++;
         break;
      case 16:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b16_1; break;
         case 2: ctx.attr[n].push = emit_b32_1; break;
         case 3: ctx.attr[n].push = emit_b16_3; break;
         case 4: ctx.attr[n].push = emit_b32_2; break;
         }
         ctx.vtx_size += (nr_components + 1) >> 1;
         break;
      case 32:
         switch (nr_components) {
         case 1: ctx.attr[n].push = emit_b32_1; break;
         case 2: ctx.attr[n].push = emit_b32_2; break;
         case 3: ctx.attr[n].push = emit_b32_3; break;
         case 4: ctx.attr[n].push = emit_b32_4; break;
         }
         ctx.vtx_size += nr_components;
         break;
      default:
         assert(0);
         return;
      }
   }
   vtx_size = ctx.vtx_size + v_overhead;

   /* map index buffer, if present */
   if (idxbuf) {
      struct nouveau_bo *bo = nv50_resource(idxbuf)->bo;

      if (nouveau_bo_map(bo, NOUVEAU_BO_RD)) {
         assert(bo->map);
         return;
      }
      ctx.idxbuf = bo->map;
      ctx.idxbias = idxbias;
      ctx.idxsize = idxsize;
      nouveau_bo_unmap(bo);
   }

   s.priv = &ctx;
   s.edge = emit_edgeflag;
   if (idxbuf) {
      if (idxsize == 1)
         s.emit = idxbias ? emit_elt08_biased : emit_elt08;
      else
      if (idxsize == 2)
         s.emit = idxbias ? emit_elt16_biased : emit_elt16;
      else
         s.emit = idxbias ? emit_elt32_biased : emit_elt32;
   } else
      s.emit = emit_verts;

   /* per-instance loop */
   BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 2);
   OUT_RING  (chan, NV50_CB_AUX | (24 << 8));
   OUT_RING  (chan, i_start);
   while (i_count--) {
      unsigned max_verts;
      boolean done;

      /* advance per-instance attribute pointers when their divisor fires */
      for (i = 0; i < ctx.attr_nr; i++) {
         if (!ctx.attr[i].divisor ||
              ctx.attr[i].divisor != ++ctx.attr[i].step)
            continue;
         ctx.attr[i].step = 0;
         ctx.attr[i].map = (uint8_t *)ctx.attr[i].map + ctx.attr[i].stride;
      }

      util_split_prim_init(&s, mode, start, count);
      do {
         /* make sure there is room for at least a few vertices */
         if (AVAIL_RING(chan) < p_overhead + (6 * vtx_size)) {
            FIRE_RING(chan);
            if (!nv50_state_validate(nv50, p_overhead + (6 * vtx_size))) {
               assert(0);
               return;
            }
         }

         max_verts = AVAIL_RING(chan);
         max_verts -= p_overhead;
         max_verts /= vtx_size;

         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
         /* bit 28: continue from the previous instance (set after first) */
         OUT_RING  (chan, nv50_prim(s.mode) | (nzi ? (1 << 28) : 0));
         done = util_split_prim_next(&s, max_verts);
         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
         OUT_RING  (chan, 0);
      } while (!done);

      nzi = TRUE;
   }
}
static void brw_translate_vertex_elements(struct brw_context *brw, struct brw_vertex_element_packet *brw_velems, const struct pipe_vertex_element *attribs, unsigned count) { unsigned i; /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. * * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ brw_velems->header.opcode = CMD_VERTEX_ELEMENT; if (count == 0) { brw_velems->header.length = 1; brw_velems->ve[0].ve0.src_offset = 0; brw_velems->ve[0].ve0.src_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; brw_velems->ve[0].ve0.valid = 1; brw_velems->ve[0].ve0.vertex_buffer_index = 0; brw_velems->ve[0].ve1.dst_offset = 0; brw_velems->ve[0].ve1.vfcomponent0 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent1 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent2 = BRW_VE1_COMPONENT_STORE_0; brw_velems->ve[0].ve1.vfcomponent3 = BRW_VE1_COMPONENT_STORE_1_FLT; return; } /* Now emit vertex element (VEP) state packets. 
* */ brw_velems->header.length = (1 + count * 2) - 2; for (i = 0; i < count; i++) { const struct pipe_vertex_element *input = &attribs[i]; unsigned nr_components = util_format_get_nr_components(input->src_format); uint32_t format = brw_translate_surface_format( input->src_format ); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; switch (nr_components) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */ case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; break; } brw_velems->ve[i].ve0.src_offset = input->src_offset; brw_velems->ve[i].ve0.src_format = format; brw_velems->ve[i].ve0.valid = 1; brw_velems->ve[i].ve0.vertex_buffer_index = input->vertex_buffer_index; brw_velems->ve[i].ve1.vfcomponent0 = comp0; brw_velems->ve[i].ve1.vfcomponent1 = comp1; brw_velems->ve[i].ve1.vfcomponent2 = comp2; brw_velems->ve[i].ve1.vfcomponent3 = comp3; if (BRW_IS_IGDNG(brw)) brw_velems->ve[i].ve1.dst_offset = 0; else brw_velems->ve[i].ve1.dst_offset = i * 4; } }
static void * nvfx_vtxelts_state_create(struct pipe_context *pipe, unsigned num_elements, const struct pipe_vertex_element *elements) { struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state); struct translate_key transkey; unsigned per_vertex_size[16]; unsigned vb_compacted_index[16]; if(num_elements > 16) { _debug_printf("Error: application attempted to use %u vertex elements, but only 16 are supported: ignoring the rest\n", num_elements); num_elements = 16; } memset(per_vertex_size, 0, sizeof(per_vertex_size)); memcpy(cso->pipe, elements, num_elements * sizeof(elements[0])); cso->num_elements = num_elements; cso->needs_translate = FALSE; transkey.nr_elements = 0; transkey.output_stride = 0; for(unsigned i = 0; i < num_elements; ++i) { const struct pipe_vertex_element* ve = &elements[i]; if(!ve->instance_divisor) per_vertex_size[ve->vertex_buffer_index] += util_format_get_stride(ve->src_format, 1); } for(unsigned i = 0; i < 16; ++i) { if(per_vertex_size[i]) { unsigned idx = cso->num_per_vertex_buffer_infos++; cso->per_vertex_buffer_info[idx].vertex_buffer_index = i; cso->per_vertex_buffer_info[idx].per_vertex_size = per_vertex_size[i]; vb_compacted_index[i] = idx; } } for(unsigned i = 0; i < num_elements; ++i) { const struct pipe_vertex_element* ve = &elements[i]; unsigned type = nvfx_vertex_formats[ve->src_format]; unsigned ncomp = util_format_get_nr_components(ve->src_format); //if(ve->frequency != PIPE_ELEMENT_FREQUENCY_PER_VERTEX) if(ve->instance_divisor) { struct nvfx_low_frequency_element* lfve; cso->vtxfmt[i] = NV30_3D_VTXFMT_TYPE_V32_FLOAT; //if(ve->frequency == PIPE_ELEMENT_FREQUENCY_CONSTANT) if(0) lfve = &cso->constant[cso->num_constant++]; else { lfve = &cso->per_instance[cso->num_per_instance++].base; ((struct nvfx_per_instance_element*)lfve)->instance_divisor = ve->instance_divisor; } lfve->idx = i; lfve->vertex_buffer_index = ve->vertex_buffer_index; lfve->src_offset = ve->src_offset; lfve->fetch_rgba_float = 
util_format_description(ve->src_format)->fetch_rgba_float; lfve->ncomp = ncomp; } else { unsigned idx; idx = cso->num_per_vertex++; cso->per_vertex[idx].idx = i; cso->per_vertex[idx].vertex_buffer_index = ve->vertex_buffer_index; cso->per_vertex[idx].src_offset = ve->src_offset; idx = transkey.nr_elements++; transkey.element[idx].input_format = ve->src_format; transkey.element[idx].input_buffer = vb_compacted_index[ve->vertex_buffer_index]; transkey.element[idx].input_offset = ve->src_offset; transkey.element[idx].instance_divisor = 0; transkey.element[idx].type = TRANSLATE_ELEMENT_NORMAL; if(type) { transkey.element[idx].output_format = ve->src_format; cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | type; } else { unsigned float32[4] = {PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT}; transkey.element[idx].output_format = float32[ncomp - 1]; cso->needs_translate = TRUE; cso->vtxfmt[i] = (ncomp << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT; } transkey.element[idx].output_offset = transkey.output_stride; transkey.output_stride += (util_format_get_stride(transkey.element[idx].output_format, 1) + 3) & ~3; } } cso->translate = translate_create(&transkey); cso->vertex_length = transkey.output_stride >> 2; cso->max_vertices_per_packet = 2047 / MAX2(cso->vertex_length, 1); return (void *)cso; }
/**
 * Create an interlaced NV12 video buffer for the VP3 video engine.
 *
 * The buffer is laid out as two 2D-array resources (luma R8, chroma
 * R8G8) with two array layers each — one per field.  Sampler views are
 * created per plane and per component, and one surface per plane per
 * field.  Falls back to the generic vl buffer for non-NV12 formats or
 * when XVMC_VL is set.
 */
struct pipe_video_buffer *
nouveau_vp3_video_buffer_create(struct pipe_context *pipe,
                                const struct pipe_video_buffer *templat,
                                int flags)
{
   struct nouveau_vp3_video_buffer *buffer;
   struct pipe_resource templ;
   unsigned i, j, component;
   struct pipe_sampler_view sv_templ;
   struct pipe_surface surf_templ;

   assert(templat->interlaced);
   if (getenv("XVMC_VL") || templat->buffer_format != PIPE_FORMAT_NV12)
      return vl_video_buffer_create(pipe, templat);

   assert(templat->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);

   buffer = CALLOC_STRUCT(nouveau_vp3_video_buffer);
   if (!buffer)
      return NULL;

   buffer->base.buffer_format = templat->buffer_format;
   buffer->base.context = pipe;
   buffer->base.destroy = nouveau_vp3_video_buffer_destroy;
   buffer->base.chroma_format = templat->chroma_format;
   buffer->base.width = templat->width;
   buffer->base.height = templat->height;
   buffer->base.get_sampler_view_planes = nouveau_vp3_video_buffer_sampler_view_planes;
   buffer->base.get_sampler_view_components = nouveau_vp3_video_buffer_sampler_view_components;
   buffer->base.get_surfaces = nouveau_vp3_video_buffer_surfaces;
   buffer->base.interlaced = true;

   /* luma plane: R8, two layers (fields), half the frame height each */
   memset(&templ, 0, sizeof(templ));
   templ.target = PIPE_TEXTURE_2D_ARRAY;
   templ.depth0 = 1;
   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
   templ.format = PIPE_FORMAT_R8_UNORM;
   templ.width0 = buffer->base.width;
   templ.height0 = (buffer->base.height + 1)/2;
   templ.flags = flags;
   templ.array_size = 2;

   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
   if (!buffer->resources[0])
      goto error;

   /* chroma plane: interleaved CbCr (R8G8), half size in both dimensions */
   templ.format = PIPE_FORMAT_R8G8_UNORM;
   buffer->num_planes = 2;
   templ.width0 = (templ.width0 + 1) / 2;
   templ.height0 = (templ.height0 + 1) / 2;

   for (i = 1; i < buffer->num_planes; ++i) {
      buffer->resources[i] = pipe->screen->resource_create(pipe->screen, &templ);
      if (!buffer->resources[i])
         goto error;
   }

   /* one sampler view per plane, plus one single-component view per
    * component (swizzled so the component appears in r/g/b, a = 1) */
   memset(&sv_templ, 0, sizeof(sv_templ));
   for (component = 0, i = 0; i < buffer->num_planes; ++i ) {
      struct pipe_resource *res = buffer->resources[i];
      unsigned nr_components = util_format_get_nr_components(res->format);

      u_sampler_view_default_template(&sv_templ, res, res->format);
      buffer->sampler_view_planes[i] = pipe->create_sampler_view(pipe, res, &sv_templ);
      if (!buffer->sampler_view_planes[i])
         goto error;

      for (j = 0; j < nr_components; ++j, ++component) {
         sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_RED + j;
         sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;

         buffer->sampler_view_components[component] = pipe->create_sampler_view(pipe, res, &sv_templ);
         if (!buffer->sampler_view_components[component])
            goto error;
      }
   }

   /* two surfaces per plane: layer 0 = first field, layer 1 = second */
   memset(&surf_templ, 0, sizeof(surf_templ));
   for (j = 0; j < buffer->num_planes; ++j) {
      surf_templ.format = buffer->resources[j]->format;
      surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 0;
      buffer->surfaces[j * 2] = pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
      if (!buffer->surfaces[j * 2])
         goto error;

      surf_templ.u.tex.first_layer = surf_templ.u.tex.last_layer = 1;
      buffer->surfaces[j * 2 + 1] = pipe->create_surface(pipe, buffer->resources[j], &surf_templ);
      if (!buffer->surfaces[j * 2 + 1])
         goto error;
   }

   return &buffer->base;

error:
   /* destroy releases every resource/view/surface created so far */
   nouveau_vp3_video_buffer_destroy(&buffer->base);
   return NULL;
}
/**
 * Finish decoding a frame: unmap the per-frame streams, run motion
 * compensation against the reference frames, then inverse-scan/IDCT the
 * coded blocks and blend them into the target surfaces.
 */
static void
vl_mpeg12_end_frame(struct pipe_video_decoder *decoder,
                    struct pipe_video_buffer *target,
                    struct pipe_picture_desc *picture)
{
   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder;
   struct pipe_mpeg12_picture_desc *desc = (struct pipe_mpeg12_picture_desc *)picture;
   struct pipe_sampler_view **ref_frames[2];
   struct pipe_sampler_view **mc_source_sv;
   struct pipe_surface **target_surfaces;
   struct pipe_vertex_buffer vb[3];
   struct vl_mpeg12_buffer *buf;

   const unsigned *plane_order;
   unsigned i, j, component;
   unsigned nr_components;

   assert(dec && target && picture);
   assert(!target->interlaced);

   buf = vl_mpeg12_get_decode_buffer(dec, target);

   /* CPU writing of this frame's streams is done; unmap them */
   vl_vb_unmap(&buf->vertex_stream, dec->base.context);

   dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer);
   dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer);

   vb[0] = dec->quads;
   vb[1] = dec->pos;

   target_surfaces = target->get_surfaces(target);

   /* gather the (up to two) reference frames' plane sampler views */
   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
      if (desc->ref[i])
         ref_frames[i] = desc->ref[i]->get_sampler_view_planes(desc->ref[i]);
      else
         ref_frames[i] = NULL;
   }

   /* pass 1: motion compensation from the reference frames */
   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv);
   for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
      if (!target_surfaces[i]) continue;

      vl_mc_set_surface(&buf->mc[i], target_surfaces[i]);

      for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
         if (!ref_frames[j] || !ref_frames[j][i]) continue;

         vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);

         vl_mc_render_ref(i ? &dec->mc_c : &dec->mc_y, &buf->mc[i], ref_frames[j][i]);
      }
   }

   /* pass 2: inverse zig-zag scan (and IDCT when done on the GPU) */
   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
   for (i = 0; i < VL_NUM_COMPONENTS; ++i) {
      if (!buf->num_ycbcr_blocks[i]) continue;

      vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
      dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);

      vl_zscan_render(i ? &dec->zscan_c : & dec->zscan_y, &buf->zscan[i] , buf->num_ycbcr_blocks[i]);

      if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
         vl_idct_flush(i ? &dec->idct_c : &dec->idct_y, &buf->idct[i], buf->num_ycbcr_blocks[i]);
   }

   plane_order = vl_video_buffer_plane_order(target->buffer_format);
   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);

   /* pass 3: blend the decoded residuals into the target planes;
    * i walks target surfaces, component walks individual components */
   for (i = 0, component = 0; component < VL_NUM_COMPONENTS; ++i) {
      if (!target_surfaces[i]) continue;

      nr_components = util_format_get_nr_components(target_surfaces[i]->texture->format);
      for (j = 0; j < nr_components; ++j, ++component) {
         unsigned plane = plane_order[component];

         if (!buf->num_ycbcr_blocks[plane]) continue;

         vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, plane);
         dec->base.context->set_vertex_buffers(dec->base.context, 2, vb);

         if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
            vl_idct_prepare_stage2(i ? &dec->idct_c : &dec->idct_y, &buf->idct[plane]);
         else {
            dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[plane]);
            dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
         }
         vl_mc_render_ycbcr(i ? &dec->mc_c : &dec->mc_y, &buf->mc[i], j, buf->num_ycbcr_blocks[plane]);
      }
   }

   /* rotate through the ring of decode buffers */
   ++dec->current_buffer;
   dec->current_buffer %= 4;
}