static struct pipe_sampler_view * fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) { struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); enum pipe_format format = cso->format; unsigned lvl, layers = 0; if (!so) return NULL; if (format == PIPE_FORMAT_X32_S8X24_UINT) { rsc = rsc->stencil; format = rsc->base.format; } so->base = *cso; pipe_reference(NULL, &prsc->reference); so->base.texture = prsc; so->base.reference.count = 1; so->base.context = pctx; so->texconst0 = A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(format)) | A5XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) | fd5_tex_swiz(format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); /* NOTE: since we sample z24s8 using 8888_UINT format, the swizzle * we get isn't quite right. Use SWAP(XYZW) as a cheap and cheerful * way to re-arrange things so stencil component is where the swiz * expects. * * Note that gallium expects stencil sampler to return (s,s,s,s) * which isn't quite true. To make that happen we'd have to massage * the swizzle. But in practice only the .x component is used. */ if (format == PIPE_FORMAT_X24S8_UINT) { so->texconst0 |= A5XX_TEX_CONST_0_SWAP(XYZW); } if (util_format_is_srgb(format)) { if (use_astc_srgb_workaround(pctx, format)) so->astc_srgb = true; so->texconst0 |= A5XX_TEX_CONST_0_SRGB; } if (cso->target == PIPE_BUFFER) { unsigned elements = cso->u.buf.size / util_format_get_blocksize(format); lvl = 0; so->texconst1 = A5XX_TEX_CONST_1_WIDTH(elements) | A5XX_TEX_CONST_1_HEIGHT(1); so->texconst2 = A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) | A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp); so->offset = cso->u.buf.offset; } else { unsigned miplevels; lvl = fd_sampler_first_level(cso); miplevels = fd_sampler_last_level(cso) - lvl; layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels); so->texconst1 = A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); so->texconst2 = A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(format)) | A5XX_TEX_CONST_2_PITCH( util_format_get_nblocksx( format, rsc->slices[lvl].pitch) * rsc->cpp); so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); } so->texconst2 |= A5XX_TEX_CONST_2_TYPE(fd5_tex_type(cso->target)); switch (cso->target) { case PIPE_TEXTURE_RECT: case PIPE_TEXTURE_1D: case PIPE_TEXTURE_2D: so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(1); break; case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers); break; case PIPE_TEXTURE_CUBE: case PIPE_TEXTURE_CUBE_ARRAY: so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(layers / 6); break; case PIPE_TEXTURE_3D: so->texconst3 = A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); so->texconst5 = A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); break; default: so->texconst3 = 0x00000000; break; } return &so->base; }
/* Compare old and new render states and emit differences between them * to hardware. Simplest implementation would be to emit the whole of * the "to" state. */ static enum pipe_error emit_rss(struct svga_context *svga, unsigned dirty) { struct svga_screen *screen = svga_screen(svga->pipe.screen); struct rs_queue queue; float point_size_min; queue.rs_count = 0; if (dirty & SVGA_NEW_BLEND) { const struct svga_blend_state *curr = svga->curr.blend; EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail ); EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail ); if (curr->rt[0].blend_enable) { EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail ); EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail ); EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail ); EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, SEPARATEALPHABLENDENABLE, fail ); if (curr->rt[0].separate_alpha_blend_enable) { EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail ); EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail ); EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail ); } } } if (dirty & SVGA_NEW_BLEND_COLOR) { uint32 color; uint32 r = float_to_ubyte(svga->curr.blend_color.color[0]); uint32 g = float_to_ubyte(svga->curr.blend_color.color[1]); uint32 b = float_to_ubyte(svga->curr.blend_color.color[2]); uint32 a = float_to_ubyte(svga->curr.blend_color.color[3]); color = (a << 24) | (r << 16) | (g << 8) | b; EMIT_RS( svga, color, BLENDCOLOR, fail ); } if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) { const struct svga_depth_stencil_state *curr = svga->curr.depth; const struct svga_rasterizer_state *rast = svga->curr.rast; if (!curr->stencil[0].enabled) { /* Stencil disabled */ EMIT_RS( svga, FALSE, STENCILENABLE, fail ); EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); } else if (curr->stencil[0].enabled && !curr->stencil[1].enabled) { /* Regular stencil */ EMIT_RS( svga, TRUE, STENCILENABLE, fail ); EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail ); EMIT_RS( svga, curr->stencil[0].func, STENCILFUNC, fail ); EMIT_RS( svga, curr->stencil[0].fail, STENCILFAIL, fail ); EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail ); EMIT_RS( svga, curr->stencil[0].pass, STENCILPASS, fail ); EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); } else { int cw, ccw; /* Hardware frontwinding is always CW, so if ours is also CW, * then our definition of front face agrees with hardware. * Otherwise need to flip. */ if (rast->templ.front_ccw) { ccw = 0; cw = 1; } else { ccw = 1; cw = 0; } /* Twoside stencil */ EMIT_RS( svga, TRUE, STENCILENABLE, fail ); EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail ); EMIT_RS( svga, curr->stencil[cw].func, STENCILFUNC, fail ); EMIT_RS( svga, curr->stencil[cw].fail, STENCILFAIL, fail ); EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail ); EMIT_RS( svga, curr->stencil[cw].pass, STENCILPASS, fail ); EMIT_RS( svga, curr->stencil[ccw].func, CCWSTENCILFUNC, fail ); EMIT_RS( svga, curr->stencil[ccw].fail, CCWSTENCILFAIL, fail ); EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail ); EMIT_RS( svga, curr->stencil[ccw].pass, CCWSTENCILPASS, fail ); EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail ); EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail ); } EMIT_RS( svga, curr->zenable, ZENABLE, fail ); if (curr->zenable) { EMIT_RS( svga, curr->zfunc, ZFUNC, fail ); EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail ); } EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail ); if (curr->alphatestenable) { EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail ); EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail ); } } if (dirty & SVGA_NEW_STENCIL_REF) { EMIT_RS( svga, svga->curr.stencil_ref.ref_value[0], STENCILREF, fail ); } if (dirty & (SVGA_NEW_RAST | SVGA_NEW_NEED_PIPELINE)) { const struct svga_rasterizer_state *curr = svga->curr.rast; unsigned cullmode = curr->cullmode; /* Shademode: still need to rearrange index list to move * flat-shading PV first vertex. */ EMIT_RS( svga, curr->shademode, SHADEMODE, fail ); /* Don't do culling while the software pipeline is active. It * does it for us, and additionally introduces potentially * back-facing triangles. */ if (svga->state.sw.need_pipeline) cullmode = SVGA3D_FACE_NONE; point_size_min = util_get_min_point_size(&curr->templ); EMIT_RS( svga, cullmode, CULLMODE, fail ); EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail ); EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail ); EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail ); EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail ); EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail ); EMIT_RS_FLOAT( svga, point_size_min, POINTSIZEMIN, fail ); EMIT_RS_FLOAT( svga, screen->maxPointSize, POINTSIZEMAX, fail ); EMIT_RS( svga, curr->pointsprite, POINTSPRITEENABLE, fail); } if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE)) { const struct svga_rasterizer_state *curr = svga->curr.rast; float slope = 0.0; float bias = 0.0; /* Need to modify depth bias according to bound depthbuffer * format. Don't do hardware depthbias while the software * pipeline is active. */ if (!svga->state.sw.need_pipeline && svga->curr.framebuffer.zsbuf) { slope = curr->slopescaledepthbias; bias = svga->curr.depthscale * curr->depthbias; } EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail ); EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail ); } if (dirty & SVGA_NEW_FRAME_BUFFER) { /* XXX: we only look at the first color buffer's sRGB state */ float gamma = 1.0f; if (svga->curr.framebuffer.cbufs[0] && util_format_is_srgb(svga->curr.framebuffer.cbufs[0]->format)) { gamma = 2.2f; } EMIT_RS_FLOAT(svga, gamma, OUTPUTGAMMA, fail); } if (dirty & SVGA_NEW_RAST) { /* bitmask of the enabled clip planes */ unsigned enabled = svga->curr.rast->templ.clip_plane_enable; EMIT_RS( svga, enabled, CLIPPLANEENABLE, fail ); } if (queue.rs_count) { SVGA3dRenderState *rs; if (SVGA3D_BeginSetRenderState( svga->swc, &rs, queue.rs_count ) != PIPE_OK) goto fail; memcpy( rs, queue.rs, queue.rs_count * sizeof queue.rs[0]); SVGA_FIFOCommitAll( svga->swc ); } return PIPE_OK; fail: /* XXX: need to poison cached hardware state on failure to ensure * dirty state gets re-emitted. Fix this by re-instating partial * FIFOCommit command and only updating cached hw state once the * initial allocation has succeeded. */ memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs)); return PIPE_ERROR_OUT_OF_MEMORY; }
static struct pipe_sampler_view * fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, const struct pipe_sampler_view *cso) { struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view); struct fd_resource *rsc = fd_resource(prsc); unsigned lvl; uint32_t sz2 = 0; if (!so) return NULL; so->base = *cso; pipe_reference(NULL, &prsc->reference); so->base.texture = prsc; so->base.reference.count = 1; so->base.context = pctx; so->texconst0 = A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, cso->swizzle_b, cso->swizzle_a); if (prsc->target == PIPE_BUFFER || util_format_is_pure_integer(cso->format)) so->texconst0 |= A3XX_TEX_CONST_0_NOCONVERT; if (util_format_is_srgb(cso->format)) so->texconst0 |= A3XX_TEX_CONST_0_SRGB; if (prsc->target == PIPE_BUFFER) { lvl = 0; so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / util_format_get_blocksize(cso->format)) | A3XX_TEX_CONST_1_HEIGHT(1); } else { unsigned miplevels; lvl = fd_sampler_first_level(cso); miplevels = fd_sampler_last_level(cso) - lvl; so->texconst0 |= A3XX_TEX_CONST_0_MIPLVLS(miplevels); so->texconst1 = A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); } /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ so->texconst2 = A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, rsc->slices[lvl].pitch) * rsc->cpp); switch (prsc->target) { case PIPE_TEXTURE_1D_ARRAY: case PIPE_TEXTURE_2D_ARRAY: so->texconst3 = A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) | A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0); break; case PIPE_TEXTURE_3D: so->texconst3 = A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) | A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[lvl].size0); while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) sz2 = rsc->slices[++lvl].size0; so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(sz2); break; default: so->texconst3 = 0x00000000; break; } return &so->base; }
/** * Update framebuffer state (color, depth, stencil, etc. buffers) */ static void update_framebuffer_state( struct st_context *st ) { struct pipe_framebuffer_state *framebuffer = &st->state.framebuffer; struct gl_framebuffer *fb = st->ctx->DrawBuffer; struct st_renderbuffer *strb; GLuint i; st_flush_bitmap_cache(st); st->state.fb_orientation = st_fb_orientation(fb); framebuffer->width = fb->Width; framebuffer->height = fb->Height; /*printf("------ fb size %d x %d\n", fb->Width, fb->Height);*/ /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state * to determine which surfaces to draw to */ framebuffer->nr_cbufs = fb->_NumColorDrawBuffers; for (i = 0; i < fb->_NumColorDrawBuffers; i++) { pipe_surface_reference(&framebuffer->cbufs[i], NULL); strb = st_renderbuffer(fb->_ColorDrawBuffers[i]); if (strb) { if (strb->is_rtt || (strb->texture && util_format_is_srgb(strb->texture->format))) { /* rendering to a GL texture, may have to update surface */ st_update_renderbuffer_surface(st, strb); } if (strb->surface) { pipe_surface_reference(&framebuffer->cbufs[i], strb->surface); } strb->defined = GL_TRUE; /* we'll be drawing something */ } } for (i = framebuffer->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&framebuffer->cbufs[i], NULL); } /* * Depth/Stencil renderbuffer/surface. */ strb = st_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); if (strb) { if (strb->is_rtt) { /* rendering to a GL texture, may have to update surface */ st_update_renderbuffer_surface(st, strb); } pipe_surface_reference(&framebuffer->zsbuf, strb->surface); } else { strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer); if (strb) { assert(strb->surface); pipe_surface_reference(&framebuffer->zsbuf, strb->surface); } else pipe_surface_reference(&framebuffer->zsbuf, NULL); } #ifdef DEBUG /* Make sure the resource binding flags were set properly */ for (i = 0; i < framebuffer->nr_cbufs; i++) { assert(!framebuffer->cbufs[i] || framebuffer->cbufs[i]->texture->bind & PIPE_BIND_RENDER_TARGET); } if (framebuffer->zsbuf) { assert(framebuffer->zsbuf->texture->bind & PIPE_BIND_DEPTH_STENCIL); } #endif cso_set_framebuffer(st->cso_context, framebuffer); }
/** * Implement pipe_screen::is_format_supported(). * \param bindings bitmask of PIPE_BIND_x flags */ static boolean svga_is_format_supported( struct pipe_screen *screen, enum pipe_format format, enum pipe_texture_target target, unsigned sample_count, unsigned bindings) { struct svga_screen *ss = svga_screen(screen); SVGA3dSurfaceFormat svga_format; SVGA3dSurfaceFormatCaps caps; SVGA3dSurfaceFormatCaps mask; assert(bindings); if (sample_count > 1) { /* In ms_samples, if bit N is set it means that we support * multisample with N+1 samples per pixel. */ if ((ss->ms_samples & (1 << (sample_count - 1))) == 0) { return FALSE; } } svga_format = svga_translate_format(ss, format, bindings); if (svga_format == SVGA3D_FORMAT_INVALID) { return FALSE; } /* we don't support sRGB rendering into display targets */ if (util_format_is_srgb(format) && (bindings & PIPE_BIND_DISPLAY_TARGET)) { return FALSE; } /* * For VGPU10 vertex formats, skip querying host capabilities */ if (ss->sws->have_vgpu10 && (bindings & PIPE_BIND_VERTEX_BUFFER)) { SVGA3dSurfaceFormat svga_format; unsigned flags; svga_translate_vertex_format_vgpu10(format, &svga_format, &flags); return svga_format != SVGA3D_FORMAT_INVALID; } /* * Override host capabilities, so that we end up with the same * visuals for all virtual hardware implementations. */ if (bindings & PIPE_BIND_DISPLAY_TARGET) { switch (svga_format) { case SVGA3D_A8R8G8B8: case SVGA3D_X8R8G8B8: case SVGA3D_R5G6B5: break; /* VGPU10 formats */ case SVGA3D_B8G8R8A8_UNORM: case SVGA3D_B8G8R8X8_UNORM: case SVGA3D_B5G6R5_UNORM: break; /* Often unsupported/problematic. This means we end up with the same * visuals for all virtual hardware implementations. */ case SVGA3D_A4R4G4B4: case SVGA3D_A1R5G5B5: return FALSE; default: return FALSE; } } /* * Query the host capabilities. */ svga_get_format_cap(ss, svga_format, &caps); if (bindings & PIPE_BIND_RENDER_TARGET) { /* Check that the color surface is blendable, unless it's an * integer format. */ if (!svga_format_is_integer(svga_format) && (caps.value & SVGA3DFORMAT_OP_NOALPHABLEND)) { return FALSE; } } mask.value = 0; if (bindings & PIPE_BIND_RENDER_TARGET) { mask.value |= SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET; } if (bindings & PIPE_BIND_DEPTH_STENCIL) { mask.value |= SVGA3DFORMAT_OP_ZSTENCIL; } if (bindings & PIPE_BIND_SAMPLER_VIEW) { mask.value |= SVGA3DFORMAT_OP_TEXTURE; } if (target == PIPE_TEXTURE_CUBE) { mask.value |= SVGA3DFORMAT_OP_CUBETEXTURE; } else if (target == PIPE_TEXTURE_3D) { mask.value |= SVGA3DFORMAT_OP_VOLUMETEXTURE; } return (caps.value & mask.value) == mask.value; }
static void write_texture_border_color(struct vc5_job *job, struct vc5_cl_out **uniforms, struct vc5_texture_stateobj *texstate, uint32_t unit) { struct pipe_sampler_state *sampler = texstate->samplers[unit]; struct pipe_sampler_view *texture = texstate->textures[unit]; struct vc5_resource *rsc = vc5_resource(texture->texture); union util_color uc; const struct util_format_description *tex_format_desc = util_format_description(texture->format); float border_color[4]; for (int i = 0; i < 4; i++) border_color[i] = sampler->border_color.f[i]; if (util_format_is_srgb(texture->format)) { for (int i = 0; i < 3; i++) border_color[i] = util_format_linear_to_srgb_float(border_color[i]); } /* Turn the border color into the layout of channels that it would * have when stored as texture contents. */ float storage_color[4]; util_format_unswizzle_4f(storage_color, border_color, tex_format_desc->swizzle); /* Now, pack so that when the vc5_format-sampled texture contents are * replaced with our border color, the vc5_get_format_swizzle() * swizzling will get the right channels. */ if (util_format_is_depth_or_stencil(texture->format)) { uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM, sampler->border_color.f[0]) << 8; } else { switch (rsc->vc5_format) { default: case VC5_TEXTURE_TYPE_RGBA8888: util_pack_color(storage_color, PIPE_FORMAT_R8G8B8A8_UNORM, &uc); break; case VC5_TEXTURE_TYPE_RGBA4444: util_pack_color(storage_color, PIPE_FORMAT_A8B8G8R8_UNORM, &uc); break; case VC5_TEXTURE_TYPE_RGB565: util_pack_color(storage_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); break; case VC5_TEXTURE_TYPE_ALPHA: uc.ui[0] = float_to_ubyte(storage_color[0]) << 24; break; case VC5_TEXTURE_TYPE_LUMALPHA: uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) | (float_to_ubyte(storage_color[0]) << 0)); break; } } cl_aligned_u32(uniforms, uc.ui[0]); }
static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem) { enum a5xx_tile_mode tile_mode; unsigned i; if (gmem) { tile_mode = TILE5_2; } else { tile_mode = TILE5_LINEAR; } for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { enum a5xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; bool srgb = false; struct fd_resource *rsc = NULL; struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t size = 0; uint32_t base = 0; uint32_t offset = 0; if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; enum pipe_format pformat = psurf->format; rsc = fd_resource(psurf->texture); slice = fd_resource_slice(rsc, psurf->u.tex.level); format = fd5_pipe2color(pformat); swap = fd5_pipe2swap(pformat); srgb = util_format_is_srgb(pformat); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); if (gmem) { stride = gmem->bin_w * rsc->cpp; size = stride * gmem->bin_h; base = gmem->cbuf_base[i]; } else { stride = slice->pitch * rsc->cpp; size = slice->size0; } } OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5); OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | 0x800 | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */ COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB)); OUT_RING(ring, A5XX_RB_MRT_PITCH(stride)); OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size)); if (gmem || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */ OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */ } else { debug_assert((offset + size) <= fd_bo_size(rsc->bo)); OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */ } OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1); OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format)); /* when we support UBWC, these would be the system memory * addr/pitch/etc: */ OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4); OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0)); OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); } }
static void r300_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit) { struct r300_context *r300 = r300_context(pipe); struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct pipe_blit_info info = *blit; /* The driver supports sRGB textures but not framebuffers. Blitting * from sRGB to sRGB should be the same as blitting from linear * to linear, so use that, This avoids incorrect linearization. */ if (util_format_is_srgb(info.src.format)) { info.src.format = util_format_linear(info.src.format); info.dst.format = util_format_linear(info.dst.format); } /* MSAA resolve. */ if (info.src.resource->nr_samples > 1 && !util_format_is_depth_or_stencil(info.src.resource->format)) { r300_msaa_resolve(pipe, &info); return; } /* Can't read MSAA textures. */ if (info.src.resource->nr_samples > 1) { return; } /* Blit a combined depth-stencil resource as color. * S8Z24 is the only supported stencil format. */ if ((info.mask & PIPE_MASK_S) && info.src.format == PIPE_FORMAT_S8_UINT_Z24_UNORM && info.dst.format == PIPE_FORMAT_S8_UINT_Z24_UNORM) { if (info.dst.resource->nr_samples > 1) { /* Cannot do that with MSAA buffers. */ info.mask &= ~PIPE_MASK_S; if (!(info.mask & PIPE_MASK_Z)) { return; } } else { /* Single-sample buffer. */ info.src.format = PIPE_FORMAT_B8G8R8A8_UNORM; info.dst.format = PIPE_FORMAT_B8G8R8A8_UNORM; if (info.mask & PIPE_MASK_Z) { info.mask = PIPE_MASK_RGBA; /* depth+stencil */ } else { info.mask = PIPE_MASK_B; /* stencil only */ } } } /* Decompress ZMASK. */ if (r300->zmask_in_use && !r300->locked_zbuffer) { if (fb->zsbuf->texture == info.src.resource || fb->zsbuf->texture == info.dst.resource) { r300_decompress_zmask(r300); } } r300_blitter_begin(r300, R300_BLIT | (info.render_condition_enable ? 0 : R300_IGNORE_RENDER_COND)); util_blitter_blit(r300->blitter, &info); r300_blitter_end(r300); }
static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w, bool decode_srgb) { enum a4xx_tile_mode tile_mode; unsigned i; if (bin_w) { tile_mode = 2; } else { tile_mode = TILE4_LINEAR; } for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) { enum a4xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; bool srgb = false; struct fd_resource *rsc = NULL; struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t base = 0; uint32_t offset = 0; if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; enum pipe_format pformat = psurf->format; rsc = fd_resource(psurf->texture); /* In case we're drawing to Z32F_S8, the "color" actually goes to * the stencil */ if (rsc->stencil) { rsc = rsc->stencil; pformat = rsc->base.b.format; bases++; } slice = fd_resource_slice(rsc, psurf->u.tex.level); format = fd4_pipe2color(pformat); swap = fd4_pipe2swap(pformat); if (decode_srgb) srgb = util_format_is_srgb(pformat); else pformat = util_format_linear(pformat); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); if (bin_w) { stride = bin_w * rsc->cpp; if (bases) { base = bases[i]; } } else { stride = slice->pitch * rsc->cpp; } } else if ((i < nr_bufs) && bases) { base = bases[i]; } OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3); OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | COND(srgb, A4XX_RB_MRT_BUF_INFO_COLOR_SRGB)); if (bin_w || (i >= nr_bufs) || !bufs[i]) { OUT_RING(ring, base); OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride)); } else { OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d.. * not sure if we need to skip it for bypass or * not. */ OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0)); } } }
/** * Define a vgpu10 sampler state. */ static void define_sampler_state_object(struct svga_context *svga, struct svga_sampler_state *ss, const struct pipe_sampler_state *ps) { uint8_t max_aniso = (uint8_t) 255; /* XXX fix me */ boolean anisotropic; uint8 compare_func; SVGA3dFilter filter; SVGA3dRGBAFloat bcolor; unsigned try; float min_lod, max_lod; assert(svga_have_vgpu10(svga)); anisotropic = ss->aniso_level > 1.0f; filter = translate_filter_mode(ps->min_mip_filter, ps->min_img_filter, ps->mag_img_filter, anisotropic, ss->compare_mode); compare_func = translate_comparison_func(ss->compare_func); COPY_4V(bcolor.value, ps->border_color.f); assert(ps->min_lod <= ps->max_lod); if (ps->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* just use the base level image */ min_lod = max_lod = 0.0f; } else { min_lod = ps->min_lod; max_lod = ps->max_lod; } /* If shadow comparisons are enabled, create two sampler states: one * with the given shadow compare mode, another with shadow comparison off. * We need the later because in some cases, we have to do the shadow * compare in the shader. So, we don't want to do it twice. */ STATIC_ASSERT(PIPE_TEX_COMPARE_NONE == 0); STATIC_ASSERT(PIPE_TEX_COMPARE_R_TO_TEXTURE == 1); ss->id[1] = SVGA3D_INVALID_ID; unsigned i; for (i = 0; i <= ss->compare_mode; i++) { ss->id[i] = util_bitmask_add(svga->sampler_object_id_bm); /* Loop in case command buffer is full and we need to flush and retry */ for (try = 0; try < 2; try++) { enum pipe_error ret = SVGA3D_vgpu10_DefineSamplerState(svga->swc, ss->id[i], filter, ss->addressu, ss->addressv, ss->addressw, ss->lod_bias, /* float */ max_aniso, compare_func, bcolor, min_lod, /* float */ max_lod); /* float */ if (ret == PIPE_OK) break; svga_context_flush(svga, NULL); } /* turn off the shadow compare option for second iteration */ filter &= ~SVGA3D_FILTER_COMPARE; } } static void * svga_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { struct svga_context *svga = svga_context(pipe); struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state ); if (!cso) return NULL; cso->mipfilter = translate_mip_filter(sampler->min_mip_filter); cso->magfilter = translate_img_filter( sampler->mag_img_filter ); cso->minfilter = translate_img_filter( sampler->min_img_filter ); cso->aniso_level = MAX2( sampler->max_anisotropy, 1 ); if (sampler->max_anisotropy) cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC; cso->lod_bias = sampler->lod_bias; cso->addressu = translate_wrap_mode(sampler->wrap_s); cso->addressv = translate_wrap_mode(sampler->wrap_t); cso->addressw = translate_wrap_mode(sampler->wrap_r); cso->normalized_coords = sampler->normalized_coords; cso->compare_mode = sampler->compare_mode; cso->compare_func = sampler->compare_func; { uint32 r = float_to_ubyte(sampler->border_color.f[0]); uint32 g = float_to_ubyte(sampler->border_color.f[1]); uint32 b = float_to_ubyte(sampler->border_color.f[2]); uint32 a = float_to_ubyte(sampler->border_color.f[3]); cso->bordercolor = (a << 24) | (r << 16) | (g << 8) | b; } /* No SVGA3D support for: * - min/max LOD clamping */ cso->min_lod = 0; cso->view_min_lod = MAX2((int) (sampler->min_lod + 0.5), 0); cso->view_max_lod = MAX2((int) (sampler->max_lod + 0.5), 0); /* Use min_mipmap */ if (svga->debug.use_min_mipmap) { if (cso->view_min_lod == cso->view_max_lod) { cso->min_lod = cso->view_min_lod; cso->view_min_lod = 0; cso->view_max_lod = 1000; /* Just a high number */ cso->mipfilter = SVGA3D_TEX_FILTER_NONE; } } if (svga_have_vgpu10(svga)) { define_sampler_state_object(svga, cso, sampler); } SVGA_DBG(DEBUG_SAMPLERS, "New sampler: min %u, view(min %u, max %u) lod, mipfilter %s\n", cso->min_lod, cso->view_min_lod, cso->view_max_lod, cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING"); svga->hud.num_sampler_objects++; SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws, SVGA_STATS_COUNT_SAMPLER); return cso; } static void svga_bind_sampler_states(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned num, void **samplers) { struct svga_context *svga = svga_context(pipe); unsigned i; boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); assert(start + num <= PIPE_MAX_SAMPLERS); /* Pre-VGPU10 only supports FS textures */ if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; for (i = 0; i < num; i++) { if (svga->curr.sampler[shader][start + i] != samplers[i]) any_change = TRUE; svga->curr.sampler[shader][start + i] = samplers[i]; } if (!any_change) { return; } /* find highest non-null sampler[] entry */ { unsigned j = MAX2(svga->curr.num_samplers[shader], start + num); while (j > 0 && svga->curr.sampler[shader][j - 1] == NULL) j--; svga->curr.num_samplers[shader] = j; } svga->dirty |= SVGA_NEW_SAMPLER; } static void svga_delete_sampler_state(struct pipe_context *pipe, void *sampler) { struct svga_sampler_state *ss = (struct svga_sampler_state *) sampler; struct svga_context *svga = svga_context(pipe); if (svga_have_vgpu10(svga)) { unsigned i; for (i = 0; i < 2; i++) { enum pipe_error ret; if (ss->id[i] != SVGA3D_INVALID_ID) { svga_hwtnl_flush_retry(svga); ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]); if (ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = SVGA3D_vgpu10_DestroySamplerState(svga->swc, ss->id[i]); } util_bitmask_clear(svga->sampler_object_id_bm, ss->id[i]); } } } FREE(sampler); svga->hud.num_sampler_objects--; } static struct pipe_sampler_view * svga_create_sampler_view(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_sampler_view *templ) { struct svga_context *svga = svga_context(pipe); struct svga_pipe_sampler_view *sv = CALLOC_STRUCT(svga_pipe_sampler_view); if (!sv) { return NULL; } sv->base = *templ; sv->base.reference.count = 1; sv->base.texture = NULL; pipe_resource_reference(&sv->base.texture, texture); sv->base.context = pipe; sv->id = SVGA3D_INVALID_ID; svga->hud.num_samplerview_objects++; SVGA_STATS_COUNT_INC(svga_screen(svga->pipe.screen)->sws, SVGA_STATS_COUNT_SAMPLERVIEW); return &sv->base; } static void svga_sampler_view_destroy(struct pipe_context *pipe, struct pipe_sampler_view *view) { struct svga_context *svga = svga_context(pipe); struct svga_pipe_sampler_view *sv = svga_pipe_sampler_view(view); if (svga_have_vgpu10(svga) && sv->id != SVGA3D_INVALID_ID) { if (view->context != pipe) { /* The SVGA3D device will generate an error (and on Linux, cause * us to abort) if we try to destroy a shader resource view from * a context other than the one it was created with. Skip the * SVGA3D_vgpu10_DestroyShaderResourceView() and leak the sampler * view for now. This should only sometimes happen when a shared * texture is deleted. */ _debug_printf("context mismatch in %s\n", __func__); } else { enum pipe_error ret; svga_hwtnl_flush_retry(svga); /* XXX is this needed? */ ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); if (ret != PIPE_OK) { svga_context_flush(svga, NULL); ret = SVGA3D_vgpu10_DestroyShaderResourceView(svga->swc, sv->id); } util_bitmask_clear(svga->sampler_view_id_bm, sv->id); } } pipe_resource_reference(&sv->base.texture, NULL); FREE(sv); svga->hud.num_samplerview_objects--; } static void svga_set_sampler_views(struct pipe_context *pipe, enum pipe_shader_type shader, unsigned start, unsigned num, struct pipe_sampler_view **views) { struct svga_context *svga = svga_context(pipe); unsigned flag_1d = 0; unsigned flag_srgb = 0; uint i; boolean any_change = FALSE; assert(shader < PIPE_SHADER_TYPES); assert(start + num <= ARRAY_SIZE(svga->curr.sampler_views[shader])); /* Pre-VGPU10 only supports FS textures */ if (!svga_have_vgpu10(svga) && shader != PIPE_SHADER_FRAGMENT) return; SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_SETSAMPLERVIEWS); /* This bit of code works around a quirk in the CSO module. * If start=num=0 it means all sampler views should be released. * Note that the CSO module treats sampler views for fragment shaders * differently than other shader types. */ if (start == 0 && num == 0 && svga->curr.num_sampler_views[shader] > 0) { for (i = 0; i < svga->curr.num_sampler_views[shader]; i++) { pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][i]); } any_change = TRUE; } for (i = 0; i < num; i++) { enum pipe_texture_target target; if (svga->curr.sampler_views[shader][start + i] != views[i]) { /* Note: we're using pipe_sampler_view_release() here to work around * a possible crash when the old view belongs to another context that * was already destroyed. */ pipe_sampler_view_release(pipe, &svga->curr.sampler_views[shader][start + i]); pipe_sampler_view_reference(&svga->curr.sampler_views[shader][start + i], views[i]); any_change = TRUE; } if (!views[i]) continue; if (util_format_is_srgb(views[i]->format)) flag_srgb |= 1 << (start + i); target = views[i]->target; if (target == PIPE_TEXTURE_1D) { flag_1d |= 1 << (start + i); } else if (target == PIPE_TEXTURE_RECT) { /* If the size of the bound texture changes, we need to emit new * const buffer values. */ svga->dirty |= SVGA_NEW_TEXTURE_CONSTS; } else if (target == PIPE_BUFFER) { /* If the size of the bound buffer changes, we need to emit new * const buffer values. */ svga->dirty |= SVGA_NEW_TEXTURE_CONSTS; } } if (!any_change) { goto done; } /* find highest non-null sampler_views[] entry */ { unsigned j = MAX2(svga->curr.num_sampler_views[shader], start + num); while (j > 0 && svga->curr.sampler_views[shader][j - 1] == NULL) j--; svga->curr.num_sampler_views[shader] = j; } svga->dirty |= SVGA_NEW_TEXTURE_BINDING; if (flag_srgb != svga->curr.tex_flags.flag_srgb || flag_1d != svga->curr.tex_flags.flag_1d) { svga->dirty |= SVGA_NEW_TEXTURE_FLAGS; svga->curr.tex_flags.flag_1d = flag_1d; svga->curr.tex_flags.flag_srgb = flag_srgb; } /* Check if any of the sampler view resources collide with the framebuffer * color buffers or depth stencil resource. If so, set the NEW_FRAME_BUFFER * dirty bit so that emit_framebuffer can be invoked to create backed view * for the conflicted surface view. */ if (svga_check_sampler_framebuffer_resource_collision(svga, shader)) { svga->dirty |= SVGA_NEW_FRAME_BUFFER; } done: SVGA_STATS_TIME_POP(svga_sws(svga)); } /** * Clean up sampler, sampler view state at context destruction time */ void svga_cleanup_sampler_state(struct svga_context *svga) { enum pipe_shader_type shader; for (shader = 0; shader <= PIPE_SHADER_GEOMETRY; shader++) { unsigned i; for (i = 0; i < svga->state.hw_draw.num_sampler_views[shader]; i++) { pipe_sampler_view_release(&svga->pipe, &svga->state.hw_draw.sampler_views[shader][i]); } } /* free polygon stipple state */ if (svga->polygon_stipple.sampler) { svga->pipe.delete_sampler_state(&svga->pipe, svga->polygon_stipple.sampler); } if (svga->polygon_stipple.sampler_view) { svga->pipe.sampler_view_destroy(&svga->pipe, &svga->polygon_stipple.sampler_view->base); } pipe_resource_reference(&svga->polygon_stipple.texture, NULL); } void svga_init_sampler_functions( struct svga_context *svga ) { svga->pipe.create_sampler_state = svga_create_sampler_state; svga->pipe.bind_sampler_states = svga_bind_sampler_states; svga->pipe.delete_sampler_state = svga_delete_sampler_state; svga->pipe.set_sampler_views = svga_set_sampler_views; svga->pipe.create_sampler_view = svga_create_sampler_view; svga->pipe.sampler_view_destroy = svga_sampler_view_destroy; }
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, struct pipe_surface **bufs) { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; uint32_t face_regid, coord_regid, zwcoord_regid; enum a3xx_threadsize fssz; int constmode; int i, j, k; debug_assert(nr <= ARRAY_SIZE(color_regid)); if (emit->key.binning_pass) nr = 0; setup_stages(emit, s); fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS; /* blob seems to always use constmode currently: */ constmode = 1; pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); if (pos_regid == regid(63, 0)) { /* hw dislikes when there is no position output, which can * happen for transform-feedback vertex shaders. Just tell * the hw to use r0.x, with whatever random value is there: */ pos_regid = regid(0, 0); } posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); if (s[FS].v->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); } else { color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); OUT_RING(ring, 0x00000003); OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5); OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) | A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those * bits if we have updated const or shader.. */ A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) | A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) | A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff)); OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) | A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) | A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff)); OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) | A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) | A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff)); OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) | A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) | A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff)); OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) | A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) | A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, 0x140010 | /* XXX */ COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); OUT_RING(ring, 0x7f | /* XXX */ COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) | COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) | COND(s[VS].instrlen && s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER)); OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */ OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3); OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask); } j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask); } OUT_RING(ring, reg); } for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc); OUT_RING(ring, reg); } OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff)); OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, 0x00000000); /* SP_FS_LENGTH_REG */ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(0) | A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(0) | A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); OUT_RING(ring, 0x00000000); } else { OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000 | /* XXX */ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ } OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff)); OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1); OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | A4XX_RB_RENDER_CONTROL2_YCOORD | A4XX_RB_RENDER_CONTROL2_ZCOORD | A4XX_RB_RENDER_CONTROL2_WCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(nr) | COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z)); OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(nr) | COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); for (i = 0; i < 8; i++) { enum a4xx_color_fmt format = 0; bool srgb = false; if (i < nr) { format = fd4_emit_format(bufs[i]); if (bufs[i] && !emit->no_decode_srgb) srgb = util_format_is_srgb(bufs[i]->format); } OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) | COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | 0x40000000 | /* XXX */ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { uint32_t vinterp[8], vpsrepl[8]; memset(vinterp, 0, sizeof(vinterp)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* looks like we need to do int varyings in the frag * shader on a4xx (no flatshad reg? or a420.0 bug?): * * (sy)(ss)nop * (sy)ldlv.u32 r0.x,l[r0.x], 1 * ldlv.u32 r0.y,l[r0.x+1], 1 * (ss)bary.f (ei)r63.x, 0, r0.x * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x * (rpt5)nop * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 * * Possibly on later a4xx variants we'll be able to use * something like the code below instead of workaround * in the shader: */ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { /* NOTE: varyings are packed, so if compmask is 0xb * then first, third, and fourth component occupy * three consecutive varying slots: */ unsigned compmask = s[FS].v->inputs[j].compmask; /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG * instead.. rather than -8 everywhere else.. */ uint32_t inloc = s[FS].v->inputs[j].inloc - 8; if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++) { if (compmask & (1 << i)) { vinterp[loc / 16] |= 1 << ((loc % 16) * 2); //flatshade[loc / 32] |= 1 << (loc % 32); loc++; } } } gl_varying_slot slot = s[FS].v->inputs[j].slot; /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ if (slot >= VARYING_SLOT_VAR0) { unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { /* mask is two 2-bit fields, where: * '01' -> S * '10' -> T * '11' -> 1 - T (flip mode) */ unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001; uint32_t loc = inloc; if (compmask & 0x1) { vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x2) { vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x4) { /* .z <- 0.0f */ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); loc++; } if (compmask & 0x8) { /* .w <- 1.0f */ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); loc++; } }