static void fse_run(struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, unsigned draw_count, unsigned prim_flags ) { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); if (!draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)fetch_count )) goto fail; hw_verts = draw->render->map_vertices( draw->render ); if (!hw_verts) goto fail; /* Single routine to fetch vertices, run shader and emit HW verts. */ fse->active->run_elts( fse->active, fetch_elts, fetch_count, hw_verts ); if (0) { unsigned i; for (i = 0; i < fetch_count; i++) { debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); draw_dump_emitted_vertex( fse->vinfo, (const uint8_t *)hw_verts + fse->key.output_stride * i ); } } draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) ); draw->render->draw_elements( draw->render, draw_elts, draw_count ); draw->render->release_vertices( draw->render ); return; fail: debug_warn_once("allocate or map of vertex buffer failed (out of memory?)"); return; }
/** * Translate TGSI semantic info into SVGA3d semantic info. * This is called for VS outputs and PS inputs only. */ static boolean translate_vs_ps_semantic(struct svga_shader_emitter *emit, struct tgsi_declaration_semantic semantic, unsigned *usage, unsigned *idx) { switch (semantic.Name) { case TGSI_SEMANTIC_POSITION: *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_POSITION; break; case TGSI_SEMANTIC_COLOR: *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_BCOLOR: *idx = semantic.Index + 2; /* sharing with COLOR */ *usage = SVGA3D_DECLUSAGE_COLOR; break; case TGSI_SEMANTIC_FOG: *idx = 0; assert(semantic.Index == 0); *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_PSIZE: *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_PSIZE; break; case TGSI_SEMANTIC_GENERIC: *idx = svga_remap_generic_index(emit->key.generic_remap_table, semantic.Index); *usage = SVGA3D_DECLUSAGE_TEXCOORD; break; case TGSI_SEMANTIC_NORMAL: *idx = semantic.Index; *usage = SVGA3D_DECLUSAGE_NORMAL; break; case TGSI_SEMANTIC_CLIPDIST: case TGSI_SEMANTIC_CLIPVERTEX: /* XXX at this time we don't support clip distance or clip vertices */ debug_warn_once("unsupported clip distance/vertex attribute\n"); *usage = SVGA3D_DECLUSAGE_TEXCOORD; *idx = 0; return TRUE; default: assert(0); *usage = SVGA3D_DECLUSAGE_TEXCOORD; *idx = 0; return FALSE; } return TRUE; }
/* SVGA_NEW_TEXTURE_BINDING
 * SVGA_NEW_RAST
 * SVGA_NEW_NEED_SWTNL
 * SVGA_NEW_SAMPLER
 */
/**
 * Build the fragment shader compile key from current svga context state.
 * The key captures everything that forces a different FS variant:
 * two-sided lighting, the logicop-xor white-fragment workaround,
 * per-texture-unit state (target, swizzles, unnormalized coords,
 * shadow-compare setup) and sprite-coord generation.
 */
static enum pipe_error
make_fs_key(const struct svga_context *svga,
            struct svga_fragment_shader *fs,
            struct svga_fs_compile_key *key)
{
   unsigned i;
   int idx = 0;   /* next width/height constant slot for unnormalized coords */

   memset(key, 0, sizeof *key);

   /* Only need fragment shader fixup for twoside lighting if doing
    * hwtnl.  Otherwise the draw module does the whole job for us.
    *
    * SVGA_NEW_SWTNL
    */
   if (!svga->state.sw.need_swtnl) {
      /* SVGA_NEW_RAST */
      key->light_twoside = svga->curr.rast->templ.light_twoside;
      key->front_ccw = svga->curr.rast->templ.front_ccw;
   }

   /* The blend workaround for simulating logicop xor behaviour
    * requires that the incoming fragment color be white.  This change
    * achieves that by creating a variant of the current fragment
    * shader that overrides all output colors with 1,1,1,1
    *
    * This will work for most shaders, including those containing
    * TEXKIL and/or depth-write.  However, it will break on the
    * combination of xor-logicop plus alphatest.
    *
    * Ultimately, we could implement alphatest in the shader using
    * texkil prior to overriding the outgoing fragment color.
    *
    * SVGA_NEW_BLEND
    */
   if (svga->curr.blend->need_white_fragments) {
      key->white_fragments = 1;
   }

#ifdef DEBUG
   /*
    * We expect a consistent set of samplers and sampler views.
    * Do some debug checks/warnings here.
    */
   {
      static boolean warned = FALSE;
      unsigned i, n = MAX2(svga->curr.num_sampler_views,
                           svga->curr.num_samplers);
      /* Only warn once to prevent too much debug output */
      if (!warned) {
         if (svga->curr.num_sampler_views != svga->curr.num_samplers) {
            debug_printf("svga: mismatched number of sampler views (%u) "
                         "vs. samplers (%u)\n",
                         svga->curr.num_sampler_views,
                         svga->curr.num_samplers);
         }
         for (i = 0; i < n; i++) {
            if ((svga->curr.sampler_views[i] == NULL) !=
                (svga->curr.sampler[i] == NULL))
               debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
                            i, svga->curr.sampler_views[i],
                            i, svga->curr.sampler[i]);
         }
         warned = TRUE;
      }
   }
#endif

   /* XXX: want to limit this to the textures that the shader actually
    * refers to.
    *
    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
    */
   for (i = 0; i < svga->curr.num_sampler_views; i++) {
      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
         assert(svga->curr.sampler_views[i]->texture);
         key->tex[i].texture_target = svga->curr.sampler_views[i]->texture->target;
         /* Unnormalized (rect) coords need the texture size at shader
          * compile time; record which width/height constant slot to use.
          */
         if (!svga->curr.sampler[i]->normalized_coords) {
            key->tex[i].width_height_idx = idx++;
            key->tex[i].unnormalized = TRUE;
            ++key->num_unnormalized_coords;
         }
         key->tex[i].swizzle_r = svga->curr.sampler_views[i]->swizzle_r;
         key->tex[i].swizzle_g = svga->curr.sampler_views[i]->swizzle_g;
         key->tex[i].swizzle_b = svga->curr.sampler_views[i]->swizzle_b;
         key->tex[i].swizzle_a = svga->curr.sampler_views[i]->swizzle_a;
      }
   }
   key->num_textures = svga->curr.num_sampler_views;

   idx = 0;
   for (i = 0; i < svga->curr.num_samplers; ++i) {
      if (svga->curr.sampler_views[i] && svga->curr.sampler[i]) {
         struct pipe_resource *tex = svga->curr.sampler_views[i]->texture;
         struct svga_texture *stex = svga_texture(tex);
         SVGA3dSurfaceFormat format = stex->key.format;

         if (format == SVGA3D_Z_D16 ||
             format == SVGA3D_Z_D24X8 ||
             format == SVGA3D_Z_D24S8) {
            /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
             * or SVGA3D_Z_D24S8 surface, we'll automatically get
             * shadow comparison.  But we only get LEQUAL mode.
             * Set TEX_COMPARE_NONE here so we don't emit the extra FS
             * code for shadow comparison.
             */
            key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
            key->tex[i].compare_func = PIPE_FUNC_NEVER;

            /* These depth formats _only_ support comparison mode and
             * not ordinary sampling so warn if the later is expected.
             */
            if (svga->curr.sampler[i]->compare_mode !=
                PIPE_TEX_COMPARE_R_TO_TEXTURE) {
               debug_warn_once("Unsupported shadow compare mode");
            }
            /* The only supported comparison mode is LEQUAL */
            if (svga->curr.sampler[i]->compare_func != PIPE_FUNC_LEQUAL) {
               debug_warn_once("Unsupported shadow compare function");
            }
         }
         else {
            /* For other texture formats, just use the compare func/mode
             * as-is.  Should be no-ops for color textures.  For depth
             * textures, we do not get automatic depth compare.  We have
             * to do it ourselves in the shader.  And we don't get PCF.
             */
            key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
            key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
         }
      }
   }

   /* sprite coord gen state */
   for (i = 0; i < svga->curr.num_samplers; ++i) {
      key->tex[i].sprite_texgen =
         svga->curr.rast->templ.sprite_coord_enable & (1 << i);
   }

   key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
                                    == PIPE_SPRITE_COORD_LOWER_LEFT);

   /* SVGA_NEW_FRAME_BUFFER */
   if (fs->base.info.color0_writes_all_cbufs) {
      /* Replicate color0 output to N colorbuffers */
      key->write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
   }

   return PIPE_OK;
}
/**
 * Draw entry point with vertex-buffer fallbacks.
 *
 * Fast path: if no vertex buffer is user-memory, incompatible in layout,
 * or referenced by an incompatible element, forward the draw unchanged.
 * Otherwise build a modified pipe_draw_info: resolve indirect parameters,
 * compute the vertex range (possibly unrolling indices), translate
 * incompatible buffers, and upload user buffers before drawing.
 */
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex, min_index;
   unsigned num_vertices;
   boolean unroll_indices = FALSE;
   /* Only buffers actually referenced by the current vertex elements
    * matter for the fallback decisions below.
    */
   uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   uint32_t incompatible_vb_mask =
      mgr->incompatible_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask) {
      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }
      pipe->draw_vbo(pipe, info);
      return;
   }

   new_info = *info;

   /* Fallback. We need to know all the parameters. */
   if (new_info.indirect) {
      struct pipe_transfer *transfer = NULL;
      int *data;

      /* Read the draw parameters out of the indirect buffer on the CPU.
       * Indexed layout: count, instance_count, start, index_bias,
       * start_instance (20 bytes); non-indexed lacks index_bias (16 bytes).
       */
      if (new_info.indexed) {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 20,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.index_bias = data[3];
         new_info.start_instance = data[4];
      }
      else {
         data = pipe_buffer_map_range(pipe, new_info.indirect,
                                      new_info.indirect_offset, 16,
                                      PIPE_TRANSFER_READ, &transfer);
         new_info.start_instance = data[3];
      }

      new_info.count = data[0];
      new_info.instance_count = data[1];
      new_info.start = data[2];
      pipe_buffer_unmap(pipe, transfer);
      new_info.indirect = NULL;
   }

   if (new_info.indexed) {
      /* See if anything needs to be done for per-vertex attribs. */
      if (u_vbuf_need_minmax_index(mgr)) {
         int max_index;

         if (new_info.max_index != ~0u) {
            min_index = new_info.min_index;
            max_index = new_info.max_index;
         } else {
            /* Scan the index buffer to find the referenced vertex range. */
            u_vbuf_get_minmax_index(mgr->pipe, &mgr->index_buffer,
                                    new_info.primitive_restart,
                                    new_info.restart_index, new_info.start,
                                    new_info.count, &min_index, &max_index);
         }

         assert(min_index <= max_index);

         start_vertex = min_index + new_info.index_bias;
         num_vertices = max_index + 1 - min_index;

         /* Primitive restart doesn't work when unrolling indices.
          * We would have to break this drawing operation into several ones. */
         /* Use some heuristic to see if unrolling indices improves
          * performance. */
         if (!new_info.primitive_restart &&
             num_vertices > new_info.count*2 &&
             num_vertices - new_info.count > 32 &&
             !u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
            unroll_indices = TRUE;
            user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                              mgr->ve->noninstance_vb_mask_any);
         }
      } else {
         /* Nothing to do for per-vertex attribs. */
         start_vertex = 0;
         num_vertices = 0;
         min_index = 0;
      }
   } else {
      start_vertex = new_info.start;
      num_vertices = new_info.count;
      min_index = 0;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (unroll_indices ||
       incompatible_vb_mask ||
       mgr->ve->incompatible_elem_mask) {
      if (!u_vbuf_translate_begin(mgr, start_vertex, num_vertices,
                                  new_info.start_instance,
                                  new_info.instance_count, new_info.start,
                                  new_info.count, min_index, unroll_indices)) {
         debug_warn_once("u_vbuf_translate_begin() failed");
         return;
      }

      if (unroll_indices) {
         /* Indices were baked into the translated vertices; draw linearly. */
         new_info.indexed = FALSE;
         new_info.index_bias = 0;
         new_info.min_index = 0;
         new_info.max_index = new_info.count - 1;
         new_info.start = 0;
      }

      user_vb_mask &= ~(incompatible_vb_mask |
                        mgr->ve->incompatible_vb_mask_all);
   }

   /* Upload user buffers. */
   if (user_vb_mask) {
      if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                new_info.start_instance,
                                new_info.instance_count) != PIPE_OK) {
         debug_warn_once("u_vbuf_upload_buffers() failed");
         return;
      }

      mgr->dirty_real_vb_mask |= user_vb_mask;
   }

   /*
   if (unroll_indices) {
      printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
             start_vertex, num_vertices);
      util_dump_draw_info(stdout, info);
      printf("\n");
   }

   unsigned i;
   for (i = 0; i < mgr->nr_vertex_buffers; i++) {
      printf("input %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
      printf("\n");
   }
   for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
      printf("real %i: ", i);
      util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
      printf("\n");
   }
   */

   u_upload_unmap(pipe->stream_uploader);
   u_vbuf_set_driver_vertex_buffers(mgr);

   pipe->draw_vbo(pipe, &new_info);

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
}
void draw_pt_emit_linear(struct pt_emit *emit, const struct draw_vertex_info *vert_info, const struct draw_prim_info *prim_info) { const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; unsigned stride = vert_info->stride; unsigned count = vert_info->count; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; void *hw_verts; unsigned start, i; #if 0 debug_printf("Linear emit\n"); #endif /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ draw->render->set_primitive(draw->render, emit->prim); if (!render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)count)) goto fail; hw_verts = render->map_vertices( render ); if (!hw_verts) goto fail; translate->set_buffer(translate, 0, vertex_data, stride, count - 1); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, 0, ~0); translate->run(translate, 0, count, draw->start_instance, draw->instance_id, hw_verts); if (0) { unsigned i; for (i = 0; i < count; i++) { debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); draw_dump_emitted_vertex( emit->vinfo, (const uint8_t *)hw_verts + translate->key.output_stride * i ); } } render->unmap_vertices( render, 0, count - 1 ); for (start = i = 0; i < prim_info->primitive_count; start += prim_info->primitive_lengths[i], i++) { render->draw_arrays(render, start, prim_info->primitive_lengths[i]); } render->release_vertices(render); return; fail: debug_warn_once("allocate or map of vertex buffer failed (out of memory?)"); return; }
void draw_pt_emit(struct pt_emit *emit, const struct draw_vertex_info *vert_info, const struct draw_prim_info *prim_info) { const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data; unsigned vertex_count = vert_info->count; unsigned stride = vert_info->stride; const ushort *elts = prim_info->elts; struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; struct vbuf_render *render = draw->render; unsigned start, i; void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); if (vertex_count == 0) return; /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ draw->render->set_primitive(draw->render, emit->prim); render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)vertex_count); hw_verts = render->map_vertices( render ); if (!hw_verts) { debug_warn_once("map of vertex buffer failed (out of memory?)"); return; } translate->set_buffer(translate, 0, vertex_data, stride, ~0); translate->set_buffer(translate, 1, &draw->rasterizer->point_size, 0, ~0); /* fetch/translate vertex attribs to fill hw_verts[] */ translate->run(translate, 0, vertex_count, draw->start_instance, draw->instance_id, hw_verts ); render->unmap_vertices(render, 0, vertex_count - 1); for (start = i = 0; i < prim_info->primitive_count; start += prim_info->primitive_lengths[i], i++) { render->draw_elements(render, elts + start, prim_info->primitive_lengths[i]); } render->release_vertices(render); }
/**
 * Try issuing a quad blit.
 *
 * Returns false (after SVGA_STATS_TIME_PUSH bookkeeping via `done`) when
 * the blit cannot be done with the textured-quad blitter, so the caller
 * can fall back to another path.  Before invoking util_blitter, all
 * affected pipeline state is saved with the util_blitter_save_* calls.
 *
 * NOTE(review): this block is truncated in the current view — the body
 * continues past the last visible line (inside the !can_create_src_view
 * branch) and the `done` label is not visible here.
 */
static bool
try_blit(struct svga_context *svga, const struct pipe_blit_info *blit_info)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   struct pipe_resource *src = blit_info->src.resource;
   struct pipe_resource *dst = blit_info->dst.resource;
   struct pipe_resource *newSrc = NULL;
   struct pipe_resource *newDst = NULL;
   bool can_create_src_view;
   bool can_create_dst_view;
   bool ret = true;
   /* Work on a local copy so format fixups don't touch the caller's info. */
   struct pipe_blit_info blit = *blit_info;

   SVGA_STATS_TIME_PUSH(sws, SVGA_STATS_TIME_BLITBLITTER);

   /**
    * Avoid using util_blitter_blit() for these depth formats on non-vgpu10
    * devices because these depth formats only support comparison mode
    * and not ordinary sampling.
    */
   if (!svga_have_vgpu10(svga) && (blit.mask & PIPE_MASK_Z) &&
       (svga_texture(dst)->key.format == SVGA3D_Z_D16 ||
        svga_texture(dst)->key.format == SVGA3D_Z_D24X8 ||
        svga_texture(dst)->key.format == SVGA3D_Z_D24S8)) {
      ret = false;
      goto done;
   }

   /**
    * If format is srgb and blend is enabled then color values need
    * to be converted into linear format.
    */
   if (is_blending_enabled(svga, &blit)) {
      blit.src.format = util_format_linear(blit.src.format);
      blit.dst.format = util_format_linear(blit.dst.format);
   }

   /* Check if we can create shader resource view and
    * render target view for the quad blitter to work
    */
   can_create_src_view =
      is_view_format_compatible(src->format, svga_texture(src)->key.format,
                                blit.src.format);

   can_create_dst_view =
      is_view_format_compatible(dst->format, svga_texture(dst)->key.format,
                                blit.dst.format);

   if ((blit.mask & PIPE_MASK_S) ||
       ((!can_create_dst_view || !can_create_src_view)
        && !svga_have_vgpu10(svga))) {
      /* Can't do stencil blits with textured quad blitter */
      debug_warn_once("using software stencil blit");
      ret = false;
      goto done;
   }

   if (!util_blitter_is_blit_supported(svga->blitter, &blit)) {
      debug_printf("svga: blit unsupported %s -> %s\n",
                   util_format_short_name(blit.src.resource->format),
                   util_format_short_name(blit.dst.resource->format));
      ret = false;
      goto done;
   }

   /* XXX turn off occlusion and streamout queries */

   /* Save every piece of state util_blitter will clobber. */
   util_blitter_save_vertex_buffer_slot(svga->blitter, svga->curr.vb);
   util_blitter_save_vertex_elements(svga->blitter, (void*)svga->curr.velems);
   util_blitter_save_vertex_shader(svga->blitter, svga->curr.vs);
   util_blitter_save_geometry_shader(svga->blitter, svga->curr.user_gs);
   util_blitter_save_so_targets(svga->blitter, svga->num_so_targets,
                                (struct pipe_stream_output_target**)svga->so_targets);
   util_blitter_save_rasterizer(svga->blitter, (void*)svga->curr.rast);
   util_blitter_save_viewport(svga->blitter, &svga->curr.viewport);
   util_blitter_save_scissor(svga->blitter, &svga->curr.scissor);
   util_blitter_save_fragment_shader(svga->blitter, svga->curr.fs);
   util_blitter_save_blend(svga->blitter, (void*)svga->curr.blend);
   util_blitter_save_depth_stencil_alpha(svga->blitter,
                                         (void*)svga->curr.depth);
   util_blitter_save_stencil_ref(svga->blitter, &svga->curr.stencil_ref);
   util_blitter_save_sample_mask(svga->blitter, svga->curr.sample_mask);
   util_blitter_save_framebuffer(svga->blitter, &svga->curr.framebuffer);
   util_blitter_save_fragment_sampler_states(svga->blitter,
                                             svga->curr.num_samplers[PIPE_SHADER_FRAGMENT],
                                             (void**)svga->curr.sampler[PIPE_SHADER_FRAGMENT]);
   util_blitter_save_fragment_sampler_views(svga->blitter,
                                            svga->curr.num_sampler_views[PIPE_SHADER_FRAGMENT],
                                            svga->curr.sampler_views[PIPE_SHADER_FRAGMENT]);

   if (!can_create_src_view) {
      struct pipe_resource template;
      struct pipe_blit_info copy_region_blit;
/* SVGA_NEW_TEXTURE_BINDING
 * SVGA_NEW_RAST
 * SVGA_NEW_NEED_SWTNL
 * SVGA_NEW_SAMPLER
 */
/**
 * Build the fragment shader compile key (svga_compile_key variant) from
 * current context state: generic-varying remap, GS/VS generic outputs,
 * two-sided lighting / polygon stipple / AA point fixups, the white-
 * fragment logicop workaround, per-unit shadow-compare state, sprite
 * coord gen, flat shading, and (on vgpu10) shader-side alpha test.
 */
static enum pipe_error
make_fs_key(const struct svga_context *svga,
            struct svga_fragment_shader *fs,
            struct svga_compile_key *key)
{
   const unsigned shader = PIPE_SHADER_FRAGMENT;
   unsigned i;

   memset(key, 0, sizeof *key);

   memcpy(key->generic_remap_table, fs->generic_remap_table,
          sizeof(fs->generic_remap_table));

   /* SVGA_NEW_GS, SVGA_NEW_VS */
   if (svga->curr.gs) {
      key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs;
   } else {
      key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs;
   }

   /* Only need fragment shader fixup for twoside lighting if doing
    * hwtnl.  Otherwise the draw module does the whole job for us.
    *
    * SVGA_NEW_SWTNL
    */
   if (!svga->state.sw.need_swtnl) {
      /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE */
      key->fs.light_twoside = svga->curr.rast->templ.light_twoside;
      key->fs.front_ccw = svga->curr.rast->templ.front_ccw;
      key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable &&
                          svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES);
      key->fs.aa_point = (svga->curr.rast->templ.point_smooth &&
                          svga->curr.reduced_prim == PIPE_PRIM_POINTS &&
                          (svga->curr.rast->pointsize > 1.0 ||
                           svga->curr.vs->base.info.writes_psize));
      if (key->fs.aa_point) {
         /* AA points rely on a GS that emits a point coord varying. */
         assert(svga->curr.gs != NULL);
         assert(svga->curr.gs->aa_point_coord_index != -1);
         key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index;
      }
   }

   /* The blend workaround for simulating logicop xor behaviour
    * requires that the incoming fragment color be white.  This change
    * achieves that by creating a variant of the current fragment
    * shader that overrides all output colors with 1,1,1,1
    *
    * This will work for most shaders, including those containing
    * TEXKIL and/or depth-write.  However, it will break on the
    * combination of xor-logicop plus alphatest.
    *
    * Ultimately, we could implement alphatest in the shader using
    * texkil prior to overriding the outgoing fragment color.
    *
    * SVGA_NEW_BLEND
    */
   if (svga->curr.blend->need_white_fragments) {
      key->fs.white_fragments = 1;
   }

#ifdef DEBUG
   /*
    * We expect a consistent set of samplers and sampler views.
    * Do some debug checks/warnings here.
    */
   {
      static boolean warned = FALSE;
      unsigned i, n = MAX2(svga->curr.num_sampler_views[shader],
                           svga->curr.num_samplers[shader]);
      /* Only warn once to prevent too much debug output */
      if (!warned) {
         if (svga->curr.num_sampler_views[shader] !=
             svga->curr.num_samplers[shader]) {
            debug_printf("svga: mismatched number of sampler views (%u) "
                         "vs. samplers (%u)\n",
                         svga->curr.num_sampler_views[shader],
                         svga->curr.num_samplers[shader]);
         }
         for (i = 0; i < n; i++) {
            if ((svga->curr.sampler_views[shader][i] == NULL) !=
                (svga->curr.sampler[shader][i] == NULL))
               debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n",
                            i, svga->curr.sampler_views[shader][i],
                            i, svga->curr.sampler[shader][i]);
         }
         warned = TRUE;
      }
   }
#endif

   /* XXX: want to limit this to the textures that the shader actually
    * refers to.
    *
    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
    */
   svga_init_shader_key_common(svga, shader, key);

   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
      if (view) {
         struct pipe_resource *tex = view->texture;
         if (tex->target != PIPE_BUFFER) {
            struct svga_texture *stex = svga_texture(tex);
            SVGA3dSurfaceFormat format = stex->key.format;

            if (!svga_have_vgpu10(svga) &&
                (format == SVGA3D_Z_D16 ||
                 format == SVGA3D_Z_D24X8 ||
                 format == SVGA3D_Z_D24S8)) {
               /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8,
                * or SVGA3D_Z_D24S8 surface, we'll automatically get
                * shadow comparison.  But we only get LEQUAL mode.
                * Set TEX_COMPARE_NONE here so we don't emit the extra FS
                * code for shadow comparison.
                */
               key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE;
               key->tex[i].compare_func = PIPE_FUNC_NEVER;

               /* These depth formats _only_ support comparison mode and
                * not ordinary sampling so warn if the later is expected.
                */
               if (sampler->compare_mode !=
                   PIPE_TEX_COMPARE_R_TO_TEXTURE) {
                  debug_warn_once("Unsupported shadow compare mode");
               }
               /* The shader translation code can emit code to
                * handle ALWAYS and NEVER compare functions
                */
               else if (sampler->compare_func == PIPE_FUNC_ALWAYS ||
                        sampler->compare_func == PIPE_FUNC_NEVER) {
                  key->tex[i].compare_mode = sampler->compare_mode;
                  key->tex[i].compare_func = sampler->compare_func;
               }
               else if (sampler->compare_func != PIPE_FUNC_LEQUAL) {
                  debug_warn_once("Unsupported shadow compare function");
               }
            }
            else {
               /* For other texture formats, just use the compare func/mode
                * as-is.  Should be no-ops for color textures.  For depth
                * textures, we do not get automatic depth compare.  We have
                * to do it ourselves in the shader.  And we don't get PCF.
                */
               key->tex[i].compare_mode = sampler->compare_mode;
               key->tex[i].compare_func = sampler->compare_func;
            }
         }
      }
   }

   /* sprite coord gen state */
   for (i = 0; i < svga->curr.num_samplers[shader]; ++i) {
      key->tex[i].sprite_texgen =
         svga->curr.rast->templ.sprite_coord_enable & (1 << i);
   }

   key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode
                                    == PIPE_SPRITE_COORD_LOWER_LEFT);

   key->fs.flatshade = svga->curr.rast->templ.flatshade;

   /* SVGA_NEW_DEPTH_STENCIL_ALPHA */
   if (svga_have_vgpu10(svga)) {
      /* Alpha testing is not supported in integer-valued render targets.
       */
      if (svga_has_any_integer_cbufs(svga)) {
         key->fs.alpha_func = SVGA3D_CMP_ALWAYS;
         key->fs.alpha_ref = 0;
      }
      else {
         key->fs.alpha_func = svga->curr.depth->alphafunc;
         key->fs.alpha_ref = svga->curr.depth->alpharef;
      }
   }

   /* SVGA_NEW_FRAME_BUFFER */
   if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
      /* Replicate color0 output to N colorbuffers */
      key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs;
   }

   return PIPE_OK;
}
/**
 * vc4 draw entry point.
 *
 * Converts unsupported primitive types (QUADS and up) through
 * u_primconvert, performs predraw texture fixups and the HW-2116
 * workaround, emits shader/vertex state as needed, then records either
 * an indexed or an array primitive packet into the binner command list.
 * Array draws are split into <=65535-vertex chunks to work around
 * GFXH-515 (the binner emits 16-bit indices for drawarrays).
 */
static void
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
   struct vc4_context *vc4 = vc4_context(pctx);

   if (info->mode >= PIPE_PRIM_QUADS) {
      /* Primitive types from QUADS up aren't handled natively; let
       * u_primconvert rewrite the draw into supported primitives.
       */
      util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
      util_primconvert_save_rasterizer_state(vc4->primconvert,
                                             &vc4->rasterizer->base);
      util_primconvert_draw_vbo(vc4->primconvert, info);
      perf_debug("Fallback conversion for %d %s vertices\n",
                 info->count, u_prim_name(info->mode));
      return;
   }

   /* Before setting up the draw, do any fixup blits necessary. */
   vc4_predraw_check_textures(pctx, &vc4->verttex);
   vc4_predraw_check_textures(pctx, &vc4->fragtex);

   vc4_hw_2116_workaround(pctx, info->count);

   struct vc4_job *job = vc4_get_job_for_fbo(vc4);

   vc4_get_draw_cl_space(job, info->count);

   if (vc4->prim_mode != info->mode) {
      vc4->prim_mode = info->mode;
      vc4->dirty |= VC4_DIRTY_PRIM_MODE;
   }

   vc4_start_draw(vc4);
   if (!vc4_update_compiled_shaders(vc4, info->mode)) {
      debug_warn_once("shader compile failed, skipping draw call.\n");
      return;
   }

   vc4_emit_state(pctx);

   /* Re-emit GL shader state if anything it depends on changed, or if
    * the index bias differs from the last draw.
    */
   if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
                      VC4_DIRTY_VTXSTATE |
                      VC4_DIRTY_PRIM_MODE |
                      VC4_DIRTY_RASTERIZER |
                      VC4_DIRTY_COMPILED_CS |
                      VC4_DIRTY_COMPILED_VS |
                      VC4_DIRTY_COMPILED_FS |
                      vc4->prog.cs->uniform_dirty_bits |
                      vc4->prog.vs->uniform_dirty_bits |
                      vc4->prog.fs->uniform_dirty_bits)) ||
       vc4->last_index_bias != info->index_bias) {
      vc4_emit_gl_shader_state(vc4, info, 0);
   }

   vc4->dirty = 0;

   /* Note that the primitive type fields match with OpenGL/gallium
    * definitions, up to but not including QUADS.
    */
   struct vc4_cl_out *bcl = cl_start(&job->bcl);
   if (info->indexed) {
      uint32_t offset = vc4->indexbuf.offset;
      uint32_t index_size = vc4->indexbuf.index_size;
      struct pipe_resource *prsc;
      if (vc4->indexbuf.index_size == 4) {
         /* Hardware has no 32-bit index support; shadow to 16-bit. */
         prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
                                            info->count, &offset);
         index_size = 2;
      } else {
         if (vc4->indexbuf.user_buffer) {
            prsc = NULL;
            u_upload_data(vc4->uploader, 0,
                          info->count * index_size, 4,
                          vc4->indexbuf.user_buffer,
                          &offset, &prsc);
         } else {
            prsc = vc4->indexbuf.buffer;
         }
      }
      struct vc4_resource *rsc = vc4_resource(prsc);

      cl_start_reloc(&job->bcl, &bcl, 1);
      cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
      cl_u8(&bcl,
            info->mode |
            (index_size == 2 ?
             VC4_INDEX_BUFFER_U16:
             VC4_INDEX_BUFFER_U8));
      cl_u32(&bcl, info->count);
      cl_reloc(job, &job->bcl, &bcl, rsc->bo, offset);
      cl_u32(&bcl, vc4->max_index);
      job->draw_calls_queued++;

      /* Temporary index buffers (shadow or uploaded user data) are
       * dropped once recorded.
       */
      if (vc4->indexbuf.index_size == 4 || vc4->indexbuf.user_buffer)
         pipe_resource_reference(&prsc, NULL);
   } else {
      uint32_t count = info->count;
      uint32_t start = info->start;
      uint32_t extra_index_bias = 0;

      while (count) {
         uint32_t this_count = count;
         uint32_t step = count;
         static const uint32_t max_verts = 65535;

         /* GFXH-515 / SW-5891: The binner emits 16 bit indices
          * for drawarrays, which means that if start + count >
          * 64k it would truncate the top bits.  Work around
          * this by emitting a limited number of primitives at
          * a time and reemitting the shader state pointing
          * farther down the vertex attribute arrays.
          *
          * To do this properly for line loops or trifans, we'd
          * need to make a new VB containing the first vertex
          * plus whatever remainder.
          */
         if (extra_index_bias) {
            cl_end(&job->bcl, bcl);
            vc4_emit_gl_shader_state(vc4, info, extra_index_bias);
            bcl = cl_start(&job->bcl);
         }

         if (start + count > max_verts) {
            /* Pick a chunk size that keeps primitives whole;
             * step may be smaller than this_count so strips
             * re-draw their shared vertices.
             */
            switch (info->mode) {
            case PIPE_PRIM_POINTS:
               this_count = step = max_verts;
               break;
            case PIPE_PRIM_LINES:
               this_count = step = max_verts - (max_verts % 2);
               break;
            case PIPE_PRIM_LINE_STRIP:
               this_count = max_verts;
               step = max_verts - 1;
               break;
            case PIPE_PRIM_LINE_LOOP:
               this_count = max_verts;
               step = max_verts - 1;
               debug_warn_once("unhandled line loop "
                               "looping behavior with "
                               ">65535 verts\n");
               break;
            case PIPE_PRIM_TRIANGLES:
               this_count = step = max_verts - (max_verts % 3);
               break;
            case PIPE_PRIM_TRIANGLE_STRIP:
               this_count = max_verts;
               step = max_verts - 2;
               break;
            default:
               debug_warn_once("unhandled primitive "
                               "max vert count, truncating\n");
               this_count = step = max_verts;
            }
         }

         cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
         cl_u8(&bcl, info->mode);
         cl_u32(&bcl, this_count);
         cl_u32(&bcl, start);
         job->draw_calls_queued++;

         count -= step;
         extra_index_bias += start + step;
         start = 0;
      }
   }
   cl_end(&job->bcl, bcl);

   /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
    * workaround.
    */
   assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);

   if (vc4->zsa && vc4->framebuffer.zsbuf) {
      struct vc4_resource *rsc =
         vc4_resource(vc4->framebuffer.zsbuf->texture);

      if (vc4->zsa->base.depth.enabled) {
         job->resolve |= PIPE_CLEAR_DEPTH;
         /* NOTE(review): plain assignment (not |=) here — appears to
          * reset the initialized-buffers tracking on depth writes;
          * confirm this is intentional.
          */
         rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
      }

      if (vc4->zsa->base.stencil[0].enabled) {
         job->resolve |= PIPE_CLEAR_STENCIL;
         rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
      }
   }

   job->resolve |= PIPE_CLEAR_COLOR0;

   if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
      vc4_flush(pctx);
}