/** * Compile the vertex shader. */ struct ilo_shader * ilo_shader_compile_vs(const struct ilo_shader_state *state, const struct ilo_shader_variant *variant) { struct vs_compile_context vcc; bool need_gs; if (!vs_setup(&vcc, state, variant)) return NULL; if (ilo_dev_gen(vcc.tc.dev) >= ILO_GEN(7)) { need_gs = false; } else { need_gs = variant->u.vs.rasterizer_discard || state->info.stream_output.num_outputs; } vs_write_vue(&vcc); if (!vs_compile(&vcc)) { FREE(vcc.shader); vcc.shader = NULL; } toy_tgsi_cleanup(&vcc.tgsi); toy_compiler_cleanup(&vcc.tc); if (need_gs) { int so_mapping[PIPE_MAX_SHADER_OUTPUTS]; int i, j; for (i = 0; i < vcc.tgsi.num_outputs; i++) { int attr = 0; for (j = 0; j < vcc.shader->out.count; j++) { if (vcc.tgsi.outputs[i].semantic_name == vcc.shader->out.semantic_names[j] && vcc.tgsi.outputs[i].semantic_index == vcc.shader->out.semantic_indices[j]) { attr = j; break; } } so_mapping[i] = attr; } if (!ilo_shader_compile_gs_passthrough(state, variant, so_mapping, vcc.shader)) { ilo_shader_destroy_kernel(vcc.shader); vcc.shader = NULL; } } return vcc.shader; }
bool ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state, const struct ilo_shader_variant *vs_variant, const int *so_mapping, struct ilo_shader *vs) { struct gs_compile_context gcc; struct ilo_shader_state state; struct ilo_shader_variant variant; const int num_verts = 3; int i; /* init GS state and variant */ state = *vs_state; state.info.tokens = NULL; for (i = 0; i < state.info.stream_output.num_outputs; i++) { const int reg = state.info.stream_output.output[i].register_index; state.info.stream_output.output[i].register_index = so_mapping[reg]; } variant = *vs_variant; variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard; variant.u.gs.num_inputs = vs->out.count; for (i = 0; i < vs->out.count; i++) { variant.u.gs.semantic_names[i] = vs->out.semantic_names[i]; variant.u.gs.semantic_indices[i] = vs->out.semantic_indices[i]; } if (!gs_setup(&gcc, &state, &variant, num_verts)) return false; if (!gs_compile_passthrough(&gcc)) { FREE(gcc.shader); gcc.shader = NULL; } /* no need to call toy_tgsi_cleanup() */ toy_compiler_cleanup(&gcc.tc); return append_gs_to_vs(vs, gcc.shader, num_verts); }
/**
 * Compile the geometry shader.
 *
 * \param state    shader state to compile from
 * \param variant  shader variant to compile
 * \return the compiled shader, or NULL when gs_setup() or gs_compile() fails.
 */
struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct gs_compile_context gcc;
   bool compiled;

   /* num_verts is passed as 0 here */
   if (!gs_setup(&gcc, state, variant, 0))
      return NULL;

   compiled = gs_compile(&gcc);
   if (!compiled) {
      /* drop the half-built shader so that NULL is returned below */
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   toy_tgsi_cleanup(&gcc.tgsi);
   toy_compiler_cleanup(&gcc.tc);

   return gcc.shader;
}
/**
 * Compile the fragment shader.
 *
 * \param state    shader state to compile from
 * \param variant  shader variant to compile
 * \return the compiled shader, or NULL when fs_setup() or fs_compile() fails.
 */
struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct fs_compile_context fcc;
   bool compiled;

   if (!fs_setup(&fcc, state, variant))
      return NULL;

   fs_write_fb(&fcc);

   compiled = fs_compile(&fcc);
   if (!compiled) {
      /* drop the half-built shader so that NULL is returned below */
      FREE(fcc.shader);
      fcc.shader = NULL;
   }

   toy_tgsi_cleanup(&fcc.tgsi);
   toy_compiler_cleanup(&fcc.tc);

   return fcc.shader;
}
/**
 * Compile the compute shader.
 *
 * \param state    shader state to compile from
 * \param variant  shader variant to compile
 * \return the compiled shader, or NULL when cs_setup() or cs_compile() fails.
 */
struct ilo_shader *
ilo_shader_compile_cs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct cs_compile_context ccc;
   bool compiled;

   /* presumably restricts the device to GEN7..GEN7.5 -- confirm the macro */
   ILO_DEV_ASSERT(state->info.dev, 7, 7.5);

   if (!cs_setup(&ccc, state, variant))
      return NULL;

   cs_dummy(&ccc);

   compiled = cs_compile(&ccc);
   if (!compiled) {
      /* drop the half-built shader so that NULL is returned below */
      FREE(ccc.shader);
      ccc.shader = NULL;
   }

   /* only the compiler is cleaned up on this path */
   toy_compiler_cleanup(&ccc.tc);

   return ccc.shader;
}
/** * Set up GS compile context. This includes translating the TGSI tokens. */ static bool gs_setup(struct gs_compile_context *gcc, const struct ilo_shader_state *state, const struct ilo_shader_variant *variant, int num_verts) { memset(gcc, 0, sizeof(*gcc)); gcc->shader = CALLOC_STRUCT(ilo_shader); if (!gcc->shader) return false; gcc->variant = variant; gcc->so_info = &state->info.stream_output; toy_compiler_init(&gcc->tc, state->info.dev); gcc->write_so = (state->info.stream_output.num_outputs > 0); gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard; gcc->tc.templ.access_mode = GEN6_ALIGN_16; gcc->tc.templ.exec_size = GEN6_EXECSIZE_4; gcc->tc.rect_linear_width = 4; if (state->info.tokens) { if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) { toy_compiler_cleanup(&gcc->tc); FREE(gcc->shader); return false; } switch (gcc->tgsi.props.gs_input_prim) { case PIPE_PRIM_POINTS: gcc->in_vue_count = 1; break; case PIPE_PRIM_LINES: gcc->in_vue_count = 2; gcc->shader->in.discard_adj = true; break; case PIPE_PRIM_TRIANGLES: gcc->in_vue_count = 3; gcc->shader->in.discard_adj = true; break; case PIPE_PRIM_LINES_ADJACENCY: gcc->in_vue_count = 4; break; case PIPE_PRIM_TRIANGLES_ADJACENCY: gcc->in_vue_count = 6; break; default: tc_fail(&gcc->tc, "unsupported GS input type"); gcc->in_vue_count = 0; break; } switch (gcc->tgsi.props.gs_output_prim) { case PIPE_PRIM_POINTS: gcc->out_vue_min_count = 1; break; case PIPE_PRIM_LINE_STRIP: gcc->out_vue_min_count = 2; break; case PIPE_PRIM_TRIANGLE_STRIP: gcc->out_vue_min_count = 3; break; default: tc_fail(&gcc->tc, "unsupported GS output type"); gcc->out_vue_min_count = 0; break; } } else { int i; gcc->in_vue_count = num_verts; gcc->out_vue_min_count = num_verts; gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs; for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) { gcc->tgsi.outputs[i].semantic_name = gcc->variant->u.gs.semantic_names[i]; gcc->tgsi.outputs[i].semantic_index = gcc->variant->u.gs.semantic_indices[i]; } } 
gcc->tc.templ.access_mode = GEN6_ALIGN_1; gs_setup_shader_in(gcc->shader, gcc->variant); gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map); gcc->in_vue_size = (gcc->shader->in.count + 1) / 2; gcc->out_vue_size = (gcc->shader->out.count + 1) / 2; gs_setup_payload(gcc); gs_setup_vars(gcc); /* m0 is reserved for system routines */ gcc->first_free_mrf = 1; gcc->last_free_mrf = 15; gcc->shader->bt.gen6_so_base = 0; gcc->shader->bt.gen6_so_count = gcc->so_info->num_outputs; gcc->shader->bt.total_count = gcc->shader->bt.gen6_so_count; return true; }
/** * Set up VS compile context. This includes translating the TGSI tokens. */ static bool vs_setup(struct vs_compile_context *vcc, const struct ilo_shader_state *state, const struct ilo_shader_variant *variant) { int num_consts; memset(vcc, 0, sizeof(*vcc)); vcc->shader = CALLOC_STRUCT(ilo_shader); if (!vcc->shader) return false; vcc->variant = variant; toy_compiler_init(&vcc->tc, state->info.dev); vcc->tc.templ.access_mode = GEN6_ALIGN_16; vcc->tc.templ.exec_size = GEN6_EXECSIZE_8; vcc->tc.rect_linear_width = 4; /* * The classic driver uses the sampler cache (gen6) or the data cache * (gen7). Why? */ vcc->const_cache = GEN6_SFID_DP_CC; if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) { toy_compiler_cleanup(&vcc->tc); FREE(vcc->shader); return false; } vs_setup_shader_in(vcc->shader, &vcc->tgsi); vs_setup_shader_out(vcc->shader, &vcc->tgsi, (vcc->variant->u.vs.num_ucps > 0), vcc->output_map); if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) { num_consts = (vcc->tgsi.const_count + 1) / 2; /* * From the Sandy Bridge PRM, volume 2 part 1, page 138: * * "The sum of all four read length fields (each incremented to * represent the actual read length) must be less than or equal to * 32" */ if (num_consts > 32) num_consts = 0; } else { num_consts = 0; } vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts); vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); /* r0 is reserved for payload header */ vcc->first_const_grf = 1; vcc->first_ucp_grf = vcc->first_const_grf + num_consts; /* fit each pair of user clip planes into a register */ vcc->first_vue_grf = vcc->first_ucp_grf + (vcc->variant->u.vs.num_ucps + 1) / 2; vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count; vcc->last_free_grf = 127; /* m0 is reserved for system routines */ vcc->first_free_mrf = 1; vcc->last_free_mrf = 15; vcc->num_grf_per_vrf = 1; if (ilo_dev_gen(vcc->tc.dev) >= ILO_GEN(7)) { vcc->last_free_grf -= 15; vcc->first_free_mrf = 
vcc->last_free_grf + 1; vcc->last_free_mrf = vcc->first_free_mrf + 14; } vcc->shader->in.start_grf = vcc->first_const_grf; vcc->shader->pcb.clip_state_size = vcc->variant->u.vs.num_ucps * (sizeof(float) * 4); vcc->shader->bt.tex_base = 0; vcc->shader->bt.tex_count = vcc->variant->num_sampler_views; vcc->shader->bt.const_base = vcc->shader->bt.tex_base + vcc->shader->bt.tex_count; vcc->shader->bt.const_count = state->info.constant_buffer_count; vcc->shader->bt.total_count = vcc->shader->bt.const_base + vcc->shader->bt.const_count; return true; }
/** * Set up FS compile context. This includes translating the TGSI tokens. */ static bool fs_setup(struct fs_compile_context *fcc, const struct ilo_shader_state *state, const struct ilo_shader_variant *variant) { int num_consts; memset(fcc, 0, sizeof(*fcc)); fcc->shader = CALLOC_STRUCT(ilo_shader); if (!fcc->shader) return false; fcc->variant = variant; toy_compiler_init(&fcc->tc, state->info.gen); fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE; fcc->tc.templ.access_mode = BRW_ALIGN_1; if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) { fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H; fcc->tc.templ.exec_size = BRW_EXECUTE_16; } else { fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q; fcc->tc.templ.exec_size = BRW_EXECUTE_8; } fcc->tc.rect_linear_width = 8; /* * The classic driver uses the sampler cache (gen6) or the data cache * (gen7). Why? */ fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE; if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) { toy_compiler_cleanup(&fcc->tc); FREE(fcc->shader); return false; } fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade); fs_setup_shader_out(fcc->shader, &fcc->tgsi); /* we do not make use of push constant buffers yet */ num_consts = 0; fcc->first_const_grf = fs_setup_payloads(fcc); fcc->first_attr_grf = fcc->first_const_grf + num_consts; fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2; fcc->last_free_grf = 127; /* m0 is reserved for system routines */ fcc->first_free_mrf = 1; fcc->last_free_mrf = 15; /* instructions are compressed with BRW_EXECUTE_16 */ fcc->num_grf_per_vrf = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 
2 : 1; if (fcc->tc.gen >= ILO_GEN(7)) { fcc->last_free_grf -= 15; fcc->first_free_mrf = fcc->last_free_grf + 1; fcc->last_free_mrf = fcc->first_free_mrf + 14; } fcc->shader->in.start_grf = fcc->first_const_grf; fcc->shader->has_kill = fcc->tgsi.uses_kill; fcc->shader->dispatch_16 = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE); return true; }