/** * Compile the shader. */ static bool gs_compile(struct gs_compile_context *gcc) { struct toy_compiler *tc = &gcc->tc; struct ilo_shader *sh = gcc->shader; get_num_prims_static(gcc); if (gcc->is_static) { tc_head(tc); gs_init_vars(gcc); gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims)); gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0); if (gcc->write_so) gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1); tc_tail(tc); } else { tc_fail(tc, "no control flow support"); return false; } if (!gcc->write_vue) gs_discard(gcc); gs_lower_virtual_opcodes(gcc); toy_compiler_legalize_for_ra(tc); toy_compiler_optimize(tc); toy_compiler_allocate_registers(tc, gcc->first_free_grf, gcc->last_free_grf, 1); toy_compiler_legalize_for_asm(tc); if (tc->fail) { ilo_err("failed to legalize GS instructions: %s\n", tc->reason); return false; } if (ilo_debug & ILO_DEBUG_GS) { ilo_printf("legalized instructions:\n"); toy_compiler_dump(tc); ilo_printf("\n"); } sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); if (!sh->kernel) return false; if (ilo_debug & ILO_DEBUG_GS) { ilo_printf("disassembly:\n"); toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false); ilo_printf("\n"); } return true; }
static void vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc, struct toy_inst *inst) { struct toy_compiler *tc = &vcc->tc; enum tgsi_file_type file; int dim, idx; struct toy_src indirect_dim, indirect_idx; assert(inst->src[0].file == TOY_FILE_IMM); file = inst->src[0].val32; assert(inst->src[1].file == TOY_FILE_IMM); dim = inst->src[1].val32; indirect_dim = inst->src[2]; assert(inst->src[3].file == TOY_FILE_IMM); idx = inst->src[3].val32; indirect_idx = inst->src[4]; /* no dimension indirection */ assert(indirect_dim.file == TOY_FILE_IMM); dim += indirect_dim.val32; switch (inst->opcode) { case TOY_OPCODE_TGSI_INDIRECT_FETCH: if (file == TGSI_FILE_CONSTANT) { if (idx) { struct toy_dst tmp = tc_alloc_tmp(tc); tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx)); indirect_idx = tsrc_from(tmp); } if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx); else vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx); break; } /* fall through */ case TOY_OPCODE_TGSI_INDIRECT_STORE: default: tc_fail(tc, "unhandled TGSI indirection"); break; } tc_discard_inst(tc, inst); }
static void gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc, struct toy_inst *inst) { struct toy_compiler *tc = &gcc->tc; int dim, idx; assert(inst->src[0].file == TOY_FILE_IMM); dim = inst->src[0].val32; assert(inst->src[1].file == TOY_FILE_IMM); idx = inst->src[1].val32; switch (inst->opcode) { case TOY_OPCODE_TGSI_IN: gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx); /* fetch all dimensions */ if (dim == 0) { int i; for (i = 1; i < gcc->in_vue_count; i++) { const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx); struct toy_dst dst; if (vrf < 0) continue; dst = tdst(TOY_FILE_VRF, vrf, 0); gs_lower_opcode_tgsi_in(gcc, dst, i, idx); } } break; case TOY_OPCODE_TGSI_IMM: assert(!dim); gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx); break; case TOY_OPCODE_TGSI_CONST: case TOY_OPCODE_TGSI_SV: default: tc_fail(tc, "unhandled TGSI fetch"); break; } tc_discard_inst(tc, inst); }
static void vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc, struct toy_dst dst, int dim, int idx) { struct toy_compiler *tc = &vcc->tc; const struct toy_tgsi *tgsi = &vcc->tgsi; int slot; assert(!dim); slot = toy_tgsi_find_system_value(tgsi, idx); if (slot < 0) return; switch (tgsi->system_values[slot].semantic_name) { case TGSI_SEMANTIC_INSTANCEID: case TGSI_SEMANTIC_VERTEXID: /* * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for * the generated IDs, with VID in the X channel and IID in the Y * channel. */ { const int grf = vcc->first_vue_grf; const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); const enum toy_swizzle swizzle = (tgsi->system_values[slot].semantic_name == TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X; tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle))); } break; case TGSI_SEMANTIC_PRIMID: default: tc_fail(tc, "unhandled system value"); tc_MOV(tc, dst, tsrc_imm_d(0)); break; } }
static void vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc, struct toy_inst *inst) { struct toy_compiler *tc = &vcc->tc; int dim, idx; assert(inst->src[0].file == TOY_FILE_IMM); dim = inst->src[0].val32; assert(inst->src[1].file == TOY_FILE_IMM); idx = inst->src[1].val32; switch (inst->opcode) { case TOY_OPCODE_TGSI_IN: vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx); break; case TOY_OPCODE_TGSI_CONST: if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]); else vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]); break; case TOY_OPCODE_TGSI_SV: vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx); break; case TOY_OPCODE_TGSI_IMM: assert(!dim); vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx); break; default: tc_fail(tc, "unhandled TGSI fetch"); break; } tc_discard_inst(tc, inst); }
/**
 * Lower a system value fetch for the fragment shader.
 *
 * Nothing is emitted when the system value \p idx cannot be found.  No
 * system value is supported yet: every one that is found fails the compile
 * and writes 0 to \p dst.
 *
 * \param fcc  FS compile context
 * \param dst  destination of the fetch
 * \param dim  dimension, asserted to be 0
 * \param idx  system value index, looked up in the TGSI info
 */
static void
fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_tgsi *tgsi = &fcc->tgsi;

   assert(!dim);

   if (toy_tgsi_find_system_value(tgsi, idx) < 0)
      return;

   /*
    * TGSI_SEMANTIC_PRIMID, TGSI_SEMANTIC_INSTANCEID, TGSI_SEMANTIC_VERTEXID,
    * and every other semantic are all handled the same way for now.
    */
   tc_fail(tc, "unhandled system value");
   tc_MOV(tc, dst, tsrc_imm_d(0));
}
static void gs_lower_virtual_opcodes(struct gs_compile_context *gcc) { struct toy_compiler *tc = &gcc->tc; struct toy_inst *inst; tc_head(tc); while ((inst = tc_next(tc)) != NULL) { switch (inst->opcode) { case TOY_OPCODE_TGSI_IN: case TOY_OPCODE_TGSI_CONST: case TOY_OPCODE_TGSI_SV: case TOY_OPCODE_TGSI_IMM: gs_lower_opcode_tgsi_direct(gcc, inst); break; case TOY_OPCODE_TGSI_INDIRECT_FETCH: case TOY_OPCODE_TGSI_INDIRECT_STORE: /* TODO similar to VS */ tc_fail(tc, "no indirection support"); tc_discard_inst(tc, inst); break; case TOY_OPCODE_TGSI_TEX: case TOY_OPCODE_TGSI_TXB: case TOY_OPCODE_TGSI_TXD: case TOY_OPCODE_TGSI_TXL: case TOY_OPCODE_TGSI_TXP: case TOY_OPCODE_TGSI_TXF: case TOY_OPCODE_TGSI_TXQ: case TOY_OPCODE_TGSI_TXQ_LZ: case TOY_OPCODE_TGSI_TEX2: case TOY_OPCODE_TGSI_TXB2: case TOY_OPCODE_TGSI_TXL2: case TOY_OPCODE_TGSI_SAMPLE: case TOY_OPCODE_TGSI_SAMPLE_I: case TOY_OPCODE_TGSI_SAMPLE_I_MS: case TOY_OPCODE_TGSI_SAMPLE_B: case TOY_OPCODE_TGSI_SAMPLE_C: case TOY_OPCODE_TGSI_SAMPLE_C_LZ: case TOY_OPCODE_TGSI_SAMPLE_D: case TOY_OPCODE_TGSI_SAMPLE_L: case TOY_OPCODE_TGSI_GATHER4: case TOY_OPCODE_TGSI_SVIEWINFO: case TOY_OPCODE_TGSI_SAMPLE_POS: case TOY_OPCODE_TGSI_SAMPLE_INFO: /* TODO similar to VS */ tc_fail(tc, "no sampling support"); tc_discard_inst(tc, inst); break; case TOY_OPCODE_EMIT: gs_lower_opcode_emit(gcc, inst); tc_discard_inst(tc, inst); break; case TOY_OPCODE_ENDPRIM: gs_lower_opcode_endprim(gcc, inst); tc_discard_inst(tc, inst); break; default: break; } } tc_head(tc); while ((inst = tc_next(tc)) != NULL) { switch (inst->opcode) { case TOY_OPCODE_INV: case TOY_OPCODE_LOG: case TOY_OPCODE_EXP: case TOY_OPCODE_SQRT: case TOY_OPCODE_RSQ: case TOY_OPCODE_SIN: case TOY_OPCODE_COS: case TOY_OPCODE_FDIV: case TOY_OPCODE_POW: case TOY_OPCODE_INT_DIV_QUOTIENT: case TOY_OPCODE_INT_DIV_REMAINDER: toy_compiler_lower_math(tc, inst); break; case TOY_OPCODE_URB_WRITE: toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB); break; default: if (inst->opcode 
> 127) tc_fail(tc, "unhandled virtual opcode"); break; } } }
/** * Set up GS compile context. This includes translating the TGSI tokens. */ static bool gs_setup(struct gs_compile_context *gcc, const struct ilo_shader_state *state, const struct ilo_shader_variant *variant, int num_verts) { memset(gcc, 0, sizeof(*gcc)); gcc->shader = CALLOC_STRUCT(ilo_shader); if (!gcc->shader) return false; gcc->variant = variant; gcc->so_info = &state->info.stream_output; toy_compiler_init(&gcc->tc, state->info.dev); gcc->write_so = (state->info.stream_output.num_outputs > 0); gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard; gcc->tc.templ.access_mode = GEN6_ALIGN_16; gcc->tc.templ.exec_size = GEN6_EXECSIZE_4; gcc->tc.rect_linear_width = 4; if (state->info.tokens) { if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) { toy_compiler_cleanup(&gcc->tc); FREE(gcc->shader); return false; } switch (gcc->tgsi.props.gs_input_prim) { case PIPE_PRIM_POINTS: gcc->in_vue_count = 1; break; case PIPE_PRIM_LINES: gcc->in_vue_count = 2; gcc->shader->in.discard_adj = true; break; case PIPE_PRIM_TRIANGLES: gcc->in_vue_count = 3; gcc->shader->in.discard_adj = true; break; case PIPE_PRIM_LINES_ADJACENCY: gcc->in_vue_count = 4; break; case PIPE_PRIM_TRIANGLES_ADJACENCY: gcc->in_vue_count = 6; break; default: tc_fail(&gcc->tc, "unsupported GS input type"); gcc->in_vue_count = 0; break; } switch (gcc->tgsi.props.gs_output_prim) { case PIPE_PRIM_POINTS: gcc->out_vue_min_count = 1; break; case PIPE_PRIM_LINE_STRIP: gcc->out_vue_min_count = 2; break; case PIPE_PRIM_TRIANGLE_STRIP: gcc->out_vue_min_count = 3; break; default: tc_fail(&gcc->tc, "unsupported GS output type"); gcc->out_vue_min_count = 0; break; } } else { int i; gcc->in_vue_count = num_verts; gcc->out_vue_min_count = num_verts; gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs; for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) { gcc->tgsi.outputs[i].semantic_name = gcc->variant->u.gs.semantic_names[i]; gcc->tgsi.outputs[i].semantic_index = gcc->variant->u.gs.semantic_indices[i]; } } 
gcc->tc.templ.access_mode = GEN6_ALIGN_1; gs_setup_shader_in(gcc->shader, gcc->variant); gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map); gcc->in_vue_size = (gcc->shader->in.count + 1) / 2; gcc->out_vue_size = (gcc->shader->out.count + 1) / 2; gs_setup_payload(gcc); gs_setup_vars(gcc); /* m0 is reserved for system routines */ gcc->first_free_mrf = 1; gcc->last_free_mrf = 15; gcc->shader->bt.gen6_so_base = 0; gcc->shader->bt.gen6_so_count = gcc->so_info->num_outputs; gcc->shader->bt.total_count = gcc->shader->bt.gen6_so_count; return true; }
static void vs_lower_virtual_opcodes(struct vs_compile_context *vcc) { struct toy_compiler *tc = &vcc->tc; struct toy_inst *inst; tc_head(tc); while ((inst = tc_next(tc)) != NULL) { switch (inst->opcode) { case TOY_OPCODE_TGSI_IN: case TOY_OPCODE_TGSI_CONST: case TOY_OPCODE_TGSI_SV: case TOY_OPCODE_TGSI_IMM: vs_lower_opcode_tgsi_direct(vcc, inst); break; case TOY_OPCODE_TGSI_INDIRECT_FETCH: case TOY_OPCODE_TGSI_INDIRECT_STORE: vs_lower_opcode_tgsi_indirect(vcc, inst); break; case TOY_OPCODE_TGSI_TEX: case TOY_OPCODE_TGSI_TXB: case TOY_OPCODE_TGSI_TXD: case TOY_OPCODE_TGSI_TXL: case TOY_OPCODE_TGSI_TXP: case TOY_OPCODE_TGSI_TXF: case TOY_OPCODE_TGSI_TXQ: case TOY_OPCODE_TGSI_TXQ_LZ: case TOY_OPCODE_TGSI_TEX2: case TOY_OPCODE_TGSI_TXB2: case TOY_OPCODE_TGSI_TXL2: case TOY_OPCODE_TGSI_SAMPLE: case TOY_OPCODE_TGSI_SAMPLE_I: case TOY_OPCODE_TGSI_SAMPLE_I_MS: case TOY_OPCODE_TGSI_SAMPLE_B: case TOY_OPCODE_TGSI_SAMPLE_C: case TOY_OPCODE_TGSI_SAMPLE_C_LZ: case TOY_OPCODE_TGSI_SAMPLE_D: case TOY_OPCODE_TGSI_SAMPLE_L: case TOY_OPCODE_TGSI_GATHER4: case TOY_OPCODE_TGSI_SVIEWINFO: case TOY_OPCODE_TGSI_SAMPLE_POS: case TOY_OPCODE_TGSI_SAMPLE_INFO: vs_lower_opcode_tgsi_sampling(vcc, inst); break; case TOY_OPCODE_INV: case TOY_OPCODE_LOG: case TOY_OPCODE_EXP: case TOY_OPCODE_SQRT: case TOY_OPCODE_RSQ: case TOY_OPCODE_SIN: case TOY_OPCODE_COS: case TOY_OPCODE_FDIV: case TOY_OPCODE_POW: case TOY_OPCODE_INT_DIV_QUOTIENT: case TOY_OPCODE_INT_DIV_REMAINDER: toy_compiler_lower_math(tc, inst); break; case TOY_OPCODE_URB_WRITE: vs_lower_opcode_urb_write(tc, inst); break; default: if (inst->opcode > 127) tc_fail(tc, "unhandled virtual opcode"); break; } } }
/** * Set up message registers and return the message descriptor for sampling. */ static struct toy_src vs_prepare_tgsi_sampling(struct vs_compile_context *vcc, const struct toy_inst *inst, int base_mrf, unsigned *ret_sampler_index) { struct toy_compiler *tc = &vcc->tc; unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si; int num_coords, ref_pos, num_derivs; int sampler_src; simd_mode = GEN6_MSG_SAMPLER_SIMD4X2; coords = inst->src[0]; ddx = tsrc_null(); ddy = tsrc_null(); bias_or_lod = tsrc_null(); ref_or_si = tsrc_null(); num_derivs = 0; sampler_src = 1; num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos); /* extract the parameters */ switch (inst->opcode) { case TOY_OPCODE_TGSI_TXD: if (ref_pos >= 0) { assert(ref_pos < 4); msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C; ref_or_si = tsrc_swizzle1(coords, ref_pos); if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5)) tc_fail(tc, "TXD with shadow sampler not supported"); } else { msg_type = GEN6_MSG_SAMPLER_SAMPLE_D; } ddx = inst->src[1]; ddy = inst->src[2]; num_derivs = num_coords; sampler_src = 3; break; case TOY_OPCODE_TGSI_TXL: if (ref_pos >= 0) { assert(ref_pos < 3); msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; ref_or_si = tsrc_swizzle1(coords, ref_pos); } else { msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; } bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); break; case TOY_OPCODE_TGSI_TXF: msg_type = GEN6_MSG_SAMPLER_LD; switch (inst->tex.target) { case TGSI_TEXTURE_2D_MSAA: case TGSI_TEXTURE_2D_ARRAY_MSAA: assert(ref_pos >= 0 && ref_pos < 4); /* lod is always 0 */ bias_or_lod = tsrc_imm_d(0); ref_or_si = tsrc_swizzle1(coords, ref_pos); break; default: bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); break; } /* offset the coordinates */ if (!tsrc_is_null(inst->tex.offsets[0])) { struct toy_dst tmp; tmp = tc_alloc_tmp(tc); tc_ADD(tc, tmp, coords, inst->tex.offsets[0]); coords = tsrc_from(tmp); } sampler_src = 1; break; case 
TOY_OPCODE_TGSI_TXQ: msg_type = GEN6_MSG_SAMPLER_RESINFO; num_coords = 0; bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X); break; case TOY_OPCODE_TGSI_TXQ_LZ: msg_type = GEN6_MSG_SAMPLER_RESINFO; num_coords = 0; sampler_src = 0; break; case TOY_OPCODE_TGSI_TXL2: if (ref_pos >= 0) { assert(ref_pos < 4); msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; ref_or_si = tsrc_swizzle1(coords, ref_pos); } else { msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; } bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X); sampler_src = 2; break; default: assert(!"unhandled sampling opcode"); if (ret_sampler_index) *ret_sampler_index = 0; return tsrc_null(); break; } assert(inst->src[sampler_src].file == TOY_FILE_IMM); sampler_index = inst->src[sampler_src].val32; binding_table_index = vcc->shader->bt.tex_base + sampler_index; /* * From the Sandy Bridge PRM, volume 4 part 1, page 18: * * "Note that the (cube map) coordinates delivered to the sampling * engine must already have been divided by the component with the * largest absolute value." 
*/ switch (inst->tex.target) { case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: /* TXQ does not need coordinates */ if (num_coords >= 3) { struct toy_dst tmp, max; struct toy_src abs_coords[3]; unsigned i; tmp = tc_alloc_tmp(tc); max = tdst_writemask(tmp, TOY_WRITEMASK_W); for (i = 0; i < 3; i++) abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i)); tc_SEL(tc, max, abs_coords[0], abs_coords[0], GEN6_COND_GE); tc_SEL(tc, max, tsrc_from(max), abs_coords[0], GEN6_COND_GE); tc_INV(tc, max, tsrc_from(max)); for (i = 0; i < 3; i++) tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max)); coords = tsrc_from(tmp); } break; } /* set up sampler parameters */ msg_len = vs_add_sampler_params(tc, msg_type, base_mrf, coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); /* * From the Sandy Bridge PRM, volume 4 part 1, page 136: * * "The maximum message length allowed to the sampler is 11. This would * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of * SIMD16." */ if (msg_len > 11) tc_fail(tc, "maximum length for messages to the sampler is 11"); if (ret_sampler_index) *ret_sampler_index = sampler_index; return tsrc_imm_mdesc_sampler(tc, msg_len, 1, false, simd_mode, msg_type, sampler_index, binding_table_index); }
/** * Emit instructions to move sampling parameters to the message registers. */ static int vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf, struct toy_src coords, int num_coords, struct toy_src bias_or_lod, struct toy_src ref_or_si, struct toy_src ddx, struct toy_src ddy, int num_derivs) { const unsigned coords_writemask = (1 << num_coords) - 1; struct toy_dst m[3]; int num_params, i; assert(num_coords <= 4); assert(num_derivs <= 3 && num_derivs <= num_coords); for (i = 0; i < Elements(m); i++) m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0); switch (msg_type) { case GEN6_MSG_SAMPLER_SAMPLE_L: tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod); num_params = 5; break; case GEN6_MSG_SAMPLER_SAMPLE_D: tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ), tsrc_swizzle(ddx, 0, 0, 1, 1)); tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW), tsrc_swizzle(ddy, 0, 0, 1, 1)); if (num_derivs > 2) { tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X), tsrc_swizzle1(ddx, 2)); tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y), tsrc_swizzle1(ddy, 2)); } num_params = 4 + num_derivs * 2; break; case GEN6_MSG_SAMPLER_SAMPLE_L_C: tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si); tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod); num_params = 6; break; case GEN6_MSG_SAMPLER_LD: assert(num_coords <= 3); tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords); tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod); if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { num_params = 4; } else { tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si); num_params = 5; } break; case GEN6_MSG_SAMPLER_RESINFO: tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod); num_params = 1; break; default: tc_fail(tc, "unknown sampler 
opcode"); num_params = 0; break; } return (num_params + 3) / 4; }
/** * Set up message registers and return the message descriptor for sampling. */ static struct toy_src fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst, int base_mrf, const uint32_t *saturate_coords, unsigned *ret_sampler_index) { unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si; int num_coords, ref_pos, num_derivs; int sampler_src, param_size, i; switch (inst->exec_size) { case BRW_EXECUTE_8: simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; param_size = 1; break; case BRW_EXECUTE_16: simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; param_size = 2; break; default: tc_fail(tc, "unsupported execute size for sampling"); return tsrc_null(); break; } num_coords = toy_tgsi_get_texture_coord_dim(inst->tex.target, &ref_pos); tsrc_transpose(inst->src[0], coords); bias_or_lod = tsrc_null(); ref_or_si = tsrc_null(); num_derivs = 0; sampler_src = 1; /* * For TXD, * * src0 := (x, y, z, w) * src1 := ddx * src2 := ddy * src3 := sampler * * For TEX2, TXB2, and TXL2, * * src0 := (x, y, z, w) * src1 := (v or bias or lod, ...) * src2 := sampler * * For TEX, TXB, TXL, and TXP, * * src0 := (x, y, z, w or bias or lod or projection) * src1 := sampler * * For TXQ, * * src0 := (lod, ...) * src1 := sampler * * For TXQ_LZ, * * src0 := sampler * * And for TXF, * * src0 := (x, y, z, w or lod) * src1 := sampler * * State trackers should not generate opcode+texture combinations with * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY). 
*/ switch (inst->opcode) { case TOY_OPCODE_TGSI_TEX: if (ref_pos >= 0) { assert(ref_pos < 4); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; } break; case TOY_OPCODE_TGSI_TXD: if (ref_pos >= 0) tc_fail(tc, "TXD with shadow sampler not supported"); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; tsrc_transpose(inst->src[1], ddx); tsrc_transpose(inst->src[2], ddy); num_derivs = num_coords; sampler_src = 3; break; case TOY_OPCODE_TGSI_TXP: if (ref_pos >= 0) { assert(ref_pos < 3); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; } /* project the coordinates */ { struct toy_dst tmp[4]; tc_alloc_tmp4(tc, tmp); tc_INV(tc, tmp[3], coords[3]); for (i = 0; i < num_coords && i < 3; i++) { tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); coords[i] = tsrc_from(tmp[i]); } if (ref_pos >= i) { tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3])); ref_or_si = tsrc_from(tmp[ref_pos]); } } break; case TOY_OPCODE_TGSI_TXB: if (ref_pos >= 0) { assert(ref_pos < 3); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; } bias_or_lod = coords[3]; break; case TOY_OPCODE_TGSI_TXL: if (ref_pos >= 0) { assert(ref_pos < 3); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } bias_or_lod = coords[3]; break; case TOY_OPCODE_TGSI_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; switch (inst->tex.target) { case TGSI_TEXTURE_2D_MSAA: case TGSI_TEXTURE_2D_ARRAY_MSAA: assert(ref_pos >= 0 && ref_pos < 4); /* lod is always 0 */ bias_or_lod = tsrc_imm_d(0); ref_or_si = coords[ref_pos]; break; default: bias_or_lod = coords[3]; break; } /* offset the coordinates */ if (!tsrc_is_null(inst->tex.offsets[0])) { struct toy_dst tmp[4]; struct toy_src offsets[4]; tc_alloc_tmp4(tc, tmp); 
tsrc_transpose(inst->tex.offsets[0], offsets); for (i = 0; i < num_coords; i++) { tc_ADD(tc, tmp[i], coords[i], offsets[i]); coords[i] = tsrc_from(tmp[i]); } } sampler_src = 1; break; case TOY_OPCODE_TGSI_TXQ: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; num_coords = 0; bias_or_lod = coords[0]; break; case TOY_OPCODE_TGSI_TXQ_LZ: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; num_coords = 0; sampler_src = 0; break; case TOY_OPCODE_TGSI_TEX2: if (ref_pos >= 0) { assert(ref_pos < 5); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; if (ref_pos >= 4) { struct toy_src src1[4]; tsrc_transpose(inst->src[1], src1); ref_or_si = src1[ref_pos - 4]; } else { ref_or_si = coords[ref_pos]; } } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; } sampler_src = 2; break; case TOY_OPCODE_TGSI_TXB2: if (ref_pos >= 0) { assert(ref_pos < 4); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; } { struct toy_src src1[4]; tsrc_transpose(inst->src[1], src1); bias_or_lod = src1[0]; } sampler_src = 2; break; case TOY_OPCODE_TGSI_TXL2: if (ref_pos >= 0) { assert(ref_pos < 4); msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; ref_or_si = coords[ref_pos]; } else { msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } { struct toy_src src1[4]; tsrc_transpose(inst->src[1], src1); bias_or_lod = src1[0]; } sampler_src = 2; break; default: assert(!"unhandled sampling opcode"); return tsrc_null(); break; } assert(inst->src[sampler_src].file == TOY_FILE_IMM); sampler_index = inst->src[sampler_src].val32; binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index); /* * From the Sandy Bridge PRM, volume 4 part 1, page 18: * * "Note that the (cube map) coordinates delivered to the sampling * engine must already have been divided by the component with the * largest absolute value." 
*/ switch (inst->tex.target) { case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: case TGSI_TEXTURE_CUBE_ARRAY: case TGSI_TEXTURE_SHADOWCUBE_ARRAY: /* TXQ does not need coordinates */ if (num_coords >= 3) { struct toy_dst tmp[4]; tc_alloc_tmp4(tc, tmp); tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]), tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE); tc_SEL(tc, tmp[3], tsrc_from(tmp[3]), tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE); tc_INV(tc, tmp[3], tsrc_from(tmp[3])); for (i = 0; i < 3; i++) { tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); coords[i] = tsrc_from(tmp[i]); } } break; } /* * Saturate (s, t, r). saturate_coords is set for sampler and coordinate * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is * so that sampling outside the border gets the correct colors. */ for (i = 0; i < MIN2(num_coords, 3); i++) { bool is_rect; if (!(saturate_coords[i] & (1 << sampler_index))) continue; switch (inst->tex.target) { case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOWRECT: is_rect = true; break; default: is_rect = false; break; } if (is_rect) { struct toy_src min, max; struct toy_dst tmp; tc_fail(tc, "GL_CLAMP with rectangle texture unsupported"); tmp = tc_alloc_tmp(tc); /* saturate to [0, width] or [0, height] */ /* TODO TXQ? 
*/ min = tsrc_imm_f(0.0f); max = tsrc_imm_f(2048.0f); tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G); tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L); coords[i] = tsrc_from(tmp); } else { struct toy_dst tmp; struct toy_inst *inst2; tmp = tc_alloc_tmp(tc); /* saturate to [0.0f, 1.0f] */ inst2 = tc_MOV(tc, tmp, coords[i]); inst2->saturate = true; coords[i] = tsrc_from(tmp); } } /* set up sampler parameters */ if (tc->gen >= ILO_GEN(7)) { msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size, coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); } else { msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size, coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); } /* * From the Sandy Bridge PRM, volume 4 part 1, page 136: * * "The maximum message length allowed to the sampler is 11. This would * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of * SIMD16." */ if (msg_len > 11) tc_fail(tc, "maximum length for messages to the sampler is 11"); if (ret_sampler_index) *ret_sampler_index = sampler_index; return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size, false, simd_mode, msg_type, sampler_index, binding_table_index); }
static int fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type, int base_mrf, int param_size, struct toy_src *coords, int num_coords, struct toy_src bias_or_lod, struct toy_src ref_or_si, struct toy_src *ddx, struct toy_src *ddy, int num_derivs) { int num_params, i; assert(num_coords <= 4); assert(num_derivs <= 3 && num_derivs <= num_coords); #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0)) switch (msg_type) { case GEN5_SAMPLER_MESSAGE_SAMPLE: for (i = 0; i < num_coords; i++) tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); num_params = num_coords; break; case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS: case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD: tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod); for (i = 0; i < num_coords; i++) tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); num_params = 1 + num_coords; break; case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE: tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); for (i = 0; i < num_coords; i++) tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); num_params = 1 + num_coords; break; case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS: for (i = 0; i < num_coords; i++) { tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]); if (i < num_derivs) { tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]); tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]); } } num_params = num_coords * 3 - ((num_coords > num_derivs) ? 
2 : 0); break; case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE: case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE: tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod); for (i = 0; i < num_coords; i++) tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]); num_params = 2 + num_coords; break; case GEN5_SAMPLER_MESSAGE_SAMPLE_LD: assert(num_coords >= 1 && num_coords <= 3); tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]); tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod); for (i = 1; i < num_coords; i++) tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]); num_params = 1 + num_coords; break; case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO: tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod); num_params = 1; break; default: tc_fail(tc, "unknown sampler opcode"); num_params = 0; break; } #undef SAMPLER_PARAM return num_params * param_size; }
/**
 * Lower a TGSI indirect fetch/store for the fragment shader.
 *
 * Indirection is not supported: the compile is marked as failed and the
 * instruction itself is left untouched.
 */
static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;

   /* the instruction is neither inspected nor discarded */
   (void) inst;

   tc_fail(tc, "no TGSI indirection support");
}
static void fs_lower_virtual_opcodes(struct fs_compile_context *fcc) { struct toy_compiler *tc = &fcc->tc; struct toy_inst *inst; /* lower TGSI's first, as they might be lowered to other virtual opcodes */ tc_head(tc); while ((inst = tc_next(tc)) != NULL) { switch (inst->opcode) { case TOY_OPCODE_TGSI_IN: case TOY_OPCODE_TGSI_CONST: case TOY_OPCODE_TGSI_SV: case TOY_OPCODE_TGSI_IMM: fs_lower_opcode_tgsi_direct(fcc, inst); break; case TOY_OPCODE_TGSI_INDIRECT_FETCH: case TOY_OPCODE_TGSI_INDIRECT_STORE: fs_lower_opcode_tgsi_indirect(fcc, inst); break; case TOY_OPCODE_TGSI_TEX: case TOY_OPCODE_TGSI_TXB: case TOY_OPCODE_TGSI_TXD: case TOY_OPCODE_TGSI_TXL: case TOY_OPCODE_TGSI_TXP: case TOY_OPCODE_TGSI_TXF: case TOY_OPCODE_TGSI_TXQ: case TOY_OPCODE_TGSI_TXQ_LZ: case TOY_OPCODE_TGSI_TEX2: case TOY_OPCODE_TGSI_TXB2: case TOY_OPCODE_TGSI_TXL2: case TOY_OPCODE_TGSI_SAMPLE: case TOY_OPCODE_TGSI_SAMPLE_I: case TOY_OPCODE_TGSI_SAMPLE_I_MS: case TOY_OPCODE_TGSI_SAMPLE_B: case TOY_OPCODE_TGSI_SAMPLE_C: case TOY_OPCODE_TGSI_SAMPLE_C_LZ: case TOY_OPCODE_TGSI_SAMPLE_D: case TOY_OPCODE_TGSI_SAMPLE_L: case TOY_OPCODE_TGSI_GATHER4: case TOY_OPCODE_TGSI_SVIEWINFO: case TOY_OPCODE_TGSI_SAMPLE_POS: case TOY_OPCODE_TGSI_SAMPLE_INFO: fs_lower_opcode_tgsi_sampling(fcc, inst); break; } } tc_head(tc); while ((inst = tc_next(tc)) != NULL) { switch (inst->opcode) { case TOY_OPCODE_INV: case TOY_OPCODE_LOG: case TOY_OPCODE_EXP: case TOY_OPCODE_SQRT: case TOY_OPCODE_RSQ: case TOY_OPCODE_SIN: case TOY_OPCODE_COS: case TOY_OPCODE_FDIV: case TOY_OPCODE_POW: case TOY_OPCODE_INT_DIV_QUOTIENT: case TOY_OPCODE_INT_DIV_REMAINDER: toy_compiler_lower_math(tc, inst); break; case TOY_OPCODE_DDX: case TOY_OPCODE_DDY: fs_lower_opcode_derivative(tc, inst); break; case TOY_OPCODE_FB_WRITE: fs_lower_opcode_fb_write(tc, inst); break; case TOY_OPCODE_KIL: fs_lower_opcode_kil(tc, inst); break; default: if (inst->opcode > 127) tc_fail(tc, "unhandled virtual opcode"); break; } } }