Example #1
0
/**
 * Compile the shader.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
static void
vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   enum tgsi_file_type file;
   int dim, idx;
   struct toy_src indirect_dim, indirect_idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   file = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[1].val32;
   indirect_dim = inst->src[2];

   assert(inst->src[3].file == TOY_FILE_IMM);
   idx = inst->src[3].val32;
   indirect_idx = inst->src[4];

   /* no dimension indirection */
   assert(indirect_dim.file == TOY_FILE_IMM);
   dim += indirect_dim.val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      if (file == TGSI_FILE_CONSTANT) {
         if (idx) {
            struct toy_dst tmp = tc_alloc_tmp(tc);

            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
            indirect_idx = tsrc_from(tmp);
         }

         if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
            vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
         else
            vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
         break;
      }
      /* fall through */
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
   default:
      tc_fail(tc, "unhandled TGSI indirection");
      break;
   }

   tc_discard_inst(tc, inst);
}
Example #3
0
/**
 * Lower a direct TGSI fetch in the GS.
 *
 * src[0] holds the dimension and src[1] the index, both immediates.  Inputs
 * and immediates are lowered; constants, system values, and everything else
 * fail the compile.  The instruction is always discarded afterwards.
 */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   assert(inst->src[1].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;
   idx = inst->src[1].val32;

   if (inst->opcode == TOY_OPCODE_TGSI_IN) {
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);

      /*
       * A fetch of dimension 0 also fetches the same input for all other
       * dimensions that have a VRF assigned.
       */
      if (dim == 0) {
         int other;

         for (other = 1; other < gcc->in_vue_count; other++) {
            const int vrf =
               toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, other, idx);

            if (vrf >= 0) {
               gs_lower_opcode_tgsi_in(gcc,
                     tdst(TOY_FILE_VRF, vrf, 0), other, idx);
            }
         }
      }
   }
   else if (inst->opcode == TOY_OPCODE_TGSI_IMM) {
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
   }
   else {
      /* TOY_OPCODE_TGSI_CONST, TOY_OPCODE_TGSI_SV, and the rest */
      tc_fail(tc, "unhandled TGSI fetch");
   }

   tc_discard_inst(tc, inst);
}
static void
vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_tgsi *tgsi = &vcc->tgsi;
   int slot;

   assert(!dim);

   slot = toy_tgsi_find_system_value(tgsi, idx);
   if (slot < 0)
      return;

   switch (tgsi->system_values[slot].semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
   case TGSI_SEMANTIC_VERTEXID:
      /*
       * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
       * the generated IDs, with VID in the X channel and IID in the Y
       * channel.
       */
      {
         const int grf = vcc->first_vue_grf;
         const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
         const enum toy_swizzle swizzle =
            (tgsi->system_values[slot].semantic_name ==
             TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;

         tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
      }
      break;
   case TGSI_SEMANTIC_PRIMID:
   default:
      tc_fail(tc, "unhandled system value");
      tc_MOV(tc, dst, tsrc_imm_d(0));
      break;
   }
}
/**
 * Lower a direct TGSI fetch in the VS.
 *
 * src[0] holds the dimension and src[1] the index, both immediates.  The
 * work is delegated per opcode (input, constant, system value, immediate);
 * an unknown opcode fails the compile.  The instruction is always discarded
 * afterwards.
 */
static void
vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &vcc->tc;
   int dimension, index;

   assert(inst->src[0].file == TOY_FILE_IMM);
   assert(inst->src[1].file == TOY_FILE_IMM);
   dimension = inst->src[0].val32;
   index = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IMM:
      assert(!dimension);
      vs_lower_opcode_tgsi_imm(vcc, inst->dst, index);
      break;
   case TOY_OPCODE_TGSI_SV:
      vs_lower_opcode_tgsi_sv(vcc, inst->dst, dimension, index);
      break;
   case TOY_OPCODE_TGSI_IN:
      vs_lower_opcode_tgsi_in(vcc, inst->dst, dimension, index);
      break;
   case TOY_OPCODE_TGSI_CONST:
      /* the constant helpers take the index as a source operand */
      if (ilo_dev_gen(tc->dev) < ILO_GEN(7)) {
         vs_lower_opcode_tgsi_const_gen6(vcc,
               inst->dst, dimension, inst->src[1]);
      }
      else {
         vs_lower_opcode_tgsi_const_gen7(vcc,
               inst->dst, dimension, inst->src[1]);
      }
      break;
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
Example #6
0
/**
 * Lower a system value fetch in the FS.
 *
 * No system value is supported here: when \p idx maps to a known system
 * value (PRIMID, INSTANCEID, VERTEXID, or any other), the compile is failed
 * and 0 is written to \p dst.  Nothing is emitted when the lookup fails.
 */
static void
fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &fcc->tc;

   assert(!dim);

   if (toy_tgsi_find_system_value(&fcc->tgsi, idx) < 0)
      return;

   /* every semantic ends up here */
   tc_fail(tc, "unhandled system value");
   tc_MOV(tc, dst, tsrc_imm_d(0));
}
Example #7
0
/**
 * Lower the virtual opcodes of the GS in two passes over the instructions.
 *
 * The first pass handles TGSI fetches, EMIT, and ENDPRIM, and fails the
 * compile on indirection and sampling.  The second pass lowers math and URB
 * writes, and fails on any remaining opcode above 127.
 */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc)) {
      /* whether this pass must discard the instruction itself */
      int discard = 1;

      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         /* the helper discards the instruction on its own */
         gs_lower_opcode_tgsi_direct(gcc, inst);
         discard = 0;
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         break;
      default:
         /* left for the second pass */
         discard = 0;
         break;
      }

      if (discard)
         tc_discard_inst(tc, inst);
   }

   tc_head(tc);
   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc)) {
      switch (inst->opcode) {
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB);
         break;
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      default:
         /* opcodes above 127 are treated as virtual and must be gone */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
Example #8
0
/**
 * Set up GS compile context.  This includes translating the TGSI tokens.
 */
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   gcc->tc.templ.access_mode = GEN6_ALIGN_16;
   gcc->tc.templ.exec_size = GEN6_EXECSIZE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      int i;

      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   gcc->tc.templ.access_mode = GEN6_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   gcc->shader->bt.gen6_so_base = 0;
   gcc->shader->bt.gen6_so_count = gcc->so_info->num_outputs;

   gcc->shader->bt.total_count = gcc->shader->bt.gen6_so_count;

   return true;
}
static void
vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         vs_lower_opcode_tgsi_direct(vcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         vs_lower_opcode_tgsi_indirect(vcc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         vs_lower_opcode_tgsi_sampling(vcc, inst);
         break;
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         vs_lower_opcode_urb_write(tc, inst);
         break;
      default:
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
/**
 * Set up message registers and return the message descriptor for sampling.
 *
 * TXD, TXL, TXF, TXQ, TXQ_LZ, and TXL2 are handled.  Any other opcode trips
 * an assertion; when assertions are compiled out, a null source is returned
 * and *ret_sampler_index is set to 0.
 *
 * \param vcc                VS compile context
 * \param inst               the sampling instruction to prepare
 * \param base_mrf           first message register of the payload
 * \param ret_sampler_index  when non-NULL, receives the sampler index
 * \return the message descriptor built by tsrc_imm_mdesc_sampler()
 */
static struct toy_src
vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
                         const struct toy_inst *inst,
                         int base_mrf, unsigned *ret_sampler_index)
{
   struct toy_compiler *tc = &vcc->tc;
   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
   struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si;
   int num_coords, ref_pos, num_derivs;
   int sampler_src;

   simd_mode = GEN6_MSG_SAMPLER_SIMD4X2;

   coords = inst->src[0];
   ddx = tsrc_null();
   ddy = tsrc_null();
   bias_or_lod = tsrc_null();
   ref_or_si = tsrc_null();
   num_derivs = 0;
   /* index of the source operand holding the sampler; adjusted per opcode */
   sampler_src = 1;

   num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);

   /* extract the parameters */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TXD:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
         ref_or_si = tsrc_swizzle1(coords, ref_pos);

         if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
            tc_fail(tc, "TXD with shadow sampler not supported");
      }
      else {
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
      }

      ddx = inst->src[1];
      ddy = inst->src[2];
      num_derivs = num_coords;
      sampler_src = 3;
      break;
   case TOY_OPCODE_TGSI_TXL:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
         ref_or_si = tsrc_swizzle1(coords, ref_pos);
      }
      else {
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
      }

      /* the lod is in the W channel of the coordinates */
      bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
      break;
   case TOY_OPCODE_TGSI_TXF:
      msg_type = GEN6_MSG_SAMPLER_LD;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_2D_MSAA:
      case TGSI_TEXTURE_2D_ARRAY_MSAA:
         assert(ref_pos >= 0 && ref_pos < 4);
         /* lod is always 0 */
         bias_or_lod = tsrc_imm_d(0);
         ref_or_si = tsrc_swizzle1(coords, ref_pos);
         break;
      default:
         bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
         break;
      }

      /* offset the coordinates */
      if (!tsrc_is_null(inst->tex.offsets[0])) {
         struct toy_dst tmp;

         tmp = tc_alloc_tmp(tc);
         tc_ADD(tc, tmp, coords, inst->tex.offsets[0]);
         coords = tsrc_from(tmp);
      }

      sampler_src = 1;
      break;
   case TOY_OPCODE_TGSI_TXQ:
      msg_type = GEN6_MSG_SAMPLER_RESINFO;
      num_coords = 0;
      bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X);
      break;
   case TOY_OPCODE_TGSI_TXQ_LZ:
      msg_type = GEN6_MSG_SAMPLER_RESINFO;
      num_coords = 0;
      sampler_src = 0;
      break;
   case TOY_OPCODE_TGSI_TXL2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
         ref_or_si = tsrc_swizzle1(coords, ref_pos);
      }
      else {
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
      }

      /* the lod comes from the X channel of the second source */
      bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X);
      sampler_src = 2;
      break;
   default:
      assert(!"unhandled sampling opcode");
      if (ret_sampler_index)
         *ret_sampler_index = 0;
      return tsrc_null();
      break;
   }

   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
   sampler_index = inst->src[sampler_src].val32;
   binding_table_index = vcc->shader->bt.tex_base + sampler_index;

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
    *
    *     "Note that the (cube map) coordinates delivered to the sampling
    *      engine must already have been divided by the component with the
    *      largest absolute value."
    */
   switch (inst->tex.target) {
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      /* TXQ does not need coordinates */
      if (num_coords >= 3) {
         struct toy_dst tmp, max;
         struct toy_src abs_coords[3];
         unsigned i;

         tmp = tc_alloc_tmp(tc);
         max = tdst_writemask(tmp, TOY_WRITEMASK_W);

         for (i = 0; i < 3; i++)
            abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i));

         /* max = max(|x|, |y|, |z|), then invert it for the divide */
         tc_SEL(tc, max, abs_coords[0], abs_coords[1], GEN6_COND_GE);
         tc_SEL(tc, max, tsrc_from(max), abs_coords[2], GEN6_COND_GE);
         tc_INV(tc, max, tsrc_from(max));

         for (i = 0; i < 3; i++)
            tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max));

         coords = tsrc_from(tmp);
      }
      break;
   default:
      break;
   }

   /* set up sampler parameters */
   msg_len = vs_add_sampler_params(tc, msg_type, base_mrf,
         coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
    *
    *     "The maximum message length allowed to the sampler is 11. This would
    *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
    *      SIMD16."
    */
   if (msg_len > 11)
      tc_fail(tc, "maximum length for messages to the sampler is 11");

   if (ret_sampler_index)
      *ret_sampler_index = sampler_index;

   return tsrc_imm_mdesc_sampler(tc, msg_len, 1,
         false, simd_mode, msg_type, sampler_index, binding_table_index);
}
/**
 * Emit the MOVs that copy the sampling parameters into up to three message
 * registers starting at \p base_mrf.
 *
 * \return the message length in registers, i.e. the number of parameters
 *         divided by four and rounded up.  An unknown \p msg_type fails the
 *         compile and yields 0.
 */
static int
vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
                      struct toy_src coords, int num_coords,
                      struct toy_src bias_or_lod, struct toy_src ref_or_si,
                      struct toy_src ddx, struct toy_src ddy, int num_derivs)
{
   /* one writemask bit per coordinate in use */
   const unsigned used_coords = (1 << num_coords) - 1;
   struct toy_dst mrf[3];
   int param_count, r;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

   for (r = 0; r < Elements(mrf); r++)
      mrf[r] = tdst(TOY_FILE_MRF, base_mrf + r, 0);

   if (msg_type == GEN6_MSG_SAMPLER_SAMPLE_L) {
      /* coordinates, then the lod */
      tc_MOV(tc, tdst_writemask(mrf[0], used_coords), coords);
      tc_MOV(tc, tdst_writemask(mrf[1], TOY_WRITEMASK_X), bias_or_lod);
      param_count = 5;
   }
   else if (msg_type == GEN6_MSG_SAMPLER_SAMPLE_D) {
      /* coordinates, then the derivatives with ddx and ddy interleaved */
      tc_MOV(tc, tdst_writemask(mrf[0], used_coords), coords);
      tc_MOV(tc, tdst_writemask(mrf[1], TOY_WRITEMASK_XZ),
            tsrc_swizzle(ddx, 0, 0, 1, 1));
      tc_MOV(tc, tdst_writemask(mrf[1], TOY_WRITEMASK_YW),
            tsrc_swizzle(ddy, 0, 0, 1, 1));
      if (num_derivs >= 3) {
         tc_MOV(tc, tdst_writemask(mrf[2], TOY_WRITEMASK_X),
               tsrc_swizzle1(ddx, 2));
         tc_MOV(tc, tdst_writemask(mrf[2], TOY_WRITEMASK_Y),
               tsrc_swizzle1(ddy, 2));
      }
      param_count = 4 + num_derivs * 2;
   }
   else if (msg_type == GEN6_MSG_SAMPLER_SAMPLE_L_C) {
      /* coordinates, then the reference value and the lod */
      tc_MOV(tc, tdst_writemask(mrf[0], used_coords), coords);
      tc_MOV(tc, tdst_writemask(mrf[1], TOY_WRITEMASK_X), ref_or_si);
      tc_MOV(tc, tdst_writemask(mrf[1], TOY_WRITEMASK_Y), bias_or_lod);
      param_count = 6;
   }
   else if (msg_type == GEN6_MSG_SAMPLER_LD) {
      assert(num_coords <= 3);

      /* integer coordinates with the lod in W */
      tc_MOV(tc, tdst_writemask(tdst_d(mrf[0]), used_coords), coords);
      tc_MOV(tc, tdst_writemask(tdst_d(mrf[0]), TOY_WRITEMASK_W),
            bias_or_lod);

      if (ilo_dev_gen(tc->dev) < ILO_GEN(7)) {
         /* before GEN7, ref_or_si is sent as a fifth parameter */
         tc_MOV(tc, tdst_writemask(tdst_d(mrf[1]), TOY_WRITEMASK_X),
               ref_or_si);
         param_count = 5;
      }
      else {
         param_count = 4;
      }
   }
   else if (msg_type == GEN6_MSG_SAMPLER_RESINFO) {
      /* only the lod is sent */
      tc_MOV(tc, tdst_writemask(tdst_d(mrf[0]), TOY_WRITEMASK_X),
            bias_or_lod);
      param_count = 1;
   }
   else {
      tc_fail(tc, "unknown sampler opcode");
      param_count = 0;
   }

   /* four parameters fit in one message register */
   return (param_count + 3) / 4;
}
Example #12
0
/**
 * Set up message registers and return the message descriptor for sampling.
 *
 * On failure (unsupported execution size, or an unhandled opcode when
 * assertions are compiled out), a null source is returned and
 * *ret_sampler_index is set to 0 so that the caller never reads an
 * uninitialized value.
 *
 * \param tc                 the compiler
 * \param inst               the sampling instruction to prepare
 * \param base_mrf           first message register of the payload
 * \param saturate_coords    per-coordinate bitmasks, indexed by coordinate
 *                           and tested against (1 << sampler index), that
 *                           select which coordinates get saturated
 * \param ret_sampler_index  when non-NULL, receives the sampler index
 * \return the message descriptor built by tsrc_imm_mdesc_sampler()
 */
static struct toy_src
fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
                         int base_mrf, const uint32_t *saturate_coords,
                         unsigned *ret_sampler_index)
{
   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
   struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
   int num_coords, ref_pos, num_derivs;
   int sampler_src, param_size, i;

   /* param_size is the number of message registers taken by a parameter */
   switch (inst->exec_size) {
   case BRW_EXECUTE_8:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
      param_size = 1;
      break;
   case BRW_EXECUTE_16:
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
      param_size = 2;
      break;
   default:
      tc_fail(tc, "unsupported execute size for sampling");
      if (ret_sampler_index)
         *ret_sampler_index = 0;
      return tsrc_null();
   }

   num_coords = toy_tgsi_get_texture_coord_dim(inst->tex.target, &ref_pos);
   tsrc_transpose(inst->src[0], coords);
   bias_or_lod = tsrc_null();
   ref_or_si = tsrc_null();
   num_derivs = 0;
   sampler_src = 1;

   /*
    * For TXD,
    *
    *   src0 := (x, y, z, w)
    *   src1 := ddx
    *   src2 := ddy
    *   src3 := sampler
    *
    * For TEX2, TXB2, and TXL2,
    *
    *   src0 := (x, y, z, w)
    *   src1 := (v or bias or lod, ...)
    *   src2 := sampler
    *
    * For TEX, TXB, TXL, and TXP,
    *
    *   src0 := (x, y, z, w or bias or lod or projection)
    *   src1 := sampler
    *
    * For TXQ,
    *
    *   src0 := (lod, ...)
    *   src1 := sampler
    *
    * For TXQ_LZ,
    *
    *   src0 := sampler
    *
    * And for TXF,
    *
    *   src0 := (x, y, z, w or lod)
    *   src1 := sampler
    *
    * State trackers should not generate opcode+texture combinations with
    * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
    */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TEX:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }
      break;
   case TOY_OPCODE_TGSI_TXD:
      if (ref_pos >= 0)
         tc_fail(tc, "TXD with shadow sampler not supported");

      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      tsrc_transpose(inst->src[1], ddx);
      tsrc_transpose(inst->src[2], ddy);
      num_derivs = num_coords;
      sampler_src = 3;
      break;
   case TOY_OPCODE_TGSI_TXP:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      /* project the coordinates */
      {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         tc_INV(tc, tmp[3], coords[3]);
         for (i = 0; i < num_coords && i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }

         /* project the reference too when the loop above did not cover it */
         if (ref_pos >= i) {
            tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
            ref_or_si = tsrc_from(tmp[ref_pos]);
         }
      }
      break;
   case TOY_OPCODE_TGSI_TXB:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXL:
      if (ref_pos >= 0) {
         assert(ref_pos < 3);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      bias_or_lod = coords[3];
      break;
   case TOY_OPCODE_TGSI_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_2D_MSAA:
      case TGSI_TEXTURE_2D_ARRAY_MSAA:
         assert(ref_pos >= 0 && ref_pos < 4);
         /* lod is always 0 */
         bias_or_lod = tsrc_imm_d(0);
         ref_or_si = coords[ref_pos];
         break;
      default:
         bias_or_lod = coords[3];
         break;
      }

      /* offset the coordinates */
      if (!tsrc_is_null(inst->tex.offsets[0])) {
         struct toy_dst tmp[4];
         struct toy_src offsets[4];

         tc_alloc_tmp4(tc, tmp);
         tsrc_transpose(inst->tex.offsets[0], offsets);

         for (i = 0; i < num_coords; i++) {
            tc_ADD(tc, tmp[i], coords[i], offsets[i]);
            coords[i] = tsrc_from(tmp[i]);
         }
      }

      sampler_src = 1;
      break;
   case TOY_OPCODE_TGSI_TXQ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      bias_or_lod = coords[0];
      break;
   case TOY_OPCODE_TGSI_TXQ_LZ:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      num_coords = 0;
      sampler_src = 0;
      break;
   case TOY_OPCODE_TGSI_TEX2:
      if (ref_pos >= 0) {
         assert(ref_pos < 5);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;

         /* a reference position of 4 or more lives in the second source */
         if (ref_pos >= 4) {
            struct toy_src src1[4];
            tsrc_transpose(inst->src[1], src1);
            ref_or_si = src1[ref_pos - 4];
         }
         else {
            ref_or_si = coords[ref_pos];
         }
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXB2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      }

      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   case TOY_OPCODE_TGSI_TXL2:
      if (ref_pos >= 0) {
         assert(ref_pos < 4);

         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
         ref_or_si = coords[ref_pos];
      }
      else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }

      {
         struct toy_src src1[4];
         tsrc_transpose(inst->src[1], src1);
         bias_or_lod = src1[0];
      }

      sampler_src = 2;
      break;
   default:
      assert(!"unhandled sampling opcode");
      if (ret_sampler_index)
         *ret_sampler_index = 0;
      return tsrc_null();
   }

   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
   sampler_index = inst->src[sampler_src].val32;
   binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
    *
    *     "Note that the (cube map) coordinates delivered to the sampling
    *      engine must already have been divided by the component with the
    *      largest absolute value."
    */
   switch (inst->tex.target) {
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      /* TXQ does not need coordinates */
      if (num_coords >= 3) {
         struct toy_dst tmp[4];

         tc_alloc_tmp4(tc, tmp);

         /* tmp[3] = 1 / max(|x|, |y|, |z|) */
         tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
               tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
         tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
               tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
         tc_INV(tc, tmp[3], tsrc_from(tmp[3]));

         for (i = 0; i < 3; i++) {
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
            coords[i] = tsrc_from(tmp[i]);
         }
      }
      break;
   }

   /*
    * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
    * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
    * so that sampling outside the border gets the correct colors.
    */
   for (i = 0; i < MIN2(num_coords, 3); i++) {
      bool is_rect;

      if (!(saturate_coords[i] & (1 << sampler_index)))
         continue;

      switch (inst->tex.target) {
      case TGSI_TEXTURE_RECT:
      case TGSI_TEXTURE_SHADOWRECT:
         is_rect = true;
         break;
      default:
         is_rect = false;
         break;
      }

      if (is_rect) {
         struct toy_src min, max;
         struct toy_dst tmp;

         tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
         tmp = tc_alloc_tmp(tc);

         /* saturate to [0, width] or [0, height] */
         /* TODO TXQ? */
         min = tsrc_imm_f(0.0f);
         max = tsrc_imm_f(2048.0f);

         tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
         tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);

         coords[i] = tsrc_from(tmp);
      }
      else {
         struct toy_dst tmp;
         struct toy_inst *inst2;

         tmp = tc_alloc_tmp(tc);

         /* saturate to [0.0f, 1.0f] */
         inst2 = tc_MOV(tc, tmp, coords[i]);
         inst2->saturate = true;

         coords[i] = tsrc_from(tmp);
      }
   }

   /* set up sampler parameters */
   if (tc->gen >= ILO_GEN(7)) {
      msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }
   else {
      msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
   }

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
    *
    *     "The maximum message length allowed to the sampler is 11. This would
    *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
    *      SIMD16."
    */
   if (msg_len > 11)
      tc_fail(tc, "maximum length for messages to the sampler is 11");

   if (ret_sampler_index)
      *ret_sampler_index = sampler_index;

   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
         false, simd_mode, msg_type, sampler_index, binding_table_index);
}
Example #13
0
/**
 * Emit the MOVs that load the sampler message parameters for \p msg_type.
 *
 * Parameter slot p is the MRF destination at index
 * base_mrf + p * param_size.  Which sources go to which slot depends on the
 * message type; see the per-case comments below.
 *
 * \return the number of parameter slots counted for the message multiplied
 *         by \p param_size, or 0 after tc_fail() for an unknown message type.
 */
static int
fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
                           int base_mrf, int param_size,
                           struct toy_src *coords, int num_coords,
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
                           struct toy_src *ddx, struct toy_src *ddy,
                           int num_derivs)
{
   /* first slot of a contiguous run of all coordinates, or -1 if none */
   int coord_base = -1;
   int total = 0;
   int c;

   assert(num_coords <= 4);
   assert(num_derivs <= 3 && num_derivs <= num_coords);

#define PARAM_DST(slot) (tdst(TOY_FILE_MRF, base_mrf + (slot) * param_size, 0))
   switch (msg_type) {
   case GEN5_SAMPLER_MESSAGE_SAMPLE:
      /* coordinates only */
      coord_base = 0;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
      /* bias/lod, then coordinates */
      tc_MOV(tc, PARAM_DST(0), bias_or_lod);
      coord_base = 1;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
      /* reference value, then coordinates */
      tc_MOV(tc, PARAM_DST(0), ref_or_si);
      coord_base = 1;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
      /* reference value, bias/lod, then coordinates */
      tc_MOV(tc, PARAM_DST(0), ref_or_si);
      tc_MOV(tc, PARAM_DST(1), bias_or_lod);
      coord_base = 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
      /* each coordinate owns three slots: coordinate, ddx, ddy */
      for (c = 0; c < num_coords; c++) {
         const int slot = 3 * c;

         tc_MOV(tc, PARAM_DST(slot), coords[c]);
         if (c >= num_derivs)
            continue;

         tc_MOV(tc, PARAM_DST(slot + 1), ddx[c]);
         tc_MOV(tc, PARAM_DST(slot + 2), ddy[c]);
      }

      /*
       * When there are more coordinates than derivative pairs, the two
       * trailing derivative slots are not counted.
       */
      total = 3 * num_coords;
      if (num_coords > num_derivs)
         total -= 2;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
      assert(num_coords >= 1 && num_coords <= 3);

      /* first coordinate, lod, then the remaining coordinates; all as D */
      tc_MOV(tc, tdst_d(PARAM_DST(0)), coords[0]);
      tc_MOV(tc, tdst_d(PARAM_DST(1)), bias_or_lod);
      for (c = 2; c <= num_coords; c++)
         tc_MOV(tc, tdst_d(PARAM_DST(c)), coords[c - 1]);
      total = num_coords + 1;
      break;
   case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
      /* a single D-typed parameter taken from bias_or_lod */
      tc_MOV(tc, tdst_d(PARAM_DST(0)), bias_or_lod);
      total = 1;
      break;
   default:
      tc_fail(tc, "unknown sampler opcode");
      total = 0;
      break;
   }

   /* shared tail for the layouts that end with all coordinates in order */
   if (coord_base >= 0) {
      for (c = 0; c < num_coords; c++)
         tc_MOV(tc, PARAM_DST(coord_base + c), coords[c]);
      total = coord_base + num_coords;
   }
#undef PARAM_DST

   return total * param_size;
}
Example #14
0
/**
 * Lowering hook for TGSI indirect fetch/store instructions.
 *
 * It performs no lowering: it only flags the compile as failed through
 * tc_fail().  \p inst is not examined.
 */
static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;

   (void) inst;

   tc_fail(tc, "no TGSI indirection support");
}
Example #15
0
/**
 * Lower the virtual opcodes of a fragment shader in two passes over the
 * instruction list of the compiler.
 *
 * The first pass hands TGSI virtual opcodes to their lowering helpers.  The
 * second pass hands math, derivative, FB write and KIL opcodes to theirs, and
 * reports any other opcode greater than 127 through tc_fail().
 */
static void
fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_inst *cur;

   /*
    * Pass 1: TGSI opcodes.  They go first because lowering them might
    * produce other virtual opcodes, which pass 2 then picks up.
    */
   tc_head(tc);
   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc)) {
      switch (cur->opcode) {
      /* direct register file accesses */
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         fs_lower_opcode_tgsi_direct(fcc, cur);
         break;

      /* indirect register file accesses */
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         fs_lower_opcode_tgsi_indirect(fcc, cur);
         break;

      /* texture sampling and queries */
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         fs_lower_opcode_tgsi_sampling(fcc, cur);
         break;

      /* everything else is left for the second pass */
      default:
         break;
      }
   }

   /* Pass 2: the remaining virtual opcodes. */
   tc_head(tc);
   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc)) {
      switch (cur->opcode) {
      /* math opcodes */
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, cur);
         break;

      /* screen-space derivatives */
      case TOY_OPCODE_DDX:
      case TOY_OPCODE_DDY:
         fs_lower_opcode_derivative(tc, cur);
         break;

      case TOY_OPCODE_FB_WRITE:
         fs_lower_opcode_fb_write(tc, cur);
         break;

      case TOY_OPCODE_KIL:
         fs_lower_opcode_kil(tc, cur);
         break;

      default:
         /* any opcode above 127 that reaches here has no lowering */
         if (cur->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}