예제 #1
0
/**
 * Lower a TGSI immediate for the fragment shader.
 *
 * Fetches the four 32-bit words of immediate \p idx and emits one MOV per
 * channel, each targeting the matching entry of the array produced by
 * tdst_transpose() from \p dst.
 */
static void
fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *values = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
   struct toy_dst chan_dst[4];
   int c = 0;

   tdst_transpose(dst, chan_dst);

   /* raw moves: both operands are viewed as UD */
   while (c < 4) {
      tc_MOV(&fcc->tc, tdst_ud(chan_dst[c]), tsrc_imm_ud(values[c]));
      c++;
   }
}
예제 #2
0
/**
 * Lower a TGSI immediate for the vertex shader.
 *
 * Fetches the four 32-bit words of immediate \p idx and emits one MOV per
 * channel into \p dst, each MOV restricted to a single-channel writemask.
 */
static void
vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *values = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL);
   int c, writemask;

   /* raw moves: writemask walks 1, 2, 4, 8 in step with the channel index */
   for (c = 0, writemask = 1; c < 4; c++, writemask <<= 1) {
      tc_MOV(&vcc->tc,
            tdst_writemask(tdst_ud(dst), writemask),
            tsrc_imm_ud(values[c]));
   }
}
예제 #3
0
/**
 * Lower a TGSI immediate for the geometry shader.
 *
 * Fetches the four 32-bit words of immediate \p idx and emits one MOV per
 * channel into \p dst, each MOV restricted to a single-channel writemask
 * and switched to GEN6_ALIGN_16 access mode.
 */
static void
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *values = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);
   int c, writemask;

   /* raw moves: writemask walks 1, 2, 4, 8 in step with the channel index */
   for (c = 0, writemask = 1; c < 4; c++, writemask <<= 1) {
      struct toy_inst *mov = tc_MOV(&gcc->tc,
            tdst_writemask(tdst_ud(dst), writemask),
            tsrc_imm_ud(values[c]));

      mov->access_mode = GEN6_ALIGN_16;
   }
}
예제 #4
0
/**
 * Emit instructions to write the VUE.
 */
static void
vs_write_vue(struct vs_compile_context *vcc)
{
   struct toy_compiler *tc = &vcc->tc;
   struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
   struct toy_dst header;
   struct toy_src r0;
   struct toy_inst *inst;
   int sent_attrs, total_attrs;

   header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;

   if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
      inst = tc_OR(tc, tdst_offset(header, 0, 5),
            tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
            tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
      inst->exec_size = GEN6_EXECSIZE_1;
      inst->access_mode = GEN6_ALIGN_1;
      inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
   }

   total_attrs = vs_collect_outputs(vcc, outs);
   sent_attrs = 0;
   while (sent_attrs < total_attrs) {
      struct toy_src desc;
      int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
      int num_attrs, msg_len, i;
      bool eot;

      num_attrs = total_attrs - sent_attrs;
      eot = true;

      /* see if we need another message */
      avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
      if (num_attrs > avail_mrf_for_attrs) {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 22:
          *
          *     "Offset. This field specifies a destination offset (in 256-bit
          *      units) from the start of the URB entry(s), as referenced by
          *      URB Return Handle n, at which the data (if any) will be
          *      written."
          *
          * As we need to offset the following messages, we must make sure
          * this one writes an even number of attributes.
          */
         num_attrs = avail_mrf_for_attrs & ~1;
         eot = false;
      }

      if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
         /* do not forget about the header */
         msg_len = 1 + num_attrs;
      }
      else {
         /*
          * From the Sandy Bridge PRM, volume 4 part 2, page 26:
          *
          *     "At least 256 bits per vertex (512 bits total, M1 & M2) must
          *      be written.  Writing only 128 bits per vertex (256 bits
          *      total, M1 only) results in UNDEFINED operation."
          *
          *     "[DevSNB] Interleave writes must be in multiples of 256 per
          *      vertex."
          *
          * That is, we must write or appear to write an even number of
          * attributes, starting from two.
          */
         if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
            num_attrs--;
            eot = false;
         }

         msg_len = 1 + align(num_attrs, 2);
      }

      for (i = 0; i < num_attrs; i++)
         tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);

      assert(sent_attrs % 2 == 0);
      desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
            eot, true, false, true, sent_attrs / 2, 0);

      tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);

      sent_attrs += num_attrs;
   }
}
예제 #5
0
/**
 * Emit instructions to write the color buffers (and the depth buffer).
 *
 * One FB_WRITE message is emitted per color buffer, and at least one message
 * is always emitted.  Each message payload is assembled in MRFs starting at
 * fcc->first_free_mrf: an optional header copied from r0, the four color
 * channels, and, for the first color buffer only, the Z channel of the
 * POSITION output when the shader declares one.
 *
 * \param fcc  fragment shader compile context; instructions are appended to
 *             fcc->tc.
 */
static void
fs_write_fb(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   /* first MRF of the color payload; moved past the header if one is used */
   int base_mrf = fcc->first_free_mrf;
   const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
   bool header_present = false;
   struct toy_src desc;
   unsigned msg_type, ctrl;
   /*
    * color_slots[n] is the index into fcc->tgsi.outputs[] of the COLOR
    * output with semantic index n, or -1 when the shader has none.
    */
   int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
   /* index into fcc->tgsi.outputs[] of the POSITION output, or -1 */
   int pos_slot = -1, cbuf, i;

   for (i = 0; i < Elements(color_slots); i++)
      color_slots[i] = -1;

   /* locate the color and position outputs declared by the shader */
   for (i = 0; i < fcc->tgsi.num_outputs; i++) {
      if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
         assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
         color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
      }
      else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
         pos_slot = i;
      }
   }

   num_cbufs = fcc->variant->u.fs.num_cbufs;
   /* still need to send EOT (and probably depth) */
   if (!num_cbufs)
      num_cbufs = 1;

   /* we need the header to specify the pixel mask or render target */
   if (fcc->tgsi.uses_kill || num_cbufs > 1) {
      const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_inst *inst;

      /* first header register is an unmasked copy of r0 */
      inst = tc_MOV(tc, header, r0);
      inst->mask_ctrl = BRW_MASK_DISABLE;
      base_mrf += fcc->num_grf_per_vrf;

      /* this is a two-register header */
      if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
         /* second header register is an unmasked copy of r1 */
         inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         base_mrf += fcc->num_grf_per_vrf;
      }

      header_present = true;
   }

   /* one FB_WRITE message per color buffer */
   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
      /*
       * Pick the shader output feeding this color buffer: output 0 for all
       * of them when fs_color0_writes_all_cbufs is set, otherwise the output
       * whose semantic index equals cbuf.  A negative slot means the shader
       * does not write this buffer.
       */
      const int slot =
         color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
      /* mrf is the next payload register to fill for this message */
      int mrf = base_mrf, vrf;
      struct toy_src src[4];

      if (slot >= 0) {
         const unsigned undefined_mask =
            fcc->tgsi.outputs[slot].undefined_mask;
         const int index = fcc->tgsi.outputs[slot].index;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* one MOV per channel, advancing num_grf_per_vrf MRFs each time */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            /* channels flagged in undefined_mask are written as 0.0f */
            if (undefined_mask & (1 << i))
               src[i] = tsrc_imm_f(0.0f);

            tc_MOV(tc, dst, src[i]);

            mrf += fcc->num_grf_per_vrf;
         }
      }
      else {
         /* use (0, 0, 0, 0) */
         for (i = 0; i < 4; i++) {
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);

            tc_MOV(tc, dst, tsrc_imm_f(0.0f));
            mrf += fcc->num_grf_per_vrf;
         }
      }

      /* select BLEND_STATE[rt] */
      if (cbuf > 0) {
         struct toy_inst *inst;

         /*
          * Scalar, unmasked write of the buffer index into dword 2 of the
          * header.  cbuf > 0 implies num_cbufs > 1, so the header was set
          * up above.
          */
         inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
         inst->mask_ctrl = BRW_MASK_DISABLE;
         inst->exec_size = BRW_EXECUTE_1;
         inst->src[0].rect = TOY_RECT_010;
      }

      /* depth is appended after the color channels, first message only */
      if (cbuf == 0 && pos_slot >= 0) {
         const int index = fcc->tgsi.outputs[pos_slot].index;
         const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
         /* note: these shadow the color src[] and vrf declared above */
         struct toy_src src[4];
         int vrf;

         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
         if (vrf >= 0) {
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
            tsrc_transpose(tmp, src);
         }
         else {
            /* use (0, 0, 0, 0) */
            tsrc_transpose(tsrc_imm_f(0.0f), src);
         }

         /* only Z */
         tc_MOV(tc, dst, src[2]);

         mrf += fcc->num_grf_per_vrf;
      }

      /* the message type follows the dispatch mode */
      msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
         BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;

      /* bit 12 is set on the last message; the message type starts at bit 8 */
      ctrl = (cbuf == num_cbufs - 1) << 12 |
             msg_type << 8;

      /*
       * mrf - fcc->first_free_mrf is the number of MRFs filled for this
       * message, header included.  The first argument after tc is true only
       * for the last message (presumably the EOT flag -- confirm against
       * tsrc_imm_mdesc_data_port()).
       */
      desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
            mrf - fcc->first_free_mrf, 0,
            header_present, false,
            GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
            ctrl, ILO_WM_DRAW_SURFACE(cbuf));

      /* the message always starts at first_free_mrf, header or not */
      tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
            tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
   }
}