Beispiel #1
0
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_MSG_DP_SVB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_DP_RC);
}
static void
vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
                                struct toy_dst dst, int dim,
                                struct toy_src idx)
{
   struct toy_compiler *tc = &vcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   struct toy_src desc;

   if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
      return;

   /*
    * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
    * changed from OWord Dual Block Read to ld to increase performance in the
    * classic driver.  Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set offset */
   tc_MOV(tc, offset, idx);

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         GEN6_MSG_SAMPLER_SIMD4X2,
         GEN6_MSG_SAMPLER_LD,
         0,
         vcc->shader->bt.const_base + dim);

   tc_SEND(tc, dst, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
}
static void
vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
                                struct toy_dst dst, int dim,
                                struct toy_src idx)
{
   const struct toy_dst header =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
   const struct toy_dst block_offsets =
      tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0));
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_compiler *tc = &vcc->tc;
   unsigned msg_type, msg_ctrl, msg_len;
   struct toy_inst *inst;
   struct toy_src desc;

   if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
      return;

   /* set message header */
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;

   /* set block offsets */
   tc_MOV(tc, block_offsets, idx);

   msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ;
   msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;
   msg_len = 2;

   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
         msg_type, msg_ctrl, vcc->shader->bt.const_base + dim);

   tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache);
}
Beispiel #4
0
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         false, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, GEN6_SFID_URB);
}
Beispiel #5
0
static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   const struct toy_dst header =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   const struct toy_dst global_offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
   struct toy_compiler *tc = &fcc->tc;
   unsigned msg_type, msg_ctrl, msg_len;
   struct toy_inst *inst;
   struct toy_src desc;
   struct toy_dst tmp, real_dst[4];
   int i;

   /* set message header */
   inst = tc_MOV(tc, header, r0);
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* set global offset */
   inst = tc_MOV(tc, global_offset, idx);
   inst->mask_ctrl = BRW_MASK_DISABLE;
   inst->exec_size = BRW_EXECUTE_1;
   inst->src[0].rect = TOY_RECT_010;

   msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
   msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
   msg_len = 1;

   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
         msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);

   tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);

   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
Beispiel #6
0
static void
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
                                struct toy_dst dst, int dim, struct toy_src idx)
{
   struct toy_compiler *tc = &fcc->tc;
   const struct toy_dst offset =
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
   struct toy_src desc;
   struct toy_inst *inst;
   struct toy_dst tmp, real_dst[4];
   int i;

   /*
    * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
    * changed from OWord Block Read to ld to increase performance in the
    * classic driver.  Since we use the constant cache instead of the data
    * cache, I wonder if we still want to follow the classic driver.
    */

   /* set offset */
   inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
         BRW_SAMPLER_SIMD_MODE_SIMD4X2,
         GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
         0,
         ILO_WM_CONST_SURFACE(dim));

   tmp = tc_alloc_tmp(tc);
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tdst_transpose(dst, real_dst);
   for (i = 0; i < 4; i++) {
      const struct toy_src src =
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);

      /* cast to type D to make sure these are raw moves */
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
   }
}
Beispiel #7
0
static void
cs_dummy(struct cs_compile_context *ccc)
{
   struct toy_compiler *tc = &ccc->tc;
   struct toy_dst header;
   struct toy_src r0, desc;
   struct toy_inst *inst;

   header = tdst_ud(tdst(TOY_FILE_MRF, ccc->first_free_mrf, 0));
   r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));

   inst = tc_MOV(tc, header, r0);
   inst->exec_size = GEN6_EXECSIZE_8;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;

   desc = tsrc_imm_mdesc(tc, true, 1, 0, true,
         GEN6_MSG_TS_RESOURCE_SELECT_NO_DEREF |
         GEN6_MSG_TS_REQUESTER_TYPE_ROOT |
         GEN6_MSG_TS_OPCODE_DEREF);

   tc_SEND(tc, tdst_null(), tsrc_from(header), desc, GEN6_SFID_SPAWNER);
}