Esempio n. 1
0
void
gen8_vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
                                                  struct brw_reg src0,
                                                  struct brw_reg src1)
{
   /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.3):
    *
    *     Slot 0 Offset. This field, after adding to the Global Offset field
    *     in the message descriptor, specifies the offset (in 256-bit units)
    *     from the start of the URB entry, as referenced by URB Handle 0, at
    *     which the data will be accessed.
    *
    * Similar text describes DWORD M0.4, which is slot 1 offset.
    *
    * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
    * of the register for geometry shader invocations 0 and 1) by the
    * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
    *
    * We can do this with the following EU instruction:
    *
    *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
    */
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst =
      MUL(suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), src1);
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}
Esempio n. 2
0
gen8_instruction *
gen8_generator::next_inst(unsigned opcode)
{
   gen8_instruction *inst;

   if (nr_inst + 1 > unsigned(store_size)) {
      store_size <<= 1;
      store = reralloc(mem_ctx, store, gen8_instruction, store_size);
      assert(store);
   }

   next_inst_offset += 16;
   inst = &store[nr_inst++];

   memset(inst, 0, sizeof(gen8_instruction));

   gen8_set_opcode(inst, opcode);
   gen8_set_exec_size(inst, default_state.exec_size);
   gen8_set_access_mode(inst, default_state.access_mode);
   gen8_set_mask_control(inst, default_state.mask_control);
   gen8_set_qtr_control(inst, default_state.qtr_control);
   gen8_set_cond_modifier(inst, default_state.conditional_mod);
   gen8_set_pred_control(inst, default_state.predicate);
   gen8_set_pred_inv(inst, default_state.predicate_inverse);
   gen8_set_saturate(inst, default_state.saturate);
   gen8_set_flag_subreg_nr(inst, default_state.flag_subreg_nr);
   return inst;
}
Esempio n. 3
0
void
gen8_vec4_generator::generate_urb_write(vec4_instruction *ir, bool vs)
{
   struct brw_reg header = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);

   /* Copy g0. */
   if (vs)
      MOV_RAW(header, brw_vec8_grf(0, 0));

   gen8_instruction *inst;
   if (!(ir->urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_OWORD message header */
      default_state.access_mode = BRW_ALIGN_1;
      inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
                       BRW_REGISTER_TYPE_UD),
                retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
                brw_imm_ud(0xff00));
      gen8_set_mask_control(inst, BRW_MASK_DISABLE);
      default_state.access_mode = BRW_ALIGN_16;
   }

   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, ir->urb_write_flags, ir->mlen, 0, ir->offset,
                        true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, header);
}
Esempio n. 4
0
gen8_instruction *
gen8_generator::MOV_RAW(struct brw_reg dst, struct brw_reg src0)
{
   gen8_instruction *inst = next_inst(BRW_OPCODE_MOV);
   gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
   gen8_set_src0(brw, inst, retype(src0, BRW_REGISTER_TYPE_UD));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   return inst;
}
Esempio n. 5
0
void
gen8_vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
                                                   struct brw_reg src)
{
   assert(src.file == BRW_IMMEDIATE_VALUE);

   default_state.access_mode = BRW_ALIGN_1;

   gen8_instruction *inst = MOV(suboffset(vec1(dst), 2), src);
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   default_state.access_mode = BRW_ALIGN_16;
}
Esempio n. 6
0
void
gen8_vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
   /* We want to left shift just DWORD 4 (the x component belonging to the
    * second geometry shader invocation) by 4 bits.  So generate the
    * instruction:
    *
    *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
    */
   dst = suboffset(vec1(dst), 4);
   default_state.access_mode = BRW_ALIGN_1;
   gen8_instruction *inst = SHL(dst, dst, brw_imm_ud(4));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;
}
Esempio n. 7
0
void
gen8_vec4_generator::generate_gs_thread_end(vec4_instruction *ir)
{
   struct brw_reg src = brw_vec8_grf(GEN7_MRF_HACK_START + ir->base_mrf, 0);
   gen8_instruction *inst;

   /* Enable Channel Masks in the URB_WRITE_HWORD message header */
   default_state.access_mode = BRW_ALIGN_1;
   inst = OR(retype(brw_vec1_grf(GEN7_MRF_HACK_START + ir->base_mrf, 5),
                    BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00)); /* could be 0x1100 but shouldn't matter */
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);
   default_state.access_mode = BRW_ALIGN_16;

   /* mlen = 2: g0 header + vertex count */
   inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_urb_message(brw, inst, BRW_URB_WRITE_EOT, 2, 0, 0, true);
   gen8_set_dst(brw, inst, brw_null_reg());
   gen8_set_src0(brw, inst, src);
}
Esempio n. 8
0
void
gen8_vec4_generator::generate_tex(vec4_instruction *ir, struct brw_reg dst)
{
   int msg_type = 0;

   switch (ir->opcode) {
   case SHADER_OPCODE_TEX:
   case SHADER_OPCODE_TXL:
      if (ir->shadow_compare) {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
      } else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
      }
      break;
   case SHADER_OPCODE_TXD:
      if (ir->shadow_compare) {
         msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
      } else {
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
      }
      break;
   case SHADER_OPCODE_TXF:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
      break;
   case SHADER_OPCODE_TXF_CMS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
      break;
   case SHADER_OPCODE_TXF_MCS:
      msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
      break;
   case SHADER_OPCODE_TXS:
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
      break;
   case SHADER_OPCODE_TG4:
      if (ir->shadow_compare) {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
      } else {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
      }
      break;
   case SHADER_OPCODE_TG4_OFFSET:
      if (ir->shadow_compare) {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
      } else {
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
      }
      break;
   default:
      assert(!"should not get here: invalid VS texture opcode");
      break;
   }

   if (ir->header_present) {
      MOV_RAW(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD),
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      default_state.access_mode = BRW_ALIGN_1;

      if (ir->texture_offset) {
         /* Set the offset bits in DWord 2. */
         MOV_RAW(retype(brw_vec1_reg(MRF, ir->base_mrf, 2),
                        BRW_REGISTER_TYPE_UD),
                 brw_imm_ud(ir->texture_offset));
      }

      if (ir->sampler >= 16) {
         /* The "Sampler Index" field can only store values between 0 and 15.
          * However, we can add an offset to the "Sampler State Pointer"
          * field, effectively selecting a different set of 16 samplers.
          *
          * The "Sampler State Pointer" needs to be aligned to a 32-byte
          * offset, and each sampler state is only 16-bytes, so we can't
          * exclusively use the offset - we have to use both.
          */
         gen8_instruction *add =
            ADD(get_element_ud(brw_message_reg(ir->base_mrf), 3),
                get_element_ud(brw_vec8_grf(0, 0), 3),
                brw_imm_ud(16 * (ir->sampler / 16) *
                           sizeof(gen7_sampler_state)));
         gen8_set_mask_control(add, BRW_MASK_DISABLE);
      }

      default_state.access_mode = BRW_ALIGN_16;
   }

   uint32_t surf_index =
      prog_data->base.binding_table.texture_start + ir->sampler;

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, dst);
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_sampler_message(brw, inst,
                            surf_index,
                            ir->sampler % 16,
                            msg_type,
                            1,
                            ir->mlen,
                            ir->header_present,
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2);

   mark_surface_used(surf_index);
}
Esempio n. 9
0
void
gen8_vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
                                                   struct brw_reg src)
{
   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
    * Header: M0.5):
    *
    *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
    *
    *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
    *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
    *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
    *        channel enable to determine the final channel enable.  For the
    *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
    *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
    *        in the writeback message.  For the URB_WRITE_OWORD &
    *        URB_WRITE_HWORD messages, when final channel enable is 1 it
    *        indicates that Vertex 1 DATA [3] will be written to the surface.
    *
    *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
    *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
    *
    *     14 Vertex 1 DATA [2] Channel Mask
    *     13 Vertex 1 DATA [1] Channel Mask
    *     12 Vertex 1 DATA [0] Channel Mask
    *     11 Vertex 0 DATA [3] Channel Mask
    *     10 Vertex 0 DATA [2] Channel Mask
    *      9 Vertex 0 DATA [1] Channel Mask
    *      8 Vertex 0 DATA [0] Channel Mask
    *
    * (This is from a section of the PRM that is agnostic to the particular
    * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
    * geometry shader invocations 0 and 1, respectively).  Since we have the
    * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
    * and the enable flags for geometry shader invocation 1 in bits 7:0 of
    * DWORD 4, we just need to OR them together and store the result in bits
    * 15:8 of DWORD 5.
    *
    * It's easier to get the EU to do this if we think of the src and dst
    * registers as composed of 32 bytes each; then, we want to pick up the
    * contents of bytes 0 and 16 from src, OR them together, and store them in
    * byte 21.
    *
    * We can do that by the following EU instruction:
    *
    *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
    *
    * Note: this relies on the source register having zeros in (a) bits 7:4 of
    * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
    * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
    * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
    * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
    * contain valid channel mask values (which are in the range 0x0-0xf).
    */
   dst = retype(dst, BRW_REGISTER_TYPE_UB);
   src = retype(src, BRW_REGISTER_TYPE_UB);

   default_state.access_mode = BRW_ALIGN_1;

   gen8_instruction *inst =
      OR(suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
   gen8_set_mask_control(inst, BRW_MASK_DISABLE);

   default_state.access_mode = BRW_ALIGN_16;
}