예제 #1
0
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	struct brw_reg src0;
	int mlen, rlen;

	if (dw == 8) {
		/* SIMD8 sample return is not masked */
		mlen = 3;
		rlen = 4;
	} else {
		mlen = 5;
		rlen = 2;
	}

	if (p->gen >= 060)
		src0 = brw_message_reg(msg);
	else
		src0 = brw_vec8_grf(0, 0);

	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
		   channel+1, channel, WRITEMASK_W, 0,
		   rlen, mlen, true, simd(dw));

	if (dw == 8)
		result += 3;

	return result;
}
static void emit_txb( struct brw_wm_compile *c,
		      const struct brw_wm_instruction *inst,
		      struct brw_reg *dst,
		      GLuint dst_flags,
		      struct brw_reg *arg )
{
   struct brw_compile *p = &c->func;
   GLuint msgLength;

   /* Shadow ignored for txb.
    */
   switch (inst->tex_idx) {
   case TEXTURE_1D_INDEX:
      brw_MOV(p, brw_message_reg(2), arg[0]);
      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
      brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      brw_MOV(p, brw_message_reg(2), arg[0]);
      brw_MOV(p, brw_message_reg(4), arg[1]);
      brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
      break;
   default:
      brw_MOV(p, brw_message_reg(2), arg[0]);
      brw_MOV(p, brw_message_reg(4), arg[1]);
      brw_MOV(p, brw_message_reg(6), arg[2]);
      break;
   }

   brw_MOV(p, brw_message_reg(8), arg[3]);
   msgLength = 9;


   brw_SAMPLE(p, 
	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
	      1,
	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
	      inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
	      inst->tex_unit,	  /* sampler */
	      inst->writemask,
	      BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
	      8,		/* responseLength */
	      msgLength,
	      0);	

}
/* TODO
   BIAS on SIMD8 not workind yet...
 */	
static void emit_txb(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    struct brw_reg dst[4], src[4], payload_reg;
    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

    GLuint i;
    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
    for (i = 0; i < 4; i++) 
	dst[i] = get_dst_reg(c, inst, i, 1);
    for (i = 0; i < 4; i++)
	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);

    switch (inst->TexSrcTarget) {
	case TEXTURE_1D_INDEX:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
	    break;
	case TEXTURE_2D_INDEX:
	case TEXTURE_RECT_INDEX:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), src[1]);
	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
	    break;
	default:
	    brw_MOV(p, brw_message_reg(2), src[0]);
	    brw_MOV(p, brw_message_reg(3), src[1]);
	    brw_MOV(p, brw_message_reg(4), src[2]);
	    break;
    }
    brw_MOV(p, brw_message_reg(5), src[3]);
    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
    brw_SAMPLE(p,
	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
	    1,
	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
	    unit + MAX_DRAW_BUFFERS, /* surface */
	    unit,     /* sampler */
	    inst->DstReg.WriteMask,
	    BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
	    4,
	    4,
	    0);
}
예제 #4
0
static void wm_src_sample_argb(struct brw_compile *p)
{
	static const uint32_t fragment[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_write.g6b"
	};
	int n;

	brw_push_insn_state(p);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_MOV(p,
		retype(brw_vec1_grf(0,2), BRW_REGISTER_TYPE_UD),
		brw_imm_ud(0));
	brw_pop_insn_state(p);

	brw_SAMPLE(p,
		   retype(vec16(brw_vec8_grf(14, 0)), BRW_REGISTER_TYPE_UW),
		   1,
		   retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD),
		   1, 0,
		   WRITEMASK_XYZW,
		   GEN5_SAMPLER_MESSAGE_SAMPLE,
		   8,
		   5,
		   true,
		   BRW_SAMPLER_SIMD_MODE_SIMD16);


	for (n = 0; n < p->nr_insn; n++) {
		brw_disasm(stdout, &p->store[n], 60);
	}

	printf("\n\n");
	for (n = 0; n < ARRAY_SIZE(fragment); n++) {
		brw_disasm(stdout,
			   (const struct brw_instruction *)&fragment[n][0],
			   60);
	}
}
예제 #5
0
static int brw_wm_sample(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	struct brw_reg src0;
	bool header;
	int len;

	len = dw == 16 ? 4 : 2;
	if (p->gen >= 060) {
		header = false;
		src0 = brw_message_reg(++msg);
	} else {
		header = true;
		src0 = brw_vec8_grf(0, 0);
	}

	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
		   channel+1, channel, WRITEMASK_XYZW, 0,
		   2*len, len+header, header, simd(dw));
	return result;
}
예제 #6
0
void emit_txb(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   GLuint msgLength;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint response_length;
   struct brw_reg dst_retyped;

   /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
    * samples, so we'll use the 16-wide instruction, leave the second halves
    * undefined, and trust the execution mask to keep the undefined pixels
    * from mattering.
    */
   if (c->dispatch_width == 16 || intel->gen < 5) {
      if (intel->gen >= 5)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
      mrf_per_channel = 2;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      response_length = 8;
   } else {
      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
      mrf_per_channel = 1;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      response_length = 4;
   }

   /* Shadow ignored for txb. */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_CUBE_INDEX:
      brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
      brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
      brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
      break;
   default:
      /* unexpected target */
      abort();
   }

   brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
   msgLength = 2 + 4 * mrf_per_channel - 1;

   brw_SAMPLE(p, 
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      msgLength,
	      1,
	      BRW_SAMPLER_SIMD_MODE_SIMD16,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
예제 #7
0
void emit_tex(struct brw_wm_compile *c,
	      struct brw_reg *dst,
	      GLuint dst_flags,
	      struct brw_reg *arg,
	      struct brw_reg depth_payload,
	      GLuint tex_idx,
	      GLuint sampler,
	      bool shadow)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   struct brw_reg dst_retyped;
   GLuint cur_mrf = 2, response_length;
   GLuint i, nr_texcoords;
   GLuint emit;
   GLuint msg_type;
   GLuint mrf_per_channel;
   GLuint simd_mode;

   if (c->dispatch_width == 16) {
      mrf_per_channel = 2;
      response_length = 8;
      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
   } else {
      mrf_per_channel = 1;
      response_length = 4;
      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
   }

   /* How many input regs are there?
    */
   switch (tex_idx) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr_texcoords = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_1D_ARRAY_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr_texcoords = 2;
      break;
   case TEXTURE_3D_INDEX:
   case TEXTURE_2D_ARRAY_INDEX:
   case TEXTURE_CUBE_INDEX:
      emit = WRITEMASK_XYZ;
      nr_texcoords = 3;
      break;
   default:
      /* unexpected target */
      abort();
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5 && c->dispatch_width == 8)
      nr_texcoords = 3;

   if (shadow) {
      if (intel->gen < 7) {
	 /* For shadow comparisons, we have to supply u,v,r. */
	 nr_texcoords = 3;
      } else {
	 /* On Ivybridge, the shadow comparitor comes first. Just load it. */
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
	 cur_mrf += mrf_per_channel;
      }
   }

   /* Emit the texcoords. */
   for (i = 0; i < nr_texcoords; i++) {
      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
	 brw_set_saturate(p, true);

      if (emit & (1<<i))
	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;

      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
   if (shadow && intel->gen < 7) {
      if (intel->gen >= 5) {
	 /* Fill in the cube map array index value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      } else if (c->dispatch_width == 8) {
	 /* Fill in the LOD bias value. */
	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
	 cur_mrf += mrf_per_channel;
      }
      brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
      cur_mrf += mrf_per_channel;
   }

   if (intel->gen >= 5) {
      if (shadow)
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
      else
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
   } else {
      /* Note that G45 and older determines shadow compare and dispatch width
       * from message length for most messages.
       */
      if (c->dispatch_width == 16 && shadow)
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
      else
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
   }

   brw_SAMPLE(p,
	      dst_retyped,
	      1,
	      retype(depth_payload, BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(sampler),
	      sampler,
	      dst_flags & WRITEMASK_XYZW,
	      msg_type,
	      response_length,
	      cur_mrf - 1,
	      1,
	      simd_mode,
	      BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
}
예제 #8
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
    int msg_type = -1;

    if (brw->gen >= 5) {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
            }
            break;
        case SHADER_OPCODE_TXD:
            if (inst->shadow_compare) {
                /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
                assert(brw->is_haswell);
                msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
            }
            break;
        case SHADER_OPCODE_TXF:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_CMS:
            if (brw->gen >= 7)
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
            else
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_MCS:
            assert(brw->gen >= 7);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
            break;
        case SHADER_OPCODE_TXS:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
            break;
        case SHADER_OPCODE_TG4:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
            }
            break;
        case SHADER_OPCODE_TG4_OFFSET:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
            }
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    } else {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
                assert(inst->mlen == 3);
            } else {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
                assert(inst->mlen == 2);
            }
            break;
        case SHADER_OPCODE_TXD:
            /* There is no sample_d_c message; comparisons are done manually. */
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
            assert(inst->mlen == 4);
            break;
        case SHADER_OPCODE_TXF:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
            assert(inst->mlen == 2);
            break;
        case SHADER_OPCODE_TXS:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
            assert(inst->mlen == 2);
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    }

    assert(msg_type != -1);

    /* Load the message header if present.  If there's a texture offset, we need
     * to set it up explicitly and load the offset bitfield.  Otherwise, we can
     * use an implied move from g0 to the first message register.
     */
    if (inst->header_present) {
        if (brw->gen < 6 && !inst->texture_offset) {
            /* Set up an implied move from g0 to the MRF. */
            src = brw_vec8_grf(0, 0);
        } else {
            struct brw_reg header =
                retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

            /* Explicitly set up the message header by copying g0 to the MRF. */
            brw_push_insn_state(p);
            brw_set_mask_control(p, BRW_MASK_DISABLE);
            brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

            brw_set_access_mode(p, BRW_ALIGN_1);

            if (inst->texture_offset) {
                /* Set the texel offset bits in DWord 2. */
                brw_MOV(p, get_element_ud(header, 2),
                        brw_imm_ud(inst->texture_offset));
            }

            if (inst->sampler >= 16) {
                /* The "Sampler Index" field can only store values between 0 and 15.
                 * However, we can add an offset to the "Sampler State Pointer"
                 * field, effectively selecting a different set of 16 samplers.
                 *
                 * The "Sampler State Pointer" needs to be aligned to a 32-byte
                 * offset, and each sampler state is only 16-bytes, so we can't
                 * exclusively use the offset - we have to use both.
                 */
                assert(brw->is_haswell); /* field only exists on Haswell */
                brw_ADD(p,
                        get_element_ud(header, 3),
                        get_element_ud(brw_vec8_grf(0, 0), 3),
                        brw_imm_ud(16 * (inst->sampler / 16) *
                                   sizeof(gen7_sampler_state)));
            }
            brw_pop_insn_state(p);
        }
    }

    uint32_t return_format;

    switch (dst.type) {
    case BRW_REGISTER_TYPE_D:
        return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
        break;
    case BRW_REGISTER_TYPE_UD:
        return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
        break;
    default:
        return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
        break;
    }

    uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
                               inst->opcode == SHADER_OPCODE_TG4_OFFSET)
                              ? prog_data->base.binding_table.gather_texture_start
                              : prog_data->base.binding_table.texture_start) + inst->sampler;

    brw_SAMPLE(p,
               dst,
               inst->base_mrf,
               src,
               surface_index,
               inst->sampler % 16,
               msg_type,
               1, /* response length */
               inst->mlen,
               inst->header_present,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               return_format);

    brw_mark_surface_used(&prog_data->base, surface_index);
}
예제 #9
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src,
                             struct brw_reg sampler_index)
{
   int msg_type = -1;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->gen >= 8 || brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      case SHADER_OPCODE_TXF_CMS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MCS:
         assert(brw->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
         break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      case SHADER_OPCODE_TG4:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         }
         break;
      case SHADER_OPCODE_TG4_OFFSET:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
         }
         break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
	    assert(inst->mlen == 3);
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
	    assert(inst->mlen == 2);
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
	 assert(inst->mlen == 4);
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
	 assert(inst->mlen == 2);
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
	 assert(inst->mlen == 2);
	 break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   }

   assert(msg_type != -1);

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present.  If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->header_present) {
      if (brw->gen < 6 && !inst->texture_offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = brw_vec8_grf(0, 0);
      } else {
         struct brw_reg header =
            retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

         brw_set_default_access_mode(p, BRW_ALIGN_1);

         if (inst->texture_offset) {
            /* Set the texel offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header, 2),
                    brw_imm_ud(inst->texture_offset));
         }

         brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
         brw_pop_insn_state(p);
      }
   }

   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
         ? prog_data->base.binding_table.gather_texture_start
         : prog_data->base.binding_table.texture_start;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      uint32_t sampler = sampler_index.dw1.ud;

      brw_SAMPLE(p,
                 dst,
                 inst->base_mrf,
                 src,
                 sampler + base_binding_table_index,
                 sampler % 16,
                 msg_type,
                 1, /* response length */
                 inst->mlen,
                 inst->header_present,
                 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                 return_format);

      brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
   } else {
      /* Non-constant sampler index. */
      /* Note: this clobbers `dst` as a temporary before emitting the send */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
      struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));

      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* Some care required: `sampler` and `temp` may alias:
       *    addr = sampler & 0xff
       *    temp = (sampler << 8) & 0xf00
       *    addr = addr | temp
       */
      brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
      brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
      brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
      brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
      brw_OR(p, addr, addr, temp);

      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              msg_type,
                              1 /* rlen */,
                              inst->mlen /* mlen */,
                              inst->header_present /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              return_format);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, src);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
예제 #10
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   int msg_type = -1;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      default:
	 assert(!"should not get here: invalid VS texture opcode");
	 break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
	    assert(inst->mlen == 3);
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
	    assert(inst->mlen == 2);
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
	 assert(inst->mlen == 4);
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
	 assert(inst->mlen == 2);
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
	 assert(inst->mlen == 2);
	 break;
      default:
	 assert(!"should not get here: invalid VS texture opcode");
	 break;
      }
   }

   assert(msg_type != -1);

   /* Load the message header if present.  If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->texture_offset) {
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
	         retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* Then set the offset bits in DWord 2. */
      brw_set_access_mode(p, BRW_ALIGN_1);
      brw_MOV(p,
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
		     BRW_REGISTER_TYPE_UD),
	      brw_imm_uw(inst->texture_offset));
      brw_set_access_mode(p, BRW_ALIGN_16);
   } else if (inst->header_present) {
      /* Set up an implied move from g0 to the MRF. */
      src = brw_vec8_grf(0, 0);
   }

   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   brw_SAMPLE(p,
	      dst,
	      inst->base_mrf,
	      src,
	      SURF_INDEX_VS_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      1, /* response length */
	      inst->mlen,
	      inst->header_present,
	      BRW_SAMPLER_SIMD_MODE_SIMD4X2,
	      return_format);
}
static void emit_tex( struct brw_wm_compile *c,
		      const struct brw_wm_instruction *inst,
		      struct brw_reg *dst,
		      GLuint dst_flags,
		      struct brw_reg *arg )
{
   struct brw_compile *p = &c->func;
   GLuint msgLength, responseLength;
   GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
   GLuint i, nr;
   GLuint emit;

   /* How many input regs are there?
    */
   switch (inst->tex_idx) {
   case TEXTURE_1D_INDEX:
      emit = WRITEMASK_X;
      nr = 1;
      break;
   case TEXTURE_2D_INDEX:
   case TEXTURE_RECT_INDEX:
      emit = WRITEMASK_XY;
      nr = 2;
      break;
   default:
      emit = WRITEMASK_XYZ;
      nr = 3;
      break;
   }

   if (shadow) {
      nr = 4;
      emit |= WRITEMASK_W;
   }

   msgLength = 1;

   for (i = 0; i < nr; i++) {
      static const GLuint swz[4] = {0,1,2,2};
      if (emit & (1<<i)) 
	 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
      else
	 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
      msgLength += 2;
   }

   responseLength = 8;		/* always */

   brw_SAMPLE(p, 
	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
	      1,
	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
	      inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */
	      inst->tex_unit,	  /* sampler */
	      inst->writemask,
	      (shadow ? 
	       BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : 
	       BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
	      responseLength,
	      msgLength,
	      0);	
}
static void emit_tex(struct brw_wm_compile *c,
		struct prog_instruction *inst)
{
    struct brw_compile *p = &c->func;
    struct brw_reg dst[4], src[4], payload_reg;
    GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];

    GLuint msg_len;
    GLuint i, nr;
    GLuint emit;
    GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;

    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);

    for (i = 0; i < 4; i++) 
	dst[i] = get_dst_reg(c, inst, i, 1);
    for (i = 0; i < 4; i++)
	src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);


    switch (inst->TexSrcTarget) {
	case TEXTURE_1D_INDEX:
	    emit = WRITEMASK_X;
	    nr = 1;
	    break;
	case TEXTURE_2D_INDEX:
	case TEXTURE_RECT_INDEX:
	    emit = WRITEMASK_XY;
	    nr = 2;
	    break;
	default:
	    emit = WRITEMASK_XYZ;
	    nr = 3;
	    break;
    }
    msg_len = 1;

    for (i = 0; i < nr; i++) {
	static const GLuint swz[4] = {0,1,2,2};
	if (emit & (1<<i))
	    brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
	else
	    brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
	msg_len += 1;
    }

    if (shadow) {
	brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
	brw_MOV(p, brw_message_reg(6), src[2]);
    }

    brw_SAMPLE(p,
	    retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
	    1,
	    retype(payload_reg, BRW_REGISTER_TYPE_UW),
	    unit + MAX_DRAW_BUFFERS, /* surface */
	    unit,     /* sampler */
	    inst->DstReg.WriteMask,
	    BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
	    4,
	    shadow ? 6 : 4,
	    0);

    if (shadow)
	brw_MOV(p, dst[3], brw_imm_f(1.0));
}