Example #1
0
/**
 * Run every entry of tests[] once per access mode (Align1, then Align16).
 *
 * For each combination a fresh brw_compile is allocated, the test's func
 * emits exactly one instruction into it, and that instruction is handed to
 * test_compact_instruction() and, only if that succeeds, to
 * test_fuzz_compact_instruction().
 *
 * The compile context is released on every path, including failures, so a
 * failing test no longer leaks its allocation.
 *
 * \param brw  context passed through to brw_init_compile().
 * \return true if at least one check failed, false if all of them passed.
 */
static bool
run_tests(struct brw_context *brw)
{
   bool fail = false;

   for (int i = 0; i < ARRAY_SIZE(tests); i++) {
      for (int align_16 = 0; align_16 <= 1; align_16++) {
         struct brw_compile *p = rzalloc(NULL, struct brw_compile);
         brw_init_compile(brw, p, p);

         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         brw_set_access_mode(p, align_16 ? BRW_ALIGN_16 : BRW_ALIGN_1);

         tests[i].func(p);
         assert(p->nr_insn == 1);

         /* Short-circuit keeps the original behaviour of skipping the fuzz
          * test once the plain compaction test has already failed.
          */
         if (!test_compact_instruction(p, p->store[0]) ||
             !test_fuzz_compact_instruction(p, p->store[0])) {
            fail = true;
         }

         /* Always reached: previously the failure paths used 'continue' and
          * skipped this free.
          */
         ralloc_free(p);
      }
   }

   return fail;
}
Example #2
0
void
vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst,
        struct brw_reg src)
{
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);

    /* If we think of the src and dst registers as composed of 8 DWORDs each,
     * we want to pick up the contents of DWORDs 0 and 4 from src, truncate
     * them to WORDs, and then pack them into DWORD 2 of dst.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 16 WORDS each; then, we want to pick up the
     * contents of WORDs 0 and 8 from src, and pack them into WORDs 4 and 5 of
     * dst.
     *
     * We can do that by the following EU instruction:
     *
     *     mov (2) dst.4<1>:uw src<8;1,0>:uw   { Align1, Q1, NoMask }
     */
    brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
            stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Example #3
0
void
vec4_generator::generate_gs_set_write_offset(struct brw_reg dst,
        struct brw_reg src0,
        struct brw_reg src1)
{
    /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.3):
     *
     *     Slot 0 Offset. This field, after adding to the Global Offset field
     *     in the message descriptor, specifies the offset (in 256-bit units)
     *     from the start of the URB entry, as referenced by URB Handle 0, at
     *     which the data will be accessed.
     *
     * Similar text describes DWORD M0.4, which is slot 1 offset.
     *
     * Therefore, we want to multiply DWORDs 0 and 4 of src0 (the x components
     * of the register for geometry shader invocations 0 and 1) by the
     * immediate value in src1, and store the result in DWORDs 3 and 4 of dst.
     *
     * We can do this with the following EU instruction:
     *
     *     mul(2) dst.3<1>UD src0<8;2,4>UD src1   { Align1 WE_all }
     */
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
            src1);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Example #4
0
void
vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
        struct brw_reg src)
{
    assert(src.file == BRW_IMMEDIATE_VALUE);

    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_MOV(p, suboffset(vec1(dst), 2), src);
    brw_set_access_mode(p, BRW_ALIGN_16);
    brw_pop_insn_state(p);
}
Example #5
0
/* Project 'pos' to screen space (or back again), overwrite with results:
 */
static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
{
   struct brw_compile *p = &c->func;

   /* calc rhw 
    */
   brw_math_invert(p, get_element(pos, W), get_element(pos, W));

   /* value.xyz *= value.rhw
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
   brw_set_access_mode(p, BRW_ALIGN_1);
}
Example #6
0
/* This is performed against the original triangles, so no indirection
 * required:
BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   GLuint hpos_offset = brw_vert_result_to_offset(&c->vue_map,
                                                  VARYING_SLOT_POS);
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], hpos_offset);
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], hpos_offset);
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], hpos_offset);


   struct brw_reg v0n = get_tmp(c);
   struct brw_reg v1n = get_tmp(c);
   struct brw_reg v2n = get_tmp(c);

   /* Convert to NDC.
    * NOTE: We can't modify the original vertex coordinates,
    * as it may impact further operations.
    * So, we have to keep normalized coordinates in temp registers.
    *
    * TBD-KC
    * Try to optimize unnecessary MOV's.
    */
   brw_MOV(p, v0n, v0);
   brw_MOV(p, v1n, v1);
   brw_MOV(p, v2n, v2);

   brw_clip_project_position(c, v0n);
   brw_clip_project_position(c, v1n);
   brw_clip_project_position(c, v2n);

   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0n, negate(v2n)); 
   brw_ADD(p, f, v1n, negate(v2n)); 

   /* Take their crossproduct:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
Example #7
0
void
vec4_generator::generate_math2_gen6(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src0,
                                    struct brw_reg src1)
{
    /* The math instruction cannot be emitted in Align16 mode, so a partial
     * destination writemask is not supported here.
     */
    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);

    /* Source swizzles are ignored as well, so validate both operands. */
    check_gen6_math_src_arg(src0);
    check_gen6_math_src_arg(src1);

    /* Emit the two-operand math instruction under Align1, then switch the
     * default access mode back to Align16.
     */
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1);
    brw_set_access_mode(p, BRW_ALIGN_16);
}
Example #8
0
void
vec4_generator::generate_math1_gen6(vec4_instruction *inst,
                                    struct brw_reg dst,
                                    struct brw_reg src)
{
    /* The math instruction cannot be emitted in Align16 mode, so a partial
     * destination writemask is not supported here.
     */
    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
    check_gen6_math_src_arg(src);

    /* Emit the one-operand math instruction under Align1, then switch the
     * default access mode back to Align16.
     */
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, src,
             BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL);
    brw_set_access_mode(p, BRW_ALIGN_16);
}
Example #9
0
/* This is performed against the original triangles, so no indirection
 * required:
BZZZT!
 */
static void compute_tri_direction( struct brw_clip_compile *c )
{
   struct brw_compile *p = &c->func;
   struct brw_reg e = c->reg.tmp0;
   struct brw_reg f = c->reg.tmp1;
   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 


   /* Calculate the vectors of two edges of the triangle:
    */
   brw_ADD(p, e, v0, negate(v2)); 
   brw_ADD(p, f, v1, negate(v2)); 

   /* Take their crossproduct:
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
   brw_set_access_mode(p, BRW_ALIGN_1);

   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
}
Example #10
0
void
vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
{
    /* We want to left shift just DWORD 4 (the x component belonging to the
     * second geometry shader invocation) by 4 bits.  So generate the
     * instruction:
     *
     *     shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
     */
    dst = suboffset(vec1(dst), 4);
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_SHL(p, dst, dst, brw_imm_ud(4));
    brw_pop_insn_state(p);
}
Example #11
0
void
vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
{
    /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
     * and store into dst.0 & dst.4. So generate the instruction:
     *
     *     shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
     */
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    dst = retype(dst, BRW_REGISTER_TYPE_UD);
    struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
    brw_SHR(p, dst, stride(r0, 1, 4, 0),
            brw_imm_ud(GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT));
    brw_pop_insn_state(p);
}
Example #12
0
void
vec4_generator::generate_unpack_flags(vec4_instruction *inst,
                                      struct brw_reg dst)
{
    /* Split the low byte of the flag register across two elements of dst:
     * element 0 gets bits 3:0, element 4 gets bits 7:4 shifted down to 3:0.
     * 'inst' is not consulted.
     */
    struct brw_reg flag_reg = brw_flag_reg(0, 0);
    struct brw_reg low_nibble = suboffset(vec1(dst), 0);
    struct brw_reg high_nibble = suboffset(vec1(dst), 4);

    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_access_mode(p, BRW_ALIGN_1);

    brw_AND(p, low_nibble, flag_reg, brw_imm_ud(0x0f));

    brw_AND(p, high_nibble, flag_reg, brw_imm_ud(0xf0));
    brw_SHR(p, high_nibble, high_nibble, brw_imm_ud(4));

    brw_pop_insn_state(p);
}
Example #13
0
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
        struct brw_reg index)
{
    int second_vertex_offset;

    if (brw->gen >= 6)
        second_vertex_offset = 1;
    else
        second_vertex_offset = 16;

    m1 = retype(m1, BRW_REGISTER_TYPE_D);

    /* Set up M1 (message payload).  Only the block offsets in M1.0 and
     * M1.4 are used, and the rest are ignored.
     */
    struct brw_reg m1_0 = suboffset(vec1(m1), 0);
    struct brw_reg m1_4 = suboffset(vec1(m1), 4);
    struct brw_reg index_0 = suboffset(vec1(index), 0);
    struct brw_reg index_4 = suboffset(vec1(index), 4);

    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_access_mode(p, BRW_ALIGN_1);

    brw_MOV(p, m1_0, index_0);

    if (index.file == BRW_IMMEDIATE_VALUE) {
        index_4.dw1.ud += second_vertex_offset;
        brw_MOV(p, m1_4, index_4);
    } else {
        brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
    }

    brw_pop_insn_state(p);
}
Example #14
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
    const bool shadow = inst->shadow_compare;
    int msg_type = -1;

    /* Pick the sampler message type for this opcode.  Gen4 uses the SIMD4x2
     * message encodings (and checks the message length per type); Gen5 and
     * later use the GEN5/GEN7/HSW encodings.
     */
    if (brw->gen < 5) {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            msg_type = shadow
                       ? BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE
                       : BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
            assert(inst->mlen == (shadow ? 3 : 2));
            break;
        case SHADER_OPCODE_TXD:
            /* No sample_d_c message exists; comparisons are done manually. */
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
            assert(inst->mlen == 4);
            break;
        case SHADER_OPCODE_TXF:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
            assert(inst->mlen == 2);
            break;
        case SHADER_OPCODE_TXS:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
            assert(inst->mlen == 2);
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    } else {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            msg_type = shadow ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
                              : GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
            break;
        case SHADER_OPCODE_TXD:
            if (shadow) {
                /* Gen7.5+ only; otherwise brw_lower_texture_gradients()
                 * has already lowered it.
                 */
                assert(brw->is_haswell);
                msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
            }
            break;
        case SHADER_OPCODE_TXF:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_CMS:
            msg_type = (brw->gen >= 7) ? GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS
                                       : GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_MCS:
            assert(brw->gen >= 7);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
            break;
        case SHADER_OPCODE_TXS:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
            break;
        case SHADER_OPCODE_TG4:
            msg_type = shadow ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C
                              : GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
            break;
        case SHADER_OPCODE_TG4_OFFSET:
            msg_type = shadow ? GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C
                              : GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    }

    assert(msg_type != -1);

    /* Message header, when one is present.  Before Gen6 and without a
     * texture offset, an implied move from g0 to the first message register
     * is enough.  In every other case the header is built explicitly so the
     * offset bits (and, for samplers >= 16, the sampler state pointer) can
     * be patched in.
     */
    if (inst->header_present) {
        const bool implied_move = brw->gen < 6 && !inst->texture_offset;

        if (implied_move) {
            src = brw_vec8_grf(0, 0);
        } else {
            struct brw_reg header =
                retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

            brw_push_insn_state(p);
            brw_set_mask_control(p, BRW_MASK_DISABLE);

            /* Start from a copy of g0 in the MRF. */
            brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

            /* The single-DWord patches below are done under Align1. */
            brw_set_access_mode(p, BRW_ALIGN_1);

            if (inst->texture_offset) {
                /* Texel offset bits go in DWord 2. */
                brw_MOV(p, get_element_ud(header, 2),
                        brw_imm_ud(inst->texture_offset));
            }

            if (inst->sampler >= 16) {
                /* "Sampler Index" only holds 0..15, but adding an offset to
                 * "Sampler State Pointer" selects a different group of 16
                 * samplers.
                 *
                 * That pointer has to stay 32-byte aligned while a sampler
                 * state is only 16 bytes, so the offset alone cannot
                 * address every sampler -- both fields are needed together.
                 */
                assert(brw->is_haswell); /* field only exists on Haswell */
                brw_ADD(p,
                        get_element_ud(header, 3),
                        get_element_ud(brw_vec8_grf(0, 0), 3),
                        brw_imm_ud(16 * (inst->sampler / 16) *
                                   sizeof(gen7_sampler_state)));
            }

            brw_pop_insn_state(p);
        }
    }

    /* The sampler return format follows the destination register type. */
    uint32_t return_format;

    if (dst.type == BRW_REGISTER_TYPE_D) {
        return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
    } else if (dst.type == BRW_REGISTER_TYPE_UD) {
        return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
    } else {
        return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
    }

    /* Gather opcodes index from a separate binding table section. */
    const bool is_gather = inst->opcode == SHADER_OPCODE_TG4 ||
                           inst->opcode == SHADER_OPCODE_TG4_OFFSET;
    uint32_t surface_index =
        (is_gather ? prog_data->base.binding_table.gather_texture_start
                   : prog_data->base.binding_table.texture_start) +
        inst->sampler;

    brw_SAMPLE(p,
               dst,
               inst->base_mrf,
               src,
               surface_index,
               inst->sampler % 16,
               msg_type,
               1, /* response length */
               inst->mlen,
               inst->header_present,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               return_format);

    brw_mark_surface_used(&prog_data->base, surface_index);
}
Example #15
0
void
vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
        struct brw_reg src)
{
    /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.5):
     *
     *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
     *
     *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
     *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
     *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
     *        channel enable to determine the final channel enable.  For the
     *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
     *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
     *        in the writeback message.  For the URB_WRITE_OWORD &
     *        URB_WRITE_HWORD messages, when final channel enable is 1 it
     *        indicates that Vertex 1 DATA [3] will be written to the surface.
     *
     *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
     *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
     *
     *     14 Vertex 1 DATA [2] Channel Mask
     *     13 Vertex 1 DATA [1] Channel Mask
     *     12 Vertex 1 DATA [0] Channel Mask
     *     11 Vertex 0 DATA [3] Channel Mask
     *     10 Vertex 0 DATA [2] Channel Mask
     *      9 Vertex 0 DATA [1] Channel Mask
     *      8 Vertex 0 DATA [0] Channel Mask
     *
     * (This is from a section of the PRM that is agnostic to the particular
     * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
     * geometry shader invocations 0 and 1, respectively).  Since we have the
     * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
     * and the enable flags for geometry shader invocation 1 in bits 7:0 of
     * DWORD 4, we just need to OR them together and store the result in bits
     * 15:8 of DWORD 5.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 32 bytes each; then, we want to pick up the
     * contents of bytes 0 and 16 from src, OR them together, and store them in
     * byte 21.
     *
     * We can do that by the following EU instruction:
     *
     *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
     *
     * Note: this relies on the source register having zeros in (a) bits 7:4 of
     * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
     * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
     * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
     * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
     * contain valid channel mask values (which are in the range 0x0-0xf).
     */
    dst = retype(dst, BRW_REGISTER_TYPE_UB);
    src = retype(src, BRW_REGISTER_TYPE_UB);
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
    brw_pop_insn_state(p);
}
Example #16
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
   const bool shadow = inst->shadow_compare;
   int msg_type = -1;

   /* Pick the sampler message type for this opcode.  Gen4 uses the SIMD4x2
    * message encodings (and checks the message length per type); Gen5 and
    * later use the GEN5 encodings.
    */
   if (intel->gen < 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         msg_type = shadow ? BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE
                           : BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
         assert(inst->mlen == (shadow ? 3 : 2));
         break;
      case SHADER_OPCODE_TXD:
         /* No sample_d_c message exists; comparisons are done manually. */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
         assert(inst->mlen == 4);
         break;
      case SHADER_OPCODE_TXF:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
         assert(inst->mlen == 2);
         break;
      case SHADER_OPCODE_TXS:
         msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
         assert(inst->mlen == 2);
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
         msg_type = shadow ? GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
                           : GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
         break;
      case SHADER_OPCODE_TXD:
         /* No sample_d_c message exists; comparisons are done manually. */
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         break;
      case SHADER_OPCODE_TXF:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXS:
         msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
         break;
      default:
         assert(!"should not get here: invalid VS texture opcode");
         break;
      }
   }

   assert(msg_type != -1);

   /* Message header.  With a texture offset the header is written
    * explicitly so the offset bitfield can be filled in; with only
    * header_present set, an implied move from g0 to the first message
    * register is used instead.
    */
   if (inst->texture_offset) {
      struct brw_reg header =
         retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
      struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);

      /* Copy g0 into the MRF as the starting header. */
      brw_MOV(p, header, g0);

      /* Overwrite DWord 2 with the offset bits, using an Align1 scalar
       * move, then return to Align16.
       */
      brw_set_access_mode(p, BRW_ALIGN_1);
      struct brw_reg header_dw2 =
         retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
                BRW_REGISTER_TYPE_UD);
      brw_MOV(p, header_dw2, brw_imm_uw(inst->texture_offset));
      brw_set_access_mode(p, BRW_ALIGN_16);
   } else if (inst->header_present) {
      /* Implied move from g0 to the MRF. */
      src = brw_vec8_grf(0, 0);
   }

   /* The sampler return format follows the destination register type. */
   uint32_t return_format;

   if (dst.type == BRW_REGISTER_TYPE_D) {
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
   } else if (dst.type == BRW_REGISTER_TYPE_UD) {
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
   } else {
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   }

   brw_SAMPLE(p,
              dst,
              inst->base_mrf,
              src,
              SURF_INDEX_VS_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              1, /* response length */
              inst->mlen,
              inst->header_present,
              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
              return_format);
}
Example #17
0
/* Emit the setup program for point sprites: one vertex, and for every setup
 * register a predicated computation of the m1Cx / m2Cy / m3C0 coefficients
 * followed by a URB write.
 *
 * 'allocate' selects whether alloc_regs() is called first.
 */
void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
{
    struct brw_compile *p = &c->func;
    GLuint reg;

    c->nr_verts = 1;

    if (allocate) {
        alloc_regs(c);
    }

    copy_z_inv_w(c);

    for (reg = 0; reg < c->nr_setup_regs; reg++) {
        struct brw_reg attr = offset(c->vert[0], reg);
        GLushort pc, pc_persp, pc_linear;
        GLboolean last = calculate_masks(c, reg, &pc, &pc_persp, &pc_linear);
        GLushort pc_coord_replace = calculate_point_sprite_mask(c, reg);
        GLuint pc_plain;

        /* Channels that get coordinate replacement are excluded from the
         * perspective multiply and from the plain constant copy.
         */
        pc_persp &= ~pc_coord_replace;
        pc_plain = pc & ~pc_coord_replace;

        if (pc_persp) {
            brw_set_predicate_control_flag_value(p, pc_persp);
            brw_MUL(p, attr, attr, c->inv_w[0]);
        }

        /* Point sprite coordinate replacement: a texcoord with this enabled
         * is replaced by (x, y, 0, 1), where x and y run from 0 to 1 across
         * the width and height of the point.
         */
        if (pc_coord_replace) {
            brw_set_predicate_control_flag_value(p, pc_coord_replace);

            /* Calculate 1.0/PointWidth */
            brw_math(p,
                     c->tmp,
                     BRW_MATH_FUNCTION_INV,
                     BRW_MATH_SATURATE_NONE,
                     0,
                     c->dx0,
                     BRW_MATH_DATA_SCALAR,
                     BRW_MATH_PRECISION_FULL);

            brw_set_access_mode(p, BRW_ALIGN_16);

            /* dA/dx, dA/dy: zero everywhere except X of m1Cx and Y of m2Cy;
             * the Y gradient is negated for a lower-left sprite origin.
             */
            brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
            brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
            brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
            brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y),
                    c->key.sprite_origin_lower_left ? negate(c->tmp) : c->tmp);

            /* Attribute constant offset: W is 1, and Y is 1 as well for a
             * lower-left sprite origin.
             */
            brw_MOV(p, c->m3C0, brw_imm_f(0.0));
            brw_MOV(p,
                    brw_writemask(c->m3C0,
                                  c->key.sprite_origin_lower_left
                                  ? WRITEMASK_YW : WRITEMASK_W),
                    brw_imm_f(1.0));

            brw_set_access_mode(p, BRW_ALIGN_1);
        }

        if (pc_plain) {
            brw_set_predicate_control_flag_value(p, pc_plain);
            brw_MOV(p, c->m1Cx, brw_imm_ud(0));
            brw_MOV(p, c->m2Cy, brw_imm_ud(0));
            brw_MOV(p, c->m3C0, attr); /* constant value */
        }

        /* Copy m0..m3 to URB. */
        brw_set_predicate_control_flag_value(p, pc);
        brw_urb_WRITE(p,
                      brw_null_reg(),
                      0,
                      brw_vec8_grf(0, 0),
                      0,       /* allocate */
                      1,       /* used */
                      4,       /* msg len */
                      0,       /* response len */
                      last,    /* eot */
                      last,    /* writes complete */
                      reg * 4, /* urb destination offset */
                      BRW_URB_SWIZZLE_TRANSPOSE);
    }
}
Example #18
0
/**
 * Generate the geometry shader program used on Gen6 to perform stream output
 * (transform feedback).
 *
 * The emitted program has two parts: an optional transform feedback section
 * (only when the key has at least one binding) that writes each bound
 * varying of each vertex with brw_svb_write(), and a pass-through section
 * that re-emits the vertices with brw_gs_emit_vue() unless rasterizer
 * discard is enabled.
 *
 * \param c                 compile state; registers are allocated here via
 *                          brw_gs_alloc_regs().
 * \param key               program key; this function reads
 *                          num_transform_feedback_bindings,
 *                          transform_feedback_bindings/swizzles, pv_first and
 *                          rasterizer_discard.
 * \param num_verts         vertices per primitive.  Also stored as the SVBI
 *                          post-increment value.  The pass-through section
 *                          handles 1, 2 and 3; any other value emits no
 *                          vertices there.
 * \param check_edge_flags  only consulted when num_verts == 3; makes the
 *                          emission of vertices 0/1 and the PRIM_END flag on
 *                          vertex 2 conditional on the edge indicator bits
 *                          in R0.2.
 */
void
gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
	         unsigned num_verts, bool check_edge_flags)
{
   struct brw_compile *p = &c->func;
   c->prog_data.svbi_postincrement_value = num_verts;

   brw_gs_alloc_regs(c, num_verts, true);
   brw_gs_initialize_header(c);

   if (key->num_transform_feedback_bindings > 0) {
      unsigned vertex, binding;
      struct brw_reg destination_indices_uw =
         vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW));

      /* Note: since we use the binding table to keep track of buffer offsets
       * and stride, the GS doesn't need to keep track of a separate pointer
       * into each buffer; it uses a single pointer which increments by 1 for
       * each vertex.  So we use SVBI0 for this pointer, regardless of whether
       * transform feedback is in interleaved or separate attribs mode.
       *
       * Make sure that the buffers have enough room for all the vertices.
       */
      /* temp = SVBI[0] + num_verts; the IF body runs only when
       * temp <= SVBI[4].
       */
      brw_ADD(p, get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts));
      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE,
	         get_element_ud(c->reg.temp, 0),
	         get_element_ud(c->reg.SVBI, 4));
      brw_IF(p, BRW_EXECUTE_1);

      /* Compute the destination indices to write to.  Usually we use SVBI[0]
       * + (0, 1, 2).  However, for odd-numbered triangles in tristrips, the
       * vertices come down the pipeline in reversed winding order, so we need
       * to flip the order when writing to the transform feedback buffer.  To
       * ensure that flatshading accuracy is preserved, we need to write them
       * in order SVBI[0] + (0, 2, 1) if we're using the first provoking
       * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using
       * the last provoking vertex convention.
       *
       * Note: since brw_imm_v can only be used in instructions in
       * packed-word execution mode, and SVBI is a double-word, we need to
       * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1),
       * or (1, 0, 2)) to the destination_indices register, and then add SVBI
       * using a separate instruction.  Also, since the immediate constant is
       * expressed as packed words, and we need to load double-words into
       * destination_indices, we need to intersperse zeros to fill the upper
       * halves of each double-word.
       */
      brw_MOV(p, destination_indices_uw,
              brw_imm_v(0x00020100)); /* (0, 1, 2) */
      if (num_verts == 3) {
         /* Get primitive type into temp register. */
         brw_AND(p, get_element_ud(c->reg.temp, 0),
                 get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f));

         /* Test if primitive type is TRISTRIP_REVERSE.  We need to do this as
          * an 8-wide comparison so that the conditional MOV that follows
          * moves all 8 words correctly.
          */
         brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ,
                 get_element_ud(c->reg.temp, 0),
                 brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));

         /* If so, then overwrite destination_indices_uw with the appropriate
          * reordering.
          */
         brw_MOV(p, destination_indices_uw,
                 brw_imm_v(key->pv_first ? 0x00010200    /* (0, 2, 1) */
                                         : 0x00020001)); /* (1, 0, 2) */
         /* Turn predication back off for the instructions that follow.
          * NOTE(review): presumably the CMP above is what enabled it --
          * confirm in brw_CMP.
          */
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      }
      /* destination_indices += SVBI[0] */
      brw_ADD(p, c->reg.destination_indices,
              c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0));

      /* For each vertex, generate code to output each varying using the
       * appropriate binding table entry.
       */
      for (vertex = 0; vertex < num_verts; ++vertex) {
         /* Set up the correct destination index for this vertex */
         brw_MOV(p, get_element_ud(c->reg.header, 5),
                 get_element_ud(c->reg.destination_indices, vertex));

         for (binding = 0; binding < key->num_transform_feedback_bindings;
              ++binding) {
            unsigned char varying =
               key->transform_feedback_bindings[binding];
            unsigned char slot = c->vue_map.varying_to_slot[varying];
            /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
             *
             *   "Prior to End of Thread with a URB_WRITE, the kernel must
             *   ensure that all writes are complete by sending the final
             *   write as a committed write."
             */
            bool final_write =
               binding == key->num_transform_feedback_bindings - 1 &&
               vertex == num_verts - 1;
            struct brw_reg vertex_slot = c->reg.vertex[vertex];
            /* slot / 2 selects the register relative to the vertex base and
             * slot % 2 selects which 16-byte half of it.
             */
            vertex_slot.nr += slot / 2;
            vertex_slot.subnr = (slot % 2) * 16;
            /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
               ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
            /* The swizzled copy into the header is done in Align16 mode;
             * Align1 is restored before the write message is sent.
             */
            brw_set_access_mode(p, BRW_ALIGN_16);
            brw_MOV(p, stride(c->reg.header, 4, 4, 1),
                    retype(vertex_slot, BRW_REGISTER_TYPE_UD));
            brw_set_access_mode(p, BRW_ALIGN_1);
            /* Only the final write requests a commit and names a real
             * destination register (temp).
             */
            brw_svb_write(p,
                          final_write ? c->reg.temp : brw_null_reg(), /* dest */
                          1, /* msg_reg_nr */
                          c->reg.header, /* src0 */
                          SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */
                          final_write); /* send_commit_msg */
         }
      }
      brw_ENDIF(p);

      /* Now, reinitialize the header register from R0 to restore the parts of
       * the register that we overwrote while streaming out transform feedback
       * data.
       */
      brw_gs_initialize_header(c);

      /* Finally, wait for the write commit to occur so that we can proceed to
       * other things safely.
       *
       * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
       *
       *   The write commit does not modify the destination register, but
       *   merely clears the dependency associated with the destination
       *   register. Thus, a simple “mov” instruction using the register as a
       *   source is sufficient to wait for the write commit to occur.
       */
      brw_MOV(p, c->reg.temp, c->reg.temp);
   }

   brw_gs_ff_sync(c, 1);

   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
    * release the URB that was just allocated, and terminate the thread.
    */
   if (key->rasterizer_discard) {
      brw_gs_terminate(c);
      return;
   }

   /* Pass-through section: re-emit the vertices, adjusting the primitive
    * start/end flags in header DWord 2 before each one.
    * NOTE(review): the deltas passed to brw_gs_offset_header_dw2() below
    * (e.g. PRIM_END - PRIM_START, -PRIM_START) suggest it adds its argument
    * to DWord 2 -- confirm against its definition.
    */
   brw_gs_overwrite_header_dw2_from_r0(c);
   switch (num_verts) {
   case 1:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
      brw_gs_emit_vue(c, c->reg.vertex[0], true);
      break;
   case 2:
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], true);
      break;
   case 3:
      if (check_edge_flags) {
         /* Only emit vertices 0 and 1 if this is the first triangle of the
          * polygon.  Otherwise they are redundant.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
         brw_IF(p, BRW_EXECUTE_1);
      }
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[0], false);
      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
      brw_gs_emit_vue(c, c->reg.vertex[1], false);
      if (check_edge_flags) {
         brw_ENDIF(p);
         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
          * of the polygon.  Otherwise leave the primitive incomplete because
          * there are more polygon vertices coming.
          */
         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
                 get_element_ud(c->reg.R0, 2),
                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
      }
      /* With check_edge_flags set, this PRIM_END update is emitted while
       * predication is NORMAL; predication is then switched off before the
       * final vertex is emitted.
       */
      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
      brw_gs_emit_vue(c, c->reg.vertex[2], true);
      break;
   }
}
/**
 * Emit native code for every fragment-program instruction held in
 * c->prog_instructions (c->nr_fp_insns entries).
 *
 * Each instruction is dispatched on its opcode to the matching emit_*()
 * helper.  Flow control is handled inline:
 *   - IF/ELSE/ENDIF keep their open instructions on the if_inst[] stack
 *     (at most MAX_IFSN deep);
 *   - BGNLOOP/ENDLOOP keep the open DO instructions on the loop_inst[]
 *     stack (at most MAX_LOOP_DEPTH deep), and ENDLOOP back-patches the
 *     BREAK/CONTINUE instructions emitted inside the loop;
 *   - CAL/RET push and pop a return address through the address register
 *     obtained from stack_index, which is initialised to the address of
 *     c->stack.
 *
 * Once all instructions are emitted, post_wm_emit() is called and the Data
 * pointer of every instruction in c->fp->program.Base.Instructions is reset
 * to NULL.
 *
 * \param brw  driver context (not referenced by this function's body)
 * \param c    fragment-program compile state; c->func receives the code
 */
static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
{
#define MAX_IFSN 32
#define MAX_LOOP_DEPTH 32
    struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
    struct brw_instruction *inst0, *inst1;
    int i, if_insn = 0, loop_insn = 0;
    struct brw_compile *p = &c->func;
    struct brw_indirect stack_index = brw_indirect(0, 0);

    c->reg_index = 0;
    prealloc_reg(c);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    /* Point the address register at the start of the call stack. */
    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));

    for (i = 0; i < c->nr_fp_insns; i++) {
        struct prog_instruction *inst = &c->prog_instructions[i];
        struct prog_instruction *orig_inst;

        /* When this instruction carries a back-pointer in Data, store the
         * position of the code about to be emitted in the pointed-to
         * instruction's Data field.
         */
        if ((orig_inst = inst->Data) != 0)
            orig_inst->Data = current_insn(p);

        if (inst->CondUpdate)
            brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
        else
            brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);

        switch (inst->Opcode) {
            case WM_PIXELXY:
                emit_pixel_xy(c, inst);
                break;
            case WM_DELTAXY:
                emit_delta_xy(c, inst);
                break;
            case WM_PIXELW:
                emit_pixel_w(c, inst);
                break;
            case WM_LINTERP:
                emit_linterp(c, inst);
                break;
            case WM_PINTERP:
                emit_pinterp(c, inst);
                break;
            case WM_CINTERP:
                emit_cinterp(c, inst);
                break;
            case WM_WPOSXY:
                emit_wpos_xy(c, inst);
                break;
            case WM_FB_WRITE:
                emit_fb_write(c, inst);
                break;
            case OPCODE_ABS:
                emit_abs(c, inst);
                break;
            case OPCODE_ADD:
                emit_add(c, inst);
                break;
            case OPCODE_SUB:
                emit_sub(c, inst);
                break;
            case OPCODE_FRC:
                emit_frc(c, inst);
                break;
            case OPCODE_FLR:
                emit_flr(c, inst);
                break;
            case OPCODE_LRP:
                emit_lrp(c, inst);
                break;
            case OPCODE_INT:
                emit_int(c, inst);
                break;
            case OPCODE_MOV:
                emit_mov(c, inst);
                break;
            case OPCODE_DP3:
                emit_dp3(c, inst);
                break;
            case OPCODE_DP4:
                emit_dp4(c, inst);
                break;
            case OPCODE_XPD:
                emit_xpd(c, inst);
                break;
            case OPCODE_DPH:
                emit_dph(c, inst);
                break;
            case OPCODE_RCP:
                emit_rcp(c, inst);
                break;
            case OPCODE_RSQ:
                emit_rsq(c, inst);
                break;
            case OPCODE_SIN:
                emit_sin(c, inst);
                break;
            case OPCODE_COS:
                emit_cos(c, inst);
                break;
            case OPCODE_EX2:
                emit_ex2(c, inst);
                break;
            case OPCODE_LG2:
                emit_lg2(c, inst);
                break;
            case OPCODE_MAX:
                emit_max(c, inst);
                break;
            case OPCODE_MIN:
                emit_min(c, inst);
                break;
            case OPCODE_DDX:
                emit_ddx(c, inst);
                break;
            case OPCODE_DDY:
                emit_ddy(c, inst);
                break;
            case OPCODE_SLT:
                emit_slt(c, inst);
                break;
            case OPCODE_SLE:
                emit_sle(c, inst);
                break;
            case OPCODE_SGT:
                emit_sgt(c, inst);
                break;
            case OPCODE_SGE:
                emit_sge(c, inst);
                break;
            case OPCODE_SEQ:
                emit_seq(c, inst);
                break;
            case OPCODE_SNE:
                emit_sne(c, inst);
                break;
            case OPCODE_MUL:
                emit_mul(c, inst);
                break;
            case OPCODE_POW:
                emit_pow(c, inst);
                break;
            case OPCODE_MAD:
                emit_mad(c, inst);
                break;
            case OPCODE_TEX:
                emit_tex(c, inst);
                break;
            case OPCODE_TXB:
                emit_txb(c, inst);
                break;
            case OPCODE_KIL_NV:
                emit_kil(c);
                break;
            case OPCODE_IF:
                assert(if_insn < MAX_IFSN);
                if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
                break;
            case OPCODE_ELSE:
                /* An ELSE without an open IF would index if_inst[-1]. */
                assert(if_insn > 0);
                if_inst[if_insn - 1] = brw_ELSE(p, if_inst[if_insn - 1]);
                break;
            case OPCODE_ENDIF:
                assert(if_insn > 0);
                brw_ENDIF(p, if_inst[--if_insn]);
                break;
            case OPCODE_BGNSUB:
            case OPCODE_ENDSUB:
                /* Subroutine markers emit no code of their own. */
                break;
            case OPCODE_CAL:
                brw_push_insn_state(p);
                brw_set_mask_control(p, BRW_MASK_DISABLE);
                /* Write IP + 3*16 to the current stack slot. */
                brw_set_access_mode(p, BRW_ALIGN_1);
                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
                brw_set_access_mode(p, BRW_ALIGN_16);
                /* Advance the stack pointer by one 4-byte slot. */
                brw_ADD(p, get_addr_reg(stack_index),
                         get_addr_reg(stack_index), brw_imm_d(4));
                /* Record where the jump instruction emitted next will live.
                 * NOTE(review): the 1*16 offset looks like a placeholder
                 * that is fixed up later (presumably in post_wm_emit) --
                 * confirm.
                 */
                orig_inst = inst->Data;
                /* A CAL whose Data is unset would be a NULL dereference. */
                assert(orig_inst != NULL);
                orig_inst->Data = &p->store[p->nr_insn];
                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
                brw_pop_insn_state(p);
                break;

            case OPCODE_RET:
                brw_push_insn_state(p);
                brw_set_mask_control(p, BRW_MASK_DISABLE);
                /* Pop one slot, then load IP from it. */
                brw_ADD(p, get_addr_reg(stack_index),
                        get_addr_reg(stack_index), brw_imm_d(-4));
                brw_set_access_mode(p, BRW_ALIGN_1);
                brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
                brw_set_access_mode(p, BRW_ALIGN_16);
                brw_pop_insn_state(p);

                break;
            case OPCODE_BGNLOOP:
                /* Guard the fixed-size loop stack against overflow. */
                assert(loop_insn < MAX_LOOP_DEPTH);
                loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
                break;
            case OPCODE_BRK:
                brw_BREAK(p);
                brw_set_predicate_control(p, BRW_PREDICATE_NONE);
                break;
            case OPCODE_CONT:
                brw_CONT(p);
                brw_set_predicate_control(p, BRW_PREDICATE_NONE);
                break;
            case OPCODE_ENDLOOP:
                /* An ENDLOOP without an open loop would index loop_inst[-1]. */
                assert(loop_insn > 0);
                loop_insn--;
                inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
                /* Walk back from the WHILE to the matching DO and patch
                 * every BREAK and CONTINUE found in between: BREAK gets a
                 * jump count one past the distance to the WHILE, CONTINUE
                 * gets exactly the distance to the WHILE.
                 */
                while (inst0 > loop_inst[loop_insn]) {
                    inst0--;
                    if (inst0->header.opcode == BRW_OPCODE_BREAK) {
                        inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
                        inst0->bits3.if_else.pop_count = 0;
                    } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
                        inst0->bits3.if_else.jump_count = inst1 - inst0;
                        inst0->bits3.if_else.pop_count = 0;
                    }
                }
                break;
            default:
                _mesa_printf("unsupported IR in fragment shader %d\n",
                        inst->Opcode);
                break;
        }

        /* Instructions following a condition-updating instruction are
         * emitted with normal predication; otherwise predication is off.
         */
        if (inst->CondUpdate)
            brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
        else
            brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
    post_wm_emit(c);
    /* Clear the per-instruction Data pointers used during emission. */
    for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
        c->fp->program.Base.Instructions[i].Data = NULL;
}