void
gen8_vec4_generator::generate_untyped_atomic(vec4_instruction *ir,
                                             struct brw_reg dst,
                                             struct brw_reg atomic_op,
                                             struct brw_reg surf_index)
{
   assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
          atomic_op.type == BRW_REGISTER_TYPE_UD &&
          surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);
   assert((atomic_op.dw1.ud & ~0xf) == 0);

   unsigned msg_control =
      atomic_op.dw1.ud | /* Atomic Operation Type: BRW_AOP_* */
      (1 << 5); /* Return data expected */

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
                       surf_index.dw1.ud,
                       HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2,
                       msg_control,
                       ir->mlen,
                       1,
                       ir->header_present,
                       false);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
void
gen8_vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg index,
                                                 struct brw_reg offset)
{
   assert(index.file == BRW_IMMEDIATE_VALUE &&
          index.type == BRW_REGISTER_TYPE_UD);
   uint32_t surf_index = index.dw1.ud;

   assert(offset.file == BRW_GENERAL_REGISTER_FILE);

   /* Each of the 8 channel enables is considered for whether each
    * dword is written.
    */
   gen8_instruction *send = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, send, dst);
   gen8_set_src0(brw, send, offset);
   gen8_set_dp_message(brw, send, GEN7_SFID_DATAPORT_DATA_CACHE,
                       surf_index,
                       GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ,
                       0,      /* message control */
                       1,      /* mlen */
                       1,      /* rlen */
                       false,  /* no header */
                       false); /* EOT */

   brw_mark_surface_used(&prog_data->base, surf_index);
}
Esempio n. 3
0
void
vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
        struct brw_reg dst,
        struct brw_reg surf_index)
{
    assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
           surf_index.type == BRW_REGISTER_TYPE_UD);

    brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
                             surf_index.dw1.ud,
                             inst->mlen, 1);

    brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
Esempio n. 4
0
void
vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
        struct brw_reg dst,
        struct brw_reg index,
        struct brw_reg offset)
{
    assert(brw->gen <= 7);
    assert(index.file == BRW_IMMEDIATE_VALUE &&
           index.type == BRW_REGISTER_TYPE_UD);
    uint32_t surf_index = index.dw1.ud;

    struct brw_reg header = brw_vec8_grf(0, 0);

    gen6_resolve_implied_move(p, &header, inst->base_mrf);

    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
            offset);

    uint32_t msg_type;

    if (brw->gen >= 6)
        msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else if (brw->gen == 5 || brw->is_g4x)
        msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
    else
        msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;

    /* Each of the 8 channel enables is considered for whether each
     * dword is written.
     */
    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, send, dst);
    brw_set_src0(p, send, header);
    if (brw->gen < 6)
        send->header.destreg__conditionalmod = inst->base_mrf;
    brw_set_dp_read_message(p, send,
                            surf_index,
                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
                            msg_type,
                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                            2, /* mlen */
                            true, /* header_present */
                            1 /* rlen */);

    brw_mark_surface_used(&prog_data->base, surf_index);
}
void
gen8_vec4_generator::generate_untyped_surface_read(vec4_instruction *ir,
                                                   struct brw_reg dst,
                                                   struct brw_reg surf_index)
{
   assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
          surf_index.type == BRW_REGISTER_TYPE_UD);

   gen8_instruction *inst = next_inst(BRW_OPCODE_SEND);
   gen8_set_dst(brw, inst, retype(dst, BRW_REGISTER_TYPE_UD));
   gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
   gen8_set_dp_message(brw, inst, HSW_SFID_DATAPORT_DATA_CACHE_1,
                       surf_index.dw1.ud,
                       HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ,
                       0xe, /* enable only the R channel */
                       ir->mlen,
                       1,
                       ir->header_present,
                       false);

   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
Esempio n. 6
0
void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
        struct brw_reg dst,
        struct brw_reg surf_index,
        struct brw_reg offset)
{
    assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
           surf_index.type == BRW_REGISTER_TYPE_UD);

    brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
    brw_set_dest(p, insn, dst);
    brw_set_src0(p, insn, offset);
    brw_set_sampler_message(p, insn,
                            surf_index.dw1.ud,
                            0, /* LD message ignores sampler unit */
                            GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                            1, /* rlen */
                            1, /* mlen */
                            false, /* no header */
                            BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                            0);

    brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
}
Esempio n. 7
0
/**
 * Generate assembly for a Vec4 IR instruction.
 *
 * \param instruction The Vec4 IR instruction to generate code for.
 * \param dst         The destination register.
 * \param src         An array of up to three source registers.
 */
void
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
        struct brw_reg dst,
        struct brw_reg *src)
{
    vec4_instruction *inst = (vec4_instruction *) instruction;

    if (dst.width == BRW_WIDTH_4) {
        /* This happens in attribute fixups for "dual instanced" geometry
         * shaders, since they use attributes that are vec4's.  Since the exec
         * width is only 4, it's essential that the caller set
         * force_writemask_all in order to make sure the instruction is executed
         * regardless of which channels are enabled.
         */
        assert(inst->force_writemask_all);

        /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
         * the following register region restrictions (from Graphics BSpec:
         * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
         * > Register Region Restrictions)
         *
         *     1. ExecSize must be greater than or equal to Width.
         *
         *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
         *        to Width * HorzStride."
         */
        for (int i = 0; i < 3; i++) {
            if (src[i].file == BRW_GENERAL_REGISTER_FILE)
                src[i] = stride(src[i], 4, 4, 1);
        }
    }

    switch (inst->opcode) {
    case BRW_OPCODE_MOV:
        brw_MOV(p, dst, src[0]);
        break;
    case BRW_OPCODE_ADD:
        brw_ADD(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MUL:
        brw_MUL(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MACH:
        brw_set_acc_write_control(p, 1);
        brw_MACH(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_MAD:
        assert(brw->gen >= 6);
        brw_MAD(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_FRC:
        brw_FRC(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDD:
        brw_RNDD(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDE:
        brw_RNDE(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDZ:
        brw_RNDZ(p, dst, src[0]);
        break;

    case BRW_OPCODE_AND:
        brw_AND(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_OR:
        brw_OR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_XOR:
        brw_XOR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_NOT:
        brw_NOT(p, dst, src[0]);
        break;
    case BRW_OPCODE_ASR:
        brw_ASR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHR:
        brw_SHR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHL:
        brw_SHL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_CMP:
        brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
        break;
    case BRW_OPCODE_SEL:
        brw_SEL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DPH:
        brw_DPH(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP4:
        brw_DP4(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP3:
        brw_DP3(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP2:
        brw_DP2(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_F32TO16:
        assert(brw->gen >= 7);
        brw_F32TO16(p, dst, src[0]);
        break;

    case BRW_OPCODE_F16TO32:
        assert(brw->gen >= 7);
        brw_F16TO32(p, dst, src[0]);
        break;

    case BRW_OPCODE_LRP:
        assert(brw->gen >= 6);
        brw_LRP(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFREV:
        assert(brw->gen >= 7);
        /* BFREV only supports UD type for src and dst. */
        brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
                  retype(src[0], BRW_REGISTER_TYPE_UD));
        break;
    case BRW_OPCODE_FBH:
        assert(brw->gen >= 7);
        /* FBH only supports UD type for dst. */
        brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_FBL:
        assert(brw->gen >= 7);
        /* FBL only supports UD type for dst. */
        brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_CBIT:
        assert(brw->gen >= 7);
        /* CBIT only supports UD type for dst. */
        brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_ADDC:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_ADDC(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;
    case BRW_OPCODE_SUBB:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_SUBB(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_BFE:
        assert(brw->gen >= 7);
        brw_BFE(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFI1:
        assert(brw->gen >= 7);
        brw_BFI1(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_BFI2:
        assert(brw->gen >= 7);
        brw_BFI2(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_IF:
        if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(brw->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
        } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
        }
        break;

    case BRW_OPCODE_ELSE:
        brw_ELSE(p);
        break;
    case BRW_OPCODE_ENDIF:
        brw_ENDIF(p);
        break;

    case BRW_OPCODE_DO:
        brw_DO(p, BRW_EXECUTE_8);
        break;

    case BRW_OPCODE_BREAK:
        brw_BREAK(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;
    case BRW_OPCODE_CONTINUE:
        /* FINISHME: We need to write the loop instruction support still. */
        if (brw->gen >= 6)
            gen6_CONT(p);
        else
            brw_CONT(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;

    case BRW_OPCODE_WHILE:
        brw_WHILE(p);
        break;

    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT:
    case SHADER_OPCODE_EXP2:
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
        if (brw->gen == 6) {
            generate_math1_gen6(inst, dst, src[0]);
        } else {
            /* Also works for Gen7. */
            generate_math1_gen4(inst, dst, src[0]);
        }
        break;

    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
        if (brw->gen >= 7) {
            generate_math2_gen7(inst, dst, src[0], src[1]);
        } else if (brw->gen == 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
        } else {
            generate_math2_gen4(inst, dst, src[0], src[1]);
        }
        break;

    case SHADER_OPCODE_TEX:
    case SHADER_OPCODE_TXD:
    case SHADER_OPCODE_TXF:
    case SHADER_OPCODE_TXF_CMS:
    case SHADER_OPCODE_TXF_MCS:
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
        generate_tex(inst, dst, src[0]);
        break;

    case VS_OPCODE_URB_WRITE:
        generate_vs_urb_write(inst);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_READ:
        generate_scratch_read(inst, dst, src[0]);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
        generate_scratch_write(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD:
        generate_pull_constant_load(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
        generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
        break;

    case GS_OPCODE_URB_WRITE:
        generate_gs_urb_write(inst);
        break;

    case GS_OPCODE_THREAD_END:
        generate_gs_thread_end(inst);
        break;

    case GS_OPCODE_SET_WRITE_OFFSET:
        generate_gs_set_write_offset(dst, src[0], src[1]);
        break;

    case GS_OPCODE_SET_VERTEX_COUNT:
        generate_gs_set_vertex_count(dst, src[0]);
        break;

    case GS_OPCODE_SET_DWORD_2_IMMED:
        generate_gs_set_dword_2_immed(dst, src[0]);
        break;

    case GS_OPCODE_PREPARE_CHANNEL_MASKS:
        generate_gs_prepare_channel_masks(dst);
        break;

    case GS_OPCODE_SET_CHANNEL_MASKS:
        generate_gs_set_channel_masks(dst, src[0]);
        break;

    case GS_OPCODE_GET_INSTANCE_ID:
        generate_gs_get_instance_id(dst);
        break;

    case SHADER_OPCODE_SHADER_TIME_ADD:
        brw_shader_time_add(p, src[0],
                            prog_data->base.binding_table.shader_time_start);
        brw_mark_surface_used(&prog_data->base,
                              prog_data->base.binding_table.shader_time_start);
        break;

    case SHADER_OPCODE_UNTYPED_ATOMIC:
        generate_untyped_atomic(inst, dst, src[0], src[1]);
        break;

    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
        generate_untyped_surface_read(inst, dst, src[0]);
        break;

    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
        generate_unpack_flags(inst, dst);
        break;

    default:
        if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
            _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
                          opcode_descs[inst->opcode].name);
        } else {
            _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
        }
        abort();
    }
}
Esempio n. 8
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src)
{
    int msg_type = -1;

    if (brw->gen >= 5) {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
            }
            break;
        case SHADER_OPCODE_TXD:
            if (inst->shadow_compare) {
                /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
                assert(brw->is_haswell);
                msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
            } else {
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
            }
            break;
        case SHADER_OPCODE_TXF:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_CMS:
            if (brw->gen >= 7)
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
            else
                msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
            break;
        case SHADER_OPCODE_TXF_MCS:
            assert(brw->gen >= 7);
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
            break;
        case SHADER_OPCODE_TXS:
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
            break;
        case SHADER_OPCODE_TG4:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
            }
            break;
        case SHADER_OPCODE_TG4_OFFSET:
            if (inst->shadow_compare) {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
            } else {
                msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
            }
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    } else {
        switch (inst->opcode) {
        case SHADER_OPCODE_TEX:
        case SHADER_OPCODE_TXL:
            if (inst->shadow_compare) {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
                assert(inst->mlen == 3);
            } else {
                msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
                assert(inst->mlen == 2);
            }
            break;
        case SHADER_OPCODE_TXD:
            /* There is no sample_d_c message; comparisons are done manually. */
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
            assert(inst->mlen == 4);
            break;
        case SHADER_OPCODE_TXF:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
            assert(inst->mlen == 2);
            break;
        case SHADER_OPCODE_TXS:
            msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
            assert(inst->mlen == 2);
            break;
        default:
            assert(!"should not get here: invalid vec4 texture opcode");
            break;
        }
    }

    assert(msg_type != -1);

    /* Load the message header if present.  If there's a texture offset, we need
     * to set it up explicitly and load the offset bitfield.  Otherwise, we can
     * use an implied move from g0 to the first message register.
     */
    if (inst->header_present) {
        if (brw->gen < 6 && !inst->texture_offset) {
            /* Set up an implied move from g0 to the MRF. */
            src = brw_vec8_grf(0, 0);
        } else {
            struct brw_reg header =
                retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

            /* Explicitly set up the message header by copying g0 to the MRF. */
            brw_push_insn_state(p);
            brw_set_mask_control(p, BRW_MASK_DISABLE);
            brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

            brw_set_access_mode(p, BRW_ALIGN_1);

            if (inst->texture_offset) {
                /* Set the texel offset bits in DWord 2. */
                brw_MOV(p, get_element_ud(header, 2),
                        brw_imm_ud(inst->texture_offset));
            }

            if (inst->sampler >= 16) {
                /* The "Sampler Index" field can only store values between 0 and 15.
                 * However, we can add an offset to the "Sampler State Pointer"
                 * field, effectively selecting a different set of 16 samplers.
                 *
                 * The "Sampler State Pointer" needs to be aligned to a 32-byte
                 * offset, and each sampler state is only 16-bytes, so we can't
                 * exclusively use the offset - we have to use both.
                 */
                assert(brw->is_haswell); /* field only exists on Haswell */
                brw_ADD(p,
                        get_element_ud(header, 3),
                        get_element_ud(brw_vec8_grf(0, 0), 3),
                        brw_imm_ud(16 * (inst->sampler / 16) *
                                   sizeof(gen7_sampler_state)));
            }
            brw_pop_insn_state(p);
        }
    }

    uint32_t return_format;

    switch (dst.type) {
    case BRW_REGISTER_TYPE_D:
        return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
        break;
    case BRW_REGISTER_TYPE_UD:
        return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
        break;
    default:
        return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
        break;
    }

    uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
                               inst->opcode == SHADER_OPCODE_TG4_OFFSET)
                              ? prog_data->base.binding_table.gather_texture_start
                              : prog_data->base.binding_table.texture_start) + inst->sampler;

    brw_SAMPLE(p,
               dst,
               inst->base_mrf,
               src,
               surface_index,
               inst->sampler % 16,
               msg_type,
               1, /* response length */
               inst->mlen,
               inst->header_present,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               return_format);

    brw_mark_surface_used(&prog_data->base, surface_index);
}
Esempio n. 9
0
void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                 struct brw_reg dst,
                                                 struct brw_reg surf_index,
                                                 struct brw_reg offset)
{
   assert(surf_index.type == BRW_REGISTER_TYPE_UD);

   if (surf_index.file == BRW_IMMEDIATE_VALUE) {

      brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn, dst);
      brw_set_src0(p, insn, offset);
      brw_set_sampler_message(p, insn,
                              surf_index.dw1.ud,
                              0, /* LD message ignores sampler unit */
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1, /* rlen */
                              1, /* mlen */
                              false, /* no header */
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);

      brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);

   } else {

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* a0.0 = surf_index & 0xff */
      brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
      brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1);
      brw_set_dest(p, insn_and, addr);
      brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD)));
      brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));


      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                              1 /* rlen */,
                              1 /* mlen */,
                              false /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              0);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, offset);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
Esempio n. 10
0
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src,
                             struct brw_reg sampler_index)
{
   int msg_type = -1;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->gen >= 8 || brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      case SHADER_OPCODE_TXF_CMS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MCS:
         assert(brw->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
         break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      case SHADER_OPCODE_TG4:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         }
         break;
      case SHADER_OPCODE_TG4_OFFSET:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
         }
         break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
	    assert(inst->mlen == 3);
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
	    assert(inst->mlen == 2);
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
	 assert(inst->mlen == 4);
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
	 assert(inst->mlen == 2);
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
	 assert(inst->mlen == 2);
	 break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   }

   assert(msg_type != -1);

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present.  If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->header_present) {
      if (brw->gen < 6 && !inst->texture_offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = brw_vec8_grf(0, 0);
      } else {
         struct brw_reg header =
            retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

         brw_set_default_access_mode(p, BRW_ALIGN_1);

         if (inst->texture_offset) {
            /* Set the texel offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header, 2),
                    brw_imm_ud(inst->texture_offset));
         }

         brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
         brw_pop_insn_state(p);
      }
   }

   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
         ? prog_data->base.binding_table.gather_texture_start
         : prog_data->base.binding_table.texture_start;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      uint32_t sampler = sampler_index.dw1.ud;

      brw_SAMPLE(p,
                 dst,
                 inst->base_mrf,
                 src,
                 sampler + base_binding_table_index,
                 sampler % 16,
                 msg_type,
                 1, /* response length */
                 inst->mlen,
                 inst->header_present,
                 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                 return_format);

      brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
   } else {
      /* Non-constant sampler index. */
      /* Note: this clobbers `dst` as a temporary before emitting the send */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
      struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));

      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* Some care required: `sampler` and `temp` may alias:
       *    addr = sampler & 0xff
       *    temp = (sampler << 8) & 0xf00
       *    addr = addr | temp
       */
      brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
      brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
      brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
      brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
      brw_OR(p, addr, addr, temp);

      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              msg_type,
                              1 /* rlen */,
                              inst->mlen /* mlen */,
                              inst->header_present /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              return_format);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, src);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}