Example #1
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    if (c->need_ff_sync) {
        struct brw_compile *p = &c->func;
        struct brw_instruction *need_ff_sync;

        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p, 
                    c->reg.R0,
                    0,
                    c->reg.R0,
                    1,		/* allocate */
                    1,		/* used */
                    1,  	/* msg length */
                    1,		/* response length */
                    0,		/* eot */
                    1,		/* write complete */
                    0,		/* urb offset */
                    BRW_URB_SWIZZLE_NONE);
        }
        brw_ENDIF(p, need_ff_sync);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
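In plain C terms, the guarded block above amounts to a send-once test on bit 0 of the ff_sync register. A minimal sketch of that intent, with invented names (ff_sync_once, send_ff_sync) and an ordinary variable standing in for the flag-register test:

#include <stdint.h>

/* Illustration only: the AND into a null destination with the Z conditional
 * modifier plays the role of the if, and the OR records that the FF_SYNC
 * message has already been sent by this thread. */
static void ff_sync_once(uint32_t *ff_sync, void (*send_ff_sync)(void))
{
   if ((*ff_sync & 0x1) == 0) {
      *ff_sync |= 0x1;   /* brw_OR(..., brw_imm_ud(0x1)) */
      send_ff_sync();    /* brw_ff_sync(): allocate a URB handle, one-register response, no EOT */
   }
}
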
void brw_clip_init_clipmask( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_reg incoming = get_element_ud(c->reg.R0, 2);

   /* Shift so that lowest outcode bit is rightmost:
    */
   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));

   if (c->key.nr_userclip) {
      struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);

      /* Rearrange userclip outcodes so that they come directly after
       * the fixed plane bits.
       */
      if (p->devinfo->gen == 5 || p->devinfo->is_g4x)
         brw_AND(p, tmp, incoming, brw_imm_ud(0xff<<14));
      else
         brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));

      brw_SHR(p, tmp, tmp, brw_imm_ud(8));
      brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);

      release_tmp(c, tmp);
   }
}
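The shifts and masks above pack two outcode fields of the R0.2 header DWord into a single plane mask. A plain-C sketch of that packing (the function and parameter names are invented; the 26, 14 and 8 shift amounts come straight from the code above):

#include <stdint.h>

/* Fixed-plane outcodes occupy the top six bits of the header DWord; user-clip
 * outcodes start at bit 14 (eight of them on G4X/Ironlake, six otherwise). */
static uint32_t build_planemask(uint32_t incoming, int wide_userclip)
{
   uint32_t planemask = incoming >> 26;                                  /* fixed planes -> bits 5:0 */
   uint32_t userclip  = incoming & ((wide_userclip ? 0xffu : 0x3fu) << 14);
   return planemask | (userclip >> 8);                                   /* userclip lands just above */
}
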
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct brw_codegen *p = &c->func;

    if (p->devinfo->gen == 5) {
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Example #4
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct brw_compile *p = &c->func;
    struct brw_context *brw = p->brw;

    if (brw->gen == 5) {
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_last_inst->header.destreg__conditionalmod = BRW_CONDITIONAL_Z;
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Example #5
void brw_clip_ff_sync(struct brw_clip_compile *c)
{
    struct intel_context *intel = &c->func.brw->intel;

    if (intel->needs_ff_sync) {
        struct brw_compile *p = &c->func;

        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
			c->reg.R0,
			0,
			c->reg.R0,
			1, /* allocate */
			1, /* response length */
			0 /* eot */);
        }
        brw_ENDIF(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
}
Example #6
static void brw_clip_test( struct brw_clip_compile *c )
{
    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);

    struct brw_reg v0 = get_tmp(c);
    struct brw_reg v1 = get_tmp(c);
    struct brw_reg v2 = get_tmp(c);

    struct brw_indirect vt0 = brw_indirect(0, 0);
    struct brw_indirect vt1 = brw_indirect(1, 0);
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_compile *p = &c->func;
    struct brw_instruction *is_outside;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
    brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos));
    brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
    brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));

    /* test nearz, xmin, ymin plane */
    /* clip.xyz < -clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside, need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* test farz, xmax, ymax plane */
    /* clip.xyz > clip.w */
    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); 
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* All vertices are outside of a plane, rejected */
    brw_AND(p, t, t1, t2);
    brw_AND(p, t, t, t3);
    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
    is_outside = brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
    brw_ENDIF(p, is_outside);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside, need to clip */
    brw_XOR(p, t, t1, t2);
    brw_XOR(p, t1, t2, t3);
    brw_OR(p, t, t, t1);
    brw_AND(p, t, t, brw_imm_ud(0x1));
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 0), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 1), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
            get_element(t, 2), brw_imm_ud(0));
    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    release_tmps(c);
}
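For each frustum plane the code above distinguishes three cases: all three vertices outside (trivial reject), all inside (nothing to do), or mixed (mark the plane for clipping). A compact C restatement of that decision, with invented names; the GPU code evaluates it per component using CMP, AND and XOR on the flag values:

#include <stdbool.h>
#include <stdint.h>

static uint32_t classify_plane(uint32_t planemask, unsigned plane_bit,
                               bool out0, bool out1, bool out2,
                               bool *trivially_rejected)
{
   if (out0 && out1 && out2)
      *trivially_rejected = true;              /* corresponds to brw_clip_kill_thread() */
   else if (out0 != out1 || out1 != out2)      /* the XOR/OR test above */
      planemask |= 1u << plane_bit;            /* this plane needs real clipping */
   return planemask;
}
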
Example #7
/**
 * Generate assembly for a Vec4 IR instruction.
 *
 * \param instruction The Vec4 IR instruction to generate code for.
 * \param dst         The destination register.
 * \param src         An array of up to three source registers.
 */
void
vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
        struct brw_reg dst,
        struct brw_reg *src)
{
    vec4_instruction *inst = (vec4_instruction *) instruction;

    if (dst.width == BRW_WIDTH_4) {
        /* This happens in attribute fixups for "dual instanced" geometry
         * shaders, since they use attributes that are vec4's.  Since the exec
         * width is only 4, it's essential that the caller set
         * force_writemask_all in order to make sure the instruction is executed
         * regardless of which channels are enabled.
         */
        assert(inst->force_writemask_all);

        /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy
         * the following register region restrictions (from Graphics BSpec:
         * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions
         * > Register Region Restrictions)
         *
         *     1. ExecSize must be greater than or equal to Width.
         *
         *     2. If ExecSize = Width and HorzStride != 0, VertStride must be set
         *        to Width * HorzStride."
         */
        for (int i = 0; i < 3; i++) {
            if (src[i].file == BRW_GENERAL_REGISTER_FILE)
                src[i] = stride(src[i], 4, 4, 1);
        }
    }

    switch (inst->opcode) {
    case BRW_OPCODE_MOV:
        brw_MOV(p, dst, src[0]);
        break;
    case BRW_OPCODE_ADD:
        brw_ADD(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MUL:
        brw_MUL(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_MACH:
        brw_set_acc_write_control(p, 1);
        brw_MACH(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_MAD:
        assert(brw->gen >= 6);
        brw_MAD(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_FRC:
        brw_FRC(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDD:
        brw_RNDD(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDE:
        brw_RNDE(p, dst, src[0]);
        break;
    case BRW_OPCODE_RNDZ:
        brw_RNDZ(p, dst, src[0]);
        break;

    case BRW_OPCODE_AND:
        brw_AND(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_OR:
        brw_OR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_XOR:
        brw_XOR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_NOT:
        brw_NOT(p, dst, src[0]);
        break;
    case BRW_OPCODE_ASR:
        brw_ASR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHR:
        brw_SHR(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_SHL:
        brw_SHL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_CMP:
        brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
        break;
    case BRW_OPCODE_SEL:
        brw_SEL(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DPH:
        brw_DPH(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP4:
        brw_DP4(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP3:
        brw_DP3(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_DP2:
        brw_DP2(p, dst, src[0], src[1]);
        break;

    case BRW_OPCODE_F32TO16:
        assert(brw->gen >= 7);
        brw_F32TO16(p, dst, src[0]);
        break;

    case BRW_OPCODE_F16TO32:
        assert(brw->gen >= 7);
        brw_F16TO32(p, dst, src[0]);
        break;

    case BRW_OPCODE_LRP:
        assert(brw->gen >= 6);
        brw_LRP(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFREV:
        assert(brw->gen >= 7);
        /* BFREV only supports UD type for src and dst. */
        brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
                  retype(src[0], BRW_REGISTER_TYPE_UD));
        break;
    case BRW_OPCODE_FBH:
        assert(brw->gen >= 7);
        /* FBH only supports UD type for dst. */
        brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_FBL:
        assert(brw->gen >= 7);
        /* FBL only supports UD type for dst. */
        brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_CBIT:
        assert(brw->gen >= 7);
        /* CBIT only supports UD type for dst. */
        brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
        break;
    case BRW_OPCODE_ADDC:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_ADDC(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;
    case BRW_OPCODE_SUBB:
        assert(brw->gen >= 7);
        brw_set_acc_write_control(p, 1);
        brw_SUBB(p, dst, src[0], src[1]);
        brw_set_acc_write_control(p, 0);
        break;

    case BRW_OPCODE_BFE:
        assert(brw->gen >= 7);
        brw_BFE(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_BFI1:
        assert(brw->gen >= 7);
        brw_BFI1(p, dst, src[0], src[1]);
        break;
    case BRW_OPCODE_BFI2:
        assert(brw->gen >= 7);
        brw_BFI2(p, dst, src[0], src[1], src[2]);
        break;

    case BRW_OPCODE_IF:
        if (inst->src[0].file != BAD_FILE) {
            /* The instruction has an embedded compare (only allowed on gen6) */
            assert(brw->gen == 6);
            gen6_IF(p, inst->conditional_mod, src[0], src[1]);
        } else {
            struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
            brw_inst->header.predicate_control = inst->predicate;
        }
        break;

    case BRW_OPCODE_ELSE:
        brw_ELSE(p);
        break;
    case BRW_OPCODE_ENDIF:
        brw_ENDIF(p);
        break;

    case BRW_OPCODE_DO:
        brw_DO(p, BRW_EXECUTE_8);
        break;

    case BRW_OPCODE_BREAK:
        brw_BREAK(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;
    case BRW_OPCODE_CONTINUE:
        /* FINISHME: We need to write the loop instruction support still. */
        if (brw->gen >= 6)
            gen6_CONT(p);
        else
            brw_CONT(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
        break;

    case BRW_OPCODE_WHILE:
        brw_WHILE(p);
        break;

    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
    case SHADER_OPCODE_SQRT:
    case SHADER_OPCODE_EXP2:
    case SHADER_OPCODE_LOG2:
    case SHADER_OPCODE_SIN:
    case SHADER_OPCODE_COS:
        if (brw->gen == 6) {
            generate_math1_gen6(inst, dst, src[0]);
        } else {
            /* Also works for Gen7. */
            generate_math1_gen4(inst, dst, src[0]);
        }
        break;

    case SHADER_OPCODE_POW:
    case SHADER_OPCODE_INT_QUOTIENT:
    case SHADER_OPCODE_INT_REMAINDER:
        if (brw->gen >= 7) {
            generate_math2_gen7(inst, dst, src[0], src[1]);
        } else if (brw->gen == 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
        } else {
            generate_math2_gen4(inst, dst, src[0], src[1]);
        }
        break;

    case SHADER_OPCODE_TEX:
    case SHADER_OPCODE_TXD:
    case SHADER_OPCODE_TXF:
    case SHADER_OPCODE_TXF_CMS:
    case SHADER_OPCODE_TXF_MCS:
    case SHADER_OPCODE_TXL:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
        generate_tex(inst, dst, src[0]);
        break;

    case VS_OPCODE_URB_WRITE:
        generate_vs_urb_write(inst);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_READ:
        generate_scratch_read(inst, dst, src[0]);
        break;

    case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
        generate_scratch_write(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD:
        generate_pull_constant_load(inst, dst, src[0], src[1]);
        break;

    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
        generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
        break;

    case GS_OPCODE_URB_WRITE:
        generate_gs_urb_write(inst);
        break;

    case GS_OPCODE_THREAD_END:
        generate_gs_thread_end(inst);
        break;

    case GS_OPCODE_SET_WRITE_OFFSET:
        generate_gs_set_write_offset(dst, src[0], src[1]);
        break;

    case GS_OPCODE_SET_VERTEX_COUNT:
        generate_gs_set_vertex_count(dst, src[0]);
        break;

    case GS_OPCODE_SET_DWORD_2_IMMED:
        generate_gs_set_dword_2_immed(dst, src[0]);
        break;

    case GS_OPCODE_PREPARE_CHANNEL_MASKS:
        generate_gs_prepare_channel_masks(dst);
        break;

    case GS_OPCODE_SET_CHANNEL_MASKS:
        generate_gs_set_channel_masks(dst, src[0]);
        break;

    case GS_OPCODE_GET_INSTANCE_ID:
        generate_gs_get_instance_id(dst);
        break;

    case SHADER_OPCODE_SHADER_TIME_ADD:
        brw_shader_time_add(p, src[0],
                            prog_data->base.binding_table.shader_time_start);
        brw_mark_surface_used(&prog_data->base,
                              prog_data->base.binding_table.shader_time_start);
        break;

    case SHADER_OPCODE_UNTYPED_ATOMIC:
        generate_untyped_atomic(inst, dst, src[0], src[1]);
        break;

    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
        generate_untyped_surface_read(inst, dst, src[0]);
        break;

    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
        generate_unpack_flags(inst, dst);
        break;

    default:
        if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
            _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
                          opcode_descs[inst->opcode].name);
        } else {
            _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
        }
        abort();
    }
}
Example #8
void
vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
        struct brw_reg src)
{
    /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
     * Header: M0.5):
     *
     *     15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
     *
     *        When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
     *        DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
     *        Vertex 0 DATA[7].  This bit is ANDed with the corresponding
     *        channel enable to determine the final channel enable.  For the
     *        URB_READ_OWORD & URB_READ_HWORD messages, when final channel
     *        enable is 1 it indicates that Vertex 1 DATA [3] will be included
     *        in the writeback message.  For the URB_WRITE_OWORD &
     *        URB_WRITE_HWORD messages, when final channel enable is 1 it
     *        indicates that Vertex 1 DATA [3] will be written to the surface.
     *
     *        0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
     *        1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
     *
     *     14 Vertex 1 DATA [2] Channel Mask
     *     13 Vertex 1 DATA [1] Channel Mask
     *     12 Vertex 1 DATA [0] Channel Mask
     *     11 Vertex 0 DATA [3] Channel Mask
     *     10 Vertex 0 DATA [2] Channel Mask
     *      9 Vertex 0 DATA [1] Channel Mask
     *      8 Vertex 0 DATA [0] Channel Mask
     *
     * (This is from a section of the PRM that is agnostic to the particular
     * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to
     * geometry shader invocations 0 and 1, respectively).  Since we have the
     * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0,
     * and the enable flags for geometry shader invocation 1 in bits 7:0 of
     * DWORD 4, we just need to OR them together and store the result in bits
     * 15:8 of DWORD 5.
     *
     * It's easier to get the EU to do this if we think of the src and dst
     * registers as composed of 32 bytes each; then, we want to pick up the
     * contents of bytes 0 and 16 from src, OR them together, and store them in
     * byte 21.
     *
     * We can do that by the following EU instruction:
     *
     *     or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all }
     *
     * Note: this relies on the source register having zeros in (a) bits 7:4 of
     * DWORD 0 and (b) bits 3:0 of DWORD 4.  We can rely on (b) because the
     * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which
     * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to
     * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to
     * contain valid channel mask values (which are in the range 0x0-0xf).
     */
    dst = retype(dst, BRW_REGISTER_TYPE_UB);
    src = retype(src, BRW_REGISTER_TYPE_UB);
    brw_push_insn_state(p);
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
    brw_pop_insn_state(p);
}
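Viewing the two registers as 32 bytes each, the single align1 OR above reduces to one byte-sized operation. A CPU-side restatement (the byte-array view and function name are for illustration only):

#include <stdint.h>

static void set_channel_masks(uint8_t dst[32], const uint8_t src[32])
{
   /* Invocation-0 masks sit in bits 3:0 of src byte 0; invocation-1 masks were
    * pre-shifted into bits 7:4 of src byte 16 by GS_OPCODE_PREPARE_CHANNEL_MASKS.
    * Their OR becomes bits 15:8 of message-header DWORD 5, i.e. dst byte 21. */
   dst[21] = src[0] | src[16];
}
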
Example #9
/* Line clipping, more or less following the following algorithm:
 *
 *  for (p=0;p<MAX_PLANES;p++) {
 *     if (clipmask & (1 << p)) {
 *        GLfloat dp0 = DOTPROD( vtx0, plane[p] );
 *        GLfloat dp1 = DOTPROD( vtx1, plane[p] );
 *
 *        if (dp1 < 0.0f) {
 *           GLfloat t = dp1 / (dp1 - dp0);
 *           if (t > t1) t1 = t;
 *        } else {
 *           GLfloat t = dp0 / (dp0 - dp1);
 *           if (t > t0) t0 = t;
 *        }
 *
 *        if (t0 + t1 >= 1.0)
 *           return;
 *     }
 *  }
 *
 *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
 *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
 *
 */
static void clip_and_emit_line( struct brw_clip_compile *c )
{
   struct brw_codegen *p = &c->func;
   struct brw_indirect vtx0     = brw_indirect(0, 0);
   struct brw_indirect vtx1      = brw_indirect(1, 0);
   struct brw_indirect newvtx0   = brw_indirect(2, 0);
   struct brw_indirect newvtx1   = brw_indirect(3, 0);
   struct brw_indirect plane_ptr = brw_indirect(4, 0);
   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
   GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS);
   GLint clipdist0_offset = c->key.nr_userclip
      ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0)
      : 0;

   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));

   /* Note: init t0, t1 together:
    */
   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));

   brw_clip_init_planes(c);
   brw_clip_init_clipmask(c);

   /* -ve rhw workaround */
   if (p->devinfo->has_negative_rhw_bug) {
      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
              brw_imm_ud(1<<20));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
   }

   /* Set the initial vertex source mask: The first 6 planes are the bounds
    * of the view volume; the next 8 planes are the user clipping planes.
    */
   brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0));

   /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0].
    * We'll increment 6 times before we start hitting actual user clipping. */
   brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float)));

   brw_DO(p, BRW_EXECUTE_1);
   {
      /* if (planemask & 1)
       */
      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

      brw_IF(p, BRW_EXECUTE_1);
      {
         brw_AND(p, v1_null_ud, c->reg.vertex_src_mask, brw_imm_ud(1));
         brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
         brw_IF(p, BRW_EXECUTE_1);
         {
            /* user clip distance: just fetch the correct float from each vertex */
            struct brw_indirect temp_ptr = brw_indirect(7, 0);
            brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx0), c->reg.clipdistance_offset);
            brw_MOV(p, c->reg.dp0, deref_1f(temp_ptr, 0));
            brw_ADD(p, get_addr_reg(temp_ptr), get_addr_reg(vtx1), c->reg.clipdistance_offset);
            brw_MOV(p, c->reg.dp1, deref_1f(temp_ptr, 0));
         }
         brw_ELSE(p);
         {
            /* fixed plane: fetch the hpos, dp4 against the plane. */
            if (c->key.nr_userclip)
               brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
            else
               brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));

            brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, hpos_offset), c->reg.plane_equation);
            brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, hpos_offset), c->reg.plane_equation);
         }
         brw_ENDIF(p);

         brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, vec1(c->reg.dp1), brw_imm_f(0.0f));

         brw_IF(p, BRW_EXECUTE_1);
         {
             /*
              * Both can be negative on GM965/G965 due to the RHW workaround;
              * if so, this object should be rejected.
              */
             if (p->devinfo->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
                 brw_IF(p, BRW_EXECUTE_1);
                 {
                     brw_clip_kill_thread(c);
                 }
                 brw_ENDIF(p);
             }

             brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
             brw_math_invert(p, c->reg.t, c->reg.t);
             brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);

             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
             brw_MOV(p, c->reg.t1, c->reg.t);
             brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                       BRW_PREDICATE_NORMAL);
	 }
	 brw_ELSE(p);
	 {
             /* Coming back in.  We know that both cannot be negative
              * because the line would have been culled in that case.
              */

             /* If both are positive, do nothing */
             /* Only on GM965/G965 */
             if (p->devinfo->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
                 brw_IF(p, BRW_EXECUTE_1);
             }

             {
                 brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
                 brw_math_invert(p, c->reg.t, c->reg.t);
                 brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);

                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
                 brw_MOV(p, c->reg.t0, c->reg.t);
                 brw_inst_set_pred_control(p->devinfo, brw_last_inst,
                                           BRW_PREDICATE_NORMAL);
             }

             if (p->devinfo->has_negative_rhw_bug) {
                 brw_ENDIF(p);
             }
         }
	 brw_ENDIF(p);
      }
      brw_ENDIF(p);

      /* plane_ptr++;
       */
      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));

      /* while ((planemask >>= 1) != 0)
       */
      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
      brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
      brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
      brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float)));
      brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);
   }
   brw_WHILE(p);
   brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL);

   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, false);
      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, false);

      brw_clip_emit_vue(c, newvtx0, BRW_URB_WRITE_ALLOCATE_COMPLETE,
                        (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                        | URB_WRITE_PRIM_START);
      brw_clip_emit_vue(c, newvtx1, BRW_URB_WRITE_EOT_COMPLETE,
                        (_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT)
                        | URB_WRITE_PRIM_END);
   }
   brw_ENDIF(p);
   brw_clip_kill_thread(c);
}
Example #10
void
vec4_generator::generate_tex(vec4_instruction *inst,
                             struct brw_reg dst,
                             struct brw_reg src,
                             struct brw_reg sampler_index)
{
   int msg_type = -1;

   if (brw->gen >= 5) {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case SHADER_OPCODE_TXD:
         if (inst->shadow_compare) {
            /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
            assert(brw->gen >= 8 || brw->is_haswell);
            msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
         } else {
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
         }
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
	 break;
      case SHADER_OPCODE_TXF_CMS:
         if (brw->gen >= 7)
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
         else
            msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
         break;
      case SHADER_OPCODE_TXF_MCS:
         assert(brw->gen >= 7);
         msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
         break;
      case SHADER_OPCODE_TXS:
	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
	 break;
      case SHADER_OPCODE_TG4:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4;
         }
         break;
      case SHADER_OPCODE_TG4_OFFSET:
         if (inst->shadow_compare) {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C;
         } else {
            msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO;
         }
         break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   } else {
      switch (inst->opcode) {
      case SHADER_OPCODE_TEX:
      case SHADER_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
	    assert(inst->mlen == 3);
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
	    assert(inst->mlen == 2);
	 }
	 break;
      case SHADER_OPCODE_TXD:
	 /* There is no sample_d_c message; comparisons are done manually. */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
	 assert(inst->mlen == 4);
	 break;
      case SHADER_OPCODE_TXF:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
	 assert(inst->mlen == 2);
	 break;
      case SHADER_OPCODE_TXS:
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
	 assert(inst->mlen == 2);
	 break;
      default:
	 unreachable("should not get here: invalid vec4 texture opcode");
      }
   }

   assert(msg_type != -1);

   assert(sampler_index.type == BRW_REGISTER_TYPE_UD);

   /* Load the message header if present.  If there's a texture offset, we need
    * to set it up explicitly and load the offset bitfield.  Otherwise, we can
    * use an implied move from g0 to the first message register.
    */
   if (inst->header_present) {
      if (brw->gen < 6 && !inst->texture_offset) {
         /* Set up an implied move from g0 to the MRF. */
         src = brw_vec8_grf(0, 0);
      } else {
         struct brw_reg header =
            retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);

         /* Explicitly set up the message header by copying g0 to the MRF. */
         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

         brw_set_default_access_mode(p, BRW_ALIGN_1);

         if (inst->texture_offset) {
            /* Set the texel offset bits in DWord 2. */
            brw_MOV(p, get_element_ud(header, 2),
                    brw_imm_ud(inst->texture_offset));
         }

         brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
         brw_pop_insn_state(p);
      }
   }

   uint32_t return_format;

   switch (dst.type) {
   case BRW_REGISTER_TYPE_D:
      return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
      break;
   case BRW_REGISTER_TYPE_UD:
      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
      break;
   default:
      return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
      break;
   }

   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
         ? prog_data->base.binding_table.gather_texture_start
         : prog_data->base.binding_table.texture_start;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      uint32_t sampler = sampler_index.dw1.ud;

      brw_SAMPLE(p,
                 dst,
                 inst->base_mrf,
                 src,
                 sampler + base_binding_table_index,
                 sampler % 16,
                 msg_type,
                 1, /* response length */
                 inst->mlen,
                 inst->header_present,
                 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                 return_format);

      brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
   } else {
      /* Non-constant sampler index. */
      /* Note: this clobbers `dst` as a temporary before emitting the send */

      struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
      struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));

      struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));

      brw_push_insn_state(p);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_access_mode(p, BRW_ALIGN_1);

      /* Some care required: `sampler` and `temp` may alias:
       *    addr = (sampler + base_binding_table_index) & 0xff
       *    temp = (sampler << 8) & 0xf00
       *    addr = addr | temp
       */
      brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
      brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
      brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
      brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
      brw_OR(p, addr, addr, temp);

      /* a0.0 |= <descriptor> */
      brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
      brw_set_sampler_message(p, insn_or,
                              0 /* surface */,
                              0 /* sampler */,
                              msg_type,
                              1 /* rlen */,
                              inst->mlen /* mlen */,
                              inst->header_present /* header */,
                              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                              return_format);
      brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
      brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
      brw_set_src0(p, insn_or, addr);
      brw_set_dest(p, insn_or, addr);


      /* dst = send(offset, a0.0) */
      brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn_send, dst);
      brw_set_src0(p, insn_send, src);
      brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);

      brw_pop_insn_state(p);

      /* visitor knows more than we do about the surface limit required,
       * so has already done marking.
       */
   }
}
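In the non-constant sampler path above, the five ALU instructions merely assemble two bit fields of the message descriptor in a0.0. A plain-C sketch of those bits (helper name invented; the field positions follow the 0x0ff and 0x0f00 masks in the code):

#include <stdint.h>

static uint32_t dynamic_sampler_descriptor(uint32_t sampler,
                                           uint32_t base_binding_table_index)
{
   uint32_t binding_table_index = (sampler + base_binding_table_index) & 0x0ff; /* bits 7:0 */
   uint32_t sampler_state_index = (sampler << 8) & 0x0f00;                      /* bits 11:8 */
   return binding_table_index | sampler_state_index;
}
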
Example #11
void
vec4_generator::generate_code(exec_list *instructions)
{
   int last_native_insn_offset = 0;
   const char *last_annotation_string = NULL;
   const void *last_annotation_ir = NULL;

   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
      if (shader) {
         printf("Native code for vertex shader %d:\n", prog->Name);
      } else {
         printf("Native code for vertex program %d:\n", c->vp->program.Base.Id);
      }
   }

   foreach_list(node, instructions) {
      vec4_instruction *inst = (vec4_instruction *)node;
      struct brw_reg src[3], dst;

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
               if (shader) {
                  ((ir_instruction *) last_annotation_ir)->print();
               } else {
                  const prog_instruction *vpi;
                  vpi = (const prog_instruction *) inst->ir;
                  printf("%d: ", (int)(vpi - vp->Base.Instructions));
                  _mesa_fprint_instruction_opt(stdout, vpi, 0,
                                               PROG_PRINT_DEBUG, NULL);
               }
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = inst->get_src(i);
      }
      dst = inst->get_dst();

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicate);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MACH:
	 brw_set_acc_write_control(p, 1);
	 brw_MACH(p, dst, src[0], src[1]);
	 brw_set_acc_write_control(p, 0);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DPH:
	 brw_DPH(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP4:
	 brw_DP4(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP3:
	 brw_DP3(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_DP2:
	 brw_DP2(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
	    brw_inst->header.predicate_control = inst->predicate;
	 }
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 break;

      case BRW_OPCODE_DO:
	 brw_DO(p, BRW_EXECUTE_8);
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p);
	 else
	    brw_CONT(p);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE:
	 brw_WHILE(p);
	 break;

      default:
	 generate_vs_instruction(inst, dst, src);
	 break;
      }

      if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
	 brw_dump_compile(p, stdout,
			  last_native_insn_offset, p->next_insn_offset);
      }

      last_native_insn_offset = p->next_insn_offset;
   }