예제 #1
0
uint
i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1)
{
   struct i915_fragment_shader *ifs = p->shader;
   unsigned reg, idx;

   if (c0 == 0.0)
      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
   if (c0 == 1.0)
      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);

   if (c1 == 0.0)
      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
   if (c1 == 1.0)
      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);

   // XXX emit swizzle here for 0, 1, -1 and any combination thereof
   // we can use swizzle + neg for that
   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      if (ifs->constant_flags[reg] == 0xf ||
          ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
         continue;
      for (idx = 0; idx < 3; idx++) {
         if (!(ifs->constant_flags[reg] & (3 << idx))) {
            ifs->constants[reg][idx + 0] = c0;
            ifs->constants[reg][idx + 1] = c1;
            ifs->constant_flags[reg] |= 3 << idx;
            if (reg + 1 > ifs->num_constants)
               ifs->num_constants = reg + 1;
            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
         }
      }
   }

   i915_program_error(p, "i915_emit_const2f: out of constants");
   return 0;
}
예제 #2
0
GLuint
i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
{
   GLint reg, idx;

   if (c0 == 0.0)
      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
   if (c0 == 1.0)
      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);

   if (c1 == 0.0)
      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
   if (c1 == 1.0)
      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);

   for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      if (p->constant_flags[reg] == 0xf ||
          p->constant_flags[reg] == I915_CONSTFLAG_PARAM)
         continue;
      for (idx = 0; idx < 3; idx++) {
         if (!(p->constant_flags[reg] & (3 << idx))) {
            p->constant[reg][idx] = c0;
            p->constant[reg][idx + 1] = c1;
            p->constant_flags[reg] |= 3 << idx;
            if (reg + 1 > p->nr_constants)
               p->nr_constants = reg + 1;
            return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO,
                           ONE);
         }
      }
   }

   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
   p->error = 1;
   return 0;
}
/* Possible concerns:
 *
 * SIN, COS -- could use another taylor step?
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 * 
 * Parse failures -- Mesa doesn't currently give a good indication
 * internally whether a particular program string parsed or not.  This
 * can lead to confusion -- hopefully we cope with it ok now.
 *
 */
static void
upload_program(struct i915_fragment_program *p)
{
   const struct gl_fragment_program *program =
      p->ctx->FragmentProgram._Current;
   const struct prog_instruction *inst = program->Base.Instructions;

/*    _mesa_debug_fp_inst(program->Base.NumInstructions, inst); */

   /* Is this a parse-failed program?  Ensure a valid program is
    * loaded, as the flagging of an error isn't sufficient to stop
    * this being uploaded to hardware.
    */
   if (inst[0].Opcode == OPCODE_END) {
      GLuint tmp = i915_get_utemp(p);
      i915_emit_arith(p,
                      A0_MOV,
                      UREG(REG_TYPE_OC, 0),
                      A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
      return;
   }

   while (1) {
      GLuint src0, src1, src2, flags;
      GLuint tmp = 0;

      switch (inst->Opcode) {
      case OPCODE_ABS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         i915_emit_arith(p,
                         A0_MAX,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         src0, negate(src0, 1, 1, 1, 1), 0);
         break;

      case OPCODE_ADD:
         EMIT_2ARG_ARITH(A0_ADD);
         break;

      case OPCODE_CMP:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         src2 = src_vector(p, &inst->SrcReg[2], program);
         i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
         break;

      case OPCODE_COS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         src0, i915_emit_const1f(p, 1.0 / (M_PI)), 0);

         i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

         /* By choosing different taylor constants, could get rid of this mul:
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         tmp, i915_emit_const1f(p, (M_PI)), 0);

         /* 
          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
          * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
          * result = DP4 t0, cos_constants
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XY, 0,
                         swizzle(tmp, X, X, ONE, ONE),
                         swizzle(tmp, X, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XYZ, 0,
                         swizzle(tmp, X, Y, X, ONE),
                         swizzle(tmp, X, X, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XYZ, 0,
                         swizzle(tmp, X, X, Z, ONE),
                         swizzle(tmp, Z, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(tmp, ONE, Z, Y, X),
                         i915_emit_const4fv(p, cos_constants), 0);

         break;

      case OPCODE_DP3:
         EMIT_2ARG_ARITH(A0_DP3);
         break;

      case OPCODE_DP4:
         EMIT_2ARG_ARITH(A0_DP4);
         break;

      case OPCODE_DPH:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, Y, Z, ONE), src1, 0);
         break;

      case OPCODE_DST:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         /* result[0] = 1    * 1;
          * result[1] = a[1] * b[1];
          * result[2] = a[2] * 1;
          * result[3] = 1    * b[3];
          */
         i915_emit_arith(p,
                         A0_MUL,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, ONE, Y, Z, ONE),
                         swizzle(src1, ONE, Y, ONE, W), 0);
         break;

      case OPCODE_EX2:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_EXP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_FLR:
         EMIT_1ARG_ARITH(A0_FLR);
         break;

      case OPCODE_FRC:
         EMIT_1ARG_ARITH(A0_FRC);
         break;

      case OPCODE_KIL:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
                         0, src0, T0_TEXKILL);
         break;

      case OPCODE_LG2:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_LOG,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_LIT:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         /* tmp = max( a.xyzw, a.00zw )
          * XXX: Clamp tmp.w to -128..128
          * tmp.y = log(tmp.y)
          * tmp.y = tmp.w * tmp.y
          * tmp.y = exp(tmp.y)
          * result = cmp (a.11-x1, a.1x01, a.1xy1 )
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
                         src0, swizzle(src0, ZERO, ZERO, Z, W), 0);

         i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, Y, Y, Y, Y), 0, 0);

         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, ZERO, Y, ZERO, ZERO),
                         swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

         i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp, Y, Y, Y, Y), 0, 0);

         i915_emit_arith(p, A0_CMP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                         swizzle(tmp, ONE, X, ZERO, ONE),
                         swizzle(tmp, ONE, X, Y, ONE));

         break;

      case OPCODE_LRP:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         src2 = src_vector(p, &inst->SrcReg[2], program);
         flags = get_result_flags(inst);
         tmp = i915_get_utemp(p);

         /* b*a + c*(1-a)
          *
          * b*a + c - ca 
          *
          * tmp = b*a + c, 
          * result = (-c)*a + tmp 
          */
         i915_emit_arith(p, A0_MAD, tmp,
                         flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);

         i915_emit_arith(p, A0_MAD,
                         get_result_vector(p, inst),
                         flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
         break;

      case OPCODE_MAD:
         EMIT_3ARG_ARITH(A0_MAD);
         break;

      case OPCODE_MAX:
         EMIT_2ARG_ARITH(A0_MAX);
         break;

      case OPCODE_MIN:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);
         flags = get_result_flags(inst);

         i915_emit_arith(p,
                         A0_MAX,
                         tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                         negate(src0, 1, 1, 1, 1),
                         negate(src1, 1, 1, 1, 1), 0);

         i915_emit_arith(p,
                         A0_MOV,
                         get_result_vector(p, inst),
                         flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
         break;

      case OPCODE_MOV:
         EMIT_1ARG_ARITH(A0_MOV);
         break;

      case OPCODE_MUL:
         EMIT_2ARG_ARITH(A0_MUL);
         break;

      case OPCODE_POW:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);
         flags = get_result_flags(inst);

         /* XXX: masking on intermediate values, here and elsewhere.
          */
         i915_emit_arith(p,
                         A0_LOG,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         swizzle(src0, X, X, X, X), 0, 0);

         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);


         i915_emit_arith(p,
                         A0_EXP,
                         get_result_vector(p, inst),
                         flags, 0, swizzle(tmp, X, X, X, X), 0, 0);

         break;

      case OPCODE_RCP:
         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_RCP,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_RSQ:

         src0 = src_vector(p, &inst->SrcReg[0], program);

         i915_emit_arith(p,
                         A0_RSQ,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, X, X, X, X), 0, 0);
         break;

      case OPCODE_SCS:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         /* 
          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
          * scs.x = DP4 t1, sin_constants
          * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
          * scs.y = DP4 t1, cos_constants
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XY, 0,
                         swizzle(src0, X, X, ONE, ONE),
                         swizzle(src0, X, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, X, Y),
                         swizzle(tmp, X, X, ONE, ONE), 0);

         if (inst->DstReg.WriteMask & WRITEMASK_Y) {
            GLuint tmp1;

            if (inst->DstReg.WriteMask & WRITEMASK_X)
               tmp1 = i915_get_utemp(p);
            else
               tmp1 = tmp;

            i915_emit_arith(p,
                            A0_MUL,
                            tmp1, A0_DEST_CHANNEL_ALL, 0,
                            swizzle(tmp, X, Y, Y, W),
                            swizzle(tmp, X, Z, ONE, ONE), 0);

            i915_emit_arith(p,
                            A0_DP4,
                            get_result_vector(p, inst),
                            A0_DEST_CHANNEL_Y, 0,
                            swizzle(tmp1, W, Z, Y, X),
                            i915_emit_const4fv(p, sin_constants), 0);
         }

         if (inst->DstReg.WriteMask & WRITEMASK_X) {
            i915_emit_arith(p,
                            A0_MUL,
                            tmp, A0_DEST_CHANNEL_XYZ, 0,
                            swizzle(tmp, X, X, Z, ONE),
                            swizzle(tmp, Z, ONE, ONE, ONE), 0);

            i915_emit_arith(p,
                            A0_DP4,
                            get_result_vector(p, inst),
                            A0_DEST_CHANNEL_X, 0,
                            swizzle(tmp, ONE, Z, Y, X),
                            i915_emit_const4fv(p, cos_constants), 0);
         }
         break;

      case OPCODE_SGE:
         EMIT_2ARG_ARITH(A0_SGE);
         break;

      case OPCODE_SIN:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         tmp = i915_get_utemp(p);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         src0, i915_emit_const1f(p, 1.0 / (M_PI)), 0);

         i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

         /* By choosing different taylor constants, could get rid of this mul:
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_X, 0,
                         tmp, i915_emit_const1f(p, (M_PI)), 0);

         /* 
          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
          * result = DP4 t1.wzyx, sin_constants
          */
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XY, 0,
                         swizzle(tmp, X, X, ONE, ONE),
                         swizzle(tmp, X, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, X, Y),
                         swizzle(tmp, X, X, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, Y, W),
                         swizzle(tmp, X, Z, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(tmp, W, Z, Y, X),
                         i915_emit_const4fv(p, sin_constants), 0);
         break;

      case OPCODE_SLT:
         EMIT_2ARG_ARITH(A0_SLT);
         break;

      case OPCODE_SUB:
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);

         i915_emit_arith(p,
                         A0_ADD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         src0, negate(src1, 1, 1, 1, 1), 0);
         break;

      case OPCODE_SWZ:
         EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
         break;

      case OPCODE_TEX:
         EMIT_TEX(T0_TEXLD);
         break;

      case OPCODE_TXB:
         EMIT_TEX(T0_TEXLDB);
         break;

      case OPCODE_TXP:
         EMIT_TEX(T0_TEXLDP);
         break;

      case OPCODE_XPD:
         /* Cross product:
          *      result.x = src0.y * src1.z - src0.z * src1.y;
          *      result.y = src0.z * src1.x - src0.x * src1.z;
          *      result.z = src0.x * src1.y - src0.y * src1.x;
          *      result.w = undef;
          */
         src0 = src_vector(p, &inst->SrcReg[0], program);
         src1 = src_vector(p, &inst->SrcReg[1], program);
         tmp = i915_get_utemp(p);

         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(src0, Z, X, Y, ONE),
                         swizzle(src1, Y, Z, X, ONE), 0);

         i915_emit_arith(p,
                         A0_MAD,
                         get_result_vector(p, inst),
                         get_result_flags(inst), 0,
                         swizzle(src0, Y, Z, X, ONE),
                         swizzle(src1, Z, X, Y, ONE),
                         negate(tmp, 1, 1, 1, 0));
         break;

      case OPCODE_END:
         return;

      default:
         i915_program_error(p, "bad opcode");
         return;
      }

      inst++;
      i915_release_utemps(p);
   }
}
/*
 * Translate TGSI instruction to i915 instruction.
 *
 * Possible concerns:
 *
 * DDX, DDY -- return 0
 * SIN, COS -- could use another taylor step?
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct i915_full_instruction *inst,
                           struct i915_fragment_shader *fs)
{
   uint writemask;
   uint src0, src1, src2, flags;
   uint tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ABS:
      src0 = src_vector(p, &inst->Src[0], fs);
      i915_emit_arith(p,
                      A0_MAX,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      src0, negate(src0, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2, fs);
      break;

   case TGSI_OPCODE_CEIL:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);
      i915_emit_arith(p,
                      A0_FLR,
                      tmp,
                      flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1), 0, 0);
      i915_emit_arith(p,
                      A0_MOV,
                      get_result_vector(p, &inst->Dst[0]),
                      flags, 0,
                      negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      i915_emit_arith(p, A0_CMP,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst),
                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_COS:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);

      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

      /* 
       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
       * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
       * result = DP4 t0, cos_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(tmp, X, X, ONE, ONE),
                      swizzle(tmp, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XYZ, 0,
                      swizzle(tmp, X, Y, X, ONE),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XYZ, 0,
                      swizzle(tmp, X, X, Z, ONE),
                      swizzle(tmp, Z, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(tmp, ONE, Z, Y, X),
                      i915_emit_const4fv(p, cos_constants), 0);
      break;

  case TGSI_OPCODE_DDX:
  case TGSI_OPCODE_DDY:
      /* XXX We just output 0 here */
      debug_printf("Punting DDX/DDX\n");
      src0 = get_result_vector(p, &inst->Dst[0]);
      i915_emit_arith(p,
                      A0_MOV,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
      break;

  case TGSI_OPCODE_DP2:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      i915_emit_arith(p,
                      A0_DP3,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2, fs);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2, fs);
      break;

   case TGSI_OPCODE_DPH:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, Z, ONE), src1, 0);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p,
                      A0_MUL,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p,
                      A0_EXP,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1, fs);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1, fs);
      break;

   case TGSI_OPCODE_KILL_IF:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p,
                      tmp,                   /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL,   /* dest writemask */
                      0,                     /* sampler */
                      src0,                  /* coord*/
                      T0_TEXKILL,            /* opcode */
                      1);                    /* num_coord */
      break;

   case TGSI_OPCODE_KILL:
      /* unconditional kill */
      tmp = i915_get_utemp(p);

      i915_emit_texld(p,
                      tmp,                                   /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL,                   /* dest writemask */
                      0,                                     /* sampler */
                      negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */
                      T0_TEXKILL,                            /* opcode */
                      1);                                    /* num_coord */
      break;

   case TGSI_OPCODE_LG2:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p,
                      A0_LOG,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_CMP,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                      swizzle(tmp, ONE, X, ZERO, ONE),
                      swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca 
       *
       * tmp = b*a + c, 
       * result = (-c)*a + tmp 
       */
      i915_emit_arith(p, A0_MAD, tmp,
                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);

      i915_emit_arith(p, A0_MAD,
                      get_result_vector(p, &inst->Dst[0]),
                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3, fs);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2, fs);
      break;

   case TGSI_OPCODE_MIN:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      i915_emit_arith(p,
                      A0_MAX,
                      tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1),
                      negate(src1, 1, 1, 1, 1), 0);

      i915_emit_arith(p,
                      A0_MOV,
                      get_result_vector(p, &inst->Dst[0]),
                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_MOV:
      emit_simple_arith(p, inst, A0_MOV, 1, fs);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2, fs);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_POW:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p,
                      A0_LOG,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p,
                      A0_EXP,
                      get_result_vector(p, &inst->Dst[0]),
                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p,
                      A0_RCP,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RSQ:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p,
                      A0_RSQ,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_SCS:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* 
       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
       * scs.x = DP4 t1, scs_sin_constants
       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
       * scs.y = DP4 t1, scs_cos_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(src0, X, X, ONE, ONE),
                      swizzle(src0, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, X, Y),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      writemask = inst->Dst[0].Register.WriteMask;

      if (writemask & TGSI_WRITEMASK_Y) {
         uint tmp1;

         if (writemask & TGSI_WRITEMASK_X)
            tmp1 = i915_get_utemp(p);
         else
            tmp1 = tmp;

         i915_emit_arith(p,
                         A0_MUL,
                         tmp1, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, Y, W),
                         swizzle(tmp, X, Z, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, &inst->Dst[0]),
                         A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp1, W, Z, Y, X),
                         i915_emit_const4fv(p, scs_sin_constants), 0);
      }

      if (writemask & TGSI_WRITEMASK_X) {
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XYZ, 0,
                         swizzle(tmp, X, X, Z, ONE),
                         swizzle(tmp, Z, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, &inst->Dst[0]),
                         A0_DEST_CHANNEL_X, 0,
                         swizzle(tmp, ONE, Z, Y, X),
                         i915_emit_const4fv(p, scs_cos_constants), 0);
      }
      break;

   case TGSI_OPCODE_SEQ:
      /* if we're both >= and <= then we're == */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_SGE,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      src0,
                      src1, 0);

      i915_emit_arith(p,
                      A0_SGE,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      src1,
                      src0, 0);

      i915_emit_arith(p,
                      A0_MUL,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      get_result_vector(p, &inst->Dst[0]),
                      tmp, 0);

      break;

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SIN:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);

      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

      /* 
       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
       * result = DP4 t1.wzyx, sin_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(tmp, X, X, ONE, ONE),
                      swizzle(tmp, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, X, Y),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, Y, W),
                      swizzle(tmp, X, Z, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(tmp, W, Z, Y, X),
                      i915_emit_const4fv(p, sin_constants), 0);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SNE:
      /* if we're < or > then we're != */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_SLT,
                      tmp,
                      A0_DEST_CHANNEL_ALL, 0,
                      src0,
                      src1, 0);

      i915_emit_arith(p,
                      A0_SLT,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      src1,
                      src0, 0);

      i915_emit_arith(p,
                      A0_ADD,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      get_result_vector(p, &inst->Dst[0]),
                      tmp, 0);
      break;

   case TGSI_OPCODE_SSG:
      /* compute (src>0) - (src<0) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_SLT,
                      tmp,
                      A0_DEST_CHANNEL_ALL, 0,
                      src0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);

      i915_emit_arith(p,
                      A0_SLT,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO),
                      src0, 0);

      i915_emit_arith(p,
                      A0_ADD,
                      get_result_vector(p, &inst->Dst[0]),
                      A0_DEST_CHANNEL_ALL, 0,
                      get_result_vector(p, &inst->Dst[0]),
                      negate(tmp, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_SUB:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      i915_emit_arith(p,
                      A0_ADD,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      src0, negate(src1, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD, fs);
      break;

   case TGSI_OPCODE_TRUNC:
      emit_simple_arith(p, inst, A0_TRC, 1, fs);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB, fs);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP, fs);
      break;

   case TGSI_OPCODE_XPD:
      /* Cross product:
       *      result.x = src0.y * src1.z - src0.z * src1.y;
       *      result.y = src0.z * src1.x - src0.x * src1.z;
       *      result.z = src0.x * src1.y - src0.y * src1.x;
       *      result.w = undef;
       */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(src0, Z, X, Y, ONE),
                      swizzle(src1, Y, Z, X, ONE), 0);

      i915_emit_arith(p,
                      A0_MAD,
                      get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, Y, Z, X, ONE),
                      swizzle(src1, Z, X, Y, ONE),
                      negate(tmp, 1, 1, 1, 0));
      break;

   default:
      i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
      p->error = 1;
      return;
   }

   i915_release_utemps(p);
}
예제 #5
0
static GLuint emit_texenv( struct i915_fragment_program *p, int unit )
{
   struct gl_texture_unit *texUnit = &p->ctx->Texture.Unit[unit];
   GLenum envMode = texUnit->EnvMode;
   struct gl_texture_object *tObj = texUnit->_Current;
   GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
   GLuint saturate = unit < p->last_tex_stage ? A0_DEST_SATURATE : 0;

   switch(envMode) {
   case GL_BLEND: {
      const int cf = get_source(p, GL_PREVIOUS, unit);
      const int cc = get_source(p, GL_CONSTANT, unit);
      const int cs = get_source(p, GL_TEXTURE, unit);
      const int out = get_dest(p, unit);

      if (format == GL_INTENSITY) {
	 /* cv = cf(1 - cs) + cc.cs
	  * cv = cf - cf.cs + cc.cs
	  */
	 /* u[2] = MAD( -cf * cs + cf )
	  * cv   = MAD( cc * cs + u[2] )
	  */
	 
	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0, 
			 negate(cf,1,1,1,1), cs, cf );

	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate, 
			 cc, cs, out );

	 return out;
      } else {
	 /* cv = cf(1 - cs) + cc.cs
	  * cv = cf - cf.cs + cc.cs
	  * av =      af.as
	  */
	 /* u[2] = MAD( cf.-x-y-zw * cs.xyzw + cf.xyz0 )
	  * oC   = MAD( cc.xyz0 * cs.xyz0 + u[2].xyzw )
	  */
	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
			 negate(cf,1,1,1,0),  
			 cs,
			 swizzle(cf,X,Y,Z,ZERO) );


	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
			 swizzle(cc,X,Y,Z,ZERO),  
			 swizzle(cs,X,Y,Z,ZERO),
			 out );

	 return out;
      }
   }

   case GL_DECAL: {
      if (format == GL_RGB ||
	  format == GL_RGBA) {
	 int cf = get_source( p, GL_PREVIOUS, unit );
	 int cs = get_source( p, GL_TEXTURE, unit );
	 int out = get_dest(p, unit);
	 
	 /* cv = cf(1-as) + cs.as
	  * cv = cf.(-as) + cf + cs.as
	  * av = af
	  */ 
	 
	 /* u[2] = mad( cf.xyzw * cs.-w-w-w1 + cf.xyz0 )
	  * oc = mad( cs.xyz0 * cs.www0 + u[2].xyzw )
	  */
	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, 0,
			 cf,  
			 negate(swizzle(cs,W,W,W,ONE),1,1,1,0),
			 swizzle(cf,X,Y,Z,ZERO) );
	 
	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
			 swizzle(cs,X,Y,Z,ZERO),  
			 swizzle(cs,W,W,W,ZERO),
			 out );
	 return out;
      }
      else {
	 return get_source( p, GL_PREVIOUS, unit );
      }
   }

   case GL_REPLACE: {
      const int cs = get_source( p, GL_TEXTURE, unit );	/* saturated */
      switch (format) {
      case GL_ALPHA: {
	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_XYZ, 0, cf, 0, 0 );
	 return cs;
      }
      case GL_RGB:
      case GL_LUMINANCE: {
	 const int cf = get_source( p, GL_PREVIOUS, unit ); /* saturated */
	 i915_emit_arith( p, A0_MOV, cs, A0_DEST_CHANNEL_W, 0, cf, 0, 0 );
	 return cs;
      }
      default:
	 return cs;
      }
   }

   case GL_MODULATE: {
      const int cf = get_source( p, GL_PREVIOUS, unit );
      const int cs = get_source( p, GL_TEXTURE, unit );
      const int out = get_dest(p, unit);
      switch (format) {
      case GL_ALPHA: 
	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate,
			 swizzle(cs, ONE, ONE, ONE, W), cf, 0 );
	 break;
      default:
	 i915_emit_arith( p, A0_MUL, out, A0_DEST_CHANNEL_ALL, saturate, 
			 cs, cf, 0 );
	 break;
      }
      return out;
   }
   case GL_ADD: {
      int cf = get_source( p, GL_PREVIOUS, unit );
      int cs = get_source( p, GL_TEXTURE, unit );
      const int out = get_dest( p, unit );

      if (format == GL_INTENSITY) {
	 /* output-color.rgba = add( incoming, u[1] )
	  */
	 i915_emit_arith( p, A0_ADD, out, A0_DEST_CHANNEL_ALL, saturate, 
			 cs, cf, 0 );
	 return out;
      }
      else {
	 /* cv.xyz = cf.xyz + cs.xyz
	  * cv.w   = cf.w * cs.w
	  *
	  * cv.xyzw = MAD( cf.111w * cs.xyzw + cf.xyz0 )
	  */
 	 i915_emit_arith( p, A0_MAD, out, A0_DEST_CHANNEL_ALL, saturate,
			 swizzle(cf,ONE,ONE,ONE,W), 
			 cs,  
			 swizzle(cf,X,Y,Z,ZERO) ); 
	 return out;
      }
      break;
   }
   case GL_COMBINE: {
      GLuint rgb_shift, alpha_shift, out, shift;
      GLuint dest = get_dest(p, unit);

      /* The EXT version of the DOT3 extension does not support the
       * scale factor, but the ARB version (and the version in OpenGL
       * 1.3) does.
       */
      switch (texUnit->Combine.ModeRGB) {
      case GL_DOT3_RGB_EXT:
	 alpha_shift = texUnit->Combine.ScaleShiftA;
	 rgb_shift = 0;
	 break;

      case GL_DOT3_RGBA_EXT:
	 alpha_shift = 0;
	 rgb_shift = 0;
	 break;

      default:
	 rgb_shift = texUnit->Combine.ScaleShiftRGB;
	 alpha_shift = texUnit->Combine.ScaleShiftA;
	 break;
      }


      /* Emit the RGB and A combine ops
       */
      if (texUnit->Combine.ModeRGB == texUnit->Combine.ModeA && 
	  args_match( texUnit )) {
	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
			     unit,
			     texUnit->Combine.ModeRGB,
			     texUnit->Combine.SourceRGB,
			     texUnit->Combine.OperandRGB );
      }
      else if (texUnit->Combine.ModeRGB == GL_DOT3_RGBA_EXT ||
	       texUnit->Combine.ModeRGB == GL_DOT3_RGBA) {

	 out = emit_combine( p, dest, A0_DEST_CHANNEL_ALL, saturate,
			     unit,
			     texUnit->Combine.ModeRGB,
			     texUnit->Combine.SourceRGB,
			     texUnit->Combine.OperandRGB );
      }
      else {
	 /* Need to do something to stop from re-emitting identical
	  * argument calculations here:
	  */
	 out = emit_combine( p, dest, A0_DEST_CHANNEL_XYZ, saturate,
			     unit,
			     texUnit->Combine.ModeRGB,
			     texUnit->Combine.SourceRGB,
			     texUnit->Combine.OperandRGB );
	 out = emit_combine( p, dest, A0_DEST_CHANNEL_W, saturate,
			     unit,
			     texUnit->Combine.ModeA,
			     texUnit->Combine.SourceA,
			     texUnit->Combine.OperandA );
      }

      /* Deal with the final shift:
       */
      if (alpha_shift || rgb_shift) {
	 if (rgb_shift == alpha_shift) {
	    shift = i915_emit_const1f(p, 1<<rgb_shift);
	    shift = swizzle(shift,X,X,X,X);
	 }
	 else {
	    shift = i915_emit_const2f(p, 1<<rgb_shift, 1<<alpha_shift);
	    shift = swizzle(shift,X,X,X,Y);
	 }
	 return i915_emit_arith( p, A0_MUL, dest, A0_DEST_CHANNEL_ALL, 
				saturate, out, shift, 0 );
      }

      return out;
   }

   default:
      return get_source(p, GL_PREVIOUS, 0);
   }
}
예제 #6
0
static GLuint emit_combine( struct i915_fragment_program *p,
			    GLuint dest,
			    GLuint mask,
			    GLuint saturate,
			    GLuint unit,
			    GLenum mode,
			    const GLenum *source,
			    const GLenum *operand)
{
   int tmp, src[3], nr = nr_args(mode);
   int i;

   for (i = 0; i < nr; i++)
      src[i] = emit_combine_source( p, mask, unit, source[i], operand[i] );

   switch (mode) {
   case GL_REPLACE: 
      if (mask == A0_DEST_CHANNEL_ALL && !saturate)
	 return src[0];
      else
	 return i915_emit_arith( p, A0_MOV, dest, mask, saturate, src[0], 0, 0 );
   case GL_MODULATE: 
      return i915_emit_arith( p, A0_MUL, dest, mask, saturate,
			     src[0], src[1], 0 );
   case GL_ADD: 
      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, 
			     src[0], src[1], 0 );
   case GL_ADD_SIGNED:
      /* tmp = arg0 + arg1
       * result = tmp + -.5
       */
      tmp = i915_emit_const1f(p, .5);
      tmp = negate(swizzle(tmp,X,X,X,X),1,1,1,1);
      i915_emit_arith( p, A0_ADD, dest, mask, 0, src[0], src[1], 0 );
      i915_emit_arith( p, A0_ADD, dest, mask, saturate, dest, tmp, 0 );
      return dest;
   case GL_INTERPOLATE:		/* TWO INSTRUCTIONS */
      /* Arg0 * (Arg2) + Arg1 * (1-Arg2)
       *
       * Arg0*Arg2 + Arg1 - Arg1Arg2 
       *
       * tmp = Arg0*Arg2 + Arg1, 
       * result = (-Arg1)Arg2 + tmp 
       */
      tmp = i915_get_temp( p );
      i915_emit_arith( p, A0_MAD, tmp, mask, 0, src[0], src[2], src[1] );
      i915_emit_arith( p, A0_MAD, dest, mask, saturate, 
		      negate(src[1], 1,1,1,1), src[2], tmp );
      return dest;
   case GL_SUBTRACT: 
      /* negate src[1] */
      return i915_emit_arith( p, A0_ADD, dest, mask, saturate, src[0],
			 negate(src[1],1,1,1,1), 0 );

   case GL_DOT3_RGBA:
   case GL_DOT3_RGBA_EXT: 
   case GL_DOT3_RGB_EXT:
   case GL_DOT3_RGB: {
      GLuint tmp0 = i915_get_temp( p );
      GLuint tmp1 = i915_get_temp( p );
      GLuint neg1 = negate(swizzle(i915_emit_const1f(p, 1),X,X,X,X), 1,1,1,1);
      GLuint two = swizzle(i915_emit_const1f(p, 2),X,X,X,X);
      i915_emit_arith( p, A0_MAD, tmp0, A0_DEST_CHANNEL_ALL, 0, 
		      two, src[0], neg1);
      if (src[0] == src[1])
	 tmp1 = tmp0;
      else
	 i915_emit_arith( p, A0_MAD, tmp1, A0_DEST_CHANNEL_ALL, 0, 
			 two, src[1], neg1);
      i915_emit_arith( p, A0_DP3, dest, mask, saturate, tmp0, tmp1, 0);
      return dest;
   }

   default: 
      return src[0];
   }
}