Example #1
0
/**
 * Generate a / b
 */
LLVMValueRef
lp_build_div(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;

   if(a == bld->zero)
      return bld->zero;
   if(a == bld->one)
      return lp_build_rcp(bld, b);
   if(b == bld->zero)
      return bld->undef;
   if(b == bld->one)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   if(LLVMIsConstant(a) && LLVMIsConstant(b))
      return LLVMConstFDiv(a, b);

   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
      return lp_build_mul(bld, a, lp_build_rcp(bld, b));

   return LLVMBuildFDiv(bld->builder, a, b, "");
}
Example #2
0
/**
 * Generate 1/sqrt(a)
 */
LLVMValueRef
lp_build_rsqrt(struct lp_build_context *bld,
               LLVMValueRef a)
{
   const struct lp_type type = bld->type;

   assert(type.floating);

   if(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)
      return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);

   return lp_build_rcp(bld, lp_build_sqrt(bld, a));
}
Example #3
0
/**
 * Increment the shader input attribute values.
 * This is called when we move from one quad to the next.
 */
static void
attribs_update(struct lp_build_interp_soa_context *bld,
               struct gallivm_state *gallivm,
               LLVMValueRef loop_iter,
               int start,
               int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMValueRef oow = NULL;
   unsigned attrib;
   unsigned chan;

   for(attrib = start; attrib < end; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         if(mask & (1 << chan)) {
            LLVMValueRef a;
            if (interp == LP_INTERP_CONSTANT ||
                interp == LP_INTERP_FACING) {
               a = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
            }
            else if (interp == LP_INTERP_POSITION) {
               assert(attrib > 0);
               a = bld->attribs[0][chan];
            }
            else {
               LLVMValueRef dadq;

               a = bld->a[attrib][chan];

               /*
                * Broadcast the attribute value for this quad into all elements
                */

               {
                  /* stored as vector load as float */
                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
                                                            gallivm->context), 0);
                  LLVMValueRef ptr;
                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
                  a = LLVMBuildLoad(builder, ptr, "");
                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
               }

               /*
                * Get the derivatives.
                */

               dadq = bld->dadq[attrib][chan];

#if PERSPECTIVE_DIVIDE_PER_QUAD
               if (interp == LP_INTERP_PERSPECTIVE) {
                  LLVMValueRef dwdq = bld->dadq[0][3];

                  if (oow == NULL) {
                     assert(bld->oow);
                     oow = LLVMBuildShuffleVector(coeff_bld->builder,
                                                  bld->oow, coeff_bld->undef,
                                                  shuffle, "");
                  }

                  dadq = lp_build_sub(coeff_bld,
                                      dadq,
                                      lp_build_mul(coeff_bld, a, dwdq));
                  dadq = lp_build_mul(coeff_bld, dadq, oow);
               }
#endif

               /*
                * Add the derivatives
                */

               a = lp_build_add(coeff_bld, a, dadq);

#if !PERSPECTIVE_DIVIDE_PER_QUAD
               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
#endif

               if (attrib == 0 && chan == 2) {
                  /* FIXME: Depth values can exceed 1.0, due to the fact that
                   * setup interpolation coefficients refer to (0,0) which causes
                   * precision loss. So we must clamp to 1.0 here to avoid artifacts
                   */
                  a = lp_build_min(coeff_bld, a, coeff_bld->one);
               }

               attrib_name(a, attrib, chan, "");
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}
Example #4
0
/**
 * Initialize the bld->a, dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }


   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is:
       * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this doesn't
             * really exactly to what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
	    }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                   LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
Example #5
0
/**
 * Interpolate the shader input attribute values.
 * This is called for each (group of) quad(s).
 */
static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
                      struct gallivm_state *gallivm,
                      LLVMValueRef loop_iter,
                      int start,
                      int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   LLVMValueRef oow = NULL;
   unsigned attrib;
   LLVMValueRef pixoffx;
   LLVMValueRef pixoffy;
   LLVMValueRef ptr;

   /* could do this with code-generated passed in pixel offsets too */

   assert(loop_iter);
   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
   pixoffx = LLVMBuildLoad(builder, ptr, "");
   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
   pixoffy = LLVMBuildLoad(builder, ptr, "");

   pixoffx = LLVMBuildFAdd(builder, pixoffx,
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");

   for (attrib = start; attrib < end; attrib++) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
         if (mask & (1 << chan)) {
            LLVMValueRef index;
            LLVMValueRef dadx = coeff_bld->zero;
            LLVMValueRef dady = coeff_bld->zero;
            LLVMValueRef a = coeff_bld->zero;

            index = lp_build_const_int32(gallivm, chan);
            switch (interp) {
            case LP_INTERP_PERSPECTIVE:
               /* fall-through */

            case LP_INTERP_LINEAR:
               if (attrib == 0 && chan == 0) {
                  dadx = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else if (attrib == 0 && chan == 1) {
                  dady = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else {
                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadxaos[attrib],
                                                    index);
                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadyaos[attrib],
                                                    index);
                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, bld->a0aos[attrib],
                                                 index);
               }
               /*
                * a = a0 + (x * dadx + y * dady)
                */
               a = lp_build_fmuladd(builder, dadx, pixoffx, a);
               a = lp_build_fmuladd(builder, dady, pixoffy, a);

               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
               break;

            case LP_INTERP_CONSTANT:
            case LP_INTERP_FACING:
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, bld->a0aos[attrib],
                                              index);
               break;

            case LP_INTERP_POSITION:
               assert(attrib > 0);
               a = bld->attribs[0][chan];
               break;

            default:
               assert(0);
               break;
            }

            if ((attrib == 0) && (chan == 2)){
               /* FIXME: Depth values can exceed 1.0, due to the fact that
                * setup interpolation coefficients refer to (0,0) which causes
                * precision loss. So we must clamp to 1.0 here to avoid artifacts
                */
               a = lp_build_min(coeff_bld, a, coeff_bld->one);
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}
/**
 * Emit LLVM for one TGSI instruction.
 * \param return TRUE for success, FALSE otherwise
 */
boolean
lp_emit_instruction_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   const struct tgsi_opcode_info *info,
   int *pc)
{
   LLVMValueRef src0, src1, src2;
   LLVMValueRef tmp0, tmp1;
   LLVMValueRef dst0 = NULL;

   /*
    * Stores and write masks are handled in a general fashion after the long
    * instruction opcode switch statement.
    *
    * Although not stricitly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where's that easy. For some
    * complex instructions, like texture sampling, it is more convenient to
    * assume a full writemask and then let LLVM optimization passes eliminate
    * redundant code.
    */

   (*pc)++;

   assert(info->num_dst <= 1);
   if (info->num_dst) {
      dst0 = bld->bld_base.base.undef;
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ARL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_MOV:
      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      break;

   case TGSI_OPCODE_LIT:
      return FALSE;

   case TGSI_OPCODE_RCP:
   /* TGSI_OPCODE_RECIP */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_RSQ:
   /* TGSI_OPCODE_RECIPSQRT */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_EXP:
      return FALSE;

   case TGSI_OPCODE_LOG:
      return FALSE;

   case TGSI_OPCODE_MUL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_ADD:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_DP3:
   /* TGSI_OPCODE_DOT3 */
      return FALSE;

   case TGSI_OPCODE_DP4:
   /* TGSI_OPCODE_DOT4 */
      return FALSE;

   case TGSI_OPCODE_DST:
      return FALSE;

   case TGSI_OPCODE_MIN:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_MAX:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_SLT:
   /* TGSI_OPCODE_SETLT */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SGE:
   /* TGSI_OPCODE_SETGE */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_MAD:
   /* TGSI_OPCODE_MADD */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_SUB:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_LRP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_CND:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
      break;

   case TGSI_OPCODE_DP2A:
      return FALSE;

   case TGSI_OPCODE_FRC:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
      break;

   case TGSI_OPCODE_CLAMP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_FLR:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_ROUND:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_round(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_EX2:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_LG2:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_POW:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_XPD:
      return FALSE;

   case TGSI_OPCODE_RCC:
      /* deprecated? */
      assert(0);
      return FALSE;

   case TGSI_OPCODE_DPH:
      return FALSE;

   case TGSI_OPCODE_COS:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_DDX:
      return FALSE;

   case TGSI_OPCODE_DDY:
      return FALSE;

   case TGSI_OPCODE_KILP:
      /* predicated kill */
      return FALSE;

   case TGSI_OPCODE_KIL:
      /* conditional kill */
      return FALSE;

   case TGSI_OPCODE_PK2H:
      return FALSE;
      break;

   case TGSI_OPCODE_PK2US:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4B:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4UB:
      return FALSE;

   case TGSI_OPCODE_RFL:
      return FALSE;

   case TGSI_OPCODE_SEQ:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SFL:
      dst0 = bld->bld_base.base.zero;
      break;

   case TGSI_OPCODE_SGT:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SIN:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_SLE:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SNE:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_STR:
      dst0 = bld->bld_base.base.one;
      break;

   case TGSI_OPCODE_TEX:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
      break;

   case TGSI_OPCODE_TXD:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
      break;

   case TGSI_OPCODE_UP2H:
      /* deprecated */
      assert (0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP2US:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4B:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4UB:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_X2D:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARR:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_round(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_BRA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CAL:
      return FALSE;

   case TGSI_OPCODE_RET:
      return FALSE;

   case TGSI_OPCODE_END:
      *pc = -1;
      break;

   case TGSI_OPCODE_SSG:
   /* TGSI_OPCODE_SGN */
      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
      break;

   case TGSI_OPCODE_SCS:
      return FALSE;

   case TGSI_OPCODE_TXB:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
      break;

   case TGSI_OPCODE_NRM:
      /* fall-through */
   case TGSI_OPCODE_NRM4:
      return FALSE;

   case TGSI_OPCODE_DIV:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_DP2:
      return FALSE;

   case TGSI_OPCODE_TXL:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
      break;

   case TGSI_OPCODE_TXP:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
      break;

   case TGSI_OPCODE_BRK:
      return FALSE;

   case TGSI_OPCODE_IF:
      return FALSE;

   case TGSI_OPCODE_BGNLOOP:
      return FALSE;

   case TGSI_OPCODE_BGNSUB:
      return FALSE;

   case TGSI_OPCODE_ELSE:
      return FALSE;

   case TGSI_OPCODE_ENDIF:
      return FALSE;

   case TGSI_OPCODE_ENDLOOP:
      return FALSE;

   case TGSI_OPCODE_ENDSUB:
      return FALSE;

   case TGSI_OPCODE_PUSHA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_POPA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CEIL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_I2F:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_NOT:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TRUNC:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_SHL:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ISHR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_AND:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_OR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_MOD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_XOR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      return FALSE;

   case TGSI_OPCODE_EMIT:
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      return FALSE;
   }
   
   if (info->num_dst) {
      lp_emit_store_aos(bld, inst, 0, dst0);
   }

   return TRUE;
}