コード例 #1
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Do the one or two-sided stencil test op/update.
 */
static LLVMValueRef
lp_build_stencil_op(struct lp_build_context *bld,
                    const struct pipe_stencil_state stencil[2],
                    enum stencil_op op,
                    LLVMValueRef stencilRefs[2],
                    LLVMValueRef stencilVals,
                    LLVMValueRef mask,
                    LLVMValueRef front_facing)

{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face op */
   res = lp_build_stencil_op_single(bld, &stencil[0], op,
                                     stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face op */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
                                            stencilRefs[1], stencilVals);

      res = lp_build_select(bld, front_facing, res, back_res);
   }

   if (stencil[0].writemask != 0xff ||
       (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
      /* mask &= stencil[0].writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                      stencil[0].writemask);
      if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
         LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
                                                         stencil[1].writemask);
         writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
      }

      mask = LLVMBuildAnd(builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask) */
      res = lp_build_select_bitwise(bld, mask, res, stencilVals);
   }
   else {
      /* res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}
コード例 #2
0
/**
 * Generate color blending and color output.
 * \param rt  the render target index (to index blend, colormask state)
 * \param type  the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask  execution mask (active fragment/pixel mask)
 * \param src  colors from the fragment shader
 * \param dst_ptr  the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* we'll use this mask context to skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);

   /* store results to color buffer */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->rt[rt].colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
コード例 #3
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Do the one or two-sided stencil test comparison.
 * \sa lp_build_stencil_test_single
 * \param front_facing  an integer vector mask, indicating front (~0) or back
 *                      (0) facing polygon. If NULL, assume front-facing.
 */
static LLVMValueRef
lp_build_stencil_test(struct lp_build_context *bld,
                      const struct pipe_stencil_state stencil[2],
                      LLVMValueRef stencilRefs[2],
                      LLVMValueRef stencilVals,
                      LLVMValueRef front_facing)
{
   LLVMValueRef res;

   assert(stencil[0].enabled);

   /* do front face test */
   res = lp_build_stencil_test_single(bld, &stencil[0],
                                      stencilRefs[0], stencilVals);

   if (stencil[1].enabled && front_facing != NULL) {
      /* do back face test */
      LLVMValueRef back_res;

      back_res = lp_build_stencil_test_single(bld, &stencil[1],
                                              stencilRefs[1], stencilVals);

      res = lp_build_select(bld, front_facing, res, back_res);
   }

   return res;
}
コード例 #4
0
/**
 * Extract Y, U, V channels from packed YUYV.
 * @param packed  is a <n x i32> vector with the packed YUYV blocks
 * @param i  is a <n x i32> vector with the x pixel coordinate (0 or 1)
 */
static void
yuyv_to_yuv_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef packed,
                LLVMValueRef i,
                LLVMValueRef *y,
                LLVMValueRef *u,
                LLVMValueRef *v)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type type;
   LLVMValueRef mask;

   memset(&type, 0, sizeof type);
   type.width = 32;
   type.length = n;

   assert(lp_check_value(type, packed));
   assert(lp_check_value(type, i));

   /*
    * y = (yuyv >> 16*i) & 0xff
    * u = (yuyv >> 8   ) & 0xff
    * v = (yuyv >> 24  ) & 0xff
    */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * Avoid shift with per-element count.
    * No support on x86, gets translated to roughly 5 instructions
    * per element. Didn't measure performance but cuts shader size
    * by quite a bit (less difference if cpu has no sse4.1 support).
    */
   if (util_cpu_caps.has_sse2 && n == 4) {
      LLVMValueRef sel, tmp;
      struct lp_build_context bld32;

      lp_build_context_init(&bld32, gallivm, type);

      tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), "");
      sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0));
       *y = lp_build_select(&bld32, sel, packed, tmp);
   } else
#endif
   {
      LLVMValueRef shift;
      shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), "");
      *y = LLVMBuildLShr(builder, packed, shift, "");
   }

   *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), "");
   *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), "");

   mask = lp_build_const_int_vec(gallivm, type, 0xff);

   *y = LLVMBuildAnd(builder, *y, mask, "y");
   *u = LLVMBuildAnd(builder, *u, mask, "u");
   *v = LLVMBuildAnd(builder, *v, mask, "v");
}
コード例 #5
0
ファイル: lp_state_fs.c プロジェクト: emcmanus/FlashMesa3D
/**
 * Generate color blending and color output.
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
コード例 #6
0
/**
 * Convert srgb int values to linear float values.
 * Several possibilities how to do this, e.g.
 * - table
 * - doing the pow() with int-to-float and float-to-int tricks
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 * - just using standard polynomial approximation
 *   (3rd order polynomial is required for crappy but just sufficient accuracy)
 *
 * @param src   integer (vector) value(s) to convert
 *              (chan_bits bit values unpacked to 32 bit already).
 */
LLVMValueRef
lp_build_srgb_to_linear(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        unsigned chan_bits,
                        LLVMValueRef src)
{
   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
   struct lp_build_context f32_bld;
   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
   double coeffs[4] = {0.0023f,
                       0.0030f / 255.0f,
                       0.6935f / (255.0f * 255.0f),
                       0.3012f / (255.0f * 255.0f * 255.0f)
   };

   assert(src_type.width == 32);
   /* Technically this would work with more bits too but would be inaccurate. */
   assert(chan_bits <= 8);

   lp_build_context_init(&f32_bld, gallivm, f32_type);

   /*
    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
    * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
    * (found with octave polyfit and some magic as I couldn't get the error
    * function right). Using the above mentioned error function, the values stay
    * within +-0.35, except for the lowest values - hence tweaking linear segment
    * to cover the first 16 instead of the first 11 values (the error stays
    * just about acceptable there too).
    * Hence: lin = src > 15 ? poly : src / 12.6
    * This function really only makes sense for vectors, should use LUT otherwise.
    * All in all (including float conversion) 11 instructions (with sse4.1),
    * 6 constants (polynomial could be done with 1 instruction less at the cost
    * of slightly worse dependency chain, fma should also help).
    */
   /* doing the 1/255 mul as part of the approximation */
   srcf = lp_build_int_to_float(&f32_bld, src);
   if (chan_bits != 8) {
      /* could adjust all the constants instead */
      LLVMValueRef rescale_const = lp_build_const_vec(gallivm, f32_type,
                                                      255.0f / ((1 << chan_bits) - 1));
      srcf = lp_build_mul(&f32_bld, srcf, rescale_const);
   }
   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);

   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);

   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
}
コード例 #7
0
ファイル: lp_bld_logic.c プロジェクト: Forzaferrarileo/mesa
/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;
   if((mask & 0xf) == 0xf)
      return a;
   if((mask & 0xf) == 0x0)
      return b;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (n <= 4) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type,
                                           (mask & (1 << i) ? 0 : n) + j + i,
                                           0);

      return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
   }
   else {
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}
コード例 #8
0
ファイル: lp_bld_arit.c プロジェクト: CPFDSoftware-Tony/gmv
LLVMValueRef
lp_build_sgn(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef cond;
   LLVMValueRef res;

   /* Handle non-zero case */
   if(!type.sign) {
      /* if not zero then sign must be positive */
      res = bld->one;
   }
   else if(type.floating) {
      /* Take the sign bit and add it to 1 constant */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef sign;
      LLVMValueRef one;
      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
      one = LLVMConstBitCast(bld->one, int_vec_type);
      res = LLVMBuildOr(bld->builder, sign, one, "");
      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
   }
   else
   {
      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
      res = lp_build_select(bld, cond, bld->one, minus_one);
   }

   /* Handle zero */
   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
   res = lp_build_select(bld, cond, bld->zero, bld->one);

   return res;
}
コード例 #9
0
ファイル: lp_bld_arit.c プロジェクト: CPFDSoftware-Tony/gmv
/**
 * Generate min(a, b)
 * No checks for special case values of a or b = 1 or 0 are done.
 */
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
                    LLVMValueRef a,
                    LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   const char *intrinsic = NULL;
   LLVMValueRef cond;

   /* TODO: optimize the constant case */

   if(type.width * type.length == 128) {
      if(type.floating) {
         if(type.width == 32 && util_cpu_caps.has_sse)
            intrinsic = "llvm.x86.sse.min.ps";
         if(type.width == 64 && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.min.pd";
      }
      else {
         if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.pminu.b";
         if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminsb";
         if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminuw";
         if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.pmins.w";
         if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminud";
         if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminsd";
      }
   }

   if(intrinsic)
      return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);

   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
   return lp_build_select(bld, cond, a, b);
}
コード例 #10
0
/**
 * Register store.
 */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;
   LLVMValueRef ptr;

   /*
    * Saturate the value
    */

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   default:
      assert(0);
   }

   /*
    * Translate the register file
    */

   assert(!reg->Register.Indirect);

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;

   case TGSI_FILE_ADDRESS:
      ptr = bld->addr[reg->Indirect.Index];
      break;

   case TGSI_FILE_PREDICATE:
      ptr = bld->preds[reg->Register.Index];
      break;

   default:
      assert(0);
      return;
   }

   if (!ptr)
      return;
   /*
    * Predicate
    */

   if (inst->Instruction.Predicate) {
      LLVMValueRef pred;

      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);

      pred = LLVMBuildLoad(builder,
                           bld->preds[inst->Predicate.Index], "");

      /*
       * Convert the value to an integer mask.
       */
      pred = lp_build_compare(bld->bld_base.base.gallivm,
                               bld->bld_base.base.type,
                               PIPE_FUNC_NOTEQUAL,
                               pred,
                               bld->bld_base.base.zero);

      if (inst->Predicate.Negate) {
         pred = LLVMBuildNot(builder, pred, "");
      }

      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
                         inst->Predicate.SwizzleX,
                         inst->Predicate.SwizzleY,
                         inst->Predicate.SwizzleZ,
                         inst->Predicate.SwizzleW);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, pred, "");
      } else {
         mask = pred;
      }
   }

   /*
    * Writemask
    */

   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
                                                   bld->bld_base.base.type,
                                                   reg->Register.WriteMask,
                                                   TGSI_NUM_CHANNELS,
                                                   bld->swizzles);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   if (mask) {
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(builder, ptr, "");
      value = lp_build_select(&bld->bld_base.base,
                              mask, value, orig_value);
   }

   LLVMBuildStore(builder, value, ptr);
}
コード例 #11
0
/**
 * Performs blending of src and dst pixels
 *
 * @param blend         the blend state of the shader variant
 * @param cbuf_format   format of the colour buffer
 * @param type          data type of the pixel vector
 * @param rt            render target index
 * @param src           blend src
 * @param src_alpha     blend src alpha (if not included in src)
 * @param src1          second blend src (for dual source blend)
 * @param src1_alpha    second blend src alpha (if not included in src1)
 * @param dst           blend dst
 * @param mask          optional mask to apply to the blending result
 * @param const_        const blend color
 * @param const_alpha   const blend color alpha (if not included in const_)
 * @param swizzle       swizzle values for RGBA
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   enum pipe_format cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef src_alpha,
                   LLVMValueRef src1,
                   LLVMValueRef src1_alpha,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   LLVMValueRef const_alpha,
                   const unsigned char swizzle[4],
                   int nr_channels)
{
   const struct pipe_rt_blend_state * state = &blend->rt[rt];
   const struct util_format_description * desc;
   struct lp_build_blend_aos_context bld;
   LLVMValueRef src_factor, dst_factor;
   LLVMValueRef result;
   unsigned alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;
   unsigned i;

   desc = util_format_description(cbuf_format);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.src1 = src1;
   bld.dst = dst;
   bld.const_ = const_;
   bld.src_alpha = src_alpha;
   bld.src1_alpha = src1_alpha;
   bld.const_alpha = const_alpha;

   /* Find the alpha channel if not provided seperately */
   if (!src_alpha) {
      for (i = 0; i < 4; ++i) {
         if (swizzle[i] == 3) {
            alpha_swizzle = i;
         }
      }
   }

   if (blend->logicop_enable) {
      if(!type.floating) {
         result = lp_build_logicop(gallivm->builder, blend->logicop_func, src, dst);
      }
      else {
         result = src;
      }
   } else if (!state->blend_enable) {
      result = src;
   } else {
      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;

      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                         state->alpha_src_factor,
                                         alpha_swizzle,
                                         nr_channels);

      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
                                         state->alpha_dst_factor,
                                         alpha_swizzle,
                                         nr_channels);

      result = lp_build_blend(&bld.base,
                              state->rgb_func,
                              state->rgb_src_factor,
                              state->rgb_dst_factor,
                              src,
                              dst,
                              src_factor,
                              dst_factor,
                              rgb_alpha_same,
                              false);

      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
         LLVMValueRef alpha;

         alpha = lp_build_blend(&bld.base,
                                state->alpha_func,
                                state->alpha_src_factor,
                                state->alpha_dst_factor,
                                src,
                                dst,
                                src_factor,
                                dst_factor,
                                rgb_alpha_same,
                                false);

         result = lp_build_blend_swizzle(&bld,
                                         result,
                                         alpha,
                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
                                         alpha_swizzle,
                                         nr_channels);
      }
   }

   /* Check if color mask is necessary */
   if (!util_format_colormask_full(desc, state->colormask)) {
      LLVMValueRef color_mask;

      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
      lp_build_name(color_mask, "color_mask");

      /* Combine with input mask if necessary */
      if (mask) {
         /* We can be blending floating values but masks are always integer... */
         unsigned floating = bld.base.type.floating;
         bld.base.type.floating = 0;

         mask = lp_build_and(&bld.base, color_mask, mask);

         bld.base.type.floating = floating;
      } else {
         mask = color_mask;
      }
   }

   /* Apply mask, if one exists */
   if (mask) {
      result = lp_build_select(&bld.base, mask, result, dst);
   }

   return result;
}
コード例 #12
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically n 2x2 quads).
 *
 * \param depth  the depth test state
 * \param stencil  the front/back stencil state
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs  the front/back stencil ref values (scalar)
 * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
 * \param zs_dst  the depth/stencil values in framebuffer
 * \param face  contains boolean value indicating front/back facing polygon
 */
void
lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type z_src_type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef z_fb,
                            LLVMValueRef s_fb,
                            LLVMValueRef face,
                            LLVMValueRef *z_value,
                            LLVMValueRef *s_value,
                            boolean do_branch)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type z_type;
   struct lp_build_context z_bld;
   struct lp_build_context s_bld;
   struct lp_type s_type;
   unsigned z_shift = 0, z_width = 0, z_mask = 0;
   LLVMValueRef z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = lp_build_mask_value(mask);
   LLVMValueRef front_facing = NULL;
   boolean have_z, have_s;

   /*
    * Depths are expected to be between 0 and 1, even if they are stored in
    * floats. Setting these bits here will ensure that the lp_build_conv() call
    * below won't try to unnecessarily clamp the incoming values.
    */
   if(z_src_type.floating) {
      z_src_type.sign = FALSE;
      z_src_type.norm = TRUE;
   }
   else {
      assert(!z_src_type.sign);
      assert(z_src_type.norm);
   }

   /* Pick the type matching the depth-stencil format. */
   z_type = lp_depth_type(format_desc, z_src_type.length);

   /* Pick the intermediate type for depth operations. */
   z_type.width = z_src_type.width;
   assert(z_type.length == z_src_type.length);

   /* FIXME: for non-float depth/stencil might generate better code
    * if we'd always split it up to use 128bit operations.
    * For stencil we'd almost certainly want to pack to 8xi16 values,
    * for z just run twice.
    */

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(s_swizzle < 4);
         assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[s_swizzle].pure_integer);
         assert(!format_desc->channel[s_swizzle].normalized);
         assert(format_desc->channel[s_swizzle].size == 8);
      }

      if (depth->enabled) {
         assert(z_swizzle < 4);
         if (z_type.floating) {
            assert(z_swizzle == 0);
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_FLOAT);
            assert(format_desc->channel[z_swizzle].size == 32);
         }
         else {
            assert(format_desc->channel[z_swizzle].type ==
                   UTIL_FORMAT_TYPE_UNSIGNED);
            assert(format_desc->channel[z_swizzle].normalized);
            assert(!z_type.fixed);
         }
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&z_bld, gallivm, z_type);

   /* Setup build context for stencil vals */
   s_type = lp_int_type(z_type);
   lp_build_context_init(&s_bld, gallivm, s_type);

   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned s_shift, s_mask;

      z_dst = z_fb;
      stencil_vals = s_fb;

      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);

      if (have_z) {
         if (z_mask != 0xffffffff) {
            z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
         }

         /*
          * Align the framebuffer Z 's LSB to the right.
          */
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
         } else if (z_bitmask) {
            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
         } else {
            lp_build_name(z_dst, "z_dst");
         }
      }

      if (have_s) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
            stencil_shift = shift;  /* used below */
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "s_dst");
      }
   }

   if (stencil[0].enabled) {

      if (face) {
         LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

         /* front_facing = face != 0 ? ~0 : 0 */
         front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
         front_facing = LLVMBuildSExt(builder, front_facing,
                                      LLVMIntTypeInContext(gallivm->context,
                                             s_bld.type.length*s_bld.type.width),
                                      "");
         front_facing = LLVMBuildBitCast(builder, front_facing,
                                         s_bld.int_vec_type, "");
      }

      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
                                          stencil_refs, stencil_vals,
                                          front_facing);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, front_facing);
      }
   }

   if (depth->enabled) {
      /*
       * Convert fragment Z to the desired type, aligning the LSB to the right.
       */

      assert(z_type.width == z_src_type.width);
      assert(z_type.length == z_src_type.length);
      assert(lp_check_value(z_src_type, z_src));
      if (z_src_type.floating) {
         /*
          * Convert from floating point values
          */

         if (!z_type.floating) {
            z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
                                                            z_src_type,
                                                            z_width,
                                                            z_src);
         }
      } else {
         /*
          * Convert from unsigned normalized values.
          */

         assert(!z_src_type.sign);
         assert(!z_src_type.fixed);
         assert(z_src_type.norm);
         assert(!z_type.floating);
         if (z_src_type.width > z_width) {
            LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
                                                        z_src_type.width - z_width);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }
      }
      assert(lp_check_value(z_type, z_src));

      lp_build_name(z_src, "z_src");

      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);

         if (do_branch) {
            lp_build_mask_check(mask);
            do_branch = FALSE;
         }
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
          */
         z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, front_facing);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, front_facing);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, front_facing);
   }

   /* Put Z and stencil bits in the right place */
   if (have_z && z_shift) {
      LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
      z_dst = LLVMBuildShl(builder, z_dst, shift, "");
   }
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge the z/stencil values */
   if (format_desc->block.bits <= 32) {
      if (have_z && have_s)
         *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
      else if (have_z)
         *z_value = z_dst;
      else
         *z_value = stencil_vals;
      *s_value = *z_value;
   }
   else {
      *z_value = z_dst;
      *s_value = stencil_vals;
   }

   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);
}
コード例 #13
0
ファイル: lp_bld_depth.c プロジェクト: blckshrk/Mesa
/**
 * Store depth/stencil values.
 * Incoming values are swizzled (typically n 2x2 quads), stored linear.
 * If there's a mask it will do select/store otherwise just store.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param z_fb  z values read from fb (with padding)
 * \param s_fb  s values read from fb (with padding)
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_value the depth values to store (with padding)
 * \param s_value the stencil values to store (with padding)
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      struct lp_build_mask_context *mask,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value)
{
   struct lp_build_context z_bld;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask_value = NULL;
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type z_type = zs_type;
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   z_type.width = z_src_type.width;

   lp_build_context_init(&z_bld, gallivm, z_type);

   /*
    * This is far from ideal, at least for late depth write we should do this
    * outside the fs loop to avoid all the swizzle stuff.
    */
   if (z_src_type.length == 4) {
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");

   if (format_desc->block.bits > 32) {
      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
   }

   if (mask) {
      mask_value = lp_build_mask_value(mask);
      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
      if (format_desc->block.bits > 32) {
         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
      }
   }

   if (zs_type.width < z_src_type.width) {
      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
      z_value = LLVMBuildTrunc(builder, z_value,
                               lp_build_int_vec_type(gallivm, zs_type), "");
   }

   if (format_desc->block.bits <= 32) {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
      }
      else {
         assert(z_src_type.length == 8);
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[0],
                                                          zs_load_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[4],
                                                          zs_load_type.length), "");
      }
   }
   else {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 0);
         zs_dst2 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 1);
      }
      else {
         unsigned i;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
         assert(z_src_type.length == 8);
         for (i = 0; i < 8; i++) {
            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
                                                   z_src_type.length);
         }
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[0],
                                                          z_src_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[8],
                                                          z_src_type.length), "");
      }
      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
   }

   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
}
コード例 #14
0
/**
 * Convert linear float values to srgb int values.
 * Several possibilities how to do this, e.g.
 * - use table (based on exponent/highest order mantissa bits) and do
 *   linear interpolation (https://gist.github.com/rygorous/2203834)
 * - Chebyshev polynomial
 * - Approximation using reciprocals
 * - using int-to-float and float-to-int tricks for pow()
 *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
 *
 * @param src   float (vector) value(s) to convert.
 */
static LLVMValueRef
lp_build_linear_to_srgb(struct gallivm_state *gallivm,
                        struct lp_type src_type,
                        LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context f32_bld;
   LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;

   lp_build_context_init(&f32_bld, gallivm, src_type);

   src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);

   if (0) {
      /*
       * using int-to-float and float-to-int trick for pow().
       * This is much more accurate than necessary thanks to the correction,
       * but it most certainly makes no sense without rsqrt available.
       * Bonus points if you understand how this works...
       * All in all (including min/max clamp, conversion) 19 instructions.
       */

      float exp_f = 2.0f / 3.0f;
      /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
      float exp2f_c = 1.30438178253e+19f;
      float coeff_f = 0.62996f;
      LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
      struct lp_type int_type = lp_int_type(src_type);

      /*
       * First calculate approx x^8/12
       */
      exponent = lp_build_const_vec(gallivm, src_type, exp_f);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 exp2f_c * powf(coeff_f, 1.0f / exp_f));

      /* premultiply src */
      tmp = lp_build_mul(&f32_bld, coeff, src);
      /* "log2" */
      tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
      tmp = lp_build_int_to_float(&f32_bld, tmp);
      /* multiply for pow */
      tmp = lp_build_mul(&f32_bld, tmp, exponent);
      /* "exp2" */
      pow_approx = lp_build_itrunc(&f32_bld, tmp);
      pow_approx = LLVMBuildBitCast(builder, pow_approx,
                                    lp_build_vec_type(gallivm, src_type), "");

      /*
       * Since that pow was inaccurate (like 3 bits, though each sqrt step would
       * give another bit), compensate the error (which is why we chose another
       * exponent in the first place).
       */
      /* x * x^(8/12) = x^(20/12) */
      pow_1 = lp_build_mul(&f32_bld, pow_approx, src);

      /* x * x * x^(-4/12) = x^(20/12) */
      /* Should avoid using rsqrt if it's not available, but
       * using x * x^(4/12) * x^(4/12) instead will change error weight */
      tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
      x2 = lp_build_mul(&f32_bld, src, src);
      pow_2 = lp_build_mul(&f32_bld, x2, tmp);

      /* average the values so the errors cancel out, compensate bias,
       * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
       * for conversion to int in here */
      tmp = lp_build_add(&f32_bld, pow_1, pow_2);
      coeff = lp_build_const_vec(gallivm, src_type,
                                 1.0f / (3.0f * coeff_f) * 0.999852f *
                                 powf(1.055f * 255.0f, 4.0f));
      pow_final = lp_build_mul(&f32_bld, tmp, coeff);

      /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
      if (lp_build_fast_rsqrt_available(src_type)) {
         pow_final = lp_build_fast_rsqrt(&f32_bld,
                        lp_build_fast_rsqrt(&f32_bld, pow_final));
      }
      else {
         pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
      }
      pow_final = lp_build_add(&f32_bld, pow_final,
                               lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
   }

   else {
      /*
       * using "rational polynomial" approximation here.
       * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
       * factoring in the 255.0 mul and the scaling mul.
       * (a is closer to actual value so has higher weight than b.)
       * Note: the constants are magic values. They were found empirically,
       * possibly could be improved but good enough (be VERY careful with
       * error metric if you'd want to tweak them, they also MUST fit with
       * the crappy polynomial above for srgb->linear since it is required
       * that each srgb value maps back to the same value).
       * This function has an error of max +-0.17 (and we'd only require +-0.6),
       * for the approximated srgb->linear values the error is naturally larger
       * (+-0.42) but still accurate enough (required +-0.5 essentially).
       * All in all (including min/max clamp, conversion) 15 instructions.
       * FMA would help (minus 2 instructions).
       */

      LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;

      if (lp_build_fast_rsqrt_available(src_type)) {
         tmp = lp_build_fast_rsqrt(&f32_bld, src);
         x05 = lp_build_mul(&f32_bld, src, tmp);
      }
      else {
         /*
          * I don't really expect this to be practical without rsqrt
          * but there's no reason for triple punishment so at least
          * save the otherwise resulting division and unnecessary mul...
          */
         x05 = lp_build_sqrt(&f32_bld, src);
      }

      tmp = lp_build_mul(&f32_bld, x05, src);
      if (lp_build_fast_rsqrt_available(src_type)) {
         x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
      }
      else {
         x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
      }

      a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
      b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
      c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);

      tmp = lp_build_mul(&f32_bld, a_const, x0375);
      tmp2 = lp_build_mul(&f32_bld, b_const, x05);
      tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
      pow_final = lp_build_add(&f32_bld, tmp, tmp2);
   }

   /* linear part is easy */
   lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
   lin = lp_build_mul(&f32_bld, src, lin_const);

   lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
   is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
   tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);

   f32_bld.type.sign = 0;
   return lp_build_iround(&f32_bld, tmp);
}
コード例 #15
0
/**
 * Apply the stencil operator (add/sub/keep/etc) to the given vector
 * of stencil values.
 * \return  new stencil values vector
 */
static LLVMValueRef
lp_build_stencil_op_single(struct lp_build_context *bld,
                           const struct pipe_stencil_state *stencil,
                           enum stencil_op op,
                           LLVMValueRef stencilRef,
                           LLVMValueRef stencilVals,
                           LLVMValueRef mask)

{
   const unsigned stencilMax = 255; /* XXX fix */
   struct lp_type type = bld->type;
   LLVMValueRef res;
   LLVMValueRef max = lp_build_const_int_vec(type, stencilMax);
   unsigned stencil_op;

   assert(type.sign);

   switch (op) {
   case S_FAIL_OP:
      stencil_op = stencil->fail_op;
      break;
   case Z_FAIL_OP:
      stencil_op = stencil->zfail_op;
      break;
   case Z_PASS_OP:
      stencil_op = stencil->zpass_op;
      break;
   default:
      assert(0 && "Invalid stencil_op mode");
      stencil_op = PIPE_STENCIL_OP_KEEP;
   }

   switch (stencil_op) {
   case PIPE_STENCIL_OP_KEEP:
      res = stencilVals;
      /* we can return early for this case */
      return res;
   case PIPE_STENCIL_OP_ZERO:
      res = bld->zero;
      break;
   case PIPE_STENCIL_OP_REPLACE:
      res = stencilRef;
      break;
   case PIPE_STENCIL_OP_INCR:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = lp_build_min(bld, res, max);
      break;
   case PIPE_STENCIL_OP_DECR:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = lp_build_max(bld, res, bld->zero);
      break;
   case PIPE_STENCIL_OP_INCR_WRAP:
      res = lp_build_add(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_DECR_WRAP:
      res = lp_build_sub(bld, stencilVals, bld->one);
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   case PIPE_STENCIL_OP_INVERT:
      res = LLVMBuildNot(bld->builder, stencilVals, "");
      res = LLVMBuildAnd(bld->builder, res, max, "");
      break;
   default:
      assert(0 && "bad stencil op mode");
      res = NULL;
   }

   if (stencil->writemask != stencilMax) {
      /* mask &= stencil->writemask */
      LLVMValueRef writemask = lp_build_const_int_vec(type, stencil->writemask);
      mask = LLVMBuildAnd(bld->builder, mask, writemask, "");
      /* res = (res & mask) | (stencilVals & ~mask) */
      res = lp_build_select_bitwise(bld, writemask, res, stencilVals);
   }
   else {
      /* res = mask ? res : stencilVals */
      res = lp_build_select(bld, mask, res, stencilVals);
   }

   return res;
}
コード例 #16
0
/**
 * Emit LLVM for one TGSI instruction.
 * \param return TRUE for success, FALSE otherwise
 */
boolean
lp_emit_instruction_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   const struct tgsi_opcode_info *info,
   int *pc)
{
   LLVMValueRef src0, src1, src2;
   LLVMValueRef tmp0, tmp1;
   LLVMValueRef dst0 = NULL;

   /*
    * Stores and write masks are handled in a general fashion after the long
    * instruction opcode switch statement.
    *
    * Although not stricitly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where's that easy. For some
    * complex instructions, like texture sampling, it is more convenient to
    * assume a full writemask and then let LLVM optimization passes eliminate
    * redundant code.
    */

   (*pc)++;

   assert(info->num_dst <= 1);
   if (info->num_dst) {
      dst0 = bld->bld_base.base.undef;
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ARL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_MOV:
      dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      break;

   case TGSI_OPCODE_LIT:
      return FALSE;

   case TGSI_OPCODE_RCP:
   /* TGSI_OPCODE_RECIP */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_rcp(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_RSQ:
   /* TGSI_OPCODE_RECIPSQRT */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0);
      dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_EXP:
      return FALSE;

   case TGSI_OPCODE_LOG:
      return FALSE;

   case TGSI_OPCODE_MUL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_mul(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_ADD:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_add(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_DP3:
   /* TGSI_OPCODE_DOT3 */
      return FALSE;

   case TGSI_OPCODE_DP4:
   /* TGSI_OPCODE_DOT4 */
      return FALSE;

   case TGSI_OPCODE_DST:
      return FALSE;

   case TGSI_OPCODE_MIN:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_MAX:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_max(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_SLT:
   /* TGSI_OPCODE_SETLT */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SGE:
   /* TGSI_OPCODE_SETGE */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_MAD:
   /* TGSI_OPCODE_MADD */
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1);
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_SUB:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      dst0 = lp_build_sub(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_LRP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2);
      tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0);
      dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_CND:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1);
      break;

   case TGSI_OPCODE_DP2A:
      return FALSE;

   case TGSI_OPCODE_FRC:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_floor(&bld->bld_base.base, src0);
      dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0);
      break;

   case TGSI_OPCODE_CLAMP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_max(&bld->bld_base.base, src0, src1);
      dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2);
      break;

   case TGSI_OPCODE_FLR:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_floor(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_ROUND:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_round(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_EX2:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS);
      dst0 = lp_build_exp2(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_LG2:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_log2(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_POW:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
      dst0 = lp_build_pow(&bld->bld_base.base, src0, src1);
      break;

   case TGSI_OPCODE_XPD:
      return FALSE;

   case TGSI_OPCODE_RCC:
      /* deprecated? */
      assert(0);
      return FALSE;

   case TGSI_OPCODE_DPH:
      return FALSE;

   case TGSI_OPCODE_COS:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_cos(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_DDX:
      return FALSE;

   case TGSI_OPCODE_DDY:
      return FALSE;

   case TGSI_OPCODE_KILP:
      /* predicated kill */
      return FALSE;

   case TGSI_OPCODE_KIL:
      /* conditional kill */
      return FALSE;

   case TGSI_OPCODE_PK2H:
      return FALSE;
      break;

   case TGSI_OPCODE_PK2US:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4B:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4UB:
      return FALSE;

   case TGSI_OPCODE_RFL:
      return FALSE;

   case TGSI_OPCODE_SEQ:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SFL:
      dst0 = bld->bld_base.base.zero;
      break;

   case TGSI_OPCODE_SGT:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SIN:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_sin(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_SLE:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_SNE:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero);
      break;

   case TGSI_OPCODE_STR:
      dst0 = bld->bld_base.base.one;
      break;

   case TGSI_OPCODE_TEX:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
      break;

   case TGSI_OPCODE_TXD:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
      break;

   case TGSI_OPCODE_UP2H:
      /* deprecated */
      assert (0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP2US:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4B:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4UB:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_X2D:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARR:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_round(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_BRA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CAL:
      return FALSE;

   case TGSI_OPCODE_RET:
      return FALSE;

   case TGSI_OPCODE_END:
      *pc = -1;
      break;

   case TGSI_OPCODE_SSG:
   /* TGSI_OPCODE_SGN */
      tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_sgn(&bld->bld_base.base, tmp0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL);
      src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL);
      tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero);
      dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2);
      break;

   case TGSI_OPCODE_SCS:
      return FALSE;

   case TGSI_OPCODE_TXB:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
      break;

   case TGSI_OPCODE_NRM:
      /* fall-through */
   case TGSI_OPCODE_NRM4:
      return FALSE;

   case TGSI_OPCODE_DIV:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_DP2:
      return FALSE;

   case TGSI_OPCODE_TXL:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
      break;

   case TGSI_OPCODE_TXP:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
      break;

   case TGSI_OPCODE_BRK:
      return FALSE;

   case TGSI_OPCODE_IF:
      return FALSE;

   case TGSI_OPCODE_BGNLOOP:
      return FALSE;

   case TGSI_OPCODE_BGNSUB:
      return FALSE;

   case TGSI_OPCODE_ELSE:
      return FALSE;

   case TGSI_OPCODE_ENDIF:
      return FALSE;

   case TGSI_OPCODE_ENDLOOP:
      return FALSE;

   case TGSI_OPCODE_ENDSUB:
      return FALSE;

   case TGSI_OPCODE_PUSHA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_POPA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CEIL:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_ceil(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_I2F:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_NOT:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TRUNC:
      src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL);
      dst0 = lp_build_trunc(&bld->bld_base.base, src0);
      break;

   case TGSI_OPCODE_SHL:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ISHR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_AND:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_OR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_MOD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_XOR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      return FALSE;

   case TGSI_OPCODE_EMIT:
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      return FALSE;
   }
   
   if (info->num_dst) {
      lp_emit_store_aos(bld, inst, 0, dst0);
   }

   return TRUE;
}
コード例 #17
0
ファイル: lp_bld_sample_aos.c プロジェクト: nikai3d/mesa
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length  is the length of the pixel block along the
 *                      coordinate axis
 * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis (in bytes)
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0  resulting sub-block pixel coordinate for coord0
 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */

      LLVMValueRef coord1;

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);

      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(builder, coord0, bias, "");
         coord0 = LLVMBuildURem(builder, coord0, length, "");
      }

      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
コード例 #18
0
/**
 * Performs blending of src and dst pixels
 *
 * @param blend         the blend state of the shader variant
 * @param cbuf_format   format of the colour buffer
 * @param type          data type of the pixel vector
 * @param rt            rt number
 * @param src           blend src
 * @param dst           blend dst
 * @param mask          optional mask to apply to the blending result
 * @param const_        const blend color
 * @param swizzle       swizzle values for RGBA
 *
 * @return the result of blending src and dst
 */
LLVMValueRef
lp_build_blend_aos(struct gallivm_state *gallivm,
                   const struct pipe_blend_state *blend,
                   const enum pipe_format *cbuf_format,
                   struct lp_type type,
                   unsigned rt,
                   LLVMValueRef src,
                   LLVMValueRef dst,
                   LLVMValueRef mask,
                   LLVMValueRef const_,
                   const unsigned char swizzle[4])
{
   const struct pipe_rt_blend_state * state = &blend->rt[rt];
   struct lp_build_blend_aos_context bld;
   LLVMValueRef src_factor, dst_factor;
   LLVMValueRef result;
   unsigned alpha_swizzle = swizzle[3];
   boolean fullcolormask;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   bld.src = src;
   bld.dst = dst;
   bld.const_ = const_;

   if (swizzle[3] > UTIL_FORMAT_SWIZZLE_W || swizzle[3] == swizzle[0])
      alpha_swizzle = UTIL_FORMAT_SWIZZLE_NONE;

   if (!state->blend_enable) {
      result = src;
   } else {
      boolean rgb_alpha_same = state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor;
      assert(rgb_alpha_same || alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                         state->alpha_src_factor, alpha_swizzle);
      dst_factor = lp_build_blend_factor(&bld, state->rgb_dst_factor,
                                         state->alpha_dst_factor, alpha_swizzle);

      result = lp_build_blend(&bld.base,
                              state->rgb_func,
                              state->rgb_src_factor,
                              state->rgb_dst_factor,
                              src,
                              dst,
                              src_factor,
                              dst_factor,
                              rgb_alpha_same,
                              false);

      if(state->rgb_func != state->alpha_func && alpha_swizzle != UTIL_FORMAT_SWIZZLE_NONE) {
         LLVMValueRef alpha;

         alpha = lp_build_blend(&bld.base,
                                state->alpha_func,
                                state->alpha_src_factor,
                                state->alpha_dst_factor,
                                src,
                                dst,
                                src_factor,
                                dst_factor,
                                rgb_alpha_same,
                                false);

         result = lp_build_blend_swizzle(&bld,
                                         result,
                                         alpha,
                                         LP_BUILD_BLEND_SWIZZLE_RGBA,
                                         alpha_swizzle);
      }
   }

   /* Check if color mask is necessary */
   fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), state->colormask);

   if (!fullcolormask) {
      LLVMValueRef color_mask;

      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, swizzle);
      lp_build_name(color_mask, "color_mask");

      /* Combine with input mask if necessary */
      if (mask) {
         mask = lp_build_and(&bld.base, color_mask, mask);
      } else {
         mask = color_mask;
      }
   }

   /* Apply mask, if one exists */
   if (mask) {
      result = lp_build_select(&bld.base, mask, result, dst);
   }

   return result;
}
コード例 #19
0
ファイル: lp_bld_tgsi_aos.c プロジェクト: fabe3k/mesa
/**
 * Register store.
 */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;
   LLVMValueRef ptr;

   /*
    * Saturate the value
    */
   if (inst->Instruction.Saturate) {
      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
   }

   /*
    * Translate the register file
    */

   assert(!reg->Register.Indirect);

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;

   case TGSI_FILE_ADDRESS:
      ptr = bld->addr[reg->Indirect.Index];
      break;

   default:
      assert(0);
      return;
   }

   if (!ptr)
      return;

   /*
    * Writemask
    */

   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
                                                   bld->bld_base.base.type,
                                                   reg->Register.WriteMask,
                                                   TGSI_NUM_CHANNELS,
                                                   bld->swizzles);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   if (mask) {
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(builder, ptr, "");
      value = lp_build_select(&bld->bld_base.base,
                              mask, value, orig_value);
   }

   LLVMBuildStore(builder, value, ptr);
}