Example #1
0
static LLVMValueRef
translateFloatBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) {
  switch (Op) {
    case SumOp:  return LLVMBuildFAdd(Builder, ValueE1, ValueE2, ""); 
    case SubOp:  return LLVMBuildFSub(Builder, ValueE1, ValueE2, ""); 
    case MultOp: return LLVMBuildFMul(Builder, ValueE1, ValueE2, ""); 
    case DivOp:  return LLVMBuildFDiv(Builder, ValueE1, ValueE2, ""); 
    case LtOp:   return LLVMBuildFCmp(Builder, LLVMRealOLT, ValueE1, ValueE2, ""); 
    case LeOp:   return LLVMBuildFCmp(Builder, LLVMRealOLE, ValueE1, ValueE2, ""); 
    case GtOp:   return LLVMBuildFCmp(Builder, LLVMRealOGT, ValueE1, ValueE2, ""); 
    case GeOp:   return LLVMBuildFCmp(Builder, LLVMRealOGE, ValueE1, ValueE2, ""); 
    case EqOp:   return LLVMBuildFCmp(Builder, LLVMRealOEQ, ValueE1, ValueE2, ""); 
    case DiffOp: return LLVMBuildFCmp(Builder, LLVMRealONE, ValueE1, ValueE2, ""); 
    default:     return NULL;
  }
}
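A minimal usage sketch for the translator above; the AST node shape and field
names are hypothetical, and leaf handling plus the global Builder setup are
omitted:

/* Hypothetical binary-expression node; only the fields the sketch needs. */
typedef struct ASTNode {
  NodeKind Kind;
  struct ASTNode *Left, *Right;
} ASTNode;

/* Recursively lower a float expression tree to LLVM IR. A real translator
 * would also handle leaf nodes (constants, variables) and the NULL result
 * for unsupported operators. */
static LLVMValueRef translateExpr(ASTNode *N) {
  LLVMValueRef V1 = translateExpr(N->Left);
  LLVMValueRef V2 = translateExpr(N->Right);
  return translateFloatBinOp(N->Kind, V1, V2);
}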
Example #2
0
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there was, since the FP's mantissa takes only a fraction of
 * register bits the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the actual result type will have the same width
 * as the source type.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, src_type);
    LLVMValueRef res;
    unsigned mantissa;

    assert(src_type.floating);
    assert(dst_width <= src_type.width);
    src_type.sign = FALSE;

    mantissa = lp_mantissa(src_type);

    if (dst_width <= mantissa) {
        /*
         * Apply magic coefficients that make the desired result appear in
         * the least significant bits of the mantissa, with correct rounding.
         *
         * This only works if the destination width fits in the mantissa.
         */

        unsigned long long ubound;
        unsigned long long mask;
        double scale;
        double bias;

        ubound = (1ULL << dst_width);
        mask = ubound - 1;
        scale = (double)mask/ubound;
        bias = (double)(1ULL << (mantissa - dst_width));

        res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), "");
        res = LLVMBuildBitCast(builder, res, int_vec_type, "");
        res = LLVMBuildAnd(builder, res,
                           lp_build_const_int_vec(gallivm, src_type, mask), "");
    }
    else if (dst_width == (mantissa + 1)) {
        /*
         * The destination width matches exactly what can be represented in
         * floating point (i.e., mantissa + 1 bits). So do a straight
         * multiplication followed by casting. No further rounding is necessary.
         */

        double scale;

        scale = (double)((1ULL << dst_width) - 1);

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
    }
    else {
        /*
         * The destination exceeds what can be represented in the floating
         * point type. So multiply by the largest power of two we can get away
         * with, and then subtract the most significant bit to rescale to
         * normalized values.
         *
         * The largest power of two factor we can get away with is
         * (1 << (src_type.width - 1)), because we need to use a signed
         * conversion (FPToSI). In theory it should be (1 << (src_type.width - 2)),
         * but IEEE 754 rules state that INT_MIN should be returned by FPToSI
         * on overflow, which is the correct result for values near 1.0!
         *
         * This means we get (src_type.width - 1) correct bits for values near 0.0,
         * and (mantissa + 1) correct bits for values near 1.0. Equally or more
         * important, we also get exact results for 0.0 and 1.0.
         */

        unsigned n = MIN2(src_type.width - 1, dst_width);

        double scale = (double)(1ULL << n);
        unsigned lshift = dst_width - n;
        unsigned rshift = n;
        LLVMValueRef lshifted;
        LLVMValueRef rshifted;

        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
        res = LLVMBuildFPToSI(builder, res, int_vec_type, "");

        /*
         * Align the most significant bit to its final place.
         *
         * This will cause 1.0 to overflow to 0, but the later adjustment will
         * get it right.
         */
        if (lshift) {
            lshifted = LLVMBuildShl(builder, res,
                                    lp_build_const_int_vec(gallivm, src_type,
                                            lshift), "");
        } else {
            lshifted = res;
        }

        /*
         * Align the most significant bit to the right.
         */
        rshifted = LLVMBuildLShr(builder, res,
                                 lp_build_const_int_vec(gallivm, src_type, rshift),
                                 "");

        /*
         * Subtract the MSB (shifted down to the LSB position) from the
         * shifted value, thereby re-scaling from (1 << dst_width) to
         * ((1 << dst_width) - 1).
         */

        res = LLVMBuildSub(builder, lshifted, rshifted, "");
    }

    return res;
}
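The first branch above is the classic magic-bias trick. As a sanity check,
here is a scalar C model of what the generated IR computes for dst_width = 8
with IEEE-754 single precision (23-bit mantissa); the helper name is mine:

#include <stdint.h>
#include <string.h>

/* Scalar model of the magic-bias path: scaling by 255/256 and adding
 * 2^(23-8) forces round(x * 255) into the low 8 mantissa bits, which the
 * bitcast and mask then extract directly. */
static uint32_t clamped_float_to_unorm8(float x)   /* x assumed in [0, 1] */
{
   float f = x * (255.0f / 256.0f) + 32768.0f;     /* scale, then bias = 2^15 */
   uint32_t bits;
   memcpy(&bits, &f, sizeof bits);                 /* the bitcast */
   return bits & 0xff;                             /* the mask */
}

clamped_float_to_unorm8(0.0f) yields 0 and clamped_float_to_unorm8(1.0f)
yields 255, with rounding handled for free by the FPU's round-to-nearest mode.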
Example #3
0
/**
 * Generate code to compute texture level of detail (lambda).
 * \param ddx  partial derivatives of (s, t, r, q) with respect to X
 * \param ddy  partial derivatives of (s, t, r, q) with respect to Y
 * \param lod_bias  optional float vector with the shader lod bias
 * \param explicit_lod  optional float vector with the explicit lod
 * \param width  scalar int texture width
 * \param height  scalar int texture height
 * \param depth  scalar int texture depth
 *
 * XXX: The resulting lod is scalar, so we ignore all but the first element
 * of the derivatives, lod_bias, etc. that are passed by the shader.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMValueRef lod;

   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = bld->float_bld.zero;

   if (bld->static_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      LLVMValueRef min_lod =
         bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);

      lod = min_lod;
   }
   else {
      LLVMValueRef sampler_lod_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
      LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);

      if (explicit_lod) {
         lod = LLVMBuildExtractElement(builder, explicit_lod,
                                       index0, "");
      }
      else {
         LLVMValueRef rho;

         rho = lp_build_rho(bld, unit, ddx, ddy);

         /*
          * Compute lod = log2(rho)
          */

         if (!lod_bias &&
             !bld->static_state->lod_bias_non_zero &&
             !bld->static_state->apply_max_lod &&
             !bld->static_state->apply_min_lod) {
            /*
             * Special case when there are no post-log2 adjustments, which
             * saves instructions by keeping the integer and fractional lod
             * computations separate from the start.
             */

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
               *out_lod_fpart = bld->float_bld.zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         /* The fast approximation is accurate enough here; switch to
          * lp_build_log2() if a precise log2 is ever needed. */
         lod = lp_build_fast_log2(float_bld, rho);

         /* add shader lod bias */
         if (lod_bias) {
            lod_bias = LLVMBuildExtractElement(builder, lod_bias,
                                               index0, "");
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }

      /* add sampler lod bias */
      if (bld->static_state->lod_bias_non_zero)
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");

      /* clamp lod */
      if (bld->static_state->apply_max_lod) {
         LLVMValueRef max_lod =
            bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);

         lod = lp_build_min(float_bld, lod, max_lod);
      }
      if (bld->static_state->apply_min_lod) {
         LLVMValueRef min_lod =
            bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);

         lod = lp_build_max(float_bld, lod, min_lod);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
      *out_lod_ipart = lp_build_iround(float_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");

   return;
}
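Setting aside the brilinear and no-adjustment fast paths, the selection above
boils down to a small scalar computation. A hedged reference model follows
(names are mine, not gallivm's; rounding-mode details may differ slightly):

#include <math.h>

/* Scalar reference for lod selection: lambda = log2(rho) plus the shader
 * and sampler biases, clamped, then split as the mip filter expects. */
static void ref_lod_selector(float rho, float shader_bias, float sampler_bias,
                             float min_lod, float max_lod, int mip_linear,
                             int *lod_ipart, float *lod_fpart)
{
   float lod = log2f(rho) + shader_bias + sampler_bias;
   lod = fminf(lod, max_lod);
   lod = fmaxf(lod, min_lod);
   if (mip_linear) {
      *lod_ipart = (int)floorf(lod);   /* cf. lp_build_ifloor_fract() */
      *lod_fpart = lod - floorf(lod);
   } else {
      *lod_ipart = (int)lroundf(lod);  /* cf. lp_build_iround() */
      *lod_fpart = 0.0f;
   }
}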
Example #4
0
/**
 * Initialize the bld->a, dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already set up in slot 0 */
         continue;

      default:
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) this is:
       * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            }
            else if (attrib == 0 && chan == 1) {
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            }
            else {
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this doesn't
             * correspond exactly to what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
            }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!bld->oow) {
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                   LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
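The net effect of coeffs_init for a linearly interpolated channel can be
modeled in a few scalar lines. For a single quad, quad_offset_x/y are
{0,1,0,1} and {0,0,1,1}; the helper name below is mine:

/* Scalar model: the value of an attribute at each pixel of a 2x2 quad
 * whose upper-left corner is at (x, y), given the plane equation
 * a = a0 + x*dadx + y*dady set up above. */
static void quad_attrib_values(float a0, float dadx, float dady,
                               float x, float y, float out[4])
{
   static const float offx[4] = {0, 1, 0, 1};
   static const float offy[4] = {0, 0, 1, 1};
   float a = a0 + x * dadx + y * dady;   /* value at the quad corner */
   for (int i = 0; i < 4; i++)
      out[i] = a + offx[i] * dadx + offy[i] * dady;   /* a + dadq[i] */
}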
Example #5
0
/**
 * Interpolate the shader input attribute values.
 * This is called for each (group of) quad(s).
 */
static void
attribs_update_simple(struct lp_build_interp_soa_context *bld,
                      struct gallivm_state *gallivm,
                      LLVMValueRef loop_iter,
                      int start,
                      int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   LLVMValueRef oow = NULL;
   unsigned attrib;
   LLVMValueRef pixoffx;
   LLVMValueRef pixoffy;
   LLVMValueRef ptr;

   /* could do this with code-generated passed in pixel offsets too */

   assert(loop_iter);
   ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
   pixoffx = LLVMBuildLoad(builder, ptr, "");
   ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
   pixoffy = LLVMBuildLoad(builder, ptr, "");

   pixoffx = LLVMBuildFAdd(builder, pixoffx,
                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
   pixoffy = LLVMBuildFAdd(builder, pixoffy,
                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");

   for (attrib = start; attrib < end; attrib++) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      unsigned chan;

      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
         if (mask & (1 << chan)) {
            LLVMValueRef index;
            LLVMValueRef dadx = coeff_bld->zero;
            LLVMValueRef dady = coeff_bld->zero;
            LLVMValueRef a = coeff_bld->zero;

            index = lp_build_const_int32(gallivm, chan);
            switch (interp) {
            case LP_INTERP_PERSPECTIVE:
               /* fall-through */

            case LP_INTERP_LINEAR:
               if (attrib == 0 && chan == 0) {
                  dadx = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else if (attrib == 0 && chan == 1) {
                  dady = coeff_bld->one;
                  if (bld->pos_offset) {
                     a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
                  }
               }
               else {
                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadxaos[attrib],
                                                    index);
                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                    coeff_bld->type, bld->dadyaos[attrib],
                                                    index);
                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, bld->a0aos[attrib],
                                                 index);
               }
               /*
                * a = a0 + (x * dadx + y * dady)
                */
               a = lp_build_fmuladd(builder, dadx, pixoffx, a);
               a = lp_build_fmuladd(builder, dady, pixoffy, a);

               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
               break;

            case LP_INTERP_CONSTANT:
            case LP_INTERP_FACING:
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, bld->a0aos[attrib],
                                              index);
               break;

            case LP_INTERP_POSITION:
               assert(attrib > 0);
               a = bld->attribs[0][chan];
               break;

            default:
               assert(0);
               break;
            }

            if ((attrib == 0) && (chan == 2)) {
               /* FIXME: Depth values can exceed 1.0, due to the fact that
                * setup interpolation coefficients refer to (0,0), which
                * causes precision loss. So we must clamp to 1.0 here to
                * avoid artifacts.
                */
               a = lp_build_min(coeff_bld, a, coeff_bld->one);
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}
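The LP_INTERP_PERSPECTIVE branch is just a reciprocal and a multiply;
computing oow lazily outside the channel loop lets a single rcp serve every
perspective attribute. In scalar form (the function name is mine):

/* Scalar form of the perspective fixup above: divide the interpolated
 * attribute by the interpolated w channel of position (attribs[0][3]),
 * expressed as rcp + mul exactly like the generated IR. */
static float perspective_correct(float a_interp, float pos_w_interp)
{
   float oow = 1.0f / pos_w_interp;   /* lp_build_rcp(coeff_bld, w) */
   return a_interp * oow;             /* lp_build_mul(coeff_bld, a, oow) */
}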
Example #6
0
/* Perform view culling and small primitive elimination; return an i1 value
 * that is true if the primitive is accepted and initially_accepted is true. */
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
			      LLVMValueRef pos[3][4],
			      LLVMValueRef initially_accepted,
			      struct ac_position_w_info *w,
			      LLVMValueRef vp_scale[2],
			      LLVMValueRef vp_translate[2],
			      LLVMValueRef small_prim_precision,
			      bool cull_view_xy,
			      bool cull_view_near_z,
			      bool cull_view_far_z,
			      bool cull_small_prims,
			      bool use_halfz_clip_space)
{
	LLVMBuilderRef builder = ctx->builder;

	if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
		return ctx->i1true;

	/* Skip the culling if the primitive has already been rejected or
	 * if any W is negative. The bounding box culling doesn't work when
	 * W is negative.
	 */
	LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
					 w->all_w_positive, "");
	LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
	LLVMBuildStore(builder, initially_accepted, accepted_var);

	ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
	{
		LLVMValueRef bbox_min[3], bbox_max[3];
		LLVMValueRef accepted = initially_accepted;

		/* Compute the primitive bounding box for easy culling. */
		for (unsigned chan = 0; chan < 3; chan++) {
			bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
			bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);

			bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
			bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
		}

		/* View culling. */
		if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
			for (unsigned chan = 0; chan < 3; chan++) {
				LLVMValueRef visible;

				if ((cull_view_xy && chan <= 1) ||
				    (cull_view_near_z && chan == 2)) {
					float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
					visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
								LLVMConstReal(ctx->f32, t), "");
					accepted = LLVMBuildAnd(builder, accepted, visible, "");
				}

				if ((cull_view_xy && chan <= 1) ||
				    (cull_view_far_z && chan == 2)) {
					visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
								ctx->f32_1, "");
					accepted = LLVMBuildAnd(builder, accepted, visible, "");
				}
			}
		}

		/* Small primitive elimination. */
		if (cull_small_prims) {
			/* Assuming a sample position at (0.5, 0.5), if we round
			 * the bounding box min/max extents and the results of
			 * the rounding are equal in either the X or Y direction,
			 * the bounding box does not intersect the sample.
			 *
			 * See these GDC slides for pictures:
			 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
			 */
			LLVMValueRef min, max, not_equal[2], visible;

			for (unsigned chan = 0; chan < 2; chan++) {
				/* Convert the position to screen-space coordinates. */
				min = ac_build_fmad(ctx, bbox_min[chan],
						    vp_scale[chan], vp_translate[chan]);
				max = ac_build_fmad(ctx, bbox_max[chan],
						    vp_scale[chan], vp_translate[chan]);
				/* Scale the bounding box according to the precision of
				 * the rasterizer and the number of MSAA samples. */
				min = LLVMBuildFSub(builder, min, small_prim_precision, "");
				max = LLVMBuildFAdd(builder, max, small_prim_precision, "");

				/* Determine if the bbox intersects the sample point.
				 * It also works for MSAA, but vp_scale, vp_translate,
				 * and small_prim_precision are computed differently.
				 */
				min = ac_build_round(ctx, min);
				max = ac_build_round(ctx, max);
				not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
			}
			visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
			accepted = LLVMBuildAnd(builder, accepted, visible, "");
		}

		LLVMBuildStore(builder, accepted, accepted_var);
	}
	ac_build_endif(ctx, 10000000);

	return LLVMBuildLoad(builder, accepted_var, "");
}
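Per axis, the small-primitive test reduces to one round-and-compare. A scalar
sketch follows (names are mine; ac_build_round is round-to-nearest-even,
which roundf only approximates):

#include <math.h>
#include <stdbool.h>

/* Scalar model of the per-axis test above: pad the screen-space extent by
 * the rasterizer precision, then check whether min and max round to
 * different integers, i.e. whether the bbox straddles a (0.5, 0.5) sample. */
static bool covers_sample_1d(float bbox_min, float bbox_max,
                             float vp_scale, float vp_translate,
                             float precision)
{
   float min = bbox_min * vp_scale + vp_translate - precision;
   float max = bbox_max * vp_scale + vp_translate + precision;
   return roundf(min) != roundf(max);   /* the LLVMRealONE compare */
}

A primitive survives only if covers_sample_1d holds for both X and Y,
matching the AND of not_equal[0] and not_equal[1] above.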
Example #7
0
void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
			    struct lp_build_emit_data *emit_data,
			    LLVMValueRef *coords_arg,
			    LLVMValueRef *derivs_arg)
{
	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef coords[4];
	unsigned i;

	si_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);

	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];

			/* Shift the cube coordinates by the derivatives to get
			 * the cube coordinates of the "neighboring pixel".
			 */
			for (i = 0; i < 3; i++)
				shifted_cube_coords[i] =
					LLVMBuildFAdd(builder, coords_arg[i],
						      derivs_arg[axis*3+i], "");
			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);

			/* Project the shifted cube coordinates onto the face. */
			si_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
						      shifted_coords);

			/* Subtract both sets of 2D coordinates to get 2D derivatives.
			 * This won't work if the shifted coordinates ended up
			 * in a different face.
			 */
			for (i = 0; i < 2; i++)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder, shifted_coords[i],
						      coords[i], "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
						       coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	if (opcode == TGSI_OPCODE_TEX2 ||
	    opcode == TGSI_OPCODE_TXB2 ||
	    opcode == TGSI_OPCODE_TXL2) {
		coords[3] = coords_arg[4];
	} else if (opcode == TGSI_OPCODE_TXB ||
		   opcode == TGSI_OPCODE_TXL ||
		   target == TGSI_TEXTURE_SHADOWCUBE) {
		coords[3] = coords_arg[3];
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
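The cube-array fixup above is a single MAD. In scalar form (stride of 8 per
array element, per the comment in the source; the helper name is mine):

/* coord.z for cube arrays: array_index * 8 + face, as computed by the
 * TGSI_OPCODE_MAD above. */
static float cube_array_z(float array_index, float face)
{
	return array_index * 8.0f + face;
}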
Example #8
0
/**
 * Special case for converting clamped IEEE-754 floats to unsigned norms.
 *
 * The mathematical voodoo below may seem excessive but it is actually
 * paramount we do it this way for several reasons. First, there is no single
 * precision FP to unsigned integer conversion Intel SSE instruction. Second,
 * even if there was, since the FP's mantissa takes only a fraction of
 * register bits the typical scale-and-cast approach would require double
 * precision for accurate results, and therefore half the throughput.
 *
 * Although the result values can be scaled to an arbitrary bit width
 * specified by dst_width, the actual result type will have the same width
 * as the source type.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
                                        struct lp_type src_type,
                                        unsigned dst_width,
                                        LLVMValueRef src)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(src_type);
   LLVMValueRef res;
   unsigned mantissa;
   unsigned n;
   unsigned long long ubound;
   unsigned long long mask;
   double scale;
   double bias;

   assert(src_type.floating);

   mantissa = lp_mantissa(src_type);

   /* We cannot carry more bits than the mantissa */
   n = MIN2(mantissa, dst_width);

   /* These magic coefficients make the desired result appear in the
    * least significant bits of the mantissa.
    */
   ubound = ((unsigned long long)1 << n);
   mask = ubound - 1;
   scale = (double)mask/ubound;
   bias = (double)((unsigned long long)1 << (mantissa - n));

   res = LLVMBuildFMul(builder, src, lp_build_const_vec(src_type, scale), "");
   res = LLVMBuildFAdd(builder, res, lp_build_const_vec(src_type, bias), "");
   res = LLVMBuildBitCast(builder, res, int_vec_type, "");

   if (dst_width > n) {
      int shift = dst_width - n;
      res = LLVMBuildShl(builder, res, lp_build_const_int_vec(src_type, shift), "");

      /* TODO: Fill in the empty lower bits for additional precision? */
      /* YES: this fixes progs/trivial/tri-z-eq.c.
       * Otherwise vertex Z=1.0 values get converted to something like
       * 0xfffffb00 and the test for equality with 0xffffffff fails.
       */
#if 0
      {
         LLVMValueRef msb;
         msb = LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, dst_width - 1), "");
         msb = LLVMBuildShl(builder, msb, lp_build_const_int_vec(src_type, shift), "");
         msb = LLVMBuildSub(builder, msb, lp_build_const_int_vec(src_type, 1), "");
         res = LLVMBuildOr(builder, res, msb, "");
      }
#elif 0
      while(shift > 0) {
         res = LLVMBuildOr(builder, res, LLVMBuildLShr(builder, res, lp_build_const_int_vec(src_type, n), ""), "");
         shift -= n;
         n *= 2;
      }
#endif
   }
   else
      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(src_type, mask), "");

   return res;
}
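The disabled blocks above point at the standard bit-replication fix for
widening a unorm: copying the top bits into the vacated low bits maps 0 to 0
and 2^n - 1 to 2^dst_width - 1 exactly, instead of leaving the low bits zero.
A scalar sketch of that idea (the helper name is mine):

#include <stdint.h>

/* Widen an n-bit unorm to dst_width bits by replicating the top bits into
 * the vacated low bits, mirroring the #elif 0 loop above. */
static uint32_t unorm_widen(uint32_t x, unsigned n, unsigned dst_width)
{
   uint32_t res = x << (dst_width - n);
   int shift = dst_width - n;
   while (shift > 0) {
      res |= res >> n;
      shift -= n;
      n *= 2;
   }
   return res;
}

For example, unorm_widen(0x1f, 5, 8) yields 0xff, the classic
(x << 3) | (x >> 2) expansion used for 5-bit color channels.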
Example #9
0
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{
	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
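The quotient rule spelled out in the comment above maps one-to-one onto the
generated IR. A scalar restatement (the function name is mine; invma is
1/|ma| and coord is the already-projected face coordinate, taken before the
+1.5 shift):

/* df/dh for the projection f(x, z) = x/z:
 *   df = (1/z) * dx - (x/z) * (1/z) * dz
 * with invma standing in for 1/z and coord for x/z, exactly as in the
 * FSub(FMul, FMul) sequence above. */
static float cube_deriv_2d(float d_st, float d_ma, float invma, float coord)
{
	return d_st * invma - (d_ma * invma) * coord;
}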