예제 #1
0
/*
 * Build the packed ddx/ddy vector for a single coordinate (scalar per quad
 * values). For an 8-wide vector the result is laid out as:
 * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
 * The ddy lanes hold the negated derivative (top left minus bottom left).
 * In exchange the subtrahend is a no-op swizzle, so only one shuffle is
 * needed instead of the two a more straightforward packing would take.
 *
 * Uses a float subtract when bld->type.floating is set, an integer subtract
 * otherwise.
 */
LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
                                 LLVMValueRef a)
{
   /* Subtrahend lanes; this is the no-op swizzle. */
   static const unsigned char subtrahend_swz[] = {
      LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
   };
   /* Minuend lanes: top right for ddx, top left for the negated ddy. */
   static const unsigned char minuend_swz[] = {
      LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
      LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMBuilderRef builder = bld->gallivm->builder;

   /* Both operands go through the aos swizzle helper. */
   LLVMValueRef subtrahend = lp_build_swizzle_aos(bld, a, subtrahend_swz);
   LLVMValueRef minuend = lp_build_swizzle_aos(bld, a, minuend_swz);

   if (!bld->type.floating)
      return LLVMBuildSub(builder, minuend, subtrahend, "ddxddy");

   return LLVMBuildFSub(builder, minuend, subtrahend, "ddxddy");
}
예제 #2
0
/*
 * Helper for building the packed ddx/ddy vector for two coords (scalar per
 * quad values). With a = s and b = t the vector will look like this (8-wide):
 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
 * This only needs 2 (v)shufps.
 *
 * bld->type.length is expected to be a multiple of 4 (one quad per four
 * lanes); any trailing lanes beyond the last full quad are not filled in.
 * Uses a float subtract when bld->type.floating is set, an integer subtract
 * otherwise.
 */
LLVMValueRef
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
                                 LLVMValueRef a, LLVMValueRef b)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   /*
    * One shuffle index per vector lane. The arrays are indexed up to
    * bld->type.length - 1, so size them for the longest vector rather than
    * for LP_MAX_VECTOR_LENGTH/4, which is too small whenever the vector has
    * more lanes than that.
    * NOTE(review): assumes LP_MAX_VECTOR_LENGTH bounds type.length - confirm.
    */
   LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef vec1, vec2;
   unsigned length, num_quads, i;

   /* XXX: do hsub version */
   length = bld->type.length;
   num_quads = length / 4;
   for (i = 0; i < num_quads; i++) {
      /* Shuffle indices below "length" select from a, the rest from b. */
      unsigned s1 = 4 * i;
      unsigned s2 = 4 * i + length;

      /* Subtrahend: the quad's top left value of a (twice), then of b. */
      shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
      shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
      shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
      shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);

      /* Minuend: top right (ddx) and bottom left (ddy) of a, then of b. */
      shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
      shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
      shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
      shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
   }
   vec1 = LLVMBuildShuffleVector(builder, a, b,
                                 LLVMConstVector(shuffles1, length), "");
   vec2 = LLVMBuildShuffleVector(builder, a, b,
                                 LLVMConstVector(shuffles2, length), "");
   if (bld->type.floating)
      return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
   else
      return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
}
예제 #3
0
파일: lp_bld_quad.c 프로젝트: RAOF/mesa
/*
 * Texture sampling handles multiple quads at once and does the lod
 * calculations per quad, so the per-quad derivatives have to be passed
 * into the lp_build_rho function in packed form.
 * For 8-wide vectors the packed derivative values for 3 coords look like
 * this (the layout scales to an arbitrary multiple-of-4 vector size):
 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
 * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
 * The second vector will be unused for 1d and 2d textures.
 *
 * This helper builds the one-coord form: ddx and ddy in the first two lanes
 * of each quad, the remaining two lanes are don't-care.
 */
LLVMValueRef
lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
                                 LLVMValueRef a)
{
   /*
    * Same packing as _twocoord, but the aos swizzle helper can be used.
    *
    * XXX the subtrahend swizzle could become a noop swizzle by using the
    * right top/bottom pair for ddy.
    */
   static const unsigned char subtrahend_swz[] = {
      LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   /* Top right minus top left is ddx, bottom left minus top left is ddy. */
   static const unsigned char minuend_swz[] = {
      LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
   };
   LLVMBuilderRef builder = bld->gallivm->builder;

   LLVMValueRef subtrahend = lp_build_swizzle_aos(bld, a, subtrahend_swz);
   LLVMValueRef minuend = lp_build_swizzle_aos(bld, a, minuend_swz);

   if (!bld->type.floating)
      return LLVMBuildSub(builder, minuend, subtrahend, "ddxddy");

   return LLVMBuildFSub(builder, minuend, subtrahend, "ddxddy");
}
예제 #4
0
/* Emit front/back face culling (optionally also rejecting zero-area
 * triangles) and return an i1 that is true if the primitive is accepted.
 *
 * Only channels 0 and 1 of the three positions in "pos" are read. The cull_*
 * flags are evaluated while generating code; when the outcome is already
 * known from the flags alone, a constant is returned and nothing is emitted.
 */
static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
				 LLVMValueRef pos[3][4],
				 struct ac_position_w_info *w,
				 bool cull_front,
				 bool cull_back,
				 bool cull_zero_area)
{
	/* Culling both faces rejects everything. */
	if (cull_front && cull_back)
		return ctx->i1false;

	/* Nothing to cull at all. */
	if (!(cull_front || cull_back || cull_zero_area))
		return ctx->i1true;

	LLVMBuilderRef b = ctx->builder;

	/* det = (p2[0] - p0[0]) * (p1[1] - p0[1]) -
	 *       (p0[0] - p1[0]) * (p0[1] - p2[1])
	 *
	 * Its sign drives the front/back decision, and det == 0 means the
	 * triangle area is 0.
	 */
	LLVMValueRef d20_c0 = LLVMBuildFSub(b, pos[2][0], pos[0][0], "");
	LLVMValueRef d10_c1 = LLVMBuildFSub(b, pos[1][1], pos[0][1], "");
	LLVMValueRef d01_c0 = LLVMBuildFSub(b, pos[0][0], pos[1][0], "");
	LLVMValueRef d02_c1 = LLVMBuildFSub(b, pos[0][1], pos[2][1], "");
	LLVMValueRef prod_a = LLVMBuildFMul(b, d20_c0, d10_c1, "");
	LLVMValueRef prod_b = LLVMBuildFMul(b, d01_c0, d02_c1, "");
	LLVMValueRef det = LLVMBuildFSub(b, prod_a, prod_b, "");

	/* Negative W negates the determinant. */
	LLVMValueRef negated_det = LLVMBuildFNeg(b, det, "");
	det = LLVMBuildSelect(b, w->w_reflection, negated_det, det, "");

	/* Choose the comparison against 0. Strict comparisons additionally
	 * reject det == 0, i.e. zero-area triangles.
	 */
	LLVMRealPredicate pred;
	if (cull_front) {
		pred = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
	} else if (cull_back) {
		pred = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
	} else {
		/* Only cull_zero_area can be set here, given the early
		 * returns above.
		 */
		pred = LLVMRealONE;
	}

	return LLVMBuildFCmp(b, pred, det, ctx->f32_0, "");
}
예제 #5
0
/*
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at the same time.
 * TID is the Thread ID of the current thread, a value between 0 and 63
 * giving the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *  +------+------+
 *  |4n + 0|4n + 1|
 *  +------+------+
 *  |4n + 2|4n + 3|
 *  +------+------+
 *
 * Masking the TID with 0xfffffffc therefore yields the TID of the top left
 * pixel of the quad, masking with 0xfffffffd yields the TID of the top pixel
 * of the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 *
 * This function reads "val" from thread (TID & mask) and from thread
 * ((TID & mask) + idx), bitcasts both to f32 and returns the second minus
 * the first. The values are exchanged with llvm.amdgcn.ds.bpermute when
 * has_ds_bpermute is set, otherwise through the "lds" array.
 */
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
	      bool has_ds_bpermute,
	      uint32_t mask,
	      int idx,
	      LLVMValueRef lds,
	      LLVMValueRef val)
{
	LLVMBuilderRef b = ctx->builder;
	LLVMValueRef self_tid = ac_get_thread_id(ctx);
	LLVMValueRef base_tid, neighbor_tid;
	LLVMValueRef base_val, neighbor_val;

	base_tid = LLVMBuildAnd(b, self_tid,
				LLVMConstInt(ctx->i32, mask, false), "");
	neighbor_tid = LLVMBuildAdd(b, base_tid,
				    LLVMConstInt(ctx->i32, idx, false), "");

	if (!has_ds_bpermute) {
		/* Every thread stores its value into its own slot, then the
		 * two slots of interest are read back.
		 */
		LLVMValueRef own_slot = ac_build_gep0(ctx, lds, self_tid);
		LLVMValueRef base_slot = ac_build_gep0(ctx, lds, base_tid);
		LLVMValueRef neighbor_slot = ac_build_gep0(ctx, lds, neighbor_tid);

		LLVMBuildStore(b, val, own_slot);
		base_val = LLVMBuildLoad(b, base_slot, "");
		neighbor_val = LLVMBuildLoad(b, neighbor_slot, "");
	} else {
		/* The lane index is multiplied by 4 before being handed to
		 * the intrinsic.
		 * NOTE(review): presumably ds.bpermute takes a byte offset - confirm.
		 */
		LLVMValueRef args[2];

		args[1] = val;

		args[0] = LLVMBuildMul(b, base_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		base_val = ac_build_intrinsic(ctx,
					      "llvm.amdgcn.ds.bpermute", ctx->i32,
					      args, 2,
					      AC_FUNC_ATTR_READNONE |
					      AC_FUNC_ATTR_CONVERGENT);

		args[0] = LLVMBuildMul(b, neighbor_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		neighbor_val = ac_build_intrinsic(ctx,
						  "llvm.amdgcn.ds.bpermute", ctx->i32,
						  args, 2,
						  AC_FUNC_ATTR_READNONE |
						  AC_FUNC_ATTR_CONVERGENT);
	}

	base_val = LLVMBuildBitCast(b, base_val, ctx->f32, "");
	neighbor_val = LLVMBuildBitCast(b, neighbor_val, ctx->f32, "");

	return LLVMBuildFSub(b, neighbor_val, base_val, "");
}
예제 #6
0
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * With n = min(mantissa bits of dst_type, src_width), the integer (shifted
 * right first if it is wider than the mantissa) is OR-ed into the bit
 * pattern of the float constant 2^(mantissa - n), reinterpreted as float,
 * the constant is subtracted again and the result is multiplied by
 * 2^n / (2^n - 1).
 *
 * dst_type must be a floating point type (asserted).
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef float_vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_vec;
   LLVMValueRef bias_bits;
   LLVMValueRef scale_vec;
   LLVMValueRef value = src;
   unsigned mantissa_bits;
   unsigned kept_bits;
   unsigned long long range;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa_bits = lp_mantissa(dst_type);
   kept_bits = MIN2(mantissa_bits, src_width);

   range = 1ULL << kept_bits;
   scale = (double)range / (double)(range - 1);
   bias = (double)(1ULL << (mantissa_bits - kept_bits));

   /* Drop the low bits that do not fit into the mantissa. */
   if (src_width > mantissa_bits) {
      int shift = src_width - mantissa_bits;
      value = LLVMBuildLShr(builder, value,
                            lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_vec = lp_build_const_vec(dst_type, bias);

   /* Merge the integer bits into the float bit pattern of the bias. */
   bias_bits = LLVMBuildBitCast(builder, bias_vec, int_vec_type, "");
   value = LLVMBuildOr(builder, value, bias_bits, "");
   value = LLVMBuildBitCast(builder, value, float_vec_type, "");

   /* Remove the bias again and rescale. */
   value = LLVMBuildFSub(builder, value, bias_vec, "");
   scale_vec = lp_build_const_vec(dst_type, scale);

   return LLVMBuildFMul(builder, value, scale_vec, "");
}
예제 #7
0
파일: TranslateExpr.c 프로젝트: YuKill/ftc
/*
 * Emits the floating point instruction for the binary operator Op applied to
 * ValueE1 and ValueE2, using the file-level Builder. Arithmetic operators
 * yield the result value; comparison operators yield the result of an
 * ordered fcmp. Returns NULL for any operator not handled here.
 */
static LLVMValueRef
translateFloatBinOp(NodeKind Op, LLVMValueRef ValueE1, LLVMValueRef ValueE2) {
  LLVMRealPredicate Pred;

  switch (Op) {
    /* Arithmetic: one instruction each. */
    case SumOp:
      return LLVMBuildFAdd(Builder, ValueE1, ValueE2, "");
    case SubOp:
      return LLVMBuildFSub(Builder, ValueE1, ValueE2, "");
    case MultOp:
      return LLVMBuildFMul(Builder, ValueE1, ValueE2, "");
    case DivOp:
      return LLVMBuildFDiv(Builder, ValueE1, ValueE2, "");

    /* Comparisons: only the predicate differs. */
    case LtOp:   Pred = LLVMRealOLT; break;
    case LeOp:   Pred = LLVMRealOLE; break;
    case GtOp:   Pred = LLVMRealOGT; break;
    case GeOp:   Pred = LLVMRealOGE; break;
    case EqOp:   Pred = LLVMRealOEQ; break;
    case DiffOp: Pred = LLVMRealONE; break;

    default:
      return NULL;
  }

  return LLVMBuildFCmp(Builder, Pred, ValueE1, ValueE2, "");
}
예제 #8
0
/*
 * Emit the fractional part of emit_data->args[0], computed as
 * x - floor(x), and store it in emit_data->output[emit_data->chan].
 *
 * TGSI_OPCODE_FRC uses llvm.floor.f32 and TGSI_OPCODE_DFRAC uses
 * llvm.floor.f64. Any other opcode trips the assert and, in builds where
 * asserts are compiled out, returns without emitting anything.
 */
static void emit_frac(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	/* Only ever points at string literals, so it must be const. */
	const char *intr;

	if (emit_data->info->opcode == TGSI_OPCODE_FRC)
		intr = "llvm.floor.f32";
	else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
		intr = "llvm.floor.f64";
	else {
		assert(0);
		return;
	}

	/* Named floor_val rather than floor to avoid shadowing floor(3). */
	LLVMValueRef floor_val = lp_build_intrinsic(builder, intr, emit_data->dst_type,
						    &emit_data->args[0], 1,
						    LLVMReadNoneAttribute);
	emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
			emit_data->args[0], floor_val, "");
}
예제 #9
0
파일: lp_bld_conv.c 프로젝트: RobinWuDev/Qt
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * Two strategies are used depending on whether src_width fits into the
 * mantissa + 1 bits a float of dst_type can represent; see the comments in
 * the body. dst_type must be a floating point type (asserted).
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, dst_type);
    LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, dst_type);
    LLVMValueRef bias_vec;
    LLVMValueRef bias_bits;
    LLVMValueRef value;
    unsigned mantissa_bits;
    unsigned kept_bits;
    unsigned long long range;
    double scale;
    double bias;

    assert(dst_type.floating);

    mantissa_bits = lp_mantissa(dst_type);

    if (src_width > (mantissa_bits + 1)) {
        /*
         * The source width exceeds what can be represented in floating
         * point. So truncate the incoming values: shift away the excess
         * low bits, OR the rest into the bit pattern of the bias constant,
         * reinterpret as float, subtract the bias and rescale.
         */

        kept_bits = MIN2(mantissa_bits, src_width);

        range = 1ULL << kept_bits;
        scale = (double)range / (double)(range - 1);
        bias = (double)(1ULL << (mantissa_bits - kept_bits));

        value = src;

        if (src_width > mantissa_bits) {
            int shift = src_width - mantissa_bits;
            value = LLVMBuildLShr(builder, value,
                                  lp_build_const_int_vec(gallivm, dst_type, shift), "");
        }

        bias_vec = lp_build_const_vec(gallivm, dst_type, bias);

        bias_bits = LLVMBuildBitCast(builder, bias_vec, int_vec_type, "");
        value = LLVMBuildOr(builder, value, bias_bits, "");
        value = LLVMBuildBitCast(builder, value, float_vec_type, "");

        value = LLVMBuildFSub(builder, value, bias_vec, "");
        value = LLVMBuildFMul(builder, value,
                              lp_build_const_vec(gallivm, dst_type, scale), "");

        return value;
    }

    /*
     * The source width fits what can be represented in floating point
     * (i.e., mantissa + 1 bits). So convert to float and do a straight
     * multiplication by 1 / (2^src_width - 1). No further rounding is
     * necessary.
     */

    scale = 1.0/(double)((1ULL << src_width) - 1);
    value = LLVMBuildSIToFP(builder, src, float_vec_type, "");

    return LLVMBuildFMul(builder, value,
                         lp_build_const_vec(gallivm, dst_type, scale), "");
}
예제 #10
0
/* Perform view culling and small primitive elimination and return true
 * if the primitive is accepted and initially_accepted == true.
 *
 * The bounding box is built from channels 0..2 of the three positions in
 * "pos". Each cull_* flag enables one test while generating code:
 *  - cull_view_xy:     reject if the box is entirely outside [-1, 1] in
 *                      channel 0 or 1.
 *  - cull_view_near_z: reject if the channel 2 maximum is below the lower
 *                      bound (0 with use_halfz_clip_space, otherwise -1).
 *  - cull_view_far_z:  reject if the channel 2 minimum is above 1.
 *  - cull_small_prims: reject if the box, converted with vp_scale and
 *                      vp_translate and grown by small_prim_precision,
 *                      rounds to the same value at both ends in channel 0
 *                      or channel 1.
 *
 * If no test is enabled, no code is emitted and initially_accepted is
 * returned unchanged, so an earlier rejection is never turned back into an
 * acceptance.
 */
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
			      LLVMValueRef pos[3][4],
			      LLVMValueRef initially_accepted,
			      struct ac_position_w_info *w,
			      LLVMValueRef vp_scale[2],
			      LLVMValueRef vp_translate[2],
			      LLVMValueRef small_prim_precision,
			      bool cull_view_xy,
			      bool cull_view_near_z,
			      bool cull_view_far_z,
			      bool cull_small_prims,
			      bool use_halfz_clip_space)
{
	LLVMBuilderRef builder = ctx->builder;

	/* Nothing to test: the result is exactly what the caller passed in.
	 * (Returning constant true here would drop a prior rejection.)
	 */
	if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
		return initially_accepted;

	/* Skip the culling if the primitive has already been rejected or
	 * if any W is negative. The bounding box culling doesn't work when
	 * W is negative.
	 */
	LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
					 w->all_w_positive, "");

	/* The result lives in a stack slot so that the skipped path keeps
	 * initially_accepted and the tested path can overwrite it.
	 */
	LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
	LLVMBuildStore(builder, initially_accepted, accepted_var);

	/* NOTE(review): the same constant is passed to ac_build_endif below;
	 * presumably a label id - confirm.
	 */
	ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
	{
		LLVMValueRef bbox_min[3], bbox_max[3];
		LLVMValueRef accepted = initially_accepted;

		/* Compute the primitive bounding box for easy culling. */
		for (unsigned chan = 0; chan < 3; chan++) {
			bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
			bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);

			bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
			bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
		}

		/* View culling. */
		if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
			for (unsigned chan = 0; chan < 3; chan++) {
				LLVMValueRef visible;

				/* Lower bound: the box maximum must reach it. */
				if ((cull_view_xy && chan <= 1) ||
				    (cull_view_near_z && chan == 2)) {
					float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
					visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
								LLVMConstReal(ctx->f32, t), "");
					accepted = LLVMBuildAnd(builder, accepted, visible, "");
				}

				/* Upper bound: the box minimum must not exceed 1. */
				if ((cull_view_xy && chan <= 1) ||
				    (cull_view_far_z && chan == 2)) {
					visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
								ctx->f32_1, "");
					accepted = LLVMBuildAnd(builder, accepted, visible, "");
				}
			}
		}

		/* Small primitive elimination. */
		if (cull_small_prims) {
			/* Assuming a sample position at (0.5, 0.5), if we round
			 * the bounding box min/max extents and the results of
			 * the rounding are equal in either the X or Y direction,
			 * the bounding box does not intersect the sample.
			 *
			 * See these GDC slides for pictures:
			 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
			 */
			LLVMValueRef min, max, not_equal[2], visible;

			for (unsigned chan = 0; chan < 2; chan++) {
				/* Convert the position to screen-space coordinates. */
				min = ac_build_fmad(ctx, bbox_min[chan],
						    vp_scale[chan], vp_translate[chan]);
				max = ac_build_fmad(ctx, bbox_max[chan],
						    vp_scale[chan], vp_translate[chan]);
				/* Scale the bounding box according to the precision of
				 * the rasterizer and the number of MSAA samples. */
				min = LLVMBuildFSub(builder, min, small_prim_precision, "");
				max = LLVMBuildFAdd(builder, max, small_prim_precision, "");

				/* Determine if the bbox intersects the sample point.
				 * It also works for MSAA, but vp_scale, vp_translate,
				 * and small_prim_precision are computed differently.
				 */
				min = ac_build_round(ctx, min);
				max = ac_build_round(ctx, max);
				not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
			}
			/* Visible only if the rounded extents differ in both
			 * directions.
			 */
			visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
			accepted = LLVMBuildAnd(builder, accepted, visible, "");
		}

		LLVMBuildStore(builder, accepted, accepted_var);
	}
	ac_build_endif(ctx, 10000000);

	return LLVMBuildLoad(builder, accepted_var, "");
}
예제 #11
0
/*
 * Rewrite cube map texture coordinates (and, for TXD, the derivatives) in
 * place for sampling.
 *
 * coords_arg is replaced by the four values produced by
 * si_llvm_cube_to_2d_coords, with the third adjusted for cube arrays and the
 * fourth carrying compare/lod/bias where the opcode or target needs it.
 * For TGSI_OPCODE_TXD with a non-NULL derivs_arg, the first four entries of
 * derivs_arg are replaced by the 2D derivatives (two per axis); on entry it
 * is read as three cube derivatives per axis.
 */
void si_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
			    struct lp_build_emit_data *emit_data,
			    LLVMValueRef *coords_arg,
			    LLVMValueRef *derivs_arg)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	LLVMValueRef face_coords[4];
	unsigned c;

	si_llvm_cube_to_2d_coords(bld_base, coords_arg, face_coords);

	if (derivs_arg && opcode == TGSI_OPCODE_TXD) {
		/* Results go to a temporary first because derivs_arg is
		 * still being read while the second axis is processed.
		 */
		LLVMValueRef face_derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef neighbor_cube[4], neighbor_face[4];

			/* Shift the cube coordinates by the derivatives to get
			 * the cube coordinates of the "neighboring pixel".
			 */
			for (c = 0; c < 3; c++) {
				neighbor_cube[c] =
					LLVMBuildFAdd(builder, coords_arg[c],
						      derivs_arg[axis * 3 + c], "");
			}
			neighbor_cube[3] = LLVMGetUndef(bld_base->base.elem_type);

			/* Project the shifted cube coordinates onto the face. */
			si_llvm_cube_to_2d_coords(bld_base, neighbor_cube,
						  neighbor_face);

			/* Subtract both sets of 2D coordinates to get 2D derivatives.
			 * This won't work if the shifted coordinates ended up
			 * in a different face.
			 */
			for (c = 0; c < 2; c++) {
				face_derivs[axis * 2 + c] =
					LLVMBuildFSub(builder, neighbor_face[c],
						      face_coords[c], "");
			}
		}

		memcpy(derivs_arg, face_derivs, sizeof(face_derivs));
	}

	switch (target) {
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		face_coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
							    coords_arg[3],
							    lp_build_const_float(gallivm, 8.0),
							    face_coords[2]);
		break;
	default:
		break;
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	switch (opcode) {
	case TGSI_OPCODE_TEX2:
	case TGSI_OPCODE_TXB2:
	case TGSI_OPCODE_TXL2:
		face_coords[3] = coords_arg[4];
		break;
	case TGSI_OPCODE_TXB:
	case TGSI_OPCODE_TXL:
		face_coords[3] = coords_arg[3];
		break;
	default:
		if (target == TGSI_TEXTURE_SHADOWCUBE)
			face_coords[3] = coords_arg[3];
		break;
	}

	memcpy(coords_arg, face_coords, sizeof(face_coords));
}
예제 #12
0
/*
 * Rewrite cube map texture coordinates (and optionally derivatives) in
 * place.
 *
 * On return the first three entries of coords_arg hold the two face
 * coordinates (selected coordinate divided by |ma|, then shifted by 1.5)
 * and the face id; for arrays the third entry is coords_arg[3] * 8 + face.
 * When is_deriv is set and derivs_arg is non-NULL, derivs_arg is read as
 * three cube derivatives per axis and its first four entries are replaced
 * by two 2D derivatives per axis.
 */
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{
	LLVMBuilderRef b = ctx->builder;
	struct cube_selection_coords sel;
	LLVMValueRef face_coords[3];
	LLVMValueRef abs_ma, inv_ma;

	build_cube_intrinsic(ctx, coords_arg, &sel);

	/* inv_ma = 1 / |ma| */
	abs_ma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
				    ctx->f32, &sel.ma, 1, AC_FUNC_ATTR_READNONE);
	inv_ma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), abs_ma);

	for (int i = 0; i < 2; ++i)
		face_coords[i] = LLVMBuildFMul(b, sel.stc[i], inv_ma, "");

	face_coords[2] = sel.id;

	if (is_deriv && derivs_arg) {
		/* Collected in a temporary because derivs_arg is still read
		 * while the second axis is processed.
		 */
		LLVMValueRef face_derivs[4];

		/* Convert cube derivatives to 2D derivatives. */
		for (int axis = 0; axis < 2; axis++) {
			LLVMValueRef d_st[2];
			LLVMValueRef d_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(b, &sel, &derivs_arg[axis * 3],
					  d_st, &d_ma);

			d_ma = LLVMBuildFMul(b, d_ma, inv_ma, "");

			for (int i = 0; i < 2; ++i) {
				LLVMValueRef scaled_st =
					LLVMBuildFMul(b, d_st[i], inv_ma, "");
				LLVMValueRef correction =
					LLVMBuildFMul(b, d_ma, face_coords[i], "");

				face_derivs[axis * 2 + i] =
					LLVMBuildFSub(b, scaled_st, correction, "");
			}
		}

		memcpy(derivs_arg, face_derivs, sizeof(face_derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation, which uses the unshifted values.
	 */
	for (int i = 0; i < 2; ++i) {
		face_coords[i] = LLVMBuildFAdd(b, face_coords[i],
					       LLVMConstReal(ctx->f32, 1.5), "");
	}

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		LLVMValueRef layer_base =
			LLVMBuildFMul(b, coords_arg[3],
				      LLVMConstReal(ctx->f32, 8.0), "");
		face_coords[2] = LLVMBuildFAdd(b, layer_base, face_coords[2], "");
	}

	memcpy(coords_arg, face_coords, sizeof(face_coords));
}