Ejemplo n.º 1
0
/*
 * Helper for building packed ddx/ddy vector for one coord (scalar per quad
 * values). The vector will look like this (8-wide):
 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
 * This only needs 2 (v)shufps.
 */
LLVMValueRef
lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
                                 LLVMValueRef a, LLVMValueRef b)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
   LLVMValueRef vec1, vec2;
   unsigned length, num_quads, i;

   /* XXX: do hsub version */
   length = bld->type.length;
   num_quads = length / 4;
   for (i = 0; i < num_quads; i++) {
      unsigned s1 = 4 * i;
      unsigned s2 = 4 * i + length;
      shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
      shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
      shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
      shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
      shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
      shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
      shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
      shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
   }
   vec1 = LLVMBuildShuffleVector(builder, a, b,
                                 LLVMConstVector(shuffles1, length), "");
   vec2 = LLVMBuildShuffleVector(builder, a, b,
                                 LLVMConstVector(shuffles2, length), "");
   if (bld->type.floating)
      return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
   else
      return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
}
Ejemplo n.º 2
0
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMValueRef res;

   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      /* scalar */
      assert(vec_type == LLVMTypeOf(scalar));
      res = scalar;
   } else {
      LLVMBuilderRef builder = gallivm->builder;
      const unsigned length = LLVMGetVectorSize(vec_type);
      LLVMValueRef undef = LLVMGetUndef(vec_type);
      /* The shuffle vector is always made of int32 elements */
      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);

      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

      res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
      res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
   }

   return res;
}
Ejemplo n.º 3
0
/**
 * Swizzle a vector consisting of an array of XYZW structs.
 *
 * This fills a vector of dst_len length with the swizzled channels from src.
 *
 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
 *      RGBA RGBA = BGR BGR BG
 *
 * @param swizzles        the swizzle array
 * @param num_swizzles    the number of elements in swizzles
 * @param dst_len         the length of the result
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
                       LLVMValueRef src,
                       const unsigned char* swizzles,
                       unsigned num_swizzles,
                       unsigned dst_len)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
   unsigned i;

   assert(dst_len < LP_MAX_VECTOR_WIDTH);

   for (i = 0; i < dst_len; ++i) {
      int swizzle = swizzles[i % num_swizzles];

      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
         shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
      } else {
         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
      }
   }

   return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
}
/**
 * Expands src vector from src.length to dst_length
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                       LLVMValueRef src,
                       struct lp_type src_type,
                       unsigned dst_length)
{
   LLVMValueRef undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(dst_length <= Elements(elems));
   assert(dst_length > src_type.length);

   if (src_type.length == dst_length)
      return src;

   /* If its a single scalar type, no need to reinvent the wheel */
   if (src_type.length == 1) {
      return lp_build_broadcast(gallivm, LLVMVectorType(lp_build_elem_type(gallivm, src_type), dst_length), src);
   }

   /* All elements from src vector */
   for (i = 0; i < src_type.length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /* Undef fill remaining space */
   for (i = src_type.length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_type.length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef, LLVMConstVector(elems, dst_length), "");
}
Ejemplo n.º 5
0
/**
 * Unpack and broadcast packed aos values consisting of only the
 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
 */
LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
                                      struct lp_type src_type,
                                      struct lp_type dst_type,
                                      const LLVMValueRef src)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_dst = dst_type.length;
   unsigned num_src = dst_type.length / 4;
   unsigned i;

   assert(num_dst / 4 <= src_type.length);

   for (i = 0; i < num_src; i++) {
      shuffles[i*4] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
   }

   if (num_src == 1) {
      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
                                        src, shuffles[0]);
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}
Ejemplo n.º 6
0
/**
 * Pack first element of aos values,
 * pad out to destination size.
 * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
 */
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
                          struct lp_type src_type,
                          struct lp_type dst_type,
                          const LLVMValueRef src)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef undef = LLVMGetUndef(i32t);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_src = src_type.length / 4;
   unsigned num_dst = dst_type.length;
   unsigned i;

   assert(num_src <= num_dst);

   for (i = 0; i < num_src; i++) {
      shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
   }
   for (i = num_src; i < num_dst; i++) {
      shuffles[i] = undef;
   }

   if (num_dst == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}
/**
 * Concatenates several (must be a power of 2) vectors (of same type)
 * into a larger one.
 * Most useful for building up a 256bit sized vector out of two 128bit ones.
 */
LLVMValueRef
lp_build_concat(struct gallivm_state *gallivm,
                LLVMValueRef src[],
                struct lp_type src_type,
                unsigned num_vectors)
{
   unsigned new_length, i;
   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH/2];
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

   assert(src_type.length * num_vectors <= Elements(shuffles));
   assert(util_is_power_of_two(num_vectors));

   new_length = src_type.length;

   for (i = 0; i < num_vectors; i++)
      tmp[i] = src[i];

   while (num_vectors > 1) {
      num_vectors >>= 1;
      new_length <<= 1;
      for (i = 0; i < new_length; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
      for (i = 0; i < num_vectors; i++) {
         tmp[i] = LLVMBuildShuffleVector(gallivm->builder, tmp[i*2], tmp[i*2 + 1],
                                         LLVMConstVector(shuffles, new_length), "");
      }
   }

   return tmp[0];
}
Ejemplo n.º 8
0
/**
 * Interleave vector elements.
 *
 * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
 * (but not for 256bit AVX vectors).
 */
LLVMValueRef
lp_build_interleave2(struct gallivm_state *gallivm,
                     struct lp_type type,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     unsigned lo_hi)
{
   LLVMValueRef shuffle;

   if (type.length == 2 && type.width == 128 && util_cpu_caps.has_avx) {
      /*
       * XXX: This is a workaround for llvm code generation deficiency. Strangely
       * enough, while this needs vinsertf128/vextractf128 instructions (hence
       * a natural match when using 2x128bit vectors) the "normal" unpack shuffle
       * generates code ranging from atrocious (llvm 3.1) to terrible (llvm 3.2, 3.3).
       * So use some different shuffles instead (the exact shuffles don't seem to
       * matter, as long as not using 128bit wide vectors, works with 8x32 or 4x64).
       */
      struct lp_type tmp_type = type;
      LLVMValueRef srchalf[2], tmpdst;
      tmp_type.length = 4;
      tmp_type.width = 64;
      a = LLVMBuildBitCast(gallivm->builder, a, lp_build_vec_type(gallivm, tmp_type), "");
      b = LLVMBuildBitCast(gallivm->builder, b, lp_build_vec_type(gallivm, tmp_type), "");
      srchalf[0] = lp_build_extract_range(gallivm, a, lo_hi * 2, 2);
      srchalf[1] = lp_build_extract_range(gallivm, b, lo_hi * 2, 2);
      tmp_type.length = 2;
      tmpdst = lp_build_concat(gallivm, srchalf, tmp_type, 2);
      return LLVMBuildBitCast(gallivm->builder, tmpdst, lp_build_vec_type(gallivm, type), "");
   }

   shuffle = lp_build_const_unpack_shuffle(gallivm, type.length, lo_hi);

   return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, "");
}
Ejemplo n.º 9
0
/**
 * Combined extract and broadcast (mere shuffle in most cases)
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          */

         res = lp_build_broadcast(gallivm,
                                  lp_build_vec_type(gallivm, dst_type),
                                  vector);
      }
   }
   else {
      if (dst_type.length > 1) {
         /*
          * shuffle - result can be of different length.
          */

         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(gallivm,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(gallivm->builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
                                      shuffle, "");
      }
      else {
         /*
          * Trivial extract scalar from vector.
          */
          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
      }
   }

   return res;
}
Ejemplo n.º 10
0
/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;
   if((mask & 0xf) == 0xf)
      return a;
   if((mask & 0xf) == 0x0)
      return b;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (n <= 4) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type,
                                           (mask & (1 << i) ? 0 : n) + j + i,
                                           0);

      return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
   }
   else {
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}
Ejemplo n.º 11
0
/**
 * Interleave vector elements.
 *
 * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
 */
LLVMValueRef
lp_build_interleave2(struct gallivm_state *gallivm,
                     struct lp_type type,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     unsigned lo_hi)
{
   LLVMValueRef shuffle;

   shuffle = lp_build_const_unpack_shuffle(gallivm, type.length, lo_hi);

   return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, "");
}
/**
 * Interleave vector elements but with 256 bit,
 * treats it as interleave with 2 concatenated 128 bit vectors.
 *
 * This differs to lp_build_interleave2 as that function would do the following (for lo):
 * a0 b0 a1 b1 a2 b2 a3 b3, and this does not compile into an AVX unpack instruction.
 *
 *
 * An example interleave 8x float with 8x float on AVX 256bit unpack:
 *   a0 a1 a2 a3 a4 a5 a6 a7 <-> b0 b1 b2 b3 b4 b5 b6 b7
 *
 * Equivalent to interleaving 2x 128 bit vectors
 *   a0 a1 a2 a3 <-> b0 b1 b2 b3 concatenated with a4 a5 a6 a7 <-> b4 b5 b6 b7
 *
 * So interleave-lo would result in:
 *   a0 b0 a1 b1 a4 b4 a5 b5
 *
 * And interleave-hi would result in:
 *   a2 b2 a3 b3 a6 b6 a7 b7
 */
LLVMValueRef
lp_build_interleave2_half(struct gallivm_state *gallivm,
                     struct lp_type type,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     unsigned lo_hi)
{
   if (type.length * type.width == 256) {
      LLVMValueRef shuffle = lp_build_const_unpack_shuffle_half(gallivm, type.length, lo_hi);
      return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, "");
   } else {
      return lp_build_interleave2(gallivm, type, a, b, lo_hi);
   }
}
Ejemplo n.º 13
0
static LLVMValueRef emit_swizzle(
	struct lp_build_tgsi_context * bld_base,
        LLVMValueRef value,
	unsigned swizzle_x,
	unsigned swizzle_y,
	unsigned swizzle_z,
	unsigned swizzle_w)
{
	LLVMValueRef swizzles[4];
	LLVMTypeRef i32t =
		LLVMInt32TypeInContext(bld_base->base.gallivm->context);

	swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
	swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
	swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
	swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);

	return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
		value,
		LLVMGetUndef(LLVMTypeOf(value)),
		LLVMConstVector(swizzles, 4), "");
}
/**
 * Return a vector with elements src[start:start+size]
 * Most useful for getting half the values out of a 256bit sized vector,
 * otherwise may cause data rearrangement to happen.
 */
LLVMValueRef
lp_build_extract_range(struct gallivm_state *gallivm,
                       LLVMValueRef src,
                       unsigned start,
                       unsigned size)
{
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(size <= Elements(elems));

   for (i = 0; i < size; ++i)
      elems[i] = lp_build_const_int32(gallivm, i + start);

   if (size == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, elems[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(elems, size), "");
   }
}
static LLVMValueRef
llvm_load_input_vector(
	struct radeon_llvm_context * ctx, unsigned location, unsigned ijregs,
	boolean interp)
{
		LLVMTypeRef VecType;
		LLVMValueRef Args[3] = {
			lp_build_const_int32(&(ctx->gallivm), location)
		};
		unsigned ArgCount = 1;
		if (interp) {
			VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 2);
			LLVMValueRef IJIndex = LLVMGetParam(ctx->main_fn, ijregs / 2);
			Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
				lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2)), "");
			Args[ArgCount++] = LLVMBuildExtractElement(ctx->gallivm.builder, IJIndex,
				lp_build_const_int32(&(ctx->gallivm), 2 * (ijregs % 2) + 1), "");
			LLVMValueRef HalfVec[2] = {
				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.xy",
					VecType, Args, ArgCount, LLVMReadNoneAttribute),
				build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.zw",
					VecType, Args, ArgCount, LLVMReadNoneAttribute)
			};
			LLVMValueRef MaskInputs[4] = {
				lp_build_const_int32(&(ctx->gallivm), 0),
				lp_build_const_int32(&(ctx->gallivm), 1),
				lp_build_const_int32(&(ctx->gallivm), 2),
				lp_build_const_int32(&(ctx->gallivm), 3)
			};
			LLVMValueRef Mask = LLVMConstVector(MaskInputs, 4);
			return LLVMBuildShuffleVector(ctx->gallivm.builder, HalfVec[0], HalfVec[1],
				Mask, "");
		} else {
			VecType = LLVMVectorType(ctx->soa.bld_base.base.elem_type, 4);
			return build_intrinsic(ctx->gallivm.builder, "llvm.R600.interp.const",
				VecType, Args, ArgCount, LLVMReadNoneAttribute);
		}
}
Ejemplo n.º 16
0
/**
 * Expands src vector from src.length to dst_length
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    unsigned dst_length)
{
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef undef;
   LLVMTypeRef type;
   unsigned i, src_length;

   type = LLVMTypeOf(src);

   if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
      /* Can't use ShuffleVector on non-vector type */
      undef = LLVMGetUndef(LLVMVectorType(type, dst_length));
      return LLVMBuildInsertElement(gallivm->builder, undef, src, lp_build_const_int32(gallivm, 0), "");
   }

   undef      = LLVMGetUndef(type);
   src_length = LLVMGetVectorSize(type);

   assert(dst_length <= Elements(elems));
   assert(dst_length >= src_length);

   if (src_length == dst_length)
      return src;

   /* All elements from src vector */
   for (i = 0; i < src_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /* Undef fill remaining space */
   for (i = src_length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef, LLVMConstVector(elems, dst_length), "");
}
Ejemplo n.º 17
0
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMValueRef res;

   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      /* scalar */
      assert(vec_type == LLVMTypeOf(scalar));
      res = scalar;
   } else {
      LLVMBuilderRef builder = gallivm->builder;
      const unsigned length = LLVMGetVectorSize(vec_type);
      LLVMValueRef undef = LLVMGetUndef(vec_type);
      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);

      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

      if (HAVE_LLVM >= 0x207) {
         /* The shuffle vector is always made of int32 elements */
         LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
         res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
         res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
      } else {
         /* XXX: The above path provokes a bug in LLVM 2.6 */
         unsigned i;
         res = undef;
         for(i = 0; i < length; ++i) {
            LLVMValueRef index = lp_build_const_int32(gallivm, i);
            res = LLVMBuildInsertElement(builder, res, scalar, index, "");
         }
      }
   }

   return res;
}
Ejemplo n.º 18
0
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
static void
lp_build_occlusion_count(LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMValueRef countmask = lp_build_const_int_vec(type, 1);
   LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
   LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16);
   LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti");
   LLVMValueRef maskarray[4] = {
      LLVMConstInt(LLVMInt32Type(), 0, 0),
      LLVMConstInt(LLVMInt32Type(), 4, 0),
      LLVMConstInt(LLVMInt32Type(), 8, 0),
      LLVMConstInt(LLVMInt32Type(), 12, 0),
   };
   LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4);
   LLVMValueRef shufflev =  LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev");
   LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle");
   LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle);
   LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig");
   LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr");
   LLVMBuildStore(builder, incr, counter);
}
Ejemplo n.º 19
0
         break;
      }

      if (res) {
         res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
         return res;
      }
   }

   /* generic shuffle */
   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");

   shuffle = lp_build_const_pack_shuffle(gallivm, dst_type.length);

   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");

   return res;
}



/**
 * Non-interleaved pack and saturate.
 *
 * Same as lp_build_pack2 but will saturate values so that they fit into the
 * destination type.
 */
LLVMValueRef
lp_build_packs2(struct gallivm_state *gallivm,
                struct lp_type src_type,
/**
 * Non-interleaved pack.
 *
 * This will move values as
 *         (LSB)                     (MSB)
 *   lo =   l0 __ l1 __ l2 __..  __ ln __
 *   hi =   h0 __ h1 __ h2 __..  __ hn __
 *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
 *
 * This will only change the number of bits the values are represented, not the
 * values themselves.
 *
 * It is assumed the values are already clamped into the destination type range.
 * Values outside that range will produce undefined results. Use
 * lp_build_packs2 instead.
 */
LLVMValueRef
lp_build_pack2(struct gallivm_state *gallivm,
               struct lp_type src_type,
               struct lp_type dst_type,
               LLVMValueRef lo,
               LLVMValueRef hi)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, dst_type);
   LLVMValueRef shuffle;
   LLVMValueRef res = NULL;
   struct lp_type intr_type = dst_type;

#if HAVE_LLVM < 0x0207
   intr_type = src_type;
#endif

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);

   /* Check for special cases first */
   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length >= 128) {
      const char *intrinsic = NULL;

      switch(src_type.width) {
      case 32:
         if(dst_type.sign) {
            intrinsic = "llvm.x86.sse2.packssdw.128";
         }
         else {
            if (util_cpu_caps.has_sse4_1) {
               intrinsic = "llvm.x86.sse41.packusdw";
#if HAVE_LLVM < 0x0207
               /* llvm < 2.7 has inconsistent signatures except for packusdw */
               intr_type = dst_type;
#endif
            }
         }
         break;
      case 16:
         if (dst_type.sign) {
            intrinsic = "llvm.x86.sse2.packsswb.128";
         }
         else {
            intrinsic = "llvm.x86.sse2.packuswb.128";
         }
         break;
      /* default uses generic shuffle below */
      }
      if (intrinsic) {
         if (src_type.width * src_type.length == 128) {
            LLVMTypeRef intr_vec_type = lp_build_vec_type(gallivm, intr_type);
            res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
            if (dst_vec_type != intr_vec_type) {
               res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
            }
         }
         else {
            int num_split = src_type.width * src_type.length / 128;
            int i;
            int nlen = 128 / src_type.width;
            struct lp_type ndst_type = lp_type_unorm(dst_type.width, 128);
            struct lp_type nintr_type = lp_type_unorm(intr_type.width, 128);
            LLVMValueRef tmpres[LP_MAX_VECTOR_WIDTH / 128];
            LLVMValueRef tmplo, tmphi;
            LLVMTypeRef ndst_vec_type = lp_build_vec_type(gallivm, ndst_type);
            LLVMTypeRef nintr_vec_type = lp_build_vec_type(gallivm, nintr_type);

            assert(num_split <= LP_MAX_VECTOR_WIDTH / 128);

            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
                                              lo, i*nlen*2, nlen);
               tmphi = lp_build_extract_range(gallivm,
                                              lo, i*nlen*2 + nlen, nlen);
               tmpres[i] = lp_build_intrinsic_binary(builder, intrinsic,
                                                     nintr_vec_type, tmplo, tmphi);
               if (ndst_vec_type != nintr_vec_type) {
                  tmpres[i] = LLVMBuildBitCast(builder, tmpres[i], ndst_vec_type, "");
               }
            }
            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
                                              hi, i*nlen*2, nlen);
               tmphi = lp_build_extract_range(gallivm,
                                              hi, i*nlen*2 + nlen, nlen);
               tmpres[i+num_split/2] = lp_build_intrinsic_binary(builder, intrinsic,
                                                                 nintr_vec_type,
                                                                 tmplo, tmphi);
               if (ndst_vec_type != nintr_vec_type) {
                  tmpres[i+num_split/2] = LLVMBuildBitCast(builder, tmpres[i+num_split/2],
                                                           ndst_vec_type, "");
               }
            }
            res = lp_build_concat(gallivm, tmpres, ndst_type, num_split);
         }
         return res;
      }
   }

   /* generic shuffle */
   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");

   shuffle = lp_build_const_pack_shuffle(gallivm, dst_type.length);

   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");

   return res;
}
Ejemplo n.º 21
0
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     const unsigned char swizzles[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if (swizzles[0] == PIPE_SWIZZLE_X &&
       swizzles[1] == PIPE_SWIZZLE_Y &&
       swizzles[2] == PIPE_SWIZZLE_Z &&
       swizzles[3] == PIPE_SWIZZLE_W) {
      return a;
   }

   if (swizzles[0] == swizzles[1] &&
       swizzles[1] == swizzles[2] &&
       swizzles[2] == swizzles[3]) {
      switch (swizzles[0]) {
      case PIPE_SWIZZLE_X:
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_W:
         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
      case PIPE_SWIZZLE_0:
         return bld->zero;
      case PIPE_SWIZZLE_1:
         return bld->one;
      case LP_BLD_SWIZZLE_DONTCARE:
         return bld->undef;
      default:
         assert(0);
         return bld->undef;
      }
   }

   if (LLVMIsConstant(a) ||
       type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];

      memset(aux, 0, sizeof aux);

      for(j = 0; j < n; j += 4) {
         for(i = 0; i < 4; ++i) {
            unsigned shuffle;
            switch (swizzles[i]) {
            default:
               assert(0);
               /* fall through */
            case PIPE_SWIZZLE_X:
            case PIPE_SWIZZLE_Y:
            case PIPE_SWIZZLE_Z:
            case PIPE_SWIZZLE_W:
               shuffle = j + swizzles[i];
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               break;
            case PIPE_SWIZZLE_0:
               shuffle = type.length + 0;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[0]) {
                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
               }
               break;
            case PIPE_SWIZZLE_1:
               shuffle = type.length + 1;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[1]) {
                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
               }
               break;
            case LP_BLD_SWIZZLE_DONTCARE:
               shuffles[j + i] = LLVMGetUndef(i32t);
               break;
            }
         }
      }

      for (i = 0; i < n; ++i) {
         if (!aux[i]) {
            aux[i] = undef;
         }
      }

      return LLVMBuildShuffleVector(builder, a,
                                    LLVMConstVector(aux, n),
                                    LLVMConstVector(shuffles, n), "");
   } else {
      /*
       * Bit mask and shifts.
       *
       * For example, this will convert BGRA to RGBA by doing
       *
       * Little endian:
       *   rgba = (bgra & 0x00ff0000) >> 16
       *        | (bgra & 0xff00ff00)
       *        | (bgra & 0x000000ff) << 16
       *
       * Big endian:A
       *   rgba = (bgra & 0x0000ff00) << 16
       *        | (bgra & 0x00ff00ff)
       *        | (bgra & 0xff000000) >> 16
       *
       * This is necessary not only for faster cause, but because X86 backend
       * will refuse shuffles of <4 x i8> vectors
       */
      LLVMValueRef res;
      struct lp_type type4;
      unsigned cond = 0;
      unsigned chan;
      int shift;

      /*
       * Start with a mixture of 1 and 0.
       */
      for (chan = 0; chan < 4; ++chan) {
         if (swizzles[chan] == PIPE_SWIZZLE_1) {
            cond |= 1 << chan;
         }
      }
      res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");

      /*
       * Mask and shift the channels, trying to group as many channels in the
       * same shift as possible.  The shift amount is positive for shifts left
       * and negative for shifts right.
       */
      for (shift = -3; shift <= 3; ++shift) {
         uint64_t mask = 0;

         assert(type4.width <= sizeof(mask)*8);

         /*
          * Vector element numbers follow the XYZW order, so 0 is always X, etc.
          * After widening 4 times we have:
          *
          *                                3210
          * Little-endian register layout: WZYX
          *
          *                                0123
          * Big-endian register layout:    XYZW
          *
          * For little-endian, higher-numbered channels are obtained by a shift right
          * (negative shift amount) and lower-numbered channels by a shift left
          * (positive shift amount).  The opposite is true for big-endian.
          */
         for (chan = 0; chan < 4; ++chan) {
            if (swizzles[chan] < 4) {
               /* We need to move channel swizzles[chan] into channel chan */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
               if (swizzles[chan] - chan == -shift) {
                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
               }
#else
               if (swizzles[chan] - chan == shift) {
                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
               }
#endif
            }
         }

         if (mask) {
            LLVMValueRef masked;
            LLVMValueRef shifted;
            if (0)
               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);

            masked = LLVMBuildAnd(builder, a,
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
            if (shift > 0) {
               shifted = LLVMBuildShl(builder, masked,
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
            } else if (shift < 0) {
               shifted = LLVMBuildLShr(builder, masked,
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
            } else {
               shifted = masked;
            }

            res = LLVMBuildOr(builder, res, shifted, "");
         }
      }

      return LLVMBuildBitCast(builder, res,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
Ejemplo n.º 22
0
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_cpu_caps.has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else if(util_cpu_caps.has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");

       for (i = 0; i < type.length; i++) {
          shuffles[i] = lp_build_const_int32(gallivm, 4*i);
       }

       shufflev = LLVMConstVector(shuffles, type.length);
       countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
       countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

       /*
        * XXX FIXME
        * this is bad on cpus without popcount (on x86 supported by intel
        * nehalem, amd barcelona, and up - not tied to sse42).
        * Would be much faster to just sum the 4 elements of the vector with
        * some horizontal add (shuffle/add/shuffle/add after the initial and).
        */
       switch (type.length) {
       case 4:
          popcntintr = "llvm.ctpop.i32";
          break;
       case 8:
          popcntintr = "llvm.ctpop.i64";
          break;
       case 16:
          popcntintr = "llvm.ctpop.i128";
          break;
       default:
          assert(0);
       }
       count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

       if (type.length > 4) {
          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
       }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}
Ejemplo n.º 23
0
/**
 * Store depth/stencil values.
 * Incoming values are swizzled (typically n 2x2 quads), stored linear.
 * If there's a mask it will do select/store otherwise just store.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param z_fb  z values read from fb (with padding)
 * \param s_fb  s values read from fb (with padding)
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_value the depth values to store (with padding)
 * \param s_value the stencil values to store (with padding)
 */
void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      struct lp_build_mask_context *mask,
                                      LLVMValueRef z_fb,
                                      LLVMValueRef s_fb,
                                      LLVMValueRef loop_counter,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef z_value,
                                      LLVMValueRef s_value)
{
   struct lp_build_context z_bld;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask_value = NULL;
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type z_type = zs_type;
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   z_type.width = z_src_type.width;

   lp_build_context_init(&z_bld, gallivm, z_type);

   /*
    * This is far from ideal, at least for late depth write we should do this
    * outside the fs loop to avoid all the swizzle stuff.
    */
   if (z_src_type.length == 4) {
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");

   if (format_desc->block.bits > 32) {
      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
   }

   if (mask) {
      mask_value = lp_build_mask_value(mask);
      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
      if (format_desc->block.bits > 32) {
         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
      }
   }

   if (zs_type.width < z_src_type.width) {
      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
      z_value = LLVMBuildTrunc(builder, z_value,
                               lp_build_int_vec_type(gallivm, zs_type), "");
   }

   if (format_desc->block.bits <= 32) {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
      }
      else {
         assert(z_src_type.length == 8);
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[0],
                                                          zs_load_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
                                          LLVMConstVector(&shuffles[4],
                                                          zs_load_type.length), "");
      }
   }
   else {
      if (z_src_type.length == 4) {
         zs_dst1 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 0);
         zs_dst2 = lp_build_interleave2(gallivm, z_type,
                                        z_value, s_value, 1);
      }
      else {
         unsigned i;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
         assert(z_src_type.length == 8);
         for (i = 0; i < 8; i++) {
            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
                                                   z_src_type.length);
         }
         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[0],
                                                          z_src_type.length), "");
         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
                                          LLVMConstVector(&shuffles[8],
                                                          z_src_type.length), "");
      }
      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
                                 lp_build_vec_type(gallivm, zs_load_type), "");
   }

   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
}
static void llvm_emit_tex(
	const struct lp_build_tgsi_action * action,
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	struct gallivm_state * gallivm = bld_base->base.gallivm;
	LLVMValueRef args[7];
	unsigned c, sampler_src;
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);

	if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
		switch (emit_data->inst->Instruction.Opcode) {
		case TGSI_OPCODE_TXQ: {
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->uses_tex_buffers = true;
			bool isEgPlus = (ctx->chip_class >= EVERGREEN);
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm,
				isEgPlus ? 0 : 1);
			LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset,
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			if (!isEgPlus) {
				LLVMValueRef maskval[4] = {
					lp_build_const_int32(gallivm, 1),
					lp_build_const_int32(gallivm, 2),
					lp_build_const_int32(gallivm, 3),
					lp_build_const_int32(gallivm, 0),
				};
				LLVMValueRef mask = LLVMConstVector(maskval, 4);
				cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval,
					mask, "");
			}
			emit_data->output[0] = cvecval;
			return;
		}
		case TGSI_OPCODE_TXF: {
			args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), "");
			args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS);
			emit_data->output[0] = build_intrinsic(gallivm->builder,
							"llvm.R600.load.texbuf",
							emit_data->dst_type, args, 2, LLVMReadNoneAttribute);
			if (ctx->chip_class >= EVERGREEN)
				return;
			ctx->uses_tex_buffers = true;
			LLVMDumpValue(emit_data->output[0]);
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4),
				"");
			LLVMValueRef Mask = llvm_load_const_buffer(bld_base,
				lp_build_const_int32(gallivm, 0),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				LLVMVectorType(bld_base->base.int_elem_type, 4), "");
			emit_data->output[0] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND,
				emit_data->output[0],
				Mask);
			LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder,
				emit_data->output[0], lp_build_const_int32(gallivm, 3), "");
			Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1),
				LLVM_R600_BUFFER_INFO_CONST_BUFFER);
			Mask = LLVMBuildExtractElement(gallivm->builder, Mask,
				lp_build_const_int32(gallivm, 0), "");
			Mask = LLVMBuildBitCast(gallivm->builder, Mask,
				bld_base->base.int_elem_type, "");
			WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR,
				WComponent, Mask);
			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder,
				emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), "");
			emit_data->output[0] = LLVMBuildBitCast(gallivm->builder,
				emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), "");
		}
			return;
		default:
			break;
		}
	}

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX ||
		emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
		LLVMValueRef Vector[4] = {
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 0), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 1), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 2), ""),
			LLVMBuildExtractElement(gallivm->builder, emit_data->args[0],
				lp_build_const_int32(gallivm, 3), ""),
		};
		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		case TGSI_TEXTURE_1D:
			Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type);
			break;
		default:
			break;
		}
		args[0] = lp_build_gather_values(gallivm, Vector, 4);
	} else {
		args[0] = emit_data->args[0];
	}

	assert(emit_data->arg_count + 2 <= Elements(args));

	for (c = 1; c < emit_data->arg_count; ++c)
		args[c] = emit_data->args[c];

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
		args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), "");
		args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), "");
		args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), "");
	}

	sampler_src = emit_data->inst->Instruction.NumSrcRegs-1;

	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Src[sampler_src].Register.Index);
	args[c++] = lp_build_const_int32(gallivm,
					emit_data->inst->Texture.Texture);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
		(emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {

		switch (emit_data->inst->Texture.Texture) {
		case TGSI_TEXTURE_2D_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D);
			break;
		case TGSI_TEXTURE_2D_ARRAY_MSAA:
			args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY);
			break;
		default:
			break;
		}

		if (ctx->has_compressed_msaa_texturing) {
			LLVMValueRef ldptr_args[10] = {
				args[0], // Coord
				args[1], // Offset X
				args[2], // Offset Y
				args[3], // Offset Z
				args[4],
				args[5],
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1),
				lp_build_const_int32(gallivm, 1)
			};
			LLVMValueRef ptr = build_intrinsic(gallivm->builder,
				"llvm.R600.ldptr",
				emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute);
			LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0],
				lp_build_const_int32(gallivm, 3), "");
			Tmp = LLVMBuildMul(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 4), "");
			LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr,
				lp_build_const_int32(gallivm, 0), "");
			ResX = LLVMBuildBitCast(gallivm->builder, ResX,
				bld_base->base.int_elem_type, "");
			Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, "");
			Tmp = LLVMBuildAnd(gallivm->builder, Tmp,
				lp_build_const_int32(gallivm, 0xF), "");
			args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp,
				lp_build_const_int32(gallivm, 3), "");
			args[c++] = lp_build_const_int32(gallivm,
				emit_data->inst->Texture.Texture);
		}
	}

	emit_data->output[0] = build_intrinsic(gallivm->builder,
					action->intr_name,
					emit_data->dst_type, args, c, LLVMReadNoneAttribute);

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
		((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)))
		if (emit_data->inst->Dst[0].Register.WriteMask & 4) {
			LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0);
			LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder,
				llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER),
				lp_build_const_int32(gallivm, 0), "");

			emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), "");
			struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
			ctx->has_txq_cube_array_z_comp = true;
		}
}
Ejemplo n.º 25
0
/**
 * Increment the shader input attribute values.
 * This is called when we move from one quad to the next.
 */
static void
attribs_update(struct lp_build_interp_soa_context *bld,
               struct gallivm_state *gallivm,
               LLVMValueRef loop_iter,
               int start,
               int end)
{
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   LLVMValueRef oow = NULL;
   unsigned attrib;
   unsigned chan;

   for(attrib = start; attrib < end; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         if(mask & (1 << chan)) {
            LLVMValueRef a;
            if (interp == LP_INTERP_CONSTANT ||
                interp == LP_INTERP_FACING) {
               a = LLVMBuildLoad(builder, bld->a[attrib][chan], "");
            }
            else if (interp == LP_INTERP_POSITION) {
               assert(attrib > 0);
               a = bld->attribs[0][chan];
            }
            else {
               LLVMValueRef dadq;

               a = bld->a[attrib][chan];

               /*
                * Broadcast the attribute value for this quad into all elements
                */

               {
                  /* stored as vector load as float */
                  LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext(
                                                            gallivm->context), 0);
                  LLVMValueRef ptr;
                  a = LLVMBuildBitCast(builder, a, ptr_type, "");
                  ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, "");
                  a = LLVMBuildLoad(builder, ptr, "");
                  a = lp_build_broadcast_scalar(&bld->coeff_bld, a);
               }

               /*
                * Get the derivatives.
                */

               dadq = bld->dadq[attrib][chan];

#if PERSPECTIVE_DIVIDE_PER_QUAD
               if (interp == LP_INTERP_PERSPECTIVE) {
                  LLVMValueRef dwdq = bld->dadq[0][3];

                  if (oow == NULL) {
                     assert(bld->oow);
                     oow = LLVMBuildShuffleVector(coeff_bld->builder,
                                                  bld->oow, coeff_bld->undef,
                                                  shuffle, "");
                  }

                  dadq = lp_build_sub(coeff_bld,
                                      dadq,
                                      lp_build_mul(coeff_bld, a, dwdq));
                  dadq = lp_build_mul(coeff_bld, dadq, oow);
               }
#endif

               /*
                * Add the derivatives
                */

               a = lp_build_add(coeff_bld, a, dadq);

#if !PERSPECTIVE_DIVIDE_PER_QUAD
               if (interp == LP_INTERP_PERSPECTIVE) {
                  if (oow == NULL) {
                     LLVMValueRef w = bld->attribs[0][3];
                     assert(attrib != 0);
                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
                     oow = lp_build_rcp(coeff_bld, w);
                  }
                  a = lp_build_mul(coeff_bld, a, oow);
               }
#endif

               if (attrib == 0 && chan == 2) {
                  /* FIXME: Depth values can exceed 1.0, due to the fact that
                   * setup interpolation coefficients refer to (0,0) which causes
                   * precision loss. So we must clamp to 1.0 here to avoid artifacts
                   */
                  a = lp_build_min(coeff_bld, a, coeff_bld->one);
               }

               attrib_name(a, attrib, chan, "");
            }
            bld->attribs[attrib][chan] = a;
         }
      }
   }
}
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_type type = bld_base->base.type;
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Get the constants components
    */

   res = bld->bld_base.base.undef;
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index;
      LLVMValueRef scalar_ptr;
      LLVMValueRef scalar;
      LLVMValueRef swizzle;

      index = lp_build_const_int32(bld->bld_base.base.gallivm,
                                   reg->Register.Index * 4 + chan);

      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");

      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

      /*
       * NOTE: constants array is always assumed to be RGBA
       */

      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
                                     bld->swizzles[chan]);

      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
   }

   /*
    * Broadcast the first quaternion to all others.
    *
    * XXX: could be factored into a reusable function.
    */

   if (type.length > 4) {
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      unsigned i;

      for (chan = 0; chan < 4; ++chan) {
         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
      }

      for (i = 4; i < type.length; ++i) {
         shuffles[i] = shuffles[i % 4];
      }

      res = LLVMBuildShuffleVector(builder,
                                   res, bld->bld_base.base.undef,
                                   LLVMConstVector(shuffles, type.length),
                                   "");
   }
   return res;
}
Ejemplo n.º 27
0
/**
 * Register fetch.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   struct lp_type type = bld->base.type;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Fetch the from the register file.
    */

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      /*
       * Get the constants components
       */

      res = bld->base.undef;
      for (chan = 0; chan < 4; ++chan) {
         LLVMValueRef index;
         LLVMValueRef scalar_ptr;
         LLVMValueRef scalar;
         LLVMValueRef swizzle;

         index = LLVMConstInt(LLVMInt32Type(),
                              reg->Register.Index*4 + chan,
                              0);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");

         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

         /*
          * NOTE: constants array is always assumed to be RGBA
          */

         swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);

         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
      }

      /*
       * Broadcast the first quaternion to all others.
       *
       * XXX: could be factored into a reusable function.
       */

      if (type.length > 4) {
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
         unsigned i;

         for (chan = 0; chan < 4; ++chan) {
            shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
         }

         for (i = 4; i < type.length; ++i) {
            shuffles[i] = shuffles[i % 4];
         }

         res = LLVMBuildShuffleVector(bld->base.builder,
                                      res, bld->base.undef,
                                      LLVMConstVector(shuffles, type.length),
                                      "");
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr;
         temp_ptr = bld->temps[reg->Register.Index];
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /*
    * Apply sign modifier.
    */

   if (reg->Register.Absolute) {
      res = lp_build_abs(&bld->base, res);
   }

   if(reg->Register.Negate) {
      res = lp_build_negate(&bld->base, res);
   }

   /*
    * Swizzle the argument
    */

   res = swizzle_aos(bld, res,
                     reg->Register.SwizzleX,
                     reg->Register.SwizzleY,
                     reg->Register.SwizzleZ,
                     reg->Register.SwizzleW);

   return res;
}
Ejemplo n.º 28
0
/**
 * Unpack a single pixel into its RGBA components.
 *
 * @param desc  the pixel format for the packed pixel value
 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 *
 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 */
static INLINE LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shifted, casted, scaled, masked;
   LLVMValueRef shifts[4];
   LLVMValueRef masks[4];
   LLVMValueRef scales[4];

   boolean normalized;
   boolean needs_uitofp;
   unsigned shift;
   unsigned i;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* Do the intermediate integer computations with 32bit integers since it
    * matches floating point size */
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

   /* Broadcast the packed value to all four channels
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
    */
   packed = LLVMBuildInsertElement(builder,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   packed,
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
                                   "");
   packed = LLVMBuildShuffleVector(builder,
                                   packed,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   "");

   /* Initialize vector constants */
   normalized = FALSE;
   needs_uitofp = FALSE;
   shift = 0;

   /* Loop over 4 color components */
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
         scales[i] =  LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned long long mask = (1ULL << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);

         if (bits == 32) {
            needs_uitofp = TRUE;
         }

         shifts[i] = lp_build_const_int32(gallivm, shift);
         masks[i] = lp_build_const_int32(gallivm, mask);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, 1.0 / mask);
            normalized = TRUE;
         }
         else
            scales[i] =  lp_build_const_float(gallivm, 1.0);
      }

      shift += bits;
   }

   /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
    * into masked = {B, G, R, A}
    */
   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");


   if (!needs_uitofp) {
      /* UIToFP can't be expressed in SSE2 */
      casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   } else {
      casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), "");
   }

   /* At this point 'casted' may be a vector of floats such as
    * {255.0, 255.0, 255.0, 255.0}.  Next, if the pixel values are normalized
    * we'll scale this to {1.0, 1.0, 1.0, 1.0}.
    */

   if (normalized)
      scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), "");
   else
      scaled = casted;

   return scaled;
}
Ejemplo n.º 29
0
/**
 * Pack a single pixel.
 *
 * @param rgba 4 float vector with the unpacked components.
 *
 * XXX: This is mostly for reference and testing -- operating a single pixel at
 * a time is rarely if ever needed.
 */
LLVMValueRef
lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
                       const struct util_format_description *desc,
                       LLVMValueRef rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef type;
   LLVMValueRef packed = NULL;
   LLVMValueRef swizzles[4];
   LLVMValueRef shifted, casted, scaled, unswizzled;
   LLVMValueRef shifts[4];
   LLVMValueRef scales[4];
   boolean normalized;
   unsigned shift;
   unsigned i, j;

   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);

   type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);

   /* Unswizzle the color components into the source vector. */
   for (i = 0; i < 4; ++i) {
      for (j = 0; j < 4; ++j) {
         if (desc->swizzle[j] == i)
            break;
      }
      if (j < 4)
         swizzles[i] = lp_build_const_int32(gallivm, j);
      else
         swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   }

   unswizzled = LLVMBuildShuffleVector(builder, rgba,
                                       LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)),
                                       LLVMConstVector(swizzles, 4), "");

   normalized = FALSE;
   shift = 0;
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         scales[i] =  LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context));
      }
      else {
         unsigned mask = (1 << bits) - 1;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(bits < 32);

         shifts[i] = lp_build_const_int32(gallivm, shift);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }

      shift += bits;
   }

   if (normalized)
      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
   else
      scaled = unswizzled;

   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), "");

   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");
   
   /* Bitwise or all components */
   for (i = 0; i < 4; ++i) {
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
                                               lp_build_const_int32(gallivm, i), "");
         if (packed)
            packed = LLVMBuildOr(builder, packed, component, "");
         else
            packed = component;
      }
   }

   if (!packed)
      packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));

   if (desc->block.bits < 32)
      packed = LLVMBuildTrunc(builder, packed, type, "");

   return packed;
}
Ejemplo n.º 30
0
/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_fb  contains z values loaded from fb (may include padding)
 * \param s_fb  contains s values loaded from fb (may include padding)
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      unsigned i;
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");

   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
      unsigned i;
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
      lp_build_name(*s_fb, "s_dst");
   }

   lp_build_name(*z_fb, "z_dst");
   lp_build_name(*s_fb, "s_dst");
   lp_build_name(*z_fb, "z_dst");
}