Beispiel #1
0
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	unsigned i;

	/* Add the necessary export instructions */
	for (i = 0; i < ctx->output_reg_count; i++) {
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef output;
			LLVMValueRef store_output;
			unsigned adjusted_reg_idx = i +
					ctx->reserved_reg_count;
			LLVMValueRef reg_index = lp_build_const_int32(
				base->gallivm,
				radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));

			output = LLVMBuildLoad(base->gallivm->builder,
				ctx->soa.outputs[i][chan], "");

			store_output = lp_build_intrinsic_binary(
				base->gallivm->builder,
				"llvm.AMDGPU.store.output",
				base->elem_type,
				output, reg_index);

			lp_build_intrinsic_unary(base->gallivm->builder,
				"llvm.AMDGPU.export.reg",
				LLVMVoidTypeInContext(base->gallivm->context),
				store_output);
		}
	}
}
Beispiel #2
0
static INLINE LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_sse41_mode mode)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   const char *intrinsic;

   assert(type.floating);
   assert(type.width*type.length == 128);
   assert(lp_check_value(type, a));
   assert(util_cpu_caps.has_sse4_1);

   switch(type.width) {
   case 32:
      intrinsic = "llvm.x86.sse41.round.ps";
      break;
   case 64:
      intrinsic = "llvm.x86.sse41.round.pd";
      break;
   default:
      assert(0);
      return bld->undef;
   }

   return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
                                    LLVMConstInt(LLVMInt32Type(), mode, 0));
}
Beispiel #3
0
/**
 * Generate a + b
 */
LLVMValueRef
lp_build_add(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if(a == bld->zero)
      return b;
   if(b == bld->zero)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   if(bld->type.norm) {
      const char *intrinsic = NULL;

      if(a == bld->one || b == bld->one)
        return bld->one;

      if(util_cpu_caps.has_sse2 &&
         type.width * type.length == 128 &&
         !type.floating && !type.fixed) {
         if(type.width == 8)
            intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
         if(type.width == 16)
            intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
      }
   
      if(intrinsic)
         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
   }

   if(LLVMIsConstant(a) && LLVMIsConstant(b))
      res = LLVMConstAdd(a, b);
   else
      res = LLVMBuildAdd(bld->builder, a, b, "");

   /* clamp to ceiling of 1.0 */
   if(bld->type.norm && (bld->type.floating || bld->type.fixed))
      res = lp_build_min_simple(bld, res, bld->one);

   /* XXX clamp to floor of -1 or 0??? */

   return res;
}
Beispiel #4
0
/**
 * Generate a - b
 */
LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef res;

   if(b == bld->zero)
      return a;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;
   if(a == b)
      return bld->zero;

   if(bld->type.norm) {
      const char *intrinsic = NULL;

      if(b == bld->one)
        return bld->zero;

      if(util_cpu_caps.has_sse2 &&
         type.width * type.length == 128 &&
         !type.floating && !type.fixed) {
         if(type.width == 8)
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
         if(type.width == 16)
            intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
      }
   
      if(intrinsic)
         return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
   }

   if(LLVMIsConstant(a) && LLVMIsConstant(b))
      res = LLVMConstSub(a, b);
   else
      res = LLVMBuildSub(bld->builder, a, b, "");

   if(bld->type.norm && (bld->type.floating || bld->type.fixed))
      res = lp_build_max_simple(bld, res, bld->zero);

   return res;
}
Beispiel #5
0
/**
 * Generate min(a, b)
 * No checks for special case values of a or b = 1 or 0 are done.
 */
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
                    LLVMValueRef a,
                    LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   const char *intrinsic = NULL;
   LLVMValueRef cond;

   /* TODO: optimize the constant case */

   if(type.width * type.length == 128) {
      if(type.floating) {
         if(type.width == 32 && util_cpu_caps.has_sse)
            intrinsic = "llvm.x86.sse.min.ps";
         if(type.width == 64 && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.min.pd";
      }
      else {
         if(type.width == 8 && !type.sign && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.pminu.b";
         if(type.width == 8 && type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminsb";
         if(type.width == 16 && !type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminuw";
         if(type.width == 16 && type.sign && util_cpu_caps.has_sse2)
            intrinsic = "llvm.x86.sse2.pmins.w";
         if(type.width == 32 && !type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminud";
         if(type.width == 32 && type.sign && util_cpu_caps.has_sse4_1)
            intrinsic = "llvm.x86.sse41.pminsd";
      }
   }

   if(intrinsic)
      return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);

   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
   return lp_build_select(bld, cond, a, b);
}
Beispiel #6
0
/**
 * Non-interleaved pack.
 *
 * This will move values as
 *
 *   lo =   __ l0 __ l1 __ l2 __..  __ ln
 *   hi =   __ h0 __ h1 __ h2 __..  __ hn
 *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
 *
 * This will only change the number of bits the values are represented, not the
 * values themselves.
 *
 * It is assumed the values are already clamped into the destination type range.
 * Values outside that range will produce undefined results. Use
 * lp_build_packs2 instead.
 */
LLVMValueRef
lp_build_pack2(struct gallivm_state *gallivm,
               struct lp_type src_type,
               struct lp_type dst_type,
               LLVMValueRef lo,
               LLVMValueRef hi)
{
   LLVMBuilderRef builder = gallivm->builder;
#if HAVE_LLVM < 0x0207
   LLVMTypeRef src_vec_type = lp_build_vec_type(gallivm, src_type);
#endif
   LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, dst_type);
   LLVMValueRef shuffle;
   LLVMValueRef res = NULL;

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);

   /* Check for special cases first */
   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
      switch(src_type.width) {
      case 32:
         if(dst_type.sign) {
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
#endif
         }
         else {
            if (util_cpu_caps.has_sse4_1) {
               return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
            }
            else {
               /* use generic shuffle below */
               res = NULL;
            }
         }
         break;

      case 16:
         if(dst_type.sign)
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
#endif
         else
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
#endif
         break;

      default:
         assert(0);
         return LLVMGetUndef(dst_vec_type);
         break;
      }

      if (res) {
         res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
         return res;
      }
   }
/**
 * Non-interleaved pack.
 *
 * This will move values as
 *         (LSB)                     (MSB)
 *   lo =   l0 __ l1 __ l2 __..  __ ln __
 *   hi =   h0 __ h1 __ h2 __..  __ hn __
 *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
 *
 * This will only change the number of bits the values are represented, not the
 * values themselves.
 *
 * It is assumed the values are already clamped into the destination type range.
 * Values outside that range will produce undefined results. Use
 * lp_build_packs2 instead.
 */
LLVMValueRef
lp_build_pack2(struct gallivm_state *gallivm,
               struct lp_type src_type,
               struct lp_type dst_type,
               LLVMValueRef lo,
               LLVMValueRef hi)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, dst_type);
   LLVMValueRef shuffle;
   LLVMValueRef res = NULL;
   struct lp_type intr_type = dst_type;

#if HAVE_LLVM < 0x0207
   intr_type = src_type;
#endif

   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);

   /* Check for special cases first */
   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length >= 128) {
      const char *intrinsic = NULL;

      switch(src_type.width) {
      case 32:
         if(dst_type.sign) {
            intrinsic = "llvm.x86.sse2.packssdw.128";
         }
         else {
            if (util_cpu_caps.has_sse4_1) {
               intrinsic = "llvm.x86.sse41.packusdw";
#if HAVE_LLVM < 0x0207
               /* llvm < 2.7 has inconsistent signatures except for packusdw */
               intr_type = dst_type;
#endif
            }
         }
         break;
      case 16:
         if (dst_type.sign) {
            intrinsic = "llvm.x86.sse2.packsswb.128";
         }
         else {
            intrinsic = "llvm.x86.sse2.packuswb.128";
         }
         break;
      /* default uses generic shuffle below */
      }
      if (intrinsic) {
         if (src_type.width * src_type.length == 128) {
            LLVMTypeRef intr_vec_type = lp_build_vec_type(gallivm, intr_type);
            res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
            if (dst_vec_type != intr_vec_type) {
               res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
            }
         }
         else {
            int num_split = src_type.width * src_type.length / 128;
            int i;
            int nlen = 128 / src_type.width;
            struct lp_type ndst_type = lp_type_unorm(dst_type.width, 128);
            struct lp_type nintr_type = lp_type_unorm(intr_type.width, 128);
            LLVMValueRef tmpres[LP_MAX_VECTOR_WIDTH / 128];
            LLVMValueRef tmplo, tmphi;
            LLVMTypeRef ndst_vec_type = lp_build_vec_type(gallivm, ndst_type);
            LLVMTypeRef nintr_vec_type = lp_build_vec_type(gallivm, nintr_type);

            assert(num_split <= LP_MAX_VECTOR_WIDTH / 128);

            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
                                              lo, i*nlen*2, nlen);
               tmphi = lp_build_extract_range(gallivm,
                                              lo, i*nlen*2 + nlen, nlen);
               tmpres[i] = lp_build_intrinsic_binary(builder, intrinsic,
                                                     nintr_vec_type, tmplo, tmphi);
               if (ndst_vec_type != nintr_vec_type) {
                  tmpres[i] = LLVMBuildBitCast(builder, tmpres[i], ndst_vec_type, "");
               }
            }
            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
                                              hi, i*nlen*2, nlen);
               tmphi = lp_build_extract_range(gallivm,
                                              hi, i*nlen*2 + nlen, nlen);
               tmpres[i+num_split/2] = lp_build_intrinsic_binary(builder, intrinsic,
                                                                 nintr_vec_type,
                                                                 tmplo, tmphi);
               if (ndst_vec_type != nintr_vec_type) {
                  tmpres[i+num_split/2] = LLVMBuildBitCast(builder, tmpres[i+num_split/2],
                                                           ndst_vec_type, "");
               }
            }
            res = lp_build_concat(gallivm, tmpres, ndst_type, num_split);
         }
         return res;
      }
   }

   /* generic shuffle */
   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");

   shuffle = lp_build_const_pack_shuffle(gallivm, dst_type.length);

   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");

   return res;
}
Beispiel #8
0
/**
 * Converts float32 to int16 half-float
 * Note this can be performed in 1 instruction if vcvtps2ph exists (f16c/cvt16)
 * [llvm.x86.vcvtps2ph / _mm_cvtps_ph]
 *
 * @param src           value to convert
 *
 * Convert float32 to half floats, preserving Infs and NaNs,
 * with rounding towards zero (trunc).
 */
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef f32_vec_type = LLVMTypeOf(src);
   unsigned length = LLVMGetTypeKind(f32_vec_type) == LLVMVectorTypeKind
                   ? LLVMGetVectorSize(f32_vec_type) : 1;
   struct lp_type i32_type = lp_type_int_vec(32, 32 * length);
   struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
   LLVMValueRef result;

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (length == 4 || length == 8)) {
      struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
      unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      const char *intrinsic = NULL;
      if (length == 4) {
         intrinsic = "llvm.x86.vcvtps2ph.128";
      }
      else {
         intrinsic = "llvm.x86.vcvtps2ph.256";
      }
      result = lp_build_intrinsic_binary(builder, intrinsic,
                                         lp_build_vec_type(gallivm, i168_type),
                                         src, LLVMConstInt(i32t, mode, 0));
      if (length == 4) {
         result = lp_build_extract_range(gallivm, result, 0, 4);
      }
   }

   else {
      result = lp_build_float_to_smallfloat(gallivm, i32_type, src, 10, 5, 0, true);
      /* Convert int32 vector to int16 vector by trunc (might generate bad code) */
      result = LLVMBuildTrunc(builder, result, lp_build_vec_type(gallivm, i16_type), "");
   }

   /*
    * Debugging code.
    */
   if (0) {
     LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
     LLVMTypeRef i16t = LLVMInt16TypeInContext(gallivm->context);
     LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
     LLVMValueRef ref_result = LLVMGetUndef(LLVMVectorType(i16t, length));
     unsigned i;

     LLVMTypeRef func_type = LLVMFunctionType(i16t, &f32t, 1, 0);
     LLVMValueRef func = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer)util_float_to_half));
     func = LLVMBuildBitCast(builder, func, LLVMPointerType(func_type, 0), "util_float_to_half");

     for (i = 0; i < length; ++i) {
        LLVMValueRef index = LLVMConstInt(i32t, i, 0);
        LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
        /* XXX: not really supported by backends */
        LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
        LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
#endif
        ref_result = LLVMBuildInsertElement(builder, ref_result, f16, index, "");
     }

     lp_build_print_value(gallivm, "src  = ", src);
     lp_build_print_value(gallivm, "llvm = ", result);
     lp_build_print_value(gallivm, "util = ", ref_result);
     lp_build_printf(gallivm, "\n");
  }

   return result;
}
Beispiel #9
0
/**
 * Call intrinsic with arguments adapted to intrinsic vector length.
 *
 * Split vectors which are too large for the hw, or expand them if they
 * are too small, so a caller calling a function which might use intrinsics
 * doesn't need to do splitting/expansion on its own.
 * This only supports intrinsics where src and dst types match.
 */
LLVMValueRef
lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm,
                                    const char *name,
                                    struct lp_type src_type,
                                    unsigned intr_size,
                                    LLVMValueRef a,
                                    LLVMValueRef b)
{
   unsigned i;
   struct lp_type intrin_type = src_type;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef anative, bnative;
   unsigned intrin_length = intr_size / src_type.width;

   intrin_type.length = intrin_length;

   if (intrin_length > src_type.length) {
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef constvec, tmp;

      for (i = 0; i < src_type.length; i++) {
         elems[i] = lp_build_const_int32(gallivm, i);
      }
      for (; i < intrin_length; i++) {
         elems[i] = i32undef;
      }
      if (src_type.length == 1) {
         LLVMTypeRef elem_type = lp_build_elem_type(gallivm, intrin_type);
         a = LLVMBuildBitCast(builder, a, LLVMVectorType(elem_type, 1), "");
         b = LLVMBuildBitCast(builder, b, LLVMVectorType(elem_type, 1), "");
      }
      constvec = LLVMConstVector(elems, intrin_length);
      anative = LLVMBuildShuffleVector(builder, a, a, constvec, "");
      bnative = LLVMBuildShuffleVector(builder, b, b, constvec, "");
      tmp = lp_build_intrinsic_binary(builder, name,
                                      lp_build_vec_type(gallivm, intrin_type),
                                      anative, bnative);
      if (src_type.length > 1) {
         constvec = LLVMConstVector(elems, src_type.length);
         return LLVMBuildShuffleVector(builder, tmp, tmp, constvec, "");
      }
      else {
         return LLVMBuildExtractElement(builder, tmp, elems[0], "");
      }
   }
   else if (intrin_length < src_type.length) {
      unsigned num_vec = src_type.length / intrin_length;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];

      /* don't support arbitrary size here as this is so yuck */
      if (src_type.length % intrin_length) {
         /* FIXME: This is something which should be supported
          * but there doesn't seem to be any need for it currently
          * so crash and burn.
          */
         debug_printf("%s: should handle arbitrary vector size\n",
                      __FUNCTION__);
         assert(0);
         return NULL;
      }

      for (i = 0; i < num_vec; i++) {
         anative = lp_build_extract_range(gallivm, a, i*intrin_length,
                                        intrin_length);
         bnative = lp_build_extract_range(gallivm, b, i*intrin_length,
                                        intrin_length);
         tmp[i] = lp_build_intrinsic_binary(builder, name,
                                            lp_build_vec_type(gallivm, intrin_type),
                                            anative, bnative);
      }
      return lp_build_concat(gallivm, tmp, intrin_type, num_vec);
   }
   else {
      return lp_build_intrinsic_binary(builder, name,
                                       lp_build_vec_type(gallivm, src_type),
                                       a, b);
   }
}
Beispiel #10
0
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	unsigned i;
	
	unsigned color_count = 0;
	boolean has_color = false;

	/* Add the necessary export instructions */
	for (i = 0; i < ctx->output_reg_count; i++) {
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef output;
			unsigned adjusted_reg_idx = i +
					ctx->reserved_reg_count;

			output = LLVMBuildLoad(base->gallivm->builder,
				ctx->soa.outputs[i][chan], "");

			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
				LLVMValueRef reg_index = lp_build_const_int32(
					base->gallivm,
					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
				lp_build_intrinsic_binary(
					base->gallivm->builder,
					"llvm.AMDGPU.store.output",
					LLVMVoidTypeInContext(base->gallivm->context),
					output, reg_index);
			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				switch (ctx->r600_outputs[i].name) {
				case TGSI_SEMANTIC_COLOR:
					has_color = true;
					if ( color_count/4 < ctx->color_buffer_count) {
						if (ctx->fs_color_all) {
							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
								LLVMValueRef reg_index = lp_build_const_int32(
									base->gallivm,
									(j * 4) + chan);
								lp_build_intrinsic_binary(
									base->gallivm->builder,
									"llvm.R600.store.pixel.color",
									LLVMVoidTypeInContext(base->gallivm->context),
									output, reg_index);
							}
						} else {
							LLVMValueRef reg_index = lp_build_const_int32(
								base->gallivm,
								(color_count++/4) * 4 + chan);
							lp_build_intrinsic_binary(
								base->gallivm->builder,
								"llvm.R600.store.pixel.color",
								LLVMVoidTypeInContext(base->gallivm->context),
								output, reg_index);
						}
					}
					break;
				case TGSI_SEMANTIC_POSITION:
					if (chan != 2)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.depth",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				case TGSI_SEMANTIC_STENCIL:
					if (chan != 1)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.stencil",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				}
			}
		}
	}

	if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT)
		lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0);
}