예제 #1
0
/**
 * Convert half-precision floats, stored as 16-bit integers, to float32.
 *
 * When util_cpu_caps reports F16C, HAVE_LLVM is at least 0x0301 and the
 * source has 4 or 8 elements, the whole conversion is a single call to an
 * llvm.x86.vcvtph2ps.* intrinsic (_mm_cvtph_ps); a 4-element source is
 * padded to 8 elements first.  In every other case each element is
 * zero-extended to 32 bits and handed to lp_build_smallfloat_to_float().
 *
 * @param gallivm       code generation state
 * @param src           value to convert (scalar or vector)
 *
 * @return the converted value, built from a float32 type with as many
 *         elements as src
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type = LLVMTypeOf(src);
   unsigned src_length = 1;
   struct lp_type f32_type;
   struct lp_type i32_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef widened;

   /* Anything that is not a vector is treated as a single element. */
   if (LLVMGetTypeKind(src_type) == LLVMVectorTypeKind) {
      src_length = LLVMGetVectorSize(src_type);
   }

   f32_type = lp_type_float_vec(32, 32 * src_length);
   i32_type = lp_type_int_vec(32, 32 * src_length);
   int_vec_type = lp_build_vec_type(gallivm, i32_type);

   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
       (src_length == 4 || src_length == 8)) {
      const char *intrinsic;

      if (src_length == 8) {
         intrinsic = "llvm.x86.vcvtph2ps.256";
      }
      else {
         /* The 128bit variant is called with an 8-element operand. */
         src = lp_build_pad_vector(gallivm, src, 8);
         intrinsic = "llvm.x86.vcvtph2ps.128";
      }

      return lp_build_intrinsic_unary(builder, intrinsic,
                                      lp_build_vec_type(gallivm, f32_type),
                                      src);
   }

   /* Widen int16 to int32 by zero extension (might generate bad code) */
   widened = LLVMBuildZExt(builder, src, int_vec_type, "");

   return lp_build_smallfloat_to_float(gallivm, f32_type, widened,
                                       10, 5, 0, true);
}
예제 #2
0
/**
 * Interleave the elements of two vectors.
 *
 * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
 * (but not for 256bit AVX vectors).
 *
 * @param gallivm  code generation state
 * @param type     type of a and b
 * @param a        first source vector
 * @param b        second source vector
 * @param lo_hi    0 selects the low halves of the sources, 1 the high halves
 *
 * @return the interleaved vector
 */
LLVMValueRef
lp_build_interleave2(struct gallivm_state *gallivm,
                     struct lp_type type,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     unsigned lo_hi)
{
   struct lp_type wide_type;
   LLVMValueRef halves[2], joined;

   if (!(type.length == 2 && type.width == 128 && util_cpu_caps.has_avx)) {
      /* Regular case: a single unpack shuffle does the job. */
      LLVMValueRef shuffle =
         lp_build_const_unpack_shuffle(gallivm, type.length, lo_hi);

      return LLVMBuildShuffleVector(gallivm->builder, a, b, shuffle, "");
   }

   /*
    * XXX: Workaround for an llvm code generation deficiency with 2x128bit
    * vectors on AVX.  Although the operation needs vinsertf128/vextractf128
    * (a natural match for 2x128bit vectors), the "normal" unpack shuffle
    * yields code ranging from atrocious (llvm 3.1) to terrible (llvm 3.2,
    * 3.3).  The exact replacement shuffles don't seem to matter as long as
    * no 128bit wide elements are involved (works with 8x32 or 4x64), so
    * view the sources as 4x64, pick the wanted half of each and concatenate.
    */
   wide_type = type;
   wide_type.length = 4;
   wide_type.width = 64;

   a = LLVMBuildBitCast(gallivm->builder, a,
                        lp_build_vec_type(gallivm, wide_type), "");
   b = LLVMBuildBitCast(gallivm->builder, b,
                        lp_build_vec_type(gallivm, wide_type), "");

   halves[0] = lp_build_extract_range(gallivm, a, lo_hi * 2, 2);
   halves[1] = lp_build_extract_range(gallivm, b, lo_hi * 2, 2);

   /* Each extracted piece is a 2x64 vector. */
   wide_type.length = 2;
   joined = lp_build_concat(gallivm, halves, wide_type, 2);

   return LLVMBuildBitCast(gallivm->builder, joined,
                           lp_build_vec_type(gallivm, type), "");
}
예제 #3
0
/**
 * Pick one element of a value and replicate it over the result
 * (a mere shuffle in most cases).
 *
 * Source and destination types must agree on floating-ness and element
 * width; only their lengths may differ.
 *
 * @param gallivm   code generation state
 * @param src_type  type of vector
 * @param dst_type  type of the result
 * @param vector    source value
 * @param index     i32 index of the element to pick
 *
 * @return vector itself (scalar to scalar), a broadcast of it (scalar to
 *         vector), the selected element (vector to scalar), or a shuffle
 *         replicating the selected element (vector to vector)
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffle;
   LLVMValueRef undef;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /* Trivial scalar -> scalar. */
         return vector;
      }

      /* Broadcast scalar -> vector. */
      return lp_build_broadcast(gallivm,
                                lp_build_vec_type(gallivm, dst_type),
                                vector);
   }

   if (!(dst_type.length > 1)) {
      /* Trivial extract scalar from vector. */
      return LLVMBuildExtractElement(gallivm->builder, vector, index, "");
   }

   /* Shuffle - the result can be of a different length than the source. */
   shuffle = lp_build_broadcast(gallivm,
                                LLVMVectorType(i32t, dst_type.length),
                                index);
   undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));

   return LLVMBuildShuffleVector(gallivm->builder, vector, undef, shuffle, "");
}
예제 #4
0
/**
 * Fetch a pixel stored as a plain array of channels and convert it to
 * dst_type.
 *
 * The source vector type is derived from channel 0 of the format
 * description (type, normalization and size) with one element per channel.
 * The vector is loaded in one go, 64-bit floats are truncated to 32 bits,
 * the vector is padded up to dst_type.length, converted with
 * lp_build_conv() and finally swizzled.
 *
 * \param gallivm       code generation state
 * \param format_desc   describes format of the image we're fetching from
 * \param dst_type      output type; must have at least as many elements as
 *                      the format has channels
 * \param base_ptr      address of the pixel block (or the texel if uncompressed)
 * \param offset        ptr offset
 *
 * \return the value produced by lp_build_format_swizzle_aos()
 */
LLVMValueRef
lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
                              const struct util_format_description *format_desc,
                              struct lp_type dst_type,
                              LLVMValueRef base_ptr,
                              LLVMValueRef offset)
{
    struct lp_build_context bld;
    LLVMBuilderRef builder = gallivm->builder;
    LLVMTypeRef src_vec_type;
    LLVMValueRef ptr, res;
    struct lp_type src_type;

    /* Describe the in-memory vector using the first channel's properties */
    memset(&src_type, 0, sizeof src_type);
    src_type.floating = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT;
    src_type.fixed    = format_desc->channel[0].type == UTIL_FORMAT_TYPE_FIXED;
    src_type.sign     = format_desc->channel[0].type != UTIL_FORMAT_TYPE_UNSIGNED;
    src_type.norm     = format_desc->channel[0].normalized;
    src_type.width    = format_desc->channel[0].size;
    src_type.length   = format_desc->nr_channels;

    assert(src_type.length <= dst_type.length);

    src_vec_type = lp_build_vec_type(gallivm, src_type);

    /* Read whole vector from memory, only element-aligned */
    ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
    ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
    res = LLVMBuildLoad(builder, ptr, "");
    lp_set_load_alignment(res, src_type.width / 8);

    /* Truncate doubles to float */
    if (src_type.floating && src_type.width == 64) {
        src_type.width = 32;
        src_vec_type = lp_build_vec_type(gallivm, src_type);

        res = LLVMBuildFPTrunc(builder, res, src_vec_type, "");
    }

    /* Expand to correct length */
    if (src_type.length < dst_type.length) {
        res = lp_build_pad_vector(gallivm, res, src_type, dst_type.length);
        src_type.length = dst_type.length;
    }

    /* Convert to correct format (in place: one source, one destination) */
    lp_build_conv(gallivm, src_type, dst_type, &res, 1, &res, 1);

    /* Swizzle it */
    lp_build_context_init(&bld, gallivm, dst_type);
    return lp_build_format_swizzle_aos(format_desc, &bld, res);
}
/**
 * Emit the IR function "test" used to exercise lp_build_conv().
 *
 * The generated function takes a pointer to source vectors and a pointer to
 * destination vectors.  It loads num_srcs vectors of src_type, converts
 * them to num_dsts vectors of dst_type and stores the results.
 *
 * \param gallivm   code generation state (module, context and builder)
 * \param src_type  type of each source vector
 * \param num_srcs  number of source vectors; at most LP_MAX_VECTOR_LENGTH
 * \param dst_type  type of each destination vector
 * \param num_dsts  number of destination vectors; at most LP_MAX_VECTOR_LENGTH
 *
 * \return the generated function, after gallivm_verify_function()
 */
static LLVMValueRef
add_conv_test(struct gallivm_state *gallivm,
              struct lp_type src_type, unsigned num_srcs,
              struct lp_type dst_type, unsigned num_dsts)
{
   LLVMModuleRef module = gallivm->module;
   LLVMContextRef context = gallivm->context;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef args[2];
   LLVMValueRef func;
   LLVMValueRef src_ptr;
   LLVMValueRef dst_ptr;
   LLVMBasicBlockRef block;
   LLVMValueRef src[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef dst[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   /* src[] and dst[] are fixed-size: refuse counts that would overrun them */
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);

   args[0] = LLVMPointerType(lp_build_vec_type(gallivm, src_type), 0);
   args[1] = LLVMPointerType(lp_build_vec_type(gallivm, dst_type), 0);

   func = LLVMAddFunction(module, "test",
                          LLVMFunctionType(LLVMVoidTypeInContext(context),
                                           args, 2, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);
   src_ptr = LLVMGetParam(func, 0);
   dst_ptr = LLVMGetParam(func, 1);

   block = LLVMAppendBasicBlockInContext(context, func, "entry");
   LLVMPositionBuilderAtEnd(builder, block);

   /* Load every source vector */
   for (i = 0; i < num_srcs; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
      LLVMValueRef ptr = LLVMBuildGEP(builder, src_ptr, &index, 1, "");
      src[i] = LLVMBuildLoad(builder, ptr, "");
   }

   lp_build_conv(gallivm, src_type, dst_type, src, num_srcs, dst, num_dsts);

   /* Store every converted vector */
   for (i = 0; i < num_dsts; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
      LLVMValueRef ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
      LLVMBuildStore(builder, dst[i], ptr);
   }

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, func);

   return func;
}
예제 #6
0
/**
 * Round a float vector through the llvm.x86.sse41.round.ps/.pd intrinsics.
 *
 * Only floating point types whose total width is 128 bits are accepted,
 * and util_cpu_caps.has_sse4_1 must be set (both are asserted).
 *
 * \param bld   build context; bld->type is the type of a
 * \param a     value to round
 * \param mode  rounding mode, passed to the intrinsic as an i32 constant
 *
 * \return the intrinsic call result, or bld->undef when the element width
 *         is neither 32 nor 64
 */
static INLINE LLVMValueRef
lp_build_round_sse41(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_sse41_mode mode)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   const char *intrinsic;
   LLVMValueRef mode_const;

   assert(type.floating);
   assert(type.width*type.length == 128);
   assert(lp_check_value(type, a));
   assert(util_cpu_caps.has_sse4_1);

   if (type.width == 32) {
      intrinsic = "llvm.x86.sse41.round.ps";
   }
   else if (type.width == 64) {
      intrinsic = "llvm.x86.sse41.round.pd";
   }
   else {
      assert(0);
      return bld->undef;
   }

   mode_const = LLVMConstInt(LLVMInt32Type(), mode, 0);

   return lp_build_intrinsic_binary(bld->builder, intrinsic, vec_type, a,
                                    mode_const);
}
예제 #7
0
/**
 * Force the sign of float vector 'a' to the one given by 'sign'.
 *
 * The sign bit of 'a' is cleared and replaced by 'sign' shifted up to the
 * sign bit position, so:
 *   sign==0 yields abs(a),
 *   sign==1 yields -abs(a),
 * and any other value for sign produces undefined results.
 */
LLVMValueRef
lp_build_set_sign(struct lp_build_context *bld,
                  LLVMValueRef a, LLVMValueRef sign)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   /* position of the sign bit */
   LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
   /* every bit except the sign bit */
   LLVMValueRef mask = lp_build_int_const_scalar(type,
                                                 ~(1ULL << (type.width - 1)));
   LLVMValueRef magnitude;
   LLVMValueRef sign_bit;
   LLVMValueRef bits;

   assert(type.floating);

   /* magnitude = reinterpret_cast<int>(a) & mask */
   magnitude = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
   magnitude = LLVMBuildAnd(bld->builder, magnitude, mask, "");

   /* sign_bit = sign << shift */
   sign_bit = LLVMBuildShl(bld->builder, sign, shift, "");

   /* bits = magnitude | sign_bit */
   bits = LLVMBuildOr(bld->builder, magnitude, sign_bit, "");

   /* reinterpret_cast<float>(bits) */
   return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
}
예제 #8
0
/**
 * Generate abs(a).
 *
 * Unsigned types are returned untouched.  Floats get their sign bit masked
 * off.  Signed integers use the SSSE3 pabs intrinsics when the vector is
 * 128 bits wide, the CPU supports them and the element width is 8, 16 or
 * 32; otherwise the result is max(a, -a).
 */
LLVMValueRef
lp_build_abs(struct lp_build_context *bld,
             LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);

   if (!type.sign) {
      return a;
   }

   if (type.floating) {
      /* Clear the sign bit on the integer view of the value */
      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
      LLVMValueRef mask =
         lp_build_int_const_scalar(type, ~(1ULL << (type.width - 1)));
      LLVMValueRef bits;

      bits = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      bits = LLVMBuildAnd(bld->builder, bits, mask, "");
      return LLVMBuildBitCast(bld->builder, bits, vec_type, "");
   }

   if (type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
      if (type.width == 8) {
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
      }
      if (type.width == 16) {
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
      }
      if (type.width == 32) {
         return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
      }
      /* other widths fall through to the generic path */
   }

   return lp_build_max(bld, a, LLVMBuildNeg(bld->builder, a, ""));
}
예제 #9
0
/**
 * Generate color blending and color output.
 *
 * The constant blend color and the current destination colors are loaded
 * per channel, blended with lp_build_blend_soa(), and each channel enabled
 * in the render target's colormask is written back.  Where the mask is not
 * set the previous destination value is selected instead.
 *
 * \param blend  the blend state
 * \param rt  the render target index (to index blend, colormask state)
 * \param builder  the LLVM IR builder
 * \param type  the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask  execution mask (active fragment/pixel mask)
 * \param src  colors from the fragment shader
 * \param dst_ptr  the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               unsigned rt,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);

   /* this mask context lets us skip blending if all pixels are dead */
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   /* view the blend color in the JIT context as an array of vectors */
   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* fetch the constant blend color and the current destination colors */
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      LLVMValueRef elem_ptr;

      elem_ptr = LLVMBuildGEP(builder, const_ptr, &index, 1, "");
      con[chan] = LLVMBuildLoad(builder, elem_ptr, "");

      elem_ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
      dst[chan] = LLVMBuildLoad(builder, elem_ptr, "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);

   /* write back only the channels enabled in the colormask */
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index;
      LLVMValueRef elem_ptr;

      if (!(blend->rt[rt].colormask & (1 << chan))) {
         continue;
      }

      index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      lp_build_name(res[chan], "res.%c", "rgba"[chan]);

      /* keep the old destination value where the mask is off */
      res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);

      elem_ptr = LLVMBuildGEP(builder, dst_ptr, &index, 1, "");
      LLVMBuildStore(builder, res[chan], elem_ptr);
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
/**
 * Expands src vector from src_type.length to dst_length elements.
 *
 * The leading elements are copied from src; the added elements are taken
 * from an undefined vector.  A scalar source (length 1) is broadcast to all
 * dst_length elements instead.  When the lengths already match, src is
 * returned unchanged.
 *
 * @param gallivm     code generation state
 * @param src         value to expand
 * @param src_type    type of src
 * @param dst_length  wanted number of elements; must not be smaller than
 *                    src_type.length nor larger than LP_MAX_VECTOR_LENGTH
 *
 * @return the expanded vector (or src itself if no padding was needed)
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    struct lp_type src_type,
                    unsigned dst_length)
{
   LLVMValueRef undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(dst_length <= Elements(elems));
   /* Equal lengths are legal: that is the no-op case handled right below */
   assert(dst_length >= src_type.length);

   if (src_type.length == dst_length)
      return src;

   /* If its a single scalar type, no need to reinvent the wheel */
   if (src_type.length == 1) {
      return lp_build_broadcast(gallivm, LLVMVectorType(lp_build_elem_type(gallivm, src_type), dst_length), src);
   }

   /* All elements from src vector */
   for (i = 0; i < src_type.length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /*
    * Undef fill remaining space: shuffle index src_type.length addresses
    * the first element of the second (undef) operand.
    */
   for (i = src_type.length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_type.length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef, LLVMConstVector(elems, dst_length), "");
}
예제 #11
0
파일: lp_bld_conv.c 프로젝트: RobinWuDev/Qt
/**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)
 * [llvm.x86.vcvtph2ps / _mm_cvtph_ps]
 *
 * The exponent/mantissa bits are moved into float32 position (shift left by
 * 13) and the exponent is re-biased with a float multiply by a magic
 * constant.  Inputs whose exponent/mantissa bits exceed 0x7bff additionally
 * get the float32 Inf/NaN exponent ORed in, and the sign bit is moved up by
 * 16 bits and ORed in last.
 *
 * @param gallivm       code generation state
 * @param src_type      <vector> type of int16
 * @param src           value to convert
 *
 * @return the converted float32 vector, with src_type.length elements
 *
 * ref http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
 */
LLVMValueRef
lp_build_half_to_float(struct gallivm_state *gallivm,
                       struct lp_type src_type,
                       LLVMValueRef src)
{
    LLVMBuilderRef builder = gallivm->builder;

    struct lp_type f32_type = lp_type_float_vec(32, 32 * src_type.length);
    struct lp_type i32_type = lp_type_int_vec(32, 32 * src_type.length);
    LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
    LLVMTypeRef float_vec_type = lp_build_vec_type(gallivm, f32_type);

    /* Constants */
    LLVMValueRef mantissa_shift = lp_build_const_int_vec(gallivm, i32_type, 13);
    LLVMValueRef sign_shift     = lp_build_const_int_vec(gallivm, i32_type, 16);
    LLVMValueRef nosign_mask    = lp_build_const_int_vec(gallivm, i32_type, 0x7fff);
    LLVMValueRef max_finite     = lp_build_const_int_vec(gallivm, i32_type, 0x7bff);
    LLVMValueRef f32_infnan_exp = lp_build_const_int_vec(gallivm, i32_type, 0xff << 23);
    LLVMValueRef magic_bits     = lp_build_const_int_vec(gallivm, i32_type, (254 - 15) << 23);
    LLVMValueRef magic          = LLVMBuildBitCast(builder, magic_bits, float_vec_type, "");

    LLVMValueRef h, expmant, shifted, scaled;
    LLVMValueRef was_infnan, infnan_exp;
    LLVMValueRef sign, sign_inf, bits;

    /* Widen the int16 vector to int32 by zero extension */
    h = LLVMBuildZExt(builder, src, int_vec_type, "");

    /* Exponent / mantissa bits, moved into float32 position */
    expmant = LLVMBuildAnd(builder, nosign_mask, h, "");
    shifted = LLVMBuildShl(builder, expmant, mantissa_shift, "");
    shifted = LLVMBuildBitCast(builder, shifted, float_vec_type, "");

    /* Exponent adjust: done as a float multiply, result viewed as int again */
    scaled = LLVMBuildFMul(builder, shifted, magic, "");
    scaled = LLVMBuildBitCast(builder, scaled, int_vec_type, "");

    /* Make sure Inf/NaN survive */
    was_infnan = lp_build_compare(gallivm, i32_type, PIPE_FUNC_GREATER, expmant, max_finite);
    infnan_exp = LLVMBuildAnd(builder, was_infnan, f32_infnan_exp, "");

    /* Sign bit: whatever is left of h once exponent/mantissa are removed */
    sign = LLVMBuildXor(builder, h, expmant, "");
    sign = LLVMBuildShl(builder, sign, sign_shift, "");

    /* Combine result */
    sign_inf = LLVMBuildOr(builder, sign, infnan_exp, "");
    bits = LLVMBuildOr(builder, scaled, sign_inf, "");

    /* Cast from int32 vector to float32 vector */
    return LLVMBuildBitCast(builder, bits, float_vec_type, "");
}
예제 #12
0
/**
 * Generate the depth/stencil test code.
 *
 * Emits nothing when neither the depth test nor any of the two stencil
 * tests is enabled in the key.  Otherwise the fragment Z values are
 * converted to the depth type picked for the zsbuf format and
 * lp_build_depth_stencil_test() is invoked.
 *
 * \param builder       the LLVM IR builder
 * \param key           shader variant key (depth, stencil and zsbuf format)
 * \param src_type      type of the fragment Z values
 * \param mask          fragment mask context, passed on to the test
 * \param stencil_refs  stencil reference values, passed on to the test
 * \param src           fragment Z values
 * \param dst_ptr       pointer to the depth/stencil buffer values
 * \param facing        passed on to the test
 * \param counter       passed on to the test
 */
static void
generate_depth_stencil(LLVMBuilderRef builder,
                       const struct lp_fragment_shader_variant_key *key,
                       struct lp_type src_type,
                       struct lp_build_mask_context *mask,
                       LLVMValueRef stencil_refs[2],
                       LLVMValueRef src,
                       LLVMValueRef dst_ptr,
                       LLVMValueRef facing,
                       LLVMValueRef counter)
{
   const struct util_format_description *format_desc;
   struct lp_type dst_type;

   /* Nothing to do unless at least one test is switched on */
   if (!(key->depth.enabled ||
         key->stencil[0].enabled ||
         key->stencil[1].enabled)) {
      return;
   }

   format_desc = util_format_description(key->zsbuf_format);
   assert(format_desc);

   if (!src_type.floating) {
      /* Non-float input must already be unsigned normalized */
      assert(!src_type.sign);
      assert(src_type.norm);
   }
   else {
      /*
       * Depths are expected to be between 0 and 1, even if they are stored
       * in floats. Setting these bits here will ensure that the
       * lp_build_conv() call below won't try to unnecessarily clamp the
       * incoming values.
       */
      src_type.sign = FALSE;
      src_type.norm = TRUE;
   }

   /* Pick the depth type. */
   dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);

   /* FIXME: Cope with a depth test type with a different bit width. */
   assert(dst_type.width == src_type.width);
   assert(dst_type.length == src_type.length);

   /* Convert fragment Z to the depth type (in place) */
   lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);

   /* View the destination as an array of depth-type vectors */
   dst_ptr = LLVMBuildBitCast(builder, dst_ptr,
                              LLVMPointerType(lp_build_vec_type(dst_type), 0),
                              "");

   lp_build_depth_stencil_test(builder, &key->depth, key->stencil,
                               dst_type, format_desc, mask, stencil_refs,
                               src, dst_ptr, facing, counter);
}
예제 #13
0
/**
 * Generate an approximation of 2^x, optionally exposing its pieces.
 *
 * x is clamped to [-126.99999, 129.0] and split as ipart = int(x - 0.5),
 * fpart = x - ipart.  2^ipart is built directly from the float bit pattern
 * ((ipart + 127) << 23), and 2^fpart is evaluated with
 * lp_build_exp2_polynomial.  Only the pieces needed for the requested
 * outputs are generated; nothing is emitted when all outputs are NULL.
 *
 * \param bld              build context; the type must be 32-bit float
 * \param x                the exponent
 * \param p_exp2_int_part  if non-NULL, receives 2^ipart as a float
 * \param p_frac_part      if non-NULL, receives fpart
 * \param p_exp2           if non-NULL, receives 2^ipart * poly(fpart)
 */
void
lp_build_exp2_approx(struct lp_build_context *bld,
                     LLVMValueRef x,
                     LLVMValueRef *p_exp2_int_part,
                     LLVMValueRef *p_frac_part,
                     LLVMValueRef *p_exp2)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef ipart;
   LLVMValueRef fpart;
   LLVMValueRef expipart = NULL;
   LLVMValueRef res = NULL;

   /* No output requested: generate nothing */
   if (!p_exp2_int_part && !p_frac_part && !p_exp2) {
      return;
   }

   /* TODO: optimize the constant case */
   if (LLVMIsConstant(x)) {
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);
   }

   assert(type.floating && type.width == 32);

   /* clamp the exponent */
   x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
   x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));

   /* ipart = int(x - 0.5) */
   ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
   ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");

   /* fpart = x - ipart */
   fpart = LLVMBuildSIToFP(bld->builder, ipart, vec_type, "");
   fpart = LLVMBuildSub(bld->builder, x, fpart, "");

   if (p_exp2_int_part || p_exp2) {
      /* expipart = (float) (1 << ipart), built as a biased float exponent */
      LLVMValueRef bias = lp_build_int_const_scalar(type, 127);
      LLVMValueRef mantissa_bits = lp_build_int_const_scalar(type, 23);

      expipart = LLVMBuildAdd(bld->builder, ipart, bias, "");
      expipart = LLVMBuildShl(bld->builder, expipart, mantissa_bits, "");
      expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");

      if (p_exp2) {
         /* 2^x = 2^ipart * poly(fpart) */
         LLVMValueRef expfpart =
            lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
                                Elements(lp_build_exp2_polynomial));

         res = LLVMBuildMul(bld->builder, expipart, expfpart, "");
      }
   }

   if (p_exp2_int_part) {
      *p_exp2_int_part = expipart;
   }

   if (p_frac_part) {
      *p_frac_part = fpart;
   }

   if (p_exp2) {
      *p_exp2 = res;
   }
}
예제 #14
0
/**
 * Emit the IR function "test" used to exercise lp_build_blend_aos().
 *
 * The generated function takes five pointers to vectors of the given type:
 * src, src1, dst, the constant color and the result.  It loads the four
 * inputs, blends them for render target 0 with the
 * PIPE_FORMAT_R8G8B8A8_UNORM format and an identity swizzle, and stores
 * the blended value through the result pointer.
 *
 * \param gallivm  code generation state (module, context and builder)
 * \param blend    blend state under test
 * \param type     type of the vectors
 *
 * \return the generated function, after gallivm_verify_function()
 */
static LLVMValueRef
add_blend_test(struct gallivm_state *gallivm,
               const struct pipe_blend_state *blend,
               struct lp_type type)
{
   const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
   const unsigned rt = 0;
   const unsigned char swizzle[4] = { 0, 1, 2, 3 };
   LLVMModuleRef module = gallivm->module;
   LLVMContextRef context = gallivm->context;
   LLVMBuilderRef builder;
   LLVMBasicBlockRef block;
   LLVMTypeRef vec_type;
   LLVMTypeRef ptr_type;
   LLVMTypeRef args[5];
   LLVMValueRef func;
   LLVMValueRef src_ptr, src1_ptr, dst_ptr, const_ptr, res_ptr;
   LLVMValueRef src, src1, dst, con, res;
   unsigned i;

   /* every argument is a pointer to one vector */
   vec_type = lp_build_vec_type(gallivm, type);
   ptr_type = LLVMPointerType(vec_type, 0);
   for (i = 0; i < 5; ++i) {
      args[i] = ptr_type;
   }

   func = LLVMAddFunction(module, "test",
                          LLVMFunctionType(LLVMVoidTypeInContext(context),
                                           args, 5, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);

   src_ptr   = LLVMGetParam(func, 0);
   src1_ptr  = LLVMGetParam(func, 1);
   dst_ptr   = LLVMGetParam(func, 2);
   const_ptr = LLVMGetParam(func, 3);
   res_ptr   = LLVMGetParam(func, 4);

   block = LLVMAppendBasicBlockInContext(context, func, "entry");
   builder = gallivm->builder;
   LLVMPositionBuilderAtEnd(builder, block);

   src  = LLVMBuildLoad(builder, src_ptr, "src");
   src1 = LLVMBuildLoad(builder, src1_ptr, "src1");
   dst  = LLVMBuildLoad(builder, dst_ptr, "dst");
   con  = LLVMBuildLoad(builder, const_ptr, "const");

   res = lp_build_blend_aos(gallivm, blend, format, type, rt,
                            src, NULL, src1, NULL, dst, NULL, con, NULL,
                            swizzle, 4);

   lp_build_name(res, "res");

   LLVMBuildStore(builder, res, res_ptr);

   LLVMBuildRetVoid(builder);

   gallivm_verify_function(gallivm, func);

   return func;
}
예제 #15
0
/**
 * Set up storage for one TGSI declaration (AoS code path).
 *
 * Every index in the declared range [Range.First, Range.Last] is handled
 * according to the declared register file:
 *  - temporaries get an alloca each, or one array alloca of Range.Last + 1
 *    entries when temporaries are indirectly addressed;
 *  - outputs, address and predicate registers get an alloca each;
 *  - sampler views just record the declaration in bld->sv[];
 *  - all other files need no declaration.
 *
 * \param bld   the AoS TGSI build context to fill in
 * \param decl  the declaration being processed
 */
void
lp_emit_declaration_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_declaration *decl)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = lp_build_vec_type(gallivm, bld->bld_base.base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_INLINED_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /*
             * NOTE(review): this array alloca is emitted once per index of
             * the range and only the last one is kept in temps_array -
             * confirm whether a single allocation was intended.
             */
            LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1);
            bld->temps_array = lp_build_array_alloca(gallivm,
                                                     vec_type, array_size, "");
         } else {
            bld->temps[idx] = lp_build_alloca(gallivm, vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         bld->addr[idx] = lp_build_alloca(gallivm, vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         bld->preds[idx] = lp_build_alloca(gallivm, vec_type, "");
         break;

      case TGSI_FILE_SAMPLER_VIEW:
         /*
          * The target stored here MUST match whatever there actually
          * is in the set sampler views (what about return type?).
          *
          * The enclosing loop already visits every index of the range, so
          * one entry is recorded per iteration instead of running a nested
          * loop that overwrites the outer loop counter.
          */
         assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
         bld->sv[idx] = decl->SamplerView;
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
예제 #16
0
/**
 * Generate color blending and color output.
 *
 * The constant blend color and the current destination colors are loaded
 * per channel, blended with lp_build_blend_soa(), and each channel enabled
 * in blend->colormask is written back.  Where the mask is not set the
 * previous destination value is selected instead.
 *
 * \param blend  the blend state
 * \param builder  the LLVM IR builder
 * \param type  the pixel color type
 * \param context_ptr  pointer to the runtime JIT context
 * \param mask  execution mask (active fragment/pixel mask)
 * \param src  colors from the fragment shader
 * \param dst_ptr  the destination color buffer pointer
 */
static void
generate_blend(const struct pipe_blend_state *blend,
               LLVMBuilderRef builder,
               struct lp_type type,
               LLVMValueRef context_ptr,
               LLVMValueRef mask,
               LLVMValueRef *src,
               LLVMValueRef dst_ptr)
{
   struct lp_build_context bld;
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask_ctx;
   LLVMTypeRef vec_type;
   LLVMValueRef const_ptr;
   LLVMValueRef con[4];
   LLVMValueRef dst[4];
   LLVMValueRef res[4];
   unsigned chan;

   lp_build_context_init(&bld, builder, type);

   flow = lp_build_flow_create(builder);
   lp_build_mask_begin(&mask_ctx, flow, type, mask);

   vec_type = lp_build_vec_type(type);

   /* view the blend color in the JIT context as an array of vectors */
   const_ptr = lp_jit_context_blend_color(builder, context_ptr);
   const_ptr = LLVMBuildBitCast(builder, const_ptr,
                                LLVMPointerType(vec_type, 0), "");

   /* load constant blend color and colors from the dest color buffer */
   for(chan = 0; chan < 4; ++chan) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");

      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");

      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
      lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
   }

   /* do blend */
   lp_build_blend_soa(builder, blend, type, src, dst, con, res);

   /* store results to color buffer, honoring the colormask */
   for(chan = 0; chan < 4; ++chan) {
      if(blend->colormask & (1 << chan)) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
         lp_build_name(res[chan], "res.%c", "rgba"[chan]);
         /* keep the old destination value where the mask is off */
         res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
         LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
      }
   }

   lp_build_mask_end(&mask_ctx);
   lp_build_flow_destroy(flow);
}
예제 #17
0
/**
 * Transpose from AOS <-> SOA
 *
 * Done in two rounds of half interleaves: first at the element width of
 * single_type_lp, then at twice that width (half as many elements).
 *
 * @param gallivm          code generation state
 * @param single_type_lp   type of pixels
 * @param src              the 4 * n pixel input
 * @param dst              the 4 * n pixel output
 */
void
lp_build_transpose_aos(struct gallivm_state *gallivm,
                       struct lp_type single_type_lp,
                       const LLVMValueRef src[4],
                       LLVMValueRef dst[4])
{
   static const char *const tmp_names[4] = { "t0", "t1", "t2", "t3" };
   static const char *const dst_names[4] = { "dst0", "dst1", "dst2", "dst3" };
   struct lp_type double_type_lp = single_type_lp;
   LLVMTypeRef single_type;
   LLVMTypeRef double_type;
   LLVMValueRef t[4];
   unsigned i;

   /* Same total size, half the elements at twice the width */
   double_type_lp.length >>= 1;
   double_type_lp.width  <<= 1;

   double_type = lp_build_vec_type(gallivm, double_type_lp);
   single_type = lp_build_vec_type(gallivm, single_type_lp);

   /*
    * Interleave x, y, z, w -> xy and zw
    * t0 = lo(x, y), t1 = lo(z, w), t2 = hi(x, y), t3 = hi(z, w)
    */
   for (i = 0; i < 4; ++i) {
      const unsigned pair = (i & 1) * 2;
      const unsigned half = i >> 1;

      t[i] = lp_build_interleave2_half(gallivm, single_type_lp,
                                       src[pair], src[pair + 1], half);
   }

   /* Cast to double width type for second interleave */
   for (i = 0; i < 4; ++i) {
      t[i] = LLVMBuildBitCast(gallivm->builder, t[i], double_type,
                              tmp_names[i]);
   }

   /*
    * Interleave xy, zw -> xyzw
    * dst0 = lo(t0, t1), dst1 = hi(t0, t1), dst2 = lo(t2, t3), dst3 = hi(t2, t3)
    */
   for (i = 0; i < 4; ++i) {
      const unsigned pair = (i >> 1) * 2;
      const unsigned half = i & 1;

      dst[i] = lp_build_interleave2_half(gallivm, double_type_lp,
                                         t[pair], t[pair + 1], half);
   }

   /* Cast back to original single width type */
   for (i = 0; i < 4; ++i) {
      dst[i] = LLVMBuildBitCast(gallivm->builder, dst[i], single_type,
                                dst_names[i]);
   }
}
예제 #18
0
/**
 * Generate 1/sqrt(a)
 *
 * 4 x 32-bit float vectors use the llvm.x86.sse.rsqrt.ps intrinsic when
 * SSE is available; everything else is computed as rcp(sqrt(a)).
 */
LLVMValueRef
lp_build_rsqrt(struct lp_build_context *bld,
               LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMValueRef root;

   assert(type.floating);

   if (!(util_cpu_caps.has_sse && type.width == 32 && type.length == 4)) {
      /* Generic path */
      root = lp_build_sqrt(bld, a);
      return lp_build_rcp(bld, root);
   }

   return lp_build_intrinsic_unary(bld->builder, "llvm.x86.sse.rsqrt.ps", lp_build_vec_type(type), a);
}
예제 #19
0
/**
 * Build a zero constant of the given type.
 *
 * Vectors (length != 1) yield the null value of their vector type.  A
 * scalar integer yields a zero of type.width bits; a scalar float always
 * yields a zero of LLVMFloatType(), type.width is not consulted.
 */
LLVMValueRef
lp_build_zero(struct lp_type type)
{
   if (type.length != 1) {
      return LLVMConstNull(lp_build_vec_type(type));
   }

   if (!type.floating) {
      return LLVMConstInt(LLVMIntType(type.width), 0, 0);
   }

   return LLVMConstReal(LLVMFloatType(), 0.0);
}
예제 #20
0
/**
 * Build a zero constant of the given type.
 *
 * Vectors (length != 1) yield the null value of their vector type.  A
 * scalar integer yields a zero of type.width bits; a scalar float is built
 * with lp_build_const_float(), type.width is not consulted.
 */
LLVMValueRef
lp_build_zero(struct gallivm_state *gallivm, struct lp_type type)
{
   if (type.length != 1) {
      return LLVMConstNull(lp_build_vec_type(gallivm, type));
   }

   if (!type.floating) {
      return LLVMConstInt(LLVMIntTypeInContext(gallivm->context, type.width), 0, 0);
   }

   return lp_build_const_float(gallivm, 0.0);
}
예제 #21
0
/**
 * Twiddle from quad format to row format
 *
 *   src0      src1
 * ######### #########      #################
 * # 0 | 1 # # 4 | 5 #      # 0 | 1 | 4 | 5 # dst0
 * #---+---# #---+---#  ->  #################
 * # 2 | 3 # # 6 | 7 #      # 2 | 3 | 6 | 7 # dst1
 * ######### #########      #################
 *
 * Sources are consumed in pairs.  Each vector is viewed as two integer
 * halves; the low halves of a pair form the first output of the pair and
 * the high halves the second.
 *
 * @param gallivm      code generation state
 * @param lp_dst_type  type of the source and destination vectors
 * @param src          src_count input vectors
 * @param src_count    number of vectors; must be even
 * @param dst          receives src_count output vectors
 */
void
lp_bld_quad_twiddle(struct gallivm_state *gallivm,
                    struct lp_type lp_dst_type,
                    const LLVMValueRef* src,
                    unsigned src_count,
                    LLVMValueRef* dst)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef dst_type_ref;
   LLVMTypeRef type2_ref;
   struct lp_type type2;
   unsigned i;

   assert((src_count % 2) == 0);

   /* Same total size as the destination type, but as 2 integer elements */
   type2 = lp_dst_type;
   type2.width = (lp_dst_type.width * lp_dst_type.length) / 2;
   type2.length = 2;
   type2.floating = 0;

   type2_ref = lp_build_vec_type(gallivm, type2);
   dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type);

   for (i = 0; i < src_count; i += 2) {
      const LLVMValueRef *in = &src[i];
      LLVMValueRef *out = &dst[i];
      LLVMValueRef first, second;
      LLVMValueRef lo, hi;

      first  = LLVMBuildBitCast(builder, in[0], type2_ref, "");
      second = LLVMBuildBitCast(builder, in[1], type2_ref, "");

      lo = lp_build_interleave2(gallivm, type2, first, second, 0);
      hi = lp_build_interleave2(gallivm, type2, first, second, 1);

      out[0] = LLVMBuildBitCast(builder, lo, dst_type_ref, "");
      out[1] = LLVMBuildBitCast(builder, hi, dst_type_ref, "");
   }
}
예제 #22
0
/**
 * Allocate storage for the registers covered by one TGSI declaration.
 *
 * Every register index in [decl->Range.First, decl->Range.Last] of a
 * temporary, output, address or predicate file gets an alloca of the
 * context's vector type.  Other register files need no storage here.
 *
 * Temporaries are special-cased: when bld->indirect_files has the
 * TGSI_FILE_TEMPORARY bit set, a single array of (last + 1) vectors is
 * allocated into bld->temps_array instead of one alloca per register.
 *
 * @param bld   AoS translation context receiving the allocated pointers
 * @param decl  declaration to process
 */
static void
emit_declaration(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
            /*
             * The array already spans indices 0..last, so it only has to be
             * allocated once per declaration.  Allocating it on every
             * iteration would emit one dead alloca per extra register and
             * keep overwriting bld->temps_array.
             */
            if (idx == first) {
               LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                                      last + 1, 0);
               bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                        vec_type, array_size, "");
            }
         } else {
            bld->temps[idx] = lp_build_alloca(bld->base.builder,
                                              vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         bld->outputs[idx] = lp_build_alloca(bld->base.builder,
                                             vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         bld->addr[idx] = lp_build_alloca(bld->base.builder,
                                          vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         bld->preds[idx] = lp_build_alloca(bld->base.builder,
                                           vec_type, "");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}
예제 #23
0
/**
 * Generate sin(a)
 *
 * Emits a call to the intrinsic named "llvm.sin.v<length>f<width>", where
 * length and width come from the build context's type.  The context type
 * must be floating point.
 *
 * @param bld  build context (type and builder)
 * @param a    argument value
 * @return result of the intrinsic call
 */
LLVMValueRef
lp_build_sin(struct lp_build_context *bld,
              LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef ret_type = lp_build_vec_type(type);
   char name[32];

   /* TODO: optimize the constant case */

   assert(type.floating);

   /* The intrinsic name encodes the vector length and element width. */
   util_snprintf(name, sizeof name, "llvm.sin.v%uf%u", type.length, type.width);

   return lp_build_intrinsic_unary(bld->builder, name, ret_type, a);
}
예제 #24
0
/**
 * Multiply a value by a small compile-time integer, picking a cheaper
 * operation than a general multiply where one is available.
 *
 *   b == 0                  -> the context's zero constant
 *   b == 1                  -> a unchanged
 *   b == -1                 -> negation
 *   b == 2 (floating type)  -> a + a
 *   b power of two (non-floating type) -> left shift
 *   anything else           -> ordinary multiply by a constant
 *
 * @param bld  build context (type and builder)
 * @param a    value to scale
 * @param b    immediate factor
 * @return the scaled value
 */
LLVMValueRef
lp_build_mul_imm(struct lp_build_context *bld,
                 LLVMValueRef a,
                 int b)
{
   switch (b) {
   case 0:
      return bld->zero;
   case 1:
      return a;
   case -1:
      return LLVMBuildNeg(bld->builder, a, "");
   case 2:
      if (bld->type.floating)
         return lp_build_add(bld, a, a);
      break;
   default:
      break;
   }

   /*
    * Power-of-two factors become a shift for non-floating types.
    *
    * Floating types deliberately take the generic multiply below: adding
    * the shift to the exponent field directly might not always be faster,
    * introduces a small error for multiplication by zero, and produces
    * wrong results for Inf and NaN.
    */
   if (!bld->type.floating && util_is_pot(b)) {
      unsigned log2_b = ffs(b) - 1;
      LLVMValueRef amount = lp_build_const_scalar(bld->type, log2_b);

      return LLVMBuildShl(bld->builder, a, amount, "");
   }

   return lp_build_mul(bld, a, lp_build_const_scalar(bld->type, (double)b));
}
예제 #25
0
/**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 *
 * The conversion avoids an int-to-float instruction.  With
 * n = min(mantissa bits, src_width):
 *   1. if src is wider than the mantissa, shift the excess low bits out;
 *   2. OR the integer into the bit pattern of the float constant
 *      bias = 2^(mantissa - n), so it lands in the mantissa field;
 *   3. reinterpret as float and subtract bias;
 *   4. multiply by 2^n / (2^n - 1).
 *
 * @param builder    IR builder to emit into
 * @param src_width  number of significant bits in each src element
 * @param dst_type   floating-point destination type
 * @param src        integer vector to convert
 * @return the converted floating-point vector
 */
LLVMValueRef
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
                                unsigned src_width,
                                struct lp_type dst_type,
                                LLVMValueRef src)
{
   LLVMTypeRef float_vec_type = lp_build_vec_type(dst_type);
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(dst_type);
   LLVMValueRef bias_vec;
   LLVMValueRef bias_bits;
   LLVMValueRef value = src;
   unsigned mantissa_bits;
   unsigned used_bits;
   unsigned long long range;
   double scale;
   double bias;

   assert(dst_type.floating);

   mantissa_bits = lp_mantissa(dst_type);
   used_bits = MIN2(mantissa_bits, src_width);

   range = (unsigned long long)1 << used_bits;
   scale = (double)range / (range - 1);
   bias = (double)((unsigned long long)1 << (mantissa_bits - used_bits));

   /* Drop the low bits that would not fit into the mantissa. */
   if (src_width > mantissa_bits) {
      const int shift = src_width - mantissa_bits;

      value = LLVMBuildLShr(builder, value,
                            lp_build_const_int_vec(dst_type, shift), "");
   }

   bias_vec = lp_build_const_vec(dst_type, bias);
   bias_bits = LLVMBuildBitCast(builder, bias_vec, int_vec_type, "");

   /* Merge the integer bits into the mantissa of the bias constant. */
   value = LLVMBuildOr(builder, value, bias_bits, "");
   value = LLVMBuildBitCast(builder, value, float_vec_type, "");

   /* Remove the bias again, then rescale. */
   value = LLVMBuildFSub(builder, value, bias_vec, "");

   return LLVMBuildFMul(builder, value,
                        lp_build_const_vec(dst_type, scale), "");
}
예제 #26
0
/**
 * Generate a + b
 *
 * Shortcuts taken before any instruction is emitted:
 *  - adding the context's zero returns the other operand;
 *  - an undef operand yields undef;
 *  - for normalized types, adding the context's one yields one.
 *
 * For normalized, non-floating, non-fixed types of 128 bits total with 8 or
 * 16 bit elements, the SSE2 saturating-add intrinsics are used when the CPU
 * reports SSE2.  Otherwise a plain add is emitted (or folded if both
 * operands are constants); for normalized floating or fixed types the
 * result is then clamped to one from above.
 *
 * @param bld  build context (type, builder and cached constants)
 * @param a    first operand
 * @param b    second operand
 * @return the sum
 */
LLVMValueRef
lp_build_add(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef sum;

   if (a == bld->zero)
      return b;
   if (b == bld->zero)
      return a;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   if (type.norm) {
      const char *sat_intrinsic = NULL;

      /* Normalized values saturate at one. */
      if (a == bld->one || b == bld->one)
         return bld->one;

      if (util_cpu_caps.has_sse2 &&
          !type.floating && !type.fixed &&
          type.width * type.length == 128) {
         switch (type.width) {
         case 8:
            sat_intrinsic = type.sign ? "llvm.x86.sse2.padds.b"
                                      : "llvm.x86.sse2.paddus.b";
            break;
         case 16:
            sat_intrinsic = type.sign ? "llvm.x86.sse2.padds.w"
                                      : "llvm.x86.sse2.paddus.w";
            break;
         default:
            break;
         }
      }

      if (sat_intrinsic)
         return lp_build_intrinsic_binary(bld->builder, sat_intrinsic,
                                          lp_build_vec_type(bld->type), a, b);
   }

   /* Fold at build time when both operands are constants. */
   sum = (LLVMIsConstant(a) && LLVMIsConstant(b))
            ? LLVMConstAdd(a, b)
            : LLVMBuildAdd(bld->builder, a, b, "");

   /* clamp to ceiling of 1.0 */
   if (type.norm && (type.floating || type.fixed))
      sum = lp_build_min_simple(bld, sum, bld->one);

   /* XXX clamp to floor of -1 or 0??? */

   return sum;
}
예제 #27
0
/**
 * Convert vector of int to vector of float.
 *
 * Emits a signed-integer-to-floating-point conversion to the vector type
 * of the build context, which must be a floating type.
 *
 * @param bld  build context describing the destination float type
 * @param a    integer value to convert
 * @return the converted value
 */
LLVMValueRef
lp_build_int_to_float(struct lp_build_context *bld,
                      LLVMValueRef a)
{
   const struct lp_type type = bld->type;

   assert(type.floating);

   return LLVMBuildSIToFP(bld->builder, a, lp_build_vec_type(type), "");
}
예제 #28
0
/**
 * Convert float[] to int[] with floor().
 *
 * With SSE4.1 the value is rounded down by lp_build_round_sse41() and then
 * converted.  Without it, an offset just above -1 is added to negative
 * inputs before the float-to-int conversion.
 *
 * NOTE(review): in the fallback path, a + offset is itself rounded to float
 * precision; for negative inputs that are already integers this may land on
 * the next lower integer -- confirm whether callers depend on exact results.
 *
 * @param bld  build context describing the float type of a
 * @param a    floating-point value
 * @return integer vector holding the floored values
 */
LLVMValueRef
lp_build_ifloor(struct lp_build_context *bld,
                LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
   LLVMValueRef floored;

   assert(type.floating);
   assert(lp_check_value(type, a));

   if (util_cpu_caps.has_sse4_1) {
      floored = lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }
   else {
      /* Bias negative inputs by almost -1 before the conversion below. */
      LLVMTypeRef float_vec_type = lp_build_vec_type(type);
      const unsigned mantissa_bits = lp_mantissa(type);
      const unsigned long long denom = (unsigned long long)1 << mantissa_bits;
      const double almost_minus_one = -(double)(denom - 1) / denom;
      LLVMValueRef sign_bit =
         lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
      LLVMValueRef neg_mask;
      LLVMValueRef bias;

      /* neg_mask = a < 0 ? ~0 : 0 (sign bit smeared across the element) */
      neg_mask = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
      neg_mask = LLVMBuildAnd(bld->builder, neg_mask, sign_bit, "");
      neg_mask = LLVMBuildAShr(bld->builder, neg_mask,
                               lp_build_int_const_scalar(type, type.width - 1), "");
      lp_build_name(neg_mask, "floor.sign");

      /* bias = -0.99999(9)f, viewed as integer bits so it can be masked */
      bias = lp_build_const_scalar(type, almost_minus_one);
      bias = LLVMConstBitCast(bias, int_vec_type);

      /* bias = a < 0 ? -0.99999(9)f : 0.0f */
      bias = LLVMBuildAnd(bld->builder, bias, neg_mask, "");
      bias = LLVMBuildBitCast(bld->builder, bias, float_vec_type, "");
      lp_build_name(bias, "floor.offset");

      floored = LLVMBuildAdd(bld->builder, a, bias, "");
      lp_build_name(floored, "floor.res");
   }

   floored = LLVMBuildFPToSI(bld->builder, floored, int_vec_type, "");
   lp_build_name(floored, "floor");

   return floored;
}
예제 #29
0
/**
 * Generate floor(a), returning a floating-point value.
 *
 * Uses lp_build_round_sse41() when the CPU reports SSE4.1.  Otherwise the
 * value goes through lp_build_ifloor() and is converted back to float.
 *
 * @param bld  build context describing the float type of a
 * @param a    floating-point value
 * @return the floored value, in the context's float type
 */
LLVMValueRef
lp_build_floor(struct lp_build_context *bld,
               LLVMValueRef a)
{
   const struct lp_type type = bld->type;
   LLVMTypeRef float_vec_type;
   LLVMValueRef as_int;

   assert(type.floating);

   if (util_cpu_caps.has_sse4_1) {
      return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
   }

   /* Fallback: round trip through the integer floor. */
   float_vec_type = lp_build_vec_type(type);
   as_int = lp_build_ifloor(bld, a);

   return LLVMBuildSIToFP(bld->builder, as_int, float_vec_type, "");
}
예제 #30
0
/**
 * Generate a - b
 *
 * Shortcuts taken before any instruction is emitted:
 *  - subtracting the context's zero returns a;
 *  - an undef operand yields undef;
 *  - identical operands yield zero;
 *  - for normalized types, subtracting the context's one yields zero.
 *
 * For normalized, non-floating, non-fixed types of 128 bits total with 8 or
 * 16 bit elements, the SSE2 saturating-subtract intrinsics are used when the
 * CPU reports SSE2.  Otherwise a plain subtract is emitted (or folded if
 * both operands are constants); for normalized floating or fixed types the
 * result is then clamped to zero from below.
 *
 * @param bld  build context (type, builder and cached constants)
 * @param a    minuend
 * @param b    subtrahend
 * @return the difference
 */
LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const struct lp_type type = bld->type;
   LLVMValueRef diff;

   if (b == bld->zero)
      return a;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;
   if (a == b)
      return bld->zero;

   if (type.norm) {
      const char *sat_intrinsic = NULL;

      /* Normalized values saturate at zero. */
      if (b == bld->one)
         return bld->zero;

      if (util_cpu_caps.has_sse2 &&
          !type.floating && !type.fixed &&
          type.width * type.length == 128) {
         switch (type.width) {
         case 8:
            sat_intrinsic = type.sign ? "llvm.x86.sse2.psubs.b"
                                      : "llvm.x86.sse2.psubus.b";
            break;
         case 16:
            sat_intrinsic = type.sign ? "llvm.x86.sse2.psubs.w"
                                      : "llvm.x86.sse2.psubus.w";
            break;
         default:
            break;
         }
      }

      if (sat_intrinsic)
         return lp_build_intrinsic_binary(bld->builder, sat_intrinsic,
                                          lp_build_vec_type(bld->type), a, b);
   }

   /* Fold at build time when both operands are constants. */
   diff = (LLVMIsConstant(a) && LLVMIsConstant(b))
             ? LLVMConstSub(a, b)
             : LLVMBuildSub(bld->builder, a, b, "");

   /* clamp to floor of 0 */
   if (type.norm && (type.floating || type.fixed))
      diff = lp_build_max_simple(bld, diff, bld->zero);

   return diff;
}