Example #1
0
/**
 * Per-channel select between two AoS vectors: result = mask ? a : b.
 *
 * @param bld           build context; supplies the vector type and builder
 * @param mask          TGSI_WRITEMASK_xxx bits; a set bit picks the channel
 *                      from @p a, a clear bit picks it from @p b
 * @param a             value used for the channels enabled in @p mask
 * @param b             value used for the remaining channels
 * @param num_channels  number of channels in each group of the vector; the
 *                      mask pattern is repeated for every group
 * @return the blended vector (or one of the inputs / undef when trivial)
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   const struct lp_type vec_type = bld->type;
   const unsigned length = vec_type.length;
   const unsigned chan_bits = mask & 0xf;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(vec_type, a));
   assert(lp_check_value(vec_type, b));

   /* Trivial cases that need no code to be emitted. */
   if (a == b || chan_bits == 0xf) {
      return a;
   }
   if (chan_bits == 0x0) {
      return b;
   }
   if (a == bld->undef || b == bld->undef) {
      return bld->undef;
   }

   /*
    * Two strategies are available: a shuffle or a select.  The threshold
    * between them is empirical and might need to be adjusted.
    */
   if (length > 4) {
      /* Select against a constant per-channel mask. */
      LLVMValueRef mask_vec =
         lp_build_const_mask_aos(bld->gallivm, vec_type, mask, num_channels);

      return lp_build_select(bld, mask_vec, a, b);
   }

   {
      /*
       * Shuffle.  Index k refers to element k of a, index length + k to
       * element k of b.
       */
      LLVMTypeRef index_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
      unsigned base, chan;

      for (base = 0; base < length; base += num_channels) {
         for (chan = 0; chan < num_channels; ++chan) {
            const unsigned src_offset = (mask & (1 << chan)) ? 0 : length;

            indices[base + chan] = LLVMConstInt(index_type,
                                                src_offset + base + chan,
                                                0);
         }
      }

      return LLVMBuildShuffleVector(bld->gallivm->builder, a, b,
                                    LLVMConstVector(indices, length), "");
   }
}
/**
 * Register store.
 *
 * Stores \p value into destination register \p index of \p inst.  Before the
 * store the value is saturated according to the instruction's saturate mode,
 * and the write is restricted by the instruction predicate (if any) and by
 * the destination write mask: channels that are masked off keep the value
 * currently held by the register.
 *
 * \param bld    AoS TGSI translation context (register storage and builder)
 * \param inst   instruction whose destination is being written
 * \param index  which of the instruction's destination registers to write
 * \param value  value to store
 *
 * Indirectly addressed destinations are not supported (asserted).
 */
static void
emit_store(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;
   LLVMValueRef ptr;

   /*
    * Saturate the value
    */

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* clamp to [0, 1] */
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] */
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
      break;
   }

   /*
    * Translate the register file
    */

   assert(!reg->Register.Indirect);

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;

   case TGSI_FILE_ADDRESS:
      /*
       * Indirect addressing was asserted off above, so the Indirect
       * sub-register carries no meaningful index; the destination is
       * identified by Register.Index like every other file.
       */
      ptr = bld->addr[reg->Register.Index];
      break;

   case TGSI_FILE_PREDICATE:
      ptr = bld->preds[reg->Register.Index];
      break;

   default:
      assert(0);
      return;
   }

   /*
    * Predicate
    */

   if (inst->Instruction.Predicate) {
      LLVMValueRef pred;

      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);

      pred = LLVMBuildLoad(bld->base.builder,
                           bld->preds[inst->Predicate.Index], "");

      /*
       * Convert the value to an integer mask.
       */
      pred = lp_build_compare(bld->base.builder,
                               bld->base.type,
                               PIPE_FUNC_NOTEQUAL,
                               pred,
                               bld->base.zero);

      if (inst->Predicate.Negate) {
         pred = LLVMBuildNot(bld->base.builder, pred, "");
      }

      pred = swizzle_aos(bld, pred,
                         inst->Predicate.SwizzleX,
                         inst->Predicate.SwizzleY,
                         inst->Predicate.SwizzleZ,
                         inst->Predicate.SwizzleW);

      /* No other mask has been built yet, so the predicate starts it. */
      mask = pred;
   }

   /*
    * Writemask
    */

   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      writemask = lp_build_const_mask_aos(bld->base.type, reg->Register.WriteMask);

      /* Combine with the predicate mask when there is one. */
      if (mask) {
         mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   /*
    * Partial write: blend the new value with what the register holds now so
    * that masked-off channels are preserved.
    */
   if (mask) {
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
      value = lp_build_select(&bld->base,
                              mask, value, orig_value);
   }

   LLVMBuildStore(bld->base.builder, value, ptr);
}
Example #3
0
/**
 * Swizzle one channel into other channels.
 *
 * Within every group of @p num_channels consecutive elements of @p a, the
 * element at position @p channel is copied over the whole group.
 *
 * @param bld           build context; supplies the vector type and builder
 * @param a             AoS vector to swizzle
 * @param channel       index of the channel to replicate
 * @param num_channels  channels per group; must be 1, 2 or 4
 * @return the swizzled vector, or @p a itself when no work is needed
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned length = type.length;

   /*
    * The context's undef/zero/one values are returned as they are, and so is
    * any value when there is only a single channel per group.
    */
   if (num_channels == 1 ||
       a == bld->undef || a == bld->zero || a == bld->one) {
      return a;
   }

   assert(num_channels == 2 || num_channels == 4);

   /*
    * XXX: PSHUFB should be better than bitmasks, but forcing the use of
    * shuffles here actually gives worse results.  More investigation is
    * needed.
    */
   if (LLVMIsConstant(a) || type.width >= 16) {
      /*
       * Shuffle: every slot of a group reads the group's selected channel.
       */
      LLVMTypeRef index_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
      unsigned base, slot;

      for (base = 0; base < length; base += num_channels) {
         for (slot = 0; slot < num_channels; ++slot) {
            indices[base + slot] = LLVMConstInt(index_type, base + channel, 0);
         }
      }

      return LLVMBuildShuffleVector(builder, a, bld->undef,
                                    LLVMConstVector(indices, length), "");
   }

   if (num_channels == 2) {
      /*
       * Bit mask and a single shift:
       *
       *   XY XY .... XY  <= input
       *   0Y 0Y .... 0Y  <= after masking
       *   YY YY .... YY  <= after OR-ing in the shifted copy (output)
       */
      struct lp_type pair_type;
      LLVMValueRef shifted = NULL;
      int direction;

      /* Keep only the selected channel. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, num_channels), "");

      /* View each channel pair as one integer of twice the width. */
      pair_type = type;
      pair_type.floating = FALSE;
      pair_type.width *= 2;
      pair_type.length /= 2;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, pair_type), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shifting right (direction -1)
       *                        XX XX XX XX if shifting left  (direction +1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shifting left  (direction +1)
       *                        XX XX XX XX if shifting right (direction -1)
       */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      direction = (channel != 0) ? -1 : 1;
#else
      direction = (channel != 0) ? 1 : -1;
#endif

      if (direction > 0) {
         shifted = LLVMBuildShl(builder, a,
                                lp_build_const_int_vec(bld->gallivm, pair_type,
                                                       direction * type.width), "");
      } else if (direction < 0) {
         shifted = LLVMBuildLShr(builder, a,
                                 lp_build_const_int_vec(bld->gallivm, pair_type,
                                                        -direction * type.width), "");
      }

      assert(shifted);
      if (shifted) {
         a = LLVMBuildOr(builder, a, shifted, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }

   {
      /*
       * Four channels: bit mask and two successive shift-and-OR steps.
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (table entry -1)
       *   YYYY YYYY .... YYYY  <= shift left 2  (table entry  2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1  (table entry  1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (table entry -2)
       *
       * The table below holds little-endian amounts (positive = left,
       * negative = right, in units of one channel); big-endian negates them.
       */
      static const int le_shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      struct lp_type quad_type;
      unsigned step;

      /* Keep only the selected channel. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, 4), "");

      /* View each group of four channels as one integer of 4x the width. */
      quad_type = type;
      quad_type.floating = FALSE;
      quad_type.width *= 4;
      quad_type.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, quad_type), "");

      for (step = 0; step < 2; ++step) {
         LLVMValueRef shifted = NULL;
         int amount = le_shifts[channel][step];

         /* See the endianness diagram above. */
#ifdef PIPE_ARCH_BIG_ENDIAN
         amount = -amount;
#endif

         if (amount > 0) {
            shifted = LLVMBuildShl(builder, a,
                                   lp_build_const_int_vec(bld->gallivm, quad_type,
                                                          amount * type.width), "");
         } else if (amount < 0) {
            shifted = LLVMBuildLShr(builder, a,
                                    lp_build_const_int_vec(bld->gallivm, quad_type,
                                                           -amount * type.width), "");
         }

         assert(shifted);
         if (shifted) {
            a = LLVMBuildOr(builder, a, shifted, "");
         }
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}
Example #4
0
/**
 * Swizzle one channel into all other three channels.
 *
 * Within every group of four consecutive elements of @p a, the element at
 * position @p channel is copied over the whole group.
 *
 * @param bld      build context; supplies the vector type and builder
 * @param a        AoS vector to swizzle
 * @param channel  index (0..3) of the channel to replicate
 * @return the swizzled vector; the context's undef/zero/one values are
 *         returned unchanged
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   if(a == bld->undef || a == bld->zero || a == bld->one)
      return a;

   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
   if (type.width >= 16) {
      /*
       * Shuffle: every slot of a group reads the group's selected channel.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += 4)
         for(i = 0; i < 4; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00
       *   YY00 YY00 .... YY00
       *   YYYY YYYY .... YYYY  <= output
       */
      struct lp_type type4;
      /*
       * Signed shift amounts, in units of one channel.  This must be a
       * signed type: plain char is unsigned on some ABIs, where the negative
       * entries would turn into large positive values and select the wrong
       * shift direction.
       */
      const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };

      /* Keep only the selected channel. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel), "");

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */

      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

         /* The table is written for big-endian; flip it for little-endian. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         shift = -shift;
#endif

         /* Positive amounts shift right, negative amounts shift left. */
         if(shift > 0)
            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if(shift < 0)
            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if(tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}