Exemple #1
0
/*
 * Derive from the quad's upper left scalar coordinates the coordinates for
 * all other quad pixels
 */
static void
generate_pos0(LLVMBuilderRef builder,
              LLVMValueRef x,
              LLVMValueRef y,
              LLVMValueRef *x0,
              LLVMValueRef *y0)
{
   LLVMTypeRef int_elem_type = LLVMInt32Type();
   LLVMTypeRef int_vec_type = LLVMVectorType(int_elem_type, QUAD_SIZE);
   LLVMTypeRef elem_type = LLVMFloatType();
   LLVMTypeRef vec_type = LLVMVectorType(elem_type, QUAD_SIZE);
   LLVMValueRef x_offsets[QUAD_SIZE];
   LLVMValueRef y_offsets[QUAD_SIZE];
   unsigned i;

   x = lp_build_broadcast(builder, int_vec_type, x);
   y = lp_build_broadcast(builder, int_vec_type, y);

   for(i = 0; i < QUAD_SIZE; ++i) {
      x_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_x[i], 0);
      y_offsets[i] = LLVMConstInt(int_elem_type, quad_offset_y[i], 0);
   }

   x = LLVMBuildAdd(builder, x, LLVMConstVector(x_offsets, QUAD_SIZE), "");
   y = LLVMBuildAdd(builder, y, LLVMConstVector(y_offsets, QUAD_SIZE), "");

   *x0 = LLVMBuildSIToFP(builder, x, vec_type, "");
   *y0 = LLVMBuildSIToFP(builder, y, vec_type, "");
}
Exemple #2
0
/**
 * Combined extract and broadcast (mere shuffle in most cases)
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width    == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */

         res = vector;
      }
      else {
         /*
          * Broadcast scalar -> vector.
          */

         res = lp_build_broadcast(gallivm,
                                  lp_build_vec_type(gallivm, dst_type),
                                  vector);
      }
   }
   else {
      if (dst_type.length > 1) {
         /*
          * shuffle - result can be of different length.
          */

         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(gallivm,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(gallivm->builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
                                      shuffle, "");
      }
      else {
         /*
          * Trivial extract scalar from vector.
          */
          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
      }
   }

   return res;
}
/**
 * Expands src vector from src.length to dst_length
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                       LLVMValueRef src,
                       struct lp_type src_type,
                       unsigned dst_length)
{
   LLVMValueRef undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(dst_length <= Elements(elems));
   assert(dst_length > src_type.length);

   if (src_type.length == dst_length)
      return src;

   /* If its a single scalar type, no need to reinvent the wheel */
   if (src_type.length == 1) {
      return lp_build_broadcast(gallivm, LLVMVectorType(lp_build_elem_type(gallivm, src_type), dst_length), src);
   }

   /* All elements from src vector */
   for (i = 0; i < src_type.length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /* Undef fill remaining space */
   for (i = src_type.length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_type.length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef, LLVMConstVector(elems, dst_length), "");
}
Exemple #4
0
/**
 * Broadcast
 */
LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
                          LLVMValueRef scalar)
{
   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));

   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
}
Exemple #5
0
static LLVMValueRef
generate_scissor_test(LLVMBuilderRef builder,
                      LLVMValueRef context_ptr,
                      const struct lp_build_interp_soa_context *interp,
                      struct lp_type type)
{
   LLVMTypeRef vec_type = lp_build_vec_type(type);
   LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
   LLVMValueRef xmin, ymin, xmax, ymax;
   LLVMValueRef m0, m1, m2, m3, m;

   /* xpos, ypos contain the window coords for the four pixels in the quad */
   assert(xpos);
   assert(ypos);

   /* get the current scissor bounds, convert to vectors */
   xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
   xmin = lp_build_broadcast(builder, vec_type, xmin);

   ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
   ymin = lp_build_broadcast(builder, vec_type, ymin);

   xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
   xmax = lp_build_broadcast(builder, vec_type, xmax);

   ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
   ymax = lp_build_broadcast(builder, vec_type, ymax);

   /* compare the fragment's position coordinates against the scissor bounds */
   m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
   m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
   m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
   m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);

   /* AND all the masks together */
   m = LLVMBuildAnd(builder, m0, m1, "");
   m = LLVMBuildAnd(builder, m, m2, "");
   m = LLVMBuildAnd(builder, m, m3, "");

   lp_build_name(m, "scissormask");

   return m;
}
Exemple #6
0
/**
 * Generate the fragment shader, depth/stencil test, and alpha tests.
 * \param i  which quad in the tile, in range [0,3]
 * \param do_tri_test  if 1, do triangle edge in/out testing
 */
static void
generate_fs(struct llvmpipe_context *lp,
            struct lp_fragment_shader *shader,
            const struct lp_fragment_shader_variant_key *key,
            LLVMBuilderRef builder,
            struct lp_type type,
            LLVMValueRef context_ptr,
            unsigned i,
            const struct lp_build_interp_soa_context *interp,
            struct lp_build_sampler_soa *sampler,
            LLVMValueRef *pmask,
            LLVMValueRef (*color)[4],
            LLVMValueRef depth_ptr,
            unsigned do_tri_test,
            LLVMValueRef c0,
            LLVMValueRef c1,
            LLVMValueRef c2,
            LLVMValueRef step0_ptr,
            LLVMValueRef step1_ptr,
            LLVMValueRef step2_ptr)
{
   const struct tgsi_token *tokens = shader->base.tokens;
   LLVMTypeRef elem_type;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef consts_ptr;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
   LLVMValueRef z = interp->pos[2];
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask;
   boolean early_depth_test;
   unsigned attrib;
   unsigned chan;
   unsigned cbuf;

   assert(i < 4);

   elem_type = lp_build_elem_type(type);
   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   consts_ptr = lp_jit_context_constants(builder, context_ptr);

   flow = lp_build_flow_create(builder);

   memset(outputs, 0, sizeof outputs);

   lp_build_flow_scope_begin(flow);

   /* Declare the color and z variables */
   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
	 color[cbuf][chan] = LLVMGetUndef(vec_type);
	 lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
      }
   }
   lp_build_flow_scope_declare(flow, &z);

   /* do triangle edge testing */
   if (do_tri_test) {
      generate_tri_edge_mask(builder, i, pmask,
                             c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
   }
   else {
      *pmask = build_int32_vec_const(~0);
   }

   /* 'mask' will control execution based on quad's pixel alive/killed state */
   lp_build_mask_begin(&mask, flow, type, *pmask);

   if (key->scissor) {
      LLVMValueRef smask =
         generate_scissor_test(builder, context_ptr, interp, type);
      lp_build_mask_update(&mask, smask);
   }

   early_depth_test =
      key->depth.enabled &&
      !key->alpha.enabled &&
      !shader->info.uses_kill &&
      !shader->info.writes_z;

   if(early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_tgsi_soa(builder, tokens, type, &mask,
                     consts_ptr, interp->pos, interp->inputs,
                     outputs, sampler);

   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
         if(outputs[attrib][chan]) {
            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
            lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);

            switch (shader->info.output_semantic_name[attrib]) {
            case TGSI_SEMANTIC_COLOR:
               {
                  unsigned cbuf = shader->info.output_semantic_index[attrib];

                  lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);

                  /* Alpha test */
                  /* XXX: should the alpha reference value be passed separately? */
		  /* XXX: should only test the final assignment to alpha */
                  if(cbuf == 0 && chan == 3) {
                     LLVMValueRef alpha = out;
                     LLVMValueRef alpha_ref_value;
                     alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
                     alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
                     lp_build_alpha_test(builder, &key->alpha, type,
                                         &mask, alpha, alpha_ref_value);
                  }

		  color[cbuf][chan] = out;
                  break;
               }

            case TGSI_SEMANTIC_POSITION:
               if(chan == 2)
                  z = out;
               break;
            }
         }
      }
   }

   if(!early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_mask_end(&mask);

   lp_build_flow_scope_end(flow);

   lp_build_flow_destroy(flow);

   *pmask = mask.value;

}
Exemple #7
0
/**
 * Generate the code to do inside/outside triangle testing for the
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 * \param i  which quad of the quad group to test, in [0,3]
 */
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
                       unsigned i,
                       LLVMValueRef *mask,      /* ivec4, out */
                       LLVMValueRef c0,         /* int32 */
                       LLVMValueRef c1,         /* int32 */
                       LLVMValueRef c2,         /* int32 */
                       LLVMValueRef step0_ptr,  /* ivec4 */
                       LLVMValueRef step1_ptr,  /* ivec4 */
                       LLVMValueRef step2_ptr)  /* ivec4 */
{
#define OPTIMIZE_IN_OUT_TEST 0
#if OPTIMIZE_IN_OUT_TEST
   struct lp_build_if_state ifctx;
   LLVMValueRef not_draw_all;
#endif
   struct lp_build_flow_context *flow;
   struct lp_type i32_type;
   LLVMTypeRef i32vec4_type, mask_type;
   LLVMValueRef c0_vec, c1_vec, c2_vec;
   LLVMValueRef in_out_mask;

   assert(i < 4);
   
   /* int32 vector type */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */

   i32vec4_type = lp_build_int32_vec4_type();

   mask_type = LLVMIntType(32 * 4);

   /*
    * Use a conditional here to do detailed pixel in/out testing.
    * We only have to do this if c0 != INT_MIN.
    */
   flow = lp_build_flow_create(builder);
   lp_build_flow_scope_begin(flow);

   {
#if OPTIMIZE_IN_OUT_TEST
      /* not_draw_all = (c0 != INT_MIN) */
      not_draw_all = LLVMBuildICmp(builder,
                                   LLVMIntNE,
                                   c0,
                                   LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
                                   "");

      in_out_mask = lp_build_int_const_scalar(i32_type, ~0);


      lp_build_flow_scope_declare(flow, &in_out_mask);

      /* if (not_draw_all) {... */
      lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
      {
         LLVMValueRef step0_vec, step1_vec, step2_vec;
         LLVMValueRef m0_vec, m1_vec, m2_vec;
         LLVMValueRef index, m;

         /* c0_vec = {c0, c0, c0, c0}
          * Note that we emit this code four times but LLVM optimizes away
          * three instances of it.
          */
         c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
         c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
         c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
         lp_build_name(c0_vec, "edgeconst0vec");
         lp_build_name(c1_vec, "edgeconst1vec");
         lp_build_name(c2_vec, "edgeconst2vec");

         /* load step0vec, step1, step2 vec from memory */
         index = LLVMConstInt(LLVMInt32Type(), i, 0);
         step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
         step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
         step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
         lp_build_name(step0_vec, "step0vec");
         lp_build_name(step1_vec, "step1vec");
         lp_build_name(step2_vec, "step2vec");

         /* m0_vec = step0_ptr[i] > c0_vec */
         m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
         m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
         m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);

         /* in_out_mask = m0_vec & m1_vec & m2_vec */
         m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
         in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
         lp_build_name(in_out_mask, "inoutmaskvec");
      }
#if OPTIMIZE_IN_OUT_TEST
      lp_build_endif(&ifctx);
#endif

   }
   lp_build_flow_scope_end(flow);
   lp_build_flow_destroy(flow);

   /* This is the initial alive/dead pixel mask for a quad of four pixels.
    * It's an int[4] vector with each word set to 0 or ~0.
    * Words will get cleared when pixels faile the Z test, etc.
    */
   *mask = in_out_mask;
}
Exemple #8
0
static LLVMValueRef
lp_build_gather_avx2(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     struct lp_type dst_type,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef src_type, src_vec_type;
   LLVMValueRef res;
   struct lp_type res_type = dst_type;
   res_type.length *= length;

   if (dst_type.floating) {
      src_type = src_width == 64 ? LLVMDoubleTypeInContext(gallivm->context) :
                                   LLVMFloatTypeInContext(gallivm->context);
   } else {
      src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   }
   src_vec_type = LLVMVectorType(src_type, length);

   /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   if (0) {
      /*
       * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
       * will not use the AVX2 gather instrinsics (even with llvm 4.0), at
       * least with Haswell. See
       * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
       * And the generated code doing the emulation is quite a bit worse
       * than what we get by doing it ourselves too.
       */
      LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
      LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
      LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
      LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
      LLVMValueRef src_ptr;

      base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, "");

      /* Rescale offsets from bytes to elements */
      LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0);
      scale = lp_build_broadcast(gallivm, i32_vec_type, scale);
      assert(LLVMTypeOf(offsets) == i32_vec_type);
      offsets = LLVMBuildSDiv(builder, offsets, scale, "");

      src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");

      char intrinsic[64];
      util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%u%s%u",
                    length, dst_type.floating ? "f" : "i", src_width);
      LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
      LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type);
      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);

      LLVMValueRef args[] = { src_ptr, alignment, mask, passthru };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0);
   } else {
      LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
      const char *intrinsic = NULL;
      unsigned l_idx = 0;

      assert(src_width == 32 || src_width == 64);
      if (src_width == 32) {
         assert(length == 4 || length == 8);
      } else {
         assert(length == 2 || length == 4);
      }

      static const char *intrinsics[2][2][2] = {

         {{"llvm.x86.avx2.gather.d.d",
           "llvm.x86.avx2.gather.d.d.256"},
          {"llvm.x86.avx2.gather.d.q",
           "llvm.x86.avx2.gather.d.q.256"}},

         {{"llvm.x86.avx2.gather.d.ps",
           "llvm.x86.avx2.gather.d.ps.256"},
          {"llvm.x86.avx2.gather.d.pd",
           "llvm.x86.avx2.gather.d.pd.256"}},
      };

      if ((src_width == 32 && length == 8) ||
          (src_width == 64 && length == 4)) {
         l_idx = 1;
      }
      intrinsic = intrinsics[dst_type.floating][src_width == 64][l_idx];

      LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
      LLVMValueRef mask = LLVMConstAllOnes(src_vec_type);
      mask = LLVMConstBitCast(mask, src_vec_type);
      LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0);

      LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale };

      res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0);
   }
   res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, res_type), "");

   return res;
}
Exemple #9
0
/**
 * Generate the fragment shader, depth/stencil test, and alpha tests.
 */
static void
generate_fs(struct llvmpipe_context *lp,
            struct lp_fragment_shader *shader,
            const struct lp_fragment_shader_variant_key *key,
            LLVMBuilderRef builder,
            struct lp_type type,
            LLVMValueRef context_ptr,
            unsigned i,
            const struct lp_build_interp_soa_context *interp,
            struct lp_build_sampler_soa *sampler,
            LLVMValueRef *pmask,
            LLVMValueRef *color,
            LLVMValueRef depth_ptr)
{
   const struct tgsi_token *tokens = shader->base.tokens;
   LLVMTypeRef elem_type;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef consts_ptr;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
   LLVMValueRef z = interp->pos[2];
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask;
   boolean early_depth_test;
   unsigned attrib;
   unsigned chan;

   elem_type = lp_build_elem_type(type);
   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   consts_ptr = lp_jit_context_constants(builder, context_ptr);

   flow = lp_build_flow_create(builder);

   memset(outputs, 0, sizeof outputs);

   lp_build_flow_scope_begin(flow);

   /* Declare the color and z variables */
   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
      color[chan] = LLVMGetUndef(vec_type);
      lp_build_flow_scope_declare(flow, &color[chan]);
   }
   lp_build_flow_scope_declare(flow, &z);

   lp_build_mask_begin(&mask, flow, type, *pmask);

   early_depth_test =
      key->depth.enabled &&
      !key->alpha.enabled &&
      !shader->info.uses_kill &&
      !shader->info.writes_z;

   if(early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_tgsi_soa(builder, tokens, type, &mask,
                     consts_ptr, interp->pos, interp->inputs,
                     outputs, sampler);

   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
         if(outputs[attrib][chan]) {
            lp_build_name(outputs[attrib][chan], "output%u.%u.%c", i, attrib, "xyzw"[chan]);

            switch (shader->info.output_semantic_name[attrib]) {
            case TGSI_SEMANTIC_COLOR:
               {
                  unsigned cbuf = shader->info.output_semantic_index[attrib];

                  lp_build_name(outputs[attrib][chan], "color%u.%u.%c", i, attrib, "rgba"[chan]);

                  /* Alpha test */
                  /* XXX: should the alpha reference value be passed separately? */
                  if(cbuf == 0 && chan == 3) {
                     LLVMValueRef alpha = outputs[attrib][chan];
                     LLVMValueRef alpha_ref_value;
                     alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
                     alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
                     lp_build_alpha_test(builder, &key->alpha, type,
                                         &mask, alpha, alpha_ref_value);
                  }

                  if(cbuf == 0)
                     color[chan] = outputs[attrib][chan];

                  break;
               }

            case TGSI_SEMANTIC_POSITION:
               if(chan == 2)
                  z = outputs[attrib][chan];
               break;
            }
         }
      }
   }

   if(!early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_mask_end(&mask);

   lp_build_flow_scope_end(flow);

   lp_build_flow_destroy(flow);

   *pmask = mask.value;

}
/**
 * Expand the relevent bits of mask_input to a 4-dword mask for the 
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 *
 * \param quad  which quad of the quad group to test, in [0,3]
 * \param mask_input  bitwise mask for the whole 4x4 stamp
 */
static LLVMValueRef
generate_quad_mask(LLVMBuilderRef builder,
                   struct lp_type fs_type,
                   unsigned quad,
                   LLVMValueRef mask_input) /* int32 */
{
   struct lp_type mask_type;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef bits[4];
   LLVMValueRef mask;
   int shift;

   /*
    * XXX: We'll need a different path for 16 x u8
    */
   assert(fs_type.width == 32);
   assert(fs_type.length == 4);
   mask_type = lp_int_type(fs_type);

   /*
    * mask_input >>= (quad * 4)
    */
   
   switch (quad) {
   case 0:
      shift = 0;
      break;
   case 1:
      shift = 2;
      break;
   case 2:
      shift = 8;
      break;
   case 3:
      shift = 10;
      break;
   default:
      assert(0);
      shift = 0;
   }

   mask_input = LLVMBuildLShr(builder,
                              mask_input,
                              LLVMConstInt(i32t, shift, 0),
                              "");

   /*
    * mask = { mask_input & (1 << i), for i in [0,3] }
    */

   mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);

   bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
   bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
   bits[2] = LLVMConstInt(i32t, 1 << 4, 0);
   bits[3] = LLVMConstInt(i32t, 1 << 5, 0);
   
   mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");

   /*
    * mask = mask != 0 ? ~0 : 0
    */

   mask = lp_build_compare(builder,
                           mask_type, PIPE_FUNC_NOTEQUAL,
                           mask,
                           lp_build_const_int_vec(mask_type, 0));

   return mask;
}