Exemple #1
0
/**
 * Generate the runtime callable function for the whole fragment pipeline.
 */
static struct lp_fragment_shader_variant *
generate_fragment(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  const struct lp_fragment_shader_variant_key *key)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
   struct lp_fragment_shader_variant *variant;
   struct lp_type fs_type;
   struct lp_type blend_type;
   LLVMTypeRef fs_elem_type;
   LLVMTypeRef fs_vec_type;
   LLVMTypeRef fs_int_vec_type;
   LLVMTypeRef blend_vec_type;
   LLVMTypeRef blend_int_vec_type;
   LLVMTypeRef arg_types[9];
   LLVMTypeRef func_type;
   LLVMValueRef context_ptr;
   LLVMValueRef x;
   LLVMValueRef y;
   LLVMValueRef a0_ptr;
   LLVMValueRef dadx_ptr;
   LLVMValueRef dady_ptr;
   LLVMValueRef mask_ptr;
   LLVMValueRef color_ptr;
   LLVMValueRef depth_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   LLVMValueRef x0;
   LLVMValueRef y0;
   struct lp_build_sampler_soa *sampler;
   struct lp_build_interp_soa_context interp;
   LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef fs_out_color[NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
   LLVMValueRef blend_mask;
   LLVMValueRef blend_in_color[NUM_CHANNELS];
   unsigned num_fs;
   unsigned i;
   unsigned chan;

#ifdef DEBUG
   tgsi_dump(shader->base.tokens, 0);
   if(key->depth.enabled) {
      debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
      debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
      debug_printf("depth.writemask = %u\n", key->depth.writemask);
   }
   if(key->alpha.enabled) {
      debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
      debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
   }
   if(key->blend.logicop_enable) {
      debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
   }
   else if(key->blend.blend_enable) {
      debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
      debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
      debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
      debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
      debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
      debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
   }
   debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
      if(key->sampler[i].format) {
         debug_printf("sampler[%u] = \n", i);
         debug_printf("  .format = %s\n",
                      pf_name(key->sampler[i].format));
         debug_printf("  .target = %s\n",
                      debug_dump_tex_target(key->sampler[i].target, TRUE));
         debug_printf("  .pot = %u %u %u\n",
                      key->sampler[i].pot_width,
                      key->sampler[i].pot_height,
                      key->sampler[i].pot_depth);
         debug_printf("  .wrap = %s %s %s\n",
                      debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
                      debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
                      debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
         debug_printf("  .min_img_filter = %s\n",
                      debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
         debug_printf("  .min_mip_filter = %s\n",
                      debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
         debug_printf("  .mag_img_filter = %s\n",
                      debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
         if(key->sampler[i].compare_mode)
            debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
         debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
         debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
      }
   }

#endif

   variant = CALLOC_STRUCT(lp_fragment_shader_variant);
   if(!variant)
      return NULL;

   variant->shader = shader;
   memcpy(&variant->key, key, sizeof *key);

   /* TODO: actually pick these based on the fs and color buffer
    * characteristics. */

   memset(&fs_type, 0, sizeof fs_type);
   fs_type.floating = TRUE; /* floating point values */
   fs_type.sign = TRUE;     /* values are signed */
   fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   fs_type.width = 32;      /* 32-bit float */
   fs_type.length = 4;      /* 4 element per vector */
   num_fs = 4;

   memset(&blend_type, 0, sizeof blend_type);
   blend_type.floating = FALSE; /* values are integers */
   blend_type.sign = FALSE;     /* values are unsigned */
   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
   blend_type.width = 8;        /* 8-bit ubyte values */
   blend_type.length = 16;      /* 16 elements per vector */

   /* 
    * Generate the function prototype. Any change here must be reflected in
    * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
    */

   fs_elem_type = lp_build_elem_type(fs_type);
   fs_vec_type = lp_build_vec_type(fs_type);
   fs_int_vec_type = lp_build_int_vec_type(fs_type);

   blend_vec_type = lp_build_vec_type(blend_type);
   blend_int_vec_type = lp_build_int_vec_type(blend_type);

   arg_types[0] = screen->context_ptr_type;            /* context */
   arg_types[1] = LLVMInt32Type();                     /* x */
   arg_types[2] = LLVMInt32Type();                     /* y */
   arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
   arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
   arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dady */
   arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
   arg_types[7] = LLVMPointerType(blend_vec_type, 0);  /* color */
   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */

   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);

   variant->function = LLVMAddFunction(screen->module, "shader", func_type);
   LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
   for(i = 0; i < Elements(arg_types); ++i)
      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
         LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);

   context_ptr  = LLVMGetParam(variant->function, 0);
   x            = LLVMGetParam(variant->function, 1);
   y            = LLVMGetParam(variant->function, 2);
   a0_ptr       = LLVMGetParam(variant->function, 3);
   dadx_ptr     = LLVMGetParam(variant->function, 4);
   dady_ptr     = LLVMGetParam(variant->function, 5);
   mask_ptr     = LLVMGetParam(variant->function, 6);
   color_ptr    = LLVMGetParam(variant->function, 7);
   depth_ptr    = LLVMGetParam(variant->function, 8);

   lp_build_name(context_ptr, "context");
   lp_build_name(x, "x");
   lp_build_name(y, "y");
   lp_build_name(a0_ptr, "a0");
   lp_build_name(dadx_ptr, "dadx");
   lp_build_name(dady_ptr, "dady");
   lp_build_name(mask_ptr, "mask");
   lp_build_name(color_ptr, "color");
   lp_build_name(depth_ptr, "depth");

   /*
    * Function body
    */

   block = LLVMAppendBasicBlock(variant->function, "entry");
   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, block);

   generate_pos0(builder, x, y, &x0, &y0);

   lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
                            a0_ptr, dadx_ptr, dady_ptr,
                            x0, y0, 2, 0);

#if 0
   /* C texture sampling */
   sampler = lp_c_sampler_soa_create(context_ptr);
#else
   /* code generated texture sampling */
   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
#endif

   for(i = 0; i < num_fs; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef out_color[NUM_CHANNELS];
      LLVMValueRef depth_ptr_i;

      if(i != 0)
         lp_build_interp_soa_update(&interp);

      fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");

      generate_fs(lp, shader, key,
                  builder,
                  fs_type,
                  context_ptr,
                  i,
                  &interp,
                  sampler,
                  &fs_mask[i],
                  out_color,
                  depth_ptr_i);

      for(chan = 0; chan < NUM_CHANNELS; ++chan)
         fs_out_color[chan][i] = out_color[chan];
   }

   sampler->destroy(sampler);

   /* 
    * Convert the fs's output color and mask to fit to the blending type. 
    */

   for(chan = 0; chan < NUM_CHANNELS; ++chan) {
      lp_build_conv(builder, fs_type, blend_type,
                    fs_out_color[chan], num_fs,
                    &blend_in_color[chan], 1);
      lp_build_name(blend_in_color[chan], "color.%c", "rgba"[chan]);

   }

   lp_build_conv_mask(builder, fs_type, blend_type,
                               fs_mask, num_fs,
                               &blend_mask, 1);

   /*
    * Blending.
    */

   generate_blend(&key->blend,
                  builder,
                  blend_type,
                  context_ptr,
                  blend_mask,
                  blend_in_color,
                  color_ptr);

   LLVMBuildRetVoid(builder);

   LLVMDisposeBuilder(builder);

   /*
    * Translate the LLVM IR into machine code.
    */

   if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
      LLVMDumpValue(variant->function);
      abort();
   }

   LLVMRunFunctionPassManager(screen->pass, variant->function);

#ifdef DEBUG
   LLVMDumpValue(variant->function);
   debug_printf("\n");
#endif

   variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);

#ifdef DEBUG
   lp_disassemble(variant->jit_function);
#endif

   variant->next = shader->variants;
   shader->variants = variant;

   return variant;
}
Exemple #2
0
/**
 * Generate the runtime callable function for the whole fragment pipeline.
 * Note that the function which we generate operates on a block of 16
 * pixels at at time.  The block contains 2x2 quads.  Each quad contains
 * 2x2 pixels.
 */
static void
generate_fragment(struct llvmpipe_context *lp,
                  struct lp_fragment_shader *shader,
                  struct lp_fragment_shader_variant *variant,
                  unsigned do_tri_test)
{
   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
   const struct lp_fragment_shader_variant_key *key = &variant->key;
   struct lp_type fs_type;
   struct lp_type blend_type;
   LLVMTypeRef fs_elem_type;
   LLVMTypeRef fs_vec_type;
   LLVMTypeRef fs_int_vec_type;
   LLVMTypeRef blend_vec_type;
   LLVMTypeRef blend_int_vec_type;
   LLVMTypeRef arg_types[14];
   LLVMTypeRef func_type;
   LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
   LLVMValueRef context_ptr;
   LLVMValueRef x;
   LLVMValueRef y;
   LLVMValueRef a0_ptr;
   LLVMValueRef dadx_ptr;
   LLVMValueRef dady_ptr;
   LLVMValueRef color_ptr_ptr;
   LLVMValueRef depth_ptr;
   LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   LLVMValueRef x0;
   LLVMValueRef y0;
   struct lp_build_sampler_soa *sampler;
   struct lp_build_interp_soa_context interp;
   LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
   LLVMValueRef blend_mask;
   LLVMValueRef blend_in_color[NUM_CHANNELS];
   LLVMValueRef function;
   unsigned num_fs;
   unsigned i;
   unsigned chan;
   unsigned cbuf;


   /* TODO: actually pick these based on the fs and color buffer
    * characteristics. */

   memset(&fs_type, 0, sizeof fs_type);
   fs_type.floating = TRUE; /* floating point values */
   fs_type.sign = TRUE;     /* values are signed */
   fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   fs_type.width = 32;      /* 32-bit float */
   fs_type.length = 4;      /* 4 elements per vector */
   num_fs = 4;              /* number of quads per block */

   memset(&blend_type, 0, sizeof blend_type);
   blend_type.floating = FALSE; /* values are integers */
   blend_type.sign = FALSE;     /* values are unsigned */
   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
   blend_type.width = 8;        /* 8-bit ubyte values */
   blend_type.length = 16;      /* 16 elements per vector */

   /* 
    * Generate the function prototype. Any change here must be reflected in
    * lp_jit.h's lp_jit_frag_func function pointer type, and vice-versa.
    */

   fs_elem_type = lp_build_elem_type(fs_type);
   fs_vec_type = lp_build_vec_type(fs_type);
   fs_int_vec_type = lp_build_int_vec_type(fs_type);

   blend_vec_type = lp_build_vec_type(blend_type);
   blend_int_vec_type = lp_build_int_vec_type(blend_type);

   arg_types[0] = screen->context_ptr_type;            /* context */
   arg_types[1] = LLVMInt32Type();                     /* x */
   arg_types[2] = LLVMInt32Type();                     /* y */
   arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
   arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
   arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dady */
   arg_types[6] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0);  /* color */
   arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
   arg_types[8] = LLVMInt32Type();                     /* c0 */
   arg_types[9] = LLVMInt32Type();                     /* c1 */
   arg_types[10] = LLVMInt32Type();                    /* c2 */
   /* Note: the step arrays are built as int32[16] but we interpret
    * them here as int32_vec4[4].
    */
   arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
   arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
   arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */

   func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);

   function = LLVMAddFunction(screen->module, "shader", func_type);
   LLVMSetFunctionCallConv(function, LLVMCCallConv);

   variant->function[do_tri_test] = function;


   /* XXX: need to propagate noalias down into color param now we are
    * passing a pointer-to-pointer?
    */
   for(i = 0; i < Elements(arg_types); ++i)
      if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
         LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);

   context_ptr  = LLVMGetParam(function, 0);
   x            = LLVMGetParam(function, 1);
   y            = LLVMGetParam(function, 2);
   a0_ptr       = LLVMGetParam(function, 3);
   dadx_ptr     = LLVMGetParam(function, 4);
   dady_ptr     = LLVMGetParam(function, 5);
   color_ptr_ptr = LLVMGetParam(function, 6);
   depth_ptr    = LLVMGetParam(function, 7);
   c0           = LLVMGetParam(function, 8);
   c1           = LLVMGetParam(function, 9);
   c2           = LLVMGetParam(function, 10);
   step0_ptr    = LLVMGetParam(function, 11);
   step1_ptr    = LLVMGetParam(function, 12);
   step2_ptr    = LLVMGetParam(function, 13);

   lp_build_name(context_ptr, "context");
   lp_build_name(x, "x");
   lp_build_name(y, "y");
   lp_build_name(a0_ptr, "a0");
   lp_build_name(dadx_ptr, "dadx");
   lp_build_name(dady_ptr, "dady");
   lp_build_name(color_ptr_ptr, "color_ptr");
   lp_build_name(depth_ptr, "depth");
   lp_build_name(c0, "c0");
   lp_build_name(c1, "c1");
   lp_build_name(c2, "c2");
   lp_build_name(step0_ptr, "step0");
   lp_build_name(step1_ptr, "step1");
   lp_build_name(step2_ptr, "step2");

   /*
    * Function body
    */

   block = LLVMAppendBasicBlock(function, "entry");
   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, block);

   generate_pos0(builder, x, y, &x0, &y0);

   lp_build_interp_soa_init(&interp, 
                            shader->base.tokens,
                            key->flatshade,
                            builder, fs_type,
                            a0_ptr, dadx_ptr, dady_ptr,
                            x0, y0);

   /* code generated texture sampling */
   sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);

   /* loop over quads in the block */
   for(i = 0; i < num_fs; ++i) {
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS];
      LLVMValueRef depth_ptr_i;
      int cbuf;

      if(i != 0)
         lp_build_interp_soa_update(&interp, i);

      depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");

      generate_fs(lp, shader, key,
                  builder,
                  fs_type,
                  context_ptr,
                  i,
                  &interp,
                  sampler,
                  &fs_mask[i], /* output */
                  out_color,
                  depth_ptr_i,
                  do_tri_test,
                  c0, c1, c2,
                  step0_ptr, step1_ptr, step2_ptr);

      for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
	 for(chan = 0; chan < NUM_CHANNELS; ++chan)
	    fs_out_color[cbuf][chan][i] = out_color[cbuf][chan];
   }

   sampler->destroy(sampler);

   /* Loop over color outputs / color buffers to do blending.
    */
   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
      LLVMValueRef color_ptr;
      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0);

      /* 
       * Convert the fs's output color and mask to fit to the blending type. 
       */
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
	 lp_build_conv(builder, fs_type, blend_type,
		       fs_out_color[cbuf][chan], num_fs,
		       &blend_in_color[chan], 1);
	 lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
      }

      lp_build_conv_mask(builder, fs_type, blend_type,
			 fs_mask, num_fs,
			 &blend_mask, 1);

      color_ptr = LLVMBuildLoad(builder, 
				LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
				"");
      lp_build_name(color_ptr, "color_ptr%d", cbuf);

      /*
       * Blending.
       */
      generate_blend(&key->blend,
		     builder,
		     blend_type,
		     context_ptr,
		     blend_mask,
		     blend_in_color,
		     color_ptr);
   }

   LLVMBuildRetVoid(builder);

   LLVMDisposeBuilder(builder);


   /* Verify the LLVM IR.  If invalid, dump and abort */
#ifdef DEBUG
   if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
      if (1)
         LLVMDumpValue(function);
      abort();
   }
#endif

   /* Apply optimizations to LLVM IR */
   if (1)
      LLVMRunFunctionPassManager(screen->pass, function);

   if (LP_DEBUG & DEBUG_JIT) {
      /* Print the LLVM IR to stderr */
      LLVMDumpValue(function);
      debug_printf("\n");
   }

   /*
    * Translate the LLVM IR into machine code.
    */
   variant->jit_function[do_tri_test] = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, function);

   if (LP_DEBUG & DEBUG_ASM)
      lp_disassemble(variant->jit_function[do_tri_test]);
}
Exemple #3
0
/**
 * Perform the occlusion test and increase the counter.
 * Test the depth mask. Add the number of channel which has none zero mask
 * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
 * The counter will add 4.
 *
 * \param type holds element type of the mask vector.
 * \param maskvalue is the depth test mask.
 * \param counter is a pointer of the uint32 counter.
 */
void
lp_build_occlusion_count(struct gallivm_state *gallivm,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMContextRef context = gallivm->context;
   LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
   LLVMValueRef count, newcount;

   assert(type.length <= 16);
   assert(type.floating);

   if(util_cpu_caps.has_sse && type.length == 4) {
      const char *movmskintr = "llvm.x86.sse.movmsk.ps";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else if(util_cpu_caps.has_avx && type.length == 8) {
      const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
      const char *popcntintr = "llvm.ctpop.i32";
      LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
                                           lp_build_vec_type(gallivm, type), "");
      bits = lp_build_intrinsic_unary(builder, movmskintr,
                                      LLVMInt32TypeInContext(context), bits);
      count = lp_build_intrinsic_unary(builder, popcntintr,
                                       LLVMInt32TypeInContext(context), bits);
   }
   else {
      unsigned i;
      LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
      LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
      LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
      LLVMValueRef shufflev, countd;
      LLVMValueRef shuffles[16];
      const char *popcntintr = NULL;

      countv = LLVMBuildBitCast(builder, countv, i8vntype, "");

       for (i = 0; i < type.length; i++) {
          shuffles[i] = lp_build_const_int32(gallivm, 4*i);
       }

       shufflev = LLVMConstVector(shuffles, type.length);
       countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
       countd = LLVMBuildBitCast(builder, countd, counttype, "countd");

       /*
        * XXX FIXME
        * this is bad on cpus without popcount (on x86 supported by intel
        * nehalem, amd barcelona, and up - not tied to sse42).
        * Would be much faster to just sum the 4 elements of the vector with
        * some horizontal add (shuffle/add/shuffle/add after the initial and).
        */
       switch (type.length) {
       case 4:
          popcntintr = "llvm.ctpop.i32";
          break;
       case 8:
          popcntintr = "llvm.ctpop.i64";
          break;
       case 16:
          popcntintr = "llvm.ctpop.i128";
          break;
       default:
          assert(0);
       }
       count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);

       if (type.length > 4) {
          count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
       }
   }
   newcount = LLVMBuildLoad(builder, counter, "origcount");
   newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
   LLVMBuildStore(builder, newcount, counter);
}
Exemple #4
0
/**
 * Gather one element from scatter positions in memory.
 * Nearly the same as above, however the individual elements
 * may be vectors themselves, and fetches may be float type.
 * Can also do pad vector instead of ZExt.
 *
 * @sa lp_build_gather()
 */
static LLVMValueRef
lp_build_gather_elem_vec(struct gallivm_state *gallivm,
                         unsigned length,
                         unsigned src_width,
                         LLVMTypeRef src_type,
                         struct lp_type dst_type,
                         boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         unsigned i,
                         boolean vector_justify)
{
   LLVMValueRef ptr, res;
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_type.width * dst_type.length);
   if (src_width < dst_type.width * dst_type.length) {
      if (dst_type.length > 1) {
         res = lp_build_pad_vector(gallivm, res, dst_type.length);
         /*
          * vector_justify hopefully a non-issue since we only deal
          * with src_width >= 32 here?
          */
      } else {
         LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type);

         /*
          * Only valid if src_ptr_type is int type...
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

#ifdef PIPE_ARCH_BIG_ENDIAN
         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
         }
         if (src_width == 48) {
            /* Load 3x16 bit vector.
             * The sequence of loads on big-endian hardware proceeds as follows.
             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
             * of three fields appears in the order X, Y, Z.
             *
             * Load 32-bit word: 0.0.X.Y
             * Load 16-bit halfword: 0.0.0.Z
             * Rotate left: 0.X.Y.0
             * Bitwise OR: 0.X.Y.Z
             *
             * The order in which we need the fields in the result is 0.Z.Y.X,
             * the same as on little-endian; permute 16-bit fields accordingly
             * within 64-bit register:
             */
            LLVMValueRef shuffles[4] = {
               lp_build_const_int32(gallivm, 2),
               lp_build_const_int32(gallivm, 1),
               lp_build_const_int32(gallivm, 0),
               lp_build_const_int32(gallivm, 3),
            };
            res = LLVMBuildBitCast(gallivm->builder, res,
                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
         }
#endif
      }
   }
   return res;
}
Exemple #5
0
/**
 * Generate the code to do inside/outside triangle testing for the
 * four pixels in a 2x2 quad.  This will set the four elements of the
 * quad mask vector to 0 or ~0.
 * \param i  which quad of the quad group to test, in [0,3]
 */
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
                       unsigned i,
                       LLVMValueRef *mask,      /* ivec4, out */
                       LLVMValueRef c0,         /* int32 */
                       LLVMValueRef c1,         /* int32 */
                       LLVMValueRef c2,         /* int32 */
                       LLVMValueRef step0_ptr,  /* ivec4 */
                       LLVMValueRef step1_ptr,  /* ivec4 */
                       LLVMValueRef step2_ptr)  /* ivec4 */
{
#define OPTIMIZE_IN_OUT_TEST 0
#if OPTIMIZE_IN_OUT_TEST
   struct lp_build_if_state ifctx;
   LLVMValueRef not_draw_all;
#endif
   struct lp_build_flow_context *flow;
   struct lp_type i32_type;
   LLVMTypeRef i32vec4_type, mask_type;
   LLVMValueRef c0_vec, c1_vec, c2_vec;
   LLVMValueRef in_out_mask;

   assert(i < 4);
   
   /* int32 vector type */
   memset(&i32_type, 0, sizeof i32_type);
   i32_type.floating = FALSE; /* values are integers */
   i32_type.sign = TRUE;      /* values are signed */
   i32_type.norm = FALSE;     /* values are not normalized */
   i32_type.width = 32;       /* 32-bit int values */
   i32_type.length = 4;       /* 4 elements per vector */

   i32vec4_type = lp_build_int32_vec4_type();

   mask_type = LLVMIntType(32 * 4);

   /*
    * Use a conditional here to do detailed pixel in/out testing.
    * We only have to do this if c0 != INT_MIN.
    */
   flow = lp_build_flow_create(builder);
   lp_build_flow_scope_begin(flow);

   {
#if OPTIMIZE_IN_OUT_TEST
      /* not_draw_all = (c0 != INT_MIN) */
      not_draw_all = LLVMBuildICmp(builder,
                                   LLVMIntNE,
                                   c0,
                                   LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
                                   "");

      in_out_mask = lp_build_int_const_scalar(i32_type, ~0);


      lp_build_flow_scope_declare(flow, &in_out_mask);

      /* if (not_draw_all) {... */
      lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
      {
         LLVMValueRef step0_vec, step1_vec, step2_vec;
         LLVMValueRef m0_vec, m1_vec, m2_vec;
         LLVMValueRef index, m;

         /* c0_vec = {c0, c0, c0, c0}
          * Note that we emit this code four times but LLVM optimizes away
          * three instances of it.
          */
         c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
         c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
         c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
         lp_build_name(c0_vec, "edgeconst0vec");
         lp_build_name(c1_vec, "edgeconst1vec");
         lp_build_name(c2_vec, "edgeconst2vec");

         /* load step0vec, step1, step2 vec from memory */
         index = LLVMConstInt(LLVMInt32Type(), i, 0);
         step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
         step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
         step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
         lp_build_name(step0_vec, "step0vec");
         lp_build_name(step1_vec, "step1vec");
         lp_build_name(step2_vec, "step2vec");

         /* m0_vec = step0_ptr[i] > c0_vec */
         m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
         m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
         m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);

         /* in_out_mask = m0_vec & m1_vec & m2_vec */
         m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
         in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
         lp_build_name(in_out_mask, "inoutmaskvec");
      }
#if OPTIMIZE_IN_OUT_TEST
      lp_build_endif(&ifctx);
#endif

   }
   lp_build_flow_scope_end(flow);
   lp_build_flow_destroy(flow);

   /* This is the initial alive/dead pixel mask for a quad of four pixels.
    * It's an int[4] vector with each word set to 0 or ~0.
    * Words will get cleared when pixels faile the Z test, etc.
    */
   *mask = in_out_mask;
}
Exemple #6
0
void genprim_array_serialise(compile_t* c, reach_type_t* t)
{
  // Generate the serialise function.
  t->serialise_fn = codegen_addfun(c, genname_serialise(t->name),
    c->serialise_type);

  codegen_startfun(c, t->serialise_fn, NULL, NULL);
  LLVMSetFunctionCallConv(t->serialise_fn, LLVMCCallConv);
  LLVMSetLinkage(t->serialise_fn, LLVMExternalLinkage);

  LLVMValueRef ctx = LLVMGetParam(t->serialise_fn, 0);
  LLVMValueRef arg = LLVMGetParam(t->serialise_fn, 1);
  LLVMValueRef addr = LLVMGetParam(t->serialise_fn, 2);
  LLVMValueRef offset = LLVMGetParam(t->serialise_fn, 3);
  LLVMValueRef mut = LLVMGetParam(t->serialise_fn, 4);

  LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr,
    "");
  LLVMValueRef offset_addr = LLVMBuildAdd(c->builder,
    LLVMBuildPtrToInt(c->builder, addr, c->intptr, ""), offset, "");

  genserialise_typeid(c, t, offset_addr);

  // Don't serialise our contents if we are opaque.
  LLVMBasicBlockRef body_block = codegen_block(c, "body");
  LLVMBasicBlockRef post_block = codegen_block(c, "post");

  LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntNE, mut,
    LLVMConstInt(c->i32, PONY_TRACE_OPAQUE, false), "");
  LLVMBuildCondBr(c->builder, test, body_block, post_block);
  LLVMPositionBuilderAtEnd(c->builder, body_block);

  // Write the size twice, effectively rewriting alloc to be the same as size.
  LLVMValueRef size = field_value(c, object, 1);

  LLVMValueRef size_loc = field_loc(c, offset_addr, t->structure,
    c->intptr, 1);
  LLVMBuildStore(c->builder, size, size_loc);

  LLVMValueRef alloc_loc = field_loc(c, offset_addr, t->structure,
    c->intptr, 2);
  LLVMBuildStore(c->builder, size, alloc_loc);

  // Write the pointer.
  LLVMValueRef ptr = field_value(c, object, 3);

  // The resulting offset will only be invalid (i.e. have the high bit set) if
  // the size is zero. For an opaque array, we don't serialise the contents,
  // so we don't get here, so we don't end up with an invalid offset.
  LLVMValueRef args[5];
  args[0] = ctx;
  args[1] = ptr;
  LLVMValueRef ptr_offset = gencall_runtime(c, "pony_serialise_offset",
    args, 2, "");

  LLVMValueRef ptr_loc = field_loc(c, offset_addr, t->structure, c->intptr, 3);
  LLVMBuildStore(c->builder, ptr_offset, ptr_loc);

  LLVMValueRef ptr_offset_addr = LLVMBuildAdd(c->builder, ptr_offset,
    LLVMBuildPtrToInt(c->builder, addr, c->intptr, ""), "");

  // Serialise elements.
  ast_t* typeargs = ast_childidx(t->ast, 2);
  ast_t* typearg = ast_child(typeargs);
  reach_type_t* t_elem = reach_type(c->reach, typearg);

  size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type);
  LLVMValueRef l_size = LLVMConstInt(c->intptr, abisize, false);

  if((t_elem->underlying == TK_PRIMITIVE) && (t_elem->primitive != NULL))
  {
    // memcpy machine words
    args[0] = LLVMBuildIntToPtr(c->builder, ptr_offset_addr, c->void_ptr, "");
    args[1] = LLVMBuildBitCast(c->builder, ptr, c->void_ptr, "");
    args[2] = LLVMBuildMul(c->builder, size, l_size, "");
    args[3] = LLVMConstInt(c->i32, 1, false);
    args[4] = LLVMConstInt(c->i1, 0, false);
    if(target_is_ilp32(c->opt->triple))
    {
      gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i32", args, 5, "");
    } else {
      gencall_runtime(c, "llvm.memcpy.p0i8.p0i8.i64", args, 5, "");
    }
  } else {
    ptr = LLVMBuildBitCast(c->builder, ptr,
      LLVMPointerType(t_elem->use_type, 0), "");

    LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder);
    LLVMBasicBlockRef cond_block = codegen_block(c, "cond");
    LLVMBasicBlockRef body_block = codegen_block(c, "body");
    LLVMBasicBlockRef post_block = codegen_block(c, "post");

    LLVMValueRef offset_var = LLVMBuildAlloca(c->builder, c->intptr, "");
    LLVMBuildStore(c->builder, ptr_offset_addr, offset_var);

    LLVMBuildBr(c->builder, cond_block);

    // While the index is less than the size, serialise an element. The
    // initial index when coming from the entry block is zero.
    LLVMPositionBuilderAtEnd(c->builder, cond_block);
    LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, "");
    LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false);
    LLVMAddIncoming(phi, &zero, &entry_block, 1);
    LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, size, "");
    LLVMBuildCondBr(c->builder, test, body_block, post_block);

    // The phi node is the index. Get the element and serialise it.
    LLVMPositionBuilderAtEnd(c->builder, body_block);
    LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, ptr, &phi, 1, "");

    ptr_offset_addr = LLVMBuildLoad(c->builder, offset_var, "");
    genserialise_element(c, t_elem, false, ctx, elem_ptr, ptr_offset_addr);
    ptr_offset_addr = LLVMBuildAdd(c->builder, ptr_offset_addr, l_size, "");
    LLVMBuildStore(c->builder, ptr_offset_addr, offset_var);

    // Add one to the phi node and branch back to the cond block.
    LLVMValueRef one = LLVMConstInt(c->intptr, 1, false);
    LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, "");
    body_block = LLVMGetInsertBlock(c->builder);
    LLVMAddIncoming(phi, &inc, &body_block, 1);
    LLVMBuildBr(c->builder, cond_block);

    LLVMPositionBuilderAtEnd(c->builder, post_block);
  }

  LLVMBuildBr(c->builder, post_block);
  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);
}
Exemple #7
0
LLVMValueRef gen_funcdef(struct node *ast)
{
	LLVMValueRef global, func, retval;
	LLVMTypeRef func_type, *param_types;
	LLVMBasicBlockRef body_block, ret_block;
	int param_count, i;

	if (hcreate(SYMTAB_SIZE) == 0)
		generror(">s");

	param_count = count_chain(ast->two);
	param_types = calloc(sizeof(LLVMTypeRef), param_count);

	if (param_count > 0 && param_types == NULL)
		generror("out of memory");

	for (i = 0; i < param_count; i++)
		param_types[i] = TYPE_INT;

	func_type = LLVMFunctionType(TYPE_INT, param_types, param_count, 0);
	func = LLVMAddFunction(module, ".gfunc", func_type);
	LLVMSetLinkage(func, LLVMPrivateLinkage);
	/* TODO: How to specify stack alignment? Should be 16 bytes */
	LLVMAddFunctionAttr(func, LLVMStackAlignment);

	global = find_or_add_global(ast->one->val);
	LLVMSetInitializer(global, LLVMBuildPtrToInt(builder, func, TYPE_INT, ""));

	body_block = LLVMAppendBasicBlock(func, "");
	ret_block = LLVMAppendBasicBlock(func, "");
	LLVMPositionBuilderAtEnd(builder, body_block);

	retval = LLVMBuildAlloca(builder, TYPE_INT, "");
	LLVMBuildStore(builder, CONST(0), retval);

	symtab_enter(ast->one->val, global);
	symtab_enter(".return", ret_block);
	symtab_enter(".retval", retval);

	label_count = 0;
	predeclare_labels(ast->three);

	if (ast->two)
		codegen(ast->two);

	codegen(ast->three);

	LLVMBuildBr(builder, ret_block);
	/* TODO: Untangle out-of-order blocks */
	LLVMPositionBuilderAtEnd(builder, ret_block);
	LLVMBuildRet(builder, LLVMBuildLoad(builder, retval, ""));

	LLVMMoveBasicBlockAfter(ret_block, LLVMGetLastBasicBlock(func));

	/* TODO: Handle failed verification and print internal compiler error */
	LLVMVerifyFunction(func, LLVMPrintMessageAction);

	hdestroy();

	return NULL;
}
/**
 * Register store.
 */
void
lp_emit_store_aos(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;
   LLVMValueRef ptr;

   /*
    * Saturate the value
    */

   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0));
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   default:
      assert(0);
   }

   /*
    * Translate the register file
    */

   assert(!reg->Register.Indirect);

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;

   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;

   case TGSI_FILE_ADDRESS:
      ptr = bld->addr[reg->Indirect.Index];
      break;

   case TGSI_FILE_PREDICATE:
      ptr = bld->preds[reg->Register.Index];
      break;

   default:
      assert(0);
      return;
   }

   if (!ptr)
      return;
   /*
    * Predicate
    */

   if (inst->Instruction.Predicate) {
      LLVMValueRef pred;

      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);

      pred = LLVMBuildLoad(builder,
                           bld->preds[inst->Predicate.Index], "");

      /*
       * Convert the value to an integer mask.
       */
      pred = lp_build_compare(bld->bld_base.base.gallivm,
                               bld->bld_base.base.type,
                               PIPE_FUNC_NOTEQUAL,
                               pred,
                               bld->bld_base.base.zero);

      if (inst->Predicate.Negate) {
         pred = LLVMBuildNot(builder, pred, "");
      }

      pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred,
                         inst->Predicate.SwizzleX,
                         inst->Predicate.SwizzleY,
                         inst->Predicate.SwizzleZ,
                         inst->Predicate.SwizzleW);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, pred, "");
      } else {
         mask = pred;
      }
   }

   /*
    * Writemask
    */

   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm,
                                                   bld->bld_base.base.type,
                                                   reg->Register.WriteMask,
                                                   bld->swizzles);

      if (mask) {
         mask = LLVMBuildAnd(builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   if (mask) {
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(builder, ptr, "");
      value = lp_build_select(&bld->bld_base.base,
                              mask, value, orig_value);
   }

   LLVMBuildStore(builder, value, ptr);
}
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_type type = bld_base->base.type;
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Get the constants components
    */

   res = bld->bld_base.base.undef;
   for (chan = 0; chan < 4; ++chan) {
      LLVMValueRef index;
      LLVMValueRef scalar_ptr;
      LLVMValueRef scalar;
      LLVMValueRef swizzle;

      index = lp_build_const_int32(bld->bld_base.base.gallivm,
                                   reg->Register.Index * 4 + chan);

      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, "");

      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

      /*
       * NOTE: constants array is always assumed to be RGBA
       */

      swizzle = lp_build_const_int32(bld->bld_base.base.gallivm,
                                     bld->swizzles[chan]);

      res = LLVMBuildInsertElement(builder, res, scalar, swizzle, "");
   }

   /*
    * Broadcast the first quaternion to all others.
    *
    * XXX: could be factored into a reusable function.
    */

   if (type.length > 4) {
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      unsigned i;

      for (chan = 0; chan < 4; ++chan) {
         shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan);
      }

      for (i = 4; i < type.length; ++i) {
         shuffles[i] = shuffles[i % 4];
      }

      res = LLVMBuildShuffleVector(builder,
                                   res, bld->bld_base.base.undef,
                                   LLVMConstVector(shuffles, type.length),
                                   "");
   }
   return res;
}
Exemple #10
0
/**
 * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
 * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
 * but only limited texture coord wrap modes.
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16_bld;
   LLVMValueRef first_level;
   LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = i32t_zero;
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                       bld->gallivm, unit);
         ilevel0 = first_level;
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(lod_fpart);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);
      break;
   }

   /*
    * Get/interpolate texture colors.
    */

   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");

   if (min_filter == mag_filter) {
      /* no need to distinquish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld, 
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);
   }

   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    */
   packed = lp_build_pack2(bld->gallivm,
                           h16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(bld->gallivm,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}
Exemple #11
0
static LLVMValueRef
add_blend_test(struct gallivm_state *gallivm,
               const struct pipe_blend_state *blend,
               enum vector_mode mode,
               struct lp_type type)
{
   LLVMModuleRef module = gallivm->module;
   LLVMContextRef context = gallivm->context;
   LLVMTypeRef vec_type;
   LLVMTypeRef args[4];
   LLVMValueRef func;
   LLVMValueRef src_ptr;
   LLVMValueRef dst_ptr;
   LLVMValueRef const_ptr;
   LLVMValueRef res_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
   const unsigned rt = 0;
   const unsigned char swizzle[4] = { 0, 1, 2, 3 };

   vec_type = lp_build_vec_type(gallivm, type);

   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);
   src_ptr = LLVMGetParam(func, 0);
   dst_ptr = LLVMGetParam(func, 1);
   const_ptr = LLVMGetParam(func, 2);
   res_ptr = LLVMGetParam(func, 3);

   block = LLVMAppendBasicBlockInContext(context, func, "entry");
   builder = gallivm->builder;
   LLVMPositionBuilderAtEnd(builder, block);

   if (mode == AoS) {
      LLVMValueRef src;
      LLVMValueRef dst;
      LLVMValueRef con;
      LLVMValueRef res;

      src = LLVMBuildLoad(builder, src_ptr, "src");
      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
      con = LLVMBuildLoad(builder, const_ptr, "const");

      res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);

      lp_build_name(res, "res");

      LLVMBuildStore(builder, res, res_ptr);
   }

   if (mode == SoA) {
      LLVMValueRef src[4];
      LLVMValueRef dst[4];
      LLVMValueRef con[4];
      LLVMValueRef res[4];
      unsigned i;

      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
         lp_build_name(src[i], "src.%c", "rgba"[i]);
         lp_build_name(con[i], "con.%c", "rgba"[i]);
         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
      }

      lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);

      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
         lp_build_name(res[i], "res.%c", "rgba"[i]);
         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
      }
   }

   LLVMBuildRetVoid(builder);;

   return func;
}
Exemple #12
0
static LLVMValueRef make_unbox_function(compile_t* c, gentype_t* g,
  const char* name, token_id t)
{
  LLVMValueRef fun = LLVMGetNamedFunction(c->module, name);

  if(fun == NULL)
    return LLVMConstNull(c->void_ptr);

  // Create a new unboxing function that forwards to the real function.
  LLVMTypeRef f_type = LLVMGetElementType(LLVMTypeOf(fun));
  int count = LLVMCountParamTypes(f_type);

  // Leave space for a receiver if it's a constructor vtable entry.
  size_t buf_size = (count + 1) * sizeof(LLVMTypeRef);
  LLVMTypeRef* params = (LLVMTypeRef*)pool_alloc_size(buf_size);
  LLVMGetParamTypes(f_type, params);
  LLVMTypeRef ret_type = LLVMGetReturnType(f_type);

  const char* unbox_name = genname_unbox(name);

  if(t != TK_NEW)
  {
    // It's the same type, but it takes the boxed type instead of the primitive
    // type as the receiver.
    params[0] = g->structure_ptr;
  } else {
    // For a constructor, the unbox_fun has a receiver, even though the real
    // method does not.
    memmove(&params[1], &params[0], count * sizeof(LLVMTypeRef*));
    params[0] = g->structure_ptr;
    count++;
  }

  LLVMTypeRef unbox_type = LLVMFunctionType(ret_type, params, count, false);
  LLVMValueRef unbox_fun = codegen_addfun(c, unbox_name, unbox_type);
  codegen_startfun(c, unbox_fun, false);

  // Extract the primitive type from element 1 and call the real function.
  LLVMValueRef this_ptr = LLVMGetParam(unbox_fun, 0);
  LLVMValueRef primitive_ptr = LLVMBuildStructGEP(c->builder, this_ptr, 1, "");
  LLVMValueRef primitive = LLVMBuildLoad(c->builder, primitive_ptr, "");

  LLVMValueRef* args = (LLVMValueRef*)pool_alloc_size(buf_size);

  if(t != TK_NEW)
  {
    // If it's not a constructor, pass the extracted primitive as the receiver.
    args[0] = primitive;

    for(int i = 1; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i);
  } else {
    count--;

    for(int i = 0; i < count; i++)
      args[i] = LLVMGetParam(unbox_fun, i + 1);
  }

  LLVMValueRef result = codegen_call(c, fun, args, count);
  LLVMBuildRet(c->builder, result);
  codegen_finishfun(c);

  pool_free_size(buf_size, params);
  pool_free_size(buf_size, args);
  return LLVMConstBitCast(unbox_fun, c->void_ptr);
}
static LLVMValueRef
add_blend_test(LLVMModuleRef module,
               const struct pipe_blend_state *blend,
               enum vector_mode mode,
               struct lp_type type)
{
   LLVMTypeRef ret_type;
   LLVMTypeRef vec_type;
   LLVMTypeRef args[4];
   LLVMValueRef func;
   LLVMValueRef src_ptr;
   LLVMValueRef dst_ptr;
   LLVMValueRef const_ptr;
   LLVMValueRef res_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;

   ret_type = LLVMInt64Type();
   vec_type = lp_build_vec_type(type);

   args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
   func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
   LLVMSetFunctionCallConv(func, LLVMCCallConv);
   src_ptr = LLVMGetParam(func, 0);
   dst_ptr = LLVMGetParam(func, 1);
   const_ptr = LLVMGetParam(func, 2);
   res_ptr = LLVMGetParam(func, 3);

   block = LLVMAppendBasicBlock(func, "entry");
   builder = LLVMCreateBuilder();
   LLVMPositionBuilderAtEnd(builder, block);

   if (mode == AoS) {
      LLVMValueRef src;
      LLVMValueRef dst;
      LLVMValueRef con;
      LLVMValueRef res;

      src = LLVMBuildLoad(builder, src_ptr, "src");
      dst = LLVMBuildLoad(builder, dst_ptr, "dst");
      con = LLVMBuildLoad(builder, const_ptr, "const");

      res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);

      lp_build_name(res, "res");

      LLVMBuildStore(builder, res, res_ptr);
   }

   if (mode == SoA) {
      LLVMValueRef src[4];
      LLVMValueRef dst[4];
      LLVMValueRef con[4];
      LLVMValueRef res[4];
      unsigned i;

      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
         dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
         con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
         lp_build_name(src[i], "src.%c", "rgba"[i]);
         lp_build_name(con[i], "con.%c", "rgba"[i]);
         lp_build_name(dst[i], "dst.%c", "rgba"[i]);
      }

      lp_build_blend_soa(builder, blend, type, src, dst, con, res);

      for(i = 0; i < 4; ++i) {
         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
         lp_build_name(res[i], "res.%c", "rgba"[i]);
         LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
      }
   }

   LLVMBuildRetVoid(builder);;

   LLVMDisposeBuilder(builder);
   return func;
}
Exemple #14
0
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	unsigned i;
	
	unsigned color_count = 0;
	boolean has_color = false;

	/* Add the necessary export instructions */
	for (i = 0; i < ctx->output_reg_count; i++) {
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef output;
			unsigned adjusted_reg_idx = i +
					ctx->reserved_reg_count;

			output = LLVMBuildLoad(base->gallivm->builder,
				ctx->soa.outputs[i][chan], "");

			if (ctx->type == TGSI_PROCESSOR_VERTEX) {
				LLVMValueRef reg_index = lp_build_const_int32(
					base->gallivm,
					radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
				lp_build_intrinsic_binary(
					base->gallivm->builder,
					"llvm.AMDGPU.store.output",
					LLVMVoidTypeInContext(base->gallivm->context),
					output, reg_index);
			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				switch (ctx->r600_outputs[i].name) {
				case TGSI_SEMANTIC_COLOR:
					has_color = true;
					if ( color_count/4 < ctx->color_buffer_count) {
						if (ctx->fs_color_all) {
							for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
								LLVMValueRef reg_index = lp_build_const_int32(
									base->gallivm,
									(j * 4) + chan);
								lp_build_intrinsic_binary(
									base->gallivm->builder,
									"llvm.R600.store.pixel.color",
									LLVMVoidTypeInContext(base->gallivm->context),
									output, reg_index);
							}
						} else {
							LLVMValueRef reg_index = lp_build_const_int32(
								base->gallivm,
								(color_count++/4) * 4 + chan);
							lp_build_intrinsic_binary(
								base->gallivm->builder,
								"llvm.R600.store.pixel.color",
								LLVMVoidTypeInContext(base->gallivm->context),
								output, reg_index);
						}
					}
					break;
				case TGSI_SEMANTIC_POSITION:
					if (chan != 2)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.depth",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				case TGSI_SEMANTIC_STENCIL:
					if (chan != 1)
						continue;
					lp_build_intrinsic_unary(
						base->gallivm->builder,
						"llvm.R600.store.pixel.stencil",
						LLVMVoidTypeInContext(base->gallivm->context),
						output);
					break;
				}
			}
		}
	}

	if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT)
		lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0);
}
Exemple #15
0
static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
	struct lp_build_context * base = &bld_base->base;
	struct pipe_stream_output_info * so = ctx->stream_outputs;
	unsigned i;
	unsigned next_pos = 60;
	unsigned next_param = 0;

	unsigned color_count = 0;
	boolean has_color = false;

	if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) {
		for (i = 0; i < so->num_outputs; i++) {
			unsigned register_index = so->output[i].register_index;
			unsigned start_component = so->output[i].start_component;
			unsigned num_components = so->output[i].num_components;
			unsigned dst_offset = so->output[i].dst_offset;
			unsigned chan;
			LLVMValueRef elements[4];
			if (dst_offset < start_component) {
				for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
					elements[chan] = LLVMBuildLoad(base->gallivm->builder,
						ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], "");
				}
				start_component = 0;
			} else {
				for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
					elements[chan] = LLVMBuildLoad(base->gallivm->builder,
						ctx->soa.outputs[register_index][chan], "");
				}
			}
			LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
			LLVMValueRef args[4];
			args[0] = output;
			args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component);
			args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
			args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
			lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
				LLVMVoidTypeInContext(base->gallivm->context), args, 4, 0);
		}
	}

	/* Add the necessary export instructions */
	for (i = 0; i < ctx->output_reg_count; i++) {
		unsigned chan;
		LLVMValueRef elements[4];
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			elements[chan] = LLVMBuildLoad(base->gallivm->builder,
				ctx->soa.outputs[i][chan], "");
		}
		if (ctx->alpha_to_one && ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->r600_outputs[i].name == TGSI_SEMANTIC_COLOR)
			elements[3] = lp_build_const_float(base->gallivm, 1.0f);
		LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);

		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
			switch (ctx->r600_outputs[i].name) {
			case TGSI_SEMANTIC_POSITION:
			case TGSI_SEMANTIC_PSIZE: {
				LLVMValueRef args[3];
				args[0] = output;
				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
				lp_build_intrinsic(
					base->gallivm->builder,
					"llvm.R600.store.swizzle",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 3, 0);
				break;
			}
			case TGSI_SEMANTIC_CLIPVERTEX: {
				LLVMValueRef args[3];
				unsigned reg_index;
				LLVMValueRef adjusted_elements[4];
				for (reg_index = 0; reg_index < 2; reg_index ++) {
					for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
						LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan);
						LLVMValueRef base_vector = llvm_load_const_buffer(bld_base, offset, CONSTANT_BUFFER_1_ADDR_SPACE);
						args[0] = output;
						args[1] = base_vector;
						adjusted_elements[chan] = lp_build_intrinsic(base->gallivm->builder,
							"llvm.AMDGPU.dp4", bld_base->base.elem_type,
							args, 2, LLVMReadNoneAttribute);
					}
					args[0] = lp_build_gather_values(base->gallivm,
						adjusted_elements, 4);
					args[1] = lp_build_const_int32(base->gallivm, next_pos++);
					args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
					lp_build_intrinsic(
						base->gallivm->builder,
						"llvm.R600.store.swizzle",
						LLVMVoidTypeInContext(base->gallivm->context),
						args, 3, 0);
				}
				break;
			}
			case TGSI_SEMANTIC_CLIPDIST : {
				LLVMValueRef args[3];
				args[0] = output;
				args[1] = lp_build_const_int32(base->gallivm, next_pos++);
				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
				lp_build_intrinsic(
					base->gallivm->builder,
					"llvm.R600.store.swizzle",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 3, 0);
				args[1] = lp_build_const_int32(base->gallivm, next_param++);
				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
				lp_build_intrinsic(
					base->gallivm->builder,
					"llvm.R600.store.swizzle",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 3, 0);
				break;
			}
			case TGSI_SEMANTIC_FOG: {
				elements[0] = LLVMBuildLoad(base->gallivm->builder,
					ctx->soa.outputs[i][0], "");
				elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f);
				elements[3] = lp_build_const_float(base->gallivm, 1.0f);

				LLVMValueRef args[3];
				args[0] = lp_build_gather_values(base->gallivm, elements, 4);
				args[1] = lp_build_const_int32(base->gallivm, next_param++);
				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
				lp_build_intrinsic(
					base->gallivm->builder,
					"llvm.R600.store.swizzle",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 3, 0);
				break;
			}
			default: {
				LLVMValueRef args[3];
				args[0] = output;
				args[1] = lp_build_const_int32(base->gallivm, next_param++);
				args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
				lp_build_intrinsic(
					base->gallivm->builder,
					"llvm.R600.store.swizzle",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 3, 0);
				break;
			}
			}
		} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
			switch (ctx->r600_outputs[i].name) {
			case TGSI_SEMANTIC_COLOR:
				has_color = true;
				if ( color_count < ctx->color_buffer_count) {
					LLVMValueRef args[3];
					args[0] = output;
					if (ctx->fs_color_all) {
						for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
							args[1] = lp_build_const_int32(base->gallivm, j);
							args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
							lp_build_intrinsic(
								base->gallivm->builder,
								"llvm.R600.store.swizzle",
								LLVMVoidTypeInContext(base->gallivm->context),
								args, 3, 0);
						}
					} else {
						args[1] = lp_build_const_int32(base->gallivm, color_count++);
						args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
						lp_build_intrinsic(
							base->gallivm->builder,
							"llvm.R600.store.swizzle",
							LLVMVoidTypeInContext(base->gallivm->context),
							args, 3, 0);
					}
				}
				break;
			case TGSI_SEMANTIC_POSITION:
				lp_build_intrinsic_unary(
					base->gallivm->builder,
					"llvm.R600.store.pixel.depth",
					LLVMVoidTypeInContext(base->gallivm->context),
					LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], ""));
				break;
			case TGSI_SEMANTIC_STENCIL:
				lp_build_intrinsic_unary(
					base->gallivm->builder,
					"llvm.R600.store.pixel.stencil",
					LLVMVoidTypeInContext(base->gallivm->context),
					LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], ""));
				break;
			}
		}
	}
	// Add dummy exports
	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
		if (!next_param) {
			lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
				LLVMVoidTypeInContext(base->gallivm->context),
				lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM));
		}
		if (!(next_pos-60)) {
			lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
				LLVMVoidTypeInContext(base->gallivm->context),
				lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS));
		}
	}
	if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		if (!has_color) {
			lp_build_intrinsic_unary(base->gallivm->builder, "llvm.R600.store.dummy",
				LLVMVoidTypeInContext(base->gallivm->context),
				lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL));
		}
	}

}
Exemple #16
0
static bool make_trace(compile_t* c, gentype_t* g)
{
  // Do nothing if we have no fields.
  if(g->field_count == 0)
    return true;

  if(g->underlying == TK_CLASS)
  {
    // Special case the array trace function.
    AST_GET_CHILDREN(g->ast, pkg, id);
    const char* package = ast_name(pkg);
    const char* name = ast_name(id);

    if((package == c->str_1) && (name == c->str_Array))
    {
      genprim_array_trace(c, g);
      return true;
    }
  }

  // Create a trace function.
  const char* trace_name = genname_trace(g->type_name);
  LLVMValueRef trace_fn = codegen_addfun(c, trace_name, c->trace_type);

  codegen_startfun(c, trace_fn, false);
  LLVMSetFunctionCallConv(trace_fn, LLVMCCallConv);

  LLVMValueRef arg = LLVMGetParam(trace_fn, 0);
  LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, g->structure_ptr,
    "object");

  // If we don't ever trace anything, delete this function.
  bool need_trace;

  if(g->underlying == TK_TUPLETYPE)
  {
    // Create another function that traces the tuple members.
    const char* trace_tuple_name = genname_tracetuple(g->type_name);
    LLVMTypeRef trace_tuple_type = LLVMFunctionType(c->void_type,
      &g->primitive, 1, false);
    LLVMValueRef trace_tuple_fn = codegen_addfun(c, trace_tuple_name,
      trace_tuple_type);

    codegen_startfun(c, trace_tuple_fn, false);
    LLVMSetFunctionCallConv(trace_tuple_fn, LLVMCCallConv);

    LLVMValueRef arg = LLVMGetParam(trace_tuple_fn, 0);
    need_trace = trace_elements(c, g, arg);

    LLVMBuildRetVoid(c->builder);
    codegen_finishfun(c);

    if(need_trace)
    {
      // Get the tuple primitive.
      LLVMValueRef tuple_ptr = LLVMBuildStructGEP(c->builder, object, 1, "");
      LLVMValueRef tuple = LLVMBuildLoad(c->builder, tuple_ptr, "");

      // Call the tuple trace function with the unboxed primitive type.
      LLVMBuildCall(c->builder, trace_tuple_fn, &tuple, 1, "");
    } else {
      LLVMDeleteFunction(trace_tuple_fn);
    }
  } else {
    int extra = 1;

    // Actors have a pad.
    if(g->underlying == TK_ACTOR)
      extra++;

    need_trace = trace_fields(c, g, object, extra);
  }

  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);

  if(!need_trace)
    LLVMDeleteFunction(trace_fn);

  return true;
}
/**
 * Fetch a pixel into a 4 float AoS.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param aligned  whether the data is guaranteed to be aligned
 * \param ptr  address of the pixel block (or the texel if uncompressed)
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0, 0).
 * \return  a 4 element vector with the pixel's RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        boolean aligned,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
    LLVMBuilderRef builder = gallivm->builder;
    unsigned num_pixels = type.length / 4;
    struct lp_build_context bld;

    assert(type.length <= LP_MAX_VECTOR_LENGTH);
    assert(type.length % 4 == 0);

    lp_build_context_init(&bld, gallivm, type);

    /*
     * Trivial case
     *
     * The format matches the type (apart of a swizzle) so no need for
     * scaling or converting.
     */

    if (format_matches_type(format_desc, type) &&
            format_desc->block.bits <= type.width * 4 &&
            util_is_power_of_two(format_desc->block.bits)) {
        LLVMValueRef packed;
        LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type);
        unsigned vec_len = type.width * type.length;

        /*
         * The format matches the type (apart of a swizzle) so no need for
         * scaling or converting.
         */

        packed = lp_build_gather(gallivm, type.length/4,
                                 format_desc->block.bits, type.width*4,
                                 aligned, base_ptr, offset, TRUE);

        assert(format_desc->block.bits <= vec_len);

        packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
        return lp_build_format_swizzle_aos(format_desc, &bld, packed);
    }

    /*
     * Bit arithmetic
     */

    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
            (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
             format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
            format_desc->block.width == 1 &&
            format_desc->block.height == 1 &&
            util_is_power_of_two(format_desc->block.bits) &&
            format_desc->block.bits <= 32 &&
            format_desc->is_bitmask &&
            !format_desc->is_mixed &&
            (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
             format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED) &&
            !format_desc->channel[0].pure_integer) {

        LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
        LLVMValueRef res;
        unsigned k;

        /*
         * Unpack a pixel at a time into a <4 x float> RGBA vector
         */

        for (k = 0; k < num_pixels; ++k) {
            LLVMValueRef packed;

            packed = lp_build_gather_elem(gallivm, num_pixels,
                                          format_desc->block.bits, 32, aligned,
                                          base_ptr, offset, k, FALSE);

            tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                      format_desc,
                      packed);
        }

        /*
         * Type conversion.
         *
         * TODO: We could avoid floating conversion for integer to
         * integer conversions.
         */

        if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
            debug_printf("%s: unpacking %s with floating point\n",
                         __FUNCTION__, format_desc->short_name);
        }

        lp_build_conv(gallivm,
                      lp_float32_vec4_type(),
                      type,
                      tmps, num_pixels, &res, 1);

        return lp_build_format_swizzle_aos(format_desc, &bld, res);
    }

    /* If all channels are of same type and we are not using half-floats */
    if (format_desc->is_array &&
            format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) {
        assert(!format_desc->is_mixed);
        return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset);
    }

    /*
     * YUV / subsampled formats
     */

    if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
        struct lp_type tmp_type;
        LLVMValueRef tmp;

        memset(&tmp_type, 0, sizeof tmp_type);
        tmp_type.width = 8;
        tmp_type.length = num_pixels * 4;
        tmp_type.norm = TRUE;

        tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                format_desc,
                num_pixels,
                base_ptr,
                offset,
                i, j);

        lp_build_conv(gallivm,
                      tmp_type, type,
                      &tmp, 1, &tmp, 1);

        return tmp;
    }

    /*
     * Fallback to util_format_description::fetch_rgba_8unorm().
     */

    if (format_desc->fetch_rgba_8unorm &&
            !type.floating && type.width == 8 && !type.sign && type.norm) {
        /*
         * Fallback to calling util_format_description::fetch_rgba_8unorm.
         *
         * This is definitely not the most efficient way of fetching pixels, as
         * we miss the opportunity to do vectorization, but this it is a
         * convenient for formats or scenarios for which there was no opportunity
         * or incentive to optimize.
         */

        LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
        LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
        LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
        LLVMValueRef function;
        LLVMValueRef tmp_ptr;
        LLVMValueRef tmp;
        LLVMValueRef res;
        unsigned k;

        if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n",
                         __FUNCTION__, format_desc->short_name);
        }

        /*
         * Declare and bind format_desc->fetch_rgba_8unorm().
         */

        {
            /*
             * Function to call looks like:
             *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
             */
            LLVMTypeRef ret_type;
            LLVMTypeRef arg_types[4];
            LLVMTypeRef function_type;

            ret_type = LLVMVoidTypeInContext(gallivm->context);
            arg_types[0] = pi8t;
            arg_types[1] = pi8t;
            arg_types[2] = i32t;
            arg_types[3] = i32t;
            function_type = LLVMFunctionType(ret_type, arg_types,
                                             Elements(arg_types), 0);

            /* make const pointer for the C fetch_rgba_8unorm function */
            function = lp_build_const_int_pointer(gallivm,
                                                  func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

            /* cast the callee pointer to the function's type */
            function = LLVMBuildBitCast(builder, function,
                                        LLVMPointerType(function_type, 0),
                                        "cast callee");
        }

        tmp_ptr = lp_build_alloca(gallivm, i32t, "");

        res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

        /*
         * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result
         * in the SoA vectors.
         */

        for (k = 0; k < num_pixels; ++k) {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            LLVMValueRef args[4];

            args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
            args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                               base_ptr, offset, k);

            if (num_pixels == 1) {
                args[2] = i;
                args[3] = j;
            }
            else {
                args[2] = LLVMBuildExtractElement(builder, i, index, "");
                args[3] = LLVMBuildExtractElement(builder, j, index, "");
            }

            LLVMBuildCall(builder, function, args, Elements(args), "");

            tmp = LLVMBuildLoad(builder, tmp_ptr, "");

            if (num_pixels == 1) {
                res = tmp;
            }
            else {
                res = LLVMBuildInsertElement(builder, res, tmp, index, "");
            }
        }

        /* Bitcast from <n x i32> to <4n x i8> */
        res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

        return res;
    }

    /*
     * Fallback to util_format_description::fetch_rgba_float().
     */

    if (format_desc->fetch_rgba_float) {
        /*
         * Fallback to calling util_format_description::fetch_rgba_float.
         *
         * This is definitely not the most efficient way of fetching pixels, as
         * we miss the opportunity to do vectorization, but this it is a
         * convenient for formats or scenarios for which there was no opportunity
         * or incentive to optimize.
         */

        LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
        LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
        LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
        LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
        LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
        LLVMValueRef function;
        LLVMValueRef tmp_ptr;
        LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
        LLVMValueRef res;
        unsigned k;

        if (gallivm_debug & GALLIVM_DEBUG_PERF) {
            debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n",
                         __FUNCTION__, format_desc->short_name);
        }

        /*
         * Declare and bind format_desc->fetch_rgba_float().
         */

        {
            /*
             * Function to call looks like:
             *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
             */
            LLVMTypeRef ret_type;
            LLVMTypeRef arg_types[4];

            ret_type = LLVMVoidTypeInContext(gallivm->context);
            arg_types[0] = pf32t;
            arg_types[1] = pi8t;
            arg_types[2] = i32t;
            arg_types[3] = i32t;

            function = lp_build_const_func_pointer(gallivm,
                                                   func_to_pointer((func_pointer) format_desc->fetch_rgba_float),
                                                   ret_type,
                                                   arg_types, Elements(arg_types),
                                                   format_desc->short_name);
        }

        tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

        /*
         * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
         * in the SoA vectors.
         */

        for (k = 0; k < num_pixels; ++k) {
            LLVMValueRef args[4];

            args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
            args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                               base_ptr, offset, k);

            if (num_pixels == 1) {
                args[2] = i;
                args[3] = j;
            }
            else {
                LLVMValueRef index = lp_build_const_int32(gallivm, k);
                args[2] = LLVMBuildExtractElement(builder, i, index, "");
                args[3] = LLVMBuildExtractElement(builder, j, index, "");
            }

            LLVMBuildCall(builder, function, args, Elements(args), "");

            tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
        }

        lp_build_conv(gallivm,
                      lp_float32_vec4_type(),
                      type,
                      tmps, num_pixels, &res, 1);

        return res;
    }

    assert(!util_format_is_pure_integer(format_desc->format));

    assert(0);
    return lp_build_undef(gallivm, type);
}
Exemple #18
0
static LLVMValueRef desc_field(compile_t* c, LLVMValueRef desc, int index)
{
  LLVMValueRef ptr = LLVMBuildStructGEP(c->builder, desc, index, "");
  return LLVMBuildLoad(c->builder, ptr, "");
}
Exemple #19
0
static LLVMValueRef field_value(compile_t* c, LLVMValueRef object, int index)
{
  LLVMValueRef field = LLVMBuildStructGEP(c->builder, object, index, "");
  return LLVMBuildLoad(c->builder, field, "");
}
Exemple #20
0
LLVMValueRef gendesc_fetch(compile_t* c, LLVMValueRef object)
{
  LLVMValueRef ptr = LLVMBuildStructGEP(c->builder, object, 0, "");
  return LLVMBuildLoad(c->builder, ptr, "");
}
Exemple #21
0
LLVMValueRef gen_indir(struct node *ast)
{
	return LLVMBuildLoad(builder, lvalue(ast), "");
}
Exemple #22
0
/*
 * Create a function that deforms a tuple of type desc up to natts columns.
 */
LLVMValueRef
slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts)
{
	char	   *funcname;

	LLVMModuleRef mod;
	LLVMBuilderRef b;

	LLVMTypeRef deform_sig;
	LLVMValueRef v_deform_fn;

	LLVMBasicBlockRef b_entry;
	LLVMBasicBlockRef b_adjust_unavail_cols;
	LLVMBasicBlockRef b_find_start;

	LLVMBasicBlockRef b_out;
	LLVMBasicBlockRef b_dead;
	LLVMBasicBlockRef *attcheckattnoblocks;
	LLVMBasicBlockRef *attstartblocks;
	LLVMBasicBlockRef *attisnullblocks;
	LLVMBasicBlockRef *attcheckalignblocks;
	LLVMBasicBlockRef *attalignblocks;
	LLVMBasicBlockRef *attstoreblocks;

	LLVMValueRef v_offp;

	LLVMValueRef v_tupdata_base;
	LLVMValueRef v_tts_values;
	LLVMValueRef v_tts_nulls;
	LLVMValueRef v_slotoffp;
	LLVMValueRef v_slowp;
	LLVMValueRef v_nvalidp;
	LLVMValueRef v_nvalid;
	LLVMValueRef v_maxatt;

	LLVMValueRef v_slot;

	LLVMValueRef v_tupleheaderp;
	LLVMValueRef v_tuplep;
	LLVMValueRef v_infomask1;
	LLVMValueRef v_infomask2;
	LLVMValueRef v_bits;

	LLVMValueRef v_hoff;

	LLVMValueRef v_hasnulls;

	/* last column (0 indexed) guaranteed to exist */
	int			guaranteed_column_number = -1;

	/* current known alignment */
	int			known_alignment = 0;

	/* if true, known_alignment describes definite offset of column */
	bool		attguaranteedalign = true;

	int			attnum;

	mod = llvm_mutable_module(context);

	funcname = llvm_expand_funcname(context, "deform");

	/*
	 * Check which columns do have to exist, so we don't have to check the
	 * rows natts unnecessarily.
	 */
	for (attnum = 0; attnum < desc->natts; attnum++)
	{
		Form_pg_attribute att = TupleDescAttr(desc, attnum);

		/*
		 * If the column is possibly missing, we can't rely on its (or
		 * subsequent) NOT NULL constraints to indicate minimum attributes in
		 * the tuple, so stop here.
		 */
		if (att->atthasmissing)
			break;

		/*
		 * Column is NOT NULL and there've been no preceding missing columns,
		 * it's guaranteed that all columns up to here exist at least in the
		 * NULL bitmap.
		 */
		if (att->attnotnull)
			guaranteed_column_number = attnum;
	}

	/* Create the signature and function */
	{
		LLVMTypeRef param_types[1];

		param_types[0] = l_ptr(StructTupleTableSlot);

		deform_sig = LLVMFunctionType(LLVMVoidType(), param_types,
									  lengthof(param_types), 0);
	}
	v_deform_fn = LLVMAddFunction(mod, funcname, deform_sig);
	LLVMSetLinkage(v_deform_fn, LLVMInternalLinkage);
	LLVMSetParamAlignment(LLVMGetParam(v_deform_fn, 0), MAXIMUM_ALIGNOF);
	llvm_copy_attributes(AttributeTemplate, v_deform_fn);

	b_entry =
		LLVMAppendBasicBlock(v_deform_fn, "entry");
	b_adjust_unavail_cols =
		LLVMAppendBasicBlock(v_deform_fn, "adjust_unavail_cols");
	b_find_start =
		LLVMAppendBasicBlock(v_deform_fn, "find_startblock");
	b_out =
		LLVMAppendBasicBlock(v_deform_fn, "outblock");
	b_dead =
		LLVMAppendBasicBlock(v_deform_fn, "deadblock");

	b = LLVMCreateBuilder();

	attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
	attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);

	known_alignment = 0;

	LLVMPositionBuilderAtEnd(b, b_entry);

	/* perform allocas first, llvm only converts those to registers */
	v_offp = LLVMBuildAlloca(b, TypeSizeT, "v_offp");

	v_slot = LLVMGetParam(v_deform_fn, 0);

	v_tts_values =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_VALUES,
						  "tts_values");
	v_tts_nulls =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_ISNULL,
						  "tts_ISNULL");

	v_slotoffp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_OFF, "");
	v_slowp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_SLOW, "");
	v_nvalidp = LLVMBuildStructGEP(b, v_slot, FIELDNO_TUPLETABLESLOT_NVALID, "");

	v_tupleheaderp =
		l_load_struct_gep(b, v_slot, FIELDNO_TUPLETABLESLOT_TUPLE,
						  "tupleheader");
	v_tuplep =
		l_load_struct_gep(b, v_tupleheaderp, FIELDNO_HEAPTUPLEDATA_DATA,
						  "tuple");
	v_bits =
		LLVMBuildBitCast(b,
						 LLVMBuildStructGEP(b, v_tuplep,
											FIELDNO_HEAPTUPLEHEADERDATA_BITS,
											""),
						 l_ptr(LLVMInt8Type()),
						 "t_bits");
	v_infomask1 =
		l_load_struct_gep(b, v_tuplep,
						  FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK,
						  "infomask1");
	v_infomask2 =
		l_load_struct_gep(b,
						  v_tuplep, FIELDNO_HEAPTUPLEHEADERDATA_INFOMASK2,
						  "infomask2");

	/* t_infomask & HEAP_HASNULL */
	v_hasnulls =
		LLVMBuildICmp(b, LLVMIntNE,
					  LLVMBuildAnd(b,
								   l_int16_const(HEAP_HASNULL),
								   v_infomask1, ""),
					  l_int16_const(0),
					  "hasnulls");

	/* t_infomask2 & HEAP_NATTS_MASK */
	v_maxatt = LLVMBuildAnd(b,
							l_int16_const(HEAP_NATTS_MASK),
							v_infomask2,
							"maxatt");

	v_hoff =
		l_load_struct_gep(b, v_tuplep,
						  FIELDNO_HEAPTUPLEHEADERDATA_HOFF,
						  "t_hoff");

	v_tupdata_base =
		LLVMBuildGEP(b,
					 LLVMBuildBitCast(b,
									  v_tuplep,
									  l_ptr(LLVMInt8Type()),
									  ""),
					 &v_hoff, 1,
					 "v_tupdata_base");

	/*
	 * Load tuple start offset from slot. Will be reset below in case there's
	 * no existing deformed columns in slot.
	 */
	{
		LLVMValueRef v_off_start;

		v_off_start = LLVMBuildLoad(b, v_slotoffp, "v_slot_off");
		v_off_start = LLVMBuildZExt(b, v_off_start, TypeSizeT, "");
		LLVMBuildStore(b, v_off_start, v_offp);
	}

	/* build the basic block for each attribute, need them as jump target */
	for (attnum = 0; attnum < natts; attnum++)
	{
		attcheckattnoblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckattno", attnum);
		attstartblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.start", attnum);
		attisnullblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attisnull", attnum);
		attcheckalignblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.attcheckalign", attnum);
		attalignblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.align", attnum);
		attstoreblocks[attnum] =
			l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum);
	}

	/*
	 * Check if's guaranteed the all the desired attributes are available in
	 * tuple. If so, we can start deforming. If not, need to make sure to
	 * fetch the missing columns.
	 */
	if ((natts - 1) <= guaranteed_column_number)
	{
		/* just skip through unnecessary blocks */
		LLVMBuildBr(b, b_adjust_unavail_cols);
		LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);
		LLVMBuildBr(b, b_find_start);
	}
	else
	{
		LLVMValueRef v_params[3];

		/* branch if not all columns available */
		LLVMBuildCondBr(b,
						LLVMBuildICmp(b, LLVMIntULT,
									  v_maxatt,
									  l_int16_const(natts),
									  ""),
						b_adjust_unavail_cols,
						b_find_start);

		/* if not, memset tts_isnull of relevant cols to true */
		LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols);

		v_params[0] = v_slot;
		v_params[1] = LLVMBuildZExt(b, v_maxatt, LLVMInt32Type(), "");
		v_params[2] = l_int32_const(natts);
		LLVMBuildCall(b, llvm_get_decl(mod, FuncSlotGetmissingattrs),
					  v_params, lengthof(v_params), "");
		LLVMBuildBr(b, b_find_start);
	}

	LLVMPositionBuilderAtEnd(b, b_find_start);

	v_nvalid = LLVMBuildLoad(b, v_nvalidp, "");

	/*
	 * Build switch to go from nvalid to the right startblock.  Callers
	 * currently don't have the knowledge, but it'd be good for performance to
	 * avoid this check when it's known that the slot is empty (e.g. in scan
	 * nodes).
	 */
	if (true)
	{
		LLVMValueRef v_switch = LLVMBuildSwitch(b, v_nvalid,
												b_dead, natts);

		for (attnum = 0; attnum < natts; attnum++)
		{
			LLVMValueRef v_attno = l_int32_const(attnum);

			LLVMAddCase(v_switch, v_attno, attcheckattnoblocks[attnum]);
		}

	}
	else
	{
		/* jump from entry block to first block */
		LLVMBuildBr(b, attcheckattnoblocks[0]);
	}

	LLVMPositionBuilderAtEnd(b, b_dead);
	LLVMBuildUnreachable(b);

	/*
	 * Iterate over each attribute that needs to be deformed, build code to
	 * deform it.
	 */
	for (attnum = 0; attnum < natts; attnum++)
	{
		Form_pg_attribute att = TupleDescAttr(desc, attnum);
		LLVMValueRef v_incby;
		int			alignto;
		LLVMValueRef l_attno = l_int16_const(attnum);
		LLVMValueRef v_attdatap;
		LLVMValueRef v_resultp;

		/* build block checking whether we did all the necessary attributes */
		LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]);

		/*
		 * If this is the first attribute, slot->tts_nvalid was 0. Therefore
		 * reset offset to 0 to, it be from a previous execution.
		 */
		if (attnum == 0)
		{
			LLVMBuildStore(b, l_sizet_const(0), v_offp);
		}

		/*
		 * Build check whether column is available (i.e. whether the tuple has
		 * that many columns stored). We can avoid the branch if we know
		 * there's a subsequent NOT NULL column.
		 */
		if (attnum <= guaranteed_column_number)
		{
			LLVMBuildBr(b, attstartblocks[attnum]);
		}
		else
		{
			LLVMValueRef v_islast;

			v_islast = LLVMBuildICmp(b, LLVMIntUGE,
									 l_attno,
									 v_maxatt,
									 "heap_natts");
			LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]);
		}
		LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]);

		/*
		 * Check for nulls if necessary. No need to take missing attributes
		 * into account, because in case they're present the heaptuple's natts
		 * would have indicated that a slot_getmissingattrs() is needed.
		 */
		if (!att->attnotnull)
		{
			LLVMBasicBlockRef b_ifnotnull;
			LLVMBasicBlockRef b_ifnull;
			LLVMBasicBlockRef b_next;
			LLVMValueRef v_attisnull;
			LLVMValueRef v_nullbyteno;
			LLVMValueRef v_nullbytemask;
			LLVMValueRef v_nullbyte;
			LLVMValueRef v_nullbit;

			b_ifnotnull = attcheckalignblocks[attnum];
			b_ifnull = attisnullblocks[attnum];

			if (attnum + 1 == natts)
				b_next = b_out;
			else
				b_next = attcheckattnoblocks[attnum + 1];

			v_nullbyteno = l_int32_const(attnum >> 3);
			v_nullbytemask = l_int8_const(1 << ((attnum) & 0x07));
			v_nullbyte = l_load_gep1(b, v_bits, v_nullbyteno, "attnullbyte");

			v_nullbit = LLVMBuildICmp(b,
									  LLVMIntEQ,
									  LLVMBuildAnd(b, v_nullbyte, v_nullbytemask, ""),
									  l_int8_const(0),
									  "attisnull");

			v_attisnull = LLVMBuildAnd(b, v_hasnulls, v_nullbit, "");

			LLVMBuildCondBr(b, v_attisnull, b_ifnull, b_ifnotnull);

			LLVMPositionBuilderAtEnd(b, b_ifnull);

			/* store null-byte */
			LLVMBuildStore(b,
						   l_int8_const(1),
						   LLVMBuildGEP(b, v_tts_nulls, &l_attno, 1, ""));
			/* store zero datum */
			LLVMBuildStore(b,
						   l_sizet_const(0),
						   LLVMBuildGEP(b, v_tts_values, &l_attno, 1, ""));

			LLVMBuildBr(b, b_next);
			attguaranteedalign = false;
		}
		else
		{
Exemple #23
0
/**
 * Gather one element from scatter positions in memory.
 *
 * @sa lp_build_gather()
 */
LLVMValueRef
lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned length,
                     unsigned src_width,
                     unsigned dst_width,
                     boolean aligned,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
                     unsigned i,
                     boolean vector_justify)
{
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
   LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   LLVMValueRef ptr;
   LLVMValueRef res;

   assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));

   ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
   ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
   res = LLVMBuildLoad(gallivm->builder, ptr, "");

   /* XXX
    * On some archs we probably really want to avoid having to deal
    * with alignments lower than 4 bytes (if fetch size is a power of
    * two >= 32). On x86 it doesn't matter, however.
    * We should be able to guarantee full alignment for any kind of texture
    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
    * but I don't think that's quite what we wanted).
    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
    * enforcing what we want (which is what d3d10 does, the offset needs to
    * be aligned to element size, but GL has bytes regardless of element
    * size which would only leave us with minimum alignment restriction of 16
    * which doesn't make much sense if the type isn't 4x32bit). Due to
    * translation of offsets to first_elem in sampler_views it actually seems
    * gallium could not do anything else except 16 no matter what...
    */
   if (!aligned) {
      LLVMSetAlignment(res, 1);
   } else if (!util_is_power_of_two(src_width)) {
      /*
       * Full alignment is impossible, assume the caller really meant
       * the individual elements were aligned (e.g. 3x32bit format).
       * And yes the generated code may otherwise crash, llvm will
       * really assume 128bit alignment with a 96bit fetch (I suppose
       * that makes sense as it can just assume the upper 32bit to be
       * whatever).
       * Maybe the caller should be able to explicitly set this, but
       * this should cover all the 3-channel formats.
       */
      if (((src_width / 24) * 24 == src_width) &&
           util_is_power_of_two(src_width / 24)) {
          LLVMSetAlignment(res, src_width / 24);
      } else {
         LLVMSetAlignment(res, 1);
      }
   }

   assert(src_width <= dst_width);
   if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
      if (vector_justify) {
#ifdef PIPE_ARCH_BIG_ENDIAN
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
#endif
      }
   }

   return res;
}
Exemple #24
0
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef s,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     LLVMValueRef *face,
                     LLVMValueRef *face_s,
                     LLVMValueRef *face_t)
{
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rx, ry, rz;
   LLVMValueRef arx, ary, arz;
   LLVMValueRef c25 = lp_build_const_float(bld->gallivm, 0.25);
   LLVMValueRef arx_ge_ary, arx_ge_arz;
   LLVMValueRef ary_ge_arx, ary_ge_arz;
   LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;

   assert(bld->coord_bld.type.length == 4);

   /*
    * Use the average of the four pixel's texcoords to choose the face.
    */
   rx = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, s));
   ry = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, t));
   rz = lp_build_mul(float_bld, c25,
                     lp_build_sum_vector(&bld->coord_bld, r));

   arx = lp_build_abs(float_bld, rx);
   ary = lp_build_abs(float_bld, ry);
   arz = lp_build_abs(float_bld, rz);

   /*
    * Compare sign/magnitude of rx,ry,rz to determine face
    */
   arx_ge_ary = LLVMBuildFCmp(builder, LLVMRealUGE, arx, ary, "");
   arx_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, arx, arz, "");
   ary_ge_arx = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arx, "");
   ary_ge_arz = LLVMBuildFCmp(builder, LLVMRealUGE, ary, arz, "");

   arx_ge_ary_arz = LLVMBuildAnd(builder, arx_ge_ary, arx_ge_arz, "");
   ary_ge_arx_arz = LLVMBuildAnd(builder, ary_ge_arx, ary_ge_arz, "");

   {
      struct lp_build_if_state if_ctx;
      LLVMValueRef face_s_var;
      LLVMValueRef face_t_var;
      LLVMValueRef face_var;

      face_s_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var");
      face_t_var = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var");
      face_var = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var");

      lp_build_if(&if_ctx, bld->gallivm, arx_ge_ary_arz);
      {
         /* +/- X face */
         LLVMValueRef sign = lp_build_sgn(float_bld, rx);
         LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
         *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
         *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
         *face = lp_build_cube_face(bld, rx,
                                    PIPE_TEX_FACE_POS_X,
                                    PIPE_TEX_FACE_NEG_X);
         LLVMBuildStore(builder, *face_s, face_s_var);
         LLVMBuildStore(builder, *face_t, face_t_var);
         LLVMBuildStore(builder, *face, face_var);
      }
      lp_build_else(&if_ctx);
      {
         struct lp_build_if_state if_ctx2;

         lp_build_if(&if_ctx2, bld->gallivm, ary_ge_arx_arz);
         {
            /* +/- Y face */
            LLVMValueRef sign = lp_build_sgn(float_bld, ry);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
            *face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
            *face = lp_build_cube_face(bld, ry,
                                       PIPE_TEX_FACE_POS_Y,
                                       PIPE_TEX_FACE_NEG_Y);
            LLVMBuildStore(builder, *face_s, face_s_var);
            LLVMBuildStore(builder, *face_t, face_t_var);
            LLVMBuildStore(builder, *face, face_var);
         }
         lp_build_else(&if_ctx2);
         {
            /* +/- Z face */
            LLVMValueRef sign = lp_build_sgn(float_bld, rz);
            LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
            *face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
            *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
            *face = lp_build_cube_face(bld, rz,
                                       PIPE_TEX_FACE_POS_Z,
                                       PIPE_TEX_FACE_NEG_Z);
            LLVMBuildStore(builder, *face_s, face_s_var);
            LLVMBuildStore(builder, *face_t, face_t_var);
            LLVMBuildStore(builder, *face, face_var);
         }
         lp_build_endif(&if_ctx2);
      }

      lp_build_endif(&if_ctx);

      *face_s = LLVMBuildLoad(builder, face_s_var, "face_s");
      *face_t = LLVMBuildLoad(builder, face_t_var, "face_t");
      *face   = LLVMBuildLoad(builder, face_var, "face");
   }
}
Exemple #25
0
/**
 * Generate the fragment shader, depth/stencil test, and alpha tests.
 * \param i  which quad in the tile, in range [0,3]
 * \param do_tri_test  if 1, do triangle edge in/out testing
 */
static void
generate_fs(struct llvmpipe_context *lp,
            struct lp_fragment_shader *shader,
            const struct lp_fragment_shader_variant_key *key,
            LLVMBuilderRef builder,
            struct lp_type type,
            LLVMValueRef context_ptr,
            unsigned i,
            const struct lp_build_interp_soa_context *interp,
            struct lp_build_sampler_soa *sampler,
            LLVMValueRef *pmask,
            LLVMValueRef (*color)[4],
            LLVMValueRef depth_ptr,
            unsigned do_tri_test,
            LLVMValueRef c0,
            LLVMValueRef c1,
            LLVMValueRef c2,
            LLVMValueRef step0_ptr,
            LLVMValueRef step1_ptr,
            LLVMValueRef step2_ptr)
{
   const struct tgsi_token *tokens = shader->base.tokens;
   LLVMTypeRef elem_type;
   LLVMTypeRef vec_type;
   LLVMTypeRef int_vec_type;
   LLVMValueRef consts_ptr;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
   LLVMValueRef z = interp->pos[2];
   struct lp_build_flow_context *flow;
   struct lp_build_mask_context mask;
   boolean early_depth_test;
   unsigned attrib;
   unsigned chan;
   unsigned cbuf;

   assert(i < 4);

   elem_type = lp_build_elem_type(type);
   vec_type = lp_build_vec_type(type);
   int_vec_type = lp_build_int_vec_type(type);

   consts_ptr = lp_jit_context_constants(builder, context_ptr);

   flow = lp_build_flow_create(builder);

   memset(outputs, 0, sizeof outputs);

   lp_build_flow_scope_begin(flow);

   /* Declare the color and z variables */
   for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
	 color[cbuf][chan] = LLVMGetUndef(vec_type);
	 lp_build_flow_scope_declare(flow, &color[cbuf][chan]);
      }
   }
   lp_build_flow_scope_declare(flow, &z);

   /* do triangle edge testing */
   if (do_tri_test) {
      generate_tri_edge_mask(builder, i, pmask,
                             c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
   }
   else {
      *pmask = build_int32_vec_const(~0);
   }

   /* 'mask' will control execution based on quad's pixel alive/killed state */
   lp_build_mask_begin(&mask, flow, type, *pmask);

   if (key->scissor) {
      LLVMValueRef smask =
         generate_scissor_test(builder, context_ptr, interp, type);
      lp_build_mask_update(&mask, smask);
   }

   early_depth_test =
      key->depth.enabled &&
      !key->alpha.enabled &&
      !shader->info.uses_kill &&
      !shader->info.writes_z;

   if(early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_tgsi_soa(builder, tokens, type, &mask,
                     consts_ptr, interp->pos, interp->inputs,
                     outputs, sampler);

   for (attrib = 0; attrib < shader->info.num_outputs; ++attrib) {
      for(chan = 0; chan < NUM_CHANNELS; ++chan) {
         if(outputs[attrib][chan]) {
            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
            lp_build_name(out, "output%u.%u.%c", i, attrib, "xyzw"[chan]);

            switch (shader->info.output_semantic_name[attrib]) {
            case TGSI_SEMANTIC_COLOR:
               {
                  unsigned cbuf = shader->info.output_semantic_index[attrib];

                  lp_build_name(out, "color%u.%u.%c", i, attrib, "rgba"[chan]);

                  /* Alpha test */
                  /* XXX: should the alpha reference value be passed separately? */
		  /* XXX: should only test the final assignment to alpha */
                  if(cbuf == 0 && chan == 3) {
                     LLVMValueRef alpha = out;
                     LLVMValueRef alpha_ref_value;
                     alpha_ref_value = lp_jit_context_alpha_ref_value(builder, context_ptr);
                     alpha_ref_value = lp_build_broadcast(builder, vec_type, alpha_ref_value);
                     lp_build_alpha_test(builder, &key->alpha, type,
                                         &mask, alpha, alpha_ref_value);
                  }

		  color[cbuf][chan] = out;
                  break;
               }

            case TGSI_SEMANTIC_POSITION:
               if(chan == 2)
                  z = out;
               break;
            }
         }
      }
   }

   if(!early_depth_test)
      generate_depth(builder, key,
                     type, &mask,
                     z, depth_ptr);

   lp_build_mask_end(&mask);

   lp_build_flow_scope_end(flow);

   lp_build_flow_destroy(flow);

   *pmask = mask.value;

}
Exemple #26
0
static bool make_trace(compile_t* c, reachable_type_t* t)
{
  if(t->trace_fn == NULL)
    return true;

  if(t->underlying == TK_CLASS)
  {
    // Special case the array trace function.
    AST_GET_CHILDREN(t->ast, pkg, id);
    const char* package = ast_name(pkg);
    const char* name = ast_name(id);

    if((package == c->str_builtin) && (name == c->str_Array))
    {
      genprim_array_trace(c, t);
      return true;
    }
  }

  // Generate the trace functions.
  codegen_startfun(c, t->trace_fn, NULL, NULL);
  LLVMSetFunctionCallConv(t->trace_fn, LLVMCCallConv);

  LLVMValueRef ctx = LLVMGetParam(t->trace_fn, 0);
  LLVMValueRef arg = LLVMGetParam(t->trace_fn, 1);
  LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr,
    "object");

  // If we don't ever trace anything, delete this function.
  int extra = 0;

  // Non-structs have a type descriptor.
  if(t->underlying != TK_STRUCT)
    extra++;

  // Actors have a pad.
  if(t->underlying == TK_ACTOR)
    extra++;

  for(uint32_t i = 0; i < t->field_count; i++)
  {
    LLVMValueRef field = LLVMBuildStructGEP(c->builder, object, i + extra, "");

    if(!t->fields[i].embed)
    {
      // Call the trace function indirectly depending on rcaps.
      LLVMValueRef value = LLVMBuildLoad(c->builder, field, "");
      gentrace(c, ctx, value, t->fields[i].ast);
    } else {
      // Call the trace function directly without marking the field.
      LLVMValueRef trace_fn = t->fields[i].type->trace_fn;

      if(trace_fn != NULL)
      {
        LLVMValueRef args[2];
        args[0] = ctx;
        args[1] = LLVMBuildBitCast(c->builder, field, c->object_ptr, "");

        LLVMBuildCall(c->builder, trace_fn, args, 2, "");
      }
    }
  }

  LLVMBuildRetVoid(c->builder);
  codegen_finishfun(c);
  return true;
}
/**
 * Generate code for performing depth and/or stencil tests.
 * We operate on a vector of values (typically a 2x2 quad).
 *
 * \param depth  the depth test state
 * \param stencil  the front/back stencil state
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param mask  the alive/dead pixel mask for the quad (vector)
 * \param stencil_refs  the front/back stencil ref values (scalar)
 * \param z_src  the incoming depth/stencil values (a 2x2 quad)
 * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
 * \param facing  contains float value indicating front/back facing polygon
 */
void
lp_build_depth_stencil_test(LLVMBuilderRef builder,
                            const struct pipe_depth_state *depth,
                            const struct pipe_stencil_state stencil[2],
                            struct lp_type type,
                            const struct util_format_description *format_desc,
                            struct lp_build_mask_context *mask,
                            LLVMValueRef stencil_refs[2],
                            LLVMValueRef z_src,
                            LLVMValueRef zs_dst_ptr,
                            LLVMValueRef face,
                            LLVMValueRef counter)
{
   struct lp_build_context bld;
   struct lp_build_context sbld;
   struct lp_type s_type;
   LLVMValueRef zs_dst, z_dst = NULL;
   LLVMValueRef stencil_vals = NULL;
   LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
   LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
   LLVMValueRef orig_mask = mask->value;

   /* Sanity checking */
   {
      const unsigned z_swizzle = format_desc->swizzle[0];
      const unsigned s_swizzle = format_desc->swizzle[1];

      assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
             s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);

      assert(depth->enabled || stencil[0].enabled);

      assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
      assert(format_desc->block.width == 1);
      assert(format_desc->block.height == 1);

      if (stencil[0].enabled) {
         assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_USCALED ||
                format_desc->format == PIPE_FORMAT_S8_USCALED_Z24_UNORM);
      }

      assert(z_swizzle < 4);
      assert(format_desc->block.bits == type.width);
      if (type.floating) {
         assert(z_swizzle == 0);
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_FLOAT);
         assert(format_desc->channel[z_swizzle].size ==
                format_desc->block.bits);
      }
      else {
         assert(format_desc->channel[z_swizzle].type ==
                UTIL_FORMAT_TYPE_UNSIGNED);
         assert(format_desc->channel[z_swizzle].normalized);
         assert(!type.fixed);
         assert(!type.sign);
         assert(type.norm);
      }
   }


   /* Setup build context for Z vals */
   lp_build_context_init(&bld, builder, type);

   /* Setup build context for stencil vals */
   s_type = lp_type_int_vec(type.width);
   lp_build_context_init(&sbld, builder, s_type);

   /* Load current z/stencil value from z/stencil buffer */
   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");

   lp_build_name(zs_dst, "zsbufval");


   /* Compute and apply the Z/stencil bitmasks and shifts.
    */
   {
      unsigned z_shift, z_mask;
      unsigned s_shift, s_mask;

      if (get_z_shift_and_mask(format_desc, &z_shift, &z_mask)) {
         if (z_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, z_shift);
            z_src = LLVMBuildLShr(builder, z_src, shift, "");
         }

         if (z_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(type, z_mask);
            z_src = LLVMBuildAnd(builder, z_src, mask, "");
            z_dst = LLVMBuildAnd(builder, zs_dst, mask, "");
            z_bitmask = mask;  /* used below */
         }
         else {
            z_dst = zs_dst;
         }

         lp_build_name(z_dst, "zsbuf.z");
      }

      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
         if (s_shift) {
            LLVMValueRef shift = lp_build_const_int_vec(type, s_shift);
            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
            stencil_shift = shift;  /* used below */
         }
         else {
            stencil_vals = zs_dst;
         }

         if (s_mask != 0xffffffff) {
            LLVMValueRef mask = lp_build_const_int_vec(type, s_mask);
            stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
         }

         lp_build_name(stencil_vals, "stencil");
      }
   }


   if (stencil[0].enabled) {
      /* convert scalar stencil refs into vectors */
      stencil_refs[0] = lp_build_broadcast_scalar(&bld, stencil_refs[0]);
      stencil_refs[1] = lp_build_broadcast_scalar(&bld, stencil_refs[1]);

      s_pass_mask = lp_build_stencil_test(&sbld, stencil,
                                          stencil_refs, stencil_vals, face);

      /* apply stencil-fail operator */
      {
         LLVMValueRef s_fail_mask = lp_build_andc(&bld, orig_mask, s_pass_mask);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, S_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            s_fail_mask, face);
      }
   }

   if (depth->enabled) {
      /* compare src Z to dst Z, returning 'pass' mask */
      z_pass = lp_build_cmp(&bld, depth->func, z_src, z_dst);

      if (!stencil[0].enabled) {
         /* We can potentially skip all remaining operations here, but only
          * if stencil is disabled because we still need to update the stencil
          * buffer values.  Don't need to update Z buffer values.
          */
         lp_build_mask_update(mask, z_pass);
      }

      if (depth->writemask) {
         LLVMValueRef zselectmask = mask->value;

         /* mask off bits that failed Z test */
         zselectmask = LLVMBuildAnd(builder, zselectmask, z_pass, "");

         /* mask off bits that failed stencil test */
         if (s_pass_mask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
         }

         /* if combined Z/stencil format, mask off the stencil bits */
         if (z_bitmask) {
            zselectmask = LLVMBuildAnd(builder, zselectmask, z_bitmask, "");
         }

         /* Mix the old and new Z buffer values.
          * z_dst[i] = (zselectmask[i] & z_src[i]) | (~zselectmask[i] & z_dst[i])
          */
         z_dst = lp_build_select_bitwise(&bld, zselectmask, z_src, z_dst);
      }

      if (stencil[0].enabled) {
         /* update stencil buffer values according to z pass/fail result */
         LLVMValueRef z_fail_mask, z_pass_mask;

         /* apply Z-fail operator */
         z_fail_mask = lp_build_andc(&bld, orig_mask, z_pass);
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_FAIL_OP,
                                            stencil_refs, stencil_vals,
                                            z_fail_mask, face);

         /* apply Z-pass operator */
         z_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, z_pass, "");
         stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                            stencil_refs, stencil_vals,
                                            z_pass_mask, face);
      }
   }
   else {
      /* No depth test: apply Z-pass operator to stencil buffer values which
       * passed the stencil test.
       */
      s_pass_mask = LLVMBuildAnd(bld.builder, orig_mask, s_pass_mask, "");
      stencil_vals = lp_build_stencil_op(&sbld, stencil, Z_PASS_OP,
                                         stencil_refs, stencil_vals,
                                         s_pass_mask, face);
   }

   /* The Z bits are already in the right place but we may need to shift the
    * stencil bits before ORing Z with Stencil to make the final pixel value.
    */
   if (stencil_vals && stencil_shift)
      stencil_vals = LLVMBuildShl(bld.builder, stencil_vals,
                                  stencil_shift, "");

   /* Finally, merge/store the z/stencil values */
   if ((depth->enabled && depth->writemask) ||
       (stencil[0].enabled && stencil[0].writemask)) {

      if (z_dst && stencil_vals)
         zs_dst = LLVMBuildOr(bld.builder, z_dst, stencil_vals, "");
      else if (z_dst)
         zs_dst = z_dst;
      else
         zs_dst = stencil_vals;

      LLVMBuildStore(builder, zs_dst, zs_dst_ptr);
   }

   if (s_pass_mask)
      lp_build_mask_update(mask, s_pass_mask);

   if (depth->enabled && stencil[0].enabled)
      lp_build_mask_update(mask, z_pass);

   if (counter)
      lp_build_occlusion_count(builder, type, mask->value, counter);
}
Exemple #28
0
LLVMValueRef
lp_build_mask_value(struct lp_build_mask_context *mask)
{
   return LLVMBuildLoad(mask->skip.gallivm->builder, mask->var, "");
}
Exemple #29
0
/**
 * Load depth/stencil values.
 * The stored values are linear, swizzle them.
 *
 * \param type  the data type of the fragment depth/stencil values
 * \param format_desc  description of the depth/stencil surface
 * \param loop_counter  the current loop iteration
 * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
 * \param depth_stride  stride of the depth/stencil buffer
 * \param z_fb  contains z values loaded from fb (may include padding)
 * \param s_fb  contains s values loaded from fb (may include padding)
 */
void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                     struct lp_type z_src_type,
                                     const struct util_format_description *format_desc,
                                     LLVMValueRef depth_ptr,
                                     LLVMValueRef depth_stride,
                                     LLVMValueRef *z_fb,
                                     LLVMValueRef *s_fb,
                                     LLVMValueRef loop_counter)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
   LLVMValueRef zs_dst1, zs_dst2;
   LLVMValueRef zs_dst_ptr;
   LLVMValueRef depth_offset1, depth_offset2;
   LLVMTypeRef load_ptr_type;
   unsigned depth_bytes = format_desc->block.bits / 8;
   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
   struct lp_type zs_load_type = zs_type;

   zs_load_type.length = zs_load_type.length / 2;
   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);

   if (z_src_type.length == 4) {
      unsigned i;
      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 1), "");
      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
                                          lp_build_const_int32(gallivm, 2), "");
      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                          depth_stride, "");
      depth_offset1 = LLVMBuildMul(builder, looplsb,
                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");

      /* just concatenate the loaded 2x2 values into 4-wide vector */
      for (i = 0; i < 4; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, i);
      }
   }
   else {
      unsigned i;
      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
                                         lp_build_const_int32(gallivm, 1), "");
      assert(z_src_type.length == 8);
      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
      /*
       * We load 2x4 values, and need to swizzle them (order
       * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
       */
      for (i = 0; i < 8; i++) {
         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
      }
   }

   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");

   /* Load current z/stencil values from z/stencil buffer */
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");

   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                  LLVMConstVector(shuffles, zs_type.length), "");
   *s_fb = *z_fb;

   if (format_desc->block.bits < z_src_type.width) {
      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
      *z_fb = LLVMBuildZExt(builder, *z_fb,
                            lp_build_int_vec_type(gallivm, z_src_type), "");
   }

   else if (format_desc->block.bits > 32) {
      /* rely on llvm to handle too wide vector we have here nicely */
      unsigned i;
      struct lp_type typex2 = zs_type;
      struct lp_type s_type = zs_type;
      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
      LLVMValueRef tmp;

      typex2.width = typex2.width / 2;
      typex2.length = typex2.length * 2;
      s_type.width = s_type.width / 2;
      s_type.floating = 0;

      tmp = LLVMBuildBitCast(builder, *z_fb,
                             lp_build_vec_type(gallivm, typex2), "");

      for (i = 0; i < zs_type.length; i++) {
         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
      }
      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles1, zs_type.length), "");
      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
                                     LLVMConstVector(shuffles2, zs_type.length), "");
      *s_fb = LLVMBuildBitCast(builder, *s_fb,
                               lp_build_vec_type(gallivm, s_type), "");
      lp_build_name(*s_fb, "s_dst");
   }

   lp_build_name(*z_fb, "z_dst");
   lp_build_name(*s_fb, "s_dst");
   lp_build_name(*z_fb, "z_dst");
}
Exemple #30
0
static LLVMValueRef box_is_box(compile_t* c, ast_t* left_type,
  LLVMValueRef l_value, LLVMValueRef r_value, int possible_boxes)
{
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(l_value)) == LLVMPointerTypeKind);
  pony_assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMPointerTypeKind);

  LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder);
  LLVMBasicBlockRef checkbox_block = codegen_block(c, "is_checkbox");
  LLVMBasicBlockRef box_block = codegen_block(c, "is_box");
  LLVMBasicBlockRef num_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0)
    num_block = codegen_block(c, "is_num");
  LLVMBasicBlockRef tuple_block = NULL;
  if((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0)
    tuple_block = codegen_block(c, "is_tuple");
  LLVMBasicBlockRef post_block = codegen_block(c, "is_post");

  LLVMValueRef eq_addr = LLVMBuildICmp(c->builder, LLVMIntEQ, l_value, r_value,
    "");
  LLVMBuildCondBr(c->builder, eq_addr, post_block, checkbox_block);

  // Check whether we have two boxed objects of the same type.
  LLVMPositionBuilderAtEnd(c->builder, checkbox_block);
  LLVMValueRef l_desc = gendesc_fetch(c, l_value);
  LLVMValueRef r_desc = gendesc_fetch(c, r_value);
  LLVMValueRef same_type = LLVMBuildICmp(c->builder, LLVMIntEQ, l_desc, r_desc,
    "");
  LLVMValueRef l_typeid = NULL;
  if((possible_boxes & BOXED_SUBTYPES_UNBOXED) != 0)
  {
    l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef boxed_mask = LLVMConstInt(c->i32, 1, false);
    LLVMValueRef left_boxed = LLVMBuildAnd(c->builder, l_typeid, boxed_mask,
      "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    left_boxed = LLVMBuildICmp(c->builder, LLVMIntEQ, left_boxed, zero, "");
    LLVMValueRef both_boxed = LLVMBuildAnd(c->builder, same_type, left_boxed,
      "");
    LLVMBuildCondBr(c->builder, both_boxed, box_block, post_block);
  } else {
    LLVMBuildCondBr(c->builder, same_type, box_block, post_block);
  }

  // Check whether it's a numeric primitive or a tuple.
  LLVMPositionBuilderAtEnd(c->builder, box_block);
  if((possible_boxes & BOXED_SUBTYPES_BOXED) == BOXED_SUBTYPES_BOXED)
  {
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef num_mask = LLVMConstInt(c->i32, 2, false);
    LLVMValueRef boxed_num = LLVMBuildAnd(c->builder, l_typeid, num_mask, "");
    LLVMValueRef zero = LLVMConstInt(c->i32, 0, false);
    boxed_num = LLVMBuildICmp(c->builder, LLVMIntEQ, boxed_num, zero, "");
    LLVMBuildCondBr(c->builder, boxed_num, num_block, tuple_block);
  } else if((possible_boxes & BOXED_SUBTYPES_NUMERIC) != 0) {
    LLVMBuildBr(c->builder, num_block);
  } else {
    pony_assert((possible_boxes & BOXED_SUBTYPES_TUPLE) != 0);
    LLVMBuildBr(c->builder, tuple_block);
  }

  LLVMValueRef args[3];
  LLVMValueRef is_num = NULL;
  if(num_block != NULL)
  {
    // Get the machine word size and memcmp without unboxing.
    LLVMPositionBuilderAtEnd(c->builder, num_block);
    if(l_typeid == NULL)
      l_typeid = gendesc_typeid(c, l_value);
    LLVMValueRef num_sizes = LLVMBuildBitCast(c->builder, c->numeric_sizes,
      c->void_ptr, "");
    args[0] = LLVMBuildZExt(c->builder, l_typeid, c->intptr, "");
    LLVMValueRef size = LLVMBuildInBoundsGEP(c->builder, num_sizes, args, 1,
      "");
    size = LLVMBuildBitCast(c->builder, size, LLVMPointerType(c->i32, 0), "");
    size = LLVMBuildLoad(c->builder, size, "");
    LLVMSetAlignment(size, 4);
    LLVMValueRef one = LLVMConstInt(c->i32, 1, false);
    args[0] = LLVMBuildInBoundsGEP(c->builder, l_value, &one, 1, "");
    args[0] = LLVMBuildBitCast(c->builder, args[0], c->void_ptr, "");
    args[1] = LLVMBuildInBoundsGEP(c->builder, r_value, &one, 1, "");
    args[1] = LLVMBuildBitCast(c->builder, args[1], c->void_ptr, "");
    args[2] = LLVMBuildZExt(c->builder, size, c->intptr, "");
    is_num = gencall_runtime(c, "memcmp", args, 3, "");
    is_num = LLVMBuildICmp(c->builder, LLVMIntEQ, is_num,
      LLVMConstInt(c->i32, 0, false), "");
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMValueRef is_tuple = NULL;
  if(tuple_block != NULL)
  {
    // Call the type-specific __is function, which will unbox the tuples.
    LLVMPositionBuilderAtEnd(c->builder, tuple_block);
    reach_type_t* r_left = reach_type(c->reach, left_type);
    reach_method_t* is_fn = reach_method(r_left, TK_BOX, stringtab("__is"),
      NULL);
    pony_assert(is_fn != NULL);
    LLVMValueRef func = gendesc_vtable(c, l_value, is_fn->vtable_index);
    LLVMTypeRef params[2];
    params[0] = c->object_ptr;
    params[1] = c->object_ptr;
    LLVMTypeRef type = LLVMFunctionType(c->i1, params, 2, false);
    func = LLVMBuildBitCast(c->builder, func, LLVMPointerType(type, 0), "");
    args[0] = l_value;
    args[1] = r_value;
    is_tuple = codegen_call(c, func, args, 2);
    LLVMBuildBr(c->builder, post_block);
  }

  LLVMPositionBuilderAtEnd(c->builder, post_block);
  LLVMValueRef phi = LLVMBuildPhi(c->builder, c->i1, "");
  LLVMValueRef one = LLVMConstInt(c->i1, 1, false);
  LLVMValueRef zero = LLVMConstInt(c->i1, 0, false);
  LLVMAddIncoming(phi, &one, &this_block, 1);
  if(is_num != NULL)
    LLVMAddIncoming(phi, &is_num, &num_block, 1);
  if(is_tuple != NULL)
    LLVMAddIncoming(phi, &is_tuple, &tuple_block, 1);
  LLVMAddIncoming(phi, &zero, &checkbox_block, 1);
  return phi;
}