Пример #1
0
/**
 * Helper used by lp_build_cube_lookup().
 *
 * Emits code that evaluates -0.5 / abs(coord) in the given build context
 * and returns the resulting value.
 */
static LLVMValueRef
lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
{
   LLVMValueRef minus_half;
   LLVMValueRef magnitude;

   minus_half = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, -0.5);
   magnitude = lp_build_abs(coord_bld, coord);

   /* ima = -0.5 / |coord| */
   return lp_build_div(coord_bld, minus_half, magnitude);
}
Пример #2
0
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 *
 * The face is chosen from the average of the four pixels' (s, t, r)
 * texcoords: the axis with the largest absolute average wins (X is tested
 * first, then Y, otherwise Z).  The selection is emitted as a nested
 * if/else in the generated code; each branch stores its results into
 * stack slots which are loaded again once the branches rejoin.
 *
 * \param bld     sampling build context
 * \param s       s texcoords
 * \param t       t texcoords
 * \param r       r texcoords
 * \param face    returns the selected face value
 * \param face_s  returns the s coordinate within the selected face
 * \param face_t  returns the t coordinate within the selected face
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef s,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     LLVMValueRef *face,
                     LLVMValueRef *face_s,
                     LLVMValueRef *face_t)
{
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef quarter = lp_build_const_float(bld->gallivm, 0.25);
   LLVMValueRef sum;
   LLVMValueRef avg_x, avg_y, avg_z;
   LLVMValueRef mag_x, mag_y, mag_z;
   LLVMValueRef x_ge_y, x_ge_z;
   LLVMValueRef y_ge_x, y_ge_z;
   LLVMValueRef x_is_major, y_is_major;
   LLVMValueRef s_slot, t_slot, face_slot;
   LLVMValueRef sign, ima;
   LLVMValueRef new_s, new_t, new_face;
   struct lp_build_if_state outer_if;
   struct lp_build_if_state inner_if;

   assert(bld->coord_bld.type.length == 4);

   /*
    * Average the four pixels' texcoords; the averages pick the face.
    */
   sum = lp_build_sum_vector(coord_bld, s);
   avg_x = lp_build_mul(float_bld, quarter, sum);
   sum = lp_build_sum_vector(coord_bld, t);
   avg_y = lp_build_mul(float_bld, quarter, sum);
   sum = lp_build_sum_vector(coord_bld, r);
   avg_z = lp_build_mul(float_bld, quarter, sum);

   mag_x = lp_build_abs(float_bld, avg_x);
   mag_y = lp_build_abs(float_bld, avg_y);
   mag_z = lp_build_abs(float_bld, avg_z);

   /*
    * Find the major axis by comparing the magnitudes pairwise.
    */
   x_ge_y = LLVMBuildFCmp(builder, LLVMRealUGE, mag_x, mag_y, "");
   x_ge_z = LLVMBuildFCmp(builder, LLVMRealUGE, mag_x, mag_z, "");
   y_ge_x = LLVMBuildFCmp(builder, LLVMRealUGE, mag_y, mag_x, "");
   y_ge_z = LLVMBuildFCmp(builder, LLVMRealUGE, mag_y, mag_z, "");

   x_is_major = LLVMBuildAnd(builder, x_ge_y, x_ge_z, "");
   y_is_major = LLVMBuildAnd(builder, y_ge_x, y_ge_z, "");

   /* Stack slots that carry the results out of the conditional branches. */
   s_slot = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_s_var");
   t_slot = lp_build_alloca(bld->gallivm, bld->coord_bld.vec_type, "face_t_var");
   face_slot = lp_build_alloca(bld->gallivm, bld->int_bld.vec_type, "face_var");

   lp_build_if(&outer_if, bld->gallivm, x_is_major);
   {
      /* +/- X face */
      sign = lp_build_sgn(float_bld, avg_x);
      ima = lp_build_cube_ima(coord_bld, s);
      new_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
      new_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
      new_face = lp_build_cube_face(bld, avg_x,
                                    PIPE_TEX_FACE_POS_X,
                                    PIPE_TEX_FACE_NEG_X);
      LLVMBuildStore(builder, new_s, s_slot);
      LLVMBuildStore(builder, new_t, t_slot);
      LLVMBuildStore(builder, new_face, face_slot);
   }
   lp_build_else(&outer_if);
   {
      lp_build_if(&inner_if, bld->gallivm, y_is_major);
      {
         /* +/- Y face */
         sign = lp_build_sgn(float_bld, avg_y);
         ima = lp_build_cube_ima(coord_bld, t);
         new_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
         new_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
         new_face = lp_build_cube_face(bld, avg_y,
                                       PIPE_TEX_FACE_POS_Y,
                                       PIPE_TEX_FACE_NEG_Y);
         LLVMBuildStore(builder, new_s, s_slot);
         LLVMBuildStore(builder, new_t, t_slot);
         LLVMBuildStore(builder, new_face, face_slot);
      }
      lp_build_else(&inner_if);
      {
         /* +/- Z face */
         sign = lp_build_sgn(float_bld, avg_z);
         ima = lp_build_cube_ima(coord_bld, r);
         new_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
         new_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
         new_face = lp_build_cube_face(bld, avg_z,
                                       PIPE_TEX_FACE_POS_Z,
                                       PIPE_TEX_FACE_NEG_Z);
         LLVMBuildStore(builder, new_s, s_slot);
         LLVMBuildStore(builder, new_t, t_slot);
         LLVMBuildStore(builder, new_face, face_slot);
      }
      lp_build_endif(&inner_if);
   }
   lp_build_endif(&outer_if);

   /* Read back whichever branch's results were stored. */
   *face_s = LLVMBuildLoad(builder, s_slot, "face_s");
   *face_t = LLVMBuildLoad(builder, t_slot, "face_t");
   *face   = LLVMBuildLoad(builder, face_slot, "face");
}
/**
 * Emit LLVM for one TGSI instruction.
 * \param return TRUE for success, FALSE otherwise
 */
static boolean
emit_instruction(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   const struct tgsi_opcode_info *info,
   int *pc)
{
   LLVMValueRef src0, src1, src2;
   LLVMValueRef tmp0, tmp1;
   LLVMValueRef dst0;

   /*
    * Stores and write masks are handled in a general fashion after the long
    * instruction opcode switch statement.
    *
    * Although not stricitly necessary, we avoid generating instructions for
    * channels which won't be stored, in cases where's that easy. For some
    * complex instructions, like texture sampling, it is more convenient to
    * assume a full writemask and then let LLVM optimization passes eliminate
    * redundant code.
    */

   (*pc)++;

   assert(info->num_dst <= 1);
   if (info->num_dst) {
      dst0 = bld->base.undef;
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ARL:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_floor(&bld->base, src0);
      break;

   case TGSI_OPCODE_MOV:
      dst0 = emit_fetch(bld, inst, 0);
      break;

   case TGSI_OPCODE_LIT:
      return FALSE;

   case TGSI_OPCODE_RCP:
   /* TGSI_OPCODE_RECIP */
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_rcp(&bld->base, src0);
      break;

   case TGSI_OPCODE_RSQ:
   /* TGSI_OPCODE_RECIPSQRT */
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = lp_build_abs(&bld->base, src0);
      dst0 = lp_build_rsqrt(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_EXP:
      return FALSE;

   case TGSI_OPCODE_LOG:
      return FALSE;

   case TGSI_OPCODE_MUL:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      dst0 = lp_build_mul(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_ADD:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      dst0 = lp_build_add(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_DP3:
   /* TGSI_OPCODE_DOT3 */
      return FALSE;

   case TGSI_OPCODE_DP4:
   /* TGSI_OPCODE_DOT4 */
      return FALSE;

   case TGSI_OPCODE_DST:
      return FALSE;

   case TGSI_OPCODE_MIN:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      dst0 = lp_build_max(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_MAX:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      dst0 = lp_build_max(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_SLT:
   /* TGSI_OPCODE_SETLT */
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_SGE:
   /* TGSI_OPCODE_SETGE */
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_MAD:
   /* TGSI_OPCODE_MADD */
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      src2 = emit_fetch(bld, inst, 2);
      tmp0 = lp_build_mul(&bld->base, src0, src1);
      dst0 = lp_build_add(&bld->base, tmp0, src2);
      break;

   case TGSI_OPCODE_SUB:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      dst0 = lp_build_sub(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_LRP:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      src2 = emit_fetch(bld, inst, 2);
      tmp0 = lp_build_sub(&bld->base, src1, src2);
      tmp0 = lp_build_mul(&bld->base, src0, tmp0);
      dst0 = lp_build_add(&bld->base, tmp0, src2);
      break;

   case TGSI_OPCODE_CND:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      src2 = emit_fetch(bld, inst, 2);
      tmp1 = lp_build_const_vec(bld->base.type, 0.5);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1);
      dst0 = lp_build_select(&bld->base, tmp0, src0, src1);
      break;

   case TGSI_OPCODE_DP2A:
      return FALSE;

   case TGSI_OPCODE_FRC:
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = lp_build_floor(&bld->base, src0);
      dst0 = lp_build_sub(&bld->base, src0, tmp0);
      break;

   case TGSI_OPCODE_CLAMP:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      src2 = emit_fetch(bld, inst, 2);
      tmp0 = lp_build_max(&bld->base, src0, src1);
      dst0 = lp_build_min(&bld->base, tmp0, src2);
      break;

   case TGSI_OPCODE_FLR:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_floor(&bld->base, src0);
      break;

   case TGSI_OPCODE_ROUND:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_round(&bld->base, src0);
      break;

   case TGSI_OPCODE_EX2:
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_exp2(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_LG2:
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_log2(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_POW:
      src0 = emit_fetch(bld, inst, 0);
      src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      src1 = emit_fetch(bld, inst, 1);
      src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X);
      dst0 = lp_build_pow(&bld->base, src0, src1);
      break;

   case TGSI_OPCODE_XPD:
      return FALSE;

   case TGSI_OPCODE_ABS:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_abs(&bld->base, src0);
      break;

   case TGSI_OPCODE_RCC:
      /* deprecated? */
      assert(0);
      return FALSE;

   case TGSI_OPCODE_DPH:
      return FALSE;

   case TGSI_OPCODE_COS:
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_cos(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_DDX:
      return FALSE;

   case TGSI_OPCODE_DDY:
      return FALSE;

   case TGSI_OPCODE_KILP:
      /* predicated kill */
      return FALSE;

   case TGSI_OPCODE_KIL:
      /* conditional kill */
      return FALSE;

   case TGSI_OPCODE_PK2H:
      return FALSE;
      break;

   case TGSI_OPCODE_PK2US:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4B:
      return FALSE;
      break;

   case TGSI_OPCODE_PK4UB:
      return FALSE;

   case TGSI_OPCODE_RFL:
      return FALSE;

   case TGSI_OPCODE_SEQ:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_SFL:
      dst0 = bld->base.zero;
      break;

   case TGSI_OPCODE_SGT:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_SIN:
      src0 = emit_fetch(bld, inst, 0);
      tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X);
      dst0 = lp_build_sin(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_SLE:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_SNE:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1);
      dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero);
      break;

   case TGSI_OPCODE_STR:
      dst0 = bld->base.one;
      break;

   case TGSI_OPCODE_TEX:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
      break;

   case TGSI_OPCODE_TXD:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV);
      break;

   case TGSI_OPCODE_UP2H:
      /* deprecated */
      assert (0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP2US:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4B:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_UP4UB:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_X2D:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ARR:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_round(&bld->base, src0);
      break;

   case TGSI_OPCODE_BRA:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CAL:
      return FALSE;

   case TGSI_OPCODE_RET:
      return FALSE;

   case TGSI_OPCODE_END:
      *pc = -1;
      break;

   case TGSI_OPCODE_SSG:
   /* TGSI_OPCODE_SGN */
      tmp0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_sgn(&bld->base, tmp0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = emit_fetch(bld, inst, 0);
      src1 = emit_fetch(bld, inst, 1);
      src2 = emit_fetch(bld, inst, 2);
      tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero);
      dst0 = lp_build_select(&bld->base, tmp0, src1, src2);
      break;

   case TGSI_OPCODE_SCS:
      return FALSE;

   case TGSI_OPCODE_TXB:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
      break;

   case TGSI_OPCODE_NRM:
      /* fall-through */
   case TGSI_OPCODE_NRM4:
      return FALSE;

   case TGSI_OPCODE_DIV:
      /* deprecated */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_DP2:
      return FALSE;

   case TGSI_OPCODE_TXL:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD);
      break;

   case TGSI_OPCODE_TXP:
      dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED);
      break;

   case TGSI_OPCODE_BRK:
      return FALSE;

   case TGSI_OPCODE_IF:
      return FALSE;

   case TGSI_OPCODE_BGNLOOP:
      return FALSE;

   case TGSI_OPCODE_BGNSUB:
      return FALSE;

   case TGSI_OPCODE_ELSE:
      return FALSE;

   case TGSI_OPCODE_ENDIF:
      return FALSE;

   case TGSI_OPCODE_ENDLOOP:
      return FALSE;

   case TGSI_OPCODE_ENDSUB:
      return FALSE;

   case TGSI_OPCODE_PUSHA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_POPA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CEIL:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_ceil(&bld->base, src0);
      break;

   case TGSI_OPCODE_I2F:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_NOT:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TRUNC:
      src0 = emit_fetch(bld, inst, 0);
      dst0 = lp_build_trunc(&bld->base, src0);
      break;

   case TGSI_OPCODE_SHL:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ISHR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_AND:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_OR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_MOD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_XOR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      return FALSE;

   case TGSI_OPCODE_EMIT:
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      return FALSE;
   }
   
   if (info->num_dst) {
      emit_store(bld, inst, 0, dst0);
   }

   return TRUE;
}
Пример #4
0
/**
 * Generate code to compute coordinate gradient (rho).
 *
 * Per coordinate, the larger of |d/dx| and |d/dy| is scaled by the texture
 * size minified to the first mip level; rho is the maximum of the scaled
 * values over the texture's dimensions.
 *
 * \param bld   sampling build context
 * \param unit  texture unit, used to query the first mip level
 * \param ddx   partial derivatives of (s, t, r, q) with respect to X
 * \param ddy   partial derivatives of (s, t, r, q) with respect to Y
 * \return the rho value
 *
 * XXX: The resulting rho is scalar, so we ignore all but the first element of
 * derivatives that are passed by the shader.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             unsigned unit,
             const LLVMValueRef ddx[4],
             const LLVMValueRef ddy[4])
{
   struct lp_build_context *int_size_bld = &bld->int_size_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   const unsigned dims = bld->dims;
   /* Number of coordinates that take part: at most s, t and r. */
   const unsigned num_coords = dims < 3 ? dims : 3;
   LLVMValueRef grad_x, grad_y;
   LLVMValueRef scaled;
   LLVMValueRef level, level_vec;
   LLVMValueRef size_i, size_f;
   LLVMValueRef result;
   unsigned i;

   if (dims <= 1) {
      /* One coordinate only: use the s derivatives as they are. */
      grad_x = ddx[0];
      grad_y = ddy[0];
   }
   else {
      /* Pack the derivatives of each coordinate into size vectors. */
      grad_x = float_size_bld->undef;
      grad_y = float_size_bld->undef;

      for (i = 0; i < num_coords; ++i) {
         LLVMValueRef lane = LLVMConstInt(i32t, i, 0);

         grad_x = LLVMBuildInsertElement(builder, grad_x, ddx[i], lane, "");
         grad_y = LLVMBuildInsertElement(builder, grad_y, ddy[i], lane, "");
      }
   }

   /* max(|d/dx|, |d/dy|) for every coordinate */
   grad_x = lp_build_abs(float_size_bld, grad_x);
   grad_y = lp_build_abs(float_size_bld, grad_y);
   scaled = lp_build_max(float_size_bld, grad_x, grad_y);

   /* Scale by the texture size at the first mip level. */
   level = bld->dynamic_state->first_level(bld->dynamic_state,
                                           bld->gallivm, unit);
   level_vec = lp_build_broadcast_scalar(int_size_bld, level);
   size_i = lp_build_minify(int_size_bld, bld->int_size, level_vec);
   size_f = lp_build_int_to_float(float_size_bld, size_i);

   scaled = lp_build_mul(float_size_bld, scaled, size_f);

   if (dims <= 1) {
      return scaled;
   }

   /* Reduce the per-coordinate values to their maximum. */
   result = LLVMBuildExtractElement(builder, scaled,
                                    LLVMConstInt(i32t, 0, 0), "");
   for (i = 1; i < num_coords; ++i) {
      LLVMValueRef lane = LLVMConstInt(i32t, i, 0);
      LLVMValueRef elem = LLVMBuildExtractElement(builder, scaled, lane, "");

      result = lp_build_max(float_bld, result, elem);
   }

   return result;
}
/**
 * Register fetch.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   struct lp_type type = bld->base.type;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   LLVMValueRef res;
   unsigned chan;

   assert(!reg->Register.Indirect);

   /*
    * Fetch the from the register file.
    */

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      /*
       * Get the constants components
       */

      res = bld->base.undef;
      for (chan = 0; chan < 4; ++chan) {
         LLVMValueRef index;
         LLVMValueRef scalar_ptr;
         LLVMValueRef scalar;
         LLVMValueRef swizzle;

         index = LLVMConstInt(LLVMInt32Type(),
                              reg->Register.Index*4 + chan,
                              0);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");

         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]);

         /*
          * NOTE: constants array is always assumed to be RGBA
          */

         swizzle = LLVMConstInt(LLVMInt32Type(), chan, 0);

         res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, "");
      }

      /*
       * Broadcast the first quaternion to all others.
       *
       * XXX: could be factored into a reusable function.
       */

      if (type.length > 4) {
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
         unsigned i;

         for (chan = 0; chan < 4; ++chan) {
            shuffles[chan] = LLVMConstInt(LLVMInt32Type(), chan, 0);
         }

         for (i = 4; i < type.length; ++i) {
            shuffles[i] = shuffles[i % 4];
         }

         res = LLVMBuildShuffleVector(bld->base.builder,
                                      res, bld->base.undef,
                                      LLVMConstVector(shuffles, type.length),
                                      "");
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      {
         LLVMValueRef temp_ptr;
         temp_ptr = bld->temps[reg->Register.Index];
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /*
    * Apply sign modifier.
    */

   if (reg->Register.Absolute) {
      res = lp_build_abs(&bld->base, res);
   }

   if(reg->Register.Negate) {
      res = lp_build_negate(&bld->base, res);
   }

   /*
    * Swizzle the argument
    */

   res = swizzle_aos(bld, res,
                     reg->Register.SwizzleX,
                     reg->Register.SwizzleY,
                     reg->Register.SwizzleZ,
                     reg->Register.SwizzleW);

   return res;
}