void lp_emit_declaration_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { struct gallivm_state *gallivm = bld->bld_base.base.gallivm; LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx; for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_INLINED_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, vec_type, array_size, ""); } else { bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); } break; case TGSI_FILE_OUTPUT: bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); break; case TGSI_FILE_SAMPLER_VIEW: /* * The target stored here MUST match whatever there actually * is in the set sampler views (what about return type?). */ assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); for (idx = first; idx <= last; ++idx) { bld->sv[idx] = decl->SamplerView; } break; default: /* don't need to declare other vars */ break; } } }
static void emit_declaration( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_declaration *decl) { LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); unsigned first = decl->Range.First; unsigned last = decl->Range.Last; unsigned idx; for (idx = first; idx <= last; ++idx) { switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: assert(idx < LP_MAX_TGSI_TEMPS); if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), last + 1, 0); bld->temps_array = lp_build_array_alloca(bld->base.builder, vec_type, array_size, ""); } else { bld->temps[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); } break; case TGSI_FILE_OUTPUT: bld->outputs[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; case TGSI_FILE_ADDRESS: assert(idx < LP_MAX_TGSI_ADDRS); bld->addr[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; case TGSI_FILE_PREDICATE: assert(idx < LP_MAX_TGSI_PREDS); bld->preds[idx] = lp_build_alloca(bld->base.builder, vec_type, ""); break; default: /* don't need to declare other vars */ break; } } }
/**
 * Open a C-style for loop:
 *
 *    for (i = start; i cmp_op end; i += step)
 *
 * In contrast to lp_build_loop, this flavour is meant to check its
 * condition on entry.  This function records the loop parameters in
 * \p state, stores the start value into a freshly allocated counter
 * variable, branches into a new "loop_begin" block where the counter is
 * loaded, and finally leaves the builder positioned in a new "loop_body"
 * block.
 *
 * \param state    the for loop state, initialized here
 * \param gallivm  the gallivm state
 * \param start    starting value of the iterator
 * \param cmp_op   comparison operator used for comparing the current value
 *                 with the end value
 * \param end      value the iterator is compared against
 * \param step     value added to the iterator at the end of each iteration
 */
void
lp_build_for_loop_begin(struct lp_build_for_loop_state *state,
                        struct gallivm_state *gallivm,
                        LLVMValueRef start,
                        LLVMIntPredicate cmp_op,
                        LLVMValueRef end,
                        LLVMValueRef step)
{
   LLVMBuilderRef b = gallivm->builder;
   LLVMTypeRef counter_type = LLVMTypeOf(start);

   /* start, end and step must all share one type */
   assert(counter_type == LLVMTypeOf(end));
   assert(counter_type == LLVMTypeOf(step));

   /* remember the loop parameters for the matching end call */
   state->gallivm = gallivm;
   state->cond = cmp_op;
   state->end = end;
   state->step = step;

   state->begin = lp_build_insert_new_block(gallivm, "loop_begin");
   state->counter_var = lp_build_alloca(gallivm, counter_type, "loop_counter");

   /* counter = start, then fall into the loop header */
   LLVMBuildStore(b, start, state->counter_var);
   LLVMBuildBr(b, state->begin);

   /* loop header: fetch the current counter value */
   LLVMPositionBuilderAtEnd(b, state->begin);
   state->counter = LLVMBuildLoad(b, state->counter_var, "");

   /* subsequent code is emitted into the loop body */
   state->body = lp_build_insert_new_block(gallivm, "loop_body");
   LLVMPositionBuilderAtEnd(b, state->body);
}
/**
 * Begin a section of code which is predicated on a mask.
 *
 * The mask context is zeroed, an "execution_mask" variable of the integer
 * vector type matching \p type is allocated and initialized with \p value,
 * and a skip section is opened on mask->skip.
 *
 * \param mask     the mask context, initialized here
 * \param gallivm  the gallivm state
 * \param type     the type of the mask
 * \param value    initial mask value, stored into the mask variable
 */
void
lp_build_mask_begin(struct lp_build_mask_context *mask,
                    struct gallivm_state *gallivm,
                    struct lp_type type,
                    LLVMValueRef value)
{
   LLVMTypeRef mask_vec_type;

   memset(mask, 0, sizeof *mask);

   /* a single integer as wide as the whole mask vector */
   mask->reg_type = LLVMIntTypeInContext(gallivm->context,
                                         type.width * type.length);

   mask_vec_type = lp_build_int_vec_type(gallivm, type);
   mask->var = lp_build_alloca(gallivm, mask_vec_type, "execution_mask");

   LLVMBuildStore(gallivm->builder, value, mask->var);

   lp_build_flow_skip_begin(&mask->skip, gallivm);
}
/**
 * Open a loop.
 *
 * Creates a "loop_begin" block, allocates a "loop_counter" variable
 * initialized to \p start, branches into the new block and loads the
 * counter there.  On return the builder is positioned in the loop block
 * and state->counter holds the loaded counter value.
 *
 * \param state    the loop state, initialized here
 * \param gallivm  the gallivm state
 * \param start    initial value of the loop counter
 */
void
lp_build_loop_begin(struct lp_build_loop_state *state,
                    struct gallivm_state *gallivm,
                    LLVMValueRef start)
{
   LLVMBuilderRef b = gallivm->builder;
   LLVMTypeRef counter_type = LLVMTypeOf(start);

   state->gallivm = gallivm;

   state->block = lp_build_insert_new_block(gallivm, "loop_begin");
   state->counter_var = lp_build_alloca(gallivm, counter_type, "loop_counter");

   /* counter = start, then jump to the top of the loop */
   LLVMBuildStore(b, start, state->counter_var);
   LLVMBuildBr(b, state->block);

   /* top of the loop: read the current counter */
   LLVMPositionBuilderAtEnd(b, state->block);
   state->counter = LLVMBuildLoad(b, state->counter_var, "");
}
/**
 * Fetch pixels into an AoS vector of the requested type.
 *
 * type.length / 4 pixels are fetched.  Five strategies are tried in order,
 * and the first whose precondition holds produces the result:
 *   1. trivial case: the format already matches the type (apart from a
 *      swizzle), so the data is gathered and bit-cast,
 *   2. bit arithmetic unpacking for small plain bitmask formats,
 *   3. YUV / subsampled formats,
 *   4. per-pixel call to format_desc->fetch_rgba_8unorm (only for 8-bit
 *      unsigned normalized destination types),
 *   5. per-pixel call to format_desc->fetch_rgba_float.
 * If none applies the function asserts and returns an undefined value.
 *
 * \param gallivm      the gallivm state
 * \param format_desc  describes format of the image we're fetching from
 * \param type         destination vector type; its length must be a
 *                     multiple of 4 and at most LP_MAX_VECTOR_LENGTH
 * \param base_ptr     base address handed to the gather helpers
 * \param offset       offset handed to the gather helpers together with
 *                     base_ptr (presumably locating each pixel block --
 *                     TODO confirm against lp_build_gather)
 * \param i, j         the sub-block pixel coordinates.  For non-compressed
 *                     formats these will always be (0, 0).
 * \return a vector of type \p type holding the pixels' RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* each pixel occupies four vector elements (one per RGBA channel) */
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart of a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /* gather the raw pixel data, one block per pixel */
      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      /* reinterpret the gathered data as the destination vector type */
      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /* convert the num_pixels float4 vectors into one vector of `type` */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      /* intermediate type: 8-bit normalized, four channels per pixel */
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      /* tmp is used as both source and destination of the conversion */
      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_8unorm function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee");

      /* scratch i32 that receives one fetched pixel per call */
      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert
       * the result into the <n x i32> result vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         /* with a single pixel i and j are used as-is, otherwise lane k */
         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         } else {
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         } else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee");

      /* scratch <4 x float> that receives one fetched pixel per call */
      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and collect
       * the per-pixel <4 x float> results in tmps[].
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         /* with a single pixel i and j are used as-is, otherwise lane k */
         if (num_pixels == 1) {
            args[2] = i;
            args[3] = j;
         } else {
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      /* convert the num_pixels float4 vectors into one vector of `type` */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   /* no strategy applies to this format/type combination */
   assert(0);
   return lp_build_undef(gallivm, type);
}
/**
 * Emit code that picks the cube map face and derives the per-face
 * texture coordinates.
 *
 * The face is selected from the average of the four texcoords in each of
 * s, t and r; the axis with the largest magnitude wins (X first, then Y,
 * otherwise Z).  The per-face values are written to variables inside the
 * generated if/else and loaded back afterwards.
 *
 * \param bld     sampling build context
 * \param s,t,r   incoming texture coordinates
 * \param face    returns the selected face
 * \param face_s  returns the per-face s coordinate
 * \param face_t  returns the per-face t coordinate
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef s,
                     LLVMValueRef t,
                     LLVMValueRef r,
                     LLVMValueRef *face,
                     LLVMValueRef *face_s,
                     LLVMValueRef *face_t)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef b = gallivm->builder;
   struct lp_build_context *fbld = &bld->float_bld;
   struct lp_build_context *cbld = &bld->coord_bld;
   LLVMValueRef quarter = lp_build_const_float(gallivm, 0.25);
   LLVMValueRef avg_x, avg_y, avg_z;
   LLVMValueRef mag_x, mag_y, mag_z;
   LLVMValueRef x_ge_y, x_ge_z;
   LLVMValueRef y_ge_x, y_ge_z;
   LLVMValueRef x_is_major, y_is_major;
   LLVMValueRef s_slot, t_slot, face_slot;
   struct lp_build_if_state outer_if;

   assert(cbld->type.length == 4);

   /*
    * Average the four pixels' texcoords; the averages choose the face.
    */
   avg_x = lp_build_mul(fbld, quarter, lp_build_sum_vector(cbld, s));
   avg_y = lp_build_mul(fbld, quarter, lp_build_sum_vector(cbld, t));
   avg_z = lp_build_mul(fbld, quarter, lp_build_sum_vector(cbld, r));

   mag_x = lp_build_abs(fbld, avg_x);
   mag_y = lp_build_abs(fbld, avg_y);
   mag_z = lp_build_abs(fbld, avg_z);

   /*
    * Compare the magnitudes to find the major axis.
    */
   x_ge_y = LLVMBuildFCmp(b, LLVMRealUGE, mag_x, mag_y, "");
   x_ge_z = LLVMBuildFCmp(b, LLVMRealUGE, mag_x, mag_z, "");
   y_ge_x = LLVMBuildFCmp(b, LLVMRealUGE, mag_y, mag_x, "");
   y_ge_z = LLVMBuildFCmp(b, LLVMRealUGE, mag_y, mag_z, "");

   x_is_major = LLVMBuildAnd(b, x_ge_y, x_ge_z, "");
   y_is_major = LLVMBuildAnd(b, y_ge_x, y_ge_z, "");

   /* variables carrying the results out of the conditional branches */
   s_slot = lp_build_alloca(gallivm, cbld->vec_type, "face_s_var");
   t_slot = lp_build_alloca(gallivm, cbld->vec_type, "face_t_var");
   face_slot = lp_build_alloca(gallivm, bld->int_bld.vec_type, "face_var");

   lp_build_if(&outer_if, gallivm, x_is_major);
   {
      /* +/- X face */
      LLVMValueRef sign = lp_build_sgn(fbld, avg_x);
      LLVMValueRef ima = lp_build_cube_ima(cbld, s);
      LLVMValueRef new_s = lp_build_cube_coord(cbld, sign, +1, r, ima);
      LLVMValueRef new_t = lp_build_cube_coord(cbld, NULL, +1, t, ima);
      LLVMValueRef new_face = lp_build_cube_face(bld, avg_x,
                                                 PIPE_TEX_FACE_POS_X,
                                                 PIPE_TEX_FACE_NEG_X);

      LLVMBuildStore(b, new_s, s_slot);
      LLVMBuildStore(b, new_t, t_slot);
      LLVMBuildStore(b, new_face, face_slot);
   }
   lp_build_else(&outer_if);
   {
      struct lp_build_if_state inner_if;

      lp_build_if(&inner_if, gallivm, y_is_major);
      {
         /* +/- Y face */
         LLVMValueRef sign = lp_build_sgn(fbld, avg_y);
         LLVMValueRef ima = lp_build_cube_ima(cbld, t);
         LLVMValueRef new_s = lp_build_cube_coord(cbld, NULL, -1, s, ima);
         LLVMValueRef new_t = lp_build_cube_coord(cbld, sign, -1, r, ima);
         LLVMValueRef new_face = lp_build_cube_face(bld, avg_y,
                                                    PIPE_TEX_FACE_POS_Y,
                                                    PIPE_TEX_FACE_NEG_Y);

         LLVMBuildStore(b, new_s, s_slot);
         LLVMBuildStore(b, new_t, t_slot);
         LLVMBuildStore(b, new_face, face_slot);
      }
      lp_build_else(&inner_if);
      {
         /* +/- Z face */
         LLVMValueRef sign = lp_build_sgn(fbld, avg_z);
         LLVMValueRef ima = lp_build_cube_ima(cbld, r);
         LLVMValueRef new_s = lp_build_cube_coord(cbld, sign, -1, s, ima);
         LLVMValueRef new_t = lp_build_cube_coord(cbld, NULL, +1, t, ima);
         LLVMValueRef new_face = lp_build_cube_face(bld, avg_z,
                                                    PIPE_TEX_FACE_POS_Z,
                                                    PIPE_TEX_FACE_NEG_Z);

         LLVMBuildStore(b, new_s, s_slot);
         LLVMBuildStore(b, new_t, t_slot);
         LLVMBuildStore(b, new_face, face_slot);
      }
      lp_build_endif(&inner_if);
   }
   lp_build_endif(&outer_if);

   /* read back whichever branch's values were stored */
   *face_s = LLVMBuildLoad(b, s_slot, "face_s");
   *face_t = LLVMBuildLoad(b, t_slot, "face_t");
   *face = LLVMBuildLoad(b, face_slot, "face");
}
/**
 * Initialize the bld->a, dadq fields.  This involves fetching
 * those values from the arrays which are passed into the JIT function.
 *
 * For every attribute (except those using LP_INTERP_POSITION, which are
 * skipped) and every channel enabled in the attribute's mask, this stores
 * the starting value in a freshly allocated variable referenced by
 * bld->a[attrib][chan] and the per-lane delta vector in
 * bld->dadq[attrib][chan].
 *
 * \param bld       interpolation context to initialize
 * \param a0_ptr    array holding the a0 coefficients
 * \param dadx_ptr  array holding the dadx coefficients
 * \param dady_ptr  array holding the dady coefficients
 */
static void
coeffs_init(struct lp_build_interp_soa_context *bld,
            LLVMValueRef a0_ptr,
            LLVMValueRef dadx_ptr,
            LLVMValueRef dady_ptr)
{
   struct lp_build_context *coeff_bld = &bld->coeff_bld;
   struct lp_build_context *setup_bld = &bld->setup_bld;
   struct gallivm_state *gallivm = coeff_bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef pixoffx, pixoffy;
   unsigned attrib;
   unsigned chan;
   unsigned i;

   /*
    * Build the per-lane pixel offset vectors from the quad_offset_x/y
    * tables, one element per lane of the coeff vector type.
    */
   pixoffx = coeff_bld->undef;
   pixoffy = coeff_bld->undef;
   for (i = 0; i < coeff_bld->type.length; i++) {
      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
   }

   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
      const unsigned mask = bld->mask[attrib];
      const unsigned interp = bld->interp[attrib];
      /* element index of this attribute's first channel in the arrays */
      LLVMValueRef index = lp_build_const_int32(gallivm,
                                attrib * TGSI_NUM_CHANNELS);
      LLVMValueRef ptr;
      /* coefficients not loaded below stay zero */
      LLVMValueRef dadxaos = setup_bld->zero;
      LLVMValueRef dadyaos = setup_bld->zero;
      LLVMValueRef a0aos = setup_bld->zero;

      /* always fetch all 4 values for performance/simplicity */
      switch (interp) {
      case LP_INTERP_PERSPECTIVE:
         /* fall-through */

      case LP_INTERP_LINEAR:
         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadxaos = LLVMBuildLoad(builder, ptr, "");

         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         dadyaos = LLVMBuildLoad(builder, ptr, "");

         attrib_name(dadxaos, attrib, 0, ".dadxaos");
         attrib_name(dadyaos, attrib, 0, ".dadyaos");
         /* fall-through */

      case LP_INTERP_CONSTANT:
      case LP_INTERP_FACING:
         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
         ptr = LLVMBuildBitCast(builder, ptr,
               LLVMPointerType(setup_bld->vec_type, 0), "");
         a0aos = LLVMBuildLoad(builder, ptr, "");
         attrib_name(a0aos, attrib, 0, ".a0aos");
         break;

      case LP_INTERP_POSITION:
         /* Nothing to do as the position coeffs are already setup in slot 0 */
         continue;

      default:
         /* unknown mode: in non-debug builds the zero coefficients are used */
         assert(0);
         break;
      }

      /*
       * a = a0 + (x * dadx + y * dady)
       * a0aos is the attrib value at top left corner of stamp
       */
      if (interp != LP_INTERP_CONSTANT &&
          interp != LP_INTERP_FACING) {
         LLVMValueRef x = lp_build_broadcast_scalar(setup_bld, bld->x);
         LLVMValueRef y = lp_build_broadcast_scalar(setup_bld, bld->y);
         a0aos = lp_build_fmuladd(builder, x, dadxaos, a0aos);
         a0aos = lp_build_fmuladd(builder, y, dadyaos, a0aos);
      }

      /*
       * dadq = {0, dadx, dady, dadx + dady}
       * for two quads (side by side) the original note lists:
       * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
       * NOTE(review): that is only seven entries for eight lanes; the
       * actual lane values are dadx * quad_offset_x[i] +
       * dady * quad_offset_y[i] -- confirm against those tables.
       */
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         /* this generates a CRAPLOAD of shuffles... */
         if (mask & (1 << chan)) {
            LLVMValueRef dadx, dady;
            LLVMValueRef dadq, dadq2;
            LLVMValueRef a;
            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);

            if (attrib == 0 && chan == 0) {
               /* attribute 0, channel 0: x itself, with unit x gradient */
               a = bld->x;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dadx = coeff_bld->one;
               dady = coeff_bld->zero;
            } else if (attrib == 0 && chan == 1) {
               /* attribute 0, channel 1: y itself, with unit y gradient */
               a = bld->y;
               if (bld->pos_offset) {
                  a = LLVMBuildFAdd(builder, a, lp_build_const_float(gallivm, bld->pos_offset), "");
               }
               a = lp_build_broadcast_scalar(coeff_bld, a);
               dady = coeff_bld->one;
               dadx = coeff_bld->zero;
            } else {
               /* broadcast this channel's coefficients across all lanes */
               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, dadxaos, chan_index);
               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                                 coeff_bld->type, dadyaos, chan_index);

               /*
                * a = {a, a, a, a}
                */
               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
                                              coeff_bld->type, a0aos, chan_index);
            }

            /* per-lane delta: dadx * pixel x offset + dady * pixel y offset */
            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
            dadq = LLVMBuildFAdd(builder, dadx, dady, "");

            /*
             * Compute the attrib values on the upper-left corner of each
             * group of quads.
             * Note that if we process 2 quads at once this doesn't
             * really do exactly what we want.
             * We need to access elem 0 and 2 respectively later if we process
             * 2 quads at once.
             */

            if (interp != LP_INTERP_CONSTANT &&
                interp != LP_INTERP_FACING) {
               /* a += 2 * dadq */
               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
               a = LLVMBuildFAdd(builder, a, dadq2, "");
            }

#if PERSPECTIVE_DIVIDE_PER_QUAD
            /*
             * a *= 1 / w
             */

            /*
             * XXX since we're only going to access elements 0,2 out of 8
             * if we have 8-wide vectors we should do the division only 4-wide.
             * a is really a 2-elements in a 4-wide vector disguised as 8-wide
             * in this case.
             *
             * NOTE(review): bld->a[][] entries written by this function are
             * lp_build_alloca() results (pointers), and attribs using
             * LP_INTERP_POSITION are skipped above -- confirm that
             * bld->a[0][3] really holds the w value expected here.
             */
            if (interp == LP_INTERP_PERSPECTIVE) {
               LLVMValueRef w = bld->a[0][3];
               assert(attrib != 0);
               assert(bld->mask[0] & TGSI_WRITEMASK_W);
               if (!bld->oow) {
                  /* 1/w is computed once and cached in bld->oow */
                  bld->oow = lp_build_rcp(coeff_bld, w);
                  lp_build_name(bld->oow, "oow");
               }
               a = lp_build_mul(coeff_bld, a, bld->oow);
            }
#endif

            attrib_name(a, attrib, chan, ".a");
            attrib_name(dadq, attrib, chan, ".dadq");

            /* a lives in a variable; dadq is kept as a plain value */
            bld->a[attrib][chan] = lp_build_alloca(gallivm,
                                                   LLVMTypeOf(a), "");
            LLVMBuildStore(builder, a, bld->a[attrib][chan]);
            bld->dadq[attrib][chan] = dadq;
         }
      }
   }
}
static void emit_declaration( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_declaration *decl) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); switch(decl->Declaration.File) { case TGSI_FILE_ADDRESS: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { ctx->soa.addr[idx][chan] = lp_build_alloca( &ctx->gallivm, ctx->soa.bld_base.uint_bld.elem_type, ""); } } break; } case TGSI_FILE_TEMPORARY: lp_emit_declaration_soa(bld_base, decl); break; case TGSI_FILE_INPUT: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { ctx->load_input(ctx, idx, decl); } } break; case TGSI_FILE_SYSTEM_VALUE: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { ctx->load_system_value(ctx, idx, decl); } } break; case TGSI_FILE_OUTPUT: { unsigned idx; for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { unsigned chan; assert(idx < RADEON_LLVM_MAX_OUTPUTS); for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { ctx->soa.outputs[idx][chan] = lp_build_alloca(&ctx->gallivm, ctx->soa.bld_base.base.elem_type, ""); } } ctx->output_reg_count = MAX2(ctx->output_reg_count, decl->Range.Last + 1); break; } default: break; } }
/**
 * Texture sampling in AoS format.  Used when sampling common 32-bit/texel
 * formats.  1D/2D/3D/cube texture supported.  All mipmap sampling modes
 * but only limited texture coord wrap modes.
 *
 * \param bld           sampling build context
 * \param unit          texture unit, passed on to the lod / mip level helpers
 * \param s,t,r         texture coordinates
 * \param ddx,ddy       coordinate derivatives; replaced by derivatives of
 *                      the per-face coordinates for cube textures
 * \param lod_bias      optional lod bias
 * \param explicit_lod  optional explicit lod
 * \param texel_out     returns the four texel channels
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16_bld;
   LLVMValueRef first_level;
   LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));

   /* cube face selection, compute per-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      /* lod is not needed for filtering; use a constant zero integer part */
      lod_ipart = i32t_zero;
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      } else {
         /* non-cube: use the texture's first level as reported at runtime */
         first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                       bld->gallivm, unit);
         ilevel0 = first_level;
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(lod_fpart);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);
      break;
   }

   /*
    * Get/interpolate texture colors.
    */

   /* variables that receive the low/high halves of the sampled colors */
   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   } else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /* minify = lod >= 0.0
       * NOTE(review): the comparison is a signed integer compare on
       * lod_ipart, i.e. it includes lod == 0, unlike the "> 0" wording
       * of the comment above.
       */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);
   }

   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    *
    * NOTE(review): the two loads are function arguments, so the order in
    * which they are emitted depends on the C compiler's argument
    * evaluation order.
    */
   packed = lp_build_pack2(bld->gallivm,
                           h16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(bld->gallivm,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   } else {
      /* not an rgba8 variant: pass the channels through unswizzled */
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}