static void emit_ucmp(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0], bld_base->uint_bld.elem_type, ""); LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0, bld_base->uint_bld.zero, ""); emit_data->output[emit_data->chan] = LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], ""); }
/** * Return (a & b) */ LLVMValueRef lp_build_and(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; LLVMValueRef res; assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); /* can't do bitwise ops on floating-point values */ if (type.floating) { a = LLVMBuildBitCast(builder, a, bld->int_vec_type, ""); b = LLVMBuildBitCast(builder, b, bld->int_vec_type, ""); } res = LLVMBuildAnd(builder, a, b, ""); if (type.floating) { res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } return res; }
static void trace_known(compile_t* c, LLVMValueRef ctx, LLVMValueRef object, ast_t* type, bool immutable) { reachable_type_t* t = reach_type(c->reachable, type); // If this type has no trace function, don't try to recurse in the runtime. if(t->trace_fn != NULL) { // Cast the object to an object pointer. LLVMValueRef args[4]; args[0] = ctx; args[1] = LLVMBuildBitCast(c->builder, object, c->object_ptr, ""); args[2] = t->trace_fn; args[3] = LLVMConstInt(c->i32, immutable, false); gencall_runtime(c, "pony_traceobject", args, 4, ""); } else { // Cast the object to a void pointer. LLVMValueRef args[2]; args[0] = ctx; args[1] = LLVMBuildBitCast(c->builder, object, c->void_ptr, ""); gencall_runtime(c, "pony_trace", args, 2, ""); } }
/** * Perform the occlusion test and increase the counter. * Test the depth mask. Add the number of channel which has none zero mask * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. * The counter will add 4. * * \param type holds element type of the mask vector. * \param maskvalue is the depth test mask. * \param counter is a pointer of the uint32 counter. */ static void lp_build_occlusion_count(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef maskvalue, LLVMValueRef counter) { LLVMValueRef countmask = lp_build_const_int_vec(type, 1); LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); LLVMTypeRef i8v16 = LLVMVectorType(LLVMInt8Type(), 16); LLVMValueRef counti = LLVMBuildBitCast(builder, countv, i8v16, "counti"); LLVMValueRef maskarray[4] = { LLVMConstInt(LLVMInt32Type(), 0, 0), LLVMConstInt(LLVMInt32Type(), 4, 0), LLVMConstInt(LLVMInt32Type(), 8, 0), LLVMConstInt(LLVMInt32Type(), 12, 0), }; LLVMValueRef shufflemask = LLVMConstVector(maskarray, 4); LLVMValueRef shufflev = LLVMBuildShuffleVector(builder, counti, LLVMGetUndef(i8v16), shufflemask, "shufflev"); LLVMValueRef shuffle = LLVMBuildBitCast(builder, shufflev, LLVMInt32Type(), "shuffle"); LLVMValueRef count = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32", LLVMInt32Type(), shuffle); LLVMValueRef orig = LLVMBuildLoad(builder, counter, "orig"); LLVMValueRef incr = LLVMBuildAdd(builder, orig, count, "incr"); LLVMBuildStore(builder, incr, counter); }
static void trace_array_elements(compile_t* c, reach_type_t* t, LLVMValueRef ctx, LLVMValueRef object, LLVMValueRef pointer) { // Get the type argument for the array. This will be used to generate the // per-element trace call. ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); if(!gentrace_needed(typearg)) return; reach_type_t* t_elem = reach_type(c->reach, typearg); pointer = LLVMBuildBitCast(c->builder, pointer, LLVMPointerType(t_elem->use_type, 0), ""); LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(c->builder); LLVMBasicBlockRef cond_block = codegen_block(c, "cond"); LLVMBasicBlockRef body_block = codegen_block(c, "body"); LLVMBasicBlockRef post_block = codegen_block(c, "post"); // Read the size. LLVMValueRef size = field_value(c, object, 1); LLVMBuildBr(c->builder, cond_block); // While the index is less than the size, trace an element. The initial // index when coming from the entry block is zero. LLVMPositionBuilderAtEnd(c->builder, cond_block); LLVMValueRef phi = LLVMBuildPhi(c->builder, c->intptr, ""); LLVMValueRef zero = LLVMConstInt(c->intptr, 0, false); LLVMAddIncoming(phi, &zero, &entry_block, 1); LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntULT, phi, size, ""); LLVMBuildCondBr(c->builder, test, body_block, post_block); // The phi node is the index. Get the element and trace it. LLVMPositionBuilderAtEnd(c->builder, body_block); LLVMValueRef elem_ptr = LLVMBuildGEP(c->builder, pointer, &phi, 1, "elem"); LLVMValueRef elem = LLVMBuildLoad(c->builder, elem_ptr, ""); gentrace(c, ctx, elem, typearg); // Add one to the phi node and branch back to the cond block. LLVMValueRef one = LLVMConstInt(c->intptr, 1, false); LLVMValueRef inc = LLVMBuildAdd(c->builder, phi, one, ""); body_block = LLVMGetInsertBlock(c->builder); LLVMAddIncoming(phi, &inc, &body_block, 1); LLVMBuildBr(c->builder, cond_block); LLVMPositionBuilderAtEnd(c->builder, post_block); }
LLVMValueRef gencall_allocstruct(compile_t* c, gentype_t* g) { // Disable debug anchor dwarf_location(&c->dwarf, NULL); // We explicitly want a boxed version. // Get the size of the structure. size_t size = (size_t)LLVMABISizeOfType(c->target_data, g->structure); // Get the finaliser, if there is one. const char* final = genname_finalise(g->type_name); LLVMValueRef final_fun = LLVMGetNamedFunction(c->module, final); // Allocate the object. LLVMValueRef args[3]; args[0] = codegen_ctx(c); LLVMValueRef result; if(final_fun == NULL) { if(size <= HEAP_MAX) { uint32_t index = ponyint_heap_index(size); args[1] = LLVMConstInt(c->i32, index, false); result = gencall_runtime(c, "pony_alloc_small", args, 2, ""); } else { args[1] = LLVMConstInt(c->intptr, size, false); result = gencall_runtime(c, "pony_alloc_large", args, 2, ""); } } else { args[1] = LLVMConstInt(c->intptr, size, false); args[2] = LLVMConstBitCast(final_fun, c->final_fn); result = gencall_runtime(c, "pony_alloc_final", args, 3, ""); } result = LLVMBuildBitCast(c->builder, result, g->structure_ptr, ""); // Set the descriptor. if(g->underlying != TK_STRUCT) { LLVMValueRef desc_ptr = LLVMBuildStructGEP(c->builder, result, 0, ""); LLVMBuildStore(c->builder, g->desc, desc_ptr); } return result; }
static LLVMValueRef translateIdLval(SymbolTable *TyTable, SymbolTable *ValTable, ASTNode *Node) { Type *IdType = (Type*) symTableFind(ValTable, Node->Value); LLVMValueRef IdValue; if (IdType->EscapedLevel > 0) { LLVMTypeRef LLVMType = getLLVMTypeFromType(TyTable, IdType); LLVMValueRef EVPtr = getEscapedVar(ValTable, Node->Value, Node->EscapedLevel); LLVMValueRef EVLoad = LLVMBuildLoad(Builder, EVPtr, ""); IdValue = LLVMBuildBitCast(Builder, EVLoad, LLVMPointerType(LLVMType, 0), ""); } else { IdValue = resolveAliasId(ValTable, Node->Value, &toValName, &symTableFindLocal); } return IdValue; }
static void pointer_delete(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_delete"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; params[2] = c->intptr; start_function(c, m, t_elem->use_type, params, 3); // Set up a constant integer for the allocation size. size_t size = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, size, false); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef n = LLVMGetParam(m->func, 1); LLVMValueRef len = LLVMGetParam(m->func, 2); LLVMValueRef elem_ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMValueRef result = LLVMBuildLoad(c->builder, elem_ptr, ""); LLVMValueRef dst = LLVMBuildPtrToInt(c->builder, elem_ptr, c->intptr, ""); LLVMValueRef offset = LLVMBuildMul(c->builder, n, l_size, ""); LLVMValueRef src = LLVMBuildAdd(c->builder, dst, offset, ""); LLVMValueRef elen = LLVMBuildMul(c->builder, len, l_size, ""); LLVMValueRef args[5]; args[0] = LLVMBuildIntToPtr(c->builder, dst, c->void_ptr, ""); args[1] = LLVMBuildIntToPtr(c->builder, src, c->void_ptr, ""); args[2] = elen; args[3] = LLVMConstInt(c->i32, 1, false); args[4] = LLVMConstInt(c->i1, 0, false); // llvm.memmove.*(ptr, ptr + (n * sizeof(elem)), len * sizeof(elem)) if(target_is_ilp32(c->opt->triple)) { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i32", args, 5, ""); } else { gencall_runtime(c, "llvm.memmove.p0i8.p0i8.i64", args, 5, ""); } // Return ptr[0]. LLVMBuildRet(c->builder, result); codegen_finishfun(c); }
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) { if (HAVE_LLVM >= 0x0500) { LLVMTypeRef v2f16 = LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2); LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16, args, 2, AC_FUNC_ATTR_READNONE); return LLVMBuildBitCast(ctx->builder, res, ctx->i32, ""); } return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_LEGACY); }
static LLVMValueRef rgb_to_rgba_aos(struct gallivm_state *gallivm, unsigned n, LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) { LLVMBuilderRef builder = gallivm->builder; struct lp_type type; LLVMValueRef a; LLVMValueRef rgba; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; assert(lp_check_value(type, r)); assert(lp_check_value(type, g)); assert(lp_check_value(type, b)); /* * Make a 4 x unorm8 vector */ #ifdef PIPE_ARCH_LITTLE_ENDIAN r = r; g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); a = lp_build_const_int_vec(gallivm, type, 0xff000000); #else r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), ""); g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), ""); b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), ""); a = lp_build_const_int_vec(gallivm, type, 0x000000ff); #endif rgba = r; rgba = LLVMBuildOr(builder, rgba, g, ""); rgba = LLVMBuildOr(builder, rgba, b, ""); rgba = LLVMBuildOr(builder, rgba, a, ""); rgba = LLVMBuildBitCast(builder, rgba, LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); return rgba; }
/** * lp_build_assert. * * Build an assertion in LLVM IR by building a function call to the * lp_assert() function above. * * \param condition should be an 'i1' or 'i32' value * \param msg a string to print if the assertion fails. */ LLVMValueRef lp_build_assert(LLVMBuilderRef builder, LLVMValueRef condition, const char *msg) { LLVMModuleRef module; LLVMTypeRef arg_types[2]; LLVMValueRef msg_string, assert_func, params[2], r; module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( LLVMGetInsertBlock(builder))); msg_string = lp_build_const_string_variable(module, msg, strlen(msg) + 1); arg_types[0] = LLVMInt32Type(); arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); /* lookup the lp_assert function */ assert_func = LLVMGetNamedFunction(module, "lp_assert"); /* Create the assertion function if not found */ if (!assert_func) { LLVMTypeRef func_type = LLVMFunctionType(LLVMVoidType(), arg_types, 2, 0); assert_func = LLVMAddFunction(module, "lp_assert", func_type); LLVMSetFunctionCallConv(assert_func, LLVMCCallConv); LLVMSetLinkage(assert_func, LLVMExternalLinkage); LLVMAddGlobalMapping(lp_build_engine, assert_func, func_to_pointer((func_pointer)lp_assert)); } assert(assert_func); /* build function call param list */ params[0] = LLVMBuildZExt(builder, condition, arg_types[0], ""); params[1] = LLVMBuildBitCast(builder, msg_string, arg_types[1], ""); /* check arg types */ assert(LLVMTypeOf(params[0]) == arg_types[0]); assert(LLVMTypeOf(params[1]) == arg_types[1]); r = LLVMBuildCall(builder, assert_func, params, 2, ""); return r; }
/** * Transpose from AOS <-> SOA * * @param single_type_lp type of pixels * @param src the 4 * n pixel input * @param dst the 4 * n pixel output */ void lp_build_transpose_aos(struct gallivm_state *gallivm, struct lp_type single_type_lp, const LLVMValueRef src[4], LLVMValueRef dst[4]) { struct lp_type double_type_lp = single_type_lp; LLVMTypeRef single_type; LLVMTypeRef double_type; LLVMValueRef t0, t1, t2, t3; double_type_lp.length >>= 1; double_type_lp.width <<= 1; double_type = lp_build_vec_type(gallivm, double_type_lp); single_type = lp_build_vec_type(gallivm, single_type_lp); /* Interleave x, y, z, w -> xy and zw */ t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0); t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0); t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1); t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1); /* Cast to double width type for second interleave */ t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0"); t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1"); t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2"); t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3"); /* Interleave xy, zw -> xyzw */ dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0); dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1); dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0); dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1); /* Cast back to original single width type */ dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0"); dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1"); dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2"); dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3"); }
LLVMValueRef gen_assign_cast(compile_t* c, LLVMTypeRef l_type, LLVMValueRef r_value, ast_t* type) { if(r_value <= GEN_NOVALUE) return r_value; LLVMTypeRef r_type = LLVMTypeOf(r_value); if(r_type == l_type) return r_value; switch(LLVMGetTypeKind(l_type)) { case LLVMIntegerTypeKind: case LLVMHalfTypeKind: case LLVMFloatTypeKind: case LLVMDoubleTypeKind: assert(LLVMGetTypeKind(r_type) == LLVMPointerTypeKind); return gen_unbox(c, type, r_value); case LLVMPointerTypeKind: r_value = gen_box(c, type, r_value); if(r_value == NULL) return NULL; return LLVMBuildBitCast(c->builder, r_value, l_type, ""); case LLVMStructTypeKind: if(LLVMGetTypeKind(r_type) == LLVMPointerTypeKind) { r_value = gen_unbox(c, type, r_value); assert(LLVMGetTypeKind(LLVMTypeOf(r_value)) == LLVMStructTypeKind); } return assign_to_tuple(c, l_type, r_value, type); default: {} } assert(0); return NULL; }
static LLVMValueRef cast_ffi_arg(compile_t* c, ffi_decl_t* decl, ast_t* ast, LLVMValueRef arg, LLVMTypeRef param, const char* name) { if(arg == NULL) return NULL; LLVMTypeRef arg_type = LLVMTypeOf(arg); if(param == arg_type) return arg; if((LLVMABISizeOfType(c->target_data, param) != LLVMABISizeOfType(c->target_data, arg_type))) { report_ffi_type_err(c, decl, ast, name); return NULL; } switch(LLVMGetTypeKind(param)) { case LLVMPointerTypeKind: if(LLVMGetTypeKind(arg_type) == LLVMIntegerTypeKind) return LLVMBuildIntToPtr(c->builder, arg, param, ""); else return LLVMBuildBitCast(c->builder, arg, param, ""); case LLVMIntegerTypeKind: if(LLVMGetTypeKind(arg_type) == LLVMPointerTypeKind) return LLVMBuildPtrToInt(c->builder, arg, param, ""); break; case LLVMStructTypeKind: pony_assert(LLVMGetTypeKind(arg_type) == LLVMStructTypeKind); return arg; default: {} } pony_assert(false); return NULL; }
void genprim_array_trace(compile_t* c, reach_type_t* t) { codegen_startfun(c, t->trace_fn, NULL, NULL); LLVMSetFunctionCallConv(t->trace_fn, LLVMCCallConv); LLVMValueRef ctx = LLVMGetParam(t->trace_fn, 0); LLVMValueRef arg = LLVMGetParam(t->trace_fn, 1); // Read the base pointer. LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->use_type, ""); LLVMValueRef pointer = field_value(c, object, 3); // Trace the base pointer. LLVMValueRef args[2]; args[0] = ctx; args[1] = pointer; gencall_runtime(c, "pony_trace", args, 2, ""); trace_array_elements(c, t, ctx, object, pointer); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
void genprim_array_serialise_trace(compile_t* c, reach_type_t* t) { // Generate the serialise_trace function. t->serialise_trace_fn = codegen_addfun(c, genname_serialise_trace(t->name), c->trace_type); codegen_startfun(c, t->serialise_trace_fn, NULL, NULL); LLVMSetFunctionCallConv(t->serialise_trace_fn, LLVMCCallConv); LLVMSetLinkage(t->serialise_trace_fn, LLVMExternalLinkage); LLVMValueRef ctx = LLVMGetParam(t->serialise_trace_fn, 0); LLVMValueRef arg = LLVMGetParam(t->serialise_trace_fn, 1); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->use_type, ""); // Read the size. LLVMValueRef size = field_value(c, object, 1); // Calculate the size of the element type. ast_t* typeargs = ast_childidx(t->ast, 2); ast_t* typearg = ast_child(typeargs); reach_type_t* t_elem = reach_type(c->reach, typearg); size_t abisize = (size_t)LLVMABISizeOfType(c->target_data, t_elem->use_type); LLVMValueRef l_size = LLVMConstInt(c->intptr, abisize, false); // Reserve space for the array elements. LLVMValueRef pointer = field_value(c, object, 3); LLVMValueRef args[3]; args[0] = ctx; args[1] = pointer; args[2] = LLVMBuildMul(c->builder, size, l_size, ""); gencall_runtime(c, "pony_serialise_reserve", args, 3, ""); // Trace the array elements. trace_array_elements(c, t, ctx, object, pointer); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
LLVMValueRef gen_call(struct node *ast) { LLVMValueRef func, *arg_list = NULL; struct node *n; int arg_count, i; func = LLVMBuildBitCast(builder, rvalue_to_lvalue(codegen(ast->one)), LLVMPointerType(TYPE_FUNC, 0), ""); arg_count = count_chain(ast->two); arg_list = calloc(sizeof(LLVMValueRef), arg_count); if (arg_count > 0 && arg_list == NULL) generror("out of memory"); for (i = 0, n = ast->two; i < arg_count; i++, n = n->two) arg_list[arg_count - i - 1] = codegen(n->one); return LLVMBuildCall(builder, func, arg_list, arg_count, ""); }
static void pointer_update(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { FIND_METHOD("_update"); LLVMTypeRef params[3]; params[0] = t->use_type; params[1] = c->intptr; params[2] = t_elem->use_type; start_function(c, m, t_elem->use_type, params, 3); LLVMValueRef ptr = LLVMGetParam(m->func, 0); LLVMValueRef index = LLVMGetParam(m->func, 1); LLVMValueRef elem_ptr = LLVMBuildBitCast(c->builder, ptr, LLVMPointerType(t_elem->use_type, 0), ""); LLVMValueRef loc = LLVMBuildGEP(c->builder, elem_ptr, &index, 1, ""); LLVMValueRef result = LLVMBuildLoad(c->builder, loc, ""); LLVMBuildStore(c->builder, LLVMGetParam(m->func, 2), loc); LLVMBuildRet(c->builder, result); codegen_finishfun(c); }
/** * Convert a vector of rgba8 values into 32bit wide SoA vectors. * * \param dst_type The desired return type. For pure integer formats * this should be a 32bit wide int or uint vector type, * otherwise a float vector type. * * \param packed The rgba8 values to pack. * * \param rgba The 4 SoA return vectors. */ void lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm, struct lp_type dst_type, LLVMValueRef packed, LLVMValueRef *rgba) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff); unsigned chan; /* XXX technically shouldn't use that for uint dst_type */ packed = LLVMBuildBitCast(builder, packed, lp_build_int_vec_type(gallivm, dst_type), ""); /* Decode the input vector components */ for (chan = 0; chan < 4; ++chan) { #ifdef PIPE_ARCH_LITTLE_ENDIAN unsigned start = chan*8; #else unsigned start = (3-chan)*8; #endif unsigned stop = start + 8; LLVMValueRef input; input = packed; if (start) input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, dst_type, start), ""); if (stop < 32) input = LLVMBuildAnd(builder, input, mask, ""); if (dst_type.floating) input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input); rgba[chan] = input; } }
/** * Gather elements from scatter positions in memory into a single vector. * * @param src_width src element width * @param dst_width result element width (source will be expanded to fit) * @param length length of the offsets, * @param base_ptr base pointer, should be a i8 pointer type. * @param offsets vector with offsets */ LLVMValueRef lp_build_gather(LLVMBuilderRef builder, unsigned length, unsigned src_width, unsigned dst_width, LLVMValueRef base_ptr, LLVMValueRef offsets) { LLVMTypeRef src_type = LLVMIntType(src_width); LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); LLVMTypeRef dst_elem_type = LLVMIntType(dst_width); LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length); LLVMValueRef res; unsigned i; res = LLVMGetUndef(dst_vec_type); for(i = 0; i < length; ++i) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); LLVMValueRef elem_offset; LLVMValueRef elem_ptr; LLVMValueRef elem; elem_offset = LLVMBuildExtractElement(builder, offsets, index, ""); elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, ""); elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, ""); elem = LLVMBuildLoad(builder, elem_ptr, ""); assert(src_width <= dst_width); if(src_width > dst_width) elem = LLVMBuildTrunc(builder, elem, dst_elem_type, ""); if(src_width < dst_width) elem = LLVMBuildZExt(builder, elem, dst_elem_type, ""); res = LLVMBuildInsertElement(builder, res, elem, index, ""); } return res; }
static LLVMValueRef dispatch_function(compile_t* c, ast_t* from, gentype_t* g, LLVMValueRef l_value, const char* method_name, ast_t* typeargs) { LLVMValueRef func; if(g->use_type == c->object_ptr) { // Virtual, get the function by selector colour. uint32_t index = genfun_vtable_index(c, g, method_name, typeargs); assert(index != (uint32_t)-1); // Get the function from the vtable. func = gendesc_vtable(c, l_value, index); // Cast to the right function type. LLVMTypeRef f_type = genfun_sig(c, g, method_name, typeargs); if(f_type == NULL) { ast_error(from, "couldn't create a signature for '%s'", method_name); return NULL; } f_type = LLVMPointerType(f_type, 0); func = LLVMBuildBitCast(c->builder, func, f_type, "method"); } else { // Static, get the actual function. func = genfun_proto(c, g, method_name, typeargs); if(func == NULL) { ast_error(from, "couldn't locate '%s'", method_name); return NULL; } } return func; }
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vindex, LLVMValueRef voffset, bool can_speculate) { LLVMValueRef args [] = { LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), vindex, voffset, LLVMConstInt(ctx->i1, 0, 0), /* glc */ LLVMConstInt(ctx->i1, 0, 0), /* slc */ }; return ac_build_intrinsic(ctx, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, args, ARRAY_SIZE(args), /* READNONE means writes can't affect it, while * READONLY means that writes can affect it. */ can_speculate && HAVE_LLVM >= 0x0400 ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY); }
static void make_deserialise(compile_t* c, reach_type_t* t) { // Generate the deserialise function. t->deserialise_fn = codegen_addfun(c, genname_deserialise(t->name), c->trace_type); codegen_startfun(c, t->deserialise_fn, NULL, NULL); LLVMSetFunctionCallConv(t->deserialise_fn, LLVMCCallConv); LLVMSetLinkage(t->deserialise_fn, LLVMExternalLinkage); LLVMValueRef ctx = LLVMGetParam(t->deserialise_fn, 0); LLVMValueRef arg = LLVMGetParam(t->deserialise_fn, 1); LLVMValueRef object = LLVMBuildBitCast(c->builder, arg, t->structure_ptr, ""); // At this point, the serialised contents have been copied to the allocated // object. deserialise(c, t, ctx, object); LLVMBuildRetVoid(c->builder); codegen_finishfun(c); }
static LLVMValueRef rgb_to_rgba_aos(LLVMBuilderRef builder, unsigned n, LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) { struct lp_type type; LLVMValueRef a; LLVMValueRef rgba; memset(&type, 0, sizeof type); type.sign = TRUE; type.width = 32; type.length = n; assert(lp_check_value(type, r)); assert(lp_check_value(type, g)); assert(lp_check_value(type, b)); /* * Make a 4 x unorm8 vector */ r = r; g = LLVMBuildShl(builder, g, lp_build_const_int_vec(type, 8), ""); b = LLVMBuildShl(builder, b, lp_build_const_int_vec(type, 16), ""); a = lp_build_const_int_vec(type, 0xff000000); rgba = r; rgba = LLVMBuildOr(builder, rgba, g, ""); rgba = LLVMBuildOr(builder, rgba, b, ""); rgba = LLVMBuildOr(builder, rgba, a, ""); rgba = LLVMBuildBitCast(builder, rgba, LLVMVectorType(LLVMInt8Type(), 4*n), ""); return rgba; }
static void maybe_apply(compile_t* c, reach_type_t* t, reach_type_t* t_elem) { // Returns the receiver if it isn't null. FIND_METHOD("apply"); start_function(c, m, t_elem->use_type, &t->use_type, 1); LLVMValueRef result = LLVMGetParam(m->func, 0); LLVMValueRef test = LLVMBuildIsNull(c->builder, result, ""); LLVMBasicBlockRef is_false = codegen_block(c, ""); LLVMBasicBlockRef is_true = codegen_block(c, ""); LLVMBuildCondBr(c->builder, test, is_true, is_false); LLVMPositionBuilderAtEnd(c->builder, is_false); result = LLVMBuildBitCast(c->builder, result, t_elem->use_type, ""); LLVMBuildRet(c->builder, result); LLVMPositionBuilderAtEnd(c->builder, is_true); gencall_throw(c); codegen_finishfun(c); BOX_FUNCTION(); }
static LLVMValueRef cast_ffi_arg(compile_t* c, LLVMValueRef arg, LLVMTypeRef param) { if(arg == NULL) return NULL; LLVMTypeRef arg_type = LLVMTypeOf(arg); if(param == c->i1) { // If the parameter is an i1, it must be from an LLVM intrinsic. In that // case, the argument must be a Bool encoded as an ibool. if(arg_type != c->ibool) return NULL; // Truncate the Bool's i8 representation to i1. return LLVMBuildTrunc(c->builder, arg, c->i1, ""); } switch(LLVMGetTypeKind(param)) { case LLVMPointerTypeKind: { if(LLVMGetTypeKind(arg_type) == LLVMIntegerTypeKind) arg = LLVMBuildIntToPtr(c->builder, arg, param, ""); else arg = LLVMBuildBitCast(c->builder, arg, param, ""); break; } default: {} } return arg; }
static void emit_up2h(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMContextRef context = bld_base->base.gallivm->context; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMTypeRef fp16, i16; LLVMValueRef const16, input, val; unsigned i; fp16 = LLVMHalfTypeInContext(context); i16 = LLVMInt16TypeInContext(context); const16 = lp_build_const_int32(uint_bld->gallivm, 16); input = emit_data->args[0]; for (i = 0; i < 2; i++) { val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input; val = LLVMBuildTrunc(builder, val, i16, ""); val = LLVMBuildBitCast(builder, val, fp16, ""); emit_data->output[i] = LLVMBuildFPExt(builder, val, bld_base->base.elem_type, ""); } }
/** * Fetch a pixel into a 4 float AoS. * * \param format_desc describes format of the image we're fetching from * \param ptr address of the pixel block (or the texel if uncompressed) * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0, 0). * \return a 4 element vector with the pixel's RGBA values. */ LLVMValueRef lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j) { LLVMBuilderRef builder = gallivm->builder; unsigned num_pixels = type.length / 4; struct lp_build_context bld; assert(type.length <= LP_MAX_VECTOR_LENGTH); assert(type.length % 4 == 0); lp_build_context_init(&bld, gallivm, type); /* * Trivial case * * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ if (format_matches_type(format_desc, type) && format_desc->block.bits <= type.width * 4 && util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; /* * The format matches the type (apart of a swizzle) so no need for * scaling or converting. */ packed = lp_build_gather(gallivm, type.length/4, format_desc->block.bits, type.width*4, base_ptr, offset); assert(format_desc->block.bits <= type.width * type.length); packed = LLVMBuildBitCast(gallivm->builder, packed, lp_build_vec_type(gallivm, type), ""); return lp_build_format_swizzle_aos(format_desc, &bld, packed); } /* * Bit arithmetic */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && util_is_power_of_two(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && !format_desc->is_mixed && (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; unsigned k; /* * Unpack a pixel at a time into a <4 x float> RGBA vector */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef packed; packed = lp_build_gather_elem(gallivm, num_pixels, format_desc->block.bits, 32, base_ptr, offset, k); tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, format_desc, packed); } /* * Type conversion. * * TODO: We could avoid floating conversion for integer to * integer conversions. */ if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { debug_printf("%s: unpacking %s with floating point\n", __FUNCTION__, format_desc->short_name); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return lp_build_format_swizzle_aos(format_desc, &bld, res); } /* * YUV / subsampled formats */ if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = num_pixels * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, format_desc, num_pixels, base_ptr, offset, i, j); lp_build_conv(gallivm, tmp_type, type, &tmp, 1, &tmp, 1); return tmp; } /* * Fallback to util_format_description::fetch_rgba_8unorm(). */ if (format_desc->fetch_rgba_8unorm && !type.floating && type.width == 8 && !type.sign && type.norm) { /* * Fallback to calling util_format_description::fetch_rgba_8unorm. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); char name[256]; LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmp; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_8unorm(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pi8t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* make const pointer for the C fetch_rgba_float function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, i32t, ""); res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); /* * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmp = LLVMBuildLoad(builder, tmp_ptr, ""); if (num_pixels == 1) { res = tmp; } else { res = LLVMBuildInsertElement(builder, res, tmp, index, ""); } } /* Bitcast from <n x i32> to <4n x i8> */ res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); return res; } /* * Fallback to util_format_description::fetch_rgba_float(). */ if (format_desc->fetch_rgba_float) { /* * Fallback to calling util_format_description::fetch_rgba_float. * * This is definitely not the most efficient way of fetching pixels, as * we miss the opportunity to do vectorization, but this it is a * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); char name[256]; LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef function; LLVMValueRef tmp_ptr; LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; LLVMValueRef res; LLVMValueRef callee; unsigned k; util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: falling back to %s\n", __FUNCTION__, name); } /* * Declare and bind format_desc->fetch_rgba_float(). */ function = LLVMGetNamedFunction(module, name); if (!function) { /* * Function to call looks like: * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) */ LLVMTypeRef ret_type; LLVMTypeRef arg_types[4]; LLVMTypeRef function_type; ret_type = LLVMVoidTypeInContext(gallivm->context); arg_types[0] = pf32t; arg_types[1] = pi8t; arg_types[2] = i32t; arg_types[3] = i32t; function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); function = LLVMAddFunction(module, name, function_type); LLVMSetFunctionCallConv(function, LLVMCCallConv); LLVMSetLinkage(function, LLVMExternalLinkage); assert(LLVMIsDeclaration(function)); } /* Note: we're using this casting here instead of LLVMAddGlobalMapping() * to work around a bug in LLVM 2.6. */ /* make const pointer for the C fetch_rgba_float function */ callee = lp_build_const_int_pointer(gallivm, func_to_pointer((func_pointer) format_desc->fetch_rgba_float)); /* cast the callee pointer to the function's type */ function = LLVMBuildBitCast(builder, callee, LLVMTypeOf(function), "cast callee"); tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); /* * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result * in the SoA vectors. */ for (k = 0; k < num_pixels; ++k) { LLVMValueRef args[4]; args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, base_ptr, offset, k); if (num_pixels == 1) { args[2] = i; args[3] = j; } else { LLVMValueRef index = lp_build_const_int32(gallivm, k); args[2] = LLVMBuildExtractElement(builder, i, index, ""); args[3] = LLVMBuildExtractElement(builder, j, index, ""); } LLVMBuildCall(builder, function, args, Elements(args), ""); tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); } lp_build_conv(gallivm, lp_float32_vec4_type(), type, tmps, num_pixels, &res, 1); return res; } assert(0); return lp_build_undef(gallivm, type); }
/** * Fetch a texels from a texture, returning them in SoA layout. * * \param type the desired return type for 'rgba'. The vector length * is the number of texels to fetch * * \param base_ptr points to the base of the texture mip tree. * \param offset offset to start of the texture image block. For non- * compressed formats, this simply is an offset to the texel. * For compressed formats, it is an offset to the start of the * compressed data block. * * \param i, j the sub-block pixel coordinates. For non-compressed formats * these will always be (0,0). For compressed formats, i will * be in [0, block_width-1] and j will be in [0, block_height-1]. */ void lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, LLVMValueRef j, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && format_desc->block.width == 1 && format_desc->block.height == 1 && format_desc->block.bits <= type.width && (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || format_desc->channel[0].size == 32)) { /* * The packed pixel fits into an element of the destination format. Put * the packed pixels into a vector and extract each component for all * vector elements in parallel. */ LLVMValueRef packed; /* * gather the texels from the texture * Ex: packed = {XYZW, XYZW, XYZW, XYZW} */ assert(format_desc->block.bits <= type.width); packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, type.width, base_ptr, offset, FALSE); /* * convert texels to float rgba */ lp_build_unpack_rgba_soa(gallivm, format_desc, type, packed, rgba_out); return; } if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT || format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) { /* * similar conceptually to above but requiring special * AoS packed -> SoA float conversion code. */ LLVMValueRef packed; assert(type.floating); assert(type.width == 32); packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, type.width, base_ptr, offset, FALSE); if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); } else { lp_build_rgb9e5_to_float(gallivm, packed, rgba_out); } return; } if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS && format_desc->block.bits == 64) { /* * special case the format is 64 bits but we only require * 32bit (or 8bit) from each block. */ LLVMValueRef packed; if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) { /* * for stencil simply fix up offsets - could in fact change * base_ptr instead even outside the shader. */ unsigned mask = (1 << 8) - 1; LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4); offset = LLVMBuildAdd(builder, offset, s_offset, ""); packed = lp_build_gather(gallivm, type.length, 32, type.width, base_ptr, offset, FALSE); packed = LLVMBuildAnd(builder, packed, lp_build_const_int_vec(gallivm, type, mask), ""); } else { assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); packed = lp_build_gather(gallivm, type.length, 32, type.width, base_ptr, offset, TRUE); packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(gallivm, type), ""); } /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */ rgba_out[0] = rgba_out[1] = rgba_out[2] = packed; rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f); return; } /* * Try calling lp_build_fetch_rgba_aos for all pixels. */ if (util_format_fits_8unorm(format_desc) && type.floating && type.width == 32 && (type.length == 1 || (type.length % 4 == 0))) { struct lp_type tmp_type; LLVMValueRef tmp; memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = type.length * 4; tmp_type.norm = TRUE; tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, base_ptr, offset, i, j); lp_build_rgba8_to_fi32_soa(gallivm, type, tmp, rgba_out); return; } /* * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * * This is not the most efficient way of fetching pixels, as we * miss some opportunities to do vectorization, but this is * convenient for formats or scenarios for which there was no * opportunity or incentive to optimize. */ { unsigned k, chan; struct lp_type tmp_type; if (gallivm_debug & GALLIVM_DEBUG_PERF) { debug_printf("%s: scalar unpacking of %s\n", __FUNCTION__, format_desc->short_name); } tmp_type = type; tmp_type.length = 4; for (chan = 0; chan < 4; ++chan) { rgba_out[chan] = lp_build_undef(gallivm, type); } /* loop over number of pixels */ for(k = 0; k < type.length; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef offset_elem; LLVMValueRef i_elem, j_elem; LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, base_ptr, offset_elem, i_elem, j_elem); /* * Insert the AoS tmp value channels into the SoA result vectors at * position = 'index'. */ for (chan = 0; chan < 4; ++chan) { LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], tmp_chan, index, ""); } } } }
/** * Unpack several pixels in SoA. * * It takes a vector of packed pixels: * * packed = {P0, P1, P2, P3, ..., Pn} * * And will produce four vectors: * * red = {R0, R1, R2, R3, ..., Rn} * green = {G0, G1, G2, G3, ..., Gn} * blue = {B0, B1, B2, B3, ..., Bn} * alpha = {A0, A1, A2, A3, ..., An} * * It requires that a packed pixel fits into an element of the output * channels. The common case is when converting pixel with a depth of 32 bit or * less into floats. * * \param format_desc the format of the 'packed' incoming pixel vector * \param type the desired type for rgba_out (type.length = n, above) * \param packed the incoming vector of packed pixels * \param rgba_out returns the SoA R,G,B,A vectors */ void lp_build_unpack_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, LLVMValueRef packed, LLVMValueRef rgba_out[4]) { LLVMBuilderRef builder = gallivm->builder; struct lp_build_context bld; LLVMValueRef inputs[4]; unsigned chan; assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); assert(format_desc->block.width == 1); assert(format_desc->block.height == 1); assert(format_desc->block.bits <= type.width); /* FIXME: Support more output types */ assert(type.width == 32); lp_build_context_init(&bld, gallivm, type); /* Decode the input vector components */ for (chan = 0; chan < format_desc->nr_channels; ++chan) { const unsigned width = format_desc->channel[chan].size; const unsigned start = format_desc->channel[chan].shift; const unsigned stop = start + width; LLVMValueRef input; input = packed; switch(format_desc->channel[chan].type) { case UTIL_FORMAT_TYPE_VOID: input = lp_build_undef(gallivm, type); break; case UTIL_FORMAT_TYPE_UNSIGNED: /* * Align the LSB */ if (start) { input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), ""); } /* * Zero the MSBs */ if (stop < format_desc->block.bits) { unsigned mask = ((unsigned long long)1 << width) - 1; input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), ""); } /* * Type conversion */ if (type.floating) { if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { assert(width == 8); if (format_desc->swizzle[3] == chan) { input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); } else { struct lp_type conv_type = lp_uint_type(type); input = lp_build_srgb_to_linear(gallivm, conv_type, input); } } else { if(format_desc->channel[chan].normalized) input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); else input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_SIGNED: /* * Align the sign bit first. */ if (stop < type.width) { unsigned bits = type.width - stop; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildShl(builder, input, bits_val, ""); } /* * Align the LSB (with an arithmetic shift to preserve the sign) */ if (format_desc->channel[chan].size < type.width) { unsigned bits = type.width - format_desc->channel[chan].size; LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); input = LLVMBuildAShr(builder, input, bits_val, ""); } /* * Type conversion */ if (type.floating) { input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); if (format_desc->channel[chan].normalized) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildFMul(builder, input, scale_val, ""); /* the formula above will produce value below -1.0 for most negative * value but everything seems happy with that hence disable for now */ if (0) input = lp_build_max(&bld, input, lp_build_const_vec(gallivm, type, -1.0f)); } } else if (format_desc->channel[chan].pure_integer) { /* Nothing to do */ } else { /* FIXME */ assert(0); } break; case UTIL_FORMAT_TYPE_FLOAT: if (type.floating) { assert(start == 0); assert(stop == 32); assert(type.width == 32); input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; case UTIL_FORMAT_TYPE_FIXED: if (type.floating) { double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1); LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), ""); input = LLVMBuildFMul(builder, input, scale_val, ""); } else { /* FIXME */ assert(0); input = lp_build_undef(gallivm, type); } break; default: assert(0); input = lp_build_undef(gallivm, type); break; } inputs[chan] = input; } lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out); }