/**
 * Swizzle a vector consisting of an array of XYZW structs.
 *
 * This fills a vector of dst_len length with the swizzled channels from src.
 * The swizzle pattern is applied cyclically: result element i is selected
 * by swizzles[i % num_swizzles].  An entry equal to LP_BLD_SWIZZLE_DONTCARE
 * yields an undefined shuffle index for that element.
 *
 * e.g. with swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8
 *      results in
 *      RGBA RGBA = BGR BGR BG
 *
 * @param swizzles      the swizzle array
 * @param num_swizzles  the number of elements in swizzles; must be non-zero
 *                      whenever dst_len is non-zero
 * @param dst_len       the length of the result
 * @return the shuffled vector of dst_len elements
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
                       LLVMValueRef src,
                       const unsigned char* swizzles,
                       unsigned num_swizzles,
                       unsigned dst_len)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
   unsigned i;

   assert(dst_len < LP_MAX_VECTOR_WIDTH);
   /* The modulo below would divide by zero with an empty swizzle array. */
   assert(num_swizzles > 0 || dst_len == 0);

   for (i = 0; i < dst_len; ++i) {
      int swizzle = swizzles[i % num_swizzles];

      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
         shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
      } else {
         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
      }
   }

   /* Only src is ever indexed; the second shuffle operand is undef. */
   return LLVMBuildShuffleVector(builder,
                                 src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(shuffles, dst_len),
                                 "");
}
int main (void) { LLVMModuleRef module = LLVMModuleCreateWithName("kal"); LLVMBuilderRef builder = LLVMCreateBuilder(); // LLVMInitializeNativeTarget(); LLVMTypeRef funcType = LLVMFunctionType(LLVMVoidType(), NULL, 0, 0); LLVMValueRef func = LLVMAddFunction(module, "main", funcType); LLVMSetLinkage(func, LLVMExternalLinkage); LLVMBasicBlockRef block = LLVMAppendBasicBlock(func, "entry"); LLVMPositionBuilderAtEnd(builder, block); LLVMValueRef cond = LLVMBuildICmp(builder, LLVMIntNE, LLVMConstInt(LLVMInt32Type(), 2, 0), LLVMConstInt(LLVMInt32Type(), 1, 0), "ifcond"); LLVMValueRef owning_block = LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)); //TODO: WRONG?? //LLVMValueRef owning_block = LLVMBasicBlockAsValue(LLVMGetPreviousBasicBlock(LLVMGetInsertBlock(builder))); // 2. Generate new blocks for cases. LLVMBasicBlockRef then_ref = LLVMAppendBasicBlock(owning_block, "then"); LLVMBasicBlockRef else_ref = LLVMAppendBasicBlock(owning_block, "else"); LLVMBasicBlockRef merge_ref = LLVMAppendBasicBlock(owning_block, "ifmerge"); // 3. Branch conditionally on then or else. LLVMBuildCondBr(builder, cond, then_ref, else_ref); // 4. Build then branch prologue. LLVMPositionBuilderAtEnd(builder, then_ref); LLVMValueRef hi1 = LLVMBuildXor(builder, LLVMGetUndef(LLVMInt32Type()), LLVMGetUndef(LLVMInt32Type()), "subtmp"); // 5. Connect then branch to merge block. LLVMBuildBr(builder, merge_ref); then_ref = LLVMGetInsertBlock(builder); // 6. Build else branch prologue. LLVMPositionBuilderAtEnd(builder, else_ref); LLVMValueRef hi2 = LLVMBuildXor(builder, LLVMGetUndef(LLVMInt32Type()), LLVMGetUndef(LLVMInt32Type()), "subtmp2"); // 7. Connect else branch to merge block. LLVMBuildBr(builder, merge_ref); else_ref = LLVMGetInsertBlock(builder); // 8. Position ourselves after the merge block. LLVMPositionBuilderAtEnd(builder, merge_ref); // 9. Build the phi node. // LLVMValueRef phi = LLVMBuildPhi(builder, LLVMDoubleType(), "phi"); // 10. Add incoming edges. 
// LLVMAddIncoming(phi, &hi1, &then_ref, 1); // LLVMAddIncoming(phi, &hi2, &else_ref, 1); LLVMDumpModule(module); LLVMDisposeBuilder(builder); LLVMDisposeModule(module); return 0; }
/**
 * Replicate a scalar into every element of vec_type.
 *
 * When vec_type is not a vector type the scalar is returned unchanged
 * (its type must then equal vec_type).  Otherwise the scalar is inserted
 * into element 0 of an undef vector and a shuffle with an all-zero mask
 * copies that element into every position.
 *
 * @param vec_type  destination type (vector or scalar)
 * @param scalar    value to replicate; must have vec_type's element type
 */
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMBuilderRef builder;
   LLVMTypeRef idx_type;
   LLVMTypeRef mask_type;
   LLVMValueRef blank;
   LLVMValueRef seeded;
   unsigned num_elems;

   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      /* scalar destination: nothing to broadcast */
      assert(vec_type == LLVMTypeOf(scalar));
      return scalar;
   }

   builder = gallivm->builder;
   num_elems = LLVMGetVectorSize(vec_type);
   blank = LLVMGetUndef(vec_type);

   /* Shuffle masks are always vectors of int32. */
   idx_type = LLVMInt32TypeInContext(gallivm->context);
   mask_type = LLVMVectorType(idx_type, num_elems);

   assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

   /* Put the scalar in element 0, then select element 0 everywhere. */
   seeded = LLVMBuildInsertElement(builder, blank, scalar,
                                   LLVMConstNull(idx_type), "");
   return LLVMBuildShuffleVector(builder, seeded, blank,
                                 LLVMConstNull(mask_type), "");
}
/**
 * Emit the cube-map coordinate intrinsic(s) for the three inputs.
 *
 * For HAVE_LLVM >= 0x0309 four scalar intrinsics (cubetc, cubesc, cubema,
 * cubeid) are called on the three inputs and their results gathered into a
 * 4-element vector.  Otherwise the inputs are packed (with an undef fourth
 * element) into a vector and passed to the single llvm.AMDGPU.cube
 * intrinsic.
 *
 * @param in  the three input values; in[0] determines the element type
 * @return a 4-element vector
 */
static LLVMValueRef
build_cube_intrinsic(struct gallivm_state *gallivm,
		     LLVMValueRef in[3])
{
	if (HAVE_LLVM >= 0x0309) {
		static const char *const intrinsic_names[4] = {
			"llvm.amdgcn.cubetc",
			"llvm.amdgcn.cubesc",
			"llvm.amdgcn.cubema",
			"llvm.amdgcn.cubeid",
		};
		LLVMTypeRef scalar_type = LLVMTypeOf(in[0]);
		LLVMValueRef results[4];
		unsigned n;

		for (n = 0; n < 4; n++) {
			results[n] = lp_build_intrinsic(gallivm->builder,
							intrinsic_names[n],
							scalar_type, in, 3,
							LLVMReadNoneAttribute);
		}

		return lp_build_gather_values(gallivm, results, 4);
	}

	/* Legacy path: one vector intrinsic taking a padded 4-vector. */
	{
		LLVMValueRef padded[4];
		LLVMValueRef packed;

		padded[0] = in[0];
		padded[1] = in[1];
		padded[2] = in[2];
		padded[3] = LLVMGetUndef(LLVMTypeOf(in[0]));

		packed = lp_build_gather_values(gallivm, padded, 4);

		return lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.cube",
					  LLVMTypeOf(packed), &packed, 1,
					  LLVMReadNoneAttribute);
	}
}
/** * Pack first element of aos values, * pad out to destination size. * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _ */ LLVMValueRef lp_build_pack_aos_scalars(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, const LLVMValueRef src) { LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); LLVMValueRef undef = LLVMGetUndef(i32t); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; unsigned num_src = src_type.length / 4; unsigned num_dst = dst_type.length; unsigned i; assert(num_src <= num_dst); for (i = 0; i < num_src; i++) { shuffles[i] = LLVMConstInt(i32t, i * 4, 0); } for (i = num_src; i < num_dst; i++) { shuffles[i] = undef; } if (num_dst == 1) { return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], ""); } else { return LLVMBuildShuffleVector(gallivm->builder, src, src, LLVMConstVector(shuffles, num_dst), ""); } }
static LLVMValueRef emit_array_fetch( struct lp_build_tgsi_context *bld_base, unsigned File, enum tgsi_opcode_type type, struct tgsi_declaration_range range, unsigned swizzle) { struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); struct gallivm_state * gallivm = bld->bld_base.base.gallivm; LLVMBuilderRef builder = bld_base->base.gallivm->builder; unsigned i, size = range.Last - range.First + 1; LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); LLVMValueRef result = LLVMGetUndef(vec); struct tgsi_full_src_register tmp_reg = {}; tmp_reg.Register.File = File; for (i = 0; i < size; ++i) { tmp_reg.Register.Index = i + range.First; LLVMValueRef temp = emit_fetch(bld_base, &tmp_reg, type, swizzle); result = LLVMBuildInsertElement(builder, result, temp, lp_build_const_int32(gallivm, i), ""); } return result; }
/**
 * Fill in a build context for the given type.
 *
 * Caches the gallivm state, the type, the element and vector LLVM types
 * (both the "natural" ones and their integer counterparts) and the undef,
 * zero and one constants of the vector type.  For a length-1 type the
 * "vector" types are simply the element types.
 */
void
lp_build_context_init(struct lp_build_context *bld,
                      struct gallivm_state *gallivm,
                      struct lp_type type)
{
   const int is_scalar = (type.length == 1);

   bld->gallivm = gallivm;
   bld->type = type;

   /* Integer types keep the integer element type as their element type. */
   bld->int_elem_type = lp_build_int_elem_type(gallivm, type);
   bld->elem_type = type.floating ? lp_build_elem_type(gallivm, type)
                                  : bld->int_elem_type;

   bld->int_vec_type = is_scalar
      ? bld->int_elem_type
      : LLVMVectorType(bld->int_elem_type, type.length);
   bld->vec_type = is_scalar
      ? bld->elem_type
      : LLVMVectorType(bld->elem_type, type.length);

   bld->undef = LLVMGetUndef(bld->vec_type);
   bld->zero = LLVMConstNull(bld->vec_type);
   bld->one = lp_build_one(gallivm, type);
}
/**
 * Gather elements from scattered positions in memory into a single vector.
 * Used for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
 * With length == 1 a single element is fetched and returned as a scalar;
 * otherwise each element is fetched with lp_build_gather_elem and inserted
 * into a <length x i(dst_width)> vector.
 *
 * @param length     number of offsets / result elements
 * @param src_width  src element width in bits
 * @param dst_width  result element width in bits (src will be expanded to fit)
 * @param base_ptr   base pointer, should be a i8 pointer type.
 * @param offsets    vector with offsets
 */
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef elem_type;
   LLVMValueRef gathered;
   unsigned lane;

   if (length == 1) {
      /* Scalar */
      return lp_build_gather_elem(gallivm, length,
                                  src_width, dst_width,
                                  base_ptr, offsets, 0);
   }

   /* Vector */
   elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
   gathered = LLVMGetUndef(LLVMVectorType(elem_type, length));

   for (lane = 0; lane < length; lane++) {
      LLVMValueRef lane_index = lp_build_const_int32(gallivm, lane);
      LLVMValueRef texel = lp_build_gather_elem(gallivm, length,
                                                src_width, dst_width,
                                                base_ptr, offsets, lane);

      gathered = LLVMBuildInsertElement(gallivm->builder, gathered,
                                        texel, lane_index, "");
   }

   return gathered;
}
/**
 * Build a vector out of every value_stride-th entry of values.
 *
 * @param values        array of values (or of pointers, when load is set)
 * @param value_count   number of elements in the result; must not be 0
 * @param value_stride  distance, in array entries, between used values
 * @param load          when true each entry is loaded through before use
 * @return the single (possibly loaded) value when value_count is 1,
 *         otherwise a vector of value_count elements
 */
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
				LLVMValueRef *values,
				unsigned value_count,
				unsigned value_stride,
				bool load)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef gathered = NULL;
	unsigned n;

	if (!value_count)
		unreachable("value_count is 0");

	if (value_count == 1) {
		/* No vector needed for a single value. */
		return load ? LLVMBuildLoad(builder, values[0], "")
			    : values[0];
	}

	for (n = 0; n < value_count; n++) {
		LLVMValueRef elem = values[n * value_stride];
		LLVMValueRef slot;

		if (load)
			elem = LLVMBuildLoad(builder, elem, "");

		/* The element type is only known once the first value is. */
		if (n == 0) {
			gathered = LLVMGetUndef(
				LLVMVectorType(LLVMTypeOf(elem), value_count));
		}

		slot = LLVMConstInt(ctx->i32, n, false);
		gathered = LLVMBuildInsertElement(builder, gathered, elem,
						  slot, "");
	}

	return gathered;
}
/**
 * Apply a scalar intrinsic element-wise over vector arguments.
 *
 * For every element of ret_type, the matching element of each argument is
 * extracted, the intrinsic is called on those scalars, and the result is
 * inserted into the returned vector.
 *
 * @param name      intrinsic name
 * @param ret_type  vector type of the result
 * @param args      vector arguments
 * @param num_args  number of arguments (at most LP_MAX_FUNC_ARGS)
 */
LLVMValueRef
lp_build_intrinsic_map(struct gallivm_state *gallivm,
                       const char *name,
                       LLVMTypeRef ret_type,
                       LLVMValueRef *args,
                       unsigned num_args)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef lane_type = LLVMGetElementType(ret_type);
   const unsigned num_lanes = LLVMGetVectorSize(ret_type);
   LLVMValueRef result;
   unsigned lane;

   assert(num_args <= LP_MAX_FUNC_ARGS);

   result = LLVMGetUndef(ret_type);

   for (lane = 0; lane < num_lanes; lane++) {
      LLVMValueRef lane_args[LP_MAX_FUNC_ARGS];
      LLVMValueRef lane_index = lp_build_const_int32(gallivm, lane);
      LLVMValueRef lane_result;
      unsigned a = 0;

      /* Pull this lane out of every argument vector. */
      while (a < num_args) {
         lane_args[a] = LLVMBuildExtractElement(builder, args[a],
                                                lane_index, "");
         a++;
      }

      lane_result = lp_build_intrinsic(builder, name, lane_type,
                                       lane_args, num_args, 0);

      result = LLVMBuildInsertElement(builder, result, lane_result,
                                      lane_index, "");
   }

   return result;
}
/**
 * Expands src vector from src_type.length to dst_length.
 *
 * - If the lengths already match, src is returned unchanged.
 * - If src is a single scalar (length 1), it is broadcast to all
 *   dst_length elements via lp_build_broadcast.
 * - Otherwise the first src_type.length elements are copied from src and
 *   the remaining ones select from an undef vector.
 *
 * @param src         value to expand
 * @param src_type    type of src
 * @param dst_length  length of the result; must be >= src_type.length and
 *                    at most LP_MAX_VECTOR_LENGTH
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    struct lp_type src_type,
                    unsigned dst_length)
{
   LLVMValueRef undef = LLVMGetUndef(lp_build_vec_type(gallivm, src_type));
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   assert(dst_length <= Elements(elems));
   /* Equal lengths are legal: they take the early return just below. */
   assert(dst_length >= src_type.length);

   if (src_type.length == dst_length)
      return src;

   /* If it's a single scalar type, no need to reinvent the wheel */
   if (src_type.length == 1) {
      return lp_build_broadcast(gallivm,
                                LLVMVectorType(lp_build_elem_type(gallivm, src_type),
                                               dst_length),
                                src);
   }

   /* All elements from src vector */
   for (i = 0; i < src_type.length; ++i)
      elems[i] = lp_build_const_int32(gallivm, i);

   /*
    * Undef fill remaining space: index src_type.length is the first
    * element of the second (undef) shuffle operand.
    */
   for (i = src_type.length; i < dst_length; ++i)
      elems[i] = lp_build_const_int32(gallivm, src_type.length);

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, undef,
                                 LLVMConstVector(elems, dst_length), "");
}
/**
 * Combined extract and broadcast (a mere shuffle in most cases).
 *
 * Selects the element of `vector` given by `index` and returns it
 * replicated to dst_type.length elements:
 *  - scalar source, scalar destination: the input is returned as is
 *  - scalar source, vector destination: plain broadcast
 *  - vector source, vector destination: shuffle with a broadcast index mask
 *  - vector source, scalar destination: plain element extraction
 *
 * @param src_type  type of vector
 * @param dst_type  type of the result; must agree with src_type in
 *                  floating-ness and width
 * @param vector    source value
 * @param index     i32 element index
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef mask;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      /* Trivial scalar -> scalar. */
      if (dst_type.length == 1)
         return vector;

      /* Broadcast scalar -> vector. */
      return lp_build_broadcast(gallivm,
                                lp_build_vec_type(gallivm, dst_type),
                                vector);
   }

   /* Trivial extract scalar from vector. */
   if (!(dst_type.length > 1))
      return LLVMBuildExtractElement(gallivm->builder, vector, index, "");

   /* Shuffle - the result can be of a different length than the source. */
   mask = lp_build_broadcast(gallivm,
                             LLVMVectorType(i32t, dst_type.length),
                             index);
   return LLVMBuildShuffleVector(gallivm->builder, vector,
                                 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
                                 mask, "");
}
// Generate the value of a tuple expression.
//
// A tuple with a single child is just that child's value. Otherwise each
// child is generated, cast to the matching element type of the tuple's
// primitive struct type and inserted into an (initially undef) aggregate.
//
// Returns NULL if any child fails to generate, GEN_NOTNEEDED if the tuple
// type contains a don't-care, and the child's own result if a child
// generates GEN_NOVALUE or GEN_NOTNEEDED.
LLVMValueRef gen_tuple(compile_t* c, ast_t* ast)
{
  ast_t* first = ast_child(ast);

  if(ast_sibling(first) == NULL)
    return gen_expr(c, first);

  deferred_reification_t* reify = c->frame->reify;
  ast_t* type = deferred_reify(reify, ast_type(ast), c->opt);

  // If we contain '_', we have no usable value.
  if(contains_dontcare(type))
  {
    ast_free_unattached(type);
    return GEN_NOTNEEDED;
  }

  reach_type_t* t = reach_type(c->reach, type);
  compile_type_t* c_t = (compile_type_t*)t->c_type;

  // Temporary table of the struct's element types, used for casting.
  int count = LLVMCountStructElementTypes(c_t->primitive);
  size_t buf_size = count * sizeof(LLVMTypeRef);
  LLVMTypeRef* elements = (LLVMTypeRef*)ponyint_pool_alloc_size(buf_size);
  LLVMGetStructElementTypes(c_t->primitive, elements);

  LLVMValueRef result = LLVMGetUndef(c_t->primitive);
  int index = 0;

  for(ast_t* elem = first; elem != NULL; elem = ast_sibling(elem))
  {
    LLVMValueRef value = gen_expr(c, elem);

    // NULL is an error. GEN_NOVALUE / GEN_NOTNEEDED mean one of our source
    // elements is e.g. a variable declaration; that is ok, since the tuple
    // value will never be used. In every case the child's result is ours.
    if((value == NULL) || (value == GEN_NOVALUE) || (value == GEN_NOTNEEDED))
    {
      result = value;
      break;
    }

    ast_t* elem_type = deferred_reify(reify, ast_type(elem), c->opt);
    value = gen_assign_cast(c, elements[index], value, elem_type);
    ast_free_unattached(elem_type);

    result = LLVMBuildInsertValue(c->builder, result, value, index, "");
    index++;
  }

  ponyint_pool_free_size(buf_size, elements);
  return result;
}
static LLVMValueRef emit_fetch( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; LLVMValueRef result, ptr; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { values[chan] = emit_fetch(bld_base, reg, type, chan); } return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } if (reg->Register.Indirect) { struct tgsi_declaration_range range = get_array_range(bld_base, reg->Register.File, ®->Indirect); return LLVMBuildExtractElement(builder, emit_array_fetch(bld_base, reg->Register.File, type, range, swizzle), emit_array_index(bld, ®->Indirect, reg->Register.Index - range.First), ""); } switch(reg->Register.File) { case TGSI_FILE_IMMEDIATE: { LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } case TGSI_FILE_INPUT: result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; break; case TGSI_FILE_TEMPORARY: ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; case TGSI_FILE_OUTPUT: ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); result = LLVMBuildLoad(builder, ptr, ""); break; default: return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); } return bitcast(bld_base, type, result); }
/**
 * Expands src to a vector of dst_length elements.
 *
 * A non-vector src is placed in element 0 of an otherwise undefined
 * vector.  A vector src that already has dst_length elements is returned
 * unchanged; a shorter one keeps its elements at the front and is padded
 * with undefined elements.
 *
 * @param src         scalar or vector value to expand
 * @param dst_length  length of the result; for a vector src it must be at
 *                    least the source length and at most
 *                    LP_MAX_VECTOR_LENGTH
 */
LLVMValueRef
lp_build_pad_vector(struct gallivm_state *gallivm,
                    LLVMValueRef src,
                    unsigned dst_length)
{
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   LLVMTypeRef src_llvm_type = LLVMTypeOf(src);
   LLVMValueRef padding;
   unsigned src_length;
   unsigned pos;

   if (LLVMGetTypeKind(src_llvm_type) != LLVMVectorTypeKind) {
      /* Can't use ShuffleVector on non-vector type */
      padding = LLVMGetUndef(LLVMVectorType(src_llvm_type, dst_length));
      return LLVMBuildInsertElement(gallivm->builder, padding, src,
                                    lp_build_const_int32(gallivm, 0), "");
   }

   padding = LLVMGetUndef(src_llvm_type);
   src_length = LLVMGetVectorSize(src_llvm_type);

   assert(dst_length <= Elements(elems));
   assert(dst_length >= src_length);

   if (src_length == dst_length)
      return src;

   /* All elements from src vector */
   pos = 0;
   while (pos < src_length) {
      elems[pos] = lp_build_const_int32(gallivm, pos);
      pos++;
   }

   /*
    * Undef fill remaining space: index src_length selects the first
    * element of the second (undef) shuffle operand.
    */
   while (pos < dst_length) {
      elems[pos] = lp_build_const_int32(gallivm, src_length);
      pos++;
   }

   /* Combine the two vectors */
   return LLVMBuildShuffleVector(gallivm->builder, src, padding,
                                 LLVMConstVector(elems, dst_length), "");
}
/**
 * Fetch n pixels of a subsampled (32 bits per 2x1 block) format and
 * convert them to RGBA in AoS layout.
 *
 * The packed blocks are gathered as 32-bit values from base_ptr + offset
 * and handed to the per-format conversion routine.
 *
 * @param n         is the number of pixels processed
 * @param base_ptr  base pointer the blocks are gathered from
 * @param offset    offsets of the blocks, passed on to lp_build_gather
 * @param i         is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param j         unused
 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
 */
LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                   const struct util_format_description *format_desc,
                                   unsigned n,
                                   LLVMValueRef base_ptr,
                                   LLVMValueRef offset,
                                   LLVMValueRef i,
                                   LLVMValueRef j)
{
   struct lp_type block_type;
   LLVMValueRef blocks;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
   assert(format_desc->block.bits == 32);
   assert(format_desc->block.width == 2);
   assert(format_desc->block.height == 1);

   block_type = lp_type_uint(32);
   blocks = lp_build_gather(gallivm, n, 32, block_type, TRUE,
                            base_ptr, offset, FALSE);

   (void)j;

   switch (format_desc->format) {
   case PIPE_FORMAT_UYVY:
      return uyvy_to_rgba_aos(gallivm, n, blocks, i);
   case PIPE_FORMAT_YUYV:
      return yuyv_to_rgba_aos(gallivm, n, blocks, i);
   case PIPE_FORMAT_R8G8_B8G8_UNORM:
      return rgbg_to_rgba_aos(gallivm, n, blocks, i);
   case PIPE_FORMAT_G8R8_G8B8_UNORM:
      return grgb_to_rgba_aos(gallivm, n, blocks, i);
   case PIPE_FORMAT_G8R8_B8R8_UNORM:
      return grbr_to_rgba_aos(gallivm, n, blocks, i);
   case PIPE_FORMAT_R8G8_R8B8_UNORM:
      return rgrb_to_rgba_aos(gallivm, n, blocks, i);
   default:
      break;
   }

   /* Unsupported format: complain in debug builds, return undef otherwise. */
   assert(0);
   return LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context),
                                      4*n));
}
/**
 * Pack an array of scalar values into a vector.
 *
 * The vector's element type is taken from values[0]; element k of the
 * result is values[k].
 *
 * @param values       the values to pack
 * @param value_count  number of values / length of the result
 */
LLVMValueRef
lp_build_gather_values(struct gallivm_state * gallivm,
		       LLVMValueRef * values,
		       unsigned value_count)
{
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef packed_type =
		LLVMVectorType(LLVMTypeOf(values[0]), value_count);
	LLVMValueRef packed = LLVMGetUndef(packed_type);
	unsigned n = 0;

	while (n < value_count) {
		LLVMValueRef slot = lp_build_const_int32(gallivm, n);

		packed = LLVMBuildInsertElement(builder, packed, values[n],
						slot, "");
		n++;
	}

	return packed;
}
/// Release every function recorded as unreachable for the given core.
///
/// For each listed address that is still present in the function map, the
/// machine code is freed, all uses of the LLVM function are replaced with
/// undef, the function is deleted, and its bookkeeping entry is destroyed
/// and removed from the map.  Addresses with no map entry are skipped.
/// The unreachable list is emptied afterwards.
void JITImpl::reclaimUnreachableFunctions(JITCoreInfo &coreInfo)
{
  typedef std::vector<uint32_t> AddressList;
  typedef std::map<uint32_t,JITFunctionInfo*> FunctionMap;

  AddressList &pending = coreInfo.unreachableFunctions;
  FunctionMap &functions = coreInfo.functionMap;

  AddressList::iterator cur = pending.begin();
  const AddressList::iterator last = pending.end();
  while (cur != last) {
    FunctionMap::iterator found = functions.find(*cur);
    ++cur;
    if (found != functions.end()) {
      JITFunctionInfo *info = found->second;
      LLVMValueRef fn = info->value;

      LLVMFreeMachineCodeForFunction(executionEngine, fn);
      // Drop remaining references before the function itself goes away.
      LLVMReplaceAllUsesWith(fn, LLVMGetUndef(LLVMTypeOf(fn)));
      LLVMDeleteFunction(fn);

      delete info;
      functions.erase(found);
    }
  }

  pending.clear();
}
/**
 * Fetch n pixels of a subsampled (32 bits per 2x1 block) format and
 * convert them to RGBA in AoS layout.
 *
 * The packed blocks are gathered as 32-bit values from base_ptr + offset
 * and handed to the per-format conversion routine.
 *
 * @param n         is the number of pixels processed
 * @param base_ptr  base pointer the blocks are gathered from
 * @param offset    offsets of the blocks, passed on to lp_build_gather
 * @param i         is a <n x i32> vector with the x pixel coordinate (0 or 1)
 * @param j         unused
 * @return a <4*n x i8> vector with the pixel RGBA values in AoS
 */
LLVMValueRef
lp_build_fetch_subsampled_rgba_aos(LLVMBuilderRef builder,
                                   const struct util_format_description *format_desc,
                                   unsigned n,
                                   LLVMValueRef base_ptr,
                                   LLVMValueRef offset,
                                   LLVMValueRef i,
                                   LLVMValueRef j)
{
   LLVMValueRef blocks;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED);
   assert(format_desc->block.bits == 32);
   assert(format_desc->block.width == 2);
   assert(format_desc->block.height == 1);

   blocks = lp_build_gather(builder, n, 32, 32, base_ptr, offset);

   (void)j;

   switch (format_desc->format) {
   case PIPE_FORMAT_UYVY:
      return uyvy_to_rgba_aos(builder, n, blocks, i);
   case PIPE_FORMAT_YUYV:
      return yuyv_to_rgba_aos(builder, n, blocks, i);
   case PIPE_FORMAT_R8G8_B8G8_UNORM:
      return rgbg_to_rgba_aos(builder, n, blocks, i);
   case PIPE_FORMAT_G8R8_G8B8_UNORM:
      return grgb_to_rgba_aos(builder, n, blocks, i);
   default:
      break;
   }

   /* Unsupported format: complain in debug builds, return undef otherwise. */
   assert(0);
   return LLVMGetUndef(LLVMVectorType(LLVMInt8Type(), 4*n));
}
/**
 * Rearrange the elements of a vector value.
 *
 * Element k of the 4-element result is the element of `value` selected by
 * the k-th swizzle argument (x, y, z, w in that order).
 */
static LLVMValueRef emit_swizzle(
	struct lp_build_tgsi_context * bld_base,
	LLVMValueRef value,
	unsigned swizzle_x,
	unsigned swizzle_y,
	unsigned swizzle_z,
	unsigned swizzle_w)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
	const unsigned selectors[4] = {
		swizzle_x, swizzle_y, swizzle_z, swizzle_w
	};
	LLVMValueRef mask[4];
	unsigned chan;

	for (chan = 0; chan < 4; chan++)
		mask[chan] = LLVMConstInt(i32t, selectors[chan], 0);

	/* Only `value` is indexed; the second shuffle operand is undef. */
	return LLVMBuildShuffleVector(gallivm->builder,
				      value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(mask, 4),
				      "");
}
/**
 * Gather elements from scattered positions in memory into a single vector.
 *
 * For each of the `length` offsets, the element at base_ptr + offset is
 * loaded as a src_width-bit integer, resized to dst_width bits and stored
 * in the matching element of the result.
 *
 * @param src_width  src element width
 * @param dst_width  result element width (source will be expanded to fit)
 * @param length     length of the offsets
 * @param base_ptr   base pointer, should be a i8 pointer type.
 * @param offsets    vector with offsets
 */
LLVMValueRef
lp_build_gather(LLVMBuilderRef builder,
                unsigned length,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
                LLVMValueRef offsets)
{
   LLVMTypeRef load_type = LLVMIntType(src_width);
   LLVMTypeRef load_ptr_type = LLVMPointerType(load_type, 0);
   LLVMTypeRef out_elem_type = LLVMIntType(dst_width);
   LLVMValueRef gathered = LLVMGetUndef(LLVMVectorType(out_elem_type, length));
   unsigned lane;

   for (lane = 0; lane < length; lane++) {
      LLVMValueRef lane_index = LLVMConstInt(LLVMInt32Type(), lane, 0);
      LLVMValueRef byte_offset;
      LLVMValueRef address;
      LLVMValueRef texel;

      /* Address of this lane's element: base_ptr + offsets[lane]. */
      byte_offset = LLVMBuildExtractElement(builder, offsets, lane_index, "");
      address = LLVMBuildGEP(builder, base_ptr, &byte_offset, 1, "");
      address = LLVMBuildBitCast(builder, address, load_ptr_type, "");
      texel = LLVMBuildLoad(builder, address, "");

      /* Resize to the destination width when the widths differ. */
      assert(src_width <= dst_width);
      if (src_width > dst_width) {
         texel = LLVMBuildTrunc(builder, texel, out_elem_type, "");
      }
      if (src_width < dst_width) {
         texel = LLVMBuildZExt(builder, texel, out_elem_type, "");
      }

      gathered = LLVMBuildInsertElement(builder, gathered, texel,
                                        lane_index, "");
   }

   return gathered;
}
// Generate the value of a tuple expression.
//
// A tuple with a single child is just that child's value. Otherwise each
// child is generated and inserted, in order, into an (initially undef)
// aggregate of the tuple's primitive type; children that produce
// GEN_NOVALUE are skipped and do not consume an element slot.
//
// Returns NULL if the type or any child fails to generate, and GEN_NOVALUE
// when the tuple type has no primitive representation.
LLVMValueRef gen_tuple(compile_t* c, ast_t* ast)
{
  ast_t* first = ast_child(ast);

  if(ast_sibling(first) == NULL)
    return gen_expr(c, first);

  gentype_t g;

  if(!gentype(c, ast_type(ast), &g))
    return NULL;

  // If we contain TK_DONTCARE, we have no usable value.
  if(g.primitive == NULL)
    return GEN_NOVALUE;

  LLVMValueRef result = LLVMGetUndef(g.primitive);
  int slot = 0;

  for(ast_t* elem = first; elem != NULL; elem = ast_sibling(elem))
  {
    LLVMValueRef value = gen_expr(c, elem);

    if(value == NULL)
      return NULL;

    // We'll have an undefined element if one of our source elements is a
    // variable declaration. This is ok, since the tuple value will never be
    // used.
    if(value == GEN_NOVALUE)
      continue;

    result = LLVMBuildInsertValue(c->builder, result, value, slot, "");
    slot++;
  }

  return result;
}
/**
 * Replicate a scalar into every element of vec_type.
 *
 * When vec_type is not a vector type the scalar is returned unchanged
 * (its type must then equal vec_type).  For HAVE_LLVM >= 0x207 the scalar
 * is inserted into element 0 and spread with an all-zero shuffle mask;
 * older versions insert the scalar into each element one at a time.
 *
 * @param vec_type  destination type (vector or scalar)
 * @param scalar    value to replicate; must have vec_type's element type
 */
LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMBuilderRef builder;
   LLVMTypeRef idx_type;
   LLVMValueRef blank;
   LLVMValueRef filled;
   unsigned num_elems;

   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      /* scalar destination: nothing to broadcast */
      assert(vec_type == LLVMTypeOf(scalar));
      return scalar;
   }

   builder = gallivm->builder;
   num_elems = LLVMGetVectorSize(vec_type);
   blank = LLVMGetUndef(vec_type);
   idx_type = LLVMInt32TypeInContext(gallivm->context);

   assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

   if (!(HAVE_LLVM >= 0x207)) {
      /* XXX: The shuffle path below provokes a bug in LLVM 2.6 */
      unsigned pos;

      filled = blank;
      for (pos = 0; pos < num_elems; pos++) {
         filled = LLVMBuildInsertElement(builder, filled, scalar,
                                         lp_build_const_int32(gallivm, pos),
                                         "");
      }
      return filled;
   }

   /* The shuffle vector is always made of int32 elements */
   filled = LLVMBuildInsertElement(builder, blank, scalar,
                                   LLVMConstNull(idx_type), "");
   return LLVMBuildShuffleVector(builder, filled, blank,
                                 LLVMConstNull(LLVMVectorType(idx_type,
                                                              num_elems)),
                                 "");
}
// Build a tuple value of type l_type from r_value by casting each element.
//
// Every element of r_value is extracted, cast to the corresponding element
// type of l_type (using the matching child of the tuple type AST) and
// inserted into a fresh aggregate. Returns NULL if any element cast fails.
static LLVMValueRef assign_to_tuple(compile_t* c, LLVMTypeRef l_type,
  LLVMValueRef r_value, ast_t* type)
{
  assert(ast_id(type) == TK_TUPLETYPE);

  // Temporary table of the destination element types.
  int count = LLVMCountStructElementTypes(l_type);
  size_t buf_size = count * sizeof(LLVMTypeRef);
  LLVMTypeRef* elements = (LLVMTypeRef*)pool_alloc_size(buf_size);
  LLVMGetStructElementTypes(l_type, elements);

  LLVMValueRef result = LLVMGetUndef(l_type);
  int index = 0;

  // Cast each component.
  for(ast_t* elem_type = ast_child(type); elem_type != NULL;
    elem_type = ast_sibling(elem_type))
  {
    LLVMValueRef source = LLVMBuildExtractValue(c->builder, r_value, index,
      "");
    LLVMValueRef converted = gen_assign_cast(c, elements[index], source,
      elem_type);

    if(converted == NULL)
    {
      result = NULL;
      break;
    }

    result = LLVMBuildInsertValue(c->builder, result, converted, index, "");
    index++;
  }

  pool_free_size(buf_size, elements);
  return result;
}
/**
 * Perform the occlusion test and increase the counter.
 *
 * Counts the channels of the depth mask that are non-zero and adds that
 * number to the occlusion counter, e.g. a mask of {-1, -1, -1, -1} adds 4.
 *
 * Implementation: the mask is ANDed with 1 per element, viewed as 16
 * bytes, bytes 0, 4, 8 and 12 are packed into one 32-bit integer, and the
 * population count of that integer is added to *counter.
 *
 * \param type       holds element type of the mask vector.
 * \param maskvalue  is the depth test mask.
 * \param counter    is a pointer of the uint32 counter.
 */
static void
lp_build_occlusion_count(LLVMBuilderRef builder,
                         struct lp_type type,
                         LLVMValueRef maskvalue,
                         LLVMValueRef counter)
{
   LLVMValueRef first_bytes[4];
   LLVMValueRef ones, low_bits, as_bytes, picked, packed;
   LLVMValueRef hits, previous, updated;
   LLVMTypeRef bytes16;
   unsigned lane;

   /* Keep only the lowest bit of every mask element. */
   ones = lp_build_const_int_vec(type, 1);
   low_bits = LLVMBuildAnd(builder, maskvalue, ones, "countv");

   bytes16 = LLVMVectorType(LLVMInt8Type(), 16);
   as_bytes = LLVMBuildBitCast(builder, low_bits, bytes16, "counti");

   /* Select bytes 0, 4, 8 and 12. */
   for (lane = 0; lane < 4; lane++)
      first_bytes[lane] = LLVMConstInt(LLVMInt32Type(), lane * 4, 0);

   picked = LLVMBuildShuffleVector(builder, as_bytes, LLVMGetUndef(bytes16),
                                   LLVMConstVector(first_bytes, 4),
                                   "shufflev");
   packed = LLVMBuildBitCast(builder, picked, LLVMInt32Type(), "shuffle");

   /* Each surviving byte is 0 or 1, so the popcount is the channel count. */
   hits = lp_build_intrinsic_unary(builder, "llvm.ctpop.i32",
                                   LLVMInt32Type(), packed);

   previous = LLVMBuildLoad(builder, counter, "orig");
   updated = LLVMBuildAdd(builder, previous, hits, "incr");
   LLVMBuildStore(builder, updated, counter);
}
/**
 * Fetch pixels into an RGBA AoS vector of the requested type.
 *
 * type.length / 4 pixels are fetched; the strategies below are tried in
 * order and the first one whose conditions hold is used:
 *  1. trivial gather + swizzle when the format already matches the type,
 *  2. bit arithmetic unpacking for plain bitmask formats of up to 32 bits,
 *  3. YUV / subsampled formats,
 *  4. a per-pixel call to format_desc->fetch_rgba_8unorm (8-bit unsigned
 *     normalized result types only),
 *  5. a per-pixel call to format_desc->fetch_rgba_float.
 * If none applies the function asserts and returns an undefined value.
 *
 * \param format_desc  describes format of the image we're fetching from
 * \param type         type of the result; its length must be a multiple
 *                     of 4 and at most LP_MAX_VECTOR_LENGTH
 * \param base_ptr     base address the pixel blocks (or the texels if
 *                     uncompressed) are fetched from
 * \param offset       offset(s) from base_ptr of the blocks to fetch
 * \param i, j         the sub-block pixel coordinates. For non-compressed
 *                     formats these will always be (0, 0).
 * \return a vector of `type` with the pixels' RGBA values.
 */
LLVMValueRef
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        const struct util_format_description *format_desc,
                        struct lp_type type,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* Four channels (RGBA) per pixel. */
   unsigned num_pixels = type.length / 4;
   struct lp_build_context bld;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   assert(type.length % 4 == 0);

   lp_build_context_init(&bld, gallivm, type);

   /*
    * Trivial case
    *
    * The format matches the type (apart of a swizzle) so no need for
    * scaling or converting.
    */

   if (format_matches_type(format_desc, type) &&
       format_desc->block.bits <= type.width * 4 &&
       util_is_power_of_two(format_desc->block.bits)) {
      LLVMValueRef packed;

      /*
       * Gather one block per pixel, each expanded to the width of four
       * channels, then reinterpret the result as the destination vector.
       */

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
                               base_ptr, offset);

      assert(format_desc->block.bits <= type.width * type.length);

      packed = LLVMBuildBitCast(gallivm->builder, packed,
                                lp_build_vec_type(gallivm, type), "");

      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

   /*
    * Bit arithmetic
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
       util_is_power_of_two(format_desc->block.bits) &&
       format_desc->block.bits <= 32 &&
       format_desc->is_bitmask &&
       !format_desc->is_mixed &&
       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) {

      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      unsigned k;

      /*
       * Unpack a pixel at a time into a <4 x float> RGBA vector
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef packed;

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
                                                  packed);
      }

      /*
       * Type conversion.
       *
       * TODO: We could avoid floating conversion for integer to
       * integer conversions.
       */

      if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) {
         debug_printf("%s: unpacking %s with floating point\n",
                      __FUNCTION__, format_desc->short_name);
      }

      /* Convert the num_pixels float vectors into one vector of `type`. */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return lp_build_format_swizzle_aos(format_desc, &bld, res);
   }

   /*
    * YUV / subsampled formats
    */

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      struct lp_type tmp_type;
      LLVMValueRef tmp;

      /* Intermediate type: 8-bit normalized, four channels per pixel. */
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = num_pixels * 4;
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_subsampled_rgba_aos(gallivm,
                                               format_desc,
                                               num_pixels,
                                               base_ptr,
                                               offset,
                                               i, j);

      /* Converted in place: tmp is both the source and the destination. */
      lp_build_conv(gallivm,
                    tmp_type, type,
                    &tmp, 1, &tmp, 1);

      return tmp;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */

   if (format_desc->fetch_rgba_8unorm &&
       !type.floating && type.width == 8 && !type.sign && type.norm) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_8unorm.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      char name[256];
      LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
      LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmp;
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_8unorm",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_8unorm().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pi8t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* make const pointer for the C fetch_rgba_8unorm function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));

      /*
       * cast the callee pointer to the function's type; the declaration
       * above is only used for its type from here on
       */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      /* One i32 of scratch space receives each pixel's result. */
      tmp_ptr = lp_build_alloca(gallivm, i32t, "");

      res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels));

      /*
       * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert
       * the 32-bit result into the <n x i32> result vector.
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef index = lp_build_const_int32(gallivm, k);
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            /* i and j are passed through as they are */
            args[2] = i;
            args[3] = j;
         }
         else {
            /* i and j hold one coordinate per pixel */
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmp = LLVMBuildLoad(builder, tmp_ptr, "");

         if (num_pixels == 1) {
            res = tmp;
         }
         else {
            res = LLVMBuildInsertElement(builder, res, tmp, index, "");
         }
      }

      /* Bitcast from <n x i32> to <4n x i8> */
      res = LLVMBuildBitCast(builder, res, bld.vec_type, "");

      return res;
   }

   /*
    * Fallback to util_format_description::fetch_rgba_float().
    */

   if (format_desc->fetch_rgba_float) {
      /*
       * Fallback to calling util_format_description::fetch_rgba_float.
       *
       * This is definitely not the most efficient way of fetching pixels,
       * as we miss the opportunity to do vectorization, but it is
       * convenient for formats or scenarios for which there was no
       * opportunity or incentive to optimize.
       */

      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
      char name[256];
      LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
      LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4);
      LLVMTypeRef pf32t = LLVMPointerType(f32t, 0);
      LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
      LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
      LLVMValueRef function;
      LLVMValueRef tmp_ptr;
      LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4];
      LLVMValueRef res;
      LLVMValueRef callee;
      unsigned k;

      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
                    format_desc->short_name);

      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
         debug_printf("%s: falling back to %s\n", __FUNCTION__, name);
      }

      /*
       * Declare and bind format_desc->fetch_rgba_float().
       */

      function = LLVMGetNamedFunction(module, name);
      if (!function) {
         /*
          * Function to call looks like:
          *   fetch(float *dst, const uint8_t *src, unsigned i, unsigned j)
          */
         LLVMTypeRef ret_type;
         LLVMTypeRef arg_types[4];
         LLVMTypeRef function_type;

         ret_type = LLVMVoidTypeInContext(gallivm->context);
         arg_types[0] = pf32t;
         arg_types[1] = pi8t;
         arg_types[2] = i32t;
         arg_types[3] = i32t;
         function_type = LLVMFunctionType(ret_type, arg_types,
                                          Elements(arg_types), 0);
         function = LLVMAddFunction(module, name, function_type);

         LLVMSetFunctionCallConv(function, LLVMCCallConv);
         LLVMSetLinkage(function, LLVMExternalLinkage);

         assert(LLVMIsDeclaration(function));
      }

      /* Note: we're using this casting here instead of LLVMAddGlobalMapping()
       * to work around a bug in LLVM 2.6.
       */

      /* make const pointer for the C fetch_rgba_float function */
      callee = lp_build_const_int_pointer(gallivm,
         func_to_pointer((func_pointer) format_desc->fetch_rgba_float));

      /* cast the callee pointer to the function's type */
      function = LLVMBuildBitCast(builder, callee,
                                  LLVMTypeOf(function), "cast callee");

      /* One <4 x float> of scratch space receives each pixel's result. */
      tmp_ptr = lp_build_alloca(gallivm, f32x4t, "");

      /*
       * Invoke format_desc->fetch_rgba_float() for each pixel and keep the
       * resulting <4 x float> RGBA vector of every pixel in tmps[].
       */

      for (k = 0; k < num_pixels; ++k) {
         LLVMValueRef args[4];

         args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, "");
         args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels,
                                            base_ptr, offset, k);

         if (num_pixels == 1) {
            /* i and j are passed through as they are */
            args[2] = i;
            args[3] = j;
         }
         else {
            /* i and j hold one coordinate per pixel */
            LLVMValueRef index = lp_build_const_int32(gallivm, k);
            args[2] = LLVMBuildExtractElement(builder, i, index, "");
            args[3] = LLVMBuildExtractElement(builder, j, index, "");
         }

         LLVMBuildCall(builder, function, args, Elements(args), "");

         tmps[k] = LLVMBuildLoad(builder, tmp_ptr, "");
      }

      /* Convert the num_pixels float vectors into one vector of `type`. */
      lp_build_conv(gallivm,
                    lp_float32_vec4_type(),
                    type,
                    tmps, num_pixels, &res, 1);

      return res;
   }

   /* No strategy can handle this format / type combination. */
   assert(0);
   return lp_build_undef(gallivm, type);
}
/**
 * Pack a single pixel.
 *
 * Emits IR that reorders the RGBA components into the format's channel
 * order, scales normalized channels by their maximum integer value, converts
 * to 32-bit integers, shifts every channel to its bit position and ORs the
 * channels together.
 *
 * Only plain-layout formats with a 1x1 block are accepted (asserted), and
 * every non-VOID channel must be UTIL_FORMAT_TYPE_UNSIGNED and narrower than
 * 32 bits (asserted).
 *
 * @param gallivm  code generation state; supplies the builder and context
 * @param desc     description of the destination pixel format
 * @param rgba     4 float vector with the unpacked components.
 *
 * @return the packed pixel; truncated to a desc->block.bits wide integer when
 *         the block is narrower than 32 bits, otherwise the 32-bit value.
 *
 * XXX: This is mostly for reference and testing -- operating a single pixel at
 * a time is rarely if ever needed.
 */
LLVMValueRef
lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
                       const struct util_format_description *desc,
                       LLVMValueRef rgba)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context);
   LLVMTypeRef type;
   LLVMValueRef packed = NULL;
   LLVMValueRef swizzles[4];
   LLVMValueRef shifted, casted, scaled, unswizzled;
   LLVMValueRef shifts[4];
   LLVMValueRef scales[4];
   boolean normalized;
   unsigned shift;
   unsigned i, j;

   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);

   type = LLVMIntTypeInContext(gallivm->context, desc->block.bits);

   /*
    * Unswizzle the color components into the source vector: for each
    * channel i, find the swizzle slot j that refers to it.  Channels nothing
    * refers to get an undefined shuffle index.
    */
   for (i = 0; i < 4; ++i) {
      for (j = 0; j < 4; ++j) {
         if (desc->swizzle[j] == i)
            break;
      }
      if (j < 4)
         swizzles[i] = lp_build_const_int32(gallivm, j);
      else
         swizzles[i] = LLVMGetUndef(i32t);
   }

   unswizzled = LLVMBuildShuffleVector(builder, rgba,
                                       LLVMGetUndef(LLVMVectorType(f32t, 4)),
                                       LLVMConstVector(swizzles, 4), "");

   /* Build the per-channel shift amounts and normalization scales. */
   normalized = FALSE;
   shift = 0;
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(i32t);
         scales[i] = LLVMGetUndef(f32t);
      }
      else {
         unsigned mask;

         assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED);
         assert(bits < 32);

         /*
          * Validate the width before shifting, and shift an unsigned one:
          * "1 << 31" on a signed int is undefined even though bits == 31
          * passes the assertion above.
          */
         mask = (1u << bits) - 1;

         shifts[i] = lp_build_const_int32(gallivm, shift);

         if (desc->channel[i].normalized) {
            scales[i] = lp_build_const_float(gallivm, mask);
            normalized = TRUE;
         }
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }

      /* VOID channels still occupy their bits in the packed layout. */
      shift += bits;
   }

   /* The multiply is only emitted when at least one channel is normalized. */
   if (normalized)
      scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), "");
   else
      scaled = unswizzled;

   casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(i32t, 4), "");

   shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), "");

   /* Bitwise or all components */
   for (i = 0; i < 4; ++i) {
      if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         LLVMValueRef component = LLVMBuildExtractElement(builder, shifted,
                                                          lp_build_const_int32(gallivm, i), "");
         if (packed)
            packed = LLVMBuildOr(builder, packed, component, "");
         else
            packed = component;
      }
   }

   /* No unsigned channel at all: the result is undefined. */
   if (!packed)
      packed = LLVMGetUndef(i32t);

   if (desc->block.bits < 32)
      packed = LLVMBuildTrunc(builder, packed, type, "");

   return packed;
}
/**
 * Non-interleaved pack.
 *
 * This will move values as
 *
 *   lo =   __ l0 __ l1 __ l2 __..  __ ln
 *   hi =   __ h0 __ h1 __ h2 __..  __ hn
 *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
 *
 * This will only change the number of bits the values are represented, not the
 * values themselves.
 *
 * It is assumed the values are already clamped into the destination type range.
 * Values outside that range will produce undefined results. Use
 * lp_build_packs2 instead.
 *
 * Both types must be non-floating, the source elements must be twice as wide
 * as the destination elements, and the destination vector must hold twice as
 * many elements as the source vector (all asserted below).
 *
 * @param gallivm   code generation state; supplies the builder
 * @param src_type  type of the lo and hi input vectors
 * @param dst_type  type of the packed result
 * @param lo        vector providing the first half of the result
 * @param hi        vector providing the second half of the result
 *
 * NOTE(review): the definition visible here stops after the SSE special
 * cases; the generic path announced by "use generic shuffle below" and the
 * function's closing brace are not present in this excerpt.
 */
LLVMValueRef lp_build_pack2(struct gallivm_state *gallivm, struct lp_type src_type, struct lp_type dst_type, LLVMValueRef lo, LLVMValueRef hi) {
   LLVMBuilderRef builder = gallivm->builder;
   /* The source vector type is only needed as intrinsic result type on LLVM older than 2.7. */
#if HAVE_LLVM < 0x0207
   LLVMTypeRef src_vec_type = lp_build_vec_type(gallivm, src_type);
#endif
   LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, dst_type);
   LLVMValueRef shuffle;
   LLVMValueRef res = NULL;
   assert(!src_type.floating);
   assert(!dst_type.floating);
   assert(src_type.width == dst_type.width * 2);
   assert(src_type.length * 2 == dst_type.length);
   /* Check for special cases first */
   /* The intrinsics below are only tried for 128-bit source vectors on CPUs reporting SSE2. */
   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
      switch(src_type.width) {
      case 32:
         /* 32 -> 16 bit: signed destinations use packssdw. */
         if(dst_type.sign) {
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
#endif
         } else {
            /* Unsigned destinations need packusdw, which is only used when SSE4.1 is reported. */
            if (util_cpu_caps.has_sse4_1) {
               return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
            } else {
               /* use generic shuffle below */
               res = NULL;
            }
         }
         break;
      case 16:
         /* 16 -> 8 bit: packsswb for signed, packuswb for unsigned destinations. */
         if(dst_type.sign)
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
#endif
         else
#if HAVE_LLVM >= 0x0207
            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", dst_vec_type, lo, hi);
#else
            res = lp_build_intrinsic_binary(builder,
"llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
#endif
         break;
      default:
         /* No other source width is expected for a 128-bit integer vector here. */
         assert(0);
         return LLVMGetUndef(dst_vec_type);
         break;
      }
      /*
       * An intrinsic was emitted.  On older LLVM it was requested with the
       * source vector type, so bitcast the result to the destination type
       * before returning it.
       */
      if (res) {
         res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
         return res;
      }
   }
/**
 * Unpack a single pixel into its RGBA components using integer arithmetic.
 *
 * The packed 32-bit value is replicated into a four element vector, every
 * lane is shifted and masked down to one channel, the lanes are converted to
 * float and, when any channel is normalized, multiplied by a per-channel
 * scale.
 *
 * @param gallivm the code generation state; supplies the builder and context
 * @param desc    the pixel format for the packed pixel value
 * @param packed  integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM;
 *                must be a 32-bit integer value (asserted)
 *
 * @return a four element float vector with one lane per format channel.
 */
static INLINE LLVMValueRef
lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                               const struct util_format_description *desc,
                               LLVMValueRef packed)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shift_consts[4];
   LLVMValueRef mask_consts[4];
   LLVMValueRef scale_consts[4];
   LLVMValueRef result;
   boolean any_normalized = FALSE;
   boolean has_32bit_channel = FALSE;
   unsigned bit_offset = 0;
   unsigned chan;

   /* TODO: Support more formats */
   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
   assert(desc->block.width == 1);
   assert(desc->block.height == 1);
   assert(desc->block.bits <= 32);

   /* The integer stage works on 32-bit lanes, the same size as the float
    * lanes produced at the end. */
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

   /* Splat the pixel across the vector:
    *   before: packed = BGRA
    *   after:  packed = {BGRA, BGRA, BGRA, BGRA}
    * by inserting it in lane 0 and shuffling with an all-zero index vector.
    */
   packed = LLVMBuildInsertElement(builder,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   packed,
                                   LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)),
                                   "");
   packed = LLVMBuildShuffleVector(builder, packed,
                                   LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)),
                                   "");

   /* Collect shift, mask and scale constants for each of the 4 channels. */
   for (chan = 0; chan < 4; ++chan) {
      const unsigned width = desc->channel[chan].size;

      if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) {
         const unsigned long long chan_mask = (1ULL << width) - 1;

         assert(desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED);

         if (width == 32) {
            has_32bit_channel = TRUE;
         }

         shift_consts[chan] = lp_build_const_int32(gallivm, bit_offset);
         mask_consts[chan] = lp_build_const_int32(gallivm, chan_mask);

         if (desc->channel[chan].normalized) {
            scale_consts[chan] = lp_build_const_float(gallivm, 1.0 / chan_mask);
            any_normalized = TRUE;
         }
         else {
            scale_consts[chan] = lp_build_const_float(gallivm, 1.0);
         }
      }
      else {
         /* Padding channel: zero mask and zero scale. */
         shift_consts[chan] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
         mask_consts[chan] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
         scale_consts[chan] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context));
      }

      bit_offset += width;
   }

   /* Ex: turn {BGRA, BGRA, BGRA, BGRA} into {B, G, R, A}. */
   result = LLVMBuildLShr(builder, packed, LLVMConstVector(shift_consts, 4), "");
   result = LLVMBuildAnd(builder, result, LLVMConstVector(mask_consts, 4), "");

   if (has_32bit_channel) {
      result = LLVMBuildUIToFP(builder, result,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4),
                               "");
   }
   else {
      /* UIToFP can't be expressed in SSE2, so use the signed conversion
       * whenever no channel is a full 32 bits wide. */
      result = LLVMBuildSIToFP(builder, result,
                               LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4),
                               "");
   }

   /* Here the lanes may hold values such as {255.0, 255.0, 255.0, 255.0};
    * normalized formats are brought down to {1.0, 1.0, 1.0, 1.0}. */
   if (any_normalized) {
      result = LLVMBuildFMul(builder, result, LLVMConstVector(scale_consts, 4), "");
   }

   return result;
}
static void llvm_emit_tex( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) { struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef args[7]; unsigned c, sampler_src; struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); if (emit_data->inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { switch (emit_data->inst->Instruction.Opcode) { case TGSI_OPCODE_TXQ: { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->uses_tex_buffers = true; bool isEgPlus = (ctx->chip_class >= EVERGREEN); LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, isEgPlus ? 0 : 1); LLVMValueRef cvecval = llvm_load_const_buffer(bld_base, offset, LLVM_R600_BUFFER_INFO_CONST_BUFFER); if (!isEgPlus) { LLVMValueRef maskval[4] = { lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 2), lp_build_const_int32(gallivm, 3), lp_build_const_int32(gallivm, 0), }; LLVMValueRef mask = LLVMConstVector(maskval, 4); cvecval = LLVMBuildShuffleVector(gallivm->builder, cvecval, cvecval, mask, ""); } emit_data->output[0] = cvecval; return; } case TGSI_OPCODE_TXF: { args[0] = LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""); args[1] = lp_build_const_int32(gallivm, R600_MAX_CONST_BUFFERS); emit_data->output[0] = build_intrinsic(gallivm->builder, "llvm.R600.load.texbuf", emit_data->dst_type, args, 2, LLVMReadNoneAttribute); if (ctx->chip_class >= EVERGREEN) return; ctx->uses_tex_buffers = true; LLVMDumpValue(emit_data->output[0]); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.int_elem_type, 4), ""); LLVMValueRef Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 0), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildBitCast(gallivm->builder, Mask, LLVMVectorType(bld_base->base.int_elem_type, 4), ""); emit_data->output[0] = 
lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_AND, emit_data->output[0], Mask); LLVMValueRef WComponent = LLVMBuildExtractElement(gallivm->builder, emit_data->output[0], lp_build_const_int32(gallivm, 3), ""); Mask = llvm_load_const_buffer(bld_base, lp_build_const_int32(gallivm, 1), LLVM_R600_BUFFER_INFO_CONST_BUFFER); Mask = LLVMBuildExtractElement(gallivm->builder, Mask, lp_build_const_int32(gallivm, 0), ""); Mask = LLVMBuildBitCast(gallivm->builder, Mask, bld_base->base.int_elem_type, ""); WComponent = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_OR, WComponent, Mask); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], WComponent, lp_build_const_int32(gallivm, 3), ""); emit_data->output[0] = LLVMBuildBitCast(gallivm->builder, emit_data->output[0], LLVMVectorType(bld_base->base.elem_type, 4), ""); } return; default: break; } } if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TEX || emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXP) { LLVMValueRef Vector[4] = { LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 0), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 1), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 2), ""), LLVMBuildExtractElement(gallivm->builder, emit_data->args[0], lp_build_const_int32(gallivm, 3), ""), }; switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; case TGSI_TEXTURE_1D: Vector[1] = Vector[2] = Vector[3] = LLVMGetUndef(bld_base->base.elem_type); break; default: break; } args[0] = lp_build_gather_values(gallivm, Vector, 4); } else { args[0] = emit_data->args[0]; } assert(emit_data->arg_count + 2 <= Elements(args)); for (c = 1; c < emit_data->arg_count; ++c) args[c] = emit_data->args[c]; if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 
args[1] = LLVMBuildShl(gallivm->builder, args[1], lp_build_const_int32(gallivm, 1), ""); args[2] = LLVMBuildShl(gallivm->builder, args[2], lp_build_const_int32(gallivm, 1), ""); args[3] = LLVMBuildShl(gallivm->builder, args[3], lp_build_const_int32(gallivm, 1), ""); } sampler_src = emit_data->inst->Instruction.NumSrcRegs-1; args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index + R600_MAX_CONST_BUFFERS); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Src[sampler_src].Register.Index); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXF && (emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || emit_data->inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) { switch (emit_data->inst->Texture.Texture) { case TGSI_TEXTURE_2D_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D); break; case TGSI_TEXTURE_2D_ARRAY_MSAA: args[6] = lp_build_const_int32(gallivm, TGSI_TEXTURE_2D_ARRAY); break; default: break; } if (ctx->has_compressed_msaa_texturing) { LLVMValueRef ldptr_args[10] = { args[0], // Coord args[1], // Offset X args[2], // Offset Y args[3], // Offset Z args[4], args[5], lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1), lp_build_const_int32(gallivm, 1) }; LLVMValueRef ptr = build_intrinsic(gallivm->builder, "llvm.R600.ldptr", emit_data->dst_type, ldptr_args, 10, LLVMReadNoneAttribute); LLVMValueRef Tmp = LLVMBuildExtractElement(gallivm->builder, args[0], lp_build_const_int32(gallivm, 3), ""); Tmp = LLVMBuildMul(gallivm->builder, Tmp, lp_build_const_int32(gallivm, 4), ""); LLVMValueRef ResX = LLVMBuildExtractElement(gallivm->builder, ptr, lp_build_const_int32(gallivm, 0), ""); ResX = LLVMBuildBitCast(gallivm->builder, ResX, bld_base->base.int_elem_type, ""); Tmp = LLVMBuildLShr(gallivm->builder, ResX, Tmp, ""); Tmp = LLVMBuildAnd(gallivm->builder, Tmp, 
lp_build_const_int32(gallivm, 0xF), ""); args[0] = LLVMBuildInsertElement(gallivm->builder, args[0], Tmp, lp_build_const_int32(gallivm, 3), ""); args[c++] = lp_build_const_int32(gallivm, emit_data->inst->Texture.Texture); } } emit_data->output[0] = build_intrinsic(gallivm->builder, action->intr_name, emit_data->dst_type, args, c, LLVMReadNoneAttribute); if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((emit_data->inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) if (emit_data->inst->Dst[0].Register.WriteMask & 4) { LLVMValueRef offset = lp_build_const_int32(bld_base->base.gallivm, 0); LLVMValueRef ZLayer = LLVMBuildExtractElement(gallivm->builder, llvm_load_const_buffer(bld_base, offset, CONSTANT_TXQ_BUFFER), lp_build_const_int32(gallivm, 0), ""); emit_data->output[0] = LLVMBuildInsertElement(gallivm->builder, emit_data->output[0], ZLayer, lp_build_const_int32(gallivm, 2), ""); struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); ctx->has_txq_cube_array_z_comp = true; } }