/**
 * Per-channel select between two AoS vectors.
 *
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx bitmask choosing, per channel, whether the
 * result channel comes from a (bit set) or b (bit clear).
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned num_elems = type.length;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Trivial cases: identical operands, all-channels mask, empty mask. */
   if (a == b)
      return a;
   if ((mask & 0xf) == 0xf)
      return a;
   if ((mask & 0xf) == 0x0)
      return b;
   if (a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (num_elems > 4) {
      /* Wide vectors: build a constant mask vector and do a bitwise select. */
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm,
                                                      type, mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
   else {
      /*
       * Narrow vectors: a shufflevector whose selector indexes into a
       * (elements 0..n-1) for enabled channels and into b (elements n..2n-1)
       * for disabled ones.
       */
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef selectors[LP_MAX_VECTOR_LENGTH];
      unsigned base, chan;

      for (base = 0; base < num_elems; base += num_channels) {
         for (chan = 0; chan < num_channels; ++chan) {
            unsigned src = (mask & (1 << chan)) ? 0 : num_elems;
            selectors[base + chan] = LLVMConstInt(i32t, src + base + chan, 0);
         }
      }

      return LLVMBuildShuffleVector(builder, a, b,
                                    LLVMConstVector(selectors, num_elems), "");
   }
}
/**
 * Register store.
 *
 * Stores @value into the destination register described by inst->Dst[index],
 * applying (in order) the instruction's saturation mode, its predicate (if
 * any), and the destination writemask.  When a mask is in effect, the old
 * register contents are loaded and blended with the new value via
 * lp_build_select before the final store.
 */
static void
emit_store(
   struct lp_build_tgsi_aos_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   LLVMValueRef value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   LLVMValueRef mask = NULL;   /* combined predicate + writemask; NULL = store all */
   LLVMValueRef ptr;           /* destination register storage */

   /*
    * Saturate the value
    */
   switch (inst->Instruction.Saturate) {
   case TGSI_SAT_NONE:
      break;
   case TGSI_SAT_ZERO_ONE:
      /* Clamp to [0, 1]. */
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;
   case TGSI_SAT_MINUS_PLUS_ONE:
      /* Clamp to [-1, 1]. */
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;
   default:
      assert(0);
   }

   /*
    * Translate the register file
    */
   /* Indirect addressing of the destination is not supported here. */
   assert(!reg->Register.Indirect);
   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      ptr = bld->outputs[reg->Register.Index];
      break;
   case TGSI_FILE_TEMPORARY:
      ptr = bld->temps[reg->Register.Index];
      break;
   case TGSI_FILE_ADDRESS:
      /* NOTE(review): indexes with reg->Indirect.Index even though the assert
       * above requires Register.Indirect == 0 (so Indirect data may be
       * meaningless).  Looks like it should be reg->Register.Index — confirm
       * against other register-file cases and upstream. */
      ptr = bld->addr[reg->Indirect.Index];
      break;
   case TGSI_FILE_PREDICATE:
      ptr = bld->preds[reg->Register.Index];
      break;
   default:
      assert(0);
      return;
   }

   /*
    * Predicate
    */
   if (inst->Instruction.Predicate) {
      LLVMValueRef pred;

      assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS);

      pred = LLVMBuildLoad(bld->base.builder,
                           bld->preds[inst->Predicate.Index], "");

      /*
       * Convert the value to an integer mask.
       */
      pred = lp_build_compare(bld->base.builder,
                              bld->base.type,
                              PIPE_FUNC_NOTEQUAL,
                              pred,
                              bld->base.zero);

      if (inst->Predicate.Negate) {
         pred = LLVMBuildNot(bld->base.builder, pred, "");
      }

      /* Apply the predicate register's swizzle before masking. */
      pred = swizzle_aos(bld, pred,
                         inst->Predicate.SwizzleX,
                         inst->Predicate.SwizzleY,
                         inst->Predicate.SwizzleZ,
                         inst->Predicate.SwizzleW);

      if (mask) {
         mask = LLVMBuildAnd(bld->base.builder, mask, pred, "");
      } else {
         mask = pred;
      }
   }

   /*
    * Writemask
    */
   if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) {
      LLVMValueRef writemask;

      /* NOTE(review): called with 2 args here, while other copies of this
       * helper in the tree take (gallivm, type, mask[, num_channels]) —
       * confirm this matches the locally declared prototype. */
      writemask = lp_build_const_mask_aos(bld->base.type,
                                          reg->Register.WriteMask);

      if (mask) {
         mask = LLVMBuildAnd(bld->base.builder, mask, writemask, "");
      } else {
         mask = writemask;
      }
   }

   if (mask) {
      /* Partial store: read-modify-write blending old and new channels. */
      LLVMValueRef orig_value;

      orig_value = LLVMBuildLoad(bld->base.builder, ptr, "");
      value = lp_build_select(&bld->base, mask, value, orig_value);
   }

   LLVMBuildStore(bld->base.builder, value, ptr);
}
/**
 * Swizzle one channel into other channels.
 *
 * Replicates element @channel of every num_channels-wide group of @a into
 * all channels of that group.  num_channels must be 2 or 4 (a group of 1 is
 * the identity and returns immediately).
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   /* Splatting an undef/zero/one constant yields the same value. */
   if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
      return a;

   assert(num_channels == 2 || num_channels == 4);

   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
   if (LLVMIsConstant(a) || type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      /* Each group's selectors all point at the group's chosen channel. */
      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
   }
   else if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       * XY XY .... XY          <= input
       * 0Y 0Y .... 0Y
       * YY YY .... YY
       * YY YY .... YY          <= output
       */
      struct lp_type type2;
      LLVMValueRef tmp = NULL;
      int shift;

      /* Keep only the selected channel; zero the other. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel,
                                               num_channels), "");

      /* Reinterpret as integers twice as wide, covering one XY pair each. */
      type2 = type;
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left (shift == 1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      if (shift > 0) {
         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
      } else if (shift < 0) {
         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
      }

      assert(tmp);
      if (tmp) {
         /* OR the shifted copy back in: both halves now hold the channel. */
         a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
       */
      struct lp_type type4;
      const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      unsigned i;

      /* Keep only the selected channel; zero the other three. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, 4), "");

      /*
       * Build a type where each element is an integer that cover the four
       * channels.
       */

      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      /* Two shift+or rounds double the populated channels each time: 1->2->4. */
      for(i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

         /* See endianness diagram above */
#ifdef PIPE_ARCH_BIG_ENDIAN
         shift = -shift;
#endif

         if(shift > 0)
            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if(shift < 0)
            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if(tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}
/**
 * Swizzle one channel into all other three channels.
 *
 * Broadcasts element @channel of every 4-wide group of @a into all four
 * channels of that group.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned num_elems = type.length;

   /* Splatting an undef/zero/one constant yields the same value back. */
   if (a == bld->undef || a == bld->zero || a == bld->one)
      return a;

   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
    * using shuffles here actually causes worst results. More investigation is
    * needed. */
   if (type.width >= 16) {
      /*
       * Shuffle: every selector in a 4-wide group points at that group's
       * chosen channel.
       */
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef selectors[LP_MAX_VECTOR_LENGTH];
      unsigned base, chan;

      for (base = 0; base < num_elems; base += 4) {
         for (chan = 0; chan < 4; ++chan) {
            selectors[base + chan] = LLVMConstInt(i32t, base + channel, 0);
         }
      }

      return LLVMBuildShuffleVector(builder, a, bld->undef,
                                    LLVMConstVector(selectors, num_elems), "");
   }
   else {
      /*
       * Bit mask and recursive shifts
       *
       * XYZW XYZW .... XYZW  <= input
       * 0Y00 0Y00 .... 0Y00
       * YY00 YY00 .... YY00
       * YYYY YYYY .... YYYY  <= output
       *
       * Two shift+or rounds double the populated channels: 1 -> 2 -> 4.
       */
      static const char shift_table[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };
      struct lp_type type4;
      unsigned round;

      /* Keep only the selected channel; zero the other three. */
      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel), "");

      /*
       * Build a type where each element is an integer that covers the four
       * channels.
       */
      type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      for (round = 0; round < 2; ++round) {
         LLVMValueRef shifted = NULL;
         int amount = shift_table[channel][round];

         /* Table amounts are for big-endian layout; mirror for little-endian. */
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         amount = -amount;
#endif

         if (amount > 0) {
            shifted = LLVMBuildLShr(builder, a,
                                    lp_build_const_int_vec(bld->gallivm, type4,
                                                           amount * type.width),
                                    "");
         }
         if (amount < 0) {
            shifted = LLVMBuildShl(builder, a,
                                   lp_build_const_int_vec(bld->gallivm, type4,
                                                          -amount * type.width),
                                   "");
         }

         assert(shifted);
         if (shifted)
            a = LLVMBuildOr(builder, a, shifted, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}