/**
 * Generate pow(x, y).
 *
 * The result is built as exp2(log2(x) * y) from the generic log2, mul and
 * exp2 builders.
 *
 * \param bld  build context handed to every helper
 * \param x    base
 * \param y    exponent
 * \return the value produced by lp_build_exp2()
 */
LLVMValueRef
lp_build_pow(struct lp_build_context *bld,
             LLVMValueRef x,
             LLVMValueRef y)
{
   LLVMValueRef log2_x;
   LLVMValueRef scaled;

   /* TODO: optimize the constant case */
   if (LLVMIsConstant(x) && LLVMIsConstant(y)) {
      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
                   __FUNCTION__);
   }

   log2_x = lp_build_log2(bld, x);
   scaled = lp_build_mul(bld, log2_x, y);

   return lp_build_exp2(bld, scaled);
}
/**
 * Generate code to compute texture level of detail (lambda), split into an
 * integer part and a fractional part.
 *
 * \param bld            sampler build context
 * \param unit           sampler unit, forwarded to the dynamic state
 *                       callbacks and to lp_build_rho()
 * \param ddx            partial derivatives with respect to X, forwarded to
 *                       lp_build_rho()
 * \param ddy            partial derivatives with respect to Y, forwarded to
 *                       lp_build_rho()
 * \param lod_bias       optional vector with the shader lod bias
 * \param explicit_lod   optional vector with the explicit lod; when given,
 *                       the derivatives are not used
 * \param mip_filter     one of PIPE_TEX_MIPFILTER_x
 * \param out_lod_ipart  receives the integer part of the lod
 * \param out_lod_fpart  receives the fractional part of the lod; it is left
 *                       at float zero unless linear mip filtering is used
 *
 * XXX: The resulting lod is scalar, so only the first element of
 * lod_bias and explicit_lod is used.
 */
void
lp_build_lod_selector(struct lp_build_sample_context *bld,
                      unsigned unit,
                      const LLVMValueRef ddx[4],
                      const LLVMValueRef ddy[4],
                      LLVMValueRef lod_bias, /* optional */
                      LLVMValueRef explicit_lod, /* optional */
                      unsigned mip_filter,
                      LLVMValueRef *out_lod_ipart,
                      LLVMValueRef *out_lod_fpart)
{
   struct lp_build_context *fbld = &bld->float_bld;
   LLVMBuilderRef ir = bld->gallivm->builder;
   LLVMValueRef level;

   /* Defaults, overwritten below as needed. */
   *out_lod_ipart = bld->int_bld.zero;
   *out_lod_fpart = fbld->zero;

   if (!bld->static_state->min_max_lod_equal) {
      LLVMValueRef unit_bias =
         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
      LLVMValueRef lane0 = lp_build_const_int32(bld->gallivm, 0);

      if (!explicit_lod) {
         /* Set to non-zero to use the regular log2 instead of the fast one. */
         const int use_regular_log2 = 0;
         LLVMValueRef rho = lp_build_rho(bld, unit, ddx, ddy);
         const int post_log2_adjust = lod_bias ||
                                      bld->static_state->lod_bias_non_zero ||
                                      bld->static_state->apply_max_lod ||
                                      bld->static_state->apply_min_lod;

         /*
          * Compute lod = log2(rho)
          */

         if (!post_log2_adjust) {
            /*
             * Nothing is applied after the log2, so the integer and
             * fractional parts can be derived from rho directly, which
             * saves instructions.
             */
            switch (mip_filter) {
            case PIPE_TEX_MIPFILTER_NONE:
            case PIPE_TEX_MIPFILTER_NEAREST:
               *out_lod_ipart = lp_build_ilog2(fbld, rho);
               *out_lod_fpart = fbld->zero;
               return;

            case PIPE_TEX_MIPFILTER_LINEAR:
               if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
                  lp_build_brilinear_rho(fbld, rho, BRILINEAR_FACTOR,
                                         out_lod_ipart, out_lod_fpart);
                  return;
               }
               break;

            default:
               break;
            }
         }

         level = use_regular_log2 ? lp_build_log2(fbld, rho)
                                  : lp_build_fast_log2(fbld, rho);

         /* add shader lod bias */
         if (lod_bias) {
            LLVMValueRef shader_bias =
               LLVMBuildExtractElement(ir, lod_bias, lane0, "");

            level = LLVMBuildFAdd(ir, level, shader_bias, "shader_lod_bias");
         }
      }
      else {
         level = LLVMBuildExtractElement(ir, explicit_lod, lane0, "");
      }

      /* add sampler lod bias */
      if (bld->static_state->lod_bias_non_zero) {
         level = LLVMBuildFAdd(ir, level, unit_bias, "sampler_lod_bias");
      }

      /* clamp lod: upper bound first, then lower bound */
      if (bld->static_state->apply_max_lod) {
         LLVMValueRef upper =
            bld->dynamic_state->max_lod(bld->dynamic_state, bld->gallivm, unit);

         level = lp_build_min(fbld, level, upper);
      }
      if (bld->static_state->apply_min_lod) {
         LLVMValueRef lower =
            bld->dynamic_state->min_lod(bld->dynamic_state, bld->gallivm, unit);

         level = lp_build_max(fbld, level, lower);
      }
   }
   else {
      /* User is forcing sampling from a particular mipmap level.
       * This is hit during mipmap generation.
       */
      level = bld->dynamic_state->min_lod(bld->dynamic_state,
                                          bld->gallivm, unit);
   }

   if (mip_filter != PIPE_TEX_MIPFILTER_LINEAR) {
      *out_lod_ipart = lp_build_iround(fbld, level);
   }
   else {
      if (gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR) {
         lp_build_ifloor_fract(fbld, level, out_lod_ipart, out_lod_fpart);
      }
      else {
         lp_build_brilinear_lod(fbld, level, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");
}
/** * Emit LLVM for one TGSI instruction. * \param return TRUE for success, FALSE otherwise */ boolean lp_emit_instruction_aos( struct lp_build_tgsi_aos_context *bld, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, int *pc) { LLVMValueRef src0, src1, src2; LLVMValueRef tmp0, tmp1; LLVMValueRef dst0 = NULL; /* * Stores and write masks are handled in a general fashion after the long * instruction opcode switch statement. * * Although not stricitly necessary, we avoid generating instructions for * channels which won't be stored, in cases where's that easy. For some * complex instructions, like texture sampling, it is more convenient to * assume a full writemask and then let LLVM optimization passes eliminate * redundant code. */ (*pc)++; assert(info->num_dst <= 1); if (info->num_dst) { dst0 = bld->bld_base.base.undef; } switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_MOV: dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); break; case TGSI_OPCODE_LIT: return FALSE; case TGSI_OPCODE_RCP: /* TGSI_OPCODE_RECIP */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_rcp(&bld->bld_base.base, src0); break; case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_EXP: return FALSE; case TGSI_OPCODE_LOG: return FALSE; case TGSI_OPCODE_MUL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_ADD: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = 
lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_add(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_DP3: /* TGSI_OPCODE_DOT3 */ return FALSE; case TGSI_OPCODE_DP4: /* TGSI_OPCODE_DOT4 */ return FALSE; case TGSI_OPCODE_DST: return FALSE; case TGSI_OPCODE_MIN: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_MAX: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_max(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_SLT: /* TGSI_OPCODE_SETLT */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SGE: /* TGSI_OPCODE_SETGE */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_MAD: /* TGSI_OPCODE_MADD */ src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_SUB: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); break; case 
TGSI_OPCODE_LRP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_CND: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); break; case TGSI_OPCODE_DP2A: return FALSE; case TGSI_OPCODE_FRC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_floor(&bld->bld_base.base, src0); dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); break; case TGSI_OPCODE_CLAMP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); break; case TGSI_OPCODE_FLR: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_floor(&bld->bld_base.base, src0); break; case TGSI_OPCODE_ROUND: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_EX2: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS); dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_LG2: src0 = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_log2(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_POW: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); break; case TGSI_OPCODE_XPD: return FALSE; case TGSI_OPCODE_RCC: /* deprecated? */ assert(0); return FALSE; case TGSI_OPCODE_DPH: return FALSE; case TGSI_OPCODE_COS: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_cos(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_DDX: return FALSE; case TGSI_OPCODE_DDY: return FALSE; case TGSI_OPCODE_KILP: /* predicated kill */ return FALSE; case TGSI_OPCODE_KIL: /* conditional kill */ return FALSE; case TGSI_OPCODE_PK2H: return FALSE; break; case TGSI_OPCODE_PK2US: return FALSE; break; case TGSI_OPCODE_PK4B: return FALSE; break; case TGSI_OPCODE_PK4UB: return FALSE; case TGSI_OPCODE_RFL: return FALSE; case TGSI_OPCODE_SEQ: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SFL: dst0 = bld->bld_base.base.zero; break; case TGSI_OPCODE_SGT: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SIN: src0 = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); dst0 = lp_build_sin(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_SLE: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_SNE: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); break; case TGSI_OPCODE_STR: dst0 = bld->bld_base.base.one; break; case TGSI_OPCODE_TEX: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); break; case TGSI_OPCODE_TXD: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); break; case TGSI_OPCODE_UP2H: /* deprecated */ assert (0); return FALSE; break; case TGSI_OPCODE_UP2US: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_UP4B: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_UP4UB: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_X2D: /* deprecated? 
*/ assert(0); return FALSE; break; case TGSI_OPCODE_ARA: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_ARR: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(&bld->bld_base.base, src0); break; case TGSI_OPCODE_BRA: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_CAL: return FALSE; case TGSI_OPCODE_RET: return FALSE; case TGSI_OPCODE_END: *pc = -1; break; case TGSI_OPCODE_SSG: /* TGSI_OPCODE_SGN */ tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); break; case TGSI_OPCODE_CMP: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); break; case TGSI_OPCODE_SCS: return FALSE; case TGSI_OPCODE_TXB: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); break; case TGSI_OPCODE_NRM: /* fall-through */ case TGSI_OPCODE_NRM4: return FALSE; case TGSI_OPCODE_DIV: /* deprecated */ assert(0); return FALSE; break; case TGSI_OPCODE_DP2: return FALSE; case TGSI_OPCODE_TXL: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); break; case TGSI_OPCODE_TXP: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); break; case TGSI_OPCODE_BRK: return FALSE; case TGSI_OPCODE_IF: return FALSE; case TGSI_OPCODE_BGNLOOP: return FALSE; case TGSI_OPCODE_BGNSUB: return FALSE; case TGSI_OPCODE_ELSE: return FALSE; case TGSI_OPCODE_ENDIF: return FALSE; case TGSI_OPCODE_ENDLOOP: return FALSE; case TGSI_OPCODE_ENDSUB: return FALSE; case TGSI_OPCODE_PUSHA: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_POPA: /* deprecated? 
*/ assert(0); return FALSE; break; case TGSI_OPCODE_CEIL: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_ceil(&bld->bld_base.base, src0); break; case TGSI_OPCODE_I2F: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_NOT: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TRUNC: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_trunc(&bld->bld_base.base, src0); break; case TGSI_OPCODE_SHL: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_ISHR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_AND: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_OR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_MOD: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_XOR: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_SAD: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TXF: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_TXQ: /* deprecated? */ assert(0); return FALSE; break; case TGSI_OPCODE_CONT: return FALSE; case TGSI_OPCODE_EMIT: return FALSE; break; case TGSI_OPCODE_ENDPRIM: return FALSE; break; case TGSI_OPCODE_NOP: break; default: return FALSE; } if (info->num_dst) { lp_emit_store_aos(bld, inst, 0, dst0); } return TRUE; }