// Emits a compare-and-set statement for a two-source ALU instruction:
//
//    dst = (src0 <op> src1) ? 1 : 0
//
// `op` is written verbatim between the two sources. The result literals are
// integers ("1 : 0") when the instruction flags contain SQ_ALU_FLAG_INT_OUT or
// SQ_ALU_FLAG_UINT_OUT, and floats ("1.0f : 0.0f") otherwise.
static void binaryCompareSet(State &state,
                             const ControlFlowInst &cf,
                             const AluInst &alu,
                             const char *op)
{
    const auto instFlags = getInstructionFlags(alu.op2.ALU_INST());
    const bool integerResult =
        (instFlags & SQ_ALU_FLAG_INT_OUT) || (instFlags & SQ_ALU_FLAG_UINT_OUT);

    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    // Condition: (src0 op src1)
    state.out << "(";
    insertSource0(state, state.out, cf, alu);
    state.out << op;
    insertSource1(state, state.out, cf, alu);
    state.out << ") ? ";

    // Result literals, typed to match the instruction's output kind
    state.out << (integerResult ? "1 : 0" : "1.0f : 0.0f");

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a multiply-add statement for a three-source ALU instruction:
//
//    dst = src0 * src1 + src2              (no modifier)
//    dst = (src0 * src1 + src2)<modifier>  (modifier given)
//
// When `modifier` is non-null the expression is parenthesised and the modifier
// text is appended verbatim after the closing parenthesis.
static void multiplyAdd(State &state,
                        const ControlFlowInst &cf,
                        const AluInst &alu,
                        const char *modifier = nullptr)
{
    const bool hasModifier = (modifier != nullptr);

    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    if (hasModifier) {
        state.out << "(";
    }

    insertSource0(state, state.out, cf, alu);
    state.out << " * ";
    insertSource1(state, state.out, cf, alu);
    state.out << " + ";
    insertSource2(state, state.out, cf, alu);

    if (hasModifier) {
        state.out << ")" << modifier;
    }

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a single line containing an unconditional "discard;" statement.
// The control flow instruction is not inspected.
static void KILL(State &state, const ControlFlowInst &cf)
{
    insertLineStart(state);
    state.out << "discard;";
    insertLineEnd(state);
}
// Emits the code for an "else" in the mask-based control flow emulation.
//
// Generated output:
//
//    if (stack[stackIndex - 1] == Active) {
//       activeMask = (activeMask == Active) ? InactiveBranch : Active;
//    }
void insertElse(State &state)
{
    // Opening guard on the previous stack entry
    insertLineStart(state);
    fmt::format_to(state.out, "if (stack[stackIndex - 1] == Active) {{");
    insertLineEnd(state);

    // Body: flip activeMask between Active and InactiveBranch
    increaseIndent(state);
    insertLineStart(state);
    fmt::format_to(state.out, "activeMask = (activeMask == Active) ? InactiveBranch : Active;");
    insertLineEnd(state);
    decreaseIndent(state);

    // Closing brace
    insertLineStart(state);
    fmt::format_to(state.out, "}}");
    insertLineEnd(state);
}
// Emits a conditional kill for a two-source ALU instruction:
//
//    if (src0 <op> src1) {
//       discard;
//    } else {
//       dst = 0.0f;
//    }
//
// `op` is written verbatim between the two sources. The zero written in the
// else branch is "0" when the instruction flags contain SQ_ALU_FLAG_INT_OUT or
// SQ_ALU_FLAG_UINT_OUT, and "0.0f" otherwise.
static void binaryCompareKill(State &state,
                              const ControlFlowInst &cf,
                              const AluInst &alu,
                              const char *op)
{
    const auto instFlags = getInstructionFlags(alu.op2.ALU_INST());
    const bool integerResult =
        (instFlags & SQ_ALU_FLAG_INT_OUT) || (instFlags & SQ_ALU_FLAG_UINT_OUT);
    const char *zeroLiteral = integerResult ? "0" : "0.0f";

    // if (src0 op src1) {
    insertLineStart(state);
    state.out << "if (";
    insertSource0(state, state.out, cf, alu);
    state.out << op;
    insertSource1(state, state.out, cf, alu);
    state.out << ") {";
    insertLineEnd(state);

    //    discard;
    increaseIndent(state);
    insertLineStart(state);
    state.out << "discard;";
    insertLineEnd(state);
    decreaseIndent(state);

    // } else {
    insertLineStart(state);
    state.out << "} else {";
    insertLineEnd(state);

    //    dst = 0;
    increaseIndent(state);
    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);
    state.out << zeroLiteral;
    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
    decreaseIndent(state);

    // }
    insertLineStart(state);
    state.out << '}';
    insertLineEnd(state);
}
// Closes a conditional block: drops one indent level and then emits a line
// containing the closing brace.
void condEnd(State &state)
{
    // Unindent first so the brace lines up with the statement that opened it
    decreaseIndent(state);

    insertLineStart(state);
    fmt::format_to(state.out, "}}");
    insertLineEnd(state);
}
// Emits `count` lines, each containing "PUSH(stack, stackIndex, activeMask);".
// Nothing is emitted when `count` is zero.
void insertPush(State &state, unsigned count)
{
    auto remaining = count;

    while (remaining > 0u) {
        insertLineStart(state);
        fmt::format_to(state.out, "PUSH(stack, stackIndex, activeMask);");
        insertLineEnd(state);
        --remaining;
    }
}
// Emits the tail of a loop opened by LOOP_START_DX10 and pops its entry from
// state.loopStack.
//
// Generated output (the two `if` statements are still inside the do-body):
//
//       if (activeMask == InactiveBreak) {
//          break;
//       }
//       if (activeMask == InactiveContinue) {
//          activeMask = Active;
//       }
//    } while (<cond>);
//    <pop>
// }
//
// The checks below require that a loop is open and that the current cfPC and
// the instruction's ADDR match the recorded loop bounds.
static void LOOP_END(State &state, const ControlFlowInst &cf)
{
    // TODO: LOOP_END has different behaviour depending on which LOOP_START
    // instruction started the loop, currently we only handle LOOP_START_DX10

    // A LOOP_END without a matching LOOP_START would make top() operate on an
    // empty stack, so verify that a loop is actually open first.
    decaf_check(state.loopStack.size() > 0);
    auto &loopState = state.loopStack.top();

    // Sanity check to ensure we are at the cfPC
    decaf_check(state.cfPC == loopState.endPC);
    decaf_check((cf.word0.ADDR - 1) == loopState.startPC);

    // loopState must not be used past this point
    state.loopStack.pop();

    // If the break mask is set, break out of the do/while
    insertLineStart(state);
    state.out << "if (activeMask == InactiveBreak) {";
    insertLineEnd(state);

    increaseIndent(state);
    insertLineStart(state);
    state.out << "break;";
    insertLineEnd(state);
    decreaseIndent(state);

    insertLineStart(state);
    state.out << "}";
    insertLineEnd(state);

    // If the continue mask is set, re-activate for the next iteration
    insertLineStart(state);
    state.out << "if (activeMask == InactiveContinue) {";
    insertLineEnd(state);

    increaseIndent(state);
    insertLineStart(state);
    state.out << "activeMask = Active;";
    insertLineEnd(state);
    decreaseIndent(state);

    insertLineStart(state);
    state.out << "}";
    insertLineEnd(state);

    // Check the while condition but without checking loop masks
    decreaseIndent(state);
    insertLineStart(state);
    state.out << "} while (";
    insertCond(state, cf.word1.COND());
    state.out << ");";
    insertLineEnd(state);

    // Undo the push and close the conditional emitted by the loop start
    insertPop(state);
    condEnd(state);
}
// Opens a conditional block: emits "if (<cond>) {" on its own line, where the
// condition text is produced by insertCond, then adds one indent level for
// the statements that follow.
void condStart(State &state, SQ_CF_COND cond)
{
    insertLineStart(state);

    fmt::format_to(state.out, "if (");
    insertCond(state, cond);
    fmt::format_to(state.out, ") {{");

    insertLineEnd(state);

    // Everything up to the matching condEnd is nested one level deeper
    increaseIndent(state);
}
// Emits a plain move for a single-source ALU instruction:
//
//    dst = src0
static void MOV(State &state, const ControlFlowInst &cf, const AluInst &alu)
{
    insertLineStart(state);

    // Destination wrapper around the unmodified first source
    insertDestBegin(state, cf, alu, state.unit);
    insertSource0(state, state.out, cf, alu);
    insertDestEnd(state, cf, alu);

    state.out << ';';
    insertLineEnd(state);
}
// Emits a sine for a single-source ALU instruction:
//
//    dst = sin(src0 / 0.1591549367)
//
// 0.1591549367 is approximately 1 / (2 * pi), so the division scales the
// source by roughly 2 * pi before sin() is applied.
// NOTE(review): presumably the hardware source is expressed in turns rather
// than radians - confirm against the ISA documentation.
static void SIN(State &state, const ControlFlowInst &cf, const AluInst &alu)
{
    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    state.out << "sin(";
    insertSource0(state, state.out, cf, alu);
    state.out << " / 0.1591549367)";

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a floor-to-address-register move for a single-source ALU instruction:
//
//    ar.x = dst = int(clamp(floor(src0), -256, 256))
//
// The destination text is produced by the insertArDestBegin / insertArDestEnd
// pair rather than the regular destination helpers.
static void MOVA_FLOOR(State &state, const ControlFlowInst &cf, const AluInst &alu)
{
    insertLineStart(state);
    insertArDestBegin(state, cf, alu, state.unit);

    // Floor the source, clamp it, then convert to an integer
    state.out << "int(clamp(floor(";
    insertSource0(state, state.out, cf, alu);
    state.out << "), -256, 256))";

    insertArDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Begins a loop: records its bounds on state.loopStack and emits the opening
// of the loop construct.
//
// Generated output:
//
//    if (<cond>) {
//       <push>
//       do {
//
// The recorded startPC is the current cfPC and endPC is the instruction's
// ADDR minus one; LOOP_END checks itself against these values.
static void LOOP_START_DX10(State &state, const ControlFlowInst &cf)
{
    // Remember where this loop starts and ends
    LoopState entry;
    entry.startPC = state.cfPC;
    entry.endPC = cf.word0.ADDR - 1;
    state.loopStack.emplace(entry);

    // Conditional wrapper plus a push, both undone again by LOOP_END
    condStart(state, cf.word1.COND());
    insertPush(state);

    // Open the do/while body
    insertLineStart(state);
    state.out << "do {";
    insertLineEnd(state);
    increaseIndent(state);
}
// Emits a vertex fetch that reads from a uniform block:
//
//    <dst>.<mask> = <selection of UB_<block>.values[floatBitsToInt(<src>.<sel>)]>;
//
// Only a narrow form of the instruction is accepted (see the checks below):
// the buffer id must lie in [SQ_VS_BUF_RESOURCE_0, SQ_VS_GSOUT_RESOURCE), the
// fetch type must be NO_INDEX_OFFSET, const fields must be used, the offset
// must be zero and the fetch must be a 16 byte mega fetch.
//
// When state.shader is set, the uniform block is marked as used. Nothing is
// emitted if no destination component is selected.
static void VTX_FETCH(State &state, const ControlFlowInst &cf, const VertexFetchInst &inst)
{
    // FETCH R4.xyzw, R0.y, b131 NO_INDEX_OFFSET FMT_FROM_FETCH_CONSTANT MEGA(16) OFFSET(0)
    auto id = inst.word0.BUFFER_ID() + SQ_VS_RESOURCE_BASE;

    // For now we only support reading from vertex buffers (uniform blocks)
    decaf_assert(id >= SQ_VS_BUF_RESOURCE_0 && id < SQ_VS_GSOUT_RESOURCE,
                 fmt::format("Unsupported VTX_FETCH buffer id {}", id));

    // Let's only support a very expected set of values
    decaf_check(inst.word0.FETCH_TYPE() == SQ_VTX_FETCH_NO_INDEX_OFFSET);
    decaf_check(inst.word1.USE_CONST_FIELDS() == 1);
    decaf_check(inst.word2.OFFSET() == 0);
    decaf_check(inst.word2.MEGA_FETCH() && (inst.word0.MEGA_FETCH_COUNT() + 1) == 16);

    auto dstSelX = inst.word1.DST_SEL_X();
    auto dstSelY = inst.word1.DST_SEL_Y();
    auto dstSelZ = inst.word1.DST_SEL_Z();
    auto dstSelW = inst.word1.DST_SEL_W();

    auto numDstSels = 4u;
    auto dstSelMask = condenseSelections(dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels);

    if (numDstSels > 0) {
        auto dst = getExportRegister(inst.gpr.DST_GPR(), inst.gpr.DST_REL());
        auto src = getExportRegister(inst.word0.SRC_GPR(), inst.word0.SRC_REL());
        auto blockID = id - SQ_VS_BUF_RESOURCE_0;

        if (state.shader) {
            state.shader->usedUniformBlocks[blockID] = true;
        }

        // Build the indexed uniform block access used as the fetch source
        fmt::MemoryWriter tmp;
        tmp << "UB_" << blockID << ".values[floatBitsToInt(";
        insertSelectValue(tmp, src, inst.word0.SRC_SEL_X());
        tmp << ")]";

        insertLineStart(state);
        state.out << dst << "." << dstSelMask << " = ";
        insertSelectVector(state.out, tmp.str(), dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels);
        state.out << ";";
        insertLineEnd(state);
    }
}
// Emits an infix binary operation for a two-source ALU instruction:
//
//    dst = src0 <op> src1
//
// `op` is written verbatim between the two sources, so any spacing around the
// operator has to be part of the string passed in.
static void binaryOperator(State &state,
                           const ControlFlowInst &cf,
                           const AluInst &alu,
                           const std::string &op)
{
    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    // Left operand, operator, right operand
    insertSource0(state, state.out, cf, alu);
    state.out << op;
    insertSource1(state, state.out, cf, alu);

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a one-argument function call for a single-source ALU instruction:
//
//    dst = <func>(src0)
//
// `func` is written verbatim in front of the opening parenthesis.
static void unaryFunction(State &state,
                          const ControlFlowInst &cf,
                          const AluInst &alu,
                          const std::string &func)
{
    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    // Call expression wrapping the first source
    state.out << func << "(";
    insertSource0(state, state.out, cf, alu);
    state.out << ")";

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a conditional move for a three-source ALU instruction:
//
//    dst = (src0 <op> 0) ? src1 : src2
//
// `op` is written verbatim between the first source and the literal 0.
static void conditionalMove(State &state,
                            const ControlFlowInst &cf,
                            const AluInst &alu,
                            const char *op)
{
    insertLineStart(state);
    insertDestBegin(state, cf, alu, state.unit);

    // Condition: src0 compared against zero
    state.out << "(";
    insertSource0(state, state.out, cf, alu);
    state.out << op << "0) ? ";

    // Value when the condition holds
    insertSource1(state, state.out, cf, alu);
    state.out << " : ";

    // Value when it does not
    insertSource2(state, state.out, cf, alu);

    insertDestEnd(state, cf, alu);
    state.out << ';';
    insertLineEnd(state);
}
// Emits a texture info query.
//
// GET_TEXTURE_INFO returns {width, height, depth, mipmap count}. The first
// three come from textureSize() and the fourth from textureQueryLevels(), so
// up to two separate statements are generated:
//
//    texTmp.xyz = intBitsToFloat(ivec3(textureSize(sampler_N[, lod])[, 1...]));
//    <dst>.<mask> = <selection of texTmp>;
//    <dst>.<w sel> = intBitsToFloat(textureQueryLevels(sampler_N));
//
// The LoD argument is left out for MSAA samplers, and the size is padded with
// trailing 1s when the sampler has fewer than three elements. The first pair
// of statements is skipped when none of X/Y/Z is selected; the last statement
// is skipped when the W destination select is SEL_MASK.
//
// Reads state.shader->samplerDim and registers the sampler through
// registerSamplerID.
static void GET_TEXTURE_INFO(State &state,
                             const latte::ControlFlowInst &cf,
                             const latte::TextureFetchInst &inst)
{
    auto dstSelX = inst.word1.DST_SEL_X();
    auto dstSelY = inst.word1.DST_SEL_Y();
    auto dstSelZ = inst.word1.DST_SEL_Z();
    auto dstSelW = inst.word1.DST_SEL_W();

    // Only the X source select is consumed (as the LoD, see below)
    auto srcSelX = inst.word2.SRC_SEL_X();

    auto resourceID = inst.word0.RESOURCE_ID();

    // SAMPLER_ID is a don't care in this instruction, but we ensure that
    // textures and samplers use the same IDs, so we can safely use
    // RESOURCE_ID as the sampler ID.
    auto samplerID = resourceID;
    auto samplerDim = state.shader->samplerDim[samplerID];
    registerSamplerID(state, samplerID, false);

    auto dst = getExportRegister(inst.word1.DST_GPR(), inst.word1.DST_REL());
    auto src = getExportRegister(inst.word0.SRC_GPR(), inst.word0.SRC_REL());

    // TODO: Which source component is used to select the LoD? Xenoblade has:
    //    GET_TEXTURE_INFO R6.xy__, R4.xx0x, t4, s0 (with R4.x = 0)
    auto srcSelLod = srcSelX;

    // GET_TEXTURE_INFO returns {width, height, depth, mipmap count}, but GLSL
    // has separate functions for W/H/D and mipmap count, so we need to split
    // this up into two operations.
    auto numDstSels = 3u;
    SQ_SEL dummy = SQ_SEL::SEL_MASK;
    auto dstSelMask = condenseSelections(dstSelX, dstSelY, dstSelZ, dummy, numDstSels);

    if (numDstSels > 0) {
        auto samplerElements = getSamplerArgCount(samplerDim, false);

        insertLineStart(state);
        state.out << "texTmp.xyz = intBitsToFloat(ivec3(textureSize(sampler_" << samplerID;

        if (!getSamplerIsMsaa(samplerDim)) {
            state.out << ", floatBitsToInt(";
            insertSelectValue(state.out, src, srcSelLod);
            state.out << ")";
        }

        state.out << ")";

        // Pad the size up to three components
        for (auto i = samplerElements; i < 3; ++i) {
            state.out << ", 1";
        }

        state.out << "));";
        insertLineEnd(state);

        insertLineStart(state);
        state.out << dst << "." << dstSelMask;
        state.out << " = ";
        insertSelectVector(state.out, "texTmp", dstSelX, dstSelY, dstSelZ, SQ_SEL::SEL_MASK, numDstSels);
        state.out << ";";
        insertLineEnd(state);
    }

    if (dstSelW != SQ_SEL::SEL_MASK) {
        insertLineStart(state);
        insertSelectValue(state.out, dst, dstSelW);
        state.out << " = intBitsToFloat(textureQueryLevels(sampler_" << samplerID << "));";
        insertLineEnd(state);
    }
}
static void sampleFunc(State &state, const latte::ControlFlowInst &cf, const latte::TextureFetchInst &inst, const std::string &func, const std::string &offsetFunc, bool isShadowOp = false, latte::SQ_SEL extraArg = latte::SQ_SEL::SEL_MASK, bool asInts = false) { auto dstSelX = inst.word1.DST_SEL_X(); auto dstSelY = inst.word1.DST_SEL_Y(); auto dstSelZ = inst.word1.DST_SEL_Z(); auto dstSelW = inst.word1.DST_SEL_W(); auto srcSelX = inst.word2.SRC_SEL_X(); auto srcSelY = inst.word2.SRC_SEL_Y(); auto srcSelZ = inst.word2.SRC_SEL_Z(); auto srcSelW = inst.word2.SRC_SEL_W(); int32_t offsetX = sign_extend<5>(inst.word2.OFFSET_X()); int32_t offsetY = sign_extend<5>(inst.word2.OFFSET_Y()); int32_t offsetZ = sign_extend<5>(inst.word2.OFFSET_Z()); auto resourceID = inst.word0.RESOURCE_ID(); auto samplerID = inst.word2.SAMPLER_ID(); auto samplerDim = state.shader->samplerDim[samplerID]; auto samplerUsage = registerSamplerID(state, samplerID, isShadowOp); if (resourceID != samplerID) { throw translate_exception("Unsupported sample with RESOURCE_ID != SAMPLER_ID"); } auto dst = getExportRegister(inst.word1.DST_GPR(), inst.word1.DST_REL()); auto src = getExportRegister(inst.word0.SRC_GPR(), inst.word0.SRC_REL()); auto numDstSels = 4u; auto dstSelMask = condenseSelections(dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels); if (numDstSels > 0) { insertLineStart(state); auto samplerElements = getSamplerArgCount(samplerDim, isShadowOp); if (!isShadowOp) { state.out << "texTmp"; } else { state.out << "texTmp.x"; } state.out << " = "; bool writeOffsets = false; if (offsetX != 0 || offsetY != 0 || offsetZ != 0) { decaf_check(offsetFunc.size()); state.out << offsetFunc; writeOffsets = true; } else { decaf_check(func.size()); state.out << func; } state.out << "(sampler_" << samplerID << ", "; if (isShadowOp) { /* In r600 the .w channel holds the compare value whereas OpenGL * shadow samplers just expect it to be the last texture coordinate * so we must set the last channel to SQ_SEL::SEL_W */ 
if (samplerElements == 2) { srcSelY = srcSelW; } else if (samplerElements == 3) { srcSelZ = srcSelW; } else if (samplerElements == 4) { // The value will already be in place } else { decaf_abort(fmt::format("Unexpected samplerElements {} for shadow sampler", samplerElements)); } } if (asInts) { state.out << "floatBitsToInt("; } insertSelectVector(state.out, src, srcSelX, srcSelY, srcSelZ, srcSelW, samplerElements); if (asInts) { state.out << ")"; } switch (extraArg) { case latte::SQ_SEL::SEL_X: state.out << ", "; insertSelectValue(state.out, src, srcSelX); break; case latte::SQ_SEL::SEL_Y: state.out << ", "; insertSelectValue(state.out, src, srcSelY); break; case latte::SQ_SEL::SEL_Z: state.out << ", "; insertSelectValue(state.out, src, srcSelZ); break; case latte::SQ_SEL::SEL_W: state.out << ", "; insertSelectValue(state.out, src, srcSelW); break; case latte::SQ_SEL::SEL_0: state.out << ", 0"; break; case latte::SQ_SEL::SEL_1: state.out << ", 1"; break; } if (writeOffsets) { switch (samplerDim) { case latte::SQ_TEX_DIM::DIM_1D: case latte::SQ_TEX_DIM::DIM_1D_ARRAY: state.out << ", " << offsetX; break; case latte::SQ_TEX_DIM::DIM_2D: case latte::SQ_TEX_DIM::DIM_2D_ARRAY: case latte::SQ_TEX_DIM::DIM_2D_MSAA: case latte::SQ_TEX_DIM::DIM_2D_ARRAY_MSAA: state.out << ", ivec2(" << offsetX << ", " << offsetY << ")"; break; case latte::SQ_TEX_DIM::DIM_3D: state.out << ", ivec3(" << offsetX << ", " << offsetY << ", " << offsetZ << ")"; break; case latte::SQ_TEX_DIM::DIM_CUBEMAP: default: throw translate_exception(fmt::format("Unsupported sampler dim {}", static_cast<unsigned>(samplerDim))); } } if (getSamplerIsMsaa(samplerDim)) { // Write the sample number if this is an MSAA sampler state.out << ", 0"; } state.out << ");"; insertLineEnd(state); insertLineStart(state); state.out << dst << "." << dstSelMask; state.out << " = "; insertSelectVector(state.out, "texTmp", dstSelX, dstSelY, dstSelZ, dstSelW, numDstSels); state.out << ";"; insertLineEnd(state); } }