void JitILBase::bx(UGeckoInstruction inst)
{
	NORMALBRANCH_START
	INSTRUCTION_START;

	// Always record the link-register update, even when PPCAnalyst::Flatten()
	// has merged this branch into a larger block.
	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	// If this branch is not the final instruction of the block, the blocks
	// were merged by PPCAnalyst::Flatten(), so no control transfer is needed.
	if (!js.isLastInstruction)
		return;

	// Resolve the branch target: absolute when AA is set, PC-relative otherwise.
	const u32 target = inst.AA ? SignExt26(inst.LI << 2)
	                           : js.compilerPC + SignExt26(inst.LI << 2);

	// A branch to itself is an idle loop; emit the fast idle-skip path.
	if (target == js.compilerPC)
	{
		ibuild.EmitShortIdleLoop(ibuild.EmitIntConst(js.compilerPC));
		return;
	}

	ibuild.EmitBranchUncond(ibuild.EmitIntConst(target));
}
// Most functions that are relevant to analyze should be // called by another function. Therefore, let's scan the // entire space for bl operations and find what functions // get called. static void FindFunctionsFromBranches(u32 startAddr, u32 endAddr, SymbolDB *func_db) { for (u32 addr = startAddr; addr < endAddr; addr+=4) { UGeckoInstruction instr = (UGeckoInstruction)Memory::ReadUnchecked_U32(addr); if (PPCTables::IsValidInstruction(instr)) { switch (instr.OPCD) { case 18://branch instruction { if (instr.LK) //bl { u32 target = SignExt26(instr.LI << 2); if (!instr.AA) target += addr; if (Memory::IsRAMAddress(target)) { func_db->AddFunction(target); } } } break; default: break; } } } }
// Populate each function symbol's backwardLinks list by scanning every
// function body for branch-and-link (OPCD 18) call sites.
// NOTE(review): the implementation is compiled out unless BWLINKS is
// defined — without it this function simply returns. The #else branch
// appears stale: it references LK, AA, and LI as bare identifiers (they
// are not declared here and are not extracted from `inst`), so it would
// not compile as-is if BWLINKS were enabled. Verify before enabling.
void SymbolMap::AnalyzeBackwards()
{
#ifndef BWLINKS
	return;
#else
	// For every function-typed symbol, walk its instructions one word at
	// a time and record who calls whom.
	for (int i=0; i<numEntries; i++)
	{
		u32 ptr = entries[i].vaddress;
		if (ptr)
		{
			if (entries[i].type == ST_FUNCTION)
			{
				for (int a = 0; a<entries[i].size/4; a++)
				{
					u32 inst = CMemory::ReadUncheckedu32(ptr);
					switch (inst>>26)
					{
					case 18: // unconditional branch family (b/ba/bl/bla)
						{
							if (LK) //LK — only branch-and-link counts as a call
							{
								u32 addr;
								// Absolute target when AA is set, otherwise PC-relative.
								if(AA)
									addr = SignExt26(LI << 2);
								else
									addr = ptr + SignExt26(LI << 2);

								// Record this call site on the callee's symbol entry.
								int funNum = SymbolMap::GetSymbolNum(addr);
								if (funNum>=0)
									entries[funNum].backwardLinks.push_back(ptr);
							}
							break;
						}
					default:
						;
					}

					ptr+=4;
				}
			}
		}
	}
#endif
}
// Compute the destination address of the branch instruction `instr`
// located at `pc`. Returns INVALID_TARGET for any instruction whose
// target cannot be determined statically here.
u32 EvaluateBranchTarget(UGeckoInstruction instr, u32 pc)
{
	// Only the unconditional branch family (OPCD 18: b/ba/bl/bla) is handled.
	if (instr.OPCD != 18)
		return INVALID_TARGET;

	// Sign-extended 26-bit displacement; PC-relative unless AA is set.
	u32 target = SignExt26(instr.LI << 2);
	if (!instr.AA)
		target += pc;
	return target;
}
// Decode up to blockSize instructions starting at `address` into `buffer`,
// filling in per-block statistics and per-instruction register/flag usage
// information consumed by the JIT register allocator.
// Returns the address one past the last instruction analyzed (or the start
// address unchanged when an instruction-fetch memory exception is flagged).
u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32 blockSize)
{
	// Clear block stats
	memset(block->m_stats, 0, sizeof(BlockStats));

	// Clear register stats
	block->m_gpa->any = true;
	block->m_fpa->any = false;
	block->m_gpa->Clear();
	block->m_fpa->Clear();

	// Set the blocks start address
	block->m_address = address;

	// Reset our block state
	block->m_broken = false;
	block->m_memory_exception = false;
	block->m_num_instructions = 0;

	if (address == 0)
	{
		// Memory exception occurred during instruction fetch
		block->m_memory_exception = true;
		return address;
	}

	// With the MMU enabled, confirm the block's start address translates
	// before attempting to fetch from it.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT))
	{
		if (!Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION))
		{
			// Memory exception occurred during instruction fetch
			block->m_memory_exception = true;
			return address;
		}
	}

	CodeOp *code = buffer->codebuffer;

	bool found_exit = false;
	u32 return_address = 0;   // candidate target for following bclrx "returns"
	u32 numFollows = 0;       // how many branches we have followed so far
	u32 num_inst = 0;

	// ---- Pass 1: decode instructions and decide where the block ends ----
	for (u32 i = 0; i < blockSize; ++i)
	{
		UGeckoInstruction inst = JitInterface::ReadOpcodeJIT(address);

		if (inst.hex != 0)
		{
			num_inst++;
			memset(&code[i], 0, sizeof(CodeOp));
			GekkoOPInfo *opinfo = GetOpInfo(inst);

			code[i].opinfo = opinfo;
			code[i].address = address;
			code[i].inst = inst;
			code[i].branchTo = -1;
			code[i].branchToIndex = -1;
			code[i].skip = false;
			block->m_stats->numCycles += opinfo->numCycles;

			SetInstructionStats(block, &code[i], opinfo, i);

			bool follow = false;
			u32 destination = 0;
			bool conditional_continue = false;

			// Do we inline leaf functions?
			if (HasOption(OPTION_LEAF_INLINE))
			{
				if (inst.OPCD == 18 && blockSize > 1)
				{
					//Is bx - should we inline? yes!
					if (inst.AA)
						destination = SignExt26(inst.LI << 2);
					else
						destination = address + SignExt26(inst.LI << 2);

					if (destination != block->m_address)
						follow = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
				         (inst.BO & (1 << 4)) && (inst.BO & (1 << 2)) &&
				         return_address != 0)
				{
					// bclrx with unconditional branch = return
					follow = true;
					destination = return_address;
					return_address = 0;

					if (inst.LK)
						return_address = address + 4;
				}
				else if (inst.OPCD == 31 && inst.SUBOP10 == 467)
				{
					// mtspr
					const u32 index = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
					if (index == SPR_LR)
					{
						// We give up to follow the return address
						// because we have to check the register usage.
						return_address = 0;
					}
				}

				// TODO: Find the optimal value for FUNCTION_FOLLOWING_THRESHOLD.
				//       If it is small, the performance will be down.
				//       If it is big, the size of generated code will be big and
				//       cache clearing will happen many times.
				// TODO: Investigate the reason why
				//       "0" is fastest in some games, MP2 for example.
				if (numFollows > FUNCTION_FOLLOWING_THRESHOLD)
					follow = false;
			}

			if (HasOption(OPTION_CONDITIONAL_CONTINUE))
			{
				if (inst.OPCD == 16 &&
				    ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0))
				{
					// bcx with conditional branch
					conditional_continue = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 16 &&
				         ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0 || (inst.BO & BO_DONT_CHECK_CONDITION) == 0))
				{
					// bclrx with conditional branch
					conditional_continue = true;
				}
				else if (inst.OPCD == 3 || (inst.OPCD == 31 && inst.SUBOP10 == 4))
				{
					// tw/twi tests and raises an exception
					conditional_continue = true;
				}
				else if (inst.OPCD == 19 && inst.SUBOP10 == 528 && (inst.BO_2 & BO_DONT_CHECK_CONDITION) == 0)
				{
					// Rare bcctrx with conditional branch
					// Seen in NES games
					conditional_continue = true;
				}
			}

			if (!follow)
			{
				address += 4;
				if (!conditional_continue && opinfo->flags & FL_ENDBLOCK) //right now we stop early
				{
					found_exit = true;
					break;
				}
			}
			// XXX: We don't support inlining yet.
#if 0
			else
			{
				numFollows++;
				// We don't "code[i].skip = true" here
				// because bx may store a certain value to the link register.
				// Instead, we skip a part of bx in Jit**::bx().
				address = destination;
				merged_addresses[size_of_merged_addresses++] = address;
			}
#endif
		}
		else
		{
			// ISI exception or other critical memory exception occurred (game over)
			ERROR_LOG(DYNA_REC, "Instruction hex was 0!");
			break;
		}
	}

	block->m_num_instructions = num_inst;

	if (block->m_num_instructions > 1)
		ReorderInstructions(block->m_num_instructions, code);

	if ((!found_exit && num_inst > 0) || blockSize == 1)
	{
		// We couldn't find an exit
		block->m_broken = true;
	}

	// ---- Pass 2 (backward): flag liveness and register-in-use tracking ----
	// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
	// wants flags, to be safe.
	bool wantsCR0 = true, wantsCR1 = true, wantsFPRF = true, wantsCA = true;
	BitSet32 fprInUse, gprInUse, gprInReg, fprInXmm;
	for (int i = block->m_num_instructions - 1; i >= 0; i--)
	{
		// Remember this op's own flag demands before merging in the
		// demands of everything that follows it.
		bool opWantsCR0 = code[i].wantsCR0;
		bool opWantsCR1 = code[i].wantsCR1;
		bool opWantsFPRF = code[i].wantsFPRF;
		bool opWantsCA = code[i].wantsCA;
		code[i].wantsCR0 = wantsCR0 || code[i].canEndBlock;
		code[i].wantsCR1 = wantsCR1 || code[i].canEndBlock;
		code[i].wantsFPRF = wantsFPRF || code[i].canEndBlock;
		code[i].wantsCA = wantsCA || code[i].canEndBlock;
		wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
		wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
		wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
		wantsCA |= opWantsCA || code[i].canEndBlock;
		// An op that overwrites a flag kills the demand for it, unless
		// the op itself also reads it.
		wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
		wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
		wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
		wantsCA &= !code[i].outputCA || opWantsCA;
		code[i].gprInUse = gprInUse;
		code[i].fprInUse = fprInUse;
		code[i].gprInReg = gprInReg;
		code[i].fprInXmm = fprInXmm;
		// TODO: if there's no possible endblocks or exceptions in between, tell the regcache
		// we can throw away a register if it's going to be overwritten later.
		gprInUse |= code[i].regsIn;
		gprInReg |= code[i].regsIn;
		fprInUse |= code[i].fregsIn;
		// Any FPR read by something other than stfd may need an XMM register.
		if (strncmp(code[i].opinfo->opname, "stfd", 4))
			fprInXmm |= code[i].fregsIn;
		// For now, we need to count output registers as "used" though; otherwise the flush
		// will result in a redundant store (e.g. store to regcache, then store again to
		// the same location later).
		gprInUse |= code[i].regsOut;
		if (code[i].fregOut >= 0)
			fprInUse[code[i].fregOut] = true;
	}

	// ---- Pass 3 (forward): FPR single/duplicated/store-safe state ----
	// Forward scan, for flags that need the other direction for calculation.
	BitSet32 fprIsSingle, fprIsDuplicated, fprIsStoreSafe;
	for (u32 i = 0; i < block->m_num_instructions; i++)
	{
		code[i].fprIsSingle = fprIsSingle;
		code[i].fprIsDuplicated = fprIsDuplicated;
		code[i].fprIsStoreSafe = fprIsStoreSafe;
		if (code[i].fregOut >= 0)
		{
			fprIsSingle[code[i].fregOut] = false;
			fprIsDuplicated[code[i].fregOut] = false;
			fprIsStoreSafe[code[i].fregOut] = false;
			// Single, duplicated, and doesn't need PPC_FP.
			if (code[i].opinfo->type == OPTYPE_SINGLEFP)
			{
				fprIsSingle[code[i].fregOut] = true;
				fprIsDuplicated[code[i].fregOut] = true;
				fprIsStoreSafe[code[i].fregOut] = true;
			}
			// Single and duplicated, but might be a denormal (not safe to skip PPC_FP).
			// TODO: if we go directly from a load to store, skip conversion entirely?
			// TODO: if we go directly from a load to a float instruction, and the value isn't used
			// for anything else, we can skip PPC_FP on a load too.
			if (!strncmp(code[i].opinfo->opname, "lfs", 3))
			{
				fprIsSingle[code[i].fregOut] = true;
				fprIsDuplicated[code[i].fregOut] = true;
			}
			// Paired are still floats, but the top/bottom halves may differ.
			if (code[i].opinfo->type == OPTYPE_PS || code[i].opinfo->type == OPTYPE_LOADPS)
			{
				fprIsSingle[code[i].fregOut] = true;
				fprIsStoreSafe[code[i].fregOut] = true;
			}
			// Careful: changing the float mode in a block breaks this optimization, since
			// a previous float op might have had FTZ off while the later store has FTZ
			// on. So, discard all information we have.
			if (!strncmp(code[i].opinfo->opname, "mtfs", 4))
				fprIsStoreSafe = BitSet32(0);
		}
	}
	return address;
}