Example #1
0
// Basic idea of the disassembly + discovery:
//
// start with the wanted address, insert it in the worklist
// while worklist not empty, take next address in the worklist:
// - check if an already discovered block (BBInfo) covers that address
//   - if middle of a block:
//     - split it at that address: the new block takes the trailing
//       instructions and the successors, the old one falls through to it
//   - if start of a block: not expected, worklist addresses are unique
//   - if no block: disassemble a new one, stopping at the first terminator,
//     at the next known block, or at the end of the region
//     - look at the terminator, add its succs to the worklist
// for all elements in the worklist:
// - create basic block, update preds/succs, etc..
//
// Discovers the basic blocks reachable from BBBeginAddr and adds them, with
// their predecessor/successor edges, to MCFN.
//
// - Module is not used by this function.
// - Targets of calls whose destination can be evaluated are appended to
//   CallTargets.
// - Targets of terminating branches that MOS reports as external functions
//   are appended to both TailCallTargets and CallTargets instead of becoming
//   successors of the block.
void MCObjectDisassembler::disassembleFunctionAt(
    MCModule *Module, MCFunction *MCFN, uint64_t BBBeginAddr,
    AddressSetTy &CallTargets, AddressSetTy &TailCallTargets) {
  // Blocks discovered so far, keyed (and therefore sorted) by start address.
  std::map<uint64_t, BBInfo> BBInfos;

  typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;

  AddrWorklistTy Worklist;

  DEBUG(dbgs() << "Starting CFG at " << utohexstr(BBBeginAddr) << "\n");

  Worklist.insert(BBBeginAddr);
  // The worklist grows while we walk it, so its size is re-read every time.
  for (size_t wi = 0; wi < Worklist.size(); ++wi) {
    const uint64_t BeginAddr = Worklist[wi];

    AddrPrettyStackTraceEntry X(BeginAddr, "Basic Block");

    DEBUG(dbgs() << "Looking for block at " << utohexstr(BeginAddr) << "\n");

    // Look for a BB at BeginAddr: this finds the first known block that ends
    // after BeginAddr, i.e. either the block containing BeginAddr or the
    // next block after it.
    // The entry is taken by reference to the map's real value_type; a
    // pair<uint64_t, BBInfo> parameter would copy every probed BBInfo.
    auto BeforeIt = std::upper_bound(
        BBInfos.begin(), BBInfos.end(), BeginAddr,
        [](uint64_t Addr, const std::map<uint64_t, BBInfo>::value_type &BBI) {
          return Addr < BBI.second.BeginAddr + BBI.second.SizeInBytes;
        });

    assert((BeforeIt == BBInfos.end() || BeforeIt->first != BeginAddr) &&
           "Visited same basic block twice!");

    // Found a BB containing BeginAddr, we have to split it.
    if (BeforeIt != BBInfos.end() && BeforeIt->first < BeginAddr) {

      BBInfo &BeforeBB = BeforeIt->second;
      DEBUG(dbgs() << "Found block at " << utohexstr(BeforeBB.BeginAddr)
                   << ", needs splitting at " << utohexstr(BeginAddr) << "\n");

      assert(BeginAddr < BeforeBB.BeginAddr + BeforeBB.SizeInBytes &&
             "Address isn't inside block?");

      // Inserting into a std::map keeps BeforeIt/BeforeBB valid.
      BBInfo &NewBB = BBInfos[BeginAddr];
      NewBB.BeginAddr = BeginAddr;

      // Locate the instruction starting exactly at the split address.
      auto SplitInst = BeforeBB.Insts.end();
      for (auto I = BeforeBB.Insts.begin(), E = BeforeBB.Insts.end(); I != E;
           ++I) {
        if (BeginAddr == I->Address) {
          SplitInst = I;
          break;
        }
      }

      assert(SplitInst != BeforeBB.Insts.end() &&
             "Split point does not fall on an instruction boundary!");

      // FIXME: use a list instead for free splicing?

      // Splice the remaining instructions to the new block.
      // While SplitInst is still valid, decrease the size to match.
      const uint64_t SplitOffset = SplitInst->Address - BeforeBB.BeginAddr;
      NewBB.SizeInBytes = BeforeBB.SizeInBytes - SplitOffset;
      BeforeBB.SizeInBytes = SplitOffset;

      // Now do the actual splicing out of BeforeBB.
      NewBB.Insts.insert(NewBB.Insts.begin(), SplitInst, BeforeBB.Insts.end());
      BeforeBB.Insts.erase(SplitInst, BeforeBB.Insts.end());

      // Move the successors to the new block.
      std::swap(NewBB.SuccAddrs, BeforeBB.SuccAddrs);

      // The first half now simply falls through to the second half.
      BeforeBB.SuccAddrs.push_back(BeginAddr);
    } else {
      // If we didn't find a BB, then we have to disassemble to create one!
      const MemoryRegion &Region = getRegionFor(BeginAddr);
      if (Region.Bytes.empty())
        report_fatal_error(("No suitable region for disassembly at 0x" +
                            utohexstr(BeginAddr)).c_str());
      const uint64_t EndRegion = Region.Addr + Region.Bytes.size();

      uint64_t EndAddr = EndRegion;

      // We want to stop before the next BB and have a fallthrough to it.
      if (BeforeIt != BBInfos.end())
        EndAddr = std::min(EndAddr, BeforeIt->first);

      BBInfo &BBI = BBInfos[BeginAddr];
      BBI.BeginAddr = BeginAddr;

      assert(BBI.Insts.empty() && "Basic Block already exists!");

      DEBUG(dbgs() << "No existing block found, starting disassembly from "
                   << utohexstr(Region.Addr) << " to "
                   << utohexstr(EndRegion) << "\n");

      // Appends one decoded instruction to the block and grows its size.
      auto AddInst = [&](MCInst &I, uint64_t Addr, uint64_t Size) {
        const uint64_t NextAddr = BBI.BeginAddr + BBI.SizeInBytes;
        assert(NextAddr == Addr);
        BBI.Insts.emplace_back(I, NextAddr, Size);
        BBI.SizeInBytes += Size;
      };

      uint64_t InstSize = 0;

      // Set once the block was ended by a terminator or a decoding failure,
      // as opposed to running into EndAddr.
      bool BlockClosed = false;

      for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
        MCInst Inst;
        if (Dis.getInstruction(Inst, InstSize,
                               Region.Bytes.slice(Addr - Region.Addr), Addr,
                               nulls(), nulls())) {
          AddInst(Inst, Addr, InstSize);
        } else {
          DEBUG(dbgs() << "Failed disassembly at " << utohexstr(Addr) << "!\n");
          BlockClosed = true;
          break;
        }

        // Evaluate the target once; it is needed both for call discovery and
        // for the terminator handling below.
        uint64_t BranchTarget;
        const bool HasBranchTarget =
            MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget);
        if (HasBranchTarget) {
          DEBUG(dbgs() << "Found branch to " << utohexstr(BranchTarget)
                       << "!\n");
          if (MIA.isCall(Inst)) {
            DEBUG(dbgs() << "Found call!\n");
            CallTargets.push_back(BranchTarget);
          }
        }

        if (MIA.isTerminator(Inst)) {
          DEBUG(dbgs() << "Found terminator!\n");
          // Now we have a complete basic block, add successors.

          // Add the fallthrough block, and mark it for visiting.
          if (MIA.isConditionalBranch(Inst)) {
            BBI.SuccAddrs.push_back(Addr + InstSize);
            Worklist.insert(Addr + InstSize);
          }

          // If the terminator is a branch, add the target block.
          if (MIA.isBranch(Inst) && HasBranchTarget) {
            StringRef ExtFnName;
            if (MOS &&
                !(ExtFnName = MOS->findExternalFunctionAt(BranchTarget))
                     .empty()) {
              // Branch to an external function: treat it as a tail call
              // rather than as an intra-function edge.
              TailCallTargets.push_back(BranchTarget);
              CallTargets.push_back(BranchTarget);
            } else {
              BBI.SuccAddrs.push_back(BranchTarget);
              Worklist.insert(BranchTarget);
            }
          }
          BlockClosed = true;
          break;
        }
      }

      // We ran out of room without seeing a terminator. If that is because
      // the block ends exactly where the next known block starts, control
      // falls through into it, so record that edge. The insert is a no-op
      // when the address is already in the worklist.
      if (!BlockClosed && BeforeIt != BBInfos.end() && BBI.SizeInBytes != 0 &&
          BBI.BeginAddr + BBI.SizeInBytes == BeforeIt->first) {
        BBI.SuccAddrs.push_back(BeforeIt->first);
        Worklist.insert(BeforeIt->first);
      }
    }
  }

  // First, create all blocks.
  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
    const uint64_t BeginAddr = Worklist[wi];
    BBInfo *BBI = &BBInfos[BeginAddr];
    MCBasicBlock *&MCBB = BBI->BB;

    MCBB = &MCFN->createBlock(BeginAddr);

    // Hand the collected instructions over to the real block.
    std::swap(MCBB->Insts, BBI->Insts);
    MCBB->InstCount = MCBB->Insts.size();
    MCBB->SizeInBytes = BBI->SizeInBytes;
  }

  // Next, add all predecessors/successors.
  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
    const uint64_t BeginAddr = Worklist[wi];
    BBInfo *BBI = &BBInfos[BeginAddr];
    MCBasicBlock *&MCBB = BBI->BB;
    RemoveDupsFromAddressVector(BBI->SuccAddrs);
    for (uint64_t Address : BBI->SuccAddrs) {
      MCBasicBlock *Succ = BBInfos[Address].BB;
      assert(Succ && "Couldn't find block successor?!");
      // FIXME: Sort the succs/preds at the end?
      MCBB->Successors.push_back(Succ);
      Succ->Predecessors.push_back(MCBB);
    }
  }
}
// Basic idea of the disassembly + discovery:
//
// start with the wanted address, insert it in the worklist
// while worklist not empty, take next address in the worklist:
// - check if atom exists there
//   - if middle of atom:
//     - split basic blocks referencing the atom
//     - look for an already encountered BBInfo (using a map<atom, bbinfo>)
//       - if there is, split it (new one, fallthrough, move succs, etc..)
//   - if start of atom: nothing else to do
//   - if no atom: create new atom and new bbinfo
// - look at the last instruction in the atom, add succs to worklist
// for all elements in the worklist:
// - create basic block, update preds/succs, etc..
//
// Discovers the basic blocks reachable from BBBeginAddr, creating or
// splitting text atoms in Module as needed, and returns the basic block of
// MCFN that starts at BBBeginAddr.
//
// - Targets of calls whose destination can be evaluated are appended to
//   CallTargets.
// - Targets of terminating branches that MOS reports as external functions
//   are appended to both TailCallTargets and CallTargets instead of becoming
//   successors of the block.
MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
                                            uint64_t BBBeginAddr,
                                            AddressSetTy &CallTargets,
                                            AddressSetTy &TailCallTargets) {
  typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
  typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
  BBInfoByAddrTy BBInfos;
  AddrWorklistTy Worklist;

  Worklist.insert(BBBeginAddr);
  // The worklist grows while we walk it, so its size is re-read every time.
  for (size_t wi = 0; wi < Worklist.size(); ++wi) {
    const uint64_t BeginAddr = Worklist[wi];
    BBInfo *BBI = &BBInfos[BeginAddr];

    // TA aliases BBI->Atom: assigning to TA records the atom in the BBInfo.
    MCTextAtom *&TA = BBI->Atom;
    assert(!TA && "Discovered basic block already has an associated atom!");

    // Look for an atom at BeginAddr.
    if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
      // FIXME: We don't care about mixed atoms, see above.
      TA = cast<MCTextAtom>(A);

      // The found atom doesn't begin at BeginAddr, we have to split it.
      if (TA->getBeginAddr() != BeginAddr) {
        // FIXME: Handle overlapping atoms: middle-starting instructions, etc..
        MCTextAtom *NewTA = TA->split(BeginAddr);

        // Look for an already encountered basic block that needs splitting
        BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
        if (It != BBInfos.end() && It->second.Atom) {
          // The new block inherits the successors; the old one now only
          // falls through to the new block.
          BBI->SuccAddrs = It->second.SuccAddrs;
          It->second.SuccAddrs.clear();
          It->second.SuccAddrs.push_back(BeginAddr);
        }
        TA = NewTA;
      }
    } else {
      // If we didn't find an atom, then we have to disassemble to create one!

      MemoryObject *Region = getRegionFor(BeginAddr);
      if (!Region)
        llvm_unreachable(("Couldn't find suitable region for disassembly at " +
                          utostr(BeginAddr)).c_str());

      uint64_t InstSize = 0;
      uint64_t EndAddr = Region->getBase() + Region->getExtent();

      // We want to stop before the next atom and have a fallthrough to it.
      if (MCTextAtom *NextAtom =
              cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
        EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());

      for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
        MCInst Inst;
        if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
                               nulls())) {
          // The atom is created lazily, on the first decoded instruction.
          if (!TA)
            TA = Module->createTextAtom(Addr, Addr);
          TA->addInst(Inst, InstSize);
        } else {
          // We don't care about splitting mixed atoms either.
          llvm_unreachable("Couldn't disassemble instruction in atom.");
        }

        uint64_t BranchTarget;
        if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
          if (MIA.isCall(Inst))
            CallTargets.push_back(BranchTarget);
        }

        if (MIA.isTerminator(Inst))
          break;
      }
    }

    assert(TA && "Couldn't disassemble atom, none was created!");
    assert(TA->begin() != TA->end() && "Empty atom!");

    MemoryObject *Region = getRegionFor(TA->getBeginAddr());
    assert(Region && "Couldn't find region for already disassembled code!");
    uint64_t EndRegion = Region->getBase() + Region->getExtent();

    // Now we have a basic block atom, add successors.
    // Add the fallthrough block.
    if ((MIA.isConditionalBranch(TA->back().Inst) ||
         !MIA.isTerminator(TA->back().Inst)) &&
        (TA->getEndAddr() + 1 < EndRegion)) {
      BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
      Worklist.insert(TA->getEndAddr() + 1);
    }

    // If the terminator is a branch, add the target block.
    if (MIA.isBranch(TA->back().Inst)) {
      uint64_t BranchTarget;
      if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
                             TA->back().Size, BranchTarget)) {
        StringRef ExtFnName;
        if (MOS)
          ExtFnName =
              MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
        if (!ExtFnName.empty()) {
          // Branch to an external function: treat it as a tail call rather
          // than as an intra-function edge.
          TailCallTargets.push_back(BranchTarget);
          CallTargets.push_back(BranchTarget);
        } else {
          BBI->SuccAddrs.push_back(BranchTarget);
          Worklist.insert(BranchTarget);
        }
      }
    }
  }

  // Find or create the basic block of every discovered address.
  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
    const uint64_t BeginAddr = Worklist[wi];
    BBInfo *BBI = &BBInfos[BeginAddr];

    assert(BBI->Atom && "Found a basic block without an associated atom!");

    // Look for a basic block at BeginAddr.
    BBI->BB = MCFN->find(BeginAddr);
    if (BBI->BB) {
      // FIXME: check that the succs/preds are the same
      continue;
    }
    // If there was none, we have to create one from the atom.
    BBI->BB = &MCFN->createBlock(*BBI->Atom);
  }

  // Wire up the predecessor/successor edges.
  for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
    const uint64_t BeginAddr = Worklist[wi];
    BBInfo *BBI = &BBInfos[BeginAddr];
    MCBasicBlock *BB = BBI->BB;

    RemoveDupsFromAddressVector(BBI->SuccAddrs);
    // The loop must be bounded by SI != SE; comparing SE with itself would
    // skip every successor and leave the CFG without edges.
    for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
         SE = BBI->SuccAddrs.end();
         SI != SE; ++SI) {
      MCBasicBlock *Succ = BBInfos[*SI].BB;
      assert(Succ && "Couldn't find block successor?!");
      BB->addSuccessor(Succ);
      Succ->addPredecessor(BB);
    }
  }

  // Worklist[0] is BBBeginAddr, the address the caller asked for.
  assert(BBInfos[Worklist[0]].BB &&
         "No basic block created at requested address?");

  return BBInfos[Worklist[0]].BB;
}