Beispiel #1
0
/// Reports whether one of the relocations of section \p sr applies to
/// \p address and refers to something that should be treated as relocated.
///
/// Every relocation entry of the section is visited. An entry matches when its
/// own address plus \p offt equals \p address. A matching entry yields `true`
/// when
///   - its relocation type name is "R_386_32" or "R_386_PC32", or
///   - its symbol is a data symbol carrying the SF_Global flag, or
///   - its symbol type is ST_Other or ST_Unknown.
/// A matching entry that satisfies none of these does not stop the scan.
///
/// \param sr      section whose relocation entries are scanned.
/// \param offt    offset added to each relocation address before comparing.
/// \param address address to look for.
/// \return true if a qualifying relocation was found, false otherwise.
///
/// Any error reported by the LLVM object API is passed to LASSERT.
static
bool isAddrRelocated(const object::SectionRef &sr, uint32_t offt, VA address) {

    llvm::error_code    e;
    llvm::object::relocation_iterator rit = sr.begin_relocations();

    while( rit != sr.end_relocations() ) {
      llvm::object::RelocationRef   rref = *rit;
      llvm::object::SymbolRef       symref;

      // Read into a genuine 64-bit temporary instead of reinterpreting the
      // storage of a VA through a reference cast.
      ::uint64_t                    reloc_addr = 0;
      e = rref.getAddress(reloc_addr);
      LASSERT(!e, e.message());
      const VA                      addr = reloc_addr;

      e = rref.getSymbol(symref);
      LASSERT(!e, e.message());

      if( address == (offt+addr) ) {
        llvm::object::SymbolRef::Type t = llvm::object::SymbolRef::ST_Unknown;
        uint32_t                      flag = 0;

        // Fetch the type and flags of the symbol the relocation refers to.
        e = symref.getType(t);
        LASSERT(!e, e.message());
        e = symref.getFlags(flag);
        LASSERT(!e, e.message());

        SmallString<32> relocType;
        e = rref.getTypeName(relocType);
        LASSERT(!e, e.message());
        // shortcut for ELF relocations by type
        // TODO: move this to ELF specific code
        if(relocType == "R_386_32" ||
           relocType == "R_386_PC32") {
            return true;
        }

        const bool is_data = (t == llvm::object::SymbolRef::ST_Data);
        // Test the flag bit with '&'. The previous '|' produced a non-zero
        // value for every symbol, so the "global" requirement never filtered
        // anything.
        const bool is_global =
            0 != (flag & llvm::object::SymbolRef::SF_Global);

        if( (is_data && is_global) ||
            (t == llvm::object::SymbolRef::ST_Other) ||
            (t == llvm::object::SymbolRef::ST_Unknown) )
        {
          return true;
        }
      }

      rit.increment(e);

      LASSERT(!e, e.message());
    }

  return false;
}
Beispiel #2
0
/// Searches the relocations of \p section for one that applies to \p target
/// and refers to a symbol that looks like an import.
///
/// A relocation matches when its address plus \p offt equals \p target. For a
/// matching relocation the symbol name is stored in \p import_name (this
/// happens even if the symbol is subsequently rejected). The symbol is
/// accepted when its type is ST_Unknown, ST_Data or ST_Function and its
/// address equals (uint64_t)-1 -- presumably LLVM's "unknown address" marker
/// for symbols not defined in this object; confirm against the LLVM version in
/// use.
///
/// \param section      section whose relocations are scanned.
/// \param offt         offset added to each relocation address.
/// \param target       address being looked up.
/// \param import_name  receives the name of the last matching symbol.
/// \return true if an import-like symbol was found for \p target.
static bool find_import_for_addr(object::SectionRef section, uint32_t offt, uint32_t target,
                                 std::string &import_name) {

    std::error_code ec;

    for( llvm::object::relocation_iterator rit = section.relocation_begin();
         rit != section.relocation_end();
         ++rit ) {

        // Read into a genuine 64-bit temporary instead of reinterpreting the
        // storage of a VA through a reference cast.
        ::uint64_t reloc_addr = 0;
        ec = rit->getAddress(reloc_addr);
        LASSERT(!ec, "Can't get address for relocation ref");
        const VA addr = reloc_addr;

        llvm::dbgs() << "\t" << __FUNCTION__ << ": Testing " << to_string<VA>(target, hex)
                     << " vs. " << to_string<VA>(addr+offt, hex) << "\n";

        if( target != (addr+offt) ) {
            continue;
        }

        llvm::object::SymbolRef symref = *rit->getSymbol();

        llvm::StringRef strr;
        ec = symref.getName(strr);
        LASSERT(!ec, "Can't get name for symbol ref");

        import_name = strr.str();
        llvm::dbgs() << "Found symbol named: " << import_name << "\n";

        // Initialised so that a failed lookup is never followed by a read of
        // an indeterminate value; with 0 the symbol is skipped below.
        ::uint64_t sym_addr = 0;
        ec = symref.getAddress(sym_addr);
        if(ec) {
            llvm::dbgs() << "Could not get address of symbol: " << import_name << "\n";
        } else {
            llvm::dbgs() << "Address for " << import_name
                         << " is: " << to_string< ::uint64_t >(sym_addr, hex) << "\n";
        }

        llvm::object::SymbolRef::Type symtype = llvm::object::SymbolRef::ST_Unknown;
        ec = symref.getType(symtype);
        if(ec) {
            // Without a reliable type there is nothing to classify.
            llvm::dbgs() << "Could not get type of symbol: " << import_name << "\n";
            continue;
        }

        switch(symtype) {
        case llvm::object::SymbolRef::ST_Unknown:
        case llvm::object::SymbolRef::ST_Data:
        case llvm::object::SymbolRef::ST_Function:
            if( sym_addr == (::uint64_t)(-1) ) {
                return true;
            } else {
                llvm::dbgs() << "Skipping symbol due to address\n";
            }
            break;
        default:
            llvm::dbgs() << "Skipping symbol since its probably not an import!" << "\n";
        }
    }


    return false;
}
Beispiel #3
0
/// Makes \p Section the section this disassembler works on.
///
/// The section's contents, start address and size are queried first. If any
/// of these queries fails, the error message is reported through printError()
/// and the function returns without changing the current section. Otherwise
/// CurSection, CurSectionEnd (start address + size) and CurSectionMemory (a
/// new StringRefMemoryObject over the section bytes) are updated and the
/// section name is reported through printInfo().
///
/// \param Section the section to switch to.
void Disassembler::setSection(const object::SectionRef Section) {
  StringRef Bytes;
  std::error_code ec = Section.getContents(Bytes);
  if (ec) {
    printError(ec.message());
    return;
  }

  uint64_t SectAddr = 0;
  ec = Section.getAddress(SectAddr);
  if (ec) {
    printError(ec.message());
    return;
  }

  uint64_t SectSize = 0;
  ec = Section.getSize(SectSize);
  if (ec) {
    printError(ec.message());
    return;
  }

  CurSection = Section;
  CurSectionEnd = SectAddr + SectSize;
  // NOTE(review): whatever CurSectionMemory pointed to before is not released
  // here -- confirm who owns the previous object.
  CurSectionMemory = new StringRefMemoryObject(Bytes, SectAddr);

  StringRef SectionName;
  CurSection.getName(SectionName);
  // Use str() rather than constructing from data(): a StringRef is not
  // guaranteed to be NUL-terminated, and its data pointer is null when the
  // name lookup above did not fill it in.
  printInfo("Setting Section " + SectionName.str());
  // TODO: Add section relocations (if necessary).
  // Make a list of all the relocations for this section.
  // error_code ec;
  // std::vector<object::RelocationRef> Rels;
  // for (relocation_iterator ri = Section.begin_relocations(), re =
  //     Section.end_relocations(); ri != re; ri.increment(ec)) {
  //   if (error(ec))
  //     break;
  //   Rels.push_back(*ri);
  // }

  // Sort relocations by address.
  // std::sort(Rels.begin(), Rels.end(), relocAddressLess);

  // std::vector<RelocationRef>::const_iterator rel_cur = Rels.begin();
  // std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
}
Beispiel #4
0
/// Decompiles the machine function found at \p Address into an IR Function
/// of the current module.
///
/// Steps:
///   1. Reject addresses outside the disassembler's current section.
///   2. Disassemble the function and get-or-insert a void() IR function with
///      the same name; if that function already has a body it is returned
///      unchanged.
///   3. Create an "entry" block that branches to the first machine basic
///      block, create an IR block for every machine basic block and decompile
///      each of them.
///   4. For every block that is still empty afterwards (other than "entry"
///      and "end"), locate the block containing its address and split that
///      block at the matching instruction.
///   5. Run the type recovery pass over the function.
///
/// \param Address address of the function to decompile.
/// \return the decompiled function, or NULL if the address lies outside the
///         current section, the section bounds cannot be queried, or no
///         machine function was produced.
Function* Decompiler::decompileFunction(unsigned Address) {
  // Check that Address is inside the current section.
  // TODO: Find a better way to do this check. What we really care about is
  // avoiding reads to library calls and areas of memory we can't "see".
  const object::SectionRef Sect = Dis->getCurrentSection();
  uint64_t SectStart = 0, SectSize = 0;
  if (Sect.getAddress(SectStart) || Sect.getSize(SectSize)) {
    printError("Unable to query bounds of the current section!");
    return NULL;
  }
  // SectEnd is one past the last byte of the section, so an address equal to
  // it is already out of bounds.
  const uint64_t SectEnd = SectStart + SectSize;
  if (Address < SectStart || Address >= SectEnd) {
    // PRIx64 consumes a 64-bit argument; widen Address explicitly.
    errs() << "Address out of bounds for section (is this a library call?): "
           << format("%1" PRIx64, static_cast<uint64_t>(Address)) << "\n";
    return NULL;
  }

  MachineFunction *MF = Dis->disassemble(Address);
  // NOTE(review): guards against a null result; confirm whether disassemble()
  // can actually return one.
  if (MF == NULL) {
    printError("Unable to disassemble function!");
    return NULL;
  }

  // Get Function Name
  // TODO: Determine Function Type
  FunctionType *FType = FunctionType::get(Type::getPrimitiveType(*Context,
      Type::VoidTyID), false);
  Function *F =
    cast<Function>(Mod->getOrInsertFunction(MF->getName(), FType));

  // A function that already has blocks has been decompiled before.
  if (!F->empty()) {
    return F;
  }

  // Create a basic block to hold entry point (alloca) information
  BasicBlock *entry = getOrCreateBasicBlock("entry", F);

  // Create an IR block for each machine basic block; "entry" branches to the
  // first one.
  for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
       BI != BE; ++BI) {
    BasicBlock *Target = getOrCreateBasicBlock(BI->getName(), F);
    if (BI == MF->begin()) {
      entry->getInstList().push_back(BranchInst::Create(Target));
    }
  }

  for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
       BI != BE; ++BI) {
    if (decompileBasicBlock(BI, F) == NULL) {
      printError("Unable to decompile basic block!");
    }
  }

  // During Decompilation, did any "in-between" basic blocks get created?
  // Nothing ever splits the entry block, so we skip it.
  for (Function::iterator I = ++F->begin(), E = F->end(); I != E; ++I) {
    if (!(I->empty())) {
      continue;
    }
    // Right now, the only way to get the right offset is to parse its name
    // it sucks, but it works.
    StringRef Name = I->getName();
    if (Name == "end" || Name == "entry") continue; // these can be empty

    // The offset is taken from the part of the block name that follows the
    // function name and one separator character.
    const size_t Off = F->getName().size() + 1;
    unsigned long long BBAddr = 0;
    // getAsUnsignedInteger() returns true on failure; do not continue with an
    // address that was never parsed.
    if (Name.size() <= Off ||
        getAsUnsignedInteger(Name.substr(Off), 10, BBAddr)) {
      errs() << "Decompiler: Unable to parse block offset from name: "
             << Name << "\n";
      continue;
    }
    BBAddr += Address;
    DEBUG(errs() << "Split Target: " << Name << "\t Address: "
                 << BBAddr << "\n");
    // split Block at AddrStr
    Function::iterator SB;      // Split basic block
    BasicBlock::iterator SI, SE;    // Split instruction
    // Note the ++, nothing ever splits the entry block.
    for (SB = ++F->begin(); SB != E; ++SB) {
      // Empty or unterminated blocks (the block being filled in is one of
      // them) have no instruction range to inspect; skip them before
      // anything dereferences their first instruction or terminator.
      if (SB->empty() || SB->getTerminator() == NULL) {
        continue;
      }
      DEBUG(outs() << "SB: " << SB->getName()
        << "\tRange: " << Dis->getDebugOffset(SB->begin()->getDebugLoc())
        << " " << Dis->getDebugOffset(SB->getTerminator()->getDebugLoc())
        << "\n");
      if (BBAddr < getBasicBlockAddress(SB)
        || BBAddr > Dis->getDebugOffset(SB->getTerminator()->getDebugLoc())) {
        continue;
      }
      // Reorder instructions based on Debug Location
      sortBasicBlock(SB);
      DEBUG(errs() << "Found Split Block: " << SB->getName() << "\n");
      // Find iterator to split on.
      for (SI = SB->begin(), SE = SB->end(); SI != SE; ++SI) {
        // outs() << "SI: " << SI->getDebugLoc().getLine() << "\n";
        if (Dis->getDebugOffset(SI->getDebugLoc()) == BBAddr) break;
        if (Dis->getDebugOffset(SI->getDebugLoc()) > BBAddr) {
          errs() << "Could not find address inside basic block!\n"
                 << "SI: " << Dis->getDebugOffset(SI->getDebugLoc()) << "\n"
                 << "BBAddr: " << BBAddr << "\n";
          break;
        }
      }
      // Only the first block whose range contains BBAddr is considered.
      break;
    }
    // Test SB first: SI and SE are only assigned once a block was found.
    if (SB == E || SI == SE) {
      errs() << "Decompiler: Failed to find instruction offset in function!\n";
      continue;
    }
    // outs() << SB->getName() << " " << SI->getName() << "\n";
    // outs() << "Creating Block...";
    splitBasicBlockIntoBlock(SB, SI, I);
  }

  // Clean up unnecessary stores and loads
  FunctionPassManager FPM(Mod);
  // FPM.add(createPromoteMemoryToRegisterPass()); // See Scalar.h for more.
  FPM.add(createTypeRecoveryPass());
  FPM.run(*F);

  return F;
}