コード例 #1
0
ファイル: cfg_recover.cpp プロジェクト: 0xDEC0DE8/mcsema
// assume the immediate references code if:
// * we are dealing with a fully linked ELF
// * The immediate is in the range of a valid code or data section
static bool setHeuristicRef(ExecutableContainer *c,
        InstPtr I,
        int opnum,
        stack<VA> &funcs,
        raw_ostream &out,
        const std::string whichInst)
{
    MCOperand op;
    std::string imp_name;
    ElfTarget *elft = dynamic_cast<ElfTarget*>(c);
    op = I->get_inst().getOperand(opnum);
    LASSERT(op.isImm(), "No immediate operand for " + whichInst);
    VA imm = op.getImm();


    if(elft && elft->isLinked()) {
       if (elft->is_in_code(imm)) {
            // this instruction references code
            I->set_call_tgt(imm);
            // make sure we disassemble at this new address
            funcs.push(imm);
            out << "Found new function entry from " << whichInst << ": " << to_string<VA>(imm, hex) << "\n";
            return true;
       } else if (elft->is_in_data(imm)) {
            out << "Adding local data ref to: " << to_string<VA>(imm, hex) << "\n";
            I->set_data_offset(imm);
       } else if (c->find_import_name(imm, imp_name)) {
           out << "Import name is: " << imp_name << "\n";
       }
    }

    return false;
}
コード例 #2
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
NativeBlockPtr decodeBlock( ExecutableContainer *c, 
                            ExternalFunctionMap &f,
                            LLVMByteDecoder     &d,
                            stack<VA>           &blockChildren,
                            VA                  e,
                            stack<VA>           &funcs,
                            raw_ostream         &out)
{
  NativeBlockPtr  B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
  VA              curAddr = e;
  bool            has_follow = true;

out << "Processing block: " << B->get_name() << "\n";
do
  {
    InstPtr I = d.getInstFromBuff(curAddr, c);

    //I, if a terminator, will have true and false targets 
    //filled in. I could be an indirect branch of some kind,
    //we will deal with that here. we will also deal with the 
    //instruction if it is a data instruction with relocation
   
    out << to_string<VA>(I->get_loc(), hex) << ":";
    out << I->printInst() << "\n";

    if(I->get_tr() != 0) {
      B->add_follow(I->get_tr());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
      blockChildren.push(I->get_tr());
    }

    if(I->get_fa() != 0) {
      B->add_follow(I->get_fa());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
      blockChildren.push(I->get_fa());
    }

    if(I->terminator()) {
      has_follow = false;
    }

    //do we need to add a data reference to this instruction?
    //again, because there is no offset information in the 
    //instruction decoder, for now we just ask if every addr
    //in the inst is relocated
    for(uint32_t i = 0; i < I->get_len(); i++) {
      VA addrInInst = curAddr+i;
      if(c->is_addr_relocated(addrInInst)) {
        VA  addr = 0;
        std::string has_imp;

        // this instruction has a relocation
        // save the relocation offset for later
        I->set_reloc_offset(i);

        //get the offset for this address
        //add it as a data offset to the instruction
        if (c->find_import_name(addrInInst, has_imp) )  {

            if(f.is_data(has_imp)) 
            {
                ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
                out << "Adding external data ref: " << has_imp << "\n";
                I->set_ext_data_ref(data_p);
            }
            else
            {
                ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
                LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);
                //maybe, this call doesn't return, in which case, 
                //we should kill the decoding of this flow
                if(code_p->getReturnType() == ExternalCodeRef::NoReturn) {
                    has_follow = false;
                }
                out << "Adding external code ref: " << has_imp << "\n";
                I->set_ext_call_target(code_p);
            }
                    
        } else if(c->relocate_addr(addrInInst, addr)) {
            bool can_ref_code = canInstructionReferenceCode(I);
            bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
            bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);
            unsigned opc = I->get_inst().getOpcode();

            if(isBranchViaMemory(I)) {
                out << "Detect branch via memory, relocation handled later\n";      
            }
            // this instruction can reference code and does
            // reference code
            // so we assume the code points to a function
            else if( can_ref_code && is_reloc_code ) {
                list<VA> new_funcs;
                if(dataInCodeHeuristic(c, I, addr, new_funcs)) {
                    // add new functions to our functions list
                    for(list<VA>::const_iterator nfi = new_funcs.begin();
                            nfi != new_funcs.end();
                            nfi++)
                    {
                        funcs.push(*nfi);
                    }

                    I->set_data_offset(addr);
                } else {
                    I->set_call_tgt(addr);
                    out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
                    funcs.push(addr);
                }
            } 
            // this instruction can't reference code and points to .text
            // or references data. Treat as data element
            // TODO: extract this from .text and shove into .data?
            else if(( !can_ref_code && is_reloc_code) || is_reloc_data )
            {
              I->set_data_offset(addr);
            } else {
              out << "WARNING: relocation points to neither code nor data:" << to_string<VA>(addr, hex) << "\n";
            }

        } else {
            out << "*NOT* Relocating relocatable addr:" << to_string<uint32_t>(addrInInst, hex) << "\n";
        }
        break;
      }
    }

    //is this instruction an external call?
    //in a COFF binary, the pcrel call can refer to an 
    //external symbol that has been relocated
    //so, get the string that corresponds, and 
    //provide the translation using the function map
    MCOperand op;
    string  imp;
    switch(I->get_inst().getOpcode()) {
      case X86::JMP32m:
          {
            string  thunkSym;
            bool r = c->find_import_name(curAddr+2, thunkSym);
            if(r) {
                // this goes to an external API call
                out << "Adding external code ref via JMP: " << thunkSym << "\n";
                ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                I->set_ext_call_target(p);
                has_follow = false;
            } else {
                // this is an internal jmp. probably a jump table.
                bool did_jmptable = handlePossibleJumpTable(c, B, I, curAddr, funcs, blockChildren, out); 

                LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
            }
          }
          break;
      case X86::CALLpcrel32:
        //this could be an external call in COFF, or not
        op = I->get_inst().getOperand(0);
        LASSERT(op.isImm(), "Nonsense for CALLpcrel32");
        if(op.getImm() !=0) {
          VA    callTgt = curAddr+op.getImm()+I->get_len();
          bool  foldFunc = false;
          //speculate about callTgt
          InstPtr spec = d.getInstFromBuff(callTgt, c);
          if(spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
            string  thunkSym;
            bool r = c->find_import_name(callTgt+2, thunkSym);
            LASSERT(r, "Need to find thunk import addr");
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            I->set_ext_call_target(p);
            foldFunc = true;
            if(p->getReturnType() == ExternalCodeRef::NoReturn) {
              has_follow = false;
            }
          }
          if(foldFunc == false) {
            //add this to our list of funcs to search
            funcs.push(callTgt);
          }
        } else {
          //check to see if this is an external call...
          if(I->has_ext_call_target() == false) {
            // may be a local call
            VA addr=curAddr+1, relo_addr=0;
            out << "Symbol not found, maybe a local call\n";
            if(c->relocate_addr(addr, relo_addr)){
                out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
                I->set_call_tgt(relo_addr);
                out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
                funcs.push(relo_addr);
            } else {
                out << "Could not relocate addr for local call at: ";
                out << to_string<VA>(curAddr, hex) << "\n";
            }
          } else {
            out << "External call to: " << I->get_ext_call_target()->getSymbolName() << "\n";
          }
        }
        break;

      case X86::CALL32m:
        //this should be a call to an external, or we have no idea
        //so we need to try and look up the symbol that we're calling at this address...
        if(c->find_import_name(curAddr+2, imp)) {
          ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
          LASSERT(p, "Failed to get ext call from map for symbol"+imp);
          
          out << "Calling symbol: " << p->getSymbolName() << "\n";
          if(p->getReturnType() == ExternalCodeRef::NoReturn) {
            has_follow = false;
          }
          I->set_ext_call_target(p);
        } else {
          out << "Cannot find symbol at address ";
          out << to_string<VA>(curAddr, hex) << "\n";
        }
        break;
    }

    B->add_inst(I);
    curAddr += I->get_len();
  } while(has_follow);

  //we have built a basic block, it might contain
  //multiple calls, but it only has one terminator
  //which is either a ret or a branch
  return B;
}