コード例 #1
0
ファイル: JumpTables.cpp プロジェクト: Sineaggi/mcsema
// Translates a jump-index-table read into an LLVM switch.
//
// block: in/out. Instructions are appended to the incoming block; on
//        return it refers to the freshly created continuation block.
// ip:    the instruction carrying the jump index table. It is expected
//        to have the form `movzx reg32, [base+disp]`.
//
// One case block is emitted per table entry (via emitJumpIndexWrite);
// every case block is handed the continuation block as its successor.
// The switch default traps.
void doJumpIndexTableViaSwitch(
        BasicBlock *&block, 
        InstPtr ip)
{
    Function *parentFn = block->getParent();
    Module *parentMod = parentFn->getParent();

    // Operand sanity check. OP() is a project macro; the local is kept
    // under the name `inst` in case the macro refers to it.
    const MCInst &inst = ip->get_inst();
    const MCOperand &dest = OP(0);
    const MCOperand &base = OP(1);

    TASSERT(base.isReg(), "Conformant jump index tables need base to be a register");
    TASSERT(dest.isReg(), "Conformant jump index tables need to write to a register");

    JumpIndexTablePtr indexTable = ip->get_jump_index_table();

    // Bias the register value by the table's initial entry (original
    // author's intent: ensure there are no negative entries).
    Value *bias = CONST_V<32>(block, indexTable->getInitialEntry());
    Value *baseValue = R_READ<32>(block, base.getReg());
    Value *switchValue =
        BinaryOperator::Create(Instruction::Add, bias, baseValue, "", block);

    // Block that every case falls into once the destination is written.
    BasicBlock *continueBlock =
        BasicBlock::Create(block->getContext(), "", parentFn, 0);

    // Default destination: call the trap intrinsic, then branch on to the
    // continuation block so the block is properly terminated.
    BasicBlock *trapBlock =
        BasicBlock::Create(block->getContext(), "", parentFn, 0);
    Function *trapDecl = Intrinsic::getDeclaration(parentMod, Intrinsic::trap);
    CallInst::Create(trapDecl, "", trapBlock);
    BranchInst::Create(continueBlock, trapBlock);

    const std::vector<uint8_t> &entries = indexTable->getJumpIndexTable();

    SwitchInst *tableSwitch = SwitchInst::Create(
            switchValue,
            trapBlock,
            entries.size(),
            block);

    // Case N (counting from zero) writes the N-th table entry.
    int caseNumber = 0;
    std::vector<uint8_t>::const_iterator entry = entries.begin();
    while(entry != entries.end())
    {
        BasicBlock *writeBlock =
            emitJumpIndexWrite(parentFn, *entry, dest.getReg(), continueBlock);
        tableSwitch->addCase(CONST_V<32>(block, caseNumber), writeBlock);
        ++caseNumber;
        ++entry;
    }

    // Callers keep emitting into the continuation block.
    block = continueBlock;
}
コード例 #2
0
ファイル: cfg_recover.cpp プロジェクト: 0xDEC0DE8/mcsema
// Heuristically classifies the immediate operand `opnum` of instruction I.
//
// The heuristic only applies when the container is an ElfTarget that
// reports isLinked(); otherwise nothing is recorded and false is returned.
//  * immediate lies in code: stored as the call target of I, pushed onto
//    `funcs` for later disassembly, and true is returned
//  * immediate lies in data: stored as the data offset of I
//  * otherwise: if the immediate resolves to an import name, the name is
//    logged and nothing is recorded
// Every path except the code case returns false.
static bool setHeuristicRef(ExecutableContainer *c,
        InstPtr I,
        int opnum,
        stack<VA> &funcs,
        raw_ostream &out,
        const std::string whichInst)
{
    ElfTarget *elft = dynamic_cast<ElfTarget*>(c);

    MCOperand op = I->get_inst().getOperand(opnum);
    LASSERT(op.isImm(), "No immediate operand for " + whichInst);
    VA imm = op.getImm();

    // Not a linked ELF: the heuristic does not apply.
    if(!elft || !elft->isLinked()) {
        return false;
    }

    if(elft->is_in_code(imm)) {
        // The immediate references code; remember it as a call target and
        // make sure disassembly visits the new address.
        I->set_call_tgt(imm);
        funcs.push(imm);
        out << "Found new function entry from " << whichInst << ": " << to_string<VA>(imm, hex) << "\n";
        return true;
    }

    if(elft->is_in_data(imm)) {
        out << "Adding local data ref to: " << to_string<VA>(imm, hex) << "\n";
        I->set_data_offset(imm);
    } else {
        std::string imp_name;
        if(c->find_import_name(imm, imp_name)) {
            out << "Import name is: " << imp_name << "\n";
        }
    }

    return false;
}
コード例 #3
0
ファイル: cfg_recover.cpp プロジェクト: 0xDEC0DE8/mcsema
// Decides whether the code-section address `addr` referenced by
// instruction I should be treated as data embedded in code.
//
// c:         container the bytes are read from
// I:         the referencing instruction
// addr:      the referenced address
// funcs:     forwarded to treatCodeAsData
// relocSize: size forwarded to treatCodeAsData for non-PUSHi32 instructions
//
// For PUSHi32 the 32-bit value at `addr` is compared against 0xFFFFFFFE
// (the original author's marker for an SEH handler); on a match the region
// is handed to treatCodeAsData with a fixed size of 0x28. A PUSHi32 whose
// target does not match returns false. Every other opcode defers to
// treatCodeAsData with `relocSize`.
bool dataInCodeHeuristic(
        ExecutableContainer *c,
        InstPtr             I,
        uint32_t            addr,
        list<VA>           &funcs,
		uint32_t 			relocSize)
{
    if(I->get_inst().getOpcode() != X86::PUSHi32) {
        return treatCodeAsData(c, addr, relocSize, funcs);
    }

    // detect SEH handler
    //
    // The bytes are zero-initialised so that a read which leaves its output
    // untouched cannot feed an indeterminate value into the comparison.
    uint8_t raw[4] = {0, 0, 0, 0};
    for(uint32_t i = 0; i < 4; i++) {
        c->readByte(addr+i, &raw[i]);
    }

    // Assemble the dword explicitly as little-endian so the result does not
    // depend on the byte order of the machine running this tool.
    const uint32_t dw1 =
          static_cast<uint32_t>(raw[0])
        | (static_cast<uint32_t>(raw[1]) << 8)
        | (static_cast<uint32_t>(raw[2]) << 16)
        | (static_cast<uint32_t>(raw[3]) << 24);

    if(dw1 == 0xFFFFFFFE) {
        llvm::outs() << "WARNING: Heuristically detected SEH handler at: "
            << to_string<VA>(addr, hex) << "\n";
        return treatCodeAsData(c, addr, 0x28, funcs);
    }

    return false;
}
コード例 #4
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
// True when the instruction's opcode is one of the memory-operand
// branch forms checked here: JMP32m or CALL32m.
static bool isBranchViaMemory(InstPtr inst) {
    const unsigned opcode = inst->get_inst().getOpcode();
    return opcode == X86::JMP32m || opcode == X86::CALL32m;
}
コード例 #5
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
// Looks for a jump index table feeding the indirect jump `jmpinst`.
//
// The index register of the jump is determined first; the instructions of
// block B are then searched from last to first for the nearest one that
// writes that register, and that instruction is handed to
// parseJumpIndexTable together with the jump table entries.
//
// Returns true only when parseJumpIndexTable succeeds. Each failure is
// reported on `out`.
static bool processJumpIndexTable(ExecutableContainer *c,
        NativeBlockPtr B,
        InstPtr jmpinst,
        const vector<VA> &jmptable_entries,
        raw_ostream &out) 
{
    // Which register does the jump index with?
    int index_reg = regFromInst(jmpinst->get_inst());
    if(index_reg == -1) {
        out << "JMPINST does not use a register to index\n";
        return false;
    }

    // Walk the block backwards until an instruction that writes the index
    // register is found (or the start of the block is passed).
    typedef std::list<InstPtr>::const_reverse_iterator RevInstIter;
    const std::list<InstPtr> &insts = B->get_insts();
    RevInstIter writer = insts.rbegin();
    while(writer != insts.rend() &&
          !writesToReg((*writer)->get_inst(), index_reg))
    {
        ++writer;
    }

    if(writer == insts.rend()) {
        out << "No instruction writes index register in the same basic block\n";
        return false;
    }

    InstPtr write_reg_insn = *writer;
    out << "Found register index write instruction:\n"; 

    const bool parsed =
        parseJumpIndexTable(c, write_reg_insn, jmptable_entries, out);
    if(!parsed) {
        out << "Could not parse jump index table, aborting\n";
    }

    return parsed;
}
コード例 #6
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
// Reports whether the opcode of `inst` belongs to the set of instructions
// whose immediate or memory operand is considered a possible reference to
// code. LEA32r is deliberately not part of the set (it was commented out
// by the original author).
static bool canInstructionReferenceCode( InstPtr inst) {
    const unsigned opcode = inst->get_inst().getOpcode();

    // Forms the original author annotated as carrying a value that could
    // be a code address.
    const bool value_may_be_code =
        opcode == X86::MOV32mi   ||  // writes to memory, but uses an immediate
        opcode == X86::MOV32o32a ||  // annotated as: writes imm32 to eax
        opcode == X86::MOV32ri   ||  // writes imm32 to register
        opcode == X86::PUSHi32;      // pushes an imm32

    // Forms that go through memory; the original author notes that the
    // validity of the memory reference still needs to be checked.
    const bool memory_may_be_code =
        opcode == X86::MOV32rm   ||  // writes mem to register
        opcode == X86::PUSH32rmm;    // pushes mem

    return value_may_be_code || memory_may_be_code;
}
コード例 #7
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
// Decodes one native basic block starting at address `e`.
//
// Instructions are decoded one after another from container `c` using
// decoder `d` and appended to a new NativeBlock until `has_follow` becomes
// false (a branch target was recorded, the instruction is a terminator, or
// a call/jump to a NoReturn external or import thunk was found).
//
// Parameters:
//   c             - container supplying bytes, relocations and import names
//   f             - external function map used to build external refs
//   d             - instruction decoder
//   blockChildren - receives addresses of successor blocks to decode
//   e             - start address of the block
//   funcs         - receives addresses of newly discovered functions
//   out           - log stream
//
// Returns the populated block.
NativeBlockPtr decodeBlock( ExecutableContainer *c, 
                            ExternalFunctionMap &f,
                            LLVMByteDecoder     &d,
                            stack<VA>           &blockChildren,
                            VA                  e,
                            stack<VA>           &funcs,
                            raw_ostream         &out)
{
  NativeBlockPtr  B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
  VA              curAddr = e;
  // cleared as soon as decoding must not continue past the current instruction
  bool            has_follow = true;

out << "Processing block: " << B->get_name() << "\n";
do
  {
    InstPtr I = d.getInstFromBuff(curAddr, c);

    //I, if a terminator, will have true and false targets 
    //filled in. I could be an indirect branch of some kind,
    //we will deal with that here. we will also deal with the 
    //instruction if it is a data instruction with relocation
   
    out << to_string<VA>(I->get_loc(), hex) << ":";
    out << I->printInst() << "\n";

    // a non-zero "true" target becomes a successor of this block
    if(I->get_tr() != 0) {
      B->add_follow(I->get_tr());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
      blockChildren.push(I->get_tr());
    }

    // likewise for a non-zero "false" target
    if(I->get_fa() != 0) {
      B->add_follow(I->get_fa());
      has_follow = false;
      out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
      blockChildren.push(I->get_fa());
    }

    if(I->terminator()) {
      has_follow = false;
    }

    //do we need to add a data reference to this instruction?
    //again, because there is no offset information in the 
    //instruction decoder, for now we just ask if every addr
    //in the inst is relocated
    // only the first relocated byte is handled: the loop breaks after it
    for(uint32_t i = 0; i < I->get_len(); i++) {
      VA addrInInst = curAddr+i;
      if(c->is_addr_relocated(addrInInst)) {
        VA  addr = 0;
        std::string has_imp;

        // this instruction has a relocation
        // save the relocation offset for later
        I->set_reloc_offset(i);

        //get the offset for this address
        //add it as a data offset to the instruction
        if (c->find_import_name(addrInInst, has_imp) )  {

            // the relocation names an import: record it as external data
            // or external code depending on the function map
            if(f.is_data(has_imp)) 
            {
                ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
                out << "Adding external data ref: " << has_imp << "\n";
                I->set_ext_data_ref(data_p);
            }
            else
            {
                ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
                LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);
                //maybe, this call doesn't return, in which case, 
                //we should kill the decoding of this flow
                if(code_p->getReturnType() == ExternalCodeRef::NoReturn) {
                    has_follow = false;
                }
                out << "Adding external code ref: " << has_imp << "\n";
                I->set_ext_call_target(code_p);
            }
                    
        } else if(c->relocate_addr(addrInInst, addr)) {
            // the relocation resolves to a local address; classify it
            bool can_ref_code = canInstructionReferenceCode(I);
            bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
            bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);
            // NOTE(review): opc is not read anywhere below
            unsigned opc = I->get_inst().getOpcode();

            if(isBranchViaMemory(I)) {
                out << "Detect branch via memory, relocation handled later\n";      
            }
            // this instruction can reference code and does
            // reference code
            // so we assume the code points to a function
            else if( can_ref_code && is_reloc_code ) {
                list<VA> new_funcs;
                if(dataInCodeHeuristic(c, I, addr, new_funcs)) {
                    // add new functions to our functions list
                    for(list<VA>::const_iterator nfi = new_funcs.begin();
                            nfi != new_funcs.end();
                            nfi++)
                    {
                        funcs.push(*nfi);
                    }

                    I->set_data_offset(addr);
                } else {
                    I->set_call_tgt(addr);
                    out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
                    funcs.push(addr);
                }
            } 
            // this instruction can't reference code and points to .text
            // or references data. Treat as data element
            // TODO: extract this from .text and shove into .data?
            else if(( !can_ref_code && is_reloc_code) || is_reloc_data )
            {
              I->set_data_offset(addr);
            } else {
              out << "WARNING: relocation points to neither code nor data:" << to_string<VA>(addr, hex) << "\n";
            }

        } else {
            out << "*NOT* Relocating relocatable addr:" << to_string<uint32_t>(addrInInst, hex) << "\n";
        }
        break;
      }
    }

    //is this instruction an external call?
    //in a COFF binary, the pcrel call can refer to an 
    //external symbol that has been relocated
    //so, get the string that corresponds, and 
    //provide the translation using the function map
    MCOperand op;
    string  imp;
    switch(I->get_inst().getOpcode()) {
      case X86::JMP32m:
          {
            // NOTE(review): curAddr+2 presumably addresses the operand
            // bytes that follow the opcode and ModRM byte - confirm
            string  thunkSym;
            bool r = c->find_import_name(curAddr+2, thunkSym);
            if(r) {
                // this goes to an external API call
                out << "Adding external code ref via JMP: " << thunkSym << "\n";
                ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                I->set_ext_call_target(p);
                has_follow = false;
            } else {
                // this is an internal jmp. probably a jump table.
                bool did_jmptable = handlePossibleJumpTable(c, B, I, curAddr, funcs, blockChildren, out); 

                LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
            }
          }
          break;
      case X86::CALLpcrel32:
        //this could be an external call in COFF, or not
        op = I->get_inst().getOperand(0);
        LASSERT(op.isImm(), "Nonsense for CALLpcrel32");
        if(op.getImm() !=0) {
          // target = address of the next instruction plus the displacement
          VA    callTgt = curAddr+op.getImm()+I->get_len();
          bool  foldFunc = false;
          //speculate about callTgt
          // if the target is itself a JMP32m terminator, treat it as an
          // import thunk and point the call at the external directly
          InstPtr spec = d.getInstFromBuff(callTgt, c);
          if(spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
            string  thunkSym;
            bool r = c->find_import_name(callTgt+2, thunkSym);
            LASSERT(r, "Need to find thunk import addr");
            ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
            I->set_ext_call_target(p);
            foldFunc = true;
            if(p->getReturnType() == ExternalCodeRef::NoReturn) {
              has_follow = false;
            }
          }
          if(foldFunc == false) {
            //add this to our list of funcs to search
            funcs.push(callTgt);
          }
        } else {
          // zero displacement: the real target comes from a relocation
          //check to see if this is an external call...
          if(I->has_ext_call_target() == false) {
            // may be a local call
            // NOTE(review): curAddr+1 presumably skips the one-byte call
            // opcode to reach the relocated displacement - confirm
            VA addr=curAddr+1, relo_addr=0;
            out << "Symbol not found, maybe a local call\n";
            if(c->relocate_addr(addr, relo_addr)){
                out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
                I->set_call_tgt(relo_addr);
                out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
                funcs.push(relo_addr);
            } else {
                out << "Could not relocate addr for local call at: ";
                out << to_string<VA>(curAddr, hex) << "\n";
            }
          } else {
            out << "External call to: " << I->get_ext_call_target()->getSymbolName() << "\n";
          }
        }
        break;

      case X86::CALL32m:
        //this should be a call to an external, or we have no idea
        //so we need to try and look up the symbol that we're calling at this address...
        if(c->find_import_name(curAddr+2, imp)) {
          ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
          LASSERT(p, "Failed to get ext call from map for symbol"+imp);
          
          out << "Calling symbol: " << p->getSymbolName() << "\n";
          if(p->getReturnType() == ExternalCodeRef::NoReturn) {
            has_follow = false;
          }
          I->set_ext_call_target(p);
        } else {
          out << "Cannot find symbol at address ";
          out << to_string<VA>(curAddr, hex) << "\n";
        }
        break;
    }

    // the instruction is always recorded, then decoding advances past it
    B->add_inst(I);
    curAddr += I->get_len();
  } while(has_follow);

  //we have built a basic block, it might contain
  //multiple calls, but it only has one terminator
  //which is either a ret or a branch
  return B;
}
コード例 #8
0
ファイル: cfg_recover.cpp プロジェクト: andrnag/mcsema
// Attempts to interpret the JMP32m instruction `jmpinst` (located at
// curAddr in block B) as a jump through a jump table.
//
// The instruction must carry a relocation whose target (the table slot)
// itself relocates to an address in a code section. When that holds, the
// table entries on both sides of the slot are collected, a JumpTable is
// attached to the instruction, and every entry is queued:
//   * conformant jump (see isConformantJumpInst): entries go to
//     blockChildren and become follows of B
//   * otherwise: entries go to funcs as new function entry points
// Finally processJumpIndexTable is run; its result is not inspected.
//
// Returns false (after logging the reason) when the instruction does not
// look like a jump table, true otherwise.
static bool handlePossibleJumpTable(ExecutableContainer *c,
        NativeBlockPtr B,
        InstPtr jmpinst, 
        VA curAddr, 
        stack<VA> &funcs,
        stack<VA> &blockChildren,
        raw_ostream &out) {

    LASSERT(jmpinst->get_inst().getOpcode() == X86::JMP32m, 
            "handlePossibleJumpTable needs a JMP32m opcode"  );

    // Step 0: the jump must have a relocation at all.
    const VA reloc_offset = jmpinst->get_reloc_offset();
    if(reloc_offset == 0) {
        out << "Not a jump table: no relocation in JMP32m\n";
        return false;
    }

    // The relocation inside the instruction has to resolve to the table
    // slot, and the slot has to resolve to a target.
    const VA addrInInst = curAddr + reloc_offset;
    VA jmpTableEntry, someFunction;
    if(!c->relocate_addr(addrInInst, jmpTableEntry)) {
        out << "Not a jump table: can't relocate relocation in JMP32m\n";
        return false;
    }

    if(!c->relocate_addr(jmpTableEntry, someFunction)) {
        out << "Not a jump table: can't relocate first jump table entry\n";
        return false;
    }

    if(!isAddrOfType(c, someFunction, ExecutableContainer::CodeSection)) {
        out << "Not a jump table: first entry doesn't point to code\n";
        return false;
    }

    vector<VA> jmptable_entries;

    // Entries at negative offsets from the slot are gathered first. The
    // most negative one has to end up at vector index 0, so the collected
    // run is reversed. Their count is the vector index of the original
    // (zeroth) slot.
    const int below_count =
        addJmpTableEntries(c, jmptable_entries, jmpTableEntry, -4, out);
    std::reverse(jmptable_entries.begin(), jmptable_entries.end());
    out << "Added: " << to_string<int>(below_count, dec) << " functions to jmptable\n";

    const int original_zero = below_count;

    // The slot the instruction points at.
    jmptable_entries.push_back(someFunction);
    out << "Added JMPTABLE entry [" << to_string<uint32_t>(jmpTableEntry, hex) 
        << "] => " << to_string<uint32_t>(someFunction, hex)  << "\n";

    // Entries at positive offsets from the slot.
    const int above_count =
        addJmpTableEntries(c, jmptable_entries, jmpTableEntry, 4, out);
    out << "Added: " << to_string<int>(above_count, dec) << " functions to jmptable\n";

    // Attach the table to the jump instruction.
    jmpinst->set_jump_table(
            JumpTablePtr(new JumpTable(jmptable_entries, original_zero)));

    // A jump of the form jmp [reg*4+imm32] is conformant and can later be
    // turned into an llvm switch; its targets are blocks of this function
    // rather than new functions.
    const bool is_conformant = isConformantJumpInst(jmpinst);
    if(is_conformant) {
        out << "GOT A CONFORMANT JUMP INST\n";
    }
    stack<VA> &worklist = is_conformant ? blockChildren : funcs;

    // Queue every table entry as a new entry point.
    for(std::vector<VA>::size_type i = 0; i < jmptable_entries.size(); ++i)
    {
        const VA target = jmptable_entries[i];
        out << "Adding block via jmptable: " << to_string<VA>(target, hex) << "\n";
        worklist.push(target);
        if(is_conformant) {
            B->add_follow(target);
        }
    }

    processJumpIndexTable(c, B, jmpinst, jmptable_entries, out);

    return true;

}