void visit(SgNode *node) {
     SgAsmBlock *block = isSgAsmBlock(node);
     if (block && !analyzer->is_vertex_filtered(block)) {
         block->get_successors().clear();
         block->set_successors_complete(false);
     }
 }
/*
 * Converts blocks to functions (is not part of Jeremiahs disassembler - and also IDA)
 * deprecated!
 */
void
RoseBin_FlowAnalysis::convertBlocksToFunctions(SgAsmNode* globalNode) {
  vector<SgNode*> tree =NodeQuery::querySubTree(globalNode, V_SgAsmBlock);
  vector<SgNode*>::iterator itV = tree.begin();
  //cerr << " ObjDump-BinRose:: Converting Blocks To Functions" << endl;
  for (;itV!=tree.end();itV++) {
    SgAsmBlock* block = isSgAsmBlock(*itV);
    if (block && block!=globalNode) {
      uint64_t addr = block->get_address();
      isSgAsmBlock(globalNode)->remove_statement(block);
      block->set_parent(NULL);
      SgAsmFunction* func = new SgAsmFunction(addr, RoseBin_support::HexToString(addr));
      ROSE_ASSERT(g_algo->info);
      g_algo->info->returnTargets[func].insert(g_algo->info->returnTargets[block].begin(), g_algo->info->returnTargets[block].end());
      isSgAsmBlock(globalNode)->append_statement(func);
      func->set_parent(globalNode);
      vector <SgNode*> vec =block->get_traversalSuccessorContainer();
      for (unsigned int itf = 0; itf < vec.size() ; itf++) {
        SgAsmInstruction* finst = isSgAsmInstruction(vec[itf]);
        finst->set_parent(func);
        func->append_statement(finst);
      }
      block->remove_children();
    }
  }

  //  string filename="_binary_tree_func.dot";
  //AST_BIN_Traversal* trav = new AST_BIN_Traversal();
  //trav->run(globalNode, filename);

}
/*
 * This function removes blocks, so functions contain only instructions
 * deprecated!
 */
void
RoseBin_FlowAnalysis::flattenBlocks(SgAsmNode* globalNode) {
  vector<SgNode*> tree =NodeQuery::querySubTree(globalNode, V_SgAsmBlock);
  vector<SgNode*>::iterator itV = tree.begin();
  //cerr << " ObjDump-BinRose:: Removing Blocks " << endl;
  for (;itV!=tree.end();itV++) {
    SgAsmBlock* block = isSgAsmBlock(*itV);
    if (block && block!=globalNode) {
      SgAsmFunction* func = isSgAsmFunction(block->get_parent());
      if (func) {
        ROSE_ASSERT(g_algo->info);
        g_algo->info->returnTargets[func].insert(g_algo->info->returnTargets[block].begin(), g_algo->info->returnTargets[block].end());
        vector <SgNode*> vec =block->get_traversalSuccessorContainer();
        for (unsigned int itf = 0; itf < vec.size() ; itf++) {
          SgAsmInstruction* finst = isSgAsmInstruction(vec[itf]);
          finst->set_parent(func);
          func->append_statement(finst);
        }
        func->remove_statement(block);
      }

    }
  }


}
SgAsmBlock*
buildBasicBlock(const std::vector<SgAsmInstruction*> &insns) {
    SgAsmBlock *bb = new SgAsmBlock;
    if (!insns.empty()) {
        bb->set_id(insns.front()->get_address());
        bb->set_address(insns.front()->get_address());
        BOOST_FOREACH (SgAsmInstruction *insn, insns) {
            bb->get_statementList().push_back(insn);
            insn->set_parent(bb);
        }
void CountTraversal::visit ( SgNode* n )
   {
     SgAsmInstruction* asmInstruction = isSgAsmInstruction(n);
     if (asmInstruction != NULL)
        {
       // Use the new interface support for this (this detects all multi-byte nop instructions). 
          if (SageInterface::isNOP(asmInstruction) == true)
             {
               if (previousInstructionWasNop == true)
                  {
                 // Increment the length of the identified NOP sequence
                    count++;
                  }
                 else
                  {
                    count = 1;
                 // Record the starting address of the NOP sequence
                    nopSequenceStart = asmInstruction;
                  }

               previousInstructionWasNop = true;
             }
            else
             {
               if (count > 0)
                   {
                  // Report the sequence when we have detected the end of the sequence.
                     SgAsmFunction* functionDeclaration = getAsmFunction(asmInstruction);
                     printf ("Reporting NOP sequence of length %3d at address %zu in function %s (reason for this being a function = %u = %s) \n",
                          count,nopSequenceStart->get_address(),functionDeclaration->get_name().c_str(),
                             functionDeclaration->get_reason(),
                             stringifySgAsmFunctionFunctionReason(functionDeclaration->get_reason()).c_str());

                     nopSequences.push_back(pair<SgAsmInstruction*,int>(nopSequenceStart,count));

                     SgAsmBlock* block = isSgAsmBlock(nopSequenceStart->get_parent());
                     ROSE_ASSERT(block != NULL);
                     SgAsmStatementPtrList & l = block->get_statementList();

                  // Now iterate over the nop instructions in the sequence and report the lenght of each (can be multi-byte nop instructions).
                     SgAsmStatementPtrList::iterator i = find(l.begin(),l.end(),nopSequenceStart);
                     ROSE_ASSERT(i != l.end());
                     int counter = 0;
                     while ( (*i != asmInstruction) && (i != l.end()) )
                        {
                          printf ("--- NOP #%2d is length = %2d \n",counter++,(int)isSgAsmInstruction(*i)->get_raw_bytes().size());
                          i++;
                        }
                   }

               count = 0;
               previousInstructionWasNop = false;
             }
        }
   }
Beispiel #6
0
/** Add edges to graph from functions that call system calls to system calls.
 *
 *  The first 1000 vertexes (0 to 999) in the graph is reserved for system calls, which is many more than the actual system
 *  calls in linux.  */
void
add_syscall_edges(DirectedGraph* G, std::vector<SgAsmFunction*>& all_functions)
{
    // Detect all system calls and add an edge from the function to the function to the system call
    for (unsigned int caller_id = 0; caller_id < all_functions.size(); ++caller_id) {
        SgAsmFunction *func = all_functions[caller_id];

        std::vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(func);

        for (std::vector<SgAsmInstruction*>::iterator inst_it = insns.begin(); inst_it != insns.end(); ++inst_it) {
            SgAsmX86Instruction *insn = isSgAsmX86Instruction(*inst_it);
            if (insn == NULL)
                continue;

            SgAsmBlock *block = SageInterface::getEnclosingNode<SgAsmBlock>(insn);

            // On linux system calls are always interrups and all interrupts are system calls
            if (insn && block && insn->get_kind()==x86_int) {

                const SgAsmExpressionPtrList &opand_list = insn->get_operandList()->get_operands();
                SgAsmExpression *expr = opand_list.size()==1 ? opand_list[0] : NULL;

                //semantically execute the basic block to find out which sytem call was called
                if (expr && expr->variantT()==V_SgAsmIntegerValueExpression &&
                    0x80==isSgAsmIntegerValueExpression(expr)->get_value()) {

                    const SgAsmStatementPtrList &stmts = block->get_statementList();
                    size_t int_n;
                    for (int_n=0; int_n<stmts.size(); int_n++) {
                        if (isSgAsmInstruction(stmts[int_n])==insn)
                            break;
                    }

                    typedef PartialSymbolicSemantics::Policy<PartialSymbolicSemantics::State,
                                                             PartialSymbolicSemantics::ValueType> Policy;
                    typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;
                    Policy policy;
                    Semantics semantics(policy);

                    try {
                        semantics.processBlock(stmts, 0, int_n);
                        if (policy.readRegister<32>("eax").is_known()) {
                            int nr = policy.readRegister<32>("eax").known_value();
                            boost::add_edge(caller_id, nr, *G);
                        }
                    } catch (const Semantics::Exception&) {
                    } catch (const Policy::Exception&) {
                    }
                }
            }
        }
    }
}
Beispiel #7
0
 void visit(SgNode *node) {
     SgAsmBlock *bb = isSgAsmBlock(node);
     SgAsmFunction *func = bb ? SageInterface::getEnclosingNode<SgAsmFunction>(bb) : NULL;
     if (func && bb->get_address()==removal_addr) {
         SgAsmStatementPtrList::iterator found = std::find(func->get_statementList().begin(), 
                                                           func->get_statementList().end(),
                                                           bb);
         ROSE_ASSERT(found!=func->get_statementList().end());
         func->get_statementList().erase(found);
         std::cout <<"removed basic block " <<StringUtility::addrToString(removal_addr) <<std::endl;
         // throw 1 /* found the one-and-only block, so we can abandon the traversal */
     }
 }
BtorTranslationPolicy::BtorTranslationPolicy(BtorTranslationHooks* hooks, uint32_t minNumStepsToFindError, uint32_t maxNumStepsToFindError, SgProject* proj): problem(), hooks(hooks), regdict(NULL) {
  assert (minNumStepsToFindError >= 1); // Can't find an error on the first step
  assert (maxNumStepsToFindError < 0xFFFFFFFFU); // Prevent overflows
  assert (minNumStepsToFindError <= maxNumStepsToFindError || maxNumStepsToFindError == 0);
  makeRegMap(origRegisterMap, "");
  makeRegMapZero(newRegisterMap);
  isValidIp = false_();
  validIPs.clear();
  Comp stepCount = problem.build_var(32, "stepCount_saturating_at_" + boost::lexical_cast<std::string>(maxNumStepsToFindError + 1));
  addNext(stepCount, ite(problem.build_op_eq(stepCount, number<32>(maxNumStepsToFindError + 1)), number<32>(maxNumStepsToFindError + 1), problem.build_op_inc(stepCount)));
  resetState = problem.build_op_eq(stepCount, zero(32));
  errorsEnabled =
    problem.build_op_and(
      problem.build_op_ugte(stepCount, number<32>(minNumStepsToFindError)),
      (maxNumStepsToFindError == 0 ?
       true_() :
       problem.build_op_ulte(stepCount, number<32>(maxNumStepsToFindError))));
  {
    vector<SgNode*> functions = NodeQuery::querySubTree(proj, V_SgAsmFunction);
    for (size_t i = 0; i < functions.size(); ++i) {
      functionStarts.push_back(isSgAsmFunction(functions[i])->get_address());
      // fprintf(stderr, "functionStarts 0x%"PRIx64"\n", isSgAsmFunction(functions[i])->get_address());
    }
  }
  {
    vector<SgNode*> blocks = NodeQuery::querySubTree(proj, V_SgAsmBlock);
    for (size_t i = 0; i < blocks.size(); ++i) {
      SgAsmBlock* b = isSgAsmBlock(blocks[i]);
      if (!b->get_statementList().empty() && isSgAsmX86Instruction(b->get_statementList().front())) {
        blockStarts.push_back(b->get_address());
        // fprintf(stderr, "blockStarts 0x%"PRIx64"\n", b->get_address());
      }
    }
  }
  {
    vector<SgNode*> calls = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (size_t i = 0; i < calls.size(); ++i) {
      SgAsmX86Instruction* b = isSgAsmX86Instruction(calls[i]);
      if (b->get_kind() != x86_call) continue;
      returnPoints.push_back(b->get_address() + b->get_raw_bytes().size());
      // fprintf(stderr, "returnPoints 0x%"PRIx64"\n", b->get_address() + b->get_raw_bytes().size());
    }
  }
  {
    vector<SgNode*> instructions = NodeQuery::querySubTree(proj, V_SgAsmX86Instruction);
    for (size_t i = 0; i < instructions.size(); ++i) {
      SgAsmX86Instruction* b = isSgAsmX86Instruction(instructions[i]);
      validIPs.push_back(b->get_address());
    }
  }
}
 void visit(SgNode *node) {
     SgAsmBlock *block = isSgAsmBlock(node);
     if (block && block->has_instructions()) {
         using namespace rose::BinaryAnalysis::InstructionSemantics2;
         const RegisterDictionary *regdict = RegisterDictionary::dictionary_i386();
         SymbolicSemantics::RiscOperatorsPtr ops = SymbolicSemantics::RiscOperators::instance(regdict);
         ops->computingDefiners(SymbolicSemantics::TRACK_ALL_DEFINERS); // only used so we can test that it works
         BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops, 32);
         const SgAsmStatementPtrList &stmts = block->get_statementList();
         for (SgAsmStatementPtrList::const_iterator si=stmts.begin(); si!=stmts.end(); ++si) {
             SgAsmX86Instruction *insn = isSgAsmX86Instruction(*si);
             if (insn) {
                 std::cout <<unparseInstructionWithAddress(insn) <<"\n";
                 dispatcher->processInstruction(insn);
                 std::cout <<*ops <<"\n";
             }
         }
     }
 }
void BlockTraversal::visit(SgNode* n)
   {
     SgAsmBlock* asmBlock = isSgAsmBlock(n);
     if (asmBlock != NULL)
        {
       // Save the address of the SgAsmBlock.
          rose_addr_t asmBlockAddress = asmBlock->get_address();
       // printf ("asmBlockAddress = %zu \n",asmBlockAddress);
          printf ("asmBlockAddress = %p \n",asmBlockAddress);

          bool blockExists = blockMap.find(asmBlockAddress) != blockMap.end();
          ROSE_ASSERT(blockExists == false);

          if (blockExists == false)
             {
            // Need to add the block address.
            // blockMap[asmBlockAddress] = asmBlock;
               blockMap[asmBlockAddress] = pair<SgAsmBlock*,TraceStructType*>(asmBlock,new TraceStructType(asmBlock));
             }
        }
   }
    void visit(SgNode *node) {
        SgAsmBlock *block = isSgAsmBlock(node);
        if (block && block->has_instructions()) {
            using namespace BinaryAnalysis::InstructionSemantics;
            typedef SymbolicSemantics::Policy<SymbolicSemantics::State, SymbolicSemantics::ValueType> Policy;
            typedef X86InstructionSemantics<Policy, SymbolicSemantics::ValueType> Semantics;

            Policy policy(NULL/*no SMT solver*/);
            Semantics semantics(policy);

            const SgAsmStatementPtrList &stmts = block->get_statementList();
            for (SgAsmStatementPtrList::const_iterator si=stmts.begin(); si!=stmts.end(); ++si) {
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(*si);
                if (insn) {
                    std::cout <<unparseInstructionWithAddress(insn) <<"\n";
                    semantics.processInstruction(insn);
                    std::cout <<policy;
                }
            }
        }
    }
 void visit(SgNode *node) {
     SgAsmBlock *block = isSgAsmBlock(node);
     SgAsmFunction *func = block ? block->get_enclosing_function() : NULL;
     if (block && func) {
         if (block==func->get_entry_block()) {
             if (block->get_immediate_dominator()) {
                 if (bad_blocks)
                     bad_blocks->insert(block);
                 failed = true;
             }
         } else {
             SgAsmBlock *idom = block->get_immediate_dominator();
             if (!idom || idom->get_enclosing_function()!=func) {
                 if (bad_blocks)
                     bad_blocks->insert(block);
                 failed = true;
             }
         }
     }
 }
Beispiel #13
0
void
InitPointerToNull::visit(SgNode* node) {
  if (isSgAsmFunction(node)) {
    memoryWrites.clear();
    memoryRead.clear();
  } else

  if (isSgAsmx86Instruction(node) && isSgAsmx86Instruction(node)->get_kind() == x86_mov) {
    // this is the address of the mov instruction prior to the call
    //rose_addr_t resolveAddr=0;
    SgAsmx86Instruction* inst = isSgAsmx86Instruction(node);
    SgNode* instBlock = NULL;
    if (project) 
      instBlock= isSgAsmBlock(inst->get_parent());
    else //we run IDA, this is different
      instBlock=inst;

    if (instBlock==NULL)
      return;
    SgAsmFunction* instFunc = isSgAsmFunction(instBlock->get_parent());
    if (instFunc==NULL)
      return;

    // we have found a mov instruction
    // we need to check if it is a   mov mem, (value or reg) // assignment of variable // forgot mov mem, mem
    // or we find a                  mov reg, mem // usage of variable
    // make sure a variable is assigned before used
    SgAsmOperandList * ops = inst->get_operandList();
    SgAsmExpressionPtrList& opsList = ops->get_operands();
    SgAsmExpressionPtrList::iterator itOP = opsList.begin();
    SgAsmMemoryReferenceExpression* memL=NULL;
    SgAsmMemoryReferenceExpression* memR=NULL;
    SgAsmRegisterReferenceExpression* regL=NULL;
    SgAsmRegisterReferenceExpression* regR=NULL;
    SgAsmValueExpression* Val = NULL;
    int iteration=0;
    for (;itOP!=opsList.end();++itOP) {
      SgAsmExpression* exp = *itOP;
      ROSE_ASSERT(exp);
      if (iteration==1) {
	// right hand side
	memR = isSgAsmMemoryReferenceExpression(exp);
	regR = isSgAsmRegisterReferenceExpression(exp);
	Val = isSgAsmValueExpression(exp);
      }
      if (iteration==0) {
	// left hand side
	memL = isSgAsmMemoryReferenceExpression(exp);
	regL = isSgAsmRegisterReferenceExpression(exp);
	iteration++;
      }
    } //for
    if ((memL && regR) || (memL && Val) || (memL && memR)) {
      // could be assignment to address
      rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memL);      
      // apparently the reference to memory does not always have to be BP but
      // can also be IP if it is a static variable. How will we handle global variables?
      //bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memL);
      //if (containsBP) {
	// this is memory write with offset to BP
	// remember this memory location as a write
	if (debug)
	cerr << "found a memory write (REG) : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
	memoryWrites.insert(addr);
	//}
    } else if (regL && memR) {
      // could be usage of address
      rose_addr_t addr=BinQSupport::evaluateMemoryExpression(inst,memR);      
      bool containsBP = BinQSupport::memoryExpressionContainsRegister(x86_regclass_gpr,x86_gpr_bp, memR);
      if (containsBP) {
	// this is memory read with offset to BP
	// did we see a write for this? If not, it is not initialized!
	std::set<rose_addr_t>::const_iterator it = memoryWrites.find(addr);
	if (it!=memoryWrites.end()) {
	  // found write, everything is good
	if (debug)
	  cerr << "found a read with matching write : " << RoseBin_support::HexToString(inst->get_address())<<" "<<unparseInstruction(inst)<<endl;
	} else {
	  std::set<rose_addr_t>::const_iterator it2 = memoryRead.find(addr);
	  if (it2!=memoryRead.end()) {
	    // found this case before
	  } else {
	if (debug)
	    cerr << " This variable might not be initialized : " << RoseBin_support::HexToString(inst->get_address())<<" "<< unparseInstruction(inst) << endl;
	    string res = "Possibly uninitialized variable: ";
	    string funcname="";
	    SgAsmBlock* b = isSgAsmBlock(inst->get_parent());
	    SgAsmFunction* func = NULL;
	    if (b)
	      func=isSgAsmFunction(b->get_parent()); 
	    if (func)
	      funcname = func->get_name();
	    res+=" ("+RoseBin_support::HexToString(inst->get_address())+") : "+unparseInstruction(inst)+
	      " <"+inst->get_comment()+">  in function: "+funcname;
	    result[inst]= res;
	    memoryRead.insert(addr);
	  }
	}
      }
    }
  }
}
 void visit(SgNode *node) {
     SgAsmBlock *block = isSgAsmBlock(node);
     if (block)
         block->set_immediate_dominator(NULL);
 }
Beispiel #15
0
/** Returns a label for the value.  The label consists of the base object name (if available) or address, followed by a plus
 *  sign or minus sign, followed by the offset from that object.  The empty string is returned if this integer value expression
 *  has no base object (i.e., it's absolute).
 *
 *  If the base object has no name and the integer value points directly at the object (offset=0) then one of two things
 *  happen: if @p quiet is true, the empty string is returned, otherwise the label is the name of the node type enclosed in an
 *  extra set of angle brackets.  This is useful to indicate that a value is relative rather than absolute.  For instance, the
 *  instruction listing "call 0x004126bb" is ambiguous as to whether 0x004126bb points to a known, unnamed function, a non-entry
 *  instruction within a function, or some memory location we didn't disassemble.  But when labeled with @p quiet being false,
 *  the output will be:
 *
 *  <ul>
 *    <li>call 0x004126bb<main>; points to a function with a name</li>
 *    <li>call 0x004126bb<<Func>>; points to a function without a name</li>
 *    <li>call 0x004126bb<<Insn>>; points to an instruction that's not a function entry point</li>
 *    <li>call 0x004126bb; points to something that's not been disassembled</li>
 *  </ul>
 */
std::string
SgAsmIntegerValueExpression::get_label(bool quiet/*=false*/) const
{
    SgNode *node = get_base_node();
    if (!node)
        return "";

    // Get the name of the base object if possible.
    std::string retval;
    std::string refkind;
    if (isSgAsmFunction(node)) {
        retval = isSgAsmFunction(node)->get_name();
        refkind = "Func";
    } else if (isSgAsmGenericSymbol(node)) {
        retval = isSgAsmGenericSymbol(node)->get_name()->get_string();
        refkind = "Sym";
    } else if (isSgAsmPEImportItem(node)) {
        retval = isSgAsmPEImportItem(node)->get_name()->get_string();
        refkind = "Import";
    } else if (isSgAsmGenericSection(node)) {
        retval = isSgAsmGenericSection(node)->get_short_name();
        refkind = "Section";
    } else if (isSgAsmInstruction(node)) {
        refkind = "Insn";
    } else if (isSgAsmStaticData(node)) {
        SgAsmBlock *blk = SageInterface::getEnclosingNode<SgAsmBlock>(node);
        if (blk && 0!=(blk->get_reason() & SgAsmBlock::BLK_JUMPTABLE)) {
            refkind = "JumpTable";
        } else {
            refkind = "StaticData";
        }
    } else if (isSgAsmBlock(node)) {
        refkind = "BBlock";
    } else if (isSgAsmPERVASizePair(node)) {
        refkind = "Rva/Size";
    } else {
        refkind = "Reference";
    }

    // If it has no name, then use something fairly generic.  That way we can at least indicate that the value is relative.
    int64_t offset = (int64_t)get_relative_value();
    if (retval.empty()) {
        retval = "<" + refkind; // extra level of brackets to indicate that it's not a real name
        if (offset)
            retval += "@" + StringUtility::addrToString(virtual_address(node), 32);
        retval += ">";
    }

    // Append the offset, but consider it to be signed.  Disregard the number of significant bits in the absolute value and use
    // a smaller bit width if possible.  But don't use the minimum bit width since this makes it hard to tell how many bits
    // there are at a glance (use only 8, 16, 32, or 64).
    size_t nbits = 0;
    if (offset > 0xffffffffll) {
        nbits = 64;
        retval += "+";
    } else if (offset > 0xffffll) {
        nbits = 32;
        retval += "+";
    } else if (offset > 0xffll) {
        nbits = 16;
        retval += "+";
    } else if (offset > 9) {
        nbits = 8;
        retval += "+";
    } else if (offset > 0) {
        char buf[64];
        snprintf(buf, sizeof buf, "+%"PRId64, offset);
        retval += buf;
    } else if (offset==0) {
        /*void*/
    } else if (-offset > 0xffffffffll) {
        nbits = 64;
        offset = -offset;
        retval += "-";
    } else if (-offset > 0xffffll) {
        nbits = 32;
        offset = -offset;
        retval += "-";
    } else if (-offset > 0xffll) {
        nbits = 16;
        offset = -offset;
        retval += "-";
    } else if (-offset > 9) {
        nbits = 8;
        offset = -offset;
        retval += "-";
    } else {
        char buf[64];
        snprintf(buf, sizeof buf, "%"PRId64, offset);
        retval += buf;
    }
    if (nbits!=0)
        retval += StringUtility::addrToString(offset, nbits);

    return retval;
}