/* Given a node in the AST, it replaces all function calls with the corresponding expression from the fct2Var mapping. Since we traverse in order of function evaluation, all nodes that are lower in the subtree are supposed to have been mapped, since they have already appeared in the list. Also, we only replace the shallowest level of function calls in the surrent subtree, since we assume that replaceFunctionCallsInExpression has already been called for each of the subtrees rooted at the shallowest function calls lower than the current node: E.g.: f(g(h(i))) + 5 has the AST: + / \ fc1 5 / \ f fc2 / \ g fc3 / \ h i where fc? represents an SgFunctionCallExpression. Order of function evaluation is h(i), g(_), f(_). Calling 'replaceFunctionCallsInExpression' on '+' will generate temp_var + 5 but calling the same function on 'fc2' will generate f(temp_var) + 5 */ void FunctionCallNormalization::replaceFunctionCallsInExpression( SgNode *root, map<SgFunctionCallExp *, SgExpression *> fct2Var ) { if ( !root ) return; if ( NodeQuery::querySubTree( root, V_SgFunctionCallExp ).size() > 0 ) { list<SgNode *> toVisit; toVisit.push_back( root ); while ( !toVisit.empty() ) { SgNode *crt = toVisit.front(); // will visit every node above an function call exp, but not below // also, we will replace function call expressions found if ( !isSgFunctionCallExp( crt ) ) { vector<SgNode *> succ = ( crt )->get_traversalSuccessorContainer(); for ( vector<SgNode *>::iterator succIt = succ.begin(); succIt != succ.end(); succIt++ ) if ( isSgNode ( *succIt ) ) toVisit.push_back( *succIt ); } else { SgFunctionCallExp *call = isSgFunctionCallExp( crt ); ROSE_ASSERT ( call ); map<SgFunctionCallExp *, SgExpression *>::iterator mapIter; mapIter = fct2Var.find( call ); if ( mapIter == fct2Var.end() ) cout << "NOT FOUND " << call->unparseToString() << "\t" << call << "\n"; // a mapping for function call exps in the subtree must already exist ( postorder traversal ) ROSE_ASSERT ( mapIter != fct2Var.end() && mapIter->second ); // duplicate the variable SgTreeCopy treeCopy; SgExpression *scnd = mapIter->second; ROSE_ASSERT ( isSgExpression( scnd ) ); SgExpression *newVar = isSgExpression( scnd->copy( treeCopy ) ); ROSE_ASSERT ( newVar ); SgExpression *parent = isSgExpression( call->get_parent() ); ROSE_ASSERT ( parent ); // replace the node in the AST newVar->set_parent( parent ); int k = parent->replace_expression( call, newVar ); ROSE_ASSERT ( k == 1 ); delete call; } toVisit.pop_front(); } } }
InheritedAttribute BugSeeding::evaluateInheritedAttribute ( SgNode* astNode, InheritedAttribute inheritedAttribute ) { // Use this if we only want to seed bugs in loops bool isLoop = inheritedAttribute.isLoop || (isSgForStatement(astNode) != NULL) || (isSgWhileStmt(astNode) != NULL) || (isSgDoWhileStmt(astNode) != NULL); // Add Fortran support isLoop = isLoop || (isSgFortranDo(astNode) != NULL); // Mark future noes in this subtree as being part of a loop inheritedAttribute.isLoop = isLoop; // To test this on simple codes, optionally allow it to be applied everywhere bool applyEveryWhere = true; if (isLoop == true || applyEveryWhere == true) { // The inherited attribute is true iff we are inside a loop and this is a SgPntrArrRefExp. SgPntrArrRefExp *arrayReference = isSgPntrArrRefExp(astNode); if (arrayReference != NULL) { // Mark as a vulnerability inheritedAttribute.isVulnerability = true; // Now change the array index (to seed the buffer overflow bug) SgVarRefExp* arrayVarRef = isSgVarRefExp(arrayReference->get_lhs_operand()); ROSE_ASSERT(arrayVarRef != NULL); ROSE_ASSERT(arrayVarRef->get_symbol() != NULL); SgInitializedName* arrayName = isSgInitializedName(arrayVarRef->get_symbol()->get_declaration()); ROSE_ASSERT(arrayName != NULL); SgArrayType* arrayType = isSgArrayType(arrayName->get_type()); ROSE_ASSERT(arrayType != NULL); SgExpression* arraySize = arrayType->get_index(); SgTreeCopy copyHelp; // Make a copy of the expression used to hold the array size in the array declaration. SgExpression* arraySizeCopy = isSgExpression(arraySize->copy(copyHelp)); ROSE_ASSERT(arraySizeCopy != NULL); // This is the existing index expression SgExpression* indexExpression = arrayReference->get_rhs_operand(); ROSE_ASSERT(indexExpression != NULL); // Build a new expression: "array[n]" --> "array[n+arraySizeCopy]", where the arraySizeCopy is a size of "array" SgExpression* newIndexExpression = buildAddOp(indexExpression,arraySizeCopy); // Substitute the new expression for the old expression arrayReference->set_rhs_operand(newIndexExpression); } } return inheritedAttribute; }
virtual void visit(SgNode* n) { if (isSgVarRefExp(n)) { SgVarRefExp* vr = isSgVarRefExp(n); assert (vr->get_symbol()); if (vr->get_symbol()->get_declaration() == initname) { if (inSimpleContext(vr) || !needSimpleContext) { SgTreeCopy tc; isSgExpression(n->get_parent())->replace_expression( vr, isSgExpression(initexpr->copy(tc))); } } } }
// Do finite differencing on one expression within one context. The expression // must be defined and valid within the entire body of root. The rewrite rules // are used to simplify expressions. When a variable var is updated from // old_value to new_value, an expression of the form (var, (old_value, // new_value)) is created and rewritten. The rewrite rules may either produce // an arbitrary expression (which will be used as-is) or one of the form (var, // (something, value)) (which will be changed to (var = value)). void doFiniteDifferencingOne(SgExpression* e, SgBasicBlock* root, RewriteRule* rules) { SgStatementPtrList& root_stmts = root->get_statements(); SgStatementPtrList::iterator i; for (i = root_stmts.begin(); i != root_stmts.end(); ++i) { if (expressionComputedIn(e, *i)) break; } if (i == root_stmts.end()) return; // Expression is not used within root, so quit vector<SgVariableSymbol*> used_symbols = SageInterface::getSymbolsUsedInExpression(e); SgName cachename = "cache_fd__"; cachename << ++SageInterface::gensym_counter; SgVariableDeclaration* cachedecl = new SgVariableDeclaration(SgNULL_FILE, cachename, e->get_type(),0 /* new SgAssignInitializer(SgNULL_FILE, e) */); SgInitializedName* cachevar = cachedecl->get_variables().back(); ROSE_ASSERT (cachevar); root->get_statements().insert(i, cachedecl); cachedecl->set_parent(root); cachedecl->set_definingDeclaration(cachedecl); cachevar->set_scope(root); SgVariableSymbol* sym = new SgVariableSymbol(cachevar); root->insert_symbol(cachename, sym); SgVarRefExp* vr = new SgVarRefExp(SgNULL_FILE, sym); vr->set_endOfConstruct(SgNULL_FILE); replaceCopiesOfExpression(e, vr, root); vector<SgExpression*> modifications_to_used_symbols; FdFindModifyingStatementsVisitor(used_symbols, modifications_to_used_symbols).go(root); cachedecl->addToAttachedPreprocessingInfo( new PreprocessingInfo(PreprocessingInfo::CplusplusStyleComment,(string("// Finite differencing: ") + cachename.str() + " is a cache of " + e->unparseToString()).c_str(),"Compiler-Generated in Finite Differencing",0, 0, 0, PreprocessingInfo::before)); if (modifications_to_used_symbols.size() == 0) { SgInitializer* cacheinit = new SgAssignInitializer(SgNULL_FILE, e); e->set_parent(cacheinit); cachevar->set_initializer(cacheinit); cacheinit->set_parent(cachevar); } else { for (unsigned int i = 0; i < modifications_to_used_symbols.size(); ++i) { SgExpression* modstmt = modifications_to_used_symbols[i]; #ifdef FD_DEBUG cout << "Updating cache after " << modstmt->unparseToString() << endl; #endif SgExpression* updateCache = 0; SgVarRefExp* varref = new SgVarRefExp(SgNULL_FILE, sym); varref->set_endOfConstruct(SgNULL_FILE); SgTreeCopy tc; SgExpression* eCopy = isSgExpression(e->copy(tc)); switch (modstmt->variantT()) { case V_SgAssignOp: { SgAssignOp* assignment = isSgAssignOp(modstmt); assert (assignment); SgExpression* lhs = assignment->get_lhs_operand(); SgExpression* rhs = assignment->get_rhs_operand(); replaceCopiesOfExpression(lhs, rhs, eCopy); } break; case V_SgPlusAssignOp: case V_SgMinusAssignOp: case V_SgAndAssignOp: case V_SgIorAssignOp: case V_SgMultAssignOp: case V_SgDivAssignOp: case V_SgModAssignOp: case V_SgXorAssignOp: case V_SgLshiftAssignOp: case V_SgRshiftAssignOp: { SgBinaryOp* assignment = isSgBinaryOp(modstmt); assert (assignment); SgExpression* lhs = assignment->get_lhs_operand(); SgExpression* rhs = assignment->get_rhs_operand(); SgTreeCopy tc; SgExpression* rhsCopy = isSgExpression(rhs->copy(tc)); SgExpression* newval = 0; switch (modstmt->variantT()) { #define DO_OP(op, nonassignment) \ case V_##op: { \ newval = new nonassignment(SgNULL_FILE, lhs, rhsCopy); \ newval->set_endOfConstruct(SgNULL_FILE); \ } \ break DO_OP(SgPlusAssignOp, SgAddOp); DO_OP(SgMinusAssignOp, SgSubtractOp); DO_OP(SgAndAssignOp, SgBitAndOp); DO_OP(SgIorAssignOp, SgBitOrOp); DO_OP(SgMultAssignOp, SgMultiplyOp); DO_OP(SgDivAssignOp, SgDivideOp); DO_OP(SgModAssignOp, SgModOp); DO_OP(SgXorAssignOp, SgBitXorOp); DO_OP(SgLshiftAssignOp, SgLshiftOp); DO_OP(SgRshiftAssignOp, SgRshiftOp); #undef DO_OP default: break; } assert (newval); replaceCopiesOfExpression(lhs, newval, eCopy); } break; case V_SgPlusPlusOp: { SgExpression* lhs = isSgPlusPlusOp(modstmt)->get_operand(); SgIntVal* one = new SgIntVal(SgNULL_FILE, 1); one->set_endOfConstruct(SgNULL_FILE); SgAddOp* add = new SgAddOp(SgNULL_FILE, lhs, one); add->set_endOfConstruct(SgNULL_FILE); lhs->set_parent(add); one->set_parent(add); replaceCopiesOfExpression(lhs,add,eCopy); } break; case V_SgMinusMinusOp: { SgExpression* lhs = isSgMinusMinusOp(modstmt)->get_operand(); SgIntVal* one = new SgIntVal(SgNULL_FILE, 1); one->set_endOfConstruct(SgNULL_FILE); SgSubtractOp* sub = new SgSubtractOp(SgNULL_FILE, lhs, one); sub->set_endOfConstruct(SgNULL_FILE); lhs->set_parent(sub); one->set_parent(sub); replaceCopiesOfExpression(lhs,sub,eCopy); } break; default: cerr << modstmt->sage_class_name() << endl; assert (false); break; } #ifdef FD_DEBUG cout << "e is " << e->unparseToString() << endl; cout << "eCopy is " << eCopy->unparseToString() << endl; #endif updateCache = doFdVariableUpdate(rules, varref, e, eCopy); #ifdef FD_DEBUG cout << "updateCache is " << updateCache->unparseToString() << endl; #endif if (updateCache) { ROSE_ASSERT(modstmt != NULL); SgNode* ifp = modstmt->get_parent(); SgCommaOpExp* comma = new SgCommaOpExp(SgNULL_FILE, updateCache, modstmt); modstmt->set_parent(comma); updateCache->set_parent(comma); if (ifp == NULL) { printf ("modstmt->get_parent() == NULL modstmt = %p = %s \n",modstmt,modstmt->class_name().c_str()); modstmt->get_startOfConstruct()->display("modstmt->get_parent() == NULL: debug"); } ROSE_ASSERT(ifp != NULL); #ifdef FD_DEBUG cout << "New expression is " << comma->unparseToString() << endl; cout << "IFP is " << ifp->sage_class_name() << ": " << ifp->unparseToString() << endl; #endif if (isSgExpression(ifp)) { isSgExpression(ifp)->replace_expression(modstmt, comma); comma->set_parent(ifp); } else { // DQ (12/16/2006): Need to handle cases that are not SgExpression (now that SgExpressionRoot is not used!) // cerr << ifp->sage_class_name() << endl; // assert (!"Bad parent type for inserting comma expression"); SgStatement* statement = isSgStatement(ifp); if (statement != NULL) { #ifdef FD_DEBUG printf ("Before statement->replace_expression(): statement = %p = %s modstmt = %p = %s \n",statement,statement->class_name().c_str(),modstmt,modstmt->class_name().c_str()); SgExprStatement* expresionStatement = isSgExprStatement(statement); if (expresionStatement != NULL) { SgExpression* expression = expresionStatement->get_expression(); printf ("expressionStatement expression = %p = %s \n",expression,expression->class_name().c_str()); } #endif statement->replace_expression(modstmt, comma); comma->set_parent(statement); } else { ROSE_ASSERT(ifp != NULL); printf ("Error: parent is neither a SgExpression nor a SgStatement ifp = %p = %s \n",ifp,ifp->class_name().c_str()); ROSE_ASSERT(false); } } #ifdef FD_DEBUG cout << "IFP is now " << ifp->unparseToString() << endl; #endif } } } }
int main( int argc, char* argv[] ) { // Initialize and check compatibility. See rose::initialize ROSE_INITIALIZE; SgProject* project = frontend(argc,argv); AstTests::runAllTests(project); #if 0 // Output the graph so that we can see the whole AST graph, for debugging. generateAstGraph(project, 4000); #endif #if 1 printf ("Generate the dot output of the SAGE III AST \n"); generateDOT ( *project ); printf ("DONE: Generate the dot output of the SAGE III AST \n"); #endif // There are lots of way to write this, this is one simple approach; get all the function calls. std::vector<SgNode*> functionCalls = NodeQuery::querySubTree (project,V_SgFunctionCallExp); // Find the SgFunctionSymbol for snprintf so that we can reset references to "sprintf" to "snprintf" instead. // SgGlobal* globalScope = (*project)[0]->get_globalScope(); SgSourceFile* sourceFile = isSgSourceFile(project->get_fileList()[0]); ROSE_ASSERT(sourceFile != NULL); SgGlobal* globalScope = sourceFile->get_globalScope(); SgFunctionSymbol* snprintf_functionSymbol = globalScope->lookup_function_symbol("snprintf"); ROSE_ASSERT(snprintf_functionSymbol != NULL); // Iterate over the function calls to find the calls to "sprintf" for (unsigned long i = 0; i < functionCalls.size(); i++) { SgFunctionCallExp* functionCallExp = isSgFunctionCallExp(functionCalls[i]); ROSE_ASSERT(functionCallExp != NULL); SgFunctionRefExp* functionRefExp = isSgFunctionRefExp(functionCallExp->get_function()); if (functionRefExp != NULL) { SgFunctionSymbol* functionSymbol = functionRefExp->get_symbol(); if (functionSymbol != NULL) { SgName functionName = functionSymbol->get_name(); // printf ("Function being called: %s \n",functionName.str()); if (functionName == "sprintf") { // Now we have something to do! functionRefExp->set_symbol(snprintf_functionSymbol); // Now add the "n" argument SgExprListExp* functionArguments = functionCallExp->get_args(); SgExpressionPtrList & functionArgumentList = functionArguments->get_expressions(); // "sprintf" shuld have exactly 2 arguments (I guess the "..." don't count) printf ("functionArgumentList.size() = %zu \n",functionArgumentList.size()); // ROSE_ASSERT(functionArgumentList.size() == 2); SgExpressionPtrList::iterator i = functionArgumentList.begin(); // printf ("(*i) = %p = %s = %s \n",*i,(*i)->class_name().c_str(),SageInterface::get_name(*i).c_str()); SgVarRefExp* variableRefExp = isSgVarRefExp(*i); ROSE_ASSERT(variableRefExp != NULL); // printf ("variableRefExp->get_type() = %p = %s = %s \n",variableRefExp->get_type(),variableRefExp->get_type()->class_name().c_str(),SageInterface::get_name(variableRefExp->get_type()).c_str()); SgType* bufferType = variableRefExp->get_type(); SgExpression* bufferLengthExpression = NULL; switch(bufferType->variantT()) { case V_SgArrayType: { SgArrayType* arrayType = isSgArrayType(bufferType); bufferLengthExpression = arrayType->get_index(); break; } case V_SgPointerType: { // SgPointerType* pointerType = isSgPointerType(bufferType); SgInitializedName* variableDeclaration = variableRefExp->get_symbol()->get_declaration(); ROSE_ASSERT(variableDeclaration != NULL); SgExpression* initializer = variableDeclaration->get_initializer(); if (initializer != NULL) { SgAssignInitializer* assignmentInitializer = isSgAssignInitializer(initializer); ROSE_ASSERT(assignmentInitializer != NULL); // This is the rhs of the initialization of the pointer (likely a malloc through a cast). // This assumes: buffer = (char*) malloc(bufferLengthExpression); SgExpression* initializationExpression = assignmentInitializer->get_operand(); ROSE_ASSERT(initializationExpression != NULL); SgCastExp* castExp = isSgCastExp(initializationExpression); ROSE_ASSERT(castExp != NULL); SgFunctionCallExp* functionCall = isSgFunctionCallExp(castExp->get_operand()); ROSE_ASSERT(functionCall != NULL); SgExprListExp* functionArguments = isSgExprListExp(functionCall->get_args()); bufferLengthExpression = functionArguments->get_expressions()[0]; ROSE_ASSERT(bufferLengthExpression != NULL); } else { printf ("Initializer not found, so no value for n in snprintf can be computed currently \n"); } break; } default: { printf ("Error: default reached in evaluation of buffer type = %p = %s \n",bufferType,bufferType->class_name().c_str()); ROSE_ASSERT(false); } } ROSE_ASSERT(bufferLengthExpression != NULL); // printf ("bufferLengthExpression = %p = %s = %s \n",bufferLengthExpression,bufferLengthExpression->class_name().c_str(),SageInterface::get_name(bufferLengthExpression).c_str()); // Jump over the first argument, the "n" is defined to be the 2nd argument (the rest are shifted one position). i++; // Build a deep copy of the expression used to define the static buffer (could be any complex expression). SgTreeCopy copy_help; SgExpression* bufferLengthExpression_copy = isSgExpression(bufferLengthExpression->copy(copy_help)); // Insert the "n" for the parameter list to work with "snprintf" instead of "sprintf" functionArgumentList.insert(i,bufferLengthExpression_copy); } } } } return backend(project); }