/*
 * Instrument select instructions similar to how we instrument branches.
 *
 * A call to log_dynval(dynval_buffer, SELECTENTRY, SELECT, zext(!cond)) is
 * placed immediately before the select, where cond is the select's condition
 * and the negated value is zero-extended to wordType.
 *
 * The negation and zero-extension are placed into the block by hand, and only
 * when the builder actually produced instructions for them: if the condition
 * is a constant (or undef) and the builder folds constants, the results are
 * Constants, which must not be inserted into a basic block.
 *
 * NOTE(review): a vector-typed select condition is not treated specially
 * here -- confirm whether such selects can reach this visitor.
 */
void PandaInstrumentVisitor::visitSelectInst(SelectInst &I){
    Function *F = mod->getFunction("log_dynval");
    if (!F) {
        printf("Instrumentation function not found\n");
        assert(1==0);
        // With NDEBUG the assert compiles away; never build a call to null.
        return;
    }

    // Negate the condition and widen it to the logged word size.
    Value *notCond = IRB.CreateNot(I.getCondition());
    Value *loggedVal = IRB.CreateZExt(notCond, wordType);

    // Place the helper instructions in dependency order ahead of the select.
    // Folded constants are not Instructions and are simply skipped.
    if (Instruction *notInst = dyn_cast<Instruction>(notCond)) {
        notInst->insertBefore(&I);
    }
    if (Instruction *zextInst = dyn_cast<Instruction>(loggedVal)) {
        zextInst->insertBefore(&I);
    }

    std::vector<Value*> argValues;
    argValues.push_back(ConstantInt::get(ptrType, (uintptr_t)dynval_buffer));
    argValues.push_back(ConstantInt::get(intType, SELECTENTRY));
    argValues.push_back(ConstantInt::get(intType, SELECT));
    argValues.push_back(loggedVal);

    // The logging call goes last, directly in front of the select.
    CallInst *CI = IRB.CreateCall(F, ArrayRef<Value*>(argValues));
    CI->insertBefore(&I);
}
/// Insert code in the prolog code when unrolling a loop with a /// run-time trip-count. /// /// This method assumes that the loop unroll factor is total number /// of loop bodes in the loop after unrolling. (Some folks refer /// to the unroll factor as the number of *extra* copies added). /// We assume also that the loop unroll factor is a power-of-two. So, after /// unrolling the loop, the number of loop bodies executed is 2, /// 4, 8, etc. Note - LLVM converts the if-then-sequence to a switch /// instruction in SimplifyCFG.cpp. Then, the backend decides how code for /// the switch instruction is generated. /// /// extraiters = tripcount % loopfactor /// if (extraiters == 0) jump Loop: /// if (extraiters == loopfactor) jump L1 /// if (extraiters == loopfactor-1) jump L2 /// ... /// L1: LoopBody; /// L2: LoopBody; /// ... /// if tripcount < loopfactor jump End /// Loop: /// ... /// End: /// bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, LPPassManager *LPM) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; // Make sure the loop is in canonical form, and there is a single // exit block only. if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) return false; // Use Scalar Evolution to compute the trip count. 
This allows more // loops to be unrolled than relying on induction var simplification if (!LPM) return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); if (!SE) return false; // Only unroll loops with a computable trip count and the trip count needs // to be an int value (allowing a pointer type is a TODO item) const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy()) return false; // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1)); if (isa<SCEVCouldNotCompute>(TripCountSC)) return false; // We only handle cases when the unroll factor is a power of 2. // Count is the loop unroll factor, the number of extra copies added + 1. if ((Count & (Count-1)) != 0) return false; // If this loop is nested, then the loop unroller changes the code in // parent loop, so the Scalar Evolution pass needs to be run again if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the // prolog code and one for a new loop preheader BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass()); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass()); BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator()); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) SCEVExpander Expander(*SE, "loop-unroll"); Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(), PreHeaderBR); Type *CountTy = TripCount->getType(); BinaryOperator *ModVal = BinaryOperator::CreateURem(TripCount, ConstantInt::get(CountTy, Count), "xtraiter"); ModVal->insertBefore(PreHeaderBR); // Check 
if for no extra iterations, then jump to unrolled loop Value *BranchVal = new ICmpInst(PreHeaderBR, ICmpInst::ICMP_NE, ModVal, ConstantInt::get(CountTy, 0), "lcmp"); // Branch to either the extra iterations or the unrolled loop // We will fix up the true branch label when adding loop body copies BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR); assert(PreHeaderBR->isUnconditional() && PreHeaderBR->getSuccessor(0) == PEnd && "CFG edges in Preheader are not correct"); PreHeaderBR->eraseFromParent(); ValueToValueMapTy LVMap; Function *F = Header->getParent(); // These variables are used to update the CFG links in each iteration BasicBlock *CompareBB = nullptr; BasicBlock *LastLoopBB = PH; // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog code LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); // // For each extra loop iteration, create a copy of the loop's basic blocks // and generate a condition that branches to the copy depending on the // number of 'left over' iterations. // for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) { std::vector<BasicBlock*> NewBlocks; ValueToValueMapTy VMap; // Clone all the basic blocks in the loop, but we don't clone the loop // This function adds the appropriate CFG connections. CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks, LoopBlocks, VMap, LVMap, LI); LastLoopBB = cast<BasicBlock>(VMap[Latch]); // Insert the cloned blocks into function just before the original loop F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], F->end()); // Generate the code for the comparison which determines if the loop // prolog code needs to be executed. 
if (leftOverIters == Count-1) { // There is no compare block for the fall-thru case when for the last // left over iteration CompareBB = NewBlocks[0]; } else { // Create a new block for the comparison BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp", F, CompareBB); if (Loop *ParentLoop = L->getParentLoop()) { // Add the new block to the parent loop, if needed ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } // The comparison w/ the extra iteration value and branch Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal, ConstantInt::get(CountTy, leftOverIters), "un.tmp"); // Branch to either the extra iterations or the unrolled loop BranchInst::Create(NewBlocks[0], CompareBB, BranchVal, NewBB); CompareBB = NewBB; PH->getTerminator()->setSuccessor(0, NewBB); VMap[NewPH] = CompareBB; } // Rewrite the cloned instruction operands to use the values // created when the clone is created. for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) { for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) { RemapInstruction(I, VMap, RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); } } } // Connect the prolog code to the original loop and update the // PHI functions. ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap, LPM->getAsPass()); NumRuntimeUnrolled++; return true; }
/*
 * Call the logging function, logging the branch target. Target[0] is the true
 * branch, and target[1] is the false branch. So when logging, we NOT the
 * condition to actually log the target taken. We are also logging and
 * processing unconditional branches for the time being; those always log
 * target 0.
 *
 * The emitted call is log_dynval(dynval_buffer, BRANCHENTRY, BRANCHOP, target)
 * and is placed immediately before the branch. How `target` is obtained:
 *   - unconditional branch:      the constant 0
 *   - literal integer condition: the constant !condition
 *   - anything else:             zext(not(condition)) to wordType; whichever
 *     of the two results are real instructions are inserted ahead of the
 *     call. If the builder folded them (constant expression or undef
 *     condition) they are Constants and are used as operands only.
 */
void PandaInstrumentVisitor::visitBranchInst(BranchInst &I){
    Function *F = mod->getFunction("log_dynval");
    if (!F) {
        printf("Instrumentation function not found\n");
        assert(1==0);
        // With NDEBUG the assert compiles away; never build a call to null.
        return;
    }

    Value *target;
    if (!I.isConditional()) {
        target = ConstantInt::get(wordType, 0);
    } else {
        Value *condition = I.getCondition();
        if (ConstantInt *constCond = dyn_cast<ConstantInt>(condition)) {
            // Only a genuine ConstantInt may be queried for its value; other
            // Constants (constant expressions, undef) take the general path.
            target = ConstantInt::get(wordType, !constCond->getZExtValue());
        } else {
            Value *notCond = IRB.CreateNot(condition);
            target = IRB.CreateZExt(notCond, wordType);

            // Place the helpers in dependency order ahead of the branch.
            // Folded constants are not Instructions and are simply skipped.
            if (Instruction *notInst = dyn_cast<Instruction>(notCond)) {
                notInst->insertBefore(&I);
            }
            if (Instruction *zextInst = dyn_cast<Instruction>(target)) {
                zextInst->insertBefore(&I);
            }
        }
    }

    std::vector<Value*> argValues;
    argValues.push_back(ConstantInt::get(ptrType, (uintptr_t)dynval_buffer));
    argValues.push_back(ConstantInt::get(intType, BRANCHENTRY));
    argValues.push_back(ConstantInt::get(intType, BRANCHOP));
    argValues.push_back(target);

    // The logging call goes last, directly in front of the branch.
    CallInst *CI = IRB.CreateCall(F, ArrayRef<Value*>(argValues));
    CI->insertBefore(&I);
}