/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst *, 16> Returns; SmallVector<InvokeInst *, 16> Invokes; SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { if (Function *Callee = II->getCalledFunction()) if (Callee->isIntrinsic() && Callee->getIntrinsicID() == Intrinsic::donothing) { // Remove the NOP invoke. BranchInst::Create(II->getNormalDest(), II); II->eraseFromParent(); continue; } Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } if (Invokes.empty()) return false; NumInvokes += Invokes.size(); lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); Val = Builder.CreateCall(StackAddrFn, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); // Store a pointer to the function context so that the back-end will know // where to look for it. Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); Builder.CreateCall(FuncCtxFn, FuncCtxArg); // At this point, we are all set up, update the invoke instructions to mark // their call_site values. for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); } // Mark call instructions that aren't nounwind as no-action (call_site == // -1). Skip the entry block, as prior to then, no function context has been // created for this function and any unexpected exceptions thrown will go // directly to the caller's context, which is what we want anyway, so no need // to do anything here. 
for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (!CI->doesNotThrow()) insertCallSiteStore(CI, -1); } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { insertCallSiteStore(RI, -1); } // Register the function context and make sure it's known to not throw CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the // jmpbuf to the new value. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (BB == F.begin()) continue; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->getCalledFunction() != StackRestoreFn) continue; } else if (!isa<AllocaInst>(I)) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); StackAddr->insertAfter(I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } } // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); return true; }
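// For reference, a minimal sketch of the function-context record the GEP
// indices above are poking at. Only what the code itself shows is certain:
// field 5 is the five-slot jump buffer, slot 0 receives the frame pointer,
// slot 2 the stack pointer, and the builtin setjmp fills in the remainder.
// The remaining fields follow the conventional SjLj layout and are listed
// here as an assumption, with illustrative types:
//
//   struct SjLjFunctionContext {
//     void *prev;        // chain of registered contexts (assumed)
//     int   call_site;   // updated via insertCallSiteStore (assumed index)
//     int   data[4];     // scratch filled in by the personality (assumed)
//     void *personality; // (assumed)
//     void *lsda;        // (assumed)
//     void *jbuf[5];     // field 5 above: [0] = fp, [2] = sp, rest by setjmp
//   };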
static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT, const SimplifyQuery &SQ) { bool FnChanged = false; // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). Eagerly simplifying shallow blocks means there is strictly less // work to do for deep blocks. This also means we don't visit unreachable // blocks. for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { bool BBChanged = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *II = &*BI++; switch (II->getOpcode()) { case Instruction::Select: BBChanged |= processSelect(cast<SelectInst>(II), LVI); break; case Instruction::PHI: BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ); break; case Instruction::ICmp: case Instruction::FCmp: BBChanged |= processCmp(cast<CmpInst>(II), LVI); break; case Instruction::Load: case Instruction::Store: BBChanged |= processMemAccess(II, LVI); break; case Instruction::Call: case Instruction::Invoke: BBChanged |= processCallSite(CallSite(II), LVI); break; case Instruction::SRem: BBChanged |= processSRem(cast<BinaryOperator>(II), LVI); break; case Instruction::SDiv: BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI); break; case Instruction::UDiv: case Instruction::URem: BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI); break; case Instruction::AShr: BBChanged |= processAShr(cast<BinaryOperator>(II), LVI); break; case Instruction::Add: BBChanged |= processAdd(cast<BinaryOperator>(II), LVI); break; } } Instruction *Term = BB->getTerminator(); switch (Term->getOpcode()) { case Instruction::Switch: BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT); break; case Instruction::Ret: { auto *RI = cast<ReturnInst>(Term); // Try to determine the return value if we can. This is mainly here to // simplify the writing of unit tests, but also helps to enable IPO by // constant folding the return values of callees. auto *RetVal = RI->getReturnValue(); if (!RetVal) break; // handle "ret void" if (isa<Constant>(RetVal)) break; // nothing to do if (auto *C = getConstantAt(RetVal, RI, LVI)) { ++NumReturns; RI->replaceUsesOfWith(RetVal, C); BBChanged = true; } } } FnChanged |= BBChanged; } return FnChanged; }
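// A small source-level illustration (hypothetical input, not part of the
// pass) of the rewrites the handlers above perform: inside the guarded block
// LazyValueInfo knows 'x' is positive, so processSDiv can turn the signed
// division into an unsigned one and processCmp can fold the redundant
// comparison.
int cvpExample(int x) {
  if (x > 0) {
    int q = x / 4;   // sdiv -> udiv once both operands are known non-negative
    if (x >= 1)      // icmp proven true by LVI and folded
      return q;
  }
  return 0;
}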
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
///       if (i < 10) x = i;
///
///     entry:
///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
///     if.then:
///       %1 = load i32* %i.addr, align 4, !dbg !10
///       store i32 %1, i32* %x, align 4, !dbg !10
///       br label %if.end, !dbg !10
///     if.end:
///       ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instructions in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (i < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instructions at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instructions in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
  // If the function has debug information, but the user has disabled
  // discriminators, do nothing.
  // Similarly, if the function has no debug info, do nothing.
  // Finally, if this module is built with DWARF versions earlier than 4,
  // do nothing (discriminator support is a DWARF 4 feature).
  if (NoDiscriminators || !hasDebugInfo(F) ||
      F.getParent()->getDwarfVersion() < 4)
    return false;

  bool Changed = false;
  Module *M = F.getParent();
  LLVMContext &Ctx = M->getContext();
  DIBuilder Builder(*M, /*AllowUnresolved*/ false);

  // Traverse all the blocks looking for instructions in different
  // blocks that are at the same file:line location.
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *B = I; TerminatorInst *Last = B->getTerminator(); DILocation LastDIL = Last->getDebugLoc().get(); if (!LastDIL) continue; for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { BasicBlock *Succ = Last->getSuccessor(I); Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); DILocation FirstDIL = First->getDebugLoc().get(); if (!FirstDIL) continue; // If the first instruction (First) of Succ is at the same file // location as B's last instruction (Last), add a new // discriminator for First's location and all the instructions // in Succ that share the same location with First. if (!FirstDIL->canDiscriminate(*LastDIL)) { // Create a new lexical scope and compute a new discriminator // number for it. StringRef Filename = FirstDIL->getFilename(); auto *Scope = FirstDIL->getScope(); auto *File = Builder.createFile(Filename, Scope->getDirectory()); // FIXME: Calculate the discriminator here, based on local information, // and delete MDLocation::computeNewDiscriminator(). The current // solution gives different results depending on other modules in the // same context. All we really need is to discriminate between // FirstDIL and LastDIL -- a local map would suffice. unsigned Discriminator = FirstDIL->computeNewDiscriminator(); auto *NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator); auto *NewDIL = MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), NewScope, FirstDIL->getInlinedAt()); DebugLoc newDebugLoc = NewDIL; // Attach this new debug location to First and every // instruction following First that shares the same location. for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; ++I1) { if (I1->getDebugLoc().get() != FirstDIL) break; I1->setDebugLoc(newDebugLoc); DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() << ":" << NewDIL->getColumn() << ":" << NewDIL->getDiscriminator() << *I1 << "\n"); } DEBUG(dbgs() << "\n"); Changed = true; } } } return Changed; }
bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) { // Clone the program to try hacking it apart... Module *M = CloneModule(BD.getProgram()); // Convert list to set for fast lookup... std::set<BasicBlock*> Blocks; for (unsigned i = 0, e = BBs.size(); i != e; ++i) { // Convert the basic block from the original module to the new module... const Function *F = BBs[i]->getParent(); Function *CMF = M->getFunction(F->getName()); assert(CMF && "Function not in module?!"); assert(CMF->getFunctionType() == F->getFunctionType() && "wrong type?"); // Get the mapped basic block... Function::iterator CBI = CMF->begin(); std::advance(CBI, std::distance(F->begin(), Function::const_iterator(BBs[i]))); Blocks.insert(CBI); } std::cout << "Checking for crash with only these blocks:"; unsigned NumPrint = Blocks.size(); if (NumPrint > 10) NumPrint = 10; for (unsigned i = 0, e = NumPrint; i != e; ++i) std::cout << " " << BBs[i]->getName(); if (NumPrint < Blocks.size()) std::cout << "... <" << Blocks.size() << " total>"; std::cout << ": "; // Loop over and delete any hack up any blocks that are not listed... for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) if (!Blocks.count(BB) && BB->getTerminator()->getNumSuccessors()) { // Loop over all of the successors of this block, deleting any PHI nodes // that might include it. for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) (*SI)->removePredecessor(BB); if (BB->getTerminator()->getType() != Type::VoidTy) BB->getTerminator()->replaceAllUsesWith( Constant::getNullValue(BB->getTerminator()->getType())); // Delete the old terminator instruction... BB->getInstList().pop_back(); // Add a new return instruction of the appropriate type... const Type *RetTy = BB->getParent()->getReturnType(); new ReturnInst(RetTy == Type::VoidTy ? 0 : Constant::getNullValue(RetTy), BB); } // The CFG Simplifier pass may delete one of the basic blocks we are // interested in. If it does we need to take the block out of the list. Make // a "persistent mapping" by turning basic blocks into <function, name> pairs. // This won't work well if blocks are unnamed, but that is just the risk we // have to take. std::vector<std::pair<Function*, std::string> > BlockInfo; for (std::set<BasicBlock*>::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) BlockInfo.push_back(std::make_pair((*I)->getParent(), (*I)->getName())); // Now run the CFG simplify pass on the function... PassManager Passes; Passes.add(createCFGSimplificationPass()); Passes.add(createVerifierPass()); Passes.run(*M); // Try running on the hacked up program... if (TestFn(BD, M)) { BD.setNewProgram(M); // It crashed, keep the trimmed version... // Make sure to use basic block pointers that point into the now-current // module, and that they don't include any deleted blocks. BBs.clear(); for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { ValueSymbolTable &ST = BlockInfo[i].first->getValueSymbolTable(); Value* V = ST.lookup(BlockInfo[i].second); if (V && V->getType() == Type::LabelTy) BBs.push_back(cast<BasicBlock>(V)); } return true; } delete M; // It didn't crash, try something else. return false; }
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); }
bool GaussNewton(Function& f, real_type t, Vector& x, real_type atol, real_type rtol, unsigned *itCount, unsigned maxit, unsigned maxjac, real_type lambdamin) { Vector err, dx; Matrix J; #define USE_QR #ifdef USE_QR LinAlg::MatrixFactors<real_type,0,0,LinAlg::QRTag> jacFactors; #else LinAlg::MatrixFactors<real_type,0,0,LinAlg::LUTag> jacFactors; #endif bool converged; do { // Compute in each step a new jacobian f.jac(t, x, J); Log(NewtonMethod, Debug) << "Jacobian is:\n" << J << endl; #ifdef USE_QR jacFactors = J; #else jacFactors = trans(J)*J; #endif Log(NewtonMethod, Debug) << "Jacobian is " << (jacFactors.singular() ? "singular" : "ok") << endl; // Compute the actual error f.eval(t, x, err); // Compute the search direction #ifdef USE_QR dx = jacFactors.solve(err); #else dx = jacFactors.solve(trans(J)*err); #endif Log(NewtonMethod, Debug) << "dx residual " << trans(J*dx - err) << endl << trans(J*dx - err)*J << endl; // Get a better search guess if (1 < norm(dx)) dx = normalize(dx); Vector xnew = LineSearch(f, t, x, -dx, 1.0, atol); // check convergence converged = norm1(xnew - x) < atol; Log(NewtonMethod, Debug) << "Convergence test: |dx| = " << norm(xnew - x) << ", converged = " << converged << endl; // New guess is the better one x = xnew; } while (!converged); return converged; }
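// The two #ifdef branches above are the same Gauss-Newton step, only the
// factorization differs. A sketch of the underlying math in the code's own
// terms (r is the residual from f.eval, J its jacobian from f.jac): each
// iteration solves the linearized least-squares problem
//
//   min_dx || J*dx - r(x) ||_2
//
// either directly from a QR factorization of J (USE_QR), or through the
// normal equations
//
//   (J^T J) dx = J^T r(x)
//
// factored with LU. The step is clipped to unit length and handed to
// LineSearch as the direction -dx, so the update is x <- x - lambda*dx with
// the step length lambda chosen by the line search.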
inline bool NewtonTypeMethod(Function& f,
                             LinAlg::MatrixFactors<T,0,0,FactorTag>& jacInv,
                             real_type t, Vector& x,
                             real_type atol, real_type rtol,
                             unsigned *itCount, unsigned maxit,
                             unsigned maxjac, real_type lambdamin)
{
  // The initial damping factor.
  // lambda == 1 means non-damped Newton method.
  real_type lambda = 1;

  // Flag if we observe convergence or not.
  // If not, abort and hope that the caller knows what to do ...
  bool converging;

  // True if the method has converged,
  // that is, the error is below a given tolerance.
  bool converged = false;

  // x_new is the potential solution at the next iteration step.
  Vector x_new;

  Log(NewtonMethod, Debug) << "__________________________" << endl;

  // The displacement of the undamped Newton method for the first
  // iteration step.
  Vector err;
  f.eval(t, x, err);
  Log(NewtonMethod, Debug1) << "err " << trans(err) << endl;
  Vector dx_bar = jacInv.solve(err);
  Log(NewtonMethod, Debug2) << "dx_bar " << trans(dx_bar) << endl;

  do {
    // Increment the iteration counter. Just statistics ...
    if (itCount)
      ++(*itCount);
    --maxit;

    // dx contains the displacement of the undamped Newton iteration in the
    // current iteration step.
    Vector dx = dx_bar;

    // Compute the error norm of the increment.
    real_type normdx = norm(dx);
    if (normdx == 0.0)
      return true;

    Log(NewtonMethod, Debug1) << "outer " << normdx << endl;

    // Damped Newton method:
    // Use lambda*dx with 0 < lambda <= 1 instead of just dx as displacement.
    // Be optimistic and try twice the displacement from the previous step.
    lambda = min(static_cast<real_type>(1), 2.0*lambda);

    // Convergence rate, that is an estimate of || e_{n+1} ||/|| e_{n} ||
    real_type convergenceRate;

    do {
      // Compute the new approximation to the solution with the current
      // damping factor lambda.
      x_new = x - lambda*dx;

      // Compute the error of that new approximation.
      f.eval(t, x_new, err);
      Log(NewtonMethod, Debug) << "err " << trans(err) << endl;

      // Check if we get some kind of convergence with this lambda.
      // This jacobian evaluation will also be used for the next step if
      // this lambda turns out to be acceptable.
      dx_bar = jacInv.solve(err);
      Log(NewtonMethod, Debug2) << "dx_bar " << trans(dx_bar) << endl;

      // The convergence criterion parameter theta.
      real_type theta = 1.0 - 0.5*lambda;

      // Compute the norm of dx_bar and check if we get a better approximation
      // to the current solution.
      real_type normdx_bar = norm(dx_bar);
      Log(NewtonMethod, Debug) << "inner " << normdx_bar << endl;

      const real_type min_conv_rate = 1e-10;
      if (normdx == 0.0) {
        Log(NewtonMethod, Error) << "Whoa: we most likely have an exact "
          "solution and we iterate further: normdx = " << normdx << endl;
        convergenceRate = min_conv_rate;
      } else
        convergenceRate = max(min_conv_rate, normdx_bar/normdx);

      converging = normdx_bar < theta*normdx;
      if (converging)
        break;

      // If we are still not converging, halve the damping factor and try again.
      lambda *= 0.5;
    } while (lambdamin < lambda);

    if (converging) {
      // Ok, we found a better approximation to the solution.
      // Finally check for convergence:
      // Compute the scaled norm of our last displacement ...
      real_type enormdx = scaledDiff(x, x_new, atol, rtol);
      // ... and check if either the convergence rate is very high, which
      // signals that we are very near to the zero crossing, or if our last
      // displacement is *very* short.
      converged = enormdx*max(static_cast<real_type>(1e-2), convergenceRate)
        < (1 - convergenceRate);

      // Use the newly computed solution.
      x = x_new;
    } else if (0 < maxjac) {
      --maxjac;
      Log(NewtonMethod, Debug) << "Computing new jacobian" << endl;

      // get a new jacobian ...
      f.jac(t, x, jacInv.data());
      Log(NewtonMethod, Debug2) << jacInv.data() << endl;
      jacInv.factorize();
      Log(NewtonMethod, Debug2) << "decomposed qr\n" << jacInv.data() << endl;
      if (jacInv.singular())
        Log(NewtonMethod, Warning) << "Have singular jacobian!" << endl;

      converging = true;
    }

    // Keep iterating while we have not yet converged, are still making
    // progress, and have iterations left.
  } while (!converged && converging && 0 < maxit);

  Log(NewtonMethod, Info) << "Newton type method: converged = "
                          << converged << endl;

  // Tell the caller if it worked or not.
  return converged;
}
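// The damping logic above is the usual monotonicity test for a damped Newton
// iteration, sketched here in the code's own notation (not quoted from a
// reference). With the undamped correction dx = J^{-1} r(x) and the trial
// point x_trial = x - lambda*dx, the step is accepted when the predicted next
// correction shrinks,
//
//   || J^{-1} r(x_trial) || < (1 - lambda/2) * || dx ||,
//
// and otherwise lambda is halved (down to lambdamin). The ratio
// Theta = ||dx_bar|| / ||dx|| serves as the convergence-rate estimate, and
// the outer loop declares convergence once
//
//   scaledDiff(x, x_new) * max(1e-2, Theta) < 1 - Theta.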
void StatsTracker::writeIStats() { Module *m = executor.kmodule->module; uint64_t istatsMask = 0; llvm::raw_fd_ostream &of = *istatsFile; // We assume that we didn't move the file pointer unsigned istatsSize = of.tell(); of.seek(0); of << "version: 1\n"; of << "creator: klee\n"; of << "pid: " << getpid() << "\n"; of << "cmd: " << m->getModuleIdentifier() << "\n\n"; of << "\n"; StatisticManager &sm = *theStatisticManager; unsigned nStats = sm.getNumStatistics(); // Max is 13, sadly istatsMask |= 1<<sm.getStatisticID("Queries"); istatsMask |= 1<<sm.getStatisticID("QueriesValid"); istatsMask |= 1<<sm.getStatisticID("QueriesInvalid"); istatsMask |= 1<<sm.getStatisticID("QueryTime"); istatsMask |= 1<<sm.getStatisticID("ResolveTime"); istatsMask |= 1<<sm.getStatisticID("Instructions"); istatsMask |= 1<<sm.getStatisticID("InstructionTimes"); istatsMask |= 1<<sm.getStatisticID("InstructionRealTimes"); istatsMask |= 1<<sm.getStatisticID("Forks"); istatsMask |= 1<<sm.getStatisticID("CoveredInstructions"); istatsMask |= 1<<sm.getStatisticID("UncoveredInstructions"); istatsMask |= 1<<sm.getStatisticID("States"); istatsMask |= 1<<sm.getStatisticID("MinDistToUncovered"); of << "positions: instr line\n"; for (unsigned i=0; i<nStats; i++) { if (istatsMask & (1<<i)) { Statistic &s = sm.getStatistic(i); of << "event: " << s.getShortName() << " : " << s.getName() << "\n"; } } of << "events: "; for (unsigned i=0; i<nStats; i++) { if (istatsMask & (1<<i)) of << sm.getStatistic(i).getShortName() << " "; } of << "\n"; // set state counts, decremented after we process so that we don't // have to zero all records each time. if (istatsMask & (1<<stats::states.getID())) updateStateStatistics(1); std::string sourceFile = ""; CallSiteSummaryTable callSiteStats; if (UseCallPaths) callPathManager.getSummaryStatistics(callSiteStats); of << "ob=" << objectFilename << "\n"; for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { if (!fnIt->isDeclaration()) { // Always try to write the filename before the function name, as otherwise // KCachegrind can create two entries for the function, one with an // unnamed file and one without. 
const InstructionInfo &ii = executor.kmodule->infos->getFunctionInfo(fnIt); if (ii.file != sourceFile) { of << "fl=" << ii.file << "\n"; sourceFile = ii.file; } of << "fn=" << fnIt->getName().str() << "\n"; for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); bbIt != bb_ie; ++bbIt) { for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); it != ie; ++it) { Instruction *instr = &*it; const InstructionInfo &ii = executor.kmodule->infos->getInfo(instr); unsigned index = ii.id; if (ii.file!=sourceFile) { of << "fl=" << ii.file << "\n"; sourceFile = ii.file; } of << ii.assemblyLine << " "; of << ii.line << " "; for (unsigned i=0; i<nStats; i++) if (istatsMask&(1<<i)) of << sm.getIndexedValue(sm.getStatistic(i), index) << " "; of << "\n"; if (UseCallPaths && (isa<CallInst>(instr) || isa<InvokeInst>(instr))) { CallSiteSummaryTable::iterator it = callSiteStats.find(instr); if (it!=callSiteStats.end()) { for (std::map<llvm::Function*, CallSiteInfo>::iterator fit = it->second.begin(), fie = it->second.end(); fit != fie; ++fit) { Function *f = fit->first; CallSiteInfo &csi = fit->second; const InstructionInfo &fii = executor.kmodule->infos->getFunctionInfo(f); if (fii.file!="" && fii.file!=sourceFile) of << "cfl=" << fii.file << "\n"; of << "cfn=" << f->getName().str() << "\n"; of << "calls=" << csi.count << " "; of << fii.assemblyLine << " "; of << fii.line << "\n"; of << ii.assemblyLine << " "; of << ii.line << " "; for (unsigned i=0; i<nStats; i++) { if (istatsMask&(1<<i)) { Statistic &s = sm.getStatistic(i); uint64_t value; // Hack, ignore things that don't make sense on // call paths. if (&s == &stats::uncoveredInstructions) { value = 0; } else { value = csi.statistics.getValue(s); } of << value << " "; } } of << "\n"; } } } } } } } if (istatsMask & (1<<stats::states.getID())) updateStateStatistics((uint64_t)-1); // Clear then end of the file if necessary (no truncate op?). unsigned pos = of.tell(); for (unsigned i=pos; i<istatsSize; ++i) of << '\n'; of.flush(); }
void StatsTracker::computeReachableUncovered() { KModule *km = executor.kmodule; Module *m = km->module; static bool init = true; const InstructionInfoTable &infos = *km->infos; StatisticManager &sm = *theStatisticManager; if (init) { init = false; // Compute call targets. It would be nice to use alias information // instead of assuming all indirect calls hit all escaping // functions, eh? for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); bbIt != bb_ie; ++bbIt) { for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); it != ie; ++it) { if (isa<CallInst>(it) || isa<InvokeInst>(it)) { CallSite cs(it); if (isa<InlineAsm>(cs.getCalledValue())) { // We can never call through here so assume no targets // (which should be correct anyhow). callTargets.insert(std::make_pair(it, std::vector<Function*>())); } else if (Function *target = getDirectCallTarget(cs)) { callTargets[it].push_back(target); } else { callTargets[it] = std::vector<Function*>(km->escapingFunctions.begin(), km->escapingFunctions.end()); } } } } } // Compute function callers as reflexion of callTargets. for (calltargets_ty::iterator it = callTargets.begin(), ie = callTargets.end(); it != ie; ++it) for (std::vector<Function*>::iterator fit = it->second.begin(), fie = it->second.end(); fit != fie; ++fit) functionCallers[*fit].push_back(it->first); // Initialize minDistToReturn to shortest paths through // functions. 0 is unreachable. std::vector<Instruction *> instructions; for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { if (fnIt->isDeclaration()) { if (fnIt->doesNotReturn()) { functionShortestPath[fnIt] = 0; } else { functionShortestPath[fnIt] = 1; // whatever } } else { functionShortestPath[fnIt] = 0; } // Not sure if I should bother to preorder here. XXX I should. for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); bbIt != bb_ie; ++bbIt) { for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); it != ie; ++it) { instructions.push_back(it); unsigned id = infos.getInfo(it).id; sm.setIndexedValue(stats::minDistToReturn, id, isa<ReturnInst>(it) #if LLVM_VERSION_CODE < LLVM_VERSION(3, 1) || isa<UnwindInst>(it) #endif ); } } } std::reverse(instructions.begin(), instructions.end()); // I'm so lazy it's not even worklisted. bool changed; do { changed = false; for (std::vector<Instruction*>::iterator it = instructions.begin(), ie = instructions.end(); it != ie; ++it) { Instruction *inst = *it; unsigned bestThrough = 0; if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { std::vector<Function*> &targets = callTargets[inst]; for (std::vector<Function*>::iterator fnIt = targets.begin(), ie = targets.end(); fnIt != ie; ++fnIt) { uint64_t dist = functionShortestPath[*fnIt]; if (dist) { dist = 1+dist; // count instruction itself if (bestThrough==0 || dist<bestThrough) bestThrough = dist; } } } else { bestThrough = 1; } if (bestThrough) { unsigned id = infos.getInfo(*it).id; uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToReturn, id); std::vector<Instruction*> succs = getSuccs(*it); for (std::vector<Instruction*>::iterator it2 = succs.begin(), ie = succs.end(); it2 != ie; ++it2) { uint64_t dist = sm.getIndexedValue(stats::minDistToReturn, infos.getInfo(*it2).id); if (dist) { uint64_t val = bestThrough + dist; if (best==0 || val<best) best = val; } } // there's a corner case here when a function only includes a single // instruction (a ret). 
in that case, we MUST update // functionShortestPath, or it will remain 0 (erroneously indicating // that no return instructions are reachable) Function *f = inst->getParent()->getParent(); if (best != cur || (inst == f->begin()->begin() && functionShortestPath[f] != best)) { sm.setIndexedValue(stats::minDistToReturn, id, best); changed = true; // Update shortest path if this is the entry point. if (inst==f->begin()->begin()) functionShortestPath[f] = best; } } } } while (changed); } // compute minDistToUncovered, 0 is unreachable std::vector<Instruction *> instructions; for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { // Not sure if I should bother to preorder here. for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); bbIt != bb_ie; ++bbIt) { for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); it != ie; ++it) { unsigned id = infos.getInfo(it).id; instructions.push_back(&*it); sm.setIndexedValue(stats::minDistToUncovered, id, sm.getIndexedValue(stats::uncoveredInstructions, id)); } } } std::reverse(instructions.begin(), instructions.end()); // I'm so lazy it's not even worklisted. bool changed; do { changed = false; for (std::vector<Instruction*>::iterator it = instructions.begin(), ie = instructions.end(); it != ie; ++it) { Instruction *inst = *it; uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToUncovered, infos.getInfo(inst).id); unsigned bestThrough = 0; if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { std::vector<Function*> &targets = callTargets[inst]; for (std::vector<Function*>::iterator fnIt = targets.begin(), ie = targets.end(); fnIt != ie; ++fnIt) { uint64_t dist = functionShortestPath[*fnIt]; if (dist) { dist = 1+dist; // count instruction itself if (bestThrough==0 || dist<bestThrough) bestThrough = dist; } if (!(*fnIt)->isDeclaration()) { uint64_t calleeDist = sm.getIndexedValue(stats::minDistToUncovered, infos.getFunctionInfo(*fnIt).id); if (calleeDist) { calleeDist = 1+calleeDist; // count instruction itself if (best==0 || calleeDist<best) best = calleeDist; } } } } else { bestThrough = 1; } if (bestThrough) { std::vector<Instruction*> succs = getSuccs(inst); for (std::vector<Instruction*>::iterator it2 = succs.begin(), ie = succs.end(); it2 != ie; ++it2) { uint64_t dist = sm.getIndexedValue(stats::minDistToUncovered, infos.getInfo(*it2).id); if (dist) { uint64_t val = bestThrough + dist; if (best==0 || val<best) best = val; } } } if (best != cur) { sm.setIndexedValue(stats::minDistToUncovered, infos.getInfo(inst).id, best); changed = true; } } } while (changed); for (std::set<ExecutionState*>::iterator it = executor.states.begin(), ie = executor.states.end(); it != ie; ++it) { ExecutionState *es = *it; uint64_t currentFrameMinDist = 0; #if MULTITHREAD for (Thread::stack_ty::iterator sfIt = es->stack().begin(), sf_ie = es->stack().end(); sfIt != sf_ie; ++sfIt) { Thread::stack_ty::iterator next = sfIt + 1; KInstIterator kii; if (next==es->stack().end()) { kii = es->pc(); #else for (ExecutionState::stack_ty::iterator sfIt = es->stack.begin(), sf_ie = es->stack.end(); sfIt != sf_ie; ++sfIt) { ExecutionState::stack_ty::iterator next = sfIt + 1; KInstIterator kii; if (next==es->stack.end()) { kii = es->pc; #endif } else { kii = next->caller; ++kii; } sfIt->minDistToUncoveredOnReturn = currentFrameMinDist; currentFrameMinDist = computeMinDistToUncovered(kii, currentFrameMinDist); } } }
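// Both fixed-point loops above evaluate a shortest-path recurrence over
// instructions, with 0 meaning "unreachable"; a sketch in the code's own
// terms (not an external reference), written for minDistToUncovered and using
// d(i) for the per-instruction distance:
//
//   d(i) = min over
//            { through(i) + d(s)  : s a successor of i with d(s) != 0 } and
//            { 1 + d(callee entry): i calls a defined callee },
//
// where through(i) is 1 for ordinary instructions and, for call sites,
// 1 + the smallest nonzero functionShortestPath among the possible targets.
// The earlier minDistToReturn loop is the same recurrence without the
// callee-entry term. Each loop re-evaluates d until nothing changes;
// reversing the instruction list first just lets the backwards propagation
// settle in fewer sweeps.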
vector<Opcode*> ScriptParser::assembleOne( Program& program, vector<Opcode*> runCode, int numparams) { vector<Opcode *> rval; // Push on the params to the run. int i; for (i = 0; i < numparams && i < 9; ++i) rval.push_back(new OPushRegister(new VarArgument(i))); for (; i < numparams; ++i) rval.push_back(new OPushRegister(new VarArgument(EXP1))); // Generate a map of labels to functions. vector<Function*> allFunctions = getFunctions(program); map<int, Function*> functionsByLabel; for (vector<Function*>::iterator it = allFunctions.begin(); it != allFunctions.end(); ++it) { Function& function = **it; functionsByLabel[function.getLabel()] = &function; } // Grab all labels directly jumped to. set<int> usedLabels; for (vector<Opcode*>::iterator it = runCode.begin(); it != runCode.end(); ++it) { GetLabels temp(usedLabels); (*it)->execute(temp, NULL); } set<int> unprocessedLabels(usedLabels); // Grab labels used by each function until we run out of functions. while (!unprocessedLabels.empty()) { int label = *unprocessedLabels.begin(); Function* function = find<Function*>(functionsByLabel, label).value_or(NULL); if (function) { vector<Opcode*> const& functionCode = function->getCode(); for (vector<Opcode*>::const_iterator it = functionCode.begin(); it != functionCode.end(); ++it) { GetLabels temp(usedLabels); (*it)->execute(temp, NULL); insertElements(unprocessedLabels, temp.newLabels); } } unprocessedLabels.erase(label); } // Make the rval for (vector<Opcode*>::iterator it = runCode.begin(); it != runCode.end(); ++it) rval.push_back((*it)->makeClone()); for (set<int>::iterator it = usedLabels.begin(); it != usedLabels.end(); ++it) { int label = *it; Function* function = find<Function*>(functionsByLabel, label).value_or(NULL); if (!function) continue; vector<Opcode*> functionCode = function->getCode(); for (vector<Opcode*>::iterator it = functionCode.begin(); it != functionCode.end(); ++it) rval.push_back((*it)->makeClone()); } // Set the label line numbers. map<int, int> linenos; int lineno = 1; for (vector<Opcode*>::iterator it = rval.begin(); it != rval.end(); ++it) { if ((*it)->getLabel() != -1) linenos[(*it)->getLabel()] = lineno; lineno++; } // Now fill in those labels for (vector<Opcode*>::iterator it = rval.begin(); it != rval.end(); ++it) { SetLabels temp; (*it)->execute(temp, &linenos); } return rval; }
/// JITCompilerFn - This function is called when a lazy compilation stub has /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); assert(JR && "Unable to find the corresponding JITResolver to the call site"); Function* F = nullptr; void* ActualPtr = nullptr; { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. std::pair<void*, Function*> I = JR->state.LookupFunctionFromCallSite(Stub); F = I.second; ActualPtr = I.first; } // If we have already code generated the function, just return the address. void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); if (!Result) { // Otherwise we don't have it, do lazy compilation now. // If lazy compilation is disabled, emit a useful error message and abort. if (!JR->TheJIT->isCompilingLazily()) { report_fatal_error("LLVM JIT requested to do lazy compilation of" " function '" + F->getName() + "' when lazy compiles are disabled!"); } DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); (void)ActualPtr; Result = JR->TheJIT->getPointerToFunction(F); } // Reacquire the lock to update the GOT map. MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the // lock above. As soon as the 1st function finishes compiling the function, // the next one will be released, and needs to be able to find the function it // needs to call. // FIXME: We could rewrite all references to this stub if we knew them. // What we will do is set the compiled function address to map to the // same GOT entry as the stub so that later clients may update the GOT // if they see it still using the stub address. // Note: this is done so the Resolver doesn't have to manage GOT memory // Do this without allocating map space if the target isn't using a GOT if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) JR->revGOTMap[Result] = JR->revGOTMap[Stub]; return Result; }
/// InsertUniqueBackedgeBlock - This method is called when the specified loop /// has more than one backedge in it. If this occurs, revector all of these /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// BasicBlock * LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); // Unique backedge insertion currently depends on having a preheader. if (!Preheader) return 0; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ BasicBlock *P = *I; // Indirectbr edges cannot be split, so we must fail if we find one. if (isa<IndirectBrInst>(P->getTerminator())) return 0; if (P != Preheader) BackedgeBlocks.push_back(P); } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; Value *UniqueValue = 0; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); if (IBB == Preheader) { PreheaderIdx = i; } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { if (UniqueValue == 0) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; } } } // Delete all of the incoming values from the old PN except the preheader's assert(PreheaderIdx != ~0U && "PHI has no preheader entry??"); if (PreheaderIdx != 0) { PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx)); PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx)); } // Nuke all entries except the zero'th. for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i) PN->removeIncomingValue(e-i, false); // Finally, add the newly constructed PHI node as the entry for the BEBlock. PN->addIncoming(NewPN, BEBlock); // As an optimization, if all incoming values in the new PhiNode (which is a // subset of the incoming values of the old PHI node) have the same value, // eliminate the PHI Node. 
    if (HasUniqueIncomingValue) {
      NewPN->replaceAllUsesWith(UniqueValue);
      if (AA) AA->deleteValue(NewPN);
      BEBlock->getInstList().erase(NewPN);
    }
  }

  // Now that all of the PHI nodes have been inserted and adjusted, modify the
  // backedge blocks to just branch to the BEBlock instead of the header.
  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
      if (TI->getSuccessor(Op) == Header)
        TI->setSuccessor(Op, BEBlock);
  }

  //===--- Update all analyses which we must preserve now -----------------===//

  // Update Loop Information - we know that this block is now in the current
  // loop and all parent loops.
  L->addBasicBlockToLoop(BEBlock, LI->getBase());

  // Update dominator information
  DT->splitBlock(BEBlock);

  return BEBlock;
}
static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) { SmallPtrSet<CallGraphNode *, 8> SCCNodes; bool MadeChange = false; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (CallGraphNode *I : SCC) SCCNodes.insert(I); // First pass, scan all of the functions in the SCC, simplifying them // according to what we know. for (CallGraphNode *I : SCC) if (Function *F = I->getFunction()) MadeChange |= SimplifyFunction(F, CG); // Next, check to see if any callees might throw or if there are any external // functions in this SCC: if so, we cannot prune any functions in this SCC. // Definitions that are weak and not declared non-throwing might be // overridden at linktime with something that throws, so assume that. // If this SCC includes the unwind instruction, we KNOW it throws, so // obviously the SCC might throw. // bool SCCMightUnwind = false, SCCMightReturn = false; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) { Function *F = (*I)->getFunction(); if (!F) { SCCMightUnwind = true; SCCMightReturn = true; } else if (!F->hasExactDefinition()) { SCCMightUnwind |= !F->doesNotThrow(); SCCMightReturn |= !F->doesNotReturn(); } else { bool CheckUnwind = !SCCMightUnwind && !F->doesNotThrow(); bool CheckReturn = !SCCMightReturn && !F->doesNotReturn(); // Determine if we should scan for InlineAsm in a naked function as it // is the only way to return without a ReturnInst. Only do this for // no-inline functions as functions which may be inlined cannot // meaningfully return via assembly. bool CheckReturnViaAsm = CheckReturn && F->hasFnAttribute(Attribute::Naked) && F->hasFnAttribute(Attribute::NoInline); if (!CheckUnwind && !CheckReturn) continue; for (const BasicBlock &BB : *F) { const TerminatorInst *TI = BB.getTerminator(); if (CheckUnwind && TI->mayThrow()) { SCCMightUnwind = true; } else if (CheckReturn && isa<ReturnInst>(TI)) { SCCMightReturn = true; } for (const Instruction &I : BB) { if ((!CheckUnwind || SCCMightUnwind) && (!CheckReturnViaAsm || SCCMightReturn)) break; // Check to see if this function performs an unwind or calls an // unwinding function. if (CheckUnwind && !SCCMightUnwind && I.mayThrow()) { bool InstMightUnwind = true; if (const auto *CI = dyn_cast<CallInst>(&I)) { if (Function *Callee = CI->getCalledFunction()) { CallGraphNode *CalleeNode = CG[Callee]; // If the callee is outside our current SCC then we may throw // because it might. If it is inside, do nothing. if (SCCNodes.count(CalleeNode) > 0) InstMightUnwind = false; } } SCCMightUnwind |= InstMightUnwind; } if (CheckReturnViaAsm && !SCCMightReturn) if (auto ICS = ImmutableCallSite(&I)) if (const auto *IA = dyn_cast<InlineAsm>(ICS.getCalledValue())) if (IA->hasSideEffects()) SCCMightReturn = true; } if (SCCMightUnwind && SCCMightReturn) break; } } } // If the SCC doesn't unwind or doesn't throw, note this fact. if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphNode *I : SCC) { Function *F = I->getFunction(); if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { F->addFnAttr(Attribute::NoUnwind); MadeChange = true; } if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) { F->addFnAttr(Attribute::NoReturn); MadeChange = true; } } for (CallGraphNode *I : SCC) { // Convert any invoke instructions to non-throwing functions in this node // into call instructions with a branch. This makes the exception blocks // dead. 
if (Function *F = I->getFunction()) MadeChange |= SimplifyFunction(F, CG); } return MadeChange; }
void VCButton::pressFunction() { assert(m_keyBind); if (/*m_keyBind->pressAction() == KeyBind::PressNothing || */ m_functionID == KNoID) { return; } /* else if (m_keyBind->pressAction() == KeyBind::PressStart) { Function* f = _app->doc()->function(m_functionID); if (f) { if (f->engage(static_cast<QObject*> (this))) { setOn(true); } } else { qDebug("Function has been deleted!"); attachFunction(KNoID); } } */ else //if (m_keyBind->pressAction() == KeyBind::PressToggle) { Function* f = _app->doc()->function(m_functionID); if (f) { if (isOn()) { f->stop(); //setOn(false); } else { if (f->engage(static_cast<QObject*> (this))) { setOn(true); } } } else { qDebug("Function has been deleted!"); attachFunction(KNoID); } } /* else if (m_keyBind->pressAction() == KeyBind::PressStepForward) { // // TODO: Implement a bus for stepping // } else if (m_keyBind->pressAction() == KeyBind::PressStepBackward) { // // TODO: Implement a bus for stepping // } */ }
bool WinEHStatePass::runOnFunction(Function &F) { // If this is an outlined handler, don't do anything. We'll do state insertion // for it in the parent. StringRef WinEHParentName = F.getFnAttribute("wineh-parent").getValueAsString(); if (WinEHParentName != F.getName() && !WinEHParentName.empty()) return false; // Check the personality. Do nothing if this is not an MSVC personality. if (!F.hasPersonalityFn()) return false; PersonalityFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); if (!PersonalityFn) return false; Personality = classifyEHPersonality(PersonalityFn); if (!isMSVCEHPersonality(Personality)) return false; // Skip this function if there are no EH pads and we aren't using IR-level // outlining. if (WinEHParentName.empty()) { bool HasPads = false; for (BasicBlock &BB : F) { if (BB.isEHPad()) { HasPads = true; break; } } if (!HasPads) return false; } // Disable frame pointer elimination in this function. // FIXME: Do the nested handlers need to keep the parent ebp in ebp, or can we // use an arbitrary register? F.addFnAttr("no-frame-pointer-elim", "true"); emitExceptionRegistrationRecord(&F); auto *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); // If MMI is null, create our own WinEHFuncInfo. This only happens in opt // tests. std::unique_ptr<WinEHFuncInfo> FuncInfoPtr; if (!MMI) FuncInfoPtr.reset(new WinEHFuncInfo()); WinEHFuncInfo &FuncInfo = *(MMI ? &MMI->getWinEHFuncInfo(&F) : FuncInfoPtr.get()); FuncInfo.EHRegNode = RegNode; switch (Personality) { default: llvm_unreachable("unexpected personality function"); case EHPersonality::MSVC_CXX: addCXXStateStores(F, FuncInfo); break; case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, FuncInfo); break; } // Reset per-function state. PersonalityFn = nullptr; Personality = EHPersonality::Unknown; return true; }
/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC. bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) { bool Changed = false; SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (F && !F->isDeclaration() && !F->mayBeOverridden()) SCCNodes.insert(F); } ArgumentGraph AG; AttrBuilder B; B.addAttribute(Attributes::NoCapture); // Check each function in turn, determining which pointer arguments are not // captured. for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (F == 0) // External node - only a problem for arguments that we pass to it. continue; // Definitions with weak linkage may be overridden at linktime with // something that captures pointers, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) continue; // Functions that are readonly (or readnone) and nounwind and don't return // a value can't capture arguments. Don't analyze them. if (F->onlyReadsMemory() && F->doesNotThrow() && F->getReturnType()->isVoidTy()) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(Attributes::get(F->getContext(), B)); ++NumNoCapture; Changed = true; } } continue; } for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { ArgumentUsesTracker Tracker(SCCNodes); PointerMayBeCaptured(A, &Tracker); if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. A->addAttr(Attributes::get(F->getContext(), B)); ++NumNoCapture; Changed = true; } else { // If it's not trivially captured and not trivially not captured, // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. ArgumentGraphNode *Node = AG[A]; for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(), UE = Tracker.Uses.end(); UI != UE; ++UI) Node->Uses.push_back(AG[*UI]); } } // Otherwise, it's captured. Don't bother doing SCC analysis on it. } } // The graph we've collected is partial because we stopped scanning for // argument uses once we solved the argument trivially. These partial nodes // show up as ArgumentGraphNode objects with an empty Uses list, and for // these nodes the final decision about whether they capture has already been // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG); I != E; ++I) { std::vector<ArgumentGraphNode*> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { if (!ArgumentSCC[0]->Definition) continue; // synthetic root node // eg. "void f(int* x) { if (...) 
f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { ArgumentSCC[0]-> Definition-> addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B)); ++NumNoCapture; Changed = true; } continue; } bool SCCCaptured = false; for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *Node = *I; if (Node->Uses.empty()) { if (!Node->Definition->hasNoCaptureAttr()) SCCCaptured = true; } } if (SCCCaptured) continue; SmallPtrSet<Argument*, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { ArgumentSCCNodes.insert((*I)->Definition); } for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(), UE = N->Uses.end(); UI != UE; ++UI) { Argument *A = (*UI)->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; SCCCaptured = true; break; } } if (SCCCaptured) continue; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; A->addAttr(Attributes::get(A->getContext(), B)); ++NumNoCapture; Changed = true; } } return Changed; }
bool ObjCARCContract::runOnFunction(Function &F) { if (!EnableARCOpts) return false; // If nothing in the Module uses ARC, don't do anything. if (!Run) return false; Changed = false; AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); PA.setAA(&getAnalysis<AliasAnalysis>()); // Track whether it's ok to mark objc_storeStrong calls with the "tail" // keyword. Be conservative if the function has variadic arguments. // It seems that functions which "return twice" are also unsafe for the // "tail" argument, because they are setjmp, which could need to // return to an earlier stack state. bool TailOkForStoreStrongs = !F.isVarArg() && !F.callsFunctionThatReturnsTwice(); // For ObjC library calls which return their argument, replace uses of the // argument with uses of the call return value, if it dominates the use. This // reduces register pressure. SmallPtrSet<Instruction *, 4> DependingInstructions; SmallPtrSet<const BasicBlock *, 4> Visited; for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { Instruction *Inst = &*I++; DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n"); // Only these library routines return their argument. In particular, // objc_retainBlock does not necessarily return its argument. InstructionClass Class = GetBasicInstructionClass(Inst); switch (Class) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: break; case IC_Autorelease: case IC_AutoreleaseRV: if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) continue; break; case IC_Retain: // Attempt to convert retains to retainrvs if they are next to function // calls. if (!OptimizeRetainCall(F, Inst)) break; // If we succeed in our optimization, fall through. // FALLTHROUGH case IC_RetainRV: { // If we're compiling for a target which needs a special inline-asm // marker to do the retainAutoreleasedReturnValue optimization, // insert it now. if (!RetainRVMarker) break; BasicBlock::iterator BBI = Inst; BasicBlock *InstParent = Inst->getParent(); // Step up to see if the call immediately precedes the RetainRV call. // If it's an invoke, we have to cross a block boundary. And we have // to carefully dodge no-op instructions. do { if (&*BBI == InstParent->begin()) { BasicBlock *Pred = InstParent->getSinglePredecessor(); if (!Pred) goto decline_rv_optimization; BBI = Pred->getTerminator(); break; } --BBI; } while (IsNoopInstruction(BBI)); if (&*BBI == GetObjCArg(Inst)) { DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " "retainAutoreleasedReturnValue optimization.\n"); Changed = true; InlineAsm *IA = InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), /*isVarArg=*/false), RetainRVMarker->getString(), /*Constraints=*/"", /*hasSideEffects=*/true); CallInst::Create(IA, "", Inst); } decline_rv_optimization: break; } case IC_InitWeak: { // objc_initWeak(p, null) => *p = null CallInst *CI = cast<CallInst>(Inst); if (IsNullOrUndef(CI->getArgOperand(1))) { Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType())); Changed = true; new StoreInst(Null, CI->getArgOperand(0), CI); DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n" << " New = " << *Null << "\n"); CI->replaceAllUsesWith(Null); CI->eraseFromParent(); } continue; } case IC_Release: ContractRelease(Inst, I); continue; case IC_User: // Be conservative if the function has any alloca instructions. // Technically we only care about escaping alloca instructions, // but this is sufficient to handle some interesting cases. 
if (isa<AllocaInst>(Inst)) TailOkForStoreStrongs = false; continue; case IC_IntrinsicUser: // Remove calls to @clang.arc.use(...). Inst->eraseFromParent(); continue; default: continue; } DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n"); // Don't use GetObjCArg because we don't want to look through bitcasts // and such; to do the replacement, the argument must have type i8*. const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); for (;;) { // If we're compiling bugpointed code, don't get in trouble. if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) break; // Look through the uses of the pointer. for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE; ) { Use &U = UI.getUse(); unsigned OperandNo = UI.getOperandNo(); ++UI; // Increment UI now, because we may unlink its element. // If the call's return value dominates a use of the call's argument // value, rewrite the use to use the return value. We check for // reachability here because an unreachable call is considered to // trivially dominate itself, which would lead us to rewriting its // argument in terms of its return value, which would lead to // infinite loops in GetObjCArg. if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { Changed = true; Instruction *Replacement = Inst; Type *UseTy = U.get()->getType(); if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { // For PHI nodes, insert the bitcast in the predecessor block. unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); BasicBlock *BB = PHI->getIncomingBlock(ValNo); if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", &BB->back()); // While we're here, rewrite all edges for this PHI, rather // than just one use at a time, to minimize the number of // bitcasts we emit. for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) if (PHI->getIncomingBlock(i) == BB) { // Keep the UI iterator valid. if (&PHI->getOperandUse( PHINode::getOperandNumForIncomingValue(i)) == &UI.getUse()) ++UI; PHI->setIncomingValue(i, Replacement); } } else { if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", cast<Instruction>(U.getUser())); U.set(Replacement); } } } // If Arg is a no-op casted pointer, strip one level of casts and iterate. if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) Arg = BI->getOperand(0); else if (isa<GEPOperator>(Arg) && cast<GEPOperator>(Arg)->hasAllZeroIndices()) Arg = cast<GEPOperator>(Arg)->getPointerOperand(); else if (isa<GlobalAlias>(Arg) && !cast<GlobalAlias>(Arg)->mayBeOverridden()) Arg = cast<GlobalAlias>(Arg)->getAliasee(); else break; } } // If this function has no escaping allocas or suspicious vararg usage, // objc_storeStrong calls can be marked with the "tail" keyword. if (TailOkForStoreStrongs) for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), E = StoreStrongCalls.end(); I != E; ++I) (*I)->setTailCall(); StoreStrongCalls.clear(); return Changed; }
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) SCCNodes.insert((*I)->getFunction()); // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (F == 0) // External node - may write memory. Just give up. return false; AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); if (MRB == AliasAnalysis::DoesNotAccessMemory) // Already perfect! continue; // Definitions with weak linkage may be overridden at linktime with // something that writes memory, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) { if (!AliasAnalysis::onlyReadsMemory(MRB)) // May write memory. Just give up. return false; ReadsMemory = true; continue; } // Scan the function body for instructions that may read or write memory. for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { Instruction *I = &*II; // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS(cast<Value>(I)); if (CS) { // Ignore calls to functions in the same SCC. if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) continue; AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); // If the call doesn't access arbitrary memory, we may be able to // figure out something. if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { // If the call does access argument pointees, check each argument. if (AliasAnalysis::doesAccessArgPointees(MRB)) // Check whether all pointer arguments point to local memory, and // ignore calls that only access local memory. for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; if (Arg->getType()->isPointerTy()) { AliasAnalysis::Location Loc(Arg, AliasAnalysis::UnknownSize, I->getMetadata(LLVMContext::MD_tbaa)); if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { if (MRB & AliasAnalysis::Mod) // Writes non-local memory. Give up. return false; if (MRB & AliasAnalysis::Ref) // Ok, it reads non-local memory. ReadsMemory = true; } } } continue; } // The call could access any memory. If that includes writes, give up. if (MRB & AliasAnalysis::Mod) return false; // If it reads, note it. if (MRB & AliasAnalysis::Ref) ReadsMemory = true; continue; } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore non-volatile loads from local memory. (Atomic is okay here.) if (!LI->isVolatile()) { AliasAnalysis::Location Loc = AA->getLocation(LI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { // Ignore non-volatile stores to local memory. (Atomic is okay here.) if (!SI->isVolatile()) { AliasAnalysis::Location Loc = AA->getLocation(SI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) { // Ignore vaargs on local memory. 
AliasAnalysis::Location Loc = AA->getLocation(VI); if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) continue; } // Any remaining instructions need to be taken seriously! Check if they // read or write memory. if (I->mayWriteToMemory()) // Writes memory. Just give up. return false; // If this instruction may read memory, remember that. ReadsMemory |= I->mayReadFromMemory(); } } // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); if (F->doesNotAccessMemory()) // Already perfect! continue; if (F->onlyReadsMemory() && ReadsMemory) // No change. continue; MadeChange = true; // Clear out any existing attributes. AttrBuilder B; B.addAttribute(Attributes::ReadOnly) .addAttribute(Attributes::ReadNone); F->removeAttribute(AttributeSet::FunctionIndex, Attributes::get(F->getContext(), B)); // Add in the new attribute. B.clear(); B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone); F->addAttribute(AttributeSet::FunctionIndex, Attributes::get(F->getContext(), B)); if (ReadsMemory) ++NumReadOnly; else ++NumReadNone; } return MadeChange; }
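AddReadAttrs effectively folds every instruction's memory behaviour into a three-valued summary for the whole SCC and then chooses between readnone and readonly. Below is a small self-contained sketch of that summary lattice; the Inst struct and its mayRead/mayWrite flags are stand-ins for the alias-analysis queries, not real LLVM types.

#include <iostream>
#include <vector>

enum class MemEffect { None, ReadOnly, ReadWrite };

struct Inst { bool mayRead; bool mayWrite; };

// Any write forfeits both attributes; any read downgrades readnone to readonly.
MemEffect summarize(const std::vector<Inst> &body) {
  bool readsMemory = false;
  for (const Inst &I : body) {
    if (I.mayWrite)
      return MemEffect::ReadWrite;   // "Writes memory. Just give up."
    readsMemory |= I.mayRead;
  }
  return readsMemory ? MemEffect::ReadOnly : MemEffect::None;
}

int main() {
  std::vector<Inst> pureBody = {{false, false}, {false, false}};
  std::vector<Inst> readBody = {{true, false}, {false, false}};
  std::cout << (summarize(pureBody) == MemEffect::None) << '\n';      // 1
  std::cout << (summarize(readBody) == MemEffect::ReadOnly) << '\n';  // 1
}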
bool LevenbergMarquart(Function& f, real_type t, Vector& x, real_type atol, real_type rtol, unsigned *itCount, unsigned maxit) { Log(NewtonMethod, Debug3) << "Start guess\nx = " << trans(x) << endl; Matrix J; LinAlg::MatrixFactors<real_type,0,0,LinAlg::LUTag> jacFactors; bool converged = false; real_type tau = 1e-1; real_type nu = 2; // Compute in each step a new jacobian f.jac(t, x, J); Log(NewtonMethod, Debug3) << "Jacobian is:\n" << J << endl; real_type mu = tau*norm1(J); Vector fx; // Compute the actual error f.eval(t, x, fx); Vector g = trans(J)*fx; do { jacFactors = trans(J)*J + mu*LinAlg::Eye<real_type,0,0>(rows(x), rows(x)); Log(NewtonMethod, Debug) << "Jacobian is " << (jacFactors.singular() ? "singular" : "ok") << endl; // Compute the search direction Vector h = jacFactors.solve(-g); Log(NewtonMethod, Debug) << "Solve Residual " << norm(trans(J)*J*h + mu*h + g)/norm(g) << endl; // Get a better search guess Vector xnew = x + h; // check convergence converged = equal(x, xnew, atol, rtol); Log(NewtonMethod, Debug) << "Convergence test: ||h||_1 = " << norm1(h) << ", converged = " << converged << endl; if (converged) break; f.eval(t, x, fx); real_type Fx = norm(fx); f.eval(t, xnew, fx); real_type Fxnew = norm(fx); real_type rho = (Fx - Fxnew)/(0.5*dot(h, mu*h - g)); Log(NewtonMethod, Debug) << "Rho = " << rho << ", Fxnew = " << Fxnew << ", Fx = " << Fx << endl; if (0 < rho) { Log(NewtonMethod, Debug) << "Accepted step!" << endl; Log(NewtonMethod, Debug3) << "xnew = " << trans(xnew) << endl; Log(NewtonMethod, Debug3) << "h = " << trans(h) << endl; // New guess is the better one x = xnew; f.jac(t, x, J); Log(NewtonMethod, Debug3) << "Jacobian is:\n" << J << endl; // Compute the actual error f.eval(t, x, fx); g = trans(J)*fx; converged = norm1(g) < atol; Log(NewtonMethod, Debug) << "||g||_1 = " << norm1(g) << endl; mu = mu * max(real_type(1)/3, 1-pow(2*rho-1, real_type(3))); nu = 2; } else { mu = mu * nu; nu = 2 * nu; } } while (!converged); return converged; }
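The damping update above follows the usual Levenberg-Marquardt scheme: the gain ratio rho compares the actual reduction of the residual with the reduction predicted by the linear model, the step is accepted only when rho is positive, and mu shrinks by max(1/3, 1 - (2*rho - 1)^3) on success or grows by nu on failure. A one-dimensional sketch of the same iteration, solving x^2 = 2, is given below; it is purely illustrative and does not use the Function/Vector/Matrix types of the solver above.

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  auto f = [](double x) { return x * x - 2.0; };   // residual
  auto J = [](double x) { return 2.0 * x; };       // Jacobian (scalar)

  double x = 3.0;
  double mu = 1e-1 * std::fabs(J(x));
  double nu = 2.0;

  for (int it = 0; it < 50; ++it) {
    double fx = f(x);
    double g = J(x) * fx;
    double h = -g / (J(x) * J(x) + mu);            // damped Gauss-Newton step
    if (std::fabs(h) < 1e-12)
      break;
    // Gain ratio: actual vs. predicted reduction of 0.5*f(x)^2.
    double rho = (fx * fx - f(x + h) * f(x + h)) / (h * (mu * h - g));
    if (rho > 0) {                                 // accept the step
      x += h;
      mu *= std::max(1.0 / 3.0, 1.0 - std::pow(2.0 * rho - 1.0, 3.0));
      nu = 2.0;
    } else {                                       // reject, increase damping
      mu *= nu;
      nu *= 2.0;
    }
  }
  std::printf("x = %.12f\n", x);                   // ~1.414213562373
}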
int CodeGenerator::getDependency(const Function& f) const { const void* h = static_cast<const void*>(f.get()); PointerMap::const_iterator it=added_dependencies_.find(h); casadi_assert(it!=added_dependencies_.end()); return it->second; }
void interpolate_nonmatching_mesh(const GenericFunction& u0, Function& u) { // Interpolate from GenericFunction u0 to FunctionSpace of Function u // The FunctionSpace of u can have a different mesh than that of u0 // (if u0 has a mesh) // // The algorithm is like this // // 1) Tabulate all coordinates for all dofs in u.function_space() // 2) Create a map from dof to component number in Mixed Space. // 3) Evaluate u0 for all coordinates in u (computed in 1)). // Problem here is that u0 and u will have different meshes // and as such a vertex in u will not necessarily be found // on the same processor for u0. Hence the vertex will be // passed around and searched on all ranks until found. // 4) Set all values in local u using the dof to component map // Get the function space interpolated to boost::shared_ptr<const FunctionSpace> V = u.function_space(); // Get mesh and dimension of the FunctionSpace interpolated to const Mesh& mesh = *V->mesh(); const std::size_t gdim = mesh.geometry().dim(); // Create arrays used to evaluate one point std::vector<double> x(gdim); std::vector<double> values(u.value_size()); Array<double> _x(gdim, x.data()); Array<double> _values(u.value_size(), values.data()); // Create vector to hold all local values of u std::vector<double> local_u_vector(u.vector()->local_size()); // Get coordinates of all dofs on mesh of this processor std::vector<double> coords = V->dofmap()->tabulate_all_coordinates(mesh); // Get dof ownership range std::pair<std::size_t, std::size_t> owner_range = V->dofmap()->ownership_range(); // Get a map from global dofs to component number in mixed space std::map<std::size_t, std::size_t> dof_component_map; int component = -1; extract_dof_component_map(dof_component_map, *V, &component); // Search this process first for all coordinates in u's local mesh std::vector<std::size_t> global_dofs_not_found; std::vector<double> coords_not_found; for (std::size_t j=0; j<coords.size()/gdim; j++) { std::copy(coords.begin()+j*gdim, coords.begin()+(j+1)*gdim, x.begin()); try { // store when point is found u0.eval(_values, _x); // This evaluates all dofs, but need only one component. Possible fix? local_u_vector[j] = values[dof_component_map[j+owner_range.first]]; } catch (std::exception &e) { // If not found then it must be seached on the other processes global_dofs_not_found.push_back(j+owner_range.first); for (std::size_t jj=0; jj<gdim; jj++) coords_not_found.push_back(x[jj]); } } // Send all points not found to processor with one higher rank. // Search there and send found points back to owner and not found to // next processor in line. By the end of this loop all processors // will have been searched and thus if not found the point is not // in the mesh of Function u0. In that case the point will take // the value of zero. 
std::size_t num_processes = MPI::num_processes(); std::size_t rank = MPI::process_number(); for (std::size_t k = 1; k < num_processes; ++k) { std::vector<double> coords_recv; std::vector<std::size_t> global_dofs_recv; std::size_t src = (rank-1+num_processes) % num_processes; std::size_t dest = (rank+1) % num_processes; MPI::send_recv(global_dofs_not_found, dest, global_dofs_recv, src); MPI::send_recv(coords_not_found, dest, coords_recv, src); global_dofs_not_found.clear(); coords_not_found.clear(); // Search this processor for received points std::vector<std::size_t> global_dofs_found; std::vector<std::vector<double> > coefficients_found; for (std::size_t j=0; j<coords_recv.size()/gdim; j++) { std::size_t m = global_dofs_recv[j]; std::copy(coords_recv.begin()+j*gdim, coords_recv.begin()+(j+1)*gdim, x.begin()); try { // push back when point is found u0.eval(_values, _x); coefficients_found.push_back(values); global_dofs_found.push_back(m); } catch (std::exception &e) { // If not found then collect and send to next rank global_dofs_not_found.push_back(m); for (std::size_t jj=0; jj<gdim; jj++) coords_not_found.push_back(x[jj]); } } // Send found coefficients back to owner (dest) std::vector<std::size_t> global_dofs_found_recv; std::vector<std::vector<double> > coefficients_found_recv; dest = (rank-k+num_processes) % num_processes; src = (rank+k) % num_processes; MPI::send_recv(global_dofs_found, dest, global_dofs_found_recv, src); MPI::send_recv(coefficients_found, dest, coefficients_found_recv, src); // Move all found coefficients onto the local_u_vector // Choose the correct component using dof_component_map for (std::size_t j=0; j<global_dofs_found_recv.size(); j++) { std::size_t m = global_dofs_found_recv[j]-owner_range.first; std::size_t n = dof_component_map[m+owner_range.first]; local_u_vector[m] = coefficients_found_recv[j][n]; } // Note that this algorithm computes and sends back all values, // i.e., coefficients_found pushes back the entire vector for all // components in mixed space. An alternative algorithm is to send // around the correct component number in addition to global dof number // and coordinates and then just send back the correct value. } u.vector()->set_local(local_u_vector); }
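The communication above is a ring schedule: at step k every rank forwards its still-unresolved points to rank+1, receives from rank-1, and ships whatever it managed to evaluate straight back to the owner, which sits k hops behind. The standalone sketch below only reproduces that src/dest arithmetic for an assumed process count of four; it makes no MPI calls and the printed schedule is purely illustrative.

#include <cstdio>

int main() {
  const int num_processes = 4;   // assumed, for illustration only
  for (int rank = 0; rank < num_processes; ++rank) {
    for (int k = 1; k < num_processes; ++k) {
      // Forward unresolved points one hop around the ring...
      int src  = (rank - 1 + num_processes) % num_processes;
      int dest = (rank + 1) % num_processes;
      // ...and return anything found at this step to its original owner.
      int owner_dest = (rank - k + num_processes) % num_processes;
      int owner_src  = (rank + k) % num_processes;
      std::printf("rank %d, step %d: recv from %d, send to %d, "
                  "results to %d, results from %d\n",
                  rank, k, src, dest, owner_dest, owner_src);
    }
  }
}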
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (skipFunction(F)) return false; // TODO: Could probably handle variadic functions. if (F.isVarArg() || F.hasStructRetAttr() || AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); unsigned ReturnNumRegs = 0; SmallSet<int, 4> OutArgIndexes; SmallVector<Type *, 4> ReturnTypes; Type *RetTy = F.getReturnType(); if (!RetTy->isVoidTy()) { ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4; if (ReturnNumRegs >= MaxNumRetRegs) return false; ReturnTypes.push_back(RetTy); } SmallVector<Argument *, 4> OutArgs; for (Argument &Arg : F.args()) { if (isOutArgumentCandidate(Arg)) { LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg << " in function " << F.getName() << '\n'); OutArgs.push_back(&Arg); } } if (OutArgs.empty()) return false; using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>; DenseMap<ReturnInst *, ReplacementVec> Replacements; SmallVector<ReturnInst *, 4> Returns; for (BasicBlock &BB : F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back())) Returns.push_back(RI); } if (Returns.empty()) return false; bool Changing; do { Changing = false; // Keep retrying if we are able to successfully eliminate an argument. This // helps with cases with multiple arguments which may alias, such as in a // sincos implemntation. If we have 2 stores to arguments, on the first // attempt the MDA query will succeed for the second store but not the // first. On the second iteration we've removed that out clobbering argument // (by effectively moving it into another function) and will find the second // argument is OK to move. for (Argument *OutArg : OutArgs) { bool ThisReplaceable = true; SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores; Type *ArgTy = OutArg->getType()->getPointerElementType(); // Skip this argument if converting it will push us over the register // count to return limit. // TODO: This is an approximation. When legalized this could be more. We // can ask TLI for exactly how many. unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4; if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs) continue; // An argument is convertible only if all exit blocks are able to replace // it. for (ReturnInst *RI : Returns) { BasicBlock *BB = RI->getParent(); MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg), true, BB->end(), BB, RI); StoreInst *SI = nullptr; if (Q.isDef()) SI = dyn_cast<StoreInst>(Q.getInst()); if (SI) { LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n'); ReplaceableStores.emplace_back(RI, SI); } else { ThisReplaceable = false; break; } } if (!ThisReplaceable) continue; // Try the next argument candidate. for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) { Value *ReplVal = Store.second->getValueOperand(); auto &ValVec = Replacements[Store.first]; if (llvm::find_if(ValVec, [OutArg](const std::pair<Argument *, Value *> &Entry) { return Entry.first == OutArg;}) != ValVec.end()) { LLVM_DEBUG(dbgs() << "Saw multiple out arg stores" << *OutArg << '\n'); // It is possible to see stores to the same argument multiple times, // but we expect these would have been optimized out already. 
ThisReplaceable = false; break; } ValVec.emplace_back(OutArg, ReplVal); Store.second->eraseFromParent(); } if (ThisReplaceable) { ReturnTypes.push_back(ArgTy); OutArgIndexes.insert(OutArg->getArgNo()); ++NumOutArgumentsReplaced; Changing = true; } } } while (Changing); if (Replacements.empty()) return false; LLVMContext &Ctx = F.getParent()->getContext(); StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName()); FunctionType *NewFuncTy = FunctionType::get(NewRetTy, F.getFunctionType()->params(), F.isVarArg()); LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n'); Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage, F.getName() + ".body"); F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc); NewFunc->copyAttributesFrom(&F); NewFunc->setComdat(F.getComdat()); // We want to preserve the function and param attributes, but need to strip // off any return attributes, e.g. zeroext doesn't make sense with a struct. NewFunc->stealArgumentListFrom(F); AttrBuilder RetAttrs; RetAttrs.addAttribute(Attribute::SExt); RetAttrs.addAttribute(Attribute::ZExt); RetAttrs.addAttribute(Attribute::NoAlias); NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs); // TODO: How to preserve metadata? // Move the body of the function into the new rewritten function, and replace // this function with a stub. NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList()); for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) { ReturnInst *RI = Replacement.first; IRBuilder<> B(RI); B.SetCurrentDebugLocation(RI->getDebugLoc()); int RetIdx = 0; Value *NewRetVal = UndefValue::get(NewRetTy); Value *RetVal = RI->getReturnValue(); if (RetVal) NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++); for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) { Argument *Arg = ReturnPoint.first; Value *Val = ReturnPoint.second; Type *EltTy = Arg->getType()->getPointerElementType(); if (Val->getType() != EltTy) { Type *EffectiveEltTy = EltTy; if (StructType *CT = dyn_cast<StructType>(EltTy)) { assert(CT->getNumElements() == 1); EffectiveEltTy = CT->getElementType(0); } if (DL->getTypeSizeInBits(EffectiveEltTy) != DL->getTypeSizeInBits(Val->getType())) { assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType())); Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()), { 0, 1, 2 }); } Val = B.CreateBitCast(Val, EffectiveEltTy); // Re-create single element composite. if (EltTy != EffectiveEltTy) Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0); } NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++); } if (RetVal) RI->setOperand(0, NewRetVal); else { B.CreateRet(NewRetVal); RI->eraseFromParent(); } } SmallVector<Value *, 16> StubCallArgs; for (Argument &Arg : F.args()) { if (OutArgIndexes.count(Arg.getArgNo())) { // It's easier to preserve the type of the argument list. We rely on // DeadArgumentElimination to take care of these. StubCallArgs.push_back(UndefValue::get(Arg.getType())); } else { StubCallArgs.push_back(&Arg); } } BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F); IRBuilder<> B(StubBB); CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs); int RetIdx = RetTy->isVoidTy() ? 
0 : 1; for (Argument &Arg : F.args()) { if (!OutArgIndexes.count(Arg.getArgNo())) continue; PointerType *ArgType = cast<PointerType>(Arg.getType()); auto *EltTy = ArgType->getElementType(); unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL->getABITypeAlignment(EltTy); Value *Val = B.CreateExtractValue(StubCall, RetIdx++); Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace()); // We can peek through bitcasts, so the type may not match. Value *PtrVal = B.CreateBitCast(&Arg, PtrTy); B.CreateAlignedStore(Val, PtrVal, Align); } if (!RetTy->isVoidTy()) { B.CreateRet(B.CreateExtractValue(StubCall, 0)); } else { B.CreateRetVoid(); } // The function is now a stub we want to inline. F.addFnAttr(Attribute::AlwaysInline); ++NumOutArgumentFunctionsReplaced; return true; }
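The transformation the pass performs is easiest to picture in ordinary C++: a function that returns results through pointer arguments becomes a by-value body returning a struct, and the original signature survives only as a stub that calls the body and stores the struct fields through the original pointers. The sketch below is a hand-written analogue of that before/after shape; the sincos naming merely echoes the comment above and is not taken from the pass.

#include <cmath>
#include <cstdio>

// "Rewritten" body: the out-arguments have become fields of the return value.
struct SinCosResult { float s; float c; };

static SinCosResult sincos_body(float x) {
  return { std::sin(x), std::cos(x) };
}

// Stub with the original out-argument signature: call the body, then perform
// the stores that used to live at each return point.
static void sincos_out(float x, float *s, float *c) {
  SinCosResult r = sincos_body(x);
  *s = r.s;
  *c = r.c;
}

int main() {
  float s = 0.0f, c = 0.0f;
  sincos_out(0.5f, &s, &c);
  std::printf("%f %f\n", s, c);
}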
/// run - Start execution with the specified function and arguments. /// GenericValue JIT::runFunction(Function *F, const std::vector<GenericValue> &ArgValues) { assert(F && "Function *F was null at entry to run()"); void *FPtr = getPointerToFunction(F); assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); const FunctionType *FTy = F->getFunctionType(); const Type *RetTy = FTy->getReturnType(); assert((FTy->getNumParams() == ArgValues.size() || (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && "Wrong number of arguments passed into function!"); assert(FTy->getNumParams() == ArgValues.size() && "This doesn't support passing arguments through varargs (yet)!"); // Handle some common cases first. These cases correspond to common `main' // prototypes. if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: if (FTy->getParamType(0)->isIntegerTy(32) && FTy->getParamType(1)->isPointerTy() && FTy->getParamType(2)->isPointerTy()) { int (*PF)(int, char **, const char **) = (int(*)(int, char **, const char **))(intptr_t)FPtr; // Call the function. GenericValue rv; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]), (const char **)GVTOP(ArgValues[2]))); return rv; } break; case 2: if (FTy->getParamType(0)->isIntegerTy(32) && FTy->getParamType(1)->isPointerTy()) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; // Call the function. GenericValue rv; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]))); return rv; } break; case 1: if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); return rv; } break; } } // Handle cases where no arguments are passed first. if (ArgValues.empty()) { GenericValue rv; switch (RetTy->getTypeID()) { default: llvm_unreachable("Unknown return type for function call!"); case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth(); if (BitWidth == 1) rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); else if (BitWidth <= 8) rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); else if (BitWidth <= 16) rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); else if (BitWidth <= 32) rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); else if (BitWidth <= 64) rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); else llvm_unreachable("Integer types > 64 bits not supported"); return rv; } case Type::VoidTyID: rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); return rv; case Type::FloatTyID: rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); return rv; case Type::DoubleTyID: rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); return rv; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: llvm_unreachable("long double not supported yet"); return rv; case Type::PointerTyID: return PTOGV(((void*(*)())(intptr_t)FPtr)()); } } // Okay, this is not one of our quick and easy cases. Because we don't have a // full FFI, we have to codegen a nullary stub function that just calls the // function we are interested in, passing in constants for all of the // arguments. Make this function and return. // First, create the function. FunctionType *STy=FunctionType::get(RetTy, false); Function *Stub = Function::Create(STy, Function::InternalLinkage, "", F->getParent()); // Insert a basic block. 
BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub); // Convert all of the GenericValue arguments over to constants. Note that we // currently don't support varargs. SmallVector<Value*, 8> Args; for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { Constant *C = 0; const Type *ArgTy = FTy->getParamType(i); const GenericValue &AV = ArgValues[i]; switch (ArgTy->getTypeID()) { default: llvm_unreachable("Unknown argument type for function call!"); case Type::IntegerTyID: C = ConstantInt::get(F->getContext(), AV.IntVal); break; case Type::FloatTyID: C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal)); break; case Type::DoubleTyID: C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal)); break; case Type::PPC_FP128TyID: case Type::X86_FP80TyID: case Type::FP128TyID: C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal)); break; case Type::PointerTyID: void *ArgPtr = GVTOP(AV); if (sizeof(void*) == 4) C = ConstantInt::get(Type::getInt32Ty(F->getContext()), (int)(intptr_t)ArgPtr); else C = ConstantInt::get(Type::getInt64Ty(F->getContext()), (intptr_t)ArgPtr); // Cast the integer to pointer C = ConstantExpr::getIntToPtr(C, ArgTy); break; } Args.push_back(C); } CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(), "", StubBB); TheCall->setCallingConv(F->getCallingConv()); TheCall->setTailCall(); if (!TheCall->getType()->isVoidTy()) // Return result of the call. ReturnInst::Create(F->getContext(), TheCall, StubBB); else ReturnInst::Create(F->getContext(), StubBB); // Just return void. // Finally, call our nullary stub function. GenericValue Result = runFunction(Stub, std::vector<GenericValue>()); // Erase it, since no other function can have a reference to it. Stub->eraseFromParent(); // And return the result. return Result; }
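The fast paths in runFunction work by casting the address of the JIT'd code to a function pointer whose type matches the expected prototype and calling through it. The sketch below shows that cast-and-call pattern in isolation, using an ordinary function's address where the JIT would supply one; the names are illustrative only.

#include <cstdint>
#include <cstdio>

static int add(int a, int b) { return a + b; }

int main() {
  // Pretend this opaque pointer came back from getPointerToFunction().
  void *FPtr = reinterpret_cast<void *>(reinterpret_cast<intptr_t>(&add));

  // Only after matching the prototype do we recover something callable.
  int (*PF)(int, int) =
      reinterpret_cast<int (*)(int, int)>(reinterpret_cast<intptr_t>(FPtr));

  std::printf("%d\n", PF(2, 3));   // prints 5
}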
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); AtomicOrdering FailureOrder = CI->getFailureOrdering(); Value *Addr = CI->getPointerOperand(); BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); // If getInsertFencesForAtomic() returns true, then the target does not want // to deal with memory orders, and emitLeading/TrailingFence should take care // of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. AtomicOrdering MemOpOrder = TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // // The full expansion we produce is: // [...] // fence? // cmpxchg.start: // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure // cmpxchg.success: // fence? // br label %cmpxchg.end // cmpxchg.failure: // fence? // br label %cmpxchg.end // cmpxchg.end: // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); // This grabs the DebugLoc from CI IRBuilder<> Builder(CI); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); Builder.CreateCondBr(StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : LoopBB); // Make sure later instructions don't get reordered with a fence if necessary. Builder.SetInsertPoint(SuccessBB); TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg // succeeded or not. 
// We expose this to later passes by converting any // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. // Setup the builder so we can create any PHIs we need. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. SmallVector<ExtractValueInst *, 2> PrunedInsts; for (auto User : CI->users()) { ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); if (!EV) continue; assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && "weird extraction from { iN, i1 }"); if (EV->getIndices()[0] == 0) EV->replaceAllUsesWith(Loaded); else EV->replaceAllUsesWith(Success); PrunedInsts.push_back(EV); } // We can remove the instructions now we're no longer iterating through them. for (auto EV : PrunedInsts) EV->eraseFromParent(); if (!CI->use_empty()) { // Some use of the full struct return that we don't understand has happened, // so we've got to reconstruct it properly. Value *Res; Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); Res = Builder.CreateInsertValue(Res, Success, 1); CI->replaceAllUsesWith(Res); } CI->eraseFromParent(); return true; }
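One detail the expansion has to respect is the weak/strong distinction: a failed store-conditional sends a weak cmpxchg to the failure block but sends a strong one back around the loop. Portable C++ mirrors this, since a strong compare-exchange can be built by retrying a weak one; the sketch below shows that retry structure with std::atomic and is unrelated to the IR-level code above.

#include <atomic>
#include <cstdio>

// Retry a weak CAS until it either succeeds or fails because the observed
// value genuinely differs from 'expected' (i.e. not a spurious failure).
static bool strong_cas(std::atomic<int> &a, int expected, int desired) {
  int seen = expected;
  while (!a.compare_exchange_weak(seen, desired)) {
    if (seen != expected)
      return false;   // real failure: the value did not match
    // spurious failure: 'seen' still equals 'expected', so just retry
  }
  return true;
}

int main() {
  std::atomic<int> v{41};
  std::printf("%d %d\n", strong_cas(v, 41, 42) ? 1 : 0, v.load());   // 1 42
}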
// run - Run the transformation on the program. We grab the function // prototypes for longjmp and setjmp. If they are used in the program, // then we can go directly to the places they're at and transform them. bool LowerSetJmp::runOnModule(Module& M) { bool Changed = false; // These are what the functions are called. Function* SetJmp = M.getFunction("llvm.setjmp"); Function* LongJmp = M.getFunction("llvm.longjmp"); // This program doesn't have longjmp and setjmp calls. if ((!LongJmp || LongJmp->use_empty()) && (!SetJmp || SetJmp->use_empty())) return false; // Initialize some values and functions we'll need to transform the // setjmp/longjmp functions. doInitialization(M); if (SetJmp) { for (Value::use_iterator B = SetJmp->use_begin(), E = SetJmp->use_end(); B != E; ++B) { BasicBlock* BB = cast<Instruction>(*B)->getParent(); for (df_ext_iterator<BasicBlock*> I = df_ext_begin(BB, DFSBlocks), E = df_ext_end(BB, DFSBlocks); I != E; ++I) /* empty */; } while (!SetJmp->use_empty()) { assert(isa<CallInst>(SetJmp->use_back()) && "User of setjmp intrinsic not a call?"); TransformSetJmpCall(cast<CallInst>(SetJmp->use_back())); Changed = true; } } if (LongJmp) while (!LongJmp->use_empty()) { assert(isa<CallInst>(LongJmp->use_back()) && "User of longjmp intrinsic not a call?"); TransformLongJmpCall(cast<CallInst>(LongJmp->use_back())); Changed = true; } // Now go through the affected functions and convert calls and invokes // to new invokes... for (std::map<Function*, AllocaInst*>::iterator B = SJMap.begin(), E = SJMap.end(); B != E; ++B) { Function* F = B->first; for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB) for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ) { visit(*IB++); if (IB != BB->end() && IB->getParent() != BB) break; // The next instruction got moved to a different block! } } DFSBlocks.clear(); SJMap.clear(); RethrowBBMap.clear(); PrelimBBMap.clear(); SwitchValMap.clear(); SetJmpIDMap.clear(); return Changed; }
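As background for what the pass must model, the primitives behave as in the tiny program below: setjmp records a resume point in the current activation and returns 0, and a later longjmp abandons the intervening frames and resumes at that point with a nonzero value. The sketch deliberately avoids C++ objects with destructors, since longjmp does not run them.

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Env;

static void deep_call(void) {
  std::longjmp(Env, 7);   // unwind straight back to the setjmp site
}

int main() {
  switch (setjmp(Env)) {
  case 0:
    std::printf("first pass through setjmp\n");
    deep_call();          // never returns normally
    break;
  default:
    std::printf("resumed after longjmp\n");
    break;
  }
}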
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { assert(AI); AtomicOrdering MemOpOrder = AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] // %init_loaded = load atomic iN* %addr // br label %loop // loop: // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] // %new = some_op iN %loaded, %incr // %pair = cmpxchg iN* %addr, iN %loaded, iN %new // %new_loaded = extractvalue { iN, i1 } %pair, 0 // %success = extractvalue { iN, i1 } %pair, 1 // br i1 %success, label %atomicrmw.end, label %loop // atomicrmw.end: // [...] BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // This grabs the DebugLoc from AI. IRBuilder<> Builder(AI); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(Addr); // Atomics require at least natural alignment. InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); Loaded->addIncoming(InitLoaded, BB); Value *NewVal = performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); Value *NewLoaded = nullptr; Value *Success = nullptr; CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, Success, NewLoaded); assert(Success && NewLoaded); Loaded->addIncoming(NewLoaded, LoopBB); Builder.CreateCondBr(Success, ExitBB, LoopBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); AI->replaceAllUsesWith(NewLoaded); AI->eraseFromParent(); return true; }
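The load-then-CAS loop sketched in the comment block above is also how one writes an RMW by hand on top of compare-and-swap. Below is a short std::atomic analogue implementing an add this way; it is a portable illustration of the loop shape, not the IR the pass emits.

#include <atomic>
#include <cstdio>

// atomicrmw add expressed as a cmpxchg loop: on failure the CAS reloads
// 'loaded' with the current value, and the body is retried.
static int rmw_add(std::atomic<int> &addr, int incr) {
  int loaded = addr.load();
  int desired;
  do {
    desired = loaded + incr;   // %new = some_op %loaded, %incr
  } while (!addr.compare_exchange_strong(loaded, desired));
  return loaded;               // the old value, as atomicrmw returns
}

int main() {
  std::atomic<int> v{10};
  std::printf("%d %d\n", rmw_add(v, 5), v.load());   // prints: 10 15
}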
void MasterTimer::timerTickFunctions(QList<Universe *> universes) { // List of m_functionList indices that should be removed at the end of this // function. The functions at the indices have been stopped. QList<int> removeList; bool functionListHasChanged = false; bool stoppedAFunction = true; bool firstIteration = true; while (stoppedAFunction) { stoppedAFunction = false; removeList.clear(); for (int i = 0; i < m_functionList.size(); i++) { Function* function = m_functionList.at(i); if (function != NULL) { /* Run the function unless it's supposed to be stopped */ if (function->stopped() == false && m_stopAllFunctions == false) { if (firstIteration) function->write(this, universes); } else { // Clear function's parentList if (m_stopAllFunctions) function->stop(FunctionParent::master()); /* Function should be stopped instead */ function->postRun(this, universes); //qDebug() << "[MasterTimer] Add function (ID: " << function->id() << ") to remove list "; removeList << i; // Don't remove the item from the list just yet. functionListHasChanged = true; stoppedAFunction = true; } } } // Remove functions that need to be removed AFTER all functions have been run // for this round. This is done separately to prevent a case when a function // is first removed and then another is added (chaser, for example), keeping the // list's size the same, thus preventing the last added function from being run // on this round. The indices in removeList are automatically sorted because the // list is iterated with an int above from 0 to size, so iterating the removeList // backwards here will always remove the correct indices. QListIterator <int> it(removeList); it.toBack(); while (it.hasPrevious() == true) m_functionList.removeAt(it.previous()); firstIteration = false; } { QMutexLocker locker(&m_functionListMutex); while (m_startQueue.size() > 0) { QList<Function*> startQueue(m_startQueue); m_startQueue.clear(); locker.unlock(); foreach (Function* f, startQueue) { if (m_functionList.contains(f)) { f->postRun(this, universes); } else { m_functionList.append(f); functionListHasChanged = true; } f->preRun(this); f->write(this, universes); emit functionStarted(f->id()); } locker.relock(); } } if (functionListHasChanged) emit functionListChanged(); }
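The removal bookkeeping above depends on two things: deferring erasure until the whole list has been ticked, and erasing the collected indices from the back so the earlier indices stay valid. A container-agnostic sketch of that idiom with std::vector follows; the predicate used to pick elements is only an example.

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> functions = {10, 11, 12, 13, 14};
  std::vector<std::size_t> removeList;

  // Forward pass: decide what to drop, but do not touch the container yet.
  for (std::size_t i = 0; i < functions.size(); ++i)
    if (functions[i] % 2 == 1)
      removeList.push_back(i);   // indices end up in ascending order

  // Erase back-to-front so pending indices are not shifted by earlier erases.
  for (auto it = removeList.rbegin(); it != removeList.rend(); ++it)
    functions.erase(functions.begin() + static_cast<std::ptrdiff_t>(*it));

  for (int f : functions)
    std::printf("%d ", f);   // prints: 10 12 14
  std::printf("\n");
}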
//----------------------------------------------------------------------- bool FFPTexturing::resolveFunctionsParams(TextureUnitParams* textureUnitParams, ProgramSet* programSet) { Program* vsProgram = programSet->getCpuVertexProgram(); Program* psProgram = programSet->getCpuFragmentProgram(); Function* vsMain = vsProgram->getEntryPointFunction(); Function* psMain = psProgram->getEntryPointFunction(); Parameter::Content texCoordContent = Parameter::SPC_UNKNOWN; switch (textureUnitParams->mTexCoordCalcMethod) { case TEXCALC_NONE: // Resolve explicit vs input texture coordinates. if (textureUnitParams->mTextureMatrix.get() == NULL) texCoordContent = Parameter::Content(Parameter::SPC_TEXTURE_COORDINATE0 + textureUnitParams->mTextureUnitState->getTextureCoordSet()); textureUnitParams->mVSInputTexCoord = vsMain->resolveInputParameter(Parameter::SPS_TEXTURE_COORDINATES, textureUnitParams->mTextureUnitState->getTextureCoordSet(), Parameter::Content(Parameter::SPC_TEXTURE_COORDINATE0 + textureUnitParams->mTextureUnitState->getTextureCoordSet()), textureUnitParams->mVSInTextureCoordinateType); if (textureUnitParams->mVSInputTexCoord.get() == NULL) return false; break; case TEXCALC_ENVIRONMENT_MAP: case TEXCALC_ENVIRONMENT_MAP_PLANAR: case TEXCALC_ENVIRONMENT_MAP_NORMAL: // Resolve vertex normal. mVSInputNormal = vsMain->resolveInputParameter(Parameter::SPS_NORMAL, 0, Parameter::SPC_NORMAL_OBJECT_SPACE, GCT_FLOAT3); if (mVSInputNormal.get() == NULL) return false; break; case TEXCALC_ENVIRONMENT_MAP_REFLECTION: // Resolve vertex normal. mVSInputNormal = vsMain->resolveInputParameter(Parameter::SPS_NORMAL, 0, Parameter::SPC_NORMAL_OBJECT_SPACE, GCT_FLOAT3); if (mVSInputNormal.get() == NULL) return false; // Resolve vertex position. mVSInputPos = vsMain->resolveInputParameter(Parameter::SPS_POSITION, 0, Parameter::SPC_POSITION_OBJECT_SPACE, GCT_FLOAT4); if (mVSInputPos.get() == NULL) return false; break; case TEXCALC_PROJECTIVE_TEXTURE: // Resolve vertex position. mVSInputPos = vsMain->resolveInputParameter(Parameter::SPS_POSITION, 0, Parameter::SPC_POSITION_OBJECT_SPACE, GCT_FLOAT4); if (mVSInputPos.get() == NULL) return false; break; } // Resolve vs output texture coordinates. textureUnitParams->mVSOutputTexCoord = vsMain->resolveOutputParameter(Parameter::SPS_TEXTURE_COORDINATES, -1, texCoordContent, textureUnitParams->mVSOutTextureCoordinateType); if (textureUnitParams->mVSOutputTexCoord.get() == NULL) return false; // Resolve ps input texture coordinates. 
textureUnitParams->mPSInputTexCoord = psMain->resolveInputParameter(Parameter::SPS_TEXTURE_COORDINATES, textureUnitParams->mVSOutputTexCoord->getIndex(), textureUnitParams->mVSOutputTexCoord->getContent(), textureUnitParams->mVSOutTextureCoordinateType); if (textureUnitParams->mPSInputTexCoord.get() == NULL) return false; const ShaderParameterList& inputParams = psMain->getInputParameters(); const ShaderParameterList& localParams = psMain->getLocalParameters(); mPSDiffuse = psMain->getParameterByContent(inputParams, Parameter::SPC_COLOR_DIFFUSE, GCT_FLOAT4); if (mPSDiffuse.get() == NULL) { mPSDiffuse = psMain->getParameterByContent(localParams, Parameter::SPC_COLOR_DIFFUSE, GCT_FLOAT4); if (mPSDiffuse.get() == NULL) return false; } mPSSpecular = psMain->getParameterByContent(inputParams, Parameter::SPC_COLOR_SPECULAR, GCT_FLOAT4); if (mPSSpecular.get() == NULL) { mPSSpecular = psMain->getParameterByContent(localParams, Parameter::SPC_COLOR_SPECULAR, GCT_FLOAT4); if (mPSSpecular.get() == NULL) return false; } mPSOutDiffuse = psMain->resolveOutputParameter(Parameter::SPS_COLOR, 0, Parameter::SPC_COLOR_DIFFUSE, GCT_FLOAT4); if (mPSOutDiffuse.get() == NULL) return false; return true; }
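The diffuse and specular lookups above follow a two-level fall-back: the pixel shader's input parameters are consulted first, and only if the parameter is missing there is the local parameter list tried before the whole resolution fails. A small generic sketch of that lookup order is shown below; the parameter names and std::map containers are invented for the example and have nothing to do with the RTSS types.

#include <cstdio>
#include <map>
#include <string>

// Primary list first, fallback list second; an empty string signals failure.
static std::string resolveParam(const std::map<std::string, std::string> &inputs,
                                const std::map<std::string, std::string> &locals,
                                const std::string &name) {
  auto it = inputs.find(name);
  if (it != inputs.end())
    return it->second;
  it = locals.find(name);
  if (it != locals.end())
    return it->second;
  return std::string();
}

int main() {
  std::map<std::string, std::string> inputs = {{"diffuse", "iDiffuse"}};
  std::map<std::string, std::string> locals = {{"specular", "lSpecular"}};
  std::printf("%s %s\n",
              resolveParam(inputs, locals, "diffuse").c_str(),    // iDiffuse
              resolveParam(inputs, locals, "specular").c_str());  // lSpecular
}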
static bool hasDebugInfo(const Function &F) { NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); return CUNodes != nullptr; }
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && cast<Instruction>(Inst->user_back())->getParent() == BB && !isa<PHINode>(Inst->user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; for (User *U : Inst->users()) { Instruction *UI = cast<Instruction>(U); if (UI->getParent() != BB || isa<PHINode>(UI)) Users.push_back(UI); } // Find all of the blocks that this value is live in. SmallPtrSet<BasicBlock *, 64> LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); Users.pop_back(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); } else { // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } } // If we decided we need a spill, do it. // FIXME: Spilling this way is overkill, as it forces all uses of // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { DemoteRegToStack(*Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. SmallPtrSet<PHINode *, 8> PHIsToDemote; for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) PHIsToDemote.insert(cast<PHINode>(PN)); if (PHIsToDemote.empty()) continue; // Demote the PHIs to the stack. for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. LPI->moveBefore(UnwindBlock->begin()); } }
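The spill decision rests on a block-level liveness walk: starting from each use, predecessor blocks are marked live until the block containing the definition is reached, and a spill is forced whenever an invoke's unwind destination lands in that live set. The sketch below reimplements that backwards flood fill over a toy CFG; the block names, the edges and the liveBlocks helper are hypothetical and only illustrate the marking, not the real MarkBlocksLiveIn.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Block = std::string;

// Walk predecessors from every use block until the defining block is reached,
// collecting each block the value is live in.
static std::set<Block> liveBlocks(const std::map<Block, std::vector<Block>> &preds,
                                  const Block &defBlock,
                                  const std::vector<Block> &useBlocks) {
  std::set<Block> live = {defBlock};   // seed so the walk stops at the def
  std::vector<Block> worklist(useBlocks.begin(), useBlocks.end());
  while (!worklist.empty()) {
    Block B = worklist.back();
    worklist.pop_back();
    if (!live.insert(B).second)
      continue;                        // already visited (or the def block)
    auto it = preds.find(B);
    if (it == preds.end())
      continue;
    for (const Block &P : it->second)
      worklist.push_back(P);
  }
  return live;
}

int main() {
  // entry ends in an invoke: normal edge to invoke.cont, unwind edge to lpad,
  // and both paths merge before the value defined in entry is used.
  std::map<Block, std::vector<Block>> preds = {
      {"invoke.cont", {"entry"}}, {"lpad", {"entry"}},
      {"merge", {"invoke.cont", "lpad"}}};
  std::set<Block> live = liveBlocks(preds, "entry", {"merge"});
  std::printf("live across the unwind edge: %d\n",
              live.count("lpad") ? 1 : 0);   // prints 1 -> would need a spill
}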