bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
             I != E; --I) {
          if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        unsigned storeAlign = SI->getAlignment();
        if (!storeAlign)
          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
        unsigned loadAlign = LI->getAlignment();
        if (!loadAlign)
          loadAlign = DL.getABITypeAlignment(LI->getType());

        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            std::min(storeAlign, loadAlign), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator();  // Don't invalidate iterator.
      return true;
    }

  return false;
}
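// Illustrative sketch (not part of the source above): the call-slot case this
// function targets is IR of roughly the following shape, where the callee
// writes its result into a temporary and a load/store pair copies it out:
//
//   call void @produce(%T* sret %tmp)   ; callee fills %tmp
//   %v = load %T, %T* %tmp
//   store %T %v, %T* %dst
//
// When nothing clobbers %dst between the call and the store,
// performCallSlotOptzn can redirect the callee to write into %dst directly
// and the load/store pair is erased. The names @produce, %tmp and %dst are
// hypothetical and only used for this example.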
/// performLocalReleaseMotion - Scan backwards from the specified release,
/// moving it earlier in the function if possible, over instructions that do
/// not access the released object.  If we get to a retain or allocation of
/// the object, zap both.
static bool performLocalReleaseMotion(CallInst &Release, BasicBlock &BB,
                                      SwiftRCIdentity *RC) {
  // FIXME: Call classifier should identify the object for us.  Too bad C++
  // doesn't have nice Swift-style enums.
  Value *ReleasedObject = RC->getSwiftRCIdentityRoot(Release.getArgOperand(0));

  BasicBlock::iterator BBI = Release.getIterator();

  // Scan until we get to the top of the block.
  while (BBI != BB.begin()) {
    --BBI;

    // Don't analyze PHI nodes.  We can't move retains before them and they
    // aren't "interesting".
    if (isa<PHINode>(BBI) ||
        // If we found the instruction that defines the value we're releasing,
        // don't push the release past it.
        &*BBI == Release.getArgOperand(0)) {
      ++BBI;
      goto OutOfLoop;
    }

    switch (classifyInstruction(*BBI)) {
    // These instructions should not reach here based on the pass ordering.
    // i.e. LLVMARCOpt -> LLVMContractOpt.
    case RT_UnknownRetainN:
    case RT_BridgeRetainN:
    case RT_RetainN:
    case RT_UnknownReleaseN:
    case RT_BridgeReleaseN:
    case RT_ReleaseN:
      llvm_unreachable("These are only created by LLVMARCContract !");
    case RT_NoMemoryAccessed:
      // Skip over random instructions that don't touch memory.  They don't
      // need protection by retain/release.
      continue;

    case RT_UnknownRelease:
    case RT_BridgeRelease:
    case RT_ObjCRelease:
    case RT_Release: {
      // If we get to a release, we can generally ignore it and scan past it.
      // However, if we get to a release of obviously the same object, we stop
      // scanning here because it should have already been moved as early as
      // possible, so there is no reason to move its friend to the same place.
      //
      // NOTE: If this occurs frequently, maybe we can have a release(Obj, N)
      // API to drop multiple retain counts at once.
      CallInst &ThisRelease = cast<CallInst>(*BBI);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      ThisReleasedObject = RC->getSwiftRCIdentityRoot(ThisReleasedObject);
      if (ThisReleasedObject == ReleasedObject) {
        //Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }
      continue;
    }

    case RT_UnknownRetain:
    case RT_BridgeRetain:
    case RT_ObjCRetain:
    case RT_Retain: {  // swift_retain(obj)
      CallInst &Retain = cast<CallInst>(*BBI);
      Value *RetainedObject = Retain.getArgOperand(0);
      RetainedObject = RC->getSwiftRCIdentityRoot(RetainedObject);

      // Since we canonicalized earlier, we know that if our retain has any
      // uses, they were replaced already. This assertion documents this
      // assumption.
      assert(Retain.use_empty() && "Retain should have been canonicalized to "
             "have no uses.");

      // If the retain and release are to obviously pointer-equal objects, then
      // we can delete both of them.  We have proven that they do not protect
      // anything of value.
      if (RetainedObject == ReleasedObject) {
        Retain.eraseFromParent();
        Release.eraseFromParent();
        ++NumRetainReleasePairs;
        return true;
      }

      // Otherwise, this is a retain of an object that is not statically known
      // to be the same object.  It may still be dynamically the same object
      // though.  In this case, we can't move the release past it.
      // TODO: Strengthen analysis.
      //Release.dump(); ThisRelease.dump(); BB.getParent()->dump();
      ++BBI;
      goto OutOfLoop;
    }

    case RT_AllocObject: {  // %obj = swift_alloc(...)
      CallInst &Allocation = cast<CallInst>(*BBI);

      // If this is an allocation of an unrelated object, just ignore it.
      // TODO: This is not safe without proving the object being released is
      // not related to the allocated object.  Consider something silly like
      // this:
      //   A = allocate()
      //   B = bitcast A to object
      //   release(B)
      if (ReleasedObject != &Allocation) {
        // Release.dump(); BB.getParent()->dump();
        ++BBI;
        goto OutOfLoop;
      }

      // If this is a release right after an allocation of the object, then we
      // can zap both.
      Allocation.replaceAllUsesWith(UndefValue::get(Allocation.getType()));
      Allocation.eraseFromParent();
      Release.eraseFromParent();
      ++NumAllocateReleasePairs;
      return true;
    }

    case RT_FixLifetime:
    case RT_RetainUnowned:
    case RT_CheckUnowned:
    case RT_Unknown:
      // Otherwise, we have reached something that we do not understand. Do not
      // attempt to shorten the lifetime of this object beyond this point so we
      // are conservative.
      ++BBI;
      goto OutOfLoop;
    }
  }
OutOfLoop:

  // If we got to the top of the block, (and if the instruction didn't start
  // there) move the release to the top of the block.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (&*BBI != &Release) {
    Release.moveBefore(&*BBI);
    return true;
  }

  return false;
}
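// Illustrative sketch (not part of the source above), with hypothetical IR:
//
//   call void @swift_retain(%swift.refcounted* %obj)
//   %a = add i64 %x, 1              ; RT_NoMemoryAccessed, skipped
//   call void @swift_release(%swift.refcounted* %obj)
//
// Scanning backwards from the release skips the add, reaches the retain of
// the same RC-identity root, and erases both calls (NumRetainReleasePairs).
// If the scan instead reaches the allocation that produced the released
// object, the allocation and the release are removed together and uses of
// the object are replaced with undef (NumAllocateReleasePairs).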
/// performLocalRetainMotion - Scan forward from the specified retain, moving
/// it later in the function if possible, over instructions that provably
/// can't release the object.  If we get to a release of the object, zap both.
///
/// NOTE: this handles both objc_retain and swift_retain.
///
static bool performLocalRetainMotion(CallInst &Retain, BasicBlock &BB,
                                     SwiftRCIdentity *RC) {
  // FIXME: Call classifier should identify the object for us.  Too bad C++
  // doesn't have nice Swift-style enums.
  Value *RetainedObject = RC->getSwiftRCIdentityRoot(Retain.getArgOperand(0));

  BasicBlock::iterator BBI = Retain.getIterator(),
                       BBE = BB.getTerminator()->getIterator();

  bool isObjCRetain = Retain.getCalledFunction()->getName() == "objc_retain";

  bool MadeProgress = false;

  // Scan until we get to the end of the block.
  for (++BBI; BBI != BBE; ++BBI) {
    Instruction &CurInst = *BBI;

    // Classify the instruction. This switch does a "break" when the
    // instruction can be skipped and is interesting, and a "continue" when it
    // is a retain of the same pointer.
    switch (classifyInstruction(CurInst)) {
    // These instructions should not reach here based on the pass ordering.
    // i.e. LLVMARCOpt -> LLVMContractOpt.
    case RT_RetainN:
    case RT_UnknownRetainN:
    case RT_BridgeRetainN:
    case RT_ReleaseN:
    case RT_UnknownReleaseN:
    case RT_BridgeReleaseN:
      llvm_unreachable("These are only created by LLVMARCContract !");
    case RT_NoMemoryAccessed:
    case RT_AllocObject:
    case RT_CheckUnowned:
      // Skip over random instructions that don't touch memory.  They don't
      // need protection by retain/release.
      break;

    case RT_FixLifetime:
      // This only stops release motion.  Retains can move over it.
      break;

    case RT_Retain:
    case RT_UnknownRetain:
    case RT_BridgeRetain:
    case RT_RetainUnowned:
    case RT_ObjCRetain: {  // swift_retain(obj)
      //CallInst &ThisRetain = cast<CallInst>(CurInst);
      //Value *ThisRetainedObject = ThisRetain.getArgOperand(0);

      // If we see a retain of the same object, we can skip over it, but we
      // can't count it as progress.  Just pushing a retain(x) past a retain(y)
      // doesn't change the program.
      continue;
    }

    case RT_UnknownRelease:
    case RT_BridgeRelease:
    case RT_ObjCRelease:
    case RT_Release: {
      // If we get to a release that is provably to this object, then we can
      // zap it and the retain.
      CallInst &ThisRelease = cast<CallInst>(CurInst);
      Value *ThisReleasedObject = ThisRelease.getArgOperand(0);
      ThisReleasedObject = RC->getSwiftRCIdentityRoot(ThisReleasedObject);
      if (ThisReleasedObject == RetainedObject) {
        Retain.eraseFromParent();
        ThisRelease.eraseFromParent();
        if (isObjCRetain) {
          ++NumObjCRetainReleasePairs;
        } else {
          ++NumRetainReleasePairs;
        }
        return true;
      }

      // Otherwise, if this is some other pointer, we can only ignore it if we
      // can prove that the two objects don't alias.
      // Retain.dump(); ThisRelease.dump(); BB.getParent()->dump();
      goto OutOfLoop;
    }

    case RT_Unknown:
      // Loads cannot affect the retain.
      if (isa<LoadInst>(CurInst))
        continue;

      // Load, store, memcpy etc can't do a release.
      if (isa<LoadInst>(CurInst) || isa<StoreInst>(CurInst) ||
          isa<MemIntrinsic>(CurInst))
        break;

      // CurInst->dump(); BBI->dump();
      // Otherwise, we get to something unknown/unhandled.  Bail out for now.
      goto OutOfLoop;
    }

    // If the switch did a break, we made some progress moving this retain.
    MadeProgress = true;
  }
OutOfLoop:

  // If we were able to move the retain down, move it now.
  // TODO: This is where we'd plug in some global algorithms someday.
  if (MadeProgress) {
    Retain.moveBefore(&*BBI);
    return true;
  }

  return false;
}
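// Illustrative sketch (not part of the source above), with hypothetical IR:
//
//   call void @swift_retain(%swift.refcounted* %obj)
//   store i64 %v, i64* %slot        ; RT_Unknown, but a store cannot release
//   call void @swift_release(%swift.refcounted* %obj)
//
// Scanning forward from the retain steps over the store, reaches the release
// of the same RC-identity root, and deletes the pair (counting it under
// NumObjCRetainReleasePairs when the call was objc_retain). If the scan stops
// early but made progress, the retain is sunk to the stopping point via
// moveBefore.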
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Load to store forwarding can be interpreted as memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {

      auto *T = LI->getType();
      if (T->isAggregateType()) {
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation LoadLoc = MemoryLocation::get(LI);

        // We use alias analysis to check if an instruction may store to
        // the memory we load from in between the load and the store. If
        // such an instruction is found, we store it in AI.
        Instruction *AI = nullptr;
        for (BasicBlock::iterator I = ++LI->getIterator(),
                                  E = SI->getIterator();
             I != E; ++I) {
          if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) {
            AI = &*I;
            break;
          }
        }

        // If no aliasing instruction is found, then we can promote the
        // load/store pair to a memcpy at the store location.
        if (!AI) {
          // If we load from memory that may alias the memory we store to,
          // memmove must be used to preserve semantics. If not, memcpy can
          // be used.
          bool UseMemMove = false;
          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
            UseMemMove = true;

          unsigned Align = findCommonAlignment(DL, SI, LI);
          uint64_t Size = DL.getTypeStoreSize(T);
          IRBuilder<> Builder(SI);
          Instruction *M;
          if (UseMemMove)
            M = Builder.CreateMemMove(SI->getPointerOperand(),
                                      LI->getPointerOperand(), Size,
                                      Align, SI->isVolatile());
          else
            M = Builder.CreateMemCpy(SI->getPointerOperand(),
                                     LI->getPointerOperand(), Size,
                                     Align, SI->isVolatile());

          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
                       << " => " << *M << "\n");

          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;

          // Make sure we do not invalidate the iterator.
          BBI = M->getIterator();
          return true;
        }
      }

      // Detect cases where we're performing call slot forwarding, but
      // happen to be using a load-store pair to implement it, rather than
      // a memcpy.
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
             I != E; --I) {
          if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            findCommonAlignment(DL, SI, LI), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator();  // Don't invalidate iterator.
      return true;
    }

  return false;
}
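// Illustrative sketch (not part of the source above), with hypothetical IR:
//
//   %val = load %struct.S, %struct.S* %src
//   store %struct.S %val, %struct.S* %dst
//
// When no instruction between the load and the store may write to %src, the
// pair is rewritten as a single @llvm.memcpy of getTypeStoreSize(%struct.S)
// bytes (or @llvm.memmove when AA cannot prove %src and %dst do not alias),
// and the original load and store are erased.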