/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); lastAddedInsn = newLoad; } } // Save old loads/stores for removal for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { Instruction* insn = *SI; toRemove.push_back(insn); } } // Zap all the old loads/stores for (SmallVector<Instruction*, 16>::const_iterator SI = toRemove.begin(), SE = toRemove.end(); SI != SE; ++SI) { (*SI)->eraseFromParent(); } return lastAddedInsn; }