/// Attempt to constant-fold an extractvalue instruction.
///
/// The aggregate operand is used directly when it is a Constant; otherwise
/// the constant recorded for it in SimplifiedValues (if any) is used. On
/// success the folded result is stored in SimplifiedValues for \p I.
///
/// \returns true when the instruction was folded, false otherwise.
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  Value *Aggregate = I.getAggregateOperand();

  // Take the operand itself when it is a constant, else whatever constant
  // the SimplifiedValues map holds for it (null when there is none).
  Constant *Folded = dyn_cast<Constant>(Aggregate);
  if (!Folded)
    Folded = SimplifiedValues.lookup(Aggregate);

  if (!Folded) {
    // SROA can look through these but give them a cost.
    return false;
  }

  // Constant folding for extract value is trivial.
  SimplifiedValues[&I] = ConstantExpr::getExtractValue(Folded, I.getIndices());
  return true;
}
/// Expand a cmpxchg instruction into an explicit load-linked /
/// store-conditional sequence built from the target's TLI hooks.
///
/// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
///
/// the control flow produced is:
///
///     [...]
///     fence?
/// cmpxchg.start:
///     %loaded = @load.linked(%addr)
///     %should_store = icmp eq %loaded, %desired
///     br i1 %should_store, label %cmpxchg.trystore,
///                          label %cmpxchg.nostore
/// cmpxchg.trystore:
///     %stored = @store_conditional(%new, %addr)
///     %success = icmp eq i32 %stored, 0
///     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
/// cmpxchg.success:
///     fence?
///     br label %cmpxchg.end
/// cmpxchg.nostore:
///     @load_linked_fail_balance()?
///     br label %cmpxchg.failure
/// cmpxchg.failure:
///     fence?
///     br label %cmpxchg.end
/// cmpxchg.end:
///     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
///     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
///     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
///     [...]
///
/// \param CI the cmpxchg to expand; it is erased before returning.
/// \returns true unconditionally.
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  BasicBlock *OrigBB = CI->getParent();
  Function *ParentFn = OrigBB->getParent();
  LLVMContext &Context = ParentFn->getContext();
  Value *Pointer = CI->getPointerOperand();
  AtomicOrdering OnSuccess = CI->getSuccessOrdering();
  AtomicOrdering OnFailure = CI->getFailureOrdering();

  // When getInsertFencesForAtomic() is true the target does not want to deal
  // with memory orders itself and the emitLeading/TrailingFence hooks are
  // expected to cover everything, so the memory operations are emitted as
  // Monotonic. Otherwise those hooks are no-ops and the success ordering has
  // to be carried by the memory operations.
  AtomicOrdering AccessOrder = OnSuccess;
  if (TLI->getInsertFencesForAtomic())
    AccessOrder = Monotonic;

  // Carve out the blocks. Each new block is inserted in front of the one
  // created just before it, which yields the layout
  // start, trystore, success, nostore, failure, end.
  BasicBlock *EndBB = OrigBB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  BasicBlock *FailBB =
      BasicBlock::Create(Context, "cmpxchg.failure", ParentFn, EndBB);
  BasicBlock *SkipStoreBB =
      BasicBlock::Create(Context, "cmpxchg.nostore", ParentFn, FailBB);
  BasicBlock *OkBB =
      BasicBlock::Create(Context, "cmpxchg.success", ParentFn, SkipStoreBB);
  BasicBlock *StoreBB =
      BasicBlock::Create(Context, "cmpxchg.trystore", ParentFn, OkBB);
  BasicBlock *StartBB =
      BasicBlock::Create(Context, "cmpxchg.start", ParentFn, StoreBB);

  // Constructing the builder from CI picks up CI's DebugLoc.
  IRBuilder<> IRB(CI);

  // splitBasicBlock() appended a branch to the end of the original block, but
  // it targets the wrong block and a fence may have to go in first, so drop
  // that branch and emit our own.
  std::prev(OrigBB->end())->eraseFromParent();
  IRB.SetInsertPoint(OrigBB);
  TLI->emitLeadingFence(IRB, OnSuccess, /*IsStore=*/true, /*IsLoad=*/true);
  IRB.CreateBr(StartBB);

  // cmpxchg.start: load-linked, then compare against the expected value.
  IRB.SetInsertPoint(StartBB);
  Value *Loaded = TLI->emitLoadLinked(IRB, Pointer, AccessOrder);
  Value *Matches =
      IRB.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
  // A mismatch skips the store-conditional and goes to the no-store block.
  IRB.CreateCondBr(Matches, StoreBB, SkipStoreBB);

  // cmpxchg.trystore: attempt the store; the result is compared against zero
  // to form the success flag.
  IRB.SetInsertPoint(StoreBB);
  Value *StoreResult = TLI->emitStoreConditional(IRB, CI->getNewValOperand(),
                                                 Pointer, AccessOrder);
  Value *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
  Value *StoreSucceeded = IRB.CreateICmpEQ(StoreResult, Zero, "success");
  // A weak cmpxchg reports failure when the store does not go through; a
  // strong one goes back to the start block and tries again.
  BasicBlock *StoreFailedBB = StartBB;
  if (CI->isWeak())
    StoreFailedBB = FailBB;
  IRB.CreateCondBr(StoreSucceeded, OkBB, StoreFailedBB);

  // cmpxchg.success: trailing fence (if the target emits one), so later
  // instructions do not get reordered ahead of the operation.
  IRB.SetInsertPoint(OkBB);
  TLI->emitTrailingFence(IRB, OnSuccess, /*IsStore=*/true, /*IsLoad=*/true);
  IRB.CreateBr(EndBB);

  // cmpxchg.nostore: the store-conditional was never executed, so give the
  // target the chance to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  IRB.SetInsertPoint(SkipStoreBB);
  TLI->emitAtomicCmpXchgNoStoreLLBalance(IRB);
  IRB.CreateBr(FailBB);

  // cmpxchg.failure: trailing fence for the failure ordering.
  IRB.SetInsertPoint(FailBB);
  TLI->emitTrailingFence(IRB, OnFailure, /*IsStore=*/true, /*IsLoad=*/true);
  IRB.CreateBr(EndBB);

  // cmpxchg.end: whether the operation succeeded is now known from control
  // flow alone, so materialise it as a PHI at the top of the exit block.
  IRB.SetInsertPoint(EndBB, EndBB->begin());
  PHINode *Succeeded = IRB.CreatePHI(Type::getInt1Ty(Context), 2);
  Succeeded->addIncoming(ConstantInt::getTrue(Context), OkBB);
  Succeeded->addIncoming(ConstantInt::getFalse(Context), FailBB);

  // Rewrite every extractvalue user of the cmpxchg: field 0 becomes the
  // loaded value and field 1 becomes the control-flow derived success PHI.
  // The extracts are only collected here; erasing them is deferred so the
  // user list is not modified while it is being walked.
  SmallVector<ExtractValueInst *, 2> DeadExtracts;
  for (User *U : CI->users()) {
    ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
    if (!Extract)
      continue;

    assert(Extract->getNumIndices() == 1 && Extract->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    Value *Replacement = Loaded;
    if (Extract->getIndices()[0] != 0)
      Replacement = Succeeded;
    Extract->replaceAllUsesWith(Replacement);
    DeadExtracts.push_back(Extract);
  }

  // Iteration is over, so the rewritten extracts can go now.
  for (ExtractValueInst *Extract : DeadExtracts)
    Extract->eraseFromParent();

  if (!CI->use_empty()) {
    // Something still uses the whole { iN, i1 } struct result in a way that
    // is not understood here, so rebuild the struct from its two parts.
    Value *Rebuilt =
        IRB.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Rebuilt = IRB.CreateInsertValue(Rebuilt, Succeeded, 1);
    CI->replaceAllUsesWith(Rebuilt);
  }

  CI->eraseFromParent();
  return true;
}
// // Method: runOnModule() // // Description: // Entry point for this LLVM pass. Search for insert/extractvalue instructions // that can be simplified. // // Inputs: // M - A reference to the LLVM module to transform. // // Outputs: // M - The transformed LLVM module. // // Return value: // true - The module was modified. // false - The module was not modified. // bool SimplifyEV::runOnModule(Module& M) { // Repeat till no change bool changed; do { changed = false; for (Module::iterator F = M.begin(); F != M.end(); ++F) { for (Function::iterator B = F->begin(), FE = F->end(); B != FE; ++B) { for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE;) { ExtractValueInst *EV = dyn_cast<ExtractValueInst>(I++); if(!EV) continue; Value *Agg = EV->getAggregateOperand(); if (!EV->hasIndices()) { EV->replaceAllUsesWith(Agg); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } if (Constant *C = dyn_cast<Constant>(Agg)) { if (isa<UndefValue>(C)) { EV->replaceAllUsesWith(UndefValue::get(EV->getType())); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } if (isa<ConstantAggregateZero>(C)) { EV->replaceAllUsesWith(Constant::getNullValue(EV->getType())); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { // Extract the element indexed by the first index out of the constant Value *V = C->getOperand(*EV->idx_begin()); if (EV->getNumIndices() > 1) { // Extract the remaining indices out of the constant indexed by the // first index ExtractValueInst *EV_new = ExtractValueInst::Create(V, EV->getIndices().slice(1), "", EV); EV->replaceAllUsesWith(EV_new); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; 
continue; } else { EV->replaceAllUsesWith(V); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } } continue; } if (LoadInst * LI = dyn_cast<LoadInst>(Agg)) { // if the Agg value came from a load instruction // replace the extract value intruction with // a gep and a load. SmallVector<Value*, 8> Indices; Type *Int32Ty = Type::getInt32Ty(M.getContext()); Indices.push_back(Constant::getNullValue(Int32Ty)); for (ExtractValueInst::idx_iterator I = EV->idx_begin(), E = EV->idx_end(); I != E; ++I) { Indices.push_back(ConstantInt::get(Int32Ty, *I)); } GetElementPtrInst *GEP = GetElementPtrInst::CreateInBounds(LI->getOperand(0), Indices, LI->getName(), LI) ; LoadInst *LINew = new LoadInst(GEP, "", LI); EV->replaceAllUsesWith(LINew); EV->eraseFromParent(); changed = true; numErased++; continue; } if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { bool done = false; // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; for (exti = EV->idx_begin(), insi = IV->idx_begin(), exte = EV->idx_end(), inse = IV->idx_end(); exti != exte && insi != inse; ++exti, ++insi) { if (*insi != *exti) { // The insert and extract both reference distinctly different elements. // This means the extract is not influenced by the insert, and we can // replace the aggregate operand of the extract with the aggregate // operand of the insert. 
i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 0 // with // %E = extractvalue { i32, { i32 } } %A, 0 ExtractValueInst *EV_new = ExtractValueInst::Create(IV->getAggregateOperand(), EV->getIndices(), "", EV); EV->replaceAllUsesWith(EV_new); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; done = true; changed = true; break; } } if(done) continue; if (exti == exte && insi == inse) { // Both iterators are at the end: Index lists are identical. Replace // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %C = extractvalue { i32, { i32 } } %B, 1, 0 // with "i32 42" EV->replaceAllUsesWith(IV->getInsertedValueOperand()); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } if (exti == exte) { // The extract list is a prefix of the insert list. i.e. replace // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 // %E = extractvalue { i32, { i32 } } %I, 1 // with // %X = extractvalue { i32, { i32 } } %A, 1 // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). Value *NewEV = ExtractValueInst::Create(IV->getAggregateOperand(), EV->getIndices(), "", EV); Value *NewIV = InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse), "", EV); EV->replaceAllUsesWith(NewIV); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } if (insi == inse) { // The insert list is a prefix of the extract list // We can simply remove the common indices from the extract and make it // operate on the inserted value instead of the insertvalue result. 
// i.e., replace // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 // %E = extractvalue { i32, { i32 } } %I, 1, 0 // with // %E extractvalue { i32 } { i32 42 }, 0 ExtractValueInst *EV_new = ExtractValueInst::Create(IV->getInsertedValueOperand(), makeArrayRef(exti, exte), "", EV); EV->replaceAllUsesWith(EV_new); DEBUG(errs() << "EV:"); DEBUG(errs() << "ERASE:"); DEBUG(EV->dump()); EV->eraseFromParent(); numErased++; changed = true; continue; } } } } } } while(changed); return (numErased > 0); }