/// Analyze a bitcast for the inline-cost computation.
///
/// If the cast operand is a constant (directly, or via SimplifiedValues) and
/// the cast folds to a constant expression, the folded constant is recorded
/// for \p I. Otherwise any base/offset pair and SROA candidate tracked for the
/// operand are forwarded to \p I. The function returns true on every path.
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  Value *Src = I.getOperand(0);

  // Constant propagation: prefer a literal constant operand, then fall back
  // to whatever the operand has already been simplified to.
  Constant *ConstSrc = dyn_cast<Constant>(Src);
  if (!ConstSrc)
    ConstSrc = SimplifiedValues.lookup(Src);
  if (ConstSrc) {
    Constant *Folded = ConstantExpr::getBitCast(ConstSrc, I.getType());
    if (Folded) {
      SimplifiedValues[&I] = Folded;
      return true;
    }
  }

  // A cast leaves the offset untouched, so the operand's (base, offset) pair
  // is simply re-registered under the cast result.
  std::pair<Value *, APInt> SrcBaseAndOffset = ConstantOffsetPtrs.lookup(Src);
  if (SrcBaseAndOffset.first)
    ConstantOffsetPtrs[&I] = SrcBaseAndOffset;

  // Keep following SROA candidates through the cast.
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(Src, SROAArg, CostIt))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts never add cost.
  return true;
}
/// Scalarize a vector-to-vector bitcast.
///
/// Three shapes are handled, based on the source and destination element
/// counts:
///   - equal counts: each source element is bitcast to the destination
///     element type;
///   - more destination elements (<M x t1> -> <N*M x t2>): each source
///     element is bitcast to <N x t2> and that intermediate vector is
///     scattered into the result;
///   - fewer destination elements (<N*M x t1> -> <M x t2>): each group of N
///     source elements is assembled into <N x t1> and bitcast to t2.
///
/// \returns false (emitting nothing) when either side is not a vector or when
/// the element counts are not integer multiples of one another; true after
/// the per-element results have been handed to gather().
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!DstVT || !SrcVT)
    return false;

  unsigned DstNumElems = DstVT->getNumElements();
  unsigned SrcNumElems = SrcVT->getNumElements();

  // The fan-out / fan-in paths below divide one element count by the other.
  // If the counts are not exact multiples (e.g. <2 x i48> -> <3 x i32>) the
  // truncated quotient yields an intermediate vector type of the wrong width
  // and leaves result slots unfilled, so leave such casts untouched.
  if (DstNumElems != SrcNumElems) {
    unsigned Larger = DstNumElems > SrcNumElems ? DstNumElems : SrcNumElems;
    unsigned Smaller = DstNumElems > SrcNumElems ? SrcNumElems : DstNumElems;
    if (Smaller == 0 || Larger % Smaller != 0)
      return false;
  }

  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    for (unsigned I = 0; I < DstNumElems; ++I)
      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
                                     BCI.getName() + ".i" + Twine(I));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
    // individual elements to the destination.
    unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    unsigned ResI = 0;
    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
      Value *V = Op0[Op0I];
      Instruction *VI;
      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);
      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
      Scatterer Mid = scatter(&BCI, V);
      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
        Res[ResI++] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
    unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    unsigned Op0I = 0;
    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
      // Build the <N x t1> group one insertelement at a time, starting from
      // undef, then reinterpret the whole group as a single t2.
      Value *V = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
        V = Builder.CreateInsertElement(V, Op0[Op0I++],
                                        Builder.getInt32(MidI),
                                        BCI.getName() + ".i" + Twine(ResI)
                                        + ".upto" + Twine(MidI));
      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
                                        BCI.getName() + ".i" + Twine(ResI));
    }
  }
  gather(&BCI, Res);
  return true;
}
/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return /// instructions to the predecessor to enable tail call optimizations. The /// case it is currently looking for is: /// @code /// bb0: /// %tmp0 = tail call i32 @f0() /// br label %return /// bb1: /// %tmp1 = tail call i32 @f1() /// br label %return /// bb2: /// %tmp2 = tail call i32 @f2() /// br label %return /// return: /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] /// ret i32 %retval /// @endcode /// /// => /// /// @code /// bb0: /// %tmp0 = tail call i32 @f0() /// ret i32 %tmp0 /// bb1: /// %tmp1 = tail call i32 @f1() /// ret i32 %tmp1 /// bb2: /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; PHINode *PN = 0; BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); if (V) { BCI = dyn_cast<BitCastInst>(V); if (BCI) V = BCI->getOperand(0); PN = dyn_cast<PHINode>(V); if (!PN) return false; } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) return false; // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); if (CallerRetAttr.hasAttribute(Attributes::ZExt) || CallerRetAttr.hasAttribute(Attributes::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the // return is the first instruction in the block. if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); if (&*BI == BCI) // Also skip over the bitcast. ++BI; if (&*BI != RI) return false; } else { BasicBlock::iterator BI = BB->begin(); while (isa<DbgInfoIntrinsic>(BI)) ++BI; if (&*BI != RI) return false; } /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail /// call. 
SmallVector<CallInst*, 4> TailCalls; if (PN) { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { if (!VisitedBBs.insert(*PI)) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); if (RI == RE) continue; CallInst *CI = dyn_cast<CallInst>(&*RI); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) TailCalls.push_back(CI); } } bool Changed = false; for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { CallInst *CI = TailCalls[i]; CallSite CS(CI); // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); if (AttrBuilder(CalleeRetAttr). removeAttribute(Attributes::NoAlias) != AttrBuilder(CallerRetAttr). removeAttribute(Attributes::NoAlias)) continue; // Make sure the call instruction is followed by an unconditional branch to // the return block. BasicBlock *CallBB = CI->getParent(); BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) continue; // Duplicate the return into CallBB. (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); ModifiedDT = Changed = true; ++NumRetsDup; } // If we eliminated all predecessors of the block, delete the block now. 
if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; }
/// Handle a bitcast while building the graph: when the cast produces a
/// pointer and the operand already has a non-null destination handle, the
/// cast result is bound to that same handle. Non-pointer casts are ignored.
void GraphBuilder::visitBitCastInst(BitCastInst &I) {
  // Only pointer-typed results are of interest.
  if (isa<PointerType>(I.getType())) {
    DSNodeHandle SrcHandle = getValueDest(I.getOperand(0));
    if (!SrcHandle.isNull())
      setDestTo(I, SrcHandle);
  }
}
/// Remove cast sequences in \p BB that have no net effect.
///
/// First sweep, per instruction:
///   - inttoptr(ptrtoint P), where the ptrtoint yields IntPtrType, is
///     replaced by P (through a new bitcast if the pointer types differ);
///   - ptrtoint(inttoptr V), where the ptrtoint yields IntPtrType and V
///     already has that same type, is replaced by V;
///   - ptrtoint(bitcast P) is rewritten to ptrtoint(P);
///   - a bitcast to its own operand type, or a bitcast that undoes an earlier
///     bitcast, is replaced by the underlying value.
/// Second sweep: unused ptrtoint instructions are erased and bitcasts with
/// the same (operand, result type) pair are merged into the first one seen.
///
/// \returns true if any instruction in the block was replaced, rewritten or
/// erased.
bool CombineNoopCasts::runOnBasicBlock(BasicBlock &BB) {
  bool Changed = false;

  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE;) {
    // Advance before touching Inst: it may be erased below.
    Instruction* Inst = BBI++;
    if (IntToPtrInst *Cast1 = dyn_cast<IntToPtrInst>(Inst)) {
      if (isa<PtrToIntInst>(Cast1->getOperand(0)) ||
          (isa<ConstantExpr>(Cast1->getOperand(0)) &&
           cast<ConstantExpr>(Cast1->getOperand(0))->getOpcode() ==
               Instruction::PtrToInt)) {
        User *Cast2 = cast<User>(Cast1->getOperand(0));
        Value *V = Cast2->getOperand(0);
        // Only a round trip through the pointer-sized integer is a no-op.
        if(Cast2->getType() != IntPtrType) {
          continue;
        }
        if (V->getType() != Cast1->getType())
          V = CopyDebug(new BitCastInst(V, Cast1->getType(),
                                        V->getName() + ".bc", Cast1),
                        Cast2);
        Cast1->replaceAllUsesWith(V);
        if (Cast1->use_empty())
          Cast1->eraseFromParent();
        // Cast2 may be a constant expression; only instructions are erased.
        if (Cast2->use_empty() && isa<Instruction>(Cast2))
          cast<Instruction>(Cast2)->eraseFromParent();

        Changed = true;
      }
    } else if(PtrToIntInst *Cast1 = dyn_cast<PtrToIntInst>(Inst)) {
      if(Cast1->getType() != IntPtrType) {
        continue;
      }
      if (isa<IntToPtrInst>(Cast1->getOperand(0)) ||
          (isa<ConstantExpr>(Cast1->getOperand(0)) &&
           cast<ConstantExpr>(Cast1->getOperand(0))->getOpcode() ==
               Instruction::IntToPtr)) {
        User *Cast2 = cast<User>(Cast1->getOperand(0));
        Value *V = Cast2->getOperand(0);
        // The original integer can stand in for the result only when it has
        // exactly the result's type; a narrower or wider source integer means
        // the pair is not a no-op and uses cannot be redirected to it.
        if (V->getType() != Cast1->getType()) {
          continue;
        }
        Cast1->replaceAllUsesWith(V);
        if (Cast1->use_empty())
          Cast1->eraseFromParent();
        if (Cast2->use_empty() && isa<Instruction>(Cast2))
          cast<Instruction>(Cast2)->eraseFromParent();

        Changed = true;
      } else if(BitCastInst *Cast2 = dyn_cast<BitCastInst>(Cast1->getOperand(0))) {
        // Look through the bitcast feeding the ptrtoint.
        Cast1->setOperand(0, Cast2->getOperand(0));
        if (Cast2->use_empty())
          Cast2->eraseFromParent();

        Changed = true;
      }
    } else if(BitCastInst* Cast1 = dyn_cast<BitCastInst>(Inst)) {
      if(Cast1->getOperand(0)->getType() == Cast1->getType()) {
        // Bitcast to the operand's own type.
        Cast1->replaceAllUsesWith(Cast1->getOperand(0));
        Cast1->eraseFromParent();

        Changed = true;
      } else if(BitCastInst* Cast2 = dyn_cast<BitCastInst>(Cast1->getOperand(0))) {
        // bitcast(bitcast X) back to X's type.
        if(Cast1->getType() == Cast2->getOperand(0)->getType()) {
          Cast1->replaceAllUsesWith(Cast2->getOperand(0));
          Cast1->eraseFromParent();
          if (Cast2->use_empty()) {
            Cast2->eraseFromParent();
          }

          Changed = true;
        }
      }
    }
  }

  // de-duplicate bitcasts:
  DenseMap<std::pair<Value*, Type*>, BitCastInst*> BCs;
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE;) {
    Instruction* Inst = BBI++;
    if(PtrToIntInst *Cast1 = dyn_cast<PtrToIntInst>(Inst)) {
      if(Cast1->use_empty()) {
        Cast1->eraseFromParent();
        Changed = true;
        continue;
      }
    }

    if(!isa<BitCastInst>(Inst)) {
      continue;
    }

    BitCastInst* BC = cast<BitCastInst>(Inst);
    auto Val = std::make_pair(BC->getOperand(0), BC->getType());
    auto BI = BCs.find(Val);
    if(BI == BCs.end()) {
      // First bitcast with this operand/type pair: remember it.
      BCs.insert(std::make_pair(Val, BC));
    } else {
      // Same operand and type as an earlier bitcast in this block.
      BC->replaceAllUsesWith(BI->second);
      BC->eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}
/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the /// instruction is a bitcast of the result of a malloc call. CallInst *llvm::extractMallocCallFromBitCast(Value *I) { BitCastInst *BCI = dyn_cast<BitCastInst>(I); return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) : NULL; }
/*
 * Rewrite OpenMP call sites and their associated kernel functions -- the
 * following pattern
 *
 *   call void @GOMP_parallel_start(void (i8*)* @_Z20initialize_variablesiPfS_.omp_fn.4, i8* %.omp_data_o.5571, i32 0) nounwind
 *   call void @_Z20initialize_variablesiPfS_.omp_fn.4(i8* %.omp_data_o.5571) nounwind
 *   call void @GOMP_parallel_end() nounwind
 *
 * For each matching three-call sequence inside one basic block:
 *   - the alloca behind the context argument is scheduled to be replaced by
 *     a call through mallocFnTy followed by a bitcast to the alloca's type;
 *   - a call through offloadFnTy taking (loop upper bound, context bitcast,
 *     kernel function) is inserted before the parallel-start call;
 *   - the parallel-start call, every direct call that follows it in the same
 *     block, and the parallel-end call are queued for deletion.
 * The alloca replacements and deletions are applied after the whole module
 * has been scanned.
 */
void HeteroOMPTransform::rewrite_omp_call_sites(Module &M) {
  SmallVector<Instruction *, 16> toDelete;
  DenseMap<Value *, Value *> ValueMap;

  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I){
    if (I->isDeclaration())
      continue;

    for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; ++BBI) {
      // True while between a rewritten parallel-start and its parallel-end;
      // tracked per basic block.
      bool match = false;
      for (BasicBlock::iterator INSNI = BBI->begin(), INSNE = BBI->end();
           INSNI != INSNE; ++INSNI) {
        if (!isa<CallInst>(INSNI))
          continue;

        CallSite CI(cast<Instruction>(INSNI));
        // Indirect calls are ignored entirely.
        if (CI.getCalledFunction() == NULL)
          continue;

        string called_func_name = CI.getCalledFunction()->getName();
        if (called_func_name == OMP_PARALLEL_START_NAME && CI.arg_size() == 3) {
          // change alloc to malloc_shared
          // %5 = call i8* @_Z13malloc_sharedm(i64 20)  ; <i8*> [#uses=5]
          // %6 = bitcast i8* %5 to float*              ; <float*> [#uses=2]
          AllocaInst *AllocCall;
          Value *arg_0 = CI.getArgument(0); // function
          Value *arg_1 = CI.getArgument(1); // context
          Value *loop_ub = NULL;
          Function *function;
          BitCastInst* BCI;
          Function *kernel_function;

          // Look two instructions ahead for the parallel-end call, stopping
          // at the end of the block so the past-the-end position is never
          // handed to dyn_cast.
          BasicBlock::iterator iI(*INSNI);
          ++iI;
          if (iI != INSNE)
            ++iI;

          CallInst *next_next;
          // The order of the conjuncts matters: each assignment feeds the
          // checks after it, and the helper calls run only once everything
          // before them has matched.
          if (arg_0 != NULL && arg_1 != NULL
              && iI != INSNE
              && (next_next = dyn_cast<CallInst>(iI))
              && (next_next->getCalledFunction() != NULL)
              && (next_next->getCalledFunction()->getName() == OMP_PARALLEL_END_NAME)
              && (BCI = dyn_cast<BitCastInst>(arg_1))
              && (AllocCall = dyn_cast<AllocaInst>(BCI->getOperand(0)))
              && (function = dyn_cast<Function>(arg_0))
              && (loop_ub = find_loop_upper_bound (AllocCall))
              && (kernel_function = convert_to_kernel_function (M, function))) {

            // Create the malloc call and its cast right before the alloca;
            // uses of the alloca are redirected after the scan.
            SmallVector<Value*, 16> Args;
            Args.push_back(AllocCall->getArraySize());
            Instruction *MallocCall = CallInst::Create(mallocFnTy, Args, "", AllocCall);
            CastInst *MallocCast = CastInst::Create(Instruction::BitCast, MallocCall,
                                                    AllocCall->getType(), "", AllocCall);
            ValueMap[AllocCall] = MallocCast;

            // Add offload function
            Args.clear();
            Args.push_back(loop_ub);
            Args.push_back(BCI);
            Args.push_back(kernel_function);
            if (offloadFnTy == NULL) {
              init_offload_type(M, kernel_function);
            }
            CallInst::Create(offloadFnTy, Args, "", INSNI);

            // The same alloca may back several call sites; queue it once.
            if (find(toDelete.begin(), toDelete.end(), AllocCall) == toDelete.end()){
              toDelete.push_back(AllocCall);
            }
            toDelete.push_back(&(*INSNI));
            match = true;
          }
        }
        else if (called_func_name == OMP_PARALLEL_END_NAME && CI.arg_size() == 0 && match) {
          toDelete.push_back(&(*INSNI));
          match = false;
        }
        else if (match) {
          // A direct call between the rewritten start and its end.
          toDelete.push_back(&(*INSNI));
        }
      }
    }
  }

  /* Replace AllocCalls by MallocCalls */
  for (DenseMap<Value *, Value *>::iterator I = ValueMap.begin(), E = ValueMap.end();
       I != E; ++I) {
    I->first->replaceAllUsesWith(I->second);
  }

  /* Erase every queued instruction, most recently queued first. */
  while (!toDelete.empty()) {
    Instruction *g = toDelete.back();
    toDelete.pop_back();
    g->eraseFromParent();
  }
}