Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      const Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!");
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that I is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
                                                   New->getName()+".sub", It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, V);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {
    // If alloca'ing a zero byte object, replace the alloca with a null pointer.
    // Note that we only do this for alloca's, because malloc should allocate
    // and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
  }

  return 0;
}
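// Illustrative aside (not part of the pass above): the canonicalization turns
//   %buf = alloca i32, i32 4
// into
//   %buf1 = alloca [4 x i32]
//   %buf1.sub = getelementptr inbounds [4 x i32]* %buf1, i32 0, i32 0
// with every user of %buf rewritten to use %buf1.sub. A minimal standalone
// sketch of the same rewrite, assuming the IRBuilder-era C++ API used above;
// the helper name and insertion point are hypothetical:
static Value *canonicalizeArrayAlloca(AllocaInst *AI, IRBuilder<> &B) {
  ConstantInt *C = cast<ConstantInt>(AI->getArraySize());
  Type *ArrTy = ArrayType::get(AI->getAllocatedType(), C->getZExtValue());
  // One allocation of [C x Ty] replaces an array allocation of C Ty's.
  AllocaInst *New = B.CreateAlloca(ArrTy, 0, AI->getName());
  New->setAlignment(AI->getAlignment());
  Value *Zero = B.getInt32(0);
  Value *Idx[] = { Zero, Zero };
  // Decay the array back to a Ty* so existing users still type-check.
  return B.CreateInBoundsGEP(New, Idx, New->getName() + ".sub");
}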
AllocaInst* Variables::changeLocal(Value* value, ArrayType* newType) {

  AllocaInst* oldTarget = dyn_cast<AllocaInst>(value);
  PointerType* oldPointerType = dyn_cast<PointerType>(oldTarget->getType());
  ArrayType* oldType = dyn_cast<ArrayType>(oldPointerType->getElementType());
  AllocaInst* newTarget = NULL;

  errs() << "Changing the precision of variable \"" << oldTarget->getName()
         << "\" from " << *oldType << " to " << *newType << ".\n";

  if (newType->getElementType()->getTypeID() != oldType->getElementType()->getTypeID()) {

    newTarget = new AllocaInst(newType, getInt32(1), "", oldTarget);

    // we are not calling getAlignment because in this case double requires 16. Investigate further.
    unsigned alignment;
    switch(newType->getElementType()->getTypeID()) {
    case Type::FloatTyID:
      alignment = 4;
      break;
    case Type::DoubleTyID:
      alignment = 16;
      break;
    case Type::X86_FP80TyID:
      alignment = 16;
      break;
    default:
      alignment = 0;
    }

    newTarget->setAlignment(alignment); // depends on type? 8 for float? 16 for double?
    newTarget->takeName(oldTarget);

    // iterating through instructions using old AllocaInst
    vector<Instruction*> erase;
    Value::use_iterator it = oldTarget->use_begin();
    for(; it != oldTarget->use_end(); it++) {
      bool is_erased = Transformer::transform(it, newTarget, oldTarget,
                                              newType, oldType, alignment);
      if (!is_erased)
        erase.push_back(dyn_cast<Instruction>(*it));
    }

    // erasing uses of old instructions
    for(unsigned int i = 0; i < erase.size(); i++) {
      erase[i]->eraseFromParent();
    }

    // erase old instruction
    //oldTarget->eraseFromParent();
  } else {
    errs() << "\tNo changes required.\n";
  }
  return newTarget;
}
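// Hypothetical usage sketch: promote a local [N x float] buffer to double
// precision. The caller and helper names here are invented for illustration;
// the actual rewriting of each use is delegated to Transformer::transform.
//   before: %buf = alloca [10 x float],  align 4
//   after:  %buf = alloca [10 x double], align 16
void promoteToDouble(Variables &vars, AllocaInst *buf) {
  ArrayType *oldTy = cast<ArrayType>(buf->getAllocatedType());
  ArrayType *newTy = ArrayType::get(Type::getDoubleTy(buf->getContext()),
                                    oldTy->getNumElements());
  vars.changeLocal(buf, newTy);
}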
static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;

    // Canonicalize it.
    Value *V = IC.Builder->getInt32(1);
    AI.setOperand(0, V);
    return &AI;
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
    AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
    New->setAlignment(AI.getAlignment());

    // Scan to the end of the allocation instructions, to skip over a block of
    // allocas if possible...also skip interleaved debug info
    //
    BasicBlock::iterator It(New);
    while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
      ++It;

    // Now that I is pointing to the first non-allocation-inst in the block,
    // insert our getelementptr instruction...
    //
    Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
    Value *NullIdx = Constant::getNullValue(IdxTy);
    Value *Idx[2] = {NullIdx, NullIdx};
    Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
    IC.InsertNewInstBefore(GEP, *It);

    // Now make everything use the getelementptr instead of the original
    // allocation.
    return IC.ReplaceInstUsesWith(AI, GEP);
  }

  if (isa<UndefValue>(AI.getArraySize()))
    return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
  if (AI.getArraySize()->getType() != IntPtrTy) {
    Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
    AI.setOperand(0, V);
    return &AI;
  }

  return nullptr;
}
bool MisalignStackPass::runOnBasicBlock(BasicBlock &BB) {
  bool Changed = false;
  const unsigned alignLimit = sizeof(uint32_t);
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
    AllocaInst *AI = dyn_cast<AllocaInst>(I);
    if (AI && AI->getAlignment() > alignLimit) {
      AI->setAlignment(alignLimit);
      Changed = true;
    }
  }
  return Changed;
}
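// The pass clamps every over-aligned stack slot to 4 bytes, e.g.
// "%v = alloca <4 x float>, align 16" becomes "align 4", which is useful for
// flushing out code that silently relies on extra alignment. A hedged sketch
// of how such a legacy pass is typically registered; the pass ID definition
// and the command-line name below are placeholders, not taken from the
// snippet above:
char MisalignStackPass::ID = 0;
static RegisterPass<MisalignStackPass>
    X("misalign-stack", "Clamp alloca alignment to sizeof(uint32_t)");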
// =============================================================================
// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
// then add the following instructions to the first basic block:
//
// %temp = alloca %struct.x, align 8
// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
// %tv = load %struct.x addrspace(101)* %tempd
// store %struct.x %tv, %struct.x* %temp, align 8
//
// The above code allocates some space in the stack and copies the incoming
// struct from param space to local space.
// Then replace all occurrences of %d by %temp.
// =============================================================================
void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
  Function *Func = Arg->getParent();
  Instruction *FirstInst = &(Func->getEntryBlock().front());
  PointerType *PType = dyn_cast<PointerType>(Arg->getType());

  assert(PType && "Expecting pointer type in handleByValParam");

  Type *StructType = PType->getElementType();
  AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
  // Set the alignment to alignment of the byval parameter. This is because,
  // later load/stores assume that alignment, and we are going to replace
  // the use of the byval parameter with this alloca instruction.
  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
  Arg->replaceAllUsesWith(AllocA);

  Value *ArgInParam = new AddrSpaceCastInst(
      Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
      FirstInst);
  LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
  new StoreInst(LI, AllocA, FirstInst);
}
void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
  Function *Func = Arg->getParent();
  Instruction *FirstInst = &(Func->getEntryBlock().front());
  PointerType *PType = dyn_cast<PointerType>(Arg->getType());

  assert(PType && "Expecting pointer type in handleParam");

  Type *StructType = PType->getElementType();
  AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);

  /* Set the alignment to alignment of the byval parameter. This is because,
   * later load/stores assume that alignment, and we are going to replace
   * the use of the byval parameter with this alloca instruction. */
  AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));

  Arg->replaceAllUsesWith(AllocA);

  // Get the cvt.gen.to.param intrinsic
  Type *CvtTypes[] = {
      Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
      Type::getInt8PtrTy(Func->getParent()->getContext(),
                         ADDRESS_SPACE_GENERIC)};
  Function *CvtFunc = Intrinsic::getDeclaration(
      Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);

  Value *BitcastArgs[] = {
      new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
                                              ADDRESS_SPACE_GENERIC),
                      Arg->getName(), FirstInst)};
  CallInst *CallCVT =
      CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);

  BitCastInst *BitCast = new BitCastInst(
      CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
      Arg->getName(), FirstInst);
  LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
  new StoreInst(LI, AllocA, FirstInst);
}
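// For reference, the sequence built above for foo(%struct.x* byval %d) looks
// roughly like this (illustrative IR; value names invented, intrinsic
// overload suffix omitted). It mirrors handleByValParam, but reaches the
// param address space through the nvvm intrinsic instead of an addrspacecast:
//
//   %temp = alloca %struct.x
//   %b    = bitcast %struct.x* %d to i8*
//   %cvt  = call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param(i8* %b)
//   %p    = bitcast i8 addrspace(101)* %cvt to %struct.x addrspace(101)*
//   %tv   = load %struct.x addrspace(101)* %p
//   store %struct.x %tv, %struct.x* %temp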
void ConstantInsertExtractElementIndex::fixNonConstantVectorIndices(
    BasicBlock &BB, const Instructions &Instrs) const {
  for (Instructions::const_iterator IB = Instrs.begin(), IE = Instrs.end();
       IB != IE; ++IB) {
    Instruction *I = *IB;
    Value *Vec = I->getOperand(0);
    Value *Idx = getInsertExtractElementIdx(I);
    VectorType *VecTy = cast<VectorType>(Vec->getType());
    Type *ElemTy = VecTy->getElementType();
    unsigned ElemAlign = DL->getPrefTypeAlignment(ElemTy);
    unsigned VecAlign = std::max(ElemAlign, DL->getPrefTypeAlignment(VecTy));

    IRBuilder<> IRB(I);
    AllocaInst *Alloca = IRB.CreateAlloca(
        ElemTy, ConstantInt::get(Type::getInt32Ty(M->getContext()),
                                 vectorNumElements(I)));
    Alloca->setAlignment(VecAlign);
    Value *AllocaAsVec = IRB.CreateBitCast(Alloca, VecTy->getPointerTo());
    IRB.CreateAlignedStore(Vec, AllocaAsVec, Alloca->getAlignment());
    Value *GEP = IRB.CreateGEP(Alloca, Idx);

    Value *Res;
    switch (I->getOpcode()) {
    default:
      llvm_unreachable("expected InsertElement or ExtractElement");
    case Instruction::InsertElement:
      IRB.CreateAlignedStore(I->getOperand(1), GEP, ElemAlign);
      Res = IRB.CreateAlignedLoad(AllocaAsVec, Alloca->getAlignment());
      break;
    case Instruction::ExtractElement:
      Res = IRB.CreateAlignedLoad(GEP, ElemAlign);
      break;
    }

    I->replaceAllUsesWith(Res);
    I->eraseFromParent();
  }
}
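// Illustration of the rewrite (IR; value names invented): an extractelement
// with a non-constant index
//   %r = extractelement <4 x float> %v, i32 %i
// is lowered to a spill through a stack slot aligned for the whole vector:
//   %a  = alloca float, i32 4, align 16
//   %av = bitcast float* %a to <4 x float>*
//   store <4 x float> %v, <4 x float>* %av, align 16
//   %g  = getelementptr float* %a, i32 %i
//   %r  = load float* %g, align 4
// InsertElement takes the same shape, except the new element is stored
// through %g and the whole vector is reloaded from %av.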
void Preparer::expandAlloca(AllocaInst *AI) {
  // Skip dynaa.slots which is added by AliasCheckerInstrumenter.
  if (AI->getName().startswith(DynAAUtils::SlotsName))
    return;

  if (AI->isArrayAllocation()) {
    // e.g. %32 = alloca i8, i64 %conv164
    Value *Size = AI->getArraySize();
    Value *ExpandedSize = BinaryOperator::Create(
        Instruction::Add, Size,
        ConstantInt::get(cast<IntegerType>(Size->getType()), 1),
        "expanded.size", AI);
    AI->setOperand(0, ExpandedSize);
    return;
  }

  Type *AllocatedType = AI->getAllocatedType();
  if (ArrayType *ArrType = dyn_cast<ArrayType>(AllocatedType)) {
    ArrayType *NewArrType = ArrayType::get(ArrType->getElementType(),
                                           ArrType->getNumElements() + 1);
    AllocaInst *NewAI = new AllocaInst(NewArrType, AI->getName(), AI);
    // inherit the alignment as well
    NewAI->setAlignment(AI->getAlignment());
    BitCastInst *CastNewAI = new BitCastInst(NewAI, AI->getType(),
                                             AI->getName(), AI);
    AI->replaceAllUsesWith(CastNewAI);
    AI->eraseFromParent();
    return;
  }

  assert(AllocatedType->isSized());
  IntegerType *PadType = IntegerType::get(AI->getContext(), 8);
  new AllocaInst(PadType, "alloca_pad", AI);
}
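// Illustration: each local gains one element (or one byte) of padding so that
// a pointer just past the end of the object still falls inside its own
// allocation (IR shape; names invented):
//   %p = alloca i8, i64 %n      ==>  %p = alloca i8, i64 %expanded.size  ; %n + 1
//   %q = alloca [10 x i32]      ==>  %t = alloca [11 x i32]
//                                    %q = bitcast [11 x i32]* %t to [10 x i32]*
// For any other sized type, the fall-through path simply places a one-byte
// "alloca_pad" slot next to the original alloca.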
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(
          Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast =
            ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}
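// Illustration of the zero-size merge above (IR; names invented): given
//   entry:
//     %a = alloca {}          ; zero bytes
//     ...
//   other:
//     %b = alloca {}, i64 %n  ; also zero bytes per element
// %b's array size is first forced to 1, %b is then moved to the entry block,
// and on a later visit it is RAUW'd with %a (through a bitcast if the pointer
// types differ), leaving a single zero-size slot whose alignment is the max
// of the two.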
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
                                 int InlineHistory, bool InsertLifetime) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, InsertLifetime))
    return false;

  AdjustCallerSSPLevel(Caller, Callee);

  // Look at all of the allocas that we inlined through this call site.  If we
  // have already inlined other allocas through other calls into this function,
  // then we know that they have disjoint lifetimes and that we can merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is an
  // array type.  These are usually not promoted because someone is using a
  // variable index into them.  These are also often the most important ones to
  // merge.
  //
  // A better solution would be to have real memory lifetime markers in the IR
  // and not have the inliner do any merging of allocas at all.  This would
  // allow the backend to do proper stack slot coloring of all allocas that
  // *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return true;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                   << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}
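// Illustration (hypothetical source): after both callees below are inlined
// into a(), the two [256 x i8] allocas have disjoint lifetimes, so the loop
// above lets them share a single stack slot:
//   void b() { char buf[256]; use(buf); }
//   void c() { char tmp[256]; use(tmp); }
//   void a() { b(); c(); }  // buf and tmp merge into one [256 x i8] alloca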
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that I is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                    ? DL->getIntPtrType(AI.getType())
                    : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP = GetElementPtrInst::CreateInBounds(New, Idx,
                                                           New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}
static Value *julia_to_native(Type *ty, jl_value_t *jt, Value *jv,
                              jl_value_t *aty, bool addressOf,
                              bool byRef, bool inReg,
                              bool needCopy, int argn,
                              jl_codectx_t *ctx, bool *needStackRestore) {
    Type *vt = jv->getType();

    // We're passing any
    if (ty == jl_pvalue_llvmt) {
        return boxed(jv, ctx);
    }
    if (ty == vt && !addressOf && !byRef) {
        return jv;
    }
    if (vt != jl_pvalue_llvmt) {
        // argument value is unboxed
        if (addressOf || (byRef && inReg)) {
            if (ty->isPointerTy() && ty->getContainedType(0) == vt) {
                // pass the address of an alloca'd thing, not a box
                // since those are immutable.
                *needStackRestore = true;
                Value *slot = builder.CreateAlloca(vt);
                builder.CreateStore(jv, slot);
                return builder.CreateBitCast(slot, ty);
            }
        }
        else if ((vt->isIntegerTy() && ty->isIntegerTy()) ||
                 (vt->isFloatingPointTy() && ty->isFloatingPointTy()) ||
                 (vt->isPointerTy() && ty->isPointerTy())) {
            if (vt->getPrimitiveSizeInBits() == ty->getPrimitiveSizeInBits()) {
                if (!byRef) {
                    return builder.CreateBitCast(jv, ty);
                }
                else {
                    *needStackRestore = true;
                    Value *mem = builder.CreateAlloca(ty);
                    builder.CreateStore(jv, builder.CreateBitCast(mem, vt->getPointerTo()));
                    return mem;
                }
            }
        }
        else if (vt->isStructTy()) {
            if (!byRef) {
                return jv;
            }
            else {
                *needStackRestore = true;
                Value *mem = builder.CreateAlloca(vt);
                builder.CreateStore(jv, mem);
                return mem;
            }
        }

        emit_error("ccall: argument type did not match declaration", ctx);
    }
    if (jl_is_tuple(jt)) {
        return emit_unbox(ty, jv, jt);
    }
    if (jl_is_cpointer_type(jt) && addressOf) {
        assert(ty->isPointerTy());
        jl_value_t *ety = jl_tparam0(jt);
        if (aty != ety && ety != (jl_value_t*)jl_any_type &&
                jt != (jl_value_t*)jl_voidpointer_type) {
            std::stringstream msg;
            msg << "ccall argument ";
            msg << argn;
            emit_typecheck(jv, ety, msg.str(), ctx);
        }
        if (jl_is_mutable_datatype(ety)) {
            // no copy, just reference the data field
            return builder.CreateBitCast(jv, ty);
        }
        else if (jl_is_immutable_datatype(ety) &&
                 jt != (jl_value_t*)jl_voidpointer_type) {
            // yes copy
            Value *nbytes;
            if (jl_is_leaf_type(ety))
                nbytes = ConstantInt::get(T_int32, jl_datatype_size(ety));
            else
                nbytes = tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                            builder.CreateGEP(builder.CreatePointerCast(emit_typeof(jv), T_pint32),
                                ConstantInt::get(T_size, offsetof(jl_datatype_t,size)/sizeof(int32_t))),
                            false));
            *needStackRestore = true;
            AllocaInst *ai = builder.CreateAlloca(T_int8, nbytes);
            ai->setAlignment(16);
            builder.CreateMemCpy(ai, builder.CreateBitCast(jv, T_pint8), nbytes, 1);
            return builder.CreateBitCast(ai, ty);
        }
        // emit maybe copy
        *needStackRestore = true;
        Value *jvt = emit_typeof(jv);
        BasicBlock *mutableBB = BasicBlock::Create(getGlobalContext(), "is-mutable", ctx->f);
        BasicBlock *immutableBB = BasicBlock::Create(getGlobalContext(), "is-immutable", ctx->f);
        BasicBlock *afterBB = BasicBlock::Create(getGlobalContext(), "after", ctx->f);
        Value *ismutable = builder.CreateTrunc(
                tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                        builder.CreateGEP(builder.CreatePointerCast(jvt, T_pint8),
                            ConstantInt::get(T_size, offsetof(jl_datatype_t,mutabl))),
                        false)),
                T_int1);
        builder.CreateCondBr(ismutable, mutableBB, immutableBB);
        builder.SetInsertPoint(mutableBB);
        Value *p1 = builder.CreatePointerCast(jv, ty);
        builder.CreateBr(afterBB);
        builder.SetInsertPoint(immutableBB);
        Value *nbytes = tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                    builder.CreateGEP(builder.CreatePointerCast(jvt, T_pint32),
                        ConstantInt::get(T_size, offsetof(jl_datatype_t,size)/sizeof(int32_t))),
                    false));
        AllocaInst *ai = builder.CreateAlloca(T_int8, nbytes);
        ai->setAlignment(16);
        builder.CreateMemCpy(ai, builder.CreatePointerCast(jv, T_pint8), nbytes, 1);
        Value *p2 = builder.CreatePointerCast(ai, ty);
        builder.CreateBr(afterBB);
        builder.SetInsertPoint(afterBB);
        PHINode *p = builder.CreatePHI(ty, 2);
        p->addIncoming(p1, mutableBB);
        p->addIncoming(p2, immutableBB);
        return p;
    }
    if (addressOf)
        jl_error("ccall: unexpected & on argument");

    // the only "safe" thing to emit here is the expected struct
    assert(jl_is_datatype(jt));
    if (aty != jt) {
        std::stringstream msg;
        msg << "ccall argument ";
        msg << argn;
        emit_typecheck(jv, jt, msg.str(), ctx);
    }
    Value *p = data_pointer(jv);
    Value *pjv = builder.CreatePointerCast(p, PointerType::get(ty, 0));
    if (byRef) {
        if (!needCopy) {
            return pjv;
        }
        else {
            *needStackRestore = true;
            Value *mem = builder.CreateAlloca(ty);
            builder.CreateMemCpy(mem, pjv, (uint64_t)jl_datatype_size(jt),
                                 (uint64_t)((jl_datatype_t*)jt)->alignment);
            return mem;
        }
    }
    else {
        return builder.CreateLoad(pjv, false);
    }
}
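// Illustration of the "maybe copy" tail above (IR shape; value names
// invented): whether the argument is copied is decided at run time from the
// datatype's mutabl flag, and the two paths meet in a phi:
//   br i1 %ismutable, label %is-mutable, label %is-immutable
// is-mutable:                      ; pass the object's data directly
//   %p1 = bitcast ... %jv to T*
// is-immutable:                    ; defensive 16-byte-aligned stack copy
//   %ai = alloca i8, i32 %nbytes, align 16
//   call void @llvm.memcpy(...)
//   %p2 = bitcast i8* %ai to T*
// after:
//   %p = phi T* [ %p1, %is-mutable ], [ %p2, %is-immutable ]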
static void convertInstruction(Instruction *Inst, ConversionState &State) {
  if (SExtInst *Sext = dyn_cast<SExtInst>(Inst)) {
    Value *Op = Sext->getOperand(0);
    Value *NewInst = NULL;
    // If the operand to be extended is illegal, we first need to fill its
    // upper bits (which are zero) with its sign bit.
    if (shouldConvert(Op)) {
      NewInst = getSignExtend(State.getConverted(Op), Op, Sext);
    }
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Sext->getType())) {
      NewInst = new SExtInst(
          NewInst ? NewInst : State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Sext->getType())),
          Sext->getName() + ".sext", Sext);
    }
    // Now all the bits of the result are correct, but we need to restore
    // the bits above its type to zero.
    if (shouldConvert(Sext)) {
      NewInst = getClearUpper(NewInst, Sext->getType(), Sext);
    }
    assert(NewInst && "Failed to convert sign extension");
    State.recordConverted(Sext, NewInst);
  } else if (ZExtInst *Zext = dyn_cast<ZExtInst>(Inst)) {
    Value *Op = Zext->getOperand(0);
    Value *NewInst = NULL;
    // TODO(dschuff): Some of these zexts could be no-ops.
    if (shouldConvert(Op)) {
      NewInst = getClearUpper(State.getConverted(Op), Op->getType(), Zext);
    }
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Zext->getType())) {
      NewInst = CastInst::CreateZExtOrBitCast(
          NewInst ? NewInst : State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Zext->getType())),
          "", Zext);
    }
    assert(NewInst);
    State.recordConverted(Zext, NewInst);
  } else if (TruncInst *Trunc = dyn_cast<TruncInst>(Inst)) {
    Value *Op = Trunc->getOperand(0);
    Value *NewInst = NULL;
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Trunc->getType())) {
      NewInst = new TruncInst(
          State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Trunc->getType())),
          State.getConverted(Op)->getName() + ".trunc", Trunc);
    }
    // Restoring the upper-bits-are-zero invariant effectively truncates the
    // value.
    if (shouldConvert(Trunc)) {
      NewInst = getClearUpper(NewInst ? NewInst : Op, Trunc->getType(), Trunc);
    }
    assert(NewInst);
    State.recordConverted(Trunc, NewInst);
  } else if (AllocaInst *Alloc = dyn_cast<AllocaInst>(Inst)) {
    // Don't handle arrays of illegal types, but we could handle an array
    // with size specified as an illegal type, as unlikely as that seems.
    if (shouldConvert(Alloc) && Alloc->isArrayAllocation())
      report_fatal_error("Can't convert arrays of illegal type");
    AllocaInst *NewInst = new AllocaInst(
        getPromotedType(Alloc->getAllocatedType()),
        State.getConverted(Alloc->getArraySize()), "", Alloc);
    NewInst->setAlignment(Alloc->getAlignment());
    State.recordConverted(Alloc, NewInst);
  } else if (BitCastInst *BCInst = dyn_cast<BitCastInst>(Inst)) {
    // Only handle pointers. Ints can't be cast to/from other ints.
    Type *DestType = shouldConvert(BCInst) ?
        getPromotedType(BCInst->getDestTy()) : BCInst->getDestTy();
    BitCastInst *NewInst = new BitCastInst(
        State.getConverted(BCInst->getOperand(0)), DestType, "", BCInst);
    State.recordConverted(BCInst, NewInst);
  } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    if (shouldConvert(Load)) {
      splitLoad(Load, State);
    }
  } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    if (shouldConvert(Store->getValueOperand())) {
      splitStore(Store, State);
    }
  } else if (isa<CallInst>(Inst)) {
    report_fatal_error("can't convert calls with illegal types");
  } else if (BinaryOperator *Binop = dyn_cast<BinaryOperator>(Inst)) {
    Value *NewInst = NULL;
    if (Binop->getOpcode() == Instruction::AShr) {
      // The AShr operand needs to be sign-extended to the promoted size
      // before shifting. Because the sign-extension is implemented with
      // AShr, it can be combined with the original operation.
      Value *Op = Binop->getOperand(0);
      Value *ShiftAmount = NULL;
      APInt SignShiftAmt = APInt(
          getPromotedType(Op->getType())->getIntegerBitWidth(),
          getPromotedType(Op->getType())->getIntegerBitWidth() -
              Op->getType()->getIntegerBitWidth());
      NewInst = BinaryOperator::Create(
          Instruction::Shl,
          State.getConverted(Op),
          ConstantInt::get(getPromotedType(Op->getType()), SignShiftAmt),
          State.getConverted(Op)->getName() + ".getsign",
          Binop);
      if (ConstantInt *C = dyn_cast<ConstantInt>(
              State.getConverted(Binop->getOperand(1)))) {
        ShiftAmount = ConstantInt::get(getPromotedType(Op->getType()),
                                       SignShiftAmt + C->getValue());
      } else {
        ShiftAmount = BinaryOperator::Create(
            Instruction::Add,
            State.getConverted(Binop->getOperand(1)),
            ConstantInt::get(
                getPromotedType(Binop->getOperand(1)->getType()),
                SignShiftAmt),
            State.getConverted(Op)->getName() + ".shamt", Binop);
      }
      NewInst = BinaryOperator::Create(
          Instruction::AShr,
          NewInst,
          ShiftAmount,
          Binop->getName() + ".result", Binop);
    } else {
      // If the original operation is not AShr, just recreate it as usual.
      NewInst = BinaryOperator::Create(
          Binop->getOpcode(),
          State.getConverted(Binop->getOperand(0)),
          State.getConverted(Binop->getOperand(1)),
          Binop->getName() + ".result", Binop);
      if (isa<OverflowingBinaryOperator>(NewInst)) {
        cast<BinaryOperator>(NewInst)->setHasNoUnsignedWrap(
            Binop->hasNoUnsignedWrap());
        cast<BinaryOperator>(NewInst)->setHasNoSignedWrap(
            Binop->hasNoSignedWrap());
      }
    }

    // Now restore the invariant if necessary.
    // This switch also sanity-checks the operation.
    switch (Binop->getOpcode()) {
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::LShr:
        // These won't change the upper bits.
        break;
      // These can change the upper bits, unless we are sure they never
      // overflow. So clear them now.
      case Instruction::Add:
      case Instruction::Sub:
        if (!(Binop->hasNoUnsignedWrap() && Binop->hasNoSignedWrap()))
          NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      case Instruction::Shl:
        if (!Binop->hasNoUnsignedWrap())
          NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      // We modified the upper bits ourselves when implementing AShr
      case Instruction::AShr:
        NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      // We should not see FP operators here.
      // We don't handle mul/div.
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::Mul:
      case Instruction::FMul:
      case Instruction::UDiv:
      case Instruction::SDiv:
      case Instruction::FDiv:
      case Instruction::URem:
      case Instruction::SRem:
      case Instruction::FRem:
      case Instruction::BinaryOpsEnd:
        errs() << *Inst << "\n";
        llvm_unreachable("Cannot handle binary operator");
        break;
    }
    State.recordConverted(Binop, NewInst);
  } else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) {
    Value *Op0, *Op1;
    // For signed compares, operands are sign-extended to their
    // promoted type. For unsigned or equality compares, the comparison
    // is equivalent with the larger type because they are already
    // zero-extended.
    if (Cmp->isSigned()) {
      Op0 = getSignExtend(State.getConverted(Cmp->getOperand(0)),
                          Cmp->getOperand(0), Cmp);
      Op1 = getSignExtend(State.getConverted(Cmp->getOperand(1)),
                          Cmp->getOperand(1), Cmp);
    } else {
      Op0 = State.getConverted(Cmp->getOperand(0));
      Op1 = State.getConverted(Cmp->getOperand(1));
    }
    ICmpInst *NewInst = new ICmpInst(Cmp, Cmp->getPredicate(), Op0, Op1, "");
    State.recordConverted(Cmp, NewInst);
  } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) {
    SelectInst *NewInst = SelectInst::Create(
        Select->getCondition(),
        State.getConverted(Select->getTrueValue()),
        State.getConverted(Select->getFalseValue()),
        "", Select);
    State.recordConverted(Select, NewInst);
  } else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
    PHINode *NewPhi = PHINode::Create(
        getPromotedType(Phi->getType()),
        Phi->getNumIncomingValues(),
        "", Phi);
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) {
      NewPhi->addIncoming(State.getConverted(Phi->getIncomingValue(I)),
                          Phi->getIncomingBlock(I));
    }
    State.recordConverted(Phi, NewPhi);
  } else if (SwitchInst *Switch = dyn_cast<SwitchInst>(Inst)) {
    SwitchInst *NewInst = SwitchInst::Create(
        State.getConverted(Switch->getCondition()),
        Switch->getDefaultDest(),
        Switch->getNumCases(),
        Switch);
    for (SwitchInst::CaseIt I = Switch->case_begin(), E = Switch->case_end();
         I != E; ++I) {
      // Build a new case from the ranges that map to the successor BB. Each
      // range consists of a high and low value which are typed, so the ranges
      // must be rebuilt and a new case constructed from them.
      IntegersSubset CaseRanges = I.getCaseValueEx();
      IntegersSubsetToBB CaseBuilder;
      for (unsigned RI = 0, RE = CaseRanges.getNumItems(); RI < RE; ++RI) {
        CaseBuilder.add(
            IntItem::fromConstantInt(cast<ConstantInt>(convertConstant(
                CaseRanges.getItem(RI).getLow().toConstantInt()))),
            IntItem::fromConstantInt(cast<ConstantInt>(convertConstant(
                CaseRanges.getItem(RI).getHigh().toConstantInt()))));
      }
      IntegersSubset Case = CaseBuilder.getCase();
      NewInst->addCase(Case, I.getCaseSuccessor());
    }
    Switch->eraseFromParent();
  } else {
    errs() << *Inst << "\n";
    llvm_unreachable("unhandled instruction");
  }
}
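// Illustration of the AShr rewrite above (IR; i24 promoted to i32): the
// operand is shifted left so its sign bit lands in bit 31, and the extra 8
// is folded into the shift amount, so a single ashr both sign-extends and
// performs the original shift:
//   %r = ashr i24 %x, %s
// becomes (with %x32/%s32 the promoted operands; names invented)
//   %sx = shl  i32 %x32, 8
//   %sa = add  i32 %s32, 8
//   %r0 = ashr i32 %sx, %sa
//   %r  = and  i32 %r0, 16777215   ; 0xFFFFFF, getClearUpper's mask for i24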