StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
  assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
  StructAlignment = 0;
  StructSize = 0;
  NumElements = ST->getNumElements();

  // Loop over each of the elements, placing them in memory.
  for (unsigned i = 0, e = NumElements; i != e; ++i) {
    const Type *Ty = ST->getElementType(i);
    unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);

    // Add padding if necessary to align the data element properly.
    if ((StructSize & (TyAlign-1)) != 0)
      StructSize = TargetData::RoundUpAlignment(StructSize, TyAlign);

    // Keep track of maximum alignment constraint.
    StructAlignment = std::max(TyAlign, StructAlignment);

    MemberOffsets[i] = StructSize;
    StructSize += TD.getTypeAllocSize(Ty);  // Consume space for this data item
  }

  // Empty structures have alignment of 1 byte.
  if (StructAlignment == 0) StructAlignment = 1;

  // Add padding to the end of the struct so that it could be put in an array
  // and all array elements would be aligned correctly.
  if ((StructSize & (StructAlignment-1)) != 0)
    StructSize = TargetData::RoundUpAlignment(StructSize, StructAlignment);
}
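// Illustrative, standalone sketch (not part of the LLVM sources above) of the
// same offset/padding arithmetic StructLayout performs, applied to a
// hypothetical list of (size, alignment) pairs. The names Member, roundUp and
// computeLayout are invented for this example; only the round-up-to-alignment
// logic mirrors the constructor above.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Member { uint64_t Size; uint64_t Align; };

static uint64_t roundUp(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

static void computeLayout(const std::vector<Member> &Members) {
  uint64_t Offset = 0, StructAlign = 1;
  for (const Member &M : Members) {
    Offset = roundUp(Offset, M.Align);        // pad so the member is aligned
    std::printf("member at offset %llu\n", (unsigned long long)Offset);
    StructAlign = std::max(StructAlign, M.Align);
    Offset += M.Size;                         // consume space for the member
  }
  Offset = roundUp(Offset, StructAlign);      // tail padding so arrays stay aligned
  std::printf("size %llu, align %llu\n",
              (unsigned long long)Offset, (unsigned long long)StructAlign);
}

int main() {
  computeLayout({{1, 1}, {4, 4}, {2, 2}});    // e.g. { i8, i32, i16 } -> size 12, align 4
  return 0;
}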
void
nest::TargetTable::add_target( const thread tid,
  const thread target_rank,
  const TargetData& target_data )
{
  const index lid = target_data.get_source_lid();
  vector_util::grow( targets_[ tid ][ lid ] );

  if ( target_data.is_primary() )
  {
    const TargetDataFields& target_fields = target_data.target_data;
    targets_[ tid ][ lid ].push_back( Target( target_fields.get_tid(),
      target_rank,
      target_fields.get_syn_id(),
      target_fields.get_lcid() ) );
  }
  else
  {
    const SecondaryTargetDataFields& secondary_fields =
      target_data.secondary_data;
    const size_t send_buffer_pos = secondary_fields.get_send_buffer_pos();
    const synindex syn_id = secondary_fields.get_syn_id();
    assert( syn_id < secondary_send_buffer_pos_[ tid ][ lid ].size() );
    secondary_send_buffer_pos_[ tid ][ lid ][ syn_id ].push_back(
      send_buffer_pos );
  }
}
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
  // If we found 8 or more stores to merge, or the range covers at least 64
  // bytes, use memset.
  if (TheStores.size() >= 8 || End-Start >= 64) return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() <= 2) return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the pointer size and assume that
  // this width can be stored.  If so, check to see whether we will end up
  // actually reducing the number of stores used.
  unsigned Bytes = unsigned(End-Start);
  unsigned NumPointerStores = Bytes/TD.getPointerSize();

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores+NumByteStores;
}
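// Hedged, standalone sketch (not the LLVM API) of the arithmetic used by
// isProfitableToUseMemset above, with concrete numbers: on a target with
// 4-byte pointers, 7 one-byte stores covering 7 bytes give NumPointerStores = 1
// and NumByteStores = 3, so 7 > 4 and the range is worth lowering to memset,
// while 3 four-byte stores covering 12 bytes give 3 > 3 + 0 == false and are
// left alone. The function name is invented for this illustration.
#include <cstdio>

static bool profitableToUseMemset(unsigned NumStores, unsigned Bytes,
                                  unsigned PointerSize) {
  if (NumStores >= 8 || Bytes >= 64) return true; // large ranges always win
  if (NumStores <= 2) return false;               // too little to merge
  unsigned NumPointerStores = Bytes / PointerSize;
  unsigned NumByteStores = Bytes - NumPointerStores * PointerSize;
  return NumStores > NumPointerStores + NumByteStores;
}

int main() {
  std::printf("%d\n", profitableToUseMemset(7, 7, 4));  // 1: merge 7 x i8
  std::printf("%d\n", profitableToUseMemset(3, 12, 4)); // 0: 3 x i32 stays
  return 0;
}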
NewProjectDialog::NewProjectDialog(QWidget *parent) :
    QDialog(parent),
    ui(new Ui::NewProjectDialog)
{
    ui->setupUi(this);

    /* Getting the default directory from the application settings */
    QSettings settings;
    settings.beginGroup("NewProjectDialog");
    ui->locationBox->setText(settings.value("defaultDir",
                                            QDir::home().absolutePath())
                             .toString());
    settings.endGroup();

    /* Populating the target box */
    TargetData targets;
    for(int i = 0; i < targets.count(); i++)
    {
        ui->targetBox->insertItem(i, QIcon(), targets.name(i), targets.id(i));
    }
    targetChange(0);

    /* Connecting the browse button and target box */
    QObject::connect(ui->browseButton, SIGNAL(clicked()),
                     this, SLOT(browse()));
    QObject::connect(ui->targetBox, SIGNAL(currentIndexChanged(int)),
                     this, SLOT(targetChange(int)));
}
// Write ViewPaneBlock keywords bool UChromaSession::writeViewPaneBlock(LineParser& parser, ViewPane* pane) { parser.writeLineF(" %s '%s'\n", UChromaSession::viewKeyword(UChromaSession::ViewPaneBlockKeyword), qPrintable(pane->name())); parser.writeLineF(" %s %s\n", UChromaSession::viewPaneKeyword(UChromaSession::AutoPositionTitlesKeyword), stringBool(pane->axes().autoPositionTitles())); for (int axis=0; axis < 3; ++axis) writeAxisBlock(parser, pane->axes(), axis); parser.writeLineF(" %s %i\n", UChromaSession::viewPaneKeyword(UChromaSession::BoundingBoxKeyword), pane->boundingBox()); parser.writeLineF(" %s %f\n", UChromaSession::viewPaneKeyword(UChromaSession::BoundingBoxPlaneYKeyword), pane->boundingBoxPlaneY()); parser.writeLineF(" %s %s\n", UChromaSession::viewPaneKeyword(UChromaSession::FlatLabelsKeyword), stringBool(pane->flatLabels())); parser.writeLineF(" %s %i %i %i %i\n", UChromaSession::viewPaneKeyword(UChromaSession::GeometryKeyword), pane->bottomEdge(), pane->leftEdge(), pane->width(), pane->height()); parser.writeLineF(" %s %f\n", UChromaSession::viewPaneKeyword(UChromaSession::LabelPointSizeKeyword), pane->labelPointSize()); parser.writeLineF(" %s %f\n", UChromaSession::viewPaneKeyword(UChromaSession::TitlePointSizeKeyword), pane->titlePointSize()); Matrix mat = pane->viewRotation(); Vec3<double> trans = pane->viewTranslation(); parser.writeLineF(" %s %f %f %f\n", UChromaSession::viewPaneKeyword(UChromaSession::RotationXKeyword), mat[0], mat[1], mat[2]); parser.writeLineF(" %s %f %f %f\n", UChromaSession::viewPaneKeyword(UChromaSession::RotationYKeyword), mat[4], mat[5], mat[6]); parser.writeLineF(" %s %f %f %f\n", UChromaSession::viewPaneKeyword(UChromaSession::RotationZKeyword), mat[8], mat[9], mat[10]); parser.writeLineF(" %s %f %f %f\n", UChromaSession::viewPaneKeyword(UChromaSession::TranslationKeyword), trans.x, trans.y, trans.z); parser.writeLineF(" %s %s\n", UChromaSession::viewPaneKeyword(UChromaSession::PerspectiveKeyword), stringBool(pane->hasPerspective())); parser.writeLineF(" %s '%s'\n", UChromaSession::viewPaneKeyword(UChromaSession::RoleKeyword), ViewPane::paneRole(pane->role())); for (TargetData* target = pane->collectionTargets(); target != NULL; target = target->next) { if (!Collection::objectValid(target->collection(), "collection in UChromaSession::writeViewPaneBlock")) continue; parser.writeLineF(" %s '%s'\n", UChromaSession::viewPaneKeyword(UChromaSession::RoleTargetCollectionKeyword), qPrintable(target->collection()->locator())); } for (RefListItem<ViewPane,bool>* ri = pane->paneTargets(); ri != NULL; ri = ri->next) parser.writeLineF(" %s '%s'\n", UChromaSession::viewPaneKeyword(UChromaSession::RoleTargetPaneKeyword), qPrintable(ri->item->name())); parser.writeLineF(" %s %s\n", UChromaSession::viewPaneKeyword(UChromaSession::UseBestFlatViewKeyword), stringBool(pane->axes().useBestFlatView())); parser.writeLineF(" %s '%s'\n", UChromaSession::viewPaneKeyword(UChromaSession::ViewTypeKeyword), ViewPane::viewType(pane->viewType())); parser.writeLineF(" %s\n", UChromaSession::viewPaneKeyword(UChromaSession::EndViewPaneKeyword)); return true; }
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
                                  bool &VariableIdxFound,
                                  const TargetData &TD) {
  // Skip over the first indices.
  gep_type_iterator GTI = gep_type_begin(GEP);
  for (unsigned i = 1; i != Idx; ++i, ++GTI)
    /*skip along*/;

  // Compute the offset implied by the rest of the indices.
  int64_t Offset = 0;
  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (OpC == 0)
      // Non-constant index: flag it and bail out.  The value returned here is
      // meaningless once VariableIdxFound is set; callers must check the flag.
      return VariableIdxFound = true;
    if (OpC->isZero()) continue;  // No offset.

    // Handle struct indices, which add their field offset to the pointer.
    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }

    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.
    uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
    Offset += Size*OpC->getSExtValue();
  }
  return Offset;
}
static unsigned getTypeSize(TargetData &TD, Type *type) {
  if (type->isFunctionTy()) /* it is not sized, weird */
    return TD.getPointerSize();
  if (!type->isSized())
    return 100; /* FIXME */
  if (StructType *ST = dyn_cast<StructType>(type))
    return TD.getStructLayout(ST)->getSizeInBytes();
  return TD.getTypeAllocSize(type);
}
/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, int64_t &Offset, const TargetData &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { Offset = 0; return true; } // Otherwise, if this isn't a constant expr, bail out. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) return false; // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) if (CE->getOpcode() == Instruction::GetElementPtr) { // Cannot compute this if the element type of the pointer is missing size // info. if (!cast<PointerType>(CE->getOperand(0)->getType())->getElementType()->isSized()) return false; // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. gep_type_iterator GTI = gep_type_begin(CE); for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(i)); if (!CI) return false; // Index isn't a simple constant? if (CI->getZExtValue() == 0) continue; // Not adding anything. if (const StructType *ST = dyn_cast<StructType>(*GTI)) { // N = N + Offset Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); } else { const SequentialType *SQT = cast<SequentialType>(*GTI); Offset += TD.getTypeSize(SQT->getElementType())*CI->getSExtValue(); } } return true; } return false; }
static void ComputeStructureFieldIndices(const Type *Ty, unsigned Offset,
                                         std::vector<unsigned> &Idxs,
                                         const TargetData &TD) {
  if (Ty->isFirstClassType()) {
    assert(Offset == 0 && "Illegal structure index!");
    return;
  }

  if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    ComputeStructureFieldIndices(STy->getElementType(), Offset, Idxs, TD);
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    std::vector<uint64_t>::const_iterator SI =
      std::upper_bound(SL->MemberOffsets.begin(), SL->MemberOffsets.end(),
                       Offset);
    assert(SI != SL->MemberOffsets.begin() && "Offset not in structure type!");
    --SI;
    assert(*SI <= Offset && "upper_bound didn't work");
    assert((SI == SL->MemberOffsets.begin() || *(SI-1) < Offset) &&
           (SI+1 == SL->MemberOffsets.end() || *(SI+1) > Offset) &&
           "Upper bound didn't work!");
    Offset -= *SI;  // Skip over the offset to this structure field.
    unsigned Idx = SI - SL->MemberOffsets.begin();
    assert(Idx < STy->getNumElements() && "Illegal structure index");
    Idxs.push_back(Idx);
    ComputeStructureFieldIndices(STy->getElementType(Idx), Offset, Idxs, TD);
  } else {
    assert(0 && "Unknown type to index into!");
  }
}
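// Small standalone illustration (hypothetical data, not the LLVM types) of the
// upper_bound trick used above: given the sorted member offsets of a struct,
// the field containing a byte offset is the last member whose offset is <= the
// query.  The helper name fieldContaining is invented for the example.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

static unsigned fieldContaining(const std::vector<uint64_t> &MemberOffsets,
                                uint64_t Offset) {
  // upper_bound returns the first offset strictly greater than Offset; the
  // element just before it is the field that starts at or before Offset.
  auto SI = std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset);
  return static_cast<unsigned>(SI - MemberOffsets.begin()) - 1;
}

int main() {
  std::vector<uint64_t> Offsets = {0, 4, 8, 16};     // e.g. { i32, i32, i64, i8 }
  std::printf("%u\n", fieldContaining(Offsets, 10)); // 2: byte 10 lies in field 2
  return 0;
}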
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
                                const TargetData &TD) {
  const Type *AccessTy;
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
    AccessTy = GV->getType()->getElementType();
  } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    if (!AI->isArrayAllocation())
      AccessTy = AI->getType()->getElementType();
    else
      return false;
  } else if (const CallInst* CI = extractMallocCall(V)) {
    if (!isArrayMalloc(V, &TD))
      // The size is the argument to the malloc call.
      if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
        return (C->getZExtValue() < Size);
    return false;
  } else if (const Argument *A = dyn_cast<Argument>(V)) {
    if (A->hasByValAttr())
      AccessTy = cast<PointerType>(A->getType())->getElementType();
    else
      return false;
  } else {
    return false;
  }

  if (AccessTy->isSized())
    return TD.getTypeAllocSize(AccessTy) < Size;
  return false;
}
/// getPointeeAlignment - Compute the minimum alignment of the value pointed
/// to by the given pointer.
static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        (CE->getOpcode() == Instruction::GetElementPtr &&
         cast<GEPOperator>(CE)->hasAllZeroIndices()))
      return getPointeeAlignment(CE->getOperand(0), TD);

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    if (!GV->isDeclaration())
      return TD.getPreferredAlignment(GV);

  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
    return TD.getABITypeAlignment(PT->getElementType());

  return 0;
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
  // The alignment of a jump table entry is the alignment of int32 unless the
  // entry is just the address of a block, in which case it is the pointer
  // alignment.
  switch (getEntryKind()) {
  case MachineJumpTableInfo::EK_BlockAddress:
    return TD.getPointerABIAlignment();
  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
  case MachineJumpTableInfo::EK_LabelDifference32:
  case MachineJumpTableInfo::EK_Custom32:
    return TD.getABIIntegerTypeAlignment(32);
  case MachineJumpTableInfo::EK_Inline:
    return 1;
  }
  assert(0 && "Unknown jump table encoding!");
  return ~0;
}
Kleerer(ModulePass &modPass, Module &M, TargetData &TD,
        callgraph::Callgraph &CG) : modPass(modPass), M(M), TD(TD), CG(CG),
    C(M.getContext()), intPtrTy(TD.getIntPtrType(C)), done(false) {
  voidPtrType = TypeBuilder<void *, false>::get(C);
  intType = TypeBuilder<int, false>::get(C);
  uintType = TypeBuilder<unsigned, false>::get(C);
}
// Peephole Malloc instructions: we take a look at the use chain of the // malloc instruction, and try to find out if the following conditions hold: // 1. The malloc is of the form: 'malloc [sbyte], uint <constant>' // 2. The only users of the malloc are cast & add instructions // 3. Of the cast instructions, there is only one destination pointer type // [RTy] where the size of the pointed to object is equal to the number // of bytes allocated. // // If these conditions hold, we convert the malloc to allocate an [RTy] // element. TODO: This comment is out of date WRT arrays // static bool MallocConvertibleToType(MallocInst *MI, const Type *Ty, ValueTypeCache &CTMap, const TargetData &TD) { if (!isa<PointerType>(Ty)) return false; // Malloc always returns pointers // Deal with the type to allocate, not the pointer type... Ty = cast<PointerType>(Ty)->getElementType(); if (!Ty->isSized()) return false; // Can only alloc something with a size // Analyze the number of bytes allocated... ExprType Expr = ClassifyExpr(MI->getArraySize()); // Get information about the base datatype being allocated, before & after int ReqTypeSize = TD.getTypeSize(Ty); if (ReqTypeSize == 0) return false; unsigned OldTypeSize = TD.getTypeSize(MI->getType()->getElementType()); // Must have a scale or offset to analyze it... if (!Expr.Offset && !Expr.Scale && OldTypeSize == 1) return false; // Get the offset and scale of the allocation... int64_t OffsetVal = Expr.Offset ? getConstantValue(Expr.Offset) : 0; int64_t ScaleVal = Expr.Scale ? getConstantValue(Expr.Scale) :(Expr.Var != 0); // The old type might not be of unit size, take old size into consideration // here... int64_t Offset = OffsetVal * OldTypeSize; int64_t Scale = ScaleVal * OldTypeSize; // In order to be successful, both the scale and the offset must be a multiple // of the requested data type's size. // if (Offset/ReqTypeSize*ReqTypeSize != Offset || Scale/ReqTypeSize*ReqTypeSize != Scale) return false; // Nope. return true; }
void NewProjectDialog::targetChange(int target)
{
    TargetData targets;

    if(targets.fm(target))
    {
        ui->fmsBox->setEnabled(true);
        ui->rfmsBox->setEnabled(true);
    }
    else
    {
        ui->fmsBox->setChecked(false);
        ui->rfmsBox->setChecked(false);
        ui->fmsBox->setEnabled(false);
        ui->rfmsBox->setEnabled(false);
    }

    if(targets.remoteDepth(target) == TargetData::None)
    {
        ui->rwpsBox->setChecked(false);
        ui->rsbsBox->setChecked(false);
        ui->rfmsBox->setChecked(false);
        ui->rsbsBox->setEnabled(false);
        ui->rwpsBox->setEnabled(false);
        ui->rfmsBox->setEnabled(false);
    }
    else
    {
        ui->rsbsBox->setEnabled(true);
        ui->rwpsBox->setEnabled(true);
        if(targets.fm(target))
            ui->rfmsBox->setEnabled(true);
    }
}
/// getEntrySize - Return the size of each entry in the jump table.
unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
  // The size of a jump table entry is 4 bytes unless the entry is just the
  // address of a block, in which case it is the pointer size.
  switch (getEntryKind()) {
  case MachineJumpTableInfo::EK_BlockAddress:
    return TD.getPointerSize();
  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
  case MachineJumpTableInfo::EK_LabelDifference32:
  case MachineJumpTableInfo::EK_Custom32:
    return 4;
  case MachineJumpTableInfo::EK_Inline:
    return 0;
  }
  assert(0 && "Unknown jump table encoding!");
  return ~0;
}
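// Hedged sketch of how a consumer might combine the getEntryAlignment /
// getEntrySize pair above when reserving space for a jump table; the layout
// helper and the numbers are illustrative and not part of the
// MachineJumpTableInfo API.
#include <cstdio>

static unsigned jumpTableBytes(unsigned NumEntries, unsigned EntrySize,
                               unsigned EntryAlign, unsigned &Offset) {
  // Round the current offset up to the entry alignment (EntryAlign is assumed
  // to be a power of two), then append the table entries back to back.
  Offset = (Offset + EntryAlign - 1) & ~(EntryAlign - 1);
  unsigned Start = Offset;
  Offset += NumEntries * EntrySize;
  return Offset - Start;
}

int main() {
  unsigned Offset = 6; // some bytes already emitted in the section
  // e.g. an EK_BlockAddress table of 5 entries on a 64-bit target:
  // entry size 8, entry alignment 8.
  unsigned Bytes = jumpTableBytes(5, 8, 8, Offset);
  std::printf("table occupies %u bytes, section now ends at %u\n", Bytes, Offset);
  return 0;
}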
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
                                const TargetData &TD) {
  const Type *AccessTy;
  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
    AccessTy = GV->getType()->getElementType();
  } else if (const AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
    if (!AI->isArrayAllocation())
      AccessTy = AI->getType()->getElementType();
    else
      return false;
  } else if (const Argument *A = dyn_cast<Argument>(V)) {
    if (A->hasByValAttr())
      AccessTy = cast<PointerType>(A->getType())->getElementType();
    else
      return false;
  } else {
    return false;
  }

  if (AccessTy->isSized())
    return TD.getTypePaddedSize(AccessTy) < Size;
  return false;
}
/// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), true, CS.getInstruction(), CS.getInstruction()->getParent()); if (!DepInfo.isClobber()) return false; // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the // result. MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); if (MDep == 0 || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) return false; // Get the alignment of the byval. If it is greater than the memcpy, then we // can't do the substitution. If the call doesn't specify the alignment, then // it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign) return false; // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. // memcpy(a <- b) // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. MemDepResult SourceDep = MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), false, CS.getInstruction(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), "tmpcast", CS.getInstruction()); DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" << " " << *CS.getInstruction() << "\n"); // Otherwise we're good! Update the byval argument. CS.setArgument(ArgNo, TmpCast); ++NumMemCpyInstr; return true; }
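// Source-level analogue (illustrative only) of the byval forwarding performed
// by processByValArgument above: the front end copies 'src' into a temporary
// and passes the temporary by value; when nothing writes 'src' between the
// copy and the call, the copy can be skipped and 'src' handed to the callee
// directly.  The Blob type and consume() function are invented for this sketch.
#include <cstdio>
#include <cstring>

struct Blob { char bytes[64]; };

static void consume(Blob b) { std::printf("%c\n", b.bytes[0]); } // byval-style callee

int main() {
  Blob src;
  std::memset(src.bytes, 'x', sizeof(src.bytes));

  Blob tmp;
  std::memcpy(&tmp, &src, sizeof(Blob)); // the memcpy MemCpyOpt looks through
  consume(tmp);                          // before: callee reads the copy

  consume(src);                          // after: equivalent, copy elided
  return 0;
}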
void SVMBlockSizeAccumulator::AddConstant(const TargetData &TD,
                                          const MachineConstantPoolEntry &CPE)
{
    AddConstant(TD.getTypeAllocSize(CPE.getType()), CPE.getAlignment());
}
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (SI->isVolatile()) return false;

  LLVMContext &Context = SI->getContext();

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset a byte
  // at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  Module *M = SI->getParent()->getParent()->getParent();

  // Okay, so we now have a single store of a splattable value.  Scan to find
  // all subsequent stores of the same value to offsets from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(*TD);

  Value *StartPtr = SI->getPointerOperand();

  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite::get(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;

      // TODO: If this is a memset, try to join it in.
      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;

    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;

    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensures that the memset is dominated by any
    // addressing instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      const Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    // Cast the start ptr to be i8* as memset requires.
    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
    const PointerType *i8Ptr =
      Type::getInt8PtrTy(Context, StartPTy->getAddressSpace());
    if (StartPTy != i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,                                           // Start, value
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), // size
      ConstantInt::get(Type::getInt32Ty(Context), Alignment),             // align
      ConstantInt::get(Type::getInt1Ty(Context), 0),                      // volatile
    };
    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };

    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);

    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i];
          dbgs() << "With: " << *C); C=C;  // Silence unused-variable warnings.

    // Don't invalidate the iterator
    BBI = BI;

    // Zap all the stores.
    for (SmallVector<StoreInst*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI)
      (*SI)->eraseFromParent();
    ++NumMemSetInfer;
    MadeChange = true;
  }

  return MadeChange;
}
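// Standalone, source-level view (hypothetical buffer and values) of the store
// merging processStore performs: several adjacent stores of the same
// byte-splattable value are equivalent to one memset over the covered range.
#include <cstdio>
#include <cstring>

int main() {
  char buf[8] = {0};

  // Before: neighbouring stores of a byte-splattable value.
  buf[0] = 0; buf[1] = 0; buf[2] = 0; buf[3] = 0;

  // After: the same effect as a single memset over [buf, buf+4).
  std::memset(buf, 0, 4);

  std::printf("%d\n", buf[3]);
  return 0;
}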
/// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // The general transformation to keep in mind is // // call @func(..., src, ...) // memcpy(dest, src, ...) // // -> // // memcpy(dest, src, ...) // call @func(..., dest, ...) // // Since moving the memcpy is technically awkward, we additionally check that // src only holds uninitialized values at the moment of the call, meaning that // the memcpy can be discarded rather than moved. // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); CallSite CS = CallSite::get(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); if (!cpyLength) return false; // Require that src be an alloca. This simplifies the reasoning considerably. AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); if (cpyLength->getZExtValue() < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); if (!destArraySize) return false; uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * destArraySize->getZExtValue(); if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. if (!A->hasStructRetAttr()) return false; const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } else { return false; } // Check that src is not accessed except via the call and the memcpy. This // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. 
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { User *UI = srcUseList.pop_back_val(); if (isa<BitCastInst>(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { if (G->hasAllZeroIndices()) for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); else return false; } else if (UI != C && UI != cpy) { return false; } } // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. DominatorTree &DT = getAnalysis<DominatorTree>(); if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) if (!DT.dominates(cpyDestInst, C)) return false; // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. bool changedArgument = false; for (unsigned i = 0; i < CS.arg_size(); ++i) if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { if (cpySrc->getType() != cpyDest->getType()) cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), cpyDest->getName(), C); changedArgument = true; if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); else CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, CS.getArgument(i)->getType(), cpyDest->getName(), C)); } if (!changedArgument) return false; // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); MD.removeInstruction(C); // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); ++NumMemCpyInstr; return true; }
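// Source-level analogue (hypothetical code, not LLVM IR) of the call slot
// optimization in performCallSlotOptzn: the callee fills a local that is then
// memcpy'd to the destination; when the checks above hold, the callee can
// write into the destination directly and the copy disappears.  Result and
// produce() are invented for this sketch.
#include <cstdio>
#include <cstring>

struct Result { int values[4]; };

static void produce(Result *out) { out->values[0] = 42; } // writes its argument

int main() {
  Result dest = {};

  // Before: produce into a temporary, then copy it into dest.
  Result tmp = {};
  produce(&tmp);
  std::memcpy(&dest, &tmp, sizeof(Result));

  // After: let the call write dest directly; the memcpy is dropped.
  produce(&dest);

  std::printf("%d\n", dest.values[0]);
  return 0;
}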
// ExpressionConvertibleToType - Return true if it is possible bool llvm::ExpressionConvertibleToType(Value *V, const Type *Ty, ValueTypeCache &CTMap, const TargetData &TD) { // Expression type must be holdable in a register. if (!Ty->isFirstClassType()) return false; ValueTypeCache::iterator CTMI = CTMap.find(V); if (CTMI != CTMap.end()) return CTMI->second == Ty; // If it's a constant... all constants can be converted to a different // type. // if (isa<Constant>(V) && !isa<GlobalValue>(V)) return true; CTMap[V] = Ty; if (V->getType() == Ty) return true; // Expression already correct type! Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; // Otherwise, we can't convert! switch (I->getOpcode()) { case Instruction::Cast: // We can convert the expr if the cast destination type is losslessly // convertible to the requested type. if (!Ty->isLosslesslyConvertibleTo(I->getType())) return false; // We also do not allow conversion of a cast that casts from a ptr to array // of X to a *X. For example: cast [4 x %List *] * %val to %List * * // if (const PointerType *SPT = dyn_cast<PointerType>(I->getOperand(0)->getType())) if (const PointerType *DPT = dyn_cast<PointerType>(I->getType())) if (const ArrayType *AT = dyn_cast<ArrayType>(SPT->getElementType())) if (AT->getElementType() == DPT->getElementType()) return false; break; case Instruction::Add: case Instruction::Sub: if (!Ty->isInteger() && !Ty->isFloatingPoint()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD) || !ExpressionConvertibleToType(I->getOperand(1), Ty, CTMap, TD)) return false; break; case Instruction::Shr: if (!Ty->isInteger()) return false; if (Ty->isSigned() != V->getType()->isSigned()) return false; // FALL THROUGH case Instruction::Shl: if (!Ty->isInteger()) return false; if (!ExpressionConvertibleToType(I->getOperand(0), Ty, CTMap, TD)) return false; break; case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); if (!ExpressionConvertibleToType(LI->getPointerOperand(), PointerType::get(Ty), CTMap, TD)) return false; break; } case Instruction::PHI: { PHINode *PN = cast<PHINode>(I); // Be conservative if we find a giant PHI node. if (PN->getNumIncomingValues() > 32) return false; for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) if (!ExpressionConvertibleToType(PN->getIncomingValue(i), Ty, CTMap, TD)) return false; break; } case Instruction::Malloc: if (!MallocConvertibleToType(cast<MallocInst>(I), Ty, CTMap, TD)) return false; break; case Instruction::GetElementPtr: { // GetElementPtr's are directly convertible to a pointer type if they have // a number of zeros at the end. Because removing these values does not // change the logical offset of the GEP, it is okay and fair to remove them. // This can change this: // %t1 = getelementptr %Hosp * %hosp, ubyte 4, ubyte 0 ; <%List **> // %t2 = cast %List * * %t1 to %List * // into // %t2 = getelementptr %Hosp * %hosp, ubyte 4 ; <%List *> // GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); const PointerType *PTy = dyn_cast<PointerType>(Ty); if (!PTy) return false; // GEP must always return a pointer... const Type *PVTy = PTy->getElementType(); // Check to see if there are zero elements that we can remove from the // index array. If there are, check to see if removing them causes us to // get to the right type... 
// std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end()); const Type *BaseType = GEP->getPointerOperand()->getType(); const Type *ElTy = 0; while (!Indices.empty() && Indices.back() == Constant::getNullValue(Indices.back()->getType())){ Indices.pop_back(); ElTy = GetElementPtrInst::getIndexedType(BaseType, Indices, true); if (ElTy == PVTy) break; // Found a match!! ElTy = 0; } if (ElTy) break; // Found a number of zeros we can strip off! // Otherwise, we can convert a GEP from one form to the other iff the // current gep is of the form 'getelementptr sbyte*, long N // and we could convert this to an appropriate GEP for the new type. // if (GEP->getNumOperands() == 2 && GEP->getType() == PointerType::get(Type::SByteTy)) { // Do not Check to see if our incoming pointer can be converted // to be a ptr to an array of the right type... because in more cases than // not, it is simply not analyzable because of pointer/array // discrepancies. To fix this, we will insert a cast before the GEP. // // Check to see if 'N' is an expression that can be converted to // the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(PTy, I->getOperand(1), Indices, TD); if (ElTy == PVTy) { if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(ElTy), CTMap, TD)) return false; // Can't continue, ExConToTy might have polluted set! break; } } // Otherwise, it could be that we have something like this: // getelementptr [[sbyte] *] * %reg115, long %reg138 ; [sbyte]** // and want to convert it into something like this: // getelemenptr [[int] *] * %reg115, long %reg138 ; [int]** // if (GEP->getNumOperands() == 2 && PTy->getElementType()->isSized() && TD.getTypeSize(PTy->getElementType()) == TD.getTypeSize(GEP->getType()->getElementType())) { const PointerType *NewSrcTy = PointerType::get(PVTy); if (!ExpressionConvertibleToType(I->getOperand(0), NewSrcTy, CTMap, TD)) return false; break; } return false; // No match, maybe next time. } case Instruction::Call: { if (isa<Function>(I->getOperand(0))) return false; // Don't even try to change direct calls. // If this is a function pointer, we can convert the return type if we can // convert the source function pointer. // const PointerType *PT = cast<PointerType>(I->getOperand(0)->getType()); const FunctionType *FT = cast<FunctionType>(PT->getElementType()); std::vector<const Type *> ArgTys(FT->param_begin(), FT->param_end()); const FunctionType *NewTy = FunctionType::get(Ty, ArgTys, FT->isVarArg()); if (!ExpressionConvertibleToType(I->getOperand(0), PointerType::get(NewTy), CTMap, TD)) return false; break; } default: return false; } // Expressions are only convertible if all of the users of the expression can // have this value converted. This makes use of the map to avoid infinite // recursion. // for (Value::use_iterator It = I->use_begin(), E = I->use_end(); It != E; ++It) if (!OperandConvertibleToType(*It, I, Ty, CTMap, TD)) return false; return true; }
/// isSafeToPromoteArgument - As you might guess from the name of this method, /// it checks to see if it is both safe and useful to promote the argument. /// This method limits promotion of aggregates to only promote up to three /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments if (Arg->use_empty()) return true; // We can only promote this argument if all of the uses are loads, or are GEP // instructions (with constant indices) that are subsequently loaded. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load // would have happened in the callee anyway (ie, there is a load in the entry // block) or the pointer passed in at every call site is guaranteed to be // valid. // In the former case, invalid loads can happen, but would have happened // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. // // This set will contain all sets of indices that are loaded in the entry // block, and thus are safe to unconditionally load in the caller. GEPIndicesSet SafeToUnconditionallyLoad; // This set contains all the sets of indices that we are planning to promote. // This makes it possible to limit the number of arguments added. GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as // safe. BasicBlock *EntryBlock = Arg->getParent()->begin(); // Declare this here so we can reuse it IndicesVector Indices; for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); I != E; ++I) if (LoadInst *LI = dyn_cast<LoadInst>(I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); if (V == Arg) { // This load actually loads (part of) Arg? Check the indices then. Indices.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) if (ConstantInt *CI = dyn_cast<ConstantInt>(*II)) Indices.push_back(CI->getSExtValue()); else // We found a non-constant GEP index for this argument? Bail out // right away, can't promote this argument at all. return false; // Indices checked out, mark them as safe MarkIndicesSafe(Indices, SafeToUnconditionallyLoad); Indices.clear(); } } else if (V == Arg) { // Direct loads are equivalent to a GEP with a single 0 index. MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); } } // Now, iterate all uses of the argument to see if there are any uses that are // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. SmallVector<LoadInst*, 16> Loads; IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { User *U = *UI; Operands.clear(); if (LoadInst *LI = dyn_cast<LoadInst>(U)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. 
Operands.push_back(0); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); } // Ensure that all of the indices are constants. for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e; ++i) if (ConstantInt *C = dyn_cast<ConstantInt>(*i)) Operands.push_back(C->getSExtValue()); else return false; // Not a constant operand GEP! // Ensure that the only users of the GEP are load instructions. for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); } else { // Other uses than load? return false; } } else { return false; // Not a load or a GEP. } // Now, see if it is safe to promote this load / loads of this GEP. Loading // is safe if Operands, or a prefix of Operands, is marked as safe. if (!PrefixIn(Operands, SafeToUnconditionallyLoad)) return false; // See if we are already promoting a load with these indices. If not, check // to make sure that we aren't promoting too many elements. If so, nothing // to do. if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { DEBUG(dbgs() << "argpromotion not promoting argument '" << Arg->getName() << "' because it would require adding more " << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; } ToPromote.insert(Operands); } } if (Loads.empty()) return true; // No users, this is a dead argument. // Okay, now we know that the argument is only used by load instructions and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of // the function to each of the load instructions. // Because there could be several/many load instructions, remember which // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; // Without TargetData, assume the worst. for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); const PointerType *LoadTy = cast<PointerType>(Load->getPointerOperand()->getType()); unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. 
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *P = *PI; for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> > I = idf_ext_begin(P, TranspBlocks), E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Arg, LoadSize)) return false; } } // If the path from the entry of the function to each load is free of // instructions that potentially invalidate the load, we can make the // transformation! return true; }
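// Hypothetical source-level illustration of what isSafeToPromoteArgument is
// deciding: when every use of a pointer argument is a load through constant
// indices, the argument can be replaced by the loaded scalars.  The Pair type
// and both function names are invented for this example.
#include <cstdio>

struct Pair { int first; int second; };

// Before promotion: the callee only loads p->first and p->second.
static int sumByPointer(const Pair *p) { return p->first + p->second; }

// After promotion: the loads move to the caller and the pointer disappears.
static int sumPromoted(int first, int second) { return first + second; }

int main() {
  Pair p = {3, 4};
  std::printf("%d %d\n", sumByPointer(&p), sumPromoted(p.first, p.second));
  return 0;
}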
// OperandConvertibleToType - Return true if it is possible to convert operand // V of User (instruction) U to the specified type. This is true iff it is // possible to change the specified instruction to accept this. CTMap is a map // of converted types, so that circular definitions will see the future type of // the expression, not the static current type. // static bool OperandConvertibleToType(User *U, Value *V, const Type *Ty, ValueTypeCache &CTMap, const TargetData &TD) { // if (V->getType() == Ty) return true; // Operand already the right type? // Expression type must be holdable in a register. if (!Ty->isFirstClassType()) return false; Instruction *I = dyn_cast<Instruction>(U); if (I == 0) return false; // We can't convert! switch (I->getOpcode()) { case Instruction::Cast: assert(I->getOperand(0) == V); // We can convert the expr if the cast destination type is losslessly // convertible to the requested type. // Also, do not change a cast that is a noop cast. For all intents and // purposes it should be eliminated. if (!Ty->isLosslesslyConvertibleTo(I->getOperand(0)->getType()) || I->getType() == I->getOperand(0)->getType()) return false; // Do not allow a 'cast ushort %V to uint' to have it's first operand be // converted to a 'short' type. Doing so changes the way sign promotion // happens, and breaks things. Only allow the cast to take place if the // signedness doesn't change... or if the current cast is not a lossy // conversion. // if (!I->getType()->isLosslesslyConvertibleTo(I->getOperand(0)->getType()) && I->getOperand(0)->getType()->isSigned() != Ty->isSigned()) return false; // We also do not allow conversion of a cast that casts from a ptr to array // of X to a *X. For example: cast [4 x %List *] * %val to %List * * // if (const PointerType *SPT = dyn_cast<PointerType>(I->getOperand(0)->getType())) if (const PointerType *DPT = dyn_cast<PointerType>(I->getType())) if (const ArrayType *AT = dyn_cast<ArrayType>(SPT->getElementType())) if (AT->getElementType() == DPT->getElementType()) return false; return true; case Instruction::Add: if (isa<PointerType>(Ty)) { Value *IndexVal = I->getOperand(V == I->getOperand(0) ? 1 : 0); std::vector<Value*> Indices; if (const Type *ETy = ConvertibleToGEP(Ty, IndexVal, Indices, TD)) { const Type *RetTy = PointerType::get(ETy); // Only successful if we can convert this type to the required type if (ValueConvertibleToType(I, RetTy, CTMap, TD)) { CTMap[I] = RetTy; return true; } // We have to return failure here because ValueConvertibleToType could // have polluted our map return false; } } // FALLTHROUGH case Instruction::Sub: { if (!Ty->isInteger() && !Ty->isFloatingPoint()) return false; Value *OtherOp = I->getOperand((V == I->getOperand(0)) ? 1 : 0); return ValueConvertibleToType(I, Ty, CTMap, TD) && ExpressionConvertibleToType(OtherOp, Ty, CTMap, TD); } case Instruction::SetEQ: case Instruction::SetNE: { Value *OtherOp = I->getOperand((V == I->getOperand(0)) ? 1 : 0); return ExpressionConvertibleToType(OtherOp, Ty, CTMap, TD); } case Instruction::Shr: if (Ty->isSigned() != V->getType()->isSigned()) return false; // FALL THROUGH case Instruction::Shl: if (I->getOperand(1) == V) return false; // Cannot change shift amount type if (!Ty->isInteger()) return false; return ValueConvertibleToType(I, Ty, CTMap, TD); case Instruction::Free: assert(I->getOperand(0) == V); return isa<PointerType>(Ty); // Free can free any pointer type! case Instruction::Load: // Cannot convert the types of any subscripts... 
if (I->getOperand(0) != V) return false; if (const PointerType *PT = dyn_cast<PointerType>(Ty)) { LoadInst *LI = cast<LoadInst>(I); const Type *LoadedTy = PT->getElementType(); // They could be loading the first element of a composite type... if (const CompositeType *CT = dyn_cast<CompositeType>(LoadedTy)) { unsigned Offset = 0; // No offset, get first leaf. std::vector<Value*> Indices; // Discarded... LoadedTy = getStructOffsetType(CT, Offset, Indices, TD, false); assert(Offset == 0 && "Offset changed from zero???"); } if (!LoadedTy->isFirstClassType()) return false; if (TD.getTypeSize(LoadedTy) != TD.getTypeSize(LI->getType())) return false; return ValueConvertibleToType(LI, LoadedTy, CTMap, TD); } return false; case Instruction::Store: { StoreInst *SI = cast<StoreInst>(I); if (V == I->getOperand(0)) { ValueTypeCache::iterator CTMI = CTMap.find(I->getOperand(1)); if (CTMI != CTMap.end()) { // Operand #1 is in the table already? // If so, check to see if it's Ty*, or, more importantly, if it is a // pointer to a structure where the first element is a Ty... this code // is necessary because we might be trying to change the source and // destination type of the store (they might be related) and the dest // pointer type might be a pointer to structure. Below we allow pointer // to structures where the 0th element is compatible with the value, // now we have to support the symmetrical part of this. // const Type *ElTy = cast<PointerType>(CTMI->second)->getElementType(); // Already a pointer to what we want? Trivially accept... if (ElTy == Ty) return true; // Tricky case now, if the destination is a pointer to structure, // obviously the source is not allowed to be a structure (cannot copy // a whole structure at a time), so the level raiser must be trying to // store into the first field. Check for this and allow it now: // if (const StructType *SElTy = dyn_cast<StructType>(ElTy)) { unsigned Offset = 0; std::vector<Value*> Indices; ElTy = getStructOffsetType(ElTy, Offset, Indices, TD, false); assert(Offset == 0 && "Offset changed!"); if (ElTy == 0) // Element at offset zero in struct doesn't exist! return false; // Can only happen for {}* if (ElTy == Ty) // Looks like the 0th element of structure is return true; // compatible! Accept now! // Otherwise we know that we can't work, so just stop trying now. return false; } } // Can convert the store if we can convert the pointer operand to match // the new value type... return ExpressionConvertibleToType(I->getOperand(1), PointerType::get(Ty), CTMap, TD); } else if (const PointerType *PT = dyn_cast<PointerType>(Ty)) { const Type *ElTy = PT->getElementType(); assert(V == I->getOperand(1)); if (isa<StructType>(ElTy)) { // We can change the destination pointer if we can store our first // argument into the first element of the structure... // unsigned Offset = 0; std::vector<Value*> Indices; ElTy = getStructOffsetType(ElTy, Offset, Indices, TD, false); assert(Offset == 0 && "Offset changed!"); if (ElTy == 0) // Element at offset zero in struct doesn't exist! return false; // Can only happen for {}* } // Must move the same amount of data... if (!ElTy->isSized() || TD.getTypeSize(ElTy) != TD.getTypeSize(I->getOperand(0)->getType())) return false; // Can convert store if the incoming value is convertible and if the // result will preserve semantics... 
const Type *Op0Ty = I->getOperand(0)->getType(); if (!(Op0Ty->isIntegral() ^ ElTy->isIntegral()) && !(Op0Ty->isFloatingPoint() ^ ElTy->isFloatingPoint())) return ExpressionConvertibleToType(I->getOperand(0), ElTy, CTMap, TD); } return false; } case Instruction::GetElementPtr: if (V != I->getOperand(0) || !isa<PointerType>(Ty)) return false; // If we have a two operand form of getelementptr, this is really little // more than a simple addition. As with addition, check to see if the // getelementptr instruction can be changed to index into the new type. // if (I->getNumOperands() == 2) { const Type *OldElTy = cast<PointerType>(I->getType())->getElementType(); unsigned DataSize = TD.getTypeSize(OldElTy); Value *Index = I->getOperand(1); Instruction *TempScale = 0; // If the old data element is not unit sized, we have to create a scale // instruction so that ConvertibleToGEP will know the REAL amount we are // indexing by. Note that this is never inserted into the instruction // stream, so we have to delete it when we're done. // if (DataSize != 1) { Value *CST; if (Index->getType()->isSigned()) CST = ConstantSInt::get(Index->getType(), DataSize); else CST = ConstantUInt::get(Index->getType(), DataSize); TempScale = BinaryOperator::create(Instruction::Mul, Index, CST); Index = TempScale; } // Check to see if the second argument is an expression that can // be converted to the appropriate size... if so, allow it. // std::vector<Value*> Indices; const Type *ElTy = ConvertibleToGEP(Ty, Index, Indices, TD); delete TempScale; // Free our temporary multiply if we made it if (ElTy == 0) return false; // Cannot make conversion... return ValueConvertibleToType(I, PointerType::get(ElTy), CTMap, TD); } return false; case Instruction::PHI: { PHINode *PN = cast<PHINode>(I); // Be conservative if we find a giant PHI node. if (PN->getNumIncomingValues() > 32) return false; for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) if (!ExpressionConvertibleToType(PN->getIncomingValue(i), Ty, CTMap, TD)) return false; return ValueConvertibleToType(PN, Ty, CTMap, TD); } case Instruction::Call: { User::op_iterator OI = find(I->op_begin(), I->op_end(), V); assert (OI != I->op_end() && "Not using value!"); unsigned OpNum = OI - I->op_begin(); // Are we trying to change the function pointer value to a new type? if (OpNum == 0) { const PointerType *PTy = dyn_cast<PointerType>(Ty); if (PTy == 0) return false; // Can't convert to a non-pointer type... const FunctionType *FTy = dyn_cast<FunctionType>(PTy->getElementType()); if (FTy == 0) return false; // Can't convert to a non ptr to function... // Do not allow converting to a call where all of the operands are ...'s if (FTy->getNumParams() == 0 && FTy->isVarArg()) return false; // Do not permit this conversion! // Perform sanity checks to make sure that new function type has the // correct number of arguments... // unsigned NumArgs = I->getNumOperands()-1; // Don't include function ptr // Cannot convert to a type that requires more fixed arguments than // the call provides... // if (NumArgs < FTy->getNumParams()) return false; // Unless this is a vararg function type, we cannot provide more arguments // than are desired... // if (!FTy->isVarArg() && NumArgs > FTy->getNumParams()) return false; // Okay, at this point, we know that the call and the function type match // number of arguments. Now we see if we can convert the arguments // themselves. Note that we do not require operands to be convertible, // we can insert casts if they are convertible but not compatible. 
The // reason for this is that we prefer to have resolved functions but casted // arguments if possible. // for (unsigned i = 0, NA = FTy->getNumParams(); i < NA; ++i) if (!FTy->getParamType(i)->isLosslesslyConvertibleTo(I->getOperand(i+1)->getType())) return false; // Operands must have compatible types! // Okay, at this point, we know that all of the arguments can be // converted. We succeed if we can change the return type if // necessary... // return ValueConvertibleToType(I, FTy->getReturnType(), CTMap, TD); } const PointerType *MPtr = cast<PointerType>(I->getOperand(0)->getType()); const FunctionType *FTy = cast<FunctionType>(MPtr->getElementType()); if (!FTy->isVarArg()) return false; if ((OpNum-1) < FTy->getNumParams()) return false; // It's not in the varargs section... // If we get this far, we know the value is in the varargs section of the // function! We can convert if we don't reinterpret the value... // return Ty->isLosslesslyConvertibleTo(V->getType()); } } return false; }
bool
nest::SourceTable::get_next_target_data( const thread tid,
  const thread rank_start,
  const thread rank_end,
  thread& source_rank,
  TargetData& next_target_data )
{
  SourceTablePosition& current_position = current_positions_[ tid ];

  // we stay in this loop either until we can return a valid
  // TargetData object or we have reached the end of the sources table
  while ( true )
  {
    current_position.wrap_position( sources_ );
    if ( current_position.is_at_end() )
    {
      return false; // reached the end of the sources table
    }

    // the current position contains an entry, so we retrieve it
    const Source& const_current_source =
      sources_[ current_position.tid ][ current_position.syn_id ][ current_position.lcid ];

    if ( const_current_source.is_processed() or const_current_source.is_disabled() )
    {
      // looks like we've processed this already, let's continue
      --current_position.lcid;
      continue;
    }

    source_rank = kernel().mpi_manager.get_process_id_of_gid( const_current_source.get_gid() );

    // determine whether this thread is responsible for this part of
    // the MPI buffer; if not we just continue with the next iteration
    // of the loop
    if ( source_rank < rank_start or source_rank >= rank_end )
    {
      --current_position.lcid;
      continue;
    }

    Source& current_source =
      sources_[ current_position.tid ][ current_position.syn_id ][ current_position.lcid ];

    // we have found a valid entry, so mark it as processed
    current_source.set_processed( true );

    // we need to set a marker stating whether the entry following this
    // entry, if existent, has the same source; start by assuming it
    // has a different source, only change if necessary
    kernel().connection_manager.set_has_source_subsequent_targets(
      current_position.tid, current_position.syn_id, current_position.lcid, false );

    if ( ( current_position.lcid + 1
             < static_cast< long >( sources_[ current_position.tid ][ current_position.syn_id ].size() )
           and sources_[ current_position.tid ][ current_position.syn_id ][ current_position.lcid + 1 ].get_gid()
             == current_source.get_gid() ) )
    {
      kernel().connection_manager.set_has_source_subsequent_targets(
        current_position.tid, current_position.syn_id, current_position.lcid, true );
    }

    // decrease the position without returning a TargetData if the
    // entry preceding this entry has the same source, but only if
    // the preceding entry was not processed yet
    if ( ( current_position.lcid - 1 >= 0 )
      and ( sources_[ current_position.tid ][ current_position.syn_id ][ current_position.lcid - 1 ].get_gid()
        == current_source.get_gid() )
      and ( not sources_[ current_position.tid ][ current_position.syn_id ][ current_position.lcid - 1 ]
                  .is_processed() ) )
    {
      --current_position.lcid;
      continue;
    }
    // otherwise we return a valid TargetData
    else
    {
      // set values of next_target_data
      next_target_data.set_source_lid( kernel().vp_manager.gid_to_lid( current_source.get_gid() ) );
      next_target_data.set_source_tid( kernel().vp_manager.vp_to_thread(
        kernel().vp_manager.suggest_vp_for_gid( current_source.get_gid() ) ) );
      next_target_data.reset_marker();

      if ( current_source.is_primary() )
      {
        next_target_data.set_is_primary( true );
        // we store the thread index of the source table, not our own tid!
        TargetDataFields& target_fields = next_target_data.target_data;
        target_fields.set_tid( current_position.tid );
        target_fields.set_syn_id( current_position.syn_id );
        target_fields.set_lcid( current_position.lcid );
      }
      else
      {
        next_target_data.set_is_primary( false );
        const size_t recv_buffer_pos =
          kernel().connection_manager.get_secondary_recv_buffer_position(
            current_position.tid, current_position.syn_id, current_position.lcid );
        // convert receive buffer position to send buffer position
        // according to buffer layout of MPIAlltoall
        const size_t send_buffer_pos =
          kernel().mpi_manager.recv_buffer_pos_to_send_buffer_pos_secondary_events(
            recv_buffer_pos, source_rank );
        SecondaryTargetDataFields& secondary_fields = next_target_data.secondary_data;
        secondary_fields.set_send_buffer_pos( send_buffer_pos );
        secondary_fields.set_syn_id( current_position.syn_id );
      }
      --current_position.lcid;
      return true; // found a valid entry
    }
  }
}
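// Minimal standalone sketch (not NEST API): the marker bookkeeping above only
// needs to know whether the entry after the current lcid refers to the same
// source GID, and whether the entry before it is an unprocessed duplicate.
// The plain std::vector version below mirrors that logic; all names are made
// up for illustration.
#include <vector>

struct SourceEntry
{
  unsigned long gid;
  bool processed;
};

// true if entry lcid + 1 exists and refers to the same source as entry lcid
inline bool
has_subsequent_same_source( const std::vector< SourceEntry >& sources, const long lcid )
{
  return lcid + 1 < static_cast< long >( sources.size() )
    and sources[ lcid + 1 ].gid == sources[ lcid ].gid;
}

// true if entry lcid - 1 exists, refers to the same source and has not been
// processed yet, i.e. the caller should step back before emitting TargetData
inline bool
should_step_back( const std::vector< SourceEntry >& sources, const long lcid )
{
  return lcid - 1 >= 0 and sources[ lcid - 1 ].gid == sources[ lcid ].gid
    and not sources[ lcid - 1 ].processed;
}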
static void ConvertOperandToType(User *U, Value *OldVal, Value *NewVal,
                                 ValueMapCache &VMC, const TargetData &TD) {
  if (isa<ValueHandle>(U)) return;  // Valuehandles don't let go of operands...

  if (VMC.OperandsMapped.count(U)) return;
  VMC.OperandsMapped.insert(U);

  ValueMapCache::ExprMapTy::iterator VMCI = VMC.ExprMap.find(U);
  if (VMCI != VMC.ExprMap.end())
    return;

  Instruction *I = cast<Instruction>(U);  // Only Instructions convertible

  BasicBlock *BB = I->getParent();
  assert(BB != 0 && "Instruction not embedded in basic block!");

  std::string Name = I->getName();
  I->setName("");
  Instruction *Res;     // Result of conversion

  //std::cerr << endl << endl << "Type:\t" << Ty << "\nInst: " << I
  //          << "BB Before: " << BB << endl;

  // Prevent I from being removed...
  ValueHandle IHandle(VMC, I);

  const Type *NewTy = NewVal->getType();
  Constant *Dummy = (NewTy != Type::VoidTy) ?
                    Constant::getNullValue(NewTy) : 0;

  switch (I->getOpcode()) {
  case Instruction::Cast:
    if (VMC.NewCasts.count(ValueHandle(VMC, I))) {
      // This cast has already had its value converted, causing a new cast to
      // be created.  We don't want to create YET ANOTHER cast instruction
      // representing the original one, so just modify the operand of this cast
      // instruction, which we know is newly created.
      I->setOperand(0, NewVal);
      I->setName(Name);  // give I its name back
      return;
    } else {
      Res = new CastInst(NewVal, I->getType(), Name);
    }
    break;

  case Instruction::Add:
    if (isa<PointerType>(NewTy)) {
      Value *IndexVal = I->getOperand(OldVal == I->getOperand(0) ? 1 : 0);
      std::vector<Value*> Indices;
      BasicBlock::iterator It = I;

      if (const Type *ETy = ConvertibleToGEP(NewTy, IndexVal, Indices, TD, &It)) {
        // If successful, convert the add to a GEP
        //const Type *RetTy = PointerType::get(ETy);
        // First operand is actually the given pointer...
        Res = new GetElementPtrInst(NewVal, Indices, Name);
        assert(cast<PointerType>(Res->getType())->getElementType() == ETy &&
               "ConvertibleToGEP broken!");
        break;
      }
    }
    // FALLTHROUGH

  case Instruction::Sub:
  case Instruction::SetEQ:
  case Instruction::SetNE: {
    Res = BinaryOperator::create(cast<BinaryOperator>(I)->getOpcode(),
                                 Dummy, Dummy, Name);
    VMC.ExprMap[I] = Res;   // Add node to expression eagerly

    unsigned OtherIdx = (OldVal == I->getOperand(0)) ? 1 : 0;
    Value *OtherOp    = I->getOperand(OtherIdx);
    Res->setOperand(!OtherIdx, NewVal);
    Value *NewOther   = ConvertExpressionToType(OtherOp, NewTy, VMC, TD);
    Res->setOperand(OtherIdx, NewOther);
    break;
  }

  case Instruction::Shl:
  case Instruction::Shr:
    assert(I->getOperand(0) == OldVal);
    Res = new ShiftInst(cast<ShiftInst>(I)->getOpcode(), NewVal,
                        I->getOperand(1), Name);
    break;

  case Instruction::Free:            // Free can free any pointer type!
    assert(I->getOperand(0) == OldVal);
    Res = new FreeInst(NewVal);
    break;

  case Instruction::Load: {
    assert(I->getOperand(0) == OldVal && isa<PointerType>(NewVal->getType()));
    const Type *LoadedTy =
      cast<PointerType>(NewVal->getType())->getElementType();

    Value *Src = NewVal;

    if (const CompositeType *CT = dyn_cast<CompositeType>(LoadedTy)) {
      std::vector<Value*> Indices;
      Indices.push_back(Constant::getNullValue(Type::UIntTy));

      unsigned Offset = 0;   // No offset, get first leaf.
      LoadedTy = getStructOffsetType(CT, Offset, Indices, TD, false);
      assert(LoadedTy->isFirstClassType());

      if (Indices.size() != 1) {     // Do not generate load X, 0
        // Insert the GEP instruction before this load.
        Src = new GetElementPtrInst(Src, Indices, Name+".idx", I);
      }
    }

    Res = new LoadInst(Src, Name);
    assert(Res->getType()->isFirstClassType() && "Load of structure or array!");
    break;
  }

  case Instruction::Store: {
    if (I->getOperand(0) == OldVal) {  // Replace the source value
      // Check to see if operand #1 has already been converted...
      ValueMapCache::ExprMapTy::iterator VMCI =
        VMC.ExprMap.find(I->getOperand(1));
      if (VMCI != VMC.ExprMap.end()) {
        // Comments describing this stuff are in the OperandConvertibleToType
        // switch statement for Store...
        //
        const Type *ElTy =
          cast<PointerType>(VMCI->second->getType())->getElementType();

        Value *SrcPtr = VMCI->second;

        if (ElTy != NewTy) {
          // We check that this is a struct in the initial scan...
          const StructType *SElTy = cast<StructType>(ElTy);

          std::vector<Value*> Indices;
          Indices.push_back(Constant::getNullValue(Type::UIntTy));

          unsigned Offset = 0;
          const Type *Ty = getStructOffsetType(ElTy, Offset, Indices, TD, false);
          assert(Offset == 0 && "Offset changed!");
          assert(NewTy == Ty && "Did not convert to correct type!");

          // Insert the GEP instruction before this store.
          SrcPtr = new GetElementPtrInst(SrcPtr, Indices,
                                         SrcPtr->getName()+".idx", I);
        }
        Res = new StoreInst(NewVal, SrcPtr);

        VMC.ExprMap[I] = Res;
      } else {
        // Otherwise, we haven't converted Operand #1 over yet...
        const PointerType *NewPT = PointerType::get(NewTy);
        Res = new StoreInst(NewVal, Constant::getNullValue(NewPT));
        VMC.ExprMap[I] = Res;
        Res->setOperand(1, ConvertExpressionToType(I->getOperand(1),
                                                   NewPT, VMC, TD));
      }
    } else {                           // Replace the source pointer
      const Type *ValTy = cast<PointerType>(NewTy)->getElementType();

      Value *SrcPtr = NewVal;

      if (isa<StructType>(ValTy)) {
        std::vector<Value*> Indices;
        Indices.push_back(Constant::getNullValue(Type::UIntTy));

        unsigned Offset = 0;
        ValTy = getStructOffsetType(ValTy, Offset, Indices, TD, false);

        assert(Offset == 0 && ValTy);

        // Insert the GEP instruction before this store.
        SrcPtr = new GetElementPtrInst(SrcPtr, Indices,
                                       SrcPtr->getName()+".idx", I);
      }

      Res = new StoreInst(Constant::getNullValue(ValTy), SrcPtr);
      VMC.ExprMap[I] = Res;
      Res->setOperand(0, ConvertExpressionToType(I->getOperand(0),
                                                 ValTy, VMC, TD));
    }
    break;
  }

  case Instruction::GetElementPtr: {
    // Convert a one index getelementptr into just about anything that is
    // desired.
    //
    BasicBlock::iterator It = I;
    const Type *OldElTy = cast<PointerType>(I->getType())->getElementType();
    unsigned DataSize = TD.getTypeSize(OldElTy);
    Value *Index = I->getOperand(1);

    if (DataSize != 1) {
      // Insert a multiply if the old element type is not a unit size...
      Value *CST;
      if (Index->getType()->isSigned())
        CST = ConstantSInt::get(Index->getType(), DataSize);
      else
        CST = ConstantUInt::get(Index->getType(), DataSize);

      Index = BinaryOperator::create(Instruction::Mul, Index, CST,
                                     "scale", It);
    }

    // Perform the conversion now...
    //
    std::vector<Value*> Indices;
    const Type *ElTy = ConvertibleToGEP(NewVal->getType(), Index, Indices, TD, &It);
    assert(ElTy != 0 && "GEP Conversion Failure!");
    Res = new GetElementPtrInst(NewVal, Indices, Name);
    assert(Res->getType() == PointerType::get(ElTy) &&
           "ConvertibleToGet failed!");
  }
#if 0
    if (I->getType() == PointerType::get(Type::SByteTy)) {
      // Convert a getelementptr sbyte * %reg111, uint 16 freely back to
      // anything that is a pointer type...
      //
      BasicBlock::iterator It = I;

      // Check to see if the second argument is an expression that can
      // be converted to the appropriate size... if so, allow it.
      //
      std::vector<Value*> Indices;
      const Type *ElTy = ConvertibleToGEP(NewVal->getType(), I->getOperand(1),
                                          Indices, TD, &It);
      assert(ElTy != 0 && "GEP Conversion Failure!");

      Res = new GetElementPtrInst(NewVal, Indices, Name);
    } else {
      // Convert a getelementptr ulong * %reg123, uint %N
      // to        getelementptr  long * %reg123, uint %N
      // ... where the type must simply stay the same size...
      //
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
      std::vector<Value*> Indices(GEP->idx_begin(), GEP->idx_end());
      Res = new GetElementPtrInst(NewVal, Indices, Name);
    }
#endif
    break;

  case Instruction::PHI: {
    PHINode *OldPN = cast<PHINode>(I);
    PHINode *NewPN = new PHINode(NewTy, Name);
    VMC.ExprMap[I] = NewPN;

    while (OldPN->getNumOperands()) {
      BasicBlock *BB = OldPN->getIncomingBlock(0);
      Value *OldVal = OldPN->getIncomingValue(0);
      ValueHandle OldValHandle(VMC, OldVal);
      OldPN->removeIncomingValue(BB, false);
      Value *V = ConvertExpressionToType(OldVal, NewTy, VMC, TD);
      NewPN->addIncoming(V, BB);
    }
    Res = NewPN;
    break;
  }

  case Instruction::Call: {
    Value *Meth = I->getOperand(0);
    std::vector<Value*> Params(I->op_begin()+1, I->op_end());

    if (Meth == OldVal) {   // Changing the function pointer?
      const PointerType *NewPTy = cast<PointerType>(NewVal->getType());
      const FunctionType *NewTy = cast<FunctionType>(NewPTy->getElementType());

      if (NewTy->getReturnType() == Type::VoidTy)
        Name = "";  // Make sure not to name a void call!

      // Get an iterator to the call instruction so that we can insert casts
      // for operands if need be.  Note that we do not require operands to be
      // convertible, we can insert casts if they are convertible but not
      // compatible.  The reason for this is that we prefer to have resolved
      // functions but casted arguments if possible.
      //
      BasicBlock::iterator It = I;

      // Convert over all of the call operands to their new types... but only
      // convert over the part that is not in the vararg section of the call.
      //
      for (unsigned i = 0; i != NewTy->getNumParams(); ++i)
        if (Params[i]->getType() != NewTy->getParamType(i)) {
          // Create a cast to convert it to the right type, we know that this
          // is a lossless cast...
          //
          Params[i] = new CastInst(Params[i], NewTy->getParamType(i),
                                   "callarg.cast." + Params[i]->getName(), It);
        }
      Meth = NewVal;  // Update call destination to new value

    } else {                   // Changing an argument, must be in vararg area
      std::vector<Value*>::iterator OI =
        find(Params.begin(), Params.end(), OldVal);
      assert (OI != Params.end() && "Not using value!");

      *OI = NewVal;
    }

    Res = new CallInst(Meth, Params, Name);
    break;
  }

  default:
    assert(0 && "Expression convertible, but don't know how to convert?");
    return;
  }

  // If the instruction was newly created, insert it into the instruction
  // stream.
  //
  BasicBlock::iterator It = I;
  assert(It != BB->end() && "Instruction not in own basic block??");
  BB->getInstList().insert(It, Res);   // Keep It pointing to old instruction

  DEBUG(std::cerr << "COT CREATED: " << (void*)Res << " " << *Res
                  << "In: " << (void*)I << " " << *I
                  << "Out: " << (void*)Res << " " << *Res);

  // Add the instruction to the expression map
  VMC.ExprMap[I] = Res;

  if (I->getType() != Res->getType())
    ConvertValueToNewType(I, Res, VMC, TD);
  else {
    bool FromStart = true;
    Value::use_iterator UI;
    while (1) {
      if (FromStart) UI = I->use_begin();
      if (UI == I->use_end()) break;

      if (isa<ValueHandle>(*UI)) {
        ++UI;
        FromStart = false;
      } else {
        User *U = *UI;
        if (!FromStart) --UI;
        U->replaceUsesOfWith(I, Res);
        if (!FromStart) ++UI;
      }
    }
  }
}
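// Illustrative sketch (not the pass itself): the GetElementPtr case above
// keeps the byte offset unchanged when the pointee type changes by rescaling
// the single index with the *old* element size before decomposing it against
// the new pointer type.  The hypothetical helper below shows the same
// arithmetic on plain integers; DataSize plays the role of
// TD.getTypeSize(OldElTy).
#include <cassert>
#include <cstdint>

inline uint64_t scaleIndexToBytes(uint64_t OldIndex, uint64_t DataSize) {
  // A unit-sized element needs no multiply, mirroring the DataSize != 1 check.
  return DataSize == 1 ? OldIndex : OldIndex * DataSize;
}

// Example: an index of 3 into 8-byte elements addresses byte offset 24, so a
// conversion to a byte-sized element type must use index 24 to stay
// equivalent; byte-sized elements pass the index through untouched.
inline void scaleIndexExample() {
  assert(scaleIndexToBytes(3, 8) == 24);
  assert(scaleIndexToBytes(24, 1) == 24);
}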