C++ (Cpp) VectorType::getVectorElementType Exemples

Langage de programmation: C++ (Cpp)

Class/Type: VectorType

Méthode/Fonction: getVectorElementType

Exemples sur hotexamples.com : 4

C++ (Cpp) VectorType::getVectorElementType - 4 exemples trouvés. Ce sont les exemples réels les mieux notés de VectorType::getVectorElementType extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher Cacher

size(30)

resize(26)

getNumElements(25)

getElementType(20)

dimension(14)

dot(11)

end(11)

begin(10)

push_back(10)

createEvenlyPartitionedSpace(7)

getPointerTo(6)

array(6)

data(6)

getVectorNumElements(4)

getVectorElementType(4)

adjoint(4)

transpose(4)

rows(4)

handle(3)

empty(3)

length(3)

norm(3)

normalize(3)

start(3)

X(3)

cols(3)

Y(3)

clear(3)

vectorLength(2)

Normalize(2)

getBitWidth(2)

stride(2)

extent(2)

internal_size(2)

real(2)

getScalarType(2)

dimension_0(2)

coeff(2)

maxCoeff(2)

segment(2)

minCoeff(2)

createSpace(2)

tail(1)

rank(1)

x(1)

ptr_on_device(1)

setK(1)

squared_magnitude(1)

ready(1)

setJ(1)

Méthodes fréquemment utilisées

size (30)

resize (26)

getNumElements (25)

getElementType (20)

dimension (14)

dot (11)

end (11)

begin (10)

push_back (10)

createEvenlyPartitionedSpace (7)

Méthodes fréquemment utilisées

getPointerTo (6)

array (6)

data (6)

getVectorNumElements (4)

getVectorElementType (4)

adjoint (4)

transpose (4)

rows (4)

handle (3)

empty (3)

length (3)

norm (3)

normalize (3)

start (3)

X (3)

cols (3)

Y (3)

clear (3)

vectorLength (2)

Normalize (2)

Méthodes fréquemment utilisées

length (3)

norm (3)

normalize (3)

start (3)

X (3)

cols (3)

Y (3)

clear (3)

vectorLength (2)

Normalize (2)

getBitWidth (2)

stride (2)

extent (2)

internal_size (2)

real (2)

getScalarType (2)

dimension_0 (2)

coeff (2)

maxCoeff (2)

segment (2)

minCoeff (2)

createSpace (2)

tail (1)

rank (1)

x (1)

ptr_on_device (1)

setK (1)

squared_magnitude (1)

ready (1)

setJ (1)

Méthodes fréquemment utilisées

getBitWidth (2)

stride (2)

extent (2)

internal_size (2)

real (2)

getScalarType (2)

dimension_0 (2)

coeff (2)

maxCoeff (2)

segment (2)

minCoeff (2)

createSpace (2)

tail (1)

rank (1)

x (1)

ptr_on_device (1)

setK (1)

squared_magnitude (1)

ready (1)

setJ (1)

replace (1)

reserve (1)

squaredNorm (1)

sort (1)

setZero (1)

populateMembers (1)

setElementRange (1)

setI (1)

rowwise (1)

AddScale (1)

pop_back (1)

copyToHost (1)

Count (1)

DotNonConj (1)

GetNorm (1)

GetNumberOfComponents (1)

Norm (1)

Random (1)

ScaleAdd (1)

Z (1)

Exemple #1

0

Afficher le fichier

Fichier : X86InterleavedAccess.cpp Projet : CTSRD-SOAAP/llvm

bool X86InterleavedAccessGroup::isSupported() const { VectorType *ShuffleVecTy = Shuffles[0]->getType(); Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy); unsigned WideInstSize; // Currently, lowering is supported for the following vectors: // Stride 4: // 1. Store and load of 4-element vectors of 64 bits on AVX. // 2. Store of 16/32-element vectors of 8 bits on AVX. // Stride 3: // 1. Load of 16/32-element vecotrs of 8 bits on AVX. if (!Subtarget.hasAVX() || (Factor != 4 && Factor != 3)) return false; if (isa<LoadInst>(Inst)) { WideInstSize = DL.getTypeSizeInBits(Inst->getType()); } else WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType()); // We support shuffle represents stride 4 for byte type with size of // WideInstSize. if (ShuffleElemSize == 64 && WideInstSize == 1024 && Factor == 4) return true; if (ShuffleElemSize == 8 && isa<StoreInst>(Inst) && Factor == 4 && (WideInstSize == 512 || WideInstSize == 1024)) return true; if (ShuffleElemSize == 8 && isa<LoadInst>(Inst) && Factor == 3 && (WideInstSize == 384 || WideInstSize == 768)) return true; return false; }

Exemple #2

0

Afficher le fichier

Fichier : X86TargetTransformInfo.cpp Projet : Nanosim-LIG/llvm

unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned Alignment, unsigned AddressSpace) { VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy); if (!SrcVTy) // To calculate scalar take the regular cost, without mask return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace); unsigned NumElem = SrcVTy->getVectorNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem); if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) || (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) || !isPowerOf2_32(NumElem)) { // Scalarization unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); unsigned ScalarCompareCost = getCmpSelInstrCost(Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), NULL); unsigned BranchCost = getCFInstrCost(Instruction::Br); unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); unsigned ValueSplitCost = getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store); unsigned MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), Alignment, AddressSpace); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; } // Legalize the type. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy); unsigned Cost = 0; if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. 
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) + getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0); else if (LT.second.getVectorNumElements() > NumElem) { VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), LT.second.getVectorNumElements()); // Expanding requires fill mask with zeroes Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy); } if (!ST->hasAVX512()) return Cost + LT.first*4; // Each maskmov costs 4 // AVX-512 masked load/store is cheapper return Cost+LT.first; }

Exemple #3

0

Afficher le fichier

Fichier : X86InterleavedAccess.cpp Projet : CTSRD-SOAAP/llvm

// Lowers this interleaved access group into X86-specific // instructions/intrinsics. bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { SmallVector<Instruction *, 4> DecomposedVectors; SmallVector<Value *, 4> TransposedVectors; VectorType *ShuffleTy = Shuffles[0]->getType(); if (isa<LoadInst>(Inst)) { // Try to generate target-sized register(/instruction). decompose(Inst, Factor, ShuffleTy, DecomposedVectors); Type *ShuffleEltTy = Inst->getType(); unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor; // Perform matrix-transposition in order to compute interleaved // results by generating some sort of (optimized) target-specific // instructions. switch (NumSubVecElems) { default: return false; case 4: transpose_4x4(DecomposedVectors, TransposedVectors); break; case 8: case 16: case 32: deinterleave8bitStride3(DecomposedVectors, TransposedVectors, NumSubVecElems); break; } // Now replace the unoptimized-interleaved-vectors with the // transposed-interleaved vectors. for (unsigned i = 0, e = Shuffles.size(); i < e; ++i) Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]); return true; } Type *ShuffleEltTy = ShuffleTy->getVectorElementType(); unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor; // Lower the interleaved stores: // 1. Decompose the interleaved wide shuffle into individual shuffle // vectors. decompose(Shuffles[0], Factor, VectorType::get(ShuffleEltTy, NumSubVecElems), DecomposedVectors); // 2. Transpose the interleaved-vectors into vectors of contiguous // elements. switch (NumSubVecElems) { case 4: transpose_4x4(DecomposedVectors, TransposedVectors); break; case 16: case 32: interleave8bitStride4(DecomposedVectors, TransposedVectors, NumSubVecElems); break; default: return false; } // 3. Concatenate the contiguous-vectors back into a wide vector. Value *WideVec = concatenateVectors(Builder, TransposedVectors); // 4. Generate a store instruction for wide-vec. 
StoreInst *SI = cast<StoreInst>(Inst); Builder.CreateAlignedStore(WideVec, SI->getPointerOperand(), SI->getAlignment()); return true; }

Exemple #4

0

Afficher le fichier

Fichier : AMDGPULowerKernelArguments.cpp Projet : alex-t/llvm

/// Replaces the uses of each used kernel argument with a load from the
/// kernarg segment pointer.
///
/// Only functions with the AMDGPU_KERNEL calling convention and at least one
/// argument are touched. A call to the amdgcn_kernarg_segment_ptr intrinsic is
/// created through a builder positioned at the first instruction of the entry
/// block, and every argument that has uses is rewritten as an invariant load
/// at its computed offset from that pointer.
///
/// \param F Function to process.
/// \returns true if the function was modified, false if it was left alone.
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  const TargetMachine &TM =
      getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();

  IRBuilder<> Builder(&*F.begin()->begin());

  const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

  // FIXME: Alignment is broken with explicit arg offset.
  unsigned MaxAlign; // Filled in by getKernArgSegmentSize.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");
  KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  const unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();

  uint64_t ExplicitArgOffset = 0;
  for (Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    const unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);
    const unsigned Size = DL.getTypeSizeInBits(ArgTy);
    const unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

    // The running offset advances for every argument, including the ones
    // skipped below.
    const uint64_t AlignedArgOffset = alignTo(ExplicitArgOffset, ABIAlign);
    const uint64_t EltOffset = AlignedArgOffset + BaseOffset;
    ExplicitArgOffset = AlignedArgOffset + AllocSize;

    if (Arg.use_empty())
      continue;

    if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      const unsigned PtrAS = PtrTy->getAddressSpace();
      const bool IsLocalOrRegion = PtrAS == AMDGPUAS::LOCAL_ADDRESS ||
                                   PtrAS == AMDGPUAS::REGION_ADDRESS;
      if (IsLocalOrRegion &&
          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    auto *VecTy = dyn_cast<VectorType>(ArgTy);
    const bool IsV3 = VecTy && VecTy->getNumElements() == 3;
    // FIXME: Handle aggregate types
    const bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    const int64_t AlignDownOffset = alignDown(EltOffset, 4);
    const int64_t OffsetDiff = EltOffset - AlignDownOffset;

    // Since we don't have sub-dword scalar loads, avoid doing an extload by
    // loading earlier than the argument address, and extracting the relevant
    // bits.
    //
    // Additionally widen any sub-dword load to i32 even if suitably aligned,
    // so that CSE between different argument loads works easily.
    const uint64_t LoadOffset = DoShiftOpt ? AlignDownOffset : EltOffset;
    const unsigned AdjustedAlign = MinAlign(LoadOffset, KernArgBaseAlign);

    Value *ArgPtr = Builder.CreateConstInBoundsGEP1_64(
        Builder.getInt8Ty(), KernArgSegment, LoadOffset,
        Arg.getName() + (DoShiftOpt ? ".kernarg.offset.align.down"
                                    : ".kernarg.offset"));

    Type *AdjustedArgTy = ArgTy;
    if (DoShiftOpt)
      AdjustedArgTy = Builder.getInt32Ty();

    VectorType *V4Ty = nullptr;
    if (IsV3 && Size >= 32) {
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads:
      // load four elements and shuffle the first three back out afterwards.
      V4Ty = VectorType::get(VecTy->getVectorElementType(), 4);
      AdjustedArgTy = V4Ty;
    }

    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
                                   ArgPtr->getName() + ".cast");
    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);
    // Wraps an integer as a single-operand metadata node holding an i64
    // constant.
    const auto MakeInt64Node = [&](uint64_t V) {
      return MDNode::get(
          Ctx, MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(), V)));
    };

    // Mirror the pointer attributes of the argument as metadata on the load.
    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      const uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0)
        Load->setMetadata(LLVMContext::MD_dereferenceable,
                          MakeInt64Node(DerefBytes));

      const uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0)
        Load->setMetadata(LLVMContext::MD_dereferenceable_or_null,
                          MakeInt64Node(DerefOrNullBytes));

      const unsigned ParamAlign = Arg.getParamAlignment();
      if (ParamAlign != 0)
        Load->setMetadata(LLVMContext::MD_align, MakeInt64Node(ParamAlign));
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      // Shift the argument's bytes down to bit 0 of the loaded dword, truncate
      // to the argument's width, then cast back to the argument type.
      Value *ExtractBits = Load;
      if (OffsetDiff != 0)
        ExtractBits = Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal =
          Builder.CreateBitCast(Trunc, ArgTy, Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
      continue;
    }

    if (IsV3) {
      Value *Shuf = Builder.CreateShuffleVector(
          Load, UndefValue::get(V4Ty), {0, 1, 2}, Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
      continue;
    }

    Load->setName(Arg.getName() + ".load");
    Arg.replaceAllUsesWith(Load);
  }

  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  return true;
}