// Rewrite insert/extract-element instructions whose index is not a constant:
// spill the vector to a stack slot, address the element through memory, and
// reload.  BB is unused here; the instructions to fix arrive in Instrs.
void ConstantInsertExtractElementIndex::fixNonConstantVectorIndices( BasicBlock &BB, const Instructions &Instrs) const {
  for (Instruction *I : Instrs) {
    Value *VecOp = I->getOperand(0);
    Value *IdxOp = getInsertExtractElementIdx(I);
    VectorType *VecTy = cast<VectorType>(VecOp->getType());
    Type *EltTy = VecTy->getElementType();
    unsigned EltAlign = DL->getPrefTypeAlignment(EltTy);
    // The slot is accessed both as a whole vector and element-wise, so align
    // it for whichever is stricter.
    unsigned SlotAlign = std::max(EltAlign, DL->getPrefTypeAlignment(VecTy));

    IRBuilder<> IRB(I);
    // Stack slot with room for every element of the vector.
    AllocaInst *Slot = IRB.CreateAlloca(
        EltTy, ConstantInt::get(Type::getInt32Ty(M->getContext()),
                                vectorNumElements(I)));
    Slot->setAlignment(SlotAlign);
    Value *SlotAsVec = IRB.CreateBitCast(Slot, VecTy->getPointerTo());
    IRB.CreateAlignedStore(VecOp, SlotAsVec, Slot->getAlignment());
    // Address of the dynamically-indexed element.
    Value *EltPtr = IRB.CreateGEP(Slot, IdxOp);

    Value *Res;
    switch (I->getOpcode()) {
    case Instruction::InsertElement:
      // Overwrite the addressed element, then reload the whole vector.
      IRB.CreateAlignedStore(I->getOperand(1), EltPtr, EltAlign);
      Res = IRB.CreateAlignedLoad(SlotAsVec, Slot->getAlignment());
      break;
    case Instruction::ExtractElement:
      Res = IRB.CreateAlignedLoad(EltPtr, EltAlign);
      break;
    default:
      llvm_unreachable("expected InsertElement or ExtractElement");
    }

    I->replaceAllUsesWith(Res);
    I->eraseFromParent();
  }
}
// Try to replace a small, flat array alloca with a single vector value:
// every load/store through the alloca becomes an extract-/insertelement on
// a whole-vector load (and store-back).  Returns true on success.
static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
  ArrayType *ArrTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays, we
  // are just being conservative for now.
  if (!ArrTy || ArrTy->getElementType()->isVectorTy() ||
      ArrTy->getNumElements() > 4) {
    DEBUG(dbgs() << " Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst *, Value *> GEPIdxMap;
  std::vector<Value *> Uses;
  for (User *U : Alloca->users()) {
    auto *GEP = dyn_cast<GetElementPtrInst>(U);
    if (!GEP) {
      // A direct (non-GEP) user must itself be vectorizable.
      if (!canVectorizeInst(cast<Instruction>(U), Alloca))
        return false;
      Uses.push_back(U);
      continue;
    }

    // A GEP whose offset cannot be expressed as a vector lane blocks the
    // promotion entirely.
    Value *VecIdx = GEPToVectorIndex(GEP);
    if (!VecIdx) {
      DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
      return false;
    }

    GEPIdxMap[GEP] = VecIdx;
    for (User *GU : U->users()) {
      if (!canVectorizeInst(cast<Instruction>(GU), U))
        return false;
      Uses.push_back(GU);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(ArrTy);

  DEBUG(dbgs() << " Converting alloca to vector " << *ArrTy << " -> "
               << *VectorTy << '\n');

  for (Value *W : Uses) {
    auto *Inst = cast<Instruction>(W);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      // Read the whole vector, then pick out the addressed lane.
      Value *Ptr = Inst->getOperand(0);
      Value *Lane = calculateVectorIndex(Ptr, GEPIdxMap);
      Value *VecPtr = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
      Value *CurVec = Builder.CreateLoad(VecPtr);
      Value *Elt = Builder.CreateExtractElement(CurVec, Lane);
      Inst->replaceAllUsesWith(Elt);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      // Read-modify-write: load the vector, replace one lane, store it back.
      Value *Ptr = Inst->getOperand(1);
      Value *Lane = calculateVectorIndex(Ptr, GEPIdxMap);
      Value *VecPtr = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
      Value *CurVec = Builder.CreateLoad(VecPtr);
      Value *NewVec = Builder.CreateInsertElement(CurVec, Inst->getOperand(0), Lane);
      Builder.CreateStore(NewVec, VecPtr);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      // Casts of the alloca pointer need no rewriting themselves.
      break;

    default:
      Inst->dump();
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}
// Address-space-aware variant: try to replace a small, flat array alloca
// with a single vector value in the private address space, rewriting every
// load/store through it into extract-/insertelement on whole-vector
// accesses.  Returns true on success.
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
  ArrayType *ArrTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays, we
  // are just being conservative for now.
  // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or
  // equivalent. Potentially these could also be promoted but we don't
  // currently handle this case
  bool Promotable = ArrTy && ArrTy->getNumElements() <= 4 &&
                    ArrTy->getNumElements() >= 2 &&
                    VectorType::isValidElementType(ArrTy->getElementType());
  if (!Promotable) {
    DEBUG(dbgs() << " Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst *, Value *> GEPIdxMap;
  std::vector<Value *> Uses;
  for (User *U : Alloca->users()) {
    auto *GEP = dyn_cast<GetElementPtrInst>(U);
    if (!GEP) {
      // A direct (non-GEP) user must itself be vectorizable.
      if (!canVectorizeInst(cast<Instruction>(U), Alloca))
        return false;
      Uses.push_back(U);
      continue;
    }

    // A GEP whose offset cannot be expressed as a vector lane blocks the
    // promotion entirely.
    Value *VecIdx = GEPToVectorIndex(GEP);
    if (!VecIdx) {
      DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
      return false;
    }

    GEPIdxMap[GEP] = VecIdx;
    for (User *GU : U->users()) {
      if (!canVectorizeInst(cast<Instruction>(GU), U))
        return false;
      Uses.push_back(GU);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(ArrTy);

  DEBUG(dbgs() << " Converting alloca to vector " << *ArrTy << " -> "
               << *VectorTy << '\n');

  // The cast target type is loop-invariant; compute it once.
  Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);

  for (Value *W : Uses) {
    auto *Inst = cast<Instruction>(W);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      // Read the whole vector, then pick out the addressed lane.
      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
      Value *Lane = calculateVectorIndex(Ptr, GEPIdxMap);
      Value *VecPtr = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *CurVec = Builder.CreateLoad(VecPtr);
      Value *Elt = Builder.CreateExtractElement(CurVec, Lane);
      Inst->replaceAllUsesWith(Elt);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      // Read-modify-write: load the vector, replace one lane, store it back.
      auto *SI = cast<StoreInst>(Inst);
      Value *Ptr = SI->getPointerOperand();
      Value *Lane = calculateVectorIndex(Ptr, GEPIdxMap);
      Value *VecPtr = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *CurVec = Builder.CreateLoad(VecPtr);
      Value *NewVec =
          Builder.CreateInsertElement(CurVec, SI->getValueOperand(), Lane);
      Builder.CreateStore(NewVec, VecPtr);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      // Casts of the alloca pointer need no rewriting themselves.
      break;

    default:
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}
// Recursively vectorize the VF scalars in VL into a single vector value.
// Falls back to Scalarize() (building the vector element-by-element)
// whenever the scalars cannot be combined into one vector instruction:
// mixed opcodes, multi-use or cross-block instructions, non-consecutive
// loads, constants/identical scalars, or unsupported opcodes.
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
  // For stores, the type being vectorized is the stored value's type,
  // not the store instruction's (void) type.
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= !!dyn_cast<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.  Instructions with external users, or that
    // live in a different block than the one being vectorized, cannot be
    // erased/recombined safely here.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (I && (I->getNumUses() > 1 || I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Is this a simple vector constant.  (Splats and constant vectors are
  // handled by Scalarize, which folds them.)
  if (AllConst || AllSameScalar)
    return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0)
    return Scalarize(VL, VecTy);

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode())
      return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // NOTE(review): the RHSVL/LHSVL names are swapped relative to operand
    // order — RHSVL collects operand(0) and LHSVL collects operand(1).
    // CreateBinOp below then receives (RHS, LHS), so the original operand
    // order IS preserved and behavior is correct; only the naming is
    // misleading.  Confirm before renaming anything here.
    ValueList LHSVL, RHSVL;
    for (int i = 0; i < VF; ++i) {
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }
    Value *RHS = vectorizeTree(RHSVL, VF);
    Value *LHS = vectorizeTree(LHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
    return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
  }
  case Instruction::Load: {
    LoadInst *LI = dyn_cast<LoadInst>(VL0);
    // Preserve the alignment of the first scalar load on the vector load.
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    // Reuse the first load's pointer, reinterpreted as a vector pointer.
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = dyn_cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    // Vectorize the stored values, then emit one wide store through the
    // first store's pointer.
    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    // The scalar stores are now dead; stores have no users, so they are
    // erased here rather than RAUW'd.
    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    // Stores produce no value.
    return 0;
  }
  default:
    return Scalarize(VL, VecTy);
  }
}
// Vectorize each group of scalar loads in load_instructions: consecutive
// loads (GEP indices lo, lo+1, ...) become one wide vector load; otherwise
// the scalars are cloned and gathered with an insertelement chain.  The
// scalar->vector mapping is recorded in scalar_vector_pairs.  Afterwards,
// every StoreInst reachable from the first load of each group is vectorized
// via get_vector_version(), and the whole scalar chains (plus their GEPs)
// are queued in for_erasure.  Always returns 0.
int qdp_jit_vec::vectorize_loads( std::vector<std::vector<Instruction*> >& load_instructions )
{
  DEBUG(dbgs() << "Vectorize loads, total of " << load_instructions.size() << "\n");
  //std::vector<std::pair<Value*,Value*> > scalar_vector_loads;
  scalar_vector_pairs.clear();
  if (load_instructions.empty())
    return 0;
  int load_vec_elem = 0;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    DEBUG(dbgs() << "Processing vector of loads number " << load_vec_elem++ << "\n");
    assert( VI.size() == vec_len && "length of vector of loads does not match vec_len" );
    int loads_consec = true;
    // NOTE(review): lo/hi are only assigned when the GEP index is a
    // ConstantInt.  In NDEBUG builds (assert compiled out) the error paths
    // below fall through, and a non-constant index on a non-first load is
    // silently ignored (no else branch), leaving hi stale — confirm callers
    // guarantee constant GEP indices.
    uint64_t lo,hi;
    bool first = true;
    for( Instruction* I : VI ) {
      GetElementPtrInst* GEP;
      if ((GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0)))) {
        if (first) {
          // First load establishes the expected index range [lo, hi).
          ConstantInt * CI;
          if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
            lo = CI->getZExtValue();
            hi = lo+1;
            first=false;
          } else {
            DEBUG(dbgs() << "First load in the chain: Operand of GEP not a ConstantInt" << *GEP->getOperand(1) << "\n");
            assert( 0 && "First load in the chain: Operand of GEP not a ConstantInt\n");
            exit(0);
          }
        } else {
          // Subsequent loads must continue the run: index == hi, then bump hi.
          ConstantInt * CI;
          if ((CI = dyn_cast<ConstantInt>(GEP->getOperand(1)))) {
            if (hi != CI->getZExtValue()) {
              DEBUG(dbgs() << "Loads not consecutive lo=" << lo << " hi=" << hi << " this=" << CI->getZExtValue() << "\n");
              loads_consec = false;
            } else {
              hi++;
            }
          }
        }
      } else {
        DEBUG(dbgs() << "Operand of load not a GEP " << *I->getOperand(0) << "\n");
        assert( 0 && "Operand of load not a GEP" );
        exit(0);
        // NOTE(review): unreachable after exit(0) above — in NDEBUG builds
        // exit(0) still runs, so this assignment is dead code either way.
        loads_consec = false;
      }
    }
    if (loads_consec) {
      DEBUG(dbgs() << "Loads consecuetive\n");  // (sic) typo kept: runtime debug string
      // Clone the first load's GEP so the vector load does not disturb the
      // scalar chain (which is erased later).
      LoadInst* LI = cast<LoadInst>(VI.at(0));
      GetElementPtrInst* GEP = cast<GetElementPtrInst>(LI->getOperand(0));
      Instruction* GEPcl = clone_with_operands(GEP);
      unsigned AS = LI->getPointerAddressSpace();
      VectorType *VecTy = VectorType::get( LI->getType() , vec_len );
      // bitwidth==1 (i1) rounds up to one byte.
      unsigned bitwidth = LI->getType()->getPrimitiveSizeInBits();
      unsigned bytewidth = bitwidth == 1 ? 1 : bitwidth/8;
      DEBUG(dbgs() << "bit/byte width of load instr trype: " << bitwidth << "/" << bytewidth << "\n");
      //Builder->SetInsertPoint( GEP );
      Value *VecPtr = Builder->CreateBitCast(GEPcl,VecTy->getPointerTo(AS));
      //Value *VecLoad = Builder->CreateLoad( VecPtr );
      // Vector-aligned only when the run starts on a vec_len boundary.
      unsigned align = lo % vec_len == 0 ? bytewidth * vec_len : bytewidth;
      Value *VecLoad = Builder->CreateAlignedLoad( VecPtr , align );
      //DEBUG(dbgs() << "created vector load: " << *VecLoad << "\n");
      //function->dump();
      // unsigned AS = LI->getPointerAddressSpace();
      // VectorType *VecTy = VectorType::get( LI->getType() , vec_len );
      // Builder->SetInsertPoint( LI );
      // Value *VecPtr = Builder->CreateBitCast(LI->getPointerOperand(),VecTy->getPointerTo(AS));
      // Value *VecLoad = Builder->CreateLoad( VecPtr );
      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , VecLoad ) );
    } else {
      DEBUG(dbgs() << "Loads not consecutive:\n");
      for (Value* V: VI) {
        DEBUG(dbgs() << *V << "\n");
      }
      //Instruction* I = dyn_cast<Instruction>(VI.back()->getNextNode());
      //DEBUG(dbgs() << *I << "\n");
      //Builder->SetInsertPoint( VI.at(0) );
      // Gather path: clone each scalar load and build the vector with an
      // insertelement chain starting from undef.
      std::vector<Instruction*> VIcl;
      for( Instruction* I : VI ) {
        VIcl.push_back( clone_with_operands(I) );
      }
      VectorType *VecTy = VectorType::get( VI.at(0)->getType() , vec_len );
      Value *Vec = UndefValue::get(VecTy);
      int i=0;
      for( Instruction* I : VIcl ) {
        Vec = Builder->CreateInsertElement(Vec, I, Builder->getInt32(i++));
      }
      scalar_vector_pairs.push_back( std::make_pair( VI.at(0) , Vec ) );
    }
  }
  //vectorize_all_uses( scalar_vector_loads );
  DEBUG(dbgs() << "Searching for the stores:\n");
  //function->dump();
  //
  // Vectorize all StoreInst reachable by the first load of each vector of loads
  //
  {
    // Forward DFS over use chains from each group's first load; each store
    // found is vectorized exactly once via get_vector_version().
    SetVector<Instruction*> to_visit;
    SetVector<Instruction*> stores_processed;
    for( std::vector<Instruction*>& VI : load_instructions ) {
      to_visit.insert(VI.at(0));
    }
    while (!to_visit.empty()) {
      Instruction* I = to_visit.back();
      to_visit.pop_back();
      DEBUG(dbgs() << "visiting " << *I << "\n");
      if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
        if (!stores_processed.count(SI)) {
          get_vector_version( SI );
          stores_processed.insert( SI );
        }
      } else {
        for (Use &U : I->uses()) {
          Value* V = U.getUser();
          to_visit.insert(cast<Instruction>(V));
        }
      }
    }
  }
  // DEBUG(dbgs() << "After vectorizing the stores\n");
  // function->dump();
  //
  // Mark all stores as being processed
  //
  // Second DFS: this time over EVERY scalar load (not just the first of each
  // group), queueing the whole chains and their GEPs for erasure.
  SetVector<Instruction*> to_visit;
  for( std::vector<Instruction*>& VI : load_instructions ) {
    for( Instruction* I : VI ) {
      to_visit.insert(I);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(I->getOperand(0))) {
        for_erasure.insert(GEP);
      }
    }
  }
  while (!to_visit.empty()) {
    Instruction* I = to_visit.back();
    to_visit.pop_back();
    for_erasure.insert(I);
    if (StoreInst* SI = dyn_cast<StoreInst>(I)) {
      // NOTE(review): the stores_processed declared in the braced block
      // above has gone out of scope here, so this must resolve to a
      // declaration outside this function (e.g. a class member that the
      // earlier local shadowed) — confirm against the class definition.
      stores_processed.insert(SI);
      if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(SI->getOperand(1))) {
        for_erasure.insert(GEP);
      }
    } else {
      for (Use &U : I->uses()) {
        Value* V = U.getUser();
        to_visit.insert(cast<Instruction>(V));
      }
    }
  }
  DEBUG(dbgs() << "----------------------------------------\n");
  DEBUG(dbgs() << "After vectorize_loads\n");
  //function->dump();
  return 0;
}
// Return the vector counterpart of scalar_version, creating it on demand.
//
// Constants are splatted to a vec_len-wide constant vector.  Stores clone
// their address GEP (so the scalar chain can still be erased later) and emit
// a vector store of the recursively vectorized value operand.  Every other
// instruction is first looked up in scalar_vector_pairs (memoization:
// each scalar is vectorized at most once) and otherwise rebuilt as the
// corresponding vector instruction with recursively vectorized operands.
// Returns NULL only on the unsupported-opcode error path (after assert).
Value* qdp_jit_vec::get_vector_version( Value* scalar_version )
{
  DEBUG(dbgs() << "get_vector_version: scalar version: " << *scalar_version << "\n");
  if (!isa<Instruction>(scalar_version)) {
    // Constants are vectorized by splatting.
    if (Constant* C = dyn_cast<Constant>(scalar_version)) {
      return Builder->Insert( ConstantVector::getSplat( vec_len , C ) );
    }
    assert( 0 && "scalar version is not an instruction, and not a constant" );
    return NULL;
  }

  if (StoreInst* SI = dyn_cast<StoreInst>(scalar_version)) {
    unsigned AS = SI->getPointerAddressSpace();
    SequentialType* ST = cast<SequentialType>(SI->getPointerOperand()->getType());
    //DEBUG(dbgs() << "store pointer operand type: " << *ST->getElementType() << "\n");
    if (isa<VectorType>(ST->getElementType())) {
      assert( 0 && "did not expect a vector type store instruction" );
    }
    // Clone the address computation so the vector store does not disturb
    // the scalar chain (which is erased later).
    Instruction* GEP = cast<Instruction>(SI->getPointerOperand());
    Instruction* GEPcl = clone_with_operands( GEP );
    DEBUG(dbgs() << "SI->getValueOp = " << *SI->getValueOperand() << "\n");
    Value* vec_value = get_vector_version( SI->getValueOperand() );
    Value *VecPtr = Builder->CreateBitCast( GEPcl , vec_value->getType()->getPointerTo(AS) );
    Value* vecstore = Builder->CreateStore( vec_value , VecPtr );
    DEBUG(dbgs() << "vec store created " << *vecstore << "\n");
    return vecstore;
  }

  // Memoization lookup: reuse an already-created vector version.
  for ( std::vector<std::pair<Value*,Value*> >::iterator it = scalar_vector_pairs.begin();
        it != scalar_vector_pairs.end(); ++it ) {
    DEBUG(dbgs() << "search: " << *it->first << "\n");
    if ( it->first == scalar_version ) {
      DEBUG(dbgs() << "found it, it was already there!\n");
      return it->second;
    }
  }

  Instruction* I = cast<Instruction>(scalar_version);
  std::vector<Value*> operands;
  for (Use& U : I->operands())
    operands.push_back(U.get());

  unsigned Opcode = I->getOpcode();
  Value* V;
  if (Instruction::isBinaryOp(Opcode)) {
    // Hoist the recursive calls into locals: C++ leaves the evaluation
    // order of function arguments unspecified, and each recursive call
    // inserts IR through Builder, so hoisting keeps the emitted instruction
    // order deterministic across compilers.  CreateBinOp dispatches to the
    // same builder as the per-opcode Create* calls and uniformly covers
    // every binary opcode (the original switch missed UDiv/URem/LShr).
    Value* LHS = get_vector_version( operands.at(0) );
    Value* RHS = get_vector_version( operands.at(1) );
    V = Builder->CreateBinOp( static_cast<Instruction::BinaryOps>(Opcode) , LHS , RHS );
  } else {
    switch (Opcode) {
    case Instruction::ICmp: {
      // Same deterministic-order hoisting as the binary-op path above.
      Value* LHS = get_vector_version( operands.at(0) );
      Value* RHS = get_vector_version( operands.at(1) );
      V = Builder->CreateICmp( cast<CmpInst>(I)->getPredicate() , LHS , RHS );
      break;
    }
    // Casts: single operand, result widened to a vec_len vector of the
    // scalar result type.
    case Instruction::BitCast:
      V = Builder->CreateBitCast( get_vector_version( operands.at(0) ) ,
                                  VectorType::get( I->getType() , vec_len ) );
      break;
    case Instruction::SExt:
      V = Builder->CreateSExt( get_vector_version( operands.at(0) ) ,
                               VectorType::get( I->getType() , vec_len ) );
      break;
    case Instruction::Trunc:
      V = Builder->CreateTrunc( get_vector_version( operands.at(0) ) ,
                                VectorType::get( I->getType() , vec_len ) );
      break;
    default:
      dbgs() << Instruction::getOpcodeName(Opcode) << "\n";
      assert( 0 && "opcode not found!" );
      V = NULL;
    }
  }

  // Record the mapping so future lookups reuse this vector value.
  scalar_vector_pairs.push_back( std::make_pair( I , V ) );
  return V;
}