/// @brief Generate vector code for a block statement.
///
/// The block the builder currently inserts into is split at the insert point,
/// the new block is named "polly.stmt.<original name>" and every instruction
/// of the statement's basic block is handed to copyInstruction().
///
/// @param Stmt The statement to copy; it must be a block statement.
void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) {
  assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by "
                               "the vector block generator");

  BasicBlock *OrigBB = Stmt.getBasicBlock();
  BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(),
                                 Builder.GetInsertPoint(), &DT, &LI);
  NewBB->setName("polly.stmt." + OrigBB->getName());
  Builder.SetInsertPoint(NewBB->begin());

  // Two block-local maps record how the instructions of the original block
  // map to their copies in the new block: one for scalar values and one for
  // vector values.
  //
  // For plain scalar code generation the vector map stays unused and the
  // scalar map has a single dimension holding the mapping.
  //
  // For vector code generation an instruction is either recorded once in the
  // vector map (it computes >vectorwidth< values at a time) or, if its values
  // are computed with scalar operations, once in every dimension of the
  // scalar map.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::iterator II = OrigBB->begin(), IE = OrigBB->end();
       II != IE; ++II)
    copyInstruction(Stmt, &*II, VectorBlockMap, ScalarBlockMap);
}
/// @brief Read the new scattering from the scoplib description. /// /// @S The Scop to update /// @OScop The ScopLib data structure describing the new scattering. /// @return A map that contains for each Statement the new scattering. StatementToIslMapTy *readScattering(Scop *S, scoplib_scop_p OScop) { StatementToIslMapTy &NewScattering = *(new StatementToIslMapTy()); scoplib_statement_p stmt = OScop->statement; // Check if we have dimensions for each scattering or if each row // represents a scattering dimension. int numScatteringDims = -1; ScopStmt *pollyStmt = *S->begin(); if (stmt->schedule->NbColumns == 2 + pollyStmt->getNumParams() + pollyStmt->getNumIterators()) { numScatteringDims = maxScattering(stmt); } for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { if (!stmt) { errs() << "Not enough statements available in OpenScop file\n"; freeStmtToIslMap(&NewScattering); return NULL; } NewScattering[*SI] = scatteringForStmt(stmt->schedule, *SI, numScatteringDims); stmt = stmt->next; } if (stmt) { errs() << "Too many statements in OpenScop file\n"; freeStmtToIslMap(&NewScattering); return NULL; } return &NewScattering; }
/// @brief Collect the base addresses of all memory accesses in the SCoP.
///
/// @param OutputBytes Set to 0 on entry. For a write access whose base address
///                    has pointer type it is overwritten with the result of
///                    getArraySizeInBytes() for the pointee array type.
///
/// @return The set of base addresses referenced by the memory accesses of all
///         statements, in insertion order.
SetVector<Value *> ClastStmtCodeGen::getGPUValues(unsigned &OutputBytes) {
  SetVector<Value *> Values;
  OutputBytes = 0;

  // FIXME: we assume that there is one and only one array to be written
  // in a SCoP.
  //
  // The written base address is remembered across all accesses of all
  // statements. A counter that is reset for every access could never exceed
  // one and would make the assertion below vacuous.
  Value *WrittenBaseAddr = nullptr;

  // Record the memory reference base addresses.
  for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;
    for (SmallVector<MemoryAccess *, 8>::iterator I = Stmt->memacc_begin(),
                                                  E = Stmt->memacc_end();
         I != E; ++I) {
      Value *BaseAddr = const_cast<Value *>((*I)->getBaseAddr());
      Values.insert((BaseAddr));

      if (!(*I)->isWrite())
        continue;

      // Several writes to the same array are fine, a second array is not.
      assert((!WrittenBaseAddr || WrittenBaseAddr == BaseAddr) &&
             "We support at most one array to be written in a SCoP.");
      WrittenBaseAddr = BaseAddr;

      if (const PointerType *PT =
              dyn_cast<PointerType>(BaseAddr->getType())) {
        Type *T = PT->getArrayElementType();
        const ArrayType *ATy = dyn_cast<ArrayType>(T);
        OutputBytes = getArraySizeInBytes(ATy);
      }
    }
  }

  // Only read inside the assertion; keep release builds warning free.
  (void)WrittenBaseAddr;

  return Values;
}
void Dependences::collectInfo(Scop &S, isl_union_map **Read, isl_union_map **Write, isl_union_map **MayWrite, isl_union_map **Schedule) { isl_space *Space = S.getParamSpace(); *Read = isl_union_map_empty(isl_space_copy(Space)); *Write = isl_union_map_empty(isl_space_copy(Space)); *MayWrite = isl_union_map_empty(isl_space_copy(Space)); *Schedule = isl_union_map_empty(Space); for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; for (ScopStmt::memacc_iterator MI = Stmt->memacc_begin(), ME = Stmt->memacc_end(); MI != ME; ++MI) { isl_set *domcp = Stmt->getDomain(); isl_map *accdom = (*MI)->getAccessRelation(); accdom = isl_map_intersect_domain(accdom, domcp); if ((*MI)->isRead()) *Read = isl_union_map_add_map(*Read, accdom); else *Write = isl_union_map_add_map(*Write, accdom); } *Schedule = isl_union_map_add_map(*Schedule, Stmt->getScattering()); } }
/// @brief Update the scattering in a Scop using the scoplib description of /// the scattering. bool ScopLib::updateScattering() { if (!scoplib) return false; StatementToIslMapTy *NewScattering = readScattering(PollyScop, scoplib); if (!NewScattering) return false; if (!D->isValidScattering(NewScattering)) { freeStmtToIslMap(NewScattering); errs() << "OpenScop file contains a scattering that changes the " << "dependences. Use -disable-polly-legality to continue anyways\n"; return false; } for (Scop::iterator SI = PollyScop->begin(), SE = PollyScop->end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; if (NewScattering->find(Stmt) != NewScattering->end()) Stmt->setScattering(isl_map_copy((*NewScattering)[Stmt])); } freeStmtToIslMap(NewScattering); return true; }
/// @brief Generate vector code for an annotated user statement.
///
/// The statement is found through the annotation attached to @p User. The
/// schedule is restricted to the domain of that statement before the vector
/// block generator is invoked.
///
/// @param User       The user node to generate code for (freed here).
/// @param IVS        The induction variable values, one per vector lane.
/// @param IteratorID The id of the vectorized iterator; passed on to
///                   createSubstitutionsVector().
/// @param Schedule   The schedule to restrict to the statement.
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
                                      std::vector<Value*> &IVS,
                                      __isl_take isl_id *IteratorID,
                                      __isl_take isl_union_map *Schedule) {
  isl_id *Annotation = isl_ast_node_get_annotation(User);
  assert(Annotation && "Vector user statement is not annotated");

  struct IslAstUser *Info =
      static_cast<struct IslAstUser *>(isl_id_get_user(Annotation));
  assert(Info && "Vector user statement annotation does not contain info");

  isl_id *StmtId = isl_pw_multi_aff_get_tuple_id(Info->PMA, isl_dim_out);
  ScopStmt *Stmt = static_cast<ScopStmt *>(isl_id_get_user(StmtId));
  VectorValueMapT VectorMap(IVS.size());

  // Only the part of the schedule that belongs to this statement is needed.
  Schedule = isl_union_map_intersect_domain(
      Schedule, isl_union_set_from_set(Stmt->getDomain()));
  isl_map *StmtSchedule = isl_map_from_union_map(Schedule);

  createSubstitutionsVector(isl_pw_multi_aff_copy(Info->PMA),
                            isl_ast_build_copy(Info->Context), Stmt, VectorMap,
                            IVS, IteratorID);
  VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, StmtSchedule, P);

  isl_map_free(StmtSchedule);
  isl_id_free(Annotation);
  isl_id_free(StmtId);
  isl_ast_node_free(User);
}
void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S) { assert(Stmt.isBlockStmt() && "Only block statements can be copied by the block generator"); ValueMapT BBMap; BasicBlock *BB = Stmt.getBasicBlock(); copyBB(Stmt, BB, BBMap, GlobalMap, LTS); }
/// @brief Create the "FinalRead" statement of a SCoP.
///
/// The statement has the single-point iteration domain {[i0] : i0 = 0}, a
/// scattering whose first output dimension is fixed to a large constant, and
/// one memory access for every distinct base address used by the statements
/// already contained in @p parent.
///
/// @param parent  The SCoP this statement belongs to.
/// @param Scatter Not used by this constructor.
ScopStmt::ScopStmt(Scop &parent, SmallVectorImpl<unsigned> &Scatter)
    : Parent(parent), BB(NULL), IVS(0) {

  BaseName = "FinalRead";

  // Build iteration domain.
  Domain = isl_set_read_from_str(Parent.getCtx(), "{[i0] : i0 = 0}", -1);
  Domain = isl_set_add_dims(Domain, isl_dim_param, Parent.getNumParams());
  Domain = isl_set_set_tuple_name(Domain, getBaseName());

  // Build scattering.
  unsigned NumScatDims = Parent.getMaxLoopDepth() * 2 + 1;
  isl_dim *ScatSpace = isl_dim_alloc(Parent.getCtx(), Parent.getNumParams(), 1,
                                     NumScatDims);
  ScatSpace = isl_dim_set_tuple_name(ScatSpace, isl_dim_out, "scattering");
  ScatSpace = isl_dim_set_tuple_name(ScatSpace, isl_dim_in, getBaseName());
  isl_basic_map *ScatMap = isl_basic_map_universe(isl_dim_copy(ScatSpace));

  isl_int Coeff;
  isl_int_init(Coeff);

  // Equality: -1 * out[0] + 200000000 = 0.
  isl_constraint *Eq = isl_equality_alloc(ScatSpace);
  isl_int_set_si(Coeff, -1);
  isl_constraint_set_coefficient(Eq, isl_dim_out, 0, Coeff);

  // TODO: This is incorrect. We should not use a very large number to ensure
  // that this statement is executed last.
  isl_int_set_si(Coeff, 200000000);
  isl_constraint_set_constant(Eq, Coeff);

  ScatMap = isl_basic_map_add_constraint(ScatMap, Eq);
  isl_int_clear(Coeff);
  Scattering = isl_map_from_basic_map(ScatMap);

  // Build memory accesses. A SetVector keeps the order in which the base
  // addresses were first seen and prevents the same base address from being
  // inserted more than once.
  SetVector<const Value*> BaseAddressSet;

  for (Scop::const_iterator SI = Parent.begin(), SE = Parent.end(); SI != SE;
       ++SI) {
    ScopStmt *Other = *SI;

    for (MemoryAccessVec::const_iterator I = Other->memacc_begin(),
                                         E = Other->memacc_end();
         I != E; ++I)
      BaseAddressSet.insert((*I)->getBaseAddr());
  }

  for (SetVector<const Value*>::iterator BI = BaseAddressSet.begin(),
                                         BE = BaseAddressSet.end();
       BI != BE; ++BI)
    MemAccs.push_back(new MemoryAccess(*BI, this));

  IsReduction = false;
}
/// @brief Combine the leading scattering dimensions of all statements.
///
/// For every statement the output dimensions starting at @p dimLevel are
/// projected out of its scattering; the results are collected in one union
/// map.
///
/// @param scop     The SCoP whose statements are combined.
/// @param dimLevel The number of leading scattering dimensions to keep.
///
/// @return The union of the truncated scatterings.
isl_union_map *getCombinedScheduleForSpace(Scop *scop, unsigned dimLevel) {
  isl_union_map *Combined = isl_union_map_empty(scop->getParamSpace());

  for (Scop::iterator SI = scop->begin(), SE = scop->end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    // Everything from dimLevel up to the last scattering dimension goes away.
    unsigned NumDropped = Stmt->getNumScattering() - dimLevel;
    isl_map *Truncated = Stmt->getScattering();
    Truncated =
        isl_map_project_out(Truncated, isl_dim_out, dimLevel, NumDropped);

    Combined = isl_union_map_add_map(Combined, Truncated);
  }

  return Combined;
}
/// @brief Build the schedule of the whole SCoP.
///
/// @return The union of the scatterings of all statements, each one
///         restricted to the domain of its statement.
__isl_give isl_union_map *IslAst::getSchedule() {
  isl_union_map *Result = isl_union_map_empty(S->getParamSpace());

  for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    isl_map *Restricted = Stmt->getScattering();
    Restricted = isl_map_intersect_domain(Restricted, Stmt->getDomain());

    isl_union_map *AsUnion = isl_union_map_from_map(Restricted);
    Result = isl_union_map_union(Result, AsUnion);
  }

  return Result;
}
void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { if (!VectorType::isValidElementType(Load->getType())) { for (int i = 0; i < getVectorWidth(); i++) ScalarMaps[i][Load] = generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); return; } const MemoryAccess &Access = Stmt.getAccessFor(Load); // Make sure we have scalar values available to access the pointer to // the data location. extractScalarValues(Load, VectorMap, ScalarMaps); Value *NewLoad; if (Access.isStrideZero(isl_map_copy(Schedule))) NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]); else if (Access.isStrideOne(isl_map_copy(Schedule))) NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps); else if (Access.isStrideX(isl_map_copy(Schedule), -1)) NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true); else NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps); VectorMap[Load] = NewLoad; }
void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { const MemoryAccess &Access = Stmt.getAccessFor(Store); const Value *Pointer = Store->getPointerOperand(); Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap, ScalarMaps, getLoopForInst(Store)); // Make sure we have scalar values available to access the pointer to // the data location. extractScalarValues(Store, VectorMap, ScalarMaps); if (Access.isStrideOne(isl_map_copy(Schedule))) { Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth()); Value *NewPointer = generateLocationAccessed( Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]); Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr"); StoreInst *Store = Builder.CreateStore(Vector, VectorPtr); if (!Aligned) Store->setAlignment(8); } else { for (unsigned i = 0; i < ScalarMaps.size(); i++) { Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); Value *NewPointer = generateLocationAccessed( Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Builder.CreateStore(Scalar, NewPointer); } } }
/// @brief Check whether a new scattering preserves all dependences.
///
/// Statements without an entry in @p NewScattering keep their current
/// scattering. All dependences are mapped into scattering space; the
/// scattering is accepted if no dependence distance is lexicographically
/// smaller than or equal to zero.
///
/// @param NewScattering The new scattering for (a subset of) the statements.
///                      The maps are only copied, not consumed.
///
/// @return True if the legality check is disabled or the check succeeds.
bool Dependences::isValidScattering(StatementToIslMapTy *NewScattering) {
  Scop &S = getCurScop();

  if (LegalityCheckDisabled)
    return true;

  isl_union_map *Deps = getDependences(TYPE_ALL);
  isl_union_map *Scattering = isl_union_map_empty(S.getParamSpace());

  // The scattering space is taken from the first statement.
  isl_space *ScatteringSpace = 0;

  for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    StatementToIslMapTy::iterator Pos = NewScattering->find(Stmt);
    isl_map *StmtScat = (Pos == NewScattering->end())
                            ? Stmt->getScattering()
                            : isl_map_copy(Pos->second);

    if (!ScatteringSpace)
      ScatteringSpace = isl_space_range(isl_map_get_space(StmtScat));

    Scattering = isl_union_map_add_map(Scattering, StmtScat);
  }

  // Express source and sink of every dependence in scattering space.
  Deps = isl_union_map_apply_domain(Deps, isl_union_map_copy(Scattering));
  Deps = isl_union_map_apply_range(Deps, Scattering);

  // The origin of the scattering space.
  isl_set *Origin = isl_set_universe(isl_space_copy(ScatteringSpace));
  for (unsigned Dim = 0; Dim < isl_set_dim(Origin, isl_dim_set); Dim++)
    Origin = isl_set_fix_si(Origin, isl_dim_set, Dim, 0);

  isl_union_set *AllDeltas = isl_union_map_deltas(Deps);
  isl_set *Deltas = isl_union_set_extract_set(AllDeltas, ScatteringSpace);
  isl_union_set_free(AllDeltas);

  // Any distance that is lexicographically <= 0 violates a dependence.
  isl_map *Violations = isl_set_lex_le_set(Deltas, Origin);
  bool NoViolations = isl_map_is_empty(Violations);
  isl_map_free(Violations);

  return NoViolations;
}
/// @brief Build the CLooG union domain for the SCoP.
///
/// Every statement contributes its domain and scattering under its base name;
/// the statement pointer is passed as the last argument of
/// cloog_union_domain_add_domain().
///
/// @return The union domain containing all statements.
CloogUnionDomain *Cloog::buildCloogUnionDomain() {
  CloogUnionDomain *Union = cloog_union_domain_alloc(S->getNumParams());

  for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    CloogScattering *StmtScattering =
        cloog_scattering_from_isl_map(Stmt->getScattering());
    CloogDomain *StmtDomain = cloog_domain_from_isl_set(Stmt->getDomain());

    std::string StmtName = Stmt->getBaseName();
    Union = cloog_union_domain_add_domain(Union, StmtName.c_str(), StmtDomain,
                                          StmtScattering, Stmt);
  }

  return Union;
}
/// @brief Extend the scattering of every statement to a common dimensionality.
///
/// Each scattering is composed with a map that copies the existing
/// dimensions and fixes every additional dimension to zero.
///
/// @param S             The SCoP whose statements are updated.
/// @param NewDimensions The number of scattering dimensions after extension.
void IslScheduleOptimizer::extendScattering(Scop &S, unsigned NewDimensions) {
  for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;
    unsigned OldDimensions = Stmt->getNumScattering();

    // Extension: [o0, ..., oN] -> [o0, ..., oN, 0, ..., 0]
    isl_map *Extension = isl_map_universe(isl_space_alloc(
        Stmt->getIslCtx(), 0, OldDimensions, NewDimensions));

    for (unsigned Dim = 0; Dim < OldDimensions; Dim++)
      Extension = isl_map_equate(Extension, isl_dim_in, Dim, isl_dim_out, Dim);

    for (unsigned Dim = OldDimensions; Dim < NewDimensions; Dim++)
      Extension = isl_map_fix_si(Extension, isl_dim_out, Dim, 0);

    Extension = isl_map_align_params(Extension, S.getParamSpace());

    isl_map *Extended = isl_map_apply_range(Stmt->getScattering(), Extension);
    Stmt->setScattering(Extended);
  }
}
/// @brief Build the JSON description of a SCoP.
///
/// The result has the members "name", "context" and "statements". Every
/// statement has the members "name", "domain", "schedule" and "accesses";
/// every access has a "kind" ("read" or "write") and a "relation".
///
/// @param scop The SCoP to describe. The description is built from this
///             argument rather than from the exporter's member state, so the
///             function describes exactly the SCoP it is given.
///
/// @return The JSON value describing @p scop.
Json::Value JSONExporter::getJSON(Scop &scop) const {
  Json::Value root;

  root["name"] = scop.getRegion().getNameStr();
  root["context"] = scop.getContextStr();

  // Touch the member so it exists even if there are no statements.
  root["statements"];

  for (Scop::iterator SI = scop.begin(), SE = scop.end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    Json::Value statement;

    statement["name"] = Stmt->getBaseName();
    statement["domain"] = Stmt->getDomainStr();
    statement["schedule"] = Stmt->getScatteringStr();

    // Touch the member so it exists even if there are no accesses.
    statement["accesses"];

    for (ScopStmt::memacc_iterator MI = Stmt->memacc_begin(),
                                   ME = Stmt->memacc_end();
         MI != ME; ++MI) {
      Json::Value access;

      access["kind"] = (*MI)->isRead() ? "read" : "write";
      access["relation"] = (*MI)->getAccessRelationStr();

      statement["accesses"].append(access);
    }

    root["statements"].append(statement);
  }

  return root;
}
bool Interchange::runOnScop(Scop &S) { if (std::distance(S.begin(), S.end()) != 2) // One statement besides the final statement return false; for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; if (!Stmt->isReduction()) continue; isl_map *Scattering = isl_map_copy(Stmt->getScattering()); const std::string MapString = "{scattering[i0, i1, i2, i3, i4] -> scattering[i0, i3, i2, i1, i4]}"; isl_map *Map = isl_map_read_from_str(Stmt->getIslContext(), MapString.c_str(), -1); isl_map_add_dims(Map, isl_dim_param, Stmt->getNumParams()); Scattering = isl_map_apply_range(Scattering, Map); Stmt->setScattering(Scattering); DEBUG( isl_printer *p = isl_printer_to_str(S.getCtx()); isl_printer_print_map(p, Scattering); dbgs() << isl_printer_get_str(p) << '\n'; isl_printer_flush(p); isl_printer_free(p); ); }
/// @brief Generate vector code for a user statement.
///
/// The statement is identified through the id of the first argument of the
/// user node's call expression. The schedule is restricted to the domain of
/// that statement before the vector block generator is invoked.
///
/// @param User       The user node to generate code for (freed here).
/// @param IVS        The induction variable values, one per vector lane.
/// @param IteratorID The id of the vectorized iterator; passed on to
///                   createSubstitutionsVector().
/// @param Schedule   The schedule to restrict to the statement.
void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User,
                                      std::vector<Value *> &IVS,
                                      __isl_take isl_id *IteratorID,
                                      __isl_take isl_union_map *Schedule) {
  isl_ast_expr *CallExpr = isl_ast_node_user_get_expr(User);

  // The first argument of the call expression names the statement.
  isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(CallExpr, 0);
  isl_id *StmtId = isl_ast_expr_get_id(StmtExpr);
  isl_ast_expr_free(StmtExpr);

  ScopStmt *Stmt = static_cast<ScopStmt *>(isl_id_get_user(StmtId));
  VectorValueMapT VectorMap(IVS.size());
  std::vector<LoopToScevMapT> VLTS(IVS.size());

  // Only the part of the schedule that belongs to this statement is needed.
  Schedule = isl_union_map_intersect_domain(
      Schedule, isl_union_set_from_set(Stmt->getDomain()));
  isl_map *StmtSchedule = isl_map_from_union_map(Schedule);

  createSubstitutionsVector(CallExpr, Stmt, VectorMap, VLTS, IVS, IteratorID);
  VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, StmtSchedule,
                                 P, LI, SE);

  isl_map_free(StmtSchedule);
  isl_id_free(StmtId);
  isl_ast_node_free(User);
}
/// @brief Build the CLooG union domain for the SCoP.
///
/// Every statement except the final read statement contributes a copy of its
/// domain and scattering under its base name; the statement pointer is passed
/// as the last argument of cloog_union_domain_add_domain().
///
/// @return The union domain containing all statements but the final read.
CloogUnionDomain *Cloog::buildCloogUnionDomain() {
  CloogUnionDomain *DU = cloog_union_domain_alloc(S->getNumParams());

  for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
    ScopStmt *Stmt = *SI;

    if (Stmt->isFinalRead())
      continue;

    CloogScattering *Scattering =
        cloog_scattering_from_isl_map(isl_map_copy(Stmt->getScattering()));
    CloogDomain *Domain =
        cloog_domain_from_isl_set(isl_set_copy(Stmt->getDomain()));

    // The name is handed over as a plain C string that only has to stay valid
    // for the duration of the call, as CLooG stores its own duplicate. A
    // separately malloc'ed copy would never be released (and its allocation
    // was not checked before being written to).
    std::string entryName = Stmt->getBaseName();
    DU = cloog_union_domain_add_domain(DU, entryName.c_str(), Domain,
                                       Scattering, Stmt);
  }

  return DU;
}
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S) { Builder.SetInsertPoint(CopyBB->begin()); EntryBB = &CopyBB->getParent()->getEntryBlock(); for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); // After a basic block was copied store all scalars that escape this block // in their alloca. First the scalars that have dependences inside the SCoP, // then the ones that might escape the SCoP. generateScalarStores(Stmt, BB, BBMap, GlobalMap); const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) handleOutsideUsers(R, &Inst, BBMap[&Inst]); }
void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, BasicBlock *IncomingBB, ValueMapT &GlobalMap, LoopToScevMapT <S) { Region *StmtR = Stmt.getRegion(); // If the incoming block was not yet copied mark this PHI as incomplete. // Once the block will be copied the incoming value will be added. BasicBlock *BBCopy = BlockMap[IncomingBB]; if (!BBCopy) { assert(StmtR->contains(IncomingBB) && "Bad incoming block for PHI in non-affine region"); IncompletePHINodeMap[IncomingBB].push_back(std::make_pair(PHI, PHICopy)); return; } Value *OpCopy = nullptr; if (StmtR->contains(IncomingBB)) { assert(RegionMaps.count(BBCopy) && "Incoming PHI block did not have a BBMap"); ValueMapT &BBCopyMap = RegionMaps[BBCopy]; Value *Op = PHI->getIncomingValueForBlock(IncomingBB); OpCopy = getNewValue(Stmt, Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI)); } else { if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) return; AllocaInst *PHIOpAddr = getOrCreateAlloca(const_cast<PHINode *>(PHI), PHIOpMap, ".phiops"); OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload", BlockMap[IncomingBB]->getTerminator()); } assert(OpCopy && "Incoming PHI value was not copied properly"); assert(BBCopy && "Incoming PHI block was not copied properly"); PHICopy->addIncoming(OpCopy, BBCopy); }
void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) { // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. if (Inst->isTerminator()) return; if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) return; if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { generateLoad(Stmt, Load, VectorMap, ScalarMaps); return; } if (hasVectorOperands(Inst, VectorMap)) { if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { copyStore(Stmt, Store, VectorMap, ScalarMaps); return; } if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) { copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps); return; } if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) { copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps); return; } // Falltrough: We generate scalar instructions, if we don't know how to // generate vector code. } copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps); }
/// @brief Reload the scalars read by an instruction from their allocas.
///
/// For every scalar read access attached to @p Inst a load from the matching
/// alloca is created and registered in @p BBMap for the access' base value.
///
/// @param Stmt  The statement the instruction belongs to.
/// @param Inst  The instruction whose scalar reads are handled.
/// @param BBMap Block-local mapping that receives the reloaded values.
void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
                                         const Instruction *Inst,
                                         ValueMapT &BBMap) {
  ScopStmt::MemoryAccessList *MAL = Stmt.lookupAccessesFor(Inst);
  if (!MAL)
    return;

  // Iterate over all memory accesses for the given instruction and handle all
  // scalar reads.
  for (MemoryAccess &MA : *MAL) {
    if (!MA.isScalar() || !MA.isRead())
      continue;

    Instruction *ScalarBase = cast<Instruction>(MA.getBaseAddr());
    Instruction *ScalarInst = MA.getAccessInstruction();
    PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase);

    // The use of a PHI operand by the PHI node itself is served from the PHI
    // operand map; every other scalar use is served from the ScalarMap.
    AllocaInst *Address;
    if (ScalarBasePHI == ScalarInst)
      Address = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops");
    else
      Address = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a");

    // Reload the value and make it the mapping used in the current block.
    LoadInst *Reload =
        Builder.CreateLoad(Address, Address->getName() + ".reload");
    BBMap[ScalarBase] = Reload;
  }
}
bool IslScheduleOptimizer::runOnScop(Scop &S) { Dependences *D = &getAnalysis<Dependences>(); if (!D->hasValidDependences()) return false; isl_schedule_free(LastSchedule); LastSchedule = nullptr; // Build input data. int ValidityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; int ProximityKinds; if (OptimizeDeps == "all") ProximityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; else if (OptimizeDeps == "raw") ProximityKinds = Dependences::TYPE_RAW; else { errs() << "Do not know how to optimize for '" << OptimizeDeps << "'" << " Falling back to optimizing all dependences.\n"; ProximityKinds = Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; } isl_union_set *Domain = S.getDomains(); if (!Domain) return false; isl_union_map *Validity = D->getDependences(ValidityKinds); isl_union_map *Proximity = D->getDependences(ProximityKinds); // Simplify the dependences by removing the constraints introduced by the // domains. This can speed up the scheduling time significantly, as large // constant coefficients will be removed from the dependences. The // introduction of some additional dependences reduces the possible // transformations, but in most cases, such transformation do not seem to be // interesting anyway. In some cases this option may stop the scheduler to // find any schedule. if (SimplifyDeps == "yes") { Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain)); Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain)); Proximity = isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain)); Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain)); } else if (SimplifyDeps != "no") { errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' " "or 'no'. 
Falling back to default: 'yes'\n"; } DEBUG(dbgs() << "\n\nCompute schedule from: "); DEBUG(dbgs() << "Domain := "; isl_union_set_dump(Domain); dbgs() << ";\n"); DEBUG(dbgs() << "Proximity := "; isl_union_map_dump(Proximity); dbgs() << ";\n"); DEBUG(dbgs() << "Validity := "; isl_union_map_dump(Validity); dbgs() << ";\n"); int IslFusionStrategy; if (FusionStrategy == "max") { IslFusionStrategy = ISL_SCHEDULE_FUSE_MAX; } else if (FusionStrategy == "min") { IslFusionStrategy = ISL_SCHEDULE_FUSE_MIN; } else { errs() << "warning: Unknown fusion strategy. Falling back to maximal " "fusion.\n"; IslFusionStrategy = ISL_SCHEDULE_FUSE_MAX; } int IslMaximizeBands; if (MaximizeBandDepth == "yes") { IslMaximizeBands = 1; } else if (MaximizeBandDepth == "no") { IslMaximizeBands = 0; } else { errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'" " or 'no'. Falling back to default: 'yes'\n"; IslMaximizeBands = 1; } isl_options_set_schedule_fuse(S.getIslCtx(), IslFusionStrategy); isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands); isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm); isl_options_set_schedule_max_coefficient(S.getIslCtx(), MaxCoefficient); isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE); isl_schedule_constraints *ScheduleConstraints; ScheduleConstraints = isl_schedule_constraints_on_domain(Domain); ScheduleConstraints = isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity); ScheduleConstraints = isl_schedule_constraints_set_validity( ScheduleConstraints, isl_union_map_copy(Validity)); ScheduleConstraints = isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity); isl_schedule *Schedule; Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints); isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT); // In cases the scheduler is not able to optimize the code, we just do not // touch the schedule. 
if (!Schedule) return false; DEBUG(dbgs() << "Schedule := "; isl_schedule_dump(Schedule); dbgs() << ";\n"); isl_union_map *ScheduleMap = getScheduleMap(Schedule); for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; isl_map *StmtSchedule; isl_set *Domain = Stmt->getDomain(); isl_union_map *StmtBand; StmtBand = isl_union_map_intersect_domain(isl_union_map_copy(ScheduleMap), isl_union_set_from_set(Domain)); if (isl_union_map_is_empty(StmtBand)) { StmtSchedule = isl_map_from_domain(isl_set_empty(Stmt->getDomainSpace())); isl_union_map_free(StmtBand); } else { assert(isl_union_map_n_map(StmtBand) == 1); StmtSchedule = isl_map_from_union_map(StmtBand); } Stmt->setScattering(StmtSchedule); } isl_union_map_free(ScheduleMap); LastSchedule = Schedule; unsigned MaxScatDims = 0; for (Scop::iterator SI = S.begin(), SE = S.end(); SI != SE; ++SI) MaxScatDims = std::max((*SI)->getNumScattering(), MaxScatDims); extendScattering(S, MaxScatDims); return false; }
bool JSONImporter::runOnScop(Scop &scop) { S = &scop; Region &R = S->getRegion(); Dependences *D = &getAnalysis<Dependences>(); std::string FileName = ImportDir + "/" + getFileName(S); std::string FunctionName = R.getEntry()->getParent()->getName(); errs() << "Reading JScop '" << R.getNameStr() << "' in function '" << FunctionName << "' from '" << FileName << "'.\n"; OwningPtr<MemoryBuffer> result; error_code ec = MemoryBuffer::getFile(FileName, result); if (ec) { errs() << "File could not be read: " << ec.message() << "\n"; return false; } Json::Reader reader; Json::Value jscop; bool parsingSuccessful = reader.parse(result->getBufferStart(), jscop); if (!parsingSuccessful) { errs() << "JSCoP file could not be parsed\n"; return false; } isl_set *OldContext = S->getContext(); isl_set *NewContext = isl_set_read_from_str(S->getIslCtx(), jscop["context"].asCString()); for (unsigned i = 0; i < isl_set_dim(OldContext, isl_dim_param); i++) { isl_id *id = isl_set_get_dim_id(OldContext, isl_dim_param, i); NewContext = isl_set_set_dim_id(NewContext, isl_dim_param, i, id); } isl_set_free(OldContext); S->setContext(NewContext); StatementToIslMapTy &NewScattering = *(new StatementToIslMapTy()); int index = 0; for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { Json::Value schedule = jscop["statements"][index]["schedule"]; isl_map *m = isl_map_read_from_str(S->getIslCtx(), schedule.asCString()); isl_space *Space = (*SI)->getDomainSpace(); // Copy the old tuple id. This is necessary to retain the user pointer, // that stores the reference to the ScopStmt this scattering belongs to. m = isl_map_set_tuple_id(m, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set)); isl_space_free(Space); NewScattering[*SI] = m; index++; } if (!D->isValidScattering(&NewScattering)) { errs() << "JScop file contains a scattering that changes the " << "dependences. 
Use -disable-polly-legality to continue anyways\n"; return false; } for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; if (NewScattering.find(Stmt) != NewScattering.end()) Stmt->setScattering(NewScattering[Stmt]); } int statementIdx = 0; for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { ScopStmt *Stmt = *SI; int memoryAccessIdx = 0; for (ScopStmt::memacc_iterator MI = Stmt->memacc_begin(), ME = Stmt->memacc_end(); MI != ME; ++MI) { Json::Value accesses = jscop["statements"][statementIdx]["accesses"][ memoryAccessIdx]["relation"]; isl_map *newAccessMap = isl_map_read_from_str(S->getIslCtx(), accesses.asCString()); isl_map *currentAccessMap = (*MI)->getAccessRelation(); if (isl_map_dim(newAccessMap, isl_dim_param) != isl_map_dim(currentAccessMap, isl_dim_param)) { errs() << "JScop file changes the number of parameter dimensions\n"; isl_map_free(currentAccessMap); isl_map_free(newAccessMap); return false; } // We need to copy the isl_ids for the parameter dimensions to the new // map. Without doing this the current map would have different // ids then the new one, even though both are named identically. for (unsigned i = 0; i < isl_map_dim(currentAccessMap, isl_dim_param); i++) { isl_id *id = isl_map_get_dim_id(currentAccessMap, isl_dim_param, i); newAccessMap = isl_map_set_dim_id(newAccessMap, isl_dim_param, i, id); } // Copy the old tuple id. This is necessary to retain the user pointer, // that stores the reference to the ScopStmt this access belongs to. 
isl_id *Id = isl_map_get_tuple_id(currentAccessMap, isl_dim_in); newAccessMap = isl_map_set_tuple_id(newAccessMap, isl_dim_in, Id); if (!isl_map_has_equal_space(currentAccessMap, newAccessMap)) { errs() << "JScop file contains access function with incompatible " << "dimensions\n"; isl_map_free(currentAccessMap); isl_map_free(newAccessMap); return false; } if (isl_map_dim(newAccessMap, isl_dim_out) != 1) { errs() << "New access map in JScop file should be single dimensional\n"; isl_map_free(currentAccessMap); isl_map_free(newAccessMap); return false; } if (!isl_map_is_equal(newAccessMap, currentAccessMap)) { // Statistics. ++NewAccessMapFound; newAccessStrings.push_back(accesses.asCString()); (*MI)->setNewAccessRelation(newAccessMap); } else { isl_map_free(newAccessMap); } isl_map_free(currentAccessMap); memoryAccessIdx++; } statementIdx++; } return false; }
void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Region &R = Stmt.getParent()->getRegion(); assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && "Region statements need to use the generateScalarStores() " "function in the RegionGenerator"); // Set to remember a store to the phiops alloca of a PHINode. It is needed as // we might have multiple write accesses to the same PHI and while one is the // self write of the PHI (to the ScalarMap alloca) the other is the write to // the operand alloca (PHIOpMap). SmallPtrSet<PHINode *, 4> SeenPHIs; // Iterate over all accesses in the given statement. for (MemoryAccess *MA : Stmt) { // Skip non-scalar and read accesses. if (!MA->isScalar() || MA->isRead()) continue; Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr()); Instruction *ScalarInst = MA->getAccessInstruction(); PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase); // Get the alloca node for the base instruction and the value we want to // store. In total there are 4 options: // (1) The base is no PHI, hence it is a simple scalar def-use chain. // (2) The base is a PHI, // (a) and the write is caused by an operand in the block. // (b) and it is the PHI self write (same as case (1)). // (c) (2a) and (2b) are not distinguishable. // For case (1) and (2b) we get the alloca from the scalar map and the value // we want to store is initialized with the instruction attached to the // memory access. For case (2a) we get the alloca from the PHI operand map // and the value we want to store is initialized with the incoming value for // this block. The tricky case (2c) is when both (2a) and (2b) match. This // happens if the PHI operand is in the same block as the PHI. To handle // that we choose the alloca of (2a) first and (2b) for the next write // access to that PHI (there must be 2). 
Value *ScalarValue = nullptr; AllocaInst *ScalarAddr = nullptr; if (!ScalarBasePHI) { // Case (1) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); if (ScalarBasePHI != ScalarInst) { // Case (2a) assert(PHIIdx >= 0 && "Bad scalar write to PHI operand"); SeenPHIs.insert(ScalarBasePHI); ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } else if (PHIIdx < 0) { // Case (2b) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { // Case (2c) if (SeenPHIs.insert(ScalarBasePHI).second) { // First access ==> same as (2a) ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } else { // Second access ==> same as (2b) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } } } ScalarValue = getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(ScalarValue, ScalarAddr); } }
void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S) { // First check for possible scalar dependences for this instruction. generateScalarLoads(Stmt, Inst, BBMap); // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. if (Inst->isTerminator()) return; Loop *L = getLoopForInst(Inst); if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) { Value *NewValue = getNewValue(Stmt, Inst, BBMap, GlobalMap, LTS, L); BBMap[Inst] = NewValue; return; } if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) { Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS); // Compute NewLoad before its insertion in BBMap to make the insertion // deterministic. BBMap[Load] = NewLoad; return; } if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) { Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS); // Compute NewStore before its insertion in BBMap to make the insertion // deterministic. BBMap[Store] = NewStore; return; } if (const PHINode *PHI = dyn_cast<PHINode>(Inst)) { copyPHIInstruction(Stmt, PHI, BBMap, GlobalMap, LTS); return; } // Skip some special intrinsics for which we do not adjust the semantics to // the new schedule. All others are handled like every other instruction. if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) { switch (IT->getIntrinsicID()) { // Lifetime markers are ignored. case llvm::Intrinsic::lifetime_start: case llvm::Intrinsic::lifetime_end: // Invariant markers are ignored. case llvm::Intrinsic::invariant_start: case llvm::Intrinsic::invariant_end: // Some misc annotations are ignored. 
case llvm::Intrinsic::var_annotation: case llvm::Intrinsic::ptr_annotation: case llvm::Intrinsic::annotation: case llvm::Intrinsic::donothing: case llvm::Intrinsic::assume: case llvm::Intrinsic::expect: return; default: // Other intrinsics are copied. break; } } copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS); }
void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap) { const Region &R = Stmt.getParent()->getRegion(); Region *StmtR = Stmt.getRegion(); assert(StmtR && "Block statements need to use the generateScalarStores() " "function in the BlockGenerator"); BasicBlock *ExitBB = StmtR->getExit(); // For region statements three kinds of scalar stores exists: // (1) A definition used by a non-phi instruction outside the region. // (2) A phi-instruction in the region entry. // (3) A write to a phi instruction in the region exit. // The last case is the tricky one since we do not know anymore which // predecessor of the exit needs to store the operand value that doesn't // have a definition in the region. Therefore, we have to check in each // block in the region if we should store the value or not. // Iterate over all accesses in the given statement. for (MemoryAccess *MA : Stmt) { // Skip non-scalar and read accesses. if (!MA->isScalar() || MA->isRead()) continue; Instruction *ScalarBase = cast<Instruction>(MA->getBaseAddr()); Instruction *ScalarInst = MA->getAccessInstruction(); PHINode *ScalarBasePHI = dyn_cast<PHINode>(ScalarBase); Value *ScalarValue = nullptr; AllocaInst *ScalarAddr = nullptr; if (!ScalarBasePHI) { // Case (1) ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else if (ScalarBasePHI->getParent() != ExitBB) { // Case (2) assert(ScalarBasePHI->getParent() == StmtR->getEntry() && "Bad PHI self write in non-affine region"); assert(ScalarBase == ScalarInst && "Bad PHI self write in non-affine region"); ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); ScalarValue = ScalarInst; } else { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); // Skip accesses we will not handle in this basic block but in another one // in the statement region. 
if (PHIIdx < 0) continue; // Case (3) ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); ScalarValue = ScalarBasePHI->getIncomingValue(PHIIdx); } ScalarValue = getNewScalarValue(ScalarValue, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(ScalarValue, ScalarAddr); } }
void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S) { assert(Stmt.isRegionStmt() && "Only region statements can be copied by the block generator"); // Forget all old mappings. BlockMap.clear(); RegionMaps.clear(); IncompletePHINodeMap.clear(); // The region represented by the statement. Region *R = Stmt.getRegion(); // Create a dedicated entry for the region where we can reload all demoted // inputs. BasicBlock *EntryBB = R->getEntry(); BasicBlock *EntryBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; // Iterate over all blocks in the region in a breadth-first search. std::deque<BasicBlock *> Blocks; SmallPtrSet<BasicBlock *, 8> SeenBlocks; Blocks.push_back(EntryBB); SeenBlocks.insert(EntryBB); while (!Blocks.empty()) { BasicBlock *BB = Blocks.front(); Blocks.pop_front(); // First split the block and update dominance information. BasicBlock *BBCopy = splitBB(BB); BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Get the mapping for this block and initialize it with the mapping // available at its immediate dominator (in the new region). ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap = RegionMaps[BBCopyIDom]; // Copy the block with the BlockGenerator. copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS); // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Add values to incomplete PHI nodes waiting for this block to be copied. 
for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB]) addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, GlobalMap, LTS); IncompletePHINodeMap[BB].clear(); // And continue with new successors inside the region. for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) if (R->contains(*SI) && SeenBlocks.insert(*SI).second) Blocks.push_back(*SI); } // Now create a new dedicated region exit block and add it to the region map. BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit"); BlockMap[R->getExit()] = ExitBBCopy; repairDominance(R->getExit(), ExitBBCopy); // As the block generator doesn't handle control flow we need to add the // region control flow by hand after all blocks have been copied. for (BasicBlock *BB : SeenBlocks) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *BBCopy = BlockMap[BB]; Instruction *BICopy = BBCopy->getTerminator(); ValueMapT &RegionMap = RegionMaps[BBCopy]; RegionMap.insert(BlockMap.begin(), BlockMap.end()); Builder.SetInsertPoint(BBCopy); copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); BICopy->eraseFromParent(); } // Add counting PHI nodes to all loops in the region that can be used as // replacement for SCEVs refering to the old loop. 
for (BasicBlock *BB : SeenBlocks) { Loop *L = LI.getLoopFor(BB); if (L == nullptr || L->getHeader() != BB) continue; BasicBlock *BBCopy = BlockMap[BB]; Value *NullVal = Builder.getInt32(0); PHINode *LoopPHI = PHINode::Create(Builder.getInt32Ty(), 2, "polly.subregion.iv"); Instruction *LoopPHIInc = BinaryOperator::CreateAdd( LoopPHI, Builder.getInt32(1), "polly.subregion.iv.inc"); LoopPHI->insertBefore(BBCopy->begin()); LoopPHIInc->insertBefore(BBCopy->getTerminator()); for (auto *PredBB : make_range(pred_begin(BB), pred_end(BB))) { if (!R->contains(PredBB)) continue; if (L->contains(PredBB)) LoopPHI->addIncoming(LoopPHIInc, BlockMap[PredBB]); else LoopPHI->addIncoming(NullVal, BlockMap[PredBB]); } for (auto *PredBBCopy : make_range(pred_begin(BBCopy), pred_end(BBCopy))) if (LoopPHI->getBasicBlockIndex(PredBBCopy) < 0) LoopPHI->addIncoming(NullVal, PredBBCopy); LTS[L] = SE.getUnknown(LoopPHI); } // Add all mappings from the region to the global map so outside uses will use // the copied instructions. for (auto &BBMap : RegionMaps) GlobalMap.insert(BBMap.second.begin(), BBMap.second.end()); // Reset the old insert point for the build. Builder.SetInsertPoint(ExitBBCopy->begin()); }
void ClastStmtCodeGen::codegenForGPGPU(const clast_for *F) { BasicBlock::iterator LoopBody; SetVector<Value *> Values; SetVector<Value *> IVS; std::vector<int> NumIterations; PTXGenerator::ValueToValueMapTy VMap; assert(!GPUTriple.empty() && "Target triple should be set properly for GPGPU code generation."); PTXGenerator PTXGen(Builder, P, GPUTriple); // Get original IVS and ScopStmt unsigned TiledLoopDepth, NonPLoopDepth; const clast_stmt *InnerStmt = getScheduleInfo(F, NumIterations, TiledLoopDepth, NonPLoopDepth); const clast_stmt *TmpStmt; const clast_user_stmt *U; const clast_for *InnerFor; if (CLAST_STMT_IS_A(InnerStmt, stmt_for)) { InnerFor = (const clast_for *)InnerStmt; TmpStmt = InnerFor->body; } else TmpStmt = InnerStmt; U = (const clast_user_stmt *)TmpStmt; ScopStmt *Statement = (ScopStmt *)U->statement->usr; for (unsigned i = 0; i < Statement->getNumIterators() - NonPLoopDepth; i++) { const Value *IV = Statement->getInductionVariableForDimension(i); IVS.insert(const_cast<Value *>(IV)); } unsigned OutBytes; Values = getGPUValues(OutBytes); PTXGen.setOutputBytes(OutBytes); PTXGen.startGeneration(Values, IVS, VMap, &LoopBody); BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); Builder.SetInsertPoint(LoopBody); BasicBlock *AfterBB = 0; if (NonPLoopDepth) { Value *LowerBound, *UpperBound, *IV, *Stride; Type *IntPtrTy = getIntPtrTy(); LowerBound = ExpGen.codegen(InnerFor->LB, IntPtrTy); UpperBound = ExpGen.codegen(InnerFor->UB, IntPtrTy); Stride = Builder.getInt(APInt_from_MPZ(InnerFor->stride)); IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB, CmpInst::ICMP_SLE); const Value *OldIV_ = Statement->getInductionVariableForDimension(2); Value *OldIV = const_cast<Value *>(OldIV_); VMap.insert(std::make_pair<Value *, Value *>(OldIV, IV)); } updateWithValueMap(VMap); BlockGenerator::generate(Builder, *Statement, ValueMap, P); if (AfterBB) Builder.SetInsertPoint(AfterBB->begin()); // FIXME: The replacement of the host base address with 
the parameter of ptx // subfunction should have been done by updateWithValueMap. We use the // following codes to avoid affecting other parts of Polly. This should be // fixed later. Function *FN = Builder.GetInsertBlock()->getParent(); for (unsigned j = 0; j < Values.size(); j++) { Value *baseAddr = Values[j]; for (Function::iterator B = FN->begin(); B != FN->end(); ++B) { for (BasicBlock::iterator I = B->begin(); I != B->end(); ++I) I->replaceUsesOfWith(baseAddr, ValueMap[baseAddr]); } } Builder.SetInsertPoint(AfterLoop); PTXGen.setLaunchingParameters(NumIterations[0], NumIterations[1], NumIterations[2], NumIterations[3]); PTXGen.finishGeneration(FN); }