/*
 * Arrange the slices of a loop nest into the desired nesting order and hand
 * the resulting loops to the auto-tuning interface for blocking.
 *
 * Steps performed:
 *   1. Every slice of nestInfo.GetNest() is passed to op.Transform(), from the
 *      last entry to the first, threading `top` through each call.
 *   2. DoNonPerfectBlocking(nestInfo) is queried for an inner nest; if there
 *      is none and the nest has a single slice, nothing is blocked.
 *   3. For each pair of adjacent slices that share a loop, a FuseLoopInfo is
 *      collected describing the loops that belong to the inner slice only.
 *   4. AutoTuningInterface::BlockLoops() is invoked, either on the outer
 *      slices alone or, when an inner nest was reported, after that inner
 *      nest has been transformed as well.
 *
 * @param nestInfo  source of the slice nest (GetNest) and of the chain of
 *                  inner nests (InnerNest).
 * @param comp      forwarded unchanged to op.Transform().
 * @param op        transformation applied to every slice.
 * @param top       in/out: overwritten with the result of each
 *                  op.Transform() call on the outer slices.
 * @return the final value of `top`.
 *
 * Preconditions are checked with assert(): the nest must exist and be
 * non-empty, and an auto-tuning interface must be available.
 */
LoopTreeNode* ParameterizeBlocking::
ApplyBlocking( const CompSliceDepGraphNode::FullNestInfo& nestInfo,
               LoopTreeDepComp& comp, DependenceHoisting &op,
               LoopTreeNode *&top)
{
  const CompSliceNest* pslices = nestInfo.GetNest();
  assert(pslices != 0);
  const CompSliceNest& slices = *pslices;

  // The reference is not used below. The call is kept because the accessor's
  // implementation is not visible here and may perform its own checks.
  AstInterface& fa = LoopTransformInterface::getAstInterface();
  (void)fa;

  const int size = slices.NumberOfEntries();
  assert(size > 0);

  AutoTuningInterface* tuning = LoopTransformInterface::getAutoTuningInterface();
  assert(tuning != 0);

  /* Arrange the desired loop nesting order: last slice first. */
  for (int j = size - 1; j >= 0; --j) {
    top = op.Transform(comp, slices[j], top);
  }

  /* Check for non-perfectness which can be solved via loop distribution. */
  const CompSliceDepGraphNode::NestInfo* inner = DoNonPerfectBlocking(nestInfo);
  if (inner == 0 && size == 1) {
    /* Single slice and no inner nest reported: nothing to block. */
    return top;
  }

  const CompSlice* slice_innermost = slices[size - 1];
  const CompSlice* slice_top = slices[0];

  /* Pick the first loop of the innermost slice that is not also a loop of
     the top slice. */
  CompSlice::ConstLoopIterator p_inner = slice_innermost->GetConstLoopIterator();
  LoopTreeNode* loop_innermost = *p_inner;
  if (size > 1) {
    while (slice_top->QuerySliceLoop(loop_innermost)) {
      ++p_inner;
      assert(!p_inner.ReachEnd());
      loop_innermost = *p_inner;
    }
  }

  /* Triangular non-perfect nests: a single loop is shared by multiple
     slices, so the loops cannot be distributed in spite of the
     non-perfectness. Record, for each adjacent slice pair sharing a loop,
     the loops owned by the inner slice only. */
  std::vector<FuseLoopInfo> non_perfects;
  // Signed index: `size` is an int and the slice index operator is used with
  // int values elsewhere in this function. The loop is empty when size == 1.
  for (int i = 1; i < size; ++i) {
    const CompSlice* slice_inner = slices[i];
    const CompSlice* slice_pivot = slices[i - 1];
    CompSlice::ConstLoopIterator p_pivot = slice_pivot->GetConstLoopIterator();
    if (!slice_inner->SliceCommonLoop(slice_pivot)) {
      continue;
    }

    /* Outer loops are not perfectly nested. */
    FuseLoopInfo loops_cur(p_pivot.Current());
    /* The loop tree under `top` is traversed instead of the slice's own
       iterator because the ordering of loops is not enforced in CompSlice. */
    for (LoopTreeTraverseSelectLoop p_tree(top); !p_tree.ReachEnd(); ++p_tree) {
      LoopTreeNode* cur = p_tree.Current();
      // Keep only loops that belong to the inner slice but not to the pivot.
      if (!slice_inner->QuerySliceLoop(cur) || slice_pivot->QuerySliceLoop(cur)) {
        continue;
      }
      CompSlice::SliceLoopInfo curinfo = slice_inner->QuerySliceLoopInfo(cur);
      // Alignment is stored relative to the pivot loop's alignment.
      loops_cur.loops.push_back(
          FuseLoopInfo::Entry(cur, curinfo.minalign - p_pivot.CurrentInfo().minalign));
    }
    assert(loops_cur.loops.size() > 0);
    non_perfects.push_back(loops_cur);
  }

#ifdef DEBUG
  std::cerr << "Number of non-perfect entries: " << non_perfects.size() << "\n";
#endif

  if (inner == 0) {
    /* No inner loops that can be blocked together. */
    if (!non_perfects.empty()) {
      tuning->BlockLoops(top, loop_innermost, this, &non_perfects);
    } else {
      tuning->BlockLoops(top, loop_innermost, this);
    }
    return top;
  }

  LoopTreeNode* innerTop = LoopTreeTransform().InsertHandle(loop_innermost, 1);

  /* GenXformRoot must be called for each inner nest that precedes `inner`
     in the chain, and for `inner` itself; only the root produced for
     `inner` is kept. */
  for (const CompSliceDepGraphNode::NestInfo* p = nestInfo.InnerNest();
       p != 0; p = p->InnerNest()) {
    LoopTreeNode* curTop = p->GenXformRoot(innerTop);
    assert(curTop != 0);
    if (p == inner) {
      innerTop = curTop;
      break;
    }
  }

  const CompSliceNest* innerNest = inner->GetNest();
  assert(innerNest != 0);
  for (int j = innerNest->NumberOfEntries() - 1; j >= 0; --j) {
    innerTop = op.Transform(comp, innerNest->Entry(j), innerTop);
  }

  /* Inner loops that are not involved in the outer slice fusion: one
     FuseLoopInfo per inner slice, listing all of its loops. */
  LoopTreeNode* inner2 = 0;
  for (int i = 0; i < innerNest->NumberOfEntries(); ++i) {
    CompSlice::ConstLoopIterator p_inner2 = innerNest->Entry(i)->GetConstLoopIterator();
    // Overwritten on every pass, so after the loop this is the first loop
    // reported for the last inner slice (or 0 if there are no entries).
    inner2 = p_inner2.Current();
    FuseLoopInfo cur;
    for ( ; !p_inner2.ReachEnd(); ++p_inner2) {
      cur.loops.push_back(
          FuseLoopInfo::Entry(p_inner2.Current(), p_inner2.CurrentInfo().minalign));
    }
    non_perfects.push_back(cur);
  }
  tuning->BlockLoops(top, inner2, this, &non_perfects);

  return top;
}
/*
 * Reorder the slices of a loop nest via op.Transform() and register the
 * resulting loops for blocking with the auto-tuning interface.
 *
 * Three outcomes are possible after the slices have been transformed:
 *   - the two innermost slices share a loop: only the loops owned by the
 *     innermost slice alone are reported, and deeper inner loops are not
 *     blocked;
 *   - DoNonPerfectBlocking() reports no inner nest: the nest is blocked as
 *     is, provided it has more than one slice;
 *   - DoNonPerfectBlocking() reports an inner nest: that nest is transformed
 *     too and the loops of its last slice are reported for blocking.
 *
 * @param nestInfo  supplies the slice nest and the chain of inner nests.
 * @param comp      passed through to op.Transform().
 * @param op        transformation applied to each slice.
 * @param top       in/out: replaced by the result of each op.Transform()
 *                  call on the outer slices.
 * @return the final value of `top`.
 */
LoopTreeNode* ParameterizeBlocking::
ApplyBlocking( const CompSliceDepGraphNode::FullNestInfo& nestInfo,
               LoopTreeDepComp& comp, DependenceHoisting &op,
               LoopTreeNode *&top)
{
  const CompSliceNest* pslices = nestInfo.GetNest();
  assert(pslices != 0);
  const CompSliceNest& slices = *pslices;
  // Not referenced again in this function; retained as in the original.
  AstInterface& fa = LoopTransformInterface::getAstInterface();
  int size = slices.NumberOfEntries();
  assert (size > 0);

  /* Arrange the desired loop nesting order, walking the slices backwards. */
  int sliceIdx = size - 1;
  while (sliceIdx >= 0) {
    top = op.Transform(comp, slices[sliceIdx], top);
    --sliceIdx;
  }

  AutoTuningInterface* tuning = LoopTransformInterface::getAutoTuningInterface();
  assert(tuning != 0);

  const CompSlice* lastSlice = slices[size - 1];

  if (size > 1) {
    const CompSlice* pivotSlice = slices[size - 2];
    CompSlice::ConstLoopIterator pivotIter = pivotSlice->GetConstLoopIterator();
    if (lastSlice->SliceCommonLoop(pivotSlice)) {
      /* The outer loops are not perfectly nested. */
      FuseLoopInfo lastOnly(pivotIter.Current());
      CompSlice::ConstLoopIterator it = lastSlice->GetConstLoopIterator();
      while (!it.ReachEnd()) {
        LoopTreeNode* candidate = it.Current();
        // Loops that also belong to the pivot slice are left out.
        if (!pivotSlice->QuerySliceLoop(candidate)) {
          lastOnly.loops.push_back(
              FuseLoopInfo::Entry(candidate,
                                  it.CurrentInfo().minalign - pivotIter.CurrentInfo().minalign));
        }
        ++it;
      }
      assert(lastOnly.loops.size() > 0);
      /* For now the deeper inner loops are not blocked. */
      tuning->BlockLoops(top, lastOnly.loops[0].first, this, &lastOnly);
      return top;
    }
  }

  CompSlice::ConstLoopIterator lastIter = lastSlice->GetConstLoopIterator();
  LoopTreeNode* firstOfLast = *lastIter;

  /* Original author's note: the result is non-null only if there is a
     single inner loop nest inside. */
  const CompSliceDepGraphNode::NestInfo* nonperfect = DoNonPerfectBlocking(nestInfo);

  if (nonperfect == 0) {
    /* All loops are perfectly nested; a single slice needs no blocking call. */
    if (size > 1) {
      tuning->BlockLoops(top, firstOfLast, this);
    }
    return top;
  }

  LoopTreeNode* innerRoot = LoopTreeTransform().InsertHandle(firstOfLast, 1);

  /* GenXformRoot has to run for every inner nest up to and including
     `nonperfect`; only the root generated for `nonperfect` is adopted. */
  const CompSliceDepGraphNode::NestInfo* walker = nestInfo.InnerNest();
  while (walker != 0) {
    LoopTreeNode* generated = walker->GenXformRoot(innerRoot);
    assert(generated != 0);
    if (walker == nonperfect) {
      innerRoot = generated;
      break;
    }
    walker = walker->InnerNest();
  }

  const CompSliceNest* innerNest = nonperfect->GetNest();
  assert(innerNest!=0);
  int innerIdx = innerNest->NumberOfEntries() - 1;
  while (innerIdx >= 0) {
    innerRoot = op.Transform(comp, innerNest->Entry(innerIdx), innerRoot);
    --innerIdx;
  }

  /* Inner loops that take no part in the outer slice fusion: every loop of
     the last inner slice, with the first one used as the blocking target. */
  FuseLoopInfo innerLoops;
  CompSlice::ConstLoopIterator innerIter =
      innerNest->Entry(innerNest->NumberOfEntries() - 1)->GetConstLoopIterator();
  LoopTreeNode* blockTarget = innerIter.Current();
  while (!innerIter.ReachEnd()) {
    innerLoops.loops.push_back(
        FuseLoopInfo::Entry(innerIter.Current(), innerIter.CurrentInfo().minalign));
    ++innerIter;
  }
  tuning->BlockLoops(top, blockTarget, this, &innerLoops);

  return top;
}