/*
 * Choose the slice to block for the nest described by nestInfo.
 *
 * The base-class result of AllLoopReuseBlocking::SetBlocking is used as the
 * default return value.  When the nest has at most one entry, or its last
 * entry reports no common loop with its first entry, DoNonPerfectBlocking is
 * consulted; if it returns an inner nest, one block size per inner entry is
 * appended to `blocksize` (1 for the entries below the reuse level, the
 * default block size for the remaining ones) and the last entry of that inner
 * nest is returned instead.
 *
 * anal      locality registry, forwarded to the base class and to
 *           SliceNestReuseLevel.
 * nestInfo  description of the nest being configured.
 * returns   the slice selected for blocking.
 */
const CompSlice* ParameterizeBlocking::
SetBlocking(CompSliceLocalityRegistry *anal, 
                           const CompSliceDepGraphNode::FullNestInfo& nestInfo)
{
  const CompSlice* res = AllLoopReuseBlocking::SetBlocking(anal,nestInfo);
  const CompSliceNest* curNest = nestInfo.GetNest();
#ifdef DEBUG
std::cerr << "Set blocking curNest: " << curNest->toString() << "\n";
#endif
  unsigned outerCount = curNest->NumberOfEntries();
#ifdef DEBUG
std::cerr << "size=" << outerCount  << "\n";
#endif

  /* Several entries whose last and first slices share a loop: keep the
     base-class decision untouched. */
  if (outerCount > 1 && curNest->Entry(outerCount-1)->SliceCommonLoop(curNest->Entry(0)))
     return res;

  /*QY: current loop nest is perfectly nested*/
  const CompSliceDepGraphNode::NestInfo* nonperfect = DoNonPerfectBlocking(nestInfo);
  if (nonperfect == 0)
     return res;

  /*QY: extra loops inside; (not considered for blocking normally). */
  const CompSliceNest* innerNest = nonperfect->GetNest();
  assert(innerNest!=0); /*QY: this is why nonperfect is returned */

  const int reuseLevel = SliceNestReuseLevel(anal, *innerNest);
  const int innerCount = innerNest->NumberOfEntries();

  int idx = 0;
  /* Entries below the reuse level get a block size of 1. */
  while (idx < reuseLevel) {
     blocksize.push_back(1);
     ++idx;
  }
  /* Remaining entries get their default block size. */
  while (idx < innerCount) {
     blocksize.push_back(GetDefaultBlockSize(innerNest->Entry(idx)));
     ++idx;
  }

  res = innerNest->Entry(innerCount-1);
#ifdef DEBUG
std::cerr << "set nonperfect blocking\n";
#endif 
  return res;
}
// Example #2
// 0
/*
 * Apply blocking to the loop nest described by nestInfo.
 *
 * Steps, in order:
 *   1. Run op.Transform on every slice of the nest, from the last entry to
 *      the first, threading `top` through each call.
 *   2. Ask DoNonPerfectBlocking for an inner nest; if there is none and the
 *      nest has a single slice, return without requesting any blocking.
 *   3. Pick the first loop of the last slice that the first slice does not
 *      report via QuerySliceLoop.
 *   4. For each adjacent pair of slices that share a common loop, record the
 *      loops of the inner slice that do not belong to the pivot slice,
 *      together with their alignment relative to the pivot.
 *   5. Hand everything to the auto-tuning interface via BlockLoops; when an
 *      inner nest exists it is transformed first and one FuseLoopInfo per
 *      inner entry is appended.
 *
 * nestInfo  description of the nest to block.
 * comp      dependence information forwarded to op.Transform.
 * op        transformation applied to each slice.
 * top       in/out: root of the transformed loop tree.
 * returns   the updated `top`.
 */
LoopTreeNode* ParameterizeBlocking::
ApplyBlocking( const CompSliceDepGraphNode::FullNestInfo& nestInfo, 
              LoopTreeDepComp& comp, DependenceHoisting &op, 
                                      LoopTreeNode *&top)
{
  const CompSliceNest* nestPtr = nestInfo.GetNest();
  assert(nestPtr != 0);
  const CompSliceNest& slices = *nestPtr;
  /* Not referenced below; the accessor call is retained as in the original. */
  AstInterface& fa = LoopTransformInterface::getAstInterface();
  const int numSlices = slices.NumberOfEntries();
  assert (numSlices > 0);

  AutoTuningInterface* tuning = LoopTransformInterface::getAutoTuningInterface();
  assert(tuning != 0);

  //QY: arrange the desired loop nesting order
  for (int level = numSlices; level-- > 0; )
     top = op.Transform( comp, slices[level], top);

  /*QY: check for non-perfectness which can be solved via loop distribution */
  const CompSliceDepGraphNode::NestInfo* inner = DoNonPerfectBlocking(nestInfo);
  if (inner == 0 && numSlices == 1) {
     /*QY: all loops are perfectly nested*/
     return top;
  }

  const CompSlice* innermostSlice = slices[numSlices-1];
  const CompSlice* outermostSlice = slices[0];
  CompSlice::ConstLoopIterator innermostIter
                     = innermostSlice->GetConstLoopIterator();
  LoopTreeNode* innermostLoop = *innermostIter;
  if (numSlices > 1) {
    /* Skip loops of the innermost slice that the outermost slice also reports. */
    while (outermostSlice->QuerySliceLoop(innermostLoop)) {
       ++innermostIter;
       assert(!innermostIter.ReachEnd());
       innermostLoop = *innermostIter;
    }
  }

  /*QY: this is for triangular non-perfect nests where a single loop is shared by multiple slices; loops cannot be distributed in spite of non-perfectness */
  std::vector<FuseLoopInfo> non_perfects;
  for (int level = 1; level < numSlices; ++level) {
     const CompSlice* curSlice = slices[level];
     const CompSlice* pivotSlice = slices[level-1];
     CompSlice::ConstLoopIterator pivotIter = pivotSlice->GetConstLoopIterator();
     if (!curSlice->SliceCommonLoop(pivotSlice))
        continue;

     /*QY: outer loops are not perfectly nested*/
     FuseLoopInfo fused(pivotIter.Current());
     /* The loop tree is walked from `top` instead of iterating curSlice
        directly: per the original note, the ordering of loops is not
        enforced in CompSlice. */
     for (LoopTreeTraverseSelectLoop walker(top); !walker.ReachEnd(); ++walker) {
        LoopTreeNode* candidate = walker.Current();
        /* Keep only loops that belong to curSlice but not to the pivot slice. */
        if (curSlice->QuerySliceLoop(candidate) && !pivotSlice->QuerySliceLoop(candidate)) {
           CompSlice::SliceLoopInfo candidateInfo = curSlice->QuerySliceLoopInfo(candidate);
           fused.loops.push_back(FuseLoopInfo::Entry(candidate,candidateInfo.minalign-pivotIter.CurrentInfo().minalign));
        }
     }
     assert(fused.loops.size() > 0);
     non_perfects.push_back(fused);
  }
#ifdef DEBUG
  std::cerr << "Number of non-perfect entries: " << non_perfects.size() << "\n";
#endif

  if (inner == 0) {
     /*QY: no inner loops that can be blocked together*/
     if (non_perfects.empty())
        tuning->BlockLoops(top, innermostLoop, this);
     else
        tuning->BlockLoops(top, innermostLoop, this, &non_perfects);
     return top;
  }

  LoopTreeNode* innerTop = LoopTreeTransform().InsertHandle(innermostLoop,1);
  /*QY: need to call GenXformRoot for each innerNest before inner*/
  const CompSliceDepGraphNode::NestInfo* walk = nestInfo.InnerNest();
  while (walk != 0) {
     LoopTreeNode* curTop = walk->GenXformRoot(innerTop);
     assert(curTop != 0);
     if (walk == inner) {
        /* Only the root generated for `inner` itself replaces innerTop. */
        innerTop = curTop;
        break;
     }
     walk = walk->InnerNest();
  }

  const CompSliceNest* innerNest = inner->GetNest();
  assert(innerNest!=0);
  for (int level = innerNest->NumberOfEntries(); level-- > 0; )
     innerTop = op.Transform( comp, innerNest->Entry(level), innerTop);

  /*QY: inner loops that are not involved in outer slice fusion*/
  LoopTreeNode* lastInnerLoop = 0;
  for (int entry = 0; entry < innerNest->NumberOfEntries(); ++entry) {
     CompSlice::ConstLoopIterator entryIter = innerNest->Entry(entry)->GetConstLoopIterator();
     /* After the loop this holds the first loop of the last inner entry. */
     lastInnerLoop = entryIter.Current();
     FuseLoopInfo entryLoops;
     while (!entryIter.ReachEnd()) {
        entryLoops.loops.push_back(FuseLoopInfo::Entry(entryIter.Current(),entryIter.CurrentInfo().minalign));
        ++entryIter;
     }
     non_perfects.push_back(entryLoops);
  }
  tuning->BlockLoops(top, lastInnerLoop, this, &non_perfects);
  return top;
}
/*
 * Apply blocking to the loop nest described by nestInfo.
 *
 * Every slice of the nest is first passed through op.Transform, from the
 * last entry to the first, threading `top` through each call.  Then exactly
 * one of three paths is taken:
 *   - the innermost slice shares a common loop with the slice just outside
 *     it: the innermost-slice loops not reported by that pivot slice are
 *     collected (with alignment relative to the pivot) and BlockLoops is
 *     requested on the first of them;
 *   - DoNonPerfectBlocking returns no inner nest: BlockLoops is requested on
 *     the first loop of the innermost slice, but only when the nest has more
 *     than one slice;
 *   - otherwise the inner nest is transformed as well and BlockLoops is
 *     requested with the loops of its last entry.
 *
 * nestInfo  description of the nest to block.
 * comp      dependence information forwarded to op.Transform.
 * op        transformation applied to each slice.
 * top       in/out: root of the transformed loop tree.
 * returns   the updated `top`.
 */
LoopTreeNode* ParameterizeBlocking::
ApplyBlocking( const CompSliceDepGraphNode::FullNestInfo& nestInfo, 
              LoopTreeDepComp& comp, DependenceHoisting &op, 
                                      LoopTreeNode *&top)
{
  const CompSliceNest* nestPtr = nestInfo.GetNest();
  assert(nestPtr != 0);
  const CompSliceNest& slices = *nestPtr;
  /* Not referenced below; the accessor call is retained as in the original. */
  AstInterface& fa = LoopTransformInterface::getAstInterface();
  const int numSlices = slices.NumberOfEntries();
  assert (numSlices > 0);

  /*QY: arrange the desired loop nesting order*/
  for (int level = numSlices; level-- > 0; )
     top = op.Transform( comp, slices[level], top);

  AutoTuningInterface* tuning = LoopTransformInterface::getAutoTuningInterface();
  assert(tuning != 0);

  const CompSlice* innermostSlice = slices[numSlices-1];
  if (numSlices > 1) {
     const CompSlice* pivotSlice = slices[numSlices-2];
     CompSlice::ConstLoopIterator pivotIter = pivotSlice->GetConstLoopIterator();
     if (innermostSlice->SliceCommonLoop(pivotSlice)) {
         /*QY: outer loops are not perfectly nested*/
         FuseLoopInfo fused(pivotIter.Current());
         CompSlice::ConstLoopIterator sliceIter
                     = innermostSlice->GetConstLoopIterator();
         while (!sliceIter.ReachEnd()) {
            LoopTreeNode* candidate = sliceIter.Current();
            /* Loops also reported by the pivot slice are left out. */
            if (!pivotSlice->QuerySliceLoop(candidate)) {
               fused.loops.push_back(FuseLoopInfo::Entry(candidate,sliceIter.CurrentInfo().minalign-pivotIter.CurrentInfo().minalign));
            }
            ++sliceIter;
         }
         assert(fused.loops.size() > 0);
         /*QY: right now do not block the deeper inner loops */
         tuning->BlockLoops(top, fused.loops[0].first, this, &fused);
         return top;
     }
  }

  CompSlice::ConstLoopIterator innermostIter
                     = innermostSlice->GetConstLoopIterator();
  LoopTreeNode* innermostLoop = *innermostIter;

  /*QY: nonperfect!=0 only if there is a single inner loop nest inside*/
  const CompSliceDepGraphNode::NestInfo* nonperfect = DoNonPerfectBlocking(nestInfo);

  if (nonperfect == 0) {
     /*QY: all loops are perfectly nested*/
     if (numSlices > 1)
         tuning->BlockLoops(top, innermostLoop, this);
     return top;
  }

  LoopTreeNode* innerTop = LoopTreeTransform().InsertHandle(innermostLoop,1);
  /*QY: need to call GenXformRoot for each innerNest before nonperfect*/
  const CompSliceDepGraphNode::NestInfo* walk = nestInfo.InnerNest();
  while (walk != 0) {
     LoopTreeNode* curTop = walk->GenXformRoot(innerTop);
     assert(curTop != 0);
     if (walk == nonperfect) {
        /* Only the root generated for `nonperfect` itself replaces innerTop. */
        innerTop = curTop;
        break;
     }
     walk = walk->InnerNest();
  }

  const CompSliceNest* innerNest = nonperfect->GetNest();
  assert(innerNest!=0);
  for (int level = innerNest->NumberOfEntries(); level-- > 0; )
     innerTop = op.Transform( comp, innerNest->Entry(level), innerTop);

  /*QY: inner loops that are not involved in outer slice fusion*/
  FuseLoopInfo innerloops;
  const int lastEntry = innerNest->NumberOfEntries()-1;
  CompSlice::ConstLoopIterator entryIter = innerNest->Entry(lastEntry)->GetConstLoopIterator();
  /* First loop of the last inner entry; captured before the iterator advances. */
  LoopTreeNode* firstInnerLoop = entryIter.Current();
  while (!entryIter.ReachEnd()) {
     innerloops.loops.push_back(FuseLoopInfo::Entry(entryIter.Current(),entryIter.CurrentInfo().minalign));
     ++entryIter;
  }
  tuning->BlockLoops(top, firstInnerLoop, this, &innerloops);
  return top;
}