Пример #1
0
/*
 * Keep calling move_blocks(grid, blocks, DOWN) until it reports a zero
 * (false) result.  All of the work happens inside move_blocks(); this
 * function only repeats the call.
 */
void
drop_blocks(int grid[GRID_ROWS][GRID_COLS],
            free_blocks *blocks)
{
  for (;;) {
    if (!move_blocks(grid, blocks, DOWN))
      break;
  }
}
Пример #2
0
/*
 * Shift every block in the list by (h_change, v_change).
 *
 * After each shift the coordinate is wrapped once: a value at or above the
 * wrap limit has the limit subtracted, a negative value has the limit added.
 * Only a single correction is applied per axis, so a change larger in
 * magnitude than the wrap limit can leave the coordinate outside
 * [0, wrap).
 *
 * blocks  - head of the list (NULL means an empty list, nothing is done)
 * h_wrap  - wrap limit for hpos
 * v_wrap  - wrap limit for vpos
 */
void move_blocks(BLOCK_LIST *blocks, int h_change, int v_change, int h_wrap, int v_wrap) {
  BLOCK_LIST *node;

  for (node = blocks; node != NULL; node = node->next) {
    BLOCK *blk = (BLOCK *)(node->data);

    /* Horizontal move, then wrap. */
    blk->hpos += h_change;
    if (blk->hpos >= h_wrap) {
      blk->hpos -= h_wrap;
    } else if (blk->hpos < 0) {
      blk->hpos += h_wrap;
    }

    /* Vertical move, then wrap. */
    blk->vpos += v_change;
    if (blk->vpos >= v_wrap) {
      blk->vpos -= v_wrap;
    } else if (blk->vpos < 0) {
      blk->vpos += v_wrap;
    }
  }
}
Пример #3
0
// This file includes routines needed for load balancing.  Load balancing is
// based on RCB.  At each stage, a direction and factor is chosen (factor is
// based on the prime factorization of the number of processors) and the
// blocks in that group are sorted in that direction and divided into factor
// subgroups.  Then dots (corresponding to blocks) are moved into the proper
// subgroup and the process is repeated with the subgroups until each group
// represents a processor.  The dots are then moved back to the originating
// processor, at which point we know where the blocks need to be moved and
// then the blocks are moved.  Some of these routines are also used when
// blocks need to be coarsened - the coarsening routine determines which
// blocks need to be coarsened and those blocks are moved to the processor
// where their parent is.
void load_balance(void)
{
   int npx1, npy1, npz1, nfac, fac[25], fact;
   int i, j, m, n, dir, in;
   double t1, t2, t3, t4, t5, tp, tm, tu;
   block *bp;

   tp = tm = tu = 0.0;

   t3 = t4 = t5 = 0.0;
   t1 = timer();
   for (in = 0, num_dots = 0; in < sorted_index[num_refine+1]; in++) {
      n = sorted_list[in].n;
      if ((bp = &blocks[n])->number >= 0) {
         bp->new_proc = my_pe;
         if ((num_dots+1) > max_num_dots) {
            printf("%d ERROR: need more dots\n", my_pe);
            exit(-1);
         }
         dots[num_dots].cen[0] = bp->cen[0];
         dots[num_dots].cen[1] = bp->cen[1];
         dots[num_dots].cen[2] = bp->cen[2];
         dots[num_dots].number = bp->number;
         dots[num_dots].n = n;
         dots[num_dots].proc = my_pe;
         dots[num_dots++].new_proc = 0;
      }
   }
   max_active_dot = num_dots;
   for (n = num_dots; n < max_num_dots; n++)
      dots[n].number = -1;

   npx1 = npx;
   npy1 = npy;
   npz1 = npz;
   nfac = factor(num_pes, fac);
   for (i = nfac, j = 0; i > 0; i--, j++) {
      fact = fac[i-1];
      dir = find_dir(fact, npx1, npy1, npz1);
      if (dir == 0)
         npx1 /= fact;
      else if (dir == 1)
         npy1 /= fact;
      else
         npz1 /= fact;
      sort(j, fact, dir);
      move_dots(j, fact);
   }
   // first have to move information from dots back to original core,
   // then will update processor block is moving to, and then its neighbors
   for (n = 0; n < num_pes; n++)
      to[n] = 0;
   for (m = i = 0; i < max_active_dot; i++)
      if (dots[i].number >= 0 && dots[i].proc != my_pe) {
         to[dots[i].proc]++;
         m++;
      }

   num_moved_lb += m;
   MPI_Allreduce(&m, &n, 1, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD);
   t4 = timer();
   t2 = t4 - t1;
   if (n) {  // Only move dots and blocks if there is something to move
      MPI_Alltoall(to, 1, MPI_INTEGER, from, 1, MPI_INTEGER, MPI_COMM_WORLD);

      move_dots_back();
      t5 = timer();
      t3 = t5 - t4;
      t4 = t5;

      move_blocks(&tp, &tm, &tu);
   }
   t5 = timer() - t4;
   timer_lb_misc += timer() - t1 - t2 - t3 - tp - tm - tu;
   timer_lb_sort += t2;
   timer_lb_pa += tp;
   timer_lb_mv += tm;
   timer_lb_un += tu;
   timer_lb_mb += t3;
   timer_lb_ma += t5;
}