Beispiel #1
0
// Put the replica hosts of every block into a deterministic order.
//
// blockHosts   array of replication * numBlocks entries, where entry
//              (r * numBlocks + b) is the rth replica of block #b;
//              reordered in place
// numBlocks    number of blocks described by blockHosts
// replication  number of replica hosts recorded per block
// randomizer   its hash() seeds the per-block rotation (the comments below
//              say it is based on the file name)
//
// For each block, the replica hosts are sorted in ascending order and then
// rotated by (hash + block number) mod replication. If replication is
// outside the range 2..10, blockHosts is left unchanged.
static void sortHostArray(HostId *blockHosts,
                          Int32 numBlocks,
                          Int32 replication,
                          const NAString &randomizer)
{
  // the hdfsGetHosts() call randomizes the hosts for 1st, 2nd and 3rd replica etc.
  // for each call, probably to get more even access patterns. This makes it hard
  // to debug the placement algorithm, since almost no 2 query plans are alike.
  // Replace the random method of hdfsGetHosts with a pseudo-random one,
  // based on the file name. With no randomization we would put a bigger load
  // on hosts with a lower id.

  // upper bound on the replication factor handled here; it is also the
  // size of the per-block scratch array below
  enum { MAX_SORTED_REPLICAS = 10 };

  if (replication > 1 && replication <= MAX_SORTED_REPLICAS)
    {
      const UInt32 numReplicas = (UInt32) replication;

      // Reduce the hash modulo the replication factor right away. Adding
      // the raw 32-bit hash to b and m could wrap around at 2^32, and since
      // 2^32 is not a multiple of most replication factors, a wrap in the
      // middle of a block would map two positions to the same sorted entry
      // (duplicating one host and dropping another).
      const UInt32 rshift = ((UInt32) randomizer.hash()) % numReplicas;

      for (Int32 b=0; b<numBlocks; b++)
        {
          // a sorted array of HostIds for a given block
          HostId s[MAX_SORTED_REPLICAS];

          // insert the first value
          s[0] = blockHosts[b];

          // replication is a small number, an insertion sort into s will do...
          for (Int32 r=1; r<replication; r++)
            {
              HostId newVal = blockHosts[r*numBlocks + b];
              Int32 pos = r;

              // shift the larger values up by 1 until the slot for
              // the new value is found
              while (pos > 0 && newVal < s[pos-1])
                {
                  s[pos] = s[pos-1];
                  pos--;
                }

              // then insert the new value
              s[pos] = newVal;
            } // for each replica host of a block

          // rotation for this block, always in the range 0..numReplicas-1
          const UInt32 offset =
            (rshift + ((UInt32) b) % numReplicas) % numReplicas;

          // now move sorted values in s back to blockHosts,
          // but rotate them by offset; m + offset is less than
          // 2 * numReplicas, so this sum cannot wrap around
          for (Int32 m=0; m<replication; m++)
            blockHosts[m*numBlocks + b] = s[((UInt32) m + offset) % numReplicas];

        } // for each block b
    } // replication between 2 and MAX_SORTED_REPLICAS
} // sortHostArray