Esempio n. 1
0
// Greedily places every sub-domain of `decomp` on the GPUs of `cluster`, one
// block at a time, and then asks the cluster to distribute the blocks.
//
//   cluster        cluster whose GPUs receive the blocks
//   decomp         decomposition that supplies the number of blocks to place
//   kTimeEstimate  estimated total runtime; a GPU's queue priority is this
//                  value minus the GPU's own time estimate
//
// Returns immediately, without calling distributeBlocks(), when there are no
// sub-domains or no GPUs.
void Balancer::perfBalanceGPU(Cluster &cluster, Decomposition& decomp,
        const double kTimeEstimate) {
  const int kGPUOnly = 2;
  const int kNumTotalGPUs = cluster.getNumTotalGPUs();
  const int kNumBlocks = decomp.getNumSubDomains();
  if (kNumBlocks == 0 || kNumTotalGPUs == 0)
    return;

  WorkQueue work_queue;
  WorkRequest work_request;

  // Seed the queue with one request per GPU. The fastest GPU gets the largest
  // time difference and therefore moves to the front of the queue.
  // NOTE(review): the first argument of getBalTimeEst() is presumably the
  // number of additional blocks to include in the estimate -- confirm.
  for (int gpu_index = 0; gpu_index < kNumTotalGPUs; ++gpu_index) {
    Node& gpu = cluster.getGlobalGPU(gpu_index);
    work_request.setTimeDiff(kTimeEstimate - gpu.getBalTimeEst(1, kGPUOnly));
    work_request.setIndex(gpu_index);
    work_queue.push(work_request);
  }

  // Place the data blocks on the GPUs one at a time: always serve the request
  // at the front of the queue, then re-queue that GPU with an updated key.
  for (int block_id = 0; block_id < kNumBlocks; ++block_id) {
    work_request = work_queue.top();
    work_queue.pop();

    Node& gpu = cluster.getGlobalGPU(work_request.getIndex());
    gpu.incrementBalCount();

    // Use the same key as the seeding loop (estimate subtracted from
    // kTimeEstimate). Re-queueing with the raw estimate would make a GPU's
    // priority grow with every block it receives instead of shrink.
    const double time_diff = kTimeEstimate - gpu.getBalTimeEst(1, kGPUOnly);

    work_request.setTimeDiff(time_diff);
    work_queue.push(work_request);
    //printWorkQueue(work_queue);
  }

  cluster.distributeBlocks(&decomp);
}
Esempio n. 2
0
// Decides how many sub-domains of `decomp` each device of `cluster` should
// own, distributes the blocks accordingly and fills the block directory.
//
//   cluster  cluster to balance; node 0 is used as the root that initially
//            holds every block
//   decomp   decomposition that supplies the number of blocks
//   kConfig  balancing configuration; 2 delegates to perfBalanceGPU(), 3
//            delegates to perfBalanceStrongestDevice(), any other value runs
//            the iterative root/children balancing below
void Balancer::perfBalance(Cluster &cluster, Decomposition& decomp,
        const int kConfig) {
  const int kGPUOnly(2);
  const int kStrongest(3);
  WorkQueue work_queue;
  WorkRequest work_request;
  Node &root = cluster.getNode(0);
  size_t num_blocks = decomp.getNumSubDomains();

  // initialize block directory
  cluster.setNumBlocks(num_blocks);

  // accumulate the total weight and the minimum edge weight over all nodes
  double total_weight(0.0);
  double min_edge_weight(0.0);
  for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
    total_weight += cluster.getNode(node).getTotalWeight(kConfig);
    min_edge_weight += cluster.getNode(node).getMinEdgeWeight(kConfig);
  }

  // Quick estimation of runtime. The communication term is only evaluated
  // when the edge weight is positive, so it never divides by zero.
  // NOTE(review): total_weight is not guarded; a cluster whose weights sum to
  // zero still divides by zero here -- confirm what the estimate should be.
  double procTime = num_blocks / total_weight;
  double commTime = 0.0;
  double timeEst = procTime;
  if (0.0 < min_edge_weight) {
    commTime = num_blocks / min_edge_weight;
    timeEst += commTime;
  }

  if (kGPUOnly == kConfig) {
    perfBalanceGPU(cluster, decomp, timeEst);
  } else if (kStrongest == kConfig) {
    perfBalanceStrongestDevice(cluster, decomp);
  } else {
    // perform initial task distribution: every block starts on the root
    for (size_t i = 0; i < num_blocks; ++i)
      root.incrementBalCount();

    /*fprintf(stderr,
            "perfBalance: \n\ttime est: %f sec\n\tprocTime: %f sec\n\tcommTime: %f \
            sec\n\ttotal weight:%e \n\tmin edge weight:%e.\n",
            timeEst, procTime, commTime, total_weight, min_edge_weight);  // */

    // NOTE(review): work_queue is never cleared between passes, so requests
    // left over when the root runs out of blocks are still queued when the
    // next pass enqueues fresh ones -- confirm this carry-over is intended.
    bool changed(false);
    do {
      changed = false;

      // Pass 1: the other cluster nodes hand surplus blocks back to the root
      // or queue a request for more work.
      for (unsigned int node_index = 1;
              node_index < cluster.getNumNodes();
              ++node_index) {
        Node& node = cluster.getNode(node_index);
        const int work_deficit = node.getTotalWorkNeeded(timeEst, kConfig) -
                node.getBalCount();
        if (0 > work_deficit) { // node has extra work
          const int extra_blocks = -work_deficit;
          for (int moved = 0;
                  (moved < extra_blocks) && (0 < node.getBalCount());
                  ++moved) {
            // move block from node to root
            node.decrementBalCount();
            root.incrementBalCount();
            changed = true;
          }
        } else if (0 < work_deficit) { // node needs more work
          work_request.setTimeDiff(timeEst - node.getBalTimeEst(0, kConfig));
          work_request.setIndex(node_index);
          work_queue.push(work_request);
        }
      }

      // Pass 2: same exchange between the root and its own children.
      for (unsigned int child_index = 0;
              child_index < root.getNumChildren();
              ++child_index) {
        Node& child = root.getChild(child_index);
        const int work_deficit = child.getTotalWorkNeeded(timeEst, kConfig) -
                child.getBalCount();
        if (0 > work_deficit) { // child has extra work
          const int extra_blocks = -work_deficit;
          for (int moved = 0;
                  (moved < extra_blocks) && (0 < child.getBalCount());
                  ++moved) {
            // move block from child to parent
            child.decrementBalCount();
            root.incrementBalCount();
            changed = true;
          }
        } else if (0 < work_deficit) { // child needs more work
          work_request.setTimeDiff(timeEst - child.getBalTimeEst(0, kConfig));
          // Requests from the root's children are tagged with a non-positive
          // index (the negated child index) so the hand-out loop can tell them
          // apart from cluster nodes, whose indices start at 1. The negation
          // is done on a signed value instead of in unsigned arithmetic.
          work_request.setIndex(-static_cast<int>(child_index));
          work_queue.push(work_request);
        }
      }

      // The root now holds all surplus blocks; hand them out one at a time to
      // the request at the front of the queue.
      while (0 < root.getBalCount() && // there are blocks left to give
              !work_queue.empty()) { // there are requests left to fill

        // get largest request
        WorkRequest request = work_queue.top();
        work_queue.pop();

        const int id = request.getIndex();
        // id <= 0: one of the root's children; id > 0: another cluster node
        Node& recipient = (id <= 0) ? root.getChild(-id) : cluster.getNode(id);

        root.decrementBalCount();
        recipient.incrementBalCount();
        changed = true;

        // if there is still work left to do put it back on
        // the queue so that it will reorder correctly
        const double newTimeDiff =
                timeEst - recipient.getBalTimeEst(0, kConfig);
        if (0 < newTimeDiff) {
          request.setTimeDiff(newTimeDiff);
          work_queue.push(request);
        }
      }

      // balance the work within each node
      for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
        changed |= balanceNode(cluster.getNode(node), timeEst, kConfig);
      }
    } while (changed);
  }

  /* now that we know where everything should go, distribute the blocks */
  cluster.distributeBlocks(&decomp);

  /* the work is balanced, so we can fill the block directory */
  cluster.storeBlockLocs();
}