Exemple #1
0
void Balancer::balance(Cluster &cluster, Decomposition& decomp,
        const int kConfig) {
  const int kCPUAndGPU = 0;
  const int kCPUOnly = 1;
  const int kGPUOnly = 2;
  int blocks_per_node = 0;
  // num cpu nodes
  unsigned int total_nodes = cluster.getNumNodes();
  size_t num_blocks = decomp.getNumSubDomains();

  // initialize block directory
  cluster.setNumBlocks(num_blocks);

  if (kConfig != kCPUOnly) {
    unsigned int num_gpus = 0;
    for (unsigned int node_index = 0;
            node_index < cluster.getNumNodes();
            ++node_index) {
      num_gpus += cluster.getNode(node_index).getNumChildren();
    }
    if (kConfig == kGPUOnly) // gpu only
      total_nodes = num_gpus;
    else if (kConfig == kCPUAndGPU) // cpu and gpu
      total_nodes += num_gpus;
  }

  blocks_per_node = ceil(decomp.getNumSubDomains() / (float) total_nodes);
  for (unsigned int node_index = 0;
          node_index < cluster.getNumNodes();
          ++node_index) {
    Node& node = cluster.getNode(node_index);
    if (kConfig == kCPUOnly || kConfig == kCPUAndGPU) {
      for (int subd = 0;
              subd < blocks_per_node && 0 < decomp.getNumSubDomains();
              ++subd) {
        SubDomain *block = decomp.popSubDomain();
        node.addSubDomain(block);
      }
    }
    if (kConfig == kGPUOnly || kConfig == kCPUAndGPU) {
      for (unsigned int gpu_index = 0;
              gpu_index < node.getNumChildren();
              ++gpu_index) {
        Node& gpu = node.getChild(gpu_index);
        for (int subd = 0;
                subd < blocks_per_node && 0 < decomp.getNumSubDomains();
                ++subd) {
          SubDomain *block = decomp.popSubDomain();
          gpu.addSubDomain(block);
        }
      }
    }
  }
  /* the work is balanced, so we can fill the block directory */
  cluster.storeBlockLocs();
}