void Balancer::balance(Cluster &cluster, Decomposition& decomp, const int kConfig) { const int kCPUAndGPU = 0; const int kCPUOnly = 1; const int kGPUOnly = 2; int blocks_per_node = 0; // num cpu nodes unsigned int total_nodes = cluster.getNumNodes(); size_t num_blocks = decomp.getNumSubDomains(); // initialize block directory cluster.setNumBlocks(num_blocks); if (kConfig != kCPUOnly) { unsigned int num_gpus = 0; for (unsigned int node_index = 0; node_index < cluster.getNumNodes(); ++node_index) { num_gpus += cluster.getNode(node_index).getNumChildren(); } if (kConfig == kGPUOnly) // gpu only total_nodes = num_gpus; else if (kConfig == kCPUAndGPU) // cpu and gpu total_nodes += num_gpus; } blocks_per_node = ceil(decomp.getNumSubDomains() / (float) total_nodes); for (unsigned int node_index = 0; node_index < cluster.getNumNodes(); ++node_index) { Node& node = cluster.getNode(node_index); if (kConfig == kCPUOnly || kConfig == kCPUAndGPU) { for (int subd = 0; subd < blocks_per_node && 0 < decomp.getNumSubDomains(); ++subd) { SubDomain *block = decomp.popSubDomain(); node.addSubDomain(block); } } if (kConfig == kGPUOnly || kConfig == kCPUAndGPU) { for (unsigned int gpu_index = 0; gpu_index < node.getNumChildren(); ++gpu_index) { Node& gpu = node.getChild(gpu_index); for (int subd = 0; subd < blocks_per_node && 0 < decomp.getNumSubDomains(); ++subd) { SubDomain *block = decomp.popSubDomain(); gpu.addSubDomain(block); } } } } /* the work is balanced, so we can fill the block directory */ cluster.storeBlockLocs(); }