void Master::schedule_tasks() { log_debug("Scheduling %d tasks on %d slots...", ready_queue.size(), free_slots.size()); int scheduled = 0; TaskList deferred_tasks; while (ready_queue.size() > 0 && free_slots.size() > 0) { Task *task = ready_queue.top(); ready_queue.pop(); log_trace("Scheduling task %s", task->name.c_str()); bool match = false; for (SlotList::iterator s = free_slots.begin(); s != free_slots.end(); s++) { Slot *slot = *s; Host *host = slot->host; // If the task fits, schedule it if (host->can_run(task)) { log_trace("Matched task %s to slot %d on host %s", task->name.c_str(), slot->rank, host->name()); // Reserve the resources vector<cpu_t> bindings = host->allocate_resources(task); host->log_resources(resource_log); submit_task(task, slot->rank, bindings); s = free_slots.erase(s); // so that the s++ in the loop doesn't skip one s--; match = true; scheduled += 1; // This is to break out of the slot loop so that we can // consider the next task break; } } if (!match) { // If the task could not be scheduled, then we save it // and move on to the next one. It will be requeued later. log_trace("No slot found for task %s", task->name.c_str()); deferred_tasks.push_back(task); } } log_debug("Scheduled %d tasks and deferred %d tasks", scheduled, deferred_tasks.size()); // Requeue all the deferred tasks for (TaskList::iterator t = deferred_tasks.begin(); t != deferred_tasks.end(); t++) { ready_queue.push(*t); } }
/* * Register all workers, create hosts, create slots. Assign a host-centric * rank to each of the workers. The worker with the lowest global rank on * each host is given host rank 0, the next lowest is given host rank 1, * and so on. The master is not given a host rank. */ void Master::register_workers() { typedef map<string, Host *> HostMap; HostMap hostmap; typedef map<int, string> HostnameMap; HostnameMap hostnames; // Collect host names from all workers, create host objects for (int i=0; i<numworkers; i++) { RegistrationMessage *msg = dynamic_cast<RegistrationMessage *>(comm->recv_message()); if (msg == NULL) { myfailure("Expected registration message"); } int rank = msg->source; string hostname = msg->hostname; unsigned int memory = msg->memory; unsigned int threads = msg->threads; unsigned int cores = msg->cores; unsigned int sockets = msg->sockets; delete msg; hostnames[rank] = hostname; if (hostmap.find(hostname) == hostmap.end()) { // If the host is not found, create a new one log_debug("Got new host: name=%s, mem=%u, threads/cpus=%u, cores=%u, sockets=%u", hostname.c_str(), memory, threads, cores, sockets); Host *newhost = new Host(hostname, memory, threads, cores, sockets); hosts.push_back(newhost); hostmap[hostname] = newhost; } else { // Otherwise, increment the number of slots available Host *host = hostmap[hostname]; host->add_slot(); } log_debug("Slot %d on host %s", rank, hostname.c_str()); } typedef map<string, int> RankMap; RankMap ranks; // Create slots, assign a host rank to each worker for (int rank=1; rank<=numworkers; rank++) { string hostname = hostnames.find(rank)->second; // Find host Host *host = hostmap.find(hostname)->second; // Create new slot Slot *slot = new Slot(rank, host); slots.push_back(slot); free_slots.push_back(slot); // Compute hostrank for this slot RankMap::iterator nextrank = ranks.find(hostname); int hostrank = 0; if (nextrank != ranks.end()) { hostrank = nextrank->second; } ranks[hostname] = hostrank + 1; HostrankMessage hrmsg(hostrank); comm->send_message(&hrmsg, rank); log_debug("Host rank of worker %d is %d", rank, hostrank); } // Log the initial resource freeability for (vector<Host *>::iterator i = hosts.begin(); i!=hosts.end(); i++) { Host *host = *i; host->log_resources(resource_log); } }