void Master::schedule_tasks() { log_debug("Scheduling %d tasks on %d slots...", ready_queue.size(), free_slots.size()); int scheduled = 0; TaskList deferred_tasks; while (ready_queue.size() > 0 && free_slots.size() > 0) { Task *task = ready_queue.top(); ready_queue.pop(); log_trace("Scheduling task %s", task->name.c_str()); bool match = false; for (SlotList::iterator s = free_slots.begin(); s != free_slots.end(); s++) { Slot *slot = *s; Host *host = slot->host; // If the task fits, schedule it if (host->can_run(task)) { log_trace("Matched task %s to slot %d on host %s", task->name.c_str(), slot->rank, host->name()); // Reserve the resources vector<cpu_t> bindings = host->allocate_resources(task); host->log_resources(resource_log); submit_task(task, slot->rank, bindings); s = free_slots.erase(s); // so that the s++ in the loop doesn't skip one s--; match = true; scheduled += 1; // This is to break out of the slot loop so that we can // consider the next task break; } } if (!match) { // If the task could not be scheduled, then we save it // and move on to the next one. It will be requeued later. log_trace("No slot found for task %s", task->name.c_str()); deferred_tasks.push_back(task); } } log_debug("Scheduled %d tasks and deferred %d tasks", scheduled, deferred_tasks.size()); // Requeue all the deferred tasks for (TaskList::iterator t = deferred_tasks.begin(); t != deferred_tasks.end(); t++) { ready_queue.push(*t); } }
int Master::run() { log_info("Master starting with %d workers", numworkers); start_time = current_time(); publish_event(WORKFLOW_START, NULL); // Install signal handlers struct sigaction signal_action; signal_action.sa_handler = on_signal; signal_action.sa_flags = SA_NODEFER; sigemptyset(&signal_action.sa_mask); if (sigaction(SIGALRM, &signal_action, NULL) < 0) { myfailures("Unable to set signal handler for SIGALRM"); } if (sigaction(SIGTERM, &signal_action, NULL) < 0) { myfailures("Unable to set signal handler for SIGTERM"); } // Set alarm to interrupt the master when the walltime is up if (this->max_wall_time > 0.0) { log_info("Setting max walltime to %lf minutes", this->max_wall_time); alarm((unsigned)ceil(max_wall_time * 60.0)); } register_workers(); // Check to make sure that there is at least one host capable // of executing every task for (DAG::iterator t = dag->begin(); t != dag->end(); t++){ Task *task = (*t).second; // Check all the hosts for one that can run the task bool match = false; for (unsigned h=0; h<hosts.size(); h++) { Host *host = hosts[h]; if (host->can_run(task)) { match = true; break; } } if (!match) { // There was no host found that was capable of executing the // task, so we must abort myfailure("FATAL ERROR: No host is capable of running task %s", task->name.c_str()); } } // If there is a host script, wait here for it to run if (has_host_script) { comm->barrier(); } log_info("Starting workflow"); double makespan_start = current_time(); // Keep executing tasks until the workflow is finished or the master // needs to abort the workflow due to a signal being caught while (!this->engine->is_finished() && !ABORT) { queue_ready_tasks(); schedule_tasks(); wait_for_results(); } double makespan_finish = current_time(); if (ABORT) { log_error("Aborting workflow"); } else { log_info("Workflow finished"); } if (this->engine->max_failures_reached()) { log_error("Max failures reached: DAG prematurely aborted"); } // This must be done before write_cluster_summary so that the // wall time can be recorded in the cluster-summary record finish_time = current_time(); wall_time = finish_time - start_time; double makespan = makespan_finish - makespan_start; // Close FDCache here before merging output so that // we can be sure the data files are flushed fdcache->close(); // Compute resource utilization double master_util = total_runtime / (wall_time * (numworkers+1)); double worker_util = total_runtime / (wall_time * numworkers); if (total_runtime <= 0) { master_util = 0.0; worker_util = 0.0; } log_info("Resource utilization (with master): %lf", master_util); log_info("Resource utilization (without master): %lf", worker_util); log_info("Total runtime of tasks: %lf seconds (%lf minutes)", total_runtime, total_runtime/60.0); log_info("Wall time: %lf seconds (%lf minutes)", wall_time, wall_time/60.0); log_info("Makespan: %lf seconds (%lf minutes)", makespan, makespan/60.0); log_info("Throughput: %lf tasks/second", success_count/makespan); log_info("Bytes sent to workers: %lu", comm->sent()); log_info("Bytes received from workers: %lu", comm->recvd()); log_info("File descriptor cache hit rate: %lf", fdcache->hitrate()); bool failed = ABORT || this->engine->is_failed(); write_cluster_summary(failed); if (!per_task_stdio) merge_all_task_stdio(); log_info("Sending workers shutdown messages..."); for (int i=1; i<=numworkers; i++) { log_debug("Sending shutdown message to worker %d", i); ShutdownMessage shmsg; comm->send_message(&shmsg, i); } if (failed) { publish_event(WORKFLOW_FAILURE, NULL); } else { publish_event(WORKFLOW_SUCCESS, NULL); } if (ABORT) { myfailure("Workflow aborted"); return 1; } else if (failed) { log_error("Workflow failed"); return 1; } else { log_info("Workflow suceeded"); return 0; } }