// This is a blocking call and must provide a fully booted partition when it // returns. Otherwise, this partition could be overcommitted given the // nature of the use of this call. // script partition_name size kind void Partition::boot(char *script, PKind pkind) { FILE *fin = NULL; ArgList args; MyString line; priv_state priv; // we're told what kind of partition this is going to be set_pkind(pkind); dprintf(D_ALWAYS, "\t%s %s %ld %s\n", script, get_name().Value(), get_size(), pkind_xlate(get_pkind()).Value()); args.AppendArg(script); args.AppendArg(get_name()); args.AppendArg(get_size()); args.AppendArg(pkind_xlate(get_pkind()).Value()); priv = set_root_priv(); fin = my_popen(args, "r", MY_POPEN_OPT_WANT_STDERR); line.readLine(fin); // read back OK or NOT_OK, XXX ignore my_pclose(fin); set_priv(priv); // Now that the script is done, mark it booted. set_pstate(BOOTED); }
void Partition::back(char *script) { FILE *fin = NULL; ArgList args; MyString line; priv_state priv; dprintf(D_ALWAYS, "\t%s %s %ld %s\n", script, get_name().Value(), get_size(), pkind_xlate(get_pkind()).Value()); args.AppendArg(script); args.AppendArg(get_name()); args.AppendArg(get_size()); args.AppendArg(pkind_xlate(get_pkind()).Value()); priv = set_root_priv(); fin = my_popen(args, "r", MY_POPEN_OPT_WANT_STDERR); line.readLine(fin); // read back OK or NOT_OK, XXX ignore my_pclose(fin); set_priv(priv); // we don't know it is backed until the // STARTD_FACTORY_SCRIPT_AVAILABLE_PARTITIONS // tells us it is actually backed. This prevents overcommit of a // partition to multiple startds. set_pstate(ASSIGNED); }
void Partition::dump(int flags) { MyString backer; if (m_initialized == false) { dprintf(flags, "Partition is not initialized!\n"); return; } dprintf(flags, "Partition: %s\n", get_name().Value()); backer = get_backer(); if (backer == "") { dprintf(flags, "\tBacked by: [NONE]\n"); } else { dprintf(flags, "\tBacked by: %s\n", backer.Value()); } dprintf(flags, "\tSize: %ld\n", get_size()); dprintf(flags, "\tPState: %s\n", pstate_xlate(get_pstate()).Value()); switch(get_pstate()) { case BOOTED: case ASSIGNED: case BACKED: dprintf(flags, "\tPKind: %s\n", pkind_xlate(get_pkind()).Value()); break; default: dprintf(flags, "\tPKind: N/A\n"); break; } }
// Run one scheduling pass over the known partitions.
//
// For each job kind (SMP, DUAL, VN), if there are more idle jobs than the
// currently backed partitions can realize, try to back exactly one more
// partition of that kind: prefer an already BOOTED partition of the right
// kind, otherwise boot a GENERATED one and then back it. Afterwards, any
// partition still left in the BOOTED state is shut down.
//
// wkld_mgr        - queried via total_idle() for the idle job counts.
// generate_script - only referenced by the partition generation code, which
//                   is currently skipped (see the *_NOT_IMPL labels).
// boot_script     - handed to Partition::boot().
// back_script     - handed to Partition::back().
// shutdown_script - handed to Partition::shutdown().
// (destroy_script is accepted but unused.)
void PartitionManager::schedule_partitions(WorkloadManager &wkld_mgr,
	char *generate_script,
	char *boot_script,
	char *back_script,
	char *shutdown_script,
	char * /*destroy_script*/)
{
	int idx;
	int total_smp_idle;
	int total_dual_idle;
	int total_vn_idle;
	MyString name;
	bool val = true;
	int smp_backed;
	int dual_backed;
	int vn_backed;

	// figure out the big picture workload to satisfy
	wkld_mgr.total_idle(total_smp_idle, total_dual_idle, total_vn_idle);

	// figure out the maximum number of jobs the currently backed
	// partitions are able to realize
	partition_realization(smp_backed, dual_backed, vn_backed);

	dprintf(D_ALWAYS, "Total Idle: SMP: %d, DUAL: %d, VN: %d\n",
		total_smp_idle, total_dual_idle, total_vn_idle);

	// Tell the user if I have to grab more partitions of a various type.
	if (total_smp_idle > smp_backed) {
		dprintf(D_ALWAYS, "%d backed SMP jobs, %d total SMP idle, attempting "
			"to back an SMP partition.\n",
			smp_backed, total_smp_idle);
	} else {
		dprintf(D_ALWAYS, "%d backed SMP jobs satisfies %d idle SMP jobs.\n",
			smp_backed, total_smp_idle);
	}

	if (total_dual_idle > dual_backed) {
		dprintf(D_ALWAYS, "%d backed DUAL jobs, %d total DUAL idle, attempting "
			"to back a DUAL partition.\n",
			dual_backed, total_dual_idle);
	} else {
		dprintf(D_ALWAYS, "%d backed DUAL jobs satisfies %d idle DUAL jobs.\n",
			dual_backed, total_dual_idle);
	}

	if (total_vn_idle > vn_backed) {
		dprintf(D_ALWAYS, "%d backed VN jobs, %d total VN idle, attempting "
			"to back a VN partition.\n",
			vn_backed, total_vn_idle);
	} else {
		// Report the VN capacity here, not the DUAL one.
		dprintf(D_ALWAYS, "%d backed VN jobs satisfies %d idle VN jobs.\n",
			vn_backed, total_vn_idle);
	}

	// This is a crappy algorithm which will boot and then
	// back a single partition for each kind of idle job.

	// Only try to back a partition if there are more jobs than
	// the current backed partitions can realize. Otherwise if a
	// single job in one schedd doesn't run for a while, eventually
	// all available partitions will be allocated to it. So, stop
	// when we know there are enough compute elements of a certain kind
	// to satisfy the workloads.

	// ********* SCHEDULE FOR SMP JOBS **************
	SCHED_SMP:
	if (total_smp_idle > smp_backed) {
		// find a booted one first if possible
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == BOOTED &&
				m_parts[idx].get_pkind() == SMP)
			{
				dprintf(D_ALWAYS, "Backing SMP partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].back(back_script);
				name = m_parts[idx].get_name();
				m_assigned.insert(name,val);

				goto SCHED_DUAL;
			}
		}

		// if not, we'll use a generated one
		SCHED_SMP_GENERATE:
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == GENERATED) {
				dprintf(D_ALWAYS, "Booting SMP partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].boot(boot_script, SMP);

				// we'll find this booted partition and then back it.
				goto SCHED_SMP;
			}
		}

		// We don't implement the next section of code, so skip it, but
		// leave it there so compilation checks it at least.
		dprintf(D_ALWAYS,
			"WARNING: No suitable partitions found for SMP jobs.\n");
		goto SMP_NOT_IMPL;

		// if there are no generated partitions, we have to generate one
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == NOT_GENERATED) {
				dprintf(D_ALWAYS,
					"Generating a partition for SMP jobs: 32 nodes\n");
				if (m_parts[idx].generate(generate_script, 32) == true) {
					// we'll find this generated partition and then boot it.
					goto SCHED_SMP_GENERATE;
				}
				// if the generation fails, we fall through...
			}
		}

		SMP_NOT_IMPL:
		;
	}

	// ********* SCHEDULE FOR DUAL JOBS **************
	SCHED_DUAL:
	if (total_dual_idle > dual_backed) {
		// find a booted one first if possible
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == BOOTED &&
				m_parts[idx].get_pkind() == DUAL)
			{
				dprintf(D_ALWAYS, "Backing DUAL partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].back(back_script);
				name = m_parts[idx].get_name();
				m_assigned.insert(name,val);

				goto SCHED_VN;
			}
		}

		// if not, we'll use a generated one
		SCHED_DUAL_GENERATE:
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == GENERATED) {
				dprintf(D_ALWAYS, "Booting DUAL partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].boot(boot_script, DUAL);

				// we'll find this booted partition and then back it.
				goto SCHED_DUAL;
			}
		}

		// We don't implement the next section of code, so skip it, but
		// leave it there so compilation checks it at least.
		dprintf(D_ALWAYS,
			"WARNING: No suitable partitions found for DUAL jobs.\n");
		goto DUAL_NOT_IMPL;

		// if there are no generated partitions, we have to generate one
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == NOT_GENERATED) {
				dprintf(D_ALWAYS,
					"Generating a partition for DUAL jobs: 32 nodes\n");
				if (m_parts[idx].generate(generate_script, 32) == true) {
					// we'll find this generated partition and then boot it.
					goto SCHED_DUAL_GENERATE;
				}
				// if the generation fails, we fall through...
			}
		}

		DUAL_NOT_IMPL:
		;
	}

	// ********* SCHEDULE FOR VN JOBS **************
	SCHED_VN:
	if (total_vn_idle > vn_backed) {
		// find a booted one first if possible
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == BOOTED &&
				m_parts[idx].get_pkind() == VN)
			{
				dprintf(D_ALWAYS, "Backing VN partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].back(back_script);
				name = m_parts[idx].get_name();
				m_assigned.insert(name,val);

				goto DONE;
			}
		}

		// if not, we'll use a generated one
		SCHED_VN_GENERATE:
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == GENERATED) {
				dprintf(D_ALWAYS, "Booting VN partition: %s %ld\n",
					m_parts[idx].get_name().Value(),
					m_parts[idx].get_size());

				m_parts[idx].boot(boot_script, VN);

				// we'll find this booted partition and then back it.
				goto SCHED_VN;
			}
		}

		// We don't implement the next section of code, so skip it, but
		// leave it there so compilation checks it at least.
		dprintf(D_ALWAYS,
			"WARNING: No suitable partitions found for VN jobs.\n");
		goto VN_NOT_IMPL;

		// if there are no generated partitions, we have to generate one
		for (idx = 0; idx < m_parts.length(); idx++) {
			if (m_parts[idx].get_pstate() == NOT_GENERATED) {
				dprintf(D_ALWAYS,
					"Generating a partition for VN jobs: 32 nodes\n");
				if (m_parts[idx].generate(generate_script, 32) == true) {
					// we'll find this generated partition and then boot it.
					goto SCHED_VN_GENERATE;
				}
				// if the generation fails, we fall through...
			}
		}

		VN_NOT_IMPL:
		;
	}

	DONE:
	;

	// Now that we've potentially backed partitions, let's take a look at the
	// partitions again and see if we can evict any ones which are only booted.
	// This state represents a non-backed partition (due to a startd
	// killing itself due to lack of work) which we don't need.

	// XXX After getting rid of something, maybe we should try and restart
	// the algorithm to see if something would have gotten booted and
	// backed right away for a different type of HTC job. Otherwise we'll wait
	// an entire cycle before trying to adjust the partitions again.
	for (idx = 0; idx < m_parts.length(); idx++) {
		if (m_parts[idx].get_pstate() == BOOTED) {
			dprintf(D_ALWAYS, "Shutting down unused partition: %s %ld %s\n",
				m_parts[idx].get_name().Value(),
				m_parts[idx].get_size(),
				pkind_xlate(m_parts[idx].get_pkind()).Value());

			m_parts[idx].shutdown(shutdown_script);
		}
	}
}