int Defrag::countMachines(char const *constraint,char const *constraint_source, MachineSet *machines) { ClassAdList startdAds; int count = 0; if( !queryMachines(constraint,constraint_source,startdAds) ) { return -1; } MachineSet my_machines; if( !machines ) { machines = &my_machines; } startdAds.Open(); ClassAd *startd_ad; while( (startd_ad=startdAds.Next()) ) { std::string machine; std::string name; startd_ad->LookupString(ATTR_NAME,name); slotNameToDaemonName(name,machine); if( machines->count(machine) ) { continue; } machines->insert(machine); count++; } startdAds.Close(); dprintf(D_FULLDEBUG,"Counted %d machines matching %s=%s\n", count,constraint_source,constraint); return count; }
/**
 * Cancel draining on machines that match the cancel requirements and are
 * currently draining.
 *
 * Does nothing when m_cancel_requirements is empty, when the counting query
 * fails, or when no draining machine matches.
 *
 * @param cancelled_machines  in/out set of machine names; machines already in
 *                            it are skipped, and every machine cancelled here
 *                            is added to it.
 */
void Defrag::poll_cancel(MachineSet &cancelled_machines)
{
    if (!m_cancel_requirements.size()) {
        return;
    }

    MachineSet draining_whole_machines;
    std::stringstream draining_whole_machines_ss;
    draining_whole_machines_ss << "(" << m_cancel_requirements << ") && (" << DRAINING_CONSTRAINT << ")";
    int num_draining_whole_machines = countMachines(draining_whole_machines_ss.str().c_str(),
        "<DEFRAG_CANCEL_REQUIREMENTS>", &draining_whole_machines);

    // countMachines() returns -1 when its query fails.  Treat that as its own
    // case rather than as "some machines found": otherwise we would log a
    // count of -1 and run a second, pointless query.
    if (num_draining_whole_machines < 0) {
        dprintf(D_ALWAYS, "Not cancelling any draining, because the query to find draining whole machines failed.\n");
        return;
    }
    if (num_draining_whole_machines == 0) {
        // Early exit: nothing to do.
        return;
    }
    dprintf(D_ALWAYS, "Of the whole machines, %d are in the draining state.\n", num_draining_whole_machines);

    ClassAdList startdAds;
    if (!queryMachines(DRAINING_CONSTRAINT, "DRAINING_CONSTRAINT <all draining slots>",startdAds)) {
        return;
    }

    // Shuffle first, then sort by rank.
    startdAds.Shuffle();
    startdAds.Sort(StartdSortFunc,&m_rank_ad);

    startdAds.Open();

    unsigned int cancel_count = 0;
    ClassAd *startd_ad_ptr;
    while ( (startd_ad_ptr=startdAds.Next()) ) {
        ClassAd &startd_ad = *startd_ad_ptr;
        std::string machine;
        std::string name;
        startd_ad.LookupString(ATTR_NAME,name);
        slotNameToDaemonName(name,machine);

        // Cancel each matching machine at most once, even though it may be
        // represented by several slot ads.
        if( !cancelled_machines.count(machine) && draining_whole_machines.count(machine) ) {
            cancel_drain(startd_ad);
            cancelled_machines.insert(machine);
            cancel_count ++;
        }
    }

    startdAds.Close();

    dprintf(D_ALWAYS, "Cancelled draining of %u whole machines.\n", cancel_count);
}
/**
 * Rebuild m_daemon_ips, the map from machine/daemon/collector names to IP
 * addresses, and pick the default host for aggregate metrics.
 *
 * @param daemon_ads  daemon ads; each contributes a mapping for its Machine
 *                    attribute and one for its Name attribute
 * @param collectors  collector list; each collector with a known hostname and
 *                    address contributes a mapping, and the first hostname
 *                    seen becomes m_default_aggregate_host
 */
void
StatsD::mapDaemonIPs(ClassAdList &daemon_ads,CollectorList &collectors)
{
    // The map of machines to IPs is used when directing ganglia to
    // associate specific metrics with specific hosts (host spoofing)
    m_daemon_ips.clear();
    daemon_ads.Open();
    ClassAd *daemon;
    while( (daemon=daemon_ads.Next()) ) {
        std::string machine,name,my_address;
        daemon->EvaluateAttrString(ATTR_MACHINE,machine);
        // Fixed: this used to read ATTR_MACHINE a second time, so the
        // daemon's own name never made it into the map.
        daemon->EvaluateAttrString(ATTR_NAME,name);
        daemon->EvaluateAttrString(ATTR_MY_ADDRESS,my_address);

        Sinful s(my_address.c_str());
        char const *host = s.getHost();
        if( !host ) {
            // No host in the address, so there is no IP to map to.
            continue;
        }
        std::string ip = host;

        // std::map::insert keeps an existing entry, so the first ad seen
        // for a given key wins.
        if( !machine.empty() ) {
            m_daemon_ips.insert( std::map< std::string,std::string >::value_type(machine,ip) );
        }
        if( !name.empty() ) {
            m_daemon_ips.insert( std::map< std::string,std::string >::value_type(name,ip) );
        }
    }
    daemon_ads.Close();

    // Also add a mapping of collector hosts to IPs, and determine the
    // collector host to use as the default machine name for aggregate
    // metrics.
    m_default_aggregate_host = "";
    DCCollector *collector=NULL;
    collectors.rewind();
    while( (collectors.next(collector)) ) {
        char const *collector_host = collector->fullHostname();
        char const *collector_addr = collector->addr();
        if( collector_host && m_default_aggregate_host.empty() ) {
            m_default_aggregate_host = collector_host;
        }
        if( collector_host && collector_addr ) {
            Sinful s(collector_addr);
            char const *ip = s.getHost();
            if( ip ) {
                m_daemon_ips.insert( std::map< std::string,std::string >::value_type(collector_host,ip) );
            }
        }
    }
}
int LeaseManager::timerHandler_GetAds ( void ) { CondorQuery query( m_queryAdtypeNum ); if ( m_queryConstraints.length() ) { query.addANDConstraint( m_queryConstraints.c_str() ); } if ( m_enable_ad_debug ) { ClassAd qad; query.getQueryAd( qad ); dprintf( D_FULLDEBUG, "Query Ad:\n" ); dPrintAd( D_FULLDEBUG, qad ); } QueryResult result; ClassAdList ads; dprintf(D_ALWAYS, " Getting all resource ads ...\n" ); result = m_collectorList->query( query, ads ); if( result != Q_OK ) { dprintf(D_ALWAYS, "Couldn't fetch ads: %s\n", getStrQueryResult(result)); return false; } m_resources.StartExpire( ); dprintf(D_ALWAYS, " Processing %d ads ...\n", ads.MyLength() ); DebugTimerDprintf timer; int list_length = ads.MyLength(); ads.Open( ); ClassAd *ad; while( ( ad = ads.Next()) ) { // Give the ad to the collection ads.Remove( ad ); m_resources.AddResource( ad ); } ads.Close( ); timer.Log( "ProcessAds", list_length ); dprintf( D_ALWAYS, " Done processing %d ads; pruning\n", list_length); timer.Start( ); m_resources.PruneExpired( ); timer.Log( "PruneExpired" ); dprintf( D_ALWAYS, " Done pruning ads\n" ); return 0; }
/**
 * Copy into `out` every ad of `in` that half-matches this query's ad.
 *
 * @param in   list of candidate ads (iterated, not modified here)
 * @param out  list that receives the matching ads via Insert()
 * @return the failure code from getQueryAd() if the query ad cannot be
 *         built, otherwise Q_OK
 */
QueryResult
CondorQuery::filterAds (ClassAdList &in, ClassAdList &out)
{
    // Build the query ad; without it nothing can be matched
    ClassAd queryAd;
    const QueryResult status = getQueryAd (queryAd);
    if (status != Q_OK) {
        return status;
    }

    in.Open();
    for (ClassAd *ad = in.Next(); ad; ad = in.Next()) {
        if (IsAHalfMatch(&queryAd, ad)) {
            out.Insert (ad);
        }
    }
    in.Close ();

    return Q_OK;
}
/**
 * Print every job ad in the list with printJob().  When both `longformat`
 * and `use_xml` are set, the output is wrapped in the ClassAd XML file
 * header and footer.
 */
static void printJobAds(ClassAdList & jobs)
{
    const bool wrap_in_xml = longformat && use_xml;

    if (wrap_in_xml) {
        std::string header;
        AddClassAdXMLFileHeader(header);
        printf("%s\n", header.c_str());
    }

    jobs.Open();
    for (ClassAd *job_ad = jobs.Next(); job_ad; job_ad = jobs.Next()) {
        printJob(*job_ad);
    }
    jobs.Close();

    if (wrap_in_xml) {
        std::string footer;
        AddClassAdXMLFileFooter(footer);
        printf("%s\n", footer.c_str());
    }
}
/**
 * Recompute m_execute_only_nodes: the machines that have an ad of type
 * "machine" but no ad of type "scheduler", "negotiator" or "collector".
 * Type names are compared case-insensitively.
 *
 * @param daemon_ads  daemon ads to classify by their MyType attribute
 */
void
StatsD::determineExecuteNodes(ClassAdList &daemon_ads)
{
    typedef std::set< std::string > NodeSet;
    NodeSet schedd_hosts;
    NodeSet startd_hosts;
    NodeSet central_manager_hosts;

    // Pass 1: bucket every machine by the kinds of daemons it runs
    daemon_ads.Open();
    for( ClassAd *ad = daemon_ads.Next(); ad; ad = daemon_ads.Next() ) {
        std::string machine,my_type;
        ad->EvaluateAttrString(ATTR_MACHINE,machine);
        ad->EvaluateAttrString(ATTR_MY_TYPE,my_type);

        char const *type = my_type.c_str();
        if( strcasecmp(type,"machine")==0 ) {
            startd_hosts.insert( machine );
        }
        else if( strcasecmp(type,"scheduler")==0 ) {
            schedd_hosts.insert( machine );
        }
        else if( strcasecmp(type,"negotiator")==0 ||
                 strcasecmp(type,"collector")==0 )
        {
            central_manager_hosts.insert( machine );
        }
    }
    daemon_ads.Close();

    // Pass 2: keep the execute hosts that play no other role
    m_execute_only_nodes.clear();
    for( NodeSet::iterator host = startd_hosts.begin();
         host != startd_hosts.end();
         ++host )
    {
        if( schedd_hosts.count(*host) || central_manager_hosts.count(*host) ) {
            continue;
        }
        m_execute_only_nodes.insert(*host);
    }

    if( !m_per_execute_node_metrics && m_execute_only_nodes.size()>0 ) {
        dprintf(D_FULLDEBUG,"Filtering out metrics for %d execute nodes because PER_EXECUTE_NODE_METRICS=False.\n",
                (int)m_execute_only_nodes.size());
    }
}
void Defrag::poll() { dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n"); // If we crash during this polling cycle, we will have saved // the time of the last poll, so the next cycle will be // scheduled on the false assumption that a cycle ran now. In // this way, we error on the side of draining too little // rather than too much. time_t now = time(NULL); time_t prev = m_last_poll; m_last_poll = now; saveState(); m_stats.Tick(); int num_to_drain = m_draining_per_poll; time_t last_hour = (prev / 3600)*3600; time_t current_hour = (now / 3600)*3600; time_t last_day = (prev / (3600*24))*3600*24; time_t current_day = (now / (3600*24))*3600*24; if( current_hour != last_hour ) { num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval); } if( current_day != last_day ) { num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval); } int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>"); m_stats.MachinesDraining = num_draining; MachineSet whole_machines; int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines); m_stats.WholeMachines = num_whole_machines; dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n", num_draining,num_whole_machines); queryDrainingCost(); // If possible, cancel some drains. 
MachineSet cancelled_machines; poll_cancel(cancelled_machines); if( num_to_drain <= 0 ) { dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n", m_polling_interval); return; } if( (int)ceil(m_draining_per_hour) <= 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n", m_draining_per_hour); return; } if( m_max_draining == 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n"); return; } if( m_max_whole_machines == 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n"); return; } if( m_max_draining >= 0 ) { if( num_draining >= m_max_draining ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n", m_max_draining, num_draining); return; } else if( num_draining < 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n", m_max_draining); return; } } if( m_max_whole_machines >= 0 ) { if( num_whole_machines >= m_max_whole_machines ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n", m_max_whole_machines, num_whole_machines); return; } } // Even if m_max_whole_machines is -1 (infinite), we still need // the list of whole machines in order to filter them out in // the draining selection algorithm, so abort now if the // whole machine query failed. 
if( num_whole_machines < 0 ) { dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n"); return; } dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain); ClassAdList startdAds; std::string requirements; sprintf(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str()); if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) { dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n"); return; } startdAds.Shuffle(); startdAds.Sort(StartdSortFunc,&m_rank_ad); startdAds.Open(); int num_drained = 0; ClassAd *startd_ad_ptr; MachineSet machines_done; while( (startd_ad_ptr=startdAds.Next()) ) { if (!startd_ad_ptr) continue; ClassAd &startd_ad = *startd_ad_ptr; std::string machine; std::string name; startd_ad.LookupString(ATTR_NAME,name); slotNameToDaemonName(name,machine); // If we have already cancelled draining on this machine, ignore it for this cycle. if( cancelled_machines.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: already cancelled draining of %s in this cycle.\n", name.c_str(),machine.c_str()); continue; } if( machines_done.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: already attempted to drain %s in this cycle.\n", name.c_str(),machine.c_str()); continue; } if( whole_machines.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: because it is already running as a whole machine.\n", name.c_str()); continue; } if( drain(startd_ad) ) { machines_done.insert(machine); if( ++num_drained >= num_to_drain ) { dprintf(D_ALWAYS, "Drained maximum number of machines allowed in this cycle (%d).\n", num_to_drain); break; } } } startdAds.Close(); dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n", num_drained,num_drained); dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n"); }
void Defrag::queryDrainingCost() { ClassAdList startdAds; CondorQuery startdQuery(STARTD_AD); char const *desired_attrs[6]; desired_attrs[0] = ATTR_TOTAL_MACHINE_DRAINING_UNCLAIMED_TIME; desired_attrs[1] = ATTR_TOTAL_MACHINE_DRAINING_BADPUT; desired_attrs[2] = ATTR_DAEMON_START_TIME; desired_attrs[3] = ATTR_TOTAL_CPUS; desired_attrs[4] = ATTR_LAST_HEARD_FROM; desired_attrs[5] = NULL; startdQuery.setDesiredAttrs(desired_attrs); std::string query; // only want one ad per machine sprintf(query,"%s==1 && (%s =!= undefined || %s =!= undefined)", ATTR_SLOT_ID, ATTR_TOTAL_MACHINE_DRAINING_UNCLAIMED_TIME, ATTR_TOTAL_MACHINE_DRAINING_BADPUT); startdQuery.addANDConstraint(query.c_str()); CollectorList* collects = daemonCore->getCollectorList(); ASSERT( collects ); QueryResult result; result = collects->query(startdQuery,startdAds); if( result != Q_OK ) { dprintf(D_ALWAYS, "Couldn't fetch startd ads: %s\n", getStrQueryResult(result)); return; } double avg_badput = 0.0; double avg_unclaimed = 0.0; int total_cpus = 0; startdAds.Open(); ClassAd *startd_ad; while( (startd_ad=startdAds.Next()) ) { int unclaimed = 0; int badput = 0; int start_time = 0; int cpus = 0; int last_heard_from = 0; startd_ad->LookupInteger(ATTR_TOTAL_MACHINE_DRAINING_UNCLAIMED_TIME,unclaimed); startd_ad->LookupInteger(ATTR_TOTAL_MACHINE_DRAINING_BADPUT,badput); startd_ad->LookupInteger(ATTR_DAEMON_START_TIME,start_time); startd_ad->LookupInteger(ATTR_LAST_HEARD_FROM,last_heard_from); startd_ad->LookupInteger(ATTR_TOTAL_CPUS,cpus); int age = last_heard_from - start_time; if( last_heard_from == 0 || start_time == 0 || age <= 0 ) { continue; } avg_badput += ((double)badput)/age; avg_unclaimed += ((double)unclaimed)/age; total_cpus += cpus; } startdAds.Close(); if( total_cpus > 0 ) { avg_badput = avg_badput/total_cpus; avg_unclaimed = avg_unclaimed/total_cpus; } dprintf(D_ALWAYS,"Average pool draining badput = %.2f%%\n", avg_badput*100); dprintf(D_ALWAYS,"Average pool draining unclaimed = %.2f%%\n", 
avg_unclaimed*100); m_stats.AvgDrainingBadput = avg_badput; m_stats.AvgDrainingUnclaimed = avg_unclaimed; }
bool Triggerd::PerformQueries() { ClassAdList result; CondorError errstack; QueryResult status; Trigger* trig = NULL; CondorQuery* query; bool ret_val = true; std::map<uint32_t,Trigger*>::iterator iter; ClassAd* ad = NULL; std::string eventText; char* token = NULL; std::string triggerText; char* queryString = NULL; ExprTree* attr = NULL; std::list<std::string> missing_nodes; size_t pos; size_t prev_pos; bool bad_trigger = false; const char* token_str = NULL; if (0 < triggers.size()) { dprintf(D_FULLDEBUG, "Triggerd: Evaluating %d triggers\n", (int)triggers.size()); query = new CondorQuery(ANY_AD); for (iter = triggers.begin(); iter != triggers.end(); iter++) { // Clear any pre-exhisting custom contraints and add the constraint // for this trigger trig = iter->second; query->clearORCustomConstraints(); query->clearANDCustomConstraints(); queryString = strdup(trig->GetQuery().c_str()); ReplaceAllChars(queryString, '\'', '"'); query->addANDConstraint(queryString); free(queryString); // Perform the query and check the result if (NULL != query_collector) { status = query->fetchAds(result, query_collector->addr(), &errstack); } else { status = collectors->query(*query, result, &errstack); } if (Q_OK != status) { // Problem with the query if (Q_COMMUNICATION_ERROR == status) { dprintf(D_ALWAYS, "Triggerd Error: Error contacting the collecter - %s\n", errstack.getFullText(true).c_str()); if (CEDAR_ERR_CONNECT_FAILED == errstack.code(0)) { dprintf(D_ALWAYS, "Triggerd Error: Couldn't contact the collector on the central manager\n"); } } else { dprintf(D_ALWAYS, "Triggerd Error: Could not retrieve ads - %s\n", getStrQueryResult(status)); } ret_val = false; break; } else { dprintf(D_FULLDEBUG, "Query successful. Parsing results\n"); // Query was successful, so parse the results result.Open(); while ((ad = result.Next())) { if (true == bad_trigger) { // Avoid processing a bad trigger multiple times. 
Remove // all result ads and reset the flag dprintf(D_FULLDEBUG, "Cleaning up after a bad trigger\n"); result.Delete(ad); while ((ad = result.Next())) { result.Delete(ad); } bad_trigger = false; break; } eventText = ""; triggerText = trig->GetText(); dprintf(D_FULLDEBUG, "Parsing trigger text '%s'\n", triggerText.c_str()); prev_pos = pos = 0; while (prev_pos < triggerText.length()) { pos = triggerText.find("$(", prev_pos, 2); if (std::string::npos == pos) { // Didn't find the start of a varible, so append the // remaining string dprintf(D_FULLDEBUG, "Adding text string to event text\n"); eventText += triggerText.substr(prev_pos, std::string::npos); prev_pos = triggerText.length(); } else { // Found a variable for substitution. Need to add // text before it to the string, grab the variable // to substitute for, and put its value in the text eventText += triggerText.substr(prev_pos, pos - prev_pos); dprintf(D_FULLDEBUG, "Adding text string prior to variable substitution to event text\n"); // Increment the position by 2 to skip the $( prev_pos = pos + 2; pos = triggerText.find(")", prev_pos, 1); if (std::string::npos == pos) { // Uh-oh. We have a start of a variable substitution // but no closing marker. dprintf(D_FULLDEBUG, "Error: Failed to find closing varable substitution marker ')'. Aborting processing of the trigger\n"); bad_trigger = true; break; } else { token_str = triggerText.substr(prev_pos, pos-prev_pos).c_str(); token = RemoveWS(token_str); dprintf(D_FULLDEBUG, "token: '%s'\n", token); if (NULL == token) { dprintf(D_ALWAYS, "Removing whitespace from %s produced unusable name. 
Aborting processing of the trigger\n", token_str); bad_trigger = true; break; } attr = ad->LookupExpr(token); if (NULL == attr) { // The token isn't found in the classad, so treat it // like a string dprintf(D_FULLDEBUG, "Adding text string to event text\n"); eventText += token; } else { dprintf(D_FULLDEBUG, "Adding classad value to event text\n"); eventText += ExprTreeToString(attr); } if (NULL != token) { free(token); token = NULL; } ++pos; } prev_pos = pos; } } // Remove the trailing space std::string::size_type notwhite = eventText.find_last_not_of(" "); eventText.erase(notwhite+1); // Send the event if (false == bad_trigger) { EventCondorTriggerNotify event(eventText, time(NULL)); singleton->getInstance()->raiseEvent(event); dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str()); } result.Delete(ad); } bad_trigger = false; result.Close(); } } delete query; } else { dprintf(D_FULLDEBUG, "Triggerd: No triggers to evaluate\n"); } // Look for absent nodes (nodes expected to be in the pool but aren't) if (NULL != console) { missing_nodes = console->findAbsentNodes(); if (0 < missing_nodes.size()) { for (std::list<std::string>::iterator node = missing_nodes.begin(); node != missing_nodes.end(); ++ node) { eventText = node->c_str(); eventText += " is missing from the pool"; EventCondorTriggerNotify event(eventText, time(NULL)); singleton->getInstance()->raiseEvent(event); dprintf(D_FULLDEBUG, "Triggerd: Raised event with text '%s'\n", eventText.c_str()); } } } return ret_val; }
void prettyPrint (ClassAdList &adList, TrackTotals *totals) { ppOption pps = using_print_format ? PP_CUSTOM : ppStyle; ClassAd *ad; int classad_index; int last_classad_index; bool fPrintHeadings = pm.has_headings() || (pm_head.Length() > 0); classad_index = 0; last_classad_index = adList.Length() - 1; adList.Open(); while ((ad = adList.Next())) { if (!wantOnlyTotals) { switch (pps) { case PP_STARTD_NORMAL: if (absentMode) { printStartdAbsent (ad, (classad_index == 0)); } else if( offlineMode ) { printStartdOffline( ad, (classad_index == 0)); } else { printStartdNormal (ad, (classad_index == 0)); } break; case PP_STARTD_SERVER: printServer (ad, (classad_index == 0)); break; case PP_STARTD_RUN: printRun (ad, (classad_index == 0)); break; case PP_STARTD_COD: printCOD (ad); break; case PP_STARTD_STATE: printState(ad, (classad_index == 0)); break; #ifdef HAVE_EXT_POSTGRESQL case PP_QUILL_NORMAL: printQuillNormal (ad); break; #endif /* HAVE_EXT_POSTGRESQL */ case PP_SCHEDD_NORMAL: printScheddNormal (ad, (classad_index == 0)); break; case PP_NEGOTIATOR_NORMAL: printNegotiatorNormal (ad, (classad_index == 0)); break; case PP_SCHEDD_SUBMITTORS: printScheddSubmittors (ad, (classad_index == 0)); break; case PP_VERBOSE: printVerbose (ad); break; case PP_XML: printXML (ad, (classad_index == 0), (classad_index == last_classad_index)); break; case PP_MASTER_NORMAL: printMasterNormal(ad, (classad_index == 0)); break; case PP_COLLECTOR_NORMAL: printCollectorNormal(ad, (classad_index == 0)); break; case PP_CKPT_SRVR_NORMAL: printCkptSrvrNormal(ad, (classad_index == 0)); break; case PP_STORAGE_NORMAL: printStorageNormal(ad, (classad_index == 0)); break; case PP_GRID_NORMAL: printGridNormal(ad, (classad_index == 0)); break; case PP_GENERIC_NORMAL: case PP_GENERIC: case PP_ANY_NORMAL: printAnyNormal(ad, (classad_index == 0)); break; case PP_CUSTOM: // hack: print a single item to a string, then discard the string // this makes sure that the headings line up correctly over the first // 
line of data. if (fPrintHeadings) { std::string tmp; pm.display(tmp, ad, targetAd); if (pm.has_headings()) { if ( ! (pmHeadFoot & HF_NOHEADER)) pm.display_Headings(stdout); } else { pm.display_Headings(stdout, pm_head); } fPrintHeadings = false; } printCustom (ad); break; case PP_NOTSET: fprintf (stderr, "Error: pretty printing set to PP_NOTSET.\n"); exit (1); default: fprintf (stderr, "Error: Unknown pretty print option.\n"); exit (1); } } classad_index++; totals->update(ad); } adList.Close(); // if there are no ads to print, but the user wanted XML output, // then print out the XML header and footer, so that naive XML // parsers won't get confused. if ( PP_XML == pps && 0 == classad_index ) { printXML (NULL, true, true); } // if totals are required, display totals if (adList.MyLength() > 0 && totals) totals->displayTotals(stdout, 20); }
void Rooster::poll() { dprintf(D_FULLDEBUG,"C**k-a-doodle-doo! (Time to look for machines to wake up.)\n"); ClassAdList startdAds; CondorQuery unhibernateQuery(STARTD_AD); ExprTree *requirements = NULL; if( ParseClassAdRvalExpr( m_unhibernate_constraint.Value(), requirements )!=0 || requirements==NULL ) { EXCEPT("Invalid expression for ROOSTER_UNHIBERNATE: %s\n", m_unhibernate_constraint.Value()); } unhibernateQuery.addANDConstraint(m_unhibernate_constraint.Value()); CollectorList* collects = daemonCore->getCollectorList(); ASSERT( collects ); QueryResult result; result = collects->query(unhibernateQuery,startdAds); if( result != Q_OK ) { dprintf(D_ALWAYS, "Couldn't fetch startd ads using constraint " "ROOSTER_UNHIBERNATE=%s: %s\n", m_unhibernate_constraint.Value(), getStrQueryResult(result)); return; } dprintf(D_FULLDEBUG,"Got %d startd ads matching ROOSTER_UNHIBERNATE=%s\n", startdAds.MyLength(), m_unhibernate_constraint.Value()); startdAds.Sort(StartdSortFunc,&m_rank_ad); startdAds.Open(); int num_woken = 0; ClassAd *startd_ad; HashTable<MyString,bool> machines_done(MyStringHash); while( (startd_ad=startdAds.Next()) ) { MyString machine; MyString name; startd_ad->LookupString(ATTR_MACHINE,machine); startd_ad->LookupString(ATTR_NAME,name); if( machines_done.exists(machine)==0 ) { dprintf(D_FULLDEBUG, "Skipping %s: already attempted to wake up %s in this cycle.\n", name.Value(),machine.Value()); continue; } // in case the unhibernate expression is time-sensitive, // re-evaluate it now to make sure it still passes if( !EvalBool(startd_ad,requirements) ) { dprintf(D_ALWAYS, "Skipping %s: ROOSTER_UNHIBERNATE is no longer true.\n", name.Value()); continue; } if( wakeUp(startd_ad) ) { machines_done.insert(machine,true); if( ++num_woken >= m_max_unhibernate && m_max_unhibernate > 0 ) { dprintf(D_ALWAYS, "Reached ROOSTER_MAX_UNHIBERNATE=%d in this cycle.\n", m_max_unhibernate); break; } } } startdAds.Close(); delete requirements; requirements = NULL; if( 
startdAds.MyLength() ) { dprintf(D_FULLDEBUG,"Done sending wakeup calls.\n"); } }
void Defrag::poll() { dprintf(D_FULLDEBUG,"Evaluating defragmentation policy.\n"); // If we crash during this polling cycle, we will have saved // the time of the last poll, so the next cycle will be // scheduled on the false assumption that a cycle ran now. In // this way, we error on the side of draining too little // rather than too much. time_t now = time(NULL); time_t prev = m_last_poll; m_last_poll = now; saveState(); m_stats.Tick(); int num_to_drain = m_draining_per_poll; time_t last_hour = (prev / 3600)*3600; time_t current_hour = (now / 3600)*3600; time_t last_day = (prev / (3600*24))*3600*24; time_t current_day = (now / (3600*24))*3600*24; if( current_hour != last_hour ) { num_to_drain += prorate(m_draining_per_poll_hour,now-current_hour,3600,m_polling_interval); } if( current_day != last_day ) { num_to_drain += prorate(m_draining_per_poll_day,now-current_day,3600*24,m_polling_interval); } MachineSet draining_machines; int num_draining = countMachines(DRAINING_CONSTRAINT,"<InternalDrainingConstraint>", &draining_machines); m_stats.MachinesDraining = num_draining; MachineSet whole_machines; int num_whole_machines = countMachines(m_whole_machine_expr.c_str(),"DEFRAG_WHOLE_MACHINE_EXPR",&whole_machines); m_stats.WholeMachines = num_whole_machines; dprintf(D_ALWAYS,"There are currently %d draining and %d whole machines.\n", num_draining,num_whole_machines); // Calculate arrival rate of fully drained machines. This is a bit tricky because we poll. // We count by finding the newly-arrived // fully drained machines, and add to that count machines which are no-longer draining. // This allows us to find machines that have fully drained, but were then claimed between // polling cycles. 
MachineSet new_machines; MachineSet no_longer_whole_machines; // Find newly-arrived machines std::set_difference(whole_machines.begin(), whole_machines.end(), m_prev_whole_machines.begin(), m_prev_whole_machines.end(), std::inserter(new_machines, new_machines.begin())); // Now, newly-departed machines std::set_difference(m_prev_draining_machines.begin(), m_prev_draining_machines.end(), draining_machines.begin(), draining_machines.end(), std::inserter(no_longer_whole_machines, no_longer_whole_machines.begin())); dprintf_set("Set of current whole machines is ", &whole_machines); dprintf_set("Set of current draining machine is ", &draining_machines); dprintf_set("Newly Arrived whole machines is ", &new_machines); dprintf_set("Newly departed draining machines is ", &no_longer_whole_machines); m_prev_draining_machines = draining_machines; m_prev_whole_machines = whole_machines; int newly_drained = new_machines.size() + no_longer_whole_machines.size(); double arrival_rate = 0.0; // If there is an arrival... if (newly_drained > 0) { time_t current = time(0); // And it isn't the first one since defrag boot... 
if (m_last_whole_machine_arrival > 0) { m_whole_machines_arrived += newly_drained; time_t arrival_time = current - m_last_whole_machine_arrival; if (arrival_time < 1) arrival_time = 1; // very unlikely, but just in case m_whole_machine_arrival_sum += newly_drained * arrival_time; arrival_rate = newly_drained / ((double)arrival_time); dprintf(D_ALWAYS, "Arrival rate is %g machines/hour\n", arrival_rate * 3600.0); } m_last_whole_machine_arrival = current; } dprintf(D_ALWAYS, "Lifetime whole machines arrived: %d\n", m_whole_machines_arrived); if (m_whole_machine_arrival_sum > 0) { double lifetime_mean = m_whole_machines_arrived / m_whole_machine_arrival_sum; dprintf(D_ALWAYS, "Lifetime mean arrival rate: %g machines / hour\n", 3600.0 * lifetime_mean); if (newly_drained > 0) { double diff = arrival_rate - lifetime_mean; m_whole_machine_arrival_mean_squared += diff * diff; } double sd = sqrt(m_whole_machine_arrival_mean_squared / m_whole_machines_arrived); dprintf(D_ALWAYS, "Lifetime mean arrival rate sd: %g\n", sd * 3600); m_stats.MeanDrainedArrival = lifetime_mean; m_stats.MeanDrainedArrivalSD = sd; m_stats.DrainedMachines = m_whole_machines_arrived; } queryDrainingCost(); // If possible, cancel some drains. 
MachineSet cancelled_machines; poll_cancel(cancelled_machines); if( num_to_drain <= 0 ) { dprintf(D_ALWAYS,"Doing nothing, because number to drain in next %ds is calculated to be 0.\n", m_polling_interval); return; } if( (int)ceil(m_draining_per_hour) <= 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_DRAINING_MACHINES_PER_HOUR=%f\n", m_draining_per_hour); return; } if( m_max_draining == 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=0\n"); return; } if( m_max_whole_machines == 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=0\n"); return; } if( m_max_draining >= 0 ) { if( num_draining >= m_max_draining ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and there are %d draining machines.\n", m_max_draining, num_draining); return; } else if( num_draining < 0 ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_CONCURRENT_DRAINING=%d and the query to count draining machines failed.\n", m_max_draining); return; } } if( m_max_whole_machines >= 0 ) { if( num_whole_machines >= m_max_whole_machines ) { dprintf(D_ALWAYS,"Doing nothing, because DEFRAG_MAX_WHOLE_MACHINES=%d and there are %d whole machines.\n", m_max_whole_machines, num_whole_machines); return; } } // Even if m_max_whole_machines is -1 (infinite), we still need // the list of whole machines in order to filter them out in // the draining selection algorithm, so abort now if the // whole machine query failed. 
if( num_whole_machines < 0 ) { dprintf(D_ALWAYS,"Doing nothing, because the query to find whole machines failed.\n"); return; } dprintf(D_ALWAYS,"Looking for %d machines to drain.\n",num_to_drain); ClassAdList startdAds; std::string requirements; formatstr(requirements,"(%s) && Draining =!= true",m_defrag_requirements.c_str()); if( !queryMachines(requirements.c_str(),"DEFRAG_REQUIREMENTS",startdAds) ) { dprintf(D_ALWAYS,"Doing nothing, because the query to select machines matching DEFRAG_REQUIREMENTS failed.\n"); return; } startdAds.Shuffle(); startdAds.Sort(StartdSortFunc,&m_rank_ad); startdAds.Open(); int num_drained = 0; ClassAd *startd_ad_ptr; MachineSet machines_done; while( (startd_ad_ptr=startdAds.Next()) ) { ClassAd &startd_ad = *startd_ad_ptr; std::string machine; std::string name; startd_ad.LookupString(ATTR_NAME,name); slotNameToDaemonName(name,machine); // If we have already cancelled draining on this machine, ignore it for this cycle. if( cancelled_machines.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: already cancelled draining of %s in this cycle.\n", name.c_str(),machine.c_str()); continue; } if( machines_done.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: already attempted to drain %s in this cycle.\n", name.c_str(),machine.c_str()); continue; } if( whole_machines.count(machine) ) { dprintf(D_FULLDEBUG, "Skipping %s: because it is already running as a whole machine.\n", name.c_str()); continue; } if( drain(startd_ad) ) { machines_done.insert(machine); if( ++num_drained >= num_to_drain ) { dprintf(D_ALWAYS, "Drained maximum number of machines allowed in this cycle (%d).\n", num_to_drain); break; } } } startdAds.Close(); dprintf(D_ALWAYS,"Drained %d machines (wanted to drain %d machines).\n", num_drained,num_to_drain); dprintf(D_FULLDEBUG,"Done evaluating defragmentation policy.\n"); }