void HoldJob( const char* long_reason, const char* short_reason, int reason_code, int reason_subcode ) { char subject[ BUFSIZ ]; FILE *mailer; sprintf( subject, "Condor Job %d.%d put on hold\n", Proc->id.cluster, Proc->id.proc ); if( ! JobAd ) { dprintf( D_ALWAYS, "In HoldJob() w/ NULL JobAd!\n" ); exit( JOB_SHOULD_HOLD ); } ExitReason = JOB_SHOULD_HOLD; if ( !ConnectQ(schedd, SHADOW_QMGMT_TIMEOUT) ) { dprintf( D_ALWAYS, "Failed to connect to schedd!\n" ); } SetAttributeString( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON, short_reason ); SetAttributeInt( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON_CODE, reason_code ); SetAttributeInt( Proc->id.cluster, Proc->id.proc, ATTR_HOLD_REASON_SUBCODE, reason_subcode ); if ( !DisconnectQ(0) ) { dprintf( D_ALWAYS, "Failed to commit updated job queue status!\n" ); } mailer = email_user_open(JobAd, subject); if( ! mailer ) { // User didn't want email, so just exit now with the right // value so the schedd actually puts the job on hold. dprintf( D_ALWAYS, "Job going into Hold state.\n"); dprintf( D_ALWAYS, "********** Shadow Exiting(%d) **********\n", JOB_SHOULD_HOLD); exit( JOB_SHOULD_HOLD ); } fprintf( mailer, "Your condor job " ); if( Proc->args_v1or2[0] ) { ArgList args; MyString args_string; args.AppendArgsV1or2Raw(Proc->args_v1or2[0],NULL); args.GetArgsStringForDisplay(&args_string); fprintf( mailer, "%s %s ", Proc->cmd[0], args_string.Value() ); } else { fprintf( mailer, "%s ", Proc->cmd[0] ); } fprintf( mailer, "\nis being put on hold.\n\n" ); fprintf( mailer, "%s", long_reason ); email_close(mailer); // Now that the user knows why, exit with the right code. dprintf( D_ALWAYS, "Job going into Hold state.\n"); dprintf( D_ALWAYS, "********** Shadow Exiting(%d) **********\n", JOB_SHOULD_HOLD); exit( JOB_SHOULD_HOLD ); }
//--------------------------------------------------------------------------- void DagmanClassad::SetDagAttribute( const char *attrName, int attrVal ) { if ( SetAttributeInt( _dagmanId._cluster, _dagmanId._proc, attrName, attrVal ) != 0 ) { debug_printf( DEBUG_QUIET, "WARNING: failed to set attribute %s\n", attrName ); check_warning_strictness( DAG_STRICT_3 ); } }
bool GenerateId(uint32_t &id) { static const char * MGMT_ID = "MgmtId"; if (GetAttributeInt(HEADER_CLUSTER, HEADER_PROC, MGMT_ID, (int *) &id) < 0) { id = 2; // Id 1 is reserved for the Scheduler } if (SetAttributeInt(HEADER_CLUSTER, HEADER_PROC, MGMT_ID, (int) ++id)) { return false; } return true; }
bool GetSubmitterId(const char *name, uint64_t &id) { uint32_t mgmtId; if (GetAttributeInt(HEADER_CLUSTER, HEADER_PROC, name, (int *) &mgmtId) < 0) { if (!GenerateId(mgmtId)) { // Failed to generate a new id, this seems fatal return false; } if (SetAttributeInt(HEADER_CLUSTER, HEADER_PROC, name, (int) mgmtId)) { // Failed to record the new id, this seems fatal return false; } } // The ((uint64_t) 0) << 32 id space is reserved for us id = (uint64_t) mgmtId; return true; }
void update_job_status( struct rusage *localp, struct rusage *remotep ) { int status = -1; double utime = 0.0; double stime = 0.0; int tot_sus=0, cum_sus=0, last_sus=0; char buf[1024*50]; // If the job completed, and there is no HISTORY file specified, // the don't bother to update the job ClassAd since it is about to be // flushed into the bit bucket by the schedd anyway. char *myHistoryFile = param("HISTORY"); if ((Proc->status == COMPLETED) && (myHistoryFile==NULL)) { return; } if (myHistoryFile) { free(myHistoryFile); } if (!JobAd) { EXCEPT( "update_job_status(): No job ad"); } JobAd->LookupInteger(ATTR_TOTAL_SUSPENSIONS, tot_sus); JobAd->LookupInteger(ATTR_CUMULATIVE_SUSPENSION_TIME, cum_sus); JobAd->LookupInteger(ATTR_LAST_SUSPENSION_TIME, last_sus); //new syntax, can use filesystem to authenticate if (!ConnectQ(schedd, SHADOW_QMGMT_TIMEOUT) || GetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_STATUS, &status) < 0) { EXCEPT("Failed to connect to schedd!"); } job_report_update_queue( Proc ); if( status == REMOVED ) { dprintf( D_ALWAYS, "update_job_status(): Job %d.%d has been removed " "by condor_rm\n", Proc->id.cluster, Proc->id.proc ); } else { SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_TOTAL_SUSPENSIONS, tot_sus); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_CUMULATIVE_SUSPENSION_TIME, cum_sus); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_LAST_SUSPENSION_TIME, last_sus); update_job_rusage( localp, remotep ); Proc->image_size = ImageSize; SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_IMAGE_SIZE, ImageSize); // For standard universe. MemoryUsed==ImageSize, no need to param this one. // because imagesize is already the best measure of memory usage. 
SetAttribute(Proc->id.cluster, Proc->id.proc, ATTR_MEMORY_USAGE, "((ImageSize+1023)/1024)"); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_EXIT_STATUS, JobExitStatus); rusage_to_float( Proc->local_usage, &utime, &stime ); SetAttributeFloat(Proc->id.cluster, Proc->id.proc, ATTR_JOB_LOCAL_USER_CPU, utime); SetAttributeFloat(Proc->id.cluster, Proc->id.proc, ATTR_JOB_LOCAL_SYS_CPU, stime); rusage_to_float( Proc->remote_usage[0], &utime, &stime ); SetAttributeFloat(Proc->id.cluster, Proc->id.proc, ATTR_JOB_REMOTE_USER_CPU, utime); SetAttributeFloat(Proc->id.cluster, Proc->id.proc, ATTR_JOB_REMOTE_SYS_CPU, stime); dprintf(D_FULLDEBUG,"TIME DEBUG 3 USR remotep=%lu Proc=%lu utime=%f\n", remotep->ru_utime.tv_sec, Proc->remote_usage[0].ru_utime.tv_sec, utime); dprintf(D_FULLDEBUG,"TIME DEBUG 4 SYS remotep=%lu Proc=%lu utime=%f\n", remotep->ru_stime.tv_sec, Proc->remote_usage[0].ru_stime.tv_sec, stime); if( sock_RSC1 ) { float TotalBytesSentUpdate = TotalBytesSent + sock_RSC1->get_bytes_sent() + BytesSent; float TotalBytesRecvdUpdate = TotalBytesRecvd + sock_RSC1->get_bytes_recvd() + BytesRecvd; SetAttributeFloat( Proc->id.cluster, Proc->id.proc, ATTR_BYTES_SENT, TotalBytesSentUpdate ); SetAttributeFloat( Proc->id.cluster, Proc->id.proc, ATTR_BYTES_RECVD, TotalBytesRecvdUpdate ); float RSCBytesSentUpdate = sock_RSC1->get_bytes_sent() + RSCBytesSent; float RSCBytesRecvdUpdate = sock_RSC1->get_bytes_recvd() + RSCBytesRecvd; SetAttributeFloat( Proc->id.cluster, Proc->id.proc, ATTR_RSC_BYTES_SENT, RSCBytesSentUpdate ); SetAttributeFloat( Proc->id.cluster, Proc->id.proc, ATTR_RSC_BYTES_RECVD, RSCBytesRecvdUpdate ); } if( ExitReason == JOB_CKPTED || ExitReason == JOB_NOT_CKPTED ) { SetAttributeInt( Proc->id.cluster, Proc->id.proc, ATTR_LAST_VACATE_TIME, time(0) ); } if( ExitReason == JOB_CKPTED || LastCkptTime > LastRestartTime ) { int uncommitted_suspension_time = 0; JobAd->LookupInteger(ATTR_UNCOMMITTED_SUSPENSION_TIME, uncommitted_suspension_time); if( 
uncommitted_suspension_time > 0 ) { int committed_suspension_time = 0; GetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_COMMITTED_SUSPENSION_TIME, &committed_suspension_time); committed_suspension_time += uncommitted_suspension_time; SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_COMMITTED_SUSPENSION_TIME, committed_suspension_time); } } // if we had checkpointed, then save all of these attributes as well. if (LastCkptTime > LastRestartTime) { SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_LAST_CKPT_TIME, LastCkptTime); CommittedTime=0; GetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_COMMITTED_TIME, &CommittedTime); CommittedTime += LastCkptTime - LastRestartTime; SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_COMMITTED_TIME, CommittedTime); LastRestartTime = LastCkptTime; SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_NUM_CKPTS, NumCkpts); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_NUM_RESTARTS, NumRestarts); if (Executing_Arch) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_CKPT_ARCH, Executing_Arch); } if (Executing_OpSys) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_CKPT_OPSYS, Executing_OpSys); } // If we wrote a checkpoint, store the location in the // LastCkptServer attribute. If we didn't use a checkpoint // server (i.e., we stored it locally), then make sure // no LastCkptServer attribute is set. if (LastCkptServer) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_LAST_CKPT_SERVER, LastCkptServer); } else { DeleteAttribute(Proc->id.cluster, Proc->id.proc, ATTR_LAST_CKPT_SERVER); } if (LastCkptPlatform) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_LAST_CHECKPOINT_PLATFORM, LastCkptPlatform); } } // if the job completed, we should include the run-time in // committed time, since it contributed to the completion of // the job. Also, commit the exit code/signal stuff, plus any // core filenames. 
if (Proc->status == COMPLETED) { int exit_code, exit_signal, exit_by_signal; int pending; // update the time. CommittedTime = 0; GetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_COMMITTED_TIME, &CommittedTime); CommittedTime += Proc->completion_date - LastRestartTime; SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_JOB_COMMITTED_TIME, CommittedTime); // if there is a core file, update that too. if (JobAd->LookupString(ATTR_JOB_CORE_FILENAME, buf, sizeof(buf))) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_JOB_CORE_FILENAME, buf); } // only new style ads have ATTR_ON_EXIT_BY_SIGNAL, so only // SetAttribute for those types of ads if (JobAd->LookupInteger(ATTR_ON_EXIT_BY_SIGNAL, exit_by_signal)==1) { SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_ON_EXIT_BY_SIGNAL, exit_by_signal); if (exit_by_signal == 1) /* exited via signal */ { JobAd->LookupInteger(ATTR_ON_EXIT_SIGNAL, exit_signal); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_ON_EXIT_SIGNAL, exit_signal); } else { JobAd->LookupInteger(ATTR_ON_EXIT_CODE, exit_code); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_ON_EXIT_CODE, exit_code); } } // and now, let's try and mark this job as a terminate pending // job. If the job already is, then fine. We'll mark it again. if (JobAd->LookupBool(ATTR_TERMINATION_PENDING, pending)) { SetAttribute(Proc->id.cluster, Proc->id.proc, ATTR_TERMINATION_PENDING, pending?"TRUE":"FALSE"); } else { // if it isn't in the job ad, then add it to the saved ad in the // schedd. SetAttribute(Proc->id.cluster, Proc->id.proc, ATTR_TERMINATION_PENDING, "TRUE"); } // store the reason why the job is marked completed. if (JobAd->LookupString(ATTR_TERMINATION_REASON, buf, sizeof(buf))) { SetAttributeString(Proc->id.cluster, Proc->id.proc, ATTR_TERMINATION_REASON, buf); } // Set up the exit code the shadow was about to exit with to // help support the terminate pending "state". 
SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_TERMINATION_EXITREASON, ExitReason); // Put the job status as created by waitpid() into the job ad // itself. This is to implement the terminate_pending feature. It // is done like this because EVERYWHERE in this codebase we do // stuff like WIFEXITED(JobStatus) and it turns out there are no // user level macros to will one of those status values as returned // by waitpid() into existance. So, we'll put it directly into the // job ad to prevent me having to reimplement a few large functions // which deal with JobStatus directly--as it is sadly a global // variable. SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_WAITPID_STATUS, JobStatus); } } if (!DisconnectQ(0)) { EXCEPT("Failed to commit updated job queue status!"); } }
/* Mess up the in memory job ad with interesting statistics about suspensions */ void record_suspension_hack(unsigned int action) { char tmp[256]; int total_suspensions; int last_suspension_time; int cumulative_suspension_time; extern char *schedd; if (!JobAd) { EXCEPT("Suspension code: Non-existant JobAd"); } switch(action) { case ULOG_JOB_SUSPENDED: /* Add to ad number of suspensions */ JobAd->LookupInteger(ATTR_TOTAL_SUSPENSIONS, total_suspensions); total_suspensions++; sprintf(tmp, "%s = %d", ATTR_TOTAL_SUSPENSIONS, total_suspensions); JobAd->Insert(tmp); /* Add to ad the current suspension time */ last_suspension_time = time(NULL); sprintf(tmp, "%s = %d", ATTR_LAST_SUSPENSION_TIME, last_suspension_time); JobAd->Insert(tmp); break; case ULOG_JOB_UNSUSPENDED: { /* add in the time I spent suspended to a running total */ JobAd->LookupInteger(ATTR_CUMULATIVE_SUSPENSION_TIME, cumulative_suspension_time); JobAd->LookupInteger(ATTR_LAST_SUSPENSION_TIME, last_suspension_time); int delta = time(NULL) - last_suspension_time; cumulative_suspension_time += delta; sprintf(tmp, "%s = %d", ATTR_CUMULATIVE_SUSPENSION_TIME, cumulative_suspension_time); JobAd->Insert(tmp); int uncommitted_suspension_time = 0; JobAd->LookupInteger(ATTR_UNCOMMITTED_SUSPENSION_TIME, uncommitted_suspension_time); uncommitted_suspension_time += delta; JobAd->Assign(ATTR_UNCOMMITTED_SUSPENSION_TIME,uncommitted_suspension_time); /* set the current suspension time to zero, meaning not suspended */ last_suspension_time = 0; sprintf(tmp, "%s = %d", ATTR_LAST_SUSPENSION_TIME, last_suspension_time); JobAd->Insert(tmp); break; } default: EXCEPT("record_suspension_hack(): Action event not recognized."); break; } /* Sanity output */ JobAd->LookupInteger(ATTR_TOTAL_SUSPENSIONS, total_suspensions); dprintf(D_FULLDEBUG,"%s = %d\n", ATTR_TOTAL_SUSPENSIONS, total_suspensions); JobAd->LookupInteger(ATTR_LAST_SUSPENSION_TIME, last_suspension_time); dprintf(D_FULLDEBUG, "%s = %d\n", ATTR_LAST_SUSPENSION_TIME, 
last_suspension_time); JobAd->LookupInteger(ATTR_CUMULATIVE_SUSPENSION_TIME, cumulative_suspension_time); dprintf(D_FULLDEBUG, "%s = %d\n", ATTR_CUMULATIVE_SUSPENSION_TIME, cumulative_suspension_time); /* If we've been asked to perform real time updates of the suspension information, then connect to the queue and do it here. */ if (param_boolean("REAL_TIME_JOB_SUSPEND_UPDATES", false)) { dprintf( D_ALWAYS, "Updating suspension info to schedd.\n" ); if (!ConnectQ(schedd, SHADOW_QMGMT_TIMEOUT)) { /* Since these attributes aren't updated periodically, if the schedd is busy and a resume event update is lost, the the job will be marked suspended when it really isn't. The new shadow eventually corrects this via a periodic update of various calssad attributes, but I suspect it won't be corrected in the event of a bad connect here for this shadow. */ dprintf( D_ALWAYS, "Timeout connecting to schedd. Suspension update lost.\n"); return; } SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_TOTAL_SUSPENSIONS, total_suspensions); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_CUMULATIVE_SUSPENSION_TIME, cumulative_suspension_time); SetAttributeInt(Proc->id.cluster, Proc->id.proc, ATTR_LAST_SUSPENSION_TIME, last_suspension_time); DisconnectQ(NULL); } }
// Forwards to SetAttributeInt() with the same arguments and hands its result
// back to the caller unchanged.
int
ActualScheddQ::set_AttributeInt(int cluster, int proc, const char *attr,
								int value, SetAttributeFlags_t flags)
{
	const int rval = SetAttributeInt(cluster, proc, attr, value, flags);
	return rval;
}
bool CWrapEngine::LoadCache() { // We have to synchronize access to layout.xml so that multiple processed don't write // to the same file or one is reading while the other one writes. CInterProcessMutex mutex(MUTEX_LAYOUT); wxFileName file(COptions::Get()->GetOption(OPTION_DEFAULT_SETTINGSDIR), _T("layout.xml")); CXmlFile xml(file); TiXmlElement* pDocument = xml.Load(); if (!pDocument) { m_use_cache = false; wxMessageBox(xml.GetError(), _("Error loading xml file"), wxICON_ERROR); return false; } bool cacheValid = true; TiXmlElement* pElement = pDocument->FirstChildElement("Layout"); if (!pElement) pElement = pDocument->LinkEndChild(new TiXmlElement("Layout"))->ToElement(); const wxString buildDate = CBuildInfo::GetBuildDateString(); if (GetTextAttribute(pElement, "Builddate") != buildDate) { cacheValid = false; SetTextAttribute(pElement, "Builddate", buildDate); } const wxString buildTime = CBuildInfo::GetBuildTimeString(); if (GetTextAttribute(pElement, "Buildtime") != buildTime) { cacheValid = false; SetTextAttribute(pElement, "Buildtime", buildTime); } // Enumerate resource file names // ----------------------------- TiXmlElement* pResources = pElement->FirstChildElement("Resources"); if (!pResources) pResources = pElement->LinkEndChild(new TiXmlElement("Resources"))->ToElement(); wxString resourceDir = wxGetApp().GetResourceDir(); wxDir dir(resourceDir); wxLogNull log; wxString xrc; for (bool found = dir.GetFirst(&xrc, _T("*.xrc")); found; found = dir.GetNext(&xrc)) { if (!wxFileName::FileExists(resourceDir + xrc)) continue; wxFileName fn(resourceDir + xrc); wxDateTime date = fn.GetModificationTime(); wxLongLong ticks = date.GetTicks(); TiXmlElement* resourceElement = FindElementWithAttribute(pResources, "xrc", "file", xrc.mb_str()); if (!resourceElement) { resourceElement = pResources->LinkEndChild(new TiXmlElement("xrc"))->ToElement(); resourceElement->SetAttribute("file", xrc.mb_str()); resourceElement->SetAttribute("date", ticks.ToString().mb_str()); 
cacheValid = false; } else { const char* xrcNodeDate = resourceElement->Attribute("date"); if (!xrcNodeDate || strcmp(xrcNodeDate, ticks.ToString().mb_str())) { cacheValid = false; resourceElement->SetAttribute("date", ticks.ToString().mb_str()); } } } if (!cacheValid) { // Clear all languages TiXmlElement* pLanguage = pElement->FirstChildElement("Language"); while (pLanguage) { pElement->RemoveChild(pLanguage); pLanguage = pElement->FirstChildElement("Language"); } } // Get current language wxString language = wxGetApp().GetCurrentLanguageCode(); if (language == _T("")) language = _T("default"); TiXmlElement* languageElement = FindElementWithAttribute(pElement, "Language", "id", language.mb_str()); if (!languageElement) { languageElement = pElement->LinkEndChild(new TiXmlElement("Language"))->ToElement(); languageElement->SetAttribute("id", language.mb_str()); } // Get static text font and measure sample text wxFrame* pFrame = new wxFrame; pFrame->Create(0, -1, _T("Title"), wxDefaultPosition, wxDefaultSize, wxFRAME_TOOL_WINDOW); wxStaticText* pText = new wxStaticText(pFrame, -1, _T("foo")); wxFont font = pText->GetFont(); wxString fontDesc = font.GetNativeFontInfoDesc(); TiXmlElement* pFontElement = languageElement->FirstChildElement("Font"); if (!pFontElement) pFontElement = languageElement->LinkEndChild(new TiXmlElement("Font"))->ToElement(); if (GetTextAttribute(pFontElement, "font") != fontDesc) { SetTextAttribute(pFontElement, "font", fontDesc); cacheValid = false; } int width, height; pText->GetTextExtent(_T("Just some test string we are measuring. If width or heigh differ from the recorded values, invalidate cache. 
1234567890MMWWII"), &width, &height); if (GetAttributeInt(pFontElement, "width") != width || GetAttributeInt(pFontElement, "height") != height) { cacheValid = false; SetAttributeInt(pFontElement, "width", width); SetAttributeInt(pFontElement, "height", height); } pFrame->Destroy(); // Get language file const wxString& localesDir = wxGetApp().GetLocalesDir(); wxString name = GetLocaleFile(localesDir, language); if (name != _T("")) { wxFileName fn(localesDir + name + _T("/filezilla.mo")); wxDateTime date = fn.GetModificationTime(); wxLongLong ticks = date.GetTicks(); const char* languageNodeDate = languageElement->Attribute("date"); if (!languageNodeDate || strcmp(languageNodeDate, ticks.ToString().mb_str())) { languageElement->SetAttribute("date", ticks.ToString().mb_str()); cacheValid = false; } } else languageElement->SetAttribute("date", ""); if (!cacheValid) { TiXmlElement* dialog; while ((dialog = languageElement->FirstChildElement("Dialog"))) languageElement->RemoveChild(dialog); } if (COptions::Get()->GetOptionVal(OPTION_DEFAULT_KIOSKMODE) == 2) { m_use_cache = cacheValid; return true; } wxString error; if (!xml.Save(&error)) { m_use_cache = false; wxString msg = wxString::Format(_("Could not write \"%s\": %s"), file.GetFullPath().c_str(), error.c_str()); wxMessageBox(msg, _("Error writing xml file"), wxICON_ERROR); } return true; }
// Stores nValue under the key "<lpszSection>\<lpszEntry>" via
// SetAttributeInt() and returns that call's result.
bool CXmlConfig::WriteProfileInt(LPCTSTR lpszSection, LPCTSTR lpszEntry, int nValue)
{
	// Section and entry are joined with a single backslash.
	CString key(lpszSection);
	key += _T("\\");
	key += lpszEntry;

	return SetAttributeInt(key, nValue);
}