// Sort the doubles supplied in the job input and write them to the job output.
//
// Job input (whitespace separated):
//     doubles <count> <d1> <d2> ... <dcount>
// Job output on success:
//     <count> <sorted d1> ... <sorted dcount>   (each value followed by ' ')
//
// Before producing the result the method sleeps for the number of
// milliseconds returned by m_SleepTimeDistr.GetNextValue() and then,
// depending on a random draw compared against TParam_FailureRate, may
// commit the job with the failure message "FAILED" instead of sorting.
//
// Returns 1 when the input is rejected (wrong input type, missing or
// negative count, unreadable element); returns 0 otherwise, including
// the randomly injected "FAILED" path.
int Do(CWorkerNodeJobContext& context)
{
    CNcbiIstream& is = context.GetIStream();

    string input_type;
    is >> input_type;
    if (input_type != "doubles") {
        context.CommitJobWithFailure(
            "This worker node can only process the 'doubles' input type.");
        return 1;
    }

    // The count comes from the job input: reject a missing or negative
    // value instead of passing it to the vector constructor, where a
    // negative int would be converted to a huge unsigned size.
    int vsize = 0;
    if (!(is >> vsize) || vsize < 0) {
        context.CommitJobWithFailure(
            "Invalid or missing element count in the job input.");
        return 1;
    }

    vector<double> v(vsize);
    for (int i = 0; i < vsize; ++i) {
        // A failed extraction means the input is truncated or malformed;
        // do not sort and commit values that were never read.
        if (!(is >> v[i])) {
            context.CommitJobWithFailure(
                "Could not read all of the elements from the job input.");
            return 1;
        }
    }

    // Artificial processing delay, in milliseconds.
    unsigned delay = m_SleepTimeDistr.GetNextValue();
    if (delay > 0)
        SleepMilliSec(delay);

    // Randomly injected failure, controlled by TParam_FailureRate.
    if (m_Random.GetRand() <
            TParam_FailureRate::GetDefault() * m_Random.GetMax()) {
        context.CommitJobWithFailure("FAILED");
    } else {
        sort(v.begin(), v.end());

        CNcbiOstream& os = context.GetOStream();
        os << vsize << ' ';
        for (int i = 0; i < vsize; ++i)
            os << v[i] << ' ';

        context.CommitJob();
    }

    return 0;
}
void CWNJobWatcher::Notify(const CWorkerNodeJobContext& job_context, EEvent event) { switch (event) { case eJobStarted: { CMutexGuard guard(m_ActiveJobsMutex); m_ActiveJobs[const_cast<CWorkerNodeJobContext*>(&job_context)] = SJobActivity(); ++m_JobsStarted; if (m_MaxJobsAllowed > 0 && m_JobsStarted > m_MaxJobsAllowed - 1) { LOG_POST_X(1, "The maximum number of allowed jobs (" << m_MaxJobsAllowed << ") has been reached. " "Sending the shutdown request." ); CGridGlobals::GetInstance(). RequestShutdown(CNetScheduleAdmin::eNormalShutdown); } } break; case eJobStopped: { CMutexGuard guard(m_ActiveJobsMutex); m_ActiveJobs.erase( const_cast<CWorkerNodeJobContext*>(&job_context)); } break; case eJobFailed: ++m_JobsFailed; if (m_MaxFailuresAllowed > 0 && m_JobsFailed > m_MaxFailuresAllowed - 1) { LOG_POST_X(2, "The maximum number of failed jobs (" << m_MaxFailuresAllowed << ") has been reached. " "Shutting down..." ); CGridGlobals::GetInstance(). RequestShutdown(CNetScheduleAdmin::eShutdownImmediate); } break; case eJobSucceeded: ++m_JobsSucceeded; break; case eJobReturned: ++m_JobsReturned; break; case eJobRescheduled: ++m_JobsRescheduled; break; case eJobCanceled: ++m_JobsCanceled; break; case eJobLost: ++m_JobsLost; break; } if (event != eJobStarted) { CGridWorkerNode worker_node(job_context.GetWorkerNode()); Uint8 total_memory_limit = worker_node.GetTotalMemoryLimit(); if (total_memory_limit > 0) { // memory check requested size_t memory_usage; if (!GetMemoryUsage(&memory_usage, 0, 0)) { ERR_POST("Could not check self memory usage" ); } else if (memory_usage > total_memory_limit) { ERR_POST(Warning << "Memory usage (" << memory_usage << ") is above the configured limit (" << total_memory_limit << ")"); CGridGlobals::GetInstance().RequestShutdown( CNetScheduleAdmin::eNormalShutdown, RESOURCE_OVERUSE_EXIT_CODE); } } int total_time_limit = worker_node.GetTotalTimeLimit(); if (total_time_limit > 0 && // time check requested time(0) > worker_node.GetStartupTime() + 
total_time_limit) CGridGlobals::GetInstance().RequestShutdown( CNetScheduleAdmin::eNormalShutdown, RESOURCE_OVERUSE_EXIT_CODE); } }
// Sample job: read a list of doubles from the job input, simulate a
// time-consuming computation, and write the sorted values to the job output.
//
// Job input (whitespace separated):
//     <output_type> <count> <d1> <d2> ... <dcount>
// where output_type could be "doubles" or "html".
//
// Returns 0 after the job has been committed, or 1 after the job has been
// returned to the queue because an immediate shutdown was requested.
// Throws runtime_error on input or output stream errors (including a
// missing or negative count).
int Do(CWorkerNodeJobContext& context)
{
    context.GetCleanupEventSource()->AddListener(
        new CSampleJobCleanupListener("Job-do"));

    LOG_POST(context.GetJobKey() + " " + context.GetJobInput());
    LOG_POST("This parameter is read from a config file: " << m_Param);

    // 1. Get an input data from the client
    //    (You can use ASN.1 de-serialization here)
    //
    CNcbiIstream& is = context.GetIStream();
    CNcbiOstream& os = context.GetOStream();

    string output_type;
    is >> output_type; // could be "doubles" or "html"
    LOG_POST("Output type: " << output_type);

    // The count comes from the job input, so it must be validated before
    // it is used to size the vector: a negative int passed to reserve()
    // would be converted to a huge unsigned size.
    int count = 0;
    if (!(is >> count) || count < 0) {
        ERR_POST("Input stream error. Invalid element count");
        throw runtime_error("Worker node input stream error");
    }

    vector<double> dvec;
    dvec.reserve(count);
    LOG_POST("Getting " << count << " doubles from stream...");

    for (int i = 0; i < count; ++i) {
        // Don't forget to check if shutdown has been requested.
        // The check is keyed on the loop index so that it runs once
        // per 1000 elements read.
        if (i % 1000 == 0) {
            switch (context.GetShutdownLevel()) {
            case CNetScheduleAdmin::eShutdownImmediate:
            case CNetScheduleAdmin::eDie:
                // Either this job is not needed anymore (canceled,
                // expired or already executed elsewhere), or the
                // server is going down and the job's execution
                // should be gracefully (yet urgently) aborted and
                // the job returned back to the network queue
                // for execution by other worker node instances.
                context.ReturnJob();
                return 1;
            default:
                break;
            }
        }

        // Test the extraction itself, so that a failure on any element,
        // including the last one, is detected before the value is stored.
        double d;
        if (!(is >> d)) {
            ERR_POST("Input stream error. Index : " << i);
            // If anything bad happened, throw an exception
            // and its message will be delivered to the client.
            throw runtime_error("Worker node input stream error");
        }
        dvec.push_back(d);
    }

    // 2. Doing some time consuming job here
    //    Well behaved algorithm checks from time to time if
    //    immediate shutdown has been requested and gracefully return
    //    without calling context.CommitJob()
    //
    for (int i = 0; i < m_Iters; ++i) {
        switch (context.GetShutdownLevel()) {
        case CNetScheduleAdmin::eShutdownImmediate:
        case CNetScheduleAdmin::eDie:
            // Same reasoning as in the input loop: abort urgently and
            // return the job to the queue for other worker nodes.
            context.ReturnJob();
            return 1;
        default:
            break;
        }
        context.PutProgressMessage("Iteration " + NStr::IntToString(i+1) +
                                   " from " + NStr::IntToString(m_Iters));
        SleepSec(m_SleepSec);
    }
    sort(dvec.begin(), dvec.end());

    // 3. Return the result to the client
    //    (You can use ASN.1 serialization here)
    //
    if (output_type == "html")
        os << "<html><head><title>"
              "Sample Grid Worker Result Page"
              "</title></head><body>"
              "<p>Sample Grid Worker Result</p>";
    else
        os << dvec.size() << ' ';

    for (int i = 0; i < count; ++i) {
        if (!os.good()) {
            ERR_POST("Output stream error. Index : " << i);
            throw runtime_error("Worker node output stream error");
        }
        os << dvec[i] << ' ';
    }
    if (output_type == "html")
        os << "</body></html>";

    // 4. Indicate that the job is done and the result
    //    can be delivered to the client.
    //
    context.CommitJob();

    LOG_POST("Job " << context.GetJobKey() << " is done.");

    return 0;
}