/** Post a new job from Nagios to the dnxServer job queue.
 *
 * A private copy of the command line is made for the job. If the job list
 * refuses the job, that copy is released here, because the local job
 * structure is the only holder of the pointer at that point.
 *
 * @param[in] joblist - the job list to which the new job should be posted.
 * @param[in] serial - the serial number of the new job.
 * @param[in] jdp - a pointer to a job data structure.
 * @param[in] ds - a pointer to the nagios job that's being posted.
 * @param[in] pNode - a dnxClient node request structure that is being
 *    posted with this job. The dispatcher thread will send the job to the
 *    associated node.
 *
 * @return Zero on success, or a non-zero error value (the value returned by
 *    dnxJobListAdd).
 */
static int dnxPostNewJob(DnxJobList * joblist, unsigned long serial,
      DnxJobData * jdp, nebstruct_service_check_data * ds,
      DnxNodeRequest * pNode)
{
   DnxNewJob Job;
   int ret;

   assert(ds);
   assert(ds->command_line);

   // fill-in the job structure with the necessary information
   dnxMakeXID(&Job.xid, DNX_OBJ_JOB, serial, 0);
   Job.payload    = jdp;
   Job.cmd        = xstrdup(ds->command_line);
   Job.start_time = ds->start_time.tv_sec;
   Job.timeout    = ds->timeout;
   // expiry is start + timeout plus a fixed slack of 5
   Job.expires    = Job.start_time + Job.timeout + 5;
   Job.pNode      = pNode;

   dnxDebug(2, "DnxNebMain: Posting Job [%lu]: %s.", serial, Job.cmd);

   // post to the Job Queue
   if ((ret = dnxJobListAdd(joblist, &Job)) != DNX_OK)
   {
      dnxStatsInc(0, JOBS_REJECTED_NO_SLOTS);
      dnxLog("Failed to post Job [%lu]; \"%s\": %d.",
            Job.xid.objSerial, Job.cmd, ret);

      // the job was not queued, so nothing else references our copy of the
      // command line - release it (after logging it) to avoid a leak
      xfree(Job.cmd);
      Job.cmd = 0;
   }
   else
   {
      dnxStatsInc(0, JOBS_HANDLED);
      dnxAuditJob(&Job, "ASSIGN");
   }
   return ret;
}
int main (int argc, char **argv) { DnxXmlBuf xbuf; DnxJob job; char *cp; int ret; // Set program base name szProg = (char *)((cp = strrchr(argv[0], '/')) ? (cp+1) : argv[0]); // Initialize Job structure memset(&job, 0, sizeof(job)); dnxMakeXID(&job.xid, DNX_OBJ_JOB, 12345L, 3); job.state = DNX_JOB_PENDING; job.priority = 7; job.cmd = "check_spam.pl <wak>test</wak> ahora por favor"; jobDump("Initialized Job", &job); // Create an XML buffer if ((ret = xmlPut(&xbuf, &job)) == DNX_OK) { // Examine the XML buffer xmlDump("After xmlPut", &xbuf); // Clear the job structure and see if we get the same data back from the XML buffer printf("Clearing the job structure.\n"); memset(&job, 0, sizeof(job)); jobDump("Cleared Job", &job); // Reconstitute the job structure from the xml buffer ret = xmlGet(&xbuf, &job); // Examine the XML buffer jobDump("After xmlGet", &job); // Cleanup if (job.cmd) free(job.cmd); } else fprintf(stderr, "Error from xmlPut: %d\n", ret); return ret; }
/** The main thread routine for a worker thread. * * @param[in] data - an opaque pointer to a DnxWorkerStatus structure for this * thread. * * @return Always returns 0. */ static void * dnxWorker(void * data) { DnxWorkerStatus * ws = (DnxWorkerStatus *)data; pthread_t tid = pthread_self(); int retries = 0; iDnxWlm * iwlm; assert(data); iwlm = ws->iwlm; pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0); pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0); pthread_cleanup_push(dnxWorkerCleanup, data); time(&ws->tstart); // set thread start time (for stats) while (!iwlm->terminate) { DnxNodeRequest msg; DnxJob job; int ret; // setup job request message - use thread id and node address in XID dnxMakeXID(&msg.xid, DNX_OBJ_WORKER, tid, iwlm->myipaddr); msg.reqType = DNX_REQ_REGISTER; msg.jobCap = 1; msg.ttl = iwlm->cfg.reqTimeout - iwlm->cfg.ttlBackoff; msg.hn = iwlm->myhostname; // request a job, and then wait for a job to come in... if ((ret = dnxSendNodeRequest(ws->dispatch, &msg, 0)) != DNX_OK) { dnxLog("Worker[%lx]: Error sending node request: %s.", tid, dnxErrorString(ret)); } else { DNX_PT_MUTEX_LOCK(&iwlm->mutex); iwlm->reqsent++; DNX_PT_MUTEX_UNLOCK(&iwlm->mutex); } // wait for job, even if request was never sent if ((ret = dnxWaitForJob(ws->dispatch, &job, job.address,iwlm->cfg.reqTimeout)) != DNX_OK && ret != DNX_ERR_TIMEOUT) { dnxLog("Worker[%lx]: Error receiving job: %s.", tid, dnxErrorString(ret)); } // Allow thread to be canceled pthread_testcancel(); DNX_PT_MUTEX_LOCK(&iwlm->mutex); cleanThreadPool(iwlm); // ensure counts are accurate before using them if (ret != DNX_OK) { // if above pool minimum and exceeded max retries... 
if (iwlm->threads > iwlm->cfg.poolMin && ++retries > iwlm->cfg.maxRetries) { dnxLog("Worker[%lx]: Exiting - max retries exceeded.", tid); DNX_PT_MUTEX_UNLOCK(&iwlm->mutex); break; } } else { iwlm->jobsrcvd++; iwlm->active++; // dnxSendJobAck(ws->collect, &job, &job.address); // dnxDebug(3, "Worker[%lx]: Acknowledged job [%lu:%lu] (T/O %d): %s.", // tid, job.xid.objSerial, job.xid.objSlot, job.timeout, job.cmd); // DnxAck ack; // ack.xid = job.xid; // ack.timestamp = job.timestamp; dnxSendJobAck(ws->collect, &job, 0); dnxDebug(3, "Worker[%lx]: Acknowledged job [%lu:%lu] to channel (%lx) (T/S %lu).", tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timestamp); // check pool size before we get too busy - // if we're not shutting down and we haven't reached the configured // maximum and this is the last thread out, then increase the pool if (!iwlm->terminate && iwlm->threads < iwlm->cfg.poolMax && iwlm->active == iwlm->threads) // Maybe more aggressive here growThreadPool(iwlm); } DNX_PT_MUTEX_UNLOCK(&iwlm->mutex); // if we have a job, execute it and reset retry count if (ret == DNX_OK) { char resData[MAX_RESULT_DATA + 1]; DnxResult result; time_t jobstart; dnxDebug(3, "Worker[%lx]: Received job [%lu:%lu] from (%lx) (T/O %d): %s.", tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timeout, job.cmd); // prepare result structure result.xid = job.xid; // result xid must match job xid result.state = DNX_JOB_COMPLETE; // complete or expired result.delta = 0; result.resCode = DNX_PLUGIN_RESULT_OK; result.resData = 0; /** @todo Allocate result data buffer based on configured buffer size. 
*/ // we want to be able to cancel threads while they're out on a task // in order to obtain timely shutdown for long jobs - move into // async cancel mode, but only for the duration of the check pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, 0); *resData = 0; jobstart = time(0); dnxPluginExecute(job.cmd, &result.resCode, resData, sizeof resData - 1, job.timeout,iwlm->cfg.showNodeAddr? iwlm->myipaddrstr: 0); result.delta = time(0) - jobstart; pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0); // store allocated copy of the result string if (*resData) result.resData = xstrdup(resData); dnxDebug(3, "Worker[%lx]: Job [%lu:%lu] completed in %lu seconds: %d, %s.", tid, job.xid.objSerial, job.xid.objSlot, result.delta, result.resCode, result.resData); // if ((ret = dnxSendResult(ws->collect, &result, 0)) != DNX_OK) { // dnxDebug(3, "Worker[%lx]: Post job [%lu:%lu] results failed: %s.", // tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret)); // } // Wait while we wait for an Ack to our Results DnxJob ack; int trys = 1; while(trys < 4) { if ((ret = dnxSendResult(ws->collect, &result, 0)) != DNX_OK) { dnxDebug(3, "Worker[%lx]: Post job [%lu:%lu] results failed: %s.", tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret)); break; } // Now wait for our Ack if ((ret = dnxWaitForAck(ws->dispatch, &ack, job.address, 3)) != DNX_OK && ret != DNX_ERR_TIMEOUT) { dnxDebug(3, "Worker[%lx]: Error receiving Ack for job [%lu:%lu]: %s. Retry (%i).", tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys); } else if (ret == DNX_ERR_TIMEOUT) { // we didn't get our Ack trys++; } else { // We got our Ack dnxDebug(3, "Worker[%lx]: Ack Received for job [%lu:%lu]: %s. 
After (%i) try(s).", tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys); break; } } xfree(result.resData); // update all statistics DNX_PT_MUTEX_LOCK(&iwlm->mutex); { // track status if (result.resCode == DNX_PLUGIN_RESULT_OK) iwlm->jobsok++; else iwlm->jobsfail++; // track min/max/avg execution time if (result.delta > iwlm->maxexectm) iwlm->maxexectm = result.delta; if (result.delta < iwlm->minexectm) iwlm->minexectm = result.delta; iwlm->avgexectm = (iwlm->avgexectm + result.delta) / 2; // total job processing time iwlm->jobtm += (unsigned)result.delta; iwlm->active--; // reduce active count } DNX_PT_MUTEX_UNLOCK(&iwlm->mutex); ws->serial++; // increment job serial number for next job retries = 0; } } pthread_cleanup_pop(1); return 0; }
/** The main program entry point for the dnx management client. * * @param[in] argc - the number of elements in the @p argv array. * @param[in] argv - a null-terminated array of command-line arguments. * * @return Zero on success, or a non-zero error code that is returned to the * shell. Any non-zero codes should be values between 1 and 127. */ int main(int argc, char ** argv) { extern char * optarg; extern int optind, opterr, optopt; gTopDCS = dnxComStatCreateDCS("127.0.0.1"); int ch, ret; char * cp, * prog, * cmdstr; char * hoststr, * portstr; // get program base name prog = (char *)((cp = strrchr(argv[0], '/')) != 0 ? (cp + 1) : argv[0]); // parse arguments hoststr = "localhost"; portstr = "12482"; opterr = 0; cmdstr = 0; while ((ch = getopt(argc, argv, "hvc:s:p:")) != -1) { switch (ch) { case 's': hoststr = optarg; break; case 'p': portstr = optarg; break; case 'c': cmdstr = optarg; break; case 'v': printf("\n %s version %s\n Bug reports: %s.\n\n", prog, VERSION, PACKAGE_BUGREPORT); exit(0); case 'h': default : usage(prog); } } // ensure we've been given a command if (!cmdstr) { fprintf(stderr, "%s: No command string specified.\n", prog); usage(prog); } // init comm sub-system; send command; wait for response if ((ret = dnxChanMapInit(0)) != 0) fprintf(stderr, "%s: Error initializing channel map: %s.\n", prog, dnxErrorString(ret)); else { char url[1024]; snprintf(url, sizeof url, "udp://%s:%s", hoststr, portstr); if ((ret = dnxChanMapAdd("MgmtClient", url)) != 0) fprintf(stderr, "%s: Error adding channel (%s): %s.\n", prog, url, dnxErrorString(ret)); else { DnxChannel * channel; if ((ret = dnxConnect("MgmtClient", 1, &channel)) != 0) fprintf(stderr, "%s: Error connecting to server (%s): %s.\n", prog, url, dnxErrorString(ret)); else { DnxMgmtRequest req; memset(&req, 0, sizeof req); dnxMakeXID(&req.xid, DNX_OBJ_MANAGER, 0, 0); req.action = cmdstr; if ((ret = dnxSendMgmtRequest(channel, &req, 0)) != 0) fprintf(stderr, "%s: Error sending request: %s.\n", prog, 
dnxErrorString(ret)); else { DnxMgmtReply rsp; if ((ret = dnxWaitForMgmtReply(channel, &rsp, 0, 10)) != 0) fprintf(stderr, "%s: Error receiving response: %s.\n", prog, dnxErrorString(ret)); else { if (rsp.status == DNX_REQ_ACK) printf("%s\n", rsp.reply); else fprintf(stderr, "%s: Request failed on server.\nResponse was (%s)\n", prog,rsp.reply); } } dnxDisconnect(channel); } dnxChanMapDelete("MgmtClient"); } dnxChanMapRelease(); } xheapchk(); return ret? -1: 0; }