int main(int argc, char ** argv) { DnxRegistrar * reg; iDnxRegistrar * ireg; DnxNodeRequest * node; verbose = argc > 1 ? 1 : 0; CHECK_ZERO(dnxRegistrarCreate((DnxChannel*)17, 5, ®)); ireg = (iDnxRegistrar *)reg; CHECK_TRUE(ireg->channel == (DnxChannel*)17); CHECK_TRUE(ireg->rqueue != 0); CHECK_TRUE(ireg->tid != 0); while (passes < 4) dnxCancelableSleep(10); CHECK_ZERO(dnxGetNodeRequest(reg, &node)); CHECK_TRUE(node == test_req1); dnxRegistrarDestroy(reg); #ifdef DEBUG_HEAP CHECK_ZERO(dnxCheckHeap()); #endif return 0; }
/** Expire stale jobs in the job list and hand copies of them to the caller.
 *
 * With the list mutex held, walks the circular job list from head to tail.
 * For each slot, depending on its state:
 *  - UNBOUND jobs older than DNX_DISPATCH_TIMEOUT are expired; younger ones
 *    are offered a node request and, on success, moved to PENDING.
 *  - PENDING / INPROGRESS jobs whose `expires` stamp has passed are expired.
 *  - COMPLETE jobs that have been acked, and EXPIRED jobs, are cleaned up.
 *  - A cleaned-up or NULL slot sitting at the head (and not at the tail)
 *    advances the head.
 *  - RECEIVED jobs are only logged.
 *
 * The walk stops once @p *totalJobs expired jobs have been collected or the
 * tail slot has been processed.
 *
 * @param[in]     pJobList     - the job list to scan.
 * @param[out]    pExpiredJobs - array receiving copies of the expired jobs;
 *    must have room for at least the incoming @p *totalJobs entries.
 * @param[in,out] totalJobs    - on entry, the maximum number of expired jobs
 *    to collect (must be > 0); on exit, the number actually collected.
 *
 * @return Always returns DNX_OK.
 */
int dnxJobListExpire(DnxJobList * pJobList, DnxNewJob * pExpiredJobs, int * totalJobs)
{
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long current;
   unsigned long dispatch_timeout;
   DnxNewJob * pJob;
   int jobCount = 0;
   int state = 0;
   time_t now;

   assert(pJobList && pExpiredJobs && totalJobs && *totalJobs > 0);

   DNX_PT_MUTEX_LOCK(&ilist->mut);

   // get the current time (after we acquire the lock! In case we had to wait)
   now = time(0);

   // unbound jobs that started at or before this instant are too old to dispatch
   dispatch_timeout = now - DNX_DISPATCH_TIMEOUT;

   // walk the entire job list - InProgress and Pending jobs (in that order)
   current = ilist->head;

   dnxDebug(6, "dnxJobListExpire: searching for (%i) expired objects. Head(%lu) Tail(%lu)",
         *totalJobs, (unsigned long)ilist->head, (unsigned long)ilist->tail);

   // each iteration copies at most one job, so pExpiredJobs cannot overflow
   while (jobCount < *totalJobs)
   {
      pJob = &ilist->list[current];
      state = pJob->state;

      // only examine jobs that are either awaiting dispatch or results
      switch (state)
      {
         case DNX_JOB_UNBOUND:
            if (pJob->start_time <= dispatch_timeout)
            {
               dnxDebug(2, "dnxJobListExpire: Expiring Unbound %s Job [%lu:%lu] count(%lu) type(%i) Start Time: (%lu) Now: (%lu) Expire: (%lu)",
                     (pJob->object_check_type ? "Host" : "Service"),
                     pJob->xid.objSerial, pJob->xid.objSlot, current, state,
                     (unsigned long)pJob->start_time, (unsigned long)now,
                     dispatch_timeout);

               // Put the old job in a purgable state
               pJob->state = DNX_JOB_EXPIRED;

               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));
            }
            else
            {
               // If there is a client associated with it, xid.objSlot != -1
               // then it means we may be getting a result coming back to us.
               // This job has not expired, try and get a dnxClient for it
               if (dnxGetNodeRequest(dnxGetRegistrar(), &(pJob->pNode)) == DNX_OK)
               {
                  // If OK we have successfully dispatched it so update its expiration
                  dnxDebug(2, "dnxJobListExpire: Dequeueing DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%ld) seconds. Dispatch TO:(%lu) Now: (%lu) count(%lu) type(%i)",
                        pJob->xid.objSerial, pJob->xid.objSlot,
                        (long)(pJob->start_time - dispatch_timeout),
                        dispatch_timeout, (unsigned long)now, current, state);

                  pJob->state = DNX_JOB_PENDING;

                  pthread_cond_signal(&ilist->cond);  // signal that a new job is available
               }
               else
               {
                  dnxDebug(6, "dnxJobListExpire: Unable to dequeue DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%ld) seconds. Dispatch TO:(%lu) Now: (%lu) count(%lu) type(%i)",
                        pJob->xid.objSerial, pJob->xid.objSlot,
                        (long)(pJob->start_time - dispatch_timeout),
                        dispatch_timeout, (unsigned long)now, current, state);
               }
            }
            break;

         case DNX_JOB_PENDING:
         case DNX_JOB_INPROGRESS:
            // check the job's expiration stamp
            if (pJob->expires <= now)
            {
               // This is an expired job, it was sent out, but never came back
               dnxDebug(1, "dnxJobListExpire: Expiring Job [%lu:%lu] count(%lu) type(%i) Exp: (%lu) Now: (%lu)",
                     pJob->xid.objSerial, pJob->xid.objSlot, current, state,
                     (unsigned long)pJob->expires, (unsigned long)now);

               // Put the old job in a purgable state
               pJob->state = DNX_JOB_EXPIRED;

               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));
            }
            break;

         case DNX_JOB_COMPLETE:
            // If the Ack hasn't been sent out yet, give it time to complete
            if (!pJob->ack)
            {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. count(%lu) type(%i)",
                     current, state);
               break;
            }
            /* fallthrough - an acked, completed job is cleaned up like an expired one */

         case DNX_JOB_EXPIRED:
            dnxJobCleanup(pJob);
            dnxDebug(3, "dnxJobListExpire: Nullified Job. count(%lu) type(%i)",
                  current, state);
            /* fallthrough - the slot may now allow the head to advance */

         case DNX_JOB_NULL:
            if (current == ilist->head && current != ilist->tail)
            {
               // we have an old item at the head of the list, so we need to
               // increment the head. It should never be larger than the tail.
               ilist->head = ((current + 1) % ilist->size);
               dnxDebug(2, "dnxJobListExpire: Moving head to (%lu). count(%lu) type(%i)",
                     (unsigned long)ilist->head, current, (int)pJob->state);
            }
            else
            {
               dnxDebug(5, "dnxJobListExpire: Null Job. count(%lu) type(%i)",
                     current, (int)pJob->state);
            }
            break;

         case DNX_JOB_RECEIVED:
            // The format strings name the job's xid, so it must be passed.
            if (!pJob->ack)
            {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. job [%lu:%lu] count(%lu) type(%i)",
                     pJob->xid.objSerial, pJob->xid.objSlot, current, state);
            }
            else
            {
               dnxDebug(2, "dnxJobListExpire: Ack sent. job [%lu:%lu] count(%lu) type(%i)",
                     pJob->xid.objSerial, pJob->xid.objSlot, current, state);
            }
            // The Collector thread will set this to DNX_JOB_COMPLETE once it has
            // replied to Nagios, but we don't advance the head until that happens
            break;

         default:
            // unknown states are left untouched
            break;
      }

      // bail-out if this was the job list tail
      if (current == ilist->tail)
         break;

      // increment the job list index
      current = ((current + 1) % ilist->size);
   }

   // update the total jobs in the expired job list
   *totalJobs = jobCount;

   DNX_PT_MUTEX_UNLOCK(&ilist->mut);

   return DNX_OK;
}
/** Service Check Event Handler. * * @param[in] event_type - the event type for which we're being called. * @param[in] data - an opaque pointer to nagios event-specific data. * * @return Zero if we want Nagios to handle the event; * NEBERROR_CALLBACKOVERRIDE indicates that we want to handle the event * ourselves; any other non-zero value represents an error. */ static int ehSvcCheck(int event_type, void * data) { static unsigned long serial = 0; // the number of service checks processed nebstruct_service_check_data * svcdata = (nebstruct_service_check_data *)data; DnxNodeRequest * pNode; DnxJobData * jdp; int ret; if (event_type != NEBCALLBACK_SERVICE_CHECK_DATA) return OK; if (svcdata == 0) { dnxLog("Service handler received NULL service data structure."); return ERROR; // shouldn't happen - internal Nagios error } if (svcdata->type != NEBTYPE_SERVICECHECK_INITIATE) return OK; // ignore non-initiate service checks // check for local execution pattern on command line if (cfg.localCheckPattern && regexec(®Ex, svcdata->command_line, 0, 0, 0) == 0) { dnxDebug(1, "Service will execute locally: %s.", svcdata->command_line); return OK; // tell nagios execute locally } dnxDebug(3, "ehSvcCheck: Received Job [%lu] at %lu (%lu).", serial, (unsigned long)time(0), (unsigned long)svcdata->start_time.tv_sec); if ((ret = dnxGetNodeRequest(registrar, &pNode)) != DNX_OK) { dnxDebug(3, "ehSvcCheck: No worker nodes requests available: %s.",dnxErrorString(ret)); return OK; // tell nagios execute locally } // allocate and populate a new job payload object if ((jdp = (DnxJobData *)xmalloc(sizeof *jdp)) == 0) { dnxDebug(1, "ehSvcCheck: Out of memory!"); return OK; } memset(jdp, 0, sizeof *jdp); jdp->svc = (service *)svcdata->OBJECT_FIELD_NAME; assert(jdp->svc); #if CURRENT_NEB_API_VERSION == 3 { // a nagios 3.x global variable extern check_result check_result_info; /** @todo patch nagios to pass these values to the event handler. 
*/ jdp->chkopts = check_result_info.check_options; jdp->schedule = check_result_info.scheduled_check; jdp->reschedule = check_result_info.reschedule_check; } #endif if ((ret = dnxPostNewJob(joblist, serial, jdp, svcdata, pNode)) != DNX_OK) { dnxLog("Unable to post job [%lu]: %s.", serial, dnxErrorString(ret)); xfree(jdp); return OK; // tell nagios execute locally } serial++; // bump serial number return NEBERROR_CALLBACKOVERRIDE; // tell nagios we want it }