Beispiel #1
0
/* Test driver for the registrar: creates one, checks its internal fields,
 * waits for the global pass counter to reach 4, pulls one node request and
 * then destroys the registrar.
 *
 * Returns 0 when it runs to the end. What a failing CHECK_* does is defined
 * by those macros, which are not visible here.
 */
int main(int argc, char ** argv)
{
   DnxNodeRequest * node;
   DnxRegistrar * reg;
   iDnxRegistrar * ireg;

   // any command-line argument at all turns verbose mode on
   verbose = (argc > 1);

   // (DnxChannel*)17 is a dummy channel value; it is only compared below,
   // never dereferenced in this function
   CHECK_ZERO(dnxRegistrarCreate((DnxChannel*)17, 5, &reg));

   // view the opaque handle through its implementation type so the
   // internal fields can be inspected
   ireg = (iDnxRegistrar *)reg;

   CHECK_TRUE(ireg->channel == (DnxChannel*)17);
   CHECK_TRUE(ireg->rqueue != 0);
   CHECK_TRUE(ireg->tid != 0);

   // poll until 'passes' reaches 4
   // NOTE(review): 'passes' is presumably advanced by test stubs driven from
   // the registrar thread - confirm; there is no timeout on this wait
   while (passes < 4)
   {
      dnxCancelableSleep(10);
   }

   CHECK_ZERO(dnxGetNodeRequest(reg, &node));
   CHECK_TRUE(node == test_req1);

   dnxRegistrarDestroy(reg);

#ifdef DEBUG_HEAP
   CHECK_ZERO(dnxCheckHeap());
#endif

   return 0;
}
Beispiel #2
0
/** Expire overdue jobs and purge finished ones from the job list.
 *
 * Walks the circular job list from head to tail while holding the list
 * mutex. For each slot, depending on its state:
 *  - UNBOUND: expired if its start time is at or before the dispatch
 *    timeout; otherwise an attempt is made to bind it to a node request,
 *    and on success it becomes PENDING and the list condition is signalled.
 *  - PENDING / INPROGRESS: expired if its expiration stamp has passed.
 *  - COMPLETE (once acked) / EXPIRED: cleaned up via dnxJobCleanup.
 *  - NULL (and the cleaned-up states above): the head is advanced past the
 *    slot when the slot is the head and not also the tail.
 *  - RECEIVED: only logged.
 *
 * A copy of every job expired by this call is appended to pExpiredJobs.
 * The walk stops at the tail, or as soon as pExpiredJobs is full.
 *
 * @param[in] pJobList - the job list to scan.
 * @param[out] pExpiredJobs - array that receives copies of expired jobs.
 * @param[in,out] totalJobs - on entry, the capacity of pExpiredJobs (must be
 *    greater than zero); on exit, the number of jobs copied into it.
 *
 * @return Always DNX_OK.
 */
int dnxJobListExpire(DnxJobList * pJobList, DnxNewJob * pExpiredJobs, int * totalJobs) {
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long current;
   unsigned long dispatch_timeout;
   DnxNewJob * pJob;
   int jobCount = 0;
   int state;
   time_t now;

   assert(pJobList && pExpiredJobs && totalJobs && *totalJobs > 0);

   DNX_PT_MUTEX_LOCK(&ilist->mut);

   // get the current time (after we acquire the lock! In case we had to wait)
   now = time(0);

   // unbound jobs that started at or before this instant are overdue;
   // 'now' is fixed for the whole walk, so compute this once
   dispatch_timeout = now - DNX_DISPATCH_TIMEOUT;

   // walk the job list from head to tail
   current = ilist->head;
   dnxDebug(6, "dnxJobListExpire: searching for (%i) expired objects. Head(%lu) Tail(%lu)",
      *totalJobs, (unsigned long)ilist->head, (unsigned long)ilist->tail);

   // the loop condition guarantees we never write past the caller's buffer:
   // each pass appends at most one entry to pExpiredJobs
   while (jobCount < *totalJobs) {
      pJob = &ilist->list[current];
      state = pJob->state;

      switch (state) {
         case DNX_JOB_UNBOUND:
            if (pJob->start_time <= dispatch_timeout) {
               dnxDebug(2, "dnxJobListExpire: Expiring Unbound %s Job [%lu:%lu] count(%lu) type(%i) Start Time: (%lu) Now: (%lu) Expire: (%lu)",
                  (pJob->object_check_type ? "Host" : "Service"),
                  (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot,
                  current, state,
                  (unsigned long)pJob->start_time, (unsigned long)now, dispatch_timeout);
               // Put the old job in a purgable state
               pJob->state = DNX_JOB_EXPIRED;

               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));
            } else {
               // This job has not expired yet: try to obtain a node request
               // for it so that it can be dispatched
               if (dnxGetNodeRequest(dnxGetRegistrar(), &(pJob->pNode)) == DNX_OK) {
                  dnxDebug(2, "dnxJobListExpire: Dequeueing DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%lu) seconds. Dispatch TO:(%lu) Now: (%lu) count(%lu) type(%i)",
                     (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot,
                     (unsigned long)(pJob->start_time - dispatch_timeout), dispatch_timeout,
                     (unsigned long)now, current, state);
                  pJob->state = DNX_JOB_PENDING;
                  pthread_cond_signal(&ilist->cond);  // signal that a new job is available
               } else {
                  dnxDebug(6, "dnxJobListExpire: Unable to dequeue DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%lu) seconds. Dispatch TO:(%lu) Now: (%lu) count(%lu) type(%i)",
                     (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot,
                     (unsigned long)(pJob->start_time - dispatch_timeout), dispatch_timeout,
                     (unsigned long)now, current, state);
               }
            }
            break;

         case DNX_JOB_PENDING:
         case DNX_JOB_INPROGRESS:
            // check the job's expiration stamp
            if (pJob->expires <= now) {
               // This is an expired job, it was sent out, but never came back
               dnxDebug(1, "dnxJobListExpire: Expiring Job [%lu:%lu] count(%lu) type(%i) Exp: (%lu) Now: (%lu)",
                  (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot,
                  current, state, (unsigned long)pJob->expires, (unsigned long)now);
               // Put the old job in a purgable state
               pJob->state = DNX_JOB_EXPIRED;
               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));
            }
            break;

         case DNX_JOB_COMPLETE:
            // If the Ack hasn't been sent out yet, give it time to complete
            if (!pJob->ack) {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. count(%lu) type(%i)", current, state);
               break;
            }
            /* fallthrough - an acked, completed job is cleaned up like an expired one */
         case DNX_JOB_EXPIRED:
            dnxJobCleanup(pJob);
            dnxDebug(3, "dnxJobListExpire: Nullified Job. count(%lu) type(%i)", current, state);
            /* fallthrough - the slot may now be released from the head */
         case DNX_JOB_NULL:
            if (current == ilist->head && current != ilist->tail) {
               // a finished slot sits at the head of the list, so advance
               // the head past it; the tail check keeps head from passing tail
               ilist->head = ((current + 1) % ilist->size);
               dnxDebug(2, "dnxJobListExpire: Moving head to (%lu). count(%lu) type(%i)",
                  (unsigned long)ilist->head, current, (int)pJob->state);
            } else {
               dnxDebug(5, "dnxJobListExpire: Null Job. count(%lu) type(%i)", current, (int)pJob->state);
            }
            break;

         case DNX_JOB_RECEIVED:
            if (!pJob->ack) {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. job [%lu:%lu] count(%lu) type(%i)",
                  (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot, current, state);
            } else {
               dnxDebug(2, "dnxJobListExpire: Ack sent. job [%lu:%lu] count(%lu) type(%i)",
                  (unsigned long)pJob->xid.objSerial, (unsigned long)pJob->xid.objSlot, current, state);
            }
            // The Collector thread will set this to DNX_JOB_COMPLETE once it has
            // replied to Nagios, but we don't advance the head until that happens
            break;

         default:
            // any other state is left untouched
            break;
      }

      // bail-out if this was the job list tail
      if (current == ilist->tail) {
         break;
      }
      // increment the job list index (the list is circular)
      current = ((current + 1) % ilist->size);
   }

   // report how many expired jobs were copied out
   *totalJobs = jobCount;
   DNX_PT_MUTEX_UNLOCK(&ilist->mut);

   return DNX_OK;
}
Beispiel #3
0
/** Service check event handler.
 *
 * Intercepts service-check "initiate" events. Unless the command line
 * matches the configured local-check pattern, it tries to obtain a worker
 * node request and post the check to the DNX job list. On any failure along
 * the way it returns OK so that Nagios runs the check itself.
 *
 * @param[in] event_type - the event type for which we're being called.
 * @param[in] data - an opaque pointer to nagios event-specific data.
 *
 * @return OK (zero) if we want Nagios to handle the event;
 *    NEBERROR_CALLBACKOVERRIDE indicates that we want to handle the event
 *    ourselves; any other non-zero value represents an error.
 */
static int ehSvcCheck(int event_type, void * data)
{
   // serial number given to the next posted job; bumped after each successful post
   static unsigned long serial = 0;

   nebstruct_service_check_data * chk;
   DnxNodeRequest * node;
   DnxJobData * jdp;
   int rc;

   // we only care about service check events
   if (event_type != NEBCALLBACK_SERVICE_CHECK_DATA)
      return OK;

   chk = (nebstruct_service_check_data *)data;
   if (!chk)
   {
      dnxLog("Service handler received NULL service data structure.");
      return ERROR;  // shouldn't happen - internal Nagios error
   }

   // anything other than a check being initiated is left to Nagios
   if (chk->type != NEBTYPE_SERVICECHECK_INITIATE)
      return OK;

   // when a local-check pattern is configured, commands that match it
   // are executed locally by Nagios
   if (cfg.localCheckPattern)
   {
      if (regexec(&regEx, chk->command_line, 0, 0, 0) == 0)
      {
         dnxDebug(1, "Service will execute locally: %s.", chk->command_line);
         return OK;
      }
   }

   dnxDebug(3, "ehSvcCheck: Received Job [%lu] at %lu (%lu).",
         serial, (unsigned long)time(0),
         (unsigned long)chk->start_time.tv_sec);

   // a worker node request is needed before the job can be posted
   rc = dnxGetNodeRequest(registrar, &node);
   if (rc != DNX_OK)
   {
      dnxDebug(3, "ehSvcCheck: No worker nodes requests available: %s.",dnxErrorString(rc));
      return OK;     // let nagios run the check itself
   }

   // build a zeroed job payload object
   // NOTE(review): on the two failure paths below the node request obtained
   // above is not visibly handed back or freed - confirm who owns it
   jdp = (DnxJobData *)xmalloc(sizeof *jdp);
   if (jdp == 0)
   {
      dnxDebug(1, "ehSvcCheck: Out of memory!");
      return OK;
   }
   memset(jdp, 0, sizeof *jdp);
   jdp->svc = (service *)chk->OBJECT_FIELD_NAME;

   assert(jdp->svc);

#if CURRENT_NEB_API_VERSION == 3
   {
      // a nagios 3.x global variable
      extern check_result check_result_info;

      /** @todo patch nagios to pass these values to the event handler. */

      jdp->chkopts    = check_result_info.check_options;
      jdp->schedule   = check_result_info.scheduled_check;
      jdp->reschedule = check_result_info.reschedule_check;
   }
#endif

   rc = dnxPostNewJob(joblist, serial, jdp, chk, node);
   if (rc != DNX_OK)
   {
      dnxLog("Unable to post job [%lu]: %s.", serial, dnxErrorString(rc));
      xfree(jdp);
      return OK;     // let nagios run the check itself
   }

   ++serial;                           // next job gets a fresh serial number
   return NEBERROR_CALLBACKOVERRIDE;   // tell nagios we want it
}