Example #1
void dnxWlmDestroy(DnxWlm * wlm)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;
   time_t expires;
   unsigned i;

   assert(wlm);

   dnxLog("WLM: Beginning termination sequence...");

   // sleep till we can't stand it anymore, then kill everyone
   iwlm->terminate = 1;
   expires = iwlm->cfg.shutdownGrace + time(0);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   while (iwlm->threads > 0 && time(0) < expires)
   {
      cleanThreadPool(iwlm);
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      dnxCancelableSleep(100);
      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   }

   // check for workers remaining after grace period
   if (iwlm->threads)
      dnxDebug(1, "WLM: Termination - %d workers remaining"
            " after grace period.", iwlm->threads);
      
   // cancel all remaining workers
   for (i = 0; i < iwlm->threads; i++)
      if (iwlm->pool[i]->state == DNX_THREAD_RUNNING)
      {
         dnxDebug(1, "WLMDestroy: Cancelling worker[%lx].", iwlm->pool[i]->tid);
         pthread_cancel(iwlm->pool[i]->tid);
      }

   // give remaining threads some time to quit
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
   dnxCancelableSleep(1000);
   DNX_PT_MUTEX_LOCK(&iwlm->mutex);

   // join all zombies (should be everything left)
   cleanThreadPool(iwlm);
   assert(iwlm->threads == 0);
   xfree(iwlm->pool);
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

   DNX_PT_MUTEX_DESTROY(&iwlm->mutex);

   xfree(iwlm->cfg.dispatcher);
   xfree(iwlm->cfg.collector);
   xfree(iwlm);

   dnxLog("WLM: Termination sequence complete.");
}
Example #2
int dnxWlmReconfigure(DnxWlm * wlm, DnxWlmCfgData * cfg)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;
   DnxWorkerStatus ** pool;
   int ret = 0;

   assert(wlm && cfg);
   assert(cfg->poolMin > 0);
   assert(cfg->poolMax >= cfg->poolMin);
   assert(cfg->poolInitial >= cfg->poolMin);
   assert(cfg->poolInitial <= cfg->poolMax);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);

   // dynamic reconfiguration of dispatcher/collector URLs is not allowed

   logConfigChanges(&iwlm->cfg, cfg);
 
   iwlm->cfg.reqTimeout = cfg->reqTimeout;
   iwlm->cfg.ttlBackoff = cfg->ttlBackoff;
   iwlm->cfg.maxRetries = cfg->maxRetries;
   iwlm->cfg.poolMin = cfg->poolMin;
   iwlm->cfg.poolInitial = cfg->poolInitial;
   iwlm->cfg.poolMax = cfg->poolMax;
   iwlm->cfg.poolGrow = cfg->poolGrow;
   iwlm->cfg.pollInterval = cfg->pollInterval;
   iwlm->cfg.shutdownGrace = cfg->shutdownGrace;
   iwlm->cfg.maxResults = cfg->maxResults;
   iwlm->cfg.showNodeAddr = cfg->showNodeAddr;
   strcpy(iwlm->cfg.hostname, cfg->hostname);

   // we can't reduce the poolsz until the number of threads
   //    drops below the new maximum
   while (iwlm->threads > iwlm->cfg.poolMax)
   {
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      dnxCancelableSleep(3 * 1000);
      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   }

   // reallocate the pool to the new size
   if ((pool = (DnxWorkerStatus **)xrealloc(iwlm->pool, 
         iwlm->cfg.poolMax * sizeof *pool)) == 0)
      ret = DNX_ERR_MEMORY;
   else
   {
      iwlm->poolsz = iwlm->cfg.poolMax;
      iwlm->pool = pool;
   }
    
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

   return ret;
}
Example #3
/** Return the next item payload without removing it from the queue.
 * 
 * Ownership of the queue item payload does NOT transfer to the caller.
 * 
 * @param[in] queue - the queue from which the next item payload should 
 *    be returned.
 * @param[out] ppPayload - the address of storage in which to return a 
 *    reference to the next item payload.
 * 
 * @return Zero on success, or DNX_ERR_NOTFOUND if there is no next node.
 * 
 * @note Not currently used (or exported by the dnxQueue.h header file).
 * 
 * @note Cancellation safe.
 */
int dnxQueueNext(DnxQueue * queue, void ** ppPayload)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   
   assert(queue && ppPayload);
   
   *ppPayload = 0;
   
   DNX_PT_MUTEX_LOCK(&iqueue->mutex);
   
   // save pointer to current payload
   if (iqueue->current) 
   {
      *ppPayload = iqueue->current->pPayload;
      
      // advance circular buffer pointer
      if (iqueue->current->next)
         iqueue->current = iqueue->current->next;
      else
         iqueue->current = iqueue->head;
   }
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);
   
   return *ppPayload ? DNX_OK : DNX_ERR_NOTFOUND;
}
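
A minimal caller sketch for the peek semantics described above. The queue is assumed to have been created elsewhere, and MyPayload is a hypothetical payload type; because ownership stays with the queue, the returned pointer is not freed.

typedef struct { char name[64]; } MyPayload;   // hypothetical payload type

static void peekExample(DnxQueue * queue)
{
   void * payload;
   if (dnxQueueNext(queue, &payload) == DNX_OK)
   {
      MyPayload * p = (MyPayload *)payload;    // still owned by the queue
      dnxDebug(5, "peekExample: next payload is %s.", p->name);
   }
   // note: payload is NOT freed here - ownership stays with the queue
}
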
Example #4
/** Clean up resources allocated by the channel map sub-system.
 */
void dnxChanMapRelease(void)
{
   if (dnxInit)
   {
      int i;

      DNX_PT_MUTEX_LOCK(&chanMutex);

      for (i = 0; i < DNX_MAX_CHAN_MAP; i++)
      {
         xfree(gChannelMap[i].name);
         xfree(gChannelMap[i].url);
      }
   
      memset(gChannelMap, 0, sizeof gChannelMap);
   
      DNX_PT_MUTEX_UNLOCK(&chanMutex);
      DNX_PT_MUTEX_DESTROY(&chanMutex);
   
      // de-initialize transport module table
      i = elemcount(gTMList);
      while (i--) gTMList[i].txExit();
   
      dnxInit = 0;
   }
}
Example #5
void dnxWlmGetStats(DnxWlm * wlm, DnxWlmStats * wsp)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;

   assert(wlm && wsp);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   wsp->jobs_succeeded = iwlm->jobsok;
   wsp->jobs_failed = iwlm->jobsfail;
   wsp->threads_created = iwlm->tcreated;
   wsp->threads_destroyed = iwlm->tdestroyed;
   wsp->total_threads = iwlm->threads;
   wsp->active_threads = iwlm->active;
   wsp->requests_sent = iwlm->reqsent;
   wsp->jobs_received = iwlm->jobsrcvd;
   wsp->min_exec_time = iwlm->minexectm;
   wsp->avg_exec_time = iwlm->avgexectm;
   wsp->max_exec_time = iwlm->maxexectm;
   wsp->avg_total_threads = iwlm->avgthreads;
   wsp->avg_active_threads = iwlm->avgactive;
   wsp->thread_time = iwlm->threadtm;
   wsp->job_time = iwlm->jobtm;
   wsp->packets_out = iwlm->packets_out;
   wsp->packets_in = iwlm->packets_in;
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
}
Example #6
DnxQueueResult dnxQueueFind(DnxQueue * queue, void ** ppPayload, 
      DnxQueueResult (*Compare)(void * pLeft, void * pRight))
{
   DnxQueueResult bFound = DNX_QRES_CONTINUE;
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item;

   assert(queue && ppPayload && Compare);

   DNX_PT_MUTEX_LOCK(&iqueue->mutex);

   for (item = iqueue->head; item; item = item->next)
   {
      if ((bFound = Compare(*ppPayload, item->pPayload)) != DNX_QRES_CONTINUE)
      {
         if (bFound == DNX_QRES_FOUND)
            *ppPayload = item->pPayload;
         break;
      }
   }

   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);

   return bFound;
}
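
The Compare callback receives the caller's *ppPayload as its left argument and each queued payload as its right argument; when it returns DNX_QRES_FOUND, the matching payload replaces *ppPayload. A sketch of that contract, using a hypothetical MyItem payload type:

typedef struct { unsigned long serial; } MyItem;   // hypothetical payload type

static DnxQueueResult myCompare(void * pLeft, void * pRight)
{
   // pLeft is the caller's key (*ppPayload); pRight is each queued payload
   return ((MyItem *)pLeft)->serial == ((MyItem *)pRight)->serial
         ? DNX_QRES_FOUND : DNX_QRES_CONTINUE;
}

static void findExample(DnxQueue * queue)
{
   MyItem key = { 42 };
   void * payload = &key;     // in: search key; out: matching queued payload
   if (dnxQueueFind(queue, &payload, myCompare) == DNX_QRES_FOUND)
      dnxDebug(5, "findExample: found queued item with serial %lu.",
            ((MyItem *)payload)->serial);
}
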
Example #7
void dnxQueueDestroy(DnxQueue * queue)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item;
   
   assert(queue);
   
   DNX_PT_MUTEX_LOCK(&iqueue->mutex);

   // first free any requests that might be on the queue
   item = iqueue->head;
   while (item != 0) 
   {
      iDnxQueueEntry * next = item->next;
      iqueue->freepayload(item->pPayload);
      xfree(item);
      item = next;
   }
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);
   
   DNX_PT_MUTEX_DESTROY(&iqueue->mutex);
   pthread_cond_destroy(&iqueue->cv);

   xfree(iqueue);
}
Example #8
/** Move the DNX N3 results queue to Nagios.
 *
 * This function should only be called on the Nagios timed event handler
 * thread, so there are no race conditions between Nagios's processing of its
 * results queue and DNX's addition of data to that queue.
 */
static void dnxMoveResultsToNagios(void)
{
   check_result * local;

   // safely save off currently local list
   DNX_PT_MUTEX_LOCK(&dnxResultListMutex);
   local = dnxResultList;
   dnxResultList = 0;
   DNX_PT_MUTEX_UNLOCK(&dnxResultListMutex);

   // merge local into check_result_list, store in check_result_list
   check_result_list = dnxMergeLists(local, check_result_list);
}
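
dnxMergeLists itself is defined elsewhere in the DNX plugin; assuming both input lists are already sorted by finish time, a stable merge of two check_result lists might look like this sketch (not the actual implementation):

static check_result * mergeSketch(check_result * a, check_result * b)
{
   check_result * head = 0;
   check_result ** tailp = &head;

   while (a && b)
   {
      if (dnxTimeCompare(&a->finish_time, &b->finish_time) <= 0)
      {
         *tailp = a;
         a = a->next;
      }
      else
      {
         *tailp = b;
         b = b->next;
      }
      tailp = &(*tailp)->next;
   }
   *tailp = a ? a : b;    // append whichever list still has items
   return head;
}
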
Example #9
int dnxJobListAdd(DnxJobList * pJobList, DnxNewJob * pJob) {
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long tail;
   int ret = DNX_OK;

   assert(pJobList && pJob);

   DNX_PT_MUTEX_LOCK(&ilist->mut);

   tail = ilist->tail;

   // verify space in the job list; one slot is always kept empty so that a 
   // full ring can be distinguished from an empty one
   if (ilist->list[tail].state && (tail = (tail + 1) % ilist->size) == ilist->head) {
      dnxLog("dnxJobListAdd: Out of job slots (max=%lu): %s.", 
            ilist->size, pJob->cmd);
      dnxDebug(1, "dnxJobListAdd: Out of job slots (max=%lu): %s.", 
            ilist->size, pJob->cmd);
      ret = DNX_ERR_CAPACITY;
   } else {
      // add the slot index to the Job's XID - this allows us to index 
      //    the job list using the returned result's XID.objSlot field
      pJob->xid.objSlot = tail;
      // If no dnxClient request is bound yet (pNode->xid.objSlot == -1),
      // queue the job anyway as UNBOUND and let the timer thread find a
      // dnxClient for it later
      if (pJob->pNode->xid.objSlot == -1) {
         pJob->state = DNX_JOB_UNBOUND;
      } else {
         pJob->state = DNX_JOB_PENDING;
      }
      
      dnxAuditJob(pJob, "ASSIGN");
      
      // add this job to the job list
      memcpy(&ilist->list[tail], pJob, sizeof *pJob);
      
      ilist->tail = tail;
   
      dnxDebug(1, "dnxJobListAdd: Job [%lu:%lu]: Head=%lu, Tail=%lu.", 
            pJob->xid.objSerial, pJob->xid.objSlot, ilist->head, ilist->tail);
      
      if(pJob->state == DNX_JOB_PENDING) {
         pthread_cond_signal(&ilist->cond);  // signal that a new job is available
      }         
   }

   DNX_PT_MUTEX_UNLOCK(&ilist->mut);

   return ret;
}
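
The capacity test above deliberately leaves one slot unused: with ilist->size elements, at most size - 1 jobs are queued, which is how a full ring is told apart from an empty one. A hypothetical helper expressing the same predicate, using only the head/tail/state fields referenced above:

// Hypothetical helper mirroring the capacity test in dnxJobListAdd: the ring
// is full when the slot at tail is occupied and the next slot wraps around
// onto the head, so one slot is always left empty.
static int jobListIsFull(iDnxJobList * ilist)
{
   unsigned long tail = ilist->tail;
   return ilist->list[tail].state && (tail + 1) % ilist->size == ilist->head;
}
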
Example #10
/** Return the number of items in the queue.
 * 
 * @param[in] queue - the queue to be queried for item count.
 * 
 * @return The count of items in the queue.
 * 
 * @note Not currently used (or exported by the dnxQueue.h header file).
 * 
 * @note Cancellation safe.
 */
int dnxQueueSize(DnxQueue * queue)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   int count;

   assert(queue);
   
   DNX_PT_MUTEX_LOCK(&iqueue->mutex);
   
   count = (int)iqueue->size;
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);
   
   return count;
}
Example #11
DnxQueueResult dnxQueueRemove(DnxQueue * queue, void ** ppPayload, 
      DnxQueueResult (*Compare)(void * pLeft, void * pRight)) {
   DnxQueueResult bFound = DNX_QRES_CONTINUE;
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item, * prev;
   int counter = 0;

   assert(queue && ppPayload && Compare);

   DNX_PT_MUTEX_LOCK(&iqueue->mutex);

   prev = 0;
   for (item = iqueue->head; item; item = item->next) {
      counter++;
      if ((bFound = Compare(*ppPayload, item->pPayload)) != DNX_QRES_CONTINUE) {
         if (bFound == DNX_QRES_FOUND) {
            *ppPayload = item->pPayload;

            // cross-link previous to next and free current
            if (prev)
               prev->next = item->next;
            else                          // removing the head item
               iqueue->head = item->next;

            if (item->next == 0)          // removing the tail item
               iqueue->tail = prev;

            if (iqueue->current == item)  // advance circular pointer
               if ((iqueue->current = item->next) == 0)
                  iqueue->current = iqueue->head;

            iqueue->size--;
         }
         break;
      }
      prev = item;
   }

   dnxDebug(8, "dnxQueueRemove: (%i) elements searched in (%i) sized queue", 
      counter, iqueue->size);

   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);

   if (bFound == DNX_QRES_FOUND) {
      xfree(item);       // free the queue entry wrapper object
   }
   return bFound;
}
Example #12
void dnxWlmResetStats(DnxWlm * wlm)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;

   assert(wlm);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   iwlm->jobtm = iwlm->threadtm = 0;
   iwlm->jobsok = iwlm->jobsfail = iwlm->tcreated = iwlm->tdestroyed = 0;
   iwlm->reqsent = iwlm->jobsrcvd = iwlm->avgexectm = 0;
   iwlm->maxexectm = iwlm->avgthreads = iwlm->avgactive = 0;
   iwlm->minexectm = (unsigned)(-1);   // the largest possible value
   iwlm->packets_out = 0;
   iwlm->packets_in = 0;
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
}
Example #13
/** Allocate an unconnected channel.
 * 
 * @param[in] name - the name of the channel to allocate.
 * @param[out] icpp - the address of storage for the returned object 
 *    reference.
 * 
 * @return Zero on success, or a non-zero error value.
 */
static int dnxChanMapAllocChannel(char * name, iDnxChannel ** icpp)
{
   DnxChanMap * chanMap;
   int ret;

   assert(name && *name && icpp);

   DNX_PT_MUTEX_LOCK(&chanMutex);

   if ((ret = dnxChanMapFindName(name, &chanMap)) == DNX_OK)
      ret = chanMap->txAlloc(chanMap->url, icpp);

   DNX_PT_MUTEX_UNLOCK(&chanMutex);

   return ret;
}
Example #14
int dnxJobListCollect(DnxJobList * pJobList, DnxXID * pxid, DnxNewJob * pJob)
{
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long current;
   int ret = DNX_OK;
   assert(pJobList && pxid && pJob);   // parameter validation

   current = pxid->objSlot;

   dnxDebug(4, "dnxJobListCollect: Job serial (%lu) slot (%lu) list head(%i)", 
        pxid->objSerial, pxid->objSlot, ilist->head);

   if (current >= ilist->size)         // runtime validation requires check
      return DNX_ERR_INVALID;          // corrupt client network message

   DNX_PT_MUTEX_LOCK(&ilist->mut);
   
   // verify that the XID of this result matches the XID of the service check 
   if (ilist->list[current].state == DNX_JOB_NULL 
         || !dnxEqualXIDs(pxid, &ilist->list[current].xid)) {
      dnxDebug(4, "dnxJobListCollect: Job [%lu:%lu] not found.", pxid->objSerial, pxid->objSlot);      
      ret = DNX_ERR_NOTFOUND;          // Very old job or we restarted and lost state
   } else if(ilist->list[current].state == DNX_JOB_EXPIRED) {
      dnxDebug(4, "dnxJobListCollect: Job [%lu:%lu] expired before retrieval.", pxid->objSerial, pxid->objSlot);      
      ret = DNX_ERR_EXPIRED;          // job expired; removed by the timer
   } else {
      if(ilist->list[current].state == DNX_JOB_COMPLETE || ilist->list[current].state == DNX_JOB_RECEIVED) {
         dnxDebug(4, "dnxJobListCollect: Job [%lu:%lu] already retrieved.", pxid->objSerial, pxid->objSlot);      
         ilist->list[current].ack = 0;
         ret = DNX_ERR_ALREADY;           // It needs another Ack
      } else {
         // state is DNX_JOB_INPROGRESS or DNX_JOB_UNBOUND
         ilist->list[current].state = DNX_JOB_RECEIVED;      
         // make a copy to return to the Collector
         memcpy(pJob, &ilist->list[current], sizeof *pJob);
         dnxDebug(4, "dnxJobListCollect: Job [%lu:%lu] completed. Copy of result for (%s) assigned to collector.",
             pxid->objSerial, pxid->objSlot, pJob->cmd);
      }
      
      // Signal to the dispatcher that we need to send an Ack
      pthread_cond_signal(&ilist->cond);
   }

   DNX_PT_MUTEX_UNLOCK(&ilist->mut);

   return ret;
}
Example #15
/** Add a check result to the DNX check result list in sorted order.
 *
 * The check result is added to the check result list in ascending order, 
 * sorted by finish time.
 *
 * @param[in] newcr - the check result to be added to the results list.
 */
static void dnxAddResultToList(check_result * newcr)
{
   check_result ** curp;

   assert(newcr);

   DNX_PT_MUTEX_LOCK(&dnxResultListMutex);

   for (curp = &dnxResultList; *curp; curp = &(*curp)->next)
      if (dnxTimeCompare(&(*curp)->finish_time, &newcr->finish_time) >= 0)
         break;

   newcr->next = *curp;
   *curp = newcr;

   DNX_PT_MUTEX_UNLOCK(&dnxResultListMutex);
}
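
The loop above walks a check_result ** so the new node can be spliced in without special-casing the list head. dnxTimeCompare is defined elsewhere; the loop only relies on a negative/zero/positive ordering of two struct timeval values, as in this sketch:

// Sketch only - the real dnxTimeCompare lives elsewhere in the code base;
// the insertion loop above relies only on its <0 / 0 / >0 ordering contract.
static int timeCompareSketch(struct timeval * a, struct timeval * b)
{
   if (a->tv_sec != b->tv_sec)
      return a->tv_sec < b->tv_sec ? -1 : 1;
   if (a->tv_usec != b->tv_usec)
      return a->tv_usec < b->tv_usec ? -1 : 1;
   return 0;
}
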
Example #16
int dnxJobListMarkComplete(DnxJobList * pJobList, DnxXID * pXid) {
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   assert(pJobList && pXid);   // parameter validation
   int ret = DNX_ERR_NOTFOUND;
   dnxDebug(4, "dnxJobListMarkComplete: Job [%lu:%lu]", 
        pXid->objSerial, pXid->objSlot);
   unsigned long current = pXid->objSlot;

   DNX_PT_MUTEX_LOCK(&ilist->mut);
   if (dnxEqualXIDs(pXid, &ilist->list[current].xid)) {
      if(ilist->list[current].state == DNX_JOB_RECEIVED) {
         ilist->list[current].state = DNX_JOB_COMPLETE;
         ret = DNX_OK;
      }
   }
   DNX_PT_MUTEX_UNLOCK(&ilist->mut);
   return ret;
}
Example #17
/** Delete a channel map by name.
 * 
 * @param[in] name - the name of the channel map to be deleted.
 */
void dnxChanMapDelete(char * name)
{
   DnxChanMap * chanMap;

   assert(name && *name);

   DNX_PT_MUTEX_LOCK(&chanMutex);

   // locate resource by name
   if (dnxChanMapFindName(name, &chanMap) == DNX_OK)
   {
      // release allocated variables, clear object
      xfree(chanMap->name);
      xfree(chanMap->url);
      memset(chanMap, 0, sizeof *chanMap);
   }

   DNX_PT_MUTEX_UNLOCK(&chanMutex);
}
Example #18
/** Waits and returns the first pending item payload from a queue.
 * 
 * Suspends the calling thread if the queue is empty. The returned payload 
 * and its resources become the property of the caller.
 * 
 * @param[in] queue - the queue to be waited on.
 * @param[out] ppPayload - the address of storage in which to return the
 *    payload of the first queue item.
 * 
 * @return Zero on success, or DNX_ERR_NOTFOUND if not found.
 * 
 * @note Not currently used (or exported by the dnxQueue.h header file).
 * 
 * @note Cancellation safe.
 */
int dnxQueueGetWait(DnxQueue * queue, void ** ppPayload)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item = 0;
   
   assert(queue && ppPayload);
   
   DNX_PT_MUTEX_LOCK(&iqueue->mutex);
   
   // block this thread until it can dequeue a request
   while (item == 0) 
   {
      // see if we have any queue items already waiting
      if (iqueue->size > 0) 
      {
         item = iqueue->head;
         iqueue->head = item->next;
         if (iqueue->current == item)
            iqueue->current = item->next;

         // adjust the tail pointer if the queue is now empty
         if (iqueue->head == 0)
            iqueue->tail = 0;
         
         iqueue->size--;
      }
      else     // queue is empty
         pthread_cond_wait(&iqueue->cv, &iqueue->mutex);
   }
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);
   
   // return the payload to the caller.
   if (item) 
   {
      *ppPayload = item->pPayload;
      xfree(item);
      return DNX_OK;
   }

   return DNX_ERR_NOTFOUND;
}
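
A consumer-thread sketch for the blocking get described above. The shutdownRequested flag and processPayload handler are assumptions, not part of this file; since ownership of the payload transfers to the caller, it is freed here.

extern volatile int shutdownRequested;         // assumed shutdown flag
extern void processPayload(void * payload);    // assumed payload handler

static void * consumerThread(void * arg)
{
   DnxQueue * queue = (DnxQueue *)arg;
   void * payload;

   while (!shutdownRequested)
   {
      if (dnxQueueGetWait(queue, &payload) == DNX_OK)
      {
         processPayload(payload);
         xfree(payload);      // the payload now belongs to this thread
      }
   }
   return 0;
}
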
Example #19
int dnxJobListMarkAck(DnxJobList * pJobList, DnxResult * pRes) {
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   assert(pJobList && pRes);   // parameter validation
   time_t now = time(0);
   int ret = DNX_ERR_NOTFOUND;
   dnxDebug(4, "dnxJobListMarkAck: Job [%lu:%lu] serial (%lu) slot (%lu) latency (%lu) sec.", 
        pRes->xid.objSerial, pRes->xid.objSlot, pRes->xid.objSerial, pRes->xid.objSlot, (now - pRes->timestamp));
   unsigned long current = pRes->xid.objSlot;

   DNX_PT_MUTEX_LOCK(&ilist->mut);
   if (dnxEqualXIDs(&(pRes->xid), &ilist->list[current].xid)) {
      if(ilist->list[current].state == DNX_JOB_PENDING || ilist->list[current].state == DNX_JOB_UNBOUND) {
         ilist->list[current].state = DNX_JOB_INPROGRESS;
         dnxAuditJob(&(ilist->list[current]), "ACK");
         ret = DNX_OK;
      }
   }
   DNX_PT_MUTEX_UNLOCK(&ilist->mut);
   return ret;
}
Example #20
/** Add a new channel to the global channel map.
 * 
 * @param[in] name - the name of the new channel to be added.
 * @param[in] url - the URL to associate with this new channel.
 * 
 * @return Zero on success, or a non-zero error value.
 */
int dnxChanMapAdd(char * name, char * url)
{
   DnxChanMap tmp, * chanMap;
   int ret;

   assert(name && *name && url && strlen(url) < DNX_MAX_URL);

   // parse and validate the URL
   if ((ret = dnxChanMapUrlParse(&tmp, url)) != DNX_OK)
      return ret;

   // make copies of the name and url for the channel map entry
   if ((tmp.name = xstrdup(name)) == 0 || (tmp.url = xstrdup(url)) == 0)
   {
      xfree(tmp.name);
      return DNX_ERR_MEMORY;
   }

   DNX_PT_MUTEX_LOCK(&chanMutex);

   // see if this name already exists, otherwise grab an empty channel slot
   if ((ret = dnxChanMapFindName(name, &chanMap)) == DNX_OK 
         || (ret = dnxChanMapFindSlot(&chanMap)) == DNX_OK)
   {
      xfree(chanMap->name);
      xfree(chanMap->url);
      memcpy(chanMap, &tmp, sizeof *chanMap);
   }
   
   DNX_PT_MUTEX_UNLOCK(&chanMutex);

   // on error, release previously allocated memory
   if (ret != DNX_OK)
   {
      xfree(tmp.name);
      xfree(tmp.url);
   }
   return ret;
}
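
A usage sketch; the channel name and the udp:// URL are illustrative values rather than anything taken from this file:

static int addDispatchChannel(void)
{
   // illustrative name and URL - real values come from the DNX configuration
   int ret = dnxChanMapAdd("Dispatch", "udp://localhost:12480");
   if (ret != DNX_OK)
      dnxLog("Unable to add Dispatch channel: %s.", dnxErrorString(ret));
   return ret;
}
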
Example #21
int dnxQueueGet(DnxQueue * queue, void ** ppPayload)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item = 0;
   
   assert(queue && ppPayload);
   
   dnxDebug(8, "dnxQueueGet: iQueue size(%i)", iqueue->size);

   DNX_PT_MUTEX_LOCK(&iqueue->mutex);
   
   if (iqueue->size > 0) 
   {
      // remove the 'head' item from the queue
      item = iqueue->head;
      iqueue->head = item->next;
      if (iqueue->current == item)
         iqueue->current = item->next;

      // adjust tail pointer if queue is now empty
      if (iqueue->head == 0)
         iqueue->tail = 0;
   
      iqueue->size--;
   }
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);

   // return the payload to the caller, free queue item
   if (item) 
   {
      *ppPayload = item->pPayload;
      xfree(item);
      return DNX_OK;
   }

   return DNX_ERR_NOTFOUND;
}
Example #22
/** The main thread routine for a worker thread.
 * 
 * @param[in] data - an opaque pointer to a DnxWorkerStatus structure for this
 *    thread.
 * 
 * @return Always returns 0.
 */
static void * dnxWorker(void * data)
{
   DnxWorkerStatus * ws = (DnxWorkerStatus *)data;
   pthread_t tid = pthread_self();
   int retries = 0;
   iDnxWlm * iwlm;

   assert(data);
   
   iwlm = ws->iwlm;

   pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0);
   pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);
   pthread_cleanup_push(dnxWorkerCleanup, data);

   time(&ws->tstart);   // set thread start time (for stats)

   while (!iwlm->terminate)
   {
      DnxNodeRequest msg;
      DnxJob job;
      int ret;
      
      // setup job request message - use thread id and node address in XID
      dnxMakeXID(&msg.xid, DNX_OBJ_WORKER, tid, iwlm->myipaddr);
      msg.reqType = DNX_REQ_REGISTER;
      msg.jobCap = 1;
      msg.ttl = iwlm->cfg.reqTimeout - iwlm->cfg.ttlBackoff;
      msg.hn = iwlm->myhostname;
      // request a job, and then wait for a job to come in...
      if ((ret = dnxSendNodeRequest(ws->dispatch, &msg, 0)) != DNX_OK) {
         dnxLog("Worker[%lx]: Error sending node request: %s.", 
               tid, dnxErrorString(ret));
      } else {
         DNX_PT_MUTEX_LOCK(&iwlm->mutex);
         iwlm->reqsent++;
         DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      }

      // wait for job, even if request was never sent
      if ((ret = dnxWaitForJob(ws->dispatch, &job, job.address, 
            iwlm->cfg.reqTimeout)) != DNX_OK && ret != DNX_ERR_TIMEOUT) {
         dnxLog("Worker[%lx]: Error receiving job: %s.",
               tid, dnxErrorString(ret));
      }
      
      // Allow thread to be canceled
      pthread_testcancel();

      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
      cleanThreadPool(iwlm); // ensure counts are accurate before using them
      if (ret != DNX_OK)
      {
         // if above pool minimum and exceeded max retries...
         if (iwlm->threads > iwlm->cfg.poolMin 
               && ++retries > iwlm->cfg.maxRetries)
         {
            dnxLog("Worker[%lx]: Exiting - max retries exceeded.", tid);
            DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
            break;
         }
      }
      else
      {
         iwlm->jobsrcvd++;
         iwlm->active++;
//          dnxSendJobAck(ws->collect, &job, &job.address);
//          dnxDebug(3, "Worker[%lx]: Acknowledged job [%lu:%lu] (T/O %d): %s.", 
//                tid, job.xid.objSerial, job.xid.objSlot, job.timeout, job.cmd);
         
//          DnxAck ack;
//          ack.xid = job.xid;
//          ack.timestamp = job.timestamp;
         
         dnxSendJobAck(ws->collect, &job, 0);
         dnxDebug(3, "Worker[%lx]: Acknowledged job [%lu:%lu] to channel (%lx) (T/S %lu).", 
               tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timestamp);



         // check pool size before we get too busy -
         // if we're not shutting down and we haven't reached the configured
         // maximum and this is the last thread out, then increase the pool
         if (!iwlm->terminate 
               && iwlm->threads < iwlm->cfg.poolMax
               && iwlm->active == iwlm->threads) // Maybe more aggressive here
            growThreadPool(iwlm);
      }
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

      // if we have a job, execute it and reset retry count
      if (ret == DNX_OK)
      {
         char resData[MAX_RESULT_DATA + 1];
         DnxResult result;
         time_t jobstart;


         dnxDebug(3, "Worker[%lx]: Received job [%lu:%lu] from (%lx) (T/O %d): %s.", 
               tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timeout, job.cmd);
               
               
         
         
         // prepare result structure
         result.xid = job.xid;               // result xid must match job xid
         result.state = DNX_JOB_COMPLETE;    // complete or expired
         result.delta = 0;
         result.resCode = DNX_PLUGIN_RESULT_OK;
         result.resData = 0;

         /** @todo Allocate result data buffer based on configured buffer size. */

         // we want to be able to cancel threads while they're out on a task
         // in order to obtain timely shutdown for long jobs - move into
         // async cancel mode, but only for the duration of the check
         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, 0);

         *resData = 0;
         jobstart = time(0);
         dnxPluginExecute(job.cmd, &result.resCode, resData, sizeof resData - 1, 
               job.timeout, iwlm->cfg.showNodeAddr ? iwlm->myipaddrstr : 0);
         result.delta = time(0) - jobstart;

         pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);

         // store allocated copy of the result string
         if (*resData) result.resData = xstrdup(resData);

         dnxDebug(3, "Worker[%lx]: Job [%lu:%lu] completed in %lu seconds: %d, %s.",
               tid, job.xid.objSerial, job.xid.objSlot, result.delta, 
               result.resCode, result.resData);

//          if ((ret = dnxSendResult(ws->collect, &result, 0)) != DNX_OK) {
//             dnxDebug(3, "Worker[%lx]: Post job [%lu:%lu] results failed: %s.",
//                   tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret));
//          }
         

         // send the result, then wait for an Ack (retry up to 3 times)
         DnxJob ack;
         int trys = 1;
         while(trys < 4) {
            if ((ret = dnxSendResult(ws->collect, &result, 0)) != DNX_OK) {
               dnxDebug(3, "Worker[%lx]: Post job [%lu:%lu] results failed: %s.",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret));
               break;
            }
            // Now wait for our Ack
            if ((ret = dnxWaitForAck(ws->dispatch, &ack, job.address, 3)) != DNX_OK && ret != DNX_ERR_TIMEOUT) {
               dnxDebug(3, "Worker[%lx]: Error receiving Ack for job [%lu:%lu]: %s. Retry (%i).",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys);
            } else if (ret == DNX_ERR_TIMEOUT) {
               // we didn't get our Ack
               trys++;
            } else {
               // We got our Ack
               dnxDebug(3, "Worker[%lx]: Ack Received for job [%lu:%lu]: %s. After (%i) try(s).",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys);
               break;
            }
         }


         xfree(result.resData);
 
         // update all statistics
         DNX_PT_MUTEX_LOCK(&iwlm->mutex);
         {
            // track status
            if (result.resCode == DNX_PLUGIN_RESULT_OK) 
               iwlm->jobsok++;
            else 
               iwlm->jobsfail++;

            // track min/max/avg execution time
            if (result.delta > iwlm->maxexectm)
               iwlm->maxexectm = result.delta;
            if (result.delta < iwlm->minexectm)
               iwlm->minexectm = result.delta;
            iwlm->avgexectm = (iwlm->avgexectm + result.delta) / 2;

            // total job processing time
            iwlm->jobtm += (unsigned)result.delta;
            iwlm->active--;   // reduce active count
         }
         DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

         ws->serial++;     // increment job serial number for next job
         retries = 0;
      }
   }
   pthread_cleanup_pop(1);
   return 0;
}
Example #23
int dnxQueuePut(DnxQueue * queue, void * pPayload)
{
   iDnxQueue * iqueue = (iDnxQueue *)queue;
   iDnxQueueEntry * item;
   
   assert(queue);
   
   // create structure to store the new request
   if ((item = (iDnxQueueEntry *)xmalloc(sizeof *item)) == 0)
      return DNX_ERR_MEMORY;

   // The queue stores only this pointer, so the caller must allocate a 
   // unique payload object before enqueuing it.

   item->pPayload = pPayload;
   item->next = 0;
   
   DNX_PT_MUTEX_LOCK(&iqueue->mutex);

   // add new request to end of list, updating list pointers as required
   if (iqueue->size == 0) // special case - list is empty
      iqueue->head = iqueue->tail = iqueue->current = item;
   else 
   {
      iqueue->tail->next = item;
      iqueue->tail = item;
   }
   
   iqueue->size++;
   
   // check for queue overflow if this queue was created with a maximum size
   if (iqueue->maxsz > 0 && iqueue->size > iqueue->maxsz)
   {
      // remove the oldest entry at the queue head; note that this discards
      // the payload, so overflow should only happen for expendable data
      item = iqueue->head;
      iqueue->head = item->next;
      if (iqueue->current == item)
         iqueue->current = item->next;

      // adjust tail if queue is now empty
      if (iqueue->head == 0)
         iqueue->tail = 0;
      
      iqueue->size--;

      // call item payload destructor, if one was supplied
      if (iqueue->freepayload)
         iqueue->freepayload(item->pPayload);

      xfree(item);
   }
   
   // signal any waiters - there's a new item in the queue
   pthread_cond_signal(&iqueue->cv);
   
   DNX_PT_MUTEX_UNLOCK(&iqueue->mutex);
   
   return DNX_OK;
}
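
A producer-side sketch to pair with the blocking consumer of dnxQueueGetWait. Because the queue stores only the pointer, each payload gets its own heap allocation; it is released later by the consumer, or by the queue's freepayload destructor on overflow.

// Sketch: enqueue a heap-allocated copy of a string payload (hypothetical usage).
static int enqueueMessage(DnxQueue * queue, char * msg)
{
   char * copy = xstrdup(msg);   // the queue only keeps the pointer
   if (copy == 0)
      return DNX_ERR_MEMORY;
   return dnxQueuePut(queue, copy);
}
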
Example #24
int dnxWlmCreate(DnxWlmCfgData * cfg, DnxWlm ** pwlm)
{
   iDnxWlm * iwlm;
   struct ifaddrs * ifa = NULL;

   assert(cfg && pwlm);
   assert(cfg->poolMin > 0);
   assert(cfg->poolMax >= cfg->poolMin);
   assert(cfg->poolInitial >= cfg->poolMin);
   assert(cfg->poolInitial <= cfg->poolMax);

   // allocate and configure the master thread pool data structure
   if ((iwlm = (iDnxWlm *)xmalloc(sizeof *iwlm)) == 0)
      return DNX_ERR_MEMORY;

   memset(iwlm, 0, sizeof *iwlm);
   iwlm->cfg = *cfg;
   iwlm->cfg.dispatcher = xstrdup(iwlm->cfg.dispatcher);
   iwlm->cfg.collector = xstrdup(iwlm->cfg.collector);
   iwlm->poolsz = iwlm->cfg.poolMax;
   iwlm->pool = (DnxWorkerStatus **)xmalloc(iwlm->poolsz * sizeof *iwlm->pool);
   iwlm->minexectm = (unsigned)(-1);   // the largest possible value
   memset(iwlm->pool, 0, iwlm->poolsz * sizeof *iwlm->pool);

   // cache our (primary?) ip address in binary and string format
   if (getifaddrs(&ifa) == 0)
   {
      u_int setflags = IFF_UP | IFF_RUNNING;
      u_int clrflags = IFF_LOOPBACK;
      struct ifaddrs * ifcur = ifa;

      // locate the first proper AF_INET address in our interface list
      while (ifcur && (ifcur->ifa_addr == 0 
            || ifcur->ifa_addr->sa_family != AF_INET 
            || (ifcur->ifa_flags & setflags) != setflags
            || (ifcur->ifa_flags & clrflags) != 0))
         ifcur = ifcur->ifa_next;

      if (ifcur)
      {
         // cache binary and presentation (string) versions of the ip address
         iwlm->myipaddr = (unsigned long)
               ((struct sockaddr_in *)ifcur->ifa_addr)->sin_addr.s_addr;
         inet_ntop(ifcur->ifa_addr->sa_family,
                &((struct sockaddr_in *)ifcur->ifa_addr)->sin_addr,
                iwlm->myipaddrstr, sizeof iwlm->myipaddrstr);
      }
      freeifaddrs(ifa);
   }
   
   char unset[] = "NULL";
   if(!strnlen(iwlm->myhostname, 1))   // see if the global hostname has been set
   {
      dnxDebug(3, "dnxWlmCreate: Hostname not set in parent thread.");
      char machineName [MAX_HOSTNAME];
      if(strcmp(cfg->hostname, unset)==0)
      {
         dnxDebug(3, "dnxWlmCreate: Hostname undefined in config.");
         // Get our hostname
         if(gethostname(machineName, MAX_HOSTNAME)==0)
         {
            dnxDebug(3, "dnxWlmCreate: Hostname is [%s].", machineName);
            // cache hostname
            strcpy(iwlm->myhostname, machineName);
         } else {
            dnxLog("dnxWlmCreate: Unable to obtain Hostname [%s?],"
               "please set hostname in config.", machineName);
            sprintf( machineName, "localhost");
            strcpy(iwlm->myhostname, machineName);
         }
      } else {
         dnxDebug(3, "dnxWlmCreate: Using hostname in config [%s].", cfg->hostname);
         strcpy(iwlm->myhostname, cfg->hostname);
      }
   } else {
      dnxDebug(3, "dnxWlmCreate: Using cached hostname [%s].", iwlm->myhostname);
      strcpy(iwlm->cfg.hostname, iwlm->myhostname);
   }

   // if any of the above failed, we really can't continue
   if (!iwlm->cfg.dispatcher || !iwlm->cfg.collector || !iwlm->pool)
   {
      xfree(iwlm->cfg.dispatcher);
      xfree(iwlm->cfg.collector);
      xfree(iwlm);
      return DNX_ERR_MEMORY;
   }

   // create initial worker thread pool
   DNX_PT_MUTEX_INIT(&iwlm->mutex);
   DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   {
      int ret;
      if ((ret = growThreadPool(iwlm)) != DNX_OK)
      {
         if (iwlm->threads)
            dnxLog("WLM: Error creating SOME worker threads: %s; "
                  "continuing with smaller initial pool.", dnxErrorString(ret));
         else
         {
            dnxLog("WLM: Unable to create ANY worker threads: %s; "
                  "terminating.", dnxErrorString(ret));
            DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
            DNX_PT_MUTEX_DESTROY(&iwlm->mutex);
            xfree(iwlm);
            return ret;
         }
      }
   }
   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

   dnxLog("WLM: Started worker thread pool.");

   *pwlm = (DnxWlm *)iwlm;

   return DNX_OK;
}
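
A creation/teardown sketch using only configuration fields that appear in this file. The literal values and udp:// URLs are illustrative assumptions, the hostname field is assumed to be a fixed-size buffer, and DnxWlmCfgData may contain more members than shown.

static DnxWlm * startWorkLoadManager(void)
{
   DnxWlmCfgData cfg = { 0 };   // fields not set here keep their defaults
   DnxWlm * wlm = 0;
   int ret;

   cfg.dispatcher    = "udp://localhost:12480";   // illustrative URL
   cfg.collector     = "udp://localhost:12481";   // illustrative URL
   cfg.poolMin       = 4;
   cfg.poolInitial   = 10;
   cfg.poolMax       = 100;
   cfg.poolGrow      = 5;
   cfg.reqTimeout    = 5;
   cfg.ttlBackoff    = 1;
   cfg.maxRetries    = 10;
   cfg.shutdownGrace = 35;
   strcpy(cfg.hostname, "localhost");   // assumes hostname is a fixed buffer

   if ((ret = dnxWlmCreate(&cfg, &wlm)) != DNX_OK)
      dnxLog("Unable to create WLM: %s.", dnxErrorString(ret));
   return wlm;   // tear down later with dnxWlmDestroy(wlm)
}
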
Example #25
int dnxJobListExpire(DnxJobList * pJobList, DnxNewJob * pExpiredJobs, int * totalJobs) {
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long current;
   DnxNewJob * pJob;
   int jobCount = 0;
   time_t now;

   assert(pJobList && pExpiredJobs && totalJobs && *totalJobs > 0);

   DNX_PT_MUTEX_LOCK(&ilist->mut);

   // get the current time (after acquiring the lock, in case we had to wait)
   now = time(0);

   // walk the entire job list - InProgress and Pending jobs (in that order)
   current = ilist->head;
   int zero_factor = ilist->size - current; // add this value to normalize the index
   dnxDebug(6, "dnxJobListExpire: searching for (%i) expired objects. Head(%lu) Tail(%i)", *totalJobs, ilist->head, ilist->tail);
   int state = 0;
   while(jobCount < *totalJobs) {
      state = (pJob = &ilist->list[current])->state;
      unsigned long dispatch_timeout = now - DNX_DISPATCH_TIMEOUT;

      // only examine jobs that are either awaiting dispatch or results
      switch (state) {
         case DNX_JOB_UNBOUND:
            if(pJob->start_time <= dispatch_timeout) {
               dnxDebug(2, "dnxJobListExpire: Expiring Unbound %s Job [%lu:%lu] count(%i) type(%i) Start Time: (%lu) Now: (%lu) Expire: (%lu)",
                  (pJob->object_check_type ? "Host" : "Service"),  pJob->xid.objSerial, pJob->xid.objSlot, current, state, pJob->start_time, now, dispatch_timeout);               
               // Put the old job in a purgeable state
               pJob->state = DNX_JOB_EXPIRED;
               
               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));    
            } else {
               // If a client is already associated with it (xid.objSlot != -1),
               // a result may still be on its way back to us.

               // This job has not expired yet; try to get a dnxClient for it.
               if (dnxGetNodeRequest(dnxGetRegistrar(), &(pJob->pNode)) == DNX_OK) { 
                  // We got a dnxClient node for it; mark it pending so the dispatcher sends it
                  dnxDebug(2, "dnxJobListExpire: Dequeueing DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%i) seconds. Dispatch TO:(%i) Now: (%lu) count(%i) type(%i)", 
                     pJob->xid.objSerial, pJob->xid.objSlot, pJob->start_time - dispatch_timeout, dispatch_timeout, now, current, state);
                  pJob->state = DNX_JOB_PENDING;
                  pthread_cond_signal(&ilist->cond);  // signal that a new job is available
               } else {
                  dnxDebug(6, "dnxJobListExpire: Unable to dequeue DNX_JOB_UNBOUND job [%lu:%lu] Expires in (%i) seconds. Dispatch TO:(%i) Now: (%lu) count(%i) type(%i)", 
                     pJob->xid.objSerial, pJob->xid.objSlot, pJob->start_time - dispatch_timeout, dispatch_timeout, now, current, state);
               }
            }
            break;
         case DNX_JOB_PENDING:
         case DNX_JOB_INPROGRESS:
            // check the job's expiration stamp
            if (pJob->expires <= now) { //  
               // This is an expired job, it was sent out, but never came back
               dnxDebug(1, "dnxJobListExpire: Expiring Job [%lu:%lu] count(%i) type(%i) Exp: (%lu) Now: (%lu)",
                  pJob->xid.objSerial, pJob->xid.objSlot, current, state, pJob->expires, now);               
               // Put the old job in a purgeable state
               pJob->state = DNX_JOB_EXPIRED;
               // Add a copy to the expired job list
               memcpy(&pExpiredJobs[jobCount++], pJob, sizeof(DnxNewJob));
            } 
            break;
         case DNX_JOB_COMPLETE:
            // If the Ack hasn't been sent out yet, give it time to complete
            if(! pJob->ack) {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. count(%i) type(%i)", current, state);
               break;
            }
         case DNX_JOB_EXPIRED:
            dnxJobCleanup(pJob);
            dnxDebug(3, "dnxJobListExpire: Nullified Job. count(%i) type(%i)", current, state);
         case DNX_JOB_NULL:
            if(current == ilist->head && current != ilist->tail) {
               ilist->head = ((current + 1) % ilist->size);
               dnxDebug(2, "dnxJobListExpire: Moving head to (%i). count(%i) type(%i)", ilist->head, current, pJob->state);
               // we have an old item at the head of the list, so we need to
               // increment the head. It should never be larger than the tail.
            } else {
               dnxDebug(5, "dnxJobListExpire: Null Job. count(%i) type(%i)", current, pJob->state);
            }
            break;
         case DNX_JOB_RECEIVED:
            if(! pJob->ack) {
               dnxDebug(3, "dnxJobListExpire: Waiting to send Ack. job [%lu:%lu] count(%i) type(%i)", current, state);
            } else {
               dnxDebug(2, "dnxJobListExpire: Ack sent. job [%lu:%lu] count(%i) type(%i)", current, state);
            }
            // The Collector thread will set this to DNX_JOB_COMPLETE once it has 
            // replied to Nagios, but we don't advance the head until that happens
            break;
      }

      // bail-out if this was the job list tail
      if (current == ilist->tail) {
         break;
      }
      // increment the job list index
      current = ((current + 1) % ilist->size);
   }
      
   // update the total jobs in the expired job list
   *totalJobs = jobCount;
   DNX_PT_MUTEX_UNLOCK(&ilist->mut);

   return DNX_OK;
}
Example #26
int dnxJobListDispatch(DnxJobList * pJobList, DnxNewJob * pJob)
{
   iDnxJobList * ilist = (iDnxJobList *)pJobList;
   unsigned long current;
   int ret = DNX_OK; //DNX_ERR_TIMEOUT;
   struct timeval now;
   struct timespec timeout;

   assert(pJobList && pJob);

   DNX_PT_MUTEX_LOCK(&ilist->mut);


   // start at current head
   current = ilist->head;

   dnxDebug(6, "dnxJobListDispatch: BEFORE: Head=%lu, Tail=%lu, Queue=%lu.", 
       ilist->head, ilist->tail, ilist->size);

   while (1) {
 
      switch (ilist->list[current].state) {
         case DNX_JOB_INPROGRESS:
            dnxDebug(8, "dnxJobListDispatch: In Progress Item in slot:(%lu) head:(%lu) tail:(%lu).", 
               current, ilist->head, ilist->tail);
            break;
         case DNX_JOB_NULL:
            dnxDebug(8, "dnxJobListDispatch: Null Item in slot:(%lu) head:(%lu) tail:(%lu).", 
               current, ilist->head, ilist->tail);
            break;
         case DNX_JOB_EXPIRED:
            dnxDebug(8, "dnxJobListDispatch: Expired Item in slot:(%lu) head:(%lu) tail:(%lu).", 
               current, ilist->head, ilist->tail);
            break;
         case DNX_JOB_UNBOUND:
            dnxDebug(8, "dnxJobListDispatch: Unbound Item in slot:(%lu) head:(%lu) tail:(%lu).", 
               current, ilist->head, ilist->tail);
            break;
         case DNX_JOB_PENDING:
            gettimeofday(&now, 0);

            // Check to see if we have recently dispatched this
            if((ilist->list[current].pNode)->retry > now.tv_sec) {
               dnxDebug(5, "dnxJobListDispatch: Pending job [%lu:%lu] waiting for Ack, resend in (%i) sec.",
                  ilist->list[current].xid.objSerial, ilist->list[current].xid.objSlot, ((ilist->list[current].pNode)->retry - now.tv_sec));
               break;
            } else {
                if((ilist->list[current].pNode)->retry) {
                  // Make sure the dnxClient service offer is still fresh
                  if((ilist->list[current].pNode)->expires < now.tv_sec) {
                     dnxDebug(4, "dnxJobListDispatch: Pending job [%lu:%lu] waiting for Ack, client node expired. Resubmitting.",
                     ilist->list[current].xid.objSerial, ilist->list[current].xid.objSlot);
                     ilist->list[current].state = DNX_JOB_UNBOUND;
                     
                     // reset the node?
                     // It's likely that the same client will be servicing us
                     // or that the job might come back in the mean time, so we
                     // should keep this node as long as possible
                     // We just need to make sure that the Affinity is correct and that 
                     // it's only used to find a new node, so if we get as far as 
                     // resubmitting, we will have a valid node anyway
                     
                     // If the original job comes back, the acks will get all messed up
                     // not sure how to deal with that other than to just be graceful
                     // about receiving lots of results...
                   
                     
//                      dnxDeleteNodeReq(ilist->list[current].pNode);
//                      DnxNodeRequest * pNode = dnxCreateNodeReq();
                     ilist->list[current].pNode->flags = *(dnxGetAffinity(ilist->list[current].host_name));
//                      ilist->list[current].pNode->hn = xstrdup(ilist->list[current].host_name);
//                      ilist->list[current].pNode->addr = NULL;

                     // We should leave the address alone so we don't segfault if results come in late
                     // but should we reset these? 
//                      ilist->list[current].pNode->xid.objSlot = -1;
//                      ilist->list[current].pNode->xid.objSerial = ilist->list[current].xid.objSerial;
//                      ilist->list[current].pNode = pNode;
                  }
                  break;                  
               } else {
                  // This is a new job, so dispatch it
                  dnxDebug(4, "dnxJobListDispatch: Dispatching new job [%lu:%lu] waiting for Ack",
                     ilist->list[current].xid.objSerial, ilist->list[current].xid.objSlot);
               }
            }
            
            // set our retry interval - be fairly forgiving in case we just
            // missed the Ack but the client actually got the job and is
            // returning our results
            (ilist->list[current].pNode)->retry = now.tv_sec + 5; 
            
         
            // make a copy for the Dispatcher to send to client
            memcpy(pJob, &ilist->list[current], sizeof *pJob);
            
            // release the mutex
            DNX_PT_MUTEX_UNLOCK(&ilist->mut);
            return ret;
         case DNX_JOB_COMPLETE:
         case DNX_JOB_RECEIVED:
            // We have received the response for this job and need to send an
            // Ack to the client to let it know we got it
            if(ilist->list[current].ack) {
               // Only send a single Ack
               break;
            }
            // make a copy for the Dispatcher to send an Ack to the client
            memcpy(pJob, &ilist->list[current], sizeof *pJob);
            
            dnxDebug(4, "dnxJobListDispatch: Received job [%lu:%lu] sending Ack.",
               ilist->list[current].xid.objSerial, ilist->list[current].xid.objSlot);
            
            // release the mutex
            DNX_PT_MUTEX_UNLOCK(&ilist->mut);
            return ret;
      }

      if (current == ilist->tail) {
         // if we are at the end of the queue
         gettimeofday(&now, 0);
         timeout.tv_sec = now.tv_sec + DNX_JOBLIST_TIMEOUT;
         timeout.tv_nsec = now.tv_usec * 1000;
         if ((ret = pthread_cond_timedwait(&ilist->cond, &ilist->mut, &timeout)) == ETIMEDOUT) {
            // We waited for the timeout period and no new jobs arrived, so give control back to the caller.
            dnxDebug(5, "dnxJobListDispatch: Reached end of dispatch queue. Thread timer returned.");      
            DNX_PT_MUTEX_UNLOCK(&ilist->mut);
            return ret;
         } else {
            // We were signaled that there is a new job, so let's move back to the head and get it!
            current = ilist->head;
            dnxDebug(5, "dnxJobListDispatch: Reached end of dispatch queue. A new job arrived.");      
         }
      } else {
         // move to next item in queue
         current = ((current + 1) % ilist->size);
      }
   }
}