Ejemplo n.º 1
0
/** Apply a new set of tunable parameters to a running work load manager.
 *
 * Copies the reconfigurable fields of @p cfg into the manager's own
 * configuration while holding the manager mutex, waits for the thread count
 * to fall to the new pool maximum, and then resizes the pool array to that
 * maximum. The dispatcher and collector URLs are deliberately not copied.
 *
 * @param[in] wlm - the work load manager to reconfigure.
 * @param[in] cfg - the new configuration values; the asserts require
 *    0 < poolMin <= poolInitial <= poolMax.
 *
 * @return Zero on success, or DNX_ERR_MEMORY if the pool array could not be
 *    reallocated (the configuration fields have already been updated by
 *    then, and the old pool array and pool size are left in place).
 */
int dnxWlmReconfigure(DnxWlm * wlm, DnxWlmCfgData * cfg)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;
   DnxWorkerStatus ** resized;
   int rc = 0;

   assert(wlm && cfg);
   assert(cfg->poolMin > 0);
   assert(cfg->poolMax >= cfg->poolMin);
   assert(cfg->poolInitial >= cfg->poolMin);
   assert(cfg->poolInitial <= cfg->poolMax);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);

   // log the differences before the old values are overwritten
   logConfigChanges(&iwlm->cfg, cfg);

   // dispatcher/collector URLs are intentionally left alone - they may not
   //    be changed dynamically

   // pool sizing parameters
   iwlm->cfg.poolMin       = cfg->poolMin;
   iwlm->cfg.poolInitial   = cfg->poolInitial;
   iwlm->cfg.poolMax       = cfg->poolMax;
   iwlm->cfg.poolGrow      = cfg->poolGrow;

   // timing and retry parameters
   iwlm->cfg.reqTimeout    = cfg->reqTimeout;
   iwlm->cfg.ttlBackoff    = cfg->ttlBackoff;
   iwlm->cfg.maxRetries    = cfg->maxRetries;
   iwlm->cfg.pollInterval  = cfg->pollInterval;
   iwlm->cfg.shutdownGrace = cfg->shutdownGrace;

   // result reporting parameters
   iwlm->cfg.maxResults    = cfg->maxResults;
   iwlm->cfg.showNodeAddr  = cfg->showNodeAddr;
   strcpy(iwlm->cfg.hostname, cfg->hostname);

   // the pool array cannot shrink while more threads exist than the new
   //    maximum allows; release the mutex while sleeping between checks
   while (iwlm->threads > iwlm->cfg.poolMax)
   {
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      dnxCancelableSleep(3 * 1000);
      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   }

   // resize the pool array to hold exactly poolMax entries
   resized = (DnxWorkerStatus **)xrealloc(iwlm->pool,
         iwlm->cfg.poolMax * sizeof *resized);
   if (resized != 0)
   {
      iwlm->pool = resized;
      iwlm->poolsz = iwlm->cfg.poolMax;
   }
   else
      rc = DNX_ERR_MEMORY;

   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

   return rc;
}
Ejemplo n.º 2
0
/** Thread procedure for the registrar thread.
 *
 * Runs an endless receive/dispatch loop: it waits on the registrar channel
 * for a node request and hands it to the register or deregister routine
 * according to the request type. Requests are processed one at a time by
 * this single thread, which is why calling dnxStatsInc for new nodes from
 * here is considered safe.
 *
 * The request buffer is heap allocated and kept across passes unless the
 * register routine takes it over (it receives the buffer pointer by
 * address); a fresh buffer is then allocated on the next pass. While a
 * pass is in flight the buffer is covered by a cancellation cleanup handler.
 *
 * @param[in] data - an opaque pointer to registrar thread data; it is used
 *    as an iDnxRegistrar object.
 *
 * @return Always returns NULL; the loop has no exit path other than thread
 *    cancellation.
 */
static void * dnxRegistrar(void * data)
{
   iDnxRegistrar * ireg = (iDnxRegistrar *)data;
   DnxNodeRequest * req = 0;

   assert(data);

   pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0);
   pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);

   dnxLog("DNX Registrar: Awaiting worker node requests...");

   for (;;)
   {
      int rc;

      // a buffer left unconsumed by the previous pass is simply reused
      if (req == 0)
      {
         req = (DnxNodeRequest *)xmalloc(sizeof *req);
         if (req == 0)
         {
            // out of memory - back off briefly, then retry the allocation
            dnxCancelableSleep(10);
            continue;
         }
      }

      // free the buffer if we are cancelled while this pass is in flight
      pthread_cleanup_push(xfree, req);

      pthread_testcancel();

      // block on the registrar channel until a request arrives or we time out
      rc = dnxWaitForNodeRequest(ireg->channel, req, req->address,
            DNX_REGISTRAR_REQUEST_TIMEOUT);
      if (rc == DNX_OK)
      {
         if (req->reqType == DNX_REQ_REGISTER)
            rc = dnxRegisterNode(ireg, &req);
         else if (req->reqType == DNX_REQ_DEREGISTER)
            rc = dnxDeregisterNode(ireg, req);
         else
            rc = DNX_ERR_UNSUPPORTED;
      }

      // pass finished normally - drop the handler without running it
      pthread_cleanup_pop(0);

      // time-outs are routine; anything else is worth a log entry
      if (rc != DNX_OK && rc != DNX_ERR_TIMEOUT)
         dnxLog("DNX Registrar: Process node request failed: %s.",
               dnxErrorString(rc));
   }
   return 0;
}
Ejemplo n.º 3
0
/** Test driver for the timer object.
 *
 * Fills in the fake job and fake node fixtures, creates a timer with a
 * 100 unit sleep interval over the fake job list, verifies the timer's
 * internal fields, waits until the entered_dnxJobListExpire flag has been
 * set, and then destroys the timer.
 *
 * @param[in] argc - argument count; any argument beyond the program name
 *    sets the verbose flag.
 * @param[in] argv - argument vector (not examined).
 *
 * @return Zero when the end of the test is reached.
 */
int main(int argc, char ** argv)
{
   DnxTimer * timer;
   iDnxTimer * itimer;

   verbose = (argc > 1);

   // fixture: a fake in-progress job that refers to the fake node
   fakejob.xid.objType     = DNX_OBJ_JOB;
   fakejob.xid.objSerial   = 1;
   fakejob.xid.objSlot     = 2;
   fakejob.state           = DNX_JOB_INPROGRESS;
   fakejob.cmd             = "fake command line";
   fakejob.payload         = &fakepayload;
   fakejob.pNode           = &fakenode;
   fakejob.start_time      = 100;
   fakejob.timeout         = 10;
   fakejob.expires         = fakejob.start_time + fakejob.timeout;

   // fixture: the fake node request the job points at
   fakenode.xid.objType    = DNX_OBJ_JOB;
   fakenode.xid.objSerial  = 1;
   fakenode.xid.objSlot    = 2;
   fakenode.reqType        = DNX_REQ_DEREGISTER;
   fakenode.jobCap         = 1;
   fakenode.ttl            = 2;
   fakenode.expires        = 3;
   strcpy(fakenode.address, "fake address");

   // cleared here; the wait loop below spins until it becomes non-zero
   entered_dnxJobListExpire = 0;

   // build a short-interval timer and view it through its concrete type
   CHECK_ZERO(dnxTimerCreate(&fakejoblist, 100, &timer));
   itimer = (iDnxTimer *)timer;

   // the concrete object must reflect what we passed to the constructor
   CHECK_TRUE(itimer->joblist == &fakejoblist);
   CHECK_TRUE(itimer->tid != 0);
   CHECK_TRUE(itimer->sleepms == 100);

   // give the timer thread time to complete at least one pass of its loop
   while (entered_dnxJobListExpire == 0)
      dnxCancelableSleep(10);

   // tear the timer down
   dnxTimerDestroy(timer);

   return 0;
}
Ejemplo n.º 4
0
/** The main timer thread procedure entry point.
 *
 * Loops forever: sleeps for the timer's configured interval, asks the job
 * list for up to MAX_EXPIRED expired jobs, and for each one bumps the
 * timed-out statistic for its node, audits it, posts a time-out result and
 * cleans the job up. The loop has no exit path of its own; it ends only
 * through thread cancellation (deferred, with dnxTimerCleanup registered as
 * the cleanup handler).
 *
 * @param[in] data - an opaque pointer to thread data for the timer thread;
 *    it is used as an iDnxTimer object.
 *
 * @return Always returns 0.
 */
static void * dnxTimer(void * data)
{
   iDnxTimer * itimer = (iDnxTimer *)data;
   DnxNewJob ExpiredList[MAX_EXPIRED];
   int i, totalExpired;
   int ret = 0;

   assert(data);

   pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0);
   pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);
   pthread_cleanup_push(dnxTimerCleanup, data);

   dnxLog("dnxTimer[%lx]: Watching for expired jobs...", pthread_self());

   while (1)
   {
      pthread_testcancel();

      dnxCancelableSleep(itimer->sleepms);

      // search for expired jobs in the pending queue; totalExpired is
      // in/out: capacity of ExpiredList going in, number returned coming out
      totalExpired = MAX_EXPIRED;
      if ((ret = dnxJobListExpire(itimer->joblist, ExpiredList, 
            &totalExpired)) == DNX_OK && totalExpired > 0)
      {
         for (i = 0; i < totalExpired; i++)
         {
            char msg[256];
            char addrstr[DNX_MAX_ADDRSTR];
            DnxNewJob * job = &ExpiredList[i];

            // NOTE(review): nothing in this function converts the node
            // address (job->pNode->address) to text, yet addrstr is printed
            // with %s below. Give it a defined value so an uninitialized
            // buffer is never read. TODO: replace this placeholder with the
            // project's address-to-string conversion.
            snprintf(addrstr, sizeof addrstr, "%s", "<unknown>");

            dnxDebug(1, "dnxTimer[%lx]: Expiring Job [%lu,%lu]: %s.",
                  pthread_self(), job->xid.objSerial, job->xid.objSlot, job->cmd);

            dnxStatsInc(job->pNode->address, RESULTS_TIMED_OUT);
            dnxAuditJob(job, "EXPIRE");

            // Every expiration is reported as a late job response; the
            // separate "failed to acknowledge job receipt" message keyed on
            // job->ack is currently disabled.
            snprintf(msg, sizeof msg, 
                  "(DNX: Service Check [%lu,%lu] Timed Out - "
                  "Node: %s - Failed to return job response in time allowed)",
                  job->xid.objSerial, job->xid.objSlot, addrstr);

            // msg is data, not a format: never pass it as the format string
            dnxDebug(2, "%s", msg);

            // report the expired job to Nagios
            ret = dnxPostResult(job->payload, job->xid.objSerial, job->start_time, 
                  time(0) - job->start_time, 1, 0, msg);
            dnxJobCleanup(job);
         }
      }

      if (totalExpired > 0 || ret != DNX_OK)
         dnxDebug(2, "dnxTimer[%lx]: Expired job count: %d  Retcode=%d: %s.",
               pthread_self(), totalExpired, ret, dnxErrorString(ret));
   }

   // not reached by normal control flow; pthread_cleanup_pop must still
   // appear here to pair with the pthread_cleanup_push above
   dnxLog("dnxTimer[%lx]: Terminating: %s.", pthread_self(), dnxErrorString(ret));

   pthread_cleanup_pop(1);
   return 0;
}