/** Apply new settings to an existing work load manager.
 *
 * Under the manager's mutex, the current and new configurations are handed
 * to logConfigChanges, the tunable fields are copied from @p cfg, and the
 * worker pool array is resized to the new poolMax. Dispatcher and collector
 * URLs are deliberately left untouched; they cannot be changed dynamically.
 *
 * The call blocks - releasing the mutex around each sleep - until the
 * current thread count is no greater than the new poolMax.
 *
 * @param[in] wlm - the work load manager to reconfigure.
 * @param[in] cfg - the new configuration; poolMin must be positive and
 *    poolMin <= poolInitial <= poolMax (all checked by assert).
 *
 * @return Zero on success, or DNX_ERR_MEMORY if the pool array could not
 *    be resized. NOTE(review): on failure the new settings have already
 *    been copied into the manager while poolsz keeps its previous value.
 */
int dnxWlmReconfigure(DnxWlm * wlm, DnxWlmCfgData * cfg)
{
   iDnxWlm * iwlm = (iDnxWlm *)wlm;
   DnxWorkerStatus ** resized;
   int ret = 0;

   assert(wlm && cfg);
   assert(cfg->poolMin > 0);
   assert(cfg->poolMax >= cfg->poolMin);
   assert(cfg->poolInitial >= cfg->poolMin);
   assert(cfg->poolInitial <= cfg->poolMax);

   DNX_PT_MUTEX_LOCK(&iwlm->mutex);

   // must run before the copy below, while iwlm->cfg still holds the old values
   logConfigChanges(&iwlm->cfg, cfg);

   // thread pool sizing
   iwlm->cfg.poolMin       = cfg->poolMin;
   iwlm->cfg.poolInitial   = cfg->poolInitial;
   iwlm->cfg.poolMax       = cfg->poolMax;
   iwlm->cfg.poolGrow      = cfg->poolGrow;

   // timing and retry behavior
   iwlm->cfg.reqTimeout    = cfg->reqTimeout;
   iwlm->cfg.ttlBackoff    = cfg->ttlBackoff;
   iwlm->cfg.maxRetries    = cfg->maxRetries;
   iwlm->cfg.pollInterval  = cfg->pollInterval;
   iwlm->cfg.shutdownGrace = cfg->shutdownGrace;

   // result reporting
   iwlm->cfg.maxResults    = cfg->maxResults;
   iwlm->cfg.showNodeAddr  = cfg->showNodeAddr;
   strcpy(iwlm->cfg.hostname, cfg->hostname);

   // the pool array cannot shrink while more threads exist than the new
   // maximum allows; give up the mutex while waiting for the count to drop
   while (iwlm->threads > iwlm->cfg.poolMax)
   {
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      dnxCancelableSleep(3 * 1000);
      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
   }

   // resize the pool array; the old array is only replaced on success
   resized = (DnxWorkerStatus **)xrealloc(iwlm->pool,
         iwlm->cfg.poolMax * sizeof *resized);
   if (resized != 0)
   {
      iwlm->pool = resized;
      iwlm->poolsz = iwlm->cfg.poolMax;
   }
   else
      ret = DNX_ERR_MEMORY;

   DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

   return ret;
}
/** The main thread entry point procedure for the registrar thread. * * This thread handles all inbound requests in a single-threaded fashion, * so we can safely call dnxStatsInc here for new nodes. * * @param[in] data - an opaque pointer to registrar thread data. This is * actually a pointer to the dnx server global data structure. * * @return Always returns NULL. */ static void * dnxRegistrar(void * data) { iDnxRegistrar * ireg = (iDnxRegistrar *)data; DnxNodeRequest * pMsg = 0; assert(data); pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0); pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0); dnxLog("DNX Registrar: Awaiting worker node requests..."); while (1) { int ret; // (re)allocate message block if not consumed in last pass if (pMsg == 0 && (pMsg = (DnxNodeRequest *)xmalloc(sizeof *pMsg)) == 0) { dnxCancelableSleep(10); // sleep for a while and try again... continue; } pthread_cleanup_push(xfree, pMsg); pthread_testcancel(); // wait on the registrar socket for a request if ((ret = dnxWaitForNodeRequest(ireg->channel, pMsg, pMsg->address, DNX_REGISTRAR_REQUEST_TIMEOUT)) == DNX_OK) { switch (pMsg->reqType) { case DNX_REQ_REGISTER: ret = dnxRegisterNode(ireg, &pMsg); break; case DNX_REQ_DEREGISTER: ret = dnxDeregisterNode(ireg, pMsg); break; default: ret = DNX_ERR_UNSUPPORTED; } } pthread_cleanup_pop(0); if (ret != DNX_OK && ret != DNX_ERR_TIMEOUT) dnxLog("DNX Registrar: Process node request failed: %s.", dnxErrorString(ret)); } return 0; }
int main(int argc, char ** argv) { DnxTimer * timer; iDnxTimer * itimer; verbose = argc > 1? 1: 0; // setup test harness fakenode.xid.objType = DNX_OBJ_JOB; fakenode.xid.objSerial = 1; fakenode.xid.objSlot = 2; fakenode.reqType = DNX_REQ_DEREGISTER; fakenode.jobCap = 1; fakenode.ttl = 2; fakenode.expires = 3; strcpy(fakenode.address, "fake address"); fakejob.state = DNX_JOB_INPROGRESS; fakejob.xid.objType = DNX_OBJ_JOB; fakejob.xid.objSerial = 1; fakejob.xid.objSlot = 2; fakejob.cmd = "fake command line"; fakejob.start_time = 100; fakejob.timeout = 10; fakejob.expires = fakejob.start_time + fakejob.timeout; fakejob.payload = &fakepayload; fakejob.pNode = &fakenode; entered_dnxJobListExpire = 0; // create a short timer and reference it as a concrete object for testing CHECK_ZERO(dnxTimerCreate(&fakejoblist, 100, &timer)); itimer = (iDnxTimer *)timer; // check internal state CHECK_TRUE(itimer->joblist == &fakejoblist); CHECK_TRUE(itimer->tid != 0); CHECK_TRUE(itimer->sleepms == 100); // wait for timer to have made one pass though timer thread loop while (!entered_dnxJobListExpire) dnxCancelableSleep(10); // shut down dnxTimerDestroy(timer); return 0; }
/** The main timer thread procedure entry point. * * @param[in] data - an opaque pointer to thread data for the timer thread. * This is actually the dnx server global data object. * * @return Always returns 0. */ static void * dnxTimer(void * data) { iDnxTimer * itimer = (iDnxTimer *)data; DnxNewJob ExpiredList[MAX_EXPIRED]; int i, totalExpired; int ret = 0; assert(data); pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0); pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0); pthread_cleanup_push(dnxTimerCleanup, data); dnxLog("dnxTimer[%lx]: Watching for expired jobs...", pthread_self()); while (1) { pthread_testcancel(); dnxCancelableSleep(itimer->sleepms); // search for expired jobs in the pending queue totalExpired = MAX_EXPIRED; if ((ret = dnxJobListExpire(itimer->joblist, ExpiredList, &totalExpired)) == DNX_OK && totalExpired > 0) { for (i = 0; i < totalExpired; i++) { char msg[256]; char addrstr[DNX_MAX_ADDRSTR]; DnxNewJob * job = &ExpiredList[i]; dnxDebug(1, "dnxTimer[%lx]: Expiring Job [%lu,%lu]: %s.", pthread_self(), job->xid.objSerial, job->xid.objSlot, job->cmd); dnxStatsInc(job->pNode->address, RESULTS_TIMED_OUT); dnxAuditJob(job, "EXPIRE"); // if (job->ack) snprintf(msg, sizeof msg, "(DNX: Service Check [%lu,%lu] Timed Out - " "Node: %s - Failed to return job response in time allowed)", job->xid.objSerial, job->xid.objSlot, addrstr); // else // snprintf(msg, sizeof msg, // "(DNX: Service Check [%lu,%lu] Timed Out - " // "Node: %s - Failed to acknowledge job receipt)", // job->xid.objSerial, job->xid.objSlot, addrstr); dnxDebug(2, msg); // report the expired job to Nagios ret = dnxPostResult(job->payload, job->xid.objSerial, job->start_time, time(0) - job->start_time, 1, 0, msg); dnxJobCleanup(job); } } if (totalExpired > 0 || ret != DNX_OK) dnxDebug(2, "dnxTimer[%lx]: Expired job count: %d Retcode=%d: %s.", pthread_self(), totalExpired, ret, dnxErrorString(ret)); } dnxLog("dnxTimer[%lx]: Terminating: %s.", pthread_self(), dnxErrorString(ret)); 
pthread_cleanup_pop(1); return 0; }