예제 #1
0
파일: dnxNebMain.c 프로젝트: dnsmichi/DNX
/** Post a new job from Nagios to the dnxServer job queue.
 *
 * A private copy of the check's command line is made with xstrdup and stored
 * in a local job structure, which is then handed to dnxJobListAdd. If the
 * job list rejects the job, the copy is released here, because the local
 * structure is the only holder of that pointer at that point.
 * NOTE(review): on success the job list is presumed to take over the copied
 * command string - confirm against dnxJobListAdd.
 *
 * @param[in] joblist - the job list to which the new job should be posted.
 * @param[in] serial - the serial number of the new job.
 * @param[in] jdp - a pointer to a job data structure; stored as the job's
 *    payload.
 * @param[in] ds - a pointer to the nagios service check that's being posted;
 *    it and its command line must be non-null (asserted).
 * @param[in] pNode - a dnxClient node request structure that is being
 *    posted with this job. The dispatcher thread will send the job to the
 *    associated node.
 *
 * @return DNX_OK on success, or the non-zero error value returned by
 *    dnxJobListAdd.
 */
static int dnxPostNewJob(DnxJobList * joblist, unsigned long serial, 
      DnxJobData * jdp, nebstruct_service_check_data * ds, 
      DnxNodeRequest * pNode)
{
   DnxNewJob Job;
   int ret;

   assert(ds);
   assert(ds->command_line);

   // fill-in the job structure with the necessary information
   dnxMakeXID(&Job.xid, DNX_OBJ_JOB, serial, 0);
   Job.payload    = jdp;
   Job.cmd        = xstrdup(ds->command_line);
   Job.start_time = ds->start_time.tv_sec;
   Job.timeout    = ds->timeout;
   // expiry is the start time plus the timeout plus a fixed margin of 5
   Job.expires    = Job.start_time + Job.timeout + 5;
   Job.pNode      = pNode;

   dnxDebug(2, "DnxNebMain: Posting Job [%lu]: %s.", serial, Job.cmd);

   // post to the Job Queue
   if ((ret = dnxJobListAdd(joblist, &Job)) != DNX_OK)
   {
      dnxStatsInc(0, JOBS_REJECTED_NO_SLOTS);
      dnxLog("Failed to post Job [%lu]; \"%s\": %d.",
            Job.xid.objSerial, Job.cmd, ret);

      // the job never made it into the list, so nobody else will ever
      // free our copy of the command line - release it here
      xfree(Job.cmd);
   }
   else
   {
      dnxStatsInc(0, JOBS_HANDLED);
      dnxAuditJob(&Job, "ASSIGN");
   }
   return ret;
}
예제 #2
0
파일: xml.c 프로젝트: Bakafish/DNX_Affinity
/** Round-trip self test for the job <-> XML buffer conversion.
 *
 * Builds a sample job, writes it into an XML buffer with xmlPut, clears the
 * job structure, rebuilds it from the buffer with xmlGet, and dumps the
 * state after each step so the two can be compared by eye.
 *
 * @param[in] argc - the number of elements in @p argv (not otherwise used).
 * @param[in] argv - command-line arguments; only argv[0] is read, to derive
 *    the program base name stored in szProg.
 *
 * @return The status of the last conversion call that was made: DNX_OK if
 *    both xmlPut and xmlGet succeeded, otherwise the failing call's code.
 */
int main (int argc, char **argv)
{
   DnxXmlBuf xbuf;
   DnxJob job;
   char *cp;
   int ret;

   // Set program base name
   szProg = (char *)((cp = strrchr(argv[0], '/')) ? (cp+1) : argv[0]);

   // Initialize Job structure
   memset(&job, 0, sizeof(job));
   dnxMakeXID(&job.xid, DNX_OBJ_JOB, 12345L, 3);
   job.state    = DNX_JOB_PENDING;
   job.priority = 7;
   // the sample command embeds angle-bracket markup
   // (presumably to exercise XML escaping - confirm against xmlPut)
   job.cmd      = "check_spam.pl <wak>test</wak> ahora por favor";
   jobDump("Initialized Job", &job);

   // Create an XML buffer
   if ((ret = xmlPut(&xbuf, &job)) == DNX_OK)
   {
      // Examine the XML buffer
      xmlDump("After xmlPut", &xbuf);

      // Clear the job structure and see if we get the same data back from the XML buffer
      printf("Clearing the job structure.\n");
      memset(&job, 0, sizeof(job));
      jobDump("Cleared Job", &job);

      // Reconstitute the job structure from the xml buffer
      if ((ret = xmlGet(&xbuf, &job)) != DNX_OK)
         fprintf(stderr, "Error from xmlGet: %d\n", ret);

      // Examine the reconstituted job (still dumped on failure, so a
      // partially filled structure can be inspected)
      jobDump("After xmlGet", &job);

      // Cleanup - job.cmd was zeroed above, so it is either still null or
      // whatever xmlGet stored (assumed heap-allocated, as the original
      // code freed it); free(NULL) is a no-op
      free(job.cmd);
   }
   else
      fprintf(stderr, "Error from xmlPut: %d\n", ret);

   return ret;
}
예제 #3
0
/** The main thread routine for a worker thread.
 *
 * Each pass of the loop sends a node request on the dispatch channel, waits
 * for a job, acknowledges the job on the collect channel, executes the job's
 * command through dnxPluginExecute, and then posts the result, re-sending it
 * until an ack arrives or the attempts are used up. Counters on the shared
 * iDnxWlm structure are only touched while its mutex is held.
 *
 * The loop ends when iwlm->terminate becomes set, or when this thread has
 * failed to obtain a job more than cfg.maxRetries times in a row while the
 * pool is above cfg.poolMin. dnxWorkerCleanup is registered as the
 * cancellation cleanup handler and is also run on normal exit.
 * 
 * @param[in] data - an opaque pointer to a DnxWorkerStatus structure for this
 *    thread.
 * 
 * @return Always returns 0.
 */
static void * dnxWorker(void * data)
{
   DnxWorkerStatus * ws = (DnxWorkerStatus *)data;
   pthread_t tid = pthread_self();
   int retries = 0;
   iDnxWlm * iwlm;

   assert(data);
   
   iwlm = ws->iwlm;

   pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, 0);
   pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);
   pthread_cleanup_push(dnxWorkerCleanup, data);

   time(&ws->tstart);   // set thread start time (for stats)

   while (!iwlm->terminate)
   {
      DnxNodeRequest msg;
      DnxJob job;
      int ret;
      
      // setup job request message - use thread id and node address in XID
      dnxMakeXID(&msg.xid, DNX_OBJ_WORKER, tid, iwlm->myipaddr);
      msg.reqType = DNX_REQ_REGISTER;
      msg.jobCap = 1;
      msg.ttl = iwlm->cfg.reqTimeout - iwlm->cfg.ttlBackoff;
      msg.hn = iwlm->myhostname;

      // request a job, and then wait for a job to come in...
      if ((ret = dnxSendNodeRequest(ws->dispatch, &msg, 0)) != DNX_OK) {
         dnxLog("Worker[%lx]: Error sending node request: %s.", 
               tid, dnxErrorString(ret));
      } else {
         DNX_PT_MUTEX_LOCK(&iwlm->mutex);
         iwlm->reqsent++;
         DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
      }

      // wait for job, even if request was never sent
      if ((ret = dnxWaitForJob(ws->dispatch, &job, job.address,iwlm->cfg.reqTimeout)) != DNX_OK && ret != DNX_ERR_TIMEOUT) {
         dnxLog("Worker[%lx]: Error receiving job: %s.",
               tid, dnxErrorString(ret));
      }
      
      // Allow thread to be canceled
      pthread_testcancel();

      DNX_PT_MUTEX_LOCK(&iwlm->mutex);
      cleanThreadPool(iwlm); // ensure counts are accurate before using them
      if (ret != DNX_OK)
      {
         // if above pool minimum and exceeded max retries...
         if (iwlm->threads > iwlm->cfg.poolMin 
               && ++retries > iwlm->cfg.maxRetries)
         {
            dnxLog("Worker[%lx]: Exiting - max retries exceeded.", tid);
            DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);
            break;
         }
      }
      else
      {
         iwlm->jobsrcvd++;
         iwlm->active++;

         // tell the sender we have the job before we start working on it
         // NOTE(review): ws->collect is printed with %lx below; if it is a
         // pointer, %p with a (void *) cast would be the matching
         // conversion - confirm against the DnxWorkerStatus declaration.
         dnxSendJobAck(ws->collect, &job, 0);
         dnxDebug(3, "Worker[%lx]: Acknowledged job [%lu:%lu] to channel (%lx) (T/S %lu).", 
               tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timestamp);

         // check pool size before we get too busy -
         // if we're not shutting down and we haven't reached the configured
         // maximum and this is the last thread out, then increase the pool
         if (!iwlm->terminate 
               && iwlm->threads < iwlm->cfg.poolMax
               && iwlm->active == iwlm->threads) // Maybe more aggressive here
            growThreadPool(iwlm);
      }
      DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

      // if we have a job, execute it and reset retry count
      if (ret == DNX_OK)
      {
         char resData[MAX_RESULT_DATA + 1];
         DnxResult result;
         time_t jobstart;

         dnxDebug(3, "Worker[%lx]: Received job [%lu:%lu] from (%lx) (T/O %d): %s.", 
               tid, job.xid.objSerial, job.xid.objSlot, ws->collect, job.timeout, job.cmd);

         // prepare result structure
         result.xid = job.xid;               // result xid must match job xid
         result.state = DNX_JOB_COMPLETE;    // complete or expired
         result.delta = 0;
         result.resCode = DNX_PLUGIN_RESULT_OK;
         result.resData = 0;

         /** @todo Allocate result data buffer based on configured buffer size. */

         // we want to be able to cancel threads while they're out on a task
         // in order to obtain timely shutdown for long jobs - move into
         // async cancel mode, but only for the duration of the check
         pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, 0);

         *resData = 0;
         jobstart = time(0);
         dnxPluginExecute(job.cmd, &result.resCode, resData, sizeof resData - 1, job.timeout,iwlm->cfg.showNodeAddr? iwlm->myipaddrstr: 0);
         result.delta = time(0) - jobstart;

         pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, 0);

         // store allocated copy of the result string
         if (*resData) result.resData = xstrdup(resData);

         // resData stays null when the plugin produced no output; passing a
         // null pointer to %s is undefined, so substitute a literal (the
         // same text glibc would have printed)
         dnxDebug(3, "Worker[%lx]: Job [%lu:%lu] completed in %lu seconds: %d, %s.",
               tid, job.xid.objSerial, job.xid.objSlot, result.delta, 
               result.resCode, result.resData ? result.resData : "(null)");

         // Post the result and wait for an Ack; re-send until we get one or
         // we run out of attempts (3 attempts, 3 second ack wait each)
         DnxJob ack;
         int trys = 1;
         while(trys < 4) {
            if ((ret = dnxSendResult(ws->collect, &result, 0)) != DNX_OK) {
               dnxDebug(3, "Worker[%lx]: Post job [%lu:%lu] results failed: %s.",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret));
               break;
            }

            // Now wait for our Ack
            ret = dnxWaitForAck(ws->dispatch, &ack, job.address, 3);
            if (ret == DNX_OK) {
               // We got our Ack
               dnxDebug(3, "Worker[%lx]: Ack Received for job [%lu:%lu]: %s. After (%i) try(s).",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys);
               break;
            }

            if (ret != DNX_ERR_TIMEOUT) {
               dnxDebug(3, "Worker[%lx]: Error receiving Ack for job [%lu:%lu]: %s. Retry (%i).",
                     tid, job.xid.objSerial, job.xid.objSlot, dnxErrorString(ret), trys);
            }

            // every failed wait - timeout or hard error - uses up one
            // attempt; otherwise a persistent receive error would keep this
            // loop re-sending the result forever
            trys++;
         }

         xfree(result.resData);
 
         // update all statistics
         DNX_PT_MUTEX_LOCK(&iwlm->mutex);
         {
            // track status
            if (result.resCode == DNX_PLUGIN_RESULT_OK) 
               iwlm->jobsok++;
            else 
               iwlm->jobsfail++;

            // track min/max/avg execution time
            if (result.delta > iwlm->maxexectm)
               iwlm->maxexectm = result.delta;
            if (result.delta < iwlm->minexectm)
               iwlm->minexectm = result.delta;
            // running figure: mean of the previous value and this job's time
            iwlm->avgexectm = (iwlm->avgexectm + result.delta) / 2;

            // total job processing time
            iwlm->jobtm += (unsigned)result.delta;
            iwlm->active--;   // reduce active count
         }
         DNX_PT_MUTEX_UNLOCK(&iwlm->mutex);

         ws->serial++;     // increment job serial number for next job
         retries = 0;
      }
   }
   pthread_cleanup_pop(1);
   return 0;
}
예제 #4
0
/** Entry point of the dnx management client.
 *
 * Parses the command line, opens a UDP channel named "MgmtClient" to the
 * selected server, sends the command string given with -c as a management
 * request, waits up to 10 (presumably seconds) for the reply and prints it.
 *
 * Options: -s <host> (default "localhost"), -p <port> (default "12482"),
 * -c <command> (required), -v prints version information and exits with 0,
 * -h or any unrecognized option invokes usage().
 *
 * @param[in] argc - the number of elements in the @p argv array.
 * @param[in] argv - a null-terminated array of command-line arguments.
 *
 * @return Zero on success, or -1 if any communication step failed.
 *    NOTE(review): -1 is seen by the shell as 255, which is outside the
 *    1..127 range the earlier version of this comment recommended.
 */
int main(int argc, char ** argv)
{
   extern char * optarg;
   extern int optind, opterr, optopt;
   char * slash;
   char * prog;
   char * hoststr;
   char * portstr;
   char * cmdstr;
   char url[1024];
   DnxChannel * channel;
   DnxMgmtRequest req;
   DnxMgmtReply rsp;
   int opt;
   int ret;

   gTopDCS = dnxComStatCreateDCS("127.0.0.1");

   // program base name: whatever follows the last '/' of argv[0]
   slash = strrchr(argv[0], '/');
   prog = (char *)(slash != 0 ? (slash + 1) : argv[0]);

   // defaults, overridden by the options below
   hoststr = "localhost";
   portstr = "12482";
   cmdstr = 0;
   opterr = 0;    // we report bad options ourselves through usage()

   for (;;)
   {
      opt = getopt(argc, argv, "hvc:s:p:");
      if (opt == -1)
         break;

      if (opt == 's')
         hoststr = optarg;
      else if (opt == 'p')
         portstr = optarg;
      else if (opt == 'c')
         cmdstr = optarg;
      else if (opt == 'v')
      {
         printf("\n  %s version %s\n  Bug reports: %s.\n\n", 
               prog, VERSION, PACKAGE_BUGREPORT);
         exit(0);
      }
      else  // 'h' and anything unrecognized
         usage(prog);
   }

   // a command string is mandatory
   if (!cmdstr)
   {
      fprintf(stderr, "%s: No command string specified.\n", prog);
      usage(prog);
   }

   // bring up the comm sub-system
   ret = dnxChanMapInit(0);
   if (ret != 0)
   {
      fprintf(stderr, "%s: Error initializing channel map: %s.\n", 
            prog, dnxErrorString(ret));
      goto finish;
   }

   snprintf(url, sizeof url, "udp://%s:%s", hoststr, portstr);

   ret = dnxChanMapAdd("MgmtClient", url);
   if (ret != 0)
   {
      fprintf(stderr, "%s: Error adding channel (%s): %s.\n", 
            prog, url, dnxErrorString(ret));
      goto release_map;
   }

   ret = dnxConnect("MgmtClient", 1, &channel);
   if (ret != 0)
   {
      fprintf(stderr, "%s: Error connecting to server (%s): %s.\n", 
            prog, url, dnxErrorString(ret));
      goto delete_channel;
   }

   // build and send the management request
   memset(&req, 0, sizeof req);
   dnxMakeXID(&req.xid, DNX_OBJ_MANAGER, 0, 0);
   req.action = cmdstr;

   ret = dnxSendMgmtRequest(channel, &req, 0);
   if (ret != 0)
   {
      fprintf(stderr, "%s: Error sending request: %s.\n", 
            prog, dnxErrorString(ret));
      goto disconnect;
   }

   // wait for the server's answer
   ret = dnxWaitForMgmtReply(channel, &rsp, 0, 10);
   if (ret != 0)
   {
      fprintf(stderr, "%s: Error receiving response: %s.\n", 
            prog, dnxErrorString(ret));
      goto disconnect;
   }

   if (rsp.status == DNX_REQ_ACK)
      printf("%s\n", rsp.reply);
   else
      fprintf(stderr, "%s: Request failed on server.\nResponse was (%s)\n", prog,rsp.reply);

   // tear down in the reverse order of setup; each label undoes the steps
   // that had succeeded before the jump
disconnect:
   dnxDisconnect(channel);
delete_channel:
   dnxChanMapDelete("MgmtClient");
release_map:
   dnxChanMapRelease();
finish:
   xheapchk();

   return ret? -1: 0;
}