Exemplo n.º 1
0
END_TEST

START_TEST(task_save_test)
  {
  int result = 0;
  struct task test_task;
  struct job test_job;
  const char *file_prefix = "prefix";

  memset(&test_task, 0, sizeof(test_task));
  memset(&test_job, 0, sizeof(test_job));

  result = task_save(NULL);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL input fail");

  result = task_save(&test_task);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL pointer to owning job fail");

  test_task.ti_job = &test_job;
  strncpy(test_job.ji_qs.ji_fileprefix,
          file_prefix,
          sizeof(test_job.ji_qs.ji_fileprefix) - 1);
  
  result = task_save(&test_task);
  fail_unless(result == -1, "task_save fail");
  }
Exemplo n.º 2
0
END_TEST

START_TEST(task_save_test)
  {
  int result = 0;
  struct task test_task;
  struct job test_job;
  const char *file_prefix = "prefix";

  memset(&test_task, 0, sizeof(test_task));
  memset(&test_job, 0, sizeof(test_job));

  result = task_save(NULL);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL input fail");

  result = task_save(&test_task);
  fail_unless(result == PBSE_BAD_PARAMETER, "NULL pointer to owning job fail");

  strncpy(test_job.ji_qs.ji_fileprefix,
          file_prefix,
          sizeof(test_job.ji_qs.ji_fileprefix) - 1);

  strcpy(test_task.ti_qs.ti_parentjobid, "jobid");
  
  extern job *mock_mom_find_job_return;
  mock_mom_find_job_return = NULL;
  result = task_save(&test_task);
  fail_unless(result == -1, "task_save fail");
  }
Exemplo n.º 3
0
/*
 * scan_for_terminated - reap exited children and record their exit status.
 *
 * Clears termin_child, then (if mom_get_sample() succeeds) calls
 * mom_set_use() for every job reachable from svr_alljobs.  After that it
 * calls waitpid() with WNOHANG until no more children are reported.  Each
 * reaped pid is compared against every job:
 *   - if it equals the job's ji_momsubt, the job's ji_mompost callback (when
 *     registered) is invoked with the exit value, ji_momsubt is cleared and
 *     the job is saved with SAVEJOB_QUICK;
 *   - if it equals the ti_sid of one of the job's tasks, kill_task() is
 *     called with SIGKILL, the exit value is stored in the task, the task is
 *     marked TI_STATE_EXITED and saved, and exiting_tasks is set to 1.
 * A pid that matches neither is only logged (at LOGLEVEL >= 1).
 *
 * Takes no arguments and returns nothing.
 */
void scan_for_terminated(void)

  {
  static char id[] = "scan_for_terminated";

  int  exiteval = 0;
  pid_t  pid;
  job *pjob;
  task *ptask = NULL;
  int  statloc;
  unsigned int momport = 0;  /* stays 0 unless multi_mom is set (see below) */

  int    tcount;

  if (LOGLEVEL >= 7)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      id,
      "entered");
    }

  /* update the latest intelligence about the running jobs;         */
  /* must be done before we reap the zombies, else we lose the info */

  termin_child = 0;

  if (mom_get_sample() == PBSE_NONE)
    {
    pjob = (job *)GET_PRIOR(svr_alljobs);

    while (pjob != NULL)
      {
      mom_set_use(pjob);

      pjob = (job *)GET_PRIOR(pjob->ji_alljobs);
      }
    }

  /* Now figure out which task(s) have terminated (are zombies) */

  /* NOTE:  does a job's tasks include its epilog? */

  /* WNOHANG: never block; the loop ends as soon as no child is waiting */
  while ((pid = waitpid(-1, &statloc, WNOHANG)) > 0)
    {
    pjob = (job *)GET_PRIOR(svr_alljobs);

    /* walk the jobs; leaves pjob == NULL when no job owns this pid */
    while (pjob != NULL)
      {
      /*
       * see if process was a child doing a special
       * function for MOM
       */

      if (LOGLEVEL >= 7)
        {
        snprintf(log_buffer, 1024, "checking job w/subtask pid=%d (child pid=%d)",
          pjob->ji_momsubt,
          pid);

        LOG_EVENT(
          PBSEVENT_DEBUG,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);
        }

      if (pid == pjob->ji_momsubt)
        {
        if (LOGLEVEL >= 7)
          {
          snprintf(log_buffer, 1024, "found match with job subtask for pid=%d",
            pid);

          LOG_EVENT(
            PBSEVENT_DEBUG,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            log_buffer);
          }

        /* ptask is not meaningful on this exit; the subtask branch below
           handles the pid and never touches ptask */
        break;
        }

      /* look for task */

      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      /* locate task with associated process id */

      tcount = 0;  /* position of the task in the job's list, for logging only */

      while (ptask != NULL)
        {
        if (ptask->ti_qs.ti_sid == pid)
          {
          if (LOGLEVEL >= 7)
            {
            snprintf(log_buffer, 1024, "found match with job task %d for pid=%d",
              tcount,
              pid);

            LOG_EVENT(
              PBSEVENT_DEBUG,
              PBS_EVENTCLASS_JOB,
              pjob->ji_qs.ji_jobid,
              log_buffer);
            }

          break;
          }

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);

        tcount++;
        }  /* END while (ptask) */

      if (ptask != NULL)
        {
        /* pid match located - break out of job loop */

        break;
        }

      pjob = (job *)GET_PRIOR(pjob->ji_alljobs);
      }  /* END while (pjob != NULL) */

    if (pjob == NULL)
      {
      /* no job claimed this child: log it and move on to the next pid */
      if (LOGLEVEL >= 1)
        {
        /* note: the value printed as "exitcode" is the raw wait status */
        sprintf(log_buffer, "pid %d not tracked, exitcode=%d",
          pid,
          statloc);

        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          id,
          log_buffer);
        }

      continue;
      }  /* END if (pjob == NULL) */

    /* translate the wait status: normal exit -> exit status,
       killed by signal -> signal number + 0x100, anything else -> 1 */
    if (WIFEXITED(statloc))
      exiteval = WEXITSTATUS(statloc);
    else if (WIFSIGNALED(statloc))
      exiteval = WTERMSIG(statloc) + 0x100;
    else
      exiteval = 1;

    if (pid == pjob->ji_momsubt)
      {
      /* PID matches job mom subtask */

      /* NOTE:  both ji_momsubt and ji_mompost normally set in routine
                preobit_reply() after epilog child is successfully forked */

      if (pjob->ji_mompost != NULL)
        {
        /* the callback is unregistered only when it reports success (0) */
        if (pjob->ji_mompost(pjob, exiteval) == 0)
          {
          /* success */

          pjob->ji_mompost = NULL;
          }

        }  /* END if (pjob->ji_mompost != NULL) */
      else
        {
        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "job has no postprocessing routine registered");
        }

      /* clear mom sub-task */

      pjob->ji_momsubt = 0;

      if(multi_mom)
        {
        momport = pbs_rm_port;
        }

      job_save(pjob, SAVEJOB_QUICK, momport);

      continue;
      }  /* END if (pid == pjob->ji_momsubt) */

    /* from here on the job loop was left through the task match, so ptask
       points at the task whose ti_sid equals pid */

    /* what happens if mom PID is reaped before subtask? */

    if (LOGLEVEL >= 2)
      {
      sprintf(log_buffer, "pid %d harvested for job %s, task %d, exitcode=%d",
              pid,
              pjob->ji_qs.ji_jobid,
              ptask->ti_qs.ti_task,
              exiteval);

      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        id,
        log_buffer);
      }

    /* where is job purged?  How do we keep job from progressing in state until the obit is sent? */

    kill_task(ptask, SIGKILL, 0);

    /* record the outcome in the task and persist it */
    ptask->ti_qs.ti_exitstat = exiteval;

    ptask->ti_qs.ti_status   = TI_STATE_EXITED;

    task_save(ptask);

    sprintf(log_buffer, "%s: job %s task %d terminated, sid=%d",
            id,
            pjob->ji_qs.ji_jobid,
            ptask->ti_qs.ti_task,
            ptask->ti_qs.ti_sid);

    LOG_EVENT(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    /* global flag; presumably tells the main loop that exited tasks are
       waiting to be processed - confirm against its consumer */
    exiting_tasks = 1;
    }  /* END while ((pid = waitpid(-1,&statloc,WNOHANG)) > 0) */

  return;
  }  /* END scan_for_terminated() */
Exemplo n.º 4
0
/**
 * @brief
 *      Internal session cpu time decoding routine.
 *
 *      Walks every task of the job.  A task whose session id is <= 1 is
 *      treated as dead and contributes its stored ti_cput.  For a live task
 *      the sampled process table (proc_info / proc_status, nproc entries) is
 *      scanned for processes in the task's session and their times are added
 *      up; ti_cput is only ever raised, never lowered.  A live task with no
 *      process left in the sample is marked TI_STATE_EXITED and saved.
 *
 * @param[in] pjob - a job pointer.
 *
 * @return      ulong
 * @retval      sum of all cpu time consumed for all tasks executed by the job, in seconds,
 *              adjusted by cputfactor.
 *
 */
static unsigned long
cput_sum(job *pjob)
{
	ulong			cputime = 0;
	int			i;
	int			nps = 0;
	int			taskprocs;
	prstatus_t		*ps;
	prpsinfo_t		*pi;
	task			*ptask;
	ulong			tcput;

	for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
		ptask != NULL;
		ptask = (task *)GET_NEXT(ptask->ti_jobtask)) {

		/* DEAD task */
		if (ptask->ti_qs.ti_sid <= 1) {
			cputime += ptask->ti_cput;
			continue;
		}

		tcput = 0;
		taskprocs = 0;
		for (i=0; i<nproc; i++) {
			pi = &proc_info[i];

			/* is this process part of any task? */
			if (ptask->ti_qs.ti_sid != pi->pr_sid)
				continue;

			nps++;
			taskprocs++;
			if (pi->pr_state == SZOMB) {
				/* use zombie's iff top process */
				if ((pi->pr_sid != pi->pr_pid) &&
					(pi->pr_ppid != mom_pid))
					continue;

				tcput += tv(pi->pr_time);
				/* tcput is unsigned long, so print it with %lu */
				DBPRT(("%s: task %08.8X ses %d pid %d "
					"(zomb) %lu\n",
					__func__, ptask->ti_qs.ti_task,
					pi->pr_sid, pi->pr_pid, tcput))
			} else {
				ps = &proc_status[i];
				tcput += (tv(ps->pr_utime) +
					tv(ps->pr_stime) +
					tv(ps->pr_cutime) +
					tv(ps->pr_cstime));
			}
			/*
			 * Take the pid from pi, which is valid on both paths.
			 * ps is only assigned for non-zombies, so reading it
			 * here after the zombie branch would use an
			 * uninitialized or stale pointer.  (Assumes
			 * proc_info[i] and proc_status[i] describe the same
			 * process - TODO confirm.)
			 */
			DBPRT(("%s: task %08.8X ses %d pid %d cputime %lu\n",
				__func__, ptask->ti_qs.ti_task,
				pi->pr_sid, pi->pr_pid, tcput))
		}

		/* never let a task's recorded cpu time go backwards */
		if (tcput > ptask->ti_cput)
			ptask->ti_cput = tcput;
		cputime += ptask->ti_cput;
		DBPRT(("%s: task %08.8X cput %lu total %lu\n", __func__,
			ptask->ti_qs.ti_task, ptask->ti_cput, cputime))

		if (taskprocs == 0) {
			/* nothing in the sample belongs to this session any more */
			sprintf(log_buffer,
				"no active process for task %8.8X",
				ptask->ti_qs.ti_task);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
				LOG_INFO, pjob->ji_qs.ji_jobid,
				log_buffer);
			ptask->ti_qs.ti_exitstat = 0;
			ptask->ti_qs.ti_status = TI_STATE_EXITED;
			/* a negative job exit status is left untouched */
			if (pjob->ji_qs.ji_un.ji_momt.ji_exitstat >= 0)
				pjob->ji_qs.ji_un.ji_momt.ji_exitstat = 0;
			task_save(ptask);
			exiting_tasks = 1;
		}
	}
Exemplo n.º 5
0
void scan_for_terminated(void) /* linux */

  {
  int           exiteval = 0;
  pid_t         pid;
  job          *pjob = NULL;
  task         *ptask = NULL;
  int           statloc;
  unsigned int  momport = 0;

#ifdef USESAVEDRESOURCES
  int           update_stats = TRUE;
#endif /* USESAVEDRESOURCES */

  int           tcount;

  if (LOGLEVEL >= 9)
    {
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, "entered");
    }

  /* update the latest intelligence about the running jobs;         */
  /* must be done before we reap the zombies, else we lose the info */

  termin_child = 0;

  if (mom_get_sample() == PBSE_NONE)
    {
    std::list<job *>::reverse_iterator iter;

    // get a list of jobs in start time order, first to last
    for (iter = alljobs_list.rbegin(); iter != alljobs_list.rend(); iter++)
      {
      pjob = *iter;

      if ((pjob->ji_stats_done == true) || 
          (pjob->ji_qs.ji_state < JOB_STATE_RUNNING))
        continue;

#ifdef USESAVEDRESOURCES
      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      /*
       ** check task with associated process id to see if we are recovering
       ** after a mom restart where process completed while we were gone
        */
      
      while (ptask != NULL)
        {
        if (ptask->ti_flags & TI_FLAGS_RECOVERY)
          {
          if (LOGLEVEL >= 7)
            {
            snprintf(log_buffer, sizeof(log_buffer), "Found match for recovering job task for sid=%d",
              ptask->ti_qs.ti_sid);

            log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
            }
          
          update_stats = FALSE;
          break;
          }

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);
        
        }  /* END while (ptask) */
      
      if (update_stats)
        {
        mom_set_use(pjob);
        }
#else

      mom_set_use(pjob);

#endif /* USESAVEDRESOURCES */
      }
    }

  /* Now figure out which task(s) have terminated (are zombies) */

  /* NOTE:  does a job's tasks include its epilog? */

  while ((pid = waitpid(-1, &statloc, WNOHANG)) > 0)
    {
    std::list<job *>::reverse_iterator iter;

    if (LOGLEVEL >= 8)
      {
      sprintf(log_buffer, "Child exited with pid: %d", pid);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
      }

    // get a list of jobs in start time order, first to last
    for (iter = alljobs_list.rbegin(); iter != alljobs_list.rend(); iter++)
      {
      pjob = *iter;

      /*
       * see if process was a child doing a special
       * function for MOM
       */

      if (pjob->ji_momsubt != 0)
        {
        if (LOGLEVEL >= 9)
          {
          snprintf(log_buffer, sizeof(log_buffer),
            "Checking to see if exiting child pid '%d' is a match for special mom task with pid=%d",
            pid, pjob->ji_momsubt);

          log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
          }

        if (pid == pjob->ji_momsubt)
          {
          if (LOGLEVEL >= 9)
            {
            snprintf(log_buffer, sizeof(log_buffer),
              "The exiting child is a match of special subtask with pid=%d for job %s",
              pid, pjob->ji_qs.ji_jobid);

            log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
            }

          break;
          }
        }

      /* look for task */

      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      /* locate task with associated process id */

      tcount = 0;

      while (ptask != NULL)
        {
        if ((ptask->ti_qs.ti_sid == pid) &&
            (ptask->ti_qs.ti_status != TI_STATE_EXITED))
          {
          if (LOGLEVEL >= 7)
            {
            snprintf(log_buffer, sizeof(log_buffer),
              "Exiting child matches job task %d for pid=%d",
              tcount,
              pid);

            log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
            }

          break;
          }

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);

        tcount++;
        }  /* END while (ptask) */

      // make sure the task is the top level task for the job to mark the job done
      if ((ptask != NULL) &&
          (ptask->ti_qs.ti_parenttask == TM_NULL_TASK))
        {
        /* pid match located - break out of job loop */
        pjob->ji_stats_done = true;

        break;
        }

      }  /* END while (pjob != NULL) */

    if (WIFEXITED(statloc))
      exiteval = WEXITSTATUS(statloc);
    else if (WIFSIGNALED(statloc))
      exiteval = WTERMSIG(statloc) + 0x100;
    else
      exiteval = 1;

    if (pjob == NULL)
      {
      if (LOGLEVEL >= 1)
        {
        sprintf(log_buffer, "Child pid %d is not part of a job, statloc=%d, exitval=%d",
          pid,
          statloc,
          exiteval);

        log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
        }

      continue;
      }  /* END if (pjob == NULL) */

    if (pid == pjob->ji_momsubt)
      {
      /* PID matches job mom subtask */

      /* NOTE:  both ji_momsubt and ji_mompost normally set in routine
                preobit_reply() after epilog child is successfully forked */

      if (pjob->ji_mompost != NULL)
        {
        if (pjob->ji_mompost(pjob, exiteval) == 0)
          {
          /* success */

          pjob->ji_mompost = NULL;
          }

        }  /* END if (pjob->ji_mompost != NULL) */
      else if (LOGLEVEL >= 8) // This is a debug statement
        {
        log_record(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          "Job has no postprocessing routine registered");
        }

      /* clear mom sub-task */

      pjob->ji_momsubt = 0;

      if (multi_mom)
        {
        momport = pbs_rm_port;
        }

      job_save(pjob, SAVEJOB_QUICK, momport);

      continue;
      }  /* END if (pid == pjob->ji_momsubt) */

    if (ptask == NULL)
      continue;

    /* what happens if mom PID is reaped before subtask? */

    if (LOGLEVEL >= 2)
      {
      sprintf(log_buffer, "pid %d harvested for job %s, task %d, exitcode=%d",
              pid,
              pjob->ji_qs.ji_jobid,
              ptask->ti_qs.ti_task,
              exiteval);

      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer);
      }

    /* where is job purged?  How do we keep job from progressing in state until the obit is sent? */

    kill_task(pjob, ptask, SIGKILL, 0);

    ptask->ti_qs.ti_exitstat = exiteval;

    ptask->ti_qs.ti_status   = TI_STATE_EXITED;

    task_save(ptask);

    sprintf(log_buffer, "%s: job %s task %d terminated, sid=%d",
      __func__,
      pjob->ji_qs.ji_jobid,
      ptask->ti_qs.ti_task,
      ptask->ti_qs.ti_sid);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);

    exiting_tasks = 1;
    }  /* END while ((pid = waitpid(-1,&statloc,WNOHANG)) > 0) */

  return;
  }  /* END scan_for_terminated() */
Exemplo n.º 6
0
/**
 * @brief	wait_action
 *		Wait for a task that has terminated or a socket that is ready to read.
 *		Mark any terminated task as Exiting and do network processing on
 *		any ready socket.
 *
 *		Works in two phases.  First, children reported by waitpid() are
 *		matched against WORK_Deferred_Child entries of task_list_event,
 *		which are switched to WORK_Deferred_Cmp with the exit status in
 *		wt_aux.  Second, the process handles of all job sub-tasks
 *		(ji_momsubt, only when ji_mompost is set) and tasks (ti_hProc)
 *		are collected - at most MAXIMUM_WAIT_OBJECTS of them - and waited
 *		on; every finished process is looked up again and either the
 *		job's ji_mompost callback is run or the task is marked
 *		TI_STATE_EXITED and saved.  Only the first wait uses a 500 ms
 *		timeout; later rounds poll.  connection_idlecheck() is called
 *		before returning.
 *
 * @return	void
 */
void
wait_action(void)
{
	static	char	id[] = "wait_action";
	int		rc = 0;
	int		hNum = 0;
	/* one spare slot: the collection loop may write index MAXIMUM_WAIT_OBJECTS
	 * before it notices the array is full and clamps hNum back */
	HANDLE		hArray[MAXIMUM_WAIT_OBJECTS+1] = {INVALID_HANDLE_VALUE};
	HANDLE		hProc = INVALID_HANDLE_VALUE;
	extern	HANDLE	hStop;	/* mutex: quit when released */
	int		ecode = -1;
	job		*pjob = NULL;
	task		*ptask = NULL;
	int		waittime = 500;
	extern	int	mom_run_state;
	struct work_task *p_wtask = NULL;
	HANDLE		  pid = INVALID_HANDLE_VALUE;

	/* Check for non job-related tasks like periodic hook tasks */
	while (1) {

		if ((pid = waitpid((HANDLE)-1, &ecode, WNOHANG)) == (HANDLE)-1) {
			if (errno == EINTR) {
				continue;
			} else {
				break;
			}

		} else if (pid == 0) {
			break;
		}

		/* flag every deferred-child work task that was waiting on this pid */
		p_wtask = (struct work_task *)GET_NEXT(task_list_event);
		while (p_wtask) {
			if ((p_wtask->wt_type == WORK_Deferred_Child) &&
				((HANDLE)p_wtask->wt_event == pid)) {
				p_wtask->wt_type = WORK_Deferred_Cmp;
				p_wtask->wt_aux = (int)ecode;	/* exit status */
				svr_delay_entry++;	/* see next_task() */
			}
			p_wtask = (struct work_task *)GET_NEXT(p_wtask->wt_linkall);
		}
	}

	for (;;) {
		hNum = 0;
		if (mom_run_state && hStop != NULL)	/* add mutex to array */
			hArray[hNum++] = hStop;

		/* invariant at the top of each job iteration: hNum <= MAXIMUM_WAIT_OBJECTS */
		pjob = (job *)GET_NEXT(svr_alljobs);
		while (pjob) {
			/*
			 * see if process was a child doing a special
			 * function for MOM
			 */
			if ((pjob->ji_momsubt != NULL) &&
				(pjob->ji_momsubt != INVALID_HANDLE_VALUE) &&
				(pjob->ji_mompost != NULL)) {
				hArray[hNum++] = pjob->ji_momsubt;
			}

			/*
			 * process tasks
			 *
			 * The bound is tested BEFORE every store.  The sub-task
			 * insert above may already have used the spare slot
			 * (hNum == MAXIMUM_WAIT_OBJECTS + 1); storing a task
			 * handle in that state would write past the end of
			 * hArray.
			 */
			ptask = (task *)GET_NEXT(pjob->ji_tasks);
			while (ptask && hNum <= MAXIMUM_WAIT_OBJECTS) {
				if ((ptask->ti_hProc != NULL) &&
					(ptask->ti_hProc != INVALID_HANDLE_VALUE))
					hArray[hNum++] = ptask->ti_hProc;
				ptask = (task *)GET_NEXT(ptask->ti_jobtask);
			}
			if (hNum > MAXIMUM_WAIT_OBJECTS) {
				/* too many handles: wait on the first MAXIMUM_WAIT_OBJECTS only */
				DBPRT(("%s: %d more than MAX\n", id, hNum))
				hNum = MAXIMUM_WAIT_OBJECTS;
				break;
			}
			pjob = (job *)GET_NEXT(pjob->ji_alljobs);
		}

		if (hNum == 0)		/* nothing to wait for */
			break;

		rc = WaitForMultipleObjects(hNum, hArray,
			FALSE, waittime);
		if (rc == WAIT_TIMEOUT)	/* nobody is done */
			break;
		else if (rc == WAIT_FAILED) {
			log_err(-1, id, "WaitForMultipleObjects");
			break;
		}

		waittime = 0;		/* only wait the first time */
		rc -= WAIT_OBJECT_0;	/* which object was it? */
		assert(0 <= rc && rc < hNum);

		if (rc == 0 && mom_run_state && hStop != NULL) {		/* got mutex */
			mom_run_state = 0;				/* shutdown */
			continue;
		}
		/*
		 **	It was a process finishing.  Find which one.
		 */
		hProc = hArray[rc];

		/*
		 * NOTE(review): rc holds the success/failure result of
		 * GetExitCodeProcess(), so comparing it with STILL_ACTIVE looks
		 * unintended (the exit code itself is in ecode).  Left unchanged;
		 * confirm the intent before touching it.
		 */
		rc = GetExitCodeProcess(hProc, &ecode);
		if (rc == 0) {
			log_err(-1, id, "GetExitCodeProcess");
			ecode = 99;
		} else if (rc == STILL_ACTIVE)	/* shouldn't happen */
			break;
		CloseHandle(hProc);

		/* find which process finished */
		pjob = (job *)GET_NEXT(svr_alljobs);
		while (pjob) {
			if (pjob->ji_momsubt == hProc)
				break;

			ptask = (task *)GET_NEXT(pjob->ji_tasks);
			while (ptask) {
				if (ptask->ti_hProc == hProc)
					break;
				ptask = (task *)GET_NEXT(ptask->ti_jobtask);
			}
			if (ptask)
				break;
			pjob = (job *)GET_NEXT(pjob->ji_alljobs);
		}
		/* the handle came from these same lists, so an owner must exist */
		assert(pjob != NULL);

		if (pjob->ji_momsubt == hProc) {
			/* a MOM sub-task finished: run and clear its post-processing hook */
			pjob->ji_momsubt = NULL;
			if (pjob->ji_mompost) {
				pjob->ji_mompost(pjob, ecode);

				/* After epilogue, get rid of any HOSTFILE */
				if (pjob->ji_mompost == send_obit) {
					char	file[MAXPATHLEN+1];

					(void)sprintf(file, "%s/aux/%s",
						pbs_conf.pbs_home_path,
						pjob->ji_qs.ji_jobid);
					(void)unlink(file);
				}
				pjob->ji_mompost = 0;
			}
			(void)job_save(pjob, SAVEJOB_QUICK);
			continue;
		}

		/* otherwise the search stopped on a task: record its exit and save it */
		DBPRT(("%s: task %d pid %d exit value %d\n", id,
			ptask->ti_qs.ti_task, ptask->ti_qs.ti_sid,
			ecode))
		ptask->ti_hProc = NULL;
		ptask->ti_qs.ti_exitstat = ecode;
		ptask->ti_qs.ti_status = TI_STATE_EXITED;
		ptask->ti_qs.ti_sid = 0;
		(void)task_save(ptask);
		sprintf(log_buffer, "task %d terminated", ptask->ti_qs.ti_task);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
			pjob->ji_qs.ji_jobid, log_buffer);

		exiting_tasks = 1;
	}

	connection_idlecheck();
}
Exemplo n.º 7
0
/**
 * @brief
 *      Internal session cpu time decoding routine.
 *
 *      Walks every task of the job.  A task whose session id is <= 1 is
 *      treated as dead and contributes its stored ti_cput.  For a live task
 *      the sampled process table (proc_info, nproc entries) is scanned for
 *      processes in the task's session and their times are added up; ti_cput
 *      is only ever raised, never lowered.  A live task with no process left
 *      in the sample is marked TI_STATE_EXITED and saved.
 *
 * @param[in] pjob - a job pointer.
 *
 * @return      ulong
 * @retval      sum of all cpu time consumed for all tasks executed by the job, in seconds,
 *              adjusted by cputfactor.
 *
 */
static unsigned long
cput_sum(job *pjob)
{
	ulong			 cputime, addtime;
	int			 i;
	int			 nps = 0;	/* processes matched across all tasks */
	int			taskprocs;	/* processes matched for the current task */
	psinfo_t		*pi;
	task			*ptask;
	ulong			tcput;		/* cpu time found for the current task */

	cputime = 0;
	for (ptask = (task *)GET_NEXT(pjob->ji_tasks);
		ptask != NULL;
		ptask = (task *)GET_NEXT(ptask->ti_jobtask)) {

		/* DEAD task */
		if (ptask->ti_qs.ti_sid <= 1) {
			cputime += ptask->ti_cput;
			continue;
		}

		tcput = 0;
		taskprocs = 0;
		for (i=0; i<nproc; i++) {
			pi = &proc_info[i];

			/* is this process part of the task? */
			if (ptask->ti_qs.ti_sid != pi->pr_sid)
				continue;

			nps++;
			taskprocs++;
			if (pi->pr_nlwp == 0) {		/* zombie */
				/* skip a zombie that is neither the session leader
				 * nor a direct child of mom; it still counts in
				 * taskprocs above */
				if ((pi->pr_sid != pi->pr_pid) &&
					(pi->pr_ppid != mom_pid))
					continue;

				/* top of session/job, record it */
				tcput += tv(pi->pr_time);
				DBPRT(("%s: task %08.8X ses %d pid %d "
					"(zombie) cputime %lu\n", __func__,
					ptask->ti_qs.ti_task,
					pi->pr_sid, pi->pr_pid, tcput))
			} else {
				/* live process: add pr_time and pr_ctime */
				addtime = tv(pi->pr_time) + tv(pi->pr_ctime);

				tcput += addtime;
				DBPRT(("%s: task %08.8X ses %d pid %d "
					"cputime %lu\n", __func__,
					ptask->ti_qs.ti_task, pi->pr_sid,
					pi->pr_pid, tcput))
			}
		}
		/* never let a task's recorded cpu time go backwards */
		if (tcput > ptask->ti_cput)
			ptask->ti_cput = tcput;
		cputime += ptask->ti_cput;
		DBPRT(("%s: task %08.8X cput %lu total %lu\n", __func__,
			ptask->ti_qs.ti_task, ptask->ti_cput, cputime))

		if (taskprocs == 0) {
			/* nothing in the sample belongs to this session any more */
			sprintf(log_buffer,
				"no active process for task %8.8X",
				ptask->ti_qs.ti_task);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
				LOG_INFO, pjob->ji_qs.ji_jobid,
				log_buffer);
			ptask->ti_qs.ti_status = TI_STATE_EXITED;
			task_save(ptask);
			exiting_tasks = 1;
		}
	}