Exemplo n.º 1
0
int check_exiting_jobs()

  {
  int                     iter = -1;
  job_exiting_retry_info *jeri;
  job                    *pjob;
  time_t                  time_now = time(NULL);
    
  while ((jeri = (job_exiting_retry_info *)next_from_hash_map(exiting_jobs_info, &iter)) != NULL)
    {
    if (time_now - jeri->last_attempt > EXITING_RETRY_TIME)
      {
      if ((pjob = svr_find_job(jeri->jobid, TRUE)) == NULL)
        {
        remove_entry_from_exiting_list(jeri);
        }
      else
        {
        if (pjob->ji_qs.ji_state == JOB_STATE_COMPLETE)
          {
          remove_entry_from_exiting_list(jeri);
          unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);
          }
        else
          {
          unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);
          retry_job_exit(jeri);
          }
        }
      }
    
    }

  return(PBSE_NONE);
  } /* END check_exiting_jobs() */
Exemplo n.º 2
0
/*
 * get_next_retryable_jobid()
 *
 * Continues iterating over exiting_jobs_info from *iter and returns the id
 * of the next job whose exit should be retried.
 *
 * Entries whose last attempt is within EXITING_RETRY_TIME are skipped.
 * Entries that have already used MAX_EXITING_RETRY_ATTEMPTS are removed from
 * the map and freed; if the job can still be found it is logged and passed
 * to force_purge_work(). For any other eligible entry the attempt count and
 * last-attempt time are updated and a copy of its job id is returned.
 *
 * @param iter - iteration cursor handed to next_from_hash_map(); it is
 *               updated in place so that a later call resumes the walk.
 * @return a strdup()'d job id that the caller must free(), or NULL when no
 *         further eligible entry exists. NULL is also returned if strdup()
 *         itself fails, so the two cases are indistinguishable here.
 */
char *get_next_retryable_jobid(

  int *iter)

  {
  job_exiting_retry_info *jeri;
  job                    *pjob;
  time_t                  time_now = time(NULL);
  char                    log_buf[LOCAL_LOG_BUF_SIZE];

  pthread_mutex_lock(exiting_jobs_info->hm_mutex);
  /* NOTE(review): the 'true' argument presumably tells mutex_mgr that the
   * mutex is already held, so that it is released when exit_mgr goes out of
   * scope - confirm against the mutex_mgr definition. */
  mutex_mgr exit_mgr(exiting_jobs_info->hm_mutex, true);

  while ((jeri = (job_exiting_retry_info *)next_from_hash_map(exiting_jobs_info, iter, true)) != NULL)
    {
    if (time_now - jeri->last_attempt > EXITING_RETRY_TIME)
      {
      if (jeri->attempts >= MAX_EXITING_RETRY_ATTEMPTS)
        {
        /* Copy the id first: jeri is freed below and must not be read
         * again afterwards. */
        std::string jid(jeri->jobid);
        remove_from_hash_map(exiting_jobs_info, jeri->jobid, true);
        free(jeri);
        jeri = NULL;
        exit_mgr.unlock(); //Don't hold on to a mutex when trying to lock another.
        if ((pjob = svr_find_job((char *)jid.c_str(), TRUE)) != NULL)
          {
          /* Log from the saved copy; reading jeri->jobid here would be a
           * use-after-free. */
          snprintf(log_buf, sizeof(log_buf), "Job %s has had its exiting re-tried %d times, purging.",
            jid.c_str(), MAX_EXITING_RETRY_ATTEMPTS);
          log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

          force_purge_work(pjob);
          }
        /* Re-take the map mutex before continuing the iteration. */
        exit_mgr.lock();
        }
      else
        {
        jeri->attempts++;
        jeri->last_attempt = time_now;

        /* Return a private copy so the caller does not depend on the
         * lifetime of the map entry. */
        char *jobid = strdup(jeri->jobid);
        return(jobid);
        }
      }
    }

  return(NULL);
  } /* END get_next_retryable_jobid() */
Exemplo n.º 3
0
/*
 * next_array()
 *
 * Fetches the next job array from allarrays.hm while holding
 * allarrays.allarrays_mutex. When an array is found, lock_ai_mutex() is
 * called on it before the container mutex is released.
 *
 * @param iter - iteration cursor handed to next_from_hash_map().
 * @return the next job_array (after lock_ai_mutex() has been called on it),
 *         or NULL when next_from_hash_map() yields nothing.
 */
job_array *next_array(

  int *iter)

  {
  job_array *found;

  pthread_mutex_lock(allarrays.allarrays_mutex);

  found = (job_array *)next_from_hash_map(allarrays.hm, iter);

  /* Take the array's own mutex while the container mutex is still held. */
  if (found != NULL)
    {
    lock_ai_mutex(found, __func__, NULL, LOGLEVEL);
    }

  pthread_mutex_unlock(allarrays.allarrays_mutex);

  return(found);
  } /* END next_array() */