Ejemplo n.º 1
0
/*
 * array_delete_wt - work task that follows up on a job array delete request.
 *
 * ptask->wt_parm1 is the batch request of the delete.  The array named in
 * the request is looked up; when it no longer exists the request is
 * acknowledged.  Otherwise the function remembers (in static storage) which
 * array id it is tracking and the time_now value at which tracking began.
 * Once the same id has been tracked for more than 10 time_now units
 * (presumably seconds - confirm), every job of the array still in
 * JOB_SUBSTATE_PRERUN is forced out.  If no job in any other substate
 * remains, the request is acknowledged; in every other case the request is
 * passed to req_deletearray().
 */
void array_delete_wt(struct work_task *ptask)
  {
  /* id of the array currently being tracked and the time_now value stored
   * when tracking of that id started; both survive between invocations */
  static char *tracked_id = NULL;
  static int   tracked_since = 0;

  struct batch_request *preq = ptask->wt_parm1;
  char                 *array_id = preq->rq_ind.rq_delete.rq_objname;
  job_array            *pa = get_array(array_id);

  struct work_task     *exit_task;
  job                  *pjob;
  int                   not_prerun;
  int                   idx;

  if (pa == NULL)
    {
    /* the array is gone, so its jobs must have exited already */
    reply_ack(preq);
    tracked_since = 0;
    free(tracked_id);
    tracked_id = NULL;
    return;
    }

  if ((tracked_id == NULL) || (strcmp(tracked_id, array_id) != 0))
    {
    /* first request seen, or a request for a different array:
     * (re)start tracking.  free(NULL) is a no-op in the first case. */
    free(tracked_id);
    tracked_id = strdup(array_id);
    tracked_since = time_now;
    }
  else if (time_now - tracked_since > 10)
    {
    not_prerun = 0;

    for (idx = 0; idx < pa->ai_qs.array_size; idx++)
      {
      pjob = (job *)pa->jobs[idx];

      if (pjob == NULL)
        continue;

      if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_PRERUN)
        {
        /* only counted; req_deletearray() below deals with it */
        not_prerun++;
        continue;
        }

      /* mom still hasn't gotten job?? delete anyway */

      if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE)
        {
        /* job has restart file at mom, do end job processing */
        change_restart_comment_if_needed(pjob);

        svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

        /* force new connection */
        pjob->ji_momhandle = -1;

        exit_task = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

        if (exit_task)
          append_link(&pjob->ji_svrtask, &exit_task->wt_linkobj, exit_task);
        }
      else
        {
        /* staged-in files, if any, are removed before the job is aborted */
        if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn)
          remove_stagein(pjob);

        job_abt(&pjob, NULL);
        }
      }

    if (not_prerun == 0)
      {
      /* every remaining job was in PRERUN and has been handled above */
      reply_ack(preq);
      free(tracked_id);
      tracked_id = NULL;
      return;
      }
    }

  req_deletearray(preq);
  }
Ejemplo n.º 2
0
/*
 * finish_quejob_step - shared epilogue of the job-submission request types
 * handled in dispatch_request().
 *
 * rc      result of the request handler that just ran
 * job_id  heap-allocated job id (may be NULL); always freed here
 *
 * When the handler failed (rc != PBSE_NONE) and a job id is available,
 * close_quejob_by_jobid() is called for it before the id is released.
 */
static void finish_quejob_step(

  int   rc,      /* I */
  char *job_id)  /* I (freed) */

  {
  if ((rc != PBSE_NONE) && (job_id != NULL))
    close_quejob_by_jobid(job_id);

  /* free(NULL) is a no-op, so no guard is needed */
  free(job_id);
  }  /* END finish_quejob_step() */




/*
 * dispatch_request - hand a decoded batch request to the handler that
 * matches request->rq_type.
 *
 * sfds     socket descriptor the request arrived on; used for logging and,
 *          for unknown request types, closed unless it equals
 *          PBS_LOCAL_CONNECTION
 * request  the batch request to dispatch
 *
 * Returns the value produced by the selected handler where that value is
 * captured; PBSE_NONE otherwise (handlers whose result is not captured,
 * PBS_BATCH_AltAuthenUser, and unknown request types).
 */
int dispatch_request(

  int                   sfds,    /* I */
  struct batch_request *request) /* I */

  {
  int   rc = PBSE_NONE;
  char  log_buf[LOCAL_LOG_BUF_SIZE];
  char *job_id = NULL;

  if (LOGLEVEL >= 5)
    {
    /* bounded write: the request type text comes from a helper whose
     * maximum length is not visible here */
    snprintf(log_buf, sizeof(log_buf), "dispatching request %s on sd=%d",
      reqtype_to_txt(request->rq_type),
      sfds);

    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  switch (request->rq_type)
    {
    /* --- job submission sequence ------------------------------------
     * For the three cases that copy the id, the copy is taken before the
     * handler runs (presumably because the request may not be usable
     * afterwards - confirm); req_quejob() hands the id back itself. */

    case PBS_BATCH_QueueJob:
      rc = req_quejob(request, &job_id);
      finish_quejob_step(rc, job_id);
      break;

    case PBS_BATCH_JobCred:
      rc = req_jobcredential(request);
      break;

    case PBS_BATCH_jobscript:
      job_id = strdup(request->rq_ind.rq_jobfile.rq_jobid);
      rc = req_jobscript(request);
      finish_quejob_step(rc, job_id);
      break;

    case PBS_BATCH_RdytoCommit:
      job_id = strdup(request->rq_ind.rq_rdytocommit);
      rc = req_rdytocommit(request);
      finish_quejob_step(rc, job_id);
      break;

    case PBS_BATCH_Commit:
      job_id = strdup(request->rq_ind.rq_commit);
      rc = req_commit(request);
      finish_quejob_step(rc, job_id);
      break;

    /* --- requests that may target a whole job array ------------------ */

    case PBS_BATCH_DeleteJob:

      /* if this is a server side job delete request, then the request
       * could also be for an entire array.  we check to see if the
       * request object name is an array id. If so we hand off to the
       * req_deletearray() function.  If not we pass along to the normal
       * req_deletejob() function.
       */

      if (is_array(request->rq_ind.rq_delete.rq_objname))
        rc = req_deletearray(request);
      else
        rc = req_deletejob(request);

      break;

    case PBS_BATCH_HoldJob:
      if (is_array(request->rq_ind.rq_hold.rq_orig.rq_objname))
        rc = req_holdarray(request);
      else
        rc = req_holdjob(request);

      break;

    case PBS_BATCH_CheckpointJob:
      rc = req_checkpointjob(request);
      break;

    case PBS_BATCH_LocateJob:
      rc = req_locatejob(request);
      break;

    case PBS_BATCH_Manager:
      rc = req_manager(request);
      break;

    case PBS_BATCH_MessJob:
      rc = req_messagejob(request);
      break;

    case PBS_BATCH_AsyModifyJob:

    case PBS_BATCH_ModifyJob:
      /* NOTE(review): the object name is read through the rq_delete union
       * member even though this is a modify request - presumably both
       * members share a layout; confirm against the request definition.
       * The result of req_modifyjob() is not captured in rc. */
      if (is_array(request->rq_ind.rq_delete.rq_objname))
        rc = req_modifyarray(request);
      else
        req_modifyjob(request);

      break;

    case PBS_BATCH_Rerun:
      rc = req_rerunjob(request);
      break;

    case PBS_BATCH_MoveJob:
      rc = req_movejob(request);
      break;

    case PBS_BATCH_OrderJob:
      rc = req_orderjob(request);
      break;

    case PBS_BATCH_Rescq:
      rc = req_rescq(request);
      break;

    case PBS_BATCH_ReserveResc:
      rc = req_rescreserve(request);
      break;

    case PBS_BATCH_ReleaseResc:
      rc = req_rescfree(request);
      break;

    case PBS_BATCH_ReleaseJob:
      /* NOTE(review): same rq_delete union member access as for modify */
      if (is_array(request->rq_ind.rq_delete.rq_objname))
        rc = req_releasearray(request);
      else
        rc = req_releasejob(request);

      break;

    case PBS_BATCH_RunJob:

    case PBS_BATCH_AsyrunJob:
      /* the connection is removed from the global socket set before the
       * run request is processed */
      globalset_del_sock(request->rq_conn);
      rc = req_runjob(request);

      break;

    case PBS_BATCH_SelectJobs:

    case PBS_BATCH_SelStat:

      /* handle special 'truncated' keyword */

      if (!strncasecmp(request->rq_ind.rq_status.rq_id, "truncated", strlen("truncated")))
        rc = req_stat_job(request);
      else
        rc = req_selectjobs(request);

      break;

    case PBS_BATCH_Shutdown:
      /* result, if any, is not captured in rc */
      req_shutdown(request);
      break;

    case PBS_BATCH_SignalJob:

    case PBS_BATCH_AsySignalJob:
      rc = req_signaljob(request);
      break;

    case PBS_BATCH_GpuCtrl:
      rc = req_gpuctrl_svr(request);
      break;

    case PBS_BATCH_MvJobFile:
      rc = req_mvjobfile(request);
      break;

    case PBS_BATCH_StatusQue:
      rc = req_stat_que(request);
      break;

    case PBS_BATCH_StatusNode:
      rc = req_stat_node(request);
      break;

    case PBS_BATCH_StatusSvr:
      rc = req_stat_svr(request);
      break;

      /* DIAGTODO: handle PBS_BATCH_StatusDiag and define req_stat_diag() */

    case PBS_BATCH_TrackJob:
      rc = req_track(request);
      break;

    case PBS_BATCH_RegistDep:
      if (is_array(request->rq_ind.rq_register.rq_parent))
        {
        rc = req_registerarray(request);
        }
      else
        {
        rc = req_register(request);
        }

      break;

    case PBS_BATCH_AuthenUser:
      /* determine if user is valid */
      rc = req_authenuser(request);
      break;

    case PBS_BATCH_AltAuthenUser:
      /* accepted, but nothing is done for it here */
      break;

    case PBS_BATCH_JobObit:
      rc = req_jobobit(request);
      break;

    case PBS_BATCH_StageIn:
      rc = req_stagein(request);
      break;

    case PBS_BATCH_StatusJob:
      rc = req_stat_job(request);
      break;

    default:
      /* unknown request type: reject it and drop the connection unless
       * the request came in over the local (in-process) connection */
      req_reject(PBSE_UNKREQ, 0, request, NULL, NULL);

      if (sfds != PBS_LOCAL_CONNECTION)
        close_conn(sfds, FALSE);

      break;
    }  /* END switch (request->rq_type) */

  return(rc);
  }  /* END dispatch_request() */
Ejemplo n.º 3
0
/*
 * array_delete_wt - work task that follows up on a job array delete request.
 *
 * ptask->wt_parm1 is the id under which the batch request was stored; the
 * request is fetched (and removed) with get_remove_batch_request().  The
 * task structure and its mutex are freed here.
 *
 * Every job of the array that can still be found is examined:
 *   - jobs that can no longer be found have their id slot cleared;
 *   - jobs in JOB_SUBSTATE_PRERUN are forced out: a job with a checkpoint
 *     file goes through end-of-job processing (on_job_exit task), any other
 *     job is aborted, after removing staged-in files when present;
 *   - jobs in any other substate are released untouched.
 *
 * If every job found was in PRERUN the request is acknowledged, otherwise
 * it is passed to req_deletearray() to deal with the remaining jobs.
 *
 * Locking (as far as visible here): svr_find_job() presumably returns the
 * job locked and get_array() the array with ai_mutex held - this function
 * unlocks both without ever locking them first.  TODO confirm.
 */
void array_delete_wt(
    
  struct work_task *ptask)

  {
  struct batch_request *preq;
  job_array            *pa;

  int                   i;

  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  int                   num_jobs = 0;
  int                   num_prerun = 0;
  job                  *pjob;

  preq = get_remove_batch_request((char *)ptask->wt_parm1);
  
  /* the task is finished with once its parameter has been consumed */
  free(ptask->wt_mutex);
  free(ptask);

  if (preq == NULL)
    return;

  pa = get_array(preq->rq_ind.rq_delete.rq_objname);

  if (pa == NULL)
    {
    /* jobs must have exited already */
    reply_ack(preq);

    return;
    }

  for (i = 0; i < pa->ai_qs.array_size; i++)
    {
    if (pa->job_ids[i] == NULL)
      continue;
    
    if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
      /* stale id: the job no longer exists */
      free(pa->job_ids[i]);
      pa->job_ids[i] = NULL;

      continue;
      }

    num_jobs++;

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_PRERUN)
      {
      /* not stuck in PRERUN: leave the job for req_deletearray() below,
       * but release it so that it can be picked up again */
      unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);

      continue;
      }

    num_prerun++;
    /* mom still hasn't gotten job?? delete anyway */

    if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
      {
      /* job has restart file at mom, do end job processing */
      change_restart_comment_if_needed(pjob);

      svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING, FALSE);

      pjob->ji_momhandle = -1;

      /* force new connection */
      if (LOGLEVEL >= 7)
        {
        snprintf(log_buf, sizeof(log_buf), "calling on_job_exit from %s", __func__);
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
        }

      /* the task receives its own copy of the job id */
      set_task(WORK_Immed, 0, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE);

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    else
      {
      if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
        {
        /* job has staged-in file, should remove them; pjob is passed by
         * address and is re-checked below because it may come back NULL */
        remove_stagein(&pjob);
        }

      if (pjob != NULL)
        {
        /* job_abt() calls svr_job_purge which will try to lock the array again */
        pthread_mutex_unlock(pa->ai_mutex);
        job_abt(&pjob, NULL);
        pthread_mutex_lock(pa->ai_mutex);
        }
      }
    } /* END for each job in array */
  
  pthread_mutex_unlock(pa->ai_mutex);
  if (LOGLEVEL >= 7)
    {
    snprintf(log_buf, sizeof(log_buf), "%s: unlocked ai_mutex", __func__);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }
  
  if (num_jobs == num_prerun)
    {
    /* nothing but PRERUN jobs were left and all of them were handled */
    reply_ack(preq);
    }
  else
    {
    req_deletearray(preq);
    }

  } /* END array_delete_wt() */
Ejemplo n.º 4
0
void dispatch_request(

  int          sfds,    /* I */
  struct batch_request *request) /* I */

  {
  char *id = "dispatch_request";

  if (LOGLEVEL >= 5)
    {
    sprintf(log_buffer,"dispatching request %s on sd=%d",
      reqtype_to_txt(request->rq_type),
      sfds);

    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      id,
      log_buffer);
    }

  switch (request->rq_type)
    {
    case PBS_BATCH_QueueJob:

      net_add_close_func(sfds, close_quejob);

      req_quejob(request);

      break;

    case PBS_BATCH_JobCred:

      req_jobcredential(request);

      break;

    case PBS_BATCH_jobscript:

      req_jobscript(request);

      break;

    case PBS_BATCH_RdytoCommit:

      req_rdytocommit(request);

      break;

    case PBS_BATCH_Commit:

      req_commit(request);

      net_add_close_func(sfds, (void (*)())0);

      break;

    case PBS_BATCH_DeleteJob:

#ifdef PBS_MOM
      req_deletejob(request);
#else
      /* if this is a server size job delete request, then the request could also be
      * for an entire array.  we check to see if the request object name is an array id.
      * if so we hand off the the req_deletearray() function.  If not we pass along to the
      * normal req_deltejob() function.
      */

      if (is_array(request->rq_ind.rq_delete.rq_objname))
        {
        req_deletearray(request);
        }
      else
        {
        req_deletejob(request);
        }

#endif
      break;

    case PBS_BATCH_HoldJob:
#ifdef PBS_MOM
      req_holdjob(request);

#else
      if (is_array(request->rq_ind.rq_hold.rq_orig.rq_objname))
        {
        req_holdarray(request);
        }
      else
        {
        req_holdjob(request);
        }

#endif
      break;

    case PBS_BATCH_CheckpointJob:

      req_checkpointjob(request);

      break;

#ifndef PBS_MOM

    case PBS_BATCH_LocateJob:

      req_locatejob(request);

      break;

    case PBS_BATCH_Manager:

      req_manager(request);

      break;

#endif  /* END !PBS_MOM */

    case PBS_BATCH_MessJob:

      req_messagejob(request);

      break;

    case PBS_BATCH_AsyModifyJob:

    case PBS_BATCH_ModifyJob:
#ifndef PBS_MOM
      if (is_array(request->rq_ind.rq_delete.rq_objname))
        {
        req_modifyarray(request);
        }
      else
        {
        req_modifyjob(request);
        }
#else /* END ifndef PBS_MOM */
      req_modifyjob(request);
#endif /* PBS_MOM */

      break;

    case PBS_BATCH_Rerun:

      req_rerunjob(request);

      break;

#ifndef PBS_MOM

    case PBS_BATCH_MoveJob:

      req_movejob(request);

      break;

    case PBS_BATCH_OrderJob:

      req_orderjob(request);

      break;

    case PBS_BATCH_Rescq:

      req_rescq(request);

      break;

    case PBS_BATCH_ReserveResc:

      req_rescreserve(request);

      break;

    case PBS_BATCH_ReleaseResc:

      req_rescfree(request);

      break;

    case PBS_BATCH_ReleaseJob:

      if (is_array(request->rq_ind.rq_delete.rq_objname))
        {
        req_releasearray(request);
        }
      else
        {
        req_releasejob(request);
        }

      break;

    case PBS_BATCH_RunJob:

    case PBS_BATCH_AsyrunJob:

      req_runjob(request);

      break;

    case PBS_BATCH_SelectJobs:

    case PBS_BATCH_SelStat:

      /* handle special 'truncated' keyword */

      if (!strncasecmp(request->rq_ind.rq_status.rq_id, "truncated", strlen("truncated")))
        req_stat_job(request);
      else
        req_selectjobs(request);

      break;

#endif  /* !PBS_MOM */

    case PBS_BATCH_Shutdown:

      req_shutdown(request);

      break;

    case PBS_BATCH_SignalJob:

    case PBS_BATCH_AsySignalJob:

      req_signaljob(request);

      break;

    case PBS_BATCH_GpuCtrl:

      req_gpuctrl(request);

      break;

    case PBS_BATCH_StatusJob:

      req_stat_job(request);

      break;

    case PBS_BATCH_MvJobFile:

      req_mvjobfile(request);

      break;

#ifndef PBS_MOM  /* server only functions */

    case PBS_BATCH_StatusQue:

      req_stat_que(request);

      break;

    case PBS_BATCH_StatusNode:

      req_stat_node(request);

      break;

    case PBS_BATCH_StatusSvr:

      req_stat_svr(request);

      break;

      /* DIAGTODO: handle PBS_BATCH_StatusDiag and define req_stat_diag() */

    case PBS_BATCH_TrackJob:

      req_track(request);

      break;

    case PBS_BATCH_RegistDep:

      if (is_array(request->rq_ind.rq_register.rq_parent))
        {
        req_registerarray(request);
        }
      else
        {
        req_register(request);
        }

      break;

    case PBS_BATCH_AuthenUser:

      /* determine if user is valid */

      req_authenuser(request);

      break;

    case PBS_BATCH_AltAuthenUser:

      break;

    case PBS_BATCH_JobObit:

      req_jobobit(request);

      break;

    case PBS_BATCH_StageIn:

      req_stagein(request);

      break;

#else /* MOM only functions */

    case PBS_BATCH_ReturnFiles:

      req_returnfiles(request);

      break;

    case PBS_BATCH_CopyFiles:

      req_cpyfile(request);

      break;

    case PBS_BATCH_DelFiles:

      req_delfile(request);

      break;

#endif /* !PBS_MOM */

    default:

      req_reject(PBSE_UNKREQ, 0, request, NULL, NULL);

      close_client(sfds);

      break;
    }  /* END switch (request->rq_type) */

  return;
  }  /* END dispatch_request() */