예제 #1
0
void *single_delete_work(

  void *vp)

  {
  int              rc = -1;
  batch_request   *preq = (batch_request *)vp;
  char            *jobid = preq->rq_ind.rq_delete.rq_objname;
  job             *pjob;
  char            *Msg = preq->rq_extend;

  pjob = svr_find_job(jobid, FALSE);

  if (pjob == NULL)
    {
    req_reject(PBSE_JOBNOTFOUND, 0, preq, NULL, "job unexpectedly deleted");
    }
  else
    {
    /* mutex is freed below */
    if ((rc = forced_jobpurge(pjob, preq)) == PBSE_NONE)
      rc = execute_job_delete(pjob, Msg, preq);
 
    if ((rc == PBSE_NONE) ||
        (rc == PURGE_SUCCESS))
      reply_ack(preq);
    }

  return(NULL);
  } /* END single_delete_work() */
예제 #2
0
void *single_delete_work(

  void *vp)

  {
  int              rc = -1;
  batch_request   *preq = (batch_request *)vp;
  char            *jobid = preq->rq_ind.rq_delete.rq_objname;
  job             *pjob;
  char            *Msg = preq->rq_extend;

  // TRUE is the same for non-heterogeneous jobs as FALSE. For heterogeneous
  // jobs simply delete one to trigger the other being deleted as well.
  pjob = svr_find_job(jobid, TRUE);

  if (pjob == NULL)
    {
    req_reject(PBSE_JOBNOTFOUND, 0, preq, NULL, "job unexpectedly deleted");
    }
  else
    {
    /* mutex is freed below */
    if ((rc = forced_jobpurge(pjob, preq)) == PBSE_NONE)
      rc = execute_job_delete(pjob, Msg, preq);
 
    if ((rc == PBSE_NONE) ||
        (rc == PURGE_SUCCESS))
      reply_ack(preq);
    }

  return(NULL);
  } /* END single_delete_work() */
예제 #3
0
END_TEST

START_TEST(test_forced_jobpurge)
  {
  job           *pjob;
  batch_request *preq;

  pjob = (job *)calloc(1, sizeof(job));
  preq = (batch_request *)calloc(1, sizeof(batch_request));

  preq->rq_extend = strdup(delpurgestr);
  nanny = 0;
  fail_unless(forced_jobpurge(pjob, preq) == -1);

  preq->rq_perm = ATR_DFLAG_MGRD;
  fail_unless(forced_jobpurge(pjob, preq) == PURGE_SUCCESS);
 
  preq->rq_extend = NULL;
  fail_unless(forced_jobpurge(pjob, preq) == PBSE_NONE);

  nanny = 1;
  }
예제 #4
0
파일: req_delete.c 프로젝트: dhill12/test
int handle_single_delete(

  struct batch_request *preq,
  struct batch_request *preq_tmp,
  char                 *Msg)

  {
  int   rc= -1;
  char *jobid = preq->rq_ind.rq_delete.rq_objname;
  job  *pjob = svr_find_job(jobid, FALSE);

  if (pjob == NULL)
    {
    log_event(PBSEVENT_DEBUG,PBS_EVENTCLASS_JOB,jobid,pbse_to_txt(PBSE_UNKJOBID));
    
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, "cannot locate job");
    }
  else
    {
    if (preq_tmp != NULL)
      {
      reply_ack(preq_tmp);
      preq->rq_noreply = TRUE; /* set for no more replies */
      }
    
    /* mutex is freed below */
    if ((rc = forced_jobpurge(pjob, preq)) == PBSE_NONE)
      rc = execute_job_delete(pjob, Msg, preq);
    }
  
  if ((rc == PBSE_NONE) ||
      (rc == PURGE_SUCCESS))
    reply_ack(preq);

  return(PBSE_NONE);
  } /* END handle_single_delete() */
예제 #5
0
파일: req_delete.c 프로젝트: dhill12/test
int handle_delete_all(

  struct batch_request *preq,
  struct batch_request *preq_tmp,
  char                 *Msg)

  {
  /* don't use the actual request so we can reply about all of the jobs */
  struct batch_request *preq_dup = duplicate_request(preq);
  job                  *pjob;
  int                   iter = -1;
  int                   failed_deletes = 0;
  int                   total_jobs = 0;
  int                   rc = PBSE_NONE;
  char                  tmpLine[MAXLINE];

  preq_dup->rq_noreply = TRUE;
  
  if (preq_tmp != NULL)
    {
    reply_ack(preq_tmp);
    preq->rq_noreply = TRUE; /* set for no more replies */
    }
  
  while ((pjob = next_job(&alljobs, &iter)) != NULL)
    {
    if ((rc = forced_jobpurge(pjob, preq_dup)) == PURGE_SUCCESS)
      {
      continue;
      }

    if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      
      continue;
      }
    
    total_jobs++;
    
    /* mutex is freed below */
    if (rc == PBSE_NONE)
      {
      if ((rc = execute_job_delete(pjob, Msg, preq_dup)) == PBSE_NONE)
        reply_ack(preq_dup);
       
      /* mark this as NULL because it has been freed */
      preq_dup = NULL;
      }
    
    if (rc != PURGE_SUCCESS)
      {
      /* duplicate the preq so we don't have a problem with double frees */
      preq_dup = duplicate_request(preq);
      preq_dup->rq_noreply = TRUE;
      
      if ((rc == MOM_DELETE) ||
          (rc == ROUTE_DELETE))
        failed_deletes++;
      }
    }
  
  if (failed_deletes == 0)
    {
    reply_ack(preq);

    /* PURGE SUCCESS means this was qdel -p all. In this case no reply_*() 
     * functions have been called */
    if (rc == PURGE_SUCCESS)
      {
      free_br(preq_dup);
      preq_dup = NULL;
      }
    }
  else
    {
    snprintf(tmpLine,sizeof(tmpLine),"Deletes failed for %d of %d jobs",
      failed_deletes,
      total_jobs);
    
    req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine);
    }
    
  /* preq_dup happens at the end of the loop, so free the extra one if
   * it is there */
  if (preq_dup != NULL)
    free_br(preq_dup);

  return(PBSE_NONE);
  } /* END handle_delete_all() */
예제 #6
0
void req_deletejob(

  struct batch_request *preq)  /* I */

  {
  job              *pjob;

  struct work_task *pwtold;

  struct work_task *pwtnew;
  struct work_task *pwtcheck;

  int               rc;
  char             *sigt = "SIGTERM";

  char             *Msg = NULL;

  /* check if we are getting a purgecomplete from scheduler */
  if ((preq->rq_extend != NULL) && 
        !strncmp(preq->rq_extend,PURGECOMP,strlen(PURGECOMP)))
    {

    /*
     * purge_completed_jobs will respond with either an ack or reject
     */
    purge_completed_jobs(preq);

    return;
    }

  /* The way this is implemented, if the user enters the command "qdel -p <jobid>",
   * they can then delete jobs other than their own since the authorization
   * checks are made below in chk_job_request. This should probably be fixed.
   */

  if (forced_jobpurge(preq) != 0)
    {
    return;
    }

  /* NOTE:  should support rq_objname={<JOBID>|ALL|<name:<JOBNAME>} */

  /* NYI */

  pjob = chk_job_request(preq->rq_ind.rq_delete.rq_objname, preq);

  if (pjob == NULL)
    {
    /* NOTE:  chk_job_request() will issue req_reject() */

    return;
    }

  if (preq->rq_extend != NULL)
    {
    if (strncmp(preq->rq_extend, deldelaystr, strlen(deldelaystr)) &&
        strncmp(preq->rq_extend, delasyncstr, strlen(delasyncstr)) &&
        strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)))
      {
      /* have text message in request extension, add it */

      Msg = preq->rq_extend;

      /*
       * Message capability is only for operators and managers.
       * Check if request is authorized
      */

      if ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
                            ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0)
        {
        req_reject(PBSE_PERM, 0, preq, NULL,
                   "must have operator or manager privilege to use -m parameter");
        return;
        }
      }
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
    {
    /*
     * Find pid of router from existing work task entry,
     * then establish another work task on same child.
     * Next, signal the router and wait for its completion;
     */

    pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask);

    while (pwtold != NULL)
      {
      if ((pwtold->wt_type == WORK_Deferred_Child) ||
          (pwtold->wt_type == WORK_Deferred_Cmp))
        {
        pwtnew = set_task(
                   pwtold->wt_type,
                   pwtold->wt_event,
                   post_delete_route,
                   preq);

        if (pwtnew != NULL)
          {
          /*
           * reset type in case the SIGCHLD came
           * in during the set_task;  it makes
           * sure that next_task() will find the
           * new entry.
           */

          pwtnew->wt_type = pwtold->wt_type;
          pwtnew->wt_aux = pwtold->wt_aux;

          kill((pid_t)pwtold->wt_event, SIGTERM);

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT;

          return; /* all done for now */
          }
        else
          {
          req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

          return;
          }
        }

      pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj);
      }

    /* should never get here ...  */

    log_err(-1, "req_delete", "Did not find work task for router");

    req_reject(PBSE_INTERNAL, 0, preq, NULL, NULL);

    return;
    }

  if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 )
    {
    /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */
    /* retry in one second                            */
    /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the
       job is being requeued. Wait until finished */

    static time_t  cycle_check_when = 0;
    static char    cycle_check_jid[PBS_MAXSVRJOBID + 1];

    if (cycle_check_when != 0)
      {
      if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) &&
          (time_now - cycle_check_when > 10))
        {
        /* state not updated after 10 seconds */

        /* did the mom ever get it? delete it anyways... */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;

        goto jump;
        }

      if (time_now - cycle_check_when > 20)
        {
        /* give up after 20 seconds */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;
        }
      }    /* END if (cycle_check_when != 0) */

    if (cycle_check_when == 0)
      {
      /* new PRERUN job located */

      cycle_check_when = time_now;
      strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid);
      }

    sprintf(log_buffer, "job cannot be deleted, state=PRERUN, requeuing delete request");

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    pwtnew = set_task(
               WORK_Timed,
               time_now + 1,
               post_delete_route,
               preq);

    if (pwtnew == 0)
      req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

    return;
    }  /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */

jump:

  /*
   * Log delete and if requesting client is not job owner, send mail.
   */

  sprintf(log_buffer, "requestor=%s@%s",
          preq->rq_user,
          preq->rq_host);


  /* NOTE:  should annotate accounting record with extend message (NYI) */

  account_record(PBS_ACCT_DEL, pjob, log_buffer);

  sprintf(log_buffer, msg_manager,
          msg_deletejob,
          preq->rq_user,
          preq->rq_host);

  log_event(
    PBSEVENT_JOB,
    PBS_EVENTCLASS_JOB,
    pjob->ji_qs.ji_jobid,
    log_buffer);

  /* NOTE:  should incorporate job delete message */

  if (Msg != NULL)
    {
    /* have text message in request extension, add it */

    strcat(log_buffer, "\n");
    strcat(log_buffer, Msg);
    }

  if ((svr_chk_owner(preq, pjob) != 0) &&
      !has_job_delete_nanny(pjob))
    {
    /* only send email if owner did not delete job and job deleted
       has not been previously attempted */

    svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buffer);
    /*
     * If we sent mail and already sent the extra message
     * then reset message so we don't trigger a redundant email
     * in job_abt()
    */

    if (Msg != NULL)
      {
      Msg = NULL;
      }
    }

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, change restart comment if failed */

    change_restart_comment_if_needed(pjob);
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * setup a nanny task to make sure the job is actually deleted (see the
     * comments at job_delete_nanny()).
     */

    if (has_job_delete_nanny(pjob))
      {
      req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress");

      return;
      }

    apply_job_delete_nanny(pjob, time_now + 60);

    /* check if we are getting a asynchronous delete */

    if ((preq->rq_extend != NULL) &&
          !strncmp(preq->rq_extend,DELASYNC,strlen(DELASYNC)))
      {
      struct batch_request *preq_tmp = NULL;
      /*
       * Respond with an ack now instead of after MOM processing
       * Create a new batch request and fill it in. It will be freed by reply_ack
       */

      snprintf(log_buffer,sizeof(log_buffer), "Deleting job asynchronously");
      log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buffer);

      preq_tmp = alloc_br(PBS_BATCH_DeleteJob);
      preq_tmp->rq_perm = preq->rq_perm;
      preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
      preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
      preq_tmp->rq_fromsvr = preq->rq_fromsvr;
      preq_tmp->rq_extsz = preq->rq_extsz;
      preq_tmp->rq_conn = preq->rq_conn;
      memcpy(preq_tmp->rq_ind.rq_manager.rq_objname,
          preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1);
      memcpy(preq_tmp->rq_user, preq->rq_user, PBS_MAXUSER + 1);
      memcpy(preq_tmp->rq_host, preq->rq_host, PBS_MAXHOSTNAME + 1);

      reply_ack(preq_tmp);
      preq->rq_noreply = TRUE; /* set for no more replies */
      }
  
    /* make a cleanup task if set */
    if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
        (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
      {
      pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
      if (pwtcheck != NULL)
        append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
      }

    /*
     * Send signal request to MOM.  The server will automagically
     * pick up and "finish" off the client request when MOM replies.
     */

    if ((rc = issue_signal(pjob, sigt, post_delete_mom1, preq)))
      {
      /* cant send to MOM */

      req_reject(rc, 0, preq, NULL, NULL);
      }

    /* normally will ack reply when mom responds */

    sprintf(log_buffer, msg_delrunjobsig,
            sigt);

    LOG_EVENT(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    return;
    }  /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */

  /* make a cleanup task if set */
  if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
      (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
    {
    pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
    if (pwtcheck != NULL)
      append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
    }

  /* if configured, and this job didn't have a slot limit hold, free a job
   * held with the slot limit hold */
  if ((server.sv_attr[SRV_ATR_MoabArrayCompatible].at_val.at_long != FALSE) &&
      ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE))
    {
    if ((pjob->ji_arraystruct != NULL) &&
        (pjob->ji_is_array_template == FALSE))
      {
      int        i;
      int        newstate;
      int        newsub;
      job       *tmp;
      job_array *pa = pjob->ji_arraystruct;

      for (i = 0; i < pa->ai_qs.array_size; i++)
        {
        if (pa->jobs[i] == NULL)
          continue;

        tmp = (job *)pa->jobs[i];

        if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l)
          {
          tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l;
              
          if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0)
            {
            tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET;
            }
          
          svr_evaljobstate(tmp, &newstate, &newsub, 1);
          svr_setjobstate(tmp, newstate, newsub);
          job_save(tmp, SAVEJOB_FULL, 0);

          break;
          }
        }
      }
    } /* END MoabArrayCompatible check */

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, do end job processing */
    
    svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

    pjob->ji_momhandle = -1;

    /* force new connection */

    pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

    if (pwtnew)
      {
      append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
      }
    }
  else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
    {
    /* job has staged-in file, should remove them */

    remove_stagein(pjob);

    job_abt(&pjob, Msg);
    }
  else
    {
    /*
     * the job is not transitting (though it may have been) and
     * is not running, so put in into a complete state.
     */

    struct work_task *ptask;
    struct pbs_queue *pque;
    int  KeepSeconds = 0;

    svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE);

    if ((pque = pjob->ji_qhdr) && (pque != NULL))
      {
      pque->qu_numcompleted++;
      }

    KeepSeconds = attr_ifelse_long(
                    &pque->qu_attr[QE_ATR_KeepCompleted],
                    &server.sv_attr[SRV_ATR_KeepCompleted],
                    0);
    ptask = set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, pjob);

    if (ptask != NULL)
      {
      append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask);
      }
    }  /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */

  reply_ack(preq);

  return;
  }  /* END req_deletejob() */
예제 #7
0
void *delete_all_work(

  void *vp)

  {
  batch_request *preq = (batch_request *)vp;

  if (qdel_all_tracker.start_deleting_all_if_possible(preq->rq_user, preq->rq_perm) == false)
    {
    reply_ack(preq);
    return(NULL);
    }

  batch_request *preq_dup = duplicate_request(preq);
  job           *pjob;
  all_jobs_iterator *iter = NULL;
  int            failed_deletes = 0;
  int            total_jobs = 0;
  int            rc = PBSE_NONE;
  char           tmpLine[MAXLINE];
  char          *Msg = preq->rq_extend;
  
  alljobs.lock();
  iter = alljobs.get_iterator();
  alljobs.unlock();
  while ((pjob = next_job(&alljobs, iter)) != NULL)
    {
    // use mutex manager to make sure job mutex locks are properly handled at exit
    mutex_mgr job_mutex(pjob->ji_mutex, true);
 
    if ((rc = forced_jobpurge(pjob, preq_dup)) == PURGE_SUCCESS)
      {
      job_mutex.set_unlock_on_exit(false);

      continue;
      }

    if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
      job_mutex.unlock();
      
      if(rc == -1)
        {
        //forced_jobpurge freed preq_dup so reallocate it.
        preq_dup = duplicate_request(preq);
        preq_dup->rq_noreply = TRUE;
        }
      continue;
      }
    
    total_jobs++;
    
    /* mutex is freed below */
    if (rc == PBSE_NONE)
      {
      if ((rc = execute_job_delete(pjob, Msg, preq_dup)) == PBSE_NONE)
        {
        // execute_job_delete() handles mutex so don't unlock on exit
        job_mutex.set_unlock_on_exit(false);
        reply_ack(preq_dup);
        }
       
      /* preq_dup has been freed at this point. Either reallocate it or set it to NULL*/
      if (rc == PURGE_SUCCESS)
        {
        preq_dup = duplicate_request(preq);
        preq_dup->rq_noreply = TRUE;
        }
      else
        preq_dup = NULL;
      }
    
    if (rc != PURGE_SUCCESS)
      {
      /* duplicate the preq so we don't have a problem with double frees */
      preq_dup = duplicate_request(preq);
      preq_dup->rq_noreply = TRUE;
      
      if ((rc == MOM_DELETE) ||
          (rc == ROUTE_DELETE))
        failed_deletes++;
      }
    }

  delete iter;
  
  qdel_all_tracker.done_deleting_all(preq->rq_user, preq->rq_perm);
  
  if (failed_deletes == 0)
    {
    reply_ack(preq);

    /* PURGE SUCCESS means this was qdel -p all. In this case no reply_*() 
     * functions have been called */
    if (rc == PURGE_SUCCESS)
      {
      free_br(preq_dup);
      preq_dup = NULL;
      }
    }
  else
    {
    snprintf(tmpLine,sizeof(tmpLine),"Deletes failed for %d of %d jobs",
      failed_deletes,
      total_jobs);
    
    req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine);
    }
    
  /* preq_dup happens at the end of the loop, so free the extra one if
   * it is there */
  if (preq_dup != NULL)
    free_br(preq_dup);

  return(NULL);
  } /* END delete_all_work() */