Пример #1
0
/*
 * release_array_range()
 *
 * Releases every job of an array whose index falls inside a user-given range.
 *
 * @param pa - the array whose jobs are released
 * @param preq - the batch request, passed through to release_job()
 * @param range_str - string holding '=' followed by the range specification
 * @return 0 on success, PBSE_IVALREQ if range_str has no '=' or the range
 *         does not parse, otherwise the first non-zero code returned by
 *         release_job()
 */
int release_array_range(

  job_array            *pa,
  struct batch_request *preq,
  char                 *range_str)

  {
  tlist_head tl;
  int i;
  int rc;

  array_request_node *rn;
  array_request_node *to_free;

  char *range = strchr(range_str,'=');
  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
    {
    /* don't release any jobs if range error */

    return(PBSE_IVALREQ);
    }

  /* release just that range from the array */
  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* check the index before touching pa->jobs so that an oversized
       * range never reads outside the table */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->jobs[i] == NULL)
        continue;

      if ((rc = release_job(preq,pa->jobs[i])))
        {
        /* give back the current node and every node not yet visited
         * before bailing out */
        while (rn != NULL)
          {
          to_free = rn;
          rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
          free(to_free);
          }

        return(rc);
        }
      }

    /* release mem */
    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
    free(to_free);
    }

  return(0);

  } /* END release_array_range() */
Пример #2
0
/*
 * hold_array_range()
 *
 * Holds just a specified range of jobs from an array.
 *
 * @param pa - the array to be acted on
 * @param range_str - string holding '=' followed by the range specification
 * @param temphold - hold attribute handed to hold_job() for each job
 * @return 0 on success, PBSE_IVALREQ if range_str has no '=' or the range
 *         does not parse
 */
int hold_array_range(

  job_array *pa,         /* O */
  char      *range_str,  /* I */
  attribute *temphold)   /* I */

  {
  tlist_head tl;
  int i;

  array_request_node *rn;
  array_request_node *to_free;

  char *range = strchr(range_str,'=');
  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
    {
    /* don't hold the jobs if range error */

    return(PBSE_IVALREQ);
    }

  /* hold just that range from the array */
  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* check the index before touching pa->jobs so that an oversized
       * range never reads outside the table */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->jobs[i] == NULL)
        continue;

      hold_job(temphold,pa->jobs[i]);
      }

    /* release mem */
    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
    free(to_free);
    }

  return(0);
  } /* END hold_array_range() */
Пример #3
0
/*
 * setup_array_struct()
 *
 * Allocates and fills in the job_array that belongs to the template job
 * pjob, links it into svr_jobarrays, sizes its job table from the array
 * request and saves it.
 *
 * @param pjob - the template job of the array
 * @return 0 on success,
 *         1 if an allocation fails or the job cannot be saved,
 *         INVALID_SLOT_LIMIT if set_slot_limit() returns non-zero,
 *         ARRAY_TOO_LARGE if the job count exceeds SRV_ATR_MaxArraySize,
 *         2 if the array request contained bad tokens
 */
int setup_array_struct(job *pjob)
  {
  job_array *pa;
  array_request_node *rn;
  int bad_token_count;
  int array_size;
  int rc;

  /* setup a link to this job array in the servers all_arrays list */
  pa = (job_array *)calloc(1,sizeof(job_array));

  if (pa == NULL)
    {
    /* NOTE(review): no dedicated out-of-memory code is visible in this
     * function, so the generic failure value is used - confirm with caller */
    return 1;
    }

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;

  pa->template_job = pjob;

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);

  /* snprintf always terminates the destination, unlike strncpy */
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s",
    pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);
  snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s",
    get_variable(pjob, pbs_o_host));

  pa->ai_qs.num_cloned = 0;
  CLEAR_LINK(pa->all_arrays);
  CLEAR_HEAD(pa->request_tokens);
  append_link(&svr_jobarrays, &pa->all_arrays, (void*)pa);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    /* log while pjob is still valid; it must not be touched after the purge */
    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
      }

    /* NOTE(review): pa stays linked in svr_jobarrays on this path, as in the
     * original code - confirm that job_purge() disposes of it */
    job_purge(pjob);

    return 1;
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    /* build and emit the message before pa is deleted */
    snprintf(log_buffer,sizeof(log_buffer),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      server.sv_attr[SRV_ATR_MaxSlotLimit].at_val.at_long);
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      pa->ai_qs.parent_id,
      log_buffer);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;

  bad_token_count =
    parse_array_request(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
                        &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;
  while (rn != NULL)
    {
    if (rn->end > array_size)
      array_size = rn->end;
    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */
  array_size++;

  if (server.sv_attr[SRV_ATR_MaxArraySize].at_flags & ATR_VFLAG_SET)
    {
    int max_array_size = server.sv_attr[SRV_ATR_MaxArraySize].at_val.at_long;
    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array, zero-filled so every slot starts out empty */
  pa->jobs = calloc(array_size, sizeof(job *));

  if (pa->jobs == NULL)
    {
    array_delete(pa);

    return 1;
    }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);
    return 2;
    }

  return 0;

  }
Пример #4
0
/*
 * modify_array_range()
 *
 * Applies an attribute modification to every job of an array whose index
 * falls inside the given range. Jobs for which modify_job() reports
 * PBSE_RELAYED_TO_MOM get a copy of the request relayed to their MOM.
 *
 * @param pa - the array whose jobs are modified
 * @param range - the range specification to parse
 * @param plist - attribute list handed to modify_job()
 * @param preq - the originating batch request
 * @param checkpoint_req - handed to modify_job()
 * @return FAILURE if the range does not parse, PBSE_RELAYED_TO_MOM if at
 *         least one request was relayed, PBSE_NONE otherwise, or the
 *         non-zero code from copy_batchrequest() / relay_to_mom() on error
 */
int modify_array_range(

  job_array *pa,              /* I/O */
  char      *range,           /* I */
  svrattrl  *plist,           /* I */
  struct batch_request *preq, /* I */
  int        checkpoint_req)  /* I */

  {
  char id[] = "modify_array_range";
  tlist_head tl;
  int i, rc;
  int mom_relay = 0;

  array_request_node *rn;
  array_request_node *to_free;

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
    {
    /* don't modify any jobs if range error */

    return(FAILURE);
    }

  /* modify just that range from the array */
  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      if ((i >= pa->ai_qs.array_size) ||
          (pa->jobs[i] == NULL))
        continue;

      rc = modify_job(pa->jobs[i],plist,preq,checkpoint_req, NO_MOM_RELAY);

      if (rc == PBSE_RELAYED_TO_MOM)
        {
        struct batch_request *array_req = NULL;

        /* We told modify_job not to call relay_to_mom so we need to contact the mom */
        rc = copy_batchrequest(&array_req, preq, 0, i);
        if (rc != 0)
          {
          goto release_remaining;
          }

        /* one reference per relayed copy, plus one extra taken on the first
         * relay that is dropped again after the loop */
        preq->rq_refcount++;
        if (mom_relay == 0)
          {
          preq->rq_refcount++;
          }
        mom_relay++;
        if ((rc = relay_to_mom(
                    pa->jobs[i],
                    array_req,
                    post_modify_arrayreq)))
          {
          snprintf(log_buffer,sizeof(log_buffer),
            "Unable to relay information to mom for job '%s'\n",
            pa->jobs[i]->ji_qs.ji_jobid);
          log_err(rc,id,log_buffer);

          goto release_remaining; /* unable to get to MOM */
          }
        }
      }

    /* release mem */
    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
    free(to_free);
    }

  if (mom_relay)
    {
    preq->rq_refcount--;
    if (preq->rq_refcount == 0)
      {
      free_br(preq);
      }
    return(PBSE_RELAYED_TO_MOM);
    }

  return(PBSE_NONE);

release_remaining:

  /* error exit: free the current node and every node not yet visited so the
   * parsed range list is not leaked */
  while (rn != NULL)
    {
    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
    free(to_free);
    }

  return(rc);
  } /* END modify_array_range() */
Пример #5
0
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * @param pa - the array whose jobs are deleted
 * @param range_str - the user-given range to delete, holding '=' followed
 *                    by the range specification
 * @return - the number of jobs skipped, -1 if range error
 */
int delete_array_range(

  job_array *pa,
  char      *range_str)

  {
  tlist_head tl;
  array_request_node *rn;
  array_request_node *to_free;
  job *pjob;
  char *range;

  int i;
  int num_skipped = 0;

  /* get just the numeric range specified; qdel is expected to supply the
   * '=', but a missing one is treated as a range error rather than
   * dereferencing a NULL-derived pointer */
  range = strchr(range_str,'=');

  if (range == NULL)
    return(-1);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);
  if (parse_array_request(range,&tl) > 0)
    {
    /* don't delete jobs if range error */

    return(-1);
    }

  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* check the index before touching pa->jobs so that an oversized
       * range never reads outside the table */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      pjob = pa->jobs[i];

      if (pjob == NULL)
        continue;

      if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
        {
        /* invalid state for request,  skip */
        continue;
        }

      if (attempt_delete((void *)pjob) == FALSE)
        num_skipped++;
      }

    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
    }

  return(num_skipped);
  }
Пример #6
0
/*
 * setup_array_struct()
 *
 * Allocates and fills in the job_array that belongs to the template job
 * pjob, sizes its job id table from the array request, saves it and
 * registers it with insert_array(). The array's mutex is held from its
 * creation until just before a successful return.
 *
 * @param pjob - the template job of the array
 * @return PBSE_NONE on success,
 *         PBSE_MEM_MALLOC if an allocation fails,
 *         1 if the job cannot be saved,
 *         INVALID_SLOT_LIMIT if set_slot_limit() returns non-zero,
 *         ARRAY_TOO_LARGE if the job count exceeds SRV_ATR_MaxArraySize,
 *         2 if the array request contained bad tokens
 */
int setup_array_struct(
    
  job *pjob)

  {
  job_array          *pa;
  array_request_node *rn;

  int                 bad_token_count;
  int                 array_size;
  int                 rc;
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  long                max_array_size;

  pa = (job_array *)calloc(1,sizeof(job_array));

  if (pa == NULL)
    return(PBSE_MEM_MALLOC);

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
  
  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s", pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);
  snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s", get_variable(pjob, pbs_o_host));

  pa->ai_qs.num_cloned = 0;
  CLEAR_HEAD(pa->request_tokens);

  pa->ai_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (pa->ai_mutex == NULL)
    {
    /* nothing else hangs off pa yet, so a plain free is enough */
    free(pa);

    return(PBSE_MEM_MALLOC);
    }

  pthread_mutex_init(pa->ai_mutex,NULL);
  lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    /* log while pjob is still valid; it must not be touched after the purge */
    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
      }

    /* the array is deleted in svr_job_purge */
    /* NOTE(review): pjob->ji_arraystruct has not been set at this point, so
     * it is unclear how svr_job_purge would reach pa - does the job array
     * need to be removed here? */
    svr_job_purge(pjob);

    return(1);
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    long max_limit = 0;
    get_svr_attr_l(SRV_ATR_MaxSlotLimit, &max_limit);

    /* build and emit the message before pa is deleted */
    snprintf(log_buf,sizeof(log_buf),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      max_limit);

    log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,pa->ai_qs.parent_id,log_buf);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;
  
  bad_token_count = parse_array_request(
                      pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
                      &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;
  while (rn != NULL) 
    {
    if (rn->end > array_size)
      array_size = rn->end;
    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */
  array_size++; 

  if (get_svr_attr_l(SRV_ATR_MaxArraySize, &max_array_size) == PBSE_NONE)
    {
    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array */
  pa->job_ids = calloc(array_size, sizeof(char *));
  if (pa->job_ids == NULL)
    {
    snprintf(log_buf, sizeof(log_buf), "Failed to alloc job_ids: job %s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    /* pa is in the same state as on the ARRAY_TOO_LARGE path above, so it is
     * disposed of the same way instead of being leaked with its mutex held */
    array_delete(pa);

    return(PBSE_MEM_MALLOC);
    }


  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);
    return 2;
    }

  pjob->ji_arraystruct = pa;

  insert_array(pa);

  unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

  return(PBSE_NONE);
  } /* END setup_array_struct() */
Пример #7
0
/*
 * modify_array_range()
 *
 * Applies an attribute modification to every job of an array whose index
 * falls inside the given range. Jobs for which modify_job() reports
 * PBSE_RELAYED_TO_MOM get a copy of the request relayed to their MOM.
 *
 * @param pa - the array whose jobs are modified
 * @param range - the range specification to parse
 * @param plist - attribute list handed to modify_job()
 * @param preq - the originating batch request
 * @param checkpoint_req - handed to modify_job()
 * @return FAILURE if the range does not parse, PBSE_RELAYED_TO_MOM if at
 *         least one request was relayed, PBSE_NONE otherwise, or the
 *         non-zero code from copy_batchrequest() / relay_to_mom() on error
 */
int modify_array_range(

  job_array *pa,              /* I/O */
  char      *range,           /* I */
  svrattrl  *plist,           /* I */
  struct batch_request *preq, /* I */
  int        checkpoint_req)  /* I */

  {
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  tlist_head          tl;
  int                 i;
  int                 rc;
  int                 mom_relay = 0;   /* number of requests relayed so far */
  job                *pjob;

  array_request_node *rn;
  array_request_node *to_free;
  
  CLEAR_HEAD(tl);
  
  if (parse_array_request(range,&tl) > 0)
    {
    /* don't modify any jobs if range error */
    
    return(FAILURE);
    }
  else 
    {
    /* modify just that range from the array */
    rn = (array_request_node*)GET_NEXT(tl);
    
    while (rn != NULL)
      {
      for (i = rn->start; i <= rn->end; i++)
        {
        /* skip indices outside the table and slots with no job id */
        if ((i >= pa->ai_qs.array_size) ||
            (pa->job_ids[i] == NULL))
          continue;

        if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
          {
          /* the id no longer resolves to a job: drop the stale entry */
          free(pa->job_ids[i]);
          pa->job_ids[i] = NULL;
          }
        else
          {
          /* the array mutex is released around modify_job(); pa is then
           * re-fetched through the job.
           * NOTE(review): presumably get_jobs_array() returns the array
           * locked again and may return NULL - if so, the pa->job_ids[i]
           * write below and the next loop iteration would dereference NULL;
           * confirm */
          pthread_mutex_unlock(pa->ai_mutex);
          rc = modify_job((void **)&pjob, plist, preq, checkpoint_req, NO_MOM_RELAY);
          pa = get_jobs_array(&pjob);
          
          if (pjob != NULL)
            {
            if (rc == PBSE_RELAYED_TO_MOM)
              {
              struct batch_request *array_req = NULL;
              
              /* We told modify_job not to call relay_to_mom so we need to contact the mom */
              if ((rc = copy_batchrequest(&array_req, preq, 0, i)) != PBSE_NONE)
                {
                /* NOTE(review): this return does not call unlock_ji_mutex()
                 * on pjob and does not free the remaining range nodes,
                 * unlike the other exits - confirm this is intended */
                return(rc);
                }
              
              /* one reference per relayed copy, plus one extra taken on the
               * first relay that is dropped again after the loop */
              preq->rq_refcount++;
              if (mom_relay == 0)
                {
                preq->rq_refcount++;
                }
              mom_relay++;
              
              /* The array_req is freed in relay_to_mom (failure)
               * or in issue_Drequest (success) */
              
              if ((rc = relay_to_mom(&pjob, array_req, NULL)))
                {
                snprintf(log_buf,sizeof(log_buf),
                  "Unable to relay information to mom for job '%s'\n",
                  pjob->ji_qs.ji_jobid);
                log_err(rc, __func__, log_buf);
                
                unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
                
                /* NOTE(review): the remaining range nodes are not freed on
                 * this path */
                return(rc); /* unable to get to MOM */
                }
              else
                {
                unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
                post_modify_arrayreq(array_req);
                }
              }
            else
              unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
            }
          else
            pa->job_ids[i] = NULL;   /* pjob came back NULL: forget its id */

          }
        }
      
      /* release mem */
      to_free = rn;
      rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
      free(to_free);
      }
    }

  if (mom_relay)
    {
    /* drop the extra reference taken on the first relay; free the request
     * once no reference is left */
    preq->rq_refcount--;
    if (preq->rq_refcount == 0)
      {
      free_br(preq);
      }
    return(PBSE_RELAYED_TO_MOM);
    }

  return(PBSE_NONE);
  } /* END modify_array_range() */
Пример #8
0
/*
 * release_array_range()
 *
 * Releases every job of an array whose index falls inside a user-given
 * range. Job ids that no longer resolve to a job are dropped from the array.
 *
 * @param pa - the array whose jobs are released
 * @param preq - the batch request, passed through to release_job()
 * @param range_str - string holding '=' followed by the range specification
 * @return PBSE_NONE on success, PBSE_IVALREQ if range_str has no '=' or the
 *         range does not parse, otherwise the first non-zero code returned
 *         by release_job()
 */
int release_array_range(

  job_array            *pa,
  struct batch_request *preq,
  char                 *range_str)

  {
  tlist_head tl;
  int i;
  int rc;
  job                *pjob;

  array_request_node *rn;
  array_request_node *to_free;
  
  char *range = strchr(range_str,'=');
  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */
  
  CLEAR_HEAD(tl);
  
  if (parse_array_request(range,&tl) > 0)
    {
    /* don't release any jobs if range error */
    
    return(PBSE_IVALREQ);
    }
  
  /* release just that range from the array */
  rn = (array_request_node*)GET_NEXT(tl);
  
  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* don't stomp on other memory */
      if (i >= pa->ai_qs.array_size)
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
        {
        /* the id no longer resolves to a job: drop the stale entry */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;
        }
      else
        {
        if ((rc = release_job(preq,pjob)))
          {
          unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

          /* give back the current node and every node not yet visited
           * before bailing out */
          while (rn != NULL)
            {
            to_free = rn;
            rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
            free(to_free);
            }

          return(rc);
          }
        unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
        }
      }
    
    /* release mem */
    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);
    free(to_free);
    }

  return(PBSE_NONE);
  } /* END release_array_range() */
Пример #9
0
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * @param pa - the array whose jobs are deleted
 * @param range_str - the user-given range to delete, holding '=' followed
 *                    by the range specification
 * @return - the number of jobs skipped, -1 if range error
 */
int delete_array_range(

  job_array *pa,
  char      *range_str)

  {
  tlist_head          tl;
  array_request_node *rn;
  array_request_node *to_free;
  job                *pjob;
  char               *range;

  int                 i;
  int                 num_skipped = 0;
  int                 deleted;

  /* get just the numeric range specified; qdel is expected to supply the
   * '=', but a missing one is treated as a range error rather than
   * dereferencing a NULL-derived pointer */
  range = strchr(range_str,'=');

  if (range == NULL)
    return(-1);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);
  if (parse_array_request(range,&tl) > 0)
    {
    /* don't delete jobs if range error */

    return(-1);
    }

  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* check the index before touching pa->job_ids so that an oversized
       * range never reads outside the table */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
        {
        /* the id no longer resolves to a job: drop the stale entry */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;
        }
      else
        {
        if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
          {
          /* invalid state for request,  skip */
          unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
          continue;
          }

        /* the array mutex is dropped around attempt_delete() and taken
         * again afterwards */
        pthread_mutex_unlock(pa->ai_mutex);
        deleted = attempt_delete(pjob);

        if (deleted == FALSE)
          {
          /* if the job was deleted, this mutex would be taken care of elsewhere. When it fails,
           * release it here */
          unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

          num_skipped++;
          }

        pthread_mutex_lock(pa->ai_mutex);
        }
      }

    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
    }

  return(num_skipped);
  }
Пример #10
0
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * Jobs that were deleted while not in the running state are added to the
 * array's num_failed counter before returning.
 *
 * @param pa - the array whose jobs are deleted
 * @param range_str - the user-given range to delete, holding '=' followed
 *                    by the range specification
 * @return - the number of jobs skipped, -1 if range error
 */
int delete_array_range(

  job_array *pa,
  char      *range_str)

  {
  tlist_head          tl;
  array_request_node *rn;
  array_request_node *to_free;
  job                *pjob;
  char               *range;

  int                 i;
  int                 num_skipped = 0;
  int                 num_deleted = 0;
  int                 deleted;
  int                 running;

  /* get just the numeric range specified, '=' should
   * always be there since we put it there in qdel */
  if((range = strchr(range_str,'=')) == NULL)
    return(-1);
  range++; /* move past the '=' */

  CLEAR_HEAD(tl);
  if (parse_array_request(range,&tl) > 0)
    {
    /* don't delete jobs if range error */

    return(-1);
    }

  rn = (array_request_node*)GET_NEXT(tl);

  while (rn != NULL)
    {
    for (i = rn->start; i <= rn->end; i++)
      {
      /* check the index before touching pa->job_ids so that an oversized
       * range never reads outside the table */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
        {
        /* the id no longer resolves to a job: drop the stale entry */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;
        }
      else
        {
        mutex_mgr pjob_mutex = mutex_mgr(pjob->ji_mutex, true);
        if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
          {
          /* invalid state for request,  skip */
          continue;
          }

        /* sample the state now; pjob must not be read after attempt_delete */
        running = (pjob->ji_qs.ji_state == JOB_STATE_RUNNING);

        /* the array mutex is dropped around attempt_delete() and taken
         * again afterwards */
        pthread_mutex_unlock(pa->ai_mutex);
        deleted = attempt_delete(pjob);
        /* we come out of attempt_delete unlocked */
        pjob_mutex.set_lock_on_exit(false);


        if (deleted == FALSE)
          {
          /* the job could not be deleted: count it as skipped */
          num_skipped++;
          }
        else if (running == FALSE)
          {
          /* running jobs will increase the deleted count when their obit is reported */
          num_deleted++;
          }

        pthread_mutex_lock(pa->ai_mutex);
        }
      }

    to_free = rn;
    rn = (array_request_node*)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
    }

  pa->ai_qs.num_failed += num_deleted;

  return(num_skipped);
  } /* END delete_array_range() */