/*
 * release_array_range()
 *
 * Calls release_job() on every existing sub-job of the array whose index
 * falls inside the requested range.
 *
 * @param pa        - the array to be acted on
 * @param preq      - the batch request handed through to release_job()
 * @param range_str - text holding an '=' followed by the index range
 * @return 0 when every release_job() call returned 0, PBSE_IVALREQ when
 *         range_str has no '=' or the range does not parse, otherwise the
 *         first non-zero value returned by release_job()
 */
int release_array_range(
  job_array            *pa,
  struct batch_request *preq,
  char                 *range_str)
{
  tlist_head          tl;
  int                 i;
  int                 rc = 0;
  int                 stop = 0;
  array_request_node *rn;
  array_request_node *to_free;

  char *range = strchr(range_str,'=');

  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't release any jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    rc = PBSE_IVALREQ;
    stop = 1;
  }

  /* release just that range from the array */
  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (stop == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory: the index must be validated
       * before pa->jobs[i] is read */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->jobs[i] == NULL)
        continue;

      if ((rc = release_job(preq,pa->jobs[i])))
        stop = 1; /* keep walking the list only to free it */
    }

    /* release mem */
    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    free(to_free);
  }

  return(rc);
} /* END release_array_range() */
/*
 * hold_array_range()
 *
 * holds just a specified range from an array
 *
 * @param pa        - the array to be acted on
 * @param range_str - text holding an '=' followed by the index range
 * @param temphold  - attribute handed through to hold_job() for each job
 * @return 0 on success, PBSE_IVALREQ when range_str has no '=' or the
 *         range does not parse
 */
int hold_array_range(
  job_array *pa,         /* O */
  char      *range_str,  /* I */
  attribute *temphold)   /* I */
{
  tlist_head          tl;
  int                 i;
  int                 range_error = 0;
  array_request_node *rn;
  array_request_node *to_free;

  char *range = strchr(range_str,'=');

  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't hold the jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    range_error = 1;
  }

  /* hold just that range from the array */
  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (range_error == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory: the index must be validated
       * before pa->jobs[i] is read */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->jobs[i] == NULL)
        continue;

      hold_job(temphold,pa->jobs[i]);
    }

    /* release mem */
    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    free(to_free);
  }

  if (range_error)
    return(PBSE_IVALREQ);

  return(0);
} /* END hold_array_range() */
/*
 * setup_array_struct()
 *
 * Allocates a job_array for the template job pjob, links it into
 * svr_jobarrays, parses the job's array request to size the jobs table,
 * and saves the array.
 *
 * @param pjob - the template job for the array
 * @return 0 on success,
 *         1 if the job could not be saved (job_purge() has been called),
 *         INVALID_SLOT_LIMIT if set_slot_limit() rejected the request,
 *         ARRAY_TOO_LARGE if the request exceeds MaxArraySize or the jobs
 *           table could not be allocated,
 *         2 if the array request contained bad tokens.
 *         In the last three cases array_delete() has been called on the array.
 */
int setup_array_struct(job *pjob)
{
  job_array          *pa;
  array_request_node *rn;
  int                 bad_token_count;
  int                 array_size;
  int                 rc;

  /* setup a link to this job array in the servers all_arrays list */
  /* NOTE(review): this allocation is not checked; no return code visible
   * here clearly means "out of memory, job untouched" - confirm with callers */
  pa = (job_array *)calloc(1,sizeof(job_array));

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
  pa->template_job = pjob;

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);

  /* snprintf always NUL-terminates, unlike strncpy with a full-size bound */
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s",
    pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);
  snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s",
    get_variable(pjob, pbs_o_host));

  pa->ai_qs.num_cloned = 0;
  CLEAR_LINK(pa->all_arrays);
  CLEAR_HEAD(pa->request_tokens);
  append_link(&svr_jobarrays, &pa->all_arrays, (void*)pa);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
  {
    /* log while pjob is still known to be valid, then purge it */
    if (LOGLEVEL >= 6)
    {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
    }

    /* NOTE(review): pa stays linked in svr_jobarrays with template_job
     * pointing at pjob - confirm job_purge() cleans that up */
    job_purge(pjob);

    return 1;
  }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
  {
    /* build and emit the message before array_delete(), which must not be
     * followed by any access to pa */
    snprintf(log_buffer,sizeof(log_buffer),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      server.sv_attr[SRV_ATR_MaxSlotLimit].at_val.at_long);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buffer);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
  }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;

  bad_token_count = parse_array_request(
    pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
    &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;

  while (rn != NULL)
  {
    if (rn->end > array_size)
      array_size = rn->end;

    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
  }

  /* size of array is the biggest index + 1 */
  array_size++;

  if (server.sv_attr[SRV_ATR_MaxArraySize].at_flags & ATR_VFLAG_SET)
  {
    long max_array_size = server.sv_attr[SRV_ATR_MaxArraySize].at_val.at_long;

    if (max_array_size < pa->ai_qs.num_jobs)
    {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
    }
  }

  /* initialize the array; calloc zero-fills the table */
  pa->jobs = calloc(array_size, sizeof(job *));

  if (pa->jobs == NULL)
  {
    /* the table could not be allocated; the array is in the same state as
     * in the MaxArraySize rejection above, so it is torn down the same way */
    snprintf(log_buffer,sizeof(log_buffer),
      "Array %s: cannot allocate job table for %d entries\n",
      pa->ai_qs.parent_id,
      array_size);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buffer);

    array_delete(pa);

    return(ARRAY_TOO_LARGE);
  }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
  {
    array_delete(pa);
    return 2;
  }

  return 0;
}
/*
 * modify_array_range()
 *
 * Calls modify_job() on every existing sub-job in the given range. When
 * modify_job() answers PBSE_RELAYED_TO_MOM, a copy of the request is made
 * and relayed to the mom with post_modify_arrayreq as the callback.
 *
 * Any other value returned by modify_job() is ignored, as before.
 *
 * @param pa             - the array to be acted on
 * @param range          - the index range (no leading '=')
 * @param plist          - attribute list handed to modify_job()
 * @param preq           - the originating batch request
 * @param checkpoint_req - handed through to modify_job()
 * @return FAILURE if the range does not parse, the failing code from
 *         copy_batchrequest() or relay_to_mom(), PBSE_RELAYED_TO_MOM if at
 *         least one job was relayed, else PBSE_NONE
 */
int modify_array_range(
  job_array            *pa,              /* I/O */
  char                 *range,           /* I */
  svrattrl             *plist,           /* I */
  struct batch_request *preq,            /* I */
  int                   checkpoint_req)  /* I */
{
  char                id[] = "modify_array_range";
  tlist_head          tl;
  int                 i;
  int                 rc;
  int                 failed = 0;        /* set once an error exit is pending */
  int                 result = PBSE_NONE;
  int                 mom_relay = 0;
  array_request_node *rn;
  array_request_node *to_free;

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't modify the jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    failed = 1;
    result = FAILURE;
  }

  /* modify just that range from the array */
  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (failed == 0) && (i <= rn->end); i++)
    {
      struct batch_request *array_req = NULL;

      if ((i >= pa->ai_qs.array_size) || (pa->jobs[i] == NULL))
        continue;

      rc = modify_job(pa->jobs[i],plist,preq,checkpoint_req, NO_MOM_RELAY);

      if (rc != PBSE_RELAYED_TO_MOM)
        continue;

      /* We told modify_job not to call relay_to_mom so we need to contact the mom */
      rc = copy_batchrequest(&array_req, preq, 0, i);

      if (rc != 0)
      {
        failed = 1;
        result = rc;
        continue;
      }

      preq->rq_refcount++;

      /* the first relay takes one extra reference, dropped after the loop */
      if (mom_relay == 0)
      {
        preq->rq_refcount++;
      }

      mom_relay++;

      if ((rc = relay_to_mom(pa->jobs[i], array_req, post_modify_arrayreq)))
      {
        snprintf(log_buffer,sizeof(log_buffer),
          "Unable to relay information to mom for job '%s'\n",
          pa->jobs[i]->ji_qs.ji_jobid);

        log_err(rc,id,log_buffer);

        /* unable to get to MOM */
        failed = 1;
        result = rc;
      }
    }

    /* release mem - this also runs on the error path so the nodes that
     * were not processed are not leaked */
    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    free(to_free);
  }

  if (failed)
    return(result);

  if (mom_relay)
  {
    preq->rq_refcount--;

    if (preq->rq_refcount == 0)
    {
      free_br(preq);
    }

    return(PBSE_RELAYED_TO_MOM);
  }

  return(PBSE_NONE);
} /* END modify_array_range() */
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * @param pa        - the array whose jobs are deleted
 * @param range_str - the user-given range to delete; the numeric range is
 *                    taken from after the '='
 * @return - the number of jobs skipped, -1 if range error (including a
 *           range_str with no '=')
 */
int delete_array_range(
  job_array *pa,
  char      *range_str)
{
  tlist_head          tl;
  array_request_node *rn;
  array_request_node *to_free;
  job                *pjob;
  char               *range;
  int                 i;
  int                 range_error = 0;
  int                 num_skipped = 0;

  /* get just the numeric range specified, '=' should
   * always be there since we put it there in qdel, but never
   * step past a NULL if it is not */
  range = strchr(range_str,'=');

  if (range == NULL)
    return(-1);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't delete jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    range_error = 1;
  }

  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (range_error == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory: the index must be validated
       * before pa->jobs[i] is read */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      pjob = pa->jobs[i];

      if (pjob == NULL)
        continue;

      if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
        /* invalid state for request, skip */
        continue;
      }

      if (attempt_delete((void *)pjob) == FALSE)
        num_skipped++;
    }

    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
  }

  if (range_error)
    return(-1);

  return(num_skipped);
}
/*
 * setup_array_struct()
 *
 * Allocates and locks a job_array for the template job pjob, parses the
 * job's array request to size the job_ids table, saves the array, attaches
 * it to pjob and inserts it with insert_array().
 *
 * @param pjob - the template job for the array
 * @return PBSE_NONE on success (the array mutex has been unlocked),
 *         PBSE_MEM_MALLOC if an allocation failed,
 *         1 if the job could not be saved (svr_job_purge() has been called),
 *         INVALID_SLOT_LIMIT if set_slot_limit() rejected the request,
 *         ARRAY_TOO_LARGE if the request exceeds MaxArraySize,
 *         2 if the array request contained bad tokens.
 */
int setup_array_struct(
  job *pjob)
{
  job_array          *pa;
  array_request_node *rn;

  int                 bad_token_count;
  int                 array_size;
  int                 rc;
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  long                max_array_size;

  pa = (job_array *)calloc(1,sizeof(job_array));

  if (pa == NULL)
  {
    snprintf(log_buf, sizeof(log_buf),
      "Failed to alloc job array: job %s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    return(PBSE_MEM_MALLOC);
  }

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s",
    pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);
  snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s",
    get_variable(pjob, pbs_o_host));

  pa->ai_qs.num_cloned = 0;
  CLEAR_HEAD(pa->request_tokens);

  pa->ai_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (pa->ai_mutex == NULL)
  {
    snprintf(log_buf, sizeof(log_buf),
      "Failed to alloc array mutex: job %s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    /* nothing else owns pa yet, so it can simply be freed */
    free(pa);

    return(PBSE_MEM_MALLOC);
  }

  pthread_mutex_init(pa->ai_mutex,NULL);

  lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
  {
    /* the array is deleted in svr_job_purge */
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    /* log while pjob is still known to be valid, then purge it */
    if (LOGLEVEL >= 6)
    {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
    }

    svr_job_purge(pjob);
    /* Does job array need to be removed? */

    return(1);
  }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
  {
    long max_limit = 0;

    get_svr_attr_l(SRV_ATR_MaxSlotLimit, &max_limit);

    /* build and emit the message before array_delete(), which must not be
     * followed by any access to pa */
    snprintf(log_buf,sizeof(log_buf),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      max_limit);

    log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,pa->ai_qs.parent_id,log_buf);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
  }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;

  bad_token_count = parse_array_request(
    pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
    &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;

  while (rn != NULL)
  {
    if (rn->end > array_size)
      array_size = rn->end;

    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
  }

  /* size of array is the biggest index + 1 */
  array_size++;

  if (get_svr_attr_l(SRV_ATR_MaxArraySize, &max_array_size) == PBSE_NONE)
  {
    if (max_array_size < pa->ai_qs.num_jobs)
    {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
    }
  }

  /* initialize the array */
  pa->job_ids = calloc(array_size, sizeof(char *));

  if (pa->job_ids == NULL)
  {
    snprintf(log_buf, sizeof(log_buf),
      "Failed to alloc job_ids: job %s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    /* tear the still-locked array down the same way the MaxArraySize
     * rejection above does, rather than returning with it locked and leaked */
    array_delete(pa);

    return(PBSE_MEM_MALLOC);
  }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
  {
    array_delete(pa);
    return 2;
  }

  pjob->ji_arraystruct = pa;

  insert_array(pa);

  unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

  return(PBSE_NONE);
} /* END setup_array_struct() */
/*
 * modify_array_range()
 *
 * Looks up every sub-job in the given range with svr_find_job() and calls
 * modify_job() on it. When modify_job() answers PBSE_RELAYED_TO_MOM, a copy
 * of the request is relayed to the mom and, on success, passed to
 * post_modify_arrayreq(). Ids whose job can no longer be found are dropped
 * from pa->job_ids.
 *
 * The array mutex is released before modify_job() and the array pointer is
 * re-read with get_jobs_array() afterwards.
 *
 * @param pa             - the array to be acted on
 * @param range          - the index range (no leading '=')
 * @param plist          - attribute list handed to modify_job()
 * @param preq           - the originating batch request
 * @param checkpoint_req - handed through to modify_job()
 * @return FAILURE if the range does not parse, the failing code from
 *         copy_batchrequest() or relay_to_mom(), PBSE_RELAYED_TO_MOM if at
 *         least one job was relayed, else PBSE_NONE
 */
int modify_array_range(
  job_array            *pa,              /* I/O */
  char                 *range,           /* I */
  svrattrl             *plist,           /* I */
  struct batch_request *preq,            /* I */
  int                   checkpoint_req)  /* I */
{
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  tlist_head          tl;
  int                 i;
  int                 rc;
  int                 failed = 0;        /* set once an error exit is pending */
  int                 result = PBSE_NONE;
  int                 mom_relay = 0;
  job                *pjob;

  array_request_node *rn;
  array_request_node *to_free;

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't modify the jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    failed = 1;
    result = FAILURE;
  }

  /* modify just that range from the array */
  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (failed == 0) && (i <= rn->end); i++)
    {
      struct batch_request *array_req = NULL;

      if ((i >= pa->ai_qs.array_size) || (pa->job_ids[i] == NULL))
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
        /* the job is gone: forget its id */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;

        continue;
      }

      pthread_mutex_unlock(pa->ai_mutex);
      rc = modify_job((void **)&pjob, plist, preq, checkpoint_req, NO_MOM_RELAY);

      /* NOTE(review): pa is dereferenced below and on the next iteration
       * without a NULL check - confirm get_jobs_array() cannot return NULL here */
      pa = get_jobs_array(&pjob);

      if (pjob == NULL)
      {
        pa->job_ids[i] = NULL;

        continue;
      }

      if (rc != PBSE_RELAYED_TO_MOM)
      {
        unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);

        continue;
      }

      /* We told modify_job not to call relay_to_mom so we need to contact the mom */
      if ((rc = copy_batchrequest(&array_req, preq, 0, i)) != PBSE_NONE)
      {
        /* every other path out of this iteration releases the job mutex;
         * do the same here instead of returning with it held */
        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);

        failed = 1;
        result = rc;

        continue;
      }

      preq->rq_refcount++;

      /* the first relay takes one extra reference, dropped after the loop */
      if (mom_relay == 0)
      {
        preq->rq_refcount++;
      }

      mom_relay++;

      /* The array_req is freed in relay_to_mom (failure)
       * or in issue_Drequest (success) */
      if ((rc = relay_to_mom(&pjob, array_req, NULL)))
      {
        snprintf(log_buf,sizeof(log_buf),
          "Unable to relay information to mom for job '%s'\n",
          pjob->ji_qs.ji_jobid);

        log_err(rc, __func__, log_buf);

        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

        /* unable to get to MOM */
        failed = 1;
        result = rc;
      }
      else
      {
        unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
        post_modify_arrayreq(array_req);
      }
    }

    /* release mem - this also runs on the error path so the nodes that
     * were not processed are not leaked */
    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    free(to_free);
  }

  if (failed)
    return(result);

  if (mom_relay)
  {
    preq->rq_refcount--;

    if (preq->rq_refcount == 0)
    {
      free_br(preq);
    }

    return(PBSE_RELAYED_TO_MOM);
  }

  return(PBSE_NONE);
} /* END modify_array_range() */
/*
 * release_array_range()
 *
 * Looks up every sub-job in the requested range with svr_find_job() and
 * calls release_job() on it, unlocking the job afterwards. Ids whose job
 * can no longer be found are freed and cleared from pa->job_ids.
 *
 * @param pa        - the array to be acted on
 * @param preq      - the batch request handed through to release_job()
 * @param range_str - text holding an '=' followed by the index range
 * @return PBSE_NONE on success, PBSE_IVALREQ when range_str has no '=' or
 *         the range does not parse, otherwise the first non-zero value
 *         returned by release_job()
 */
int release_array_range(
  job_array            *pa,
  struct batch_request *preq,
  char                 *range_str)
{
  tlist_head          tl;
  int                 i;
  int                 rc;
  int                 failed = 0;        /* set once an error exit is pending */
  int                 result = PBSE_NONE;
  job                *pjob;

  array_request_node *rn;
  array_request_node *to_free;

  char *range = strchr(range_str,'=');

  if (range == NULL)
    return(PBSE_IVALREQ);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't release the jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    failed = 1;
    result = PBSE_IVALREQ;
  }

  /* release just that range from the array */
  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (failed == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory */
      if (i >= pa->ai_qs.array_size)
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
        /* the job is gone: forget its id */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;

        continue;
      }

      if ((rc = release_job(preq,pjob)))
      {
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

        /* stop releasing, but keep walking the list to free it */
        failed = 1;
        result = rc;

        continue;
      }

      unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
    }

    /* release mem */
    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    free(to_free);
  }

  return(result);
} /* END release_array_range() */
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * The array mutex is released around each attempt_delete() call and taken
 * again afterwards.
 *
 * @param pa        - the array whose jobs are deleted
 * @param range_str - the user-given range to delete; the numeric range is
 *                    taken from after the '='
 * @return - the number of jobs skipped, -1 if range error (including a
 *           range_str with no '=')
 */
int delete_array_range(
  job_array *pa,
  char      *range_str)
{
  tlist_head          tl;
  array_request_node *rn;
  array_request_node *to_free;
  job                *pjob;
  char               *range;

  int                 i;
  int                 range_error = 0;
  int                 num_skipped = 0;
  int                 deleted;

  /* get just the numeric range specified, '=' should
   * always be there since we put it there in qdel, but never
   * step past a NULL if it is not */
  range = strchr(range_str,'=');

  if (range == NULL)
    return(-1);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't delete jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    range_error = 1;
  }

  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (range_error == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory: the index must be validated
       * before pa->job_ids[i] is read */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
        /* the job is gone: forget its id */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;

        continue;
      }

      if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
        /* invalid state for request, skip */
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

        continue;
      }

      pthread_mutex_unlock(pa->ai_mutex);

      deleted = attempt_delete(pjob);

      if (deleted == FALSE)
      {
        /* if the job was deleted, this mutex would be taken care of elsewhere.
         * When it fails, release it here */
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

        num_skipped++;
      }

      pthread_mutex_lock(pa->ai_mutex);
    }

    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
  }

  if (range_error)
    return(-1);

  return(num_skipped);
}
/*
 * delete_array_range()
 *
 * deletes a range from a specific array
 *
 * The array mutex is released around each attempt_delete() call and taken
 * again afterwards. Jobs that were deleted while not in JOB_STATE_RUNNING
 * are added to pa->ai_qs.num_failed.
 *
 * @param pa        - the array whose jobs are deleted
 * @param range_str - the user-given range to delete; the numeric range is
 *                    taken from after the '='
 * @return - the number of jobs skipped, -1 if range error (including a
 *           range_str with no '=')
 */
int delete_array_range(
  job_array *pa,
  char      *range_str)
{
  tlist_head          tl;
  array_request_node *rn;
  array_request_node *to_free;
  job                *pjob;
  char               *range;

  int                 i;
  int                 range_error = 0;
  int                 num_skipped = 0;
  int                 num_deleted = 0;
  int                 deleted;
  int                 running;

  /* get just the numeric range specified, '=' should
   * always be there since we put it there in qdel */
  if ((range = strchr(range_str,'=')) == NULL)
    return(-1);

  range++; /* move past the '=' */

  CLEAR_HEAD(tl);

  if (parse_array_request(range,&tl) > 0)
  {
    /* don't delete jobs if range error; the loop below then only
     * frees whatever nodes the parser left on the list */
    range_error = 1;
  }

  rn = (array_request_node *)GET_NEXT(tl);

  while (rn != NULL)
  {
    for (i = rn->start; (range_error == 0) && (i <= rn->end); i++)
    {
      /* don't stomp on other memory: the index must be validated
       * before pa->job_ids[i] is read */
      if ((i < 0) || (i >= pa->ai_qs.array_size))
        continue;

      if (pa->job_ids[i] == NULL)
        continue;

      if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
        /* the job is gone: forget its id */
        free(pa->job_ids[i]);
        pa->job_ids[i] = NULL;

        continue;
      }

      mutex_mgr pjob_mutex = mutex_mgr(pjob->ji_mutex, true);

      if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
      {
        /* invalid state for request, skip */
        continue;
      }

      /* sample the state before attempt_delete() */
      running = (pjob->ji_qs.ji_state == JOB_STATE_RUNNING);

      pthread_mutex_unlock(pa->ai_mutex);

      deleted = attempt_delete(pjob);

      /* we come out of attempt_delete unlocked */
      pjob_mutex.set_lock_on_exit(false);

      if (deleted == FALSE)
      {
        num_skipped++;
      }
      else if (running == FALSE)
      {
        /* running jobs will increase the deleted count when their obit is reported */
        num_deleted++;
      }

      pthread_mutex_lock(pa->ai_mutex);
    }

    to_free = rn;
    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);

    /* release mem */
    free(to_free);
  }

  if (range_error)
    return(-1);

  pa->ai_qs.num_failed += num_deleted;

  return(num_skipped);
} /* END delete_array_range() */