/*
 * remove_array()
 *
 * Removes pa from the server's resizable array of job arrays
 * (allarrays.ra) while holding allarrays_mutex.
 *
 * The container mutex is first attempted without blocking. If that fails,
 * pa's own mutex is released, the container mutex is acquired with a
 * blocking lock, and pa's mutex is taken again before the removal.
 * NOTE(review): this implies the caller holds pa's ai_mutex on entry -
 * confirm against callers.
 *
 * @param pa - the job array to remove
 * @return whatever remove_thing() reports
 */

int remove_array(

  job_array *pa)

  {
  int  removal_status;
  char msg[LOCAL_LOG_BUF_SIZE];

  if (pthread_mutex_trylock(allarrays.allarrays_mutex) != 0)
    {
    /* could not get the container lock right away: let go of the array,
     * wait for the container, then pick the array back up */
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    pthread_mutex_lock(allarrays.allarrays_mutex);

    if (LOGLEVEL >= 7)
      {
      sprintf(msg, "%s: unlocked allarrays_mutex", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, msg);
      }

    lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);
    }

  removal_status = remove_thing(allarrays.ra, pa);

  pthread_mutex_unlock(allarrays.allarrays_mutex);

  return(removal_status);
  } /* END remove_array() */
/*
 * in_execution_queue()
 *
 * Reports whether pjob currently sits in a queue whose type is
 * QTYPE_Execution.
 *
 * If the caller passes a job array, its mutex is released around the
 * queue lookup and re-acquired afterwards, so the queue is locked before
 * the array.
 *
 * @param pjob - the job to check; get_jobs_queue() receives its address
 *               and may reset the local pointer to NULL
 * @param pa   - the job array held by the caller, or NULL if none
 * @return true only when the job and its queue were found and the queue
 *         is an execution queue
 */

bool in_execution_queue(

  job       *pjob,
  job_array *pa)

  {
  if (pjob == NULL)
    return(false);

  const bool holding_array = (pa != NULL);

  // lock order is queue first, then array - drop the array while looking
  // up the queue
  if (holding_array)
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

  pbs_queue *pque = get_jobs_queue(&pjob);

  if (holding_array)
    lock_ai_mutex(pa, __func__, "2", LOGLEVEL);

  if ((pque == NULL) ||
      (pjob == NULL))
    return(false);

  // NOTE(review): presumably the manager adopts the already-held queue
  // mutex and releases it when it goes out of scope - confirm in mutex_mgr
  mutex_mgr pque_mutex(pque->qu_mutex, true);

  return(pque->qu_qs.qu_type == QTYPE_Execution);
  } // END in_execution_queue()
/*
 * remove_array()
 *
 * Removes pa from the server's hash map of job arrays (allarrays.hm),
 * keyed by the array's parent id, while holding allarrays_mutex.
 *
 * The container mutex is first attempted without blocking. If that fails,
 * the array's id is remembered, the array's mutex is released, the
 * container mutex is acquired with a blocking lock, and the array is
 * looked up again by id. If it is no longer in the map there is nothing
 * left to remove and PBSE_NONE is returned.
 *
 * @param pa - the job array to remove
 * @return PBSE_NONE if the array had already disappeared, otherwise the
 *         result of remove_from_hash_map()
 */

int remove_array(

  job_array *pa)

  {
  char arrayid[PBS_MAXSVRJOBID+1];
  int  rc = PBSE_NONE;

  if (pthread_mutex_trylock(allarrays.allarrays_mutex) != 0)
    {
    /* remember which array we were asked about before letting go of it */
    strcpy(arrayid, pa->ai_qs.parent_id);

    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    pthread_mutex_lock(allarrays.allarrays_mutex);

    /* the array may have been removed while we were waiting */
    pa = (job_array *)get_from_hash_map(allarrays.hm, arrayid);

    if (pa != NULL)
      lock_ai_mutex(pa, __func__, "2", LOGLEVEL);
    }

  if (pa != NULL)
    rc = remove_from_hash_map(allarrays.hm, pa->ai_qs.parent_id);

  pthread_mutex_unlock(allarrays.allarrays_mutex);

  return(rc);
  } /* END remove_array() */
/*
 * next_array_check()
 *
 * Advances iter over the server's resizable array of job arrays
 * (allarrays.ra) and returns the next entry. The entry's mutex is locked
 * before returning unless the entry is the one passed as owned.
 *
 * NOTE(review): the entry is locked after allarrays_mutex has been
 * released - confirm nothing can free it in between.
 *
 * @param iter  - iteration cursor handed to next_thing()
 * @param owned - an array for which no lock should be taken here
 * @return the next job array, or NULL when next_thing() yields nothing
 */

job_array *next_array_check(

  int       *iter,
  job_array *owned)

  {
  job_array *candidate;

  pthread_mutex_lock(allarrays.allarrays_mutex);
  candidate = (job_array *)next_thing(allarrays.ra, iter);
  pthread_mutex_unlock(allarrays.allarrays_mutex);

  if (candidate == NULL)
    return(NULL);

  if (candidate != owned)
    lock_ai_mutex(candidate, __func__, NULL, LOGLEVEL);

  return(candidate);
  } /* END next_array_check() */
/*
 * next_array()
 *
 * Advances iter over the server's hash map of job arrays (allarrays.hm)
 * and returns the next entry with its mutex locked. The entry is locked
 * while allarrays_mutex is still held.
 *
 * @param iter - iteration cursor handed to next_from_hash_map()
 * @return the next job array (locked), or NULL when the map yields nothing
 */

job_array *next_array(

  int *iter)

  {
  job_array *entry;

  pthread_mutex_lock(allarrays.allarrays_mutex);

  entry = (job_array *)next_from_hash_map(allarrays.hm, iter);

  if (entry != NULL)
    {
    lock_ai_mutex(entry, __func__, NULL, LOGLEVEL);
    }

  pthread_mutex_unlock(allarrays.allarrays_mutex);

  return(entry);
  } /* END next_array() */
/*
 * get_array()
 *
 * Looks up the job array registered under id in the server's hash map of
 * arrays (allarrays.hm). A hit is returned with its mutex locked; the lock
 * is taken while allarrays_mutex is still held.
 *
 * @param id - the array id used as the hash map key
 * @return the matching job array (locked), or NULL if the map has no entry
 */

job_array *get_array(

  char *id)

  {
  job_array *match;

  pthread_mutex_lock(allarrays.allarrays_mutex);

  match = (job_array *)get_from_hash_map(allarrays.hm, id);

  if (match != NULL)
    {
    lock_ai_mutex(match, __func__, NULL, LOGLEVEL);
    }

  pthread_mutex_unlock(allarrays.allarrays_mutex);

  return(match);
  } /* END get_array() */
/*
 * get_array()
 *
 * Returns the server's array info struct whose parent id equals id.
 *
 * Every array handed back by next_array() is locked here before its id is
 * compared. A non-matching array is unlocked again before moving on; the
 * matching array is returned with that lock still held.
 *
 * The id is not copied back into the entry on a match: the two strings
 * already compare equal, so the copy changed nothing, and it would be an
 * overlapping copy if id pointed at the entry's own parent_id field.
 *
 * @param id - the array id to search for
 * @return the matching job array (locked), or NULL if id is NULL or no
 *         array carries that id
 */

job_array *get_array(

  char *id)

  {
  job_array *pa;
  int        iter = -1;

  if (id == NULL)
    return(NULL);

  while ((pa = next_array(&iter)) != NULL)
    {
    lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

    if (strcmp(pa->ai_qs.parent_id, id) == 0)
      {
      /* found it - hand it back still locked */
      return(pa);
      }

    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
    }

  return(NULL);
  } /* END get_array() */
int is_array( char *id) { job_array *pa; int iter = -1; char *bracket_ptr; char *end_bracket_ptr; char jobid[PBS_MAXSVRJOBID]; char temp_jobid[PBS_MAXSVRJOBID]; snprintf(jobid, sizeof(jobid), "%s", id); /* Check to see if we have an array dependency */ /* If there is an array dependency count then we will */ /* have an id of something like arrayid[][1]. We need to take */ /* off the [1] so we can compare the array id with and existing */ /* array entry. */ if ((bracket_ptr = strchr(jobid,'[')) != NULL) { /* Make sure the next character is ']' */ if (*(++bracket_ptr) != ']') { /* If we do not have a ']' then we have bad syntax. */ return(FALSE); } if (*(++bracket_ptr) == '[') { /* we made it to here. That means we have a count inside brackets. Just truncate them for the name comparison */ end_bracket_ptr = strchr(bracket_ptr, ']'); if (end_bracket_ptr == NULL) { /* we do not have a ']' then we have bad syntax. */ return(FALSE); } /* advance end_bracket_ptr one. We should be either NULL or '.' */ end_bracket_ptr++; /* truncate the string */ *bracket_ptr = 0; /* this makes jobid just the arrayid name */ /* append the rest of the job id */ snprintf(temp_jobid, sizeof(jobid), "%s%s", jobid, end_bracket_ptr); snprintf(jobid, sizeof(jobid), "%s", temp_jobid); } } else { /* No '[' then we do not have an array */ return (FALSE); } while ((pa = next_array(&iter)) != NULL) { lock_ai_mutex(pa, __func__, NULL, LOGLEVEL); if (strcmp(pa->ai_qs.parent_id, jobid) == 0) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); return(TRUE); } unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } return(FALSE); } /* END is_array() */
/*
 * setup_array_struct()
 *
 * Builds the job_array bookkeeping struct for the array job pjob: copies
 * the identifying fields from the job, creates and locks the array's
 * mutex, saves the job, applies the slot limit, parses the array request
 * into request tokens, sizes and allocates the job id table, saves the
 * array and finally links it to the job and inserts it into the server's
 * list of arrays. The array's mutex is released before a successful
 * return.
 *
 * @param pjob - the array's parent job
 * @return PBSE_NONE         on success
 *         PBSE_MEM_MALLOC   if an allocation fails
 *         1                 if the job cannot be saved (the job is purged)
 *         INVALID_SLOT_LIMIT if set_slot_limit() rejects the request
 *         ARRAY_TOO_LARGE   if the job count exceeds SRV_ATR_MaxArraySize
 *         2                 if the array request contained bad tokens
 */

int setup_array_struct(

  job *pjob)

  {
  job_array          *pa;
  array_request_node *rn;

  int                 bad_token_count;
  int                 array_size;
  int                 rc;
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  long                max_array_size;

  pa = (job_array *)calloc(1, sizeof(job_array));

  if (pa == NULL)
    return(PBSE_MEM_MALLOC);

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;

  /* bounded copies: never write past the fixed-size fields */
  snprintf(pa->ai_qs.parent_id, sizeof(pa->ai_qs.parent_id), "%s",
    pjob->ji_qs.ji_jobid);
  snprintf(pa->ai_qs.fileprefix, sizeof(pa->ai_qs.fileprefix), "%s",
    pjob->ji_qs.ji_fileprefix);
  snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s",
    pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str);
  snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s",
    get_variable(pjob, pbs_o_host));

  pa->ai_qs.num_cloned = 0;
  CLEAR_HEAD(pa->request_tokens);

  pa->ai_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (pa->ai_mutex == NULL)
    {
    free(pa);

    return(PBSE_MEM_MALLOC);
    }

  pthread_mutex_init(pa->ai_mutex, NULL);
  lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    /* the array is deleted in svr_job_purge */
    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    /* log while pjob is still known to be valid; it must not be read
     * once it has been handed to svr_job_purge() */
    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "cannot save job");
      }

    svr_job_purge(pjob);
    /* Does job array need to be removed? */

    return(1);
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    long max_limit = 0;

    get_svr_attr_l(SRV_ATR_MaxSlotLimit, &max_limit);

    /* build and emit the message before array_delete(): it reads the
     * array's id, which must not be touched after the array is deleted */
    snprintf(log_buf, sizeof(log_buf),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      max_limit);
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buf);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running = 0;
  pa->ai_qs.num_started = 0;
  pa->ai_qs.num_failed = 0;
  pa->ai_qs.num_successful = 0;

  bad_token_count = parse_array_request(
                      pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str,
                      &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */
  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;

  while (rn != NULL)
    {
    if (rn->end > array_size)
      array_size = rn->end;

    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */
  array_size++;

  if (get_svr_attr_l(SRV_ATR_MaxArraySize, &max_array_size) == PBSE_NONE)
    {
    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array */
  pa->job_ids = calloc(array_size, sizeof(char *));

  if (pa->job_ids == NULL)
    {
    sprintf(log_buf, "Failed to alloc job_ids: job %s", pjob->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    /* dispose of the half-built array the same way the other failure
     * paths in this function do, instead of returning with it locked and
     * unreachable */
    array_delete(pa);

    return(PBSE_MEM_MALLOC);
    }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);

    return 2;
    }

  pjob->ji_arraystruct = pa;

  insert_array(pa);

  unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

  return(PBSE_NONE);
  } /* END setup_array_struct() */
/*
 * array_recov_discard()
 *
 * Releases a partially recovered array: every request token already
 * linked into pa->request_tokens, the job id table, and the struct itself.
 */

static void array_recov_discard(

  job_array *pa)

  {
  array_request_node *rn;

  for (rn = (array_request_node *)GET_NEXT(pa->request_tokens);
       rn != NULL;
       rn = (array_request_node *)GET_NEXT(pa->request_tokens))
    {
    delete_link(&rn->request_tokens_link);
    free(rn);
    }

  free(pa->job_ids);
  free(pa);
  } /* END array_recov_discard() */




/*
 * array_recov()
 *
 * Reads in an array struct saved to disk and inserts it into the server's
 * list of arrays.
 *
 * The fixed part of the struct is read from the file, or converted by
 * read_and_convert_259_array() when array_259_upgrade is set, and upgraded
 * with array_upgrade() when its version differs from
 * ARRAY_QS_STRUCT_VERSION. For on-disk versions above 1 the request
 * tokens that follow the struct are read and linked in. If the on-disk
 * version was older than the current one the array is saved again. The
 * array's mutex is then created and locked, and the array is inserted
 * with that lock still held.
 *
 * @param path   - path of the saved array file
 * @param new_pa - receives the recovered array on success
 * @return PBSE_NONE on success; PBSE_SYSTEM on allocation, open or read
 *         failure; otherwise the error reported by the conversion or
 *         upgrade step
 */

int array_recov(

  char       *path,
  job_array **new_pa)

  {
  job_array          *pa;
  array_request_node *rn;
  char                log_buf[LOCAL_LOG_BUF_SIZE];
  int                 fd;
  int                 old_version;
  int                 num_tokens;
  int                 i;
  int                 len;
  int                 rc;

  old_version = ARRAY_QS_STRUCT_VERSION;

  /* allocate the storage for the struct */
  pa = (job_array*)calloc(1,sizeof(job_array));

  if (pa == NULL)
    {
    return PBSE_SYSTEM;
    }

  /* initialize the linked list nodes */
  CLEAR_HEAD(pa->request_tokens);

  fd = open(path, O_RDONLY, 0);

  if (fd < 0)
    {
    /* nothing can be read without a descriptor */
    snprintf(log_buf, sizeof(log_buf), "error opening %s", path);
    log_err(errno, __func__, log_buf);
    free(pa);

    return PBSE_SYSTEM;
    }

  if (array_259_upgrade)
    {
    rc = read_and_convert_259_array(fd, pa, path);

    if (rc != PBSE_NONE)
      {
      free(pa);
      close(fd);

      return rc;
      }
    }
  else
    {
    /* read the file into the struct previously allocated. */
    len = read(fd, &(pa->ai_qs), sizeof(pa->ai_qs));

    /* a short read is only an error when the struct claims to be the
     * current version */
    if ((len < 0) ||
        ((len < (int)sizeof(pa->ai_qs)) &&
         (pa->ai_qs.struct_version == ARRAY_QS_STRUCT_VERSION)))
      {
      snprintf(log_buf, sizeof(log_buf), "error reading %s", path);
      log_err(errno, __func__, log_buf);
      free(pa);
      close(fd);

      return PBSE_SYSTEM;
      }

    if (pa->ai_qs.struct_version != ARRAY_QS_STRUCT_VERSION)
      {
      rc = array_upgrade(pa, fd, pa->ai_qs.struct_version, &old_version);

      if (rc)
        {
        snprintf(log_buf, sizeof(log_buf),
          "Cannot upgrade array version %d to %d",
          pa->ai_qs.struct_version,
          ARRAY_QS_STRUCT_VERSION);
        log_err(errno, __func__, log_buf);
        free(pa);
        close(fd);

        return rc;
        }
      }
    }

  pa->job_ids = calloc(pa->ai_qs.array_size, sizeof(char *));

  /* calloc may legitimately return NULL for a zero-sized request */
  if ((pa->job_ids == NULL) &&
      (pa->ai_qs.array_size != 0))
    {
    snprintf(log_buf, sizeof(log_buf),
      "cannot allocate job id table for %s", path);
    log_err(errno, __func__, log_buf);
    free(pa);
    close(fd);

    return PBSE_SYSTEM;
    }

  /* check to see if there is any additional info saved in the array file */
  /* check if there are any array request tokens that haven't been fully
   * processed */
  if (old_version > 1)
    {
    if (read(fd, &num_tokens, sizeof(int)) != sizeof(int))
      {
      snprintf(log_buf, sizeof(log_buf),
        "error reading token count from %s", path);
      log_err(errno, __func__, log_buf);

      array_recov_discard(pa);
      close(fd);

      return PBSE_SYSTEM;
      }

    for (i = 0; i < num_tokens; i++)
      {
      rn = (array_request_node *)calloc(1, sizeof(array_request_node));

      if ((rn == NULL) ||
          (read(fd, rn, sizeof(array_request_node)) != sizeof(array_request_node)))
        {
        snprintf(log_buf, sizeof(log_buf),
          "error reading array_request_node from %s", path);
        log_err(errno, __func__, log_buf);

        free(rn);

        /* also drops the tokens linked in by earlier iterations */
        array_recov_discard(pa);
        close(fd);

        return PBSE_SYSTEM;
        }

      CLEAR_LINK(rn->request_tokens_link);

      append_link(&pa->request_tokens, &rn->request_tokens_link, (void*)rn);
      }
    }

  close(fd);

  CLEAR_HEAD(pa->ai_qs.deps);

  if (old_version != ARRAY_QS_STRUCT_VERSION)
    {
    /* resave the array struct if the version on disk is older than the
     * current */
    array_save(pa);
    }

  pa->ai_mutex = calloc(1, sizeof(pthread_mutex_t));

  if (pa->ai_mutex == NULL)
    {
    snprintf(log_buf, sizeof(log_buf),
      "cannot allocate mutex for array read from %s", path);
    log_err(errno, __func__, log_buf);

    array_recov_discard(pa);

    return PBSE_SYSTEM;
    }

  pthread_mutex_init(pa->ai_mutex,NULL);

  lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

  /* link the struct into the servers list of job arrays */
  insert_array(pa);

  *new_pa = pa;

  return PBSE_NONE;
  }
/*
 * update_array_statuses()
 *
 * Walks every job array returned by next_array_check() and derives the
 * state of the array's parent job from the states of its sub jobs:
 *
 *   - any sub job running                -> JOB_STATE_RUNNING
 *   - only held sub jobs counted         -> JOB_STATE_HELD
 *   - only complete sub jobs counted     -> JOB_STATE_COMPLETE
 *   - anything else                      -> JOB_STATE_QUEUED
 *
 * Job ids whose job can no longer be found are freed and cleared from the
 * array's job id table along the way.
 *
 * Each array's mutex is released before the parent job is looked up. When
 * the array is the one passed as owned, its mutex is taken again before
 * moving on.
 * NOTE(review): this implies the caller holds owned's ai_mutex on entry
 * and still holds it on return - confirm against callers.
 *
 * @param owned - an array the caller is already working with
 */

void update_array_statuses(

  job_array *owned)

  {
  job_array      *pa;
  job            *pj;
  job            *pjob;
  int             i;
  int             iter = -1;
  unsigned int    running;
  unsigned int    queued;
  unsigned int    held;
  unsigned int    complete;
  char            log_buf[LOCAL_LOG_BUF_SIZE];
  char            parent_id[sizeof(pa->ai_qs.parent_id)];

  while ((pa = next_array_check(&iter, owned)) != NULL)
    {
    running = 0;
    queued = 0;
    held = 0;
    complete = 0;

    for (i = 0; i < pa->ai_qs.array_size; i++)
      {
      if (pa->job_ids[i] != NULL)
        {
        if ((pj = svr_find_job(pa->job_ids[i], TRUE)) == NULL)
          {
          /* the sub job is gone - forget its id */
          free(pa->job_ids[i]);
          pa->job_ids[i] = NULL;
          }
        else
          {
          if (pj->ji_qs.ji_state == JOB_STATE_RUNNING)
            {
            running++;
            }
          else if (pj->ji_qs.ji_state == JOB_STATE_QUEUED)
            {
            queued++;
            }
          else if (pj->ji_qs.ji_state == JOB_STATE_HELD)
            {
            held++;
            }
          else if (pj->ji_qs.ji_state == JOB_STATE_COMPLETE)
            {
            complete++;
            }

          unlock_ji_mutex(pj, __func__, "1", LOGLEVEL);
          }
        }
      }

    /* take a private copy of the parent id while the array is still
     * locked, so the array's fields are not read after its mutex has
     * been released */
    snprintf(parent_id, sizeof(parent_id), "%s", pa->ai_qs.parent_id);

    if (LOGLEVEL >= 7)
      {
      sprintf(log_buf, "%s: unlocking ai_mutex", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
      }

    unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

    if ((pjob = svr_find_job(parent_id, TRUE)) != NULL)
      {
      if (running > 0)
        {
        svr_setjobstate(pjob, JOB_STATE_RUNNING, pjob->ji_qs.ji_substate, FALSE);
        }
      else if (held > 0 && queued == 0 && complete == 0)
        {
        svr_setjobstate(pjob, JOB_STATE_HELD, pjob->ji_qs.ji_substate, FALSE);
        }
      else if (complete > 0 && queued == 0 && held == 0)
        {
        svr_setjobstate(pjob, JOB_STATE_COMPLETE, pjob->ji_qs.ji_substate, FALSE);
        }
      else
        {
        /* default to just calling the array queued */
        svr_setjobstate(pjob, JOB_STATE_QUEUED, pjob->ji_qs.ji_substate, FALSE);
        }

      unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
      }

    /* only the pointer value is compared here; the array is not read */
    if (pa == owned)
      {
      lock_ai_mutex(pa, __func__, "1", LOGLEVEL);
      }
    }
  } /* END update_array_statuses() */