void req_messagejob(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; if ((pjob = chk_job_request(preq->rq_ind.rq_message.rq_jid, preq, &jt)) == 0) return; if (jt != IS_ARRAY_NO) { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs"); return; } /* the job must be running */ if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } /* pass the request on to MOM */ rc = relay_to_mom(pjob, preq, post_message_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ /* After MOM acts and replies to us, we pick up in post_message_req() */ }
void req_reject( int code, /* I */ int aux, /* I */ struct batch_request *preq, /* I */ char *HostName, /* I (optional) */ char *Msg) /* I (optional) */ { char msgbuf[ERR_MSG_SIZE + 256 + 1]; char msgbuf2[ERR_MSG_SIZE + 256 + 1]; set_err_msg(code, msgbuf); snprintf(msgbuf2, sizeof(msgbuf2), "%s", msgbuf); if ((HostName != NULL) && (*HostName != '\0')) { snprintf(msgbuf, sizeof(msgbuf), "%s REJHOST=%s", msgbuf2, HostName); snprintf(msgbuf2, sizeof(msgbuf2), "%s", msgbuf); } if ((Msg != NULL) && (*Msg != '\0')) { snprintf(msgbuf, sizeof(msgbuf), "%s MSG=%s", msgbuf2, Msg); /* NOTE: Don't need this last snprintf() unless another message is concatenated. */ } sprintf(log_buffer, "Reject reply code=%d(%s), aux=%d, type=%s, from %s@%s", code, msgbuf, aux, reqtype_to_txt(preq->rq_type), preq->rq_user, preq->rq_host); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "req_reject", log_buffer); preq->rq_reply.brp_auxcode = aux; reply_text(preq, code, msgbuf); return; } /* END req_reject() */
/** * @brief * Function that causes a rerun request to return with a timeout message. * * @param[in,out] pwt - work task which contains the job structure which holds the rerun request */ static void timeout_rerun_request(struct work_task *pwt) { job *pjob = (job *)pwt->wt_parm1; int conn_idx = -1; if ((pjob == NULL) || (pjob->ji_rerun_preq == NULL)) { return; /* nothing to timeout */ } if (pjob->ji_rerun_preq->rq_conn != PBS_LOCAL_CONNECTION) { conn_idx = connection_find_actual_index(pjob->ji_rerun_preq->rq_conn); } reply_text(pjob->ji_rerun_preq, PBSE_INTERNAL, "Response timed out. Job rerun request still in progress for"); /* clear no-timeout flag on connection */ if (conn_idx != -1) svr_conn[conn_idx].cn_authen &= ~PBS_NET_CONN_NOTIMEOUT; pjob->ji_rerun_preq = NULL; }
/**
 * @brief Reply to a batch request with an error code plus the name of the
 *        offending attribute (and resource, if any).
 *
 * Walks the attribute list to the aux-th entry and appends its name
 * (" name" or " name.resc") to the standard error message for 'code',
 * then sends the text reply.  The request is consumed by reply_text().
 *
 * @param[in] code - PBSE_* error code
 * @param[in] aux  - 1-based index of the bad attribute in the list
 * @param[in] pal  - head of the svrattrl list from the request
 * @param[in] preq - the request being answered
 */
void reply_badattr(

	int   code,
	int   aux,
	svrattrl             *pal,
	struct batch_request *preq)

{
	int   i = 1;
	char  msgbuf[ERR_MSG_SIZE + 1];

	set_err_msg(code, msgbuf);

	while (pal) {
		if (i == aux) {
			/* bounded append: attribute/resource names come from the
			 * request, so unbounded strcat() could overflow msgbuf */
			size_t len = strlen(msgbuf);

			if (pal->al_resc)
				snprintf(msgbuf + len, sizeof(msgbuf) - len,
					" %s.%s", pal->al_name, pal->al_resc);
			else
				snprintf(msgbuf + len, sizeof(msgbuf) - len,
					" %s", pal->al_name);

			break;
		}

		pal = (svrattrl *)GET_NEXT(pal->al_link);

		++i;
	}

	reply_text(preq, code, msgbuf);

	return;
}  /* END reply_badattr() */
/**
 * @brief Reply to a batch request with an error code plus the name of the
 *        offending attribute (and resource, if any).
 *
 * Locates the aux-th entry of the attribute list and appends its name
 * (" name" or " name.resc") to the standard error text for 'code', then
 * sends the text reply.  All appends are bounded by the message buffer.
 *
 * @param[in] code - PBSE_* error code
 * @param[in] aux  - 1-based index of the bad attribute in the list
 * @param[in] pal  - head of the svrattrl list from the request
 * @param[in] preq - the request being answered
 */
void reply_badattr(
	int code,
	int aux,
	svrattrl *pal,
	struct batch_request *preq)
{
	int pos;
	char msgbuf[ERR_MSG_SIZE+1];

	set_err_msg(code, msgbuf, sizeof(msgbuf));

	/* walk to the aux-th list entry and append its identification */
	for (pos = 1; pal; pal = (svrattrl *)GET_NEXT(pal->al_link), ++pos) {
		if (pos != aux)
			continue;

		{
			int used = strlen(msgbuf);

			if (pal->al_resc)
				snprintf(msgbuf + used, sizeof(msgbuf) - used,
					" %s.%s", pal->al_name, pal->al_resc);
			else
				snprintf(msgbuf + used, sizeof(msgbuf) - used,
					" %s", pal->al_name);
		}
		break;
	}

	reply_text(preq, code, msgbuf);

	return;
}  /* END reply_badattr() */
void req_relnodesjob(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid; int i, offset; char *nodeslist = NULL; char msg[LOG_BUF_SIZE]; if (preq == NULL) return; jid = preq->rq_ind.rq_relnodes.rq_jid; if (jid == NULL) return; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) { return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } nodeslist = preq->rq_ind.rq_relnodes.rq_node_list; if ((nodeslist != NULL) && (nodeslist[0] == '\0')) { nodeslist = NULL; } rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq); if (rc != 0) { reply_text(preq, PBSE_SYSTEM, msg); } }
void req_py_spawn(struct batch_request *preq) { int jt; /* job type */ job *pjob; int rc; char *jid = preq->rq_ind.rq_py_spawn.rq_jid; int i, offset; /* ** Returns job pointer for singleton job or "parent" of ** an array job. */ pjob = chk_job_request(jid, preq, &jt); if (pjob == NULL) return; /* see if requestor is the job owner */ if (svr_chk_owner(preq, pjob) != 0) { req_reject(PBSE_PERM, 0, preq); return; } if (jt == IS_ARRAY_NO) { /* a regular job is okay */ /* the job must be running */ if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) || (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else if (jt == IS_ARRAY_Single) { /* a single subjob is okay */ offset = subjob_index_to_offset(pjob, get_index_from_jid(jid)); if (offset == -1) { req_reject(PBSE_UNKJOBID, 0, preq); return; } i = get_subjob_state(pjob, offset); if (i == -1) { req_reject(PBSE_IVALREQ, 0, preq); return; } if (i != JOB_STATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) { req_reject(PBSE_UNKJOBID, 0, preq); return; } if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) { req_reject(PBSE_BADSTATE, 0, preq); return; } } else { reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs or multiple sub-jobs"); return; } /* ** Pass the request on to MOM. If this works, the function ** post_py_spawn_req will be called to handle the reply. ** If it fails, send the reply now. */ rc = relay_to_mom(pjob, preq, post_py_spawn_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ }
/**
 * @brief Central request dispatcher: route an already-decoded batch request
 *        to the handler for its rq_type.
 *
 * For non-RPP, non-local connections the connection-table entry is looked
 * up first; if it is missing the request is rejected and the client closed.
 * Handlers are compiled in or out with PBS_MOM: server-only requests live
 * inside "#ifndef PBS_MOM" sections, MOM-only file-copy/hook requests in
 * the "#else" section.  Unknown request types are rejected and the client
 * connection is closed.
 *
 * @param[in] sfds    - socket (or PBS_LOCAL_CONNECTION) the request came in on
 * @param[in] request - the decoded batch request; consumed by the handler
 */
void dispatch_request(int sfds, struct batch_request *request)
{
	conn_t *conn = NULL;
	int rpp = request->isrpp;

	if (!rpp) {
		if (sfds != PBS_LOCAL_CONNECTION) {
			conn = get_conn(sfds);
			if (!conn) {
				log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
					LOG_ERR, "dispatch_request",
					"did not find socket in connection table");
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
		}
	}
	/* NOTE(review): for a local (PBS_LOCAL_CONNECTION) request, conn stays
	 * NULL; handlers below that pass conn to set_to_non_blocking() appear
	 * to assume those request types never arrive locally — confirm. */

	switch (request->rq_type) {

	case PBS_BATCH_QueueJob:
		/* arrange for a partially-queued job to be cleaned up if the
		 * client connection closes before the commit */
		if (rpp) {
			request->rpp_ack = 0;
			rpp_add_close_func(sfds, close_quejob);
		} else
			net_add_close_func(sfds, close_quejob);
		req_quejob(request);
		break;

	case PBS_BATCH_JobCred:
#ifndef PBS_MOM
		/* Reject if a user client (qsub -Wpwd) and not a */
		/* server (qmove) enqueued a job with JobCredential */
		if ( !request->rq_fromsvr && \
			(server.sv_attr[SRV_ATR_ssignon_enable].at_flags \
			& ATR_VFLAG_SET) && \
			(server.sv_attr[SRV_ATR_ssignon_enable].at_val.at_long == 1) ) {
			req_reject(PBSE_SSIGNON_SET_REJECT, 0, request);
			close_client(sfds);
			break;
		}
#endif
		if (rpp)
			request->rpp_ack = 0;
		req_jobcredential(request);
		break;

	case PBS_BATCH_UserCred:
#ifdef PBS_MOM
		/* not a MOM request; error code differs by platform */
#ifdef WIN32
		req_reject(PBSE_NOSUP, 0, request);
#else
		req_reject(PBSE_UNKREQ, 0, request);
#endif
		close_client(sfds);
#else
		req_usercredential(request);
#endif
		break;

	case PBS_BATCH_UserMigrate:
#ifdef PBS_MOM
		/* not a MOM request; error code differs by platform */
#ifdef WIN32
		req_reject(PBSE_NOSUP, 0, request);
#else
		req_reject(PBSE_UNKREQ, 0, request);
#endif /* WIN32 */
		close_client(sfds);
#else
		req_user_migrate(request);
#endif /* PBS_MOM */
		break;

	case PBS_BATCH_GSS_Context:
		req_gsscontext(request);
		break;

	case PBS_BATCH_jobscript:
		if (rpp)
			request->rpp_ack = 0;
		req_jobscript(request);
		break;

	/*
	 * The PBS_BATCH_Rdytocommit message is deprecated.
	 * The server does not do anything with it anymore, but
	 * simply acks the request (in case some client makes this call)
	 */
	case PBS_BATCH_RdytoCommit:
		if (request->isrpp)
			request->rpp_ack = 0;
		reply_ack(request);
		break;

	case PBS_BATCH_Commit:
		if (rpp)
			request->rpp_ack = 0;
		req_commit(request);
		/* job is committed: remove the close-time cleanup hook */
		if (rpp)
			rpp_add_close_func(sfds, (void (*)(int))0);
		else
			net_add_close_func(sfds, (void (*)(int))0);
		break;

	case PBS_BATCH_DeleteJob:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_delete.rq_objname,
			"delete job request received");
		req_deletejob(request);
		break;

#ifndef PBS_MOM
	case PBS_BATCH_SubmitResv:
		req_resvSub(request);
		break;

	case PBS_BATCH_DeleteResv:
		req_deleteReservation(request);
		break;

	case PBS_BATCH_ModifyResv:
		req_modifyReservation(request);
		break;

	case PBS_BATCH_ResvOccurEnd:
		req_reservationOccurrenceEnd(request);
		break;
#endif

	case PBS_BATCH_HoldJob:
		/* may have to wait on a MOM checkpoint - don't time out */
		if (sfds != PBS_LOCAL_CONNECTION && !rpp)
			conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
		req_holdjob(request);
		break;

#ifndef PBS_MOM
	case PBS_BATCH_LocateJob:
		req_locatejob(request);
		break;

	case PBS_BATCH_Manager:
		req_manager(request);
		break;

	case PBS_BATCH_RelnodesJob:
		req_relnodesjob(request);
		break;
#endif

	case PBS_BATCH_MessJob:
		req_messagejob(request);
		break;

	case PBS_BATCH_PySpawn:
		/* waits on MOM - don't time out the connection */
		if (sfds != PBS_LOCAL_CONNECTION && !rpp)
			conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
		req_py_spawn(request);
		break;

	case PBS_BATCH_ModifyJob:
		req_modifyjob(request);
		break;

	case PBS_BATCH_Rerun:
		req_rerunjob(request);
		break;

#ifndef PBS_MOM
	case PBS_BATCH_MoveJob:
		req_movejob(request);
		break;

	case PBS_BATCH_OrderJob:
		req_orderjob(request);
		break;

	/* resource query/reserve/release requests are not supported */
	case PBS_BATCH_Rescq:
		req_reject(PBSE_NOSUP, 0, request);
		break;

	case PBS_BATCH_ReserveResc:
		req_reject(PBSE_NOSUP, 0, request);
		break;

	case PBS_BATCH_ReleaseResc:
		req_reject(PBSE_NOSUP, 0, request);
		break;

	case PBS_BATCH_ReleaseJob:
		if (sfds != PBS_LOCAL_CONNECTION && !rpp)
			conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
		req_releasejob(request);
		break;

	case PBS_BATCH_RunJob:
	case PBS_BATCH_AsyrunJob:
		req_runjob(request);
		break;

	case PBS_BATCH_DefSchReply:
		req_defschedreply(request);
		break;

	case PBS_BATCH_ConfirmResv:
		req_confirmresv(request);
		break;

	case PBS_BATCH_SelectJobs:
	case PBS_BATCH_SelStat:
		req_selectjobs(request);
		break;

#endif /* !PBS_MOM */

	case PBS_BATCH_Shutdown:
		req_shutdown(request);
		break;

	case PBS_BATCH_SignalJob:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_signal.rq_jid,
			"signal job request received");
		req_signaljob(request);
		break;

	case PBS_BATCH_MvJobFile:
		req_mvjobfile(request);
		break;

#ifndef PBS_MOM	/* Server Only Functions */

	/* status requests use a non-blocking socket while streaming the
	 * (possibly large) reply, restoring blocking mode afterwards */
	case PBS_BATCH_StatusJob:
		if (set_to_non_blocking(conn) == -1) {
			req_reject(PBSE_SYSTEM, 0, request);
			close_client(sfds);
			return;
		}
		req_stat_job(request);
		clear_non_blocking(conn);
		break;

	case PBS_BATCH_StatusQue:
		if (set_to_non_blocking(conn) == -1) {
			req_reject(PBSE_SYSTEM, 0, request);
			close_client(sfds);
			return;
		}
		req_stat_que(request);
		clear_non_blocking(conn);
		break;

	case PBS_BATCH_StatusNode:
		if (set_to_non_blocking(conn) == -1) {
			req_reject(PBSE_SYSTEM, 0, request);
			close_client(sfds);
			return;
		}
		req_stat_node(request);
		clear_non_blocking(conn);
		break;

	case PBS_BATCH_StatusResv:
		if (set_to_non_blocking(conn) == -1) {
			req_reject(PBSE_SYSTEM, 0, request);
			close_client(sfds);
			return;
		}
		req_stat_resv(request);
		clear_non_blocking(conn);
		break;

	case PBS_BATCH_StatusSvr:
		req_stat_svr(request);
		break;

	case PBS_BATCH_StatusSched:
		req_stat_sched(request);
		break;

	case PBS_BATCH_StatusHook:
		/* hook data is restricted to root on the server host */
		if (!is_local_root(request->rq_user, request->rq_host)) {
			sprintf(log_buffer, "%s@%s is unauthorized to "
				"access hooks data from server %s",
				request->rq_user, request->rq_host, server_host);
			reply_text(request, PBSE_HOOKERROR, log_buffer);
			log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_HOOK,
				LOG_INFO, "", log_buffer);
			/* don't call close_client() to allow other */
			/* non-hook related requests to continue */
			break;
		}
		if (set_to_non_blocking(conn) == -1) {
			req_reject(PBSE_SYSTEM, 0, request);
			close_client(sfds);
			return;
		}
		req_stat_hook(request);
		clear_non_blocking(conn);
		break;

	case PBS_BATCH_TrackJob:
		req_track(request);
		break;

	case PBS_BATCH_RegistDep:
		req_register(request);
		break;

	case PBS_BATCH_AuthenResvPort:
		/* reserved-port authentication is incompatible with munge */
		if (pbs_conf.auth_method == AUTH_MUNGE) {
			req_reject(PBSE_BADCRED, 0, request);
			close_client(sfds);
			return;
		}
		req_authenResvPort(request);
		break;

	case PBS_BATCH_StageIn:
		req_stagein(request);
		break;

	case PBS_BATCH_FailOver:
		req_failover(request);
		break;

	case PBS_BATCH_StatusRsc:
		req_stat_resc(request);
		break;

	case PBS_BATCH_MomRestart:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, LOG_INFO,
			request->rq_ind.rq_momrestart.rq_momhost,
			"Mom restarted on host");
		req_momrestart(request);
		break;

#else	/* MOM only functions */

	case PBS_BATCH_CopyFiles:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_cpyfile.rq_jobid,
			"copy file request received");
		/* don't time-out as copy may take long time */
		if (sfds != PBS_LOCAL_CONNECTION && !rpp)
			conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
		req_cpyfile(request);
		break;

	case PBS_BATCH_CopyFiles_Cred:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_cpyfile_cred.rq_copyfile.rq_jobid,
			"copy file cred request received");
		/* don't time-out as copy may take long time */
		if (sfds != PBS_LOCAL_CONNECTION && !rpp)
			conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
		req_cpyfile(request);
		break;

	case PBS_BATCH_DelFiles:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_cpyfile.rq_jobid,
			"delete file request received");
		req_delfile(request);
		break;

	case PBS_BATCH_DelFiles_Cred:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
			request->rq_ind.rq_cpyfile_cred.rq_copyfile.rq_jobid,
			"delete file cred request received");
		req_delfile(request);
		break;

	case PBS_BATCH_CopyHookFile:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_HOOK, LOG_INFO,
			request->rq_ind.rq_hookfile.rq_filename,
			"copy hook-related file request received");
		req_copy_hookfile(request);
		break;

	case PBS_BATCH_DelHookFile:
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_HOOK, LOG_INFO,
			request->rq_ind.rq_hookfile.rq_filename,
			"delete hook-related file request received");
		req_del_hookfile(request);
		break;

#endif
	default:
		req_reject(PBSE_UNKREQ, 0, request);
		close_client(sfds);
		break;
	}
	return;
}
/**
 * @brief
 *	Service the Modify Reservation Request from client such as pbs_ralter.
 *
 *	This request atomically modifies one or more of a reservation's
 *	attributes. An error is returned to the client if the user does not
 *	have permission to perform the modification, the attribute is
 *	read-only, the reservation is running and the attribute is only
 *	modifiable when the reservation is not running or is empty.
 *
 *	For start/end time changes the new time must not collide with the
 *	next occurrence of a standing reservation; accepted changes are
 *	recorded in ri_alter_flags and trigger a scheduler reconfirmation.
 *	For an interactive reservation the reply is deferred until the
 *	scheduler decides; otherwise the client is answered immediately.
 *
 * @param[in] preq - pointer to batch request from client
 */
void req_modifyReservation(struct batch_request *preq)
{
	char *rid = NULL;
	svrattrl *psatl = NULL;
	attribute_def *pdef = NULL;
	int rc = 0;
	int bad = 0;
	char buf[PBS_MAXUSER + PBS_MAXHOSTNAME + 32] = {0};
	int sock;
	int resc_access_perm_save = 0;
	int send_to_scheduler = 0;	/* RESV_*_TIME_MODIFIED when a time changed */
	int log_len = 0;
	char *fmt = "%a %b %d %H:%M:%S %Y";
	int is_standing = 0;
	int next_occr_start = 0;	/* start of the next occurrence, standing only */
	extern char *msg_stdg_resv_occr_conflict;
	resc_resv *presv;

	if (preq == NULL)
		return;

	sock = preq->rq_conn;

	presv = chk_rescResv_request(preq->rq_ind.rq_modify.rq_objname, preq);
	/* Note: on failure, chk_rescResv_request invokes req_reject
	 * appropriate reply is sent and batch_request is freed.
	 */
	if (presv == NULL)
		return;

	rid = preq->rq_ind.rq_modify.rq_objname;
	if ((presv = find_resv(rid)) == NULL) {
		/* Not on "all_resvs" list try "new_resvs" list */
		presv = (resc_resv *)GET_NEXT(svr_newresvs);
		while (presv) {
			if (!strcmp(presv->ri_qs.ri_resvID, rid))
				break;
			presv = (resc_resv *)GET_NEXT(presv->ri_allresvs);
		}
	}
	if (presv == NULL) {
		req_reject(PBSE_UNKRESVID, 0, preq);
		return;
	}

	is_standing = presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long;
	if (is_standing)
		/* find when the 2nd occurrence starts: time alterations must
		 * stay before it to avoid overlapping the next occurrence */
		next_occr_start = get_occurrence(presv->ri_wattr[RESV_ATR_resv_rrule].at_val.at_str,
			presv->ri_wattr[RESV_ATR_start].at_val.at_long,
			presv->ri_wattr[RESV_ATR_resv_timezone].at_val.at_str, 2);

	resc_access_perm_save = resc_access_perm;
	psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);

	presv->ri_alter_flags = 0;

	/* first pass: validate each attribute and decode it into the resv */
	while (psatl) {
		long temp = 0;
		char *end = NULL;
		int index;

		/* identify the attribute by name */
		index = find_attr(resv_attr_def, psatl->al_name, RESV_ATR_LAST);
		if (index < 0) {
			/* didn`t recognize the name */
			reply_badattr(PBSE_NOATTR, 1, psatl, preq);
			return;
		}
		pdef = &resv_attr_def[index];

		/* Does attribute's definition flags indicate that
		 * we have sufficient permission to write the attribute?
		 */
		resc_access_perm = resc_access_perm_save; /* reset */
		if (psatl->al_flags & ATR_VFLAG_HOOK) {
			/* hook-originated changes get full write permission */
			resc_access_perm = ATR_DFLAG_USWR | \
						ATR_DFLAG_OPWR | \
						ATR_DFLAG_MGWR | \
						ATR_DFLAG_SvWR | \
						ATR_DFLAG_Creat;
		}
		if ((pdef->at_flags & resc_access_perm) == 0) {
			reply_badattr(PBSE_ATTRRO, 1, psatl, preq);
			return;
		}

		switch (index) {
		case RESV_ATR_start:
			/* start may move only when the resv is not running,
			 * or is running but has no jobs in its queue */
			if ((presv->ri_wattr[RESV_ATR_state].at_val.at_long != RESV_RUNNING) ||
				!(presv->ri_qp->qu_numjobs)) {
				temp = strtol(psatl->al_value, &end, 10);
				if ((temp > time(NULL)) &&
					(temp != presv->ri_wattr[RESV_ATR_start].at_val.at_long)) {
					if (!is_standing || (temp < next_occr_start)) {
						send_to_scheduler = RESV_START_TIME_MODIFIED;
						/* remember the old start so it can be reverted */
						presv->ri_alter_stime = presv->ri_wattr[RESV_ATR_start].at_val.at_long;
						presv->ri_alter_flags |= RESV_START_TIME_MODIFIED;
					} else {
						snprintf(log_buffer, sizeof(log_buffer), "%s", msg_stdg_resv_occr_conflict);
						log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
							preq->rq_ind.rq_modify.rq_objname, log_buffer);
						req_reject(PBSE_STDG_RESV_OCCR_CONFLICT, 0, preq);
						return;
					}
				} else {
					/* start in the past, or unchanged */
					req_reject(PBSE_BADTSPEC, 0, preq);
					return;
				}
			} else {
				if (presv->ri_qp->qu_numjobs)
					req_reject(PBSE_RESV_NOT_EMPTY, 0, preq);
				else
					req_reject(PBSE_BADTSPEC, 0, preq);
				return;
			}
			break;
		case RESV_ATR_end:
			temp = strtol(psatl->al_value, &end, 10);
			if (temp == presv->ri_wattr[RESV_ATR_end].at_val.at_long) {
				/* end time unchanged */
				req_reject(PBSE_BADTSPEC, 0, preq);
				return;
			}
			if (!is_standing || temp < next_occr_start) {
				send_to_scheduler = RESV_END_TIME_MODIFIED;
				/* remember the old end so it can be reverted */
				presv->ri_alter_etime = presv->ri_wattr[RESV_ATR_end].at_val.at_long;
				presv->ri_alter_flags |= RESV_END_TIME_MODIFIED;
			} else {
				snprintf(log_buffer, sizeof(log_buffer), "%s", msg_stdg_resv_occr_conflict);
				log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
					preq->rq_ind.rq_modify.rq_objname, log_buffer);
				req_reject(PBSE_STDG_RESV_OCCR_CONFLICT, 0, preq);
				return;
			}
			break;
		default:
			break;
		}

		/* decode attribute */
		rc = pdef->at_decode(&presv->ri_wattr[index],
			psatl->al_name, psatl->al_resc, psatl->al_value);
		if (rc != 0) {
			reply_badattr(rc, 1, psatl, preq);
			return;
		}

		psatl = (svrattrl *)GET_NEXT(psatl->al_link);
	}
	resc_access_perm = resc_access_perm_save; /* restore perm */

	if (send_to_scheduler) {
		/* a time changed: mark the resv as being altered and re-derive
		 * the dependent time attributes */
		presv->ri_alter_state = presv->ri_wattr[RESV_ATR_state].at_val.at_long;
		resv_setResvState(presv, RESV_BEING_ALTERED, presv->ri_qs.ri_substate);
		/*"start", "end","duration", and "wall"; derive and check */
		if (start_end_dur_wall(presv, RESC_RESV_OBJECT)) {
			req_reject(PBSE_BADTSPEC, 0, preq);
			resv_revert_alter_times(presv);
			return;
		}

		presv->ri_wattr[RESV_ATR_resource].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
	}
	bad = 0;
	psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	/* second pass: apply the validated attribute list to the resv */
	if (psatl)
		rc = modify_resv_attr(presv, psatl, preq->rq_perm, &bad);

	if (send_to_scheduler)
		set_scheduler_flag(SCH_SCHEDULE_RESV_RECONFIRM, dflt_scheduler);

	(void)sprintf(log_buffer, "Attempting to modify reservation");
	if (presv->ri_alter_flags & RESV_START_TIME_MODIFIED) {
		strftime(buf, sizeof(buf), fmt, localtime((time_t *) &presv->ri_wattr[RESV_ATR_start].at_val.at_long));
		log_len = strlen(log_buffer);
		snprintf(log_buffer + log_len, sizeof(log_buffer) - log_len," start=%s", buf);
	}

	if (presv->ri_alter_flags & RESV_END_TIME_MODIFIED) {
		strftime(buf, sizeof(buf), fmt, localtime((time_t *) &presv->ri_wattr[RESV_ATR_end].at_val.at_long));
		log_len = strlen(log_buffer);
		snprintf(log_buffer + log_len, sizeof(log_buffer) - log_len," end=%s", buf);
	}
	log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
		preq->rq_ind.rq_modify.rq_objname, log_buffer);

	if ((presv->ri_wattr[RESV_ATR_interactive].at_flags & ATR_VFLAG_SET) == 0) {
		char buf1[PBS_MAXUSER + PBS_MAXHOSTNAME + 32] = {0};
		/*Not "interactive" so don't wait on scheduler, reply now*/

		sprintf(buf, "%s ALTER REQUESTED",  presv->ri_qs.ri_resvID);
		sprintf(buf1, "requestor=%s@%s", preq->rq_user, preq->rq_host);

		if ((rc = reply_text(preq, PBSE_NONE, buf))) {
			/* reply failed, close connection; DON'T purge resv */
			close_client(sock);
			return;
		}
	} else {
		/*Don't reply back until scheduler decides*/
		long dt;
		presv->ri_brp = preq;	/* hold the request until then */
		dt = presv->ri_wattr[RESV_ATR_interactive].at_val.at_long;
		/*reply with id and state no decision in +dt secs*/
		(void)gen_future_reply(presv, dt);
		(void)snprintf(buf, sizeof(buf), "requestor=%s@%s Interactive=%ld",
			preq->rq_user, preq->rq_host, dt);
	}
}
void req_modifyjob(struct batch_request *preq) { int add_to_am_list = 0; /* if altered during sched cycle */ int bad = 0; int jt; /* job type */ int newstate; int newsubstate; resource_def *outsideselect = NULL; job *pjob; svrattrl *plist; resource *presc; resource_def *prsd; int rc; int running = 0; int sendmom = 0; char hook_msg[HOOK_MSG_SIZE]; int mod_project = 0; pbs_sched *psched; switch (process_hooks(preq, hook_msg, sizeof(hook_msg), pbs_python_set_interrupt)) { case 0: /* explicit reject */ reply_text(preq, PBSE_HOOKERROR, hook_msg); return; case 1: /* explicit accept */ if (recreate_request(preq) == -1) { /* error */ /* we have to reject the request, as 'preq' */ /* may have been partly modified */ strcpy(hook_msg, "modifyjob event: rejected request"); log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_HOOK, LOG_ERR, "", hook_msg); reply_text(preq, PBSE_HOOKERROR, hook_msg); return; } break; case 2: /* no hook script executed - go ahead and accept event*/ break; default: log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_HOOK, LOG_INFO, "", "modifyjob event: accept req by default"); } if (pseldef == NULL) /* do one time to keep handy */ pseldef = find_resc_def(svr_resc_def, "select", svr_resc_size); pjob = chk_job_request(preq->rq_ind.rq_modify.rq_objname, preq, &jt); if (pjob == NULL) return; if ((jt == IS_ARRAY_Single) || (jt == IS_ARRAY_Range)) { req_reject(PBSE_IVALREQ, 0, preq); return; } psched = find_sched_from_sock(preq->rq_conn); /* allow scheduler to modify job */ if (psched == NULL) { /* provisioning job is not allowed to be modified */ if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) && (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PROVISION)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } /* cannot be in exiting or transit, exiting has already be checked */ if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { req_reject(PBSE_BADSTATE, 0, preq); return; } plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr); if (plist == NULL) { /* nothing to do */ 
reply_ack(preq); return; } /* * Special checks must be made: * if during a scheduling cycle and certain attributes are altered, * make a note of the job to prevent it from being run now; * if job is running, only certain attributes/resources can be * altered. */ if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { running = 1; } while (plist) { int i; i = find_attr(job_attr_def, plist->al_name, JOB_ATR_LAST); /* * Is the attribute being altered one which could change * scheduling (ATR_DFLAG_SCGALT set) and if a scheduling * cycle is in progress, then set flag to add the job to list * of jobs which cannot be run in this cycle. * If the scheduler itself sends a modify job request, * no need to delay the job until next cycle. */ if ((psched == NULL) && (scheduler_jobs_stat) && (job_attr_def[i].at_flags & ATR_DFLAG_SCGALT)) add_to_am_list = 1; /* Is the attribute modifiable in RUN state ? */ if (i < 0) { reply_badattr(PBSE_NOATTR, 1, plist, preq); return; } if ((running == 1) && ((job_attr_def[i].at_flags & ATR_DFLAG_ALTRUN) == 0)) { reply_badattr(PBSE_MODATRRUN, 1, plist, preq); return; } if (i == (int)JOB_ATR_resource) { prsd = find_resc_def(svr_resc_def, plist->al_resc, svr_resc_size); if (prsd == 0) { reply_badattr(PBSE_UNKRESC, 1, plist, preq); return; } /* is the specified resource modifiable while */ /* the job is running */ if (running) { if ((prsd->rs_flags & ATR_DFLAG_ALTRUN) == 0) { reply_badattr(PBSE_MODATRRUN, 1, plist, preq); return; } sendmom = 1; } /* should the resource be only in a select spec */ if (prsd->rs_flags & ATR_DFLAG_CVTSLT && !outsideselect && plist->al_atopl.value && plist->al_atopl.value[0]) { /* if "-lresource" is set and has non-NULL value, ** remember as potential bad resource ** if this appears along "select". 
*/ outsideselect = prsd; } } if (strcmp(plist->al_name, ATTR_project) == 0) { mod_project = 1; } else if ((strcmp(plist->al_name, ATTR_runcount) == 0) && ((plist->al_flags & ATR_VFLAG_HOOK) == 0) && (plist->al_value != NULL) && (plist->al_value[0] != '\0') && ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) && (atol(plist->al_value) < \ pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long)) { sprintf(log_buffer, "regular user %s@%s cannot decrease '%s' attribute value from %ld to %ld", preq->rq_user, preq->rq_host, ATTR_runcount, pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long, atol(plist->al_value)); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_ERR, pjob->ji_qs.ji_jobid, log_buffer); req_reject(PBSE_PERM, 0, preq); return; } plist = (svrattrl *)GET_NEXT(plist->al_link); } if (outsideselect) { presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource], pseldef); if (presc && ((presc->rs_value.at_flags & ATR_VFLAG_DEFLT) == 0)) { /* select is not a default, so reject qalter */ resc_in_err = strdup(outsideselect->rs_name); req_reject(PBSE_INVALJOBRESC, 0, preq); return; } } /* modify the jobs attributes */ bad = 0; plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr); rc = modify_job_attr(pjob, plist, preq->rq_perm, &bad); if (rc) { if (pjob->ji_clterrmsg) reply_text(preq, rc, pjob->ji_clterrmsg); else reply_badattr(rc, bad, plist, preq); return; } /* If certain attributes modified and if in scheduling cycle */ /* then add to list of jobs which cannot be run in this cycle */ if (add_to_am_list) am_jobs_add(pjob); /* see req_runjob() */ /* check if project attribute was requested to be modified to */ /* be the default project value */ if (mod_project && (pjob->ji_wattr[(int)JOB_ATR_project].at_flags & \ ATR_VFLAG_SET)) { if (strcmp(pjob->ji_wattr[(int)JOB_ATR_project].at_val.at_str, PBS_DEFAULT_PROJECT) == 0) { sprintf(log_buffer, msg_defproject, ATTR_project, PBS_DEFAULT_PROJECT); #ifdef NAS /* localmod 107 */ log_event(PBSEVENT_DEBUG4, 
PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); #else log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); #endif /* localmod 107 */ } } if (pjob->ji_wattr[(int)JOB_ATR_resource].at_flags & ATR_VFLAG_MODIFY) { presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource], pseldef); if (presc && (presc->rs_value.at_flags & ATR_VFLAG_DEFLT)) { /* changing Resource_List and select is a default */ /* clear "select" so it is rebuilt inset_resc_deflt */ pseldef->rs_free(&presc->rs_value); } } /* Reset any defaults resource limit which might have been unset */ if ((rc = set_resc_deflt((void *)pjob, JOB_OBJECT, NULL)) != 0) { req_reject(rc, 0, preq); return; } /* if job is not running, may need to change its state */ if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) { svr_evaljobstate(pjob, &newstate, &newsubstate, 0); (void)svr_setjobstate(pjob, newstate, newsubstate); } else { (void)job_save(pjob, SAVEJOB_FULL); } (void)sprintf(log_buffer, msg_manager, msg_jobmod, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); /* if a resource limit changed for a running job, send to MOM */ if (sendmom) { rc = relay_to_mom(pjob, preq, post_modify_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ return; } reply_ack(preq); }
/**
 * @brief (MOM) Commit a job whose queue/commit handshake has completed and
 *        start it executing.
 *
 * Locates the partially-received job by connection, verifies it is in the
 * TRANSICM substate, moves it onto the "all jobs" list, marks this MOM as
 * Mother Superior, sets the RUNNING/PRERUN state, and calls start_exec()
 * (which blocks).  If the start fails (job ends up in EXITING), a failure
 * text is sent; otherwise the job id is returned with a Commit reply.
 * The job is saved to disk either way.
 *
 * @param[in] preq - the commit batch request
 */
void req_commit(

	struct batch_request *preq)  /* I */

{
	job *pj;

	pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit);

	if (LOGLEVEL >= 6) {
		log_record(
			PBSEVENT_JOB,
			PBS_EVENTCLASS_JOB,
			(pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
			"committing job");
	}

	if (pj == NULL) {
		req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);
		return;
	}

	if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM) {
		/* NOTE(review): errno is not set by the substate comparison above,
		 * so log_err() may report a stale errno value here — confirm */
		log_err(errno, "req_commit", "cannot commit job in unexpected state");

		req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

		return;
	}

	/* move job from new job list to "all" job list, set to running state */

	delete_link(&pj->ji_alljobs);

	append_link(&svr_alljobs, &pj->ji_alljobs, pj);

	/*
	** Set JOB_SVFLG_HERE to indicate that this is Mother Superior.
	*/

	pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE;

	pj->ji_qs.ji_state = JOB_STATE_RUNNING;
	pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN;
	pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM;
	pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn);
	pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

	/* For MOM - start up the job (blocks) */

	if (LOGLEVEL >= 6) {
		log_record(
			PBSEVENT_JOB,
			PBS_EVENTCLASS_JOB,
			(pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
			"starting job execution");
	}

	start_exec(pj);

	if (LOGLEVEL >= 6) {
		log_record(
			PBSEVENT_JOB,
			PBS_EVENTCLASS_JOB,
			(pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
			"job execution started");
	}

	/* if start request fails, reply with failure string */

	if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING) {
		char tmpLine[1024];

		/* identify the node the start failed on, when known */
		if ((pj->ji_hosts != NULL) &&
			(pj->ji_nodekill >= 0) &&
			(pj->ji_hosts[pj->ji_nodekill].hn_host != NULL)) {
			sprintf(tmpLine, "start failed on node %s",
				pj->ji_hosts[pj->ji_nodekill].hn_host);
		} else {
			sprintf(tmpLine, "start failed on unknown node");
		}

		if (LOGLEVEL >= 6) {
			log_record(
				PBSEVENT_JOB,
				PBS_EVENTCLASS_JOB,
				(pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
				tmpLine);
		}

		reply_text(preq, 0, tmpLine);
	} else {
		reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Commit);
	}

	job_save(pj, SAVEJOB_FULL);

	/* NOTE: we used to flag JOB_ATR_errpath, JOB_ATR_outpath,
	 * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure
	 * pbs_server got these attr values.  This worked fine before TORQUE modified
	 * job launched into an async process.  At 2.0.0p6, a new attribute "SEND" flag
	 * was added to handle this process.
	 */

	return;
}  /* END req_commit() */
/**
 * @brief
 *  Service the Move Job batch request: after giving any movejob hooks a
 *  chance to accept/reject/modify the request, validate the job's type and
 *  state, then hand the actual move (local or network) to svr_movejob().
 *
 *  Reply handling: success -> reply_ack(); failure -> reply_text() with the
 *  client error message when one was captured, else req_reject(); a
 *  deferred network move (return 2) is acknowledged later by post_movejob().
 *
 * @param[in] req - the Move Job batch request (destination in
 *                  rq_ind.rq_move.rq_destin).
 */

void req_movejob(struct batch_request *req)
  {
  int jt;  /* job type */
  job *jobp;
  char hook_msg[HOOK_MSG_SIZE];

  /* run any movejob hooks first; they may veto or rewrite the request */

  switch (process_hooks(req, hook_msg, sizeof(hook_msg), pbs_python_set_interrupt))
    {
    case 0:  /* explicit reject */
      reply_text(req, PBSE_HOOKERROR, hook_msg);
      return;

    case 1:  /* explicit accept */
      if (recreate_request(req) == -1)
        {
        /* error */
        /* we have to reject the request, as 'req' */
        /* may have been partly modified */
        strcpy(hook_msg, "movejob event: rejected request");
        log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_HOOK, LOG_ERR, "", hook_msg);
        reply_text(req, PBSE_HOOKERROR, hook_msg);
        return;
        }
      break;

    case 2:  /* no hook script executed - go ahead and accept event*/
      break;

    default:
      log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_HOOK, LOG_INFO, "",
        "movejob event: accept req by default");
    }

  /* chk_job_request() replies to the client itself on failure */

  jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req, &jt);

  if (jobp == NULL)
    return;

  /* only a regular job or an entire array job may be moved */

  if ((jt != IS_ARRAY_NO) && (jt != IS_ARRAY_ArrayJob))
    {
    req_reject(PBSE_IVALREQ, 0, req);
    return;
    }

  /* the job must be idle: queued, held, or waiting */

  if (jobp->ji_qs.ji_state != JOB_STATE_QUEUED &&
      jobp->ji_qs.ji_state != JOB_STATE_HELD &&
      jobp->ji_qs.ji_state != JOB_STATE_WAITING)
    {
#ifndef NDEBUG
    (void)sprintf(log_buffer, "(%s) %s, state=%d",
      __func__, msg_badstate, jobp->ji_qs.ji_state);
    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
      jobp->ji_qs.ji_jobid, log_buffer);
#endif /* NDEBUG */
    req_reject(PBSE_BADSTATE, 0, req);
    return;
    }

  if (jt != IS_ARRAY_NO)
    {
    /* cannot move Subjob and can only move array job if */
    /* no subjobs are running */

    if ((jt != IS_ARRAY_ArrayJob) ||
        (jobp->ji_ajtrk->tkm_subjsct[JOB_STATE_RUNNING] != 0))
      {
      req_reject(PBSE_IVALREQ, 0, req);
      return;
      }
    }

  /*
   * svr_movejob() does the real work, handles both local and
   * network moves
   */

  switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, req))
    {
    case 0:  /* success */
      (void)strcpy(log_buffer, msg_movejob);
      (void)sprintf(log_buffer + strlen(log_buffer), msg_manager,
        req->rq_ind.rq_move.rq_destin, req->rq_user, req->rq_host);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
        jobp->ji_qs.ji_jobid, log_buffer);
      reply_ack(req);
      break;

    case -1:
    case 1:  /* fail */

      /* prefer the client error message captured during the move, if any */

      if (jobp->ji_clterrmsg)
        reply_text(req, pbs_errno, jobp->ji_clterrmsg);
      else
        req_reject(pbs_errno, 0, req);
      break;

    case 2:  /* deferred, will be handled by */
      /* post_movejob() when the child completes */
      break;
    }

  return;
  }
/**
 * @brief
 *  Commit a newly-staged job (TORQUE variant): take ownership of the job,
 *  move it onto the global alljobs list, mark it running in PRERUN
 *  substate, and start execution via start_exec() (blocks).
 *
 *  On success the client receives the session id of the started job
 *  (reply_sid); if the start failed (job dropped to EXITING) the client
 *  receives the start_exec() return code plus a failure text naming the
 *  failing node, when known.
 *
 * @param[in] preq - the Commit batch request; the job is located by the
 *                   connection it arrived on plus the job id in the request.
 */

void req_commit(

  struct batch_request *preq)  /* I */

  {
  unsigned int momport = 0;
  int rc;
  job *pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "committing job");
    }

  if (pj == NULL)
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  /* only a job still in transit-in-commit state may be committed */

  if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM)
    {
    /* NOTE(review): errno is not set by the state comparison above, so the
     * value logged here may be stale -- confirm intent */

    log_err(errno, "req_commit", (char *)"cannot commit job in unexpected state");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

    return;
    }

  /* move job from new job list to "all" job list, set to running state */

  delete_link(&pj->ji_alljobs);

  alljobs_list.push_back(pj);

  /*
  ** Set JOB_SVFLG_HERE to indicate that this is Mother Superior.
  */

  pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE;

  pj->ji_qs.ji_state = JOB_STATE_RUNNING;
  pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN;
  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM;

  /* remember the address of the server that committed the job to us */

  pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn,FALSE);
  pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

  /* For MOM - start up the job (blocks) */

  if (LOGLEVEL >= 6)
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pj->ji_qs.ji_jobid,
      "req_commit:starting job execution");

  rc = start_exec(pj);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pj->ji_qs.ji_jobid,
      "req_commit:job execution started");
    }

  /* if start request fails, reply with failure string */

  if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING)
    {
    char tmpLine[1024];

    /* name the failing node when the host list identifies it */

    if ((pj->ji_hosts != NULL) &&
        (pj->ji_nodekill >= 0) &&
        (pj->ji_hosts[pj->ji_nodekill].hn_host != NULL))
      {
      sprintf(tmpLine, "start failed on node %s",
        pj->ji_hosts[pj->ji_nodekill].hn_host);
      }
    else
      {
      sprintf(tmpLine, "start failed on unknown node");
      }

    if (LOGLEVEL >= 6)
      {
      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pj->ji_qs.ji_jobid, tmpLine);
      }

    reply_text(preq, rc, tmpLine);
    }
  else
    {
    /* success - hand the job's session id back to the requester */

    reply_sid(preq, pj->ji_wattr[JOB_ATR_session_id].at_val.at_long,BATCH_REPLY_CHOICE_Text);
    }

  /* use the resource-manager port when running multiple MOMs per host */

  if (multi_mom)
    {
    momport = pbs_rm_port;
    }

  /* persist the job's new state/substate */

  job_save(pj, SAVEJOB_FULL, momport);

#ifdef NVIDIA_GPUS
  /*
   * Does this job have a gpuid assigned?
   * if so, then update gpu status
   */

  if ((use_nvidia_gpu) &&
      ((pj->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) != 0) &&
      (pj->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str != NULL))
    {
    send_update_soon();
    }
#endif  /* NVIDIA_GPUS */

  /* NOTE:  we used to flag JOB_ATR_errpath, JOB_ATR_outpath,
   * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure
   * pbs_server got these attr values.  This worked fine before TORQUE modified
   * job launched into an async process.  At 2.0.0p6, a new pbs_attribute "SEND" flag
   * was added to handle this process. */

  return;
  }  /* END req_commit() */
/**
 * @brief
 *  Work-task handler run after MOM replies to a relayed hold request.
 *
 *  Restores the original client connection, interprets MOM's reply code,
 *  and completes the client's request:
 *    - MOM error other than "checkpoint busy"/"not supported": job substate
 *      is reset to RUNNING and the error text is relayed to the client.
 *    - success: records that MOM holds a checkpoint file (migratable when
 *      brp_auxcode is set), saves the job, and writes an accounting record.
 *    - PBSE_NOSUP (and success) end in reply_ack().
 *
 * @param[in] pwt - completed work task; wt_parm1 is the original batch
 *                  request, wt_aux2 == 1 means the request went over RPP
 *                  (so there is no per-request TCP connection to close).
 */

static void post_hold(struct work_task *pwt)
  {
  int code;
  job *pjob;
  struct batch_request *preq;
  int conn_idx;

  if (pwt->wt_aux2 != 1)
    svr_disconnect(pwt->wt_event);  /* close connection to MOM */

  preq = pwt->wt_parm1;
  code = preq->rq_reply.brp_code;

  preq->rq_conn = preq->rq_orgconn;  /* restore client socket */

  if (pwt->wt_aux2 != 1)
    {
    /* not rpp */

    conn_idx = connection_find_actual_index(preq->rq_conn);

    if (conn_idx == -1)
      {
      req_reject(PBSE_SYSTEM, 0, preq);
      return;
      }

    /* re-enable the normal idle timeout on the client connection */

    svr_conn[conn_idx].cn_authen &= ~PBS_NET_CONN_NOTIMEOUT;
    }

  pjob = find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname);

  if (pjob == (job *)0)
    {
    /* job vanished while MOM was processing the hold */

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
      preq->rq_ind.rq_hold.rq_orig.rq_objname, msg_postmomnojob);
    req_reject(PBSE_UNKJOBID, 0, preq);
    return;
    }

  if (code != 0)
    {
    /* MOM could not complete the hold */

    if (code != PBSE_CKPBSY)
      pjob->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;  /* reset it */

    if (code != PBSE_NOSUP)
      {
      /* a "real" error - log message with return error code */

      (void)sprintf(log_buffer, msg_mombadhold, code);
      log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
        pjob->ji_qs.ji_jobid, log_buffer);

      /* send message back to server for display to user */

      reply_text(preq, code, log_buffer);
      return;
      }
    }
  else if (code == 0)
    {
    /* record that MOM has a checkpoint file */

    if (preq->rq_reply.brp_auxcode)  /* chkpt can be moved */
      pjob->ji_qs.ji_svrflags =
        (pjob->ji_qs.ji_svrflags & ~JOB_SVFLG_CHKPT) |
        JOB_SVFLG_HASRUN | JOB_SVFLG_ChkptMig;

    pjob->ji_modified = 1;  /* indicate attributes changed */

    (void)job_save(pjob, SAVEJOB_QUICK);

    /* note in accounting file */

    account_record(PBS_ACCT_CHKPNT, pjob, (char *)0);
    }

  /* PBSE_NOSUP ("not supported") also falls through to the acknowledge */

  reply_ack(preq);
  }
void req_confirmresv(struct batch_request *preq) { char buf[PBS_MAXQRESVNAME+PBS_MAXHOSTNAME+256] = {0}; /* FQDN resvID+text */ time_t newstart = 0; attribute *petime = NULL; resc_resv *presv = NULL; int rc = 0; int state = 0; int sub = 0; int resv_count = 0; int is_degraded = 0; long next_retry_time = 0; char *execvnodes = NULL; char *next_execvnode = NULL; char **short_xc = NULL; char **tofree = NULL; char *str_time = NULL; extern char server_host[]; int is_being_altered = 0; char *tmp_buf = NULL; size_t tmp_buf_size = 0; if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) { req_reject(PBSE_PERM, 0, preq); return; } presv = find_resv(preq->rq_ind.rq_run.rq_jid); if (presv == NULL) { req_reject(PBSE_UNKRESVID, 0, preq); return; } is_degraded = presv->ri_qs.ri_substate == RESV_DEGRADED ? 1 : 0; is_being_altered = presv->ri_alter_flags; if (preq->rq_extend == NULL) { req_reject(PBSE_resvFail, 0, preq); return; } /* If the reservation was degraded and it could not be reconfirmed by the * scheduler, then the retry time for that reservation is reset to the half- * time between now and the time to reservation start or, if the retry time * is invalid, set it to some time after the soonest occurrence is to start */ if (strcmp(preq->rq_extend, PBS_RESV_CONFIRM_FAIL) == 0) { if (is_degraded && !is_being_altered) { long degraded_time = presv->ri_degraded_time; DBPRT(("degraded_time of %s is %s", presv->ri_qs.ri_resvID, ctime(°raded_time))); next_retry_time = time_now + ((degraded_time - time_now)/2); /* If reservation is still degraded, and time of degraded resv to start * is over cutoff from now, then set a time to try again. 
*/ if (next_retry_time <= (degraded_time - reserve_retry_cutoff)) { set_resv_retry(presv, next_retry_time); str_time = ctime(&(presv->ri_wattr[RESV_ATR_retry].at_val.at_long)); if (str_time != NULL) { str_time[strlen(str_time)-1] = '\0'; (void)snprintf(log_buffer, sizeof(log_buffer), "Next attempt to reconfirm reservation will be made on %s", str_time); log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_RESV, LOG_NOTICE, presv->ri_qs.ri_resvID, log_buffer); } } else { /* reached a retry attempt that falls within the cutoff * When processing an advance reservation, unset retry attribute */ if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long == 0) { unset_resv_retry(presv); } else { /* When processing a standing reservation, set a retry time * past the end time of the soonest occurrence. */ set_resv_retry(presv, presv->ri_wattr[RESV_ATR_end].at_val.at_long + RESV_RETRY_DELAY); } } } else { if (!is_being_altered) log_event(PBS_EVENTCLASS_RESV, PBS_EVENTCLASS_RESV, LOG_INFO, presv->ri_qs.ri_resvID, "Reservation denied"); /* Clients waiting on an interactive request must be * notified of the failure to confirm */ if ((presv->ri_brp != NULL) && (presv->ri_wattr[RESV_ATR_interactive].at_flags & ATR_VFLAG_SET)) { presv->ri_wattr[RESV_ATR_interactive].at_flags &= ~ATR_VFLAG_SET; snprintf(buf, sizeof(buf), "%s DENIED", presv->ri_qs.ri_resvID); (void)reply_text(presv->ri_brp, PBSE_NONE, buf); presv->ri_brp = NULL; } if (!is_being_altered) { (void)snprintf(log_buffer, sizeof(log_buffer), "requestor=%s@%s", msg_daemonname, server_host); account_recordResv(PBS_ACCT_DRss, presv, log_buffer); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_RESV, LOG_NOTICE, presv->ri_qs.ri_resvID, "reservation deleted"); resv_purge(presv); } } if (presv->ri_qs.ri_state == RESV_BEING_ALTERED) { resv_revert_alter_times(presv); log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO, presv->ri_qs.ri_resvID, "Reservation alter denied"); } reply_ack(preq); return; } #ifdef NAS /* localmod 122 */ /* If an advance 
reservation has already been confirmed there's no * work to be done. */ if (presv->ri_qs.ri_state == RESV_CONFIRMED && !presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) { reply_ack(preq); return; } #endif /* localmod 122 */ /* Do not alter a reservation that started running when the reconfirmation * message was received. If a standing reservation, then set a retry time * past the end of this occurrence. */ if (presv->ri_qs.ri_state == RESV_RUNNING) { if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) set_resv_retry(presv, presv->ri_wattr[RESV_ATR_end].at_val.at_long + 10); req_reject(PBSE_TOOLATE, 0, preq); return; } petime = &presv->ri_wattr[RESV_ATR_end]; /* if passed in the confirmation, set a new start time */ if ((newstart = (time_t)preq->rq_ind.rq_run.rq_resch) != 0) { presv->ri_qs.ri_stime = newstart; presv->ri_wattr[RESV_ATR_start].at_val.at_long = newstart; presv->ri_wattr[RESV_ATR_start].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE; presv->ri_qs.ri_etime = newstart + presv->ri_qs.ri_duration; petime->at_val.at_long = presv->ri_qs.ri_etime; petime->at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE; } /* The main difference between an advance reservation and a standing * reservation is the format of the execvnodes returned by "rq_destin": * An advance reservation has a single execvnode while a standing reservation * has a sting with the particular format: * <num_resv>#<execvnode1>[<range>]<exevnode2>[... * describing the execvnodes associated to each occurrence. */ if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) { /* The number of occurrences in the standing reservation and index are parsed * from the execvnode string which is of the form: * <num_occurrences>#<vnode1>[range1]<vnode2>[range2]... 
*/ resv_count = get_execvnodes_count(preq->rq_ind.rq_run.rq_destin); if (resv_count == 0) { req_reject(PBSE_INTERNAL, 0, preq); return; } execvnodes = strdup(preq->rq_ind.rq_run.rq_destin); if (execvnodes == NULL) { req_reject(PBSE_SYSTEM, 0, preq); return; } DBPRT(("stdg_resv conf: execvnodes_seq is %s\n", execvnodes)); /* execvnodes is of the form: * <num_resv>#<(execvnode1)>[<range>]<(exevnode2)>[... * this "condensed" string is unrolled into a pointer array of * execvnodes per occurrence, e.g. short_xc[0] are the execvnodes * for 1st occurrence, short_xc[1] for the 2nd etc... * If something goes wrong during unrolling then NULL is returned. * which causes the confirmation message to be rejected */ short_xc = unroll_execvnode_seq(execvnodes, &tofree); if (short_xc == NULL) { free(execvnodes); req_reject(PBSE_SYSTEM, 0, preq); return; } /* The execvnode of the soonest (i.e., next) occurrence */ next_execvnode = strdup(short_xc[0]); if (next_execvnode == NULL) { free(short_xc); free_execvnode_seq(tofree); free(execvnodes); req_reject(PBSE_SYSTEM, 0, preq); return; } /* Release the now obsolete allocations used to manipulate the * unrolled string */ free(short_xc); free_execvnode_seq(tofree); free(execvnodes); /* When confirming for the first time, set the index and count */ if (!is_degraded) { /* Add first occurrence's end date on timed task list */ if (presv->ri_wattr[RESV_ATR_start].at_val.at_long != PBS_RESV_FUTURE_SCH) { if (gen_task_EndResvWindow(presv)) { free(next_execvnode); req_reject(PBSE_SYSTEM, 0, preq); return; } } if (!is_being_altered) { presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long = resv_count; presv->ri_wattr[RESV_ATR_resv_count].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE; } /* Set first occurrence to index 1 * (rather than 0 because it gets displayed in pbs_rstat -f) */ presv->ri_wattr[RESV_ATR_resv_idx].at_val.at_long = 1; presv->ri_wattr[RESV_ATR_resv_idx].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | 
ATR_VFLAG_MODCACHE; } /* Skip setting the execvnodes sequence when reconfirming the last * occurrence or when altering a reservation. */ if (!is_being_altered) { if (presv->ri_wattr[RESV_ATR_resv_idx].at_val.at_long < presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long) { /* now assign the execvnodes sequence attribute */ (void) resv_attr_def[(int)RESV_ATR_resv_execvnodes].at_free( &presv->ri_wattr[(int)RESV_ATR_resv_execvnodes]); (void) resv_attr_def[(int)RESV_ATR_resv_execvnodes].at_decode( &presv->ri_wattr[(int)RESV_ATR_resv_execvnodes], NULL, NULL, preq->rq_ind.rq_run.rq_destin); } } } else { /* Advance reservation */ next_execvnode = strdup(preq->rq_ind.rq_run.rq_destin); if (next_execvnode == NULL) { req_reject(PBSE_SYSTEM, 0, preq); return; } } /* Is reservation still a viable reservation? */ if ((rc = chk_resvReq_viable(presv)) != 0) { free(next_execvnode); req_reject(PBSE_BADTSPEC, 0, preq); return; } /* When reconfirming a degraded reservation, first free the nodes linked * to the reservation and unset all attributes relating to retry attempts */ if (is_degraded) { free_resvNodes(presv); /* Reset retry time */ unset_resv_retry(presv); /* reset vnodes_down counter to 0 */ presv->ri_vnodes_down = 0; } if (is_being_altered & RESV_END_TIME_MODIFIED) { if (gen_task_EndResvWindow(presv)) { free(next_execvnode); req_reject(PBSE_SYSTEM, 0, preq); return; } } /* * Assign the allocated resources to the reservation * and the reservation to the associated vnodes. 
*/ if (is_being_altered) free_resvNodes(presv); rc = assign_resv_resc(presv, next_execvnode); if (rc != PBSE_NONE) { free(next_execvnode); req_reject(rc, 0, preq); return; } /* place "Time4resv" task on "task_list_timed" only if this is a * confirmation but not the reconfirmation of a degraded reservation as * in this case, the reservation had already been confirmed and added to * the task list before */ if (!is_degraded && (is_being_altered != RESV_END_TIME_MODIFIED) && (rc = gen_task_Time4resv(presv)) != 0) { free(next_execvnode); req_reject(rc, 0, preq); return; } /* * compute new values for state and substate * and update the resc_resv object with these * newly computed values */ eval_resvState(presv, RESVSTATE_gen_task_Time4resv, 0, &state, &sub); (void)resv_setResvState(presv, state, sub); cmp_resvStateRelated_attrs((void *)presv, presv->ri_qs.ri_type); Update_Resvstate_if_resv(presv->ri_jbp); if (presv->ri_modified) (void)job_or_resv_save((void *)presv, SAVERESV_FULL, RESC_RESV_OBJECT); log_buffer[0] = '\0'; /* * Notify all interested parties that the reservation * is moving from state UNCONFIRMED to CONFIRMED */ if (presv->ri_brp) { presv = find_resv(presv->ri_qs.ri_resvID); if (presv->ri_wattr[(int)RESV_ATR_convert].at_val.at_str != NULL) { rc = cnvrt_qmove(presv); if (rc != 0) { snprintf(buf, sizeof(buf), "%.240s FAILED", presv->ri_qs.ri_resvID); } else { snprintf(buf, sizeof(buf), "%.240s CONFIRMED", presv->ri_qs.ri_resvID); } } else { snprintf(buf, sizeof(buf), "%.240s CONFIRMED", presv->ri_qs.ri_resvID); } rc = reply_text(presv->ri_brp, PBSE_NONE, buf); presv->ri_brp = NULL; } svr_mailownerResv(presv, MAIL_CONFIRM, MAIL_NORMAL, log_buffer); presv->ri_wattr[RESV_ATR_interactive].at_flags &= ~ATR_VFLAG_SET; if (is_being_altered) { /* * If the reservation is currently running and its start time is being * altered after the current time, It is going back to the confirmed state. 
* We need to stop the reservation queue as it would have been started at * the original start time. * This will prevent any jobs - that are submitted after the * reservation's start time is changed - from running. * The reservation went to CO from RN while being altered, that means the reservation * had resources assigned. We should decrement their usages until it starts running * again, where the resources will be accounted again. */ if (presv->ri_qs.ri_state == RESV_CONFIRMED && presv->ri_alter_state == RESV_RUNNING) { change_enableORstart(presv, Q_CHNG_START, "FALSE"); if (presv->ri_giveback) { set_resc_assigned((void *)presv, 1, DECR); presv->ri_giveback = 0; } } /* * Reset only the flags and end time backup here, as we will need * the start time backup in Time4occurrenceFinish for a standing * reservation. Reset it for an advanced reservation. */ if (!(presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long)) { presv->ri_alter_stime = 0; } presv->ri_alter_etime = 0; presv->ri_alter_flags = 0; log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO, presv->ri_qs.ri_resvID, "Reservation alter confirmed"); } else { log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO, presv->ri_qs.ri_resvID, "Reservation confirmed"); } if (!is_degraded) { /* 100 extra bytes for field names, times, and count */ tmp_buf_size = 100 + strlen(preq->rq_user) + strlen(preq->rq_host) + strlen(next_execvnode); if (tmp_buf_size > sizeof(buf)) { tmp_buf = malloc(tmp_buf_size); if (tmp_buf == NULL) { snprintf(log_buffer, LOG_BUF_SIZE-1, "malloc failure (errno %d)", errno); log_err(PBSE_SYSTEM, __func__, log_buffer); free(next_execvnode); reply_ack(preq); return; } } else { tmp_buf = buf; tmp_buf_size = sizeof(buf); } if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) { (void)snprintf(tmp_buf, tmp_buf_size, "requestor=%s@%s start=%ld end=%ld nodes=%s count=%ld", preq->rq_user, preq->rq_host, presv->ri_qs.ri_stime, presv->ri_qs.ri_etime, next_execvnode, 
presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long); } else { (void)snprintf(tmp_buf, tmp_buf_size, "requestor=%s@%s start=%ld end=%ld nodes=%s", preq->rq_user, preq->rq_host, presv->ri_qs.ri_stime, presv->ri_qs.ri_etime, next_execvnode); } account_recordResv(PBS_ACCT_CR, presv, tmp_buf); if (tmp_buf != buf) { free(tmp_buf); tmp_buf_size = 0; } } free(next_execvnode); reply_ack(preq); return; }