Ejemplo n.º 1
0
void
req_messagejob(struct batch_request *preq)
{
	int               jt;            /* job type */
	job		 *pjob;
	int		  rc;

	if ((pjob = chk_job_request(preq->rq_ind.rq_message.rq_jid, preq, &jt)) == 0)
		return;

	if (jt != IS_ARRAY_NO) {
		reply_text(preq, PBSE_NOSUP, "not supported for Array Jobs");
		return;
	}

	/* the job must be running */

	if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	/* pass the request on to MOM */

	rc = relay_to_mom(pjob, preq, post_message_req);
	if (rc)
		req_reject(rc, 0, preq);	/* unable to get to MOM */

	/* After MOM acts and replies to us, we pick up in post_message_req() */
}
Ejemplo n.º 2
0
/**
 * @brief
 *	Reject a batch request: build the reply text
 *	"<error text> [REJHOST=<host>] [MSG=<msg>]", log it together with
 *	the request type and requestor, store the auxiliary code in the
 *	reply, and send the text reply to the client.
 *
 * @param[in] code     - PBS error code to reply with
 * @param[in] aux      - auxiliary code stored in brp_auxcode
 * @param[in] preq     - the batch request being rejected
 * @param[in] HostName - optional host name appended as "REJHOST=" (may be NULL)
 * @param[in] Msg      - optional message appended as "MSG=" (may be NULL)
 */
void req_reject(

  int                   code,      /* I */
  int                   aux,       /* I */
  struct batch_request *preq,      /* I */
  char                 *HostName,  /* I (optional) */
  char                 *Msg)       /* I (optional) */

  {
  char   msgbuf[ERR_MSG_SIZE + 256 + 1];
  size_t len;

  /* base error text for the code */
  set_err_msg(code, msgbuf);

  /* append the optional parts in place with bounded writes -      */
  /* this replaces the old copy-to-second-buffer-and-back shuffle  */

  if ((HostName != NULL) && (*HostName != '\0'))
    {
    len = strlen(msgbuf);

    snprintf(msgbuf + len, sizeof(msgbuf) - len, " REJHOST=%s", HostName);
    }

  if ((Msg != NULL) && (*Msg != '\0'))
    {
    len = strlen(msgbuf);

    snprintf(msgbuf + len, sizeof(msgbuf) - len, " MSG=%s", Msg);
    }

  /* snprintf() - the old sprintf() could overflow log_buffer.          */
  /* NOTE(review): assumes log_buffer is declared as an array (not a    */
  /* char *) so sizeof gives its capacity - confirm its declaration.    */
  snprintf(log_buffer, sizeof(log_buffer),
          "Reject reply code=%d(%s), aux=%d, type=%s, from %s@%s",
          code,
          msgbuf,
          aux,
          reqtype_to_txt(preq->rq_type),
          preq->rq_user,
          preq->rq_host);

  LOG_EVENT(
    PBSEVENT_DEBUG,
    PBS_EVENTCLASS_REQUEST,
    "req_reject",
    log_buffer);

  preq->rq_reply.brp_auxcode = aux;

  reply_text(preq, code, msgbuf);

  return;
  }  /* END req_reject() */
Ejemplo n.º 3
0
/**
 * @brief
 *	Answer a still-pending rerun request with a timeout message.
 *
 * @param[in,out]	pwt	-	work task whose wt_parm1 is the job
 *					holding the pending rerun request
 */
static void
timeout_rerun_request(struct work_task *pwt)
{
	job *pjob = (job *)pwt->wt_parm1;
	int  idx = -1;

	/* nothing to time out unless a rerun request is outstanding */
	if (pjob == NULL)
		return;
	if (pjob->ji_rerun_preq == NULL)
		return;

	if (pjob->ji_rerun_preq->rq_conn != PBS_LOCAL_CONNECTION)
		idx = connection_find_actual_index(pjob->ji_rerun_preq->rq_conn);

	reply_text(pjob->ji_rerun_preq, PBSE_INTERNAL,
		"Response timed out. Job rerun request still in progress for");

	/* clear no-timeout flag on connection */
	if (idx != -1)
		svr_conn[idx].cn_authen &= ~PBS_NET_CONN_NOTIMEOUT;

	pjob->ji_rerun_preq = NULL;
}
Ejemplo n.º 4
0
/**
 * @brief
 *	Reply to a batch request with an error code plus the name (and
 *	optional resource) of the attribute at position 'aux' in the list,
 *	identifying which attribute caused the failure.
 *
 * @param[in] code - PBS error code to reply with
 * @param[in] aux  - 1-based index of the offending attribute in the list
 * @param[in] pal  - head of the svrattrl attribute list
 * @param[in] preq - the batch request being answered
 */
void reply_badattr(

  int                   code,
  int                   aux,
  svrattrl        *pal,
  struct batch_request *preq)

  {
  int   i = 1;
  char  msgbuf[ERR_MSG_SIZE+1];

  set_err_msg(code, msgbuf);

  while (pal)
    {
    if (i == aux)
      {
      size_t len = strlen(msgbuf);

      /* bounded append - the previous strcat() calls could overflow */
      /* msgbuf when attribute/resource names were long              */
      if (pal->al_resc)
        snprintf(msgbuf + len, sizeof(msgbuf) - len, " %s.%s", pal->al_name, pal->al_resc);
      else
        snprintf(msgbuf + len, sizeof(msgbuf) - len, " %s", pal->al_name);

      break;
      }

    pal = (svrattrl *)GET_NEXT(pal->al_link);

    ++i;
    }

  reply_text(preq, code, msgbuf);

  return;
  }  /* END reply_badattr() */
Ejemplo n.º 5
0
/**
 * @brief
 *	Reply to a batch request with an error code plus the name (and
 *	optional resource) of the attribute at position 'aux' in the list.
 *
 * @param[in] code - PBS error code to reply with
 * @param[in] aux  - 1-based index of the offending attribute in the list
 * @param[in] pal  - head of the svrattrl attribute list
 * @param[in] preq - the batch request being answered
 */
void reply_badattr(

  int                   code,
  int                   aux,
  svrattrl             *pal,
  struct batch_request *preq)

  {
  char msgbuf[ERR_MSG_SIZE+1];
  int  pos;

  set_err_msg(code, msgbuf, sizeof(msgbuf));

  /* walk to the aux'th entry and append its name to the message */
  for (pos = 1; pal != NULL; pal = (svrattrl *)GET_NEXT(pal->al_link), ++pos)
    {
    int used;

    if (pos != aux)
      continue;

    used = strlen(msgbuf);

    if (pal->al_resc)
      snprintf(msgbuf + used, sizeof(msgbuf) - used, " %s.%s", pal->al_name, pal->al_resc);
    else
      snprintf(msgbuf + used, sizeof(msgbuf) - used, " %s", pal->al_name);

    break;
    }

  reply_text(preq, code, msgbuf);
  }  /* END reply_badattr() */
Ejemplo n.º 6
0
/**
 * @brief
 *	Service the Release Nodes From Job batch request: validate that the
 *	target is a running regular job or a single running subjob, then
 *	release the requested sister vnodes via free_sister_vnodes().
 *
 * @param[in] preq - batch request; rq_relnodes holds the job id and the
 *		     node list (empty string means no explicit list)
 */
void
req_relnodesjob(struct batch_request *preq)
{
	int             jt;		/* job type */
	job		*pjob;
	int		rc;
	char		*jid;
	int		i, offset;
	char		*nodeslist = NULL;
	char		msg[LOG_BUF_SIZE];

 
	if (preq == NULL)
		return;

	jid = preq->rq_ind.rq_relnodes.rq_jid;
	if (jid == NULL)
		return;

	/*
	 ** Returns job pointer for singleton job or "parent" of
	 ** an array job.
	 */
	pjob = chk_job_request(jid, preq, &jt);
	if (pjob == NULL) {
		return;
	}

	if (jt == IS_ARRAY_NO) {		/* a regular job is okay */
		/* the job must be running */
		if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) ||
			(pjob->ji_qs.ji_substate !=
			JOB_SUBSTATE_RUNNING)) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}
	else if (jt == IS_ARRAY_Single) {	/* a single subjob is okay */

		/* map the subjob index in the job id to a tracking-table offset */
		offset = subjob_index_to_offset(pjob,
			get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		i = get_subjob_state(pjob, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}

		if (i != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		/* switch from the array "parent" to the subjob itself */
		if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	nodeslist = preq->rq_ind.rq_relnodes.rq_node_list;

	/* normalize an empty node-list string to NULL before passing on */
	if ((nodeslist != NULL) && (nodeslist[0] == '\0')) {
		nodeslist = NULL;
	}
	rc = free_sister_vnodes(pjob, nodeslist, msg, LOG_BUF_SIZE, preq);

	if (rc != 0) {
		reply_text(preq, PBSE_SYSTEM, msg);
	}
}
Ejemplo n.º 7
0
/**
 * @brief
 *	Service the Python Spawn batch request: verify the requestor owns
 *	the job and that the target is a running regular job or a single
 *	running subjob, then relay the request to the job's MOM.
 *
 * @param[in] preq - batch request; rq_py_spawn holds the job id
 */
void
req_py_spawn(struct batch_request *preq)
{
	int             jt;		/* job type */
	job		*pjob;
	int		rc;
	char		*jid = preq->rq_ind.rq_py_spawn.rq_jid;
	int		i, offset;

	/*
	 ** Returns job pointer for singleton job or "parent" of
	 ** an array job.
	 */
	pjob = chk_job_request(jid, preq, &jt);
	if (pjob == NULL)
		return;

	/* see if requestor is the job owner */
	if (svr_chk_owner(preq, pjob) != 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	if (jt == IS_ARRAY_NO) {		/* a regular job is okay */
		/* the job must be running */
		if ((pjob->ji_qs.ji_state != JOB_STATE_RUNNING) ||
			(pjob->ji_qs.ji_substate !=
			JOB_SUBSTATE_RUNNING)) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}
	else if (jt == IS_ARRAY_Single) {	/* a single subjob is okay */

		/* map the subjob index in the job id to a tracking-table offset */
		offset = subjob_index_to_offset(pjob,
			get_index_from_jid(jid));
		if (offset == -1) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}

		i = get_subjob_state(pjob, offset);
		if (i == -1) {
			req_reject(PBSE_IVALREQ, 0, preq);
			return;
		}

		if (i != JOB_STATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
		/* switch from the array "parent" to the subjob itself */
		if ((pjob = pjob->ji_ajtrk->tkm_tbl[offset].trk_psubjob) == NULL) {
			req_reject(PBSE_UNKJOBID, 0, preq);
			return;
		}
		if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_RUNNING) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	} else {
		reply_text(preq, PBSE_NOSUP,
			"not supported for Array Jobs or multiple sub-jobs");
		return;
	}

	/*
	 ** Pass the request on to MOM.  If this works, the function
	 ** post_py_spawn_req will be called to handle the reply.
	 ** If it fails, send the reply now.
	 */
	rc = relay_to_mom(pjob, preq, post_py_spawn_req);
	if (rc)
		req_reject(rc, 0, preq);	/* unable to get to MOM */
}
Ejemplo n.º 8
0
/**
 * @brief
 *	Dispatch a decoded batch request to its service routine based on
 *	rq_type.  For TCP (non-RPP) requests from remote clients, the
 *	connection entry is looked up first; unknown request types are
 *	rejected and the client connection closed.  Which cases are
 *	compiled in depends on PBS_MOM (server-only vs MOM-only requests).
 *
 * @param[in] sfds    - socket/connection the request arrived on
 * @param[in] request - the batch request to dispatch
 */
void
dispatch_request(int sfds, struct batch_request *request)
{

	conn_t *conn = NULL;
	int rpp = request->isrpp;

	/* for TCP requests from remote clients, the connection table entry
	 * must exist; conn stays NULL for RPP and local requests */
	if (!rpp) {
		if (sfds != PBS_LOCAL_CONNECTION) {
			conn = get_conn(sfds);
			if (!conn) {
				log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
				LOG_ERR,
							"dispatch_request", "did not find socket in connection table");
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
		}
	}

	switch (request->rq_type) {

		case PBS_BATCH_QueueJob:
			if (rpp) {
				request->rpp_ack = 0;
				rpp_add_close_func(sfds, close_quejob);
			} else
				net_add_close_func(sfds, close_quejob);
			req_quejob(request);
			break;

		case PBS_BATCH_JobCred:
#ifndef  PBS_MOM

			/* Reject if a user client (qsub -Wpwd) and not a */
			/* server (qmove) enqueued a job with JobCredential */
			if ( !request->rq_fromsvr && \
			     (server.sv_attr[SRV_ATR_ssignon_enable].at_flags \
                                                         & ATR_VFLAG_SET) &&  \
                             (server.sv_attr[SRV_ATR_ssignon_enable].at_val.at_long == 1) ) {
				req_reject(PBSE_SSIGNON_SET_REJECT, 0, request);
				close_client(sfds);
				break;
			}
#endif
			if (rpp)
				request->rpp_ack = 0;
			req_jobcredential(request);
			break;

		case PBS_BATCH_UserCred:
#ifdef PBS_MOM
#ifdef	WIN32
			req_reject(PBSE_NOSUP, 0, request);
#else
			req_reject(PBSE_UNKREQ, 0, request);
#endif
			close_client(sfds);
#else
			req_usercredential(request);
#endif
			break;

		case PBS_BATCH_UserMigrate:
#ifdef	PBS_MOM
#ifdef	WIN32
			req_reject(PBSE_NOSUP, 0, request);
#else
			req_reject(PBSE_UNKREQ, 0, request);
#endif	/* WIN32 */
			close_client(sfds);
#else
			req_user_migrate(request);
#endif	/* PBS_MOM */
			break;

		case PBS_BATCH_GSS_Context:
			req_gsscontext(request);
			break;

		case PBS_BATCH_jobscript:
			if (rpp)
				request->rpp_ack = 0;
			req_jobscript(request);
			break;

			/*
			 * The PBS_BATCH_Rdytocommit message is deprecated.
			 * The server does not do anything with it anymore, but
			 * simply acks the request (in case some client makes this call)
			 */
		case PBS_BATCH_RdytoCommit:
			if (request->isrpp)
				request->rpp_ack = 0;
			reply_ack(request);
			break;

		case PBS_BATCH_Commit:
			if (rpp)
				request->rpp_ack = 0;
			req_commit(request);
			if (rpp)
				rpp_add_close_func(sfds, (void (*)(int))0);
			else
				net_add_close_func(sfds, (void (*)(int))0);
			break;

		case PBS_BATCH_DeleteJob:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_delete.rq_objname,
				"delete job request received");
			req_deletejob(request);
			break;

#ifndef PBS_MOM
		case PBS_BATCH_SubmitResv:
			req_resvSub(request);
			break;

		case PBS_BATCH_DeleteResv:
			req_deleteReservation(request);
			break;

		case PBS_BATCH_ModifyResv:
			req_modifyReservation(request);
			break;

		case PBS_BATCH_ResvOccurEnd:
			req_reservationOccurrenceEnd(request);
			break;
#endif

		case PBS_BATCH_HoldJob:
			/* may take a while; disable the connection idle timeout */
			if (sfds != PBS_LOCAL_CONNECTION && !rpp)
				conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
			req_holdjob(request);
			break;
#ifndef PBS_MOM
		case PBS_BATCH_LocateJob:
			req_locatejob(request);
			break;

		case PBS_BATCH_Manager:
			req_manager(request);
			break;

		case PBS_BATCH_RelnodesJob:
			req_relnodesjob(request);
			break;

#endif
		case PBS_BATCH_MessJob:
			req_messagejob(request);
			break;

		case PBS_BATCH_PySpawn:
			if (sfds != PBS_LOCAL_CONNECTION && !rpp)
				conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
			req_py_spawn(request);
			break;

		case PBS_BATCH_ModifyJob:
			req_modifyjob(request);
			break;

		case PBS_BATCH_Rerun:
			req_rerunjob(request);
			break;
#ifndef PBS_MOM
		case PBS_BATCH_MoveJob:
			req_movejob(request);
			break;

		case PBS_BATCH_OrderJob:
			req_orderjob(request);
			break;

		case PBS_BATCH_Rescq:
			req_reject(PBSE_NOSUP, 0, request);
			break;

		case PBS_BATCH_ReserveResc:
			req_reject(PBSE_NOSUP, 0, request);
			break;

		case PBS_BATCH_ReleaseResc:
			req_reject(PBSE_NOSUP, 0, request);
			break;

		case PBS_BATCH_ReleaseJob:
			if (sfds != PBS_LOCAL_CONNECTION && !rpp)
				conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
			req_releasejob(request);
			break;

		case PBS_BATCH_RunJob:
		case PBS_BATCH_AsyrunJob:
			req_runjob(request);
			break;

		case PBS_BATCH_DefSchReply:
			req_defschedreply(request);
			break;

		case PBS_BATCH_ConfirmResv:
			req_confirmresv(request);
			break;

		case PBS_BATCH_SelectJobs:
		case PBS_BATCH_SelStat:
			req_selectjobs(request);
			break;

#endif /* !PBS_MOM */

		case PBS_BATCH_Shutdown:
			req_shutdown(request);
			break;

		case PBS_BATCH_SignalJob:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_signal.rq_jid,
				"signal job request received");
			req_signaljob(request);
			break;

		case PBS_BATCH_MvJobFile:
			req_mvjobfile(request);
			break;

#ifndef PBS_MOM		/* Server Only Functions */

		case PBS_BATCH_StatusJob:
			/* status replies can be large; stream them non-blocking */
			if (set_to_non_blocking(conn) == -1) {
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
			req_stat_job(request);
			clear_non_blocking(conn);
			break;

		case PBS_BATCH_StatusQue:
			if (set_to_non_blocking(conn) == -1) {
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
			req_stat_que(request);
			clear_non_blocking(conn);
			break;

		case PBS_BATCH_StatusNode:
			if (set_to_non_blocking(conn) == -1) {
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
			req_stat_node(request);
			clear_non_blocking(conn);
			break;

		case PBS_BATCH_StatusResv:
			if (set_to_non_blocking(conn) == -1) {
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
			req_stat_resv(request);
			clear_non_blocking(conn);
			break;

		case PBS_BATCH_StatusSvr:
			req_stat_svr(request);
			break;

		case PBS_BATCH_StatusSched:
			req_stat_sched(request);
			break;

		case PBS_BATCH_StatusHook:
			if (!is_local_root(request->rq_user,
				request->rq_host)) {
				sprintf(log_buffer, "%s@%s is unauthorized to "
					"access hooks data from server %s",
					request->rq_user, request->rq_host, server_host);
				reply_text(request, PBSE_HOOKERROR, log_buffer);
				log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_HOOK,
					LOG_INFO, "", log_buffer);
				/* don't call close_client() to allow other */
				/* non-hook related requests to continue */
				break;
			}

			if (set_to_non_blocking(conn) == -1) {
				req_reject(PBSE_SYSTEM, 0, request);
				close_client(sfds);
				return;
			}
			req_stat_hook(request);
			clear_non_blocking(conn);
			break;

		case PBS_BATCH_TrackJob:
			req_track(request);
			break;

		case PBS_BATCH_RegistDep:
			req_register(request);
			break;

		case PBS_BATCH_AuthenResvPort:
			if (pbs_conf.auth_method == AUTH_MUNGE) {
                                req_reject(PBSE_BADCRED, 0, request);
                                close_client(sfds);
                                return;
                        }
			req_authenResvPort(request);
			break;

		case PBS_BATCH_StageIn:
			req_stagein(request);
			break;

		case PBS_BATCH_FailOver:
			req_failover(request);
			break;

		case PBS_BATCH_StatusRsc:
			req_stat_resc(request);
			break;

		case PBS_BATCH_MomRestart:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE,
				LOG_INFO,
				request->rq_ind.rq_momrestart.rq_momhost,
				"Mom restarted on host");
			req_momrestart(request);
			break;
#else	/* MOM only functions */

		case PBS_BATCH_CopyFiles:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_cpyfile.rq_jobid,
				"copy file request received");
			/* don't time-out as copy may take long time */
			if (sfds != PBS_LOCAL_CONNECTION && !rpp)
				conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
			req_cpyfile(request);
			break;
		case PBS_BATCH_CopyFiles_Cred:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_cpyfile_cred.rq_copyfile.rq_jobid,
				"copy file cred request received");
			/* don't time-out as copy may take long time */
			if (sfds != PBS_LOCAL_CONNECTION && !rpp)
				conn->cn_authen |= PBS_NET_CONN_NOTIMEOUT;
			req_cpyfile(request);
			break;

		case PBS_BATCH_DelFiles:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_cpyfile.rq_jobid,
				"delete file request received");
			req_delfile(request);
			break;
		case PBS_BATCH_DelFiles_Cred:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO,
				request->rq_ind.rq_cpyfile_cred.rq_copyfile.rq_jobid,
				"delete file cred request received");
			req_delfile(request);
			break;
		case PBS_BATCH_CopyHookFile:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_HOOK,
				LOG_INFO,
				request->rq_ind.rq_hookfile.rq_filename,
				"copy hook-related file request received");
			req_copy_hookfile(request);
			break;
		case PBS_BATCH_DelHookFile:
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_HOOK,
				LOG_INFO,
				request->rq_ind.rq_hookfile.rq_filename,
				"delete hook-related file request received");
			req_del_hookfile(request);
			break;

#endif
		default:
			req_reject(PBSE_UNKREQ, 0, request);
			close_client(sfds);
			break;
	}
	return;
}
Ejemplo n.º 9
0
/**
 * @brief Service the Modify Reservation Request from client such as pbs_ralter.
 *
 *	This request atomically modifies one or more of a reservation's attributes.
 *	An error is returned to the client if the user does not have permission
 *	to perform the modification, the attribute is read-only, the reservation is
 *	running and the attribute is only modifiable when the reservation is not
 *	running or is empty.
 *
 * @param[in] preq - pointer to batch request from client
 */
void
req_modifyReservation(struct batch_request *preq)
{
	char		*rid = NULL;
	svrattrl	*psatl = NULL;
	attribute_def	*pdef = NULL;
	int		rc = 0;
	int		bad = 0;
	char		buf[PBS_MAXUSER + PBS_MAXHOSTNAME + 32] = {0};
	int		sock;
	int		resc_access_perm_save = 0;
	int		send_to_scheduler = 0;
	int		log_len = 0;
	char		*fmt = "%a %b %d %H:%M:%S %Y";
	int		is_standing = 0;
	int		next_occr_start = 0;
	extern char	*msg_stdg_resv_occr_conflict;
	resc_resv	*presv;

	if (preq == NULL)
		return;

	sock = preq->rq_conn;

	presv = chk_rescResv_request(preq->rq_ind.rq_modify.rq_objname, preq);
	/* Note: on failure, chk_rescResv_request invokes req_reject
	 * appropriate reply is sent and batch_request is freed.
	 */
	if (presv == NULL)
		return;

	rid = preq->rq_ind.rq_modify.rq_objname;
	if ((presv = find_resv(rid)) == NULL) {
		/* Not on "all_resvs" list try "new_resvs" list */
		presv = (resc_resv *)GET_NEXT(svr_newresvs);
		while (presv) {
			if (!strcmp(presv->ri_qs.ri_resvID, rid))
				break;
			presv = (resc_resv *)GET_NEXT(presv->ri_allresvs);
		}
	}

	if (presv == NULL) {
		req_reject(PBSE_UNKRESVID, 0, preq);
		return;
	}

	/* for a standing reservation, look up when the next occurrence
	 * starts; start/end changes must not cross into it */
	is_standing = presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long;
	if (is_standing)
		next_occr_start = get_occurrence(presv->ri_wattr[RESV_ATR_resv_rrule].at_val.at_str,
					presv->ri_wattr[RESV_ATR_start].at_val.at_long,
					presv->ri_wattr[RESV_ATR_resv_timezone].at_val.at_str, 2);

	resc_access_perm_save = resc_access_perm;
	psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	presv->ri_alter_flags = 0;
	/* validate each attribute in the request and record which
	 * time attributes are being altered */
	while (psatl) {
		long temp = 0;
		char *end = NULL;
		int index;

		/* identify the attribute by name */
		index = find_attr(resv_attr_def, psatl->al_name, RESV_ATR_LAST);
		if (index < 0) {
			/* didn`t recognize the name */
			reply_badattr(PBSE_NOATTR, 1, psatl, preq);
			return;
		}
		pdef = &resv_attr_def[index];

		/* Does attribute's definition flags indicate that
		 * we have sufficient permission to write the attribute?
		 */

		resc_access_perm = resc_access_perm_save; /* reset */
		if (psatl->al_flags & ATR_VFLAG_HOOK) {
			resc_access_perm = ATR_DFLAG_USWR | \
					    ATR_DFLAG_OPWR | \
					    ATR_DFLAG_MGWR | \
				            ATR_DFLAG_SvWR | \
					    ATR_DFLAG_Creat;
		}
		if ((pdef->at_flags & resc_access_perm) == 0) {
			reply_badattr(PBSE_ATTRRO, 1, psatl, preq);
			return;
		}

		switch (index) {
			case RESV_ATR_start:
				/* start may change only if the reservation is not
				 * running, or is running but empty */
				if ((presv->ri_wattr[RESV_ATR_state].at_val.at_long != RESV_RUNNING) ||
					!(presv->ri_qp->qu_numjobs)) {
					temp = strtol(psatl->al_value, &end, 10);
					if ((temp > time(NULL)) &&
						(temp != presv->ri_wattr[RESV_ATR_start].at_val.at_long)) {
						if (!is_standing || (temp < next_occr_start)) {
							send_to_scheduler = RESV_START_TIME_MODIFIED;
							presv->ri_alter_stime = presv->ri_wattr[RESV_ATR_start].at_val.at_long;
							presv->ri_alter_flags |= RESV_START_TIME_MODIFIED;
						} else {
							snprintf(log_buffer, sizeof(log_buffer), "%s", msg_stdg_resv_occr_conflict);
							log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
								preq->rq_ind.rq_modify.rq_objname, log_buffer);
							req_reject(PBSE_STDG_RESV_OCCR_CONFLICT, 0, preq);
							return;
						}
					} else {
						req_reject(PBSE_BADTSPEC, 0, preq);
						return;
					}
				} else {
					if (presv->ri_qp->qu_numjobs)
						req_reject(PBSE_RESV_NOT_EMPTY, 0, preq);
					else
						req_reject(PBSE_BADTSPEC, 0, preq);
					return;
				}
				break;
			case RESV_ATR_end:
				temp = strtol(psatl->al_value, &end, 10);
				if (temp == presv->ri_wattr[RESV_ATR_end].at_val.at_long) {
					req_reject(PBSE_BADTSPEC, 0, preq);
					return;
				}
				if (!is_standing || temp < next_occr_start) {
					send_to_scheduler = RESV_END_TIME_MODIFIED;
					presv->ri_alter_etime = presv->ri_wattr[RESV_ATR_end].at_val.at_long;
					presv->ri_alter_flags |= RESV_END_TIME_MODIFIED;
				} else {
					snprintf(log_buffer, sizeof(log_buffer), "%s", msg_stdg_resv_occr_conflict);
					log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
						preq->rq_ind.rq_modify.rq_objname, log_buffer);
					req_reject(PBSE_STDG_RESV_OCCR_CONFLICT, 0, preq);
					return;
				}
				break;
			default:
				break;
		}

		/* decode attribute */
		rc = pdef->at_decode(&presv->ri_wattr[index],
			psatl->al_name, psatl->al_resc, psatl->al_value);

		if (rc != 0) {
			reply_badattr(rc, 1, psatl, preq);
			return;
		}

		psatl = (svrattrl *)GET_NEXT(psatl->al_link);
	}
	resc_access_perm = resc_access_perm_save; /* restore perm */

	if (send_to_scheduler) {
		presv->ri_alter_state = presv->ri_wattr[RESV_ATR_state].at_val.at_long;
		resv_setResvState(presv, RESV_BEING_ALTERED, presv->ri_qs.ri_substate);
		/*"start", "end","duration", and "wall"; derive and check */
		if (start_end_dur_wall(presv, RESC_RESV_OBJECT)) {
			req_reject(PBSE_BADTSPEC, 0, preq);
			resv_revert_alter_times(presv);
			return;
		}
		presv->ri_wattr[RESV_ATR_resource].at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
	}
	bad = 0;
	psatl = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	if (psatl)
		rc = modify_resv_attr(presv, psatl, preq->rq_perm, &bad);

	if (send_to_scheduler)
		set_scheduler_flag(SCH_SCHEDULE_RESV_RECONFIRM, dflt_scheduler);

	/* log which of the time attributes were requested to change */
	(void)sprintf(log_buffer, "Attempting to modify reservation");
	if (presv->ri_alter_flags & RESV_START_TIME_MODIFIED) {
		strftime(buf, sizeof(buf), fmt, localtime((time_t *) &presv->ri_wattr[RESV_ATR_start].at_val.at_long));
		log_len = strlen(log_buffer);
		snprintf(log_buffer + log_len, sizeof(log_buffer) - log_len," start=%s", buf);
	}
	if (presv->ri_alter_flags & RESV_END_TIME_MODIFIED) {
		strftime(buf, sizeof(buf), fmt, localtime((time_t *) &presv->ri_wattr[RESV_ATR_end].at_val.at_long));
		log_len = strlen(log_buffer);
		snprintf(log_buffer + log_len, sizeof(log_buffer) - log_len," end=%s", buf);
	}
	log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO, preq->rq_ind.rq_modify.rq_objname, log_buffer);

	if ((presv->ri_wattr[RESV_ATR_interactive].at_flags &
		ATR_VFLAG_SET) == 0) {
		char buf1[PBS_MAXUSER + PBS_MAXHOSTNAME + 32] = {0};
		/*Not "interactive" so don't wait on scheduler, reply now*/

		sprintf(buf, "%s ALTER REQUESTED",  presv->ri_qs.ri_resvID);
		sprintf(buf1, "requestor=%s@%s", preq->rq_user, preq->rq_host);

		if ((rc = reply_text(preq, PBSE_NONE, buf))) {
			/* reply failed,  close connection; DON'T purge resv */
			close_client(sock);
			return;
		}
	} else {
		/*Don't reply back until scheduler decides*/
		long dt;
		presv->ri_brp = preq;
		dt = presv->ri_wattr[RESV_ATR_interactive].at_val.at_long;
		/*reply with id and state no decision in +dt secs*/
		(void)gen_future_reply(presv, dt);
		(void)snprintf(buf, sizeof(buf), "requestor=%s@%s Interactive=%ld",
			preq->rq_user, preq->rq_host, dt);
	}
}
Ejemplo n.º 10
0
/**
 * @brief
 *	Service the Modify Job batch request (qalter): run the modifyjob
 *	hook, validate each requested attribute against the job's state and
 *	the requestor's permissions, apply the modifications, and relay the
 *	change to MOM when a running job's resource limits were altered.
 *
 * @param[in] preq - batch request; rq_modify holds the job id and the
 *		     list of attributes to alter
 */
void
req_modifyjob(struct batch_request *preq)
{
	int		 add_to_am_list = 0; /* if altered during sched cycle */
	int		 bad = 0;
	int		 jt;		/* job type */
	int		 newstate;
	int		 newsubstate;
	resource_def	*outsideselect = NULL;
	job		*pjob;
	svrattrl	*plist;
	resource	*presc;
	resource_def	*prsd;
	int		 rc;
	int		 running = 0;
	int		 sendmom = 0;
	char		hook_msg[HOOK_MSG_SIZE];
	int		mod_project = 0;
	pbs_sched	*psched;

	/* give the modifyjob hook first say over the request */
	switch (process_hooks(preq, hook_msg, sizeof(hook_msg),
			pbs_python_set_interrupt)) {
		case 0:	/* explicit reject */
			reply_text(preq, PBSE_HOOKERROR, hook_msg);
			return;
		case 1:   /* explicit accept */
			if (recreate_request(preq) == -1) { /* error */
				/* we have to reject the request, as 'preq' */
				/* may have been partly modified            */
				strcpy(hook_msg,
					"modifyjob event: rejected request");
				log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_HOOK,
					LOG_ERR, "", hook_msg);
				reply_text(preq, PBSE_HOOKERROR, hook_msg);
				return;
			}
			break;
		case 2:	/* no hook script executed - go ahead and accept event*/
			break;
		default:
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_HOOK,
				LOG_INFO, "", "modifyjob event: accept req by default");
	}

	if (pseldef == NULL)  /* do one time to keep handy */
		pseldef = find_resc_def(svr_resc_def, "select", svr_resc_size);

	pjob = chk_job_request(preq->rq_ind.rq_modify.rq_objname, preq, &jt);
	if (pjob == NULL)
		return;

	if ((jt == IS_ARRAY_Single) || (jt == IS_ARRAY_Range)) {
		req_reject(PBSE_IVALREQ, 0, preq);
		return;
	}

	psched = find_sched_from_sock(preq->rq_conn);
	/* allow scheduler to modify job */
	if (psched == NULL) {
		/* provisioning job is not allowed to be modified */
		if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
			(pjob->ji_qs.ji_substate == JOB_SUBSTATE_PROVISION)) {
			req_reject(PBSE_BADSTATE, 0, preq);
			return;
		}
	}

	/* cannot be in exiting or transit, exiting has already be checked */

	if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) {
		req_reject(PBSE_BADSTATE, 0, preq);
		return;
	}

	plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	if (plist == NULL) {	/* nothing to do */
		reply_ack(preq);
		return;
	}

	/*
	 * Special checks must be made:
	 *	if during a scheduling cycle and certain attributes are altered,
	 *	   make a note of the job to prevent it from being run now;
	 *	if job is running, only certain attributes/resources can be
	 *	   altered.
	 */

	if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) {
		running = 1;
	}
	/* first pass: validate every attribute before changing anything */
	while (plist) {
		int i;

		i = find_attr(job_attr_def, plist->al_name, JOB_ATR_LAST);

		/*
		 * Is the attribute being altered one which could change
		 * scheduling (ATR_DFLAG_SCGALT set) and if a scheduling
		 * cycle is in progress, then set flag to add the job to list
		 * of jobs which cannot be run in this cycle.
		 * If the scheduler itself sends a modify job request,
		 * no need to delay the job until next cycle.
		 */
		if ((psched == NULL) && (scheduler_jobs_stat) && (job_attr_def[i].at_flags & ATR_DFLAG_SCGALT))
			add_to_am_list = 1;

		/* Is the attribute modifiable in RUN state ? */

		if (i < 0) {
			reply_badattr(PBSE_NOATTR, 1, plist, preq);
			return;
		}
		if ((running == 1) &&
			((job_attr_def[i].at_flags & ATR_DFLAG_ALTRUN) == 0)) {

			reply_badattr(PBSE_MODATRRUN, 1, plist, preq);
			return;
		}
		if (i == (int)JOB_ATR_resource) {

			prsd = find_resc_def(svr_resc_def, plist->al_resc,
				svr_resc_size);

			if (prsd == 0) {
				reply_badattr(PBSE_UNKRESC, 1, plist, preq);
				return;
			}

			/* is the specified resource modifiable while */
			/* the job is running                         */

			if (running) {

				if ((prsd->rs_flags & ATR_DFLAG_ALTRUN) == 0) {
					reply_badattr(PBSE_MODATRRUN, 1, plist, preq);
					return;
				}

				sendmom = 1;
			}

			/* should the resource be only in a select spec */

			if (prsd->rs_flags & ATR_DFLAG_CVTSLT && !outsideselect &&
				plist->al_atopl.value && plist->al_atopl.value[0]) {
				/* if "-lresource" is set and has non-NULL value,
				** remember as potential bad resource
				** if this appears along "select".
				*/
				outsideselect = prsd;
			}
		}
		if (strcmp(plist->al_name, ATTR_project) == 0) {
			mod_project = 1;
		} else if ((strcmp(plist->al_name, ATTR_runcount) == 0) &&
			((plist->al_flags & ATR_VFLAG_HOOK) == 0) &&
			(plist->al_value != NULL) &&
			(plist->al_value[0] != '\0') &&
			((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) &&
		(atol(plist->al_value) < \
		    pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long)) {
			/* ordinary users may not lower run_count */
			sprintf(log_buffer,
				"regular user %s@%s cannot decrease '%s' attribute value from %ld to %ld",
				preq->rq_user, preq->rq_host, ATTR_runcount,
				pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long,
				atol(plist->al_value));
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_ERR,
				pjob->ji_qs.ji_jobid, log_buffer);
			req_reject(PBSE_PERM, 0, preq);
			return;
		}
		plist = (svrattrl *)GET_NEXT(plist->al_link);
	}

	if (outsideselect) {
		presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource],
			pseldef);
		if (presc &&
			((presc->rs_value.at_flags & ATR_VFLAG_DEFLT) == 0)) {
			/* select is not a default, so reject qalter */

			resc_in_err = strdup(outsideselect->rs_name);
			req_reject(PBSE_INVALJOBRESC, 0, preq);
			return;
		}

	}

	/* modify the jobs attributes */

	bad = 0;
	plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);
	rc = modify_job_attr(pjob, plist, preq->rq_perm, &bad);
	if (rc) {
		if (pjob->ji_clterrmsg)
			reply_text(preq, rc, pjob->ji_clterrmsg);
		else
			reply_badattr(rc, bad, plist, preq);
		return;
	}

	/* If certain attributes modified and if in scheduling cycle  */
	/* then add to list of jobs which cannot be run in this cycle */

	if (add_to_am_list)
		am_jobs_add(pjob);	/* see req_runjob() */

	/* check if project attribute was requested to be modified to */
	/* be the default project value */
	if (mod_project && (pjob->ji_wattr[(int)JOB_ATR_project].at_flags & \
							ATR_VFLAG_SET)) {

		if (strcmp(pjob->ji_wattr[(int)JOB_ATR_project].at_val.at_str,
			PBS_DEFAULT_PROJECT) == 0) {
			sprintf(log_buffer, msg_defproject,
				ATTR_project, PBS_DEFAULT_PROJECT);
#ifdef NAS /* localmod 107 */
			log_event(PBSEVENT_DEBUG4, PBS_EVENTCLASS_JOB, LOG_INFO,
				pjob->ji_qs.ji_jobid, log_buffer);
#else
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				pjob->ji_qs.ji_jobid, log_buffer);
#endif /* localmod 107 */
		}
	}

	if (pjob->ji_wattr[(int)JOB_ATR_resource].at_flags & ATR_VFLAG_MODIFY) {
		presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource],
			pseldef);
		if (presc && (presc->rs_value.at_flags & ATR_VFLAG_DEFLT)) {
			/* changing Resource_List and select is a default   */
			/* clear "select" so it is rebuilt inset_resc_deflt */
			pseldef->rs_free(&presc->rs_value);
		}
	}

	/* Reset any defaults resource limit which might have been unset */
	if ((rc = set_resc_deflt((void *)pjob, JOB_OBJECT, NULL)) != 0) {
		req_reject(rc, 0, preq);
		return;
	}

	/* if job is not running, may need to change its state */

	if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) {
		svr_evaljobstate(pjob, &newstate, &newsubstate, 0);
		(void)svr_setjobstate(pjob, newstate, newsubstate);
	} else {
		(void)job_save(pjob, SAVEJOB_FULL);
	}
	(void)sprintf(log_buffer, msg_manager, msg_jobmod,
		preq->rq_user, preq->rq_host);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
		pjob->ji_qs.ji_jobid, log_buffer);

	/* if a resource limit changed for a running job, send to MOM */

	if (sendmom) {
		rc = relay_to_mom(pjob, preq, post_modify_req);
		if (rc)
			req_reject(rc, 0, preq);    /* unable to get to MOM */
		return;
	}

	reply_ack(preq);
}
Ejemplo n.º 11
0
/*
 * req_commit - commit ownership of a newly-transferred job to this MOM.
 *
 * The job body has been fully received (substate TRANSICM); move it from
 * the new-job list onto the server's "all jobs" list, flag this host as
 * Mother Superior, record the sending server's address, and start the job
 * (start_exec() blocks).  On start failure the substate will be EXITING
 * and a text reply describing the failing node is returned; on success a
 * Commit reply carrying the job id is sent.  The job is saved either way.
 */
void req_commit(

  struct batch_request *preq)  /* I */

  {
  job   *pj;

  pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "committing job");
    }

  if (pj == NULL)
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  /* only a job still in transit-commit state may be committed */

  if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM)
    {
    log_err(errno, "req_commit", "cannot commit job in unexpected state");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

    return;
    }

  /* move job from new job list to "all" job list, set to running state */

  delete_link(&pj->ji_alljobs);

  append_link(&svr_alljobs, &pj->ji_alljobs, pj);

  /*
  ** Set JOB_SVFLG_HERE to indicate that this is Mother Superior.
  */

  pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE;

  pj->ji_qs.ji_state = JOB_STATE_RUNNING;

  pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN;

  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM;

  pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn);

  pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

  /* For MOM - start up the job (blocks) */

  if (LOGLEVEL >= 6)
    {
    /* pj is known non-NULL here - validated above */
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pj->ji_qs.ji_jobid,
      "starting job execution");
    }

  start_exec(pj);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pj->ji_qs.ji_jobid,
      "job execution started");
    }

  /* if start request fails, reply with failure string */

  if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING)
    {
    char tmpLine[1024];

    /* hn_host comes from the nodes file - use a bounded write so an
     * oversized hostname cannot overflow the fixed-size buffer */

    if ((pj->ji_hosts != NULL) &&
        (pj->ji_nodekill >= 0) &&
        (pj->ji_hosts[pj->ji_nodekill].hn_host != NULL))
      {
      snprintf(tmpLine, sizeof(tmpLine), "start failed on node %s",
               pj->ji_hosts[pj->ji_nodekill].hn_host);
      }
    else
      {
      snprintf(tmpLine, sizeof(tmpLine), "start failed on unknown node");
      }

    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pj->ji_qs.ji_jobid,
        tmpLine);
      }

    reply_text(preq, 0, tmpLine);
    }
  else
    {
    reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Commit);
    }

  job_save(pj, SAVEJOB_FULL);

  /* NOTE: we used to flag JOB_ATR_errpath, JOB_ATR_outpath,
   * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure
   * pbs_server got these attr values.  This worked fine before TORQUE modified
   * job launched into an async process.  At 2.0.0p6, a new attribute "SEND" flag
   * was added to handle this process. */

  return;
  }  /* END req_commit() */
Ejemplo n.º 12
0
void
req_movejob(struct batch_request *req)
{
	int      jt;            /* job type */
	job	*jobp;
	char	hook_msg[HOOK_MSG_SIZE];

	/* Give any movejob server hooks first crack at the request; a hook
	 * may reject outright, accept (possibly after rewriting the request),
	 * or not exist at all. */
	switch (process_hooks(req, hook_msg, sizeof(hook_msg),
			pbs_python_set_interrupt)) {
		case 0:	/* explicit reject */
			reply_text(req, PBSE_HOOKERROR, hook_msg);
			return;
		case 1:   /* explicit accept */
			if (recreate_request(req) == -1) { /* error */
				/* we have to reject the request, as 'req' */
				/* may have been partly modified           */
				strcpy(hook_msg,
					"movejob event: rejected request");
				log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_HOOK,
					LOG_ERR, "", hook_msg);
				reply_text(req, PBSE_HOOKERROR, hook_msg);
				return;
			}
			break;
		case 2:	/* no hook script executed - go ahead and accept event*/
			break;
		default:
			log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_HOOK,
				LOG_INFO, "", "movejob event: accept req by default");
	}

	/* locate the job; chk_job_request() replies to the client itself
	 * on failure, so a NULL return means we are done */
	jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req, &jt);

	if (jobp == NULL)
		return;

	/* only a regular job or a whole array job may be moved,
	 * never an individual subjob or a subjob range */
	if ((jt != IS_ARRAY_NO) && (jt != IS_ARRAY_ArrayJob)) {
		req_reject(PBSE_IVALREQ, 0, req);
		return;
	}

	/* the job must be idle: queued, held, or waiting */
	if (jobp->ji_qs.ji_state != JOB_STATE_QUEUED &&
		jobp->ji_qs.ji_state != JOB_STATE_HELD &&
		jobp->ji_qs.ji_state != JOB_STATE_WAITING) {
#ifndef NDEBUG
		(void)sprintf(log_buffer, "(%s) %s, state=%d",
			__func__, msg_badstate, jobp->ji_qs.ji_state);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
			jobp->ji_qs.ji_jobid, log_buffer);
#endif /* NDEBUG */
		req_reject(PBSE_BADSTATE, 0, req);
		return;
	}

	if (jt != IS_ARRAY_NO) {
		/* cannot move Subjob and can only move array job if */
		/* no subjobs are running			     */
		if ((jt != IS_ARRAY_ArrayJob) ||
			(jobp->ji_ajtrk->tkm_subjsct[JOB_STATE_RUNNING] != 0)) {
			req_reject(PBSE_IVALREQ, 0, req);
			return;
		}
	}

	/*
	 * svr_movejob() does the real work, handles both local and
	 * network moves
	 */

	switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, req)) {
		case 0:			/* success */
			(void)strcpy(log_buffer, msg_movejob);
			(void)sprintf(log_buffer+strlen(log_buffer),
				msg_manager, req->rq_ind.rq_move.rq_destin,
				req->rq_user, req->rq_host);
			log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
				jobp->ji_qs.ji_jobid, log_buffer);
			reply_ack(req);
			break;
		case -1:
		case 1:			/* fail */
			/* prefer the job's stored client error text if any */
			if (jobp->ji_clterrmsg)
				reply_text(req, pbs_errno, jobp->ji_clterrmsg);
			else
				req_reject(pbs_errno, 0, req);
			break;
		case 2:			/* deferred, will be handled by 	   */
			/* post_movejob() when the child completes */
			break;
	}
	return;
}
Ejemplo n.º 13
0
void req_commit(

  struct batch_request *preq)  /* I */

  {
  unsigned int  momport = 0;
  int           rc;
  job          *pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL",
      "committing job");
    }

  if (pj == NULL)
    {
    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return;
    }

  if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM)
    {
    log_err(errno, "req_commit", (char *)"cannot commit job in unexpected state");

    req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL);

    return;
    }

  /* move job from new job list to "all" job list, set to running state */

  delete_link(&pj->ji_alljobs);

  alljobs_list.push_back(pj);

  /*
  ** Set JOB_SVFLG_HERE to indicate that this is Mother Superior.
  */

  pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE;

  pj->ji_qs.ji_state = JOB_STATE_RUNNING;

  pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN;

  pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM;

  pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn,FALSE);

  pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0;

  /* For MOM - start up the job (blocks) */

  if (LOGLEVEL >= 6)
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pj->ji_qs.ji_jobid, "req_commit:starting job execution");

  rc = start_exec(pj);

  if (LOGLEVEL >= 6)
    {
    log_record(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pj->ji_qs.ji_jobid,
      "req_commit:job execution started");
    }

  /* if start request fails, reply with failure string */

  if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING)
    {
    char tmpLine[1024];

    if ((pj->ji_hosts != NULL) &&
        (pj->ji_nodekill >= 0) &&
        (pj->ji_hosts[pj->ji_nodekill].hn_host != NULL))
      {
      sprintf(tmpLine, "start failed on node %s",
              pj->ji_hosts[pj->ji_nodekill].hn_host);
      }
    else
      {
      sprintf(tmpLine, "start failed on unknown node");
      }

    if (LOGLEVEL >= 6)
      {
      log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pj->ji_qs.ji_jobid, tmpLine);
      }

    reply_text(preq, rc, tmpLine);
    }
  else
    {
    reply_sid(preq, pj->ji_wattr[JOB_ATR_session_id].at_val.at_long,BATCH_REPLY_CHOICE_Text);
    }

  if (multi_mom)
    {
    momport = pbs_rm_port;
    }

  job_save(pj, SAVEJOB_FULL, momport);

#ifdef NVIDIA_GPUS
  /*
   * Does this job have a gpuid assigned?
   * if so, then update gpu status
   */
  if ((use_nvidia_gpu) && 
      ((pj->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) != 0) &&
      (pj->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str != NULL))
    {
    send_update_soon();
    }
#endif  /* NVIDIA_GPUS */


  /* NOTE: we used to flag JOB_ATR_errpath, JOB_ATR_outpath,
   * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure
   * pbs_server got these attr values.  This worked fine before TORQUE modified
   * job launched into an async process.  At 2.0.0p6, a new pbs_attribute "SEND" flag
   * was added to handle this process. */

  return;
  }  /* END req_commit() */
Ejemplo n.º 14
0
static void
post_hold(struct work_task *pwt)
{
	int			code;
	job			*pjob;
	struct batch_request	*preq;
	int			conn_idx;

	if (pwt->wt_aux2 != 1)
		svr_disconnect(pwt->wt_event);	/* close connection to MOM */
	preq = pwt->wt_parm1;
	code = preq->rq_reply.brp_code;
	preq->rq_conn = preq->rq_orgconn;	/* restore client socket */

	if (pwt->wt_aux2 != 1) { /* not rpp */
		conn_idx = connection_find_actual_index(preq->rq_conn);

		if (conn_idx == -1) {
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}

		svr_conn[conn_idx].cn_authen &= ~PBS_NET_CONN_NOTIMEOUT;
	}

	pjob = find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname);
	if (pjob  == (job *)0) {
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
			preq->rq_ind.rq_hold.rq_orig.rq_objname,
			msg_postmomnojob);
		req_reject(PBSE_UNKJOBID, 0, preq);
		return;
	}
	if (code != 0) {
		if (code != PBSE_CKPBSY)
			pjob->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;	/* reset it */
		if (code != PBSE_NOSUP) {
			/* a "real" error - log message with return error code */
			(void)sprintf(log_buffer, msg_mombadhold, code);
			log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG,
				pjob->ji_qs.ji_jobid, log_buffer);
			/* send message back to server for display to user */
			reply_text(preq, code, log_buffer);
			return;
		}
	} else if (code == 0) {

		/* record that MOM has a checkpoint file */

		if (preq->rq_reply.brp_auxcode)	/* chkpt can be moved */
			pjob->ji_qs.ji_svrflags =
				(pjob->ji_qs.ji_svrflags & ~JOB_SVFLG_CHKPT) |
			JOB_SVFLG_HASRUN | JOB_SVFLG_ChkptMig;

		pjob->ji_modified = 1;	  /* indicate attributes changed     */
		(void)job_save(pjob, SAVEJOB_QUICK);

		/* note in accounting file */

		account_record(PBS_ACCT_CHKPNT, pjob, (char *)0);
	}
	reply_ack(preq);
}
Ejemplo n.º 15
0
/*
 * req_confirmresv - service the Confirm Reservation request, normally
 *	issued by the scheduler (requires operator/manager privilege).
 *
 * The request either confirms the reservation on a set of vnodes given
 * in rq_destin (advance reservation: a single execvnode; standing
 * reservation: a condensed "<count>#<execvnode>[range]..." sequence), or
 * - when rq_extend carries PBS_RESV_CONFIRM_FAIL - reports that the
 * scheduler could not confirm/reconfirm it, in which case retry times
 * are adjusted or the reservation is denied and purged.
 */
void
req_confirmresv(struct batch_request *preq)
{
	char		buf[PBS_MAXQRESVNAME+PBS_MAXHOSTNAME+256] = {0}; /* FQDN resvID+text */
	time_t		newstart = 0;
	attribute	*petime = NULL;
	resc_resv	*presv = NULL;
	int		rc = 0;
	int		state = 0;
	int		sub = 0;
	int		resv_count = 0;
	int		is_degraded = 0;
	long		next_retry_time = 0;
	char		*execvnodes = NULL;
	char		*next_execvnode = NULL;
	char		**short_xc = NULL;
	char		**tofree = NULL;
	char		*str_time = NULL;
	extern char	server_host[];
	int		is_being_altered = 0;
	char		*tmp_buf = NULL;
	size_t		tmp_buf_size = 0;

	if ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) {
		req_reject(PBSE_PERM, 0, preq);
		return;
	}

	presv = find_resv(preq->rq_ind.rq_run.rq_jid);
	if (presv == NULL) {
		req_reject(PBSE_UNKRESVID, 0, preq);
		return;
	}
	is_degraded = presv->ri_qs.ri_substate == RESV_DEGRADED ? 1 : 0;
	is_being_altered = presv->ri_alter_flags;

	if (preq->rq_extend == NULL) {
		req_reject(PBSE_resvFail, 0, preq);
		return;
	}

	/* If the reservation was degraded and it could not be reconfirmed by the
	 * scheduler, then the retry time for that reservation is reset to the half-
	 * time between now and the time to reservation start or, if the retry time
	 * is invalid, set it to some time after the soonest occurrence is to start
	 */
	if (strcmp(preq->rq_extend, PBS_RESV_CONFIRM_FAIL) == 0) {
		if (is_degraded && !is_being_altered) {
			long degraded_time = presv->ri_degraded_time;
			DBPRT(("degraded_time of %s is %s", presv->ri_qs.ri_resvID, ctime(&degraded_time)));
			next_retry_time = time_now + ((degraded_time - time_now)/2);
			/* If reservation is still degraded, and time of degraded resv to start
			 * is over cutoff from now, then set a time to try again.
			 */
			if (next_retry_time <= (degraded_time - reserve_retry_cutoff)) {

				set_resv_retry(presv, next_retry_time);

				str_time = ctime(&(presv->ri_wattr[RESV_ATR_retry].at_val.at_long));
				if (str_time != NULL) {
					str_time[strlen(str_time)-1] = '\0';
					(void)snprintf(log_buffer, sizeof(log_buffer), "Next attempt to reconfirm reservation will be made on %s", str_time);
					log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_RESV, LOG_NOTICE, presv->ri_qs.ri_resvID, log_buffer);
				}
			}
			else {
				/* reached a retry attempt that falls within the cutoff
				 * When processing an advance reservation, unset retry attribute
				 */
				if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long == 0) {
					unset_resv_retry(presv);
				}
				else {
					/* When processing a standing reservation, set a retry time
					 * past the end time of the soonest occurrence.
					 */
					set_resv_retry(presv, presv->ri_wattr[RESV_ATR_end].at_val.at_long + RESV_RETRY_DELAY);
				}
			}
		}
		else {
			if (!is_being_altered)
				/* first argument is the event type, not the event
				 * class (was mistakenly PBS_EVENTCLASS_RESV) */
				log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV,
					LOG_INFO, presv->ri_qs.ri_resvID,
					"Reservation denied");

			/* Clients waiting on an interactive request must be
			 * notified of the failure to confirm
			 */
			if ((presv->ri_brp != NULL) &&
				(presv->ri_wattr[RESV_ATR_interactive].at_flags &
				ATR_VFLAG_SET)) {
				presv->ri_wattr[RESV_ATR_interactive].at_flags &= ~ATR_VFLAG_SET;
				snprintf(buf, sizeof(buf), "%s DENIED",
					presv->ri_qs.ri_resvID);
				(void)reply_text(presv->ri_brp,
					PBSE_NONE, buf);
				presv->ri_brp = NULL;
			}
			if (!is_being_altered) {
				(void)snprintf(log_buffer, sizeof(log_buffer),
					"requestor=%s@%s", msg_daemonname, server_host);
				account_recordResv(PBS_ACCT_DRss, presv, log_buffer);
				log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_RESV,
					LOG_NOTICE, presv->ri_qs.ri_resvID,
					"reservation deleted");
				resv_purge(presv);
			}
		}
		if (presv->ri_qs.ri_state == RESV_BEING_ALTERED) {
			resv_revert_alter_times(presv);
			log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
				  presv->ri_qs.ri_resvID, "Reservation alter denied");
		}
		reply_ack(preq);
		return;
	}

#ifdef NAS /* localmod 122 */
	/* If an advance reservation has already been confirmed there's no
	 * work to be done.
	 */
	if (presv->ri_qs.ri_state == RESV_CONFIRMED &&
		!presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) {
		reply_ack(preq);
		return;
	}
#endif /* localmod 122 */

	/* Do not alter a reservation that started running when the reconfirmation
	 * message was received. If a standing reservation, then set a retry time
	 * past the end of this occurrence.
	 */
	if (presv->ri_qs.ri_state == RESV_RUNNING) {
		if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long)
			set_resv_retry(presv, presv->ri_wattr[RESV_ATR_end].at_val.at_long + 10);
		req_reject(PBSE_TOOLATE, 0, preq);
		return;
	}

	petime = &presv->ri_wattr[RESV_ATR_end];

	/* if passed in the confirmation, set a new start time */
	if ((newstart = (time_t)preq->rq_ind.rq_run.rq_resch) != 0) {
		presv->ri_qs.ri_stime = newstart;
		presv->ri_wattr[RESV_ATR_start].at_val.at_long = newstart;
		presv->ri_wattr[RESV_ATR_start].at_flags
		|= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;

		presv->ri_qs.ri_etime = newstart + presv->ri_qs.ri_duration;
		petime->at_val.at_long = presv->ri_qs.ri_etime;
		petime->at_flags |= ATR_VFLAG_SET | ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
	}

	/* The main difference between an advance reservation and a standing
	 * reservation is the format of the execvnodes returned by "rq_destin":
	 * An advance reservation has a single execvnode while a standing reservation
	 * has a string with the  particular format:
	 *    <num_resv>#<execvnode1>[<range>]<exevnode2>[...
	 * describing the execvnodes associated to each occurrence.
	 */
	if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) {

		/* The number of occurrences in the standing reservation and index are parsed
		 * from the execvnode string which is of the form:
		 *     <num_occurrences>#<vnode1>[range1]<vnode2>[range2]...
		 */
		resv_count = get_execvnodes_count(preq->rq_ind.rq_run.rq_destin);
		if (resv_count == 0) {
			req_reject(PBSE_INTERNAL, 0, preq);
			return;
		}

		execvnodes = strdup(preq->rq_ind.rq_run.rq_destin);
		if (execvnodes == NULL) {
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}
		DBPRT(("stdg_resv conf: execvnodes_seq is %s\n", execvnodes));

		/* execvnodes is of the form:
		 *       <num_resv>#<(execvnode1)>[<range>]<(exevnode2)>[...
		 * this "condensed" string is unrolled into a pointer array of
		 * execvnodes per occurrence, e.g. short_xc[0] are the execvnodes
		 * for 1st occurrence, short_xc[1] for the 2nd etc...
		 * If something goes wrong during unrolling then NULL is returned.
		 * which causes the confirmation message to be rejected
		 */
		short_xc = unroll_execvnode_seq(execvnodes, &tofree);
		if (short_xc == NULL) {
			free(execvnodes);
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}
		/* The execvnode of the soonest (i.e., next) occurrence */
		next_execvnode = strdup(short_xc[0]);
		if (next_execvnode == NULL) {
			free(short_xc);
			free_execvnode_seq(tofree);
			free(execvnodes);
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}
		/* Release the now obsolete allocations used to manipulate the
		 * unrolled string */
		free(short_xc);
		free_execvnode_seq(tofree);
		free(execvnodes);

		/* When confirming for the first time, set the index and count */
		if (!is_degraded) {

			/* Add first occurrence's end date on timed task list */
			if (presv->ri_wattr[RESV_ATR_start].at_val.at_long
				!= PBS_RESV_FUTURE_SCH) {
				if (gen_task_EndResvWindow(presv)) {
					free(next_execvnode);
					req_reject(PBSE_SYSTEM, 0, preq);
					return;
				}
			}
			if (!is_being_altered) {
				presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long = resv_count;
				presv->ri_wattr[RESV_ATR_resv_count].at_flags |= ATR_VFLAG_SET
					| ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
			}

			/* Set first occurrence to index 1
			 * (rather than 0 because it gets displayed in pbs_rstat -f) */
			presv->ri_wattr[RESV_ATR_resv_idx].at_val.at_long = 1;
			presv->ri_wattr[RESV_ATR_resv_idx].at_flags |= ATR_VFLAG_SET
				| ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE;
		}

		/* Skip setting the execvnodes sequence when reconfirming the last
		 * occurrence or when altering a reservation.
		 */
		if (!is_being_altered) {
			if (presv->ri_wattr[RESV_ATR_resv_idx].at_val.at_long
				< presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long) {

				/* now assign the execvnodes sequence attribute */
				(void) resv_attr_def[(int)RESV_ATR_resv_execvnodes].at_free(
					&presv->ri_wattr[(int)RESV_ATR_resv_execvnodes]);

				(void) resv_attr_def[(int)RESV_ATR_resv_execvnodes].at_decode(
					&presv->ri_wattr[(int)RESV_ATR_resv_execvnodes],
					NULL,
					NULL,
					preq->rq_ind.rq_run.rq_destin);
			}
		}
	}
	else { /* Advance reservation */
		next_execvnode = strdup(preq->rq_ind.rq_run.rq_destin);
		if (next_execvnode == NULL) {
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}
	}

	/* Is reservation still a viable reservation? */
	if ((rc = chk_resvReq_viable(presv)) != 0) {
		free(next_execvnode);
		req_reject(PBSE_BADTSPEC, 0, preq);
		return;
	}

	/* When reconfirming a degraded reservation, first free the nodes linked
	 * to the reservation and unset all attributes relating to retry attempts
	 */
	if (is_degraded) {
		free_resvNodes(presv);
		/* Reset retry time */
		unset_resv_retry(presv);
		/* reset vnodes_down counter to 0 */
		presv->ri_vnodes_down = 0;
	}

	if (is_being_altered & RESV_END_TIME_MODIFIED) {
		if (gen_task_EndResvWindow(presv)) {
			free(next_execvnode);
			req_reject(PBSE_SYSTEM, 0, preq);
			return;
		}
	}

	/*
	 * Assign the allocated resources to the reservation
	 * and the reservation to the associated vnodes.
	 */
	if (is_being_altered)
		free_resvNodes(presv);
	rc = assign_resv_resc(presv, next_execvnode);

	if (rc != PBSE_NONE) {
		free(next_execvnode);
		req_reject(rc, 0, preq);
		return;
	}

	/* place "Time4resv" task on "task_list_timed" only if this is a
	 * confirmation but not the reconfirmation of a degraded reservation as
	 * in this case, the reservation had already been confirmed and added to
	 * the task list before
	 */
	if (!is_degraded && (is_being_altered != RESV_END_TIME_MODIFIED) &&
		(rc = gen_task_Time4resv(presv)) != 0) {
		free(next_execvnode);
		req_reject(rc, 0, preq);
		return;
	}

	/*
	 * compute new values for state and substate
	 * and update the resc_resv object with these
	 * newly computed values
	 */
	eval_resvState(presv, RESVSTATE_gen_task_Time4resv, 0, &state, &sub);
	(void)resv_setResvState(presv, state, sub);
	cmp_resvStateRelated_attrs((void *)presv,
		presv->ri_qs.ri_type);
	Update_Resvstate_if_resv(presv->ri_jbp);

	if (presv->ri_modified)
		(void)job_or_resv_save((void *)presv, SAVERESV_FULL, RESC_RESV_OBJECT);

	log_buffer[0] = '\0';

	/*
	 * Notify all interested parties that the reservation
	 * is moving from state UNCONFIRMED to CONFIRMED
	 */
	if (presv->ri_brp) {
		presv = find_resv(presv->ri_qs.ri_resvID);
		if (presv->ri_wattr[(int)RESV_ATR_convert].at_val.at_str != NULL) {
			rc = cnvrt_qmove(presv);
			if (rc != 0) {
				snprintf(buf, sizeof(buf), "%.240s FAILED",  presv->ri_qs.ri_resvID);
			} else {
				snprintf(buf, sizeof(buf), "%.240s CONFIRMED",  presv->ri_qs.ri_resvID);
			}
		} else {
			snprintf(buf, sizeof(buf), "%.240s CONFIRMED",  presv->ri_qs.ri_resvID);
		}

		rc = reply_text(presv->ri_brp, PBSE_NONE, buf);
		presv->ri_brp = NULL;
	}

	svr_mailownerResv(presv, MAIL_CONFIRM, MAIL_NORMAL, log_buffer);
	presv->ri_wattr[RESV_ATR_interactive].at_flags &= ~ATR_VFLAG_SET;

	if (is_being_altered) {
		/*
		 * If the reservation is currently running and its start time is being
		 * altered after the current time, It is going back to the confirmed state.
		 * We need to stop the reservation queue as it would have been started at
		 * the original start time.
		 * This will prevent any jobs - that are submitted after the
		 * reservation's start time is changed - from running.
		 * The reservation went to CO from RN while being altered, that means the reservation
		 * had resources assigned. We should decrement their usages until it starts running
		 * again, where the resources will be accounted again.
		 */
		if (presv->ri_qs.ri_state == RESV_CONFIRMED && presv->ri_alter_state == RESV_RUNNING) {
			change_enableORstart(presv, Q_CHNG_START, "FALSE");
			if (presv->ri_giveback) {
				set_resc_assigned((void *)presv, 1, DECR);
				presv->ri_giveback = 0;
			}
		}
		/*
		 * Reset only the flags and end time backup here, as we will need
		 * the start time backup in Time4occurrenceFinish for a standing
		 * reservation. Reset it for an advanced reservation.
		 */
		if (!(presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long)) {
		    presv->ri_alter_stime = 0;
		}
		presv->ri_alter_etime = 0;

		presv->ri_alter_flags = 0;

		log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
			  presv->ri_qs.ri_resvID, "Reservation alter confirmed");
	} else {
		log_event(PBSEVENT_RESV, PBS_EVENTCLASS_RESV, LOG_INFO,
			  presv->ri_qs.ri_resvID, "Reservation confirmed");
	}

	if (!is_degraded) {
		/* 100 extra bytes for field names, times, and count */
		tmp_buf_size = 100 + strlen(preq->rq_user) + strlen(preq->rq_host) + strlen(next_execvnode);
		if (tmp_buf_size > sizeof(buf)) {
			tmp_buf = malloc(tmp_buf_size);
			if (tmp_buf == NULL) {
				snprintf(log_buffer, LOG_BUF_SIZE-1, "malloc failure (errno %d)", errno);
				log_err(PBSE_SYSTEM, __func__, log_buffer);
				free(next_execvnode);
				reply_ack(preq);
				return;
			}
		} else {
			tmp_buf = buf;
			tmp_buf_size = sizeof(buf);
		}

		if (presv->ri_wattr[RESV_ATR_resv_standing].at_val.at_long) {
			(void)snprintf(tmp_buf, tmp_buf_size, "requestor=%s@%s start=%ld end=%ld nodes=%s count=%ld",
				preq->rq_user, preq->rq_host,
				presv->ri_qs.ri_stime, presv->ri_qs.ri_etime,
				next_execvnode,
				presv->ri_wattr[RESV_ATR_resv_count].at_val.at_long);
		} else {
			(void)snprintf(tmp_buf, tmp_buf_size, "requestor=%s@%s start=%ld end=%ld nodes=%s",
				preq->rq_user, preq->rq_host,
				presv->ri_qs.ri_stime, presv->ri_qs.ri_etime,
				next_execvnode);
		}
		account_recordResv(PBS_ACCT_CR, presv, tmp_buf);
		if (tmp_buf != buf) {
			free(tmp_buf);
			tmp_buf_size = 0;
		}
	}

	free(next_execvnode);
	reply_ack(preq);

	return;
}