Esempio n. 1
0
/**
 * @brief
 * 		Relay a batch request to the mom recorded in the job's execution
 * 		information and hook the resulting work task onto the job.
 *
 * 		When pbs_conf.pbs_use_tcp is 1 the request goes out with PROT_RPP and
 * 		the mom must be found in the ipaddrs tree and must not be flagged
 * 		INUSE_DOWN; otherwise PROT_TCP is used and no mom lookup is done.
 *
 * @param[in]	pjob	- job whose ji_momaddr/ji_momport select the destination
 * @param[in]	request	- the batch request to send; rq_orgconn is overwritten
 *			  with the current rq_conn before the request is issued
 * @param[in]	func	- work task callback handed to issue_Drequest()
 * @param[out]	ppwt	- if not NULL, receives the work task pointer; it is
 *			  set to NULL when issue_Drequest() did not supply one
 *
 * @return	int
 * @retval	0		: request issued
 * @retval	PBSE_NORELYMOM	: mom not found, mom down, or connect failed
 * @retval	PBSE_SYSTEM	: issue_Drequest() failed without setting pbs_errno
 * @retval	other		: non-zero value returned by issue_Drequest()
 */
int
relay_to_mom2(job *pjob, struct batch_request *request,
	     void (*func)(struct work_task *), struct work_task **ppwt)
{
	int	rc;
	int	conn;	/* a client style connection handle */
	pbs_net_t    momaddr;
	unsigned int momport;
	/*
	 * Start from NULL: the value is copied to *ppwt below even when
	 * issue_Drequest() fails, so it must never be indeterminate.
	 */
	struct work_task *pwt = NULL;
	int prot;
	mominfo_t *pmom = NULL;
	pbs_list_head	*mom_tasklist_ptr = NULL;

	momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
	momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;

	if (pbs_conf.pbs_use_tcp == 1) {
		prot = PROT_RPP;
		pmom = tfind2((unsigned long) momaddr, momport, &ipaddrs);
		if (!pmom || (((mom_svrinfo_t *) (pmom->mi_data))->msr_state & INUSE_DOWN)) {
			return (PBSE_NORELYMOM);
		}
		/* remember the mom's deferred list so the task can be linked to it */
		mom_tasklist_ptr = &(((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds);
	} else {
		prot = PROT_TCP;
	}

	conn = svr_connect(momaddr, momport, process_Dreply, ToServerDIS, prot);
	if (conn < 0) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_WARNING, "", msg_norelytomom);
		return (PBSE_NORELYMOM);
	}

	request->rq_orgconn = request->rq_conn;	/* save client socket */
	pbs_errno = 0;	/* cleared so an internal failure can be told apart below */
	rc = issue_Drequest(conn, request, func, &pwt, prot);
	if ((rc == 0) && (func != release_req)) {
		/* work-task entry job related rpp, link to the job's list */
		append_link(&pjob->ji_svrtask, &pwt->wt_linkobj, pwt);
		if (prot == PROT_RPP)
			append_link(mom_tasklist_ptr, &pwt->wt_linkobj2, pwt); /* if rpp, link to mom list as well */
	}

	if (ppwt != NULL)
		*ppwt = pwt;

	/*
	 * We do not want req_reject() to send non PBSE error numbers.
	 * Check for internal errors and when found return PBSE_SYSTEM.
	 */
	if ((rc != 0) && (pbs_errno == 0))
		return (PBSE_SYSTEM);
	else
		return (rc);
}
Esempio n. 2
0
/**
 * @brief
 * 		process the reply received for a request issued to
 *		  another server via issue_request()
 *
 * 		Reads the reply from the RPP stream and executes the work task associated
 * 		with the RPP reply message. The RPP request for which this reply arrived
 * 		is matched by comparing the msgid of the reply with the msgid of the work
 * 		tasks stored in the msr_deferred_cmds list of the mom for this stream.
 *
 * @param[in] handle - RPP handle on which reply/close arrived
 *
 * @return void
 */
void
process_DreplyRPP(int handle)
{
	struct work_task	*ptask;
	int			 rc;
	struct batch_request	*request;
	struct batch_reply *reply;
	char 		*msgid = NULL;
	mominfo_t *pmom = 0;

	if ((pmom = tfind2((u_long) handle, 0, &streams)) == NULL)
		return;

	DIS_rpp_reset();

	/* find the work task for the socket, it will point us to the request */
	msgid = disrst(handle, &rc);

	if (!msgid || rc) { /* rpp connection actually broke, cull all pending requests */
		while ((ptask = (struct work_task *)GET_NEXT((((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds)))) {
			/* no need to compare wt_event with handle, since the
			 * task list is for this mom and so it will always match
			 */
			if (ptask->wt_type == WORK_Deferred_Reply) {
				request = ptask->wt_parm1;
				if (request) {
					request->rq_reply.brp_code = rc;
					request->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL;
				}
			}

			ptask->wt_aux = PBSE_NORELYMOM;
			pbs_errno = PBSE_NORELYMOM;

			if (ptask->wt_event2)
				free(ptask->wt_event2);

			dispatch_task(ptask);
		}
	} else {
		/* we read msgid fine, so proceed to match it and process the respective task */

		/* get the task list */
		ptask = (struct work_task *)GET_NEXT((((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds));

		while (ptask) {

			char *cmd_msgid = ptask->wt_event2;

			if (strcmp(cmd_msgid, msgid) == 0) {

				if (ptask->wt_type == WORK_Deferred_Reply)
					request = ptask->wt_parm1;
				else
					request = NULL;

				if (!request) {
					if ((reply = (struct batch_reply *) malloc(sizeof(struct batch_reply))) == 0) {
						delete_task(ptask);
						free(cmd_msgid);
						log_err(errno, msg_daemonname, "Out of memory creating batch reply");
						return;
					}
					(void) memset(reply, 0, sizeof(struct batch_reply));
				} else {
					reply = &request->rq_reply;
				}

				/* read and decode the reply */
				if ((rc = DIS_reply_read(handle, reply, 1)) != 0) {
					reply->brp_code = rc;
					reply->brp_choice = BATCH_REPLY_CHOICE_NULL;
					ptask->wt_aux = PBSE_NORELYMOM;
					pbs_errno = PBSE_NORELYMOM;
				} else {
					ptask->wt_aux = reply->brp_code;
					pbs_errno = reply->brp_code;
				}

				ptask->wt_parm3 = reply; /* set the reply in case callback fn uses without having a preq */

				dispatch_task(ptask);

				if (!request)
					PBSD_FreeReply(reply);

				free(cmd_msgid);

				break;
			}
			ptask = (struct work_task *) GET_NEXT(ptask->wt_linkobj2);
		}
		free(msgid); /* the msgid read should be free after use in matching */
	}
}
Esempio n. 3
0
/**
 * @brief
 * 		Send execution job on connected rpp stream.
 *
 * 		The job is sent as one logical request: queuejob, then (when present)
 * 		the job script, the credential and the stdout/stderr/checkpoint files,
 * 		and finally the commit. All follow-up messages reuse a duplicate of the
 * 		msgid produced by the queuejob step; the original msgid is handed to
 * 		the deferred work task registered with add_mom_deferred_list().
 *
 * 		Buffers owned by this function (script, credential, duplicated msgid)
 * 		are released on both the success and the failure path.
 *
 * @param[in]	jobp	-	pointer to the job being sent
 * @param[in]	hostaddr	-	the address of host to send job to, host byte order
 * @param[in]	port	-	the destination port, host byte order
 * @param[in]	request	-	The batch request associated with this send job call
 *
 * @return	int
 * @retval  2	: success
 * @retval  -1	: failure (pbs_errno set to error number)
 *
 */
int
send_job_exec(job *jobp, pbs_net_t hostaddr, int port, struct batch_request *request)
{
	pbs_list_head attrl;
	attribute *pattr;
	mominfo_t *pmom = NULL;
	int stream = -1;
	int encode_type;
	char *destin = jobp->ji_qs.ji_destin;
	int i;
	size_t		 credlen = 0;
	char		*credbuf = NULL;
	char job_id[PBS_MAXSVRJOBID + 1];
	struct attropl *pqjatr; /* list (single) of attropl for quejob */
	int rc;
	int rpp = 1;
	char *jobid = NULL;
	char *script = NULL;
	char *msgid = NULL;
	char *dup_msgid = NULL;
	struct work_task *ptask = NULL;

	/* if job has a script read it from database */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		/*
		 * load the job script from the database; it is released
		 * once it has been sent (or on failure)
		 */
		if ((script = svr_load_jobscript(jobp)) == NULL) {
			pbs_errno = PBSE_SYSTEM;
			snprintf(log_buffer, sizeof(log_buffer),
				"Failed to load job script for job %s",
				jobp->ji_qs.ji_jobid);
			log_err(pbs_errno, "send_job", log_buffer);
			goto send_err;
		}
	}

	stream = svr_connect(hostaddr, port, NULL, ToServerDIS, rpp);
	if (stream < 0) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_WARNING, "", "Could not connect to Mom");
		goto send_err;
	}

	/*
	 * NOTE(review): pbs_errno is not set on this failure path, so the value
	 * logged below is whatever was left from earlier - confirm with callers.
	 */
	pmom = tfind2((unsigned long) jobp->ji_qs.ji_un.ji_exect.ji_momaddr,
		jobp->ji_qs.ji_un.ji_exect.ji_momport,
		&ipaddrs);
	if (!pmom || (((mom_svrinfo_t *)(pmom->mi_data))->msr_state & INUSE_DOWN))
		goto send_err;

	CLEAR_HEAD(attrl);

	resc_access_perm = ATR_DFLAG_MOM;
	encode_type = ATR_ENCODE_MOM;

	/* encode every job attribute whose definition carries the mom access flag */
	pattr = jobp->ji_wattr;
	for (i = 0; i < (int) JOB_ATR_LAST; i++) {
		if ((job_attr_def + i)->at_flags & resc_access_perm) {
			(void)(job_attr_def + i)->at_encode(pattr + i, &attrl,
				(job_attr_def + i)->at_name, (char *) 0, encode_type,
				NULL);
		}
	}
	attrl_fixlink(&attrl);

	/* read any credential file */
	(void)get_credential(pmom->mi_host, jobp, PBS_GC_BATREQ, &credbuf, &credlen);

	/* save the job id for when after we purge the job */
	(void) snprintf(job_id, sizeof(job_id), "%s", jobp->ji_qs.ji_jobid);

	pbs_errno = 0;

	pqjatr = &((svrattrl *) GET_NEXT(attrl))->al_atopl;
	jobid = PBSD_queuejob(stream, jobp->ji_qs.ji_jobid, destin, pqjatr, (char *) 0, rpp, &msgid);
	free_attrlist(&attrl);
	if (jobid == NULL)
		goto send_err;

	rpp_add_close_func(stream, process_DreplyRPP); /* register a close handler */

	/*
	 * adding msgid to deferred list, dont free msgid
	 * NOTE(review): if this call fails, ownership of msgid is unclear from
	 * here, so it is left alone rather than risk a double free - confirm.
	 */
	if ((ptask = add_mom_deferred_list(stream, pmom, post_sendmom, msgid, request, jobp)) == NULL)
		goto send_err;

	/* add to pjob->svrtask list so its automatically cleared when job is purged */
	append_link(&jobp->ji_svrtask, &ptask->wt_linkobj, ptask);

	/* we cannot use the same msgid, since it is not part of the preq,
	 * make a dup of it, and we can freely free it
	 */
	if ((dup_msgid = strdup(msgid)) == NULL)
		goto send_err;

	/*
	 * henceforth use the same msgid, since we mean to say all this is
	 * part of a single logical request to the mom
	 * and we will be hanging off one request to be answered to finally
	 */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		if (PBSD_jscript_direct(stream, script, rpp, &dup_msgid) != 0)
			goto send_err;
	}
	free(script);
	script = NULL;

	if (credlen > 0) {
		rc = PBSD_jcred(stream, jobp->ji_extended.ji_ext.ji_credtype, credbuf, credlen, rpp, &dup_msgid);
		if (rc != 0)
			goto send_err;
	}
	/* the credential is no longer needed; clear the pointer so send_err cannot free it twice */
	free(credbuf);
	credbuf = NULL;

	if ((jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN)
		&& (hostaddr != pbs_server_addr)) {
		if ((move_job_file(stream, jobp, StdOut, rpp, &dup_msgid) != 0) ||
			(move_job_file(stream, jobp, StdErr, rpp, &dup_msgid) != 0) ||
			(move_job_file(stream, jobp, Chkpt, rpp, &dup_msgid) != 0))
			goto send_err;
	}

	if (PBSD_commit(stream, job_id, rpp, &dup_msgid) != 0)
		goto send_err;

	free(dup_msgid); /* free this as it is not part of any work task */

	return 2;

send_err:
	/* free(NULL) is a no-op, so each buffer is released whether or not it was obtained */
	free(dup_msgid);
	free(script);
	free(credbuf);

	if (ptask) {
		/* presumably the msgid stored by add_mom_deferred_list - TODO confirm */
		free(ptask->wt_event2);
		delete_task(ptask);
	}

	snprintf(log_buffer, sizeof(log_buffer),
		"send of job to %s failed error = %d", destin, pbs_errno);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
	return (-1);
}