int
relay_to_mom2(job *pjob, struct batch_request *request,
	void (*func)(struct work_task *), struct work_task **ppwt)
{
	/*
	 * Relay a batch request to the MoM recorded in the job's execution
	 * info (ji_momaddr / ji_momport) by calling issue_Drequest().
	 *
	 * pjob    - job whose MoM is the target; the resulting work task is
	 *           linked onto pjob->ji_svrtask unless func is release_req
	 * request - request to send; rq_orgconn is overwritten with rq_conn
	 * func    - callback handed to issue_Drequest()
	 * ppwt    - if non-NULL, receives the work task pointer (NULL when
	 *           issue_Drequest() did not produce one)
	 *
	 * Returns PBSE_NORELYMOM if the MoM is unknown, marked down, or the
	 * connection fails; PBSE_SYSTEM if issue_Drequest() failed without
	 * setting pbs_errno; otherwise the issue_Drequest() return code.
	 */
	int rc;
	int conn; /* a client style connection handle */
	pbs_net_t momaddr;
	unsigned int momport;
	/* initialized so *ppwt never receives an indeterminate pointer */
	struct work_task *pwt = NULL;
	int prot;
	mominfo_t *pmom = NULL;
	pbs_list_head *mom_tasklist_ptr = NULL;

	momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
	momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;

	/*
	 * NOTE(review): pbs_use_tcp == 1 selects PROT_RPP here; presumably
	 * this denotes the RPP-style message path carried over TCP - confirm.
	 */
	if (pbs_conf.pbs_use_tcp == 1) {
		prot = PROT_RPP;
		pmom = tfind2((unsigned long) momaddr, momport, &ipaddrs);
		if (!pmom || (((mom_svrinfo_t *) (pmom->mi_data))->msr_state & INUSE_DOWN)) {
			return (PBSE_NORELYMOM);
		}
		mom_tasklist_ptr = &(((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds);
	} else {
		prot = PROT_TCP;
	}

	conn = svr_connect(momaddr, momport, process_Dreply, ToServerDIS, prot);
	if (conn < 0) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_WARNING,
			"", msg_norelytomom);
		return (PBSE_NORELYMOM);
	}

	request->rq_orgconn = request->rq_conn; /* save client socket */
	pbs_errno = 0;
	rc = issue_Drequest(conn, request, func, &pwt, prot);
	if ((rc == 0) && (func != release_req) && (pwt != NULL)) {
		/* work-task entry job related rpp, link to the job's list */
		append_link(&pjob->ji_svrtask, &pwt->wt_linkobj, pwt);
		/* if rpp, link to mom list as well */
		if (prot == PROT_RPP)
			append_link(mom_tasklist_ptr, &pwt->wt_linkobj2, pwt);
	}

	if (ppwt != NULL)
		*ppwt = pwt;

	/*
	 * We do not want req_reject() to send non PBSE error numbers.
	 * Check for internal errors and when found return PBSE_SYSTEM.
	 */
	if ((rc != 0) && (pbs_errno == 0))
		return (PBSE_SYSTEM);

	return (rc);
}
/** * @brief * process the reply received for a request issued to * another server via issue_request() * * Reads the reply from the RPP stream and executes the work task associated * with the RPP reply message. The RPP request for which this reply arrived * is matched by comparing the msgid of the reply with the msgid of the work * tasks stored in the msr_deferred_cmds list of the mom for this stream. * * @param[in] handle - RPP handle on which reply/close arrived * * @return void */ void process_DreplyRPP(int handle) { struct work_task *ptask; int rc; struct batch_request *request; struct batch_reply *reply; char *msgid = NULL; mominfo_t *pmom = 0; if ((pmom = tfind2((u_long) handle, 0, &streams)) == NULL) return; DIS_rpp_reset(); /* find the work task for the socket, it will point us to the request */ msgid = disrst(handle, &rc); if (!msgid || rc) { /* rpp connection actually broke, cull all pending requests */ while ((ptask = (struct work_task *)GET_NEXT((((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds)))) { /* no need to compare wt_event with handle, since the * task list is for this mom and so it will always match */ if (ptask->wt_type == WORK_Deferred_Reply) { request = ptask->wt_parm1; if (request) { request->rq_reply.brp_code = rc; request->rq_reply.brp_choice = BATCH_REPLY_CHOICE_NULL; } } ptask->wt_aux = PBSE_NORELYMOM; pbs_errno = PBSE_NORELYMOM; if (ptask->wt_event2) free(ptask->wt_event2); dispatch_task(ptask); } } else { /* we read msgid fine, so proceed to match it and process the respective task */ /* get the task list */ ptask = (struct work_task *)GET_NEXT((((mom_svrinfo_t *) (pmom->mi_data))->msr_deferred_cmds)); while (ptask) { char *cmd_msgid = ptask->wt_event2; if (strcmp(cmd_msgid, msgid) == 0) { if (ptask->wt_type == WORK_Deferred_Reply) request = ptask->wt_parm1; else request = NULL; if (!request) { if ((reply = (struct batch_reply *) malloc(sizeof(struct batch_reply))) == 0) { delete_task(ptask); free(cmd_msgid); log_err(errno, 
msg_daemonname, "Out of memory creating batch reply"); return; } (void) memset(reply, 0, sizeof(struct batch_reply)); } else { reply = &request->rq_reply; } /* read and decode the reply */ if ((rc = DIS_reply_read(handle, reply, 1)) != 0) { reply->brp_code = rc; reply->brp_choice = BATCH_REPLY_CHOICE_NULL; ptask->wt_aux = PBSE_NORELYMOM; pbs_errno = PBSE_NORELYMOM; } else { ptask->wt_aux = reply->brp_code; pbs_errno = reply->brp_code; } ptask->wt_parm3 = reply; /* set the reply in case callback fn uses without having a preq */ dispatch_task(ptask); if (!request) PBSD_FreeReply(reply); free(cmd_msgid); break; } ptask = (struct work_task *) GET_NEXT(ptask->wt_linkobj2); } free(msgid); /* the msgid read should be free after use in matching */ } }
/**
 * @brief
 *	Send execution job on connected rpp stream.
 *
 *	Issues queuejob, optionally the job script, credential and job files,
 *	and finally commit, on one stream. The msgid returned by the queuejob
 *	call is attached to a deferred work task (post_sendmom) on the mom; a
 *	private copy of it is passed to the follow-up calls.
 *
 * @param[in]	jobp - pointer to the job being sent
 * @param[in]	hostaddr - the address of host to send job to, host byte order
 * @param[in]	port - the destination port, host byte order
 * @param[in]	request - The batch request associated with this send job call
 *
 * @return	int
 * @retval	2	: success
 * @retval	-1	: failure (pbs_errno is logged; it is set explicitly only
 *			  on some failure paths)
 *
 */
int
send_job_exec(job *jobp, pbs_net_t hostaddr, int port, struct batch_request *request)
{
	pbs_list_head attrl;
	attribute *pattr;
	mominfo_t *pmom = NULL;
	int stream = -1;
	int encode_type;
	char *destin = jobp->ji_qs.ji_destin;
	int i;
	size_t credlen = 0;
	char *credbuf = NULL;
	char job_id[PBS_MAXSVRJOBID + 1];
	struct attropl *pqjatr; /* list (single) of attropl for quejob */
	int rc;
	int rpp = 1;
	char *jobid = NULL;
	char *script = NULL;
	char *msgid = NULL;
	char *dup_msgid = NULL;
	struct work_task *ptask = NULL;

	/* if job has a script read it from database */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		/* the loaded script is freed after it is sent, or in send_err */
		if ((script = svr_load_jobscript(jobp)) == NULL) {
			pbs_errno = PBSE_SYSTEM;
			snprintf(log_buffer, sizeof(log_buffer),
				"Failed to load job script for job %s",
				jobp->ji_qs.ji_jobid);
			log_err(pbs_errno, "send_job", log_buffer);
			goto send_err;
		}
	}

	stream = svr_connect(hostaddr, port, NULL, ToServerDIS, rpp);
	if (stream < 0) {
		log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_WARNING,
			"", "Could not connect to Mom");
		goto send_err;
	}

	pmom = tfind2((unsigned long) jobp->ji_qs.ji_un.ji_exect.ji_momaddr,
		jobp->ji_qs.ji_un.ji_exect.ji_momport,
		&ipaddrs);
	if (!pmom || (((mom_svrinfo_t *) (pmom->mi_data))->msr_state & INUSE_DOWN))
		goto send_err;

	/* encode every job attribute flagged for the mom into attrl */
	CLEAR_HEAD(attrl);

	resc_access_perm = ATR_DFLAG_MOM;
	encode_type = ATR_ENCODE_MOM;

	pattr = jobp->ji_wattr;
	for (i = 0; i < (int) JOB_ATR_LAST; i++) {
		if ((job_attr_def + i)->at_flags & resc_access_perm) {
			(void) (job_attr_def + i)->at_encode(pattr + i, &attrl,
				(job_attr_def + i)->at_name, (char *) 0,
				encode_type, NULL);
		}
	}
	attrl_fixlink(&attrl);

	/* read any credential file */
	(void) get_credential(pmom->mi_host, jobp, PBS_GC_BATREQ, &credbuf, &credlen);

	/* save the job id for when after we purge the job (bounded copy) */
	(void) snprintf(job_id, sizeof(job_id), "%s", jobp->ji_qs.ji_jobid);

	pbs_errno = 0;

	pqjatr = &((svrattrl *) GET_NEXT(attrl))->al_atopl;
	jobid = PBSD_queuejob(stream, jobp->ji_qs.ji_jobid, destin, pqjatr,
		(char *) 0, rpp, &msgid);
	free_attrlist(&attrl);
	if (jobid == NULL)
		goto send_err;

	rpp_add_close_func(stream, process_DreplyRPP); /* register a close handler */

	/*
	 * adding msgid to deferred list, dont free msgid
	 * NOTE(review): if add_mom_deferred_list() fails, msgid may leak;
	 * ownership on failure is not visible from here - confirm.
	 */
	if ((ptask = add_mom_deferred_list(stream, pmom, post_sendmom, msgid, request, jobp)) == NULL)
		goto send_err;

	/* add to pjob->svrtask list so its automatically cleared when job is purged */
	append_link(&jobp->ji_svrtask, &ptask->wt_linkobj, ptask);

	/* we cannot use the same msgid, since it is not part of the preq,
	 * make a dup of it, and we can freely free it
	 */
	if ((dup_msgid = strdup(msgid)) == NULL)
		goto send_err;

	/*
	 * henceforth use the same msgid, since we mean to say all this is
	 * part of a single logical request to the mom
	 * and we will be hanging off one request to be answered to finally
	 */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		if (PBSD_jscript_direct(stream, script, rpp, &dup_msgid) != 0)
			goto send_err;
	}
	free(script);
	script = NULL;

	rc = 0;
	if (credlen > 0) {
		rc = PBSD_jcred(stream,
			jobp->ji_extended.ji_ext.ji_credtype,
			credbuf, credlen, rpp, &dup_msgid);
	}
	/* credential buffer is no longer needed; NULL it so send_err cannot double free */
	free(credbuf);
	credbuf = NULL;
	if (rc != 0)
		goto send_err;

	if ((jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) && (hostaddr != pbs_server_addr)) {
		if ((move_job_file(stream, jobp, StdOut, rpp, &dup_msgid) != 0) ||
			(move_job_file(stream, jobp, StdErr, rpp, &dup_msgid) != 0) ||
			(move_job_file(stream, jobp, Chkpt, rpp, &dup_msgid) != 0))
			goto send_err;
	}

	if (PBSD_commit(stream, job_id, rpp, &dup_msgid) != 0)
		goto send_err;

	free(dup_msgid); /* free this as it is not part of any work task */

	return 2;

send_err:
	/* free(NULL) is a no-op, so each buffer is released unconditionally */
	free(dup_msgid);
	free(script);
	free(credbuf);

	if (ptask) {
		/* wt_event2 presumably holds the msgid handed to add_mom_deferred_list() */
		free(ptask->wt_event2);
		delete_task(ptask);
	}

	snprintf(log_buffer, sizeof(log_buffer),
		"send of job to %s failed error = %d", destin, pbs_errno);
	log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO,
		jobp->ji_qs.ji_jobid, log_buffer);
	return (-1);
}