static void execute( char *queue, char *server) { int ct; /* Connection to the server */ int local_errno = 0; int merr; /* Error return from pbs_manager */ char *errmsg; /* Error message from pbs_manager */ /* The disable request */ static struct attropl attr = { NULL, (char *)"enabled", NULL, (char *)"TRUE", SET }; if ((ct = cnt2server(server)) > 0) { merr = pbs_manager_err(ct, MGR_CMD_SET, MGR_OBJ_QUEUE, queue, &attr, NULL, &local_errno); if (merr != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, "qenable: %s ", errmsg); free(errmsg); } else { fprintf(stderr, "qenable: Error enabling queue: %d - %s ", local_errno, pbs_strerror(local_errno)); } if (notNULL(queue)) fprintf(stderr, "%s", queue); if (notNULL(server)) fprintf(stderr, "@%s", server); fprintf(stderr, "\n"); exitstatus = 2; } pbs_disconnect(ct); } else { fprintf(stderr, "qenable: could not connect to server %s (%d)\n", server, ct * -1); exitstatus = 2; } }
static int marknode( int con, char *name, char *state1, enum batch_op op1, char *state2, enum batch_op op2) { char *errmsg; struct attropl new_attr[2]; int rc; int local_errno = 0; new_attr[0].name = ATTR_NODE_state; new_attr[0].resource = NULL; new_attr[0].value = state1; new_attr[0].op = op1; if (state2 == NULL) { new_attr[0].next = NULL; } else { new_attr[0].next = &new_attr[1]; new_attr[1].next = NULL; new_attr[1].name = ATTR_NODE_state; new_attr[1].resource = NULL; new_attr[1].value = state2; new_attr[1].op = op2; } rc = pbs_manager_err( con, MGR_CMD_SET, MGR_OBJ_NODE, name, new_attr, NULL, &local_errno); if (rc && !quiet) { fprintf(stderr, "Error marking node %s - ", name); if ((errmsg = pbs_geterrmsg(con)) != NULL) fprintf(stderr, "%s\n", errmsg); } return(rc); } /* END marknode() */
/* * void execute( char *node, char *gpuid, int mode, const char *server ) * * node The name of the MOM node. * gpuid The id of the GPU * mode The num mode for the GPU * server The name of the server to send to. * * Returns: * None * * File Variables: * exitstatus Set to two if an error occurs. */ static void execute( char *node, char *gpuid, int mode, const char *server) { int ct; /* Connection to the server */ int merr; /* Error return from pbs_manager */ int local_errno = 0; char *errmsg; /* Error message from pbs_manager */ /* The request to change mode */ if ((ct = cnt2server(server)) > 0) { merr = pbs_gpumode_err(ct, node, gpuid, mode, &local_errno); if (merr != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, "qgpumode: %s ", errmsg); free(errmsg); } else { fprintf(stderr, "qgpumode: Error (%d - %s) changing GPU mode", local_errno, pbs_strerror(local_errno)); } if (notNULL(server)) fprintf(stderr, "@%s", server); fprintf(stderr, "\n"); exitstatus = 2; } pbs_disconnect(ct); } else { local_errno = -1 * ct; fprintf(stderr, "qgpumode: could not connect to server %s (%d) %s\n", server, local_errno, pbs_strerror(local_errno)); exitstatus = 2; } }
static void execute( int manner, /* I */ const char *server) /* I */ { int ct; /* Connection to the server */ int err; /* Error return from pbs_terminate */ char *errmsg; /* Error message from pbs_terminate */ int local_errno = 0; if ((ct = cnt2server(server)) > 0) { err = pbs_terminate_err(ct, manner, NULL, &local_errno); if (err != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, "qterm: %s", errmsg); } else { fprintf(stderr, "qterm: Error (%d - %s) terminating server ", local_errno, pbs_strerror(local_errno)); } fprintf(stderr, "%s\n", server); exitstatus = 2; } pbs_disconnect(ct); } else { /* FAILURE */ local_errno = -1 * ct; fprintf(stderr, "qterm: could not connect to server '%s' (%d) %s\n", server, local_errno, pbs_strerror(local_errno)); exitstatus = 2; } return; } /* END execute() */
/** * @brief * executes a job * * @param[in] job - The fully qualified job id. * @param[in] server - The name of the server that manages the job. * @param[in] location - location indicating where to run job * * @return - Void * * @File Variables: * exitstatus Set to two if an error occurs. * */ static void execute(char *job, char *server, char *location) { int ct; /* Connection to the server */ int err; /* Error return from pbs_run */ int out; /* Stores the size of err_msg_buf*/ int located = FALSE; char *errmsg; char err_msg_buf[COMMENT_BUF_SIZE] = {'\0'}; /* generic buffer - comments & logging*/ char rmt_server[MAXSERVERNAME]; cnt: if ((ct = cnt2server(server)) > 0) { if (async) err = pbs_asyrunjob(ct, job, location, NULL); else err = pbs_runjob(ct, job, location, NULL); if (err && (pbs_errno != PBSE_UNKJOBID)) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { if (pbs_errno == PBSE_UNKNODE) { out = snprintf(err_msg_buf, sizeof(err_msg_buf),"qrun: %s %s",errmsg, location); if (out >= sizeof(err_msg_buf)) { fprintf(stderr,"%s...\n", err_msg_buf); } else { fprintf(stderr, "%s\n", err_msg_buf); } } else { prt_job_err("qrun", ct, job); } } else { fprintf(stderr, "qrun : Server returned error %d for job ", pbs_errno); } exitstatus = 2; } else if (err && (pbs_errno == PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job, server, rmt_server)) { pbs_disconnect(ct); strcpy(server, rmt_server); goto cnt; } prt_job_err("qrun", ct, job); exitstatus = 2; } pbs_disconnect(ct); } else { fprintf(stderr, "qrun: could not connect to server %s (%d)\n", server, pbs_errno); exitstatus = 2; } }
/* * log_commit_error() * * checks the error status for the connection and logs any error * @pre-cond: con must be a valid index into the connection table */ void log_commit_error( int con, int mom_err, char *job_id, bool &timeout) { char *err_text; char log_buf[LOCAL_LOG_BUF_SIZE]; int errno2; err_text = pbs_geterrmsg(con); /* NOTE: errno is modified by log_err */ if (mom_err > PBSE_FLOOR) { sprintf(log_buf, "send_job commit failed, rc=%d (%s: %s)", mom_err, pbse_to_txt(mom_err), (err_text != NULL) ? err_text : "N/A"); errno2 = mom_err; } else { sprintf(log_buf, "send_job commit failed, rc=%d (%s)", mom_err, (err_text != NULL) ? err_text : "N/A"); errno2 = errno; } if (err_text != NULL) free(err_text); log_ext(errno2, __func__, log_buf, LOG_WARNING); /* if failure occurs, pbs_mom should purge job and pbs_server should set * job state to idle w/error msg */ if (errno2 == EINPROGRESS) { timeout = true; sprintf(log_buf, "child commit request timed-out for job %s, increase tcp_timeout?", job_id); log_ext(errno2, __func__, log_buf, LOG_WARNING); } else { sprintf(log_buf, "child failed in commit request for job %s", job_id); log_ext(errno2, __func__, log_buf, LOG_CRIT); } }
struct batch_status *statnode( int con, char *nodearg) { struct batch_status *bstatus; char *errmsg; int local_errno = 0; bstatus = pbs_statnode_err(con, nodearg, NULL, NULL, &local_errno); if (bstatus == NULL) { if (local_errno) { if (!quiet) { if ((errmsg = pbs_geterrmsg(con)) != NULL) { fprintf(stderr, "%s: %s\n", progname, errmsg); } else { fprintf(stderr, "%s: Error %d (%s)\n", progname, local_errno, pbs_strerror(local_errno)); } } exit(1); } if (!quiet) fprintf(stderr, "%s: No nodes found\n", progname); exit(2); } return bstatus; } /* END statnode() */
void prt_job_err( const char *cmd, int connect, const char *id) { char *errmsg; errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "%s: %s ", cmd, errmsg); fprintf(stderr, "%s\n", id); free(errmsg); } }
static int set_note( int con, char *name, char *msg) { char *errmsg; struct attropl new_attr; int rc; int local_errno = 0; new_attr.name = ATTR_NODE_note; new_attr.resource = NULL; new_attr.value = msg; new_attr.op = SET; new_attr.next = NULL; rc = pbs_manager_err( con, MGR_CMD_SET, MGR_OBJ_NODE, name, &new_attr, NULL, &local_errno); if (rc && !quiet) { fprintf(stderr, "Error setting note attribute for %s - ", name); if ((errmsg = pbs_geterrmsg(con)) != NULL) { fprintf(stderr, "%s\n", errmsg); } } return(rc); } /* END set_note() */
void prt_job_err( char *cmd, int connect, char *id) { char *errmsg; errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "%s: %s ", cmd, errmsg); } else { fprintf(stderr, "%s: Server returned error %d for job ", cmd, pbs_errno); } fprintf(stderr, "%s\n", id); }
/* * * run_update_job - run the job and update the job information * * pbs_sd - connection to pbs_server * sinfo - server job is on * qinfo - queue job resides in * jinfo - the job to run * * returns success/failure - see pbs_errno for more info * */ int run_update_job(int pbs_sd, server_info *sinfo, queue_info *qinfo, job_info *jinfo) { int ret; /* return code from pbs_runjob() */ node_info *best_node = NULL; /* best node to run job on */ char *best_node_name = NULL; /* name of best node */ char buf[256] = {'\0'}; /* generic buffer - comments & logging*/ char timebuf[128]; /* buffer to hold the time and date */ resource_req *res; /* ptr to the resource of ncpus */ int ncpus; /* numeric amount of resource ncpus */ char *errmsg; /* used for pbs_geterrmsg() */ strftime(timebuf, 128, "started on %a %b %d at %H:%M", localtime(&cstat.current_time)); if (cstat.load_balancing || cstat.load_balancing_rr) { best_node = find_best_node(jinfo, sinfo -> timesharing_nodes); if (best_node != NULL) { best_node_name = best_node -> name; sprintf(buf, "Job run on node %s - %s", best_node_name, timebuf); } } if (best_node == NULL) sprintf(buf, "Job %s", timebuf); update_job_comment(pbs_sd, jinfo, buf); buf[0] = '\0'; ret = pbs_runjob(pbs_sd, jinfo -> name, best_node_name, NULL); if (ret == 0) { /* If a job is 100% efficent, it will raise the load average by 1 per * cpu is uses. Temporarly inflate load average by that value */ if (cstat.load_balancing && best_node != NULL) { if ((res = find_resource_req(jinfo -> resreq, "ncpus")) == NULL) ncpus = 1; else ncpus = res -> amount; best_node -> loadave += ncpus; } if (cstat.help_starving_jobs && jinfo == cstat.starving_job) jinfo -> sch_priority = 0; sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, jinfo -> name, "Job Run"); update_server_on_run(sinfo, qinfo, jinfo); update_queue_on_run(qinfo, jinfo); update_job_on_run(pbs_sd, jinfo); if (cstat.fair_share) update_usage_on_run(jinfo); free(sinfo -> running_jobs); sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total, check_run_job, NULL); free(qinfo -> running_jobs); qinfo -> running_jobs = job_filter(qinfo -> jobs, qinfo -> sc.total, check_run_job, NULL); } else { errmsg = pbs_geterrmsg(pbs_sd); sprintf(buf, "Not Running - PBS Error: %s", errmsg); update_job_comment(pbs_sd, jinfo, buf); } return ret; }
/** * @brief * main - the initialization and main loop of pbs_daemon */ int main(int argc, char *argv[]) { char jobfile[MAXPATHLEN+1]; char jobfile_full[MAXPATHLEN+1]; pbs_net_t hostaddr = 0; int port = -1; int move_type = -1; pbs_list_head attrl; enum conn_type cntype = ToServerDIS; int con = -1; char *destin; int encode_type; int i; job *jobp; char job_id[PBS_MAXSVRJOBID+1]; attribute *pattr; struct attropl *pqjatr; /* list (single) of attropl for quejob */ char script_name[MAXPATHLEN+1]; int in_server = -1; char *param_name, *param_val; char buf[4096]; struct hostent *hp; struct in_addr addr; char *credbuf = NULL; size_t credlen = 0; int prot = PROT_TCP; /*the real deal or output version and exit?*/ execution_mode(argc, argv); /* If we are not run with real and effective uid of 0, forget it */ pbs_loadconf(0); if (!isAdminPrivilege(getlogin())) { fprintf(stderr, "%s: Must be run by root\n", argv[0]); exit(SEND_JOB_FATAL); } /* initialize the pointers in the resource_def array */ for (i = 0; i < (svr_resc_size - 1); ++i) svr_resc_def[i].rs_next = &svr_resc_def[i+1]; /* last entry is left with null pointer */ /* set single threaded mode */ pbs_client_thread_set_single_threaded_mode(); /* disable attribute verification */ set_no_attribute_verification(); /* initialize the thread context */ if (pbs_client_thread_init_thread_context() != 0) { fprintf(stderr, "%s: Unable to initialize thread context\n", argv[0]); exit(SEND_JOB_FATAL); } if(set_msgdaemonname("PBS_send_job")) { fprintf(stderr, "Out of memory\n"); return 1; } winsock_init(); connection_init(); while (fgets(buf, sizeof(buf), stdin) != NULL) { buf[strlen(buf)-1] = '\0'; /* gets rid of newline */ param_name = buf; param_val = strchr(buf, '='); if (param_val) { *param_val = '\0'; param_val++; } else { /* bad param_val -- skipping */ break; } if (strcmp(param_name, "jobfile") == 0) { jobfile[0] = '\0'; strncpy(jobfile, param_val, MAXPATHLEN); } else if (strcmp(param_name, "destaddr") == 0) { hostaddr = atol(param_val); } else if (strcmp(param_name, "destport") == 0) { port = atoi(param_val); } else if (strcmp(param_name, "move_type") == 0) { move_type = atoi(param_val); } else if (strcmp(param_name, "in_server") == 0) { in_server = atoi(param_val); } else if (strcmp(param_name, "server_name") == 0) { server_name[0] = '\0'; strncpy(server_name, param_val, PBS_MAXSERVERNAME); } else if (strcmp(param_name, "server_host") == 0) { server_host[0] = '\0'; strncpy(server_host, param_val, (sizeof(server_host) - 1)); } else if (strcmp(param_name, "server_addr") == 0) { pbs_server_addr = atol(param_val); } else if (strcmp(param_name, "server_port") == 0) { pbs_server_port_dis = atoi(param_val); } else if (strcmp(param_name, "log_file") == 0) { log_file = strdup(param_val); } else if (strcmp(param_name, "path_log") == 0) { path_log[0] = '\0'; strncpy(path_log, param_val, MAXPATHLEN); } else if (strcmp(param_name, "path_jobs") == 0) { path_jobs = strdup(param_val); } else if (strcmp(param_name, "path_spool") == 0) { path_spool = strdup(param_val); } else if (strcmp(param_name, "path_rescdef") == 0) { path_rescdef = strdup(param_val); } else if (strcmp(param_name, "path_users") == 0) { path_users = strdup(param_val); } else if (strcmp(param_name, "path_hooks_workdir") == 0) { path_hooks_workdir = strdup(param_val); if (path_hooks_workdir == NULL) exit(SEND_JOB_FATAL); } else if (strcmp(param_name, "svr_history_enable") == 0) { svr_history_enable = atol(param_val); } else if (strcmp(param_name, "svr_history_duration") == 0) { svr_history_duration = atol(param_val); } else if (strcmp(param_name, "single_signon_password_enable") == 0) { if (decode_b(&server.sv_attr[(int)SRV_ATR_ssignon_enable], NULL, NULL, param_val) != 0) { fprintf(stderr, "%s: failed to set ssignon_password_enable\n", argv[0]); exit(SEND_JOB_FATAL); } } else if (strcmp(param_name, "script_name") == 0) { strncpy(script_name, param_val, MAXPATHLEN + 1); } else break; } time(&time_now); (void)log_open_main(log_file, path_log, 1); /* silent open */ if (setup_resc(1) == -1) { /* log_buffer set in setup_resc */ log_err(-1, "pbsd_send_job(setup_resc)", log_buffer); return (-1); } if( strlen(jobfile) == 0 || hostaddr == 0 || port == 0 || move_type == -1 || \ in_server == -1 || strlen(server_name) == 0 || strlen(server_host) == 0 || \ pbs_server_addr == 0 || pbs_server_port_dis == 0 || \ strlen(path_log) == 0 || path_jobs == NULL || \ path_spool == NULL || path_users == NULL ) { log_err(-1, "pbs_send_job", "error on one of the parameters"); log_close(0); /* silent close */ exit(SEND_JOB_FATAL); } CLEAR_HEAD(task_list_immed); CLEAR_HEAD(task_list_timed); CLEAR_HEAD(task_list_event); CLEAR_HEAD(svr_queues); CLEAR_HEAD(svr_alljobs); CLEAR_HEAD(svr_newjobs); CLEAR_HEAD(svr_allresvs); CLEAR_HEAD(svr_newresvs); CLEAR_HEAD(svr_deferred_req); CLEAR_HEAD(svr_unlicensedjobs); strcpy(jobfile_full, path_jobs); strcat(jobfile_full, jobfile); if (chk_save_file(jobfile_full) != 0) { sprintf(log_buffer, "Error opening jobfile=%s", jobfile); log_err(-1, __func__, log_buffer); goto fatal_exit; } if ((jobp=job_recov_fs(jobfile, RECOV_SUBJOB)) == NULL) { sprintf(log_buffer, "Failed to recreate job in jobfile=%s", jobfile); log_err(-1, __func__, log_buffer); goto fatal_exit; } /* now delete the temp job file that was created by job_save_fs in server code * jobs are in database now, no need to keep in filesystem */ unlink(jobfile_full); if (in_server) append_link(&svr_alljobs, &jobp->ji_alljobs, jobp); /* select attributes/resources to send based on move type */ if (move_type == MOVE_TYPE_Exec) { resc_access_perm = ATR_DFLAG_MOM; encode_type = ATR_ENCODE_MOM; cntype = ToServerDIS; } else { resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD; encode_type = ATR_ENCODE_SVR; svr_dequejob(jobp); } CLEAR_HEAD(attrl); pattr = jobp->ji_wattr; for (i=0; i < (int)JOB_ATR_LAST; i++) { if ((job_attr_def+i)->at_flags & resc_access_perm) { (void)(job_attr_def+i)->at_encode(pattr+i, &attrl, (job_attr_def+i)->at_name, NULL, encode_type, NULL); } } attrl_fixlink(&attrl); /* script name is passed from parent */ /* get host name */ pbs_loadconf(0); addr.s_addr = htonl(hostaddr); hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET); if (hp == NULL) { sprintf(log_buffer, "%s: h_errno=%d", inet_ntoa(addr), h_errno); log_err(-1, __func__, log_buffer); } else { /* read any credential file */ (void)get_credential(hp->h_name, jobp, PBS_GC_BATREQ, &credbuf, &credlen); } /* save the job id for when after we purge the job */ (void)strcpy(job_id, jobp->ji_qs.ji_jobid); con = -1; DIS_tcparray_init(); for (i=0; i<RETRY; i++) { pbs_errno = 0; /* connect to receiving server with retries */ if (i > 0) { /* recycle after an error */ if (con >= 0) svr_disconnect(con); if (should_retry_route(pbs_errno) == -1) { goto fatal_exit; /* fatal error, don't retry */ } sleep(1<<i); } if ((con = svr_connect(hostaddr, port, 0, cntype, prot)) == PBS_NET_RC_FATAL) { (void)sprintf(log_buffer, "send_job failed to %lx port %d", hostaddr, port); log_err(pbs_errno, __func__, log_buffer); goto fatal_exit; } else if (con == PBS_NET_RC_RETRY) { pbs_errno = WSAECONNREFUSED; /* should retry */ continue; } /* * if the job is substate JOB_SUBSTATE_TRNOUTCM which means * we are recovering after being down or a late failure, we * just want to send the "read-to-commit/commit" */ if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) { if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) { jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT; } pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl; destin = jobp->ji_qs.ji_destin; if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL, prot, NULL)== 0) { if (pbs_errno == PBSE_JOBEXIST && move_type == MOVE_TYPE_Exec) { /* already running, mark it so */ log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, "Mom reports job already running"); goto ok_exit; } else if ((pbs_errno == PBSE_HOOKERROR) || (pbs_errno == PBSE_HOOK_REJECT) || (pbs_errno == PBSE_HOOK_REJECT_RERUNJOB) || (pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) { char name_buf[MAXPATHLEN+1]; int rfd; int len; char *reject_msg; int err; err = pbs_errno; reject_msg = pbs_geterrmsg(con); (void)snprintf(log_buffer, sizeof(log_buffer), "send of job to %s failed error = %d reject_msg=%s", destin, err, reject_msg?reject_msg:""); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); (void)strcpy(name_buf, path_hooks_workdir); (void)strcat(name_buf, jobp->ji_qs.ji_jobid); (void)strcat(name_buf, HOOK_REJECT_SUFFIX); if ((reject_msg != NULL) && (reject_msg[0] != '\0')) { if ((rfd = open(name_buf, O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) { snprintf(log_buffer, sizeof(log_buffer), "open of reject file %s failed: errno %d", name_buf, errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); } else { secure_file(name_buf, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); setmode(rfd, O_BINARY); len = strlen(reject_msg)+1; /* write also trailing null char */ if (write(rfd, reject_msg, len) != len) { snprintf(log_buffer, sizeof(log_buffer), "write to file %s incomplete: errno %d", name_buf, errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); } close(rfd); } } if (err == PBSE_HOOKERROR) exit(SEND_JOB_HOOKERR); if (err == PBSE_HOOK_REJECT) exit(SEND_JOB_HOOK_REJECT); if (err == PBSE_HOOK_REJECT_RERUNJOB) exit(SEND_JOB_HOOK_REJECT_RERUNJOB); if (err == PBSE_HOOK_REJECT_DELETEJOB) exit(SEND_JOB_HOOK_REJECT_DELETEJOB); } else { (void)sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); continue; } } if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { if (PBSD_jscript(con, script_name, prot, NULL) != 0) continue; } if (credlen > 0) { int ret; ret = PBSD_jcred(con, jobp->ji_extended.ji_ext.ji_credtype, credbuf, credlen, prot, NULL); if ((ret == 0) || (i == (RETRY - 1))) free(credbuf); /* free credbuf if credbuf is sent successfully OR */ /* at the end of all retry attempts */ if (ret != 0) continue; } if ((move_type == MOVE_TYPE_Exec) && (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) && (hostaddr != pbs_server_addr)) { /* send files created on prior run */ if ((move_job_file(con, jobp, StdOut, prot) != 0) || (move_job_file(con, jobp, StdErr, prot) != 0) || (move_job_file(con, jobp, Chkpt, prot) != 0)) continue; } jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM; } if (PBSD_rdytocmt(con, job_id, prot, NULL) != 0) continue; if (PBSD_commit(con, job_id, prot, NULL) != 0) goto fatal_exit; goto ok_exit; /* This child process is all done */ } if (con >= 0) svr_disconnect(con); /* * If connection is actively refused by the execution node(or mother superior) OR * the execution node(or mother superior) is rejecting request with error * PBSE_BADHOST(failing to authorize server host), the node should be marked down. */ if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == WSAECONNREFUSED || pbs_errno == PBSE_BADHOST)) { i = SEND_JOB_NODEDW; } else if (should_retry_route(pbs_errno) == -1) { i = SEND_JOB_FATAL; } else { i = SEND_JOB_RETRY; } (void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE, jobp->ji_qs.ji_jobid, log_buffer); log_close(0); net_close(-1); unlink(script_name); exit(i); fatal_exit: if (con >= 0) svr_disconnect(con); log_close(0); net_close(-1); unlink(script_name); exit(SEND_JOB_FATAL); ok_exit: if (con >= 0) svr_disconnect(con); log_close(0); net_close(-1); unlink(script_name); exit(SEND_JOB_OK); }
/** * * @brief * Send a job over the network to some other server or MOM. * @par * Under Linux/Unix, this starts a child process to do the work. * Connect to the destination host and port, * and go through the protocol to transfer the job. * Signals are blocked. * * @param[in] jobp - pointer to the job being sent. * @param[in] hostaddr - the address of host to send job to, host byte order. * @param[in] port - the destination port, host byte order * @param[in] move_type - the type of move (e.g. MOVE_TYPE_exec) * @param[in] post_func - the function to execute once the child process * sending job completes (Linux/Unix only) * @param[in] data - input data to 'post_func' * * @return int * @retval 2 parent : success (child forked) * @retval -1 parent : on failure (pbs_errno set to error number) * @retval SEND_JOB_OK child : 0 success, job sent * @retval SEND_JOB_FATAL child : 1 permenent failure or rejection, * @retval SEND_JOB_RETRY child : 2 failed but try again * @retval SEND_JOB_NODEDW child : 3 execution node down, retry different node */ int send_job(job *jobp, pbs_net_t hostaddr, int port, int move_type, void (*post_func)(struct work_task *), struct batch_request *preq) { #ifdef WIN32 char cmdline[80]; pio_handles pio; char buf[4096]; struct work_task *ptask; int newstate; int newsub; long tempval; char script_name[MAXPATHLEN+1]; int gridproxy_cred = 0; #ifdef PBS_CRED_GRIDPROXY if (jobp->ji_extended.ji_ext.ji_credtype == PBS_CREDTYPE_GRIDPROXY) gridproxy_cred = 1; #endif if (pbs_conf.pbs_use_tcp == 1 && move_type == MOVE_TYPE_Exec && gridproxy_cred == 0) { return (send_job_exec(jobp, hostaddr, port, preq)); } sprintf(cmdline, "%s/sbin/pbs_send_job", pbs_conf.pbs_exec_path); if (win_popen(cmdline, "w", &pio, NULL) == 0) { errno = GetLastError(); pbs_errno = errno; (void)sprintf(log_buffer, "executing %s for job %s failed errno=%d", cmdline, jobp->ji_qs.ji_jobid, errno); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR, jobp->ji_qs.ji_jobid, log_buffer); /* force re-eval of job state out of Transit */ svr_evaljobstate(jobp, &newstate, &newsub, 1); svr_setjobstate(jobp, newstate, newsub); win_pclose(&pio); return (-1); } ptask = set_task(WORK_Deferred_Child, (long)pio.pi.hProcess, post_func, preq); if (!ptask) { log_err(errno, __func__, msg_err_malloc); errno = ENOMEM; pbs_errno = errno; win_pclose(&pio); /* force re-eval of job state out of Transit */ svr_evaljobstate(jobp, &newstate, &newsub, 1); svr_setjobstate(jobp, newstate, newsub); return (-1); } else { ptask->wt_parm2 = jobp; append_link(&((job *)jobp)->ji_svrtask, &ptask->wt_linkobj, ptask); } script_name[0] = '\0'; /* if job has a script read it from database */ if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { /* * copy the job script from database to a temp file * PBSD_jscript works with a file * delete it at the end of the send */ if (svr_create_tmp_jobscript(jobp, &script_name) != 0) { pbs_errno = PBSE_SYSTEM; snprintf(log_buffer, sizeof(log_buffer), "Failed to create temporary job script for job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, "send_job", log_buffer); win_pclose2(&pio); return (-1); } } addpid(pio.pi.hProcess); /* our job is to calc eligible time accurately and save it */ /* on new server, accrue type should be calc afresh */ /* Note: if job is being sent for execution on mom, then don't calc eligible time */ if ((jobp->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) && (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) && (move_type != MOVE_TYPE_Exec)) { tempval = ((long)time_now - jobp->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long); jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long += tempval; jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE; } /* in windows code, a child process "w32_send_job" handles the send * This needs the job information, so we save using the filesystem * This avoids the child process from having to "connect" to the database again * The file is deleted by the send_job child process when it has done recovering the job */ job_save_fs(jobp, SAVEJOB_FULLFORCE); /* so the spawned process can get a fresh copy of job */ if (*jobp->ji_qs.ji_fileprefix != '\0') sprintf(buf, "jobfile=%s%s\n", jobp->ji_qs.ji_fileprefix, JOB_FILE_SUFFIX); else sprintf(buf, "jobfile=%s%s\n", jobp->ji_qs.ji_jobid, JOB_FILE_SUFFIX); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "destaddr=%ld\n", hostaddr); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "destport=%d\n", port); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "move_type=%d\n", move_type); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "in_server=%d\n", is_linked(&svr_alljobs, &jobp->ji_alljobs)); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "server_name=%s\n", (server_name?server_name:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "server_host=%s\n", (server_host?server_host:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "server_addr=%ld\n", pbs_server_addr); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "server_port=%d\n", pbs_server_port_dis); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "log_file=%s\n", (log_file?log_file:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_log=%s\n", (path_log?path_log:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_jobs=%s\n", (path_jobs?path_jobs:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_spool=%s\n", (path_spool?path_spool:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_rescdef=%s\n", (path_rescdef?path_rescdef:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_users=%s\n", (path_users?path_users:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "path_hooks_workdir=%s\n", (path_hooks_workdir?path_hooks_workdir:"")); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "svr_history_enable=%ld\n", svr_history_enable); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "svr_history_duration=%ld\n", svr_history_duration); win_pwrite(&pio, buf, strlen(buf)); if ( (server.sv_attr[SRV_ATR_ssignon_enable].at_flags & \ ATR_VFLAG_SET) && \ (server.sv_attr[SRV_ATR_ssignon_enable].at_val.at_long == 1) ) strcpy(buf, "single_signon_password_enable=1\n"); else strcpy(buf, "single_signon_password_enable=0\n"); win_pwrite(&pio, buf, strlen(buf)); sprintf(buf, "script_name=%s\n", script_name); win_pwrite(&pio, buf, strlen(buf)); strcpy(buf, "quit\n"); win_pwrite(&pio, buf, strlen(buf)); win_pclose2(&pio); /* closes all handles except the process handle */ return (2); #else pbs_list_head attrl; enum conn_type cntype = ToServerDIS; int con; char *credbuf = NULL; size_t credlen = 0; char *destin = jobp->ji_qs.ji_destin; int encode_type; int i; char job_id[PBS_MAXSVRJOBID+1]; attribute *pattr; pid_t pid; struct attropl *pqjatr; /* list (single) of attropl for quejob */ char script_name[MAXPATHLEN+1]; struct work_task *ptask; struct hostent *hp; struct in_addr addr; long tempval; int gridproxy_cred = 0; int rpp = 0; #ifdef PBS_CRED_GRIDPROXY if (jobp->ji_extended.ji_ext.ji_credtype == PBS_CREDTYPE_GRIDPROXY) gridproxy_cred = 1; #endif if (pbs_conf.pbs_use_tcp == 1 && move_type == MOVE_TYPE_Exec && gridproxy_cred == 0) { return (send_job_exec(jobp, hostaddr, port, preq)); } script_name[0] = '\0'; /* if job has a script read it from database */ if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { /* * copy the job script from database to a temp file * PBSD_jscript works with a file * delete it at the end of the send */ if (svr_create_tmp_jobscript(jobp, script_name) != 0) { pbs_errno = PBSE_SYSTEM; snprintf(log_buffer, sizeof(log_buffer), "Failed to create temporary job script for job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, "send_job", log_buffer); return -1; } } pid = fork(); if (pid == -1) { /* Error on fork */ log_err(errno, __func__, "fork failed\n"); pbs_errno = PBSE_SYSTEM; return -1; } if (pid != 0) { /* The parent (main server) */ ptask = set_task(WORK_Deferred_Child, pid, post_func, preq); if (!ptask) { log_err(errno, __func__, msg_err_malloc); return (-1); } else { ptask->wt_parm2 = jobp; append_link(&((job *)jobp)->ji_svrtask, &ptask->wt_linkobj, ptask); } return 2; } /* * the child process * * set up signal cather for error return */ DBPRT(("%s: child started, sending to port %d\n", __func__, port)) rpp_terminate(); /* Unprotect child from being killed by kernel */ daemon_protect(0, PBS_DAEMON_PROTECT_OFF); #ifdef WIN32 /* get host name */ /* * If host address is loopback address then do not resolve with dns * Use "localhost" as the host name. */ if ((htonl(hostaddr) == loopback_addr->sin_addr.s_addr)) { (void)get_credential(LOCALHOST_SHORTNAME, jobp, PBS_GC_BATREQ, &credbuf, &credlen); } else { #endif addr.s_addr = htonl(hostaddr); hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET); if (hp == NULL) { sprintf(log_buffer, "%s: h_errno=%d", inet_ntoa(addr), h_errno); log_err(-1, __func__, log_buffer); } else { /* read any credential file */ (void)get_credential(hp->h_name, jobp, PBS_GC_BATREQ, &credbuf, &credlen); } #ifdef WIN32 } #endif /* encode job attributes to be moved */ CLEAR_HEAD(attrl); /* select attributes/resources to send based on move type */ if (move_type == MOVE_TYPE_Exec) { resc_access_perm = ATR_DFLAG_MOM; encode_type = ATR_ENCODE_MOM; cntype = ToServerDIS; } else { resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD; encode_type = ATR_ENCODE_SVR; svr_dequejob(jobp); /* clears default resource settings */ } /* our job is to calc eligible time accurately and save it */ /* on new server, accrue type should be calc afresh */ /* Note: if job is being sent for execution on mom, then don't calc eligible time */ if ((jobp->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) && (server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) && (move_type != MOVE_TYPE_Exec)) { tempval = ((long)time_now - jobp->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long); jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long += tempval; jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE; } pattr = jobp->ji_wattr; for (i=0; i < (int)JOB_ATR_LAST; i++) { if ((job_attr_def+i)->at_flags & resc_access_perm) { (void)(job_attr_def+i)->at_encode(pattr+i, &attrl, (job_attr_def+i)->at_name, (char *)0, encode_type, NULL); } } attrl_fixlink(&attrl); /* save the job id for when after we purge the job */ (void)strcpy(job_id, jobp->ji_qs.ji_jobid); pbs_errno = 0; con = -1; for (i=0; i<RETRY; i++) { /* connect to receiving server with retries */ if (i > 0) { /* recycle after an error */ if (con >= 0) svr_disconnect(con); if (should_retry_route(pbs_errno) == -1) { /* delete the temp script file */ unlink(script_name); exit(SEND_JOB_FATAL); /* fatal error, don't retry */ } sleep(1<<i); } if ((con = svr_connect(hostaddr, port, 0, cntype, rpp)) == PBS_NET_RC_FATAL) { (void)sprintf(log_buffer, "send_job failed to %lx port %d", hostaddr, port); log_err(pbs_errno, __func__, log_buffer); /* delete the temp script file */ unlink(script_name); if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == PBSE_BADCRED)) exit(SEND_JOB_NODEDW); exit(SEND_JOB_FATAL); } else if (con == PBS_NET_RC_RETRY) { pbs_errno = ECONNREFUSED; /* should retry */ continue; } /* * if the job is substate JOB_SUBSTATE_TRNOUTCM which means * we are recovering after being down or a late failure, we * just want to send the commit" */ if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) { if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) { jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT; } pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl; if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin, pqjatr, (char *)0, rpp, NULL) == 0) { if (pbs_errno == PBSE_JOBEXIST && move_type == MOVE_TYPE_Exec) { /* already running, mark it so */ log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, "Mom reports job already running"); exit(SEND_JOB_OK); } else if ((pbs_errno == PBSE_HOOKERROR) || (pbs_errno == PBSE_HOOK_REJECT) || (pbs_errno == PBSE_HOOK_REJECT_RERUNJOB) || (pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) { char name_buf[MAXPATHLEN+1]; int rfd; int len; char *reject_msg; int err; err = pbs_errno; reject_msg = pbs_geterrmsg(con); (void)sprintf(log_buffer, "send of job to %s failed error = %d reject_msg=%s", destin, err, reject_msg?reject_msg:""); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); (void)strcpy(name_buf, path_hooks_workdir); (void)strcat(name_buf, jobp->ji_qs.ji_jobid); (void)strcat(name_buf, HOOK_REJECT_SUFFIX); if ((reject_msg != NULL) && (reject_msg[0] != '\0')) { if ((rfd = open(name_buf, O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) { sprintf(log_buffer, "open of reject file %s failed: errno %d", name_buf, errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); } else { #ifdef WIN32 secure_file(name_buf, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); setmode(rfd, O_BINARY); #endif len = strlen(reject_msg)+1; /* write also trailing null char */ if (write(rfd, reject_msg, len) != len) { sprintf(log_buffer, "write to file %s incomplete: errno %d", name_buf, errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); } close(rfd); } } if (err == PBSE_HOOKERROR) exit(SEND_JOB_HOOKERR); if (err == PBSE_HOOK_REJECT) exit(SEND_JOB_HOOK_REJECT); if (err == PBSE_HOOK_REJECT_RERUNJOB) exit(SEND_JOB_HOOK_REJECT_RERUNJOB); if (err == PBSE_HOOK_REJECT_DELETEJOB) exit(SEND_JOB_HOOK_REJECT_DELETEJOB); } else { (void)sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer); continue; } } if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { if (PBSD_jscript(con, script_name, rpp, NULL) != 0) continue; } if (credlen > 0) { int ret; ret = PBSD_jcred(con, jobp->ji_extended.ji_ext.ji_credtype, credbuf, credlen, rpp, NULL); if ((ret == 0) || (i == (RETRY - 1))) free(credbuf); /* free credbuf if cred info is sent successfully OR */ /* at the end of all retry attempts */ if (ret != 0) continue; } if ((move_type == MOVE_TYPE_Exec) && (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) && (hostaddr != pbs_server_addr)) { /* send files created on prior run */ if ((move_job_file(con, jobp, StdOut, rpp, NULL) != 0) || (move_job_file(con, jobp, StdErr, rpp, NULL) != 0) || (move_job_file(con, jobp, Chkpt, rpp, NULL) != 0)) continue; } jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM; } if (PBSD_rdytocmt(con, job_id, rpp, NULL) != 0) continue; if (PBSD_commit(con, job_id, rpp, NULL) != 0) { /* delete the temp script file */ unlink(script_name); exit(SEND_JOB_FATAL); } svr_disconnect(con); /* delete the temp script file */ unlink(script_name); exit(SEND_JOB_OK); /* This child process is all done */ } if (con >= 0) svr_disconnect(con); /* * If connection is actively refused by the execution node(or mother superior) OR * the execution node(or mother superior) is rejecting request with error * PBSE_BADHOST(failing to authorize server host), the node should be marked down. */ if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == ECONNREFUSED || pbs_errno == PBSE_BADHOST)) { i = SEND_JOB_NODEDW; } else if (should_retry_route(pbs_errno) == -1) { i = SEND_JOB_FATAL; } else { i = SEND_JOB_RETRY; } (void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE, jobp->ji_qs.ji_jobid, log_buffer); /* delete the temp script file */ unlink(script_name); exit(i); return -1; /* NOT REACHED */ #endif /* !WIN32 */ }
int main(int argc, char **argv, char **envp) /* qselect */ { int c; int errflg=0; char *errmsg; #define MAX_OPTARG_LEN 256 #define MAX_RESOURCE_NAME_LEN 256 char optargout[MAX_OPTARG_LEN+1]; char resource_name[MAX_RESOURCE_NAME_LEN+1]; enum batch_op op; enum batch_op *pop = &op; struct attropl *select_list = 0; static char destination[PBS_MAXQUEUENAME+1] = ""; char server_out[MAXSERVERNAME] = ""; char *queue_name_out; char *server_name_out; int connect; char **selectjob_list; char *res_pos; char *pc; time_t after; char a_value[80]; char extendopts[4] = ""; char *attr_time = NULL; struct ecl_attribute_errors *err_list; char *resc_time = NULL; #define GETOPT_ARGS "a:A:c:h:HJl:N:p:q:r:s:t:Tu:xP:" /*test for real deal or just version and exit*/ execution_mode(argc, argv); #ifdef WIN32 winsock_init(); #endif while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) switch (c) { case 'a': check_op(optarg, pop, optargout); if ((after = cvtdate(optargout)) < 0) { fprintf(stderr, "qselect: illegal -a value\n"); errflg++; break; } sprintf(a_value, "%ld", (long)after); set_attrop(&select_list, ATTR_a, NULL, a_value, op); break; case 'c': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } set_attrop(&select_list, ATTR_c, NULL, optargout, op); break; case 'h': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; set_attrop(&select_list, ATTR_h, NULL, optargout, op); break; case 'J': op = EQ; set_attrop(&select_list, ATTR_array, NULL, "True", op); break; case 'l': res_pos = optarg; while (*res_pos != '\0') { if (check_res_op(res_pos, resource_name, pop, optargout, &res_pos) != 0) { errflg++; break; } set_attrop(&select_list, ATTR_l, resource_name, optargout, op); } break; case 'p': check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_p, NULL, optargout, op); break; case 'q': strcpy(destination, optarg); check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_q, NULL, optargout, op); break; case 'r': op = EQ; pc = optarg; while (isspace((int)(*pc))) pc++; if (*pc != 'y' && *pc != 'n') { /* qselect specific check - stays */ fprintf(stderr, "qselect: illegal -r value\n"); errflg++; break; } set_attrop(&select_list, ATTR_r, NULL, pc, op); break; case 's': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)(*pc))) pc++; set_attrop(&select_list, ATTR_state, NULL, optargout, op); break; case 't': if (get_tsubopt(*optarg, &attr_time, &resc_time)) { fprintf(stderr, "qselect: illegal -t value\n"); errflg++; break; } /* 1st character possess the subopt, so send optarg++ */ optarg ++; check_op(optarg, pop, optargout); if ((after = cvtdate(optargout)) < 0) { fprintf(stderr, "qselect: illegal -t value\n"); errflg++; break; } sprintf(a_value, "%ld", (long)after); set_attrop(&select_list, attr_time, resc_time, a_value, op); break; case 'T': if (strchr(extendopts, (int)'T') == NULL) (void)strcat(extendopts, "T"); break; case 'x': if (strchr(extendopts, (int)'x') == NULL) (void)strcat(extendopts, "x"); break; case 'H': op = EQ; if (strchr(extendopts, (int)'x') == NULL) (void)strcat(extendopts, "x"); set_attrop(&select_list, ATTR_state, NULL, "FM", op); break; case 'u': op = EQ; set_attrop(&select_list, ATTR_u, NULL, optarg, op); break; case 'A': op = EQ; set_attrop(&select_list, ATTR_A, NULL, optarg, op); break; case 'P': op = EQ; set_attrop(&select_list, ATTR_project, NULL, optarg, op); break; case 'N': op = EQ; set_attrop(&select_list, ATTR_N, NULL, optarg, op); break; default : errflg++; } if (errflg || (optind < argc)) { print_usage(); exit(2); } if (notNULL(destination)) { if (parse_destination_id(destination, &queue_name_out, &server_name_out)) { fprintf(stderr, "qselect: illegally formed destination: %s\n", destination); exit(2); } else { if (notNULL(server_name_out)) { strcpy(server_out, server_name_out); } } } /*perform needed security library initializations (including none)*/ if (CS_client_init() != CS_SUCCESS) { fprintf(stderr, "qselect: unable to initialize security library.\n"); exit(2); } connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "qselect: cannot connect to server %s (errno=%d)\n", pbs_server, pbs_errno); /*cleanup security library initializations before exiting*/ CS_close_app(); exit(pbs_errno); } if (extendopts[0] == '\0') selectjob_list = pbs_selectjob(connect, select_list, NULL); else selectjob_list = pbs_selectjob(connect, select_list, extendopts); if (selectjob_list == NULL) { if ((err_list = pbs_get_attributes_in_error(connect))) handle_attribute_errors(err_list); if (pbs_errno != PBSE_NONE) { errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "qselect: %s\n", errmsg); } else { fprintf(stderr, "qselect: Error (%d) selecting jobs\n", pbs_errno); } /* * If the server is not configured for history jobs i.e. * job_history_enable svr attr is unset/set to FALSE, qselect * command with -x/-H option is being used, then pbs_selectjob() * will return PBSE_JOBHISTNOTSET error code. But command will * exit with exit code '0'after printing the corresponding error * message. i.e. "job_history_enable is set to false" */ if (pbs_errno == PBSE_JOBHISTNOTSET) pbs_errno = 0; /*cleanup security library initializations before exiting*/ CS_close_app(); exit(pbs_errno); } } else { /* got some jobs ids */ int i = 0; while (selectjob_list[i] != NULL) { printf("%s\n", selectjob_list[i++]); } free(selectjob_list); } pbs_disconnect(connect); /*cleanup security library initializations before exiting*/ CS_close_app(); exit(0); }
/** * @brief * The main function in C - entry point * * @param[in] argc - argument count * @param[in] argv - pointer to argument array * @param[in] envp - pointer to environment values * * @return int * @retval 0 - success * @retval !0 - error */ int main(int argc, char *argv[], char *envp[]) { int errflg; /* command line option error */ int connect; /* return from pbs_connect */ char *errmsg; /* return from pbs_geterrmsg */ char destbuf[256]; /* buffer for option server */ struct attrl *attrib; /* the attrib list */ char *new_resvname; /* the name returned from pbs_submit_resv */ struct ecl_attribute_errors *err_list; char *interactive = NULL; /*test for real deal or just version and exit*/ execution_mode(argc, argv); #ifdef WIN32 winsock_init(); #endif destbuf[0] = '\0'; errflg = process_opts(argc, argv, &attrib, destbuf); /* get cmdline options */ if (errflg || ((optind+1) < argc) || argc == 1) { print_usage(); exit(2); } /* Get any required environment variables needing to be sent. */ if (! set_resv_env(envp)) { fprintf(stderr, "pbs_rsub: can't send environment with the reservation\n"); exit(3); } /*perform needed security library initializations (including none)*/ if (CS_client_init() != CS_SUCCESS) { fprintf(stderr, "pbs_rsub: unable to initialize security library.\n"); exit(1); } /* Connect to the server */ connect = cnt2server(destbuf); if (connect <= 0) { fprintf(stderr, "pbs_rsub: cannot connect to server %s (errno=%d)\n", pbs_server, pbs_errno); CS_close_app(); exit(pbs_errno); } if (qmoveflg == TRUE) { qmoveflg = FALSE; interactive = get_attr(attrib, ATTR_inter, NULL); if (interactive == NULL) { set_attr(&attrib, ATTR_inter, DEFAULT_INTERACTIVE); } else { if (atoi(interactive) > -1) { fprintf(stderr, "pbs_rsub: -I <timeout> value must be negative when used with -Wqmove option.\n"); CS_close_app(); exit(2); } } errflg = cnvrt_proc_attrib(connect, &attrib, destbuf); if (errflg) { fprintf(stderr, "pbs_rsub: can't make a reservation with the qmove option\n"); CS_close_app(); exit(2); } } pbs_errno = 0; new_resvname = pbs_submit_resv(connect, (struct attropl *)attrib, NULL); if (new_resvname == NULL) { if ((err_list = pbs_get_attributes_in_error(connect))) handle_attribute_errors(err_list); errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "pbs_rsub: %s\n", errmsg); } else fprintf(stderr, "pbs_rsub: Error (%d) submitting reservation\n", pbs_errno); CS_close_app(); exit(pbs_errno); } else { printf("%s\n", new_resvname); free(new_resvname); } /* Disconnet from the server. */ pbs_disconnect(connect); CS_close_app(); exit(0); }
int main( int argc, char **argv) { int c; int errflg = 0; char *errmsg; #define MAX_OPTARG_LEN 256 #define MAX_RESOURCE_NAME_LEN 256 char optargout[MAX_OPTARG_LEN+1]; char resource_name[MAX_RESOURCE_NAME_LEN+1]; enum batch_op op; enum batch_op *pop = &op; struct attropl *select_list = 0; static char destination[PBS_MAXROUTEDEST+1] = ""; char server_out[MAXSERVERNAME] = ""; char *queue_name_out; char *server_name_out; int connect; char **selectjob_list; char *res_pos; char *pc; int u_cnt, o_cnt, s_cnt, n_cnt; time_t after; char a_value[80]; int exec_only = 0; if (getenv("PBS_QSTAT_EXECONLY") != NULL) exec_only = 1; #define GETOPT_ARGS "a:A:ec:h:l:N:p:q:r:s:u:" while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) switch (c) { case 'a': check_op(optarg, pop, optargout); if ((after = cvtdate(optargout)) < 0) { fprintf(stderr, "qselect: illegal -a value\n"); errflg++; break; } sprintf(a_value, "%ld", (long)after); set_attrop(&select_list, ATTR_a, NULL, a_value, op); break; case 'e': exec_only = 1; break; case 'c': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } if (strcmp(pc, "u") == 0) { if ((op != EQ) && (op != NE)) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } } else if ((strcmp(pc, "n") != 0) && (strcmp(pc, "s") != 0) && (strcmp(pc, "c") != 0)) { if (strncmp(pc, "c=", 2) != 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } pc += 2; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } while (*pc != '\0') { if (!isdigit((int)*pc)) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } pc++; } } set_attrop(&select_list, ATTR_c, NULL, optargout, op); break; case 'h': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } u_cnt = o_cnt = s_cnt = n_cnt = 0; while (*pc) { if (*pc == 'u') u_cnt++; else if (*pc == 'o') o_cnt++; else if (*pc == 's') s_cnt++; else if (*pc == 'n') n_cnt++; else { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } pc++; } if (n_cnt && (u_cnt + o_cnt + s_cnt)) { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } set_attrop(&select_list, ATTR_h, NULL, optargout, op); break; case 'l': res_pos = optarg; while (*res_pos != '\0') { if (check_res_op(res_pos, resource_name, pop, optargout, &res_pos) != 0) { errflg++; break; } set_attrop(&select_list, ATTR_l, resource_name, optargout, op); } break; case 'p': check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_p, NULL, optargout, op); break; case 'q': strncpy(destination, optarg, PBS_MAXROUTEDEST); check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_q, NULL, optargout, op); break; case 'r': op = EQ; pc = optarg; while (isspace((int)(*pc))) pc++; if (strlen(pc) != 1) { fprintf(stderr, "qsub: illegal -r value\n"); errflg++; break; } if (*pc != 'y' && *pc != 'n') { fprintf(stderr, "qsub: illegal -r value\n"); errflg++; break; } set_attrop(&select_list, ATTR_r, NULL, pc, op); break; case 's': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)(*pc))) pc++; if (strlen(optarg) == 0) { fprintf(stderr, "qselect: illegal -s value\n"); errflg++; break; } while (*pc) { if (*pc != 'C' && *pc != 'E' && *pc != 'H' && *pc != 'Q' && *pc != 'R' && *pc != 'T' && *pc != 'W') { fprintf(stderr, "qselect: illegal -s value\n"); errflg++; break; } pc++; } set_attrop(&select_list, ATTR_state, NULL, optargout, op); break; case 'u': op = EQ; if (parse_at_list(optarg, FALSE, FALSE)) { fprintf(stderr, "qselect: illegal -u value\n"); errflg++; break; } set_attrop(&select_list, ATTR_u, NULL, optarg, op); break; case 'A': op = EQ; set_attrop(&select_list, ATTR_A, NULL, optarg, op); break; case 'N': op = EQ; set_attrop(&select_list, ATTR_N, NULL, optarg, op); break; default : errflg++; } if (errflg || (optind < argc)) { static char usage[] = "usage: qselect \ [-a [op]date_time] [-A account_string] [-e] [-c [op]interval] \n\ [-h hold_list] [-l resource_list] [-N name] [-p [op]priority] \n\ [-q destination] [-r y|n] [-s states] [-u user_name]\n"; fprintf(stderr,"%s", usage); exit(2); } if (notNULL(destination)) { if (parse_destination_id(destination, &queue_name_out, &server_name_out)) { fprintf(stderr, "qselect: illegally formed destination: %s\n", destination); exit(2); } else { if (notNULL(server_name_out)) { strcpy(server_out, server_name_out); } } } connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "qselect: cannot connect to server %s (errno=%d) %s\n", pbs_server, pbs_errno, pbs_strerror(pbs_errno)); exit(pbs_errno); } selectjob_list = pbs_selectjob(connect, select_list, exec_only ? EXECQUEONLY : NULL); if (selectjob_list == NULL) { if (pbs_errno != PBSE_NONE) { errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "qselect: %s\n", errmsg); } else { fprintf(stderr, "qselect: Error (%d - %s) selecting jobs\n", pbs_errno, pbs_strerror(pbs_errno)); } exit(pbs_errno); } } else /* got some jobs ids */ { int i = 0; while (selectjob_list[i] != NULL) { printf("%s\n", selectjob_list[i++]); } free(selectjob_list); } pbs_disconnect(connect); exit(0); }
int main(int argc, char **argv, char **envp) /* pbs_release_nodes */ { int c; int errflg=0; int any_failed=0; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME]; char rmt_server[MAXSERVERNAME]; int len; char *node_list = NULL; int connect; int stat=0; int k; int all_opt = 0; #define GETOPT_ARGS "j:a" /*test for real deal or just version and exit*/ execution_mode(argc, argv); #ifdef WIN32 winsock_init(); #endif job_id[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'j': strncpy(job_id, optarg, PBS_MAXCLTJOBID); job_id[PBS_MAXCLTJOBID-1] = '\0'; break; case 'a': all_opt = 1; break; default : errflg++; } } if (job_id[0] == '\0') { char *jid; jid = getenv("PBS_JOBID"); strncpy(job_id, jid?jid:"", PBS_MAXCLTJOBID); job_id[PBS_MAXCLTJOBID-1] = '\0'; } if (errflg || ((optind == argc) && !all_opt) || ((optind != argc) && all_opt) ) { fprintf(stderr, USAGE); fprintf(stderr, USAGE2); fprintf(stderr, USAGE3); exit(2); } if (job_id[0] == '\0') { fprintf(stderr, "pbs_release_nodes: No jobid given\n"); exit(2); } /*perform needed security library initializations (including none)*/ if (CS_client_init() != CS_SUCCESS) { fprintf(stderr, "pbs_release_nodes: unable to initialize security library.\n"); exit(2); } len = 0; for (k = optind; k < argc; k++) { len += (strlen(argv[k]) + 1); /* +1 for space */ } node_list = (char *)malloc(len + 1); if (node_list == NULL) { fprintf(stderr, "failed to malloc to store data (error %d)", errno); exit(2); } node_list[0] = '\0'; for (k = optind; k < argc; k++) { if (k != optind) strcat(node_list, "+"); strcat(node_list, argv[k]); } if (get_server(job_id, job_id_out, server_out)) { fprintf(stderr, "pbs_release_nodes: illegally formed job identifier: %s\n", job_id); free(node_list); exit(2); } pbs_errno = 0; stat = 0; while(1) { connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "pbs_release_nodes: cannot connect to server %s (errno=%d)\n", pbs_server, pbs_errno); break; } stat = pbs_relnodesjob(connect, job_id_out, node_list, NULL); if (stat && (pbs_errno == PBSE_UNKJOBID)) { if (locate_job(job_id_out, server_out, rmt_server)) { /* * job located at a different server * retry connect on the new server */ pbs_disconnect(connect); strcpy(server_out, rmt_server); } else { prt_job_err("pbs_release_nodes", connect, job_id_out); break; } } else { char *info_msg; if (stat && (pbs_errno != PBSE_UNKJOBID)) { prt_job_err("pbs_release_nodes", connect, ""); } else if ((info_msg = pbs_geterrmsg(connect)) != NULL) { /* print potential warning message */ printf("pbs_release_nodes: %s\n", info_msg); } break; } } any_failed = pbs_errno; pbs_disconnect(connect); /*cleanup security library initializations before exiting*/ CS_close_app(); exit(any_failed); }