/* * int execute( char *queue, char *server ) * * queue The name of the queue to disable. * server The name of the server that manages the queue. * * Returns: * None * * File Variables: * exitstatus Set to two if an error occurs. */ static void execute( char *queue, char *server) { int ct; /* Connection to the server */ int local_errno = 0; int merr; /* Error return from pbs_manager */ char *errmsg; /* Error message from pbs_manager */ /* The disable request */ static struct attropl attr = { NULL, (char *)"enabled", NULL, (char *)"FALSE", SET }; if ((ct = cnt2server(server)) > 0) { merr = pbs_manager_err(ct, MGR_CMD_SET, MGR_OBJ_QUEUE, queue, &attr, NULL, &local_errno); if (merr != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, "qdisable: %s ", errmsg); free(errmsg); } else { fprintf(stderr, "qdisable: Error disabling queue: %d - %s ", local_errno, pbs_strerror(local_errno)); } if (notNULL(queue)) fprintf(stderr, "%s", queue); if (notNULL(server)) fprintf(stderr, "@%s", server); fprintf(stderr, "\n"); exitstatus = 2; } pbs_disconnect(ct); } else { fprintf(stderr, "qdisable: could not connect to server %s (%d) %s\n", server, ct * -1, pbs_strerror(ct * -1)); exitstatus = 2; } }
/* * void execute( char *node, char *gpuid, int ecc_perm, int ecc_vol, char *server ) * * node The name of the MOM node. * gpuid The id of the GPU. * ecc_perm The value for resetting the permanent ECC count. * ecc_vol The value for resetting the volatile ECC count. * server The name of the server to send to. * * Returns: * None * * File Variables: * exitstatus Set to two if an error occurs. */ static void execute( char *node, char *gpuid, int ecc_perm, int ecc_vol, const char *server) { int local_errno = 0; int ct; /* Connection to the server */ int merr; /* Error return from pbs_manager */ char *errmsg; /* Error message from pbs_manager */ /* The request to change mode */ if ((ct = cnt2server(server, false)) > 0) { merr = pbs_gpureset_err(ct, node, gpuid, ecc_perm, ecc_vol, &local_errno); if (merr != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, " qgpureset: %s ", errmsg); free(errmsg); } else { fprintf(stderr, " qgpureset: Error (%d - %s) resetting GPU ECC counts", local_errno, pbs_strerror(local_errno)); } if (notNULL(server)) fprintf(stderr, "@%s", server); fprintf(stderr, "\n"); exitstatus = 2; } pbs_disconnect(ct); } else { local_errno = -1 * ct; fprintf(stderr, " qgpureset: could not connect to server %s (%d) %s\n", server, local_errno, pbs_strerror(local_errno)); exitstatus = 2; } }
static void execute( int manner, /* I */ const char *server) /* I */ { int ct; /* Connection to the server */ int err; /* Error return from pbs_terminate */ char *errmsg; /* Error message from pbs_terminate */ int local_errno = 0; if ((ct = cnt2server(server)) > 0) { err = pbs_terminate_err(ct, manner, NULL, &local_errno); if (err != 0) { errmsg = pbs_geterrmsg(ct); if (errmsg != NULL) { fprintf(stderr, "qterm: %s", errmsg); } else { fprintf(stderr, "qterm: Error (%d - %s) terminating server ", local_errno, pbs_strerror(local_errno)); } fprintf(stderr, "%s\n", server); exitstatus = 2; } pbs_disconnect(ct); } else { /* FAILURE */ local_errno = -1 * ct; fprintf(stderr, "qterm: could not connect to server '%s' (%d) %s\n", server, local_errno, pbs_strerror(local_errno)); exitstatus = 2; } return; } /* END execute() */
/*
 * print_server_port_to_stderr
 *
 * Resolves s_name with get_hostaddr_hostent_af() and writes a one-line
 * diagnostic to stderr: either "Unable to communicate with name(ip)" when
 * the name resolves, or a "Can not resolve name" message carrying the
 * resolver's return code.
 *
 * s_name  Name of the server the caller failed to talk to.
 *
 * The address buffer handed back by the resolver is released before
 * returning.
 */
void print_server_port_to_stderr(char *s_name)
{
    int             rc = PBSE_NONE;
    char           *s_addr = NULL;
    unsigned short  af_family;
    struct in_addr  hostaddr;
    char           *ip_addr = NULL;
    int             s_len = 0;

    rc = get_hostaddr_hostent_af(&rc, s_name, &af_family, &s_addr, &s_len);

    if (rc == PBSE_NONE) {
        size_t copy_len = 0;

        /*
         * hostaddr only has room for an IPv4 address. Never copy more than
         * that, whatever length the resolver reports, and start from a
         * zeroed value so a short or missing address is still well defined.
         */
        memset(&hostaddr, 0, sizeof(hostaddr));

        if ((s_addr != NULL) && (s_len > 0)) {
            copy_len = (size_t)s_len;

            if (copy_len > sizeof(hostaddr)) {
                copy_len = sizeof(hostaddr);
            }

            memcpy(&hostaddr, s_addr, copy_len);
        }

        ip_addr = inet_ntoa(hostaddr);
        fprintf(stderr, "Unable to communicate with %s(%s)\n", s_name, ip_addr);
    } else {
        const char *err_msg = "";

        /* Only positive codes are looked up; others print an empty text. */
        if (rc > 0) {
            err_msg = pbs_strerror(rc);
        }

        fprintf(stderr, "Can not resolve name for server %s. (rc = %d - %s)\n",
                s_name, rc, err_msg);
    }

    free(s_addr);
}
int parse_response_svr( int sock, char **err_msg) { /* * PBS_BATCH_PROT_TYPE * PBS_BATCH_PROT_VER * reply->brp_code * reply->brp_auxcode * reply->brp_choice * if reply->brp_choice == BATCH_REPLY_CHOICE_Text also read: * preq->rq_reply.brp_un.brp_txt.brp_str * using * preq->rq_reply.brp_un.brp_txt.brp_txtlen */ int rc = PBSE_NONE; struct batch_reply *reply = NULL; char *tmp_val = NULL; struct tcp_chan *chan = NULL; if ((chan = DIS_tcp_setup(sock)) == NULL) { } else if ((reply = (struct batch_reply *)calloc(1, sizeof(struct batch_reply))) == NULL) { } else if ((rc = decode_DIS_replyCmd(chan, reply))) { free(reply); if (chan->IsTimeout == TRUE) { rc = PBSE_TIMEOUT; } else { rc = PBSE_PROTOCOL; } if ((tmp_val = pbs_strerror(rc)) == NULL) { char err_buf[80]; snprintf(err_buf, 79, "Error creating error message for code %d", rc); *err_msg = strdup(err_buf); } else *err_msg = strdup(tmp_val); } else { rc = reply->brp_code; if (reply->brp_code != PBSE_NONE) { *err_msg = strdup(reply->brp_un.brp_txt.brp_str); } free(reply); } DIS_tcp_cleanup(chan); return rc; }
/*
 * statnode
 *
 * Fetches node status through pbs_statnode_err() and returns it. The
 * function only returns when a status list was obtained; otherwise it
 * terminates the process:
 *   exit(1)  the request reported an error code
 *   exit(2)  no error code, but no nodes were returned
 * Diagnostics go to stderr unless the file-level `quiet` flag is set.
 *
 * con      Connection handle passed to the PBS calls.
 * nodearg  Node selector passed through to pbs_statnode_err().
 */
struct batch_status *statnode(int con, char *nodearg)
{
    int                  req_errno = 0;
    char                *srv_msg;
    struct batch_status *nodes;

    nodes = pbs_statnode_err(con, nodearg, NULL, NULL, &req_errno);

    if (nodes != NULL) {
        return nodes;
    }

    if (req_errno == 0) {
        if (!quiet) {
            fprintf(stderr, "%s: No nodes found\n", progname);
        }
        exit(2);
    }

    if (!quiet) {
        srv_msg = pbs_geterrmsg(con);

        if (srv_msg == NULL) {
            fprintf(stderr, "%s: Error %d (%s)\n",
                    progname, req_errno, pbs_strerror(req_errno));
        } else {
            fprintf(stderr, "%s: %s\n", progname, srv_msg);
        }
    }

    exit(1);
}  /* END statnode() */
/*
 * print_server_port_to_stderr
 *
 * Resolves s_name with get_hostaddr_hostent_af() and writes a one-line
 * diagnostic to stderr: either "Error communicating with name(ip)" when
 * the name resolves, or a "Can not resolve name" message carrying the
 * resolver's return code.
 *
 * s_name  Name of the server the caller failed to talk to.
 *
 * The address buffer handed back by the resolver is released before
 * returning (it used to be leaked here).
 * NOTE(review): assumes get_hostaddr_hostent_af() returns heap memory the
 * caller owns -- confirm against its definition.
 */
void print_server_port_to_stderr(char *s_name)
{
    int             rc = PBSE_NONE;
    char           *s_addr = NULL;
    unsigned short  af_family;
    struct in_addr  hostaddr;
    char           *ip_addr = NULL;
    int             s_len = 0;

    rc = get_hostaddr_hostent_af(&rc, s_name, &af_family, &s_addr, &s_len);

    if (rc == PBSE_NONE) {
        size_t copy_len = 0;

        /*
         * hostaddr only has room for an IPv4 address. Never copy more than
         * that, whatever length the resolver reports, and start from a
         * zeroed value so a short or missing address is still well defined.
         */
        memset(&hostaddr, 0, sizeof(hostaddr));

        if ((s_addr != NULL) && (s_len > 0)) {
            copy_len = (size_t)s_len;

            if (copy_len > sizeof(hostaddr)) {
                copy_len = sizeof(hostaddr);
            }

            memcpy(&hostaddr, s_addr, copy_len);
        }

        ip_addr = inet_ntoa(hostaddr);
        fprintf(stderr, "Error communicating with %s(%s)\n", s_name, ip_addr);
    } else {
        const char *err_msg = "";

        /* Only look up positive codes, and never hand NULL to %s. */
        if (rc > 0) {
            const char *looked_up = pbs_strerror(rc);

            if (looked_up != NULL) {
                err_msg = looked_up;
            }
        }

        fprintf(stderr, "Can not resolve name for server %s. (rc = %d - %s)\n",
                s_name, rc, err_msg);
    }

    free(s_addr);
}
static int simplecom( int stream, int *local_errno, int com) { struct out *op; if ((op = findout(local_errno, stream)) == NULL) { return(-1); } op->len = -1; if (startcom(op->chan, local_errno, com,0) != DIS_SUCCESS) { close(op->chan->sock); return(-1); } if (DIS_tcp_wflush(op->chan) == -1) { *local_errno = errno; DBPRT(("simplecom: flush error %d (%s)\n", *local_errno, pbs_strerror(*local_errno))) close(op->chan->sock); return(-1); } return(0); } /* END simplecom() */
int main( int argc, char **argv) /* qrerun */ { int any_failed = 0; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[PBS_MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME]; char extend[1024]; int errflg = 0; int c; static char usage[] = "usage: qrerun [-f] <JOBID>[ <JOBID>]...\n"; if (argc < 2) { fprintf(stderr,"%s", usage); exit(2); } #define GETOPT_ARGS "m:f" extend[0] = '\0'; while ((c = (char) getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'm': /* add delete message */ if (extend[0] != '\0') { /* extension option already specified */ errflg++; break; } if (strchr(optarg, '=')) { /* message cannot contain '=' character */ errflg++; break; } strncpy(extend, optarg, sizeof(extend)); break; case 'f': if (extend[0] != '\0') { errflg++; break; } strcpy(extend, RERUNFORCE); break; } /* END switch (c) */ } /* END while ((c = getopt(argc,argv,GETOPT_ARGS)) != EOF) */ if ((errflg != 0) || (optind >= argc)) { fprintf(stderr,"%s", usage); exit(2); } for (;optind < argc;optind++) { int connect; int stat = 0; int located = FALSE; snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if (get_server(job_id, job_id_out, sizeof(job_id_out), server_out, sizeof(server_out))) { fprintf(stderr, "qrerun: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_out); if (connect <= 0) { any_failed = -1 * connect; if (server_out[0] != 0) fprintf(stderr, "qrerun: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qrerun: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, pbs_strerror(any_failed)); continue; } if (extend[0] != '\0') stat = pbs_rerunjob_err(connect, job_id_out, extend, &any_failed); else stat = pbs_rerunjob_err(connect, job_id_out, NULL, &any_failed); if (stat && (any_failed != PBSE_UNKJOBID)) { prt_job_err("qrerun", connect, job_id_out); } else if (stat && 
(any_failed == PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); snprintf(server_out, sizeof(server_out), "%s", rmt_server); goto cnt; } prt_job_err("qrerun", connect, job_id_out); } pbs_disconnect(connect); } exit(any_failed); } /* END main() */
int qdel_main( int argc, char **argv, char **envp) { int c; int errflg = 0; int any_failed = 0; int purge_completed = FALSE; int located = FALSE; char *pc; bool dash_t = false; /* for array submission job handling */ int past_failure = 0; /* for multiple job submission error message tracking */ char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME] = ""; char extend[1024]; job_data_container attr; job_data *tmp_data = NULL; int client_retry = 0; #define GETOPT_ARGS "ab:cm:pW:t:" set_env_opts(&attr, envp); process_config_file(&attr); if (hash_find(&attr, "PBS_CLIENTRETRY", &tmp_data)) { client_retry = atoi(tmp_data->value.c_str()); } extend[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'a': /* Async job deletion */ if (extend[0] != '\0') { errflg++; break; } snprintf(extend, sizeof(extend), "%s", DELASYNC); break; case 'b': client_retry = atoi(optarg); break; case 'c': if (extend[0] != '\0') { errflg++; break; } snprintf(extend,sizeof(extend),"%s%ld",PURGECOMP,(long)(time(NULL))); purge_completed = TRUE; break; case 'm': /* add delete message */ if (extend[0] != '\0') { /* extension option already specified */ errflg++; break; } snprintf(extend, sizeof(extend), "%s", optarg); break; case 'p': if (extend[0] != '\0') { errflg++; break; } snprintf(extend, sizeof(extend), "%s1", DELPURGE); break; case 't': dash_t = true; if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -t value (array range cannot be zero length)\n"); errflg++; break; } snprintf(extend,sizeof(extend),"%s%s", ARRAY_RANGE, pc); break; case 'W': if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } while (*pc != '\0') { if (!isdigit(*pc)) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } pc++; } 
snprintf(extend, sizeof(extend), "%s%s", DELDELAY, optarg); break; default: errflg++; break; } } /* END while (c) */ if (purge_completed) { snprintf(server_out, sizeof(server_out), "%s", pbs_default()); goto cnt; } if ((errflg != 0) || (optind >= argc)) { static char usage[] = "usage: qdel [{ -a | -c | -p | -t | -W delay | -m message}] [-b retry_seconds] [<JOBID>[<JOBID>]|'all'|'ALL']...\n"; fprintf(stderr, "%s", usage); fprintf(stderr, " -a -c, -m, -p, -t, and -W are mutually exclusive\n"); exit(2); } if (client_retry > 0) { cnt2server_conf(client_retry); /* set number of seconds to retry */ } for (;optind < argc;optind++) { int connect; int stat; /* check to see if user specified 'all' to delete all jobs */ snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if ((dash_t == true) && is_array(job_id) == false) { fprintf(stderr, "qdel: Error: job id '%s' isn't a job array but -t was specified.\n", job_id); any_failed = 1; exit(any_failed); } if (get_server(job_id, job_id_out, sizeof(job_id_out), server_out, sizeof(server_out))) { fprintf(stderr, "qdel: illegally formed job identifier: %s\n", job_id); any_failed = 1; exit(any_failed); } cnt: connect = cnt2server(server_out); if (connect <= 0) { any_failed = -1 * connect; if(server_out[0] != 0) fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, pbs_strerror(any_failed)); continue; } int retries = 0; do { stat = pbs_deljob_err(connect, job_id_out, extend, &any_failed); if (any_failed == PBSE_TIMEOUT) { sleep(1); fprintf(stdout, "Connection to server timed out. 
Trying again"); } } while ((++retries < MAX_RETRIES) && (any_failed == PBSE_TIMEOUT)); if (past_failure == 0) past_failure = any_failed; if (stat && (any_failed != PBSE_UNKJOBID)) { if (!located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } } prt_job_err((char *)"qdel", connect, job_id_out); } if (!located && any_failed != 0) { fprintf(stderr, "qdel: nonexistent job id: %s\n", job_id); } pbs_disconnect(connect); } if ((past_failure != PBSE_NONE) && (any_failed == PBSE_NONE)) any_failed = past_failure; exit(any_failed); } /* END qdel_main() */
int cnt2server( const char *SpecServer) /* I (optional) */ { int connect; time_t firsttime = 0, thistime = 0; char Server[1024]; if (cnt2server_retry > 0) { firsttime = time(NULL); } memset(Server, 0, sizeof(Server)); if ((SpecServer != NULL) && (SpecServer[0] != '\0')) { snprintf(Server, sizeof(Server)-1, "%s", SpecServer); } /* NOTE: env vars PBS_DEFAULT and PBS_SERVER will be checked and applied w/in pbs_connect() */ start: connect = pbs_connect(Server); if (connect <= 0) { /* PBSE_ * -1 is returned if applicable */ if ((connect * -1) > PBSE_) { switch (connect * -1) { case PBSE_BADHOST: if (Server[0] == '\0') { fprintf(stderr, "Cannot resolve default server host '%s' - check server_name file.\n", pbs_default()); } else { fprintf(stderr, "Cannot resolve specified server host '%s'.\n", Server); } break; case PBSE_NOCONNECTS: if (thistime == 0) fprintf(stderr, "Too many open connections.\n"); if (cnt2server_retry != 0) goto retry; break; case PBSE_NOSERVER: fprintf(stderr, "No default server name - check server_name file.\n"); break; case PBSE_SYSTEM: if (thistime == 0) fprintf(stderr, "System call failure.\n"); if (cnt2server_retry != 0) goto retry; break; case PBSE_PERM: if (thistime == 0) fprintf(stderr, "No Permission.\n"); if (cnt2server_retry != 0) goto retry; break; case PBSE_IFF_NOT_FOUND: fprintf(stderr, "pbs_iff command not found.\n"); break; case PBSE_PROTOCOL: fprintf(stderr, "protocol failure.\n"); break; default: if (thistime == 0) fprintf(stderr, "Communication failure.\n"); if (cnt2server_retry != 0) goto retry; break; } } /* END if (a PBSE_ was reported) */ else /* These represent system errors (errno numbers) */ { if (thistime == 0) { if ((connect *-1) == ECONNREFUSED) { if (Server[0] == '\0') { char *fbserver; fbserver = pbs_fbserver(); if ((fbserver != NULL) && (fbserver[0] != '\0')) { snprintf(Server, sizeof(Server), "%s", fbserver); if (getenv("PBSDEBUG") != NULL) { fprintf(stderr, "attempting fallback server %s\n", fbserver); } goto start; } 
fprintf(stderr, "Cannot connect to default server host '%s' - check pbs_server daemon and/or trqauthd.\n", pbs_default()); } else { fprintf(stderr, "Cannot connect to specified server host '%s'.\n", Server); } } else { pbs_strerror(connect *-1); } } if (cnt2server_retry != 0) goto retry; } } /* END if (connect <= 0) */ return(connect); retry: if (thistime == 0) { fprintf(stderr, "Retrying for %d seconds\n", (int)cnt2server_retry); } thistime = time(NULL); if (cnt2server_retry > 0) /* negative is infinite */ { if ((thistime - firsttime) > cnt2server_retry) { return(connect); } if (getenv("PBSDEBUG") != NULL) { fprintf(stderr, "seconds remaining: %d\n", (int)(cnt2server_retry - (thistime - firsttime))); } } else { if (getenv("PBSDEBUG") != NULL) fprintf(stderr, "retrying...\n"); } sleep(CNTRETRYDELAY); goto start; } /* END cnt2server() */
int svr_save( struct server *ps, int mode) { #ifndef SERVER_XML static char *this_function_name = "svr_save"; int i; int sdb; int save_acl(attribute *, attribute_def *, char *, char *); #endif /* ndef SERVER_XML */ /* save the server in xml only if configured */ #ifdef SERVER_XML return(svr_save_xml(ps,mode)); #endif /* def SERVER_XML */ #ifndef SERVER_XML if (mode == SVR_SAVE_QUICK) { sdb = open(path_svrdb, O_WRONLY | O_CREAT | O_Sync, 0600); if (sdb < 0) { log_err(errno, this_function_name, msg_svdbopen); return(-1); } while ((i = write( sdb, &ps->sv_qs, sizeof(struct server_qs))) != sizeof(struct server_qs)) { if ((i == -1) && (errno == EINTR)) continue; log_err(errno, this_function_name, msg_svdbnosv); return(-1); } close(sdb); } else { /* SVR_SAVE_FULL Save */ sdb = open(path_svrdb_new, O_WRONLY | O_CREAT | O_Sync, 0600); if (sdb < 0) { log_err(errno, this_function_name, msg_svdbopen); return(-1); } ps->sv_qs.sv_savetm = time_now; save_setup(sdb); if (save_struct((char *)&ps->sv_qs, sizeof(struct server_qs)) != 0) { snprintf(log_buffer, 1024, "cannot save data into server db, errno=%d (%s)", errno, pbs_strerror(errno)); log_err(errno, this_function_name, log_buffer); close(sdb); return(-1); } if (save_attr(svr_attr_def, ps->sv_attr, (int)SRV_ATR_LAST) != 0) { close(sdb); return(-1); } if (save_flush() != 0) { close(sdb); return(-1); } /* new db successfully created, remove original db */ close(sdb); unlink(path_svrdb); if (link(path_svrdb_new, path_svrdb) == -1) { snprintf(log_buffer, 1024, "cannot move new database to default database location, errno=%d (%s)", errno, pbs_strerror(errno)); log_err(errno, this_function_name, log_buffer); } else { unlink(path_svrdb_new); } /* save the server acls to their own files: */ /* priv/svracl/(attr name) */ for (i = 0;i < SRV_ATR_LAST;i++) { if (ps->sv_attr[i].at_type == ATR_TYPE_ACL) save_acl(&ps->sv_attr[i], &svr_attr_def[i], PBS_SVRACL, svr_attr_def[i].at_name); } } /* END else (mode == SVR_SAVE_QUICK) */ return(0); 
#endif /* ndef SERVER_XML */ } /* END svr_save() */
int main( int argc, char **argv) { int c; int errflg = 0; char *errmsg; #define MAX_OPTARG_LEN 256 #define MAX_RESOURCE_NAME_LEN 256 char optargout[MAX_OPTARG_LEN+1]; char resource_name[MAX_RESOURCE_NAME_LEN+1]; enum batch_op op; enum batch_op *pop = &op; struct attropl *select_list = 0; static char destination[PBS_MAXROUTEDEST+1] = ""; char server_out[MAXSERVERNAME] = ""; char *queue_name_out; char *server_name_out; int connect; char **selectjob_list; char *res_pos; char *pc; int u_cnt, o_cnt, s_cnt, n_cnt; time_t after; char a_value[80]; int exec_only = 0; if (getenv("PBS_QSTAT_EXECONLY") != NULL) exec_only = 1; #define GETOPT_ARGS "a:A:ec:h:l:N:p:q:r:s:u:" while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) switch (c) { case 'a': check_op(optarg, pop, optargout); if ((after = cvtdate(optargout)) < 0) { fprintf(stderr, "qselect: illegal -a value\n"); errflg++; break; } sprintf(a_value, "%ld", (long)after); set_attrop(&select_list, ATTR_a, NULL, a_value, op); break; case 'e': exec_only = 1; break; case 'c': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } if (strcmp(pc, "u") == 0) { if ((op != EQ) && (op != NE)) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } } else if ((strcmp(pc, "n") != 0) && (strcmp(pc, "s") != 0) && (strcmp(pc, "c") != 0)) { if (strncmp(pc, "c=", 2) != 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } pc += 2; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } while (*pc != '\0') { if (!isdigit((int)*pc)) { fprintf(stderr, "qselect: illegal -c value\n"); errflg++; break; } pc++; } } set_attrop(&select_list, ATTR_c, NULL, optargout, op); break; case 'h': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)*pc)) pc++; if (strlen(pc) == 0) { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } u_cnt = o_cnt = 
s_cnt = n_cnt = 0; while (*pc) { if (*pc == 'u') u_cnt++; else if (*pc == 'o') o_cnt++; else if (*pc == 's') s_cnt++; else if (*pc == 'n') n_cnt++; else { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } pc++; } if (n_cnt && (u_cnt + o_cnt + s_cnt)) { fprintf(stderr, "qselect: illegal -h value\n"); errflg++; break; } set_attrop(&select_list, ATTR_h, NULL, optargout, op); break; case 'l': res_pos = optarg; while (*res_pos != '\0') { if (check_res_op(res_pos, resource_name, pop, optargout, &res_pos) != 0) { errflg++; break; } set_attrop(&select_list, ATTR_l, resource_name, optargout, op); } break; case 'p': check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_p, NULL, optargout, op); break; case 'q': strncpy(destination, optarg, PBS_MAXROUTEDEST); check_op(optarg, pop, optargout); set_attrop(&select_list, ATTR_q, NULL, optargout, op); break; case 'r': op = EQ; pc = optarg; while (isspace((int)(*pc))) pc++; if (strlen(pc) != 1) { fprintf(stderr, "qsub: illegal -r value\n"); errflg++; break; } if (*pc != 'y' && *pc != 'n') { fprintf(stderr, "qsub: illegal -r value\n"); errflg++; break; } set_attrop(&select_list, ATTR_r, NULL, pc, op); break; case 's': check_op(optarg, pop, optargout); pc = optargout; while (isspace((int)(*pc))) pc++; if (strlen(optarg) == 0) { fprintf(stderr, "qselect: illegal -s value\n"); errflg++; break; } while (*pc) { if (*pc != 'C' && *pc != 'E' && *pc != 'H' && *pc != 'Q' && *pc != 'R' && *pc != 'T' && *pc != 'W') { fprintf(stderr, "qselect: illegal -s value\n"); errflg++; break; } pc++; } set_attrop(&select_list, ATTR_state, NULL, optargout, op); break; case 'u': op = EQ; if (parse_at_list(optarg, FALSE, FALSE)) { fprintf(stderr, "qselect: illegal -u value\n"); errflg++; break; } set_attrop(&select_list, ATTR_u, NULL, optarg, op); break; case 'A': op = EQ; set_attrop(&select_list, ATTR_A, NULL, optarg, op); break; case 'N': op = EQ; set_attrop(&select_list, ATTR_N, NULL, optarg, op); break; default : errflg++; } if 
(errflg || (optind < argc)) { static char usage[] = "usage: qselect \ [-a [op]date_time] [-A account_string] [-e] [-c [op]interval] \n\ [-h hold_list] [-l resource_list] [-N name] [-p [op]priority] \n\ [-q destination] [-r y|n] [-s states] [-u user_name]\n"; fprintf(stderr,"%s", usage); exit(2); } if (notNULL(destination)) { if (parse_destination_id(destination, &queue_name_out, &server_name_out)) { fprintf(stderr, "qselect: illegally formed destination: %s\n", destination); exit(2); } else { if (notNULL(server_name_out)) { strcpy(server_out, server_name_out); } } } connect = cnt2server(server_out); if (connect <= 0) { fprintf(stderr, "qselect: cannot connect to server %s (errno=%d) %s\n", pbs_server, pbs_errno, pbs_strerror(pbs_errno)); exit(pbs_errno); } selectjob_list = pbs_selectjob(connect, select_list, exec_only ? EXECQUEONLY : NULL); if (selectjob_list == NULL) { if (pbs_errno != PBSE_NONE) { errmsg = pbs_geterrmsg(connect); if (errmsg != NULL) { fprintf(stderr, "qselect: %s\n", errmsg); } else { fprintf(stderr, "qselect: Error (%d - %s) selecting jobs\n", pbs_errno, pbs_strerror(pbs_errno)); } exit(pbs_errno); } } else /* got some jobs ids */ { int i = 0; while (selectjob_list[i] != NULL) { printf("%s\n", selectjob_list[i++]); } free(selectjob_list); } pbs_disconnect(connect); exit(0); }
static void execute( char *job, /* I */ char *server, /* I */ char *location, /* I */ int async) /* I */ { int ct; /* Connection to the server */ int err; /* Error return from pbs_run */ int located = FALSE; char rmt_server[MAXSERVERNAME]; int local_errno = 0; cnt: if ((ct = cnt2server(server)) > 0) { if (async == TRUE) { err = pbs_asyrunjob_err(ct, job, location, NULL, &local_errno); /* see lib/Libifl/pbsD_runjob.c */ } else { err = pbs_runjob_err(ct, job, location, NULL, &local_errno); /* see lib/Libifl/pbsD_runjob.c */ } if (err && (local_errno == PBSE_UNKNODE)) { fprintf(stderr, "qrun: Unknown node in hostlist '%.16s...' for job %s\n", location, job); exitstatus = 2; } else if (err && (local_errno != PBSE_UNKJOBID)) { prt_job_err("qrun", ct, job); exitstatus = 2; } else if (err && (local_errno == PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job, server, rmt_server)) { pbs_disconnect(ct); strcpy(server, rmt_server); goto cnt; } prt_job_err("qrun", ct, job); exitstatus = 2; } pbs_disconnect(ct); } else { fprintf(stderr, "qrun: could not connect to server %s (%d) %s\n", server, ct * -1, pbs_strerror(ct * -1)); exitstatus = 2; } return; } /* END execute() */
int do_mom( char *HPtr, int MOMPort, int CmdIndex) { int sd; if ((sd = openrm(HPtr, MOMPort)) < 0) { /* FAILURE */ extern char TRMEMsg[]; fprintf(stderr, "cannot connect to MOM on node '%s', errno=%d (%s)\n", HPtr, pbs_errno, strerror(pbs_errno)); if (TRMEMsg[0] != '\0') { fprintf(stderr, " %s\n", TRMEMsg); } return(sd); } if (IsVerbose == TRUE) { fprintf(stderr, "INFO: successfully connected to %s\n", HPtr); } switch (CmdIndex) { case momClear: { char tmpLine[1024]; char *Value; snprintf(tmpLine, 1024, "clearjob=%s", (JPtr != NULL) ? JPtr : "all"); if (addreq(sd, tmpLine) != 0) { /* FAILURE */ fprintf(stderr,"ERROR: cannot request job clear on %s (errno=%d-%s: %d-%s)\n", HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); closerm(sd); return(FAILURE); } if ((Value = (char *)getreq(sd)) == NULL) { /* FAILURE */ fprintf(stderr,"ERROR: job clear failed on %s (errno=%d-%s: %d-%s)\n", HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); closerm(sd); return(FAILURE); } /* job cleared */ fprintf(stdout,"job clear request successful on %s\n", HPtr); } /* END BLOCK (case momClear) */ break; case momShutdown: { int rc; rc = downrm(sd); if (rc != 0) { /* FAILURE */ fprintf(stderr,"ERROR: cannot shutdown mom daemon on %s (errno=%d-%s: %d-%s)\n", HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); closerm(sd); exit(EXIT_FAILURE); } fprintf(stdout, "shutdown request successful on %s\n", HPtr); } /* END BLOCK */ break; case momReconfig: { int rc; rc = configrm(sd, ConfigBuf); if (rc != 0) { /* FAILURE */ fprintf(stderr,"ERROR: cannot reconfigure mom on %s (errno=%d-%s: %d-%s)\n", HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); closerm(sd); return(FAILURE); } fprintf(stdout, "reconfig successful on %s\n", HPtr); } /* END BLOCK (case momReconfig) */ break; case momQuery: default: { char *ptr; int rindex; char *Value; int was_error = 0; for (rindex = 0; rindex < QueryI; rindex++) { if (addreq(sd, 
Query[rindex]) != 0) { fprintf(stderr,"ERROR: cannot add query for '%s' on %s (errno=%d-%s: %d-%s)\n", Query[rindex], HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); was_error = 1; } } for (rindex = 0; rindex < QueryI; rindex++) { if ((ptr = strchr(Query[rindex],'=')) != NULL) { *ptr = '\0'; } if ((Value = (char *)getreq(sd)) == NULL) { fprintf(stderr, "ERROR: query[%d] '%s' failed on %s (errno=%d-%s: %d-%s)\n", rindex, Query[rindex], HPtr, errno, pbs_strerror(errno), pbs_errno, pbs_strerror(pbs_errno)); was_error = 1; } else { if (!strncmp(Query[rindex], "diag", strlen("diag"))) { fprintf(stdout, "%s\n", Value); } else if (!strncmp(Query[rindex], "cycle", strlen("cycle"))) { fprintf(stdout, "mom %s successfully cycled %s\n", HPtr, Value); } else { fprintf(stdout, "%12s: %12s = '%s'\n", HPtr, Query[rindex], Value); } } if (ptr != NULL) { *ptr = '='; } } /* END for (rindex) */ return (was_error); } /* END BLOCK (case momQuery) */ break; } /* END switch(CmdIndex) */ closerm(sd); return(0); } /* END do_mom() */
int main( int argc, char **argv) { int c; int errflg = 0; int any_failed = 0; int purge_completed = FALSE; int located = FALSE; char *pc; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME] = ""; char extend[1024]; #define GETOPT_ARGS "acm:pW:t:" initialize_network_info(); extend[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'a': /* Async job deletion */ if (extend[0] != '\0') { errflg++; break; } strcpy(extend, DELASYNC); break; case 'c': if (extend[0] != '\0') { errflg++; break; } snprintf(extend,sizeof(extend),"%s%ld",PURGECOMP,(long)(time(NULL))); purge_completed = TRUE; break; case 'm': /* add delete message */ if (extend[0] != '\0') { /* extension option already specified */ errflg++; break; } strncpy(extend, optarg, sizeof(extend)); break; case 'p': if (extend[0] != '\0') { errflg++; break; } strcpy(extend, DELPURGE); strcat(extend, "1"); break; case 't': if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -t value (array range cannot be zero length)\n"); errflg++; break; } snprintf(extend,sizeof(extend),"%s%s", ARRAY_RANGE, pc); break; case 'W': if (extend[0] != '\0') { errflg++; break; } pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } while (*pc != '\0') { if (!isdigit(*pc)) { fprintf(stderr, "qdel: illegal -W value\n"); errflg++; break; } pc++; } strcpy(extend, DELDELAY); strcat(extend, optarg); break; default: errflg++; break; } } /* END while (c) */ if (purge_completed) { strcpy(server_out,pbs_default()); goto cnt; } if ((errflg != 0) || (optind >= argc)) { static char usage[] = "usage: qdel [{ -a | -c | -p | -t | -W delay | -m message}] [<JOBID>[<JOBID>]|'all'|'ALL']...\n"; fprintf(stderr, "%s", usage); fprintf(stderr, " -a -c, -m, -p, -t, and -W are mutually exclusive\n"); exit(2); } for (;optind < 
argc;optind++) { int connect; int stat = 0; /* check to see if user specified 'all' to delete all jobs */ snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if (get_server(job_id, job_id_out, sizeof(job_id_out), server_out, sizeof(server_out))) { fprintf(stderr, "qdel: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_out); if (connect <= 0) { any_failed = -1 * connect; if(server_out[0] != 0) fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qdel: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, pbs_strerror(any_failed)); continue; } stat = pbs_deljob_err(connect, job_id_out, extend, &any_failed); if (stat && (any_failed != PBSE_UNKJOBID)) { prt_job_err("qdel", connect, job_id_out); } else if (stat && (any_failed != PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } prt_job_err("qdel", connect, job_id_out); } pbs_disconnect(connect); } exit(any_failed); } /* END main() */
int main(int argc, char **argv) { char *server = NULL; char *jobid = NULL; char *var = NULL; char *value = NULL; int server_fd = 0; int ret = 0; int c = 0; struct batch_status *job = NULL; struct attrl *attribute = NULL; char *var_string = NULL; struct option prg_options[] = { {"help", no_argument, 0, 'h'}, {"version", no_argument, 0, 'V'}, }; for ( ; ; ) { int option_index = 0; c = getopt_long(argc, argv, "s:hV", prg_options, &option_index ); if (c == -1) break; switch (c) { case 'h': usage(0); break; case 'V': printf("qsetenv version: %s; for torque version %s\n", QSETENV_VERSION, TORQUE_VERSION); exit(0); break; case 's': server = optarg; break; } } for (c = optind; c != argc; c++) { switch (c-optind) { case 0: jobid = argv[c]; break; case 1: var = argv[c]; break; case 2: value = argv[c]; break; default: printf("Too many arguments!\n"); usage(1); break; } } if (value == NULL) { printf("Too few arguments!\n"); usage(1); } if (server == NULL) { server = pbs_get_server_list(); } char *tok_server = server; char *tgt_server = NULL; while ((tgt_server = strtok(tok_server, ",")) != NULL) { tok_server = NULL; server_fd = pbs_connect(tgt_server); if (server_fd > 0) { break; } } if (server_fd <= 0) { fprintf(stderr, "Failed to connect to PBS server!\n"); exit(1); } printf("Querying job %s\n", jobid); job = pbs_statjob(server_fd, jobid, NULL, 0); if (job != NULL) { printf("job name: %s\n", job->name); var_string = job_setenv_varstr(job, var, value); attribute = (struct attrl *) malloc(sizeof(struct attrl)); memset(attribute, 0, sizeof(struct attrl)); attribute->name = ATTR_v; attribute->value = var_string; attribute->next = NULL; ret = pbs_alterjob(server_fd, jobid, attribute, NULL); if (ret != 0) { printf("Got error: %s\n", pbs_strerror(pbs_errno)); } free(attribute); attribute = NULL; } if (var_string != NULL) { free(var_string); } if (job != NULL) { pbs_statfree(job); job = NULL; } pbs_disconnect(server_fd); if (ret != 0) { return 1; } return 0; }
#elif defined(ENOCONNECT) *local_errno = ENOCONNECT; #else *local_errno = ETXTBSY; #endif DBPRT(("configrm: diswcs %s\n", dis_emsg[ret])) return(-1); } if (DIS_tcp_wflush(op->chan) == -1) { DBPRT(("configrm: flush error %d (%s)\n", errno, pbs_strerror(errno))) return(-1 * errno); } if (simpleget(local_errno, op->chan)) { return(-1); } return(0); } /* END configrm() */ #ifdef __cplusplus } #endif
/* if a user requested deleting 'all' then this routine will get the list of * jobs from the server and try to delete all jobs that are not in a * 'C'omplete or 'E'xiting state */ void qdel_all( char *extend) /* I */ { char *jobid; char *state = 0; int connect; int stat; int retries; struct batch_status *p_status; struct batch_status *p; struct attropl *p_atropl = 0; struct attrl *a; connect = cnt2server('\0'); if (connect <= 0) { fprintf(stderr, "qdel: cannot connect to default server (errno=%d) %s\n", pbs_errno, pbs_strerror(pbs_errno)); return; } p_status = pbs_selstat(connect, p_atropl, NULL); if (p_status == NULL) { fprintf(stderr, "qdel: cannot find any jobs to delete\n"); } for (p = p_status;p != NULL;p = p->next) { jobid = p->name; a = p->attribs; while (a != NULL) { if ((a->name != NULL) && (!strcmp(a->name, ATTR_state))) { state = a->value; break; } a = a->next; } /* * Don't bother deleting jobs that are 'C'omplete or 'E'xiting * Unless we are Purging, then try 'C'ompleted jobs as well */ if (((strstr(extend,DELPURGE) != NULL) && (*state != 'E')) || ((*state != 'E') && (*state != 'C'))) { retries = 0; redo: stat = pbs_deljob(connect, jobid, extend); /* * if MOM is too slow to respond, we will retry a few times before * before giving up */ if (stat && (pbs_errno == PBSE_NORELYMOM) && (retries < 3)) { sleep(1); retries++; goto redo; } if (stat && (pbs_errno != PBSE_UNKJOBID) && (pbs_errno != PBSE_BADSTATE)) { printf("Deletion Error: %d (%s)\n", pbs_errno, pbs_strerror(pbs_errno)); prt_job_err("qdel", connect, jobid); } } } pbs_disconnect(connect); return; }
int main( int argc, /* I */ char **argv) /* I */ { int c; int errflg = 0; int any_failed = 0; int u_cnt, o_cnt, s_cnt; char *pc; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME]; char extend[MAXPATHLEN]; #define MAX_HOLD_TYPE_LEN 32 char hold_type[MAX_HOLD_TYPE_LEN+1]; #define GETOPT_ARGS "h:t:" hold_type[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) { switch (c) { case 'h': while (isspace((int)*optarg)) optarg++; if (strlen(optarg) == 0) { fprintf(stderr, "qrls: illegal -h value\n"); errflg++; break; } pc = optarg; u_cnt = o_cnt = s_cnt = 0; while (*pc) { if (*pc == 'u') u_cnt++; else if (*pc == 'o') o_cnt++; else if (*pc == 's') s_cnt++; else { fprintf(stderr, "qrls: illegal -h value\n"); errflg++; break; } pc++; } strcpy(hold_type, optarg); break; case 't': pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qrls: illegal -t value (array range cannot be zero length)\n"); errflg++; break; } snprintf(extend,sizeof(extend),"%s%s", ARRAY_RANGE, pc); break; default: errflg++; break; } } if (errflg || optind >= argc) { static char usage[] = "usage: qrls [-h {uos}] [-t array_range] job_identifier...\n"; fprintf(stderr,"%s", usage); exit(2); } for (;optind < argc;optind++) { int connect; int stat = 0; int located = FALSE; snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if (get_server(job_id, job_id_out, sizeof(job_id_out), server_out, sizeof(server_out))) { fprintf(stderr, "qrls: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_out); if (connect <= 0) { any_failed = -1 * connect; if (server_out[0] != 0) fprintf(stderr, "qrls: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qrls: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, pbs_strerror(any_failed)); continue; } stat = 
pbs_rlsjob_err(connect, job_id_out, hold_type, extend, &any_failed); if (stat && (any_failed != PBSE_UNKJOBID)) { prt_job_err("qrls", connect, job_id_out); } else if (stat && (any_failed != PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } prt_job_err("qrls", connect, job_id_out); } pbs_disconnect(connect); } /* END for () */ exit(any_failed); /*NOTREACHED*/ return(0); } /* END main() */
int main( int argc, char **argv) /* qmsg */ { int c; int to_file; int errflg = 0; int any_failed = 0; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME]; #define MAX_MSG_STRING_LEN 256 char msg_string[MAX_MSG_STRING_LEN+1]; #define GETOPT_ARGS "EO" msg_string[0] = '\0'; to_file = 0; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) switch (c) { case 'E': to_file |= MSG_ERR; break; case 'O': to_file |= MSG_OUT; break; default : errflg++; } if (to_file == 0) to_file = MSG_ERR; /* default */ if (errflg || ((optind + 1) >= argc)) { static char usage[] = "usage: qmsg [-O] [-E] msg_string job_identifier...\n"; fprintf(stderr,"%s", usage); exit(2); } snprintf(msg_string, sizeof(msg_string), "%s", argv[optind]); for (optind++; optind < argc; optind++) { int connect; int stat = 0; int located = FALSE; std::string server_name; std::vector<std::string> id_list; snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if (get_server_and_job_ids(job_id, id_list, server_name)) { fprintf(stderr, "qmsg: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } snprintf(server_out, sizeof(server_out), "%s", server_name.c_str()); cnt: connect = cnt2server(server_out, false); if (connect <= 0) { any_failed = -1 * connect; if (server_out[0] != 0) fprintf(stderr, "qmsg: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qmsg: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, pbs_strerror(any_failed)); continue; } for (size_t i = 0; i < id_list.size(); i++) { snprintf(job_id_out, sizeof(job_id_out), "%s", id_list[i].c_str()); stat = pbs_msgjob_err(connect, job_id_out, to_file, msg_string, NULL, &any_failed); if (any_failed != PBSE_UNKJOBID) break; } if (stat && (any_failed != PBSE_UNKJOBID)) { prt_job_err("qmsg", connect, job_id_out); } else if (stat && (any_failed == 
PBSE_UNKJOBID) && !located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } prt_job_err("qmsg", connect, job_id_out); } pbs_disconnect(connect); } exit(any_failed); }
/*
 * pbs_connect_drop_socket - close the socket held by connection slot idx,
 * if there is one, and record that the slot no longer has a socket so a
 * later cleanup cannot close the same descriptor a second time.
 */
static void pbs_connect_drop_socket(

  int idx)

  {
  if (connection[idx].ch_socket >= 0)
    {
    close(connection[idx].ch_socket);

    connection[idx].ch_socket = -1;
    }
  }

/*
 * pbs_original_connect - open an authenticated connection to a server.
 *
 * server  (I) NULL | "" | HOSTNAME | HOSTNAME:PORT; resolved through
 *             PBS_get_server().
 *
 * A free entry of connection[] is reserved and its mutex is held for the
 * whole attempt.  When ENABLE_UNIX_SOCKETS is defined and the server is
 * the local host a unix domain socket is tried first; on any failure the
 * code falls back to a TCP socket.  The TCP attempt is repeated while the
 * step that failed allows it and fewer than MAX_RETRIES attempts were made.
 *
 * The timeout is read from the PBSAPITIMEOUT environment variable
 * (default 300); a timeout greater than 2 disables most retries.
 *
 * Returns the index of the reserved connection[] entry (> 0) on success
 * and a negative error value on failure.  On failure the entry is marked
 * free again and its socket is closed.  The entry's mutex is released
 * before returning in both cases.
 */
int pbs_original_connect(

  char *server)  /* I (FORMAT:  NULL | '\0' | HOSTNAME | HOSTNAME:PORT )*/

  {
  struct sockaddr_in  server_addr;
  char               *if_name;
  struct addrinfo    *addr_info;
  int                 out;
  int                 i;
  int                 opt_value = 1;
  int                 rc = PBSE_NONE;
  int                 local_errno;

  struct sockaddr     preferred_addr; /* set if TRQ_IFNAME set in torque.cfg */
  struct passwd      *pw;
  int                 use_unixsock = 0;
  uid_t               pbs_current_uid;
  long                sockflags;
  int                 retry = 1;

#ifdef ENABLE_UNIX_SOCKETS
  struct sockaddr_un  unserver_addr;
  char                hnamebuf[256];
#endif

  char               *ptr;

  memset(&server_addr, 0, sizeof(server_addr));

  /* Read the timeout from the environment */
  if ((ptr = getenv("PBSAPITIMEOUT")) != NULL)
    {
    pbs_tcp_timeout = strtol(ptr, NULL, 0);

    if (pbs_tcp_timeout <= 0)
      pbs_tcp_timeout = 300;

    if (pbs_tcp_timeout > 2)
      retry = 0;
    }
  else
    pbs_tcp_timeout = 300;

  /* reserve a connection state record */
  out = -1;

  for (i = 1; i < NCONNECTS; i++)
    {
    if (connection[i].ch_mutex == NULL)
      {
      connection[i].ch_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));

      if (connection[i].ch_mutex == NULL)
        {
        if (getenv("PBSDEBUG"))
          fprintf(stderr, "ALERT: cannot allocate connection mutex\n");

        return(PBSE_SYSTEM * -1);
        }

      pthread_mutex_init(connection[i].ch_mutex, NULL);
      }

    pthread_mutex_lock(connection[i].ch_mutex);

    if (connection[i].ch_inuse == FALSE)
      {
      /* keep this entry's mutex locked until we return */
      out = i;

      connection[out].ch_inuse  = TRUE;
      connection[out].ch_errno  = 0;
      connection[out].ch_socket = -1;
      connection[out].ch_errtxt = NULL;

      break;
      }

    pthread_mutex_unlock(connection[i].ch_mutex);
    }

  if (out < 0)
    {
    if (getenv("PBSDEBUG"))
      fprintf(stderr, "ALERT: cannot locate free channel\n");

    /* FAILURE */
    /* no need to unlock mutex here - in this case no connection was found */

    return(PBSE_NOCONNECTS * -1);
    }

  /* get server host and port */
  server = PBS_get_server(server, &server_port);

  if (server == NULL)
    {
    if (getenv("PBSDEBUG"))
      fprintf(stderr, "ALERT: PBS_get_server() failed\n");

    /* cleanup_conn_lite frees the entry and unlocks its mutex exactly once */
    rc = PBSE_NOSERVER * -1;

    goto cleanup_conn_lite;
    }

  /* determine who we are */
  pbs_current_uid = getuid();

  if ((pw = getpwuid(pbs_current_uid)) == NULL)
    {
    if (getenv("PBSDEBUG"))
      {
      fprintf(stderr, "ALERT: cannot get password info for uid %ld\n",
              (long)pbs_current_uid);
      }

    rc = PBSE_NOSERVER * -1;

    goto cleanup_conn_lite;
    }

  snprintf(pbs_current_user, PBS_MAXUSER, "%s", pw->pw_name);

  pbs_server = server;    /* set for error messages from commands */

#ifdef ENABLE_UNIX_SOCKETS
  /* determine if we want to use unix domain socket */
  if (!strcmp(server, "localhost"))
    use_unixsock = 1;
  else if ((gethostname(hnamebuf, sizeof(hnamebuf) - 1) == 0) && !strcmp(hnamebuf, server))
    use_unixsock = 1;

  /* NOTE: if any part of using unix domain sockets fails,
   * we just cleanup and try again with inet sockets.  The connection
   * entry stays reserved (ch_inuse) because the inet attempt reuses it. */

  /* get socket */
  if (use_unixsock)
    {
    connection[out].ch_socket = socket(AF_UNIX, SOCK_STREAM, 0);

    if (connection[out].ch_socket < 0)
      {
      if (getenv("PBSDEBUG"))
        {
        fprintf(stderr, "ERROR:  cannot create socket:  errno=%d (%s)\n",
                errno,
                strerror(errno));
        }

      local_errno = PBSE_PROTOCOL;

      use_unixsock = 0;
      }
    }

  /* and connect... */
  if (use_unixsock)
    {
    unserver_addr.sun_family = AF_UNIX;
    strcpy(unserver_addr.sun_path, TSOCK_PATH);

    if (connect(
          connection[out].ch_socket,
          (struct sockaddr *)&unserver_addr,
          (strlen(unserver_addr.sun_path) + sizeof(unserver_addr.sun_family))) < 0)
      {
      /* capture and report errno before close() can overwrite it */
      local_errno = errno;

      if (getenv("PBSDEBUG"))
        {
        fprintf(stderr, "ERROR:  cannot connect to server, errno=%d (%s)\n",
                errno,
                strerror(errno));
        }

      pbs_connect_drop_socket(out);

      use_unixsock = 0;  /* will try again with inet socket */
      }
    }

  if (use_unixsock)
    {
    if (!send_unix_creds(connection[out].ch_socket))
      {
      if (getenv("PBSDEBUG"))
        {
        fprintf(stderr, "ERROR:  cannot send unix creds to pbs_server:  errno=%d (%s)\n",
                errno,
                strerror(errno));
        }

      pbs_connect_drop_socket(out);

      local_errno = PBSE_PROTOCOL;

      use_unixsock = 0;  /* will try again with inet socket */
      }
    }

#endif /* END ENABLE_UNIX_SOCKETS */

  if (!use_unixsock)
    {
    int         retries = 0;
    std::string err_msg;

    /* at this point, either using unix sockets failed, or we determined not to
     * try */

    /* Every "continue" below lands on the loop condition, so each retry
     * path must leave rc != PBSE_NONE.  The connection entry stays
     * reserved and locked across retries; only its socket is dropped. */
    do
      {
      connection[out].ch_socket = socket(AF_INET, SOCK_STREAM, 0);

      if (connection[out].ch_socket < 0)
        {
        if (getenv("PBSDEBUG"))
          {
          if (!retry || retries >= MAX_RETRIES)
            fprintf(stderr, "ERROR:  cannot connect to server \"%s\", errno=%d (%s)\n",
                    server,
                    errno,
                    strerror(errno));
          }

        retries++;

        if (!retry || retries >= MAX_RETRIES)
          {
          rc = PBSE_SYSTEM * -1;

          goto cleanup_conn;
          }

        rc = PBSE_SYSTEM;   /* non-zero so the loop tries again */

        usleep(1000);

        continue;
        }

      if (setsockopt(connection[out].ch_socket, SOL_SOCKET, SO_REUSEADDR, &opt_value, sizeof(opt_value)))
        perror("Couldn't set socket option");

      /* This is probably an IPv4 solution for the if_name and preferred_addr
         We need to see what ioctl call we need for IPv6 */
      if_name = trq_get_if_name();

      if (if_name)
        {
        rc = trq_set_preferred_network_interface(if_name, &preferred_addr);

        if (rc != PBSE_NONE)
          {
          if (!retry || retries >= MAX_RETRIES)
            fprintf(stderr, "could not set preferred network interface (%s): %d\n",
                    if_name,
                    rc);

          free(if_name);

          retries++;

          if (!retry || retries >= MAX_RETRIES)
            {
            rc = rc * -1;

            goto cleanup_conn;
            }

          pbs_connect_drop_socket(out);

          usleep(1000);

          continue;
          }

        rc = bind(connection[out].ch_socket, &preferred_addr, sizeof(struct sockaddr));

        if (rc < 0)
          {
          if (!retry || retries >= MAX_RETRIES)
            fprintf(stderr, "ERROR: could not bind preferred network interface (%s): errno: %d",
                    if_name,
                    errno);

          free(if_name);

          retries++;

          if (!retry || retries >= MAX_RETRIES)
            {
            rc = PBSE_SYSTEM * -1;

            goto cleanup_conn;
            }

          pbs_connect_drop_socket(out);

          usleep(1000);

          continue;
          }

        /* we are done with if_name at this point. trq_get_if_name allocated
           space for it. We need to free it */
        free(if_name);
        }

      server_addr.sin_family = AF_INET;

      if ((rc = pbs_getaddrinfo(server, NULL, &addr_info)) != 0)
        {
        if (getenv("PBSDEBUG"))
          {
          if (!retry || retries >= MAX_RETRIES)
            fprintf(stderr, "ERROR:  cannot get servername (%s) errno=%d (%s)\n",
                    server,
                    errno,
                    strerror(errno));
          }

        retries++;

        if (!retry || retries >= MAX_RETRIES)
          {
          rc = PBSE_BADHOST * -1;

          goto cleanup_conn;
          }

        pbs_connect_drop_socket(out);

        usleep(1000);

        continue;
        }

      server_addr.sin_addr = ((struct sockaddr_in *)addr_info->ai_addr)->sin_addr;
      server_addr.sin_port = htons(server_port);

      /* Set the socket to non-blocking mode so we can timeout */
      if ((sockflags = fcntl(connection[out].ch_socket, F_GETFL, NULL)) < 0)
        {
        retries++;

        if (!retry || retries >= MAX_RETRIES)
          {
          if (getenv("PBSDEBUG"))
            fprintf(stderr, "ERROR:  getting socket flags failed\n");

          rc = errno * -1;

          goto cleanup_conn;
          }

        pbs_connect_drop_socket(out);

        rc = sockflags;

        usleep(1000);

        continue;
        }

      sockflags |= O_NONBLOCK;

      if ((rc = fcntl(connection[out].ch_socket, F_SETFL, sockflags)) < 0)
        {
        retries++;

        if (!retry || retries >= MAX_RETRIES)
          {
          if (getenv("PBSDEBUG"))
            fprintf(stderr, "ERROR:  setting socket flags failed\n");

          rc = errno * -1;

          goto cleanup_conn;
          }

        pbs_connect_drop_socket(out);

        usleep(1000);

        continue;
        }

      int sock = connection[out].ch_socket;
      int conn_retries = 0;

      while (((rc = connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr))) != 0) &&
             (conn_retries < MAX_RETRIES))
        {
        rc = socket_wait_for_write(sock);

        if (rc == PERMANENT_SOCKET_FAIL)
          {
          rc = errno;

          break;
          }

        conn_retries++;
        }

      if (rc != 0)
        {
        /* also clears ch_socket so cleanup_conn does not close it again */
        pbs_connect_drop_socket(out);

        retries++;

        continue;
        }

      /* if we are at this point, connect has succeeded, proceed to authorize */

      /* Set the socket back to blocking so read()s actually work */
      sockflags &= (~O_NONBLOCK);

      if ((rc = fcntl(connection[out].ch_socket, F_SETFL, sockflags)) < 0)
        {
        if (getenv("PBSDEBUG"))
          fprintf(stderr, "ERROR:  setting socket flags failed\n");

        retries++;

        if (!retry || retries >= MAX_RETRIES)
          {
          rc = PBSE_SOCKET_FAULT * -1;

          goto cleanup_conn;
          }

        pbs_connect_drop_socket(out);

        usleep(1000);

        continue;
        }

      /* FIXME: is this necessary?  Contributed by one user that fixes a problem,
         but doesn't fix the same problem for another user! */
#if 0
#if defined(__hpux)
      /*HP-UX : avoiding socket caching */
      send(connection[out].ch_socket, '?', 1, MSG_OOB);

#endif
#endif

#ifdef MUNGE_AUTH
      rc = PBSD_munge_authenticate(connection[out].ch_socket, out);

      if (rc != 0)
        {
        if (rc == PBSE_MUNGE_NOT_FOUND)
          {
          local_errno = PBSE_MUNGE_NOT_FOUND;

          if (getenv("PBSDEBUG"))
            {
            fprintf(stderr, "ERROR:  cannot find munge executable\n");
            }

          rc = -1 * local_errno;

          goto cleanup_conn;
          }
        else
          {
          retries++;

          if (!retry || retries >= MAX_RETRIES)
            {
            local_errno = PBSE_PERM;

            if (getenv("PBSDEBUG"))
              {
              fprintf(stderr, "ERROR:  cannot authenticate connection to server \"%s\", errno=%d (%s)\n",
                      server,
                      errno,
                      strerror(errno));
              }

            rc = -1 * local_errno;

            goto cleanup_conn;
            }

          pbs_connect_drop_socket(out);

          usleep(1000);

          continue;
          }
        }

#else
      /* new version of iff using daemon */
      if ((ENABLE_TRUSTED_AUTH == FALSE) &&
          ((rc = validate_socket(connection[out].ch_socket, err_msg)) != PBSE_NONE))
        {
        if (!retry || retries >= MAX_RETRIES)
          {
          if (getenv("PBSDEBUG"))
            {
            const char *tmp_err_msg = "";

            if (rc > 0)
              tmp_err_msg = pbs_strerror(rc);

            fprintf(stderr, "ERROR:  cannot authenticate connection to server \"%s\", errno=%d (%s)\n",
                    server,
                    rc,
                    tmp_err_msg);
            }

          local_errno = PBSE_SOCKET_FAULT;

          rc = -1 * local_errno;

          goto cleanup_conn;
          }

        pbs_connect_drop_socket(out);

        retries++;

        usleep(1000);

        continue;
        }

#endif /* ifdef MUNGE_AUTH */
      }
    while ((rc != PBSE_NONE) && (retries < MAX_RETRIES));

    if (rc != PBSE_NONE)
      {
      fprintf(stderr, "%s\n", err_msg.c_str());

      goto cleanup_conn;
      }
    } /* END if !use_unixsock */

  pthread_mutex_unlock(connection[out].ch_mutex);

  return(out);

cleanup_conn:

  pbs_connect_drop_socket(out);

cleanup_conn_lite:

  connection[out].ch_inuse = FALSE;

  pthread_mutex_unlock(connection[out].ch_mutex);

  /* callers expect a negative value on failure */
  return(rc < 0 ? rc : rc * -1);
  }  /* END pbs_original_connect() */
int main( int argc, /* I */ char **argv) /* I */ { struct batch_status *bstatus = NULL; int con; char *specified_server = NULL; int errflg = 0; int i; extern char *optarg; extern int optind; char **pa; struct batch_status *pbstat; int flag = ALLI; char *note = NULL; enum note_flags note_flag = unused; char **nodeargs = NULL; int lindex; enum NStateEnum ListType = tnsNONE; /* get default server, may be changed by -s option */ progname = strdup(argv[0]); while ((i = getopt(argc, argv, "acdlopqrs:x-:N:n")) != EOF) { switch (i) { case 'a': flag = ALLI; break; case 'c': flag = CLEAR; break; case 'd': flag = DIAG; break; case 'l': flag = LIST; break; case 'o': flag = OFFLINE; break; case 'p': flag = PURGE; break; case 'q': quiet = 1; break; case 'r': flag = RESET; break; case 's': specified_server = optarg; break; case 'x': flag = ALLI; DisplayXML = TRUE; break; case 'N': /* preserve any previous option other than the default, * to allow -N to be combined with -o, -c, etc */ if (flag == ALLI) flag = NOTE; note = strdup(optarg); if (note == NULL) { perror("Error: strdup() returned NULL"); exit(1); } note_flag = set; /* -N n is the same as -N "" -- it clears the note */ if (!strcmp(note, "n")) *note = '\0'; if (strlen(note) > MAX_NOTE) { fprintf(stderr, "Warning: note exceeds length limit (%d) - server may reject it...\n", MAX_NOTE); } if (strchr(note, '\n') != NULL) fprintf(stderr, "Warning: note contains a newline - server may reject it...\n"); break; case 'n': note_flag = list; break; case '-': if ((optarg != NULL) && !strcmp(optarg, "version")) { fprintf(stderr, "Version: %s\nRevision: %s\n", PACKAGE_VERSION, SVN_VERSION); exit(0); } else if ((optarg != NULL) && !strcmp(optarg, "about")) { TShowAbout_exit(); } errflg = 1; break; case '?': default: errflg = 1; break; } /* END switch (i) */ } /* END while (i = getopt()) */ if ((note_flag == list) && (flag != LIST)) { fprintf(stderr, "Error: -n requires -l\n"); errflg = 1; } for (pa = argv + optind;*pa;pa++) { if 
(strlen(*pa) == 0) { errflg = 1; } } if (errflg != 0) { if (!quiet) { fprintf(stderr, "usage:\t%s [-{c|d|l|o|p|r}] [-s server] [-n] [-N \"note\"] [-q] node ...\n", progname); fprintf(stderr, "\t%s [-{a|x}] [-s server] [-q] [node]\n", progname); } exit(1); } con = cnt2server(specified_server); if (con <= 0) { if (!quiet) { fprintf(stderr, "%s: cannot connect to server %s, error=%d (%s)\n", progname, (specified_server) ? specified_server : pbs_default(), con * -1, pbs_strerror(con * -1)); } exit(1); } /* if flag is ALLI, LIST, get status of all nodes */ if ((flag == ALLI) || (flag == LIST) || (flag == DIAG)) { if ((flag == ALLI) || (flag == LIST) || (flag == DIAG)) { if (flag == LIST) { /* allow state specification */ if (argv[optind] != NULL) { for (lindex = 1;lindex < tnsLAST;lindex++) { if (!strcasecmp(NState[lindex], argv[optind])) { ListType = lindex; optind++; break; } } } } /* allow node specification (if none, then create an empty list) */ if (argv[optind] != NULL) { nodeargs = argv + optind; } else { nodeargs = calloc(2, sizeof(char **)); nodeargs[0] = strdup(""); nodeargs[1] = '\0'; } } } if ((note_flag == set) && (note != NULL)) { /* set the note attrib string on specified nodes */ for (pa = argv + optind;*pa;pa++) { set_note(con, *pa, note); } } switch (flag) { case DIAG: /* NYI */ break; case CLEAR: /* clear OFFLINE from specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, DECR, NULL, DECR); } break; case RESET: /* clear OFFLINE, add DOWN to specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, DECR, ND_down, INCR); } break; case OFFLINE: /* set OFFLINE on specified nodes */ for (pa = argv + optind;*pa;pa++) { marknode(con, *pa, ND_offline, INCR, NULL, INCR); } break; case PURGE: /* remove node record */ /* NYI */ break; case ALLI: if (DisplayXML == TRUE) { char *tmpBuf = NULL, *tail = NULL; int bufsize; mxml_t *DE; DE = NULL; MXMLCreateE(&DE, "Data"); for (lindex = 0;nodeargs[lindex] != 
'\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat;pbstat = pbstat->next) { addxmlnode(DE, pbstat); } /* END for (pbstat) */ pbs_statfree(pbstat); } MXMLToXString(DE, &tmpBuf, &bufsize, INT_MAX, &tail, TRUE); MXMLDestroyE(&DE); fprintf(stdout, "%s\n", tmpBuf); } else { for (lindex = 0;nodeargs[lindex] != '\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat;pbstat = pbstat->next) { printf("%s\n", pbstat->name); prt_node_attr(pbstat, 0); putchar('\n'); } /* END for (bpstat) */ pbs_statfree(pbstat); } } break; case LIST: /* list any node that is DOWN, OFFLINE, or UNKNOWN */ for (lindex = 0;nodeargs[lindex] != '\0';lindex++) { bstatus = statnode(con, nodeargs[lindex]); for (pbstat = bstatus;pbstat != NULL;pbstat = pbstat->next) { char *S; S = get_nstate(pbstat); if (filterbystate(pbstat, ListType, S)) { char *n; if ((note_flag == list) && (n = get_note(pbstat))) { printf("%-20.20s %-26.26s %s\n", pbstat->name, S, n); } else { printf("%-20.20s %s\n", pbstat->name, S); } } } pbs_statfree(pbstat); } break; } /* END switch (flag) */ pbs_disconnect(con); return(0); } /* END main() */
int main( int argc, char **argv) /* qhold */ { int c; int errflg = 0; int any_failed = 0; int u_cnt, o_cnt, s_cnt; char *pc; char extend[1024]; char job_id[PBS_MAXCLTJOBID]; /* from the command line */ char job_id_out[PBS_MAXCLTJOBID]; char server_out[MAXSERVERNAME] = ""; char rmt_server[MAXSERVERNAME] = ""; #define MAX_HOLD_TYPE_LEN 32 char hold_type[MAX_HOLD_TYPE_LEN+1]; #define GETOPT_ARGS "h:t:" hold_type[0] = '\0'; extend[0] = '\0'; while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) switch (c) { case 'h': while (isspace((int)*optarg)) optarg++; if (strlen(optarg) == 0) { fprintf(stderr, "qhold: illegal -h value\n"); errflg++; break; } pc = optarg; u_cnt = o_cnt = s_cnt = 0; while (*pc) { if (*pc == 'u') u_cnt++; else if (*pc == 'o') o_cnt++; else if (*pc == 's') s_cnt++; else { fprintf(stderr, "qhold: illegal -h value\n"); errflg++; break; } pc++; } strcpy(hold_type, optarg); break; case 't': pc = optarg; if (strlen(pc) == 0) { fprintf(stderr, "qhold: illegal -t value (array range cannot be zero length)\n"); errflg++; break; } snprintf(extend,sizeof(extend),"%s%s", ARRAY_RANGE, pc); break; default : errflg++; } if (errflg || optind >= argc) { static char usage[] = "usage: qhold [-h hold_list] [-t array_range] job_identifier...\n"; fprintf(stderr,"%s", usage); exit(2); } for (; optind < argc; optind++) { int connect; int stat = 0; int located = FALSE; std::string server_name; std::vector<std::string> id_list; snprintf(job_id, sizeof(job_id), "%s", argv[optind]); if (get_server_and_job_ids(job_id, id_list, server_name)) { fprintf(stderr, "qhold: illegally formed job identifier: %s\n", job_id); any_failed = 1; continue; } cnt: connect = cnt2server(server_name.c_str()); if (connect <= 0) { any_failed = -1 * connect; if (server_out[0] != 0) fprintf(stderr, "qhold: cannot connect to server %s (errno=%d) %s\n", server_out, any_failed, pbs_strerror(any_failed)); else fprintf(stderr, "qhold: cannot connect to server %s (errno=%d) %s\n", pbs_server, any_failed, 
pbs_strerror(any_failed)); continue; } for (size_t i = 0; i < id_list.size(); i++) { snprintf(job_id_out, sizeof(job_id_out), "%s", id_list[i].c_str()); if (extend[0] == '\0') stat = pbs_holdjob_err(connect, job_id_out, hold_type, NULL, &any_failed); else stat = pbs_holdjob_err(connect, job_id_out, hold_type, extend, &any_failed); if (any_failed != PBSE_UNKJOBID) break; } if (stat && (any_failed != PBSE_UNKJOBID)) { if (!located) { located = TRUE; if (locate_job(job_id_out, server_out, rmt_server)) { pbs_disconnect(connect); strcpy(server_out, rmt_server); goto cnt; } } prt_job_err("qhold", connect, job_id_out); } pbs_disconnect(connect); } exit(any_failed); }
int main( int argc, /* I */ char **argv) /* I */ { char job_id1[PBS_MAXCLTJOBID+1]; /* from the command line */ char job_id2[PBS_MAXCLTJOBID+1]; /* from the command line */ char job_id1_out[PBS_MAXCLTJOBID+1]; char job_id2_out[PBS_MAXCLTJOBID+1]; char *pn; int port1 = 0; int port2 = 0; char server_out1[MAXSERVERNAME+1]; char server_out2[MAXSERVERNAME+1]; char svrtmp[MAXSERVERNAME+1] = ""; int connect; int stat = 0; int rc = 0; int local_errno = 0; if (argc != 3) { static char usage[] = "usage: qorder job_identifier job_identifier\n"; fprintf(stderr, "%s", usage); exit(2); } snprintf(job_id1, sizeof(job_id1), "%s", argv[1]); snprintf(job_id2, sizeof(job_id2), "%s", argv[2]); svrtmp[0] = '\0'; if (get_server(job_id1, job_id1_out, sizeof(job_id1_out), svrtmp, sizeof(svrtmp))) { fprintf(stderr, "qorder: illegally formed job identifier: %s\n", job_id1); exit(1); } if (*svrtmp == '\0') { if ((pn = pbs_default())) { strcpy(svrtmp, pn); } else { fprintf(stderr, "qorder: could not get default server: %s\n", job_id1); exit(1); } } if ((pn = strchr(svrtmp, (int)':')) != 0) { *pn = '\0'; port1 = atoi(pn + 1); } if (get_fullhostname(svrtmp, server_out1, MAXSERVERNAME, NULL) != 0) { fprintf(stderr, "qorder: invalid server name: %s\n", job_id1); exit(1); } svrtmp[0] = '\0'; if (get_server(job_id2, job_id2_out, sizeof(job_id2_out), svrtmp, sizeof(svrtmp))) { fprintf(stderr, "qorder: illegally formed job identifier: %s\n", job_id2); exit(1); } if (*svrtmp == '\0') { if ((pn = pbs_default())) { snprintf(svrtmp, sizeof(svrtmp), "%s", pn); } else { fprintf(stderr, "qorder: could not get default server: %s\n", job_id1); exit(1); } } if ((pn = strchr(svrtmp, (int)':')) != 0) { *pn = '\0'; port2 = atoi(pn + 1); } if (get_fullhostname(svrtmp, server_out2, MAXSERVERNAME, NULL) != 0) { fprintf(stderr, "qorder: invalid server name: %s\n", job_id2); exit(1); } if ((strcmp(server_out1, server_out2) != 0) || (port1 != port2)) { fprintf(stderr, "qorder: both jobs ids must specify the same 
server\n"); exit(1); } if (pn != NULL) *pn = ':'; /* restore : if it was present */ connect = cnt2server(svrtmp); if (connect <= 0) { local_errno = -1 * connect; if (svrtmp[0] != 0) fprintf(stderr, "qorder: cannot connect to server %s (errno=%d) %s\n", svrtmp, local_errno, pbs_strerror(local_errno)); else fprintf(stderr, "qorder: cannot connect to server %s (errno=%d) %s\n", pbs_server, local_errno, pbs_strerror(local_errno)); exit(1); } stat = pbs_orderjob_err(connect, job_id1_out, job_id2_out, NULL, &local_errno); if (stat != 0) { prt_job_err("qorder", connect, ""); rc = local_errno; } pbs_disconnect(connect); exit(rc); } /* END main() */
#ifdef BOEING
/*
 * svr_startjob_reject_host - record that a host of the job could not be
 * contacted.
 *
 * The caller has already written the reason into log_buffer.  This copies
 * the host name and the reason to the optional output buffers, logs the
 * reason as a job event and appends the host to the job's reject
 * destination list.  A failure to allocate the list entry is logged and
 * otherwise ignored.
 *
 * pjob      (I/O) job whose ji_rejectdest list is extended
 * id        (I)   routine name used when logging an allocation failure
 * host      (I)   name of the unreachable host
 * FailHost  (O)   optional, minsize=1024
 * EMsg      (O)   optional, minsize=1024
 */
static void svr_startjob_reject_host(

  job  *pjob,
  char *id,
  char *host,
  char *FailHost,
  char *EMsg)

{
  badplace *bp;

  /* snprintf always terminates the copy, strncpy does not */
  if (FailHost != NULL)
    snprintf(FailHost, 1024, "%s", host);

  if (EMsg != NULL)
    snprintf(EMsg, 1024, "%s", log_buffer);

  log_record(
    PBSEVENT_JOB,
    PBS_EVENTCLASS_JOB,
    pjob->ji_qs.ji_jobid,
    log_buffer);

  /* Add this host to the reject destination list for the job */
  bp = (badplace *)malloc(sizeof(badplace));

  if (bp == NULL) {
    /* FAILURE - cannot allocate memory */
    log_err(errno, id, msg_err_malloc);

    return;
  }

  CLEAR_LINK(bp->bp_link);

  strcpy(bp->bp_dest, host);

  append_link(&pjob->ji_rejectdest, &bp->bp_link, bp);
}
#endif /* END BOEING */

/*
 * svr_startjob - start a job: assign hosts if needed, then begin file
 * stage-in, checkpoint file copy, or execution.
 *
 * pjob      (I)   job to run (modified)
 * preq      (I)   Run Job batch request (optional)
 * FailHost  (O)   optional, minsize=1024; cleared on entry
 * EMsg      (O)   optional, minsize=1024; cleared on entry
 *
 * When built with BOEING, every host in the job's exec_host list is
 * checked with a TCP connect to pbs_rm_port before the job is started; the
 * first host that cannot be looked up or connected to is added to the
 * job's reject list and PBSE_RESCUNAV is returned.
 *
 * Returns 0 on success, PBSE_SYSTEM if the hashname attribute cannot be
 * set, or the code returned by assign_hosts(), svr_stagein(),
 * svr_send_checkpoint() or svr_strtjob2().
 */
int svr_startjob(

  job                  *pjob,     /* I job to run (modified) */
  struct batch_request *preq,     /* I Run Job batch request (optional) */
  char                 *FailHost, /* O (optional,minsize=1024) */
  char                 *EMsg)     /* O (optional,minsize=1024) */

{
  int f;
  int rc;

#ifdef BOEING
  int                 sock;

  struct hostent     *hp;
  char               *nodestr = NULL;
  char               *cp;
  char               *hostlist;
  int                 size;

  struct sockaddr_in  saddr;

  char               *id = "svr_startjob";
#endif

  if (FailHost != NULL)
    FailHost[0] = '\0';

  if (EMsg != NULL)
    EMsg[0] = '\0';

  /* if not already setup, transfer the control/script file basename */
  /* into an attribute accessible by MOM */

  if (!(pjob->ji_wattr[(int)JOB_ATR_hashname].at_flags & ATR_VFLAG_SET)) {
    if (job_attr_def[(int)JOB_ATR_hashname].at_decode(
          &pjob->ji_wattr[(int)JOB_ATR_hashname],
          NULL,
          NULL,
          pjob->ji_qs.ji_fileprefix)) {
      return(PBSE_SYSTEM);
    }
  }

  /* if exec_host already set and either (hot start or checkpoint) */
  /* then use the host(s) listed in exec_host   */

  /* NOTE:  qrun hostlist assigned in req_runjob() */

  rc = 0;

  f = pjob->ji_wattr[(int)JOB_ATR_exec_host].at_flags & ATR_VFLAG_SET;

  if ((f != 0) &&
      ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HOTSTART) ||
       (pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE)) &&
      ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HasNodes) == 0)) {
    rc = assign_hosts(    /* inside svr_startjob() */
           pjob,
           pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str,
           0,
           FailHost,
           EMsg);
  } else if (f == 0) {
    /* exec_host not already set, get hosts and set it */

    rc = assign_hosts(
           pjob,
           NULL,
           1,
           FailHost,
           EMsg);  /* inside svr_startjob() */
  }

  if (rc != 0) {
    /* FAILURE */

    return(rc);
  }

#ifdef BOEING
  /* Verify that all the nodes are alive via a TCP connect. */

  /* NOTE: Copy the nodes into a temp string because strtok() is destructive. */

  size = strlen(pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str);

  hostlist = malloc(size + 1);

  if (hostlist == NULL) {
    /* nodestr stays NULL, so the check loop below is skipped */
    sprintf(log_buffer, "could not allocate temporary buffer (malloc failed) -- skipping TCP connect check");

    log_err(errno, id, log_buffer);
  } else {
    /* Get the first host. */

    memcpy(hostlist, pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str, size);

    hostlist[size] = '\0';

    nodestr = strtok(hostlist, "+");
  }

  while (nodestr != NULL) {
    /* truncate from trailing slash on (if one exists). */

    if ((cp = strchr(nodestr, '/')) != NULL) {
      cp[0] = '\0';
    }

    /* Lookup IP address of host. */

    if ((hp = gethostbyname(nodestr)) == NULL) {
      sprintf(log_buffer, "could not contact %s (gethostbyname failed, errno: %d (%s))",
              nodestr,
              errno,
              pbs_strerror(errno));

      svr_startjob_reject_host(pjob, id, nodestr, FailHost, EMsg);

      free(hostlist);

      /* FAILURE - cannot lookup master compute host */

      return(PBSE_RESCUNAV);
    }

    /* open a socket. */

    /* NOTE:  should change to PF_* */

    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
      sprintf(log_buffer, "could not contact %s (cannot create socket, errno: %d (%s))",
              nodestr,
              errno,
              pbs_strerror(errno));

      svr_startjob_reject_host(pjob, id, nodestr, FailHost, EMsg);

      free(hostlist);

      /* FAILURE - cannot create socket for master compute host */

      return(PBSE_RESCUNAV);
    }

    /* Set the host information. */

    memset(&saddr, '\0', sizeof(saddr));

    saddr.sin_family = AF_INET;

    memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);

    saddr.sin_port = htons(pbs_rm_port);

    /* Connect to the host. */

    if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
      /* format the message first so errno is still the one from connect() */
      sprintf(log_buffer, "could not contact %s (connect failed, errno: %d (%s))",
              nodestr,
              errno,
              pbs_strerror(errno));

      close(sock);

      svr_startjob_reject_host(pjob, id, nodestr, FailHost, EMsg);

      free(hostlist);

      /* FAILURE - cannot connect to master compute host */

      return(PBSE_RESCUNAV);
    }

    /* clean up and get next host. */

    close(sock);

    nodestr = strtok(NULL, "+");
  }  /* END while (nodestr != NULL) */

  free(hostlist);

  /* END MOM verification check via TCP. */

#endif  /* END BOEING */

  /* Next, are there files to be staged-in? */

  if ((pjob->ji_wattr[(int)JOB_ATR_stagein].at_flags & ATR_VFLAG_SET) &&
      (pjob->ji_qs.ji_substate != JOB_SUBSTATE_STAGECMP)) {
    /* yes, we do that first; then start the job */

    rc = svr_stagein(
           pjob,
           preq,
           JOB_STATE_RUNNING,
           JOB_SUBSTATE_STAGEGO);

    /* note, the positive acknowledgment is done by svr_stagein */
  } else if (is_checkpoint_restart(pjob)) {
    /* Checkpoint file copy needed, start copy */

    rc = svr_send_checkpoint(
           pjob,
           preq,
           JOB_STATE_RUNNING,
           JOB_SUBSTATE_CHKPTGO);
  } else {
    /* No stage-in or already done, start job executing */

    rc = svr_strtjob2(pjob, preq);
  }

  return(rc);
}  /* END svr_startjob() */
int do_mom( char *HPtr, int MOMPort, int CmdIndex) { int socket; int local_errno = 0; struct tcp_chan *chan = NULL; int rc; if ((socket = openrm(HPtr, MOMPort)) < 0) { /* FAILURE */ extern char TRMEMsg[]; fprintf(stderr, "cannot connect to MOM on node '%s', errno=%d (%s)\n", HPtr, errno, strerror(errno)); if (TRMEMsg[0] != '\0') { fprintf(stderr, " %s\n", TRMEMsg); } return(socket); } else if ((chan = DIS_tcp_setup(socket)) == NULL) { fprintf(stderr, "%s: can not allocate memory of socket buffers\n", __func__); return -1; } /* send protocol and version, plus how many queries we're sending */ if (QueryI == 0) QueryI = 1; if (start_dialogue(chan) != DIS_SUCCESS) { fprintf(stderr,"ERROR: Unable to write the number of queries to %s (errno=%d-%s)\n", HPtr, errno, strerror(errno)); send_command(chan,RM_CMD_CLOSE); DIS_tcp_cleanup(chan); return(-1); } if (IsVerbose == TRUE) { fprintf(stderr, "INFO: successfully connected to %s\n", HPtr); } switch (CmdIndex) { case momClear: { char tmpLine[1024]; char *Value; snprintf(tmpLine, 1024, "clearjob=%s", (JPtr != NULL) ? 
JPtr : "all"); if (send_command_str(chan, RM_CMD_REQUEST, tmpLine) != 0) { /* FAILURE */ fprintf(stderr,"ERROR: cannot request job clear on %s (errno=%d-%s)\n", HPtr, errno, strerror(errno)); send_command(chan,RM_CMD_CLOSE); return(-1); } if ((Value = (char *)read_mom_reply(&local_errno, chan)) == NULL) { /* FAILURE */ fprintf(stderr,"ERROR: job clear failed on %s (errno=%d - %s: %d - %s)\n", HPtr, errno, pbs_strerror(errno), local_errno, pbs_strerror(local_errno)); send_command(chan,RM_CMD_CLOSE); return(-1); } /* job cleared */ fprintf(stdout,"job clear request successful on %s\n", HPtr); free(Value); } /* END BLOCK (case momClear) */ break; case momShutdown: { if ((send_command(chan,RM_CMD_SHUTDOWN) != PBSE_NONE) || (check_success(chan) != PBSE_NONE)) { /* FAILURE */ fprintf(stderr,"ERROR: cannot shutdown mom daemon on %s (errno=%d-%s)\n", HPtr, errno, pbs_strerror(errno)); send_command(chan,RM_CMD_CLOSE); exit(EXIT_FAILURE); } fprintf(stdout, "shutdown request successful on %s\n", HPtr); } /* END BLOCK */ break; case momReconfig: { if ((send_command(chan,RM_CMD_CONFIG) != PBSE_NONE) || (check_success(chan) != PBSE_NONE)) { /* FAILURE */ fprintf(stderr,"ERROR: cannot reconfigure mom on %s (errno=%d-%s)\n", HPtr, errno, pbs_strerror(errno)); send_command(chan,RM_CMD_CLOSE); return(-1); } fprintf(stdout, "reconfig successful on %s\n", HPtr); } /* END BLOCK (case momReconfig) */ break; case momLayout: char *value; if (send_command(chan, RM_CMD_LAYOUT) != PBSE_NONE) { fprintf(stdout, "Layout command failed to send to mom\n"); return(-1); } if ((value = read_mom_reply(&local_errno, chan)) == NULL) { fprintf(stdout, "Could not read a layout reply from the mom\n"); return(-1); } else { fprintf(stdout, "%s", value); free(value); } break; case momQuery: default: { char *ptr; int rindex; char *Value; for (rindex = 0; rindex < QueryI; rindex++) { if (send_command_str(chan, RM_CMD_REQUEST, Query[rindex]) != 0) { fprintf(stderr,"ERROR: cannot add query for '%s' on %s 
(errno=%d-%s)\n", Query[rindex], HPtr, errno, pbs_strerror(errno)); } } for (rindex = 0;rindex < QueryI;rindex++) { if ((ptr = strchr(Query[rindex],'=')) != NULL) { *ptr = '\0'; } if ((Value = (char *)read_mom_reply(&local_errno, chan)) == NULL) { fprintf(stderr, "ERROR: query[%d] '%s' failed on %s (errno=%d - %s : %d - %s)\n", rindex, Query[rindex], HPtr, errno, pbs_strerror(errno), local_errno, pbs_strerror(local_errno)); return(-1); } else { if (!strncmp(Query[rindex], "diag", strlen("diag"))) { fprintf(stdout, "%s\n", Value); } else if (!strncmp(Query[rindex], "cycle", strlen("cycle"))) { fprintf(stdout, "mom %s successfully cycled %s\n", HPtr, Value); } else { fprintf(stdout, "%12s: %12s = '%s'\n", HPtr, Query[rindex], Value); } } free(Value); if (ptr != NULL) { *ptr = '='; } } /* END for (rindex) */ } /* END BLOCK (case momQuery) */ break; } /* END switch(CmdIndex) */ rc = diswsi(chan, RM_PROTOCOL); if (rc != DIS_SUCCESS) goto do_mom_fail; rc = diswsi(chan, RM_PROTOCOL_VER); if (rc != DIS_SUCCESS) goto do_mom_fail; rc = diswsi(chan, 1); if (rc != DIS_SUCCESS) goto do_mom_fail; /* send_command will free chan */ send_command(chan,RM_CMD_CLOSE); return(0); do_mom_fail: DIS_tcp_close(chan); return(rc); } /* END do_mom() */
/*
 * pbs_original_connect - reserve a connection[] slot and open a socket to
 * the server for it.
 *
 * server  I  (FORMAT: NULL | '\0' | HOSTNAME | HOSTNAME:PORT ); resolved
 *            through PBS_get_server()
 *
 * When ENABLE_UNIX_SOCKETS is defined and the server is "localhost" or this
 * host's name, a unix domain socket is tried first; any failure there falls
 * back to an inet socket.  The inet path optionally binds to the preferred
 * interface, connects, and then authenticates (munge when MUNGE_AUTH is
 * defined, validate_socket() otherwise unless ENABLE_TRUSTED_AUTH is set).
 * The authentication step is only performed on the inet path.
 *
 * Returns:
 *   the index (>= 1) of the reserved connection[] slot on success, or a
 *   negative value on failure: a negated PBSE_* code, the negated errno of
 *   a failed connect(), or the negated code returned by
 *   trq_set_preferred_network_interface().
 *
 * The slot's mutex is held from reservation until just before returning;
 * every failure exit releases the slot (ch_inuse = FALSE) and the mutex.
 *
 * File Variables:
 *   server_port, pbs_current_user, pbs_server, pbs_tcp_timeout are set.
 */

int pbs_original_connect(

  char *server)  /* I (FORMAT: NULL | '\0' | HOSTNAME | HOSTNAME:PORT )*/

{
  struct sockaddr_in  server_addr;
  struct addrinfo     hints;
  struct addrinfo    *addr_info;
  struct sockaddr     preferred_addr; /* set if TRQ_IFNAME set in torque.cfg */
  struct passwd      *pw;
  char               *if_name;
  char               *ptr;
  uid_t               pbs_current_uid;
  int                 out;
  int                 i;
  int                 rc;
  int                 ret;            /* negative code returned on failure */
  int                 local_errno;
  int                 use_unixsock = 0;

#ifdef ENABLE_UNIX_SOCKETS
  struct sockaddr_un  unserver_addr;
  char                hnamebuf[256];
#endif

  /* reserve a connection state record */

  out = -1;

  for (i = 1; i < NCONNECTS; i++) {
    if (connection[i].ch_mutex == NULL) {
      connection[i].ch_mutex = calloc(1, sizeof(pthread_mutex_t));

      if (connection[i].ch_mutex == NULL) {
        /* FAILURE - nothing is locked or reserved yet */

        if (getenv("PBSDEBUG"))
          fprintf(stderr, "ALERT: cannot allocate memory for channel mutex\n");

        return(PBSE_SYSTEM * -1);
      }

      pthread_mutex_init(connection[i].ch_mutex, NULL);
    }

    pthread_mutex_lock(connection[i].ch_mutex);

    if (connection[i].ch_inuse == FALSE) {
      out = i;

      connection[out].ch_inuse  = TRUE;
      connection[out].ch_errno  = 0;
      connection[out].ch_socket = -1;
      connection[out].ch_errtxt = NULL;

      /* leave the loop with this slot's mutex still held */
      break;
    }

    pthread_mutex_unlock(connection[i].ch_mutex);
  }

  if (out < 0) {
    if (getenv("PBSDEBUG"))
      fprintf(stderr, "ALERT: cannot locate free channel\n");

    /* FAILURE */
    /* no need to unlock mutex here - in this case no connection was found */

    return(PBSE_NOCONNECTS * -1);
  }

  /* get server host and port */

  server = PBS_get_server(server, &server_port);

  if (server == NULL) {
    if (getenv("PBSDEBUG"))
      fprintf(stderr, "ALERT: PBS_get_server() failed\n");

    ret = PBSE_NOSERVER * -1;

    goto connect_fail_release;
  }

  /* determine who we are */

  pbs_current_uid = getuid();

  if ((pw = getpwuid(pbs_current_uid)) == NULL) {
    if (getenv("PBSDEBUG")) {
      fprintf(stderr, "ALERT: cannot get password info for uid %ld\n",
              (long)pbs_current_uid);
    }

    /* the slot must be given back here as well, not only unlocked */
    ret = PBSE_SYSTEM * -1;

    goto connect_fail_release;
  }

  /* NOTE(review): unbounded copy; the size of pbs_current_user is not
   * visible from here - confirm it can hold any login name. */
  strcpy(pbs_current_user, pw->pw_name);

  pbs_server = server;    /* set for error messages from commands */

#ifdef ENABLE_UNIX_SOCKETS
  /* determine if we want to use unix domain socket */

  /* gethostname() may not terminate a truncated name */
  hnamebuf[sizeof(hnamebuf) - 1] = '\0';

  if (!strcmp(server, "localhost"))
    use_unixsock = 1;
  else if ((gethostname(hnamebuf, sizeof(hnamebuf) - 1) == 0) &&
           !strcmp(hnamebuf, server))
    use_unixsock = 1;

  /* NOTE: if any part of using unix domain sockets fails,
   * we just cleanup and try again with inet sockets.  The slot stays
   * reserved (ch_inuse == TRUE) during the fallback, because the inet
   * attempt below hands this same slot back to the caller. */

  /* get socket */

  if (use_unixsock) {
    connection[out].ch_socket = socket(AF_UNIX, SOCK_STREAM, 0);

    if (connection[out].ch_socket < 0) {
      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot create socket: errno=%d (%s)\n",
                errno,
                strerror(errno));
      }

      use_unixsock = 0;  /* will try again with inet socket */
    }
  }

  /* and connect... */

  if (use_unixsock) {
    unserver_addr.sun_family = AF_UNIX;
    strcpy(unserver_addr.sun_path, TSOCK_PATH);

    if (connect(
          connection[out].ch_socket,
          (struct sockaddr *)&unserver_addr,
          (strlen(unserver_addr.sun_path) + sizeof(unserver_addr.sun_family))) < 0) {
      /* capture errno before close() can overwrite it */
      local_errno = errno;

      close(connection[out].ch_socket);

      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot connect to server, errno=%d (%s)\n",
                local_errno,
                strerror(local_errno));
      }

      use_unixsock = 0;  /* will try again with inet socket */
    }
  }

  if (use_unixsock) {
    if (!send_unix_creds(connection[out].ch_socket)) {
      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot send unix creds to pbs_server: errno=%d (%s)\n",
                errno,
                strerror(errno));
      }

      close(connection[out].ch_socket);

      use_unixsock = 0;  /* will try again with inet socket */
    }
  }

#endif /* END ENABLE_UNIX_SOCKETS */

  if (!use_unixsock) {
    /* at this point, either using unix sockets failed, or we determined not to
     * try */

    connection[out].ch_socket = socket(AF_INET, SOCK_STREAM, 0);

    if (connection[out].ch_socket < 0) {
      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot connect to server \"%s\", errno=%d (%s)\n",
                server,
                errno,
                strerror(errno));
      }

      ret = PBSE_PROTOCOL * -1;

      goto connect_fail_release;
    }

    /* This is probably an IPv4 solution for the if_name and preferred_addr
       We need to see what ioctl call we need for IPv6 */

    if_name = trq_get_if_name();

    if (if_name) {
      rc = trq_set_preferred_network_interface(if_name, &preferred_addr);

      if (rc != PBSE_NONE) {
        fprintf(stderr, "could not set preferred network interface (%s): %d\n",
                if_name, rc);

        free(if_name);

        /* failures are reported as negative values; a positive code could be
         * mistaken by the caller for a connection handle */
        ret = (rc > 0) ? (rc * -1) : rc;

        goto connect_fail_close;
      }

      rc = bind(connection[out].ch_socket, &preferred_addr, sizeof(struct sockaddr));

      if (rc < 0) {
        fprintf(stderr, "ERROR: could not bind preferred network interface (%s): errno: %d",
                if_name, errno);

        free(if_name);

        ret = PBSE_SYSTEM * -1;

        goto connect_fail_close;
      }

      /* we are done with if_name at this point. trq_get_if_name allocated
         space for it. We need to free it */

      free(if_name);
    }

    memset(&server_addr, 0, sizeof(server_addr));

    server_addr.sin_family = AF_INET;

    /* only IPv4 results are usable: the first result is read below through
     * a struct sockaddr_in */
    memset(&hints, 0, sizeof(hints));

    hints.ai_family   = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    if (getaddrinfo(server, NULL, &hints, &addr_info) != 0) {
      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot get servername (%s) errno=%d (%s)\n",
                (server != NULL) ? server : "NULL",
                errno,
                strerror(errno));
      }

      ret = PBSE_BADHOST * -1;

      goto connect_fail_close;
    }

    server_addr.sin_addr = ((struct sockaddr_in *)addr_info->ai_addr)->sin_addr;

    freeaddrinfo(addr_info);

    server_addr.sin_port = htons(server_port);

    if (connect(
          connection[out].ch_socket,
          (struct sockaddr *)&server_addr,
          sizeof(server_addr)) < 0) {
      /* capture errno before any later call can overwrite it */
      local_errno = errno;

      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot connect to server, errno=%d (%s)\n",
                local_errno,
                strerror(local_errno));
      }

      ret = local_errno * -1;

      goto connect_fail_close;
    }

    /* FIXME: is this necessary?  Contributed by one user that fixes a problem,
       but doesn't fix the same problem for another user! */
#if 0
#if defined(__hpux)
    /*HP-UX : avoiding socket caching */
    send(connection[out].ch_socket, '?', 1, MSG_OOB);
#endif
#endif

#ifdef MUNGE_AUTH
    rc = PBSD_munge_authenticate(connection[out].ch_socket, out);

    if (rc != 0) {
      if (rc == PBSE_MUNGE_NOT_FOUND) {
        ret = PBSE_MUNGE_NOT_FOUND * -1;

        if (getenv("PBSDEBUG")) {
          fprintf(stderr, "ERROR: cannot find munge executable\n");
        }
      } else {
        ret = PBSE_PERM * -1;

        if (getenv("PBSDEBUG")) {
          fprintf(stderr, "ERROR: cannot authenticate connection to server \"%s\", errno=%d (%s)\n",
                  server,
                  errno,
                  strerror(errno));
        }
      }

      goto connect_fail_close;
    }

#else
    /* new version of iff using daemon */

    if ((ENABLE_TRUSTED_AUTH == FALSE) &&
        ((rc = validate_socket(connection[out].ch_socket)) != PBSE_NONE)) {
      if (getenv("PBSDEBUG")) {
        fprintf(stderr, "ERROR: cannot authenticate connection to server \"%s\", errno=%d (%s)\n",
                server,
                rc,
                pbs_strerror(rc));
      }

      ret = PBSE_SOCKET_FAULT * -1;

      goto connect_fail_close;
    }

#endif /* ifdef MUNGE_AUTH */
  } /* END if !use_unixsock */

  /* setup DIS support routines for following pbs_* calls */

  if ((ptr = getenv("PBSAPITIMEOUT")) != NULL) {
    pbs_tcp_timeout = strtol(ptr, NULL, 0);

    if (pbs_tcp_timeout <= 0) {
      pbs_tcp_timeout = 10800;  /* set for 3 hour time out */
    }
  } else {
    pbs_tcp_timeout = 10800;    /* set for 3 hour time out */
  }

  pthread_mutex_unlock(connection[out].ch_mutex);

  return(out);

  /* FAILURE exits: ret holds the negative code to return */

connect_fail_close:

  close(connection[out].ch_socket);

connect_fail_release:

  connection[out].ch_inuse = FALSE;

  pthread_mutex_unlock(connection[out].ch_mutex);

  return(ret);
} /* END pbs_original_connect() */