int PBSD_jobfile( int c, int req_type, char *path, char *jobid, enum job_file which) { int i; int cc; int rc; int fd; char s_buf[SCRIPT_CHUNK_Z]; if ((c < 0) || (c >= PBS_NET_MAX_CONNECTIONS)) { return(PBSE_IVALREQ); } if (path[0] == '\0') return(PBSE_NONE); if ((fd = open(path, O_RDONLY, 0)) < 0) { return(-1); } i = 0; cc = read_ac_socket(fd, s_buf, SCRIPT_CHUNK_Z); while ((cc > 0) && (PBSD_scbuf(c, req_type, i, s_buf, cc, jobid, which) == 0)) { i++; cc = read_ac_socket(fd, s_buf, SCRIPT_CHUNK_Z); } close(fd); if (cc < 0) /* read failed */ { return(-1); } pthread_mutex_lock(connection[c].ch_mutex); rc = connection[c].ch_errno; pthread_mutex_unlock(connection[c].ch_mutex); return(rc); } /* END PBSD_jobfile() */
/*
 * PBSD_jscript - send a job script file over a connection in chunks.
 *
 * Reads `script_file` SCRIPT_CHUNK_Z bytes at a time and passes every piece
 * to PBSD_scbuf() as a PBS_BATCH_jobscript / JScript request with a running
 * sequence number. Stops at end of file, on a read error, or when
 * PBSD_scbuf() returns non-zero.
 *
 * c            connection handle; must be a valid index into connection[]
 * script_file  path of the script to send
 * jobid        passed through to PBSD_scbuf()
 *
 * Returns PBSE_IVALREQ for an out-of-range handle, -1 when the file cannot
 * be opened or a read fails, and otherwise the connection's ch_errno
 * (sampled while holding ch_mutex).
 */
int PBSD_jscript(
    int   c,
    char *script_file,
    char *jobid)
{
    char chunk[SCRIPT_CHUNK_Z];
    int  seq = 0;
    int  nread;
    int  script_fd;
    int  conn_errno;

    if ((c < 0) || (c >= PBS_NET_MAX_CONNECTIONS))
        return(PBSE_IVALREQ);

    script_fd = open(script_file, O_RDONLY, 0);
    if (script_fd < 0)
        return(-1);

    for (;;) {
        nread = read_ac_socket(script_fd, chunk, SCRIPT_CHUNK_Z);
        if (nread <= 0)
            break;                      /* end of file or read error */

        if (PBSD_scbuf(c, PBS_BATCH_jobscript, seq, chunk, nread, jobid, JScript) != 0)
            break;                      /* send failed; ch_errno is reported below */

        seq++;
    }

    close(script_fd);

    if (nread < 0) {
        /* read failed */
        return(-1);
    }

    pthread_mutex_lock(connection[c].ch_mutex);
    conn_errno = connection[c].ch_errno;
    pthread_mutex_unlock(connection[c].ch_mutex);

    return(conn_errno);
}
void readit( int sock, struct routem *prm) { int amt; char buf[256]; FILE *fil; int i; char *pc; if (prm->r_where == old_out) fil = stdout; else fil = stderr; i = 0; if ((amt = read_ac_socket(sock, buf, 256)) > 0) { for (pc = buf + i; pc < buf + amt; ++pc) { #ifdef DEBUG if (prm->r_nl != 0) { fprintf(fil, "socket %d: ", sock); prm->r_nl = 0; } #endif /* DEBUG */ putc(*pc, fil); if (*pc == '\n') { prm->r_nl = 1; fflush(fil); } } } else { close(sock); prm->r_where = invalid; FD_CLR(sock, &readset); } return; } /* END readit() */
int read_tm_info( int fds) { int outport, errport; tm_task_id taskid; tm_task_id nodeid; if (read_ac_socket(fds, (char *)&outport, sizeof(int)) != sizeof(int)) { fprintf(stderr, "short read of TM output info\n"); exit(2); } if (read_ac_socket(fds, (char *)&errport, sizeof(int)) != sizeof(int)) { fprintf(stderr, "short read of TM error info\n"); exit(2); } if (read_ac_socket(fds, (char *)&taskid, sizeof(tm_task_id)) != sizeof(tm_task_id)) { fprintf(stderr, "short read of TM task info\n"); exit(2); } if (read_ac_socket(fds, (char *)&nodeid, sizeof(tm_node_id)) != sizeof(tm_node_id)) { fprintf(stderr, "short read of TM nodeid info\n"); exit(2); } printf("stdout port = %d\nstderr port = %d\ntaskid = %d\nnodeid = %d\n", outport, errport, taskid, nodeid); return(0); }
int pbs_disconnect_socket( int sock) /* I (socket descriptor) */ { char tmp_buf[THE_BUF_SIZE / 4]; struct tcp_chan *chan = NULL; if ((chan = DIS_tcp_setup(sock)) == NULL) { } else if ((encode_DIS_ReqHdr(chan,PBS_BATCH_Disconnect, pbs_current_user) == 0) && (DIS_tcp_wflush(chan) == 0)) { int atime; struct sigaction act; struct sigaction oldact; /* set alarm to break out of potentially infinite read */ /* act.sa_handler = SIG_IGN; */ act.sa_handler = empty_alarm_handler; /* need SOME handler or blocking read never gets interrupted */ sigemptyset(&act.sa_mask); act.sa_flags = 0; sigaction(SIGALRM, &act, &oldact); atime = alarm(5); while (1) { /* wait for server to close connection */ /* NOTE: if read of 'sock' is blocking, request below may hang forever -- hence the signal handler above */ if (read_ac_socket(sock, &tmp_buf, sizeof(tmp_buf)) < 1) break; } alarm(atime); sigaction(SIGALRM, &oldact, NULL); } if (chan != NULL) DIS_tcp_cleanup(chan); close(sock); return(0); } /* END pbs_disconnect_socket() */
void parse_mom_hierarchy( int fds) { int bytes_read; char buffer[MAXLINE<<10]; char *current; char *parent; char *child; char log_buf[LOCAL_LOG_BUF_SIZE]; int path_index = -1; memset(&buffer, 0, sizeof(buffer)); if ((bytes_read = read_ac_socket(fds, buffer, sizeof(buffer) - 1)) < 0) { snprintf(log_buf, sizeof(log_buf), "Unable to read from mom hierarchy file"); log_err(errno, __func__, log_buf); return; } current = buffer; while (get_parent_and_child(current, &parent, &child, ¤t) == PBSE_NONE) { if (!strncmp(parent,"path",strlen("path"))) handle_path(child, path_index); else { /* non-fatal error */ snprintf(log_buf, sizeof(log_buf), "Found noise in the mom hierarchy file. Ignoring <%s>%s</%s>", parent, child, parent); log_err(-1, __func__, log_buf); } } } /* END parse_mom_hierarchy() */
/*
 * main - dump every `struct tracking` record found in the named files.
 *
 * Each file given on the command line is opened read-only and read one
 * sizeof(struct tracking) record at a time; every complete record is passed
 * to prt_track_struct(). Reading of a file stops at the first incomplete
 * record or at end of file.
 *
 * Exit status: 1 when no file is given or a file cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
    int             amt;
    int             f;
    int             fp;
    struct tracking track;

    /* argv[0] is the program name, so at least two entries are required
     * before there is a file to process */
    if (argc < 2) {
        fprintf(stderr, "usage: %s file [file ...]\n", argv[0]);
        return 1;
    }

    for (f = 1; f < argc; ++f) {
        fp = open(argv[f], O_RDONLY, 0);
        if (fp < 0) {
            perror("open failed");
            fprintf(stderr, "unable to open file %s\n", argv[f]);
            exit(1);
        }

        while ((amt = read_ac_socket(fp, &track, sizeof(track))) == sizeof(track)) {
            prt_track_struct(&track);
        }

        (void)close(fp);

        printf("\n");
    }

    return (0);
}
int read_attr( int fd) { int amt; int i; svrattrl *pal; svrattrl tempal; i = read_ac_socket(fd, (char *) & tempal, sizeof(tempal)); if (i != sizeof(tempal)) { fprintf(stderr, "bad read of attribute\n"); /* FAILURE */ return(0); } if (tempal.al_tsize == ENDATTRIBUTES) { /* FAILURE */ return(0); } pal = (svrattrl *)calloc(1, tempal.al_tsize); if (pal == NULL) { fprintf(stderr, "malloc failed\n"); exit(1); } *pal = tempal; /* read in the actual attribute data */ amt = pal->al_tsize - sizeof(svrattrl); i = read_ac_socket(fd, (char *)pal + sizeof(svrattrl), amt - 1); if (i != amt) { fprintf(stderr, "short read of attribute\n"); exit(2); } pal->al_name = (char *)pal + sizeof(svrattrl); if (pal->al_rescln != 0) pal->al_resc = pal->al_name + pal->al_nameln; else pal->al_resc = NULL; if (pal->al_valln != 0) pal->al_value = pal->al_name + pal->al_nameln + pal->al_rescln; else pal->al_value = NULL; printf("%s", pal->al_name); if (pal->al_resc != NULL) { printf(".%s", pal->al_resc); } printf(" = "); if (pal->al_value != NULL) { printf("%s", pal->al_value); } printf("\n"); free(pal); return(1); }
/*
 * recov_attr - read saved attributes from `fd` and decode them into `pattr`.
 *
 * The file holds a sequence of records, each a fixed svrattrl header
 * followed by (al_tsize - sizeof(svrattrl)) bytes of name / resource /
 * value text, terminated by a header whose al_tsize is ENDATTRIBUTES.
 * For every record the stored pointers are rebuilt from the recorded
 * lengths and checked against the allocation, the attribute definition is
 * looked up by name, and the definition's at_decode (and, when
 * `do_actions` is set, at_action with ATR_ACTION_RECOV) is invoked.
 *
 * fd          open descriptor positioned at the first attribute record
 * parent      object passed to at_action
 * padef       attribute definition table
 * pattr       attribute array to fill, parallel to padef
 * limit       passed to find_attr() as the table size
 * unknown     index used for names not found in padef; when <= 0 such
 *             attributes are logged and discarded
 * do_actions  non-zero to run at_action after decoding
 *
 * Returns 0 on success and -1 on a read error, an inconsistent record, or
 * an allocation failure. When built without PBS_MOM it also returns -1 if
 * a state attribute of 'R' or 'E' was recovered but no exec_host attribute
 * was seen.
 */
int recov_attr(
    int                   fd,
    void                 *parent,
    struct attribute_def *padef,
    pbs_attribute        *pattr,
    int                   limit,
    int                   unknown,
    int                   do_actions)
{
    int       amt;
    int       i;
    int       index;
    int       palsize = 0;
    /* set all privileges (read and write) for decoding resources */
    /* This is a special (kludge) flag for the recovery case, see */
    /* decode_resc() in lib/Libattr/attr_fn_resc.c                */
    int       resc_access_perm = ATR_DFLAG_ACCESS;
    svrattrl *pal = NULL;
    svrattrl  tempal;
    char     *endPal;
#ifndef PBS_MOM
    bool      exec_host_found = false;
    char      job_state = '\0';   /* stays '\0' when no state attribute is recovered */
#endif

    /* For each pbs_attribute, read in the attr_extern header */
    while (1) {
        i = read_ac_socket(fd, (char *)&tempal, sizeof(tempal));

        if (i != sizeof(tempal)) {
            log_err(errno, __func__, "read1");

            return(-1);
        }

        if (tempal.al_tsize == ENDATTRIBUTES)
            break;            /* hit dummy pbs_attribute that is eof */

        if (tempal.al_tsize <= (int)sizeof(tempal)) {
            log_err(-1, __func__, "attr size too small");

            return(-1);
        }

        /* read in the pbs_attribute chunk (name and encoded value) */
        palsize = tempal.al_tsize;

        pal = (svrattrl *)calloc(1, palsize);

        if (pal == NULL) {
            log_err(errno, __func__, "calloc failed");

            return(-1);
        }

        endPal = (char *)pal + palsize;

        memcpy(pal, &tempal, sizeof(svrattrl));

        CLEAR_LINK(pal->al_link);

        /* read in the actual pbs_attribute data */
        amt = pal->al_tsize - sizeof(svrattrl);

        i = read_ac_socket(fd, (char *)pal + sizeof(svrattrl), amt);

        if (i != amt) {
            log_err(errno, __func__, "read2");

            free(pal);

            return(-1);
        }

        /* the pointer into the data are of course bad, so reset them */
        pal->al_name = (char *)pal + sizeof(svrattrl);

        if (pal->al_rescln) {
            pal->al_resc = pal->al_name + pal->al_nameln;

            if (((char *)pal->al_resc > endPal) ||
                ((char *)pal->al_resc < (char *)pal)) {
                /* Bad size in file. */
                free(pal);

                return(-1);
            }
        } else {
            pal->al_resc = NULL;
        }

        if (pal->al_valln) {
            pal->al_value = pal->al_name + pal->al_nameln + pal->al_rescln;

            if (((char *)pal->al_value > endPal) ||
                ((char *)pal->al_value < (char *)pal)) {
                /* Bad size in file. */
                free(pal);

                return(-1);
            }
        } else {
            pal->al_value = NULL;
        }

        if ((pal->al_name + pal->al_nameln + pal->al_rescln + pal->al_valln) > endPal) {
            /* Bad size in file. */
            free(pal);

            return(-1);
        }

        /* find the pbs_attribute definition based on the name */
        index = find_attr(padef, pal->al_name, limit);

        if (index < 0) {
            /*
             * There are two ways this could happen:
             * 1. if the (job) pbs_attribute is in the "unknown" list -
             *    keep it there;
             * 2. if the server was rebuilt and an pbs_attribute was
             *    deleted, - the fact is logged and the pbs_attribute
             *    is discarded (system,queue) or kept (job)
             */
            if (unknown > 0) {
                index = unknown;
            } else {
                log_err(-1, __func__, "unknown attribute discarded");

                free(pal);

                continue;
            }
        } /* END if (index < 0) */

#ifndef PBS_MOM
        if (!strcmp(pal->al_name, ATTR_exechost)) {
            exec_host_found = true;
        }

        /* al_value is NULL when the record carries no value (al_valln == 0) */
        if ((!strcmp(pal->al_name, ATTR_state)) && (pal->al_value != NULL)) {
            job_state = *pal->al_value;
        }
#endif

        (padef + index)->at_decode(
            pattr + index,
            pal->al_name,
            pal->al_resc,
            pal->al_value,
            resc_access_perm);

        if ((do_actions) && (padef + index)->at_action != NULL)
            (padef + index)->at_action(pattr + index, parent, ATR_ACTION_RECOV);

        (pattr + index)->at_flags = pal->al_flags & ~ATR_VFLAG_MODIFY;

        free(pal);
    } /* END while (1) */

#ifndef PBS_MOM
    /* a state of 'R' or 'E' without an exec_host is treated as unrecoverable */
    if ((exec_host_found == false) &&
        ((job_state == 'R') || (job_state == 'E'))) {
        return(-1);
    }
#endif

    return(0);
} /* END recov_attr() */
pbs_queue *que_recov_xml( char *filename) { int fds; int rc; pbs_queue *pq; char namebuf[MAXPATHLEN]; char buf[MAXLINE<<10]; char *parent; char *child; char *current; char *begin; char *end; char log_buf[LOCAL_LOG_BUF_SIZE]; time_t time_now = time(NULL); pq = que_alloc(filename, TRUE); /* allocate & init queue structure space */ if (pq == NULL) { log_err(-1, __func__, "que_alloc failed"); return(NULL); } snprintf(namebuf, sizeof(namebuf), "%s%s", path_queues, filename); fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { log_err(errno, __func__, "open error"); que_free(pq, TRUE); return(NULL); } /* read in queue save sub-structure */ if (read_ac_socket(fds,buf,sizeof(buf)) < 0) { snprintf(log_buf,sizeof(log_buf), "Unable to read from queue file %s", filename); log_err(errno, __func__, log_buf); close(fds); return(NULL); } current = begin = buf; /* advance past the queue tag */ current = strstr(current,"<queue>"); if (current == NULL) { log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, "Cannot find a queue tag, attempting to load legacy format"); que_free(pq, TRUE); close(fds); return(que_recov(filename)); } end = strstr(current,"</queue>"); if (end == NULL) { log_err(-1, __func__, "No queue tag found in the queue file???"); que_free(pq, TRUE); close(fds); return(NULL); } /* move past the queue tag */ current += strlen("<queue>"); /* adjust the end for the newline preceeding the close queue tag */ end--; while (current < end) { if (get_parent_and_child(current,&parent,&child,¤t)) { /* ERROR */ snprintf(log_buf,sizeof(log_buf), "Bad XML in the queue file at: %s", current); log_err(-1, __func__, log_buf); que_free(pq, TRUE); close(fds); return(NULL); } if (!strcmp(parent,"modified")) pq->qu_qs.qu_modified = atoi(child); else if (!strcmp(parent,"type")) pq->qu_qs.qu_type = atoi(child); else if (!strcmp(parent,"create_time")) pq->qu_qs.qu_ctime = atoi(child); else if (!strcmp(parent,"modify_time")) pq->qu_qs.qu_mtime = atoi(child); else if 
(!strcmp(parent,"name")) snprintf(pq->qu_qs.qu_name,sizeof(pq->qu_qs.qu_name),"%s",child); else if (!strcmp(parent,"attributes")) { char *attr_ptr = child; char *child_parent; char *child_attr; while (*attr_ptr != '\0') { if (get_parent_and_child(attr_ptr,&child_parent,&child_attr,&attr_ptr)) { /* ERROR */ snprintf(log_buf,sizeof(log_buf), "Bad XML in the queue file at: %s", current); log_err(-1, __func__, log_buf); que_free(pq, TRUE); close(fds); return(NULL); } if ((rc = str_to_attr(child_parent,child_attr,pq->qu_attr,que_attr_def))) { /* ERROR */ snprintf(log_buf,sizeof(log_buf), "Error creating attribute %s", child_parent); log_err(rc, __func__, log_buf); que_free(pq, TRUE); close(fds); return(NULL); } } } } /* all done recovering the queue */ close(fds); if ((pq->qu_attr[QA_ATR_MTime].at_flags & ATR_VFLAG_SET) == 0) { /* if we are recovering a pre-2.1.2 queue, save a new mtime */ pq->qu_attr[QA_ATR_MTime].at_val.at_long = time_now; pq->qu_attr[QA_ATR_MTime].at_flags = ATR_VFLAG_SET; que_save(pq); } return(pq); } /* END que_recov_xml() */
/*
 * recov_acl - reload an ACL attribute from its own file.
 *
 * The file is path_priv[subdir/]name. Its whole content is read into a
 * NUL-terminated heap buffer, decoded with the definition's at_decode into
 * a temporary attribute and then applied to `pattr` with at_set(SET). The
 * temporary attribute and the buffer are released before returning.
 *
 * A missing file (ENOENT) or an empty file is not an error and leaves
 * `pattr` untouched without logging. Open, read, decode and set failures
 * are logged; stat and allocation failures return silently.
 */
void recov_acl(
    pbs_attribute *pattr,   /* acl pbs_attribute */
    attribute_def *pdef,    /* pbs_attribute def structure */
    const char    *subdir,  /* directory path */
    const char    *name)    /* parent object name = file name */
{
    static const char *this_function_name = "recov_acl";
    char              *buf;
    int                fds;
    char               filename1[MAXPATHLEN];
    char               log_buf[LOCAL_LOG_BUF_SIZE];
    struct stat        sb;
    pbs_attribute      tempat;

    errno = 0;

    if (subdir != NULL)
        snprintf(filename1, sizeof(filename1), "%s%s/%s", path_priv, subdir, name);
    else
        snprintf(filename1, sizeof(filename1), "%s%s", path_priv, name);

    fds = open(filename1, O_RDONLY, 0600);

    if (fds < 0) {
        if (errno != ENOENT) {
            /* bounded: filename1 may be longer than log_buf */
            snprintf(log_buf, sizeof(log_buf), "unable to open acl file %s", filename1);

            log_err(errno, this_function_name, log_buf);
        }

        return;
    }

    if (fstat(fds, &sb) < 0) {
        close(fds);

        return;
    }

    if (sb.st_size == 0) {
        close(fds);

        return;  /* no data */
    }

    buf = (char *)calloc(1, (size_t)sb.st_size + 1);  /* 1 extra for added null */

    if (buf == NULL) {
        close(fds);

        return;
    }

    if (read_ac_socket(fds, buf, (unsigned int)sb.st_size) != (int)sb.st_size) {
        log_err(errno, this_function_name, (char *)"unable to read acl file");

        close(fds);
        free(buf);

        return;
    }

    close(fds);

    *(buf + sb.st_size) = '\0';

    clear_attr(&tempat, pdef);

    if (pdef->at_decode(&tempat, pdef->at_name, NULL, buf, ATR_DFLAG_ACCESS) < 0) {
        snprintf(log_buf, sizeof(log_buf), "decode of acl %s failed", pdef->at_name);

        log_err(errno, this_function_name, log_buf);
    } else if (pdef->at_set(pattr, &tempat, SET) != 0) {
        snprintf(log_buf, sizeof(log_buf), "set of acl %s failed", pdef->at_name);

        log_err(errno, this_function_name, log_buf);
    }

    pdef->at_free(&tempat);

    free(buf);

    return;
} /* END recov_acl() */
/* release every request token currently linked on pa->request_tokens */
static void array_recov_free_tokens(
    job_array *pa)
{
    array_request_node *rn;

    for (rn = (array_request_node *)GET_NEXT(pa->request_tokens);
         rn != NULL;
         rn = (array_request_node *)GET_NEXT(pa->request_tokens)) {
        delete_link(&rn->request_tokens_link);
        free(rn);
    }
}

/*
 * array_recov reads in an array struct saved to disk and inserts it into
 * the servers list of arrays
 *
 * path    file holding the saved array
 * new_pa  set to the recovered array on success, NULL otherwise
 *
 * The quick-save structure is read directly, or via
 * read_and_convert_259_array() when array_259_upgrade is set, and upgraded
 * with array_upgrade() if its struct_version differs from
 * ARRAY_QS_STRUCT_VERSION. For on-disk versions newer than 1 the pending
 * request tokens that follow the structure are read and linked onto
 * pa->request_tokens. An array that was stored in an older format is saved
 * again in the current one.
 *
 * On success the array's mutex is created and locked with lock_ai_mutex()
 * and the array is passed to insert_array(). Returns PBSE_NONE on success,
 * PBSE_SYSTEM on allocation/open/read failures, or the error returned by
 * the conversion/upgrade helper. Everything allocated here is released on
 * failure.
 */
int array_recov(
    char       *path,
    job_array **new_pa)
{
    job_array          *pa;
    array_request_node *rn;
    char                log_buf[LOCAL_LOG_BUF_SIZE];
    int                 fd;
    int                 old_version;
    int                 num_tokens;
    int                 i;
    int                 len;
    int                 rc;

    *new_pa = NULL;

    old_version = ARRAY_QS_STRUCT_VERSION;

    /* allocate the storage for the struct */
    pa = (job_array *)calloc(1, sizeof(job_array));

    if (pa == NULL) {
        return(PBSE_SYSTEM);
    }

    /* initialize the linked list nodes */
    CLEAR_HEAD(pa->request_tokens);

    fd = open(path, O_RDONLY, 0);

    if (fd < 0) {
        free(pa);

        return(PBSE_SYSTEM);
    }

    if (array_259_upgrade) {
        rc = read_and_convert_259_array(fd, pa, path);

        if (rc != PBSE_NONE) {
            free(pa);
            close(fd);

            return(rc);
        }
    } else {
        /* read the file into the struct previously allocated. A short read is
         * only an error when the data claims to be the current version; older
         * versions are handled by array_upgrade() below */
        len = read_ac_socket(fd, &(pa->ai_qs), sizeof(pa->ai_qs));

        if ((len < 0) ||
            ((len < (int)sizeof(pa->ai_qs)) &&
             (pa->ai_qs.struct_version == ARRAY_QS_STRUCT_VERSION))) {
            snprintf(log_buf, sizeof(log_buf), "error reading %s", path);
            log_err(errno, __func__, log_buf);

            free(pa);
            close(fd);

            return(PBSE_SYSTEM);
        }

        if (pa->ai_qs.struct_version != ARRAY_QS_STRUCT_VERSION) {
            rc = array_upgrade(pa, fd, pa->ai_qs.struct_version, &old_version);

            if (rc) {
                snprintf(log_buf, sizeof(log_buf),
                         "Cannot upgrade array version %d to %d",
                         pa->ai_qs.struct_version,
                         ARRAY_QS_STRUCT_VERSION);
                log_err(errno, __func__, log_buf);

                free(pa);
                close(fd);

                return(rc);
            }
        }
    }

    pa->job_ids = (char **)calloc(pa->ai_qs.array_size, sizeof(char *));

    /* calloc may legitimately return NULL for a zero-sized request */
    if ((pa->job_ids == NULL) && (pa->ai_qs.array_size > 0)) {
        log_err(errno, __func__, "calloc failed");

        free(pa);
        close(fd);

        return(PBSE_SYSTEM);
    }

    /* check to see if there is any additional info saved in the array file */
    /* check if there are any array request tokens that haven't been fully
       processed */
    if (old_version > 1) {
        if (read_ac_socket(fd, &num_tokens, sizeof(int)) != sizeof(int)) {
            snprintf(log_buf, sizeof(log_buf),
                     "error reading token count from %s", path);
            log_err(errno, __func__, log_buf);

            free(pa->job_ids);
            free(pa);
            close(fd);

            return(PBSE_SYSTEM);
        }

        for (i = 0; i < num_tokens; i++) {
            rn = (array_request_node *)calloc(1, sizeof(array_request_node));

            if (rn == NULL) {
                log_err(errno, __func__, "calloc failed");

                array_recov_free_tokens(pa);
                free(pa->job_ids);
                free(pa);
                close(fd);

                return(PBSE_SYSTEM);
            }

            if (read_ac_socket(fd, rn, sizeof(array_request_node)) != sizeof(array_request_node)) {
                snprintf(log_buf, sizeof(log_buf),
                         "error reading array_request_node from %s", path);
                log_err(errno, __func__, log_buf);

                free(rn);

                array_recov_free_tokens(pa);
                free(pa->job_ids);
                free(pa);
                close(fd);

                return(PBSE_SYSTEM);
            }

            /* the link stored on disk is stale; reset it before linking */
            CLEAR_LINK(rn->request_tokens_link);

            append_link(&pa->request_tokens, &rn->request_tokens_link, (void *)rn);
        }
    }

    close(fd);

    CLEAR_HEAD(pa->ai_qs.deps);

    if (old_version != ARRAY_QS_STRUCT_VERSION) {
        /* resave the array struct if the version on disk is older than the current */
        array_save(pa);
    }

    pa->ai_mutex = (pthread_mutex_t *)calloc(1, sizeof(pthread_mutex_t));

    if (pa->ai_mutex == NULL) {
        log_err(errno, __func__, "calloc failed");

        array_recov_free_tokens(pa);
        free(pa->job_ids);
        free(pa);

        return(PBSE_SYSTEM);
    }

    pthread_mutex_init(pa->ai_mutex, NULL);

    lock_ai_mutex(pa, __func__, NULL, LOGLEVEL);

    /* link the struct into the servers list of job arrays */
    insert_array(pa);

    *new_pa = pa;

    return(PBSE_NONE);
} /* END array_recov() */
/*
 * read_and_convert_259_array - load a version 3 (2.5.9 / 3.0.3 layout)
 * array quick-save structure and convert it to the current layout.
 *
 * fd    open descriptor positioned at the start of the array file; it is
 *       owned by the caller and is NOT closed here on any path
 * pa    array whose ai_qs is filled from the converted data
 * path  file name, used only in log messages
 *
 * The old structure is read into a temporary job_array_259, its fields are
 * copied one by one into pa->ai_qs, the version is set to
 * ARRAY_QS_STRUCT_VERSION and the array is saved with array_save().
 *
 * Returns PBSE_NONE on success, PBSE_SYSTEM when the temporary structure
 * cannot be allocated, and PBSE_BAD_ARRAY_DATA when the read fails or the
 * file is not a version 3 file.
 */
int read_and_convert_259_array(
    int        fd,
    job_array *pa,
    char      *path)
{
    char           log_buf[LOCAL_LOG_BUF_SIZE];
    int            len;
    job_array_259 *pa_259;

    /* This is for a backward compatibility problem put into 2.5.9 and 3.0.3 */

    /* allocate the storage for the struct */
    pa_259 = (job_array_259 *)calloc(1, sizeof(job_array_259));

    if (pa_259 == NULL) {
        return PBSE_SYSTEM;
    }

    len = read_ac_socket(fd, &(pa_259->ai_qs), sizeof(pa_259->ai_qs));

    /* On every failure below the descriptor is left open: the caller opened
     * it and closes it itself, so closing it here as well would close the
     * same descriptor number twice. */
    if (len < 0) {
        snprintf(log_buf, sizeof(log_buf), "error reading %s", path);
        log_err(errno, "read_and_convert_259_array", log_buf);

        free(pa_259);

        return PBSE_BAD_ARRAY_DATA;
    }

    if (pa_259->ai_qs.struct_version == ARRAY_QS_STRUCT_VERSION) {
        snprintf(log_buf, sizeof(log_buf),
                 "Already at array structure version 4. Restart pbs_server without -u option");
        log_err(errno, "read_and_convert_259_array", log_buf);

        free(pa_259);

        return PBSE_BAD_ARRAY_DATA;
    }

    if (pa_259->ai_qs.struct_version != 3) {
        snprintf(log_buf, sizeof(log_buf),
                 "Cannot upgrade array version %d to %d",
                 pa_259->ai_qs.struct_version,
                 ARRAY_QS_STRUCT_VERSION);
        log_err(errno, "read_and_convert_259_array", log_buf);

        free(pa_259);

        return PBSE_BAD_ARRAY_DATA;
    }

    pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
    pa->ai_qs.array_size     = pa_259->ai_qs.array_size;
    pa->ai_qs.num_jobs       = pa_259->ai_qs.num_jobs;
    pa->ai_qs.slot_limit     = pa_259->ai_qs.slot_limit;
    pa->ai_qs.jobs_running   = pa_259->ai_qs.jobs_running;
    pa->ai_qs.jobs_done      = pa_259->ai_qs.jobs_done;
    pa->ai_qs.num_cloned     = pa_259->ai_qs.num_cloned;
    pa->ai_qs.num_started    = pa_259->ai_qs.num_started;
    pa->ai_qs.num_failed     = pa_259->ai_qs.num_failed;
    pa->ai_qs.num_successful = pa_259->ai_qs.num_successful;
    pa->ai_qs.num_purged     = pa_259->ai_qs.num_purged;

    pa->ai_qs.deps = pa_259->ai_qs.deps;

    snprintf(pa->ai_qs.owner, sizeof(pa->ai_qs.owner), "%s", pa_259->ai_qs.owner);
    snprintf(pa->ai_qs.parent_id, sizeof(pa->ai_qs.parent_id), "%s", pa_259->ai_qs.parent_id);
    snprintf(pa->ai_qs.fileprefix, sizeof(pa->ai_qs.fileprefix), "%s", pa_259->ai_qs.fileprefix);
    snprintf(pa->ai_qs.submit_host, sizeof(pa->ai_qs.submit_host), "%s", pa_259->ai_qs.submit_host);

    free(pa_259);

    array_save(pa);

    return(PBSE_NONE);
} /* END read_and_convert_259_array() */
job *job_recov( char *filename) /* I */ /* pathname to job save file */ { int fds; job *pj; char *pn; char namebuf[MAXPATHLEN]; char log_buf[LOCAL_LOG_BUF_SIZE]; #ifndef PBS_MOM char parent_id[PBS_MAXSVRJOBID + 1]; job_array *pa; #endif pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == NULL) { /* FAILURE - cannot alloc memory */ return(NULL); } snprintf(namebuf, MAXPATHLEN, "%s%s", path_jobs, filename); /* job directory path, filename */ fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to open %s", namebuf); log_err(errno, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "1", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); /* FAILURE - cannot open job file */ return(NULL); } /* read in job quick save sub-structure */ if (read_ac_socket(fds, (char *)&pj->ji_qs, sizeof(pj->ji_qs)) != sizeof(pj->ji_qs) && pj->ji_qs.qs_version == PBS_QS_VERSION) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Unable to read %s", namebuf); log_err(errno, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "2", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } /* is ji_qs the version we expect? */ if (pj->ji_qs.qs_version != PBS_QS_VERSION) { /* ji_qs is older version */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "%s appears to be from an old version. Attempting to convert.\n", namebuf); log_err(-1, __func__, log_buf); if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to upgrade %s\n", namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "3", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */ /* Does file name match the internal name? 
*/ /* This detects ghost files */ pn = strrchr(namebuf, (int)'/') + 1; if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0) { /* mismatch, discard job */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "4", LOGLEVEL); free(pj->ji_mutex); #endif free((char *)pj); close(fds); return(NULL); } /* read in working attributes */ if (recov_attr( fds, pj, job_attr_def, pj->ji_wattr, JOB_ATR_LAST, JOB_ATR_UNKN, TRUE) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unable to recover %s (file is likely corrupted)", namebuf); log_err(-1, __func__, log_buf); #ifndef PBS_MOM unlock_ji_mutex(pj, __func__, "5", LOGLEVEL); job_free(pj, FALSE); #else mom_job_free(pj); #endif close(fds); return(NULL); } #ifndef PBS_MOM /* Comment out the mother superior tracking. Will be debugged later if (pj->ji_wattr[JOB_ATR_exec_host].at_val.at_str != NULL) {*/ /* add job to the mother superior list for it's node */ /* char *ms = strdup(pj->ji_wattr[JOB_ATR_exec_host].at_val.at_str); char *end = strchr(ms, '/'); if (end != NULL) *end = '\0'; if ((end = strchr(ms, '+')) != NULL) *end = '\0'; add_to_ms_list(ms, pj); free(ms); }*/ #endif #ifdef PBS_MOM /* read in tm sockets and ips */ if (recov_tmsock(fds, pj) != 0) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, __func__, log_buf); } #else /* not PBS_MOM */ if (strchr(pj->ji_qs.ji_jobid, '[') != NULL) { /* job is part of an array. We need to put a link back to the server job array struct for this array. We also have to link this job into the linked list of jobs belonging to the array. 
*/ array_get_parent_id(pj->ji_qs.ji_jobid, parent_id); pa = get_array(parent_id); if (pa == NULL) { job_abt(&pj, (char *)"Array job missing array struct, aborting job"); close(fds); return NULL; } strcpy(pj->ji_arraystructid, parent_id); if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0) { pj->ji_is_array_template = TRUE; } else { pa->job_ids[(int)pj->ji_wattr[JOB_ATR_job_array_id].at_val.at_long] = strdup(pj->ji_qs.ji_jobid); pa->jobs_recovered++; /* This is a bit of a kluge, but for some reason if an array job was on hold when the server went down the ji_wattr[JOB_ATR_hold].at_val.at_long value is 0 on recovery even though pj->ji_qs.ji_state is JOB_STATE_HELD and the substate is JOB_SUBSTATE_HELD */ if ((pj->ji_qs.ji_state == JOB_STATE_HELD) && (pj->ji_qs.ji_substate == JOB_SUBSTATE_HELD)) { pj->ji_wattr[JOB_ATR_hold].at_val.at_long = HOLD_l; pj->ji_wattr[JOB_ATR_hold].at_flags = ATR_VFLAG_SET; } } if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } } #endif close(fds); pj->ji_commit_done = 1; /* all done recovering the job */ job_save(pj, SAVEJOB_FULL, 0); return(pj); } /* END job_recov() */
int main( int argc, char *argv[]) { int amt; int err = 0; int f; int fp; int no_attributes = 0; job xjob; extern int optind; while ((f = getopt(argc, argv, "a")) != EOF) { switch (f) { case 'a': no_attributes = 1; break; default: err = 1; break; } } if (err || (argc - optind < 1)) { fprintf(stderr, "usage: %s [-a] file[ file]...}\n", argv[0]); return(1); } for (f = optind;f < argc;++f) { fp = open(argv[f], O_RDONLY, 0); if (fp < 0) { perror("open failed"); fprintf(stderr, "unable to open file %s\n", argv[f]); exit(1); } amt = read_ac_socket(fp, &xjob.ji_qs, sizeof(xjob.ji_qs)); if (amt != sizeof(xjob.ji_qs)) { fprintf(stderr, "Short read of %d bytes, file %s\n", amt, argv[f]); } if (xjob.ji_qs.qs_version != PBS_QS_VERSION) { printf("%s contains an old version of the ji_qs structure.\n" " expecting version %#010x, read %#010x\n" " Skipping prt_job_struct()\n" " pbs_server may be able to upgrade job automatically\n", argv[f], PBS_QS_VERSION, xjob.ji_qs.qs_version); close(fp); continue; } /* print out job structure */ prt_job_struct(&xjob); /* now do attributes, one at a time */ if (no_attributes == 0) { printf("--attributes--\n"); while (read_attr(fp)) /* NO-OP, reading */; } if (xjob.ji_qs.ji_un_type == JOB_UNION_TYPE_MOM) { printf("--TM info--\n"); read_tm_info(fp); } close(fp); printf("\n"); } /* END for (f) */ return(0); } /* END main() */
int svr_recov( char *svrfile, /* I */ int read_only) /* I */ { int i; int sdb; char log_buf[LOCAL_LOG_BUF_SIZE]; void recov_acl(pbs_attribute *, attribute_def *, const char *, const char *); sdb = open(svrfile, O_RDONLY, 0); if (sdb < 0) { if (errno == ENOENT) { char tmpLine[LOG_BUF_SIZE]; snprintf(tmpLine, sizeof(tmpLine), "cannot locate server database '%s' - use 'pbs_server -t create' to create new database if database has not been initialized.", svrfile); log_err(errno, __func__, tmpLine); } else { log_err(errno, __func__, msg_svdbopen); } return(-1); } /* read in server structure */ lock_sv_qs_mutex(server.sv_qs_mutex, __func__); i = read_ac_socket(sdb, (char *) & server.sv_qs, sizeof(server_qs)); if (i != sizeof(server_qs)) { unlock_sv_qs_mutex(server.sv_qs_mutex, log_buf); if (i < 0) log_err(errno, __func__, "read of serverdb failed"); else log_err(errno, __func__, "short read of serverdb"); close(sdb); return(-1); } /* Save the sv_jobidnumber field in case it is set by the attributes. */ i = server.sv_qs.sv_jobidnumber; /* read in server attributes */ if (recov_attr( sdb, &server, svr_attr_def, server.sv_attr, SRV_ATR_LAST, 0, !read_only) != 0 ) { unlock_sv_qs_mutex(server.sv_qs_mutex, log_buf); log_err(errno, __func__, "error on recovering server attr"); close(sdb); return(-1); } /* Restore the current job number and make it visible in qmgr print server commnad. 
*/ if (!read_only) { server.sv_qs.sv_jobidnumber = i; server.sv_attr[SRV_ATR_NextJobNumber].at_val.at_long = i; server.sv_attr[SRV_ATR_NextJobNumber].at_flags |= ATR_VFLAG_SET| ATR_VFLAG_MODIFY; } unlock_sv_qs_mutex(server.sv_qs_mutex, __func__); close(sdb); /* recover the server various acls from their own files */ for (i = 0;i < SRV_ATR_LAST;i++) { if (server.sv_attr[i].at_type == ATR_TYPE_ACL) { recov_acl( &server.sv_attr[i], &svr_attr_def[i], PBS_SVRACL, svr_attr_def[i].at_name); if ((!read_only) && (svr_attr_def[i].at_action != (int (*)(pbs_attribute*, void*, int))0)) { svr_attr_def[i].at_action( &server.sv_attr[i], &server, ATR_ACTION_RECOV); } } } /* END for (i) */ return(PBSE_NONE); } /* END svr_recov() */
/*
 * job_recov_binary - fill an existing job structure from a binary save file.
 *
 * filename  path of the job save file
 * pjob      points to the job to fill (*pjob must already be allocated)
 * log_buf   receives a description of the failure, if any
 * buf_len   size of log_buf
 *
 * The quick-save structure is read (and converted with job_qs_upgrade()
 * when its version is not PBS_QS_VERSION), the file name is checked against
 * the file prefix stored in the job, and the working attributes are read
 * with recov_attr(). In a MOM build the expected name is the prefix followed
 * by pbs_rm_port when multi_mom is set, and the TM sockets are recovered
 * afterwards (failure there is only a warning).
 *
 * Returns PBSE_NONE on success, -1 on failure with log_buf describing it.
 */
int job_recov_binary(
    char   *filename,  /* I */   /* pathname to job save file */
    job   **pjob,      /* M */   /* pointer to a pointer of job structure to fill info */
    char   *log_buf,   /* O */   /* buffer to hold error message */
    size_t  buf_len)   /* I */   /* len of the error buffer */
{
    int         fds;
    job        *pj = *pjob;
    char       *pn;
    const char *expected_prefix;
#ifdef PBS_MOM
    char        fileid[MAXPATHLEN];
#endif

    fds = open(filename, O_RDONLY, 0);

    if (fds < 0) {
        snprintf(log_buf, buf_len, "unable to open %s", filename);

        return -1;
    }

    /* read in job quick save sub-structure. A short read is only fatal when
     * the data claims to be the current version; older versions are handled
     * by the upgrade step below */
    if (read_ac_socket(fds, (char *)&pj->ji_qs, sizeof(pj->ji_qs)) != sizeof(pj->ji_qs) &&
        pj->ji_qs.qs_version == PBS_QS_VERSION) {
        snprintf(log_buf, buf_len, "Unable to read %s", filename);

        close(fds);

        return -1;
    }

    /* is ji_qs the version we expect? */
    if (pj->ji_qs.qs_version != PBS_QS_VERSION) {
        /* ji_qs is older version */
        snprintf(log_buf, buf_len,
                 "%s appears to be from an old version. Attempting to convert.\n",
                 filename);

        log_err(-1, __func__, log_buf);

        if (job_qs_upgrade(pj, fds, filename, pj->ji_qs.qs_version) != 0) {
            snprintf(log_buf, buf_len, "unable to upgrade %s\n", filename);

            close(fds);

            return -1;
        }
    } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */

    /* Does file name match the internal name? */
    /* This detects ghost files */
    pn = strrchr(filename, (int)'/');

    /* fall back to the whole name if the path has no directory separator */
    pn = (pn != NULL) ? pn + 1 : filename;

#ifndef PBS_MOM
    expected_prefix = pj->ji_qs.ji_fileprefix;
#else
    if (multi_mom != 0) {
        snprintf(fileid, sizeof(fileid), "%s%d", pj->ji_qs.ji_fileprefix, pbs_rm_port);
    } else {
        snprintf(fileid, sizeof(fileid), "%s", pj->ji_qs.ji_fileprefix);
    }

    expected_prefix = fileid;
#endif

    if (strncmp(pn, expected_prefix, strlen(expected_prefix)) != 0) {
        /* mismatch, discard job */
        snprintf(log_buf, buf_len,
                 "Job Id %s does not match file name for %s",
                 pj->ji_qs.ji_jobid,
                 filename);

        close(fds);

        return -1;
    }

    /* read in working attributes */
    if (recov_attr(
            fds,
            pj,
            job_attr_def,
            pj->ji_wattr,
            JOB_ATR_LAST,
            JOB_ATR_UNKN,
            TRUE) != 0) {
        snprintf(log_buf, buf_len,
                 "unable to recover %s (file is likely corrupted)",
                 filename);

        close(fds);

        return -1;
    }

#ifdef PBS_MOM
    /* read in tm sockets and ips */
    if (recov_tmsock(fds, pj) != 0) {
        snprintf(log_buf, buf_len,
                 "warning: tmsockets not recovered from %s (written by an older pbs_mom?)",
                 filename);

        log_err(-1, __func__, log_buf);
    }
#endif /* PBS_MOM */

    close(fds);

    return PBSE_NONE;
} /* END job_recov_binary() */
int svr_recov_xml( char *svrfile, /* I */ int read_only) /* I */ { int sdb; int bytes_read; int errorCount = 0; int rc; char buffer[MAXLINE<<10]; char *parent; char *child; char *current; char *begin; char *end; char log_buf[LOCAL_LOG_BUF_SIZE]; sdb = open(svrfile, O_RDONLY, 0); if (sdb < 0) { if (errno == ENOENT) { snprintf(log_buf,sizeof(log_buf), "cannot locate server database '%s' - use 'pbs_server -t create' to create new database if database has not been initialized.", svrfile); log_err(errno, __func__, log_buf); } else { log_err(errno, __func__, msg_svdbopen); } return(-1); } bytes_read = read_ac_socket(sdb,buffer,sizeof(buffer)); if (bytes_read < 0) { snprintf(log_buf,sizeof(log_buf), "Unable to read from serverdb file - %s", strerror(errno)); log_err(errno, __func__, log_buf); close(sdb); return(-1); } /* start reading the serverdb file */ current = begin = buffer; /* advance past the server tag */ current = strstr(current,"<server_db>"); if (current == NULL) { /* no server tag - check if this is the old format */ log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, "Cannot find a server tag, attempting to load legacy format\n"); close(sdb); rc = svr_recov(svrfile,read_only); return(rc); } end = strstr(current,"</server_db>"); if (end == NULL) { /* no server tag???? 
*/ log_err(-1, __func__, "No server tag found in the database file???"); close(sdb); return(-1); } /* adjust to not process server tag */ current += strlen("<server_db>"); /* adjust end for the newline character preceeding the close server tag */ end--; lock_sv_qs_mutex(server.sv_qs_mutex, __func__); server.sv_qs.sv_numjobs = 0; server.sv_qs.sv_numque = server.sv_qs.sv_jobidnumber = 0; while (current < end) { if (get_parent_and_child(current,&parent,&child,¤t)) { /* ERROR */ errorCount++; break; } if (!strcmp("numjobs",parent)) { server.sv_qs.sv_numjobs = atoi(child); } else if (!strcmp("numque",parent)) { server.sv_qs.sv_numque = atoi(child); } else if (!strcmp("nextjobid",parent)) { server.sv_qs.sv_jobidnumber = atoi(child); } else if (!strcmp("savetime",parent)) { server.sv_qs.sv_savetm = atol(child); } else if (!strcmp("attributes",parent)) { char *attr_ptr = child; char *child_parent; char *child_attr; while (*attr_ptr != '\0') { if (get_parent_and_child(attr_ptr,&child_parent,&child_attr, &attr_ptr)) { /* ERROR */ errorCount++; break; } if ((rc = str_to_attr(child_parent,child_attr,server.sv_attr,svr_attr_def))) { /* ERROR */ errorCount++; snprintf(log_buf,sizeof(log_buf), "Error creating attribute %s", child_parent); log_err(rc, __func__, log_buf); break; } } if (recovered_tcp_timeout < 300) disable_timeout_check = TRUE; } else { /* shouldn't get here */ } } close(sdb); if (errorCount) return -1; if (!read_only) { server.sv_attr[SRV_ATR_NextJobNumber].at_val.at_long = server.sv_qs.sv_jobidnumber; server.sv_attr[SRV_ATR_NextJobNumber].at_flags |= ATR_VFLAG_SET| ATR_VFLAG_MODIFY; } unlock_sv_qs_mutex(server.sv_qs_mutex, __func__); return(PBSE_NONE); } /* END svr_recov_xml() */
int PBSD_munge_authenticate( int psock, /* I */ int handle) /* I */ { int rc = PBSE_NONE; int fd; FILE *munge_pipe; char munge_buf[MUNGE_SIZE]; char munge_command[MUNGE_SIZE]; char *ptr; /* pointer to the current place to copy data into munge_buf */ int bytes_read; int total_bytes_read = 0; int local_errno = 0; /* user id and name stuff */ struct passwd *pwent; uid_t myrealuid; struct batch_reply *reply; unsigned short user_port = 0; struct sockaddr_in sockname; socklen_t socknamelen = sizeof(sockname); struct tcp_chan *chan = NULL; snprintf(munge_command,sizeof(munge_command), "munge -n 2>/dev/null"); memset(munge_buf, 0, MUNGE_SIZE); ptr = munge_buf; if ((munge_pipe = popen(munge_command,"r")) == NULL) { /* FAILURE */ return(-1); } fd = fileno(munge_pipe); while ((bytes_read = read_ac_socket(fd, ptr, MUNGE_SIZE - total_bytes_read)) > 0) { total_bytes_read += bytes_read; ptr += bytes_read; } pclose(munge_pipe); if (bytes_read == -1) { /* read failed */ local_errno = errno; log_err(local_errno, __func__, "error reading pipe in PBSD_munge_authenticate"); return -1; } /* if we got no bytes back then Munge may not be installed etc. */ if (total_bytes_read == 0) { return(PBSE_MUNGE_NOT_FOUND); } /* We got the certificate. 
Now make the PBS_BATCH_AltAuthenUser request */ myrealuid = getuid(); pwent = getpwuid(myrealuid); rc = getsockname(psock, (struct sockaddr *)&sockname, &socknamelen); if (rc == -1) { fprintf(stderr, "getsockname failed: %d\n", errno); return rc; } user_port = ntohs(sockname.sin_port); if ((chan = DIS_tcp_setup(psock)) == NULL) { rc = PBSE_MEM_MALLOC; } else if ((rc = encode_DIS_ReqHdr(chan,PBS_BATCH_AltAuthenUser,pwent->pw_name)) || (rc = diswui(chan, user_port)) || (rc = diswst(chan, munge_buf)) || (rc = encode_DIS_ReqExtend(chan, NULL)) || (rc = DIS_tcp_wflush(chan))) { /* ERROR */ } else { /* read the reply */ if ((reply = PBSD_rdrpy(&local_errno, handle)) != NULL) free(reply); rc = PBSE_NONE; } if (chan != NULL) DIS_tcp_cleanup(chan); return rc; } /* END PBSD_munge_authenticate() */
void getstdout(void) { struct timeval tv = { 0, 10000 }; fd_set rfsd; int newfd, i; char buf[1024]; ssize_t bytes; int ret; static int maxfd = -1; int flags; if (maxfd == -1) { if (stdoutfd > *tm_conn) maxfd = stdoutfd; else maxfd = *tm_conn; } rfsd = permrfsd; if (maxfd < (int)FD_SETSIZE) FD_SET(stdoutfd, &rfsd); FD_SET(*tm_conn, &permrfsd); if ((ret = select(maxfd + 1, &rfsd, NULL, NULL, &tv)) > 0) { if (FD_ISSET(*tm_conn, &rfsd)) { return; } if (FD_ISSET(stdoutfd, &rfsd)) { newfd = accept(stdoutfd, NULL, NULL); if (newfd > maxfd) maxfd = newfd; flags = fcntl(newfd, F_GETFL); #if defined(FNDELAY) && !defined(__hpux) flags |= FNDELAY; #else flags |= O_NONBLOCK; #endif fcntl(newfd, F_SETFL, flags); FD_SET(newfd, &permrfsd); FD_CLR(stdoutfd, &rfsd); ret--; } if (ret) { for (i = 0; i <= maxfd; i++) { if (FD_ISSET(i, &rfsd)) { if ((bytes = read_ac_socket(i, &buf, 1023)) > 0) { buf[bytes] = '\0'; fprintf(stdout, "%s", buf); } else if (bytes == 0) { FD_CLR(i, &permrfsd); close(i); if (i == maxfd) { int j; maxfd = stdoutfd; for (j = 0; j < i; j++) if (FD_ISSET(j, &permrfsd)) if (j > maxfd) maxfd = j; } } else { fprintf(stderr, "%s: error in read\n", id); } ret--; if (ret <= 0) break; } } } } }
pbs_queue *que_recov( char *filename) /* pathname to queue save file */ { int fds; int i; pbs_queue *pq; char namebuf[MAXPATHLEN]; time_t time_now = time(NULL); pq = que_alloc(filename, TRUE); /* allocate & init queue structure space */ if (pq == NULL) { log_err(-1, __func__, "que_alloc failed"); return(NULL); } snprintf(namebuf, sizeof(namebuf), "%s%s", path_queues, filename); fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { log_err(errno, __func__, "open error"); que_free(pq, TRUE); return(NULL); } /* read in queue save sub-structure */ if (read_ac_socket(fds, (char *)&pq->qu_qs, sizeof(queuefix)) != sizeof(queuefix)) { log_err(errno, __func__, "read error"); que_free(pq, TRUE); close(fds); return ((pbs_queue *)0); } /* read in queue attributes */ if (recov_attr(fds, pq, que_attr_def, pq->qu_attr, QA_ATR_LAST, 0, TRUE) != 0) { log_err(-1, __func__, "recov_attr[common] failed"); que_free(pq, TRUE); close(fds); return ((pbs_queue *)0); } /* * now reload the access control lists, these attributes were * saved separately */ for (i = 0;i < QA_ATR_LAST;i++) { if (pq->qu_attr[i].at_type == ATR_TYPE_ACL) { recov_acl( &pq->qu_attr[i], &que_attr_def[i], que_attr_def[i].at_name, pq->qu_qs.qu_name); } } /* all done recovering the queue */ close(fds); if ((pq->qu_attr[QA_ATR_MTime].at_flags & ATR_VFLAG_SET) == 0) { /* if we are recovering a pre-2.1.2 queue, save a new mtime */ pq->qu_attr[QA_ATR_MTime].at_val.at_long = time_now; pq->qu_attr[QA_ATR_MTime].at_flags = ATR_VFLAG_SET; que_save(pq); } return(pq); }
int pipe_and_read_unmunge( char *mungeFileName, /* I */ struct batch_request *preq, /* I */ int sock) /* I */ { char munge_buf[MUNGE_SIZE << 4]; char log_buf[LOCAL_LOG_BUF_SIZE]; FILE *munge_pipe; char *ptr; /* pointer to the current place to copy data into munge_buf */ char munge_command[MAXPATHLEN<<1]; int bytes_read; int total_bytes_read = 0; int fd; int rc; snprintf(munge_command,sizeof(munge_command), "unmunge --input=%s", mungeFileName); if ((munge_pipe = popen(munge_command,"r")) == NULL) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Unable to popen command '%s' for reading", munge_command); log_err(errno, __func__, log_buf); unlink(mungeFileName); req_reject(PBSE_SYSTEM, 0, preq, NULL, "couldn't create pipe to unmunge"); return(-1); } memset(munge_buf, 0, MUNGE_SIZE); ptr = munge_buf; fd = fileno(munge_pipe); while ((bytes_read = read_ac_socket(fd, ptr, MUNGE_SIZE)) > 0) { total_bytes_read += bytes_read; ptr += bytes_read; } pclose(munge_pipe); if (bytes_read == -1) { /* read failed */ req_reject(PBSE_SYSTEM, 0, preq, NULL, "error reading unmunge data"); rc = -1; } else if (total_bytes_read == 0) { /* * unmunge failed. Probably a bad credential. But we do not know for sure. * Bad credential gives us ECHILD error which gets added to log message * and confuses users, so reset it to zero show it does not show up in log */ if (errno == ECHILD) errno = 0; req_reject(PBSE_SYSTEM, 0, preq, NULL, "could not unmunge credentials"); rc = -1; } else if ((rc = get_encode_host(sock, munge_buf, preq)) == PBSE_NONE) { rc = get_UID(sock, munge_buf, preq); } return(rc); } /* END pipe_and_read_unmunge() */