void req_orderjob( struct batch_request *req) /* I */ { #ifndef NDEBUG char *id = "req_orderjob"; #endif job *pjob; job *pjob1; job *pjob2; int rank; int rc; char tmpqn[PBS_MAXQUEUENAME+1]; if ((pjob1 = chk_job_request(req->rq_ind.rq_move.rq_jid, req)) == NULL) { return; } if ((pjob2 = chk_job_request(req->rq_ind.rq_move.rq_destin, req)) == NULL) { return; } if (((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_RUNNING) || ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_RUNNING)) { #ifndef NDEBUG sprintf(log_buffer, "%s %d", pbse_to_txt(PBSE_BADSTATE), pjob->ji_qs.ji_state); strcat(log_buffer, id); log_event( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); #endif /* NDEBUG */ req_reject(PBSE_BADSTATE, 0, req, NULL, NULL); return; } else if (pjob1->ji_qhdr != pjob2->ji_qhdr) { /* jobs are in different queues */ if ((rc = svr_chkque( pjob1, pjob2->ji_qhdr, get_variable(pjob1, pbs_o_host), MOVE_TYPE_Order, NULL)) || (rc = svr_chkque( pjob2, pjob1->ji_qhdr, get_variable(pjob2, pbs_o_host), MOVE_TYPE_Order, NULL))) { req_reject(rc, 0, req, NULL, NULL); return; } } /* now swap the order of the two jobs in the queue lists */ rank = pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long; pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long; pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = rank; if (pjob1->ji_qhdr != pjob2->ji_qhdr) { (void)strcpy(tmpqn, pjob1->ji_qs.ji_queue); (void)strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue); (void)strcpy(pjob2->ji_qs.ji_queue, tmpqn); svr_dequejob(pjob1); svr_dequejob(pjob2); (void)svr_enquejob(pjob1); (void)svr_enquejob(pjob2); } else { swap_link(&pjob1->ji_jobque, &pjob2->ji_jobque); swap_link(&pjob1->ji_alljobs, &pjob2->ji_alljobs); } /* need to update disk copy of both jobs to save new order */ job_save(pjob1, SAVEJOB_FULL); job_save(pjob2, SAVEJOB_FULL); reply_ack(req); /* SUCCESS */ return; } /* END req_orderjob() */
void stat_update( struct batch_request *preq, struct stat_cntl *cntl) { job *pjob; struct batch_reply *preply; struct brp_status *pstatus; svrattrl *sattrl; int oldsid; int bad = 0; time_t time_now = time(NULL); char *msg_ptr = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; preply = &preq->rq_reply; if ((preply->brp_choice != BATCH_REPLY_CHOICE_Queue) && (preply->brp_un.brp_txt.brp_str != NULL)) { msg_ptr = strstr(preply->brp_un.brp_txt.brp_str, PBS_MSG_EQUAL); if (msg_ptr != NULL) msg_ptr += strlen(PBS_MSG_EQUAL); } if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) { pstatus = (struct brp_status *)GET_NEXT(preply->brp_un.brp_status); while (pstatus != NULL) { if ((pjob = svr_find_job(pstatus->brp_objname, FALSE)) != NULL) { mutex_mgr job_mutex(pjob->ji_mutex, true); sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr); oldsid = pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long; modify_job_attr( pjob, sattrl, ATR_DFLAG_MGWR | ATR_DFLAG_SvWR, &bad); if (oldsid != pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long) { /* first save since running job (or the sid has changed), */ /* must save session id */ job_save(pjob, SAVEJOB_FULL, 0); } #ifdef USESAVEDRESOURCES else { /* save so we can recover resources used */ job_save(pjob, SAVEJOB_FULL, 0); } #endif /* USESAVEDRESOURCES */ pjob->ji_momstat = time_now; } pstatus = (struct brp_status *)GET_NEXT(pstatus->brp_stlink); } /* END while (pstatus != NULL) */ } /* END if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) */ else if ((preply->brp_choice == BATCH_REPLY_CHOICE_Text) && (preply->brp_code == PBSE_UNKJOBID) && (msg_ptr != NULL) && (!strcmp(msg_ptr, preq->rq_ind.rq_status.rq_id))) { /* we sent a stat request, but mom says it doesn't know anything about the job */ if ((pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE)) != NULL) { /* job really isn't running any more - mom doesn't know anything about it this can happen if a diskless node reboots and the mom_priv/jobs directory is cleared, set its state to queued so 
job_abt doesn't think it is still running */ mutex_mgr job_mutex(pjob->ji_mutex, true); snprintf(log_buf, sizeof(log_buf), "mother superior no longer recognizes %s as a valid job, aborting. Last reported time was %ld", preq->rq_ind.rq_status.rq_id, pjob->ji_last_reported_time); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE); rel_resc(pjob); job_mutex.set_unlock_on_exit(false); job_abt(&pjob, "Job does not exist on node"); /* TODO, if the job is rerunnable we should set its state back to queued */ } } else { if (preply->brp_choice == BATCH_REPLY_CHOICE_Queue) { snprintf(log_buf, sizeof(log_buf), "Unexpected reply: reply was on queue"); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } else { snprintf(log_buf, sizeof(log_buf), "Poll job request failed for job %s", preq->rq_ind.rq_status.rq_id); log_err(preply->brp_code, __func__, log_buf); } } cntl->sc_conn = -1; if (cntl->sc_post) cntl->sc_post(cntl); /* continue where we left off */ return; } /* END stat_update() */
/* start_domainsocket_listener * Starts a listen thread on a UNIX domain socket connection */ int start_domainsocket_listener( const char *socket_name, void *(*process_meth)(void *)) { struct sockaddr_un addr; int rc = PBSE_NONE; char log_buf[LOCAL_LOG_BUF_SIZE]; int *new_conn_port = NULL; int listen_socket = 0; int total_cntr = 0; pthread_t tid; pthread_attr_t t_attr; int objclass = 0; char authd_host_port[1024]; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1); /* socket_name is a file in the file system. It must be gone before we can bind to it. Unlink it */ unlink(socket_name); if ( (listen_socket = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { snprintf(log_buf, sizeof(log_buf), "socket failed: %d", errno); log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf); rc = PBSE_SOCKET_FAULT; } else if ( bind(listen_socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) { snprintf(log_buf, sizeof(log_buf), "failed to bind socket %s: %d", socket_name, errno); log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf); rc = PBSE_SOCKET_FAULT; } else if (chmod(socket_name, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) < 0) { snprintf(log_buf, sizeof(log_buf), "failed to change file permissions on %s: %d", socket_name, errno); log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf); rc = PBSE_SOCKET_FAULT; } else if ( listen(listen_socket, 64) < 0) { snprintf(log_buf, sizeof(log_buf), "listen failed %s: %d", socket_name, errno); log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf); rc = PBSE_SOCKET_LISTEN; } else if ((rc = pthread_attr_init(&t_attr)) != 0) { /* Can not init thread attribute structure */ rc = PBSE_THREADATTR; } else if ((rc = pthread_attr_setdetachstate(&t_attr, PTHREAD_CREATE_DETACHED)) != 0) { /* Can not set thread initial state as detached */ 
pthread_attr_destroy(&t_attr); } else { log_get_set_eventclass(&objclass, GETV); if (objclass == PBS_EVENTCLASS_TRQAUTHD) { log_get_host_port(authd_host_port, sizeof(authd_host_port)); if (authd_host_port[0]) snprintf(log_buf, sizeof(log_buf), "TORQUE authd daemon started and listening on %s (unix socket %s)", authd_host_port, socket_name); else snprintf(log_buf, sizeof(log_buf), "TORQUE authd daemon started and listening unix socket %s", socket_name); log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_TRQAUTHD, msg_daemonname, log_buf); } while (1) { if((new_conn_port = (int *)calloc(1, sizeof(int))) == NULL) { printf("Error allocating new connection handle on accept.\n"); break; } if ((*new_conn_port = accept(listen_socket, NULL, NULL)) == -1) { if (errno == EMFILE) { sleep(1); printf("Temporary pause\n"); } else { printf("error in accept %s\n", strerror(errno)); break; } errno = 0; free(new_conn_port); new_conn_port = NULL; } else { if (debug_mode == TRUE) { process_meth((void *)new_conn_port); } else { pthread_create(&tid, &t_attr, process_meth, (void *)new_conn_port); } } if (debug_mode == TRUE) { if (total_cntr % 1000 == 0) { printf("Total requests: %d\n", total_cntr); } total_cntr++; } } if (new_conn_port != NULL) { free(new_conn_port); } pthread_attr_destroy(&t_attr); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, "net_srvr", "Socket close of network listener requested"); } if (listen_socket != -1) close(listen_socket); return(rc); } /* END start_listener() */
int req_stat_job( struct batch_request *preq) /* ptr to the decoded request */ { struct stat_cntl cntl; /* see svrfunc.h */ char *name; job *pjob = NULL; pbs_queue *pque = NULL; int rc = PBSE_NONE; char log_buf[LOCAL_LOG_BUF_SIZE]; bool condensed = false; enum TJobStatTypeEnum type = tjstNONE; /* * first, validate the name of the requested object, either * a job, a queue, or the whole server. */ if (LOGLEVEL >= 7) { sprintf(log_buf, "note"); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } /* FORMAT: name = { <JOBID> | <QUEUEID> | '' } */ name = preq->rq_ind.rq_status.rq_id; if (preq->rq_extend != NULL) { /* evaluate pbs_job_stat() 'extension' field */ if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated"))) { /* truncate response by 'max_report' */ type = tjstTruncatedServer; } else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays"))) { type = tjstSummarizeArraysServer; } if (preq->rq_extend[strlen(preq->rq_extend) - 1] == 'C') { condensed = true; } } /* END if (preq->rq_extend != NULL) */ if (isdigit((int)*name)) { /* status a single job */ if (is_array(name)) { if (type != tjstSummarizeArraysServer) { type = tjstArray; } } else { type = tjstJob; if ((pjob = svr_find_job(name, FALSE)) == NULL) { rc = PBSE_UNKJOBID; } else unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } } else if (isalpha(name[0])) { if (type == tjstNONE) type = tjstQueue; else if (type == tjstSummarizeArraysServer) type = tjstSummarizeArraysQueue; else type = tjstTruncatedQueue; /* if found, this mutex is released later */ if ((pque = find_queuebyname(name)) == NULL) { rc = PBSE_UNKQUE; } } else if ((*name == '\0') || (*name == '@')) { /* status all jobs at server */ if (type == tjstNONE) type = tjstServer; } else { rc = PBSE_IVALREQ; } if (rc != 0) { /* is invalid - an error */ req_reject(rc, 0, preq, NULL, NULL); return(rc); } set_reply_type(&preq->rq_reply, BATCH_REPLY_CHOICE_Status); CLEAR_HEAD(preq->rq_reply.brp_un.brp_status); if 
((type == tjstTruncatedQueue) || (type == tjstTruncatedServer)) { if (pque != NULL) { unlock_queue(pque, __func__, "", LOGLEVEL); pque = NULL; } } memset(&cntl, 0, sizeof(cntl)); cntl.sc_type = (int)type; cntl.sc_conn = -1; cntl.sc_pque = pque; cntl.sc_origrq = preq; cntl.sc_post = req_stat_job_step2; cntl.sc_jobid[0] = '\0'; /* cause "start from beginning" */ cntl.sc_condensed = condensed; req_stat_job_step2(&cntl); /* go to step 2, see if running is current */ if (pque != NULL) unlock_queue(pque, "req_stat_job", (char *)"success", LOGLEVEL); return(PBSE_NONE); } /* END req_stat_job() */
void req_stat_job_step2( struct stat_cntl *cntl) /* I/O (free'd on return) */ { batch_request *preq = cntl->sc_origrq; svrattrl *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); job *pjob = NULL; struct batch_reply *preply = &preq->rq_reply; int rc = 0; enum TJobStatTypeEnum type = (enum TJobStatTypeEnum)cntl->sc_type; bool exec_only = false; int bad = 0; /* delta time - only report full pbs_attribute list if J->MTime > DTime */ int job_array_index = -1; job_array *pa = NULL; all_jobs_iterator *iter; if (preq->rq_extend != NULL) { /* FORMAT: { EXECQONLY } */ if (strstr(preq->rq_extend, EXECQUEONLY)) exec_only = true; } if ((type == tjstTruncatedServer) || (type == tjstTruncatedQueue)) { handle_truncated_qstat(exec_only, cntl->sc_condensed, preq); return; } /* END if ((type == tjstTruncatedServer) || ...) */ else if (type == tjstJob) { pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE); if (pjob != NULL) { if ((rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad))) req_reject(rc, bad, preq, NULL, NULL); else reply_send_svr(preq); unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } else { req_reject(PBSE_JOBNOTFOUND, bad, preq, NULL, NULL); } } else { if (type == tjstArray) { pa = get_array(preq->rq_ind.rq_status.rq_id); if (pa == NULL) { req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array"); return; } } else if ((type == tjstSummarizeArraysQueue) || (type == tjstSummarizeArraysServer)) update_array_statuses(); iter = get_correct_status_iterator(cntl); for (pjob = get_next_status_job(cntl, job_array_index, pa, iter); pjob != NULL; pjob = get_next_status_job(cntl, job_array_index, pa, iter)) { mutex_mgr job_mutex(pjob->ji_mutex, true); /* go ahead and build the status reply for this job */ if (pjob->ji_being_recycled == true) continue; if (exec_only) { if (cntl->sc_pque != NULL) { if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution) continue; } else if (in_execution_queue(pjob, pa) == false) continue; } rc 
= status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad); if ((rc != PBSE_NONE) && (rc != PBSE_PERM)) { if (pa != NULL) unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); req_reject(rc, bad, preq, NULL, NULL); delete iter; return; } } /* END for (pjob != NULL) */ delete iter; if (pa != NULL) { unlock_ai_mutex(pa, __func__, "1", LOGLEVEL); } reply_send_svr(preq); } if (LOGLEVEL >= 7) { log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob", "Successfully returned the status of queued jobs\n"); } return; } /* END req_stat_job_step2() */
/* read_config() -- Open and parse config file and extract values of variables */ bool_t read_config() { char buf[(BUF_SZ + 1)], *p, *q, *r; FILE *fp; if(config_file == (char *)NULL) { config_file = strdup(CONFIGURATION_FILE); if(config_file == (char *)NULL) { die("parse_config() -- strdup() failed"); } } if((fp = fopen(config_file, "r")) == NULL) { return(False); } while(fgets(buf, sizeof(buf), fp)) { char *begin=buf; char *rightside; /* Make comments invisible */ if((p = strchr(buf, '#'))) { *p = (char)NULL; } /* Ignore malformed lines and comments */ if(strchr(buf, '=') == (char *)NULL) continue; /* Parse out keywords */ p=firsttok(&begin, "= \t\n"); if(p){ rightside=begin; q = firsttok(&begin, "= \t\n"); } if(p && q) { if(strcasecmp(p, "Root") == 0) { if((root = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set Root=\"%s\"\n", root); } } else if(strcasecmp(p, "MailHub") == 0) { if((r = strchr(q, ':')) != NULL) { *r++ = '\0'; port = atoi(r); } if((mailhost = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set MailHub=\"%s\"\n", mailhost); log_event(LOG_INFO, "Set RemotePort=\"%d\"\n", port); } } else if(strcasecmp(p, "HostName") == 0) { free(hostname); hostname = strdup(q); if (!hostname) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set HostName=\"%s\"\n", hostname); } } else if(strcasecmp(p,"AddHeader") == 0) { if((r = firsttok(&rightside, "\n#")) != NULL) { header_save(r); free(r); } else { die("cannot AddHeader"); } if(log_level > 0 ) { log_event(LOG_INFO, "Set AddHeader=\"%s\"\n", q); } } #ifdef REWRITE_DOMAIN else if(strcasecmp(p, "RewriteDomain") == 0) { if((p = strrchr(q, '@'))) { mail_domain = strdup(++p); log_event(LOG_ERR, "Set RewriteDomain=\"%s\" is invalid\n", q); log_event(LOG_ERR, "Set RewriteDomain=\"%s\" used\n", mail_domain); } else { mail_domain = strdup(q); } 
if(mail_domain == (char *)NULL) { die("parse_config() -- strdup() failed"); } rewrite_domain = True; if(log_level > 0) { log_event(LOG_INFO, "Set RewriteDomain=\"%s\"\n", mail_domain); } } #endif else if(strcasecmp(p, "FromLineOverride") == 0) { if(strcasecmp(q, "YES") == 0) { override_from = True; } else { override_from = False; } if(log_level > 0) { log_event(LOG_INFO, "Set FromLineOverride=\"%s\"\n", override_from ? "True" : "False"); } } else if(strcasecmp(p, "RemotePort") == 0) { port = atoi(q); if(log_level > 0) { log_event(LOG_INFO, "Set RemotePort=\"%d\"\n", port); } } #ifdef HAVE_SSL else if(strcasecmp(p, "UseTLS") == 0) { if(strcasecmp(q, "YES") == 0) { use_tls = True; } else { use_tls = False; use_starttls = False; } if(log_level > 0) { log_event(LOG_INFO, "Set UseTLS=\"%s\"\n", use_tls ? "True" : "False"); } } else if(strcasecmp(p, "UseSTARTTLS") == 0) { if(strcasecmp(q, "YES") == 0) { use_starttls = True; use_tls = True; } else { use_starttls = False; } if(log_level > 0) { log_event(LOG_INFO, "Set UseSTARTTLS=\"%s\"\n", use_tls ? "True" : "False"); } } else if(strcasecmp(p, "UseTLSCert") == 0) { if(strcasecmp(q, "YES") == 0) { use_cert = True; } else { use_cert = False; } if(log_level > 0) { log_event(LOG_INFO, "Set UseTLSCert=\"%s\"\n", use_cert ? 
"True" : "False"); } } else if(strcasecmp(p, "TLSCert") == 0) { if((tls_cert = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set TLSCert=\"%s\"\n", tls_cert); } } #endif /* Command-line overrides these */ else if(strcasecmp(p, "AuthUser") == 0 && !auth_user) { if((auth_user = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set AuthUser=\"%s\"\n", auth_user); } } else if(strcasecmp(p, "AuthPass") == 0 && !auth_pass) { if((auth_pass = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set AuthPass=\"%s\"\n", auth_pass); } } else if(strcasecmp(p, "AuthMethod") == 0 && !auth_method) { if((auth_method = strdup(q)) == (char *)NULL) { die("parse_config() -- strdup() failed"); } if(log_level > 0) { log_event(LOG_INFO, "Set AuthMethod=\"%s\"\n", auth_method); } } else if(strcasecmp(p, "UseOldAUTH") == 0) { if(strcasecmp(q, "YES") == 0) { use_oldauth = True; } else { use_oldauth = False; } if(log_level > 0) { log_event(LOG_INFO, "Set UseOldAUTH=\"%s\"\n", use_oldauth ? "True" : "False"); } } else if (strcasecmp(p, "Debug") == 0) { if (strcasecmp(q, "YES") == 0) { log_level = 1; } else { log_level = 0; } } else { log_event(LOG_INFO, "Unable to set %s=\"%s\"\n", p, q); } free(p); free(q); } } (void)fclose(fp); return(True); }
/*
 * job_save_fs - write a job to its file under path_jobs.
 *
 * The job file is named <path_jobs><prefix><JOB_FILE_SUFFIX>, where <prefix>
 * is ji_fileprefix when that is non-empty and the job id otherwise.
 *
 * updatetype selects how much is written:
 *   SAVEJOB_QUICK  - the existing file is opened and only ji_qs and
 *                    ji_extended are rewritten.
 *   SAVEJOB_NEW    - everything is written straight into the job file.
 *   anything else  - everything is written into a side file
 *                    (<prefix><JOB_FILE_COPY>); for SAVEJOB_FULL and
 *                    SAVEJOB_FULLFORCE that file then replaces the job file,
 *                    so the old copy survives a crash in the middle of the
 *                    write.
 *
 * A job whose structure version differs from JSVERSION is stamped with
 * JSVERSION and saved as SAVEJOB_FULLFORCE regardless of the caller's choice.
 *
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
int
job_save_fs(job *pjob, int updatetype)
{
	char	job_path[MAXPATHLEN+1];		/* the real job file */
	char	copy_path[MAXPATHLEN+1];	/* side file for a full rewrite */
	char	*target;
	int	fd;
	int	flags;
	int	mode;
	int	attempt;
	int	saved;

#ifdef WIN32
	mode = _S_IWRITE | _S_IREAD;
#else
	mode = 0600;
#endif

	/* both names share the "<job directory><prefix>" stem */
	(void)strcpy(job_path, path_jobs);
	(void)strcat(job_path,
		(*pjob->ji_qs.ji_fileprefix != '\0') ?
		pjob->ji_qs.ji_fileprefix : pjob->ji_qs.ji_jobid);
	(void)strcpy(copy_path, job_path);
	(void)strcat(job_path, JOB_FILE_SUFFIX);

	/* an attribute changed: refresh mtime and flag it as modified */
	if (pjob->ji_modified) {
		pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long = time_now;
		pjob->ji_wattr[JOB_ATR_mtime].at_flags |= ATR_VFLAG_MODCACHE;
	}

	/* the on-disk layout changed: a partial rewrite would be wrong */
	if (pjob->ji_qs.ji_jsversion != JSVERSION) {
		pjob->ji_qs.ji_jsversion = JSVERSION;
		updatetype = SAVEJOB_FULLFORCE;
	}

	if (updatetype == SAVEJOB_QUICK) {
		int	quick_ok;

		flags = O_WRONLY;
		fd = open(job_path, flags, mode);
		if (fd < 0) {
			log_err(errno, "job_save", "error on open");
			return (-1);
		}
#ifdef WIN32
		secure_file(job_path, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
		setmode(fds, O_BINARY);
#endif

		/* only the fixed base structure and the extended area */
		save_setup(fd);
		quick_ok = (save_struct((char *)&pjob->ji_qs, fixedsize) == 0) &&
			   (save_struct((char *)&pjob->ji_extended, extndsize) == 0) &&
			   (save_flush() == 0);

		/* report before close() gets a chance to disturb errno */
		if (!quick_ok)
			log_err(errno, "job_save", "error quickwrite");
		(void)close(fd);

		return (quick_ok ? 0 : -1);
	}

	/*
	 * Full write.  In order, the file receives:
	 *  (1) the job structure (ji_qs),
	 *  (2) the extended area (ji_extended),
	 *  (3) unless built as PBS_MOM, and only for an Array Job: the
	 *      index tracking table,
	 *  (4) the attributes, via save_attr_fs().
	 */
	(void)strcat(copy_path, JOB_FILE_COPY);
	flags = O_CREAT | O_WRONLY;
#ifdef WIN32
	fix_perms2(copy_path, job_path);
#endif

	target = (updatetype == SAVEJOB_NEW) ? job_path : copy_path;

	fd = open(target, flags, mode);
	if (fd < 0) {
		log_err(errno, "job_save", "error opening for full save");
		return (-1);
	}
#ifdef WIN32
	secure_file(target, "Administrators",
		READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
	setmode(fds, O_BINARY);
#endif

	/* up to MAX_SAVE_TRIES attempts, each starting at offset 0 */
	saved = 0;
	attempt = 0;
	while (!saved && (attempt < MAX_SAVE_TRIES)) {
		int	failed;

		save_setup(fd);

		failed = (save_struct((char *)&pjob->ji_qs, fixedsize) != 0);
		if (!failed)
			failed = (save_struct((char *)&pjob->ji_extended,
					extndsize) != 0);
#ifndef PBS_MOM
		if (!failed &&
		    (pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob))
			failed = (save_struct((char *)pjob->ji_ajtrk,
					pjob->ji_ajtrk->tkm_size) != 0);
#endif
		if (!failed)
			failed = (save_attr_fs(job_attr_def, pjob->ji_wattr,
					(int)JOB_ATR_LAST) != 0);
		if (!failed)
			failed = (save_flush() != 0);

		if (failed) {
			if (lseek(fd, (off_t)0, SEEK_SET) < 0) {
				log_err(errno, "job_save", "error lseek");
			}
			attempt++;
		} else {
			saved = 1;
		}
	}

	(void)close(fd);

	if (!saved) {
		/* never leave a half-written side file behind */
		if ((updatetype == SAVEJOB_FULL) ||
		    (updatetype == SAVEJOB_FULLFORCE))
			(void)unlink(copy_path);
		return (-1);
	}

	if ((updatetype == SAVEJOB_FULL) ||
	    (updatetype == SAVEJOB_FULLFORCE)) {
		/* put the freshly written side file in place of the job file */
#ifdef WIN32
		if (MoveFileEx(copy_path, job_path,
			MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH) == 0) {
			errno = GetLastError();
			sprintf(log_buffer, "MoveFileEx(%s,%s) failed!",
				copy_path, job_path);
			log_err(errno, "job_save", log_buffer);
		}
		secure_file(job_path, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
#else
		if (rename(copy_path, job_path) == -1) {
			log_event(PBSEVENT_ERROR|PBSEVENT_SECURITY,
				PBS_EVENTCLASS_JOB, LOG_ERR,
				pjob->ji_qs.ji_jobid,
				"rename in job_save failed");
		}
#endif
	}

	pjob->ji_modified = 0;

	return (0);
}
void req_deletejob( struct batch_request *preq) /* I */ { job *pjob; struct work_task *pwtold; struct work_task *pwtnew; struct work_task *pwtcheck; int rc; char *sigt = "SIGTERM"; char *Msg = NULL; /* check if we are getting a purgecomplete from scheduler */ if ((preq->rq_extend != NULL) && !strncmp(preq->rq_extend,PURGECOMP,strlen(PURGECOMP))) { /* * purge_completed_jobs will respond with either an ack or reject */ purge_completed_jobs(preq); return; } /* The way this is implemented, if the user enters the command "qdel -p <jobid>", * they can then delete jobs other than their own since the authorization * checks are made below in chk_job_request. This should probably be fixed. */ if (forced_jobpurge(preq) != 0) { return; } /* NOTE: should support rq_objname={<JOBID>|ALL|<name:<JOBNAME>} */ /* NYI */ pjob = chk_job_request(preq->rq_ind.rq_delete.rq_objname, preq); if (pjob == NULL) { /* NOTE: chk_job_request() will issue req_reject() */ return; } if (preq->rq_extend != NULL) { if (strncmp(preq->rq_extend, deldelaystr, strlen(deldelaystr)) && strncmp(preq->rq_extend, delasyncstr, strlen(delasyncstr)) && strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr))) { /* have text message in request extension, add it */ Msg = preq->rq_extend; /* * Message capability is only for operators and managers. * Check if request is authorized */ if ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0) { req_reject(PBSE_PERM, 0, preq, NULL, "must have operator or manager privilege to use -m parameter"); return; } } } if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { /* * Find pid of router from existing work task entry, * then establish another work task on same child. 
* Next, signal the router and wait for its completion; */ pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask); while (pwtold != NULL) { if ((pwtold->wt_type == WORK_Deferred_Child) || (pwtold->wt_type == WORK_Deferred_Cmp)) { pwtnew = set_task( pwtold->wt_type, pwtold->wt_event, post_delete_route, preq); if (pwtnew != NULL) { /* * reset type in case the SIGCHLD came * in during the set_task; it makes * sure that next_task() will find the * new entry. */ pwtnew->wt_type = pwtold->wt_type; pwtnew->wt_aux = pwtold->wt_aux; kill((pid_t)pwtold->wt_event, SIGTERM); pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT; return; /* all done for now */ } else { req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return; } } pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj); } /* should never get here ... */ log_err(-1, "req_delete", "Did not find work task for router"); req_reject(PBSE_INTERNAL, 0, preq, NULL, NULL); return; } if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 ) { /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */ /* retry in one second */ /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the job is being requeued. Wait until finished */ static time_t cycle_check_when = 0; static char cycle_check_jid[PBS_MAXSVRJOBID + 1]; if (cycle_check_when != 0) { if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) && (time_now - cycle_check_when > 10)) { /* state not updated after 10 seconds */ /* did the mom ever get it? delete it anyways... 
*/ cycle_check_jid[0] = '\0'; cycle_check_when = 0; goto jump; } if (time_now - cycle_check_when > 20) { /* give up after 20 seconds */ cycle_check_jid[0] = '\0'; cycle_check_when = 0; } } /* END if (cycle_check_when != 0) */ if (cycle_check_when == 0) { /* new PRERUN job located */ cycle_check_when = time_now; strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid); } sprintf(log_buffer, "job cannot be deleted, state=PRERUN, requeuing delete request"); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); pwtnew = set_task( WORK_Timed, time_now + 1, post_delete_route, preq); if (pwtnew == 0) req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return; } /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */ jump: /* * Log delete and if requesting client is not job owner, send mail. */ sprintf(log_buffer, "requestor=%s@%s", preq->rq_user, preq->rq_host); /* NOTE: should annotate accounting record with extend message (NYI) */ account_record(PBS_ACCT_DEL, pjob, log_buffer); sprintf(log_buffer, msg_manager, msg_deletejob, preq->rq_user, preq->rq_host); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); /* NOTE: should incorporate job delete message */ if (Msg != NULL) { /* have text message in request extension, add it */ strcat(log_buffer, "\n"); strcat(log_buffer, Msg); } if ((svr_chk_owner(preq, pjob) != 0) && !has_job_delete_nanny(pjob)) { /* only send email if owner did not delete job and job deleted has not been previously attempted */ svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buffer); /* * If we sent mail and already sent the extra message * then reset message so we don't trigger a redundant email * in job_abt() */ if (Msg != NULL) { Msg = NULL; } } if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, change restart comment if failed */ change_restart_comment_if_needed(pjob); } if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* * setup a nanny task to make sure the 
job is actually deleted (see the * comments at job_delete_nanny()). */ if (has_job_delete_nanny(pjob)) { req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress"); return; } apply_job_delete_nanny(pjob, time_now + 60); /* check if we are getting a asynchronous delete */ if ((preq->rq_extend != NULL) && !strncmp(preq->rq_extend,DELASYNC,strlen(DELASYNC))) { struct batch_request *preq_tmp = NULL; /* * Respond with an ack now instead of after MOM processing * Create a new batch request and fill it in. It will be freed by reply_ack */ snprintf(log_buffer,sizeof(log_buffer), "Deleting job asynchronously"); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buffer); preq_tmp = alloc_br(PBS_BATCH_DeleteJob); preq_tmp->rq_perm = preq->rq_perm; preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd; preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype; preq_tmp->rq_fromsvr = preq->rq_fromsvr; preq_tmp->rq_extsz = preq->rq_extsz; preq_tmp->rq_conn = preq->rq_conn; memcpy(preq_tmp->rq_ind.rq_manager.rq_objname, preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1); memcpy(preq_tmp->rq_user, preq->rq_user, PBS_MAXUSER + 1); memcpy(preq_tmp->rq_host, preq->rq_host, PBS_MAXHOSTNAME + 1); reply_ack(preq_tmp); preq->rq_noreply = TRUE; /* set for no more replies */ } /* make a cleanup task if set */ if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) && (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0)) { pwtcheck = set_task( WORK_Timed, time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long, ensure_deleted, preq); if (pwtcheck != NULL) append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck); } /* * Send signal request to MOM. The server will automagically * pick up and "finish" off the client request when MOM replies. 
*/ if ((rc = issue_signal(pjob, sigt, post_delete_mom1, preq))) { /* cant send to MOM */ req_reject(rc, 0, preq, NULL, NULL); } /* normally will ack reply when mom responds */ sprintf(log_buffer, msg_delrunjobsig, sigt); LOG_EVENT( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); return; } /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */ /* make a cleanup task if set */ if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) && (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0)) { pwtcheck = set_task( WORK_Timed, time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long, ensure_deleted, preq); if (pwtcheck != NULL) append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck); } /* if configured, and this job didn't have a slot limit hold, free a job * held with the slot limit hold */ if ((server.sv_attr[SRV_ATR_MoabArrayCompatible].at_val.at_long != FALSE) && ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE)) { if ((pjob->ji_arraystruct != NULL) && (pjob->ji_is_array_template == FALSE)) { int i; int newstate; int newsub; job *tmp; job_array *pa = pjob->ji_arraystruct; for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->jobs[i] == NULL) continue; tmp = (job *)pa->jobs[i]; if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) { tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l; if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0) { tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET; } svr_evaljobstate(tmp, &newstate, &newsub, 1); svr_setjobstate(tmp, newstate, newsub); job_save(tmp, SAVEJOB_FULL, 0); break; } } } } /* END MoabArrayCompatible check */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, do end job processing */ svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING); pjob->ji_momhandle = -1; /* force new connection */ pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob); if (pwtnew) { 
append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew); } } else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) { /* job has staged-in file, should remove them */ remove_stagein(pjob); job_abt(&pjob, Msg); } else { /* * the job is not transitting (though it may have been) and * is not running, so put in into a complete state. */ struct work_task *ptask; struct pbs_queue *pque; int KeepSeconds = 0; svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE); if ((pque = pjob->ji_qhdr) && (pque != NULL)) { pque->qu_numcompleted++; } KeepSeconds = attr_ifelse_long( &pque->qu_attr[QE_ATR_KeepCompleted], &server.sv_attr[SRV_ATR_KeepCompleted], 0); ptask = set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, pjob); if (ptask != NULL) { append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask); } } /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */ reply_ack(preq); return; } /* END req_deletejob() */
static void post_delete_mom1( struct work_task *pwt) { int delay = 0; int dellen = strlen(deldelaystr); job *pjob; struct work_task *pwtnew; pbs_queue *pque; struct batch_request *preq_sig; /* signal request to MOM */ struct batch_request *preq_clt; /* original client request */ int rc; preq_sig = pwt->wt_parm1; rc = preq_sig->rq_reply.brp_code; preq_clt = preq_sig->rq_extra; release_req(pwt); pjob = find_job(preq_clt->rq_ind.rq_delete.rq_objname); if (pjob == NULL) { /* job has gone away */ req_reject(PBSE_UNKJOBID, 0, preq_clt, NULL, NULL); return; } if (rc) { /* mom rejected request */ if (rc == PBSE_UNKJOBID) { /* MOM claims no knowledge, so just purge it */ log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, "MOM rejected signal during delete"); /* removed the resources assigned to job */ free_nodes(pjob); set_resc_assigned(pjob, DECR); job_purge(pjob); reply_ack(preq_clt); } else { req_reject(rc, 0, preq_clt, NULL, NULL); } return; } if (preq_clt->rq_extend) { if (strncmp(preq_clt->rq_extend, deldelaystr, dellen) == 0) { delay = atoi(preq_clt->rq_extend + dellen); } } reply_ack(preq_clt); /* dont need it, reply now */ /* * if no delay specified in original request, see if kill_delay * queue attribute is set. */ if (delay == 0) { pque = pjob->ji_qhdr; delay = attr_ifelse_long(&pque->qu_attr[QE_ATR_KillDelay], &server.sv_attr[SRV_ATR_KillDelay], 2); } pwtnew = set_task(WORK_Timed, delay + time_now, post_delete_mom2, pjob); if (pwtnew) { /* insure that work task will be removed if job goes away */ append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew); } /* * Since the first signal has succeeded, let's reschedule the * nanny to be 1 minute after the second phase. */ apply_job_delete_nanny(pjob, time_now + delay + 60); return; } /* END post_delete_mom1() */
/* * read reply and check for success */ int read_tcp_reply( struct tcp_chan *chan, int protocol, int version, int command, int *exit_status) { char log_buf[LOCAL_LOG_BUF_SIZE]; int ret; int value; /* value read from sock */ *exit_status = UNREAD_STATUS; if(LOGLEVEL >= 6) { sprintf(log_buf, "protocol: %d version: %d command:%d sock:%d", protocol, version, command, chan->sock); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,__func__, log_buf); } if ((value = disrsi(chan,&ret)) != protocol) { snprintf(log_buf,sizeof(log_buf), "Mismatching protocols. Expected protocol %d but read reply for %d\n", protocol, value); log_err(-1, __func__, log_buf); } else if (ret != DIS_SUCCESS) { } else if ((value = disrsi(chan,&ret)) != version) { snprintf(log_buf, sizeof(log_buf), "Mismatching versions. Expected version %d for protocol %d but read version %d\n", version, protocol, value); log_err(-1, __func__, log_buf); } else if (ret != DIS_SUCCESS) { } else if ((value = disrsi(chan,&ret)) != command) { snprintf(log_buf, sizeof(log_buf), "Mismatching commands. Expected command %d for protocol %d but read command %d\n", command, protocol, value); log_err(-1, __func__, log_buf); } else if (ret != DIS_SUCCESS) { } else { /* read the exit code */ *exit_status = disrsi(chan,&ret); /* DIS_tcp_reset(chan,0); */ } if (ret != DIS_SUCCESS) { if (ret >= 0) { snprintf(log_buf, sizeof(log_buf), "Could not read reply for protocol %d command %d: %s", protocol, command, dis_emsg[ret]); } else { snprintf(log_buf, sizeof(log_buf), "Could not read reply for protocol %d command %d", protocol, command); } log_err(-1, __func__, log_buf); } return(ret); } /* END read_tcp_reply() */
void purge_completed_jobs( struct batch_request *preq) /* I */ { char *id = "purge_completed_jobs"; job *pjob; char *time_str; time_t purge_time = 0; /* get the time to purge the jobs that completed before */ time_str = preq->rq_extend; time_str += strlen(PURGECOMP); purge_time = strtol(time_str,NULL,10); /* * Clean unreported capability is only for operators and managers. * Check if request is authorized */ if ((preq->rq_perm & (ATR_DFLAG_OPRD|ATR_DFLAG_OPWR| ATR_DFLAG_MGRD|ATR_DFLAG_MGWR)) == 0) { req_reject(PBSE_PERM,0,preq,NULL, "must have operator or manager privilege to use -c parameter"); return; } if (LOGLEVEL >= 4) { sprintf(log_buffer,"Received purge completed jobs command, purge time is %ld (%s)", (long)purge_time, preq->rq_extend); LOG_EVENT( PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, id, log_buffer); } for (pjob = (job *)GET_NEXT(svr_alljobs); pjob != NULL; pjob = (job *)GET_NEXT(pjob->ji_alljobs)) { if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_COMPLETE) && (pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long <= purge_time) && ((pjob->ji_wattr[JOB_ATR_reported].at_flags & ATR_VFLAG_SET) != 0) && (pjob->ji_wattr[JOB_ATR_reported].at_val.at_long == 0)) { if (LOGLEVEL >= 4) { sprintf(log_buffer,"Reported job is COMPLETED (%ld), setting reported to TRUE", pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } pjob->ji_wattr[JOB_ATR_reported].at_val.at_long = 1; pjob->ji_wattr[JOB_ATR_reported].at_flags = ATR_VFLAG_SET | ATR_VFLAG_MODIFY; job_save(pjob, SAVEJOB_FULL, 0); } } reply_ack(preq); return; } /* END purge_completed_jobs() */
// Serves a single accepted connection: reads the request line and the
// header block, splits the request line into "<command> <url> <version>",
// and hands the request to the first registered handler that claims it.
// Only GET and POST are accepted. If no handler claims the request a plain
// text 404 response is written. Exceptions are logged, not propagated.
void HTTPServer_Impl::connection_thread_main(TCPConnection connection)
{
	try
	{
		std::string request;
		if (!read_line(connection, request))
		{
			connection.disconnect_abortive();
			return;
		}

		std::string headers;
		if (!read_lines(connection, headers))
		{
			connection.disconnect_abortive();
			return;
		}

		// Extract request command, url and version. The request line must
		// contain exactly two spaces.
		const std::string::size_type command_end = request.find(' ');
		if (command_end == std::string::npos)
			throw Exception("Bad request");

		std::string command = request.substr(0, command_end);
		if (!(command == "POST" || command == "GET"))
			throw Exception("Unsupported");

		const std::string::size_type url_end = request.find(' ', command_end + 1);
		if (url_end == std::string::npos)
			throw Exception("Bad request");

		std::string url = request.substr(command_end + 1, url_end - command_end - 1);

		if (request.find(' ', url_end + 1) != std::string::npos)
			throw Exception("Bad request");

		std::string version = request.substr(url_end + 1);

		DataBuffer request_data;

		// Look for a request handler that will deal with the HTTP request.
		// The handler list is only touched while the mutex is held; the
		// chosen handler is copied out and run with the mutex released.
		MutexSection mutex_lock(&mutex);
		const std::vector<HTTPRequestHandler>::size_type handler_count = handlers.size();
		bool handled = false;

		for (std::vector<HTTPRequestHandler>::size_type i = 0; i < handler_count; i++)
		{
			if (!handlers[i].is_handling_request(command, url, headers))
				continue;

			HTTPRequestHandler handler = handlers[i];
			mutex_lock.unlock();

			if (command == "POST")
			{
				write_line(connection, "HTTP/1.1 100 Continue");
				write_line(connection, "");
			}

			std::shared_ptr<HTTPServerConnection_Impl> connection_impl(std::make_shared<HTTPServerConnection_Impl>());
			connection_impl->connection = connection;
			connection_impl->request_type = command;
			connection_impl->request_url = url;
			connection_impl->request_headers = headers;

			HTTPServerConnection http_connection(connection_impl);
			handler.handle_request(http_connection);

			handled = true;
			break;
		}
		mutex_lock.unlock();

		if (!handled)
		{
			// No handler wants it. Reply with 404 Not Found.
			// Content-Length is the message length plus 2.
			std::string error_msg("404 Not Found");
			write_line(connection, "HTTP/1.1 404 Not Found");
			write_line(connection, "Server: ClanLib HTTP Server");
			write_line(connection, "Connection: close");
			write_line(connection, "Vary: *");
			write_line(connection, "Content-Type: text/plain");
			write_line(connection, "Content-Length: " + StringHelp::int_to_local8(error_msg.length()+2));
			write_line(connection, "");
			write_line(connection, error_msg);
		}

		connection.disconnect_graceful();
	}
	catch (const Exception& e)
	{
		log_event("error", e.message);
	}
}
/*
 * handle_cr4_event - handle a guest write to CR4.
 *
 * Reads the current CR4 and the value the guest is about to write, and, if
 * they differ, runs every CR4 policy entry over the change through
 * process_cr4_policy().  The policies may modify ctx.new_cr4_value and set
 * ctx.log.
 *
 * If the resulting value equals the current CR4, the response is set to
 * IKGT_EVENT_RESPONSE_REDIRECT and nothing is written back.  Otherwise the
 * resulting value is written to the operand register and the response is
 * IKGT_EVENT_RESPONSE_ALLOW.  A log entry is emitted when ctx.log is set.
 */
void handle_cr4_event(ikgt_event_info_t *event_info)
{
	uint64_t requested_cr4;
	uint64_t current_cr4;
	ikgt_cpu_event_info_t *cpuinfo;
	ikgt_vmcs_guest_state_reg_id_t operand_reg_id;
	ikgt_status_t status;
	policy_cr4_ctx ctx;
	int idx;

	event_info->response = IKGT_EVENT_RESPONSE_ALLOW;
	g_cr4_count++;

	cpuinfo = (ikgt_cpu_event_info_t *)(event_info->event_specific_data);
	if (IKGT_CPU_REG_UNKNOWN == cpuinfo->operand_reg) {
		ikgt_printf("Error, cpuinfo->operand_reg=IKGT_CPU_REG_UNKNOWN\n");
		return;
	}

	status = read_guest_reg(VMCS_GUEST_STATE_CR4, &current_cr4);
	if (IKGT_STATUS_SUCCESS != status)
		return;

	/* get the VMCS reg ID for the operand */
	status = get_vmcs_guest_reg_id(cpuinfo->operand_reg, &operand_reg_id);
	if (IKGT_STATUS_SUCCESS != status)
		return;

	/* the operand register holds the new value to be written to cr4 */
	status = read_guest_reg(operand_reg_id, &requested_cr4);
	if (IKGT_STATUS_SUCCESS != status)
		return;

	/* nothing to evaluate when no bit changes */
	if (current_cr4 == requested_cr4)
		return;

	ctx.event_info = event_info;
	ctx.new_cr4_value = requested_cr4;
	ctx.cur_cr4_value = current_cr4;
	ctx.diff = current_cr4 ^ requested_cr4;
	ctx.log = FALSE;

	for (idx = 0; idx < POLICY_MAX_ENTRIES; idx++) {
		policy_entry_t *entry = policy_get_entry_by_index(idx);

		if ((POLICY_GET_RESOURCE_ID(entry) != RESOURCE_ID_UNKNOWN)
			&& IS_CR4_ENTRY(entry)) {
			process_cr4_policy(entry, &ctx);
		}
	}

	/* policies left nothing of the change: skip the write */
	if (ctx.new_cr4_value == current_cr4)
		event_info->response = IKGT_EVENT_RESPONSE_REDIRECT;

	if (ctx.log) {
		char log_entry_message[LOG_MESSAGE_SIZE];
		char access[MAX_ACCESS_BUF_SIZE];
		char action[MAX_ACTION_BUF_SIZE];

		(void)mon_sprintf_s(access, MAX_ACCESS_BUF_SIZE, "write");
		action_to_string(&event_info->response, action);

		(void)mon_sprintf_s(log_entry_message, LOG_MESSAGE_SIZE,
			"resource-name=CR4, access=%s, value=0x%016llx, RIP=0x%016llx, action=%s",
			access,
			requested_cr4,
			event_info->vmcs_guest_state.ia32_reg_rip,
			action);

		log_event(log_entry_message, event_info->thread_id);
	}

	/* If response is skip then return */
	if (event_info->response == IKGT_EVENT_RESPONSE_REDIRECT)
		return;

	status = write_guest_reg(operand_reg_id, ctx.new_cr4_value);
	if (IKGT_STATUS_SUCCESS != status) {
		ikgt_printf("error, write_guest_reg(%u)=%u\n", operand_reg_id, status);
	}

	event_info->response = IKGT_EVENT_RESPONSE_ALLOW;
}
int process_request( struct tcp_chan *chan) /* file descriptor (socket) to get request */ { int rc = PBSE_NONE; struct batch_request *request = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; long acl_enable = FALSE; long state = SV_STATE_DOWN; time_t time_now = time(NULL); int free_request = TRUE; char tmpLine[MAXLINE]; char *auth_err = NULL; enum conn_type conn_active; unsigned short conn_socktype; unsigned short conn_authen; unsigned long conn_addr; int sfds = chan->sock; pthread_mutex_lock(svr_conn[sfds].cn_mutex); conn_active = svr_conn[sfds].cn_active; conn_socktype = svr_conn[sfds].cn_socktype; conn_authen = svr_conn[sfds].cn_authen; conn_addr = svr_conn[sfds].cn_addr; svr_conn[sfds].cn_lasttime = time_now; pthread_mutex_unlock(svr_conn[sfds].cn_mutex); if ((request = alloc_br(0)) == NULL) { snprintf(tmpLine, sizeof(tmpLine), "cannot allocate memory for request from %lu", conn_addr); req_reject(PBSE_MEM_MALLOC, 0, request, NULL, tmpLine); free_request = FALSE; rc = PBSE_SYSTEM; goto process_request_cleanup; } request->rq_conn = sfds; /* * Read in the request and decode it to the internal request structure. 
*/ if (conn_active == FromClientDIS || conn_active == ToServerDIS) { #ifdef ENABLE_UNIX_SOCKETS if ((conn_socktype & PBS_SOCK_UNIX) && (conn_authen != PBS_NET_CONN_AUTHENTICATED)) { /* get_creds interestingly always returns 0 */ get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname); } #endif /* END ENABLE_UNIX_SOCKETS */ rc = dis_request_read(chan, request); } else { char out[80]; snprintf(tmpLine, MAXLINE, "request on invalid type of connection: %d, sock type: %d, from address %s", conn_active,conn_socktype, netaddr_long(conn_addr, out)); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, "process_req", tmpLine); snprintf(tmpLine, sizeof(tmpLine), "request on invalid type of connection (%d) from %s", conn_active, netaddr_long(conn_addr, out)); req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine); free_request = FALSE; rc = PBSE_BADHOST; goto process_request_cleanup; } if (rc == -1) { /* FAILURE */ /* premature end of file */ rc = PBSE_PREMATURE_EOF; goto process_request_cleanup; } if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE)) { /* FAILURE */ /* read error, likely cannot send reply so just disconnect */ /* ??? not sure about this ??? 
*/ goto process_request_cleanup; } if (rc > 0) { /* FAILURE */ /* * request didn't decode, either garbage or unknown * request type, in either case, return reject-reply */ req_reject(rc, 0, request, NULL, "cannot decode message"); free_request = FALSE; goto process_request_cleanup; } if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0) { sprintf(log_buf, "%s: %lu", pbse_to_txt(PBSE_BADHOST), conn_addr); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buf); snprintf(tmpLine, sizeof(tmpLine), "cannot determine hostname for connection from %lu", conn_addr); req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine); free_request = FALSE; rc = PBSE_BADHOST; goto process_request_cleanup; } if (LOGLEVEL >= 1) { sprintf(log_buf, msg_request, reqtype_to_txt(request->rq_type), request->rq_user, request->rq_host, sfds); log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buf); } /* is the request from a host acceptable to the server */ if (conn_socktype & PBS_SOCK_UNIX) { strcpy(request->rq_host, server_name); } get_svr_attr_l(SRV_ATR_acl_host_enable, &acl_enable); if (acl_enable) { /* acl enabled, check it; always allow myself and nodes */ struct array_strings *pas = NULL; struct pbsnode *isanode; get_svr_attr_arst(SRV_ATR_acl_hosts, &pas); isanode = PGetNodeFromAddr(conn_addr); if ((isanode == NULL) && (strcmp(server_host, request->rq_host) != 0) && (acl_check_my_array_string(pas, request->rq_host, ACL_Host) == 0)) { char tmpLine[MAXLINE]; snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s", request->rq_host); req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine); free_request = FALSE; rc = PBSE_BADHOST; goto process_request_cleanup; } if (isanode != NULL) unlock_node(isanode, "process_request", NULL, LOGLEVEL); } /* * determine source (user client or another server) of request. 
* set the permissions granted to the client */ if (conn_authen == PBS_NET_CONN_FROM_PRIVIL) { /* request came from another server */ request->rq_fromsvr = 1; request->rq_perm = ATR_DFLAG_USRD | ATR_DFLAG_USWR | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR; } else { /* request not from another server */ conn_credent[sfds].timestamp = time_now; request->rq_fromsvr = 0; /* * Client must be authenticated by an Authenticate User Request, if not, * reject request and close connection. -- The following is retained for * compat with old cmds -- The exception to this is of course the Connect * Request which cannot have been authenticated, because it contains the * needed ticket; so trap it here. Of course, there is no prior * authentication on the Authenticate User request either, but it comes * over a reserved port and appears from another server, hence is * automatically granted authentication. * * The above is only true with inet sockets. With unix domain sockets, the * user creds were read before the first dis_request_read call above. * We automatically granted authentication because we can trust the socket * creds. 
Authorization is still granted in svr_get_privilege below */ if (request->rq_type == PBS_BATCH_Connect) { req_connect(request); if (conn_socktype == PBS_SOCK_INET) { rc = PBSE_IVALREQ; req_reject(rc, 0, request, NULL, NULL); free_request = FALSE; goto process_request_cleanup; } } if (conn_socktype & PBS_SOCK_UNIX) { pthread_mutex_lock(svr_conn[sfds].cn_mutex); svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED; pthread_mutex_unlock(svr_conn[sfds].cn_mutex); } if (ENABLE_TRUSTED_AUTH == TRUE ) rc = PBSE_NONE; /* bypass the authentication of the user--trust the client completely */ else if (munge_on) { /* If munge_on is true we will validate the connection now */ if (request->rq_type == PBS_BATCH_AltAuthenUser) { rc = req_altauthenuser(request); free_request = FALSE; goto process_request_cleanup; } else { rc = authenticate_user(request, &conn_credent[sfds], &auth_err); } } else if (conn_authen != PBS_NET_CONN_AUTHENTICATED) /* skip checking user if we did not get an authenticated credential */ rc = PBSE_BADCRED; else rc = authenticate_user(request, &conn_credent[sfds], &auth_err); if (rc != 0) { req_reject(rc, 0, request, NULL, auth_err); if (auth_err != NULL) free(auth_err); free_request = FALSE; goto process_request_cleanup; } /* * pbs_mom and checkpoint restart scripts both need the authority to do * alters and releases on checkpointable jobs. Allow manager permission * for root on the jobs execution node. 
*/ if (((request->rq_type == PBS_BATCH_ModifyJob) || (request->rq_type == PBS_BATCH_ReleaseJob)) && (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0)) { job *pjob; char *dptr; int skip = FALSE; char short_host[PBS_MAXHOSTNAME+1]; /* make short host name */ strcpy(short_host, request->rq_host); if ((dptr = strchr(short_host, '.')) != NULL) { *dptr = '\0'; } if ((pjob = svr_find_job(request->rq_ind.rq_modify.rq_objname, FALSE)) != (job *)0) { if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) && ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) || (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) || (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) && (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL)) { request->rq_perm = svr_get_privilege(request->rq_user, server_host); skip = TRUE; } } unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } if (!skip) { request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host); } } else { request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host); } } /* END else (conn_authen == PBS_NET_CONN_FROM_PRIVIL) */ /* if server shutting down, disallow new jobs and new running */ get_svr_attr_l(SRV_ATR_State, &state); if (state > SV_STATE_RUN) { switch (request->rq_type) { case PBS_BATCH_AsyrunJob: case PBS_BATCH_JobCred: case PBS_BATCH_MoveJob: case PBS_BATCH_QueueJob: case PBS_BATCH_RunJob: case PBS_BATCH_StageIn: case PBS_BATCH_jobscript: req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL); rc = PBSE_SVRDOWN; free_request = FALSE; goto process_request_cleanup; /*NOTREACHED*/ break; } } /* * dispatch the request to the correct processing function. * The processing function must call reply_send() to free * the request struture. 
*/ rc = dispatch_request(sfds, request); return(rc); process_request_cleanup: if (free_request == TRUE) free_br(request); return(rc); } /* END process_request() */
/* smtp_open() -- Open connection to a remote SMTP listener */ int smtp_open(char *host, int port) { #ifdef INET6 struct addrinfo hints, *ai0, *ai; char servname[NI_MAXSERV]; int s; #else struct sockaddr_in name; struct hostent *hent; int i, s, namelen; #endif #ifdef HAVE_SSL int err; char buf[(BUF_SZ + 1)]; /* Init SSL stuff */ SSL_CTX *ctx; SSL_METHOD *meth; X509 *server_cert; SSL_load_error_strings(); SSLeay_add_ssl_algorithms(); meth=SSLv23_client_method(); ctx = SSL_CTX_new(meth); if(!ctx) { log_event(LOG_ERR, "No SSL support initiated\n"); return(-1); } if(use_cert == True) { if(SSL_CTX_use_certificate_chain_file(ctx, tls_cert) <= 0) { perror("Use certfile"); return(-1); } if(SSL_CTX_use_PrivateKey_file(ctx, tls_cert, SSL_FILETYPE_PEM) <= 0) { perror("Use PrivateKey"); return(-1); } if(!SSL_CTX_check_private_key(ctx)) { log_event(LOG_ERR, "Private key does not match the certificate public key\n"); return(-1); } } #endif #ifdef INET6 memset(&hints, 0, sizeof(hints)); hints.ai_family = p_family; hints.ai_socktype = SOCK_STREAM; snprintf(servname, sizeof(servname), "%d", port); /* Check we can reach the host */ if (getaddrinfo(host, servname, &hints, &ai0)) { log_event(LOG_ERR, "Unable to locate %s", host); return(-1); } for (ai = ai0; ai; ai = ai->ai_next) { /* Create a socket for the connection */ s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (s < 0) { continue; } if (connect(s, ai->ai_addr, ai->ai_addrlen) < 0) { s = -1; continue; } break; } if(s < 0) { log_event (LOG_ERR, "Unable to connect to \"%s\" port %d.\n", host, port); return(-1); } #else /* Check we can reach the host */ if((hent = gethostbyname(host)) == (struct hostent *)NULL) { log_event(LOG_ERR, "Unable to locate %s", host); return(-1); } if(hent->h_length > sizeof(hent->h_addr)) { log_event(LOG_ERR, "Buffer overflow in gethostbyname()"); return(-1); } /* Create a socket for the connection */ if((s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) { log_event(LOG_ERR, "Unable to 
create a socket"); return(-1); } for (i = 0; ; ++i) { if (!hent->h_addr_list[i]) { log_event(LOG_ERR, "Unable to connect to %s:%d", host, port); return(-1); } /* This SHOULD already be in Network Byte Order from gethostbyname() */ name.sin_addr.s_addr = ((struct in_addr *)(hent->h_addr_list[i]))->s_addr; name.sin_family = hent->h_addrtype; name.sin_port = htons(port); namelen = sizeof(struct sockaddr_in); if(connect(s, (struct sockaddr *)&name, namelen) < 0) continue; break; } #endif #ifdef HAVE_SSL if(use_tls == True) { log_event(LOG_INFO, "Creating SSL connection to host"); if (use_starttls == True) { use_tls=False; /* need to write plain text for a while */ if (smtp_okay(s, buf)) { smtp_write(s, "EHLO %s", hostname); if (smtp_okay(s, buf)) { smtp_write(s, "STARTTLS"); /* assume STARTTLS regardless */ if (!smtp_okay(s, buf)) { log_event(LOG_ERR, "STARTTLS not working"); return(-1); } } else { log_event(LOG_ERR, "Invalid response: %s (%s)", buf, hostname); } } else { log_event(LOG_ERR, "Invalid response SMTP Server (STARTTLS)"); return(-1); } use_tls=True; /* now continue as normal for SSL */ } ssl = SSL_new(ctx); if(!ssl) { log_event(LOG_ERR, "SSL not working"); return(-1); } SSL_set_fd(ssl, s); err = SSL_connect(ssl); if(err < 0) { perror("SSL_connect"); return(-1); } if(log_level > 0 || 1) { log_event(LOG_INFO, "SSL connection using %s", SSL_get_cipher(ssl)); } server_cert = SSL_get_peer_certificate(ssl); if(!server_cert) { return(-1); } X509_free(server_cert); /* TODO: Check server cert if changed! */ } #endif return(s); }
static int forced_jobpurge( struct batch_request *preq) { job *pjob; if ((pjob = find_job(preq->rq_ind.rq_delete.rq_objname)) == NULL) { log_event( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, preq->rq_ind.rq_delete.rq_objname, pbse_to_txt(PBSE_UNKJOBID)); req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL); return(-1); } /* check about possibly purging the job */ if (preq->rq_extend != NULL) { if (!strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr))) { if (((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) != 0) || ((svr_chk_owner(preq, pjob) == 0) && (server.sv_attr[SRV_ATR_OwnerPurge].at_val.at_long))) { sprintf(log_buffer, "purging job without checking MOM"); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); reply_ack(preq); free_nodes(pjob); if (pjob->ji_qhdr->qu_qs.qu_type == QTYPE_Execution) { set_resc_assigned(pjob, DECR); } job_purge(pjob); return(1); } else { /* FAILURE */ req_reject(PBSE_PERM, 0, preq, NULL, NULL); return(-1); } } } return(0); } /* END forced_jobpurge() */
/* ssmtp() -- send the message (exactly one) from stdin to the mailhub SMTP port */ int ssmtp(char *argv[]) { char b[(BUF_SZ + 2)], *buf = b+1, *p, *q; #ifdef MD5AUTH char challenge[(BUF_SZ + 1)]; #endif struct passwd *pw; int i, sock; uid_t uid; bool_t minus_v_save, leadingdot, linestart = True; int timeout = 0; int bufsize = sizeof(b)-1; b[0] = '.'; outbytes = 0; ht = &headers; uid = getuid(); if((pw = getpwuid(uid)) == (struct passwd *)NULL) { die("Could not find password entry for UID %d", uid); } get_arpadate(arpadate); if(read_config() == False) { log_event(LOG_INFO, "%s not found", config_file); } if((p = strtok(pw->pw_gecos, ";,"))) { if((gecos = strdup(p)) == (char *)NULL) { die("ssmtp() -- strdup() failed"); } } revaliases(pw); /* revaliases() may have defined this */ if(uad == (char *)NULL) { uad = append_domain(pw->pw_name); } rt = &rcpt_list; header_parse(stdin); #if 1 /* With FromLineOverride=YES set, try to recover sane MAIL FROM address */ uad = append_domain(uad); #endif from = from_format(uad, override_from); /* Now to the delivery of the message */ (void)signal(SIGALRM, (void(*)())handler); /* Catch SIGALRM */ (void)alarm((unsigned) MAXWAIT); /* Set initial timer */ if(setjmp(TimeoutJmpBuf) != 0) { /* Then the timer has gone off and we bail out */ die("Connection lost in middle of processing"); } if((sock = smtp_open(mailhost, port)) == -1) { die("Cannot open %s:%d", mailhost, port); } else if (use_starttls == False) /* no initial response after STARTTLS */ { if(smtp_okay(sock, buf) == False) die("Invalid response SMTP server"); } /* If user supplied username and password, then try ELHO */ if(auth_user) { outbytes += smtp_write(sock, "EHLO %s", hostname); } else { outbytes += smtp_write(sock, "HELO %s", hostname); } (void)alarm((unsigned) MEDWAIT); if(smtp_okay(sock, buf) == False) { die("%s (%s)", buf, hostname); } /* Try to log in if username was supplied */ if(auth_user) { #ifdef MD5AUTH if(auth_pass == (char *)NULL) { auth_pass = strdup(""); 
} if(auth_method && strcasecmp(auth_method, "cram-md5") == 0) { outbytes += smtp_write(sock, "AUTH CRAM-MD5"); (void)alarm((unsigned) MEDWAIT); if(smtp_read(sock, buf) != 3) { die("Server rejected AUTH CRAM-MD5 (%s)", buf); } strncpy(challenge, strchr(buf,' ') + 1, sizeof(challenge)); memset(buf, 0, bufsize); crammd5(challenge, auth_user, auth_pass, buf); } else { #endif memset(buf, 0, bufsize); to64frombits(buf, auth_user, strlen(auth_user)); if (use_oldauth) { outbytes += smtp_write(sock, "AUTH LOGIN %s", buf); } else { outbytes += smtp_write(sock, "AUTH LOGIN"); (void)alarm((unsigned) MEDWAIT); if(smtp_read(sock, buf) != 3) { die("Server didn't like our AUTH LOGIN (%s)", buf); } /* we assume server asked us for Username */ memset(buf, 0, bufsize); to64frombits(buf, auth_user, strlen(auth_user)); outbytes += smtp_write(sock, buf); } (void)alarm((unsigned) MEDWAIT); if(smtp_read(sock, buf) != 3) { die("Server didn't accept AUTH LOGIN (%s)", buf); } memset(buf, 0, bufsize); to64frombits(buf, auth_pass, strlen(auth_pass)); #ifdef MD5AUTH } #endif /* We do NOT want the password output to STDERR * even base64 encoded.*/ minus_v_save = minus_v; minus_v = False; outbytes += smtp_write(sock, "%s", buf); minus_v = minus_v_save; (void)alarm((unsigned) MEDWAIT); if(smtp_okay(sock, buf) == False) { die("Authorization failed (%s)", buf); } } /* Send "MAIL FROM:" line */ outbytes += smtp_write(sock, "MAIL FROM:<%s>", uad); (void)alarm((unsigned) MEDWAIT); if(smtp_okay(sock, buf) == 0) { die("%s", buf); } /* Send all the To: adresses */ /* Either we're using the -t option, or we're using the arguments */ if(minus_t) { if(rcpt_list.next == (rcpt_t *)NULL) { die("No recipients specified although -t option used"); } rt = &rcpt_list; while(rt->next) { p = rcpt_remap(rt->string); outbytes += smtp_write(sock, "RCPT TO:<%s>", p); (void)alarm((unsigned)MEDWAIT); if(smtp_okay(sock, buf) == 0) { die("RCPT TO:<%s> (%s)", p, buf); } rt = rt->next; } } else { for(i = 1; (argv[i] != NULL); 
i++) { p = strtok(argv[i], ","); while(p) { /* RFC822 Address -> "foo@bar" */ q = rcpt_remap(addr_parse(p)); outbytes += smtp_write(sock, "RCPT TO:<%s>", q); (void)alarm((unsigned) MEDWAIT); if(smtp_okay(sock, buf) == 0) { die("RCPT TO:<%s> (%s)", q, buf); } p = strtok(NULL, ","); } } } /* Send DATA */ outbytes += smtp_write(sock, "DATA"); (void)alarm((unsigned) MEDWAIT); if(smtp_read(sock, buf) != 3) { /* Oops, we were expecting "354 send your data" */ die("%s", buf); } outbytes += smtp_write(sock, "Received: by %s (sSMTP sendmail emulation); %s", hostname, arpadate); if(have_from == False) { outbytes += smtp_write(sock, "From: %s", from); } if(have_date == False) { outbytes += smtp_write(sock, "Date: %s", arpadate); } #ifdef HASTO_OPTION if(have_to == False) { outbytes += smtp_write(sock, "To: postmaster"); } #endif ht = &headers; while(ht->next) { outbytes += smtp_write(sock, "%s", ht->string); ht = ht->next; } (void)alarm((unsigned) MEDWAIT); /* End of headers, start body */ outbytes += smtp_write(sock, ""); /*prevent blocking on pipes, we really shouldnt be using stdio functions like fgets in the first place */ fcntl(STDIN_FILENO,F_SETFL,O_NONBLOCK); while(!feof(stdin)) { if (!fgets(buf, bufsize, stdin)) { /* if nothing was received, then no transmission * over smtp should be done */ sleep(1); /* don't hang forever when reading from stdin */ if (++timeout >= MEDWAIT) { log_event(LOG_ERR, "killed: timeout on stdin while reading body -- message saved to dead.letter."); die("Timeout on stdin while reading body"); } continue; } /* Trim off \n, double leading .'s */ leadingdot = standardise(buf, &linestart); if (linestart || feof(stdin)) { linestart = True; outbytes += smtp_write(sock, "%s", leadingdot ? 
b : buf); } else { if (log_level > 0) { log_event(LOG_INFO, "Sent a very long line in chunks"); } if (leadingdot) { outbytes += fd_puts(sock, b, sizeof(b)); } else { outbytes += fd_puts(sock, buf, bufsize); } } (void)alarm((unsigned) MEDWAIT); } if(!linestart) { smtp_write(sock, ""); } /* End of body */ outbytes += smtp_write(sock, "."); (void)alarm((unsigned) MAXWAIT); if(smtp_okay(sock, buf) == 0) { die("%s", buf); } /* Close connection */ (void)signal(SIGALRM, SIG_IGN); outbytes += smtp_write(sock, "QUIT"); (void)smtp_okay(sock, buf); (void)close(sock); log_event(LOG_INFO, "Sent mail for %s (%s) uid=%d username=%s outbytes=%d", from_strip(uad), buf, uid, pw->pw_name, outbytes); return(0); }
/*
 * post_signal_req - complete a signal request once the MOM's reply is in.
 *
 * The client connection saved in rq_orgconn is put back on the request.
 * A non-zero reply code is forwarded to the client as a rejection.
 * Otherwise the job named by preq->rq_extra is looked up and, for
 * SIG_SUSPEND / SIG_RESUME, its JOB_SVFLG_Suspend flag, state character,
 * quick-saved copy and node allocation are updated; the request is then
 * acknowledged.  A job that can no longer be found is logged and treated
 * as success.
 *
 * preq - the signal request; NULL means it has been handled elsewhere.
 *        On the success path preq->rq_extra (the job id) is freed here.
 */
void post_signal_req(

  batch_request *preq)

  {
  char *signalled_id;
  job  *target;
  char  msg[LOCAL_LOG_BUF_SIZE];

  /* request has been handled elsewhere */
  if (preq == NULL)
    return;

  /* restore client socket */
  preq->rq_conn = preq->rq_orgconn;

  if (preq->rq_reply.brp_code)
    {
    /* the MOM refused the signal: hand its code back to the client */
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_REQUEST,
      preq->rq_ind.rq_signal.rq_jid,
      pbse_to_txt(PBSE_MOMREJECT));

    errno = 0;

    req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL);

    return;
    }

  if ((signalled_id = preq->rq_extra) == NULL)
    {
    log_err(ENOMEM, __func__, "Cannot allocate memory! FAILURE");

    return;
    }

  target = svr_find_job(signalled_id, FALSE);

  if (target == NULL)
    {
    /* job is gone */
    snprintf(msg, sizeof(msg),
      "Cannot find job '%s', assuming success",
      signalled_id);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, __func__, msg);
    }
  else
    {
    int is_suspended = ((target->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) != 0);

    if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_SUSPEND) == 0)
      {
      if (!is_suspended)
        {
        target->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend;

        set_statechar(target);

        job_save(target, SAVEJOB_QUICK, 0);

        /* release resources allocated to suspended job - NORWAY */
        free_nodes(target);
        }
      }
    else if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_RESUME) == 0)
      {
      if (is_suspended)
        {
        /* re-allocate assigned node to resumed job - NORWAY */
        set_old_nodes(target);

        target->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;

        set_statechar(target);

        job_save(target, SAVEJOB_QUICK, 0);
        }
      }

    unlock_ji_mutex(target, __func__, "5", LOGLEVEL);
    }

  free(signalled_id);

  reply_ack(preq);

  return;
  } /* END post_signal_req() */
/**
 * @brief
 *    validate_job_formula - validate that the sorting formula is in the
 *    correct form.  We do this by calling python and having
 *    it catch exceptions.
 *
 *    A small Python script is generated that evaluates the formula twice
 *    inside exec(): first against a dictionary holding every known
 *    resource name plus the built-in formula keywords (globals1), then
 *    against a dictionary restricted to resources of numeric type
 *    (globals2).  The script records the outcome in the Python variables
 *    errnum/errmsg, which are read back after it has run in a
 *    sub-interpreter.  On success the formula is written to the scheduler
 *    formula file under pbs_conf.pbs_home_path.
 *
 * @param[in] pattr   - attribute holding the formula string
 * @param[in] pobject - not used by this function
 * @param[in] actmode - action mode; ATR_ACTION_FREE is a no-op
 *
 * @return int
 * @retval 0                     - formula accepted (or actmode is ATR_ACTION_FREE)
 * @retval PBSE_BAD_FORMULA      - syntax error in the formula (errnum 1)
 * @retval PBSE_BAD_FORMULA_KW   - unknown name in the formula (errnum 2)
 * @retval PBSE_BAD_FORMULA_TYPE - resource of non-numeric type (errnum 3)
 * @retval PBSE_SYSTEM           - allocation, import, interpreter or file failure
 * @retval PBSE_INTERNAL         - no Python support, no formula, or unrecognized error
 */
int
validate_job_formula(attribute *pattr, void *pobject, int actmode)
{
    char *formula;
    char *errmsg = NULL;
    struct resource_def *pres;
    FILE *fp;
    char buf[1024];
    char pathbuf[MAXPATHLEN];
    char *globals1 = NULL;
    int globals_size1 = 1024;
    char *globals2 = NULL;
    int globals_size2 = 1024;
    char *script = NULL;
    int script_size = 2048;
    PyThreadState *ts_main = NULL;
    PyThreadState *ts_sub = NULL;
    int rc = 0;
    int err = 0;

    if (actmode == ATR_ACTION_FREE)
        return (0);

#ifndef PYTHON
    return PBSE_INTERNAL;
#else
    if (!Py_IsInitialized())
        return PBSE_INTERNAL;

    formula = pattr->at_val.at_str;
    if (formula == NULL)
        return PBSE_INTERNAL;

    globals1 = malloc(globals_size1);
    if (globals1 == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    globals2 = malloc(globals_size2);
    if (globals2 == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    strcpy(globals1, "globals1={");
    strcpy(globals2, "globals2={");

    /* We need to create a python dictionary to pass to python as a list
     * of valid symbols.
     */
    for (pres = svr_resc_def; pres; pres = pres->rs_next) {
        /* unknown resource is used as a delimiter between builtin and custom resources */
        if (strcmp(pres->rs_name, RESOURCE_UNKNOWN) != 0) {
            snprintf(buf, sizeof(buf), "\'%s\':1,", pres->rs_name);
            if (pbs_strcat(&globals1, &globals_size1, buf) == NULL) {
                rc = PBSE_SYSTEM;
                goto validate_job_formula_exit;
            }
            /* only numeric resources are valid operands in the second pass */
            if (pres->rs_type == ATR_TYPE_LONG ||
                pres->rs_type == ATR_TYPE_SIZE ||
                pres->rs_type == ATR_TYPE_LL ||
                pres->rs_type == ATR_TYPE_SHORT ||
                pres->rs_type == ATR_TYPE_FLOAT) {
                if (pbs_strcat(&globals2, &globals_size2, buf) == NULL) {
                    rc = PBSE_SYSTEM;
                    goto validate_job_formula_exit;
                }
            }
        }
    }

    /* the built-in formula keywords close both dictionaries */
    snprintf(buf, sizeof(buf), "\'%s\':1, '%s':1, \'%s\':1,\'%s\':1, \'%s\':1, \'%s\':1, \'%s\':1, \'%s\': 1}\n",
        FORMULA_ELIGIBLE_TIME, FORMULA_QUEUE_PRIO, FORMULA_JOB_PRIO,
        FORMULA_FSPERC, FORMULA_FSPERC_DEP, FORMULA_TREE_USAGE,
        FORMULA_FSFACTOR, FORMULA_ACCRUE_TYPE);
    if (pbs_strcat(&globals1, &globals_size1, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    if (pbs_strcat(&globals2, &globals_size2, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    /* Allocate a buffer for the Python code */
    script = malloc(script_size);
    if (script == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    *script = '\0';

    /* import math and initialize variables */
    sprintf(buf,
        "ans = 0\n"
        "errnum = 0\n"
        "errmsg = \'\'\n"
        "try:\n"
        " from math import *\n"
        "except ImportError, e:\n"
        " errnum=4\n"
        " errmsg=str(e)\n");
    if (pbs_strcat(&script, &script_size, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    /* set up our globals dictionary */
    if (pbs_strcat(&script, &script_size, globals1) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    if (pbs_strcat(&script, &script_size, globals2) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    /*
     * NOTE(review): the formula is spliced unescaped into a single-quoted
     * Python string that is handed to exec().  A formula containing a
     * quote or backslash can terminate that string early.  Confirm that
     * only trusted managers can set this attribute, or escape/reject such
     * characters before building the script.
     */

    /* Now for the real guts: The initial try/except block*/
    sprintf(buf,
        "try:\n"
        " exec(\'ans=");
    if (pbs_strcat(&script, &script_size, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    if (pbs_strcat(&script, &script_size, formula) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    /*
     * The second pass is nested under "if errnum == 0:", so its try and
     * except lines sit one level in and their bodies two levels in.
     * Python rejects the whole script if a block body is not indented
     * deeper than its header.
     */
    sprintf(buf,
        "\', globals1, locals())\n"
        "except SyntaxError, e:\n"
        " errnum=1\n"
        " errmsg=str(e)\n"
        "except NameError, e:\n"
        " errnum=2\n"
        " errmsg=str(e)\n"
        "except Exception, e:\n"
        " pass\n"
        "if errnum == 0:\n"
        " try:\n"
        "  exec(\'ans=");
    if (pbs_strcat(&script, &script_size, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    if (pbs_strcat(&script, &script_size, formula) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    sprintf(buf,
        "\', globals2, locals())\n"
        " except NameError, e:\n"
        "  errnum=3\n"
        "  errmsg=str(e)\n"
        " except Exception, e:\n"
        "  pass\n");
    if (pbs_strcat(&script, &script_size, buf) == NULL) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }

    /* run the script in a subinterpreter */
    ts_main = PyThreadState_Get();
    ts_sub = Py_NewInterpreter();
    if (!ts_sub) {
        rc = PBSE_SYSTEM;
        goto validate_job_formula_exit;
    }
    err = PyRun_SimpleString(script);

    /* peek into the interpreter to get the values of err and errmsg */
    if (err == 0) {
        PyObject *module;
        PyObject *dict;
        PyObject *val;

        /* stays -1 (unrecognized error) unless errnum can be read back */
        err = -1;
        if ((module = PyImport_AddModule("__main__"))) {
            if ((dict = PyModule_GetDict(module))) {
                char *p;

                if ((val = PyDict_GetItemString(dict, "errnum"))) {
                    p = pbs_python_object_str(val);
                    if (p != NULL && *p != '\0')
                        err = atoi(p);
                }
                if ((val = PyDict_GetItemString(dict, "errmsg"))) {
                    p = pbs_python_object_str(val);
                    if (p != NULL && *p != '\0')
                        errmsg = strdup(p);
                }
            }
        }
    }

    switch (err) {
        case 0: /* Success */
            rc = 0;
            break;
        case 1: /* Syntax error in formula */
            rc = PBSE_BAD_FORMULA;
            break;
        case 2: /* unknown resource name */
            rc = PBSE_BAD_FORMULA_KW;
            break;
        case 3: /* resource of non-numeric type */
            rc = PBSE_BAD_FORMULA_TYPE;
            break;
        case 4: /* import error */
            rc = PBSE_SYSTEM;
            break;
        default: /* unrecognized error */
            rc = PBSE_INTERNAL;
            break;
    }

    if (err == 0) {
        snprintf(pathbuf, sizeof(pathbuf), "%s/%s",
            pbs_conf.pbs_home_path, FORMULA_ATTR_PATH_SCHED);
        if ((fp = fopen(pathbuf, "w")) == NULL) {
            rc = PBSE_SYSTEM;
            goto validate_job_formula_exit;
        }

        /* a short or failed write must not be reported as success */
        if (fprintf(fp, "### PBS INTERNAL FILE DO NOT MODIFY ###\n") < 0 ||
            fprintf(fp, "%s\n", formula) < 0)
            rc = PBSE_SYSTEM;
        if (fclose(fp) != 0)
            rc = PBSE_SYSTEM;
    } else {
        snprintf(buf, sizeof(buf), "Validation Error: %s",
            errmsg ? errmsg : "Internal error");
        log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_SERVER, LOG_DEBUG,
            __func__, buf);
    }

validate_job_formula_exit:
    /* tear down the sub-interpreter (if any) and restore the main thread state */
    if (ts_main) {
        if (ts_sub)
            Py_EndInterpreter(ts_sub);
        PyThreadState_Swap(ts_main);
    }
    free(script);
    free(globals1);
    free(globals2);
    free(errmsg);
    return rc;
#endif
}
/* Returns: NSSM_HOOK_STATUS_SUCCESS if the hook ran successfully. NSSM_HOOK_STATUS_NOTFOUND if no hook was found. NSSM_HOOK_STATUS_ABORT if the hook failed and we should cancel service start. NSSM_HOOK_STATUS_ERROR on error. NSSM_HOOK_STATUS_NOTRUN if the hook didn't run. NSSM_HOOK_STATUS_TIMEOUT if the hook timed out. NSSM_HOOK_STATUS_FAILED if the hook failed. */ int nssm_hook(hook_thread_t *hook_threads, nssm_service_t *service, TCHAR *hook_event, TCHAR *hook_action, unsigned long *hook_control, unsigned long deadline, bool async) { int ret = 0; hook_t *hook = (hook_t *) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(hook_t)); if (! hook) { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_OUT_OF_MEMORY, _T("hook"), _T("nssm_hook()"), 0); return NSSM_HOOK_STATUS_ERROR; } FILETIME now; GetSystemTimeAsFileTime(&now); EnterCriticalSection(&service->hook_section); /* Set the environment. */ if (service->env) duplicate_environment(service->env); if (service->env_extra) set_environment_block(service->env_extra); /* ABI version. */ TCHAR number[16]; _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), NSSM_HOOK_VERSION); SetEnvironmentVariable(NSSM_HOOK_ENV_VERSION, number); /* Event triggering this action. */ SetEnvironmentVariable(NSSM_HOOK_ENV_EVENT, hook_event); /* Hook action. */ SetEnvironmentVariable(NSSM_HOOK_ENV_ACTION, hook_action); /* Control triggering this action. May be empty. */ if (hook_control) SetEnvironmentVariable(NSSM_HOOK_ENV_TRIGGER, service_control_text(*hook_control)); else SetEnvironmentVariable(NSSM_HOOK_ENV_TRIGGER, _T("")); /* Last control handled. */ SetEnvironmentVariable(NSSM_HOOK_ENV_LAST_CONTROL, service_control_text(service->last_control)); /* Path to NSSM. */ TCHAR path[PATH_LENGTH]; GetModuleFileName(0, path, _countof(path)); SetEnvironmentVariable(NSSM_HOOK_ENV_IMAGE_PATH, path); /* NSSM version. 
*/ SetEnvironmentVariable(NSSM_HOOK_ENV_NSSM_CONFIGURATION, NSSM_CONFIGURATION); SetEnvironmentVariable(NSSM_HOOK_ENV_NSSM_VERSION, NSSM_VERSION); SetEnvironmentVariable(NSSM_HOOK_ENV_BUILD_DATE, NSSM_DATE); /* NSSM PID. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), GetCurrentProcessId()); SetEnvironmentVariable(NSSM_HOOK_ENV_PID, number); /* NSSM runtime. */ set_hook_runtime(NSSM_HOOK_ENV_RUNTIME, &service->nssm_creation_time, &now); /* Application PID. */ if (service->pid) { _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->pid); SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_PID, number); /* Application runtime. */ set_hook_runtime(NSSM_HOOK_ENV_APPLICATION_RUNTIME, &service->creation_time, &now); /* Exit code. */ SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, _T("")); } else { SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_PID, _T("")); if (str_equiv(hook_event, NSSM_HOOK_EVENT_START) && str_equiv(hook_action, NSSM_HOOK_ACTION_PRE)) { SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_RUNTIME, _T("")); SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, _T("")); } else { set_hook_runtime(NSSM_HOOK_ENV_APPLICATION_RUNTIME, &service->creation_time, &service->exit_time); /* Exit code. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->exitcode); SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, number); } } /* Deadline for this script. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), deadline); SetEnvironmentVariable(NSSM_HOOK_ENV_DEADLINE, number); /* Service name. */ SetEnvironmentVariable(NSSM_HOOK_ENV_SERVICE_NAME, service->name); SetEnvironmentVariable(NSSM_HOOK_ENV_SERVICE_DISPLAYNAME, service->displayname); /* Times the service was asked to start. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->start_requested_count); SetEnvironmentVariable(NSSM_HOOK_ENV_START_REQUESTED_COUNT, number); /* Times the service actually did start. 
*/ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->start_count); SetEnvironmentVariable(NSSM_HOOK_ENV_START_COUNT, number); /* Times the service exited. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->exit_count); SetEnvironmentVariable(NSSM_HOOK_ENV_EXIT_COUNT, number); /* Throttled count. */ _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->throttle); SetEnvironmentVariable(NSSM_HOOK_ENV_THROTTLE_COUNT, number); /* Command line. */ TCHAR app[CMD_LENGTH]; _sntprintf_s(app, _countof(app), _TRUNCATE, _T("\"%s\" %s"), service->exe, service->flags); SetEnvironmentVariable(NSSM_HOOK_ENV_COMMAND_LINE, app); TCHAR cmd[CMD_LENGTH]; if (get_hook(service->name, hook_event, hook_action, cmd, sizeof(cmd))) { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_GET_HOOK_FAILED, hook_event, hook_action, service->name, 0); duplicate_environment_strings(service->initial_env); LeaveCriticalSection(&service->hook_section); HeapFree(GetProcessHeap(), 0, hook); return NSSM_HOOK_STATUS_ERROR; } /* No hook. */ if (! _tcslen(cmd)) { duplicate_environment_strings(service->initial_env); LeaveCriticalSection(&service->hook_section); HeapFree(GetProcessHeap(), 0, hook); return NSSM_HOOK_STATUS_NOTFOUND; } /* Run the command. 
*/ STARTUPINFO si; ZeroMemory(&si, sizeof(si)); si.cb = sizeof(si); PROCESS_INFORMATION pi; ZeroMemory(&pi, sizeof(pi)); unsigned long flags = 0; #ifdef UNICODE flags |= CREATE_UNICODE_ENVIRONMENT; #endif ret = NSSM_HOOK_STATUS_NOTRUN; if (CreateProcess(0, cmd, 0, 0, false, flags, 0, service->dir, &si, &pi)) { hook->name = (TCHAR *) HeapAlloc(GetProcessHeap(), 0, HOOK_NAME_LENGTH * sizeof(TCHAR)); if (hook->name) _sntprintf_s(hook->name, HOOK_NAME_LENGTH, _TRUNCATE, _T("%s (%s/%s)"), service->name, hook_event, hook_action); hook->process_handle = pi.hProcess; hook->pid = pi.dwProcessId; hook->deadline = deadline; if (get_process_creation_time(hook->process_handle, &hook->creation_time)) GetSystemTimeAsFileTime(&hook->creation_time); unsigned long tid; HANDLE thread_handle = CreateThread(NULL, 0, await_hook, (void *) hook, 0, &tid); if (thread_handle) { if (async) { ret = 0; await_hook_threads(hook_threads, service->status_handle, &service->status, 0); add_thread_handle(hook_threads, thread_handle, hook->name); } else { await_single_handle(service->status_handle, &service->status, thread_handle, hook->name, _T(__FUNCTION__), deadline + NSSM_SERVICE_STATUS_DEADLINE); unsigned long exitcode; GetExitCodeThread(thread_handle, &exitcode); ret = (int) exitcode; CloseHandle(thread_handle); } } else { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_CREATETHREAD_FAILED, error_string(GetLastError()), 0); await_hook(hook); if (hook->name) HeapFree(GetProcessHeap(), 0, hook->name); HeapFree(GetProcessHeap(), 0, hook); } } else { log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_HOOK_CREATEPROCESS_FAILED, hook_event, hook_action, service->name, cmd, error_string(GetLastError()), 0); HeapFree(GetProcessHeap(), 0, hook); } /* Restore our environment. */ duplicate_environment_strings(service->initial_env); LeaveCriticalSection(&service->hook_section); return ret; }
int job_or_resv_save_fs(void *pobj, int updatetype, int objtype) { int fds; int openflags; int redo; char *filename; char namebuf1[MAXPATHLEN+1]; char namebuf2[MAXPATHLEN+1]; char *path = NULL; char *err_msg = NULL; char *err_msgl = NULL; char *prefix = NULL; char *suffix = NULL; char *cpsuffix = NULL; char *p_oid = NULL; long *p_mtime = NULL; int *p_modified = NULL; void *pfixed = NULL; ssize_t i; size_t fixed_size; attribute_def *p_attr_def = NULL; attribute *wattr = NULL; int final_attr; int eventclass; int pmode; #ifdef WIN32 pmode = _S_IWRITE | _S_IREAD; #else pmode = 0600; #endif if (objtype == RESC_RESV_OBJECT || objtype == RESV_JOB_OBJECT) { #ifndef PBS_MOM /* MOM knows not of resc_resv structs */ resc_resv *presv; presv = (resc_resv *)pobj; err_msg = "reservation_save"; err_msgl = "Link in reservation_save failed"; path = path_resvs; prefix = presv->ri_qs.ri_fileprefix; suffix = RESV_FILE_SUFFIX; cpsuffix = RESV_FILE_COPY; p_modified = &presv->ri_modified; pfixed = (void *)&presv->ri_qs; fixed_size = sizeof(struct resvfix); p_attr_def = resv_attr_def; wattr = presv->ri_wattr; final_attr = RESV_ATR_LAST; p_mtime = &presv->ri_wattr[RESV_ATR_mtime].at_val.at_long; p_oid = presv->ri_qs.ri_resvID; eventclass = PBS_EVENTCLASS_RESV; #else /* PBS_MOM only: Execution will never come here for MOM */ return (-1); #endif } else if (objtype == JOB_OBJECT) { job *pj = (job *)pobj; #ifndef PBS_MOM /*MOM knows not of resc_resv structs*/ if (pj->ji_resvp) { int rc = 0; if (updatetype == SAVEJOB_QUICK) rc = job_or_resv_save((void *)pj->ji_resvp, SAVERESV_QUICK, RESC_RESV_OBJECT); else if ((updatetype == SAVEJOB_FULL) || (updatetype == SAVEJOB_FULLFORCE) || (updatetype == SAVEJOB_NEW)) rc = job_or_resv_save((void *)pj->ji_resvp, SAVERESV_FULL, RESC_RESV_OBJECT); if (rc) return (rc); } #endif err_msg = "job_save"; err_msgl = "Link in job_save failed"; path = path_jobs; if (*pj->ji_qs.ji_fileprefix != '\0') prefix = pj->ji_qs.ji_fileprefix; else prefix = pj->ji_qs.ji_jobid; 
suffix = JOB_FILE_SUFFIX; cpsuffix = JOB_FILE_COPY; p_modified = &pj->ji_modified; pfixed = (void *)&pj->ji_qs; fixed_size = sizeof(struct jobfix); p_attr_def = job_attr_def; wattr = pj->ji_wattr; final_attr = JOB_ATR_LAST; p_mtime = &pj->ji_wattr[JOB_ATR_mtime].at_val.at_long; p_oid = pj->ji_qs.ji_jobid; eventclass = PBS_EVENTCLASS_JOB; return (job_save_fs(pj, updatetype)); } else { /*Don't expect to get here; incorrect object type*/ return (-1); } (void)strcpy(namebuf1, path); /* directory path */ (void)strcat(namebuf1, prefix); (void)strcpy(namebuf2, namebuf1); /* setup for later */ (void)strcat(namebuf1, suffix); /*if an attribute changed (modified==1) update mtime*/ if (*p_modified) { *p_mtime = time_now; } if (updatetype == SAVEJOB_QUICK || updatetype == SAVERESV_QUICK) { openflags = O_WRONLY; fds = open(namebuf1, openflags, pmode); if (fds < 0) { log_err(errno, err_msg, "error on open"); return (-1); } #ifdef WIN32 secure_file(namebuf1, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); setmode(fds, O_BINARY); #endif /* just write the "critical" base structure to the file */ while ((i = write(fds, (char *)pfixed, fixed_size)) != fixed_size) { if ((i < 0) && (errno == EINTR)) { /* retry the write */ if (lseek(fds, (off_t)0, SEEK_SET) < 0) { log_err(errno, err_msg, "lseek"); (void)close(fds); return (-1); } continue; } else { log_err(errno, err_msg, "quickwrite"); (void)close(fds); return (-1); } } (void)close(fds); } else { /* * write the whole structure to the file. * For a update, this is done to a new file to protect the * old against crashs. * The file is written in four parts: * (1) the job (resc_resv) structure, * (2) the attributes in "encoded" form, * (3) the attributes in the "external" form, and last * (4) the dependency list. 
*/ (void)strcat(namebuf2, cpsuffix); openflags = O_CREAT | O_WRONLY; #ifdef WIN32 fix_perms2(namebuf2, namebuf1); #endif if (updatetype == SAVEJOB_NEW || updatetype == SAVERESV_NEW) filename = namebuf1; else filename = namebuf2; fds = open(filename, openflags, pmode); if (fds < 0) { log_err(errno, err_msg, "open for full save"); return (-1); } #ifdef WIN32 secure_file(filename, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); setmode(fds, O_BINARY); #endif redo = 0; /* try to save twice */ do { save_setup(fds); if (save_struct((char *)pfixed, fixed_size) != 0) { redo++; } else if (save_attr_fs(p_attr_def, wattr, final_attr) != 0) { redo++; } else if (save_flush() != 0) { redo++; } if (redo != 0) { if (lseek(fds, (off_t)0, SEEK_SET) < 0) { log_err(errno, err_msg, "full lseek"); redo++; } } } while (redo == 1); (void)close(fds); if (redo > 1) { if (updatetype == SAVEJOB_FULL || updatetype == SAVEJOB_FULLFORCE || updatetype == SAVERESV_FULL) (void)unlink(namebuf2); return (-1); } if (updatetype == SAVEJOB_FULL || updatetype == SAVEJOB_FULLFORCE || updatetype == SAVERESV_FULL) { #ifdef WIN32 if (MoveFileEx(namebuf2, namebuf1, MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH) == 0) { errno = GetLastError(); sprintf(log_buffer, "MoveFileEx(%s,%s) failed!", namebuf2, namebuf1); log_err(errno, err_msg, log_buffer); } secure_file(namebuf1, "Administrators", READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED); #else if (rename(namebuf2, namebuf1) == -1) { log_event(PBSEVENT_ERROR|PBSEVENT_SECURITY, eventclass, LOG_ERR, p_oid, err_msgl); } #endif } *p_modified = 0; } return (0); }
/*
 * Load the per-position character orderings for candidates of the given
 * length index from the charset file into char1, char2 and chars.
 *
 * The tables are cleared first, then the file is read from the offset
 * stored in header->offsets[length].  The stream is a sequence of
 * CHARSET_ESC-prefixed commands (CHARSET_NEW selects the position,
 * CHARSET_LINE selects the i/j row) interleaved with runs of characters
 * that are stored, NUL-terminated, into the currently selected row.
 * Reading stops at a CHARSET_NEW record whose length byte differs from
 * this length.  Hitting end of file is reported through pexit() when it
 * is a read error and through inc_format_error() otherwise.
 */
static void inc_new_length(unsigned int length,
    struct charset_header *header, FILE *file, char *charset,
    char *char1, char2_table char2, chars_table *chars)
{
    long start;
    int c, pos, i, j;
    char *row;
    int filled;

    log_event("- Switching to length %d", length + 1);

    /* reset every table that is in use for this length */
    char1[0] = 0;
    if (length)
        memset(char2, 0, sizeof(*char2));
    for (pos = 0; pos <= (int)length - 2; pos++)
        memset(chars[pos], 0, sizeof(**chars));

    /* the section offset is stored as four bytes, least significant first */
    start = (long)header->offsets[length][0];
    start |= (long)header->offsets[length][1] << 8;
    start |= (long)header->offsets[length][2] << 16;
    start |= (long)header->offsets[length][3] << 24;
    if (fseek(file, start, SEEK_SET))
        pexit("fseek");

    i = j = pos = -1;

    c = getc(file);
    while (c != EOF) {
        if (c != CHARSET_ESC) {
            /* a run of characters: pick the row it belongs to */
            switch (pos) {
            case -1:
                inc_format_error(charset);

            case 0:
                row = char1;
                break;

            case 1:
                if (j < 0)
                    inc_format_error(charset);
                row = (*char2)[j];
                break;

            default:
                if (i < 0 || j < 0)
                    inc_format_error(charset);
                row = (*chars[pos - 2])[i][j];
            }

            filled = 0;
            row[filled] = c;
            while ((c = getc(file)) != EOF) {
                row[++filled] = c;
                if (c == CHARSET_ESC)
                    break;
                if (filled >= CHARSET_SIZE)
                    inc_format_error(charset);
            }
            /* overwrite the terminating escape with the NUL */
            row[filled] = 0;

            /* c is now either CHARSET_ESC or EOF; re-test the loop condition */
            continue;
        }

        /* c is CHARSET_ESC: the next byte is the command */
        c = getc(file);
        if (c == EOF)
            break;

        if (c == CHARSET_NEW) {
            /* a record for another length ends this section */
            c = getc(file);
            if (c != (int)length)
                break;
            c = getc(file);
            if (c == EOF)
                break;
            if (c < 0 || c > (int)length)
                inc_format_error(charset);
            pos = c;
        } else if (c == CHARSET_LINE) {
            if (pos < 0)
                inc_format_error(charset);
            c = getc(file);
            if (c == EOF)
                break;
            i = c;
            if (i < 0 || i > CHARSET_SIZE)
                inc_format_error(charset);
            c = getc(file);
            if (c == EOF)
                break;
            j = c;
            if (j < 0 || j > CHARSET_SIZE)
                inc_format_error(charset);
        } else {
            inc_format_error(charset);
        }

        c = getc(file);
    }

    if (c == EOF) {
        if (ferror(file))
            pexit("getc");
        else
            inc_format_error(charset);
    }
}
void handle_truncated_qstat( bool exec_only, bool condensed, batch_request *preq) { long sentJobCounter = 0; long qmaxreport; all_queues_iterator *queue_iter = NULL; pbs_queue *pque; char log_buf[LOCAL_LOG_BUF_SIZE]; job *pjob; svrattrl *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr); batch_reply *preply = &preq->rq_reply; int bad = 0; svr_queues.lock(); queue_iter = svr_queues.get_iterator(); svr_queues.unlock(); /* loop through all queues */ while ((pque = next_queue(&svr_queues, queue_iter)) != NULL) { long qjcounter = 0; mutex_mgr queue_mutex(pque->qu_mutex, true); if ((exec_only == true) && (pque->qu_qs.qu_type != QTYPE_Execution)) { /* ignore routing queues */ continue; } if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) && (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0)) { qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long; } else { qmaxreport = TMAX_JOB; } if (LOGLEVEL >= 5) { snprintf(log_buf, sizeof(log_buf), "Reporting up to %ld idle jobs in queue %s\n", qmaxreport, pque->qu_qs.qu_name); log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf); } /* loop through jobs in queue */ all_jobs_iterator *jobiter = NULL; pque->qu_jobs->lock(); jobiter = pque->qu_jobs->get_iterator(); pque->qu_jobs->unlock(); while ((pjob = next_job(pque->qu_jobs, jobiter)) != NULL) { mutex_mgr job_mgr(pjob->ji_mutex, true); if ((qjcounter >= qmaxreport) && (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)) { /* max_report of queued jobs reached for queue */ continue; } int rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, condensed, &bad); if ((rc != 0) && (rc != PBSE_PERM)) { req_reject(rc, bad, preq, NULL, NULL); delete queue_iter; return; } sentJobCounter++; if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED) qjcounter++; } /* END foreach (pjob from pque) */ if (LOGLEVEL >= 5) { snprintf(log_buf, sizeof(log_buf), "Reported %ld total jobs for queue %s\n", sentJobCounter, pque->qu_qs.qu_name); 
log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf); } } /* END for (pque) */ reply_send_svr(preq); delete queue_iter; return; } // END handle_truncated_qstat()
void do_incremental_crack(struct db_main *db, char *mode) { char *charset; int min_length, max_length, max_count; char *extra; FILE *file; struct charset_header *header; unsigned int check; char allchars[CHARSET_SIZE + 1]; char char1[CHARSET_SIZE + 1]; char2_table char2; chars_table chars[CHARSET_LENGTH - 2]; unsigned char *ptr; unsigned int length, fixed, count; unsigned int real_count; int last_length, last_count; int pos; if (!mode) { if (db->format == &fmt_LM) mode = "LanMan"; else if (db->format == &fmt_NETLM) mode = "LanMan"; else if (db->format == &fmt_NETHALFLM) mode = "LanMan"; else mode = "All"; } log_event("Proceeding with \"incremental\" mode: %.100s", mode); if (!(charset = cfg_get_param(SECTION_INC, mode, "File"))) { log_event("! No charset defined"); fprintf(stderr, "No charset defined for mode: %s\n", mode); error(); } extra = cfg_get_param(SECTION_INC, mode, "Extra"); if ((min_length = cfg_get_int(SECTION_INC, mode, "MinLen")) < 0) min_length = 0; if ((max_length = cfg_get_int(SECTION_INC, mode, "MaxLen")) < 0) max_length = CHARSET_LENGTH; max_count = cfg_get_int(SECTION_INC, mode, "CharCount"); if (min_length > max_length) { log_event("! MinLen = %d exceeds MaxLen = %d", min_length, max_length); fprintf(stderr, "MinLen = %d exceeds MaxLen = %d\n", min_length, max_length); error(); } if (min_length > db->format->params.plaintext_length) { log_event("! MinLen = %d is too large for this hash type", min_length); fprintf(stderr, "MinLen = %d exceeds the maximum possible " "length for the current hash type (%d)\n", min_length, db->format->params.plaintext_length); error(); } if (max_length > db->format->params.plaintext_length) { log_event("! MaxLen = %d is too large for this hash type", max_length); fprintf(stderr, "Warning: " "MaxLen = %d is too large for the current hash type, " "reduced to %d\n", max_length, db->format->params.plaintext_length); max_length = db->format->params.plaintext_length; } if (max_length > CHARSET_LENGTH) { log_event("! 
MaxLen = %d exceeds the compile-time limit of %d", max_length, CHARSET_LENGTH); fprintf(stderr, "\n" "MaxLen = %d exceeds the compile-time limit of %d\n\n" "There are several good reasons why you probably don't " "need to raise it:\n" "- many hash types don't support passwords " "(or password halves) longer than\n" "7 or 8 characters;\n" "- you probably don't have sufficient statistical " "information to generate a\n" "charset file for lengths beyond 8;\n" "- the limitation applies to incremental mode only.\n", max_length, CHARSET_LENGTH); error(); } if (!(file = fopen(path_expand(charset), "rb"))) pexit("fopen: %s", path_expand(charset)); header = (struct charset_header *)mem_alloc(sizeof(*header)); charset_read_header(file, header); if (ferror(file)) pexit("fread"); if (feof(file) || (memcmp(header->version, CHARSET_V1, sizeof(header->version)) && memcmp(header->version, CHARSET_V2, sizeof(header->version))) || !header->count) inc_format_error(charset); if (header->min != CHARSET_MIN || header->max != CHARSET_MAX || header->length != CHARSET_LENGTH) { log_event("! Incompatible charset file: %.100s", charset); fprintf(stderr, "Incompatible charset file: %s\n", charset); error(); } if (header->count > CHARSET_SIZE) inc_format_error(charset); check = (unsigned int)header->check[0] | ((unsigned int)header->check[1] << 8) | ((unsigned int)header->check[2] << 16) | ((unsigned int)header->check[3] << 24); if (!rec_restoring_now) rec_check = check; if (rec_check != check) { log_event("! Charset file has changed: %.100s", charset); fprintf(stderr, "Charset file has changed: %s\n", charset); error(); } fread(allchars, header->count, 1, file); if (ferror(file)) pexit("fread"); if (feof(file)) inc_format_error(charset); allchars[header->count] = 0; if (expand(allchars, extra ? 
extra : "", sizeof(allchars))) inc_format_error(charset); real_count = strlen(allchars); if (max_count < 0) max_count = CHARSET_SIZE; if (min_length != max_length) log_event("- Lengths %d to %d, up to %d different characters", min_length, max_length, max_count); else log_event("- Length %d, up to %d different characters", min_length, max_count); if ((unsigned int)max_count > real_count) { log_event("! Only %u characters available", real_count); fprintf(stderr, "Warning: only %u characters available\n", real_count); } if (!(db->format->params.flags & FMT_CASE)) switch (is_mixedcase(allchars)) { case -1: inc_format_error(charset); case 1: log_event("! Mixed-case charset, " "but the hash type is case-insensitive"); fprintf(stderr, "Warning: mixed-case charset, " "but the current hash type is case-insensitive;\n" "some candidate passwords may be unnecessarily " "tried more than once.\n"); } if (header->length >= 2) char2 = (char2_table)mem_alloc(sizeof(*char2)); else char2 = NULL; for (pos = 0; pos < (int)header->length - 2; pos++) chars[pos] = (chars_table)mem_alloc(sizeof(*chars[0])); rec_compat = 0; rec_entry = 0; memset(rec_numbers, 0, sizeof(rec_numbers)); status_init(NULL, 0); rec_restore_mode(restore_state); #ifdef WEBAPI if(packet_id) { int ret; // This is a new packet inc_rec_state.initialized = 0; inc_rec_state.words_requested = packet_rounds; inc_rec_state.words_generated = 0; inc_rec_state.cc_0 = -1; inc_rec_state.cc_1 = -1; inc_rec_state.cc_2 = -1; ret = sscanf(packet_state, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%127[^\n]", &rec_entry, &rec_numbers[0], &rec_numbers[1], &rec_numbers[2], &rec_numbers[3], &rec_numbers[4], &rec_numbers[5], &rec_numbers[6], &rec_numbers[7], &inc_rec_state.pos, &inc_rec_state.numbers_cache, &inc_rec_state.cc_0, &inc_rec_state.cc_1, &inc_rec_state.cc_2, inc_rec_state.key_i); if(ret < 14 || ret > 15) { log_event("Invalid packet state, found %d fields in %s", ret, packet_state); // XXX - Handle more gracefully.. 
error(); } status_init(webapi_inc_get_progress, 0); } #endif rec_init(db, save_state); ptr = header->order + (entry = rec_entry) * 3; memcpy(numbers, rec_numbers, sizeof(numbers)); crk_init(db, fix_state, NULL); last_count = last_length = -1; entry--; while (ptr < &header->order[sizeof(header->order) - 1]) { entry++; length = *ptr++; fixed = *ptr++; count = *ptr++; if (length >= CHARSET_LENGTH || fixed > length || count >= CHARSET_SIZE) inc_format_error(charset); if (entry != rec_entry) memset(numbers, 0, sizeof(numbers)); if (count >= real_count || (fixed && !count)) continue; if ((int)length + 1 < min_length || (int)length >= max_length || (int)count >= max_count) continue; if ((int)length != last_length) { inc_new_length(last_length = length, header, file, charset, char1, char2, chars); last_count = -1; } if ((int)count > last_count) inc_new_count(length, last_count = count, charset, allchars, char1, char2, chars); if (!length && !min_length) { min_length = 1; if (crk_process_key("")) break; } #if 0 log_event("- Trying length %d, fixed @%d, character count %d", length + 1, fixed + 1, count + 1); #endif if (inc_key_loop(length, fixed, count, char1, char2, chars)) break; } crk_done(); rec_done(event_abort); for (pos = 0; pos < (int)header->length - 2; pos++) MEM_FREE(chars[pos]); MEM_FREE(char2); MEM_FREE(header); fclose(file); }
int stat_to_mom( const char *job_id, struct stat_cntl *cntl) /* M */ { struct batch_request *newrq; int rc = PBSE_NONE; unsigned long addr; char log_buf[LOCAL_LOG_BUF_SIZE+1]; struct pbsnode *node; int handle = -1; unsigned long job_momaddr = -1; unsigned short job_momport = -1; char *job_momname = NULL; job *pjob = NULL; if ((pjob = svr_find_job(job_id, FALSE)) == NULL) return(PBSE_JOBNOTFOUND); mutex_mgr job_mutex(pjob->ji_mutex, true); if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) || (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str)) { job_mutex.unlock(); snprintf(log_buf, sizeof(log_buf), "Job %s missing MOM's information. Skipping statting on this job", pjob->ji_qs.ji_jobid); log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); return PBSE_BAD_PARAMETER; } job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr; job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport; job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str); job_mutex.unlock(); if (job_momname == NULL) return PBSE_MEM_MALLOC; if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL) { free(job_momname); return PBSE_MEM_MALLOC; } if (cntl->sc_type == 1) snprintf(newrq->rq_ind.rq_status.rq_id, sizeof(newrq->rq_ind.rq_status.rq_id), "%s", job_id); else newrq->rq_ind.rq_status.rq_id[0] = '\0'; /* get stat of all */ CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr); /* if MOM is down just return stale information */ addr = job_momaddr; node = tfind_addr(addr,job_momport,job_momname); free(job_momname); if (node == NULL) return PBSE_UNKNODE; if ((node->nd_state & INUSE_NOT_READY)||(node->nd_power_state != POWER_STATE_RUNNING)) { if (LOGLEVEL >= 6) { snprintf(log_buf, sizeof(log_buf), "node '%s' is allocated to job but in state 'down'", node->get_name()); log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf); } node->unlock_node(__func__, "no rely mom", LOGLEVEL); free_br(newrq); return PBSE_NORELYMOM; } /* get connection to MOM */ node->unlock_node(__func__, "before svr_connect", 
LOGLEVEL); handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL); if (handle >= 0) { if ((rc = issue_Drequest(handle, newrq, true)) == PBSE_NONE) { stat_update(newrq, cntl); } } else rc = PBSE_CONNECT; if (rc == PBSE_SYSTEM) rc = PBSE_MEM_MALLOC; free_br(newrq); return(rc); } /* END stat_to_mom() */
/*
 * Report that the charset file does not have the expected format.
 *
 * The problem is recorded through log_event() (with the file name capped
 * at 100 characters by the format) and printed in full on stderr, after
 * which error() is called.
 *
 * NOTE(review): error() is presumably non-returning; confirm against its
 * declaration before relying on control not coming back from here.
 */
static void inc_format_error(char *charset)
{
	/* log entry first, so the failure is recorded even if stderr is lost */
	log_event("! Incorrect charset file format: %.100s", charset);

	/* user-visible diagnostic */
	fprintf(stderr, "Incorrect charset file format: %s\n", charset);

	error();
}
/** * @brief * Function to migrate filesystem data to database. * Reads serverdb, scheddb, job files, node, nodestate, queue, resv information * from the filesystem and save them into the database. All the information is * recovered and saved into the database under a single database transaction, * so any failure rolls back all the updates to the database. If all the updates * to the database succeed, only then the respective files are deleted from the * filesystem, else no deletion takes place. * * @return Error code * @retval 0 : success * @retval -1 : Failure * */ int svr_migrate_data_from_fs(void) { int baselen; struct dirent *pdirent; DIR *dir; int had; char *job_suffix = JOB_FILE_SUFFIX; int job_suf_len = strlen(job_suffix); job *pjob = NULL; pbs_queue *pque; resc_resv *presv; char *psuffix; int rc; int recovered = 0; char basen[MAXPATHLEN+1]; char scrfile[MAXPATHLEN+1]; char jobfile[MAXPATHLEN+1]; char origdir[MAXPATHLEN+1]; int fd; struct stat stbuf; char *scrbuf = NULL; pbs_db_jobscr_info_t jobscr; pbs_db_obj_info_t obj; path_svrdb_new = build_path(path_priv, PBS_SERVERDB, new_tag); path_scheddb = build_path(path_priv, PBS_SCHEDDB, NULL); path_scheddb_new = build_path(path_priv, PBS_SCHEDDB, new_tag); path_queues = build_path(path_priv, PBS_QUEDIR, suffix_slash); path_resvs = build_path(path_priv, PBS_RESVDIR, suffix_slash); path_nodes = build_path(path_priv, NODE_DESCRIP, NULL); path_nodestate = build_path(path_priv, NODE_STATUS, NULL); /* If not a "create" initialization, recover server db */ /* and sched db */ if (chk_save_file(path_svrdb) != 0) { fprintf(stderr, "No serverdb found to update to datastore\n"); return (0); } if (setup_resc(1) == -1) { fprintf(stderr, "%s\n", log_buffer); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } init_server_attrs(); /* start a database transation for the whole recovery */ if (pbs_db_begin_trx(svr_db_conn, 0, 0) != 0) return (-1); /* preprocess the nodes file to convert old properties to resources 
*/ if (setup_nodes_fs(1) == -1) { fprintf(stderr, "%s\n", log_buffer); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } /* Open the server database (save file) and read it in */ if (svr_recov_fs(path_svrdb) == -1) { fprintf(stderr, "%s\n", msg_init_baddb); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } /* save server information to database now */ if (svr_save_db(&server, SVR_SAVE_NEW) != 0) { fprintf(stderr, "Could not save server db\n"); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } /* now do sched db */ if (sched_recov_fs(path_scheddb) == -1) { fprintf(stderr, "Unable to recover scheddb\n"); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } if (sched_save_db(dflt_scheduler, SVR_SAVE_NEW) != 0) { fprintf(stderr, "Could not save scheduler db\n"); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } set_sched_default(dflt_scheduler, 0); /* save current working dir before any chdirs */ if (getcwd(origdir, MAXPATHLEN) == NULL) { fprintf(stderr, "getcwd failed\n"); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); return (-1); } if (chdir(path_queues) != 0) { fprintf(stderr, msg_init_chdir, path_queues); fprintf(stderr, "\n"); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } had = server.sv_qs.sv_numque; server.sv_qs.sv_numque = 0; dir = opendir("."); if (dir == NULL) { fprintf(stderr, "%s\n", msg_init_noqueues); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } while (errno = 0, (pdirent = readdir(dir)) != NULL) { if (chk_save_file(pdirent->d_name) == 0) { if ((pque = que_recov_fs(pdirent->d_name)) != NULL) { /* que_recov increments sv_numque */ fprintf(stderr, msg_init_recovque, pque->qu_qs.qu_name); fprintf(stderr, "\n"); if 
(que_save_db(pque, QUE_SAVE_NEW) != 0) { fprintf(stderr, "Could not save queue info for queue %s\n", pque->qu_qs.qu_name); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); (void) closedir(dir); chdir(origdir); return (-1); } } } } if (errno != 0 && errno != ENOENT) { fprintf(stderr, "%s\n", msg_init_noqueues); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); (void) closedir(dir); chdir(origdir); return (-1); } (void) closedir(dir); if (had != server.sv_qs.sv_numque) { fprintf(stderr, msg_init_expctq, had, server.sv_qs.sv_numque); fprintf(stderr, "\n"); } /* Open and read in node list if one exists */ if (setup_nodes_fs(0) == -1) { fprintf(stderr, "%s\n", log_buffer); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } /* * Recover reservations. */ if (chdir(path_resvs) != 0) { fprintf(stderr, msg_init_chdir, path_resvs); fprintf(stderr, "\n"); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } dir = opendir("."); if (dir == NULL) { fprintf(stderr, "%s\n", msg_init_noresvs); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } while (errno = 0, (pdirent = readdir(dir)) != NULL) { if (chk_save_file(pdirent->d_name) == 0) { presv = (resc_resv *) job_or_resv_recov_fs(pdirent->d_name, RESC_RESV_OBJECT); if (presv != NULL) { if (resv_save_db(presv, SAVERESV_NEW) != 0) { fprintf(stderr, "Could not save resv info for resv %s\n", presv->ri_qs.ri_resvID); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); (void) closedir(dir); chdir(origdir); return (-1); } } } } if (errno != 0 && errno != ENOENT) { fprintf(stderr, "%s\n", msg_init_noresvs); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); (void) closedir(dir); chdir(origdir); return (-1); } (void) closedir(dir); /* * Recover jobs */ if 
(chdir(path_jobs) != 0) { fprintf(stderr, msg_init_chdir, path_jobs); fprintf(stderr, "\n"); chdir(origdir); return (-1); } server.sv_qs.sv_numjobs = 0; recovered = 0; dir = opendir("."); if (dir == NULL) { fprintf(stderr, "%s\n", msg_init_nojobs); } else { /* Now, for each job found ... */ while (errno = 0, (pdirent = readdir(dir)) != NULL) { if (chk_save_file(pdirent->d_name) != 0) continue; /* recover the job */ baselen = strlen(pdirent->d_name) - job_suf_len; psuffix = pdirent->d_name + baselen; if (strcmp(psuffix, job_suffix)) continue; if ((pjob = job_recov_fs(pdirent->d_name)) == NULL) { (void)strcpy(basen, pdirent->d_name); psuffix = basen + baselen; (void)strcpy(psuffix, JOB_BAD_SUFFIX); (void)snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s", basen); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname, log_buffer); continue; } if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { /* load the job script file */ strcpy(scrfile, path_jobs); #ifndef WIN32 /* under WIN32, there's already a prefixed '/' */ (void) strcat(scrfile, "/"); #endif strcat(scrfile, pdirent->d_name); baselen = strlen(scrfile) - strlen(JOB_FILE_SUFFIX); scrfile[baselen] = 0; /* put null char */ strcat(scrfile, JOB_SCRIPT_SUFFIX); rc = 1; #ifdef WIN32 if ((fd = open(scrfile, O_BINARY | O_RDONLY)) != -1) #else if ((fd = open(scrfile, O_RDONLY)) != -1) #endif { /* load the script */ if (fstat(fd, &stbuf) == 0) { if ((scrbuf = malloc(stbuf.st_size + 1))) { if (read(fd, scrbuf, stbuf.st_size) == stbuf.st_size) { scrbuf[stbuf.st_size] = '\0'; /* null character */ rc = 0; /* success loading */ } } } close(fd); } if (rc != 0) { fprintf(stderr, "Could not recover script file for job %s\n", pjob->ji_qs.ji_jobid); (void) strcpy(basen, scrfile); psuffix = basen + strlen(scrfile) - strlen(JOB_SCRIPT_SUFFIX); (void) strcpy(psuffix, JOB_BAD_SUFFIX); (void) strcpy(jobfile, scrfile); psuffix = jobfile + strlen(jobfile) - strlen(JOB_SCRIPT_SUFFIX); (void) strcpy(psuffix, 
JOB_FILE_SUFFIX); #ifdef WIN32 if (MoveFileEx(jobfile, basen, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) { errno = GetLastError(); snprintf(log_buffer, sizeof(log_buffer), "MoveFileEx(%s, %s) failed!", jobfile, basen); log_err(errno, "script", log_buffer); } secure_file(basen, "Administrators", READS_MASK | WRITES_MASK | STANDARD_RIGHTS_REQUIRED); #else if (rename(jobfile, basen) == -1) { snprintf(log_buffer, sizeof(log_buffer), "error renaming job file %s", jobfile); log_err(errno, "job_recov", log_buffer); } #endif (void) snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s", basen); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname, log_buffer); free(scrbuf); scrbuf = NULL; continue; } } /* now save job first */ if (job_save_db(pjob, SAVEJOB_NEW) != 0) { fprintf(stderr, "Could not save job info for jobid %s\n", pjob->ji_qs.ji_jobid); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); (void) closedir(dir); chdir(origdir); free(scrbuf); return (-1); } if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { /* save job script */ strcpy(jobscr.ji_jobid, pjob->ji_qs.ji_jobid); jobscr.script = scrbuf; obj.pbs_db_obj_type = PBS_DB_JOBSCR; obj.pbs_db_un.pbs_db_jobscr = &jobscr; if (pbs_db_save_obj(svr_db_conn, &obj, PBS_INSERT_DB) != 0) { fprintf(stderr, "Could not save job script for jobid %s\n", pjob->ji_qs.ji_jobid); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); free(scrbuf); (void) closedir(dir); chdir(origdir); return (-1); } free(scrbuf); scrbuf = NULL; } recovered++; } if (errno != 0 && errno != ENOENT) { if (pjob) fprintf(stderr, "readdir error for jobid %s\n", pjob->ji_qs.ji_jobid); else fprintf(stderr, "readdir error\n"); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); free(scrbuf); (void) closedir(dir); chdir(origdir); 
return (-1); } (void) closedir(dir); fprintf(stderr, msg_init_exptjobs, recovered); fprintf(stderr, "\n"); } if (save_nodes_db(0, NULL) != 0) { fprintf(stderr, "Could not save nodes\n"); if (svr_db_conn->conn_db_err) fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err); (void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK); chdir(origdir); return (-1); } if (pbs_db_end_trx(svr_db_conn, PBS_DB_COMMIT) == 0) { rm_migrated_files(path_priv); chdir(origdir); return (0); } chdir(origdir); return -1; }
int req_rerunjob( struct batch_request *preq) { int rc = PBSE_NONE; job *pjob; int Force; int MgrRequired = TRUE; char log_buf[LOCAL_LOG_BUF_SIZE]; /* check if requestor is admin, job owner, etc */ if ((pjob = chk_job_request(preq->rq_ind.rq_rerun, preq)) == 0) { /* FAILURE */ /* chk_job_request calls req_reject() */ rc = PBSE_SYSTEM; return rc; /* This needs to fixed to return an accurate error */ } /* the job must be running or completed */ if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING) { if (pjob->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET) { /* allow end-users to rerun checkpointed jobs */ MgrRequired = FALSE; } } else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* job is running */ /* NO-OP */ } else { /* FAILURE - job is in bad state */ rc = PBSE_BADSTATE; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "job %s is in a bad state", preq->rq_ind.rq_rerun); req_reject(rc, 0, preq, NULL, log_buf); unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL); return rc; } if ((MgrRequired == TRUE) && ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0)) { /* FAILURE */ rc = PBSE_PERM; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "additional permissions required (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)"); req_reject(rc, 0, preq, NULL, log_buf); unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); return rc; } /* the job must be rerunnable */ if (pjob->ji_wattr[JOB_ATR_rerunable].at_val.at_long == 0) { /* NOTE: should force override this constraint? maybe (???) 
*/ /* no, the user is saying that the job will break, and IEEE Std 1003.1 specifically says rerun is to be rejected if rerunable==FALSE -garrick */ rc = PBSE_NORERUN; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "job %s not rerunnable", preq->rq_ind.rq_rerun); req_reject(rc, 0, preq, NULL, log_buf); unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL); return rc; } if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* ask MOM to kill off the job if it is running */ static const char *rerun = "rerun"; char *extra = strdup(rerun); rc = issue_signal(&pjob, "SIGKILL", post_rerun, extra); } else { if (pjob->ji_wattr[JOB_ATR_hold].at_val.at_long == HOLD_n) { svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED, FALSE); } else { svr_setjobstate(pjob, JOB_STATE_HELD, JOB_SUBSTATE_HELD, FALSE); } /* reset some job attributes */ pjob->ji_wattr[JOB_ATR_comp_time].at_flags &= ~ATR_VFLAG_SET; pjob->ji_wattr[JOB_ATR_reported].at_flags &= ~ATR_VFLAG_SET; set_statechar(pjob); rc = -1; } if (preq->rq_extend && !strncasecmp(preq->rq_extend, RERUNFORCE, strlen(RERUNFORCE))) Force = 1; else Force = 0; switch (rc) { case - 1: /* completed job was requeued */ /* clear out job completion time if there is one */ break; case 0: /* requeue request successful */ if (pjob != NULL) pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN; break; case PBSE_SYSTEM: /* This may not be accurate...*/ rc = PBSE_MEM_MALLOC; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Can not allocate memory"); req_reject(rc, 0, preq, NULL, log_buf); return rc; break; default: if (Force == 0) { rc = PBSE_MOMREJECT; snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Rejected by mom"); req_reject(rc, 0, preq, NULL, log_buf); if (pjob != NULL) unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL); return rc; } else { int newstate; int newsubst; unsigned int dummy; char *tmp; long cray_enabled = FALSE; if (pjob != NULL) { get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled); if ((cray_enabled == TRUE) && (pjob->ji_wattr[JOB_ATR_login_node_id].at_val.at_str != NULL)) 
tmp = parse_servername(pjob->ji_wattr[JOB_ATR_login_node_id].at_val.at_str, &dummy); else tmp = parse_servername(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, &dummy); /* Cannot communicate with MOM, forcibly requeue job. This is a relatively disgusting thing to do */ sprintf(log_buf, "rerun req to %s failed (rc=%d), forcibly requeueing job", tmp, rc); free(tmp); log_event( PBSEVENT_ERROR | PBSEVENT_ADMIN | PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); log_err(-1, __func__, log_buf); strcat(log_buf, ", previous output files may be lost"); svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buf); svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_RERUN3, FALSE); rel_resc(pjob); /* free resc assigned to job */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HOTSTART) == 0) { /* in case of server shutdown, don't clear exec_host */ /* will use it on hotstart when next comes up */ job_attr_def[JOB_ATR_exec_host].at_free(&pjob->ji_wattr[JOB_ATR_exec_host]); job_attr_def[JOB_ATR_session_id].at_free(&pjob->ji_wattr[JOB_ATR_session_id]); job_attr_def[JOB_ATR_exec_gpus].at_free(&pjob->ji_wattr[JOB_ATR_exec_gpus]); } pjob->ji_modified = 1; /* force full job save */ pjob->ji_momhandle = -1; pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_StagedIn; svr_evaljobstate(pjob, &newstate, &newsubst, 0); svr_setjobstate(pjob, newstate, newsubst, FALSE); } } break; } /* END switch (rc) */ /* So job has run and is to be rerun (not restarted) */ if (pjob == NULL) { rc = PBSE_JOB_RERUN; } else { pjob->ji_qs.ji_svrflags = (pjob->ji_qs.ji_svrflags & ~(JOB_SVFLG_CHECKPOINT_FILE |JOB_SVFLG_CHECKPOINT_MIGRATEABLE | JOB_SVFLG_CHECKPOINT_COPIED)) | JOB_SVFLG_HASRUN; sprintf(log_buf, msg_manager, msg_jobrerun, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); reply_ack(preq); /* note in accounting file */ account_record(PBS_ACCT_RERUN, pjob, NULL); unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL); } return rc; } /* END req_rerunjob() */
/* Note, in extremely high load cases, the alloc value in /proc/net/sockstat can exceed the max value. This will substantially slow down throughput and generate connection failures (accept gets a EMFILE error). As the client is designed to run on each submit host, that issue shouldn't occur. The client must be restarted to clear out this issue. */ int start_listener( const char *server_ip, int server_port, void *(*process_meth)(void *)) { struct sockaddr_in adr_svr; struct sockaddr_in adr_client; int rc = PBSE_NONE; int sockoptval = 1; int len_inet = sizeof(struct sockaddr_in); int *new_conn_port = NULL; int listen_socket = 0; int total_cntr = 0; pthread_t tid; pthread_attr_t t_attr; int objclass = 0; char msg_started[1024]; memset(&adr_svr, 0, sizeof(adr_svr)); adr_svr.sin_family = AF_INET; if (!(adr_svr.sin_port = htons(server_port))) { } else if ((adr_svr.sin_addr.s_addr = inet_addr(server_ip)) == INADDR_NONE) { rc = PBSE_SOCKET_FAULT; } else if ((listen_socket = socket_get_tcp()) < 0) { /* Can not get socket for listening */ rc = PBSE_SOCKET_FAULT; } else if (bind(listen_socket, (struct sockaddr *)&adr_svr, sizeof(struct sockaddr_in)) == -1) { /* Can not bind local socket */ rc = PBSE_SOCKET_FAULT; } else if (setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (void *)&sockoptval, sizeof(sockoptval)) == -1) { rc = PBSE_SOCKET_FAULT; } else if (listen(listen_socket, 128) == -1) { /* Can not listener on local socket */ rc = PBSE_SOCKET_LISTEN; } else if ((rc = pthread_attr_init(&t_attr)) != 0) { /* Can not init thread attribute structure */ rc = PBSE_THREADATTR; } else if ((rc = pthread_attr_setdetachstate(&t_attr, PTHREAD_CREATE_DETACHED)) != 0) { /* Can not set thread initial state as detached */ pthread_attr_destroy(&t_attr); } else { log_get_set_eventclass(&objclass, GETV); if (objclass == PBS_EVENTCLASS_TRQAUTHD) { snprintf(msg_started, sizeof(msg_started), "TORQUE authd daemon started and listening on IP:port %s:%d", server_ip, server_port); 
log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_TRQAUTHD, msg_daemonname, msg_started); } while (1) { if ((new_conn_port = (int *)calloc(1, sizeof(int))) == NULL) { printf("Error allocating new connection handle on accept.\n"); break; } if ((*new_conn_port = accept(listen_socket, (struct sockaddr *)&adr_client, (socklen_t *)&len_inet)) == -1) { if (errno == EMFILE) { sleep(1); printf("Temporary pause\n"); } else { printf("error in accept %s\n", strerror(errno)); break; } errno = 0; free(new_conn_port); new_conn_port = NULL; } else { if (debug_mode == TRUE) { process_meth((void *)new_conn_port); } else { pthread_create(&tid, &t_attr, process_meth, (void *)new_conn_port); } } if (debug_mode == TRUE) { if (total_cntr % 1000 == 0) { printf("Total requests: %d\n", total_cntr); } total_cntr++; } } if (new_conn_port != NULL) { free(new_conn_port); } pthread_attr_destroy(&t_attr); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, "net_srvr", "Socket close of network listener requested"); } if (listen_socket != -1) close(listen_socket); return(rc); } /* END start_listener() */
void req_movejob( struct batch_request *req) { #ifndef NDEBUG char *id = "req_movejob"; #endif job *jobp; jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req); if (jobp == NULL) { return; } if ((jobp->ji_qs.ji_state != JOB_STATE_QUEUED) && (jobp->ji_qs.ji_state != JOB_STATE_HELD) && (jobp->ji_qs.ji_state != JOB_STATE_WAITING)) { #ifndef NDEBUG sprintf(log_buffer, "%s %d", pbse_to_txt(PBSE_BADSTATE), jobp->ji_qs.ji_state); strcat(log_buffer, id); log_event( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer); #endif /* NDEBUG */ req_reject(PBSE_BADSTATE, 0, req, NULL, NULL); return; } /* * svr_movejob() does the real work, handles both local and * network moves */ switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, req)) { case ROUTE_SUCCESS: /* success */ strcpy(log_buffer, msg_movejob); sprintf(log_buffer + strlen(log_buffer), msg_manager, req->rq_ind.rq_move.rq_destin, req->rq_user, req->rq_host); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer); reply_ack(req); break; case ROUTE_PERM_FAILURE: case ROUTE_RETRY: /* fail */ /* NOTE: can pass detailed response to requestor (NYI) */ req_reject(pbs_errno, 0, req, NULL, NULL); break; case ROUTE_DEFERRED: /* deferred, will be handled by */ /* post_movejob() when the child completes */ /* NO-OP */ break; } /* END switch (svr_movejob(jobp,req->rq_ind.rq_move.rq_destin,req)) */ return; } /* END req_movejob() */