Ejemplo n.º 1
0
void req_orderjob(

  struct batch_request *req)  /* I */

  {
#ifndef NDEBUG
  char *id = "req_orderjob";
#endif
  job *pjob;
  job *pjob1;
  job *pjob2;
  int  rank;
  int  rc;
  char  tmpqn[PBS_MAXQUEUENAME+1];

  if ((pjob1 = chk_job_request(req->rq_ind.rq_move.rq_jid, req)) == NULL)
    {
    return;
    }

  if ((pjob2 = chk_job_request(req->rq_ind.rq_move.rq_destin, req)) == NULL)
    {
    return;
    }

  if (((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_RUNNING) ||
      ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_RUNNING))
    {
#ifndef NDEBUG
    sprintf(log_buffer, "%s %d",
            pbse_to_txt(PBSE_BADSTATE),
            pjob->ji_qs.ji_state);

    strcat(log_buffer, id);

    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return;
    }
  else if (pjob1->ji_qhdr != pjob2->ji_qhdr)
    {
    /* jobs are in different queues */

    if ((rc = svr_chkque(
                pjob1,
                pjob2->ji_qhdr,
                get_variable(pjob1, pbs_o_host),
                MOVE_TYPE_Order,
                NULL)) ||
        (rc = svr_chkque(
                pjob2,
                pjob1->ji_qhdr,
                get_variable(pjob2, pbs_o_host),
                MOVE_TYPE_Order,
                NULL)))
      {
      req_reject(rc, 0, req, NULL, NULL);

      return;
      }
    }

  /* now swap the order of the two jobs in the queue lists */

  rank = pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long;

  pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long =
    pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long;

  pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = rank;

  if (pjob1->ji_qhdr != pjob2->ji_qhdr)
    {
    (void)strcpy(tmpqn, pjob1->ji_qs.ji_queue);
    (void)strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue);
    (void)strcpy(pjob2->ji_qs.ji_queue, tmpqn);
    svr_dequejob(pjob1);
    svr_dequejob(pjob2);
    (void)svr_enquejob(pjob1);
    (void)svr_enquejob(pjob2);

    }
  else
    {
    swap_link(&pjob1->ji_jobque,  &pjob2->ji_jobque);
    swap_link(&pjob1->ji_alljobs, &pjob2->ji_alljobs);
    }

  /* need to update disk copy of both jobs to save new order */

  job_save(pjob1, SAVEJOB_FULL);

  job_save(pjob2, SAVEJOB_FULL);

  reply_ack(req);

  /* SUCCESS */

  return;
  }  /* END req_orderjob() */
Ejemplo n.º 2
0
void stat_update(
    
  struct batch_request *preq,
  struct stat_cntl     *cntl)

  {
  job                  *pjob;
  struct batch_reply   *preply;
  struct brp_status    *pstatus;
  svrattrl             *sattrl;
  int                   oldsid;
  int                   bad = 0;
  time_t                time_now = time(NULL);
  char                 *msg_ptr = NULL;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  preply = &preq->rq_reply;

  if ((preply->brp_choice != BATCH_REPLY_CHOICE_Queue) &&
      (preply->brp_un.brp_txt.brp_str != NULL))
    {
    msg_ptr = strstr(preply->brp_un.brp_txt.brp_str, PBS_MSG_EQUAL);
  
    if (msg_ptr != NULL)
      msg_ptr += strlen(PBS_MSG_EQUAL);
    }

  if (preply->brp_choice == BATCH_REPLY_CHOICE_Status)
    {
    pstatus = (struct brp_status *)GET_NEXT(preply->brp_un.brp_status);

    while (pstatus != NULL)
      {
      if ((pjob = svr_find_job(pstatus->brp_objname, FALSE)) != NULL)
        {
        mutex_mgr job_mutex(pjob->ji_mutex, true);

        sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr);

        oldsid = pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long;

        modify_job_attr(
          pjob,
          sattrl,
          ATR_DFLAG_MGWR | ATR_DFLAG_SvWR,
          &bad);

        if (oldsid != pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long)
          {
          /* first save since running job (or the sid has changed), */
          /* must save session id    */

          job_save(pjob, SAVEJOB_FULL, 0);
          }

#ifdef USESAVEDRESOURCES
        else
          {
          /* save so we can recover resources used */
          job_save(pjob, SAVEJOB_FULL, 0);
          }
#endif    /* USESAVEDRESOURCES */

        pjob->ji_momstat = time_now;
        }

      pstatus = (struct brp_status *)GET_NEXT(pstatus->brp_stlink);
      }  /* END while (pstatus != NULL) */
    }    /* END if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) */
  else if ((preply->brp_choice == BATCH_REPLY_CHOICE_Text) &&
           (preply->brp_code == PBSE_UNKJOBID) &&
           (msg_ptr != NULL) &&
           (!strcmp(msg_ptr,  preq->rq_ind.rq_status.rq_id)))
    {
    /* we sent a stat request, but mom says it doesn't know anything about
       the job */
    if ((pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE)) != NULL)
      {
      /* job really isn't running any more - mom doesn't know anything about it
         this can happen if a diskless node reboots and the mom_priv/jobs
         directory is cleared, set its state to queued so job_abt doesn't
         think it is still running */
      mutex_mgr job_mutex(pjob->ji_mutex, true);
      
      snprintf(log_buf, sizeof(log_buf),
        "mother superior no longer recognizes %s as a valid job, aborting. Last reported time was %ld",
        preq->rq_ind.rq_status.rq_id, pjob->ji_last_reported_time);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
      
      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE);
      rel_resc(pjob);
      job_mutex.set_unlock_on_exit(false);
      job_abt(&pjob, "Job does not exist on node");

      /* TODO, if the job is rerunnable we should set its state back to queued */
      }
    }
  else
    {
    if (preply->brp_choice == BATCH_REPLY_CHOICE_Queue)
      {
      snprintf(log_buf, sizeof(log_buf), "Unexpected reply: reply was on queue");
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
      }
    else
      {
      snprintf(log_buf, sizeof(log_buf),
        "Poll job request failed for job %s", preq->rq_ind.rq_status.rq_id);
      log_err(preply->brp_code, __func__, log_buf);
      }
    }
  
  cntl->sc_conn = -1;

  if (cntl->sc_post)
    cntl->sc_post(cntl); /* continue where we left off */

  return;
  }  /* END stat_update() */
Ejemplo n.º 3
0
/* start_domainsocket_listener
 * Starts a listen thread on a UNIX domain socket connection
 */
int start_domainsocket_listener(
    
  const char   *socket_name,
  void          *(*process_meth)(void *))

  {
  struct sockaddr_un addr;
  int rc = PBSE_NONE;
  char log_buf[LOCAL_LOG_BUF_SIZE];
  int                *new_conn_port = NULL;
  int                 listen_socket = 0;
  int                 total_cntr = 0;
  pthread_t           tid;
  pthread_attr_t      t_attr;
  int objclass = 0;
  char authd_host_port[1024];

  memset(&addr, 0, sizeof(addr));
  addr.sun_family = AF_UNIX;
  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path)-1);

  /* socket_name is a file in the file system. It must be gone 
     before we can bind to it. Unlink it */
  unlink(socket_name);


  if ( (listen_socket = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
    {
    snprintf(log_buf, sizeof(log_buf), "socket failed: %d", errno);
    log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf);
    rc = PBSE_SOCKET_FAULT;
    }
  else if ( bind(listen_socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
    {
    snprintf(log_buf, sizeof(log_buf), "failed to bind socket %s: %d", socket_name, errno);
    log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf);
    rc = PBSE_SOCKET_FAULT;
    }
  else if (chmod(socket_name,  S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) < 0)
    {
    snprintf(log_buf, sizeof(log_buf), "failed to change file permissions on  %s: %d", socket_name, errno);
    log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf);
    rc = PBSE_SOCKET_FAULT;
    }
  else if ( listen(listen_socket, 64) < 0)
    {
    snprintf(log_buf, sizeof(log_buf), "listen failed %s: %d", socket_name, errno);
    log_event(PBSEVENT_ADMIN | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, __func__, log_buf);
    rc = PBSE_SOCKET_LISTEN;
    }
   else if ((rc = pthread_attr_init(&t_attr)) != 0)
    {
    /* Can not init thread attribute structure */
    rc = PBSE_THREADATTR;
    }
  else if ((rc = pthread_attr_setdetachstate(&t_attr, PTHREAD_CREATE_DETACHED)) != 0)
    {
    /* Can not set thread initial state as detached */
    pthread_attr_destroy(&t_attr);
    }
  else
    {
    log_get_set_eventclass(&objclass, GETV);
    if (objclass == PBS_EVENTCLASS_TRQAUTHD)
      {
      log_get_host_port(authd_host_port, sizeof(authd_host_port));
      if (authd_host_port[0])
        snprintf(log_buf, sizeof(log_buf),
          "TORQUE authd daemon started and listening on %s (unix socket %s)", 
            authd_host_port, socket_name);
      else
      snprintf(log_buf, sizeof(log_buf),
        "TORQUE authd daemon started and listening unix socket %s", socket_name);
      log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_TRQAUTHD,
        msg_daemonname, log_buf);
      }
    while (1)
      {
      if((new_conn_port = (int *)calloc(1, sizeof(int))) == NULL)
        {
        printf("Error allocating new connection handle on accept.\n");
        break;
        }
      if ((*new_conn_port = accept(listen_socket, NULL, NULL)) == -1)
        {
        if (errno == EMFILE)
          {
          sleep(1);
          printf("Temporary pause\n");
          }
        else
          {
          printf("error in accept %s\n", strerror(errno));
          break;
          }
        
        errno = 0;

        free(new_conn_port);
        new_conn_port = NULL;
        }
      else
        {
        if (debug_mode == TRUE)
          {
          process_meth((void *)new_conn_port);
          }
        else
          {
          pthread_create(&tid, &t_attr, process_meth, (void *)new_conn_port);
          }
        }
      if (debug_mode == TRUE)
        {
        if (total_cntr % 1000 == 0)
          {
          printf("Total requests: %d\n", total_cntr);
          }
        total_cntr++;
        }
      }

    if (new_conn_port != NULL)
      {
      free(new_conn_port);
      }

    pthread_attr_destroy(&t_attr);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, "net_srvr",
                "Socket close of network listener requested");
    }

  if (listen_socket != -1)
    close(listen_socket);

  return(rc);
  } /* END start_listener() */
Ejemplo n.º 4
0
int req_stat_job(

  struct batch_request *preq)  /* ptr to the decoded request */

  {
  struct stat_cntl      cntl; /* see svrfunc.h  */
  char                 *name;
  job                  *pjob = NULL;
  pbs_queue            *pque = NULL;
  int                   rc = PBSE_NONE;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  bool                  condensed = false;

  enum TJobStatTypeEnum type = tjstNONE;

  /*
   * first, validate the name of the requested object, either
   * a job, a queue, or the whole server.
   */
  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "note");
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }


  /* FORMAT:  name = { <JOBID> | <QUEUEID> | '' } */

  name = preq->rq_ind.rq_status.rq_id;

  if (preq->rq_extend != NULL)
    {
    /* evaluate pbs_job_stat() 'extension' field */

    if (!strncasecmp(preq->rq_extend, "truncated", strlen("truncated")))
      {
      /* truncate response by 'max_report' */

      type = tjstTruncatedServer;
      }
    else if (!strncasecmp(preq->rq_extend, "summarize_arrays", strlen("summarize_arrays")))
      {
      type = tjstSummarizeArraysServer;
      }

    if (preq->rq_extend[strlen(preq->rq_extend) - 1] == 'C')
      {
      condensed = true;
      }

    }    /* END if (preq->rq_extend != NULL) */

  if (isdigit((int)*name))
    {
    /* status a single job */

    if (is_array(name))
      {
      if (type != tjstSummarizeArraysServer)
        {
        type = tjstArray;
        }
      }
    else
      {
      type = tjstJob;

      if ((pjob = svr_find_job(name, FALSE)) == NULL)
        {
        rc = PBSE_UNKJOBID;
        }
      else
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    }
  else if (isalpha(name[0]))
    {
    if (type == tjstNONE)
      type = tjstQueue;
    else if (type == tjstSummarizeArraysServer)
      type = tjstSummarizeArraysQueue;
    else
      type = tjstTruncatedQueue;

    /* if found, this mutex is released later */
    if ((pque = find_queuebyname(name)) == NULL)
      {
      rc = PBSE_UNKQUE;
      }
    }
  else if ((*name == '\0') || (*name == '@'))
    {
    /* status all jobs at server */

    if (type == tjstNONE)
      type = tjstServer;
    }
  else
    {
    rc = PBSE_IVALREQ;
    }

  if (rc != 0)
    {
    /* is invalid - an error */
    req_reject(rc, 0, preq, NULL, NULL);

    return(rc);
    }

  set_reply_type(&preq->rq_reply, BATCH_REPLY_CHOICE_Status);

  CLEAR_HEAD(preq->rq_reply.brp_un.brp_status);

  if ((type == tjstTruncatedQueue) ||
      (type == tjstTruncatedServer))
    {
    if (pque != NULL)
      {
      unlock_queue(pque, __func__, "", LOGLEVEL);
      pque = NULL;
      }
    }

  memset(&cntl, 0, sizeof(cntl));

  cntl.sc_type   = (int)type;
  cntl.sc_conn   = -1;
  cntl.sc_pque   = pque;
  cntl.sc_origrq = preq;
  cntl.sc_post   = req_stat_job_step2;
  cntl.sc_jobid[0] = '\0'; /* cause "start from beginning" */
  cntl.sc_condensed = condensed;

  req_stat_job_step2(&cntl); /* go to step 2, see if running is current */

  if (pque != NULL)
    unlock_queue(pque, "req_stat_job", (char *)"success", LOGLEVEL);

  return(PBSE_NONE);
  }  /* END req_stat_job() */
Ejemplo n.º 5
0
void req_stat_job_step2(

  struct stat_cntl *cntl)  /* I/O (free'd on return) */

  {
  batch_request         *preq = cntl->sc_origrq;
  svrattrl              *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  job                   *pjob = NULL;

  struct batch_reply    *preply = &preq->rq_reply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type = (enum TJobStatTypeEnum)cntl->sc_type;
  bool                   exec_only = false;

  int                    bad = 0;
  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  int                    job_array_index = -1;
  job_array             *pa = NULL;
  all_jobs_iterator     *iter;

  if (preq->rq_extend != NULL)
    {
    /* FORMAT:  { EXECQONLY } */
    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = true;
    }

  if ((type == tjstTruncatedServer) || 
      (type == tjstTruncatedQueue))
    {
    handle_truncated_qstat(exec_only, cntl->sc_condensed, preq);

    return;
    } /* END if ((type == tjstTruncatedServer) || ...) */
  else if (type == tjstJob)
    {
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

    if (pjob != NULL)
      {
      if ((rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad)))
        req_reject(rc, bad, preq, NULL, NULL);
      else
        reply_send_svr(preq);

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    else
      {
      req_reject(PBSE_JOBNOTFOUND, bad, preq, NULL, NULL);
      }
    }
  else
    {
    if (type == tjstArray)
      {
      pa = get_array(preq->rq_ind.rq_status.rq_id);

      if (pa == NULL)
        {
        req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
        return;
        }
      }
    else if ((type == tjstSummarizeArraysQueue) || 
             (type == tjstSummarizeArraysServer))
      update_array_statuses();

    iter = get_correct_status_iterator(cntl);

    for (pjob = get_next_status_job(cntl, job_array_index, pa, iter);
         pjob != NULL;
         pjob = get_next_status_job(cntl, job_array_index, pa, iter))
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      /* go ahead and build the status reply for this job */
      if (pjob->ji_being_recycled == true)
        continue;

      if (exec_only)
        {
        if (cntl->sc_pque != NULL)
          {
          if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
            continue;
          }
        else if (in_execution_queue(pjob, pa) == false)
          continue;
        }

      rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, cntl->sc_condensed, &bad);

      if ((rc != PBSE_NONE) && 
          (rc != PBSE_PERM))
        {
        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        req_reject(rc, bad, preq, NULL, NULL);

        delete iter;

        return;
        }
      }  /* END for (pjob != NULL) */

    delete iter;

    if (pa != NULL)
      {
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
      }
   
    reply_send_svr(preq);
    }

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM,
      PBS_EVENTCLASS_JOB,
      "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
Ejemplo n.º 6
0
/*
read_config() -- Open and parse config file and extract values of variables
*/
bool_t read_config()
{
	char buf[(BUF_SZ + 1)], *p, *q, *r;
	FILE *fp;

	if(config_file == (char *)NULL) {
		config_file = strdup(CONFIGURATION_FILE);
		if(config_file == (char *)NULL) {
			die("parse_config() -- strdup() failed");
		}
	}

	if((fp = fopen(config_file, "r")) == NULL) {
		return(False);
	}

	while(fgets(buf, sizeof(buf), fp)) {
		char *begin=buf;
		char *rightside;
		/* Make comments invisible */
		if((p = strchr(buf, '#'))) {
			*p = (char)NULL;
		}

		/* Ignore malformed lines and comments */
		if(strchr(buf, '=') == (char *)NULL) continue;

		/* Parse out keywords */
		p=firsttok(&begin, "= \t\n");
		if(p){
			rightside=begin;
			q = firsttok(&begin, "= \t\n");
		}
		if(p && q) {
			if(strcasecmp(p, "Root") == 0) {
				if((root = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set Root=\"%s\"\n", root);
				}
			}
			else if(strcasecmp(p, "MailHub") == 0) {
				if((r = strchr(q, ':')) != NULL) {
					*r++ = '\0';
					port = atoi(r);
				}

				if((mailhost = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set MailHub=\"%s\"\n", mailhost);
					log_event(LOG_INFO, "Set RemotePort=\"%d\"\n", port);
				}
			}
			else if(strcasecmp(p, "HostName") == 0) {
				free(hostname);
				hostname = strdup(q);
				if (!hostname) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set HostName=\"%s\"\n", hostname);
				}
			}
			else if(strcasecmp(p,"AddHeader") == 0) {
				if((r = firsttok(&rightside, "\n#")) != NULL) {
					header_save(r);
					free(r);
				} else {
					die("cannot AddHeader");
				}
				if(log_level > 0 ) {
					log_event(LOG_INFO, "Set AddHeader=\"%s\"\n", q);
				}
			}
#ifdef REWRITE_DOMAIN
			else if(strcasecmp(p, "RewriteDomain") == 0) {
				if((p = strrchr(q, '@'))) {
					mail_domain = strdup(++p);

					log_event(LOG_ERR,
						"Set RewriteDomain=\"%s\" is invalid\n", q);
					log_event(LOG_ERR,
						"Set RewriteDomain=\"%s\" used\n", mail_domain);
				}
				else {
					mail_domain = strdup(q);
				}

				if(mail_domain == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}
				rewrite_domain = True;

				if(log_level > 0) {
					log_event(LOG_INFO,
						"Set RewriteDomain=\"%s\"\n", mail_domain);
				}
			}
#endif
			else if(strcasecmp(p, "FromLineOverride") == 0) {
				if(strcasecmp(q, "YES") == 0) {
					override_from = True;
				}
				else {
					override_from = False;
				}

				if(log_level > 0) {
					log_event(LOG_INFO,
						"Set FromLineOverride=\"%s\"\n",
						override_from ? "True" : "False");
				}
			}
			else if(strcasecmp(p, "RemotePort") == 0) {
				port = atoi(q);

				if(log_level > 0) {
					log_event(LOG_INFO, "Set RemotePort=\"%d\"\n", port);
				}
			}
#ifdef HAVE_SSL
			else if(strcasecmp(p, "UseTLS") == 0) {
				if(strcasecmp(q, "YES") == 0) {
					use_tls = True;
				}
				else {
					use_tls = False;
					use_starttls = False;
				}

				if(log_level > 0) { 
					log_event(LOG_INFO,
						"Set UseTLS=\"%s\"\n", use_tls ? "True" : "False");
				}
			}
			else if(strcasecmp(p, "UseSTARTTLS") == 0) {
				if(strcasecmp(q, "YES") == 0) {
					use_starttls = True;
					use_tls = True;
				}
				else {
					use_starttls = False;
				}

				if(log_level > 0) { 
					log_event(LOG_INFO,
						"Set UseSTARTTLS=\"%s\"\n", use_tls ? "True" : "False");
				}
			}
			else if(strcasecmp(p, "UseTLSCert") == 0) {
				if(strcasecmp(q, "YES") == 0) {
					use_cert = True;
				}
				else {
					use_cert = False;
				}

				if(log_level > 0) {
					log_event(LOG_INFO,
						"Set UseTLSCert=\"%s\"\n",
						use_cert ? "True" : "False");
				}
			}
			else if(strcasecmp(p, "TLSCert") == 0) {
				if((tls_cert = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set TLSCert=\"%s\"\n", tls_cert);
				}
			}
#endif
			/* Command-line overrides these */
			else if(strcasecmp(p, "AuthUser") == 0 && !auth_user) {
				if((auth_user = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set AuthUser=\"%s\"\n", auth_user);
				}
			}
			else if(strcasecmp(p, "AuthPass") == 0 && !auth_pass) {
				if((auth_pass = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set AuthPass=\"%s\"\n", auth_pass);
				}
			}
			else if(strcasecmp(p, "AuthMethod") == 0 && !auth_method) {
				if((auth_method = strdup(q)) == (char *)NULL) {
					die("parse_config() -- strdup() failed");
				}

				if(log_level > 0) {
					log_event(LOG_INFO, "Set AuthMethod=\"%s\"\n", auth_method);
				}
			}
			else if(strcasecmp(p, "UseOldAUTH") == 0) {
				if(strcasecmp(q, "YES") == 0) {
					use_oldauth = True;
				}
				else {
					use_oldauth = False;
				}
 
				if(log_level > 0) {
					log_event(LOG_INFO,
						"Set UseOldAUTH=\"%s\"\n",
						use_oldauth ? "True" : "False");
				}
			}
			else if (strcasecmp(p, "Debug") == 0)
			{
				if (strcasecmp(q, "YES") == 0)
				{
					log_level = 1;
				}
				else
				{
					log_level = 0;
				}
			}
			else {
				log_event(LOG_INFO, "Unable to set %s=\"%s\"\n", p, q);
			}
			free(p);
			free(q);
		} 
	}
	(void)fclose(fp);

	return(True);
}
Ejemplo n.º 7
0
int
job_save_fs(job *pjob, int updatetype)
{
	int	fds;
	int	i;
	char	*filename;
	char	namebuf1[MAXPATHLEN+1];
	char	namebuf2[MAXPATHLEN+1];
	int	openflags;
	int	redo;
	int	pmode;

#ifdef WIN32
	pmode = _S_IWRITE | _S_IREAD;
#else
	pmode = 0600;
#endif

	(void)strcpy(namebuf1, path_jobs);	/* job directory path */
	if (*pjob->ji_qs.ji_fileprefix != '\0')
		(void)strcat(namebuf1, pjob->ji_qs.ji_fileprefix);
	else
		(void)strcat(namebuf1, pjob->ji_qs.ji_jobid);
	(void)strcpy(namebuf2, namebuf1);	/* setup for later */
	(void)strcat(namebuf1, JOB_FILE_SUFFIX);

	/* if ji_modified is set, ie an attribute changed, then update mtime */

	if (pjob->ji_modified) {
		pjob->ji_wattr[JOB_ATR_mtime].at_val.at_long = time_now;
		pjob->ji_wattr[JOB_ATR_mtime].at_flags |= ATR_VFLAG_MODCACHE;

	}

	if (pjob->ji_qs.ji_jsversion != JSVERSION) {
		/* version of job structure changed, force full write */
		pjob->ji_qs.ji_jsversion = JSVERSION;
		updatetype = SAVEJOB_FULLFORCE;
	}

	if (updatetype == SAVEJOB_QUICK) {

		openflags =  O_WRONLY;
		fds = open(namebuf1, openflags, pmode);
		if (fds < 0) {
			log_err(errno, "job_save", "error on open");
			return (-1);
		}
#ifdef WIN32
		secure_file(namebuf1, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
		setmode(fds, O_BINARY);
#endif

		/* just write the "critical" base structure to the file */

		save_setup(fds);
		if ((save_struct((char *)&pjob->ji_qs, fixedsize) == 0) &&
			(save_struct((char *)&pjob->ji_extended, extndsize) == 0) &&
			(save_flush() == 0)) {
			(void)close(fds);
		} else {
			log_err(errno, "job_save", "error quickwrite");
			(void)close(fds);
			return (-1);
		}

	} else {

		/*
		 * write the whole structure to the file.
		 * For a update, this is done to a new file to protect the
		 * old against crashs.
		 * The file is written in four parts:
		 * (1) the job structure,
		 * (2) the extended area,
		 * (3) if a Array Job, the index tracking table
		 * (4) the attributes in the "encoded "external form, and last
		 * (5) the dependency list.
		 */

		(void)strcat(namebuf2, JOB_FILE_COPY);
		openflags =  O_CREAT | O_WRONLY;

#ifdef WIN32
		fix_perms2(namebuf2, namebuf1);
#endif

		if (updatetype == SAVEJOB_NEW)
			filename = namebuf1;
		else
			filename = namebuf2;

		fds = open(filename, openflags, pmode);
		if (fds < 0) {
			log_err(errno, "job_save",
				"error opening for full save");
			return (-1);
		}

#ifdef WIN32
		secure_file(filename, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
		setmode(fds, O_BINARY);
#endif

		for (i=0; i<MAX_SAVE_TRIES; ++i) {
			redo = 0;	/* try to save twice */
			save_setup(fds);
			if (save_struct((char *)&pjob->ji_qs, fixedsize)
				!= 0) {
				redo++;
			} else if (save_struct((char *)&pjob->ji_extended,
				extndsize) != 0) {
				redo++;
#ifndef PBS_MOM
			} else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) &&
				(save_struct((char *)pjob->ji_ajtrk,
				pjob->ji_ajtrk->tkm_size) != 0)) {
				redo++;
#endif
			} else if (save_attr_fs(job_attr_def, pjob->ji_wattr,
				(int)JOB_ATR_LAST) != 0) {
				redo++;
			} else if (save_flush() != 0) {
				redo++;
			}
			if (redo != 0) {
				if (lseek(fds, (off_t)0, SEEK_SET) < 0) {
					log_err(errno, "job_save", "error lseek");
				}
			} else
				break;
		}


		(void)close(fds);
		if (i >= MAX_SAVE_TRIES) {
			if ((updatetype == SAVEJOB_FULL) ||
				(updatetype == SAVEJOB_FULLFORCE))
				(void)unlink(namebuf2);
			return (-1);
		}

		if ((updatetype == SAVEJOB_FULL) ||
			(updatetype == SAVEJOB_FULLFORCE)) {
#ifdef WIN32
			if (MoveFileEx(namebuf2, namebuf1,
				MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH) == 0) {

				errno = GetLastError();
				sprintf(log_buffer, "MoveFileEx(%s,%s) failed!",
					namebuf2, namebuf1);
				log_err(errno, "job_save", log_buffer);
			}
			secure_file(namebuf1, "Administrators",
				READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
#else
			if (rename(namebuf2, namebuf1) == -1) {
				log_event(PBSEVENT_ERROR|PBSEVENT_SECURITY,
					PBS_EVENTCLASS_JOB, LOG_ERR,
					pjob->ji_qs.ji_jobid,
					"rename in job_save failed");
			}
#endif
		}
		pjob->ji_modified = 0;
	}
	return (0);
}
Ejemplo n.º 8
0
void req_deletejob(

  struct batch_request *preq)  /* I */

  {
  job              *pjob;

  struct work_task *pwtold;

  struct work_task *pwtnew;
  struct work_task *pwtcheck;

  int               rc;
  char             *sigt = "SIGTERM";

  char             *Msg = NULL;

  /* check if we are getting a purgecomplete from scheduler */
  if ((preq->rq_extend != NULL) && 
        !strncmp(preq->rq_extend,PURGECOMP,strlen(PURGECOMP)))
    {

    /*
     * purge_completed_jobs will respond with either an ack or reject
     */
    purge_completed_jobs(preq);

    return;
    }

  /* The way this is implemented, if the user enters the command "qdel -p <jobid>",
   * they can then delete jobs other than their own since the authorization
   * checks are made below in chk_job_request. This should probably be fixed.
   */

  if (forced_jobpurge(preq) != 0)
    {
    return;
    }

  /* NOTE:  should support rq_objname={<JOBID>|ALL|<name:<JOBNAME>} */

  /* NYI */

  pjob = chk_job_request(preq->rq_ind.rq_delete.rq_objname, preq);

  if (pjob == NULL)
    {
    /* NOTE:  chk_job_request() will issue req_reject() */

    return;
    }

  if (preq->rq_extend != NULL)
    {
    if (strncmp(preq->rq_extend, deldelaystr, strlen(deldelaystr)) &&
        strncmp(preq->rq_extend, delasyncstr, strlen(delasyncstr)) &&
        strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)))
      {
      /* have text message in request extension, add it */

      Msg = preq->rq_extend;

      /*
       * Message capability is only for operators and managers.
       * Check if request is authorized
      */

      if ((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
                            ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) == 0)
        {
        req_reject(PBSE_PERM, 0, preq, NULL,
                   "must have operator or manager privilege to use -m parameter");
        return;
        }
      }
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
    {
    /*
     * Find pid of router from existing work task entry,
     * then establish another work task on same child.
     * Next, signal the router and wait for its completion;
     */

    pwtold = (struct work_task *)GET_NEXT(pjob->ji_svrtask);

    while (pwtold != NULL)
      {
      if ((pwtold->wt_type == WORK_Deferred_Child) ||
          (pwtold->wt_type == WORK_Deferred_Cmp))
        {
        pwtnew = set_task(
                   pwtold->wt_type,
                   pwtold->wt_event,
                   post_delete_route,
                   preq);

        if (pwtnew != NULL)
          {
          /*
           * reset type in case the SIGCHLD came
           * in during the set_task;  it makes
           * sure that next_task() will find the
           * new entry.
           */

          pwtnew->wt_type = pwtold->wt_type;
          pwtnew->wt_aux = pwtold->wt_aux;

          kill((pid_t)pwtold->wt_event, SIGTERM);

          pjob->ji_qs.ji_substate = JOB_SUBSTATE_ABORT;

          return; /* all done for now */
          }
        else
          {
          req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

          return;
          }
        }

      pwtold = (struct work_task *)GET_NEXT(pwtold->wt_linkobj);
      }

    /* should never get here ...  */

    log_err(-1, "req_delete", "Did not find work task for router");

    req_reject(PBSE_INTERNAL, 0, preq, NULL, NULL);

    return;
    }

  if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 ||
      pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 )
    {
    /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */
    /* retry in one second                            */
    /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the
       job is being requeued. Wait until finished */

    static time_t  cycle_check_when = 0;
    static char    cycle_check_jid[PBS_MAXSVRJOBID + 1];

    if (cycle_check_when != 0)
      {
      if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) &&
          (time_now - cycle_check_when > 10))
        {
        /* state not updated after 10 seconds */

        /* did the mom ever get it? delete it anyways... */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;

        goto jump;
        }

      if (time_now - cycle_check_when > 20)
        {
        /* give up after 20 seconds */

        cycle_check_jid[0] = '\0';
        cycle_check_when  = 0;
        }
      }    /* END if (cycle_check_when != 0) */

    if (cycle_check_when == 0)
      {
      /* new PRERUN job located */

      cycle_check_when = time_now;
      strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid);
      }

    sprintf(log_buffer, "job cannot be deleted, state=PRERUN, requeuing delete request");

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    pwtnew = set_task(
               WORK_Timed,
               time_now + 1,
               post_delete_route,
               preq);

    if (pwtnew == 0)
      req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL);

    return;
    }  /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */

jump:

  /*
   * Log delete and if requesting client is not job owner, send mail.
   */

  sprintf(log_buffer, "requestor=%s@%s",
          preq->rq_user,
          preq->rq_host);


  /* NOTE:  should annotate accounting record with extend message (NYI) */

  account_record(PBS_ACCT_DEL, pjob, log_buffer);

  sprintf(log_buffer, msg_manager,
          msg_deletejob,
          preq->rq_user,
          preq->rq_host);

  log_event(
    PBSEVENT_JOB,
    PBS_EVENTCLASS_JOB,
    pjob->ji_qs.ji_jobid,
    log_buffer);

  /* NOTE:  should incorporate job delete message */

  if (Msg != NULL)
    {
    /* have text message in request extension, add it */

    strcat(log_buffer, "\n");
    strcat(log_buffer, Msg);
    }

  if ((svr_chk_owner(preq, pjob) != 0) &&
      !has_job_delete_nanny(pjob))
    {
    /* only send email if owner did not delete job and job deleted
       has not been previously attempted */

    svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buffer);
    /*
     * If we sent mail and already sent the extra message
     * then reset message so we don't trigger a redundant email
     * in job_abt()
    */

    if (Msg != NULL)
      {
      Msg = NULL;
      }
    }

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, change restart comment if failed */

    change_restart_comment_if_needed(pjob);
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * setup a nanny task to make sure the job is actually deleted (see the
     * comments at job_delete_nanny()).
     */

    if (has_job_delete_nanny(pjob))
      {
      req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress");

      return;
      }

    apply_job_delete_nanny(pjob, time_now + 60);

    /* check if we are getting a asynchronous delete */

    if ((preq->rq_extend != NULL) &&
          !strncmp(preq->rq_extend,DELASYNC,strlen(DELASYNC)))
      {
      struct batch_request *preq_tmp = NULL;
      /*
       * Respond with an ack now instead of after MOM processing
       * Create a new batch request and fill it in. It will be freed by reply_ack
       */

      snprintf(log_buffer,sizeof(log_buffer), "Deleting job asynchronously");
      log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buffer);

      preq_tmp = alloc_br(PBS_BATCH_DeleteJob);
      preq_tmp->rq_perm = preq->rq_perm;
      preq_tmp->rq_ind.rq_manager.rq_cmd = preq->rq_ind.rq_manager.rq_cmd;
      preq_tmp->rq_ind.rq_manager.rq_objtype = preq->rq_ind.rq_manager.rq_objtype;
      preq_tmp->rq_fromsvr = preq->rq_fromsvr;
      preq_tmp->rq_extsz = preq->rq_extsz;
      preq_tmp->rq_conn = preq->rq_conn;
      memcpy(preq_tmp->rq_ind.rq_manager.rq_objname,
          preq->rq_ind.rq_manager.rq_objname, PBS_MAXSVRJOBID + 1);
      memcpy(preq_tmp->rq_user, preq->rq_user, PBS_MAXUSER + 1);
      memcpy(preq_tmp->rq_host, preq->rq_host, PBS_MAXHOSTNAME + 1);

      reply_ack(preq_tmp);
      preq->rq_noreply = TRUE; /* set for no more replies */
      }
  
    /* make a cleanup task if set */
    if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
        (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
      {
      pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
      if (pwtcheck != NULL)
        append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
      }

    /*
     * Send signal request to MOM.  The server will automagically
     * pick up and "finish" off the client request when MOM replies.
     */

    if ((rc = issue_signal(pjob, sigt, post_delete_mom1, preq)))
      {
      /* cant send to MOM */

      req_reject(rc, 0, preq, NULL, NULL);
      }

    /* normally will ack reply when mom responds */

    sprintf(log_buffer, msg_delrunjobsig,
            sigt);

    LOG_EVENT(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      log_buffer);

    return;
    }  /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */

  /* make a cleanup task if set */
  if ((server.sv_attr[SRV_ATR_JobForceCancelTime].at_flags & ATR_VFLAG_SET) &&
      (server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long > 0))
    {
    pwtcheck = set_task(
        WORK_Timed,
        time_now + server.sv_attr[SRV_ATR_JobForceCancelTime].at_val.at_long,
        ensure_deleted,
        preq);
    
    if (pwtcheck != NULL)
      append_link(&pjob->ji_svrtask, &pwtcheck->wt_linkobj, pwtcheck);
    }

  /* if configured, and this job didn't have a slot limit hold, free a job
   * held with the slot limit hold */
  if ((server.sv_attr[SRV_ATR_MoabArrayCompatible].at_val.at_long != FALSE) &&
      ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE))
    {
    if ((pjob->ji_arraystruct != NULL) &&
        (pjob->ji_is_array_template == FALSE))
      {
      int        i;
      int        newstate;
      int        newsub;
      job       *tmp;
      job_array *pa = pjob->ji_arraystruct;

      for (i = 0; i < pa->ai_qs.array_size; i++)
        {
        if (pa->jobs[i] == NULL)
          continue;

        tmp = (job *)pa->jobs[i];

        if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l)
          {
          tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l;
              
          if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0)
            {
            tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET;
            }
          
          svr_evaljobstate(tmp, &newstate, &newsub, 1);
          svr_setjobstate(tmp, newstate, newsub);
          job_save(tmp, SAVEJOB_FULL, 0);

          break;
          }
        }
      }
    } /* END MoabArrayCompatible check */

  if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0)
    {
    /* job has restart file at mom, do end job processing */
    
    svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING);

    pjob->ji_momhandle = -1;

    /* force new connection */

    pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob);

    if (pwtnew)
      {
      append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
      }
    }
  else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0)
    {
    /* job has staged-in file, should remove them */

    remove_stagein(pjob);

    job_abt(&pjob, Msg);
    }
  else
    {
    /*
     * the job is not transitting (though it may have been) and
     * is not running, so put in into a complete state.
     */

    struct work_task *ptask;
    struct pbs_queue *pque;
    int  KeepSeconds = 0;

    svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE);

    if ((pque = pjob->ji_qhdr) && (pque != NULL))
      {
      pque->qu_numcompleted++;
      }

    KeepSeconds = attr_ifelse_long(
                    &pque->qu_attr[QE_ATR_KeepCompleted],
                    &server.sv_attr[SRV_ATR_KeepCompleted],
                    0);
    ptask = set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, pjob);

    if (ptask != NULL)
      {
      append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask);
      }
    }  /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */

  reply_ack(preq);

  return;
  }  /* END req_deletejob() */
Ejemplo n.º 9
0
static void post_delete_mom1(

  struct work_task *pwt)

  {
  int         delay = 0;
  int        dellen = strlen(deldelaystr);
  job       *pjob;

  struct work_task   *pwtnew;
  pbs_queue      *pque;

  struct batch_request *preq_sig;  /* signal request to MOM */

  struct batch_request *preq_clt;  /* original client request */
  int        rc;

  preq_sig = pwt->wt_parm1;
  rc       = preq_sig->rq_reply.brp_code;
  preq_clt = preq_sig->rq_extra;

  release_req(pwt);

  pjob = find_job(preq_clt->rq_ind.rq_delete.rq_objname);

  if (pjob == NULL)
    {
    /* job has gone away */

    req_reject(PBSE_UNKJOBID, 0, preq_clt, NULL, NULL);

    return;
    }

  if (rc)
    {
    /* mom rejected request */

    if (rc == PBSE_UNKJOBID)
      {
      /* MOM claims no knowledge, so just purge it */

      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "MOM rejected signal during delete");

      /* removed the resources assigned to job */

      free_nodes(pjob);

      set_resc_assigned(pjob, DECR);

      job_purge(pjob);

      reply_ack(preq_clt);
      }
    else
      {
      req_reject(rc, 0, preq_clt, NULL, NULL);
      }

    return;
    }

  if (preq_clt->rq_extend)
    {
    if (strncmp(preq_clt->rq_extend, deldelaystr, dellen) == 0)
      {
      delay = atoi(preq_clt->rq_extend + dellen);
      }
    }

  reply_ack(preq_clt);  /* dont need it, reply now */

  /*
   * if no delay specified in original request, see if kill_delay
   * queue attribute is set.
   */

  if (delay == 0)
    {
    pque = pjob->ji_qhdr;

    delay = attr_ifelse_long(&pque->qu_attr[QE_ATR_KillDelay],
                             &server.sv_attr[SRV_ATR_KillDelay],
                             2);
    }

  pwtnew = set_task(WORK_Timed, delay + time_now, post_delete_mom2, pjob);

  if (pwtnew)
    {
    /* insure that work task will be removed if job goes away */

    append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
    }

  /*
   * Since the first signal has succeeded, let's reschedule the
   * nanny to be 1 minute after the second phase.
   */

  apply_job_delete_nanny(pjob, time_now + delay + 60);

  return;
  }  /* END post_delete_mom1() */
Ejemplo n.º 10
0
/* 
 * read reply and check for success
 */
int read_tcp_reply(

  struct tcp_chan *chan,
  int  protocol,
  int  version,
  int  command,
  int *exit_status)

  {
  char log_buf[LOCAL_LOG_BUF_SIZE];
  int  ret;
  int  value; /* value read from sock */

  *exit_status = UNREAD_STATUS;

  if(LOGLEVEL >= 6)
    {
    sprintf(log_buf, "protocol: %d  version: %d  command:%d  sock:%d", protocol, version, command, chan->sock);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,__func__, log_buf);
    }

  if ((value = disrsi(chan,&ret)) != protocol)
    {
    snprintf(log_buf,sizeof(log_buf),
      "Mismatching protocols. Expected protocol %d but read reply for %d\n",
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else if ((value = disrsi(chan,&ret)) != version)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Mismatching versions. Expected version %d for protocol %d but read version %d\n",
      version,
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else if ((value = disrsi(chan,&ret)) != command)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Mismatching commands. Expected command %d for protocol %d but read command %d\n",
      command,
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else
    {
    /* read the exit code */
    *exit_status = disrsi(chan,&ret);
    
/*    DIS_tcp_reset(chan,0); */
    }

  if (ret != DIS_SUCCESS)
    {
    if (ret >= 0)
      {
      snprintf(log_buf, sizeof(log_buf),
        "Could not read reply for protocol %d command %d: %s",
        protocol, command, dis_emsg[ret]);
      }
    else
      {
      snprintf(log_buf, sizeof(log_buf),
        "Could not read reply for protocol %d command %d",
        protocol, command);
      }
    log_err(-1, __func__, log_buf);
    }

  return(ret);
  } /* END read_tcp_reply() */
Ejemplo n.º 11
0
void purge_completed_jobs(

  struct batch_request *preq)  /* I */

  {
  char *id = "purge_completed_jobs";
  job  *pjob;
  char *time_str;
  time_t purge_time = 0;

  /* get the time to purge the jobs that completed before */
  time_str = preq->rq_extend;
  time_str += strlen(PURGECOMP);
  purge_time = strtol(time_str,NULL,10);
  
  /*
    * Clean unreported capability is only for operators and managers.
    * Check if request is authorized
  */

  if ((preq->rq_perm & (ATR_DFLAG_OPRD|ATR_DFLAG_OPWR|
                    ATR_DFLAG_MGRD|ATR_DFLAG_MGWR)) == 0)
    {
    req_reject(PBSE_PERM,0,preq,NULL,
      "must have operator or manager privilege to use -c parameter");
    return;
    }
    
  if (LOGLEVEL >= 4)
    {
    sprintf(log_buffer,"Received purge completed jobs command, purge time is %ld (%s)",
      (long)purge_time, preq->rq_extend);

    LOG_EVENT(
      PBSEVENT_SYSTEM, 
      PBS_EVENTCLASS_REQUEST,
      id,
      log_buffer);
    }

  for (pjob = (job *)GET_NEXT(svr_alljobs);
        pjob != NULL;
        pjob = (job *)GET_NEXT(pjob->ji_alljobs)) 
    {
    if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_COMPLETE) &&
      (pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long <= purge_time) &&
      ((pjob->ji_wattr[JOB_ATR_reported].at_flags & ATR_VFLAG_SET) != 0) &&
      (pjob->ji_wattr[JOB_ATR_reported].at_val.at_long == 0))
      {
      if (LOGLEVEL >= 4)
        {
        sprintf(log_buffer,"Reported job is COMPLETED (%ld), setting reported to TRUE",
          pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long);
          
        log_event(
          PBSEVENT_JOB, 
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);
        }

      pjob->ji_wattr[JOB_ATR_reported].at_val.at_long = 1;
      pjob->ji_wattr[JOB_ATR_reported].at_flags =
        ATR_VFLAG_SET | ATR_VFLAG_MODIFY;
          
     job_save(pjob, SAVEJOB_FULL, 0); 
      }
    }


  reply_ack(preq);
  return;
  } /* END purge_completed_jobs() */
Ejemplo n.º 12
0
void HTTPServer_Impl::connection_thread_main(TCPConnection connection)
{
	try
	{
		std::string request;
		bool bool_result = read_line(connection, request);
		if (bool_result == false)
		{
			connection.disconnect_abortive();
			return;
		}

		std::string headers;
		bool_result = read_lines(connection, headers);
		if (bool_result == false)
		{
			connection.disconnect_abortive();
			return;
		}

		// Extract request command, url and version:

		std::string command, url, version;
		std::string::size_type pos1 = request.find(' ');
		if (pos1 == std::string::npos)
			throw Exception("Bad request");
		command = request.substr(0, pos1);
		if (command != "POST" && command != "GET")
			throw Exception("Unsupported");
		std::string::size_type pos2 = request.find(' ', pos1 + 1);
		if (pos2 == std::string::npos)
			throw Exception("Bad request");
		url = request.substr(pos1+1, pos2-pos1-1);
		std::string::size_type pos3 = request.find(' ', pos2 + 1);
		if (pos3 != std::string::npos)
			throw Exception("Bad request");
		version = request.substr(pos2 + 1);

		DataBuffer request_data;

		// Handle request:

		// Look for a request handler that will deal with the HTTP request:
		MutexSection mutex_lock(&mutex);
		std::vector<HTTPRequestHandler>::size_type index, size;
		size = handlers.size();
		bool handled = false;
		for (index = 0; index < size; index++)
		{
			if (handlers[index].is_handling_request(command, url, headers))
			{
				HTTPRequestHandler handler = handlers[index];
				mutex_lock.unlock();

				if (command == "POST")
				{
					write_line(connection, "HTTP/1.1 100 Continue");
					// write_line(connection, "Content-Length: 0");
					write_line(connection, "");
				}

				std::shared_ptr<HTTPServerConnection_Impl> connection_impl(std::make_shared<HTTPServerConnection_Impl>());
				connection_impl->connection = connection;
				connection_impl->request_type = command;
				connection_impl->request_url = url;
				connection_impl->request_headers = headers;
				HTTPServerConnection http_connection(connection_impl);
				handler.handle_request(http_connection);
				handled = true;
				break;
			}
		}
		mutex_lock.unlock();

		if (!handled)
		{
			// No handler wants it.  Reply with 404 Not Found:
			std::string error_msg("404 Not Found");
			write_line(connection, "HTTP/1.1 404 Not Found");
			write_line(connection, "Server: ClanLib HTTP Server");
			write_line(connection, "Connection: close");
			write_line(connection, "Vary: *");
			write_line(connection, "Content-Type: text/plain");
			write_line(connection, "Content-Length: " + StringHelp::int_to_local8(error_msg.length()+2));
			write_line(connection, "");
			write_line(connection, error_msg);
		}

		connection.disconnect_graceful();

/*
		if (url == "/")
		{
			File file("Sources/test.html", File::open_existing);
			DataBuffer response(file.get_size());
			file.read(response.get_data(), response.get_size(), true);
			file.close();

			// Send back response.

			write_line(connection, "HTTP/1.1 200 OK");
			write_line(connection, "Server: ClanLib HTTP Server");
			write_line(connection, "Connection: close");
//			write_line(connection, "Date: Sun, 16 Oct 2005 20:13:00 GMT");
//			write_line(connection, "Expires: Sun, 16 Oct 2005 20:13:00 GMT");
			write_line(connection, "Vary: *");
//			write_line(connection, "ETag: \"foobar\"");
			write_line(connection, "Content-Type: text/html");
			write_line(connection, "Content-Length: " + StringHelp::int_to_local8(response.get_size()));
			write_line(connection, "");
			connection.send(response.get_data(), response.get_size(), true);
		}
		else
		{
			DataBuffer response;
			bool error = false;
			try
			{
				response = handle_request(url, headers, request_data);
			}
			catch (const Exception &e)
			{
//					write_line(connection, "HTTP/1.1 404 Not Found");
				write_line(connection, "HTTP/1.1 404 Not Found");
				write_line(connection, "Server: ClanLib HTTP Server");
				write_line(connection, "Connection: close");
//				write_line(connection, "Date: Sun, 16 Oct 2005 20:13:00 GMT");
//				write_line(connection, "Expires: Sun, 16 Oct 2005 20:13:00 GMT");
				write_line(connection, "Vary: *");
//				write_line(connection, "ETag: \"foobar\"");
				write_line(connection, "Content-Type: text/plain");
				write_line(connection, "Content-Length: 0");
				write_line(connection, "");
				error = true;
			}

			// Send back response.

			if (!error)
			{
				write_line(connection, "HTTP/1.1 200 OK");
				write_line(connection, "Server: ClanLib HTTP Server");
				write_line(connection, "Connection: close");
//				write_line(connection, "Date: Sun, 16 Oct 2005 20:13:00 GMT");
//				write_line(connection, "Expires: Sun, 16 Oct 2005 20:13:00 GMT");
				write_line(connection, "Vary: *");
//				write_line(connection, "ETag: \"foobar\"");
				write_line(connection, "Content-Type: text/xml");
				write_line(connection, "Content-Length: " + StringHelp::int_to_local8(response.get_size()));
				write_line(connection, "");
				connection.send(response.get_data(), response.get_size(), true);
			}
		}
*/
	}
	catch (const Exception& e)
	{
		log_event("error", e.message);
	}
}
Ejemplo n.º 13
0
void handle_cr4_event(ikgt_event_info_t *event_info)
{
	uint64_t new_cr4_value;
	uint64_t cur_cr4_value;
	uint64_t diff;
	ikgt_cpu_event_info_t *cpuinfo;
	ikgt_vmcs_guest_state_reg_id_t operand_reg_id;
	ikgt_status_t status;
	int i, tmp, str_count;
	policy_entry_t *entry;
	policy_cr4_ctx ctx;
	char log_entry_message[LOG_MESSAGE_SIZE];
	char access[MAX_ACCESS_BUF_SIZE], action[MAX_ACTION_BUF_SIZE];

	event_info->response = IKGT_EVENT_RESPONSE_ALLOW;
	g_cr4_count++;

	cpuinfo = (ikgt_cpu_event_info_t *) (event_info->event_specific_data);

	if (IKGT_CPU_REG_UNKNOWN == cpuinfo->operand_reg) {
		ikgt_printf("Error, cpuinfo->operand_reg=IKGT_CPU_REG_UNKNOWN\n");
		return;
	}

	status = read_guest_reg(VMCS_GUEST_STATE_CR4, &cur_cr4_value);
	if (IKGT_STATUS_SUCCESS != status) {
		return;
	}

	/* get the VMCS reg ID for the operand */
	status = get_vmcs_guest_reg_id(cpuinfo->operand_reg, &operand_reg_id);
	if (IKGT_STATUS_SUCCESS != status) {
		return;
	}

	/* read the guest register from VMCS
	* new_cr4_value contains the new value to be written to cr4
	*/
	status = read_guest_reg(operand_reg_id, &new_cr4_value);
	if (IKGT_STATUS_SUCCESS != status) {
		return;
	}

	diff = cur_cr4_value ^ new_cr4_value;
	if (0 == diff)
		return;

	ctx.event_info = event_info;
	ctx.new_cr4_value = new_cr4_value;
	ctx.cur_cr4_value = cur_cr4_value;
	ctx.diff = diff;
	ctx.log = FALSE;

	for (i = 0; i < POLICY_MAX_ENTRIES; i++) {
		entry = policy_get_entry_by_index(i);
		if ((POLICY_GET_RESOURCE_ID(entry) == RESOURCE_ID_UNKNOWN) || !IS_CR4_ENTRY(entry))
			continue;

		process_cr4_policy(entry, &ctx);
	}

	if (ctx.new_cr4_value == cur_cr4_value) {
		event_info->response = IKGT_EVENT_RESPONSE_REDIRECT;
	}

	if (ctx.log) {
		tmp = mon_sprintf_s(access, MAX_ACCESS_BUF_SIZE, "write");
		action_to_string(&event_info->response, action);

		str_count = mon_sprintf_s(log_entry_message, LOG_MESSAGE_SIZE, "resource-name=CR4, access=%s, value=0x%016llx, RIP=0x%016llx, action=%s",
					access, new_cr4_value, event_info->vmcs_guest_state.ia32_reg_rip, action);
		log_event(log_entry_message, event_info->thread_id);
	}

	/* If response is skip then return */
	if (event_info->response == IKGT_EVENT_RESPONSE_REDIRECT)
		return;

	status = write_guest_reg(operand_reg_id, ctx.new_cr4_value);
	if (IKGT_STATUS_SUCCESS != status) {
		ikgt_printf("error, write_guest_reg(%u)=%u\n", operand_reg_id, status);
	}

	event_info->response = IKGT_EVENT_RESPONSE_ALLOW;
}
Ejemplo n.º 14
0
int process_request(

  struct tcp_chan *chan) /* file descriptor (socket) to get request */

  {
  int                   rc = PBSE_NONE;
  struct batch_request *request = NULL;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  long                  acl_enable = FALSE;
  long                  state = SV_STATE_DOWN;

  time_t                time_now = time(NULL);
  int                   free_request = TRUE;
  char                  tmpLine[MAXLINE];
  char                 *auth_err = NULL;
  enum conn_type        conn_active;
  unsigned short        conn_socktype;
  unsigned short        conn_authen;
  unsigned long         conn_addr;
  int                   sfds = chan->sock;

  pthread_mutex_lock(svr_conn[sfds].cn_mutex);
  conn_active = svr_conn[sfds].cn_active;
  conn_socktype = svr_conn[sfds].cn_socktype;
  conn_authen = svr_conn[sfds].cn_authen;
  conn_addr = svr_conn[sfds].cn_addr;
  svr_conn[sfds].cn_lasttime = time_now;
  pthread_mutex_unlock(svr_conn[sfds].cn_mutex);

  if ((request = alloc_br(0)) == NULL)
    {
    snprintf(tmpLine, sizeof(tmpLine),
        "cannot allocate memory for request from %lu",
        conn_addr);
    req_reject(PBSE_MEM_MALLOC, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_SYSTEM;
    goto process_request_cleanup;
    }

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */
  if (conn_active == FromClientDIS || conn_active == ToServerDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((conn_socktype & PBS_SOCK_UNIX) &&
        (conn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      /* get_creds interestingly always returns 0 */
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(chan, request);
    }
  else
    {
    char out[80];

    snprintf(tmpLine, MAXLINE, "request on invalid type of connection: %d, sock type: %d, from address %s", 
                conn_active,conn_socktype, netaddr_long(conn_addr, out));
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
      "process_req", tmpLine);
    snprintf(tmpLine, sizeof(tmpLine),
        "request on invalid type of connection (%d) from %s",
        conn_active,
        netaddr_long(conn_addr, out));
    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (rc == -1)
    {
    /* FAILURE */
    /* premature end of file */
    rc = PBSE_PREMATURE_EOF;
    goto process_request_cleanup;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE))
    {
    /* FAILURE */
    /* read error, likely cannot send reply so just disconnect */
    /* ??? not sure about this ??? */
    goto process_request_cleanup;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");
    free_request = FALSE;
    goto process_request_cleanup;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    sprintf(log_buf, "%s: %lu",
      pbse_to_txt(PBSE_BADHOST),
      conn_addr);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buf);

    snprintf(tmpLine, sizeof(tmpLine),
        "cannot determine hostname for connection from %lu",
        conn_addr);

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(log_buf,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buf);
    }

  /* is the request from a host acceptable to the server */
  if (conn_socktype & PBS_SOCK_UNIX)
    {
    strcpy(request->rq_host, server_name);
    }

  get_svr_attr_l(SRV_ATR_acl_host_enable, &acl_enable);
  if (acl_enable)
    {
    /* acl enabled, check it; always allow myself and nodes */
    struct array_strings *pas = NULL;
    struct pbsnode       *isanode;

    get_svr_attr_arst(SRV_ATR_acl_hosts, &pas);
    isanode = PGetNodeFromAddr(conn_addr);

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check_my_array_string(pas, request->rq_host, ACL_Host) == 0))
      {
      char tmpLine[MAXLINE];
      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
      free_request = FALSE;
      rc = PBSE_BADHOST;
      goto process_request_cleanup;
      }

    if (isanode != NULL)
      unlock_node(isanode, "process_request", NULL, LOGLEVEL);
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (conn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */
    conn_credent[sfds].timestamp = time_now;

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (conn_socktype == PBS_SOCK_INET)
        {
        rc = PBSE_IVALREQ;
        req_reject(rc, 0, request, NULL, NULL);
        free_request = FALSE;
        goto process_request_cleanup;
        }

      }

    if (conn_socktype & PBS_SOCK_UNIX)
      {
      pthread_mutex_lock(svr_conn[sfds].cn_mutex);
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      pthread_mutex_unlock(svr_conn[sfds].cn_mutex);
      }

    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = PBSE_NONE;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if (request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        free_request = FALSE;
        goto process_request_cleanup;
        }
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (conn_authen != PBS_NET_CONN_AUTHENTICATED)
      /* skip checking user if we did not get an authenticated credential */
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      if (auth_err != NULL)
        free(auth_err);
      free_request = FALSE;
      goto process_request_cleanup;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if ((pjob = svr_find_job(request->rq_ind.rq_modify.rq_objname, FALSE)) != (job *)0)
        {
        if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
          {

          if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
              ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
              (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
            {
            request->rq_perm = svr_get_privilege(request->rq_user, server_host);
            skip = TRUE;
            }

          }
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        }
      
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (conn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */
  get_svr_attr_l(SRV_ATR_State, &state);

  if (state > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);
        rc = PBSE_SVRDOWN;
        free_request = FALSE;
        goto process_request_cleanup;
        /*NOTREACHED*/

        break;
      }
    }

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  rc = dispatch_request(sfds, request);

  return(rc);

process_request_cleanup:

  if (free_request == TRUE)
    free_br(request);

  return(rc);
  }  /* END process_request() */
Ejemplo n.º 15
0
/*
smtp_open() -- Open connection to a remote SMTP listener
*/
int smtp_open(char *host, int port)
{
#ifdef INET6
	struct addrinfo hints, *ai0, *ai;
	char servname[NI_MAXSERV];
	int s;
#else
	struct sockaddr_in name;
	struct hostent *hent;
	int i, s, namelen;
#endif

#ifdef HAVE_SSL
	int err;
	char buf[(BUF_SZ + 1)];

	/* Init SSL stuff */
	SSL_CTX *ctx;
	SSL_METHOD *meth;
	X509 *server_cert;

	SSL_load_error_strings();
	SSLeay_add_ssl_algorithms();
	meth=SSLv23_client_method();
	ctx = SSL_CTX_new(meth);
	if(!ctx) {
		log_event(LOG_ERR, "No SSL support initiated\n");
		return(-1);
	}

	if(use_cert == True) { 
		if(SSL_CTX_use_certificate_chain_file(ctx, tls_cert) <= 0) {
			perror("Use certfile");
			return(-1);
		}

		if(SSL_CTX_use_PrivateKey_file(ctx, tls_cert, SSL_FILETYPE_PEM) <= 0) {
			perror("Use PrivateKey");
			return(-1);
		}

		if(!SSL_CTX_check_private_key(ctx)) {
			log_event(LOG_ERR, "Private key does not match the certificate public key\n");
			return(-1);
		}
	}
#endif

#ifdef INET6
	memset(&hints, 0, sizeof(hints));
	hints.ai_family = p_family;
	hints.ai_socktype = SOCK_STREAM;
	snprintf(servname, sizeof(servname), "%d", port);

	/* Check we can reach the host */
	if (getaddrinfo(host, servname, &hints, &ai0)) {
		log_event(LOG_ERR, "Unable to locate %s", host);
		return(-1);
	}

	for (ai = ai0; ai; ai = ai->ai_next) {
		/* Create a socket for the connection */
		s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (s < 0) {
			continue;
		}

		if (connect(s, ai->ai_addr, ai->ai_addrlen) < 0) {
			s = -1;
			continue;
		}
		break;
	}

	if(s < 0) {
		log_event (LOG_ERR,
			"Unable to connect to \"%s\" port %d.\n", host, port);

		return(-1);
	}
#else
	/* Check we can reach the host */
	if((hent = gethostbyname(host)) == (struct hostent *)NULL) {
		log_event(LOG_ERR, "Unable to locate %s", host);
		return(-1);
	}

	if(hent->h_length > sizeof(hent->h_addr)) {
		log_event(LOG_ERR, "Buffer overflow in gethostbyname()");
		return(-1);
	}

	/* Create a socket for the connection */
	if((s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
		log_event(LOG_ERR, "Unable to create a socket");
		return(-1);
	}

	for (i = 0; ; ++i) {
		if (!hent->h_addr_list[i]) {
			log_event(LOG_ERR, "Unable to connect to %s:%d", host, port);
			return(-1);
		}

	/* This SHOULD already be in Network Byte Order from gethostbyname() */
	name.sin_addr.s_addr = ((struct in_addr *)(hent->h_addr_list[i]))->s_addr;
	name.sin_family = hent->h_addrtype;
	name.sin_port = htons(port);

	namelen = sizeof(struct sockaddr_in);
	if(connect(s, (struct sockaddr *)&name, namelen) < 0)
		continue;
	break;
	}
#endif

#ifdef HAVE_SSL
	if(use_tls == True) {
		log_event(LOG_INFO, "Creating SSL connection to host");

		if (use_starttls == True)
		{
			use_tls=False; /* need to write plain text for a while */

			if (smtp_okay(s, buf))
			{
				smtp_write(s, "EHLO %s", hostname);
				if (smtp_okay(s, buf)) {
					smtp_write(s, "STARTTLS"); /* assume STARTTLS regardless */
					if (!smtp_okay(s, buf)) {
						log_event(LOG_ERR, "STARTTLS not working");
						return(-1);
					}
				}
				else
				{
					log_event(LOG_ERR, "Invalid response: %s (%s)", buf, hostname);
				}
			}
			else
			{
				log_event(LOG_ERR, "Invalid response SMTP Server (STARTTLS)");
				return(-1);
			}
			use_tls=True; /* now continue as normal for SSL */
		}

		ssl = SSL_new(ctx);
		if(!ssl) {
			log_event(LOG_ERR, "SSL not working");
			return(-1);
		}
		SSL_set_fd(ssl, s);

		err = SSL_connect(ssl);
		if(err < 0) { 
			perror("SSL_connect");
			return(-1);
		}

		if(log_level > 0 || 1) {
			log_event(LOG_INFO, "SSL connection using %s",
				SSL_get_cipher(ssl));
		}

		server_cert = SSL_get_peer_certificate(ssl);
		if(!server_cert) {
			return(-1);
		}
		X509_free(server_cert);

		/* TODO: Check server cert if changed! */
	}
#endif

	return(s);
}
Ejemplo n.º 16
0
static int forced_jobpurge(

  struct batch_request *preq)

  {
  job *pjob;

  if ((pjob = find_job(preq->rq_ind.rq_delete.rq_objname)) == NULL)
    {
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_delete.rq_objname,
      pbse_to_txt(PBSE_UNKJOBID));

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL);

    return(-1);
    }

  /* check about possibly purging the job */

  if (preq->rq_extend != NULL)
    {
    if (!strncmp(preq->rq_extend, delpurgestr, strlen(delpurgestr)))
      {
      if (((preq->rq_perm & (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) != 0) ||
          ((svr_chk_owner(preq, pjob) == 0) && (server.sv_attr[SRV_ATR_OwnerPurge].at_val.at_long)))
        {
        sprintf(log_buffer, "purging job without checking MOM");

        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);

        reply_ack(preq);

        free_nodes(pjob);

        if (pjob->ji_qhdr->qu_qs.qu_type == QTYPE_Execution)
          {
          set_resc_assigned(pjob, DECR);
          }

        job_purge(pjob);

        return(1);
        }
      else
        {
        /* FAILURE */

        req_reject(PBSE_PERM, 0, preq, NULL, NULL);

        return(-1);
        }
      }
    }

  return(0);
  }  /* END forced_jobpurge() */
Ejemplo n.º 17
0
/*
ssmtp() -- send the message (exactly one) from stdin to the mailhub SMTP port
*/
int ssmtp(char *argv[])
{
	char b[(BUF_SZ + 2)], *buf = b+1, *p, *q;
#ifdef MD5AUTH
	char challenge[(BUF_SZ + 1)];
#endif
	struct passwd *pw;
	int i, sock;
	uid_t uid;
	bool_t minus_v_save, leadingdot, linestart = True;
	int timeout = 0;
	int bufsize = sizeof(b)-1;

	b[0] = '.';
	outbytes = 0;
	ht = &headers;

	uid = getuid();
	if((pw = getpwuid(uid)) == (struct passwd *)NULL) {
		die("Could not find password entry for UID %d", uid);
	}
	get_arpadate(arpadate);

	if(read_config() == False) {
		log_event(LOG_INFO, "%s not found", config_file);
	}

	if((p = strtok(pw->pw_gecos, ";,"))) {
		if((gecos = strdup(p)) == (char *)NULL) {
			die("ssmtp() -- strdup() failed");
		}
	}
	revaliases(pw);

	/* revaliases() may have defined this */
	if(uad == (char *)NULL) {
		uad = append_domain(pw->pw_name);
	}

	rt = &rcpt_list;

	header_parse(stdin);

#if 1
	/* With FromLineOverride=YES set, try to recover sane MAIL FROM address */
	uad = append_domain(uad);
#endif

	from = from_format(uad, override_from);

	/* Now to the delivery of the message */
	(void)signal(SIGALRM, (void(*)())handler);	/* Catch SIGALRM */
	(void)alarm((unsigned) MAXWAIT);			/* Set initial timer */
	if(setjmp(TimeoutJmpBuf) != 0) {
		/* Then the timer has gone off and we bail out */
		die("Connection lost in middle of processing");
	}

	if((sock = smtp_open(mailhost, port)) == -1) {
		die("Cannot open %s:%d", mailhost, port);
	}
	else if (use_starttls == False) /* no initial response after STARTTLS */
	{
		if(smtp_okay(sock, buf) == False)
			die("Invalid response SMTP server");
	}

	/* If user supplied username and password, then try ELHO */
	if(auth_user) {
		outbytes += smtp_write(sock, "EHLO %s", hostname);
	}
	else {
		outbytes += smtp_write(sock, "HELO %s", hostname);
	}
	(void)alarm((unsigned) MEDWAIT);

	if(smtp_okay(sock, buf) == False) {
		die("%s (%s)", buf, hostname);
	}

	/* Try to log in if username was supplied */
	if(auth_user) {
#ifdef MD5AUTH
		if(auth_pass == (char *)NULL) {
			auth_pass = strdup("");
		}

		if(auth_method && strcasecmp(auth_method, "cram-md5") == 0) {
			outbytes += smtp_write(sock, "AUTH CRAM-MD5");
			(void)alarm((unsigned) MEDWAIT);

			if(smtp_read(sock, buf) != 3) {
				die("Server rejected AUTH CRAM-MD5 (%s)", buf);
			}
			strncpy(challenge, strchr(buf,' ') + 1, sizeof(challenge));

			memset(buf, 0, bufsize);
			crammd5(challenge, auth_user, auth_pass, buf);
		}
		else {
#endif
		memset(buf, 0, bufsize);
		to64frombits(buf, auth_user, strlen(auth_user));
		if (use_oldauth) {
			outbytes += smtp_write(sock, "AUTH LOGIN %s", buf);
		}
		else {
			outbytes += smtp_write(sock, "AUTH LOGIN");
			(void)alarm((unsigned) MEDWAIT);
			if(smtp_read(sock, buf) != 3) {
				die("Server didn't like our AUTH LOGIN (%s)", buf);
			}
			/* we assume server asked us for Username */
			memset(buf, 0, bufsize);
			to64frombits(buf, auth_user, strlen(auth_user));
			outbytes += smtp_write(sock, buf);
		}

		(void)alarm((unsigned) MEDWAIT);
		if(smtp_read(sock, buf) != 3) {
			die("Server didn't accept AUTH LOGIN (%s)", buf);
		}
		memset(buf, 0, bufsize);

		to64frombits(buf, auth_pass, strlen(auth_pass));
#ifdef MD5AUTH
		}
#endif
		/* We do NOT want the password output to STDERR
		 * even base64 encoded.*/
		minus_v_save = minus_v;
		minus_v = False;
		outbytes += smtp_write(sock, "%s", buf);
		minus_v = minus_v_save;
		(void)alarm((unsigned) MEDWAIT);

		if(smtp_okay(sock, buf) == False) {
			die("Authorization failed (%s)", buf);
		}
	}

	/* Send "MAIL FROM:" line */
	outbytes += smtp_write(sock, "MAIL FROM:<%s>", uad);

	(void)alarm((unsigned) MEDWAIT);

	if(smtp_okay(sock, buf) == 0) {
		die("%s", buf);
	}

	/* Send all the To: adresses */
	/* Either we're using the -t option, or we're using the arguments */
	if(minus_t) {
		if(rcpt_list.next == (rcpt_t *)NULL) {
			die("No recipients specified although -t option used");
		}
		rt = &rcpt_list;

		while(rt->next) {
			p = rcpt_remap(rt->string);
			outbytes += smtp_write(sock, "RCPT TO:<%s>", p);

			(void)alarm((unsigned)MEDWAIT);

			if(smtp_okay(sock, buf) == 0) {
				die("RCPT TO:<%s> (%s)", p, buf);
			}

			rt = rt->next;
		}
	}
	else {
		for(i = 1; (argv[i] != NULL); i++) {
			p = strtok(argv[i], ",");
			while(p) {
				/* RFC822 Address -> "foo@bar" */
				q = rcpt_remap(addr_parse(p));
				outbytes += smtp_write(sock, "RCPT TO:<%s>", q);

				(void)alarm((unsigned) MEDWAIT);

				if(smtp_okay(sock, buf) == 0) {
					die("RCPT TO:<%s> (%s)", q, buf);
				}

				p = strtok(NULL, ",");
			}
		}
	}

	/* Send DATA */
	outbytes += smtp_write(sock, "DATA");
	(void)alarm((unsigned) MEDWAIT);

	if(smtp_read(sock, buf) != 3) {
		/* Oops, we were expecting "354 send your data" */
		die("%s", buf);
	}

	outbytes += smtp_write(sock,
		"Received: by %s (sSMTP sendmail emulation); %s", hostname, arpadate);

	if(have_from == False) {
		outbytes += smtp_write(sock, "From: %s", from);
	}

	if(have_date == False) {
		outbytes += smtp_write(sock, "Date: %s", arpadate);
	}

#ifdef HASTO_OPTION
	if(have_to == False) {
		outbytes += smtp_write(sock, "To: postmaster");
	}
#endif

	ht = &headers;
	while(ht->next) {
		outbytes += smtp_write(sock, "%s", ht->string);
		ht = ht->next;
	}

	(void)alarm((unsigned) MEDWAIT);

	/* End of headers, start body */
	outbytes += smtp_write(sock, "");

	/*prevent blocking on pipes, we really shouldnt be using
	  stdio functions like fgets in the first place */
	fcntl(STDIN_FILENO,F_SETFL,O_NONBLOCK);

	while(!feof(stdin)) {
		if (!fgets(buf, bufsize, stdin)) {
			/* if nothing was received, then no transmission
			 * over smtp should be done */
			sleep(1);
			/* don't hang forever when reading from stdin */
			if (++timeout >= MEDWAIT) {
				log_event(LOG_ERR, "killed: timeout on stdin while reading body -- message saved to dead.letter.");
				die("Timeout on stdin while reading body");
			}
			continue;
		}
		/* Trim off \n, double leading .'s */
		leadingdot = standardise(buf, &linestart);

		if (linestart || feof(stdin)) {
			linestart = True;
			outbytes += smtp_write(sock, "%s", leadingdot ? b : buf);
		} else {
			if (log_level > 0) {
				log_event(LOG_INFO, "Sent a very long line in chunks");
			}
			if (leadingdot) {
				outbytes += fd_puts(sock, b, sizeof(b));
			} else {
				outbytes += fd_puts(sock, buf, bufsize);
			}
		}
		(void)alarm((unsigned) MEDWAIT);
	}
	if(!linestart) {
		smtp_write(sock, "");
	}
	/* End of body */

	outbytes += smtp_write(sock, ".");
	(void)alarm((unsigned) MAXWAIT);

	if(smtp_okay(sock, buf) == 0) {
		die("%s", buf);
	}

	/* Close connection */
	(void)signal(SIGALRM, SIG_IGN);

	outbytes += smtp_write(sock, "QUIT");
	(void)smtp_okay(sock, buf);
	(void)close(sock);

	log_event(LOG_INFO, "Sent mail for %s (%s) uid=%d username=%s outbytes=%d", 
		from_strip(uad), buf, uid, pw->pw_name, outbytes);

	return(0);
}
Ejemplo n.º 18
0
void post_signal_req(

  batch_request *preq)

  {
  char                 *jobid;
  job                  *pjob;

  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  /* request has been handled elsewhere */
  if (preq == NULL)
    return;

  preq->rq_conn = preq->rq_orgconn;  /* restore client socket */

  if (preq->rq_reply.brp_code)
    {
    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_REQUEST,
      preq->rq_ind.rq_signal.rq_jid,
      pbse_to_txt(PBSE_MOMREJECT));

    errno = 0;

    req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL);
    }
  else
    {
    if ((jobid = preq->rq_extra) == NULL)
      {
      log_err(ENOMEM, __func__, "Cannot allocate memory! FAILURE");
      return;
      }

    if ((pjob = svr_find_job(jobid, FALSE)) != NULL)
      {
      if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_SUSPEND) == 0)
        {
        if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) == 0)
          {
          pjob->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend;
          
          set_statechar(pjob);
          
          job_save(pjob, SAVEJOB_QUICK, 0);
          
          /* release resources allocated to suspended job - NORWAY */
          
          free_nodes(pjob);
          }
        }
      else if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_RESUME) == 0)
        {
        if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend)
          {
          /* re-allocate assigned node to resumed job - NORWAY */
          
          set_old_nodes(pjob);
          
          pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend;
          
          set_statechar(pjob);
          
          job_save(pjob, SAVEJOB_QUICK, 0);
          }
        }
    
      unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);
      }
    else
      {
      /* job is gone */
      snprintf(log_buf,sizeof(log_buf),
        "Cannot find job '%s', assuming success",
        jobid);
      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, __func__, log_buf);
      }

    free(jobid);

    reply_ack(preq);
    }

  return;
  }  /* END post_signal_req() */
Ejemplo n.º 19
0
/**
 * @brief
 * 	validate_job_formula - validate that the sorting forumla is in the
 *	correct form.  We do this by calling python and having
 *	it catch exceptions.
 *
 */
int validate_job_formula(attribute *pattr, void *pobject, int actmode) {
	char *formula;
	char *errmsg = NULL;
	struct resource_def *pres;
	FILE *fp;
	char buf[1024];
	char pathbuf[MAXPATHLEN];
	char *globals1 = NULL;
	int globals_size1 = 1024;
	char *globals2 = NULL;
	int globals_size2 = 1024;
	char *script = NULL;
	int script_size = 2048;
	PyThreadState *ts_main = NULL;
	PyThreadState *ts_sub = NULL;
	int rc = 0;
	int err = 0;

	if (actmode == ATR_ACTION_FREE)
		return (0);

#ifndef PYTHON
	return PBSE_INTERNAL;
#else
	if (!Py_IsInitialized())
		return PBSE_INTERNAL;

	formula = pattr->at_val.at_str;
	if (formula == NULL)
		return PBSE_INTERNAL;

	globals1 = malloc(globals_size1);
	if(globals1 == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}

	globals2 = malloc(globals_size2);
	if(globals2 == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}

	strcpy(globals1, "globals1={");
	strcpy(globals2, "globals2={");

	/* We need to create a python dictionary to pass to python as a list
	 * of valid symbols.
	 */

	for (pres = svr_resc_def; pres; pres = pres->rs_next) {
		/* unknown resource is used as a delimiter between builtin and custom resources */
		if (strcmp(pres->rs_name, RESOURCE_UNKNOWN) != 0) {
			snprintf(buf, sizeof(buf), "\'%s\':1,", pres->rs_name);
			if(pbs_strcat(&globals1, &globals_size1, buf) == NULL) {
				rc = PBSE_SYSTEM;
				goto validate_job_formula_exit;
			}
			if (pres->rs_type == ATR_TYPE_LONG ||
				pres->rs_type == ATR_TYPE_SIZE ||
				pres->rs_type == ATR_TYPE_LL ||
				pres->rs_type == ATR_TYPE_SHORT ||
				pres->rs_type ==  ATR_TYPE_FLOAT) {
				if(pbs_strcat(&globals2, &globals_size2, buf) ==  NULL) {
					rc = PBSE_SYSTEM;
					goto validate_job_formula_exit;
				}
			}

		}
	}

	snprintf(buf, sizeof(buf), "\'%s\':1, '%s':1, \'%s\':1,\'%s\':1, \'%s\':1, \'%s\':1, \'%s\':1, \'%s\': 1}\n",
		FORMULA_ELIGIBLE_TIME, FORMULA_QUEUE_PRIO, FORMULA_JOB_PRIO,
		FORMULA_FSPERC, FORMULA_FSPERC_DEP, FORMULA_TREE_USAGE, FORMULA_FSFACTOR, FORMULA_ACCRUE_TYPE);
	if (pbs_strcat(&globals1, &globals_size1, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	if (pbs_strcat(&globals2, &globals_size2, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}

	/* Allocate a buffer for the Python code */
	script = malloc(script_size);
	if (script == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	*script = '\0';

	/* import math and initialize variables */
	sprintf(buf,
		"ans = 0\n"
		"errnum = 0\n"
		"errmsg = \'\'\n"
		"try:\n"
		"    from math import *\n"
		"except ImportError, e:\n"
		"    errnum=4\n"
		"    errmsg=str(e)\n");
	if (pbs_strcat(&script, &script_size, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	/* set up our globals dictionary */
	if (pbs_strcat(&script, &script_size, globals1) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	if (pbs_strcat(&script, &script_size, globals2) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	/* Now for the real guts: The initial try/except block*/
	sprintf(buf,
		"try:\n"
		"    exec(\'ans=");
	if (pbs_strcat(&script, &script_size, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	if (pbs_strcat(&script, &script_size, formula) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	sprintf(buf, "\', globals1, locals())\n"
		"except SyntaxError, e:\n"
		"    errnum=1\n"
		"    errmsg=str(e)\n"
		"except NameError, e:\n"
		"    errnum=2\n"
		"    errmsg=str(e)\n"
		"except Exception, e:\n"
		"    pass\n"
		"if errnum == 0:\n"
		"    try:\n"
		"        exec(\'ans=");
	if (pbs_strcat(&script, &script_size, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	if (pbs_strcat(&script, &script_size, formula) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	sprintf(buf, "\', globals2, locals())\n"
		"    except NameError, e:\n"
		"        errnum=3\n"
		"        errmsg=str(e)\n"
		"    except Exception, e:\n"
		"        pass\n");
	if (pbs_strcat(&script, &script_size, buf) == NULL) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}

	/* run the script in a subinterpreter */
	ts_main = PyThreadState_Get();
	ts_sub = Py_NewInterpreter();
	if (!ts_sub) {
		rc = PBSE_SYSTEM;
		goto validate_job_formula_exit;
	}
	err = PyRun_SimpleString(script);

	/* peek into the interpreter to get the values of err and errmsg */
	if (err == 0) {
		PyObject *module;
		PyObject *dict;
		PyObject *val;
		err = -1;
		if ((module = PyImport_AddModule("__main__"))) {
			if ((dict = PyModule_GetDict(module))) {
				char *p;
				if ((val = PyDict_GetItemString(dict, "errnum"))) {
					p = pbs_python_object_str(val);
					if (*p != '\0')
						err = atoi(p);
				}
				if ((val = PyDict_GetItemString(dict, "errmsg"))) {
					p = pbs_python_object_str(val);
					if (*p != '\0')
						errmsg = strdup(p);
				}
			}
		}
	}

	switch(err)
	{
		case 0: /* Success */
			rc = 0;
			break;
		case 1: /* Syntax error in formula */
			rc = PBSE_BAD_FORMULA;
			break;
		case 2: /* unknown resource name */
			rc = PBSE_BAD_FORMULA_KW;
			break;
		case 3: /* resource of non-numeric type */
			rc = PBSE_BAD_FORMULA_TYPE;
			break;
		case 4: /* import error */
			rc = PBSE_SYSTEM;
			break;
		default: /* unrecognized error */
			rc = PBSE_INTERNAL;
			break;
	}

	if (err == 0) {
		snprintf(pathbuf, sizeof(pathbuf), "%s/%s", pbs_conf.pbs_home_path,
			FORMULA_ATTR_PATH_SCHED);
		if ((fp = fopen(pathbuf, "w")) == NULL) {
			rc = PBSE_SYSTEM;
			goto validate_job_formula_exit;
		}

		fprintf(fp, "### PBS INTERNAL FILE DO NOT MODIFY ###\n");
		fprintf(fp, "%s\n", formula);

		fclose(fp);
	} else {
		snprintf(buf, sizeof(buf), "Validation Error: %s", errmsg?errmsg:"Internal error");
		log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_SERVER, LOG_DEBUG, __func__, buf);
	}

validate_job_formula_exit:
	if (ts_main) {
		if (ts_sub)
			Py_EndInterpreter(ts_sub);
		PyThreadState_Swap(ts_main);
	}
	free(script);
	free(globals1);
	free(globals2);
	free(errmsg);
	return rc;
#endif
}
Ejemplo n.º 20
0
/*
   Returns:
   NSSM_HOOK_STATUS_SUCCESS  if the hook ran successfully.
   NSSM_HOOK_STATUS_NOTFOUND if no hook was found.
   NSSM_HOOK_STATUS_ABORT    if the hook failed and we should cancel service start.
   NSSM_HOOK_STATUS_ERROR    on error.
   NSSM_HOOK_STATUS_NOTRUN   if the hook didn't run.
   NSSM_HOOK_STATUS_TIMEOUT  if the hook timed out.
   NSSM_HOOK_STATUS_FAILED   if the hook failed.
*/
int nssm_hook(hook_thread_t *hook_threads, nssm_service_t *service, TCHAR *hook_event, TCHAR *hook_action, unsigned long *hook_control, unsigned long deadline, bool async) {
  int ret = 0;

  hook_t *hook = (hook_t *) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(hook_t));
  if (! hook) {
    log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_OUT_OF_MEMORY, _T("hook"), _T("nssm_hook()"), 0);
    return NSSM_HOOK_STATUS_ERROR;
  }

  FILETIME now;
  GetSystemTimeAsFileTime(&now);

  EnterCriticalSection(&service->hook_section);

  /* Set the environment. */
  if (service->env) duplicate_environment(service->env);
  if (service->env_extra) set_environment_block(service->env_extra);

  /* ABI version. */
  TCHAR number[16];
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), NSSM_HOOK_VERSION);
  SetEnvironmentVariable(NSSM_HOOK_ENV_VERSION, number);

  /* Event triggering this action. */
  SetEnvironmentVariable(NSSM_HOOK_ENV_EVENT, hook_event);

  /* Hook action. */
  SetEnvironmentVariable(NSSM_HOOK_ENV_ACTION, hook_action);

  /* Control triggering this action.  May be empty. */
  if (hook_control) SetEnvironmentVariable(NSSM_HOOK_ENV_TRIGGER, service_control_text(*hook_control));
  else SetEnvironmentVariable(NSSM_HOOK_ENV_TRIGGER, _T(""));

  /* Last control handled. */
  SetEnvironmentVariable(NSSM_HOOK_ENV_LAST_CONTROL, service_control_text(service->last_control));

  /* Path to NSSM. */
  TCHAR path[PATH_LENGTH];
  GetModuleFileName(0, path, _countof(path));
  SetEnvironmentVariable(NSSM_HOOK_ENV_IMAGE_PATH, path);

  /* NSSM version. */
  SetEnvironmentVariable(NSSM_HOOK_ENV_NSSM_CONFIGURATION, NSSM_CONFIGURATION);
  SetEnvironmentVariable(NSSM_HOOK_ENV_NSSM_VERSION, NSSM_VERSION);
  SetEnvironmentVariable(NSSM_HOOK_ENV_BUILD_DATE, NSSM_DATE);

  /* NSSM PID. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), GetCurrentProcessId());
  SetEnvironmentVariable(NSSM_HOOK_ENV_PID, number);

  /* NSSM runtime. */
  set_hook_runtime(NSSM_HOOK_ENV_RUNTIME, &service->nssm_creation_time, &now);

  /* Application PID. */
  if (service->pid) {
    _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->pid);
    SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_PID, number);
    /* Application runtime. */
    set_hook_runtime(NSSM_HOOK_ENV_APPLICATION_RUNTIME, &service->creation_time, &now);
    /* Exit code. */
    SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, _T(""));
  }
  else {
    SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_PID, _T(""));
    if (str_equiv(hook_event, NSSM_HOOK_EVENT_START) && str_equiv(hook_action, NSSM_HOOK_ACTION_PRE)) {
      SetEnvironmentVariable(NSSM_HOOK_ENV_APPLICATION_RUNTIME, _T(""));
      SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, _T(""));
    }
    else {
      set_hook_runtime(NSSM_HOOK_ENV_APPLICATION_RUNTIME, &service->creation_time, &service->exit_time);
      /* Exit code. */
      _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->exitcode);
      SetEnvironmentVariable(NSSM_HOOK_ENV_EXITCODE, number);
    }
  }

  /* Deadline for this script. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), deadline);
  SetEnvironmentVariable(NSSM_HOOK_ENV_DEADLINE, number);

  /* Service name. */
  SetEnvironmentVariable(NSSM_HOOK_ENV_SERVICE_NAME, service->name);
  SetEnvironmentVariable(NSSM_HOOK_ENV_SERVICE_DISPLAYNAME, service->displayname);

  /* Times the service was asked to start. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->start_requested_count);
  SetEnvironmentVariable(NSSM_HOOK_ENV_START_REQUESTED_COUNT, number);

  /* Times the service actually did start. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->start_count);
  SetEnvironmentVariable(NSSM_HOOK_ENV_START_COUNT, number);

  /* Times the service exited. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->exit_count);
  SetEnvironmentVariable(NSSM_HOOK_ENV_EXIT_COUNT, number);

  /* Throttled count. */
  _sntprintf_s(number, _countof(number), _TRUNCATE, _T("%lu"), service->throttle);
  SetEnvironmentVariable(NSSM_HOOK_ENV_THROTTLE_COUNT, number);

  /* Command line. */
  TCHAR app[CMD_LENGTH];
  _sntprintf_s(app, _countof(app), _TRUNCATE, _T("\"%s\" %s"), service->exe, service->flags);
  SetEnvironmentVariable(NSSM_HOOK_ENV_COMMAND_LINE, app);

  TCHAR cmd[CMD_LENGTH];
  if (get_hook(service->name, hook_event, hook_action, cmd, sizeof(cmd))) {
    log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_GET_HOOK_FAILED, hook_event, hook_action, service->name, 0);
    duplicate_environment_strings(service->initial_env);
    LeaveCriticalSection(&service->hook_section);
    HeapFree(GetProcessHeap(), 0, hook);
    return NSSM_HOOK_STATUS_ERROR;
  }

  /* No hook. */
  if (! _tcslen(cmd)) {
    duplicate_environment_strings(service->initial_env);
    LeaveCriticalSection(&service->hook_section);
    HeapFree(GetProcessHeap(), 0, hook);
    return NSSM_HOOK_STATUS_NOTFOUND;
  }

  /* Run the command. */
  STARTUPINFO si;
  ZeroMemory(&si, sizeof(si));
  si.cb = sizeof(si);
  PROCESS_INFORMATION pi;
  ZeroMemory(&pi, sizeof(pi));
  unsigned long flags = 0;
#ifdef UNICODE
  flags |= CREATE_UNICODE_ENVIRONMENT;
#endif
  ret = NSSM_HOOK_STATUS_NOTRUN;
  if (CreateProcess(0, cmd, 0, 0, false, flags, 0, service->dir, &si, &pi)) {
    hook->name = (TCHAR *) HeapAlloc(GetProcessHeap(), 0, HOOK_NAME_LENGTH * sizeof(TCHAR));
    if (hook->name) _sntprintf_s(hook->name, HOOK_NAME_LENGTH, _TRUNCATE, _T("%s (%s/%s)"), service->name, hook_event, hook_action);
    hook->process_handle = pi.hProcess;
    hook->pid = pi.dwProcessId;
    hook->deadline = deadline;
    if (get_process_creation_time(hook->process_handle, &hook->creation_time)) GetSystemTimeAsFileTime(&hook->creation_time);

    unsigned long tid;
    HANDLE thread_handle = CreateThread(NULL, 0, await_hook, (void *) hook, 0, &tid);
    if (thread_handle) {
      if (async) {
        ret = 0;
        await_hook_threads(hook_threads, service->status_handle, &service->status, 0);
        add_thread_handle(hook_threads, thread_handle, hook->name);
      }
      else {
        await_single_handle(service->status_handle, &service->status, thread_handle, hook->name, _T(__FUNCTION__), deadline + NSSM_SERVICE_STATUS_DEADLINE);
        unsigned long exitcode;
        GetExitCodeThread(thread_handle, &exitcode);
        ret = (int) exitcode;
        CloseHandle(thread_handle);
      }
    }
    else {
      log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_CREATETHREAD_FAILED, error_string(GetLastError()), 0);
      await_hook(hook);
      if (hook->name) HeapFree(GetProcessHeap(), 0, hook->name);
      HeapFree(GetProcessHeap(), 0, hook);
    }
  }
  else {
    log_event(EVENTLOG_ERROR_TYPE, NSSM_EVENT_HOOK_CREATEPROCESS_FAILED, hook_event, hook_action, service->name, cmd, error_string(GetLastError()), 0);
    HeapFree(GetProcessHeap(), 0, hook);
  }

  /* Restore our environment. */
  duplicate_environment_strings(service->initial_env);

  LeaveCriticalSection(&service->hook_section);

  return ret;
}
Ejemplo n.º 21
0
int
job_or_resv_save_fs(void *pobj, int updatetype, int objtype)
{
	int	fds;
	int	openflags;
	int	redo;
	char	*filename;
	char	namebuf1[MAXPATHLEN+1];
	char	namebuf2[MAXPATHLEN+1];

	char		*path = NULL;
	char		*err_msg = NULL;
	char		*err_msgl = NULL;
	char		*prefix = NULL;
	char		*suffix = NULL;
	char		*cpsuffix = NULL;

	char		*p_oid = NULL;
	long		*p_mtime = NULL;
	int		*p_modified = NULL;
	void		*pfixed = NULL;
	ssize_t		i;
	size_t		fixed_size;
	attribute_def	*p_attr_def = NULL;
	attribute	*wattr = NULL;
	int		final_attr;
	int		eventclass;
	int		pmode;

#ifdef WIN32
	pmode = _S_IWRITE | _S_IREAD;
#else
	pmode = 0600;
#endif

	if (objtype == RESC_RESV_OBJECT || objtype == RESV_JOB_OBJECT) {
#ifndef PBS_MOM	/* MOM knows not of resc_resv structs */
		resc_resv  *presv;
		presv = (resc_resv *)pobj;
		err_msg = "reservation_save";
		err_msgl = "Link in reservation_save failed";
		path = path_resvs;
		prefix = presv->ri_qs.ri_fileprefix;
		suffix = RESV_FILE_SUFFIX;
		cpsuffix = RESV_FILE_COPY;
		p_modified = &presv->ri_modified;
		pfixed = (void *)&presv->ri_qs;
		fixed_size = sizeof(struct resvfix);
		p_attr_def = resv_attr_def;
		wattr = presv->ri_wattr;
		final_attr = RESV_ATR_LAST;
		p_mtime = &presv->ri_wattr[RESV_ATR_mtime].at_val.at_long;
		p_oid = presv->ri_qs.ri_resvID;
		eventclass = PBS_EVENTCLASS_RESV;
#else	/* PBS_MOM only: Execution will never come here for MOM */
		return (-1);
#endif
	}
	else if (objtype == JOB_OBJECT) {
		job   *pj = (job *)pobj;

#ifndef PBS_MOM	/*MOM knows not of resc_resv structs*/
		if (pj->ji_resvp) {
			int	rc = 0;

			if (updatetype == SAVEJOB_QUICK)
				rc = job_or_resv_save((void *)pj->ji_resvp,
					SAVERESV_QUICK,
					RESC_RESV_OBJECT);
			else if ((updatetype == SAVEJOB_FULL) ||
				(updatetype == SAVEJOB_FULLFORCE)  ||
				(updatetype == SAVEJOB_NEW))
				rc = job_or_resv_save((void *)pj->ji_resvp,
					SAVERESV_FULL,
					RESC_RESV_OBJECT);
			if (rc)
				return (rc);
		}
#endif
		err_msg = "job_save";
		err_msgl = "Link in job_save failed";
		path = path_jobs;
		if (*pj->ji_qs.ji_fileprefix != '\0')
			prefix = pj->ji_qs.ji_fileprefix;
		else
			prefix = pj->ji_qs.ji_jobid;
		suffix = JOB_FILE_SUFFIX;
		cpsuffix = JOB_FILE_COPY;
		p_modified = &pj->ji_modified;
		pfixed = (void *)&pj->ji_qs;
		fixed_size = sizeof(struct jobfix);
		p_attr_def = job_attr_def;
		wattr = pj->ji_wattr;
		final_attr = JOB_ATR_LAST;
		p_mtime = &pj->ji_wattr[JOB_ATR_mtime].at_val.at_long;
		p_oid = pj->ji_qs.ji_jobid;
		eventclass = PBS_EVENTCLASS_JOB;
		return (job_save_fs(pj, updatetype));
	} else {
		/*Don't expect to get here; incorrect object type*/
		return (-1);
	}

	(void)strcpy(namebuf1, path);		/* directory path */
	(void)strcat(namebuf1, prefix);
	(void)strcpy(namebuf2, namebuf1);	/* setup for later */
	(void)strcat(namebuf1, suffix);

	/*if an attribute changed (modified==1) update mtime*/

	if (*p_modified) {
		*p_mtime = time_now;
	}

	if (updatetype == SAVEJOB_QUICK || updatetype == SAVERESV_QUICK) {

		openflags =  O_WRONLY;
		fds = open(namebuf1, openflags, pmode);
		if (fds < 0) {
			log_err(errno, err_msg, "error on open");
			return (-1);
		}
#ifdef WIN32
		secure_file(namebuf1, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
		setmode(fds, O_BINARY);
#endif
		/* just write the "critical" base structure to the file */

		while ((i = write(fds, (char *)pfixed, fixed_size)) !=
			fixed_size) {
			if ((i < 0) && (errno == EINTR)) {
				/* retry the write */
				if (lseek(fds, (off_t)0, SEEK_SET) < 0) {
					log_err(errno, err_msg, "lseek");
					(void)close(fds);
					return (-1);
				}
				continue;
			} else {
				log_err(errno, err_msg, "quickwrite");
				(void)close(fds);
				return (-1);
			}
		}
		(void)close(fds);

	} else {

		/*
		 * write the whole structure to the file.
		 * For a update, this is done to a new file to protect the
		 * old against crashs.
		 * The file is written in four parts:
		 * (1) the job (resc_resv) structure,
		 * (2) the attributes in "encoded" form,
		 * (3) the attributes in the "external" form, and last
		 * (4) the dependency list.
		 */

		(void)strcat(namebuf2, cpsuffix);
		openflags =  O_CREAT | O_WRONLY;
#ifdef WIN32
		fix_perms2(namebuf2, namebuf1);
#endif
		if (updatetype == SAVEJOB_NEW || updatetype == SAVERESV_NEW)
			filename = namebuf1;
		else
			filename = namebuf2;

		fds = open(filename, openflags, pmode);
		if (fds < 0) {
			log_err(errno, err_msg, "open for full save");
			return (-1);
		}

#ifdef WIN32
		secure_file(filename, "Administrators",
			READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
		setmode(fds, O_BINARY);
#endif

		redo = 0;	/* try to save twice */
		do {
			save_setup(fds);
			if (save_struct((char *)pfixed, fixed_size) != 0) {
				redo++;
			} else if (save_attr_fs(p_attr_def, wattr, final_attr) != 0) {
				redo++;
			} else if (save_flush() != 0) {
				redo++;
			}
			if (redo != 0) {
				if (lseek(fds, (off_t)0, SEEK_SET) < 0) {
					log_err(errno, err_msg, "full lseek");
					redo++;
				}
			}
		} while (redo == 1);


		(void)close(fds);
		if (redo > 1) {
			if (updatetype == SAVEJOB_FULL ||
				updatetype == SAVEJOB_FULLFORCE ||
				updatetype == SAVERESV_FULL)
				(void)unlink(namebuf2);
			return (-1);
		}

		if (updatetype == SAVEJOB_FULL ||
			updatetype == SAVEJOB_FULLFORCE ||
			updatetype == SAVERESV_FULL) {

#ifdef WIN32
			if (MoveFileEx(namebuf2, namebuf1,
				MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH) ==
				0) {
				errno = GetLastError();
				sprintf(log_buffer, "MoveFileEx(%s,%s) failed!",
					namebuf2, namebuf1);
				log_err(errno, err_msg, log_buffer);
			}
			secure_file(namebuf1, "Administrators",
				READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
#else
			if (rename(namebuf2, namebuf1) == -1) {
				log_event(PBSEVENT_ERROR|PBSEVENT_SECURITY,
					eventclass, LOG_ERR, p_oid, err_msgl);
			}
#endif
		}
		*p_modified = 0;
	}
	return (0);
}
static void inc_new_length(unsigned int length,
	struct charset_header *header, FILE *file, char *charset,
	char *char1, char2_table char2, chars_table *chars)
{
	long offset;
	int value, pos, i, j;
	char *buffer;
	int count;

	log_event("- Switching to length %d", length + 1);

	char1[0] = 0;
	if (length)
		memset(char2, 0, sizeof(*char2));
	for (pos = 0; pos <= (int)length - 2; pos++)
		memset(chars[pos], 0, sizeof(**chars));

	offset =
		(long)header->offsets[length][0] |
		((long)header->offsets[length][1] << 8) |
		((long)header->offsets[length][2] << 16) |
		((long)header->offsets[length][3] << 24);
	if (fseek(file, offset, SEEK_SET)) pexit("fseek");

	i = j = pos = -1;
	if ((value = getc(file)) != EOF)
	do {
		if (value != CHARSET_ESC) {
			switch (pos) {
			case -1:
				inc_format_error(charset);

			case 0:
				buffer = char1;
				break;

			case 1:
				if (j < 0)
					inc_format_error(charset);
				buffer = (*char2)[j];
				break;

			default:
				if (i < 0 || j < 0)
					inc_format_error(charset);
				buffer = (*chars[pos - 2])[i][j];
			}

			buffer[count = 0] = value;
			while ((value = getc(file)) != EOF) {
				buffer[++count] = value;
				if (value == CHARSET_ESC) break;
				if (count >= CHARSET_SIZE)
					inc_format_error(charset);
			}
			buffer[count] = 0;

			continue;
		}

		if ((value = getc(file)) == EOF) break; else
		if (value == CHARSET_NEW) {
			if ((value = getc(file)) != (int)length) break;
			if ((value = getc(file)) == EOF) break;
			if (value < 0 || value > (int)length)
				inc_format_error(charset);
			pos = value;
		} else
		if (value == CHARSET_LINE) {
			if (pos < 0)
				inc_format_error(charset);
			if ((value = getc(file)) == EOF) break;
			i = value;
			if (i < 0 || i > CHARSET_SIZE)
				inc_format_error(charset);
			if ((value = getc(file)) == EOF) break;
			j = value;
			if (j < 0 || j > CHARSET_SIZE)
				inc_format_error(charset);
		} else
			inc_format_error(charset);

		value = getc(file);
	} while (value != EOF);

	if (value == EOF) {
		if (ferror(file))
			pexit("getc");
		else
			inc_format_error(charset);
	}
}
Ejemplo n.º 23
0
void handle_truncated_qstat(
    
  bool           exec_only,
  bool           condensed,
  batch_request *preq)

  {
  long                 sentJobCounter = 0;
  long                 qmaxreport;
  all_queues_iterator *queue_iter = NULL;
  pbs_queue           *pque;
  char                 log_buf[LOCAL_LOG_BUF_SIZE];
  job                 *pjob;
  svrattrl            *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  batch_reply         *preply = &preq->rq_reply;
  int                  bad = 0;

  svr_queues.lock();
  queue_iter = svr_queues.get_iterator();
  svr_queues.unlock();

  /* loop through all queues */
  while ((pque = next_queue(&svr_queues, queue_iter)) != NULL)
    {
    long      qjcounter = 0;
    mutex_mgr queue_mutex(pque->qu_mutex, true);

    if ((exec_only == true) &&
        (pque->qu_qs.qu_type != QTYPE_Execution))
      {
      /* ignore routing queues */
      continue;
      }

    if (((pque->qu_attr[QA_ATR_MaxReport].at_flags & ATR_VFLAG_SET) != 0) &&
        (pque->qu_attr[QA_ATR_MaxReport].at_val.at_long >= 0))
      {
      qmaxreport = pque->qu_attr[QA_ATR_MaxReport].at_val.at_long;
      }
    else
      {
      qmaxreport = TMAX_JOB;
      }

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reporting up to %ld idle jobs in queue %s\n",
        qmaxreport,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
      }

    /* loop through jobs in queue */
    all_jobs_iterator *jobiter = NULL;
    pque->qu_jobs->lock();
    jobiter = pque->qu_jobs->get_iterator();
    pque->qu_jobs->unlock();

    while ((pjob = next_job(pque->qu_jobs, jobiter)) != NULL)
      {
      mutex_mgr job_mgr(pjob->ji_mutex, true);

      if ((qjcounter >= qmaxreport) &&
          (pjob->ji_qs.ji_state == JOB_STATE_QUEUED))
        {
        /* max_report of queued jobs reached for queue */
        continue;
        }

      int rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status, condensed, &bad);

      if ((rc != 0) &&
          (rc != PBSE_PERM))
        {
        req_reject(rc, bad, preq, NULL, NULL);

        delete queue_iter;

        return;
        }

      sentJobCounter++;

      if (pjob->ji_qs.ji_state == JOB_STATE_QUEUED)
        qjcounter++;
      } /* END foreach (pjob from pque) */

    if (LOGLEVEL >= 5)
      {
      snprintf(log_buf, sizeof(log_buf), "Reported %ld total jobs for queue %s\n",
        sentJobCounter,
        pque->qu_qs.qu_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_QUEUE,pque->qu_qs.qu_name,log_buf);
      }
    } /* END for (pque) */

  reply_send_svr(preq);

  delete queue_iter;

  return;
  } // END handle_truncated_qstat()
void do_incremental_crack(struct db_main *db, char *mode)
{
	char *charset;
	int min_length, max_length, max_count;
	char *extra;
	FILE *file;
	struct charset_header *header;
	unsigned int check;
	char allchars[CHARSET_SIZE + 1];
	char char1[CHARSET_SIZE + 1];
	char2_table char2;
	chars_table chars[CHARSET_LENGTH - 2];
	unsigned char *ptr;
	unsigned int length, fixed, count;
	unsigned int real_count;
	int last_length, last_count;
	int pos;

	if (!mode) {
		if (db->format == &fmt_LM)
			mode = "LanMan";
		else if (db->format == &fmt_NETLM)
			mode = "LanMan";
		else if (db->format == &fmt_NETHALFLM)
			mode = "LanMan";
		else
			mode = "All";
	}

	log_event("Proceeding with \"incremental\" mode: %.100s", mode);

	if (!(charset = cfg_get_param(SECTION_INC, mode, "File"))) {
		log_event("! No charset defined");
		fprintf(stderr, "No charset defined for mode: %s\n", mode);
		error();
	}

	extra = cfg_get_param(SECTION_INC, mode, "Extra");

	if ((min_length = cfg_get_int(SECTION_INC, mode, "MinLen")) < 0)
		min_length = 0;
	if ((max_length = cfg_get_int(SECTION_INC, mode, "MaxLen")) < 0)
		max_length = CHARSET_LENGTH;
	max_count = cfg_get_int(SECTION_INC, mode, "CharCount");

	if (min_length > max_length) {
		log_event("! MinLen = %d exceeds MaxLen = %d",
			min_length, max_length);
		fprintf(stderr, "MinLen = %d exceeds MaxLen = %d\n",
			min_length, max_length);
		error();
	}

	if (min_length > db->format->params.plaintext_length) {
		log_event("! MinLen = %d is too large for this hash type",
			min_length);
		fprintf(stderr, "MinLen = %d exceeds the maximum possible "
			"length for the current hash type (%d)\n",
			min_length, db->format->params.plaintext_length);
		error();
	}

	if (max_length > db->format->params.plaintext_length) {
		log_event("! MaxLen = %d is too large for this hash type",
			max_length);
		fprintf(stderr, "Warning: "
			"MaxLen = %d is too large for the current hash type, "
			"reduced to %d\n",
			max_length, db->format->params.plaintext_length);
		max_length = db->format->params.plaintext_length;
	}

	if (max_length > CHARSET_LENGTH) {
		log_event("! MaxLen = %d exceeds the compile-time limit of %d",
			max_length, CHARSET_LENGTH);
		fprintf(stderr,
			"\n"
			"MaxLen = %d exceeds the compile-time limit of %d\n\n"
			"There are several good reasons why you probably don't "
			"need to raise it:\n"
			"- many hash types don't support passwords "
			"(or password halves) longer than\n"
			"7 or 8 characters;\n"
			"- you probably don't have sufficient statistical "
			"information to generate a\n"
			"charset file for lengths beyond 8;\n"
			"- the limitation applies to incremental mode only.\n",
			max_length, CHARSET_LENGTH);
		error();
	}

	if (!(file = fopen(path_expand(charset), "rb")))
		pexit("fopen: %s", path_expand(charset));

	header = (struct charset_header *)mem_alloc(sizeof(*header));

	charset_read_header(file, header);
	if (ferror(file)) pexit("fread");

	if (feof(file) ||
	    (memcmp(header->version, CHARSET_V1, sizeof(header->version)) &&
	    memcmp(header->version, CHARSET_V2, sizeof(header->version))) ||
	    !header->count)
		inc_format_error(charset);

	if (header->min != CHARSET_MIN || header->max != CHARSET_MAX ||
	    header->length != CHARSET_LENGTH) {
		log_event("! Incompatible charset file: %.100s", charset);
		fprintf(stderr, "Incompatible charset file: %s\n", charset);
		error();
	}

	if (header->count > CHARSET_SIZE)
		inc_format_error(charset);

	check =
		(unsigned int)header->check[0] |
		((unsigned int)header->check[1] << 8) |
		((unsigned int)header->check[2] << 16) |
		((unsigned int)header->check[3] << 24);
	if (!rec_restoring_now)
		rec_check = check;
	if (rec_check != check) {
		log_event("! Charset file has changed: %.100s", charset);
		fprintf(stderr, "Charset file has changed: %s\n", charset);
		error();
	}

	fread(allchars, header->count, 1, file);
	if (ferror(file)) pexit("fread");
	if (feof(file)) inc_format_error(charset);

	allchars[header->count] = 0;
	if (expand(allchars, extra ? extra : "", sizeof(allchars)))
		inc_format_error(charset);
	real_count = strlen(allchars);

	if (max_count < 0) max_count = CHARSET_SIZE;

	if (min_length != max_length)
		log_event("- Lengths %d to %d, up to %d different characters",
			min_length, max_length, max_count);
	else
		log_event("- Length %d, up to %d different characters",
			min_length, max_count);

	if ((unsigned int)max_count > real_count) {
		log_event("! Only %u characters available", real_count);
		fprintf(stderr, "Warning: only %u characters available\n",
			real_count);
	}

	if (!(db->format->params.flags & FMT_CASE))
	switch (is_mixedcase(allchars)) {
	case -1:
		inc_format_error(charset);

	case 1:
		log_event("! Mixed-case charset, "
			"but the hash type is case-insensitive");
		fprintf(stderr, "Warning: mixed-case charset, "
			"but the current hash type is case-insensitive;\n"
			"some candidate passwords may be unnecessarily "
			"tried more than once.\n");
	}

	if (header->length >= 2)
		char2 = (char2_table)mem_alloc(sizeof(*char2));
	else
		char2 = NULL;
	for (pos = 0; pos < (int)header->length - 2; pos++)
		chars[pos] = (chars_table)mem_alloc(sizeof(*chars[0]));

	rec_compat = 0;
	rec_entry = 0;
	memset(rec_numbers, 0, sizeof(rec_numbers));

	status_init(NULL, 0);
	rec_restore_mode(restore_state);

#ifdef WEBAPI
	if(packet_id) {
		int ret;

		// This is a new packet
		inc_rec_state.initialized = 0;

		inc_rec_state.words_requested = packet_rounds;
		inc_rec_state.words_generated = 0;
		inc_rec_state.cc_0 = -1;
		inc_rec_state.cc_1 = -1;
		inc_rec_state.cc_2 = -1;

		ret = sscanf(packet_state, "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%127[^\n]", 
				&rec_entry,
				&rec_numbers[0], &rec_numbers[1], &rec_numbers[2], &rec_numbers[3],
				&rec_numbers[4], &rec_numbers[5], &rec_numbers[6], &rec_numbers[7],
				&inc_rec_state.pos,
				&inc_rec_state.numbers_cache,
				&inc_rec_state.cc_0, &inc_rec_state.cc_1, &inc_rec_state.cc_2,
				inc_rec_state.key_i);
		
		if(ret < 14 || ret > 15) {
			log_event("Invalid packet state, found %d fields in %s", ret, packet_state);
			// XXX - Handle more gracefully..
			error();
		}


		status_init(webapi_inc_get_progress, 0);
	}
#endif

	rec_init(db, save_state);

	ptr = header->order + (entry = rec_entry) * 3;
	memcpy(numbers, rec_numbers, sizeof(numbers));

	crk_init(db, fix_state, NULL);

	last_count = last_length = -1;

	entry--;
	while (ptr < &header->order[sizeof(header->order) - 1]) {
		entry++;
		length = *ptr++; fixed = *ptr++; count = *ptr++;

		if (length >= CHARSET_LENGTH ||
			fixed > length ||
			count >= CHARSET_SIZE) inc_format_error(charset);

		if (entry != rec_entry)
			memset(numbers, 0, sizeof(numbers));

		if (count >= real_count || (fixed && !count)) continue;

		if ((int)length + 1 < min_length ||
			(int)length >= max_length ||
			(int)count >= max_count) continue;

		if ((int)length != last_length) {
			inc_new_length(last_length = length,
				header, file, charset, char1, char2, chars);
			last_count = -1;
		}
		if ((int)count > last_count)
			inc_new_count(length, last_count = count, charset,
				allchars, char1, char2, chars);

		if (!length && !min_length) {
			min_length = 1;
			if (crk_process_key("")) break;
		}

#if 0
		log_event("- Trying length %d, fixed @%d, character count %d",
			length + 1, fixed + 1, count + 1);
#endif
		if (inc_key_loop(length, fixed, count, char1, char2, chars))
			break;
	}
	

	crk_done();
	rec_done(event_abort);

	for (pos = 0; pos < (int)header->length - 2; pos++)
		MEM_FREE(chars[pos]);
	MEM_FREE(char2);
	MEM_FREE(header);

	fclose(file);
}
Ejemplo n.º 25
0
int stat_to_mom(

  const char       *job_id,
  struct stat_cntl *cntl)  /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];
  struct pbsnode       *node;
  int                   handle = -1;
  unsigned long         job_momaddr = -1;
  unsigned short        job_momport = -1;
  char                 *job_momname = NULL;
  job                  *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return(PBSE_JOBNOTFOUND);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) || 
      (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str))
    {
    job_mutex.unlock();
    snprintf(log_buf, sizeof(log_buf),
      "Job %s missing MOM's information. Skipping statting on this job", pjob->ji_qs.ji_jobid);
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    return PBSE_BAD_PARAMETER;
    }

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  job_mutex.unlock();

  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return PBSE_MEM_MALLOC;
    }

  if (cntl->sc_type == 1)
    snprintf(newrq->rq_ind.rq_status.rq_id, sizeof(newrq->rq_ind.rq_status.rq_id), "%s", job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr,job_momport,job_momname);
  free(job_momname);

  if (node == NULL)
    return PBSE_UNKNODE;
  if ((node->nd_state & INUSE_NOT_READY)||(node->nd_power_state != POWER_STATE_RUNNING))
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, sizeof(log_buf),
          "node '%s' is allocated to job but in state 'down'",
          node->get_name());

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf);
      }

    node->unlock_node(__func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return PBSE_NORELYMOM;
    }

  /* get connection to MOM */
  node->unlock_node(__func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL);

  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq, true)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return(rc);
  }  /* END stat_to_mom() */
static void inc_format_error(char *charset)
{
	log_event("! Incorrect charset file format: %.100s", charset);
	fprintf(stderr, "Incorrect charset file format: %s\n", charset);
	error();
}
Ejemplo n.º 27
0
/**
 * @brief
 *		Function to migrate filesystem data to database.
 * 		Reads serverdb, scheddb, job files, node, nodestate, queue, resv information
 * 		from the filesystem and save them into the database. All the information is
 * 		recovered and saved into the database under a single database transaction,
 * 		so any failure rolls back all the updates to the database. If all the updates
 * 		to the database succeed, only then the respective files are deleted from the
 * 		filesystem, else no deletion takes place.
 *
 * @return	Error code
 * @retval	0	: success
 * @retval	-1	: Failure
 *
 */
int
svr_migrate_data_from_fs(void)
{
	int baselen;
	struct dirent *pdirent;
	DIR *dir;
	int had;
	char *job_suffix = JOB_FILE_SUFFIX;
	int job_suf_len = strlen(job_suffix);
	job *pjob = NULL;
	pbs_queue *pque;
	resc_resv *presv;
	char *psuffix;
	int rc;
	int recovered = 0;
	char    basen[MAXPATHLEN+1];
	char	scrfile[MAXPATHLEN+1];
	char	jobfile[MAXPATHLEN+1];
	char	origdir[MAXPATHLEN+1];
	int fd;
	struct stat stbuf;
	char *scrbuf = NULL;
	pbs_db_jobscr_info_t	jobscr;
	pbs_db_obj_info_t		obj;

	path_svrdb_new = build_path(path_priv, PBS_SERVERDB, new_tag);
	path_scheddb = build_path(path_priv, PBS_SCHEDDB, NULL);
	path_scheddb_new = build_path(path_priv, PBS_SCHEDDB, new_tag);
	path_queues = build_path(path_priv, PBS_QUEDIR, suffix_slash);
	path_resvs = build_path(path_priv, PBS_RESVDIR, suffix_slash);
	path_nodes = build_path(path_priv, NODE_DESCRIP, NULL);
	path_nodestate = build_path(path_priv, NODE_STATUS, NULL);

	/*    If not a "create" initialization, recover server db */
	/*    and sched db					  */
	if (chk_save_file(path_svrdb) != 0) {
		fprintf(stderr, "No serverdb found to update to datastore\n");
		return (0);
	}

	if (setup_resc(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	init_server_attrs();

	/* start a database transation for the whole recovery */
	if (pbs_db_begin_trx(svr_db_conn, 0, 0) != 0)
		return (-1);

	/* preprocess the nodes file to convert old properties to resources */
	if (setup_nodes_fs(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* Open the server database (save file) and read it in */
	if (svr_recov_fs(path_svrdb) == -1) {
		fprintf(stderr, "%s\n", msg_init_baddb);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* save server information to database now */
	if (svr_save_db(&server, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save server db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* now do sched db */
	if (sched_recov_fs(path_scheddb) == -1) {
		fprintf(stderr, "Unable to recover scheddb\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	if (sched_save_db(dflt_scheduler, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save scheduler db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}
	set_sched_default(dflt_scheduler, 0);
	/* save current working dir before any chdirs */
	if (getcwd(origdir, MAXPATHLEN) == NULL) {
		fprintf(stderr, "getcwd failed\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	if (chdir(path_queues) != 0) {
		fprintf(stderr, msg_init_chdir, path_queues);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	had = server.sv_qs.sv_numque;
	server.sv_qs.sv_numque = 0;
	dir = opendir(".");
	if (dir == NULL) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	while (errno = 0, (pdirent = readdir(dir)) != NULL) {
		if (chk_save_file(pdirent->d_name) == 0) {
			if ((pque = que_recov_fs(pdirent->d_name)) !=
				NULL) {
				/* que_recov increments sv_numque */
				fprintf(stderr, msg_init_recovque,
					pque->qu_qs.qu_name);
				fprintf(stderr, "\n");
				if (que_save_db(pque, QUE_SAVE_NEW) != 0) {
					fprintf(stderr,
						"Could not save queue info for queue %s\n",
						pque->qu_qs.qu_name);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);
	if (had != server.sv_qs.sv_numque) {
		fprintf(stderr, msg_init_expctq, had, server.sv_qs.sv_numque);
		fprintf(stderr, "\n");
	}

	/* Open and read in node list if one exists */
	if (setup_nodes_fs(0) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	/*
	 * Recover reservations.
	 */
	if (chdir(path_resvs) != 0) {
		fprintf(stderr, msg_init_chdir, path_resvs);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	dir = opendir(".");
	if (dir == NULL) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	while (errno = 0, (pdirent = readdir(dir)) != NULL) {
		if (chk_save_file(pdirent->d_name) == 0) {
			presv = (resc_resv *)
				job_or_resv_recov_fs(pdirent->d_name,
				RESC_RESV_OBJECT);
			if (presv != NULL) {
				if (resv_save_db(presv, SAVERESV_NEW) != 0) {
					fprintf(stderr,
						"Could not save resv info for resv %s\n",
						presv->ri_qs.ri_resvID);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);

	/*
	 *   Recover jobs
	 */
	if (chdir(path_jobs) != 0) {
		fprintf(stderr, msg_init_chdir, path_jobs);
		fprintf(stderr, "\n");
		chdir(origdir);
		return (-1);
	}

	server.sv_qs.sv_numjobs = 0;
	recovered = 0;
	dir = opendir(".");
	if (dir == NULL) {
		fprintf(stderr, "%s\n", msg_init_nojobs);
	} else {
		/* Now, for each job found ... */
		while (errno = 0,
			(pdirent = readdir(dir)) != NULL) {
			if (chk_save_file(pdirent->d_name) != 0)
				continue;

			/* recover the job */
			baselen = strlen(pdirent->d_name) - job_suf_len;
			psuffix = pdirent->d_name + baselen;
			if (strcmp(psuffix, job_suffix))
				continue;

			if ((pjob = job_recov_fs(pdirent->d_name)) == NULL) {
				(void)strcpy(basen, pdirent->d_name);
				psuffix = basen + baselen;
				(void)strcpy(psuffix, JOB_BAD_SUFFIX);
				(void)snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
					basen);
				log_event(PBSEVENT_SYSTEM,
					PBS_EVENTCLASS_SERVER, LOG_NOTICE,
					msg_daemonname, log_buffer);
				continue;
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* load the job script file */
				strcpy(scrfile, path_jobs);
#ifndef WIN32
				/* under WIN32, there's already a prefixed '/' */
				(void) strcat(scrfile, "/");
#endif
				strcat(scrfile, pdirent->d_name);
				baselen = strlen(scrfile) - strlen(JOB_FILE_SUFFIX);
				scrfile[baselen] = 0; /* put null char */
				strcat(scrfile, JOB_SCRIPT_SUFFIX);
				rc = 1;
#ifdef WIN32
				if ((fd = open(scrfile, O_BINARY | O_RDONLY)) != -1)
#else
				if ((fd = open(scrfile, O_RDONLY)) != -1)
#endif
				{
					/* load the script */
					if (fstat(fd, &stbuf) == 0) {
						if ((scrbuf = malloc(stbuf.st_size + 1))) {
							if (read(fd, scrbuf, stbuf.st_size) == stbuf.st_size) {
								scrbuf[stbuf.st_size] = '\0'; /* null character */
								rc = 0; /* success loading */
							}
						}
					}
					close(fd);
				}

				if (rc != 0) {
					fprintf(stderr, "Could not recover script file for job %s\n", pjob->ji_qs.ji_jobid);
					(void) strcpy(basen, scrfile);
					psuffix = basen + strlen(scrfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_BAD_SUFFIX);

					(void) strcpy(jobfile, scrfile);
					psuffix = jobfile + strlen(jobfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_FILE_SUFFIX);
#ifdef WIN32
					if (MoveFileEx(jobfile, basen,
						MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) {
						errno = GetLastError();
						snprintf(log_buffer, sizeof(log_buffer), "MoveFileEx(%s, %s) failed!",
							jobfile, basen);
						log_err(errno, "script", log_buffer);

					}
					secure_file(basen, "Administrators",
						READS_MASK | WRITES_MASK | STANDARD_RIGHTS_REQUIRED);
#else
					if (rename(jobfile, basen) == -1) {
						snprintf(log_buffer, sizeof(log_buffer), "error renaming job file %s",
							jobfile);
						log_err(errno, "job_recov", log_buffer);
					}
#endif
					(void) snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
						basen);
					log_event(PBSEVENT_SYSTEM,
						PBS_EVENTCLASS_SERVER, LOG_NOTICE,
						msg_daemonname, log_buffer);
					free(scrbuf);
					scrbuf = NULL;
					continue;
				}
			}

			/* now save job first */
			if (job_save_db(pjob, SAVEJOB_NEW) != 0) {
				fprintf(stderr, "Could not save job info for jobid %s\n",
					pjob->ji_qs.ji_jobid);
				if (svr_db_conn->conn_db_err)
					fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
				(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
				(void) closedir(dir);
				chdir(origdir);
				free(scrbuf);
				return (-1);
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* save job script */
				strcpy(jobscr.ji_jobid, pjob->ji_qs.ji_jobid);
				jobscr.script = scrbuf;
				obj.pbs_db_obj_type = PBS_DB_JOBSCR;
				obj.pbs_db_un.pbs_db_jobscr = &jobscr;
				if (pbs_db_save_obj(svr_db_conn, &obj, PBS_INSERT_DB) != 0) {
					fprintf(stderr, "Could not save job script for jobid %s\n",
						pjob->ji_qs.ji_jobid);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
					free(scrbuf);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
				free(scrbuf);
				scrbuf = NULL;
			}

			recovered++;
		}
		if (errno != 0 && errno != ENOENT) {
			if (pjob)
				fprintf(stderr, "readdir error for jobid %s\n", pjob->ji_qs.ji_jobid);
			else
				fprintf(stderr, "readdir error\n");
			(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
			free(scrbuf);
			(void) closedir(dir);
			chdir(origdir);
			return (-1);
		}
		(void) closedir(dir);
		fprintf(stderr, msg_init_exptjobs, recovered);
		fprintf(stderr, "\n");
	}

	if (save_nodes_db(0, NULL) != 0) {
		fprintf(stderr, "Could not save nodes\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	if (pbs_db_end_trx(svr_db_conn, PBS_DB_COMMIT) == 0) {
		rm_migrated_files(path_priv);
		chdir(origdir);
		return (0);
	}
	chdir(origdir);
	return -1;
}
Ejemplo n.º 28
0
int req_rerunjob(
   
  struct batch_request *preq)

  {
  int     rc = PBSE_NONE;
  job    *pjob;

  int     Force;
  int     MgrRequired = TRUE;
  char    log_buf[LOCAL_LOG_BUF_SIZE];

  /* check if requestor is admin, job owner, etc */

  if ((pjob = chk_job_request(preq->rq_ind.rq_rerun, preq)) == 0)
    {
    /* FAILURE */

    /* chk_job_request calls req_reject() */

    rc = PBSE_SYSTEM;
    return rc; /* This needs to fixed to return an accurate error */
    }

  /* the job must be running or completed */

  if (pjob->ji_qs.ji_state >= JOB_STATE_EXITING)
    {
    if (pjob->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET)
      {
      /* allow end-users to rerun checkpointed jobs */

      MgrRequired = FALSE;
      }
    }
  else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /* job is running */

    /* NO-OP */
    }
  else
    {
    /* FAILURE - job is in bad state */
    rc = PBSE_BADSTATE;
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "job %s is in a bad state",
        preq->rq_ind.rq_rerun);
    req_reject(rc, 0, preq, NULL, log_buf);
    unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
    return rc;
    }

  if ((MgrRequired == TRUE) &&
      ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0))
    {
    /* FAILURE */

    rc = PBSE_PERM;
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
        "additional permissions required (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)");
    req_reject(rc, 0, preq, NULL, log_buf);
    unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
    return rc;
    }

  /* the job must be rerunnable */

  if (pjob->ji_wattr[JOB_ATR_rerunable].at_val.at_long == 0)
    {
    /* NOTE:  should force override this constraint? maybe (???) */
    /*          no, the user is saying that the job will break, and
                IEEE Std 1003.1 specifically says rerun is to be rejected
                if rerunable==FALSE -garrick */

    rc = PBSE_NORERUN;
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "job %s not rerunnable",
        preq->rq_ind.rq_rerun);
    req_reject(rc, 0, preq, NULL, log_buf);
    unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL);
    return rc;
    }

  if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /* ask MOM to kill off the job if it is running */
    static const char *rerun = "rerun";
    char              *extra = strdup(rerun);

    rc = issue_signal(&pjob, "SIGKILL", post_rerun, extra);
    }
  else
    { 
    if (pjob->ji_wattr[JOB_ATR_hold].at_val.at_long == HOLD_n)
      {
      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_QUEUED, FALSE);
      }
    else
      {
      svr_setjobstate(pjob, JOB_STATE_HELD, JOB_SUBSTATE_HELD, FALSE);
      }

    /* reset some job attributes */
    
    pjob->ji_wattr[JOB_ATR_comp_time].at_flags &= ~ATR_VFLAG_SET;
    pjob->ji_wattr[JOB_ATR_reported].at_flags &= ~ATR_VFLAG_SET;

    set_statechar(pjob);

    rc = -1;
    }

  if (preq->rq_extend && !strncasecmp(preq->rq_extend, RERUNFORCE, strlen(RERUNFORCE)))
    Force = 1;
  else
    Force = 0;

  switch (rc)
    {

    case - 1:

      /* completed job was requeued */

      /* clear out job completion time if there is one */
      break;

    case 0:

      /* requeue request successful */

      if (pjob != NULL)
        pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;

      break;

    case PBSE_SYSTEM: /* This may not be accurate...*/
      rc = PBSE_MEM_MALLOC;
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Can not allocate memory");
      req_reject(rc, 0, preq, NULL, log_buf);
      return rc;
      break;

    default:

      if (Force == 0)
        {
        rc = PBSE_MOMREJECT;
        snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "Rejected by mom");
        req_reject(rc, 0, preq, NULL, log_buf);
        if (pjob != NULL)
          unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL);
        return rc;
        }
      else
        {
        int           newstate;
        int           newsubst;
        unsigned int  dummy;
        char         *tmp;
        long          cray_enabled = FALSE;
       
        if (pjob != NULL)
          {
          get_svr_attr_l(SRV_ATR_CrayEnabled, &cray_enabled);

          if ((cray_enabled == TRUE) &&
              (pjob->ji_wattr[JOB_ATR_login_node_id].at_val.at_str != NULL))
            tmp = parse_servername(pjob->ji_wattr[JOB_ATR_login_node_id].at_val.at_str, &dummy);
          else
            tmp = parse_servername(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, &dummy);
          
          /* Cannot communicate with MOM, forcibly requeue job.
             This is a relatively disgusting thing to do */
          
          sprintf(log_buf, "rerun req to %s failed (rc=%d), forcibly requeueing job",
            tmp, rc);

          free(tmp);
  
          log_event(
            PBSEVENT_ERROR | PBSEVENT_ADMIN | PBSEVENT_JOB,
            PBS_EVENTCLASS_JOB,
            pjob->ji_qs.ji_jobid,
            log_buf);
          
          log_err(-1, __func__, log_buf);
          
          strcat(log_buf, ", previous output files may be lost");
  
          svr_mailowner(pjob, MAIL_OTHER, MAIL_FORCE, log_buf);
  
          svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_RERUN3, FALSE);
  
          rel_resc(pjob); /* free resc assigned to job */
          
          if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_HOTSTART) == 0)
            {
            /* in case of server shutdown, don't clear exec_host */
            /* will use it on hotstart when next comes up        */
            
            job_attr_def[JOB_ATR_exec_host].at_free(&pjob->ji_wattr[JOB_ATR_exec_host]);
  
            job_attr_def[JOB_ATR_session_id].at_free(&pjob->ji_wattr[JOB_ATR_session_id]);
            
            job_attr_def[JOB_ATR_exec_gpus].at_free(&pjob->ji_wattr[JOB_ATR_exec_gpus]);          
            }
          
          pjob->ji_modified = 1;    /* force full job save */
          
          pjob->ji_momhandle = -1;
          pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_StagedIn;
          
          svr_evaljobstate(pjob, &newstate, &newsubst, 0);
          svr_setjobstate(pjob, newstate, newsubst, FALSE);
          }
        }

      break;
    }  /* END switch (rc) */

  /* So job has run and is to be rerun (not restarted) */
  if (pjob == NULL)
    {
    rc = PBSE_JOB_RERUN;
    }
  else
    {
    pjob->ji_qs.ji_svrflags = (pjob->ji_qs.ji_svrflags &
        ~(JOB_SVFLG_CHECKPOINT_FILE |JOB_SVFLG_CHECKPOINT_MIGRATEABLE |
          JOB_SVFLG_CHECKPOINT_COPIED)) | JOB_SVFLG_HASRUN;
    
    sprintf(log_buf, msg_manager, msg_jobrerun, preq->rq_user, preq->rq_host);
    log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf);

    reply_ack(preq);
  
    /* note in accounting file */
    account_record(PBS_ACCT_RERUN, pjob, NULL);
    unlock_ji_mutex(pjob, __func__, "6", LOGLEVEL);
    }

  return rc;
  }  /* END req_rerunjob() */
Ejemplo n.º 29
0
/* Note, in extremely high load cases, the alloc value in /proc/net/sockstat can exceed the max value. This will substantially slow down throughput and generate connection failures (accept gets a EMFILE error). As the client is designed to run on each submit host, that issue shouldn't occur. The client must be restarted to clear out this issue. */
int start_listener(
    
  const char   *server_ip,
  int            server_port,
  void          *(*process_meth)(void *))

  {
  struct sockaddr_in  adr_svr;
  struct sockaddr_in  adr_client;
  int                 rc = PBSE_NONE;
  int                 sockoptval = 1;
  int                 len_inet = sizeof(struct sockaddr_in);
  int                *new_conn_port = NULL;
  int                 listen_socket = 0;
  int                 total_cntr = 0;
  pthread_t           tid;
  pthread_attr_t      t_attr;
  int objclass = 0;
  char msg_started[1024];

  memset(&adr_svr, 0, sizeof(adr_svr));
  adr_svr.sin_family = AF_INET;

  if (!(adr_svr.sin_port = htons(server_port)))
    {
    }
  else if ((adr_svr.sin_addr.s_addr = inet_addr(server_ip)) == INADDR_NONE)
    {
    rc = PBSE_SOCKET_FAULT;
    }
  else if ((listen_socket = socket_get_tcp()) < 0)
    {
    /* Can not get socket for listening */
    rc = PBSE_SOCKET_FAULT;
    }
  else if (bind(listen_socket, (struct sockaddr *)&adr_svr, sizeof(struct sockaddr_in)) == -1)
    {
    /* Can not bind local socket */
    rc = PBSE_SOCKET_FAULT;
    }
  else if (setsockopt(listen_socket, SOL_SOCKET, SO_REUSEADDR, (void *)&sockoptval, sizeof(sockoptval)) == -1)
    {
    rc = PBSE_SOCKET_FAULT;
    }
  else if (listen(listen_socket, 128) == -1)
    {
    /* Can not listener on local socket */
    rc = PBSE_SOCKET_LISTEN;
    }
  else if ((rc = pthread_attr_init(&t_attr)) != 0)
    {
    /* Can not init thread attribute structure */
    rc = PBSE_THREADATTR;
    }
  else if ((rc = pthread_attr_setdetachstate(&t_attr, PTHREAD_CREATE_DETACHED)) != 0)
    {
    /* Can not set thread initial state as detached */
    pthread_attr_destroy(&t_attr);
    }
  else
    {
    log_get_set_eventclass(&objclass, GETV);
    if (objclass == PBS_EVENTCLASS_TRQAUTHD)
      {
      snprintf(msg_started, sizeof(msg_started),
        "TORQUE authd daemon started and listening on IP:port %s:%d", server_ip, server_port);
      log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_TRQAUTHD,
        msg_daemonname, msg_started);
      }
    while (1)
      {
      if ((new_conn_port = (int *)calloc(1, sizeof(int))) == NULL)
        {
        printf("Error allocating new connection handle on accept.\n");
        break;
        }

      if ((*new_conn_port = accept(listen_socket, (struct sockaddr *)&adr_client, (socklen_t *)&len_inet)) == -1)
        {
        if (errno == EMFILE)
          {
          sleep(1);
          printf("Temporary pause\n");
          }
        else
          {
          printf("error in accept %s\n", strerror(errno));
          break;
          }
        
        errno = 0;
        free(new_conn_port);
        new_conn_port = NULL;
        }
      else
        {
        if (debug_mode == TRUE)
          {
          process_meth((void *)new_conn_port);
          }
        else
          {
          pthread_create(&tid, &t_attr, process_meth, (void *)new_conn_port);
          }
        }
      if (debug_mode == TRUE)
        {
        if (total_cntr % 1000 == 0)
          {
          printf("Total requests: %d\n", total_cntr);
          }
        total_cntr++;
        }
      }
    if (new_conn_port != NULL)
      {
      free(new_conn_port);
      }
    pthread_attr_destroy(&t_attr);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, "net_srvr",
                "Socket close of network listener requested");
    }

  if (listen_socket != -1)
    close(listen_socket);

  return(rc);
  } /* END start_listener() */
Ejemplo n.º 30
0
void req_movejob(

  struct batch_request *req)

  {
#ifndef NDEBUG
  char *id = "req_movejob";
#endif
  job *jobp;

  jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req);

  if (jobp == NULL)
    {
    return;
    }

  if ((jobp->ji_qs.ji_state != JOB_STATE_QUEUED) &&
      (jobp->ji_qs.ji_state != JOB_STATE_HELD) &&
      (jobp->ji_qs.ji_state != JOB_STATE_WAITING))
    {
#ifndef NDEBUG
    sprintf(log_buffer, "%s %d",
            pbse_to_txt(PBSE_BADSTATE),
            jobp->ji_qs.ji_state);

    strcat(log_buffer, id);

    log_event(
      PBSEVENT_DEBUG,
      PBS_EVENTCLASS_JOB,
      jobp->ji_qs.ji_jobid,
      log_buffer);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return;
    }

  /*
   * svr_movejob() does the real work, handles both local and
   * network moves
   */

  switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, req))
    {

    case ROUTE_SUCCESS:

      /* success */

      strcpy(log_buffer, msg_movejob);

      sprintf(log_buffer + strlen(log_buffer), msg_manager,
              req->rq_ind.rq_move.rq_destin,
              req->rq_user,
              req->rq_host);

      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        jobp->ji_qs.ji_jobid,
        log_buffer);

      reply_ack(req);

      break;

    case ROUTE_PERM_FAILURE:

    case ROUTE_RETRY:

      /* fail */

      /* NOTE:  can pass detailed response to requestor (NYI) */

      req_reject(pbs_errno, 0, req, NULL, NULL);

      break;

    case ROUTE_DEFERRED:

      /* deferred, will be handled by    */
      /* post_movejob() when the child completes */

      /* NO-OP */

      break;
    }  /* END switch (svr_movejob(jobp,req->rq_ind.rq_move.rq_destin,req)) */

  return;
  }  /* END req_movejob() */