Exemplo n.º 1
0
/*
 * schd_alterserver - set a single attribute on the server object.
 *
 * Builds a one-element attribute list { name, value, op = SET } and hands
 * it to pbs_manager() as a MGR_CMD_SET request against MGR_OBJ_SERVER.
 *
 * Returns whatever pbs_manager() returned; a non-zero result is also
 * logged together with the current pbs_errno.
 */
static int
schd_alterserver(int sv_conn, char *name, char *value)
  {
  char       *whoami = "schd_alterserver";
  AttrOpList  request;
  int         status;

  /* Single-entry list: no resource part, nothing chained after it. */
  request.name     = name;
  request.resource = NULL;
  request.value    = value;
  request.op       = SET;
  request.next     = NULL;

  status = pbs_manager(sv_conn, MGR_CMD_SET, MGR_OBJ_SERVER, "", &request,
                       NULL);

  if (status == 0)
    return (0);

  (void)sprintf(log_buffer,
                "pbs_alterserver(%s, %s) failed: %d", name, value, pbs_errno);
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);

  return (status);
  }
Exemplo n.º 2
0
/*
 * is_stat_get()
 *
 * Reads the status information available on chan (get_status_info) and
 * hands it to process_alps_status() when node_name is a reporter node,
 * or to process_status_info() otherwise.  The status container is freed
 * here before returning.
 *
 * @param node_name - name of the node the status was received from
 * @param chan      - channel the status is read from
 * @return the return code of whichever processing routine was called
 */
int is_stat_get(

  char            *node_name,
  struct tcp_chan *chan)

  {
  int             rc;
  char            log_buf[LOCAL_LOG_BUF_SIZE];
  dynamic_string *status_info;

  if (LOGLEVEL >= 3)
    {
    /* bounded: node_name is not under our control and log_buf is fixed-size */
    snprintf(log_buf, sizeof(log_buf), "received status from node %s", node_name);
    log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf);
    }

  status_info = get_status_info(chan);
 
  if (is_reporter_node(node_name))
    rc = process_alps_status(node_name, status_info);
  else
    rc = process_status_info(node_name, status_info);

  free_dynamic_string(status_info);

  return(rc);
  }  /* END is_stat_get() */
Exemplo n.º 3
0
void
schd_comment_server(char *reason)
  {
  char   *id = "schd_comment_server";
  char   *msg_ptr;
  static char *old_msg = NULL;

  if (reason == NULL)
    msg_ptr = "";
  else
    msg_ptr = reason;

  if (old_msg == NULL)
    {
    old_msg = (char *)malloc(MAX_TXT);

    if (old_msg == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "malloc(old_msg)");
      return;
      }

    old_msg[0] = '\0';
    }

  if (strncmp(msg_ptr, old_msg, MAX_TXT - 1) != 0)
    {
    schd_alterserver(connector, ATTR_comment, msg_ptr);
    strncpy(old_msg, msg_ptr, MAX_TXT - 1);
    }

  return;
  }
Exemplo n.º 4
0
/*
 * job_log_close()
 *
 * Closes the job log file if it is currently marked open: automatic
 * switching is turned off, an optional "Log closed" record is written,
 * the stream is closed and the open flag is cleared.  When built with
 * SYSLOG, the syslog connection is closed as well if it was opened.
 */
void job_log_close(

  int msg)  /* BOOLEAN - write close message */

  {
  const int log_is_open = (job_log_opened == 1);

  if (log_is_open)
    {
    job_log_auto_switch = 0;

    /* the farewell record has to be written before the stream goes away */
    if (msg != 0)
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, "Log", "Log closed");

    fclose(joblogfile);
    job_log_opened = 0;
    }

#if SYSLOG

  if (syslogopen)
    {
    closelog();
    }

#endif /* SYSLOG */
  }  /* END job_log_close() */
Exemplo n.º 5
0
/*
 * Alter a job's actual attributes.
 *
 * Sends a single attribute { name, rsrc, value } for job->jobid to the
 * server through pbs_alterjob().
 *
 * Returns the pbs_alterjob() result.  A non-zero result is logged together
 * with pbs_errno; NULL strings are shown as "(null)" in that message because
 * handing a NULL pointer to a %s conversion is undefined behaviour.
 */
int
schd_alterjob(int sv_conn, Job *job, char *name, char *value, char *rsrc)
  {
  char   *id = "schd_alterjob";
  int     err = 0;
  AttrList atp;

  /* Fill the attribute structure with function parameters */
  atp.resource = rsrc;
  atp.value    = value;
  atp.name     = name;
  atp.next     = NULL;

  err = pbs_alterjob(sv_conn, job->jobid, &atp, NULL);

  if (err)
    {
    (void)sprintf(log_buffer,
                  "pbs_alterjob(%s, %s, %s, %s) failed: %d",
                  job->jobid,
                  (name  != NULL) ? name  : "(null)",
                  (value != NULL) ? value : "(null)",
                  (rsrc  != NULL) ? rsrc  : "(null)",
                  pbs_errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    }

  return (err);
  }
Exemplo n.º 6
0
/*
 * check_if_orphaned()
 *
 * Task entry point that checks whether an ALPS reservation is orphaned and,
 * if so, asks a login node to delete it.
 *
 * @param vp - heap-allocated reservation id string; this function takes
 *             ownership of it (it is either freed here or attached to the
 *             delete request as rq_extend).
 * @return always NULL
 *
 * The reservation is removed from the local table before the request is
 * sent, i.e. the request is assumed to succeed.
 */
void *check_if_orphaned(

  void *vp)

  {
  char                 *rsv_id = (char *)vp;
  char                  job_id[PBS_MAXSVRJOBID];
  struct batch_request *preq;
  int                   handle = -1;
  int                   retries = 0;
  struct pbsnode       *pnode;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  if (is_orphaned(rsv_id, job_id) == TRUE)
    {
    if((preq = alloc_br(PBS_BATCH_DeleteReservation)) == NULL)
      {
      /* nobody else will release the id if we bail out here */
      free(rsv_id);
      return NULL;
      }

    /* NOTE(review): rsv_id is presumably released by free_br() through
     * rq_extend - confirm against free_br() */
    preq->rq_extend = rsv_id;

    /* Assume the request will be successful and remove the RSV from the hash table */
    remove_alps_reservation(rsv_id);

    if ((pnode = get_next_login_node(NULL)) != NULL)
      {
      struct in_addr hostaddr;
      int            local_errno;
      pbs_net_t      momaddr;

      memcpy(&hostaddr, &pnode->nd_sock_addr.sin_addr, sizeof(hostaddr));
      momaddr = ntohl(hostaddr.s_addr);

      snprintf(log_buf, sizeof(log_buf),
        "Found orphan ALPS reservation ID %s for job %s; asking %s to remove it",
        rsv_id,
        job_id,
        pnode->nd_name);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, __func__, log_buf);

      /* up to three connection attempts */
      while ((handle < 0) &&
             (retries < 3))
        {
        handle = svr_connect(momaddr, pnode->nd_mom_port, &local_errno, pnode, NULL, ToServerDIS);
        retries++;
        }

      /* unlock before the network transaction */
      unlock_node(pnode, __func__, NULL, LOGLEVEL);
      
      if (handle >= 0)
        issue_Drequest(handle, preq, true);
      }

    /* release the request whether or not a login node was available */
    free_br(preq);
    }
  else
    free(rsv_id);

  return(NULL);
  } /* END check_if_orphaned() */
Exemplo n.º 7
0
/*
 * update_internal_failure_counts()
 *
 * Tracks consecutive network successes and closely spaced failures for this
 * node, based on the result code of the latest operation.
 *
 *  - rc == PBSE_NONE: the success streak grows; once it exceeds one the
 *    failure counter is reset and a set INUSE_NETWORK_FAIL flag is removed.
 *  - otherwise: the failure counter grows and the success streak is reset;
 *    once more than two failures have accumulated and the flag is not yet
 *    set, the node state is updated with INUSE_NETWORK_FAIL.
 *
 * @param rc - result of the operation that was just attempted
 * @return true only when this call set INUSE_NETWORK_FAIL on the node
 */
bool pbsnode::update_internal_failure_counts(

  int rc)

  {
  char log_buf[2048];

  if (rc != PBSE_NONE)
    {
    this->nd_proximal_failures++;
    this->nd_consecutive_successes = 0;

    const bool already_flagged = ((this->nd_state & INUSE_NETWORK_FAIL) != 0);

    if ((this->nd_proximal_failures <= 2) ||
        (already_flagged))
      return(false);

    snprintf(log_buf, sizeof(log_buf),
      "Node '%s' has had %d failures in close proximity, marking offline.",
      this->nd_name.c_str(), this->nd_proximal_failures);
    log_record(1, 2, __func__, log_buf);

    update_node_state(this, INUSE_NETWORK_FAIL);

    return(true);
    }

  /* success path */
  this->nd_consecutive_successes++;

  if (this->nd_consecutive_successes <= 1)
    return(false);

  this->nd_proximal_failures = 0;

  if (this->nd_state & INUSE_NETWORK_FAIL)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Node '%s' has had two or more consecutive network successes, marking online.",
      this->nd_name.c_str());
    log_record(1, 2, __func__, log_buf);
    this->remove_node_state_flag(INUSE_NETWORK_FAIL);
    }

  return(false);
  }
Exemplo n.º 8
0
/*
 * unlock_ji_mutex()
 *
 * Unlocks pjob->ji_mutex, optionally logging what is happening.
 *
 * @param pjob    - job whose mutex is released
 * @param id      - name of the calling method, used in the log text
 * @param msg     - optional position marker for the log text ("no pos" if NULL)
 * @param logging - verbosity: >= 10 logs the unlock, >= 20 also logs a
 *                  failed unlock
 * @return PBSE_NONE on success, PBSE_MUTEX if pthread_mutex_unlock() failed,
 *         -1 if the job has no mutex
 *
 * Logging is best-effort: if the message buffer cannot be allocated the
 * unlock still happens, only the debug records are skipped.
 */
int unlock_ji_mutex(

  job        *pjob,
  const char *id,
  const char *msg,
  int        logging)

  {
  int rc = PBSE_NONE;
  char *err_msg = NULL;
  char stub_msg[] = "no pos";

  if (logging >= 10)
    {
    err_msg = (char *)calloc(1, MSG_LEN_LONG);
    if (msg == NULL)
      msg = stub_msg;

    /* calloc can fail; never format into a NULL buffer */
    if (err_msg != NULL)
      {
      snprintf(err_msg, MSG_LEN_LONG, "unlocking %s in method %s-%s", pjob->ji_qs.ji_jobid, id, msg);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, id, err_msg);
      }
    }

  if (pjob->ji_mutex != NULL)
    {
    if (pthread_mutex_unlock(pjob->ji_mutex) != 0)
      {
      if ((logging >= 20) &&
          (err_msg != NULL))
        {
        snprintf(err_msg, MSG_LEN_LONG, "ALERT: cannot unlock job %s mutex in method %s",
                                            pjob->ji_qs.ji_jobid, id);
        log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, id, err_msg);
        }
      rc = PBSE_MUTEX;
      }
    }
  else
    {
    rc = -1;
    log_err(rc, __func__, "Uninitialized mutex pass to pthread_mutex_unlock!");
    }

   if (err_msg != NULL)
     free(err_msg);

   return rc;
   }
Exemplo n.º 9
0
/*
 * schd_get_queue_util - distribute the global job list over the queues.
 *
 * While schd_AllJobs is non-empty on entry, the submit queue and then every
 * batch queue are asked (queue_claim_jobs) to take their jobs off that list.
 * A negative result from a queue is logged as a warning and otherwise
 * ignored.  Always returns 0.
 */
int
schd_get_queue_util(void)
  {
  char      *whoami = "get_queue_util";
  QueueList *entry;
  int        claimed;

  if (!schd_AllJobs)
    return (0);

  /* The submit queue gets first pick ... */
  entry   = schd_SubmitQueue;
  claimed = queue_claim_jobs(entry->queue, &schd_AllJobs);

  if (claimed < 0)
    {
    sprintf(log_buffer, "Warning: queue %s failed to claim jobs",
            entry->queue->qname);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);
    }

  /* ... followed by each batch queue in list order. */
  entry = schd_BatchQueues;

  while (entry != NULL)
    {
    claimed = queue_claim_jobs(entry->queue, &schd_AllJobs);

    if (claimed < 0)
      {
      sprintf(log_buffer, "Warning: queue %s failed to claim jobs",
              entry->queue->qname);
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami,
                 log_buffer);
      }

    entry = entry->next;
    }

  return (0);
  }
Exemplo n.º 10
0
/*
 * Record the reason that the current candidate job cannot currently run.
 * When it is decided that the job will remain queued, place the reason
 * string in the comment field of the job structure.
 *
 * job      - job to comment; job->comment is replaced by a fresh copy of
 *            'reason' when the text changes (the old string is freed).
 * reason   - new comment text; NULL is treated as "".
 * optional - non-zero if the comment may be skipped (see below).
 *
 * If the copy of 'reason' cannot be allocated, the failure is logged and
 * both the server and job->comment are left untouched.
 */
void
schd_comment_job(Job *job, char *reason, int optional)
  {
  char   *id = "schd_comment_job";
  char   *msg_ptr;
  char   *old_msg;

  /*
   * If the 'optional' argument is true, then this comment is optional.
   * Do not bother commenting this job if this is not the first time it
   * has been seen, and it has been recently modified (hopefully it was
   * a comment change).  If there is no comment for the job, comment it
   * this time.
   */

  if (optional &&
      (!schd_FirstRun) &&
      (job->comment != NULL) &&
      !(job->flags & JFLAGS_FIRST_SEEN) &&
      (MIN_COMMENT_AGE && ((schd_TimeNow - job->mtime) < MIN_COMMENT_AGE)))
    {
    return;
    }

  if (reason == NULL)
    reason = "";

  old_msg = job->comment;

  /* If there is no old message, or they are different, set it. */
  if ((old_msg == NULL) || (strcmp(reason, old_msg) != 0))
    {

    msg_ptr = schd_strdup(reason);

    /*
     * Bail out before touching anything: sending a NULL value to the
     * server or dropping the existing comment would only make an
     * out-of-memory situation worse.
     */
    if (msg_ptr == NULL)
      {
      log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id,
                 "schd_strdup(job->comment)");
      return;
      }

    /* Alter PBS' view of the job. */
    schd_alterjob(connector, job, ATTR_comment, msg_ptr, NULL);

    /* Copy the new comment into the job field. */

    if (job->comment)
      free(job->comment);

    job->comment = msg_ptr;
    }

  return;
  }
Exemplo n.º 11
0
/*
 * set_note_str()
 *
 * Validates a new note string and, if acceptable, stores it with set_str().
 * A note is rejected when it is longer than MAX_NOTE characters or when it
 * contains a newline; both checks are always performed and each violation
 * is logged as a security event.
 *
 * @param attr     - attribute to be updated
 * @param new_attr - attribute carrying the new note (must be set and non-NULL)
 * @param op       - operation forwarded to set_str()
 * @return PBSE_BADNDATVAL if the note is rejected, otherwise the result of
 *         set_str()
 */
int set_note_str(

  pbs_attribute *attr,
  pbs_attribute *new_attr,
  enum batch_op  op)

  {
  static char  whoami[] = "set_note_str";
  char         log_buf[LOCAL_LOG_BUF_SIZE];
  const char  *note;
  size_t       note_len;
  int          status = 0;

  assert(attr && new_attr && new_attr->at_val.at_str && (new_attr->at_flags & ATR_VFLAG_SET));

  note     = new_attr->at_val.at_str;
  note_len = strlen(note);

  if (note_len > MAX_NOTE)
    {
    sprintf(log_buf, "Warning: Client attempted to set note with len (%d) > MAX_NOTE (%d)",
      (int)note_len,
      MAX_NOTE);
    log_record(PBSEVENT_SECURITY, PBS_EVENTCLASS_REQUEST, whoami, log_buf);

    status = PBSE_BADNDATVAL;
    }

  if (strchr(note, '\n') != NULL)
    {
    sprintf(log_buf, "Warning: Client attempted to set note with a newline char");
    log_record(PBSEVENT_SECURITY, PBS_EVENTCLASS_REQUEST, whoami, log_buf);

    status = PBSE_BADNDATVAL;
    }

  /* only a note that passed both checks is stored */
  return((status != 0) ? status : set_str(attr, new_attr, op));
  }  /* END set_note_str() */
Exemplo n.º 12
0
Arquivo: xmpp.c Projeto: grouzen/xinb
/*
 * Send a presence stanza of the given subtype over x->conn.
 *
 * Returns TRUE if the stanza was handed to the connection, FALSE if the
 * connection is not authenticated or the send failed (the failure is logged
 * and x->gerror is cleared).
 *
 * The message object created here is always released before returning;
 * lm_connection_send() does not take over the caller's reference.
 */
gboolean xmpp_send_presence(Xinb *x, LmMessageSubType subtype)
{
    LmMessage *m;
    gboolean sent = TRUE;

    if(x->state != LM_CONNECTION_STATE_AUTHENTICATED) {
        log_record(x, LOGS_ERR,
                   "Unable to send presense: not authenticated");
        return FALSE;
    }

    m = lm_message_new_with_sub_type(NULL, LM_MESSAGE_TYPE_PRESENCE, subtype);
    if(!lm_connection_send(x->conn, m, &(x->gerror))) {
        log_record(x, LOGS_ERR, "Unable to send presence of type '%d': %s",
                   subtype, x->gerror->message);
        g_clear_error(&(x->gerror));
        sent = FALSE;
    }

    /* Drop our reference on both the success and the failure path. */
    lm_message_unref(m);
    return sent;
}
Exemplo n.º 13
0
/*
 * socket_read_force()
 *
 * Keeps reading from a descriptor into the_str until avail_bytes have been
 * read in one go, the stream ends, an error occurs, or no further data is
 * reported as pending on the descriptor.
 *
 * @param socket      - descriptor to read from
 * @param the_str     - destination buffer
 *                      NOTE(review): assumed to hold at least avail_bytes
 *                      bytes - confirm against callers
 * @param avail_bytes - number of bytes the caller expects to be readable
 * @param byte_count  - running total; every successful read is ADDED to it,
 *                      so the caller has to initialise it before the call
 * @return PBSE_NONE, or PBSE_SOCKET_READ on a read error or when the stream
 *         ends while *byte_count is still 0
 *
 * A read() interrupted by a signal (EINTR) is simply retried; it must not be
 * treated as a partial read because its return value is -1.
 */
int socket_read_force(

    int        socket,
    char      *the_str,
    long long  avail_bytes,
    long long *byte_count)

{
    int rc = PBSE_NONE;
    char *read_loc = the_str;
    long long tmp_len = avail_bytes;
    long long bytes_read = 1;
    long long sock_check = 0;
    char log_buf[LOCAL_LOG_BUF_SIZE+1];
    while (bytes_read != 0)
    {
        bytes_read = read(socket, read_loc, tmp_len);
        if (bytes_read == -1)
        {
            /* nothing was transferred; leave all counters alone and retry */
            if (errno == EINTR)
                continue;

            /* NOTE(review): the first %d receives errno although the text
             * says "socket" - confirm which value was intended */
            if (getenv("PBSDEBUG") != NULL)
                fprintf(stderr, "Error reading data from socket %d - (%s)\n",
                        errno, strerror(errno));
            rc = PBSE_SOCKET_READ;
            break;
        }
        else if (bytes_read == 0)
        {
            /* end of stream: only an error if nothing was ever received */
            if (*byte_count == 0)
                rc = PBSE_SOCKET_READ;
            break;
        }
        else if (bytes_read == avail_bytes)
        {
            *byte_count += bytes_read;
            break;
        }
        else
        {
            /* partial read: advance and ask how much is still pending */
            tmp_len -= bytes_read;
            read_loc += bytes_read;
            *byte_count += bytes_read;
            sock_check = socket_avail_bytes_on_descriptor(socket);
            if (sock_check == 0)
            {
                snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "ioctl hsa been lying, expected avail %lld, actual avail %lld", tmp_len, sock_check);
                log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, __func__, log_buf);
                break;
            }
            if (sock_check < tmp_len)
                tmp_len = sock_check;
        }
    }
    return rc;
} /* END socket_read_force() */
Exemplo n.º 14
0
int process_gpu_status(

  struct pbsnode           *pnode,
  unsigned int             &i,
  std::vector<std::string> &status_info)

  {
  pbs_attribute   temp;
  int             gpu_count = 0;
  int             rc = PBSE_NONE;
  char            buf[MAXLINE * 2];
  std::string     gpu_info = "";

  memset(&temp, 0, sizeof(temp));
  
  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");

    finish_gpu_status(i, status_info);

    return(rc);
    }

  /* move past the initial gpu status */
  i++;
  
  for (; i < status_info.size(); i++)
    {
    if (!strcmp(status_info[i].c_str(), CRAY_GPU_STATUS_END))
      break;

    if (!strncmp(status_info[i].c_str(), "gpu_id=", strlen("gpu_id=")))
      {
      snprintf(buf, sizeof(buf), "gpu[%d]=%s;", gpu_count, status_info[i].c_str());
      gpu_info += buf;
      gpu_count++;
      }
    else
      {
      gpu_info += status_info[i].c_str();
      gpu_info += ';';
      }
    }

  set_ngpus(pnode, gpu_count);
  decode_arst(&temp, NULL, NULL, gpu_info.c_str(), 0);
  node_gpustatus_list(&temp, pnode, ATR_ACTION_ALTER);
  
  free_arst(&temp);

  return(rc);
  } /* END process_gpu_status() */
Exemplo n.º 15
0
/*
 * Writes a "session"/"end" record for the user's full JID when a session
 * finishes.  The record carries, in order: the session ip, the elapsed time
 * (current time minus the session's start, as int), the c_in and c_out
 * counters and the resource.  Always returns M_PASS.
 */
mreturn mod_log_session_end(mapi m, void *arg)
{
	time_t now = time(NULL);
	int elapsed;

	log_debug("creating session log entry");

	elapsed = (int) (now - m->s->started);

	log_record(jid_full(m->user->id), "session", "end",
		   "%s %d %d %d %s",
		   m->s->ip, elapsed, m->s->c_in, m->s->c_out, m->s->res);

	return M_PASS;
}
Exemplo n.º 16
0
Arquivo: xmpp.c Projeto: grouzen/xinb
/*
 * Message handler that executes a command received from the bot's owner.
 *
 * The sender's JID (derived from the "from" attribute) must equal the
 * "owner" entry of x->config; anyone else gets a refusal message back.
 * Only messages of subtype CHAT that carry a <body> child are run through
 * command_run().
 *
 * Always returns LM_HANDLER_RESULT_REMOVE_MESSAGE.
 *
 * NOTE(review): the message is unref'd on exit, as in the original code -
 * confirm that this handler really owns a reference to 'm'.
 */
LmHandlerResult xmpp_receive_command(LmMessageHandler *handler,
                        LmConnection *conn, LmMessage *m, gpointer udata)
{
    Xinb *x = udata;
    LmMessageNode *body;
    gchar *jid = NULL;  /* must be NULL: 'out' frees it on every path */
    const gchar *from;
    
    if(x->state != LM_CONNECTION_STATE_AUTHENTICATED) {
        log_record(x, LOGS_ERR,
                   "Unable to receive message: not authenticated");
        goto out;
    }
    
    from = lm_message_node_get_attribute(m->node, "from");
    jid = xmpp_get_jid(from);
    if(g_strcmp0(jid, g_hash_table_lookup(x->config, "owner"))) {
        log_record(x, LOGS_ERR, "To command may only owner: '%s'", from);
        x->to = jid;
        x->message = g_strdup("You don't have permission to command me");
        xmpp_send_message(x, LM_MESSAGE_SUB_TYPE_CHAT);
        g_free(x->message);
        /* Both strings are released in this function; do not leave
         * pointers to freed memory behind in the shared state. */
        x->message = NULL;
        x->to = NULL;
        goto out;
    }
    
    body = lm_message_node_find_child(m->node, "body");
    if(lm_message_get_sub_type(m) != LM_MESSAGE_SUB_TYPE_CHAT) {
        log_record(x, LOGS_ERR, "Invalid subtype of the command");
        goto out;
    }

    /* A chat stanza is not required to contain a <body> element. */
    if(body == NULL) {
        log_record(x, LOGS_ERR, "The command message has no body");
        goto out;
    }
    
    if(command_run(x, body->value)) {
        log_record(x, LOGS_INFO, "The command was successfully executed");
    }

    out:
    g_free(jid);
    lm_message_unref(m);
    return LM_HANDLER_RESULT_REMOVE_MESSAGE;
}
Exemplo n.º 17
0
/*
 * lock_node()
 *
 * Locks the_node->nd_mutex and optionally traces the operation.
 *
 * @param the_node - node to lock
 * @param id       - name of the calling method, used in the log text
 * @param msg      - optional position marker for the log text ("no pos" if NULL)
 * @param logging  - verbosity: >= 10 logs the start and a failed lock,
 *                   >= 7 logs completion
 * @return PBSE_NONE, or PBSE_MUTEX if pthread_mutex_lock() failed
 *
 * The completion record is written regardless of the lock result.
 */
int lock_node(
    
  struct pbsnode *the_node,
  const char     *id,
  char           *msg,
  int             logging)

  {
  char        log_text[MSG_LEN_LONG + 1];
  int         status = PBSE_NONE;
  int         lock_rc;

  if (logging >= 10)
    {
    const char *position = (msg != NULL) ? msg : "no pos";

    snprintf(log_text, MSG_LEN_LONG, "locking start %s in method %s-%s", the_node->nd_name, id, position);
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, log_text);
    }

  lock_rc = pthread_mutex_lock(the_node->nd_mutex);

  if (lock_rc != 0)
    {
    status = PBSE_MUTEX;

    if (logging >= 10)
      {
      snprintf(log_text, MSG_LEN_LONG, "ALERT: cannot lock node %s mutex in method %s",
          the_node->nd_name, id);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, log_text);
      }
    }

  if (logging >= 7)
    {
    snprintf(log_text, MSG_LEN_LONG, "locking complete %s in method %s", the_node->nd_name, id);
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, log_text);
    }

  return status;
  } /* END lock_node() */
Exemplo n.º 18
0
/*
 * job_free()
 *
 * Releases a job structure and what hangs off it: every working attribute
 * (through its at_free routine), the group cache, the node and task data
 * (nodes_free / tasks_free), the resources and global-id buffers, and
 * finally the structure itself.  The job must not have a pending request
 * (ji_preq) attached.
 */
void job_free(

  job *pj)  /* I (modified) */

  {
  int attr_idx = 0;

  if (LOGLEVEL >= 8)
    {
    sprintf(log_buffer, "freeing job");

    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pj->ji_qs.ji_jobid, log_buffer);
    }

  /* let each attribute type give back its own working storage */

  while (attr_idx < JOB_ATR_LAST)
    {
    job_attr_def[attr_idx].at_free(&pj->ji_wattr[attr_idx]);
    attr_idx++;
    }

  /* free(NULL) is a no-op, so the optional buffers need no guards */

  free(pj->ji_grpcache);

  assert(pj->ji_preq == NULL);

  nodes_free(pj);

  tasks_free(pj);

  free(pj->ji_resources);
  pj->ji_resources = NULL;

  free(pj->ji_globid);
  pj->ji_globid = NULL;

  /* last of all, the job structure itself */

  free((char *)pj);
  }  /* END job_free() */
Exemplo n.º 19
0
/*
 * token_acct_open - open (or switch to) the token accounting file.
 *
 * filename == NULL  : use "<path_acct>/YYYYMMDD" for the current local date
 *                     and enable automatic switching.
 * filename == ""    : not an error; nothing is opened and 0 is returned.
 * otherwise         : must be an absolute path.
 *
 * The file is opened for append, unbuffered.  A previously opened accounting
 * file is closed only after the new one was opened successfully.
 *
 * Returns 0 on success, -1 if the name is not absolute, the default name
 * cannot be built (local time unavailable or path too long) or the file
 * cannot be opened.
 */
int token_acct_open(char *filename)
  {
  char  filen[_POSIX_PATH_MAX];
  char  logmsg[_POSIX_PATH_MAX+80];
  FILE *newacct;
  time_t now;
  int   len;

  struct tm *ptm;

  if (filename == (char *)0)   /* go with default */
    {
    now = time(0);
    ptm = localtime(&now);

    if (ptm == NULL)
      return (-1);

    len = snprintf(filen, sizeof(filen), "%s/%04d%02d%02d",
                   path_acct,
                   ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday);

    /* a truncated name would silently open the wrong file */
    if ((len < 0) || ((size_t)len >= sizeof(filen)))
      return (-1);

    filename = filen;
    acct_auto_switch = 1;
    acct_opened_day = ptm->tm_yday;
    }
  else if (*filename == '\0')   /* a null name is not an error */
    {
    return (0);  /* turns off account logging.  */
    }
  else if (*filename != '/')
    {
    return (-1);  /* not absolute */
    }

  if ((newacct = fopen(filename, "a")) == NULL)
    {
    fprintf(stderr, "In token_acct_open filed to open file %s\n", filename);
    perror("acct_open");
    return (-1);
    }

  setbuf(newacct, NULL);  /* set no buffering */

  if (acct_opened > 0)   /* if acct was open, close it */
    (void)fclose(acctfile);

  acctfile = newacct;

  acct_opened = 1;   /* note that file is open */

  /* caller-supplied names may be longer than logmsg; truncate, don't overflow */
  (void)snprintf(logmsg, sizeof(logmsg), "Account file %s opened", filename);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, "TokenAct", logmsg);

  return (0);
  }
Exemplo n.º 20
0
Arquivo: xmpp.c Projeto: grouzen/xinb
/*
 * Send x->message as the body of a message stanza addressed to x->to.
 *
 * Returns TRUE if the stanza was handed to the connection, FALSE if the
 * connection is not authenticated or the send failed (the failure is logged
 * and x->gerror is cleared).  The stanza object is released in either case.
 *
 * TODO: large messages are not sent (?); splitting them may be needed.
 */
gboolean xmpp_send_message(Xinb *x, LmMessageSubType subtype)
{
    LmMessage *stanza;
    gboolean delivered;

    if(x->state != LM_CONNECTION_STATE_AUTHENTICATED) {
        log_record(x, LOGS_ERR,
                   "Unable to send message: not authenticated");
        return FALSE;
    }

    stanza = lm_message_new_with_sub_type(x->to, LM_MESSAGE_TYPE_MESSAGE,
                                          subtype);
    lm_message_node_add_child(stanza->node, "body", x->message);

    delivered = lm_connection_send(x->conn, stanza, &(x->gerror));
    if(!delivered) {
        log_record(x, LOGS_ERR, "Unable to send message to '%s': %s",
                   x->to, x->gerror->message);
        g_clear_error(&(x->gerror));
    }

    lm_message_unref(stanza);
    return delivered ? TRUE : FALSE;
}
Exemplo n.º 21
0
/*
 * print_trace()
 *
 * Logs the current call stack (at most 10 frames), one log record per
 * frame, tagged with "pt - pos <socknum>".
 *
 * @param socknum - value used only to label the log records
 *
 * backtrace_symbols() allocates the name array and can fail; in that case
 * only the frame count is logged.
 */
void print_trace(

  int socknum)

  {
  void  *array[10];
  int    size;
  char **meth_names;
  int    cntr;
  char   msg[120];
  char   meth_name[20];

  size = backtrace(array, 10);
  meth_names = backtrace_symbols(array, size);
  snprintf(meth_name, sizeof(meth_name), "pt - pos %d", socknum);
  snprintf(msg, sizeof(msg), "Obtained %d stack frames.\n", size);
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, meth_name, msg);

  if (meth_names == NULL)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, meth_name,
      "backtrace_symbols() failed; frame names are not available");
    return;
    }

  for (cntr = 0; cntr < size; cntr++)
    {
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, meth_name, meth_names[cntr]);
    }

  /* only the array itself is freed; the strings live inside it */
  free(meth_names);
  }
Exemplo n.º 22
0
/*
 * Prepare a queue structure for scheduling.
 *
 * Unspecified assignment counters (ncpus_assn, mem_assn, running) are reset
 * to 0.  For every queue other than the submit queue that is not stopped,
 * the resources of queue->exechost are fetched with schd_get_resources():
 *   - on success the queue's assignments are added to the host's allocation
 *     totals and ncpus_max is capped at the host's ncpus_total;
 *   - on failure the problem is logged and the queue is flagged
 *     QFLAGS_NODEDOWN.
 *
 * Always returns 0.
 */
int
schd_get_queue_info(Queue *queue)
  {
  char *whoami = "get_queue_info";

  if (queue->ncpus_assn == UNSPECIFIED)
    queue->ncpus_assn = 0;

  if (queue->mem_assn == UNSPECIFIED)
    queue->mem_assn = 0;

  if (queue->running == UNSPECIFIED)
    queue->running = 0;

  /* The submit queue has no execution host to ask. */
  if (strcmp(queue->qname, schd_SubmitQueue->queue->qname) == 0)
    return (0);

  /* Don't bother the resource monitor for a stopped queue. */
  if ((queue->flags & QFLAGS_STOPPED) != 0)
    return (0);

  queue->rsrcs = schd_get_resources(queue->exechost);

  if (queue->rsrcs == NULL)
    {
    (void)sprintf(log_buffer,
                  "Can't get resources for %s@%s - marking unavailable.",
                  queue->qname, queue->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);
    DBPRT(("%s: %s\n", whoami, log_buffer));

    queue->flags |= QFLAGS_NODEDOWN;

    return (0);
    }

  /* Charge this queue's usage to its execution host. */
  queue->rsrcs->ncpus_alloc += queue->ncpus_assn;
  queue->rsrcs->mem_alloc   += queue->mem_assn;
  queue->rsrcs->njobs       += queue->running;

  /* A queue can never offer more cpus than its host has. */
  if (queue->ncpus_max > queue->rsrcs->ncpus_total)
    queue->ncpus_max = queue->rsrcs->ncpus_total;

  return (0);
  }
Exemplo n.º 23
0
/*
 * unlock_queue()
 *
 * Unlocks the_queue->qu_mutex, optionally logging what is happening.
 *
 * @param the_queue - queue whose mutex is released
 * @param id        - name of the calling method, used in the log text
 * @param msg       - optional position marker for the log text ("no pos" if NULL)
 * @param logging   - verbosity: >= 10 logs the unlock and a failed unlock
 * @return PBSE_NONE on success, PBSE_MUTEX if pthread_mutex_unlock() failed
 *
 * Logging is best-effort: if the message buffer cannot be allocated the
 * unlock still happens, only the debug records are skipped.
 */
int unlock_queue(

  struct pbs_queue *the_queue,
  const char       *id,
  char             *msg,
  int               logging)

  {
  int rc = PBSE_NONE;
  char *err_msg = NULL;
  char stub_msg[] = "no pos";

  if (logging >= 10)
    {
    err_msg = (char *)calloc(1, MSG_LEN_LONG);
    if (msg == NULL)
      msg = stub_msg;

    /* calloc can fail; never format into a NULL buffer */
    if (err_msg != NULL)
      {
      snprintf(err_msg, MSG_LEN_LONG, "unlocking %s in method %s-%s", the_queue->qu_qs.qu_name, id, msg);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, err_msg);
      }
    }

  if (pthread_mutex_unlock(the_queue->qu_mutex) != 0)
    {
    if ((logging >= 10) &&
        (err_msg != NULL))
      {
      snprintf(err_msg, MSG_LEN_LONG, "ALERT: cannot unlock queue %s mutex in method %s",
          the_queue->qu_qs.qu_name, id);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, err_msg);
      }
    rc = PBSE_MUTEX;
    }

  if (err_msg != NULL)
    free(err_msg);

  return rc;
  }
Exemplo n.º 24
0
int set_ncpus(

  struct pbsnode *current,
  struct pbsnode *parent,
  int             ncpus)

  {
  int difference;
  int i;
  int orig_svr_clnodes;

  if (current == NULL)
    return(PBSE_BAD_PARAMETER);
  
  difference = ncpus - current->nd_slots.get_total_execution_slots();
  orig_svr_clnodes = svr_clnodes;

  for (i = 0; i < abs(difference); i++)
    {
    if (difference > 0)
      {
      add_execution_slot(current); 

      svr_clnodes++;
      }
    else if (difference < 0)
      {
      delete_a_subnode(current);
      svr_clnodes--;
      }
    }
   
  if (difference < 0)
    {
    snprintf(log_buffer, sizeof(log_buffer), "ncpus was reduced from %d to %d", orig_svr_clnodes, svr_clnodes);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_NODE, __func__, log_buffer);
    }
  else if (current->nd_slots.get_total_execution_slots() > parent->max_subnode_nppn)
    parent->max_subnode_nppn = current->nd_slots.get_total_execution_slots();

#ifdef PENABLE_LINUX_CGROUPS
  if (current->nd_layout.getTotalThreads() != current->nd_slots.get_total_execution_slots())
    {
    current->nd_layout = Machine(current->nd_slots.get_total_execution_slots());
    }
#endif

  return(PBSE_NONE);
  } /* END set_ncpus() */
Exemplo n.º 25
0
/*
**      Terminate the program after a signal or an internal failure.
**
**      Logs "caught signal <n>" for a positive 'sig' and "abnormal
**      termination" otherwise, deletes the Tcl interpreter if one exists,
**      closes the log and exits with status 1.  Does not return.
*/
void
die(int sig)
{
    char    *whoami = "die";
    char    *reason = "abnormal termination";

    if (sig > 0)
    {
        sprintf(log_buffer, "caught signal %d", sig);
        reason = log_buffer;
    }

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, reason);

    if (interp)
    {
        Tcl_DeleteInterp(interp);
    }

    log_close(1);

    exit(1);
}
Exemplo n.º 26
0
/*
 * dump_resources - write a short summary of a host's resources to the log.
 *
 * Four records are produced: the host name, the free memory (formatted by
 * schd_byte2val), the allocated/total node counts with the resulting
 * utilisation percentage, and the number of running jobs.
 */
static void
dump_resources(Resources *rsrcs)
  {
  char   *whoami = "dump_resources";
  double  node_pct;

  /* allocated nodes as a percentage of all nodes */
  node_pct = (rsrcs->nodes_alloc * 100.0) / rsrcs->nodes_total;

  (void)sprintf(log_buffer, "Resources for host %s", rsrcs->exechost);
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);

  (void)sprintf(log_buffer, " :: %-24s = %s",
                "Memory (free):", schd_byte2val(rsrcs->freemem));
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);

  (void)sprintf(log_buffer, " :: %-24s = %d / %d (%.2f%% utilization)",
                "Nodes allocated:",
                rsrcs->nodes_alloc, rsrcs->nodes_total, node_pct);
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);

  (void)sprintf(log_buffer, " :: %-24s = %d",
                "Running jobs:", rsrcs->njobs);
  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, whoami, log_buffer);
  }
Exemplo n.º 27
0
/*
 * schd_get_queue_util - refresh node and queue information.
 *
 * Calls get_node_status(), then schd_get_queue_info() for the submit queue
 * and for every batch queue that is not flagged QFLAGS_NODEDOWN (flagged
 * queues are skipped with an error record).  Failures of
 * schd_get_queue_info() are only reported through DBPRT.  Finally
 * schd_calc_suspended_jobs() is run.  Always returns 0.
 */
int
schd_get_queue_util(void)
  {
  char      *whoami = "get_queue_util";
  QueueList *entry;
  int        status;
  void schd_calc_suspended_jobs(void);

  /* Step 1: node status. */
  get_node_status();

  /* Step 2: queue status, submit queue first. */
  status = schd_get_queue_info(schd_SubmitQueue->queue);

  if (status)
    {
    DBPRT(("get_queue_util: get_queue_info for %s failed.\n",
           schd_SubmitQueue->queue->qname));
    }

  entry = schd_BatchQueues;

  while (entry != NULL)
    {
    if ((entry->queue->flags & QFLAGS_NODEDOWN) == 0)
      {
      status = schd_get_queue_info(entry->queue);

      if (status)
        {
        DBPRT(("get_queue_util: get_queue_info for %s failed.\n",
               entry->queue->qname));
        }
      }
    else
      {
      sprintf(log_buffer, "Skipping UNAVAILABLE node %s",
              entry->queue->exechost);
      DBPRT(("%s: %s\n", whoami, log_buffer));
      log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, whoami, log_buffer);
      }

    entry = entry->next;
    }

  /* Step 3: account for resources held by suspended jobs. */
  schd_calc_suspended_jobs();

  return (0);
  }
Exemplo n.º 28
0
/*
 * restricted()
 *
 * Resolves a host name and appends its first address to the maskclient
 * array, growing the array by one element and incrementing mask_num.
 *
 * @param name - host name to look up (also written to the log on success)
 * @return 0 on success, -1 if the host cannot be resolved, its address does
 *         not fit into a struct in_addr, or memory cannot be obtained
 */
int restricted(

  char  *name)

  {
  static  char    id[] = "restricted";

  struct  hostent         *host;

  struct  in_addr saddr;
  pbs_net_t       *newclients;

  if ((host = gethostbyname(name)) == NULL)
    {
    sprintf(log_buffer, "host %s not found",
            name);

    log_err(-1, id, log_buffer);

    return(-1);
    }

  /* the address is copied into a struct in_addr below; never let the
   * resolver-supplied length write past it */
  if ((host->h_length <= 0) ||
      ((size_t)host->h_length > sizeof(saddr)))
    {
    log_err(-1, id, "unexpected address length");

    return(-1);
    }

  if (mask_num > 0)
    {
    newclients = realloc(maskclient, sizeof(pbs_net_t) * (mask_num + 1));
    }
  else
    {
    newclients = malloc(sizeof(pbs_net_t));
    }

  if (newclients == NULL)
    {
    /* on realloc failure the old array is still valid and still owned
     * by maskclient */
    return(-1);
    }

  maskclient = newclients;

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, name);

  /* zero first so a shorter-than-expected address leaves no garbage */
  memset(&saddr, 0, sizeof(saddr));
  memcpy((char *)&saddr, host->h_addr, host->h_length);

  maskclient[mask_num++] = saddr.s_addr;

  return(0);
  }
Exemplo n.º 29
0
int parse_alps_output(

  std::string              &alps_output)

  {
  xmlDocPtr  doc;
  xmlNode   *child;

  if ((doc = xmlReadMemory(alps_output.c_str(), alps_output.length(), "apbasil", NULL, 0)) == NULL)
    {
    char buf[MAXLINE * 4];
    xmlErrorPtr pErr = xmlGetLastError();
    snprintf(buf, sizeof(buf), "Failed to parse the output of alps - %s", pErr->message);
    log_err(-1, __func__, buf);
    return(ALPS_PARSING_ERROR);
    }

  if (process_element(xmlDocGetRootElement(doc)) == ALPS_QUERY_FAILURE)
    {
    xmlNode   *root = xmlDocGetRootElement(doc);
	  // Verbose debug output for ALPS_QUERY_FAILURE node error message
	  for (child = root->children; child != NULL; child = child->next)
	    {
	    if (!strcmp((const char *)child->name, response_data))
	      {
        for (xmlNode *gchild = child->children; gchild != NULL; gchild = gchild->next)
          {
          if (!strcmp((const char *)gchild->name, "Message"))
            {
            snprintf(log_buffer, sizeof(log_buffer),
              "Failed to query ALPS: %s", (const char *)xmlNodeGetContent(gchild));
            log_record(PBSEVENT_SYSTEM, 0, __func__, log_buffer);
            }
          }
	      }
	    }

  	return(ALPS_QUERY_FAILURE);
    }


  xmlFreeDoc(doc);
  xmlMemoryDump();

  return(PBSE_NONE);
  } /* END parse_alps_output() */
Exemplo n.º 30
0
/*
 * log_event()
 *
 * Filtering front end for log_record(): the record is written when the
 * event type carries PBSEVENT_FORCE or when at least one of its bits is
 * enabled in *log_event_mask; otherwise the call does nothing.
 *
 * @param eventtype - event type bits (may include PBSEVENT_FORCE)
 * @param objclass  - class of the object the record is about
 * @param objname   - name of that object
 * @param text      - message text
 */
void log_event(

  int         eventtype,
  int         objclass,
  const char *objname,
  const char *text)

  {
  /* the mask is only consulted for events that are not forced */
  if (((eventtype & PBSEVENT_FORCE) != 0) ||
      ((*log_event_mask & eventtype) != 0))
    {
    log_record(eventtype, objclass, objname, text);
    }
  }  /* END log_event() */