Example #1
0
void *send_power_state_to_mom(
    
  void *arg)

  {
  struct batch_request  *pRequest = (struct batch_request *)arg;
  struct pbsnode        *pNode = find_nodebyname(pRequest->rq_host);

  if (pNode == NULL)
    {
    free_br(pRequest);
    return NULL;
    }

  int handle = 0;
  int local_errno = 0;

  handle = svr_connect(pNode->nd_addrs[0],pNode->nd_mom_port,&local_errno,pNode,NULL);
  if (handle < 0)
    {
    unlock_node(pNode, __func__, "Error connecting", LOGLEVEL);
    return NULL;
    }

  unlock_node(pNode, __func__, "Done connecting", LOGLEVEL);
  issue_Drequest(handle, pRequest, true);

  return NULL;
  }
struct pbsnode *get_numa_from_str(
    
  const char     *str, /* I */
  struct pbsnode *np)  /* I */

  {
  const char     *numa_id;
  struct pbsnode *numa;
  unsigned long   numa_index;
  char            log_buf[LOCAL_LOG_BUF_SIZE];
  
  if (np->node_boards == NULL)
    {
    /* ERROR */
    snprintf(log_buf,sizeof(log_buf),
      "Node %s isn't declared to be NUMA, but mom is reporting\n",
      np->nd_name);
    log_err(-1, __func__, log_buf);
  
    unlock_node(np, __func__, "np numa update", LOGLEVEL);
    
    return(NULL);
    }
  
  numa_id = str + strlen(NUMA_KEYWORD);
  numa_index = atoi(numa_id);
  
  numa = AVL_find(numa_index, np->nd_mom_port, np->node_boards);
  
  if (numa == NULL)
    {
    /* ERROR */
    snprintf(log_buf,sizeof(log_buf),
      "Could not find NUMA index %lu for node %s\n",
      numa_index,
      np->nd_name);
    log_err(-1, __func__, log_buf);
    
    unlock_node(np, __func__, "np numa update", LOGLEVEL);
    
    return(NULL);
    }
 
  /* SUCCESS */
  unlock_node(np, __func__, "np numa update", LOGLEVEL);
  lock_node(numa, __func__, "numa numa update", LOGLEVEL);
  
  numa->nd_lastupdate = time(NULL);
  
  return(numa);
  } /* END get_numa_from_str() */
int get_alps_statuses(

  struct pbsnode       *parent,
  struct batch_request *preq,
  int                  *bad,
  tlist_head           *pstathd)

  {
  struct pbsnode *alps_node;
  all_nodes_iterator  *iter = NULL;
  int             rc = PBSE_NONE;

  while ((alps_node = next_host(parent->alps_subnodes, &iter, NULL)) != NULL)
    {
    rc = status_node(alps_node, preq, bad, pstathd);
    unlock_node(alps_node, __func__, NULL, LOGLEVEL);

    if (rc != PBSE_NONE)
      break;
    }

  if (iter != NULL)
    delete iter;

  return(rc);
  } /* END get_alps_statuses() */
Example #4
0
struct pbsnode *determine_node_from_str(

  char           *str,
  struct pbsnode *parent,
  struct pbsnode *current)

  {
  struct pbsnode *next = NULL;
  char           *node_id = str + strlen("node=");

  if ((current == NULL) || 
      (strcmp(node_id, current->nd_name)))
    {
    if (current != NULL)
      unlock_node(current, __func__, NULL, 0);

    if ((next = find_alpsnode_by_name(parent, node_id)) == NULL)
      {
      /* create the node */
      next = create_alps_subnode(parent, node_id);
      }
    
    }
  else
    {
    next = current;
    }
  
  if (next != NULL)
    next->nd_lastupdate = time(NULL);

  return(next);
  } /* END determine_node_from_str() */
void *check_if_orphaned(

  void *vp)

  {
  char                 *rsv_id = (char *)vp;
  char                  job_id[PBS_MAXSVRJOBID];
  struct batch_request *preq;
  int                   handle = -1;
  int                   retries = 0;
  struct pbsnode       *pnode;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  if (is_orphaned(rsv_id, job_id) == TRUE)
    {
    if((preq = alloc_br(PBS_BATCH_DeleteReservation)) == NULL)
      return NULL;
    preq->rq_extend = rsv_id;

    /* Assume the request will be successful and remove the RSV from the hash table */
    remove_alps_reservation(rsv_id);

    if ((pnode = get_next_login_node(NULL)) != NULL)
      {
      struct in_addr hostaddr;
      int            local_errno;
      pbs_net_t      momaddr;

      memcpy(&hostaddr, &pnode->nd_sock_addr.sin_addr, sizeof(hostaddr));
      momaddr = ntohl(hostaddr.s_addr);

      snprintf(log_buf, sizeof(log_buf),
        "Found orphan ALPS reservation ID %s for job %s; asking %s to remove it",
        rsv_id,
        job_id,
        pnode->nd_name);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, __func__, log_buf);

      while ((handle < 0) &&
             (retries < 3))
        {
        handle = svr_connect(momaddr, pnode->nd_mom_port, &local_errno, pnode, NULL, ToServerDIS);
        retries++;
        }

      /* unlock before the network transaction */
      unlock_node(pnode, __func__, NULL, LOGLEVEL);
      
      if (handle >= 0)
        issue_Drequest(handle, preq, true);
        
      free_br(preq);
      }
    }
  else
    free(rsv_id);

  return(NULL);
  } /* END check_if_orphaned() */
struct pbsnode *get_node_from_str(

  const char     *str,     /* I */
  const char     *orig_id, /* I */
  struct pbsnode *np)      /* M */

  {
  /* this is a node reporting on another node as well */
  const char     *node_id = str + strlen("node=");
  struct pbsnode *next = NULL;
  char            log_buf[LOCAL_LOG_BUF_SIZE];
 
  /* don't do anything if the name is the same as this node's name */
  if (strcmp(node_id, np->nd_name))
    {
    unlock_node(np, __func__, "np not numa update", LOGLEVEL);
    
    next = find_nodebyname(node_id);
    
    if (next == NULL)
      {
      /* NYI: should we add logic here to attempt the canonical name if this 
       * is the short name, and attempt the short name if this is the 
       * canonical name? */
      
      /* ERROR */
      snprintf(log_buf,sizeof(log_buf),
        "Node %s is reporting on node %s, which pbs_server doesn't know about\n",
        orig_id,
        node_id);
      log_err(-1, __func__, log_buf);
      }
    else
      {
      if (LOGLEVEL >= 7)
        {
        snprintf(log_buf,sizeof(log_buf),
          "Node %s is reporting for node %s\n",
          orig_id,
          node_id);
        
        log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, __func__, log_buf);
        }
      
      next->nd_lastupdate = time(NULL);
      }
    }
  else
    {
    next = np;
    next->nd_lastupdate = time(NULL);
    }

  /* next may be NULL */

  return(next);
  } /* END get_node_from_str() */
/*
 * record_reservation()
 *
 * @pre-cond: pnode and rsv_id must be valid pointers
 * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism
 * and on the job which has the node reserved, or -1 is returned and the reservation
 * is not recorded.
 * @param - pnode the node which is reporting the reservation
 * @param - rsv_id the id of the reservation being reported
 * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise
 */
int record_reservation(

  struct pbsnode *pnode,
  const char     *rsv_id)

  {
  job            *pjob;
  bool            found_job = false;
  char            jobid[PBS_MAXSVRJOBID + 1];

  for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++)
    {
    /* cray only allows one job per node, so any valid job will be the job that is 
     * reserving this node. */
    job_usage_info *jui = pnode->nd_job_usages[i];
    strcpy(jobid, jui->jobid);

    unlock_node(pnode, __func__, NULL, LOGLEVEL);

    if ((pjob = svr_find_job(jobid, TRUE)) != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET;

      /* add environment variable BATCH_PARTITION_ID */
      char buf[1024];
      snprintf(buf, sizeof(buf), "BATCH_PARTITION_ID=%s", rsv_id);
      pbs_attribute  tempattr;
      clear_attr(&tempattr, &job_attr_def[JOB_ATR_variables]);
      job_attr_def[JOB_ATR_variables].at_decode(&tempattr,
        NULL, NULL, buf, 0);

      job_attr_def[JOB_ATR_variables].at_set(
        &pjob->ji_wattr[JOB_ATR_variables], &tempattr, INCR);

      job_attr_def[JOB_ATR_variables].at_free(&tempattr);

      track_alps_reservation(pjob);
      found_job = true;

      job_mutex.unlock(); 
      lock_node(pnode, __func__, NULL, LOGLEVEL);
      break;
      }
    else
      lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  if (found_job == false)
    return(-1);

  return(PBSE_NONE);
  } /* END record_reservation() */
Example #8
0
struct pbsnode *get_next_login_node(

  struct prop *needed)

  {
  struct pbsnode *pnode = NULL;
  login_node     *ln;
  int             node_fits = TRUE;

  pthread_mutex_lock(logins.ln_mutex);
  ln = (login_node *)logins.ra->slots[logins.next_node].item;

  if (ln != NULL)
    {
    pnode = ln->pnode;
    lock_node(pnode, __func__, NULL, LOGLEVEL);
    
    if (needed != NULL)
      {
      if (hasprop(pnode, needed) == FALSE)
        {
        node_fits = FALSE;
        }
      }
    
    /* must have at least one execution slot available */
    if ((pnode->nd_nsn - pnode->nd_np_to_be_used < 1) ||
        ((pnode->nd_state & INUSE_DOWN) != 0) ||
        ((pnode->nd_state & INUSE_OFFLINE) != 0))
      {
      node_fits = FALSE;
      }
    
    if (node_fits == FALSE)
      {
      unlock_node(pnode, __func__, NULL, LOGLEVEL);
      pnode = find_fitting_node(needed);
      }
    else
      {
      ln->times_used++;
      update_next_node_index(ln->times_used);
      }
    }

  pthread_mutex_unlock(logins.ln_mutex);

  return(pnode);
  } /* END get_next_login_node() */
Example #9
0
void *check_if_orphaned(

  void *vp)

  {
  char                 *rsv_id = (char *)vp;
  struct batch_request *preq;
  int                   handle = -1;
  int                   retries = 0;
  struct pbsnode       *pnode;

  if (is_orphaned(rsv_id) == TRUE)
    {
    preq = alloc_br(PBS_BATCH_DeleteReservation);
    preq->rq_extend = rsv_id;

    if ((pnode = get_next_login_node(NULL)) != NULL)
      {
      struct in_addr hostaddr;
      int            local_errno;
      pbs_net_t      momaddr;

      memcpy(&hostaddr, &pnode->nd_sock_addr.sin_addr, sizeof(hostaddr));
      momaddr = ntohl(hostaddr.s_addr);

      while ((handle < 0) &&
             (retries < 3))
        {
        handle = svr_connect(momaddr, pnode->nd_mom_port, &local_errno, pnode, NULL, ToServerDIS);
        retries++;
        }

      /* unlock before the network transaction */
      unlock_node(pnode, __func__, NULL, 0);
      
      if (handle >= 0)
        {
        issue_Drequest(handle, preq, release_req, 0);
        }
      else
        free_br(preq);
      }
    }
  else
    free(rsv_id);

  return(NULL);
  } /* END check_if_orphaned() */
int is_reporter_node(

  char *node_id)

  {
  struct pbsnode *pnode = find_nodebyname(node_id);
  int             rc = FALSE;

  if (pnode != NULL)
    {
    rc = pnode->nd_is_alps_reporter;
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  return(rc);
  } /* END is_reporter_node() */
Example #11
0
struct pbsnode *get_next_login_node(

  struct prop *needed)

  {
  struct pbsnode *pnode = NULL;
  int             node_fits = TRUE;

  pthread_mutex_lock(logins.ln_mutex);
  login_node &ln = logins.nodes[logins.next_node];

  pnode = ln.pnode;
  lock_node(pnode, __func__, NULL, LOGLEVEL);
  
  if (needed != NULL)
    {
    if (hasprop(pnode, needed) == FALSE)
      {
      node_fits = FALSE;
      }
    }
  
  /* must have at least one execution slot available */
  if ((pnode->nd_slots.get_total_execution_slots() - pnode->nd_np_to_be_used < 1) ||
      ((pnode->nd_state & INUSE_NOT_READY) != 0) ||
      ((pnode->nd_state & INUSE_OFFLINE) != 0) ||
      (pnode->nd_power_state != POWER_STATE_RUNNING))
    {
    node_fits = FALSE;
    }
  
  if (node_fits == FALSE)
    {
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    pnode = find_fitting_node(needed);
    }
  else
    {
    ln.times_used++;
    update_next_node_index(ln.times_used);
    }

  pthread_mutex_unlock(logins.ln_mutex);

  return(pnode);
  } /* END get_next_login_node() */
Example #12
0
/*
 * record_reservation()
 *
 * @pre-cond: pnode and rsv_id must be valid pointers
 * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism
 * and on the job which has the node reserved, or -1 is returned and the reservation
 * is not recorded.
 * @param - pnode the node which is reporting the reservation
 * @param - rsv_id the id of the reservation being reported
 * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise
 */
int record_reservation(

  struct pbsnode *pnode,
  const char     *rsv_id)

  {
  job            *pjob;
  bool            found_job = false;
  char            jobid[PBS_MAXSVRJOBID + 1];

  for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++)
    {
    /* cray only allows one job per node, so any valid job will be the job that is 
     * reserving this node. */
    job_usage_info *jui = pnode->nd_job_usages[i];
    strcpy(jobid, jui->jobid);

    unlock_node(pnode, __func__, NULL, LOGLEVEL);

    if ((pjob = svr_find_job(jobid, TRUE)) != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET;

      track_alps_reservation(pjob);
      found_job = true;

      job_mutex.unlock(); 
      lock_node(pnode, __func__, NULL, LOGLEVEL);
      break;
      }
    else
      lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  if (found_job == false)
    return(-1);

  return(PBSE_NONE);
  } /* END record_reservation() */
Example #13
0
struct pbsnode *check_node(

  login_node  *ln,
  struct prop *needed)

  {
  struct pbsnode *pnode = ln->pnode;

  lock_node(pnode, __func__, NULL, LOGLEVEL);

  if ((hasprop(pnode, needed) == TRUE) &&
      (pnode->nd_nsn - pnode->nd_np_to_be_used >= 1) &&
      ((pnode->nd_state & INUSE_DOWN) == 0) &&
      ((pnode->nd_state & INUSE_OFFLINE) == 0))
    return(pnode);
  else
    {
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return(NULL);
    }
  } /* END check_node() */
Example #14
0
/* instead of getting the status on a node with numa nodes, report
 * the status of all the numa nodes
 *
 * @param pnode - the node to report on
 * @param preq - the batch request
 * @param pstathd - the list to add this response to
 *
 * @return - 0 on SUCCESS, error code otherwise
 */
int get_numa_statuses(

  struct pbsnode       *pnode,    /* ptr to node receiving status query */
  struct batch_request *preq,
  int                  *bad,      /* O */
  tlist_head           *pstathd)  /* head of list to append status to  */

  {
  int i;
  int rc = 0;

  struct pbsnode *pn;

  if (pnode->num_node_boards == 0)
    {
    /* no numa nodes, just return the status for this node */
    rc = status_node(pnode, preq, bad, pstathd);

    return(rc);
    }

  for (i = 0; i < pnode->num_node_boards; i++)
    {
    pn = AVL_find(i,pnode->nd_mom_port,pnode->node_boards);

    if (pn == NULL)
      continue;

    lock_node(pn, __func__, NULL, LOGLEVEL);
    rc = status_node(pn, preq, bad, pstathd);
    unlock_node(pn, __func__, NULL, LOGLEVEL);

    if (rc != PBSE_NONE)
      {
      return(rc);
      }
    }

  return(rc);
  } /* END get_numa_statuses() */
Example #15
0
/*
 * check_node() 
 *
 * @return a pointer to the node if it is valid to be used
 * @param ln - a pointer to the login node struct containing the 
 * node that should be checked
 * @pre-cond - ln must be a pointer to a valid login node struct
 * @param needed - an optional pointer to the required properties for
 * the login node to have.
 */
struct pbsnode *check_node(

  login_node  *ln,
  struct prop *needed)

  {
  struct pbsnode *pnode = ln->pnode;

  lock_node(pnode, __func__, NULL, LOGLEVEL);

  if ((hasprop(pnode, needed) == TRUE) &&
      (pnode->nd_slots.get_number_free() - pnode->nd_np_to_be_used >= 1) &&
      ((pnode->nd_state & INUSE_NOT_READY) == 0) &&
      ((pnode->nd_state & INUSE_OFFLINE) == 0) &&
      (pnode->nd_power_state == POWER_STATE_RUNNING))
    return(pnode);
  else
    {
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return(NULL);
    }
  } /* END check_node() */
int get_alps_statuses(

  struct pbsnode       *parent,
  struct batch_request *preq,
  int                  *bad,
  tlist_head           *pstathd)

  {
  struct pbsnode *alps_node;
  int             iter = -1;
  int             rc = PBSE_NONE;

  while ((alps_node = next_host(&(parent->alps_subnodes), &iter, NULL)) != NULL)
    {
    rc = status_node(alps_node, preq, bad, pstathd);
    unlock_node(alps_node, __func__, NULL, 0);

    if (rc != PBSE_NONE)
      break;
    }

  return(rc);
  } /* END get_alps_statuses() */
Example #17
0
void mgr_node_modify(

  struct batch_request *preq)  /* I */

  {
  int               need_todo = 0;

  int               rc;
  int               bad = 0;

  const char       *nodename = NULL;

  svrattrl         *plist;
  node_check_info   nci;

  struct pbsnode   *pnode = NULL;

  nodename = preq->rq_ind.rq_manager.rq_objname;
  pnode = find_nodebyname(nodename);

  if (pnode == NULL)
    {
    req_reject(PBSE_UNKNODE, 0, preq, NULL, NULL);

    return;
    }

  plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_manager.rq_attr);

  save_characteristic(pnode,&nci);

  rc = mgr_modify_node(
         &pnode,
         node_attr_def,
         ND_ATR_LAST,
         plist,
         preq->rq_perm,
         &bad,
         ATR_ACTION_ALTER);

  if (rc != 0)
    {
    /* In the specific node case, reply w/ error and return*/

    switch (rc)
      {
      case PBSE_INTERNAL:

      case PBSE_SYSTEM:

        req_reject(rc, bad, preq, NULL, NULL);

        break;

      case PBSE_NOATTR:

      case PBSE_ATTRRO:

      case PBSE_MUTUALEX:

      case PBSE_BADNDATVAL:

        reply_badattr(rc, bad, plist, preq);

        break;

      default:

        req_reject(rc, 0, preq, NULL, NULL);

        break;
      }

    if(pnode != NULL)
      {
      unlock_node(pnode, "mgr_node_set", (char *)"error", LOGLEVEL);
      pnode = NULL;
      }

    return;
    } /* END if (rc != 0) */
  else
    {
      /* modifications succeeded for this node */
    if(pnode != NULL)
      {
      chk_characteristic(pnode, &nci, &need_todo);
      }
    }

  if(pnode != NULL)
    {
    unlock_node(pnode, "mgr_node_set", (char *)"single_node", LOGLEVEL);
    pnode = NULL;
    }

  if (need_todo & WRITENODE_STATE)
    {
    /*some nodes set to "offline"*/
    write_node_state();

    need_todo &= ~(WRITENODE_STATE);
    }

  if (need_todo & WRITENODE_POWER_STATE)
    {
    /*some nodes changed power state*/
    write_node_power_state();

    need_todo &= ~(WRITENODE_POWER_STATE);
    }

  if (need_todo & WRITENODE_NOTE)
    {
    /*some nodes have new "note"s*/
    write_node_note();

    need_todo &= ~(WRITENODE_NOTE);
    }

  if (need_todo & WRITE_NEW_NODESFILE)
    {
    /*create/delete/prop/ntype change*/
    if (!update_nodes_file(NULL))
      need_todo &= ~(WRITE_NEW_NODESFILE);  /*successful on update*/
    }

  recompute_ntype_cnts();

  reply_ack(preq);  /*request completely successful*/

  return;
  }  /* END void mgr_node_set() */
Example #18
0
int set_node_power_state(
    
  struct pbsnode **ppNode,
  unsigned short   newState)

  {
  struct pbsnode *pNode = *ppNode;
  if (pNode->nd_addrs == NULL)
    {
    return PBSE_BAD_PARAMETER;
    }

  if (newState == POWER_STATE_RUNNING)
    {
    static std::string interface;
    static unsigned char mac_addr[6];
    if (interface.length() == 0)
      {
      if (!getMacAddr(interface,mac_addr))
        {
        return PBSE_SYSTEM;
        }
      }

    int sock;
    if ((sock = socket(AF_INET,SOCK_PACKET,SOCK_PACKET)) < 0)
      {
      return PBSE_SYSTEM;
      }

    unsigned char outpack[1000];

    memcpy(outpack+6,mac_addr,6);
    memcpy(outpack,pNode->nd_mac_addr,6);
    outpack[12] = 0x08;
    outpack[13] = 0x42;
    int offset = 14;
    memset(outpack + offset,0xff,6);
    offset += 6;

    for (int i = 0;i < 16;i++)
      {
      memcpy(outpack + offset,pNode->nd_mac_addr,6);
      offset += 6;
      }

    int one = 1;
    if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, (char *)&one, sizeof(one)) < 0)
      {
      close(sock);
      return PBSE_SYSTEM;
      }

    struct sockaddr whereto;
    whereto.sa_family = 0;
    snprintf(whereto.sa_data, sizeof(whereto.sa_data), "%s", interface.c_str());

    if (sendto(sock, outpack, offset, 0, &whereto, sizeof(whereto)) < 0)
      {
      close(sock);
      return PBSE_SYSTEM;
      }

    close(sock);
    return PBSE_NONE;
    }

  if (pNode->nd_job_usages.size() != 0)
    {
    //Can't change the power state on a node with running jobs.
    return PBSE_CANT_CHANGE_POWER_STATE_WITH_JOBS_RUNNING;
    }
  struct batch_request *request = alloc_br(PBS_BATCH_ChangePowerState);
  if (request == NULL)
    {
    return PBSE_SYSTEM;
    }

  request->rq_ind.rq_powerstate = newState;
  pNode->nd_power_state_change_time = time(NULL);

  snprintf(request->rq_host, sizeof(request->rq_host), "%s", pNode->nd_name);
  std::string hostname(request->rq_host);
  int rc = PBSE_NONE;

  {
    int handle = 0;
    int local_errno = 0;
    handle = svr_connect(pNode->nd_addrs[0],pNode->nd_mom_port,&local_errno,pNode,NULL);
    if(handle < 0)
      {
      unlock_node(pNode, __func__, "Error connecting", LOGLEVEL);
      *ppNode = NULL;
      return local_errno;
      }
    unlock_node(pNode, __func__, "Done connecting", LOGLEVEL);
    *ppNode = NULL;
    rc = issue_Drequest(handle, request,true);
    if(rc == PBSE_NONE)
      {
      rc = request->rq_reply.brp_code;
      if(rc < 0) rc = -rc;
      }
  }
  pNode = find_nodebyname(hostname.c_str());
  *ppNode = pNode;
  if ((rc == PBSE_NONE)&&(pNode != NULL))
    {
    pNode->nd_power_state = newState;
    }

  return(rc);
  }
Example #19
0
int site_check_user_map(

  job  *pjob,  /* I */
  char *luser, /* I */
  char *EMsg,  /* O (optional,minsize=1024) */
  int logging) /* I */

  {
  char *orighost;
  char  owner[PBS_MAXUSER + 1];
  char *p1;
  char *p2;
  int   rc;

  int   ProxyAllowed = 0;
  int   ProxyRequested = 0;
  int   HostAllowed = 0;

  char  *dptr;

  struct pbsnode *tmp;

#ifdef MUNGE_AUTH
  char  uh[PBS_MAXUSER + PBS_MAXHOSTNAME + 2];
#endif

  if (EMsg != NULL)
    EMsg[0] = '\0';

  /* get just the owner name, without the "@host" */

  p1 = pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str;

  p2 = owner;

  while ((*p1 != '@') && (*p1 != '\0'))
    *p2++ = *p1++;

  *p2 = '\0';

  orighost = get_variable(pjob, pbs_o_host);

  if (orighost == NULL)
    {
    /* access denied */

    log_event(
      PBSEVENT_JOB,
      PBS_EVENTCLASS_JOB,
      pjob->ji_qs.ji_jobid,
      msg_orighost);

    if (EMsg != NULL)
      strcpy(EMsg, "source host not specified");

    return(-1);
    }

  if ((server.sv_attr[SRV_ATR_AllowProxyUser].at_flags & ATR_VFLAG_SET) && \
      (server.sv_attr[SRV_ATR_AllowProxyUser].at_val.at_long == 1))
    {
    ProxyAllowed = 1;
    }

  if (strcmp(owner, luser) != 0)
    {
    ProxyRequested = 1;
    }

  if (!strcmp(orighost, server_host) && !strcmp(owner, luser))
    {
    /* submitting from server host, access allowed */

    if ((ProxyRequested == 0) || (ProxyAllowed == 1))
      {
      return(0);
      }

    /* host is fine, must validate proxy via ruserok() */

    HostAllowed = 1;
    }

  /* make short host name */

  if ((dptr = strchr(orighost, '.')) != NULL)
    {
    *dptr = '\0';
    }

  if ((HostAllowed == 0) &&
      (server.sv_attr[SRV_ATR_AllowNodeSubmit].at_flags & ATR_VFLAG_SET) &&
      (server.sv_attr[SRV_ATR_AllowNodeSubmit].at_val.at_long == 1) &&
      ((tmp = find_nodebyname(orighost)) != NULL))
    {
    /* job submitted from compute host, access allowed */
    unlock_node(tmp, "site_check_user_map", NULL, logging);

    if (dptr != NULL)
      *dptr = '.';

    if ((ProxyRequested == 0) || (ProxyAllowed == 1))
      {
      return(0);
      }

    /* host is fine, must validate proxy via ruserok() */

    HostAllowed = 1;
    }

  if ((HostAllowed == 0) &&
      (server.sv_attr[SRV_ATR_SubmitHosts].at_flags & ATR_VFLAG_SET))
    {

    struct array_strings *submithosts = NULL;
    char                 *testhost;
    int                   hostnum = 0;

    submithosts = server.sv_attr[SRV_ATR_SubmitHosts].at_val.at_arst;

    for (hostnum = 0;hostnum < submithosts->as_usedptr;hostnum++)
      {
      testhost = submithosts->as_string[hostnum];

      if (!strcasecmp(testhost, orighost))
        {
        /* job submitted from host found in trusted submit host list, access allowed */

        if (dptr != NULL)
          *dptr = '.';

        if ((ProxyRequested == 0) || (ProxyAllowed == 1))
          {
          return(0);
          }

        /* host is fine, must validate proxy via ruserok() */

        HostAllowed = 1;

        break;
        }
      }  /* END for (hostnum) */
    }    /* END if (SRV_ATR_SubmitHosts) */

  if (dptr != NULL)
    *dptr = '.';

#ifdef MUNGE_AUTH
  sprintf(uh, "%s@%s", owner, orighost);
  rc = acl_check(&server.sv_attr[SRV_ATR_authusers], uh, ACL_User_Host);
  if(rc <= 0)
    {
    /* rc == 0 means we did not find a match.
       this is a failure */
    if(EMsg != NULL)
      {
      snprintf(EMsg, 1024, "could not authorize user %s from %s",
               owner, orighost);
      }
    rc = -1; /* -1 is what set_jobexid is expecting for a failure*/
    }
  else
    {
    /*SUCCESS*/
    rc = 0; /* the call to ruserok below was in the code first. ruserok returns 
               0 on success but acl_check returns a positive value on success. 
               We set rc to 0 to be consistent with the original ruserok functionality */
    }
#else
  rc = ruserok(orighost, 0, owner, luser);

  if (rc != 0 && EMsg != NULL)
    {
    /* Test rc so as to not fill this message in the case of success, since other
     * callers might not fill this message in the case of their errors and
     * very misleading error message will go into the logs.
     */
    snprintf(EMsg, 1024, "ruserok failed validating %s/%s from %s",
             owner,
             luser,
             orighost);
    }
#endif

   

#ifdef sun
  /* broken Sun ruserok() sets process so it appears to be owned */
  /* by the luser, change it back for cosmetic reasons           */

  setuid(0);

#endif /* sun */

  return(rc);
  }  /* END site_check_user_map() */
Example #20
0
int req_gpuctrl_svr(
    
  struct batch_request *preq)

  {
  int rc = PBSE_NONE;
  char  *nodename = NULL;
  char  *gpuid = NULL;
  int    gpumode = -1;
  int    reset_perm = -1;
  int    reset_vol = -1;
  char   log_buf[LOCAL_LOG_BUF_SIZE+1];
  int    local_errno = 0;
  struct pbsnode *pnode = NULL;
  int    gpuidx = -1;
  int    conn;

  if ((preq->rq_perm &
       (ATR_DFLAG_MGWR | ATR_DFLAG_MGRD | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR)) == 0)
    {
    rc = PBSE_PERM;
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
        "invalid permissions (ATR_DFLAG_MGWR | ATR_DFLAG_MGRD | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR)");
    req_reject(rc, 0, preq, NULL, log_buf);
    return rc;
    }

  nodename = preq->rq_ind.rq_gpuctrl.rq_momnode;
  gpuid = preq->rq_ind.rq_gpuctrl.rq_gpuid;
  gpumode = preq->rq_ind.rq_gpuctrl.rq_gpumode;
  reset_perm = preq->rq_ind.rq_gpuctrl.rq_reset_perm;
  reset_vol = preq->rq_ind.rq_gpuctrl.rq_reset_vol;

  if (LOGLEVEL >= 7)
    {
    sprintf(
      log_buf,
      "GPU control request for node %s gpuid %s mode %d reset_perm %d reset_vol %d",
      nodename,
      gpuid,
      gpumode,
      reset_perm,
      reset_vol);

    log_ext(-1, __func__, log_buf, LOG_INFO);
    }

  /* validate mom node exists */

  pnode = find_nodebyname(nodename);

  if (pnode == NULL)
    {
    req_reject(PBSE_UNKNODE, 0, preq, NULL, NULL);
    return PBSE_UNKNODE;
    }

  /* validate that the node is up */

  if ((pnode->nd_state & (INUSE_DOWN | INUSE_OFFLINE | INUSE_UNKNOWN))||(pnode->nd_power_state != POWER_STATE_RUNNING))
    {
    rc = PBSE_UNKREQ;
    sprintf(log_buf,"Node %s is not available",pnode->nd_name);
    req_reject(rc, 0, preq, NULL, log_buf);
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate that the node has real gpus not virtual */

  if (!pnode->nd_gpus_real)
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "Not allowed for virtual gpus");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate the gpuid exists */

  if ((gpuidx = gpu_entry_by_id(pnode, gpuid, FALSE)) == -1)
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "GPU ID does not exist on node");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* validate that we have a real request */

  if ((gpumode == -1) && (reset_perm == -1) && (reset_vol == -1))
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "No action specified");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* for mode changes validate the mode with the driver_version */

  if ((pnode->nd_gpusn[gpuidx].driver_ver == 260) && (gpumode > 2))
    {
    rc = PBSE_UNKREQ;
    req_reject(rc, 0, preq, NULL, "GPU driver version does not support mode 3");
    unlock_node(pnode, __func__, NULL, LOGLEVEL);
    return rc;
    }

  /* we need to relay request to the mom for processing */
  /* have MOM attempt to change the gpu mode */

  preq->rq_orgconn = preq->rq_conn;  /* restore client socket */

  unlock_node(pnode, __func__, NULL, LOGLEVEL);
  conn = svr_connect(
           pnode->nd_addrs[0],
           pbs_mom_port,
           &local_errno,
           NULL,
           NULL);
    

  if (conn >= 0)
    {
    if ((rc = issue_Drequest(conn, preq)) != PBSE_NONE)
      req_reject(rc, 0, preq, NULL, NULL);
    else
      process_gpu_request_reply(preq);
    }
  else
    {
    req_reject(PBSE_UNKREQ, 0, preq, NULL, "Failed to get connection to mom");
    }

  return rc;
  }
int relay_to_mom(

  job                   **pjob_ptr,
  struct batch_request   *request, /* the request to send */
  void                  (*func)(struct work_task *))

  {
  int             handle; /* a client style connection handle */
  int             rc;
  int             local_errno = 0;
  pbs_net_t       addr;
  unsigned short  port;
  job            *pjob = *pjob_ptr;
  char            jobid[PBS_MAXSVRJOBID + 1];
  char *job_momname = NULL;

  struct pbsnode *node;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  /* if MOM is down don't try to connect */
  addr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  port = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((node = tfind_addr(addr, port, job_momname)) == NULL)
    {
    free(job_momname);
    return(PBSE_NORELYMOM);
    }
  free(job_momname);

  if ((node != NULL) &&
      (node->nd_state & INUSE_DOWN))
    {
    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    return(PBSE_NORELYMOM);
    }

  if (LOGLEVEL >= 7)
    {
    char *tmp = netaddr_pbs_net_t(pjob->ji_qs.ji_un.ji_exect.ji_momaddr);
    sprintf(log_buf, "momaddr=%s",tmp);

    log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf);

    free(tmp);
    }

  unlock_node(node, __func__, "after svr_connect", LOGLEVEL);
  handle = svr_connect(
           pjob->ji_qs.ji_un.ji_exect.ji_momaddr,
           pjob->ji_qs.ji_un.ji_exect.ji_momport,
           &local_errno,
           NULL,
           NULL,
           ToServerDIS);
    

  if (handle < 0)
    {
    log_event(PBSEVENT_ERROR,PBS_EVENTCLASS_REQUEST,"",msg_norelytomom);

    return(PBSE_NORELYMOM);
    }

  strcpy(jobid, pjob->ji_qs.ji_jobid);
  unlock_ji_mutex(pjob, __func__, NULL, LOGLEVEL);

  request->rq_orgconn = request->rq_conn; /* save client socket */

  rc = issue_Drequest(handle, request);

  *pjob_ptr = svr_find_job(jobid, TRUE);

  return(rc);
  }  /* END relay_to_mom() */
Example #22
0
struct stdfss_res *mkdir(int wpid, struct working_thread *thread, struct stdfss_mkdir *mkdir_cmd)
{
	struct stdfss_res *ret = NULL;
	struct smount_info *minf = NULL;
	struct sdevice_info *opening_dinf = NULL;
	char *str = NULL, *old_str = NULL, *strmatched = NULL;
	int parse_ret, dir_exists;
	struct gc_node *dir_base_node = NULL, *new_dir_base_node = NULL;
	unsigned int nodeid;

	// get dir name from smo
	str = get_string(mkdir_cmd->dir_path);

	ret = check_path(str, thread->command.command);
	if(ret != NULL) return ret;

	// get mount info
	wait_mutex(&mounted_mutex);
	minf = (struct smount_info *)lpt_getvalue_parcial_matchE(mounted, str, &strmatched);
	leave_mutex(&mounted_mutex);

	if(minf == NULL)
	{
		ret = build_response_msg(mkdir_cmd->command, STDFSSERR_DEVICE_NOT_MOUNTED);
		free(str);
		return ret;
	}

	// remove trailing '/'
	if(str[len(str)] == '/')
	{
		old_str = str;
		str = substring(str, 0, len(str) - 1);
		free(old_str);
	}

	// check file does not exist
	parse_ret = parse_directory(TRUE, &dir_base_node, OFS_NODELOCK_EXCLUSIVE | OFS_NODELOCK_BLOCKING, thread->command.command, thread, wpid, minf, str, len(strmatched), NULL, &nodeid, NULL, &dir_exists, &ret);

	if(ret != NULL) free(ret);
	ret = NULL;

	free(strmatched);
	
	if(parse_ret)
	{
		// file exists
		if(thread->lastdir_parsed_node != NULL)
		{
			nfree(minf->dinf, thread->lastdir_parsed_node);
			thread->lastdir_parsed_node = NULL;
		}
		free(str);
		unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);
		nfree(minf->dinf, dir_base_node);
		return build_response_msg(thread->command.command, STDFSSERR_FILE_EXISTS);
	}
	if(!dir_exists)
	{
		// worng path
		if(thread->lastdir_parsed_node != NULL)
		{
			nfree(minf->dinf, thread->lastdir_parsed_node);
			thread->lastdir_parsed_node = NULL;
		}
		free(str);
		return build_response_msg(thread->command.command, STDFSSERR_FILE_DOESNOTEXIST);
	}

	dir_base_node = thread->lastdir_parsed_node;
	
	// Create directory file
	if(!create_file(dir_base_node, str, last_index_of(str, '/') + 1, OFS_DIRECTORY_FILE, TRUE, minf, wpid, thread->command.command, &nodeid, &new_dir_base_node , OFS_NOFLAGS, &ret))
	{
		if(thread->lastdir_parsed_node != NULL)
		{
			nfree(minf->dinf, thread->lastdir_parsed_node);
			thread->lastdir_parsed_node = NULL;
		}
		free(str);
		return ret;
	}

	nfree(minf->dinf, thread->lastdir_parsed_node);
	thread->lastdir_parsed_node = NULL;
	nfree(minf->dinf, new_dir_base_node);

	free(str);


	// unlock the dir node
	unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);

	return build_response_msg(thread->command.command, STDFSSERR_OK);
}
Example #23
0
int stat_to_mom(

  char             *job_id,
  struct stat_cntl *cntl)  /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];
  struct pbsnode       *node;
  int                   handle = -1;
  unsigned long         job_momaddr = -1;
  unsigned short        job_momport = -1;
  char                 *job_momname = NULL;
  job                  *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return(PBSE_JOBNOTFOUND);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) || 
      (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str))
    {
    job_mutex.unlock();
    snprintf(log_buf, sizeof(log_buf),
      "Job %s missing MOM's information. Skipping statting on this job", pjob->ji_qs.ji_jobid);
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    return PBSE_BAD_PARAMETER;
    }

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  job_mutex.unlock();

  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return PBSE_MEM_MALLOC;
    }

  if (cntl->sc_type == 1)
    snprintf(newrq->rq_ind.rq_status.rq_id, sizeof(newrq->rq_ind.rq_status.rq_id), "%s", job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr,job_momport,job_momname);
  free(job_momname);

  if (node == NULL)
    return PBSE_UNKNODE;
  if ((node->nd_state & INUSE_DOWN)||(node->nd_power_state != POWER_STATE_RUNNING))
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
          "node '%s' is allocated to job but in state 'down'",
          node->nd_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf);
      }

    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return PBSE_NORELYMOM;
    }

  /* get connection to MOM */
  unlock_node(node, __func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL);

  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq, true)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return(rc);
  }  /* END stat_to_mom() */
int process_alps_status(

  char           *nd_name,
  dynamic_string *status_info)

  {
  char           *str;
  char           *ccu_p = NULL;
  char           *current_node_id = NULL;
  char            node_index_buf[MAXLINE];
  int             node_index = 0;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
  int             rc;
  pbs_attribute   temp;
  hash_table_t   *rsv_ht;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* keep track of reservations so that they're only processed once per update */
  rsv_ht = create_hash(INITIAL_RESERVATION_HOLDER_SIZE);

  /* loop over each string */
  for (str = status_info->str; str != NULL && *str != '\0'; str += strlen(str) + 1)
    {
    if (!strncmp(str, "node=", strlen("node=")))
      {
      if (str != status_info->str)
        {
        snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
        decode_arst(&temp, NULL, NULL, node_index_buf, 0);
        
        if (current != NULL)
          save_node_status(current, &temp);
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        continue;
      }

    if (current == NULL)
      continue;

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      process_gpu_status(current, &str);
      
      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      char *just_rsv_id = str + strlen(reservation_id);

      if (get_value_hash(rsv_ht, just_rsv_id) == -1)
        {
        add_hash(rsv_ht, 1, strdup(just_rsv_id));

        /* sub-functions will attempt to lock a job, so we must unlock the
         * reporter node */
        unlock_node(parent, __func__, NULL, LOGLEVEL);

        process_reservation_id(current, str);

        current_node_id = strdup(current->nd_name);
        unlock_node(current, __func__, NULL, LOGLEVEL);

        /* re-lock the parent */
        if ((parent = find_nodebyname(nd_name)) == NULL)
          {
          /* reporter node disappeared - this shouldn't be possible */
          log_err(PBSE_UNKNODE, __func__, "Alps reporter node disappeared while recording a reservation");
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        if ((current = find_node_in_allnodes(&parent->alps_subnodes, current_node_id)) == NULL)
          {
          /* current node disappeared, this shouldn't be possible either */
          unlock_node(parent, __func__, NULL, LOGLEVEL);
          snprintf(log_buf, sizeof(log_buf), "Current node '%s' disappeared while recording a reservation",
            current_node_id);
          log_err(PBSE_UNKNODE, __func__, log_buf);
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        free(current_node_id);
        current_node_id = NULL;
        }
      }
    /* save this as is to the status strings */
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      free_arst(&temp);
      free_all_keys(rsv_ht);
      free_hash(rsv_ht);
      return(rc);
      }

    /* perform any special processing */
    if (!strncmp(str, ccu_eq, ac_ccu_eq_len))
      {
      /* save compute unit count in case we need it */
      /* note: this string (ccu_eq (CCU=)) needs to be found before cprocs_eq (CPROCS=) */
      /*  for the node */
      ccu_p = str;
      }
    else if (!strncmp(str, cproc_eq, ac_cproc_eq_len))
      {
      int ncpus;
      long svr_nppcu_value = 0;

      /*
       * Get the server nppcu value which determines how Hyper-Threaded
       * cores are reported. When server nppcu value is:
       *
       *  0 - Let ALPS choose whether or not to use Hyper-Threaded cores 
       *      (report all cores)
       *  1 - Do not use Hyper-Threaded cores
       *      (report only physical core (compute unit count)
       *  2 - Use Hyper-Threaded cores
       *      (report all cores)
       */
      get_svr_attr_l(SRV_ATR_nppcu, &svr_nppcu_value);

      if (svr_nppcu_value == NPPCU_NO_USE_HT && ccu_p != NULL)
        {
        /* no HT (nppcu==1), so use compute unit count */
        ncpus = atoi(ccu_p + ac_ccu_eq_len);

        /* use CPROC value if we are using APBASIL protocol < 1.3 */
        if (ncpus == 0)
          ncpus = atoi(str + ac_cproc_eq_len);

        /* reset the pointer */
        ccu_p = NULL;
        }
      else
        {
        /* let ALPS choose (nppcu==0) or use HT (nppcu==2), use actual processor count */
        ncpus = atoi(str + ac_cproc_eq_len);
        }

      set_ncpus(current, parent, ncpus);
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }

    } /* END processing the status update */

  if (current != NULL)
    {
    snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
    decode_arst(&temp, NULL, NULL, node_index_buf, 0);
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, LOGLEVEL);
    }

  unlock_node(parent, __func__, NULL, LOGLEVEL);

  free_all_keys(rsv_ht);
  free_hash(rsv_ht);

  return(PBSE_NONE);
  } /* END process_alps_status() */
int process_request(

  struct tcp_chan *chan) /* file descriptor (socket) to get request */

  {
  int                   rc = PBSE_NONE;
  struct batch_request *request = NULL;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  long                  acl_enable = FALSE;
  long                  state = SV_STATE_DOWN;

  time_t                time_now = time(NULL);
  int                   free_request = TRUE;
  char                  tmpLine[MAXLINE];
  char                 *auth_err = NULL;
  enum conn_type        conn_active;
  unsigned short        conn_socktype;
  unsigned short        conn_authen;
  unsigned long         conn_addr;
  int                   sfds = chan->sock;

  pthread_mutex_lock(svr_conn[sfds].cn_mutex);
  conn_active = svr_conn[sfds].cn_active;
  conn_socktype = svr_conn[sfds].cn_socktype;
  conn_authen = svr_conn[sfds].cn_authen;
  conn_addr = svr_conn[sfds].cn_addr;
  svr_conn[sfds].cn_lasttime = time_now;
  pthread_mutex_unlock(svr_conn[sfds].cn_mutex);

  if ((request = alloc_br(0)) == NULL)
    {
    snprintf(tmpLine, sizeof(tmpLine),
        "cannot allocate memory for request from %lu",
        conn_addr);
    req_reject(PBSE_MEM_MALLOC, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_SYSTEM;
    goto process_request_cleanup;
    }

  request->rq_conn = sfds;

  /*
   * Read in the request and decode it to the internal request structure.
   */
  if (conn_active == FromClientDIS || conn_active == ToServerDIS)
    {
#ifdef ENABLE_UNIX_SOCKETS

    if ((conn_socktype & PBS_SOCK_UNIX) &&
        (conn_authen != PBS_NET_CONN_AUTHENTICATED))
      {
      /* get_creds interestingly always returns 0 */
      get_creds(sfds, conn_credent[sfds].username, conn_credent[sfds].hostname);
      }

#endif /* END ENABLE_UNIX_SOCKETS */
    rc = dis_request_read(chan, request);
    }
  else
    {
    char out[80];

    snprintf(tmpLine, MAXLINE, "request on invalid type of connection: %d, sock type: %d, from address %s", 
                conn_active,conn_socktype, netaddr_long(conn_addr, out));
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST,
      "process_req", tmpLine);
    snprintf(tmpLine, sizeof(tmpLine),
        "request on invalid type of connection (%d) from %s",
        conn_active,
        netaddr_long(conn_addr, out));
    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (rc == -1)
    {
    /* FAILURE */
    /* premature end of file */
    rc = PBSE_PREMATURE_EOF;
    goto process_request_cleanup;
    }

  if ((rc == PBSE_SYSTEM) || (rc == PBSE_INTERNAL) || (rc == PBSE_SOCKET_CLOSE))
    {
    /* FAILURE */
    /* read error, likely cannot send reply so just disconnect */
    /* ??? not sure about this ??? */
    goto process_request_cleanup;
    }

  if (rc > 0)
    {
    /* FAILURE */

    /*
     * request didn't decode, either garbage or unknown
     * request type, in either case, return reject-reply
     */

    req_reject(rc, 0, request, NULL, "cannot decode message");
    free_request = FALSE;
    goto process_request_cleanup;
    }

  if (get_connecthost(sfds, request->rq_host, PBS_MAXHOSTNAME) != 0)
    {
    sprintf(log_buf, "%s: %lu",
      pbse_to_txt(PBSE_BADHOST),
      conn_addr);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, "", log_buf);

    snprintf(tmpLine, sizeof(tmpLine),
        "cannot determine hostname for connection from %lu",
        conn_addr);

    req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
    free_request = FALSE;
    rc = PBSE_BADHOST;
    goto process_request_cleanup;
    }

  if (LOGLEVEL >= 1)
    {
    sprintf(log_buf,
      msg_request,
      reqtype_to_txt(request->rq_type),
      request->rq_user,
      request->rq_host,
      sfds);

    log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_REQUEST, "", log_buf);
    }

  /* is the request from a host acceptable to the server */
  if (conn_socktype & PBS_SOCK_UNIX)
    {
    strcpy(request->rq_host, server_name);
    }

  get_svr_attr_l(SRV_ATR_acl_host_enable, &acl_enable);
  if (acl_enable)
    {
    /* acl enabled, check it; always allow myself and nodes */
    struct array_strings *pas = NULL;
    struct pbsnode       *isanode;

    get_svr_attr_arst(SRV_ATR_acl_hosts, &pas);
    isanode = PGetNodeFromAddr(conn_addr);

    if ((isanode == NULL) &&
        (strcmp(server_host, request->rq_host) != 0) &&
        (acl_check_my_array_string(pas, request->rq_host, ACL_Host) == 0))
      {
      char tmpLine[MAXLINE];
      snprintf(tmpLine, sizeof(tmpLine), "request not authorized from host %s",
               request->rq_host);

      req_reject(PBSE_BADHOST, 0, request, NULL, tmpLine);
      free_request = FALSE;
      rc = PBSE_BADHOST;
      goto process_request_cleanup;
      }

    if (isanode != NULL)
      unlock_node(isanode, "process_request", NULL, LOGLEVEL);
    }

  /*
   * determine source (user client or another server) of request.
   * set the permissions granted to the client
   */

  if (conn_authen == PBS_NET_CONN_FROM_PRIVIL)
    {
    /* request came from another server */

    request->rq_fromsvr = 1;

    request->rq_perm =
      ATR_DFLAG_USRD | ATR_DFLAG_USWR |
      ATR_DFLAG_OPRD | ATR_DFLAG_OPWR |
      ATR_DFLAG_MGRD | ATR_DFLAG_MGWR |
      ATR_DFLAG_SvWR;
    }
  else
    {
    /* request not from another server */
    conn_credent[sfds].timestamp = time_now;

    request->rq_fromsvr = 0;

    /*
     * Client must be authenticated by an Authenticate User Request, if not,
     * reject request and close connection.  -- The following is retained for
     * compat with old cmds -- The exception to this is of course the Connect
     * Request which cannot have been authenticated, because it contains the
     * needed ticket; so trap it here.  Of course, there is no prior
     * authentication on the Authenticate User request either, but it comes
     * over a reserved port and appears from another server, hence is
     * automatically granted authentication.
     *
     * The above is only true with inet sockets.  With unix domain sockets, the
     * user creds were read before the first dis_request_read call above.
     * We automatically granted authentication because we can trust the socket
     * creds.  Authorization is still granted in svr_get_privilege below
     */

    if (request->rq_type == PBS_BATCH_Connect)
      {
      req_connect(request);

      if (conn_socktype == PBS_SOCK_INET)
        {
        rc = PBSE_IVALREQ;
        req_reject(rc, 0, request, NULL, NULL);
        free_request = FALSE;
        goto process_request_cleanup;
        }

      }

    if (conn_socktype & PBS_SOCK_UNIX)
      {
      pthread_mutex_lock(svr_conn[sfds].cn_mutex);
      svr_conn[sfds].cn_authen = PBS_NET_CONN_AUTHENTICATED;
      pthread_mutex_unlock(svr_conn[sfds].cn_mutex);
      }

    if (ENABLE_TRUSTED_AUTH == TRUE )
      rc = PBSE_NONE;  /* bypass the authentication of the user--trust the client completely */
    else if (munge_on)
      {
      /* If munge_on is true we will validate the connection now */
      if (request->rq_type == PBS_BATCH_AltAuthenUser)
        {
        rc = req_altauthenuser(request);
        free_request = FALSE;
        goto process_request_cleanup;
        }
      else
        {
        rc = authenticate_user(request, &conn_credent[sfds], &auth_err);
        }
      }
    else if (conn_authen != PBS_NET_CONN_AUTHENTICATED)
      /* skip checking user if we did not get an authenticated credential */
      rc = PBSE_BADCRED;
    else
      rc = authenticate_user(request, &conn_credent[sfds], &auth_err);

    if (rc != 0)
      {
      req_reject(rc, 0, request, NULL, auth_err);
      if (auth_err != NULL)
        free(auth_err);
      free_request = FALSE;
      goto process_request_cleanup;
      }

    /*
     * pbs_mom and checkpoint restart scripts both need the authority to do
     * alters and releases on checkpointable jobs.  Allow manager permission
     * for root on the jobs execution node.
     */
     
    if (((request->rq_type == PBS_BATCH_ModifyJob) ||
        (request->rq_type == PBS_BATCH_ReleaseJob)) &&
        (strcmp(request->rq_user, PBS_DEFAULT_ADMIN) == 0))
      {
      job *pjob;
      char *dptr;
      int skip = FALSE;
      char short_host[PBS_MAXHOSTNAME+1];

      /* make short host name */

      strcpy(short_host, request->rq_host);
      if ((dptr = strchr(short_host, '.')) != NULL)
        {
        *dptr = '\0';
        }
      
      if ((pjob = svr_find_job(request->rq_ind.rq_modify.rq_objname, FALSE)) != (job *)0)
        {
        if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
          {

          if ((pjob->ji_wattr[JOB_ATR_checkpoint].at_flags & ATR_VFLAG_SET) &&
              ((csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "s") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "c") != NULL) ||
               (csv_find_string(pjob->ji_wattr[JOB_ATR_checkpoint].at_val.at_str, "enabled") != NULL)) &&
              (strstr(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str, short_host) != NULL))
            {
            request->rq_perm = svr_get_privilege(request->rq_user, server_host);
            skip = TRUE;
            }

          }
        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        }
      
      if (!skip)
        {
        request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
        }
      }
    else
      {
      request->rq_perm = svr_get_privilege(request->rq_user, request->rq_host);
      }
    }  /* END else (conn_authen == PBS_NET_CONN_FROM_PRIVIL) */

  /* if server shutting down, disallow new jobs and new running */
  get_svr_attr_l(SRV_ATR_State, &state);

  if (state > SV_STATE_RUN)
    {
    switch (request->rq_type)
      {
      case PBS_BATCH_AsyrunJob:
      case PBS_BATCH_JobCred:
      case PBS_BATCH_MoveJob:
      case PBS_BATCH_QueueJob:
      case PBS_BATCH_RunJob:
      case PBS_BATCH_StageIn:
      case PBS_BATCH_jobscript:

        req_reject(PBSE_SVRDOWN, 0, request, NULL, NULL);
        rc = PBSE_SVRDOWN;
        free_request = FALSE;
        goto process_request_cleanup;
        /*NOTREACHED*/

        break;
      }
    }

  /*
   * dispatch the request to the correct processing function.
   * The processing function must call reply_send() to free
   * the request struture.
   */

  rc = dispatch_request(sfds, request);

  return(rc);

process_request_cleanup:

  if (free_request == TRUE)
    free_br(request);

  return(rc);
  }  /* END process_request() */
Example #26
0
int stat_to_mom(

  char             *job_id,
  struct stat_cntl *cntl)  /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE+1];
  struct pbsnode       *node;
  int handle = -1;
  unsigned long job_momaddr = -1;
  unsigned short job_momport = -1;
  char *job_momname = NULL;
  job *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return PBSE_JOBNOTFOUND;

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

  if (job_momname == NULL)
    return PBSE_MEM_MALLOC;

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return PBSE_MEM_MALLOC;
    }

  if (cntl->sc_type == 1)
    strcpy(newrq->rq_ind.rq_status.rq_id, job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr,job_momport,job_momname);
  free(job_momname);

  if (node == NULL)
    return PBSE_UNKNODE;
  if (node->nd_state & INUSE_DOWN)
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
          "node '%s' is allocated to job but in state 'down'",
          node->nd_name);

      log_event(PBSEVENT_SYSTEM,PBS_EVENTCLASS_JOB,job_id,log_buf);
      }

    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return PBSE_NORELYMOM;
    }

  /* get connection to MOM */
  unlock_node(node, __func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL, ToServerDIS);

  /* Unlock job here */
  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return rc;
  }  /* END stat_to_mom() */
Example #27
0
int req_stat_node(

  struct batch_request *preq)

  {
  char                 *name;

  int                   rc   = PBSE_NONE;
  int                   type = 0;
  int                   bad  = 0;

  struct pbsnode       *pnode = NULL;
  struct batch_reply   *preply;
  struct prop props;
  svrattrl             *pal;

  /*
   * first, check that the server indeed has a list of nodes
   * and if it does, validate the name of the requested object--
   * either name is that of a specific node, or name[0] is null/@
   * meaning request is for all nodes in the server's jurisdiction
   */

  if (LOGLEVEL >= 6)
    {
    log_record( PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, "entered");
    }

  if (svr_totnodes <= 0)
    {
    rc = PBSE_NONODES;
    req_reject(rc, 0, preq, NULL, "node list is empty - check 'server_priv/nodes' file");

    return rc;
    }

  name = preq->rq_ind.rq_status.rq_id;

  if ((*name == '\0') || (*name == '@'))
    {
    type = 1;
    }
  else if ((*name == ':') && (*(name + 1) != '\0'))
    {
    if (!strcmp(name + 1, "ALL"))
      {
      type = 1;  /* psuedo-group for all nodes */
      }
    else
      {
      type = 2;
      props.name = name + 1;
      props.mark = 1;
      props.next = NULL;
      }
    }

  preply = &preq->rq_reply;

  preply->brp_choice = BATCH_REPLY_CHOICE_Status;

  CLEAR_HEAD(preply->brp_un.brp_status);

  if (type == 0)
    {
    /* get status of the named node */
    pnode = find_nodebyname(name);
    if (pnode == NULL)
      {
      rc = PBSE_UNKNODE;
      req_reject(rc, 0, preq, NULL, "cannot locate specified node");
      return(rc);
      }

    /* get the status on all of the numa nodes */
    if (pnode->nd_is_alps_reporter == TRUE)
      rc = get_alps_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
    else
      rc = get_numa_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);

    unlock_node(pnode, __func__, "type == 0", LOGLEVEL);
    }
  else
    {
    /* get status of all or several nodes */
    all_nodes_iterator *iter = NULL;

    while ((pnode = next_host(&allnodes,&iter,NULL)) != NULL)
      {
      if ((type == 2) && 
          (!hasprop(pnode, &props)))
        {
        unlock_node(pnode, __func__, "type != 0, next_host", LOGLEVEL);
        continue;
        }

      /* get the status on all of the numa nodes */
      if (pnode->nd_is_alps_reporter == TRUE)
        rc = get_alps_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
      else
        rc = get_numa_statuses(pnode, preq, &bad, &preply->brp_un.brp_status);
      
      if (rc != PBSE_NONE)
        {
        unlock_node(pnode, __func__, "type != 0, rc != 0, get_numa_statuses", LOGLEVEL);
        break;
        }

      unlock_node(pnode, __func__, "type != 0, rc == 0, get_numa_statuses", LOGLEVEL);
      }

    if (iter != NULL)
      delete iter;
    }

  if (rc == PBSE_NONE)
    {
    /* SUCCESS */

    reply_send_svr(preq);
    }
  else
    {
    if (rc != PBSE_UNKNODEATR)
      {
      req_reject(rc, 0, preq, NULL, NULL);
      }
    else
      {
      pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);

      reply_badattr(rc, bad, pal, preq);
      }
    }

  return(rc);
  }  /* END req_stat_node() */
int process_status_info(

  const char               *nd_name,
  std::vector<std::string> &status_info)

  {
  const char     *name = nd_name;
  struct pbsnode *current;
  long            mom_job_sync = FALSE;
  long            auto_np = FALSE;
  long            down_on_error = FALSE;
  int             dont_change_state = FALSE;
  pbs_attribute   temp;
  int             rc = PBSE_NONE;
  bool            send_hello = false;

  get_svr_attr_l(SRV_ATR_MomJobSync, &mom_job_sync);
  get_svr_attr_l(SRV_ATR_AutoNodeNP, &auto_np);
  get_svr_attr_l(SRV_ATR_DownOnError, &down_on_error);

  /* Before filling the "temp" pbs_attribute, initialize it.
   * The second and third parameter to decode_arst are never
   * used, so just leave them empty. (GBS) */
  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if original node cannot be found do not process the update */
  if ((current = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  //A node we put to sleep is up and running.
  if (current->nd_power_state != POWER_STATE_RUNNING)
    {
    //Make sure we wait for a stray update that came after we changed the state to pass
    //by.
    if((current->nd_power_state_change_time + NODE_POWER_CHANGE_TIMEOUT) < time(NULL))
      {
      current->nd_power_state = POWER_STATE_RUNNING;
      write_node_power_state();
      }
    }

  /* loop over each string */
  for (unsigned int i = 0; i != status_info.size(); i++)
    {
    const char *str = status_info[i].c_str();
    /* these two options are for switching nodes */
    if (!strncmp(str, NUMA_KEYWORD, strlen(NUMA_KEYWORD)))
      {
      /* if we've already processed some, save this before moving on */
      if (i != 0)
        save_node_status(current, &temp);
      
      dont_change_state = FALSE;

      if ((current = get_numa_from_str(str, current)) == NULL)
        break;
      else
        continue;
      }
    else if (!strncmp(str, "node=", strlen("node=")))
      {
      /* if we've already processed some, save this before moving on */
      if (i != 0)
        save_node_status(current, &temp);

      dont_change_state = FALSE;

      if ((current = get_node_from_str(str, name, current)) == NULL)
        break;
      else
        {
        if (current->nd_mom_reported_down == TRUE)
          {
          /* There is a race condition if using a mom hierarchy and manually
           * shutting down a non-level 1 mom: if its message that the mom is
           * shutting down gets there before its last status update, the node
           * can incorrectly be set as free again. For that reason, only set
           * a mom back up if its reporting for itself. */
          if (strcmp(name, str + strlen("node=")) != 0)
            dont_change_state = TRUE;
          else
            current->nd_mom_reported_down = FALSE;
          }

        continue;
        }
      }

    /* add the info to the "temp" pbs_attribute */
    else if (!strcmp(str, START_GPU_STATUS))
      {
      is_gpustat_get(current, i, status_info);
      str = status_info[i].c_str();
      }
    else if (!strcmp(str, START_MIC_STATUS))
      {
      process_mic_status(current, i, status_info);
      str = status_info[i].c_str();
      }
#ifdef PENABLE_LINUX_CGROUPS
    else if (!strncmp(str, "layout", 6))
      {
      if (current->nd_layout == NULL)
        {
        current->nd_layout = new Machine(status_info[i]);
        }

      continue;
      }
#endif
    else if (!strcmp(str, "first_update=true"))
      {
      /* mom is requesting that we send the mom hierarchy file to her */
      //remove_hello(&hellos, current->nd_id);
      send_hello = true;
      
      /* reset gpu data in case mom reconnects with changed gpus */
      clear_nvidia_gpus(current);
      }
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      DBPRT(("is_stat_get: cannot add attributes\n"));

      free_arst(&temp);

      break;
      }

    if (!strncmp(str, "state", 5))
      {
      if (dont_change_state == FALSE)
        process_state_str(current, str);
      }
    else if ((allow_any_mom == TRUE) &&
             (!strncmp(str, "uname", 5))) 
      {
      process_uname_str(current, str);
      }
    else if (!strncmp(str, "me", 2))  /* shorter str compare than "message" */
      {
      if ((!strncmp(str, "message=ERROR", 13)) &&
          (down_on_error == TRUE))
        {
        update_node_state(current, INUSE_DOWN);
        dont_change_state = TRUE;
        set_note_error(current, str);
        }
      }
    else if (!strncmp(str,"macaddr=",8))
      {
      update_node_mac_addr(current,str + 8);
      }
    else if ((mom_job_sync == TRUE) &&
             (!strncmp(str, "jobdata=", 8)))
      {
      /* update job attributes based on what the MOM gives us */      
      update_job_data(current, str + strlen("jobdata="));
      }
    else if ((mom_job_sync == TRUE) &&
             (!strncmp(str, "jobs=", 5)))
      {
      /* walk job list reported by mom */
      size_t         len = strlen(str) + strlen(current->nd_name) + 2;
      char          *jobstr = (char *)calloc(1, len);
      sync_job_info *sji = (sync_job_info *)calloc(1, sizeof(sync_job_info));

      if ((jobstr != NULL) &&
          (sji != NULL))
        {
        sprintf(jobstr, "%s:%s", current->nd_name, str+5);
        sji->input = jobstr;
        sji->timestamp = time(NULL);

        /* sji must be freed in sync_node_jobs */
        enqueue_threadpool_request(sync_node_jobs, sji, task_pool);
        }
      else
        {
        if (jobstr != NULL)
          {
          free(jobstr);
          }
        if (sji != NULL)
          {
          free(sji);
          }
        }
      }
    else if (auto_np)
      {
      if (!(strncmp(str, "ncpus=", 6)))
        {
        handle_auto_np(current, str);
        }
      }
    } /* END processing strings */

  if (current != NULL)
    {
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, LOGLEVEL);
    }
  
  if ((rc == PBSE_NONE) &&
      (send_hello == true))
    rc = SEND_HELLO;
    
  return(rc);
  } /* END process_status_info() */
Example #29
0
struct stdfss_res *mkdevice(int wpid, struct working_thread *thread, struct stdfss_mkdevice *mkdevice_cmd)
{
	struct stdfss_res *ret = NULL;
	struct smount_info *minf = NULL;
	struct sdevice_info *opening_dinf = NULL;
	char *str = NULL, *strmatched = NULL;
	int parse_ret, dir_exists;
	struct gc_node *dir_base_node = NULL, *device_base_node = NULL;
	unsigned int *block = NULL;
	unsigned int nodeid;
	
	// get device file name from smo
	str = get_string(mkdevice_cmd->path_smo);
	
	ret = check_path(str, thread->command.command);
	if(ret != NULL) return ret;

	char *devstr = get_string(mkdevice_cmd->service_name);

	ret = check_path(devstr, thread->command.command);
	if(ret != NULL)
	{
		free(str);
		return ret;
	}

	// check path is ok
	if(str[len(str)] == '/')
	{
		free(str);
		free(devstr);
		return build_response_msg(thread->command.command, STDFSSERR_INVALID_COMMAND_PARAMS);
	}

	// get mount info
	wait_mutex(&mounted_mutex);
	minf = (struct smount_info *)lpt_getvalue_parcial_matchE(mounted, str, &strmatched);
	leave_mutex(&mounted_mutex);

	if(minf == NULL)
	{
		free(str);
		free(devstr);
		return build_response_msg(mkdevice_cmd->command, STDFSSERR_DEVICE_NOT_MOUNTED);
	}

	// check file does not exist
	parse_ret = parse_directory(TRUE, &dir_base_node, OFS_NODELOCK_EXCLUSIVE | OFS_NODELOCK_BLOCKING, thread->command.command, thread, wpid, minf, str, len(strmatched), NULL, &nodeid, NULL, &dir_exists, &ret);

	if(ret != NULL) free(ret);
	ret = NULL;

	if(parse_ret)
	{
		// file exists
		if(thread->lastdir_parsed_node != NULL)
		{
			nfree(minf->dinf, thread->lastdir_parsed_node);
			thread->lastdir_parsed_node = NULL;
		}
		free(strmatched);
		free(str);
		free(devstr);
		unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);
		nfree(minf->dinf, dir_base_node);
		return build_response_msg(thread->command.command, STDFSSERR_FILE_EXISTS);
	}

	if(!dir_exists)
	{
		// worng path
		if(thread->lastdir_parsed_node != NULL)
		{
			nfree(minf->dinf, thread->lastdir_parsed_node);
			thread->lastdir_parsed_node = NULL;
		}
		free(strmatched);
		free(str);
		free(devstr);
		return build_response_msg(thread->command.command, STDFSSERR_FILE_DOESNOTEXIST);
	}

	dir_base_node = nref(minf->dinf, thread->lastdir_parsed_node->nodeid);
	
	free(strmatched);

	// Create device file
	if(!create_file(dir_base_node, str, last_index_of(str, '/') + 1, OFS_DEVICE_FILE, TRUE, minf, wpid, thread->command.command, &nodeid, &device_base_node, OFS_NOFLAGS , &ret))
	{
		nfree(minf->dinf, thread->lastdir_parsed_node);
		nfree(minf->dinf, dir_base_node);
		thread->lastdir_parsed_node = NULL;
		
		free(str);
		free(devstr);
		return ret;
	}
	
	nfree(minf->dinf, thread->lastdir_parsed_node);
	nfree(minf->dinf, dir_base_node);
	thread->lastdir_parsed_node = NULL;
	
	// get a free block 
	block = get_free_blocks(1, TRUE, minf, thread->command.command, wpid, &ret);
	if(ret != NULL)
	{
		free(str);
		free(devstr);
		unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);
		nfree(minf->dinf, device_base_node);
		return ret;
	}

	free(str);

	clear_dir_buffer(thread->directory_buffer.buffer);

	// write buffer
	/*
		int LOGIC DEVICE ID: internal ID on the service (4 BYTES)
		int service name size; (4 BYTES)
		char SERVICE NAME[]: name of the device driver (zero terminated devstring) 
	*/
	*((unsigned int *)thread->directory_buffer.buffer) = (unsigned int)mkdevice_cmd->logic_deviceid;
	*((unsigned int *)(thread->directory_buffer.buffer + 4)) = len(devstr);
	mem_copy((unsigned char *)devstr, ((unsigned char *)thread->directory_buffer.buffer + 8), len(devstr) + 1);

	free(devstr);

	write_buffer((char *)thread->directory_buffer.buffer, OFS_DIR_BUFFERSIZE, *block, thread->command.command, wpid, minf, &ret);
	if(ret != NULL)
	{
		free_block(TRUE, TRUE, *block, minf, thread->command.command, wpid, &ret);
		free(block);
		unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);
		nfree(minf->dinf, device_base_node);
		return ret;
	}

	// update node
	device_base_node->n.file_size = 8 + len(devstr) + 1;
	device_base_node->n.blocks[0] = *block;

	if(!write_node(device_base_node, minf, wpid, thread->command.command, &ret) || ret != NULL)
	{
		free_block(TRUE, TRUE, *block, minf, thread->command.command, wpid, &ret);
		free(block);
		unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);
		nfree(minf->dinf, device_base_node);
		return ret;
	}

	nfree(minf->dinf, device_base_node);

	// unlock device node
	unlock_node(wpid, FALSE, OFS_LOCKSTATUS_OK);	

	return build_response_msg(thread->command.command, STDFSSERR_OK);
}
Example #30
0
int process_alps_status(

  char           *nd_name,
  dynamic_string *status_info)

  {
  char           *str;
  char            node_index_buf[MAXLINE];
  int             node_index = 0;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
  int             rc;
  pbs_attribute   temp;

  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* loop over each string */
  for (str = status_info->str; str != NULL && *str != '\0'; str += strlen(str) + 1)
    {
    if (!strncmp(str, "node=", strlen("node=")))
      {
      if (str != status_info->str)
        {
        snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
        decode_arst(&temp, NULL, NULL, node_index_buf, 0);
        save_node_status(current, &temp);
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        continue;
      }

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      process_gpu_status(current, &str);
      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      process_reservation_id(current, str);
      }
    /* save this as is to the status strings */
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      free_arst(&temp);
      return(rc);
      }

    /* perform any special processing */
    if (!strncmp(str, cproc_eq, cproc_eq_len))
      {
      set_ncpus(current, str);
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }

    } /* END processing the status update */

  if (current != NULL)
    {
    snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
    decode_arst(&temp, NULL, NULL, node_index_buf, 0);
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, 0);
    }

  unlock_node(parent, __func__, NULL, 0);

  return(PBSE_NONE);
  } /* END process_alps_status() */