Esempio n. 1
0
/*
 * process_alps_status()
 *
 * Applies a status update sent by an ALPS reporter node.
 *
 * status_info->str is read as a run of NUL-terminated strings that ends at an
 * empty string. Each "node=" string selects the node that the following
 * strings apply to (through determine_node_from_str()). The remaining strings
 * are either handed to a dedicated handler (gpu status, reservation id) or
 * accumulated in a temporary attribute which is stored on the selected node
 * with save_node_status().
 *
 * @param nd_name     - name of the reporter node, looked up with find_nodebyname()
 * @param status_info - the status strings to process
 * @return PBSE_NONE on success or when the reporter node cannot be found,
 *         otherwise the error code returned by decode_arst()
 */
int process_alps_status(

  char           *nd_name,
  dynamic_string *status_info)

  {
  char           *str;
  char            node_index_buf[MAXLINE];
  int             node_index = 0;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
  int             rc;
  pbs_attribute   temp;

  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* loop over each string */
  for (str = status_info->str; str != NULL && *str != '\0'; str += strlen(str) + 1)
    {
    if (!strncmp(str, "node=", strlen("node=")))
      {
      /* close out the previous node before switching to the next one */
      if (str != status_info->str)
        {
        snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
        decode_arst(&temp, NULL, NULL, node_index_buf, 0);

        /* no node has been selected yet if the update did not start with
         * a "node=" string, so there is nothing to save the status to */
        if (current != NULL)
          save_node_status(current, &temp);
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        continue;
      }

    /* strings that arrive before any "node=" string have no node to apply to */
    if (current == NULL)
      continue;

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      process_gpu_status(current, &str);
      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      process_reservation_id(current, str);
      }
    /* save this as is to the status strings */
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      free_arst(&temp);

      /* the normal exit below releases both nodes; do the same here so
       * that a failed decode does not leave them locked */
      unlock_node(current, __func__, NULL, 0);
      unlock_node(parent, __func__, NULL, 0);

      return(rc);
      }

    /* perform any special processing */
    if (!strncmp(str, cproc_eq, cproc_eq_len))
      {
      set_ncpus(current, str);
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }

    } /* END processing the status update */

  /* store what was gathered for the last node of the update */
  if (current != NULL)
    {
    snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
    decode_arst(&temp, NULL, NULL, node_index_buf, 0);
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, 0);
    }

  unlock_node(parent, __func__, NULL, 0);

  return(PBSE_NONE);
  } /* END process_alps_status() */
/*
 * process_status_info()
 *
 * Applies a status update, delivered as a list of strings, to the node named
 * nd_name and to any node or numa node that a later string in the update
 * selects.
 *
 * Strings are accumulated in a temporary pbs_attribute which is stored on the
 * current node with save_node_status() whenever the current node changes and
 * once more after the loop. Several strings additionally trigger dedicated
 * handling (state, uname, error message, mac address, job data, job list,
 * ncpus).
 *
 * @param nd_name     - name of the node the update is processed for
 * @param status_info - the status strings, processed in order
 * @return the decode_arst() error code if a string could not be added,
 *         SEND_HELLO if no error occurred and a "first_update=true" string
 *         was seen, PBSE_NONE otherwise (including when the node is unknown)
 */
int process_status_info(

  const char               *nd_name,
  std::vector<std::string> &status_info)

  {
  const char     *name = nd_name;
  struct pbsnode *current;
  long            mom_job_sync = FALSE;
  long            auto_np = FALSE;
  long            down_on_error = FALSE;
  int             dont_change_state = FALSE;
  pbs_attribute   temp;
  int             rc = PBSE_NONE;
  bool            send_hello = false;

  /* server-wide settings that decide which optional handling is performed */
  get_svr_attr_l(SRV_ATR_MomJobSync, &mom_job_sync);
  get_svr_attr_l(SRV_ATR_AutoNodeNP, &auto_np);
  get_svr_attr_l(SRV_ATR_DownOnError, &down_on_error);

  /* Before filling the "temp" pbs_attribute, initialize it.
   * The second and third parameter to decode_arst are never
   * used, so just leave them empty. (GBS) */
  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if original node cannot be found do not process the update */
  if ((current = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  //A node we put to sleep is up and running.
  if (current->nd_power_state != POWER_STATE_RUNNING)
    {
    //Make sure we wait for a stray update that came after we changed the state to pass
    //by.
    if((current->nd_power_state_change_time + NODE_POWER_CHANGE_TIMEOUT) < time(NULL))
      {
      current->nd_power_state = POWER_STATE_RUNNING;
      write_node_power_state();
      }
    }

  /* loop over each string */
  for (unsigned int i = 0; i != status_info.size(); i++)
    {
    const char *str = status_info[i].c_str();
    /* these two options are for switching nodes */
    if (!strncmp(str, NUMA_KEYWORD, strlen(NUMA_KEYWORD)))
      {
      /* if we've already processed some, save this before moving on */
      if (i != 0)
        save_node_status(current, &temp);
      
      /* the state-change block only applies to the node it was set for */
      dont_change_state = FALSE;

      if ((current = get_numa_from_str(str, current)) == NULL)
        break;
      else
        continue;
      }
    else if (!strncmp(str, "node=", strlen("node=")))
      {
      /* if we've already processed some, save this before moving on */
      if (i != 0)
        save_node_status(current, &temp);

      dont_change_state = FALSE;

      if ((current = get_node_from_str(str, name, current)) == NULL)
        break;
      else
        {
        if (current->nd_mom_reported_down == TRUE)
          {
          /* There is a race condition if using a mom hierarchy and manually
           * shutting down a non-level 1 mom: if its message that the mom is
           * shutting down gets there before its last status update, the node
           * can incorrectly be set as free again. For that reason, only set
           * a mom back up if its reporting for itself. */
          if (strcmp(name, str + strlen("node=")) != 0)
            dont_change_state = TRUE;
          else
            current->nd_mom_reported_down = FALSE;
          }

        continue;
        }
      }

    /* add the info to the "temp" pbs_attribute */
    else if (!strcmp(str, START_GPU_STATUS))
      {
      /* the handler is given the index and the whole list; str is re-read
       * from status_info[i] afterwards - presumably the handler advances i,
       * confirm against its definition */
      is_gpustat_get(current, i, status_info);
      str = status_info[i].c_str();
      }
    else if (!strcmp(str, START_MIC_STATUS))
      {
      /* same pattern as the gpu status: re-read str after the handler ran */
      process_mic_status(current, i, status_info);
      str = status_info[i].c_str();
      }
#ifdef PENABLE_LINUX_CGROUPS
    else if (!strncmp(str, "layout", 6))
      {
      /* only build the layout once; later layout strings are ignored */
      if (current->nd_layout == NULL)
        {
        current->nd_layout = new Machine(status_info[i]);
        }

      continue;
      }
#endif
    else if (!strcmp(str, "first_update=true"))
      {
      /* mom is requesting that we send the mom hierarchy file to her */
      //remove_hello(&hellos, current->nd_id);
      send_hello = true;
      
      /* reset gpu data in case mom reconnects with changed gpus */
      clear_nvidia_gpus(current);
      }
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      /* the string could not be added: release temp and stop processing;
       * rc carries the error to the caller after the final save below */
      DBPRT(("is_stat_get: cannot add attributes\n"));

      free_arst(&temp);

      break;
      }

    /* special handling for individual strings; reached by every branch
     * above that neither continued nor broke out of the loop */
    if (!strncmp(str, "state", 5))
      {
      if (dont_change_state == FALSE)
        process_state_str(current, str);
      }
    else if ((allow_any_mom == TRUE) &&
             (!strncmp(str, "uname", 5))) 
      {
      process_uname_str(current, str);
      }
    else if (!strncmp(str, "me", 2))  /* shorter str compare than "message" */
      {
      if ((!strncmp(str, "message=ERROR", 13)) &&
          (down_on_error == TRUE))
        {
        /* mark the node down and keep later state strings from undoing it */
        update_node_state(current, INUSE_DOWN);
        dont_change_state = TRUE;
        set_note_error(current, str);
        }
      }
    else if (!strncmp(str,"macaddr=",8))
      {
      update_node_mac_addr(current,str + 8);
      }
    else if ((mom_job_sync == TRUE) &&
             (!strncmp(str, "jobdata=", 8)))
      {
      /* update job attributes based on what the MOM gives us */      
      update_job_data(current, str + strlen("jobdata="));
      }
    else if ((mom_job_sync == TRUE) &&
             (!strncmp(str, "jobs=", 5)))
      {
      /* walk job list reported by mom */
      /* jobstr holds "<node name>:<job list>"; len covers the whole of str,
       * the node name, the ':' and the terminating NUL */
      size_t         len = strlen(str) + strlen(current->nd_name) + 2;
      char          *jobstr = (char *)calloc(1, len);
      sync_job_info *sji = (sync_job_info *)calloc(1, sizeof(sync_job_info));

      if ((jobstr != NULL) &&
          (sji != NULL))
        {
        sprintf(jobstr, "%s:%s", current->nd_name, str+5);
        sji->input = jobstr;
        sji->timestamp = time(NULL);

        /* sji must be freed in sync_node_jobs */
        enqueue_threadpool_request(sync_node_jobs, sji, task_pool);
        }
      else
        {
        /* at least one allocation failed: release whichever one succeeded */
        if (jobstr != NULL)
          {
          free(jobstr);
          }
        if (sji != NULL)
          {
          free(sji);
          }
        }
      }
    else if (auto_np)
      {
      if (!(strncmp(str, "ncpus=", 6)))
        {
        handle_auto_np(current, str);
        }
      }
    } /* END processing strings */

  /* store what was gathered for the last node and release it; current is
   * NULL here when a node switch failed and the loop was left early */
  if (current != NULL)
    {
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, LOGLEVEL);
    }
  
  /* only ask for a hello if nothing went wrong */
  if ((rc == PBSE_NONE) &&
      (send_hello == true))
    rc = SEND_HELLO;
    
  return(rc);
  } /* END process_status_info() */
Esempio n. 3
0
/*
 * process_alps_status()
 *
 * Applies a status update sent by an ALPS reporter node.
 *
 * status_info->str is read as a run of NUL-terminated strings that ends at an
 * empty string. Each "node=" string selects the node that the following
 * strings apply to (through determine_node_from_str()). Gpu status and
 * reservation ids go to their own handlers; every other string is
 * accumulated in a temporary attribute which is stored on the selected node
 * with save_node_status(). Each reservation id is processed only once per
 * update.
 *
 * @param nd_name     - name of the reporter node, looked up with find_nodebyname()
 * @param status_info - the status strings to process
 * @return PBSE_NONE on success, when the reporter node cannot be found, or
 *         when a node disappears while a reservation is recorded; otherwise
 *         the error code returned by decode_arst()
 */
int process_alps_status(

  char           *nd_name,
  dynamic_string *status_info)

  {
  char           *str;
  char           *ccu_p = NULL;
  char           *current_node_id = NULL;
  char            node_index_buf[MAXLINE];
  int             node_index = 0;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
  int             rc;
  pbs_attribute   temp;
  hash_table_t   *rsv_ht;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* keep track of reservations so that they're only processed once per update */
  rsv_ht = create_hash(INITIAL_RESERVATION_HOLDER_SIZE);

  /* loop over each string */
  for (str = status_info->str; str != NULL && *str != '\0'; str += strlen(str) + 1)
    {
    if (!strncmp(str, "node=", strlen("node=")))
      {
      /* close out the previous node before switching to the next one */
      if (str != status_info->str)
        {
        snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
        decode_arst(&temp, NULL, NULL, node_index_buf, 0);
        
        if (current != NULL)
          save_node_status(current, &temp);
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        continue;
      }

    /* strings that arrive before any "node=" string have no node to apply to */
    if (current == NULL)
      continue;

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      process_gpu_status(current, &str);
      
      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      char *just_rsv_id = str + strlen(reservation_id);

      if (get_value_hash(rsv_ht, just_rsv_id) == -1)
        {
        add_hash(rsv_ht, 1, strdup(just_rsv_id));

        /* sub-functions will attempt to lock a job, so we must unlock the
         * reporter node */
        unlock_node(parent, __func__, NULL, LOGLEVEL);

        process_reservation_id(current, str);

        /* remember which node we were on so it can be looked up again once
         * the parent has been re-acquired */
        current_node_id = strdup(current->nd_name);
        unlock_node(current, __func__, NULL, LOGLEVEL);

        /* re-lock the parent */
        if ((parent = find_nodebyname(nd_name)) == NULL)
          {
          /* reporter node disappeared - this shouldn't be possible */
          log_err(PBSE_UNKNODE, __func__, "Alps reporter node disappeared while recording a reservation");
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        if ((current = find_node_in_allnodes(&parent->alps_subnodes, current_node_id)) == NULL)
          {
          /* current node disappeared, this shouldn't be possible either */
          unlock_node(parent, __func__, NULL, LOGLEVEL);
          snprintf(log_buf, sizeof(log_buf), "Current node '%s' disappeared while recording a reservation",
            current_node_id);
          log_err(PBSE_UNKNODE, __func__, log_buf);
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        free(current_node_id);
        current_node_id = NULL;
        }
      }
    /* save this as is to the status strings */
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      /* every other exit releases the nodes it still holds; do the same
       * here so that a failed decode does not leave them locked */
      unlock_node(current, __func__, NULL, LOGLEVEL);
      unlock_node(parent, __func__, NULL, LOGLEVEL);

      free_arst(&temp);
      free_all_keys(rsv_ht);
      free_hash(rsv_ht);
      return(rc);
      }

    /* perform any special processing */
    if (!strncmp(str, ccu_eq, ac_ccu_eq_len))
      {
      /* save compute unit count in case we need it */
      /* note: this string (ccu_eq (CCU=)) needs to be found before cprocs_eq (CPROCS=) */
      /*  for the node */
      ccu_p = str;
      }
    else if (!strncmp(str, cproc_eq, ac_cproc_eq_len))
      {
      int ncpus;
      long svr_nppcu_value = 0;

      /*
       * Get the server nppcu value which determines how Hyper-Threaded
       * cores are reported. When server nppcu value is:
       *
       *  0 - Let ALPS choose whether or not to use Hyper-Threaded cores 
       *      (report all cores)
       *  1 - Do not use Hyper-Threaded cores
       *      (report only physical core (compute unit count)
       *  2 - Use Hyper-Threaded cores
       *      (report all cores)
       */
      get_svr_attr_l(SRV_ATR_nppcu, &svr_nppcu_value);

      if (svr_nppcu_value == NPPCU_NO_USE_HT && ccu_p != NULL)
        {
        /* no HT (nppcu==1), so use compute unit count */
        ncpus = atoi(ccu_p + ac_ccu_eq_len);

        /* use CPROC value if we are using APBASIL protocol < 1.3 */
        if (ncpus == 0)
          ncpus = atoi(str + ac_cproc_eq_len);

        /* reset the pointer */
        ccu_p = NULL;
        }
      else
        {
        /* let ALPS choose (nppcu==0) or use HT (nppcu==2), use actual processor count */
        ncpus = atoi(str + ac_cproc_eq_len);
        }

      set_ncpus(current, parent, ncpus);
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }

    } /* END processing the status update */

  /* store what was gathered for the last node of the update */
  if (current != NULL)
    {
    snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
    decode_arst(&temp, NULL, NULL, node_index_buf, 0);
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, LOGLEVEL);
    }

  unlock_node(parent, __func__, NULL, LOGLEVEL);

  free_all_keys(rsv_ht);
  free_hash(rsv_ht);

  return(PBSE_NONE);
  } /* END process_alps_status() */
Esempio n. 4
0
int process_status_info(

  const char               *nd_name,
  std::vector<std::string> &status_info)

  {
  const char     *name = nd_name;
  pbsnode        *current;
  bool            mom_job_sync = true;
  bool            auto_np = false;
  bool            down_on_error = false;
  bool            note_append_on_error = false;
  int             dont_change_state = FALSE;
  int             rc = PBSE_NONE;
  bool            send_hello = false;
  std::string     temp;
#ifdef PENABLE_LINUX_CGROUPS
  bool            force_layout_update = false;
#endif

  get_svr_attr_b(SRV_ATR_MomJobSync, &mom_job_sync);
  get_svr_attr_b(SRV_ATR_AutoNodeNP, &auto_np);
  get_svr_attr_b(SRV_ATR_NoteAppendOnError, &note_append_on_error);
  get_svr_attr_b(SRV_ATR_DownOnError, &down_on_error);

  /* if original node cannot be found do not process the update */
  if ((current = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  //A node we put to sleep is up and running.
  if (current->nd_power_state != POWER_STATE_RUNNING)
    {
    //Make sure we wait for a stray update that came after we changed the state to pass
    //by.
    if((current->nd_power_state_change_time + NODE_POWER_CHANGE_TIMEOUT) < time(NULL))
      {
      current->nd_power_state = POWER_STATE_RUNNING;
      write_node_power_state();
      }
    }

  /* loop over each string */
  for (unsigned int i = 0; i != status_info.size(); i++)
    {
    const char *str = status_info[i].c_str();

    /* these two options are for switching nodes */
    if (!strncmp(str, NUMA_KEYWORD, strlen(NUMA_KEYWORD)))
      {

      /* if we've already processed some, save this before moving on */
      if (i != 0)
        {
        save_node_status(current, temp);
        temp.clear();
        }
      
      dont_change_state = FALSE;

      if ((current = get_numa_from_str(str, current)) == NULL)
        break;
      else
        continue;
      }
    else if (!strncmp(str, "node=", strlen("node=")))
      {
      /* if we've already processed some, save this before moving on */
      if (i != 0)
        {
        save_node_status(current, temp);
        temp.clear();
        }

      dont_change_state = FALSE;

      if ((current = get_node_from_str(str, name, current)) == NULL)
        break;
      else
        {
        if (current->nd_mom_reported_down == TRUE)
          {
          /* There is a race condition if using a mom hierarchy and manually
           * shutting down a non-level 1 mom: if its message that the mom is
           * shutting down gets there before its last status update, the node
           * can incorrectly be set as free again. For that reason, only set
           * a mom back up if its reporting for itself. */
          if (strcmp(name, str + strlen("node=")) != 0)
            dont_change_state = TRUE;
          else
            current->nd_mom_reported_down = FALSE;
          }

        continue;
        }
      }

    /* add the info to the "temp" pbs_attribute */
    else if (!strcmp(str, START_GPU_STATUS))
      {
      is_gpustat_get(current, i, status_info);
      continue;
      }
    else if (!strcmp(str, START_MIC_STATUS))
      {
      process_mic_status(current, i, status_info);
      continue;
      }
#ifdef PENABLE_LINUX_CGROUPS
    else if (!strcmp(str, "force_layout_update"))
      {
      force_layout_update = true;
      continue;
      }
    else if (!strncmp(str, "layout", 6))
      {
      // Add 7 to skip "layout="
      update_layout_if_needed(current, str + 7, force_layout_update);

      // reset this to false in case we have a mom hierarchy in place
      force_layout_update = false;

      continue;
      }
#endif
    else if (!strncmp(str, PLUGIN_EQUALS, PLUGIN_EQ_LEN))
      {
      current->capture_plugin_resources(str + PLUGIN_EQ_LEN);
      continue;
      }
    else if (!strncmp(str, "jobs=", 5))
      {
      /* walk job list reported by mom */
      sync_job_info *sji = new sync_job_info();
      sji->node_name = current->get_name();
      sji->job_info = str + 5;
      sji->sync_jobs = mom_job_sync;
        
      // sji is freed in sync_node_jobs()
      enqueue_threadpool_request(sync_node_jobs, sji, task_pool);

      continue;
      }
    else if (!strcmp(str, "first_update=true"))
      {
      /* mom is requesting that we send the mom hierarchy file to her */
      //remove_hello(&hellos, current->nd_id);
      send_hello = true;
      
      /* reset gpu data in case mom reconnects with changed gpus */
      clear_nvidia_gpus(current);

      continue;
      }
    else 
      {
      // Save this string to our status line.
      if (temp.size() > 0)
        temp += ",";

      if (!strncmp(str, "message=", 8))
        {
        std::string no_newlines(str);
        size_t pos = no_newlines.find('\n');
        
        while (pos != std::string::npos)
          {
          no_newlines.replace(pos, 1, 1, ' ');
          pos = no_newlines.find('\n');
          }

        temp += no_newlines;
        }
      else
        temp += str;
    
      if (!strncmp(str, "state", 5))
        {
        if (dont_change_state == FALSE)
          process_state_str(current, str);
        }
      else if ((allow_any_mom == TRUE) &&
               (!strncmp(str, "uname", 5))) 
        {
        process_uname_str(current, str);
        }
      else if (!strncmp(str, "me", 2))  /* shorter str compare than "message" */
        {
        if ((!strncmp(str, "message=ERROR", 13)) &&
            (down_on_error == TRUE))
          {
          update_node_state(current, INUSE_DOWN);
          dont_change_state = TRUE;

          if (note_append_on_error == true)
            {
            set_note_error(current, str);
            }
          }
        }
      else if (!strncmp(str,"macaddr=",8))
        {
        update_node_mac_addr(current,str + 8);
        }
      else if ((mom_job_sync == true) &&
               (!strncmp(str, "jobdata=", 8)))
        {
        /* update job attributes based on what the MOM gives us */      
        update_job_data(current, str + strlen("jobdata="));
        }
      else if ((auto_np) &&
               (!(strncmp(str, "ncpus=", 6))))

        {
        handle_auto_np(current, str);
        }
      else if (!strncmp(str, "version=", 8))
        {
        current->set_version(str + 8);
        }
      }

    } /* END processing strings */

  if (current != NULL)
    {
    save_node_status(current, temp);
    current->unlock_node(__func__, NULL, LOGLEVEL);
    }
  
  if ((rc == PBSE_NONE) &&
      (send_hello == true))
    rc = SEND_HELLO;
    
  return(rc);
  } /* END process_status_info() */
Esempio n. 5
0
/*
 * process_alps_status()
 *
 * Applies a status update sent by an ALPS reporter node.
 *
 * Each "node=" string selects the node that the following strings apply to
 * (through determine_node_from_str()). Gpu status and reservation ids go to
 * their own handlers; every other string is accumulated in a temporary
 * attribute which is stored on the selected node with save_node_status().
 * Each reservation id is processed only once per update.
 *
 * @param nd_name     - name of the reporter node, looked up with find_nodebyname()
 * @param status_info - the status strings, processed in order
 * @return PBSE_NONE on success, when the reporter node cannot be found, or
 *         when a node disappears while a reservation is recorded; otherwise
 *         the error code returned by decode_arst()
 */
int process_alps_status(

  char           *nd_name,
  boost::ptr_vector<std::string>& status_info)

  {
  char           *current_node_id = NULL;
  char            node_index_buf[MAXLINE];
  int             node_index = 0;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
  int             rc;
  pbs_attribute   temp;
  hash_table_t   *rsv_ht;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  memset(&temp, 0, sizeof(temp));

  if ((rc = decode_arst(&temp, NULL, NULL, NULL, 0)) != PBSE_NONE)
    {
    log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_NODE, __func__, "cannot initialize attribute");
    return(rc);
    }

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* keep track of reservations so that they're only processed once per update */
  rsv_ht = create_hash(INITIAL_RESERVATION_HOLDER_SIZE);

  /* loop over each string */
  for(boost::ptr_vector<std::string>::iterator i = status_info.begin();i != status_info.end();i++)
    {
    const char *str = i->c_str();
    if (!strncmp(str, "node=", strlen("node=")))
      {
      /* close out the previous node before switching to the next one */
      if (i != status_info.begin())
        {
        snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
        decode_arst(&temp, NULL, NULL, node_index_buf, 0);

        /* no node has been selected yet if the update did not start with
         * a "node=" string, so there is nothing to save the status to */
        if (current != NULL)
          save_node_status(current, &temp);
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        continue;
      }

    /* strings that arrive before any "node=" string have no node to apply to */
    if(current == NULL)
      continue;

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      rc = process_gpu_status(current, i,status_info.end());

      /* the handler is given the iterator; should it have been left at the
       * end of the list, it must neither be dereferenced nor incremented */
      if (i == status_info.end())
        break;

      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      const char *just_rsv_id = str + strlen(reservation_id);

      if (get_value_hash(rsv_ht, just_rsv_id) == -1)
        {
        add_hash(rsv_ht, 1, strdup(just_rsv_id));

        /* sub-functions will attempt to lock a job, so we must unlock the
         * reporter node */
        unlock_node(parent, __func__, NULL, LOGLEVEL);

        process_reservation_id(current, str);

        /* remember which node we were on so it can be looked up again once
         * the parent has been re-acquired */
        current_node_id = strdup(current->nd_name);
        unlock_node(current, __func__, NULL, LOGLEVEL);

        /* re-lock the parent */
        if ((parent = find_nodebyname(nd_name)) == NULL)
          {
          /* reporter node disappeared - this shouldn't be possible */
          log_err(PBSE_UNKNODE, __func__, "Alps reporter node disappeared while recording a reservation");
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        if ((current = find_node_in_allnodes(&parent->alps_subnodes, current_node_id)) == NULL)
          {
          /* current node disappeared, this shouldn't be possible either */
          unlock_node(parent, __func__, NULL, LOGLEVEL);
          snprintf(log_buf, sizeof(log_buf), "Current node '%s' disappeared while recording a reservation",
            current_node_id);
          log_err(PBSE_UNKNODE, __func__, log_buf);
          free_arst(&temp);
          free_all_keys(rsv_ht);
          free_hash(rsv_ht);
          free(current_node_id);
          return(PBSE_NONE);
          }

        free(current_node_id);
        current_node_id = NULL;
        }
      }
    /* save this as is to the status strings */
    else if ((rc = decode_arst(&temp, NULL, NULL, str, 0)) != PBSE_NONE)
      {
      /* every other exit releases the nodes it still holds; do the same
       * here so that a failed decode does not leave them locked */
      unlock_node(current, __func__, NULL, LOGLEVEL);
      unlock_node(parent, __func__, NULL, LOGLEVEL);

      free_arst(&temp);
      free_all_keys(rsv_ht);
      free_hash(rsv_ht);
      return(rc);
      }

    /* perform any special processing */
    if (!strncmp(str, cproc_eq, ac_cproc_eq_len))
      {
      set_ncpus(current, parent, str);
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }

    } /* END processing the status update */

  /* store what was gathered for the last node of the update */
  if (current != NULL)
    {
    snprintf(node_index_buf, sizeof(node_index_buf), "node_index=%d", node_index++);
    decode_arst(&temp, NULL, NULL, node_index_buf, 0);
    save_node_status(current, &temp);
    unlock_node(current, __func__, NULL, LOGLEVEL);
    }

  unlock_node(parent, __func__, NULL, LOGLEVEL);

  free_all_keys(rsv_ht);
  free_hash(rsv_ht);

  return(PBSE_NONE);
  } /* END process_alps_status() */
int process_alps_status(

  const char               *nd_name,
  std::vector<std::string> &status_info)

  {
  const char    *ccu_p = NULL;
  char           *current_node_id = NULL;
  struct pbsnode *parent;
  struct pbsnode *current = NULL;
#ifdef PENABLE_LINUX_CGROUPS
  int             numa_nodes = 0;
  int             sockets = 0;
#endif
  std::string     temp;
  container::item_container<const char *> rsv_ht;
  char            log_buf[LOCAL_LOG_BUF_SIZE];

  /* if we can't find the parent node, ignore the update */
  if ((parent = find_nodebyname(nd_name)) == NULL)
    return(PBSE_NONE);

  /* loop over each string */
  for (unsigned int i = 0; i < status_info.size(); i++)
    {
    const char *str = status_info[i].c_str();

    if (!strncmp(str, "node=", strlen("node=")))
      {
      if (i != 0)
        {
        if (current != NULL)
          save_node_status(current, temp);
      
        temp.clear();
        }

      if ((current = determine_node_from_str(str, parent, current)) == NULL)
        break;
      else
        {
#ifdef PENABLE_LINUX_CGROUPS
        sockets = 0;
        numa_nodes = 0;
#endif

        continue;
        }
      }

    if (current == NULL)
      continue;

    /* process the gpu status information separately */
    if (!strcmp(CRAY_GPU_STATUS_START, str))
      {
      process_gpu_status(current, i, status_info);
      continue;
      }
    else if (!strncmp(reservation_id, str, strlen(reservation_id)))
      {
      const char *just_rsv_id = str + strlen(reservation_id);

      rsv_ht.lock();
      if (rsv_ht.find(just_rsv_id) == NULL)
        {
        rsv_ht.insert(just_rsv_id,just_rsv_id);
        rsv_ht.unlock();

        /* sub-functions will attempt to lock a job, so we must unlock the
         * reporter node */
        parent->unlock_node(__func__, NULL, LOGLEVEL);

        process_reservation_id(current, str);

        current_node_id = strdup(current->get_name());
        current->unlock_node(__func__, NULL, LOGLEVEL);

        /* re-lock the parent */
        if ((parent = find_nodebyname(nd_name)) == NULL)
          {
          /* reporter node disappeared - this shouldn't be possible */
          log_err(PBSE_UNKNODE, __func__, "Alps reporter node disappeared while recording a reservation");
          free(current_node_id);
          return(PBSE_NONE);
          }

        if ((current = find_node_in_allnodes(parent->alps_subnodes, current_node_id)) == NULL)
          {
          /* current node disappeared, this shouldn't be possible either */
          parent->unlock_node(__func__, NULL, LOGLEVEL);
          snprintf(log_buf, sizeof(log_buf), "Current node '%s' disappeared while recording a reservation",
            current_node_id);
          log_err(PBSE_UNKNODE, __func__, log_buf);
          free(current_node_id);
          return(PBSE_NONE);
          }

        free(current_node_id);
        current_node_id = NULL;
        }
      else
        {
        rsv_ht.unlock();
        }
      }
    /* save this as is to the status strings */
    else
      {
      if (temp.size() > 0)
        temp += ",";
      temp += str;
      }

    /* perform any special processing */
    if (!strncmp(str, ccu_eq, ac_ccu_eq_len))
      {
      /* save compute unit count in case we need it */
      /* note: this string (ccu_eq (CCU=)) needs to be found before cprocs_eq (CPROCS=) */
      /*  for the node */
      ccu_p = str;
      }
    else if (!strncmp(str, cproc_eq, ac_cproc_eq_len))
      {
      int ncpus;
      long svr_nppcu_value = 0;

      /*
       * Get the server nppcu value which determines how Hyper-Threaded
       * cores are reported. When server nppcu value is:
       *
       *  0 - Let ALPS choose whether or not to use Hyper-Threaded cores 
       *      (report all cores)
       *  1 - Do not use Hyper-Threaded cores
       *      (report only physical core (compute unit count)
       *  2 - Use Hyper-Threaded cores
       *      (report all cores)
       */
      get_svr_attr_l(SRV_ATR_nppcu, &svr_nppcu_value);

      if (svr_nppcu_value == NPPCU_NO_USE_HT && ccu_p != NULL)
        {
        /* no HT (nppcu==1), so use compute unit count */
        ncpus = atoi(ccu_p + ac_ccu_eq_len);

        /* use CPROC value if we are using APBASIL protocol < 1.3 */
        if (ncpus == 0)
          ncpus = atoi(str + ac_cproc_eq_len);

        /* reset the pointer */
        ccu_p = NULL;
        }
      else
        {
        /* let ALPS choose (nppcu==0) or use HT (nppcu==2), use actual processor count */
        ncpus = atoi(str + ac_cproc_eq_len);
        }

      set_ncpus(current, parent, ncpus);

#ifdef PENABLE_LINUX_CGROUPS
      if (numa_nodes == 0)
        numa_nodes = 1;

      if ((current->nd_layout.is_initialized() == false) ||
          (current->nd_layout.getTotalThreads() != current->nd_slots.get_total_execution_slots()))
        {
        Machine m(current->nd_slots.get_total_execution_slots(), numa_nodes, sockets);
        current->nd_layout = m;
        }
#endif
      }
    else if (!strncmp(str, state, strlen(state)))
      {
      set_state(current, str);
      }
#ifdef PENABLE_LINUX_CGROUPS
    else if (!strncmp(str, "totmem", 6))
      {
      set_total_memory(current, str);
      }
    else if (!strncmp(str, numas, 10))
      {
      // 11 is strlen("numa_nodes=")
      numa_nodes = strtol(str + 11, NULL, 10);
      }
    else if (!strncmp(str, "socket", 6))
      {
      // 7 is strlen("socket=")
      sockets = strtol(str + 7, NULL, 10);
      }
#endif

    } /* END processing the status update */

  if (current != NULL)
    {
    save_node_status(current, temp);
    current->unlock_node(__func__, NULL, LOGLEVEL);
    }

  parent->unlock_node(__func__, NULL, LOGLEVEL);

  return(PBSE_NONE);
  } /* END process_alps_status() */