Example #1
0
/*
 * talk_with_mom - query the resource monitor (mom) on a node for the
 *   resources named in res_to_get[] and record the answers in ninfo.
 *
 *   Nothing is done when ninfo is NULL or the node is flagged down or
 *   offline.
 *
 *   Answers are read back in the order the requests were queued, so
 *   answer i is interpreted as the value of res_to_get[i].  Every answer
 *   string is freed here except the one for "arch", whose pointer is
 *   stored directly in ninfo->arch.
 *
 * returns 1 if the connection to mom could not be opened, 0 in every
 *   other case (including a begin_rm_req() failure or answers that stop
 *   before num_resget have been read).
 */
int talk_with_mom(

    node_info *ninfo)

{
    int mom_sd;   /* connection descriptor to mom */
    char *mom_ans;  /* the answer from mom - getreq() */
    char *endp;   /* used with strtol() */
    double testd;   /* used to convert string -> double */
    int testi;   /* used to convert string -> int */
    char errbuf[256];
    int i;
    int local_errno = 0;

    if ((ninfo != NULL) && !ninfo->is_down && !ninfo->is_offline)
    {
        if ((mom_sd = openrm(ninfo -> name, pbs_rm_port)) < 0)
        {
            sched_log(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, ninfo -> name, "Can not open connection to mom");
            return 1;
        }

        if(begin_rm_req(mom_sd,&local_errno,num_resget) != 0)
        {
            closerm_err(&local_errno, mom_sd);
            return 0;
        }

        /* queue one request per resource; answers come back in this order */
        for (i = 0; i < num_resget; i++)
            addreq_err(mom_sd, &local_errno, (char *) res_to_get[i]);

        /* stop at the first missing answer (getreq_err() returned NULL) */
        for (i = 0; i < num_resget && (mom_ans = getreq_err(&local_errno, (mom_sd))) != NULL; i++)
        {
            if (!strcmp(res_to_get[i], "max_load"))
            {
                testd = strtod(mom_ans, &endp);

                /* trailing junk means mom did not send a plain number */
                if (*endp == '\0')
                    ninfo -> max_load = testd;
                else
                    ninfo -> max_load = ninfo -> ncpus;

                free(mom_ans);
            }
            else if (!strcmp(res_to_get[i], "ideal_load"))
            {
                testd = strtod(mom_ans, &endp);

                if (*endp == '\0')
                    ninfo -> ideal_load = testd;
                else
                    ninfo -> ideal_load = ninfo -> ncpus;

                free(mom_ans);
            }
            else if (!strcmp(res_to_get[i], "arch"))
            {
                /*
                 * The answer buffer itself is kept; it is not freed here.
                 * NOTE(review): any previous ninfo->arch value is simply
                 * overwritten - confirm the caller does not rely on this
                 * function to release it.
                 */
                ninfo -> arch = mom_ans;
            }
            else if (!strcmp(res_to_get[i], "ncpus"))
            {
                testi = strtol(mom_ans, &endp, 10);

                if (*endp == '\0')
                    ninfo -> ncpus = testi;
                else
                    ninfo -> ncpus = 1;

                free(mom_ans);
            }
            else if (!strcmp(res_to_get[i], "physmem"))
            {
                ninfo -> physmem = res_to_num(mom_ans);
                free(mom_ans);
            }
            else if (!strcmp(res_to_get[i], "loadave"))
            {
                testd = strtod(mom_ans, &endp);

                if (*endp == '\0')
                    ninfo -> loadave = testd;
                else
                    ninfo -> loadave = -1.0;

                free(mom_ans);
            }
            else
            {
                /*
                 * mom_ans arrives from the remote side and may be longer
                 * than errbuf, so the message is length-bounded.
                 */
                snprintf(errbuf, sizeof(errbuf), "Unknown resource value[%d]: %s", i, mom_ans);
                sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, ninfo -> name, errbuf);

                /* the answer is not stored anywhere, so release it */
                free(mom_ans);
            }
        }

        closerm_err(&local_errno, mom_sd);
    }

    return 0;
}
Example #2
0
/*
 * Attempt to set the state of the hpm counters on the host associated
 * with the given Resources.  Mode must be one of HPM_SETUP_USERMODE,
 * HPM_SETUP_GLOBALMODE or HPM_SETUP_REVOKE.
 *
 * If the RSRCS_FLAGS_HPM_USER bit in rsrcs->flags shows the host is
 * already in the requested state, no request is sent and 0 is returned.
 * Otherwise the request is sent to the resource monitor on
 * rsrcs->exechost; when the reply matches HPM_CTL_OKAY_STR the flag is
 * set (user mode) or cleared (global mode / revoke) and the result is
 * logged.
 *
 * Return 0 on success, non-zero otherwise (unknown mode, unable to
 * contact the resource monitor, no reply, error reply, or a reply that
 * cannot be parsed).
 */
static int
setup_hpm(Resources *rsrcs, int mode)
  {
  char   *id = "setup_hpm";
  char   *reply;      /* buffer handed back by getreq_err(); freed here */
  char   *response;   /* cursor into reply - never passed to free() */
  char   *value;
  char    hpm_ctl[64];
  int     rm;
  int     local_errno = 0;

  switch (mode)
    {

    case HPM_SETUP_USERMODE:

      /* Sanity check - is the host already in the requested mode? */

      if (rsrcs->flags & RSRCS_FLAGS_HPM_USER)
        {
        DBPRT(("%s: hpm user mode requested for %s, but already set!\n",
               id, rsrcs->exechost));
        return (0);
        }

      (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_USERMODE_STR);
      break;

    case HPM_SETUP_GLOBALMODE:

      /* Sanity check - is the host already in the requested mode? */

      if (!(rsrcs->flags & RSRCS_FLAGS_HPM_USER))
        {
        DBPRT(("%s: hpm global mode requested for %s, but already set!\n",
               id, rsrcs->exechost));
        return (0);
        }

      (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_GLOBALMODE_STR);
      break;

    case HPM_SETUP_REVOKE:
      /* Sanity check - is the host already in the requested mode? */

      if (!(rsrcs->flags & RSRCS_FLAGS_HPM_USER))
        {
        DBPRT(("%s: hpm revocation requested for %s, but already global!\n",
               id, rsrcs->exechost));
        return (0);
        }

      (void)sprintf(hpm_ctl, HPM_CTL_FORMAT_STR, HPM_CTL_REVOKE_STR);
      break;

    default:
      DBPRT(("%s: Bogus mode %d - bailing.\n", id, mode));
      return (1);
    }

  DBPRT(("%s: '%s' @ %s\n", id, hpm_ctl, rsrcs->exechost));

  if ((rm = openrm(rsrcs->exechost, 0)) < 0)
    {
    (void)sprintf(log_buffer,
                  "Unable to contact resmom@%s", rsrcs->exechost);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    return (1);
    }

  /* Ask the resource monitor on the remote host to set the mode for us. */
  reply = NULL;

  if (addreq_err(rm, &local_errno, hpm_ctl) == 0)
    reply = getreq_err(&local_errno, rm);

  /* The connection is only needed for this one request/response pair. */
  closerm(rm);

  if (reply == NULL)
    {
    (void)sprintf(log_buffer, "bad return from getreq(%s) @%s, %d",
                  hpm_ctl, rsrcs->exechost, local_errno);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));
    return (1);
    }

  /*
   * If a full response was received, move forward to the first character
   * of the value (following the '=' in the attribute-value pair).  The
   * start of the buffer stays in 'reply' so it can be released later.
   */
  response = reply;
  value = strchr(reply, '=');

  if (value != NULL)
    response = value + 1;

  /*
   * If the hpm_ctl request succeeded, log the fact, and set the flag in
   * the resources for this host to indicate that it is now in the other
   * state.
   */
  if (strcmp(response, HPM_CTL_OKAY_STR) == 0)
    {
    if (mode == HPM_SETUP_USERMODE)
      rsrcs->flags |= RSRCS_FLAGS_HPM_USER;
    else
      rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER;

    (void)sprintf(log_buffer, "%s on %s succeeded", hpm_ctl,
                  rsrcs->exechost);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    DBPRT(("%s: %s\n", id, log_buffer));

    free(reply);

    return (0);
    }

  /*
   * NOTE(review): the messages below copy the remote response into
   * log_buffer with sprintf(); the size of log_buffer is not visible
   * from here - confirm it cannot be overrun by a long response.
   */

  /* If it's an error string, just report the error message returned. */
  if (strncmp(response, HPM_CTL_ERROR_STR, strlen(HPM_CTL_ERROR_STR)) == 0)
    {

    response += strlen(HPM_CTL_ERROR_STR); /* Skip the error string. */

    while (*response == ' ')  /* Skip leading whitespace. */
      ++ response;

    /* And generate the log message from the request and the response. */
    (void)sprintf(log_buffer, "%s: %s (%s)", hpm_ctl, response,
                  rsrcs->exechost);
    }
  else
    {
    (void)sprintf(log_buffer, "cannot parse response %s to request %s@%s",
                  response, hpm_ctl, rsrcs->exechost);
    }

  /* The response text has been copied into log_buffer; release it. */
  free(reply);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  DBPRT(("%s: %s\n", id, log_buffer));
  return (1);
  }