/*
 * talk_with_mom - ask the resource monitor (mom) on a node for every
 * resource named in res_to_get[] and record the answers in ninfo.
 *
 *   ninfo - the node to query; may be NULL
 *
 * Nothing is done when ninfo is NULL or the node is flagged down or
 * offline.  Answers are matched to requests by position: the i-th answer
 * read back is interpreted as the value of res_to_get[i].  Reading stops
 * at the first NULL answer.
 *
 * Numeric answers that do not parse completely fall back to a default:
 * max_load and ideal_load take the current ninfo->ncpus, ncpus takes 1,
 * and loadave takes -1.0.
 *
 * The "arch" answer is stored directly in ninfo->arch and is not freed
 * here; every other answer is freed once it has been consumed.
 *
 * returns 1 if the connection to mom cannot be opened, 0 otherwise
 */
int talk_with_mom(node_info *ninfo)
{
    int mom_sd;            /* connection descriptor to mom */
    char *mom_ans;         /* the answer from mom - getreq_err() */
    char *endp;            /* used with strtol() / strtod() */
    double testd;          /* used to convert string -> double */
    int testi;             /* used to convert string -> int */
    char errbuf[256];
    int i;
    int local_errno = 0;

    if ((ninfo == NULL) || ninfo->is_down || ninfo->is_offline)
        return 0;

    if ((mom_sd = openrm(ninfo->name, pbs_rm_port)) < 0) {
        sched_log(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, ninfo->name,
                  "Can not open connection to mom");
        return 1;
    }

    /*
     * NOTE(review): a failure to start the request is reported as 0, the
     * same value used for success - confirm callers rely on this.
     */
    if (begin_rm_req(mom_sd, &local_errno, num_resget) != 0) {
        closerm_err(&local_errno, mom_sd);
        return 0;
    }

    /* queue every request first, then read the answers back in order */
    for (i = 0; i < num_resget; i++)
        addreq_err(mom_sd, &local_errno, (char *) res_to_get[i]);

    for (i = 0;
         i < num_resget &&
         (mom_ans = getreq_err(&local_errno, mom_sd)) != NULL;
         i++) {
        if (!strcmp(res_to_get[i], "max_load")) {
            testd = strtod(mom_ans, &endp);

            if (*endp == '\0')
                ninfo->max_load = testd;
            else
                ninfo->max_load = ninfo->ncpus;

            free(mom_ans);
        } else if (!strcmp(res_to_get[i], "ideal_load")) {
            testd = strtod(mom_ans, &endp);

            if (*endp == '\0')
                ninfo->ideal_load = testd;
            else
                ninfo->ideal_load = ninfo->ncpus;

            free(mom_ans);
        } else if (!strcmp(res_to_get[i], "arch")) {
            /* the answer buffer itself becomes the node's arch string */
            ninfo->arch = mom_ans;
        } else if (!strcmp(res_to_get[i], "ncpus")) {
            testi = (int) strtol(mom_ans, &endp, 10);

            if (*endp == '\0')
                ninfo->ncpus = testi;
            else
                ninfo->ncpus = 1;

            free(mom_ans);
        } else if (!strcmp(res_to_get[i], "physmem")) {
            ninfo->physmem = res_to_num(mom_ans);
            free(mom_ans);
        } else if (!strcmp(res_to_get[i], "loadave")) {
            testd = strtod(mom_ans, &endp);

            if (*endp == '\0')
                ninfo->loadave = testd;
            else
                ninfo->loadave = -1.0;

            free(mom_ans);
        } else {
            /*
             * The answer has no length limit, so bound the write into
             * errbuf; a long answer is truncated in the log message
             * instead of overrunning the buffer.
             */
            snprintf(errbuf, sizeof(errbuf),
                     "Unknown resource value[%d]: %s", i, mom_ans);
            sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_NODE, ninfo->name,
                      errbuf);

            /* nobody keeps this answer, so release it like the others */
            free(mom_ans);
        }
    }

    closerm_err(&local_errno, mom_sd);

    return 0;
}
/*
 * Attempt to set the state of the hpm counters on the host associated
 * with the given Resources.  Mode must be one of HPM_SETUP_USERMODE,
 * HPM_SETUP_GLOBALMODE or HPM_SETUP_REVOKE.
 *
 * If the RSRCS_FLAGS_HPM_USER flag in rsrcs->flags already matches the
 * requested mode, nothing is sent and 0 is returned.  Otherwise the
 * control request is sent to the resource monitor on rsrcs->exechost and
 * its reply is inspected:
 *   - reply equal to HPM_CTL_OKAY_STR: RSRCS_FLAGS_HPM_USER is set for
 *     HPM_SETUP_USERMODE and cleared for the other modes, and the
 *     success is logged.
 *   - reply beginning with HPM_CTL_ERROR_STR: the text following it is
 *     logged.
 *   - anything else: logged as an unparseable response.
 *
 * Return 0 on success, non-zero otherwise (unknown mode, resource monitor
 * unreachable, no reply, or a reply other than HPM_CTL_OKAY_STR).
 */
static int setup_hpm(Resources *rsrcs, int mode)
{
    char *id = "setup_hpm";
    char *response;        /* buffer returned by getreq_err(); freed here */
    char *text;            /* part of the response being examined */
    char *value;
    char hpm_ctl[64];
    int rm;
    int local_errno = 0;

    switch (mode) {
    case HPM_SETUP_USERMODE:
        /* Sanity check - is the host already in the requested mode? */
        if (rsrcs->flags & RSRCS_FLAGS_HPM_USER) {
            DBPRT(("%s: hpm user mode requested for %s, but already set!\n",
                   id, rsrcs->exechost));
            return (0);
        }

        (void)snprintf(hpm_ctl, sizeof(hpm_ctl), HPM_CTL_FORMAT_STR,
                       HPM_CTL_USERMODE_STR);
        break;

    case HPM_SETUP_GLOBALMODE:
        /* Sanity check - is the host already in the requested mode? */
        if (!(rsrcs->flags & RSRCS_FLAGS_HPM_USER)) {
            DBPRT(("%s: hpm global mode requested for %s, but already set!\n",
                   id, rsrcs->exechost));
            return (0);
        }

        (void)snprintf(hpm_ctl, sizeof(hpm_ctl), HPM_CTL_FORMAT_STR,
                       HPM_CTL_GLOBALMODE_STR);
        break;

    case HPM_SETUP_REVOKE:
        /* Sanity check - is the host already in the requested mode? */
        if (!(rsrcs->flags & RSRCS_FLAGS_HPM_USER)) {
            DBPRT(("%s: hpm revocation requested for %s, but already global!\n",
                   id, rsrcs->exechost));
            return (0);
        }

        (void)snprintf(hpm_ctl, sizeof(hpm_ctl), HPM_CTL_FORMAT_STR,
                       HPM_CTL_REVOKE_STR);
        break;

    default:
        DBPRT(("%s: Bogus mode %d - bailing.\n", id, mode));
        return (1);
    }

    DBPRT(("%s: '%s' @ %s\n", id, hpm_ctl, rsrcs->exechost));

    if ((rm = openrm(rsrcs->exechost, 0)) < 0) {
        (void)sprintf(log_buffer, "Unable to contact resmom@%s",
                      rsrcs->exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        return (1);
    }

    /* Ask the resource monitor on the remote host to set the mode for us. */
    response = NULL;

    if (addreq_err(rm, &local_errno, hpm_ctl) == 0)
        response = getreq_err(&local_errno, rm);

    closerm(rm);

    if (response == NULL) {
        (void)sprintf(log_buffer, "bad return from getreq(%s) @%s, %d",
                      hpm_ctl, rsrcs->exechost, local_errno);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));
        return (1);
    }

    /*
     * If a full response was received, move forward to the first character
     * of the value (following the '=' in the attribute-value pair).  The
     * cursor is kept separate from 'response' so that the buffer handed
     * back by getreq_err() can still be freed on every exit below.
     */
    text = response;

    if ((value = strchr(response, '=')) != NULL)
        text = value + 1;

    /*
     * If the hpm_ctl request succeeded, log the fact, and set the flag in
     * the resources for this host to indicate that it is now in the other
     * state.
     */
    if (strcmp(text, HPM_CTL_OKAY_STR) == 0) {
        if (mode == HPM_SETUP_USERMODE)
            rsrcs->flags |= RSRCS_FLAGS_HPM_USER;
        else
            rsrcs->flags &= ~RSRCS_FLAGS_HPM_USER;

        (void)sprintf(log_buffer, "%s on %s succeeded", hpm_ctl,
                      rsrcs->exechost);
        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
        DBPRT(("%s: %s\n", id, log_buffer));

        free(response);
        return (0);
    }

    /* If it's an error string, just report the error message returned. */
    if (strncmp(text, HPM_CTL_ERROR_STR, strlen(HPM_CTL_ERROR_STR)) == 0) {
        text += strlen(HPM_CTL_ERROR_STR);  /* Skip the error string. */

        while (*text == ' ')                /* Skip leading whitespace. */
            ++text;

        /* And generate the log message from the request and the response. */
        (void)sprintf(log_buffer, "%s: %s (%s)", hpm_ctl, text,
                      rsrcs->exechost);
    } else {
        (void)sprintf(log_buffer, "cannot parse response %s to request %s@%s",
                      text, hpm_ctl, rsrcs->exechost);
    }

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    DBPRT(("%s: %s\n", id, log_buffer));

    /* 'text' pointed into this buffer; it is no longer used past here */
    free(response);
    return (1);
}