/* * record_reservation() * * @pre-cond: pnode and rsv_id must be valid pointers * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism * and on the job which has the node reserved, or -1 is returned and the reservation * is not recorded. * @param - pnode the node which is reporting the reservation * @param - rsv_id the id of the reservation being reported * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise */ int record_reservation( struct pbsnode *pnode, const char *rsv_id) { job *pjob; bool found_job = false; char jobid[PBS_MAXSVRJOBID + 1]; for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++) { /* cray only allows one job per node, so any valid job will be the job that is * reserving this node. */ job_usage_info *jui = pnode->nd_job_usages[i]; strcpy(jobid, jui->jobid); unlock_node(pnode, __func__, NULL, LOGLEVEL); if ((pjob = svr_find_job(jobid, TRUE)) != NULL) { mutex_mgr job_mutex(pjob->ji_mutex, true); pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id); pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET; /* add environment variable BATCH_PARTITION_ID */ char buf[1024]; snprintf(buf, sizeof(buf), "BATCH_PARTITION_ID=%s", rsv_id); pbs_attribute tempattr; clear_attr(&tempattr, &job_attr_def[JOB_ATR_variables]); job_attr_def[JOB_ATR_variables].at_decode(&tempattr, NULL, NULL, buf, 0); job_attr_def[JOB_ATR_variables].at_set( &pjob->ji_wattr[JOB_ATR_variables], &tempattr, INCR); job_attr_def[JOB_ATR_variables].at_free(&tempattr); track_alps_reservation(pjob); found_job = true; job_mutex.unlock(); lock_node(pnode, __func__, NULL, LOGLEVEL); break; } else lock_node(pnode, __func__, NULL, LOGLEVEL); } if (found_job == false) return(-1); return(PBSE_NONE); } /* END record_reservation() */
struct pbsnode *create_alps_subnode( struct pbsnode *parent, char *node_id) { struct pbsnode *subnode = calloc(1, sizeof(struct pbsnode)); svrattrl *plist = NULL; int bad; int rc; if (initialize_pbsnode(subnode, strdup(node_id), NULL, NTYPE_CLUSTER) != PBSE_NONE) { free(subnode); log_err(ENOMEM, __func__, ""); return(NULL); } if (create_subnode(subnode) == NULL) { free(subnode); log_err(ENOMEM, __func__, ""); return(NULL); } /* do we need to do something else here? */ subnode->nd_addrs = parent->nd_addrs; rc = mgr_set_node_attr(subnode, node_attr_def, ND_ATR_LAST, plist, ATR_DFLAG_MGRD | ATR_DFLAG_MGWR, &bad, (void *)subnode, ATR_ACTION_ALTER); if (rc != PBSE_NONE) { free(subnode); log_err(rc, __func__, "Couldn't set node attributes"); return(NULL); } subnode->nd_ntype = NTYPE_CLUSTER; subnode->parent = parent; /* add any properties to the subnodes */ copy_properties(subnode, parent); lock_node(subnode, __func__, NULL, 0); insert_node(&(parent->alps_subnodes), subnode); return(subnode); } /* END create_alps_subnode() */
struct pbsnode *create_alps_subnode( struct pbsnode *parent, const char *node_id) { struct pbsnode *subnode = (struct pbsnode *)calloc(1, sizeof(struct pbsnode)); svrattrl *plist = NULL; int bad; int rc = PBSE_NONE; if (initialize_pbsnode(subnode, strdup(node_id), NULL, NTYPE_CLUSTER, FALSE) != PBSE_NONE) { free(subnode); log_err(ENOMEM, __func__, ""); return(NULL); } // all nodes have at least 1 core add_execution_slot(subnode); // we need to increment this count for accuracy svr_clnodes++; /* do we need to do something else here? */ subnode->nd_addrs = parent->nd_addrs; rc = mgr_set_node_attr(subnode, node_attr_def, ND_ATR_LAST, plist, ATR_DFLAG_MGRD | ATR_DFLAG_MGWR, &bad, (void *)subnode, ATR_ACTION_ALTER); if (rc != PBSE_NONE) { free(subnode); log_err(rc, __func__, "Couldn't set node attributes"); return(NULL); } subnode->nd_ntype = NTYPE_CLUSTER; subnode->parent = parent; /* add any properties to the subnodes */ copy_properties(subnode, parent); lock_node(subnode, __func__, NULL, LOGLEVEL); insert_node(&(parent->alps_subnodes), subnode); return(subnode); } /* END create_alps_subnode() */
/* * record_reservation() * * @pre-cond: pnode and rsv_id must be valid pointers * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism * and on the job which has the node reserved, or -1 is returned and the reservation * is not recorded. * @param - pnode the node which is reporting the reservation * @param - rsv_id the id of the reservation being reported * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise */ int record_reservation( struct pbsnode *pnode, const char *rsv_id) { job *pjob; bool found_job = false; char jobid[PBS_MAXSVRJOBID + 1]; for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++) { /* cray only allows one job per node, so any valid job will be the job that is * reserving this node. */ job_usage_info *jui = pnode->nd_job_usages[i]; strcpy(jobid, jui->jobid); unlock_node(pnode, __func__, NULL, LOGLEVEL); if ((pjob = svr_find_job(jobid, TRUE)) != NULL) { mutex_mgr job_mutex(pjob->ji_mutex, true); pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id); pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET; track_alps_reservation(pjob); found_job = true; job_mutex.unlock(); lock_node(pnode, __func__, NULL, LOGLEVEL); break; } else lock_node(pnode, __func__, NULL, LOGLEVEL); } if (found_job == false) return(-1); return(PBSE_NONE); } /* END record_reservation() */
struct pbsnode *get_numa_from_str( const char *str, /* I */ struct pbsnode *np) /* I */ { const char *numa_id; struct pbsnode *numa; unsigned long numa_index; char log_buf[LOCAL_LOG_BUF_SIZE]; if (np->node_boards == NULL) { /* ERROR */ snprintf(log_buf,sizeof(log_buf), "Node %s isn't declared to be NUMA, but mom is reporting\n", np->nd_name); log_err(-1, __func__, log_buf); unlock_node(np, __func__, "np numa update", LOGLEVEL); return(NULL); } numa_id = str + strlen(NUMA_KEYWORD); numa_index = atoi(numa_id); numa = AVL_find(numa_index, np->nd_mom_port, np->node_boards); if (numa == NULL) { /* ERROR */ snprintf(log_buf,sizeof(log_buf), "Could not find NUMA index %lu for node %s\n", numa_index, np->nd_name); log_err(-1, __func__, log_buf); unlock_node(np, __func__, "np numa update", LOGLEVEL); return(NULL); } /* SUCCESS */ unlock_node(np, __func__, "np numa update", LOGLEVEL); lock_node(numa, __func__, "numa numa update", LOGLEVEL); numa->nd_lastupdate = time(NULL); return(numa); } /* END get_numa_from_str() */
struct pbsnode *get_next_login_node( struct prop *needed) { struct pbsnode *pnode = NULL; login_node *ln; int node_fits = TRUE; pthread_mutex_lock(logins.ln_mutex); ln = (login_node *)logins.ra->slots[logins.next_node].item; if (ln != NULL) { pnode = ln->pnode; lock_node(pnode, __func__, NULL, LOGLEVEL); if (needed != NULL) { if (hasprop(pnode, needed) == FALSE) { node_fits = FALSE; } } /* must have at least one execution slot available */ if ((pnode->nd_nsn - pnode->nd_np_to_be_used < 1) || ((pnode->nd_state & INUSE_DOWN) != 0) || ((pnode->nd_state & INUSE_OFFLINE) != 0)) { node_fits = FALSE; } if (node_fits == FALSE) { unlock_node(pnode, __func__, NULL, LOGLEVEL); pnode = find_fitting_node(needed); } else { ln->times_used++; update_next_node_index(ln->times_used); } } pthread_mutex_unlock(logins.ln_mutex); return(pnode); } /* END get_next_login_node() */
struct pbsnode *get_next_login_node( struct prop *needed) { struct pbsnode *pnode = NULL; int node_fits = TRUE; pthread_mutex_lock(logins.ln_mutex); login_node &ln = logins.nodes[logins.next_node]; pnode = ln.pnode; lock_node(pnode, __func__, NULL, LOGLEVEL); if (needed != NULL) { if (hasprop(pnode, needed) == FALSE) { node_fits = FALSE; } } /* must have at least one execution slot available */ if ((pnode->nd_slots.get_total_execution_slots() - pnode->nd_np_to_be_used < 1) || ((pnode->nd_state & INUSE_NOT_READY) != 0) || ((pnode->nd_state & INUSE_OFFLINE) != 0) || (pnode->nd_power_state != POWER_STATE_RUNNING)) { node_fits = FALSE; } if (node_fits == FALSE) { unlock_node(pnode, __func__, NULL, LOGLEVEL); pnode = find_fitting_node(needed); } else { ln.times_used++; update_next_node_index(ln.times_used); } pthread_mutex_unlock(logins.ln_mutex); return(pnode); } /* END get_next_login_node() */
/*
 * check_node()
 *
 * Locks the node held by ln and decides whether it can be used: it must have
 * the needed properties, at least one execution slot not already claimed,
 * and be neither down nor offline.
 *
 * @param ln - the login node entry whose node should be checked
 * @param needed - the properties the node is checked against
 * @return the node, still locked, when it is usable; NULL (with the node
 * unlocked again) otherwise
 */

struct pbsnode *check_node(

  login_node  *ln,
  struct prop *needed)

  {
  struct pbsnode *candidate = ln->pnode;

  lock_node(candidate, __func__, NULL, LOGLEVEL);

  /* reject as soon as any requirement is not met */
  if ((hasprop(candidate, needed) != TRUE) ||
      (candidate->nd_nsn - candidate->nd_np_to_be_used < 1) ||
      ((candidate->nd_state & (INUSE_DOWN | INUSE_OFFLINE)) != 0))
    {
    unlock_node(candidate, __func__, NULL, LOGLEVEL);

    return(NULL);
    }

  return(candidate);
  } /* END check_node() */
/*
 * get_numa_statuses()
 *
 * Reports status for every NUMA node board of pnode instead of for pnode
 * itself. When pnode has no node boards, pnode's own status is reported.
 * Each board is locked only for the duration of its status_node() call, and
 * the first failing call ends the walk.
 *
 * @param pnode - the node to report on
 * @param preq - the batch request
 * @param bad - passed through to status_node()
 * @param pstathd - the list to add this response to
 *
 * @return - 0 on SUCCESS, error code otherwise
 */

int get_numa_statuses(

  struct pbsnode       *pnode,    /* ptr to node receiving status query */
  struct batch_request *preq,
  int                  *bad,      /* O */
  tlist_head           *pstathd)  /* head of list to append status to */

  {
  int             board = 0;
  int             rc = 0;
  struct pbsnode *numa;

  /* no numa nodes, just return the status for this node */
  if (pnode->num_node_boards == 0)
    return(status_node(pnode, preq, bad, pstathd));

  while (board < pnode->num_node_boards)
    {
    numa = AVL_find(board, pnode->nd_mom_port, pnode->node_boards);
    board++;

    /* indexes with no board behind them are skipped */
    if (numa != NULL)
      {
      lock_node(numa, __func__, NULL, LOGLEVEL);
      rc = status_node(numa, preq, bad, pstathd);
      unlock_node(numa, __func__, NULL, LOGLEVEL);

      if (rc != PBSE_NONE)
        break;
      }
    }

  return(rc);
  } /* END get_numa_statuses() */
/*
 * check_node()
 *
 * Locks the node held by ln and decides whether it can be used: it must have
 * the needed properties, have more free execution slots than are already
 * claimed, be ready and online, and be in the running power state.
 *
 * @pre-cond - ln must be a pointer to a valid login node struct
 * @param ln - a pointer to the login node struct containing the
 * node that should be checked
 * @param needed - an optional pointer to the required properties for
 * the login node to have.
 * @return a pointer to the node (still locked) if it is valid to be used,
 * NULL (with the node unlocked again) otherwise
 */

struct pbsnode *check_node(

  login_node  *ln,
  struct prop *needed)

  {
  struct pbsnode *candidate = ln->pnode;

  lock_node(candidate, __func__, NULL, LOGLEVEL);

  /* reject as soon as any requirement is not met */
  if ((hasprop(candidate, needed) != TRUE) ||
      (candidate->nd_slots.get_number_free() - candidate->nd_np_to_be_used < 1) ||
      ((candidate->nd_state & (INUSE_NOT_READY | INUSE_OFFLINE)) != 0) ||
      (candidate->nd_power_state != POWER_STATE_RUNNING))
    {
    unlock_node(candidate, __func__, NULL, LOGLEVEL);

    return(NULL);
    }

  return(candidate);
  } /* END check_node() */
/*
 * find_alpsnode_by_name()
 *
 * Looks node_id up in the parent's alps_subnodes hash table while holding
 * alps_subnodes.allnodes_mutex, then locks the node that was found.
 *
 * Note that the container mutex is released before the node itself is
 * locked.
 *
 * @param parent - the node whose alps subnodes are searched
 * @param node_id - the name of the subnode to find
 * @return the subnode (lock_node() has been called on it), or NULL when the
 * name is not in the hash table or its slot holds no node
 */

struct pbsnode *find_alpsnode_by_name(

  struct pbsnode *parent,
  char           *node_id)

  {
  struct pbsnode *found = NULL;
  int             slot;

  pthread_mutex_lock(parent->alps_subnodes.allnodes_mutex);

  slot = get_value_hash(parent->alps_subnodes.ht, node_id);

  /* a negative value means the name is not in the table */
  if (slot >= 0)
    found = (struct pbsnode *)parent->alps_subnodes.ra->slots[slot].item;

  pthread_mutex_unlock(parent->alps_subnodes.allnodes_mutex);

  if (found == NULL)
    return(NULL);

  lock_node(found, __func__, NULL, 0);

  return(found);
  } /* END find_alpsnode_by_name() */
/************************************************* * svr_is_request * * Return: svr_is_request always returns a non-zero value * and it must call close_conn to close the connection * before returning. PBSE_SOCKET_CLOSE is the code * for a successful return. But which ever retun * code is iused it must terminate the while loop * in start_process_pbs_server_port. *************************************************/ int svr_is_request( struct tcp_chan *chan, int version) { int command = 0; int ret = DIS_SUCCESS; int i; int err; char nodename[PBS_MAXHOSTNAME]; int perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR; unsigned long ipaddr; unsigned short mom_port; unsigned short rm_port; unsigned long tmpaddr; struct sockaddr_in *addr = NULL; struct sockaddr s_addr; unsigned int len = sizeof(s_addr); struct pbsnode *node = NULL; char *node_name = NULL; char log_buf[LOCAL_LOG_BUF_SIZE+1]; command = disrsi(chan, &ret); if (ret != DIS_SUCCESS) goto err; if (LOGLEVEL >= 4) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from sock %d (version %d)", chan->sock, version); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } if (getpeername(chan->sock, &s_addr, &len) != 0) { close_conn(chan->sock, FALSE); log_err(errno,__func__, (char *)"Cannot get socket name using getpeername\n"); return(PBSE_SOCKET_CLOSE); } addr = (struct sockaddr_in *)&s_addr; if (version != IS_PROTOCOL_VER) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "protocol version %d unknown from %s", version, netaddr(addr)); log_err(-1, __func__, log_buf); close_conn(chan->sock, FALSE); return PBSE_SOCKET_DATA; } /* check that machine is known */ mom_port = disrsi(chan, &ret); rm_port = disrsi(chan, &ret); if (LOGLEVEL >= 3) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from addr %s: mom_port %d - rm_port %d", netaddr(addr), mom_port, rm_port); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } ipaddr = ntohl(addr->sin_addr.s_addr); if ((node = AVL_find(ipaddr, mom_port, ipaddrs)) != 
NULL) { lock_node(node, __func__, "AVL_find", LOGLEVEL); } /* END if AVL_find != NULL) */ else if (allow_any_mom) { char *name = get_cached_nameinfo(addr); if (name != NULL) snprintf(nodename, sizeof(nodename), "%s", name); else if (getnameinfo(&s_addr, len, nodename, sizeof(nodename)-1, NULL, 0, 0) != 0) { tmpaddr = ntohl(addr->sin_addr.s_addr); sprintf(nodename, "0x%lX", tmpaddr); } else insert_addr_name_info(nodename, NULL, addr); err = create_partial_pbs_node(nodename, ipaddr, perm); if (err == PBSE_NONE) { node = AVL_find(ipaddr, 0, ipaddrs); lock_node(node, __func__, "no error", LOGLEVEL); } } if (node == NULL) { /* node not listed in trusted ipaddrs list */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "bad attempt to connect from %s (address not trusted - check entry in server_priv/nodes)", netaddr(addr)); if (LOGLEVEL >= 2) { log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); } else { log_err(-1, __func__, log_buf); } close_conn(chan->sock, FALSE); return PBSE_SOCKET_CLOSE; } if (LOGLEVEL >= 3) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message %s (%d) received from mom on host %s (%s) (sock %d)", PBSServerCmds2[command], command, node->nd_name, netaddr(addr), chan->sock); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } switch (command) { case IS_NULL: /* a ping from server */ DBPRT(("%s: IS_NULL\n", __func__)) break; case IS_UPDATE: DBPRT(("%s: IS_UPDATE\n", __func__)) i = disrui(chan, &ret); if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_UPDATE error %d on node %s\n", ret, node->nd_name); log_err(ret, __func__, log_buf); } goto err; } DBPRT(("%s: IS_UPDATE %s 0x%x\n", __func__, node->nd_name, i)) update_node_state(node, i); if ((node->nd_state & INUSE_DOWN) != 0) { node->nd_mom_reported_down = TRUE; } break; case IS_STATUS: if (LOGLEVEL >= 2) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS received from %s", node->nd_name); log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, 
__func__, log_buf); } if ((node_name = strdup(node->nd_name)) == NULL) goto err; unlock_node(node, __func__, "before is_stat_get", LOGLEVEL); ret = is_stat_get(node_name, chan); node = find_nodebyname(node_name); if (ret == SEND_HELLO) { struct hello_info *hi = (struct hello_info *)calloc(1, sizeof(struct hello_info)); write_tcp_reply(chan, IS_PROTOCOL, IS_PROTOCOL_VER, IS_STATUS, DIS_SUCCESS); hi->name = strdup(node_name); enqueue_threadpool_request(send_hierarchy_threadtask, hi); ret = DIS_SUCCESS; } else write_tcp_reply(chan,IS_PROTOCOL,IS_PROTOCOL_VER,IS_STATUS,ret); if(node != NULL) node->nd_stream = -1; if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS error %d on node %s", ret, node_name); log_err(ret, __func__, log_buf); } free(node_name); goto err; } free(node_name); break; default: snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unknown command %d sent from %s", command, node->nd_name); log_err(-1, __func__, log_buf); goto err; break; } /* END switch (command) */ /* must be closed because mom opens and closes this connection each time */ close_conn(chan->sock, FALSE); if(node != NULL) unlock_node(node, __func__, "close", LOGLEVEL); return PBSE_SOCKET_CLOSE; err: /* a DIS write error has occurred */ if (node != NULL) { if (LOGLEVEL >= 1) { DBPRT(("%s: error processing node %s\n", __func__, node->nd_name)) } sprintf(log_buf, "%s from %s(%s)", dis_emsg[ret], node->nd_name, netaddr(addr)); unlock_node(node, __func__, "err", LOGLEVEL); } else {