/************************************************* * svr_is_request * * Return: svr_is_request always returns a non-zero value * and it must call close_conn to close the connection * before returning. PBSE_SOCKET_CLOSE is the code * for a successful return. But which ever retun * code is iused it must terminate the while loop * in start_process_pbs_server_port. *************************************************/ void *svr_is_request( void *v) { int command = 0; int ret = DIS_SUCCESS; int i; int err; char nodename[PBS_MAXHOSTNAME]; int perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR; unsigned long ipaddr; unsigned short mom_port; unsigned short rm_port; unsigned long tmpaddr; struct sockaddr_in addr; struct pbsnode *node = NULL; char log_buf[LOCAL_LOG_BUF_SIZE+1]; char msg_buf[80]; char tmp[80]; int version; struct tcp_chan *chan; long *args; is_request_info *isr = (is_request_info *)v; if (isr == NULL) return(NULL); chan = isr->chan; args = isr->args; version = disrsi(chan, &ret); if (ret != DIS_SUCCESS) { log_err(-1, __func__, "Cannot read version - skipping this request.\n"); close_conn(chan->sock, FALSE); DIS_tcp_cleanup(chan); return(NULL); } command = disrsi(chan, &ret); if (ret != DIS_SUCCESS) { snprintf(log_buf, sizeof(log_buf), "could not read command: %d", ret); log_err(-1, __func__, log_buf); close_conn(chan->sock, FALSE); DIS_tcp_cleanup(chan); return(NULL); } if (LOGLEVEL >= 4) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from sock %d (version %d)", chan->sock, version); log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, log_buf); } /* Just a note to let us know we only do IPv4 for now */ addr.sin_family = AF_INET; memcpy(&addr.sin_addr, (void *)&args[1], sizeof(struct in_addr)); addr.sin_port = args[2]; if (version != IS_PROTOCOL_VER) { netaddr_long(args[1], tmp); sprintf(msg_buf, "%s:%ld", tmp, args[2]); snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "protocol version %d unknown from %s", version, msg_buf); log_err(-1, __func__, log_buf); close_conn(chan->sock, FALSE); DIS_tcp_cleanup(chan); return(NULL); } /* check that machine is known */ mom_port = disrsi(chan, &ret); rm_port = disrsi(chan, &ret); if (LOGLEVEL >= 3) { netaddr_long(args[1], tmp); sprintf(msg_buf, "%s:%ld", tmp, args[2]); snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from addr %s: mom_port %d - rm_port %d", msg_buf, mom_port, rm_port); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } ipaddr = args[1]; if ((node = AVL_find(ipaddr, mom_port, ipaddrs)) != NULL) { node->lock_node(__func__, "AVL_find", LOGLEVEL); } /* END if AVL_find != NULL) */ else if (allow_any_mom) { const char *name = get_cached_nameinfo(&addr); if (name != NULL) snprintf(nodename, sizeof(nodename), "%s", name); else if (getnameinfo((struct sockaddr *)&addr, sizeof(addr), nodename, sizeof(nodename)-1, NULL, 0, 0) != 0) { tmpaddr = ntohl(addr.sin_addr.s_addr); sprintf(nodename, "0x%lX", tmpaddr); } else insert_addr_name_info(NULL, nodename); err = create_partial_pbs_node(nodename, ipaddr, perm); if (err == PBSE_NONE) { node = AVL_find(ipaddr, 0, ipaddrs); node->lock_node(__func__, "no error", LOGLEVEL); } } if (node == NULL) { /* node not listed in trusted ipaddrs list */ netaddr_long(args[1], tmp); sprintf(msg_buf, "%s:%ld", tmp, args[2]); snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "bad attempt to connect from %s (address not trusted - check entry in server_priv/nodes)", msg_buf); if (LOGLEVEL >= 2) { log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); } else { log_err(-1, __func__, log_buf); } close_conn(chan->sock, FALSE); DIS_tcp_cleanup(chan); return(NULL); } if (LOGLEVEL >= 3) { netaddr_long(args[1], tmp); sprintf(msg_buf, "%s:%ld", tmp, args[2]); snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message %s (%d) received from mom on host %s (%s) (sock %d)", PBSServerCmds2[command], command, node->get_name(), msg_buf, chan->sock); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } mutex_mgr node_mutex(&node->nd_mutex, true); switch (command) { case IS_NULL: /* a ping from server */ DBPRT(("%s: IS_NULL\n", __func__)) break; case IS_UPDATE: DBPRT(("%s: IS_UPDATE\n", __func__)) i = disrui(chan, &ret); if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_UPDATE error %d on node %s\n", ret, node->get_name()); log_err(ret, __func__, log_buf); } goto err; } DBPRT(("%s: IS_UPDATE %s 0x%x\n", __func__, node->get_name(), i)) update_node_state(node, i); if ((node->nd_state & INUSE_DOWN) != 0) { node->nd_mom_reported_down = TRUE; } break; case IS_STATUS: { std::string node_name = node->get_name(); if (LOGLEVEL >= 2) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS received from %s", node->get_name()); log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, log_buf); } node_mutex.unlock(); ret = is_stat_get(node_name.c_str(), chan); node = find_nodebyname(node_name.c_str()); if (node != NULL) { node->nd_stream = -1; node_mutex.mark_as_locked(); if (ret == SEND_HELLO) { //struct hello_info *hi = new hello_info(node->nd_id); write_tcp_reply(chan, IS_PROTOCOL, IS_PROTOCOL_VER, IS_STATUS, DIS_SUCCESS); hierarchy_handler.sendHierarchyToANode(node); ret = DIS_SUCCESS; } else write_tcp_reply(chan,IS_PROTOCOL,IS_PROTOCOL_VER,IS_STATUS,ret); } if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS error %d on node %s", ret, node_name.c_str()); log_err(ret, __func__, log_buf); } goto err; } break; } default: snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unknown command %d sent from %s", command, node->get_name()); log_err(-1, __func__, log_buf); goto err; break; } /* END switch (command) */ /* must be closed because mom opens and closes this connection each time */ close_conn(chan->sock, FALSE); DIS_tcp_cleanup(chan); return(NULL); err: /* a DIS write error has occurred */ if (node != NULL) { if (LOGLEVEL >= 1) { DBPRT(("%s: error processing node %s\n", __func__, node->get_name())) } netaddr_long(args[1], tmp); sprintf(msg_buf, "%s:%ld", tmp, args[2]); sprintf(log_buf, "%s from %s(%s)", dis_emsg[ret], node->get_name(), msg_buf); } else {
/************************************************* * svr_is_request * * Return: svr_is_request always returns a non-zero value * and it must call close_conn to close the connection * before returning. PBSE_SOCKET_CLOSE is the code * for a successful return. But which ever retun * code is iused it must terminate the while loop * in start_process_pbs_server_port. *************************************************/ int svr_is_request( struct tcp_chan *chan, int version) { int command = 0; int ret = DIS_SUCCESS; int i; int err; char nodename[PBS_MAXHOSTNAME]; int perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR; unsigned long ipaddr; unsigned short mom_port; unsigned short rm_port; unsigned long tmpaddr; struct sockaddr_in *addr = NULL; struct sockaddr s_addr; unsigned int len = sizeof(s_addr); struct pbsnode *node = NULL; char *node_name = NULL; char log_buf[LOCAL_LOG_BUF_SIZE+1]; command = disrsi(chan, &ret); if (ret != DIS_SUCCESS) goto err; if (LOGLEVEL >= 4) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from sock %d (version %d)", chan->sock, version); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } if (getpeername(chan->sock, &s_addr, &len) != 0) { close_conn(chan->sock, FALSE); log_err(errno,__func__, (char *)"Cannot get socket name using getpeername\n"); return(PBSE_SOCKET_CLOSE); } addr = (struct sockaddr_in *)&s_addr; if (version != IS_PROTOCOL_VER) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "protocol version %d unknown from %s", version, netaddr(addr)); log_err(-1, __func__, log_buf); close_conn(chan->sock, FALSE); return PBSE_SOCKET_DATA; } /* check that machine is known */ mom_port = disrsi(chan, &ret); rm_port = disrsi(chan, &ret); if (LOGLEVEL >= 3) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message received from addr %s: mom_port %d - rm_port %d", netaddr(addr), mom_port, rm_port); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } ipaddr = ntohl(addr->sin_addr.s_addr); if ((node = AVL_find(ipaddr, mom_port, ipaddrs)) != NULL) { lock_node(node, __func__, "AVL_find", LOGLEVEL); } /* END if AVL_find != NULL) */ else if (allow_any_mom) { char *name = get_cached_nameinfo(addr); if (name != NULL) snprintf(nodename, sizeof(nodename), "%s", name); else if (getnameinfo(&s_addr, len, nodename, sizeof(nodename)-1, NULL, 0, 0) != 0) { tmpaddr = ntohl(addr->sin_addr.s_addr); sprintf(nodename, "0x%lX", tmpaddr); } else insert_addr_name_info(nodename, NULL, addr); err = create_partial_pbs_node(nodename, ipaddr, perm); if (err == PBSE_NONE) { node = AVL_find(ipaddr, 0, ipaddrs); lock_node(node, __func__, "no error", LOGLEVEL); } } if (node == NULL) { /* node not listed in trusted ipaddrs list */ snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "bad attempt to connect from %s (address not trusted - check entry in server_priv/nodes)", netaddr(addr)); if (LOGLEVEL >= 2) { log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf); } else { log_err(-1, __func__, log_buf); } close_conn(chan->sock, FALSE); return PBSE_SOCKET_CLOSE; } if (LOGLEVEL >= 3) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "message %s (%d) received from mom on host %s (%s) (sock %d)", PBSServerCmds2[command], command, node->nd_name, netaddr(addr), chan->sock); log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf); } switch (command) { case IS_NULL: /* a ping from server */ DBPRT(("%s: IS_NULL\n", __func__)) break; case IS_UPDATE: DBPRT(("%s: IS_UPDATE\n", __func__)) i = disrui(chan, &ret); if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_UPDATE error %d on node %s\n", ret, node->nd_name); log_err(ret, __func__, log_buf); } goto err; } DBPRT(("%s: IS_UPDATE %s 0x%x\n", __func__, node->nd_name, i)) update_node_state(node, i); if ((node->nd_state & INUSE_DOWN) != 0) { node->nd_mom_reported_down = TRUE; } break; case IS_STATUS: if (LOGLEVEL >= 2) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS received from %s", node->nd_name); log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, log_buf); } if ((node_name = strdup(node->nd_name)) == NULL) goto err; unlock_node(node, __func__, "before is_stat_get", LOGLEVEL); ret = is_stat_get(node_name, chan); node = find_nodebyname(node_name); if (ret == SEND_HELLO) { struct hello_info *hi = (struct hello_info *)calloc(1, sizeof(struct hello_info)); write_tcp_reply(chan, IS_PROTOCOL, IS_PROTOCOL_VER, IS_STATUS, DIS_SUCCESS); hi->name = strdup(node_name); enqueue_threadpool_request(send_hierarchy_threadtask, hi); ret = DIS_SUCCESS; } else write_tcp_reply(chan,IS_PROTOCOL,IS_PROTOCOL_VER,IS_STATUS,ret); if(node != NULL) node->nd_stream = -1; if (ret != DIS_SUCCESS) { if (LOGLEVEL >= 1) { snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "IS_STATUS error %d on node %s", ret, node_name); log_err(ret, __func__, log_buf); } free(node_name); goto err; } free(node_name); break; default: snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "unknown command %d sent from %s", command, node->nd_name); log_err(-1, __func__, log_buf); goto err; break; } /* END switch (command) */ /* must be closed because mom opens and closes this connection each time */ close_conn(chan->sock, FALSE); if(node != NULL) unlock_node(node, __func__, "close", LOGLEVEL); return PBSE_SOCKET_CLOSE; err: /* a DIS write error has occurred */ if (node != NULL) { if (LOGLEVEL >= 1) { DBPRT(("%s: error processing node %s\n", __func__, node->nd_name)) } sprintf(log_buf, "%s from %s(%s)", dis_emsg[ret], node->nd_name, netaddr(addr)); unlock_node(node, __func__, "err", LOGLEVEL); } else {