/**
 * @brief
 *	svr_movejob - Test if the destination is local or not and call a routine to
 *	do the appropriate move.
 *
 * @param[in,out]	jobp - pointer to job to move
 * @param[in]	destination - destination to be moved to
 * @param[in]	req - client request from a qmove client, null if a route
 *
 * @return	int
 * @retval	0  : success
 * @retval	-1 : permanent failure or rejection
 * @retval	1  : failed but try again
 * @retval	2  : deferred (ie move in progress), check later
 */
int
svr_movejob(job *jobp, char *destination, struct batch_request *req)
{
	pbs_net_t destaddr;
	unsigned int port = pbs_server_port_dis;
	char *toserver;

	if (strlen(destination) >= (size_t)PBS_MAXROUTEDEST) {
		/*
		 * The name is by definition too long here, so echo at most
		 * PBS_MAXROUTEDEST characters of it; an unbounded "%s" could
		 * write past the end of log_buffer.
		 */
		sprintf(log_buffer, "name %.*s over maximum length of %d",
			(int)PBS_MAXROUTEDEST, destination, PBS_MAXROUTEDEST);
		log_err(-1, "svr_movejob", log_buffer);
		pbs_errno = PBSE_QUENBIG;
		return -1;
	}

	/* length was checked above, so strncpy also copies the terminating NUL */
	strncpy(jobp->ji_qs.ji_destin, destination, PBS_MAXROUTEDEST);
	jobp->ji_qs.ji_un_type = JOB_UNION_TYPE_ROUTE;

	if ((toserver = strchr(destination, '@')) != NULL) {
		/* check to see if the part after '@' is this server */
		destaddr = get_hostaddr(parse_servername(++toserver, &port));
		if ((destaddr != pbs_server_addr) ||
			(port != pbs_server_port_dis)) {
			return (net_move(jobp, req));	/* not a local dest */
		}
	}

	/* if get to here, it is a local destination */
	return (local_move(jobp, req));
}
/*
 * conn_qsub - resolve hostname, connect to it with client_to_svr() and
 * switch the resulting socket to blocking mode.
 *
 * hostname - host to connect to
 * port     - port to connect to (narrowed to unsigned int for the call)
 * EMsg     - optional error text buffer (at least 1024 bytes); cleared on
 *            entry and filled in when the address lookup fails
 *
 * Returns the connected socket descriptor, or -1 on failure.
 */
int conn_qsub(

  char *hostname,  /* I */
  long  port,      /* I */
  char *EMsg)      /* O (optional,minsize=1024) */

  {
  pbs_net_t hostaddr;
  int       s;
  int       flags;

  if (EMsg != NULL)
    EMsg[0] = '\0';

  if ((hostaddr = get_hostaddr(hostname)) == (pbs_net_t)0)
    {
#if !defined(H_ERRNO_DECLARED) && !defined(_AIX)
    extern int h_errno;
#endif

    /* FAILURE */

    if (EMsg != NULL)
      {
      snprintf(EMsg, 1024, "cannot get address for host '%s', h_errno=%d",
        hostname,
        h_errno);
      }

    return(-1);
    }

  s = client_to_svr(hostaddr, (unsigned int)port, 0, EMsg);

  /* NOTE:  client_to_svr() can return 0 for SUCCESS */

  /* assume SUCCESS requires s > 0 (USC) was 'if (s >= 0)' */
  /* above comment not enabled */

  if (s < 0)
    {
    /* FAILURE */

    return(-1);
    }

  /* SUCCESS */

  /* this socket should be blocking */

  flags = fcntl(s, F_GETFL);

  /*
   * Only rewrite the flags when they were read successfully.  On failure
   * fcntl() returns -1, and masking that value would ask F_SETFL to turn on
   * every status flag except O_NONBLOCK.
   */
  if (flags != -1)
    {
    fcntl(s, F_SETFL, flags & ~O_NONBLOCK);
    }

  return(s);
  }  /* END conn_qsub() */
/*
 * Filter an incoming IP packet by destination and hand it to
 * ip_receive_packet().  The return value answers "did we eat it?":
 * non-zero only when the packet was accepted by ip_receive_packet() and its
 * destination is not the address returned by get_bcastaddr().
 */
static u08 __ip_receive_packet( ip_header * p, u16 len )
{
	/* check if it's actually for us! */
	u08 for_us = ( p->dest_addr == get_bcastaddr() )
		|| ( p->dest_addr == get_hostaddr() )
		|| ( p->dest_addr == 0xfffffffful );

	if (!for_us)
		return 0;

	if (!ip_receive_packet( p, len ))
		return 0;

	/* compared only after the handler has run, as in the original expression */
	return p->dest_addr != get_bcastaddr();
}
int issue_to_svr(char *servern, struct batch_request *preq, void (*replyfunc)(struct work_task *)) { int do_retry = 0; int handle; pbs_net_t svraddr; char *svrname; unsigned int port = pbs_server_port_dis; struct work_task *pwt; extern int pbs_failover_active; extern char primary_host[]; extern char server_host[]; (void)strcpy(preq->rq_host, servern); preq->rq_fromsvr = 1; preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR; svrname = parse_servername(servern, &port); if ((pbs_failover_active != 0) && (svrname != NULL)) { /* we are the active secondary server in a failover config */ /* if the message is going to the primary,then redirect to me */ size_t len; len = strlen(svrname); if (strncasecmp(svrname, primary_host, len) == 0) { if ((primary_host[(int)len] == '\0') || (primary_host[(int)len] == '.')) svrname = server_host; } } svraddr = get_hostaddr(svrname); if (svraddr == (pbs_net_t)0) { if (pbs_errno == PBS_NET_RC_RETRY) /* Non fatal error - retry */ do_retry = 1; } else { handle = svr_connect(svraddr, port, process_Dreply, ToServerDIS, PROT_TCP); if (handle >= 0) return (issue_Drequest(handle, preq, replyfunc, 0, 0)); else if (handle == PBS_NET_RC_RETRY) do_retry = 1; } /* if reached here, it didn`t go, do we retry? */ if (do_retry) { pwt = set_task(WORK_Timed, (long)(time_now+(2*PBS_NET_RETRY_TIME)), reissue_to_svr, (void *)preq); pwt->wt_parm2 = (void *)replyfunc; return (0); } else return (-1); }
/*
 * Fill in an IP header.
 *
 *   ip    - header to initialise
 *   proto - value stored in the protocol field
 *   ident - value stored in the identification field (stored as given)
 *   len   - length, stored through __htons()
 *   dest  - destination address (stored as given)
 *
 * The source address is taken from get_hostaddr().
 */
void __ip_make_header( ip_header * ip, u08 proto, u16 ident, u16 len, u32 dest )
{
	ip->version = 0x45;
	ip->tos = 0;
	ip->length = __htons( len );
	ip->fraginfo = 0;
	ip->ident = ident;
	ip->dest_addr = dest;
	ip->src_addr = get_hostaddr();
	ip->ttl = 128;
	ip->proto = proto;

	/*
	 * The checksum field is part of the summed region, so it must be zero
	 * while the sum is taken; otherwise whatever the buffer held before
	 * ends up folded into the result.
	 */
	ip->checksum = 0;
	ip->checksum = ~__htons( __checksum( ip, sizeof(ip_header) ) );
}
/**
 * @brief
 *	Action routine for the scheduler object's "sched_host" attribute.
 *
 *	For the NEW, ALTER and RECOV action modes the scheduler address is
 *	resolved from the attribute string when the object is not the default
 *	scheduler; the stored address is then required to be non-zero.
 *
 * @param[in]	pattr - attribute being set
 * @param[in]	pobj - object on which the attribute is being set (a pbs_sched)
 * @param[in]	actmode - the mode of setting, recovery or just alter
 *
 * @return	error code
 * @retval	PBSE_NONE - success
 * @retval	PBSE_BADATVAL - the scheduler address is zero
 */
int
action_sched_host(attribute *pattr, void *pobj, int actmode)
{
	pbs_sched *sched = (pbs_sched *) pobj;

	/* any other action mode is accepted without touching the object */
	if (actmode != ATR_ACTION_NEW &&
		actmode != ATR_ACTION_ALTER &&
		actmode != ATR_ACTION_RECOV)
		return PBSE_NONE;

	if (dflt_scheduler && sched != dflt_scheduler) {
		sched->pbs_scheduler_addr = get_hostaddr(pattr->at_val.at_str);
	}

	/*
	 * NOTE(review): this check runs for every scheduler, including the
	 * default one whose address is not resolved here - confirm that is
	 * intended.
	 */
	if (sched->pbs_scheduler_addr == (pbs_net_t)0) {
		return PBSE_BADATVAL;
	}

	return PBSE_NONE;
}
/*
 * svr_movejob - record the destination in the job and dispatch to
 * local_move() or net_move() depending on whether the server named after
 * '@' resolves to this server's address.
 *
 * jobp        - job to move; ji_destin and ji_un_type are updated
 * destination - "queue" or "queue@server"
 * req         - request handed on to local_move()/net_move()
 *
 * Returns ROUTE_PERM_FAILURE when the destination name is too long,
 * otherwise the result of local_move() or net_move().
 */
int svr_movejob(

  job                  *jobp,
  char                 *destination,
  struct batch_request *req)

  {
  pbs_net_t     destaddr;
  int           local;
  unsigned int  port;
  char         *toserver;

  if (strlen(destination) >= (size_t)PBS_MAXROUTEDEST)
    {
    /*
     * The name is known to be over-long here; echo at most PBS_MAXROUTEDEST
     * characters so the message cannot run past the end of log_buffer.
     */
    sprintf(log_buffer, "name %.*s over maximum length of %d\n",
      (int)PBS_MAXROUTEDEST,
      destination,
      PBS_MAXROUTEDEST);

    log_err(-1, "svr_movejob", log_buffer);

    pbs_errno = PBSE_QUENBIG;

    return(ROUTE_PERM_FAILURE);
    }

  /* length was checked above, so strncpy also copies the terminating NUL */
  strncpy(jobp->ji_qs.ji_destin, destination, PBS_MAXROUTEDEST);

  jobp->ji_qs.ji_un_type = JOB_UNION_TYPE_ROUTE;

  local = 1;

  if ((toserver = strchr(destination, '@')) != NULL)
    {
    /* check to see if the part after '@' is this server */

    /* port is only an output slot for parse_servername(); it is not read here */
    destaddr = get_hostaddr(parse_servername(++toserver, &port));

    if (destaddr != pbs_server_addr)
      {
      local = 0;
      }
    }

  if (local != 0)
    {
    return(local_move(jobp, req));
    }

  return(net_move(jobp, req));
  }  /* svr_movejob() */
/*
 * set_trqauthd_addr
 *
 * Stores the local host name in trq_hostname and the address that
 * get_hostaddr() returns for it in trq_server_addr.
 *
 * Returns PBSE_NONE on success, PBSE_BADHOST when either the host name or
 * its address cannot be obtained (a message is written to stderr).
 */
int set_trqauthd_addr()

  {
  int lookup_errno;
  int status = PBSE_BADHOST;

  if (gethostname(trq_hostname, PBS_MAXSERVERNAME) == -1)
    {
    fprintf(stderr, "failed to get host name: %d\n", errno);
    }
  else
    {
    trq_server_addr = get_hostaddr(&lookup_errno, trq_hostname);

    if (trq_server_addr == 0)
      fprintf(stderr, "Could not get host address\n");
    else
      status = PBSE_NONE;
    }

  return(status);
  }
/**
 * @brief
 *	net_move - send a job to the server named in the job's stored destination.
 *
 *	The destination (jobp->ji_qs.ji_destin) must contain '@'; the text after
 *	it is parsed for host and port.  The job is put into the TRANSIT state
 *	and handed to send_job().
 *
 * @param[in,out]	jobp - job to send
 * @param[in]	req - the original Move request, or NULL when routing
 *
 * @return	int
 * @retval	-1 : no server given in the destination
 * @retval	other : value returned by send_job()
 */
int
net_move(job *jobp, struct batch_request *req)
{
	char *dest = jobp->ji_qs.ji_destin;
	unsigned int port = pbs_server_port_dis;
	char *at_sign;
	char *hostname;
	pbs_net_t hostaddr;
	int move_type;
	void (*post_func)(struct work_task *);
	void *data;

	/* Determine to whom are we sending the job */
	at_sign = strchr(dest, '@');
	if (at_sign == NULL) {
		sprintf(log_buffer, "no server specified in %s", dest);
		log_err(-1, __func__, log_buffer);
		return (-1);
	}

	/* the server name starts right after the '@' */
	hostname = parse_servername(at_sign + 1, &port);
	hostaddr = get_hostaddr(hostname);

	/* a request means an explicit Move, no request means routing */
	move_type = req ? MOVE_TYPE_Move : MOVE_TYPE_Route;
	post_func = req ? post_movejob : post_routejob;
	data = req;	/* the original Move request, or NULL when routing */

	(void)svr_setjobstate(jobp, JOB_STATE_TRANSIT, JOB_SUBSTATE_TRNOUT);

	return (send_job(jobp, hostaddr, port, move_type, post_func, data));
}
/**
 * @brief
 *	client_to_svr_extend - open a TCP connection to hostaddr:port.
 *
 *	When authport_flags has B_RESERVED set, the local end is first bound to
 *	a reserved port (via rresvport() where IP_PORTRANGE_LOW is defined,
 *	otherwise by walking the upper half of the reserved range).  The connect
 *	is performed non-blocking with a timeout of conn_timeout seconds, the
 *	socket is then put back into its previous mode and handed to
 *	engage_authentication().
 *
 * @param[in]	hostaddr - address of the host to connect to (host byte order)
 * @param[in]	port - port to connect to (host byte order)
 * @param[in]	authport_flags - B_RESERVED requests a reserved local port; the
 *				value is also passed on to engage_authentication()
 * @param[in]	localaddr - optional dotted address to bind the local end to
 *
 * @return	int
 * @retval	>=0 : connected socket
 * @retval	PBS_NET_RC_RETRY : transient failure, caller may retry
 * @retval	PBS_NET_RC_FATAL : failure
 */
int
client_to_svr_extend(pbs_net_t hostaddr, unsigned int port, int authport_flags, char *localaddr)
{
	struct sockaddr_in remote;
	int sock;
	int local_port;
	int errn;
	int rc;
#ifdef WIN32
	int ret;
	int non_block = 1;
	struct linger li;
	struct sockaddr_in from;
	struct timeval tv;
	fd_set writeset;
#else
	struct pollfd fds[1];
	pbs_socklen_t len = sizeof(rc);
	int oflag;
#endif

	/* If local privilege port requested, bind to one */
	/* Must be root privileged to do this */
	local_port = authport_flags & B_RESERVED;

	if (local_port) {
#ifdef IP_PORTRANGE_LOW
		int lport = IPPORT_RESERVED - 1;

		sock = rresvport(&lport);
		if (sock < 0) {
			if (errno == EAGAIN)
				return PBS_NET_RC_RETRY;
			else
				return PBS_NET_RC_FATAL;
		}
#else /* IP_PORTRANGE_LOW */
		struct sockaddr_in local;
		unsigned short tryport;
		static unsigned short start_port = 0;

		sock = socket(AF_INET, SOCK_STREAM, 0);
		if (sock < 0) {
			return PBS_NET_RC_FATAL;
		}

		if (start_port == 0) {	/* arbitrary start point */
			start_port = (getpid() %(IPPORT_RESERVED/2)) + IPPORT_RESERVED/2;
		} else if (--start_port < IPPORT_RESERVED/2)
			start_port = IPPORT_RESERVED - 1;
		tryport = start_port;

		memset(&local, 0, sizeof(local));
		local.sin_family = AF_INET;
		if (localaddr != NULL) {
			local.sin_addr.s_addr = inet_addr(localaddr);
			if (local.sin_addr.s_addr == INADDR_NONE) {
				perror("inet_addr failed");
				/* the socket is already open: release it before bailing out */
#ifdef WIN32
				closesocket(sock);
#else
				close(sock);
#endif
				return (PBS_NET_RC_FATAL);
			}
		} else if (pbs_conf.pbs_public_host_name) {
			pbs_net_t public_addr;

			public_addr = get_hostaddr(pbs_conf.pbs_public_host_name);
			if (public_addr == (pbs_net_t)0) {
				/* the socket is already open: release it before bailing out */
#ifdef WIN32
				closesocket(sock);
#else
				close(sock);
#endif
				return (PBS_NET_RC_FATAL);
			}
			local.sin_addr.s_addr = htonl(public_addr);
		}

		for (;;) {
			local.sin_port = htons(tryport);
			if (bind(sock, (struct sockaddr *)&local, sizeof(local)) == 0)
				break;
#ifdef WIN32
			errno = WSAGetLastError();
			if (errno != EADDRINUSE && errno != EADDRNOTAVAIL && errno != WSAEACCES) {
				closesocket(sock);
#else
			if (errno != EADDRINUSE && errno != EADDRNOTAVAIL) {
				close(sock);
#endif
				return PBS_NET_RC_FATAL;
			} else if (--tryport < (IPPORT_RESERVED/2)) {
				/* wrap around to the top of the reserved range */
				tryport = IPPORT_RESERVED - 1;
			}
			if (tryport == start_port) {
				/* every candidate port has been tried once */
#ifdef WIN32
				closesocket(sock);
#else
				close(sock);
#endif
				return PBS_NET_RC_RETRY;
			}
		}
		/*
		 ** Ensure last tryport becomes start port on next call.
		 */
		start_port = tryport;
#endif /* IP_PORTRANGE_LOW */
	} else {
		sock = socket(AF_INET, SOCK_STREAM, 0);
		if (sock < 0) {
			return PBS_NET_RC_FATAL;
		}
	}

	remote.sin_addr.s_addr = htonl(hostaddr);
	remote.sin_port = htons((unsigned short)port);
	remote.sin_family = AF_INET;

	/* switch to non-blocking so the connect can be bounded by conn_timeout */
#ifdef WIN32
	li.l_onoff = 1;
	li.l_linger = 5;

	setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&li, sizeof(li));

	if (ioctlsocket(sock, FIONBIO, &non_block) == SOCKET_ERROR) {
		errno = WSAGetLastError();
		closesocket(sock);
		return (PBS_NET_RC_FATAL);
	}
#else
	oflag = fcntl(sock, F_GETFL);
	if (fcntl(sock, F_SETFL, (oflag | O_NONBLOCK)) == -1) {
		close(sock);
		return (PBS_NET_RC_FATAL);
	}
#endif

	if (connect(sock, (struct sockaddr *)&remote, sizeof(remote)) < 0) {
#ifdef WIN32
		errno = WSAGetLastError();
#endif
		/*
		 * Because of threading, pbs_errno is actually a macro
		 * pointing to a variable within a thread context.  On certain
		 * platforms, the threading library resulted in errno being
		 * cleared after pbs_errno was set from it, so save
		 * errno into a local variable first, then test it.
		 */
		errn = errno;
		pbs_errno = errn;
		switch (errn) {
#ifdef WIN32
			case WSAEINTR:
#else
			case EINTR:
#endif
			case EADDRINUSE:
			case ETIMEDOUT:
			case ECONNREFUSED:
#ifdef WIN32
				closesocket(sock);
#else
				close(sock);
#endif
				return (PBS_NET_RC_RETRY);

#ifdef WIN32
			case WSAEWOULDBLOCK:
				FD_ZERO(&writeset);
				FD_SET((unsigned int)sock, &writeset);
				tv.tv_sec = conn_timeout;	/* connect timeout */
				tv.tv_usec = 0;
				ret = select(1, NULL, &writeset, NULL, &tv);
				if (ret == SOCKET_ERROR) {
					errno = WSAGetLastError();
					errn = errno;
					pbs_errno = errn;
					closesocket(sock);
					return PBS_NET_RC_FATAL;
				} else if (ret == 0) {
					closesocket(sock);
					return PBS_NET_RC_RETRY;
				}
				break;
#else	/* UNIX */
			case EWOULDBLOCK:
			case EINPROGRESS:
				/* wait for the connect to finish, restarting on EAGAIN/EINTR */
				while (1) {
					fds[0].fd = sock;
					fds[0].events = POLLOUT;
					fds[0].revents = 0;

					rc = poll(fds, (nfds_t)1, conn_timeout * 1000);
					if (rc == -1) {
						errn = errno;
						if ((errn != EAGAIN) && (errn != EINTR))
							break;
					} else
						break;	/* no error */
				}

				if (rc == 1) {
					/* socket may be connected and ready to write */
					rc = 0;
					if ((getsockopt(sock, SOL_SOCKET, SO_ERROR, &rc, &len) == -1) || (rc != 0)) {
						close(sock);
						return PBS_NET_RC_FATAL;
					}
					break;
				} else if (rc == 0) {
					/* socket not ready - not connected in time */
					close(sock);
					return PBS_NET_RC_RETRY;
				} else {
					/* socket not ready - error */
					close(sock);
					return PBS_NET_RC_FATAL;
				}
#endif	/* end UNIX */

			default:
#ifdef WIN32
				closesocket(sock);
#else
				close(sock);
#endif
				return (PBS_NET_RC_FATAL);
		}
	}

	/* reset socket to blocking */
#ifdef WIN32
	non_block = 0;
	if (ioctlsocket(sock, FIONBIO, &non_block) == SOCKET_ERROR) {
		errno = WSAGetLastError();
		closesocket(sock);
		return PBS_NET_RC_FATAL;
	}
#else	/* UNIX */
	if (fcntl(sock, F_SETFL, oflag) == -1) {
		close(sock);
		return (PBS_NET_RC_FATAL);
	}
#endif

	if (engage_authentication(sock, remote.sin_addr, port, authport_flags) != -1)
		return sock;

	/* authentication unsuccessful */
#ifdef WIN32
	closesocket(sock);
#else
	close(sock);
#endif
	return (PBS_NET_RC_FATAL);
}
/*
 * authenticate_user - validate the requesting user against the credential
 * received for the connection.
 *
 * Checks, in order:
 *   - the request user against the credential user (with MUNGE_AUTH a
 *     mismatch is tolerated when user@host is in the authorized users list),
 *   - the request host against the credential host, comparing resolved
 *     addresses when the names differ,
 *   - the credential timestamp against the configured lifetime,
 *   - the server's user ACL, when enabled,
 *   - the site hook site_allow_u().
 *
 * preq    - request being authenticated
 * pcred   - credential to check it against
 * autherr - receives a strdup()'d message on user/host mismatch
 *
 * Returns PBSE_BADCRED, PBSE_EXPIRED or PBSE_PERM on rejection, otherwise
 * the value returned by site_allow_u().
 */
int authenticate_user(

  struct batch_request  *preq,    /* I */
  struct credential     *pcred,
  char                 **autherr) /* O */

  {
  int    rc;
  /* user + '@' + host + NUL: one byte more than the original allowed for */
  char   uath[PBS_MAXUSER + PBS_MAXHOSTNAME + 2];
  time_t time_now = time(NULL);
  char   error_msg[1024];
  bool   acl_enabled = false;

#ifdef MUNGE_AUTH

  if (strncmp(preq->rq_user, pcred->username, PBS_MAXUSER))
    {
    /* extra check for munge */
    struct array_strings *my_acl = NULL;
    char                  uh[PBS_MAXUSER + PBS_MAXHOSTNAME + 2];

    snprintf(uh, sizeof(uh), "%s@%s", preq->rq_user, pcred->hostname);

    get_svr_attr_arst(SRV_ATR_authusers, &my_acl);

    if ((acl_check_my_array_string(my_acl, uh, ACL_User_Host)) == 0)
      {
      *autherr = strdup("User not in authorized user list.");
      snprintf(error_msg, sizeof(error_msg),
        "%s Requested user %s: requested from host %s",
        *autherr,
        preq->rq_user,
        preq->rq_host);
      log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, error_msg);
      return(PBSE_BADCRED);
      }
    }

#else

  if (strncmp(preq->rq_user, pcred->username, PBS_MAXUSER))
    {
    *autherr = strdup("Users do not match");
    snprintf(error_msg, sizeof(error_msg),
      "%s: Requested user %s: credential user %s: requested from host %s",
      *autherr,
      preq->rq_user,
      pcred->username,
      preq->rq_host);
    log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, error_msg);
    return(PBSE_BADCRED);
    }

#endif

  if (strncmp(preq->rq_host, pcred->hostname, PBS_MAXHOSTNAME))
    {
    /* the names differ: fall back to comparing what they resolve to */
    struct sockaddr_in *sai1;
    struct sockaddr_in *sai2;
    struct addrinfo    *addr_info1 = NULL;
    struct addrinfo    *addr_info2 = NULL;

    sai1 = get_cached_addrinfo(preq->rq_host);
    sai2 = get_cached_addrinfo(pcred->hostname);

    if ((sai1 == NULL) &&
        (pbs_getaddrinfo(preq->rq_host, NULL, &addr_info1) == PBSE_NONE))
      {
      sai1 = (struct sockaddr_in *)addr_info1->ai_addr;
      }

    if ((sai2 == NULL) &&
        (pbs_getaddrinfo(pcred->hostname, NULL, &addr_info2) == PBSE_NONE))
      {
      sai2 = (struct sockaddr_in *)addr_info2->ai_addr;
      }

    if ((sai1 == NULL) ||
        (sai2 == NULL) ||
        (memcmp(sai1, sai2, sizeof(struct sockaddr_in))))
      {
      *autherr = strdup("Hosts do not match");

      snprintf(error_msg, sizeof(error_msg),
        "%s: Requested host %s: credential host: %s",
        *autherr,
        preq->rq_host,
        pcred->hostname);
      log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, error_msg);

      return(PBSE_BADCRED);
      }
    }

  if (pcred->timestamp)
    {
    long lifetime = 0;

    if (get_svr_attr_l(SRV_ATR_CredentialLifetime, &lifetime) == PBSE_NONE)
      {
      /* use configured value if set */
      }
    else
      {
      /* if not use the default */
      lifetime = CREDENTIAL_LIFETIME;
      }

    /* negative values mean that credentials have an infinite lifetime */
    if (lifetime > -1)
      {
      if ((pcred->timestamp - CREDENTIAL_TIME_DELTA > time_now) ||
          (pcred->timestamp + lifetime < time_now))
        {
        return(PBSE_EXPIRED);
        }
      }
    }

  /* If Server's Acl_User enabled, check if user in list */
  get_svr_attr_b(SRV_ATR_AclUserEnabled, &acl_enabled);

  if (acl_enabled)
    {
    struct array_strings *acl_users = NULL;

    snprintf(uath, sizeof(uath), "%s@%s", preq->rq_user, preq->rq_host);

    get_svr_attr_arst(SRV_ATR_AclUsers, &acl_users);

    if (acl_check_my_array_string(acl_users, uath, ACL_User) == 0)
      {
      int       my_err;
      pbs_net_t connect_addr = get_hostaddr(&my_err, preq->rq_host);
      pbs_net_t server_addr = get_hostaddr(&my_err, server_host);

#ifdef __CYGWIN__

      if ((!IamAdminByName(preq->rq_user)) || (connect_addr != server_addr))
        {
        return(PBSE_PERM);
        }

#else /* __CYGWIN__ */
#ifdef PBS_ROOT_ALWAYS_ADMIN

      /* the default admin connecting from the server host bypasses the ACL */
      if ((strcmp(preq->rq_user, PBS_DEFAULT_ADMIN) != 0) ||
          (connect_addr != server_addr))
        {
        return(PBSE_PERM);
        }

#else /* PBS_ROOT_ALWAYS_ADMIN */

      return(PBSE_PERM);

#endif /* PBS_ROOT_ALWAYS_ADMIN */
#endif /* __CYGWIN__ */
      }
    }

  /* A site stub for additional checking */
  rc = site_allow_u(preq->rq_user, preq->rq_host);

  return(rc);
  }  /* END authenticate_user() */
int svr_get_privilege( char *user, /* I */ char *host) /* I */ { int is_root = 0; int priv = (ATR_DFLAG_USRD | ATR_DFLAG_USWR); int num_host_chars; char uh[PBS_MAXUSER + PBS_MAXHOSTNAME + 2]; char host_no_port[PBS_MAXHOSTNAME+1]; char *colon_loc = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; char *other_host; int other_priv = 0; int my_err; pbs_net_t server_addr; pbs_net_t connect_addr; #ifndef __CYGWIN__ pbs_net_t local_server_addr; #endif if (!user) { sprintf(log_buf, "Invalid user: %s", "null"); log_record(PBSEVENT_SECURITY, PBS_EVENTCLASS_SERVER, __func__, log_buf); return(0); } /* user name cannot be longer than PBS_MAXUSER*/ if (strlen(user) > PBS_MAXUSER) { sprintf(log_buf, "Invalid user: %s", user); log_record(PBSEVENT_SECURITY, PBS_EVENTCLASS_SERVER, __func__, log_buf); return(0); } if (!host) return(0); colon_loc = strchr(host, ':'); /* if the request host has port information in it, we want to strip it out */ if (colon_loc == NULL) { /* no colon found */ num_host_chars = strlen(host); sprintf(host_no_port, "%s", host); } else { num_host_chars = colon_loc - host; /* actually remove the colon for host_no_port */ *colon_loc = '\0'; sprintf(host_no_port,"%s",host); *colon_loc = ':'; } /* num_host_chars cannot be more than PBS_MAXHOSTNAME */ if (num_host_chars > PBS_MAXHOSTNAME) { snprintf(log_buf, sizeof(log_buf), "Invalid host: %s", host); log_record(PBSEVENT_SECURITY, PBS_EVENTCLASS_SERVER, __func__, log_buf); return(0); } sprintf(uh, "%s@%s", user, host); server_addr = get_hostaddr(&my_err, server_host); connect_addr = get_hostaddr(&my_err, host_no_port); #ifdef __CYGWIN__ if ((IamAdminByName(user)) && (server_addr == connect_addr)) { return(priv | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR); } #else /* __CYGWIN__ */ local_server_addr = get_hostaddr(&my_err, server_localhost); if ((strcmp(user, PBS_DEFAULT_ADMIN) == 0) && ((connect_addr == server_addr) || (connect_addr == local_server_addr))) { is_root = 1; #ifdef PBS_ROOT_ALWAYS_ADMIN 
if (is_root) { /* This statement allows us to compile with gcc-warnings */ /* if PBS_ROOT_ALWAYS_ADMIN is true is_root is assigned but never used */ ; } return(priv | ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_OPRD | ATR_DFLAG_OPWR); #endif } #endif /* __CYGWIN__ */ pthread_mutex_lock(server.sv_attr_mutex); if (!(server.sv_attr[SRV_ATR_managers].at_flags & ATR_VFLAG_SET)) { #ifndef PBS_ROOT_ALWAYS_ADMIN if (is_root) priv |= (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR); #endif } else if (acl_check(&server.sv_attr[SRV_ATR_managers], uh, ACL_User)) { priv |= (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR); } if (!(server.sv_attr[SRV_ATR_operators].at_flags & ATR_VFLAG_SET)) { #ifndef PBS_ROOT_ALWAYS_ADMIN if (is_root) priv |= (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR); #endif } else if (acl_check(&server.sv_attr[SRV_ATR_operators], uh, ACL_User)) { priv |= (ATR_DFLAG_OPRD | ATR_DFLAG_OPWR); } pthread_mutex_unlock(server.sv_attr_mutex); /* resolve using the other hostname (if available) and give the higher privilege */ other_host = get_cached_fullhostname(host, NULL); if ((other_host != NULL) && (strcmp(host, other_host))) other_priv = svr_get_privilege(user, other_host); if (other_priv > priv) priv = other_priv; return(priv); } /* END svr_get_privilege() */
/*
 * svr_movejob - record the destination in the job and dispatch to
 * local_move() or net_move() depending on whether the server named after
 * '@' resolves to this server's address.
 *
 * jobp        - job to move; ji_destin and ji_un_type are updated
 * destination - "queue" or "queue@server"
 * my_err      - receives PBSE_QUENBIG when the name is too long; also passed
 *               to get_hostaddr() and local_move()
 * req         - request handed on to local_move()/net_move()
 *
 * Returns -1 when the destination name is too long, otherwise the result of
 * local_move() or net_move().
 */
int svr_movejob(

  job                  *jobp,
  char                 *destination,
  int                  *my_err,
  struct batch_request *req)

  {
  pbs_net_t     destaddr;
  int           local;
  unsigned int  port;
  char         *toserver;
  char          log_buf[LOCAL_LOG_BUF_SIZE];

  if (LOGLEVEL >= 8)
    {
    snprintf(log_buf, sizeof(log_buf), "%s", jobp->ji_qs.ji_jobid);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  if (strlen(destination) >= (size_t)PBS_MAXROUTEDEST)
    {
    /* destination is over-long here; keep the message inside log_buf */
    snprintf(log_buf, sizeof(log_buf), "name %s over maximum length of %d\n",
      destination,
      PBS_MAXROUTEDEST);

    log_err(-1, __func__, log_buf);

    *my_err = PBSE_QUENBIG;

    return(-1);
    }

  snprintf(jobp->ji_qs.ji_destin, sizeof(jobp->ji_qs.ji_destin), "%s", destination);

  jobp->ji_qs.ji_un_type = JOB_UNION_TYPE_ROUTE;

  local = 1;

  if ((toserver = strchr(destination, '@')) != NULL)
    {
    /* check to see if the part after '@' is this server */
    char *tmp = parse_servername(++toserver, &port);

    destaddr = get_hostaddr(my_err, tmp);

    if (destaddr != pbs_server_addr)
      {
      local = 0;
      }

    /* the parsed server name is released here, once it has been resolved */
    free(tmp);
    }

  if (local != 0)
    {
    return(local_move(jobp, my_err, req));
    }

  return(net_move(jobp, req));
  }  /* svr_movejob() */
int send_job_work( char *job_id, const char *node_name, /* I */ int type, /* I */ int *my_err, /* O */ batch_request *preq) /* M */ { int rc = LOCUTION_FAIL; int ret = PBSE_NONE; int local_errno = 0; tlist_head attrl; int encode_type; int mom_err = PBSE_NONE; int resc_access_perm; std::string script_name; char *pc; char stdout_path[MAXPATHLEN + 1]; char stderr_path[MAXPATHLEN + 1]; char chkpt_path[MAXPATHLEN + 1]; char log_buf[LOCAL_LOG_BUF_SIZE]; long start_time = time(NULL); bool attempt_to_queue_job = false; bool change_substate_on_attempt_to_queue = false; bool need_to_send_job_script = false; bool job_has_run = false; job *pjob = NULL; char job_destin[PBS_MAXROUTEDEST+1]; bool Timeout = false; unsigned long job_momaddr = -1; unsigned short job_momport = -1; if ((pjob = svr_find_job(job_id, TRUE)) == NULL) { *my_err = PBSE_JOBNOTFOUND; req_reject(-1, 0, preq, NULL, NULL); return(PBSE_JOBNOTFOUND); } mutex_mgr job_mutex(pjob->ji_mutex, true); if (strlen(pjob->ji_qs.ji_destin) != 0) strcpy(job_destin, pjob->ji_qs.ji_destin); else job_destin[0] = '\0'; job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr; job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport; if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) need_to_send_job_script = TRUE; if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) job_has_run = TRUE; if ((job_destin[0] != '\0') && (type != MOVE_TYPE_Exec)) { if ((pc = strchr(job_destin, '@')) != NULL) { job_momaddr = get_hostaddr(&local_errno, pc + 1); job_momport = pbs_server_port_dis; } } /* encode job attributes to be moved */ CLEAR_HEAD(attrl); /* select attributes/resources to send based on move type */ if (type == MOVE_TYPE_Exec) { /* moving job to MOM - ie job start */ resc_access_perm = ATR_DFLAG_MOM; encode_type = ATR_ENCODE_MOM; } else { /* moving job to alternate server? 
*/ resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD; encode_type = ATR_ENCODE_SVR; /* clear default resource settings */ ret = svr_dequejob(pjob, FALSE); if (ret) { job_mutex.set_unlock_on_exit(false); return(ret); } } encode_attributes(attrl, pjob, resc_access_perm, encode_type); rc = get_job_script_path(pjob, script_name); if (rc != PBSE_NONE) { if (rc == PBSE_JOB_RECYCLED) job_mutex.set_unlock_on_exit(false); free_server_attrs(&attrl); return(rc); } if (job_has_run) { if ((get_job_file_path(pjob, StdOut, stdout_path, sizeof(stdout_path)) != 0) || (get_job_file_path(pjob, StdErr, stderr_path, sizeof(stderr_path)) != 0) || (get_job_file_path(pjob, Checkpoint, chkpt_path, sizeof(chkpt_path)) != 0)) { job_mutex.unlock(); goto send_job_work_end; } } /* if the job is substate JOB_SUBSTATE_TRNOUTCM it means we are * recovering after being down or a late failure so we just want * to send the "ready-to-commit/commit" */ if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) { attempt_to_queue_job = true; if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) change_substate_on_attempt_to_queue = true; } job_mutex.unlock(); rc = send_job_over_network_with_retries(job_id, job_destin, attrl, attempt_to_queue_job, change_substate_on_attempt_to_queue, Timeout, script_name.c_str(), need_to_send_job_script, job_has_run, job_momaddr, job_momport, stdout_path, stderr_path, chkpt_path, type, my_err, &mom_err); if (Timeout == TRUE) { /* 10 indicates that job migrate timed out, server will mark node down * and abort the job - see post_sendmom() */ sprintf(log_buf, "child timed-out attempting to start job %s", job_id); log_ext(*my_err, __func__, log_buf, LOG_WARNING); rc = LOCUTION_REQUEUE; } else if (rc != LOCUTION_SUCCESS) { if (should_retry_route(*my_err) == -1) { sprintf(log_buf, "child failed and will not retry job %s", job_id); log_err(*my_err, __func__, log_buf); rc = LOCUTION_FAIL; } else rc = LOCUTION_REQUEUE; } if (type == MOVE_TYPE_Exec) { 
if (node_name != NULL) update_failure_counts(node_name, rc); else update_failure_counts(job_destin, rc); } send_job_work_end: finish_move_process(job_id, preq, start_time, node_name, rc, type, mom_err); free_server_attrs(&attrl); return(rc); } /* END send_job_work() */
/*
 * assign_hosts - choose the host(s) a job will run on and record them.
 *
 * The host specification comes from, in order of preference: the list given
 * by the caller, the job's "neednodes" resource, the server's default node,
 * the local MOM (when the server has no nodes), the first time-shared node,
 * or PBS_DEFAULT_NODE.  When the server manages nodes and is_ts_node()
 * returns non-zero for the chosen spec, set_nodes() allocates them.  On
 * success the job's exec_host attribute (if requested), ji_destin and MOM
 * address are updated.
 *
 * pjob          - job being assigned (modified)
 * given         - optional list of requested hosts
 * set_exec_host - non-zero to (re)write the exec_host attribute
 * FailHost      - optional output buffer (min 1024 bytes), cleared on entry
 * EMsg          - optional output buffer (min 1024 bytes), cleared on entry
 *
 * Returns 0 on success, PBSE_UNKNODEATR, PBSE_NOTSNODE or PBSE_BADHOST on
 * the failures detected here, otherwise the value returned by set_nodes().
 */
static int assign_hosts(

  job  *pjob,           /* I (modified) */
  char *given,          /* I (optional) list of requested hosts */
  int   set_exec_host,  /* I (boolean) */
  char *FailHost,       /* O (optional,minsize=1024) */
  char *EMsg)           /* O (optional,minsize=1024) */

  {
  unsigned int  dummy;
  char         *list = NULL;
  char         *hosttoalloc = NULL;
  pbs_net_t     momaddr = 0;
  int           rc = 0, procs=0;
  extern char  *mom_host;
  char         *to_free = NULL;
  resource     *pres;

  if (EMsg != NULL)
    EMsg[0] = '\0';

  if (FailHost != NULL)
    FailHost[0] = '\0';

#ifdef __TREQSCHED
  if ((given == NULL) || (given[0] == '\0'))
    {
    /* scheduler must specify node allocation for all jobs */

    return(PBSE_UNKNODEATR);
    }

#endif /* __TREQSCHED */

  if ((given != NULL) && (given[0] != '\0'))
    {
#ifdef NVIDIA_GPUS
    hosttoalloc = get_correct_spec_string(given, pjob);
    to_free = hosttoalloc;
#else
    /* assign what was specified in run request */
    hosttoalloc = given;
#endif
    }
  else
    {
    /* Build our host list from what is in the job attrs */
    pres = find_resc_entry(
             &pjob->ji_wattr[(int)JOB_ATR_resource],
             find_resc_def(svr_resc_def, "neednodes", svr_resc_size));

    if (pres != NULL)
      {
      /* assign what was in "neednodes" */

      hosttoalloc = pres->rs_value.at_val.at_str;

      if ((hosttoalloc == NULL) || (hosttoalloc[0] == '\0'))
        {
        return(PBSE_UNKNODEATR);
        }
      }

    pres = find_resc_entry(
             &pjob->ji_wattr[(int)JOB_ATR_resource],
             find_resc_def(svr_resc_def, "procs", svr_resc_size));

    if (pres != NULL)
      {
      /* pick up the "procs" count; a host spec must already be known */

      procs = pres->rs_value.at_val.at_long;

      if ((hosttoalloc == NULL) || (hosttoalloc[0] == '\0'))
        {
        return(PBSE_UNKNODEATR);
        }
      }
    }

  if (hosttoalloc != NULL)
    {
    /* NO-OP */
    }
  else if (svr_totnodes == 0)
    {
    /* assign "local" */

    if ((server.sv_attr[(int)SRV_ATR_DefNode].at_flags & ATR_VFLAG_SET) &&
        (server.sv_attr[(int)SRV_ATR_DefNode].at_val.at_str != NULL))
      {
      hosttoalloc = server.sv_attr[(int)SRV_ATR_DefNode].at_val.at_str;
      }
    else
      {
      hosttoalloc = mom_host;
      momaddr = pbs_mom_addr;
      }
    }
  else if ((server.sv_attr[(int)SRV_ATR_DefNode].at_flags & ATR_VFLAG_SET) &&
           (server.sv_attr[(int)SRV_ATR_DefNode].at_val.at_str != 0))
    {
    /* alloc server default_node */

    hosttoalloc = server.sv_attr[(int)SRV_ATR_DefNode].at_val.at_str;
    }
  else if (svr_tsnodes != 0)
    {
    /* find first time-shared node */

    if ((hosttoalloc = find_ts_node()) == NULL)
      {
      /* FAILURE */

      return(PBSE_NOTSNODE);
      }
    }
  else
    {
    /* fall back to 1 cluster node */

    hosttoalloc = PBS_DEFAULT_NODE;
    }

  /* do we need to allocate the (cluster) node(s)? */

  if (svr_totnodes != 0)
    {
    if ((rc = is_ts_node(hosttoalloc)) != 0)
      {
      rc = set_nodes(pjob, hosttoalloc, procs, &list, FailHost, EMsg);

      set_exec_host = 1; /* maybe new VPs, must set */

      hosttoalloc = list;
      }
    }

  if (rc == 0)
    {
    /* set_nodes succeeded */

    if (set_exec_host != 0)
      {
      job_attr_def[(int)JOB_ATR_exec_host].at_free(
        &pjob->ji_wattr[(int)JOB_ATR_exec_host]);

      job_attr_def[(int)JOB_ATR_exec_host].at_decode(
        &pjob->ji_wattr[(int)JOB_ATR_exec_host],
        NULL,
        NULL,
        hosttoalloc);  /* O */

      pjob->ji_modified = 1;
      }
    else
      {
      /* leave exec_host alone and reuse old IP address */

      momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;

      hosttoalloc = pjob->ji_wattr[(int)JOB_ATR_exec_host].at_val.at_str;
      }

    /*
     * Bounded copy that always terminates the destination; strncpy would
     * leave it unterminated when the host name fills the field.
     */
    snprintf(pjob->ji_qs.ji_destin, sizeof(pjob->ji_qs.ji_destin), "%s",
      parse_servername(hosttoalloc, &dummy));

    if (momaddr == 0)
      {
      momaddr = get_hostaddr(pjob->ji_qs.ji_destin);

      if (momaddr == 0)
        {
        free_nodes(pjob);

        if (list != NULL)
          free(list);

        sprintf(log_buffer, "ALERT:  job cannot allocate node '%s' (could not determine IP address for node)",
          pjob->ji_qs.ji_destin);

        log_event(
          PBSEVENT_JOB,
          PBS_EVENTCLASS_JOB,
          pjob->ji_qs.ji_jobid,
          log_buffer);

        if (to_free != NULL)
          free(to_free);

        return(PBSE_BADHOST);
        }
      }

    pjob->ji_qs.ji_un.ji_exect.ji_momaddr = momaddr;
    }  /* END if (rc == 0) */

  if (list != NULL)
    free(list);

  if (to_free != NULL)
    free(to_free);

  return(rc);
  }  /* END assign_hosts() */
int issue_to_svr( const char *servern, /* I */ struct batch_request **preq_ptr, /* I */ void (*replyfunc) (struct work_task *)) /* I */ { int rc = PBSE_NONE; bool do_retry = false; int handle; int my_err = 0; pbs_net_t svraddr; char *svrname; unsigned int port = pbs_server_port_dis; batch_request *preq = *preq_ptr; snprintf(preq->rq_host, sizeof(preq->rq_host), "%s", servern); preq->rq_fromsvr = 1; preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR; svrname = parse_servername(servern, &port); svraddr = get_hostaddr(&my_err,svrname); free(svrname); if (svraddr == (pbs_net_t)0) { if (my_err == PBS_NET_RC_RETRY) { /* Non fatal error - retry */ do_retry = true; } } else { handle = svr_connect(svraddr, port, &my_err, NULL, NULL); if (handle >= 0) { if (((rc = issue_Drequest(handle, preq, true)) == PBSE_NONE) && (handle != PBS_LOCAL_CONNECTION)) { /* preq is already freed if handle == PBS_LOCAL_CONNECTION - a reply * has always been sent */ rc = preq->rq_reply.brp_code; } else if (handle == PBS_LOCAL_CONNECTION) *preq_ptr = NULL; return(rc); } else if (handle == PBS_NET_RC_RETRY) do_retry = true; } /* if reached here, it didn`t go, do we retry? */ if (do_retry) { queue_a_retry_task(preq, replyfunc); return(PBSE_NONE); } /* FAILURE */ return(PBSE_INTERNAL); } /* END issue_to_svr() */
int issue_to_svr( char *servern, /* I */ struct batch_request *preq, /* I */ void (*replyfunc) (struct work_task *)) /* I */ { int rc = PBSE_NONE; int do_retry = 0; int handle; int my_err = 0; pbs_net_t svraddr; char *svrname; unsigned int port = pbs_server_port_dis; struct work_task *pwt; time_t time_now = time(NULL); snprintf(preq->rq_host, sizeof(preq->rq_host), "%s", servern); preq->rq_fromsvr = 1; preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR; svrname = parse_servername(servern, &port); svraddr = get_hostaddr(&my_err,svrname); free(svrname); if (svraddr == (pbs_net_t)0) { if (my_err == PBS_NET_RC_RETRY) { /* Non fatal error - retry */ do_retry = 1; } } else { handle = svr_connect(svraddr, port, &my_err, NULL, NULL, ToServerDIS); if (handle >= 0) { if (((rc = issue_Drequest(handle, preq)) == PBSE_NONE) && (handle != PBS_LOCAL_CONNECTION)) { /* preq is already freed if handle == PBS_LOCAL_CONNECTION - a reply * has always been sent */ rc = preq->rq_reply.brp_code; } return(rc); } else if (handle == PBS_NET_RC_RETRY) { do_retry = 1; } } /* if reached here, it didn`t go, do we retry? */ if (do_retry) { if (preq->rq_id == NULL) get_batch_request_id(preq); pwt = set_task(WORK_Timed, (long)(time_now + PBS_NET_RETRY_TIME), reissue_to_svr, preq->rq_id, TRUE); pwt->wt_parmfunc = replyfunc; pthread_mutex_unlock(pwt->wt_mutex); return(PBSE_NONE); } /* FAILURE */ return(PBSE_INTERNAL); } /* END issue_to_svr() */
int issue_to_svr( char *servern, /* I */ struct batch_request *preq, /* I */ void (*replyfunc) (struct work_task *)) /* I */ { int do_retry = 0; int handle; pbs_net_t svraddr; char *svrname; unsigned int port = pbs_server_port_dis; struct work_task *pwt; strcpy(preq->rq_host, servern); preq->rq_fromsvr = 1; preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR; svrname = parse_servername(servern, &port); svraddr = get_hostaddr(svrname); if (svraddr == (pbs_net_t)0) { if (pbs_errno == PBS_NET_RC_RETRY) { /* Non fatal error - retry */ do_retry = 1; } } else { handle = svr_connect(svraddr, port, process_Dreply, ToServerDIS); if (handle >= 0) { return(issue_Drequest(handle, preq, replyfunc, NULL)); } else if (handle == PBS_NET_RC_RETRY) { do_retry = 1; } } /* if reached here, it didn`t go, do we retry? */ if (do_retry) { pwt = set_task( WORK_Timed, (long)(time_now + PBS_NET_RETRY_TIME), reissue_to_svr, (void *)preq); pwt->wt_parmfunc = replyfunc; return(0); } /* FAILURE */ return(-1); } /* END issue_to_svr() */