Exemple #1
0
/**
 * @brief
 *	vn_decode_DIS_V4 - decode version 4 vnode information from Mom
 *
 * @par Functionality:
 *	See vn_decode_DIS() above, This is called from there to decode
 *	V4 information.
 *
 * @param[in]	fd  -     socket descriptor from which to read
 * @param[out]	rcp -     pointer to place to return error code if error.
 *
 * @return	vnl_t *
 * @retval	pointer to decoded vnode information which has been malloc-ed.
 * @retval	NULL on error, see rcp value
 *
 * @par Side Effects: None
 *
 * @par MT-safe: yes
 *
 */
static vnl_t *
vn_decode_DIS_V4(int fd, int *rcp)
{
	unsigned int	i, j;
	unsigned int	size;
	time_t		t;
	vnl_t		*vnlp;

	if ((vnlp = malloc(sizeof(vnl_t))) == NULL) {
		*rcp = DIS_NOMALLOC;
		return ((vnl_t *) NULL);
	}

	t = (time_t) disrsl(fd, rcp);
	if (*rcp != DIS_SUCCESS) {
		free(vnlp);
		return ((vnl_t *) NULL);
	} else {
		vnlp->vnl_modtime = t;
	}
	size = disrui(fd, rcp);
	if (*rcp != DIS_SUCCESS) {
		free(vnlp);
		return ((vnl_t *) NULL);
	} else {
		vnlp->vnl_nelem = vnlp->vnl_used = size;
	}

	if ((vnlp->vnl_list = calloc(vnlp->vnl_nelem,
		sizeof(vnal_t))) == NULL) {
		free(vnlp);
		*rcp = DIS_NOMALLOC;
		return ((vnl_t *) NULL);
	}

	for (i = 0; i < vnlp->vnl_used; i++) {
		vnal_t		*curreslist = VNL_NODENUM(vnlp, i);

		/*
		 *	In case an error occurs and we need to free
		 *	whatever's been allocated so far, we use the
		 *	vnl_cur entry to record the number of vnal_t
		 *	entries to free.
		 */
		vnlp->vnl_cur = i;

		curreslist->vnal_id = disrst(fd, rcp);
		if (*rcp != DIS_SUCCESS)
			return (free_and_return(vnlp));

		size = disrui(fd, rcp);
		if (*rcp != DIS_SUCCESS)
			return (free_and_return(vnlp));
		else
			curreslist->vnal_nelem = curreslist->vnal_used = size;
		if ((curreslist->vnal_list = calloc(curreslist->vnal_nelem,
			sizeof(vna_t))) == NULL)
			return (free_and_return(vnlp));

		for (j = 0; j < size; j++) {
			vna_t	*curres = VNAL_NODENUM(curreslist, j);

			/*
			 *	In case an error occurs and we need to free
			 *	whatever's been allocated so far, we use the
			 *	vnal_cur entry to record the number of vna_t
			 *	entries to free.
			 */
			curreslist->vnal_cur = j;

			curres->vna_name = disrst(fd, rcp);
			if (*rcp != DIS_SUCCESS)
				return (free_and_return(vnlp));
			curres->vna_val = disrst(fd, rcp);
			if (*rcp != DIS_SUCCESS)
				return (free_and_return(vnlp));
			curres->vna_type = disrsi(fd, rcp);
			if (*rcp != DIS_SUCCESS)
				return (free_and_return(vnlp));
			curres->vna_flag = disrsi(fd, rcp);
			if (*rcp != DIS_SUCCESS)
				return (free_and_return(vnlp));
		}
	}

	*rcp = DIS_SUCCESS;
	return (vnlp);
}
Exemple #2
0
/* 
 * read reply and check for success
 */
int read_tcp_reply(

  struct tcp_chan *chan,
  int              protocol,
  int              version,
  int              command,
  int             *exit_status)

  {
  char log_buf[LOCAL_LOG_BUF_SIZE];
  int  ret;
  int  value; /* value read from sock */

  *exit_status = UNREAD_STATUS;

  if (LOGLEVEL >= 6)
    {
    sprintf(log_buf, "protocol: %d  version: %d  command:%d  sock:%d", protocol, version, command, chan->sock);
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,__func__, log_buf);
    }

  if ((value = disrsi(chan,&ret)) != protocol)
    {
    snprintf(log_buf,sizeof(log_buf),
      "Mismatching protocols. Expected protocol %d but read reply for %d\n",
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else if ((value = disrsi(chan,&ret)) != version)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Mismatching versions. Expected version %d for protocol %d but read version %d\n",
      version,
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else if ((value = disrsi(chan,&ret)) != command)
    {
    snprintf(log_buf, sizeof(log_buf),
      "Mismatching commands. Expected command %d for protocol %d but read command %d\n",
      command,
      protocol,
      value);
    log_err(-1, __func__, log_buf);
    }
  else if (ret != DIS_SUCCESS)
    {
    }
  else
    {
    /* read the exit code */
    *exit_status = disrsi(chan,&ret);
    }

  if (ret != DIS_SUCCESS)
    {
    if (ret >= 0)
      {
      snprintf(log_buf, sizeof(log_buf),
        "Could not read reply for protocol %d command %d: %s",
        protocol, command, dis_emsg[ret]);
      }
    else
      {
      snprintf(log_buf, sizeof(log_buf),
        "Could not read reply for protocol %d command %d",
        protocol, command);
      }
    log_err(-1, __func__, log_buf);
    }

  return(ret);
  } /* END read_tcp_reply() */
/*************************************************
 * svr_is_request
 *
 * Return: svr_is_request always returns a non-zero value
 *         and it must call close_conn to close the connection
 *         before returning. PBSE_SOCKET_CLOSE is the code
 *         for a successful return. But which ever retun 
 *         code is iused it must terminate the while loop
 *         in start_process_pbs_server_port.
 *************************************************/
int svr_is_request(
    
  struct tcp_chan *chan,
  int              version)

  {
  int                 command = 0;
  int                 ret = DIS_SUCCESS;
  int                 i;
  int                 err;
  char                nodename[PBS_MAXHOSTNAME];
  int                 perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;

  unsigned long       ipaddr;
  unsigned short      mom_port;
  unsigned short      rm_port;
  unsigned long       tmpaddr;

  struct sockaddr_in *addr = NULL;
  struct sockaddr     s_addr;
  unsigned int        len = sizeof(s_addr);

  struct pbsnode     *node = NULL;
  char               *node_name = NULL;

  char                log_buf[LOCAL_LOG_BUF_SIZE+1];

  command = disrsi(chan, &ret);

  if (ret != DIS_SUCCESS)
    goto err;

  if (LOGLEVEL >= 4)
    {
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
        "message received from sock %d (version %d)",
        chan->sock,
        version);

    log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf);
    }

  if (getpeername(chan->sock, &s_addr, &len) != 0)
    {
    close_conn(chan->sock, FALSE);
    log_err(errno,__func__, (char *)"Cannot get socket name using getpeername\n");
    return(PBSE_SOCKET_CLOSE);
    }

  addr = (struct sockaddr_in *)&s_addr;

  if (version != IS_PROTOCOL_VER)
    {
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "protocol version %d unknown from %s",
      version,
      netaddr(addr));

    log_err(-1, __func__, log_buf);
    close_conn(chan->sock, FALSE);
    return PBSE_SOCKET_DATA;
    }

  /* check that machine is known */
  mom_port = disrsi(chan, &ret);
  rm_port = disrsi(chan, &ret);

  if (LOGLEVEL >= 3)
    {
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
      "message received from addr %s: mom_port %d  - rm_port %d",
      netaddr(addr),
      mom_port,
      rm_port);

    log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf);
    }

  ipaddr = ntohl(addr->sin_addr.s_addr);
  
  if ((node = AVL_find(ipaddr, mom_port, ipaddrs)) != NULL)
    {
    lock_node(node, __func__, "AVL_find", LOGLEVEL);
    } /* END if AVL_find != NULL) */
  else if (allow_any_mom)
    {
    char *name = get_cached_nameinfo(addr);

    if (name != NULL)
      snprintf(nodename, sizeof(nodename), "%s", name);
    else if (getnameinfo(&s_addr, len, nodename, sizeof(nodename)-1, NULL, 0, 0) != 0)
      {
      tmpaddr = ntohl(addr->sin_addr.s_addr);
      sprintf(nodename, "0x%lX", tmpaddr);
      }
    else
      insert_addr_name_info(nodename, NULL, addr);

    err = create_partial_pbs_node(nodename, ipaddr, perm);

    if (err == PBSE_NONE)
      {
      node = AVL_find(ipaddr, 0, ipaddrs);
       
      lock_node(node, __func__, "no error", LOGLEVEL);
      }                                                         
    }
    
  if (node == NULL)
    {
    /* node not listed in trusted ipaddrs list */
    
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
      "bad attempt to connect from %s (address not trusted - check entry in server_priv/nodes)",
      netaddr(addr));
    
    if (LOGLEVEL >= 2)
      {
      log_record(PBSEVENT_SCHED, PBS_EVENTCLASS_REQUEST, __func__, log_buf);
      }
    else
      {
      log_err(-1, __func__, log_buf);
      }
    
    close_conn(chan->sock, FALSE);
    return PBSE_SOCKET_CLOSE;
    }

  if (LOGLEVEL >= 3)
    {
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
      "message %s (%d) received from mom on host %s (%s) (sock %d)",
      PBSServerCmds2[command],
      command,
      node->nd_name,
      netaddr(addr),
      chan->sock);

    log_event(PBSEVENT_ADMIN,PBS_EVENTCLASS_SERVER,__func__,log_buf);
    }

  switch (command)
    {
    case IS_NULL:  /* a ping from server */

      DBPRT(("%s: IS_NULL\n", __func__))

      break;

    case IS_UPDATE:

      DBPRT(("%s: IS_UPDATE\n", __func__))

      i = disrui(chan, &ret);

      if (ret != DIS_SUCCESS)
        {
        if (LOGLEVEL >= 1)
          {
          snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
              "IS_UPDATE error %d on node %s\n", ret, node->nd_name);

          log_err(ret, __func__, log_buf);
          }

        goto err;
        }

      DBPRT(("%s: IS_UPDATE %s 0x%x\n", __func__, node->nd_name, i))

      update_node_state(node, i);

      if ((node->nd_state & INUSE_DOWN) != 0)
        {
        node->nd_mom_reported_down = TRUE;
        }

      break;

    case IS_STATUS:

      if (LOGLEVEL >= 2)
        {
        snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
            "IS_STATUS received from %s", node->nd_name);

        log_event(PBSEVENT_ADMIN, PBS_EVENTCLASS_SERVER, __func__, log_buf);
        }

      if ((node_name = strdup(node->nd_name)) == NULL)
        goto err;
      unlock_node(node, __func__, "before is_stat_get", LOGLEVEL);

      ret = is_stat_get(node_name, chan);

      node = find_nodebyname(node_name);

      if (ret == SEND_HELLO)
        {
        struct hello_info *hi = (struct hello_info *)calloc(1, sizeof(struct hello_info));
        write_tcp_reply(chan, IS_PROTOCOL, IS_PROTOCOL_VER, IS_STATUS, DIS_SUCCESS);

        hi->name = strdup(node_name);
        enqueue_threadpool_request(send_hierarchy_threadtask, hi);
        ret = DIS_SUCCESS;
        }
      else
        write_tcp_reply(chan,IS_PROTOCOL,IS_PROTOCOL_VER,IS_STATUS,ret);

      if(node != NULL)
        node->nd_stream = -1;

      if (ret != DIS_SUCCESS)
        {
        if (LOGLEVEL >= 1)
          {
          snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
              "IS_STATUS error %d on node %s", ret, node_name);

          log_err(ret, __func__, log_buf);
          }
        free(node_name);

        goto err;
        }
      free(node_name);

      break;

    default:

      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
          "unknown command %d sent from %s",
        command,
        node->nd_name);

      log_err(-1, __func__, log_buf);

      goto err;

      break;
    }  /* END switch (command) */

  /* must be closed because mom opens and closes this connection each time */
  close_conn(chan->sock, FALSE);

  if(node != NULL)
    unlock_node(node, __func__, "close", LOGLEVEL);
  
  return PBSE_SOCKET_CLOSE;

err:

  /* a DIS write error has occurred */

  if (node != NULL)
    {
    if (LOGLEVEL >= 1)
      {
      DBPRT(("%s: error processing node %s\n",
            __func__,
            node->nd_name))
      }

    sprintf(log_buf, "%s from %s(%s)",
      dis_emsg[ret],
      node->nd_name,
      netaddr(addr));
    
    unlock_node(node, __func__, "err", LOGLEVEL);
    }
  else
    {