Exemplo n.º 1
0
static void execute(

  char *queue,
  char *server)

  {
  int ct;         /* Connection to the server */
  int local_errno = 0;
  int merr;       /* Error return from pbs_manager */
  char *errmsg;   /* Error message from pbs_manager */
  /* The disable request */

  static struct attropl attr =
    {
    NULL, (char *)"enabled", NULL, (char *)"TRUE", SET
    };

  if ((ct = cnt2server(server)) > 0)
    {
    merr = pbs_manager_err(ct, MGR_CMD_SET, MGR_OBJ_QUEUE, queue, &attr, NULL, &local_errno);

    if (merr != 0)
      {
      errmsg = pbs_geterrmsg(ct);

      if (errmsg != NULL)
        {
        fprintf(stderr, "qenable: %s ", errmsg);
        free(errmsg);
        }
      else
        {
        fprintf(stderr, "qenable: Error enabling queue: %d - %s ",
          local_errno,
          pbs_strerror(local_errno));
        }

      if (notNULL(queue))
        fprintf(stderr, "%s", queue);

      if (notNULL(server))
        fprintf(stderr, "@%s", server);

      fprintf(stderr, "\n");

      exitstatus = 2;
      }

    pbs_disconnect(ct);
    }
  else
    {
    fprintf(stderr, "qenable: could not connect to server %s (%d)\n", server, ct * -1);
    exitstatus = 2;
    }
  }
Exemplo n.º 2
0
static int marknode(

  int            con,
  char          *name,
  char          *state1,
  enum batch_op  op1,
  char          *state2,
  enum batch_op  op2)

  {
  char          *errmsg;

  struct attropl  new_attr[2];
  int             rc;
  int             local_errno = 0;

  new_attr[0].name     = ATTR_NODE_state;
  new_attr[0].resource = NULL;
  new_attr[0].value    = state1;
  new_attr[0].op       = op1;

  if (state2 == NULL)
    {
    new_attr[0].next     = NULL;
    }
  else
    {
    new_attr[0].next     = &new_attr[1];
    new_attr[1].next     = NULL;
    new_attr[1].name     = ATTR_NODE_state;
    new_attr[1].resource = NULL;
    new_attr[1].value    = state2;
    new_attr[1].op     = op2;
    }

  rc = pbs_manager_err(
         con,
         MGR_CMD_SET,
         MGR_OBJ_NODE,
         name,
         new_attr,
         NULL,
         &local_errno);

  if (rc && !quiet)
    {
    fprintf(stderr, "Error marking node %s - ",
            name);

    if ((errmsg = pbs_geterrmsg(con)) != NULL)
      fprintf(stderr, "%s\n",
              errmsg);
    }

  return(rc);
  }  /* END marknode() */
Exemplo n.º 3
0
/*
 * void execute( char *node, char *gpuid, int mode, const char *server )
 *
 * node     The name of the MOM node.
 * gpuid    The id of the GPU
 * mode     The num mode for the GPU
 * server   The name of the server to send to.
 *
 * Returns:
 *  None
 *
 * File Variables:
 *  exitstatus  Set to two if an error occurs.
 */
static void execute(
    
  char *node,
  char *gpuid,
  int   mode,
  const char *server)

  {
  int ct;         /* Connection to the server */
  int merr;       /* Error return from pbs_manager */
  int local_errno = 0;
  char *errmsg;   /* Error message from pbs_manager */

  /* The request to change mode */

  if ((ct = cnt2server(server)) > 0)
    {
    merr = pbs_gpumode_err(ct, node, gpuid, mode, &local_errno);

    if (merr != 0)
      {
      errmsg = pbs_geterrmsg(ct);

      if (errmsg != NULL)
        {
        fprintf(stderr, "qgpumode: %s ", errmsg);
        free(errmsg);
        }
      else
        {
        fprintf(stderr, "qgpumode: Error (%d - %s) changing GPU mode",
                local_errno,
                pbs_strerror(local_errno));
        }

      if (notNULL(server))
        fprintf(stderr, "@%s", server);

      fprintf(stderr, "\n");

      exitstatus = 2;
      }

    pbs_disconnect(ct);
    }
  else
    {
    local_errno = -1 * ct;

    fprintf(stderr, "qgpumode: could not connect to server %s (%d) %s\n",
            server,
            local_errno,
            pbs_strerror(local_errno));
    exitstatus = 2;
    }
  }
Exemplo n.º 4
0
static void execute(

  int   manner,  /* I */
  const char *server)  /* I */

  {
  int ct;         /* Connection to the server */
  int err;        /* Error return from pbs_terminate */
  char *errmsg;   /* Error message from pbs_terminate */
  int   local_errno = 0;

  if ((ct = cnt2server(server)) > 0)
    {
    err = pbs_terminate_err(ct, manner, NULL, &local_errno);

    if (err != 0)
      {
      errmsg = pbs_geterrmsg(ct);

      if (errmsg != NULL)
        {
        fprintf(stderr, "qterm: %s",
                errmsg);
        }
      else
        {
        fprintf(stderr, "qterm: Error (%d - %s) terminating server ",
                local_errno, pbs_strerror(local_errno));
        }

      fprintf(stderr, "%s\n",

              server);

      exitstatus = 2;
      }

    pbs_disconnect(ct);
    }
  else
    {
    /* FAILURE */
    local_errno = -1 * ct;

    fprintf(stderr, "qterm: could not connect to server '%s' (%d) %s\n",
            server,
            local_errno,
            pbs_strerror(local_errno));

    exitstatus = 2;
    }

  return;
  }  /* END execute() */
Exemplo n.º 5
0
/**
 * @brief
 * 	executes a job 
 *
 * @param[in] job - The fully qualified job id.
 * @param[in] server - The name of the server that manages the job.
 * @param[in] location -  location indicating where to run job
 *
 * @return - Void
 *
 * @File Variables:
 *  exitstatus  Set to two if an error occurs.
 *
 */
static void
execute(char *job, char *server, char *location)
{
	int ct;         /* Connection to the server */
	int err;        /* Error return from pbs_run */
        int out;        /* Stores the size of err_msg_buf*/
	int located = FALSE;
	char *errmsg;
	char err_msg_buf[COMMENT_BUF_SIZE] = {'\0'};	/* generic buffer - comments & logging*/
        char rmt_server[MAXSERVERNAME];
        
cnt:
	if ((ct = cnt2server(server)) > 0) {
		if (async)
			err = pbs_asyrunjob(ct, job, location, NULL);
		else
			err = pbs_runjob(ct, job, location, NULL);

		if (err && (pbs_errno != PBSE_UNKJOBID)) {
			errmsg = pbs_geterrmsg(ct);
			if (errmsg != NULL) {
				if (pbs_errno == PBSE_UNKNODE) {
					out = snprintf(err_msg_buf, sizeof(err_msg_buf),"qrun: %s %s",errmsg, location);
                                        if (out >= sizeof(err_msg_buf)) {
                                                    fprintf(stderr,"%s...\n", err_msg_buf);
                                                } else {
                                                    fprintf(stderr, "%s\n", err_msg_buf);
                                                } 
                                        
				} else {
					prt_job_err("qrun", ct, job);
				}
			} else {
				fprintf(stderr, "qrun : Server returned error %d for job ", pbs_errno);
			}
			exitstatus = 2;
		} else if (err && (pbs_errno == PBSE_UNKJOBID) && !located) {
			located = TRUE;
			if (locate_job(job, server, rmt_server)) {
				pbs_disconnect(ct);
				strcpy(server, rmt_server);
				goto cnt;
			}
			prt_job_err("qrun", ct, job);
			exitstatus = 2;
		}
		pbs_disconnect(ct);
	} else {
		fprintf(stderr,
			"qrun: could not connect to server %s (%d)\n", server, pbs_errno);
		exitstatus = 2;
	}
}
Exemplo n.º 6
0
/*
 * log_commit_error()
 *
 * checks the error status for the connection and logs any error
 * @pre-cond: con must be a valid index into the connection table
 */
void log_commit_error(

  int   con,
  int   mom_err,
  char *job_id,
  bool &timeout)

  {
  char *err_text;
  char  log_buf[LOCAL_LOG_BUF_SIZE];
  int   errno2;

  err_text = pbs_geterrmsg(con);

  /* NOTE:  errno is modified by log_err */
  if (mom_err > PBSE_FLOOR)
    {
    sprintf(log_buf, "send_job commit failed, rc=%d (%s: %s)",
      mom_err, pbse_to_txt(mom_err), (err_text != NULL) ? err_text : "N/A");
    errno2 = mom_err;
    }
  else
    {
    sprintf(log_buf, "send_job commit failed, rc=%d (%s)",
      mom_err, (err_text != NULL) ? err_text : "N/A");
    errno2 = errno;
    }

  if (err_text != NULL)
    free(err_text);

  log_ext(errno2, __func__, log_buf, LOG_WARNING);
    
  /* if failure occurs, pbs_mom should purge job and pbs_server should set *
     job state to idle w/error msg */
  if (errno2 == EINPROGRESS)
    {
    timeout = true;

    sprintf(log_buf, "child commit request timed-out for job %s, increase tcp_timeout?",
      job_id);
    log_ext(errno2, __func__, log_buf, LOG_WARNING);
    }
  else
    {
    sprintf(log_buf, "child failed in commit request for job %s", job_id);
    log_ext(errno2, __func__, log_buf, LOG_CRIT);
    }
  }
Exemplo n.º 7
0
struct batch_status *statnode(

  int   con,
  char *nodearg)

  {

  struct batch_status *bstatus;
  char                *errmsg;
  int                  local_errno = 0;

  bstatus = pbs_statnode_err(con, nodearg, NULL, NULL, &local_errno);

  if (bstatus == NULL)
    {
    if (local_errno)
      {
      if (!quiet)
        {
        if ((errmsg = pbs_geterrmsg(con)) != NULL)
          {
          fprintf(stderr, "%s: %s\n",
                  progname,
                  errmsg);
          }
        else
          {
          fprintf(stderr, "%s: Error %d (%s)\n",
                  progname,
                  local_errno,
                  pbs_strerror(local_errno));
          }
        }

      exit(1);
      }

    if (!quiet)
      fprintf(stderr, "%s: No nodes found\n",
              progname);

    exit(2);
    }

  return bstatus;
  }    /* END statnode() */
Exemplo n.º 8
0
void prt_job_err(

  const char *cmd,
  int   connect,
  const char *id)

  {
  char *errmsg;

  errmsg = pbs_geterrmsg(connect);

  if (errmsg != NULL)
    {
    fprintf(stderr, "%s: %s ", cmd, errmsg);
    fprintf(stderr, "%s\n", id);
    free(errmsg);
    }

  }
Exemplo n.º 9
0
static int set_note(

  int    con,
  char  *name,
  char  *msg)

  {
  char          *errmsg;

  struct attropl  new_attr;
  int             rc;
  int             local_errno = 0;

  new_attr.name     = ATTR_NODE_note;
  new_attr.resource = NULL;
  new_attr.value    = msg;
  new_attr.op       = SET;
  new_attr.next     = NULL;

  rc = pbs_manager_err(
         con,
         MGR_CMD_SET,
         MGR_OBJ_NODE,
         name,
         &new_attr,
         NULL,
         &local_errno);

  if (rc && !quiet)
    {
    fprintf(stderr, "Error setting note attribute for %s - ",
      name);

    if ((errmsg = pbs_geterrmsg(con)) != NULL)
      {
      fprintf(stderr, "%s\n",
        errmsg);
      }
    }

  return(rc);
  }  /* END set_note() */
Exemplo n.º 10
0
void prt_job_err(

  char *cmd,
  int   connect,
  char *id)

  {
  char *errmsg;

  errmsg = pbs_geterrmsg(connect);

  if (errmsg != NULL)
    {
    fprintf(stderr, "%s: %s ", cmd, errmsg);
    }
  else
    {
    fprintf(stderr, "%s: Server returned error %d for job ", cmd, pbs_errno);
    }

  fprintf(stderr, "%s\n", id);
  }
Exemplo n.º 11
0
Arquivo: fifo.c Projeto: CESNET/torque
/*
 *
 * run_update_job - run the job and update the job information
 *
 *   pbs_sd - connection to pbs_server
 *   sinfo  - server job is on
 *   qinfo  - queue job resides in
 *   jinfo  - the job to run
 *
 * returns success/failure - see pbs_errno for more info
 *
 */
int run_update_job(int pbs_sd, server_info *sinfo, queue_info *qinfo,
                   job_info *jinfo)
  {
  int ret;    /* return code from pbs_runjob() */
  node_info *best_node = NULL;  /* best node to run job on */
  char *best_node_name = NULL;  /* name of best node */
  char buf[256] = {'\0'};  /* generic buffer - comments & logging*/
  char timebuf[128];   /* buffer to hold the time and date */
  resource_req *res;   /* ptr to the resource of ncpus */
  int ncpus;    /* numeric amount of resource ncpus */
  char *errmsg;    /* used for pbs_geterrmsg() */

  strftime(timebuf, 128, "started on %a %b %d at %H:%M", localtime(&cstat.current_time));

  if (cstat.load_balancing || cstat.load_balancing_rr)
    {
    best_node = find_best_node(jinfo, sinfo -> timesharing_nodes);

    if (best_node != NULL)
      {
      best_node_name = best_node -> name;
      sprintf(buf, "Job run on node %s - %s", best_node_name, timebuf);
      }
    }

  if (best_node == NULL)
    sprintf(buf, "Job %s", timebuf);

  update_job_comment(pbs_sd, jinfo, buf);

  buf[0] = '\0';

  ret = pbs_runjob(pbs_sd, jinfo -> name, best_node_name, NULL);

  if (ret == 0)
    {
    /* If a job is 100% efficent, it will raise the load average by 1 per
     * cpu is uses.  Temporarly inflate load average by that value
     */
    if (cstat.load_balancing && best_node != NULL)
      {
      if ((res = find_resource_req(jinfo -> resreq, "ncpus")) == NULL)
        ncpus = 1;
      else
        ncpus = res -> amount;

      best_node -> loadave += ncpus;
      }

    if (cstat.help_starving_jobs && jinfo == cstat.starving_job)
      jinfo -> sch_priority = 0;

    sched_log(PBSEVENT_SCHED, PBS_EVENTCLASS_JOB, jinfo -> name, "Job Run");

    update_server_on_run(sinfo, qinfo, jinfo);

    update_queue_on_run(qinfo, jinfo);

    update_job_on_run(pbs_sd, jinfo);

    if (cstat.fair_share)
      update_usage_on_run(jinfo);

    free(sinfo -> running_jobs);

    sinfo -> running_jobs = job_filter(sinfo -> jobs, sinfo -> sc.total,
                                       check_run_job, NULL);

    free(qinfo -> running_jobs);

    qinfo -> running_jobs = job_filter(qinfo -> jobs, qinfo -> sc.total,
                                       check_run_job, NULL);
    }
  else
    {
    errmsg = pbs_geterrmsg(pbs_sd);
    sprintf(buf, "Not Running - PBS Error: %s", errmsg);
    update_job_comment(pbs_sd, jinfo, buf);
    }

  return ret;
  }
Exemplo n.º 12
0
/**
 * @brief
 * 	main - the initialization and main loop of pbs_daemon
 */
int
main(int argc, char *argv[])
{
	char		jobfile[MAXPATHLEN+1];
	char		jobfile_full[MAXPATHLEN+1];
	pbs_net_t	hostaddr = 0;
	int			port = -1;
	int			move_type = -1;
	pbs_list_head	attrl;
	enum conn_type  cntype = ToServerDIS;
	int		con = -1;
	char		*destin;
	int			encode_type;
	int			i;
	job			*jobp;
	char		 job_id[PBS_MAXSVRJOBID+1];
	attribute	*pattr;
	struct attropl  *pqjatr;      /* list (single) of attropl for quejob */
	char		 script_name[MAXPATHLEN+1];
	int			in_server = -1;
	char		*param_name, *param_val;
	char		buf[4096];
	struct  hostent *hp;
	struct in_addr  addr;
	char            *credbuf = NULL;
	size_t		credlen = 0;
	int 		prot = PROT_TCP;
	/*the real deal or output version and exit?*/

	execution_mode(argc, argv);

	/* If we are not run with real and effective uid of 0, forget it */

	pbs_loadconf(0);

	if (!isAdminPrivilege(getlogin())) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		exit(SEND_JOB_FATAL);
	}

	/* initialize the pointers in the resource_def array */
	for (i = 0; i < (svr_resc_size - 1); ++i)
		svr_resc_def[i].rs_next = &svr_resc_def[i+1];
	/* last entry is left with null pointer */
	/* set single threaded mode */
	pbs_client_thread_set_single_threaded_mode();
	/* disable attribute verification */
	set_no_attribute_verification();

	/* initialize the thread context */
	if (pbs_client_thread_init_thread_context() != 0) {
		fprintf(stderr, "%s: Unable to initialize thread context\n",
			argv[0]);
		exit(SEND_JOB_FATAL);
	}

	if(set_msgdaemonname("PBS_send_job")) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	winsock_init();

	connection_init();

	while (fgets(buf, sizeof(buf), stdin) != NULL) {
		buf[strlen(buf)-1] = '\0';	/* gets rid of newline */

		param_name = buf;
		param_val = strchr(buf, '=');
		if (param_val) {
			*param_val = '\0';
			param_val++;
		} else {	/* bad param_val -- skipping */
			break;
		}

		if (strcmp(param_name, "jobfile") == 0) {
			jobfile[0] = '\0';
			strncpy(jobfile, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "destaddr") == 0) {
			hostaddr = atol(param_val);
		} else if (strcmp(param_name, "destport") == 0) {
			port = atoi(param_val);
		} else if (strcmp(param_name, "move_type") == 0) {
			move_type = atoi(param_val);
		} else if (strcmp(param_name, "in_server") == 0) {
			in_server = atoi(param_val);
		} else if (strcmp(param_name, "server_name") == 0) {
			server_name[0] = '\0';
			strncpy(server_name, param_val, PBS_MAXSERVERNAME);
		} else if (strcmp(param_name, "server_host") == 0) {
			server_host[0] = '\0';
			strncpy(server_host, param_val, (sizeof(server_host) - 1));
		} else if (strcmp(param_name, "server_addr") == 0) {
			pbs_server_addr = atol(param_val);
		} else if (strcmp(param_name, "server_port") == 0) {
			pbs_server_port_dis = atoi(param_val);
		} else if (strcmp(param_name, "log_file") == 0) {
			log_file = strdup(param_val);
		} else if (strcmp(param_name, "path_log") == 0) {
			path_log[0] = '\0';
			strncpy(path_log, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "path_jobs") == 0) {
			path_jobs = strdup(param_val);
		} else if (strcmp(param_name, "path_spool") == 0) {
			path_spool = strdup(param_val);
		} else if (strcmp(param_name, "path_rescdef") == 0) {
			path_rescdef = strdup(param_val);
		} else if (strcmp(param_name, "path_users") == 0) {
			path_users = strdup(param_val);
		} else if (strcmp(param_name, "path_hooks_workdir") == 0) {
			path_hooks_workdir = strdup(param_val);
			if (path_hooks_workdir == NULL)
				exit(SEND_JOB_FATAL);
		} else if (strcmp(param_name, "svr_history_enable") == 0) {
			svr_history_enable = atol(param_val);
		} else if (strcmp(param_name, "svr_history_duration") == 0) {
			svr_history_duration = atol(param_val);
		} else if (strcmp(param_name, "single_signon_password_enable") == 0) {
			if (decode_b(&server.sv_attr[(int)SRV_ATR_ssignon_enable],
				NULL, NULL, param_val) != 0) {
				fprintf(stderr, "%s: failed to set ssignon_password_enable\n", argv[0]);
				exit(SEND_JOB_FATAL);
			}
		} else if (strcmp(param_name, "script_name") == 0) {
			strncpy(script_name, param_val, MAXPATHLEN + 1);
		} else
			break;
	}

	time(&time_now);

	(void)log_open_main(log_file, path_log, 1); /* silent open */

	if (setup_resc(1) == -1) {
		/* log_buffer set in setup_resc */
		log_err(-1, "pbsd_send_job(setup_resc)", log_buffer);
		return (-1);
	}

	if( strlen(jobfile) == 0 || hostaddr == 0 || port == 0 ||  move_type == -1 || \
			in_server == -1 || strlen(server_name) == 0 || strlen(server_host) == 0 || \
			pbs_server_addr == 0 || pbs_server_port_dis == 0 || \
			strlen(path_log) == 0 || path_jobs == NULL || \
			path_spool == NULL || path_users == NULL ) {
		log_err(-1, "pbs_send_job", "error on one of the parameters");
		log_close(0);	/* silent close */
		exit(SEND_JOB_FATAL);
	}

	CLEAR_HEAD(task_list_immed);
	CLEAR_HEAD(task_list_timed);
	CLEAR_HEAD(task_list_event);
	CLEAR_HEAD(svr_queues);
	CLEAR_HEAD(svr_alljobs);
	CLEAR_HEAD(svr_newjobs);
	CLEAR_HEAD(svr_allresvs);
	CLEAR_HEAD(svr_newresvs);
	CLEAR_HEAD(svr_deferred_req);
	CLEAR_HEAD(svr_unlicensedjobs);

	strcpy(jobfile_full, path_jobs);
	strcat(jobfile_full, jobfile);

	if (chk_save_file(jobfile_full) != 0) {
		sprintf(log_buffer, "Error opening jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	if ((jobp=job_recov_fs(jobfile, RECOV_SUBJOB)) == NULL) {
		sprintf(log_buffer, "Failed to recreate job in jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	/* now delete the temp job file that was created by job_save_fs in server code
	 * jobs are in database now, no need to keep in filesystem
	 */
	unlink(jobfile_full);

	if (in_server)
		append_link(&svr_alljobs, &jobp->ji_alljobs, jobp);


	/* select attributes/resources to send based on move type */

	if (move_type == MOVE_TYPE_Exec) {
		resc_access_perm = ATR_DFLAG_MOM;
		encode_type = ATR_ENCODE_MOM;
		cntype = ToServerDIS;
	} else {
		resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR |
			ATR_DFLAG_MGWR | ATR_DFLAG_SvRD;
		encode_type = ATR_ENCODE_SVR;
		svr_dequejob(jobp);
	}

	CLEAR_HEAD(attrl);
	pattr = jobp->ji_wattr;
	for (i=0; i < (int)JOB_ATR_LAST; i++) {
		if ((job_attr_def+i)->at_flags & resc_access_perm) {
			(void)(job_attr_def+i)->at_encode(pattr+i, &attrl,
				(job_attr_def+i)->at_name, NULL,
				encode_type, NULL);
		}
	}
	attrl_fixlink(&attrl);

	/* script name is passed from parent */

	/* get host name */
	pbs_loadconf(0);
	addr.s_addr = htonl(hostaddr);
	hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET);
	if (hp == NULL) {
		sprintf(log_buffer, "%s: h_errno=%d",
			inet_ntoa(addr), h_errno);
		log_err(-1, __func__, log_buffer);
	}
	else {
		/* read any credential file */
		(void)get_credential(hp->h_name, jobp,  PBS_GC_BATREQ,
			&credbuf, &credlen);
	}
	/* save the job id for when after we purge the job */

	(void)strcpy(job_id, jobp->ji_qs.ji_jobid);

	con = -1;

	DIS_tcparray_init();

	for (i=0; i<RETRY; i++) {

		pbs_errno = 0;
		/* connect to receiving server with retries */

		if (i > 0) {	/* recycle after an error */
			if (con >= 0)
				svr_disconnect(con);
			if (should_retry_route(pbs_errno) == -1) {
				goto fatal_exit;	/* fatal error, don't retry */
			}
			sleep(1<<i);
		}
		if ((con = svr_connect(hostaddr, port, 0, cntype, prot)) ==
			PBS_NET_RC_FATAL) {
			(void)sprintf(log_buffer, "send_job failed to %lx port %d",
				hostaddr, port);
			log_err(pbs_errno, __func__, log_buffer);
			goto fatal_exit;
		} else if (con == PBS_NET_RC_RETRY) {
			pbs_errno = WSAECONNREFUSED;	/* should retry */
			continue;
		}
		/*
		 * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
		 * we are recovering after being down or a late failure, we
		 * just want to send the "read-to-commit/commit"
		 */


		if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) {

			if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) {
				jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;
			}

			pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;
			destin = jobp->ji_qs.ji_destin;

			if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL, prot, NULL)== 0) {
				if (pbs_errno == PBSE_JOBEXIST && move_type == MOVE_TYPE_Exec) {
					/* already running, mark it so */
					log_event(PBSEVENT_ERROR,
						PBS_EVENTCLASS_JOB, LOG_INFO,
						jobp->ji_qs.ji_jobid, "Mom reports job already running");
					goto ok_exit;

				} else if ((pbs_errno == PBSE_HOOKERROR) ||
					(pbs_errno == PBSE_HOOK_REJECT)  ||
					(pbs_errno == PBSE_HOOK_REJECT_RERUNJOB)  ||
					(pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) {
					char		name_buf[MAXPATHLEN+1];
					int		rfd;
					int		len;
					char		*reject_msg;
					int		err;

					err = pbs_errno;

					reject_msg = pbs_geterrmsg(con);
					(void)snprintf(log_buffer, sizeof(log_buffer),
						"send of job to %s failed error = %d reject_msg=%s",
						destin, err,
						reject_msg?reject_msg:"");
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						log_buffer);

					(void)strcpy(name_buf, path_hooks_workdir);
					(void)strcat(name_buf, jobp->ji_qs.ji_jobid);
					(void)strcat(name_buf, HOOK_REJECT_SUFFIX);

					if ((reject_msg != NULL) &&
						(reject_msg[0] != '\0')) {

						if ((rfd = open(name_buf,
							O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) {
							snprintf(log_buffer,
								sizeof(log_buffer),
								"open of reject file %s failed: errno %d",
								name_buf, errno);
							log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
						} else {
							secure_file(name_buf, "Administrators",
								READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
							setmode(rfd, O_BINARY);
							len = strlen(reject_msg)+1;
							/* write also trailing null char */
							if (write(rfd, reject_msg, len) != len) {
								snprintf(log_buffer,
									sizeof(log_buffer),
									"write to file %s incomplete: errno %d", name_buf, errno);
								log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
							}
							close(rfd);
						}
					}

					if (err == PBSE_HOOKERROR)
						exit(SEND_JOB_HOOKERR);
					if (err == PBSE_HOOK_REJECT)
						exit(SEND_JOB_HOOK_REJECT);
					if (err == PBSE_HOOK_REJECT_RERUNJOB)
						exit(SEND_JOB_HOOK_REJECT_RERUNJOB);
					if (err == PBSE_HOOK_REJECT_DELETEJOB)
						exit(SEND_JOB_HOOK_REJECT_DELETEJOB);
				} else {
					(void)sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno);
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
					continue;
				}
			}

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				if (PBSD_jscript(con, script_name, prot, NULL) != 0)
					continue;
			}

			if (credlen > 0) {
				int     ret;

				ret = PBSD_jcred(con,
					jobp->ji_extended.ji_ext.ji_credtype,
					credbuf, credlen, prot, NULL);
				if ((ret == 0) || (i == (RETRY - 1)))
					free(credbuf);	/* free credbuf if credbuf is sent successfully OR */
				/* at the end of all retry attempts */
				if (ret != 0)
					continue;
			}

			if ((move_type == MOVE_TYPE_Exec) &&
				(jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
				(hostaddr !=  pbs_server_addr)) {
				/* send files created on prior run */
				if ((move_job_file(con, jobp, StdOut, prot) != 0) ||
					(move_job_file(con, jobp, StdErr, prot) != 0) ||
					(move_job_file(con, jobp, Chkpt, prot) != 0))
					continue;
			}

			jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;
		}

		if (PBSD_rdytocmt(con, job_id, prot, NULL) != 0)
			continue;

		if (PBSD_commit(con, job_id, prot, NULL) != 0)
			goto fatal_exit;
		goto ok_exit;	/* This child process is all done */
	}
	if (con >= 0)
		svr_disconnect(con);
	/*
	 * If connection is actively refused by the execution node(or mother superior) OR
	 * the execution node(or mother superior) is rejecting request with error
	 * PBSE_BADHOST(failing to authorize server host), the node should be marked down.
	 */
	if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == WSAECONNREFUSED || pbs_errno == PBSE_BADHOST)) {
		i = SEND_JOB_NODEDW;
	} else if (should_retry_route(pbs_errno) == -1) {
		i = SEND_JOB_FATAL;
	} else {
		i = SEND_JOB_RETRY;
	}
	(void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno);
	log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE,
		jobp->ji_qs.ji_jobid, log_buffer);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(i);

fatal_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_FATAL);

ok_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_OK);
}
Exemplo n.º 13
0
/**
 *
 * @brief
 * 		Send a job over the network to some other server or MOM.
 * @par
 * 		Under Linux/Unix, this starts a child process to do the work.
 *		Connect to the destination host and port,
 * 		and go through the protocol to transfer the job.
 * 		Signals are blocked.
 *
 * @param[in]	jobp	-	pointer to the job being sent.
 * @param[in]	hostaddr	-	the address of host to send job to, host byte order.
 * @param[in]	port	-	the destination port, host byte order
 * @param[in]	move_type	-	the type of move (e.g. MOVE_TYPE_exec)
 * @param[in]	post_func	-	the function to execute once the child process
 *								sending job completes (Linux/Unix only)
 * @param[in]	data	-	input data to 'post_func'
 *
 * @return	int
 * @retval	2	parent	: success (child forked)
 * @retval	-1	parent	: on failure (pbs_errno set to error number)
 * @retval	SEND_JOB_OK	child	: 0 success, job sent
 * @retval	SEND_JOB_FATAL	child	: 1 permenent failure or rejection,
 * @retval	SEND_JOB_RETRY	child	: 2 failed but try again
 * @retval	SEND_JOB_NODEDW child	: 3 execution node down, retry different node
 */
int
send_job(job *jobp, pbs_net_t hostaddr, int port, int move_type,
	void (*post_func)(struct work_task *), struct batch_request *preq)
{

#ifdef WIN32
	char	cmdline[80];
	pio_handles	pio;
	char	buf[4096];
	struct work_task *ptask;
	int	newstate;
	int	newsub;
	long	tempval;
	char	script_name[MAXPATHLEN+1];
	int 		gridproxy_cred = 0;

#ifdef  PBS_CRED_GRIDPROXY
	if (jobp->ji_extended.ji_ext.ji_credtype == PBS_CREDTYPE_GRIDPROXY)
		gridproxy_cred = 1;
#endif

	if (pbs_conf.pbs_use_tcp == 1 && move_type == MOVE_TYPE_Exec && gridproxy_cred == 0) {
		return (send_job_exec(jobp, hostaddr, port, preq));
	}

	sprintf(cmdline, "%s/sbin/pbs_send_job", pbs_conf.pbs_exec_path);

	if (win_popen(cmdline, "w", &pio, NULL) == 0) {
		errno = GetLastError();
		pbs_errno = errno;
		(void)sprintf(log_buffer, "executing %s for job %s failed errno=%d", cmdline, jobp->ji_qs.ji_jobid, errno);
		log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_ERR,
			jobp->ji_qs.ji_jobid, log_buffer);
		/* force re-eval of job state out of Transit */
		svr_evaljobstate(jobp, &newstate, &newsub, 1);
		svr_setjobstate(jobp, newstate, newsub);

		win_pclose(&pio);
		return (-1);
	}

	ptask = set_task(WORK_Deferred_Child, (long)pio.pi.hProcess, post_func, preq);
	if (!ptask) {
		log_err(errno, __func__, msg_err_malloc);
		errno = ENOMEM;
		pbs_errno = errno;
		win_pclose(&pio);
		/* force re-eval of job state out of Transit */
		svr_evaljobstate(jobp, &newstate, &newsub, 1);
		svr_setjobstate(jobp, newstate, newsub);
		return (-1);
	} else {
		ptask->wt_parm2 = jobp;
		append_link(&((job *)jobp)->ji_svrtask, &ptask->wt_linkobj, ptask);
	}

	script_name[0] = '\0';
	/* if job has a script read it from database */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		/*
		 * copy the job script from database to a temp file
		 * PBSD_jscript works with a file
		 * delete it at the end of the send
		 */
		if (svr_create_tmp_jobscript(jobp, &script_name) != 0) {
			pbs_errno = PBSE_SYSTEM;
			snprintf(log_buffer, sizeof(log_buffer),
				"Failed to create temporary job script for job %s",
				jobp->ji_qs.ji_jobid);
			log_err(pbs_errno, "send_job", log_buffer);
			win_pclose2(&pio);
			return (-1);
		}
	}

	addpid(pio.pi.hProcess);

	/* our job is to calc eligible time accurately and save it */
	/* on new server, accrue type should be calc afresh */
	/* Note: if job is being sent for execution on mom, then don't calc eligible time */

	if ((jobp->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) &&
		(server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) &&
		(move_type != MOVE_TYPE_Exec)) {
		tempval = ((long)time_now - jobp->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long);
		jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long += tempval;
		jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;
	}

	/* in windows code, a child process "w32_send_job" handles the send
	 * This needs the job information, so we save using the filesystem
	 * This avoids the child process from having to "connect" to the database again
	 * The file is deleted by the send_job child process when it has done recovering the job
	 */
	job_save_fs(jobp, SAVEJOB_FULLFORCE);	/* so the spawned process can get a fresh copy of job */

	if (*jobp->ji_qs.ji_fileprefix != '\0')
		sprintf(buf, "jobfile=%s%s\n", jobp->ji_qs.ji_fileprefix, JOB_FILE_SUFFIX);
	else
		sprintf(buf, "jobfile=%s%s\n", jobp->ji_qs.ji_jobid, JOB_FILE_SUFFIX);

	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "destaddr=%ld\n", hostaddr);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "destport=%d\n", port);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "move_type=%d\n", move_type);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "in_server=%d\n", is_linked(&svr_alljobs, &jobp->ji_alljobs));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "server_name=%s\n", (server_name?server_name:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "server_host=%s\n", (server_host?server_host:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "server_addr=%ld\n", pbs_server_addr);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "server_port=%d\n", pbs_server_port_dis);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "log_file=%s\n", (log_file?log_file:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_log=%s\n", (path_log?path_log:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_jobs=%s\n", (path_jobs?path_jobs:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_spool=%s\n", (path_spool?path_spool:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_rescdef=%s\n", (path_rescdef?path_rescdef:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_users=%s\n", (path_users?path_users:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "path_hooks_workdir=%s\n",
		(path_hooks_workdir?path_hooks_workdir:""));
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "svr_history_enable=%ld\n", svr_history_enable);
	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "svr_history_duration=%ld\n", svr_history_duration);
	win_pwrite(&pio, buf, strlen(buf));

	if ( (server.sv_attr[SRV_ATR_ssignon_enable].at_flags & \
                                                ATR_VFLAG_SET) && \
             (server.sv_attr[SRV_ATR_ssignon_enable].at_val.at_long == 1) )
		strcpy(buf, "single_signon_password_enable=1\n");
	else
		strcpy(buf, "single_signon_password_enable=0\n");

	win_pwrite(&pio, buf, strlen(buf));

	sprintf(buf, "script_name=%s\n", script_name);
	win_pwrite(&pio, buf, strlen(buf));

	strcpy(buf, "quit\n");
	win_pwrite(&pio, buf, strlen(buf));
	win_pclose2(&pio);	/* closes all handles except the process handle */
	return (2);
#else
	pbs_list_head	 attrl;
	enum conn_type   cntype = ToServerDIS;
	int		 con;
	char		*credbuf = NULL;
	size_t		 credlen = 0;
	char		*destin = jobp->ji_qs.ji_destin;
	int		 encode_type;
	int		 i;
	char		 job_id[PBS_MAXSVRJOBID+1];
	attribute	*pattr;
	pid_t		 pid;
	struct attropl  *pqjatr;      /* list (single) of attropl for quejob */
	char		 script_name[MAXPATHLEN+1];
	struct work_task *ptask;
	struct  hostent *hp;
	struct in_addr   addr;
	long		 tempval;
	int 		gridproxy_cred = 0;
	int 		rpp = 0;

#ifdef  PBS_CRED_GRIDPROXY
	if (jobp->ji_extended.ji_ext.ji_credtype == PBS_CREDTYPE_GRIDPROXY)
		gridproxy_cred = 1;
#endif

	if (pbs_conf.pbs_use_tcp == 1 && move_type == MOVE_TYPE_Exec && gridproxy_cred == 0) {
		return (send_job_exec(jobp, hostaddr, port, preq));
	}

	script_name[0] = '\0';
	/* if job has a script read it from database */
	if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
		/*
		 * copy the job script from database to a temp file
		 * PBSD_jscript works with a file
		 * delete it at the end of the send
		 */
		if (svr_create_tmp_jobscript(jobp, script_name) != 0) {
			pbs_errno = PBSE_SYSTEM;
			snprintf(log_buffer, sizeof(log_buffer),
				"Failed to create temporary job script for job %s",
				jobp->ji_qs.ji_jobid);
			log_err(pbs_errno, "send_job", log_buffer);
			return -1;
		}
	}

	pid = fork();
	if (pid == -1) {	/* Error on fork */
		log_err(errno, __func__, "fork failed\n");
		pbs_errno = PBSE_SYSTEM;
		return -1;
	}

	if (pid != 0) {		/* The parent (main server) */

		ptask = set_task(WORK_Deferred_Child, pid, post_func, preq);
		if (!ptask) {
			log_err(errno, __func__, msg_err_malloc);
			return (-1);
		} else {
			ptask->wt_parm2 = jobp;
			append_link(&((job *)jobp)->ji_svrtask,
				&ptask->wt_linkobj, ptask);
		}
		return 2;
	}

	/*
	 * the child process
	 *
	 * set up signal cather for error return
	 */
	DBPRT(("%s: child started, sending to port %d\n", __func__, port))
	rpp_terminate();

	/* Unprotect child from being killed by kernel */
	daemon_protect(0, PBS_DAEMON_PROTECT_OFF);

#ifdef WIN32
	/* get host name */
	/*
	 * If host address is loopback address then do not resolve with dns
	 * Use "localhost" as the host name.
	 */
	if ((htonl(hostaddr) == loopback_addr->sin_addr.s_addr)) {
		(void)get_credential(LOCALHOST_SHORTNAME, jobp, PBS_GC_BATREQ,
			&credbuf, &credlen);
	} else {
#endif
		addr.s_addr = htonl(hostaddr);
		hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET);
		if (hp == NULL) {
			sprintf(log_buffer, "%s: h_errno=%d",
				inet_ntoa(addr), h_errno);
			log_err(-1, __func__, log_buffer);
		} else {
			/* read any credential file */
			(void)get_credential(hp->h_name, jobp, PBS_GC_BATREQ,
				&credbuf, &credlen);
		}
#ifdef WIN32
	}
#endif

	/* encode job attributes to be moved */

	CLEAR_HEAD(attrl);

	/* select attributes/resources to send based on move type */

	if (move_type == MOVE_TYPE_Exec) {
		resc_access_perm = ATR_DFLAG_MOM;
		encode_type = ATR_ENCODE_MOM;
		cntype = ToServerDIS;
	} else {
		resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR |
			ATR_DFLAG_MGWR | ATR_DFLAG_SvRD;
		encode_type = ATR_ENCODE_SVR;
		svr_dequejob(jobp);	/* clears default resource settings */
	}

	/* our job is to calc eligible time accurately and save it */
	/* on new server, accrue type should be calc afresh */
	/* Note: if job is being sent for execution on mom, then don't calc eligible time */

	if ((jobp->ji_wattr[(int)JOB_ATR_accrue_type].at_val.at_long == JOB_ELIGIBLE) &&
		(server.sv_attr[(int)SRV_ATR_EligibleTimeEnable].at_val.at_long == 1) &&
		(move_type != MOVE_TYPE_Exec)) {
		tempval = ((long)time_now - jobp->ji_wattr[(int)JOB_ATR_sample_starttime].at_val.at_long);
		jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_val.at_long += tempval;
		jobp->ji_wattr[(int)JOB_ATR_eligible_time].at_flags |= ATR_VFLAG_MODCACHE;
	}

	pattr = jobp->ji_wattr;
	for (i=0; i < (int)JOB_ATR_LAST; i++) {
		if ((job_attr_def+i)->at_flags & resc_access_perm) {
			(void)(job_attr_def+i)->at_encode(pattr+i, &attrl,
				(job_attr_def+i)->at_name, (char *)0,
				encode_type, NULL);
		}
	}
	attrl_fixlink(&attrl);


	/* save the job id for when after we purge the job */

	(void)strcpy(job_id, jobp->ji_qs.ji_jobid);

	pbs_errno = 0;
	con = -1;

	for (i=0; i<RETRY; i++) {

		/* connect to receiving server with retries */

		if (i > 0) {	/* recycle after an error */
			if (con >= 0)
				svr_disconnect(con);
			if (should_retry_route(pbs_errno) == -1) {
				/* delete the temp script file */
				unlink(script_name);
				exit(SEND_JOB_FATAL);	/* fatal error, don't retry */
			}
			sleep(1<<i);
		}
		if ((con = svr_connect(hostaddr, port, 0, cntype, rpp)) ==
			PBS_NET_RC_FATAL) {
			(void)sprintf(log_buffer, "send_job failed to %lx port %d",
				hostaddr, port);
			log_err(pbs_errno, __func__, log_buffer);

			/* delete the temp script file */
			unlink(script_name);

			if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == PBSE_BADCRED))
				exit(SEND_JOB_NODEDW);

			exit(SEND_JOB_FATAL);
		} else if (con == PBS_NET_RC_RETRY) {
			pbs_errno = ECONNREFUSED;	/* should retry */
			continue;
		}

		/*
		 * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
		 * we are recovering after being down or a late failure, we
		 * just want to send the commit"
		 */

		if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) {

			if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) {
				jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;
			}

			pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;
			if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin,
				pqjatr, (char *)0, rpp, NULL) == 0) {
				if (pbs_errno == PBSE_JOBEXIST &&
					move_type == MOVE_TYPE_Exec) {
					/* already running, mark it so */
					log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						"Mom reports job already running");
					exit(SEND_JOB_OK);
				}
				else if ((pbs_errno == PBSE_HOOKERROR) ||
					(pbs_errno == PBSE_HOOK_REJECT)  ||
					(pbs_errno == PBSE_HOOK_REJECT_RERUNJOB)  ||
					(pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) {
					char		name_buf[MAXPATHLEN+1];
					int		rfd;
					int		len;
					char		*reject_msg;
					int		err;

					err = pbs_errno;

					reject_msg = pbs_geterrmsg(con);
					(void)sprintf(log_buffer,
						"send of job to %s failed error = %d reject_msg=%s",
						destin, err,
						reject_msg?reject_msg:"");
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						log_buffer);

					(void)strcpy(name_buf, path_hooks_workdir);
					(void)strcat(name_buf, jobp->ji_qs.ji_jobid);
					(void)strcat(name_buf, HOOK_REJECT_SUFFIX);

					if ((reject_msg != NULL) &&
						(reject_msg[0] != '\0')) {

						if ((rfd = open(name_buf,
							O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) {
							sprintf(log_buffer,
								"open of reject file %s failed: errno %d",
								name_buf, errno);
							log_event(PBSEVENT_JOB,
								PBS_EVENTCLASS_JOB,
								LOG_INFO, jobp->ji_qs.ji_jobid,
								log_buffer);
						} else {
#ifdef WIN32
							secure_file(name_buf, "Administrators",
								READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
							setmode(rfd, O_BINARY);
#endif
							len = strlen(reject_msg)+1;
							/* write also trailing null char */
							if (write(rfd, reject_msg, len) != len) {
								sprintf(log_buffer,
									"write to file %s incomplete: errno %d", name_buf, errno);
								log_event(PBSEVENT_JOB,
									PBS_EVENTCLASS_JOB,
									LOG_INFO, jobp->ji_qs.ji_jobid,
									log_buffer);
							}
							close(rfd);
						}
					}

					if (err == PBSE_HOOKERROR)
						exit(SEND_JOB_HOOKERR);
					if (err == PBSE_HOOK_REJECT)
						exit(SEND_JOB_HOOK_REJECT);
					if (err == PBSE_HOOK_REJECT_RERUNJOB)
						exit(SEND_JOB_HOOK_REJECT_RERUNJOB);
					if (err == PBSE_HOOK_REJECT_DELETEJOB)
						exit(SEND_JOB_HOOK_REJECT_DELETEJOB);
				}
				else {
					(void)sprintf(log_buffer,
						"send of job to %s failed error = %d",
						destin, pbs_errno);
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						log_buffer);
					continue;
				}
			}

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				if (PBSD_jscript(con, script_name, rpp, NULL) != 0)
					continue;
			}

			if (credlen > 0) {
				int	ret;

				ret = PBSD_jcred(con,
					jobp->ji_extended.ji_ext.ji_credtype,
					credbuf, credlen, rpp, NULL);
				if ((ret == 0) || (i == (RETRY - 1)))
					free(credbuf);	/* free credbuf if cred info is sent successfully OR */
				/* at the end of all retry attempts */
				if (ret != 0)
					continue;
			}

			if ((move_type == MOVE_TYPE_Exec) &&
				(jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
				(hostaddr !=  pbs_server_addr)) {
				/* send files created on prior run */
				if ((move_job_file(con, jobp, StdOut, rpp, NULL) != 0) ||
					(move_job_file(con, jobp, StdErr, rpp, NULL) != 0) ||
					(move_job_file(con, jobp, Chkpt, rpp, NULL) != 0))
					continue;
			}

			jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;
		}

		if (PBSD_rdytocmt(con, job_id, rpp, NULL) != 0)
			continue;

		if (PBSD_commit(con, job_id, rpp, NULL) != 0) {
			/* delete the temp script file */
			unlink(script_name);
			exit(SEND_JOB_FATAL);
		}
		svr_disconnect(con);

		/* delete the temp script file */
		unlink(script_name);

		exit(SEND_JOB_OK);	/* This child process is all done */
	}
	if (con >= 0)
		svr_disconnect(con);
	/*
	 * If connection is actively refused by the execution node(or mother superior) OR
	 * the execution node(or mother superior) is rejecting request with error
	 * PBSE_BADHOST(failing to authorize server host), the node should be marked down.
	 */
	if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == ECONNREFUSED  || pbs_errno == PBSE_BADHOST)) {
		i = SEND_JOB_NODEDW;
	} else if (should_retry_route(pbs_errno) == -1) {
		i = SEND_JOB_FATAL;
	} else {
		i = SEND_JOB_RETRY;
	}
	(void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno);
	log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE,
		jobp->ji_qs.ji_jobid, log_buffer);

	/* delete the temp script file */
	unlink(script_name);

	exit(i);
	return -1;		/* NOT REACHED */

#endif /* !WIN32 */
}
Exemplo n.º 14
0
int
main(int argc, char **argv, char **envp) /* qselect */
{
    int c;
    int errflg=0;
    char *errmsg;

#define MAX_OPTARG_LEN 256
#define MAX_RESOURCE_NAME_LEN 256
    char optargout[MAX_OPTARG_LEN+1];
    char resource_name[MAX_RESOURCE_NAME_LEN+1];

    enum batch_op op;
    enum batch_op *pop = &op;

    struct attropl *select_list = 0;

    static char destination[PBS_MAXQUEUENAME+1] = "";
    char server_out[MAXSERVERNAME] = "";

    char *queue_name_out;
    char *server_name_out;

    int connect;
    char **selectjob_list;
    char *res_pos;
    char *pc;
    time_t after;
    char a_value[80];
    char extendopts[4] = "";
    char *attr_time = NULL;
    struct ecl_attribute_errors *err_list;
    char *resc_time = NULL;

#define GETOPT_ARGS "a:A:c:h:HJl:N:p:q:r:s:t:Tu:xP:"

    /*test for real deal or just version and exit*/

    execution_mode(argc, argv);

#ifdef WIN32
    winsock_init();
#endif

    while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF)
        switch (c) {
        case 'a':
            check_op(optarg, pop, optargout);
            if ((after = cvtdate(optargout)) < 0) {
                fprintf(stderr, "qselect: illegal -a value\n");
                errflg++;
                break;
            }
            sprintf(a_value, "%ld", (long)after);
            set_attrop(&select_list, ATTR_a, NULL, a_value, op);
            break;
        case 'c':
            check_op(optarg, pop, optargout);
            pc = optargout;
            while (isspace((int)*pc)) pc++;
            if (strlen(pc) == 0) {
                fprintf(stderr, "qselect: illegal -c value\n");
                errflg++;
                break;
            }
            set_attrop(&select_list, ATTR_c, NULL, optargout, op);
            break;
        case 'h':
            check_op(optarg, pop, optargout);
            pc = optargout;
            while (isspace((int)*pc)) pc++;
            set_attrop(&select_list, ATTR_h, NULL, optargout, op);
            break;
        case 'J':
            op = EQ;
            set_attrop(&select_list, ATTR_array, NULL, "True", op);
            break;
        case 'l':
            res_pos = optarg;
            while (*res_pos != '\0') {
                if (check_res_op(res_pos, resource_name, pop, optargout, &res_pos) != 0) {
                    errflg++;
                    break;
                }
                set_attrop(&select_list, ATTR_l, resource_name, optargout, op);
            }
            break;
        case 'p':
            check_op(optarg, pop, optargout);
            set_attrop(&select_list, ATTR_p, NULL, optargout, op);
            break;
        case 'q':
            strcpy(destination, optarg);
            check_op(optarg, pop, optargout);
            set_attrop(&select_list, ATTR_q, NULL, optargout, op);
            break;
        case 'r':
            op = EQ;
            pc = optarg;
            while (isspace((int)(*pc))) pc++;
            if (*pc != 'y' && *pc != 'n') { /* qselect specific check - stays */
                fprintf(stderr, "qselect: illegal -r value\n");
                errflg++;
                break;
            }
            set_attrop(&select_list, ATTR_r, NULL, pc, op);
            break;
        case 's':
            check_op(optarg, pop, optargout);
            pc = optargout;
            while (isspace((int)(*pc))) pc++;
            set_attrop(&select_list, ATTR_state, NULL, optargout, op);
            break;
        case 't':
            if (get_tsubopt(*optarg, &attr_time, &resc_time)) {
                fprintf(stderr, "qselect: illegal -t value\n");
                errflg++;
                break;
            }
            /* 1st character possess the subopt, so send optarg++ */
            optarg ++;
            check_op(optarg, pop, optargout);
            if ((after = cvtdate(optargout)) < 0) {
                fprintf(stderr, "qselect: illegal -t value\n");
                errflg++;
                break;
            }
            sprintf(a_value, "%ld", (long)after);
            set_attrop(&select_list, attr_time, resc_time, a_value, op);
            break;
        case 'T':
            if (strchr(extendopts, (int)'T') == NULL)
                (void)strcat(extendopts, "T");
            break;
        case 'x':
            if (strchr(extendopts, (int)'x') == NULL)
                (void)strcat(extendopts, "x");
            break;
        case 'H':
            op = EQ;
            if (strchr(extendopts, (int)'x') == NULL)
                (void)strcat(extendopts, "x");
            set_attrop(&select_list, ATTR_state, NULL, "FM", op);
            break;
        case 'u':
            op = EQ;
            set_attrop(&select_list, ATTR_u, NULL, optarg, op);
            break;
        case 'A':
            op = EQ;
            set_attrop(&select_list, ATTR_A, NULL, optarg, op);
            break;
        case 'P':
            op = EQ;
            set_attrop(&select_list, ATTR_project, NULL, optarg, op);
            break;
        case 'N':
            op = EQ;
            set_attrop(&select_list, ATTR_N, NULL, optarg, op);
            break;
        default :
            errflg++;
        }

    if (errflg || (optind < argc)) {
        print_usage();
        exit(2);
    }

    if (notNULL(destination)) {
        if (parse_destination_id(destination, &queue_name_out, &server_name_out)) {
            fprintf(stderr, "qselect: illegally formed destination: %s\n", destination);
            exit(2);
        } else {
            if (notNULL(server_name_out)) {
                strcpy(server_out, server_name_out);
            }
        }
    }

    /*perform needed security library initializations (including none)*/

    if (CS_client_init() != CS_SUCCESS) {
        fprintf(stderr, "qselect: unable to initialize security library.\n");
        exit(2);
    }

    connect = cnt2server(server_out);
    if (connect <= 0) {
        fprintf(stderr, "qselect: cannot connect to server %s (errno=%d)\n",
                pbs_server, pbs_errno);

        /*cleanup security library initializations before exiting*/
        CS_close_app();

        exit(pbs_errno);
    }

    if (extendopts[0] == '\0')
        selectjob_list = pbs_selectjob(connect, select_list, NULL);
    else
        selectjob_list = pbs_selectjob(connect, select_list, extendopts);
    if (selectjob_list == NULL) {
        if ((err_list = pbs_get_attributes_in_error(connect)))
            handle_attribute_errors(err_list);

        if (pbs_errno != PBSE_NONE) {
            errmsg = pbs_geterrmsg(connect);
            if (errmsg != NULL) {
                fprintf(stderr, "qselect: %s\n", errmsg);
            } else {
                fprintf(stderr, "qselect: Error (%d) selecting jobs\n", pbs_errno);
            }

            /*
             * If the server is not configured for history jobs i.e.
             * job_history_enable svr attr is unset/set to FALSE, qselect
             * command with -x/-H option is being used, then pbs_selectjob()
             * will return PBSE_JOBHISTNOTSET error code. But command will
             * exit with exit code '0'after printing the corresponding error
             * message. i.e. "job_history_enable is set to false"
             */
            if (pbs_errno == PBSE_JOBHISTNOTSET)
                pbs_errno = 0;

            /*cleanup security library initializations before exiting*/
            CS_close_app();
            exit(pbs_errno);
        }
    } else {   /* got some jobs ids */
        int i = 0;
        while (selectjob_list[i] != NULL) {
            printf("%s\n", selectjob_list[i++]);
        }
        free(selectjob_list);
    }
    pbs_disconnect(connect);

    /*cleanup security library initializations before exiting*/
    CS_close_app();

    exit(0);
}
Exemplo n.º 15
0
/**
 * @brief
 *	The main function in C - entry point
 *
 * @param[in]  argc - argument count
 * @param[in]  argv - pointer to argument array
 * @param[in]  envp - pointer to environment values
 *
 * @return  int
 * @retval  0 - success
 * @retval  !0 - error
 */
int
main(int argc, char *argv[], char *envp[])
{
	int errflg;			/* command line option error */
	int connect;			/* return from pbs_connect */
	char *errmsg;			/* return from pbs_geterrmsg */
	char destbuf[256];		/* buffer for option server */
	struct attrl *attrib;		/* the attrib list */
	char *new_resvname;		/* the name returned from pbs_submit_resv */
	struct ecl_attribute_errors *err_list;
	char *interactive = NULL;

	/*test for real deal or just version and exit*/

	execution_mode(argc, argv);

#ifdef WIN32
	winsock_init();
#endif

	destbuf[0] = '\0';
	errflg = process_opts(argc, argv, &attrib, destbuf); /* get cmdline options */

	if (errflg || ((optind+1) < argc) || argc == 1) {
		print_usage();
		exit(2);
	}

	/* Get any required environment variables needing to be sent. */
	if (! set_resv_env(envp)) {
		fprintf(stderr, "pbs_rsub: can't send environment with the reservation\n");
		exit(3);
	}

	/*perform needed security library initializations (including none)*/

	if (CS_client_init() != CS_SUCCESS) {
		fprintf(stderr, "pbs_rsub: unable to initialize security library.\n");
		exit(1);
	}

	/* Connect to the server */
	connect = cnt2server(destbuf);
	if (connect <= 0) {
		fprintf(stderr, "pbs_rsub: cannot connect to server %s (errno=%d)\n",
			pbs_server, pbs_errno);
		CS_close_app();
		exit(pbs_errno);
	}

	if (qmoveflg == TRUE) {
		qmoveflg = FALSE;
		interactive = get_attr(attrib, ATTR_inter, NULL);
		if (interactive == NULL) {
			set_attr(&attrib, ATTR_inter, DEFAULT_INTERACTIVE);
		} else {
			if (atoi(interactive) > -1) {
				fprintf(stderr, "pbs_rsub: -I <timeout> value must be negative when used with -Wqmove option.\n");
				CS_close_app();
				exit(2);
			}
		}
		errflg = cnvrt_proc_attrib(connect, &attrib, destbuf);
		if (errflg) {
			fprintf(stderr, "pbs_rsub: can't make a reservation with the qmove option\n");
			CS_close_app();
			exit(2);
		}
	}

	pbs_errno = 0;
	new_resvname = pbs_submit_resv(connect, (struct attropl *)attrib, NULL);
	if (new_resvname == NULL) {
		if ((err_list = pbs_get_attributes_in_error(connect)))
			handle_attribute_errors(err_list);

		errmsg = pbs_geterrmsg(connect);
		if (errmsg != NULL) {
			fprintf(stderr, "pbs_rsub: %s\n", errmsg);
		}
		else
			fprintf(stderr, "pbs_rsub: Error (%d) submitting reservation\n", pbs_errno);
		CS_close_app();
		exit(pbs_errno);
	} else {
		printf("%s\n", new_resvname);
		free(new_resvname);
	}

	/* Disconnet from the server. */
	pbs_disconnect(connect);

	CS_close_app();
	exit(0);
}
Exemplo n.º 16
0
int main(

  int    argc,
  char **argv)

  {
  int c;
  int errflg = 0;
  char *errmsg;

#define MAX_OPTARG_LEN 256
#define MAX_RESOURCE_NAME_LEN 256
  char optargout[MAX_OPTARG_LEN+1];
  char resource_name[MAX_RESOURCE_NAME_LEN+1];

  enum batch_op op;
  enum batch_op *pop = &op;

  struct attropl *select_list = 0;

  static char destination[PBS_MAXROUTEDEST+1] = "";
  char server_out[MAXSERVERNAME] = "";

  char *queue_name_out;
  char *server_name_out;

  int connect;
  char **selectjob_list;
  char *res_pos;
  char *pc;
  int u_cnt, o_cnt, s_cnt, n_cnt;
  time_t after;
  char a_value[80];
  int exec_only = 0;

  if (getenv("PBS_QSTAT_EXECONLY") != NULL)
    exec_only = 1;

#define GETOPT_ARGS "a:A:ec:h:l:N:p:q:r:s:u:"

  while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF)
    switch (c)
      {

      case 'a':
        check_op(optarg, pop, optargout);

        if ((after = cvtdate(optargout)) < 0)
          {
          fprintf(stderr, "qselect: illegal -a value\n");
          errflg++;
          break;
          }

        sprintf(a_value, "%ld", (long)after);

        set_attrop(&select_list, ATTR_a, NULL, a_value, op);
        break;

      case 'e':
        exec_only = 1;
        break;

      case 'c':
        check_op(optarg, pop, optargout);
        pc = optargout;

        while (isspace((int)*pc)) pc++;

        if (strlen(pc) == 0)
          {
          fprintf(stderr, "qselect: illegal -c value\n");
          errflg++;
          break;
          }

        if (strcmp(pc, "u") == 0)
          {
          if ((op != EQ) && (op != NE))
            {
            fprintf(stderr, "qselect: illegal -c value\n");
            errflg++;
            break;
            }
          }
        else if ((strcmp(pc, "n") != 0) &&
                 (strcmp(pc, "s") != 0) &&
                 (strcmp(pc, "c") != 0))
          {
          if (strncmp(pc, "c=", 2) != 0)
            {
            fprintf(stderr, "qselect: illegal -c value\n");
            errflg++;
            break;
            }

          pc += 2;

          if (strlen(pc) == 0)
            {
            fprintf(stderr, "qselect: illegal -c value\n");
            errflg++;
            break;
            }

          while (*pc != '\0')
            {
            if (!isdigit((int)*pc))
              {
              fprintf(stderr, "qselect: illegal -c value\n");
              errflg++;
              break;
              }

            pc++;
            }
          }

        set_attrop(&select_list, ATTR_c, NULL, optargout, op);

        break;

      case 'h':
        check_op(optarg, pop, optargout);
        pc = optargout;

        while (isspace((int)*pc)) pc++;

        if (strlen(pc) == 0)
          {
          fprintf(stderr, "qselect: illegal -h value\n");
          errflg++;
          break;
          }

        u_cnt = o_cnt = s_cnt = n_cnt = 0;

        while (*pc)
          {
          if (*pc == 'u')
            u_cnt++;
          else if (*pc == 'o')
            o_cnt++;
          else if (*pc == 's')
            s_cnt++;
          else if (*pc == 'n')
            n_cnt++;
          else
            {
            fprintf(stderr, "qselect: illegal -h value\n");
            errflg++;
            break;
            }

          pc++;
          }

        if (n_cnt && (u_cnt + o_cnt + s_cnt))
          {
          fprintf(stderr, "qselect: illegal -h value\n");
          errflg++;
          break;
          }

        set_attrop(&select_list, ATTR_h, NULL, optargout, op);

        break;

      case 'l':
        res_pos = optarg;

        while (*res_pos != '\0')
          {
          if (check_res_op(res_pos, resource_name, pop, optargout, &res_pos) != 0)
            {
            errflg++;
            break;
            }

          set_attrop(&select_list, ATTR_l, resource_name, optargout, op);
          }

        break;

      case 'p':
        check_op(optarg, pop, optargout);
        set_attrop(&select_list, ATTR_p, NULL, optargout, op);
        break;

      case 'q':
        strncpy(destination, optarg, PBS_MAXROUTEDEST);
        check_op(optarg, pop, optargout);
        set_attrop(&select_list, ATTR_q, NULL, optargout, op);
        break;

      case 'r':
        op = EQ;
        pc = optarg;

        while (isspace((int)(*pc))) pc++;

        if (strlen(pc) != 1)
          {
          fprintf(stderr, "qsub: illegal -r value\n");
          errflg++;
          break;
          }

        if (*pc != 'y' && *pc != 'n')
          {
          fprintf(stderr, "qsub: illegal -r value\n");
          errflg++;
          break;
          }

        set_attrop(&select_list, ATTR_r, NULL, pc, op);

        break;

      case 's':
        check_op(optarg, pop, optargout);
        pc = optargout;

        while (isspace((int)(*pc))) pc++;

        if (strlen(optarg) == 0)
          {
          fprintf(stderr, "qselect: illegal -s value\n");
          errflg++;
          break;
          }

        while (*pc)
          {
          if (*pc != 'C' && *pc != 'E' && *pc != 'H' && 
              *pc != 'Q' && *pc != 'R' && *pc != 'T' && 
              *pc != 'W')
            {
            fprintf(stderr, "qselect: illegal -s value\n");
            errflg++;
            break;
            }

          pc++;
          }

        set_attrop(&select_list, ATTR_state, NULL, optargout, op);

        break;

      case 'u':
        op = EQ;

        if (parse_at_list(optarg, FALSE, FALSE))
          {
          fprintf(stderr, "qselect: illegal -u value\n");
          errflg++;
          break;
          }

        set_attrop(&select_list, ATTR_u, NULL, optarg, op);

        break;

      case 'A':
        op = EQ;
        set_attrop(&select_list, ATTR_A, NULL, optarg, op);
        break;

      case 'N':
        op = EQ;
        set_attrop(&select_list, ATTR_N, NULL, optarg, op);
        break;

      default :
        errflg++;
      }

  if (errflg || (optind < argc))
    {
    static char usage[] = "usage: qselect \
                          [-a [op]date_time] [-A account_string] [-e] [-c [op]interval] \n\
                          [-h hold_list] [-l resource_list] [-N name] [-p [op]priority] \n\
                          [-q destination] [-r y|n] [-s states] [-u user_name]\n";
    fprintf(stderr,"%s", usage);
    exit(2);
    }

  if (notNULL(destination))
    {
    if (parse_destination_id(destination, &queue_name_out, &server_name_out))
      {
      fprintf(stderr, "qselect: illegally formed destination: %s\n", destination);
      exit(2);
      }
    else
      {
      if (notNULL(server_name_out))
        {
        strcpy(server_out, server_name_out);
        }
      }
    }

  connect = cnt2server(server_out);

  if (connect <= 0)
    {
    fprintf(stderr, "qselect: cannot connect to server %s (errno=%d) %s\n",
            pbs_server, pbs_errno, pbs_strerror(pbs_errno));
    exit(pbs_errno);
    }

  selectjob_list = pbs_selectjob(connect, select_list, exec_only ? EXECQUEONLY : NULL);

  if (selectjob_list == NULL)
    {
    if (pbs_errno != PBSE_NONE)
      {
      errmsg = pbs_geterrmsg(connect);

      if (errmsg != NULL)
        {
        fprintf(stderr, "qselect: %s\n", errmsg);
        }
      else
        {
        fprintf(stderr, "qselect: Error (%d - %s) selecting jobs\n",
                pbs_errno,
                pbs_strerror(pbs_errno));
        }

      exit(pbs_errno);
      }
    }
  else     /* got some jobs ids */
    {
    int i = 0;

    while (selectjob_list[i] != NULL)
      {
      printf("%s\n", selectjob_list[i++]);
      }

    free(selectjob_list);
    }

  pbs_disconnect(connect);

  exit(0);
  }
Exemplo n.º 17
0
int
main(int argc, char **argv, char **envp) /* pbs_release_nodes */
{
	int c;
	int errflg=0;
	int any_failed=0;

	char job_id[PBS_MAXCLTJOBID];       /* from the command line */
	char job_id_out[PBS_MAXCLTJOBID];
	char server_out[MAXSERVERNAME];
	char rmt_server[MAXSERVERNAME];
	int  len;
	char *node_list = NULL;
	int connect;
	int stat=0;
	int k;
	int all_opt = 0;

#define GETOPT_ARGS "j:a"

	/*test for real deal or just version and exit*/
	execution_mode(argc, argv);

#ifdef WIN32
	winsock_init();
#endif

	job_id[0] = '\0';
	while ((c = getopt(argc, argv, GETOPT_ARGS)) != EOF) {
		switch (c) {
			case 'j':
				strncpy(job_id, optarg, PBS_MAXCLTJOBID);
				job_id[PBS_MAXCLTJOBID-1] = '\0';
				break;
			case 'a':
				all_opt = 1;
				break;
			default :
				errflg++;
		}
	}
	if (job_id[0] == '\0') {
		char *jid;
		jid = getenv("PBS_JOBID");
		strncpy(job_id, jid?jid:"", PBS_MAXCLTJOBID);
		job_id[PBS_MAXCLTJOBID-1] = '\0';
	}

	if (errflg ||
		((optind == argc) && !all_opt) ||
		((optind != argc) && all_opt) ) {
		fprintf(stderr, USAGE);
		fprintf(stderr, USAGE2);
		fprintf(stderr, USAGE3);
		exit(2);
	}

	if (job_id[0] == '\0') {
		fprintf(stderr, "pbs_release_nodes: No jobid given\n");
		exit(2);
	}

	/*perform needed security library initializations (including none)*/

	if (CS_client_init() != CS_SUCCESS) {
		fprintf(stderr, "pbs_release_nodes: unable to initialize security library.\n");
		exit(2);
	}

	len = 0;
	for (k = optind; k < argc; k++) {
		len += (strlen(argv[k]) + 1);	/* +1 for space */
	}

	node_list = (char *)malloc(len + 1);
	if (node_list == NULL) {
		fprintf(stderr, "failed to malloc to store data (error %d)", errno);
		exit(2);
	}
	node_list[0] = '\0';

	for (k = optind; k < argc; k++) {
		if (k != optind)
			strcat(node_list, "+");
		strcat(node_list, argv[k]);
	}
	if (get_server(job_id, job_id_out, server_out)) {
		fprintf(stderr, "pbs_release_nodes: illegally formed job identifier: %s\n", job_id);
		free(node_list);
		exit(2);
	}

	pbs_errno = 0;
	stat = 0;
	while(1) {
		connect = cnt2server(server_out);
		if (connect <= 0) {
			fprintf(stderr,
				"pbs_release_nodes: cannot connect to server %s (errno=%d)\n",
							pbs_server, pbs_errno);
			break;
		}

		stat = pbs_relnodesjob(connect, job_id_out, node_list, NULL);
		if (stat && (pbs_errno == PBSE_UNKJOBID)) {
			if (locate_job(job_id_out, server_out, rmt_server)) {
				/*
				 * job located at a different server
				 * retry connect on the new server
				 */
				pbs_disconnect(connect);
				strcpy(server_out, rmt_server);
			} else {
				prt_job_err("pbs_release_nodes", connect, job_id_out);
				break;
			}
		} else {
			char *info_msg;

			if (stat && (pbs_errno != PBSE_UNKJOBID)) {
				prt_job_err("pbs_release_nodes", connect, "");
			} else if ((info_msg = pbs_geterrmsg(connect)) != NULL) {
				/* print potential warning message */
				printf("pbs_release_nodes: %s\n", info_msg);
			}
			break;
		}
	}
	any_failed = pbs_errno;

	pbs_disconnect(connect);

	/*cleanup security library initializations before exiting*/
	CS_close_app();

	exit(any_failed);
}