Ejemplo n.º 1
0
/**
 * @brief
 *		Function to migrate filesystem data to database.
 * 		Reads serverdb, scheddb, job files, node, nodestate, queue, resv information
 * 		from the filesystem and save them into the database. All the information is
 * 		recovered and saved into the database under a single database transaction,
 * 		so any failure rolls back all the updates to the database. If all the updates
 * 		to the database succeed, only then the respective files are deleted from the
 * 		filesystem, else no deletion takes place.
 *
 * @return	Error code
 * @retval	0	: success
 * @retval	-1	: Failure
 *
 */
int
svr_migrate_data_from_fs(void)
{
	int baselen;
	struct dirent *pdirent;
	DIR *dir;
	int had;
	char *job_suffix = JOB_FILE_SUFFIX;
	int job_suf_len = strlen(job_suffix);
	job *pjob = NULL;
	pbs_queue *pque;
	resc_resv *presv;
	char *psuffix;
	int rc;
	int recovered = 0;
	char    basen[MAXPATHLEN+1];
	char	scrfile[MAXPATHLEN+1];
	char	jobfile[MAXPATHLEN+1];
	char	origdir[MAXPATHLEN+1];
	int fd;
	struct stat stbuf;
	char *scrbuf = NULL;
	pbs_db_jobscr_info_t	jobscr;
	pbs_db_obj_info_t		obj;

	path_svrdb_new = build_path(path_priv, PBS_SERVERDB, new_tag);
	path_scheddb = build_path(path_priv, PBS_SCHEDDB, NULL);
	path_scheddb_new = build_path(path_priv, PBS_SCHEDDB, new_tag);
	path_queues = build_path(path_priv, PBS_QUEDIR, suffix_slash);
	path_resvs = build_path(path_priv, PBS_RESVDIR, suffix_slash);
	path_nodes = build_path(path_priv, NODE_DESCRIP, NULL);
	path_nodestate = build_path(path_priv, NODE_STATUS, NULL);

	/*    If not a "create" initialization, recover server db */
	/*    and sched db					  */
	if (chk_save_file(path_svrdb) != 0) {
		fprintf(stderr, "No serverdb found to update to datastore\n");
		return (0);
	}

	if (setup_resc(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	init_server_attrs();

	/* start a database transation for the whole recovery */
	if (pbs_db_begin_trx(svr_db_conn, 0, 0) != 0)
		return (-1);

	/* preprocess the nodes file to convert old properties to resources */
	if (setup_nodes_fs(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* Open the server database (save file) and read it in */
	if (svr_recov_fs(path_svrdb) == -1) {
		fprintf(stderr, "%s\n", msg_init_baddb);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* save server information to database now */
	if (svr_save_db(&server, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save server db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* now do sched db */
	if (sched_recov_fs(path_scheddb) == -1) {
		fprintf(stderr, "Unable to recover scheddb\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	if (sched_save_db(&scheduler, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save scheduler db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* save current working dir before any chdirs */
	if (getcwd(origdir, MAXPATHLEN) == NULL) {
		fprintf(stderr, "getcwd failed\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	if (chdir(path_queues) != 0) {
		fprintf(stderr, msg_init_chdir, path_queues);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	had = server.sv_qs.sv_numque;
	server.sv_qs.sv_numque = 0;
	dir = opendir(".");
	if (dir == (DIR *) 0) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	while (errno = 0, (pdirent = readdir(dir)) != (struct dirent *) 0) {
		if (chk_save_file(pdirent->d_name) == 0) {
			if ((pque = que_recov_fs(pdirent->d_name)) !=
				(pbs_queue *) 0) {
				/* que_recov increments sv_numque */
				fprintf(stderr, msg_init_recovque,
					pque->qu_qs.qu_name);
				fprintf(stderr, "\n");
				if (que_save_db(pque, QUE_SAVE_NEW) != 0) {
					fprintf(stderr,
						"Could not save queue info for queue %s\n",
						pque->qu_qs.qu_name);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);
	if (had != server.sv_qs.sv_numque) {
		fprintf(stderr, msg_init_expctq, had, server.sv_qs.sv_numque);
		fprintf(stderr, "\n");
	}

	/* Open and read in node list if one exists */
	if (setup_nodes_fs(0) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	/*
	 * Recover reservations.
	 */
	if (chdir(path_resvs) != 0) {
		fprintf(stderr, msg_init_chdir, path_resvs);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	dir = opendir(".");
	if (dir == (DIR *) 0) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	while (errno = 0, (pdirent = readdir(dir)) != (struct dirent *) 0) {
		if (chk_save_file(pdirent->d_name) == 0) {
			presv = (resc_resv *)
				job_or_resv_recov_fs(pdirent->d_name,
				RESC_RESV_OBJECT);
			if (presv != (resc_resv *) 0) {
				if (resv_save_db(presv, SAVERESV_NEW) != 0) {
					fprintf(stderr,
						"Could not save resv info for resv %s\n",
						presv->ri_qs.ri_resvID);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);

	/*
	 *   Recover jobs
	 */
	if (chdir(path_jobs) != 0) {
		fprintf(stderr, msg_init_chdir, path_jobs);
		fprintf(stderr, "\n");
		chdir(origdir);
		return (-1);
	}

	had = server.sv_qs.sv_numjobs;
	server.sv_qs.sv_numjobs = 0;
	recovered = 0;
	dir = opendir(".");
	if (dir == (DIR *) 0) {
		if (had == 0) {
			fprintf(stderr, msg_init_nojobs);
		} else {
			fprintf(stderr, msg_init_exptjobs, had, 0);
		}
		fprintf(stderr, "\n");
	} else {
		/* Now, for each job found ... */
		while (errno = 0,
			(pdirent = readdir(dir)) != (struct dirent *) 0) {
			if (chk_save_file(pdirent->d_name) != 0)
				continue;

			/* recover the job */
			baselen = strlen(pdirent->d_name) - job_suf_len;
			psuffix = pdirent->d_name + baselen;
			if (strcmp(psuffix, job_suffix))
				continue;

			if ((pjob = job_recov_fs(pdirent->d_name,  RECOV_SUBJOB)) == NULL) {
				(void)strcpy(basen, pdirent->d_name);
				psuffix = basen + baselen;
				(void)strcpy(psuffix, JOB_BAD_SUFFIX);
				(void)snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
					basen);
				log_event(PBSEVENT_SYSTEM,
					PBS_EVENTCLASS_SERVER, LOG_NOTICE,
					msg_daemonname, log_buffer);
				continue;
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* load the job script file */
				strcpy(scrfile, path_jobs);
#ifndef WIN32
				/* under WIN32, there's already a prefixed '/' */
				(void) strcat(scrfile, "/");
#endif
				strcat(scrfile, pdirent->d_name);
				baselen = strlen(scrfile) - strlen(JOB_FILE_SUFFIX);
				scrfile[baselen] = 0; /* put null char */
				strcat(scrfile, JOB_SCRIPT_SUFFIX);
				rc = 1;
#ifdef WIN32
				if ((fd = open(scrfile, O_BINARY | O_RDONLY)) != -1)
#else
				if ((fd = open(scrfile, O_RDONLY)) != -1)
#endif
				{
					/* load the script */
					if (fstat(fd, &stbuf) == 0) {
						if ((scrbuf = malloc(stbuf.st_size + 1))) {
							if (read(fd, scrbuf, stbuf.st_size) == stbuf.st_size) {
								scrbuf[stbuf.st_size] = '\0'; /* null character */
								rc = 0; /* success loading */
							}
						}
					}
					close(fd);
				}

				if (rc != 0) {
					fprintf(stderr, "Could not recover script file for job %s\n", pjob->ji_qs.ji_jobid);
					(void) strcpy(basen, scrfile);
					psuffix = basen + strlen(scrfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_BAD_SUFFIX);

					(void) strcpy(jobfile, scrfile);
					psuffix = jobfile + strlen(jobfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_FILE_SUFFIX);
#ifdef WIN32
					if (MoveFileEx(jobfile, basen,
						MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) {
						errno = GetLastError();
						snprintf(log_buffer, sizeof(log_buffer), "MoveFileEx(%s, %s) failed!",
							jobfile, basen);
						log_err(errno, "script", log_buffer);

					}
					secure_file(basen, "Administrators",
						READS_MASK | WRITES_MASK | STANDARD_RIGHTS_REQUIRED);
#else
					if (rename(jobfile, basen) == -1) {
						snprintf(log_buffer, sizeof(log_buffer), "error renaming job file %s",
							jobfile);
						log_err(errno, "job_recov", log_buffer);
					}
#endif
					(void) snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
						basen);
					log_event(PBSEVENT_SYSTEM,
						PBS_EVENTCLASS_SERVER, LOG_NOTICE,
						msg_daemonname, log_buffer);
					free(scrbuf);
					scrbuf = NULL;
					continue;
				}
			}

			/* now save job first */
			if (job_save_db(pjob, SAVEJOB_NEW) != 0) {
				fprintf(stderr, "Could not save job info for jobid %s\n",
					pjob->ji_qs.ji_jobid);
				if (svr_db_conn->conn_db_err)
					fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
				(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
				(void) closedir(dir);
				chdir(origdir);
				free(scrbuf);
				return (-1);
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* save job script */
				strcpy(jobscr.ji_jobid, pjob->ji_qs.ji_jobid);
				jobscr.script = scrbuf;
				obj.pbs_db_obj_type = PBS_DB_JOBSCR;
				obj.pbs_db_un.pbs_db_jobscr = &jobscr;
				if (pbs_db_insert_obj(svr_db_conn, &obj) != 0) {
					fprintf(stderr, "Could not save job script for jobid %s\n",
						pjob->ji_qs.ji_jobid);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
					free(scrbuf);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
				free(scrbuf);
				scrbuf = NULL;
			}

			recovered++;
		}
		if (errno != 0 && errno != ENOENT) {
			if (pjob)
				fprintf(stderr, "readdir error for jobid %s\n", pjob->ji_qs.ji_jobid);
			else
				fprintf(stderr, "readdir error\n");
			(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
			free(scrbuf);
			(void) closedir(dir);
			chdir(origdir);
			return (-1);
		}
		(void) closedir(dir);
		if (had != recovered) {
			fprintf(stderr, msg_init_exptjobs, had, recovered);
			fprintf(stderr, "\n");
		}
	}

	if (save_nodes_db(0) != 0) {
		fprintf(stderr, "Could not save nodes\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	if (pbs_db_end_trx(svr_db_conn, PBS_DB_COMMIT) == 0) {
		rm_migrated_files(path_priv);
		chdir(origdir);
		return (0);
	}
	chdir(origdir);
	return -1;
}
Ejemplo n.º 2
0
/**
 * @brief
 * 	main - the initialization and main loop of pbs_daemon
 */
int
main(int argc, char *argv[])
{
	char		jobfile[MAXPATHLEN+1];
	char		jobfile_full[MAXPATHLEN+1];
	pbs_net_t	hostaddr = 0;
	int			port = -1;
	int			move_type = -1;
	pbs_list_head	attrl;
	enum conn_type  cntype = ToServerDIS;
	int		con = -1;
	char		*destin;
	int			encode_type;
	int			i;
	job			*jobp;
	char		 job_id[PBS_MAXSVRJOBID+1];
	attribute	*pattr;
	struct attropl  *pqjatr;      /* list (single) of attropl for quejob */
	char		 script_name[MAXPATHLEN+1];
	int			in_server = -1;
	char		*param_name, *param_val;
	char		buf[4096];
	struct  hostent *hp;
	struct in_addr  addr;
	char            *credbuf = NULL;
	size_t		credlen = 0;
	int 		prot = PROT_TCP;
	/*the real deal or output version and exit?*/

	execution_mode(argc, argv);

	/* If we are not run with real and effective uid of 0, forget it */

	pbs_loadconf(0);

	if (!isAdminPrivilege(getlogin())) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		exit(SEND_JOB_FATAL);
	}

	/* initialize the pointers in the resource_def array */
	for (i = 0; i < (svr_resc_size - 1); ++i)
		svr_resc_def[i].rs_next = &svr_resc_def[i+1];
	/* last entry is left with null pointer */
	/* set single threaded mode */
	pbs_client_thread_set_single_threaded_mode();
	/* disable attribute verification */
	set_no_attribute_verification();

	/* initialize the thread context */
	if (pbs_client_thread_init_thread_context() != 0) {
		fprintf(stderr, "%s: Unable to initialize thread context\n",
			argv[0]);
		exit(SEND_JOB_FATAL);
	}

	if(set_msgdaemonname("PBS_send_job")) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	winsock_init();

	connection_init();

	while (fgets(buf, sizeof(buf), stdin) != NULL) {
		buf[strlen(buf)-1] = '\0';	/* gets rid of newline */

		param_name = buf;
		param_val = strchr(buf, '=');
		if (param_val) {
			*param_val = '\0';
			param_val++;
		} else {	/* bad param_val -- skipping */
			break;
		}

		if (strcmp(param_name, "jobfile") == 0) {
			jobfile[0] = '\0';
			strncpy(jobfile, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "destaddr") == 0) {
			hostaddr = atol(param_val);
		} else if (strcmp(param_name, "destport") == 0) {
			port = atoi(param_val);
		} else if (strcmp(param_name, "move_type") == 0) {
			move_type = atoi(param_val);
		} else if (strcmp(param_name, "in_server") == 0) {
			in_server = atoi(param_val);
		} else if (strcmp(param_name, "server_name") == 0) {
			server_name[0] = '\0';
			strncpy(server_name, param_val, PBS_MAXSERVERNAME);
		} else if (strcmp(param_name, "server_host") == 0) {
			server_host[0] = '\0';
			strncpy(server_host, param_val, (sizeof(server_host) - 1));
		} else if (strcmp(param_name, "server_addr") == 0) {
			pbs_server_addr = atol(param_val);
		} else if (strcmp(param_name, "server_port") == 0) {
			pbs_server_port_dis = atoi(param_val);
		} else if (strcmp(param_name, "log_file") == 0) {
			log_file = strdup(param_val);
		} else if (strcmp(param_name, "path_log") == 0) {
			path_log[0] = '\0';
			strncpy(path_log, param_val, MAXPATHLEN);
		} else if (strcmp(param_name, "path_jobs") == 0) {
			path_jobs = strdup(param_val);
		} else if (strcmp(param_name, "path_spool") == 0) {
			path_spool = strdup(param_val);
		} else if (strcmp(param_name, "path_rescdef") == 0) {
			path_rescdef = strdup(param_val);
		} else if (strcmp(param_name, "path_users") == 0) {
			path_users = strdup(param_val);
		} else if (strcmp(param_name, "path_hooks_workdir") == 0) {
			path_hooks_workdir = strdup(param_val);
			if (path_hooks_workdir == NULL)
				exit(SEND_JOB_FATAL);
		} else if (strcmp(param_name, "svr_history_enable") == 0) {
			svr_history_enable = atol(param_val);
		} else if (strcmp(param_name, "svr_history_duration") == 0) {
			svr_history_duration = atol(param_val);
		} else if (strcmp(param_name, "single_signon_password_enable") == 0) {
			if (decode_b(&server.sv_attr[(int)SRV_ATR_ssignon_enable],
				NULL, NULL, param_val) != 0) {
				fprintf(stderr, "%s: failed to set ssignon_password_enable\n", argv[0]);
				exit(SEND_JOB_FATAL);
			}
		} else if (strcmp(param_name, "script_name") == 0) {
			strncpy(script_name, param_val, MAXPATHLEN + 1);
		} else
			break;
	}

	time(&time_now);

	(void)log_open_main(log_file, path_log, 1); /* silent open */

	if (setup_resc(1) == -1) {
		/* log_buffer set in setup_resc */
		log_err(-1, "pbsd_send_job(setup_resc)", log_buffer);
		return (-1);
	}

	if( strlen(jobfile) == 0 || hostaddr == 0 || port == 0 ||  move_type == -1 || \
			in_server == -1 || strlen(server_name) == 0 || strlen(server_host) == 0 || \
			pbs_server_addr == 0 || pbs_server_port_dis == 0 || \
			strlen(path_log) == 0 || path_jobs == NULL || \
			path_spool == NULL || path_users == NULL ) {
		log_err(-1, "pbs_send_job", "error on one of the parameters");
		log_close(0);	/* silent close */
		exit(SEND_JOB_FATAL);
	}

	CLEAR_HEAD(task_list_immed);
	CLEAR_HEAD(task_list_timed);
	CLEAR_HEAD(task_list_event);
	CLEAR_HEAD(svr_queues);
	CLEAR_HEAD(svr_alljobs);
	CLEAR_HEAD(svr_newjobs);
	CLEAR_HEAD(svr_allresvs);
	CLEAR_HEAD(svr_newresvs);
	CLEAR_HEAD(svr_deferred_req);
	CLEAR_HEAD(svr_unlicensedjobs);

	strcpy(jobfile_full, path_jobs);
	strcat(jobfile_full, jobfile);

	if (chk_save_file(jobfile_full) != 0) {
		sprintf(log_buffer, "Error opening jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	if ((jobp=job_recov_fs(jobfile, RECOV_SUBJOB)) == NULL) {
		sprintf(log_buffer, "Failed to recreate job in jobfile=%s", jobfile);
		log_err(-1, __func__, log_buffer);
		goto fatal_exit;
	}

	/* now delete the temp job file that was created by job_save_fs in server code
	 * jobs are in database now, no need to keep in filesystem
	 */
	unlink(jobfile_full);

	if (in_server)
		append_link(&svr_alljobs, &jobp->ji_alljobs, jobp);


	/* select attributes/resources to send based on move type */

	if (move_type == MOVE_TYPE_Exec) {
		resc_access_perm = ATR_DFLAG_MOM;
		encode_type = ATR_ENCODE_MOM;
		cntype = ToServerDIS;
	} else {
		resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR |
			ATR_DFLAG_MGWR | ATR_DFLAG_SvRD;
		encode_type = ATR_ENCODE_SVR;
		svr_dequejob(jobp);
	}

	CLEAR_HEAD(attrl);
	pattr = jobp->ji_wattr;
	for (i=0; i < (int)JOB_ATR_LAST; i++) {
		if ((job_attr_def+i)->at_flags & resc_access_perm) {
			(void)(job_attr_def+i)->at_encode(pattr+i, &attrl,
				(job_attr_def+i)->at_name, NULL,
				encode_type, NULL);
		}
	}
	attrl_fixlink(&attrl);

	/* script name is passed from parent */

	/* get host name */
	pbs_loadconf(0);
	addr.s_addr = htonl(hostaddr);
	hp = gethostbyaddr((void *)&addr, sizeof(struct in_addr), AF_INET);
	if (hp == NULL) {
		sprintf(log_buffer, "%s: h_errno=%d",
			inet_ntoa(addr), h_errno);
		log_err(-1, __func__, log_buffer);
	}
	else {
		/* read any credential file */
		(void)get_credential(hp->h_name, jobp,  PBS_GC_BATREQ,
			&credbuf, &credlen);
	}
	/* save the job id for when after we purge the job */

	(void)strcpy(job_id, jobp->ji_qs.ji_jobid);

	con = -1;

	DIS_tcparray_init();

	for (i=0; i<RETRY; i++) {

		pbs_errno = 0;
		/* connect to receiving server with retries */

		if (i > 0) {	/* recycle after an error */
			if (con >= 0)
				svr_disconnect(con);
			if (should_retry_route(pbs_errno) == -1) {
				goto fatal_exit;	/* fatal error, don't retry */
			}
			sleep(1<<i);
		}
		if ((con = svr_connect(hostaddr, port, 0, cntype, prot)) ==
			PBS_NET_RC_FATAL) {
			(void)sprintf(log_buffer, "send_job failed to %lx port %d",
				hostaddr, port);
			log_err(pbs_errno, __func__, log_buffer);
			goto fatal_exit;
		} else if (con == PBS_NET_RC_RETRY) {
			pbs_errno = WSAECONNREFUSED;	/* should retry */
			continue;
		}
		/*
		 * if the job is substate JOB_SUBSTATE_TRNOUTCM which means
		 * we are recovering after being down or a late failure, we
		 * just want to send the "read-to-commit/commit"
		 */


		if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) {

			if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) {
				jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;
			}

			pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl;
			destin = jobp->ji_qs.ji_destin;

			if (PBSD_queuejob(con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL, prot, NULL)== 0) {
				if (pbs_errno == PBSE_JOBEXIST && move_type == MOVE_TYPE_Exec) {
					/* already running, mark it so */
					log_event(PBSEVENT_ERROR,
						PBS_EVENTCLASS_JOB, LOG_INFO,
						jobp->ji_qs.ji_jobid, "Mom reports job already running");
					goto ok_exit;

				} else if ((pbs_errno == PBSE_HOOKERROR) ||
					(pbs_errno == PBSE_HOOK_REJECT)  ||
					(pbs_errno == PBSE_HOOK_REJECT_RERUNJOB)  ||
					(pbs_errno == PBSE_HOOK_REJECT_DELETEJOB)) {
					char		name_buf[MAXPATHLEN+1];
					int		rfd;
					int		len;
					char		*reject_msg;
					int		err;

					err = pbs_errno;

					reject_msg = pbs_geterrmsg(con);
					(void)snprintf(log_buffer, sizeof(log_buffer),
						"send of job to %s failed error = %d reject_msg=%s",
						destin, err,
						reject_msg?reject_msg:"");
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
						LOG_INFO, jobp->ji_qs.ji_jobid,
						log_buffer);

					(void)strcpy(name_buf, path_hooks_workdir);
					(void)strcat(name_buf, jobp->ji_qs.ji_jobid);
					(void)strcat(name_buf, HOOK_REJECT_SUFFIX);

					if ((reject_msg != NULL) &&
						(reject_msg[0] != '\0')) {

						if ((rfd = open(name_buf,
							O_RDWR|O_CREAT|O_TRUNC, 0600)) == -1) {
							snprintf(log_buffer,
								sizeof(log_buffer),
								"open of reject file %s failed: errno %d",
								name_buf, errno);
							log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
						} else {
							secure_file(name_buf, "Administrators",
								READS_MASK|WRITES_MASK|STANDARD_RIGHTS_REQUIRED);
							setmode(rfd, O_BINARY);
							len = strlen(reject_msg)+1;
							/* write also trailing null char */
							if (write(rfd, reject_msg, len) != len) {
								snprintf(log_buffer,
									sizeof(log_buffer),
									"write to file %s incomplete: errno %d", name_buf, errno);
								log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
							}
							close(rfd);
						}
					}

					if (err == PBSE_HOOKERROR)
						exit(SEND_JOB_HOOKERR);
					if (err == PBSE_HOOK_REJECT)
						exit(SEND_JOB_HOOK_REJECT);
					if (err == PBSE_HOOK_REJECT_RERUNJOB)
						exit(SEND_JOB_HOOK_REJECT_RERUNJOB);
					if (err == PBSE_HOOK_REJECT_DELETEJOB)
						exit(SEND_JOB_HOOK_REJECT_DELETEJOB);
				} else {
					(void)sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno);
					log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, jobp->ji_qs.ji_jobid, log_buffer);
					continue;
				}
			}

			if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				if (PBSD_jscript(con, script_name, prot, NULL) != 0)
					continue;
			}

			if (credlen > 0) {
				int     ret;

				ret = PBSD_jcred(con,
					jobp->ji_extended.ji_ext.ji_credtype,
					credbuf, credlen, prot, NULL);
				if ((ret == 0) || (i == (RETRY - 1)))
					free(credbuf);	/* free credbuf if credbuf is sent successfully OR */
				/* at the end of all retry attempts */
				if (ret != 0)
					continue;
			}

			if ((move_type == MOVE_TYPE_Exec) &&
				(jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) &&
				(hostaddr !=  pbs_server_addr)) {
				/* send files created on prior run */
				if ((move_job_file(con, jobp, StdOut, prot) != 0) ||
					(move_job_file(con, jobp, StdErr, prot) != 0) ||
					(move_job_file(con, jobp, Chkpt, prot) != 0))
					continue;
			}

			jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM;
		}

		if (PBSD_rdytocmt(con, job_id, prot, NULL) != 0)
			continue;

		if (PBSD_commit(con, job_id, prot, NULL) != 0)
			goto fatal_exit;
		goto ok_exit;	/* This child process is all done */
	}
	if (con >= 0)
		svr_disconnect(con);
	/*
	 * If connection is actively refused by the execution node(or mother superior) OR
	 * the execution node(or mother superior) is rejecting request with error
	 * PBSE_BADHOST(failing to authorize server host), the node should be marked down.
	 */
	if ((move_type == MOVE_TYPE_Exec) && (pbs_errno == WSAECONNREFUSED || pbs_errno == PBSE_BADHOST)) {
		i = SEND_JOB_NODEDW;
	} else if (should_retry_route(pbs_errno) == -1) {
		i = SEND_JOB_FATAL;
	} else {
		i = SEND_JOB_RETRY;
	}
	(void)sprintf(log_buffer, "send_job failed with error %d", pbs_errno);
	log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_NOTICE,
		jobp->ji_qs.ji_jobid, log_buffer);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(i);

fatal_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_FATAL);

ok_exit:
	if (con >= 0)
		svr_disconnect(con);
	log_close(0);
	net_close(-1);
	unlink(script_name);
	exit(SEND_JOB_OK);
}