コード例 #1
0
ファイル: svr_recov_db.c プロジェクト: agrawalravi90/pbspro
/**
 * @brief
 *	Save the server object to the database.
 *
 *	Stamps the server's save time, refreshes the svrlive file, converts
 *	the server structure to its database form and writes it using the
 *	save type selected by mode. If the first write fails, it is retried
 *	once as an insert.
 *
 * @param[in]	ps   - Pointer to the server structure to save
 * @param[in]	mode - SVR_SAVE_FULL selects a full update, SVR_SAVE_QUICK a
 *		       quick update; any other value selects an insert
 *
 * @return	Error code
 * @retval	0  - Success
 * @retval	-1 - Failure (svrlive update, conversion or database save)
 *
 */
int
svr_save_db(struct server *ps, int mode)
{
	pbs_db_conn_t *dbconn = (pbs_db_conn_t *) svr_db_conn;
	pbs_db_svr_info_t svr_rec;
	pbs_db_obj_info_t db_obj;
	int how;
	int status;

	ps->sv_qs.sv_savetm = time_now;

	/* the svrlive file (used in failover) is refreshed on every server save */
	if (update_svrlive() != 0)
		return -1;

	/* translate the caller's save mode into a database save type */
	how = (mode == SVR_SAVE_FULL) ? PBS_UPDATE_DB_FULL :
		(mode == SVR_SAVE_QUICK) ? PBS_UPDATE_DB_QUICK : PBS_INSERT_DB;

	if (svr_to_db_svr(ps, &svr_rec, how) == 0) {
		db_obj.pbs_db_obj_type = PBS_DB_SVR;
		db_obj.pbs_db_un.pbs_db_svr = &svr_rec;

		status = pbs_db_save_obj(dbconn, &db_obj, how);
		if (status != 0) {
			/* first attempt failed: retry once as an insert */
			status = pbs_db_save_obj(dbconn, &db_obj, PBS_INSERT_DB);
		}

		pbs_db_reset_obj(&db_obj);

		if (status == 0)
			return (0);
	}

	/*
	 * Conversion or save failed: build the message (fixed text followed by
	 * the connection's error string, truncated to fit), log it and hand it
	 * to panic_stop_db().
	 */
	strcpy(log_buffer, msg_svdbnosv);
	if (dbconn->conn_db_err != NULL)
		strncat(log_buffer, dbconn->conn_db_err, LOG_BUF_SIZE - strlen(log_buffer) - 1);
	log_err(-1, __func__, log_buffer);

	panic_stop_db(log_buffer);
	return (-1);
}
コード例 #2
0
ファイル: svr_recov_db.c プロジェクト: agrawalravi90/pbspro
/**
 * @brief
 *	Save a scheduler object to the database.
 *
 *	Converts the scheduler structure to its database form and writes it
 *	using the save type selected by mode. If the first write fails, it is
 *	retried once as an insert.
 *
 * @param[in]	ps   - Pointer to the scheduler structure to save
 * @param[in]	mode - SVR_SAVE_FULL selects a full update, SVR_SAVE_QUICK a
 *		       quick update; any other value selects an insert
 *
 * @return	Error code
 * @retval	0  - Success
 * @retval	-1 - Failure (conversion or database save)
 *
 */
int
sched_save_db(pbs_sched *ps, int mode)
{
	pbs_db_conn_t *dbconn = (pbs_db_conn_t *) svr_db_conn;
	pbs_db_sched_info_t sched_rec;
	pbs_db_obj_info_t db_obj;
	int how;
	int status;

	/* translate the caller's save mode into a database save type */
	how = (mode == SVR_SAVE_FULL) ? PBS_UPDATE_DB_FULL :
		(mode == SVR_SAVE_QUICK) ? PBS_UPDATE_DB_QUICK : PBS_INSERT_DB;

	if (svr_to_db_sched(ps, &sched_rec, how) == 0) {
		db_obj.pbs_db_obj_type = PBS_DB_SCHED;
		db_obj.pbs_db_un.pbs_db_sched = &sched_rec;

		status = pbs_db_save_obj(dbconn, &db_obj, how);
		if (status != 0) {
			/* first attempt failed: retry once as an insert */
			status = pbs_db_save_obj(dbconn, &db_obj, PBS_INSERT_DB);
		}

		/* free the attribute list allocated by encode_attrs */
		pbs_db_reset_obj(&db_obj);

		if (status == 0)
			return (0);
	}

	/*
	 * Conversion or save failed: build the message (fixed text followed by
	 * the connection's error string, truncated to fit), log it and hand it
	 * to panic_stop_db().
	 */
	strcpy(log_buffer, schedemsg);
	if (dbconn->conn_db_err != NULL)
		strncat(log_buffer, dbconn->conn_db_err, LOG_BUF_SIZE - strlen(log_buffer) - 1);
	log_err(-1, __func__, log_buffer);

	panic_stop_db(log_buffer);
	return (-1);
}
コード例 #3
0
ファイル: queue_recov_db.c プロジェクト: Bhagat-Rajput/pbspro
/**
 * @brief
 *	Save a queue to the database
 *
 * @param[in]	pque  - Pointer to the queue to save
 * @param[in]	mode:
 *		QUE_SAVE_NEW  - Save new queue information (insert)
 *		any other value (e.g. QUE_SAVE_FULL) - Save full queue
 *		information (update)
 *
 * @return      Error code
 * @retval	0  - Success
 * @retval	-1 - Failure (conversion or database save failed; the error
 *		     is logged and passed to panic_stop_db())
 *
 */
int
que_save_db(pbs_queue *pque, int mode)
{
	/*
	 * Zero-initialized so that the error path below never hands an
	 * indeterminate pointer to free() when the conversion fails before
	 * the attribute list has been set up.
	 */
	pbs_db_que_info_t	dbque = {0};
	pbs_db_obj_info_t	obj = {0};
	pbs_db_conn_t		*conn = (pbs_db_conn_t *) svr_db_conn;
	int savetype = PBS_UPDATE_DB_FULL;

	/* the conversion is always requested as a full update, whatever mode is */
	if (svr_to_db_que(pque, &dbque, savetype) != 0)
		goto db_err;

	obj.pbs_db_obj_type = PBS_DB_QUEUE;
	obj.pbs_db_un.pbs_db_que = &dbque;

	/* a new queue is inserted; anything else is written as a full update */
	if (mode == QUE_SAVE_NEW)
		savetype = PBS_INSERT_DB;

	if (pbs_db_save_obj(conn, &obj, savetype) != 0)
		goto db_err;

	pbs_db_reset_obj(&obj);

	return (0);

db_err:
	/* free the attribute list allocated by encode_attrs */
	free(dbque.attr_list.attributes);

	/* fixed text followed by the connection's error string, truncated to fit */
	strcpy(log_buffer, "que_save failed ");
	if (conn->conn_db_err != NULL)
		strncat(log_buffer, conn->conn_db_err, LOG_BUF_SIZE - strlen(log_buffer) - 1);
	log_err(-1, __func__, log_buffer);

	panic_stop_db(log_buffer);
	return (-1);
}
コード例 #4
0
/**
 * @brief
 *		Function to migrate filesystem data to database.
 * 		Reads serverdb, scheddb, job files, node, nodestate, queue, resv information
 * 		from the filesystem and saves them into the database. All the information is
 * 		recovered and saved into the database under a single database transaction,
 * 		so any failure rolls back all the updates to the database. If all the updates
 * 		to the database succeed, only then the respective files are deleted from the
 * 		filesystem, else no deletion takes place.
 *
 *		The function changes the working directory while scanning the queue,
 *		reservation and job directories, and changes back to the directory that
 *		was current on entry before returning from any point after that directory
 *		has been recorded.
 *
 * @return	Error code
 * @retval	0	: success (also returned when there is no serverdb file to migrate)
 * @retval	-1	: Failure
 *
 */
int
svr_migrate_data_from_fs(void)
{
	int baselen;
	struct dirent *pdirent;
	DIR *dir;
	int had;
	char *job_suffix = JOB_FILE_SUFFIX;
	int job_suf_len = strlen(job_suffix);
	job *pjob = NULL;
	pbs_queue *pque;
	resc_resv *presv;
	char *psuffix;
	int rc;
	int recovered = 0;
	char    basen[MAXPATHLEN+1];
	char	scrfile[MAXPATHLEN+1];
	char	jobfile[MAXPATHLEN+1];
	char	origdir[MAXPATHLEN+1];
	int fd;
	struct stat stbuf;
	char *scrbuf = NULL;
	pbs_db_jobscr_info_t	jobscr;
	pbs_db_obj_info_t		obj;

	path_svrdb_new = build_path(path_priv, PBS_SERVERDB, new_tag);
	path_scheddb = build_path(path_priv, PBS_SCHEDDB, NULL);
	path_scheddb_new = build_path(path_priv, PBS_SCHEDDB, new_tag);
	path_queues = build_path(path_priv, PBS_QUEDIR, suffix_slash);
	path_resvs = build_path(path_priv, PBS_RESVDIR, suffix_slash);
	path_nodes = build_path(path_priv, NODE_DESCRIP, NULL);
	path_nodestate = build_path(path_priv, NODE_STATUS, NULL);

	/*    If not a "create" initialization, recover server db */
	/*    and sched db					  */
	if (chk_save_file(path_svrdb) != 0) {
		fprintf(stderr, "No serverdb found to update to datastore\n");
		return (0);
	}

	/*
	 * No transaction has been started yet, so there is nothing to roll
	 * back if resource setup fails.
	 */
	if (setup_resc(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		return (-1);
	}

	init_server_attrs();

	/* start a database transaction for the whole recovery */
	if (pbs_db_begin_trx(svr_db_conn, 0, 0) != 0)
		return (-1);

	/* preprocess the nodes file to convert old properties to resources */
	if (setup_nodes_fs(1) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* Open the server database (save file) and read it in */
	if (svr_recov_fs(path_svrdb) == -1) {
		fprintf(stderr, "%s\n", msg_init_baddb);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* save server information to database now */
	if (svr_save_db(&server, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save server db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/* now do sched db */
	if (sched_recov_fs(path_scheddb) == -1) {
		fprintf(stderr, "Unable to recover scheddb\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	if (sched_save_db(dflt_scheduler, SVR_SAVE_NEW) != 0) {
		fprintf(stderr, "Could not save scheduler db\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}
	set_sched_default(dflt_scheduler, 0);
	/* save current working dir before any chdirs */
	if (getcwd(origdir, MAXPATHLEN) == NULL) {
		fprintf(stderr, "getcwd failed\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		return (-1);
	}

	/*
	 * Recover queues.
	 */
	if (chdir(path_queues) != 0) {
		fprintf(stderr, msg_init_chdir, path_queues);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	/* remember the queue count read from serverdb to compare after recovery */
	had = server.sv_qs.sv_numque;
	server.sv_qs.sv_numque = 0;
	dir = opendir(".");
	if (dir == NULL) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	/* errno is cleared before each readdir so end-of-directory and error can be told apart */
	while (errno = 0, (pdirent = readdir(dir)) != NULL) {
		if (chk_save_file(pdirent->d_name) == 0) {
			if ((pque = que_recov_fs(pdirent->d_name)) !=
				NULL) {
				/* que_recov increments sv_numque */
				fprintf(stderr, msg_init_recovque,
					pque->qu_qs.qu_name);
				fprintf(stderr, "\n");
				if (que_save_db(pque, QUE_SAVE_NEW) != 0) {
					fprintf(stderr,
						"Could not save queue info for queue %s\n",
						pque->qu_qs.qu_name);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noqueues);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);
	/* a queue-count mismatch is reported but does not abort the migration */
	if (had != server.sv_qs.sv_numque) {
		fprintf(stderr, msg_init_expctq, had, server.sv_qs.sv_numque);
		fprintf(stderr, "\n");
	}

	/* Open and read in node list if one exists */
	if (setup_nodes_fs(0) == -1) {
		fprintf(stderr, "%s\n", log_buffer);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	/*
	 * Recover reservations.
	 */
	if (chdir(path_resvs) != 0) {
		fprintf(stderr, msg_init_chdir, path_resvs);
		fprintf(stderr, "\n");
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	dir = opendir(".");
	if (dir == NULL) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}
	while (errno = 0, (pdirent = readdir(dir)) != NULL) {
		if (chk_save_file(pdirent->d_name) == 0) {
			presv = (resc_resv *)
				job_or_resv_recov_fs(pdirent->d_name,
				RESC_RESV_OBJECT);
			if (presv != NULL) {
				if (resv_save_db(presv, SAVERESV_NEW) != 0) {
					fprintf(stderr,
						"Could not save resv info for resv %s\n",
						presv->ri_qs.ri_resvID);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn,
						PBS_DB_ROLLBACK);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
			}
		}
	}
	if (errno != 0 && errno != ENOENT) {
		fprintf(stderr, "%s\n", msg_init_noresvs);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		(void) closedir(dir);
		chdir(origdir);
		return (-1);
	}
	(void) closedir(dir);

	/*
	 *   Recover jobs
	 */
	if (chdir(path_jobs) != 0) {
		fprintf(stderr, msg_init_chdir, path_jobs);
		fprintf(stderr, "\n");
		/* the transaction is open here: roll it back like every other failure path */
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	server.sv_qs.sv_numjobs = 0;
	recovered = 0;
	dir = opendir(".");
	if (dir == NULL) {
		/*
		 * NOTE(review): a jobs directory that cannot be opened is only
		 * reported; the migration still goes on to commit. Confirm this
		 * is intended.
		 */
		fprintf(stderr, "%s\n", msg_init_nojobs);
	} else {
		/* Now, for each job found ... */
		while (errno = 0,
			(pdirent = readdir(dir)) != NULL) {
			if (chk_save_file(pdirent->d_name) != 0)
				continue;

			/* recover the job */
			baselen = (int) strlen(pdirent->d_name) - job_suf_len;
			if (baselen < 0)
				continue; /* shorter than the job suffix: cannot be a job file */
			psuffix = pdirent->d_name + baselen;
			if (strcmp(psuffix, job_suffix))
				continue;

			if ((pjob = job_recov_fs(pdirent->d_name)) == NULL) {
				/*
				 * NOTE(review): only the "bad" file name is built and
				 * logged here; no rename is performed in this function.
				 * Verify whether job_recov_fs moves the file itself.
				 */
				(void)strcpy(basen, pdirent->d_name);
				psuffix = basen + baselen;
				(void)strcpy(psuffix, JOB_BAD_SUFFIX);
				(void)snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
					basen);
				log_event(PBSEVENT_SYSTEM,
					PBS_EVENTCLASS_SERVER, LOG_NOTICE,
					msg_daemonname, log_buffer);
				continue;
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* load the job script file */
				strcpy(scrfile, path_jobs);
#ifndef WIN32
				/* under WIN32, there's already a prefixed '/' */
				(void) strcat(scrfile, "/");
#endif
				strcat(scrfile, pdirent->d_name);
				/* swap the job-file suffix for the script suffix */
				baselen = strlen(scrfile) - strlen(JOB_FILE_SUFFIX);
				scrfile[baselen] = 0; /* put null char */
				strcat(scrfile, JOB_SCRIPT_SUFFIX);
				rc = 1; /* stays non-zero unless the whole script is read */
#ifdef WIN32
				if ((fd = open(scrfile, O_BINARY | O_RDONLY)) != -1)
#else
				if ((fd = open(scrfile, O_RDONLY)) != -1)
#endif
				{
					/* load the script */
					if (fstat(fd, &stbuf) == 0) {
						if ((scrbuf = malloc(stbuf.st_size + 1))) {
							if (read(fd, scrbuf, stbuf.st_size) == stbuf.st_size) {
								scrbuf[stbuf.st_size] = '\0'; /* null character */
								rc = 0; /* success loading */
							}
						}
					}
					close(fd);
				}

				if (rc != 0) {
					/* script unreadable: rename the job file to the "bad" suffix and skip the job */
					fprintf(stderr, "Could not recover script file for job %s\n", pjob->ji_qs.ji_jobid);
					(void) strcpy(basen, scrfile);
					psuffix = basen + strlen(scrfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_BAD_SUFFIX);

					(void) strcpy(jobfile, scrfile);
					psuffix = jobfile + strlen(jobfile) - strlen(JOB_SCRIPT_SUFFIX);
					(void) strcpy(psuffix, JOB_FILE_SUFFIX);
#ifdef WIN32
					if (MoveFileEx(jobfile, basen,
						MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == 0) {
						errno = GetLastError();
						snprintf(log_buffer, sizeof(log_buffer), "MoveFileEx(%s, %s) failed!",
							jobfile, basen);
						log_err(errno, "script", log_buffer);

					}
					secure_file(basen, "Administrators",
						READS_MASK | WRITES_MASK | STANDARD_RIGHTS_REQUIRED);
#else
					if (rename(jobfile, basen) == -1) {
						snprintf(log_buffer, sizeof(log_buffer), "error renaming job file %s",
							jobfile);
						log_err(errno, "job_recov", log_buffer);
					}
#endif
					(void) snprintf(log_buffer, sizeof(log_buffer), "moved bad file to %s",
						basen);
					log_event(PBSEVENT_SYSTEM,
						PBS_EVENTCLASS_SERVER, LOG_NOTICE,
						msg_daemonname, log_buffer);
					/* the buffer may have been allocated before the read failed */
					free(scrbuf);
					scrbuf = NULL;
					continue;
				}
			}

			/* now save job first */
			if (job_save_db(pjob, SAVEJOB_NEW) != 0) {
				fprintf(stderr, "Could not save job info for jobid %s\n",
					pjob->ji_qs.ji_jobid);
				if (svr_db_conn->conn_db_err)
					fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
				(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
				(void) closedir(dir);
				chdir(origdir);
				free(scrbuf);
				return (-1);
			}

			if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) {
				/* save job script */
				strcpy(jobscr.ji_jobid, pjob->ji_qs.ji_jobid);
				jobscr.script = scrbuf;
				obj.pbs_db_obj_type = PBS_DB_JOBSCR;
				obj.pbs_db_un.pbs_db_jobscr = &jobscr;
				if (pbs_db_save_obj(svr_db_conn, &obj, PBS_INSERT_DB) != 0) {
					fprintf(stderr, "Could not save job script for jobid %s\n",
						pjob->ji_qs.ji_jobid);
					if (svr_db_conn->conn_db_err)
						fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
					(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
					free(scrbuf);
					(void) closedir(dir);
					chdir(origdir);
					return (-1);
				}
				free(scrbuf);
				scrbuf = NULL;
			}

			recovered++;
		}
		if (errno != 0 && errno != ENOENT) {
			/* pjob, if set, is the last job recovered before readdir failed */
			if (pjob)
				fprintf(stderr, "readdir error for jobid %s\n", pjob->ji_qs.ji_jobid);
			else
				fprintf(stderr, "readdir error\n");
			(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
			free(scrbuf);
			(void) closedir(dir);
			chdir(origdir);
			return (-1);
		}
		(void) closedir(dir);
		fprintf(stderr, msg_init_exptjobs, recovered);
		fprintf(stderr, "\n");
	}

	if (save_nodes_db(0, NULL) != 0) {
		fprintf(stderr, "Could not save nodes\n");
		if (svr_db_conn->conn_db_err)
			fprintf(stderr, "[%s]\n", (char*)svr_db_conn->conn_db_err);
		(void) pbs_db_end_trx(svr_db_conn, PBS_DB_ROLLBACK);
		chdir(origdir);
		return (-1);
	}

	/* the migrated files are removed only after the commit has succeeded */
	if (pbs_db_end_trx(svr_db_conn, PBS_DB_COMMIT) == 0) {
		rm_migrated_files(path_priv);
		chdir(origdir);
		return (0);
	}
	chdir(origdir);
	return -1;
}