예제 #1
0
int gtmsource_mode_change(int to_mode)
{
	uint4		savepid;
	int		exit_status;
	int		status, detach_status, remove_status;
	sgmnt_addrs	*repl_csa;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	repl_log(stdout, TRUE, TRUE, "Initiating %s operation on source server pid [%d] for secondary instance [%s]\n",
		(GTMSOURCE_MODE_ACTIVE == to_mode) ? "ACTIVATE" : "DEACTIVATE",
		jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname);
	if (jnlpool.gtmsource_local->mode == to_mode)
	{
		repl_log(stderr, FALSE, TRUE, "Source Server already %s, not changing mode\n",
				(to_mode == GTMSOURCE_MODE_ACTIVE) ? "ACTIVE" : "PASSIVE");
		return (ABNORMAL_SHUTDOWN);
	}
	assert(ROOTPRIMARY_UNSPECIFIED != gtmsource_options.rootprimary);
	if ((GTMSOURCE_MODE_ACTIVE == to_mode)
			&& (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) && jnlpool.jnlpool_ctl->upd_disabled)
	{	/* ACTIVATE is specified with ROOTPRIMARY on a journal pool that was created with PROPAGATEPRIMARY. This is a
		 * case of transition from propagating primary to root primary. Enable updates in this journal pool and append
		 * a histinfo record to the replication instance file. The function "gtmsource_rootprimary_init" does just that.
		 */
		gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
	}
	DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
int gtmsource_mode_change(int to_mode)
{
	uint4		savepid;
	int		exit_status;
	int		status, detach_status, remove_status;
	int 	        log_fd = 0, close_status = 0;
	char*           err_code;
	int	        save_errno;
	sgmnt_addrs	*repl_csa;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	repl_log(stdout, TRUE, TRUE, "Initiating %s operation on source server pid [%d] for secondary instance [%s]\n",
		(GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode) ? "ACTIVATE" : "DEACTIVATE",
		jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname);
	if ((jnlpool.gtmsource_local->mode == GTMSOURCE_MODE_ACTIVE_REQUESTED)
		|| (jnlpool.gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE_REQUESTED))
	{
		repl_log(stderr, FALSE, TRUE, "Source Server %s already requested, not changing mode\n",
				(to_mode == GTMSOURCE_MODE_ACTIVE_REQUESTED) ? "ACTIVATE" : "DEACTIVATE");
		return (ABNORMAL_SHUTDOWN);
	}
	if (((GTMSOURCE_MODE_ACTIVE == jnlpool.gtmsource_local->mode) && (GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode))
		|| ((GTMSOURCE_MODE_PASSIVE == jnlpool.gtmsource_local->mode) && (GTMSOURCE_MODE_PASSIVE_REQUESTED == to_mode)))
	{
		repl_log(stderr, FALSE, TRUE, "Source Server already %s, not changing mode\n",
				(to_mode == GTMSOURCE_MODE_ACTIVE_REQUESTED) ? "ACTIVE" : "PASSIVE");
		return (ABNORMAL_SHUTDOWN);
	}
	assert(ROOTPRIMARY_UNSPECIFIED != gtmsource_options.rootprimary);
	/*check if the new log file is writable*/
	if ('\0' != gtmsource_options.log_file[0] && 0 != STRCMP(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file))
	{
		OPENFILE3(gtmsource_options.log_file,
			      O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, log_fd);
		if (log_fd < 0) {
			save_errno = ERRNO;
			err_code = STRERROR(save_errno);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6,
					   LEN_AND_STR(gtmsource_options.log_file),
					   LEN_AND_STR(err_code),
					   LEN_AND_STR(NULL_DEVICE));
			return (ABNORMAL_SHUTDOWN);
		}
		CLOSEFILE_IF_OPEN(log_fd, close_status);
		assert(close_status==0);
	}
	if ((GTMSOURCE_MODE_ACTIVE_REQUESTED == to_mode)
			&& (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary) && jnlpool.jnlpool_ctl->upd_disabled)
	{	/* ACTIVATE is specified with ROOTPRIMARY on a journal pool that was created with PROPAGATEPRIMARY. This is a
		 * case of transition from propagating primary to root primary. Enable updates in this journal pool and append
		 * a histinfo record to the replication instance file. The function "gtmsource_rootprimary_init" does just that.
		 */
		gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
	}
	DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
예제 #3
0
int gtmsource_changelog(void)
{
	uint4	changelog_desired = 0, changelog_accepted = 0;
	int     log_fd = 0; /*used to indicate whether the new specified log file is writable*/
	int     close_status = 0; /*used to indicate if log file is successfully closed*/
	char*   err_code;
	int	save_errno;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	repl_log(stderr, TRUE, TRUE, "Initiating CHANGELOG operation on source server pid [%d] for secondary instance [%s]\n",
		jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname);
	if (0 != jnlpool.gtmsource_local->changelog)
	{
		util_out_print("Change log is already in progress. Not initiating change in log file or log interval", TRUE);
		return (ABNORMAL_SHUTDOWN);
	}
	if ('\0' != gtmsource_options.log_file[0]) /* trigger change in log file */
	{
		changelog_desired |= REPLIC_CHANGE_LOGFILE;
		if (0 != STRCMP(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file))
		{	/*check if the new log file is writable*/
			OPENFILE3_CLOEXEC(gtmsource_options.log_file,
				      O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, log_fd);
			if (log_fd < 0)
			{
				save_errno = ERRNO;
				err_code = STRERROR(save_errno);
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6,
						   LEN_AND_STR(gtmsource_options.log_file),
						   LEN_AND_STR(err_code),
						   LEN_AND_STR(NULL_DEVICE));
			} else {
				CLOSEFILE_IF_OPEN(log_fd, close_status);
				assert(close_status==0);
				changelog_accepted |= REPLIC_CHANGE_LOGFILE;
				STRCPY(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file);
				util_out_print("Change log initiated with file !AD", TRUE, LEN_AND_STR(gtmsource_options.log_file));
			}
		} else
			util_out_print("Log file is already !AD. Not initiating change in log file", TRUE,
					LEN_AND_STR(gtmsource_options.log_file));
	}
	if (0 != gtmsource_options.src_log_interval) /* trigger change in log interval */
	{
		changelog_desired |= REPLIC_CHANGE_LOGINTERVAL;
		if (gtmsource_options.src_log_interval != jnlpool.gtmsource_local->log_interval)
		{
			changelog_accepted |= REPLIC_CHANGE_LOGINTERVAL;
			jnlpool.gtmsource_local->log_interval = gtmsource_options.src_log_interval;
			util_out_print("Change log initiated with interval !UL", TRUE, gtmsource_options.src_log_interval);
		} else
			util_out_print("Log interval is already !UL. Not initiating change in log interval", TRUE,
					gtmsource_options.src_log_interval);
	}
	if (0 != changelog_accepted)
		jnlpool.gtmsource_local->changelog = changelog_accepted;
	else
		util_out_print("No change to log file or log interval", TRUE);
	return ((0 != changelog_accepted && changelog_accepted == changelog_desired) ? NORMAL_SHUTDOWN : ABNORMAL_SHUTDOWN);
}
예제 #4
0
int updhelper_reader(void)
{
	uint4			pre_read_offset;
	int			lcnt;
	boolean_t		continue_reading;

	call_on_signal = updhelper_reader_sigstop;
	updhelper_init(UPD_HELPER_READER);
	repl_log(updhelper_log_fp, TRUE, TRUE, "Helper reader started. PID %d [0x%X]\n", process_id, process_id);
	GVKEY_INIT(gv_currkey, gv_keysize);
	GVKEY_INIT(gv_altkey, gv_keysize);
	last_pre_read_offset = 0;
	continue_reading = TRUE;
	do
	{
		REPL_DPRINT1("Will wait\n");
		for (lcnt = 1; continue_reading && last_pre_read_offset ==
						(pre_read_offset = recvpool.upd_helper_ctl->pre_read_offset); )
		{
			SHORT_SLEEP(lcnt);
			if (lcnt <= MAX_LCNT)
				lcnt++;
			continue_reading = NO_SHUTDOWN == helper_entry->helper_shutdown;
		}
		last_pre_read_offset = pre_read_offset;
	} while (continue_reading && updproc_preread());
	updhelper_reader_end();
	return SS_NORMAL;
}
예제 #5
0
static void  updproc_stop(boolean_t exit)
{
	int4		status;
	int		fclose_res;

	call_on_signal = NULL;	/* Don't reenter on error */
	if (pool_init)
	{
		rel_lock(jnlpool.jnlpool_dummy_reg);
		/* nullify jnlpool_ctl before detaching from jnlpool since if it is the other way, we might be interrupted
		 * by the periodic timer routines and end up in jnl_write_epoch_rec() routine that dereferences jnlpool_ctl
		 * since it is non-NULL although it has been detached from and is no longer valid memory.
		 */
		jnlpool_ctl = NULL;
#ifdef UNIX
		mutex_cleanup(jnlpool.jnlpool_dummy_reg);
		SHMDT(jnlpool.jnlpool_ctl);
#elif defined(VMS)
		if (SS$_NORMAL != (status = detach_shm(jnlpool.shm_range)))
			repl_log(stderr, TRUE, TRUE, "Error detaching from jnlpool : %s\n", REPL_STR_ERROR);
		if (SS$_NORMAL != (status = signoff_from_gsec(jnlpool.shm_lockid)))
			repl_log(stderr, TRUE, TRUE, "Error dequeueing lock on jnlpool global section : %s\n", REPL_STR_ERROR);
#else
#error Unsupported Platform
#endif
		jnlpool.jnlpool_ctl = NULL;
		pool_init = FALSE;
	}
	recvpool.upd_proc_local->upd_proc_shutdown = NORMAL_SHUTDOWN;
	recvpool.upd_proc_local->upd_proc_pid = 0;
#ifdef UNIX
	SHMDT(recvpool.recvpool_ctl);
#elif defined(VMS)
	if(SS$_NORMAL != (status = detach_shm(recvpool.shm_range)))
		repl_log(stderr, TRUE, TRUE, "Update process could not detach from recvpool : %s\n", REPL_STR_ERROR);
	if (SS$_NORMAL != (status = signoff_from_gsec(recvpool.shm_lockid)))
		repl_log(stderr, TRUE, TRUE, "Error dequeueing lock on recvpool global section : %s\n", REPL_STR_ERROR);
#else
#error Unsupported Platform
#endif
	recvpool.recvpool_ctl = NULL;
	gtm_event_log_close();
	if (exit)
		mupip_exit(SS_NORMAL);
	return;
}
예제 #6
0
int gtmsource_needrestart(void)
{
	gtmsource_local_ptr_t	gtmsource_local;
	sgmnt_addrs		*repl_csa;

	error_def(ERR_MUPCLIERR);

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);

	gtmsource_local = jnlpool.gtmsource_local;
	if (NULL != gtmsource_local)
	{
		assert(!STRCMP(gtmsource_options.secondary_instname, gtmsource_local->secondary_instname));
		repl_log(stderr, TRUE, TRUE,
			"Initiating NEEDRESTART operation on source server pid [%d] for secondary instance [%s]\n",
			gtmsource_local->gtmsource_pid, gtmsource_options.secondary_instname);
	} else
		repl_log(stderr, TRUE, TRUE, "Initiating NEEDRESTART operation for secondary instance [%s]\n",
			gtmsource_options.secondary_instname);
	DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
예제 #7
0
int gtmrecv_endupd(void)
{
	pid_t 		savepid;
	int		exit_status;
	pid_t		waitpid_res;

	repl_log(stdout, TRUE, TRUE, "Initiating shut down of Update Process\n");
	recvpool.upd_proc_local->upd_proc_shutdown = SHUTDOWN;
	/* Wait for update process to shut down */
	while((SHUTDOWN == recvpool.upd_proc_local->upd_proc_shutdown)
		&& (0 < (savepid = (pid_t)recvpool.upd_proc_local->upd_proc_pid)) && is_proc_alive(savepid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN);
		WAITPID(savepid, &exit_status, WNOHANG, waitpid_res); /* Release defunct update process if dead */
	}
	exit_status = recvpool.upd_proc_local->upd_proc_shutdown;
	if (SHUTDOWN == exit_status)
	{
		if (0 == savepid) /* No Update Process */
			exit_status = NORMAL_SHUTDOWN;
		else /* Update Process Crashed */
		{
			repl_log(stderr, TRUE, TRUE, "Update Process exited abnormally, INTEGRITY CHECK might be warranted\n");
			exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	/* Wait for the Update Process to detach */
	if (0 == grab_sem(RECV, UPD_PROC_COUNT_SEM))
	{
		if(0 != (errno = rel_sem(RECV, UPD_PROC_COUNT_SEM)))
			repl_log(stderr, TRUE, TRUE, "Error releasing the Update Process Count semaphore : %s\n", REPL_SEM_ERROR);
		repl_log(stdout, TRUE, TRUE, "Update Process exited\n");
	} else
	{
		repl_log(stderr, TRUE, TRUE, "Error in update proc count semaphore : %s\n", REPL_SEM_ERROR);
		exit_status = ABNORMAL_SHUTDOWN;
	}
	return (exit_status);
}
예제 #8
0
int gtmsource_losttncomplete(void)
{
	int			idx;
	gtmsource_local_ptr_t	gtmsourcelocal_ptr;
	sgmnt_addrs		*repl_csa;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	assert(NULL == jnlpool.gtmsource_local);
	repl_log(stderr, TRUE, TRUE, "Initiating LOSTTNCOMPLETE operation on instance [%s]\n",
		jnlpool.repl_inst_filehdr->inst_info.this_instname);
	/* If this is a root primary instance, propagate this information to secondaries as well so they reset zqgblmod_seqno to 0.
	 * If propagating primary, no need to send this to tertiaries as the receiver on the tertiary cannot have started with
	 * non-zero "zqgblmod_seqno" to begin with (PRIMARYNOTROOT error would have been issued).
	 */
	if (!jnlpool.jnlpool_ctl->upd_disabled)
	{
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
int gtmsource_statslog(void)
{
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	repl_log(stderr, TRUE, TRUE, "Initiating STATSLOG operation on source server pid [%d] for secondary instance [%s]\n",
		jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname);
	if (gtmsource_options.statslog == jnlpool.gtmsource_local->statslog)
	{
		util_out_print("STATSLOG is already !AD. Not initiating change in stats log", TRUE, gtmsource_options.statslog ?
				strlen("ON") : strlen("OFF"), gtmsource_options.statslog ? "ON" : "OFF");
		return (ABNORMAL_SHUTDOWN);
	}
	if (!gtmsource_options.statslog)
	{
		jnlpool.gtmsource_local->statslog = FALSE;
		jnlpool.gtmsource_local->statslog_file[0] = '\0';
		util_out_print("STATSLOG turned OFF", TRUE);
		return (NORMAL_SHUTDOWN);
	}
	jnlpool.gtmsource_local->statslog = TRUE;
	util_out_print("Stats log turned on", TRUE);
	return (NORMAL_SHUTDOWN);
}
예제 #10
0
int gtmsource_losttncomplete(void)
{
	int			idx;
	gtmsource_local_ptr_t	gtmsourcelocal_ptr;

	error_def(ERR_MUPCLIERR);
	error_def(ERR_TEXT);

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	/* We dont need the access control semaphore here. So release it first and avoid any potential deadlocks. */
	if (0 != rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in source server losttncomplete rel_sem"),
				REPL_SEM_ERRNO);
	assert(NULL == jnlpool.gtmsource_local);
	repl_log(stderr, TRUE, TRUE, "Initiating LOSTTNCOMPLETE operation on instance [%s]\n",
		jnlpool.repl_inst_filehdr->this_instname);
	/* If this is a root primary instance, propagate this information to secondaries as well so they reset zqgblmod_seqno to 0.
	 * If propagating primary, no need to send this to tertiaries as the receiver on the tertiary cannot have started with
	 * non-zero "zqgblmod_seqno" to begin with (PRIMARYNOTROOT error would have been issued).
	 */
	if (!jnlpool.jnlpool_ctl->upd_disabled)
	{
		grab_lock(jnlpool.jnlpool_dummy_reg);
		jnlpool.jnlpool_ctl->send_losttn_complete = TRUE;
		gtmsourcelocal_ptr = jnlpool.gtmsource_local_array;
		for (idx = 0; idx < NUM_GTMSRC_LCL; idx++, gtmsourcelocal_ptr++)
		{
			if (('\0' == gtmsourcelocal_ptr->secondary_instname[0])
					&& (0 == gtmsourcelocal_ptr->read_jnl_seqno)
					&& (0 == gtmsourcelocal_ptr->connect_jnl_seqno))
				continue;
			gtmsourcelocal_ptr->send_losttn_complete = TRUE;
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
	}
	/* Reset zqgblmod_seqno and zqgblmod_tn to 0 in this instance as well */
	repl_inst_reset_zqgblmod_seqno_and_tn();
	return (NORMAL_SHUTDOWN);
}
void mupip_exit_handler(void)
{
	char		err_log[1024];
	FILE		*fp;
	boolean_t	files_closed = TRUE;

	if (exit_handler_active)	/* Don't recurse if exit handler exited */
		return;
	exit_handler_active = TRUE;
	SET_PROCESS_EXITING_TRUE;
	if (jgbl.mupip_journal)
	{
		files_closed = mur_close_files();
		mupip_jnl_recover = FALSE;
	}
	jgbl.dont_reset_gbl_jrec_time = jgbl.forw_phase_recovery = FALSE;
	CANCEL_TIMERS;			/* Cancel all unsafe timers - No unpleasant surprises */
	secshr_db_clnup(NORMAL_TERMINATION);
	if (dollar_tlevel)
		OP_TROLLBACK(0);
	if (is_updhelper && NULL != helper_entry) /* haven't had a chance to cleanup, must be an abnormal exit */
	{
		helper_entry->helper_shutdown = ABNORMAL_SHUTDOWN;
		helper_entry->helper_pid = 0; /* vacate my slot */
		helper_entry = NULL;
	}
	if (recvpool.recvpool_ctl)
	{
		SHMDT(recvpool.recvpool_ctl);
		recvpool.recvpool_ctl = NULL;
	}
	gv_rundown(); /* also takes care of detaching from the journal pool */
	relinkctl_rundown(TRUE, TRUE);	/* decrement relinkctl-attach & rtnobj-reference counts */
	/* Log the exit of replication servers. In case they are exiting abnormally, their log file pointers
	 * might not be set up. In that case, use "stderr" for logging.
	 */
	if (is_src_server)
	{
		fp = (NULL != gtmsource_log_fp) ? gtmsource_log_fp : stderr;
		repl_log(fp, TRUE, TRUE, "Source server exiting...\n\n");
	} else if (is_rcvr_server)
	{
		fp = (NULL != gtmrecv_log_fp) ? gtmrecv_log_fp : stderr;
		repl_log(fp, TRUE, TRUE, "Receiver server exiting...\n\n");
	} else if (is_updproc)
	{
		fp = (NULL != updproc_log_fp) ? updproc_log_fp : stderr;
		repl_log(fp, TRUE, TRUE, "Update process exiting...\n\n");
	} else if (is_updhelper)
	{
		fp = (NULL != updhelper_log_fp) ? updhelper_log_fp : stderr;
		repl_log(fp, TRUE, TRUE, "Helper exiting...\n\n");
	} else
		mu_reset_term_characterstics(); /* the replication servers use files for output/error, not terminal */
	flush_pio();
	util_out_close();
	close_repl_logfiles();
	print_exit_stats();
	io_rundown(RUNDOWN_EXCEPT_STD);
	GTMCRYPT_CLOSE;
	if (need_core && !created_core)
	{
		++core_in_progress;
		DUMP_CORE;	/* This will not return */
	}
	if (!files_closed)
		_exit(EXIT_FAILURE);
}
예제 #12
0
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type)
{
	int			save_errno, save_shutdown;
	char			helper_cmd[UPDHELPER_CMD_MAXLEN];
	int			status;
	int4			i4status;
	mstr			helper_log_cmd, helper_trans_cmd;
	upd_helper_ctl_ptr_t	upd_helper_ctl;
#ifdef UNIX
	pid_t			helper_pid, waitpid_res;
#elif defined(VMS)
	uint4			helper_pid, cmd_channel;
	char			mbx_suffix[2 + 1]; /* hex representation of numbers 0 through MAX_UPD_HELPERS-1, +1 for '\0' */
	$DESCRIPTOR(cmd_desc_reader, UPDHELPER_READER_CMD_STR);
	$DESCRIPTOR(cmd_desc_writer, UPDHELPER_WRITER_CMD_STR);
#endif

	upd_helper_ctl = recvpool.upd_helper_ctl;
	save_shutdown = helper->helper_shutdown;
	helper->helper_shutdown = NO_SHUTDOWN;
#ifdef UNIX
	if (0 > (helper_pid = fork()))	/* BYPASSOK: we exec immediately, no FORK_CLEAN needed */
	{
		save_errno = errno;
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Could not fork update process"), save_errno);
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	if (0 == helper_pid)
	{	/* helper */
		getjobnum();
		helper->helper_pid_prev = process_id; /* identify owner of slot */
		helper_log_cmd.len  = STR_LIT_LEN(UPDHELPER_CMD);
		helper_log_cmd.addr = UPDHELPER_CMD;
		if (SS_NORMAL != (i4status = TRANS_LOG_NAME(&helper_log_cmd, &helper_trans_cmd, helper_cmd, SIZEOF(helper_cmd),
								dont_sendmsg_on_log2long)))
		{
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg(VARLSTCNT(6) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				   LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist"));
			if (SS_LOG2LONG == i4status)
				gtm_putmsg(VARLSTCNT(5) ERR_LOGTOOLONG, 3, LEN_AND_LIT(UPDHELPER_CMD), SIZEOF(helper_cmd) - 1);
			repl_errno = EREPL_UPDSTART_BADPATH;
			return UPDPROC_START_ERR;
		}
		helper_cmd[helper_trans_cmd.len] = '\0';
		if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2,
				(UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL))
		{
			save_errno = errno;
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				   LEN_AND_LIT("Could not exec Helper Process"), save_errno);
			repl_errno = EREPL_UPDSTART_EXEC;
			return UPDPROC_START_ERR;
		}
	}
#elif defined(VMS)
	/* Create detached server and write startup commands to it */
	i2hex(helper - upd_helper_ctl->helper_list, LIT_AND_LEN(mbx_suffix));
	mbx_suffix[SIZEOF(mbx_suffix) - 1] = '\0';
	/* A mailbox is created per helper, and the mailbox name is assigned to a logical. This logical will persist until the
	 * helper terminates. So, we need to assign a unique logical per helper. Hence the suffix. */
	if (SS_NORMAL != (status = repl_create_server((UPD_HELPER_READER == helper_type) ?  &cmd_desc_reader : &cmd_desc_writer,
					    		UPDHELPER_MBX_PREFIX, mbx_suffix, &cmd_channel, &helper->helper_pid_prev,
							ERR_RECVPOOLSETUP)))
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2, LEN_AND_LIT("Unable to spawn Helper process"), status);
		helper->helper_shutdown = save_shutdown;
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	helper_pid = helper->helper_pid_prev;
#endif
	/* Wait for helper to startup */
	while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START);
		UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);) /* Release defunct helper process if dead */
	}
	/* The helper has now gone far enough in the initialization, or died before initialization. Consider startup completed. */
#if defined(VMS)
	/* Deassign the send-cmd mailbox channel */
	if (SS_NORMAL != (status = sys$dassgn(cmd_channel)))
	{
		gtm_putmsg(VARLSTCNT(7) ERR_RECVPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Unable to close upd-send-cmd mbox channel"), status);
		helper->helper_shutdown = save_shutdown;
		repl_errno = EREPL_UPDSTART_BADPATH; /* Just to make an auto-shutdown */
		return UPDPROC_START_ERR;
	}
#endif
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n",
			(UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid);
	return UPDPROC_STARTED;
}
예제 #13
0
int gtmrecv_end1(boolean_t auto_shutdown)
{
	int4		strm_idx;
	int		exit_status, idx, status, save_errno;
	int		fclose_res, rc;
	seq_num		log_seqno, log_seqno1, jnlpool_seqno, jnlpool_strm_seqno[MAX_SUPPL_STRMS];
	uint4		savepid;

	exit_status = gtmrecv_end_helpers(TRUE);
	exit_status = gtmrecv_endupd();
	log_seqno = recvpool.recvpool_ctl->jnl_seqno;
	log_seqno1 = recvpool.upd_proc_local->read_jnl_seqno;
	strm_idx = recvpool.gtmrecv_local->strm_index;
	/* Detach from receive pool */
	recvpool.gtmrecv_local->shutdown = exit_status;
	recvpool.gtmrecv_local->recv_serv_pid = 0;
	if (0 > SHMDT(recvpool.recvpool_ctl))
		repl_log(stderr, TRUE, TRUE, "Error detaching from Receive Pool : %s\n", REPL_STR_ERROR);
	recvpool.recvpool_ctl = NULL;
	assert((NULL != jnlpool_ctl) && (jnlpool_ctl == jnlpool.jnlpool_ctl));
	if (NULL != jnlpool.jnlpool_ctl)
	{	/* Reset fields that might have been initialized by the receiver server after connecting to the primary.
		 * It is ok not to hold the journal pool lock while updating jnlpool_ctl fields since this will be the
		 * only process updating those fields.
		 */
		jnlpool.jnlpool_ctl->primary_instname[0] = '\0';
		jnlpool.jnlpool_ctl->gtmrecv_pid = 0;
		jnlpool_seqno = jnlpool.jnlpool_ctl->jnl_seqno;
		for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
			jnlpool_strm_seqno[idx] = jnlpool.jnlpool_ctl->strm_seqno[idx];
		/* Also take this opportunity to detach from the journal pool except in the auto_shutdown case. This is because
		 * the fields "jnlpool_ctl->repl_inst_filehdr->recvpool_semid" and "jnlpool_ctl->repl_inst_filehdr->recvpool_shmid"
		 * need to be reset by "gtmrecv_jnlpool_reset" (called from "gtmrecv_shutdown") which is invoked a little later.
		 */
		if (!auto_shutdown)
		{
			JNLPOOL_SHMDT(status, save_errno);
			if (0 > status)
				repl_log(stderr, TRUE, TRUE, "Error detaching from Journal Pool : %s\n", STRERROR(save_errno));
			jnlpool.jnlpool_ctl = jnlpool_ctl = NULL;
			jnlpool.repl_inst_filehdr = NULL;
			jnlpool.gtmsrc_lcl_array = NULL;
			jnlpool.gtmsource_local_array = NULL;
			jnlpool.jnldata_base = NULL;
			pool_init = FALSE;
		}
	} else
		jnlpool_seqno = 0;
	gtmrecv_free_msgbuff();
	gtmrecv_free_filter_buff();
	recvpool.recvpool_ctl = NULL;
	/* Close the connection with the Receiver */
	if (FD_INVALID != gtmrecv_listen_sock_fd)
		CLOSEFILE_RESET(gtmrecv_listen_sock_fd, rc);	/* resets "gtmrecv_listen_sock_fd" to FD_INVALID */
	if (FD_INVALID != gtmrecv_sock_fd)
		CLOSEFILE_RESET(gtmrecv_sock_fd, rc);	/* resets "gtmrecv_sock_fd" to FD_INVALID */
	repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Current Jnlpool Seqno : %llu\n", jnlpool_seqno);
	for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
	{
		if (jnlpool_strm_seqno[idx])
			repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Stream # %d : Current Jnlpool Stream Seqno : %llu\n",
				idx, jnlpool_strm_seqno[idx]);
	}
	if (0 < strm_idx)
		repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Receiver server has Stream # %d\n", strm_idx);
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Update process Read Seqno : %llu\n", log_seqno1);
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n", log_seqno);
	/* If log_seqno/log_seqno1 is 0, then do not decrement it as that will be interpreted as a huge positive seqno. Keep it 0 */
	if (log_seqno)
		log_seqno--;
	if (log_seqno1)
		log_seqno1--;
	repl_log(gtmrecv_log_fp, TRUE, FALSE, "REPL INFO - Last Recvd Seqno : %llu  Jnl Total : %llu  Msg Total : %llu\n",
			log_seqno, repl_recv_data_processed, repl_recv_data_recvd);
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Last Seqno processed by update process : %llu\n", log_seqno1);
	gtm_event_log_close();
	if (gtmrecv_filter & EXTERNAL_FILTER)
		repl_stop_filter();
	if (auto_shutdown)
		return (exit_status);
	else
		gtmrecv_exit(exit_status - NORMAL_SHUTDOWN);

	return -1; /* This will never get executed, added to make compiler happy */
}
예제 #14
0
int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned char *buffp)
{
	static int		report_cnt = 1;
	static int		next_report_at = 1;
	static boolean_t	send_xoff = FALSE;
	static boolean_t	xoff_sent = FALSE;
	static seq_num		send_seqno;
	static boolean_t	log_draining_msg = FALSE;
	static boolean_t	send_badtrans = FALSE;
	static boolean_t	send_cmp2uncmp = FALSE;
	static boolean_t	upd_shut_too_early_logged = FALSE;
	static time_t		last_reap_time = 0;
	repl_msg_t		xoff_msg;
	repl_badtrans_msg_t	bad_trans_msg;
	boolean_t		alert = FALSE, info = FALSE;
	int			return_status;
	gd_region		*region_top;
	unsigned char		*msg_ptr;				/* needed for REPL_{SEND,RECV}_LOOP */
	int			tosend_len, sent_len, sent_this_iter;	/* needed for REPL_SEND_LOOP */
	int			torecv_len, recvd_len, recvd_this_iter;	/* needed for REPL_RECV_LOOP */
	int			status, poll_dir;			/* needed for REPL_{SEND,RECV}_LOOP */
	int			temp_len, pending_msg_size;
	int			upd_start_status, upd_start_attempts;
	int			buffered_data_len;
	int			upd_exit_status;
	seq_num			temp_send_seqno;
	boolean_t		bad_trans_detected = FALSE, onln_rlbk_flg_set = FALSE;
	recvpool_ctl_ptr_t	recvpool_ctl;
	upd_proc_local_ptr_t	upd_proc_local;
	gtmrecv_local_ptr_t	gtmrecv_local;
	upd_helper_ctl_ptr_t	upd_helper_ctl;
	pid_t			waitpid_res;
	int4			msg_type, msg_len;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	recvpool_ctl = recvpool.recvpool_ctl;
	upd_proc_local = recvpool.upd_proc_local;
	gtmrecv_local = recvpool.gtmrecv_local;
	upd_helper_ctl = recvpool.upd_helper_ctl;
	if (SHUTDOWN == gtmrecv_local->shutdown)
	{
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "Shutdown signalled\n");
		gtmrecv_end(); /* Won't return */
	}
#	ifdef GTM_TLS
	/* If we sent a REPL_RENEG_ACK, then we cannot afford to send anymore asynchronous messages (like XOFF_ACK_ME) until we
	 * receive a REPL_RENEG_COMPLETE from the source server. This ensures that while the source server attempts to do a SSL/TLS
	 * renegotiation, it doesn't have any application data (like XOFF_ACK_ME) sitting in the pipe.
	 */
	if (REPLTLS_WAITING_FOR_RENEG_COMPLETE == repl_tls.renegotiate_state)
		return STOP_POLL;
#	endif
	/* Reset report_cnt and next_report_at to 1 when a new upd proc is forked */
	if ((1 == report_cnt) || (report_cnt == next_report_at))
	{
		/* A comment on the usage of NO_SHUTDOWN below for the alert variable. Since upd_proc_local->upd_proc_shutdown is
		 * a shared memory field (and could be concurrently changed by either the receiver server or the update process),
		 * we want to make sure it is the same value BEFORE and AFTER checking whether the update process is alive or not.
		 * If it is not NO_SHUTDOWN (i.e. is SHUTDOWN or NORMAL_SHUTDOWN or ABNORMAL_SHUTDOWN) it has shut down due to
		 * an external request so we do want to send out a false update-process-is-not-alive alert.
		 */
		if ((alert = ((NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown) && (SRV_DEAD == is_updproc_alive())
				&& (NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown)))
			|| (info = (((NORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown)
				|| (ABNORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown)) && (SRV_DEAD == is_updproc_alive()))))
		{
			if (alert)
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
					"ALERT : Receiver Server detected that Update Process is not ALIVE\n");
			else
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
					"INFO : Update process not running due to user initiated shutdown\n");
			if (1 == report_cnt)
			{
				send_xoff = TRUE;
				recvpool_ctl->old_jnl_seqno = recvpool_ctl->jnl_seqno;
				recvpool_ctl->jnl_seqno = 0;
				/* Even though we have identified that the update process is NOT alive, a waitpid on the update
				 * process PID is necessary so that the system doesn't leave any zombie process lying around.
				 * This is possible since any child process that dies without the parent doing a waitpid on it
				 * will be defunct unless the parent dies at which point the "init" process takes the role of
				 * the parent and invokes waitpid to remove the zombies.
				 * NOTE: It is possible that the update process was killed before the receiver server got a
				 * chance to record it's PID in the recvpool.upd_proc_local structure. In such a case, don't
				 * invoke waitpid as that will block us (receiver server) if this instance of the receiver
				 * server was started with helper processes.
				 */
				if (0 < upd_proc_local->upd_proc_pid)
				{
					WAITPID(upd_proc_local->upd_proc_pid, &upd_exit_status, 0, waitpid_res);
					/* Since the update process as part of its shutdown does NOT reset the upd_proc_pid, reset
					 * it here ONLY if the update process was NOT kill -9ed. This is needed because receiver
					 * server as part of its shutdown relies on this field (upd_proc_pid) to determine if the
					 * update process was cleanly shutdown or was kill -9ed.
					 */
					if (!alert)
						upd_proc_local->upd_proc_pid = 0;
				}
				upd_proc_local->bad_trans = FALSE; /* No point in doing bad transaction processing */
				upd_proc_local->onln_rlbk_flg = FALSE; /* No point handling online rollback */
			}
			gtmrecv_wait_for_jnl_seqno = TRUE;
			REPL_DPRINT1(
			       "gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of upd crash/shutdown\n");
			next_report_at *= GTMRECV_NEXT_REPORT_FACTOR;
			report_cnt++;
		}
	} else
		report_cnt++;
	/* Check if REPL_CMP2UNCMP or REPL_BADTRANS message needs to be sent */
	if (upd_proc_local->onln_rlbk_flg)
	{	/* Update process detected an online rollback and is requesting us to restart the connection. But before that, send
		 * REPL_XOFF source side and drain the replication pipe
		 */
		onln_rlbk_flg_set = TRUE;
		send_xoff = TRUE;
	} else if (!send_cmp2uncmp && gtmrecv_send_cmp2uncmp)
	{
		send_xoff = TRUE;
		send_seqno = recvpool_ctl->jnl_seqno;
		send_cmp2uncmp = TRUE;
	} else if (!send_badtrans && upd_proc_local->bad_trans)
	{
		send_xoff = TRUE;
		send_seqno = upd_proc_local->read_jnl_seqno;
		send_badtrans = TRUE;
		bad_trans_detected = TRUE;
	} else if (!upd_proc_local->bad_trans && send_badtrans && 1 != report_cnt)
	{
		send_badtrans = FALSE;
		bad_trans_detected = FALSE;
	}
	if (send_xoff && !xoff_sent)
	{	/* Send XOFF_ACK_ME if the receiver has a connection to the source. Do not attempt to send it if we dont even
		 * know the endianness of the remote side. In that case, we are guaranteed no initial handshake occurred and
		 * so no point sending the XOFF too. This saves us lots of trouble in case of cross-endian replication connections.
		 */
		assert((FD_INVALID  != gtmrecv_sock_fd) || repl_connection_reset);
		if ((FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known)
		{
			send_seqno = upd_proc_local->read_jnl_seqno;
			if (!remote_side->cross_endian)
			{
				xoff_msg.type = REPL_XOFF_ACK_ME;
				xoff_msg.len = MIN_REPL_MSGLEN;
				memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&send_seqno, SIZEOF(seq_num));
			} else
			{
				xoff_msg.type = GTM_BYTESWAP_32(REPL_XOFF_ACK_ME);
				xoff_msg.len = GTM_BYTESWAP_32(MIN_REPL_MSGLEN);
				temp_send_seqno = GTM_BYTESWAP_64(send_seqno);
				memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&temp_send_seqno, SIZEOF(seq_num));
			}
			REPL_SEND_LOOP(gtmrecv_sock_fd, &xoff_msg, MIN_REPL_MSGLEN, REPL_POLL_NOWAIT)
				; /* Empty Body */
			if (SS_NORMAL != status)
			{
				if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending XOFF_ACK_ME. "
							"Status = %d ; %s\n", status, STRERROR(status));
					repl_close(&gtmrecv_sock_fd);
					repl_connection_reset = TRUE;
					xoff_sent = FALSE;
					send_badtrans = FALSE;

				} else if (EREPL_SEND == repl_errno)
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. "
								"Error in send"), status);
				else
				{
					assert(EREPL_SELECT == repl_errno);
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. "
								"Error in select"), status);
				}
			} else
			{
				xoff_sent = TRUE;
				log_draining_msg = TRUE;
			}
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_XOFF_ACK_ME sent due to upd shutdown/crash or bad trans "
					"or ONLINE_ROLLBACK\n");
			send_xoff = FALSE;
		} else
		{	/* Connection has been lost OR initial handshake needs to happen again, so no point sending XOFF/BADTRANS */
			send_xoff = FALSE;
			send_badtrans = FALSE;
		}
	}
	/* Drain pipe */
	if (xoff_sent)
	{
		if (log_draining_msg)
		{	/* avoid multiple logs per instance */
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Draining replication pipe due to %s\n",
					send_cmp2uncmp ? "CMP2UNCMP" : (send_badtrans ? "BAD_TRANS" :
							(onln_rlbk_flg_set ? "ONLINE_ROLLBACK" : "UPD shutdown/crash")));
			log_draining_msg = FALSE;
		}
		if (0 != *buff_unprocessed)
		{	/* Throw away the current contents of the buffer */
			buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ? *pending_data_len : *buff_unprocessed);
			*buff_unprocessed -= buffered_data_len;
			buffp += buffered_data_len;
			*pending_data_len -= buffered_data_len;
			REPL_DPRINT2("gtmrecv_poll_actions : (1) Throwing away %d bytes from old buffer while draining\n",
				buffered_data_len);
			assert(remote_side->endianness_known);	/* only then is remote_side->cross_endian reliable */
			while (REPL_MSG_HDRLEN <= *buff_unprocessed)
			{
				assert(0 == (((unsigned long)buffp) % REPL_MSG_ALIGN));
				msg_len = ((repl_msg_ptr_t)buffp)->len;
				msg_type = ((repl_msg_ptr_t)buffp)->type;
		        	if (remote_side->cross_endian)
			        {
			                msg_len = GTM_BYTESWAP_32(msg_len);
			                msg_type = GTM_BYTESWAP_32(msg_type);
			        }
				msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK);
				assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN)));
				*pending_data_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN);
				buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ?
								*pending_data_len : *buff_unprocessed);
				*buff_unprocessed -= buffered_data_len;
				buffp += buffered_data_len;
				*pending_data_len -= buffered_data_len;
				REPL_DPRINT3("gtmrecv_poll_actions : (1) Throwing away message of "
					"type %d and length %d from old buffer while draining\n", msg_type, buffered_data_len);
			}
			if (0 < *buff_unprocessed)
			{
				memmove((unsigned char *)gtmrecv_msgp, buffp, *buff_unprocessed);
				REPL_DPRINT2("gtmrecv_poll_actions : Incomplete header of length %d while draining\n",
					*buff_unprocessed);
			}
		}
		status = SS_NORMAL;
		if (0 != *buff_unprocessed || 0 == *pending_data_len)
		{	/* Receive the header of a message */
			assert(REPL_MSG_HDRLEN > *buff_unprocessed);	/* so we dont pass negative length in REPL_RECV_LOOP */
			REPL_RECV_LOOP(gtmrecv_sock_fd, ((unsigned char *)gtmrecv_msgp) + *buff_unprocessed,
				       (REPL_MSG_HDRLEN - *buff_unprocessed), REPL_POLL_WAIT)
				; /* Empty Body */
			if (SS_NORMAL == status)
			{
				assert(remote_side->endianness_known);	/* only then is remote_side->cross_endian reliable */
		        	if (!remote_side->cross_endian)
	        		{
			                msg_len = gtmrecv_msgp->len;
			                msg_type = gtmrecv_msgp->type;
			        } else
			        {
			                msg_len = GTM_BYTESWAP_32(gtmrecv_msgp->len);
			                msg_type = GTM_BYTESWAP_32(gtmrecv_msgp->type);
			        }
				msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK);
				assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN)));
				msg_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN);
				REPL_DPRINT3("gtmrecv_poll_actions : Received message of type %d and length %d while draining\n",
					msg_type, msg_len);
			}
		}
		if ((SS_NORMAL == status) && (0 != *buff_unprocessed || 0 == *pending_data_len) && (REPL_XOFF_ACK == msg_type))
		{	/* Receive the rest of the XOFF_ACK msg and signal the drain as complete */
			REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, (MIN_REPL_MSGLEN - REPL_MSG_HDRLEN), REPL_POLL_WAIT)
				; /* Empty Body */
			if (SS_NORMAL == status)
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
						"REPL INFO - XOFF_ACK received. Drained replication pipe completely\n");
				upd_shut_too_early_logged = FALSE;
				xoff_sent = FALSE;
				return_status = STOP_POLL;
			}
		} else if (SS_NORMAL == status)
		{	/* Drain the rest of the message */
			if (0 < *pending_data_len)
			{
				pending_msg_size = *pending_data_len;
				REPL_DPRINT2("gtmrecv_poll_actions : (2) Throwing away %d bytes from pipe\n", pending_msg_size);
			} else
			{
				pending_msg_size = msg_len - REPL_MSG_HDRLEN;
				REPL_DPRINT3("gtmrecv_poll_actions : (2) Throwing away message of "
					"type %d and length %d from pipe\n", msg_type, msg_len);
			}
			for ( ; SS_NORMAL == status && 0 < pending_msg_size; pending_msg_size -= gtmrecv_max_repl_msglen)
			{
				temp_len = (pending_msg_size < gtmrecv_max_repl_msglen)? pending_msg_size : gtmrecv_max_repl_msglen;
				REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, temp_len, REPL_POLL_WAIT)
					; /* Empty Body */
			}
			*buff_unprocessed = 0; *pending_data_len = 0;
			if (SS_NORMAL == status && info && !upd_shut_too_early_logged)
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "ALERT : User initiated shutdown of Update Process done "
						"when there was data in the replication pipe\n");
				upd_shut_too_early_logged = TRUE;
			}
			return_status = CONTINUE_POLL;
		}
		if (SS_NORMAL != status)
		{
			if (EREPL_RECV == repl_errno)
			{
				if (REPL_CONN_RESET(status))
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while receiving XOFF_ACK. "
							"Status = %d ; %s\n", status, STRERROR(status));
					repl_close(&gtmrecv_sock_fd);
					repl_connection_reset = TRUE;
					xoff_sent = FALSE;
					send_badtrans = FALSE;
					return_status = STOP_POLL;
				} else
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error while draining replication pipe. Error in recv"), status);
			} else
			{
				assert(EREPL_SELECT == repl_errno);
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error while draining replication pipe. Error in select"), status);
			}
		}
	} else
		return_status = STOP_POLL;
	/* Like was done before for the XOFF_ACK_ME message, send a BADTRANS/CMP2UNCMP message only if we know
	 * the endianness of the other side. If not, no point in sending one anyways and saves us trouble in
	 * case of cross-endian replication connections.
	 */
	if ((STOP_POLL == return_status) && (send_badtrans || send_cmp2uncmp)
		&& (FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known)
	{	/* Send REPL_BADTRANS or REPL_CMP2UNCMP message */
		if (!remote_side->cross_endian)
		{
			bad_trans_msg.type = send_cmp2uncmp ? REPL_CMP2UNCMP : REPL_BADTRANS;
			bad_trans_msg.len  = MIN_REPL_MSGLEN;
			bad_trans_msg.start_seqno = send_seqno;
		} else
		{
			bad_trans_msg.type = send_cmp2uncmp ? GTM_BYTESWAP_32(REPL_CMP2UNCMP) : GTM_BYTESWAP_32(REPL_BADTRANS);
			bad_trans_msg.len  = GTM_BYTESWAP_32(MIN_REPL_MSGLEN);
			bad_trans_msg.start_seqno = GTM_BYTESWAP_64(send_seqno);
		}
		REPL_SEND_LOOP(gtmrecv_sock_fd, &bad_trans_msg, bad_trans_msg.len, REPL_POLL_NOWAIT)
			; /* Empty Body */
		if (SS_NORMAL == status)
		{
			if (send_cmp2uncmp)
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_CMP2UNCMP message sent with seqno %llu\n", send_seqno);
			else
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_BADTRANS message sent with seqno %llu\n", send_seqno);
		} else
		{
			if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
			{
				if (send_cmp2uncmp)
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_CMP2UNCMP. "
							"Status = %d ; %s\n", status, STRERROR(status));
				} else
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_BADTRANS. "
							"Status = %d ; %s\n", status, STRERROR(status));
				}
				repl_close(&gtmrecv_sock_fd);
				repl_connection_reset = TRUE;
				return_status = STOP_POLL;
			} else if (EREPL_SEND == repl_errno)
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in send"), status);
			else
			{
				assert(EREPL_SELECT == repl_errno);
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in select"), status);
			}
		}
		send_badtrans = FALSE;
		if (send_cmp2uncmp)
		{
			REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because this receiver"
				"server requested a fall-back from compressed to uncompressed operation\n");
			gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */
			gtmrecv_bad_trans_sent = TRUE;
			gtmrecv_send_cmp2uncmp = FALSE;
			send_cmp2uncmp = FALSE;
		}
	}
	if ((upd_proc_local->bad_trans && bad_trans_detected) || onln_rlbk_flg_set
		|| (UPDPROC_START == upd_proc_local->start_upd) && (1 != report_cnt))
	{
		if (UPDPROC_START == upd_proc_local->start_upd)
		{
			assert(is_updproc_alive() != SRV_ALIVE);
			upd_proc_local->upd_proc_shutdown = NO_SHUTDOWN;
		}
		recvpool_ctl->wrapped = FALSE;
		recvpool_ctl->write_wrap = recvpool_ctl->recvpool_size;
		recvpool_ctl->write = 0;
		/* Reset last_rcvd_histinfo, last_valid_histinfo etc. as they reflect context from unprocessed data
		 * in the receive pool and those are no longer valid because we have drained the receive pool.
		 */
		GTMRECV_CLEAR_CACHED_HISTINFO(recvpool.recvpool_ctl, jnlpool, jnlpool_ctl, INSERT_STRM_HISTINFO_FALSE);
		if (UPDPROC_START == upd_proc_local->start_upd)
		{
			/* Attempt starting the update process */
			for (upd_start_attempts = 0;
			     UPDPROC_START_ERR == (upd_start_status = gtmrecv_upd_proc_init(FALSE)) &&
			     GTMRECV_MAX_UPDSTART_ATTEMPTS > upd_start_attempts;
			     upd_start_attempts++)
			{
				if (EREPL_UPDSTART_SEMCTL == repl_errno || EREPL_UPDSTART_BADPATH == repl_errno)
				{
					gtmrecv_autoshutdown();
				} else if (EREPL_UPDSTART_FORK == repl_errno)
				{
					/* Couldn't start up update now, can try later */
					LONG_SLEEP(GTMRECV_WAIT_FOR_PROC_SLOTS);
					continue;
				} else if (EREPL_UPDSTART_EXEC == repl_errno)
				{
					/* In forked child, could not exec, should exit */
					gtmrecv_exit(ABNORMAL_SHUTDOWN);
				}
			}
			if (UPDPROC_STARTED == (upd_proc_local->start_upd = upd_start_status))
			{
				REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of "
					     "upd restart\n");
				gtmrecv_wait_for_jnl_seqno = TRUE;
				report_cnt = next_report_at = 1;
				if (send_xoff && (FD_INVALID == gtmrecv_sock_fd))
				{
					/* Update start command was issued before connection was established,
					 * no point in sending XOFF.  */
					send_xoff = FALSE;
				}
			} else
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "%d failed attempts to fork update process. Try later\n",
					 upd_start_attempts);
			}
		} else
		{
			gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */
			if (onln_rlbk_flg_set)
			{
				assert(NULL != jnlpool_ctl);
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "Closing connection due to ONLINE ROLLBACK\n");
 				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
 						jnlpool_ctl->jnl_seqno);
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n",
						recvpool_ctl->jnl_seqno);
				repl_close(&gtmrecv_sock_fd);
				repl_connection_reset = TRUE;
				xoff_sent = FALSE;
				send_badtrans = FALSE;
				upd_proc_local->onln_rlbk_flg = FALSE;
				/* Before restarting afresh, sync the online rollback cycles. This way any future grab_lock that
				 * we do after restarting should not realize an unhandled online rollback.  For receiver, it is
				 * just syncing the journal pool cycles as the databases are not opened. But, to be safe, grab
				 * the lock and sync the cycles.
				 */
				grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY);
				SYNC_ONLN_RLBK_CYCLES;
				rel_lock(jnlpool.jnlpool_dummy_reg);
				return_status = STOP_POLL;
				recvpool_ctl->jnl_seqno = 0;
			} else
			{
				REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because bad trans"
						"sent\n");
				gtmrecv_bad_trans_sent = TRUE;
				upd_proc_local->bad_trans = FALSE;
				recvpool_ctl->jnl_seqno = upd_proc_local->read_jnl_seqno;
			}
		}
	}
	if ((0 == *pending_data_len) && (0 != gtmrecv_local->changelog))
	{
		if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGINTERVAL)
		{
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log interval from %u to %u\n",
					log_interval, gtmrecv_local->log_interval);
			log_interval = gtmrecv_local->log_interval;
			gtmrecv_reinit_logseqno(); /* will force a LOG on the first recv following the interval change */
		}
		if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGFILE)
		{
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log file to %s\n", gtmrecv_local->log_file);
			repl_log_init(REPL_GENERAL_LOG, &gtmrecv_log_fd, gtmrecv_local->log_file);
			repl_log_fd2fp(&gtmrecv_log_fp, gtmrecv_log_fd);
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Change log to %s successful\n",gtmrecv_local->log_file);
		}
		/* NOTE: update process and receiver each ignore any setting specific to the other (REPLIC_CHANGE_UPD_LOGINTERVAL,
		 * REPLIC_CHANGE_LOGINTERVAL) */
		if (REPLIC_CHANGE_LOGINTERVAL == gtmrecv_local->changelog)
			upd_proc_local->changelog = 0;
		else
			upd_proc_local->changelog = gtmrecv_local->changelog; /* Pass changelog request to the update process */
		gtmrecv_local->changelog = 0;
	}
	if (0 == *pending_data_len && !gtmrecv_logstats && gtmrecv_local->statslog)
	{
		gtmrecv_logstats = TRUE;
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "Begin statistics logging\n");
	} else if (0 == *pending_data_len && gtmrecv_logstats && !gtmrecv_local->statslog)
	{
		gtmrecv_logstats = FALSE;
		/* Force all data out to the file before closing the file */
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "End statistics logging\n");
	}
	if (0 == *pending_data_len)
	{
		if (upd_helper_ctl->start_helpers)
		{
			gtmrecv_helpers_init(upd_helper_ctl->start_n_readers, upd_helper_ctl->start_n_writers);
			upd_helper_ctl->start_helpers = FALSE;
		}
		if (HELPER_REAP_NONE != (status = upd_helper_ctl->reap_helpers) ||
			(double)GTMRECV_REAP_HELPERS_INTERVAL <= difftime(gtmrecv_now, last_reap_time))
		{
			gtmrecv_reap_helpers(HELPER_REAP_WAIT == status);
			last_reap_time = gtmrecv_now;
		}
	}
	return (return_status);
}
int gtmsource_checkhealth(void)
{
	uint4			gtmsource_pid;
	int			status, semval, save_errno;
	boolean_t		srv_alive, all_files_open;
	gtmsource_local_ptr_t	gtmsourcelocal_ptr;
	int4			index, num_servers;
	seq_num			reg_seqno, jnlseqno;
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			errtxt[OUT_BUFF_SIZE];
	char			*modestr;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	if (NULL != jnlpool.gtmsource_local)	/* Check health of a specific source server */
		gtmsourcelocal_ptr = jnlpool.gtmsource_local;
	else
		gtmsourcelocal_ptr = &jnlpool.gtmsource_local_array[0];
	num_servers = 0;
	status = SRV_ALIVE;
	for (index = 0; index < NUM_GTMSRC_LCL; index++, gtmsourcelocal_ptr++)
	{
		if ('\0' == gtmsourcelocal_ptr->secondary_instname[0])
		{
			assert(NULL == jnlpool.gtmsource_local);
			continue;
		}
		gtmsource_pid = gtmsourcelocal_ptr->gtmsource_pid;
		/* If CHECKHEALTH on a specific secondary instance is requested, print the health information irrespective
		 * of whether a source server for that instance is alive or not. For CHECKHEALTH on ALL secondary instances
		 * print health information only for those instances that have an active or passive source server alive.
		 */
		if ((NULL == jnlpool.gtmsource_local) && (0 == gtmsource_pid))
			continue;
		repl_log(stdout, TRUE, TRUE, "Initiating CHECKHEALTH operation on source server pid [%d] for secondary instance"
			" name [%s]\n", gtmsource_pid, gtmsourcelocal_ptr->secondary_instname);
		srv_alive = (0 == gtmsource_pid) ? FALSE : is_proc_alive(gtmsource_pid, 0);
		if (srv_alive)
		{
			if (GTMSOURCE_MODE_ACTIVE == gtmsourcelocal_ptr->mode)
				modestr = "ACTIVE";
			else if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsourcelocal_ptr->mode)
				modestr = "ACTIVE REQUESTED";
			else if (GTMSOURCE_MODE_PASSIVE == gtmsourcelocal_ptr->mode)
				modestr = "PASSIVE";
			else if (GTMSOURCE_MODE_PASSIVE_REQUESTED == gtmsourcelocal_ptr->mode)
				modestr = "PASSIVE REQUESTED";
			else
			{
				assert(gtmsourcelocal_ptr->mode != gtmsourcelocal_ptr->mode);
				modestr = "UNKNOWN";
			}
			repl_log(stderr, FALSE, TRUE, FORMAT_STR1, gtmsource_pid, "Source server", "", modestr);
			status |= SRV_ALIVE;
			num_servers++;
		} else
		{
			repl_log(stderr, FALSE, TRUE, FORMAT_STR, gtmsource_pid, "Source server", " NOT");
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_SRCSRVNOTEXIST, 2,
					LEN_AND_STR(gtmsourcelocal_ptr->secondary_instname));
			status |= SRV_DEAD;
		}
		if (NULL != jnlpool.gtmsource_local)
			break;
	}
	if (NULL == jnlpool.gtmsource_local)
	{	/* Compare number of servers that were found alive with the current value of the COUNT semaphore.
		 * If they are not equal, report the discrepancy.
		 */
		semval = get_sem_info(SOURCE, SRC_SERV_COUNT_SEM, SEM_INFO_VAL);
		if (-1 == semval)
		{
			save_errno = errno;
			repl_log(stderr, FALSE, TRUE,
				"Error fetching source server count semaphore value : %s\n", STRERROR(save_errno));
			status |= SRV_ERR;
		} else if (semval != num_servers)
		{
			repl_log(stderr, FALSE, FALSE,
				"Error : Expected %d source server(s) to be alive but found %d actually alive\n",
				semval, num_servers);
			repl_log(stderr, FALSE, TRUE, "Error : Check if any pid reported above is NOT a source server process\n");
			status |= SRV_ERR;
		}
	}
	/* Check that there are no regions with replication state = WAS_ON (i.e. repl_was_open). If so report that.
	 * But to determine that, we need to attach to all the database regions.
	 */
	gvinit();
	/* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		status |= SRV_ERR;
	} else
	{
		for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
		{
			csa = &FILE_INFO(reg)->s_addrs;
			csd = csa->hdr;
			if (REPL_WAS_ENABLED(csd))
			{
				assert(!JNL_ENABLED(csd) || REPL_ENABLED(csd));	/* || is for turning replication on concurrently */
				reg_seqno = csd->reg_seqno;
				jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO;
				sgtm_putmsg(errtxt, VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(reg),
					&reg_seqno, &reg_seqno, &jnlseqno, &jnlseqno);
				repl_log(stderr, FALSE, TRUE, errtxt);
				status |= SRV_ERR;
			}
		}
	}
	if (jnlpool.jnlpool_ctl->freeze)
	{
		repl_log(stderr, FALSE, FALSE, "Warning: Instance Freeze is ON\n");
		repl_log(stderr, FALSE, TRUE, "   Freeze Comment: %s\n", jnlpool.jnlpool_ctl->freeze_comment);
		status |= SRV_ERR;
	}
	return (status + NORMAL_SHUTDOWN);
}
예제 #16
0
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status)
{
	uint4           savepid;
	boolean_t       shut_upd_too = FALSE;
	int             status;
	unix_db_info	*udi;

	error_def(ERR_RECVPOOLSETUP);
	error_def(ERR_TEXT);

	repl_log(stdout, TRUE, TRUE, "Initiating shut down\n");
	call_on_signal = NULL;		/* So we don't reenter on error */
	/* assert that auto shutdown should be invoked only if the current process is a receiver server */
	assert(!auto_shutdown || gtmrecv_srv_count);
	if (auto_shutdown)
	{	/* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
		status = grab_sem(RECV, RECV_POOL_ACCESS_SEM);
		if (0 > status)
		{
			repl_log(stderr, TRUE, TRUE,
				"Error grabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR);
			return (ABNORMAL_SHUTDOWN);
		}
	} else
	{	/* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */
		DEBUG_ONLY(udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);)
		assert(udi->grabbed_ftok_sem);
		assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]);
		/* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */
		assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]);
		recvpool.gtmrecv_local->shutdown = SHUTDOWN;
		/* Wait for receiver server to die. But release ftok semaphore and recvpool access control semaphore before
		 * waiting as the concurrently running receiver server might need these (e.g. if it is about to call the
		 * function "repl_inst_was_rootprimary").
		 */
		if (0 != rel_sem(RECV, RECV_POOL_ACCESS_SEM))
			gtm_putmsg(VARLSTCNT(7) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in receiver server shutdown rel_sem"),
				REPL_SEM_ERRNO);
		repl_inst_ftok_sem_release();
		/* Wait for receiver server to shut down */
		while((SHUTDOWN == recvpool.gtmrecv_local->shutdown)
				&& (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid))
				&& is_proc_alive(savepid, 0))
			SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN);
		/* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
		status = grab_sem(RECV, RECV_POOL_ACCESS_SEM);
		if (0 > status)
		{
			repl_log(stderr, TRUE, TRUE,
				"Error regrabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR);
			return (ABNORMAL_SHUTDOWN);
		}
		exit_status = recvpool.gtmrecv_local->shutdown;
		if (SHUTDOWN == exit_status)
		{
			if (0 == savepid) /* No Receiver Process */
				exit_status = NORMAL_SHUTDOWN;
			else /* Receiver Server Crashed */
			{
				repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n");
				exit_status = ABNORMAL_SHUTDOWN;
				shut_upd_too = TRUE;
			}
		}
	}
예제 #17
0
static int helper_init(upd_helper_entry_ptr_t helper, recvpool_user helper_type)
{
	int			save_errno, save_shutdown;
	char			helper_cmd[UPDHELPER_CMD_MAXLEN];
	int			helper_cmd_len;
	int			status;
	int4			i4status;
	pid_t			helper_pid, waitpid_res;

	save_shutdown = helper->helper_shutdown;
	helper->helper_shutdown = NO_SHUTDOWN;
	if (!gtm_dist_ok_to_use)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_GTMDISTUNVERIF, 4, STRLEN(gtm_dist), gtm_dist,
				gtmImageNames[image_type].imageNameLen, gtmImageNames[image_type].imageName);
	if (WBTEST_ENABLED(WBTEST_MAXGTMDIST_HELPER_PROCESS))
	{
		memset(gtm_dist, 'a', GTM_PATH_MAX-2);
		gtm_dist[GTM_PATH_MAX-1] = '\0';
	}
	helper_cmd_len = SNPRINTF(helper_cmd, UPDHELPER_CMD_MAXLEN, UPDHELPER_CMD, gtm_dist);
	if ((-1 == helper_cmd_len) || (UPDHELPER_CMD_MAXLEN <= helper_cmd_len))
	{
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_HLPPROC, 0, ERR_TEXT, 2,
			   LEN_AND_LIT("Could not find path of Helper Process. Check value of $gtm_dist"));
		repl_errno = EREPL_UPDSTART_BADPATH;
		return UPDPROC_START_ERR ;
	}
	FORK(helper_pid);
	if (0 > helper_pid)
	{
		save_errno = errno;
		helper->helper_shutdown = save_shutdown;
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2,
				LEN_AND_LIT("Could not fork Helper process"), save_errno);
		repl_errno = EREPL_UPDSTART_FORK;
		return UPDPROC_START_ERR;
	}
	if (0 == helper_pid)
	{	/* helper */
		getjobnum();
		helper->helper_pid_prev = process_id; /* identify owner of slot */
		if (WBTEST_ENABLED(WBTEST_BADEXEC_HELPER_PROCESS))
			STRCPY(helper_cmd, "ersatz");
		if (-1 == EXECL(helper_cmd, helper_cmd, UPDHELPER_CMD_ARG1, UPDHELPER_CMD_ARG2,
				(UPD_HELPER_READER == helper_type) ? UPDHELPER_READER_CMD_ARG3 : UPDHELPER_WRITER_CMD_ARG3, NULL))
		{
			save_errno = errno;
			helper->helper_shutdown = save_shutdown;
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_HLPPROC, 0, ERR_TEXT, 2,
				   LEN_AND_LIT("Could not exec Helper Process"), save_errno);
			repl_errno = EREPL_UPDSTART_EXEC;
			UNDERSCORE_EXIT(UPDPROC_START_ERR);
		}
	}
	/* Wait for helper to startup */
	while (helper_pid != helper->helper_pid && is_proc_alive(helper_pid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_SRV_START);
		UNIX_ONLY(WAITPID(helper_pid, &status, WNOHANG, waitpid_res);) /* Release defunct helper process if dead */
	}
	/* The helper has now gone far enough in the initialization, or died before initialization. Consider startup completed. */
	repl_log(gtmrecv_log_fp, TRUE, TRUE, "Helper %s started. PID %d [0x%X]\n",
			(UPD_HELPER_READER == helper_type) ? "reader" : "writer", helper_pid, helper_pid);
	return UPDPROC_STARTED;
}
예제 #18
0
int gtmsource()
{
	int			status, log_init_status, waitpid_res, save_errno;
	char			print_msg[1024], tmpmsg[1024];
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa, *repl_csa;
	boolean_t		all_files_open, isalive;
	pid_t			pid, ppid, procgp;
	seq_num			read_jnl_seqno, jnl_seqno;
	unix_db_info		*udi;
	gtmsource_local_ptr_t	gtmsource_local;
	boolean_t		this_side_std_null_coll;
	int			null_fd, rc;

	memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs));
	call_on_signal = gtmsource_sigstop;
	ESTABLISH_RET(gtmsource_ch, SS_NORMAL);
	if (-1 == gtmsource_get_opt())
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR);
	if (gtmsource_options.shut_down)
	{	/* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return
		 * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while
		 * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other
		 * MUPIP REPLIC commands that need these locks for their function.
		 */
		if (0 < gtmsource_options.shutdown_time)
		{
			repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n",
												gtmsource_options.shutdown_time);
			LONG_SLEEP(gtmsource_options.shutdown_time);
		} else
			repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n");
	} else if (gtmsource_options.start)
	{
		repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n",
			gtmsource_options.secondary_instname);
	}
	if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary))
	{	/* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now
		 * in case this is a secondary -> primary transition. This is so we can later switch journal files in all
		 * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since
		 * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we
		 * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always
		 * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has
		 * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic
		 * currently has assumptions that a process holds only one ftok at any point in time.
		 */
		assert(NULL == gd_header);
		gvinit();
		all_files_open = region_init(FALSE);
		if (!all_files_open)
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator);
	/* is_jnlpool_creator == TRUE ==> this process created the journal pool
	 * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it.
	 */
	if (gtmsource_options.shut_down)
		gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.activate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.deactivate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.checkhealth)
		gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.changelog)
		 gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showbacklog)
		gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.stopsourcefilter)
		gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.jnlpool)
		gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.losttncomplete)
		gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.needrestart)
		gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showfreeze)
		gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.setfreeze)
		gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN);
	else if (!gtmsource_options.start)
	{
		assert(CLI_PRESENT == cli_present("STATSLOG"));
		gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN);
	}
	assert(gtmsource_options.start);
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */
	gtmsource_local = jnlpool.gtmsource_local;
	gtmsource_local->child_server_running = FALSE;
	FORK(pid);
	if (0 > pid)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
			ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno);
	} else if (0 < pid)
	{	/* Parent. Wait until child sets "child_server_running" to FALSE. That is an indication that the child
		 * source server has completed its initialization phase and is all set so the parent command can return.
		 */
		while (isalive = is_proc_alive(pid, 0))	/* note : intended assignment */
		{
			if (gtmsource_local->child_server_running)
				break;
			/* To take care of reassignment of PIDs, the while condition should be && with the condition
			 * (PPID of pid == process_id)
			 */
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START);
			WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */
		}
		if (isalive)
		{	/* Child process is alive and started with no issues */
			if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)))
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
					ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno);
			ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE);
		} else
		{	/* Child source server process errored out at startup and is no longer alive.
			 * If we were the one who created the journal pool, let us clean it up.
			 */
			repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n");
			if (is_jnlpool_creator)
				status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN);
		}
		/* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup
		 * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the
		 * value of sem 1 before the child grabs sem 1.
		 */
		gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR);
	}
	/* Point stdin to /dev/null */
	OPENFILE("/dev/null", O_RDONLY, null_fd);
	if (0 > null_fd)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0);
	FCNTL3(null_fd, F_DUPFD, 0, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0);
	CLOSEFILE(null_fd, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0);
	/* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore
	 * at this point. The variables that indicate this would have been copied over to the child during the fork. This will
	 * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child
	 * to ensure they do not misrepresent the holder of those semaphores.
	 */
	ftok_sem_reg = NULL;
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(udi->grabbed_ftok_sem);
	udi->grabbed_ftok_sem = FALSE;
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
	assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
	/* Start child source server initialization */
	is_src_server = TRUE;
	OPERATOR_LOG_MSG;
	process_id = getpid();
	/* Reinvoke secshr related initialization with the child's pid */
	INVOKE_INIT_SECSHR_ADDRS;
	/* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool.
	 * Note that the initialization would already have been done by the parent receiver startup command but we need to
	 * redo the initialization with the child process id.
	 */
	assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id));
	mutex_per_process_init();
	START_HEARTBEAT_IF_NEEDED;
	ppid = getppid();
	log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file);
	assert(SS_NORMAL == log_init_status);
	repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd);
	if (-1 == (procgp = setsid()))
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Source server error in setsid"), errno);
#	endif /* REPL_DEBUG_NOBACKGROUND */
	if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level)
		gtm_zlib_init();	/* Open zlib shared library for compression/decompression */
	REPL_DPRINT1("Setting up regions\n");
	gvinit();

	/* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		gtmsource_exit(ABNORMAL_SHUTDOWN);
	}
	/* Determine primary side null subscripts collation order */
	/* Also check whether all regions have same null collation order */
	this_side_std_null_coll = -1;
	for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
	{
		csa = &FILE_INFO(reg)->s_addrs;
		if (this_side_std_null_coll != csa->hdr->std_null_coll)
		{
			if (-1 == this_side_std_null_coll)
				this_side_std_null_coll = csa->hdr->std_null_coll;
			else
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF);
				gtmsource_exit(ABNORMAL_SHUTDOWN);
			}
		}
		if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		if (reg->read_only && REPL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				   RTS_ERROR_LITERAL("Source Server does not have write permissions to one or "
					             "more database files that are replicated"));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	/* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */
	gtmsource_local->gtmsource_pid = process_id;
	gtmsource_local->gtmsource_state = GTMSOURCE_START;
	if (is_jnlpool_creator)
	{
		DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id);
		gtmsource_seqno_init(this_side_std_null_coll);
		if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)
		{	/* Created the journal pool as a root primary. Append a history record to the replication instance file.
			 * Invoke the function "gtmsource_rootprimary_init" to do that.
			 */
			gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
		}
	}
	/* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used
	 * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0.
	 * Note that the parent source server startup command would have done an increment of the ftok counter semaphore
	 * for the replication instance file. But the source server process (the child) that comes here would not have done
	 * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay.
	 */
	if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP);
	/* Increment the source server count semaphore */
	status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM);
	if (0 != status)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno);
	}
#	else
	if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM)))
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno);
	}
#	endif /* REPL_DEBUG_NOBACKGROUND */

	gtmsource_srv_count++;
	gtmsource_local->child_server_running = TRUE;	/* At this point, the parent startup command will stop waiting for child */
	gtm_event_log_init();
	/* Log source server startup command line first */
	SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str);
	repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg);

	SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]",
		process_id, gtmsource_local->secondary_instname);
	sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
	repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	if (is_jnlpool_creator)
	{
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	} else
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
#	ifdef GTM_TLS
	if (REPL_TLS_REQUESTED)
	{
		repl_do_tls_init(gtmsource_log_fp);
		assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK);
	}
#	endif
	if (jnlpool.jnlpool_ctl->freeze)
	{
		last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze;
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname);
		repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg);
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment);
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	}
	gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly;
	do
	{ 	/* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */
		gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START;
		if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN))
		{
			gtmsource_poll_actions(FALSE);
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE);
			continue;
		}
		if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode)
		{	/* Shutdown initiated */
			assert(gtmsource_local->shutdown == SHUTDOWN);
			sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2,
				    RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled"));
			repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
			gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
			break;
		}
		gtmsource_poll_actions(FALSE);
		if (GTMSOURCE_CHANGING_MODE == gtmsource_state)
			continue;
		if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode)
			gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE;
		SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port);
		sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
		gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
		assert(!repl_csa->hold_onto_crit);	/* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK);
		if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state)
		{
			repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n");
			repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
					jnlpool.jnlpool_ctl->jnl_seqno);
			continue;
		}
		QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr);
		gtmsource_local->read = jnlpool.jnlpool_ctl->write;
		gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL;
		read_jnl_seqno = gtmsource_local->read_jnl_seqno;
		assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno);
		if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno)
		{
			gtmsource_local->read_state = READ_FILE;
			QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno);
			gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
		if (SS_NORMAL != (status = gtmsource_alloc_tcombuff()))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
				  RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status);
		gtmsource_filter = NO_FILTER;
		if ('\0' != gtmsource_local->filter_cmd[0])
		{
			if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd)))
				gtmsource_filter |= EXTERNAL_FILTER;
			else
				gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		gtmsource_process();
		/* gtmsource_process returns only when mode needs to be changed to PASSIVE */
		assert(gtmsource_state == GTMSOURCE_CHANGING_MODE);
		gtmsource_ctl_close();
		gtmsource_free_msgbuff();
		gtmsource_free_tcombuff();
		gtmsource_free_filter_buff();
		gtmsource_stop_heartbeat();
		if (FD_INVALID != gtmsource_sock_fd)
			repl_close(&gtmsource_sock_fd);
		if (gtmsource_filter & EXTERNAL_FILTER)
			repl_stop_filter();
	} while (TRUE);
예제 #19
0
int	gtmsource_ipc_cleanup(boolean_t auto_shutdown, int *exit_status)
{
	boolean_t	i_am_the_last_user, attempt_ipc_cleanup;
	int		status, detach_status, remove_status, expected_nattach;
	unix_db_info	*udi;
	struct shmid_ds	shm_buf;

	/* Attempt cleaning up the IPCs */
	attempt_ipc_cleanup = TRUE;

	/* Wait for the Source Server to detach and takeover the semaphore */
	if (!auto_shutdown && 0 > grab_sem(SOURCE, SRC_SERV_COUNT_SEM))
	{
		repl_log(stderr, FALSE, TRUE, "Error taking control of source server count semaphore : %s. Shutdown not complete\n",
														REPL_SEM_ERROR);
		*exit_status = ABNORMAL_SHUTDOWN;
		attempt_ipc_cleanup = FALSE;
	}

	udi = (unix_db_info *)FILE_INFO(jnlpool.jnlpool_dummy_reg);

	if (!auto_shutdown || !gtmsource_srv_count)
		expected_nattach = 1; /* Self, or parent */
	else
		expected_nattach = 0;  /* Source server already detached */

	i_am_the_last_user = (((status = shmctl(udi->shmid, IPC_STAT, &shm_buf)) == 0) && (shm_buf.shm_nattch == expected_nattach));
	if (!i_am_the_last_user)
	{
		if (status < 0)
			repl_log(stderr, FALSE, TRUE, "Error in jnlpool shmctl : %s\n", STRERROR(ERRNO));
		else
			repl_log(stderr, FALSE, TRUE, "Not deleting jnlpool ipcs. %d processes still attached to jnlpool\n",
				 shm_buf.shm_nattch - expected_nattach);
		attempt_ipc_cleanup = FALSE;
		*exit_status = ABNORMAL_SHUTDOWN;
	}

	if (attempt_ipc_cleanup)
	{
		if (INVALID_SHMID != udi->shmid && (auto_shutdown || (detach_status = SHMDT(jnlpool.jnlpool_ctl)) == 0)
				   && (remove_status = shm_rmid(udi->shmid)) == 0)
		{
			jnlpool.jnlpool_ctl = jnlpool_ctl = NULL;
			pool_init = FALSE;
			repl_log(stdout, FALSE, FALSE, "Journal pool shared memory removed\n");
			if (0 == (status = remove_sem_set(SOURCE)))
				repl_log(stdout, FALSE, TRUE, "Journal pool semaphore removed\n");
			else
			{
				repl_log(stderr, FALSE, TRUE, "Error removing jnlpool semaphore : %s\n", STRERROR(status));
				*exit_status = ABNORMAL_SHUTDOWN;
			}
		} else if (INVALID_SHMID != udi->shmid)
		{
			if (!auto_shutdown && detach_status < 0)
				repl_log(stderr, FALSE, FALSE,
						"Error detaching from jnlpool shared memory : %s. Shared memory not removed\n",
						STRERROR(ERRNO));
			else if (remove_status != 0)
			{
				if (!auto_shutdown)
				{
					jnlpool.jnlpool_ctl = NULL; /* Detached successfully */
					jnlpool_ctl = NULL;
					pool_init = FALSE;
				}
				repl_log(stderr, FALSE, TRUE, "Error removing jnlpool shared memory : %s\n", STRERROR(ERRNO));
			}
			*exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	return attempt_ipc_cleanup;
}
예제 #20
0
int	gtmrecv_ipc_cleanup(boolean_t auto_shutdown, int *exit_status)
{

	boolean_t	i_am_the_last_user, attempt_ipc_cleanup;
	int		status, detach_status, remove_status, expected_nattach;
	struct shmid_ds	shm_buf;

	/* Attempt cleaning up the IPCs */
	attempt_ipc_cleanup = TRUE;

	/*
	 * Wait for the Receiver Server and Update Process to detach and
	 * takeover the semaphores. Note that the Receiver Server has already
	 * waited for the Update Process to detach. It is done here as a
	 * precaution against Receiver Server crashes.
	 */

	if (!auto_shutdown)
		status = grab_sem(RECV, RECV_SERV_COUNT_SEM);
	else
		status = 0;
	if (0 == status && 0 > (status = grab_sem(RECV, UPD_PROC_COUNT_SEM)))
		rel_sem(RECV, RECV_SERV_COUNT_SEM);
	if (status < 0)
	{
		repl_log(stderr, FALSE, TRUE,
			"Error taking control of Receiver Server/Update Process count semaphore : %s. Shutdown not complete\n",
			REPL_SEM_ERROR);
		*exit_status = ABNORMAL_SHUTDOWN;
		attempt_ipc_cleanup = FALSE;
	}

	/* Now we have locked out all users from the receive pool */

	if (!auto_shutdown || !gtmrecv_srv_count)
		expected_nattach = 1; /* Self, or parent */
	else
		expected_nattach = 0; /* Receiver server already detached */

	i_am_the_last_user = (((status = shmctl(recvpool_shmid, IPC_STAT, &shm_buf)) == 0)
		&& (shm_buf.shm_nattch == expected_nattach));
	if (!i_am_the_last_user)
	{
		if (status < 0)
			repl_log(stderr, FALSE, TRUE, "Error in jnlpool shmctl : %s\n", STRERROR(ERRNO));
		else
			repl_log(stderr, FALSE, TRUE,
				"Not deleting receive pool ipcs. %d processes still attached to receive pool\n",
				shm_buf.shm_nattch - expected_nattach);
		attempt_ipc_cleanup = FALSE;
		*exit_status = ABNORMAL_SHUTDOWN;
	}

	if (attempt_ipc_cleanup)
	{
		if (INVALID_SHMID != recvpool_shmid && (auto_shutdown || (detach_status = SHMDT(recvpool.recvpool_ctl)) == 0)
				       && (remove_status = shm_rmid(recvpool_shmid)) == 0)
		{
			recvpool.recvpool_ctl = NULL;
			repl_log(stdout, FALSE, FALSE, "Receive pool shared memory removed\n");
			if (0 == (status = remove_sem_set(RECV)))
				repl_log(stdout, FALSE, TRUE, "Receive pool semaphore removed\n");
			else
			{
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool semaphore : %s\n", STRERROR(status));
				*exit_status = ABNORMAL_SHUTDOWN;
			}
		} else if (INVALID_SHMID != recvpool_shmid)
		{
			if (!auto_shutdown && detach_status < 0)
				repl_log(stderr, FALSE, FALSE,
					"Error detaching from receive pool shared memory : %s. Shared memory not removed\n",
					STRERROR(ERRNO));
			else if (remove_status != 0)
			{
				if (!auto_shutdown)
					recvpool.recvpool_ctl = NULL; /* Detached successfully */
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool shared memory : %s\n", STRERROR(ERRNO));
			}
			*exit_status = ABNORMAL_SHUTDOWN;
		}
	}

	return attempt_ipc_cleanup;
}