Example #1
0
/*
 * Request a change of log file and/or log interval(s) as given in gtmrecv_options.
 *
 * The request is recorded in the receive pool (recvpool.gtmrecv_local / recvpool.upd_proc_local) while holding
 * RECV_SERV_OPTIONS_SEM. A request is refused if a previous change is still pending (either "changelog" field non-zero).
 *
 * Returns :
 *	NORMAL_SHUTDOWN,   if at least one change was requested and every requested change was accepted.
 *	ABNORMAL_SHUTDOWN, otherwise (semaphore could not be grabbed, a change is already in progress, or at least one
 *			   requested value was identical to the current one).
 */
int gtmrecv_changelog(void)
{
	uint4	requested = 0;	/* REPLIC_CHANGE_* bits the user asked for */
	uint4	applied = 0;	/* subset of "requested" that actually differs from the current settings */

	/* Serialize with other option changes on the receive pool */
	if (grab_sem(RECV, RECV_SERV_OPTIONS_SEM) < 0)
	{
		util_out_print("Error grabbing recvpool option write lock. Could not initiate change log", TRUE);
		return (ABNORMAL_SHUTDOWN);
	}
	if ((0 != recvpool.gtmrecv_local->changelog) || (0 != recvpool.upd_proc_local->changelog))
	{	/* An earlier request has not been consumed yet */
		util_out_print("Change log is already in progress. Not initiating change in log file or log interval", TRUE);
		rel_sem(RECV, RECV_SERV_OPTIONS_SEM);
		return (ABNORMAL_SHUTDOWN);
	}
	/* Log file (for both receiver and update process) */
	if ('\0' != gtmrecv_options.log_file[0])
	{
		requested |= REPLIC_CHANGE_LOGFILE;
		if (0 == strcmp(recvpool.gtmrecv_local->log_file, gtmrecv_options.log_file))
		{
			util_out_print("Log file is already !AD. Not initiating change in log file", TRUE,
					LEN_AND_STR(gtmrecv_options.log_file));
		} else
		{
			applied |= REPLIC_CHANGE_LOGFILE;
			strcpy(recvpool.gtmrecv_local->log_file, gtmrecv_options.log_file);
			util_out_print("Change log initiated with file !AD", TRUE, LEN_AND_STR(gtmrecv_options.log_file));
		}
	}
	/* Receiver server log interval */
	if (0 != gtmrecv_options.rcvr_log_interval)
	{
		requested |= REPLIC_CHANGE_LOGINTERVAL;
		if (recvpool.gtmrecv_local->log_interval == gtmrecv_options.rcvr_log_interval)
		{
			util_out_print("Receiver log interval is already !UL. Not initiating change in log interval", TRUE,
					gtmrecv_options.rcvr_log_interval);
		} else
		{
			applied |= REPLIC_CHANGE_LOGINTERVAL;
			recvpool.gtmrecv_local->log_interval = gtmrecv_options.rcvr_log_interval;
			util_out_print("Change initiated with receiver log interval !UL", TRUE, gtmrecv_options.rcvr_log_interval);
		}
	}
	/* Update process log interval */
	if (0 != gtmrecv_options.upd_log_interval)
	{
		requested |= REPLIC_CHANGE_UPD_LOGINTERVAL;
		if (recvpool.upd_proc_local->log_interval == gtmrecv_options.upd_log_interval)
		{
			util_out_print("Update process log interval is already !UL. Not initiating change in log interval", TRUE,
					gtmrecv_options.upd_log_interval);
		} else
		{
			applied |= REPLIC_CHANGE_UPD_LOGINTERVAL;
			recvpool.upd_proc_local->log_interval = gtmrecv_options.upd_log_interval;
			util_out_print("Change initiated with update process log interval !UL", TRUE,
					gtmrecv_options.upd_log_interval);
		}
	}
	if (0 == applied)
		util_out_print("No change to log file or log interval", TRUE);
	else
		recvpool.gtmrecv_local->changelog = applied;	/* publish the accepted bits in the receive pool */
	rel_sem(RECV, RECV_SERV_OPTIONS_SEM);
	/* Success only if something was accepted and nothing that was asked for got rejected */
	if ((0 != applied) && (requested == applied))
		return (NORMAL_SHUTDOWN);
	return (ABNORMAL_SHUTDOWN);
}
/*
 * Copy the global "update_disable" setting into the journal pool control structure (jnlpool_ctl->upd_disabled).
 *
 * The store is done while holding SRC_SERV_OPTIONS_SEM and the journal pool lock.
 *
 * Input Parameter:
 *	print_message : if TRUE, report the resulting state ("disabled" / "enabled") once the change is done.
 * Returns :
 *	NORMAL_SHUTDOWN,   if the flag was updated.
 *	ABNORMAL_SHUTDOWN, if the options semaphore could not be grabbed (nothing is changed in that case).
 */
int gtmsource_secnd_update(boolean_t print_message)
{
	if (grab_sem(SOURCE, SRC_SERV_OPTIONS_SEM) < 0)
	{	/* The message used to say "Could not initiate change log", which describes a different command */
		util_out_print("Error grabbing jnlpool option write lock. Could not enable or disable updates", TRUE);
		return(ABNORMAL_SHUTDOWN);
	}
	grab_lock(jnlpool.jnlpool_dummy_reg, ASSERT_NO_ONLINE_ROLLBACK);
	jnlpool.jnlpool_ctl->upd_disabled = update_disable;
	rel_lock(jnlpool.jnlpool_dummy_reg);
	rel_sem(SOURCE, SRC_SERV_OPTIONS_SEM);
	if (print_message)
		util_out_print("Updates are now !AZ", TRUE, update_disable ? "disabled" : "enabled");
	return(NORMAL_SHUTDOWN);
}
Example #3
0
/*
 * Ask the Update Process to shut down and wait for it to do so.
 *
 * Sets recvpool.upd_proc_local->upd_proc_shutdown to SHUTDOWN, then polls until that field changes, the recorded
 * update process pid is not positive, or the process is no longer alive. Afterwards grabs and releases
 * UPD_PROC_COUNT_SEM as a way of waiting for the Update Process to detach.
 *
 * Returns :
 *	The value found in upd_proc_shutdown once the wait is over if it is no longer SHUTDOWN;
 *	NORMAL_SHUTDOWN   if the field is still SHUTDOWN and the recorded pid is 0 (no Update Process);
 *	ABNORMAL_SHUTDOWN if the field is still SHUTDOWN with a non-zero pid, or if UPD_PROC_COUNT_SEM could not be grabbed.
 */
int gtmrecv_endupd(void)
{
	pid_t 		savepid = 0;	/* initialized since the loop condition can short-circuit before assigning it */
	int		exit_status;
	pid_t		waitpid_res;

	repl_log(stdout, TRUE, TRUE, "Initiating shut down of Update Process\n");
	recvpool.upd_proc_local->upd_proc_shutdown = SHUTDOWN;
	/* Wait for update process to shut down */
	while((SHUTDOWN == recvpool.upd_proc_local->upd_proc_shutdown)
		&& (0 < (savepid = (pid_t)recvpool.upd_proc_local->upd_proc_pid)) && is_proc_alive(savepid, 0))
	{
		SHORT_SLEEP(GTMRECV_WAIT_FOR_UPD_SHUTDOWN);
		WAITPID(savepid, &exit_status, WNOHANG, waitpid_res); /* Release defunct update process if dead */
	}
	exit_status = recvpool.upd_proc_local->upd_proc_shutdown;
	if (SHUTDOWN == exit_status)
	{	/* Nobody acknowledged the shutdown request */
		if (0 == savepid) /* No Update Process */
			exit_status = NORMAL_SHUTDOWN;
		else /* Update Process Crashed */
		{
			repl_log(stderr, TRUE, TRUE, "Update Process exited abnormally, INTEGRITY CHECK might be warranted\n");
			exit_status = ABNORMAL_SHUTDOWN;
		}
	}
	/* Wait for the Update Process to detach */
	if (0 == grab_sem(RECV, UPD_PROC_COUNT_SEM))
	{
		/* Do not copy the return value of rel_sem into errno : that would overwrite the error reported by the
		 * failing call just before REPL_SEM_ERROR is formatted. The grab_sem failure path below likewise uses
		 * REPL_SEM_ERROR as is.
		 */
		if (0 != rel_sem(RECV, UPD_PROC_COUNT_SEM))
			repl_log(stderr, TRUE, TRUE, "Error releasing the Update Process Count semaphore : %s\n", REPL_SEM_ERROR);
		repl_log(stdout, TRUE, TRUE, "Update Process exited\n");
	} else
	{
		repl_log(stderr, TRUE, TRUE, "Error in update proc count semaphore : %s\n", REPL_SEM_ERROR);
		exit_status = ABNORMAL_SHUTDOWN;
	}
	return (exit_status);
}
Example #4
0
/*
 * Perform the LOSTTNCOMPLETE operation on this instance.
 *
 * Expects to be entered holding JNL_POOL_ACCESS_SEM (asserted) and releases it right away. If updates are not
 * disabled in the journal pool, flags the pool and the individual source server slots so that the completion is sent
 * on to the secondaries. Finally resets zqgblmod_seqno and zqgblmod_tn on this instance.
 *
 * Returns :
 *	NORMAL_SHUTDOWN always (a failure to release the semaphore is reported through rts_error).
 */
int gtmsource_losttncomplete(void)
{
	gtmsource_local_ptr_t	slot, slot_top;

	error_def(ERR_MUPCLIERR);
	error_def(ERR_TEXT);

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	/* The access control semaphore is not needed for what follows. Let go of it up front to avoid potential deadlocks. */
	if (0 != rel_sem(SOURCE, JNL_POOL_ACCESS_SEM))
	{
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in source server losttncomplete rel_sem"),
				REPL_SEM_ERRNO);
	}
	assert(NULL == jnlpool.gtmsource_local);
	repl_log(stderr, TRUE, TRUE, "Initiating LOSTTNCOMPLETE operation on instance [%s]\n",
		jnlpool.repl_inst_filehdr->this_instname);
	/* A root primary passes this information on to its secondaries so they reset zqgblmod_seqno to 0 too.
	 * A propagating primary need not send it to tertiaries : the receiver on a tertiary cannot have started with a
	 * non-zero "zqgblmod_seqno" in the first place (PRIMARYNOTROOT error would have been issued).
	 */
	if (!jnlpool.jnlpool_ctl->upd_disabled)
	{
		grab_lock(jnlpool.jnlpool_dummy_reg);
		jnlpool.jnlpool_ctl->send_losttn_complete = TRUE;
		slot = jnlpool.gtmsource_local_array;
		slot_top = slot + NUM_GTMSRC_LCL;
		for ( ; slot < slot_top; slot++)
		{	/* Flag every slot except those with an empty secondary name and zero read/connect seqnos */
			if (('\0' != slot->secondary_instname[0])
					|| (0 != slot->read_jnl_seqno)
					|| (0 != slot->connect_jnl_seqno))
				slot->send_losttn_complete = TRUE;
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
	}
	/* This instance gets its zqgblmod_seqno and zqgblmod_tn reset to 0 as well */
	repl_inst_reset_zqgblmod_seqno_and_tn();
	return (NORMAL_SHUTDOWN);
}
/*
 * This will rundown a replication instance journal (and receiver) pool. The whole operation is done while holding
 * the ftok semaphore of the instance file.
 *	Input Parameters:
 *		replpool_id	 : replpool_id of the instance. Instance file name must be null terminated in replpool_id.
 *				   When rndwn_both_pools is FALSE, replpool_id->pool_type selects the pool to run down.
 *				   Note that pool_type is overwritten with the pool being processed before each call to
 *				   mu_rndwn_replpool.
 *		immediate	 : passed through to ftok_sem_get, mu_replpool_grab_sem and ftok_sem_release.
 *		rndwn_both_pools : TRUE to process the journal pool followed by the receive pool. The shared memory rundown
 *				   success/failure messages are issued only in this mode.
 *	Output Parameter:
 *		jnlpool_sem_created : set to FALSE on entry. If the journal pool section is executed, it is set to the
 *				   "sem_created" value filled in by mu_replpool_grab_sem for the journal pool.
 * Returns :
 *	TRUE,  if successful (both pool statuses are SS_NORMAL and the ftok semaphore was released).
 *	FALSE, otherwise (this includes failure to get or to release the ftok semaphore).
 */
boolean_t mu_rndwn_repl_instance(replpool_identifier *replpool_id, boolean_t immediate, boolean_t rndwn_both_pools,
					boolean_t *jnlpool_sem_created)
{
	boolean_t		jnlpool_stat = SS_NORMAL, recvpool_stat = SS_NORMAL, decr_cnt, sem_created = FALSE, ipc_rmvd;
	char			*instfilename;
	unsigned char		ipcs_buff[MAX_IPCS_ID_BUF], *ipcs_ptr;
	gd_region		*r_save;
	repl_inst_hdr		repl_instance;
	static	gd_region	*reg = NULL;	/* dummy region, set up on the first call and reused afterwards */
	struct semid_ds		semstat;
	struct shmid_ds		shmstat;
	unix_db_info		*udi;
	int			save_errno, sem_id, shm_id, status;
	sgmnt_addrs		*repl_csa;
	boolean_t		was_crit;	/* assigned only when jnlpool_ctl is non-NULL (see receive pool section) */
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	if (NULL == reg)
	{	/* Create the dummy region once, leaving gv_cur_region as the caller had it */
		r_save = gv_cur_region;
		mu_gv_cur_reg_init();
		reg = gv_cur_region;
		gv_cur_region = r_save;
	}
	*jnlpool_sem_created = FALSE;
	/* Assert that the layout of replpool_identifier is identical for all versions going forward as the function
	 * "validate_replpool_shm_entry" (used by the argumentless mupip rundown aka "mupip rundown") relies on this.
	 * This assert is placed here (instead of there) because the automated tests exercise this logic much more
	 * than the argumentless code. If any of these asserts fail, "validate_replpool_shm_entry" needs to change
	 * to handle the old and new layouts.
	 *
	 *	Structure ----> replpool_identifier <----    size 312 [0x0138]
	 *
	 *		offset = 0000 [0x0000]      size = 0012 [0x000c]    ----> replpool_identifier.label
	 *		offset = 0012 [0x000c]      size = 0001 [0x0001]    ----> replpool_identifier.pool_type
	 *		offset = 0013 [0x000d]      size = 0036 [0x0024]    ----> replpool_identifier.now_running
	 *		offset = 0052 [0x0034]      size = 0004 [0x0004]    ----> replpool_identifier.repl_pool_key_filler
	 *		offset = 0056 [0x0038]      size = 0256 [0x0100]    ----> replpool_identifier.instfilename
	 */
	assert(0 == OFFSETOF(replpool_identifier, label[0]));
	assert(12 == SIZEOF(((replpool_identifier *)NULL)->label));
	assert(12 == OFFSETOF(replpool_identifier, pool_type));
	assert(1 == SIZEOF(((replpool_identifier *)NULL)->pool_type));
	assert(13 == OFFSETOF(replpool_identifier, now_running[0]));
	assert(36 == SIZEOF(((replpool_identifier *)NULL)->now_running));
	assert(56 == OFFSETOF(replpool_identifier, instfilename[0]));
	assert(256 == SIZEOF(((replpool_identifier *)NULL)->instfilename));
	/* End asserts */
	/* Both pools share the same dummy region, which is pointed at the instance file name */
	jnlpool.jnlpool_dummy_reg = reg;
	recvpool.recvpool_dummy_reg = reg;
	instfilename = replpool_id->instfilename;
	reg->dyn.addr->fname_len = strlen(instfilename);
	assert(0 == instfilename[reg->dyn.addr->fname_len]);
	memcpy((char *)reg->dyn.addr->fname, instfilename, reg->dyn.addr->fname_len + 1);	/* + 1 copies the null terminator */
	udi = FILE_INFO(reg);
	udi->fn = (char *)reg->dyn.addr->fname;
	/* Lock replication instance using ftok semaphore so that no other replication process can startup until we are done with
	 * rundown
	 */
	if (!ftok_sem_get(reg, TRUE, REPLPOOL_ID, immediate))
		return FALSE;
	ESTABLISH_RET(mu_rndwn_repl_instance_ch, FALSE);
	/* Read the instance file header : it holds the shmid/semid (and their ctimes) of both pools */
	repl_inst_read(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr));
	assert(rndwn_both_pools || JNLPOOL_SEGMENT == replpool_id->pool_type || RECVPOOL_SEGMENT == replpool_id->pool_type);
	if (rndwn_both_pools || (JNLPOOL_SEGMENT == replpool_id->pool_type))
	{	/* --------------------------
		 * First rundown Journal pool
		 * --------------------------
		 */
		shm_id = repl_instance.jnlpool_shmid;
		if (SS_NORMAL == (jnlpool_stat = mu_replpool_grab_sem(&repl_instance, JNLPOOL_SEGMENT, &sem_created, immediate)))
		{
			/* Got JNL_POOL_ACCESS_SEM and incremented SRC_SRV_COUNT_SEM */
			assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
			assert(holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
			sem_id = repl_instance.jnlpool_semid;
			/* Treat the shmid as invalid if it cannot be stat'ed or its ctime differs from the one in the file header */
			if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat))
				|| (shmstat.shm_ctime != repl_instance.jnlpool_shmid_ctime))
			{
				repl_instance.jnlpool_shmid = shm_id = INVALID_SHMID;
				repl_instance.jnlpool_shmid_ctime = 0;
			}
			assert((INVALID_SHMID != shm_id) || ((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl)));
			ipc_rmvd = TRUE;	/* stays TRUE if there is no shared memory to run down */
			if (INVALID_SHMID != shm_id)
			{
				replpool_id->pool_type = JNLPOOL_SEGMENT;
				jnlpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd);
				ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id);
				*ipcs_ptr = '\0';
				if (rndwn_both_pools && ((SS_NORMAL != jnlpool_stat) || ipc_rmvd))
					gtm_putmsg(VARLSTCNT(6) (jnlpool_stat ? ERR_MUJPOOLRNDWNFL : ERR_MUJPOOLRNDWNSUC),
						4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename));
			}
			assert(ipc_rmvd || (NULL != jnlpool_ctl));
			assert((NULL == jnlpool.jnlpool_ctl) || (SS_NORMAL == jnlpool_stat) || jgbl.onlnrlbk);
			assert((INVALID_SHMID != repl_instance.jnlpool_shmid) || (0 == repl_instance.jnlpool_shmid_ctime));
			assert((INVALID_SHMID == repl_instance.jnlpool_shmid) || (0 != repl_instance.jnlpool_shmid_ctime));
			assert(INVALID_SEMID != sem_id);
			if (!mur_options.rollback)
			{	/* Invoked by MUPIP RUNDOWN in which case the semaphores need to be removed. But, remove the
				 * semaphore ONLY if we created it here OR the journal pool was successfully removed.
				 */
				if (NULL == jnlpool_ctl)
				{
					if (((sem_created || (SS_NORMAL == jnlpool_stat))
						&& (SS_NORMAL == mu_replpool_release_sem(&repl_instance, JNLPOOL_SEGMENT, TRUE))))
					{	/* Now that semaphores are removed, reset fields in file header */
						if (!sem_created)
						{	/* If sem_id was created by mu_replpool_grab_sem then do NOT report the
							 * MUJPOOLRNDWNSUC message as it indicates that the semaphore was orphaned
							 * and we removed it when in fact there was no orphaned semaphore and we
							 * created it as part of mu_replpool_grab_sem to get standalone access to
							 * rundown the journal pool (which may or may not exist)
							 */
							ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id);
							*ipcs_ptr = '\0';
							gtm_putmsg(VARLSTCNT(9) ERR_MUJPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff),
								LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id);
						}
						repl_inst_jnlpool_reset();
					}
				} else
				{	/* Anticipatory Freeze scheme is turned ON. So, release just the JNL_POOL_ACCESS_SEM. The
					 * semaphore will be released/removed in the caller (mupip_rundown)
					 */
					assert(ANTICIPATORY_FREEZE_AVAILABLE);
					assertpro(SS_NORMAL == (status = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)));
					assert(!holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
					/* Since we are not resetting the semaphore IDs in the file header, we need to write out
					 * the semaphore IDs in the instance file (if we created them).
					 */
					if (sem_created)
						repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance,
									SIZEOF(repl_inst_hdr));
				}
				/* If semaphore is not created and the journal pool rundown failed (due to attached processes),
				 * rundown process continues to hold the journal pool access control semaphore. This way, we hold
				 * the semaphore on behalf of the source server (now no longer alive) to prevent mu_rndwn_sem_all
				 * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the
				 * source server is restarted). But, since the semaphore is not released (until the rundown process
				 * dies), holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in
				 * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To
				 * workaround it, set holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] to FALSE. This is an interim solution
				 * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to
				 * skip the ones that are present in the ignore list.
				 */
				holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
			}
		} else if (rndwn_both_pools && (INVALID_SHMID != shm_id))
		{	/* Could not get the journal pool semaphores : report the rundown of this shmid as failed */
			ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id);
			*ipcs_ptr = '\0';
			/* NOTE(review): rndwn_both_pools is already known to be TRUE from the enclosing "else if" */
			if (rndwn_both_pools)
				gtm_putmsg(VARLSTCNT(6) ERR_MUJPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff),
					LEN_AND_STR(instfilename));
		}
		*jnlpool_sem_created = sem_created;
	}
	if (((SS_NORMAL == jnlpool_stat) || !jgbl.mur_rollback) &&
		(rndwn_both_pools || (RECVPOOL_SEGMENT == replpool_id->pool_type)))
	{	/* --------------------------
		 * Now rundown Receive pool
		 * --------------------------
		 * Note: RECVPOOL is rundown ONLY if the JNLPOOL rundown was successful. This way, we don't end up
		 * creating new semaphores for the RECVPOOL if ROLLBACK is not going to start anyways because of the failed
		 * JNLPOOL rundown. The only exception is MUPIP RUNDOWN command in which case we try running down the
		 * RECVPOOL even if the JNLPOOL rundown failed.
		 */
		shm_id = repl_instance.recvpool_shmid;
		if (SS_NORMAL == (recvpool_stat = mu_replpool_grab_sem(&repl_instance, RECVPOOL_SEGMENT, &sem_created, immediate)))
		{
			sem_id = repl_instance.recvpool_semid;
			/* Treat the shmid as invalid if it cannot be stat'ed or its ctime differs from the one in the file header */
			if ((INVALID_SHMID == shm_id) || (-1 == shmctl(shm_id, IPC_STAT, &shmstat))
				|| (shmstat.shm_ctime != repl_instance.recvpool_shmid_ctime))
			{
				repl_instance.recvpool_shmid = shm_id = INVALID_SHMID;
				repl_instance.recvpool_shmid_ctime = 0;
			}
			ipc_rmvd = TRUE;	/* stays TRUE if there is no shared memory to run down */
			if (INVALID_SHMID != shm_id)
			{
				replpool_id->pool_type = RECVPOOL_SEGMENT;
				recvpool_stat = mu_rndwn_replpool(replpool_id, &repl_instance, shm_id, &ipc_rmvd);
				ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id);
				*ipcs_ptr = '\0';
				if (rndwn_both_pools && ((SS_NORMAL != recvpool_stat) || ipc_rmvd))
					gtm_putmsg(VARLSTCNT(6) (recvpool_stat ? ERR_MURPOOLRNDWNFL : ERR_MURPOOLRNDWNSUC),
						4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(instfilename));
			}
			assert((TRUE == ipc_rmvd) || (SS_NORMAL != recvpool_stat) || jgbl.onlnrlbk);
			assert((INVALID_SHMID != repl_instance.recvpool_shmid) || (0 == repl_instance.recvpool_shmid_ctime));
			assert((INVALID_SHMID == repl_instance.recvpool_shmid) || (0 != repl_instance.recvpool_shmid_ctime));
			assert(INVALID_SEMID != sem_id);
			if (!mur_options.rollback)
			{	/* Invoked by MUPIP RUNDOWN in which case the semaphores need to be removed. But, remove the
				 * semaphore ONLY if we created it here OR the receive pool was successfully removed.
				 */
				if ((sem_created || (SS_NORMAL == recvpool_stat))
					&& (SS_NORMAL == mu_replpool_release_sem(&repl_instance, RECVPOOL_SEGMENT, TRUE)))
				{	/* Now that semaphores are removed, reset fields in file header */
					if (!sem_created)
					{	/* if sem_id was "created" by mu_replpool_grab_sem then do NOT report the
						 * MURPOOLRNDWNSUC message as it indicates that the semaphore was orphaned and we
						 * removed it when in fact there was no orphaned semaphore and we "created" it as
						 * part of mu_replpool_grab_sem to get standalone access to rundown the receiver
						 * pool (which may or may not exist)
						 */
						ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, sem_id);
						*ipcs_ptr = '\0';
						gtm_putmsg(VARLSTCNT(9) ERR_MURPOOLRNDWNSUC, 4, LEN_AND_STR(ipcs_buff),
							LEN_AND_STR(instfilename), ERR_SEMREMOVED, 1, sem_id);
					}
					if (NULL != jnlpool_ctl)
					{	/* Journal pool is not yet removed. So, grab lock before resetting semid/shmid
						 * fields in the file header as the function expects the caller to hold crit
						 * if the journal pool is available
						 */
						repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;
						assert(!repl_csa->now_crit);
						assert(!repl_csa->hold_onto_crit);
						was_crit = repl_csa->now_crit;
						/* Since we do grab_lock, below, we need to do a per-process initialization. Also,
						 * start heartbeat so that grab_lock can issue MUTEXLCKALERT and get C-stacks if
						 * waiting for crit
						 */
						START_HEARTBEAT_IF_NEEDED;
						mutex_per_process_init();
						if (!was_crit)
							grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY);
					}
					repl_inst_recvpool_reset();
					/* was_crit is read only under the same (NULL != jnlpool_ctl) check that assigned it above */
					if ((NULL != jnlpool_ctl) && !was_crit)
						rel_lock(jnlpool.jnlpool_dummy_reg);
				}
				/* If semaphore is not created and the receive pool rundown failed (due to attached processes),
				 * rundown process continues to hold the receive pool access control semaphore. This way, we hold
				 * the semaphore on behalf of the receiver server (now no longer alive) to prevent mu_rndwn_sem_all
				 * (invoked later) from cleaning up this orphaned semaphore (which causes REPLREQROLLBACK if the
				 * receiver is restarted). But, since the semaphore is not released (until the rundown process
				 * dies), holds_sem[RECV][RECV_POOL_ACCESS_SEM] continues to remain TRUE. This causes asserts in
				 * ftok_sem_get if mu_rndwn_repl_instance is invoked for a different journal/receive pool. To
				 * workaround it, set holds_sem[RECV][RECV_POOL_ACCESS_SEM] to FALSE. This is an interim solution
				 * until we record such semaphores in an ignore-list (or some such) and change mu_rndwn_sem_all to
				 * skip the ones that are present in the ignore list.
				 *
				 * NOTE(review): unlike the journal pool section, the code below does not reset holds_sem[][] here;
				 * it only asserts on it and calls set_sem_set_recvr(sem_id) in debug builds. Confirm whether the
				 * reset described above happens elsewhere or whether this comment is stale.
				 */
				assert((sem_created || (SS_NORMAL == recvpool_stat)) || holds_sem[RECV][RECV_POOL_ACCESS_SEM]);
				DEBUG_ONLY(set_sem_set_recvr(sem_id));
			}
		} else if (rndwn_both_pools && (INVALID_SHMID != shm_id))
		{	/* Could not get the receive pool semaphores : report the rundown of this shmid as failed */
			ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, shm_id);
			*ipcs_ptr = '\0';
			/* NOTE(review): rndwn_both_pools is already known to be TRUE from the enclosing "else if" */
			if (rndwn_both_pools)
				gtm_putmsg(VARLSTCNT(6) ERR_MURPOOLRNDWNFL, 4, LEN_AND_STR(ipcs_buff),
					LEN_AND_STR(instfilename));
		}
	}
	assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || (NULL == jnlpool.repl_inst_filehdr));
	if (mur_options.rollback && (SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat))
	{
		assert(jgbl.onlnrlbk || ANTICIPATORY_FREEZE_AVAILABLE || ((INVALID_SHMID == repl_instance.jnlpool_shmid)
			&& (INVALID_SHMID == repl_instance.recvpool_shmid)));
		/* Initialize jnlpool.repl_inst_filehdr as it is used later by gtmrecv_fetchresync() */
		decr_cnt = FALSE;
		if (NULL == jnlpool.repl_inst_filehdr)
		{	/* Possible if there is NO journal pool in the first place. In this case, malloc the structure here and
			 * copy the file header from repl_instance structure.
			 */
			jnlpool.repl_inst_filehdr = (repl_inst_hdr_ptr_t)malloc(SIZEOF(repl_inst_hdr));
			memcpy(jnlpool.repl_inst_filehdr, &repl_instance, SIZEOF(repl_inst_hdr));
		} else
		{
			assert(repl_instance.jnlpool_semid == jnlpool.repl_inst_filehdr->jnlpool_semid);
			assert(repl_instance.jnlpool_semid_ctime == jnlpool.repl_inst_filehdr->jnlpool_semid_ctime);
			assert(repl_instance.jnlpool_shmid == jnlpool.repl_inst_filehdr->jnlpool_shmid);
			assert(repl_instance.jnlpool_shmid_ctime == jnlpool.repl_inst_filehdr->jnlpool_shmid_ctime);
			/* If the ONLINE ROLLBACK command is run on the primary when the source server is up and running,
			 * jnlpool.repl_inst_filehdr->recvpool_semid will be INVALID because there is NO receiver server
			 * running. However, ROLLBACK creates semaphores for both journal pool and receive pool and writes
			 * it to the instance file header. Copy this information to the file header copy in the jnlpool
			 * as well
			 */
			jnlpool.repl_inst_filehdr->recvpool_semid = repl_instance.recvpool_semid;
			jnlpool.repl_inst_filehdr->recvpool_semid_ctime = repl_instance.recvpool_semid_ctime;
		}
		/* Flush changes to the replication instance file header to disk */
		repl_inst_write(instfilename, (off_t)0, (sm_uc_ptr_t)&repl_instance, SIZEOF(repl_inst_hdr));
	} else /* for MUPIP RUNDOWN, semid fields in the file header are reset and is written in mu_replpool_release_sem() above */
		decr_cnt = (NULL == jnlpool_ctl); /* for anticipatory freeze, mupip_rundown releases the semaphore */
	REVERT;
	/* Release replication instance ftok semaphore lock */
	if (!ftok_sem_release(reg, decr_cnt, immediate)) /* Do not decrement the counter if ROLLBACK */
		return FALSE;
	return ((SS_NORMAL == jnlpool_stat) && (SS_NORMAL == recvpool_stat));
}
Example #6
0
/*
 * Decrement semaphore "sem_num" of semaphore set "set_index".
 * This simply forwards to rel_sem and hands back whatever rel_sem returns.
 */
int decr_sem(int set_index, int sem_num)
{
	int	rc;

	rc = rel_sem(set_index, sem_num);
	return rc;
}
Example #7
0
/*
 * Remove the receive pool shared memory and semaphore set if no other process is using them.
 *
 * Input Parameter:
 *	auto_shutdown : if TRUE, RECV_SERV_COUNT_SEM is not grabbed here and the receive pool is not detached here
 *			(no SHMDT call is made).
 * Output Parameter:
 *	exit_status   : set to ABNORMAL_SHUTDOWN whenever a step fails or other processes are still attached;
 *			left untouched otherwise.
 * Returns :
 *	TRUE  if removal of the IPCs was attempted (even if the removal itself then failed),
 *	FALSE if it was not attempted because the count semaphores could not be grabbed, shmctl failed or other
 *	      processes are still attached to the receive pool.
 */
int	gtmrecv_ipc_cleanup(boolean_t auto_shutdown, int *exit_status)
{

	boolean_t	i_am_the_last_user, attempt_ipc_cleanup;
	/* detach_status and remove_status are assigned inside a short-circuited condition below; start them at 0 so
	 * that the later reads never depend on which part of that condition got evaluated.
	 */
	int		status, detach_status = 0, remove_status = 0, expected_nattach;
	struct shmid_ds	shm_buf;

	/* Attempt cleaning up the IPCs */
	attempt_ipc_cleanup = TRUE;

	/*
	 * Wait for the Receiver Server and Update Process to detach and
	 * takeover the semaphores. Note that the Receiver Server has already
	 * waited for the Update Process to detach. It is done here as a
	 * precaution against Receiver Server crashes.
	 */

	if (!auto_shutdown)
		status = grab_sem(RECV, RECV_SERV_COUNT_SEM);
	else
		status = 0;
	/* NOTE(review): in the auto_shutdown case RECV_SERV_COUNT_SEM is not grabbed above but is still released here
	 * if grabbing UPD_PROC_COUNT_SEM fails - confirm the caller holds it in that case.
	 */
	if (0 == status && 0 > (status = grab_sem(RECV, UPD_PROC_COUNT_SEM)))
		rel_sem(RECV, RECV_SERV_COUNT_SEM);
	if (status < 0)
	{
		repl_log(stderr, FALSE, TRUE,
			"Error taking control of Receiver Server/Update Process count semaphore : %s. Shutdown not complete\n",
			REPL_SEM_ERROR);
		*exit_status = ABNORMAL_SHUTDOWN;
		attempt_ipc_cleanup = FALSE;
	}

	/* Now we have locked out all users from the receive pool */

	if (!auto_shutdown || !gtmrecv_srv_count)
		expected_nattach = 1; /* Self, or parent */
	else
		expected_nattach = 0; /* Receiver server already detached */

	i_am_the_last_user = (((status = shmctl(recvpool_shmid, IPC_STAT, &shm_buf)) == 0)
		&& (shm_buf.shm_nattch == expected_nattach));
	if (!i_am_the_last_user)
	{
		if (status < 0)
			repl_log(stderr, FALSE, TRUE, "Error in recvpool shmctl : %s\n", STRERROR(ERRNO));
		else
		{	/* shm_nattch is an unsigned type that can be wider than int; convert explicitly to match %d */
			repl_log(stderr, FALSE, TRUE,
				"Not deleting receive pool ipcs. %d processes still attached to receive pool\n",
				(int)(shm_buf.shm_nattch - expected_nattach));
		}
		attempt_ipc_cleanup = FALSE;
		*exit_status = ABNORMAL_SHUTDOWN;
	}

	if (attempt_ipc_cleanup)
	{
		/* Detach (unless auto_shutdown) and then remove the shared memory; the semaphore set is removed only
		 * once the shared memory is gone.
		 */
		if (INVALID_SHMID != recvpool_shmid && (auto_shutdown || (detach_status = SHMDT(recvpool.recvpool_ctl)) == 0)
				       && (remove_status = shm_rmid(recvpool_shmid)) == 0)
		{
			recvpool.recvpool_ctl = NULL;
			repl_log(stdout, FALSE, FALSE, "Receive pool shared memory removed\n");
			if (0 == (status = remove_sem_set(RECV)))
				repl_log(stdout, FALSE, TRUE, "Receive pool semaphore removed\n");
			else
			{
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool semaphore : %s\n", STRERROR(status));
				*exit_status = ABNORMAL_SHUTDOWN;
			}
		} else if (INVALID_SHMID != recvpool_shmid)
		{	/* Either the detach or the removal failed; figure out which one to report */
			if (!auto_shutdown && detach_status < 0)
				repl_log(stderr, FALSE, FALSE,
					"Error detaching from receive pool shared memory : %s. Shared memory not removed\n",
					STRERROR(ERRNO));
			else if (remove_status != 0)
			{
				if (!auto_shutdown)
					recvpool.recvpool_ctl = NULL; /* Detached successfully */
				repl_log(stderr, FALSE, TRUE, "Error removing receive pool shared memory : %s\n", STRERROR(ERRNO));
			}
			*exit_status = ABNORMAL_SHUTDOWN;
		}
	}

	return attempt_ipc_cleanup;
}
Example #8
0
/* Entry point for the source server command (the messages below call it MUPIP REPLIC -SOURCE).
 *
 * Flow, as visible in this function:
 *   1. Parse the command options ("gtmsource_get_opt"). For a shutdown request, sleep for the requested timeout
 *      BEFORE attaching to the journal pool so that no semaphores are held while waiting.
 *   2. For -ACTIVATE with ROOTPRIMARY_SPECIFIED, open the gld and db regions before "jnlpool_init".
 *   3. Attach to (or create) the journal pool via "jnlpool_init"; "is_jnlpool_creator" records which happened.
 *   4. Any request other than START (shutdown, activate, deactivate, checkhealth, changelog, showbacklog,
 *      stopsourcefilter, jnlpool, losttncomplete, needrestart, showfreeze, setfreeze, statslog) is handed to its
 *      helper and the helper's status (less NORMAL_SHUTDOWN) is passed to "gtmsource_exit".
 *   5. For START, unless REPL_DEBUG_NOBACKGROUND is defined, fork. The parent waits for the child to set
 *      "child_server_running" (or to die) and then calls "gtmsource_exit" with SRV_ALIVE or SRV_ERR. The child
 *      redoes the per-process initialization, opens and validates all regions, logs its startup and enters the
 *      passive/active service loop.
 *
 * No parameters; all inputs come from "gtmsource_options" and other globals.
 */
int gtmsource()
{
	int			status, log_init_status, waitpid_res, save_errno;
	char			print_msg[1024], tmpmsg[1024];
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa, *repl_csa;
	boolean_t		all_files_open, isalive;
	pid_t			pid, ppid, procgp;
	seq_num			read_jnl_seqno, jnl_seqno;
	unix_db_info		*udi;
	gtmsource_local_ptr_t	gtmsource_local;
	boolean_t		this_side_std_null_coll;
	int			null_fd, rc;

	memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs));
	call_on_signal = gtmsource_sigstop;
	ESTABLISH_RET(gtmsource_ch, SS_NORMAL);
	if (-1 == gtmsource_get_opt())
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR);
	if (gtmsource_options.shut_down)
	{	/* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return
		 * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while
		 * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other
		 * MUPIP REPLIC commands that need these locks for their function.
		 */
		if (0 < gtmsource_options.shutdown_time)
		{
			repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n",
				gtmsource_options.shutdown_time);
			LONG_SLEEP(gtmsource_options.shutdown_time);
		} else
			repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n");
	} else if (gtmsource_options.start)
	{
		repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n",
			gtmsource_options.secondary_instname);
	}
	if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary))
	{	/* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now
		 * in case this is a secondary -> primary transition. This is so we can later switch journal files in all
		 * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since
		 * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we
		 * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always
		 * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has
		 * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic
		 * currently has assumptions that a process holds only one ftok at any point in time.
		 */
		assert(NULL == gd_header);
		gvinit();
		all_files_open = region_init(FALSE);
		if (!all_files_open)
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator);
	/* is_jnlpool_creator == TRUE ==> this process created the journal pool
	 * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it.
	 */
	/* Dispatch every non-START request to its helper; only START falls through to the code below. */
	if (gtmsource_options.shut_down)
		gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.activate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.deactivate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.checkhealth)
		gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.changelog)
		gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showbacklog)
		gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.stopsourcefilter)
		gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.jnlpool)
		gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.losttncomplete)
		gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.needrestart)
		gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showfreeze)
		gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.setfreeze)
		gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN);
	else if (!gtmsource_options.start)
	{
		assert(CLI_PRESENT == cli_present("STATSLOG"));
		gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN);
	}
	assert(gtmsource_options.start);
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */
	gtmsource_local = jnlpool.gtmsource_local;
	gtmsource_local->child_server_running = FALSE;
	FORK(pid);
	if (0 > pid)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
			ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno);
	} else if (0 < pid)
	{	/* Parent. Wait until child sets "child_server_running" to TRUE. That is an indication that the child
		 * source server has completed its initialization phase and is all set so the parent command can return.
		 */
		while (isalive = is_proc_alive(pid, 0))	/* note : intended assignment */
		{
			if (gtmsource_local->child_server_running)
				break;
			/* To take care of reassignment of PIDs, the while condition should be && with the condition
			 * (PPID of pid == process_id)
			 */
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START);
			WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */
		}
		if (isalive)
		{	/* Child process is alive and started with no issues */
			if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)))
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
					ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno);
			ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE);
		} else
		{	/* Child source server process errored out at startup and is no longer alive.
			 * If we were the one who created the journal pool, let us clean it up.
			 */
			repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n");
			if (is_jnlpool_creator)
				status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN);
		}
		/* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup
		 * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the
		 * value of sem 1 before the child grabs sem 1.
		 */
		gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR);
	}
	/* Point stdin to /dev/null */
	/* NOTE(review): unlike every other rts_error_csa call in this function, the three calls below pass no
	 * VARLSTCNT(...) argument count - confirm this is intended.
	 * NOTE(review): F_DUPFD with a minimum of 0 yields the lowest unused descriptor, which is descriptor 0 only if
	 * stdin is already closed at this point - confirm.
	 */
	OPENFILE("/dev/null", O_RDONLY, null_fd);
	if (0 > null_fd)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0);
	FCNTL3(null_fd, F_DUPFD, 0, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0);
	CLOSEFILE(null_fd, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0);
	/* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore
	 * at this point. The variables that indicate this would have been copied over to the child during the fork. This will
	 * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child
	 * to ensure they do not misrepresent the holder of those semaphores.
	 */
	ftok_sem_reg = NULL;
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(udi->grabbed_ftok_sem);
	udi->grabbed_ftok_sem = FALSE;
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
	assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
	/* Start child source server initialization */
	is_src_server = TRUE;
	OPERATOR_LOG_MSG;
	process_id = getpid();
	/* Reinvoke secshr related initialization with the child's pid */
	INVOKE_INIT_SECSHR_ADDRS;
	/* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool.
	 * Note that the initialization would already have been done by the parent receiver startup command but we need to
	 * redo the initialization with the child process id.
	 */
	assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id));
	mutex_per_process_init();
	START_HEARTBEAT_IF_NEEDED;
	ppid = getppid();
	log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file);
	assert(SS_NORMAL == log_init_status);
	repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd);
	if (-1 == (procgp = setsid()))
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Source server error in setsid"), errno);
#	endif /* REPL_DEBUG_NOBACKGROUND */
	if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level)
		gtm_zlib_init();	/* Open zlib shared library for compression/decompression */
	REPL_DPRINT1("Setting up regions\n");
	gvinit();

	/* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		gtmsource_exit(ABNORMAL_SHUTDOWN);
	}
	/* Determine primary side null subscripts collation order */
	/* Also check whether all regions have same null collation order */
	this_side_std_null_coll = -1;	/* -1 == not yet taken from any region */
	for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
	{
		csa = &FILE_INFO(reg)->s_addrs;
		if (this_side_std_null_coll != csa->hdr->std_null_coll)
		{
			if (-1 == this_side_std_null_coll)
				this_side_std_null_coll = csa->hdr->std_null_coll;
			else
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF);
				gtmsource_exit(ABNORMAL_SHUTDOWN);
			}
		}
		if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		if (reg->read_only && REPL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				   RTS_ERROR_LITERAL("Source Server does not have write permissions to one or "
					             "more database files that are replicated"));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	/* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */
	gtmsource_local->gtmsource_pid = process_id;
	gtmsource_local->gtmsource_state = GTMSOURCE_START;
	if (is_jnlpool_creator)
	{
		DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id);
		gtmsource_seqno_init(this_side_std_null_coll);
		if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)
		{	/* Created the journal pool as a root primary. Append a history record to the replication instance file.
			 * Invoke the function "gtmsource_rootprimary_init" to do that.
			 */
			gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
		}
	}
	/* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used
	 * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0.
	 * Note that the parent source server startup command would have done an increment of the ftok counter semaphore
	 * for the replication instance file. But the source server process (the child) that comes here would not have done
	 * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay.
	 */
	if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP);
	/* Increment the source server count semaphore */
	status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM);
	if (0 != status)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno);
	}
#	else
	if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM)))
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno);
	}
#	endif /* REPL_DEBUG_NOBACKGROUND */

	gtmsource_srv_count++;
	gtmsource_local->child_server_running = TRUE;	/* At this point, the parent startup command will stop waiting for child */
	gtm_event_log_init();
	/* Log source server startup command line first.
	 * "tmpmsg" and "print_msg" carry runtime text (command line, instance name, freeze comment), so they are always
	 * logged through a literal "%s" format; passing them as the format itself would let any '%' they contain be
	 * interpreted as a conversion specification.
	 * NOTE(review): this SPRINTF into the 1024-byte "tmpmsg" is not length-bounded - confirm the command line
	 * length is capped elsewhere.
	 */
	SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str);
	repl_log(gtmsource_log_fp, TRUE, TRUE, "%s", tmpmsg);

	SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]",
		process_id, gtmsource_local->secondary_instname);
	sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
	repl_log(gtmsource_log_fp, TRUE, TRUE, "%s", print_msg);
	if (is_jnlpool_creator)
	{
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	} else
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
#	ifdef GTM_TLS
	if (REPL_TLS_REQUESTED)
	{
		repl_do_tls_init(gtmsource_log_fp);
		assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK);
	}
#	endif
	if (jnlpool.jnlpool_ctl->freeze)
	{	/* Instance is frozen at startup; remember the flag and log the freeze along with its comment */
		last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze;
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname);
		repl_log(gtmsource_log_fp, TRUE, FALSE, "%s", print_msg);
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment);
		repl_log(gtmsource_log_fp, TRUE, TRUE, "%s", print_msg);
	}
	gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly;
	do
	{ 	/* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */
		gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START;
		if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN))
		{
			gtmsource_poll_actions(FALSE);
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE);
			continue;
		}
		if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode)
		{	/* Shutdown initiated */
			assert(gtmsource_local->shutdown == SHUTDOWN);
			sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2,
				    RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled"));
			repl_log(gtmsource_log_fp, TRUE, TRUE, "%s", print_msg);
			gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
			break;
		}
		gtmsource_poll_actions(FALSE);
		if (GTMSOURCE_CHANGING_MODE == gtmsource_state)
			continue;
		if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode)
			gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE;
		SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port);
		sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
		repl_log(gtmsource_log_fp, TRUE, TRUE, "%s", print_msg);
		gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
		assert(!repl_csa->hold_onto_crit);	/* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK);
		if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state)
		{
			repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n");
			repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
					jnlpool.jnlpool_ctl->jnl_seqno);
			continue;
		}
		/* Start reading from the current write position of the journal pool */
		QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr);
		gtmsource_local->read = jnlpool.jnlpool_ctl->write;
		gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL;
		read_jnl_seqno = gtmsource_local->read_jnl_seqno;
		assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno);
		if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno)
		{	/* Our read seqno is behind the pool's current seqno, so switch to reading from journal files */
			gtmsource_local->read_state = READ_FILE;
			QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno);
			gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
		if (SS_NORMAL != (status = gtmsource_alloc_tcombuff()))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
				  RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status);
		gtmsource_filter = NO_FILTER;
		if ('\0' != gtmsource_local->filter_cmd[0])
		{
			if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd)))
				gtmsource_filter |= EXTERNAL_FILTER;
			else
				gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		gtmsource_process();
		/* gtmsource_process returns only when mode needs to be changed to PASSIVE */
		assert(gtmsource_state == GTMSOURCE_CHANGING_MODE);
		/* Release everything acquired for the active phase before looping back to the passive wait */
		gtmsource_ctl_close();
		gtmsource_free_msgbuff();
		gtmsource_free_tcombuff();
		gtmsource_free_filter_buff();
		gtmsource_stop_heartbeat();
		if (FD_INVALID != gtmsource_sock_fd)
			repl_close(&gtmsource_sock_fd);
		if (gtmsource_filter & EXTERNAL_FILTER)
			repl_stop_filter();
	} while (TRUE);
Example #9
0
int gtmrecv_shutdown(boolean_t auto_shutdown, int exit_status)
{
	uint4           savepid;
	boolean_t       shut_upd_too = FALSE;
	int             status;
	unix_db_info	*udi;

	error_def(ERR_RECVPOOLSETUP);
	error_def(ERR_TEXT);

	repl_log(stdout, TRUE, TRUE, "Initiating shut down\n");
	call_on_signal = NULL;		/* So we don't reenter on error */
	/* assert that auto shutdown should be invoked only if the current process is a receiver server */
	assert(!auto_shutdown || gtmrecv_srv_count);
	if (auto_shutdown)
	{	/* grab the ftok semaphore and recvpool access control lock IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
		status = grab_sem(RECV, RECV_POOL_ACCESS_SEM);
		if (0 > status)
		{
			repl_log(stderr, TRUE, TRUE,
				"Error grabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR);
			return (ABNORMAL_SHUTDOWN);
		}
	} else
	{	/* ftok semaphore and recvpool access semaphore should already be held from the previous call to "recvpool_init" */
		DEBUG_ONLY(udi = (unix_db_info *)FILE_INFO(recvpool.recvpool_dummy_reg);)
		assert(udi->grabbed_ftok_sem);
		assert(holds_sem[RECV][RECV_POOL_ACCESS_SEM]);
		/* We do not want to hold the options semaphore to avoid deadlocks with receiver server startup (C9F12-002766) */
		assert(!holds_sem[RECV][RECV_SERV_OPTIONS_SEM]);
		recvpool.gtmrecv_local->shutdown = SHUTDOWN;
		/* Wait for receiver server to die. But release ftok semaphore and recvpool access control semaphore before
		 * waiting as the concurrently running receiver server might need these (e.g. if it is about to call the
		 * function "repl_inst_was_rootprimary").
		 */
		if (0 != rel_sem(RECV, RECV_POOL_ACCESS_SEM))
			gtm_putmsg(VARLSTCNT(7) ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in receiver server shutdown rel_sem"),
				REPL_SEM_ERRNO);
		repl_inst_ftok_sem_release();
		/* Wait for receiver server to shut down */
		while((SHUTDOWN == recvpool.gtmrecv_local->shutdown)
				&& (0 < (savepid = recvpool.gtmrecv_local->recv_serv_pid))
				&& is_proc_alive(savepid, 0))
			SHORT_SLEEP(GTMRECV_WAIT_FOR_SHUTDOWN);
		/* (Re)Grab the ftok semaphore and recvpool access control semaphore IN THAT ORDER (to avoid deadlocks) */
		repl_inst_ftok_sem_lock();
		status = grab_sem(RECV, RECV_POOL_ACCESS_SEM);
		if (0 > status)
		{
			repl_log(stderr, TRUE, TRUE,
				"Error regrabbing receive pool control semaphore : %s. Shutdown not complete\n", REPL_SEM_ERROR);
			return (ABNORMAL_SHUTDOWN);
		}
		exit_status = recvpool.gtmrecv_local->shutdown;
		if (SHUTDOWN == exit_status)
		{
			if (0 == savepid) /* No Receiver Process */
				exit_status = NORMAL_SHUTDOWN;
			else /* Receiver Server Crashed */
			{
				repl_log(stderr, FALSE, TRUE, "Receiver Server exited abnormally\n");
				exit_status = ABNORMAL_SHUTDOWN;
				shut_upd_too = TRUE;
			}
		}
	}