Example #1
void tp_incr_clean_up(short newlevel)
{
	uint4			num_free;
	boolean_t		freed;
	sgm_info 		*si;
	cw_set_element 		*cse, *next_cse, *tmp_cse;
	cw_set_element		*cse_newlvl;	/* pointer to that cse in a given horizontal list closest to "newlevel" */
	srch_blk_status		*tp_srch_status;
	int			min_t_level;	/* t_level of the head of the horizontal-list of a given cw-set-element */
	gd_region		*tmp_gv_cur_region;
	ht_ent_int4		*tabent;

	assert(newlevel > 0);
	if (JNL_FENCE_LIST_END != jnl_fence_ctl.fence_list)	/* currently global_tlvl_info struct holds only jnl related info */
		rollbk_gbl_tlvl_info(newlevel);
	tmp_gv_cur_region = gv_cur_region;	/* save region and associated pointers to restore them later */
	for (si = first_sgm_info;  si != NULL;  si = si->next_sgm_info)
	{
		num_free = 0;
		sgm_info_ptr = si;	/* maintain sgm_info_ptr & gv_cur_region binding whenever doing TP_CHANGE_REG */
		TP_CHANGE_REG_IF_NEEDED(si->gv_cur_region);
		rollbk_sgm_tlvl_info(newlevel, si);			/* rollback all the tlvl specific info */
		cse = si->first_cw_set;
		DEBUG_ONLY(min_t_level = 1);
		/* A property that will help a lot in understanding this algorithm is the following.
		 * All cse's in a given horizontal list will have their "next_cw_set" pointing to the same cse
		 * 	which is guaranteed to be the head of the horizontal list of the next cw-set-element in the vertical list.
		 */
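		/* An illustrative picture (hypothetical block numbers and t_levels; not from the original source):
		 *
		 *	si->first_cw_set -> [blk 10, t_level 1] --high_tlevel--> [blk 10, t_level 2]
		 *	                            |                                   |
		 *	                       next_cw_set                         next_cw_set
		 *	                            v                                   v
		 *	                    [blk 20, t_level 1] <-- both point to the head of blk 20's horizontal list
		 */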
		while (NULL != cse)
		{
			assert(NULL == cse->low_tlevel);
			next_cse = cse->next_cw_set;
			/* Note down tp_srch_status corresponding to cse (in case it exists). Need to later reset "->cse" field
			 * of this structure to point to the new cse for this block. Note that if cse->mode is gds_t_create,
			 * there will be no tp_srch_status entry allotted for cse->blk (one will be there only for the chain.flag
			 * representation of this to-be-created block). Same case with mode of kill_t_create as it also corresponds
			 * to a non-existent block#. Therefore don't try looking up the hashtable for this block in those cases.
			 */
			tp_srch_status = NULL;
			assert((gds_t_create == cse->mode) || (kill_t_create == cse->mode)
				|| (gds_t_write == cse->mode) || (kill_t_write == cse->mode));
			if ((gds_t_create != cse->mode) && (kill_t_create != cse->mode)
					&& (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&cse->blk))))
				tp_srch_status = tabent->value;
			DEBUG_ONLY(
				tmp_cse = cse;
				TRAVERSE_TO_LATEST_CSE(tmp_cse);
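				/* TRAVERSE_TO_LATEST_CSE is assumed to walk "high_tlevel" links, leaving tmp_cse at the
				 * most recent (highest t_level) cse in this horizontal list, i.e. roughly
				 *	while (NULL != tmp_cse->high_tlevel) tmp_cse = tmp_cse->high_tlevel;
				 */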
				assert((NULL == tp_srch_status) || (tp_srch_status->cse == tmp_cse));
			)
			if (newlevel < cse->t_level)
			{	/* delete the entire horizontal list for this cw-set-element.
				 * And because of the following assert, we will be deleting the entire horizontal list for
				 * 	all cw-set-elements following the current one in the vertical list.
				 */
				assert(min_t_level <= cse->t_level);
				DEBUG_ONLY(min_t_level = cse->t_level;)
				if (!num_free)
				{	/* first time an entire cw-set-element's horizontal-list needs to be removed.
					 * reset si->first_cw_set or si->last_cw_set pointers as appropriate.
					 * the actual free up of the cw-set-elements will occur later in this loop
					 */
					tmp_cse = cse->prev_cw_set;
					assert(((NULL == tmp_cse) && (cse == si->first_cw_set))
							|| ((NULL != tmp_cse) && (cse != si->first_cw_set)));
					if (cse == si->first_cw_set)
						si->first_cw_set = NULL;
					si->last_cw_set = tmp_cse;
					while (NULL != tmp_cse)
					{	/* reset forward-link of horizontal-list of the previous cw_set_element */
						assert(tmp_cse->next_cw_set == cse);
						tmp_cse->next_cw_set = NULL;
						tmp_cse = tmp_cse->high_tlevel;
					}
				}
				num_free++;	/* count of cw-set-elements whose entire horizontal list has been removed */
				cse_newlvl = NULL;
			} else
/* Workhorse of fetching source for given trigger.
 */
STATICFNDEF void trigger_fill_xecute_buffer_read_trigger_source(gv_trigger_t *trigdsc)
{
	enum cdb_sc		cdb_status;
	int4			index;
	mstr			gbl, xecute_buff;
	mval			trig_index;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	gvt_trigger_t		*gvt_trigger;
	gv_namehead		*gvt;
	gv_namehead		*save_gv_target;
	gd_region		*save_gv_cur_region;
	sgm_info		*save_sgm_info_ptr;
	gv_key			save_currkey[DBKEYALLOC(MAX_KEY_SZ)];

	assert(0 < dollar_tlevel);
	assert(NULL != trigdsc);
	SAVE_TRIGGER_REGION_INFO(save_currkey);
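	/* SAVE_TRIGGER_REGION_INFO is assumed to capture gv_target, gv_cur_region, sgm_info_ptr and gv_currkey
	 * (presumably into the save_* locals declared above and save_currkey) so that the region/target switches
	 * done below can be undone by RESTORE_TRIGGER_REGION_INFO before returning.
	 */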

	gvt_trigger = trigdsc->gvt_trigger;			/* We now know our base block */
	index = trigdsc - gvt_trigger->gv_trig_array + 1;	/* We now know our trigger index value */
	i2mval(&trig_index, index);
	DBGTRIGR((stderr, "trigger_fill_xecute_buffer_read_trigger_source: entry $tlevel:%d\tindex:%d of %d\n",
				dollar_tlevel, index, gvt_trigger->num_gv_triggers));
	gvt = gv_target = gvt_trigger->gv_target;		/* gv_target contains global name */
	gbl.addr = gvt->gvname.var_name.addr;
	gbl.len = gvt->gvname.var_name.len;
	/* Our situation is that while our desired gv_target has csa information, we don't know specifically
	 * which global directory was in use so we can't run gv_bind_name() lest we find the given global
	 * name in the wrong global directory thus running the wrong triggers. But we know this target is
	 * properly formed since it had to be when it was recorded when the triggers were loaded. Because of
	 * that, we can get the correct csa and gv_target and csa-region will point us to a region that will
	 * work even if it isn't exactly the one we used to get to this trigger.
	 */
	TP_CHANGE_REG_IF_NEEDED(gvt->gd_csa->region);
	csa = cs_addrs;
	csd = csa->hdr;
	assert(csd == cs_data);
	tp_set_sgm();
	/* See if we need to reload our triggers */
	if ((csa->db_trigger_cycle != gvt->db_trigger_cycle)
	    || (csa->db_dztrigger_cycle && (gvt->db_dztrigger_cycle != csa->db_dztrigger_cycle)))
	{       /* The process' view of the triggers could be potentially stale. Restart to be safe.
		 * Triggers can be invoked only by GT.M and Update process. Out of these, we expect only
		 * GT.M to see restarts due to concurrent trigger changes. Update process is the only
		 * updater on the secondary so we don't expect it to see any concurrent trigger changes.
		 * Assert accordingly.
		 */
		DBGTRIGR((stderr, "trigger_fill_xecute_buffer_read_trigger_source: stale trigger view\n"));
		assert(CDB_STAGNATE > t_tries);
		assert(IS_GTM_IMAGE);
		t_retry(cdb_sc_triggermod);
	}
	SET_GVTARGET_TO_HASHT_GBL(csa);
	INITIAL_HASHT_ROOT_SEARCH_IF_NEEDED;
	assert(0 == trigdsc->xecute_str.str.len);	/* Make sure not replacing/losing a buffer */
	xecute_buff.addr = trigger_gbl_fill_xecute_buffer(gbl.addr, gbl.len, &trig_index, NULL, (int4 *)&xecute_buff.len);
	trigdsc->xecute_str.str = xecute_buff;
	/* Restore gv_target/gv_currkey which need to be kept in sync */
	RESTORE_TRIGGER_REGION_INFO(save_currkey);
	return;
}
Example #3
boolean_t	tp_tend(boolean_t crit_only)
{
	block_id		tp_blk;
	boolean_t		history_validated, is_mm, was_crit, x_lock, do_validation;
	boolean_t		do_deferred_writes = FALSE, replication = FALSE;
	bt_rec_ptr_t		bt;
	cache_rec_ptr_t		cr;
	cw_set_element		*cse;
	file_control		*fc;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	jnl_format_buffer	*jfb;
	sgm_info		*si, *tmpsi;
	tp_region		*tr, *tr_last;
	sgmnt_addrs		*csa, *tmpcsa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*t1;
	trans_num		ctn, tnque_earliest_tn;
	trans_num		valid_thru;	/* buffers touched by this transaction will be valid thru this tn */
	enum cdb_sc		status;
	gd_region		*save_gv_cur_region;
	int			lcnt, participants;
	jnldata_hdr_ptr_t	jnl_header;
	int			repl_tp_region_count = 0;
	boolean_t		first_time = TRUE, release_crit, yes_jnl_no_repl, retvalue;
	uint4			jnl_status, leafmods, indexmods;
	uint4			total_jnl_rec_size;
	jnlpool_ctl_ptr_t	jpl, tjpl;

	error_def(ERR_DLCKAVOIDANCE);
	error_def(ERR_JNLTRANS2BIG);

	assert(dollar_tlevel > 0);
	assert(0 == jnl_fence_ctl.level);
	participants = 0;
	status = cdb_sc_normal;
	/* if the transaction does no updates and the transaction history has not changed, we do not need any more validation */
	do_validation = FALSE;	/* initially FALSE; set to TRUE below if any region needs validation */
	jnl_status = 0;
	if (FALSE == crit_only)
	{
		for (si = first_sgm_info;  (NULL != si); si = si->next_sgm_info)
		{
			sgm_info_ptr = si;
			TP_CHANGE_REG_IF_NEEDED(si->gv_cur_region);
			csa = cs_addrs;
			csd = cs_data;
			if ((csd->wc_blocked) ||			/* If blocked, or.. */
				((dba_mm == csa->hdr->acc_meth) &&	/* we have MM and.. */
				(csa->total_blks != csa->ti->total_blks)))	/* file has been extended */
			{	/* Force repair */
				t_fail_hist[t_tries] = cdb_sc_helpedout; /* special status to prevent punishing altruism */
				TP_TRACE_HIST(CR_BLKEMPTY, NULL);
				return FALSE;
			}
			/* whenever si->first_cw_set is non-NULL, ensure that si->update_trans is TRUE */
			assert((NULL == si->first_cw_set) || si->update_trans);
			/* whenever si->first_cw_set is NULL, ensure that si->update_trans is FALSE
			 * except when the set noop optimization is enabled */
			assert((NULL != si->first_cw_set) || !si->update_trans || gvdupsetnoop);
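			/* The two asserts above permit exactly these (first_cw_set, update_trans) combinations:
			 *	non-NULL / TRUE  : normal update
			 *	NULL     / FALSE : no updates in this region
			 *	NULL     / TRUE  : only with the duplicate set noop optimization (gvdupsetnoop)
			 */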
			if (!si->update_trans)
			{
				if (si->start_tn == csa->ti->early_tn)
				{	/* read with no change to the transaction history. ensure we haven't overrun
					 * our history buffer and we have reasonable values for first and last */
					assert(si->last_tp_hist - si->first_tp_hist <= si->tp_hist_size);
					continue;
				} else
					do_validation = TRUE;
			} else
			{
				do_validation = TRUE;
				is_mm = (dba_mm == cs_data->acc_meth);
				/* We are still out of crit if this is not our last attempt. If so, run the region list and check
				 * that we have sufficient free blocks for our update. If not, get them now while we can.
				 * We will repeat this check later in crit but it will hopefully have little or nothing to do.
				 * bypass 1st check if already in crit -- check later
				 */
				if (!csa->now_crit && !is_mm && (csa->nl->wc_in_free < si->cw_set_depth + 1)
						&& !wcs_get_space(si->gv_cur_region, si->cw_set_depth + 1, NULL))
					assert(FALSE);	/* wcs_get_space() should have returned TRUE unconditionally in this case */
			}
			if (si->update_trans && JNL_ENABLED(csa))
			{	/* compute the total journal record size requirements before grab_crit().
				 * there is code later that will check for state changes from now to then
				 */
				TOTAL_TPJNL_REC_SIZE(total_jnl_rec_size, si, csa);
				/* compute current transaction's maximum journal space needs in number of disk blocks */
				si->tot_jrec_size = MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size);
				/* check if current TP transaction's journal size needs are greater than max jnl file size */
				if (si->tot_jrec_size > csd->autoswitchlimit)
					/* can't fit the current transaction's journal records into one journal file */
					rts_error(VARLSTCNT(6) ERR_JNLTRANS2BIG, 4, si->tot_jrec_size,
						JNL_LEN_STR(csd), csd->autoswitchlimit);
			}
		}	/* for (si ... ) */
		if (!do_validation)
		{
			if (CDB_STAGNATE <= t_tries)
			{
				for (tr = tp_reg_list; NULL != tr; tr = tr->fPtr)
					rel_crit(tr->reg);
			}
			UNIX_ONLY(
				/* Must be done after REVERT since we are no longer in crit */
				if (unhandled_stale_timer_pop)
					process_deferred_stale();
			)
			return TRUE;
		}
	}
uint4 mur_process_intrpt_recov()
{
	jnl_ctl_list			*jctl, *last_jctl;
	reg_ctl_list			*rctl, *rctl_top;
	int				rename_fn_len, save_name_len, idx;
	char				prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1];
	jnl_create_info			jnl_info;
	uint4				status, status2;
	uint4				max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks;
	sgmnt_data_ptr_t		csd;
	jnl_private_control		*jpc;
	jnl_buffer_ptr_t		jbp;
	boolean_t			jfh_changed;
	jnl_record			*jnlrec;
	jnl_file_header			*jfh;
	jnl_tm_t			now;

	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		TP_CHANGE_REG(rctl->gd);
		csd = cs_data;	/* MM logic after wcs_flu call requires this to be set */
		assert(csd == rctl->csa->hdr);
		jctl = rctl->jctl_turn_around;
		max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0;
		for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen)
		{
			jfh = jctl->jfh;
			if (max_autoswitchlimit < jfh->autoswitchlimit)
			{	/* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across
				 * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit.
				 */
				max_autoswitchlimit = jfh->autoswitchlimit;
				max_jnl_alq         = jfh->jnl_alq;
				max_jnl_deq         = jfh->jnl_deq;
			}
			/* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks
			 * seen in INCTN records in backward processing. It is possible that backward processing might have
			 * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-back
			 * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which
			 * maintains exactly this count. Therefore adjust the rctl counter accordingly.
			 */
			assert(!jfh->prev_recov_blks_to_upgrd_adjust || !jfh->recover_interrupted);
			assert(!jfh->prev_recov_blks_to_upgrd_adjust || jfh->prev_recov_end_of_data);
			rctl->blks_to_upgrd_adjust += jfh->prev_recov_blks_to_upgrd_adjust;
		}
		if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit)
		{
			csd->jnl_alq         = max_jnl_alq;
			csd->jnl_deq         = max_jnl_deq;
			csd->autoswitchlimit = max_autoswitchlimit;
		} else
		{
			assert(csd->jnl_alq         == last_jctl->jfh->jnl_alq);
			assert(csd->jnl_deq         == last_jctl->jfh->jnl_deq);
			assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit);
		}
		jctl = rctl->jctl_turn_around;
		/* Get a pointer to the turn around point EPOCH record */
		jnlrec = rctl->mur_desc->jnlrec;
		assert(JRT_EPOCH == jnlrec->prefix.jrec_type);
		assert(jctl->turn_around_time == jnlrec->prefix.time);
		assert(jctl->turn_around_seqno == jnlrec->jrec_epoch.jnl_seqno);
		assert(jctl->turn_around_tn == jnlrec->prefix.tn);
		assert(jctl->rec_offset == jctl->turn_around_offset);
		/* Reset file-header "blks_to_upgrd" counter to the turn around point epoch value. Adjust this to include
		 * the number of new V4 format bitmaps created by post-turnaround-point db file extensions.
		 * The adjustment value is maintained in rctl->blks_to_upgrd_adjust.
		 */
		csd->blks_to_upgrd = jnlrec->jrec_epoch.blks_to_upgrd;
		csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust;
#		ifdef GTM_TRIGGER
		/* online rollback can potentially take the database to a point in the past where the triggers that were
		 * previously installed are no longer a part of the current database state and so any process that restarts
		 * AFTER online rollback completes SHOULD reload triggers and the only way to do that is by incrementing the
		 * db_trigger_cycle in the file header.
		 */
		if (jgbl.onlnrlbk && (0 < csd->db_trigger_cycle))
		{	/* The check for a non-zero db_trigger_cycle is to prevent other processes (continuing after online
			 * rollback) from establishing implicit TP (on seeing the trigger cycle mismatch) when there are
			 * actually no triggers installed in the database (because there were none at the start of online
			 * rollback).
			 */
			csd->db_trigger_cycle++;
			if (0 == csd->db_trigger_cycle)
				csd->db_trigger_cycle = 1;	/* Don't allow cycle set to 0 which means uninitialized */
		}
#		endif
		assert((WBTEST_ALLOW_ARBITRARY_FULLY_UPGRADED == gtm_white_box_test_case_number) ||
			(FALSE == jctl->turn_around_fullyupgraded) || (TRUE == jctl->turn_around_fullyupgraded));
		/* Set csd->fully_upgraded to FALSE if:
		 * a) The turn around EPOCH had the fully_upgraded field set to FALSE
		 * OR
		 * b) If csd->blks_to_upgrd counter is non-zero. This field can be non-zero even if the turnaround EPOCH's
		 * fully_upgraded field is TRUE. This is possible if the database was downgraded to V4 (post turnaround EPOCH)
		 * format and database extensions happened causing new V4 format bitmap blocks to be written. The count of V4
		 * format bitmap blocks is maintained ONLY as part of INCTN records (with INCTN opcode SET_JNL_FILE_CLOSE_EXTEND)
		 * noted down in rctl->blks_to_upgrd_adjust counter as part of BACKWARD processing which are finally added to
		 * csd->blks_to_upgrd.
		 */
		if (!jctl->turn_around_fullyupgraded || csd->blks_to_upgrd)
			csd->fully_upgraded = FALSE;
		csd->trans_hist.early_tn = jctl->turn_around_tn;
		csd->trans_hist.curr_tn = csd->trans_hist.early_tn;	/* INCREMENT_CURR_TN macro not used but noted in comment
									 * to identify all places that set curr_tn */
		csd->jnl_eovtn = csd->trans_hist.curr_tn;
		csd->turn_around_point = TRUE;
		/* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header.
		 * It is difficult for recovery to restore those fields to a correct partial value.
		 * Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record
		 * 	and no further processing happened after that.
		 * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery.
		 * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long).
		 */
		if (csd->desired_db_format_tn >= jctl->turn_around_tn)
			csd->desired_db_format_tn = jctl->turn_around_tn - 1;
		if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn)
			csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1;
		if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn)
			csd->tn_upgrd_blks_0 = (trans_num)-1;
		csd->reorg_upgrd_dwngrd_restart_block = 0;
		/* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch
		 * record and the change in "total_blks" since that epoch to the present form of the database. Any difference
		 * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback
		 * undoes everything (including all updates) except file extensions (it does not truncate the file size).
		 * Therefore every block that was newly allocated as part of those file extensions should be considered FREE
		 * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created.
		 */
		assert(jnlrec->jrec_epoch.total_blks <= csd->trans_hist.total_blks);
		csd->trans_hist.free_blocks = jnlrec->jrec_epoch.free_blocks
			+ (csd->trans_hist.total_blks - jnlrec->jrec_epoch.total_blks)
			- DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP)
			+ DIVIDE_ROUND_UP(jnlrec->jrec_epoch.total_blks, BLKS_PER_LMAP);
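		/* Worked example with hypothetical numbers (assuming BLKS_PER_LMAP = 512): if the epoch recorded
		 * total_blks = 1024 and free_blocks = 100 and the file has since been extended to total_blks = 2048,
		 * then 1024 new blocks were added, of which DIVIDE_ROUND_UP(2048, 512) - DIVIDE_ROUND_UP(1024, 512)
		 * = 4 - 2 = 2 are local bitmap blocks (BUSY on creation), so
		 * free_blocks = 100 + 1024 - 4 + 2 = 1122.
		 */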
		assert(!csd->blks_to_upgrd || !csd->fully_upgraded);
		assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks);
		/* Update strm_reg_seqno[] in db file header to reflect the turn around point.
		 * Before updating "strm_reg_seqno", make sure value is saved into "save_strm_reg_seqno".
		 * This is relied upon by the function "mur_get_max_strm_reg_seqno" in case of interrupted rollback.
		 */
		for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
		{
			if (!csd->save_strm_reg_seqno[idx])
				csd->save_strm_reg_seqno[idx] = csd->strm_reg_seqno[idx];
			csd->strm_reg_seqno[idx] = jnlrec->jrec_epoch.strm_seqno[idx];
		}
		wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB);
		assert(cs_addrs->ti->curr_tn == jctl->turn_around_tn);
#		ifdef UNIX
		if (jgbl.onlnrlbk)
		{
			if (dba_bg == cs_addrs->hdr->acc_meth)
			{	/* dryclean the cache (basically reset the cycle fields in all the cache records) so that
				 * GT.M processes that only do reads require crit and hence realize that online rollback
				 * is in progress
				 */
				bt_refresh(cs_addrs, FALSE); /* sets earliest bt TN to be the turn around TN */
			}
			db_csh_ref(cs_addrs, FALSE);
			assert(NULL != cs_addrs->jnl);
			jpc = cs_addrs->jnl;
			assert(NULL != jpc->jnl_buff);
			jbp = jpc->jnl_buff;
			/* Since Rollback simulates the journal record along with the timestamp at which the update was made, it
			 * sets jgbl.dont_reset_gbl_jrec_time to TRUE so that during forward processing t_end or tp_tend does not
			 * reset the gbl_jrec_time to reflect the current time. But, with Online Rollback, one can have the shared
			 * memory up and running and hence jbp->prev_jrec_time can be the time of the most recent journal
			 * update made. Later in t_end/tp_tend, ADJUST_GBL_JREC_TIME is invoked which ensures that if ever
			 * gbl_jrec_time (the time of the current update) is less than jbp->prev_jrec_time (time of the latest
			 * journal update), dont_reset_gbl_jrec_time better be FALSE. But this assert would trip since Rollback
			 * sets the latter to TRUE. To fix this, set jbp->prev_jrec_time to the turn around time stamp. This way
			 * we are guaranteed that all the updates done in the forward processing will have a timestamp that is
			 * greater than the turn around timestamp.
			 */
			SET_JNLBUFF_PREV_JREC_TIME(jbp, jctl->turn_around_time, DO_GBL_JREC_TIME_CHECK_FALSE);
		} else if (dba_bg == csd->acc_meth)
		{	/* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */
			SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1);
		}
#		else
		if (dba_bg == csd->acc_meth)
		{	/* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */
			SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1);
		}
#		endif
		csd->turn_around_point = FALSE;
		assert(OLDEST_HIST_TN(cs_addrs) == (cs_addrs->ti->curr_tn - 1));
		/* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */
		assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data));
		rctl->csd = cs_data;
	}
	JNL_SHORT_TIME(now);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		TP_CHANGE_REG_IF_NEEDED(rctl->gd);
		if (!rctl->jfh_recov_interrupted)
			jctl = rctl->jctl_turn_around;
		else
		{
			DEBUG_ONLY(
				for (jctl = rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen)
					;
				/* check that latest gener file name does not match db header */
				assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len)
					|| (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)));
			)
			jctl = rctl->jctl_alt_head;
		}
		assert(NULL != jctl);
		for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen)
			;
		assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); 			       /* latest gener file name */
		assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */
		if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "",
								rename_fn, &rename_fn_len, now, &status2)))
			return status;
		jctl->jnl_fn_len = rename_fn_len;  /* change the name in memory to the proposed name */
		memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1);	/* + 1 to also copy the terminating null */
		/* Rename hasn't happened yet at the filesystem level. In case current recover command is interrupted,
		 * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for
		 * all journal files from which PBLK records were applied. Create new journal files for forward play.
		 */
		assert(NULL != rctl->jctl_turn_around);
		jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */
		assert(0 != jctl->turn_around_offset);
		jfh = jctl->jfh;
		jfh->turn_around_offset = jctl->turn_around_offset;	/* save progress in file header for 	*/
		jfh->turn_around_time = jctl->turn_around_time;		/* possible re-issue of recover 	*/
		for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
			jfh->strm_end_seqno[idx] = csd->strm_reg_seqno[idx];
		jfh_changed = TRUE;
		/* We are about to update the journal file header of the turnaround-point journal file to store the
		 * non-zero jfh->turn_around_offset. Ensure corresponding database is considered updated.
		 * This is needed in case journal recovery/rollback terminates abnormally and we go to mur_close_files.
		 * We need to ensure csd->recov_interrupted does not get reset to FALSE even if this region did not
		 * otherwise have any updates to the corresponding database file. (GTM-8394)
		 */
		rctl->db_updated = TRUE;
		for ( ; NULL != jctl; jctl = jctl->next_gen)
		{	/* Set up the next_jnl links. Note that in the case of interrupted recovery, next_jnl links
			 * would already have been set starting from the turn-around point journal file of the
			 * interrupted recovery, but the new recovery MIGHT have taken us to a still earlier
			 * generation journal file that needs its next_jnl link set. This is why we do the next_jnl
			 * link setup even in the case of interrupted recovery, although in most cases it is unnecessary.
			 */
			jfh = jctl->jfh;
			if (NULL != jctl->next_gen)
			{
				jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len;
				memcpy(jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len);
				jfh_changed = TRUE;
			} else
				assert(0 == jfh->next_jnl_file_name_length); /* null link from latest generation */
			if (jfh->turn_around_offset && (jctl != rctl->jctl_turn_around))
			{	/* It is possible that the current recovery has a turn-around-point much before the
				 * previously interrupted recovery. If it happens to be a previous generation journal
				 * file then we have to reset the original turn-around-point to be zero in the journal
				 * file header in order to ensure if this recovery gets interrupted we do interrupted
				 * recovery processing until the new turn-around-point instead of stopping incorrectly
				 * at the original turn-around-point itself. Note that there could be more than one
				 * journal file with a non-zero turn_around_offset (depending on how many previous
				 * recoveries got interrupted in this loop) that need to be reset.
				 */
				assert(!jctl->turn_around_offset);
				assert(rctl->recov_interrupted || rctl->jctl_apply_pblk); /* rctl->jfh_recov_interrupted can fail */
				jfh->turn_around_offset = 0;
				jfh->turn_around_time = 0;
				jfh_changed = TRUE;
			}
			if (jfh_changed)
			{
				/* Since overwriting the journal file header (an already allocated block
				 * in the file) should not cause ENOSPC, we don't take the trouble of
				 * passing csa or jnl_fn (first two parameters). Instead we pass NULL.
				 */
				JNL_DO_FILE_WRITE(NULL, NULL, jctl->channel, 0, jfh,
					REAL_JNL_HDR_LEN, jctl->status, jctl->status2);
				if (SS_NORMAL != jctl->status)
				{
					assert(FALSE);
					if (SS_NORMAL == jctl->status2)
						gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len,
							jctl->jnl_fn, jctl->status);
					else
						gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len,
							jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2));
					return jctl->status;
				}
				GTM_JNL_FSYNC(rctl->csa, jctl->channel, jctl->status);
				if (-1 == jctl->status)
				{
					jctl->status2 = errno;
					assert(FALSE);
					gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(9) ERR_JNLFSYNCERR, 2,
						jctl->jnl_fn_len, jctl->jnl_fn,
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2);
					return ERR_JNLFSYNCERR;
				}
			}
			jfh_changed = FALSE;
		}
		memset(&jnl_info, 0, SIZEOF(jnl_info));
		jnl_info.status = jnl_info.status2 = SS_NORMAL;
		jnl_info.prev_jnl = &prev_jnl_fn[0];
		set_jnl_info(rctl->gd, &jnl_info);
		jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len;
		memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len);
		jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0;
		jnl_info.jnl_len = rctl->csd->jnl_file_len;
		memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len);
		jnl_info.jnl[jnl_info.jnl_len] = 0;
		assert(!mur_options.rollback || jgbl.mur_rollback);
		jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno;
		jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time;	/* time needed for cre_jnl_file_common() */
		if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl);
			return jnl_info.status;
		}
#		ifdef UNIX
		if (jgbl.onlnrlbk)
		{
			cs_addrs = rctl->csa;
			/* Mimic what jnl_file_close does in the case of a cleanly closed journal file */
			jpc = cs_addrs->jnl; /* the previous loop makes sure cs_addrs->jnl->jnl_buff is valid */
			NULLIFY_JNL_FILE_ID(cs_addrs);
			jpc->jnl_buff->cycle++; /* so that all other processes know to switch to the newer journal file */
			jpc->cycle--; /* decrement cycle so jnl_ensure_open() knows to reopen the journal */
		}
#		endif
		if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */
		{
			mur_rem_jctls(rctl);
			rctl->jctl_alt_head = NULL;
		}
		/* From this point on, journal records are written into the newly created journal file. However, we still read
		 * from old journal files.
		 */
	}