Ejemplo n.º 1
0
void wcs_recover(gd_region *reg)
{
	bt_rec_ptr_t		bt;
	cache_rec_ptr_t		cr, cr_alt, cr_alt_new, cr_lo, cr_top, hash_hdr;
	cache_que_head_ptr_t	active_head, hq, wip_head, wq;
	gd_region		*save_reg;
	que_ent_ptr_t		back_link; /* should be crit & not need interlocked ops. */
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	int4			bml_full, dummy_errno, blk_size;
	uint4			jnl_status, epid, r_epid;
	int4			bt_buckets, bufindx;	/* should be the same type as "csd->bt_buckets" */
	inctn_opcode_t          save_inctn_opcode;
	unsigned int		bplmap, lcnt, total_blks, wait_in_rip;
	sm_uc_ptr_t		buffptr;
	blk_hdr_ptr_t		blk_ptr;
	INTPTR_T		bp_lo, bp_top, old_block;
	boolean_t		backup_block_saved, change_bmm;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	sgm_info		*si;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* If this is the source server, do not invoke cache recovery as that implies touching the database file header
	 * (incrementing the curr_tn etc.) and touching the journal file (writing INCTN records) both of which are better
	 * avoided by the source server; It is best to keep it as read-only to the db/jnl as possible.  It is ok to do
	 * so because the source server anyways does not rely on the integrity of the database cache and so does not need
	 * to fix it right away. Any other process that does rely on the cache integrity will fix it when it gets crit next.
	 */
	if (is_src_server)
		return;
	save_reg = gv_cur_region;	/* protect against [at least] M LOCK code which doesn't maintain cs_addrs and cs_data */
	TP_CHANGE_REG(reg);		/* which are needed by called routines such as wcs_wtstart and wcs_mm_recover */
	if (dba_mm == reg->dyn.addr->acc_meth)	 /* MM uses wcs_recover to remap the database in case of a file extension */
	{
		wcs_mm_recover(reg);
		TP_CHANGE_REG(save_reg);
		TREF(wcs_recover_done) = TRUE;
		return;
	}
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	cnl = csa->nl;
	si = csa->sgm_info_ptr;
	/* If a mupip truncate operation was abruptly interrupted we have to correct any inconsistencies */
	GTM_TRUNCATE_ONLY(recover_truncate(csa, csd, gv_cur_region);)
Ejemplo n.º 2
0
void		wcs_recover(gd_region *reg)
{
	bt_rec_ptr_t		bt;
	cache_rec_ptr_t		cr, cr_alt, cr_lo, cr_top, hash_hdr;
	cache_que_head_ptr_t	active_head, hq, wip_head, wq;
	gd_region		*save_reg;
	que_ent_ptr_t		back_link; /* should be crit & not need interlocked ops. */
	sgmnt_data_ptr_t	csd;
	sgmnt_addrs		*csa;
	bool			blk_used, change_bmm;
	int4			bml_full, dummy_errno, blk_size;
	uint4			jnl_status, epid;
	int			bt_buckets;
	inctn_opcode_t          save_inctn_opcode;
	unsigned int		bplmap, lcnt, total_blks;
	sm_uc_ptr_t		buffptr;

	error_def(ERR_BUFRDTIMEOUT);
	error_def(ERR_DBCCERR);
	error_def(ERR_DBCNTRLERR);
	error_def(ERR_DBDANGER);
	error_def(ERR_ERRCALL);
	error_def(ERR_INVALIDRIP);
	error_def(ERR_STOPTIMEOUT);
	error_def(ERR_TEXT);

	save_reg = gv_cur_region;	/* protect against [at least] M LOCK code which doesn't maintain cs_addrs and cs_data */
	TP_CHANGE_REG(reg);		/* which are needed by called routines such as wcs_wtstart and wcs_mm_recover */
	if (dba_mm == reg->dyn.addr->acc_meth)	 /* MM uses wcs_recover to remap the database in case of a file extension */
	{
		wcs_mm_recover(reg);
		TP_CHANGE_REG(save_reg);
		return;
	}
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	assert(csa->now_crit || csd->clustered);
	DEBUG_ONLY(in_wcs_recover = TRUE;)	/* used by bt_put() called below */
Ejemplo n.º 3
0
void gtcmd_rundown(connection_struct *cnx, bool clean_exit)
{
	int4			link;
	cm_region_list		*ptr, *last, *que_next, *que_last;
	cm_region_head		*region;
	uint4			jnl_status;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	int			refcnt;
	boolean_t		was_crit;
	int4			rundown_status = EXIT_NRM;			/* if gds_rundown went smoothly */

	for (ptr = cnx->region_root;  ptr;)
	{
		region = ptr->reghead;
		TP_CHANGE_REG(region->reg);
		jpc = cs_addrs->jnl;
		if (ptr->pini_addr && clean_exit && JNL_ENABLED(cs_data) && (NOJNL != jpc->channel))
		{
			was_crit = cs_addrs->now_crit;
			if (!was_crit)
				grab_crit(gv_cur_region);
			if (JNL_ENABLED(cs_data))
			{
				jpc->pini_addr = ptr->pini_addr;
				SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pfin needs this to be set */
				jbp = jpc->jnl_buff;
				/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
				 * of jnl records.  This needs to be done BEFORE the jnl_ensure_open as that could write
				 * journal records (if it decides to switch to a new journal file).
				 */
				ADJUST_GBL_JREC_TIME(jgbl, jbp);
				jnl_status = jnl_ensure_open();
				if (0 == jnl_status)
				{
					if (0 != jpc->pini_addr)
						jnl_put_jrt_pfin(cs_addrs);
				} else
					send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			}
			if (!was_crit)
				rel_crit(gv_cur_region);
		}
		refcnt = --region->refcnt;
		/* Dont know how refcnt can become negative but in pro handle it by bypassing this region. The reason is the
		 * following. refcnt should have originally been a positive value. Every time this function is invoked, it would
		 * be decremented by one. There should have been one invocation that saw refcnt to be zero. That would have
		 * done the rundown of the region or if it is still in the stack the rundown is still in progress. Therefore
		 * it is not a good idea to try running down this region when we see refcnt to be negative (as otherwise we
		 * will get confused and could potentially end up with SIG-11 or ACCVIO errors). The worst case is that we
		 * would not have rundown the region in which case an externally issued MUPIP RUNDOWN would be enough.
		 */
		assert(0 <= refcnt);
		if (0 == refcnt)
		{	/* free up only as little as needed to facilitate structure reuse when the region is opened again */
			assert(region->head.fl == region->head.bl);
			VMS_ONLY(gtcm_ast_avail++);
			if (JNL_ALLOWED(cs_data))
				jpc->pini_addr = 0;
			UNIX_ONLY(rundown_status |=) gds_rundown();
			gd_ht_kill(region->reg_hash, TRUE);	/* TRUE to free up the table and the gv_targets it holds too */
			FREE_CSA_DIR_TREE(cs_addrs);
			cm_del_gdr_ptr(gv_cur_region);
		}
		que_next = (cm_region_list *)((unsigned char *)ptr + ptr->regque.fl);
		que_last = (cm_region_list *)((unsigned char *)ptr + ptr->regque.bl);
		link = (int4)((unsigned char *)que_next - (unsigned char *)que_last);
		que_last->regque.fl = link;
		que_next->regque.bl = -link;
		last = ptr;
		ptr = ptr->next;
		free(last);
	}
Ejemplo n.º 4
0
void	mu_reorg_upgrd_dwngrd(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		*blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp;
	block_id		startblk_input, stopblk_input;
	boolean_t		upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb;
	boolean_t		startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free;
	cache_rec_ptr_t		cr;
	char			*bml_lcl_buff = NULL, *command, *reorg_command;
	sm_uc_ptr_t		bptr = NULL;
	cw_set_element		*cse;
	enum cdb_sc		cdb_status;
	enum db_ver		new_db_format, ondsk_blkver;
	gd_region		*reg;
	int			cycle;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks;
	int4			status, status1, mapsize, lcnt, bml_status;
	reorg_stats_t		reorg_stats;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	sm_uc_ptr_t		blkBase, bml_sm_buff;	/* shared memory pointer to the bitmap global buffer */
	srch_hist		alt_hist;
	srch_blk_status		*blkhist, bmlhist;
	tp_region		*rptr;
	trans_num		curr_tn;
	unsigned char    	save_cw_set_depth;
	uint4			lcl_update_trans;

	region    = (CLI_PRESENT == cli_present("REGION"));
	upgrade   = (CLI_PRESENT == cli_present("UPGRADE"));
	downgrade = (CLI_PRESENT == cli_present("DOWNGRADE"));
	assert(upgrade && !downgrade || !upgrade && downgrade);
	command = upgrade ? "UPGRADE" : "DOWNGRADE";
	reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE";
	reorg_entiredb = TRUE;	/* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */
	startblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input)))
	{
		reorg_entiredb = FALSE;
		startblk_specified = TRUE;
	}
	stopblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input)))
	{
		reorg_entiredb = FALSE;
		stopblk_specified = TRUE;
	}
	mu_reorg_upgrd_dwngrd_in_prog = TRUE;
	mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE;

	assert(region);
	status = SS_NORMAL;
	error_mupip = FALSE;
	gvinit();	/* initialize gd_header (needed by the later call to mu_getlst) */
	mu_getlst("REG_NAME", SIZEOF(tp_region));	/* get the parameter corresponding to REGION qualifier */
	if (error_mupip)
	{
		util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command));
		mupip_exit(ERR_MUNOACTION);
	}
	assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize);	/* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */
	gv_target = targ_alloc(gv_keysize, NULL, NULL);	/* t_begin needs this initialized */
	gv_target_list = NULL;
	memset(&alt_hist, 0, SIZEOF(alt_hist));	/* null-initialize history */
	blkhist = &alt_hist.h[0];
	for (rptr = grlist;  NULL != rptr;  rptr = rptr->fPtr)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			break;
		reg = rptr->reg;
		util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
		if (reg_cmcheck(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot run across network",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		mu_reorg_process = TRUE;	/* gvcst_init will use this value to use gtm_poollimit settings. */
		gvcst_init(reg);
		mu_reorg_process = FALSE;
		assert(update_array != NULL);
		/* access method stored in global directory and database file header might be different in which case
		 * the database setting prevails. therefore, the access method check can be done only after opening
		 * the database (i.e. after the gvcst_init)
		 */
		if (dba_bg != REG_ACC_METH(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		/* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to
		 * the same db file correspond to only one grlist entry.
		 */
		assert(FALSE == reg->was_open);
		TP_CHANGE_REG(reg);	/* sets gv_cur_region, cs_addrs, cs_data */
		csa = cs_addrs;
		csd = cs_data;
		blk_size = csd->blk_size;	/* "blk_size" is used by the BLK_FINI macro */
		if (reg->read_only)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		assert(GDSVCURR == GDSV6); /* so we trip this assert in case GDSVCURR changes without a change to this module */
		new_db_format = (upgrade ? GDSV6 : GDSV4);
		grab_crit(reg);
		curr_tn = csd->trans_hist.curr_tn;
		/* set the desired db format in the file header to the appropriate version, increment transaction number */
		status1 = desired_db_format_set(reg, new_db_format, reorg_command);
		assert(csa->now_crit);	/* desired_db_format_set() should not have released crit */
		first_reorg_in_this_db_fmt = TRUE;	/* with the current desired_db_format, this is the first reorg */
		if (SS_NORMAL != status1)
		{	/* "desired_db_format_set" would have printed appropriate error messages */
			if (ERR_MUNOACTION != status1)
			{	/* real error occurred while setting the db format. skip to next region */
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
			util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
			if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn)
				first_reorg_in_this_db_fmt = FALSE;
		} else
			util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
		assert(dba_bg == csd->acc_meth);
		/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		actual_blks2upgrd = csd->blks_to_upgrd;
		/* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH
		 * 	"csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing.
		 * If MUPIP REORG UPGRADE and all non-free blocks need to be upgraded then again we can skip processing.
		 */
		if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
			|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
		{
			util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD",
				       TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			rel_crit(reg);
			continue;
		}
		stop_blk = total_blks;
		if (stopblk_specified && stopblk_input <= stop_blk)
			stop_blk = stopblk_input;
		if (first_reorg_in_this_db_fmt)
		{	/* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */
			csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn;
			csd->reorg_upgrd_dwngrd_restart_block = 0;
			start_blk = (startblk_specified ? startblk_input : 0);
		} else
		{	/* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running
			 * or a previously running REORG of the same type was interrupted (Ctrl-Ced).
			 * In either case resume processing from whatever restart block number is stored in fileheader
			 * the only exception is if "STARTBLK" was specified in the input in which use that unconditionally.
			 */
			start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block);
		}
		if (start_blk > stop_blk)
			start_blk = stop_blk;
		mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn;
		/* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now
		 * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not.
		 */
		if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
		{
			rel_crit(reg);
			gtm_putmsg_csa(CSA_ARG(csa)
				VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		rel_crit(reg);
		/* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */
		status1 = SS_NORMAL;
		start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP);
		last_bmp  = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP);
		curblk = start_blk;	/* curblk is the block to be upgraded/downgraded */
		util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		if (NULL != bptr)
		{	/* malloc/free "bptr" for each region as GDS block-size can be different */
			free(bptr);
			bptr = NULL;
		}
		memset(&reorg_stats, 0, SIZEOF(reorg_stats));	/* initialize statistics for this region */
		for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP)
		{
			if (mu_ctrly_occurred || mu_ctrlc_occurred)
			{
				status1 = ERR_MUNOFINISH;
				break;
			}
			/* --------------------------------------------------------------
			 *             Read in current bitmap block
			 * --------------------------------------------------------------
			 */
			assert(!csa->now_crit);
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* bring block into the cache outside of crit */
			reorg_stats.blks_read_from_disk_bmp++;
			grab_crit_encr_cycle_sync(reg); /* needed so t_qread does not return NULL below */
			if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
			{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
				/* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment
				 * of this field should be done only if a db format change did not occur.
				 */
				rel_crit(reg);
				status1 = ERR_MUNOFINISH;
				/* This "start_tn" check is redone after the for-loop and an error message is printed there */
				break;
			} else if (reorg_entiredb)
			{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
				assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp));
				csd->reorg_upgrd_dwngrd_restart_block = curbmp;	/* previous blocks have been upgraded/downgraded */
			}
			/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete.
			 * Repeat check done a few steps earlier outside of this for loop.
			 */
			total_blks = csd->trans_hist.total_blks;
			free_blks = csd->trans_hist.free_blocks;
			actual_blks2upgrd = csd->blks_to_upgrd;
			if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
				|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
			{
				rel_crit(reg);
				break;
			}
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* now that in crit, note down stable buffer */
			if (NULL == bml_sm_buff)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			ondsk_blkver = cr->ondsk_blkver;	/* note down db fmt on disk for bitmap block */
			/* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit.
			 * We are interested in those blocks that are currently marked as USED in the bitmap.
			 * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks.
			 * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the
			 * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job.
			 * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing.
			 */
			if (NULL == bml_lcl_buff)
				bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP));
			memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP));
			if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE))
			{	/* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */
				assert(FALSE);	/* in pro, skip ugprading/downgarding all blks in this unreliable local bitmap */
				rel_crit(reg);
				util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.",
					TRUE, REG_LEN_STR(reg), curbmp);
				status1 = ERR_MUNOFINISH;
				continue;
			}
			rel_crit(reg);
			/* ------------------------------------------------------------------------
			 *         Upgrade/Downgrade all BUSY blocks in the current bitmap
			 * ------------------------------------------------------------------------
			 */
			curblk = (curbmp == start_bmp) ? start_blk : curbmp;
			mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : BLKS_PER_LMAP;
			assert(0 != mapsize);
			assert(mapsize <= BLKS_PER_LMAP);
			db_got_to_v5_once = csd->db_got_to_v5_once;
			for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++)
			{
				if (mu_ctrly_occurred || mu_ctrlc_occurred)
				{
					status1 = ERR_MUNOFINISH;
					goto stop_reorg_on_this_reg;	/* goto needed because of nested FOR Loop */
				}
				GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status);
				assert(BLK_MAPINVALID != bml_status); /* cert_blk ran clean so we dont expect invalid entries */
				if (BLK_FREE == bml_status)
				{
					reorg_stats.blks_skipped_free++;
					continue;
				}
				/* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */
				if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
				{	/* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */
					if (lcnt)
					{	/* non-bitmap block */
						/* read in block from disk into private buffer. dont pollute the cache yet */
						if (NULL == bptr)
							bptr = (sm_uc_ptr_t)malloc(blk_size);
						status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE);
						/* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be
						 * upgraded and if its block size was too big to allow the extra block-header space
						 * requirements for a dynamic upgrade. a MUPIP REORG DOWNGRADE should not error out
						 * in that case as the block is already in the downgraded format.
						 */
						if (SS_NORMAL != status1)
						{
							if (!upgrade && (ERR_DYNUPGRDFAIL == status1))
							{
								assert(GDSV4 == new_db_format);
								ondsk_blkver = new_db_format;
							} else
							{
								gtm_putmsg_csa(CSA_ARG(csa)
									VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status1);
								util_out_print("Region !AD : Error occurred while reading block "
									"[0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
								status1 = ERR_MUNOFINISH;
								goto stop_reorg_on_this_reg;/* goto needed due to nested FOR Loop */
							}
						}
						reorg_stats.blks_read_from_disk_nonbmp++;
					} /* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */
					if (new_db_format == ondsk_blkver)
					{
						assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1)));
						status1 = SS_NORMAL;	/* treat DYNUPGRDFAIL as no error in case of downgrade */
						reorg_stats.blks_skipped_newfmtindisk++;
						continue;	/* current disk version is identical to what is desired */
					}
					assert(SS_NORMAL == status1);
				}
				/* Begin non-TP transaction to upgrade/downgrade the block.
				 * The way we do that is by updating the block using a null update array.
				 * Any update to a block will trigger an automatic upgrade/downgrade of the block based on
				 * 	the current fileheader desired_db_format setting and we use that here.
				 */
				t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
				for (; ;)
				{
					CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
					curr_tn = csd->trans_hist.curr_tn;
					db_got_to_v5_once = csd->db_got_to_v5_once;
					if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
					{
						blkhist->cse = NULL;	/* start afresh (do not use value from previous retry) */
						blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
						if (NULL == blkBase)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						blkhist->blk_num = curblk;
						blkhist->buffaddr = blkBase;
						ondsk_blkver = blkhist->cr->ondsk_blkver;
						new_hdr = *(blk_hdr_ptr_t)blkBase;
						mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn;
						mark_blk_free = FALSE;
						inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd;
					} else
					{
						mark_blk_free = TRUE;
						inctn_opcode = inctn_blkmarkfree;
					}
					inctn_detail.blknum_struct.blknum = curblk;
					/* t_end assumes that the history it is passed does not contain a bitmap block.
					 * for bitmap block, the history validation information is passed through cse instead.
					 * therefore we need to handle bitmap and non-bitmap cases separately.
					 */
					if (!lcnt)
					{	/* Means a bitmap block.
						 * At this point we can do a "new_db_format != ondsk_blkver" check to determine
						 * if the block got converted since we did the dsk_read (see the non-bitmap case
						 * for a similar check done there), but in that case we will have a transaction
						 * which has read 1 bitmap block and is updating no block. "t_end" currently cannot
						 * handle this case as it expects any bitmap block that needs validation to also
						 * have a corresponding cse which will hold its history. Hence we avoid doing the
						 * new_db_format check. The only disadvantage of this is that we will end up
						 * modifying the bitmap block as part of this transaction (in an attempt to convert
						 * its ondsk_blkver) even though it is already in the right format. Since this
						 * overhead is going to be one per bitmap block and since the block is in the cache
						 * at this point, we should not lose much.
						 */
						assert(!mark_blk_free);
						BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
						*blkid_ptr = 0;
						t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
						assert(&alt_hist.h[0] == blkhist);
						alt_hist.h[0].blk_num = 0; /* create empty history for bitmap block */
						assert(update_trans);
					} else
					{	/* non-bitmap block. fill in history for validation in t_end */
						assert(curblk);	/* we should never come here for block 0 (bitmap) */
						if (!mark_blk_free)
						{
							assert(blkhist->blk_num == curblk);
							assert(blkhist->buffaddr == blkBase);
							blkhist->tn      = curr_tn;
							alt_hist.h[1].blk_num = 0;
						}
						/* Also need to pass the bitmap as history to detect if any concurrent M-kill
						 * is freeing up the same USED block that we are trying to convert OR if any
						 * concurrent M-set is reusing the same RECYCLED block that we are trying to
						 * convert. Because of t_end currently not being able to validate a bitmap
						 * without that simultaneously having a cse, we need to create a cse for the
						 * bitmap that is used only for bitmap history validation, but should not be
						 * used to update the contents of the bitmap block in bg_update.
						 */
						bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
						if (NULL == bmlhist.buffaddr)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						bmlhist.blk_num = curbmp;
						bmlhist.tn = curr_tn;
						GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status);
						if (BLK_MAPINVALID == bml_status)
						{
							t_retry(cdb_sc_lostbmlcr);
							continue;
						}
						if (!mark_blk_free)
						{
							if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status))
							{	/* block still needs to be converted. create cse */
								BLK_INIT(bs_ptr, bs1);
								BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr),
									new_hdr.bsiz - SIZEOF(new_hdr));
								BLK_FINI(bs_ptr, bs1);
								t_write(blkhist, (unsigned char *)bs1, 0, 0,
									((blk_hdr_ptr_t)blkBase)->levl, FALSE,
									FALSE, GDS_WRITE_PLAIN);
								/* The directory tree status for now is only used to determine
								 * whether writing the block to snapshot file (see t_end_sysops.c).
 								 * For reorg upgrade/downgrade process, the block is updated in a
								 * sequential way without changing the gv_target. In this case, we
								 * assume the block is in directory tree so as to have it written to
								 * the snapshot file.
			 					 */
								BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
								/* reset update_trans in case previous retry had set it to 0 */
								update_trans = UPDTRNS_DB_UPDATED_MASK;
								if (BLK_RECYCLED == bml_status)
								{	/* If block that we are upgarding is RECYCLED, indicate to
									 * bg_update that blks_to_upgrd counter should NOT be
									 * touched in this case by setting "mode" to a special value
									 */
									assert(cw_set[cw_set_depth-1].mode == gds_t_write);
									cw_set[cw_set_depth-1].mode = gds_t_write_recycled;
									/* we SET block as NOT RECYCLED, otherwise, the mm_update()
									 * or bg_update_phase2 may skip writing it to snapshot file
									 * when its level is 0
									 */
									BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state);
								}
							} else
							{	/* Block got converted by another process since we did the dsk_read.
								 * 	or this block became marked free in the bitmap.
								 * No need to update this block. just call t_end for validation of
								 * 	both the non-bitmap block as well as the bitmap block.
								 * Note down that this transaction is no longer updating any blocks.
								 */
								update_trans = 0;
							}
							/* Need to put bit maps on the end of the cw set for concurrency checking.
							 * We want to simulate t_write_map, except we want to update "cw_map_depth"
							 * instead of "cw_set_depth". Hence the save and restore logic below.
							 * This part of the code is similar to the one in mu_swap_blk.c
							 */
							save_cw_set_depth = cw_set_depth;
							assert(!cw_map_depth);
							t_write_map(&bmlhist, NULL, curr_tn, 0); /* will increment cw_set_depth */
							cw_map_depth = cw_set_depth; /* set cw_map_depth to latest cw_set_depth */
							cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */
							/* t_write_map simulation end */
						} else
						{
							if (BLK_RECYCLED != bml_status)
							{	/* Block was RECYCLED at beginning but no longer so. Retry */
								t_retry(cdb_sc_bmlmod);
								continue;
							}
							/* Mark recycled block as FREE in bitmap */
							assert(lcnt == (curblk - curbmp));
							assert(update_array_ptr == update_array);
							*((block_id *)update_array_ptr) = lcnt;
							update_array_ptr += SIZEOF(block_id);
							/* the following assumes SIZEOF(block_id) == SIZEOF(int) */
							assert(SIZEOF(block_id) == SIZEOF(int));
							*(int *)update_array_ptr = 0;
							t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0);
							update_trans = UPDTRNS_DB_UPDATED_MASK;
						}
					}
					assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans));
					lcl_update_trans = update_trans;	/* take a copy before t_end modifies it */
					if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
					{	/* In case this is MM and t_end() remapped an extended database, reset csd */
						assert(csd == cs_data);
						if (!lcl_update_trans)
						{
							assert(lcnt);
							assert(!mark_blk_free);
							assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status));
							if (BLK_BUSY != bml_status)
								reorg_stats.blks_skipped_free++;
							else
								reorg_stats.blks_skipped_newfmtincache++;
						} else if (!lcnt)
							reorg_stats.blks_converted_bmp++;
						else
							reorg_stats.blks_converted_nonbmp++;
						break;
					}
					assert(csd == cs_data);
				}
			}
		}
	stop_reorg_on_this_reg:
		/* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */
		grab_crit(reg);
		blocks_left = 0;
		assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd);
		actual_blks2upgrd = csd->blks_to_upgrd;
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		/* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt
		 * change. This is because let us say we are doing REORG UPGRADE. A concurrent REORG DOWNGRADE would
		 * have reset "csd->reorg_upgrd_dwngrd_restart_block" field to 0 and if that reorg is interrupted by a
		 * Ctrl-C (before this reorg came here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to
		 * a non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this
		 * field to "curblk" as it will be mis-interpreted as the number of blocks that have been DOWNgraded.
		 */
		set_fully_upgraded = FALSE;
		if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
		{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
			util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else if (reorg_entiredb)
		{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
			assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk);
			csd->reorg_upgrd_dwngrd_restart_block = curblk;	/* blocks lesser than this have been upgraded/downgraded */
			expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks);
			blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd);
			/* If this reorg command went through all blocks in the database, then it should have
			 * 	correctly concluded at this point whether the reorg is complete or not.
			 * If this reorg command started from where a previous incomplete reorg left
			 *	(i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial
			 *	GDS blocks that it skipped are completely {up,down}graded or not.
			 */
			assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt);
			/* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by
			 * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is
			 * 0 in the file-header and the desired_db_format did not change since the start of the REORG,
			 * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE.
			 */
			if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd))
			{
				csd->fully_upgraded = TRUE;
				csd->db_got_to_v5_once = TRUE;
				set_fully_upgraded = TRUE;
			}
			/* flush all changes noted down in the file-header */
			if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
			{
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4,
					LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
		}
		curr_tn = csd->trans_hist.curr_tn;
		rel_crit(reg);
		util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		/* Print statistics */
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap)     : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (Free)              : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free);
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk)   : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache)  : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache);
		util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap)          : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp);
		util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap)      : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp);
		if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left))
		{	/* file-header counter does not match what reorg on the entire database expected to see */
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd);
			util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
		/* Issue success or failure message for this region */
		if (SS_NORMAL == status1)
		{	/* issue success only if REORG did not encounter any error in its processing */
			if (set_fully_upgraded)
				util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg));
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg),
										process_id, process_id, &curr_tn);
		} else
		{
			assert(ERR_MUNOFINISH == status1);
			assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status));
			util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/",
					TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = status1;
		}
	}
	if (NULL != bptr)
		free(bptr);
	if (NULL != bml_lcl_buff)
		free(bml_lcl_buff);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY);
		status = ERR_MUNOFINISH;
	}
	mupip_exit(status);
}
Ejemplo n.º 5
0
uint4 mur_process_intrpt_recov()
{
	jnl_ctl_list			*jctl, *last_jctl;
	reg_ctl_list			*rctl, *rctl_top;
	int				rename_fn_len, save_name_len, idx;
	char				prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1];
	jnl_create_info			jnl_info;
	uint4				status, status2;
	uint4				max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks;
	sgmnt_data_ptr_t		csd;
	jnl_private_control		*jpc;
	jnl_buffer_ptr_t		jbp;
	boolean_t			jfh_changed;
	jnl_record			*jnlrec;
	jnl_file_header			*jfh;
	jnl_tm_t			now;

	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		TP_CHANGE_REG(rctl->gd);
		csd = cs_data;	/* MM logic after wcs_flu call requires this to be set */
		assert(csd == rctl->csa->hdr);
		jctl = rctl->jctl_turn_around;
		max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0;
		for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen)
		{
			jfh = jctl->jfh;
			if (max_autoswitchlimit < jfh->autoswitchlimit)
			{	/* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across
				 * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit.
				 */
				max_autoswitchlimit = jfh->autoswitchlimit;
				max_jnl_alq         = jfh->jnl_alq;
				max_jnl_deq         = jfh->jnl_deq;
			}
			/* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks
			 * seen in INCTN records in backward processing. It is possible that backward processing might have
			 * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-bak
			 * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which
			 * maintains exactly this count. Therefore adjust the rctl counter accordingly.
			 */
			assert(!jfh->prev_recov_blks_to_upgrd_adjust || !jfh->recover_interrupted);
			assert(!jfh->prev_recov_blks_to_upgrd_adjust || jfh->prev_recov_end_of_data);
			rctl->blks_to_upgrd_adjust += jfh->prev_recov_blks_to_upgrd_adjust;
		}
		if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit)
		{
			csd->jnl_alq         = max_jnl_alq;
			csd->jnl_deq         = max_jnl_deq;
			csd->autoswitchlimit = max_autoswitchlimit;
		} else
		{
			assert(csd->jnl_alq         == last_jctl->jfh->jnl_alq);
			assert(csd->jnl_deq         == last_jctl->jfh->jnl_deq);
			assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit);
		}
		jctl = rctl->jctl_turn_around;
		/* Get a pointer to the turn around point EPOCH record */
		jnlrec = rctl->mur_desc->jnlrec;
		assert(JRT_EPOCH == jnlrec->prefix.jrec_type);
		assert(jctl->turn_around_time == jnlrec->prefix.time);
		assert(jctl->turn_around_seqno == jnlrec->jrec_epoch.jnl_seqno);
		assert(jctl->turn_around_tn == jnlrec->prefix.tn);
		assert(jctl->rec_offset == jctl->turn_around_offset);
		/* Reset file-header "blks_to_upgrd" counter to the turn around point epoch value. Adjust this to include
		 * the number of new V4 format bitmaps created by post-turnaround-point db file extensions.
		 * The adjustment value is maintained in rctl->blks_to_upgrd_adjust.
		 */
		csd->blks_to_upgrd = jnlrec->jrec_epoch.blks_to_upgrd;
		csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust;
#		ifdef GTM_TRIGGER
		/* online rollback can potentially take the database to a point in the past where the triggers that were
		 * previously installed are no longer a part of the current database state and so any process that restarts
		 * AFTER online rollback completes SHOULD reload triggers and the only way to do that is by incrementing the
		 * db_trigger_cycle in the file header.
		 */
		if (jgbl.onlnrlbk && (0 < csd->db_trigger_cycle))
		{	/* check for non-zero db_trigger_cycle is to prevent other processes (continuing after online rollback)
			 * to establish implicit TP (on seeing the trigger cycle mismatch) when there are actually no triggers
			 * installed in the database (because there were none at the start of online rollback).
			 */
			csd->db_trigger_cycle++;
			if (0 == csd->db_trigger_cycle)
				csd->db_trigger_cycle = 1;	/* Don't allow cycle set to 0 which means uninitialized */
		}
#		endif
		assert((WBTEST_ALLOW_ARBITRARY_FULLY_UPGRADED == gtm_white_box_test_case_number) ||
			(FALSE == jctl->turn_around_fullyupgraded) || (TRUE == jctl->turn_around_fullyupgraded));
		/* Set csd->fully_upgraded to FALSE if:
		 * a) The turn around EPOCH had the fully_upgraded field set to FALSE
		 * OR
		 * b) If csd->blks_to_upgrd counter is non-zero. This field can be non-zero even if the turnaround EPOCH's
		 * fully_upgraded field is TRUE. This is possible if the database was downgraded to V4 (post turnaround EPOCH)
		 * format and database extensions happened causing new V4 format bitmap blocks to be written. The count of V4
		 * format bitmap blocks is maintained ONLY as part of INCTN records (with INCTN opcode SET_JNL_FILE_CLOSE_EXTEND)
		 * noted down in rctl->blks_to_upgrd_adjust counter as part of BACKWARD processing which are finally added to
		 * csd->blks_to_upgrd.
		 */
		if (!jctl->turn_around_fullyupgraded || csd->blks_to_upgrd)
			csd->fully_upgraded = FALSE;
		csd->trans_hist.early_tn = jctl->turn_around_tn;
		csd->trans_hist.curr_tn = csd->trans_hist.early_tn;	/* INCREMENT_CURR_TN macro not used but noted in comment
									 * to identify all places that set curr_tn */
		csd->jnl_eovtn = csd->trans_hist.curr_tn;
		csd->turn_around_point = TRUE;
		/* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header.
		 * It is difficult for recovery to restore those fields to a correct partial value.
		 * Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record
		 * 	and that there was no more processing that happened.
		 * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery.
		 * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long).
		 */
		if (csd->desired_db_format_tn >= jctl->turn_around_tn)
			csd->desired_db_format_tn = jctl->turn_around_tn - 1;
		if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn)
			csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1;
		if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn)
			csd->tn_upgrd_blks_0 = (trans_num)-1;
		csd->reorg_upgrd_dwngrd_restart_block = 0;
		/* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch
		 * record and the change in "total_blks" since that epoch to the present form of the database. Any difference
		 * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback
		 * undoes everything (including all updates) except file extensions (it does not truncate the file size).
		 * Therefore every block that was newly allocated as part of those file extensions should be considered FREE
		 * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created.
		 */
		assert(jnlrec->jrec_epoch.total_blks <= csd->trans_hist.total_blks);
		csd->trans_hist.free_blocks = jnlrec->jrec_epoch.free_blocks
			+ (csd->trans_hist.total_blks - jnlrec->jrec_epoch.total_blks)
			- DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP)
			+ DIVIDE_ROUND_UP(jnlrec->jrec_epoch.total_blks, BLKS_PER_LMAP);
		assert(!csd->blks_to_upgrd || !csd->fully_upgraded);
		assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks);
		/* Update strm_reg_seqno[] in db file header to reflect the turn around point.
		 * Before updating "strm_reg_seqno", make sure value is saved into "save_strm_reg_seqno".
		 * This is relied upon by the function "mur_get_max_strm_reg_seqno" in case of interrupted rollback.
		 */
		for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
		{
			if (!csd->save_strm_reg_seqno[idx])
				csd->save_strm_reg_seqno[idx] = csd->strm_reg_seqno[idx];
			csd->strm_reg_seqno[idx] = jnlrec->jrec_epoch.strm_seqno[idx];
		}
		wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB);
		assert(cs_addrs->ti->curr_tn == jctl->turn_around_tn);
#		ifdef UNIX
		if (jgbl.onlnrlbk)
		{
			if (dba_bg == cs_addrs->hdr->acc_meth)
			{	/* dryclean the cache (basically reset the cycle fields in all teh cache records) so as to make
				 * GT.M processes that only does 'reads' to require crit and hence realize that online rollback
				 * is in progress
				 */
				bt_refresh(cs_addrs, FALSE); /* sets earliest bt TN to be the turn around TN */
			}
			db_csh_ref(cs_addrs, FALSE);
			assert(NULL != cs_addrs->jnl);
			jpc = cs_addrs->jnl;
			assert(NULL != jpc->jnl_buff);
			jbp = jpc->jnl_buff;
			/* Since Rollback simulates the journal record along with the timestamp at which the update was made, it
			 * sets jgbl.dont_reset_gbl_jrec_time to TRUE so that during forward processing t_end or tp_tend does not
			 * reset the gbl_jrec_time to reflect the current time. But, with Online Rollback, one can have the shared
			 * memory up and running and hence can have jbp->prev_jrec_time to be the time of the most recent journal
			 * update made. Later in t_end/tp_tend, ADJUST_GBL_JREC_TIME is invoked which ensures that if ever
			 * gbl_jrec_time (the time of the current update) is less than jbp->prev_jrec_time (time of the latest
			 * journal update), dont_reset_gbl_jrec_time better be FALSE. But, this assert will trip since Rollback
			 * sets the latter to TRUE. To fix this, set jbp->prev_jrec_time to the turn around time stamp. This way
			 * we are guaranteed that all the updates done in the forward processing will have a timestamp that is
			 * greater than the turn around timestamp
			 */
			SET_JNLBUFF_PREV_JREC_TIME(jbp, jctl->turn_around_time, DO_GBL_JREC_TIME_CHECK_FALSE);
		} else if (dba_bg == csd->acc_meth)
		{	/* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */
			SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1);
		}
#		else
		if (dba_bg == csd->acc_meth)
		{	/* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */
			SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1);
		}
#		endif
		csd->turn_around_point = FALSE;
		assert(OLDEST_HIST_TN(cs_addrs) == (cs_addrs->ti->curr_tn - 1));
		/* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */
		assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data));
		rctl->csd = cs_data;
	}
	JNL_SHORT_TIME(now);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		TP_CHANGE_REG_IF_NEEDED(rctl->gd);
		if (!rctl->jfh_recov_interrupted)
			jctl = rctl->jctl_turn_around;
		else
		{
			DEBUG_ONLY(
				for (jctl = rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen)
					;
				/* check that latest gener file name does not match db header */
				assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len)
					|| (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)));
			)
			jctl = rctl->jctl_alt_head;
		}
		assert(NULL != jctl);
		for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen)
			;
		assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); 			       /* latest gener file name */
		assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */
		if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "",
								rename_fn, &rename_fn_len, now, &status2)))
			return status;
		jctl->jnl_fn_len = rename_fn_len;  /* change the name in memory to the proposed name */
		memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1);
		/* Rename hasn't happened yet at the filesystem level. In case current recover command is interrupted,
		 * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for
		 * all journal files from which PBLK records were applied. Create new journal files for forward play.
		 */
		assert(NULL != rctl->jctl_turn_around);
		jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */
		assert(0 != jctl->turn_around_offset);
		jfh = jctl->jfh;
		jfh->turn_around_offset = jctl->turn_around_offset;	/* save progress in file header for 	*/
		jfh->turn_around_time = jctl->turn_around_time;		/* possible re-issue of recover 	*/
		for (idx = 0; idx < MAX_SUPPL_STRMS; idx++)
			jfh->strm_end_seqno[idx] = csd->strm_reg_seqno[idx];
		jfh_changed = TRUE;
		/* We are about to update the journal file header of the turnaround-point journal file to store the
		 * non-zero jfh->turn_around_offset. Ensure corresponding database is considered updated.
		 * This is needed in case journal recovery/rollback terminates abnormally and we go to mur_close_files.
		 * We need to ensure csd->recov_interrupted does not get reset to FALSE even if this region did not have
		 * have any updates to the corresponding database file otherwise. (GTM-8394)
		 */
		rctl->db_updated = TRUE;
		for ( ; NULL != jctl; jctl = jctl->next_gen)
		{	/* setup the next_jnl links. note that in the case of interrupted recovery, next_jnl links
			 * would have been already set starting from the turn-around point journal file of the
			 * interrupted recovery but the new recovery MIGHT have taken us to a still previous
			 * generation journal file that needs its next_jnl link set. this is why we do the next_jnl
			 * link setup even in the case of interrupted recovery although in most cases it is unnecessary.
			 */
			jfh = jctl->jfh;
			if (NULL != jctl->next_gen)
			{
				jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len;
				memcpy(jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len);
				jfh_changed = TRUE;
			} else
				assert(0 == jfh->next_jnl_file_name_length); /* null link from latest generation */
			if (jfh->turn_around_offset && (jctl != rctl->jctl_turn_around))
			{	/* It is possible that the current recovery has a turn-around-point much before the
				 * previously interrupted recovery. If it happens to be a previous generation journal
				 * file then we have to reset the original turn-around-point to be zero in the journal
				 * file header in order to ensure if this recovery gets interrupted we do interrupted
				 * recovery processing until the new turn-around-point instead of stopping incorrectly
				 * at the original turn-around-point itself. Note that there could be more than one
				 * journal file with a non-zero turn_around_offset (depending on how many previous
				 * recoveries got interrupted in this loop) that need to be reset.
				 */
				assert(!jctl->turn_around_offset);
				assert(rctl->recov_interrupted || rctl->jctl_apply_pblk); /* rctl->jfh_recov_interrupted can fail */
				jfh->turn_around_offset = 0;
				jfh->turn_around_time = 0;
				jfh_changed = TRUE;
			}
			if (jfh_changed)
			{
				/* Since overwriting the journal file header (an already allocated block
				 * in the file) should not cause ENOSPC, we dont take the trouble of
				 * passing csa or jnl_fn (first two parameters). Instead we pass NULL.
				 */
				JNL_DO_FILE_WRITE(NULL, NULL, jctl->channel, 0, jfh,
					REAL_JNL_HDR_LEN, jctl->status, jctl->status2);
				if (SS_NORMAL != jctl->status)
				{
					assert(FALSE);
					if (SS_NORMAL == jctl->status2)
						gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len,
							jctl->jnl_fn, jctl->status);
					else
						gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len,
							jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2));
					return jctl->status;
				}
				GTM_JNL_FSYNC(rctl->csa, jctl->channel, jctl->status);
				if (-1 == jctl->status)
				{
					jctl->status2 = errno;
					assert(FALSE);
					gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(9) ERR_JNLFSYNCERR, 2,
						jctl->jnl_fn_len, jctl->jnl_fn,
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2);
					return ERR_JNLFSYNCERR;
				}
			}
			jfh_changed = FALSE;
		}
		memset(&jnl_info, 0, SIZEOF(jnl_info));
		jnl_info.status = jnl_info.status2 = SS_NORMAL;
		jnl_info.prev_jnl = &prev_jnl_fn[0];
		set_jnl_info(rctl->gd, &jnl_info);
		jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len;
		memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len);
		jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0;
		jnl_info.jnl_len = rctl->csd->jnl_file_len;
		memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len);
		jnl_info.jnl[jnl_info.jnl_len] = 0;
		assert(!mur_options.rollback || jgbl.mur_rollback);
		jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno;
		jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time;	/* time needed for cre_jnl_file_common() */
		if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl);
			return jnl_info.status;
		}
#		ifdef UNIX
		if (jgbl.onlnrlbk)
		{
			cs_addrs = rctl->csa;
			/* Mimic what jnl_file_close in case of cleanly a closed journal file */
			jpc = cs_addrs->jnl; /* the previous loop makes sure cs_addrs->jnl->jnl_buff is valid*/
			NULLIFY_JNL_FILE_ID(cs_addrs);
			jpc->jnl_buff->cycle++; /* so that, all other processes knows to switch to newer journal file */
			jpc->cycle--; /* decrement cycle so jnl_ensure_open() knows to reopen the journal */
		}
#		endif
		if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */
		{
			mur_rem_jctls(rctl);
			rctl->jctl_alt_head = NULL;
		}
		/* From this point on, journal records are written into the newly created journal file. However, we still read
		 * from old journal files.
		 */
	}