/* Waits for a concurrently running write (of a global buffer to disk) to complete.
 *
 * Returns TRUE if write completes within timeout of approx. 1 minute.
 * Returns FALSE otherwise.
 */
boolean_t	wcs_write_in_progress_wait(node_local_ptr_t cnl, cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
	uint4	lcnt;
	int4	n;

	for (lcnt = 1; ; lcnt++)
	{	/* The design here is that either this process owns the block or the writer does.
		 * If the writer owns it, it must be allowed to finish its write; it will then release
		 * the block and the next LOCK will establish our ownership.
		 */
		LOCK_BUFF_FOR_UPDATE(cr, n, &cnl->db_latch);
		/* This destroys evidence of writer ownership, but this is really a test that
		 * there was no prior owner. It will only be true if the writer has cleared it.
		 */
		if (OWN_BUFF(n))
			break;
		else
		{
			GTM_WHITE_BOX_TEST(wbox_test_code, lcnt, (2 * BUF_OWNER_STUCK));
			/* We have noticed the assert below fail occasionally on some platforms.
			 * We suspect it is because we are waiting for another writer that is in jnl_fsync
			 * (as part of flushing a global buffer), which can take more than a minute to finish.
			 * To avoid false failures (where the other writer finishes its job in a little over
			 * a minute), we wait for twice the time in the debug version.
			 */
			DEBUG_ONLY(
				if ((BUF_OWNER_STUCK == lcnt) && cr->epid)
					GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
			)
			if (BUF_OWNER_STUCK DEBUG_ONLY( * 2) < lcnt)
			{	/* sick of waiting */
				if (0 == cr->dirty)
				{	/* someone dropped something; assume it was the writer and go on */
					LOCK_NEW_BUFF_FOR_UPDATE(cr);
					break;
				} else
				{
					if (cr->epid)
					{
#ifdef DEBUG
						GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, TWICE);
						send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, TWICE,
							cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#else
						GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
						send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, ONCE,
							cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#endif
					}
					return FALSE;
				}
			}
			if (WRITER_STILL_OWNS_BUFF(cr, n))
				wcs_sleep(lcnt);
		}
	}	/* end of for loop to control buffer */
	return TRUE;
}
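/* Illustration (not GT.M source): a minimal sketch of the ownership test the loop above relies on,
 * assuming C11 <stdatomic.h>. The real LOCK_BUFF_FOR_UPDATE/OWN_BUFF macros are platform interlocks;
 * here the update latch is modeled as an atomic counter, and the caller owns the buffer only if
 * nobody (e.g. the writer) held it before the add. The names and the WRITE_LATCH_INCR constant are
 * illustrative assumptions.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define WRITE_LATCH_INCR	1

static bool lock_buff_for_update(atomic_int *latch)
{
	/* atomic_fetch_add returns the prior value; 0 means there was no prior owner, so we own it now */
	return (0 == atomic_fetch_add(latch, WRITE_LATCH_INCR));
}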
Example #2
int4	add_inter(int val, sm_int_ptr_t addr, sm_global_latch_ptr_t latch)
{
	int4			cntrval, newcntrval, spins, maxspins, retries;
	boolean_t		cswpsuccess;
	sm_int_ptr_t volatile	cntrval_p;

	++fast_lock_count;
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	cntrval_p = addr;	/* Need volatile context especially on Itanium */
	for (retries = LOCK_TRIES - 1; 0 < retries; retries--)	/* - 1 so do rel_quant 3 times first */
	{	/* seems like a legitimate spin which could take advantage of transactional memory */
		for (spins = maxspins; 0 < spins; spins--)
		{
			cntrval = *cntrval_p;
			newcntrval = cntrval + val;
			/* This is (currently as of 08/2007) the only non-locking usage of compswap in GT.M. We
			   are not passing compswap an actual sm_global_latch_ptr_t addr as its signature would
			   normally dictate. However, since the address of the field we want to deal with is the
			   first int in the global_latch_t, we just pass our int address properly cast to the
			   type that compswap is expecting. The assert below verifies that this assumption has
			   not changed (SE 08/2007).
			*/
			assert(0 == OFFSETOF(global_latch_t, u.parts.latch_pid));
			IA64_ONLY(cswpsuccess = compswap_unlock(RECAST(sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			NON_IA64_ONLY(cswpsuccess = compswap((sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			if (cswpsuccess)
			{
				--fast_lock_count;
				assert(0 <= fast_lock_count);
				return newcntrval;
			}
		}
		if (retries & 0x3)
			/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();	/* Release processor to holder of lock (hopefully) */
		else
		{
			/* On every 4th pass, we wait for a while */
			wcs_sleep(LOCK_SLEEP);
			assert(0 == (LOCK_TRIES % 4)); /* assures there are 3 rel_quants prior to first wcs_sleep() */
		}
	}
	--fast_lock_count;
	assert(FALSE);
	rts_error_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_DBCCERR, 2, LEN_AND_LIT("*unknown*"), ERR_ERRCALL, 3, CALLFROM);
	return 0; /* To keep the compiler quiet */
}
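/* Illustration (not GT.M source): the same add-via-compare-and-swap idea as add_inter above,
 * sketched with C11 <stdatomic.h>. Read the current value, compute the sum, and retry the CAS
 * until no other thread/process raced us; like add_inter, the post-add value is returned. The
 * function name is an illustrative assumption, and the spin/rel_quant/sleep backoff is omitted.
 */
#include <stdatomic.h>

static int add_atomically(atomic_int *addr, int val)
{
	int	cur, sum;

	cur = atomic_load(addr);
	do
	{	/* on failure, atomic_compare_exchange_weak reloads cur, so just recompute and retry */
		sum = cur + val;
	} while (!atomic_compare_exchange_weak(addr, &cur, sum));
	return sum;
}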
Example #3
/* Waits for a concurrently running read (from disk into a global buffer) to complete.
 *
 * Returns TRUE if read completes within timeout of approx. 1 minute.
 * Returns FALSE otherwise.
 *
 * Similar logic is also present in t_qread and wcs_recover but they are different enough that
 * they have not been folded into this routine yet.
 */
boolean_t	wcs_read_in_progress_wait(cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
	uint4	lcnt, r_epid;
	int4	n;

	for (lcnt = 1; -1 != cr->read_in_progress; lcnt++)
	{
		if (-1 > cr->read_in_progress)
		{	/* outside of design; clear to known state */
			INTERLOCK_INIT(cr);
			assert(0 == cr->r_epid);
			cr->r_epid = 0;
			break;
		}
		wcs_sleep(lcnt);
		GTM_WHITE_BOX_TEST(wbox_test_code, lcnt, (2 * BUF_OWNER_STUCK));
		if (BUF_OWNER_STUCK < lcnt)
		{	/* sick of waiting */
			/* Since cr->r_epid can change concurrently, take a local copy before using it below,
			 * particularly before calling is_proc_alive, as we don't want to call it with a 0 r_epid.
			 */
			r_epid = cr->r_epid;
			if (0 != r_epid)
			{
				if (FALSE == is_proc_alive(r_epid, cr->image_count))
				{	/* process gone; release its lock */
					RELEASE_BUFF_READ_LOCK(cr);
				} else
				{
					assert(gtm_white_box_test_case_enabled);
					return FALSE;
				}
			} else
			{	/* process stopped before could set r_epid */
				RELEASE_BUFF_READ_LOCK(cr);
				if (-1 > cr->read_in_progress)
				{	/* the reader released the lock since the r_epid check above; rectify the semaphore */
					LOCK_BUFF_FOR_READ(cr, n);
				}
			}
		}	/* sick of waiting */
	}
	return TRUE;
}
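/* Illustration (not the real interlock macros): a toy model of the convention the loop above polls
 * on. cr->read_in_progress is -1 when no read is under way and a reader takes the buffer by
 * atomically moving it to 0; values below -1 are "outside of design" and get repaired by
 * INTERLOCK_INIT in the code above. READ_IDLE and the function name are illustrative assumptions.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define READ_IDLE	(-1)

static bool try_lock_buff_for_read(atomic_int *read_in_progress)
{
	int	idle = READ_IDLE;

	/* succeeds only if no read was in progress; the reader stores READ_IDLE again to release */
	return atomic_compare_exchange_strong(read_in_progress, &idle, 0);
}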
Example #4
static void exec_read(BFILE *bf, char *buf, int nbytes)
{
	int	needed, got;
	int4	status;
	char	*curr;
	pid_t	waitpid_res;

	assert(nbytes > 0);
	needed = nbytes;
	curr = buf;
#ifdef DEBUG_ONLINE
	PRINTF("file descriptor is %d and bytes needed is %d\n", bf->fd, needed);
#endif
	while (0 != (got = read(bf->fd, curr, needed)))
	{
		if (got == needed)
			break;
		else if (got > 0)
		{
			needed -= got;
			curr += got;
		}
		/* the check for EINTR below is valid and should not be converted to an EINTR
		 * wrapper macro, for an immediate retry is not attempted. Instead, wcs_sleep
		 * is called.
		 */
		else if ((EINTR != errno) && (EAGAIN != errno))
		{
			restore_read_errno = errno;	/* save errno before the calls below can clobber it */
			gtm_putmsg(VARLSTCNT(1) restore_read_errno);
			if ((pipe_child > 0) && (FALSE != is_proc_alive(pipe_child, 0)))
				WAITPID(pipe_child, (int *)&status, 0, waitpid_res);
			close(bf->fd);
			break;
		}
		wcs_sleep(100);
	}
	return;
}
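/* For contrast with the comment inside exec_read above: the conventional EINTR wrapper it warns
 * against retries the read immediately, with no sleep and no EAGAIN handling. A hedged sketch of
 * such a wrapper, for illustration only (the name read_eintr is an assumption):
 */
#include <errno.h>
#include <unistd.h>

static ssize_t read_eintr(int fd, void *buf, size_t nbytes)
{
	ssize_t	got;

	do
	{	/* retry immediately on EINTR; exec_read instead falls through to wcs_sleep */
		got = read(fd, buf, nbytes);
	} while ((-1 == got) && (EINTR == errno));
	return got;
}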
Example #5
cache_rec_ptr_t	db_csh_getn(block_id block)
{
	cache_rec_ptr_t		hdr, q0, start_cr, cr;
	bt_rec_ptr_t		bt;
	unsigned int		lcnt, ocnt;
	int			rip, max_ent, pass1, pass2, pass3;
	int4			flsh_trigger;
	uint4			r_epid, dummy;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*tp_srch_status;

	error_def(ERR_BUFRDTIMEOUT);
	error_def(ERR_INVALIDRIP);

	csa = cs_addrs;
	csd = csa->hdr;
	assert(csa->now_crit);
	assert(csa == &FILE_INFO(gv_cur_region)->s_addrs);
	max_ent = csd->n_bts;
	cr = (cache_rec_ptr_t)GDS_REL2ABS(csa->nl->cur_lru_cache_rec_off);
	hdr = csa->acc_meth.bg.cache_state->cache_array + (block % csd->bt_buckets);
	start_cr = csa->acc_meth.bg.cache_state->cache_array + csd->bt_buckets;
	pass1 = max_ent;	/* skip referred or dirty or read-into cache records */
	pass2 = 2 * max_ent;	/* skip referred cache records */
	pass3 = 3 * max_ent;	/* skip nothing */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getns, 1);
	for (lcnt = 0;  ; lcnt++)
	{
		if (lcnt > pass3)
		{
			BG_TRACE_PRO(wc_blocked_db_csh_getn_loopexceed);
			assert(FALSE);
			break;
		}
		cr++;
		if (cr == start_cr + max_ent)
			cr = start_cr;
		VMS_ONLY(
			if ((lcnt == pass1) || (lcnt == pass2))
				wcs_wtfini(gv_cur_region);
		)
		if (TRUE == cr->refer && lcnt < pass2)
		{	/* in passes 1 & 2, set refer to FALSE and skip; in the third pass attempt reuse even if TRUE == refer */
			cr->refer = FALSE;
			continue;
		}
		if (TRUE == cr->in_cw_set)
		{	/* this process already owns it - skip it */
			cr->refer = TRUE;
			continue;
		}
		if (CDB_STAGNATE <= t_tries || mu_reorg_process)
		{
			/* Prevent stepping on self when crit for entire transaction.
			 * This is done by looking up in sgm_info_ptr->blks_in_use and cw_stagnate for presence of the block.
			 * The following two hashtable lookups are not similar, since in TP, sgm_info_ptr->blks_in_use
			 * 	is updated to the latest cw_stagnate list of blocks only in tp_hist().
			 * Also note that the lookup in sgm_info_ptr->blks_in_use reuses blocks that don't have cse's.
			 * This is to allow big-read TP transactions which may use up more than the available global buffers.
			 * There is one issue here in that a block that has been only read till now may be stepped upon here
			 *	but may later be needed for update. It is handled by updating the block's corresponding
			 *	entry in the set of histories (sgm_info_ptr->first_tp_hist[index] structure) to hold the
			 *	"cr" and "cycle" of the t_qread done for the block when it was intended to be changed for the
			 *	first time within the transaction since otherwise the transaction would restart due to a
			 *	cdb_sc_lostcr status. Note that "tn" (read_tn of the block) in the first_tp_hist will still
			 *	remain the "tn" when the block was first read within this transaction to ensure the block
			 *	hasn't been modified since the start of the transaction. Once we intend on changing the
			 *	block i.e. srch_blk_status->ptr is non-NULL, we ensure in the code below not to step on it.
			 *	[tp_hist() is the routine that updates the "cr", "cycle" and "tn" of the block].
			 * Note that usually in a transaction the first_tp_hist[] structure holds the "cr", "cycle", and "tn"
			 *	of the first t_qread of the block within that transaction. The above is the only exception.
			 * Also note that for blocks in cw_stagnate (i.e. current TP mini-action), we don't reuse any of
			 *	them even if they don't have a cse. This is to ensure that the current action doesn't
			 *	encounter a restart due to cdb_sc_lostcr in tp_hist() even in the fourth-retry.
			 */
			if (dollar_tlevel
				&& (tp_srch_status =
					(srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)cr->blk, &dummy))
				&& tp_srch_status->ptr)
			{	/* this process is already using the block - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != lookup_hashtab_ent(cw_stagnate, (void *)cr->blk, &dummy))
			{
				cr->refer = TRUE;
				continue;
			}
		}
		if (cr->dirty)
		{	/* Note that in Unix, it is possible that we see a stale value of cr->dirty (possible if a
			 * concurrent wcs_wtstart() has reset dirty to 0 but that update did not reach us yet). In this
			 * case the call to wcs_get_space() below will do the necessary memory barrier instructions
			 * (through calls to aswp()) which will allow us to see the non-stale value of cr->dirty.
			 *
			 * It is also possible that cr->dirty is non-zero but < cr->flushed_dirty_tn. In this case, wcs_get_space
			 * done below will return FALSE forcing a cache-rebuild which will fix this situation.
			 *
			 * In VMS, another process cannot be concurrently resetting cr->dirty to 0 as the resetting routine
			 * is wcs_wtfini() which is executed in crit which another process cannot be in as we are in crit now.
			 */
			if (gv_cur_region->read_only)
				continue;
			if (lcnt < pass1)
			{
				if (!csa->timer && (csa->nl->wcs_timers < 1))
					wcs_timer_start(gv_cur_region, FALSE);
				continue;
			}
			BG_TRACE_PRO(db_csh_getn_flush_dirty);
			if (FALSE == wcs_get_space(gv_cur_region, 0, cr))
			{	/* failed to flush it out - force a rebuild */
				BG_TRACE_PRO(wc_blocked_db_csh_getn_wcsstarvewrt);
				assert(FALSE);
				break;
			}
			assert(0 == cr->dirty);
		}
		UNIX_ONLY(
			/* the cache-record is not free for reuse until the write-latch value becomes LATCH_CLEAR.
			 * In VMS, resetting the write-latch value occurs in wcs_wtfini() which is in CRIT, we are fine.
			 * In Unix, this resetting is done by wcs_wtstart() which is out-of-crit. Therefore, we need to
			 * 	wait for this value to be LATCH_CLEAR before reusing this cache-record.
			 * Note that we are examining the write-latch-value without holding the interlock. It is ok to do
			 * 	this because the only two routines that modify the latch value are bg_update() and
			 * 	wcs_wtstart(). The former cannot be concurrently executing because we are in crit.
			 * 	The latter will not update the latch value unless this cache-record is dirty. But in this
			 * 	case we would have most likely gone through the if (cr->dirty) check above. Most likely
			 * 	because there is one rare possibility where a concurrent wcs_wtstart() has set cr->dirty
			 * 	to 0 but not yet cleared the latch. In that case we wait for the latch to be cleared.
			 * 	In all other cases, nobody is modifying the latch since when we got crit and therefore
			 * 	it is safe to observe the value of the latch without holding the interlock.
			 */
			if (LATCH_CLEAR != WRITE_LATCH_VAL(cr))
			{	/* possible if a concurrent wcs_wtstart() has set cr->dirty to 0 but not yet
				 * cleared the latch. this should be very rare though.
				 */
				if (lcnt < pass2)
					continue; /* try to find some other cache-record to reuse until the 3rd pass */
				for (ocnt = 1; (MAXWRTLATCHWAIT >= ocnt) && (LATCH_CLEAR != WRITE_LATCH_VAL(cr)); ocnt++)
					wcs_sleep(SLEEP_WRTLATCHWAIT);	/* since it is a short lock, sleep the minimum */
				if (MAXWRTLATCHWAIT <= ocnt)
				{
					BG_TRACE_PRO(db_csh_getn_wrt_latch_stuck);
					assert(FALSE);
					continue;
				}
			}
		)
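/* The excerpt above is truncated, but the scan it shows is a classic three-pass second-chance
 * ("clock") sweep: pass 1 skips referred, dirty, or read-busy records, pass 2 skips only referred
 * ones (clearing the refer bit as it goes), and pass 3 takes anything. A minimal sketch of just
 * the reference-bit mechanism, with illustrative names rather than the GT.M cache structures:
 */
typedef struct
{
	int	refer;	/* second-chance bit, like cr->refer */
	int	busy;	/* stands in for the dirty/in_cw_set/read-in-progress checks */
} clock_ent;

static int clock_pick(clock_ent *ents, int n_ents, int *hand)
{
	int	i, scanned;

	for (scanned = 0; scanned < 3 * n_ents; scanned++)
	{
		i = *hand = (*hand + 1) % n_ents;
		if (ents[i].refer && (scanned < 2 * n_ents))
		{	/* passes 1 and 2: clear the refer bit and give the entry a second chance */
			ents[i].refer = 0;
			continue;
		}
		if (ents[i].busy && (scanned < n_ents))
			continue;	/* pass 1 only: leave busy entries alone */
		return i;		/* victim found */
	}
	return -1;	/* analogous to the wc_blocked_db_csh_getn_loopexceed trace above */
}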
Example #6
unsigned char mu_cre_file(void)
{
	unsigned char		*inadr[2], *c, exit_stat;
	enum db_acc_method	temp_acc_meth;
	uint4			lcnt, retadr[2];
	int4			blk_init_size, initial_alq, free_blocks;
	gtm_uint64_t		free_blocks_ll, blocks_for_extension;
	char			buff[GLO_NAME_MAXLEN], fn_buff[MAX_FN_LEN];
	unsigned int		status;
	int			free_space;
	struct FAB		*fcb;
	struct NAM		nam;
	gds_file_id		new_id;
	io_status_block_disk	iosb;
	char			node[16];
	short			len;
	struct {
		short	blen;
		short	code;
		char	*buf;
		short	*len;
		int4	terminator;
	} item = {15, SYI$_NODENAME, node, &len, 0};
	$DESCRIPTOR(desc, buff);

	exit_stat = EXIT_NRM;
/* The following calculations should duplicate the BT_SIZE macro from GDSBT and the LOCK_BLOCK macro from GDSFHEAD.H,
 * but without using a sgmnt_data which is not yet set up at this point
 */

#ifdef GT_CX_DEF
	/* This section needs serious changes for the fileheader changes in V5 if it is ever resurrected */
	over_head = DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT
			+ (WC_MAX_BUFFS + getprime(WC_MAX_BUFFS) + 1) * SIZEOF(bt_rec), DISK_BLOCK_SIZE);
	if (gv_cur_region->dyn.addr->acc_meth == dba_bg)
	{
		free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT
			+ (gv_cur_region->dyn.addr->global_buffers + getprime(gv_cur_region->dyn.addr->global_buffers) + 1)
				* SIZEOF(bt_rec), DISK_BLOCK_SIZE);
		over_head += gv_cur_region->dyn.addr->lock_space ? gv_cur_region->dyn.addr->lock_space
								 : DEF_LOCK_SIZE / OS_PAGELET_SIZE;
	} else if (gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		free_space = over_head - DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE);
		if (gv_cur_region->dyn.addr->lock_space)
		{
			over_head += gv_cur_region->dyn.addr->lock_space;
			free_space += gv_cur_region->dyn.addr->lock_space;
		} else
		{
			over_head += DEF_LOCK_SIZE / OS_PAGELET_SIZE;
			free_space += DEF_LOCK_SIZE / OS_PAGELET_SIZE;
		}
	}
	free_space *= DISK_BLOCK_SIZE;
#else
	assert(START_VBN_CURRENT > DIVIDE_ROUND_UP(SIZEOF_FILE_HDR_DFLT, DISK_BLOCK_SIZE));
	free_space = ((START_VBN_CURRENT - 1) * DISK_BLOCK_SIZE) - SIZEOF_FILE_HDR_DFLT;
#endif
	switch (gv_cur_region->dyn.addr->acc_meth)
	{
		case dba_bg:
		case dba_mm:
			mu_cre_vms_structs(gv_cur_region);
			fcb = ((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->fab;
			cs_addrs = &((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs;

			fcb->fab$b_shr &= FAB$M_NIL;	/* No access to this file while it is created */
			fcb->fab$l_nam = &nam;
			nam = cc$rms_nam;
			/* There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
			 * and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
			 * manner as every non-bitmap block must have an associated bitmap)
			*/
			fcb->fab$l_alq += DIVIDE_ROUND_UP(fcb->fab$l_alq, BLKS_PER_LMAP - 1);	/* Bitmaps */
			blk_init_size = fcb->fab$l_alq;
			fcb->fab$l_alq *= BLK_SIZE / DISK_BLOCK_SIZE;
			fcb->fab$l_alq += START_VBN_CURRENT - 1;
			initial_alq = fcb->fab$l_alq;
			fcb->fab$w_mrs = 512;				/* no longer a relevant field to us */
			break;
		case dba_usr:
			util_out_print("Database file for region !AD not created; access method is not GDS.", TRUE,
				REG_LEN_STR(gv_cur_region));
			return EXIT_WRN;
		default:
			gtm_putmsg(VARLSTCNT(1) ERR_BADACCMTHD);
			return EXIT_ERR;
	}
	nam.nam$b_ess = SIZEOF(fn_buff);
	nam.nam$l_esa = fn_buff;
	nam.nam$b_nop |= NAM$M_SYNCHK;
	status = sys$parse(fcb, 0, 0);
	if (RMS$_NORMAL != status)
	{
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
		return EXIT_ERR;
	}
	if (nam.nam$b_node != 0)
	{
		status = sys$getsyiw(EFN$C_ENF, 0, 0, &item, &iosb, 0, 0);
		if (SS$_NORMAL == status)
			status = iosb.cond;
		if (SS$_NORMAL == status)
		{
			if ((len == nam.nam$b_node - 2) && !memcmp(nam.nam$l_esa, node, len))
			{
				fcb->fab$l_fna = nam.nam$l_esa + nam.nam$b_node;
				fcb->fab$b_fns = nam.nam$b_esl - nam.nam$b_node;
			}
		} else
		{
			util_out_print("Could not get node for !AD.", TRUE, REG_LEN_STR(gv_cur_region));
			exit_stat = EXIT_WRN;
		}
	}
	assert(gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm);
	nam.nam$l_esa = NULL;
	nam.nam$b_esl = 0;
	status = sys$create(fcb);
	if (status != RMS$_CREATED && status != RMS$_FILEPURGED)
	{
		switch(status)
		{
			case RMS$_FLK:
		 		util_out_print("Database file for region !AD not created; currently locked by another user.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_INF;
				break;
			case RMS$_NORMAL:
		 		util_out_print("Database file for region !AD not created; already exists.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_INF;
				break;
			case RMS$_SUPPORT:
				util_out_print("Database file for region !AD not created; cannot create across network.", TRUE,
					REG_LEN_STR(gv_cur_region));
				exit_stat = EXIT_WRN;
				break;
			case RMS$_FUL:
				send_msg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna,
					status, 0, fcb->fab$l_stv, 0);
				/* intentionally falling through */
			default:
				gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna,
					status, 0, fcb->fab$l_stv, 0);
				exit_stat = EXIT_ERR;
		}
		sys$dassgn(fcb->fab$l_stv);
		return exit_stat;
	}

	memcpy(new_id.dvi, nam.nam$t_dvi, SIZEOF(nam.nam$t_dvi));
	memcpy(new_id.did, nam.nam$w_did, SIZEOF(nam.nam$w_did));
	memcpy(new_id.fid, nam.nam$w_fid, SIZEOF(nam.nam$w_fid));
	global_name("GT$S", &new_id, buff);		/* 2nd parm is actually a gds_file_id * in global_name */
	desc.dsc$w_length = buff[0];			/* By definition, a gds_file_id is dvi,fid,did from nam */
	desc.dsc$a_pointer = &buff[1];
	cs_addrs->db_addrs[0] = cs_addrs->db_addrs[1] = inadr[0] = inadr[1] = inadr;	/* used to determine p0 or p1 allocation */
	status = init_sec(cs_addrs->db_addrs, &desc, fcb->fab$l_stv, (START_VBN_CURRENT - 1),
			  SEC$M_DZRO|SEC$M_GBL|SEC$M_WRT|SEC$M_EXPREG);
	if ((SS$_CREATED != status) && (SS$_NORMAL != status))
	{
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
		sys$dassgn(fcb->fab$l_stv);
		return EXIT_ERR;
	}
	cs_data = (sgmnt_data *)cs_addrs->db_addrs[0];
	memset(cs_data, 0, SIZEOF_FILE_HDR_DFLT);
	cs_data->createinprogress = TRUE;
	cs_data->trans_hist.total_blks = (initial_alq - (START_VBN_CURRENT - 1)) / (BLK_SIZE / DISK_BLOCK_SIZE);
	/* assert that total_blks stored in file-header = non-bitmap blocks (initial allocation) + bitmap blocks */
	assert(cs_data->trans_hist.total_blks == gv_cur_region->dyn.addr->allocation +
				DIVIDE_ROUND_UP(gv_cur_region->dyn.addr->allocation, BLKS_PER_LMAP - 1));
	cs_data->start_vbn = START_VBN_CURRENT;
	temp_acc_meth = gv_cur_region->dyn.addr->acc_meth;
	cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = dba_bg;
	cs_data->extension_size = gv_cur_region->dyn.addr->ext_blk_count;
	mucregini(blk_init_size);
	cs_addrs->hdr->free_space = free_space;
#ifndef GT_CX_DEF
	cs_addrs->hdr->unbacked_cache = TRUE;
#endif
	cs_data->acc_meth = gv_cur_region->dyn.addr->acc_meth = temp_acc_meth;
	cs_data->createinprogress = FALSE;
	if (SS$_NORMAL == (status = disk_block_available(fcb->fab$l_stv, &free_blocks)))
	{
		blocks_for_extension = (cs_data->blk_size / DISK_BLOCK_SIZE *
				  (DIVIDE_ROUND_UP(EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size, BLKS_PER_LMAP - 1)
					 + EXTEND_WARNING_FACTOR * (gtm_uint64_t)cs_data->extension_size));
		if ((gtm_uint64_t)free_blocks < blocks_for_extension)
		{
			free_blocks_ll = (gtm_uint64_t)free_blocks;
			gtm_putmsg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR,
					&blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll);
			send_msg(VARLSTCNT(8) ERR_LOWSPACECRE, 6, fcb->fab$b_fns, fcb->fab$l_fna, EXTEND_WARNING_FACTOR,
					&blocks_for_extension, DISK_BLOCK_SIZE, &free_blocks_ll);
		}
	}
	if (SS$_NORMAL == (status = sys$updsec(((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->s_addrs.db_addrs,
			NULL, PSL$C_USER, 0, efn_immed_wait, &iosb, NULL, 0)))
	{
		status = sys$synch(efn_immed_wait, &iosb);
		if (SS$_NORMAL == status)
			status = iosb.cond;
	} else  if (SS$_NOTMODIFIED == status)
		status = SS$_NORMAL;
	if (SS$_NORMAL == status)
		status = del_sec(SEC$M_GBL, &desc, 0);
	if (SS$_NORMAL == status)
		status = sys$deltva(cs_addrs->db_addrs, retadr, PSL$C_USER);
	if (SS$_NORMAL == status)
		status = sys$dassgn(fcb->fab$l_stv);
	if (SS$_NORMAL == status)
	{
	 	util_out_print("Database file for region !AD created.", TRUE, REG_LEN_STR(gv_cur_region));
		/* the open and close are an attempt to ensure that the file is available, not under the control of an ACP,
		 * before MUPIP exits */
		fcb->fab$b_shr = FAB$M_SHRPUT | FAB$M_SHRGET | FAB$M_UPI;
		fcb->fab$l_fop = 0;
		for (lcnt = 1;  (60 * MAX_OPEN_RETRY) >= lcnt;  lcnt++)
		{	/* per VMS engineering a delay is expected.  We will wait up to an hour as a
			 * Delete Global Section operation is essentially and inherently asynchronous in nature
			 * and could take an arbitrary amount of time.
			 */
			if (RMS$_FLK != (status = sys$open(fcb, NULL, NULL)))
				break;
			wcs_sleep(lcnt);
		}
		assert(RMS$_NORMAL == status);
		if (RMS$_NORMAL == status)
		{
			status = sys$close(fcb);
			assert(RMS$_NORMAL == status);
		}
		if (RMS$_NORMAL != status)
			exit_stat = EXIT_WRN;
	} else
		exit_stat = EXIT_ERR;
	if (RMS$_NORMAL != status)
		gtm_putmsg(VARLSTCNT(8) ERR_DBFILERR, 2, fcb->fab$b_fns, fcb->fab$l_fna, status, 0, fcb->fab$l_stv, 0);
	if ((MAX_RMS_RECORDSIZE - SIZEOF(shmpool_blk_hdr)) < cs_data->blk_size)
		gtm_putmsg(VARLSTCNT(5) ERR_MUNOSTRMBKUP, 3, fcb->fab$b_fns, fcb->fab$l_fna, 32 * 1024 - DISK_BLOCK_SIZE);
	return exit_stat;
}
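/* The allocation arithmetic above (fab$l_alq and the assert on trans_hist.total_blks) relies on
 * one local bitmap block governing (BLKS_PER_LMAP - 1) data blocks. A small sketch of that
 * round-up, with an illustrative helper name; the expression below is DIVIDE_ROUND_UP written out,
 * i.e. add (bplmap - 2) and divide by (bplmap - 1), as the comment in mu_cre_file describes:
 */
static int total_blks_with_bitmaps(int data_blks, int blks_per_lmap)
{
	/* every non-bitmap block must be covered by a bitmap, so round the bitmap count up */
	return data_blks + ((data_blks + blks_per_lmap - 2) / (blks_per_lmap - 1));
}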
Example #7
bool mubinccpy(backup_reg_list *list)
{
	static readonly mval	null_str = {MV_STR, 0, 0, 0, 0, 0};

	int			backup_socket;
	int4                    size, size1, bsize, bm_num, hint, lmsize, save_blks, rsize, match, timeout, outsize;
	uint4                   status, total_blks, bplmap, gds_ratio, blks_per_buff, counter, i, lcnt, read_size;
	uchar_ptr_t		bm_blk_buff, ptr1, ptr1_top, ptr, ptr_top;
	char_ptr_t		outptr, data_ptr;
	unsigned short		rd_iosb[4], port;
	enum db_acc_method	access;
	blk_hdr			*bp, *bptr;
	struct FAB		*fcb, temp_fab, mubincfab;
	struct RAB		temp_rab, mubincrab;
	inc_header		*outbuf;
	mval			val;
	mstr                    *file;
	sgmnt_data_ptr_t        header;
	char			*common, addr[SA_MAXLEN + 1];
	void			(*common_write)();
	void			(*common_close)();
	muinc_blk_hdr_ptr_t	sblkh_p;
	trans_num		blk_tn;
	block_id		blk_num_base, blk_num;
	boolean_t		is_bitmap_blk, backup_this_blk;
	enum db_ver		dummy_odbv;
	int4			blk_bsiz;

	error_def(ERR_BCKUPBUFLUSH);
	error_def(ERR_COMMITWAITSTUCK);
	error_def(ERR_DBCCERR);
	error_def(ERR_ERRCALL);

	assert(list->reg == gv_cur_region);
	assert(incremental);
	/* Make sure inc_header is the same size on all platforms. Some platforms pad 8-byte aligned structures
	   that end on a 4-byte boundary and some do not. It is critical that this structure is the same size on
	   all platforms as it is sent across TCP connections when doing TCP backup.
	*/
	assert(0 == (SIZEOF(inc_header) % 8));

	/* ================= Initialization and some checks ======================== */

	header = list->backup_hdr;
	file = &(list->backup_file);

	if (!mubtomag)
		mubmaxblk = BACKUP_TEMPFILE_BUFF_SIZE;
	fcb = ((vms_gds_info *)(gv_cur_region->dyn.addr->file_cntl->file_info))->fab;
	if (list->tn >= header->trans_hist.curr_tn)
	{
		util_out_print("!/TRANSACTION number is greater than or equal to current transaction,", TRUE);
		util_out_print("No blocks backed up from database !AD", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
		return TRUE;
	}

	/* =========== open backup destination and define common_write ================= */
	backup_write_errno = 0;
	backup_close_errno = 0;
	switch(list->backup_to)
	{
		case backup_to_file:
			/* open the file and define the common_write function */
			mubincfab = cc$rms_fab;
			mubincfab.fab$b_fac = FAB$M_PUT;
			mubincfab.fab$l_fop = FAB$M_CBT | FAB$M_MXV | FAB$M_TEF | FAB$M_POS & (~FAB$M_RWC) & (~FAB$M_RWO);
			mubincfab.fab$l_fna = file->addr;
			mubincfab.fab$b_fns = file->len;
			mubincfab.fab$l_alq = cs_addrs->hdr->start_vbn +
				STARTING_BLOCKS * cs_addrs->hdr->blk_size / DISK_BLOCK_SIZE;
			mubincfab.fab$w_mrs = mubmaxblk;
			mubincfab.fab$w_deq = EXTEND_SIZE;
			switch (status = sys$create(&mubincfab))
			{
				case RMS$_NORMAL:
				case RMS$_CREATED:
				case RMS$_SUPERSEDE:
				case RMS$_FILEPURGED:
					break;
				default:
					gtm_putmsg(status, 0, mubincfab.fab$l_stv);
					util_out_print("Error: Cannot create backup file !AD.",
						       TRUE, mubincfab.fab$b_fns, mubincfab.fab$l_fna);
					return FALSE;
			}

			mubincrab = cc$rms_rab;
			mubincrab.rab$l_fab = &mubincfab;
			mubincrab.rab$l_rop = RAB$M_WBH;
			if (RMS$_NORMAL != (status = sys$connect(&mubincrab)))
			{
				gtm_putmsg(status, 0, mubincrab.rab$l_stv);
				util_out_print("Error: Cannot connect to backup file !AD.",
					       TRUE, mubincfab.fab$b_fns, mubincfab.fab$l_fna);
				mubincfab.fab$l_fop |= FAB$M_DLT;
				sys$close(&mubincfab);
				return FALSE;
			}
			common = (char *)(&mubincrab);
			common_write = file_write;
			common_close = file_close;
			break;
		case backup_to_exec:
			util_out_print("Error: Backup to pipe is yet to be implemented.", TRUE);
			util_out_print("Error: Your request to backup database !AD to !AD is currently not valid.", TRUE,
				       fcb->fab$b_fns, fcb->fab$l_fna, file->len, file->addr);
			return FALSE;
		case backup_to_tcp:
			iotcp_fillroutine();
			/* parse it first */
			switch (match = SSCANF(file->addr, "%[^:]:%hu", addr, &port))
			{
				case 1 :
					port = DEFAULT_BKRS_PORT;	/* hostname only; fall through to use the default port */
				case 2 :
					break;
				default :
					util_out_print("ERROR: A hostname has to be specified to backup through a TCP connection.",
						       TRUE);
					return FALSE;
			}
			if ((0 == cli_get_int("NETTIMEOUT", &timeout)) || (0 > timeout))
				timeout = DEFAULT_BKRS_TIMEOUT;
			if (0 > (backup_socket = tcp_open(addr, port, timeout, FALSE)))
			{
				util_out_print("ERROR: Cannot open tcp connection due to the above error.", TRUE);
				return FALSE;
			}
			common_write = tcp_write;
			common_close = tcp_close;
			common = (char *)(&backup_socket);
			break;
		default :
			util_out_print("ERROR: Backup format !UL not supported.", TRUE, list->backup_to);
			util_out_print("Error: Your request to backup database !AD to !AD is not valid.", TRUE,
				       fcb->fab$b_fns, fcb->fab$l_fna, file->len, file->addr);
			return FALSE;
	}

	/* ============================= write inc_header =========================================== */

	outptr = malloc(SIZEOF(inc_header));
	outbuf = (inc_header *)outptr;
	MEMCPY_LIT(&outbuf->label[0], INC_HEADER_LABEL);
	stringpool.free = stringpool.base;
	op_horolog(&val);
	stringpool.free = stringpool.base;
	op_fnzdate(&val, &mu_bin_datefmt, &null_str, &null_str, &val);
	memcpy(&outbuf->date[0], val.str.addr, val.str.len);
	memcpy(&outbuf->reg[0], gv_cur_region->rname, MAX_RN_LEN);
	outbuf->start_tn = list->tn;
	outbuf->end_tn = header->trans_hist.curr_tn;
	outbuf->db_total_blks = header->trans_hist.total_blks;
	outbuf->blk_size = header->blk_size;
	outbuf->blks_to_upgrd = header->blks_to_upgrd;
	COMMON_WRITE(common, outptr, SIZEOF(inc_header));
	free(outptr);

	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		error_mupip = TRUE;
		COMMON_CLOSE(common);
		util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
		return FALSE;
	}

	/* ============================ read/write appropriate blocks =============================== */

	bsize		= header->blk_size;
	gds_ratio	= bsize / DISK_BLOCK_SIZE;
	blks_per_buff	= BACKUP_READ_SIZE / bsize;
	read_size	= blks_per_buff * bsize;
	outsize		= SIZEOF(muinc_blk_hdr) + bsize;
	outptr		= (char_ptr_t)malloc(MAX(outsize, mubmaxblk));
	sblkh_p		= (muinc_blk_hdr_ptr_t)outptr;
	data_ptr	= (char_ptr_t)(sblkh_p + 1);
	bp		= (blk_hdr_ptr_t)mubbuf;
	bm_blk_buff	= (uchar_ptr_t)malloc(SIZEOF(blk_hdr) + (BLKS_PER_LMAP * BML_BITS_PER_BLK / BITS_PER_UCHAR));
	mubincrab.rab$l_rbf = outptr;
	save_blks	= 0;
	access = header->acc_meth;
	memset(sblkh_p, 0, SIZEOF(*sblkh_p));

	if (access == dba_bg)
		bp = mubbuf;
	else
	{
		ptr = cs_addrs->db_addrs[0] + (cs_addrs->hdr->start_vbn - 1) * DISK_BLOCK_SIZE;
		ptr_top = cs_addrs->db_addrs[1] + 1;
	}

	sblkh_p->use.bkup.ondsk_blkver = GDSNOVER;
	for (blk_num_base = 0; blk_num_base < header->trans_hist.total_blks; blk_num_base += blks_per_buff)
	{
		if (online && (0 != cs_addrs->shmpool_buffer->failed))
			break;
		if (header->trans_hist.total_blks - blk_num_base < blks_per_buff)
		{
			blks_per_buff = header->trans_hist.total_blks - blk_num_base;
			read_size = blks_per_buff * bsize;
		}

		if (access == dba_bg)
		{
			if ((SS$_NORMAL != (status = sys$qiow(EFN$C_ENF, fcb->fab$l_stv, IO$_READVBLK, &rd_iosb, 0, 0, bp,
							      read_size, cs_addrs->hdr->start_vbn + (gds_ratio * blk_num_base),
							      0, 0, 0)))
			    || (SS$_NORMAL != (status = rd_iosb[0])))
			{
				gtm_putmsg(VARLSTCNT(1) status);
				util_out_print("Error reading data from database !AD.", TRUE,
					       fcb->fab$b_fns, fcb->fab$l_fna);
				free(outptr);
				free(bm_blk_buff);
				error_mupip = TRUE;
				COMMON_CLOSE(common);
				return FALSE;
			}
		} else
		{
			assert(dba_mm == access);
			bp = ptr + blk_num_base * bsize;
		}

		bptr = (blk_hdr *)bp;
		/* The blocks we back up will be whatever version they are. There is no implicit conversion in this
		   part of the backup/restore. Since we aren't even looking at the blocks (and indeed some of these blocks
		   could potentially contain uninitialized garbage data), we set the block version to GDSNOVER to signal
		   that the block version is unknown. The above applies to "regular" blocks but not to bitmap blocks which
		   we know are initialized. Because we have to read the bitmap blocks, they will be converted as necessary.
		*/
		for (i = 0;
		     i < blks_per_buff && ((blk_num_base + i) < header->trans_hist.total_blks);
		     i++, bptr = (blk_hdr *)((char *)bptr + bsize))
		{
			blk_num = blk_num_base + i;
			if (mu_ctrly_occurred  ||  mu_ctrlc_occurred)
			{
				free(outptr);
				free(bm_blk_buff);
				error_mupip = TRUE;
				COMMON_CLOSE(common);
				util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
				return FALSE;
			}
			/* Before we check if this block needs backing up, check if this is a new bitmap block or not. If it is,
			   we can fall through and back it up as normal. But if this is NOT a bitmap block, use the
			   existing bitmap to determine if this block has ever been allocated or not. If not, we don't want to
			   even look at this block. It could be uninitialized which will just make things run slower if we
			   go to read it and back it up.
			*/
			if (0 != ((BLKS_PER_LMAP - 1) & blk_num))
			{	/* Not a local bitmap block */
				if (!gvcst_blk_ever_allocated(bm_blk_buff + SIZEOF(blk_hdr),
							      ((blk_num * BML_BITS_PER_BLK)
							       % (BLKS_PER_LMAP * BML_BITS_PER_BLK))))
					continue;		/* Bypass never-set blocks to avoid conversion problems */
				is_bitmap_blk = FALSE;
				if (SIZEOF(v15_blk_hdr) <= (blk_bsiz = ((v15_blk_hdr_ptr_t)bptr)->bsiz))
				{	/* We have either a V4 block or uninitialized garbage */
					if (blk_bsiz > bsize)
						/* This is not a valid V4 block so ignore it */
						continue;
					blk_tn = ((v15_blk_hdr_ptr_t)bptr)->tn;
				} else
				{	/* Assume V5 block */
					if ((blk_bsiz = bptr->bsiz) > bsize)
						/* Not a valid V5 block either */
						continue;
					blk_tn = bptr->tn;
				}
			} else
			{	/* This is a bitmap block so save it into our bitmap block buffer. It is used as the
				   basis of whether or not we have to process a given block or not. We process allocated and
				   recycled blocks leaving free (never used) blocks alone as they have no data worth saving.
				   But after saving it, upgrade it to the current format if necessary.
				*/
				is_bitmap_blk = TRUE;
				memcpy(bm_blk_buff, bptr, BM_SIZE(header->bplmap));
				if (SIZEOF(v15_blk_hdr) <= ((v15_blk_hdr_ptr_t)bm_blk_buff)->bsiz)
				{	/* This is a V4 format block -- needs upgrading */
					status = gds_blk_upgrade(bm_blk_buff, bm_blk_buff, bsize, &dummy_odbv);
					if (SS_NORMAL != status)
					{
						free(outptr);
						free(bm_blk_buff);
						error_mupip = TRUE;
						COMMON_CLOSE(common);
						util_out_print("Error: Block 0x!XL is too large for automatic upgrade", TRUE,
							       sblkh_p->blkid);
						return FALSE;
					}
				}
				assert(BM_SIZE(header->bplmap) == ((blk_hdr_ptr_t)bm_blk_buff)->bsiz);
				assert(LCL_MAP_LEVL == ((blk_hdr_ptr_t)bm_blk_buff)->levl);
				assert(gvcst_blk_is_allocated(bm_blk_buff + SIZEOF(blk_hdr),
							      ((blk_num * BML_BITS_PER_BLK)
							       % (BLKS_PER_LMAP * BML_BITS_PER_BLK))));
				blk_bsiz = BM_SIZE(header->bplmap);
				blk_tn = ((blk_hdr_ptr_t)bm_blk_buff)->tn;
			}
			/* The conditions for backing up a block or ignoring it (in order of evaluation):

			   1) If blk is larger than size of db at time backup was initiated, we ignore the block.
			   2) Always backup blocks 0, 1, and 2 as these are the only blocks that can contain data
			      and still have a transaction number of 0.
			   3) For bitmap blocks, if blks_to_upgrd != 0 and the TN is 0 and the block number >=
			      last_blk_at_last_bkup, then backup the block. This way we get the correct version of
			      the bitmap block in the restore (otherwise have no clue what version to create them in
			      as bitmaps are created with a TN of 0 when before image journaling is enabled).
			   4) If the block TN is below our TN threshold, ignore the block.
			   5) Else if none of the above conditions, backup the block.
			*/
			if (online && (header->trans_hist.curr_tn <= blk_tn))
				backup_this_blk = FALSE;
			else if (3 > blk_num || (is_bitmap_blk && 0 != header->blks_to_upgrd && (trans_num)0 == blk_tn
						 && blk_num >= list->last_blk_at_last_bkup))
				backup_this_blk = TRUE;
			else if ((blk_tn < list->tn))
				backup_this_blk = FALSE;
			else
				backup_this_blk = TRUE;
			if (!backup_this_blk)
			{
				if (online)
					cs_addrs->nl->nbb = blk_num;
				continue; /* not applicable */
			}
			sblkh_p->blkid = blk_num;
			memcpy(data_ptr, bptr, blk_bsiz);
			sblkh_p->valid_data = TRUE;	/* Validation marker */
			COMMON_WRITE(common, outptr, outsize);
			if (online)
			{
				if (0 != cs_addrs->shmpool_buffer->failed)
					break;
				cs_addrs->nl->nbb = blk_num;
			}
			save_blks++;
		}
	}

	/* ============================= write saved information for online backup ========================== */

	if (online && (0 == cs_addrs->shmpool_buffer->failed))
	{
		/* -------- make sure everyone involved finishes -------- */
		cs_addrs->nl->nbb = BACKUP_NOT_IN_PROGRESS;
		/* By getting crit here, we ensure that there is no process still in transaction logic that sees
		   (nbb != BACKUP_NOT_IN_PROGRESS). After rel_crit(), any process that enters transaction logic will
		   see (nbb == BACKUP_NOT_IN_PROGRESS) because we just set it to that value. At this point, the backup
		   buffer is complete and there will not be any more new entries in the backup buffer until the next
		   backup.
		*/
		grab_crit(gv_cur_region);
		assert(cs_data == cs_addrs->hdr);
		if (dba_bg == cs_data->acc_meth)
		{	/* Now that we have crit, wait for any pending phase2 updates to finish. Since phase2 updates happen
			 * outside of crit, we don't want them to keep writing to the backup temporary file even after the
			 * backup is complete and the temporary file has been deleted.
			 */
			if (cs_addrs->nl->wcs_phase2_commit_pidcnt && !wcs_phase2_commit_wait(cs_addrs, NULL))
			{
				gtm_putmsg(VARLSTCNT(7) ERR_COMMITWAITSTUCK, 5, process_id, 1,
					cs_addrs->nl->wcs_phase2_commit_pidcnt, DB_LEN_STR(gv_cur_region));
				rel_crit(gv_cur_region);
				free(outptr);
				free(bm_blk_buff);
				error_mupip = TRUE;
				COMMON_CLOSE(common);
				return FALSE;
			}
		}
		if (debug_mupip)
		{
			util_out_print("MUPIP INFO:   Current Transaction # at end of backup is 0x!16@XQ", TRUE,
				&cs_data->trans_hist.curr_tn);
		}
		rel_crit(gv_cur_region);
		counter = 0;
		while (0 != cs_addrs->shmpool_buffer->backup_cnt)
		{
			if (0 != cs_addrs->shmpool_buffer->failed)
			{
				util_out_print("Process !UL encountered the following error.", TRUE,
					       cs_addrs->shmpool_buffer->failed);
				if (0 != cs_addrs->shmpool_buffer->backup_errno)
					gtm_putmsg(VARLSTCNT(1) cs_addrs->shmpool_buffer->backup_errno);
				free(outptr);
				free(bm_blk_buff);
				error_mupip = TRUE;
				COMMON_CLOSE(common);
				return FALSE;
			}
			backup_buffer_flush(gv_cur_region);
			if (++counter > MAX_BACKUP_FLUSH_TRY)
			{
				gtm_putmsg(VARLSTCNT(1) ERR_BCKUPBUFLUSH);
				free(outptr);
				free(bm_blk_buff);
				error_mupip = TRUE;
				COMMON_CLOSE(common);
				return FALSE;
			}
			if (counter & 0xF)
				wcs_sleep(counter);
			else
			{	/* Force shmpool recovery to see if it can find the lost blocks */
				if (!shmpool_lock_hdr(gv_cur_region))
				{
					gtm_putmsg(VARLSTCNT(9) ERR_DBCCERR, 2, REG_LEN_STR(gv_cur_region),
						   ERR_ERRCALL, 3, CALLFROM);
					free(outptr);
					free(bm_blk_buff);
					error_mupip = TRUE;
					COMMON_CLOSE(common);
					assert(FALSE);
					return FALSE;
				}
				shmpool_abandoned_blk_chk(gv_cur_region, TRUE);
				shmpool_unlock_hdr(gv_cur_region);
			}
		}

		/* -------- Open the temporary file -------- */
		temp_fab = cc$rms_fab;
		temp_fab.fab$b_fac = FAB$M_GET;
		temp_fab.fab$l_fna = list->backup_tempfile;
		temp_fab.fab$b_fns = strlen(list->backup_tempfile);
		temp_rab = cc$rms_rab;
		temp_rab.rab$l_fab = &temp_fab;

		for (lcnt = 1;  MAX_OPEN_RETRY >= lcnt;  lcnt++)
		{
			if (RMS$_FLK != (status = sys$open(&temp_fab, NULL, NULL)))
				break;
			wcs_sleep(lcnt);
		}

		if (RMS$_NORMAL != status)
		{
			gtm_putmsg(status, 0, temp_fab.fab$l_stv);
			util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
			free(outptr);
			free(bm_blk_buff);
			error_mupip = TRUE;
			COMMON_CLOSE(common);
			return FALSE;
		}

		if (RMS$_NORMAL != (status = sys$connect(&temp_rab)))
		{
			gtm_putmsg(status, 0, temp_rab.rab$l_stv);
			util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
			free(outptr);
			free(bm_blk_buff);
			error_mupip = TRUE;
			COMMON_CLOSE(common);
			return FALSE;
		}

		/* -------- read and write every record in the temporary file -------- */
		while (1)
		{
			temp_rab.rab$w_usz = outsize;
			temp_rab.rab$l_ubf = outptr;
			status = sys$get(&temp_rab);
			if (RMS$_NORMAL != status)
			{
				if (RMS$_EOF == status)
					status = RMS$_NORMAL;
				break;
			}
			assert(outsize == temp_rab.rab$w_rsz);
			/* Still validly sized blk? */
			assert((outsize - SIZEOF(shmpool_blk_hdr)) >= ((blk_hdr_ptr_t)(outptr + SIZEOF(shmpool_blk_hdr)))->bsiz);
			COMMON_WRITE(common, outptr, temp_rab.rab$w_rsz);
		}

		if (RMS$_NORMAL != status)
		{
			gtm_putmsg(status, 0, temp_rab.rab$l_stv);
			util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
			free(outptr);
			free(bm_blk_buff);
			error_mupip = TRUE;
			COMMON_CLOSE(common);
			return FALSE;
		}

		/* ---------------- Close the temporary file ----------------------- */
		if (RMS$_NORMAL != (status = sys$close(&temp_fab)))
		{
			gtm_putmsg(status, 0, temp_fab.fab$l_stv);
			util_out_print("WARNING:  DB file !AD backup aborted.", TRUE, fcb->fab$b_fns, fcb->fab$l_fna);
			free(outptr);
			free(bm_blk_buff);
			error_mupip = TRUE;
			COMMON_CLOSE(common);
			return FALSE;
		}
	}

	/* ============================= write end_msg and fileheader ======================================= */

	if ((!online) || (0 == cs_addrs->shmpool_buffer->failed))
	{
		MEMCPY_LIT(outptr, END_MSG);
		/* Although the write need only be of length SIZEOF(END_MSG) - 1 for file IO, if the write is going
		   to TCP we have to write all these records with a common length, so just write the "regular" sized
		   buffer. The extra garbage left over from the last write will be ignored as we key only on the
		   end text itself.
		*/
		COMMON_WRITE(common, outptr, outsize);

		ptr1 = (uchar_ptr_t)header;
		size1 = ROUND_UP(SIZEOF(sgmnt_data), DISK_BLOCK_SIZE);
		ptr1_top = ptr1 + size1;
		for ( ; ptr1 < ptr1_top; ptr1 += size1)
		{
			if ((size1 = ptr1_top - ptr1) > mubmaxblk)
				size1 = (mubmaxblk / DISK_BLOCK_SIZE) * DISK_BLOCK_SIZE;
			COMMON_WRITE(common, ptr1, size1);
		}

		MEMCPY_LIT(outptr, HDR_MSG);
		COMMON_WRITE(common, outptr, SIZEOF(HDR_MSG));
		ptr1 = MM_ADDR(header);
		size1 = ROUND_UP(MASTER_MAP_SIZE(header), DISK_BLOCK_SIZE);
		ptr1_top = ptr1 + size1;
		for ( ; ptr1 < ptr1_top; ptr1 += size1)
		{
			if ((size1 = ptr1_top - ptr1) > mubmaxblk)
				size1 = (mubmaxblk / DISK_BLOCK_SIZE) * DISK_BLOCK_SIZE;
			COMMON_WRITE(common, ptr1, size1);
		}

		MEMCPY_LIT(outptr, MAP_MSG);
		COMMON_WRITE(common, outptr, SIZEOF(MAP_MSG));
	}

	/* ================== close backup destination, output and return ================================== */

	if (online && (0 != cs_addrs->shmpool_buffer->failed))
	{
		util_out_print("Process !UL encountered the following error.", TRUE,
			       cs_addrs->shmpool_buffer->failed);
		if (0 != cs_addrs->shmpool_buffer->backup_errno)
			gtm_putmsg(VARLSTCNT(1) cs_addrs->shmpool_buffer->backup_errno);
		free(outptr);
		free(bm_blk_buff);
		error_mupip = TRUE;
		COMMON_CLOSE(common);
		return FALSE;
	}

	COMMON_CLOSE(common);
	free(outptr);
	free(bm_blk_buff);

	util_out_print("DB file !AD incrementally backed up in !AD", TRUE,
		       fcb->fab$b_fns, fcb->fab$l_fna, file->len, file->addr);
	util_out_print("!UL blocks saved.", TRUE, save_blks);
	util_out_print("Transactions from 0x!16@XQ to 0x!16@XQ are backed up.", TRUE,
		       &cs_addrs->shmpool_buffer->inc_backup_tn, &header->trans_hist.curr_tn);
	cs_addrs->hdr->last_inc_backup = header->trans_hist.curr_tn;
	if (record)
		cs_addrs->hdr->last_rec_backup = header->trans_hist.curr_tn;
	file_backed_up = TRUE;
	return TRUE;
}
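/* The if/else chain following the numbered comment inside mubinccpy above reduces to a small pure
 * predicate. A sketch of that decision logic with illustrative parameter names (the real code
 * reads these values from the file header, the backup_reg_list, and the block itself):
 */
#include <stdbool.h>

static bool backup_this_block(bool online, unsigned long curr_tn, unsigned long blk_tn,
	unsigned long blk_num, bool is_bitmap_blk, unsigned long blks_to_upgrd,
	unsigned long last_blk_at_last_bkup, unsigned long start_tn)
{
	if (online && (curr_tn <= blk_tn))
		return false;	/* block was touched after the online backup's snapshot TN; skip it */
	if (3 > blk_num)
		return true;	/* blocks 0-2 can hold data yet still carry a tn of 0; always back up */
	if (is_bitmap_blk && (0 != blks_to_upgrd) && (0 == blk_tn) && (blk_num >= last_blk_at_last_bkup))
		return true;	/* capture the correct version of tn-0 bitmap blocks */
	if (blk_tn < start_tn)
		return false;	/* unchanged since the incremental backup's TN threshold */
	return true;		/* everything else gets backed up */
}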
Example #8
/* go after a specific number of buffers or a particular buffer */
bool	wcs_get_space(gd_region *reg, int needed, cache_rec *cr)
{
	unsigned int		lcnt, ocnt, status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t        cnl;
	que_ent_ptr_t		base, q0;
	int4			dummy_errno;
	boolean_t		is_mm;

	assert((0 != needed) || (NULL != cr));
	csa = &(FILE_INFO(reg)->s_addrs);
	assert(csa == cs_addrs);
	csd = csa->hdr;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	cnl = csa->nl;
	if (FALSE == csa->now_crit)
	{
		assert(0 != needed);	/* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu);  0 < lcnt;  lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart, and checks for errors, etc. */
		return TRUE;
	}
	if (FALSE == wcs_wtfini(reg))
		return FALSE;
	/* while calculating flush_trigger, the decrement should be at least 1 if the minimum allowed has not yet been reached */
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger/STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	if (0 == needed)
	{
		if (!is_mm)
		{	/* If another process is concurrently finishing up phase2 of commit, wait for that to complete first. */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
				return FALSE;	/* assumption is that caller will set wc_blocked and trigger cache recovery */
		}
		for (lcnt = 1; (MAXGETSPACEWAIT > lcnt) && (0 != cr->dirty); lcnt++)
		{	/* We want to flush a specific cache-record. We speed up the wait by moving the dirty cache-record
			 * to the head of the active queue. But to do this, we need exclusive access to the active queue.
			 * The only other processes outside of crit that can be touching this concurrently are wcs_wtstart
			 * (which can remove entries from the queue) and bg_update_phase2 (which can add entries to the queue).
			 * In the case of writers, we can wait for those to complete (by setting cnl->wc_blocked to TRUE)
			 * and then play with the queue. But in the case of bg_update_phase2, it is not easily possible to
			 * do a similar wait so in this case we choose to do plain wcs_wtstart (which uses interlocked
			 * queue operations and hence can work well with concurrent bg_update_phase2) and wait until the
			 * cache record of interest becomes non-dirty. The consequence is we might wait a little longer than
			 * necessary but that is considered acceptable for now.
			 */
			/* Check if cache recovery is needed (could be set by another process in
			 * secshr_db_clnup finishing off a phase2 commit). If so, no point invoking
			 * wcs_wtstart as it will return right away. Instead return FALSE so
			 * cache-recovery can be triggered by the caller.
			 */
			if (cnl->wc_blocked)
			{
				assert(gtm_white_box_test_case_enabled);
				return FALSE;
			}
			if (!is_mm && cnl->wcs_phase2_commit_pidcnt)
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart, and checks for errors, etc. */
				wcs_sleep(lcnt);
			} else if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
			{
				SIGNAL_WRITERS_TO_STOP(cnl);	/* to stop all active writers */
				WAIT_FOR_WRITERS_TO_STOP(cnl, ocnt, MAXGETSPACEWAIT);
				if (MAXGETSPACEWAIT <= ocnt)
				{
					assert(FALSE);
					return FALSE;
				}
				if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
				{	/* Check if cache-record is part of the active queue. If so, then remove it from the
					 * tail of the active queue and move it to the head to try and speed up the flush.
					 * If not and if cr->dirty is non-zero, then the only way this is possible we know
					 * of is if a concurrent process encountered an error in the midst of commit in phase2
					 * of bg_update and finished the update but did not reinsert the cache-record in the
					 * active queue (see comment in secshr_db_clnup about why INSQ*I macros are not used
					 * in VMS). In this case, return FALSE as wcs_get_space cannot flush this cache-record.
					 * The caller will trigger appropriate error handling. We are guaranteed that cr cannot
					 * be part of the wip queue because WRITE_LATCH_VAL(cr) is LATCH_CLEAR (in wip queue it
					 * will be > LATCH_CLEAR).
					 */
					if (0 != cr->state_que.fl)
					{	/* We are about to play with the queues without using interlocks.
						 * Assert no one else could be concurrently playing with the queue.
						 */
						assert(!cnl->wcs_phase2_commit_pidcnt && !cnl->in_wtstart);
						base = &csa->acc_meth.bg.cache_state->cacheq_active;
						q0 = (que_ent_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
					} else if (cr->dirty)
					{
						assert(gtm_white_box_test_case_enabled);
						return FALSE;
					}
				}
				SIGNAL_WRITERS_TO_RESUME(cnl);
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart, and checks for errors, etc. */
				wcs_sleep(lcnt);
			} else if ((0 == cr->iosb.cond) || (WRT_STRT_PNDNG == cr->iosb.cond))
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensures jnl is open, dclast's wcs_wtstart, and checks for errors, etc. */
				wcs_sleep(lcnt);
			}
			if (FALSE == wcs_wtfini(reg))
				return FALSE;
		}
		if (0 == cr->dirty)
			return TRUE;
		assert(FALSE);
		return FALSE;
	}
	for (lcnt = 1; ((cnl->wc_in_free < needed) && (MAXGETSPACEWAIT > lcnt)); lcnt++)
	{
		DCLAST_WCS_WTSTART(reg, 0, dummy_errno); /* a macro that dclast's wcs_wtstart and checks for errors etc. */
		wcs_sleep(lcnt);
		if (FALSE == wcs_wtfini(reg))
			return FALSE;
	}
	if (cnl->wc_in_free < needed)
	{
		assert(FALSE);
		return FALSE;
	}
	return TRUE;
}
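/* The self-tuning line above (csd->flush_trigger = MAX(...)) is a floored multiplicative decay:
 * each time wcs_get_space runs under crit, shrink the trigger by roughly 1/STEP_FACTOR (but by at
 * least 1) and never drop below the configured minimum. A sketch of just that arithmetic, with the
 * GT.M names turned into plain parameters:
 */
static int decay_flush_trigger(int flush_trigger, int step_factor, int min_flush_trigger)
{
	int	decrement;

	decrement = flush_trigger / step_factor;
	if (decrement < 1)
		decrement = 1;	/* the decrement should be at least 1, per the comment above */
	flush_trigger -= decrement;
	return (flush_trigger < min_flush_trigger) ? min_flush_trigger : flush_trigger;
}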
Example #9
File: jnl_fsync.c  Project: 5HT/mumps
void jnl_fsync(gd_region *reg, uint4 fsync_addr)
{
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jb;
	uint4			lcnt, saved_dsk_addr, saved_status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	int4			lck_state;
	int			fsync_ret, save_errno;

	error_def(ERR_JNLFSYNCERR);
	error_def(ERR_FSYNCTIMOUT);
	error_def(ERR_TEXT);
	error_def(ERR_JNLFRCDTERM);
	error_def(ERR_JNLFSYNCLSTCK);

	csa = &FILE_INFO(reg)->s_addrs;
	jpc = csa->jnl;
	jb  = jpc->jnl_buff;

	if ((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc))
	{
		csd = csa->hdr;
		for (lcnt = 1; fsync_addr > jb->fsync_dskaddr && !JNL_FILE_SWITCHED(jpc); lcnt++)
		{
			if (MAX_FSYNC_WAIT_CNT / 2 == lcnt)	/* halfway into max. patience */
			{
				saved_status = jpc->status;
				jpc->status = SS_NORMAL;
				jnl_send_oper(jpc, ERR_JNLFSYNCLSTCK);
				jpc->status = saved_status;
			}
			if (MAX_FSYNC_WAIT_CNT == lcnt)	/* tried long enough */
			{
				saved_status = jpc->status;
				jpc->status = SS_NORMAL;
				jnl_send_oper(jpc, ERR_JNLFSYNCLSTCK);
				jpc->status = saved_status;
				send_msg(VARLSTCNT(4) ERR_FSYNCTIMOUT, 2, JNL_LEN_STR(csd));
				GTMASSERT;
			}
			BG_TRACE_PRO_ANY(csa, n_jnl_fsync_tries);
			if (GET_SWAPLOCK(&jb->fsync_in_prog_latch))
				break;
			wcs_sleep(lcnt);
			performCASLatchCheck(&jb->fsync_in_prog_latch, lcnt);
		}
		if (fsync_addr > jb->fsync_dskaddr && !JNL_FILE_SWITCHED(jpc))
		{
			assert(process_id == jb->fsync_in_prog_latch.u.parts.latch_pid);  /* assert we have the lock */
			saved_dsk_addr = jb->dskaddr;
			if (jpc->sync_io)
			{
				/* We need to maintain the fsync control fields irrespective of the type of IO, because we might
				 * switch between these at any time.
				 */
				jb->fsync_dskaddr = saved_dsk_addr;
			} else
			{
				GTM_FSYNC(jpc->channel, fsync_ret);
				if (-1 == fsync_ret)
				{
					save_errno = errno;
					assert(FALSE);
					send_msg(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, JNL_LEN_STR(csd),
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), save_errno);
					rts_error(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, JNL_LEN_STR(csd),
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), save_errno);
				} else
				{
					jb->fsync_dskaddr = saved_dsk_addr;
					BG_TRACE_PRO_ANY(csa, n_jnl_fsyncs);
				}
			}
		}
		if (process_id == jb->fsync_in_prog_latch.u.parts.latch_pid)
			RELEASE_SWAPLOCK(&jb->fsync_in_prog_latch);
	}
	return;
}
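/* jnl_fsync above is a durability-watermark pattern: fsync_dskaddr records how far the journal
 * file is known to be durable, so a caller pays for an fsync only when its own write address is
 * past the watermark, and concurrent callers piggyback on one fsync via the try-lock loop. A
 * hedged sketch of the core idea, with illustrative names and no latch or timeout handling:
 */
#include <unistd.h>

static void fsync_upto(int fd, unsigned int addr, unsigned int *fsync_dskaddr, unsigned int dskaddr)
{
	unsigned int	target;

	if (addr <= *fsync_dskaddr)
		return;		/* already durable up to our address: nothing to do */
	target = dskaddr;	/* snapshot the write watermark before the fsync, as jnl_fsync does */
	if (0 == fsync(fd))
		*fsync_dskaddr = target;	/* advance the durability watermark only on success */
}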
Example #10
sm_uc_ptr_t t_qread(block_id blk, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr_out)
/* cycle is used in t_end to detect if the buffer has been refreshed since the t_qread */
{
    uint4			status, duint4, blocking_pid;
    cache_rec_ptr_t		cr;
    bt_rec_ptr_t		bt;
    bool			clustered, was_crit;
    int			dummy, lcnt, ocnt;
    cw_set_element		*cse;
    off_chain		chain1;
    register sgmnt_addrs	*csa;
    register sgmnt_data_ptr_t	csd;
    int4			dummy_errno;
    boolean_t		already_built, is_mm, reset_first_tp_srch_status, set_wc_blocked;

    error_def(ERR_DBFILERR);
    error_def(ERR_BUFOWNERSTUCK);

    first_tp_srch_status = NULL;
    reset_first_tp_srch_status = FALSE;
    csa = cs_addrs;
    csd = csa->hdr;
    INCR_DB_CSH_COUNTER(csa, n_t_qreads, 1);
    is_mm = (dba_mm == csd->acc_meth);
    assert((t_tries < CDB_STAGNATE) || csa->now_crit);
    if (0 < dollar_tlevel)
    {
        assert(sgm_info_ptr);
        if (0 != sgm_info_ptr->cw_set_depth)
        {
            chain1 = *(off_chain *)&blk;
            if (1 == chain1.flag)
            {
                assert(sgm_info_ptr->cw_set_depth);
                if ((int)chain1.cw_index < sgm_info_ptr->cw_set_depth)
                    tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain1.cw_index, &cse);
                else
                {
                    assert(FALSE == csa->now_crit);
                    rdfail_detail = cdb_sc_blknumerr;
                    return (sm_uc_ptr_t)NULL;
                }
            } else
            {
                first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use,
                                       (void *)blk, &duint4);
                ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
                cse = first_tp_srch_status ? first_tp_srch_status->ptr : NULL;
            }
            assert(!cse || !cse->high_tlevel);
            if (cse)
            {   /* transaction has modified the sought after block  */
                assert(gds_t_writemap != cse->mode);
                if (FALSE == cse->done)
                {   /* out of date, so make it current */
                    already_built = (NULL != cse->new_buff);
                    gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, 0);
                    assert(cse->blk_target);
                    if (!already_built && !chain1.flag)
                    {
                        assert(first_tp_srch_status && (is_mm || first_tp_srch_status->cr)
                               && first_tp_srch_status->buffaddr);
                        if (first_tp_srch_status->tn <=
                                ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn)
                        {
                            assert(CDB_STAGNATE > t_tries);
                            rdfail_detail = cdb_sc_blkmod;	/* should this be something else */
                            TP_TRACE_HIST_MOD(blk, gv_target, tp_blkmod_t_qread, cs_data,
                                              first_tp_srch_status->tn,
                                              ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn,
                                              ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->levl);
                            return (sm_uc_ptr_t)NULL;
                        }
                        if ((!is_mm) && (first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle
                                         || first_tp_srch_status->blk_num != first_tp_srch_status->cr->blk))
                        {
                            assert(CDB_STAGNATE > t_tries);
                            rdfail_detail = cdb_sc_lostcr;	/* should this be something else */
                            return (sm_uc_ptr_t)NULL;
                        }
                        if (certify_all_blocks &&
                                FALSE == cert_blk(gv_cur_region, blk, (blk_hdr_ptr_t)cse->new_buff,
                                                  cse->blk_target->root))
                            GTMASSERT;
                    }
                    cse->done = TRUE;
                }
                *cycle = CYCLE_PVT_COPY;
                *cr_out = 0;
                return (sm_uc_ptr_t)cse->new_buff;
            }
            assert(!chain1.flag);
        } else
            first_tp_srch_status =
                (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)blk, &duint4);
        ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
        if (!is_mm && first_tp_srch_status)
        {
            assert(first_tp_srch_status->cr && !first_tp_srch_status->ptr);
            if (first_tp_srch_status->cycle == first_tp_srch_status->cr->cycle)
            {
                *cycle = first_tp_srch_status->cycle;
                *cr_out = first_tp_srch_status->cr;
                first_tp_srch_status->cr->refer = TRUE;
                if (CDB_STAGNATE <= t_tries)	/* mu_reorg does not use TP; otherwise an || for it would be needed here */
                    CWS_INSERT(blk);
                return (sm_uc_ptr_t)first_tp_srch_status->buffaddr;
            } else
            {   /* The block was already part of this transaction's read-set, but its buffer got recycled.
                 * Allow for the recycling, but update the first_tp_srch_status (for this blk) in the
                 * si->first_tp_hist array to reflect the new buffer, cycle and cache-record. Since those are
                 * known only at the end of t_qread, set a variable here so the update is done before
                 * returning from t_qread().
                 */
                reset_first_tp_srch_status = TRUE;
            }
        }
    }
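    /* Either this is not TP, or the block has no transaction-private copy: validate the block
     * number and read it through the MM or BG access method below.
     */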
    if ((blk >= csa->ti->total_blks) || (blk < 0))
    {   /* requested block out of range; could occur because of a concurrency conflict */
        if ((&FILE_INFO(gv_cur_region)->s_addrs != csa) || (csd != cs_data))
            GTMASSERT;
        assert(FALSE == csa->now_crit);
        rdfail_detail = cdb_sc_blknumerr;
        return (sm_uc_ptr_t)NULL;
    }
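    /* MM access method: the database file is memory-mapped, so return a pointer into the map. */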
    if (is_mm)
    {
        *cycle = CYCLE_SHRD_COPY;
        *cr_out = 0;
        return (sm_uc_ptr_t)(mm_read(blk));
    }
    assert(dba_bg == csd->acc_meth);
    assert(!first_tp_srch_status || !first_tp_srch_status->cr
           || first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle);
    if (FALSE == (clustered = csd->clustered))
        bt = NULL;
    was_crit = csa->now_crit;
    ocnt = 0;
    set_wc_blocked = FALSE;	/* to indicate whether csd->wc_blocked was set to TRUE by us */
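    /* BG access method: look the block up in the buffer cache; if it is not there, read it in
     * from disk (getting crit as needed); if a concurrent read is in progress, wait for it.
     * The loop retries on "bad luck" (e.g. the buffer getting reused underneath us).
     */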
    do
    {
        if (NULL == (cr = db_csh_get(blk)))
        {   /* not in memory */
            if (clustered && (NULL != (bt = bt_get(blk))) && (FALSE == bt->flushing))
                bt = NULL;
            if (FALSE == csa->now_crit)
            {
                if (NULL != bt)
                {   /* at this point, bt is non-NULL only if clustered and flushing - wait without crit */
                    assert(clustered);
                    wait_for_block_flush(bt, blk);	/* try for no other node currently writing the block */
                }
                if (csd->flush_trigger <= csa->nl->wcs_active_lvl  &&  FALSE == gv_cur_region->read_only)
                    JNL_ENSURE_OPEN_WCS_WTSTART(csa, gv_cur_region, 0, dummy_errno);
                /* a macro that dclast's wcs_wtstart() and checks for errors etc. */
                grab_crit(gv_cur_region);
                cr = db_csh_get(blk);			/* in case blk arrived before crit */
            }
            if (clustered && (NULL != (bt = bt_get(blk))) && (TRUE == bt->flushing))
            {   /* Once in crit, we must ensure that any cluster flush of this block is [still] complete.
                 * If it is not, we missed an entire WM cycle and have to wait for another node to finish */
                wait_for_block_flush(bt, blk);	/* ensure no other node currently writing the block */
            }
            if (NULL == cr)
            {   /* really not in memory - must get a new buffer */
                assert(csa->now_crit);
                cr = db_csh_getn(blk);
                if (CR_NOTVALID == (sm_long_t)cr)
                {
                    SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
                    BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_getn_invalid_blk);
                    set_wc_blocked = TRUE;
                    break;
                }
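                /* db_csh_getn() hands back the buffer with the read interlock already held
                 * (read_in_progress set), so concurrent readers wait in the loop further below
                 * until the dsk_read() here completes and the lock is released.
                 */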
                assert(0 <= cr->read_in_progress);
                *cycle = cr->cycle;
                cr->tn = csa->ti->curr_tn;
                if (FALSE == was_crit)
                    rel_crit(gv_cur_region);
                /* read outside of crit may be of a stale block but should be detected by t_end or tp_tend */
                assert(0 == cr->dirty);
                assert(cr->read_in_progress >= 0);
                INCR_DB_CSH_COUNTER(csa, n_dsk_reads, 1);
                if (SS_NORMAL != (status = dsk_read(blk, GDS_REL2ABS(cr->buffaddr))))
                {
                    RELEASE_BUFF_READ_LOCK(cr);
                    assert(was_crit == csa->now_crit);
                    if (FUTURE_READ == status)
                    {   /* in cluster, block can be in the "future" with respect to the local history */
                        assert(TRUE == clustered);
                        assert(FALSE == csa->now_crit);
                        rdfail_detail = cdb_sc_future_read;	/* t_retry forces the history up to date */
                        return (sm_uc_ptr_t)NULL;
                    }
                    rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
                }
                assert(0 <= cr->read_in_progress);
                assert(0 == cr->dirty);
                cr->r_epid = 0;
                RELEASE_BUFF_READ_LOCK(cr);
                assert(-1 <= cr->read_in_progress);
                *cr_out = cr;
                assert(was_crit == csa->now_crit);
                if (reset_first_tp_srch_status)
                {   /* keep the braces around this single-line if, since the following is a macro */
                    RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
                }
                return (sm_uc_ptr_t)GDS_REL2ABS(cr->buffaddr);
            } else  if ((FALSE == was_crit) && (BAD_LUCK_ABOUNDS > ocnt))
            {
                assert(TRUE == csa->now_crit);
                assert(csa->nl->in_crit == process_id);
                rel_crit(gv_cur_region);
            }
        }
        if (CR_NOTVALID == (sm_long_t)cr)
        {
            SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
            BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_get_invalid_blk);
            set_wc_blocked = TRUE;
            break;
        }
        for (lcnt = 1;  ; lcnt++)
        {
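            /* The block is (or was) in this cache record: wait for any in-progress read to finish,
             * sleeping between checks and, if the owner appears stuck, recovering the lock from a
             * dead reader or reporting BUFOWNERSTUCK.
             */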
            if (0 > cr->read_in_progress)
            {   /* it's not being read */
                if (clustered && (0 == cr->bt_index) &&
                        (cr->tn < ((th_rec *)((uchar_ptr_t)csa->th_base + csa->th_base->tnque.fl))->tn))
                {   /* can't rely on the buffer */
                    cr->cycle++;	/* increment cycle whenever blk number changes (tp_hist depends on this) */
                    cr->blk = CR_BLKEMPTY;
                    break;
                }
                *cycle = cr->cycle;
                *cr_out = cr;
                VMS_ONLY(
                    /* If the db_csh_get() above (in t_qread itself) located a cache-record which, before we
                     * took a copy of cr->cycle a few lines above, was made an older twin by another process
                     * in bg_update (this can happen only in VMS) which had already incremented the cycle, we
                     * would end up with a copy of the old cache-record carrying the incremented cycle number
                     * and hence would pass tp_hist validation if we returned this <cr,cycle> combination,
                     * although we don't want to since this "cr" is no longer current for the given block.
                     * Note that the "indexmod" optimization in tp_tend() relies on an accurate intermediate
                     * validation by tp_hist(), which in turn relies on the <cr,cycle> value returned by
                     * t_qread() being accurate for a given blk at the current point in time. We detect the
                     * older-twin case with the following check, which depends on the fact that bg_update()
                     * sets cr->bt_index to 0 before incrementing cr->cycle; given that order, cr->bt_index
                     * is guaranteed to be 0 if we read the incremented cycle.
                     */
                    if (cr->twin && (0 == cr->bt_index))
                        break;
                )
                if (cr->blk != blk)
                    break;
                if (was_crit != csa->now_crit)
                    rel_crit(gv_cur_region);
                assert(was_crit == csa->now_crit);
                if (reset_first_tp_srch_status)
                {   /* keep the braces around this single-line if, since the following is a macro */
                    RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
                }
                /* Note that at this point we expect t_qread() to return a <cr,cycle> combination that
                 * corresponds to "blk" passed in. It is crucial to get an accurate value for both the fields
                 * since tp_hist() relies on this for its intermediate validation.
                 */
                return (sm_uc_ptr_t)GDS_ANY_REL2ABS(csa, cr->buffaddr);
            }
            if (blk != cr->blk)
                break;
            if (lcnt >= BUF_OWNER_STUCK && (0 == (lcnt % BUF_OWNER_STUCK)))
            {
                if (FALSE == csa->now_crit)
                    grab_crit(gv_cur_region);
                if (cr->read_in_progress < -1)
                {   /* outside of design; clear to known state */
                    BG_TRACE_PRO(t_qread_out_of_design);
                    INTERLOCK_INIT(cr);
                    assert(0 == cr->r_epid);
                    cr->r_epid = 0;
                } else  if (cr->read_in_progress >= 0)
                {
                    BG_TRACE_PRO(t_qread_buf_owner_stuck);
                    if (0 != (blocking_pid = cr->r_epid))
                    {
                        if (FALSE == is_proc_alive(blocking_pid, cr->image_count))
                        {   /* process gone: release that process's lock */
                            assert(0 == cr->bt_index);
                            if (cr->bt_index)
                            {
                                SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
                                BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index1);
                                set_wc_blocked = TRUE;
                                break;
                            }
                            cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
                            cr->blk = CR_BLKEMPTY;
                            RELEASE_BUFF_READ_LOCK(cr);
                        } else
                        {
                            rel_crit(gv_cur_region);
                            send_msg(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region));
                            send_msg(VARLSTCNT(9) ERR_BUFOWNERSTUCK, 7, process_id, blocking_pid,
                                     cr->blk, cr->blk, (lcnt / BUF_OWNER_STUCK),
                                     cr->read_in_progress, cr->rip_latch.latch_pid);
                            if ((4 * BUF_OWNER_STUCK) <= lcnt)
                                GTMASSERT;
                            /* Kickstart the process taking a long time in case it was suspended */
                            UNIX_ONLY(continue_proc(blocking_pid));
                        }
                    } else
                    {   /* process stopped before could set r_epid */
                        assert(0 == cr->bt_index);
                        if (cr->bt_index)
                        {
                            SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
                            BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index2);
                            set_wc_blocked = TRUE;
                            break;
                        }
                        cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
                        cr->blk = CR_BLKEMPTY;
                        RELEASE_BUFF_READ_LOCK(cr);
                        if (cr->read_in_progress < -1)	/* race: the lock was released since the r_epid check */
                            LOCK_BUFF_FOR_READ(cr, dummy);
                    }
                }
                if (was_crit != csa->now_crit)
                    rel_crit(gv_cur_region);
            } else
                wcs_sleep(lcnt);
        }
        if (set_wc_blocked)	/* cannot use csd->wc_blocked here as we might not necessarily have crit */
            break;
        ocnt++;
        if (BAD_LUCK_ABOUNDS <= ocnt)
        {
            if (BAD_LUCK_ABOUNDS < ocnt || csa->now_crit)
            {
                rel_crit(gv_cur_region);
                GTMASSERT;
            }
            if (FALSE == csa->now_crit)
                grab_crit(gv_cur_region);
        }
    } while (TRUE);
/* Note that we do not increment fast_lock_count when getting this latch (nor decrement it on release) because ROLLBACK
 * can hold the latch for a long while and do database updates during that time, and fast_lock_count must be zero then:
 * many places (e.g. t_begin/dsk_read) assert exactly that. Skipping the increment is safe because ROLLBACK in any case
 * disables interrupts the moment it starts doing database updates.
 */
boolean_t	grab_gtmsource_srv_latch(sm_global_latch_ptr_t latch, uint4 max_timeout_in_secs, uint4 onln_rlbk_action)
{
	int			spins, maxspins, retries, max_retries;
	unix_db_info		*udi;
	sgmnt_addrs		*repl_csa;
	boolean_t		cycle_mismatch;

	assert(!have_crit(CRIT_HAVE_ANY_REG));
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	repl_csa = &udi->s_addrs;
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	max_retries = max_timeout_in_secs * 4 * 1000; /* outer loop: ~max_timeout_in_secs seconds; 1 iteration in 4 sleeps 1 ms */
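	/* e.g. max_timeout_in_secs = 30 gives max_retries = 120000: one iteration in four sleeps
	 * 1 ms (LOCK_SLEEP), so the loop lasts roughly 30 seconds of sleep time overall.
	 */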
	for (retries = max_retries - 1; 0 < retries; retries--)
	{
		for (spins = maxspins; 0 < spins; spins--)
		{
			assert(latch->u.parts.latch_pid != process_id); /* We better not hold it if trying to get it */
			if (GET_SWAPLOCK(latch))
			{
				DEBUG_ONLY(locknl = repl_csa->nl); /* Use the journal pool to maintain lock history */
				LOCK_HIST("OBTN", latch, process_id, retries);
				DEBUG_ONLY(locknl = NULL);
				if (jnlpool.repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk)
				{
					/* Journal pool indicates an abnormally terminated online rollback. Cannot continue until
					 * the rollback command is re-run to bring the journal pool/file and instance file to a
					 * consistent state.
					 */
					/* No need to release the latch before rts_error (mupip_exit_handler will do it for us) */
					rts_error(VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn),
						ERR_TEXT, 2, LEN_AND_LIT("file_corrupt field in instance file header is set to"
										" TRUE"));
				}
				cycle_mismatch = (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle);
				assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || !cycle_mismatch);
				if ((HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action) && cycle_mismatch)
				{
					assert(is_src_server);
					SYNC_ONLN_RLBK_CYCLES;
					gtmsource_onln_rlbk_clnup(); /* side-effect : sets gtmsource_state */
					rel_gtmsource_srv_latch(latch);
				}
				return TRUE;
			}
		}
		if (retries & 0x3)
		{	/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();
		} else
		{
			/* On every 4th pass, we wait for a while */
			wcs_sleep(LOCK_SLEEP);
			if (RETRY_CASLATCH_CUTOFF == (retries % LOCK_TRIES))
				performCASLatchCheck(latch, TRUE);
		}
	}
	DUMP_LOCKHIST();
	assert(FALSE);
	assert(jnlpool.gtmsource_local && jnlpool.gtmsource_local->gtmsource_pid);
	rts_error(VARLSTCNT(5) ERR_SRVLCKWT2LNG, 2, max_timeout_in_secs, jnlpool.gtmsource_local->gtmsource_pid);
	return FALSE; /* to keep the compiler happy */
}
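For reference, a minimal caller sketch for the latch above (not taken from the source): the latch field name jnlpool.gtmsource_local->gtmsource_srv_latch and the 30 second timeout are assumptions for illustration, while grab_gtmsource_srv_latch(), rel_gtmsource_srv_latch() and ASSERT_NO_ONLINE_ROLLBACK do appear in the code above.

/* Hedged sketch: acquire the source-server latch, touch gtmsource_local under it, release.
 * ASSERT_NO_ONLINE_ROLLBACK is appropriate only for callers that cannot encounter a concurrent
 * online rollback; grab_gtmsource_srv_latch() either returns TRUE or rts_error()s out, so the
 * if() below is purely defensive.
 */
if (grab_gtmsource_srv_latch(&jnlpool.gtmsource_local->gtmsource_srv_latch, 30, ASSERT_NO_ONLINE_ROLLBACK))
{
	/* ... read or update jnlpool.gtmsource_local fields while holding the latch ... */
	rel_gtmsource_srv_latch(&jnlpool.gtmsource_local->gtmsource_srv_latch);
}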