Beispiel #1
0
/* Find a global-buffer cache record that can be (re)used to hold "block".
 * Performs a clock-style sweep over the cache-record array, starting at the
 * current LRU position recorded in shared memory, in up to three passes with
 * progressively weaker skip rules (see pass1/pass2/pass3 below).
 * Caller must hold crit on the current region (asserted below).
 * NOTE(review): this excerpt ends before the routine's tail; only the visible
 * portion of the sweep is described here.
 */
cache_rec_ptr_t	db_csh_getn(block_id block)
{
	cache_rec_ptr_t		hdr, q0, start_cr, cr;
	bt_rec_ptr_t		bt;
	unsigned int		lcnt, ocnt;
	int			rip, max_ent, pass1, pass2, pass3;
	int4			flsh_trigger;
	uint4			r_epid, dummy;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*tp_srch_status;

	error_def(ERR_BUFRDTIMEOUT);
	error_def(ERR_INVALIDRIP);

	csa = cs_addrs;
	csd = csa->hdr;
	assert(csa->now_crit);
	assert(csa == &FILE_INFO(gv_cur_region)->s_addrs);
	max_ent = csd->n_bts;
	/* Start the sweep at the least-recently-used cache record noted in shared memory. */
	cr = (cache_rec_ptr_t)GDS_REL2ABS(csa->nl->cur_lru_cache_rec_off);
	/* hdr is the hash-bucket header for "block"; start_cr is the base of the
	 * cache-record array proper (past the bt_buckets bucket headers).
	 */
	hdr = csa->acc_meth.bg.cache_state->cache_array + (block % csd->bt_buckets);
	start_cr = csa->acc_meth.bg.cache_state->cache_array + csd->bt_buckets;
	pass1 = max_ent;	/* skip referred or dirty or read-into cache records */
	pass2 = 2 * max_ent;	/* skip referred cache records */
	pass3 = 3 * max_ent;	/* skip nothing */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getns, 1);
	for (lcnt = 0;  ; lcnt++)
	{
		if (lcnt > pass3)
		{	/* all three passes exhausted without finding a reusable record - give up
			 * (trace the event; downstream handling lies outside this excerpt)
			 */
			BG_TRACE_PRO(wc_blocked_db_csh_getn_loopexceed);
			assert(FALSE);
			break;
		}
		cr++;
		if (cr == start_cr + max_ent)	/* wrap around at the end of the cache-record array */
			cr = start_cr;
		VMS_ONLY(
			/* at each pass boundary, let completed writes be finished off so their
			 * cache records become reusable in the next pass
			 */
			if ((lcnt == pass1) || (lcnt == pass2))
				wcs_wtfini(gv_cur_region);
		)
		if (TRUE == cr->refer && lcnt < pass2)
		{	/* in passes 1 & 2, set refer to FALSE and skip; in the third pass attempt reuse even if TRUE == refer */
			cr->refer = FALSE;
			continue;
		}
		if (TRUE == cr->in_cw_set)
		{	/* this process already owns it - skip it */
			cr->refer = TRUE;
			continue;
		}
		if (CDB_STAGNATE <= t_tries || mu_reorg_process)
		{
			/* Prevent stepping on self when crit for entire transaction.
			 * This is done by looking up in sgm_info_ptr->blk_in_use and cw_stagnate for presence of the block.
			 * The following two hashtable lookups are not similar, since in TP, sgm_info_ptr->blks_in_use
			 * 	is updated to the latest cw_stagnate list of blocks only in tp_hist().
			 * Also note that the lookup in sgm_info_ptr->blks_in_use reuses blocks that don't have cse's.
			 * This is to allow big-read TP transactions which may use up more than the available global buffers.
			 * There is one issue here in that a block that has been only read till now may be stepped upon here
			 *	but may later be needed for update. It is handled by updating the block's corresponding
			 *	entry in the set of histories (sgm_info_ptr->first_tp_hist[index] structure) to hold the
			 *	"cr" and "cycle" of the t_qread done for the block when it was intended to be changed for the
			 *	first time within the transaction since otherwise the transaction would restart due to a
			 *	cdb_sc_lostcr status. Note that "tn" (read_tn of the block) in the first_tp_hist will still
			 *	remain the "tn" when the block was first read within this transaction to ensure the block
			 *	hasn't been modified since the start of the transaction. Once we intend on changing the
			 *	block i.e. srch_blk_status->ptr is non-NULL, we ensure in the code below not to step on it.
			 *	[tp_hist() is the routine that updates the "cr", "cycle" and "tn" of the block].
			 * Note that usually in a transaction the first_tp_hist[] structure holds the "cr", "cycle", and "tn"
			 *	of the first t_qread of the block within that transaction. The above is the only exception.
			 * Also note that for blocks in cw_stagnate (i.e. current TP mini-action), we don't reuse any of
			 *	them even if they don't have a cse. This is to ensure that the current action doesn't
			 *	encounter a restart due to cdb_sc_lostcr in tp_hist() even in the fourth-retry.
			 */
			if (dollar_tlevel
				&& (tp_srch_status =
					(srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)cr->blk, &dummy))
				&& tp_srch_status->ptr)
			{	/* this process is already using the block - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != lookup_hashtab_ent(cw_stagnate, (void *)cr->blk, &dummy))
			{	/* block is part of the current TP mini-action - skip it (see comment above) */
				cr->refer = TRUE;
				continue;
			}
		}
		if (cr->dirty)
		{	/* Note that in Unix, it is possible that we see a stale value of cr->dirty (possible if a
			 * concurrent wcs_wtstart() has reset dirty to 0 but that update did not reach us yet). In this
			 * case the call to wcs_get_space() below will do the necessary memory barrier instructions
			 * (through calls to aswp()) which will allow us to see the non-stale value of cr->dirty.
			 *
			 * It is also possible that cr->dirty is non-zero but < cr->flushed_dirty_tn. In this case, wcs_get_space
			 * done below will return FALSE forcing a cache-rebuild which will fix this situation.
			 *
			 * In VMS, another process cannot be concurrently resetting cr->dirty to 0 as the resetting routine
			 * is wcs_wtfini() which is executed in crit which another process cannot be in as we are in crit now.
			 */
			if (gv_cur_region->read_only)
				continue;	/* a read-only process cannot flush dirty buffers - skip */
			if (lcnt < pass1)
			{	/* in the first pass don't flush synchronously; just make sure a
				 * flush timer is running and move on to the next record
				 */
				if (!csa->timer && (csa->nl->wcs_timers < 1))
					wcs_timer_start(gv_cur_region, FALSE);
				continue;
			}
			BG_TRACE_PRO(db_csh_getn_flush_dirty);
			if (FALSE == wcs_get_space(gv_cur_region, 0, cr))
			{	/* failed to flush it out - force a rebuild */
				BG_TRACE_PRO(wc_blocked_db_csh_getn_wcsstarvewrt);
				assert(FALSE);
				break;
			}
			assert(0 == cr->dirty);
		}
		UNIX_ONLY(
			/* the cache-record is not free for reuse until the write-latch value becomes LATCH_CLEAR.
			 * In VMS, resetting the write-latch value occurs in wcs_wtfini() which is in CRIT, we are fine.
			 * In Unix, this resetting is done by wcs_wtstart() which is out-of-crit. Therefore, we need to
			 * 	wait for this value to be LATCH_CLEAR before reusing this cache-record.
			 * Note that we are examining the write-latch-value without holding the interlock. It is ok to do
			 * 	this because the only two routines that modify the latch value are bg_update() and
			 * 	wcs_wtstart(). The former cannot be concurrently executing because we are in crit.
			 * 	The latter will not update the latch value unless this cache-record is dirty. But in this
			 * 	case we would have most likely gone through the if (cr->dirty) check above. Most likely
			 * 	because there is one rare possibility where a concurrent wcs_wtstart() has set cr->dirty
			 * 	to 0 but not yet cleared the latch. In that case we wait for the latch to be cleared.
			 * 	In all other cases, nobody is modifying the latch since when we got crit and therefore
			 * 	it is safe to observe the value of the latch without holding the interlock.
			 */
			if (LATCH_CLEAR != WRITE_LATCH_VAL(cr))
			{	/* possible if a concurrent wcs_wtstart() has set cr->dirty to 0 but not yet
				 * cleared the latch. this should be very rare though.
				 */
				if (lcnt < pass2)
					continue; /* try to find some other cache-record to reuse until the 3rd pass */
				for (ocnt = 1; (MAXWRTLATCHWAIT >= ocnt) && (LATCH_CLEAR != WRITE_LATCH_VAL(cr)); ocnt++)
					wcs_sleep(SLEEP_WRTLATCHWAIT);	/* since it is a short lock, sleep the minimum */
				if (MAXWRTLATCHWAIT <= ocnt)
				{	/* latch did not clear within the wait bound - skip this record */
					BG_TRACE_PRO(db_csh_getn_wrt_latch_stuck);
					assert(FALSE);
					continue;
				}
			}
		)
/* go after a specific number of buffers or a particular buffer */
/* wcs_get_space - make room in (or flush a record of) the global buffer cache.
 *
 *   reg    : region whose cache is operated on
 *   needed : number of free buffers wanted; 0 means "flush the specific record cr"
 *   cr     : cache record to flush when needed == 0 (must be non-NULL then - asserted)
 *
 * Returns TRUE on success; FALSE when the flush could not be accomplished, in which
 * case callers are expected to set wc_blocked / trigger cache recovery (see the
 * comments on the individual FALSE returns below).
 */
bool	wcs_get_space(gd_region *reg, int needed, cache_rec *cr)
{
	unsigned int		lcnt, ocnt, status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t        cnl;
	que_ent_ptr_t		base, q0;
	int4			dummy_errno;
	boolean_t		is_mm;

	assert((0 != needed) || (NULL != cr));
	csa = &(FILE_INFO(reg)->s_addrs);
	assert(csa == cs_addrs);
	csd = csa->hdr;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	cnl = csa->nl;
	if (FALSE == csa->now_crit)
	{	/* out-of-crit path: just kick off enough write-starts to cover "needed"
		 * and report success without waiting
		 */
		assert(0 != needed);	/* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu);  0 < lcnt;  lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensure jnl is open, dclast's wcs_wtstart and checks for errors etc. */
		return TRUE;
	}
	if (FALSE == wcs_wtfini(reg))
		return FALSE;
	/* while calculating flush_trigger, the decrement should be atleast 1 if still not reached the minimum allowed */
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger/STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	if (0 == needed)
	{	/* flush the one specific cache record "cr" */
		if (!is_mm)
		{	/* If another process is concurrently finishing up phase2 of commit, wait for that to complete first. */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
				return FALSE;	/* assumption is that caller will set wc_blocked and trigger cache recovery */
		}
		for (lcnt = 1; (MAXGETSPACEWAIT > lcnt) && (0 != cr->dirty); lcnt++)
		{	/* We want to flush a specific cache-record. We speed up the wait by moving the dirty cache-record
			 * to the head of the active queue. But to do this, we need exclusive access to the active queue.
			 * The only other processes outside of crit that can be touching this concurrently are wcs_wtstart
			 * (which can remove entries from the queue) and bg_update_phase2 (which can add entries to the queue).
			 * In the case of writers, we can wait for those to complete (by setting cnl->wc_blocked to TRUE)
			 * and then play with the queue. But in the case of bg_update_phase2, it is not easily possible to
			 * do a similar wait so in this case we choose to do plain wcs_wtstart (which uses interlocked
			 * queue operations and hence can work well with concurrent bg_update_phase2) and wait until the
			 * cache record of interest becomes non-dirty. The consequence is we might wait a little longer than
			 * necessary but that is considered acceptable for now.
			 */
			/* Check if cache recovery is needed (could be set by another process in
			 * secshr_db_clnup finishing off a phase2 commit). If so, no point invoking
			 * wcs_wtstart as it will return right away. Instead return FALSE so
			 * cache-recovery can be triggered by the caller.
			 */
			if (cnl->wc_blocked)
			{
				assert(gtm_white_box_test_case_enabled);
				return FALSE;
			}
			if (!is_mm && cnl->wcs_phase2_commit_pidcnt)
			{	/* phase2 committers active - use interlocked wcs_wtstart and wait (see comment above) */
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensure jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			} else if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
			{	/* no writer holds cr - quiesce writers, then move cr to the head of the active queue */
				SIGNAL_WRITERS_TO_STOP(cnl);	/* to stop all active writers */
				WAIT_FOR_WRITERS_TO_STOP(cnl, ocnt, MAXGETSPACEWAIT);
				if (MAXGETSPACEWAIT <= ocnt)
				{	/* writers did not stop within the wait bound */
					assert(FALSE);
					return FALSE;
				}
				if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
				{	/* Check if cache-record is part of the active queue. If so, then remove it from the
					 * tail of the active queue and move it to the head to try and speed up the flush.
					 * If not and if cr->dirty is non-zero, then the only way this is possible we know
					 * of is if a concurrent process encountered an error in the midst of commit in phase2
					 * of bg_update and finished the update but did not reinsert the cache-record in the
					 * active queue (see comment in secshr_db_clnup about why INSQ*I macros are not used
					 * in VMS). In this case, return FALSE as wcs_get_space cannot flush this cache-record.
					 * The caller will trigger appropriate error handling. We are guaranteed that cr cannot
					 * be part of the wip queue because WRITE_LATCH_VAL(cr) is LATCH_CLEAR (in wip queue it
					 * will be > LATCH_CLEAR).
					 */
					if (0 != cr->state_que.fl)
					{	/* We are about to play with the queues without using interlocks.
						 * Assert no one else could be concurrently playing with the queue.
						 */
						assert(!cnl->wcs_phase2_commit_pidcnt && !cnl->in_wtstart);
						base = &csa->acc_meth.bg.cache_state->cacheq_active;
						q0 = (que_ent_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
					} else if (cr->dirty)
					{	/* dirty but on neither queue - cannot be flushed here (see comment above) */
						assert(gtm_white_box_test_case_enabled);
						return FALSE;
					}
				}
				SIGNAL_WRITERS_TO_RESUME(cnl);
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensure jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			} else if ((0 == cr->iosb.cond) || (WRT_STRT_PNDNG == cr->iosb.cond))
			{	/* write in progress or pending start - kick writers and wait for it to finish */
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
					/* a macro that ensure jnl is open, dclast's wcs_wtstart and checks for errors etc. */
				wcs_sleep(lcnt);
			}
			if (FALSE == wcs_wtfini(reg))
				return FALSE;
		}
		if (0 == cr->dirty)
			return TRUE;
		assert(FALSE);	/* wait loop gave up while cr was still dirty */
		return FALSE;
	}
	/* needed != 0 : keep starting writes until enough free buffers accumulate (or we time out) */
	for (lcnt = 1; ((cnl->wc_in_free < needed) && (MAXGETSPACEWAIT > lcnt)); lcnt++)
	{
		DCLAST_WCS_WTSTART(reg, 0, dummy_errno); /* a macro that dclast's wcs_wtstart and checks for errors etc. */
		wcs_sleep(lcnt);
		if (FALSE == wcs_wtfini(reg))
			return FALSE;
	}
	if (cnl->wc_in_free < needed)
	{	/* still not enough free buffers after the bounded wait */
		assert(FALSE);
		return FALSE;
	}
	return TRUE;
}