示例#1
0
	unix_db_info		*udi;
	int4			size, save_errno;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	sm_uc_ptr_t		buff;
	DEBUG_ONLY(
		blk_hdr_ptr_t	blk_hdr;
	)

	udi = (unix_db_info *)(reg->dyn.addr->file_cntl->file_info);
	csa = &udi->s_addrs;
	csd = csa->hdr;
	assert(NULL != csd);
	assert(cr);
	assert(cr->buffaddr);
	buff = GDS_ANY_REL2ABS(csa, cr->buffaddr);
	DEBUG_ONLY(
		/* Check GDS block that is about to be written. Dont do this for DSE as it may intentionally create bad blocks */
		if (!dse_running)
		{
			blk_hdr = (blk_hdr_ptr_t)buff;
			assert((unsigned)GDSVLAST > (unsigned)blk_hdr->bver);
			assert((LCL_MAP_LEVL == blk_hdr->levl) || ((unsigned)MAX_BT_DEPTH > (unsigned)blk_hdr->levl));
			assert((unsigned)csd->blk_size >= (unsigned)blk_hdr->bsiz);
			assert(csd->trans_hist.curr_tn >= blk_hdr->tn);
		}
	)
	assert(((blk_hdr_ptr_t)buff)->bver);	/* GDSV4 (0) version uses this field as a block length so should always be > 0 */
	assert(0 == fast_lock_count); /* ensure the static reformat buffer is not being used currently */
	++fast_lock_count; 	/* Prevents interrupt from using reformat buffer while we have it */
	/* reformat_buffer_in_use should always be incremented only AFTER incrementing fast_lock_count
示例#2
0
void dse_chng_bhead(void)
{
	block_id		blk;
	int4			x;
	trans_num		tn;
	cache_rec_ptr_t		cr;
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	boolean_t		ismap;
	boolean_t		chng_blk;
	boolean_t		was_crit;
	boolean_t		was_hold_onto_crit;
	uint4			mapsize;
	srch_blk_status		blkhist;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
#	ifdef GTM_CRYPT
	int			req_enc_blk_size;
	int			crypt_status;
	blk_hdr_ptr_t		bp, save_bp, save_old_block;
#	endif

	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DSEFAIL);
	error_def(ERR_DBRDONLY);

        if (gv_cur_region->read_only)
                rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	chng_blk = FALSE;
	csa = cs_addrs;
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if (!cli_get_hex("BLOCK", (uint4 *)&blk))
			return;
		if (blk < 0 || blk > csa->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	csd = csa->hdr;
	assert(csd == cs_data);
	blk_size = csd->blk_size;
	ismap = (patch_curr_blk / csd->bplmap * csd->bplmap == patch_curr_blk);
	mapsize = BM_SIZE(csd->bplmap);

	t_begin_crit(ERR_DSEFAIL);
	blkhist.blk_num = patch_curr_blk;
	if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr;

	if (cli_present("LEVEL") == CLI_PRESENT)
	{
		if (!cli_get_hex("LEVEL", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
	 	new_hdr.levl = (unsigned char)x;

		chng_blk = TRUE;
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (cli_present("BSIZ") == CLI_PRESENT)
	{
		if (!cli_get_hex("BSIZ", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		} else if (x < SIZEOF(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, csa);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad block build.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, csa->ti->curr_tn);
		t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED);
	}
	if (cli_present("TN") == CLI_PRESENT)
	{
		if (!cli_get_hex64("TN", &tn))
			return;
		was_crit = csa->now_crit;
		t_begin_crit(ERR_DSEFAIL);
		CHECK_TN(csa, csd, csd->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
		assert(csa->ti->early_tn == csa->ti->curr_tn);
		if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		{
			util_out_print("Error: Unable to read buffer.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		BLK_FINI(bs_ptr, bs1);
		t_write(&blkhist, (unsigned char *)bs1, 0, 0,
			((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		/* Pass the desired tn as argument to bg_update/mm_update below */
		BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, tn);
		was_hold_onto_crit = csa->hold_onto_crit;
		csa->hold_onto_crit = TRUE;
		t_end(&dummy_hist, NULL, tn);
#		ifdef GTM_CRYPT
		if (csd->is_encrypted && (tn < csa->ti->curr_tn))
		{	/* BG and db encryption is enabled and the DSE update caused the block-header to potentially have a tn
			 * that is LESS than what it had before. At this point, the global buffer (corresponding to blkhist.blk_num)
			 * reflects the contents of the block AFTER the dse update (bg_update would have touched this) whereas
			 * the corresponding encryption global buffer reflects the contents of the block BEFORE the update.
			 * Normally wcs_wtstart takes care of propagating the tn update from the regular global buffer to the
			 * corresponding encryption buffer. But if before it gets a chance, let us say a process goes to t_end
			 * as part of a subsequent transaction and updates this same block. Since the  blk-hdr-tn potentially
			 * decreased, it is possible that the PBLK writing check (comparing blk-hdr-tn with the epoch_tn) decides
			 * to write a PBLK for this block (even though a PBLK was already written for this block as part of a
			 * previous DSE CHANGE -BL -TN in the same epoch). In this case, since the db is encrypted, the logic
			 * will assume there were no updates to this block since the last time wcs_wtstart updated the encryption
			 * buffer and therefore use that to write the pblk, which is incorrect since it does not yet contain the
			 * tn update. The consequence of this is would be writing an older before-image PBLK) record to the
			 * journal file. To prevent this situation, we update the encryption buffer here (before releasing crit)
			 * using logic like that in wcs_wtstart to ensure it is in sync with the regular global buffer.
			 * Note:
			 * Although we use cw_set[0] to access the global buffer corresponding to the block number being updated,
			 * cw_set_depth at this point is 0 because t_end resets it. This is considered safe since cw_set is a
			 * static array (as opposed to malloc'ed memory) and hence is always available and valid until it gets
			 * overwritten by subsequent updates.
			 */
			bp = (blk_hdr_ptr_t)GDS_ANY_REL2ABS(csa, cw_set[0].cr->buffaddr);
			DBG_ENSURE_PTR_IS_VALID_GLOBUFF(csa, csd, (sm_uc_ptr_t)bp);
			save_bp = (blk_hdr_ptr_t)GDS_ANY_ENCRYPTGLOBUF(bp, csa);
			DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(csa, csd, (sm_uc_ptr_t)save_bp);
			assert((bp->bsiz <= csd->blk_size) && (bp->bsiz >= SIZEOF(*bp)));
			req_enc_blk_size = MIN(csd->blk_size, bp->bsiz) - SIZEOF(*bp);
			if (BLK_NEEDS_ENCRYPTION(bp->levl, req_enc_blk_size))
			{
				ASSERT_ENCRYPTION_INITIALIZED;
				memcpy(save_bp, bp, SIZEOF(blk_hdr));
				GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, (char *)(bp + 1), req_enc_blk_size,
					(char *)(save_bp + 1), crypt_status);
				if (0 != crypt_status)
					GC_GTM_PUTMSG(crypt_status, gv_cur_region->dyn.addr->fname);
			} else
				memcpy(save_bp, bp, bp->bsiz);
		}
#		endif
		if (!was_hold_onto_crit)
			csa->hold_onto_crit = FALSE;
		if (!was_crit)
			rel_crit(gv_cur_region);
		if (unhandled_stale_timer_pop)
			process_deferred_stale();
	}
	return;
}
示例#3
0
/* bt_put - note in the block table (bt) that "block" is being updated at the database's current transaction number.
 *
 * Parameters:
 *	reg	- region whose block table is updated
 *	block	- number of the block being updated
 *
 * Returns the bt record now associated with "block". Returns NULL when no bt for the block existed, the bt
 * picked for reuse still pointed at a dirty cache record, and wcs_get_space() could not get that record
 * written (a WCBLOCKED message is sent before returning).
 *
 * The hash bucket selected by (block % csd->bt_buckets) is walked along its blkque forward links.
 *   - Arriving back at the queue head means the block has no bt. The bt at the front of the tn queue (the
 *     oldest one) is detached from any cache record it still references, taken off its blkque, stamped with
 *     "block" and the current tn as killtn, and inserted into this bucket.
 *   - Finding a bt whose blk equals "block" only takes its th entry off the tn queue.
 * In both cases the th entry is then inserted at the tn queue via insqt and bt->tn is set to the current tn.
 * Queue inconsistencies are reported with rts_error BTFAIL (detail 1, 2 or 3) or GTMASSERT.
 */
bt_rec_ptr_t bt_put(gd_region *reg, int4 block)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	bt_rec_ptr_t		bt, hdr, blkq_succ, blkq_removed;
	sm_uc_ptr_t		tnq_front;
	cache_rec_ptr_t		cr;
	th_rec_ptr_t		th;
	trans_num		lcl_tn;
	uint4			visited;

	csa = (sgmnt_addrs *)&FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	assert(csa->now_crit || csd->clustered);
	assert(dba_mm != csa->hdr->acc_meth);
	lcl_tn = csa->ti->curr_tn;
	hdr = csa->bt_header + (block % csd->bt_buckets);
	assert(BT_QUEHEAD == hdr->blk);
	/* Start at the first entry of the bucket; "visited" counts entries examined without a verdict */
	visited = 0;
	bt = (bt_rec_ptr_t)((sm_uc_ptr_t)hdr + hdr->blkque.fl);
	for ( ; ; )
	{
		if (BT_QUEHEAD == bt->blk)
		{	/* wrapped around to the queue head : the block has no bt yet, so recycle one */
			assert(bt == hdr);
			tnq_front = (sm_uc_ptr_t)(csa->th_base) + csa->th_base->tnque.fl;
			bt = (bt_rec_ptr_t)(tnq_front - SIZEOF(th->tnque));	/* th entry sits inside the bt record */
			if (CR_NOTVALID != bt->cache_index)
			{	/* the recycled bt still refers to a cache record */
				assert(!in_wcs_recover);
				cr = (cache_rec_ptr_t)GDS_ANY_REL2ABS(csa, bt->cache_index);
				if (cr->dirty)
				{	/* that buffer has to be written before its bt can be reused */
					BG_TRACE_PRO_ANY(csa, bt_put_flush_dirty);
					if (!wcs_get_space(reg, 0, cr))
					{
						assert(csa->nl->wc_blocked);	/* only reason we currently know
										 * why wcs_get_space could fail */
						assert(gtm_white_box_test_case_enabled);
						BG_TRACE_PRO_ANY(csa, wcb_bt_put);
						send_msg(VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_bt_put"),
							process_id, &lcl_tn, DB_LEN_STR(reg));
						return NULL;
					}
				}
				/* break the bt <-> cr association in both directions */
				bt->cache_index = CR_NOTVALID;
				cr->bt_index = 0;
			}
			/* take the bt off whichever bucket it is currently on ... */
			blkq_succ = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->blkque.fl);
			blkq_removed = (bt_rec_ptr_t)remqt((que_ent_ptr_t)blkq_succ);
			if ((sm_long_t)blkq_removed == EMPTY_QUEUE)
				rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 1);
			/* ... and hang it on the bucket for "block" */
			bt->blk = block;
			bt->killtn = lcl_tn;
			insqt((que_ent_ptr_t)bt, (que_ent_ptr_t)hdr);
			th = (th_rec_ptr_t)remqh((que_ent_ptr_t)csa->th_base);
			if ((sm_long_t)th == EMPTY_QUEUE)
				GTMASSERT;
			break;
		}
		if (block == bt->blk)
		{	/* The block already has a bt. bt_put is not expected to be invoked twice for one block
			 * with the same lcl_tn, since t_end/tp_tend update each block only once per update
			 * transaction; the assert below checks this. Two situations are exempt:
			 *   a) Forward journal recovery, which simulates a 2-phase M-kill in which one block can be
			 *	updated in both phases (e.g. a bitmap block updated for blocks created within the TP
			 *	transaction and again for blocks freed in the 2nd phase of the M-kill) using the same
			 *	transaction number. GT.M itself would have used different transaction numbers for
			 *	the two phases, but forward recovery applies the same tn and bumps the db tn only
			 *	when it sees the following INCTN journal record(s).
			 *   b) Cache recovery (wcs_recover), which may call bt_put several times for one block,
			 *	possibly with the same tn, because the queues are in a questionable state and more
			 *	than one cache record may exist for a given block number.
			 */
			assert(in_wcs_recover || (bt->tn < lcl_tn) || (jgbl.forw_phase_recovery && !JNL_ENABLED(csa)));
			blkq_succ = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->tnque.fl);
			th = (th_rec_ptr_t)remqt((que_ent_ptr_t)((sm_uc_ptr_t)blkq_succ + SIZEOF(th->tnque)));
			if ((sm_long_t)th == EMPTY_QUEUE)
				GTMASSERT;
			break;
		}
		/* Not a match : sanity-check the chain before stepping to the next entry */
		if (0 == bt->blkque.fl)
			rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 2);
		if (visited >= csd->n_bts)
			rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 3);
		bt = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->blkque.fl);
		visited++;
	}
	/* Common tail : requeue the th entry on the tn queue and record the tn of this update */
	insqt((que_ent_ptr_t)th, (que_ent_ptr_t)csa->th_base);
	bt->tn = lcl_tn;
	return bt;
}
示例#4
0
sm_uc_ptr_t t_qread(block_id blk, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr_out)
/* cycle is used in t_end to detect if the buffer has been refreshed since the t_qread */
{
    uint4			status, duint4, blocking_pid;
    cache_rec_ptr_t		cr;
    bt_rec_ptr_t		bt;
    bool			clustered, was_crit;
    int			dummy, lcnt, ocnt;
    cw_set_element		*cse;
    off_chain		chain1;
    register sgmnt_addrs	*csa;
    register sgmnt_data_ptr_t	csd;
    int4			dummy_errno;
    boolean_t		already_built, is_mm, reset_first_tp_srch_status, set_wc_blocked;

    error_def(ERR_DBFILERR);
    error_def(ERR_BUFOWNERSTUCK);

    first_tp_srch_status = NULL;
    reset_first_tp_srch_status = FALSE;
    csa = cs_addrs;
    csd = csa->hdr;
    INCR_DB_CSH_COUNTER(csa, n_t_qreads, 1);
    is_mm = (dba_mm == csd->acc_meth);
    assert((t_tries < CDB_STAGNATE) || csa->now_crit);
    if (0 < dollar_tlevel)
    {
        assert(sgm_info_ptr);
        if (0 != sgm_info_ptr->cw_set_depth)
        {
            chain1 = *(off_chain *)&blk;
            if (1 == chain1.flag)
            {
                assert(sgm_info_ptr->cw_set_depth);
                if ((int)chain1.cw_index < sgm_info_ptr->cw_set_depth)
                    tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain1.cw_index, &cse);
                else
                {
                    assert(FALSE == csa->now_crit);
                    rdfail_detail = cdb_sc_blknumerr;
                    return (sm_uc_ptr_t)NULL;
                }
            } else
            {
                first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use,
                                       (void *)blk, &duint4);
                ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
                cse = first_tp_srch_status ? first_tp_srch_status->ptr : NULL;
            }
            assert(!cse || !cse->high_tlevel);
            if (cse)
            {   /* transaction has modified the sought after block  */
                assert(gds_t_writemap != cse->mode);
                if (FALSE == cse->done)
                {   /* out of date, so make it current */
                    already_built = (NULL != cse->new_buff);
                    gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, 0);
                    assert(cse->blk_target);
                    if (!already_built && !chain1.flag)
                    {
                        assert(first_tp_srch_status && (is_mm || first_tp_srch_status->cr)
                               && first_tp_srch_status->buffaddr);
                        if (first_tp_srch_status->tn <=
                                ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn)
                        {
                            assert(CDB_STAGNATE > t_tries);
                            rdfail_detail = cdb_sc_blkmod;	/* should this be something else */
                            TP_TRACE_HIST_MOD(blk, gv_target, tp_blkmod_t_qread, cs_data,
                                              first_tp_srch_status->tn,
                                              ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn,
                                              ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->levl);
                            return (sm_uc_ptr_t)NULL;
                        }
                        if ((!is_mm) && (first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle
                                         || first_tp_srch_status->blk_num != first_tp_srch_status->cr->blk))
                        {
                            assert(CDB_STAGNATE > t_tries);
                            rdfail_detail = cdb_sc_lostcr;	/* should this be something else */
                            return (sm_uc_ptr_t)NULL;
                        }
                        if (certify_all_blocks &&
                                FALSE == cert_blk(gv_cur_region, blk, (blk_hdr_ptr_t)cse->new_buff,
                                                  cse->blk_target->root))
                            GTMASSERT;
                    }
                    cse->done = TRUE;
                }
                *cycle = CYCLE_PVT_COPY;
                *cr_out = 0;
                return (sm_uc_ptr_t)cse->new_buff;
            }
            assert(!chain1.flag);
        } else
            first_tp_srch_status =
                (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)blk, &duint4);
        ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
        if (!is_mm && first_tp_srch_status)
        {
            assert(first_tp_srch_status->cr && !first_tp_srch_status->ptr);
            if (first_tp_srch_status->cycle == first_tp_srch_status->cr->cycle)
            {
                *cycle = first_tp_srch_status->cycle;
                *cr_out = first_tp_srch_status->cr;
                first_tp_srch_status->cr->refer = TRUE;
                if (CDB_STAGNATE <= t_tries)	/* mu_reorg doesn't use TP else should have an || for that */
                    CWS_INSERT(blk);
                return (sm_uc_ptr_t)first_tp_srch_status->buffaddr;
            } else
            {   /* Block was already part of the read-set of this transaction, but got recycled. Allow for
                 * recycling. But update the first_tp_srch_status (for this blk) in the si->first_tp_hist
                 * array to reflect the new buffer, cycle and cache-record. Since we know those only at the end of
                 * t_qread, set a variable here that will enable the updation before returning from t_qread().
                 */
                reset_first_tp_srch_status = TRUE;
            }
        }
    }
    if ((blk >= csa->ti->total_blks) || (blk < 0))
    {   /* requested block out of range; could occur because of a concurrency conflict */
        if ((&FILE_INFO(gv_cur_region)->s_addrs != csa) || (csd != cs_data))
            GTMASSERT;
        assert(FALSE == csa->now_crit);
        rdfail_detail = cdb_sc_blknumerr;
        return (sm_uc_ptr_t)NULL;
    }
    if (is_mm)
    {
        *cycle = CYCLE_SHRD_COPY;
        *cr_out = 0;
        return (sm_uc_ptr_t)(mm_read(blk));
    }
    assert(dba_bg == csd->acc_meth);
    assert(!first_tp_srch_status || !first_tp_srch_status->cr
           || first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle);
    if (FALSE == (clustered = csd->clustered))
        bt = NULL;
    was_crit = csa->now_crit;
    ocnt = 0;
    set_wc_blocked = FALSE;	/* to indicate whether csd->wc_blocked was set to TRUE by us */
    do
    {
        if (NULL == (cr = db_csh_get(blk)))
        {   /* not in memory */
            if (clustered && (NULL != (bt = bt_get(blk))) && (FALSE == bt->flushing))
                bt = NULL;
            if (FALSE == csa->now_crit)
            {
                if (NULL != bt)
                {   /* at this point, bt is not NULL only if clustered and flushing - wait no crit */
                    assert(clustered);
                    wait_for_block_flush(bt, blk);	/* try for no other node currently writing the block */
                }
                if (csd->flush_trigger <= csa->nl->wcs_active_lvl  &&  FALSE == gv_cur_region->read_only)
                    JNL_ENSURE_OPEN_WCS_WTSTART(csa, gv_cur_region, 0, dummy_errno);
                /* a macro that dclast's wcs_wtstart() and checks for errors etc. */
                grab_crit(gv_cur_region);
                cr = db_csh_get(blk);			/* in case blk arrived before crit */
            }
            if (clustered && (NULL != (bt = bt_get(blk))) && (TRUE == bt->flushing))
            {   /* Once crit, need to assure that if clustered, that flushing is [still] complete
                 * If it isn't, we missed an entire WM cycle and have to wait for another node to finish */
                wait_for_block_flush(bt, blk);	/* ensure no other node currently writing the block */
            }
            if (NULL == cr)
            {   /* really not in memory - must get a new buffer */
                assert(csa->now_crit);
                cr = db_csh_getn(blk);
                if (CR_NOTVALID == (sm_long_t)cr)
                {
                    SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
                    BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_getn_invalid_blk);
                    set_wc_blocked = TRUE;
                    break;
                }
                assert(0 <= cr->read_in_progress);
                *cycle = cr->cycle;
                cr->tn = csa->ti->curr_tn;
                if (FALSE == was_crit)
                    rel_crit(gv_cur_region);
                /* read outside of crit may be of a stale block but should be detected by t_end or tp_tend */
                assert(0 == cr->dirty);
                assert(cr->read_in_progress >= 0);
                INCR_DB_CSH_COUNTER(csa, n_dsk_reads, 1);
                if (SS_NORMAL != (status = dsk_read(blk, GDS_REL2ABS(cr->buffaddr))))
                {
                    RELEASE_BUFF_READ_LOCK(cr);
                    assert(was_crit == csa->now_crit);
                    if (FUTURE_READ == status)
                    {   /* in cluster, block can be in the "future" with respect to the local history */
                        assert(TRUE == clustered);
                        assert(FALSE == csa->now_crit);
                        rdfail_detail = cdb_sc_future_read;	/* t_retry forces the history up to date */
                        return (sm_uc_ptr_t)NULL;
                    }
                    rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
                }
                assert(0 <= cr->read_in_progress);
                assert(0 == cr->dirty);
                cr->r_epid = 0;
                RELEASE_BUFF_READ_LOCK(cr);
                assert(-1 <= cr->read_in_progress);
                *cr_out = cr;
                assert(was_crit == csa->now_crit);
                if (reset_first_tp_srch_status)
                {   /* keep the parantheses for the if (although single line) since the following is a macro */
                    RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
                }
                return (sm_uc_ptr_t)GDS_REL2ABS(cr->buffaddr);
            } else  if ((FALSE == was_crit) && (BAD_LUCK_ABOUNDS > ocnt))
            {
                assert(TRUE == csa->now_crit);
                assert(csa->nl->in_crit == process_id);
                rel_crit(gv_cur_region);
            }
        }
        if (CR_NOTVALID == (sm_long_t)cr)
        {
            SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE);
            BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_get_invalid_blk);
            set_wc_blocked = TRUE;
            break;
        }
        for (lcnt = 1;  ; lcnt++)
        {
            if (0 > cr->read_in_progress)
            {   /* it's not being read */
                if (clustered && (0 == cr->bt_index) &&
                        (cr->tn < ((th_rec *)((uchar_ptr_t)csa->th_base + csa->th_base->tnque.fl))->tn))
                {   /* can't rely on the buffer */
                    cr->cycle++;	/* increment cycle whenever blk number changes (tp_hist depends on this) */
                    cr->blk = CR_BLKEMPTY;
                    break;
                }
                *cycle = cr->cycle;
                *cr_out = cr;
                VMS_ONLY(
                    /* If we were doing the db_csh_get() above (in t_qread itself) and located the cache-record
                     * which, before coming here and taking a copy of cr->cycle a few lines above, was made an
                     * older twin by another process in bg_update (note this can happen in VMS only) which has
                     * already incremented the cycle, we will end up having a copy of the old cache-record with
                     * its incremented cycle number and hence will succeed in tp_hist validation if we return
                     * this <cr,cycle> combination although we don't want to since this "cr" is not current for
                     * the given block as of now. Note that the "indexmod" optimization in tp_tend() relies on
                     * an accurate intermediate validation by tp_hist() which in turn relies on the <cr,cycle>
                     * value returned by t_qread() to be accurate for a given blk at the current point in time.
                     * We detect the older-twin case by the following check. Note that here we depend on the
                     * the fact that bg_update() sets cr->bt_index to 0 before incrementing cr->cycle.
                     * Given that order, cr->bt_index can be guaranteed to be 0 if we read the incremented cycle
                     */
                    if (cr->twin && (0 == cr->bt_index))
                    break;
                )
                    if (cr->blk != blk)
                        break;
                if (was_crit != csa->now_crit)
                    rel_crit(gv_cur_region);
                assert(was_crit == csa->now_crit);
                if (reset_first_tp_srch_status)
                {   /* keep the parantheses for the if (although single line) since the following is a macro */
                    RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
                }
                /* Note that at this point we expect t_qread() to return a <cr,cycle> combination that
                 * corresponds to "blk" passed in. It is crucial to get an accurate value for both the fields
                 * since tp_hist() relies on this for its intermediate validation.
                 */
                return (sm_uc_ptr_t)GDS_ANY_REL2ABS(csa, cr->buffaddr);
            }
            if (blk != cr->blk)
                break;
            if (lcnt >= BUF_OWNER_STUCK && (0 == (lcnt % BUF_OWNER_STUCK)))
            {
                if (FALSE == csa->now_crit)
                    grab_crit(gv_cur_region);
                if (cr->read_in_progress < -1)
                {   /* outside of design; clear to known state */
                    BG_TRACE_PRO(t_qread_out_of_design);
                    INTERLOCK_INIT(cr);
                    assert(0 == cr->r_epid);
                    cr->r_epid = 0;
                } else  if (cr->read_in_progress >= 0)
                {
                    BG_TRACE_PRO(t_qread_buf_owner_stuck);
                    if (0 != (blocking_pid = cr->r_epid))
                    {
                        if (FALSE == is_proc_alive(blocking_pid, cr->image_count))
                        {   /* process gone: release that process's lock */
                            assert(0 == cr->bt_index);
                            if (cr->bt_index)
                            {
                                SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
                                BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index1);
                                set_wc_blocked = TRUE;
                                break;
                            }
                            cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
                            cr->blk = CR_BLKEMPTY;
                            RELEASE_BUFF_READ_LOCK(cr);
                        } else
                        {
                            rel_crit(gv_cur_region);
                            send_msg(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region));
                            send_msg(VARLSTCNT(9) ERR_BUFOWNERSTUCK, 7, process_id, blocking_pid,
                                     cr->blk, cr->blk, (lcnt / BUF_OWNER_STUCK),
                                     cr->read_in_progress, cr->rip_latch.latch_pid);
                            if ((4 * BUF_OWNER_STUCK) <= lcnt)
                                GTMASSERT;
                            /* Kickstart the process taking a long time in case it was suspended */
                            UNIX_ONLY(continue_proc(blocking_pid));
                        }
                    } else
                    {   /* process stopped before could set r_epid */
                        assert(0 == cr->bt_index);
                        if (cr->bt_index)
                        {
                            SET_TRACEABLE_VAR(csd->wc_blocked, TRUE);
                            BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index2);
                            set_wc_blocked = TRUE;
                            break;
                        }
                        cr->cycle++;	/* increment cycle for blk number changes (for tp_hist) */
                        cr->blk = CR_BLKEMPTY;
                        RELEASE_BUFF_READ_LOCK(cr);
                        if (cr->read_in_progress < -1)	/* race: process released since if r_epid */
                            LOCK_BUFF_FOR_READ(cr, dummy);
                    }
                }
                if (was_crit != csa->now_crit)
                    rel_crit(gv_cur_region);
            } else
                wcs_sleep(lcnt);
        }
        if (set_wc_blocked)	/* cannot use csd->wc_blocked here as we might not necessarily have crit */
            break;
        ocnt++;
        if (BAD_LUCK_ABOUNDS <= ocnt)
        {
            if (BAD_LUCK_ABOUNDS < ocnt || csa->now_crit)
            {
                rel_crit(gv_cur_region);
                GTMASSERT;
            }
            if (FALSE == csa->now_crit)
                grab_crit(gv_cur_region);
        }
    } while (TRUE);