Exemple #1
0
int mu_extr_getblk(unsigned char *ptr)
{
	error_def(ERR_GVGETFAIL);
	enum cdb_sc	status;
	rec_hdr_ptr_t	rp;
	bool		two_histories, end_of_tree;
	blk_hdr_ptr_t	bp;
	srch_blk_status	*bh;
	srch_hist	*rt_history;

	t_begin(ERR_GVGETFAIL, FALSE);
	for (;;)
	{
		if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL)))
		{
			t_retry(status);
			continue;
		}
		end_of_tree = two_histories
			    = FALSE;
		bh = gv_target->hist.h;
		rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
		bp = (blk_hdr_ptr_t)bh->buffaddr;
		if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
		{
			rt_history = gv_target->alt_hist;
			if (cdb_sc_normal == (status = gvcst_rtsib(rt_history, 0)))
			{
				two_histories = TRUE;
				if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h)))
				{
					t_retry(status);
					continue;
				}
				bp = (blk_hdr_ptr_t)rt_history->h[0].buffaddr;
			} else if (cdb_sc_endtree == status)
					end_of_tree = TRUE;
			else
			{
				t_retry(status);
				continue;
			}
		}
		memcpy(ptr, bp, bp->bsiz);
		if (t_end(&gv_target->hist, two_histories ? rt_history : NULL) != 0)
		{
			if (two_histories)
				memcpy(gv_target->hist.h, rt_history->h, sizeof(srch_blk_status) * (rt_history->depth + 1));
			return !end_of_tree;
		}
	}
}
Exemple #2
0
void gvcst_root_search(void)
{
	srch_blk_status	*h0;
	uchar_ptr_t	c, c1;
	sm_uc_ptr_t	rp;
	unsigned short	rlen, hdr_len;
	uchar_ptr_t	subrec_ptr;
	enum cdb_sc	status;
	boolean_t	gbl_target_was_set;
	gv_namehead	*save_targ;
	mname_entry	*gvent;
	int		altkeylen;

	assert((dba_bg == gv_cur_region->dyn.addr->acc_meth) || (dba_mm == gv_cur_region->dyn.addr->acc_meth));
	assert(gv_altkey->top == gv_currkey->top);
	assert(gv_altkey->top == gv_keysize);
	assert(gv_currkey->end < gv_currkey->top);
	for (c = gv_altkey->base, c1 = gv_currkey->base;  *c1;)
		*c++ = *c1++;
	*c++ = 0;
	*c = 0;
	gv_altkey->end = c - gv_altkey->base;
	assert(gv_altkey->end < gv_altkey->top);
	assert(gv_target != cs_addrs->dir_tree);
	save_targ = gv_target;
	/* Check if "gv_target->gvname" matches "gv_altkey->base". If not, there is a name mismatch (out-of-design situation).
	 * This check is temporary until we catch the situation that caused D9H02-002641 */
	/* --- Check BEGIN --- */
	gvent = &save_targ->gvname;
	altkeylen = gv_altkey->end - 1;
	if (!altkeylen || (altkeylen != gvent->var_name.len) || memcmp(gv_altkey->base, gvent->var_name.addr, gvent->var_name.len))
		GTMASSERT;
	/* --- Check END   --- */
	if (INVALID_GV_TARGET != reset_gv_target)
		gbl_target_was_set = TRUE;
	else
	{
		gbl_target_was_set = FALSE;
		reset_gv_target = save_targ;
	}
	gv_target = cs_addrs->dir_tree;
	if (is_standalone)  /* *&&  (0 != gv_target->clue.end)  &&  (FALSE == is_valid_hist(&gv_target->hist))) */
		gv_target->clue.end = 0;
	T_BEGIN_READ_NONTP_OR_TP(ERR_GVGETFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		hdr_len = rlen = 0;
		gv_target = cs_addrs->dir_tree;
		if (dollar_trestart)
			gv_target->clue.end = 0;
		if (cdb_sc_normal == (status = gvcst_search(gv_altkey, 0)))
		{
			if (gv_altkey->end + 1 == gv_target->hist.h[0].curr_rec.match)
			{
				h0 = gv_target->hist.h;
				rp = (h0->buffaddr + h0->curr_rec.offset);
				hdr_len = sizeof(rec_hdr) + gv_altkey->end + 1 - ((rec_hdr_ptr_t)rp)->cmpc;
				GET_USHORT(rlen, rp);
				if (FALSE == (CHKRECLEN(rp, h0->buffaddr, rlen)) || (rlen < hdr_len + sizeof(block_id)))
				{
					gv_target->clue.end = 0;
					RESET_GV_TARGET_LCL(save_targ);
					t_retry(cdb_sc_rmisalign);
					continue;
				}
				GET_LONG(save_targ->root, (rp + hdr_len));
				if (rlen > hdr_len + sizeof(block_id))
				{
					assert(NULL != global_collation_mstr.addr || 0 == global_collation_mstr.len);
					if (global_collation_mstr.len < rlen - (hdr_len + sizeof(block_id)))
					{
						if (NULL != global_collation_mstr.addr)
							free(global_collation_mstr.addr);
						global_collation_mstr.len = rlen - (hdr_len + SIZEOF(block_id));
						global_collation_mstr.addr = (char *)malloc(global_collation_mstr.len);
					}
					/* the memcpy needs to be done here instead of out of for loop for
					 * concurrency consideration. We don't use s2pool because the pointer rp is 64 bits
					 */
					memcpy(global_collation_mstr.addr, rp + hdr_len + sizeof(block_id),
							rlen - (hdr_len + sizeof(block_id)));
				}
				if (0 != dollar_tlevel)
				{
					status = tp_hist(NULL);
					if (cdb_sc_normal != status)
					{
						gv_target->clue.end = 0;
						RESET_GV_TARGET_LCL(save_targ);
						gv_target->root = 0;
						t_retry(status);
						continue;
					}
					break;
				}
			}
			if (0 == dollar_tlevel)
			{
				if ((trans_num)0 != t_end(&gv_target->hist, 0))
					break;
			} else
			{
				status = tp_hist(NULL);
				if (cdb_sc_normal == status)
					break;
				gv_target->clue.end = 0;
				RESET_GV_TARGET_LCL(save_targ);
				gv_target->root = 0;
				t_retry(status);
				continue;
			}
			save_targ->root = 0;
		} else
		{
			gv_target->clue.end = 0;
			RESET_GV_TARGET_LCL(save_targ);
			t_retry(status);
			continue;
		}
	}
	RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ);
	if (rlen > hdr_len + sizeof(block_id))
	{
		assert(NULL != global_collation_mstr.addr);
		subrec_ptr = get_spec((uchar_ptr_t)global_collation_mstr.addr,
				      (int)(rlen - (hdr_len + sizeof(block_id))), COLL_SPEC);
		if (subrec_ptr)
		{
			gv_target->nct = *(subrec_ptr + COLL_NCT_OFFSET);
			gv_target->act = *(subrec_ptr + COLL_ACT_OFFSET);
			gv_target->ver = *(subrec_ptr + COLL_VER_OFFSET);
		} else
		{
			gv_target->nct = 0;
			gv_target->act = 0;
			gv_target->ver = 0;
		}
	} else
	{
		gv_target->nct = 0;
		gv_target->act = cs_addrs->hdr->def_coll;
		gv_target->ver = cs_addrs->hdr->def_coll_ver;
	}
	if (gv_target->act)
		act_in_gvt();
	assert(gv_target->act || NULL == gv_target->collseq);
	return;
}
Exemple #3
0
void	mu_swap_root(glist *gl_ptr, int *root_swap_statistic_ptr)
{
	sgmnt_data_ptr_t	csd;
	sgmnt_addrs		*csa;
	node_local_ptr_t	cnl;
	srch_hist		*dir_hist_ptr, *gvt_hist_ptr;
	gv_namehead		*save_targ;
	block_id		root_blk_id, child_blk_id, free_blk_id;
	sm_uc_ptr_t		root_blk_ptr, child_blk_ptr;
	kill_set		kill_set_list;
	trans_num		curr_tn, ret_tn;
	int			level, root_blk_lvl;
	block_id		save_root;
	boolean_t		tn_aborted;
	unsigned int		lcl_t_tries;
	enum cdb_sc		status;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(mu_reorg_process);
	gv_target = gl_ptr->gvt;
	gv_target->root = 0;		/* reset root so we recompute it in DO_OP_GVNAME below */
	gv_target->clue.end = 0;	/* reset clue since reorg action on later globals might have invalidated it */
	reorg_gv_target->gvname.var_name = gv_target->gvname.var_name;	/* needed by SAVE_ROOTSRCH_ENTRY_STATE */
	dir_hist_ptr = gv_target->alt_hist;
	gvt_hist_ptr = &(gv_target->hist);
	inctn_opcode = inctn_invalid_op;
	DO_OP_GVNAME(gl_ptr);
		/* sets gv_target/gv_currkey/gv_cur_region/cs_addrs/cs_data to correspond to <globalname,reg> in gl_ptr */
	csa = cs_addrs;
	cnl = csa->nl;
	csd = cs_data;	/* Be careful to keep csd up to date. With MM, cs_data can change, and
			 * dereferencing an older copy can result in a SIG-11.
			 */
	if (gv_cur_region->read_only)
		return;	/* Cannot proceed for read-only data files */
	if (0 == gv_target->root)
	{	/* Global does not exist (online rollback). No problem. */
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_GBLNOEXIST, 2, GNAME(gl_ptr).len, GNAME(gl_ptr).addr);
		return;
	}
	if (dba_mm == csd->acc_meth)
		/* return for now without doing any swapping operation because later mu_truncate
		 * is going to issue the MUTRUNCNOTBG message.
		 */
		return;
	SET_GV_ALTKEY_TO_GBLNAME_FROM_GV_CURRKEY;		/* set up gv_altkey to be just the gblname */
	/* ------------ Swap root block of global variable tree --------- */
	t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
	for (;;)
	{
		curr_tn = csa->ti->curr_tn;
		kill_set_list.used = 0;
		save_root = gv_target->root;
		gv_target->root = csa->dir_tree->root;
		gv_target->clue.end = 0;
		if (cdb_sc_normal != (status = gvcst_search(gv_altkey, dir_hist_ptr)))
		{	/* Assign directory tree path to dir_hist_ptr */
			assert(t_tries < CDB_STAGNATE);
			gv_target->root = save_root;
			t_retry(status);
			continue;
		}
		gv_target->root = save_root;
		gv_target->clue.end = 0;
		if (cdb_sc_normal != (gvcst_search(gv_currkey, NULL)))
		{	/* Assign global variable tree path to gvt_hist_ptr */
			assert(t_tries < CDB_STAGNATE);
			t_retry(status);
			continue;
		}
		/* We've already search the directory tree in op_gvname/t_retry and obtained gv_target->root.
		 * Should restart with gvtrootmod2 if they don't agree. gvcst_root_search is the final arbiter.
		 * Really need that for debug info and also should assert(gv_currkey is global name).
		 */
		root_blk_lvl = gvt_hist_ptr->depth;
		assert(root_blk_lvl > 0);
		root_blk_ptr = gvt_hist_ptr->h[root_blk_lvl].buffaddr;
		root_blk_id = gvt_hist_ptr->h[root_blk_lvl].blk_num;
		assert((CDB_STAGNATE > t_tries) || (gv_target->root == gvt_hist_ptr->h[root_blk_lvl].blk_num));
		free_blk_id = swap_root_or_directory_block(0, root_blk_lvl, dir_hist_ptr, root_blk_id,
				root_blk_ptr, &kill_set_list, curr_tn);
		if (RETRY_SWAP == free_blk_id)
			continue;
		else if (ABORT_SWAP == free_blk_id)
			break;
		update_trans = UPDTRNS_DB_UPDATED_MASK;
		inctn_opcode = inctn_mu_reorg;
		assert(1 == kill_set_list.used);
		need_kip_incr = TRUE;
		if (!csa->now_crit)
			WAIT_ON_INHIBIT_KILLS(cnl, MAXWAIT2KILL);
		DEBUG_ONLY(lcl_t_tries = t_tries);
		TREF(in_mu_swap_root_state) = MUSWP_INCR_ROOT_CYCLE;
		assert(!TREF(in_gvcst_redo_root_search));
		if ((trans_num)0 == (ret_tn = t_end(gvt_hist_ptr, dir_hist_ptr, TN_NOT_SPECIFIED)))
		{
			TREF(in_mu_swap_root_state) = MUSWP_NONE;
			need_kip_incr = FALSE;
			assert(NULL == kip_csa);
			ABORT_TRANS_IF_GBL_EXIST_NOMORE(lcl_t_tries, tn_aborted);
			if (tn_aborted)
			{	/* It is not an error if the global (that once existed) doesn't exist anymore (due to ROLLBACK) */
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_GBLNOEXIST, 2, GNAME(gl_ptr).len, GNAME(gl_ptr).addr);
				return;
			}
			continue;
		}
		TREF(in_mu_swap_root_state) = MUSWP_NONE;
		/* Note that this particular process's csa->root_search_cycle is now behind cnl->root_search_cycle.
		 * This forces a cdb_sc_gvtrootmod2 restart in gvcst_bmp_mark_free below.
		 */
		assert(cnl->root_search_cycle > csa->root_search_cycle);
		gvcst_kill_sort(&kill_set_list);
		GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, csa);
		DECR_KIP(csd, csa, kip_csa);
		*root_swap_statistic_ptr += 1;
		break;
	}
	/* ------------ Swap blocks in branch of directory tree --------- */
	for (level = 0; level <= MAX_BT_DEPTH; level++)
	{
		t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
		for (;;)
		{
			curr_tn = csa->ti->curr_tn;
			kill_set_list.used = 0;
			save_root = gv_target->root;
			gv_target->root = csa->dir_tree->root;
			gv_target->clue.end = 0;
			if (cdb_sc_normal != (status = gvcst_search(gv_altkey, dir_hist_ptr)))
			{	/* assign branch path of directory tree into dir_hist_ptr */
				assert(t_tries < CDB_STAGNATE);
				gv_target->root = save_root;
				t_retry(status);
				continue;
			}
			gv_target->root = save_root;
			gv_target->clue.end = 0;
			if (level >= dir_hist_ptr->depth)
			{	/* done */
				t_abort(gv_cur_region, csa);
				return;
			}
			child_blk_ptr = dir_hist_ptr->h[level].buffaddr;
			child_blk_id = dir_hist_ptr->h[level].blk_num;
			assert(csa->dir_tree->root != child_blk_id);
			free_blk_id = swap_root_or_directory_block(level + 1, level, dir_hist_ptr, child_blk_id,
					child_blk_ptr, &kill_set_list, curr_tn);
			if (level == 0)
				/* set level as 1 to mark this kill set is for level-0 block in directory tree.
				 * The kill-set level later will be used in gvcst_bmp_markfree to assign a special value to
				 * cw_set_element, which will be eventually used by t_end to write the block to snapshot
				 */
				kill_set_list.blk[kill_set_list.used - 1].level = 1;
			if (RETRY_SWAP == free_blk_id)
				continue;
			else if (ABORT_SWAP == free_blk_id)
				break;
			update_trans = UPDTRNS_DB_UPDATED_MASK;
			inctn_opcode = inctn_mu_reorg;
			assert(1 == kill_set_list.used);
			need_kip_incr = TRUE;
			if (!csa->now_crit)
				WAIT_ON_INHIBIT_KILLS(cnl, MAXWAIT2KILL);
			DEBUG_ONLY(lcl_t_tries = t_tries);
			TREF(in_mu_swap_root_state) = MUSWP_DIRECTORY_SWAP;
			if ((trans_num)0 == (ret_tn = t_end(dir_hist_ptr, NULL, TN_NOT_SPECIFIED)))
			{
				TREF(in_mu_swap_root_state) = MUSWP_NONE;
				need_kip_incr = FALSE;
				assert(NULL == kip_csa);
				continue;
			}
			TREF(in_mu_swap_root_state) = MUSWP_NONE;
			gvcst_kill_sort(&kill_set_list);
			TREF(in_mu_swap_root_state) = MUSWP_FREE_BLK;
			GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg,
					inctn_opcode, csa);
			TREF(in_mu_swap_root_state) = MUSWP_NONE;
			DECR_KIP(csd, csa, kip_csa);
			break;
		}
	}
	return;
}
Exemple #4
0
boolean_t	gvcst_query2(void)
{
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	blk_hdr_ptr_t	bp;
	rec_hdr_ptr_t	rp;
	unsigned char	*c1, *c2;
	srch_blk_status	*bh;
	srch_hist	*rt_history;

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		two_histories = FALSE;
#		if defined(DEBUG) && defined(UNIX)
		if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYFAIL == gtm_white_box_test_case_number))
		{
			t_retry(cdb_sc_blknumerr);
			continue;
		}
#		endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0)))
		{
			found = TRUE;
			bh = &gv_target->hist.h[0];
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
			{
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_endtree == status)		/* end of tree */
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else  if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				} else
				{
					bh = &rt_history->h[0];
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				}
			}
			if (found)
			{	/* !found indicates that the end of tree has been reached (see call to
				 *  gvcst_rtsib).  If there is no more tree, don't bother doing expansion.
				 */
				status = gvcst_expand_curr_key(bh, gv_currkey, gv_altkey);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(!two_histories ? NULL : rt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
		    	}
			assert(cs_data == cs_addrs->hdr);
			INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, 1);
			if (found)
			{
				c1 = &gv_altkey->base[0];
				c2 = &gv_currkey->base[0];
				for ( ; *c2; )
				{
					if (*c2++ != *c1++)
						break;
				}
				if (!*c2 && !*c1)
					return TRUE;
			}
			return FALSE;
		}
		t_retry(status);
	}
}
Exemple #5
0
mint	gvcst_data(void)
{
	blk_hdr_ptr_t	bp;
	enum cdb_sc	status;
	mint		val;
	rec_hdr_ptr_t	rp;
	unsigned short	rec_size;
	srch_blk_status *bh;
	srch_hist	*rt_history;
	sm_uc_ptr_t	b_top;

	assert((gv_target->root < cs_addrs->ti->total_blks) || (0 < dollar_tlevel));
	T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		rt_history = gv_target->alt_hist;
		rt_history->h[0].blk_num = 0;
		if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
		{
			t_retry(status);
			continue;
		}
		bh = gv_target->hist.h;
		bp = (blk_hdr_ptr_t)bh->buffaddr;
		rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
		b_top = bh->buffaddr + bp->bsiz;
		val = 0;
		if (gv_currkey->end + 1 == bh->curr_rec.match)
			val = 1;
		else if (bh->curr_rec.match >= gv_currkey->end)
			val = 10;
		if (1 == val  ||  rp == (rec_hdr_ptr_t)b_top)
		{
			GET_USHORT(rec_size, &rp->rsiz);
			if (rp == (rec_hdr_ptr_t)b_top  ||  (sm_uc_ptr_t)rp + rec_size == b_top)
			{
				if (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0)))
				{
					if ((cdb_sc_normal != status)
						|| (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h))))
					{
						t_retry(status);
						continue;
					}
					if (rt_history->h[0].curr_rec.match >= gv_currkey->end)
						val += 10;
				}
			} else
			{
				if ((sm_uc_ptr_t)rp + rec_size > b_top)
				{
					t_retry(cdb_sc_rmisalign);
					continue;
				}
				rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size);
				if (rp->cmpc >= gv_currkey->end)
					val += 10;
			}
		}
		if (0 == dollar_tlevel)
		{
			if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history))
				continue;
		} else
		{
			status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history);
			if (cdb_sc_normal != status)
			{
				t_retry(status);
				continue;
			}
		}
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1);
		return val;
	}
}
Exemple #6
0
/* This function is the equivalent of invoking gvcst_data & gvcst_get at the same time.
 * One crucial difference is that this function does NOT handle restarts by automatically invoking t_retry.
 * Instead, it returns the restart code to the caller so that it can handle the restart accordingly.
 * This is important in the case of triggers because we do NOT want to call t_retry in case of a implicit tstart
 * wrapped gvcst_put or gvcst_kill trigger-invoking update transaction. Additionally, this function assumes
 * that it is called always inside of TP (i.e. dollar_tlevel is non-zero).
 */
enum cdb_sc gvcst_dataget(mint *dollar_data, mval *val)
{
	blk_hdr_ptr_t	bp;
	boolean_t	do_rtsib;
	enum cdb_sc	status;
	mint		dlr_data;
	rec_hdr_ptr_t	rp;
	unsigned short	match, rsiz;
	srch_blk_status *bh;
	srch_hist	*rt_history;
	sm_uc_ptr_t	b_top;
	int		key_size, data_len;
	uint4		save_t_err;

	error_def(ERR_GVDATAGETFAIL);
	error_def(ERR_GVKILLFAIL);

	/* The following code is lifted from gvcst_data. Any changes here might need to be reflected there as well */
	assert(dollar_tlevel);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	save_t_err = t_err;
	assert(ERR_GVKILLFAIL == save_t_err);	/* this function should currently be called only from gvcst_kill */
	t_err = ERR_GVDATAGETFAIL;	/* switch t_err to reflect dataget sub-operation (under the KILL operation) */
	/* In case of a failure return, it is ok to return with t_err set to ERR_GVDATAGETFAIL as that gives a better
	 * picture of exactly where in the transaction the failure occurred.
	 */
	rt_history = gv_target->alt_hist;
	rt_history->h[0].blk_num = 0;
	if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL)))
		return status;
	bh = gv_target->hist.h;
	bp = (blk_hdr_ptr_t)bh->buffaddr;
	rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
	b_top = bh->buffaddr + bp->bsiz;
	match = bh->curr_rec.match;
	key_size = gv_currkey->end + 1;
	do_rtsib = FALSE;
	/* Even if key does not exist, return null string in "val". Caller can use dollar_data to distinguish
	 * whether the key is undefined or defined and set to the null string.
	 */
	val->mvtype = MV_STR;
	val->str.len = 0;
	if (key_size == match)
	{
		dlr_data = 1;
		/* the following code is lifted from gvcst_get. any changes here might need to be reflected there as well */
		GET_USHORT(rsiz, &rp->rsiz);
		data_len = rsiz + rp->cmpc - SIZEOF(rec_hdr) - key_size;
		if ((0 > data_len) || ((sm_uc_ptr_t)rp + rsiz > b_top))
		{
			assert(CDB_STAGNATE > t_tries);
			status = cdb_sc_rmisalign1;
			return status;
		} else
		{
			ENSURE_STP_FREE_SPACE(data_len);
			memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
			val->str.addr = (char *)stringpool.free;
			val->str.len = data_len;
			stringpool.free += data_len;
		}
		/* --------------------- end code lifted from gvcst_get ---------------------------- */
		rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rsiz);
		if ((sm_uc_ptr_t)rp > b_top)
		{
			status = cdb_sc_rmisalign;
			return status;
		} else if ((sm_uc_ptr_t)rp == b_top)
			do_rtsib = TRUE;
		else if (rp->cmpc >= gv_currkey->end)
			dlr_data += 10;
	} else if (match >= gv_currkey->end)
		dlr_data = 10;
	else
	{
		dlr_data = 0;
		if (rp == (rec_hdr_ptr_t)b_top)
			do_rtsib = TRUE;
	}
	if (do_rtsib && (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0))))
	{
		if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h))))
			return status;
		if (rt_history->h[0].curr_rec.match >= gv_currkey->end)
		{
			assert(1 >= dlr_data);
			dlr_data += 10;
		}
	}
	status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history);
	if (cdb_sc_normal != status)
		return status;
	*dollar_data = dlr_data;
	t_err = save_t_err;	/* restore t_err to what it was at function entry */
	return status;
}
boolean_t	gvcst_order2(void)
{
	blk_hdr_ptr_t	bp;
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	rec_hdr_ptr_t	rp;
	unsigned short	rec_size;
	srch_blk_status	*bh;
	srch_hist	*rt_history;
	sm_uc_ptr_t	c1, c2, ctop, alt_top;
	int		tmp_cmpc;

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVORDERFAIL);
	for (;;)
	{
		assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
		two_histories = FALSE;
#if defined(DEBUG) && defined(UNIX)
		if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVORDERFAIL == gtm_white_box_test_case_number))
		{
			status = cdb_sc_blknumerr;
			t_retry(status);
			continue;
		}
#endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL)))
		{
			found = TRUE;
			bh = gv_target->hist.h;
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if ((rec_hdr_ptr_t)CST_TOB(bp) <= rp)
			{
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_normal == status)
				{
					bh = rt_history->h;
			       		if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				} else
				{
			  	     	if (cdb_sc_endtree == status)
					{
						found = FALSE;
						two_histories = FALSE;		/* second history not valid */
					} else
					{
						t_retry(status);
						continue;
					}
				}
			}
			if (found)
			{
				assert(gv_altkey->top == gv_currkey->top);
				assert(gv_altkey->top == gv_keysize);
				assert(gv_altkey->end < gv_altkey->top);
				/* store new subscipt */
				c1 = gv_altkey->base;
				alt_top = gv_altkey->base + gv_altkey->top - 1;
					/* Make alt_top one less than gv_altkey->top to allow double-null at end of a key-name */
				/* 4/17/96
				 * HP compiler bug work-around.  The original statement was
				 * c2 = (unsigned char *)CST_BOK(rp) + bh->curr_rec.match - rp->cmpc;
				 *
				 * ...but this was sometimes compiled incorrectly (the lower 4 bits
				 * of rp->cmpc, sign extended, were subtracted from bh->curr_rec.match).
				 * I separated out the subtraction of rp->cmpc.
				 *
				 * -VTF.
				 */
				c2 = (sm_uc_ptr_t)CST_BOK(rp) + bh->curr_rec.match;
				memcpy(c1, gv_currkey->base, bh->curr_rec.match);
				c1 += bh->curr_rec.match;
				c2 -= EVAL_CMPC(rp);
				GET_USHORT(rec_size, &rp->rsiz);
				ctop = (sm_uc_ptr_t)rp + rec_size;
				for (;;)
				{
					if (c2 >= ctop  ||  c1 >= alt_top)
					{
						assert(CDB_STAGNATE > t_tries);
						status = cdb_sc_rmisalign;
						goto restart;	/* goto needed because of nested FOR loop */
					}
 					if (0 == (*c1++ = *c2++))
					{
						*c1 = 0;
						break;
					}
				}
				gv_altkey->end = c1 - gv_altkey->base;
				assert(gv_altkey->end < gv_altkey->top);
			}
                        if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, two_histories ? rt_history : NULL, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(two_histories ? rt_history : NULL);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			assert(cs_data == cs_addrs->hdr);
			INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, 1);
			return (found && (bh->curr_rec.match >= gv_currkey->prev));
		}
restart:	t_retry(status);
	}
}
boolean_t	gvcst_gblmod(mval *v)
{
	boolean_t	gblmod, is_dummy;
	enum cdb_sc	status;
	int		key_size,  key_size2, data_len;
	srch_hist	*alt_history;
	blk_hdr_ptr_t	bp;
	rec_hdr_ptr_t	rp;
	unsigned short	match, match2, rsiz, offset_to_value, oldend;
	srch_blk_status	*bh;
	sm_uc_ptr_t	b_top;
	trans_num	tn_to_compare;

	T_BEGIN_READ_NONTP_OR_TP(ERR_GBLMODFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		gblmod = TRUE;
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL)))
		{
			alt_history = gv_target->alt_hist;
			alt_history->h[0].blk_num = 0;

			VMS_ONLY(
				if (cs_addrs->hdr->resync_tn >= ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->tn)
					gblmod = FALSE;
			)
#			ifdef UNIX
				tn_to_compare = ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->tn;
				bh = gv_target->hist.h;
				bp = (blk_hdr_ptr_t) bh->buffaddr;
				rp = (rec_hdr_ptr_t) (bh->buffaddr + bh->curr_rec.offset);
				b_top = bh->buffaddr + bp->bsiz;
				GET_USHORT(rsiz, &rp->rsiz);
				key_size = gv_currkey->end + 1;
				data_len = rsiz + EVAL_CMPC(rp) - SIZEOF(rec_hdr) - key_size;
				match = bh->curr_rec.match;
				if (key_size == match)
				{
					if ((0 > data_len) || ((sm_uc_ptr_t)rp + rsiz > b_top))
					{
						status = cdb_sc_rmisalign1;
						t_retry(status);
						continue;
					}
					offset_to_value = SIZEOF(rec_hdr) + key_size - EVAL_CMPC(rp);
					/* If it could be a spanning node, i.e., has special value, then try to get tn from the
					 * block that contains the first special subscript. Since dummy nodes always have the
					 * same value, the tn number is not updated It s enough to do only the first piece
					 * since all pieces of a spanning node are killed  before an update is applied.
					 */
					if (IS_SN_DUMMY(data_len, (sm_uc_ptr_t)rp + offset_to_value))
					{
						oldend = gv_currkey->end;
						APPEND_HIDDEN_SUB(gv_currkey);
						if (cdb_sc_normal == (status = gvcst_search(gv_currkey, alt_history)))
						{
							key_size2 = gv_currkey->end + 1;
							match = alt_history->h[0].curr_rec.match;
							if (key_size2 == match)
								tn_to_compare =  ((blk_hdr_ptr_t)alt_history->h[0].buffaddr)->tn;
						}
						else
						{
							gv_currkey->end = oldend;
							gv_currkey->base[gv_currkey->end - 1] = KEY_DELIMITER;
							gv_currkey->base[gv_currkey->end] = KEY_DELIMITER;
							t_retry(status);
							continue;
						}
						gv_currkey->end = oldend;
						gv_currkey->base[gv_currkey->end - 1] = KEY_DELIMITER;
						gv_currkey->base[gv_currkey->end] = KEY_DELIMITER;
					}
				}
				if (cs_addrs->hdr->zqgblmod_tn > tn_to_compare)
					gblmod = FALSE;
#			endif
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, 0 == alt_history->h[0].blk_num ? NULL : alt_history,
						TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(0 == alt_history->h[0].blk_num ? NULL : alt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			return gblmod;
		}
Exemple #9
0
bool	gvcst_get(mval *v)
{
	srch_blk_status	*s;
	enum cdb_sc	status;
	int		key_size, data_len;
	unsigned short	rsiz;
	rec_hdr_ptr_t	rp;

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVGETFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL)))
		{
			if ((key_size = gv_currkey->end + 1) == gv_target->hist.h[0].curr_rec.match)
			{
				rp = (rec_hdr_ptr_t)(gv_target->hist.h[0].buffaddr + gv_target->hist.h[0].curr_rec.offset);
				GET_USHORT(rsiz, &rp->rsiz);
				data_len = rsiz + rp->cmpc - sizeof(rec_hdr) - key_size;
				if (data_len < 0  || (sm_uc_ptr_t)rp + rsiz >
					gv_target->hist.h[0].buffaddr + ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz)
				{
					assert(CDB_STAGNATE > t_tries);
					status = cdb_sc_rmisalign1;
				} else
				{
					if (stringpool.top - stringpool.free < data_len)
						stp_gcol(data_len);
					assert(stringpool.top - stringpool.free >= data_len);
					memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
					if (0 == dollar_tlevel)
					{
						if (0 == t_end(&gv_target->hist, NULL))
							continue;
					} else
					{
						status = tp_hist(NULL);
						if (cdb_sc_normal != status)
						{
							t_retry(status);
							continue;
						}
					}
					v->mvtype = MV_STR;
					v->str.addr = (char *)stringpool.free;
					v->str.len = data_len;
					stringpool.free += data_len;
					if (cs_addrs->read_write)
						cs_addrs->hdr->n_gets++;
					return TRUE;
				}
			} else
			{
				if (0 == dollar_tlevel)
				{
					if (0 == t_end(&gv_target->hist, NULL))
						continue;
				} else
				{
					status = tp_hist(NULL);
					if (cdb_sc_normal != status)
					{
						t_retry(status);
						continue;
					}
				}

				cs_addrs->hdr->n_gets++;
				return FALSE;
			}
		}
		t_retry(status);
	}
}
Exemple #10
0
mint	gvcst_data(void)
{
	blk_hdr_ptr_t	bp;
	boolean_t	do_rtsib;
	enum cdb_sc	status;
	mint		val;
	rec_hdr_ptr_t	rp;
	unsigned short	match, rsiz;
	srch_blk_status *bh;
	srch_hist	*rt_history;
	sm_uc_ptr_t	b_top;

	assert((gv_target->root < cs_addrs->ti->total_blks) || dollar_tlevel);
	T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL);
	assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		/* The following code is duplicated in gvcst_dataget. Any changes here might need to be reflected there as well */
		rt_history = gv_target->alt_hist;
		rt_history->h[0].blk_num = 0;
		if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL)))
		{
			t_retry(status);
			continue;
		}
		bh = gv_target->hist.h;
		bp = (blk_hdr_ptr_t)bh->buffaddr;
		rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
		b_top = bh->buffaddr + bp->bsiz;
		match = bh->curr_rec.match;
		do_rtsib = FALSE;
		if (gv_currkey->end + 1 == match)
		{
			val = 1;
			GET_USHORT(rsiz, &rp->rsiz);
			rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rsiz);
			if ((sm_uc_ptr_t)rp > b_top)
			{
				t_retry(cdb_sc_rmisalign);
				continue;
			} else if ((sm_uc_ptr_t)rp == b_top)
				do_rtsib = TRUE;
			else if (rp->cmpc >= gv_currkey->end)
				val += 10;
		} else if (match >= gv_currkey->end)
			val = 10;
		else
		{
			val = 0;
			if (rp == (rec_hdr_ptr_t)b_top)
				do_rtsib = TRUE;
		}
		if (do_rtsib && (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0))))
		{
			if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h))))
			{
				t_retry(status);
				continue;
			}
			if (rt_history->h[0].curr_rec.match >= gv_currkey->end)
			{
				assert(1 >= val);
				val += 10;
			}
		}
		if (!dollar_tlevel)
		{
			if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history,
				TN_NOT_SPECIFIED))
				continue;
		} else
		{
			status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history);
			if (cdb_sc_normal != status)
			{
				t_retry(status);
				continue;
			}
		}
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1);
		return val;
	}
}
Exemple #11
0
boolean_t gvcst_queryget2(mval *val, unsigned char *sn_ptr)
{
	blk_hdr_ptr_t	bp;
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	int		rsiz, key_size, data_len;
	rec_hdr_ptr_t	rp;
	srch_blk_status	*bh;
	srch_hist	*rt_history;
	unsigned short	temp_ushort;
	int		tmp_cmpc;
	DEBUG_ONLY(unsigned char *save_strp = NULL);

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYGETFAIL);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		two_histories = FALSE;
#if defined(DEBUG) && defined(UNIX)
			if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYGETFAIL == gtm_white_box_test_case_number))
			{
				status = cdb_sc_blknumerr;
				t_retry(status);
				continue;
			}
#endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0)))
		{
			found = TRUE;
			bh = &gv_target->hist.h[0];
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
			{
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_endtree == status)		/* end of tree */
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else  if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				} else
				{
					bh = &rt_history->h[0];
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				}
			}
			/* !found indicates that the end of tree has been reached (see call to
			 *  gvcst_rtsib).  If there is no more tree, don't bother doing expansion.
			 */
			if (found)
			{
				status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, (int4)((sm_uc_ptr_t)rp - bh->buffaddr),
						gv_altkey);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
				key_size = gv_altkey->end + 1;
				GET_RSIZ(rsiz, rp);
				data_len = rsiz + EVAL_CMPC(rp) - SIZEOF(rec_hdr) - key_size;
				if (data_len < 0  || (sm_uc_ptr_t)rp + rsiz > (sm_uc_ptr_t)bp + ((blk_hdr_ptr_t)bp)->bsiz)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(cdb_sc_rmisalign1);
					continue;
				}
				ENSURE_STP_FREE_SPACE(data_len);
				DEBUG_ONLY (
				if (!save_strp)
					save_strp = stringpool.free);
				assert(stringpool.top - stringpool.free >= data_len);
				memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
				/* Assumption: t_end/tp_hist will never cause stp_gcol() call BYPASSOK */
			}
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(!two_histories ? NULL : rt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			if (found)
			{
				DEBUG_ONLY(assert(save_strp == stringpool.free));
				/* Process val first. Already copied to string pool. */
				val->mvtype = MV_STR;
				val->str.addr = (char *)stringpool.free;
				val->str.len = data_len;
				stringpool.free += data_len;
				INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, 1);
			}
			return found;
		}
		t_retry(status);
	}
Exemple #12
0
bool gvcst_zprevious(void)
{
	static gv_key	*zprev_temp_key;
	static int4	zprev_temp_keysize = 0;
	blk_hdr_ptr_t	bp;
	bool		found, two_histories;
	enum cdb_sc	status;
	rec_hdr_ptr_t	rp;
	unsigned char	*c1, *c2, *ctop;
	srch_blk_status	*bh;
	srch_hist	*lft_history;

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVORDERFAIL);
	for (;;)
	{
		assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
		two_histories = FALSE;
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL)))
		{
			found = TRUE;
			bh = gv_target->hist.h;
			if (0 == bh->prev_rec.offset)
			{
				two_histories = TRUE;
				lft_history = gv_target->alt_hist;
				status = gvcst_lftsib(lft_history);
				if (cdb_sc_normal == status)
				{
					bh = lft_history->h;
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
				} else  if (cdb_sc_endtree == status)
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else
				{
					t_retry(status);
					continue;
				}
			}
			if (found)
			{	/* store new subscipt */
				assert(gv_altkey->top == gv_currkey->top);
				assert(gv_altkey->top == gv_keysize);
				assert(gv_currkey->end < gv_currkey->top);
				rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->prev_rec.offset);
				bp = (blk_hdr_ptr_t)bh->buffaddr;
				c1 = gv_altkey->base;
				memcpy(c1, gv_currkey->base, bh->prev_rec.match);
				c1 += bh->prev_rec.match;
				assert(zprev_temp_keysize <= gv_keysize);
				if (zprev_temp_keysize < gv_keysize)
				{
					zprev_temp_keysize = gv_keysize;
					GVKEY_INIT(zprev_temp_key, zprev_temp_keysize);
				}
				assert(zprev_temp_key->top >= gv_currkey->top);
				if (cdb_sc_normal != (status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, bh->prev_rec.offset,
									zprev_temp_key)))
				{
					t_retry(status);
					continue;
				}
				if ((zprev_temp_key->end < gv_currkey->end) && (zprev_temp_key->end <= gv_currkey->prev))
					found = FALSE;
				else
				{
					c2 = zprev_temp_key->base + bh->prev_rec.match;
					ctop = zprev_temp_key->base + zprev_temp_key->end;
					for (;;)
					{
						if (c2 >= ctop)
						{
							assert(CDB_STAGNATE > t_tries);
							status = cdb_sc_rmisalign;
							goto restart;	/* goto needed because of nested FOR loop */
						}
	 					if (0 == (*c1++ = *c2++))
						{
							*c1 = 0;
							break;
						}
					}
				}
				gv_altkey->end = c1 - gv_altkey->base;
				assert(gv_altkey->end < gv_altkey->top);
			}
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, two_histories ? lft_history : NULL, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(two_histories ? lft_history : NULL);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			assert(cs_data == cs_addrs->hdr);
			INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_zprev, 1);
			return (found && (bh->prev_rec.match >= gv_currkey->prev));
		}
restart:	t_retry(status);
	}
}
Exemple #13
0
/******************************************************************************************
Input Parameters:
	level: level of working block
	dest_blk_id: last destination used for swap
Output Parameters:
	kill_set_ptr: Kill set to be freed
	*exclude_glist_ptr: List of globals not to be moved for a swap destination
Input/Output Parameters:
	gv_target : as working block's history
	reorg_gv_target->hist : as desitnitions block's history
 ******************************************************************************************/
enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr)
{
	unsigned char		x_blk_lmap;
	unsigned short		temp_ushort;
	int			rec_size1, rec_size2;
	int			wlevel, nslevel, dest_blk_level;
	int			piece_len1, piece_len2, first_offset, second_offset,
				work_blk_size, work_parent_size, dest_blk_size, dest_parent_size;
	int			dest_child_cycle;
	int			blk_seg_cnt, blk_size;
	trans_num		ctn;
	int			key_len, key_len_dir;
	block_id		dest_blk_id, work_blk_id, child1, child2;
	enum cdb_sc		status;
	srch_hist 		*dest_hist_ptr, *dir_hist_ptr;
	cache_rec_ptr_t		dest_child_cr;
	blk_segment		*bs1, *bs_ptr;
	sm_uc_ptr_t		saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr,
				bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1;
	boolean_t		gbl_target_was_set, blk_was_free, deleted;
	gv_namehead		*save_targ;
	srch_blk_status		bmlhist, destblkhist, *hist_ptr;
	unsigned char    	save_cw_set_depth;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned int		bsiz;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	dest_blk_id = *pdest_blk_id;
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	if (NULL == TREF(gv_reorgkey))
		GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ));
	dest_hist_ptr = &(reorg_gv_target->hist);
	dir_hist_ptr = reorg_gv_target->alt_hist;
	blk_size = cs_data->blk_size;
	work_parent_ptr = gv_target->hist.h[level+1].buffaddr;
	work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz;
	work_blk_ptr = gv_target->hist.h[level].buffaddr;
	work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz;
	work_blk_id = gv_target->hist.h[level].blk_num;
	if (blk_size < work_blk_size)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	cws_reorg_remove_index = 0;
	/*===== Infinite loop to find the destination block =====*/
	for ( ; ; )
	{
		blk_was_free = FALSE;
		INCR_BLK_NUM(dest_blk_id);
		/* A Pre-order traversal should not cause a child block to go to its parent.
		 * However, in case it happens because already the organization was like that or for any other reason, skip swap.
		 * If we decide to swap, code below should be changed to take care of the special case.
		 * Still a grand-child can go to its grand-parent. This is rare and following code can handle it.
		 */
		if (dest_blk_id == gv_target->hist.h[level+1].blk_num)
			continue;
		if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id)
		{
			*pdest_blk_id = dest_blk_id;
			return cdb_sc_oprnotneeded;
		}
		ctn = cs_addrs->ti->curr_tn;
		/* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk")
		 * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below).
		 * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to
		 * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not
		 * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and
		 * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon
		 * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not
		 * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing
		 * a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array"
		 * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread
		 * in one iteration down below. And that number is the sum of
		 *	+     MAX_BT_DEPTH : for the t_qread while loop down the tree done below
		 *	+ 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below
		 *	+ 2                : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a
		 *			     bitmap block done inside the call to get_lmap below
		 *	= 3 * MAX_BT_DEPTH + 2
		 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2.
		 * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h
		 */
		/* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */
		for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--)
		{
			deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]);
			assert(deleted);
		}
		/* read corresponding bitmap block before attempting to read destination  block.
		 * if bitmap indicates block is free, we will not read the destination block
		 */
		bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
		if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap ||
			((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) ||
			((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badbitmap;
		}
		if (BLK_FREE != x_blk_lmap)
		{	/* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block
			 * in case we later detect that the before-image needs to be written.
			 */
			if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			destblkhist.blk_num = dest_blk_id;
			destblkhist.buffaddr = dest_blk_ptr;
			destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl;
		}
		if (BLK_BUSY != x_blk_lmap)
		{	/* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */
			blk_was_free = TRUE;
			break;
		}
		/* dest_blk_id might contain a *-record only.
		 * So follow the pointer to go to the data/index block, which has a non-* key to search.
		 */
		nslevel = dest_blk_level;
		if (MAX_BT_DEPTH <= nslevel)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_maxlvl;
		}
		rec_base = dest_blk_ptr + SIZEOF(blk_hdr);
		GET_RSIZ(rec_size1, rec_base);
		tblk_ptr = dest_blk_ptr;
		while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel))
		{
			GET_LONG(child1, (rec_base + SIZEOF(rec_hdr)));
			if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_rdfail;
			}
			if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			/* leaf of a killed GVT can have block header only.   Skip those blocks */
			if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
				break;
			nslevel--;
			rec_base = tblk_ptr + SIZEOF(blk_hdr);
			GET_RSIZ(rec_size1, rec_base);
		}
		/* leaf of a killed GVT can have block header only.   Skip those blocks */
		if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
			continue;
		/* get length of global variable name (do not read subscript) for dest_blk_id */
		GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr));
		/* key_len = length of 1st key value (including subscript) for dest_blk_id */
		GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr));
		if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len))
		{	/* Earlier used to restart here always. But dest_blk_id can be a block,
			 * which is just killed and still marked busy.  Skip it, if we are in last retry.
			 */
			if (CDB_STAGNATE <= t_tries)
				continue;
			else
				return cdb_sc_blkmod;
		}
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir);
		(TREF(gv_reorgkey))->base[key_len_dir] = 0;
		(TREF(gv_reorgkey))->end = key_len_dir;
		if (exclude_glist_ptr->next)
		{	/* exclude blocks for globals in the list of EXCLUDE option */
			if  (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr))
				continue;
		}
		save_targ = gv_target;
		if (INVALID_GV_TARGET != reset_gv_target)
			gbl_target_was_set = TRUE;
		else
		{
			gbl_target_was_set = FALSE;
			reset_gv_target = save_targ;
		}
		gv_target = reorg_gv_target;
		gv_target->root = cs_addrs->dir_tree->root;
		gv_target->clue.end = 0;
		/* assign Directory tree path to find dest_blk_id in dir_hist_ptr */
		status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr);
		if (cdb_sc_normal != status)
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return status;
		}
		if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1)
		{	/* may be in a kill_set of another process */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		for (wlevel = 0; wlevel <= dir_hist_ptr->depth &&
			dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++);
		if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id)
		{	/* do not swap a dir_tree block */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		/* gv_reorgkey will now have the first key from dest_blk_id,
		 * or, from a descendant of dest_blk_id (in case it had a *-key only).
		 */
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len);
		(TREF(gv_reorgkey))->end = key_len - 1;
		GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr));
		/* Get root of GVT for dest_blk_id */
		GET_LONG(gv_target->root,
			dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir);
		if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return cdb_sc_blkmod;
		}
		/* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */
		gv_target->clue.end = 0;
		status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
		RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
		if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */
			dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */
			continue;
		if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			return (cdb_sc_normal != status ? status : cdb_sc_blkmod);
		}
		for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++)
			dest_hist_ptr->h[wlevel].tn = ctn;
		dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr;
		dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz;
		dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr;
		dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz;
		break;
	}
	/*===== End of infinite loop to find the destination block =====*/
	/*-----------------------------------------------------
	   Now modify blocks for swapping. Maximum of 4 blocks.
	   -----------------------------------------------------*/
	if (!blk_was_free)
	{	/* 1: dest_blk_id into work_blk_id */
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI (bs_ptr,bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level].blk_num == work_blk_id);
		assert(gv_target->hist.h[level].buffaddr == work_blk_ptr);
		t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN);
	}
	/* 2: work_blk_id into dest_blk_id */
	if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* work_blk_id will be swapped with its child.
		 * This is the only vertical swap.  Here working block goes to its child.
		 * Working block cannot goto its parent because of traversal
		 */
		if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, dest_parent_size, unsigned char);
		memcpy(saved_blk, dest_parent_ptr, dest_parent_size);
		first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset;
		GET_RSIZ(rec_size1, saved_blk + first_offset);
		if (work_blk_size < first_offset + rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		piece_len1 =  first_offset + rec_size1;
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num);
		assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr);
		t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
	} else /* free block or, when working block does not move vertically (swap with parent/child) */
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, work_blk_size, unsigned char);
		memcpy(saved_blk, work_blk_ptr, work_blk_size);
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		if (blk_was_free)
		{
			tmpcse = &cw_set[cw_set_depth];
			t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level);
			/* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode
			 * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here.
			 */
			assert(dest_blk_id == tmpcse->blk);
			tmpcse->mode = gds_t_acquired;
			/* If snapshots are in progress, we might want to read the before images of the FREE blocks also.
			 * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence
			 * will not read the before images of the FREE blocks in t_end. To workaround this, set
			 * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of
			 * the FREE blocks if needed.
			 */
			(BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse);
			/* No need to write before-image in case the block is FREE. In case the database had never been fully
			 * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered
			 * FREE (i.e. no need to write before-images since backward journal recovery will never be expected
			 * to take the database to a point BEFORE the mupip upgrade).
			 */
			if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once)
				tmpcse->old_block = NULL;
			else
			{	/* Destination is a recycled block that needs a before image */
				tmpcse->old_block = destblkhist.buffaddr;
				/* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */
				tmpcse->cr = destblkhist.cr;
				tmpcse->cycle = destblkhist.cycle;
				jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL;
				if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
				{	/* Compute CHECKSUM for writing PBLK record before getting crit.
					 * It is possible that we are reading a block that is actually marked free in
					 * the bitmap (due to concurrency issues at this point). Therefore we might be
					 * actually reading uninitialized block headers and in turn a bad value of
					 * "old_block->bsiz". Restart if we ever access a buffer whose size is greater
					 * than the db block size.
					 */
					bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
					if (bsiz > blk_size)
					{
						assert(CDB_STAGNATE > t_tries);
						return cdb_sc_lostbmlcr;
					}
					JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz);
				}
			}
			assert(GDSVCURR == tmpcse->ondsk_blkver);	/* should have been set by t_create above */
		} else
		{
			hist_ptr = &dest_hist_ptr->h[dest_blk_level];
			assert(dest_blk_id == hist_ptr->blk_num);
			assert(dest_blk_ptr == hist_ptr->buffaddr);
			t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	if (!blk_was_free)
	{	/* 3: Parent of destination block (may be parent of working block too) */
		if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* dest parent == work_blk parent */
			BLK_INIT(bs_ptr, bs1);
			/* Interchange pointer to dest_blk_id and work_blk_id */
			if (level != dest_blk_level ||
				gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				first_offset = gv_target->hist.h[level+1].curr_rec.offset;
				second_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
			} else
			{
				first_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
				second_offset = gv_target->hist.h[level+1].curr_rec.offset;
			}
			GET_RSIZ(rec_size1, dest_parent_ptr + first_offset);
			GET_RSIZ(rec_size2, dest_parent_ptr + second_offset);
			if (dest_parent_size < first_offset + rec_size1 ||
				dest_parent_size < second_offset + rec_size2 ||
				BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			piece_len1 =  first_offset + rec_size1 - SIZEOF(block_id);
			piece_len2 =  second_offset + rec_size2 - SIZEOF(block_id);
			GET_LONG(child1, dest_parent_ptr + piece_len1);
			GET_LONG(child2, dest_parent_ptr + piece_len2);
			BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child2);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1,
				second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1);
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child1);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2,
				dest_parent_size - second_offset - rec_size2);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(level == dest_blk_level);
			assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr);
			t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
		} else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* Destination block moved in the position of working block.
			 * So destination block's parent's pointer should be changed to work_blk_id
			 */
			BLK_INIT(bs_ptr, bs1);
			GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset);
			if (dest_parent_size < rec_size1 +  dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset ||
				BSTAR_REC_SIZE > rec_size1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr),
			    dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, work_blk_id);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1,
				dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset  - rec_size1);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr);
			t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1,
				FALSE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	/* 4: Parent of working block, if different than destination's parent or, destination was a free block */
	if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id  */
		GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset));
		if (work_parent_size < rec_size1 +  gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr),
			gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, dest_blk_id);
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1,
			work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr);
		t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
	}
	/* else already taken care of, when dest_blk_id moved */
	if (blk_was_free)
	{	/* A free/recycled block will become busy block.
		 * So the local bitmap must be updated.
		 * Local bit map block will be added in the list of update arrray for concurrency check and
		 * 	also the cw_set element will be created to mark the free/recycled block as free.
		 * kill_set_ptr will save the block which will become free.
		 */
		child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */
		bmlhist.buffaddr = bmp_buff;
		bmlhist.blk_num = child1;
		child1 = dest_blk_id - child1;
		assert(child1);
		PUT_LONG(update_array_ptr, child1);
		/* Need to put bit maps on the end of the cw set for concurrency checking.
		 * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth".
		 * Hence the save and restore logic (for "cw_set_depth") below.
		 */
		save_cw_set_depth = cw_set_depth;
		assert(!cw_map_depth);
		t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1);	/* will increment cw_set_depth */
		cw_map_depth = cw_set_depth;		/* set cw_map_depth to the latest cw_set_depth */
		cw_set_depth = save_cw_set_depth;	/* restore cw_set_depth */
		/* t_write_map simulation end */
		update_array_ptr += SIZEOF(block_id);
		child1 = 0;
		PUT_LONG(update_array_ptr, child1);
		update_array_ptr += SIZEOF(block_id);
		assert(1 == cw_set[cw_map_depth - 1].reference_cnt);	/* 1 free block is now becoming BLK_USED in the bitmap */
		/* working block will be removed */
		kill_set_ptr->blk[kill_set_ptr->used].flag = 0;
		kill_set_ptr->blk[kill_set_ptr->used].level = 0;
		kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id;
	}
	*pdest_blk_id = dest_blk_id;
	return cdb_sc_normal;
}
Exemple #14
0
/****************************************************************
Input Parameter:
	gn = Global name
	exclude_glist_ptr = list of globals in EXCLUDE option
	index_fill_factor = index blocks' fill factor
	data_fill_factor = data blocks' fill factor
Input/Output Parameter:
	resume = resume flag
	reorg_op = What operations to do (coalesce or, swap or, split) [Default is all]
			[Only for debugging]
 ****************************************************************/
boolean_t mu_reorg(mval *gn, glist *exclude_glist_ptr, boolean_t *resume, int index_fill_factor, int data_fill_factor, int reorg_op)
{
	boolean_t		end_of_tree = FALSE, complete_merge, detailed_log;
	int			rec_size;
	/*
	 *
	 * "level" is the level of the working block.
	 * "pre_order_successor_level" is pre_order successor level except in the case
	 * where we are in a left-most descent of the tree
	 * in which case pre_order_successor_level will be the maximum height of that subtree
	 * until we reach the leaf level block .
	 * In other words, pre_order_successor_level and level variable controls the iterative pre-order traversal.
	 * We start reorg from the (root_level - 1) to 0. That is, level = pre_order_successor_level:-1:0.
	 */
	int			pre_order_successor_level, level;
	static block_id		dest_blk_id = 0;
	int			tkeysize;
	int			blks_killed, blks_processed, blks_reused, blks_coalesced, blks_split, blks_swapped,
				count, file_extended, lvls_reduced;
	int			d_max_fill, i_max_fill, blk_size, cur_blk_size, max_fill, toler, d_toler, i_toler;
	int			cnt1, cnt2;
	kill_set		kill_set_list;
	sm_uc_ptr_t		rPtr1;
	enum cdb_sc		status;
	srch_hist		*rtsib_hist;
	jnl_buffer_ptr_t	jbp;
	trans_num		ret_tn;

	error_def(ERR_MUREORGFAIL);
	error_def(ERR_DBRDONLY);
	error_def(ERR_GBLNOEXIST);
	error_def(ERR_MAXBTLEVEL);

	t_err = ERR_MUREORGFAIL;
	kill_set_tail = &kill_set_list;
	/* Initialization for current global */
	op_gvname(VARLSTCNT(1) gn);
	/* Cannot proceed for read-only data files */
	if (gv_cur_region->read_only)
	{
		gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		return FALSE;
	}
	dest_blk_id = cs_addrs->reorg_last_dest;
	inctn_opcode = inctn_mu_reorg;

	/* If resume option is present, then reorg_restart_key should be not null.
	 * Skip all globals until we are in the region for that global.
	 * Get the reorg_restart_key and reorg_restart_block from database header and restart from there.
	 */
	if (*resume && 0 != cs_data->reorg_restart_key[0])
	{
		/* resume from last key reorged in GVT */
		GET_KEY_LEN(tkeysize, &cs_data->reorg_restart_key[0]);
		memcpy(gv_currkey->base, cs_data->reorg_restart_key, tkeysize);
		gv_currkey->end = tkeysize - 1;
		dest_blk_id = cs_data->reorg_restart_block;
 		if (0 == memcmp(cs_data->reorg_restart_key, gn->str.addr, gn->str.len))
			/* Going to resume from current global, so it resumed and make it false */
			*resume = FALSE;
	} else
	{
		/* start from the left most leaf */
		memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
		gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
		gv_currkey->end = gn->str.len + 1;
	}
	if (*resume)
	{
		util_out_print("REORG cannot be resumed from this point, Skipping this global...", FLUSH);
		memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
		gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
		gv_currkey->end = gn->str.len + 1;
		return TRUE;
	}
 	memcpy(&gv_currkey_next_reorg->base[0], &gv_currkey->base[0], gv_currkey->end + 1);
	gv_currkey_next_reorg->end =  gv_currkey->end;
	if (2 > dest_blk_id)
		dest_blk_id = 2; /* we know that first block is bitmap and next one is directory tree root */
	file_extended = cs_data->trans_hist.total_blks;
	blk_size = cs_data->blk_size;
	d_max_fill = (double)data_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes;
	i_max_fill = (double)index_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes;
	d_toler = (double) DATA_FILL_TOLERANCE * blk_size / 100.0;
	i_toler = (double) INDEX_FILL_TOLERANCE * blk_size / 100.0;
	blks_killed = blks_processed = blks_reused = lvls_reduced = blks_coalesced = blks_split = blks_swapped = 0;
	pre_order_successor_level = level = MAX_BT_DEPTH + 1; /* Just some high value to initialize */

	/* --- more detailed debugging information --- */
	if (detailed_log = reorg_op & DETAIL)
		util_out_print("STARTING to work on global ^!AD from region !AD", TRUE,
			gn->str.len, gn->str.addr, REG_LEN_STR(gv_cur_region));

	/* In each iteration of MAIN loop, a working block is processed for a GVT */
	for (; ;)	/* ================ START MAIN LOOP ================ */
	{
		/* If right sibling is completely merged with the working block, do not swap the working block
		 * with its final destination block. Continue trying next right sibling. Swap only at the end.
		 */
		complete_merge = TRUE;
		while(complete_merge)	/* === START WHILE COMPLETE_MERGE === */
		{
			if (mu_ctrlc_occurred || mu_ctrly_occurred)
			{
				cs_data->reorg_restart_block = dest_blk_id;
				memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
				return FALSE;
			}
			complete_merge = FALSE;
			blks_processed++;
			t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
			/* Folllowing for loop is to handle concurrency retry for split/coalesce */
			for (; ;)		/* === SPLIT-COALESCE LOOP STARTS === */
			{
				gv_target->clue.end = 0;
				/* search gv_currkey and get the result in gv_target */
				if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
                                {
					if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz
						&& 1 == gv_target->hist.depth)
					{
						if (cs_addrs->now_crit)
						{
							t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
							gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
							reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
								file_extended, lvls_reduced,
								blks_coalesced, blks_split, blks_swapped);
							return TRUE; /* It is not an error that global was killed */
						} else
						{
							assert(CDB_STAGNATE > t_tries);
							t_retry(status);
							continue;
						}
					}
                                }
				if (gv_target->hist.depth <= level)
				{
					/* Will come here
					 * 	1) first iteration of the for loop (since level == MAX_BT_DEPTH + 1) or,
					 *	2) tree depth decreased for mu_reduce_level or, M-kill
					 */
					pre_order_successor_level = gv_target->hist.depth - 1;
					if (MAX_BT_DEPTH + 1 != level)
					{
						/* break the loop when tree depth decreased (case 2) */
						level = pre_order_successor_level;
						break;
					}
					level = pre_order_successor_level;
				}
				max_fill = (0 == level)? d_max_fill : i_max_fill;
				toler = (0 == level)? d_toler:i_toler;
				cur_blk_size =  ((blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr))->bsiz;
				if (cur_blk_size > max_fill + toler && 0 == (reorg_op & NOSPLIT)) /* SPLIT BLOCK */
				{
					cnt1 = cnt2 = 0;
					/* history of current working block is in gv_target */
					status = mu_split(level, i_max_fill, d_max_fill, &cnt1, &cnt2);
					if (cdb_sc_maxlvl == status)
					{
						gtm_putmsg(VARLSTCNT(4) ERR_MAXBTLEVEL, 2, gn->str.len, gn->str.addr);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
							file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return FALSE;
					} else if (cdb_sc_normal == status)
					{
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							continue;
						}
						if (detailed_log)
							log_detailed_log("SPL", &(gv_target->hist), NULL, level, NULL, ret_tn);
						blks_reused += cnt1;
						lvls_reduced -= cnt2;
						blks_split++;
						break;
					} else if (cdb_sc_oprnotneeded == status)
					{	/* undo any update_array/cw_set changes and DROP THRU to mu_clsce */
						cw_set_depth = 0;
						CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
						assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if SPLIT BLOCK */
				/* We are here because, mu_split() was not called or, split was not done or, not required */
				rtsib_hist = gv_target->alt_hist;
				status = gvcst_rtsib(rtsib_hist, level);
				if (cdb_sc_normal != status && cdb_sc_endtree != status)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				}
				if (cdb_sc_endtree == status)
				{
					if (0 == level)
						end_of_tree = TRUE;
					break;
				} else if (0 == level)
					pre_order_successor_level = rtsib_hist->depth - 1;
				/* COALESCE WITH RTSIB */
				kill_set_list.used = 0;
				if (cur_blk_size < max_fill - toler && 0 == (reorg_op & NOCOALESCE))
				{
					/* histories are sent in &gv_target->hist and gv_target->alt_hist */
					status = mu_clsce(level, i_max_fill, d_max_fill, &kill_set_list, &complete_merge);
					if (cdb_sc_normal == status)
					{
						if (level) /* delete lower elements of array, t_end might confuse */
						{
							memmove(&rtsib_hist->h[0], &rtsib_hist->h[level],
								SIZEOF(srch_blk_status)*(rtsib_hist->depth - level + 2));
							rtsib_hist->depth = rtsib_hist->depth - level;
						}
						if (0 < kill_set_list.used)     /* increase kill_in_prog */
						{
							need_kip_incr = TRUE;
							if (!cs_addrs->now_crit)	/* Do not sleep while holding crit */
								WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
						}
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), rtsib_hist,
							TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							assert(NULL == kip_csa);
							if (level)
							{	/* reinitialize level member in rtsib_hist srch_blk_status' */
								for (count = 0; count < MAX_BT_DEPTH; count++)
									rtsib_hist->h[count].level = count;
							}
							continue;
						}
						if (level)
						{	/* reinitialize level member in rtsib_hist srch_blk_status' */
							for (count = 0; count < MAX_BT_DEPTH; count++)
								rtsib_hist->h[count].level = count;
						}
						if (detailed_log)
							log_detailed_log("CLS", &(gv_target->hist), rtsib_hist, level,
								NULL, ret_tn);
						assert(0 < kill_set_list.used || (NULL == kip_csa));
						if (0 < kill_set_list.used)     /* decrease kill_in_prog */
						{
							gvcst_kill_sort(&kill_set_list);
							GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg,
									inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
							DECR_KIP(cs_data, cs_addrs, kip_csa);
							if (detailed_log)
								log_detailed_log("KIL", &(gv_target->hist), NULL, level,
									&kill_set_list, ret_tn);
							blks_killed += kill_set_list.used;
						}
						blks_coalesced++;
						break;
					} else if (cdb_sc_oprnotneeded == status)
					{	/* undo any update_array/cw_set changes and DROP THRU to t_end */
						cw_set_depth = 0;
						CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
						assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if try coalesce */
				if (0 == level)
				{
					/* Note: In data block level:
					 *      if split is successful or,
					 *	if coalesce is successful without a complete merge of rtsib,
					 *	then gv_currkey_next_reorg is already set from the called function.
					 *	if split or, coalesce do a retry or,
					 *	if coalesce is successful with a complete merge then
					 *	gv_currkey will not be changed.
					 * If split or, coalesce is not successful or, not needed then
					 *	here gv_currkey_next_reorg will be set from right sibling
					 */
					cw_set_depth = cw_map_depth = 0;
					GET_KEY_LEN(tkeysize, rtsib_hist->h[0].buffaddr + SIZEOF(blk_hdr) + SIZEOF(rec_hdr));
					if (2 < tkeysize && MAX_KEY_SZ >= tkeysize)
					{
						memcpy(&(gv_currkey_next_reorg->base[0]), rtsib_hist->h[0].buffaddr
							+ SIZEOF(blk_hdr) +SIZEOF(rec_hdr), tkeysize);
						gv_currkey_next_reorg->end = tkeysize - 1;
						inctn_opcode = inctn_invalid_op; /* temporary reset; satisfy an assert in t_end() */
						assert(UPDTRNS_DB_UPDATED_MASK == update_trans);
						update_trans = 0; /* tell t_end, this is no longer an update transaction */
						if ((trans_num)0 == (ret_tn = t_end(rtsib_hist, NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							inctn_opcode = inctn_mu_reorg;	/* reset inctn_opcode to its default */
							update_trans = UPDTRNS_DB_UPDATED_MASK;/* reset update_trans to old value */
							assert(NULL == kip_csa);
							continue;
						}
						/* There is no need to reset update_trans in case of a successful "t_end" call.
						 * This is because before the next call to "t_end" we should have a call to
						 * "t_begin" which will reset update_trans anyways.
						 */
						inctn_opcode = inctn_mu_reorg;	/* reset inctn_opcode to its default */
						if (detailed_log)
							log_detailed_log("NOU", rtsib_hist, NULL, level, NULL, ret_tn);
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if (0 == level) */
				break;
			}/* === SPLIT-COALESCE LOOP END === */
			t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
		}/* === START WHILE COMPLETE_MERGE === */

		if (mu_ctrlc_occurred || mu_ctrly_occurred)
		{
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end+1);
			return FALSE;
		}
		/* Now swap the working block */
		if (0 == (reorg_op & NOSWAP))
		{
			t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
			/* Following loop is to handle concurrency retry for swap */
			for (; ;)	/* === START OF SWAP LOOP === */
			{
				kill_set_list.used = 0;
				gv_target->clue.end = 0;
				/* search gv_currkey and get the result in gv_target */
				if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
                                {
					if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz
						&& 1 == gv_target->hist.depth)
					{
						if (cs_addrs->now_crit)
						{
							t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
							gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
							reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
								file_extended, lvls_reduced,
								blks_coalesced, blks_split, blks_swapped);
							return TRUE; /* It is not an error that global was killed */
						} else
						{
							assert(CDB_STAGNATE > t_tries);
							t_retry(status);
							continue;
						}
					}
                                }
				if (gv_target->hist.depth <= level)
					break;
				/* swap working block with appropriate dest_blk_id block.
				   Historys are sent as gv_target->hist and reorg_gv_target->hist */
				mu_reorg_in_swap_blk = TRUE;
				status = mu_swap_blk(level, &dest_blk_id, &kill_set_list, exclude_glist_ptr);
				mu_reorg_in_swap_blk = FALSE;
				if (cdb_sc_oprnotneeded == status)
				{
					if (cs_data->trans_hist.total_blks <= dest_blk_id)
					{
						util_out_print("REORG may be incomplete for this global.", TRUE);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
							file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return TRUE;
					}
				} else if (cdb_sc_normal == status)
				{
					if (0 < kill_set_list.used)
					{
						need_kip_incr = TRUE;
						if (!cs_addrs->now_crit)	/* Do not sleep while holding crit */
							WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
						/* second history not needed, because,
						   we are reusing a free block, which does not need history */
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							assert(NULL == kip_csa);
							DECR_BLK_NUM(dest_blk_id);
							continue;
						}
						if (detailed_log)
							log_detailed_log("SWA", &(gv_target->hist), NULL, level, NULL, ret_tn);
						gvcst_kill_sort(&kill_set_list);
						GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg,
								inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
						DECR_KIP(cs_data, cs_addrs, kip_csa);
						if (detailed_log)
							log_detailed_log("KIL", &(gv_target->hist), NULL, level,
								&kill_set_list, ret_tn);
						blks_reused += kill_set_list.used;
						blks_killed += kill_set_list.used;
					}
					/* gv_target->hist is for working block's history, and
					   reorg_gv_target->hist is for destinition block's history.
					   Note: gv_target and reorg_gv_target can be part of different GVT.  */
					else if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), &(reorg_gv_target->hist),
						TN_NOT_SPECIFIED)))
					{
						need_kip_incr = FALSE;
						assert(NULL == kip_csa);
						DECR_BLK_NUM(dest_blk_id);
						continue;
					}
					if ((0 >= kill_set_list.used) && detailed_log)
						log_detailed_log("SWA", &(gv_target->hist), &(reorg_gv_target->hist),
							level, NULL, ret_tn);
					blks_swapped++;
					if (reorg_op & SWAPHIST)
						util_out_print("Dest !SL From !SL", TRUE, dest_blk_id,
							gv_target->hist.h[level].blk_num);
				} else
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				}
				break;
			}	/* === END OF SWAP LOOP === */
			t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
		}
		if (mu_ctrlc_occurred || mu_ctrly_occurred)
		{
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
			return FALSE;
		}
		if (end_of_tree)
			break;
		if (0 < level)
			level--; /* Order of reorg is root towards leaf */
		else
		{
			level = pre_order_successor_level;
			memcpy(&gv_currkey->base[0], &gv_currkey_next_reorg->base[0], gv_currkey_next_reorg->end + 1);
			gv_currkey->end =  gv_currkey_next_reorg->end;
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
		}
	}		/* ================ END MAIN LOOP ================ */

	/* =========== START REDUCE LEVEL ============== */
	memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
	gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
	gv_currkey->end = gn->str.len + 1;
	for (;;)	/* Reduce level continues until it fails to reduce */
	{
		t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
		cnt1 = 0;
		for (; ;) 	/* main reduce level loop starts */
		{
			kill_set_list.used = 0;
			gv_target->clue.end = 0;
			/* search gv_currkey and get the result in gv_target */
			if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(status);
				continue;
			} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
			{
				if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz
					&& 1 == gv_target->hist.depth)
				{
					if (cs_addrs->now_crit)
					{
						t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
						gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
							file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return TRUE; /* It is not an error that global was killed */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				}
			}
			if (gv_target->hist.depth <= level)
				break;
			/* History is passed in gv_target->hist */
			status = mu_reduce_level(&kill_set_list);
			if (cdb_sc_oprnotneeded != status && cdb_sc_normal != status)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(status);
				continue;
			} else if (cdb_sc_normal == status)
			{
				assert(0 < kill_set_list.used);
				need_kip_incr = TRUE;
				if (!cs_addrs->now_crit)	/* Do not sleep while holding crit */
					WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
				if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
				{
					need_kip_incr = FALSE;
					assert(NULL == kip_csa);
					continue;
				}
				if (detailed_log)
					log_detailed_log("RDL", &(gv_target->hist), NULL, level, NULL, ret_tn);
				gvcst_kill_sort(&kill_set_list);
				GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg,
						inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
				DECR_KIP(cs_data, cs_addrs, kip_csa);
				if (detailed_log)
					log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn);
				blks_reused += kill_set_list.used;
				blks_killed += kill_set_list.used;
				cnt1 = 1;
				lvls_reduced++;
			}
			break;
		} 		/* main reduce level loop ends */
		t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
		if (0 == cnt1)
			break;
	}
	/* =========== END REDUCE LEVEL ===========*/

	reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused,
		file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
	return TRUE;

} /* end mu_reorg() */