Beispiel #1
0
/*
	Marshal the header without reassigning a new timestamp.
*/
long SDMmessage::MarshalHeaderOldTimeStamp(char* buf)
{
	buf[0] = MsgName;
	PUT_LONG(&buf[1],sec);
	PUT_LONG(&buf[5],subsec);
	PUT_USHORT(&buf[9],msg_length);
	return HEADER_SIZE;
}
Beispiel #2
0
long SDMmessage::MarshalHeader(char* buf)
{
#ifdef __VXWORKS__
	struct timespec time;
	clock_gettime(CLOCK_REALTIME, &time);
	sec = time.tv_sec;
	subsec = time.tv_nsec/1000;
#else
	struct timeval time;
	gettimeofday(&time,NULL);
	sec = time.tv_sec;
	subsec = time.tv_usec;
#endif
	buf[0] = MsgName;
	PUT_LONG(&buf[1],sec);
	PUT_LONG(&buf[5],subsec);
	PUT_USHORT(&buf[9],msg_length);
	return HEADER_SIZE;
}
/*
 ****************************************************************
 *	Atualiza o bloco de informações da FAT32		*
 ****************************************************************
 */
void
update_fat32_info (void)
{
	UNI		*up = &uni;
	FSINFO		fsinfo;

	/*
	 *	Só é chamada se for RW
	 */
	if (up->u_fs_info == 0)
		return;
#undef	DEBUG
#ifdef	DEBUG
	printf
	(	"u_disk_infree = %d, u_infree = %d\n",
		up->u_disk_infree, up->u_infree
	);
#endif	DEBUG

	if (up->u_disk_infree == up->u_infree)
		return;

	/*
	 *	Atualiza o bloco de informações
	 */
#ifdef	DEBUG
	printf ("Atualizando ...\n");
#endif	DEBUG

	bread (up->u_fs_info,	  &fsinfo);
   /***	bread (up->u_fs_info + 1, (void *)&fsinfo + BLSZ); ***/

	PUT_LONG (up->u_infree, &fsinfo.fs_infree);

	bwrite (up->u_fs_info,	  &fsinfo);
   /***	bwrite (up->u_fs_info + 1, (void *)&fsinfo + BLSZ); ***/

	up->u_disk_infree = up->u_infree;

}	/* update_fat32_info */
Beispiel #4
0
/* Finds a free block and adds information to update array and cw_set */
block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id,
		sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn)
{
	sgmnt_data_ptr_t	csd;
	sgmnt_addrs		*csa;
	node_local_ptr_t	cnl;
	srch_blk_status		bmlhist, freeblkhist;
	block_id		hint_blk_num, free_blk_id, parent_blk_id;
	boolean_t		free_blk_recycled;
	int4			master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap;
	uint4			total_blks;
	int			blk_seg_cnt, blk_size;
	sm_uc_ptr_t		parent_blk_ptr, bn_ptr, saved_blk;
	blk_segment		*bs1, *bs_ptr;
	int			parent_blk_size, child_blk_size, bsiz;
	int			rec_size1, curr_offset, bpntr_end, hdr_len;
	int			tmp_cmpc;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned short		temp_ushort;
	unsigned long		temp_long;
	unsigned char		save_cw_set_depth;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csd = cs_data;
	csa = cs_addrs;
	cnl = csa->nl;
	blk_size = csd->blk_size;
	/* Find a free/recycled block for new block location. */
	hint_blk_num = 0;
	total_blks = csa->ti->total_blks;
	num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps);
	if ((NO_FREE_SPACE == master_bit))
	{
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP;
	if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr)))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry((enum cdb_sc)rdfail_detail);
		return RETRY_SWAP;
	}
	hint_bit = 0;
	maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num;
	free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled);
	free_blk_id = bmlhist.blk_num + free_bit;
	if (DIR_ROOT >= free_blk_id)
	{	/* Bitmap block 0 and directory tree root block 1 should always be marked busy. */
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_badbitmap);
		return RETRY_SWAP;
	}
	if (child_blk_id <= free_blk_id)
	{	/* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check
		 * if the block is to be swapped into a higher block number and if so do not swap
		 */
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	/* ====== begin update array ======
	 * Four blocks get changed.
	 * 	1. Free block becomes busy and gains the contents of child (root block/directory tree block)
	 * 	2. Parent block in directory tree remains busy, but points to new root block location.
	 *	3. Free block's corresponding bitmap reflects above change.
	 * 	4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE)
	 */
	parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */
	parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num;
	CHECK_AND_RESET_UPDATE_ARRAY;
	if (free_blk_recycled)
	{	/* Otherwise, it's a completely free block, in which case no need to read. */
		freeblkhist.blk_num = (block_id)free_blk_id;
		if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr)))
		{
			assert(t_tries < CDB_STAGNATE);
			t_retry((enum cdb_sc)rdfail_detail);
			return RETRY_SWAP;
		}
	}
	child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz;
	BLK_INIT(bs_ptr, bs1);
	BLK_ADDR(saved_blk, child_blk_size, unsigned char);
	memcpy(saved_blk, child_blk_ptr, child_blk_size);
	BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr));
	assert(blk_seg_cnt == child_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	tmpcse = &cw_set[cw_set_depth];
	(free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state)
			    : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state);
	t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl);
	tmpcse->mode = gds_t_acquired;
	if (!free_blk_recycled || !cs_data->db_got_to_v5_once)
		tmpcse->old_block = NULL;
	else
	{
		tmpcse->old_block = freeblkhist.buffaddr;
		tmpcse->cr = freeblkhist.cr;
		tmpcse->cycle = freeblkhist.cycle;
		jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL;
		if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
		{
			bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
			if (bsiz > blk_size)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(cdb_sc_lostbmlcr);
				return RETRY_SWAP;
			}
			JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz);
		}
	}
	/* 2. Parent block in directory tree remains busy, but points to new child block location. */
	curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset;
	parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz;
	GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset));
	if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	BLK_INIT(bs_ptr, bs1);
	if (0 == parent_blk_lvl)
		/* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */
		hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset));
	else
		hdr_len = rec_size1 - SIZEOF(block_id);
	bpntr_end = curr_offset + hdr_len + SIZEOF(block_id);
	BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr));
	BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
	PUT_LONG(bn_ptr, free_blk_id);
	BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
	BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end);
	assert(blk_seg_cnt == parent_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN);
	/* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */
	BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
	/* 3. Free block's corresponding bitmap reflects above change. */
	PUT_LONG(update_array_ptr, free_bit);
	save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */
	assert(!cw_map_depth);
	t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1);
	cw_map_depth = cw_set_depth;
	cw_set_depth = save_cw_set_depth;
	update_array_ptr += SIZEOF(block_id);
	temp_long = 0;
	PUT_LONG(update_array_ptr, temp_long);
	update_array_ptr += SIZEOF(block_id);
	assert(1 == cw_set[cw_map_depth - 1].reference_cnt);
	/* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */
	kill_set_list->blk[kill_set_list->used].flag = 0;
	kill_set_list->blk[kill_set_list->used].level = 0;
	kill_set_list->blk[kill_set_list->used++].block = child_blk_id;
	return free_blk_id;
}
Beispiel #5
0
enum cdb_sc	gvcst_kill_blk(srch_blk_status	*blkhist,
			       char		level,
			       gv_key  		*search_key,
			       srch_rec_status	low,
			       srch_rec_status	high,
			       boolean_t	right_extra,
			       cw_set_element	**cseptr)
{
	typedef sm_uc_ptr_t		bytptr;

	unsigned short			temp_ushort;
	int4				temp_long;
	int				tmp_cmpc;
	int				blk_size, blk_seg_cnt, lmatch, rmatch, targ_len, prev_len, targ_base, next_rec_shrink,
					temp_int, blkseglen;
	bool				kill_root, first_copy;
	blk_hdr_ptr_t			old_blk_hdr;
	rec_hdr_ptr_t			left_ptr;	/*pointer to record before first record to delete*/
	rec_hdr_ptr_t			del_ptr;	/*pointer to first record to delete*/
	rec_hdr_ptr_t	       		right_ptr;	/*pointer to record after last record to delete*/
	rec_hdr_ptr_t			right_prev_ptr;
	rec_hdr_ptr_t			rp, rp1;	/*scratch record pointer*/
	rec_hdr_ptr_t			first_in_blk, top_of_block, new_rec_hdr, star_rec_hdr;
	blk_segment			*bs1, *bs_ptr;
	block_index			new_block_index;
	unsigned char			*skb;
	static readonly block_id	zeroes = 0;
	cw_set_element			*cse, *old_cse;
	bytptr				curr, prev, right_bytptr;
	off_chain			chain1, curr_chain, prev_chain;
	block_id			blk;
	sm_uc_ptr_t			buffer;
	srch_blk_status			*t1;

	*cseptr = NULL;
	if (low.offset == high.offset)
		return cdb_sc_normal;
	blk = blkhist->blk_num;
	if (dollar_tlevel)
	{
		PUT_LONG(&chain1, blk);
		if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
		{
			assert(sgm_info_ptr->tp_csa == cs_addrs);
			assert(FALSE == cs_addrs->now_crit);
			return cdb_sc_blknumerr;
		}
	}
	buffer = blkhist->buffaddr;
	old_blk_hdr = (blk_hdr_ptr_t)buffer;
	kill_root = FALSE;
	blk_size = cs_data->blk_size;
	first_in_blk = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + SIZEOF(blk_hdr));
	top_of_block = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + old_blk_hdr->bsiz);
	left_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + low.offset);
	right_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + high.offset);
	if (right_extra && right_ptr < top_of_block)
	{
		right_prev_ptr = right_ptr;
		GET_USHORT(temp_ushort, &right_ptr->rsiz);
		right_ptr = (rec_hdr_ptr_t)((bytptr)right_ptr + temp_ushort);
	}
	if ((bytptr)left_ptr < (bytptr)old_blk_hdr ||
		(bytptr)right_ptr > (bytptr)top_of_block ||
		(bytptr)left_ptr >= (bytptr)right_ptr)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)left_ptr == (bytptr)old_blk_hdr)
	{
		if ((bytptr)right_ptr == (bytptr)top_of_block)
		{
			if ((bytptr)first_in_blk == (bytptr)top_of_block)
			{
				if (0 != level)
				{
					assert(CDB_STAGNATE > t_tries);
					return cdb_sc_rmisalign;
				}
				return cdb_sc_normal;
			}
			if (!gv_target->hist.h[level + 1].blk_num)
				kill_root = TRUE;
			else
			{	/* We are about to free up the contents of this entire block. If this block corresponded to
				 * a global that has NOISOLATION turned on and has a non-zero recompute list (i.e. some SETs
				 * already happened in this same TP transaction), make sure we disable the NOISOLATION
				 * optimization in this case as that is applicable only if one or more SETs happened in this
				 * data block and NOT if a KILL happens. Usually this is done by a t_write(GDS_WRITE_KILLTN)
				 * call but since in this case the entire block is being freed, "t_write" wont be invoked
				 * so we need to explicitly set GDS_WRITE_KILLTN like t_write would have (GTM-8269).
				 * Note: blkhist->first_tp_srch_status is not reliable outside of TP. Thankfully the recompute
				 * list is also maintained only in case of TP so a check of dollar_tlevel is enough to
				 * dereference both "first_tp_srch_status" and "recompute_list_head".
				 */
				if (dollar_tlevel)
				{
					t1 = blkhist->first_tp_srch_status ? blkhist->first_tp_srch_status : blkhist;
					cse = t1->cse;
					if ((NULL != cse) && cse->recompute_list_head)
						cse->write_type |= GDS_WRITE_KILLTN;
				}
				return cdb_sc_delete_parent;
			}
		}
		del_ptr = first_in_blk;
	} else
	{
		GET_USHORT(temp_ushort, &left_ptr->rsiz);
		del_ptr = (rec_hdr_ptr_t)((bytptr)left_ptr + temp_ushort);
		if ((bytptr)del_ptr <= (bytptr)(left_ptr + 1)  ||  (bytptr)del_ptr > (bytptr)right_ptr)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_rmisalign;
		}
	}
	if ((bytptr)del_ptr == (bytptr)right_ptr)
		return cdb_sc_normal;
	lmatch = low.match;
	rmatch = high.match;
	if (level)
	{
		for (rp = del_ptr ;  rp < right_ptr ;  rp = rp1)
		{
			GET_USHORT(temp_ushort, &rp->rsiz);
			rp1 = (rec_hdr_ptr_t)((bytptr)rp + temp_ushort);
			if (((bytptr)rp1 < (bytptr)(rp + 1) + SIZEOF(block_id)) ||
				((bytptr)rp1 < buffer) || ((bytptr)rp1 > (buffer + blk_size)))
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
			GET_LONG(temp_long, ((bytptr)rp1 - SIZEOF(block_id)));
			if (dollar_tlevel)
			{
				chain1 = *(off_chain *)&temp_long;
				if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
				{
					assert(sgm_info_ptr->tp_csa == cs_addrs);
					assert(FALSE == cs_addrs->now_crit);
					return cdb_sc_blknumerr;
				}
			}
			gvcst_delete_blk(temp_long, level - 1, FALSE);
		}
	}
	if (kill_root)
	{	/* create an empty data block */
		BLK_INIT(bs_ptr, bs1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		new_block_index = t_create(blk, (uchar_ptr_t)bs1, 0, 0, 0);
		/* create index block */
		BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
		new_rec_hdr->rsiz = SIZEOF(rec_hdr) + SIZEOF(block_id);
		SET_CMPC(new_rec_hdr, 0);
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
		BLK_SEG(bs_ptr, (bytptr)&zeroes, SIZEOF(block_id));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		cse = t_write(blkhist, (unsigned char *)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), new_block_index, 1,
			TRUE, FALSE, GDS_WRITE_KILLTN);
		assert(!dollar_tlevel || !cse->high_tlevel);
		*cseptr = cse;
		if (NULL != cse)
			cse->first_off = 0;
		return cdb_sc_normal;
	}
	next_rec_shrink = (int)(old_blk_hdr->bsiz + ((bytptr)del_ptr - (bytptr)right_ptr));
	if (SIZEOF(blk_hdr) >= next_rec_shrink)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)right_ptr == (bytptr)top_of_block)
	{
		if (level)
		{
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			next_rec_shrink += SIZEOF(rec_hdr) + SIZEOF(block_id) - temp_ushort;
		}
	} else
	{
		targ_base = (rmatch < lmatch) ? rmatch : lmatch;
		prev_len = 0;
		if (right_extra)
		{
			EVAL_CMPC2(right_prev_ptr, tmp_cmpc);
			targ_len = tmp_cmpc - targ_base;
			if (targ_len < 0)
				targ_len = 0;
			temp_int = tmp_cmpc - EVAL_CMPC(right_ptr);
			if (0 >= temp_int)
				prev_len = - temp_int;
			else
			{
				if (temp_int < targ_len)
					targ_len -= temp_int;
				else
					targ_len = 0;
			}
		} else
		{
			targ_len = EVAL_CMPC(right_ptr) - targ_base;
			if (targ_len < 0)
				targ_len = 0;
		}
		next_rec_shrink += targ_len + prev_len;
	}
	BLK_INIT(bs_ptr, bs1);
	first_copy = TRUE;
	blkseglen = (int)((bytptr)del_ptr - (bytptr)first_in_blk);
	if (0 < blkseglen)
	{
		if (((bytptr)right_ptr != (bytptr)top_of_block)  ||  (0 == level))
		{
			BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
			first_copy = FALSE;
		} else
		{
			blkseglen = (int)((bytptr)left_ptr - (bytptr)first_in_blk);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
				first_copy = FALSE;
			}
			BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(star_rec_hdr, 0);
			star_rec_hdr->rsiz = (unsigned short)(SIZEOF(rec_hdr) + SIZEOF(block_id));
			BLK_SEG(bs_ptr, (bytptr)star_rec_hdr, SIZEOF(rec_hdr));
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			BLK_SEG(bs_ptr, ((bytptr)left_ptr + temp_ushort - SIZEOF(block_id)), SIZEOF(block_id));
		}
	}
	blkseglen = (int)((bytptr)top_of_block - (bytptr)right_ptr);
	assert(0 <= blkseglen);
	if (0 != blkseglen)
	{
		next_rec_shrink = targ_len + prev_len;
		if (0 >= next_rec_shrink)
		{
			BLK_SEG(bs_ptr, (bytptr)right_ptr, blkseglen);
		} else
		{
			BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(new_rec_hdr, EVAL_CMPC(right_ptr) - next_rec_shrink);
			GET_USHORT(temp_ushort, &right_ptr->rsiz);
			new_rec_hdr->rsiz = temp_ushort + next_rec_shrink;
			BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
			if (targ_len)
			{
				BLK_ADDR(skb, targ_len, unsigned char);
				memcpy(skb, &search_key->base[targ_base], targ_len);
				BLK_SEG(bs_ptr, skb, targ_len);
			}
			if (prev_len)
				BLK_SEG(bs_ptr, (bytptr)(right_prev_ptr + 1) , prev_len);
			right_bytptr = (bytptr)(right_ptr + 1);
			blkseglen = (int)((bytptr)top_of_block - right_bytptr);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, right_bytptr, blkseglen);
			} else
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
		}
	}
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_mkblk;
	}
	cse = t_write(blkhist, (unsigned char *)bs1, 0, 0, level, first_copy, TRUE, GDS_WRITE_KILLTN);
	assert(!dollar_tlevel || !cse->high_tlevel);
	*cseptr = cse;
	if (horiz_growth)
	{
		old_cse = cse->low_tlevel;
		assert(old_cse && old_cse->done);
		assert(2 == (SIZEOF(old_cse->undo_offset) / SIZEOF(old_cse->undo_offset[0])));
		assert(2 == (SIZEOF(old_cse->undo_next_off) / SIZEOF(old_cse->undo_next_off[0])));
		assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]);
		assert(!old_cse->undo_next_off[1] && !old_cse->undo_offset[1]);
	}
        if ((NULL != cse)  &&  (0 != cse->first_off))
	{	/* fix up chains in the block to account for deleted records */
		prev = NULL;
		curr = buffer + cse->first_off;
		GET_LONGP(&curr_chain, curr);
		while (curr < (bytptr)del_ptr)
		{	/* follow chain to first deleted record */
			if (0 == curr_chain.next_off)
				break;
			if (right_ptr == top_of_block  &&  (bytptr)del_ptr - curr == SIZEOF(off_chain))
				break;	/* special case described below: stop just before the first deleted record */
			prev = curr;
			curr += curr_chain.next_off;
			GET_LONGP(&curr_chain, curr);
		}
		if (right_ptr == top_of_block  &&  (bytptr)del_ptr - curr == SIZEOF(off_chain))
		{
			/* if the right side of the block is gone and our last chain is in the last record,
			 * terminate the chain and adjust the previous entry to point at the new *-key
			 * NOTE: this assumes there's NEVER a TP delete of records in the GVT
			 */
			assert(0 != level);
			/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
			if (horiz_growth)
			{
				old_cse->undo_next_off[0] = curr_chain.next_off;
				old_cse->undo_offset[0] = (block_offset)(curr - buffer);
				assert(old_cse->undo_offset[0]);
			}
			curr_chain.next_off = 0;
			GET_LONGP(curr, &curr_chain);
			if (NULL != prev)
			{	/* adjust previous chain next_off to reflect the fact that the record it refers to is now a *-key */
				GET_LONGP(&prev_chain, prev);
				/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
				if (horiz_growth)
				{
					old_cse->undo_next_off[1] = prev_chain.next_off;
					old_cse->undo_offset[1] = (block_offset)(prev - buffer);
					assert(old_cse->undo_offset[1]);
				}
				prev_chain.next_off = (unsigned int)((bytptr)left_ptr - prev + (unsigned int)(SIZEOF(rec_hdr)));
				GET_LONGP(prev, &prev_chain);
			} else	/* it's the first (and only) one */
				cse->first_off = (block_offset)((bytptr)left_ptr - buffer + SIZEOF(rec_hdr));
		} else if (curr >= (bytptr)del_ptr)
		{	/* may be more records on the right that aren't deleted */
			while (curr < (bytptr)right_ptr)
			{	/* follow chain past last deleted record */
				if (0 == curr_chain.next_off)
					break;
				curr += curr_chain.next_off;
				GET_LONGP(&curr_chain, curr);
			}
			/* prev :   ptr to chain record immediately preceding the deleted area,
			 *	    or 0 if none.
			 *
			 * curr :   ptr to chain record immediately following the deleted area,
			 *	    or to last chain record.
			 */
			if (curr < (bytptr)right_ptr)
			{	/* the former end of the chain is going, going, gone */
				if (NULL != prev)
				{	/* terminate the chain before the delete */
					GET_LONGP(&prev_chain, prev);
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = 0;
					GET_LONGP(prev, &prev_chain);
				} else
					cse->first_off = 0;		/* the whole chain is gone */
			} else
			{	/* stitch up the left and right to account for the hole in the middle */
				/* next_rec_shrink is the change in record size due to the new compression count */
				if (NULL != prev)
				{
					GET_LONGP(&prev_chain, prev);
					/* ??? new compression may be less (ie +) so why are negative shrinks ignored? */
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = (unsigned int)(curr - prev - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
					GET_LONGP(prev, &prev_chain);
				} else	/* curr remains first: adjust the head */
					cse->first_off = (block_offset)(curr - buffer - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
			}
		}
	}
	horiz_growth = FALSE;
	return cdb_sc_normal;
}
Beispiel #6
0
void	gvcst_delete_blk(block_id blk, int level, boolean_t committed)
{
	cw_set_element	*cse, *old_cse;
	kill_set	*ks;
	off_chain	chain;
	srch_blk_status	*tp_srch_status;
	uint4		dummy, iter;

	/* an assert to verify the validity of the block number was removed
	 * because it could be triggered by a concurrency conflict
	 */

	horiz_growth = FALSE;
	if (dollar_tlevel == 0)
		ks = kill_set_tail;
	else
	{
		PUT_LONG(&chain, blk);
		tp_srch_status = NULL;
		if (chain.flag == 1)
			tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain.cw_index, &cse);
		else
		{
			tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use,
											(void *)blk, &dummy);
			cse = tp_srch_status ? tp_srch_status->ptr : NULL;
		}
		assert(!cse || !cse->high_tlevel);
		if (cse)
		{
			if (!committed)
			{
				assert(dollar_tlevel >= cse->t_level);
				if (cse->t_level != dollar_tlevel)
				{
					/* this part of the code is almost similar to that in t_write(),
					 * any changes in one should be reflected in the other */
					horiz_growth = TRUE;
					old_cse = cse;
					cse = (cw_set_element *)get_new_free_element(sgm_info_ptr->tlvl_cw_set_list);
					memcpy(cse, old_cse, sizeof(cw_set_element));
					cse->low_tlevel = old_cse;
					cse->high_tlevel = NULL;
					old_cse->high_tlevel = cse;
					cse->t_level = dollar_tlevel;
					assert(2 == (sizeof(cse->undo_offset) / sizeof(cse->undo_offset[0])));
					assert(2 == (sizeof(cse->undo_next_off) / sizeof(cse->undo_next_off[0])));
					for (iter = 0; iter < 2; iter++)
						cse->undo_next_off[iter] = cse->undo_offset[iter] = 0;
					if (!old_cse->new_buff)		/* it's possible to arrive here with an unbuilt block */
						gvcst_blk_build(old_cse, (uchar_ptr_t)old_cse->new_buff, 0);
					old_cse->done = TRUE;
					cse->new_buff = ((new_buff_buddy_list *)
								get_new_free_element(sgm_info_ptr->new_buff_list))->new_buff;
					memcpy(cse->new_buff, old_cse->new_buff, ((blk_hdr_ptr_t)old_cse->new_buff)->bsiz);
					/* tp_srch_status->ptr has to be updated here, since gvcst_kill() does
					 * not call tp_hist() at the end as in gvcst_put_blk() */
					if (tp_srch_status)
						tp_srch_status->ptr = (void *)cse;
				}
				switch (cse->mode)
				{
				case gds_t_create:
					cse->mode = kill_t_create;
					if (level == 0)
					    return;
					break;
				case gds_t_write:
					cse->mode = kill_t_write;
					break;
				default:
					;
				}
			} else
			{
				switch(cse->mode)
				{
				case kill_t_create:
					if (level == 0)
						return;
					break;
				default:
					if (chain.flag)
					{
						chain.flag = 0;
						blk = cse->blk;
					}
					break;
				}
			}
		}
		ks = sgm_info_ptr->kill_set_tail;
		if (NULL == ks)		/* Allocate first kill set to sgm_info_ptr block */
		{
			ks = sgm_info_ptr->kill_set_tail = sgm_info_ptr->kill_set_head = (kill_set *)malloc(sizeof(kill_set));
			ks->used = 0;
			ks->next_kill_set = NULL;
		}
	}
	while (ks->used >= BLKS_IN_KILL_SET)
	{
		if (ks->next_kill_set == NULL)
		{
			ks->next_kill_set = (kill_set *)malloc(sizeof(kill_set));
			ks->next_kill_set->used = 0;
			ks->next_kill_set->next_kill_set = NULL;
		}
		ks = kill_set_tail
		   = ks->next_kill_set;
	}
	ks->blk[ks->used].level = level;
	if (dollar_tlevel == 0 || chain.flag == 0)
	{
		ks->blk[ks->used].block = blk;
		ks->blk[ks->used].flag = 0;
	} else
	{
	    	ks->blk[ks->used].block = chain.cw_index;
	    	ks->blk[ks->used].flag = chain.flag;
	}
	++ks->used;
	assert(ks->used <= BLKS_IN_KILL_SET);
}
Beispiel #7
0
/******************************************************************************************
Input Parameters:
	level: level of working block
	dest_blk_id: last destination used for swap
Output Parameters:
	kill_set_ptr: Kill set to be freed
	*exclude_glist_ptr: List of globals not to be moved for a swap destination
Input/Output Parameters:
	gv_target : as working block's history
	reorg_gv_target->hist : as desitnitions block's history
 ******************************************************************************************/
enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr)
{
	unsigned char		x_blk_lmap;
	unsigned short		temp_ushort;
	int			rec_size1, rec_size2;
	int			wlevel, nslevel, dest_blk_level;
	int			piece_len1, piece_len2, first_offset, second_offset,
				work_blk_size, work_parent_size, dest_blk_size, dest_parent_size;
	int			dest_child_cycle;
	int			blk_seg_cnt, blk_size;
	trans_num		ctn;
	int			key_len, key_len_dir;
	block_id		dest_blk_id, work_blk_id, child1, child2;
	enum cdb_sc		status;
	srch_hist 		*dest_hist_ptr, *dir_hist_ptr;
	cache_rec_ptr_t		dest_child_cr;
	blk_segment		*bs1, *bs_ptr;
	sm_uc_ptr_t		saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr,
				bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1;
	boolean_t		gbl_target_was_set, blk_was_free, deleted;
	gv_namehead		*save_targ;
	srch_blk_status		bmlhist, destblkhist, *hist_ptr;
	unsigned char    	save_cw_set_depth;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned int		bsiz;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	dest_blk_id = *pdest_blk_id;
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	if (NULL == TREF(gv_reorgkey))
		GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ));
	dest_hist_ptr = &(reorg_gv_target->hist);
	dir_hist_ptr = reorg_gv_target->alt_hist;
	blk_size = cs_data->blk_size;
	work_parent_ptr = gv_target->hist.h[level+1].buffaddr;
	work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz;
	work_blk_ptr = gv_target->hist.h[level].buffaddr;
	work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz;
	work_blk_id = gv_target->hist.h[level].blk_num;
	if (blk_size < work_blk_size)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	cws_reorg_remove_index = 0;
	/*===== Infinite loop to find the destination block =====*/
	for ( ; ; )
	{
		blk_was_free = FALSE;
		INCR_BLK_NUM(dest_blk_id);
		/* A Pre-order traversal should not cause a child block to go to its parent.
		 * However, in case it happens because already the organization was like that or for any other reason, skip swap.
		 * If we decide to swap, code below should be changed to take care of the special case.
		 * Still a grand-child can go to its grand-parent. This is rare and following code can handle it.
		 */
		if (dest_blk_id == gv_target->hist.h[level+1].blk_num)
			continue;
		if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id)
		{
			*pdest_blk_id = dest_blk_id;
			return cdb_sc_oprnotneeded;
		}
		ctn = cs_addrs->ti->curr_tn;
		/* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk")
		 * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below).
		 * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to
		 * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not
		 * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and
		 * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon
		 * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not
		 * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing
		 * a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array"
		 * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread
		 * in one iteration down below. And that number is the sum of
		 *	+     MAX_BT_DEPTH : for the t_qread while loop down the tree done below
		 *	+ 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below
		 *	+ 2                : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a
		 *			     bitmap block done inside the call to get_lmap below
		 *	= 3 * MAX_BT_DEPTH + 2
		 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2.
		 * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h
		 */
		/* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */
		for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--)
		{
			deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]);
			assert(deleted);
		}
		/* read corresponding bitmap block before attempting to read destination  block.
		 * if bitmap indicates block is free, we will not read the destination block
		 */
		bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
		if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap ||
			((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) ||
			((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badbitmap;
		}
		if (BLK_FREE != x_blk_lmap)
		{	/* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block
			 * in case we later detect that the before-image needs to be written.
			 */
			if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			destblkhist.blk_num = dest_blk_id;
			destblkhist.buffaddr = dest_blk_ptr;
			destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl;
		}
		if (BLK_BUSY != x_blk_lmap)
		{	/* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */
			blk_was_free = TRUE;
			break;
		}
		/* dest_blk_id might contain a *-record only.
		 * So follow the pointer to go to the data/index block, which has a non-* key to search.
		 */
		nslevel = dest_blk_level;
		if (MAX_BT_DEPTH <= nslevel)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_maxlvl;
		}
		rec_base = dest_blk_ptr + SIZEOF(blk_hdr);
		GET_RSIZ(rec_size1, rec_base);
		tblk_ptr = dest_blk_ptr;
		while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel))
		{
			GET_LONG(child1, (rec_base + SIZEOF(rec_hdr)));
			if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_rdfail;
			}
			if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			/* leaf of a killed GVT can have block header only.   Skip those blocks */
			if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
				break;
			nslevel--;
			rec_base = tblk_ptr + SIZEOF(blk_hdr);
			GET_RSIZ(rec_size1, rec_base);
		}
		/* leaf of a killed GVT can have block header only.   Skip those blocks */
		if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
			continue;
		/* get length of global variable name (do not read subscript) for dest_blk_id */
		GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr));
		/* key_len = length of 1st key value (including subscript) for dest_blk_id */
		GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr));
		if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len))
		{	/* Earlier used to restart here always. But dest_blk_id can be a block,
			 * which is just killed and still marked busy.  Skip it, if we are in last retry.
			 */
			if (CDB_STAGNATE <= t_tries)
				continue;
			else
				return cdb_sc_blkmod;
		}
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir);
		(TREF(gv_reorgkey))->base[key_len_dir] = 0;
		(TREF(gv_reorgkey))->end = key_len_dir;
		if (exclude_glist_ptr->next)
		{	/* exclude blocks for globals in the list of EXCLUDE option */
			if  (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr))
				continue;
		}
		save_targ = gv_target;
		if (INVALID_GV_TARGET != reset_gv_target)
			gbl_target_was_set = TRUE;
		else
		{
			gbl_target_was_set = FALSE;
			reset_gv_target = save_targ;
		}
		gv_target = reorg_gv_target;
		gv_target->root = cs_addrs->dir_tree->root;
		gv_target->clue.end = 0;
		/* assign Directory tree path to find dest_blk_id in dir_hist_ptr */
		status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr);
		if (cdb_sc_normal != status)
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return status;
		}
		if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1)
		{	/* may be in a kill_set of another process */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		for (wlevel = 0; wlevel <= dir_hist_ptr->depth &&
			dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++);
		if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id)
		{	/* do not swap a dir_tree block */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		/* gv_reorgkey will now have the first key from dest_blk_id,
		 * or, from a descendant of dest_blk_id (in case it had a *-key only).
		 */
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len);
		(TREF(gv_reorgkey))->end = key_len - 1;
		GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr));
		/* Get root of GVT for dest_blk_id */
		GET_LONG(gv_target->root,
			dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir);
		if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return cdb_sc_blkmod;
		}
		/* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */
		gv_target->clue.end = 0;
		status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
		RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
		if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */
			dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */
			continue;
		if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			return (cdb_sc_normal != status ? status : cdb_sc_blkmod);
		}
		for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++)
			dest_hist_ptr->h[wlevel].tn = ctn;
		dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr;
		dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz;
		dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr;
		dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz;
		break;
	}
	/*===== End of infinite loop to find the destination block =====*/
	/*-----------------------------------------------------
	   Now modify blocks for swapping. Maximum of 4 blocks.
	   -----------------------------------------------------*/
	if (!blk_was_free)
	{	/* 1: dest_blk_id into work_blk_id */
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI (bs_ptr,bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level].blk_num == work_blk_id);
		assert(gv_target->hist.h[level].buffaddr == work_blk_ptr);
		t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN);
	}
	/* 2: work_blk_id into dest_blk_id */
	if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* work_blk_id will be swapped with its child.
		 * This is the only vertical swap.  Here working block goes to its child.
		 * Working block cannot goto its parent because of traversal
		 */
		if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, dest_parent_size, unsigned char);
		memcpy(saved_blk, dest_parent_ptr, dest_parent_size);
		first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset;
		GET_RSIZ(rec_size1, saved_blk + first_offset);
		if (work_blk_size < first_offset + rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		piece_len1 =  first_offset + rec_size1;
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num);
		assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr);
		t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
	} else /* free block or, when working block does not move vertically (swap with parent/child) */
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, work_blk_size, unsigned char);
		memcpy(saved_blk, work_blk_ptr, work_blk_size);
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		if (blk_was_free)
		{
			tmpcse = &cw_set[cw_set_depth];
			t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level);
			/* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode
			 * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here.
			 */
			assert(dest_blk_id == tmpcse->blk);
			tmpcse->mode = gds_t_acquired;
			/* If snapshots are in progress, we might want to read the before images of the FREE blocks also.
			 * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence
			 * will not read the before images of the FREE blocks in t_end. To workaround this, set
			 * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of
			 * the FREE blocks if needed.
			 */
			(BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse);
			/* No need to write before-image in case the block is FREE. In case the database had never been fully
			 * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered
			 * FREE (i.e. no need to write before-images since backward journal recovery will never be expected
			 * to take the database to a point BEFORE the mupip upgrade).
			 */
			if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once)
				tmpcse->old_block = NULL;
			else
			{	/* Destination is a recycled block that needs a before image */
				tmpcse->old_block = destblkhist.buffaddr;
				/* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */
				tmpcse->cr = destblkhist.cr;
				tmpcse->cycle = destblkhist.cycle;
				jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL;
				if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
				{	/* Compute CHECKSUM for writing PBLK record before getting crit.
					 * It is possible that we are reading a block that is actually marked free in
					 * the bitmap (due to concurrency issues at this point). Therefore we might be
					 * actually reading uninitialized block headers and in turn a bad value of
					 * "old_block->bsiz". Restart if we ever access a buffer whose size is greater
					 * than the db block size.
					 */
					bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
					if (bsiz > blk_size)
					{
						assert(CDB_STAGNATE > t_tries);
						return cdb_sc_lostbmlcr;
					}
					JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz);
				}
			}
			assert(GDSVCURR == tmpcse->ondsk_blkver);	/* should have been set by t_create above */
		} else
		{
			hist_ptr = &dest_hist_ptr->h[dest_blk_level];
			assert(dest_blk_id == hist_ptr->blk_num);
			assert(dest_blk_ptr == hist_ptr->buffaddr);
			t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	if (!blk_was_free)
	{	/* 3: Parent of destination block (may be parent of working block too) */
		if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* dest parent == work_blk parent */
			BLK_INIT(bs_ptr, bs1);
			/* Interchange pointer to dest_blk_id and work_blk_id */
			if (level != dest_blk_level ||
				gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				first_offset = gv_target->hist.h[level+1].curr_rec.offset;
				second_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
			} else
			{
				first_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
				second_offset = gv_target->hist.h[level+1].curr_rec.offset;
			}
			GET_RSIZ(rec_size1, dest_parent_ptr + first_offset);
			GET_RSIZ(rec_size2, dest_parent_ptr + second_offset);
			if (dest_parent_size < first_offset + rec_size1 ||
				dest_parent_size < second_offset + rec_size2 ||
				BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			piece_len1 =  first_offset + rec_size1 - SIZEOF(block_id);
			piece_len2 =  second_offset + rec_size2 - SIZEOF(block_id);
			GET_LONG(child1, dest_parent_ptr + piece_len1);
			GET_LONG(child2, dest_parent_ptr + piece_len2);
			BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child2);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1,
				second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1);
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child1);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2,
				dest_parent_size - second_offset - rec_size2);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(level == dest_blk_level);
			assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr);
			t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
		} else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* Destination block moved in the position of working block.
			 * So destination block's parent's pointer should be changed to work_blk_id
			 */
			BLK_INIT(bs_ptr, bs1);
			GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset);
			if (dest_parent_size < rec_size1 +  dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset ||
				BSTAR_REC_SIZE > rec_size1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr),
			    dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, work_blk_id);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1,
				dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset  - rec_size1);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr);
			t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1,
				FALSE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	/* 4: Parent of working block, if different than destination's parent or, destination was a free block */
	if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id  */
		GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset));
		if (work_parent_size < rec_size1 +  gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr),
			gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, dest_blk_id);
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1,
			work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr);
		t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
	}
	/* else already taken care of, when dest_blk_id moved */
	if (blk_was_free)
	{	/* A free/recycled block will become busy block.
		 * So the local bitmap must be updated.
		 * Local bit map block will be added in the list of update arrray for concurrency check and
		 * 	also the cw_set element will be created to mark the free/recycled block as free.
		 * kill_set_ptr will save the block which will become free.
		 */
		child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */
		bmlhist.buffaddr = bmp_buff;
		bmlhist.blk_num = child1;
		child1 = dest_blk_id - child1;
		assert(child1);
		PUT_LONG(update_array_ptr, child1);
		/* Need to put bit maps on the end of the cw set for concurrency checking.
		 * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth".
		 * Hence the save and restore logic (for "cw_set_depth") below.
		 */
		save_cw_set_depth = cw_set_depth;
		assert(!cw_map_depth);
		t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1);	/* will increment cw_set_depth */
		cw_map_depth = cw_set_depth;		/* set cw_map_depth to the latest cw_set_depth */
		cw_set_depth = save_cw_set_depth;	/* restore cw_set_depth */
		/* t_write_map simulation end */
		update_array_ptr += SIZEOF(block_id);
		child1 = 0;
		PUT_LONG(update_array_ptr, child1);
		update_array_ptr += SIZEOF(block_id);
		assert(1 == cw_set[cw_map_depth - 1].reference_cnt);	/* 1 free block is now becoming BLK_USED in the bitmap */
		/* working block will be removed */
		kill_set_ptr->blk[kill_set_ptr->used].flag = 0;
		kill_set_ptr->blk[kill_set_ptr->used].level = 0;
		kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id;
	}
	*pdest_blk_id = dest_blk_id;
	return cdb_sc_normal;
}