Example #1
0
/*
get_lmap.c:
	Reads local bit map and returns buffer address,
	two bit local bit-map value corresponding to the block, cycle and cr
Input Parameter:
	blk: block id of the block whose bit map this routine is to fetch
Output Parameter:
	bits: two bit local bit map
	cycle: Cycle value found in t_qread
	cr: Cache Record value found in t_qread
Returns:
	buffer address of local bitmap block
	Null: if t_qread fails
*/
sm_uc_ptr_t get_lmap (block_id blk, unsigned char *bits, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr)
{
	sm_uc_ptr_t 	ptr, bp;
	block_id	index, offset;
	error_def(ERR_DSEBLKRDFAIL);

	index = ROUND_DOWN2(blk, BLKS_PER_LMAP);
	offset = blk - index;
	bp = t_qread (index, cycle, cr);
	if (bp)
	{
		ptr =  bp + SIZEOF(blk_hdr) + (offset * BML_BITS_PER_BLK) / 8;
		*bits = *ptr;
		switch (blk % (8 / BML_BITS_PER_BLK))
		{	case 0:	break;
			case 1:
				*bits = *bits >> BML_BITS_PER_BLK;
				break;
			case 2:
				*bits = *bits >> 2 * BML_BITS_PER_BLK;
				break;
			case 3:
				*bits = *bits >> 3 * BML_BITS_PER_BLK;
				break;
		}
		*bits = *bits & 3;
	}
	return bp;
}
Example #2
0
void dse_integ(void)
{
	block_id	blk;
	char		util_buff[MAX_UTIL_LEN];
	sm_uc_ptr_t	bp;
	int4		dummy_int, nocrit_present;
	cache_rec_ptr_t	dummy_cr;
	int		util_len;
	bool		was_crit;

	error_def(ERR_DSEBLKRDFAIL);

	if (CLI_PRESENT == cli_present("BLOCK"))
	{
		if (!cli_get_hex("BLOCK", &blk))
			return;
		if (blk < 0 || blk >= cs_addrs->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	memcpy(util_buff, "!/Checking integrity of block ", 30);
	util_len = 30;
	util_len += i2hex_nofill(patch_curr_blk, (uchar_ptr_t)&util_buff[util_len], 8);
	memcpy(&util_buff[util_len], ":", 1);
	util_len += 1;
	util_buff[util_len] = 0;
	util_out_print(util_buff, TRUE);
	was_crit = cs_addrs->now_crit;
	nocrit_present = (CLI_NEGATED == cli_present("CRIT"));

	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = TRUE;
		else
			grab_crit(gv_cur_region);
	}

	if (!(bp = t_qread(patch_curr_blk, &dummy_int, &dummy_cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (TRUE == cert_blk(gv_cur_region, patch_curr_blk, (blk_hdr_ptr_t)bp, 0))
		util_out_print("!/  No errors detected.!/", TRUE);
	else
		util_out_print(NULL, TRUE);
	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = FALSE;
		else
			rel_crit(gv_cur_region);
	}
	return;
}
Example #3
0
/* This routine returns whether the free_blocks counter in the file-header is ok (TRUE) or not (FALSE).
 * If not, it corrects it. This assumes cs_addrs, cs_data and gv_cur_region to point to the region of interest.
 * It also assumes that the master-map is correct and finds out non-full local bitmaps and counts the number of
 * free blocks in each of them and sums them up to determine the perceived correct free_blocks count.
 * The reason why this is ok is that even if the master-map incorrectly reports a local bitmap as full, our new free_blocks
 * count will effectively make the free space in that local-bitmap invisible and make a gdsfilext necessary and valid.
 * A later mupip integ will scavenge that invisible space for us. The worst that can therefore happen is that we will transiently
 * not be using up existing space. But we will always ensure that the free_blocks counter goes in sync with the master-map.
 */
boolean_t	is_free_blks_ctr_ok(void)
{
	boolean_t	blk_used;
	block_id	bml, free_bit, free_bml, maxbitsthismap;
	cache_rec_ptr_t	cr;
	int		cycle;
	sm_uc_ptr_t	bmp;
	unsigned int	local_maps, total_blks, free_blocks;

	error_def(ERR_DBBADFREEBLKCTR);

	assert(&FILE_INFO(gv_cur_region)->s_addrs == cs_addrs && cs_addrs->hdr == cs_data && cs_addrs->now_crit);
	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	for (free_blocks = 0, free_bml = 0; free_bml < local_maps; free_bml++)
	{
		bml = bmm_find_free((uint4)free_bml, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if (bml < free_bml)
			break;
		free_bml = bml;
		bml *= BLKS_PER_LMAP;
		if (!(bmp = t_qread(bml, (sm_int_ptr_t)&cycle, &cr))
				|| (BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz)
				|| (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
		{
			assert(FALSE);	/* In pro, we will simply skip counting this local bitmap. */
			continue;
		}
		assert(free_bml <= (local_maps - 1));
		maxbitsthismap = (free_bml != (local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bml;
		for (free_bit = 0; free_bit < maxbitsthismap; free_bit++)
		{
			free_bit = bm_find_blk(free_bit, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), maxbitsthismap, &blk_used);
			assert(NO_FREE_SPACE <= free_bit);
			if (0 > free_bit)
				break;
			free_blocks++;
		}
	}
	assert(cs_addrs->ti->free_blocks == free_blocks);
	if (cs_addrs->ti->free_blocks != free_blocks)
	{
		send_msg(VARLSTCNT(6) ERR_DBBADFREEBLKCTR, 4, DB_LEN_STR(gv_cur_region), cs_addrs->ti->free_blocks, free_blocks);
		cs_addrs->ti->free_blocks = free_blocks;
		return FALSE;
	}
	return TRUE;
}
Example #4
0
void bm_setmap(block_id bml, block_id blk, int4 busy)
{
	sm_uc_ptr_t	bmp;
	trans_num	ctn;
	srch_hist	alt_hist;
	srch_blk_status	blkhist; /* block-history to fill in for t_write_map which uses "blk_num", "buffaddr", "cr", "cycle" */
	cw_set_element  *cse;
	int		lbm_status;	/* local bitmap status of input "blk" i.e. BUSY or FREE or RECYCLED  */
	int4		reference_cnt;
	uint4		bitnum;

	error_def(ERR_DSEFAIL);

	t_begin_crit(ERR_DSEFAIL);
	ctn = cs_addrs->ti->curr_tn;
	if (!(bmp = t_qread(bml, &blkhist.cycle, &blkhist.cr)))
		t_retry((enum cdb_sc)rdfail_detail);
	blkhist.blk_num = bml;
	blkhist.buffaddr = bmp;
	alt_hist.h[0].blk_num = 0;	/* Need for calls to T_END for bitmaps */
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	bitnum = blk - bml;
	/* Find out current status in order to determine if there is going to be a state transition */
	assert(ROUND_DOWN2(blk, cs_data->bplmap) == bml);
	GET_BM_STATUS(bmp, bitnum, lbm_status);
	switch(lbm_status)
	{
		case BLK_BUSY:
			reference_cnt = busy ? 0 : -1;
			break;
		case BLK_FREE:
		case BLK_MAPINVALID:
		case BLK_RECYCLED:
			assert(BLK_MAPINVALID != lbm_status);
			reference_cnt = busy ? 1 : 0;
			break;
		default:
			assert(FALSE);
			break;
	}
	if (reference_cnt)
	{	/* Initialize update array with non-zero bitnum only if reference_cnt is non-zero. */
		assert(bitnum);
		*((block_id_ptr_t)update_array_ptr) = bitnum;
		update_array_ptr += sizeof(block_id);
	}
	/* Terminate update array unconditionally with zero bitnum. */
	*((block_id_ptr_t)update_array_ptr) = 0;
	update_array_ptr += sizeof(block_id);
	t_write_map(&blkhist, (uchar_ptr_t)update_array, ctn, reference_cnt);
	if (JNL_ENABLED(cs_data))
        {
                cse = (cw_set_element *)(&cw_set[0]);
                cse->new_buff = non_tp_jfb_buff_ptr;
                memcpy(non_tp_jfb_buff_ptr, bmp, ((blk_hdr_ptr_t)bmp)->bsiz);
                gvcst_map_build((uint4 *)cse->upd_addr, (uchar_ptr_t)cse->new_buff, cse, cs_addrs->ti->curr_tn);
                cse->done = TRUE;
        }
	/* Call t_end till it succeeds or aborts (error will be reported) */
	while ((trans_num)0 == t_end(&alt_hist, 0))
		;
	return;
}
Example #5
0
boolean_t dse_b_dmp(void)
{
	int4		util_len, head, lmap_num, iter1, iter2, mapsize, bplmap, nocrit_present, dummy_int, count;
	unsigned char	util_buff[MAX_UTIL_LEN], mask;
	boolean_t	free, was_crit, invalid_bitmap = FALSE;
	block_id	blk;
	sm_uc_ptr_t	bp, b_top, rp, mb, dump_record(sm_uc_ptr_t rp, block_id blk, sm_uc_ptr_t bp, sm_uc_ptr_t b_top);
	cache_rec_ptr_t dummy_cr;
	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_CTRLC);
	error_def(ERR_BITMAPSBAD);
	head = cli_present("HEADER");
	if (CLI_PRESENT == cli_present("BLOCK"))
	{
		if (!cli_get_hex("BLOCK", &blk))
			return FALSE;
		if (blk < 0 || blk >= cs_addrs->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return FALSE;
		}
		patch_curr_blk = blk;
	} else
		blk = patch_curr_blk;
	if (CLI_PRESENT == cli_present("COUNT"))
	{
		if (!cli_get_hex("COUNT", &count))
			return FALSE;
		if (count < 1)
			return FALSE;
	} else
		count = 1;

	util_out_print(0, TRUE);
	bplmap = cs_addrs->hdr->bplmap;
	mapsize = BM_SIZE(bplmap);
	patch_rec_counter = 1;
	was_crit = cs_addrs->now_crit;
	nocrit_present = (CLI_NEGATED == cli_present("CRIT"));

	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = TRUE;
		else
			grab_crit(gv_cur_region);
	}

	for ( ; ; )
	{
		if (blk / bplmap * bplmap != blk)
		{
			if(!(bp = t_qread(blk, &dummy_int, &dummy_cr)))
				rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			if (((blk_hdr_ptr_t) bp)->levl && patch_is_fdmp)
			{
				util_out_print("Error:  cannot perform GLO/ZWR dump on index block.", TRUE);
				if (!was_crit)
				{
					if (nocrit_present)
						cs_addrs->now_crit = FALSE;
					else
						rel_crit(gv_cur_region);
				}
				return FALSE;
			}
			if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
				b_top = bp + cs_addrs->hdr->blk_size;
			else if (((blk_hdr_ptr_t) bp)->bsiz < sizeof(blk_hdr))
				b_top = bp + sizeof(blk_hdr);
			else
				b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
			if (CLI_NEGATED != head && !patch_is_fdmp)
			{	memcpy(util_buff, "Block ", 6);
				util_len = 6;
				util_len += i2hex_nofill(blk, &util_buff[util_len], 8);
				memcpy(&util_buff[util_len], "   Size ", 8);
				util_len += 8;
				util_len += i2hex_nofill(((blk_hdr_ptr_t)bp)->bsiz, &util_buff[util_len], 4);
				memcpy(&util_buff[util_len], "   Level !UL   TN ", 18);
				util_len += 18;
				util_len += i2hex_nofill(((blk_hdr_ptr_t)bp)->tn, &util_buff[util_len], 8);
				memcpy(&util_buff[util_len], "!/", 2);
				util_len += 2;
				util_buff[util_len] = 0;
				util_out_print(util_buff, TRUE, ((blk_hdr_ptr_t) bp)->levl );
			}
			rp = bp + sizeof(blk_hdr);
			if (CLI_PRESENT != head && (!patch_is_fdmp || ((blk_hdr_ptr_t) bp)->levl == 0))
			{
				while (!util_interrupt && (rp = dump_record(rp, blk, bp, b_top)))
					patch_rec_counter += 1;
			}
			if (util_interrupt)
			{
				if (!was_crit)
					rel_crit(gv_cur_region);
				rts_error(VARLSTCNT(1) ERR_CTRLC);
				break;
			}
			if (CLI_NEGATED == head)
				util_out_print(0, TRUE);
		} else if (!patch_is_fdmp)
		{

			if(!(bp = t_qread(blk, &dummy_int, &dummy_cr)))
				rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);

			if (CLI_NEGATED != head)
			{

				if (bplmap == 0)
				{
					memcpy(util_buff, "Block ", 6);
					util_len = 6;
					util_len += i2hex_nofill(blk, &util_buff[util_len], 8);
					memcpy(&util_buff[util_len], "   Size ", 8);
					util_len += 8;
					util_len += i2hex_nofill(mapsize, &util_buff[util_len], 4);
					memcpy(&util_buff[util_len], "   Master Status: Cannot Determine (bplmap == 0)!/", 50);
					util_len += 50;
					util_buff[util_len] = 0;
					util_out_print(util_buff, TRUE );
				} else
				{
					mb = cs_addrs->bmm + blk / (8 * bplmap);
					lmap_num = blk / bplmap;
					mask = 1 << ( lmap_num - lmap_num / 8 * 8);
					free = 	mask & *mb;
					memcpy(util_buff, "Block ", 6);
					util_len = 6;
					util_len += i2hex_nofill(blk, &util_buff[util_len], 8);
					memcpy(&util_buff[util_len], "  Size ", 7);
					util_len += 7;
					util_len += i2hex_nofill(((blk_hdr_ptr_t)bp)->bsiz, &util_buff[util_len], 4);
					memcpy(&util_buff[util_len], "  Level !SB  TN ", 16);
					util_len += 16;
					util_len += i2hex_nofill(((blk_hdr_ptr_t)bp)->tn, &util_buff[util_len], 8);
					util_buff[util_len] = 0;
					util_out_print(util_buff, FALSE, ((blk_hdr_ptr_t) bp)->levl );
					util_len = 0;
					memcpy(&util_buff[util_len], "   Master Status: !AD!/",23);
					util_len = 23;
					util_buff[util_len] = 0;
					util_out_print(util_buff, TRUE, free ? 10 : 4, free ? "Free Space" : "Full");
				}
			}
			if (CLI_PRESENT != head)
			{
				util_out_print ("           !_Low order                         High order", TRUE);

				lmap_num = 0;
				while (lmap_num < bplmap)
				{	memcpy(util_buff, "Block ", 6);
					util_len = 6;
					i2hex_blkfill(blk + lmap_num, &util_buff[util_len], 8);
					util_len += 8;
					memcpy(&util_buff[util_len], ":!_|  ", 6);
					util_len += 6;
					util_buff[util_len] = 0;
					util_out_print (util_buff, FALSE);
					for (iter1 = 0; iter1 < 4; iter1++)
					{
						for (iter2 = 0; iter2 < 8; iter2++)
						{
							mask = dse_lm_blk_free(lmap_num * BML_BITS_PER_BLK, bp + sizeof(blk_hdr));
							if (!mask)
								util_out_print ("!AD", FALSE, 1, BUSY_CHAR);
							else if (BLK_FREE == mask)
								util_out_print ("!AD", FALSE, 1, FREE_CHAR);
							else if (BLK_RECYCLED == mask)
								util_out_print ("!AD", FALSE, 1, REUSABLE_CHAR);
							else {
								invalid_bitmap = TRUE;
								util_out_print ("!AD", FALSE, 1, CORRUPT_CHAR);
							}
							if (++lmap_num >= bplmap)
								break;
						}
						util_out_print ("  ", FALSE);
						if (lmap_num >= bplmap)
							break;
					}
					util_out_print ("|", TRUE);
					if (util_interrupt)
					{
						if (!was_crit)
							rel_crit(gv_cur_region);
						rts_error(VARLSTCNT(1) ERR_CTRLC);
					}
				}
				util_out_print("!/'!AD' == BUSY  '!AD' == FREE  '!AD' == REUSABLE  '!AD' == CORRUPT!/",
					TRUE,1, BUSY_CHAR, 1, FREE_CHAR, 1, REUSABLE_CHAR, 1, CORRUPT_CHAR);
				if (invalid_bitmap)
					rts_error(VARLSTCNT(1) ERR_BITMAPSBAD);
			}
		}
		count--;
		if (count <= 0 || util_interrupt)
			break;
		blk++;
		if (blk >= cs_addrs->ti->total_blks)
			blk = 0;
	}
	patch_curr_blk = blk;
	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = FALSE;
		else
			rel_crit(gv_cur_region);
	}
	return TRUE;
}
Example #6
0
int dse_ksrch(block_id srch,
	      block_id_ptr_t pp,
	      int4 *off,
	      char *targ_key,
	      int targ_len)
{
	cache_rec_ptr_t dummy_cr;
	int		rsize, tmp_cmpc;
	int4		cmp, dummy_int;
	ssize_t		size;
	sm_uc_ptr_t	blk_id, bp, b_top, key_top, rp, r_top;
	unsigned short	cc, dummy_short;

	if(!(bp = t_qread(srch, &dummy_int, &dummy_cr)))
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
		b_top = bp + cs_addrs->hdr->blk_size;
	else if (((blk_hdr_ptr_t) bp)->bsiz < SIZEOF(blk_hdr))
		b_top = bp + SIZEOF(blk_hdr);
	else
		b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
	CLEAR_DSE_COMPRESS_KEY;
	*off = 0;
	for (rp = bp + SIZEOF(blk_hdr); rp < b_top; rp = r_top)
	{
		*off = (int4)(rp - bp);
		GET_SHORT(dummy_short, &((rec_hdr_ptr_t)rp)->rsiz);
		rsize = dummy_short;
		if (rsize < SIZEOF(rec_hdr))
			r_top = rp + SIZEOF(rec_hdr);
		else
			r_top = rp + rsize;
		if (r_top > b_top)
			r_top = b_top;
		if (r_top - rp < (((blk_hdr_ptr_t)bp)->levl ? SIZEOF(block_id) : MIN_DATA_SIZE) + SIZEOF(rec_hdr))
		{
			*pp = 0;
			break;
		}
		for (key_top = rp + SIZEOF(rec_hdr); key_top < r_top ; )
			if (!*key_top++ && !*key_top++)
				break;
		if (((blk_hdr_ptr_t)bp)->levl && key_top > (blk_id = r_top - SIZEOF(block_id)))
			key_top = blk_id;
		if (EVAL_CMPC((rec_hdr_ptr_t)rp) > patch_comp_count)
			cc = patch_comp_count;
		else
			cc = EVAL_CMPC((rec_hdr_ptr_t)rp);
		size = (ssize_t)(key_top - rp - SIZEOF(rec_hdr));
		if (size > MAX_KEY_SZ - cc)
			size = MAX_KEY_SZ - cc;
		if (size < 0)
			size = 0;
		memcpy(&patch_comp_key[cc], rp + SIZEOF(rec_hdr), size);
		patch_comp_count = (int)(cc + size);
		GET_LONGP(pp, key_top);
		cmp = memvcmp(targ_key, targ_len, &patch_comp_key[0], patch_comp_count);
		if (0 > cmp)
			break;
		if (!cmp)
		{
			if (0 != ((blk_hdr_ptr_t)bp)->levl)
				break;
			if (patch_find_root_search)
			{
				for (key_top = rp + SIZEOF(rec_hdr); key_top < r_top; )
					if (!*key_top++ && !*key_top++)
						break;
				GET_LONG(ksrch_root, key_top);
			}
			return TRUE;
		}
	}
	patch_path_count++;
	if (((blk_hdr_ptr_t) bp)->levl && *pp > 0 && *pp < cs_addrs->ti->total_blks && (*pp % cs_addrs->hdr->bplmap)
	    && dse_ksrch(*pp, pp + 1, off + 1, targ_key, targ_len))
		return TRUE;
	return FALSE;
}
Example #7
0
 /* Performs a random traversal for the sampling methods */
enum cdb_sc mu_size_rand_traverse(double *r, double *a)
{
	sm_uc_ptr_t			pVal, pTop, pRec, pBlkBase;
	block_id			nBlkId;
	block_id			valBlk[MAX_RECS_PER_BLK];	/* valBlk[j] := value in j-th record of current block */
	boolean_t			is_mm;
	cache_rec_ptr_t			cr;
	enum cdb_sc			status;
	int				cycle;
	int4				random;
	int4				rCnt;				/* number of entries in valBlk */
	register gv_namehead		*pTarg;
	register srch_blk_status	*pCurr;
	register srch_hist		*pTargHist;
	trans_num			tn;
	unsigned char			nLevl;
	unsigned short			nRecLen;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	is_mm = (dba_mm == cs_data->acc_meth);
	pTarg = gv_target;
	pTargHist = &gv_target->hist;
	/* The following largely mimics gvcst_search/gvcst_search_blk */
	nBlkId = pTarg->root;
	tn = cs_addrs->ti->curr_tn;
	if (NULL == (pBlkBase = t_qread(nBlkId, (sm_int_ptr_t)&cycle, &cr)))
		return (enum cdb_sc)rdfail_detail;
	nLevl = ((blk_hdr_ptr_t)pBlkBase)->levl;
	if (MAX_BT_DEPTH < (int)nLevl)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_maxlvl;
	}
	if (0 == (int)nLevl)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_badlvl;
	}
	pTargHist->depth = (int)nLevl;
	pCurr = &pTargHist->h[nLevl];
	(pCurr + 1)->blk_num = 0;
	pCurr->tn = tn;
	pCurr->cycle = cycle;
	pCurr->cr = cr;
	for (;;)
	{
		assert(pCurr->level == nLevl);
		pCurr->cse = NULL;
		pCurr->blk_num = nBlkId;
		pCurr->buffaddr = pBlkBase;
		BLK_LOOP(rCnt, pRec, pBlkBase, pTop, nRecLen)
		{	/* enumerate records in block */
			GET_AND_CHECK_RECLEN(status, nRecLen, pRec, pTop, nBlkId);
			if (cdb_sc_normal != status)
			{
				assert(CDB_STAGNATE > t_tries);
				return status;
			}
			valBlk[rCnt] = nBlkId;
			CHECK_ADJACENCY(nBlkId, nLevl, a[nLevl]);
		}
		r[nLevl] = rCnt;
		/* randomly select next block */
		random = (int4)(rCnt * drand48());
		random = random & 0x7fffffff; /* to make sure that the sign bit(msb) is off */
		nBlkId = valBlk[random];
		if (is_mm && (nBlkId > cs_addrs->total_blks))
		{
			if (cs_addrs->total_blks < cs_addrs->ti->total_blks)
				return cdb_sc_helpedout;
			else
				return cdb_sc_blknumerr;
		}
		--pCurr; --nLevl;
		if (nLevl < 1)
			break;
		pCurr->tn = cs_addrs->ti->curr_tn;
		if (NULL == (pBlkBase = t_qread(nBlkId, (sm_int_ptr_t)&pCurr->cycle, &pCurr->cr)))
			return (enum cdb_sc)rdfail_detail;
		if (((blk_hdr_ptr_t)pBlkBase)->levl != nLevl)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badlvl;
		}
	}
Example #8
0
/* Finds a free block and adds information to update array and cw_set */
block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id,
		sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn)
{
	sgmnt_data_ptr_t	csd;
	sgmnt_addrs		*csa;
	node_local_ptr_t	cnl;
	srch_blk_status		bmlhist, freeblkhist;
	block_id		hint_blk_num, free_blk_id, parent_blk_id;
	boolean_t		free_blk_recycled;
	int4			master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap;
	uint4			total_blks;
	int			blk_seg_cnt, blk_size;
	sm_uc_ptr_t		parent_blk_ptr, bn_ptr, saved_blk;
	blk_segment		*bs1, *bs_ptr;
	int			parent_blk_size, child_blk_size, bsiz;
	int			rec_size1, curr_offset, bpntr_end, hdr_len;
	int			tmp_cmpc;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned short		temp_ushort;
	unsigned long		temp_long;
	unsigned char		save_cw_set_depth;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csd = cs_data;
	csa = cs_addrs;
	cnl = csa->nl;
	blk_size = csd->blk_size;
	/* Find a free/recycled block for new block location. */
	hint_blk_num = 0;
	total_blks = csa->ti->total_blks;
	num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps);
	if ((NO_FREE_SPACE == master_bit))
	{
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP;
	if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr)))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry((enum cdb_sc)rdfail_detail);
		return RETRY_SWAP;
	}
	hint_bit = 0;
	maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num;
	free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled);
	free_blk_id = bmlhist.blk_num + free_bit;
	if (DIR_ROOT >= free_blk_id)
	{	/* Bitmap block 0 and directory tree root block 1 should always be marked busy. */
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_badbitmap);
		return RETRY_SWAP;
	}
	if (child_blk_id <= free_blk_id)
	{	/* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check
		 * if the block is to be swapped into a higher block number and if so do not swap
		 */
		t_abort(gv_cur_region, csa);
		return ABORT_SWAP;
	}
	/* ====== begin update array ======
	 * Four blocks get changed.
	 * 	1. Free block becomes busy and gains the contents of child (root block/directory tree block)
	 * 	2. Parent block in directory tree remains busy, but points to new root block location.
	 *	3. Free block's corresponding bitmap reflects above change.
	 * 	4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE)
	 */
	parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */
	parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num;
	CHECK_AND_RESET_UPDATE_ARRAY;
	if (free_blk_recycled)
	{	/* Otherwise, it's a completely free block, in which case no need to read. */
		freeblkhist.blk_num = (block_id)free_blk_id;
		if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr)))
		{
			assert(t_tries < CDB_STAGNATE);
			t_retry((enum cdb_sc)rdfail_detail);
			return RETRY_SWAP;
		}
	}
	child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz;
	BLK_INIT(bs_ptr, bs1);
	BLK_ADDR(saved_blk, child_blk_size, unsigned char);
	memcpy(saved_blk, child_blk_ptr, child_blk_size);
	BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr));
	assert(blk_seg_cnt == child_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	tmpcse = &cw_set[cw_set_depth];
	(free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state)
			    : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state);
	t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl);
	tmpcse->mode = gds_t_acquired;
	if (!free_blk_recycled || !cs_data->db_got_to_v5_once)
		tmpcse->old_block = NULL;
	else
	{
		tmpcse->old_block = freeblkhist.buffaddr;
		tmpcse->cr = freeblkhist.cr;
		tmpcse->cycle = freeblkhist.cycle;
		jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL;
		if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
		{
			bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
			if (bsiz > blk_size)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(cdb_sc_lostbmlcr);
				return RETRY_SWAP;
			}
			JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz);
		}
	}
	/* 2. Parent block in directory tree remains busy, but points to new child block location. */
	curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset;
	parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz;
	GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset));
	if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	BLK_INIT(bs_ptr, bs1);
	if (0 == parent_blk_lvl)
		/* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */
		hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset));
	else
		hdr_len = rec_size1 - SIZEOF(block_id);
	bpntr_end = curr_offset + hdr_len + SIZEOF(block_id);
	BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr));
	BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
	PUT_LONG(bn_ptr, free_blk_id);
	BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
	BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end);
	assert(blk_seg_cnt == parent_blk_size);
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(t_tries < CDB_STAGNATE);
		t_retry(cdb_sc_blkmod);
		return RETRY_SWAP;
	}
	t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN);
	/* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */
	BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
	/* 3. Free block's corresponding bitmap reflects above change. */
	PUT_LONG(update_array_ptr, free_bit);
	save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */
	assert(!cw_map_depth);
	t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1);
	cw_map_depth = cw_set_depth;
	cw_set_depth = save_cw_set_depth;
	update_array_ptr += SIZEOF(block_id);
	temp_long = 0;
	PUT_LONG(update_array_ptr, temp_long);
	update_array_ptr += SIZEOF(block_id);
	assert(1 == cw_set[cw_map_depth - 1].reference_cnt);
	/* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */
	kill_set_list->blk[kill_set_list->used].flag = 0;
	kill_set_list->blk[kill_set_list->used].level = 0;
	kill_set_list->blk[kill_set_list->used++].block = child_blk_id;
	return free_blk_id;
}
Example #9
0
void dse_range(void)
{
    char		lower[256], targ_key[256], upper[256], util_buff[MAX_UTIL_LEN];
    block_id		from, to, blk, blk_child;
    sm_uc_ptr_t		bp, b_top, key_bot, key_top, key_top1, rp, r_top;
    char		level;
    int4		dummy_int, nocrit_present;
    cache_rec_ptr_t	dummy_cr;
    short int		rsize, size, size1;
    int			cnt, dummy, lower_len, util_len, upper_len;
    boolean_t		busy_matters, free, got_lonely_star, index, low, lost, star, up, was_crit, was_hold_onto_crit;

    if (cli_present("FROM") == CLI_PRESENT)
    {
	if (!cli_get_hex("FROM", (uint4 *)&from))
	    return;
	if (from < 0 || from > cs_addrs->ti->total_blks
	    || !(from % cs_addrs->hdr->bplmap))
	{
	    util_out_print("Error: invalid block number.", TRUE);
	    return;
	}
    }
    else
	from = 1;
    if (cli_present("TO") == CLI_PRESENT)
    {
	if(!cli_get_hex("TO", (uint4 *)&to))
	    return;
	if (to < 0 || to > cs_addrs->ti->total_blks
	    || !(to % cs_addrs->hdr->bplmap))
	{
	    util_out_print("Error: invalid block number.", TRUE);
	    return;
	}
    }
    else
	to = cs_addrs->ti->total_blks - 1;
    if (low = (cli_present("LOWER") == CLI_PRESENT))
    {
	if (!dse_getki(&lower[0], &lower_len, LIT_AND_LEN("LOWER")))
	    return;
    }
    if (up = (cli_present("UPPER") == CLI_PRESENT))
    {
	if (!dse_getki(&upper[0], &upper_len, LIT_AND_LEN("UPPER")))
	    return;
    }
    star = (cli_present("STAR") == CLI_PRESENT);
    if (!low && !up && !star)
    {
	util_out_print("Must specify star, or a lower or upper key limit.", TRUE);
	return;
    }
    index = (cli_present("INDEX") == CLI_PRESENT);
    lost = (cli_present("LOST") == CLI_PRESENT);
    dummy = cli_present("BUSY");
    if (dummy == CLI_PRESENT)
    {
	busy_matters = TRUE;
	free = FALSE;
    }
    else if (dummy == CLI_NEGATED)
	busy_matters = free = TRUE;
    else
	busy_matters = free = FALSE;
    patch_path[0] = get_dir_root();
    cnt = 0;
    was_crit = cs_addrs->now_crit;
    nocrit_present = (CLI_NEGATED == cli_present("CRIT"));
    DSE_GRAB_CRIT_AS_APPROPRIATE(was_crit, was_hold_onto_crit, nocrit_present, cs_addrs, gv_cur_region);
    for (blk = from; blk <= to ;blk++)
    {
	if (util_interrupt)
	{
	    DSE_REL_CRIT_AS_APPROPRIATE(was_crit, was_hold_onto_crit, nocrit_present, cs_addrs, gv_cur_region);
	    rts_error(VARLSTCNT(1) ERR_CTRLC);
	    break;
	}
	if (!(blk % cs_addrs->hdr->bplmap))
	    continue;
	if (!(bp = t_qread(blk, &dummy_int, &dummy_cr)))
	    rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	level = ((blk_hdr_ptr_t)bp)->levl;
	if (index && (level == 0))
	    continue;
	if (busy_matters && (free != dse_is_blk_free(blk, &dummy_int, &dummy_cr)))
	    continue;
	if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
	    b_top = bp + cs_addrs->hdr->blk_size;
	else if (((blk_hdr_ptr_t) bp)->bsiz < SIZEOF(blk_hdr))
	    b_top = bp + SIZEOF(blk_hdr);
	else
	    b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
	rp = bp + SIZEOF(blk_hdr);
	GET_SHORT(rsize, &((rec_hdr_ptr_t) rp)->rsiz);
	if (rsize < SIZEOF(rec_hdr))
	    r_top = rp + SIZEOF(rec_hdr);
	else
	    r_top = rp + rsize;
	if (r_top >= b_top)
	    r_top = b_top;
	got_lonely_star = FALSE;
	if (((blk_hdr_ptr_t) bp)->levl)
	{
	    key_top = r_top - SIZEOF(block_id);
	    if (star && (r_top == b_top))
		got_lonely_star = TRUE;
	} else
	{
	    if (!up && !low)
		continue;
	    for (key_top = rp + SIZEOF(rec_hdr); key_top < r_top ; )
		if (!*key_top++ && !*key_top++)
		    break;
	}
	if (!got_lonely_star)
	{
		key_bot = rp + SIZEOF(rec_hdr);
		size = key_top - key_bot;
		if (size <= 0)
		    continue;
		if (size > SIZEOF(targ_key))
			size = SIZEOF(targ_key);
		if (lost)
		{
			for (key_top1 = rp + SIZEOF(rec_hdr); key_top1 < r_top ; )
				if (!*key_top1++)
				    break;
			size1 = key_top1 - rp - SIZEOF(rec_hdr);
			if (size1 > SIZEOF(targ_key))
				size1 = SIZEOF(targ_key);
			patch_find_root_search = TRUE;
			patch_path_count = 1;
			patch_find_blk = blk;
			if (dse_is_blk_in(rp, r_top, size1))
				continue;
		}
		if (low && memcmp(lower, key_bot, MIN(lower_len, size)) > 0)
		    continue;
		if (up && memcmp(upper, key_bot, MIN(upper_len, size)) < 0)
		    continue;
	} else
	{
		got_lonely_star = FALSE;
		if (lost)
		{
			blk_child = *(block_id_ptr_t)key_top;
			if (!(bp = t_qread(blk_child, &dummy_int, &dummy_cr)))
			    rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
			    b_top = bp + cs_addrs->hdr->blk_size;
			else if (((blk_hdr_ptr_t) bp)->bsiz < SIZEOF(blk_hdr))
			    b_top = bp + SIZEOF(blk_hdr);
			else
			    b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
			rp = bp + SIZEOF(blk_hdr);
			GET_SHORT(rsize, &((rec_hdr_ptr_t) rp)->rsiz);
			if (rsize < SIZEOF(rec_hdr))
			    r_top = rp + SIZEOF(rec_hdr);
			else
			    r_top = rp + rsize;
			if (r_top >= b_top)
			    r_top = b_top;
			if (((blk_hdr_ptr_t) bp)->levl)
			    key_top = r_top - SIZEOF(block_id);
			for (key_top1 = rp + SIZEOF(rec_hdr); key_top1 < r_top ; )
				if (!*key_top1++)
				    break;
			size1 = key_top1 - rp - SIZEOF(rec_hdr);
			if (size1 > 0)
			{
				if (size1 > SIZEOF(targ_key))
					size1 = SIZEOF(targ_key);
				patch_find_root_search = TRUE;
				patch_path_count = 1;
				patch_find_blk = blk;
				if (dse_is_blk_in(rp, r_top, size1))
					continue;
			}
		}
	}
	if (!cnt++)
	    util_out_print("!/Blocks in the specified key range:", TRUE);
	util_out_print("Block:  !8XL Level: !2UL", TRUE, blk, level);
    }
    DSE_REL_CRIT_AS_APPROPRIATE(was_crit, was_hold_onto_crit, nocrit_present, cs_addrs, gv_cur_region);
    if (cnt)
	util_out_print("Found !UL blocks", TRUE, cnt);
    else
	util_out_print("None found.", TRUE);
    return;
}
Example #10
0
void dse_exhaus(int4 pp, int4 op)
{
    sm_uc_ptr_t		bp, np, b_top, rp, r_top, nrp, nr_top, ptr;
    char		util_buff[MAX_UTIL_LEN];
    block_id		last;
    short		temp_short;
    int			count, util_len;
    int4		dummy_int;
    cache_rec_ptr_t	dummy_cr;
    global_dir_path	*d_ptr, *temp;
    error_def(ERR_DSEBLKRDFAIL);
    error_def(ERR_CTRLC);

    last = 0;
    patch_path_count++;
    if(!(bp = t_qread(patch_path[pp - 1],&dummy_int,&dummy_cr)))
	rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
    if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
	b_top = bp + cs_addrs->hdr->blk_size;
    else if (((blk_hdr_ptr_t) bp)->bsiz < sizeof(blk_hdr))
	b_top = bp + sizeof(blk_hdr);
    else
	b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
    for (rp = bp + sizeof(blk_hdr); rp < b_top ;rp = r_top)
    {
	if (util_interrupt)
	{
	    rts_error(VARLSTCNT(1) ERR_CTRLC);
	    break;
	}
	if (!(np = t_qread(patch_path[pp - 1],&dummy_int,&dummy_cr)))
	    rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (np != bp)
	{
	    b_top = np + (b_top - bp);
	    rp = np + (rp - bp);
	    r_top = np + (r_top - bp);
	    bp = np;
	}
	GET_SHORT(temp_short,&((rec_hdr_ptr_t) rp)->rsiz);
	r_top = rp + temp_short;
	if (r_top > b_top)
	    r_top = b_top;
	if (r_top - rp < sizeof(block_id))
	    break;
	if (((blk_hdr_ptr_t)bp)->levl)
	    GET_LONG(patch_path[pp],(r_top - sizeof(block_id)));
	else
	{
	    for (ptr = rp + sizeof(rec_hdr); ; )
	    {
		if (*ptr++ == 0 && *ptr++ ==0)
		    break;
	    }
	    GET_LONG(patch_path[pp],ptr);
	}
	patch_offset[op] = rp - bp;
	if (patch_path[pp] == patch_find_blk)
	{
	    if (!patch_exh_found)
	    {
		if (patch_find_sibs)
		    util_out_print("!/    Left siblings    Right siblings",TRUE);
		patch_exh_found = TRUE;
	    }
	    if (patch_find_sibs)
	    {
		patch_left_sib = last;
		if (r_top < b_top)
		{
		    nrp = r_top;
		    GET_SHORT(temp_short,&((rec_hdr_ptr_t) rp)->rsiz);
		    nr_top = nrp + temp_short;
		    if (nr_top > b_top)
			nr_top = b_top;
		    if (nr_top - nrp >= sizeof(block_id))
		    {
			if (((blk_hdr_ptr_t)bp)->levl)
			    GET_LONG(patch_right_sib,(nr_top - sizeof(block_id)));
			else
			{
			    for (ptr = rp + sizeof(rec_hdr); ;)
			    {
				if (*ptr++ == 0 && *ptr++ == 0)
				    break;
			    }
			    GET_LONG(patch_right_sib,ptr);
			}
		    }
		}
		else
		    patch_right_sib = 0;
		if (patch_left_sib)
		{
		    memcpy(util_buff,"	",1);
		    util_len = 1;
		    util_len += i2hex_nofill(patch_left_sib,(uchar_ptr_t)&util_buff[util_len],8);
		    memcpy(&util_buff[util_len],"	",1);
		    util_len += 1;
		    util_buff[util_len] = 0;
		    util_out_print(util_buff,FALSE);
		}else
		    util_out_print("	none	",FALSE);
		if (patch_right_sib)
		{
		    memcpy(util_buff,"	",1);
		    util_len = 1;
		    util_len += i2hex_nofill(patch_right_sib,(uchar_ptr_t)&util_buff[util_len],8);
		    util_buff[util_len] = 0;
		    util_out_print(util_buff,FALSE);
		}
		else
		    util_out_print("	none",TRUE);
	    }
	    else  /* !patch_find_sibs */
	    {
		patch_path_count--;
		util_out_print("	Directory path!/	Path--blk:off",TRUE);
		if (!patch_find_root_search)
		{
		    d_ptr = global_roots_head->link->dir_path;
		    while(d_ptr)
		    {
			memcpy(util_buff,"	",1);
			util_len = 1;
			util_len += i2hex_nofill(d_ptr->block,(uchar_ptr_t)&util_buff[util_len],8);
			memcpy(&util_buff[util_len],":",1);
			util_len += 1;
			util_len += i2hex_nofill(d_ptr->offset,(uchar_ptr_t)&util_buff[util_len],4);
			util_buff[util_len] = 0;
			util_out_print(util_buff,FALSE);
			temp = d_ptr;
			d_ptr = d_ptr->next;
			free(temp);
		    }
		    global_roots_head->link->dir_path = 0;
		    util_out_print("!/!/	Global paths!/	Path--blk:off",TRUE);
		}
		for (count = 0; count < patch_path_count ;count++)
		{
		    memcpy(util_buff,"	",1);
		    util_len = 1;
		    util_len += i2hex_nofill(patch_path[count],(uchar_ptr_t)&util_buff[util_len],8);
		    memcpy(&util_buff[util_len],":",1);
		    util_len += 1;
		    util_len += i2hex_nofill(patch_offset[count],(uchar_ptr_t)&util_buff[util_len],4);
		    util_buff[util_len] = 0;
		    util_out_print(util_buff,FALSE);
		}
		memcpy(util_buff,"	",1);
		util_len = 1;
		util_len += i2hex_nofill(patch_path[count],(uchar_ptr_t)&util_buff[util_len],8);
		util_buff[util_len] = 0;
		util_out_print(util_buff,TRUE);
		patch_path_count++;
	    }
	}
	if (patch_path[pp] > 0 && patch_path[pp] < cs_addrs->ti->total_blks
	    && (patch_path[pp] % cs_addrs->hdr->bplmap))
	    if (((blk_hdr_ptr_t) bp)->levl > 1)
		dse_exhaus(pp + 1,op + 1);
	    else if (((blk_hdr_ptr_t)bp)->levl == 1 && patch_find_root_search)
		dse_find_roots(patch_path[pp]);
	last = patch_path[pp];
    }
    patch_path_count--;
    return;
}
Example #11
0
void dse_chng_bhead(void)
{
	block_id	blk;
	block_id	*blkid_ptr;
	sgm_info	*dummysi = NULL;
	int4		x;
	cache_rec_ptr_t	cr;
	uchar_ptr_t	bp;
	sm_uc_ptr_t	blkBase;
	blk_hdr		new_hdr;
	blk_segment	*bs1, *bs_ptr;
	cw_set_element  *cse;
	int4		blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	bool		ismap;
	bool		chng_blk;
	uint4		mapsize;
	uint4           jnl_status;

	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DSEFAIL);
	error_def(ERR_DBRDONLY);

        if (gv_cur_region->read_only)
                rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	assert(update_array);
	/* reset new block mechanism */
	update_array_ptr = update_array;
	chng_blk = FALSE;
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if (!cli_get_hex("BLOCK",&blk))
			return;
		if (blk < 0 || blk > cs_addrs->ti->total_blks)
		{	util_out_print("Error: invalid block number.",TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	blk_size = cs_addrs->hdr->blk_size;
	ismap = (patch_curr_blk / cs_addrs->hdr->bplmap * cs_addrs->hdr->bplmap == patch_curr_blk);
	mapsize = BM_SIZE(cs_addrs->hdr->bplmap);

	t_begin_crit (ERR_DSEFAIL);
	if (!(bp = t_qread (patch_curr_blk,&dummy_hist.h[0].cycle,&dummy_hist.h[0].cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)bp;

	if (cli_present("LEVEL") == CLI_PRESENT)
	{
		if (!cli_get_num("LEVEL",&x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
	 	new_hdr.levl = (unsigned char)x;

		chng_blk = TRUE;
		if (new_hdr.bsiz < sizeof(blk_hdr))
			new_hdr.bsiz = sizeof(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (cli_present("BSIZ") == CLI_PRESENT)
	{
		if (!cli_get_hex("BSIZ",&x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		else if (x < sizeof(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, cs_addrs);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad block build.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		t_write (patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, new_hdr.levl, TRUE, FALSE);
		BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse);
		t_end(&dummy_hist, 0);
	}
	if (cli_present("TN") == CLI_PRESENT)
	{
		if (!cli_get_hex("TN",&x))
			return;
		t_begin_crit(ERR_DSEFAIL);
		assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn);
		cs_addrs->ti->early_tn++;
		blkBase = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr);
		if (NULL == blkBase)
		{
			rel_crit(gv_cur_region);
			util_out_print("Error: Unable to read buffer.", TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		/* Create a null update array for a block */
		if (ismap)
		{
			BLK_ADDR(blkid_ptr, sizeof(block_id), block_id);
			*blkid_ptr = 0;
			t_write_map(patch_curr_blk, blkBase, (unsigned char *)blkid_ptr, cs_addrs->ti->curr_tn);
			cr_array_index = 0;
			block_saved = FALSE;
		} else
		{
			BLK_INIT(bs_ptr, bs1);
			BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr));
			BLK_FINI(bs_ptr, bs1);
			t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, blkBase,
						((blk_hdr_ptr_t)blkBase)->levl, TRUE, FALSE);
			cr_array_index = 0;
			block_saved = FALSE;
			if (JNL_ENABLED(cs_data))
			{
				JNL_SHORT_TIME(jgbl.gbl_jrec_time);	/* needed for jnl_put_jrt_pini() and jnl_write_aimg_rec() */
				jnl_status = jnl_ensure_open();
				if (0 == jnl_status)
				{
					cse = (cw_set_element *)(&cw_set[0]);
					cse->new_buff = non_tp_jfb_buff_ptr;
					gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, x);
					cse->done = TRUE;
					if (0 == cs_addrs->jnl->pini_addr)
						jnl_put_jrt_pini(cs_addrs);
					jnl_write_aimg_rec(cs_addrs, cse->blk, (blk_hdr_ptr_t)cse->new_buff);
				} else
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			}
		}
		/* Pass the desired tn "x" as argument to bg_update or mm_update */
		if (dba_bg == cs_addrs->hdr->acc_meth)
			bg_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi);
		else
			mm_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi);
		cs_addrs->ti->curr_tn++;
		assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn);
		/* the following code is analogous to that in t_end and should be maintained in a similar fashion */
		while (cr_array_index)
			cr_array[--cr_array_index]->in_cw_set = FALSE;
		rel_crit(gv_cur_region);
		if (block_saved)
			backup_buffer_flush(gv_cur_region);
		UNIX_ONLY(
			if (unhandled_stale_timer_pop)
				process_deferred_stale();
		)
		wcs_timer_start(gv_cur_region, TRUE);
	}
Example #12
0
/******************************************************************************************
Input Parameters:
	level: level of working block
	dest_blk_id: last destination used for swap
Output Parameters:
	kill_set_ptr: Kill set to be freed
	*exclude_glist_ptr: List of globals not to be moved for a swap destination
Input/Output Parameters:
	gv_target : as working block's history
	reorg_gv_target->hist : as desitnitions block's history
 ******************************************************************************************/
enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr)
{
	unsigned char		x_blk_lmap;
	unsigned short		temp_ushort;
	int			rec_size1, rec_size2;
	int			wlevel, nslevel, dest_blk_level;
	int			piece_len1, piece_len2, first_offset, second_offset,
				work_blk_size, work_parent_size, dest_blk_size, dest_parent_size;
	int			dest_child_cycle;
	int			blk_seg_cnt, blk_size;
	trans_num		ctn;
	int			key_len, key_len_dir;
	block_id		dest_blk_id, work_blk_id, child1, child2;
	enum cdb_sc		status;
	srch_hist 		*dest_hist_ptr, *dir_hist_ptr;
	cache_rec_ptr_t		dest_child_cr;
	blk_segment		*bs1, *bs_ptr;
	sm_uc_ptr_t		saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr,
				bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1;
	boolean_t		gbl_target_was_set, blk_was_free, deleted;
	gv_namehead		*save_targ;
	srch_blk_status		bmlhist, destblkhist, *hist_ptr;
	unsigned char    	save_cw_set_depth;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned int		bsiz;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	dest_blk_id = *pdest_blk_id;
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	if (NULL == TREF(gv_reorgkey))
		GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ));
	dest_hist_ptr = &(reorg_gv_target->hist);
	dir_hist_ptr = reorg_gv_target->alt_hist;
	blk_size = cs_data->blk_size;
	work_parent_ptr = gv_target->hist.h[level+1].buffaddr;
	work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz;
	work_blk_ptr = gv_target->hist.h[level].buffaddr;
	work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz;
	work_blk_id = gv_target->hist.h[level].blk_num;
	if (blk_size < work_blk_size)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	cws_reorg_remove_index = 0;
	/*===== Infinite loop to find the destination block =====*/
	for ( ; ; )
	{
		blk_was_free = FALSE;
		INCR_BLK_NUM(dest_blk_id);
		/* A Pre-order traversal should not cause a child block to go to its parent.
		 * However, in case it happens because already the organization was like that or for any other reason, skip swap.
		 * If we decide to swap, code below should be changed to take care of the special case.
		 * Still a grand-child can go to its grand-parent. This is rare and following code can handle it.
		 */
		if (dest_blk_id == gv_target->hist.h[level+1].blk_num)
			continue;
		if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id)
		{
			*pdest_blk_id = dest_blk_id;
			return cdb_sc_oprnotneeded;
		}
		ctn = cs_addrs->ti->curr_tn;
		/* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk")
		 * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below).
		 * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to
		 * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not
		 * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and
		 * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon
		 * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not
		 * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing
		 * a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array"
		 * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread
		 * in one iteration down below. And that number is the sum of
		 *	+     MAX_BT_DEPTH : for the t_qread while loop down the tree done below
		 *	+ 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below
		 *	+ 2                : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a
		 *			     bitmap block done inside the call to get_lmap below
		 *	= 3 * MAX_BT_DEPTH + 2
		 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2.
		 * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h
		 */
		/* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */
		for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--)
		{
			deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]);
			assert(deleted);
		}
		/* read corresponding bitmap block before attempting to read destination  block.
		 * if bitmap indicates block is free, we will not read the destination block
		 */
		bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
		if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap ||
			((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) ||
			((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badbitmap;
		}
		if (BLK_FREE != x_blk_lmap)
		{	/* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block
			 * in case we later detect that the before-image needs to be written.
			 */
			if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			destblkhist.blk_num = dest_blk_id;
			destblkhist.buffaddr = dest_blk_ptr;
			destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl;
		}
		if (BLK_BUSY != x_blk_lmap)
		{	/* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */
			blk_was_free = TRUE;
			break;
		}
		/* dest_blk_id might contain a *-record only.
		 * So follow the pointer to go to the data/index block, which has a non-* key to search.
		 */
		nslevel = dest_blk_level;
		if (MAX_BT_DEPTH <= nslevel)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_maxlvl;
		}
		rec_base = dest_blk_ptr + SIZEOF(blk_hdr);
		GET_RSIZ(rec_size1, rec_base);
		tblk_ptr = dest_blk_ptr;
		while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel))
		{
			GET_LONG(child1, (rec_base + SIZEOF(rec_hdr)));
			if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_rdfail;
			}
			if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			/* leaf of a killed GVT can have block header only.   Skip those blocks */
			if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
				break;
			nslevel--;
			rec_base = tblk_ptr + SIZEOF(blk_hdr);
			GET_RSIZ(rec_size1, rec_base);
		}
		/* leaf of a killed GVT can have block header only.   Skip those blocks */
		if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
			continue;
		/* get length of global variable name (do not read subscript) for dest_blk_id */
		GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr));
		/* key_len = length of 1st key value (including subscript) for dest_blk_id */
		GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr));
		if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len))
		{	/* Earlier used to restart here always. But dest_blk_id can be a block,
			 * which is just killed and still marked busy.  Skip it, if we are in last retry.
			 */
			if (CDB_STAGNATE <= t_tries)
				continue;
			else
				return cdb_sc_blkmod;
		}
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir);
		(TREF(gv_reorgkey))->base[key_len_dir] = 0;
		(TREF(gv_reorgkey))->end = key_len_dir;
		if (exclude_glist_ptr->next)
		{	/* exclude blocks for globals in the list of EXCLUDE option */
			if  (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr))
				continue;
		}
		save_targ = gv_target;
		if (INVALID_GV_TARGET != reset_gv_target)
			gbl_target_was_set = TRUE;
		else
		{
			gbl_target_was_set = FALSE;
			reset_gv_target = save_targ;
		}
		gv_target = reorg_gv_target;
		gv_target->root = cs_addrs->dir_tree->root;
		gv_target->clue.end = 0;
		/* assign Directory tree path to find dest_blk_id in dir_hist_ptr */
		status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr);
		if (cdb_sc_normal != status)
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return status;
		}
		if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1)
		{	/* may be in a kill_set of another process */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		for (wlevel = 0; wlevel <= dir_hist_ptr->depth &&
			dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++);
		if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id)
		{	/* do not swap a dir_tree block */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		/* gv_reorgkey will now have the first key from dest_blk_id,
		 * or, from a descendant of dest_blk_id (in case it had a *-key only).
		 */
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len);
		(TREF(gv_reorgkey))->end = key_len - 1;
		GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr));
		/* Get root of GVT for dest_blk_id */
		GET_LONG(gv_target->root,
			dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir);
		if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return cdb_sc_blkmod;
		}
		/* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */
		gv_target->clue.end = 0;
		status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
		RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
		if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */
			dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */
			continue;
		if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			return (cdb_sc_normal != status ? status : cdb_sc_blkmod);
		}
		for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++)
			dest_hist_ptr->h[wlevel].tn = ctn;
		dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr;
		dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz;
		dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr;
		dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz;
		break;
	}
	/*===== End of infinite loop to find the destination block =====*/
	/*-----------------------------------------------------
	   Now modify blocks for swapping. Maximum of 4 blocks.
	   -----------------------------------------------------*/
	if (!blk_was_free)
	{	/* 1: dest_blk_id into work_blk_id */
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI (bs_ptr,bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level].blk_num == work_blk_id);
		assert(gv_target->hist.h[level].buffaddr == work_blk_ptr);
		t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN);
	}
	/* 2: work_blk_id into dest_blk_id */
	if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* work_blk_id will be swapped with its child.
		 * This is the only vertical swap.  Here working block goes to its child.
		 * Working block cannot goto its parent because of traversal
		 */
		if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, dest_parent_size, unsigned char);
		memcpy(saved_blk, dest_parent_ptr, dest_parent_size);
		first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset;
		GET_RSIZ(rec_size1, saved_blk + first_offset);
		if (work_blk_size < first_offset + rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		piece_len1 =  first_offset + rec_size1;
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num);
		assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr);
		t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
	} else /* free block or, when working block does not move vertically (swap with parent/child) */
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, work_blk_size, unsigned char);
		memcpy(saved_blk, work_blk_ptr, work_blk_size);
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		if (blk_was_free)
		{
			tmpcse = &cw_set[cw_set_depth];
			t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level);
			/* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode
			 * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here.
			 */
			assert(dest_blk_id == tmpcse->blk);
			tmpcse->mode = gds_t_acquired;
			/* If snapshots are in progress, we might want to read the before images of the FREE blocks also.
			 * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence
			 * will not read the before images of the FREE blocks in t_end. To workaround this, set
			 * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of
			 * the FREE blocks if needed.
			 */
			(BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse);
			/* No need to write before-image in case the block is FREE. In case the database had never been fully
			 * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered
			 * FREE (i.e. no need to write before-images since backward journal recovery will never be expected
			 * to take the database to a point BEFORE the mupip upgrade).
			 */
			if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once)
				tmpcse->old_block = NULL;
			else
			{	/* Destination is a recycled block that needs a before image */
				tmpcse->old_block = destblkhist.buffaddr;
				/* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */
				tmpcse->cr = destblkhist.cr;
				tmpcse->cycle = destblkhist.cycle;
				jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL;
				if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
				{	/* Compute CHECKSUM for writing PBLK record before getting crit.
					 * It is possible that we are reading a block that is actually marked free in
					 * the bitmap (due to concurrency issues at this point). Therefore we might be
					 * actually reading uninitialized block headers and in turn a bad value of
					 * "old_block->bsiz". Restart if we ever access a buffer whose size is greater
					 * than the db block size.
					 */
					bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
					if (bsiz > blk_size)
					{
						assert(CDB_STAGNATE > t_tries);
						return cdb_sc_lostbmlcr;
					}
					JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz);
				}
			}
			assert(GDSVCURR == tmpcse->ondsk_blkver);	/* should have been set by t_create above */
		} else
		{
			hist_ptr = &dest_hist_ptr->h[dest_blk_level];
			assert(dest_blk_id == hist_ptr->blk_num);
			assert(dest_blk_ptr == hist_ptr->buffaddr);
			t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	if (!blk_was_free)
	{	/* 3: Parent of destination block (may be parent of working block too) */
		if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* dest parent == work_blk parent */
			BLK_INIT(bs_ptr, bs1);
			/* Interchange pointer to dest_blk_id and work_blk_id */
			if (level != dest_blk_level ||
				gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				first_offset = gv_target->hist.h[level+1].curr_rec.offset;
				second_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
			} else
			{
				first_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
				second_offset = gv_target->hist.h[level+1].curr_rec.offset;
			}
			GET_RSIZ(rec_size1, dest_parent_ptr + first_offset);
			GET_RSIZ(rec_size2, dest_parent_ptr + second_offset);
			if (dest_parent_size < first_offset + rec_size1 ||
				dest_parent_size < second_offset + rec_size2 ||
				BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			piece_len1 =  first_offset + rec_size1 - SIZEOF(block_id);
			piece_len2 =  second_offset + rec_size2 - SIZEOF(block_id);
			GET_LONG(child1, dest_parent_ptr + piece_len1);
			GET_LONG(child2, dest_parent_ptr + piece_len2);
			BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child2);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1,
				second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1);
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child1);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2,
				dest_parent_size - second_offset - rec_size2);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(level == dest_blk_level);
			assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr);
			t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
		} else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* Destination block moved in the position of working block.
			 * So destination block's parent's pointer should be changed to work_blk_id
			 */
			BLK_INIT(bs_ptr, bs1);
			GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset);
			if (dest_parent_size < rec_size1 +  dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset ||
				BSTAR_REC_SIZE > rec_size1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr),
			    dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, work_blk_id);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1,
				dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset  - rec_size1);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr);
			t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1,
				FALSE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	/* 4: Parent of working block, if different than destination's parent or, destination was a free block */
	if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id  */
		GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset));
		if (work_parent_size < rec_size1 +  gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr),
			gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, dest_blk_id);
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1,
			work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr);
		t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
	}
	/* else already taken care of, when dest_blk_id moved */
	if (blk_was_free)
	{	/* A free/recycled block will become busy block.
		 * So the local bitmap must be updated.
		 * Local bit map block will be added in the list of update arrray for concurrency check and
		 * 	also the cw_set element will be created to mark the free/recycled block as free.
		 * kill_set_ptr will save the block which will become free.
		 */
		child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */
		bmlhist.buffaddr = bmp_buff;
		bmlhist.blk_num = child1;
		child1 = dest_blk_id - child1;
		assert(child1);
		PUT_LONG(update_array_ptr, child1);
		/* Need to put bit maps on the end of the cw set for concurrency checking.
		 * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth".
		 * Hence the save and restore logic (for "cw_set_depth") below.
		 */
		save_cw_set_depth = cw_set_depth;
		assert(!cw_map_depth);
		t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1);	/* will increment cw_set_depth */
		cw_map_depth = cw_set_depth;		/* set cw_map_depth to the latest cw_set_depth */
		cw_set_depth = save_cw_set_depth;	/* restore cw_set_depth */
		/* t_write_map simulation end */
		update_array_ptr += SIZEOF(block_id);
		child1 = 0;
		PUT_LONG(update_array_ptr, child1);
		update_array_ptr += SIZEOF(block_id);
		assert(1 == cw_set[cw_map_depth - 1].reference_cnt);	/* 1 free block is now becoming BLK_USED in the bitmap */
		/* working block will be removed */
		kill_set_ptr->blk[kill_set_ptr->used].flag = 0;
		kill_set_ptr->blk[kill_set_ptr->used].level = 0;
		kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id;
	}
	*pdest_blk_id = dest_blk_id;
	return cdb_sc_normal;
}
Example #13
0
void dse_maps(void)
{
	block_id		blk, bml_blk;
	blk_segment		*bs1, *bs_ptr;
	int4			blk_seg_cnt, blk_size;		/* needed for BLK_INIT, BLK_SEG and BLK_FINI macros */
	sm_uc_ptr_t		bp;
	char			util_buff[MAX_UTIL_LEN];
	int4			bml_size, bml_list_size, blk_index, bml_index;
	int4			total_blks, blks_in_bitmap;
	int4			bplmap, dummy_int;
	unsigned char		*bml_list;
	cache_rec_ptr_t		cr, dummy_cr;
	bt_rec_ptr_t		btr;
	int			util_len;
	uchar_ptr_t		blk_ptr;
	boolean_t		was_crit;
	uint4			jnl_status;
	srch_blk_status		blkhist;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;

	if (CLI_PRESENT == cli_present("BUSY") || CLI_PRESENT == cli_present("FREE") ||
		CLI_PRESENT == cli_present("MASTER") || CLI_PRESENT == cli_present("RESTORE_ALL"))
	{
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	}
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	csa = cs_addrs;
	assert(&FILE_INFO(gv_cur_region)->s_addrs == csa);
	was_crit = csa->now_crit;
	if (csa->critical)
		crash_count = csa->critical->crashcnt;
	csd = csa->hdr;
	bplmap = csd->bplmap;
	if (CLI_PRESENT == cli_present("BLOCK"))
	{
		if (!cli_get_hex("BLOCK", (uint4 *)&blk))
			return;
		if (blk < 0 || blk >= csa->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	else
		blk = patch_curr_blk;
	if (CLI_PRESENT == cli_present("FREE"))
	{
		if (0 == bplmap)
		{
			util_out_print("Cannot perform map updates:  bplmap field of file header is zero.", TRUE);
			return;
		}
		if (blk / bplmap * bplmap == blk)
		{
			util_out_print("Cannot perform action on a map block.", TRUE);
			return;
		}
		bml_blk = blk / bplmap * bplmap;
		bm_setmap(bml_blk, blk, FALSE);
		return;
	}
	if (CLI_PRESENT == cli_present("BUSY"))
	{
		if (0 == bplmap)
		{
			util_out_print("Cannot perform map updates:  bplmap field of file header is zero.", TRUE);
			return;
		}
		if (blk / bplmap * bplmap == blk)
		{
			util_out_print("Cannot perform action on a map block.", TRUE);
			return;
		}
		bml_blk = blk / bplmap * bplmap;
		bm_setmap(bml_blk, blk, TRUE);
		return;
	}
	blk_size = csd->blk_size;
	if (CLI_PRESENT == cli_present("MASTER"))
	{
		if (0 == bplmap)
		{
			util_out_print("Cannot perform maps updates:  bplmap field of file header is zero.", TRUE);
			return;
		}
		if (!was_crit)
			grab_crit(gv_cur_region);
		bml_blk = blk / bplmap * bplmap;
		if (dba_mm == csd->acc_meth)
			bp = MM_BASE_ADDR(csa) + (off_t)bml_blk * blk_size;
		else
		{
			assert(dba_bg == csd->acc_meth);
			if (!(bp = t_qread(bml_blk, &dummy_int, &dummy_cr)))
				rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
		}
		if ((csa->ti->total_blks / bplmap) * bplmap == bml_blk)
			total_blks = (csa->ti->total_blks - bml_blk);
		else
			total_blks = bplmap;
		if (NO_FREE_SPACE == bml_find_free(0, bp + SIZEOF(blk_hdr), total_blks))
			bit_clear(bml_blk / bplmap, csa->bmm);
		else
			bit_set(bml_blk / bplmap, csa->bmm);
		if (bml_blk > csa->nl->highest_lbm_blk_changed)
			csa->nl->highest_lbm_blk_changed = bml_blk;
		if (!was_crit)
			rel_crit(gv_cur_region);
		return;
	}
	if (CLI_PRESENT == cli_present("RESTORE_ALL"))
	{
		if (0 == bplmap)
		{
			util_out_print("Cannot perform maps updates:  bplmap field of file header is zero.", TRUE);
			return;
		}
		total_blks = csa->ti->total_blks;
		assert(ROUND_DOWN2(blk_size, 2 * SIZEOF(int4)) == blk_size);
		bml_size = BM_SIZE(bplmap);
		bml_list_size = (total_blks + bplmap - 1) / bplmap * bml_size;
		bml_list = (unsigned char *)malloc(bml_list_size);
		for (blk_index = 0, bml_index = 0;  blk_index < total_blks; blk_index += bplmap, bml_index++)
			bml_newmap((blk_hdr_ptr_t)(bml_list + bml_index * bml_size), bml_size, csa->ti->curr_tn);
		if (!was_crit)
		{
			grab_crit(gv_cur_region);
			csa->hold_onto_crit = TRUE;	/* need to do this AFTER grab_crit */
		}
		blk = get_dir_root();
		assert(blk < bplmap);
		csa->ti->free_blocks = total_blks - DIVIDE_ROUND_UP(total_blks, bplmap);
		bml_busy(blk, bml_list + SIZEOF(blk_hdr));
		csa->ti->free_blocks =  csa->ti->free_blocks - 1;
		dse_m_rest(blk, bml_list, bml_size, &csa->ti->free_blocks, TRUE);
		for (blk_index = 0, bml_index = 0;  blk_index < total_blks; blk_index += bplmap, bml_index++)
		{
			t_begin_crit(ERR_DSEFAIL);
			CHECK_TN(csa, csd, csd->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
			CWS_RESET;
			CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
			assert(csa->ti->early_tn == csa->ti->curr_tn);
			blk_ptr = bml_list + bml_index * bml_size;
			blkhist.blk_num = blk_index;
			if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
				rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			BLK_INIT(bs_ptr, bs1);
			BLK_SEG(bs_ptr, blk_ptr + SIZEOF(blk_hdr), bml_size - SIZEOF(blk_hdr));
			BLK_FINI(bs_ptr, bs1);
			t_write(&blkhist, (unsigned char *)bs1, 0, 0, LCL_MAP_LEVL, TRUE, FALSE, GDS_WRITE_KILLTN);
			BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn);
			t_end(&dummy_hist, NULL, csa->ti->curr_tn);
		}
		/* Fill in master map */
		for (blk_index = 0, bml_index = 0;  blk_index < total_blks; blk_index += bplmap, bml_index++)
		{
			blks_in_bitmap = (blk_index + bplmap <= total_blks) ? bplmap : total_blks - blk_index;
			assert(1 < blks_in_bitmap);	/* the last valid block in the database should never be a bitmap block */
			if (NO_FREE_SPACE != bml_find_free(0, (bml_list + bml_index * bml_size) + SIZEOF(blk_hdr), blks_in_bitmap))
				bit_set(blk_index / bplmap, csa->bmm);
			else
				bit_clear(blk_index / bplmap, csa->bmm);
			if (blk_index > csa->nl->highest_lbm_blk_changed)
				csa->nl->highest_lbm_blk_changed = blk_index;
		}
		if (!was_crit)
		{
			csa->hold_onto_crit = FALSE;	/* need to do this before the rel_crit */
			rel_crit(gv_cur_region);
		}
		if (unhandled_stale_timer_pop)
			process_deferred_stale();
		free(bml_list);
		csd->kill_in_prog = csd->abandoned_kills = 0;
		return;
	}
	MEMCPY_LIT(util_buff, "!/Block ");
	util_len = SIZEOF("!/Block ") - 1;
	util_len += i2hex_nofill(blk, (uchar_ptr_t)&util_buff[util_len], 8);
	memcpy(&util_buff[util_len], " is marked !AD in its local bit map.!/",
		SIZEOF(" is marked !AD in its local bit map.!/") - 1);
	util_len += SIZEOF(" is marked !AD in its local bit map.!/") - 1;
	util_buff[util_len] = 0;
	if (!was_crit)
		grab_crit(gv_cur_region);
	util_out_print(util_buff, TRUE, 4, dse_is_blk_free(blk, &dummy_int, &dummy_cr) ? "free" : "busy");
	if (!was_crit)
		rel_crit(gv_cur_region);
	return;
}
Example #14
0
void dse_chng_bhead(void)
{
	block_id		blk;
	int4			x;
	trans_num		tn;
	cache_rec_ptr_t		cr;
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	boolean_t		ismap;
	boolean_t		chng_blk;
	boolean_t		was_crit;
	boolean_t		was_hold_onto_crit;
	uint4			mapsize;
	srch_blk_status		blkhist;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
#	ifdef GTM_CRYPT
	int			req_enc_blk_size;
	int			crypt_status;
	blk_hdr_ptr_t		bp, save_bp, save_old_block;
#	endif

	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DSEFAIL);
	error_def(ERR_DBRDONLY);

        if (gv_cur_region->read_only)
                rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	chng_blk = FALSE;
	csa = cs_addrs;
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if (!cli_get_hex("BLOCK", (uint4 *)&blk))
			return;
		if (blk < 0 || blk > csa->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	csd = csa->hdr;
	assert(csd == cs_data);
	blk_size = csd->blk_size;
	ismap = (patch_curr_blk / csd->bplmap * csd->bplmap == patch_curr_blk);
	mapsize = BM_SIZE(csd->bplmap);

	t_begin_crit(ERR_DSEFAIL);
	blkhist.blk_num = patch_curr_blk;
	if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr;

	if (cli_present("LEVEL") == CLI_PRESENT)
	{
		if (!cli_get_hex("LEVEL", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
	 	new_hdr.levl = (unsigned char)x;

		chng_blk = TRUE;
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (cli_present("BSIZ") == CLI_PRESENT)
	{
		if (!cli_get_hex("BSIZ", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		} else if (x < SIZEOF(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, csa);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad block build.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, csa->ti->curr_tn);
		t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED);
	}
	if (cli_present("TN") == CLI_PRESENT)
	{
		if (!cli_get_hex64("TN", &tn))
			return;
		was_crit = csa->now_crit;
		t_begin_crit(ERR_DSEFAIL);
		CHECK_TN(csa, csd, csd->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
		assert(csa->ti->early_tn == csa->ti->curr_tn);
		if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		{
			util_out_print("Error: Unable to read buffer.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		BLK_FINI(bs_ptr, bs1);
		t_write(&blkhist, (unsigned char *)bs1, 0, 0,
			((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		/* Pass the desired tn as argument to bg_update/mm_update below */
		BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, tn);
		was_hold_onto_crit = csa->hold_onto_crit;
		csa->hold_onto_crit = TRUE;
		t_end(&dummy_hist, NULL, tn);
#		ifdef GTM_CRYPT
		if (csd->is_encrypted && (tn < csa->ti->curr_tn))
		{	/* BG and db encryption is enabled and the DSE update caused the block-header to potentially have a tn
			 * that is LESS than what it had before. At this point, the global buffer (corresponding to blkhist.blk_num)
			 * reflects the contents of the block AFTER the dse update (bg_update would have touched this) whereas
			 * the corresponding encryption global buffer reflects the contents of the block BEFORE the update.
			 * Normally wcs_wtstart takes care of propagating the tn update from the regular global buffer to the
			 * corresponding encryption buffer. But if before it gets a chance, let us say a process goes to t_end
			 * as part of a subsequent transaction and updates this same block. Since the  blk-hdr-tn potentially
			 * decreased, it is possible that the PBLK writing check (comparing blk-hdr-tn with the epoch_tn) decides
			 * to write a PBLK for this block (even though a PBLK was already written for this block as part of a
			 * previous DSE CHANGE -BL -TN in the same epoch). In this case, since the db is encrypted, the logic
			 * will assume there were no updates to this block since the last time wcs_wtstart updated the encryption
			 * buffer and therefore use that to write the pblk, which is incorrect since it does not yet contain the
			 * tn update. The consequence of this is would be writing an older before-image PBLK) record to the
			 * journal file. To prevent this situation, we update the encryption buffer here (before releasing crit)
			 * using logic like that in wcs_wtstart to ensure it is in sync with the regular global buffer.
			 * Note:
			 * Although we use cw_set[0] to access the global buffer corresponding to the block number being updated,
			 * cw_set_depth at this point is 0 because t_end resets it. This is considered safe since cw_set is a
			 * static array (as opposed to malloc'ed memory) and hence is always available and valid until it gets
			 * overwritten by subsequent updates.
			 */
			bp = (blk_hdr_ptr_t)GDS_ANY_REL2ABS(csa, cw_set[0].cr->buffaddr);
			DBG_ENSURE_PTR_IS_VALID_GLOBUFF(csa, csd, (sm_uc_ptr_t)bp);
			save_bp = (blk_hdr_ptr_t)GDS_ANY_ENCRYPTGLOBUF(bp, csa);
			DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(csa, csd, (sm_uc_ptr_t)save_bp);
			assert((bp->bsiz <= csd->blk_size) && (bp->bsiz >= SIZEOF(*bp)));
			req_enc_blk_size = MIN(csd->blk_size, bp->bsiz) - SIZEOF(*bp);
			if (BLK_NEEDS_ENCRYPTION(bp->levl, req_enc_blk_size))
			{
				ASSERT_ENCRYPTION_INITIALIZED;
				memcpy(save_bp, bp, SIZEOF(blk_hdr));
				GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, (char *)(bp + 1), req_enc_blk_size,
					(char *)(save_bp + 1), crypt_status);
				if (0 != crypt_status)
					GC_GTM_PUTMSG(crypt_status, gv_cur_region->dyn.addr->fname);
			} else
				memcpy(save_bp, bp, bp->bsiz);
		}
#		endif
		if (!was_hold_onto_crit)
			csa->hold_onto_crit = FALSE;
		if (!was_crit)
			rel_crit(gv_cur_region);
		if (unhandled_stale_timer_pop)
			process_deferred_stale();
	}
	return;
}
Example #15
0
void	mu_reorg_upgrd_dwngrd(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		*blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp;
	block_id		startblk_input, stopblk_input;
	boolean_t		upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb;
	boolean_t		startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free;
	cache_rec_ptr_t		cr;
	char			*bml_lcl_buff = NULL, *command, *reorg_command;
	sm_uc_ptr_t		bptr = NULL;
	cw_set_element		*cse;
	enum cdb_sc		cdb_status;
	enum db_ver		new_db_format, ondsk_blkver;
	gd_region		*reg;
	int			cycle;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks;
	int4			status, status1, mapsize, lcnt, bml_status;
	reorg_stats_t		reorg_stats;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	sm_uc_ptr_t		blkBase, bml_sm_buff;	/* shared memory pointer to the bitmap global buffer */
	srch_hist		alt_hist;
	srch_blk_status		*blkhist, bmlhist;
	tp_region		*rptr;
	trans_num		curr_tn;
	unsigned char    	save_cw_set_depth;
	uint4			lcl_update_trans;

	region    = (CLI_PRESENT == cli_present("REGION"));
	upgrade   = (CLI_PRESENT == cli_present("UPGRADE"));
	downgrade = (CLI_PRESENT == cli_present("DOWNGRADE"));
	assert(upgrade && !downgrade || !upgrade && downgrade);
	command = upgrade ? "UPGRADE" : "DOWNGRADE";
	reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE";
	reorg_entiredb = TRUE;	/* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */
	startblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input)))
	{
		reorg_entiredb = FALSE;
		startblk_specified = TRUE;
	}
	stopblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input)))
	{
		reorg_entiredb = FALSE;
		stopblk_specified = TRUE;
	}
	mu_reorg_upgrd_dwngrd_in_prog = TRUE;
	mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE;

	assert(region);
	status = SS_NORMAL;
	error_mupip = FALSE;
	gvinit();	/* initialize gd_header (needed by the later call to mu_getlst) */
	mu_getlst("REG_NAME", SIZEOF(tp_region));	/* get the parameter corresponding to REGION qualifier */
	if (error_mupip)
	{
		util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command));
		mupip_exit(ERR_MUNOACTION);
	}
	assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize);	/* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */
	gv_target = targ_alloc(gv_keysize, NULL, NULL);	/* t_begin needs this initialized */
	gv_target_list = NULL;
	memset(&alt_hist, 0, SIZEOF(alt_hist));	/* null-initialize history */
	blkhist = &alt_hist.h[0];
	for (rptr = grlist;  NULL != rptr;  rptr = rptr->fPtr)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			break;
		reg = rptr->reg;
		util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
		if (reg_cmcheck(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot run across network",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		mu_reorg_process = TRUE;	/* gvcst_init will use this value to use gtm_poollimit settings. */
		gvcst_init(reg);
		mu_reorg_process = FALSE;
		assert(update_array != NULL);
		/* access method stored in global directory and database file header might be different in which case
		 * the database setting prevails. therefore, the access method check can be done only after opening
		 * the database (i.e. after the gvcst_init)
		 */
		if (dba_bg != REG_ACC_METH(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		/* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to
		 * the same db file correspond to only one grlist entry.
		 */
		assert(FALSE == reg->was_open);
		TP_CHANGE_REG(reg);	/* sets gv_cur_region, cs_addrs, cs_data */
		csa = cs_addrs;
		csd = cs_data;
		blk_size = csd->blk_size;	/* "blk_size" is used by the BLK_FINI macro */
		if (reg->read_only)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		assert(GDSVCURR == GDSV6); /* so we trip this assert in case GDSVCURR changes without a change to this module */
		new_db_format = (upgrade ? GDSV6 : GDSV4);
		grab_crit(reg);
		curr_tn = csd->trans_hist.curr_tn;
		/* set the desired db format in the file header to the appropriate version, increment transaction number */
		status1 = desired_db_format_set(reg, new_db_format, reorg_command);
		assert(csa->now_crit);	/* desired_db_format_set() should not have released crit */
		first_reorg_in_this_db_fmt = TRUE;	/* with the current desired_db_format, this is the first reorg */
		if (SS_NORMAL != status1)
		{	/* "desired_db_format_set" would have printed appropriate error messages */
			if (ERR_MUNOACTION != status1)
			{	/* real error occurred while setting the db format. skip to next region */
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
			util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
			if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn)
				first_reorg_in_this_db_fmt = FALSE;
		} else
			util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
		assert(dba_bg == csd->acc_meth);
		/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		actual_blks2upgrd = csd->blks_to_upgrd;
		/* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH
		 * 	"csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing.
		 * If MUPIP REORG UPGRADE and all non-free blocks need to be upgraded then again we can skip processing.
		 */
		if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
			|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
		{
			util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD",
				       TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			rel_crit(reg);
			continue;
		}
		stop_blk = total_blks;
		if (stopblk_specified && stopblk_input <= stop_blk)
			stop_blk = stopblk_input;
		if (first_reorg_in_this_db_fmt)
		{	/* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */
			csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn;
			csd->reorg_upgrd_dwngrd_restart_block = 0;
			start_blk = (startblk_specified ? startblk_input : 0);
		} else
		{	/* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running
			 * or a previously running REORG of the same type was interrupted (Ctrl-Ced).
			 * In either case resume processing from whatever restart block number is stored in fileheader
			 * the only exception is if "STARTBLK" was specified in the input in which use that unconditionally.
			 */
			start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block);
		}
		if (start_blk > stop_blk)
			start_blk = stop_blk;
		mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn;
		/* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now
		 * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not.
		 */
		if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
		{
			rel_crit(reg);
			gtm_putmsg_csa(CSA_ARG(csa)
				VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		rel_crit(reg);
		/* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */
		status1 = SS_NORMAL;
		start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP);
		last_bmp  = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP);
		curblk = start_blk;	/* curblk is the block to be upgraded/downgraded */
		util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		if (NULL != bptr)
		{	/* malloc/free "bptr" for each region as GDS block-size can be different */
			free(bptr);
			bptr = NULL;
		}
		memset(&reorg_stats, 0, SIZEOF(reorg_stats));	/* initialize statistics for this region */
		for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP)
		{
			if (mu_ctrly_occurred || mu_ctrlc_occurred)
			{
				status1 = ERR_MUNOFINISH;
				break;
			}
			/* --------------------------------------------------------------
			 *             Read in current bitmap block
			 * --------------------------------------------------------------
			 */
			assert(!csa->now_crit);
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* bring block into the cache outside of crit */
			reorg_stats.blks_read_from_disk_bmp++;
			grab_crit_encr_cycle_sync(reg); /* needed so t_qread does not return NULL below */
			if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
			{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
				/* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment
				 * of this field should be done only if a db format change did not occur.
				 */
				rel_crit(reg);
				status1 = ERR_MUNOFINISH;
				/* This "start_tn" check is redone after the for-loop and an error message is printed there */
				break;
			} else if (reorg_entiredb)
			{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
				assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp));
				csd->reorg_upgrd_dwngrd_restart_block = curbmp;	/* previous blocks have been upgraded/downgraded */
			}
			/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete.
			 * Repeat check done a few steps earlier outside of this for loop.
			 */
			total_blks = csd->trans_hist.total_blks;
			free_blks = csd->trans_hist.free_blocks;
			actual_blks2upgrd = csd->blks_to_upgrd;
			if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
				|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
			{
				rel_crit(reg);
				break;
			}
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* now that in crit, note down stable buffer */
			if (NULL == bml_sm_buff)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			ondsk_blkver = cr->ondsk_blkver;	/* note down db fmt on disk for bitmap block */
			/* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit.
			 * We are interested in those blocks that are currently marked as USED in the bitmap.
			 * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks.
			 * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the
			 * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job.
			 * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing.
			 */
			if (NULL == bml_lcl_buff)
				bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP));
			memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP));
			if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE))
			{	/* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */
				assert(FALSE);	/* in pro, skip ugprading/downgarding all blks in this unreliable local bitmap */
				rel_crit(reg);
				util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.",
					TRUE, REG_LEN_STR(reg), curbmp);
				status1 = ERR_MUNOFINISH;
				continue;
			}
			rel_crit(reg);
			/* ------------------------------------------------------------------------
			 *         Upgrade/Downgrade all BUSY blocks in the current bitmap
			 * ------------------------------------------------------------------------
			 */
			curblk = (curbmp == start_bmp) ? start_blk : curbmp;
			mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : BLKS_PER_LMAP;
			assert(0 != mapsize);
			assert(mapsize <= BLKS_PER_LMAP);
			db_got_to_v5_once = csd->db_got_to_v5_once;
			for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++)
			{
				if (mu_ctrly_occurred || mu_ctrlc_occurred)
				{
					status1 = ERR_MUNOFINISH;
					goto stop_reorg_on_this_reg;	/* goto needed because of nested FOR Loop */
				}
				GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status);
				assert(BLK_MAPINVALID != bml_status); /* cert_blk ran clean so we dont expect invalid entries */
				if (BLK_FREE == bml_status)
				{
					reorg_stats.blks_skipped_free++;
					continue;
				}
				/* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */
				if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
				{	/* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */
					if (lcnt)
					{	/* non-bitmap block */
						/* read in block from disk into private buffer. dont pollute the cache yet */
						if (NULL == bptr)
							bptr = (sm_uc_ptr_t)malloc(blk_size);
						status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE);
						/* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be
						 * upgraded and if its block size was too big to allow the extra block-header space
						 * requirements for a dynamic upgrade. a MUPIP REORG DOWNGRADE should not error out
						 * in that case as the block is already in the downgraded format.
						 */
						if (SS_NORMAL != status1)
						{
							if (!upgrade && (ERR_DYNUPGRDFAIL == status1))
							{
								assert(GDSV4 == new_db_format);
								ondsk_blkver = new_db_format;
							} else
							{
								gtm_putmsg_csa(CSA_ARG(csa)
									VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status1);
								util_out_print("Region !AD : Error occurred while reading block "
									"[0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
								status1 = ERR_MUNOFINISH;
								goto stop_reorg_on_this_reg;/* goto needed due to nested FOR Loop */
							}
						}
						reorg_stats.blks_read_from_disk_nonbmp++;
					} /* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */
					if (new_db_format == ondsk_blkver)
					{
						assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1)));
						status1 = SS_NORMAL;	/* treat DYNUPGRDFAIL as no error in case of downgrade */
						reorg_stats.blks_skipped_newfmtindisk++;
						continue;	/* current disk version is identical to what is desired */
					}
					assert(SS_NORMAL == status1);
				}
				/* Begin non-TP transaction to upgrade/downgrade the block.
				 * The way we do that is by updating the block using a null update array.
				 * Any update to a block will trigger an automatic upgrade/downgrade of the block based on
				 * 	the current fileheader desired_db_format setting and we use that here.
				 */
				t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
				for (; ;)
				{
					CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
					curr_tn = csd->trans_hist.curr_tn;
					db_got_to_v5_once = csd->db_got_to_v5_once;
					if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
					{
						blkhist->cse = NULL;	/* start afresh (do not use value from previous retry) */
						blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
						if (NULL == blkBase)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						blkhist->blk_num = curblk;
						blkhist->buffaddr = blkBase;
						ondsk_blkver = blkhist->cr->ondsk_blkver;
						new_hdr = *(blk_hdr_ptr_t)blkBase;
						mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn;
						mark_blk_free = FALSE;
						inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd;
					} else
					{
						mark_blk_free = TRUE;
						inctn_opcode = inctn_blkmarkfree;
					}
					inctn_detail.blknum_struct.blknum = curblk;
					/* t_end assumes that the history it is passed does not contain a bitmap block.
					 * for bitmap block, the history validation information is passed through cse instead.
					 * therefore we need to handle bitmap and non-bitmap cases separately.
					 */
					if (!lcnt)
					{	/* Means a bitmap block.
						 * At this point we can do a "new_db_format != ondsk_blkver" check to determine
						 * if the block got converted since we did the dsk_read (see the non-bitmap case
						 * for a similar check done there), but in that case we will have a transaction
						 * which has read 1 bitmap block and is updating no block. "t_end" currently cannot
						 * handle this case as it expects any bitmap block that needs validation to also
						 * have a corresponding cse which will hold its history. Hence we avoid doing the
						 * new_db_format check. The only disadvantage of this is that we will end up
						 * modifying the bitmap block as part of this transaction (in an attempt to convert
						 * its ondsk_blkver) even though it is already in the right format. Since this
						 * overhead is going to be one per bitmap block and since the block is in the cache
						 * at this point, we should not lose much.
						 */
						assert(!mark_blk_free);
						BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
						*blkid_ptr = 0;
						t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
						assert(&alt_hist.h[0] == blkhist);
						alt_hist.h[0].blk_num = 0; /* create empty history for bitmap block */
						assert(update_trans);
					} else
					{	/* non-bitmap block. fill in history for validation in t_end */
						assert(curblk);	/* we should never come here for block 0 (bitmap) */
						if (!mark_blk_free)
						{
							assert(blkhist->blk_num == curblk);
							assert(blkhist->buffaddr == blkBase);
							blkhist->tn      = curr_tn;
							alt_hist.h[1].blk_num = 0;
						}
						/* Also need to pass the bitmap as history to detect if any concurrent M-kill
						 * is freeing up the same USED block that we are trying to convert OR if any
						 * concurrent M-set is reusing the same RECYCLED block that we are trying to
						 * convert. Because of t_end currently not being able to validate a bitmap
						 * without that simultaneously having a cse, we need to create a cse for the
						 * bitmap that is used only for bitmap history validation, but should not be
						 * used to update the contents of the bitmap block in bg_update.
						 */
						bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
						if (NULL == bmlhist.buffaddr)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						bmlhist.blk_num = curbmp;
						bmlhist.tn = curr_tn;
						GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status);
						if (BLK_MAPINVALID == bml_status)
						{
							t_retry(cdb_sc_lostbmlcr);
							continue;
						}
						if (!mark_blk_free)
						{
							if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status))
							{	/* block still needs to be converted. create cse */
								BLK_INIT(bs_ptr, bs1);
								BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr),
									new_hdr.bsiz - SIZEOF(new_hdr));
								BLK_FINI(bs_ptr, bs1);
								t_write(blkhist, (unsigned char *)bs1, 0, 0,
									((blk_hdr_ptr_t)blkBase)->levl, FALSE,
									FALSE, GDS_WRITE_PLAIN);
								/* The directory tree status for now is only used to determine
								 * whether writing the block to snapshot file (see t_end_sysops.c).
 								 * For reorg upgrade/downgrade process, the block is updated in a
								 * sequential way without changing the gv_target. In this case, we
								 * assume the block is in directory tree so as to have it written to
								 * the snapshot file.
			 					 */
								BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
								/* reset update_trans in case previous retry had set it to 0 */
								update_trans = UPDTRNS_DB_UPDATED_MASK;
								if (BLK_RECYCLED == bml_status)
								{	/* If block that we are upgarding is RECYCLED, indicate to
									 * bg_update that blks_to_upgrd counter should NOT be
									 * touched in this case by setting "mode" to a special value
									 */
									assert(cw_set[cw_set_depth-1].mode == gds_t_write);
									cw_set[cw_set_depth-1].mode = gds_t_write_recycled;
									/* we SET block as NOT RECYCLED, otherwise, the mm_update()
									 * or bg_update_phase2 may skip writing it to snapshot file
									 * when its level is 0
									 */
									BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state);
								}
							} else
							{	/* Block got converted by another process since we did the dsk_read.
								 * 	or this block became marked free in the bitmap.
								 * No need to update this block. just call t_end for validation of
								 * 	both the non-bitmap block as well as the bitmap block.
								 * Note down that this transaction is no longer updating any blocks.
								 */
								update_trans = 0;
							}
							/* Need to put bit maps on the end of the cw set for concurrency checking.
							 * We want to simulate t_write_map, except we want to update "cw_map_depth"
							 * instead of "cw_set_depth". Hence the save and restore logic below.
							 * This part of the code is similar to the one in mu_swap_blk.c
							 */
							save_cw_set_depth = cw_set_depth;
							assert(!cw_map_depth);
							t_write_map(&bmlhist, NULL, curr_tn, 0); /* will increment cw_set_depth */
							cw_map_depth = cw_set_depth; /* set cw_map_depth to latest cw_set_depth */
							cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */
							/* t_write_map simulation end */
						} else
						{
							if (BLK_RECYCLED != bml_status)
							{	/* Block was RECYCLED at beginning but no longer so. Retry */
								t_retry(cdb_sc_bmlmod);
								continue;
							}
							/* Mark recycled block as FREE in bitmap */
							assert(lcnt == (curblk - curbmp));
							assert(update_array_ptr == update_array);
							*((block_id *)update_array_ptr) = lcnt;
							update_array_ptr += SIZEOF(block_id);
							/* the following assumes SIZEOF(block_id) == SIZEOF(int) */
							assert(SIZEOF(block_id) == SIZEOF(int));
							*(int *)update_array_ptr = 0;
							t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0);
							update_trans = UPDTRNS_DB_UPDATED_MASK;
						}
					}
					assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans));
					lcl_update_trans = update_trans;	/* take a copy before t_end modifies it */
					if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
					{	/* In case this is MM and t_end() remapped an extended database, reset csd */
						assert(csd == cs_data);
						if (!lcl_update_trans)
						{
							assert(lcnt);
							assert(!mark_blk_free);
							assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status));
							if (BLK_BUSY != bml_status)
								reorg_stats.blks_skipped_free++;
							else
								reorg_stats.blks_skipped_newfmtincache++;
						} else if (!lcnt)
							reorg_stats.blks_converted_bmp++;
						else
							reorg_stats.blks_converted_nonbmp++;
						break;
					}
					assert(csd == cs_data);
				}
			}
		}
	stop_reorg_on_this_reg:
		/* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */
		grab_crit(reg);
		blocks_left = 0;
		assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd);
		actual_blks2upgrd = csd->blks_to_upgrd;
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		/* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt
		 * change. This is because let us say we are doing REORG UPGRADE. A concurrent REORG DOWNGRADE would
		 * have reset "csd->reorg_upgrd_dwngrd_restart_block" field to 0 and if that reorg is interrupted by a
		 * Ctrl-C (before this reorg came here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to
		 * a non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this
		 * field to "curblk" as it will be mis-interpreted as the number of blocks that have been DOWNgraded.
		 */
		set_fully_upgraded = FALSE;
		if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
		{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
			util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else if (reorg_entiredb)
		{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
			assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk);
			csd->reorg_upgrd_dwngrd_restart_block = curblk;	/* blocks lesser than this have been upgraded/downgraded */
			expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks);
			blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd);
			/* If this reorg command went through all blocks in the database, then it should have
			 * 	correctly concluded at this point whether the reorg is complete or not.
			 * If this reorg command started from where a previous incomplete reorg left
			 *	(i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial
			 *	GDS blocks that it skipped are completely {up,down}graded or not.
			 */
			assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt);
			/* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by
			 * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is
			 * 0 in the file-header and the desired_db_format did not change since the start of the REORG,
			 * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE.
			 */
			if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd))
			{
				csd->fully_upgraded = TRUE;
				csd->db_got_to_v5_once = TRUE;
				set_fully_upgraded = TRUE;
			}
			/* flush all changes noted down in the file-header */
			if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
			{
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4,
					LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
		}
		curr_tn = csd->trans_hist.curr_tn;
		rel_crit(reg);
		util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		/* Print statistics */
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap)     : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (Free)              : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free);
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk)   : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache)  : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache);
		util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap)          : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp);
		util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap)      : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp);
		if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left))
		{	/* file-header counter does not match what reorg on the entire database expected to see */
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd);
			util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
		/* Issue success or failure message for this region */
		if (SS_NORMAL == status1)
		{	/* issue success only if REORG did not encounter any error in its processing */
			if (set_fully_upgraded)
				util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg));
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg),
										process_id, process_id, &curr_tn);
		} else
		{
			assert(ERR_MUNOFINISH == status1);
			assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status));
			util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/",
					TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = status1;
		}
	}
	if (NULL != bptr)
		free(bptr);
	if (NULL != bml_lcl_buff)
		free(bml_lcl_buff);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY);
		status = ERR_MUNOFINISH;
	}
	mupip_exit(status);
}
Example #16
0
void dse_rmrec(void)
{
	block_id	blk;
	blk_segment	*bs1, *bs_ptr;
	int4		blk_seg_cnt, blk_size, count;
	sm_uc_ptr_t	bp;
	uchar_ptr_t	lbp, b_top, rp, r_top, key_top, rp_base;
	char		cc, comp_key[256], cc_base;
	short int	size, i, rsize;
	cw_set_element	*cse;
	error_def(ERR_DSEFAIL);
	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DBRDONLY);

        if (gv_cur_region->read_only)
                rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	assert(update_array);
	/* reset new block mechanism */
	update_array_ptr = update_array;
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if(!cli_get_hex("BLOCK", &blk))
			return;
		if (blk < 0 || blk >= cs_addrs->ti->total_blks || !(blk % cs_addrs->hdr->bplmap))
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	if (cli_present("COUNT") == CLI_PRESENT)
	{
		if (!cli_get_hex("COUNT", &count) || count < 1)
			return;
	} else
		count = 1;
	t_begin_crit(ERR_DSEFAIL);
	blk_size = cs_addrs->hdr->blk_size;
	if(!(bp = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	lbp = (uchar_ptr_t)malloc(blk_size);
	memcpy(lbp, bp, blk_size);

	if (((blk_hdr_ptr_t)lbp)->bsiz > cs_addrs->hdr->blk_size)
		b_top = lbp + cs_addrs->hdr->blk_size;
	else if (((blk_hdr_ptr_t)lbp)->bsiz < sizeof(blk_hdr))
		b_top = lbp + sizeof(blk_hdr);
	else
		b_top = lbp + ((blk_hdr_ptr_t)lbp)->bsiz;
	if (cli_present("RECORD") == CLI_PRESENT)
	{
		if (!(rp = rp_base = skan_rnum(lbp, FALSE)))
		{
			free(lbp);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
	} else if (!(rp = rp_base = skan_offset(lbp, FALSE)))
	{
		free(lbp);
		t_abort(gv_cur_region, cs_addrs);
		return;
	}
	memcpy(&comp_key[0], &patch_comp_key[0], sizeof(patch_comp_key));
	cc_base = patch_comp_count;
	for ( ; ; )
	{
		GET_SHORT(rsize, &((rec_hdr_ptr_t)rp)->rsiz);
		if (rsize < sizeof(rec_hdr))
			r_top = rp + sizeof(rec_hdr);
		else
			r_top = rp + rsize;
		if (r_top >= b_top)
		{
			if (count)
			{	if (((blk_hdr_ptr_t) lbp)->levl)
					util_out_print("Warning:  removed a star record from the end of this block.", TRUE);
				((blk_hdr_ptr_t)lbp)->bsiz = rp_base - lbp;
				BLK_INIT(bs_ptr, bs1);
				BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr),
					(int)((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr));
				if (!BLK_FINI(bs_ptr, bs1))
				{
					util_out_print("Error: bad blk build.",TRUE);
					free(lbp);
					t_abort(gv_cur_region, cs_addrs);
					return;
				}
				t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE);
				BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse);
				t_end(&dummy_hist, 0);
				free(lbp);
				return;
			}
			r_top = b_top;
		}
		if (((blk_hdr_ptr_t)lbp)->levl)
			key_top = r_top - sizeof(block_id);
		else
		{
			for (key_top = rp + sizeof(rec_hdr); key_top < r_top; )
				if (!*key_top++ && !*key_top++)
					break;
		}
		if (((rec_hdr_ptr_t)rp)->cmpc > patch_comp_count)
			cc = patch_comp_count;
		else
			cc = ((rec_hdr_ptr_t)rp)->cmpc;
		size = key_top - rp - sizeof(rec_hdr);
		if (size < 0)
			size = 0;
		else if (size > sizeof(patch_comp_key) - 2)
			size = sizeof(patch_comp_key) - 2;
		memcpy(&patch_comp_key[cc], rp + sizeof(rec_hdr), size);
		patch_comp_count = cc + size;
		if (--count >= 0)
		{
			rp = r_top;
			continue;
		}
		size = (patch_comp_count < cc_base) ? patch_comp_count : cc_base;
		for (i = 0; i < size && patch_comp_key[i] == comp_key[i]; i++)
			;
		((rec_hdr_ptr_t)rp_base)->cmpc = i;
		rsize = r_top - key_top + sizeof(rec_hdr) + patch_comp_count - i;
		PUT_SHORT(&((rec_hdr_ptr_t)rp_base)->rsiz, rsize);
		memcpy(rp_base + sizeof(rec_hdr), &patch_comp_key[i], patch_comp_count - i);
		memcpy(rp_base + sizeof(rec_hdr) + patch_comp_count - i, key_top, b_top - key_top);
		((blk_hdr_ptr_t)lbp)->bsiz = rp_base + rsize - lbp + b_top - r_top;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr), ((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad blk build.", TRUE);
			free(lbp);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE);
		BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse);
		t_end(&dummy_hist, 0);
		free(lbp);
		return;
	}
}
Example #17
0
trans_num gvcst_bmp_mark_free(kill_set *ks)
{
	block_id	bit_map, next_bm, *updptr;
	blk_ident	*blk, *blk_top, *nextblk;
	trans_num	ctn, start_db_fmt_tn;
	unsigned int	len;
	int4		blk_prev_version;
	srch_hist	alt_hist;
	trans_num	ret_tn = 0;
	boolean_t	visit_blks;
	srch_blk_status	bmphist;
	cache_rec_ptr_t	cr;
	enum db_ver	ondsk_blkver;

	error_def(ERR_GVKILLFAIL);

	assert(inctn_bmp_mark_free_gtm == inctn_opcode || inctn_bmp_mark_free_mu_reorg == inctn_opcode);
	/* Note down the desired_db_format_tn before you start relying on cs_data->fully_upgraded.
	 * If the db is fully_upgraded, take the optimal path that does not need to read each block being freed.
	 * But in order to detect concurrent desired_db_format changes, note down the tn (when the last format change occurred)
	 * 	before the fully_upgraded check	and after having noted down the database current_tn.
	 * If they are the same, then we are guaranteed no concurrent desired_db_format change occurred.
	 * If they are not, then fall through to the non-optimal path where each to-be-killed block has to be visited.
	 * The reason we need to visit every block in case desired_db_format changes is to take care of the case where
	 *	MUPIP REORG DOWNGRADE concurrently changes a block that we are about to free.
	 */
	start_db_fmt_tn = cs_data->desired_db_format_tn;
	visit_blks = (!cs_data->fully_upgraded);	/* Local evaluation */
	assert(!visit_blks || (visit_blks && dba_bg == cs_addrs->hdr->acc_meth)); /* must have blks_to_upgrd == 0 for non-BG */
	assert(!dollar_tlevel); 			/* Should NOT be in TP now */
	blk = &ks->blk[0];
	blk_top = &ks->blk[ks->used];
	if (!visit_blks)
	{	/* Database has been completely upgraded. Free all blocks in one bitmap as part of one transaction. */
		assert(cs_data->db_got_to_v5_once); /* assert all V4 fmt blocks (including RECYCLED) have space for V5 upgrade */
		inctn_detail.blknum_struct.blknum = 0; /* to indicate no adjustment to "blks_to_upgrd" necessary */
		for ( ; blk < blk_top;  blk = nextblk)
		{
			if (0 != blk->flag)
			{
				nextblk = blk + 1;
				continue;
			}
			assert(0 < blk->block);
			assert((int4)blk->block < cs_addrs->ti->total_blks);
			bit_map = ROUND_DOWN2((int)blk->block, BLKS_PER_LMAP);
			next_bm = bit_map + BLKS_PER_LMAP;
			CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
			/* Scan for the next local bitmap */
			updptr = (block_id *)update_array_ptr;
			for (nextblk = blk;
				(0 == nextblk->flag) && (nextblk < blk_top) && ((block_id)nextblk->block < next_bm);
				++nextblk)
			{
				assert((block_id)nextblk->block - bit_map);
				*updptr++ = (block_id)nextblk->block - bit_map;
			}
			len = (unsigned int)((char *)nextblk - (char *)blk);
			update_array_ptr = (char *)updptr;
			alt_hist.h[0].blk_num = 0;			/* need for calls to T_END for bitmaps */
			/* the following assumes SIZEOF(blk_ident) == SIZEOF(int) */
			assert(SIZEOF(blk_ident) == SIZEOF(int));
			*(int *)update_array_ptr = 0;
			t_begin(ERR_GVKILLFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;)
			{
				ctn = cs_addrs->ti->curr_tn;
				/* Need a read fence before reading fields from cs_data as we are reading outside
				 * of crit and relying on this value to detect desired db format state change.
				 */
				SHM_READ_MEMORY_BARRIER;
				if (start_db_fmt_tn != cs_data->desired_db_format_tn)
				{	/* Concurrent db format change has occurred. Need to visit every block to be killed
					 * to determine its block format. Fall through to the non-optimal path below
					 */
					ret_tn = 0;
					break;
				}
				bmphist.blk_num = bit_map;
				if (NULL == (bmphist.buffaddr = t_qread(bmphist.blk_num, (sm_int_ptr_t)&bmphist.cycle,
									&bmphist.cr)))
				{
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				t_write_map(&bmphist, (uchar_ptr_t)update_array, ctn, -(int4)(nextblk - blk));
				if ((trans_num)0 == (ret_tn = t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)))
					continue;
				break;
			}
			if (0 == ret_tn) /* db format change occurred. Fall through to below for loop to visit each block */
				break;
		}
	}	/* for all blocks in the kill_set */
Example #18
0
boolean_t dse_r_dmp(void)
{
	block_id	blk;
	sm_uc_ptr_t	bp, b_top, rp;
	int4		count;
	int4		dummy_int;
	cache_rec_ptr_t	dummy_cr;
	short 		record, size;
	boolean_t	was_crit;
	int4		nocrit_present;

	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_CTRLC);

	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		uint4 tmp_blk;

		if(!cli_get_hex("BLOCK", &tmp_blk))
			return FALSE;
		blk = (block_id)tmp_blk;
		if (blk < 0 || blk >= cs_addrs->ti->total_blks || !(blk % cs_addrs->hdr->bplmap))
		{
			util_out_print("Error: invalid block number.", TRUE);
			return FALSE;
		}
		patch_curr_blk = blk;
	}
	if (cli_present("COUNT") == CLI_PRESENT)
	{
		if (!cli_get_hex("COUNT", (uint4 *)&count))
			return FALSE;
	} else
		count = 1;
	was_crit = cs_addrs->now_crit;
	nocrit_present = (CLI_NEGATED == cli_present("CRIT"));
	DSE_GRAB_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
	if (!(bp = t_qread(patch_curr_blk, &dummy_int, &dummy_cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
		b_top = bp + cs_addrs->hdr->blk_size;
	else if (((blk_hdr_ptr_t) bp)->bsiz < SIZEOF(blk_hdr))
		b_top = bp + SIZEOF(blk_hdr);
	else
		b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;
	if (((blk_hdr_ptr_t) bp)->levl && patch_is_fdmp)
	{
		DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
		util_out_print("Error:  cannot perform GLO/ZWR dump on index block.", TRUE);
		return FALSE;
	}
	if (cli_present("RECORD") == CLI_PRESENT)
	{
		if (!(rp = skan_rnum (bp, FALSE)))
		{
			DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
			return FALSE;
		}
	} else if (!(rp = skan_offset (bp, FALSE)))
	{
		DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
		return FALSE;
	}
	util_out_print(0, TRUE);
	for ( ; 0 < count; count--)
	{
		if (util_interrupt || !(rp = dump_record(rp, patch_curr_blk, bp, b_top)))
			break;
		patch_rec_counter += 1;
	}
	DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
	if (util_interrupt)
		rts_error(VARLSTCNT(1) ERR_CTRLC);
	else if (cli_present("HEADER") == CLI_NEGATED)
		util_out_print(0, TRUE);
	return TRUE;
}
Example #19
0
void dse_chng_bhead(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		blk;
	boolean_t		chng_blk, ismap, was_hold_onto_crit;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			x;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		blkhist;
	trans_num		tn;
	uint4			mapsize;

	csa = cs_addrs;
        if (gv_cur_region->read_only)
                rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	chng_blk = FALSE;
	if (BADDSEBLK == (blk = dse_getblk("BLOCK", DSEBMLOK, DSEBLKCUR)))		/* WARNING: assignment */
		return;
	csd = csa->hdr;
	assert(csd == cs_data);
	blk_size = csd->blk_size;
	ismap = IS_BITMAP_BLK(blk);
	mapsize = BM_SIZE(csd->bplmap);
	t_begin_crit(ERR_DSEFAIL);
	blkhist.blk_num = blk;
	if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr;
	if (CLI_PRESENT == cli_present("LEVEL"))
	{
		if (!cli_get_hex("LEVEL", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		new_hdr.levl = (unsigned char)x;
		chng_blk = TRUE;
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (CLI_PRESENT == cli_present("BSIZ"))
	{
		if (!cli_get_hex("BSIZ", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		} else if (x < SIZEOF(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, csa);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_AIMGBLKFAIL, 3, blk, DB_LEN_STR(gv_cur_region));
			t_abort(gv_cur_region, csa);
			return;
		}
		t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn);
		t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED);
	}
	if (CLI_PRESENT == cli_present("TN"))
	{
		if (!cli_get_hex64("TN", &tn))
			return;
		t_begin_crit(ERR_DSEFAIL);
		CHECK_TN(csa, csd, csd->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
		assert(csa->ti->early_tn == csa->ti->curr_tn);
		if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		BLK_FINI(bs_ptr, bs1);
		t_write(&blkhist, (unsigned char *)bs1, 0, 0,
			((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		/* Pass the desired tn as argument to bg_update/mm_update below */
		BUILD_AIMG_IF_JNL_ENABLED_AND_T_END_WITH_EFFECTIVE_TN(csa, csd, tn, &dummy_hist);
	}
	return;
}
Example #20
0
void dse_f_free(void)
{
	block_id	blk;
	bool		in_last_bmap;
	char		util_buff[MAX_UTIL_LEN];
	sm_uc_ptr_t	lmap_base;
	int4		bplmap, total_blks;
	int4		util_len, master_bit, lmap_bit, hint_over_bplmap, hint_mod_bplmap;
	boolean_t	was_crit;
	int4		dummy_int, nocrit_present;
	cache_rec_ptr_t	dummy_cr;
	error_def(ERR_DSEBLKRDFAIL);

	if (cs_addrs->hdr->bplmap == 0)
	{	util_out_print("Cannot perform free block search:  bplmap field of file header is zero.", TRUE);
		return;
	}
	bplmap = cs_addrs->hdr->bplmap;

	if(!cli_get_hex("HINT", (uint4 *)&blk))
		return;
	if (blk < 0 || blk >= cs_addrs->ti->total_blks || (blk / bplmap * bplmap == blk))
	{	util_out_print("Error: invalid block number.", TRUE);
		return;
	}
	hint_over_bplmap = blk / bplmap;
	master_bit = bmm_find_free(hint_over_bplmap, cs_addrs->bmm,
			(cs_addrs->ti->total_blks + bplmap - 1)/ bplmap);
	if (master_bit == -1)
	{	util_out_print("Error: database full.", TRUE);
		return;
	}
	in_last_bmap = (master_bit == (cs_addrs->ti->total_blks / bplmap));
	was_crit = cs_addrs->now_crit;
	nocrit_present = (CLI_NEGATED == cli_present("CRIT"));
	DSE_GRAB_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
	if(!(lmap_base = t_qread(master_bit * bplmap, &dummy_int, &dummy_cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (master_bit == hint_over_bplmap)
		hint_mod_bplmap = blk - blk / bplmap * bplmap;
	else
		hint_mod_bplmap = 0;
	if (in_last_bmap)
		total_blks = (cs_addrs->ti->total_blks - master_bit);
	else
		total_blks = bplmap;
	lmap_bit = bml_find_free(hint_mod_bplmap, lmap_base + SIZEOF(blk_hdr), total_blks);
	if (lmap_bit == -1)
	{	memcpy(util_buff, "Error: bit map in block ", 24);
		util_len = 24;
		util_len += i2hex_nofill(master_bit * bplmap, (uchar_ptr_t)&util_buff[util_len], 8);
		memcpy(&util_buff[util_len], " incorrectly marked free in master map.", 39);
		util_len += 39;
		util_buff[util_len] = 0;
		util_out_print(util_buff, TRUE);
		DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
		return;
	}
	memcpy(util_buff, "!/Next free block is ", 21);
	util_len = 21;
	util_len += i2hex_nofill(master_bit * bplmap + lmap_bit, (uchar_ptr_t)&util_buff[util_len], 8);
	memcpy(&util_buff[util_len], ".!/", 3);
	util_len += 3;
	util_buff[util_len] = 0;
	util_out_print(util_buff, TRUE);
	DSE_REL_CRIT_AS_APPROPRIATE(was_crit, nocrit_present, cs_addrs, gv_cur_region);
	return;
}
Example #21
0
/*
 * Performs a random traversal for the sampling methods
 */
enum cdb_sc rand_traverse(double *r)
{
	sm_uc_ptr_t			pVal, pTop, pRec, pBlkBase;
	register gv_namehead		*pTarg;
	register srch_blk_status	*pCurr;
	register srch_hist		*pTargHist;
	block_id			nBlkId;
	block_id			valBlk[MAX_RECS_PER_BLK];	/* valBlk[j] := value in j-th record of current block */
	unsigned char			nLevl;
	cache_rec_ptr_t			cr;
	int				cycle;
	trans_num			tn;
	sm_uc_ptr_t			buffaddr;
	unsigned short			nRecLen;
	uint4				tmp;
	boolean_t			is_mm;
	int4				random;
	int4				rCnt;			/* number of entries in valBlk */
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	is_mm = (dba_mm == cs_data->acc_meth);
	pTarg = gv_target;
	pTargHist = &gv_target->hist;
	/* The following largely mimics gvcst_search/gvcst_search_blk */
	nBlkId = pTarg->root;
	tn = cs_addrs->ti->curr_tn;
	if (NULL == (pBlkBase = t_qread(nBlkId, (sm_int_ptr_t)&cycle, &cr)))
		return (enum cdb_sc)rdfail_detail;
	nLevl = ((blk_hdr_ptr_t)pBlkBase)->levl;
	if (MAX_BT_DEPTH < (int)nLevl)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_maxlvl;
	}
	if (0 == (int)nLevl)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_badlvl;
	}
	pTargHist->depth = (int)nLevl;
	pCurr = &pTargHist->h[nLevl];
	(pCurr + 1)->blk_num = 0;
	pCurr->tn = tn;
	pCurr->cycle = cycle;
	pCurr->cr = cr;
	for (;;)
	{
		assert(pCurr->level == nLevl);
		pCurr->cse = NULL;
		pCurr->blk_num = nBlkId;
		pCurr->buffaddr = pBlkBase;
		for (	rCnt = 0, pRec = pBlkBase + SIZEOF(blk_hdr), pTop = pBlkBase + ((blk_hdr_ptr_t)pBlkBase)->bsiz;
				pRec != pTop && rCnt < MAX_RECS_PER_BLK;
				rCnt++, pRec += nRecLen		)
		{	/* enumerate records in block */
			GET_USHORT(nRecLen, &((rec_hdr_ptr_t)pRec)->rsiz);
			pVal = pRec + nRecLen - SIZEOF(block_id);
			if (nRecLen == 0)
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_badoffset;
			}
			if (pRec + nRecLen > pTop)
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_blklenerr;
			}
			GET_LONG(tmp, pVal);
			valBlk[rCnt] = tmp;
		}
		r[nLevl] = rCnt;
		/* randomly select next block */
		random = (int4)(rCnt * drand48());
		random = random & 0x7fffffff; /* to make sure that the sign bit(msb) is off */
		nBlkId = valBlk[random];
		if (is_mm && (nBlkId > cs_addrs->total_blks))
		{
			if (cs_addrs->total_blks < cs_addrs->ti->total_blks)
				return cdb_sc_helpedout;
			else
				return cdb_sc_blknumerr;
		}
		--pCurr; --nLevl;
		if (nLevl < 1)
			break;
		pCurr->tn = cs_addrs->ti->curr_tn;
		if (NULL == (pBlkBase = t_qread(nBlkId, (sm_int_ptr_t)&pCurr->cycle, &pCurr->cr)))
			return (enum cdb_sc)rdfail_detail;
		if (((blk_hdr_ptr_t)pBlkBase)->levl != nLevl)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badlvl;
		}
	}
	return cdb_sc_normal;
}
Example #22
0
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr)
{
	cw_set_element	*cs1;
	sm_uc_ptr_t	bmp;
	block_id	bml, hint, hint_cycled, hint_limit;
	block_id_ptr_t	b_ptr;
	int		cw_set_top, depth, lcnt;
	unsigned int	local_maps, map_size, n_decrements = 0, total_blks;
	trans_num	ctn;
	int4		free_bit, offset;
	uint4		space_needed;
	uint4		status;
	srch_blk_status	blkhist;

	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	if (orig_hint >= total_blks)		/* for TP, hint can be > total_blks */
		orig_hint = 1;
	hint = orig_hint;
	hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP);
	local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
	for (lcnt = 0; lcnt <= local_maps; lcnt++)
	{
		bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled))
		{	/* if no free space or might have looped to original map, extend */
			if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled))
			{
				hint_cycled = hint_limit;
				hint = 1;
				continue;
			}
			if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks)))
				return (status);
			if (dba_mm == cs_data->acc_meth)
				return (FILE_EXTENDED);
			hint = total_blks;
			total_blks = cs_addrs->ti->total_blks;
			hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
			local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
			/*
			 * note that you can make an optimization of not going back over the whole database and going over
			 * only the extended section. but since it is very unlikely that a free block won't be found
			 * in the extended section and the fact that we are starting from the extended section in either
			 * approach and the fact that we have a GTMASSERT to check that we don't have a lot of
			 * free blocks while doing an extend and the fact that it is very easy to make the change to do
			 * a full-pass, the full-pass solution is currently being implemented
			 */
			lcnt = -1;	/* allow it one extra pass to ensure that it can take advantage of the entension */
			n_decrements++;	/* used only for debugging purposes */
			continue;
		}
		bml *= BLKS_PER_LMAP;
		if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml)
		{	/* not within requested map */
			if ((bml < hint) && (hint_cycled))	/* wrap? - second one should force an extend for sure */
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
			hint = bml + 1;				/* start at beginning */
		}
		if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml)
			map_size = (total_blks - bml);
		else
			map_size = BLKS_PER_LMAP;
		if (0 != dollar_tlevel)
		{
			depth = cw_work;
			cw_set_top = *cw_depth_ptr;
			if (depth < cw_set_top)
				tp_get_cw(cs, cw_work, &cs1);
			for (; depth < cw_set_top;  depth++, cs1 = cs1->next_cw_set)
			{	/* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */
				if (bml == cs1->blk)
				{
					TRAVERSE_TO_LATEST_CSE(cs1);
					break;
				}
			}
			if (depth >= cw_set_top)
			{
				assert(cw_set_top == depth);
				depth = 0;
			}
		} else
		{
			for (depth = *cw_depth_ptr - 1; depth >= cw_work;  depth--)
			{	/* do non-tp back to front, because of adjacency */
				if (bml == (cs + depth)->blk)
				{
					cs1 = cs + depth;
					break;
				}
			}
			if (depth < cw_work)
			{
				assert(cw_work - 1 == depth);
				depth = 0;
			}
		}
		if (0 == depth)
		{
			ctn = cs_addrs->ti->curr_tn;
			if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr)))
				return MAP_RD_FAIL;
			if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
			{
				assert(CDB_STAGNATE > t_tries);
				rdfail_detail = cdb_sc_badbitmap;
				return MAP_RD_FAIL;
			}
			offset = 0;
		} else
		{
			bmp = cs1->old_block;
			b_ptr = (block_id_ptr_t)(cs1->upd_addr);
			b_ptr += cs1->reference_cnt - 1;
			offset = *b_ptr + 1;
		}
		if (offset < map_size)
		{
			free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used);
			if (MAP_RD_FAIL == free_bit)
				return MAP_RD_FAIL;
		} else
			free_bit = NO_FREE_SPACE;
		if (NO_FREE_SPACE != free_bit)
			break;
		if ((hint = bml + BLKS_PER_LMAP) >= total_blks)		/* if map is full, start at 1st blk in next map */
		{	/* wrap - second one should force an extend for sure */
			hint = 1;
			if (hint_cycled)
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
		}
		if ((0 == depth) && (FALSE != cs_addrs->now_crit))	/* if it's from the cw_set, its state is murky */
			bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data));	/* if crit, repair master map error */
	}
	/* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */
	if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries)
	{	/* bad free bit */
		assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps));	/* All maps full, should have extended */
		GTMASSERT;
	}
	if (0 != depth)
	{
		b_ptr = (block_id_ptr_t)(cs1->upd_addr);
		b_ptr += cs1->reference_cnt++;
		*b_ptr = free_bit;
	} else
	{
		space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id);
		if (dollar_tlevel)
		{
			ENSURE_UPDATE_ARRAY_SPACE(space_needed);	/* have brackets for "if" for macros */
		}
		BLK_ADDR(b_ptr, space_needed, block_id);
		memset(b_ptr, 0, space_needed);
		*b_ptr = free_bit;
		blkhist.blk_num = bml;
		blkhist.buffaddr = bmp;	/* cycle and cr have already been assigned from t_qread */
		t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */
	}
	return bml + free_bit;
}
Example #23
0
boolean_t mu_truncate(int4 truncate_percent)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t 	csd;
	int			num_local_maps;
	int 			lmap_num, lmap_blk_num;
	int			bml_status, sigkill;
	int			save_errno;
	int			ftrunc_status;
	uint4			jnl_status;
	uint4			old_total, new_total;
	uint4			old_free, new_free;
	uint4			end_blocks;
	int4			blks_in_lmap, blk;
	gtm_uint64_t		before_trunc_file_size;
	off_t			trunc_file_size;
	off_t			padding;
	uchar_ptr_t		lmap_addr;
	boolean_t		was_crit;
	uint4			found_busy_blk;
	srch_blk_status		bmphist;
	srch_blk_status 	*blkhist;
	srch_hist		alt_hist;
	trans_num		curr_tn;
	blk_hdr_ptr_t		lmap_blk_hdr;
	block_id		*blkid_ptr;
	unix_db_info    	*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	char			*err_msg;
	intrpt_state_t		prev_intrpt_state;
	off_t			offset;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csa = cs_addrs;
	csd = cs_data;
	if (dba_mm == csd->acc_meth)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		return TRUE;
	}
	/* already checked for parallel truncates on this region --- see mupip_reorg.c */
	gv_target = NULL;
	assert(csa->nl->trunc_pid == process_id);
	assert(dba_mm != csd->acc_meth);
	old_total = csa->ti->total_blks;
	old_free = csa->ti->free_blocks;
	sigkill = 0;
	found_busy_blk = 0;
	memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */
	assert(csd->bplmap == BLKS_PER_LMAP);
	end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */
	if (0 == end_blocks)
		end_blocks = BLKS_PER_LMAP;
	num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP);
	/* ======================================== PHASE 1 ======================================== */
	for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			return TRUE;
		assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */
		if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region),
					truncate_percent);
			return TRUE;
		}
		lmap_blk_num = lmap_num * BLKS_PER_LMAP;
		if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
		{
			found_busy_blk = lmap_blk_num;
			break;
		}
		blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP;
		/* Loop through non-bitmap blocks of this lmap, do recycled2free */
		DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n",
			lmap_num, lmap_blk_num, blks_in_lmap));
		for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;)
		{
			t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;) /* retry loop for recycled to free transactions */
			{
				curr_tn = csd->trans_hist.curr_tn;
				/* Read the nth local bitmap into memory */
				bmphist.blk_num = lmap_blk_num;
				bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr);
				lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr;
				if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
				{ /* Could not read the block successfully. Retry. */
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr);
				/* starting from the hint (blk itself), find the first busy or recycled block */
				blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status);
				assert(blk < BLKS_PER_LMAP);
				if (blk == -1 || blk >= blks_in_lmap)
				{ /* done with this lmap, continue to next */
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
				{ /* stop processing blocks... skip ahead to phase 2 */
					found_busy_blk = lmap_blk_num;
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_RECYCLED == bml_status)
				{ /* Write PBLK records for recycled blocks only if before_image journaling is
				   * enabled. t_end() takes care of checking if journaling is enabled and
				   * writing PBLK record. We have to at least mark the recycled block as free.
				   */
					RESET_UPDATE_ARRAY;
					update_trans = UPDTRNS_DB_UPDATED_MASK;
					*((block_id *)update_array_ptr) = blk;
					update_array_ptr += SIZEOF(block_id);
					*(int *)update_array_ptr = 0;
					alt_hist.h[1].blk_num = 0;
					alt_hist.h[0].level = 0;
					alt_hist.h[0].cse = NULL;
					alt_hist.h[0].tn = curr_tn;
					alt_hist.h[0].blk_num = lmap_blk_num + blk;
					alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num,
							&alt_hist.h[0].cycle, &alt_hist.h[0].cr);
					if (!alt_hist.h[0].buffaddr)
					{
						t_retry((enum cdb_sc)rdfail_detail);
						continue;
					}
					if (!t_recycled2free(&alt_hist.h[0]))
					{
						t_retry(cdb_sc_lostbmlcr);
						continue;
					}
					t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0);
					/* Set the opcode for INCTN record written by t_end() */
					inctn_opcode = inctn_blkmarkfree;
					if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
						continue;
					/* block processed, scan from the next one */
					blk++;
					break;
				} else
				{
					assert(t_tries < CDB_STAGNATE);
					t_retry(cdb_sc_badbitmap);
					continue;
				}
			} /* END recycled2free retry loop */
		} /* END scanning blocks of this particular lmap */
		/* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */
		/* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */
		DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num));
		t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
		for (;;)
		{
			RESET_UPDATE_ARRAY;
			BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
			*blkid_ptr = 0;
			update_trans = UPDTRNS_DB_UPDATED_MASK;
			inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */
			curr_tn = csd->trans_hist.curr_tn;
			blkhist = &alt_hist.h[0];
			blkhist->blk_num = lmap_blk_num;
			blkhist->tn = curr_tn;
			blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */
			/* Read the nth local bitmap into memory */
			blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
			lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr;
			if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
			{ /* Could not read the block successfully. Retry. */
				t_retry((enum cdb_sc)rdfail_detail);
				continue;
			}
			t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
			blkhist->blk_num = 0; /* create empty history for bitmap block */
			if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
				continue;
			break;
		}
	} /* END scanning lmaps */
	/* ======================================== PHASE 2 ======================================== */
	assert(!csa->now_crit);
	for (;;)
	{ /* wait for FREEZE, we don't want to truncate a frozen database */
		grab_crit(gv_cur_region);
		if (FROZEN_CHILLED(cs_data))
			DO_CHILLED_AUTORELEASE(csa, cs_data);
		if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
			break;
		rel_crit(gv_cur_region);
		while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
		{
			hiber_start(1000);
			if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
				break;
		}
	}
	assert(csa->nl->trunc_pid == process_id);
	/* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */
	if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB))
	{
		assert(FALSE);
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"),
				DB_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return FALSE;
	}
	csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk);
	assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk));
	new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (csa->ti->total_blks != old_total || new_total == old_total)
	{
		assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (SNAPSHOTS_IN_PROG(csa->nl))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	}
	DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	if (JNL_ENABLED(csa))
	{ /* Write JRT_TRUNC and INCTN records */
		if (!jgbl.dont_reset_gbl_jrec_time)
		SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = csa->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, csa);
		if (SS_NORMAL != jnl_status)
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
		else
		{
			if (0 == jpc->pini_addr)
				jnl_put_jrt_pini(csa);
			jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total);
			inctn_opcode = inctn_mu_reorg;
			jnl_write_inctn_rec(csa);
			jnl_status = jnl_flush(gv_cur_region);
			if (SS_NORMAL != jnl_status)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd),
					ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"),
					jnl_status);
				assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */
			}
		}
	}
	/* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */
	curr_tn = csa->ti->curr_tn;
	CHECK_TN(csa, csd, curr_tn);
	udi = FILE_INFO(gv_cur_region);
	/* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */
	trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1));
	csd->after_trunc_total_blks = new_total;
	csd->before_trunc_free_blocks = csa->ti->free_blocks;
	csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */
	/* file size and total blocks: INCONSISTENT */
	csa->ti->total_blks = new_total;
	/* past the point of no return -- shared memory intact */
	assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total));
	csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total);
	new_free = csa->ti->free_blocks;
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	CHECK_DBSYNC(gv_cur_region, save_errno);
	/* past the point of no return -- shared memory deleted */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */
	clear_cache_array(csa, csd, gv_cur_region, new_total, old_total);
	offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size;
	save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff)));
	if (0 != save_errno)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		return FALSE;
	}
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */
	/* Execute an ftruncate() and truncate the DB file
	 * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS)
	 * It ignores kill -9 signal till its operation is completed.
	 * So we can safely assume that the result of ftruncate() will be complete.
	 */
	FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status);
	if (0 != ftrunc_status)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		/* should go through recover_truncate now, which will again try to FTRUNCATE */
		return FALSE;
	}
	/* file size and total blocks: CONSISTENT (shrunk) */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */
	csa->nl->root_search_cycle++;	/* Force concurrent processes to restart in t_end/tp_tend to make sure no one
					 * tries to commit updates past the end of the file. Bitmap validations together
					 * with highest_lbm_with_busy_blk should actually be sufficient, so this is
					 * just to be safe.
					 */
	csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */
	/* Increment TN */
	assert(csa->ti->early_tn == csa->ti->curr_tn);
	csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1;
	INCREMENT_CURR_TN(csd);
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 58 : Issue a kill -9 after after 2nd fsync */
	CHECK_DBSYNC(gv_cur_region, save_errno);
	ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	curr_tn = csa->ti->curr_tn;
	rel_crit(gv_cur_region);
	send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn);
	util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].",
					FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free);
	return TRUE;
} /* END of mu_truncate() */
/*******************************************************************************************
Input Parameter:
	blk_base = Block's base which has the key
	rec_top = record top of the record which will be expanded
Output Parameter:
	expanded_key = expanded key
	rec_size = last record size whic has the key
	keylen = key size
	keycmpc = key compression cound
	hist_ptr = history of blocks read, while expanding a *-key
		History excludes the working block from which key is expanded and
		includes the blocks read below the current block to expand a *-key
	NOTE: hist_ptr.depth will be unchanged
Return:
	cdb_sc_normal on success
	failure code on concurrency failure
 *******************************************************************************************/
enum cdb_sc gvcst_expand_any_key (sm_uc_ptr_t blk_base, sm_uc_ptr_t rec_top, sm_uc_ptr_t expanded_key,
	int *rec_size, int *keylen, int *keycmpc, srch_hist *hist_ptr)
{
	enum cdb_sc	 	status;
	unsigned char		expanded_star_key[MAX_KEY_SZ];
	unsigned short		temp_ushort;
	int			cur_level;
	int			star_keycmpc;
	int			star_keylen;
	int			star_rec_size;
	int			tblk_size;
	block_id		tblk_num;
	sm_uc_ptr_t 		rPtr1, rPtr2, curptr;


	cur_level = ((blk_hdr_ptr_t)blk_base)->levl;
	curptr = blk_base + sizeof(blk_hdr);
	*rec_size = *keycmpc = *keylen = 0;
	while (curptr < rec_top)
	{
		GET_RSIZ(*rec_size, curptr);
		if (0 == cur_level || BSTAR_REC_SIZE != *rec_size)
		{
			READ_RECORD(cur_level, curptr, *keycmpc, *rec_size, expanded_key, *keylen, status);
			if (cdb_sc_normal != status)
			{
				assert(t_tries < CDB_STAGNATE);
				return status;
			}
			else
			{
				curptr += *rec_size;
				if (curptr >= rec_top)
					break;
			}
		}
		else /* a star record in index block */
		{
			if (curptr + *rec_size != rec_top || NULL == hist_ptr)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_rmisalign;
			}
			while (0 != cur_level)
			{
				tblk_size = ((blk_hdr_ptr_t)blk_base)->bsiz;
				GET_LONG(tblk_num, blk_base + tblk_size - sizeof(block_id));
				if (0 == tblk_num  || cs_data->trans_hist.total_blks - 1 < tblk_num)
				{
					assert(t_tries < CDB_STAGNATE);
					return cdb_sc_badlvl;
				}
				cur_level--;
				hist_ptr->h[cur_level].tn =  cs_addrs->ti->curr_tn;
				if (!(blk_base = t_qread(tblk_num, (sm_int_ptr_t)(&(hist_ptr->h[cur_level].cycle)),
					&(hist_ptr->h[cur_level].cr) )))
				{
					assert(t_tries < CDB_STAGNATE);
					return rdfail_detail;
				}
				if (((blk_hdr_ptr_t)blk_base)->levl != cur_level)
				{
					assert(t_tries < CDB_STAGNATE);
					return cdb_sc_badlvl;
				}
				hist_ptr->h[cur_level].buffaddr = blk_base;
				hist_ptr->h[cur_level].blk_num = tblk_num;
				hist_ptr->h[cur_level].prev_rec.match = 0;
				hist_ptr->h[cur_level].prev_rec.offset = 0;
				hist_ptr->h[cur_level].curr_rec.match = 0;
				hist_ptr->h[cur_level].curr_rec.offset = 0;
			}
			tblk_size = ((blk_hdr_ptr_t)blk_base)->bsiz;
			/* expand *-key from right most leaf level block of the
			   sub-tree, of which, the original block is root  */
			if (cdb_sc_normal != (status = (gvcst_expand_any_key(blk_base, blk_base + tblk_size,
				expanded_star_key, &star_rec_size, &star_keylen, &star_keycmpc, hist_ptr))))
				return status;
			if (*keylen + *keycmpc) /* Previous key exists */
			{
				GET_CMPC(*keycmpc, expanded_key, &expanded_star_key[0]);
			}
			memcpy(expanded_key, expanded_star_key, star_keylen + star_keycmpc);
			*keylen = star_keylen + star_keycmpc - *keycmpc;
			*rec_size  = *keylen + *keycmpc + BSTAR_REC_SIZE;
			return cdb_sc_normal;
		} /* end else if *-record */
	}/* end of "while" loop */
	if (curptr == rec_top)
	{
		return cdb_sc_normal;
	}
	else
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_rmisalign;
	}
}
Example #25
0
void dse_m_rest (
		 block_id	blk,		/* block number */
		 unsigned char	*bml_list,	/* start of local list of local bit maps */
		 int4		bml_size,	/* size of each entry in *bml_list */
		 sm_vuint_ptr_t	blks_ptr,	/* total free blocks */
		 bool		in_dir_tree)
{
	sm_uc_ptr_t	bp, b_top, rp, r_top, bml_ptr, np, ptr;
	unsigned char	util_buff[MAX_UTIL_LEN];
	block_id	next;
	int		util_len;
	int4		dummy_int;
	cache_rec_ptr_t	dummy_cr;
	int4		bml_index;
	short		level, rsize;
	int4		bplmap;
	error_def(ERR_DSEBLKRDFAIL);
	if(!(bp = t_qread (blk, &dummy_int, &dummy_cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	if (((blk_hdr_ptr_t) bp)->bsiz > cs_addrs->hdr->blk_size)
		b_top = bp + cs_addrs->hdr->blk_size;
	else if (((blk_hdr_ptr_t) bp)->bsiz < sizeof(blk_hdr))
		b_top = bp + sizeof(blk_hdr);
	else
		b_top = bp + ((blk_hdr_ptr_t) bp)->bsiz;

	level = ((blk_hdr_ptr_t)bp)->levl;
	bplmap = cs_addrs->hdr->bplmap;

	for (rp = bp + sizeof (blk_hdr); rp < b_top ;rp = r_top)
	{	if (in_dir_tree || level > 1)	/* reread block because it may have been flushed from read	*/
		{	if (!(np = t_qread(blk,&dummy_int,&dummy_cr))) /* cache due to LRU buffer scheme and reads in recursive */
				rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);	/* calls to dse_m_rest.	*/
			if (np != bp)
			{	b_top = np + (b_top - bp);
				rp = np + (rp - bp);
				r_top = np + (r_top - bp);
				bp = np;
			}
		}
		GET_SHORT(rsize,&((rec_hdr_ptr_t)rp)->rsiz);
		r_top = rp + rsize;
		if (r_top > b_top)
			r_top = b_top;
		if (r_top - rp < (sizeof (rec_hdr) + sizeof (block_id)))
			break;
		if (in_dir_tree && level == 0)
		{
			for (ptr = rp + sizeof(rec_hdr); ; )
			{
				if (*ptr++ == 0 && *ptr++ == 0)
					break;
			}
			GET_LONG(next,ptr);
		}
		else
			GET_LONG(next,r_top - sizeof (block_id));
		if (next < 0 || next >= cs_addrs->ti->total_blks ||
			(next / bplmap * bplmap == next))
		{	memcpy(util_buff,"Invalid pointer in block ",25);
			util_len = 25;
			util_len += i2hex_nofill(blk, &util_buff[util_len], 8);
			memcpy(&util_buff[util_len], " record offset ",15);
			util_len += 15;
			util_len += i2hex_nofill((int)(rp - bp), &util_buff[util_len], 4);
			util_buff[util_len] = 0;
			util_out_print((char*)util_buff,TRUE);
			continue;
		}
		bml_index = next / bplmap;
		bml_ptr = bml_list + bml_index * bml_size;
		if (bml_busy(next - next / bplmap * bplmap, bml_ptr + sizeof(blk_hdr)))
		{	*blks_ptr = *blks_ptr - 1;
			if (((blk_hdr_ptr_t) bp)->levl > 1)
			{	dse_m_rest (next, bml_list, bml_size, blks_ptr, in_dir_tree);
			}
			else if (in_dir_tree)
			{	assert(((blk_hdr_ptr_t) bp)->levl == 0 || ((blk_hdr_ptr_t) bp)->levl == 1);
				dse_m_rest (next, bml_list, bml_size, blks_ptr, ((blk_hdr_ptr_t)bp)->levl);
			}
		}
	}
	return;
}
Example #26
0
void dse_chng_rhead(void)
{
	block_id	blk;
	sm_uc_ptr_t	bp, b_top, cp, rp;
	boolean_t	chng_rec;
	rec_hdr		new_rec;
	uint4		x;
	blk_segment	*bs1, *bs_ptr;
	int4		blk_seg_cnt, blk_size;
	srch_blk_status	blkhist;

	error_def(ERR_DBRDONLY);
	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DSEFAIL);

        if (gv_cur_region->read_only)
                rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if(!cli_get_hex("BLOCK", (uint4 *)&blk))
			return;
		patch_curr_blk = blk;
	}
	if (patch_curr_blk < 0 || patch_curr_blk >= cs_addrs->ti->total_blks || !(patch_curr_blk % cs_addrs->hdr->bplmap))
	{
		util_out_print("Error: invalid block number.", TRUE);
		return;
	}

	t_begin_crit(ERR_DSEFAIL);
	blkhist.blk_num = patch_curr_blk;
	if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	bp = blkhist.buffaddr;
	blk_size = cs_addrs->hdr->blk_size;
	chng_rec = FALSE;
	b_top = bp + ((blk_hdr_ptr_t)bp)->bsiz;
	if (((blk_hdr_ptr_t)bp)->bsiz > blk_size || ((blk_hdr_ptr_t)bp)->bsiz < SIZEOF(blk_hdr))
		chng_rec = TRUE;	/* force rewrite to correct size */
	if (cli_present("RECORD") == CLI_PRESENT)
	{
		if (!(rp = skan_rnum(bp, FALSE)))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
	} else if (!(rp = skan_offset(bp, FALSE)))
	{
		t_abort(gv_cur_region, cs_addrs);
		return;
	}
	GET_SHORT(new_rec.rsiz, &((rec_hdr_ptr_t)rp)->rsiz);
	new_rec.cmpc = ((rec_hdr_ptr_t)rp)->cmpc;
	if (cli_present("CMPC") == CLI_PRESENT)
	{
		if (!cli_get_hex("CMPC", &x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (x > 0x7f)
		{
			util_out_print("Error: invalid cmpc.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (x > patch_comp_count)
			util_out_print("Warning:  specified compression count is larger than the current expanded key size.", TRUE);
		new_rec.cmpc = x;
		chng_rec = TRUE;
	}
	if (cli_present("RSIZ") == CLI_PRESENT)
	{
		if (!cli_get_hex("RSIZ", &x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (x < SIZEOF(rec_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid rsiz.", TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		new_rec.rsiz = x;
		chng_rec = TRUE;
	}
	if (chng_rec)
	{
		BLK_INIT(bs_ptr, bs1);
		cp = bp;
		cp += SIZEOF(blk_hdr);
		if (chng_rec)
		{
			BLK_SEG(bs_ptr, cp, rp - cp);
			BLK_SEG(bs_ptr, (uchar_ptr_t)&new_rec, SIZEOF(rec_hdr));
			cp = rp + SIZEOF(rec_hdr);
		}
		if (b_top - cp)
			BLK_SEG(bs_ptr, cp, b_top - cp);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad blk build.", TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)bp)->levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		BUILD_AIMG_IF_JNL_ENABLED(cs_data, non_tp_jfb_buff_ptr, cs_addrs->ti->curr_tn);
		t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED);
	}
	return;
}
trans_num gvcst_bmp_mark_free(kill_set *ks)
{
	block_id		bit_map, next_bm, *updptr;
	blk_ident		*blk, *blk_top, *nextblk;
	trans_num		ctn, start_db_fmt_tn;
	unsigned int		len;
#	if defined(UNIX) && defined(DEBUG)
	unsigned int		lcl_t_tries;
#	endif
	int4			blk_prev_version;
	srch_hist		alt_hist;
	trans_num		ret_tn = 0;
	boolean_t		visit_blks;
	srch_blk_status		bmphist;
	cache_rec_ptr_t		cr;
	enum db_ver		ondsk_blkver;
	enum cdb_sc		status;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	TREF(in_gvcst_bmp_mark_free) = TRUE;
	assert(inctn_bmp_mark_free_gtm == inctn_opcode || inctn_bmp_mark_free_mu_reorg == inctn_opcode);
	/* Note down the desired_db_format_tn before you start relying on cs_data->fully_upgraded.
	 * If the db is fully_upgraded, take the optimal path that does not need to read each block being freed.
	 * But in order to detect concurrent desired_db_format changes, note down the tn (when the last format change occurred)
	 * 	before the fully_upgraded check	and after having noted down the database current_tn.
	 * If they are the same, then we are guaranteed no concurrent desired_db_format change occurred.
	 * If they are not, then fall through to the non-optimal path where each to-be-killed block has to be visited.
	 * The reason we need to visit every block in case desired_db_format changes is to take care of the case where
	 *	MUPIP REORG DOWNGRADE concurrently changes a block that we are about to free.
	 */
	start_db_fmt_tn = cs_data->desired_db_format_tn;
	visit_blks = (!cs_data->fully_upgraded);	/* Local evaluation */
	assert(!visit_blks || (visit_blks && dba_bg == cs_addrs->hdr->acc_meth)); /* must have blks_to_upgrd == 0 for non-BG */
	assert(!dollar_tlevel); 			/* Should NOT be in TP now */
	blk = &ks->blk[0];
	blk_top = &ks->blk[ks->used];
	if (!visit_blks)
	{	/* Database has been completely upgraded. Free all blocks in one bitmap as part of one transaction. */
		assert(cs_data->db_got_to_v5_once); /* assert all V4 fmt blocks (including RECYCLED) have space for V5 upgrade */
		inctn_detail.blknum_struct.blknum = 0; /* to indicate no adjustment to "blks_to_upgrd" necessary */
		/* If any of the mini transaction below restarts because of an online rollback, we don't want the application
		 * refresh to happen (like $ZONLNRLBK++ or rts_error(DBROLLEDBACK). This is because, although we are currently in
		 * non-tp (dollar_tleve = 0), we could actually be in a TP transaction and have actually faked dollar_tlevel. In
		 * such a case, we should NOT * be issuing a DBROLLEDBACK error as TP transactions are supposed to just restart in
		 * case of an online rollback. So, set the global variable that gtm_onln_rlbk_clnup can check and skip doing the
		 * application refresh, but will reset the clues. The next update will see the cycle mismatch and will accordingly
		 * take the right action.
		 */
		for ( ; blk < blk_top;  blk = nextblk)
		{
			if (0 != blk->flag)
			{
				nextblk = blk + 1;
				continue;
			}
			assert(0 < blk->block);
			assert((int4)blk->block < cs_addrs->ti->total_blks);
			bit_map = ROUND_DOWN2((int)blk->block, BLKS_PER_LMAP);
			next_bm = bit_map + BLKS_PER_LMAP;
			CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
			/* Scan for the next local bitmap */
			updptr = (block_id *)update_array_ptr;
			for (nextblk = blk;
				(0 == nextblk->flag) && (nextblk < blk_top) && ((block_id)nextblk->block < next_bm);
				++nextblk)
			{
				assert((block_id)nextblk->block - bit_map);
				*updptr++ = (block_id)nextblk->block - bit_map;
			}
			len = (unsigned int)((char *)nextblk - (char *)blk);
			update_array_ptr = (char *)updptr;
			alt_hist.h[0].blk_num = 0;			/* need for calls to T_END for bitmaps */
			alt_hist.h[0].blk_target = NULL;		/* need to initialize for calls to T_END */
			/* the following assumes SIZEOF(blk_ident) == SIZEOF(int) */
			assert(SIZEOF(blk_ident) == SIZEOF(int));
			*(int *)update_array_ptr = 0;
			t_begin(ERR_GVKILLFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;)
			{
				ctn = cs_addrs->ti->curr_tn;
				/* Need a read fence before reading fields from cs_data as we are reading outside
				 * of crit and relying on this value to detect desired db format state change.
				 */
				SHM_READ_MEMORY_BARRIER;
				if (start_db_fmt_tn != cs_data->desired_db_format_tn)
				{	/* Concurrent db format change has occurred. Need to visit every block to be killed
					 * to determine its block format. Fall through to the non-optimal path below
					 */
					ret_tn = 0;
					break;
				}
				bmphist.blk_num = bit_map;
				if (NULL == (bmphist.buffaddr = t_qread(bmphist.blk_num, (sm_int_ptr_t)&bmphist.cycle,
									&bmphist.cr)))
				{
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				t_write_map(&bmphist, (uchar_ptr_t)update_array, ctn, -(int4)(nextblk - blk));
				UNIX_ONLY(DEBUG_ONLY(lcl_t_tries = t_tries));
				if ((trans_num)0 == (ret_tn = t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)))
				{
#					ifdef UNIX
					assert((CDB_STAGNATE == t_tries) || (lcl_t_tries == t_tries - 1));
					status = LAST_RESTART_CODE;
					if ((cdb_sc_onln_rlbk1 == status) || (cdb_sc_onln_rlbk2 == status)
						|| TREF(rlbk_during_redo_root))
					{	/* t_end restarted due to online rollback. Discard bitmap free-up and return control
						 * to the application. But, before that reset only_reset_clues_if_onln_rlbk to FALSE
						 */
						TREF(in_gvcst_bmp_mark_free) = FALSE;
						send_msg(VARLSTCNT(6) ERR_IGNBMPMRKFREE, 4, REG_LEN_STR(gv_cur_region),
								DB_LEN_STR(gv_cur_region));
						t_abort(gv_cur_region, cs_addrs);
						return ret_tn; /* actually 0 */
					}
#					endif
					continue;
				}
				break;
			}
			if (0 == ret_tn) /* db format change occurred. Fall through to below for loop to visit each block */
			{
				/* Abort any active transaction to get rid of lingering Non-TP artifacts */
				t_abort(gv_cur_region, cs_addrs);
				break;
			}
		}
	}	/* for all blocks in the kill_set */
Example #28
0
void dse_save(void)
{
	block_id	blk;
	unsigned	i, j, util_len;
	unsigned short	buff_len;
	bool		was_block, was_crit;
	char		buff[100], *ptr, util_buff[MAX_UTIL_LEN];
	sm_uc_ptr_t	bp;
	int4		dummy_int, nocrit_present;
	cache_rec_ptr_t dummy_cr;

	error_def(ERR_DSEBLKRDFAIL);

	memset(util_buff, 0, MAX_UTIL_LEN);

	if (was_block = (cli_present("BLOCK") == CLI_PRESENT))
	{
		if (!cli_get_hex("BLOCK", &blk))
			return;
		if (blk < 0 || blk >= cs_addrs->ti->total_blks)
		{
			util_out_print("Error: invalid block number.", TRUE);
			return;
		}
		patch_curr_blk = blk;
	} else
		blk = patch_curr_blk;
	if (cli_present("LIST") == CLI_PRESENT)
	{
		if (was_block)
		{
			util_len = sizeof("!/Saved versions of block ");
			memcpy(util_buff, "!/Saved versions of block ", util_len);
			util_len += i2hex_nofill(blk, (uchar_ptr_t)&util_buff[util_len-1], 8);
			util_buff[util_len-1] = 0;
			util_out_print(util_buff, TRUE);
			for (i = j = 0;  i < patch_save_count;  i++)
				if (patch_save_set[i].blk == blk)
				{
					j++;

					if (*patch_save_set[i].comment)
						util_out_print("Version !UL  Region !AD  Comment: !AD!/", TRUE,
							patch_save_set[i].ver, REG_LEN_STR(patch_save_set[i].region),
							LEN_AND_STR(patch_save_set[i].comment));

					else
						util_out_print("Version !UL  Region !AD!/", TRUE, patch_save_set[i].ver,
							REG_LEN_STR(patch_save_set[i].region));
				}
			if (!j)
				util_out_print("None.!/", TRUE);
			return;
		}
		util_out_print("!/Save history:!/", TRUE);
		for (i = j = 0;  i < patch_save_count;  i++)
		{
			util_len = sizeof("Block ");
			memcpy(util_buff, "Block ", util_len);
			util_len += i2hex_nofill(patch_save_set[i].blk, (uchar_ptr_t)&util_buff[util_len-1], 8);
			util_buff[util_len-1] = 0;
			util_out_print(util_buff, TRUE);
			j++;
			if (*patch_save_set[i].comment)
			{
				util_out_print("Version !UL  Region !AD  Comment: !AD!/", TRUE,
					patch_save_set[i].ver, REG_LEN_STR(patch_save_set[i].region),
					LEN_AND_STR(patch_save_set[i].comment));

			} else
			{
				util_out_print("Version !UL  Region !AD!/", TRUE, patch_save_set[i].ver,
					REG_LEN_STR(patch_save_set[i].region));
			}
		}
		if (!j)
			util_out_print("  None.!/", TRUE);
		return;
	}
	j = 1;
	for (i = 0;  i < patch_save_count;  i++)
		if (patch_save_set[i].blk == blk && patch_save_set[i].region == gv_cur_region
			&& patch_save_set[i].ver >= j)
			j = patch_save_set[i].ver + 1;
	util_len = sizeof("!/Saving version !UL of block ");
	memcpy(util_buff, "!/Saving version !UL of block ", util_len);
	util_len += i2hex_nofill(blk, (uchar_ptr_t)&util_buff[util_len-1], 8);
	util_buff[util_len-1] = 0;
	util_out_print(util_buff, TRUE, j);
	patch_save_set[patch_save_count].ver = j;
	patch_save_set[patch_save_count].blk = blk;
	patch_save_set[patch_save_count].region = gv_cur_region;
	patch_save_set[patch_save_count].bp = (char *)malloc(cs_addrs->hdr->blk_size);
	if (blk >= cs_addrs->ti->total_blks)
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	was_crit = cs_addrs->now_crit;
	nocrit_present = (CLI_NEGATED == cli_present("CRIT"));

	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = TRUE;
		else
			grab_crit(gv_cur_region);
	}

	if (!(bp = t_qread(blk, &dummy_int, &dummy_cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	memcpy(patch_save_set[patch_save_count].bp, bp, cs_addrs->hdr->blk_size);
	if (!was_crit)
	{
		if (nocrit_present)
			cs_addrs->now_crit = FALSE;
		else
			rel_crit(gv_cur_region);
	}
	buff_len = sizeof(buff);
	if ((cli_present("COMMENT") == CLI_PRESENT) && cli_get_str("COMMENT", buff, &buff_len))
	{
		ptr = &buff[buff_len];
		*ptr = 0;
		j = ptr - &buff[0] + 1;
		patch_save_set[patch_save_count].comment = (char *)malloc(j);
		memcpy(patch_save_set[patch_save_count].comment, &buff[0], j);
	} else
		patch_save_set[patch_save_count].comment = "";
	patch_save_count++;
	return;
}