Esempio n. 1
0
/* tp_get_cw: locate the cw-set-element at position "depth" in the current
 * region's cw-set list and return (via *cs1) the latest (highest t_level)
 * element of its horizontal list.  The "cs" parameter is unused here but is
 * kept for interface compatibility with callers.
 */
void tp_get_cw (cw_set_element *cs, int depth, cw_set_element **cs1)
{
	cw_set_element	*elem;		/* local copy avoids double dereferencing inside the TRAVERSE macro */

	assert (depth < sgm_info_ptr->cw_set_depth);
	/* find_element returns the head (least t_level) cse of the horizontal list */
	elem = (cw_set_element *)find_element(sgm_info_ptr->cw_set_list, depth);
	assert(elem);
	/* Walk forward to the latest element.  Typical transaction depth is small
	 * (about 2 on average), so this linear walk is cheap enough that the
	 * horizontal list does not maintain separate head/tail links.
	 */
	TRAVERSE_TO_LATEST_CSE(elem);
	*cs1 = elem;
}
Esempio n. 2
0
/* tp_incr_clean_up: roll back TP transaction state to transaction level
 * "newlevel" (incremental rollback).  For every region (sgm_info) in the
 * transaction: region-specific tlvl info is rolled back, then each
 * cw-set-element in the vertical list is examined -- elements whose t_level
 * exceeds newlevel have their entire horizontal list deleted.
 * NOTE(review): this excerpt is truncated mid-function; the "else" arm of the
 * newlevel comparison (elements straddling newlevel) continues beyond the
 * visible source, so the freeing of the counted elements is not shown here.
 */
void tp_incr_clean_up(short newlevel)
{
	uint4			num_free;
	boolean_t		freed;
	sgm_info 		*si;
	cw_set_element 		*cse, *next_cse, *tmp_cse;
	cw_set_element		*cse_newlvl;	/* pointer to that cse in a given horizontal list closest to "newlevel" */
	srch_blk_status		*tp_srch_status;
	int			min_t_level;	/* t_level of the head of the horizontal-list of a given cw-set-element */
	gd_region		*tmp_gv_cur_region;
	ht_ent_int4		*tabent;

	assert(newlevel > 0);
	if (JNL_FENCE_LIST_END != jnl_fence_ctl.fence_list)	/* currently global_tlvl_info struct holds only jnl related info */
		rollbk_gbl_tlvl_info(newlevel);
	tmp_gv_cur_region = gv_cur_region;	/* save region and associated pointers to restore them later */
	for (si = first_sgm_info;  si != NULL;  si = si->next_sgm_info)
	{
		num_free = 0;
		sgm_info_ptr = si;	/* maintain sgm_info_ptr & gv_cur_region binding whenever doing TP_CHANGE_REG */
		TP_CHANGE_REG_IF_NEEDED(si->gv_cur_region);
		rollbk_sgm_tlvl_info(newlevel, si);			/* rollback all the tlvl specific info */
		cse = si->first_cw_set;
		DEBUG_ONLY(min_t_level = 1);
		/* A property that will help a lot in understanding this algorithm is the following.
		 * All cse's in a given horizontal list will have their "next_cw_set" pointing to the same cse
		 * 	which is guaranteed to be the head of the horizontal list of the next cw-set-element in the vertical list.
		 */
		while (NULL != cse)
		{
			assert(NULL == cse->low_tlevel);
			next_cse = cse->next_cw_set;
			/* Note down tp_srch_status corresponding to cse (in case it exists). Need to later reset "->cse" field
			 * of this structure to point to the new cse for this block. Note that if cse->mode is gds_t_create,
			 * there will be no tp_srch_status entry allotted for cse->blk (one will be there only for the chain.flag
			 * representation of this to-be-created block). Same case with mode of kill_t_create as it also corresponds
			 * to a non-existent block#. Therefore dont try looking up the hashtable for this block in those cases.
			 */
			tp_srch_status = NULL;
			assert((gds_t_create == cse->mode) || (kill_t_create == cse->mode)
				|| (gds_t_write == cse->mode) || (kill_t_write == cse->mode));
			if ((gds_t_create != cse->mode) && (kill_t_create != cse->mode)
					&& (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use, (uint4 *)&cse->blk))))
				tp_srch_status = tabent->value;
			DEBUG_ONLY(
				tmp_cse = cse;
				TRAVERSE_TO_LATEST_CSE(tmp_cse);
				assert((NULL == tp_srch_status) || (tp_srch_status->cse == tmp_cse));
			)
			if (newlevel < cse->t_level)
			{	/* delete the entire horizontal list for this cw-set-element.
				 * And because of the following assert, we will be deleting the entire horizontal list for
				 * 	all cw-set-elements following the current one in the vertical list.
				 */
				assert(min_t_level <= cse->t_level);
				DEBUG_ONLY(min_t_level = cse->t_level;)
				if (!num_free)
				{	/* first time an entire cw-set-element's horizontal-list needs to be removed.
					 * reset si->first_cw_set or si->last_cw_set pointers as appropriate.
					 * the actual free up of the cw-set-elements will occur later in this loop
					 */
					tmp_cse = cse->prev_cw_set;
					assert(((NULL == tmp_cse) && (cse == si->first_cw_set))
							|| ((NULL != tmp_cse) && (cse != si->first_cw_set)))
					if (cse == si->first_cw_set)
						si->first_cw_set = NULL;
					si->last_cw_set = tmp_cse;
					while (NULL != tmp_cse)
					{	/* reset forward-link of horizontal-list of the previous cw_set_element */
						assert(tmp_cse->next_cw_set == cse);
						tmp_cse->next_cw_set = NULL;
						tmp_cse = tmp_cse->high_tlevel;
					}
				}
				num_free++;	/* count of number of elements whose vertical list has been completely removed */
				cse_newlvl = NULL;
			} else
Esempio n. 3
0
/* bm_getfree: find and reserve a free block in the database by scanning local
 * bitmaps, starting near "orig_hint".  Returns the allocated block number
 * (bml + free_bit) on success, or an error/status value: MAP_RD_FAIL on a
 * bitmap read failure, a gdsfilext status on extension failure, or
 * FILE_EXTENDED when the file was extended under MM access.
 *   orig_hint    - preferred starting block (may exceed total_blks under TP)
 *   blk_used     - out: filled in by bm_find_blk (presumably whether the found
 *                  block was previously used -- confirm against bm_find_blk)
 *   cw_work      - lower bound of the cw-set span searched for a pending
 *                  update to the bitmap block
 *   cs           - cw-set array base (non-TP) / anchor element (TP)
 *   cw_depth_ptr - pointer to the current cw-set depth
 * May extend the database file (gdsfilext) when no free space is found.
 */
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr)
{
	cw_set_element	*cs1;
	sm_uc_ptr_t	bmp;
	block_id	bml, hint, hint_cycled, hint_limit;
	block_id_ptr_t	b_ptr;
	int		cw_set_top, depth, lcnt;
	unsigned int	local_maps, map_size, n_decrements = 0, total_blks;
	trans_num	ctn;
	int4		free_bit, offset;
	uint4		space_needed;
	uint4		status;
	srch_blk_status	blkhist;

	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	if (orig_hint >= total_blks)		/* for TP, hint can be > total_blks */
		orig_hint = 1;
	hint = orig_hint;
	hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP);
	local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
	for (lcnt = 0; lcnt <= local_maps; lcnt++)
	{
		/* find a local bitmap with free space, starting from the map containing "hint" */
		bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled))
		{	/* if no free space or might have looped to original map, extend */
			if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled))
			{
				hint_cycled = hint_limit;
				hint = 1;
				continue;
			}
			if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks)))
				return (status);
			if (dba_mm == cs_data->acc_meth)
				return (FILE_EXTENDED);
			hint = total_blks;
			total_blks = cs_addrs->ti->total_blks;
			hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
			local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
			/*
			 * note that you can make an optimization of not going back over the whole database and going over
			 * only the extended section. but since it is very unlikely that a free block won't be found
			 * in the extended section and the fact that we are starting from the extended section in either
			 * approach and the fact that we have a GTMASSERT to check that we don't have a lot of
			 * free blocks while doing an extend and the fact that it is very easy to make the change to do
			 * a full-pass, the full-pass solution is currently being implemented
			 */
			lcnt = -1;	/* allow it one extra pass to ensure that it can take advantage of the entension */
			n_decrements++;	/* used only for debugging purposes */
			continue;
		}
		bml *= BLKS_PER_LMAP;
		if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml)
		{	/* not within requested map */
			if ((bml < hint) && (hint_cycled))	/* wrap? - second one should force an extend for sure */
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
			hint = bml + 1;				/* start at beginning */
		}
		if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml)
			map_size = (total_blks - bml);
		else
			map_size = BLKS_PER_LMAP;
		if (0 != dollar_tlevel)
		{
			depth = cw_work;
			cw_set_top = *cw_depth_ptr;
			if (depth < cw_set_top)
				tp_get_cw(cs, cw_work, &cs1);
			for (; depth < cw_set_top;  depth++, cs1 = cs1->next_cw_set)
			{	/* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */
				if (bml == cs1->blk)
				{
					TRAVERSE_TO_LATEST_CSE(cs1);
					break;
				}
			}
			if (depth >= cw_set_top)
			{
				assert(cw_set_top == depth);
				depth = 0;	/* depth == 0 signals: bitmap block not found in the cw-set */
			}
		} else
		{
			for (depth = *cw_depth_ptr - 1; depth >= cw_work;  depth--)
			{	/* do non-tp back to front, because of adjacency */
				if (bml == (cs + depth)->blk)
				{
					cs1 = cs + depth;
					break;
				}
			}
			if (depth < cw_work)
			{
				assert(cw_work - 1 == depth);
				depth = 0;	/* depth == 0 signals: bitmap block not found in the cw-set */
			}
		}
		if (0 == depth)
		{	/* bitmap has no pending cw-set update; read it from the database */
			ctn = cs_addrs->ti->curr_tn;
			if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr)))
				return MAP_RD_FAIL;
			if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
			{
				assert(CDB_STAGNATE > t_tries);
				rdfail_detail = cdb_sc_badbitmap;
				return MAP_RD_FAIL;
			}
			offset = 0;
		} else
		{	/* bitmap already in the cw-set; resume the search past the last block recorded there */
			bmp = cs1->old_block;
			b_ptr = (block_id_ptr_t)(cs1->upd_addr);
			b_ptr += cs1->reference_cnt - 1;
			offset = *b_ptr + 1;
		}
		if (offset < map_size)
		{
			free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used);
			if (MAP_RD_FAIL == free_bit)
				return MAP_RD_FAIL;
		} else
			free_bit = NO_FREE_SPACE;
		if (NO_FREE_SPACE != free_bit)
			break;
		if ((hint = bml + BLKS_PER_LMAP) >= total_blks)		/* if map is full, start at 1st blk in next map */
		{	/* wrap - second one should force an extend for sure */
			hint = 1;
			if (hint_cycled)
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
		}
		if ((0 == depth) && (FALSE != cs_addrs->now_crit))	/* if it's from the cw_set, its state is murky */
			bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data));	/* if crit, repair master map error */
	}
	/* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */
	if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries)
	{	/* bad free bit */
		assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps));	/* All maps full, should have extended */
		GTMASSERT;
	}
	if (0 != depth)
	{	/* bitmap already had a cw-set-element: append the newly found bit to its update array */
		b_ptr = (block_id_ptr_t)(cs1->upd_addr);
		b_ptr += cs1->reference_cnt++;
		*b_ptr = free_bit;
	} else
	{	/* first update to this bitmap in this transaction: build a fresh update array and queue a map write */
		space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id);
		if (dollar_tlevel)
		{
			ENSURE_UPDATE_ARRAY_SPACE(space_needed);	/* have brackets for "if" for macros */
		}
		BLK_ADDR(b_ptr, space_needed, block_id);
		memset(b_ptr, 0, space_needed);
		*b_ptr = free_bit;
		blkhist.blk_num = bml;
		blkhist.buffaddr = bmp;	/* cycle and cr have already been assigned from t_qread */
		t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */
	}
	return bml + free_bit;
}
Esempio n. 4
0
/* tp_incr_commit: commit the innermost transaction level (dollar_tlevel) of a
 * TP transaction.  For each region, every cw-set-element whose t_level equals
 * dollar_tlevel is demoted one level; when that demotion makes it a duplicate
 * of the element below it in the horizontal list, the duplicate is removed
 * (transferring new_buff and list links as needed).  Finally the tlevel_info
 * for each region and the global tlvl info for the committed level are freed.
 */
void tp_incr_commit(void)
{
	sgm_info 		*si;
	cw_set_element 		*cse, *orig_cse, *prev_cse, *next_cse, *low_cse, *lower_cse;
	tlevel_info		*tli, *prev_tli = NULL, *last_prev_tli = NULL;
	global_tlvl_info 	*gtli, *prev_gtli;
	srch_blk_status		*tp_srch_status;
	ht_ent_int4		*tabent;

	for (si = first_sgm_info;  si != NULL;  si = si->next_sgm_info)
	{
		for (cse = si->first_cw_set; cse; cse = orig_cse->next_cw_set)
		{
			orig_cse = cse;
			TRAVERSE_TO_LATEST_CSE(cse);
			assert(dollar_tlevel >= cse->t_level);
			if (dollar_tlevel == cse->t_level)
			{
				cse->t_level--;
				low_cse = cse->low_tlevel;
				if (low_cse && low_cse->t_level == cse->t_level)	/* delete the duplicate link */
				{
					lower_cse = low_cse->low_tlevel;
					assert((low_cse->done && low_cse->new_buff) || (n_gds_t_op < cse->mode));
					if (lower_cse)
					{	/* three or more elements in the horizontal list: unlink low_cse */
						assert(lower_cse->t_level < cse->t_level);
						lower_cse->high_tlevel = cse;
						cse->low_tlevel = lower_cse;
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the built copy
							 * (if any) of the older level before going back to that level
							 */
							assert(!cse->done);
							cse->new_buff = low_cse->new_buff;
						} else if (low_cse->new_buff)
							free_element(si->new_buff_list, (char *)low_cse->new_buff);
						free_element(si->tlvl_cw_set_list, (char *)low_cse);
						orig_cse = cse;
					} else
					{	/* In this case, there are only two elements in the horizontal list out of
						 * which we are going to delete one. We prefer to copy the second link into
						 * the first and delete the second (rather than simply deleting the first), since
						 * the first element may be an intermediate element in the vertical list and
						 * buddy list wont permit use of both free_element() and free_last_n_elements()
						 * with a given list together. This might disturb the tp_srch_status->cse, so
						 * reset it properly. Note that if cse->mode is gds_t_create, there will be no
						 * tp_srch_status entry allotted for cse->blk (one will be there only for the
						 * chain.flag representation of this to-be-created block). Same case with mode of
						 * kill_t_create as it also corresponds to a non-existent block#. Therefore dont
						 * try looking up the hashtable for this block in those cases.
						 */
						assert((gds_t_create == cse->mode) || (kill_t_create == cse->mode)
							|| (gds_t_write == cse->mode) || (kill_t_write == cse->mode));
						if ((gds_t_create != cse->mode) && (kill_t_create != cse->mode))
						{
							if (NULL != (tabent = lookup_hashtab_int4(si->blks_in_use,
													(uint4 *)&cse->blk)))
								tp_srch_status = tabent->value;
							else
								tp_srch_status = NULL;
							assert(!tp_srch_status || tp_srch_status->cse == cse);
							if (tp_srch_status)
								tp_srch_status->cse = low_cse;
						}
						assert(low_cse == orig_cse);
						/* Members that may not be uptodate in cse need to be copied back from low_cse.
						 * They are next_cw_set, prev_cw_set, new_buff and done.
						 */
						prev_cse = low_cse->prev_cw_set;
						next_cse = low_cse->next_cw_set;
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the
							 * built copy of the older level before going back to that level
							 */
							assert(!cse->done);
							cse->new_buff = low_cse->new_buff;
						} else if (low_cse->new_buff)
							free_element(si->new_buff_list, (char *)low_cse->new_buff);
						memcpy(low_cse, cse, SIZEOF(cw_set_element));
						low_cse->next_cw_set = next_cse;
						low_cse->prev_cw_set = prev_cse;
						low_cse->high_tlevel = NULL;
						low_cse->low_tlevel = NULL;
						free_element(si->tlvl_cw_set_list, (char *)cse);
						orig_cse = low_cse;
					}
				} else
					assert(low_cse || orig_cse == cse);
			}
		}/* for (cse) */

		/* delete the tlvl_info for this t_level */
		for (tli = si->tlvl_info_head; tli; tli = tli->next_tlevel_info)
		{
			if (tli->t_level >= dollar_tlevel)
				break;
			prev_tli = tli;
		}
		assert(!tli || !tli->next_tlevel_info);
		if (prev_tli)
			prev_tli->next_tlevel_info = NULL;
		else
			si->tlvl_info_head = NULL;
		if (tli)
			free_last_n_elements(si->tlvl_info_list, 1);

	}/* for (si) */
	/* delete the global (across all segments) tlvl info for this t_level */
	for (prev_gtli = NULL, gtli = global_tlvl_info_head; gtli; gtli = gtli->next_global_tlvl_info)
	{
		if (dollar_tlevel <= gtli->t_level)
			break;
		prev_gtli = gtli;
	}
	assert(!global_tlvl_info_head || gtli);
	assert(!gtli || !gtli->next_global_tlvl_info);
	assert(!prev_gtli || (gtli && (dollar_tlevel == gtli->t_level)));
	FREE_GBL_TLVL_INFO(gtli);
	if (prev_gtli)
		prev_gtli->next_global_tlvl_info = NULL;
	else
		global_tlvl_info_head = NULL;
}
Esempio n. 5
0
/* tp_incr_commit: commit the innermost transaction level (dollar_tlevel) of a
 * TP transaction.  For each region, every cw-set-element whose t_level equals
 * dollar_tlevel is demoted one level; when that demotion makes it a duplicate
 * of the element below it in the horizontal list, the duplicate is removed.
 * The tlevel_info for each region and the global tlvl info are then freed.
 * NOTE(review): this appears to be an older variant of the tp_incr_commit
 * present earlier in this file -- it uses lookup_hashtab_ent / ->ptr where the
 * other uses lookup_hashtab_int4 / ->cse, and frees new_buff at an offset of
 * sizeof(que_ent).  Confirm which version is current before editing.
 */
void tp_incr_commit(void)
{
	uint4		duint4;
	sgm_info 	*si;
	cw_set_element 	*cse, *orig_cse, *prev_cse, *next_cse, *low_cse, *lower_cse;
	tlevel_info	*tli, *prev_tli = NULL, *last_prev_tli = NULL;
	global_tlvl_info
			*gtli, *prev_gtli;
	srch_blk_status	*tp_srch_status;

	for (si = first_sgm_info;  si != NULL;  si = si->next_sgm_info)
	{
		for (cse = si->first_cw_set; cse; cse = orig_cse->next_cw_set)
		{
			orig_cse = cse;
			TRAVERSE_TO_LATEST_CSE(cse);
			assert(dollar_tlevel >= cse->t_level);
			if (dollar_tlevel == cse->t_level)
			{
				cse->t_level--;
				low_cse = cse->low_tlevel;
				if (low_cse && low_cse->t_level == cse->t_level)	/* delete the duplicate link */
				{
					lower_cse = low_cse->low_tlevel;
					if (lower_cse)
					{	/* three or more elements in the horizontal list: unlink low_cse */
						assert(lower_cse->t_level < cse->t_level);
						lower_cse->high_tlevel = cse;
						cse->low_tlevel = lower_cse;
						assert(low_cse->new_buff);
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the
							 * built copy of the older level before going back to that level
							 */
							assert(!cse->done && low_cse->done);
							cse->new_buff = low_cse->new_buff;
						} else
							free_element(si->new_buff_list,
									(char *)low_cse->new_buff - sizeof(que_ent));
						free_element(si->tlvl_cw_set_list, (char *)low_cse);
						orig_cse = cse;
					} else
					{
						/* In this case, there are only two elements in the horizontal list out of
						 * which we are going to delete one. We prefer to copy the second link into
						 * the first and delete the second (rather than simply deleting the first), since
						 * the first element may be an intermediate element in the vertical list and
						 * buddy list wont permit use of both free_element() and free_last_n_elements()
						 * with a given list together.
						 * This might disturb the tp_srch_status->ptr, so reset it properly.
						 */
						tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(si->blks_in_use,
												(void *)cse->blk, &duint4);
						assert(!tp_srch_status || tp_srch_status->ptr == cse);
						if (tp_srch_status)
							tp_srch_status->ptr = low_cse;
						assert(low_cse == orig_cse);
						/* Members that may not be uptodate in cse need to be copied back from low_cse.
						 * They are next_cw_set, prev_cw_set, new_buff and done.
						 */
						prev_cse = low_cse->prev_cw_set;
						next_cse = low_cse->next_cw_set;
						assert(low_cse->new_buff);
						if (!cse->new_buff)
						{	/* if we never needed to build in the new level, copy the
							 * built copy of the older level before going back to that level
							 */
							assert(!cse->done && low_cse->done);
							cse->new_buff = low_cse->new_buff;
						} else
							free_element(si->new_buff_list,
									(char *)low_cse->new_buff - sizeof(que_ent));
						memcpy(low_cse, cse, sizeof(cw_set_element));
						low_cse->next_cw_set = next_cse;
						low_cse->prev_cw_set = prev_cse;
						low_cse->high_tlevel = NULL;
						low_cse->low_tlevel = NULL;
						free_element(si->tlvl_cw_set_list, (char *)cse);
						orig_cse = low_cse;
					}
				} else
					assert(low_cse || orig_cse == cse);
			}
		}/* for (cse) */

		/* delete the tlvl_info for this t_level */
		for (tli = si->tlvl_info_head; tli; tli = tli->next_tlevel_info)
		{
			if (tli->t_level >= dollar_tlevel)
				break;
			prev_tli = tli;
		}
		assert(!tli || !tli->next_tlevel_info);
		if (prev_tli)
			prev_tli->next_tlevel_info = NULL;
		else
			si->tlvl_info_head = NULL;
		if (tli)
			free_last_n_elements(si->tlvl_info_list, 1);

	}/* for (si) */
	/* delete the global (across all segments) tlvl info for this t_level */
	for (prev_gtli = NULL, gtli = global_tlvl_info_head; gtli; gtli = gtli->next_global_tlvl_info)
	{
		if (dollar_tlevel <= gtli->t_level)
			break;
		prev_gtli = gtli;
	}
	assert(!global_tlvl_info_head || gtli);
	assert(!gtli || !gtli->next_global_tlvl_info);
	assert(!prev_gtli || (gtli && (dollar_tlevel == gtli->t_level)));
	FREE_GBL_TLVL_INFO(gtli);
	if (prev_gtli)
		prev_gtli->next_global_tlvl_info = NULL;
	else
		global_tlvl_info_head = NULL;
}