Code example #1
File: f1g_queue.c Project: jos1290/f1goal
i8_p que_obj_tail(que_obj_p p_obj)
{
	pos_t tail = p_obj->tail - 1;

	if (!que_obj_empty(p_obj)) {
		if (tail < 0) {
			tail = p_obj->blk_num - 1;
		}
		return BLK_ADDR(tail, p_obj);
	} else {
		return NULL;
	}
}
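For context, a minimal standalone sketch of the wrap-around computation above (pos_t, que_obj_p and BLK_ADDR belong to f1goal and are not shown in this listing; plain int stands in for them here, and a signed index type is assumed, which the tail < 0 test requires):

/* Standalone illustration only: index of the newest element in a ring of
 * blk_num blocks, where tail points one past the most recently pushed block. */
static int ring_last_index(int tail, int blk_num)
{
	int t = tail - 1;

	if (t < 0)
		t = blk_num - 1;	/* wrap from block 0 back to the last block */
	return t;
}
/* ring_last_index(0, 8) == 7; ring_last_index(3, 8) == 2 */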
Code example #2
File: f1g_queue.c Project: jos1290/f1goal
i8_t que_obj_pop(que_obj_p p_obj)
{
	elem_p p_elem = NULL;

	if (p_obj->front != p_obj->tail) {
		// not empty
		p_elem = (elem_p)BLK_ADDR(p_obj->front, p_obj);		
		p_elem->data_len = 0;
		p_elem->status = 0x00;
		p_obj->front = (p_obj->front+1)%(p_obj->blk_num);
	}

	return F1G_OK;
}
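que_obj_pop advances front modulo blk_num and, like que_obj_tail, treats front == tail as empty. que_obj_empty itself is not in this listing, so the sketch below is an assumption about the usual ring-buffer convention these tests imply:

/* Assumed convention: front == tail means empty, and one slot is left
 * unused so that a full ring is distinguishable from an empty one. */
static int ring_empty(int front, int tail)
{
	return front == tail;
}

static int ring_full(int front, int tail, int blk_num)
{
	return (tail + 1) % blk_num == front;
}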
Code example #3
File: mu_split.c Project: shabiel/fis-gtm-freebsd
/***********************************************************************************************
	Input Parameters:
		cur_level: Working block's level
		d_max_fill: Database fill factor
		i_max_fill: Index fill factor
	Output Parameters:
		blks_created: how many new blocks are created
		lvls_increased: how many levels the tree height increased
	Input/Output Parameters:
		gv_target: History of working block
	Here it is assumed that i_max_fill or d_max_fill is strictly less than the block size.
	Returns:
		cdb_sc_normal: if successful
		cdb_sc status otherwise
 ************************************************************************************************/
enum cdb_sc mu_split(int cur_level, int i_max_fill, int d_max_fill, int *blks_created, int *lvls_increased)
{
    boolean_t	first_copy, new_rtblk_star_only, create_root = FALSE, split_required, insert_in_left;
    unsigned char	curr_prev_key[MAX_KEY_SZ+1], new_blk1_last_key[MAX_KEY_SZ+1];
    unsigned short  temp_ushort;
    int		rec_size, new_ins_keycmpc, tkeycmpc, new_ances_currkeycmpc, old_ances_currkeycmpc;
    int		tmp_cmpc;
    block_index	left_index, right_index;
    block_offset 	ins_off, ins_off2;
    int		level;
    int		new_ins_keysz, new_ances_currkeysz, new_blk1_last_keysz, newblk2_first_keysz, next_gv_currkeysz;
    int		old_ances_currkeylen, new_ins_keylen, new_ances_currkeylen, tkeylen, newblk2_first_keylen;
    int		old_blk1_last_rec_size, old_blk1_sz, save_blk_piece_len, old_right_piece_len;
    int		delta, max_fill;
    enum cdb_sc	status;
    int		blk_seg_cnt, blk_size, new_leftblk_top_off;
    block_id	allocation_clue;
    sm_uc_ptr_t 	rPtr1, rPtr2, rec_base, key_base, next_gv_currkey,
                    bn_ptr1, bn_ptr2, save_blk_piece,
                    old_blk_after_currec, ances_currkey,
                    old_blk1_base,
                    new_blk1_top, new_blk2_top,
                    new_blk2_frec_base, new_blk2_rem,
                    newblk2_first_key, new_ins_key;
    blk_segment     *bs_ptr1, *bs_ptr2;
    cw_set_element  *cse;
    rec_hdr_ptr_t	star_rec_hdr, new_rec_hdr1a, new_rec_hdr1b, new_rec_hdr2, root_hdr;
    blk_hdr_ptr_t	blk_hdr_ptr;

    blk_size = cs_data->blk_size;
    CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */

    BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
    star_rec_hdr->rsiz = BSTAR_REC_SIZE;
    SET_CMPC(star_rec_hdr, 0);
    level = cur_level;
    max_fill = (0 == level)? d_max_fill : i_max_fill;

    /*  -------------------
     *  Split working block.
     *  -------------------
     *  new_blk1_last_key = last key of the new working block after split
     *  new_blk1_last_keysz = size of new_blk1_last_key
     *  old_blk1_last_rec_size = last record size of the new working block after split (for old block)
     *  new_blk2_frec_base = base of first record of right block created after split
     *  newblk2_first_key = first key of new block created after split
     *  newblk2_first_keysz = size of newblk2_first_key
     *  new_blk2_rem = pointer to the new block created after split, excluding its 1st record header + key
     */
    blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr);
    old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr;
    old_blk1_sz = blk_hdr_ptr->bsiz;
    new_blk2_top = old_blk1_base + old_blk1_sz;
    if (cdb_sc_normal != (status = locate_block_split_point (old_blk1_base, level, old_blk1_sz, max_fill,
                                   &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off)))
    {
        assert(t_tries < CDB_STAGNATE);
        return cdb_sc_blkmod;
    }
    if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz)
        /* Avoid a split that would create a small right sibling. Note this should not happen often when the tolerance is high */
        return cdb_sc_oprnotneeded;
    old_right_piece_len = old_blk1_sz - new_leftblk_top_off;
    new_blk2_frec_base = old_blk1_base + new_leftblk_top_off;
    BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char);
    READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status);
    if (cdb_sc_normal != status) /* restart for cdb_sc_starrecord too, because we eliminated the possibility already */
    {
        assert(t_tries < CDB_STAGNATE);
        return cdb_sc_blkmod;
    }
    memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* copy the compressed key piece */
    new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen;
    newblk2_first_keysz = newblk2_first_keylen + tkeycmpc;

    /* gv_currkey_next_reorg will be saved for next iteration in mu_reorg */
    next_gv_currkey = newblk2_first_key;
    next_gv_currkeysz = newblk2_first_keysz;

    BLK_ADDR(new_rec_hdr1b, SIZEOF(rec_hdr), rec_hdr);
    new_rec_hdr1b->rsiz = rec_size + tkeycmpc;
    SET_CMPC(new_rec_hdr1b, 0);

    /* Create the new split piece; we already know that this will not be *-rec only.
     * Note that this has to be done BEFORE modifying working block as building this buffer relies on the
     * working block to be pinned which is possible only if this cw-set-element is created ahead of that
     * of the working block (since order in which blocks are built is the order in which cses are created).
     */
    BLK_INIT(bs_ptr2, bs_ptr1);
    BLK_SEG(bs_ptr2, (sm_uc_ptr_t)new_rec_hdr1b, SIZEOF(rec_hdr));
    BLK_SEG(bs_ptr2, newblk2_first_key, newblk2_first_keysz);
    BLK_SEG(bs_ptr2, new_blk2_rem, new_blk2_top - new_blk2_rem);
    if (!BLK_FINI(bs_ptr2, bs_ptr1))
    {
        assert(t_tries < CDB_STAGNATE);
        return cdb_sc_blkmod;
    }
    allocation_clue = ALLOCATION_CLUE(cs_data->trans_hist.total_blks);
    right_index = t_create(allocation_clue++, (unsigned char *)bs_ptr1, 0, 0, level);
    (*blks_created)++;

    /* Modify working block removing split piece */
    BLK_INIT(bs_ptr2, bs_ptr1);
    if (0 == level)
    {
        BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr));
    }
    else
    {
        BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr),
                new_leftblk_top_off - SIZEOF(blk_hdr) - old_blk1_last_rec_size);
        BLK_SEG(bs_ptr2, (sm_uc_ptr_t)star_rec_hdr, SIZEOF(rec_hdr) );
        BLK_ADDR(bn_ptr1, SIZEOF(block_id), unsigned char);
        memcpy(bn_ptr1, old_blk1_base + new_leftblk_top_off - SIZEOF(block_id), SIZEOF(block_id));
        BLK_SEG(bs_ptr2, bn_ptr1, SIZEOF(block_id));
    }
    if ( !BLK_FINI(bs_ptr2, bs_ptr1))
    {
        assert(t_tries < CDB_STAGNATE);
        return cdb_sc_blkmod;
    }
    t_write(&gv_target->hist.h[level], (unsigned char *)bs_ptr1, 0, 0, level, FALSE, TRUE, GDS_WRITE_KILLTN);

    /*
    ----------------------------------------------------------------------------
    Modify ancestor block for the split in current level.
    new_ins_key = new key to be inserted in parent because of split in child
    new_ins_key will be inserted after gv_target->hist.h[level].prev_rec and
                                before gv_target->hist.h[level].curr_rec
        new_ins_keysz = size of new_ins_key
        Note: A restriction of the algorithm is to have the current key and new_ins_key
        in the same block, either the left or the new right block
    ----------------------------------------------------------------------------
    */
    BLK_ADDR(new_ins_key, new_blk1_last_keysz, unsigned char);
    memcpy(new_ins_key, &new_blk1_last_key[0], new_blk1_last_keysz);
    new_ins_keysz = new_blk1_last_keysz;
    for(;;) 	/* ========== loop through ancestors as necessary ======= */
    {
        level ++;
        max_fill = i_max_fill;
        /*
        old_blk_after_currec = remainder of the current block after currec
        ances_currkey = old real value of currkey in ancestor block
        */
        blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr);
        old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr;
        old_blk1_sz = blk_hdr_ptr->bsiz;
        new_blk2_top = old_blk1_base + old_blk1_sz;
        rec_base = old_blk1_base + gv_target->hist.h[level].curr_rec.offset;
        GET_RSIZ(rec_size, rec_base);
        old_blk_after_currec = rec_base + rec_size;
        old_ances_currkeycmpc = EVAL_CMPC((rec_hdr_ptr_t)rec_base);
        old_ances_currkeylen = rec_size - BSTAR_REC_SIZE;
        if (INVALID_RECORD(level, rec_size,  old_ances_currkeylen, old_ances_currkeycmpc))
        {
            assert(t_tries < CDB_STAGNATE);
            return cdb_sc_blkmod;
        }
        if (0 == old_ances_currkeylen)
        {
            if (0 != old_ances_currkeycmpc)
            {
                assert(t_tries < CDB_STAGNATE);
                return cdb_sc_blkmod;
            }
            new_ances_currkeycmpc = new_ances_currkeylen = 0;
        }
        else
        {
            BLK_ADDR(ances_currkey, gv_cur_region->max_rec_size + 1, unsigned char);
            key_base = rec_base +  SIZEOF(rec_hdr);
        }
        new_ances_currkeysz = old_ances_currkeycmpc + old_ances_currkeylen;
        if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) /* cur_rec is not first key */
        {
            if (cdb_sc_normal != (status = gvcst_expand_any_key(old_blk1_base,
                                           old_blk1_base + gv_target->hist.h[level].curr_rec.offset,
                                           &curr_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL)))
            {
                assert(t_tries < CDB_STAGNATE);
                return cdb_sc_blkmod;
            }
            if (old_ances_currkeycmpc)
                memcpy(ances_currkey, &curr_prev_key[0], old_ances_currkeycmpc);
        }
        if (old_ances_currkeylen)
        {
            memcpy(ances_currkey + old_ances_currkeycmpc, key_base, old_ances_currkeylen);
            GET_CMPC(new_ances_currkeycmpc, new_ins_key, ances_currkey);
            new_ances_currkeylen = new_ances_currkeysz - new_ances_currkeycmpc;
        }
        if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset)
        {
            /* new_ins_key will be inserted after curr_prev_key */
            GET_CMPC(new_ins_keycmpc, &curr_prev_key[0], new_ins_key);
        }
        else
            new_ins_keycmpc = 0; /* new_ins_key will be the 1st key */
        new_ins_keylen = new_ins_keysz - new_ins_keycmpc ;

        delta = BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen;
        if (old_blk1_sz + delta > blk_size - cs_data->reserved_bytes) /* split required */
        {
            split_required = TRUE;
            if (level == gv_target->hist.depth)
            {
                create_root = TRUE;
                if (MAX_BT_DEPTH - 1 <= level)  /* maximum level reached */
                    return cdb_sc_maxlvl;
            }
            if (max_fill + BSTAR_REC_SIZE > old_blk1_sz)
            {
                if (SIZEOF(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz)
                    return cdb_sc_oprnotneeded; /* Improve code to avoid this */
                max_fill = old_blk1_sz - BSTAR_REC_SIZE;
            }
            status = locate_block_split_point(old_blk1_base, level, old_blk1_sz, max_fill,
                                              &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off);
            if (cdb_sc_normal != status || new_leftblk_top_off >= old_blk1_sz
                    || 0 == new_blk1_last_keysz)
            {
                assert(t_tries < CDB_STAGNATE);
                return cdb_sc_blkmod;
            }
            assert(BSTAR_REC_SIZE != old_blk1_last_rec_size);
            old_right_piece_len = old_blk1_sz - new_leftblk_top_off;
            new_blk2_frec_base = new_blk1_top = old_blk1_base + new_leftblk_top_off;
            if (BSTAR_REC_SIZE == old_right_piece_len)
                new_rtblk_star_only = TRUE;
            else
                new_rtblk_star_only = FALSE;
            if (new_leftblk_top_off == gv_target->hist.h[level].curr_rec.offset)
            {
                /* inserted key will be the first record of new right block */
                new_ins_keylen = new_ins_keysz;
                new_ins_keycmpc = 0;
            }
            else
                /* process 1st record of new right block */
            {
                BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char);
                READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size,
                            newblk2_first_key, newblk2_first_keylen, status);
                if (cdb_sc_normal == status)
                {
                    memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* compressed piece */
                    new_blk2_rem =  new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen;
                    newblk2_first_keysz = newblk2_first_keylen + tkeycmpc;
                    BLK_ADDR(new_rec_hdr2, SIZEOF(rec_hdr), rec_hdr);
                    new_rec_hdr2->rsiz = newblk2_first_keysz + BSTAR_REC_SIZE;
                    SET_CMPC(new_rec_hdr2, 0);
                }
                else if (cdb_sc_starrecord != status || !new_rtblk_star_only)
                {
                    assert(t_tries < CDB_STAGNATE);
                    return cdb_sc_blkmod;
                }
            }
            /* else gv_target->hist.h[level].curr_rec will be newblk2_first_key */

            if (new_leftblk_top_off >  gv_target->hist.h[level].curr_rec.offset +
                    old_ances_currkeylen + BSTAR_REC_SIZE)
            {
                /* in this case prev_rec (if exists), new key and curr_rec should go into left block */
                if (new_leftblk_top_off + delta - old_blk1_last_rec_size + BSTAR_REC_SIZE
                        <= blk_size - cs_data->reserved_bytes)
                    insert_in_left = TRUE;
                else
                {
                    /* cannot handle it now */
                    return cdb_sc_oprnotneeded;
                }
            }
            else if (new_leftblk_top_off <  gv_target->hist.h[level].curr_rec.offset +
                     old_ances_currkeylen + BSTAR_REC_SIZE)
            {
                /* if gv_target->hist.h[level].curr_rec is the first key in old_blk1
                   then in new right block,
                   	new_ins_key will be the 1st record key and
                	curr_rec will be 2nd record and
                	there will be no prev_rec in right block.
                   Else (if curr_rec is not first key)
                	there will be some records before new_ins_key, at least prev_rec */
                delta = (int)(BSTAR_REC_SIZE + new_ins_keylen
                              - old_ances_currkeylen + new_ances_currkeylen
                              + ((0 == new_ins_keycmpc) ? 0 : (EVAL_CMPC((rec_hdr_ptr_t)new_blk2_frec_base))));
                if (SIZEOF(blk_hdr) + old_right_piece_len + delta <= blk_size - cs_data->reserved_bytes)
                {
                    insert_in_left = FALSE;
                    if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz)
                    {
                        /* cannot handle it now */
                        return cdb_sc_oprnotneeded;
                    }
                }
                else
                {
                    /* cannot handle it now */
                    return cdb_sc_oprnotneeded;
                }
            }
            else
            {
                /* in this case prev_rec (if exists), new key and curr_rec should go into left block
                	and curr_rec will be the last record (*-key) of left new block */
                delta = BSTAR_REC_SIZE + new_ins_keylen;
                if (new_leftblk_top_off + delta <= blk_size - cs_data->reserved_bytes)
                    insert_in_left = TRUE;
                else
                {
                    /* cannot handle it now */
                    return cdb_sc_oprnotneeded;
                }
            }
        } /* end if split required */
        else
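The mu_split excerpt breaks off above; locate_block_split_point, which it calls twice, is also not shown. The sketch below is a simplified guess at what such a helper does, assuming each record starts with a 2-byte size field (as GT.M records do) and ignoring the last-key bookkeeping the real routine performs for its callers:

#include <string.h>

/* Simplified sketch: walk records from the block header and stop where the
 * next record would push the left piece past max_fill. */
static int find_split_off(const unsigned char *blk, int hdr_sz, int blk_sz, int max_fill)
{
	int		off = hdr_sz;
	unsigned short	rsiz;

	while (off < blk_sz)
	{
		memcpy(&rsiz, blk + off, sizeof(rsiz));	/* leading record size field */
		if (0 == rsiz || off + rsiz > max_fill)
			break;	/* next record would cross the fill target */
		off += rsiz;
	}
	return off;	/* records in [hdr_sz, off) stay in the left block */
}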
Code example #4
File: gvcst_kill_blk.c Project: mihawk/fis-gtm
enum cdb_sc	gvcst_kill_blk(srch_blk_status	*blkhist,
			       char		level,
			       gv_key  		*search_key,
			       srch_rec_status	low,
			       srch_rec_status	high,
			       boolean_t	right_extra,
			       cw_set_element	**cseptr)
{
	typedef sm_uc_ptr_t		bytptr;

	unsigned short			temp_ushort;
	int4				temp_long;
	int				tmp_cmpc;
	int				blk_size, blk_seg_cnt, lmatch, rmatch, targ_len, prev_len, targ_base, next_rec_shrink,
					temp_int, blkseglen;
	bool				kill_root, first_copy;
	blk_hdr_ptr_t			old_blk_hdr;
	rec_hdr_ptr_t			left_ptr;	/*pointer to record before first record to delete*/
	rec_hdr_ptr_t			del_ptr;	/*pointer to first record to delete*/
	rec_hdr_ptr_t	       		right_ptr;	/*pointer to record after last record to delete*/
	rec_hdr_ptr_t			right_prev_ptr;
	rec_hdr_ptr_t			rp, rp1;	/*scratch record pointer*/
	rec_hdr_ptr_t			first_in_blk, top_of_block, new_rec_hdr, star_rec_hdr;
	blk_segment			*bs1, *bs_ptr;
	block_index			new_block_index;
	unsigned char			*skb;
	static readonly block_id	zeroes = 0;
	cw_set_element			*cse, *old_cse;
	bytptr				curr, prev, right_bytptr;
	off_chain			chain1, curr_chain, prev_chain;
	block_id			blk;
	sm_uc_ptr_t			buffer;
	srch_blk_status			*t1;

	*cseptr = NULL;
	if (low.offset == high.offset)
		return cdb_sc_normal;
	blk = blkhist->blk_num;
	if (dollar_tlevel)
	{
		PUT_LONG(&chain1, blk);
		if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
		{
			assert(sgm_info_ptr->tp_csa == cs_addrs);
			assert(FALSE == cs_addrs->now_crit);
			return cdb_sc_blknumerr;
		}
	}
	buffer = blkhist->buffaddr;
	old_blk_hdr = (blk_hdr_ptr_t)buffer;
	kill_root = FALSE;
	blk_size = cs_data->blk_size;
	first_in_blk = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + SIZEOF(blk_hdr));
	top_of_block = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + old_blk_hdr->bsiz);
	left_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + low.offset);
	right_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + high.offset);
	if (right_extra && right_ptr < top_of_block)
	{
		right_prev_ptr = right_ptr;
		GET_USHORT(temp_ushort, &right_ptr->rsiz);
		right_ptr = (rec_hdr_ptr_t)((bytptr)right_ptr + temp_ushort);
	}
	if ((bytptr)left_ptr < (bytptr)old_blk_hdr ||
		(bytptr)right_ptr > (bytptr)top_of_block ||
		(bytptr)left_ptr >= (bytptr)right_ptr)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)left_ptr == (bytptr)old_blk_hdr)
	{
		if ((bytptr)right_ptr == (bytptr)top_of_block)
		{
			if ((bytptr)first_in_blk == (bytptr)top_of_block)
			{
				if (0 != level)
				{
					assert(CDB_STAGNATE > t_tries);
					return cdb_sc_rmisalign;
				}
				return cdb_sc_normal;
			}
			if (!gv_target->hist.h[level + 1].blk_num)
				kill_root = TRUE;
			else
			{	/* We are about to free up the contents of this entire block. If this block corresponded to
				 * a global that has NOISOLATION turned on and has a non-zero recompute list (i.e. some SETs
				 * already happened in this same TP transaction), make sure we disable the NOISOLATION
				 * optimization in this case as that is applicable only if one or more SETs happened in this
				 * data block and NOT if a KILL happens. Usually this is done by a t_write(GDS_WRITE_KILLTN)
				 * call but since in this case the entire block is being freed, "t_write" won't be invoked
				 * so we need to explicitly set GDS_WRITE_KILLTN like t_write would have (GTM-8269).
				 * Note: blkhist->first_tp_srch_status is not reliable outside of TP. Thankfully the recompute
				 * list is also maintained only in case of TP so a check of dollar_tlevel is enough to
				 * dereference both "first_tp_srch_status" and "recompute_list_head".
				 */
				if (dollar_tlevel)
				{
					t1 = blkhist->first_tp_srch_status ? blkhist->first_tp_srch_status : blkhist;
					cse = t1->cse;
					if ((NULL != cse) && cse->recompute_list_head)
						cse->write_type |= GDS_WRITE_KILLTN;
				}
				return cdb_sc_delete_parent;
			}
		}
		del_ptr = first_in_blk;
	} else
	{
		GET_USHORT(temp_ushort, &left_ptr->rsiz);
		del_ptr = (rec_hdr_ptr_t)((bytptr)left_ptr + temp_ushort);
		if ((bytptr)del_ptr <= (bytptr)(left_ptr + 1)  ||  (bytptr)del_ptr > (bytptr)right_ptr)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_rmisalign;
		}
	}
	if ((bytptr)del_ptr == (bytptr)right_ptr)
		return cdb_sc_normal;
	lmatch = low.match;
	rmatch = high.match;
	if (level)
	{
		for (rp = del_ptr ;  rp < right_ptr ;  rp = rp1)
		{
			GET_USHORT(temp_ushort, &rp->rsiz);
			rp1 = (rec_hdr_ptr_t)((bytptr)rp + temp_ushort);
			if (((bytptr)rp1 < (bytptr)(rp + 1) + SIZEOF(block_id)) ||
				((bytptr)rp1 < buffer) || ((bytptr)rp1 > (buffer + blk_size)))
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
			GET_LONG(temp_long, ((bytptr)rp1 - SIZEOF(block_id)));
			if (dollar_tlevel)
			{
				chain1 = *(off_chain *)&temp_long;
				if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth))
				{
					assert(sgm_info_ptr->tp_csa == cs_addrs);
					assert(FALSE == cs_addrs->now_crit);
					return cdb_sc_blknumerr;
				}
			}
			gvcst_delete_blk(temp_long, level - 1, FALSE);
		}
	}
	if (kill_root)
	{	/* create an empty data block */
		BLK_INIT(bs_ptr, bs1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		new_block_index = t_create(blk, (uchar_ptr_t)bs1, 0, 0, 0);
		/* create index block */
		BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
		new_rec_hdr->rsiz = SIZEOF(rec_hdr) + SIZEOF(block_id);
		SET_CMPC(new_rec_hdr, 0);
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
		BLK_SEG(bs_ptr, (bytptr)&zeroes, SIZEOF(block_id));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_mkblk;
		}
		cse = t_write(blkhist, (unsigned char *)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), new_block_index, 1,
			TRUE, FALSE, GDS_WRITE_KILLTN);
		assert(!dollar_tlevel || !cse->high_tlevel);
		*cseptr = cse;
		if (NULL != cse)
			cse->first_off = 0;
		return cdb_sc_normal;
	}
	next_rec_shrink = (int)(old_blk_hdr->bsiz + ((bytptr)del_ptr - (bytptr)right_ptr));
	if (SIZEOF(blk_hdr) >= next_rec_shrink)
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_rmisalign;
	}
	if ((bytptr)right_ptr == (bytptr)top_of_block)
	{
		if (level)
		{
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			next_rec_shrink += SIZEOF(rec_hdr) + SIZEOF(block_id) - temp_ushort;
		}
	} else
	{
		targ_base = (rmatch < lmatch) ? rmatch : lmatch;
		prev_len = 0;
		if (right_extra)
		{
			EVAL_CMPC2(right_prev_ptr, tmp_cmpc);
			targ_len = tmp_cmpc - targ_base;
			if (targ_len < 0)
				targ_len = 0;
			temp_int = tmp_cmpc - EVAL_CMPC(right_ptr);
			if (0 >= temp_int)
				prev_len = - temp_int;
			else
			{
				if (temp_int < targ_len)
					targ_len -= temp_int;
				else
					targ_len = 0;
			}
		} else
		{
			targ_len = EVAL_CMPC(right_ptr) - targ_base;
			if (targ_len < 0)
				targ_len = 0;
		}
		next_rec_shrink += targ_len + prev_len;
	}
	BLK_INIT(bs_ptr, bs1);
	first_copy = TRUE;
	blkseglen = (int)((bytptr)del_ptr - (bytptr)first_in_blk);
	if (0 < blkseglen)
	{
		if (((bytptr)right_ptr != (bytptr)top_of_block)  ||  (0 == level))
		{
			BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
			first_copy = FALSE;
		} else
		{
			blkseglen = (int)((bytptr)left_ptr - (bytptr)first_in_blk);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen);
				first_copy = FALSE;
			}
			BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(star_rec_hdr, 0);
			star_rec_hdr->rsiz = (unsigned short)(SIZEOF(rec_hdr) + SIZEOF(block_id));
			BLK_SEG(bs_ptr, (bytptr)star_rec_hdr, SIZEOF(rec_hdr));
			GET_USHORT(temp_ushort, &left_ptr->rsiz);
			BLK_SEG(bs_ptr, ((bytptr)left_ptr + temp_ushort - SIZEOF(block_id)), SIZEOF(block_id));
		}
	}
	blkseglen = (int)((bytptr)top_of_block - (bytptr)right_ptr);
	assert(0 <= blkseglen);
	if (0 != blkseglen)
	{
		next_rec_shrink = targ_len + prev_len;
		if (0 >= next_rec_shrink)
		{
			BLK_SEG(bs_ptr, (bytptr)right_ptr, blkseglen);
		} else
		{
			BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr);
			SET_CMPC(new_rec_hdr, EVAL_CMPC(right_ptr) - next_rec_shrink);
			GET_USHORT(temp_ushort, &right_ptr->rsiz);
			new_rec_hdr->rsiz = temp_ushort + next_rec_shrink;
			BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr));
			if (targ_len)
			{
				BLK_ADDR(skb, targ_len, unsigned char);
				memcpy(skb, &search_key->base[targ_base], targ_len);
				BLK_SEG(bs_ptr, skb, targ_len);
			}
			if (prev_len)
				BLK_SEG(bs_ptr, (bytptr)(right_prev_ptr + 1) , prev_len);
			right_bytptr = (bytptr)(right_ptr + 1);
			blkseglen = (int)((bytptr)top_of_block - right_bytptr);
			if (0 < blkseglen)
			{
				BLK_SEG(bs_ptr, right_bytptr, blkseglen);
			} else
			{
				assert(CDB_STAGNATE > t_tries);
				return cdb_sc_rmisalign;
			}
		}
	}
	if (!BLK_FINI(bs_ptr, bs1))
	{
		assert(CDB_STAGNATE > t_tries);
		return cdb_sc_mkblk;
	}
	cse = t_write(blkhist, (unsigned char *)bs1, 0, 0, level, first_copy, TRUE, GDS_WRITE_KILLTN);
	assert(!dollar_tlevel || !cse->high_tlevel);
	*cseptr = cse;
	if (horiz_growth)
	{
		old_cse = cse->low_tlevel;
		assert(old_cse && old_cse->done);
		assert(2 == (SIZEOF(old_cse->undo_offset) / SIZEOF(old_cse->undo_offset[0])));
		assert(2 == (SIZEOF(old_cse->undo_next_off) / SIZEOF(old_cse->undo_next_off[0])));
		assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]);
		assert(!old_cse->undo_next_off[1] && !old_cse->undo_offset[1]);
	}
	if ((NULL != cse)  &&  (0 != cse->first_off))
	{	/* fix up chains in the block to account for deleted records */
		prev = NULL;
		curr = buffer + cse->first_off;
		GET_LONGP(&curr_chain, curr);
		while (curr < (bytptr)del_ptr)
		{	/* follow chain to first deleted record */
			if (0 == curr_chain.next_off)
				break;
			if (right_ptr == top_of_block  &&  (bytptr)del_ptr - curr == SIZEOF(off_chain))
				break;	/* special case described below: stop just before the first deleted record */
			prev = curr;
			curr += curr_chain.next_off;
			GET_LONGP(&curr_chain, curr);
		}
		if (right_ptr == top_of_block  &&  (bytptr)del_ptr - curr == SIZEOF(off_chain))
		{
			/* if the right side of the block is gone and our last chain is in the last record,
			 * terminate the chain and adjust the previous entry to point at the new *-key
			 * NOTE: this assumes there's NEVER a TP delete of records in the GVT
			 */
			assert(0 != level);
			/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
			if (horiz_growth)
			{
				old_cse->undo_next_off[0] = curr_chain.next_off;
				old_cse->undo_offset[0] = (block_offset)(curr - buffer);
				assert(old_cse->undo_offset[0]);
			}
			curr_chain.next_off = 0;
			GET_LONGP(curr, &curr_chain);
			if (NULL != prev)
			{	/* adjust previous chain next_off to reflect the fact that the record it refers to is now a *-key */
				GET_LONGP(&prev_chain, prev);
				/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
				if (horiz_growth)
				{
					old_cse->undo_next_off[1] = prev_chain.next_off;
					old_cse->undo_offset[1] = (block_offset)(prev - buffer);
					assert(old_cse->undo_offset[1]);
				}
				prev_chain.next_off = (unsigned int)((bytptr)left_ptr - prev + (unsigned int)(SIZEOF(rec_hdr)));
				GET_LONGP(prev, &prev_chain);
			} else	/* it's the first (and only) one */
				cse->first_off = (block_offset)((bytptr)left_ptr - buffer + SIZEOF(rec_hdr));
		} else if (curr >= (bytptr)del_ptr)
		{	/* may be more records on the right that aren't deleted */
			while (curr < (bytptr)right_ptr)
			{	/* follow chain past last deleted record */
				if (0 == curr_chain.next_off)
					break;
				curr += curr_chain.next_off;
				GET_LONGP(&curr_chain, curr);
			}
			/* prev :   ptr to chain record immediately preceding the deleted area,
			 *	    or 0 if none.
			 *
			 * curr :   ptr to chain record immediately following the deleted area,
			 *	    or to last chain record.
			 */
			if (curr < (bytptr)right_ptr)
			{	/* the former end of the chain is going, going, gone */
				if (NULL != prev)
				{	/* terminate the chain before the delete */
					GET_LONGP(&prev_chain, prev);
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = 0;
					GET_LONGP(prev, &prev_chain);
				} else
					cse->first_off = 0;		/* the whole chain is gone */
			} else
			{	/* stitch up the left and right to account for the hole in the middle */
				/* next_rec_shrink is the change in record size due to the new compression count */
				if (NULL != prev)
				{
					GET_LONGP(&prev_chain, prev);
					/* ??? new compression may be less (ie +) so why are negative shrinks ignored? */
					/* store next_off in old_cse before actually changing it in the buffer(for rolling back) */
					if (horiz_growth)
					{
						old_cse->undo_next_off[0] = prev_chain.next_off;
						old_cse->undo_offset[0] = (block_offset)(prev - buffer);
						assert(old_cse->undo_offset[0]);
					}
					prev_chain.next_off = (unsigned int)(curr - prev - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
					GET_LONGP(prev, &prev_chain);
				} else	/* curr remains first: adjust the head */
					cse->first_off = (block_offset)(curr - buffer - ((bytptr)right_ptr - (bytptr)del_ptr)
						+ (next_rec_shrink > 0 ? next_rec_shrink : 0));
			}
		}
	}
	horiz_growth = FALSE;
	return cdb_sc_normal;
}
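The first_off fix-up in gvcst_kill_blk repeatedly follows next_off, a record-relative offset chain inside the block buffer. A standalone sketch of that traversal pattern (off_chain and GET_LONGP are GT.M internals; a plain struct and memcpy stand in for them, so treat this as an illustration rather than the library's API):

#include <string.h>

/* Illustration: advance through records linked by offsets relative to the
 * current record, stopping at a zero terminator or at the given limit. */
struct chain_sketch
{
	unsigned int next_off;
};

static unsigned char *chain_walk(unsigned char *buffer, unsigned int first_off, unsigned char *limit)
{
	unsigned char		*curr = buffer + first_off;
	struct chain_sketch	c;

	memcpy(&c, curr, sizeof(c));
	while (0 != c.next_off && (curr + c.next_off) < limit)
	{
		curr += c.next_off;	/* next_off is relative to curr */
		memcpy(&c, curr, sizeof(c));
	}
	return curr;	/* last chain record before limit */
}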
Code example #5
File: mu_truncate.c Project: shabiel/YottaDB
boolean_t mu_truncate(int4 truncate_percent)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t 	csd;
	int			num_local_maps;
	int 			lmap_num, lmap_blk_num;
	int			bml_status, sigkill;
	int			save_errno;
	int			ftrunc_status;
	uint4			jnl_status;
	uint4			old_total, new_total;
	uint4			old_free, new_free;
	uint4			end_blocks;
	int4			blks_in_lmap, blk;
	gtm_uint64_t		before_trunc_file_size;
	off_t			trunc_file_size;
	off_t			padding;
	uchar_ptr_t		lmap_addr;
	boolean_t		was_crit;
	uint4			found_busy_blk;
	srch_blk_status		bmphist;
	srch_blk_status 	*blkhist;
	srch_hist		alt_hist;
	trans_num		curr_tn;
	blk_hdr_ptr_t		lmap_blk_hdr;
	block_id		*blkid_ptr;
	unix_db_info    	*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	char			*err_msg;
	intrpt_state_t		prev_intrpt_state;
	off_t			offset;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csa = cs_addrs;
	csd = cs_data;
	if (dba_mm == csd->acc_meth)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		return TRUE;
	}
	/* already checked for parallel truncates on this region --- see mupip_reorg.c */
	gv_target = NULL;
	assert(csa->nl->trunc_pid == process_id);
	assert(dba_mm != csd->acc_meth);
	old_total = csa->ti->total_blks;
	old_free = csa->ti->free_blocks;
	sigkill = 0;
	found_busy_blk = 0;
	memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */
	assert(csd->bplmap == BLKS_PER_LMAP);
	end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */
	if (0 == end_blocks)
		end_blocks = BLKS_PER_LMAP;
	num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP);
	/* ======================================== PHASE 1 ======================================== */
	for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			return TRUE;
		assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */
		if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region),
					truncate_percent);
			return TRUE;
		}
		lmap_blk_num = lmap_num * BLKS_PER_LMAP;
		if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
		{
			found_busy_blk = lmap_blk_num;
			break;
		}
		blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP;
		/* Loop through non-bitmap blocks of this lmap, do recycled2free */
		DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n",
			lmap_num, lmap_blk_num, blks_in_lmap));
		for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;)
		{
			t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;) /* retry loop for recycled to free transactions */
			{
				curr_tn = csd->trans_hist.curr_tn;
				/* Read the nth local bitmap into memory */
				bmphist.blk_num = lmap_blk_num;
				bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr);
				lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr;
				if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
				{ /* Could not read the block successfully. Retry. */
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr);
				/* starting from the hint (blk itself), find the first busy or recycled block */
				blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status);
				assert(blk < BLKS_PER_LMAP);
				if (blk == -1 || blk >= blks_in_lmap)
				{ /* done with this lmap, continue to next */
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
				{ /* stop processing blocks... skip ahead to phase 2 */
					found_busy_blk = lmap_blk_num;
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_RECYCLED == bml_status)
				{ /* Write PBLK records for recycled blocks only if before_image journaling is
				   * enabled. t_end() takes care of checking if journaling is enabled and
				   * writing PBLK record. We have to at least mark the recycled block as free.
				   */
					RESET_UPDATE_ARRAY;
					update_trans = UPDTRNS_DB_UPDATED_MASK;
					*((block_id *)update_array_ptr) = blk;
					update_array_ptr += SIZEOF(block_id);
					*(int *)update_array_ptr = 0;
					alt_hist.h[1].blk_num = 0;
					alt_hist.h[0].level = 0;
					alt_hist.h[0].cse = NULL;
					alt_hist.h[0].tn = curr_tn;
					alt_hist.h[0].blk_num = lmap_blk_num + blk;
					alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num,
							&alt_hist.h[0].cycle, &alt_hist.h[0].cr);
					if (!alt_hist.h[0].buffaddr)
					{
						t_retry((enum cdb_sc)rdfail_detail);
						continue;
					}
					if (!t_recycled2free(&alt_hist.h[0]))
					{
						t_retry(cdb_sc_lostbmlcr);
						continue;
					}
					t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0);
					/* Set the opcode for INCTN record written by t_end() */
					inctn_opcode = inctn_blkmarkfree;
					if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
						continue;
					/* block processed, scan from the next one */
					blk++;
					break;
				} else
				{
					assert(t_tries < CDB_STAGNATE);
					t_retry(cdb_sc_badbitmap);
					continue;
				}
			} /* END recycled2free retry loop */
		} /* END scanning blocks of this particular lmap */
		/* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */
		/* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */
		DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num));
		t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
		for (;;)
		{
			RESET_UPDATE_ARRAY;
			BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
			*blkid_ptr = 0;
			update_trans = UPDTRNS_DB_UPDATED_MASK;
			inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */
			curr_tn = csd->trans_hist.curr_tn;
			blkhist = &alt_hist.h[0];
			blkhist->blk_num = lmap_blk_num;
			blkhist->tn = curr_tn;
			blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */
			/* Read the nth local bitmap into memory */
			blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
			lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr;
			if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
			{ /* Could not read the block successfully. Retry. */
				t_retry((enum cdb_sc)rdfail_detail);
				continue;
			}
			t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
			blkhist->blk_num = 0; /* create empty history for bitmap block */
			if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
				continue;
			break;
		}
	} /* END scanning lmaps */
	/* ======================================== PHASE 2 ======================================== */
	assert(!csa->now_crit);
	for (;;)
	{ /* wait for FREEZE, we don't want to truncate a frozen database */
		grab_crit(gv_cur_region);
		if (FROZEN_CHILLED(cs_data))
			DO_CHILLED_AUTORELEASE(csa, cs_data);
		if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
			break;
		rel_crit(gv_cur_region);
		while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
		{
			hiber_start(1000);
			if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
				break;
		}
	}
	assert(csa->nl->trunc_pid == process_id);
	/* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */
	if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB))
	{
		assert(FALSE);
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"),
				DB_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return FALSE;
	}
	csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk);
	assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk));
	new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (csa->ti->total_blks != old_total || new_total == old_total)
	{
		assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (SNAPSHOTS_IN_PROG(csa->nl))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	}
	DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	if (JNL_ENABLED(csa))
	{ /* Write JRT_TRUNC and INCTN records */
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = csa->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, csa);
		if (SS_NORMAL != jnl_status)
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
		else
		{
			if (0 == jpc->pini_addr)
				jnl_put_jrt_pini(csa);
			jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total);
			inctn_opcode = inctn_mu_reorg;
			jnl_write_inctn_rec(csa);
			jnl_status = jnl_flush(gv_cur_region);
			if (SS_NORMAL != jnl_status)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd),
					ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"),
					jnl_status);
				assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */
			}
		}
	}
	/* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */
	curr_tn = csa->ti->curr_tn;
	CHECK_TN(csa, csd, curr_tn);
	udi = FILE_INFO(gv_cur_region);
	/* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */
	trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1));
	csd->after_trunc_total_blks = new_total;
	csd->before_trunc_free_blocks = csa->ti->free_blocks;
	csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */
	/* file size and total blocks: INCONSISTENT */
	csa->ti->total_blks = new_total;
	/* past the point of no return -- shared memory intact */
	assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total));
	csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total);
	new_free = csa->ti->free_blocks;
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	CHECK_DBSYNC(gv_cur_region, save_errno);
	/* past the point of no return -- shared memory deleted */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */
	clear_cache_array(csa, csd, gv_cur_region, new_total, old_total);
	offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size;
	save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff)));
	if (0 != save_errno)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		return FALSE;
	}
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */
	/* Execute an ftruncate() and truncate the DB file
	 * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS)
	 * It ignores kill -9 signal till its operation is completed.
	 * So we can safely assume that the result of ftruncate() will be complete.
	 */
	FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status);
	if (0 != ftrunc_status)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		/* should go through recover_truncate now, which will again try to FTRUNCATE */
		return FALSE;
	}
	/* file size and total blocks: CONSISTENT (shrunk) */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */
	csa->nl->root_search_cycle++;	/* Force concurrent processes to restart in t_end/tp_tend to make sure no one
					 * tries to commit updates past the end of the file. Bitmap validations together
					 * with highest_lbm_with_busy_blk should actually be sufficient, so this is
					 * just to be safe.
					 */
	csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */
	/* Increment TN */
	assert(csa->ti->early_tn == csa->ti->curr_tn);
	csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1;
	INCREMENT_CURR_TN(csd);
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 59 : Issue a kill -9 after 2nd fsync */
	CHECK_DBSYNC(gv_cur_region, save_errno);
	ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	curr_tn = csa->ti->curr_tn;
	rel_crit(gv_cur_region);
	send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn);
	util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].",
					FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free);
	return TRUE;
} /* END of mu_truncate() */
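Two offsets drive the shrink in mu_truncate: db_write_eof_block writes one block at new_total blocks past the data start, and FTRUNCATE then cuts the file one block later, so the new file keeps that EOF block. A sketch of the arithmetic, with BLK_ZERO_OFF expanded to a hypothetical (start_vbn - 1) * 512 since the macro's definition is not in this listing:

/* Hypothetical expansion of the truncation offsets used above. */
#define SKETCH_DISK_BLOCK_SIZE	512	/* assumed size of the vbn unit */

static long long eof_block_offset(int start_vbn, int blk_size, unsigned int new_total)
{	/* where db_write_eof_block writes: just past the last kept data block */
	long long blk_zero_off = (long long)(start_vbn - 1) * SKETCH_DISK_BLOCK_SIZE;

	return blk_zero_off + (long long)blk_size * new_total;
}

static long long trunc_size(int start_vbn, int blk_size, unsigned int new_total)
{	/* where FTRUNCATE cuts: one block later, preserving the EOF block */
	return eof_block_offset(start_vbn, blk_size, new_total) + blk_size;
}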
Code example #6
File: mu_clsce.c Project: h4ck3rm1k3/FIS-GT.M
/*************************************************************************************************
Input Parameters:
	gv_target: working block's history
	level : Level of working block and its right sibling
	d_max_fill : Maximum fill allowed in a data block
	i_max_fill : Maximum fill allowed in an index block
Output Parameters:
	kill_set_ptr : List of blocks to be freed from LBM (already killed in mu_clsce)
	remove_rtsib : if right sibling was completely merged with working
Returns:
	cdb_sc_normal on success
	Otherwise, an error status
 *************************************************************************************************/
enum cdb_sc mu_clsce(int level, int i_max_fill, int d_max_fill, kill_set *kill_set_ptr,
	boolean_t *remove_rtsib)
{
	boolean_t	complete_merge = FALSE,
			old_ref_star_only = FALSE,
			new_rtsib_star_only = FALSE,
			star_only_merge = FALSE,
			blk2_ances_star_only = FALSE,
			delete_all_blk2_ances = TRUE,
			levelp_next_is_star, forward_process;
	unsigned char	oldblk1_prev_key[MAX_KEY_SZ+1],
			old_levelp_cur_prev_key[MAX_KEY_SZ+1],
			old_levelp_cur_key[MAX_KEY_SZ+1]; /* keys in private memory */
	unsigned short	temp_ushort;
	int		new_levelp_cur_cmpc, new_levelp_cur_next_cmpc, tkeycmpc,
			oldblk1_last_cmpc, newblk1_mid_cmpc, newblk1_last_cmpc;
	int		levelp, level2;
	int		old_blk1_sz, old_blk2_sz;
	int		old_levelp_cur_prev_keysz,
			old_levelp_cur_keysz,
			old_levelp_cur_next_keysz,
			newblk1_last_keysz,
			newblk2_first_keysz,
			new_blk2_ances_first_keysz;
	int		old_levelp_cur_keylen,
			new_levelp_cur_keylen,
			old_levelp_cur_next_keylen,
			new_levelp_cur_next_keylen,
			oldblk1_last_keylen,
			newblk1_last_keylen,
			newblk2_first_keylen;
	int		rec_size, piece_len, tkeylen, old_levelp_rec_offset;
	int		blk_seg_cnt, blk_size;
	uint4		save_t_err;
	enum cdb_sc	status;
	sm_uc_ptr_t 	oldblk1_last_key, old_levelp_cur_next_key,
			newblk1_last_key, newblk2_first_key, new_blk2_ances_first_key; /* shared memory keys */
	sm_uc_ptr_t 	rec_base, old_levelp_blk_base,
			bn_ptr1, bn_ptr2, blk2_ances_remain, old_blk1_base, old_blk2_base,
			new_blk1_top, new_blk2_first_rec_base, new_blk2_remain; /* shared memory pointers */
	sm_uc_ptr_t 	rPtr1, rPtr2;
	rec_hdr_ptr_t	star_rec_hdr, old_last_rec_hdr1, new_rec_hdr1, new_rec_hdr2,
			blk2_ances_hdr, new_levelp_cur_hdr, new_levelp_cur_next_hdr;
	blk_segment	*bs_ptr1, *bs_ptr2;
	srch_hist	*blk1ptr, *blk2ptr; /* blk2ptr is for right sibling's hist from a minimum sub-tree containing both blocks */
	error_def(ERR_GVKILLFAIL);

	blk_size = cs_data->blk_size;
	assert(update_array != NULL);
	update_array_ptr = update_array;

	blk1ptr = &(gv_target->hist);
	blk2ptr = gv_target->alt_hist;
	old_blk1_base = blk1ptr->h[level].buffaddr;
	old_blk2_base = blk2ptr->h[level].buffaddr;
	old_blk1_sz = ((blk_hdr_ptr_t)old_blk1_base)->bsiz;
	old_blk2_sz = ((blk_hdr_ptr_t)old_blk2_base)->bsiz;
	if (0 != level && sizeof(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz)
		old_ref_star_only = TRUE;
	/* Search an ancestor block at levelp >= level+1,
	which has a real key value corresponding to the working block.
	This key value will be changed after coalesce.  */
	levelp = level;
	do
	{
		if (++levelp > blk1ptr->depth ||  levelp > blk2ptr->depth)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		old_levelp_blk_base = blk1ptr->h[levelp].buffaddr;
		old_levelp_rec_offset = blk1ptr->h[levelp].curr_rec.offset;
		rec_base = old_levelp_blk_base + old_levelp_rec_offset;
		GET_RSIZ(rec_size, rec_base);
	} while (BSTAR_REC_SIZE == rec_size); /* search ancestors to get a real value */

	/*
	old_levelp_cur_prev_key = real value of the key before the curr_key at levelp
	old_levelp_cur_prev_keysz = uncompressed size of the key
	Note: we may not have a previous key (old_levelp_cur_prev_keysz = 0)
	*/
	if (sizeof(blk_hdr) == old_levelp_rec_offset)
		old_levelp_cur_prev_keysz = 0;
	else
	{
		if (cdb_sc_normal != (status = gvcst_expand_any_key (old_levelp_blk_base, rec_base,
			&old_levelp_cur_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL)))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		old_levelp_cur_prev_keysz = tkeylen + tkeycmpc;
	}

	/*
	old_levelp_cur_key = real value of the curr_key at levelp
	old_levelp_cur_keysz = uncompressed size of the key
	old_levelp_cur_keylen = compressed size of the key
	*/
	READ_RECORD(levelp, rec_base, tkeycmpc, rec_size,  &old_levelp_cur_key[0], old_levelp_cur_keylen, status);
	if (cdb_sc_normal != status)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	if (old_levelp_cur_prev_keysz)
		memcpy(&old_levelp_cur_key[0], &old_levelp_cur_prev_key[0], tkeycmpc);
	rec_base += rec_size;
	old_levelp_cur_keysz = old_levelp_cur_keylen + tkeycmpc;

	/*
	old_levelp_cur_next_key = uncompressed value of the next right key of old_levelp_cur_key
	old_levelp_cur_next_keysz = uncompressed size of the key
	old_levelp_cur_next_keylen = compressed size of the key
		Note: we may not have a next key (old_levelp_cur_next_keysz = 0)
	*/
	BLK_ADDR(old_levelp_cur_next_key, gv_cur_region->max_key_size + 1, unsigned char);
	READ_RECORD(levelp, rec_base, tkeycmpc, rec_size, old_levelp_cur_next_key, old_levelp_cur_next_keylen, status);
	if (cdb_sc_starrecord == status)
		levelp_next_is_star = TRUE;
	else if (cdb_sc_normal != status)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	else
	{
		memcpy(old_levelp_cur_next_key, &old_levelp_cur_key[0], tkeycmpc);
		old_levelp_cur_next_keysz = old_levelp_cur_next_keylen + tkeycmpc;
		levelp_next_is_star = FALSE;
	}


	/*
	Now process the actual working block at current level
		oldblk1_last_key = real value of last key of the working block
			For index block decompress *-key
		oldblk1_last_keylen = compressed size of the last key
		oldblk1_last_cmpc = compression count of last key of working block
		old_last_rec_hdr1 = New working index block's last record header
	*/
	BLK_ADDR(oldblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char);
	if (0 == level) /* data block */
	{
		if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base, old_blk1_base + old_blk1_sz,
			oldblk1_last_key, &rec_size, &oldblk1_last_keylen, &oldblk1_last_cmpc, NULL)))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		rec_base = old_blk1_base + old_blk1_sz;
	}
	else  /* Index blocks */
	{
		/* Since we will join this working block with the right sibling,
		we need to remove the *-key at the end of working block
		and replace with actual key value (with required compression).
		We will get the real value of *-rec from its ancestor at levelp */
		memcpy(oldblk1_last_key, &old_levelp_cur_key[0], old_levelp_cur_keysz);
		if (!old_ref_star_only) /* if the index block is not a *-key only block) */
		{
			if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base,
				old_blk1_base + old_blk1_sz - BSTAR_REC_SIZE, &oldblk1_prev_key[0],
				&rec_size, &tkeylen, &tkeycmpc, NULL)))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			GET_CMPC(oldblk1_last_cmpc, &oldblk1_prev_key[0], &old_levelp_cur_key[0]);
			oldblk1_last_keylen = old_levelp_cur_keysz - oldblk1_last_cmpc;
		}
		else /* working block has a *-key record only */
		{
			/* get key value from ancestor blocks key */
			oldblk1_last_keylen = old_levelp_cur_keysz;
			oldblk1_last_cmpc = 0;
		}
		BLK_ADDR(old_last_rec_hdr1, sizeof(rec_hdr), rec_hdr);
		old_last_rec_hdr1->rsiz = BSTAR_REC_SIZE + oldblk1_last_keylen;
		old_last_rec_hdr1->cmpc = oldblk1_last_cmpc;
	}

	/*
	newblk1_last_key = new working blocks final appended key
	newblk1_mid_cmpc = new working blocks firstly appended key's cmpc
	newblk1_last_keysz = new working blocks lastly appended key's size
	star_only_merge = TRUE, we can append only a *-key record into the working block
				(decompressing current *-key)
	complete_merge = TRUE, rtsib can be completely merged with working block
	piece_len = Size of data from old rtsibling to be merged into working block (includes rec_hdr size)
	*/
	BLK_ADDR(newblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char);
	rec_base = old_blk2_base + sizeof(blk_hdr);
	READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status);
	if (cdb_sc_starrecord == status) /* rtsib index block has *-record only */
	{
		if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */
			return cdb_sc_oprnotneeded;
		star_only_merge = TRUE;
		complete_merge = TRUE;
		rec_base = old_blk2_base + sizeof(blk_hdr) + BSTAR_REC_SIZE;
	}
	else if (cdb_sc_normal != status)
	{
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	else /* for both data and non-* index block */
	{
		newblk1_last_keysz = newblk1_last_keylen; /* first key has uncompressed real value */
		GET_CMPC(newblk1_mid_cmpc, oldblk1_last_key, newblk1_last_key);
		piece_len = rec_size - newblk1_mid_cmpc;
		if (level == 0) /* data block */
		{
			if (old_blk1_sz + piece_len > d_max_fill ) /* cannot fit even one record */
				return cdb_sc_oprnotneeded;
		}
		else /* else an index block */
		{
			if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */
				return cdb_sc_oprnotneeded;
			if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill )
					star_only_merge = TRUE; /* can fit only a *-record */
		}
		rec_base += rec_size;
	}

	/* new_blk2_first_rec_base and new_blk1_top are set to their final values for a star_only_merge of an index block */
	new_blk2_first_rec_base = new_blk1_top = rec_base;
	if (!star_only_merge)
	{
		BLK_ADDR(new_rec_hdr1, sizeof(rec_hdr), rec_hdr);
		new_rec_hdr1->rsiz = piece_len;
		new_rec_hdr1->cmpc = newblk1_mid_cmpc;
	}
	/* else only new_blk1_last_key will be appended to the working block */


	/* find a piece of the right sibling to be copied into the working block.
	Note: rec_base points to 2nd record of old rtsib */
	if (0 == level) /* if data block */
	{
		complete_merge = TRUE;
		while (rec_base < old_blk2_base + old_blk2_sz)
		{
			GET_RSIZ(rec_size, rec_base);
			if (old_blk1_sz + piece_len + rec_size > d_max_fill )
			{
				complete_merge = FALSE;
				break;
			}
			READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status);
			if (cdb_sc_normal != status)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc;
			rec_base += rec_size;
			piece_len += rec_size;
		}/* end of "while" loop */
		new_blk1_top = new_blk2_first_rec_base = rec_base;
	}
	else /* index block */
	{
		if (!star_only_merge)
		{
			/* we know we can fit more record in working block and rtsibling has more records */
			complete_merge = TRUE;
			while (rec_base < old_blk2_base + old_blk2_sz)
			{
				GET_RSIZ(rec_size, rec_base);
				if (BSTAR_REC_SIZE == rec_size)
				{
					rec_base += rec_size;
					piece_len += rec_size;
					break; /* already we know we can fit this *-record in working block */
				}
				READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size,
					newblk1_last_key, newblk1_last_keylen, status);
				if (cdb_sc_normal != status)
				{
					assert(t_tries < CDB_STAGNATE);
					return cdb_sc_blkmod;
				}
				newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc;
				rec_base += rec_size;
				piece_len += rec_size;
				if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill )
				{
					complete_merge = FALSE;
					break;
				}
			}/* end of "while" loop */
			new_blk1_top = new_blk2_first_rec_base = rec_base;
		} /* end if not *-only merge */
	} /* end else index block */

	if (!complete_merge)
	{
		/* Adjust the new right sibling's buffer.
		 * If new_rtsib_star_only == TRUE then
		 *	the new right sibling will have a *-key record only
		 * else
		 *	new_blk2_remain = base pointer of the buffer including the 1st record but excluding the rec_header and key
		 *	new_blk2_first_keysz = size of the new rtsib block's first key
		 */
		BLK_ADDR(newblk2_first_key, gv_cur_region->max_key_size + 1, unsigned char);
		READ_RECORD(level, new_blk2_first_rec_base, tkeycmpc, rec_size,
			newblk2_first_key, newblk2_first_keylen, status);
		if (cdb_sc_starrecord == status) /* new rtsib will have a *-record only */
			new_rtsib_star_only = TRUE;
		else if (cdb_sc_normal != status)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		else
		{
			memcpy(newblk2_first_key, newblk1_last_key, tkeycmpc); /* copy the compressed piece */
			newblk2_first_keysz = newblk2_first_keylen + tkeycmpc;
			new_blk2_remain = new_blk2_first_rec_base + sizeof(rec_hdr) + newblk2_first_keylen;
			BLK_ADDR(new_rec_hdr2, sizeof(rec_hdr), rec_hdr);
			new_rec_hdr2->rsiz = rec_size + tkeycmpc;
			new_rec_hdr2->cmpc = 0;
		}
	}
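
The merge arithmetic above leans on GT.M-style key compression: each record stores only the suffix of its key that differs from the previous record's key, plus a compression count (cmpc), so moving a record to a new position means re-sizing it against a different previous key. A minimal sketch of that idea, assuming plain null-terminated byte keys; key_cmpc and recompressed_size are hypothetical stand-ins for the GET_CMPC/READ_RECORD machinery, not the actual GT.M macros:

/* Hypothetical helper: number of leading bytes two keys share (the "cmpc"). */
static int key_cmpc(const unsigned char *prev, const unsigned char *curr)
{
	int	n = 0;

	while (prev[n] && (prev[n] == curr[n]))
		n++;
	return n;
}

/* Size a record occupies once re-compressed against a new previous key:
 * header + (full key length - shared prefix) + value. Loosely, this is the
 * effect of the "piece_len = rec_size - newblk1_mid_cmpc" adjustment above.
 */
static int recompressed_size(int hdr_sz, int keylen, int vallen, int cmpc)
{
	return hdr_sz + (keylen - cmpc) + vallen;
}
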
Code example #7
File: bm_getfree.c Project: 5HT/mumps
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr)
{
	cw_set_element	*cs1;
	sm_uc_ptr_t	bmp;
	block_id	bml, hint, hint_cycled, hint_limit;
	block_id_ptr_t	b_ptr;
	int		cw_set_top, depth, lcnt;
	unsigned int	local_maps, map_size, n_decrements = 0, total_blks;
	trans_num	ctn;
	int4		free_bit, offset;
	uint4		space_needed;
	uint4		status;
	srch_blk_status	blkhist;

	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	if (orig_hint >= total_blks)		/* for TP, hint can be > total_blks */
		orig_hint = 1;
	hint = orig_hint;
	hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
	hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP);
	local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
	for (lcnt = 0; lcnt <= local_maps; lcnt++)
	{
		bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled))
		{	/* if no free space or might have looped to original map, extend */
			if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled))
			{
				hint_cycled = hint_limit;
				hint = 1;
				continue;
			}
			if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks)))
				return (status);
			if (dba_mm == cs_data->acc_meth)
				return (FILE_EXTENDED);
			hint = total_blks;
			total_blks = cs_addrs->ti->total_blks;
			hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
			local_maps = hint_cycled + 2;	/* for (up to) 2 wraps */
			/*
			 * Note that one could optimize by not going back over the whole database and scanning only
			 * the extended section. But since it is very unlikely that a free block won't be found in
			 * the extended section, since we start from the extended section in either approach, since
			 * we have a GTMASSERT to check that we don't have a lot of free blocks while doing an
			 * extend, and since it is very easy to make the change to do a full pass, the full-pass
			 * solution is currently implemented.
			 */
			lcnt = -1;	/* allow it one extra pass to ensure that it can take advantage of the extension */
			n_decrements++;	/* used only for debugging purposes */
			continue;
		}
		bml *= BLKS_PER_LMAP;
		if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml)
		{	/* not within requested map */
			if ((bml < hint) && (hint_cycled))	/* wrap? - second one should force an extend for sure */
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
			hint = bml + 1;				/* start at beginning */
		}
		if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml)
			map_size = (total_blks - bml);
		else
			map_size = BLKS_PER_LMAP;
		if (0 != dollar_tlevel)
		{
			depth = cw_work;
			cw_set_top = *cw_depth_ptr;
			if (depth < cw_set_top)
				tp_get_cw(cs, cw_work, &cs1);
			for (; depth < cw_set_top;  depth++, cs1 = cs1->next_cw_set)
			{	/* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */
				if (bml == cs1->blk)
				{
					TRAVERSE_TO_LATEST_CSE(cs1);
					break;
				}
			}
			if (depth >= cw_set_top)
			{
				assert(cw_set_top == depth);
				depth = 0;
			}
		} else
		{
			for (depth = *cw_depth_ptr - 1; depth >= cw_work;  depth--)
			{	/* do non-tp back to front, because of adjacency */
				if (bml == (cs + depth)->blk)
				{
					cs1 = cs + depth;
					break;
				}
			}
			if (depth < cw_work)
			{
				assert(cw_work - 1 == depth);
				depth = 0;
			}
		}
		if (0 == depth)
		{
			ctn = cs_addrs->ti->curr_tn;
			if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr)))
				return MAP_RD_FAIL;
			if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
			{
				assert(CDB_STAGNATE > t_tries);
				rdfail_detail = cdb_sc_badbitmap;
				return MAP_RD_FAIL;
			}
			offset = 0;
		} else
		{
			bmp = cs1->old_block;
			b_ptr = (block_id_ptr_t)(cs1->upd_addr);
			b_ptr += cs1->reference_cnt - 1;
			offset = *b_ptr + 1;
		}
		if (offset < map_size)
		{
			free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used);
			if (MAP_RD_FAIL == free_bit)
				return MAP_RD_FAIL;
		} else
			free_bit = NO_FREE_SPACE;
		if (NO_FREE_SPACE != free_bit)
			break;
		if ((hint = bml + BLKS_PER_LMAP) >= total_blks)		/* if map is full, start at 1st blk in next map */
		{	/* wrap - second one should force an extend for sure */
			hint = 1;
			if (hint_cycled)
				hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0;
		}
		if ((0 == depth) && (FALSE != cs_addrs->now_crit))	/* if it's from the cw_set, its state is murky */
			bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data));	/* if crit, repair master map error */
	}
	/* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */
	if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries)
	{	/* bad free bit */
		assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps));	/* All maps full, should have extended */
		GTMASSERT;
	}
	if (0 != depth)
	{
		b_ptr = (block_id_ptr_t)(cs1->upd_addr);
		b_ptr += cs1->reference_cnt++;
		*b_ptr = free_bit;
	} else
	{
		space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id);
		if (dollar_tlevel)
		{
			ENSURE_UPDATE_ARRAY_SPACE(space_needed);	/* have brackets for "if" for macros */
		}
		BLK_ADDR(b_ptr, space_needed, block_id);
		memset(b_ptr, 0, space_needed);
		*b_ptr = free_bit;
		blkhist.blk_num = bml;
		blkhist.buffaddr = bmp;	/* cycle and cr have already been assigned from t_qread */
		t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */
	}
	return bml + free_bit;
}
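
Stripped of caching, TP bookkeeping, and crit handling, the search above is a hint-driven scan over local maps with wraparound: start at the map containing the hint, walk forward, wrap past the end, and extend the file only when every map comes up empty. A condensed sketch under those assumptions (find_free_in_map and BLKS_PER_MAP_SKETCH are hypothetical simplifications):

#define BLKS_PER_MAP_SKETCH 512	/* assumed blocks covered by one local map */

/* Hypothetical: offset of a free block within local map m, or -1 if the map is full. */
extern int find_free_in_map(int m);

/* Scan n_maps local maps starting at the hint's map, allowing a full wrap;
 * returns a block number, or -1 meaning "extend the file" (the gdsfilext path above).
 */
long alloc_from_hint(long hint, int n_maps)
{
	int	start, m, i, bit;

	start = (int)(hint / BLKS_PER_MAP_SKETCH);
	for (i = 0; i <= n_maps; i++)	/* "<=" grants one extra pass, like lcnt above */
	{
		m = (start + i) % n_maps;
		bit = find_free_in_map(m);
		if (0 <= bit)
			return ((long)m * BLKS_PER_MAP_SKETCH) + bit;
	}
	return -1;
}

The loop bound plays the same role as "lcnt <= local_maps" above: it guarantees termination even when the hint sits near the end of the file.
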
Code example #8
void	mu_reorg_upgrd_dwngrd(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		*blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp;
	block_id		startblk_input, stopblk_input;
	boolean_t		upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb;
	boolean_t		startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free;
	cache_rec_ptr_t		cr;
	char			*bml_lcl_buff = NULL, *command, *reorg_command;
	sm_uc_ptr_t		bptr = NULL;
	cw_set_element		*cse;
	enum cdb_sc		cdb_status;
	enum db_ver		new_db_format, ondsk_blkver;
	gd_region		*reg;
	int			cycle;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks;
	int4			status, status1, mapsize, lcnt, bml_status;
	reorg_stats_t		reorg_stats;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	sm_uc_ptr_t		blkBase, bml_sm_buff;	/* shared memory pointer to the bitmap global buffer */
	srch_hist		alt_hist;
	srch_blk_status		*blkhist, bmlhist;
	tp_region		*rptr;
	trans_num		curr_tn;
	unsigned char    	save_cw_set_depth;
	uint4			lcl_update_trans;

	region    = (CLI_PRESENT == cli_present("REGION"));
	upgrade   = (CLI_PRESENT == cli_present("UPGRADE"));
	downgrade = (CLI_PRESENT == cli_present("DOWNGRADE"));
	assert((upgrade && !downgrade) || (!upgrade && downgrade));
	command = upgrade ? "UPGRADE" : "DOWNGRADE";
	reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE";
	reorg_entiredb = TRUE;	/* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */
	startblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input)))
	{
		reorg_entiredb = FALSE;
		startblk_specified = TRUE;
	}
	stopblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input)))
	{
		reorg_entiredb = FALSE;
		stopblk_specified = TRUE;
	}
	mu_reorg_upgrd_dwngrd_in_prog = TRUE;
	mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE;

	assert(region);
	status = SS_NORMAL;
	error_mupip = FALSE;
	gvinit();	/* initialize gd_header (needed by the later call to mu_getlst) */
	mu_getlst("REG_NAME", SIZEOF(tp_region));	/* get the parameter corresponding to REGION qualifier */
	if (error_mupip)
	{
		util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command));
		mupip_exit(ERR_MUNOACTION);
	}
	assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize);	/* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */
	gv_target = targ_alloc(gv_keysize, NULL, NULL);	/* t_begin needs this initialized */
	gv_target_list = NULL;
	memset(&alt_hist, 0, SIZEOF(alt_hist));	/* null-initialize history */
	blkhist = &alt_hist.h[0];
	for (rptr = grlist;  NULL != rptr;  rptr = rptr->fPtr)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			break;
		reg = rptr->reg;
		util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
		if (reg_cmcheck(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot run across network",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		mu_reorg_process = TRUE;	/* gvcst_init will use this value to use gtm_poollimit settings. */
		gvcst_init(reg);
		mu_reorg_process = FALSE;
		assert(update_array != NULL);
		/* The access method stored in the global directory and the database file header might differ, in
		 * which case the database setting prevails. Therefore, the access method check can be done only
		 * after opening the database (i.e. after the gvcst_init).
		 */
		if (dba_bg != REG_ACC_METH(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG",
				TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		/* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to
		 * the same db file correspond to only one grlist entry.
		 */
		assert(FALSE == reg->was_open);
		TP_CHANGE_REG(reg);	/* sets gv_cur_region, cs_addrs, cs_data */
		csa = cs_addrs;
		csd = cs_data;
		blk_size = csd->blk_size;	/* "blk_size" is used by the BLK_FINI macro */
		if (reg->read_only)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		assert(GDSVCURR == GDSV6); /* so we trip this assert in case GDSVCURR changes without a change to this module */
		new_db_format = (upgrade ? GDSV6 : GDSV4);
		grab_crit(reg);
		curr_tn = csd->trans_hist.curr_tn;
		/* set the desired db format in the file header to the appropriate version, increment transaction number */
		status1 = desired_db_format_set(reg, new_db_format, reorg_command);
		assert(csa->now_crit);	/* desired_db_format_set() should not have released crit */
		first_reorg_in_this_db_fmt = TRUE;	/* with the current desired_db_format, this is the first reorg */
		if (SS_NORMAL != status1)
		{	/* "desired_db_format_set" would have printed appropriate error messages */
			if (ERR_MUNOACTION != status1)
			{	/* real error occurred while setting the db format. skip to next region */
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
			util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
			if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn)
				first_reorg_in_this_db_fmt = FALSE;
		} else
			util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg),
				LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
		assert(dba_bg == csd->acc_meth);
		/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		actual_blks2upgrd = csd->blks_to_upgrd;
		/* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH
		 * 	"csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing.
		 * If MUPIP REORG UPGRADE and all non-free blocks need to be upgraded then again we can skip processing.
		 */
		if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
			|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
		{
			util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD",
				       TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			rel_crit(reg);
			continue;
		}
		stop_blk = total_blks;
		if (stopblk_specified && stopblk_input <= stop_blk)
			stop_blk = stopblk_input;
		if (first_reorg_in_this_db_fmt)
		{	/* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */
			csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn;
			csd->reorg_upgrd_dwngrd_restart_block = 0;
			start_blk = (startblk_specified ? startblk_input : 0);
		} else
		{	/* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running
			 * or a previously running REORG of the same type was interrupted (Ctrl-Ced).
			 * In either case, resume processing from whatever restart block number is stored in the fileheader.
			 * The only exception is if "STARTBLK" was specified in the input, in which case we use it unconditionally.
			 */
			start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block);
		}
		if (start_blk > stop_blk)
			start_blk = stop_blk;
		mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn;
		/* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now
		 * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not.
		 */
		if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
		{
			rel_crit(reg);
			gtm_putmsg_csa(CSA_ARG(csa)
				VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		rel_crit(reg);
		/* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */
		status1 = SS_NORMAL;
		start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP);
		last_bmp  = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP);
		curblk = start_blk;	/* curblk is the block to be upgraded/downgraded */
		util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		if (NULL != bptr)
		{	/* malloc/free "bptr" for each region as GDS block-size can be different */
			free(bptr);
			bptr = NULL;
		}
		memset(&reorg_stats, 0, SIZEOF(reorg_stats));	/* initialize statistics for this region */
		for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP)
		{
			if (mu_ctrly_occurred || mu_ctrlc_occurred)
			{
				status1 = ERR_MUNOFINISH;
				break;
			}
			/* --------------------------------------------------------------
			 *             Read in current bitmap block
			 * --------------------------------------------------------------
			 */
			assert(!csa->now_crit);
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* bring block into the cache outside of crit */
			reorg_stats.blks_read_from_disk_bmp++;
			grab_crit_encr_cycle_sync(reg); /* needed so t_qread does not return NULL below */
			if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
			{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
				/* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment
				 * of this field should be done only if a db format change did not occur.
				 */
				rel_crit(reg);
				status1 = ERR_MUNOFINISH;
				/* This "start_tn" check is redone after the for-loop and an error message is printed there */
				break;
			} else if (reorg_entiredb)
			{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
				assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp));
				csd->reorg_upgrd_dwngrd_restart_block = curbmp;	/* previous blocks have been upgraded/downgraded */
			}
			/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete.
			 * Repeat check done a few steps earlier outside of this for loop.
			 */
			total_blks = csd->trans_hist.total_blks;
			free_blks = csd->trans_hist.free_blocks;
			actual_blks2upgrd = csd->blks_to_upgrd;
			if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
				|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
			{
				rel_crit(reg);
				break;
			}
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* now that in crit, note down stable buffer */
			if (NULL == bml_sm_buff)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			ondsk_blkver = cr->ondsk_blkver;	/* note down db fmt on disk for bitmap block */
			/* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit.
			 * We are interested in those blocks that are currently marked as USED in the bitmap.
			 * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks.
			 * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the
			 * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job.
			 * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing.
			 */
			if (NULL == bml_lcl_buff)
				bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP));
			memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP));
			if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE))
			{	/* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */
				assert(FALSE);	/* in pro, skip upgrading/downgrading all blks in this unreliable local bitmap */
				rel_crit(reg);
				util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.",
					TRUE, REG_LEN_STR(reg), curbmp);
				status1 = ERR_MUNOFINISH;
				continue;
			}
			rel_crit(reg);
			/* ------------------------------------------------------------------------
			 *         Upgrade/Downgrade all BUSY blocks in the current bitmap
			 * ------------------------------------------------------------------------
			 */
			curblk = (curbmp == start_bmp) ? start_blk : curbmp;
			mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : BLKS_PER_LMAP;
			assert(0 != mapsize);
			assert(mapsize <= BLKS_PER_LMAP);
			db_got_to_v5_once = csd->db_got_to_v5_once;
			for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++)
			{
				if (mu_ctrly_occurred || mu_ctrlc_occurred)
				{
					status1 = ERR_MUNOFINISH;
					goto stop_reorg_on_this_reg;	/* goto needed because of nested FOR Loop */
				}
				GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status);
				assert(BLK_MAPINVALID != bml_status); /* cert_blk ran clean so we don't expect invalid entries */
				if (BLK_FREE == bml_status)
				{
					reorg_stats.blks_skipped_free++;
					continue;
				}
				/* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */
				if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
				{	/* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */
					if (lcnt)
					{	/* non-bitmap block */
						/* read the block from disk into a private buffer. don't pollute the cache yet */
						if (NULL == bptr)
							bptr = (sm_uc_ptr_t)malloc(blk_size);
						status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE);
						/* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be
						 * upgraded and if its block size was too big to allow the extra block-header space
						 * requirements for a dynamic upgrade. a MUPIP REORG DOWNGRADE should not error out
						 * in that case as the block is already in the downgraded format.
						 */
						if (SS_NORMAL != status1)
						{
							if (!upgrade && (ERR_DYNUPGRDFAIL == status1))
							{
								assert(GDSV4 == new_db_format);
								ondsk_blkver = new_db_format;
							} else
							{
								gtm_putmsg_csa(CSA_ARG(csa)
									VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status1);
								util_out_print("Region !AD : Error occurred while reading block "
									"[0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
								status1 = ERR_MUNOFINISH;
								goto stop_reorg_on_this_reg;/* goto needed due to nested FOR Loop */
							}
						}
						reorg_stats.blks_read_from_disk_nonbmp++;
					} /* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */
					if (new_db_format == ondsk_blkver)
					{
						assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1)));
						status1 = SS_NORMAL;	/* treat DYNUPGRDFAIL as no error in case of downgrade */
						reorg_stats.blks_skipped_newfmtindisk++;
						continue;	/* current disk version is identical to what is desired */
					}
					assert(SS_NORMAL == status1);
				}
				/* Begin non-TP transaction to upgrade/downgrade the block.
				 * The way we do that is by updating the block using a null update array.
				 * Any update to a block will trigger an automatic upgrade/downgrade of the block based on
				 * 	the current fileheader desired_db_format setting and we use that here.
				 */
				t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
				for (; ;)
				{
					CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
					curr_tn = csd->trans_hist.curr_tn;
					db_got_to_v5_once = csd->db_got_to_v5_once;
					if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
					{
						blkhist->cse = NULL;	/* start afresh (do not use value from previous retry) */
						blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
						if (NULL == blkBase)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						blkhist->blk_num = curblk;
						blkhist->buffaddr = blkBase;
						ondsk_blkver = blkhist->cr->ondsk_blkver;
						new_hdr = *(blk_hdr_ptr_t)blkBase;
						mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn;
						mark_blk_free = FALSE;
						inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd;
					} else
					{
						mark_blk_free = TRUE;
						inctn_opcode = inctn_blkmarkfree;
					}
					inctn_detail.blknum_struct.blknum = curblk;
					/* t_end assumes that the history it is passed does not contain a bitmap block.
					 * for bitmap block, the history validation information is passed through cse instead.
					 * therefore we need to handle bitmap and non-bitmap cases separately.
					 */
					if (!lcnt)
					{	/* Means a bitmap block.
						 * At this point we can do a "new_db_format != ondsk_blkver" check to determine
						 * if the block got converted since we did the dsk_read (see the non-bitmap case
						 * for a similar check done there), but in that case we will have a transaction
						 * which has read 1 bitmap block and is updating no block. "t_end" currently cannot
						 * handle this case as it expects any bitmap block that needs validation to also
						 * have a corresponding cse which will hold its history. Hence we avoid doing the
						 * new_db_format check. The only disadvantage of this is that we will end up
						 * modifying the bitmap block as part of this transaction (in an attempt to convert
						 * its ondsk_blkver) even though it is already in the right format. Since this
						 * overhead is going to be one per bitmap block and since the block is in the cache
						 * at this point, we should not lose much.
						 */
						assert(!mark_blk_free);
						BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
						*blkid_ptr = 0;
						t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
						assert(&alt_hist.h[0] == blkhist);
						alt_hist.h[0].blk_num = 0; /* create empty history for bitmap block */
						assert(update_trans);
					} else
					{	/* non-bitmap block. fill in history for validation in t_end */
						assert(curblk);	/* we should never come here for block 0 (bitmap) */
						if (!mark_blk_free)
						{
							assert(blkhist->blk_num == curblk);
							assert(blkhist->buffaddr == blkBase);
							blkhist->tn      = curr_tn;
							alt_hist.h[1].blk_num = 0;
						}
						/* Also need to pass the bitmap as history to detect if any concurrent M-kill
						 * is freeing up the same USED block that we are trying to convert OR if any
						 * concurrent M-set is reusing the same RECYCLED block that we are trying to
						 * convert. Because of t_end currently not being able to validate a bitmap
						 * without that simultaneously having a cse, we need to create a cse for the
						 * bitmap that is used only for bitmap history validation, but should not be
						 * used to update the contents of the bitmap block in bg_update.
						 */
						bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
						if (NULL == bmlhist.buffaddr)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						bmlhist.blk_num = curbmp;
						bmlhist.tn = curr_tn;
						GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status);
						if (BLK_MAPINVALID == bml_status)
						{
							t_retry(cdb_sc_lostbmlcr);
							continue;
						}
						if (!mark_blk_free)
						{
							if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status))
							{	/* block still needs to be converted. create cse */
								BLK_INIT(bs_ptr, bs1);
								BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr),
									new_hdr.bsiz - SIZEOF(new_hdr));
								BLK_FINI(bs_ptr, bs1);
								t_write(blkhist, (unsigned char *)bs1, 0, 0,
									((blk_hdr_ptr_t)blkBase)->levl, FALSE,
									FALSE, GDS_WRITE_PLAIN);
								/* The directory tree status is for now only used to determine
								 * whether to write the block to the snapshot file (see
								 * t_end_sysops.c). For the reorg upgrade/downgrade process, the
								 * block is updated sequentially without changing gv_target. In
								 * this case, we assume the block is in the directory tree so as
								 * to have it written to the snapshot file.
								 */
								BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
								/* reset update_trans in case previous retry had set it to 0 */
								update_trans = UPDTRNS_DB_UPDATED_MASK;
								if (BLK_RECYCLED == bml_status)
								{	/* If the block that we are upgrading is RECYCLED, indicate to
									 * bg_update that blks_to_upgrd counter should NOT be
									 * touched in this case by setting "mode" to a special value
									 */
									assert(cw_set[cw_set_depth-1].mode == gds_t_write);
									cw_set[cw_set_depth-1].mode = gds_t_write_recycled;
									/* we SET block as NOT RECYCLED, otherwise, the mm_update()
									 * or bg_update_phase2 may skip writing it to snapshot file
									 * when its level is 0
									 */
									BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state);
								}
							} else
							{	/* Block got converted by another process since we did the dsk_read.
								 * 	or this block became marked free in the bitmap.
								 * No need to update this block. just call t_end for validation of
								 * 	both the non-bitmap block as well as the bitmap block.
								 * Note down that this transaction is no longer updating any blocks.
								 */
								update_trans = 0;
							}
							/* Need to put bit maps on the end of the cw set for concurrency checking.
							 * We want to simulate t_write_map, except we want to update "cw_map_depth"
							 * instead of "cw_set_depth". Hence the save and restore logic below.
							 * This part of the code is similar to the one in mu_swap_blk.c
							 */
							save_cw_set_depth = cw_set_depth;
							assert(!cw_map_depth);
							t_write_map(&bmlhist, NULL, curr_tn, 0); /* will increment cw_set_depth */
							cw_map_depth = cw_set_depth; /* set cw_map_depth to latest cw_set_depth */
							cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */
							/* t_write_map simulation end */
						} else
						{
							if (BLK_RECYCLED != bml_status)
							{	/* Block was RECYCLED at beginning but no longer so. Retry */
								t_retry(cdb_sc_bmlmod);
								continue;
							}
							/* Mark recycled block as FREE in bitmap */
							assert(lcnt == (curblk - curbmp));
							assert(update_array_ptr == update_array);
							*((block_id *)update_array_ptr) = lcnt;
							update_array_ptr += SIZEOF(block_id);
							/* the following assumes SIZEOF(block_id) == SIZEOF(int) */
							assert(SIZEOF(block_id) == SIZEOF(int));
							*(int *)update_array_ptr = 0;
							t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0);
							update_trans = UPDTRNS_DB_UPDATED_MASK;
						}
					}
					assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans));
					lcl_update_trans = update_trans;	/* take a copy before t_end modifies it */
					if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
					{	/* In case this is MM and t_end() remapped an extended database, reset csd */
						assert(csd == cs_data);
						if (!lcl_update_trans)
						{
							assert(lcnt);
							assert(!mark_blk_free);
							assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status));
							if (BLK_BUSY != bml_status)
								reorg_stats.blks_skipped_free++;
							else
								reorg_stats.blks_skipped_newfmtincache++;
						} else if (!lcnt)
							reorg_stats.blks_converted_bmp++;
						else
							reorg_stats.blks_converted_nonbmp++;
						break;
					}
					assert(csd == cs_data);
				}
			}
		}
	stop_reorg_on_this_reg:
		/* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */
		grab_crit(reg);
		blocks_left = 0;
		assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd);
		actual_blks2upgrd = csd->blks_to_upgrd;
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		/* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt
		 * change. Suppose we are doing a REORG UPGRADE: a concurrent REORG DOWNGRADE would have reset the
		 * "csd->reorg_upgrd_dwngrd_restart_block" field to 0, and if that reorg was interrupted by a Ctrl-C
		 * (before this reorg got here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to a
		 * non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this
		 * field to "curblk" as it would be misinterpreted as the number of blocks that have been DOWNgraded.
		 */
		set_fully_upgraded = FALSE;
		if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
		{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
			util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else if (reorg_entiredb)
		{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
			assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk);
			csd->reorg_upgrd_dwngrd_restart_block = curblk;	/* blocks below this have been upgraded/downgraded */
			expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks);
			blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd);
			/* If this reorg command went through all blocks in the database, then it should have
			 * 	correctly concluded at this point whether the reorg is complete or not.
			 * If this reorg command started from where a previous incomplete reorg left off
			 *	(i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial
			 *	GDS blocks that it skipped are completely {up,down}graded or not.
			 */
			assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt);
			/* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by
			 * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is
			 * 0 in the file-header and the desired_db_format did not change since the start of the REORG,
			 * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE.
			 */
			if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd))
			{
				csd->fully_upgraded = TRUE;
				csd->db_got_to_v5_once = TRUE;
				set_fully_upgraded = TRUE;
			}
			/* flush all changes noted down in the file-header */
			if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
			{
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4,
					LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
		}
		curr_tn = csd->trans_hist.curr_tn;
		rel_crit(reg);
		util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		/* Print statistics */
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap)     : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (Free)              : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free);
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk)   : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache)  : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache);
		util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap)          : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp);
		util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap)      : 0x!XL",
			TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp);
		if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left))
		{	/* file-header counter does not match what reorg on the entire database expected to see */
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd);
			util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter",
				TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
				       "Blocks to upgrade = [0x!XL]",
				       TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd);
		/* Issue success or failure message for this region */
		if (SS_NORMAL == status1)
		{	/* issue success only if REORG did not encounter any error in its processing */
			if (set_fully_upgraded)
				util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg));
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg),
										process_id, process_id, &curr_tn);
		} else
		{
			assert(ERR_MUNOFINISH == status1);
			assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status));
			util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/",
					TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			status = status1;
		}
	}
	if (NULL != bptr)
		free(bptr);
	if (NULL != bml_lcl_buff)
		free(bml_lcl_buff);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY);
		status = ERR_MUNOFINISH;
	}
	mupip_exit(status);
}
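
The per-block loop above classifies each block by reading a two-bit state out of the local bitmap (GET_BM_STATUS) before choosing between converting, skipping, and marking free. A minimal sketch of such a two-bit lookup follows; the state values and packing order are illustrative assumptions, not the exact GDS on-disk encoding:

/* Hypothetical two-bit block states for this sketch (values and packing assumed). */
enum blk_state_sketch { BLK_BUSY_S = 0, BLK_FREE_S = 1, BLK_MAPINVALID_S = 2, BLK_RECYCLED_S = 3 };

/* Read the two-bit state of block "blk" (relative to this map) from a packed
 * bitmap that stores four block states per byte.
 */
static int get_bm_status_sketch(const unsigned char *map, int blk)
{
	int	byte_idx = blk / 4;
	int	shift = (blk % 4) * 2;

	return (map[byte_idx] >> shift) & 0x3;
}
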
Code example #9
File: f1g_queue.c Project: jos1290/f1goal
i8_p que_obj_next_freeblk(que_obj_p p_obj)
{
	return BLK_ADDR(p_obj->tail, p_obj);
}
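
The f1g_queue examples on this page all come down to ring-buffer index arithmetic: BLK_ADDR maps a slot index to an address inside one contiguous buffer, and the front/tail indices advance modulo blk_num. A self-contained sketch of that shape, with a hypothetical struct layout:

typedef struct {
	char	*buf;		/* contiguous storage: blk_num slots of blk_size bytes each */
	int	blk_size;
	int	blk_num;
	int	front;		/* oldest filled slot (next to pop) */
	int	tail;		/* next free slot (next to fill) */
} ring_q;

/* Address of slot i -- the role BLK_ADDR plays in the queue examples. */
static char *slot_addr(ring_q *q, int i)
{
	return q->buf + ((long)i * q->blk_size);
}

/* The next free block is simply the slot at tail, mirroring que_obj_next_freeblk. */
static char *next_freeblk(ring_q *q)
{
	return slot_addr(q, q->tail);
}
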
Code example #10
File: dse_chng_bhead.c Project: h4ck3rm1k3/FIS-GT.M
void dse_chng_bhead(void)
{
	block_id	blk;
	block_id	*blkid_ptr;
	sgm_info	*dummysi = NULL;
	int4		x;
	cache_rec_ptr_t	cr;
	uchar_ptr_t	bp;
	sm_uc_ptr_t	blkBase;
	blk_hdr		new_hdr;
	blk_segment	*bs1, *bs_ptr;
	cw_set_element  *cse;
	int4		blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	bool		ismap;
	bool		chng_blk;
	uint4		mapsize;
	uint4           jnl_status;

	error_def(ERR_DSEBLKRDFAIL);
	error_def(ERR_DSEFAIL);
	error_def(ERR_DBRDONLY);

	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	assert(update_array);
	/* reset new block mechanism */
	update_array_ptr = update_array;
	chng_blk = FALSE;
	if (cli_present("BLOCK") == CLI_PRESENT)
	{
		if (!cli_get_hex("BLOCK",&blk))
			return;
		if (blk < 0 || blk > cs_addrs->ti->total_blks)
		{	util_out_print("Error: invalid block number.",TRUE);
			return;
		}
		patch_curr_blk = blk;
	}
	blk_size = cs_addrs->hdr->blk_size;
	ismap = (patch_curr_blk / cs_addrs->hdr->bplmap * cs_addrs->hdr->bplmap == patch_curr_blk);
	mapsize = BM_SIZE(cs_addrs->hdr->bplmap);

	t_begin_crit (ERR_DSEFAIL);
	if (!(bp = t_qread (patch_curr_blk,&dummy_hist.h[0].cycle,&dummy_hist.h[0].cr)))
		rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)bp;

	if (cli_present("LEVEL") == CLI_PRESENT)
	{
		if (!cli_get_num("LEVEL",&x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		new_hdr.levl = (unsigned char)x;
		chng_blk = TRUE;
		if (new_hdr.bsiz < sizeof(blk_hdr))
			new_hdr.bsiz = sizeof(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (cli_present("BSIZ") == CLI_PRESENT)
	{
		if (!cli_get_hex("BSIZ",&x))
		{
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		else if (x < sizeof(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, cs_addrs);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			util_out_print("Error: bad block build.",TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		t_write (patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, new_hdr.levl, TRUE, FALSE);
		BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse);
		t_end(&dummy_hist, 0);
	}
	if (cli_present("TN") == CLI_PRESENT)
	{
		if (!cli_get_hex("TN",&x))
			return;
		t_begin_crit(ERR_DSEFAIL);
		assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn);
		cs_addrs->ti->early_tn++;
		blkBase = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr);
		if (NULL == blkBase)
		{
			rel_crit(gv_cur_region);
			util_out_print("Error: Unable to read buffer.", TRUE);
			t_abort(gv_cur_region, cs_addrs);
			return;
		}
		/* Create a null update array for a block */
		if (ismap)
		{
			BLK_ADDR(blkid_ptr, sizeof(block_id), block_id);
			*blkid_ptr = 0;
			t_write_map(patch_curr_blk, blkBase, (unsigned char *)blkid_ptr, cs_addrs->ti->curr_tn);
			cr_array_index = 0;
			block_saved = FALSE;
		} else
		{
			BLK_INIT(bs_ptr, bs1);
			BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr));
			BLK_FINI(bs_ptr, bs1);
			t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, blkBase,
						((blk_hdr_ptr_t)blkBase)->levl, TRUE, FALSE);
			cr_array_index = 0;
			block_saved = FALSE;
			if (JNL_ENABLED(cs_data))
			{
				JNL_SHORT_TIME(jgbl.gbl_jrec_time);	/* needed for jnl_put_jrt_pini() and jnl_write_aimg_rec() */
				jnl_status = jnl_ensure_open();
				if (0 == jnl_status)
				{
					cse = (cw_set_element *)(&cw_set[0]);
					cse->new_buff = non_tp_jfb_buff_ptr;
					gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, x);
					cse->done = TRUE;
					if (0 == cs_addrs->jnl->pini_addr)
						jnl_put_jrt_pini(cs_addrs);
					jnl_write_aimg_rec(cs_addrs, cse->blk, (blk_hdr_ptr_t)cse->new_buff);
				} else
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			}
		}
		/* Pass the desired tn "x" as argument to bg_update or mm_update */
		if (dba_bg == cs_addrs->hdr->acc_meth)
			bg_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi);
		else
			mm_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi);
		cs_addrs->ti->curr_tn++;
		assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn);
		/* the following code is analogous to that in t_end and should be maintained in a similar fashion */
		while (cr_array_index)
			cr_array[--cr_array_index]->in_cw_set = FALSE;
		rel_crit(gv_cur_region);
		if (block_saved)
			backup_buffer_flush(gv_cur_region);
		UNIX_ONLY(
			if (unhandled_stale_timer_pop)
				process_deferred_stale();
		)
		wcs_timer_start(gv_cur_region, TRUE);
	}
}
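
The TN branch above follows a fixed bracket: bump early_tn while holding crit to announce an in-flight update, apply the update with the desired transaction number, then bump curr_tn so the two counters agree again. A condensed sketch of that bracket, with hypothetical _s-suffixed stand-ins for the crit and update machinery:

/* Condensed, assumption-laden sketch of the TN-change bracket above. */
struct ti_sketch { unsigned int early_tn, curr_tn; };

extern void grab_crit_s(void);
extern void rel_crit_s(void);
extern void apply_update_with_tn_s(long blk, unsigned int tn);

void change_blk_tn_s(struct ti_sketch *ti, long blk, unsigned int desired_tn)
{
	grab_crit_s();					/* t_begin_crit above */
	ti->early_tn++;					/* announce an update is in flight */
	apply_update_with_tn_s(blk, desired_tn);	/* bg_update/mm_update with "x" */
	ti->curr_tn++;					/* counters agree again, as the asserts check */
	rel_crit_s();
}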