/* Return the address of the most recently pushed element, or NULL if the queue is empty.
 * Wraparound is done with modular arithmetic so the computation stays correct even if
 * pos_t is an unsigned type (the original "tail < 0" test could never fire for unsigned
 * pos_t, leaving tail at a huge wrapped value). */
i8_p que_obj_tail(que_obj_p p_obj)
{
	pos_t tail;

	if (que_obj_empty(p_obj))
		return NULL;
	tail = (p_obj->tail + p_obj->blk_num - 1) % p_obj->blk_num;
	return BLK_ADDR(tail, p_obj);
}
/* Pop (discard) the oldest element: clear its payload and advance the front index with
 * wraparound. Note: popping an empty queue is a silent no-op; F1G_OK is returned either
 * way, so callers cannot detect underflow from the return value alone (a distinct
 * failure status code would be needed for that). */
i8_t que_obj_pop(que_obj_p p_obj)
{
	elem_p p_elem;

	if (p_obj->front != p_obj->tail) // not empty
	{
		p_elem = (elem_p)BLK_ADDR(p_obj->front, p_obj);
		p_elem->data_len = 0;
		p_elem->status = 0x00;
		p_obj->front = (p_obj->front + 1) % (p_obj->blk_num);
	}
	return F1G_OK;
}
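/* Illustrative sketch (not part of the original source): the queue above uses the classic
 * "one slot left empty" ring-buffer convention -- front == tail means empty, so at most
 * blk_num - 1 elements can be stored. The standalone model below, with hypothetical
 * plain-int indices, shows the wraparound arithmetic that que_obj_pop/que_obj_tail rely on. */
#include <assert.h>

typedef struct { int front, tail, blk_num; } ring_model_t;

static int ring_count(const ring_model_t *q)
{	/* number of occupied slots, valid for any front/tail ordering */
	return (q->tail - q->front + q->blk_num) % q->blk_num;
}

static void ring_model_demo(void)
{
	ring_model_t q = { 0, 0, 4 };	/* blk_num == 4 => capacity is 3 */

	assert(0 == ring_count(&q));		/* front == tail: empty */
	q.tail = (q.tail + 1) % q.blk_num;	/* push */
	q.tail = (q.tail + 1) % q.blk_num;	/* push */
	assert(2 == ring_count(&q));
	q.front = (q.front + 1) % q.blk_num;	/* pop, as in que_obj_pop */
	assert(1 == ring_count(&q));
	/* last pushed slot, as computed in que_obj_tail */
	assert((q.tail + q.blk_num - 1) % q.blk_num == 1);
}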
/*********************************************************************************************** Input Parameters: cur_level: Working block's level d_max_fill: Database fill factor i_max_fill: Index fill factor Output Parameters: blks_created: number of new blocks created lvls_increased: number of levels added Input/Output Parameters: gv_target: History of working block It is assumed that both i_max_fill and d_max_fill are strictly less than the block size. Returns: cdb_sc_normal if successful, some other cdb_sc status otherwise ************************************************************************************************/ enum cdb_sc mu_split(int cur_level, int i_max_fill, int d_max_fill, int *blks_created, int *lvls_increased) { boolean_t first_copy, new_rtblk_star_only, create_root = FALSE, split_required, insert_in_left; unsigned char curr_prev_key[MAX_KEY_SZ+1], new_blk1_last_key[MAX_KEY_SZ+1]; unsigned short temp_ushort; int rec_size, new_ins_keycmpc, tkeycmpc, new_ances_currkeycmpc, old_ances_currkeycmpc; int tmp_cmpc; block_index left_index, right_index; block_offset ins_off, ins_off2; int level; int new_ins_keysz, new_ances_currkeysz, new_blk1_last_keysz, newblk2_first_keysz, next_gv_currkeysz; int old_ances_currkeylen, new_ins_keylen, new_ances_currkeylen, tkeylen, newblk2_first_keylen; int old_blk1_last_rec_size, old_blk1_sz, save_blk_piece_len, old_right_piece_len; int delta, max_fill; enum cdb_sc status; int blk_seg_cnt, blk_size, new_leftblk_top_off; block_id allocation_clue; sm_uc_ptr_t rPtr1, rPtr2, rec_base, key_base, next_gv_currkey, bn_ptr1, bn_ptr2, save_blk_piece, old_blk_after_currec, ances_currkey, old_blk1_base, new_blk1_top, new_blk2_top, new_blk2_frec_base, new_blk2_rem, newblk2_first_key, new_ins_key; blk_segment *bs_ptr1, *bs_ptr2; cw_set_element *cse; rec_hdr_ptr_t star_rec_hdr, new_rec_hdr1a, new_rec_hdr1b, new_rec_hdr2, root_hdr; blk_hdr_ptr_t blk_hdr_ptr; blk_size = cs_data->blk_size; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr); star_rec_hdr->rsiz = BSTAR_REC_SIZE; SET_CMPC(star_rec_hdr, 0); level = cur_level; max_fill = (0 == level)? d_max_fill : i_max_fill; /* ------------------- * Split working block. * ------------------- * new_blk1_last_key = last key of the new working block after split * new_blk1_last_keysz = size of new_blk1_last_key * old_blk1_last_rec_size = last record size of the new working block after split (for old block) * new_blk2_frec_base = base of first record of right block created after split * newblk2_first_key = first key of new block created after split * newblk2_first_keysz = size of newblk2_first_key * new_blk2_rem = pointer to new block to be created after split, excluding 1st record header + key */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; if (cdb_sc_normal != (status = locate_block_split_point (old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) /* Avoid a split that would create a small right sibling.
Note this should not happen often when tolerance is high */ return cdb_sc_oprnotneeded; old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = old_blk1_base + new_leftblk_top_off; BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal != status) /* restart for cdb_sc_starrecord too, because we eliminated the possibility already */ { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* copy the compressed key piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; /* gv_currkey_next_reorg will be saved for next iteration in mu_reorg */ next_gv_currkey = newblk2_first_key; next_gv_currkeysz = newblk2_first_keysz; BLK_ADDR(new_rec_hdr1b, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr1b->rsiz = rec_size + tkeycmpc; SET_CMPC(new_rec_hdr1b, 0); /* Create new split piece, we already know that this will not be *-rec only. * Note that this has to be done BEFORE modifying working block as building this buffer relies on the * working block to be pinned which is possible only if this cw-set-element is created ahead of that * of the working block (since order in which blocks are built is the order in which cses are created). */ BLK_INIT(bs_ptr2, bs_ptr1); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)new_rec_hdr1b, SIZEOF(rec_hdr)); BLK_SEG(bs_ptr2, newblk2_first_key, newblk2_first_keysz); BLK_SEG(bs_ptr2, new_blk2_rem, new_blk2_top - new_blk2_rem); if (!BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } allocation_clue = ALLOCATION_CLUE(cs_data->trans_hist.total_blks); right_index = t_create(allocation_clue++, (unsigned char *)bs_ptr1, 0, 0, level); (*blks_created)++; /* Modify working block removing split piece */ BLK_INIT(bs_ptr2, bs_ptr1); if (0 == level) { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr)); } else { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr) - old_blk1_last_rec_size); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)star_rec_hdr, SIZEOF(rec_hdr) ); BLK_ADDR(bn_ptr1, SIZEOF(block_id), unsigned char); memcpy(bn_ptr1, old_blk1_base + new_leftblk_top_off - SIZEOF(block_id), SIZEOF(block_id)); BLK_SEG(bs_ptr2, bn_ptr1, SIZEOF(block_id)); } if ( !BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } t_write(&gv_target->hist.h[level], (unsigned char *)bs_ptr1, 0, 0, level, FALSE, TRUE, GDS_WRITE_KILLTN); /* ---------------------------------------------------------------------------- Modify ancestor block for the split in current level. 
new_ins_key = new key to be inserted in parent because of split in child new_ins_key will be inserted after gv_target->hist.h[level].prev_rec and before gv_target->hist.h[level].curr_rec new_ins_keysz = size of new_ins_key Note: A restriction of the algorithm is to have current key and new_ins_key in the same block, either the left or the new right block ---------------------------------------------------------------------------- */ BLK_ADDR(new_ins_key, new_blk1_last_keysz, unsigned char); memcpy(new_ins_key, &new_blk1_last_key[0], new_blk1_last_keysz); new_ins_keysz = new_blk1_last_keysz; for(;;) /* ========== loop through ancestors as necessary ======= */ { level++; max_fill = i_max_fill; /* old_blk_after_currec = remainder of current block after currec ances_currkey = old real value of currkey in ancestor block */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; rec_base = old_blk1_base + gv_target->hist.h[level].curr_rec.offset; GET_RSIZ(rec_size, rec_base); old_blk_after_currec = rec_base + rec_size; old_ances_currkeycmpc = EVAL_CMPC((rec_hdr_ptr_t)rec_base); old_ances_currkeylen = rec_size - BSTAR_REC_SIZE; if (INVALID_RECORD(level, rec_size, old_ances_currkeylen, old_ances_currkeycmpc)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (0 == old_ances_currkeylen) { if (0 != old_ances_currkeycmpc) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } new_ances_currkeycmpc = new_ances_currkeylen = 0; } else { BLK_ADDR(ances_currkey, gv_cur_region->max_rec_size + 1, unsigned char); key_base = rec_base + SIZEOF(rec_hdr); } new_ances_currkeysz = old_ances_currkeycmpc + old_ances_currkeylen; if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) /* cur_rec is not first key */ { if (cdb_sc_normal != (status = gvcst_expand_any_key(old_blk1_base, old_blk1_base + gv_target->hist.h[level].curr_rec.offset, &curr_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (old_ances_currkeycmpc) memcpy(ances_currkey, &curr_prev_key[0], old_ances_currkeycmpc); } if (old_ances_currkeylen) { memcpy(ances_currkey + old_ances_currkeycmpc, key_base, old_ances_currkeylen); GET_CMPC(new_ances_currkeycmpc, new_ins_key, ances_currkey); new_ances_currkeylen = new_ances_currkeysz - new_ances_currkeycmpc; } if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) { /* new_ins_key will be inserted after curr_prev_key */ GET_CMPC(new_ins_keycmpc, &curr_prev_key[0], new_ins_key); } else new_ins_keycmpc = 0; /* new_ins_key will be the 1st key */ new_ins_keylen = new_ins_keysz - new_ins_keycmpc; delta = BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen; if (old_blk1_sz + delta > blk_size - cs_data->reserved_bytes) /* split required */ { split_required = TRUE; if (level == gv_target->hist.depth) { create_root = TRUE; if (MAX_BT_DEPTH - 1 <= level) /* maximum level reached */ return cdb_sc_maxlvl; } if (max_fill + BSTAR_REC_SIZE > old_blk1_sz) { if (SIZEOF(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz) return cdb_sc_oprnotneeded; /* Improve code to avoid this */ max_fill = old_blk1_sz - BSTAR_REC_SIZE; } status = locate_block_split_point(old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off); if (cdb_sc_normal != status || new_leftblk_top_off >= old_blk1_sz || 0 == new_blk1_last_keysz) { assert(t_tries
< CDB_STAGNATE); return cdb_sc_blkmod; } assert(BSTAR_REC_SIZE != old_blk1_last_rec_size); old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = new_blk1_top = old_blk1_base + new_leftblk_top_off; if (BSTAR_REC_SIZE == old_right_piece_len) new_rtblk_star_only = TRUE; else new_rtblk_star_only = FALSE; if (new_leftblk_top_off == gv_target->hist.h[level].curr_rec.offset) { /* inserted key will be the first record of new right block */ new_ins_keylen = new_ins_keysz; new_ins_keycmpc = 0; } else /* process 1st record of new right block */ { BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal == status) { memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* compressed piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; BLK_ADDR(new_rec_hdr2, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr2->rsiz = newblk2_first_keysz + BSTAR_REC_SIZE; SET_CMPC(new_rec_hdr2, 0); } else if (cdb_sc_starrecord != status || !new_rtblk_star_only) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } } /* else gv_target->hist.h[level].curr_rec will be newblk2_first_key */ if (new_leftblk_top_off > gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* in this case prev_rec (if exists), new key and curr_rec should go into left block */ if (new_leftblk_top_off + delta - old_blk1_last_rec_size + BSTAR_REC_SIZE <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else if (new_leftblk_top_off < gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* if gv_target->hist.h[level].curr_rec is the first key in old_blk1 then in new right block, new_ins_key will be the 1st record key and curr_rec will be 2nd record and there will be no prev_rec in right block. Else (if curr_rec is not first key) there will be some records before new_ins_key, at least prev_rec */ delta = (int)(BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen + ((0 == new_ins_keycmpc) ? 0 : (EVAL_CMPC((rec_hdr_ptr_t)new_blk2_frec_base)))); if (SIZEOF(blk_hdr) + old_right_piece_len + delta <= blk_size - cs_data->reserved_bytes) { insert_in_left = FALSE; if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* in this case prev_rec (if exists), new key and curr_rec should go into left block and curr_rec will be the last record (*-key) of left new block */ delta = BSTAR_REC_SIZE + new_ins_keylen; if (new_leftblk_top_off + delta <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } } /* end if split required */ else
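/* Illustrative sketch (not the real locate_block_split_point): mu_split cuts the working
 * block at the last record boundary that keeps the left piece within max_fill bytes.
 * Real GDS records are rec_hdr-prefixed and key-compressed, and the real routine also
 * returns the last key and record size of the left piece; this hypothetical model uses a
 * plain array of record sizes to show only the cut-point arithmetic. */
static int model_split_point(const int *rec_sizes, int n_recs, int hdr_size, int max_fill)
{
	int i, top = hdr_size;

	for (i = 0; i < n_recs; i++)
	{
		if ((top + rec_sizes[i] > max_fill) && (i > 0))
			break;		/* this record would overflow the fill target; keep >= 1 record left */
		top += rec_sizes[i];
	}
	/* records [0, i) stay in the left block; [i, n_recs) would form the right sibling.
	 * If everything fit (i == n_recs), the caller would skip the split, just as mu_split
	 * returns cdb_sc_oprnotneeded when the right piece would be too small. */
	return top;
}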
enum cdb_sc gvcst_kill_blk(srch_blk_status *blkhist, char level, gv_key *search_key, srch_rec_status low, srch_rec_status high, boolean_t right_extra, cw_set_element **cseptr) { typedef sm_uc_ptr_t bytptr; unsigned short temp_ushort; int4 temp_long; int tmp_cmpc; int blk_size, blk_seg_cnt, lmatch, rmatch, targ_len, prev_len, targ_base, next_rec_shrink, temp_int, blkseglen; bool kill_root, first_copy; blk_hdr_ptr_t old_blk_hdr; rec_hdr_ptr_t left_ptr; /*pointer to record before first record to delete*/ rec_hdr_ptr_t del_ptr; /*pointer to first record to delete*/ rec_hdr_ptr_t right_ptr; /*pointer to record after last record to delete*/ rec_hdr_ptr_t right_prev_ptr; rec_hdr_ptr_t rp, rp1; /*scratch record pointer*/ rec_hdr_ptr_t first_in_blk, top_of_block, new_rec_hdr, star_rec_hdr; blk_segment *bs1, *bs_ptr; block_index new_block_index; unsigned char *skb; static readonly block_id zeroes = 0; cw_set_element *cse, *old_cse; bytptr curr, prev, right_bytptr; off_chain chain1, curr_chain, prev_chain; block_id blk; sm_uc_ptr_t buffer; srch_blk_status *t1; *cseptr = NULL; if (low.offset == high.offset) return cdb_sc_normal; blk = blkhist->blk_num; if (dollar_tlevel) { PUT_LONG(&chain1, blk); if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth)) { assert(sgm_info_ptr->tp_csa == cs_addrs); assert(FALSE == cs_addrs->now_crit); return cdb_sc_blknumerr; } } buffer = blkhist->buffaddr; old_blk_hdr = (blk_hdr_ptr_t)buffer; kill_root = FALSE; blk_size = cs_data->blk_size; first_in_blk = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + SIZEOF(blk_hdr)); top_of_block = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + old_blk_hdr->bsiz); left_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + low.offset); right_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + high.offset); if (right_extra && right_ptr < top_of_block) { right_prev_ptr = right_ptr; GET_USHORT(temp_ushort, &right_ptr->rsiz); right_ptr = (rec_hdr_ptr_t)((bytptr)right_ptr + temp_ushort); } if ((bytptr)left_ptr < (bytptr)old_blk_hdr || (bytptr)right_ptr > (bytptr)top_of_block || (bytptr)left_ptr >= (bytptr)right_ptr) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } if ((bytptr)left_ptr == (bytptr)old_blk_hdr) { if ((bytptr)right_ptr == (bytptr)top_of_block) { if ((bytptr)first_in_blk == (bytptr)top_of_block) { if (0 != level) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } return cdb_sc_normal; } if (!gv_target->hist.h[level + 1].blk_num) kill_root = TRUE; else { /* We are about to free up the contents of this entire block. If this block corresponded to * a global that has NOISOLATION turned on and has a non-zero recompute list (i.e. some SETs * already happened in this same TP transaction), make sure we disable the NOISOLATION * optimization in this case as that is applicable only if one or more SETs happened in this * data block and NOT if a KILL happens. Usually this is done by a t_write(GDS_WRITE_KILLTN) * call but since in this case the entire block is being freed, "t_write" won't be invoked * so we need to explicitly set GDS_WRITE_KILLTN like t_write would have (GTM-8269). * Note: blkhist->first_tp_srch_status is not reliable outside of TP. Thankfully the recompute * list is also maintained only in case of TP so a check of dollar_tlevel is enough to * dereference both "first_tp_srch_status" and "recompute_list_head". */ if (dollar_tlevel) { t1 = blkhist->first_tp_srch_status ?
blkhist->first_tp_srch_status : blkhist; cse = t1->cse; if ((NULL != cse) && cse->recompute_list_head) cse->write_type |= GDS_WRITE_KILLTN; } return cdb_sc_delete_parent; } } del_ptr = first_in_blk; } else { GET_USHORT(temp_ushort, &left_ptr->rsiz); del_ptr = (rec_hdr_ptr_t)((bytptr)left_ptr + temp_ushort); if ((bytptr)del_ptr <= (bytptr)(left_ptr + 1) || (bytptr)del_ptr > (bytptr)right_ptr) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } } if ((bytptr)del_ptr == (bytptr)right_ptr) return cdb_sc_normal; lmatch = low.match; rmatch = high.match; if (level) { for (rp = del_ptr ; rp < right_ptr ; rp = rp1) { GET_USHORT(temp_ushort, &rp->rsiz); rp1 = (rec_hdr_ptr_t)((bytptr)rp + temp_ushort); if (((bytptr)rp1 < (bytptr)(rp + 1) + SIZEOF(block_id)) || ((bytptr)rp1 < buffer) || ((bytptr)rp1 > (buffer + blk_size))) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } GET_LONG(temp_long, ((bytptr)rp1 - SIZEOF(block_id))); if (dollar_tlevel) { chain1 = *(off_chain *)&temp_long; if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth)) { assert(sgm_info_ptr->tp_csa == cs_addrs); assert(FALSE == cs_addrs->now_crit); return cdb_sc_blknumerr; } } gvcst_delete_blk(temp_long, level - 1, FALSE); } } if (kill_root) { /* create an empty data block */ BLK_INIT(bs_ptr, bs1); if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } new_block_index = t_create(blk, (uchar_ptr_t)bs1, 0, 0, 0); /* create index block */ BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr->rsiz = SIZEOF(rec_hdr) + SIZEOF(block_id); SET_CMPC(new_rec_hdr, 0); BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr)); BLK_SEG(bs_ptr, (bytptr)&zeroes, SIZEOF(block_id)); if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } cse = t_write(blkhist, (unsigned char *)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), new_block_index, 1, TRUE, FALSE, GDS_WRITE_KILLTN); assert(!dollar_tlevel || !cse->high_tlevel); *cseptr = cse; if (NULL != cse) cse->first_off = 0; return cdb_sc_normal; } next_rec_shrink = (int)(old_blk_hdr->bsiz + ((bytptr)del_ptr - (bytptr)right_ptr)); if (SIZEOF(blk_hdr) >= next_rec_shrink) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } if ((bytptr)right_ptr == (bytptr)top_of_block) { if (level) { GET_USHORT(temp_ushort, &left_ptr->rsiz); next_rec_shrink += SIZEOF(rec_hdr) + SIZEOF(block_id) - temp_ushort; } } else { targ_base = (rmatch < lmatch) ? 
rmatch : lmatch; prev_len = 0; if (right_extra) { EVAL_CMPC2(right_prev_ptr, tmp_cmpc); targ_len = tmp_cmpc - targ_base; if (targ_len < 0) targ_len = 0; temp_int = tmp_cmpc - EVAL_CMPC(right_ptr); if (0 >= temp_int) prev_len = - temp_int; else { if (temp_int < targ_len) targ_len -= temp_int; else targ_len = 0; } } else { targ_len = EVAL_CMPC(right_ptr) - targ_base; if (targ_len < 0) targ_len = 0; } next_rec_shrink += targ_len + prev_len; } BLK_INIT(bs_ptr, bs1); first_copy = TRUE; blkseglen = (int)((bytptr)del_ptr - (bytptr)first_in_blk); if (0 < blkseglen) { if (((bytptr)right_ptr != (bytptr)top_of_block) || (0 == level)) { BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen); first_copy = FALSE; } else { blkseglen = (int)((bytptr)left_ptr - (bytptr)first_in_blk); if (0 < blkseglen) { BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen); first_copy = FALSE; } BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr); SET_CMPC(star_rec_hdr, 0); star_rec_hdr->rsiz = (unsigned short)(SIZEOF(rec_hdr) + SIZEOF(block_id)); BLK_SEG(bs_ptr, (bytptr)star_rec_hdr, SIZEOF(rec_hdr)); GET_USHORT(temp_ushort, &left_ptr->rsiz); BLK_SEG(bs_ptr, ((bytptr)left_ptr + temp_ushort - SIZEOF(block_id)), SIZEOF(block_id)); } } blkseglen = (int)((bytptr)top_of_block - (bytptr)right_ptr); assert(0 <= blkseglen); if (0 != blkseglen) { next_rec_shrink = targ_len + prev_len; if (0 >= next_rec_shrink) { BLK_SEG(bs_ptr, (bytptr)right_ptr, blkseglen); } else { BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr); SET_CMPC(new_rec_hdr, EVAL_CMPC(right_ptr) - next_rec_shrink); GET_USHORT(temp_ushort, &right_ptr->rsiz); new_rec_hdr->rsiz = temp_ushort + next_rec_shrink; BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr)); if (targ_len) { BLK_ADDR(skb, targ_len, unsigned char); memcpy(skb, &search_key->base[targ_base], targ_len); BLK_SEG(bs_ptr, skb, targ_len); } if (prev_len) BLK_SEG(bs_ptr, (bytptr)(right_prev_ptr + 1) , prev_len); right_bytptr = (bytptr)(right_ptr + 1); blkseglen = (int)((bytptr)top_of_block - right_bytptr); if (0 < blkseglen) { BLK_SEG(bs_ptr, right_bytptr, blkseglen); } else { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } } } if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } cse = t_write(blkhist, (unsigned char *)bs1, 0, 0, level, first_copy, TRUE, GDS_WRITE_KILLTN); assert(!dollar_tlevel || !cse->high_tlevel); *cseptr = cse; if (horiz_growth) { old_cse = cse->low_tlevel; assert(old_cse && old_cse->done); assert(2 == (SIZEOF(old_cse->undo_offset) / SIZEOF(old_cse->undo_offset[0]))); assert(2 == (SIZEOF(old_cse->undo_next_off) / SIZEOF(old_cse->undo_next_off[0]))); assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]); assert(!old_cse->undo_next_off[1] && !old_cse->undo_offset[1]); } if ((NULL != cse) && (0 != cse->first_off)) { /* fix up chains in the block to account for deleted records */ prev = NULL; curr = buffer + cse->first_off; GET_LONGP(&curr_chain, curr); while (curr < (bytptr)del_ptr) { /* follow chain to first deleted record */ if (0 == curr_chain.next_off) break; if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain)) break; /* special case described below: stop just before the first deleted record */ prev = curr; curr += curr_chain.next_off; GET_LONGP(&curr_chain, curr); } if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain)) { /* if the right side of the block is gone and our last chain is in the last record, * terminate the chain and adjust the previous entry to point at the new *-key * NOTE: this assumes 
there's NEVER a TP delete of records in the GVT */ assert(0 != level); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = curr_chain.next_off; old_cse->undo_offset[0] = (block_offset)(curr - buffer); assert(old_cse->undo_offset[0]); } curr_chain.next_off = 0; GET_LONGP(curr, &curr_chain); if (NULL != prev) { /* adjust previous chain next_off to reflect the fact that the record it refers to is now a *-key */ GET_LONGP(&prev_chain, prev); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[1] = prev_chain.next_off; old_cse->undo_offset[1] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[1]); } prev_chain.next_off = (unsigned int)((bytptr)left_ptr - prev + (unsigned int)(SIZEOF(rec_hdr))); GET_LONGP(prev, &prev_chain); } else /* it's the first (and only) one */ cse->first_off = (block_offset)((bytptr)left_ptr - buffer + SIZEOF(rec_hdr)); } else if (curr >= (bytptr)del_ptr) { /* may be more records on the right that aren't deleted */ while (curr < (bytptr)right_ptr) { /* follow chain past last deleted record */ if (0 == curr_chain.next_off) break; curr += curr_chain.next_off; GET_LONGP(&curr_chain, curr); } /* prev : ptr to chain record immediately preceding the deleted area, * or 0 if none. * * curr : ptr to chain record immediately following the deleted area, * or to last chain record. */ if (curr < (bytptr)right_ptr) { /* the former end of the chain is going, going, gone */ if (NULL != prev) { /* terminate the chain before the delete */ GET_LONGP(&prev_chain, prev); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = prev_chain.next_off; old_cse->undo_offset[0] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[0]); } prev_chain.next_off = 0; GET_LONGP(prev, &prev_chain); } else cse->first_off = 0; /* the whole chain is gone */ } else { /* stitch up the left and right to account for the hole in the middle */ /* next_rec_shrink is the change in record size due to the new compression count */ if (NULL != prev) { GET_LONGP(&prev_chain, prev); /* ??? new compression may be less (ie +) so why are negative shrinks ignored? */ /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = prev_chain.next_off; old_cse->undo_offset[0] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[0]); } prev_chain.next_off = (unsigned int)(curr - prev - ((bytptr)right_ptr - (bytptr)del_ptr) + (next_rec_shrink > 0 ? next_rec_shrink : 0)); GET_LONGP(prev, &prev_chain); } else /* curr remains first: adjust the head */ cse->first_off = (block_offset)(curr - buffer - ((bytptr)right_ptr - (bytptr)del_ptr) + (next_rec_shrink > 0 ? next_rec_shrink : 0)); } } } horiz_growth = FALSE; return cdb_sc_normal; }
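/* Illustrative sketch (assumed semantics of the GET_CMPC/EVAL_CMPC macros used above): a
 * record's compression count (cmpc) is the number of leading key bytes it shares with the
 * previous key in the block, so only the differing suffix is stored in the record. When
 * gvcst_kill_blk deletes a run of records, the record to the right of the hole may need
 * some of those elided bytes re-expanded -- that is what the targ_len/prev_len arithmetic
 * sizes up before rebuilding the block. */
static int model_cmpc(const unsigned char *prev_key, int prev_len,
		      const unsigned char *key, int key_len)
{
	int i, min_len = (prev_len < key_len) ? prev_len : key_len;

	for (i = 0; i < min_len; i++)
		if (prev_key[i] != key[i])
			break;
	return i;	/* bytes [0, i) can be omitted from the second key's record */
}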
boolean_t mu_truncate(int4 truncate_percent) { sgmnt_addrs *csa; sgmnt_data_ptr_t csd; int num_local_maps; int lmap_num, lmap_blk_num; int bml_status, sigkill; int save_errno; int ftrunc_status; uint4 jnl_status; uint4 old_total, new_total; uint4 old_free, new_free; uint4 end_blocks; int4 blks_in_lmap, blk; gtm_uint64_t before_trunc_file_size; off_t trunc_file_size; off_t padding; uchar_ptr_t lmap_addr; boolean_t was_crit; uint4 found_busy_blk; srch_blk_status bmphist; srch_blk_status *blkhist; srch_hist alt_hist; trans_num curr_tn; blk_hdr_ptr_t lmap_blk_hdr; block_id *blkid_ptr; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; char *err_msg; intrpt_state_t prev_intrpt_state; off_t offset; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csa = cs_addrs; csd = cs_data; if (dba_mm == csd->acc_meth) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } /* already checked for parallel truncates on this region --- see mupip_reorg.c */ gv_target = NULL; assert(csa->nl->trunc_pid == process_id); assert(dba_mm != csd->acc_meth); old_total = csa->ti->total_blks; old_free = csa->ti->free_blocks; sigkill = 0; found_busy_blk = 0; memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */ assert(csd->bplmap == BLKS_PER_LMAP); end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */ if (0 == end_blocks) end_blocks = BLKS_PER_LMAP; num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP); /* ======================================== PHASE 1 ======================================== */ for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--) { if (mu_ctrly_occurred || mu_ctrlc_occurred) return TRUE; assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */ if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */ { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } lmap_blk_num = lmap_num * BLKS_PER_LMAP; if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { found_busy_blk = lmap_blk_num; break; } blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP; /* Loop through non-bitmap blocks of this lmap, do recycled2free */ DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n", lmap_num, lmap_blk_num, blks_in_lmap)); for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;) { t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) /* retry loop for recycled to free transactions */ { curr_tn = csd->trans_hist.curr_tn; /* Read the nth local bitmap into memory */ bmphist.blk_num = lmap_blk_num; bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr); lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr; if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. 
*/ t_retry((enum cdb_sc)rdfail_detail); continue; } lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr); /* starting from the hint (blk itself), find the first busy or recycled block */ blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status); assert(blk < BLKS_PER_LMAP); if (blk == -1 || blk >= blks_in_lmap) { /* done with this lmap, continue to next */ t_abort(gv_cur_region, csa); break; } else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { /* stop processing blocks... skip ahead to phase 2 */ found_busy_blk = lmap_blk_num; t_abort(gv_cur_region, csa); break; } else if (BLK_RECYCLED == bml_status) { /* Write PBLK records for recycled blocks only if before_image journaling is * enabled. t_end() takes care of checking if journaling is enabled and * writing PBLK record. We have to at least mark the recycled block as free. */ RESET_UPDATE_ARRAY; update_trans = UPDTRNS_DB_UPDATED_MASK; *((block_id *)update_array_ptr) = blk; update_array_ptr += SIZEOF(block_id); *(int *)update_array_ptr = 0; alt_hist.h[1].blk_num = 0; alt_hist.h[0].level = 0; alt_hist.h[0].cse = NULL; alt_hist.h[0].tn = curr_tn; alt_hist.h[0].blk_num = lmap_blk_num + blk; alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num, &alt_hist.h[0].cycle, &alt_hist.h[0].cr); if (!alt_hist.h[0].buffaddr) { t_retry((enum cdb_sc)rdfail_detail); continue; } if (!t_recycled2free(&alt_hist.h[0])) { t_retry(cdb_sc_lostbmlcr); continue; } t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0); /* Set the opcode for INCTN record written by t_end() */ inctn_opcode = inctn_blkmarkfree; if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; /* block processed, scan from the next one */ blk++; break; } else { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); continue; } } /* END recycled2free retry loop */ } /* END scanning blocks of this particular lmap */ /* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */ /* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */ DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num)); t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { RESET_UPDATE_ARRAY; BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id); *blkid_ptr = 0; update_trans = UPDTRNS_DB_UPDATED_MASK; inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */ curr_tn = csd->trans_hist.curr_tn; blkhist = &alt_hist.h[0]; blkhist->blk_num = lmap_blk_num; blkhist->tn = curr_tn; blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */ /* Read the nth local bitmap into memory */ blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr); lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr; if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. 
*/ t_retry((enum cdb_sc)rdfail_detail); continue; } t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0); blkhist->blk_num = 0; /* create empty history for bitmap block */ if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; break; } } /* END scanning lmaps */ /* ======================================== PHASE 2 ======================================== */ assert(!csa->now_crit); for (;;) { /* wait for FREEZE, we don't want to truncate a frozen database */ grab_crit(gv_cur_region); if (FROZEN_CHILLED(cs_data)) DO_CHILLED_AUTORELEASE(csa, cs_data); if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN) break; rel_crit(gv_cur_region); while (FROZEN(cs_data) || IS_REPL_INST_FROZEN) { hiber_start(1000); if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data)) break; } } assert(csa->nl->trunc_pid == process_id); /* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */ if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB)) { assert(FALSE); gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"), DB_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return FALSE; } csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk); assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk)); new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP); if (mu_ctrly_occurred || mu_ctrlc_occurred) { rel_crit(gv_cur_region); return TRUE; } else if (csa->ti->total_blks != old_total || new_total == old_total) { assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); rel_crit(gv_cur_region); return TRUE; } else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (SNAPSHOTS_IN_PROG(csa->nl)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); if (JNL_ENABLED(csa)) { /* Write JRT_TRUNC and INCTN records */ if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = csa->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). 
*/ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(gv_cur_region, csa); if (SS_NORMAL != jnl_status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); else { if (0 == jpc->pini_addr) jnl_put_jrt_pini(csa); jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total); inctn_opcode = inctn_mu_reorg; jnl_write_inctn_rec(csa); jnl_status = jnl_flush(gv_cur_region); if (SS_NORMAL != jnl_status) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"), jnl_status); assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */ } } } /* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */ curr_tn = csa->ti->curr_tn; CHECK_TN(csa, csd, curr_tn); udi = FILE_INFO(gv_cur_region); /* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */ trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1)); csd->after_trunc_total_blks = new_total; csd->before_trunc_free_blocks = csa->ti->free_blocks; csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */ /* file size and total blocks: INCONSISTENT */ csa->ti->total_blks = new_total; /* past the point of no return -- shared memory intact */ assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total)); csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total); new_free = csa->ti->free_blocks; KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */ fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); CHECK_DBSYNC(gv_cur_region, save_errno); /* past the point of no return -- shared memory deleted */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */ clear_cache_array(csa, csd, gv_cur_region, new_total, old_total); offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size; save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff))); if (0 != save_errno) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); return FALSE; } KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */ /* Execute an ftruncate() and truncate the DB file * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS) * It ignores kill -9 signal till its operation is completed. * So we can safely assume that the result of ftruncate() will be complete. */ FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status); if (0 != ftrunc_status) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); /* should go through recover_truncate now, which will again try to FTRUNCATE */ return FALSE; } /* file size and total blocks: CONSISTENT (shrunk) */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */ csa->nl->root_search_cycle++; /* Force concurrent processes to restart in t_end/tp_tend to make sure no one * tries to commit updates past the end of the file. Bitmap validations together * with highest_lbm_with_busy_blk should actually be sufficient, so this is * just to be safe. 
*/ csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */ /* Increment TN */ assert(csa->ti->early_tn == csa->ti->curr_tn); csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1; INCREMENT_CURR_TN(csd); fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 59 : Issue a kill -9 after 2nd fsync */ CHECK_DBSYNC(gv_cur_region, save_errno); ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); curr_tn = csa->ti->curr_tn; rel_crit(gv_cur_region); send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn); util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].", FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free); return TRUE; } /* END of mu_truncate() */
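/* Illustrative sketch (not part of the original source): the two byte offsets mu_truncate
 * computes above. BLK_ZERO_OFF(start_vbn) is taken here as "byte offset of GDS block 0";
 * the parameter values are hypothetical. The file keeps new_total data blocks plus one
 * trailing EOF block, which is why ftruncate() is given room for (new_total + 1) blocks
 * while the EOF block itself is written at block number new_total. */
#include <sys/types.h>

static void model_truncate_offsets(off_t blk_zero_off, off_t blk_size, off_t new_total,
				   off_t *eof_blk_offset, off_t *trunc_file_size)
{
	*eof_blk_offset = blk_zero_off + new_total * blk_size;	      /* where db_write_eof_block lands */
	*trunc_file_size = blk_zero_off + (new_total + 1) * blk_size; /* new size handed to FTRUNCATE */
}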
/************************************************************************************************* Input Parameters: gv_target: working block's history level : Level of working block and its right sibling d_max_fill : Maximum fill allowed in a data block i_max_fill : Maximum fill allowed in an index block Output Parameters: kill_set_ptr : List of blocks to be freed from LBM (already killed in mu_clsce) remove_rtsib : if right sibling was completely merged with working block Returns: cdb_sc_normal on success Otherwise an error status *************************************************************************************************/ enum cdb_sc mu_clsce(int level, int i_max_fill, int d_max_fill, kill_set *kill_set_ptr, boolean_t *remove_rtsib) { boolean_t complete_merge = FALSE, old_ref_star_only = FALSE, new_rtsib_star_only = FALSE, star_only_merge = FALSE, blk2_ances_star_only = FALSE, delete_all_blk2_ances = TRUE, levelp_next_is_star, forward_process; unsigned char oldblk1_prev_key[MAX_KEY_SZ+1], old_levelp_cur_prev_key[MAX_KEY_SZ+1], old_levelp_cur_key[MAX_KEY_SZ+1]; /* keys in private memory */ unsigned short temp_ushort; int new_levelp_cur_cmpc, new_levelp_cur_next_cmpc, tkeycmpc, oldblk1_last_cmpc, newblk1_mid_cmpc, newblk1_last_cmpc; int levelp, level2; int old_blk1_sz, old_blk2_sz; int old_levelp_cur_prev_keysz, old_levelp_cur_keysz, old_levelp_cur_next_keysz, newblk1_last_keysz, newblk2_first_keysz, new_blk2_ances_first_keysz; int old_levelp_cur_keylen, new_levelp_cur_keylen, old_levelp_cur_next_keylen, new_levelp_cur_next_keylen, oldblk1_last_keylen, newblk1_last_keylen, newblk2_first_keylen; int rec_size, piece_len, tkeylen, old_levelp_rec_offset; int blk_seg_cnt, blk_size; uint4 save_t_err; enum cdb_sc status; sm_uc_ptr_t oldblk1_last_key, old_levelp_cur_next_key, newblk1_last_key, newblk2_first_key, new_blk2_ances_first_key; /* shared memory keys */ sm_uc_ptr_t rec_base, old_levelp_blk_base, bn_ptr1, bn_ptr2, blk2_ances_remain, old_blk1_base, old_blk2_base, new_blk1_top, new_blk2_first_rec_base, new_blk2_remain; /* shared memory pointers */ sm_uc_ptr_t rPtr1, rPtr2; rec_hdr_ptr_t star_rec_hdr, old_last_rec_hdr1, new_rec_hdr1, new_rec_hdr2, blk2_ances_hdr, new_levelp_cur_hdr, new_levelp_cur_next_hdr; blk_segment *bs_ptr1, *bs_ptr2; srch_hist *blk1ptr, *blk2ptr; /* blk2ptr is for right sibling's hist from a minimum sub-tree containing both blocks */ error_def(ERR_GVKILLFAIL); blk_size = cs_data->blk_size; assert(update_array != NULL); update_array_ptr = update_array; blk1ptr = &(gv_target->hist); blk2ptr = gv_target->alt_hist; old_blk1_base = blk1ptr->h[level].buffaddr; old_blk2_base = blk2ptr->h[level].buffaddr; old_blk1_sz = ((blk_hdr_ptr_t)old_blk1_base)->bsiz; old_blk2_sz = ((blk_hdr_ptr_t)old_blk2_base)->bsiz; if (0 != level && sizeof(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz) old_ref_star_only = TRUE; /* Search an ancestor block at levelp >= level+1, which has a real key value corresponding to the working block. This key value will be changed after coalesce.
*/ levelp = level; do { if (++levelp > blk1ptr->depth || levelp > blk2ptr->depth) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } old_levelp_blk_base = blk1ptr->h[levelp].buffaddr; old_levelp_rec_offset = blk1ptr->h[levelp].curr_rec.offset; rec_base = old_levelp_blk_base + old_levelp_rec_offset; GET_RSIZ(rec_size, rec_base); } while (BSTAR_REC_SIZE == rec_size); /* search ancestors to get a real value */ /* old_levelp_cur_prev_key = real value of the key before the curr_key at levelp old_levelp_cur_prev_keysz = uncompressed size of the key Note: we may not have a previous key (old_levelp_cur_prev_keysz = 0) */ if (sizeof(blk_hdr) == old_levelp_rec_offset) old_levelp_cur_prev_keysz = 0; else { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_levelp_blk_base, rec_base, &old_levelp_cur_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } old_levelp_cur_prev_keysz = tkeylen + tkeycmpc; } /* old_levelp_cur_key = real value of the curr_key at levelp old_levelp_cur_keysz = uncompressed size of the key old_levelp_cur_keylen = compressed size of the key */ READ_RECORD(levelp, rec_base, tkeycmpc, rec_size, &old_levelp_cur_key[0], old_levelp_cur_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (old_levelp_cur_prev_keysz) memcpy(&old_levelp_cur_key[0], &old_levelp_cur_prev_key[0], tkeycmpc); rec_base += rec_size; old_levelp_cur_keysz = old_levelp_cur_keylen + tkeycmpc; /* old_levelp_cur_next_key = uncompressed value of the next right key of old_levelp_cur_key old_levelp_cur_next_keysz = uncompressed size of the key old_levelp_cur_next_keylen = compressed size of the key Note: we may not have a next key (old_levelp_cur_next_keysz = 0) */ BLK_ADDR(old_levelp_cur_next_key, gv_cur_region->max_key_size + 1, unsigned char); READ_RECORD(levelp, rec_base, tkeycmpc, rec_size, old_levelp_cur_next_key, old_levelp_cur_next_keylen, status); if (cdb_sc_starrecord == status) levelp_next_is_star = TRUE; else if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } else { memcpy(old_levelp_cur_next_key, &old_levelp_cur_key[0], tkeycmpc); old_levelp_cur_next_keysz = old_levelp_cur_next_keylen + tkeycmpc; levelp_next_is_star = FALSE; } /* Now process the actual working block at current level oldblk1_last_key = real value of last key of the working block For index block decompress *-key oldblk1_last_keylen = compressed size of the last key oldblk1_last_cmpc = compression count of last key of working block old_last_rec_hdr1 = New working index block's last record header */ BLK_ADDR(oldblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char); if (0 == level) /* data block */ { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base, old_blk1_base + old_blk1_sz, oldblk1_last_key, &rec_size, &oldblk1_last_keylen, &oldblk1_last_cmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } rec_base = old_blk1_base + old_blk1_sz; } else /* Index blocks */ { /* Since we will join this working block with the right sibling, we need to remove the *-key at the end of working block and replace with actual key value (with required compression).
We will get the real value of *-rec from its ancestor at levelp */ memcpy (oldblk1_last_key, &old_levelp_cur_key[0], old_levelp_cur_keysz); if (!old_ref_star_only) /* if the index block is not a *-key only block) */ { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base, old_blk1_base + old_blk1_sz - BSTAR_REC_SIZE, &oldblk1_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } GET_CMPC(oldblk1_last_cmpc, &oldblk1_prev_key[0], &old_levelp_cur_key[0]); oldblk1_last_keylen = old_levelp_cur_keysz - oldblk1_last_cmpc; } else /* working block has a *-key record only */ { /* get key value from ancestor blocks key */ oldblk1_last_keylen = old_levelp_cur_keysz; oldblk1_last_cmpc = 0; } BLK_ADDR(old_last_rec_hdr1, sizeof(rec_hdr), rec_hdr); old_last_rec_hdr1->rsiz = BSTAR_REC_SIZE + oldblk1_last_keylen; old_last_rec_hdr1->cmpc = oldblk1_last_cmpc; } /* newblk1_last_key = last key appended to the new working block newblk1_mid_cmpc = cmpc of the first key appended to the new working block newblk1_last_keysz = size of the last key appended to the new working block star_only_merge = TRUE, we can append only a *-key record into the working block (decompressing current *-key) complete_merge = TRUE, rtsib can be completely merged with working block piece_len = Size of data from old rtsibling to be merged into working block (includes rec_hdr size) */ BLK_ADDR(newblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char); rec_base = old_blk2_base + sizeof(blk_hdr); READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_starrecord == status) /* rtsib index block has *-record only */ { if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; star_only_merge = TRUE; complete_merge = TRUE; rec_base = old_blk2_base + sizeof(blk_hdr) + BSTAR_REC_SIZE; } else if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } else /* for both data and non-* index block */ { newblk1_last_keysz = newblk1_last_keylen; /* first key has uncompressed real value */ GET_CMPC(newblk1_mid_cmpc, oldblk1_last_key, newblk1_last_key); piece_len = rec_size - newblk1_mid_cmpc; if (level == 0) /* data block */ { if (old_blk1_sz + piece_len > d_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; } else /* else an index block */ { if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill ) star_only_merge = TRUE; /* can fit only a *-record */ } rec_base += rec_size; } /* new_blk2_first_rec_base and new_blk1_top are set with their final values for star_only_merge for index block */ new_blk2_first_rec_base = new_blk1_top = rec_base; if (!star_only_merge) { BLK_ADDR(new_rec_hdr1, sizeof(rec_hdr), rec_hdr); new_rec_hdr1->rsiz = piece_len; new_rec_hdr1->cmpc = newblk1_mid_cmpc; } /* else only new_blk1_last_key will be appended in working block */ /* find a piece of the right sibling to be copied into the working block.
Note: rec_base points to 2nd record of old rtsib */ if (0 == level) /* if data block */ { complete_merge = TRUE; while (rec_base < old_blk2_base + old_blk2_sz) { GET_RSIZ(rec_size, rec_base); if (old_blk1_sz + piece_len + rec_size > d_max_fill ) { complete_merge = FALSE; break; } READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc; rec_base += rec_size; piece_len += rec_size; }/* end of "while" loop */ new_blk1_top = new_blk2_first_rec_base = rec_base; } else /* index block */ { if (!star_only_merge) { /* we know we can fit more records in working block and rtsibling has more records */ complete_merge = TRUE; while (rec_base < old_blk2_base + old_blk2_sz) { GET_RSIZ(rec_size, rec_base); if (BSTAR_REC_SIZE == rec_size) { rec_base += rec_size; piece_len += rec_size; break; /* already we know we can fit this *-record in working block */ } READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc; rec_base += rec_size; piece_len += rec_size; if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill ) { complete_merge = FALSE; break; } }/* end of "while" loop */ new_blk1_top = new_blk2_first_rec_base = rec_base; } /* end else *-only merge */ } /* end else index block */ if (!complete_merge) { /* Adjust new right sibling's buffer if new_rtsib_star_only == TRUE then new right sibling will have a *-key record only else new_blk2_remain = base pointer of buffer including 1st record but excluding rec_header and key new_blk2_first_keysz = size of new rtsib block's first key */ BLK_ADDR(newblk2_first_key, gv_cur_region->max_key_size + 1, unsigned char); READ_RECORD(level, new_blk2_first_rec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_starrecord == status) /* new rtsib will have a *-record only */ new_rtsib_star_only = TRUE; else if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } else { memcpy(newblk2_first_key, newblk1_last_key, tkeycmpc); /* copy the compressed piece */ newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; new_blk2_remain = new_blk2_first_rec_base + sizeof(rec_hdr) + newblk2_first_keylen; BLK_ADDR(new_rec_hdr2, sizeof(rec_hdr), rec_hdr); new_rec_hdr2->rsiz = rec_size + tkeycmpc; new_rec_hdr2->cmpc = 0; } }
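/* Illustrative sketch (a simplified model of the fit tests above, not the real mu_clsce
 * logic): whether one more right-sibling record of rec_size bytes can be pulled into the
 * working block. For an index block the budget must also cover expanding the old *-key
 * into a real key (last_keylen extra bytes) and keeping a trailing *-record. The
 * MODEL_BSTAR_REC_SIZE value here is hypothetical, standing in for BSTAR_REC_SIZE. */
#define MODEL_BSTAR_REC_SIZE 8

static int model_can_absorb(int level, int cur_sz, int piece_len, int rec_size,
			    int last_keylen, int d_max_fill, int i_max_fill)
{
	if (0 == level)		/* data block: only the record bytes count */
		return cur_sz + piece_len + rec_size <= d_max_fill;
	/* index block: old *-key becomes a real key, and a new *-record caps the block */
	return cur_sz + last_keylen + piece_len + rec_size + MODEL_BSTAR_REC_SIZE <= i_max_fill;
}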
block_id bm_getfree(block_id orig_hint, boolean_t *blk_used, unsigned int cw_work, cw_set_element *cs, int *cw_depth_ptr) { cw_set_element *cs1; sm_uc_ptr_t bmp; block_id bml, hint, hint_cycled, hint_limit; block_id_ptr_t b_ptr; int cw_set_top, depth, lcnt; unsigned int local_maps, map_size, n_decrements = 0, total_blks; trans_num ctn; int4 free_bit, offset; uint4 space_needed; uint4 status; srch_blk_status blkhist; total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks; if (orig_hint >= total_blks) /* for TP, hint can be > total_blks */ orig_hint = 1; hint = orig_hint; hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); hint_limit = DIVIDE_ROUND_DOWN(orig_hint, BLKS_PER_LMAP); local_maps = hint_cycled + 2; /* for (up to) 2 wraps */ for (lcnt = 0; lcnt <= local_maps; lcnt++) { bml = bmm_find_free(hint / BLKS_PER_LMAP, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps); if ((NO_FREE_SPACE == bml) || (bml >= hint_cycled)) { /* if no free space or might have looped to original map, extend */ if ((NO_FREE_SPACE != bml) && (hint_limit < hint_cycled)) { hint_cycled = hint_limit; hint = 1; continue; } if (SS_NORMAL != (status = gdsfilext(cs_data->extension_size, total_blks))) return (status); if (dba_mm == cs_data->acc_meth) return (FILE_EXTENDED); hint = total_blks; total_blks = cs_addrs->ti->total_blks; hint_cycled = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); local_maps = hint_cycled + 2; /* for (up to) 2 wraps */ /* * Note that one could optimize here by scanning only the extended section rather than * going back over the whole database. But it is very unlikely that a free block won't * be found in the extended section, both approaches start from the extended section * anyway, a GTMASSERT checks that we don't have a lot of free blocks while doing an * extend, and the full-pass change is the easy one to make, so the full-pass solution * is currently implemented. */ lcnt = -1; /* allow it one extra pass to ensure that it can take advantage of the extension */ n_decrements++; /* used only for debugging purposes */ continue; } bml *= BLKS_PER_LMAP; if (ROUND_DOWN2(hint, BLKS_PER_LMAP) != bml) { /* not within requested map */ if ((bml < hint) && (hint_cycled)) /* wrap? - second one should force an extend for sure */ hint_cycled = (hint_limit < hint_cycled) ?
hint_limit: 0; hint = bml + 1; /* start at beginning */ } if (ROUND_DOWN2(total_blks, BLKS_PER_LMAP) == bml) map_size = (total_blks - bml); else map_size = BLKS_PER_LMAP; if (0 != dollar_tlevel) { depth = cw_work; cw_set_top = *cw_depth_ptr; if (depth < cw_set_top) tp_get_cw(cs, cw_work, &cs1); for (; depth < cw_set_top; depth++, cs1 = cs1->next_cw_set) { /* do tp front to back because list is more efficient than tp_get_cw and forward pointers exist */ if (bml == cs1->blk) { TRAVERSE_TO_LATEST_CSE(cs1); break; } } if (depth >= cw_set_top) { assert(cw_set_top == depth); depth = 0; } } else { for (depth = *cw_depth_ptr - 1; depth >= cw_work; depth--) { /* do non-tp back to front, because of adjacency */ if (bml == (cs + depth)->blk) { cs1 = cs + depth; break; } } if (depth < cw_work) { assert(cw_work - 1 == depth); depth = 0; } } if (0 == depth) { ctn = cs_addrs->ti->curr_tn; if (!(bmp = t_qread(bml, (sm_int_ptr_t)&blkhist.cycle, &blkhist.cr))) return MAP_RD_FAIL; if ((BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz) || (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl)) { assert(CDB_STAGNATE > t_tries); rdfail_detail = cdb_sc_badbitmap; return MAP_RD_FAIL; } offset = 0; } else { bmp = cs1->old_block; b_ptr = (block_id_ptr_t)(cs1->upd_addr); b_ptr += cs1->reference_cnt - 1; offset = *b_ptr + 1; } if (offset < map_size) { free_bit = bm_find_blk(offset, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), map_size, blk_used); if (MAP_RD_FAIL == free_bit) return MAP_RD_FAIL; } else free_bit = NO_FREE_SPACE; if (NO_FREE_SPACE != free_bit) break; if ((hint = bml + BLKS_PER_LMAP) >= total_blks) /* if map is full, start at 1st blk in next map */ { /* wrap - second one should force an extend for sure */ hint = 1; if (hint_cycled) hint_cycled = (hint_limit < hint_cycled) ? hint_limit: 0; } if ((0 == depth) && (FALSE != cs_addrs->now_crit)) /* if it's from the cw_set, its state is murky */ bit_clear(bml / BLKS_PER_LMAP, MM_ADDR(cs_data)); /* if crit, repair master map error */ } /* If not in the final retry, it is possible that free_bit is >= map_size (e.g. if bitmap block gets recycled). */ if (map_size <= (uint4)free_bit && CDB_STAGNATE <= t_tries) { /* bad free bit */ assert((NO_FREE_SPACE == free_bit) && (lcnt > local_maps)); /* All maps full, should have extended */ GTMASSERT; } if (0 != depth) { b_ptr = (block_id_ptr_t)(cs1->upd_addr); b_ptr += cs1->reference_cnt++; *b_ptr = free_bit; } else { space_needed = (BLKS_PER_LMAP + 1) * sizeof(block_id); if (dollar_tlevel) { ENSURE_UPDATE_ARRAY_SPACE(space_needed); /* have brackets for "if" for macros */ } BLK_ADDR(b_ptr, space_needed, block_id); memset(b_ptr, 0, space_needed); *b_ptr = free_bit; blkhist.blk_num = bml; blkhist.buffaddr = bmp; /* cycle and cr have already been assigned from t_qread */ t_write_map(&blkhist, (uchar_ptr_t)b_ptr, ctn, 1); /* last parameter 1 is what cs->reference_cnt gets set to */ } return bml + free_bit; }
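/* Illustrative sketch (not the real bm_find_blk): GDS local bitmaps track block state in
 * two bits per block, distinguishing at least busy, free and recycled states; the bit
 * encoding below is hypothetical. The scan mirrors what bm_getfree relies on: start at a
 * hint offset within one local map and return the first block usable for allocation, or
 * fail so the caller can move to another local map or extend the file. */
#define MODEL_BITS_PER_BLK	2
#define MODEL_BLK_BUSY		0x0	/* hypothetical encoding of the busy state */

static int model_find_free(int hint, const unsigned char *map, int map_size)
{
	int blk, byte, shift, state;

	for (blk = hint; blk < map_size; blk++)
	{
		byte = (blk * MODEL_BITS_PER_BLK) / 8;
		shift = (blk * MODEL_BITS_PER_BLK) % 8;
		state = (map[byte] >> shift) & 0x3;
		if (MODEL_BLK_BUSY != state)	/* free or recycled both qualify */
			return blk;
	}
	return -1;	/* no usable block in this local map */
}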
void mu_reorg_upgrd_dwngrd(void) { blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; block_id *blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp; block_id startblk_input, stopblk_input; boolean_t upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb; boolean_t startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free; cache_rec_ptr_t cr; char *bml_lcl_buff = NULL, *command, *reorg_command; sm_uc_ptr_t bptr = NULL; cw_set_element *cse; enum cdb_sc cdb_status; enum db_ver new_db_format, ondsk_blkver; gd_region *reg; int cycle; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ int4 blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks; int4 status, status1, mapsize, lcnt, bml_status; reorg_stats_t reorg_stats; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; sm_uc_ptr_t blkBase, bml_sm_buff; /* shared memory pointer to the bitmap global buffer */ srch_hist alt_hist; srch_blk_status *blkhist, bmlhist; tp_region *rptr; trans_num curr_tn; unsigned char save_cw_set_depth; uint4 lcl_update_trans; region = (CLI_PRESENT == cli_present("REGION")); upgrade = (CLI_PRESENT == cli_present("UPGRADE")); downgrade = (CLI_PRESENT == cli_present("DOWNGRADE")); assert(upgrade && !downgrade || !upgrade && downgrade); command = upgrade ? "UPGRADE" : "DOWNGRADE"; reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE"; reorg_entiredb = TRUE; /* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */ startblk_specified = FALSE; assert(SIZEOF(block_id) == SIZEOF(uint4)); if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input))) { reorg_entiredb = FALSE; startblk_specified = TRUE; } stopblk_specified = FALSE; assert(SIZEOF(block_id) == SIZEOF(uint4)); if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input))) { reorg_entiredb = FALSE; stopblk_specified = TRUE; } mu_reorg_upgrd_dwngrd_in_prog = TRUE; mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE; assert(region); status = SS_NORMAL; error_mupip = FALSE; gvinit(); /* initialize gd_header (needed by the later call to mu_getlst) */ mu_getlst("REG_NAME", SIZEOF(tp_region)); /* get the parameter corresponding to REGION qualifier */ if (error_mupip) { util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command)); mupip_exit(ERR_MUNOACTION); } assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize); /* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */ gv_target = targ_alloc(gv_keysize, NULL, NULL); /* t_begin needs this initialized */ gv_target_list = NULL; memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */ blkhist = &alt_hist.h[0]; for (rptr = grlist; NULL != rptr; rptr = rptr->fPtr) { if (mu_ctrly_occurred || mu_ctrlc_occurred) break; reg = rptr->reg; util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); if (reg_cmcheck(reg)) { util_out_print("Region !AD : MUPIP REORG !AD cannot run across network", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = ERR_MUNOFINISH; continue; } mu_reorg_process = TRUE; /* gvcst_init will use this value to use gtm_poollimit settings. */ gvcst_init(reg); mu_reorg_process = FALSE; assert(update_array != NULL); /* access method stored in global directory and database file header might be different in which case * the database setting prevails. 
therefore, the access method check can be done only after opening * the database (i.e. after the gvcst_init) */ if (dba_bg != REG_ACC_METH(reg)) { util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = ERR_MUNOFINISH; continue; } /* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to * the same db file correspond to only one grlist entry. */ assert(FALSE == reg->was_open); TP_CHANGE_REG(reg); /* sets gv_cur_region, cs_addrs, cs_data */ csa = cs_addrs; csd = cs_data; blk_size = csd->blk_size; /* "blk_size" is used by the BLK_FINI macro */ if (reg->read_only) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg)); status = ERR_MUNOFINISH; continue; } assert(GDSVCURR == GDSV6); /* so we trip this assert in case GDSVCURR changes without a change to this module */ new_db_format = (upgrade ? GDSV6 : GDSV4); grab_crit(reg); curr_tn = csd->trans_hist.curr_tn; /* set the desired db format in the file header to the appropriate version, increment transaction number */ status1 = desired_db_format_set(reg, new_db_format, reorg_command); assert(csa->now_crit); /* desired_db_format_set() should not have released crit */ first_reorg_in_this_db_fmt = TRUE; /* with the current desired_db_format, this is the first reorg */ if (SS_NORMAL != status1) { /* "desired_db_format_set" would have printed appropriate error messages */ if (ERR_MUNOACTION != status1) { /* real error occurred while setting the db format. skip to next region */ status = ERR_MUNOFINISH; rel_crit(reg); continue; } util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command)); if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn) first_reorg_in_this_db_fmt = FALSE; } else util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command)); assert(dba_bg == csd->acc_meth); /* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */ total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; actual_blks2upgrd = csd->blks_to_upgrd; /* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH * "csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing. * If MUPIP REORG DOWNGRADE and all non-free blocks are already in the older (V4) format, i.e. every * one of them is counted in "csd->blks_to_upgrd", then again we can skip processing. 
*/ if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded) || (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd))) { util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : " "Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd); util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); rel_crit(reg); continue; } stop_blk = total_blks; if (stopblk_specified && stopblk_input <= stop_blk) stop_blk = stopblk_input; if (first_reorg_in_this_db_fmt) { /* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */ csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn; csd->reorg_upgrd_dwngrd_restart_block = 0; start_blk = (startblk_specified ? startblk_input : 0); } else { /* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running * or a previously running REORG of the same type was interrupted (Ctrl-Ced). * In either case, resume processing from whatever restart block number is stored in the fileheader; * the only exception is if "STARTBLK" was specified in the input, in which case that is used unconditionally. */ start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block); } if (start_blk > stop_blk) start_blk = stop_blk; mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn; /* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not. */ if (!wcs_flu(WCSFLU_FLUSH_HDR)) /* wcs_flu assumes gv_cur_region is set (which it is in this routine) */ { rel_crit(reg); gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg)); status = ERR_MUNOFINISH; continue; } rel_crit(reg); /* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */ status1 = SS_NORMAL; start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP); last_bmp = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP); curblk = start_blk; /* curblk is the block to be upgraded/downgraded */ util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk); if (NULL != bptr) { /* malloc/free "bptr" for each region as GDS block-size can be different */ free(bptr); bptr = NULL; } memset(&reorg_stats, 0, SIZEOF(reorg_stats)); /* initialize statistics for this region */ for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP) { if (mu_ctrly_occurred || mu_ctrlc_occurred) { status1 = ERR_MUNOFINISH; break; } /* -------------------------------------------------------------- * Read in current bitmap block * -------------------------------------------------------------- */ assert(!csa->now_crit); bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* bring block into the cache outside of crit */ reorg_stats.blks_read_from_disk_bmp++; grab_crit_encr_cycle_sync(reg); /* needed so t_qread does not return NULL below */ if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn) { /* csd->desired_db_format changed since reorg started. 
discontinue the reorg */ /* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment * of this field should be done only if a db format change did not occur. */ rel_crit(reg); status1 = ERR_MUNOFINISH; /* This "start_tn" check is redone after the for-loop and an error message is printed there */ break; } else if (reorg_entiredb) { /* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */ assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp)); csd->reorg_upgrd_dwngrd_restart_block = curbmp; /* previous blocks have been upgraded/downgraded */ } /* Check blks_to_upgrd counter to see if upgrade/downgrade is complete. * Repeat check done a few steps earlier outside of this for loop. */ total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; actual_blks2upgrd = csd->blks_to_upgrd; if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded) || (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd))) { rel_crit(reg); break; } bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* now that we are in crit, note down the stable buffer */ if (NULL == bml_sm_buff) rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); ondsk_blkver = cr->ondsk_blkver; /* note down db fmt on disk for bitmap block */ /* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit. * We are interested in those blocks that are currently marked as USED in the bitmap. * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks. * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job. * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing. */ if (NULL == bml_lcl_buff) bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP)); memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP)); if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE)) { /* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */ assert(FALSE); /* in pro, skip upgrading/downgrading all blks in this unreliable local bitmap */ rel_crit(reg); util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.", TRUE, REG_LEN_STR(reg), curbmp); status1 = ERR_MUNOFINISH; continue; } rel_crit(reg); /* ------------------------------------------------------------------------ * Upgrade/Downgrade all BUSY blocks in the current bitmap * ------------------------------------------------------------------------ */ curblk = (curbmp == start_bmp) ? start_blk : curbmp; mapsize = (curbmp == last_bmp) ? 
(stop_blk - curbmp) : BLKS_PER_LMAP; assert(0 != mapsize); assert(mapsize <= BLKS_PER_LMAP); db_got_to_v5_once = csd->db_got_to_v5_once; for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++) { if (mu_ctrly_occurred || mu_ctrlc_occurred) { status1 = ERR_MUNOFINISH; goto stop_reorg_on_this_reg; /* goto needed because of nested for loop */ } GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status); assert(BLK_MAPINVALID != bml_status); /* cert_blk ran clean so we don't expect invalid entries */ if (BLK_FREE == bml_status) { reorg_stats.blks_skipped_free++; continue; } /* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */ if (db_got_to_v5_once || (BLK_RECYCLED != bml_status)) { /* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */ if (lcnt) { /* non-bitmap block */ /* read in block from disk into private buffer. don't pollute the cache yet */ if (NULL == bptr) bptr = (sm_uc_ptr_t)malloc(blk_size); status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE); /* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be * upgraded and if its block size was too big to allow the extra block-header space * requirements for a dynamic upgrade. A MUPIP REORG DOWNGRADE should not error out * in that case as the block is already in the downgraded format. */ if (SS_NORMAL != status1) { if (!upgrade && (ERR_DYNUPGRDFAIL == status1)) { assert(GDSV4 == new_db_format); ondsk_blkver = new_db_format; } else { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status1); util_out_print("Region !AD : Error occurred while reading block " "[0x!XL]", TRUE, REG_LEN_STR(reg), curblk); status1 = ERR_MUNOFINISH; goto stop_reorg_on_this_reg;/* goto needed due to nested for loop */ } } reorg_stats.blks_read_from_disk_nonbmp++; } /* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */ if (new_db_format == ondsk_blkver) { assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1))); status1 = SS_NORMAL; /* treat DYNUPGRDFAIL as no error in case of downgrade */ reorg_stats.blks_skipped_newfmtindisk++; continue; /* current disk version is identical to what is desired */ } assert(SS_NORMAL == status1); } /* Begin non-TP transaction to upgrade/downgrade the block. * The way we do that is by updating the block using a null update array. * Any update to a block will trigger an automatic upgrade/downgrade of the block based on * the current fileheader desired_db_format setting and we use that here. */ t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); for (; ;) { CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ curr_tn = csd->trans_hist.curr_tn; db_got_to_v5_once = csd->db_got_to_v5_once; if (db_got_to_v5_once || (BLK_RECYCLED != bml_status)) { blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */ blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr); if (NULL == blkBase) { t_retry((enum cdb_sc)rdfail_detail); continue; } blkhist->blk_num = curblk; blkhist->buffaddr = blkBase; ondsk_blkver = blkhist->cr->ondsk_blkver; new_hdr = *(blk_hdr_ptr_t)blkBase; mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn; mark_blk_free = FALSE; inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd; } else { mark_blk_free = TRUE; inctn_opcode = inctn_blkmarkfree; } inctn_detail.blknum_struct.blknum = curblk; /* t_end assumes that the history it is passed does not contain a bitmap block. 
* for bitmap block, the history validation information is passed through cse instead. * therefore we need to handle bitmap and non-bitmap cases separately. */ if (!lcnt) { /* Means a bitmap block. * At this point we can do a "new_db_format != ondsk_blkver" check to determine * if the block got converted since we did the dsk_read (see the non-bitmap case * for a similar check done there), but in that case we will have a transaction * which has read 1 bitmap block and is updating no block. "t_end" currently cannot * handle this case as it expects any bitmap block that needs validation to also * have a corresponding cse which will hold its history. Hence we avoid doing the * new_db_format check. The only disadvantage of this is that we will end up * modifying the bitmap block as part of this transaction (in an attempt to convert * its ondsk_blkver) even though it is already in the right format. Since this * overhead is going to be one per bitmap block and since the block is in the cache * at this point, we should not lose much. */ assert(!mark_blk_free); BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id); *blkid_ptr = 0; t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0); assert(&alt_hist.h[0] == blkhist); alt_hist.h[0].blk_num = 0; /* create empty history for bitmap block */ assert(update_trans); } else { /* non-bitmap block. fill in history for validation in t_end */ assert(curblk); /* we should never come here for block 0 (bitmap) */ if (!mark_blk_free) { assert(blkhist->blk_num == curblk); assert(blkhist->buffaddr == blkBase); blkhist->tn = curr_tn; alt_hist.h[1].blk_num = 0; } /* Also need to pass the bitmap as history to detect if any concurrent M-kill * is freeing up the same USED block that we are trying to convert OR if any * concurrent M-set is reusing the same RECYCLED block that we are trying to * convert. Because of t_end currently not being able to validate a bitmap * without that simultaneously having a cse, we need to create a cse for the * bitmap that is used only for bitmap history validation, but should not be * used to update the contents of the bitmap block in bg_update. */ bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr); if (NULL == bmlhist.buffaddr) { t_retry((enum cdb_sc)rdfail_detail); continue; } bmlhist.blk_num = curbmp; bmlhist.tn = curr_tn; GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status); if (BLK_MAPINVALID == bml_status) { t_retry(cdb_sc_lostbmlcr); continue; } if (!mark_blk_free) { if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status)) { /* block still needs to be converted. create cse */ BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkBase)->levl, FALSE, FALSE, GDS_WRITE_PLAIN); /* The directory tree status for now is only used to determine * whether writing the block to snapshot file (see t_end_sysops.c). * For reorg upgrade/downgrade process, the block is updated in a * sequential way without changing the gv_target. In this case, we * assume the block is in directory tree so as to have it written to * the snapshot file. 
*/ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* reset update_trans in case previous retry had set it to 0 */ update_trans = UPDTRNS_DB_UPDATED_MASK; if (BLK_RECYCLED == bml_status) { /* If the block that we are upgrading is RECYCLED, indicate to * bg_update that blks_to_upgrd counter should NOT be * touched in this case by setting "mode" to a special value */ assert(cw_set[cw_set_depth-1].mode == gds_t_write); cw_set[cw_set_depth-1].mode = gds_t_write_recycled; /* we mark the block as NOT RECYCLED; otherwise, the mm_update() * or bg_update_phase2 may skip writing it to snapshot file * when its level is 0 */ BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state); } } else { /* Block got converted by another process since we did the dsk_read, * or this block became marked free in the bitmap. * No need to update this block; just call t_end for validation of * both the non-bitmap block as well as the bitmap block. * Note down that this transaction is no longer updating any blocks. */ update_trans = 0; } /* Need to put bit maps on the end of the cw set for concurrency checking. * We want to simulate t_write_map, except we want to update "cw_map_depth" * instead of "cw_set_depth". Hence the save and restore logic below. * This part of the code is similar to the one in mu_swap_blk.c */ save_cw_set_depth = cw_set_depth; assert(!cw_map_depth); t_write_map(&bmlhist, NULL, curr_tn, 0); /* will increment cw_set_depth */ cw_map_depth = cw_set_depth; /* set cw_map_depth to latest cw_set_depth */ cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */ /* t_write_map simulation end */ } else { if (BLK_RECYCLED != bml_status) { /* Block was RECYCLED at beginning but no longer so. Retry */ t_retry(cdb_sc_bmlmod); continue; } /* Mark recycled block as FREE in bitmap */ assert(lcnt == (curblk - curbmp)); assert(update_array_ptr == update_array); *((block_id *)update_array_ptr) = lcnt; update_array_ptr += SIZEOF(block_id); /* the following assumes SIZEOF(block_id) == SIZEOF(int) */ assert(SIZEOF(block_id) == SIZEOF(int)); *(int *)update_array_ptr = 0; t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0); update_trans = UPDTRNS_DB_UPDATED_MASK; } } assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans)); lcl_update_trans = update_trans; /* take a copy before t_end modifies it */ if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) { /* In case this is MM and t_end() remapped an extended database, reset csd */ assert(csd == cs_data); if (!lcl_update_trans) { assert(lcnt); assert(!mark_blk_free); assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status)); if (BLK_BUSY != bml_status) reorg_stats.blks_skipped_free++; else reorg_stats.blks_skipped_newfmtincache++; } else if (!lcnt) reorg_stats.blks_converted_bmp++; else reorg_stats.blks_converted_nonbmp++; break; } assert(csd == cs_data); } } } stop_reorg_on_this_reg: /* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */ grab_crit(reg); blocks_left = 0; assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd); actual_blks2upgrd = csd->blks_to_upgrd; total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; /* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt * change. Consider, for example, that we are doing REORG UPGRADE. 
A concurrent REORG DOWNGRADE would * have reset "csd->reorg_upgrd_dwngrd_restart_block" field to 0 and if that reorg is interrupted by a * Ctrl-C (before this reorg came here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to * a non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this * field to "curblk" as it will be misinterpreted as the number of blocks that have been DOWNgraded. */ set_fully_upgraded = FALSE; if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn) { /* csd->desired_db_format changed since reorg started. discontinue the reorg */ util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.", TRUE, REG_LEN_STR(reg)); status1 = ERR_MUNOFINISH; } else if (reorg_entiredb) { /* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */ assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk); csd->reorg_upgrd_dwngrd_restart_block = curblk; /* blocks less than this have been upgraded/downgraded */ expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks); blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd); /* If this reorg command went through all blocks in the database, then it should have * correctly concluded at this point whether the reorg is complete or not. * If this reorg command started from where a previous incomplete reorg left off * (i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial * GDS blocks that it skipped are completely {up,down}graded or not. */ assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt); /* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is * 0 in the file-header and the desired_db_format did not change since the start of the REORG, * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE. 
*/ if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd)) { csd->fully_upgraded = TRUE; csd->db_got_to_v5_once = TRUE; set_fully_upgraded = TRUE; } /* flush all changes noted down in the file-header */ if (!wcs_flu(WCSFLU_FLUSH_HDR)) /* wcs_flu assumes gv_cur_region is set (which it is in this routine) */ { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg)); status = ERR_MUNOFINISH; rel_crit(reg); continue; } } curr_tn = csd->trans_hist.curr_tn; rel_crit(reg); util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk); /* Print statistics */ util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp); util_out_print("Region !AD : Statistics : Blocks Skipped (Free) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free); util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp); util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk); util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache); util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp); util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp); if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left)) { /* file-header counter does not match what reorg on the entire database expected to see */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd); util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter", TRUE, REG_LEN_STR(reg)); status1 = ERR_MUNOFINISH; } else util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : " "Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd); /* Issue success or failure message for this region */ if (SS_NORMAL == status1) { /* issue success only if REORG did not encounter any error in its processing */ if (set_fully_upgraded) util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg)); util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg), process_id, process_id, &curr_tn); } else { assert(ERR_MUNOFINISH == status1); assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status)); util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = status1; } } if (NULL != bptr) free(bptr); if (NULL != bml_lcl_buff) free(bml_lcl_buff); if (mu_ctrly_occurred || mu_ctrlc_occurred) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY); status = ERR_MUNOFINISH; } mupip_exit(status); }
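/* Illustrative sketch: the region loop above visits blocks through a two-level walk -- an outer loop over
 * local bitmaps (one every BLKS_PER_LMAP blocks) and an inner loop over the blocks each bitmap governs,
 * clamping the first map to start_blk and the last map to stop_blk. The stand-alone function below mirrors
 * just that arithmetic; the demo_* / DEMO_* names are hypothetical, and DEMO_BLKS_PER_LMAP is assumed to be
 * a power of two (as the ROUND_DOWN2 usage above requires).
 */
#define DEMO_BLKS_PER_LMAP	512
#define DEMO_ROUND_DOWN2(x, y)	((x) & ~((y) - 1))	/* y must be a power of two */

static void demo_walk_blocks(int start_blk, int stop_blk, void (*visit)(int curbmp, int curblk, int lcnt))
{
	int	curbmp, curblk, lcnt, mapsize;
	int	start_bmp, last_bmp;

	start_bmp = DEMO_ROUND_DOWN2(start_blk, DEMO_BLKS_PER_LMAP);
	last_bmp = DEMO_ROUND_DOWN2(stop_blk - 1, DEMO_BLKS_PER_LMAP);
	for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += DEMO_BLKS_PER_LMAP)
	{	/* the first map may begin mid-map (at start_blk); later maps begin at the map block itself */
		curblk = (curbmp == start_bmp) ? start_blk : curbmp;
		/* the last map may end mid-map (at stop_blk); earlier maps cover all DEMO_BLKS_PER_LMAP blocks */
		mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : DEMO_BLKS_PER_LMAP;
		for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++)
			visit(curbmp, curblk, lcnt);	/* lcnt is the block's offset within its local map */
	}
}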
i8_p que_obj_next_freeblk(que_obj_p p_obj) { /* return the address of the slot at the tail index, i.e. the next block a producer may fill; the tail index itself is not advanced here */ return BLK_ADDR(p_obj->tail, p_obj); }
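/* Illustrative usage sketch: que_obj_next_freeblk only hands back the raw slot at the tail index, so a
 * producer would copy its payload there and then advance tail modulo blk_num, keeping one slot empty to
 * distinguish full from empty. demo_que_push is a hypothetical counterpart, not a routine from this code
 * base: the "data" member of elem, the F1G_ERR failure code, the i8_t return type, and the 0x01 status
 * value are all assumptions for the demo (requires <string.h> for memcpy).
 */
static i8_t demo_que_push(que_obj_p p_obj, const void *payload, int len)
{
	elem_p	p_elem;
	pos_t	next_tail;

	next_tail = (p_obj->tail + 1) % (p_obj->blk_num);
	if (next_tail == p_obj->front)
		return F1G_ERR;	/* queue full: advancing tail would collide with front (assumed error code) */
	p_elem = (elem_p)que_obj_next_freeblk(p_obj);
	memcpy(p_elem->data, payload, len);	/* "data" payload member is assumed */
	p_elem->data_len = len;
	p_elem->status = 0x01;	/* assumed "slot in use" marker */
	p_obj->tail = next_tail;	/* publish the element by advancing tail */
	return F1G_OK;
}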
void dse_chng_bhead(void) { block_id blk; block_id *blkid_ptr; sgm_info *dummysi = NULL; int4 x; cache_rec_ptr_t cr; uchar_ptr_t bp; sm_uc_ptr_t blkBase; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; cw_set_element *cse; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ bool ismap; bool chng_blk; uint4 mapsize; uint4 jnl_status; error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); assert(update_array); /* reset new block mechanism */ update_array_ptr = update_array; chng_blk = FALSE; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK",&blk)) return; if (blk < 0 || blk >= cs_addrs->ti->total_blks) /* valid block numbers run from 0 through total_blks - 1 */ { util_out_print("Error: invalid block number.",TRUE); return; } patch_curr_blk = blk; } blk_size = cs_addrs->hdr->blk_size; ismap = (patch_curr_blk / cs_addrs->hdr->bplmap * cs_addrs->hdr->bplmap == patch_curr_blk); mapsize = BM_SIZE(cs_addrs->hdr->bplmap); t_begin_crit (ERR_DSEFAIL); if (!(bp = t_qread (patch_curr_blk,&dummy_hist.h[0].cycle,&dummy_hist.h[0].cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)bp; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_num("LEVEL",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < sizeof(blk_hdr)) new_hdr.bsiz = sizeof(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } else if (x < sizeof(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, cs_addrs); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } t_write (patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, new_hdr.levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex("TN",&x)) return; t_begin_crit(ERR_DSEFAIL); assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); cs_addrs->ti->early_tn++; blkBase = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr); if (NULL == blkBase) { rel_crit(gv_cur_region); util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } /* Create a null update array for a block */ if (ismap) { BLK_ADDR(blkid_ptr, sizeof(block_id), block_id); *blkid_ptr = 0; t_write_map(patch_curr_blk, blkBase, (unsigned char *)blkid_ptr, cs_addrs->ti->curr_tn); cr_array_index = 0; block_saved = FALSE; } else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkBase + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); /* build from the freshly read buffer, not the stale "bp" from the earlier transaction */ BLK_FINI(bs_ptr, bs1); t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, blkBase, ((blk_hdr_ptr_t)blkBase)->levl, 
TRUE, FALSE); cr_array_index = 0; block_saved = FALSE; if (JNL_ENABLED(cs_data)) { JNL_SHORT_TIME(jgbl.gbl_jrec_time); /* needed for jnl_put_jrt_pini() and jnl_write_aimg_rec() */ jnl_status = jnl_ensure_open(); if (0 == jnl_status) { cse = (cw_set_element *)(&cw_set[0]); cse->new_buff = non_tp_jfb_buff_ptr; gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, x); cse->done = TRUE; if (0 == cs_addrs->jnl->pini_addr) jnl_put_jrt_pini(cs_addrs); jnl_write_aimg_rec(cs_addrs, cse->blk, (blk_hdr_ptr_t)cse->new_buff); } else rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); } } /* Pass the desired tn "x" as argument to bg_update or mm_update */ if (dba_bg == cs_addrs->hdr->acc_meth) bg_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); else mm_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); cs_addrs->ti->curr_tn++; assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); /* the following code is analogous to that in t_end and should be maintained in a similar fashion */ while (cr_array_index) cr_array[--cr_array_index]->in_cw_set = FALSE; rel_crit(gv_cur_region); if (block_saved) backup_buffer_flush(gv_cur_region); UNIX_ONLY( if (unhandled_stale_timer_pop) process_deferred_stale(); ) wcs_timer_start(gv_cur_region, TRUE); }
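/* Illustrative sketch: dse_chng_bhead above decides whether the target block is a local bit map using the
 * integer-division idiom (blk / bplmap) * bplmap == blk, i.e. "blk is an exact multiple of bplmap", since
 * every bplmap'th block of a GDS file is a local map. demo_is_local_bitmap is a hypothetical stand-alone
 * rendering of that test.
 */
static int demo_is_local_bitmap(int blk, int bplmap)
{
	return ((blk / bplmap) * bplmap == blk);	/* equivalent to (0 == blk % bplmap) for blk >= 0 */
}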