/************************************************************************************** Input Parameters: (GBL_DEF) gv_target: For reference block's history Output Parameters: kill_set_ptr : List of blocks to be freed ***************************************************************************************/ enum cdb_sc mu_reduce_level(kill_set *kill_set_ptr) { int level; int old_blk_sz; int blk_seg_cnt, blk_size; uint4 save_t_err; blk_segment *bs_ptr1, *bs_ptr2; sm_uc_ptr_t old_blk_base, save_blk; error_def(ERR_GVKILLFAIL); level = gv_target->hist.depth; if (1 == level) return cdb_sc_oprnotneeded; blk_size = cs_data->blk_size; kill_set_ptr->used = 0; memset(kill_set_ptr, 0, sizeof(kill_set)); assert(update_array != NULL); update_array_ptr = update_array; old_blk_base = gv_target->hist.h[level].buffaddr; old_blk_sz = ((blk_hdr_ptr_t)(old_blk_base))->bsiz; if (sizeof(blk_hdr) + BSTAR_REC_SIZE != old_blk_sz) return cdb_sc_oprnotneeded; old_blk_base = gv_target->hist.h[level-1].buffaddr; old_blk_sz = ((blk_hdr_ptr_t)(old_blk_base))->bsiz; BLK_ADDR(save_blk, old_blk_sz - sizeof(blk_hdr), unsigned char); memcpy(save_blk, old_blk_base + sizeof(blk_hdr), old_blk_sz - sizeof(blk_hdr)); BLK_INIT(bs_ptr2, bs_ptr1); BLK_SEG(bs_ptr2, save_blk, old_blk_sz - sizeof(blk_hdr)); if (!BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } save_t_err = t_err; t_err = ERR_GVKILLFAIL; t_write(gv_target->hist.h[level].blk_num, (unsigned char *)bs_ptr1, 0, 0, gv_target->hist.h[level].buffaddr, level - 1, TRUE, TRUE); t_err = save_t_err; kill_set_ptr->blk[kill_set_ptr->used].flag = 0; kill_set_ptr->blk[kill_set_ptr->used].level = 0; kill_set_ptr->blk[kill_set_ptr->used++].block = gv_target->hist.h[level-1].blk_num; return cdb_sc_normal; }
/* Finds a free block and adds information to update array and cw_set */ block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id, sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn) { sgmnt_data_ptr_t csd; sgmnt_addrs *csa; node_local_ptr_t cnl; srch_blk_status bmlhist, freeblkhist; block_id hint_blk_num, free_blk_id, parent_blk_id; boolean_t free_blk_recycled; int4 master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap; uint4 total_blks; int blk_seg_cnt, blk_size; sm_uc_ptr_t parent_blk_ptr, bn_ptr, saved_blk; blk_segment *bs1, *bs_ptr; int parent_blk_size, child_blk_size, bsiz; int rec_size1, curr_offset, bpntr_end, hdr_len; int tmp_cmpc; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned short temp_ushort; unsigned long temp_long; unsigned char save_cw_set_depth; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csd = cs_data; csa = cs_addrs; cnl = csa->nl; blk_size = csd->blk_size; /* Find a free/recycled block for new block location. */ hint_blk_num = 0; total_blks = csa->ti->total_blks; num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps); if ((NO_FREE_SPACE == master_bit)) { t_abort(gv_cur_region, csa); return ABORT_SWAP; } bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP; if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } hint_bit = 0; maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num; free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled); free_blk_id = bmlhist.blk_num + free_bit; if (DIR_ROOT >= free_blk_id) { /* Bitmap block 0 and directory tree root block 1 should always be marked busy. */ assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); return RETRY_SWAP; } if (child_blk_id <= free_blk_id) { /* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check * if the block is to be swapped into a higher block number and if so do not swap */ t_abort(gv_cur_region, csa); return ABORT_SWAP; } /* ====== begin update array ====== * Four blocks get changed. * 1. Free block becomes busy and gains the contents of child (root block/directory tree block) * 2. Parent block in directory tree remains busy, but points to new root block location. * 3. Free block's corresponding bitmap reflects above change. * 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */ parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num; CHECK_AND_RESET_UPDATE_ARRAY; if (free_blk_recycled) { /* Otherwise, it's a completely free block, in which case no need to read. */ freeblkhist.blk_num = (block_id)free_blk_id; if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } } child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz; BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, child_blk_size, unsigned char); memcpy(saved_blk, child_blk_ptr, child_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr)); assert(blk_seg_cnt == child_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } tmpcse = &cw_set[cw_set_depth]; (free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state) : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state); t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl); tmpcse->mode = gds_t_acquired; if (!free_blk_recycled || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { tmpcse->old_block = freeblkhist.buffaddr; tmpcse->cr = freeblkhist.cr; tmpcse->cycle = freeblkhist.cycle; jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); t_retry(cdb_sc_lostbmlcr); return RETRY_SWAP; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz); } } /* 2. Parent block in directory tree remains busy, but points to new child block location. */ curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset; parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz; GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset)); if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } BLK_INIT(bs_ptr, bs1); if (0 == parent_blk_lvl) /* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */ hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset)); else hdr_len = rec_size1 - SIZEOF(block_id); bpntr_end = curr_offset + hdr_len + SIZEOF(block_id); BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, free_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end); assert(blk_seg_cnt == parent_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN); /* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* 3. Free block's corresponding bitmap reflects above change. */ PUT_LONG(update_array_ptr, free_bit); save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */ assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1); cw_map_depth = cw_set_depth; cw_set_depth = save_cw_set_depth; update_array_ptr += SIZEOF(block_id); temp_long = 0; PUT_LONG(update_array_ptr, temp_long); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ kill_set_list->blk[kill_set_list->used].flag = 0; kill_set_list->blk[kill_set_list->used].level = 0; kill_set_list->blk[kill_set_list->used++].block = child_blk_id; return free_blk_id; }
/*********************************************************************************************** Input Parameters: cur_level: Working block's level d_max_fill: Database fill factor i_max_fill: Index fill factor Output Parameters: blks_created: how many new blocks are created lvls_increased : How much level is increased Input/Output Parameters: gv_target: History of working block Here it is assumed that i_max_fill or, d_max_fill is strictly less than block size. Returns: cdb_sc_normal: if successful cdb_sc status otherwise ************************************************************************************************/ enum cdb_sc mu_split(int cur_level, int i_max_fill, int d_max_fill, int *blks_created, int *lvls_increased) { boolean_t first_copy, new_rtblk_star_only, create_root = FALSE, split_required, insert_in_left; unsigned char curr_prev_key[MAX_KEY_SZ+1], new_blk1_last_key[MAX_KEY_SZ+1]; unsigned short temp_ushort; int rec_size, new_ins_keycmpc, tkeycmpc, new_ances_currkeycmpc, old_ances_currkeycmpc; int tmp_cmpc; block_index left_index, right_index; block_offset ins_off, ins_off2; int level; int new_ins_keysz, new_ances_currkeysz, new_blk1_last_keysz, newblk2_first_keysz, next_gv_currkeysz; int old_ances_currkeylen, new_ins_keylen, new_ances_currkeylen, tkeylen, newblk2_first_keylen; int old_blk1_last_rec_size, old_blk1_sz, save_blk_piece_len, old_right_piece_len; int delta, max_fill; enum cdb_sc status; int blk_seg_cnt, blk_size, new_leftblk_top_off; block_id allocation_clue; sm_uc_ptr_t rPtr1, rPtr2, rec_base, key_base, next_gv_currkey, bn_ptr1, bn_ptr2, save_blk_piece, old_blk_after_currec, ances_currkey, old_blk1_base, new_blk1_top, new_blk2_top, new_blk2_frec_base, new_blk2_rem, newblk2_first_key, new_ins_key; blk_segment *bs_ptr1, *bs_ptr2; cw_set_element *cse; rec_hdr_ptr_t star_rec_hdr, new_rec_hdr1a, new_rec_hdr1b, new_rec_hdr2, root_hdr; blk_hdr_ptr_t blk_hdr_ptr; blk_size = cs_data->blk_size; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr); star_rec_hdr->rsiz = BSTAR_REC_SIZE; SET_CMPC(star_rec_hdr, 0); level = cur_level; max_fill = (0 == level)? d_max_fill : i_max_fill; /* ------------------- * Split working block. * ------------------- * new_blk1_last_key = last key of the new working block after split * new_blk1_last_keysz = size of new_blk1_last_key * old_blk1_last_rec_size = last record size of the new working block after split (for old block) * new_blk2_frec_base = base of first record of right block created after split * newblk2_first_key = first key of new block created after split * newblk2_first_keysz = size of newblk2_first_key * new_blk2_rem = pointer to new block to be created after split exclude 1st record header + key */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; if (cdb_sc_normal != (status = locate_block_split_point (old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) /* Avoid split to create a small right sibling. Note this should not happen often when tolerance is high */ return cdb_sc_oprnotneeded; old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = old_blk1_base + new_leftblk_top_off; BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal != status) /* restart for cdb_sc_starrecord too, because we eliminated the possibility already */ { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* copy the compressed key piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; /* gv_currkey_next_reorg will be saved for next iteration in mu_reorg */ next_gv_currkey = newblk2_first_key; next_gv_currkeysz = newblk2_first_keysz; BLK_ADDR(new_rec_hdr1b, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr1b->rsiz = rec_size + tkeycmpc; SET_CMPC(new_rec_hdr1b, 0); /* Create new split piece, we already know that this will not be *-rec only. * Note that this has to be done BEFORE modifying working block as building this buffer relies on the * working block to be pinned which is possible only if this cw-set-element is created ahead of that * of the working block (since order in which blocks are built is the order in which cses are created). */ BLK_INIT(bs_ptr2, bs_ptr1); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)new_rec_hdr1b, SIZEOF(rec_hdr)); BLK_SEG(bs_ptr2, newblk2_first_key, newblk2_first_keysz); BLK_SEG(bs_ptr2, new_blk2_rem, new_blk2_top - new_blk2_rem); if (!BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } allocation_clue = ALLOCATION_CLUE(cs_data->trans_hist.total_blks); right_index = t_create(allocation_clue++, (unsigned char *)bs_ptr1, 0, 0, level); (*blks_created)++; /* Modify working block removing split piece */ BLK_INIT(bs_ptr2, bs_ptr1); if (0 == level) { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr)); } else { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr) - old_blk1_last_rec_size); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)star_rec_hdr, SIZEOF(rec_hdr) ); BLK_ADDR(bn_ptr1, SIZEOF(block_id), unsigned char); memcpy(bn_ptr1, old_blk1_base + new_leftblk_top_off - SIZEOF(block_id), SIZEOF(block_id)); BLK_SEG(bs_ptr2, bn_ptr1, SIZEOF(block_id)); } if ( !BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } t_write(&gv_target->hist.h[level], (unsigned char *)bs_ptr1, 0, 0, level, FALSE, TRUE, GDS_WRITE_KILLTN); /* ---------------------------------------------------------------------------- Modify ancestor block for the split in current level. new_ins_key = new key to be inserted in parent because of split in child new_ins_key will be inserted after gv_target->hist.h[level].prev_rec and before gv_target->hist.h[level].curr_rec new_ins_keysz = size of new_ins_key Note: A restriction of the algorithm is to have current key and new_ins_key in the same block, either left or, new right block ---------------------------------------------------------------------------- */ BLK_ADDR(new_ins_key, new_blk1_last_keysz, unsigned char); memcpy(new_ins_key, &new_blk1_last_key[0], new_blk1_last_keysz); new_ins_keysz = new_blk1_last_keysz; for(;;) /* ========== loop through ancestors as necessary ======= */ { level ++; max_fill = i_max_fill; /* old_blk_after_currec = remaining of current block after currec ances_currkey = old real value of currkey in ancestor block */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; rec_base = old_blk1_base + gv_target->hist.h[level].curr_rec.offset; GET_RSIZ(rec_size, rec_base); old_blk_after_currec = rec_base + rec_size; old_ances_currkeycmpc = EVAL_CMPC((rec_hdr_ptr_t)rec_base); old_ances_currkeylen = rec_size - BSTAR_REC_SIZE; if (INVALID_RECORD(level, rec_size, old_ances_currkeylen, old_ances_currkeycmpc)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (0 == old_ances_currkeylen) { if (0 != old_ances_currkeycmpc) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } new_ances_currkeycmpc = new_ances_currkeylen = 0; } else { BLK_ADDR(ances_currkey, gv_cur_region->max_rec_size + 1, unsigned char); key_base = rec_base + SIZEOF(rec_hdr); } new_ances_currkeysz = old_ances_currkeycmpc + old_ances_currkeylen; if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) /* cur_rec is not first key */ { if (cdb_sc_normal != (status = gvcst_expand_any_key(old_blk1_base, old_blk1_base + gv_target->hist.h[level].curr_rec.offset, &curr_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (old_ances_currkeycmpc) memcpy(ances_currkey, &curr_prev_key[0], old_ances_currkeycmpc); } if (old_ances_currkeylen) { memcpy(ances_currkey + old_ances_currkeycmpc, key_base, old_ances_currkeylen); GET_CMPC(new_ances_currkeycmpc, new_ins_key, ances_currkey); new_ances_currkeylen = new_ances_currkeysz - new_ances_currkeycmpc; } if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) { /* new_ins_key will be inseted after curr_prev_key */ GET_CMPC(new_ins_keycmpc, &curr_prev_key[0], new_ins_key); } else new_ins_keycmpc = 0; /* new_ins_key will be the 1st key */ new_ins_keylen = new_ins_keysz - new_ins_keycmpc ; delta = BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen; if (old_blk1_sz + delta > blk_size - cs_data->reserved_bytes) /* split required */ { split_required = TRUE; if (level == gv_target->hist.depth) { create_root = TRUE; if (MAX_BT_DEPTH - 1 <= level) /* maximum level reached */ return cdb_sc_maxlvl; } if (max_fill + BSTAR_REC_SIZE > old_blk1_sz) { if (SIZEOF(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz) return cdb_sc_oprnotneeded; /* Improve code to avoid this */ max_fill = old_blk1_sz - BSTAR_REC_SIZE; } status = locate_block_split_point(old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off); if (cdb_sc_normal != status || new_leftblk_top_off >= old_blk1_sz || 0 == new_blk1_last_keysz) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(BSTAR_REC_SIZE != old_blk1_last_rec_size); old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = new_blk1_top = old_blk1_base + new_leftblk_top_off; if (BSTAR_REC_SIZE == old_right_piece_len) new_rtblk_star_only = TRUE; else new_rtblk_star_only = FALSE; if (new_leftblk_top_off == gv_target->hist.h[level].curr_rec.offset) { /* inserted key will be the first record of new right block */ new_ins_keylen = new_ins_keysz; new_ins_keycmpc = 0; } else /* process 1st record of new right block */ { BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal == status) { memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* compressed piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; BLK_ADDR(new_rec_hdr2, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr2->rsiz = newblk2_first_keysz + BSTAR_REC_SIZE; SET_CMPC(new_rec_hdr2, 0); } else if (cdb_sc_starrecord != status || !new_rtblk_star_only) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } } /* else gv_target->hist.h[level].curr_rec will be newblk2_first_key */ if (new_leftblk_top_off > gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* in this case prev_rec (if exists), new key and curr_rec should go into left block */ if (new_leftblk_top_off + delta - old_blk1_last_rec_size + BSTAR_REC_SIZE <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else if (new_leftblk_top_off < gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* if gv_target->hist.h[level].curr_rec is the first key in old_blk1 then in new right block, new_ins_key will be the 1st record key and curr_rec will be 2nd record and there will be no prev_rec in right block. Else (if curr_rec is not first key) there will be some records before new_ins_key, at least prev_rec */ delta = (int)(BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen + ((0 == new_ins_keycmpc) ? 0 : (EVAL_CMPC((rec_hdr_ptr_t)new_blk2_frec_base)))); if (SIZEOF(blk_hdr) + old_right_piece_len + delta <= blk_size - cs_data->reserved_bytes) { insert_in_left = FALSE; if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* in this case prev_rec (if exists), new key and curr_rec should go into left block and curr_rec will be the last record (*-key) of left new block */ delta = BSTAR_REC_SIZE + new_ins_keylen; if (new_leftblk_top_off + delta <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } } /* end if split required */ else
enum cdb_sc gvcst_kill_blk(srch_blk_status *blkhist, char level, gv_key *search_key, srch_rec_status low, srch_rec_status high, boolean_t right_extra, cw_set_element **cseptr) { typedef sm_uc_ptr_t bytptr; unsigned short temp_ushort; int4 temp_long; int tmp_cmpc; int blk_size, blk_seg_cnt, lmatch, rmatch, targ_len, prev_len, targ_base, next_rec_shrink, temp_int, blkseglen; bool kill_root, first_copy; blk_hdr_ptr_t old_blk_hdr; rec_hdr_ptr_t left_ptr; /*pointer to record before first record to delete*/ rec_hdr_ptr_t del_ptr; /*pointer to first record to delete*/ rec_hdr_ptr_t right_ptr; /*pointer to record after last record to delete*/ rec_hdr_ptr_t right_prev_ptr; rec_hdr_ptr_t rp, rp1; /*scratch record pointer*/ rec_hdr_ptr_t first_in_blk, top_of_block, new_rec_hdr, star_rec_hdr; blk_segment *bs1, *bs_ptr; block_index new_block_index; unsigned char *skb; static readonly block_id zeroes = 0; cw_set_element *cse, *old_cse; bytptr curr, prev, right_bytptr; off_chain chain1, curr_chain, prev_chain; block_id blk; sm_uc_ptr_t buffer; srch_blk_status *t1; *cseptr = NULL; if (low.offset == high.offset) return cdb_sc_normal; blk = blkhist->blk_num; if (dollar_tlevel) { PUT_LONG(&chain1, blk); if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth)) { assert(sgm_info_ptr->tp_csa == cs_addrs); assert(FALSE == cs_addrs->now_crit); return cdb_sc_blknumerr; } } buffer = blkhist->buffaddr; old_blk_hdr = (blk_hdr_ptr_t)buffer; kill_root = FALSE; blk_size = cs_data->blk_size; first_in_blk = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + SIZEOF(blk_hdr)); top_of_block = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + old_blk_hdr->bsiz); left_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + low.offset); right_ptr = (rec_hdr_ptr_t)((bytptr)old_blk_hdr + high.offset); if (right_extra && right_ptr < top_of_block) { right_prev_ptr = right_ptr; GET_USHORT(temp_ushort, &right_ptr->rsiz); right_ptr = (rec_hdr_ptr_t)((bytptr)right_ptr + temp_ushort); } if ((bytptr)left_ptr < (bytptr)old_blk_hdr || (bytptr)right_ptr > (bytptr)top_of_block || (bytptr)left_ptr >= (bytptr)right_ptr) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } if ((bytptr)left_ptr == (bytptr)old_blk_hdr) { if ((bytptr)right_ptr == (bytptr)top_of_block) { if ((bytptr)first_in_blk == (bytptr)top_of_block) { if (0 != level) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } return cdb_sc_normal; } if (!gv_target->hist.h[level + 1].blk_num) kill_root = TRUE; else { /* We are about to free up the contents of this entire block. If this block corresponded to * a global that has NOISOLATION turned on and has a non-zero recompute list (i.e. some SETs * already happened in this same TP transaction), make sure we disable the NOISOLATION * optimization in this case as that is applicable only if one or more SETs happened in this * data block and NOT if a KILL happens. Usually this is done by a t_write(GDS_WRITE_KILLTN) * call but since in this case the entire block is being freed, "t_write" wont be invoked * so we need to explicitly set GDS_WRITE_KILLTN like t_write would have (GTM-8269). * Note: blkhist->first_tp_srch_status is not reliable outside of TP. Thankfully the recompute * list is also maintained only in case of TP so a check of dollar_tlevel is enough to * dereference both "first_tp_srch_status" and "recompute_list_head". */ if (dollar_tlevel) { t1 = blkhist->first_tp_srch_status ? blkhist->first_tp_srch_status : blkhist; cse = t1->cse; if ((NULL != cse) && cse->recompute_list_head) cse->write_type |= GDS_WRITE_KILLTN; } return cdb_sc_delete_parent; } } del_ptr = first_in_blk; } else { GET_USHORT(temp_ushort, &left_ptr->rsiz); del_ptr = (rec_hdr_ptr_t)((bytptr)left_ptr + temp_ushort); if ((bytptr)del_ptr <= (bytptr)(left_ptr + 1) || (bytptr)del_ptr > (bytptr)right_ptr) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } } if ((bytptr)del_ptr == (bytptr)right_ptr) return cdb_sc_normal; lmatch = low.match; rmatch = high.match; if (level) { for (rp = del_ptr ; rp < right_ptr ; rp = rp1) { GET_USHORT(temp_ushort, &rp->rsiz); rp1 = (rec_hdr_ptr_t)((bytptr)rp + temp_ushort); if (((bytptr)rp1 < (bytptr)(rp + 1) + SIZEOF(block_id)) || ((bytptr)rp1 < buffer) || ((bytptr)rp1 > (buffer + blk_size))) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } GET_LONG(temp_long, ((bytptr)rp1 - SIZEOF(block_id))); if (dollar_tlevel) { chain1 = *(off_chain *)&temp_long; if ((1 == chain1.flag) && ((int)chain1.cw_index >= sgm_info_ptr->cw_set_depth)) { assert(sgm_info_ptr->tp_csa == cs_addrs); assert(FALSE == cs_addrs->now_crit); return cdb_sc_blknumerr; } } gvcst_delete_blk(temp_long, level - 1, FALSE); } } if (kill_root) { /* create an empty data block */ BLK_INIT(bs_ptr, bs1); if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } new_block_index = t_create(blk, (uchar_ptr_t)bs1, 0, 0, 0); /* create index block */ BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr->rsiz = SIZEOF(rec_hdr) + SIZEOF(block_id); SET_CMPC(new_rec_hdr, 0); BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr)); BLK_SEG(bs_ptr, (bytptr)&zeroes, SIZEOF(block_id)); if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } cse = t_write(blkhist, (unsigned char *)bs1, SIZEOF(blk_hdr) + SIZEOF(rec_hdr), new_block_index, 1, TRUE, FALSE, GDS_WRITE_KILLTN); assert(!dollar_tlevel || !cse->high_tlevel); *cseptr = cse; if (NULL != cse) cse->first_off = 0; return cdb_sc_normal; } next_rec_shrink = (int)(old_blk_hdr->bsiz + ((bytptr)del_ptr - (bytptr)right_ptr)); if (SIZEOF(blk_hdr) >= next_rec_shrink) { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } if ((bytptr)right_ptr == (bytptr)top_of_block) { if (level) { GET_USHORT(temp_ushort, &left_ptr->rsiz); next_rec_shrink += SIZEOF(rec_hdr) + SIZEOF(block_id) - temp_ushort; } } else { targ_base = (rmatch < lmatch) ? rmatch : lmatch; prev_len = 0; if (right_extra) { EVAL_CMPC2(right_prev_ptr, tmp_cmpc); targ_len = tmp_cmpc - targ_base; if (targ_len < 0) targ_len = 0; temp_int = tmp_cmpc - EVAL_CMPC(right_ptr); if (0 >= temp_int) prev_len = - temp_int; else { if (temp_int < targ_len) targ_len -= temp_int; else targ_len = 0; } } else { targ_len = EVAL_CMPC(right_ptr) - targ_base; if (targ_len < 0) targ_len = 0; } next_rec_shrink += targ_len + prev_len; } BLK_INIT(bs_ptr, bs1); first_copy = TRUE; blkseglen = (int)((bytptr)del_ptr - (bytptr)first_in_blk); if (0 < blkseglen) { if (((bytptr)right_ptr != (bytptr)top_of_block) || (0 == level)) { BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen); first_copy = FALSE; } else { blkseglen = (int)((bytptr)left_ptr - (bytptr)first_in_blk); if (0 < blkseglen) { BLK_SEG(bs_ptr, (bytptr)first_in_blk, blkseglen); first_copy = FALSE; } BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr); SET_CMPC(star_rec_hdr, 0); star_rec_hdr->rsiz = (unsigned short)(SIZEOF(rec_hdr) + SIZEOF(block_id)); BLK_SEG(bs_ptr, (bytptr)star_rec_hdr, SIZEOF(rec_hdr)); GET_USHORT(temp_ushort, &left_ptr->rsiz); BLK_SEG(bs_ptr, ((bytptr)left_ptr + temp_ushort - SIZEOF(block_id)), SIZEOF(block_id)); } } blkseglen = (int)((bytptr)top_of_block - (bytptr)right_ptr); assert(0 <= blkseglen); if (0 != blkseglen) { next_rec_shrink = targ_len + prev_len; if (0 >= next_rec_shrink) { BLK_SEG(bs_ptr, (bytptr)right_ptr, blkseglen); } else { BLK_ADDR(new_rec_hdr, SIZEOF(rec_hdr), rec_hdr); SET_CMPC(new_rec_hdr, EVAL_CMPC(right_ptr) - next_rec_shrink); GET_USHORT(temp_ushort, &right_ptr->rsiz); new_rec_hdr->rsiz = temp_ushort + next_rec_shrink; BLK_SEG(bs_ptr, (bytptr)new_rec_hdr, SIZEOF(rec_hdr)); if (targ_len) { BLK_ADDR(skb, targ_len, unsigned char); memcpy(skb, &search_key->base[targ_base], targ_len); BLK_SEG(bs_ptr, skb, targ_len); } if (prev_len) BLK_SEG(bs_ptr, (bytptr)(right_prev_ptr + 1) , prev_len); right_bytptr = (bytptr)(right_ptr + 1); blkseglen = (int)((bytptr)top_of_block - right_bytptr); if (0 < blkseglen) { BLK_SEG(bs_ptr, right_bytptr, blkseglen); } else { assert(CDB_STAGNATE > t_tries); return cdb_sc_rmisalign; } } } if (!BLK_FINI(bs_ptr, bs1)) { assert(CDB_STAGNATE > t_tries); return cdb_sc_mkblk; } cse = t_write(blkhist, (unsigned char *)bs1, 0, 0, level, first_copy, TRUE, GDS_WRITE_KILLTN); assert(!dollar_tlevel || !cse->high_tlevel); *cseptr = cse; if (horiz_growth) { old_cse = cse->low_tlevel; assert(old_cse && old_cse->done); assert(2 == (SIZEOF(old_cse->undo_offset) / SIZEOF(old_cse->undo_offset[0]))); assert(2 == (SIZEOF(old_cse->undo_next_off) / SIZEOF(old_cse->undo_next_off[0]))); assert(!old_cse->undo_next_off[0] && !old_cse->undo_offset[0]); assert(!old_cse->undo_next_off[1] && !old_cse->undo_offset[1]); } if ((NULL != cse) && (0 != cse->first_off)) { /* fix up chains in the block to account for deleted records */ prev = NULL; curr = buffer + cse->first_off; GET_LONGP(&curr_chain, curr); while (curr < (bytptr)del_ptr) { /* follow chain to first deleted record */ if (0 == curr_chain.next_off) break; if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain)) break; /* special case described below: stop just before the first deleted record */ prev = curr; curr += curr_chain.next_off; GET_LONGP(&curr_chain, curr); } if (right_ptr == top_of_block && (bytptr)del_ptr - curr == SIZEOF(off_chain)) { /* if the right side of the block is gone and our last chain is in the last record, * terminate the chain and adjust the previous entry to point at the new *-key * NOTE: this assumes there's NEVER a TP delete of records in the GVT */ assert(0 != level); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = curr_chain.next_off; old_cse->undo_offset[0] = (block_offset)(curr - buffer); assert(old_cse->undo_offset[0]); } curr_chain.next_off = 0; GET_LONGP(curr, &curr_chain); if (NULL != prev) { /* adjust previous chain next_off to reflect the fact that the record it refers to is now a *-key */ GET_LONGP(&prev_chain, prev); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[1] = prev_chain.next_off; old_cse->undo_offset[1] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[1]); } prev_chain.next_off = (unsigned int)((bytptr)left_ptr - prev + (unsigned int)(SIZEOF(rec_hdr))); GET_LONGP(prev, &prev_chain); } else /* it's the first (and only) one */ cse->first_off = (block_offset)((bytptr)left_ptr - buffer + SIZEOF(rec_hdr)); } else if (curr >= (bytptr)del_ptr) { /* may be more records on the right that aren't deleted */ while (curr < (bytptr)right_ptr) { /* follow chain past last deleted record */ if (0 == curr_chain.next_off) break; curr += curr_chain.next_off; GET_LONGP(&curr_chain, curr); } /* prev : ptr to chain record immediately preceding the deleted area, * or 0 if none. * * curr : ptr to chain record immediately following the deleted area, * or to last chain record. */ if (curr < (bytptr)right_ptr) { /* the former end of the chain is going, going, gone */ if (NULL != prev) { /* terminate the chain before the delete */ GET_LONGP(&prev_chain, prev); /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = prev_chain.next_off; old_cse->undo_offset[0] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[0]); } prev_chain.next_off = 0; GET_LONGP(prev, &prev_chain); } else cse->first_off = 0; /* the whole chain is gone */ } else { /* stitch up the left and right to account for the hole in the middle */ /* next_rec_shrink is the change in record size due to the new compression count */ if (NULL != prev) { GET_LONGP(&prev_chain, prev); /* ??? new compression may be less (ie +) so why are negative shrinks ignored? */ /* store next_off in old_cse before actually changing it in the buffer(for rolling back) */ if (horiz_growth) { old_cse->undo_next_off[0] = prev_chain.next_off; old_cse->undo_offset[0] = (block_offset)(prev - buffer); assert(old_cse->undo_offset[0]); } prev_chain.next_off = (unsigned int)(curr - prev - ((bytptr)right_ptr - (bytptr)del_ptr) + (next_rec_shrink > 0 ? next_rec_shrink : 0)); GET_LONGP(prev, &prev_chain); } else /* curr remains first: adjust the head */ cse->first_off = (block_offset)(curr - buffer - ((bytptr)right_ptr - (bytptr)del_ptr) + (next_rec_shrink > 0 ? next_rec_shrink : 0)); } } } horiz_growth = FALSE; return cdb_sc_normal; }
void dse_chng_bhead(void) { blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; block_id blk; boolean_t chng_blk, ismap, was_hold_onto_crit; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ int4 x; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; srch_blk_status blkhist; trans_num tn; uint4 mapsize; csa = cs_addrs; if (gv_cur_region->read_only) rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ chng_blk = FALSE; if (BADDSEBLK == (blk = dse_getblk("BLOCK", DSEBMLOK, DSEBLKCUR))) /* WARNING: assignment */ return; csd = csa->hdr; assert(csd == cs_data); blk_size = csd->blk_size; ismap = IS_BITMAP_BLK(blk); mapsize = BM_SIZE(csd->bplmap); t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr; if (CLI_PRESENT == cli_present("LEVEL")) { if (!cli_get_hex("LEVEL", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.", TRUE); t_abort(gv_cur_region, csa); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.", TRUE); t_abort(gv_cur_region, csa); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (CLI_PRESENT == cli_present("BSIZ")) { if (!cli_get_hex("BSIZ", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } else if (x < SIZEOF(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, csa); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_AIMGBLKFAIL, 3, blk, DB_LEN_STR(gv_cur_region)); t_abort(gv_cur_region, csa); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } if (CLI_PRESENT == cli_present("TN")) { if (!cli_get_hex64("TN", &tn)) return; t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(csa->ti->early_tn == csa->ti->curr_tn); if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); t_abort(gv_cur_region, csa); return; } if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); /* Pass the desired tn as argument to bg_update/mm_update below */ BUILD_AIMG_IF_JNL_ENABLED_AND_T_END_WITH_EFFECTIVE_TN(csa, csd, tn, &dummy_hist); } return; }
void dse_maps(void) { block_id blk, bml_blk; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT, BLK_SEG and BLK_FINI macros */ sm_uc_ptr_t bp; char util_buff[MAX_UTIL_LEN]; int4 bml_size, bml_list_size, blk_index, bml_index; int4 total_blks, blks_in_bitmap; int4 bplmap, dummy_int; unsigned char *bml_list; cache_rec_ptr_t cr, dummy_cr; bt_rec_ptr_t btr; int util_len; uchar_ptr_t blk_ptr; boolean_t was_crit; uint4 jnl_status; srch_blk_status blkhist; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; if (CLI_PRESENT == cli_present("BUSY") || CLI_PRESENT == cli_present("FREE") || CLI_PRESENT == cli_present("MASTER") || CLI_PRESENT == cli_present("RESTORE_ALL")) { if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); } CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ csa = cs_addrs; assert(&FILE_INFO(gv_cur_region)->s_addrs == csa); was_crit = csa->now_crit; if (csa->critical) crash_count = csa->critical->crashcnt; csd = csa->hdr; bplmap = csd->bplmap; if (CLI_PRESENT == cli_present("BLOCK")) { if (!cli_get_hex("BLOCK", (uint4 *)&blk)) return; if (blk < 0 || blk >= csa->ti->total_blks) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } else blk = patch_curr_blk; if (CLI_PRESENT == cli_present("FREE")) { if (0 == bplmap) { util_out_print("Cannot perform map updates: bplmap field of file header is zero.", TRUE); return; } if (blk / bplmap * bplmap == blk) { util_out_print("Cannot perform action on a map block.", TRUE); return; } bml_blk = blk / bplmap * bplmap; bm_setmap(bml_blk, blk, FALSE); return; } if (CLI_PRESENT == cli_present("BUSY")) { if (0 == bplmap) { util_out_print("Cannot perform map updates: bplmap field of file header is zero.", TRUE); return; } if (blk / bplmap * bplmap == blk) { util_out_print("Cannot perform action on a map block.", TRUE); return; } bml_blk = blk / bplmap * bplmap; bm_setmap(bml_blk, blk, TRUE); return; } blk_size = csd->blk_size; if (CLI_PRESENT == cli_present("MASTER")) { if (0 == bplmap) { util_out_print("Cannot perform maps updates: bplmap field of file header is zero.", TRUE); return; } if (!was_crit) grab_crit(gv_cur_region); bml_blk = blk / bplmap * bplmap; if (dba_mm == csd->acc_meth) bp = MM_BASE_ADDR(csa) + (off_t)bml_blk * blk_size; else { assert(dba_bg == csd->acc_meth); if (!(bp = t_qread(bml_blk, &dummy_int, &dummy_cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); } if ((csa->ti->total_blks / bplmap) * bplmap == bml_blk) total_blks = (csa->ti->total_blks - bml_blk); else total_blks = bplmap; if (NO_FREE_SPACE == bml_find_free(0, bp + SIZEOF(blk_hdr), total_blks)) bit_clear(bml_blk / bplmap, csa->bmm); else bit_set(bml_blk / bplmap, csa->bmm); if (bml_blk > csa->nl->highest_lbm_blk_changed) csa->nl->highest_lbm_blk_changed = bml_blk; if (!was_crit) rel_crit(gv_cur_region); return; } if (CLI_PRESENT == cli_present("RESTORE_ALL")) { if (0 == bplmap) { util_out_print("Cannot perform maps updates: bplmap field of file header is zero.", TRUE); return; } total_blks = csa->ti->total_blks; assert(ROUND_DOWN2(blk_size, 2 * SIZEOF(int4)) == blk_size); bml_size = BM_SIZE(bplmap); bml_list_size = (total_blks + bplmap - 1) / bplmap * bml_size; bml_list = (unsigned char *)malloc(bml_list_size); for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) bml_newmap((blk_hdr_ptr_t)(bml_list + bml_index * bml_size), bml_size, csa->ti->curr_tn); if (!was_crit) { grab_crit(gv_cur_region); csa->hold_onto_crit = TRUE; /* need to do this AFTER grab_crit */ } blk = get_dir_root(); assert(blk < bplmap); csa->ti->free_blocks = total_blks - DIVIDE_ROUND_UP(total_blks, bplmap); bml_busy(blk, bml_list + SIZEOF(blk_hdr)); csa->ti->free_blocks = csa->ti->free_blocks - 1; dse_m_rest(blk, bml_list, bml_size, &csa->ti->free_blocks, TRUE); for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) { t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ CWS_RESET; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ assert(csa->ti->early_tn == csa->ti->curr_tn); blk_ptr = bml_list + bml_index * bml_size; blkhist.blk_num = blk_index; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blk_ptr + SIZEOF(blk_hdr), bml_size - SIZEOF(blk_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, LCL_MAP_LEVL, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn); t_end(&dummy_hist, NULL, csa->ti->curr_tn); } /* Fill in master map */ for (blk_index = 0, bml_index = 0; blk_index < total_blks; blk_index += bplmap, bml_index++) { blks_in_bitmap = (blk_index + bplmap <= total_blks) ? bplmap : total_blks - blk_index; assert(1 < blks_in_bitmap); /* the last valid block in the database should never be a bitmap block */ if (NO_FREE_SPACE != bml_find_free(0, (bml_list + bml_index * bml_size) + SIZEOF(blk_hdr), blks_in_bitmap)) bit_set(blk_index / bplmap, csa->bmm); else bit_clear(blk_index / bplmap, csa->bmm); if (blk_index > csa->nl->highest_lbm_blk_changed) csa->nl->highest_lbm_blk_changed = blk_index; } if (!was_crit) { csa->hold_onto_crit = FALSE; /* need to do this before the rel_crit */ rel_crit(gv_cur_region); } if (unhandled_stale_timer_pop) process_deferred_stale(); free(bml_list); csd->kill_in_prog = csd->abandoned_kills = 0; return; } MEMCPY_LIT(util_buff, "!/Block "); util_len = SIZEOF("!/Block ") - 1; util_len += i2hex_nofill(blk, (uchar_ptr_t)&util_buff[util_len], 8); memcpy(&util_buff[util_len], " is marked !AD in its local bit map.!/", SIZEOF(" is marked !AD in its local bit map.!/") - 1); util_len += SIZEOF(" is marked !AD in its local bit map.!/") - 1; util_buff[util_len] = 0; if (!was_crit) grab_crit(gv_cur_region); util_out_print(util_buff, TRUE, 4, dse_is_blk_free(blk, &dummy_int, &dummy_cr) ? "free" : "busy"); if (!was_crit) rel_crit(gv_cur_region); return; }
void dse_chng_rhead(void) { block_id blk; sm_uc_ptr_t bp, b_top, cp, rp; boolean_t chng_rec; rec_hdr new_rec; uint4 x; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; srch_blk_status blkhist; error_def(ERR_DBRDONLY); error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ if (cli_present("BLOCK") == CLI_PRESENT) { if(!cli_get_hex("BLOCK", (uint4 *)&blk)) return; patch_curr_blk = blk; } if (patch_curr_blk < 0 || patch_curr_blk >= cs_addrs->ti->total_blks || !(patch_curr_blk % cs_addrs->hdr->bplmap)) { util_out_print("Error: invalid block number.", TRUE); return; } t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = patch_curr_blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); bp = blkhist.buffaddr; blk_size = cs_addrs->hdr->blk_size; chng_rec = FALSE; b_top = bp + ((blk_hdr_ptr_t)bp)->bsiz; if (((blk_hdr_ptr_t)bp)->bsiz > blk_size || ((blk_hdr_ptr_t)bp)->bsiz < SIZEOF(blk_hdr)) chng_rec = TRUE; /* force rewrite to correct size */ if (cli_present("RECORD") == CLI_PRESENT) { if (!(rp = skan_rnum(bp, FALSE))) { t_abort(gv_cur_region, cs_addrs); return; } } else if (!(rp = skan_offset(bp, FALSE))) { t_abort(gv_cur_region, cs_addrs); return; } GET_SHORT(new_rec.rsiz, &((rec_hdr_ptr_t)rp)->rsiz); new_rec.cmpc = ((rec_hdr_ptr_t)rp)->cmpc; if (cli_present("CMPC") == CLI_PRESENT) { if (!cli_get_hex("CMPC", &x)) { t_abort(gv_cur_region, cs_addrs); return; } if (x > 0x7f) { util_out_print("Error: invalid cmpc.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } if (x > patch_comp_count) util_out_print("Warning: specified compression count is larger than the current expanded key size.", TRUE); new_rec.cmpc = x; chng_rec = TRUE; } if (cli_present("RSIZ") == CLI_PRESENT) { if (!cli_get_hex("RSIZ", &x)) { t_abort(gv_cur_region, cs_addrs); return; } if (x < SIZEOF(rec_hdr) || x > blk_size) { util_out_print("Error: invalid rsiz.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } new_rec.rsiz = x; chng_rec = TRUE; } if (chng_rec) { BLK_INIT(bs_ptr, bs1); cp = bp; cp += SIZEOF(blk_hdr); if (chng_rec) { BLK_SEG(bs_ptr, cp, rp - cp); BLK_SEG(bs_ptr, (uchar_ptr_t)&new_rec, SIZEOF(rec_hdr)); cp = rp + SIZEOF(rec_hdr); } if (b_top - cp) BLK_SEG(bs_ptr, cp, b_top - cp); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)bp)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(cs_data, non_tp_jfb_buff_ptr, cs_addrs->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } return; }
/****************************************************************************************** Input Parameters: level: level of working block dest_blk_id: last destination used for swap Output Parameters: kill_set_ptr: Kill set to be freed *exclude_glist_ptr: List of globals not to be moved for a swap destination Input/Output Parameters: gv_target : as working block's history reorg_gv_target->hist : as desitnitions block's history ******************************************************************************************/ enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr) { unsigned char x_blk_lmap; unsigned short temp_ushort; int rec_size1, rec_size2; int wlevel, nslevel, dest_blk_level; int piece_len1, piece_len2, first_offset, second_offset, work_blk_size, work_parent_size, dest_blk_size, dest_parent_size; int dest_child_cycle; int blk_seg_cnt, blk_size; trans_num ctn; int key_len, key_len_dir; block_id dest_blk_id, work_blk_id, child1, child2; enum cdb_sc status; srch_hist *dest_hist_ptr, *dir_hist_ptr; cache_rec_ptr_t dest_child_cr; blk_segment *bs1, *bs_ptr; sm_uc_ptr_t saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr, bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1; boolean_t gbl_target_was_set, blk_was_free, deleted; gv_namehead *save_targ; srch_blk_status bmlhist, destblkhist, *hist_ptr; unsigned char save_cw_set_depth; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned int bsiz; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; dest_blk_id = *pdest_blk_id; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ if (NULL == TREF(gv_reorgkey)) GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ)); dest_hist_ptr = &(reorg_gv_target->hist); dir_hist_ptr = reorg_gv_target->alt_hist; blk_size = cs_data->blk_size; work_parent_ptr = gv_target->hist.h[level+1].buffaddr; work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz; work_blk_ptr = gv_target->hist.h[level].buffaddr; work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz; work_blk_id = gv_target->hist.h[level].blk_num; if (blk_size < work_blk_size) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } cws_reorg_remove_index = 0; /*===== Infinite loop to find the destination block =====*/ for ( ; ; ) { blk_was_free = FALSE; INCR_BLK_NUM(dest_blk_id); /* A Pre-order traversal should not cause a child block to go to its parent. * However, in case it happens because already the organization was like that or for any other reason, skip swap. * If we decide to swap, code below should be changed to take care of the special case. * Still a grand-child can go to its grand-parent. This is rare and following code can handle it. */ if (dest_blk_id == gv_target->hist.h[level+1].blk_num) continue; if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id) { *pdest_blk_id = dest_blk_id; return cdb_sc_oprnotneeded; } ctn = cs_addrs->ti->curr_tn; /* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk") * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below). * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing * a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array" * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread * in one iteration down below. And that number is the sum of * + MAX_BT_DEPTH : for the t_qread while loop down the tree done below * + 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below * + 2 : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a * bitmap block done inside the call to get_lmap below * = 3 * MAX_BT_DEPTH + 2 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2. * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h */ /* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */ for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--) { deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]); assert(deleted); } /* read corresponding bitmap block before attempting to read destination block. * if bitmap indicates block is free, we will not read the destination block */ bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr); if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap || ((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) || ((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL) { assert(CDB_STAGNATE > t_tries); return cdb_sc_badbitmap; } if (BLK_FREE != x_blk_lmap) { /* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block * in case we later detect that the before-image needs to be written. */ if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr))) { assert(t_tries < CDB_STAGNATE); return (enum cdb_sc)rdfail_detail; } destblkhist.blk_num = dest_blk_id; destblkhist.buffaddr = dest_blk_ptr; destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl; } if (BLK_BUSY != x_blk_lmap) { /* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */ blk_was_free = TRUE; break; } /* dest_blk_id might contain a *-record only. * So follow the pointer to go to the data/index block, which has a non-* key to search. */ nslevel = dest_blk_level; if (MAX_BT_DEPTH <= nslevel) { assert(CDB_STAGNATE > t_tries); return cdb_sc_maxlvl; } rec_base = dest_blk_ptr + SIZEOF(blk_hdr); GET_RSIZ(rec_size1, rec_base); tblk_ptr = dest_blk_ptr; while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel)) { GET_LONG(child1, (rec_base + SIZEOF(rec_hdr))); if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_rdfail; } if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr))) { assert(t_tries < CDB_STAGNATE); return (enum cdb_sc)rdfail_detail; } /* leaf of a killed GVT can have block header only. Skip those blocks */ if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz) break; nslevel--; rec_base = tblk_ptr + SIZEOF(blk_hdr); GET_RSIZ(rec_size1, rec_base); } /* leaf of a killed GVT can have block header only. Skip those blocks */ if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz) continue; /* get length of global variable name (do not read subscript) for dest_blk_id */ GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr)); /* key_len = length of 1st key value (including subscript) for dest_blk_id */ GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr)); if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len)) { /* Earlier used to restart here always. But dest_blk_id can be a block, * which is just killed and still marked busy. Skip it, if we are in last retry. */ if (CDB_STAGNATE <= t_tries) continue; else return cdb_sc_blkmod; } memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir); (TREF(gv_reorgkey))->base[key_len_dir] = 0; (TREF(gv_reorgkey))->end = key_len_dir; if (exclude_glist_ptr->next) { /* exclude blocks for globals in the list of EXCLUDE option */ if (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr)) continue; } save_targ = gv_target; if (INVALID_GV_TARGET != reset_gv_target) gbl_target_was_set = TRUE; else { gbl_target_was_set = FALSE; reset_gv_target = save_targ; } gv_target = reorg_gv_target; gv_target->root = cs_addrs->dir_tree->root; gv_target->clue.end = 0; /* assign Directory tree path to find dest_blk_id in dir_hist_ptr */ status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); return status; } if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1) { /* may be in a kill_set of another process */ RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); continue; } for (wlevel = 0; wlevel <= dir_hist_ptr->depth && dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++); if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id) { /* do not swap a dir_tree block */ RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); continue; } /* gv_reorgkey will now have the first key from dest_blk_id, * or, from a descendant of dest_blk_id (in case it had a *-key only). */ memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len); (TREF(gv_reorgkey))->end = key_len - 1; GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr)); /* Get root of GVT for dest_blk_id */ GET_LONG(gv_target->root, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir); if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1))) { assert(t_tries < CDB_STAGNATE); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); return cdb_sc_blkmod; } /* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */ gv_target->clue.end = 0; status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr); RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK); if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */ dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */ continue; if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1))) { assert(t_tries < CDB_STAGNATE); return (cdb_sc_normal != status ? status : cdb_sc_blkmod); } for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++) dest_hist_ptr->h[wlevel].tn = ctn; dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr; dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz; dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr; dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz; break; } /*===== End of infinite loop to find the destination block =====*/ /*----------------------------------------------------- Now modify blocks for swapping. Maximum of 4 blocks. -----------------------------------------------------*/ if (!blk_was_free) { /* 1: dest_blk_id into work_blk_id */ BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr)); if (!BLK_FINI (bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(gv_target->hist.h[level].blk_num == work_blk_id); assert(gv_target->hist.h[level].buffaddr == work_blk_ptr); t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN); } /* 2: work_blk_id into dest_blk_id */ if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* work_blk_id will be swapped with its child. * This is the only vertical swap. Here working block goes to its child. * Working block cannot goto its parent because of traversal */ if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, dest_parent_size, unsigned char); memcpy(saved_blk, dest_parent_ptr, dest_parent_size); first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset; GET_RSIZ(rec_size1, saved_blk + first_offset); if (work_blk_size < first_offset + rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } piece_len1 = first_offset + rec_size1; BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */ BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num); assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr); t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN); } else /* free block or, when working block does not move vertically (swap with parent/child) */ { BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, work_blk_size, unsigned char); memcpy(saved_blk, work_blk_ptr, work_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (blk_was_free) { tmpcse = &cw_set[cw_set_depth]; t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level); /* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here. */ assert(dest_blk_id == tmpcse->blk); tmpcse->mode = gds_t_acquired; /* If snapshots are in progress, we might want to read the before images of the FREE blocks also. * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence * will not read the before images of the FREE blocks in t_end. To workaround this, set * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of * the FREE blocks if needed. */ (BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse); /* No need to write before-image in case the block is FREE. In case the database had never been fully * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered * FREE (i.e. no need to write before-images since backward journal recovery will never be expected * to take the database to a point BEFORE the mupip upgrade). */ if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { /* Destination is a recycled block that needs a before image */ tmpcse->old_block = destblkhist.buffaddr; /* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */ tmpcse->cr = destblkhist.cr; tmpcse->cycle = destblkhist.cycle; jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { /* Compute CHECKSUM for writing PBLK record before getting crit. * It is possible that we are reading a block that is actually marked free in * the bitmap (due to concurrency issues at this point). Therefore we might be * actually reading uninitialized block headers and in turn a bad value of * "old_block->bsiz". Restart if we ever access a buffer whose size is greater * than the db block size. */ bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); return cdb_sc_lostbmlcr; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz); } } assert(GDSVCURR == tmpcse->ondsk_blkver); /* should have been set by t_create above */ } else { hist_ptr = &dest_hist_ptr->h[dest_blk_level]; assert(dest_blk_id == hist_ptr->blk_num); assert(dest_blk_ptr == hist_ptr->buffaddr); t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN); } } if (!blk_was_free) { /* 3: Parent of destination block (may be parent of working block too) */ if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* dest parent == work_blk parent */ BLK_INIT(bs_ptr, bs1); /* Interchange pointer to dest_blk_id and work_blk_id */ if (level != dest_blk_level || gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset) { first_offset = gv_target->hist.h[level+1].curr_rec.offset; second_offset = dest_hist_ptr->h[level+1].curr_rec.offset; } else { first_offset = dest_hist_ptr->h[level+1].curr_rec.offset; second_offset = gv_target->hist.h[level+1].curr_rec.offset; } GET_RSIZ(rec_size1, dest_parent_ptr + first_offset); GET_RSIZ(rec_size2, dest_parent_ptr + second_offset); if (dest_parent_size < first_offset + rec_size1 || dest_parent_size < second_offset + rec_size2 || BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } piece_len1 = first_offset + rec_size1 - SIZEOF(block_id); piece_len2 = second_offset + rec_size2 - SIZEOF(block_id); GET_LONG(child1, dest_parent_ptr + piece_len1); GET_LONG(child2, dest_parent_ptr + piece_len2); BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, child2); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1, second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, child1); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2, dest_parent_size - second_offset - rec_size2); if (!BLK_FINI(bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(level == dest_blk_level); assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr); t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* Destination block moved in the position of working block. * So destination block's parent's pointer should be changed to work_blk_id */ BLK_INIT(bs_ptr, bs1); GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset); if (dest_parent_size < rec_size1 + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, work_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1, dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset - rec_size1); if (!BLK_FINI(bs_ptr,bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr); t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } } /* 4: Parent of working block, if different than destination's parent or, destination was a free block */ if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num) { /* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id */ GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset)); if (work_parent_size < rec_size1 + gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr), gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, dest_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1, work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr); t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN); } /* else already taken care of, when dest_blk_id moved */ if (blk_was_free) { /* A free/recycled block will become busy block. * So the local bitmap must be updated. * Local bit map block will be added in the list of update arrray for concurrency check and * also the cw_set element will be created to mark the free/recycled block as free. * kill_set_ptr will save the block which will become free. */ child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */ bmlhist.buffaddr = bmp_buff; bmlhist.blk_num = child1; child1 = dest_blk_id - child1; assert(child1); PUT_LONG(update_array_ptr, child1); /* Need to put bit maps on the end of the cw set for concurrency checking. * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth". * Hence the save and restore logic (for "cw_set_depth") below. */ save_cw_set_depth = cw_set_depth; assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1); /* will increment cw_set_depth */ cw_map_depth = cw_set_depth; /* set cw_map_depth to the latest cw_set_depth */ cw_set_depth = save_cw_set_depth; /* restore cw_set_depth */ /* t_write_map simulation end */ update_array_ptr += SIZEOF(block_id); child1 = 0; PUT_LONG(update_array_ptr, child1); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 1 free block is now becoming BLK_USED in the bitmap */ /* working block will be removed */ kill_set_ptr->blk[kill_set_ptr->used].flag = 0; kill_set_ptr->blk[kill_set_ptr->used].level = 0; kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id; } *pdest_blk_id = dest_blk_id; return cdb_sc_normal; }
void dse_chng_bhead(void) { block_id blk; int4 x; trans_num tn; cache_rec_ptr_t cr; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ boolean_t ismap; boolean_t chng_blk; boolean_t was_crit; boolean_t was_hold_onto_crit; uint4 mapsize; srch_blk_status blkhist; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; # ifdef GTM_CRYPT int req_enc_blk_size; int crypt_status; blk_hdr_ptr_t bp, save_bp, save_old_block; # endif error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ chng_blk = FALSE; csa = cs_addrs; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK", (uint4 *)&blk)) return; if (blk < 0 || blk > csa->ti->total_blks) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } csd = csa->hdr; assert(csd == cs_data); blk_size = csd->blk_size; ismap = (patch_curr_blk / csd->bplmap * csd->bplmap == patch_curr_blk); mapsize = BM_SIZE(csd->bplmap); t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = patch_curr_blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_hex("LEVEL", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.", TRUE); t_abort(gv_cur_region, csa); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.", TRUE); t_abort(gv_cur_region, csa); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } else if (x < SIZEOF(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, csa); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.", TRUE); t_abort(gv_cur_region, csa); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, csa->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex64("TN", &tn)) return; was_crit = csa->now_crit; t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(csa->ti->early_tn == csa->ti->curr_tn); if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) { util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, csa); return; } if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); /* Pass the desired tn as argument to bg_update/mm_update below */ BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, tn); was_hold_onto_crit = csa->hold_onto_crit; csa->hold_onto_crit = TRUE; t_end(&dummy_hist, NULL, tn); # ifdef GTM_CRYPT if (csd->is_encrypted && (tn < csa->ti->curr_tn)) { /* BG and db encryption is enabled and the DSE update caused the block-header to potentially have a tn * that is LESS than what it had before. At this point, the global buffer (corresponding to blkhist.blk_num) * reflects the contents of the block AFTER the dse update (bg_update would have touched this) whereas * the corresponding encryption global buffer reflects the contents of the block BEFORE the update. * Normally wcs_wtstart takes care of propagating the tn update from the regular global buffer to the * corresponding encryption buffer. But if before it gets a chance, let us say a process goes to t_end * as part of a subsequent transaction and updates this same block. Since the blk-hdr-tn potentially * decreased, it is possible that the PBLK writing check (comparing blk-hdr-tn with the epoch_tn) decides * to write a PBLK for this block (even though a PBLK was already written for this block as part of a * previous DSE CHANGE -BL -TN in the same epoch). In this case, since the db is encrypted, the logic * will assume there were no updates to this block since the last time wcs_wtstart updated the encryption * buffer and therefore use that to write the pblk, which is incorrect since it does not yet contain the * tn update. The consequence of this is would be writing an older before-image PBLK) record to the * journal file. To prevent this situation, we update the encryption buffer here (before releasing crit) * using logic like that in wcs_wtstart to ensure it is in sync with the regular global buffer. * Note: * Although we use cw_set[0] to access the global buffer corresponding to the block number being updated, * cw_set_depth at this point is 0 because t_end resets it. This is considered safe since cw_set is a * static array (as opposed to malloc'ed memory) and hence is always available and valid until it gets * overwritten by subsequent updates. */ bp = (blk_hdr_ptr_t)GDS_ANY_REL2ABS(csa, cw_set[0].cr->buffaddr); DBG_ENSURE_PTR_IS_VALID_GLOBUFF(csa, csd, (sm_uc_ptr_t)bp); save_bp = (blk_hdr_ptr_t)GDS_ANY_ENCRYPTGLOBUF(bp, csa); DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(csa, csd, (sm_uc_ptr_t)save_bp); assert((bp->bsiz <= csd->blk_size) && (bp->bsiz >= SIZEOF(*bp))); req_enc_blk_size = MIN(csd->blk_size, bp->bsiz) - SIZEOF(*bp); if (BLK_NEEDS_ENCRYPTION(bp->levl, req_enc_blk_size)) { ASSERT_ENCRYPTION_INITIALIZED; memcpy(save_bp, bp, SIZEOF(blk_hdr)); GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, (char *)(bp + 1), req_enc_blk_size, (char *)(save_bp + 1), crypt_status); if (0 != crypt_status) GC_GTM_PUTMSG(crypt_status, gv_cur_region->dyn.addr->fname); } else memcpy(save_bp, bp, bp->bsiz); } # endif if (!was_hold_onto_crit) csa->hold_onto_crit = FALSE; if (!was_crit) rel_crit(gv_cur_region); if (unhandled_stale_timer_pop) process_deferred_stale(); } return; }
void mu_reorg_upgrd_dwngrd(void) { blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; block_id *blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp; block_id startblk_input, stopblk_input; boolean_t upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb; boolean_t startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free; cache_rec_ptr_t cr; char *bml_lcl_buff = NULL, *command, *reorg_command; sm_uc_ptr_t bptr = NULL; cw_set_element *cse; enum cdb_sc cdb_status; enum db_ver new_db_format, ondsk_blkver; gd_region *reg; int cycle; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ int4 blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks; int4 status, status1, mapsize, lcnt, bml_status; reorg_stats_t reorg_stats; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; sm_uc_ptr_t blkBase, bml_sm_buff; /* shared memory pointer to the bitmap global buffer */ srch_hist alt_hist; srch_blk_status *blkhist, bmlhist; tp_region *rptr; trans_num curr_tn; unsigned char save_cw_set_depth; uint4 lcl_update_trans; region = (CLI_PRESENT == cli_present("REGION")); upgrade = (CLI_PRESENT == cli_present("UPGRADE")); downgrade = (CLI_PRESENT == cli_present("DOWNGRADE")); assert(upgrade && !downgrade || !upgrade && downgrade); command = upgrade ? "UPGRADE" : "DOWNGRADE"; reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE"; reorg_entiredb = TRUE; /* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */ startblk_specified = FALSE; assert(SIZEOF(block_id) == SIZEOF(uint4)); if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input))) { reorg_entiredb = FALSE; startblk_specified = TRUE; } stopblk_specified = FALSE; assert(SIZEOF(block_id) == SIZEOF(uint4)); if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input))) { reorg_entiredb = FALSE; stopblk_specified = TRUE; } mu_reorg_upgrd_dwngrd_in_prog = TRUE; mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE; assert(region); status = SS_NORMAL; error_mupip = FALSE; gvinit(); /* initialize gd_header (needed by the later call to mu_getlst) */ mu_getlst("REG_NAME", SIZEOF(tp_region)); /* get the parameter corresponding to REGION qualifier */ if (error_mupip) { util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command)); mupip_exit(ERR_MUNOACTION); } assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize); /* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */ gv_target = targ_alloc(gv_keysize, NULL, NULL); /* t_begin needs this initialized */ gv_target_list = NULL; memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */ blkhist = &alt_hist.h[0]; for (rptr = grlist; NULL != rptr; rptr = rptr->fPtr) { if (mu_ctrly_occurred || mu_ctrlc_occurred) break; reg = rptr->reg; util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); if (reg_cmcheck(reg)) { util_out_print("Region !AD : MUPIP REORG !AD cannot run across network", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = ERR_MUNOFINISH; continue; } mu_reorg_process = TRUE; /* gvcst_init will use this value to use gtm_poollimit settings. */ gvcst_init(reg); mu_reorg_process = FALSE; assert(update_array != NULL); /* access method stored in global directory and database file header might be different in which case * the database setting prevails. therefore, the access method check can be done only after opening * the database (i.e. after the gvcst_init) */ if (dba_bg != REG_ACC_METH(reg)) { util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = ERR_MUNOFINISH; continue; } /* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to * the same db file correspond to only one grlist entry. */ assert(FALSE == reg->was_open); TP_CHANGE_REG(reg); /* sets gv_cur_region, cs_addrs, cs_data */ csa = cs_addrs; csd = cs_data; blk_size = csd->blk_size; /* "blk_size" is used by the BLK_FINI macro */ if (reg->read_only) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg)); status = ERR_MUNOFINISH; continue; } assert(GDSVCURR == GDSV6); /* so we trip this assert in case GDSVCURR changes without a change to this module */ new_db_format = (upgrade ? GDSV6 : GDSV4); grab_crit(reg); curr_tn = csd->trans_hist.curr_tn; /* set the desired db format in the file header to the appropriate version, increment transaction number */ status1 = desired_db_format_set(reg, new_db_format, reorg_command); assert(csa->now_crit); /* desired_db_format_set() should not have released crit */ first_reorg_in_this_db_fmt = TRUE; /* with the current desired_db_format, this is the first reorg */ if (SS_NORMAL != status1) { /* "desired_db_format_set" would have printed appropriate error messages */ if (ERR_MUNOACTION != status1) { /* real error occurred while setting the db format. skip to next region */ status = ERR_MUNOFINISH; rel_crit(reg); continue; } util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command)); if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn) first_reorg_in_this_db_fmt = FALSE; } else util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command)); assert(dba_bg == csd->acc_meth); /* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */ total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; actual_blks2upgrd = csd->blks_to_upgrd; /* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH * "csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing. * If MUPIP REORG UPGRADE and all non-free blocks need to be upgraded then again we can skip processing. */ if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded) || (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd))) { util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : " "Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd); util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); rel_crit(reg); continue; } stop_blk = total_blks; if (stopblk_specified && stopblk_input <= stop_blk) stop_blk = stopblk_input; if (first_reorg_in_this_db_fmt) { /* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */ csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn; csd->reorg_upgrd_dwngrd_restart_block = 0; start_blk = (startblk_specified ? startblk_input : 0); } else { /* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running * or a previously running REORG of the same type was interrupted (Ctrl-Ced). * In either case resume processing from whatever restart block number is stored in fileheader * the only exception is if "STARTBLK" was specified in the input in which use that unconditionally. */ start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block); } if (start_blk > stop_blk) start_blk = stop_blk; mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn; /* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not. */ if (!wcs_flu(WCSFLU_FLUSH_HDR)) /* wcs_flu assumes gv_cur_region is set (which it is in this routine) */ { rel_crit(reg); gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg)); status = ERR_MUNOFINISH; continue; } rel_crit(reg); /* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */ status1 = SS_NORMAL; start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP); last_bmp = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP); curblk = start_blk; /* curblk is the block to be upgraded/downgraded */ util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk); if (NULL != bptr) { /* malloc/free "bptr" for each region as GDS block-size can be different */ free(bptr); bptr = NULL; } memset(&reorg_stats, 0, SIZEOF(reorg_stats)); /* initialize statistics for this region */ for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP) { if (mu_ctrly_occurred || mu_ctrlc_occurred) { status1 = ERR_MUNOFINISH; break; } /* -------------------------------------------------------------- * Read in current bitmap block * -------------------------------------------------------------- */ assert(!csa->now_crit); bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* bring block into the cache outside of crit */ reorg_stats.blks_read_from_disk_bmp++; grab_crit_encr_cycle_sync(reg); /* needed so t_qread does not return NULL below */ if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn) { /* csd->desired_db_format changed since reorg started. discontinue the reorg */ /* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment * of this field should be done only if a db format change did not occur. */ rel_crit(reg); status1 = ERR_MUNOFINISH; /* This "start_tn" check is redone after the for-loop and an error message is printed there */ break; } else if (reorg_entiredb) { /* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */ assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp)); csd->reorg_upgrd_dwngrd_restart_block = curbmp; /* previous blocks have been upgraded/downgraded */ } /* Check blks_to_upgrd counter to see if upgrade/downgrade is complete. * Repeat check done a few steps earlier outside of this for loop. */ total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; actual_blks2upgrd = csd->blks_to_upgrd; if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded) || (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd))) { rel_crit(reg); break; } bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr); /* now that in crit, note down stable buffer */ if (NULL == bml_sm_buff) rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); ondsk_blkver = cr->ondsk_blkver; /* note down db fmt on disk for bitmap block */ /* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit. * We are interested in those blocks that are currently marked as USED in the bitmap. * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks. * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job. * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing. */ if (NULL == bml_lcl_buff) bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP)); memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP)); if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE)) { /* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */ assert(FALSE); /* in pro, skip ugprading/downgarding all blks in this unreliable local bitmap */ rel_crit(reg); util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.", TRUE, REG_LEN_STR(reg), curbmp); status1 = ERR_MUNOFINISH; continue; } rel_crit(reg); /* ------------------------------------------------------------------------ * Upgrade/Downgrade all BUSY blocks in the current bitmap * ------------------------------------------------------------------------ */ curblk = (curbmp == start_bmp) ? start_blk : curbmp; mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : BLKS_PER_LMAP; assert(0 != mapsize); assert(mapsize <= BLKS_PER_LMAP); db_got_to_v5_once = csd->db_got_to_v5_once; for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++) { if (mu_ctrly_occurred || mu_ctrlc_occurred) { status1 = ERR_MUNOFINISH; goto stop_reorg_on_this_reg; /* goto needed because of nested FOR Loop */ } GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status); assert(BLK_MAPINVALID != bml_status); /* cert_blk ran clean so we dont expect invalid entries */ if (BLK_FREE == bml_status) { reorg_stats.blks_skipped_free++; continue; } /* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */ if (db_got_to_v5_once || (BLK_RECYCLED != bml_status)) { /* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */ if (lcnt) { /* non-bitmap block */ /* read in block from disk into private buffer. dont pollute the cache yet */ if (NULL == bptr) bptr = (sm_uc_ptr_t)malloc(blk_size); status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE); /* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be * upgraded and if its block size was too big to allow the extra block-header space * requirements for a dynamic upgrade. a MUPIP REORG DOWNGRADE should not error out * in that case as the block is already in the downgraded format. */ if (SS_NORMAL != status1) { if (!upgrade && (ERR_DYNUPGRDFAIL == status1)) { assert(GDSV4 == new_db_format); ondsk_blkver = new_db_format; } else { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status1); util_out_print("Region !AD : Error occurred while reading block " "[0x!XL]", TRUE, REG_LEN_STR(reg), curblk); status1 = ERR_MUNOFINISH; goto stop_reorg_on_this_reg;/* goto needed due to nested FOR Loop */ } } reorg_stats.blks_read_from_disk_nonbmp++; } /* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */ if (new_db_format == ondsk_blkver) { assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1))); status1 = SS_NORMAL; /* treat DYNUPGRDFAIL as no error in case of downgrade */ reorg_stats.blks_skipped_newfmtindisk++; continue; /* current disk version is identical to what is desired */ } assert(SS_NORMAL == status1); } /* Begin non-TP transaction to upgrade/downgrade the block. * The way we do that is by updating the block using a null update array. * Any update to a block will trigger an automatic upgrade/downgrade of the block based on * the current fileheader desired_db_format setting and we use that here. */ t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); for (; ;) { CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ curr_tn = csd->trans_hist.curr_tn; db_got_to_v5_once = csd->db_got_to_v5_once; if (db_got_to_v5_once || (BLK_RECYCLED != bml_status)) { blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */ blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr); if (NULL == blkBase) { t_retry((enum cdb_sc)rdfail_detail); continue; } blkhist->blk_num = curblk; blkhist->buffaddr = blkBase; ondsk_blkver = blkhist->cr->ondsk_blkver; new_hdr = *(blk_hdr_ptr_t)blkBase; mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn; mark_blk_free = FALSE; inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd; } else { mark_blk_free = TRUE; inctn_opcode = inctn_blkmarkfree; } inctn_detail.blknum_struct.blknum = curblk; /* t_end assumes that the history it is passed does not contain a bitmap block. * for bitmap block, the history validation information is passed through cse instead. * therefore we need to handle bitmap and non-bitmap cases separately. */ if (!lcnt) { /* Means a bitmap block. * At this point we can do a "new_db_format != ondsk_blkver" check to determine * if the block got converted since we did the dsk_read (see the non-bitmap case * for a similar check done there), but in that case we will have a transaction * which has read 1 bitmap block and is updating no block. "t_end" currently cannot * handle this case as it expects any bitmap block that needs validation to also * have a corresponding cse which will hold its history. Hence we avoid doing the * new_db_format check. The only disadvantage of this is that we will end up * modifying the bitmap block as part of this transaction (in an attempt to convert * its ondsk_blkver) even though it is already in the right format. Since this * overhead is going to be one per bitmap block and since the block is in the cache * at this point, we should not lose much. */ assert(!mark_blk_free); BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id); *blkid_ptr = 0; t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0); assert(&alt_hist.h[0] == blkhist); alt_hist.h[0].blk_num = 0; /* create empty history for bitmap block */ assert(update_trans); } else { /* non-bitmap block. fill in history for validation in t_end */ assert(curblk); /* we should never come here for block 0 (bitmap) */ if (!mark_blk_free) { assert(blkhist->blk_num == curblk); assert(blkhist->buffaddr == blkBase); blkhist->tn = curr_tn; alt_hist.h[1].blk_num = 0; } /* Also need to pass the bitmap as history to detect if any concurrent M-kill * is freeing up the same USED block that we are trying to convert OR if any * concurrent M-set is reusing the same RECYCLED block that we are trying to * convert. Because of t_end currently not being able to validate a bitmap * without that simultaneously having a cse, we need to create a cse for the * bitmap that is used only for bitmap history validation, but should not be * used to update the contents of the bitmap block in bg_update. */ bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr); if (NULL == bmlhist.buffaddr) { t_retry((enum cdb_sc)rdfail_detail); continue; } bmlhist.blk_num = curbmp; bmlhist.tn = curr_tn; GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status); if (BLK_MAPINVALID == bml_status) { t_retry(cdb_sc_lostbmlcr); continue; } if (!mark_blk_free) { if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status)) { /* block still needs to be converted. create cse */ BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkBase)->levl, FALSE, FALSE, GDS_WRITE_PLAIN); /* The directory tree status for now is only used to determine * whether writing the block to snapshot file (see t_end_sysops.c). * For reorg upgrade/downgrade process, the block is updated in a * sequential way without changing the gv_target. In this case, we * assume the block is in directory tree so as to have it written to * the snapshot file. */ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* reset update_trans in case previous retry had set it to 0 */ update_trans = UPDTRNS_DB_UPDATED_MASK; if (BLK_RECYCLED == bml_status) { /* If block that we are upgarding is RECYCLED, indicate to * bg_update that blks_to_upgrd counter should NOT be * touched in this case by setting "mode" to a special value */ assert(cw_set[cw_set_depth-1].mode == gds_t_write); cw_set[cw_set_depth-1].mode = gds_t_write_recycled; /* we SET block as NOT RECYCLED, otherwise, the mm_update() * or bg_update_phase2 may skip writing it to snapshot file * when its level is 0 */ BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state); } } else { /* Block got converted by another process since we did the dsk_read. * or this block became marked free in the bitmap. * No need to update this block. just call t_end for validation of * both the non-bitmap block as well as the bitmap block. * Note down that this transaction is no longer updating any blocks. */ update_trans = 0; } /* Need to put bit maps on the end of the cw set for concurrency checking. * We want to simulate t_write_map, except we want to update "cw_map_depth" * instead of "cw_set_depth". Hence the save and restore logic below. * This part of the code is similar to the one in mu_swap_blk.c */ save_cw_set_depth = cw_set_depth; assert(!cw_map_depth); t_write_map(&bmlhist, NULL, curr_tn, 0); /* will increment cw_set_depth */ cw_map_depth = cw_set_depth; /* set cw_map_depth to latest cw_set_depth */ cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */ /* t_write_map simulation end */ } else { if (BLK_RECYCLED != bml_status) { /* Block was RECYCLED at beginning but no longer so. Retry */ t_retry(cdb_sc_bmlmod); continue; } /* Mark recycled block as FREE in bitmap */ assert(lcnt == (curblk - curbmp)); assert(update_array_ptr == update_array); *((block_id *)update_array_ptr) = lcnt; update_array_ptr += SIZEOF(block_id); /* the following assumes SIZEOF(block_id) == SIZEOF(int) */ assert(SIZEOF(block_id) == SIZEOF(int)); *(int *)update_array_ptr = 0; t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0); update_trans = UPDTRNS_DB_UPDATED_MASK; } } assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans)); lcl_update_trans = update_trans; /* take a copy before t_end modifies it */ if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) { /* In case this is MM and t_end() remapped an extended database, reset csd */ assert(csd == cs_data); if (!lcl_update_trans) { assert(lcnt); assert(!mark_blk_free); assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status)); if (BLK_BUSY != bml_status) reorg_stats.blks_skipped_free++; else reorg_stats.blks_skipped_newfmtincache++; } else if (!lcnt) reorg_stats.blks_converted_bmp++; else reorg_stats.blks_converted_nonbmp++; break; } assert(csd == cs_data); } } } stop_reorg_on_this_reg: /* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */ grab_crit(reg); blocks_left = 0; assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd); actual_blks2upgrd = csd->blks_to_upgrd; total_blks = csd->trans_hist.total_blks; free_blks = csd->trans_hist.free_blocks; /* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt * change. This is because let us say we are doing REORG UPGRADE. A concurrent REORG DOWNGRADE would * have reset "csd->reorg_upgrd_dwngrd_restart_block" field to 0 and if that reorg is interrupted by a * Ctrl-C (before this reorg came here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to * a non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this * field to "curblk" as it will be mis-interpreted as the number of blocks that have been DOWNgraded. */ set_fully_upgraded = FALSE; if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn) { /* csd->desired_db_format changed since reorg started. discontinue the reorg */ util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.", TRUE, REG_LEN_STR(reg)); status1 = ERR_MUNOFINISH; } else if (reorg_entiredb) { /* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */ assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk); csd->reorg_upgrd_dwngrd_restart_block = curblk; /* blocks lesser than this have been upgraded/downgraded */ expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks); blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd); /* If this reorg command went through all blocks in the database, then it should have * correctly concluded at this point whether the reorg is complete or not. * If this reorg command started from where a previous incomplete reorg left * (i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial * GDS blocks that it skipped are completely {up,down}graded or not. */ assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt); /* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is * 0 in the file-header and the desired_db_format did not change since the start of the REORG, * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE. */ if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd)) { csd->fully_upgraded = TRUE; csd->db_got_to_v5_once = TRUE; set_fully_upgraded = TRUE; } /* flush all changes noted down in the file-header */ if (!wcs_flu(WCSFLU_FLUSH_HDR)) /* wcs_flu assumes gv_cur_region is set (which it is in this routine) */ { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg)); status = ERR_MUNOFINISH; rel_crit(reg); continue; } } curr_tn = csd->trans_hist.curr_tn; rel_crit(reg); util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk); /* Print statistics */ util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp); util_out_print("Region !AD : Statistics : Blocks Skipped (Free) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free); util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp); util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk); util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache); util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp); util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap) : 0x!XL", TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp); if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left)) { /* file-header counter does not match what reorg on the entire database expected to see */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd); util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter", TRUE, REG_LEN_STR(reg)); status1 = ERR_MUNOFINISH; } else util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : " "Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks, actual_blks2upgrd); /* Issue success or failure message for this region */ if (SS_NORMAL == status1) { /* issue success only if REORG did not encounter any error in its processing */ if (set_fully_upgraded) util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg)); util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg), process_id, process_id, &curr_tn); } else { assert(ERR_MUNOFINISH == status1); assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status)); util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command)); status = status1; } } if (NULL != bptr) free(bptr); if (NULL != bml_lcl_buff) free(bml_lcl_buff); if (mu_ctrly_occurred || mu_ctrlc_occurred) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY); status = ERR_MUNOFINISH; } mupip_exit(status); }
void dse_rmrec(void) { block_id blk; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size, count; sm_uc_ptr_t bp; uchar_ptr_t lbp, b_top, rp, r_top, key_top, rp_base; char cc, comp_key[256], cc_base; short int size, i, rsize; cw_set_element *cse; error_def(ERR_DSEFAIL); error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); assert(update_array); /* reset new block mechanism */ update_array_ptr = update_array; if (cli_present("BLOCK") == CLI_PRESENT) { if(!cli_get_hex("BLOCK", &blk)) return; if (blk < 0 || blk >= cs_addrs->ti->total_blks || !(blk % cs_addrs->hdr->bplmap)) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } if (cli_present("COUNT") == CLI_PRESENT) { if (!cli_get_hex("COUNT", &count) || count < 1) return; } else count = 1; t_begin_crit(ERR_DSEFAIL); blk_size = cs_addrs->hdr->blk_size; if(!(bp = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); lbp = (uchar_ptr_t)malloc(blk_size); memcpy(lbp, bp, blk_size); if (((blk_hdr_ptr_t)lbp)->bsiz > cs_addrs->hdr->blk_size) b_top = lbp + cs_addrs->hdr->blk_size; else if (((blk_hdr_ptr_t)lbp)->bsiz < sizeof(blk_hdr)) b_top = lbp + sizeof(blk_hdr); else b_top = lbp + ((blk_hdr_ptr_t)lbp)->bsiz; if (cli_present("RECORD") == CLI_PRESENT) { if (!(rp = rp_base = skan_rnum(lbp, FALSE))) { free(lbp); t_abort(gv_cur_region, cs_addrs); return; } } else if (!(rp = rp_base = skan_offset(lbp, FALSE))) { free(lbp); t_abort(gv_cur_region, cs_addrs); return; } memcpy(&comp_key[0], &patch_comp_key[0], sizeof(patch_comp_key)); cc_base = patch_comp_count; for ( ; ; ) { GET_SHORT(rsize, &((rec_hdr_ptr_t)rp)->rsiz); if (rsize < sizeof(rec_hdr)) r_top = rp + sizeof(rec_hdr); else r_top = rp + rsize; if (r_top >= b_top) { if (count) { if (((blk_hdr_ptr_t) lbp)->levl) util_out_print("Warning: removed a star record from the end of this block.", TRUE); ((blk_hdr_ptr_t)lbp)->bsiz = rp_base - lbp; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr), (int)((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.",TRUE); free(lbp); t_abort(gv_cur_region, cs_addrs); return; } t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); free(lbp); return; } r_top = b_top; } if (((blk_hdr_ptr_t)lbp)->levl) key_top = r_top - sizeof(block_id); else { for (key_top = rp + sizeof(rec_hdr); key_top < r_top; ) if (!*key_top++ && !*key_top++) break; } if (((rec_hdr_ptr_t)rp)->cmpc > patch_comp_count) cc = patch_comp_count; else cc = ((rec_hdr_ptr_t)rp)->cmpc; size = key_top - rp - sizeof(rec_hdr); if (size < 0) size = 0; else if (size > sizeof(patch_comp_key) - 2) size = sizeof(patch_comp_key) - 2; memcpy(&patch_comp_key[cc], rp + sizeof(rec_hdr), size); patch_comp_count = cc + size; if (--count >= 0) { rp = r_top; continue; } size = (patch_comp_count < cc_base) ? patch_comp_count : cc_base; for (i = 0; i < size && patch_comp_key[i] == comp_key[i]; i++) ; ((rec_hdr_ptr_t)rp_base)->cmpc = i; rsize = r_top - key_top + sizeof(rec_hdr) + patch_comp_count - i; PUT_SHORT(&((rec_hdr_ptr_t)rp_base)->rsiz, rsize); memcpy(rp_base + sizeof(rec_hdr), &patch_comp_key[i], patch_comp_count - i); memcpy(rp_base + sizeof(rec_hdr) + patch_comp_count - i, key_top, b_top - key_top); ((blk_hdr_ptr_t)lbp)->bsiz = rp_base + rsize - lbp + b_top - r_top; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr), ((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.", TRUE); free(lbp); t_abort(gv_cur_region, cs_addrs); return; } t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); free(lbp); return; } }
void dse_chng_bhead(void) { block_id blk; block_id *blkid_ptr; sgm_info *dummysi = NULL; int4 x; cache_rec_ptr_t cr; uchar_ptr_t bp; sm_uc_ptr_t blkBase; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; cw_set_element *cse; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ bool ismap; bool chng_blk; uint4 mapsize; uint4 jnl_status; error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); assert(update_array); /* reset new block mechanism */ update_array_ptr = update_array; chng_blk = FALSE; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK",&blk)) return; if (blk < 0 || blk > cs_addrs->ti->total_blks) { util_out_print("Error: invalid block number.",TRUE); return; } patch_curr_blk = blk; } blk_size = cs_addrs->hdr->blk_size; ismap = (patch_curr_blk / cs_addrs->hdr->bplmap * cs_addrs->hdr->bplmap == patch_curr_blk); mapsize = BM_SIZE(cs_addrs->hdr->bplmap); t_begin_crit (ERR_DSEFAIL); if (!(bp = t_qread (patch_curr_blk,&dummy_hist.h[0].cycle,&dummy_hist.h[0].cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)bp; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_num("LEVEL",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < sizeof(blk_hdr)) new_hdr.bsiz = sizeof(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } else if (x < sizeof(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, cs_addrs); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } t_write (patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, new_hdr.levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex("TN",&x)) return; t_begin_crit(ERR_DSEFAIL); assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); cs_addrs->ti->early_tn++; blkBase = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr); if (NULL == blkBase) { rel_crit(gv_cur_region); util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } /* Create a null update array for a block */ if (ismap) { BLK_ADDR(blkid_ptr, sizeof(block_id), block_id); *blkid_ptr = 0; t_write_map(patch_curr_blk, blkBase, (unsigned char *)blkid_ptr, cs_addrs->ti->curr_tn); cr_array_index = 0; block_saved = FALSE; } else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, blkBase, ((blk_hdr_ptr_t)blkBase)->levl, TRUE, FALSE); cr_array_index = 0; block_saved = FALSE; if (JNL_ENABLED(cs_data)) { JNL_SHORT_TIME(jgbl.gbl_jrec_time); /* needed for jnl_put_jrt_pini() and jnl_write_aimg_rec() */ jnl_status = jnl_ensure_open(); if (0 == jnl_status) { cse = (cw_set_element *)(&cw_set[0]); cse->new_buff = non_tp_jfb_buff_ptr; gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, x); cse->done = TRUE; if (0 == cs_addrs->jnl->pini_addr) jnl_put_jrt_pini(cs_addrs); jnl_write_aimg_rec(cs_addrs, cse->blk, (blk_hdr_ptr_t)cse->new_buff); } else rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); } } /* Pass the desired tn "x" as argument to bg_update or mm_update */ if (dba_bg == cs_addrs->hdr->acc_meth) bg_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); else mm_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); cs_addrs->ti->curr_tn++; assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); /* the following code is analogous to that in t_end and should be maintained in a similar fashion */ while (cr_array_index) cr_array[--cr_array_index]->in_cw_set = FALSE; rel_crit(gv_cur_region); if (block_saved) backup_buffer_flush(gv_cur_region); UNIX_ONLY( if (unhandled_stale_timer_pop) process_deferred_stale(); ) wcs_timer_start(gv_cur_region, TRUE); }