/* Finds a free block and adds information to update array and cw_set */ block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id, sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn) { sgmnt_data_ptr_t csd; sgmnt_addrs *csa; node_local_ptr_t cnl; srch_blk_status bmlhist, freeblkhist; block_id hint_blk_num, free_blk_id, parent_blk_id; boolean_t free_blk_recycled; int4 master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap; uint4 total_blks; int blk_seg_cnt, blk_size; sm_uc_ptr_t parent_blk_ptr, bn_ptr, saved_blk; blk_segment *bs1, *bs_ptr; int parent_blk_size, child_blk_size, bsiz; int rec_size1, curr_offset, bpntr_end, hdr_len; int tmp_cmpc; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned short temp_ushort; unsigned long temp_long; unsigned char save_cw_set_depth; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csd = cs_data; csa = cs_addrs; cnl = csa->nl; blk_size = csd->blk_size; /* Find a free/recycled block for new block location. */ hint_blk_num = 0; total_blks = csa->ti->total_blks; num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps); if ((NO_FREE_SPACE == master_bit)) { t_abort(gv_cur_region, csa); return ABORT_SWAP; } bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP; if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } hint_bit = 0; maxbitsthismap = (master_bit != (num_local_maps - 1)) ? 
BLKS_PER_LMAP : total_blks - bmlhist.blk_num; free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled); free_blk_id = bmlhist.blk_num + free_bit; if (DIR_ROOT >= free_blk_id) { /* Bitmap block 0 and directory tree root block 1 should always be marked busy. */ assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); return RETRY_SWAP; } if (child_blk_id <= free_blk_id) { /* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check * if the block is to be swapped into a higher block number and if so do not swap */ t_abort(gv_cur_region, csa); return ABORT_SWAP; } /* ====== begin update array ====== * Four blocks get changed. * 1. Free block becomes busy and gains the contents of child (root block/directory tree block) * 2. Parent block in directory tree remains busy, but points to new root block location. * 3. Free block's corresponding bitmap reflects above change. * 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */ parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num; CHECK_AND_RESET_UPDATE_ARRAY; if (free_blk_recycled) { /* Otherwise, it's a completely free block, in which case no need to read. 
*/ freeblkhist.blk_num = (block_id)free_blk_id; if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } } child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz; BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, child_blk_size, unsigned char); memcpy(saved_blk, child_blk_ptr, child_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr)); assert(blk_seg_cnt == child_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } tmpcse = &cw_set[cw_set_depth]; (free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state) : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state); t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl); tmpcse->mode = gds_t_acquired; if (!free_blk_recycled || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { tmpcse->old_block = freeblkhist.buffaddr; tmpcse->cr = freeblkhist.cr; tmpcse->cycle = freeblkhist.cycle; jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); t_retry(cdb_sc_lostbmlcr); return RETRY_SWAP; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz); } } /* 2. Parent block in directory tree remains busy, but points to new child block location. 
*/ curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset; parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz; GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset)); if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } BLK_INIT(bs_ptr, bs1); if (0 == parent_blk_lvl) /* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */ hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset)); else hdr_len = rec_size1 - SIZEOF(block_id); bpntr_end = curr_offset + hdr_len + SIZEOF(block_id); BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, free_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end); assert(blk_seg_cnt == parent_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN); /* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* 3. Free block's corresponding bitmap reflects above change. */ PUT_LONG(update_array_ptr, free_bit); save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */ assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1); cw_map_depth = cw_set_depth; cw_set_depth = save_cw_set_depth; update_array_ptr += SIZEOF(block_id); temp_long = 0; PUT_LONG(update_array_ptr, temp_long); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 4. 
Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ kill_set_list->blk[kill_set_list->used].flag = 0; kill_set_list->blk[kill_set_list->used].level = 0; kill_set_list->blk[kill_set_list->used++].block = child_blk_id; return free_blk_id; }
/*********************************************************************************************** Input Parameters: cur_level: Working block's level d_max_fill: Database fill factor i_max_fill: Index fill factor Output Parameters: blks_created: how many new blocks are created lvls_increased : How much level is increased Input/Output Parameters: gv_target: History of working block Here it is assumed that i_max_fill or, d_max_fill is strictly less than block size. Returns: cdb_sc_normal: if successful cdb_sc status otherwise ************************************************************************************************/ enum cdb_sc mu_split(int cur_level, int i_max_fill, int d_max_fill, int *blks_created, int *lvls_increased) { boolean_t first_copy, new_rtblk_star_only, create_root = FALSE, split_required, insert_in_left; unsigned char curr_prev_key[MAX_KEY_SZ+1], new_blk1_last_key[MAX_KEY_SZ+1]; unsigned short temp_ushort; int rec_size, new_ins_keycmpc, tkeycmpc, new_ances_currkeycmpc, old_ances_currkeycmpc; int tmp_cmpc; block_index left_index, right_index; block_offset ins_off, ins_off2; int level; int new_ins_keysz, new_ances_currkeysz, new_blk1_last_keysz, newblk2_first_keysz, next_gv_currkeysz; int old_ances_currkeylen, new_ins_keylen, new_ances_currkeylen, tkeylen, newblk2_first_keylen; int old_blk1_last_rec_size, old_blk1_sz, save_blk_piece_len, old_right_piece_len; int delta, max_fill; enum cdb_sc status; int blk_seg_cnt, blk_size, new_leftblk_top_off; block_id allocation_clue; sm_uc_ptr_t rPtr1, rPtr2, rec_base, key_base, next_gv_currkey, bn_ptr1, bn_ptr2, save_blk_piece, old_blk_after_currec, ances_currkey, old_blk1_base, new_blk1_top, new_blk2_top, new_blk2_frec_base, new_blk2_rem, newblk2_first_key, new_ins_key; blk_segment *bs_ptr1, *bs_ptr2; cw_set_element *cse; rec_hdr_ptr_t star_rec_hdr, new_rec_hdr1a, new_rec_hdr1b, new_rec_hdr2, root_hdr; blk_hdr_ptr_t blk_hdr_ptr; blk_size = cs_data->blk_size; CHECK_AND_RESET_UPDATE_ARRAY; /* reset 
update_array_ptr to update_array */ BLK_ADDR(star_rec_hdr, SIZEOF(rec_hdr), rec_hdr); star_rec_hdr->rsiz = BSTAR_REC_SIZE; SET_CMPC(star_rec_hdr, 0); level = cur_level; max_fill = (0 == level)? d_max_fill : i_max_fill; /* ------------------- * Split working block. * ------------------- * new_blk1_last_key = last key of the new working block after split * new_blk1_last_keysz = size of new_blk1_last_key * old_blk1_last_rec_size = last record size of the new working block after split (for old block) * new_blk2_frec_base = base of first record of right block created after split * newblk2_first_key = first key of new block created after split * newblk2_first_keysz = size of newblk2_first_key * new_blk2_rem = pointer to new block to be created after split exclude 1st record header + key */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; if (cdb_sc_normal != (status = locate_block_split_point (old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) /* Avoid split to create a small right sibling. 
Note this should not happen often when tolerance is high */ return cdb_sc_oprnotneeded; old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = old_blk1_base + new_leftblk_top_off; BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal != status) /* restart for cdb_sc_starrecord too, because we eliminated the possibility already */ { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* copy the compressed key piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; /* gv_currkey_next_reorg will be saved for next iteration in mu_reorg */ next_gv_currkey = newblk2_first_key; next_gv_currkeysz = newblk2_first_keysz; BLK_ADDR(new_rec_hdr1b, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr1b->rsiz = rec_size + tkeycmpc; SET_CMPC(new_rec_hdr1b, 0); /* Create new split piece, we already know that this will not be *-rec only. * Note that this has to be done BEFORE modifying working block as building this buffer relies on the * working block to be pinned which is possible only if this cw-set-element is created ahead of that * of the working block (since order in which blocks are built is the order in which cses are created). 
*/ BLK_INIT(bs_ptr2, bs_ptr1); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)new_rec_hdr1b, SIZEOF(rec_hdr)); BLK_SEG(bs_ptr2, newblk2_first_key, newblk2_first_keysz); BLK_SEG(bs_ptr2, new_blk2_rem, new_blk2_top - new_blk2_rem); if (!BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } allocation_clue = ALLOCATION_CLUE(cs_data->trans_hist.total_blks); right_index = t_create(allocation_clue++, (unsigned char *)bs_ptr1, 0, 0, level); (*blks_created)++; /* Modify working block removing split piece */ BLK_INIT(bs_ptr2, bs_ptr1); if (0 == level) { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr)); } else { BLK_SEG(bs_ptr2, old_blk1_base + SIZEOF(blk_hdr), new_leftblk_top_off - SIZEOF(blk_hdr) - old_blk1_last_rec_size); BLK_SEG(bs_ptr2, (sm_uc_ptr_t)star_rec_hdr, SIZEOF(rec_hdr) ); BLK_ADDR(bn_ptr1, SIZEOF(block_id), unsigned char); memcpy(bn_ptr1, old_blk1_base + new_leftblk_top_off - SIZEOF(block_id), SIZEOF(block_id)); BLK_SEG(bs_ptr2, bn_ptr1, SIZEOF(block_id)); } if ( !BLK_FINI(bs_ptr2, bs_ptr1)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } t_write(&gv_target->hist.h[level], (unsigned char *)bs_ptr1, 0, 0, level, FALSE, TRUE, GDS_WRITE_KILLTN); /* ---------------------------------------------------------------------------- Modify ancestor block for the split in current level. 
new_ins_key = new key to be inserted in parent because of split in child new_ins_key will be inserted after gv_target->hist.h[level].prev_rec and before gv_target->hist.h[level].curr_rec new_ins_keysz = size of new_ins_key Note: A restriction of the algorithm is to have current key and new_ins_key in the same block, either left or, new right block ---------------------------------------------------------------------------- */ BLK_ADDR(new_ins_key, new_blk1_last_keysz, unsigned char); memcpy(new_ins_key, &new_blk1_last_key[0], new_blk1_last_keysz); new_ins_keysz = new_blk1_last_keysz; for(;;) /* ========== loop through ancestors as necessary ======= */ { level ++; max_fill = i_max_fill; /* old_blk_after_currec = remaining of current block after currec ances_currkey = old real value of currkey in ancestor block */ blk_hdr_ptr = (blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr); old_blk1_base = (sm_uc_ptr_t)blk_hdr_ptr; old_blk1_sz = blk_hdr_ptr->bsiz; new_blk2_top = old_blk1_base + old_blk1_sz; rec_base = old_blk1_base + gv_target->hist.h[level].curr_rec.offset; GET_RSIZ(rec_size, rec_base); old_blk_after_currec = rec_base + rec_size; old_ances_currkeycmpc = EVAL_CMPC((rec_hdr_ptr_t)rec_base); old_ances_currkeylen = rec_size - BSTAR_REC_SIZE; if (INVALID_RECORD(level, rec_size, old_ances_currkeylen, old_ances_currkeycmpc)) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (0 == old_ances_currkeylen) { if (0 != old_ances_currkeycmpc) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } new_ances_currkeycmpc = new_ances_currkeylen = 0; } else { BLK_ADDR(ances_currkey, gv_cur_region->max_rec_size + 1, unsigned char); key_base = rec_base + SIZEOF(rec_hdr); } new_ances_currkeysz = old_ances_currkeycmpc + old_ances_currkeylen; if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) /* cur_rec is not first key */ { if (cdb_sc_normal != (status = gvcst_expand_any_key(old_blk1_base, old_blk1_base + gv_target->hist.h[level].curr_rec.offset, 
&curr_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (old_ances_currkeycmpc) memcpy(ances_currkey, &curr_prev_key[0], old_ances_currkeycmpc); } if (old_ances_currkeylen) { memcpy(ances_currkey + old_ances_currkeycmpc, key_base, old_ances_currkeylen); GET_CMPC(new_ances_currkeycmpc, new_ins_key, ances_currkey); new_ances_currkeylen = new_ances_currkeysz - new_ances_currkeycmpc; } if (SIZEOF(blk_hdr) != gv_target->hist.h[level].curr_rec.offset) { /* new_ins_key will be inseted after curr_prev_key */ GET_CMPC(new_ins_keycmpc, &curr_prev_key[0], new_ins_key); } else new_ins_keycmpc = 0; /* new_ins_key will be the 1st key */ new_ins_keylen = new_ins_keysz - new_ins_keycmpc ; delta = BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen; if (old_blk1_sz + delta > blk_size - cs_data->reserved_bytes) /* split required */ { split_required = TRUE; if (level == gv_target->hist.depth) { create_root = TRUE; if (MAX_BT_DEPTH - 1 <= level) /* maximum level reached */ return cdb_sc_maxlvl; } if (max_fill + BSTAR_REC_SIZE > old_blk1_sz) { if (SIZEOF(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz) return cdb_sc_oprnotneeded; /* Improve code to avoid this */ max_fill = old_blk1_sz - BSTAR_REC_SIZE; } status = locate_block_split_point(old_blk1_base, level, old_blk1_sz, max_fill, &old_blk1_last_rec_size, new_blk1_last_key, &new_blk1_last_keysz, &new_leftblk_top_off); if (cdb_sc_normal != status || new_leftblk_top_off >= old_blk1_sz || 0 == new_blk1_last_keysz) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } assert(BSTAR_REC_SIZE != old_blk1_last_rec_size); old_right_piece_len = old_blk1_sz - new_leftblk_top_off; new_blk2_frec_base = new_blk1_top = old_blk1_base + new_leftblk_top_off; if (BSTAR_REC_SIZE == old_right_piece_len) new_rtblk_star_only = TRUE; else new_rtblk_star_only = FALSE; if (new_leftblk_top_off == gv_target->hist.h[level].curr_rec.offset) { /* inserted key will be the first 
record of new right block */ new_ins_keylen = new_ins_keysz; new_ins_keycmpc = 0; } else /* process 1st record of new right block */ { BLK_ADDR(newblk2_first_key, gv_cur_region->max_rec_size + 1, unsigned char); READ_RECORD(level, new_blk2_frec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_normal == status) { memcpy(newblk2_first_key, &new_blk1_last_key[0], tkeycmpc); /* compressed piece */ new_blk2_rem = new_blk2_frec_base + SIZEOF(rec_hdr) + newblk2_first_keylen; newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; BLK_ADDR(new_rec_hdr2, SIZEOF(rec_hdr), rec_hdr); new_rec_hdr2->rsiz = newblk2_first_keysz + BSTAR_REC_SIZE; SET_CMPC(new_rec_hdr2, 0); } else if (cdb_sc_starrecord != status || !new_rtblk_star_only) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } } /* else gv_target->hist.h[level].curr_rec will be newblk2_first_key */ if (new_leftblk_top_off > gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* in this case prev_rec (if exists), new key and curr_rec should go into left block */ if (new_leftblk_top_off + delta - old_blk1_last_rec_size + BSTAR_REC_SIZE <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else if (new_leftblk_top_off < gv_target->hist.h[level].curr_rec.offset + old_ances_currkeylen + BSTAR_REC_SIZE) { /* if gv_target->hist.h[level].curr_rec is the first key in old_blk1 then in new right block, new_ins_key will be the 1st record key and curr_rec will be 2nd record and there will be no prev_rec in right block. Else (if curr_rec is not first key) there will be some records before new_ins_key, at least prev_rec */ delta = (int)(BSTAR_REC_SIZE + new_ins_keylen - old_ances_currkeylen + new_ances_currkeylen + ((0 == new_ins_keycmpc) ? 
0 : (EVAL_CMPC((rec_hdr_ptr_t)new_blk2_frec_base)))); if (SIZEOF(blk_hdr) + old_right_piece_len + delta <= blk_size - cs_data->reserved_bytes) { insert_in_left = FALSE; if (new_leftblk_top_off + BSTAR_REC_SIZE >= old_blk1_sz) { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } else { /* in this case prev_rec (if exists), new key and curr_rec should go into left block and curr_rec will be the last record (*-key) of left new block */ delta = BSTAR_REC_SIZE + new_ins_keylen; if (new_leftblk_top_off + delta <= blk_size - cs_data->reserved_bytes) insert_in_left = TRUE; else { /* cannot handle it now */ return cdb_sc_oprnotneeded; } } } /* end if split required */ else
/************************************************************************************************* Input Parameters: gv_target: working block's history level : Level of working block and its right sibling d_blk_fill_size : Maximum fill allowed in a data block i_blk_fill_size : Maximum fill allowed in an index block Output Parameters: kill_set_ptr : List of blocks to be freed from LBM (already killed in mu_clsce) remove_rtsib : if right sibling was completely merged with working Returns: cdb_sc_normal on success Other wise error status *************************************************************************************************/ enum cdb_sc mu_clsce(int level, int i_max_fill, int d_max_fill, kill_set *kill_set_ptr, boolean_t *remove_rtsib) { boolean_t complete_merge = FALSE, old_ref_star_only = FALSE, new_rtsib_star_only = FALSE, star_only_merge = FALSE, blk2_ances_star_only = FALSE, delete_all_blk2_ances = TRUE, levelp_next_is_star, forward_process; unsigned char oldblk1_prev_key[MAX_KEY_SZ+1], old_levelp_cur_prev_key[MAX_KEY_SZ+1], old_levelp_cur_key[MAX_KEY_SZ+1]; /* keys in private memory */ unsigned short temp_ushort; int new_levelp_cur_cmpc, new_levelp_cur_next_cmpc, tkeycmpc, oldblk1_last_cmpc, newblk1_mid_cmpc, newblk1_last_cmpc; int levelp, level2; int old_blk1_sz, old_blk2_sz; int old_levelp_cur_prev_keysz, old_levelp_cur_keysz, old_levelp_cur_next_keysz, newblk1_last_keysz, newblk2_first_keysz, new_blk2_ances_first_keysz; int old_levelp_cur_keylen, new_levelp_cur_keylen, old_levelp_cur_next_keylen, new_levelp_cur_next_keylen, oldblk1_last_keylen, newblk1_last_keylen, newblk2_first_keylen; int rec_size, piece_len, tkeylen, old_levelp_rec_offset; int blk_seg_cnt, blk_size; uint4 save_t_err; enum cdb_sc status; sm_uc_ptr_t oldblk1_last_key, old_levelp_cur_next_key, newblk1_last_key, newblk2_first_key, new_blk2_ances_first_key; /* shared memory keys */ sm_uc_ptr_t rec_base, old_levelp_blk_base, bn_ptr1, bn_ptr2, blk2_ances_remain, old_blk1_base, 
old_blk2_base, new_blk1_top, new_blk2_first_rec_base, new_blk2_remain; /* shared memory pointers */ sm_uc_ptr_t rPtr1, rPtr2; rec_hdr_ptr_t star_rec_hdr, old_last_rec_hdr1, new_rec_hdr1, new_rec_hdr2, blk2_ances_hdr, new_levelp_cur_hdr, new_levelp_cur_next_hdr; blk_segment *bs_ptr1, *bs_ptr2; srch_hist *blk1ptr, *blk2ptr; /* blk2ptr is for right sibling's hist from a minimum sub-tree containing both blocks */ error_def(ERR_GVKILLFAIL); blk_size = cs_data->blk_size; assert(update_array != NULL); update_array_ptr = update_array; blk1ptr = &(gv_target->hist); blk2ptr = gv_target->alt_hist; old_blk1_base = blk1ptr->h[level].buffaddr; old_blk2_base = blk2ptr->h[level].buffaddr; old_blk1_sz = ((blk_hdr_ptr_t)old_blk1_base)->bsiz; old_blk2_sz = ((blk_hdr_ptr_t)old_blk2_base)->bsiz; if (0 != level && sizeof(blk_hdr) + BSTAR_REC_SIZE == old_blk1_sz) old_ref_star_only = TRUE; /* Search an ancestor block at levelp >= level+1, which has a real key value corresponding to the working block. This key value will be changed after coalesce. 
*/ levelp = level; do { if (++levelp > blk1ptr->depth || levelp > blk2ptr->depth) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } old_levelp_blk_base = blk1ptr->h[levelp].buffaddr; old_levelp_rec_offset = blk1ptr->h[levelp].curr_rec.offset; rec_base = old_levelp_blk_base + old_levelp_rec_offset; GET_RSIZ(rec_size, rec_base); } while (BSTAR_REC_SIZE == rec_size); /* search ancestors to get a real value */ /* old_levelp_cur_prev_key = real value of the key before the curr_key at levelp old_levelp_cur_prev_keysz = uncompressed size of the key Note: we may not have a previous key (old_levelp_cur_prev_keysz = 0) */ if (sizeof(blk_hdr) == old_levelp_rec_offset) old_levelp_cur_prev_keysz = 0; else { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_levelp_blk_base, rec_base, &old_levelp_cur_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } old_levelp_cur_prev_keysz = tkeylen + tkeycmpc; } /* old_levelp_cur_key = real value of the curr_key at levelp old_levelp_cur_keysz = uncompressed size of the key old_levelp_cur_keylen = compressed size of the key */ READ_RECORD(levelp, rec_base, tkeycmpc, rec_size, &old_levelp_cur_key[0], old_levelp_cur_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } if (old_levelp_cur_prev_keysz) memcpy(&old_levelp_cur_key[0], &old_levelp_cur_prev_key[0], tkeycmpc); rec_base += rec_size; old_levelp_cur_keysz = old_levelp_cur_keylen + tkeycmpc; /* old_levelp_cur_next_key = uncompressed value of the next right key of old_levelp_cur_key old_levelp_cur_next_keysz = uncomressed size of the key old_levelp_cur_next_keylen = comressed size of the key Note: we may not have a next key (old_levelp_cur_next_keysz = 0) */ BLK_ADDR(old_levelp_cur_next_key, gv_cur_region->max_key_size + 1, unsigned char); READ_RECORD(levelp, rec_base, tkeycmpc, rec_size, old_levelp_cur_next_key, old_levelp_cur_next_keylen, status); if 
(cdb_sc_starrecord == status) levelp_next_is_star = TRUE; else if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } else { memcpy(old_levelp_cur_next_key, &old_levelp_cur_key[0], tkeycmpc); old_levelp_cur_next_keysz = old_levelp_cur_next_keylen + tkeycmpc; levelp_next_is_star = FALSE; } /* Now process the actual working block at current level oldblk1_last_key = real value of last key of the working block For index block decompress *-key oldblk1_last_keylen = compressed size of the last key oldblk1_last_cmpc = compression count of last key of working block old_last_rec_hdr1 = New working index block's last record header */ BLK_ADDR(oldblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char); if (0 == level) /* data block */ { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base, old_blk1_base + old_blk1_sz, oldblk1_last_key, &rec_size, &oldblk1_last_keylen, &oldblk1_last_cmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } rec_base = old_blk1_base + old_blk1_sz; } else /* Index blocks */ { /* Since we will join this working block with the right sibling, we need to remove the *-key at the end of working block and replace with actual key value (with required compression). 
We will get the real value of *-rec from its ancestor at levelp */ memcpy (oldblk1_last_key, &old_levelp_cur_key[0], old_levelp_cur_keysz); if (!old_ref_star_only) /* if the index block is not a *-key only block) */ { if (cdb_sc_normal != (status = gvcst_expand_any_key (old_blk1_base, old_blk1_base + old_blk1_sz - BSTAR_REC_SIZE, &oldblk1_prev_key[0], &rec_size, &tkeylen, &tkeycmpc, NULL))) { assert(t_tries < CDB_STAGNATE); return cdb_sc_blkmod; } GET_CMPC(oldblk1_last_cmpc, &oldblk1_prev_key[0], &old_levelp_cur_key[0]); oldblk1_last_keylen = old_levelp_cur_keysz - oldblk1_last_cmpc; } else /* working block has a *-key record only */ { /* get key value from ancestor blocks key */ oldblk1_last_keylen = old_levelp_cur_keysz; oldblk1_last_cmpc = 0; } BLK_ADDR(old_last_rec_hdr1, sizeof(rec_hdr), rec_hdr); old_last_rec_hdr1->rsiz = BSTAR_REC_SIZE + oldblk1_last_keylen; old_last_rec_hdr1->cmpc = oldblk1_last_cmpc; } /* newblk1_last_key = new working blocks final appended key newblk1_mid_cmpc = new working blocks firstly appended key's cmpc newblk1_last_keysz = new working blocks lastly appended key's size star_only_merge = TRUE, we can append only a *-key record into the working block (decompressing current *-key) complete_merge = TRUE, rtsib can be completely merged with working block piece_len = Size of data from old rtsibling to be merged into working block (includes rec_hdr size) */ BLK_ADDR(newblk1_last_key, gv_cur_region->max_key_size + 1, unsigned char); rec_base = old_blk2_base + sizeof(blk_hdr); READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_starrecord == status) /* rtsib index block has *-record only */ { if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; star_only_merge = TRUE; complete_merge = TRUE; rec_base = old_blk2_base + sizeof(blk_hdr) + BSTAR_REC_SIZE; } else if (cdb_sc_normal != status) { assert(t_tries 
< CDB_STAGNATE);; return cdb_sc_blkmod; } else /* for both data and non-* index block */ { newblk1_last_keysz = newblk1_last_keylen; /* first key has uncompressed real value */ GET_CMPC(newblk1_mid_cmpc, oldblk1_last_key, newblk1_last_key); piece_len = rec_size - newblk1_mid_cmpc; if (level == 0) /* data block */ { if (old_blk1_sz + piece_len > d_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; } else /* else an index block */ { if (old_blk1_sz + oldblk1_last_keylen + BSTAR_REC_SIZE > i_max_fill ) /* cannot fit even one record */ return cdb_sc_oprnotneeded; if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill ) star_only_merge = TRUE; /* can fit only a *-record */ } rec_base += rec_size; } /* new_blk2_first_rec_base and new_blk1_top is set with final value for star_only_merge for index block */ new_blk2_first_rec_base = new_blk1_top = rec_base; if (!star_only_merge) { BLK_ADDR(new_rec_hdr1, sizeof(rec_hdr), rec_hdr); new_rec_hdr1->rsiz = piece_len; new_rec_hdr1->cmpc = newblk1_mid_cmpc; } /* else only new_blk1_last_key will be appeneded in working block */ /* find a piece of the right sibling to be copied into the working block. 
Note: rec_base points to 2nd record of old rtsib */ if (0 == level) /* if data block */ { complete_merge = TRUE; while (rec_base < old_blk2_base + old_blk2_sz) { GET_RSIZ(rec_size, rec_base); if (old_blk1_sz + piece_len + rec_size > d_max_fill ) { complete_merge = FALSE; break; } READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE);; return cdb_sc_blkmod; } newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc; rec_base += rec_size; piece_len += rec_size; }/* end of "while" loop */ new_blk1_top = new_blk2_first_rec_base = rec_base; } else /* index block */ { if (!star_only_merge) { /* we know we can fit more record in working block and rtsibling has more records */ complete_merge = TRUE; while (rec_base < old_blk2_base + old_blk2_sz) { GET_RSIZ(rec_size, rec_base); if (BSTAR_REC_SIZE == rec_size) { rec_base += rec_size; piece_len += rec_size; break; /* already we know we can fit this *-record in working block */ } READ_RECORD(level, rec_base, newblk1_last_cmpc, rec_size, newblk1_last_key, newblk1_last_keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE);; return cdb_sc_blkmod; } newblk1_last_keysz = newblk1_last_keylen + newblk1_last_cmpc; rec_base += rec_size; piece_len += rec_size; if (old_blk1_sz + oldblk1_last_keylen + piece_len + BSTAR_REC_SIZE > i_max_fill ) { complete_merge = FALSE; break; } }/* end of "while" loop */ new_blk1_top = new_blk2_first_rec_base = rec_base; } /* end else *-only merge */ } /* end else index block */ if (!complete_merge) { /* Adjust new right sibling's buffer if new_rtsib_star_only == TRUE then new right sibling will have a *-key record only else new_blk2_remain = base pointer of buffer including 1st record but exclude rec_header and key new_blk2_first_keysz = size of new rtsib block's first key */ BLK_ADDR(newblk2_first_key, gv_cur_region->max_key_size + 1, unsigned char); 
READ_RECORD(level, new_blk2_first_rec_base, tkeycmpc, rec_size, newblk2_first_key, newblk2_first_keylen, status); if (cdb_sc_starrecord == status) /* new rtsib will have a *-record only */ new_rtsib_star_only = TRUE; else if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE);; return cdb_sc_blkmod; } else { memcpy(newblk2_first_key, newblk1_last_key, tkeycmpc); /* copy the compressed piece */ newblk2_first_keysz = newblk2_first_keylen + tkeycmpc; new_blk2_remain = new_blk2_first_rec_base + sizeof(rec_hdr) + newblk2_first_keylen; BLK_ADDR(new_rec_hdr2, sizeof(rec_hdr), rec_hdr); new_rec_hdr2->rsiz = rec_size + tkeycmpc; new_rec_hdr2->cmpc = 0; } }
/* gvcst_queryget2
 *
 * Searches the current global variable tree for gv_currkey and, in the same read transaction, fetches
 * the value of the record at which the search stopped.
 *
 * Steps, as implemented below:
 *   1. gvcst_search(gv_currkey) positions the level-0 history entry (gv_target->hist.h[0]).
 *   2. If the resulting record pointer is at or beyond CST_TOB(bp), the right sibling block is read
 *      with gvcst_rtsib() into gv_target->alt_hist and searched with gvcst_search_blk().
 *      cdb_sc_endtree from gvcst_rtsib() means there is no further record; "found" becomes FALSE.
 *   3. When a record was found, its key is expanded into gv_altkey with gvcst_expand_key() and the
 *      trailing data_len bytes of the record are copied to stringpool.free (the pool pointer is NOT
 *      advanced yet).
 *   4. The read is validated with t_end() outside TP or tp_hist() inside TP ($TLEVEL non-zero).
 *   5. Only after validation is "val" filled in and stringpool.free advanced.
 *
 * Any failure status leads to t_retry() and another pass through the loop.
 *
 * Parameters:
 *   val     - output; set to an MV_STR mval addressing the copied value when TRUE is returned.
 *   sn_ptr  - not referenced anywhere in this function body.
 *
 * Returns:
 *   TRUE  - a record was found; val describes its value and gv_altkey was passed to gvcst_expand_key().
 *   FALSE - gvcst_rtsib() reported the end of the tree; val is left untouched.
 */
boolean_t gvcst_queryget2(mval *val, unsigned char *sn_ptr)
{
	blk_hdr_ptr_t	bp;
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	int		rsiz, key_size, data_len;
	rec_hdr_ptr_t	rp;
	srch_blk_status	*bh;
	srch_hist	*rt_history;
	unsigned short	temp_ushort;	/* not used directly; NOTE(review): presumably needed by GET_RSIZ - confirm */
	int		tmp_cmpc;	/* not used directly; NOTE(review): presumably needed by a record macro - confirm */
	DEBUG_ONLY(unsigned char *save_strp = NULL);

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYGETFAIL);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		two_histories = FALSE;
#if defined(DEBUG) && defined(UNIX)
		/* White-box test hook: force a retry with cdb_sc_blknumerr when this test case is selected */
		if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYGETFAIL == gtm_white_box_test_case_number))
		{
			status = cdb_sc_blknumerr;
			t_retry(status);
			continue;
		}
#endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0)))
		{
			found = TRUE;
			bh = &gv_target->hist.h[0];
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
			{	/* Search position is past the last record of this block: continue in the right sibling */
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_endtree == status)		/* end of tree */
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				} else
				{
					bh = &rt_history->h[0];
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				}
			}
			/* !found indicates that the end of tree has been reached (see call to
			 * gvcst_rtsib).  If there is no more tree, don't bother doing expansion.
			 */
			if (found)
			{
				status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, (int4)((sm_uc_ptr_t)rp - bh->buffaddr),
						gv_altkey);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
				key_size = gv_altkey->end + 1;
				GET_RSIZ(rsiz, rp);
				/* Value length = record size plus compressed key bytes, less record header and full key */
				data_len = rsiz + EVAL_CMPC(rp) - SIZEOF(rec_hdr) - key_size;
				/* Reject a negative length or a record that would extend past the block's bsiz */
				if (data_len < 0 || (sm_uc_ptr_t)rp + rsiz > (sm_uc_ptr_t)bp + ((blk_hdr_ptr_t)bp)->bsiz)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(cdb_sc_rmisalign1);
					continue;
				}
				ENSURE_STP_FREE_SPACE(data_len);
				DEBUG_ONLY (
				if (!save_strp)
					save_strp = stringpool.free);
				assert(stringpool.top - stringpool.free >= data_len);
				/* The value occupies the last data_len bytes of the record */
				memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
				/* Assumption: t_end/tp_hist will never cause stp_gcol() call	BYPASSOK */
			}
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(!two_histories ? NULL : rt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			if (found)
			{
				DEBUG_ONLY(assert(save_strp == stringpool.free));
				/* Process val first. Already copied to string pool. */
				val->mvtype = MV_STR;
				val->str.addr = (char *)stringpool.free;
				val->str.len = data_len;
				stringpool.free += data_len;
				INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, 1);
			}
			return found;
		}
		t_retry(status);
	}
}
/******************************************************************************************* Input Parameter: blk_base = Block's base which has the key rec_top = record top of the record which will be expanded Output Parameter: expanded_key = expanded key rec_size = last record size whic has the key keylen = key size keycmpc = key compression cound hist_ptr = history of blocks read, while expanding a *-key History excludes the working block from which key is expanded and includes the blocks read below the current block to expand a *-key NOTE: hist_ptr.depth will be unchanged Return: cdb_sc_normal on success failure code on concurrency failure *******************************************************************************************/ enum cdb_sc gvcst_expand_any_key (sm_uc_ptr_t blk_base, sm_uc_ptr_t rec_top, sm_uc_ptr_t expanded_key, int *rec_size, int *keylen, int *keycmpc, srch_hist *hist_ptr) { enum cdb_sc status; unsigned char expanded_star_key[MAX_KEY_SZ]; unsigned short temp_ushort; int cur_level; int star_keycmpc; int star_keylen; int star_rec_size; int tblk_size; block_id tblk_num; sm_uc_ptr_t rPtr1, rPtr2, curptr; cur_level = ((blk_hdr_ptr_t)blk_base)->levl; curptr = blk_base + sizeof(blk_hdr); *rec_size = *keycmpc = *keylen = 0; while (curptr < rec_top) { GET_RSIZ(*rec_size, curptr); if (0 == cur_level || BSTAR_REC_SIZE != *rec_size) { READ_RECORD(cur_level, curptr, *keycmpc, *rec_size, expanded_key, *keylen, status); if (cdb_sc_normal != status) { assert(t_tries < CDB_STAGNATE); return status; } else { curptr += *rec_size; if (curptr >= rec_top) break; } } else /* a star record in index block */ { if (curptr + *rec_size != rec_top || NULL == hist_ptr) { assert(t_tries < CDB_STAGNATE); return cdb_sc_rmisalign; } while (0 != cur_level) { tblk_size = ((blk_hdr_ptr_t)blk_base)->bsiz; GET_LONG(tblk_num, blk_base + tblk_size - sizeof(block_id)); if (0 == tblk_num || cs_data->trans_hist.total_blks - 1 < tblk_num) { assert(t_tries < CDB_STAGNATE); return 
cdb_sc_badlvl; } cur_level--; hist_ptr->h[cur_level].tn = cs_addrs->ti->curr_tn; if (!(blk_base = t_qread(tblk_num, (sm_int_ptr_t)(&(hist_ptr->h[cur_level].cycle)), &(hist_ptr->h[cur_level].cr) ))) { assert(t_tries < CDB_STAGNATE); return rdfail_detail; } if (((blk_hdr_ptr_t)blk_base)->levl != cur_level) { assert(t_tries < CDB_STAGNATE); return cdb_sc_badlvl; } hist_ptr->h[cur_level].buffaddr = blk_base; hist_ptr->h[cur_level].blk_num = tblk_num; hist_ptr->h[cur_level].prev_rec.match = 0; hist_ptr->h[cur_level].prev_rec.offset = 0; hist_ptr->h[cur_level].curr_rec.match = 0; hist_ptr->h[cur_level].curr_rec.offset = 0; } tblk_size = ((blk_hdr_ptr_t)blk_base)->bsiz; /* expand *-key from right most leaf level block of the sub-tree, of which, the original block is root */ if (cdb_sc_normal != (status = (gvcst_expand_any_key(blk_base, blk_base + tblk_size, expanded_star_key, &star_rec_size, &star_keylen, &star_keycmpc, hist_ptr)))) return status; if (*keylen + *keycmpc) /* Previous key exists */ { GET_CMPC(*keycmpc, expanded_key, &expanded_star_key[0]); } memcpy(expanded_key, expanded_star_key, star_keylen + star_keycmpc); *keylen = star_keylen + star_keycmpc - *keycmpc; *rec_size = *keylen + *keycmpc + BSTAR_REC_SIZE; return cdb_sc_normal; } /* end else if *-record */ }/* end of "while" loop */ if (curptr == rec_top) { return cdb_sc_normal; } else { assert(t_tries < CDB_STAGNATE); return cdb_sc_rmisalign; } }
/******************************************************************************************
 * mu_swap_blk
 *
 * Chooses a destination block for the working block (gv_target->hist.h[level]) and queues the
 * block updates needed to move the working block there.  If the destination is busy, the two
 * blocks exchange places; if it is free or recycled, the working block is simply relocated.
 *
 * Input Parameters:
 *	level:			level of working block
 *	exclude_glist_ptr:	list of globals not to be moved for a swap destination
 *				(consulted only when exclude_glist_ptr->next is non-NULL)
 * Input/Output Parameters:
 *	pdest_blk_id:		on entry, last destination used for swap; candidates are examined
 *				starting from the value produced by INCR_BLK_NUM.  Updated with the
 *				chosen/last examined block on cdb_sc_normal and cdb_sc_oprnotneeded.
 *	kill_set_ptr:		kill set to be freed; when the destination was free/recycled the
 *				working block's number is appended to it
 *	gv_target:		as working block's history
 *	reorg_gv_target->hist:	as destination block's history
 *				(reorg_gv_target->alt_hist is used for the directory tree search)
 * Return:
 *	cdb_sc_normal		update array and cw_set entries have been set up
 *	cdb_sc_oprnotneeded	the candidate reached total_blks or the working block itself
 *	any other cdb_sc code	an inconsistency or read failure was detected
 *				NOTE(review): presumably the caller restarts on these - confirm
 ******************************************************************************************/
enum cdb_sc mu_swap_blk(int level, block_id *pdest_blk_id, kill_set *kill_set_ptr, glist *exclude_glist_ptr)
{
	unsigned char		x_blk_lmap;
	unsigned short		temp_ushort;
	int			rec_size1, rec_size2;
	int			wlevel, nslevel, dest_blk_level;
	int			piece_len1, piece_len2, first_offset, second_offset, work_blk_size, work_parent_size,
				dest_blk_size, dest_parent_size;
	int			dest_child_cycle;
	int			blk_seg_cnt, blk_size;
	trans_num		ctn;
	int			key_len, key_len_dir;
	block_id		dest_blk_id, work_blk_id, child1, child2;
	enum cdb_sc		status;
	srch_hist		*dest_hist_ptr, *dir_hist_ptr;
	cache_rec_ptr_t		dest_child_cr;
	blk_segment		*bs1, *bs_ptr;
	sm_uc_ptr_t		saved_blk, work_blk_ptr, work_parent_ptr, dest_parent_ptr, dest_blk_ptr,
				bn_ptr, bmp_buff, tblk_ptr, rec_base, rPtr1;
	boolean_t		gbl_target_was_set, blk_was_free, deleted;
	gv_namehead		*save_targ;
	srch_blk_status		bmlhist, destblkhist, *hist_ptr;
	unsigned char		save_cw_set_depth;
	cw_set_element		*tmpcse;
	jnl_buffer_ptr_t	jbbp; /* jbbp is non-NULL only if before-image journaling */
	unsigned int		bsiz;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	dest_blk_id = *pdest_blk_id;
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	if (NULL == TREF(gv_reorgkey))
		GVKEY_INIT(TREF(gv_reorgkey), DBKEYSIZE(MAX_KEY_SZ));
	dest_hist_ptr = &(reorg_gv_target->hist);
	dir_hist_ptr = reorg_gv_target->alt_hist;
	blk_size = cs_data->blk_size;
	/* Capture the working block and its parent from the caller-supplied history */
	work_parent_ptr = gv_target->hist.h[level+1].buffaddr;
	work_parent_size = ((blk_hdr_ptr_t)work_parent_ptr)->bsiz;
	work_blk_ptr = gv_target->hist.h[level].buffaddr;
	work_blk_size = ((blk_hdr_ptr_t)work_blk_ptr)->bsiz;
	work_blk_id = gv_target->hist.h[level].blk_num;
	if (blk_size < work_blk_size)
	{	/* A block cannot be larger than the database block size */
		assert(t_tries < CDB_STAGNATE);
		return cdb_sc_blkmod;
	}
	cws_reorg_remove_index = 0;
	/*===== Infinite loop to find the destination block =====*/
	for ( ; ; )
	{
		blk_was_free = FALSE;
		INCR_BLK_NUM(dest_blk_id);
		/* A Pre-order traversal should not cause a child block to go to its parent.
		 * However, in case it happens because already the organization was like that or for any other reason, skip swap.
		 * If we decide to swap, code below should be changed to take care of the special case.
		 * Still a grand-child can go to its grand-parent. This is rare and following code can handle it.
		 */
		if (dest_blk_id == gv_target->hist.h[level+1].blk_num)
			continue;
		if (cs_data->trans_hist.total_blks <= dest_blk_id || dest_blk_id == work_blk_id)
		{	/* Ran off the end of the database or reached the working block itself: nothing to swap */
			*pdest_blk_id = dest_blk_id;
			return cdb_sc_oprnotneeded;
		}
		ctn = cs_addrs->ti->curr_tn;
		/* We need to save the block numbers that were NEWLY ADDED (since entering this function "mu_swap_blk")
		 * through the CWS_INSERT macro (in db_csh_get/db_csh_getn which can be called by t_qread or gvcst_search below).
		 * This is so that we can delete these blocks from the "cw_stagnate" hashtable in case we determine the need to
		 * choose a different "dest_blk_id" in this for loop (i.e. come to the next iteration). If these blocks are not
		 * deleted, then the hashtable will keep growing (a good example will be if -EXCLUDE qualifier is specified and
		 * a lot of prospective dest_blk_ids get skipped because they contain EXCLUDEd global variables) and very soon
		 * the hashtable will contain more entries than there are global buffers and at that point db_csh_getn will not
		 * be able to get a free global buffer for a new block (since it checks the "cw_stagnate" hashtable before reusing
		 * a buffer in case of MUPIP REORG). To delete these previous iteration blocks, we use the "cws_reorg_remove_array"
		 * variable. This array should have enough entries to accommodate the maximum number of blocks that can be t_qread
		 * in one iteration down below. And that number is the sum of
		 *	     + MAX_BT_DEPTH     : for the t_qread while loop down the tree done below
		 *	     + 2 * MAX_BT_DEPTH : for the two calls to gvcst_search done below
		 *	     + 2                : 1 for the t_qread of dest_blk_id and 1 more for the t_qread of a
		 *				  bitmap block done inside the call to get_lmap below
		 *	     = 3 * MAX_BT_DEPTH + 2
		 * To be safe, we give a buffer of MAX_BT_DEPTH elements i.e. (4 * MAX_BT_DEPTH) + 2.
		 * This is defined in the macro CWS_REMOVE_ARRAYSIZE in cws_insert.h
		 */
		/* reset whatever blocks the previous iteration of this for loop had filled in the cw_stagnate hashtable */
		for ( ; cws_reorg_remove_index > 0; cws_reorg_remove_index--)
		{
			deleted = delete_hashtab_int4(&cw_stagnate, (uint4 *)&cws_reorg_remove_array[cws_reorg_remove_index]);
			assert(deleted);
		}
		/* read corresponding bitmap block before attempting to read destination block.
		 * if bitmap indicates block is free, we will not read the destination block
		 */
		bmp_buff = get_lmap(dest_blk_id, &x_blk_lmap, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
		if (!bmp_buff || BLK_MAPINVALID == x_blk_lmap ||
			((blk_hdr_ptr_t)bmp_buff)->bsiz != BM_SIZE(BLKS_PER_LMAP) ||
			((blk_hdr_ptr_t)bmp_buff)->levl != LCL_MAP_LEVL)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_badbitmap;
		}
		if (BLK_FREE != x_blk_lmap)
		{	/* x_blk_lmap is either BLK_BUSY or BLK_RECYCLED. In either case, we need to read destination block
			 * in case we later detect that the before-image needs to be written.
			 */
			if (!(dest_blk_ptr = t_qread(dest_blk_id, (sm_int_ptr_t)&destblkhist.cycle, &destblkhist.cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			destblkhist.blk_num = dest_blk_id;
			destblkhist.buffaddr = dest_blk_ptr;
			destblkhist.level = dest_blk_level = ((blk_hdr_ptr_t)dest_blk_ptr)->levl;
		}
		if (BLK_BUSY != x_blk_lmap)
		{	/* x_blk_map is either BLK_FREE or BLK_RECYCLED both of which mean the block is not used in the bitmap */
			blk_was_free = TRUE;
			break;
		}
		/* dest_blk_id might contain a *-record only.
		 * So follow the pointer to go to the data/index block, which has a non-* key to search.
		 */
		nslevel = dest_blk_level;
		if (MAX_BT_DEPTH <= nslevel)
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_maxlvl;
		}
		rec_base = dest_blk_ptr + SIZEOF(blk_hdr);
		GET_RSIZ(rec_size1, rec_base);
		tblk_ptr = dest_blk_ptr;
		while ((BSTAR_REC_SIZE == rec_size1) && (0 != nslevel))
		{	/* First record is a *-record: descend through its child pointer */
			GET_LONG(child1, (rec_base + SIZEOF(rec_hdr)));
			if (0 == child1 || child1 > cs_data->trans_hist.total_blks - 1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_rdfail;
			}
			if (!(tblk_ptr = t_qread(child1, (sm_int_ptr_t)&dest_child_cycle, &dest_child_cr)))
			{
				assert(t_tries < CDB_STAGNATE);
				return (enum cdb_sc)rdfail_detail;
			}
			/* leaf of a killed GVT can have block header only.   Skip those blocks */
			if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
				break;
			nslevel--;
			rec_base = tblk_ptr + SIZEOF(blk_hdr);
			GET_RSIZ(rec_size1, rec_base);
		}
		/* leaf of a killed GVT can have block header only.   Skip those blocks */
		if (SIZEOF(blk_hdr) >= ((blk_hdr_ptr_t)tblk_ptr)->bsiz)
			continue;
		/* get length of global variable name (do not read subscript) for dest_blk_id */
		GET_GBLNAME_LEN(key_len_dir, rec_base + SIZEOF(rec_hdr));
		/* key_len = length of 1st key value (including subscript) for dest_blk_id */
		GET_KEY_LEN(key_len, rec_base + SIZEOF(rec_hdr));
		if ((1 >= key_len_dir || MAX_MIDENT_LEN + 1 < key_len_dir) || (2 >= key_len || MAX_KEY_SZ < key_len))
		{	/* Earlier used to restart here always. But dest_blk_id can be a block,
			 * which is just killed and still marked busy.  Skip it, if we are in last retry.
			 */
			if (CDB_STAGNATE <= t_tries)
				continue;
			else
				return cdb_sc_blkmod;
		}
		/* Build a key holding just the global name, to look it up in the directory tree */
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len_dir);
		(TREF(gv_reorgkey))->base[key_len_dir] = 0;
		(TREF(gv_reorgkey))->end = key_len_dir;
		if (exclude_glist_ptr->next)
		{	/* exclude blocks for globals in the list of EXCLUDE option */
			if  (in_exclude_list(&((TREF(gv_reorgkey))->base[0]), key_len_dir - 1, exclude_glist_ptr))
				continue;
		}
		/* Temporarily switch gv_target to reorg_gv_target for the two searches below;
		 * RESET_GV_TARGET_LCL_AND_CLR_GBL is invoked on every path that leaves this section.
		 */
		save_targ = gv_target;
		if (INVALID_GV_TARGET != reset_gv_target)
			gbl_target_was_set = TRUE;
		else
		{
			gbl_target_was_set = FALSE;
			reset_gv_target = save_targ;
		}
		gv_target = reorg_gv_target;
		gv_target->root = cs_addrs->dir_tree->root;
		gv_target->clue.end = 0;
		/* assign Directory tree path to find dest_blk_id in dir_hist_ptr */
		status = gvcst_search(TREF(gv_reorgkey), dir_hist_ptr);
		if (cdb_sc_normal != status)
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return status;
		}
		if (dir_hist_ptr->h[0].curr_rec.match != (TREF(gv_reorgkey))->end + 1)
		{	/* may be in a kill_set of another process */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		/* Empty-bodied loop: stops at the first history level whose block is dest_blk_id, if any */
		for (wlevel = 0; wlevel <= dir_hist_ptr->depth &&
			     dir_hist_ptr->h[wlevel].blk_num != dest_blk_id; wlevel++);
		if (dir_hist_ptr->h[wlevel].blk_num == dest_blk_id)
		{	/* do not swap a dir_tree block */
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			continue;
		}
		/* gv_reorgkey will now have the first key from dest_blk_id,
		 * or, from a descendant of dest_blk_id (in case it had a *-key only).
		 */
		memcpy(&((TREF(gv_reorgkey))->base[0]), rec_base + SIZEOF(rec_hdr), key_len);
		(TREF(gv_reorgkey))->end = key_len - 1;
		GET_KEY_LEN(key_len_dir, dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr));
		/* Get root of GVT for dest_blk_id */
		GET_LONG(gv_target->root,
			 dir_hist_ptr->h[0].buffaddr + dir_hist_ptr->h[0].curr_rec.offset + SIZEOF(rec_hdr) + key_len_dir);
		if ((0 == gv_target->root) || (gv_target->root > (cs_data->trans_hist.total_blks - 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
			return cdb_sc_blkmod;
		}
		/* Assign Global Variable Tree path to find dest_blk_id in dest_hist_ptr */
		gv_target->clue.end = 0;
		status = gvcst_search(TREF(gv_reorgkey), dest_hist_ptr);
		RESET_GV_TARGET_LCL_AND_CLR_GBL(save_targ, DO_GVT_GVKEY_CHECK);
		if (dest_blk_level >= dest_hist_ptr->depth || /* do not swap in root level */
			dest_hist_ptr->h[dest_blk_level].blk_num != dest_blk_id) /* must be in a kill set of another process. */
			continue;
		if ((cdb_sc_normal != status) || (dest_hist_ptr->h[nslevel].curr_rec.match != ((TREF(gv_reorgkey))->end + 1)))
		{
			assert(t_tries < CDB_STAGNATE);
			return (cdb_sc_normal != status ? status : cdb_sc_blkmod);
		}
		/* Stamp the levels from nslevel up to the destination block with the tn noted before the reads */
		for (wlevel = nslevel; wlevel <= dest_blk_level; wlevel++)
			dest_hist_ptr->h[wlevel].tn = ctn;
		dest_blk_ptr = dest_hist_ptr->h[dest_blk_level].buffaddr;
		dest_blk_size = ((blk_hdr_ptr_t)dest_blk_ptr)->bsiz;
		dest_parent_ptr = dest_hist_ptr->h[dest_blk_level+1].buffaddr;
		dest_parent_size = ((blk_hdr_ptr_t)dest_parent_ptr)->bsiz;
		break;
	}
	/*===== End of infinite loop to find the destination block =====*/
	/*-----------------------------------------------------
	   Now modify blocks for swapping. Maximum of 4 blocks.
	   -----------------------------------------------------*/
	if (!blk_was_free)
	{	/* 1: dest_blk_id into work_blk_id */
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, dest_blk_ptr + SIZEOF(blk_hdr), dest_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI (bs_ptr,bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level].blk_num == work_blk_id);
		assert(gv_target->hist.h[level].buffaddr == work_blk_ptr);
		t_write(&gv_target->hist.h[level], (unsigned char *)bs1, 0, 0, dest_blk_level, TRUE, TRUE, GDS_WRITE_KILLTN);
	}
	/* 2: work_blk_id into dest_blk_id */
	if (!blk_was_free && work_blk_id == dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* work_blk_id will be swapped with its child.
		 * This is the only vertical swap.  Here working block goes to its child.
		 * Working block cannot goto its parent because of traversal
		 */
		if (dest_blk_level + 1 != level || dest_parent_size != work_blk_size)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, dest_parent_size, unsigned char);
		memcpy(saved_blk, dest_parent_ptr, dest_parent_size);
		first_offset = dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset;
		GET_RSIZ(rec_size1, saved_blk + first_offset);
		if (work_blk_size < first_offset + rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		/* New contents = copy of the parent with the block pointer that ends the record at first_offset replaced */
		piece_len1 =  first_offset + rec_size1;
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), piece_len1 - SIZEOF(block_id) - SIZEOF(blk_hdr));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, work_blk_id); /* since work_blk_id will now be the child of dest_blk_id */
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, saved_blk + piece_len1, dest_parent_size - piece_len1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(dest_blk_id == dest_hist_ptr->h[dest_blk_level].blk_num);
		assert(dest_blk_ptr == dest_hist_ptr->h[dest_blk_level].buffaddr);
		t_write(&dest_hist_ptr->h[dest_blk_level], (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
	} else /* free block or, when working block does not move vertically (swap with parent/child) */
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_ADDR(saved_blk, work_blk_size, unsigned char);
		memcpy(saved_blk, work_blk_ptr, work_blk_size);
		BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), work_blk_size - SIZEOF(blk_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		if (blk_was_free)
		{
			/* Remember the cw_set slot that the t_create below is expected to fill (asserted right after) */
			tmpcse = &cw_set[cw_set_depth];
			t_create(dest_blk_id, (unsigned char *)bs1, 0, 0, level);
			/* Although we invoked t_create, we do not want t_end to allocate the block (i.e. change mode
			 * from gds_t_create to gds_t_acquired). Instead we do that and a little more (that t_end does) all here.
			 */
			assert(dest_blk_id == tmpcse->blk);
			tmpcse->mode = gds_t_acquired;
			/* If snapshots are in progress, we might want to read the before images of the FREE blocks also.
			 * Since mu_swap_blk mimics a small part of t_end, it sets cse->mode to gds_t_acquired and hence
			 * will not read the before images of the FREE blocks in t_end. To workaround this, set
			 * cse->was_free to TRUE so that in t_end, this condition can be used to read the before images of
			 * the FREE blocks if needed.
			 */
			(BLK_FREE == x_blk_lmap) ? SET_FREE(tmpcse) : SET_NFREE(tmpcse);
			/* No need to write before-image in case the block is FREE. In case the database had never been fully
			 * upgraded from V4 to V5 format (after the MUPIP UPGRADE), all RECYCLED blocks can basically be considered
			 * FREE (i.e. no need to write before-images since backward journal recovery will never be expected
			 * to take the database to a point BEFORE the mupip upgrade).
			 */
			if ((BLK_FREE == x_blk_lmap) || !cs_data->db_got_to_v5_once)
				tmpcse->old_block = NULL;
			else
			{	/* Destination is a recycled block that needs a before image */
				tmpcse->old_block = destblkhist.buffaddr;
				/* Record cr,cycle. This is used later in t_end to determine if checksums need to be recomputed */
				tmpcse->cr = destblkhist.cr;
				tmpcse->cycle = destblkhist.cycle;
				jbbp = (JNL_ENABLED(cs_addrs) && cs_addrs->jnl_before_image) ? cs_addrs->jnl->jnl_buff : NULL;
				if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn))
				{	/* Compute CHECKSUM for writing PBLK record before getting crit.
					 * It is possible that we are reading a block that is actually marked free in
					 * the bitmap (due to concurrency issues at this point). Therefore we might be
					 * actually reading uninitialized block headers and in turn a bad value of
					 * "old_block->bsiz". Restart if we ever access a buffer whose size is greater
					 * than the db block size.
					 */
					bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz;
					if (bsiz > blk_size)
					{
						assert(CDB_STAGNATE > t_tries);
						return cdb_sc_lostbmlcr;
					}
					JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, cs_data, cs_addrs, tmpcse->old_block, bsiz);
				}
			}
			assert(GDSVCURR == tmpcse->ondsk_blkver);	/* should have been set by t_create above */
		} else
		{	/* Destination is busy: overwrite it with the working block's contents */
			hist_ptr = &dest_hist_ptr->h[dest_blk_level];
			assert(dest_blk_id == hist_ptr->blk_num);
			assert(dest_blk_ptr == hist_ptr->buffaddr);
			t_write(hist_ptr, (unsigned char *)bs1, 0, 0, level, TRUE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	if (!blk_was_free)
	{	/* 3: Parent of destination block (may be parent of working block too) */
		if (gv_target->hist.h[level+1].blk_num == dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* dest parent == work_blk parent */
			BLK_INIT(bs_ptr, bs1);
			/* Interchange pointer to dest_blk_id and work_blk_id */
			if (level != dest_blk_level ||
				gv_target->hist.h[level+1].curr_rec.offset == dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			/* Order the two records so that first_offset is the lower offset in the shared parent */
			if (gv_target->hist.h[level+1].curr_rec.offset < dest_hist_ptr->h[level+1].curr_rec.offset)
			{
				first_offset = gv_target->hist.h[level+1].curr_rec.offset;
				second_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
			} else
			{
				first_offset = dest_hist_ptr->h[level+1].curr_rec.offset;
				second_offset = gv_target->hist.h[level+1].curr_rec.offset;
			}
			GET_RSIZ(rec_size1, dest_parent_ptr + first_offset);
			GET_RSIZ(rec_size2, dest_parent_ptr + second_offset);
			if (dest_parent_size < first_offset + rec_size1 ||
				dest_parent_size < second_offset + rec_size2 ||
				BSTAR_REC_SIZE >= rec_size1 || BSTAR_REC_SIZE > rec_size2)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			/* piece_lenN = offset of the block pointer that ends record N */
			piece_len1 = first_offset + rec_size1 - SIZEOF(block_id);
			piece_len2 = second_offset + rec_size2 - SIZEOF(block_id);
			GET_LONG(child1, dest_parent_ptr + piece_len1);
			GET_LONG(child2, dest_parent_ptr + piece_len2);
			BLK_SEG(bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr), piece_len1 - SIZEOF(blk_hdr));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child2);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + first_offset + rec_size1,
				second_offset + rec_size2 - SIZEOF(block_id) - first_offset - rec_size1);
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, child1);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + second_offset + rec_size2,
				dest_parent_size - second_offset - rec_size2);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(level == dest_blk_level);
			assert(dest_parent_ptr == dest_hist_ptr->h[level+1].buffaddr);
			t_write(&dest_hist_ptr->h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
		} else if (work_blk_id != dest_hist_ptr->h[dest_blk_level+1].blk_num)
		{	/* Destination block moved in the position of working block.
			 * So destination block's parent's pointer should be changed to work_blk_id
			 */
			BLK_INIT(bs_ptr, bs1);
			GET_RSIZ(rec_size1, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset);
			if (dest_parent_size < rec_size1 + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset ||
				BSTAR_REC_SIZE > rec_size1)
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			BLK_SEG (bs_ptr, dest_parent_ptr + SIZEOF(blk_hdr),
			    dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
			BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
			PUT_LONG(bn_ptr, work_blk_id);
			BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
			BLK_SEG(bs_ptr, dest_parent_ptr + dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset + rec_size1,
				dest_parent_size - dest_hist_ptr->h[dest_blk_level+1].curr_rec.offset  - rec_size1);
			if (!BLK_FINI(bs_ptr,bs1))
			{
				assert(t_tries < CDB_STAGNATE);
				return cdb_sc_blkmod;
			}
			assert(dest_parent_ptr == dest_hist_ptr->h[dest_blk_level+1].buffaddr);
			t_write(&dest_hist_ptr->h[dest_blk_level+1], (unsigned char *)bs1, 0, 0, dest_blk_level+1,
				FALSE, TRUE, GDS_WRITE_KILLTN);
		}
	}
	/* 4: Parent of working block, if different than destination's parent or, destination was a free block */
	if (blk_was_free || gv_target->hist.h[level+1].blk_num != dest_hist_ptr->h[dest_blk_level+1].blk_num)
	{	/* Parent block of working blk should correctly point the working block. Working block went to dest_blk_id  */
		GET_RSIZ(rec_size1, (work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset));
		if (work_parent_size < rec_size1 + gv_target->hist.h[level+1].curr_rec.offset || BSTAR_REC_SIZE > rec_size1)
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, work_parent_ptr + SIZEOF(blk_hdr),
			gv_target->hist.h[level+1].curr_rec.offset + rec_size1 - SIZEOF(blk_hdr) - SIZEOF(block_id));
		BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char);
		PUT_LONG(bn_ptr, dest_blk_id);
		BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id));
		BLK_SEG(bs_ptr, work_parent_ptr + gv_target->hist.h[level+1].curr_rec.offset + rec_size1,
			work_parent_size - gv_target->hist.h[level+1].curr_rec.offset - rec_size1);
		if (!BLK_FINI(bs_ptr, bs1))
		{
			assert(t_tries < CDB_STAGNATE);
			return cdb_sc_blkmod;
		}
		assert(gv_target->hist.h[level+1].buffaddr == work_parent_ptr);
		t_write(&gv_target->hist.h[level+1], (unsigned char *)bs1, 0, 0, level+1, FALSE, TRUE, GDS_WRITE_KILLTN);
	}
	/* else already taken care of, when dest_blk_id moved */
	if (blk_was_free)
	{	/* A free/recycled block will become busy block.
		 * So the local bitmap must be updated.
		 * Local bit map block will be added in the list of update array for concurrency check and
		 * also the cw_set element will be created to mark the free/recycled block as busy.
		 * kill_set_ptr will save the block which will become free.
		 */
		child1 = ROUND_DOWN2(dest_blk_id, BLKS_PER_LMAP); /* bit map block */
		bmlhist.buffaddr = bmp_buff;
		bmlhist.blk_num = child1;
		child1 = dest_blk_id - child1;	/* offset of dest_blk_id within its local bitmap */
		assert(child1);
		PUT_LONG(update_array_ptr, child1);
		/* Need to put bit maps on the end of the cw set for concurrency checking.
		 * We want to simulate t_write_map, except we want to update "cw_map_depth" instead of "cw_set_depth".
		 * Hence the save and restore logic (for "cw_set_depth") below.
		 */
		save_cw_set_depth = cw_set_depth;
		assert(!cw_map_depth);
		t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, ctn, 1);	/* will increment cw_set_depth */
		cw_map_depth = cw_set_depth;		/* set cw_map_depth to the latest cw_set_depth */
		cw_set_depth = save_cw_set_depth;	/* restore cw_set_depth */
		/* t_write_map simulation end */
		update_array_ptr += SIZEOF(block_id);
		/* A zero entry follows the single offset written above */
		child1 = 0;
		PUT_LONG(update_array_ptr, child1);
		update_array_ptr += SIZEOF(block_id);
		assert(1 == cw_set[cw_map_depth - 1].reference_cnt);	/* 1 free block is now becoming BLK_USED in the bitmap */
		/* working block will be removed */
		kill_set_ptr->blk[kill_set_ptr->used].flag = 0;
		kill_set_ptr->blk[kill_set_ptr->used].level = 0;
		kill_set_ptr->blk[kill_set_ptr->used++].block = work_blk_id;
	}
	*pdest_blk_id = dest_blk_id;
	return cdb_sc_normal;
}