/********************************************** Statistics of reorg for current global. Also update dest_blklist_ptr for next globals ***********************************************/ void reorg_finish(block_id dest_blk_id, int blks_processed, int blks_killed, int blks_reused, int file_extended, int lvls_reduced, int blks_coalesced, int blks_split, int blks_swapped) { t_abort(gv_cur_region, cs_addrs); file_extended = cs_data->trans_hist.total_blks - file_extended; util_out_print("Blocks processed : !SL ", FLUSH, blks_processed); util_out_print("Blocks coalesced : !SL ", FLUSH, blks_coalesced); util_out_print("Blocks split : !SL ", FLUSH, blks_split); util_out_print("Blocks swapped : !SL ", FLUSH, blks_swapped); util_out_print("Blocks freed : !SL ", FLUSH, blks_killed); util_out_print("Blocks reused : !SL ", FLUSH, blks_reused); if (0 > lvls_reduced) util_out_print("Levels Increased : !SL ", FLUSH, -lvls_reduced); else if (0 < lvls_reduced) util_out_print("Levels Eliminated : !SL ", FLUSH, lvls_reduced); util_out_print("Blocks extended : !SL ", FLUSH, file_extended); cs_addrs->reorg_last_dest = dest_blk_id; /* next attempt for this global will start from the beginning, if RESUME option is present */ cs_data->reorg_restart_block = 0; cs_data->reorg_restart_key[0] = 0; cs_data->reorg_restart_key[1] = 0; }
trans_num gvcst_bmp_mark_free(kill_set *ks) { block_id bit_map, next_bm, *updptr; blk_ident *blk, *blk_top, *nextblk; trans_num ctn, start_db_fmt_tn; unsigned int len; # if defined(UNIX) && defined(DEBUG) unsigned int lcl_t_tries; # endif int4 blk_prev_version; srch_hist alt_hist; trans_num ret_tn = 0; boolean_t visit_blks; srch_blk_status bmphist; cache_rec_ptr_t cr; enum db_ver ondsk_blkver; enum cdb_sc status; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; TREF(in_gvcst_bmp_mark_free) = TRUE; assert(inctn_bmp_mark_free_gtm == inctn_opcode || inctn_bmp_mark_free_mu_reorg == inctn_opcode); /* Note down the desired_db_format_tn before you start relying on cs_data->fully_upgraded. * If the db is fully_upgraded, take the optimal path that does not need to read each block being freed. * But in order to detect concurrent desired_db_format changes, note down the tn (when the last format change occurred) * before the fully_upgraded check and after having noted down the database current_tn. * If they are the same, then we are guaranteed no concurrent desired_db_format change occurred. * If they are not, then fall through to the non-optimal path where each to-be-killed block has to be visited. * The reason we need to visit every block in case desired_db_format changes is to take care of the case where * MUPIP REORG DOWNGRADE concurrently changes a block that we are about to free. */ start_db_fmt_tn = cs_data->desired_db_format_tn; visit_blks = (!cs_data->fully_upgraded); /* Local evaluation */ assert(!visit_blks || (visit_blks && dba_bg == cs_addrs->hdr->acc_meth)); /* must have blks_to_upgrd == 0 for non-BG */ assert(!dollar_tlevel); /* Should NOT be in TP now */ blk = &ks->blk[0]; blk_top = &ks->blk[ks->used]; if (!visit_blks) { /* Database has been completely upgraded. Free all blocks in one bitmap as part of one transaction. */ assert(cs_data->db_got_to_v5_once); /* assert all V4 fmt blocks (including RECYCLED) have space for V5 upgrade */ inctn_detail.blknum_struct.blknum = 0; /* to indicate no adjustment to "blks_to_upgrd" necessary */ /* If any of the mini transaction below restarts because of an online rollback, we don't want the application * refresh to happen (like $ZONLNRLBK++ or rts_error(DBROLLEDBACK). This is because, although we are currently in * non-tp (dollar_tleve = 0), we could actually be in a TP transaction and have actually faked dollar_tlevel. In * such a case, we should NOT * be issuing a DBROLLEDBACK error as TP transactions are supposed to just restart in * case of an online rollback. So, set the global variable that gtm_onln_rlbk_clnup can check and skip doing the * application refresh, but will reset the clues. The next update will see the cycle mismatch and will accordingly * take the right action. */ for ( ; blk < blk_top; blk = nextblk) { if (0 != blk->flag) { nextblk = blk + 1; continue; } assert(0 < blk->block); assert((int4)blk->block < cs_addrs->ti->total_blks); bit_map = ROUND_DOWN2((int)blk->block, BLKS_PER_LMAP); next_bm = bit_map + BLKS_PER_LMAP; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ /* Scan for the next local bitmap */ updptr = (block_id *)update_array_ptr; for (nextblk = blk; (0 == nextblk->flag) && (nextblk < blk_top) && ((block_id)nextblk->block < next_bm); ++nextblk) { assert((block_id)nextblk->block - bit_map); *updptr++ = (block_id)nextblk->block - bit_map; } len = (unsigned int)((char *)nextblk - (char *)blk); update_array_ptr = (char *)updptr; alt_hist.h[0].blk_num = 0; /* need for calls to T_END for bitmaps */ alt_hist.h[0].blk_target = NULL; /* need to initialize for calls to T_END */ /* the following assumes SIZEOF(blk_ident) == SIZEOF(int) */ assert(SIZEOF(blk_ident) == SIZEOF(int)); *(int *)update_array_ptr = 0; t_begin(ERR_GVKILLFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { ctn = cs_addrs->ti->curr_tn; /* Need a read fence before reading fields from cs_data as we are reading outside * of crit and relying on this value to detect desired db format state change. */ SHM_READ_MEMORY_BARRIER; if (start_db_fmt_tn != cs_data->desired_db_format_tn) { /* Concurrent db format change has occurred. Need to visit every block to be killed * to determine its block format. Fall through to the non-optimal path below */ ret_tn = 0; break; } bmphist.blk_num = bit_map; if (NULL == (bmphist.buffaddr = t_qread(bmphist.blk_num, (sm_int_ptr_t)&bmphist.cycle, &bmphist.cr))) { t_retry((enum cdb_sc)rdfail_detail); continue; } t_write_map(&bmphist, (uchar_ptr_t)update_array, ctn, -(int4)(nextblk - blk)); UNIX_ONLY(DEBUG_ONLY(lcl_t_tries = t_tries)); if ((trans_num)0 == (ret_tn = t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))) { # ifdef UNIX assert((CDB_STAGNATE == t_tries) || (lcl_t_tries == t_tries - 1)); status = LAST_RESTART_CODE; if ((cdb_sc_onln_rlbk1 == status) || (cdb_sc_onln_rlbk2 == status) || TREF(rlbk_during_redo_root)) { /* t_end restarted due to online rollback. Discard bitmap free-up and return control * to the application. But, before that reset only_reset_clues_if_onln_rlbk to FALSE */ TREF(in_gvcst_bmp_mark_free) = FALSE; send_msg(VARLSTCNT(6) ERR_IGNBMPMRKFREE, 4, REG_LEN_STR(gv_cur_region), DB_LEN_STR(gv_cur_region)); t_abort(gv_cur_region, cs_addrs); return ret_tn; /* actually 0 */ } # endif continue; } break; } if (0 == ret_tn) /* db format change occurred. Fall through to below for loop to visit each block */ { /* Abort any active transaction to get rid of lingering Non-TP artifacts */ t_abort(gv_cur_region, cs_addrs); break; } } } /* for all blocks in the kill_set */
void mu_swap_root(glist *gl_ptr, int *root_swap_statistic_ptr) { sgmnt_data_ptr_t csd; sgmnt_addrs *csa; node_local_ptr_t cnl; srch_hist *dir_hist_ptr, *gvt_hist_ptr; gv_namehead *save_targ; block_id root_blk_id, child_blk_id, free_blk_id; sm_uc_ptr_t root_blk_ptr, child_blk_ptr; kill_set kill_set_list; trans_num curr_tn, ret_tn; int level, root_blk_lvl; block_id save_root; boolean_t tn_aborted; unsigned int lcl_t_tries; enum cdb_sc status; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(mu_reorg_process); gv_target = gl_ptr->gvt; gv_target->root = 0; /* reset root so we recompute it in DO_OP_GVNAME below */ gv_target->clue.end = 0; /* reset clue since reorg action on later globals might have invalidated it */ reorg_gv_target->gvname.var_name = gv_target->gvname.var_name; /* needed by SAVE_ROOTSRCH_ENTRY_STATE */ dir_hist_ptr = gv_target->alt_hist; gvt_hist_ptr = &(gv_target->hist); inctn_opcode = inctn_invalid_op; DO_OP_GVNAME(gl_ptr); /* sets gv_target/gv_currkey/gv_cur_region/cs_addrs/cs_data to correspond to <globalname,reg> in gl_ptr */ csa = cs_addrs; cnl = csa->nl; csd = cs_data; /* Be careful to keep csd up to date. With MM, cs_data can change, and * dereferencing an older copy can result in a SIG-11. */ if (gv_cur_region->read_only) return; /* Cannot proceed for read-only data files */ if (0 == gv_target->root) { /* Global does not exist (online rollback). No problem. */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_GBLNOEXIST, 2, GNAME(gl_ptr).len, GNAME(gl_ptr).addr); return; } if (dba_mm == csd->acc_meth) /* return for now without doing any swapping operation because later mu_truncate * is going to issue the MUTRUNCNOTBG message. */ return; SET_GV_ALTKEY_TO_GBLNAME_FROM_GV_CURRKEY; /* set up gv_altkey to be just the gblname */ /* ------------ Swap root block of global variable tree --------- */ t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { curr_tn = csa->ti->curr_tn; kill_set_list.used = 0; save_root = gv_target->root; gv_target->root = csa->dir_tree->root; gv_target->clue.end = 0; if (cdb_sc_normal != (status = gvcst_search(gv_altkey, dir_hist_ptr))) { /* Assign directory tree path to dir_hist_ptr */ assert(t_tries < CDB_STAGNATE); gv_target->root = save_root; t_retry(status); continue; } gv_target->root = save_root; gv_target->clue.end = 0; if (cdb_sc_normal != (gvcst_search(gv_currkey, NULL))) { /* Assign global variable tree path to gvt_hist_ptr */ assert(t_tries < CDB_STAGNATE); t_retry(status); continue; } /* We've already search the directory tree in op_gvname/t_retry and obtained gv_target->root. * Should restart with gvtrootmod2 if they don't agree. gvcst_root_search is the final arbiter. * Really need that for debug info and also should assert(gv_currkey is global name). */ root_blk_lvl = gvt_hist_ptr->depth; assert(root_blk_lvl > 0); root_blk_ptr = gvt_hist_ptr->h[root_blk_lvl].buffaddr; root_blk_id = gvt_hist_ptr->h[root_blk_lvl].blk_num; assert((CDB_STAGNATE > t_tries) || (gv_target->root == gvt_hist_ptr->h[root_blk_lvl].blk_num)); free_blk_id = swap_root_or_directory_block(0, root_blk_lvl, dir_hist_ptr, root_blk_id, root_blk_ptr, &kill_set_list, curr_tn); if (RETRY_SWAP == free_blk_id) continue; else if (ABORT_SWAP == free_blk_id) break; update_trans = UPDTRNS_DB_UPDATED_MASK; inctn_opcode = inctn_mu_reorg; assert(1 == kill_set_list.used); need_kip_incr = TRUE; if (!csa->now_crit) WAIT_ON_INHIBIT_KILLS(cnl, MAXWAIT2KILL); DEBUG_ONLY(lcl_t_tries = t_tries); TREF(in_mu_swap_root_state) = MUSWP_INCR_ROOT_CYCLE; assert(!TREF(in_gvcst_redo_root_search)); if ((trans_num)0 == (ret_tn = t_end(gvt_hist_ptr, dir_hist_ptr, TN_NOT_SPECIFIED))) { TREF(in_mu_swap_root_state) = MUSWP_NONE; need_kip_incr = FALSE; assert(NULL == kip_csa); ABORT_TRANS_IF_GBL_EXIST_NOMORE(lcl_t_tries, tn_aborted); if (tn_aborted) { /* It is not an error if the global (that once existed) doesn't exist anymore (due to ROLLBACK) */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_GBLNOEXIST, 2, GNAME(gl_ptr).len, GNAME(gl_ptr).addr); return; } continue; } TREF(in_mu_swap_root_state) = MUSWP_NONE; /* Note that this particular process's csa->root_search_cycle is now behind cnl->root_search_cycle. * This forces a cdb_sc_gvtrootmod2 restart in gvcst_bmp_mark_free below. */ assert(cnl->root_search_cycle > csa->root_search_cycle); gvcst_kill_sort(&kill_set_list); GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, csa); DECR_KIP(csd, csa, kip_csa); *root_swap_statistic_ptr += 1; break; } /* ------------ Swap blocks in branch of directory tree --------- */ for (level = 0; level <= MAX_BT_DEPTH; level++) { t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { curr_tn = csa->ti->curr_tn; kill_set_list.used = 0; save_root = gv_target->root; gv_target->root = csa->dir_tree->root; gv_target->clue.end = 0; if (cdb_sc_normal != (status = gvcst_search(gv_altkey, dir_hist_ptr))) { /* assign branch path of directory tree into dir_hist_ptr */ assert(t_tries < CDB_STAGNATE); gv_target->root = save_root; t_retry(status); continue; } gv_target->root = save_root; gv_target->clue.end = 0; if (level >= dir_hist_ptr->depth) { /* done */ t_abort(gv_cur_region, csa); return; } child_blk_ptr = dir_hist_ptr->h[level].buffaddr; child_blk_id = dir_hist_ptr->h[level].blk_num; assert(csa->dir_tree->root != child_blk_id); free_blk_id = swap_root_or_directory_block(level + 1, level, dir_hist_ptr, child_blk_id, child_blk_ptr, &kill_set_list, curr_tn); if (level == 0) /* set level as 1 to mark this kill set is for level-0 block in directory tree. * The kill-set level later will be used in gvcst_bmp_markfree to assign a special value to * cw_set_element, which will be eventually used by t_end to write the block to snapshot */ kill_set_list.blk[kill_set_list.used - 1].level = 1; if (RETRY_SWAP == free_blk_id) continue; else if (ABORT_SWAP == free_blk_id) break; update_trans = UPDTRNS_DB_UPDATED_MASK; inctn_opcode = inctn_mu_reorg; assert(1 == kill_set_list.used); need_kip_incr = TRUE; if (!csa->now_crit) WAIT_ON_INHIBIT_KILLS(cnl, MAXWAIT2KILL); DEBUG_ONLY(lcl_t_tries = t_tries); TREF(in_mu_swap_root_state) = MUSWP_DIRECTORY_SWAP; if ((trans_num)0 == (ret_tn = t_end(dir_hist_ptr, NULL, TN_NOT_SPECIFIED))) { TREF(in_mu_swap_root_state) = MUSWP_NONE; need_kip_incr = FALSE; assert(NULL == kip_csa); continue; } TREF(in_mu_swap_root_state) = MUSWP_NONE; gvcst_kill_sort(&kill_set_list); TREF(in_mu_swap_root_state) = MUSWP_FREE_BLK; GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, csa); TREF(in_mu_swap_root_state) = MUSWP_NONE; DECR_KIP(csd, csa, kip_csa); break; } } return; }
/* Finds a free block and adds information to update array and cw_set */ block_id swap_root_or_directory_block(int parent_blk_lvl, int child_blk_lvl, srch_hist *dir_hist_ptr, block_id child_blk_id, sm_uc_ptr_t child_blk_ptr, kill_set *kill_set_list, trans_num curr_tn) { sgmnt_data_ptr_t csd; sgmnt_addrs *csa; node_local_ptr_t cnl; srch_blk_status bmlhist, freeblkhist; block_id hint_blk_num, free_blk_id, parent_blk_id; boolean_t free_blk_recycled; int4 master_bit, num_local_maps, free_bit, hint_bit, maxbitsthismap; uint4 total_blks; int blk_seg_cnt, blk_size; sm_uc_ptr_t parent_blk_ptr, bn_ptr, saved_blk; blk_segment *bs1, *bs_ptr; int parent_blk_size, child_blk_size, bsiz; int rec_size1, curr_offset, bpntr_end, hdr_len; int tmp_cmpc; cw_set_element *tmpcse; jnl_buffer_ptr_t jbbp; /* jbbp is non-NULL only if before-image journaling */ unsigned short temp_ushort; unsigned long temp_long; unsigned char save_cw_set_depth; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csd = cs_data; csa = cs_addrs; cnl = csa->nl; blk_size = csd->blk_size; /* Find a free/recycled block for new block location. */ hint_blk_num = 0; total_blks = csa->ti->total_blks; num_local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP); master_bit = bmm_find_free((hint_blk_num / BLKS_PER_LMAP), csa->bmm, num_local_maps); if ((NO_FREE_SPACE == master_bit)) { t_abort(gv_cur_region, csa); return ABORT_SWAP; } bmlhist.blk_num = (block_id)master_bit * BLKS_PER_LMAP; if (NULL == (bmlhist.buffaddr = t_qread(bmlhist.blk_num, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } hint_bit = 0; maxbitsthismap = (master_bit != (num_local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bmlhist.blk_num; free_bit = bm_find_blk(hint_bit, bmlhist.buffaddr + SIZEOF(blk_hdr), maxbitsthismap, &free_blk_recycled); free_blk_id = bmlhist.blk_num + free_bit; if (DIR_ROOT >= free_blk_id) { /* Bitmap block 0 and directory tree root block 1 should always be marked busy. */ assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); return RETRY_SWAP; } if (child_blk_id <= free_blk_id) { /* stop swapping root or DT blocks once the database is truncated well enough. A good heuristic for this is to check * if the block is to be swapped into a higher block number and if so do not swap */ t_abort(gv_cur_region, csa); return ABORT_SWAP; } /* ====== begin update array ====== * Four blocks get changed. * 1. Free block becomes busy and gains the contents of child (root block/directory tree block) * 2. Parent block in directory tree remains busy, but points to new root block location. * 3. Free block's corresponding bitmap reflects above change. * 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ parent_blk_ptr = dir_hist_ptr->h[parent_blk_lvl].buffaddr; /* parent_blk_lvl is 0 iff we're moving a gvt root block */ parent_blk_id = dir_hist_ptr->h[parent_blk_lvl].blk_num; CHECK_AND_RESET_UPDATE_ARRAY; if (free_blk_recycled) { /* Otherwise, it's a completely free block, in which case no need to read. */ freeblkhist.blk_num = (block_id)free_blk_id; if (NULL == (freeblkhist.buffaddr = t_qread(free_blk_id, (sm_int_ptr_t)&freeblkhist.cycle, &freeblkhist.cr))) { assert(t_tries < CDB_STAGNATE); t_retry((enum cdb_sc)rdfail_detail); return RETRY_SWAP; } } child_blk_size = ((blk_hdr_ptr_t)child_blk_ptr)->bsiz; BLK_INIT(bs_ptr, bs1); BLK_ADDR(saved_blk, child_blk_size, unsigned char); memcpy(saved_blk, child_blk_ptr, child_blk_size); BLK_SEG(bs_ptr, saved_blk + SIZEOF(blk_hdr), child_blk_size - SIZEOF(blk_hdr)); assert(blk_seg_cnt == child_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } tmpcse = &cw_set[cw_set_depth]; (free_blk_recycled) ? BIT_SET_RECYCLED_AND_CLEAR_FREE(tmpcse->blk_prior_state) : BIT_CLEAR_RECYCLED_AND_SET_FREE(tmpcse->blk_prior_state); t_create(free_blk_id, (unsigned char *)bs1, 0, 0, child_blk_lvl); tmpcse->mode = gds_t_acquired; if (!free_blk_recycled || !cs_data->db_got_to_v5_once) tmpcse->old_block = NULL; else { tmpcse->old_block = freeblkhist.buffaddr; tmpcse->cr = freeblkhist.cr; tmpcse->cycle = freeblkhist.cycle; jbbp = (JNL_ENABLED(csa) && csa->jnl_before_image) ? csa->jnl->jnl_buff : NULL; if ((NULL != jbbp) && (((blk_hdr_ptr_t)tmpcse->old_block)->tn < jbbp->epoch_tn)) { bsiz = ((blk_hdr_ptr_t)(tmpcse->old_block))->bsiz; if (bsiz > blk_size) { assert(CDB_STAGNATE > t_tries); t_retry(cdb_sc_lostbmlcr); return RETRY_SWAP; } JNL_GET_CHECKSUM_ACQUIRED_BLK(tmpcse, csd, csa, tmpcse->old_block, bsiz); } } /* 2. Parent block in directory tree remains busy, but points to new child block location. */ curr_offset = dir_hist_ptr->h[parent_blk_lvl].curr_rec.offset; parent_blk_size = ((blk_hdr_ptr_t)parent_blk_ptr)->bsiz; GET_RSIZ(rec_size1, (parent_blk_ptr + curr_offset)); if ((parent_blk_size < rec_size1 + curr_offset) || (BSTAR_REC_SIZE > rec_size1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } BLK_INIT(bs_ptr, bs1); if (0 == parent_blk_lvl) /* There can be collation stuff in the record value after the block pointer. See gvcst_root_search. */ hdr_len = SIZEOF(rec_hdr) + gv_altkey->end + 1 - EVAL_CMPC((rec_hdr_ptr_t)(parent_blk_ptr + curr_offset)); else hdr_len = rec_size1 - SIZEOF(block_id); bpntr_end = curr_offset + hdr_len + SIZEOF(block_id); BLK_SEG(bs_ptr, parent_blk_ptr + SIZEOF(blk_hdr), curr_offset + hdr_len - SIZEOF(blk_hdr)); BLK_ADDR(bn_ptr, SIZEOF(block_id), unsigned char); PUT_LONG(bn_ptr, free_blk_id); BLK_SEG(bs_ptr, bn_ptr, SIZEOF(block_id)); BLK_SEG(bs_ptr, parent_blk_ptr + bpntr_end, parent_blk_size - bpntr_end); assert(blk_seg_cnt == parent_blk_size); if (!BLK_FINI(bs_ptr, bs1)) { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_blkmod); return RETRY_SWAP; } t_write(&dir_hist_ptr->h[parent_blk_lvl], (unsigned char *)bs1, 0, 0, parent_blk_lvl, FALSE, TRUE, GDS_WRITE_KILLTN); /* To indicate later snapshot file writing process during fast_integ not to skip writing the block to snapshot file */ BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state); /* 3. Free block's corresponding bitmap reflects above change. */ PUT_LONG(update_array_ptr, free_bit); save_cw_set_depth = cw_set_depth; /* Bit maps go on end of cw_set (more fake acquired) */ assert(!cw_map_depth); t_write_map(&bmlhist, (uchar_ptr_t)update_array_ptr, curr_tn, 1); cw_map_depth = cw_set_depth; cw_set_depth = save_cw_set_depth; update_array_ptr += SIZEOF(block_id); temp_long = 0; PUT_LONG(update_array_ptr, temp_long); update_array_ptr += SIZEOF(block_id); assert(1 == cw_set[cw_map_depth - 1].reference_cnt); /* 4. Child block gets marked recycled in bitmap. (GVCST_BMP_MARK_FREE) */ kill_set_list->blk[kill_set_list->used].flag = 0; kill_set_list->blk[kill_set_list->used].level = 0; kill_set_list->blk[kill_set_list->used++].block = child_blk_id; return free_blk_id; }
boolean_t mu_truncate(int4 truncate_percent) { sgmnt_addrs *csa; sgmnt_data_ptr_t csd; int num_local_maps; int lmap_num, lmap_blk_num; int bml_status, sigkill; int save_errno; int ftrunc_status; uint4 jnl_status; uint4 old_total, new_total; uint4 old_free, new_free; uint4 end_blocks; int4 blks_in_lmap, blk; gtm_uint64_t before_trunc_file_size; off_t trunc_file_size; off_t padding; uchar_ptr_t lmap_addr; boolean_t was_crit; uint4 found_busy_blk; srch_blk_status bmphist; srch_blk_status *blkhist; srch_hist alt_hist; trans_num curr_tn; blk_hdr_ptr_t lmap_blk_hdr; block_id *blkid_ptr; unix_db_info *udi; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; char *err_msg; intrpt_state_t prev_intrpt_state; off_t offset; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; csa = cs_addrs; csd = cs_data; if (dba_mm == csd->acc_meth) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); return TRUE; } if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } /* already checked for parallel truncates on this region --- see mupip_reorg.c */ gv_target = NULL; assert(csa->nl->trunc_pid == process_id); assert(dba_mm != csd->acc_meth); old_total = csa->ti->total_blks; old_free = csa->ti->free_blocks; sigkill = 0; found_busy_blk = 0; memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */ assert(csd->bplmap == BLKS_PER_LMAP); end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */ if (0 == end_blocks) end_blocks = BLKS_PER_LMAP; num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP); /* ======================================== PHASE 1 ======================================== */ for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--) { if (mu_ctrly_occurred || mu_ctrlc_occurred) return TRUE; assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */ if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */ { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); return TRUE; } lmap_blk_num = lmap_num * BLKS_PER_LMAP; if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { found_busy_blk = lmap_blk_num; break; } blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP; /* Loop through non-bitmap blocks of this lmap, do recycled2free */ DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n", lmap_num, lmap_blk_num, blks_in_lmap)); for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;) { t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) /* retry loop for recycled to free transactions */ { curr_tn = csd->trans_hist.curr_tn; /* Read the nth local bitmap into memory */ bmphist.blk_num = lmap_blk_num; bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr); lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr; if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. */ t_retry((enum cdb_sc)rdfail_detail); continue; } lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr); /* starting from the hint (blk itself), find the first busy or recycled block */ blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status); assert(blk < BLKS_PER_LMAP); if (blk == -1 || blk >= blks_in_lmap) { /* done with this lmap, continue to next */ t_abort(gv_cur_region, csa); break; } else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num) { /* stop processing blocks... skip ahead to phase 2 */ found_busy_blk = lmap_blk_num; t_abort(gv_cur_region, csa); break; } else if (BLK_RECYCLED == bml_status) { /* Write PBLK records for recycled blocks only if before_image journaling is * enabled. t_end() takes care of checking if journaling is enabled and * writing PBLK record. We have to at least mark the recycled block as free. */ RESET_UPDATE_ARRAY; update_trans = UPDTRNS_DB_UPDATED_MASK; *((block_id *)update_array_ptr) = blk; update_array_ptr += SIZEOF(block_id); *(int *)update_array_ptr = 0; alt_hist.h[1].blk_num = 0; alt_hist.h[0].level = 0; alt_hist.h[0].cse = NULL; alt_hist.h[0].tn = curr_tn; alt_hist.h[0].blk_num = lmap_blk_num + blk; alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num, &alt_hist.h[0].cycle, &alt_hist.h[0].cr); if (!alt_hist.h[0].buffaddr) { t_retry((enum cdb_sc)rdfail_detail); continue; } if (!t_recycled2free(&alt_hist.h[0])) { t_retry(cdb_sc_lostbmlcr); continue; } t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0); /* Set the opcode for INCTN record written by t_end() */ inctn_opcode = inctn_blkmarkfree; if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; /* block processed, scan from the next one */ blk++; break; } else { assert(t_tries < CDB_STAGNATE); t_retry(cdb_sc_badbitmap); continue; } } /* END recycled2free retry loop */ } /* END scanning blocks of this particular lmap */ /* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */ /* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */ DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num)); t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK); for (;;) { RESET_UPDATE_ARRAY; BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id); *blkid_ptr = 0; update_trans = UPDTRNS_DB_UPDATED_MASK; inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */ curr_tn = csd->trans_hist.curr_tn; blkhist = &alt_hist.h[0]; blkhist->blk_num = lmap_blk_num; blkhist->tn = curr_tn; blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */ /* Read the nth local bitmap into memory */ blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr); lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr; if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz)) { /* Could not read the block successfully. Retry. */ t_retry((enum cdb_sc)rdfail_detail); continue; } t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0); blkhist->blk_num = 0; /* create empty history for bitmap block */ if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)) continue; break; } } /* END scanning lmaps */ /* ======================================== PHASE 2 ======================================== */ assert(!csa->now_crit); for (;;) { /* wait for FREEZE, we don't want to truncate a frozen database */ grab_crit(gv_cur_region); if (FROZEN_CHILLED(cs_data)) DO_CHILLED_AUTORELEASE(csa, cs_data); if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN) break; rel_crit(gv_cur_region); while (FROZEN(cs_data) || IS_REPL_INST_FROZEN) { hiber_start(1000); if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data)) break; } } assert(csa->nl->trunc_pid == process_id); /* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */ if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB)) { assert(FALSE); gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"), DB_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return FALSE; } csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk); assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk)); new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP); if (mu_ctrly_occurred || mu_ctrlc_occurred) { rel_crit(gv_cur_region); return TRUE; } else if (csa->ti->total_blks != old_total || new_total == old_total) { assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */ gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent); rel_crit(gv_cur_region); return TRUE; } else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (SNAPSHOTS_IN_PROG(csa->nl)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region)); rel_crit(gv_cur_region); return TRUE; } DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); if (JNL_ENABLED(csa)) { /* Write JRT_TRUNC and INCTN records */ if (!jgbl.dont_reset_gbl_jrec_time) SET_GBL_JREC_TIME; /* needed before jnl_ensure_open as that can write jnl records */ jpc = csa->jnl; jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(gv_cur_region, csa); if (SS_NORMAL != jnl_status) send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region)); else { if (0 == jpc->pini_addr) jnl_put_jrt_pini(csa); jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total); inctn_opcode = inctn_mu_reorg; jnl_write_inctn_rec(csa); jnl_status = jnl_flush(gv_cur_region); if (SS_NORMAL != jnl_status) { send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd), ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"), jnl_status); assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */ } } } /* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */ curr_tn = csa->ti->curr_tn; CHECK_TN(csa, csd, curr_tn); udi = FILE_INFO(gv_cur_region); /* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */ trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1)); csd->after_trunc_total_blks = new_total; csd->before_trunc_free_blocks = csa->ti->free_blocks; csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */ /* file size and total blocks: INCONSISTENT */ csa->ti->total_blks = new_total; /* past the point of no return -- shared memory intact */ assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total)); csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total); new_free = csa->ti->free_blocks; KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */ fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); CHECK_DBSYNC(gv_cur_region, save_errno); /* past the point of no return -- shared memory deleted */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */ clear_cache_array(csa, csd, gv_cur_region, new_total, old_total); offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size; save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff))); if (0 != save_errno) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); return FALSE; } KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */ /* Execute an ftruncate() and truncate the DB file * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS) * It ignores kill -9 signal till its operation is completed. * So we can safely assume that the result of ftruncate() will be complete. */ FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status); if (0 != ftrunc_status) { err_msg = (char *)STRERROR(errno); rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg)); /* should go through recover_truncate now, which will again try to FTRUNCATE */ return FALSE; } /* file size and total blocks: CONSISTENT (shrunk) */ KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */ csa->nl->root_search_cycle++; /* Force concurrent processes to restart in t_end/tp_tend to make sure no one * tries to commit updates past the end of the file. Bitmap validations together * with highest_lbm_with_busy_blk should actually be sufficient, so this is * just to be safe. */ csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */ /* Increment TN */ assert(csa->ti->early_tn == csa->ti->curr_tn); csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1; INCREMENT_CURR_TN(csd); fileheader_sync(gv_cur_region); DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno); KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 58 : Issue a kill -9 after after 2nd fsync */ CHECK_DBSYNC(gv_cur_region, save_errno); ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state); curr_tn = csa->ti->curr_tn; rel_crit(gv_cur_region); send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn); util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].", FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free); return TRUE; } /* END of mu_truncate() */
void dse_chng_bhead(void) { blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; block_id blk; boolean_t chng_blk, ismap, was_hold_onto_crit; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ int4 x; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; srch_blk_status blkhist; trans_num tn; uint4 mapsize; csa = cs_addrs; if (gv_cur_region->read_only) rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ chng_blk = FALSE; if (BADDSEBLK == (blk = dse_getblk("BLOCK", DSEBMLOK, DSEBLKCUR))) /* WARNING: assignment */ return; csd = csa->hdr; assert(csd == cs_data); blk_size = csd->blk_size; ismap = IS_BITMAP_BLK(blk); mapsize = BM_SIZE(csd->bplmap); t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr; if (CLI_PRESENT == cli_present("LEVEL")) { if (!cli_get_hex("LEVEL", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.", TRUE); t_abort(gv_cur_region, csa); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.", TRUE); t_abort(gv_cur_region, csa); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (CLI_PRESENT == cli_present("BSIZ")) { if (!cli_get_hex("BSIZ", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } else if (x < SIZEOF(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, csa); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_AIMGBLKFAIL, 3, blk, DB_LEN_STR(gv_cur_region)); t_abort(gv_cur_region, csa); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } if (CLI_PRESENT == cli_present("TN")) { if (!cli_get_hex64("TN", &tn)) return; t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(csa->ti->early_tn == csa->ti->curr_tn); if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) { gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL); t_abort(gv_cur_region, csa); return; } if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); /* Pass the desired tn as argument to bg_update/mm_update below */ BUILD_AIMG_IF_JNL_ENABLED_AND_T_END_WITH_EFFECTIVE_TN(csa, csd, tn, &dummy_hist); } return; }
void preemptive_db_clnup(int preemptive_severe) { sgmnt_addrs *csa; sgm_info *si; gd_region *r_top, *reg; gd_addr *addr_ptr; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; /* Clear "inctn_opcode" global variable now that any in-progress transaction is aborted at this point. * Not doing so would cause future calls to "t_end" to get confused and skip writing logical jnl recs * and instead incorrectly write an INCTN record (GTM-8425). */ if (bml_save_dollar_tlevel) { assert(!dollar_tlevel); dollar_tlevel = bml_save_dollar_tlevel; bml_save_dollar_tlevel = 0; } assert(!dollar_tlevel || (inctn_invalid_op == inctn_opcode) || (inctn_bmp_mark_free_gtm == inctn_opcode)); assert(dollar_tlevel || update_trans || (inctn_invalid_op == inctn_opcode)); inctn_opcode = inctn_invalid_op; if (!dollar_tlevel && update_trans) { /* It's possible we hit an error in the middle of an update, at which point we have * a valid clue and non-NULL cse. However, this causes problems for subsequent * transactions (see comment in t_begin). In particular we could end up pinning buffers * unnecessarily. So clear the cse of any histories that may have been active during the update. */ CLEAR_CSE(gv_target); if ((NULL != gv_target) && (NULL != gv_target->gd_csa)) { CLEAR_CSE(gv_target->gd_csa->dir_tree); GTMTRIG_ONLY(CLEAR_CSE(gv_target->gd_csa->hasht_tree)); } /* Resetting this is necessary to avoid blowing an assert in t_begin that it is 0 at the start of a transaction. */ update_trans = 0; } if (INVALID_GV_TARGET != reset_gv_target) { if (SUCCESS != preemptive_severe && INFO != preemptive_severe) { /* We know of a few cases in Unix where gv_target and gv_currkey could be out of sync at this point. * a) If we are inside trigger code which in turn does an update that does * reads of ^#t global and ends up in a restart. This restart would * in turn do a rts_error_csa(TPRETRY) which would invoke mdb_condition_handler * that would in turn invoke preemptive_db_clnup which invokes this macro. * In this tp restart case though, it is ok for gv_target and gv_currkey * to be out of sync because they are going to be reset by tp_clean_up anyways. * So skip the dbg-only in-sync check. * b) If we are in gvtr_init reading the ^#t global and detect an error (e.g. TRIGINVCHSET) * gv_target after the reset would be pointing to a regular global whereas gv_currkey * would be pointing to ^#t. It is ok to be out-of-sync since in this case, we expect * mdb_condition_handler to be calling us. That has code to reset gv_currkey (and * cs_addrs/cs_data etc.) to reflect gv_target (i.e. get them back in sync). * Therefore in Unix we pass SKIP_GVT_GVKEY_CHECK to skip the gvtarget/gvcurrkey out-of-sync check * in RESET_GV_TARGET. In VMS we pass DO_GVT_GVKEY_CHECK as we dont yet know of an out-of-sync situation. */ RESET_GV_TARGET(UNIX_ONLY(SKIP_GVT_GVKEY_CHECK) VMS_ONLY(DO_GVT_GVKEY_CHECK)); } } need_kip_incr = FALSE; /* in case we got an error in t_end (e.g. GBLOFLOW), dont want this global variable to get * carried over to the next non-TP transaction that this process does (e.g. inside an error trap). */ TREF(expand_prev_key) = FALSE; /* reset global (in case it is TRUE) so it does not get carried over to future operations */ if (dollar_tlevel) { for (si = first_sgm_info; si != NULL; si = si->next_sgm_info) { if (NULL != si->kip_csa) { csa = si->tp_csa; assert(si->tp_csa == si->kip_csa); PROBE_DECR_KIP(csa->hdr, csa, si->kip_csa); } } } else if (NULL != kip_csa && (NULL != kip_csa->hdr) && (NULL != kip_csa->nl)) PROBE_DECR_KIP(kip_csa->hdr, kip_csa, kip_csa); if (IS_DSE_IMAGE) { /* Release crit on any region that was obtained for the current erroring DSE operation. * Take care NOT to release crits obtained by a previous CRIT -SEIZE command. */ for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr)) { for (reg = addr_ptr->regions, r_top = reg + addr_ptr->n_regions; reg < r_top; reg++) { if (reg->open && !reg->was_open) { csa = &FILE_INFO(reg)->s_addrs; assert(csa->hold_onto_crit || !csa->dse_crit_seize_done); assert(!csa->hold_onto_crit || csa->now_crit); if (csa->now_crit && (!csa->hold_onto_crit || !csa->dse_crit_seize_done)) { rel_crit(reg); csa->hold_onto_crit = FALSE; t_abort(reg, csa); /* cancel mini-transaction if any in progress */ } } } } } }
void dse_chng_rhead(void) { block_id blk; sm_uc_ptr_t bp, b_top, cp, rp; boolean_t chng_rec; rec_hdr new_rec; uint4 x; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; srch_blk_status blkhist; error_def(ERR_DBRDONLY); error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ if (cli_present("BLOCK") == CLI_PRESENT) { if(!cli_get_hex("BLOCK", (uint4 *)&blk)) return; patch_curr_blk = blk; } if (patch_curr_blk < 0 || patch_curr_blk >= cs_addrs->ti->total_blks || !(patch_curr_blk % cs_addrs->hdr->bplmap)) { util_out_print("Error: invalid block number.", TRUE); return; } t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = patch_curr_blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); bp = blkhist.buffaddr; blk_size = cs_addrs->hdr->blk_size; chng_rec = FALSE; b_top = bp + ((blk_hdr_ptr_t)bp)->bsiz; if (((blk_hdr_ptr_t)bp)->bsiz > blk_size || ((blk_hdr_ptr_t)bp)->bsiz < SIZEOF(blk_hdr)) chng_rec = TRUE; /* force rewrite to correct size */ if (cli_present("RECORD") == CLI_PRESENT) { if (!(rp = skan_rnum(bp, FALSE))) { t_abort(gv_cur_region, cs_addrs); return; } } else if (!(rp = skan_offset(bp, FALSE))) { t_abort(gv_cur_region, cs_addrs); return; } GET_SHORT(new_rec.rsiz, &((rec_hdr_ptr_t)rp)->rsiz); new_rec.cmpc = ((rec_hdr_ptr_t)rp)->cmpc; if (cli_present("CMPC") == CLI_PRESENT) { if (!cli_get_hex("CMPC", &x)) { t_abort(gv_cur_region, cs_addrs); return; } if (x > 0x7f) { util_out_print("Error: invalid cmpc.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } if (x > patch_comp_count) util_out_print("Warning: specified compression count is larger than the current expanded key size.", TRUE); new_rec.cmpc = x; chng_rec = TRUE; } if (cli_present("RSIZ") == CLI_PRESENT) { if (!cli_get_hex("RSIZ", &x)) { t_abort(gv_cur_region, cs_addrs); return; } if (x < SIZEOF(rec_hdr) || x > blk_size) { util_out_print("Error: invalid rsiz.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } new_rec.rsiz = x; chng_rec = TRUE; } if (chng_rec) { BLK_INIT(bs_ptr, bs1); cp = bp; cp += SIZEOF(blk_hdr); if (chng_rec) { BLK_SEG(bs_ptr, cp, rp - cp); BLK_SEG(bs_ptr, (uchar_ptr_t)&new_rec, SIZEOF(rec_hdr)); cp = rp + SIZEOF(rec_hdr); } if (b_top - cp) BLK_SEG(bs_ptr, cp, b_top - cp); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)bp)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(cs_data, non_tp_jfb_buff_ptr, cs_addrs->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } return; }
void dse_chng_bhead(void) { block_id blk; int4 x; trans_num tn; cache_rec_ptr_t cr; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ boolean_t ismap; boolean_t chng_blk; boolean_t was_crit; boolean_t was_hold_onto_crit; uint4 mapsize; srch_blk_status blkhist; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; # ifdef GTM_CRYPT int req_enc_blk_size; int crypt_status; blk_hdr_ptr_t bp, save_bp, save_old_block; # endif error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ chng_blk = FALSE; csa = cs_addrs; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK", (uint4 *)&blk)) return; if (blk < 0 || blk > csa->ti->total_blks) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } csd = csa->hdr; assert(csd == cs_data); blk_size = csd->blk_size; ismap = (patch_curr_blk / csd->bplmap * csd->bplmap == patch_curr_blk); mapsize = BM_SIZE(csd->bplmap); t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = patch_curr_blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_hex("LEVEL", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.", TRUE); t_abort(gv_cur_region, csa); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.", TRUE); t_abort(gv_cur_region, csa); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } else if (x < SIZEOF(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, csa); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.", TRUE); t_abort(gv_cur_region, csa); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, csa->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex64("TN", &tn)) return; was_crit = csa->now_crit; t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(csa->ti->early_tn == csa->ti->curr_tn); if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) { util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, csa); return; } if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); /* Pass the desired tn as argument to bg_update/mm_update below */ BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, tn); was_hold_onto_crit = csa->hold_onto_crit; csa->hold_onto_crit = TRUE; t_end(&dummy_hist, NULL, tn); # ifdef GTM_CRYPT if (csd->is_encrypted && (tn < csa->ti->curr_tn)) { /* BG and db encryption is enabled and the DSE update caused the block-header to potentially have a tn * that is LESS than what it had before. At this point, the global buffer (corresponding to blkhist.blk_num) * reflects the contents of the block AFTER the dse update (bg_update would have touched this) whereas * the corresponding encryption global buffer reflects the contents of the block BEFORE the update. * Normally wcs_wtstart takes care of propagating the tn update from the regular global buffer to the * corresponding encryption buffer. But if before it gets a chance, let us say a process goes to t_end * as part of a subsequent transaction and updates this same block. Since the blk-hdr-tn potentially * decreased, it is possible that the PBLK writing check (comparing blk-hdr-tn with the epoch_tn) decides * to write a PBLK for this block (even though a PBLK was already written for this block as part of a * previous DSE CHANGE -BL -TN in the same epoch). In this case, since the db is encrypted, the logic * will assume there were no updates to this block since the last time wcs_wtstart updated the encryption * buffer and therefore use that to write the pblk, which is incorrect since it does not yet contain the * tn update. The consequence of this is would be writing an older before-image PBLK) record to the * journal file. To prevent this situation, we update the encryption buffer here (before releasing crit) * using logic like that in wcs_wtstart to ensure it is in sync with the regular global buffer. * Note: * Although we use cw_set[0] to access the global buffer corresponding to the block number being updated, * cw_set_depth at this point is 0 because t_end resets it. This is considered safe since cw_set is a * static array (as opposed to malloc'ed memory) and hence is always available and valid until it gets * overwritten by subsequent updates. */ bp = (blk_hdr_ptr_t)GDS_ANY_REL2ABS(csa, cw_set[0].cr->buffaddr); DBG_ENSURE_PTR_IS_VALID_GLOBUFF(csa, csd, (sm_uc_ptr_t)bp); save_bp = (blk_hdr_ptr_t)GDS_ANY_ENCRYPTGLOBUF(bp, csa); DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(csa, csd, (sm_uc_ptr_t)save_bp); assert((bp->bsiz <= csd->blk_size) && (bp->bsiz >= SIZEOF(*bp))); req_enc_blk_size = MIN(csd->blk_size, bp->bsiz) - SIZEOF(*bp); if (BLK_NEEDS_ENCRYPTION(bp->levl, req_enc_blk_size)) { ASSERT_ENCRYPTION_INITIALIZED; memcpy(save_bp, bp, SIZEOF(blk_hdr)); GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, (char *)(bp + 1), req_enc_blk_size, (char *)(save_bp + 1), crypt_status); if (0 != crypt_status) GC_GTM_PUTMSG(crypt_status, gv_cur_region->dyn.addr->fname); } else memcpy(save_bp, bp, bp->bsiz); } # endif if (!was_hold_onto_crit) csa->hold_onto_crit = FALSE; if (!was_crit) rel_crit(gv_cur_region); if (unhandled_stale_timer_pop) process_deferred_stale(); } return; }
void dse_rmrec(void) { block_id blk; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size, count; sm_uc_ptr_t bp; uchar_ptr_t lbp, b_top, rp, r_top, key_top, rp_base; char cc, comp_key[256], cc_base; short int size, i, rsize; cw_set_element *cse; error_def(ERR_DSEFAIL); error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); assert(update_array); /* reset new block mechanism */ update_array_ptr = update_array; if (cli_present("BLOCK") == CLI_PRESENT) { if(!cli_get_hex("BLOCK", &blk)) return; if (blk < 0 || blk >= cs_addrs->ti->total_blks || !(blk % cs_addrs->hdr->bplmap)) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } if (cli_present("COUNT") == CLI_PRESENT) { if (!cli_get_hex("COUNT", &count) || count < 1) return; } else count = 1; t_begin_crit(ERR_DSEFAIL); blk_size = cs_addrs->hdr->blk_size; if(!(bp = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); lbp = (uchar_ptr_t)malloc(blk_size); memcpy(lbp, bp, blk_size); if (((blk_hdr_ptr_t)lbp)->bsiz > cs_addrs->hdr->blk_size) b_top = lbp + cs_addrs->hdr->blk_size; else if (((blk_hdr_ptr_t)lbp)->bsiz < sizeof(blk_hdr)) b_top = lbp + sizeof(blk_hdr); else b_top = lbp + ((blk_hdr_ptr_t)lbp)->bsiz; if (cli_present("RECORD") == CLI_PRESENT) { if (!(rp = rp_base = skan_rnum(lbp, FALSE))) { free(lbp); t_abort(gv_cur_region, cs_addrs); return; } } else if (!(rp = rp_base = skan_offset(lbp, FALSE))) { free(lbp); t_abort(gv_cur_region, cs_addrs); return; } memcpy(&comp_key[0], &patch_comp_key[0], sizeof(patch_comp_key)); cc_base = patch_comp_count; for ( ; ; ) { GET_SHORT(rsize, &((rec_hdr_ptr_t)rp)->rsiz); if (rsize < sizeof(rec_hdr)) r_top = rp + sizeof(rec_hdr); else r_top = rp + rsize; if (r_top >= b_top) { if (count) { if (((blk_hdr_ptr_t) lbp)->levl) util_out_print("Warning: removed a star record from the end of this block.", TRUE); ((blk_hdr_ptr_t)lbp)->bsiz = rp_base - lbp; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr), (int)((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.",TRUE); free(lbp); t_abort(gv_cur_region, cs_addrs); return; } t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); free(lbp); return; } r_top = b_top; } if (((blk_hdr_ptr_t)lbp)->levl) key_top = r_top - sizeof(block_id); else { for (key_top = rp + sizeof(rec_hdr); key_top < r_top; ) if (!*key_top++ && !*key_top++) break; } if (((rec_hdr_ptr_t)rp)->cmpc > patch_comp_count) cc = patch_comp_count; else cc = ((rec_hdr_ptr_t)rp)->cmpc; size = key_top - rp - sizeof(rec_hdr); if (size < 0) size = 0; else if (size > sizeof(patch_comp_key) - 2) size = sizeof(patch_comp_key) - 2; memcpy(&patch_comp_key[cc], rp + sizeof(rec_hdr), size); patch_comp_count = cc + size; if (--count >= 0) { rp = r_top; continue; } size = (patch_comp_count < cc_base) ? patch_comp_count : cc_base; for (i = 0; i < size && patch_comp_key[i] == comp_key[i]; i++) ; ((rec_hdr_ptr_t)rp_base)->cmpc = i; rsize = r_top - key_top + sizeof(rec_hdr) + patch_comp_count - i; PUT_SHORT(&((rec_hdr_ptr_t)rp_base)->rsiz, rsize); memcpy(rp_base + sizeof(rec_hdr), &patch_comp_key[i], patch_comp_count - i); memcpy(rp_base + sizeof(rec_hdr) + patch_comp_count - i, key_top, b_top - key_top); ((blk_hdr_ptr_t)lbp)->bsiz = rp_base + rsize - lbp + b_top - r_top; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, (uchar_ptr_t)lbp + sizeof(blk_hdr), ((blk_hdr_ptr_t)lbp)->bsiz - sizeof(blk_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad blk build.", TRUE); free(lbp); t_abort(gv_cur_region, cs_addrs); return; } t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, ((blk_hdr_ptr_t)lbp)->levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); free(lbp); return; } }
void dse_chng_bhead(void) { block_id blk; block_id *blkid_ptr; sgm_info *dummysi = NULL; int4 x; cache_rec_ptr_t cr; uchar_ptr_t bp; sm_uc_ptr_t blkBase; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; cw_set_element *cse; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ bool ismap; bool chng_blk; uint4 mapsize; uint4 jnl_status; error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); assert(update_array); /* reset new block mechanism */ update_array_ptr = update_array; chng_blk = FALSE; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK",&blk)) return; if (blk < 0 || blk > cs_addrs->ti->total_blks) { util_out_print("Error: invalid block number.",TRUE); return; } patch_curr_blk = blk; } blk_size = cs_addrs->hdr->blk_size; ismap = (patch_curr_blk / cs_addrs->hdr->bplmap * cs_addrs->hdr->bplmap == patch_curr_blk); mapsize = BM_SIZE(cs_addrs->hdr->bplmap); t_begin_crit (ERR_DSEFAIL); if (!(bp = t_qread (patch_curr_blk,&dummy_hist.h[0].cycle,&dummy_hist.h[0].cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)bp; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_num("LEVEL",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < sizeof(blk_hdr)) new_hdr.bsiz = sizeof(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if (cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ",&x)) { t_abort(gv_cur_region, cs_addrs); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } else if (x < sizeof(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, cs_addrs); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.",TRUE); t_abort(gv_cur_region, cs_addrs); return; } t_write (patch_curr_blk, (unsigned char *)bs1, 0, 0, bp, new_hdr.levl, TRUE, FALSE); BUILD_AIMG_IF_JNL_ENABLED(cs_addrs, cs_data, non_tp_jfb_buff_ptr, cse); t_end(&dummy_hist, 0); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex("TN",&x)) return; t_begin_crit(ERR_DSEFAIL); assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); cs_addrs->ti->early_tn++; blkBase = t_qread(patch_curr_blk, &dummy_hist.h[0].cycle, &dummy_hist.h[0].cr); if (NULL == blkBase) { rel_crit(gv_cur_region); util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, cs_addrs); return; } /* Create a null update array for a block */ if (ismap) { BLK_ADDR(blkid_ptr, sizeof(block_id), block_id); *blkid_ptr = 0; t_write_map(patch_curr_blk, blkBase, (unsigned char *)blkid_ptr, cs_addrs->ti->curr_tn); cr_array_index = 0; block_saved = FALSE; } else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, bp + sizeof(new_hdr), new_hdr.bsiz - sizeof(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(patch_curr_blk, (unsigned char *)bs1, 0, 0, blkBase, ((blk_hdr_ptr_t)blkBase)->levl, TRUE, FALSE); cr_array_index = 0; block_saved = FALSE; if (JNL_ENABLED(cs_data)) { JNL_SHORT_TIME(jgbl.gbl_jrec_time); /* needed for jnl_put_jrt_pini() and jnl_write_aimg_rec() */ jnl_status = jnl_ensure_open(); if (0 == jnl_status) { cse = (cw_set_element *)(&cw_set[0]); cse->new_buff = non_tp_jfb_buff_ptr; gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, x); cse->done = TRUE; if (0 == cs_addrs->jnl->pini_addr) jnl_put_jrt_pini(cs_addrs); jnl_write_aimg_rec(cs_addrs, cse->blk, (blk_hdr_ptr_t)cse->new_buff); } else rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); } } /* Pass the desired tn "x" as argument to bg_update or mm_update */ if (dba_bg == cs_addrs->hdr->acc_meth) bg_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); else mm_update(cw_set, cw_set + cw_set_depth, cs_addrs->ti->curr_tn, x, dummysi); cs_addrs->ti->curr_tn++; assert(cs_addrs->ti->early_tn == cs_addrs->ti->curr_tn); /* the following code is analogous to that in t_end and should be maintained in a similar fashion */ while (cr_array_index) cr_array[--cr_array_index]->in_cw_set = FALSE; rel_crit(gv_cur_region); if (block_saved) backup_buffer_flush(gv_cur_region); UNIX_ONLY( if (unhandled_stale_timer_pop) process_deferred_stale(); ) wcs_timer_start(gv_cur_region, TRUE); }
/**************************************************************** Input Parameter: gn = Global name exclude_glist_ptr = list of globals in EXCLUDE option index_fill_factor = index blocks' fill factor data_fill_factor = data blocks' fill factor Input/Output Parameter: resume = resume flag reorg_op = What operations to do (coalesce or, swap or, split) [Default is all] [Only for debugging] ****************************************************************/ boolean_t mu_reorg(mval *gn, glist *exclude_glist_ptr, boolean_t *resume, int index_fill_factor, int data_fill_factor, int reorg_op) { boolean_t end_of_tree = FALSE, complete_merge, detailed_log; int rec_size; /* * * "level" is the level of the working block. * "pre_order_successor_level" is pre_order successor level except in the case * where we are in a left-most descent of the tree * in which case pre_order_successor_level will be the maximum height of that subtree * until we reach the leaf level block . * In other words, pre_order_successor_level and level variable controls the iterative pre-order traversal. * We start reorg from the (root_level - 1) to 0. That is, level = pre_order_successor_level:-1:0. */ int pre_order_successor_level, level; static block_id dest_blk_id = 0; int tkeysize; int blks_killed, blks_processed, blks_reused, blks_coalesced, blks_split, blks_swapped, count, file_extended, lvls_reduced; int d_max_fill, i_max_fill, blk_size, cur_blk_size, max_fill, toler, d_toler, i_toler; int cnt1, cnt2; kill_set kill_set_list; sm_uc_ptr_t rPtr1; enum cdb_sc status; srch_hist *rtsib_hist; jnl_buffer_ptr_t jbp; trans_num ret_tn; error_def(ERR_MUREORGFAIL); error_def(ERR_DBRDONLY); error_def(ERR_GBLNOEXIST); error_def(ERR_MAXBTLEVEL); t_err = ERR_MUREORGFAIL; kill_set_tail = &kill_set_list; /* Initialization for current global */ op_gvname(VARLSTCNT(1) gn); /* Cannot proceed for read-only data files */ if (gv_cur_region->read_only) { gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); return FALSE; } dest_blk_id = cs_addrs->reorg_last_dest; inctn_opcode = inctn_mu_reorg; /* If resume option is present, then reorg_restart_key should be not null. * Skip all globals until we are in the region for that global. * Get the reorg_restart_key and reorg_restart_block from database header and restart from there. */ if (*resume && 0 != cs_data->reorg_restart_key[0]) { /* resume from last key reorged in GVT */ GET_KEY_LEN(tkeysize, &cs_data->reorg_restart_key[0]); memcpy(gv_currkey->base, cs_data->reorg_restart_key, tkeysize); gv_currkey->end = tkeysize - 1; dest_blk_id = cs_data->reorg_restart_block; if (0 == memcmp(cs_data->reorg_restart_key, gn->str.addr, gn->str.len)) /* Going to resume from current global, so it resumed and make it false */ *resume = FALSE; } else { /* start from the left most leaf */ memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len); gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0; gv_currkey->end = gn->str.len + 1; } if (*resume) { util_out_print("REORG cannot be resumed from this point, Skipping this global...", FLUSH); memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len); gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0; gv_currkey->end = gn->str.len + 1; return TRUE; } memcpy(&gv_currkey_next_reorg->base[0], &gv_currkey->base[0], gv_currkey->end + 1); gv_currkey_next_reorg->end = gv_currkey->end; if (2 > dest_blk_id) dest_blk_id = 2; /* we know that first block is bitmap and next one is directory tree root */ file_extended = cs_data->trans_hist.total_blks; blk_size = cs_data->blk_size; d_max_fill = (double)data_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes; i_max_fill = (double)index_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes; d_toler = (double) DATA_FILL_TOLERANCE * blk_size / 100.0; i_toler = (double) INDEX_FILL_TOLERANCE * blk_size / 100.0; blks_killed = blks_processed = blks_reused = lvls_reduced = blks_coalesced = blks_split = blks_swapped = 0; pre_order_successor_level = level = MAX_BT_DEPTH + 1; /* Just some high value to initialize */ /* --- more detailed debugging information --- */ if (detailed_log = reorg_op & DETAIL) util_out_print("STARTING to work on global ^!AD from region !AD", TRUE, gn->str.len, gn->str.addr, REG_LEN_STR(gv_cur_region)); /* In each iteration of MAIN loop, a working block is processed for a GVT */ for (; ;) /* ================ START MAIN LOOP ================ */ { /* If right sibling is completely merged with the working block, do not swap the working block * with its final destination block. Continue trying next right sibling. Swap only at the end. */ complete_merge = TRUE; while(complete_merge) /* === START WHILE COMPLETE_MERGE === */ { if (mu_ctrlc_occurred || mu_ctrly_occurred) { cs_data->reorg_restart_block = dest_blk_id; memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1); return FALSE; } complete_merge = FALSE; blks_processed++; t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); /* Folllowing for loop is to handle concurrency retry for split/coalesce */ for (; ;) /* === SPLIT-COALESCE LOOP STARTS === */ { gv_target->clue.end = 0; /* search gv_currkey and get the result in gv_target */ if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal) { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match) { if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth) { if (cs_addrs->now_crit) { t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr); reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return TRUE; /* It is not an error that global was killed */ } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } } if (gv_target->hist.depth <= level) { /* Will come here * 1) first iteration of the for loop (since level == MAX_BT_DEPTH + 1) or, * 2) tree depth decreased for mu_reduce_level or, M-kill */ pre_order_successor_level = gv_target->hist.depth - 1; if (MAX_BT_DEPTH + 1 != level) { /* break the loop when tree depth decreased (case 2) */ level = pre_order_successor_level; break; } level = pre_order_successor_level; } max_fill = (0 == level)? d_max_fill : i_max_fill; toler = (0 == level)? d_toler:i_toler; cur_blk_size = ((blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr))->bsiz; if (cur_blk_size > max_fill + toler && 0 == (reorg_op & NOSPLIT)) /* SPLIT BLOCK */ { cnt1 = cnt2 = 0; /* history of current working block is in gv_target */ status = mu_split(level, i_max_fill, d_max_fill, &cnt1, &cnt2); if (cdb_sc_maxlvl == status) { gtm_putmsg(VARLSTCNT(4) ERR_MAXBTLEVEL, 2, gn->str.len, gn->str.addr); reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return FALSE; } else if (cdb_sc_normal == status) { if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; continue; } if (detailed_log) log_detailed_log("SPL", &(gv_target->hist), NULL, level, NULL, ret_tn); blks_reused += cnt1; lvls_reduced -= cnt2; blks_split++; break; } else if (cdb_sc_oprnotneeded == status) { /* undo any update_array/cw_set changes and DROP THRU to mu_clsce */ cw_set_depth = 0; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */ } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } /* end if SPLIT BLOCK */ /* We are here because, mu_split() was not called or, split was not done or, not required */ rtsib_hist = gv_target->alt_hist; status = gvcst_rtsib(rtsib_hist, level); if (cdb_sc_normal != status && cdb_sc_endtree != status) { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } if (cdb_sc_endtree == status) { if (0 == level) end_of_tree = TRUE; break; } else if (0 == level) pre_order_successor_level = rtsib_hist->depth - 1; /* COALESCE WITH RTSIB */ kill_set_list.used = 0; if (cur_blk_size < max_fill - toler && 0 == (reorg_op & NOCOALESCE)) { /* histories are sent in &gv_target->hist and gv_target->alt_hist */ status = mu_clsce(level, i_max_fill, d_max_fill, &kill_set_list, &complete_merge); if (cdb_sc_normal == status) { if (level) /* delete lower elements of array, t_end might confuse */ { memmove(&rtsib_hist->h[0], &rtsib_hist->h[level], SIZEOF(srch_blk_status)*(rtsib_hist->depth - level + 2)); rtsib_hist->depth = rtsib_hist->depth - level; } if (0 < kill_set_list.used) /* increase kill_in_prog */ { need_kip_incr = TRUE; if (!cs_addrs->now_crit) /* Do not sleep while holding crit */ WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL); } if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), rtsib_hist, TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; assert(NULL == kip_csa); if (level) { /* reinitialize level member in rtsib_hist srch_blk_status' */ for (count = 0; count < MAX_BT_DEPTH; count++) rtsib_hist->h[count].level = count; } continue; } if (level) { /* reinitialize level member in rtsib_hist srch_blk_status' */ for (count = 0; count < MAX_BT_DEPTH; count++) rtsib_hist->h[count].level = count; } if (detailed_log) log_detailed_log("CLS", &(gv_target->hist), rtsib_hist, level, NULL, ret_tn); assert(0 < kill_set_list.used || (NULL == kip_csa)); if (0 < kill_set_list.used) /* decrease kill_in_prog */ { gvcst_kill_sort(&kill_set_list); GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs) DECR_KIP(cs_data, cs_addrs, kip_csa); if (detailed_log) log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn); blks_killed += kill_set_list.used; } blks_coalesced++; break; } else if (cdb_sc_oprnotneeded == status) { /* undo any update_array/cw_set changes and DROP THRU to t_end */ cw_set_depth = 0; CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */ } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } /* end if try coalesce */ if (0 == level) { /* Note: In data block level: * if split is successful or, * if coalesce is successful without a complete merge of rtsib, * then gv_currkey_next_reorg is already set from the called function. * if split or, coalesce do a retry or, * if coalesce is successful with a complete merge then * gv_currkey will not be changed. * If split or, coalesce is not successful or, not needed then * here gv_currkey_next_reorg will be set from right sibling */ cw_set_depth = cw_map_depth = 0; GET_KEY_LEN(tkeysize, rtsib_hist->h[0].buffaddr + SIZEOF(blk_hdr) + SIZEOF(rec_hdr)); if (2 < tkeysize && MAX_KEY_SZ >= tkeysize) { memcpy(&(gv_currkey_next_reorg->base[0]), rtsib_hist->h[0].buffaddr + SIZEOF(blk_hdr) +SIZEOF(rec_hdr), tkeysize); gv_currkey_next_reorg->end = tkeysize - 1; inctn_opcode = inctn_invalid_op; /* temporary reset; satisfy an assert in t_end() */ assert(UPDTRNS_DB_UPDATED_MASK == update_trans); update_trans = 0; /* tell t_end, this is no longer an update transaction */ if ((trans_num)0 == (ret_tn = t_end(rtsib_hist, NULL, TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; inctn_opcode = inctn_mu_reorg; /* reset inctn_opcode to its default */ update_trans = UPDTRNS_DB_UPDATED_MASK;/* reset update_trans to old value */ assert(NULL == kip_csa); continue; } /* There is no need to reset update_trans in case of a successful "t_end" call. * This is because before the next call to "t_end" we should have a call to * "t_begin" which will reset update_trans anyways. */ inctn_opcode = inctn_mu_reorg; /* reset inctn_opcode to its default */ if (detailed_log) log_detailed_log("NOU", rtsib_hist, NULL, level, NULL, ret_tn); } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } /* end if (0 == level) */ break; }/* === SPLIT-COALESCE LOOP END === */ t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ }/* === START WHILE COMPLETE_MERGE === */ if (mu_ctrlc_occurred || mu_ctrly_occurred) { cs_data->reorg_restart_block = dest_blk_id; memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end+1); return FALSE; } /* Now swap the working block */ if (0 == (reorg_op & NOSWAP)) { t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); /* Following loop is to handle concurrency retry for swap */ for (; ;) /* === START OF SWAP LOOP === */ { kill_set_list.used = 0; gv_target->clue.end = 0; /* search gv_currkey and get the result in gv_target */ if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal) { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match) { if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth) { if (cs_addrs->now_crit) { t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr); reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return TRUE; /* It is not an error that global was killed */ } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } } if (gv_target->hist.depth <= level) break; /* swap working block with appropriate dest_blk_id block. Historys are sent as gv_target->hist and reorg_gv_target->hist */ mu_reorg_in_swap_blk = TRUE; status = mu_swap_blk(level, &dest_blk_id, &kill_set_list, exclude_glist_ptr); mu_reorg_in_swap_blk = FALSE; if (cdb_sc_oprnotneeded == status) { if (cs_data->trans_hist.total_blks <= dest_blk_id) { util_out_print("REORG may be incomplete for this global.", TRUE); reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return TRUE; } } else if (cdb_sc_normal == status) { if (0 < kill_set_list.used) { need_kip_incr = TRUE; if (!cs_addrs->now_crit) /* Do not sleep while holding crit */ WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL); /* second history not needed, because, we are reusing a free block, which does not need history */ if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; assert(NULL == kip_csa); DECR_BLK_NUM(dest_blk_id); continue; } if (detailed_log) log_detailed_log("SWA", &(gv_target->hist), NULL, level, NULL, ret_tn); gvcst_kill_sort(&kill_set_list); GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs) DECR_KIP(cs_data, cs_addrs, kip_csa); if (detailed_log) log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn); blks_reused += kill_set_list.used; blks_killed += kill_set_list.used; } /* gv_target->hist is for working block's history, and reorg_gv_target->hist is for destinition block's history. Note: gv_target and reorg_gv_target can be part of different GVT. */ else if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), &(reorg_gv_target->hist), TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; assert(NULL == kip_csa); DECR_BLK_NUM(dest_blk_id); continue; } if ((0 >= kill_set_list.used) && detailed_log) log_detailed_log("SWA", &(gv_target->hist), &(reorg_gv_target->hist), level, NULL, ret_tn); blks_swapped++; if (reorg_op & SWAPHIST) util_out_print("Dest !SL From !SL", TRUE, dest_blk_id, gv_target->hist.h[level].blk_num); } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } break; } /* === END OF SWAP LOOP === */ t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ } if (mu_ctrlc_occurred || mu_ctrly_occurred) { cs_data->reorg_restart_block = dest_blk_id; memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1); return FALSE; } if (end_of_tree) break; if (0 < level) level--; /* Order of reorg is root towards leaf */ else { level = pre_order_successor_level; memcpy(&gv_currkey->base[0], &gv_currkey_next_reorg->base[0], gv_currkey_next_reorg->end + 1); gv_currkey->end = gv_currkey_next_reorg->end; cs_data->reorg_restart_block = dest_blk_id; memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1); } } /* ================ END MAIN LOOP ================ */ /* =========== START REDUCE LEVEL ============== */ memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len); gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0; gv_currkey->end = gn->str.len + 1; for (;;) /* Reduce level continues until it fails to reduce */ { t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK); cnt1 = 0; for (; ;) /* main reduce level loop starts */ { kill_set_list.used = 0; gv_target->clue.end = 0; /* search gv_currkey and get the result in gv_target */ if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal) { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match) { if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth) { if (cs_addrs->now_crit) { t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr); reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return TRUE; /* It is not an error that global was killed */ } else { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } } } if (gv_target->hist.depth <= level) break; /* History is passed in gv_target->hist */ status = mu_reduce_level(&kill_set_list); if (cdb_sc_oprnotneeded != status && cdb_sc_normal != status) { assert(CDB_STAGNATE > t_tries); t_retry(status); continue; } else if (cdb_sc_normal == status) { assert(0 < kill_set_list.used); need_kip_incr = TRUE; if (!cs_addrs->now_crit) /* Do not sleep while holding crit */ WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL); if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED))) { need_kip_incr = FALSE; assert(NULL == kip_csa); continue; } if (detailed_log) log_detailed_log("RDL", &(gv_target->hist), NULL, level, NULL, ret_tn); gvcst_kill_sort(&kill_set_list); GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs) DECR_KIP(cs_data, cs_addrs, kip_csa); if (detailed_log) log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn); blks_reused += kill_set_list.used; blks_killed += kill_set_list.used; cnt1 = 1; lvls_reduced++; } break; } /* main reduce level loop ends */ t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */ if (0 == cnt1) break; } /* =========== END REDUCE LEVEL ===========*/ reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped); return TRUE; } /* end mu_reorg() */