int mu_extr_getblk(unsigned char *ptr) { error_def(ERR_GVGETFAIL); enum cdb_sc status; rec_hdr_ptr_t rp; bool two_histories, end_of_tree; blk_hdr_ptr_t bp; srch_blk_status *bh; srch_hist *rt_history; t_begin(ERR_GVGETFAIL, FALSE); for (;;) { if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL))) { t_retry(status); continue; } end_of_tree = two_histories = FALSE; bh = gv_target->hist.h; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; if (rp >= (rec_hdr_ptr_t)CST_TOB(bp)) { rt_history = gv_target->alt_hist; if (cdb_sc_normal == (status = gvcst_rtsib(rt_history, 0))) { two_histories = TRUE; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h))) { t_retry(status); continue; } bp = (blk_hdr_ptr_t)rt_history->h[0].buffaddr; } else if (cdb_sc_endtree == status) end_of_tree = TRUE; else { t_retry(status); continue; } } memcpy(ptr, bp, bp->bsiz); if (t_end(&gv_target->hist, two_histories ? rt_history : NULL) != 0) { if (two_histories) memcpy(gv_target->hist.h, rt_history->h, sizeof(srch_blk_status) * (rt_history->depth + 1)); return !end_of_tree; } } }
boolean_t gvcst_query2(void) { boolean_t found, two_histories; enum cdb_sc status; blk_hdr_ptr_t bp; rec_hdr_ptr_t rp; unsigned char *c1, *c2; srch_blk_status *bh; srch_hist *rt_history; T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { two_histories = FALSE; # if defined(DEBUG) && defined(UNIX) if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYFAIL == gtm_white_box_test_case_number)) { t_retry(cdb_sc_blknumerr); continue; } # endif if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0))) { found = TRUE; bh = &gv_target->hist.h[0]; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; if (rp >= (rec_hdr_ptr_t)CST_TOB(bp)) { two_histories = TRUE; rt_history = gv_target->alt_hist; status = gvcst_rtsib(rt_history, 0); if (cdb_sc_endtree == status) /* end of tree */ { found = FALSE; two_histories = FALSE; /* second history not valid */ } else if (cdb_sc_normal != status) { t_retry(status); continue; } else { bh = &rt_history->h[0]; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh))) { t_retry(status); continue; } rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; } } if (found) { /* !found indicates that the end of tree has been reached (see call to * gvcst_rtsib). If there is no more tree, don't bother doing expansion. */ status = gvcst_expand_curr_key(bh, gv_currkey, gv_altkey); if (cdb_sc_normal != status) { t_retry(status); continue; } } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(!two_histories ? 
NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } assert(cs_data == cs_addrs->hdr); INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, 1); if (found) { c1 = &gv_altkey->base[0]; c2 = &gv_currkey->base[0]; for ( ; *c2; ) { if (*c2++ != *c1++) break; } if (!*c2 && !*c1) return TRUE; } return FALSE; } t_retry(status); } }
mint gvcst_data(void) { blk_hdr_ptr_t bp; enum cdb_sc status; mint val; rec_hdr_ptr_t rp; unsigned short rec_size; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t b_top; assert((gv_target->root < cs_addrs->ti->total_blks) || (0 < dollar_tlevel)); T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { rt_history = gv_target->alt_hist; rt_history->h[0].blk_num = 0; if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal) { t_retry(status); continue; } bh = gv_target->hist.h; bp = (blk_hdr_ptr_t)bh->buffaddr; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); b_top = bh->buffaddr + bp->bsiz; val = 0; if (gv_currkey->end + 1 == bh->curr_rec.match) val = 1; else if (bh->curr_rec.match >= gv_currkey->end) val = 10; if (1 == val || rp == (rec_hdr_ptr_t)b_top) { GET_USHORT(rec_size, &rp->rsiz); if (rp == (rec_hdr_ptr_t)b_top || (sm_uc_ptr_t)rp + rec_size == b_top) { if (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0))) { if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h)))) { t_retry(status); continue; } if (rt_history->h[0].curr_rec.match >= gv_currkey->end) val += 10; } } else { if ((sm_uc_ptr_t)rp + rec_size > b_top) { t_retry(cdb_sc_rmisalign); continue; } rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size); if (rp->cmpc >= gv_currkey->end) val += 10; } } if (0 == dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history)) continue; } else { status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1); return val; } }
boolean_t gvcst_order2(void) { blk_hdr_ptr_t bp; boolean_t found, two_histories; enum cdb_sc status; rec_hdr_ptr_t rp; unsigned short rec_size; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t c1, c2, ctop, alt_top; int tmp_cmpc; T_BEGIN_READ_NONTP_OR_TP(ERR_GVORDERFAIL); for (;;) { assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ two_histories = FALSE; #if defined(DEBUG) && defined(UNIX) if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVORDERFAIL == gtm_white_box_test_case_number)) { status = cdb_sc_blknumerr; t_retry(status); continue; } #endif if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL))) { found = TRUE; bh = gv_target->hist.h; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; if ((rec_hdr_ptr_t)CST_TOB(bp) <= rp) { two_histories = TRUE; rt_history = gv_target->alt_hist; status = gvcst_rtsib(rt_history, 0); if (cdb_sc_normal == status) { bh = rt_history->h; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh))) { t_retry(status); continue; } rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; } else { if (cdb_sc_endtree == status) { found = FALSE; two_histories = FALSE; /* second history not valid */ } else { t_retry(status); continue; } } } if (found) { assert(gv_altkey->top == gv_currkey->top); assert(gv_altkey->top == gv_keysize); assert(gv_altkey->end < gv_altkey->top); /* store new subscipt */ c1 = gv_altkey->base; alt_top = gv_altkey->base + gv_altkey->top - 1; /* Make alt_top one less than gv_altkey->top to allow double-null at end of a key-name */ /* 4/17/96 * HP compiler bug work-around. The original statement was * c2 = (unsigned char *)CST_BOK(rp) + bh->curr_rec.match - rp->cmpc; * * ...but this was sometimes compiled incorrectly (the lower 4 bits * of rp->cmpc, sign extended, were subtracted from bh->curr_rec.match). 
* I separated out the subtraction of rp->cmpc. * * -VTF. */ c2 = (sm_uc_ptr_t)CST_BOK(rp) + bh->curr_rec.match; memcpy(c1, gv_currkey->base, bh->curr_rec.match); c1 += bh->curr_rec.match; c2 -= EVAL_CMPC(rp); GET_USHORT(rec_size, &rp->rsiz); ctop = (sm_uc_ptr_t)rp + rec_size; for (;;) { if (c2 >= ctop || c1 >= alt_top) { assert(CDB_STAGNATE > t_tries); status = cdb_sc_rmisalign; goto restart; /* goto needed because of nested FOR loop */ } if (0 == (*c1++ = *c2++)) { *c1 = 0; break; } } gv_altkey->end = c1 - gv_altkey->base; assert(gv_altkey->end < gv_altkey->top); } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, two_histories ? rt_history : NULL, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(two_histories ? rt_history : NULL); if (cdb_sc_normal != status) { t_retry(status); continue; } } assert(cs_data == cs_addrs->hdr); INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, 1); return (found && (bh->curr_rec.match >= gv_currkey->prev)); } restart: t_retry(status); } }
/* Combined "data" and "get" lookup for gv_currkey, for use inside TP only (asserts dollar_tlevel).
 *
 * Unlike gvcst_data, this routine never calls t_retry itself: every failure is reported by
 * returning the restart code so that the caller decides how to restart. That matters for
 * triggers, where t_retry must not be invoked from within an implicit-tstart-wrapped
 * gvcst_put or gvcst_kill update transaction.
 *
 * On entry t_err is expected to be ERR_GVKILLFAIL (asserted; currently the only caller is
 * gvcst_kill). It is switched to ERR_GVDATAGETFAIL for the duration of the call and restored only
 * on success; on a failure return it is deliberately left as ERR_GVDATAGETFAIL so the error
 * shows which sub-operation failed.
 *
 * Outputs:
 *	val		always set to a string mval; length 0 unless the key was matched exactly, in which
 *			case the value bytes are copied into the stringpool
 *	*dollar_data	0, 1, 10 or 11; written only when cdb_sc_normal is returned
 *
 * The data-status logic mirrors gvcst_data and the value extraction mirrors gvcst_get; changes
 * here may need to be reflected in those routines.
 */
enum cdb_sc gvcst_dataget(mint *dollar_data, mval *val)
{
	blk_hdr_ptr_t	blk;
	boolean_t	check_sibling;
	enum cdb_sc	status;
	mint		result;
	rec_hdr_ptr_t	rec;
	unsigned short	matched, rec_len;
	srch_blk_status	*srch;
	srch_hist	*sib_hist;
	sm_uc_ptr_t	blk_top;
	int		full_key_len, val_len;
	uint4		save_t_err;

	error_def(ERR_GVDATAGETFAIL);
	error_def(ERR_GVKILLFAIL);

	assert(dollar_tlevel);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* crit must be held in the final retry */
	save_t_err = t_err;
	assert(ERR_GVKILLFAIL == save_t_err);
	t_err = ERR_GVDATAGETFAIL;
	sib_hist = gv_target->alt_hist;
	sib_hist->h[0].blk_num = 0;	/* zero marks the sibling history as unused for tp_hist below */
	status = gvcst_search(gv_currkey, NULL);
	if (cdb_sc_normal != status)
		return status;
	srch = gv_target->hist.h;
	blk = (blk_hdr_ptr_t)srch->buffaddr;
	rec = (rec_hdr_ptr_t)(srch->buffaddr + srch->curr_rec.offset);
	blk_top = srch->buffaddr + blk->bsiz;
	matched = srch->curr_rec.match;
	full_key_len = gv_currkey->end + 1;
	check_sibling = FALSE;
	/* Hand back an empty string even when the key does not exist; the caller tells "undefined"
	 * apart from "defined as empty" by looking at dollar_data.
	 */
	val->mvtype = MV_STR;
	val->str.len = 0;
	if (full_key_len == matched)
	{	/* Exact match: fetch the value (as gvcst_get does) ... */
		result = 1;
		GET_USHORT(rec_len, &rec->rsiz);
		val_len = rec_len + rec->cmpc - SIZEOF(rec_hdr) - full_key_len;
		if ((0 > val_len) || ((sm_uc_ptr_t)rec + rec_len > blk_top))
		{
			assert(CDB_STAGNATE > t_tries);
			return cdb_sc_rmisalign1;
		}
		ENSURE_STP_FREE_SPACE(val_len);
		memcpy(stringpool.free, (sm_uc_ptr_t)rec + rec_len - val_len, val_len);
		val->str.addr = (char *)stringpool.free;
		val->str.len = val_len;
		stringpool.free += val_len;
		/* ... then look at the following record to see whether descendants exist */
		rec = (rec_hdr_ptr_t)((sm_uc_ptr_t)rec + rec_len);
		if ((sm_uc_ptr_t)rec > blk_top)
			return cdb_sc_rmisalign;
		if ((sm_uc_ptr_t)rec == blk_top)
			check_sibling = TRUE;
		else if (rec->cmpc >= gv_currkey->end)
			result += 10;
	} else if (matched >= gv_currkey->end)
		result = 10;
	else
	{
		result = 0;
		if (rec == (rec_hdr_ptr_t)blk_top)
			check_sibling = TRUE;
	}
	if (check_sibling)
	{	/* The next record, if there is one, is the first record of the right sibling block */
		status = gvcst_rtsib(sib_hist, 0);
		if (cdb_sc_endtree != status)
		{
			if (cdb_sc_normal != status)
				return status;
			status = gvcst_search_blk(gv_currkey, sib_hist->h);
			if (cdb_sc_normal != status)
				return status;
			if (sib_hist->h[0].curr_rec.match >= gv_currkey->end)
			{
				assert(1 >= result);
				result += 10;
			}
		}
	}
	status = tp_hist((0 == sib_hist->h[0].blk_num) ? NULL : sib_hist);
	if (cdb_sc_normal != status)
		return status;
	*dollar_data = result;
	t_err = save_t_err;	/* success: put t_err back the way the caller had it */
	return status;
}
mint gvcst_data(void) { blk_hdr_ptr_t bp; boolean_t do_rtsib; enum cdb_sc status; mint val; rec_hdr_ptr_t rp; unsigned short match, rsiz; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t b_top; assert((gv_target->root < cs_addrs->ti->total_blks) || dollar_tlevel); T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { /* The following code is duplicated in gvcst_dataget. Any changes here might need to be reflected there as well */ rt_history = gv_target->alt_hist; rt_history->h[0].blk_num = 0; if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL))) { t_retry(status); continue; } bh = gv_target->hist.h; bp = (blk_hdr_ptr_t)bh->buffaddr; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); b_top = bh->buffaddr + bp->bsiz; match = bh->curr_rec.match; do_rtsib = FALSE; if (gv_currkey->end + 1 == match) { val = 1; GET_USHORT(rsiz, &rp->rsiz); rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rsiz); if ((sm_uc_ptr_t)rp > b_top) { t_retry(cdb_sc_rmisalign); continue; } else if ((sm_uc_ptr_t)rp == b_top) do_rtsib = TRUE; else if (rp->cmpc >= gv_currkey->end) val += 10; } else if (match >= gv_currkey->end) val = 10; else { val = 0; if (rp == (rec_hdr_ptr_t)b_top) do_rtsib = TRUE; } if (do_rtsib && (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0)))) { if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h)))) { t_retry(status); continue; } if (rt_history->h[0].curr_rec.match >= gv_currkey->end) { assert(1 >= val); val += 10; } } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1); return val; } }
/* Find the record that a search for gv_currkey lands on, expand its key into gv_altkey with
 * gvcst_expand_key() and return its value in "val".
 *
 * If the search position is at or past the top of the leaf block (CST_TOB), the right sibling
 * block is fetched with gvcst_rtsib() and searched instead; cdb_sc_endtree means there is no
 * further record. The value bytes are copied to stringpool.free BEFORE validation, but
 * stringpool.free is advanced and "val" filled in only after t_end() (outside TP) or tp_hist()
 * (inside TP) has validated the read. Any failure calls t_retry() and repeats the attempt.
 *
 * Parameters:
 *	val	receives the value as a string mval; written only when TRUE is returned
 *	sn_ptr	not referenced by this routine
 *
 * Returns TRUE when a record was found (n_get is incremented in that case), FALSE at end of tree.
 *
 * Note: temp_ushort and tmp_cmpc have no explicit uses here; they are kept because the
 * GET_RSIZ / EVAL_CMPC macros may refer to them by name - TODO confirm against the macro
 * definitions before removing.
 */
boolean_t gvcst_queryget2(mval *val, unsigned char *sn_ptr)
{
	blk_hdr_ptr_t	bp;
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	int		rsiz, key_size, data_len;
	rec_hdr_ptr_t	rp;
	srch_blk_status	*bh;
	srch_hist	*rt_history;
	unsigned short	temp_ushort;
	int		tmp_cmpc;
	DEBUG_ONLY(unsigned char *save_strp = NULL);

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYGETFAIL);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		two_histories = FALSE;
#if defined(DEBUG) && defined(UNIX)
		if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYGETFAIL == gtm_white_box_test_case_number))
		{
			status = cdb_sc_blknumerr;
			t_retry(status);
			continue;
		}
#endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0)))
		{
			found = TRUE;
			bh = &gv_target->hist.h[0];
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
			{
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_endtree == status)		/* end of tree */
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				} else
				{
					bh = &rt_history->h[0];
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				}
			}
			/* !found indicates that the end of tree has been reached (see call to
			 * gvcst_rtsib). If there is no more tree, don't bother doing expansion.
			 */
			if (found)
			{
				status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, (int4)((sm_uc_ptr_t)rp - bh->buffaddr),
								gv_altkey);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
				key_size = gv_altkey->end + 1;
				GET_RSIZ(rsiz, rp);
				data_len = rsiz + EVAL_CMPC(rp) - SIZEOF(rec_hdr) - key_size;
				/* Reject a record whose value length is negative or that runs past the block end */
				if (data_len < 0 || (sm_uc_ptr_t)rp + rsiz > (sm_uc_ptr_t)bp + bp->bsiz)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(cdb_sc_rmisalign1);
					continue;
				}
				ENSURE_STP_FREE_SPACE(data_len);
				DEBUG_ONLY (
				if (!save_strp)
					save_strp = stringpool.free);
				assert(stringpool.top - stringpool.free >= data_len);
				memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
				/* Assumption: t_end/tp_hist will never cause stp_gcol() call BYPASSOK */
			}
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(!two_histories ? NULL : rt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			if (found)
			{
				DEBUG_ONLY(assert(save_strp == stringpool.free));
				/* Process val first. Already copied to string pool. */
				val->mvtype = MV_STR;
				val->str.addr = (char *)stringpool.free;
				val->str.len = data_len;
				stringpool.free += data_len;
				INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, 1);
			}
			return found;
		}
		t_retry(status);
	}
}	/* closing brace was missing: without it the next definition is nested inside this function */
/****************************************************************
 * mu_reorg: reorganize the tree of one global (split over-full blocks, coalesce under-full blocks
 * with their right sibling, swap blocks towards dest_blk_id, and finally reduce tree levels).
 *
 * Input Parameter:
 *	gn = Global name
 *	exclude_glist_ptr = list of globals in EXCLUDE option
 *	index_fill_factor = index blocks' fill factor
 *	data_fill_factor = data blocks' fill factor
 * Input/Output Parameter:
 *	resume = resume flag
 *	reorg_op = What operations to do (coalesce or, swap or, split) [Default is all]
 *			[Only for debugging]
 *
 * Return value:
 *	FALSE	the region is read-only, a Ctrl-C/Ctrl-Y interruption was seen (the restart key and
 *		block are saved in the file header first), or mu_split reported cdb_sc_maxlvl
 *	TRUE	in all other cases, including: the global is skipped because REORG cannot resume from
 *		it, the global was found not to exist, the swap destination ran past total_blks
 *		("REORG may be incomplete"), and normal completion
 ****************************************************************/
boolean_t mu_reorg(mval *gn, glist *exclude_glist_ptr, boolean_t *resume, int index_fill_factor, int data_fill_factor, int reorg_op)
{
	boolean_t		end_of_tree = FALSE, complete_merge, detailed_log;
	int			rec_size;
	/*
	 * "level" is the level of the working block.
	 * "pre_order_successor_level" is pre_order successor level except in the case
	 * where we are in a left-most descent of the tree
	 * in which case pre_order_successor_level will be the maximum height of that subtree
	 * until we reach the leaf level block.
	 * In other words, pre_order_successor_level and level variable controls the iterative pre-order traversal.
	 * We start reorg from the (root_level - 1) to 0. That is, level = pre_order_successor_level:-1:0.
	 */
	int			pre_order_successor_level, level;
	static block_id		dest_blk_id = 0;
	int			tkeysize;
	int			blks_killed, blks_processed, blks_reused, blks_coalesced, blks_split, blks_swapped, count, file_extended, lvls_reduced;
	int			d_max_fill, i_max_fill, blk_size, cur_blk_size, max_fill, toler, d_toler, i_toler;
	int			cnt1, cnt2;
	kill_set		kill_set_list;
	sm_uc_ptr_t		rPtr1;		/* no explicit use below; NOTE(review): presumably referenced by GET_KEY_LEN - confirm */
	enum cdb_sc		status;
	srch_hist		*rtsib_hist;
	jnl_buffer_ptr_t	jbp;
	trans_num		ret_tn;

	error_def(ERR_MUREORGFAIL);
	error_def(ERR_DBRDONLY);
	error_def(ERR_GBLNOEXIST);
	error_def(ERR_MAXBTLEVEL);

	t_err = ERR_MUREORGFAIL;
	kill_set_tail = &kill_set_list;
	/* Initialization for current global */
	op_gvname(VARLSTCNT(1) gn);
	/* Cannot proceed for read-only data files */
	if (gv_cur_region->read_only)
	{
		gtm_putmsg(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		return FALSE;
	}
	dest_blk_id = cs_addrs->reorg_last_dest;
	inctn_opcode = inctn_mu_reorg;
	/* If resume option is present, then reorg_restart_key should be not null.
	 * Skip all globals until we are in the region for that global.
	 * Get the reorg_restart_key and reorg_restart_block from database header and restart from there.
	 */
	if (*resume && 0 != cs_data->reorg_restart_key[0])
	{	/* resume from last key reorged in GVT */
		GET_KEY_LEN(tkeysize, &cs_data->reorg_restart_key[0]);
		memcpy(gv_currkey->base, cs_data->reorg_restart_key, tkeysize);
		gv_currkey->end = tkeysize - 1;
		dest_blk_id = cs_data->reorg_restart_block;
		if (0 == memcmp(cs_data->reorg_restart_key, gn->str.addr, gn->str.len))
			/* The saved restart key starts with this global's name, so this is the global to resume
			 * from; clear the flag so that this and later globals are processed normally.
			 */
			*resume = FALSE;
	} else
	{	/* start from the left most leaf */
		memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
		gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
		gv_currkey->end = gn->str.len + 1;
	}
	/* Still in resume mode here means the saved restart key did not match this global: skip it */
	if (*resume)
	{
		util_out_print("REORG cannot be resumed from this point, Skipping this global...", FLUSH);
		memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
		gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
		gv_currkey->end = gn->str.len + 1;
		return TRUE;
	}
	memcpy(&gv_currkey_next_reorg->base[0], &gv_currkey->base[0], gv_currkey->end + 1);
	gv_currkey_next_reorg->end = gv_currkey->end;
	if (2 > dest_blk_id)
		dest_blk_id = 2; /* we know that first block is bitmap and next one is directory tree root */
	file_extended = cs_data->trans_hist.total_blks;
	blk_size = cs_data->blk_size;
	/* Fill targets and tolerances are in bytes: a percentage of the block size, less the reserved bytes */
	d_max_fill = (double)data_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes;
	i_max_fill = (double)index_fill_factor * blk_size / 100.0 - cs_data->reserved_bytes;
	d_toler = (double) DATA_FILL_TOLERANCE * blk_size / 100.0;
	i_toler = (double) INDEX_FILL_TOLERANCE * blk_size / 100.0;
	blks_killed = blks_processed = blks_reused = lvls_reduced = blks_coalesced = blks_split = blks_swapped = 0;
	pre_order_successor_level = level = MAX_BT_DEPTH + 1; /* Just some high value to initialize */

	/* --- more detailed debugging information --- */
	/* NOTE: the assignment inside the condition is intentional (sets detailed_log and tests it) */
	if (detailed_log = reorg_op & DETAIL)
		util_out_print("STARTING to work on global ^!AD from region !AD", TRUE, gn->str.len, gn->str.addr, REG_LEN_STR(gv_cur_region));

	/* In each iteration of MAIN loop, a working block is processed for a GVT */
	for (; ;)	/* ================ START MAIN LOOP ================ */
	{
		/* If right sibling is completely merged with the working block, do not swap the working block
		 * with its final destination block. Continue trying next right sibling. Swap only at the end.
		 */
		complete_merge = TRUE;
		while(complete_merge)	/* === START WHILE COMPLETE_MERGE === */
		{
			if (mu_ctrlc_occurred || mu_ctrly_occurred)
			{	/* interrupted: record where to resume before giving up */
				cs_data->reorg_restart_block = dest_blk_id;
				memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
				return FALSE;
			}
			complete_merge = FALSE;
			blks_processed++;
			t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
			/* Following for loop is to handle concurrency retry for split/coalesce */
			for (; ;)		/* === SPLIT-COALESCE LOOP STARTS === */
			{
				gv_target->clue.end = 0;
				/* search gv_currkey and get the result in gv_target */
				if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
				{	/* key not matched exactly; an empty leaf under a depth-1 tree means the global is gone */
					if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth)
					{
						if (cs_addrs->now_crit)
						{
							t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
							gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
							reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
							return TRUE; /* It is not an error that global was killed */
						} else
						{	/* NOTE(review): status is cdb_sc_normal on this path - confirm t_retry accepts that */
							assert(CDB_STAGNATE > t_tries);
							t_retry(status);
							continue;
						}
					}
				}
				if (gv_target->hist.depth <= level)
				{	/* Will come here
					 * 1) first iteration of the for loop (since level == MAX_BT_DEPTH + 1) or,
					 * 2) tree depth decreased for mu_reduce_level or, M-kill
					 */
					pre_order_successor_level = gv_target->hist.depth - 1;
					if (MAX_BT_DEPTH + 1 != level)
					{	/* break the loop when tree depth decreased (case 2) */
						level = pre_order_successor_level;
						break;
					}
					level = pre_order_successor_level;
				}
				/* level 0 is judged against the data fill settings, every other level against the index ones */
				max_fill = (0 == level)? d_max_fill : i_max_fill;
				toler = (0 == level)? d_toler:i_toler;
				cur_blk_size = ((blk_hdr_ptr_t)(gv_target->hist.h[level].buffaddr))->bsiz;
				if (cur_blk_size > max_fill + toler && 0 == (reorg_op & NOSPLIT)) /* SPLIT BLOCK */
				{
					cnt1 = cnt2 = 0;
					/* history of current working block is in gv_target */
					status = mu_split(level, i_max_fill, d_max_fill, &cnt1, &cnt2);
					if (cdb_sc_maxlvl == status)
					{
						gtm_putmsg(VARLSTCNT(4) ERR_MAXBTLEVEL, 2, gn->str.len, gn->str.addr);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return FALSE;
					} else if (cdb_sc_normal == status)
					{
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							continue;
						}
						if (detailed_log)
							log_detailed_log("SPL", &(gv_target->hist), NULL, level, NULL, ret_tn);
						blks_reused += cnt1;
						lvls_reduced -= cnt2;	/* cnt2 from mu_split is subtracted from the levels-reduced count */
						blks_split++;
						break;
					} else if (cdb_sc_oprnotneeded == status)
					{	/* undo any update_array/cw_set changes and DROP THRU to mu_clsce */
						cw_set_depth = 0;
						CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
						assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if SPLIT BLOCK */
				/* We are here because, mu_split() was not called or, split was not done or, not required */
				rtsib_hist = gv_target->alt_hist;
				status = gvcst_rtsib(rtsib_hist, level);
				if (cdb_sc_normal != status && cdb_sc_endtree != status)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				}
				if (cdb_sc_endtree == status)
				{	/* no right sibling at this level; at level 0 that ends the whole traversal */
					if (0 == level)
						end_of_tree = TRUE;
					break;
				} else if (0 == level)
					pre_order_successor_level = rtsib_hist->depth - 1;
				/* COALESCE WITH RTSIB */
				kill_set_list.used = 0;
				if (cur_blk_size < max_fill - toler && 0 == (reorg_op & NOCOALESCE))
				{	/* histories are sent in &gv_target->hist and gv_target->alt_hist */
					status = mu_clsce(level, i_max_fill, d_max_fill, &kill_set_list, &complete_merge);
					if (cdb_sc_normal == status)
					{
						if (level) /* delete lower elements of array, t_end might confuse */
						{
							memmove(&rtsib_hist->h[0], &rtsib_hist->h[level], SIZEOF(srch_blk_status)*(rtsib_hist->depth - level + 2));
							rtsib_hist->depth = rtsib_hist->depth - level;
						}
						if (0 < kill_set_list.used) /* increase kill_in_prog */
						{
							need_kip_incr = TRUE;
							if (!cs_addrs->now_crit) /* Do not sleep while holding crit */
								WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
						}
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), rtsib_hist, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							assert(NULL == kip_csa);
							if (level)
							{	/* reinitialize level member in rtsib_hist srch_blk_status' */
								for (count = 0; count < MAX_BT_DEPTH; count++)
									rtsib_hist->h[count].level = count;
							}
							continue;
						}
						if (level)
						{	/* reinitialize level member in rtsib_hist srch_blk_status' */
							for (count = 0; count < MAX_BT_DEPTH; count++)
								rtsib_hist->h[count].level = count;
						}
						if (detailed_log)
							log_detailed_log("CLS", &(gv_target->hist), rtsib_hist, level, NULL, ret_tn);
						assert(0 < kill_set_list.used || (NULL == kip_csa));
						if (0 < kill_set_list.used) /* decrease kill_in_prog */
						{
							gvcst_kill_sort(&kill_set_list);
							GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
							DECR_KIP(cs_data, cs_addrs, kip_csa);
							if (detailed_log)
								log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn);
							blks_killed += kill_set_list.used;
						}
						blks_coalesced++;
						break;
					} else if (cdb_sc_oprnotneeded == status)
					{	/* undo any update_array/cw_set changes and DROP THRU to t_end */
						cw_set_depth = 0;
						CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
						assert(0 == cw_map_depth); /* mu_swap_blk (that changes cw_map_depth) comes later */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if try coalesce */
				if (0 == level)
				{	/* Note: In data block level:
					 *	if split is successful or,
					 *	if coalesce is successful without a complete merge of rtsib,
					 *		then gv_currkey_next_reorg is already set from the called function.
					 *	if split or, coalesce do a retry or,
					 *	if coalesce is successful with a complete merge then
					 *		gv_currkey will not be changed.
					 *	If split or, coalesce is not successful or, not needed then
					 *		here gv_currkey_next_reorg will be set from right sibling
					 */
					cw_set_depth = cw_map_depth = 0;
					GET_KEY_LEN(tkeysize, rtsib_hist->h[0].buffaddr + SIZEOF(blk_hdr) + SIZEOF(rec_hdr));
					if (2 < tkeysize && MAX_KEY_SZ >= tkeysize)
					{
						memcpy(&(gv_currkey_next_reorg->base[0]), rtsib_hist->h[0].buffaddr + SIZEOF(blk_hdr) +SIZEOF(rec_hdr), tkeysize);
						gv_currkey_next_reorg->end = tkeysize - 1;
						inctn_opcode = inctn_invalid_op; /* temporary reset; satisfy an assert in t_end() */
						assert(UPDTRNS_DB_UPDATED_MASK == update_trans);
						update_trans = 0; /* tell t_end, this is no longer an update transaction */
						if ((trans_num)0 == (ret_tn = t_end(rtsib_hist, NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							inctn_opcode = inctn_mu_reorg;	/* reset inctn_opcode to its default */
							update_trans = UPDTRNS_DB_UPDATED_MASK;/* reset update_trans to old value */
							assert(NULL == kip_csa);
							continue;
						}
						/* There is no need to reset update_trans in case of a successful "t_end" call.
						 * This is because before the next call to "t_end" we should have a call to
						 * "t_begin" which will reset update_trans anyways.
						 */
						inctn_opcode = inctn_mu_reorg;	/* reset inctn_opcode to its default */
						if (detailed_log)
							log_detailed_log("NOU", rtsib_hist, NULL, level, NULL, ret_tn);
					} else
					{	/* key length read from the sibling is out of range; treat as a concurrency problem.
						 * NOTE(review): status here is whatever the earlier calls left (it may be
						 * cdb_sc_normal or cdb_sc_oprnotneeded) - confirm this is the intended restart code.
						 */
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				} /* end if (0 == level) */
				break;
			}/* === SPLIT-COALESCE LOOP END === */
			t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
		}/* === END WHILE COMPLETE_MERGE === */
		if (mu_ctrlc_occurred || mu_ctrly_occurred)
		{
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end+1);
			return FALSE;
		}
		/* Now swap the working block */
		if (0 == (reorg_op & NOSWAP))
		{
			t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
			/* Following loop is to handle concurrency retry for swap */
			for (; ;)	/* === START OF SWAP LOOP === */
			{
				kill_set_list.used = 0;
				gv_target->clue.end = 0;
				/* search gv_currkey and get the result in gv_target */
				if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
				{	/* same "global no longer exists" check as in the split/coalesce loop */
					if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth)
					{
						if (cs_addrs->now_crit)
						{
							t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
							gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
							reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
							return TRUE; /* It is not an error that global was killed */
						} else
						{
							assert(CDB_STAGNATE > t_tries);
							t_retry(status);
							continue;
						}
					}
				}
				if (gv_target->hist.depth <= level)
					break;
				/* swap working block with appropriate dest_blk_id block.
				   Histories are sent as gv_target->hist and reorg_gv_target->hist */
				mu_reorg_in_swap_blk = TRUE;
				status = mu_swap_blk(level, &dest_blk_id, &kill_set_list, exclude_glist_ptr);
				mu_reorg_in_swap_blk = FALSE;
				if (cdb_sc_oprnotneeded == status)
				{
					if (cs_data->trans_hist.total_blks <= dest_blk_id)
					{
						util_out_print("REORG may be incomplete for this global.", TRUE);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return TRUE;
					}
				} else if (cdb_sc_normal == status)
				{
					if (0 < kill_set_list.used)
					{
						need_kip_incr = TRUE;
						if (!cs_addrs->now_crit) /* Do not sleep while holding crit */
							WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
						/* second history not needed, because,
						   we are reusing a free block, which does not need history */
						if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
						{
							need_kip_incr = FALSE;
							assert(NULL == kip_csa);
							DECR_BLK_NUM(dest_blk_id);
							continue;
						}
						if (detailed_log)
							log_detailed_log("SWA", &(gv_target->hist), NULL, level, NULL, ret_tn);
						gvcst_kill_sort(&kill_set_list);
						GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
						DECR_KIP(cs_data, cs_addrs, kip_csa);
						if (detailed_log)
							log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn);
						blks_reused += kill_set_list.used;
						blks_killed += kill_set_list.used;
					}
					/* gv_target->hist is for working block's history, and
					   reorg_gv_target->hist is for destination block's history.
					   Note: gv_target and reorg_gv_target can be part of different GVT.  */
					else if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), &(reorg_gv_target->hist), TN_NOT_SPECIFIED)))
					{
						need_kip_incr = FALSE;
						assert(NULL == kip_csa);
						DECR_BLK_NUM(dest_blk_id);
						continue;
					}
					if ((0 >= kill_set_list.used) && detailed_log)
						log_detailed_log("SWA", &(gv_target->hist), &(reorg_gv_target->hist), level, NULL, ret_tn);
					blks_swapped++;
					if (reorg_op & SWAPHIST)
						util_out_print("Dest !SL From !SL", TRUE, dest_blk_id, gv_target->hist.h[level].blk_num);
				} else
				{
					assert(CDB_STAGNATE > t_tries);
					t_retry(status);
					continue;
				}
				break;
			}	/* === END OF SWAP LOOP === */
			t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
		}
		if (mu_ctrlc_occurred || mu_ctrly_occurred)
		{
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
			return FALSE;
		}
		if (end_of_tree)
			break;
		if (0 < level)
			level--;	/* Order of reorg is root towards leaf */
		else
		{	/* finished a leaf: move on to the next key and checkpoint the restart position in the header */
			level = pre_order_successor_level;
			memcpy(&gv_currkey->base[0], &gv_currkey_next_reorg->base[0], gv_currkey_next_reorg->end + 1);
			gv_currkey->end = gv_currkey_next_reorg->end;
			cs_data->reorg_restart_block = dest_blk_id;
			memcpy(&cs_data->reorg_restart_key[0], &gv_currkey->base[0], gv_currkey->end + 1);
		}
	}		/* ================ END MAIN LOOP ================ */

	/* =========== START REDUCE LEVEL ============== */
	/* reset gv_currkey to the bare global name (name followed by two NUL bytes) */
	memcpy(&gv_currkey->base[0], gn->str.addr, gn->str.len);
	gv_currkey->base[gn->str.len] = gv_currkey->base[gn->str.len + 1] = 0;
	gv_currkey->end = gn->str.len + 1;
	for (;;)	/* Reduce level continues until it fails to reduce */
	{
		t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
		cnt1 = 0;	/* set to 1 below only when a level was actually removed in this pass */
		for (; ;)	/* main reduce level loop starts */
		{
			kill_set_list.used = 0;
			gv_target->clue.end = 0;
			/* search gv_currkey and get the result in gv_target */
			if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(status);
				continue;
			} else if (gv_currkey->end + 1 != gv_target->hist.h[0].curr_rec.match)
			{
				if (SIZEOF(blk_hdr) == ((blk_hdr_ptr_t)gv_target->hist.h[0].buffaddr)->bsiz && 1 == gv_target->hist.depth)
				{
					if (cs_addrs->now_crit)
					{
						t_abort(gv_cur_region, cs_addrs); /* do crit and other cleanup */
						gtm_putmsg(VARLSTCNT(4) ERR_GBLNOEXIST, 2, gn->str.len, gn->str.addr);
						reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
						return TRUE; /* It is not an error that global was killed */
					} else
					{
						assert(CDB_STAGNATE > t_tries);
						t_retry(status);
						continue;
					}
				}
			}
			if (gv_target->hist.depth <= level)
				break;
			/* History is passed in gv_target->hist */
			status = mu_reduce_level(&kill_set_list);
			if (cdb_sc_oprnotneeded != status && cdb_sc_normal != status)
			{
				assert(CDB_STAGNATE > t_tries);
				t_retry(status);
				continue;
			} else if (cdb_sc_normal == status)
			{
				assert(0 < kill_set_list.used);
				need_kip_incr = TRUE;
				if (!cs_addrs->now_crit) /* Do not sleep while holding crit */
					WAIT_ON_INHIBIT_KILLS(cs_addrs->nl, MAXWAIT2KILL);
				if ((trans_num)0 == (ret_tn = t_end(&(gv_target->hist), NULL, TN_NOT_SPECIFIED)))
				{
					need_kip_incr = FALSE;
					assert(NULL == kip_csa);
					continue;
				}
				if (detailed_log)
					log_detailed_log("RDL", &(gv_target->hist), NULL, level, NULL, ret_tn);
				gvcst_kill_sort(&kill_set_list);
				GVCST_BMP_MARK_FREE(&kill_set_list, ret_tn, inctn_mu_reorg, inctn_bmp_mark_free_mu_reorg, inctn_opcode, cs_addrs)
				DECR_KIP(cs_data, cs_addrs, kip_csa);
				if (detailed_log)
					log_detailed_log("KIL", &(gv_target->hist), NULL, level, &kill_set_list, ret_tn);
				blks_reused += kill_set_list.used;
				blks_killed += kill_set_list.used;
				cnt1 = 1;
				lvls_reduced++;
			}
			break;
		}		/* main reduce level loop ends */
		t_abort(gv_cur_region, cs_addrs);	/* do crit and other cleanup */
		if (0 == cnt1)
			break;
	}		/* =========== END REDUCE LEVEL ===========*/

	reorg_finish(dest_blk_id, blks_processed, blks_killed, blks_reused, file_extended, lvls_reduced, blks_coalesced, blks_split, blks_swapped);
	return TRUE;

} /* end mu_reorg() */