/* gvcst_order: driver around gvcst_order2() that keeps hidden subscripts out of the result.
 *
 * Runs gvcst_order2() once. If the key it left in gv_altkey ends in a hidden subscript, the search is
 * repeated (inside an implicit transaction when the caller is not already in TP) from a gv_currkey whose
 * last subscript has been replaced by the highest possible hidden subscript, so the second search lands on
 * the next non-hidden subscript. gv_currkey's last subscript is saved beforehand and restored afterwards.
 *
 * Returns the "found" result of the last gvcst_order2() call. The hidden-subscript handling is only
 * compiled when UNIX is defined; otherwise this is a plain call to gvcst_order2().
 */
boolean_t gvcst_order(void)
{	/* See gvcst_query.c */
	boolean_t	found, is_hidden, sn_tpwrapped;
	boolean_t	est_first_pass;
	gv_key		save_currkey[DBKEYALLOC(MAX_KEY_SZ)];
	int		end, prev, oldend;
	int		save_dollar_tlevel;

	/* save_dollar_tlevel is only assigned (and only read, by the asserts below) in debug builds */
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_order2();
# ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	/* Macro defined elsewhere; presumably returns "found" right here unless the result is a hidden
	 * subscript, which is what the assert on the next line expects - confirm against the macro.
	 */
	CHECK_HIDDEN_SUBSCRIPT_AND_RETURN(found, gv_altkey, is_hidden);
	assert(found && is_hidden);
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	SAVE_GV_CURRKEY_LAST_SUBSCRIPT(save_currkey, prev, oldend);
	if (!dollar_tlevel)
	{	/* Not in TP: wrap the repeated searches in an implicit transaction and redo the first search inside it */
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_order_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
		/* gvcst_order2() bumps n_order on every call; take one back so the redo is not counted twice */
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, (gtm_uint64_t) -1);
		found = gvcst_order2();
	} else
		sn_tpwrapped = FALSE;
	if (found)
	{
		CHECK_HIDDEN_SUBSCRIPT(gv_altkey, is_hidden);
		if (is_hidden)
		{	/* Replace last subscript to be the highest possible hidden subscript so another
			 * gvcst_order2 will give us the next non-hidden subscript.
			 */
			REPLACE_HIDDEN_SUB_TO_HIGHEST(gv_altkey, gv_currkey);	/* uses gv_altkey to modify gv_currkey */
			/* fix up since it should only be externally counted as one $order */
			INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, (gtm_uint64_t) -1);
			found = gvcst_order2();
		}
	}
	if (sn_tpwrapped)
	{	/* Only commit/revert what this function itself started */
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY_LAST_SUBSCRIPT(save_currkey, prev, oldend);
	assert(save_dollar_tlevel == dollar_tlevel);
# endif
	return found;
}
/* gvcst_query: driver around gvcst_query2() that keeps hidden subscripts out of the result.
 *
 * Runs gvcst_query2() once. If the key left in gv_altkey ends in a hidden subscript, gv_currkey is saved
 * and the search is repeated (inside an implicit transaction when the caller is not already in TP), each
 * time from a gv_currkey whose last subscript has been replaced by the highest possible hidden subscript,
 * for at most MAX_GVSUBSCRIPTS + 1 iterations. gv_currkey is restored before returning.
 *
 * Returns the "found" result of the last gvcst_query2() call. The hidden-subscript handling is only
 * compiled when UNIX is defined.
 */
boolean_t gvcst_query(void)
{	/* Similar to gvcst_order and gvcst_zprevious. In each case we skip over hidden subscripts as needed.
	 *
	 *     1  2  3  NULL                       <--- order/zprev...
	 *     1  2  3  NULL  NULL
	 *     1  2  3  NULL  NULL  NULL           <--- query from here...
	 *     1  2  3  NULL  NULL  NULL  hidden
	 *     1  2  3  NULL  NULL  hidden
	 *     1  2  3  NULL  hidden
	 *     1  2  3  hidden                     <--- ... skip this guy and go to bottom/top, respectively
	 *     1  2  3  7                          <--- ... needs to end up here
	 */
	boolean_t	found, is_hidden, sn_tpwrapped;
	boolean_t	est_first_pass;
	gv_key		save_currkey[DBKEYALLOC(MAX_KEY_SZ)];
	int		i;
	int		save_dollar_tlevel;

	/* save_dollar_tlevel is only assigned (and only read, by the asserts below) in debug builds */
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_query2();
# ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	/* Macro defined elsewhere; presumably returns "found" right here unless the result is a hidden
	 * subscript, which is what the assert two lines down expects - confirm against the macro.
	 */
	CHECK_HIDDEN_SUBSCRIPT_AND_RETURN(found, gv_altkey, is_hidden);
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	assert(found && is_hidden);
	SAVE_GV_CURRKEY(save_currkey);
	if (!dollar_tlevel)
	{	/* Not in TP: wrap the repeated searches in an implicit transaction */
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_query_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		sn_tpwrapped = FALSE;
	for (i = 0; i <= MAX_GVSUBSCRIPTS; i++)
	{	/* gvcst_query2() bumps n_query on every call; take one back up front for each repeated call */
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, (gtm_uint64_t) -1);
		found = gvcst_query2();
		/* Macro defined elsewhere; presumably leaves the loop unless the result is again a hidden
		 * subscript (see the assert that follows) - confirm against the macro.
		 */
		CHECK_HIDDEN_SUBSCRIPT_AND_BREAK(found, gv_altkey, is_hidden);
		assert(found && is_hidden);
		/* Replace last subscript to be the highest possible hidden subscript so another
		 * gvcst_query2 will give us the next non-hidden subscript.
		 */
		REPLACE_HIDDEN_SUB_TO_HIGHEST(gv_altkey, gv_currkey);	/* uses gv_altkey to modify gv_currkey */
	}
	if (sn_tpwrapped)
	{	/* Only commit/revert what this function itself started */
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY(save_currkey);
	assert(save_dollar_tlevel == dollar_tlevel);
# endif
	return found;
}
/* gvcst_queryget: driver around gvcst_queryget2() that handles hidden subscripts and dummy values.
 *
 * val	- receives the value of the node that was found.
 *
 * Runs gvcst_queryget2() once. If nothing was found, or the result is neither a hidden subscript nor a
 * dummy value (IS_SN_DUMMY), that result is returned as is. Otherwise the lookup is redone as
 * gvcst_query() followed by gvcst_get() on the key gvcst_query() left in gv_altkey, inside an implicit
 * transaction when the caller is not already in TP. gv_currkey is saved and restored around the redo.
 *
 * Returns the "found" result of the last lookup performed. The redo logic is only compiled when UNIX is
 * defined.
 */
boolean_t gvcst_queryget(mval *val)
{
	bool		found, is_hidden, is_dummy = FALSE, sn_tpwrapped;
	boolean_t	est_first_pass;
	/* save_currkey and save_gv_currkey are not named anywhere below; presumably the argument-less
	 * SAVE_GV_CURRKEY / RESTORE_GV_CURRKEY macros refer to them by name - do not rename without checking.
	 */
	char		save_currkey[SIZEOF(gv_key) + DBKEYSIZE(MAX_KEY_SZ)];
	gv_key		*save_gv_currkey;
	int		save_dollar_tlevel;

	/* save_dollar_tlevel is only assigned (and only read, by the asserts below) in debug builds */
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_queryget2(val, NULL);
# ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	CHECK_HIDDEN_SUBSCRIPT(gv_altkey, is_hidden);
	if (found && IS_SN_DUMMY(val->str.len, val->str.addr))
		is_dummy = TRUE;
	/* Common case: nothing found, or an ordinary node with an ordinary value */
	if (!found || (!is_dummy && !is_hidden))
		return found;
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	SAVE_GV_CURRKEY;
	if (!dollar_tlevel)
	{	/* Not in TP: wrap the query + get pair in an implicit transaction */
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_queryget_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		sn_tpwrapped = FALSE;
	/* NOTE(review): the result of gvcst_query() is overwritten by gvcst_get() below without being
	 * examined, so gv_altkey is copied and fetched even when the query reports "not found" - confirm
	 * that gvcst_get() gives the right answer in that case.
	 */
	found = gvcst_query();
	COPY_KEY(gv_currkey, gv_altkey); /* set gv_currkey to gv_altkey */
	found = gvcst_get(val);
	INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, (gtm_uint64_t) -1); /* only counted externally as one get */
	INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, (gtm_uint64_t) -1);
	if (sn_tpwrapped)
	{	/* Only commit/revert what this function itself started */
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY;
	assert(save_dollar_tlevel == dollar_tlevel);
# endif
	return found;
}
boolean_t gvcst_query2(void) { boolean_t found, two_histories; enum cdb_sc status; blk_hdr_ptr_t bp; rec_hdr_ptr_t rp; unsigned char *c1, *c2; srch_blk_status *bh; srch_hist *rt_history; T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { two_histories = FALSE; # if defined(DEBUG) && defined(UNIX) if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYFAIL == gtm_white_box_test_case_number)) { t_retry(cdb_sc_blknumerr); continue; } # endif if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0))) { found = TRUE; bh = &gv_target->hist.h[0]; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; if (rp >= (rec_hdr_ptr_t)CST_TOB(bp)) { two_histories = TRUE; rt_history = gv_target->alt_hist; status = gvcst_rtsib(rt_history, 0); if (cdb_sc_endtree == status) /* end of tree */ { found = FALSE; two_histories = FALSE; /* second history not valid */ } else if (cdb_sc_normal != status) { t_retry(status); continue; } else { bh = &rt_history->h[0]; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh))) { t_retry(status); continue; } rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; } } if (found) { /* !found indicates that the end of tree has been reached (see call to * gvcst_rtsib). If there is no more tree, don't bother doing expansion. */ status = gvcst_expand_curr_key(bh, gv_currkey, gv_altkey); if (cdb_sc_normal != status) { t_retry(status); continue; } } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(!two_histories ? 
NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } assert(cs_data == cs_addrs->hdr); INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, 1); if (found) { c1 = &gv_altkey->base[0]; c2 = &gv_currkey->base[0]; for ( ; *c2; ) { if (*c2++ != *c1++) break; } if (!*c2 && !*c1) return TRUE; } return FALSE; } t_retry(status); } }
mint gvcst_data(void) { blk_hdr_ptr_t bp; enum cdb_sc status; mint val; rec_hdr_ptr_t rp; unsigned short rec_size; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t b_top; assert((gv_target->root < cs_addrs->ti->total_blks) || (0 < dollar_tlevel)); T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { rt_history = gv_target->alt_hist; rt_history->h[0].blk_num = 0; if ((status = gvcst_search(gv_currkey, NULL)) != cdb_sc_normal) { t_retry(status); continue; } bh = gv_target->hist.h; bp = (blk_hdr_ptr_t)bh->buffaddr; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); b_top = bh->buffaddr + bp->bsiz; val = 0; if (gv_currkey->end + 1 == bh->curr_rec.match) val = 1; else if (bh->curr_rec.match >= gv_currkey->end) val = 10; if (1 == val || rp == (rec_hdr_ptr_t)b_top) { GET_USHORT(rec_size, &rp->rsiz); if (rp == (rec_hdr_ptr_t)b_top || (sm_uc_ptr_t)rp + rec_size == b_top) { if (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0))) { if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h)))) { t_retry(status); continue; } if (rt_history->h[0].curr_rec.match >= gv_currkey->end) val += 10; } } else { if ((sm_uc_ptr_t)rp + rec_size > b_top) { t_retry(cdb_sc_rmisalign); continue; } rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rec_size); if (rp->cmpc >= gv_currkey->end) val += 10; } } if (0 == dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history)) continue; } else { status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1); return val; } }
boolean_t gvcst_order2(void) { blk_hdr_ptr_t bp; boolean_t found, two_histories; enum cdb_sc status; rec_hdr_ptr_t rp; unsigned short rec_size; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t c1, c2, ctop, alt_top; int tmp_cmpc; T_BEGIN_READ_NONTP_OR_TP(ERR_GVORDERFAIL); for (;;) { assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ two_histories = FALSE; #if defined(DEBUG) && defined(UNIX) if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVORDERFAIL == gtm_white_box_test_case_number)) { status = cdb_sc_blknumerr; t_retry(status); continue; } #endif if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL))) { found = TRUE; bh = gv_target->hist.h; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; if ((rec_hdr_ptr_t)CST_TOB(bp) <= rp) { two_histories = TRUE; rt_history = gv_target->alt_hist; status = gvcst_rtsib(rt_history, 0); if (cdb_sc_normal == status) { bh = rt_history->h; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh))) { t_retry(status); continue; } rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; } else { if (cdb_sc_endtree == status) { found = FALSE; two_histories = FALSE; /* second history not valid */ } else { t_retry(status); continue; } } } if (found) { assert(gv_altkey->top == gv_currkey->top); assert(gv_altkey->top == gv_keysize); assert(gv_altkey->end < gv_altkey->top); /* store new subscipt */ c1 = gv_altkey->base; alt_top = gv_altkey->base + gv_altkey->top - 1; /* Make alt_top one less than gv_altkey->top to allow double-null at end of a key-name */ /* 4/17/96 * HP compiler bug work-around. The original statement was * c2 = (unsigned char *)CST_BOK(rp) + bh->curr_rec.match - rp->cmpc; * * ...but this was sometimes compiled incorrectly (the lower 4 bits * of rp->cmpc, sign extended, were subtracted from bh->curr_rec.match). 
* I separated out the subtraction of rp->cmpc. * * -VTF. */ c2 = (sm_uc_ptr_t)CST_BOK(rp) + bh->curr_rec.match; memcpy(c1, gv_currkey->base, bh->curr_rec.match); c1 += bh->curr_rec.match; c2 -= EVAL_CMPC(rp); GET_USHORT(rec_size, &rp->rsiz); ctop = (sm_uc_ptr_t)rp + rec_size; for (;;) { if (c2 >= ctop || c1 >= alt_top) { assert(CDB_STAGNATE > t_tries); status = cdb_sc_rmisalign; goto restart; /* goto needed because of nested FOR loop */ } if (0 == (*c1++ = *c2++)) { *c1 = 0; break; } } gv_altkey->end = c1 - gv_altkey->base; assert(gv_altkey->end < gv_altkey->top); } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, two_histories ? rt_history : NULL, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(two_histories ? rt_history : NULL); if (cdb_sc_normal != status) { t_retry(status); continue; } } assert(cs_data == cs_addrs->hdr); INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, 1); return (found && (bh->curr_rec.match >= gv_currkey->prev)); } restart: t_retry(status); } }
mint gvcst_data(void) { blk_hdr_ptr_t bp; boolean_t do_rtsib; enum cdb_sc status; mint val; rec_hdr_ptr_t rp; unsigned short match, rsiz; srch_blk_status *bh; srch_hist *rt_history; sm_uc_ptr_t b_top; assert((gv_target->root < cs_addrs->ti->total_blks) || dollar_tlevel); T_BEGIN_READ_NONTP_OR_TP(ERR_GVDATAFAIL); assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ for (;;) { /* The following code is duplicated in gvcst_dataget. Any changes here might need to be reflected there as well */ rt_history = gv_target->alt_hist; rt_history->h[0].blk_num = 0; if (cdb_sc_normal != (status = gvcst_search(gv_currkey, NULL))) { t_retry(status); continue; } bh = gv_target->hist.h; bp = (blk_hdr_ptr_t)bh->buffaddr; rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset); b_top = bh->buffaddr + bp->bsiz; match = bh->curr_rec.match; do_rtsib = FALSE; if (gv_currkey->end + 1 == match) { val = 1; GET_USHORT(rsiz, &rp->rsiz); rp = (rec_hdr_ptr_t)((sm_uc_ptr_t)rp + rsiz); if ((sm_uc_ptr_t)rp > b_top) { t_retry(cdb_sc_rmisalign); continue; } else if ((sm_uc_ptr_t)rp == b_top) do_rtsib = TRUE; else if (rp->cmpc >= gv_currkey->end) val += 10; } else if (match >= gv_currkey->end) val = 10; else { val = 0; if (rp == (rec_hdr_ptr_t)b_top) do_rtsib = TRUE; } if (do_rtsib && (cdb_sc_endtree != (status = gvcst_rtsib(rt_history, 0)))) { if ((cdb_sc_normal != status) || (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, rt_history->h)))) { t_retry(status); continue; } if (rt_history->h[0].curr_rec.match >= gv_currkey->end) { assert(1 >= val); val += 10; } } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, 0 == rt_history->h[0].blk_num ? NULL : rt_history, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(0 == rt_history->h[0].blk_num ? NULL : rt_history); if (cdb_sc_normal != status) { t_retry(status); continue; } } INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_data, 1); return val; } }
/* gvcst_queryget2: one validated pass of the combined query + get operation.
 *
 * val		- on success, set to a string mval whose bytes were copied to the string pool.
 * sn_ptr	- not referenced in this function body.
 *
 * Searches for gv_currkey, expands the key of the record the search stopped at into gv_altkey (moving to
 * the right sibling block first when the search stopped at the top of the leaf block) and copies that
 * record's data portion to stringpool.free. The string pool pointer is only advanced after the read has
 * been validated with t_end() (non-TP) or tp_hist() (TP); any failure goes through t_retry() and the pass
 * is repeated.
 *
 * Returns TRUE when a record was found, FALSE at end of tree (val is left untouched in that case).
 *
 * The closing brace of the function body was missing (the text stopped after the brace that closes the
 * for loop); it is restored here.
 */
boolean_t gvcst_queryget2(mval *val, unsigned char *sn_ptr)
{
	blk_hdr_ptr_t	bp;
	boolean_t	found, two_histories;
	enum cdb_sc	status;
	int		rsiz, key_size, data_len;
	rec_hdr_ptr_t	rp;
	srch_blk_status	*bh;
	srch_hist	*rt_history;
	unsigned short	temp_ushort;
	int		tmp_cmpc;
	DEBUG_ONLY(unsigned char *save_strp = NULL);

	T_BEGIN_READ_NONTP_OR_TP(ERR_GVQUERYGETFAIL);
	assert((CDB_STAGNATE > t_tries) || cs_addrs->now_crit);	/* we better hold crit in the final retry (TP & non-TP) */
	for (;;)
	{
		two_histories = FALSE;
#if defined(DEBUG) && defined(UNIX)
		if (gtm_white_box_test_case_enabled && (WBTEST_ANTIFREEZE_GVQUERYGETFAIL == gtm_white_box_test_case_number))
		{	/* white-box test: force a retry */
			status = cdb_sc_blknumerr;
			t_retry(status);
			continue;
		}
#endif
		if (cdb_sc_normal == (status = gvcst_search(gv_currkey, 0)))
		{
			found = TRUE;
			bh = &gv_target->hist.h[0];
			rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
			bp = (blk_hdr_ptr_t)bh->buffaddr;
			if (rp >= (rec_hdr_ptr_t)CST_TOB(bp))
			{	/* search stopped at the top of the block: continue in the right sibling */
				two_histories = TRUE;
				rt_history = gv_target->alt_hist;
				status = gvcst_rtsib(rt_history, 0);
				if (cdb_sc_endtree == status)		/* end of tree */
				{
					found = FALSE;
					two_histories = FALSE;		/* second history not valid */
				} else if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				} else
				{
					bh = &rt_history->h[0];
					if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh)))
					{
						t_retry(status);
						continue;
					}
					rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->curr_rec.offset);
					bp = (blk_hdr_ptr_t)bh->buffaddr;
				}
			}
			/* !found indicates that the end of tree has been reached (see call to
			 * gvcst_rtsib). If there is no more tree, don't bother doing expansion.
			 */
			if (found)
			{
				status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, (int4)((sm_uc_ptr_t)rp - bh->buffaddr),
								gv_altkey);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
				key_size = gv_altkey->end + 1;
				GET_RSIZ(rsiz, rp);
				/* data length = record size - record header - the part of the key stored in the record */
				data_len = rsiz + EVAL_CMPC(rp) - SIZEOF(rec_hdr) - key_size;
				if (data_len < 0 || (sm_uc_ptr_t)rp + rsiz > (sm_uc_ptr_t)bp + ((blk_hdr_ptr_t)bp)->bsiz)
				{	/* record does not fit in the block: inconsistent read */
					assert(CDB_STAGNATE > t_tries);
					t_retry(cdb_sc_rmisalign1);
					continue;
				}
				ENSURE_STP_FREE_SPACE(data_len);
				DEBUG_ONLY (
				if (!save_strp)
					save_strp = stringpool.free);
				assert(stringpool.top - stringpool.free >= data_len);
				/* Copy now, but do not claim the space until the read has been validated */
				memcpy(stringpool.free, (sm_uc_ptr_t)rp + rsiz - data_len, data_len);
				/* Assumption: t_end/tp_hist will never cause stp_gcol() call	BYPASSOK */
			}
			/* Validate what was read: t_end() outside TP, tp_hist() inside TP */
			if (!dollar_tlevel)
			{
				if ((trans_num)0 == t_end(&gv_target->hist, !two_histories ? NULL : rt_history, TN_NOT_SPECIFIED))
					continue;
			} else
			{
				status = tp_hist(!two_histories ? NULL : rt_history);
				if (cdb_sc_normal != status)
				{
					t_retry(status);
					continue;
				}
			}
			if (found)
			{
				DEBUG_ONLY(assert(save_strp == stringpool.free));
				/* Process val first. Already copied to string pool. */
				val->mvtype = MV_STR;
				val->str.addr = (char *)stringpool.free;
				val->str.len = data_len;
				stringpool.free += data_len;
				INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, 1);
			}
			return found;
		}
		t_retry(status);	/* gvcst_search failed */
	}
}
bool gvcst_zprevious(void) { static gv_key *zprev_temp_key; static int4 zprev_temp_keysize = 0; blk_hdr_ptr_t bp; bool found, two_histories; enum cdb_sc status; rec_hdr_ptr_t rp; unsigned char *c1, *c2, *ctop; srch_blk_status *bh; srch_hist *lft_history; T_BEGIN_READ_NONTP_OR_TP(ERR_GVORDERFAIL); for (;;) { assert(t_tries < CDB_STAGNATE || cs_addrs->now_crit); /* we better hold crit in the final retry (TP & non-TP) */ two_histories = FALSE; if (cdb_sc_normal == (status = gvcst_search(gv_currkey, NULL))) { found = TRUE; bh = gv_target->hist.h; if (0 == bh->prev_rec.offset) { two_histories = TRUE; lft_history = gv_target->alt_hist; status = gvcst_lftsib(lft_history); if (cdb_sc_normal == status) { bh = lft_history->h; if (cdb_sc_normal != (status = gvcst_search_blk(gv_currkey, bh))) { t_retry(status); continue; } } else if (cdb_sc_endtree == status) { found = FALSE; two_histories = FALSE; /* second history not valid */ } else { t_retry(status); continue; } } if (found) { /* store new subscipt */ assert(gv_altkey->top == gv_currkey->top); assert(gv_altkey->top == gv_keysize); assert(gv_currkey->end < gv_currkey->top); rp = (rec_hdr_ptr_t)(bh->buffaddr + bh->prev_rec.offset); bp = (blk_hdr_ptr_t)bh->buffaddr; c1 = gv_altkey->base; memcpy(c1, gv_currkey->base, bh->prev_rec.match); c1 += bh->prev_rec.match; assert(zprev_temp_keysize <= gv_keysize); if (zprev_temp_keysize < gv_keysize) { zprev_temp_keysize = gv_keysize; GVKEY_INIT(zprev_temp_key, zprev_temp_keysize); } assert(zprev_temp_key->top >= gv_currkey->top); if (cdb_sc_normal != (status = gvcst_expand_key((blk_hdr_ptr_t)bh->buffaddr, bh->prev_rec.offset, zprev_temp_key))) { t_retry(status); continue; } if ((zprev_temp_key->end < gv_currkey->end) && (zprev_temp_key->end <= gv_currkey->prev)) found = FALSE; else { c2 = zprev_temp_key->base + bh->prev_rec.match; ctop = zprev_temp_key->base + zprev_temp_key->end; for (;;) { if (c2 >= ctop) { assert(CDB_STAGNATE > t_tries); status = cdb_sc_rmisalign; goto restart; /* 
goto needed because of nested FOR loop */ } if (0 == (*c1++ = *c2++)) { *c1 = 0; break; } } } gv_altkey->end = c1 - gv_altkey->base; assert(gv_altkey->end < gv_altkey->top); } if (!dollar_tlevel) { if ((trans_num)0 == t_end(&gv_target->hist, two_histories ? lft_history : NULL, TN_NOT_SPECIFIED)) continue; } else { status = tp_hist(two_histories ? lft_history : NULL); if (cdb_sc_normal != status) { t_retry(status); continue; } } assert(cs_data == cs_addrs->hdr); INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_zprev, 1); return (found && (bh->prev_rec.match >= gv_currkey->prev)); } restart: t_retry(status); } }
/* jnl_file_extend: make room in the journal for total_jnl_rec_size bytes of journal records.
 *
 * jpc			- journal private control of the region being journaled.
 * total_jnl_rec_size	- size of the journal records that need to fit.
 *
 * Either "virtually" extends the current journal file (updates jb->filesize and the virtual_size field of
 * the journal file header on disk) or, when the extension would exceed csd->autoswitchlimit, closes the
 * current journal file and creates a new one (autoswitch), possibly followed by one extension of the new
 * file. Low-disk-space conditions are reported through send_msg().
 *
 * Returns EXIT_NRM after a successful extension/switch (n_jnl_extends is incremented). Otherwise sets
 * jpc->status to ERR_JNLREADEOF, calls jnl_file_lost(jpc, ERR_JNLEXTEND) and returns EXIT_ERR. Failures to
 * open, read or write the journal file are raised through rts_error().
 */
uint4 jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t	jb;
	jnl_create_info		jnl_info;
	jnl_file_header		*header;
	unsigned char		hdr_buff[REAL_JNL_HDR_LEN + MAX_IO_BLOCK_SIZE];	/* room to align the header to the fs block size */
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks, warn_blocks, result;	/* NOTE(review): "result" is not used in this function body */
	gtm_uint64_t		avail_blocks;
	uint4			aligned_tot_jrec_size, count;
	uint4			jnl_fs_block_size, read_write_size;
	DCL_THREADGBL_ACCESS;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(&FILE_INFO(jpc->region)->s_addrs == csa);
	assert(csa->jnl_state == csd->jnl_state);
	assertpro(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && (!JNL_FILE_SWITCHED(jpc)));
		/* crit and messing with the journal file - how could it have vanished? */
	/* Decide how many blocks to ask for */
	if (!csd->jnl_deq || (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit))
	{	/* No extension size, or a single extension would already pass the limit: ask for a whole
		 * allocation's worth (the assert expects jnl_alq to equal autoswitchlimit in this case).
		 */
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* usually we will do the loop just once where we do the file extension.
		 * rarely we might need to do an autoswitch instead after which again rarely
		 * we might need to do an extension on the new journal to fit in the transaction's journal requirements.
		 * therefore we should do this loop a maximum of twice. hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		/* Step 1: disk space check; this only produces messages unless the request cannot be met at all */
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			warn_blocks = (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit)
					? ((csd->jnl_deq > csd->autoswitchlimit) ? csd->jnl_deq : csd->autoswitchlimit)
					: new_blocks;
			if ((warn_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* If we cannot satisfy the request, it is an error, unless the anticipatory freeze
					 * scheme is in effect in which case, we will assume space is available even if
					 * it is not and go ahead with writes to the disk. If the writes fail with ENOSPC
					 * we will freeze the instance and wait for space to become available and keep
					 * retrying the writes. Therefore, we make the NOSPACEEXT a warning in this case.
					 */
					SETUP_THREADGBL_ACCESS;
					if (!ANTICIPATORY_FREEZE_ENABLED(csa))
					{	/* NOTE(review): avail_blocks is a gtm_uint64_t passed through the
						 * variadic send_msg - confirm the message parameter accepts 64 bits.
						 */
						send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd),
							new_blocks, avail_blocks);
						new_blocks = 0;	/* makes the code after the loop take the failure path */
						jpc->status = SS_NORMAL;
						break;
					} else
						send_msg(VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4, JNL_LEN_STR(csd),
							new_blocks, avail_blocks);
				} else
					/* NOTE(review): the last argument is a 64-bit expression - same question as above */
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd),
						(avail_blocks - warn_blocks));
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		/* Step 2: either switch to a new journal file or extend the current one */
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, SIZEOF(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal. Set a global variable in_jnl_file_autoswitch
				 * so jnl_write can know not to do the padding check. But because this is a global
				 * variable, we also need to make sure it is reset in case of errors during the
				 * autoswitch (or else calls to jnl_write after we are out of the autoswitch logic
				 * will continue to incorrectly not do the padding check). Hence a condition handler.
				 */
				assert(!in_jnl_file_autoswitch);
				in_jnl_file_autoswitch = TRUE;
				/* Also make sure time is not changed. This way if "jnl_write" as part of writing a
				 * journal record invokes jnl_file_extend, when the autoswitch is done and writing
				 * of the parent jnl_write resumes, we want it to continue with the same timestamp
				 * and not have to reset its time (non-trivial task) to reflect any changes since then.
				 */
				assert(!jgbl.save_dont_reset_gbl_jrec_time);
				jgbl.save_dont_reset_gbl_jrec_time = jgbl.dont_reset_gbl_jrec_time;
				jgbl.dont_reset_gbl_jrec_time = TRUE;
				/* Establish a condition handler so we reset a few global variables that have
				 * temporarily been modified in case of errors inside wcs_flu/jnl_file_close.
				 */
				ESTABLISH_RET(jnl_file_autoswitch_ch, EXIT_ERR);
				/* It is possible we still have not written a PINI record in this journal file
				 * (e.g. mupip extend saw the need to do jnl_file_extend inside jnl_write while
				 * trying to write a PINI record). Write a PINI record in that case before closing
				 * the journal file that way the EOF record will have a non-zero pini_addr.
				 */
				if (0 == jpc->pini_addr)
					jnl_put_jrt_pini(csa);
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SPEEDUP_NOBEFORE);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
				REVERT;
				/* Undo the temporary global state set up above */
				in_jnl_file_autoswitch = FALSE;
				jgbl.dont_reset_gbl_jrec_time = jgbl.save_dont_reset_gbl_jrec_time;
				DEBUG_ONLY(jgbl.save_dont_reset_gbl_jrec_time = FALSE);
				assert((dba_mm == cs_data->acc_meth) || (csd == cs_data));
				csd = cs_data;	/* In MM, wcs_flu() can remap an extended DB, so reset csd to be sure */
			} else
			{	/* could not open the current journal file: report, with jpc->status as extra detail if set */
				if (SS_NORMAL != jpc->status)
					rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region),
						jpc->status);
				else
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
			}
			/* The callback is expected to be set exactly when in forward phase of recovery (see asserts) */
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			assert(jgbl.forw_phase_recovery || (NULL == jgbl.mur_pini_addr_reset_fnptr));
			if (NULL != jgbl.mur_pini_addr_reset_fnptr)
				(*jgbl.mur_pini_addr_reset_fnptr)(csa);
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{	/* New journal file created: the file header fields are expected to match jnl_info already */
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->jnl_checksum = jnl_info.checksum;
				csd->jnl_eovtn = csd->trans_hist.curr_tn;
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREAT, 2, JNL_LEN_STR(csd));
				/* Write the updated database file header (SGMNT_HDR_LEN bytes from csd) to disk */
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				fc->op_len = SGMNT_HDR_LEN;
				fc->op_pos = 1;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
				{
					if (jpc->status)
						rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd),
							DB_LEN_STR(gv_cur_region), jpc->status);
					else
						rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd),
							DB_LEN_STR(gv_cur_region));
				}
				assert(jb->filesize == csd->jnl_alq);
				if (csd->jnl_alq + csd->jnl_deq <= csd->autoswitchlimit)
				{
					aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
										csd->jnl_alq, csd->jnl_deq);
					if (aligned_tot_jrec_size > csd->jnl_alq)
					{	/* need to extend more than initial allocation in the new journal file
						 * to accommodate the current transaction.
						 */
						new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
						assert(new_blocks);
						assert(0 == new_blocks % csd->jnl_deq);
						need_extend = TRUE;	/* go around the loop once more */
					}
				}
			} else
			{	/* could not create the new journal file */
				send_msg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;	/* leaves the loop below and takes the failure path after it */
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jnl_fs_block_size = jb->fs_block_size;
			/* header points into hdr_buff at an address rounded up to a multiple of the fs block size */
			header = (jnl_file_header *)(ROUND_UP2((uintszofptr_t)hdr_buff, jnl_fs_block_size));
			read_write_size = ROUND_UP2(REAL_JNL_HDR_LEN, jnl_fs_block_size);
			assert((unsigned char *)header + read_write_size <= ARRAYTOP(hdr_buff));
			DO_FILE_READ(jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
			assert((header->virtual_size + new_blocks) == new_alq);
			jb->filesize = new_alq;	/* Actually this is virtual file size blocks */
			header->virtual_size = new_alq;
			JNL_DO_FILE_WRITE(csa, csd->jnl_file_name, jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 < new_blocks)
	{	/* success */
		INCR_GVSTATS_COUNTER(csa, csa->nl, n_jnl_extends, 1);
		return EXIT_NRM;
	}
	/* failure: no space (new_blocks == 0) or no new journal file (new_blocks == -1) */
	jpc->status = ERR_JNLREADEOF;
	jnl_file_lost(jpc, ERR_JNLEXTEND);
	return EXIT_ERR;
}