unix_db_info *udi; int4 size, save_errno; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; sm_uc_ptr_t buff; DEBUG_ONLY( blk_hdr_ptr_t blk_hdr; ) udi = (unix_db_info *)(reg->dyn.addr->file_cntl->file_info); csa = &udi->s_addrs; csd = csa->hdr; assert(NULL != csd); assert(cr); assert(cr->buffaddr); buff = GDS_ANY_REL2ABS(csa, cr->buffaddr); DEBUG_ONLY( /* Check GDS block that is about to be written. Dont do this for DSE as it may intentionally create bad blocks */ if (!dse_running) { blk_hdr = (blk_hdr_ptr_t)buff; assert((unsigned)GDSVLAST > (unsigned)blk_hdr->bver); assert((LCL_MAP_LEVL == blk_hdr->levl) || ((unsigned)MAX_BT_DEPTH > (unsigned)blk_hdr->levl)); assert((unsigned)csd->blk_size >= (unsigned)blk_hdr->bsiz); assert(csd->trans_hist.curr_tn >= blk_hdr->tn); } ) assert(((blk_hdr_ptr_t)buff)->bver); /* GDSV4 (0) version uses this field as a block length so should always be > 0 */ assert(0 == fast_lock_count); /* ensure the static reformat buffer is not being used currently */ ++fast_lock_count; /* Prevents interrupt from using reformat buffer while we have it */ /* reformat_buffer_in_use should always be incremented only AFTER incrementing fast_lock_count
void dse_chng_bhead(void) { block_id blk; int4 x; trans_num tn; cache_rec_ptr_t cr; blk_hdr new_hdr; blk_segment *bs1, *bs_ptr; int4 blk_seg_cnt, blk_size; /* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */ boolean_t ismap; boolean_t chng_blk; boolean_t was_crit; boolean_t was_hold_onto_crit; uint4 mapsize; srch_blk_status blkhist; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; # ifdef GTM_CRYPT int req_enc_blk_size; int crypt_status; blk_hdr_ptr_t bp, save_bp, save_old_block; # endif error_def(ERR_DSEBLKRDFAIL); error_def(ERR_DSEFAIL); error_def(ERR_DBRDONLY); if (gv_cur_region->read_only) rts_error(VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region)); CHECK_AND_RESET_UPDATE_ARRAY; /* reset update_array_ptr to update_array */ chng_blk = FALSE; csa = cs_addrs; if (cli_present("BLOCK") == CLI_PRESENT) { if (!cli_get_hex("BLOCK", (uint4 *)&blk)) return; if (blk < 0 || blk > csa->ti->total_blks) { util_out_print("Error: invalid block number.", TRUE); return; } patch_curr_blk = blk; } csd = csa->hdr; assert(csd == cs_data); blk_size = csd->blk_size; ismap = (patch_curr_blk / csd->bplmap * csd->bplmap == patch_curr_blk); mapsize = BM_SIZE(csd->bplmap); t_begin_crit(ERR_DSEFAIL); blkhist.blk_num = patch_curr_blk; if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) rts_error(VARLSTCNT(1) ERR_DSEBLKRDFAIL); new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr; if (cli_present("LEVEL") == CLI_PRESENT) { if (!cli_get_hex("LEVEL", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && (unsigned char)x != LCL_MAP_LEVL) { util_out_print("Error: invalid level for a bit map block.", TRUE); t_abort(gv_cur_region, csa); return; } if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1)) { util_out_print("Error: invalid level.", TRUE); t_abort(gv_cur_region, csa); return; } new_hdr.levl = (unsigned char)x; chng_blk = TRUE; if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; } if 
(cli_present("BSIZ") == CLI_PRESENT) { if (!cli_get_hex("BSIZ", (uint4 *)&x)) { t_abort(gv_cur_region, csa); return; } if (ismap && x != mapsize) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } else if (x < SIZEOF(blk_hdr) || x > blk_size) { util_out_print("Error: invalid bsiz.", TRUE); t_abort(gv_cur_region, csa); return; } chng_blk = TRUE; new_hdr.bsiz = x; } if (!chng_blk) t_abort(gv_cur_region, csa); else { BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); if (!BLK_FINI(bs_ptr, bs1)) { util_out_print("Error: bad block build.", TRUE); t_abort(gv_cur_region, csa); return; } t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN); BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, csa->ti->curr_tn); t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED); } if (cli_present("TN") == CLI_PRESENT) { if (!cli_get_hex64("TN", &tn)) return; was_crit = csa->now_crit; t_begin_crit(ERR_DSEFAIL); CHECK_TN(csa, csd, csd->trans_hist.curr_tn); /* can issue rts_error TNTOOLARGE */ assert(csa->ti->early_tn == csa->ti->curr_tn); if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr))) { util_out_print("Error: Unable to read buffer.", TRUE); t_abort(gv_cur_region, csa); return; } if (new_hdr.bsiz < SIZEOF(blk_hdr)) new_hdr.bsiz = SIZEOF(blk_hdr); if (new_hdr.bsiz > blk_size) new_hdr.bsiz = blk_size; BLK_INIT(bs_ptr, bs1); BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr)); BLK_FINI(bs_ptr, bs1); t_write(&blkhist, (unsigned char *)bs1, 0, 0, ((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN); /* Pass the desired tn as argument to bg_update/mm_update below */ BUILD_AIMG_IF_JNL_ENABLED(csd, non_tp_jfb_buff_ptr, tn); was_hold_onto_crit = csa->hold_onto_crit; csa->hold_onto_crit = TRUE; t_end(&dummy_hist, NULL, tn); # ifdef GTM_CRYPT if (csd->is_encrypted && (tn < csa->ti->curr_tn)) { 
/* BG and db encryption is enabled and the DSE update caused the block-header to potentially have a tn * that is LESS than what it had before. At this point, the global buffer (corresponding to blkhist.blk_num) * reflects the contents of the block AFTER the dse update (bg_update would have touched this) whereas * the corresponding encryption global buffer reflects the contents of the block BEFORE the update. * Normally wcs_wtstart takes care of propagating the tn update from the regular global buffer to the * corresponding encryption buffer. But if before it gets a chance, let us say a process goes to t_end * as part of a subsequent transaction and updates this same block. Since the blk-hdr-tn potentially * decreased, it is possible that the PBLK writing check (comparing blk-hdr-tn with the epoch_tn) decides * to write a PBLK for this block (even though a PBLK was already written for this block as part of a * previous DSE CHANGE -BL -TN in the same epoch). In this case, since the db is encrypted, the logic * will assume there were no updates to this block since the last time wcs_wtstart updated the encryption * buffer and therefore use that to write the pblk, which is incorrect since it does not yet contain the * tn update. The consequence of this is would be writing an older before-image PBLK) record to the * journal file. To prevent this situation, we update the encryption buffer here (before releasing crit) * using logic like that in wcs_wtstart to ensure it is in sync with the regular global buffer. * Note: * Although we use cw_set[0] to access the global buffer corresponding to the block number being updated, * cw_set_depth at this point is 0 because t_end resets it. This is considered safe since cw_set is a * static array (as opposed to malloc'ed memory) and hence is always available and valid until it gets * overwritten by subsequent updates. 
*/ bp = (blk_hdr_ptr_t)GDS_ANY_REL2ABS(csa, cw_set[0].cr->buffaddr); DBG_ENSURE_PTR_IS_VALID_GLOBUFF(csa, csd, (sm_uc_ptr_t)bp); save_bp = (blk_hdr_ptr_t)GDS_ANY_ENCRYPTGLOBUF(bp, csa); DBG_ENSURE_PTR_IS_VALID_ENCTWINGLOBUFF(csa, csd, (sm_uc_ptr_t)save_bp); assert((bp->bsiz <= csd->blk_size) && (bp->bsiz >= SIZEOF(*bp))); req_enc_blk_size = MIN(csd->blk_size, bp->bsiz) - SIZEOF(*bp); if (BLK_NEEDS_ENCRYPTION(bp->levl, req_enc_blk_size)) { ASSERT_ENCRYPTION_INITIALIZED; memcpy(save_bp, bp, SIZEOF(blk_hdr)); GTMCRYPT_ENCODE_FAST(csa->encr_key_handle, (char *)(bp + 1), req_enc_blk_size, (char *)(save_bp + 1), crypt_status); if (0 != crypt_status) GC_GTM_PUTMSG(crypt_status, gv_cur_region->dyn.addr->fname); } else memcpy(save_bp, bp, bp->bsiz); } # endif if (!was_hold_onto_crit) csa->hold_onto_crit = FALSE; if (!was_crit) rel_crit(gv_cur_region); if (unhandled_stale_timer_pop) process_deferred_stale(); } return; }
/* bt_put: locate (or allocate) the block table (bt) record for "block" in region "reg", move it to the
 * tail of the transaction-number (tnque) queue and stamp it with the current database tn.
 *
 * The bt records live in shared memory and are threaded on two relative-offset queues:
 *   - blkque: hash chains headed at csa->bt_header[block % bt_buckets], keyed by block number;
 *   - tnque:  a single queue headed at csa->th_base, ordered oldest-tn first (records are removed from
 *             the head for reuse and re-inserted at the tail with the current tn below).
 * Queue links are self-relative byte offsets, hence the (sm_uc_ptr_t)x + x->que.fl pointer arithmetic.
 *
 * Caller must hold crit (asserted, unless clustered).  Returns the bt record, or NULL if the oldest bt's
 * dirty cache record could not be flushed for reuse (wcs_get_space failure, cache recovery needed).
 */
bt_rec_ptr_t bt_put(gd_region *reg, int4 block)
{
	bt_rec_ptr_t		bt, q0, q1, hdr;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	cache_rec_ptr_t		cr;
	th_rec_ptr_t		th;
	trans_num		lcl_tn;
	uint4			lcnt;

	csa = (sgmnt_addrs *)&FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	assert(csa->now_crit || csd->clustered);
	assert(dba_mm != csa->hdr->acc_meth);	/* bt records are a BG (buffered global) access method structure */
	lcl_tn = csa->ti->curr_tn;
	hdr = csa->bt_header + (block % csd->bt_buckets);	/* hash bucket for this block number */
	assert(BT_QUEHEAD == hdr->blk);
	/* Walk the blkque hash chain looking for an existing bt for "block"; lcnt bounds the walk */
	for (lcnt = 0, bt = (bt_rec_ptr_t)((sm_uc_ptr_t)hdr + hdr->blkque.fl);  ;
		bt = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->blkque.fl), lcnt++)
	{
		if (BT_QUEHEAD == bt->blk)
		{	/* there is no matching bt: wrapped back to the queue head; recycle the oldest bt */
			assert(bt == hdr);
			/* Oldest bt = head of the tnque; back up from the tnque link to the record base.
			 * NOTE(review): this relies on bt_rec and th_rec overlaying so that tnque sits
			 * SIZEOF(th->tnque) into the record -- confirm against the struct definitions.
			 */
			bt = (bt_rec_ptr_t)((sm_uc_ptr_t)(csa->th_base) + csa->th_base->tnque.fl - SIZEOF(th->tnque));
			if (CR_NOTVALID != bt->cache_index)
			{	/* the oldest bt is still valid: detach it from its cache record first */
				assert(!in_wcs_recover);
				cr = (cache_rec_ptr_t)GDS_ANY_REL2ABS(csa, bt->cache_index);
				if (cr->dirty)
				{	/* get it written so it can be reused */
					BG_TRACE_PRO_ANY(csa, bt_put_flush_dirty);
					if (FALSE == wcs_get_space(reg, 0, cr))
					{	/* flush failed: signal cache trouble and bail out */
						assert(csa->nl->wc_blocked);	/* only reason we currently know
										 * why wcs_get_space could fail */
						assert(gtm_white_box_test_case_enabled);
						BG_TRACE_PRO_ANY(csa, wcb_bt_put);
						send_msg(VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_bt_put"),
							process_id, &lcl_tn, DB_LEN_STR(reg));
						return NULL;
					}
				}
				bt->cache_index = CR_NOTVALID;
				cr->bt_index = 0;
			}
			/* Unlink the recycled bt from its old blkque chain and re-insert it on this block's chain */
			q0 = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->blkque.fl);
			q1 = (bt_rec_ptr_t)remqt((que_ent_ptr_t)q0);
			if (EMPTY_QUEUE == (sm_long_t)q1)
				rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 1);
			bt->blk = block;
			bt->killtn = lcl_tn;
			insqt((que_ent_ptr_t)bt, (que_ent_ptr_t)hdr);
			/* Take the record off the head of the tnque; it is re-queued at the tail after the loop */
			th = (th_rec_ptr_t)remqh((que_ent_ptr_t)csa->th_base);
			if (EMPTY_QUEUE == (sm_long_t)th)
				GTMASSERT;
			break;
		}
		if (bt->blk == block)
		{	/* found an existing bt for this block.
			 * bt_put should never be called twice for the same block with the same lcl_tn.  This is
			 * because t_end/tp_tend update every block only once as part of each update transaction.
			 * Assert this.  The two exceptions are
			 * a) Forward journal recovery which simulates a 2-phase M-kill where the same block
			 *    could get updated in both phases (example bitmap block gets updated for blocks created
			 *    within the TP transaction as well as for blocks that are freed up in the 2nd phase of
			 *    the M-kill) with the same transaction number.  This is because although GT.M would have
			 *    updated the same block with different transaction numbers in the two phases, forward
			 *    recovery will update it with the same tn and instead increment the db tn on seeing the
			 *    following INCTN journal record(s).
			 * b) Cache recovery (wcs_recover).  It could call bt_put more than once for the same block
			 *    and potentially with the same tn.  This is because the state of the queues is
			 *    questionable and there could be more than one cache record for a given block number.
			 */
			assert(in_wcs_recover || (bt->tn < lcl_tn) || (jgbl.forw_phase_recovery && !JNL_ENABLED(csa)));
			/* Remove this record from its current position in the tnque (re-queued at tail below) */
			q0 = (bt_rec_ptr_t)((sm_uc_ptr_t)bt + bt->tnque.fl);
			th = (th_rec_ptr_t)remqt((que_ent_ptr_t)((sm_uc_ptr_t)q0 + SIZEOF(th->tnque)));
			if (EMPTY_QUEUE == (sm_long_t)th)
				GTMASSERT;
			break;
		}
		if (0 == bt->blkque.fl)
			rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 2);	/* broken chain: zero forward link */
		if (lcnt >= csd->n_bts)
			rts_error(VARLSTCNT(3) ERR_BTFAIL, 1, 3);	/* walked more records than exist: loop */
	}
	/* Re-insert at the tail of the tnque (most recent) and stamp with the current tn */
	insqt((que_ent_ptr_t)th, (que_ent_ptr_t)csa->th_base);
	bt->tn = lcl_tn;
	return bt;
}
sm_uc_ptr_t t_qread(block_id blk, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr_out) /* cycle is used in t_end to detect if the buffer has been refreshed since the t_qread */ { uint4 status, duint4, blocking_pid; cache_rec_ptr_t cr; bt_rec_ptr_t bt; bool clustered, was_crit; int dummy, lcnt, ocnt; cw_set_element *cse; off_chain chain1; register sgmnt_addrs *csa; register sgmnt_data_ptr_t csd; int4 dummy_errno; boolean_t already_built, is_mm, reset_first_tp_srch_status, set_wc_blocked; error_def(ERR_DBFILERR); error_def(ERR_BUFOWNERSTUCK); first_tp_srch_status = NULL; reset_first_tp_srch_status = FALSE; csa = cs_addrs; csd = csa->hdr; INCR_DB_CSH_COUNTER(csa, n_t_qreads, 1); is_mm = (dba_mm == csd->acc_meth); assert((t_tries < CDB_STAGNATE) || csa->now_crit); if (0 < dollar_tlevel) { assert(sgm_info_ptr); if (0 != sgm_info_ptr->cw_set_depth) { chain1 = *(off_chain *)&blk; if (1 == chain1.flag) { assert(sgm_info_ptr->cw_set_depth); if ((int)chain1.cw_index < sgm_info_ptr->cw_set_depth) tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain1.cw_index, &cse); else { assert(FALSE == csa->now_crit); rdfail_detail = cdb_sc_blknumerr; return (sm_uc_ptr_t)NULL; } } else { first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)blk, &duint4); ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr); cse = first_tp_srch_status ? 
first_tp_srch_status->ptr : NULL; } assert(!cse || !cse->high_tlevel); if (cse) { /* transaction has modified the sought after block */ assert(gds_t_writemap != cse->mode); if (FALSE == cse->done) { /* out of date, so make it current */ already_built = (NULL != cse->new_buff); gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, 0); assert(cse->blk_target); if (!already_built && !chain1.flag) { assert(first_tp_srch_status && (is_mm || first_tp_srch_status->cr) && first_tp_srch_status->buffaddr); if (first_tp_srch_status->tn <= ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn) { assert(CDB_STAGNATE > t_tries); rdfail_detail = cdb_sc_blkmod; /* should this be something else */ TP_TRACE_HIST_MOD(blk, gv_target, tp_blkmod_t_qread, cs_data, first_tp_srch_status->tn, ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->tn, ((blk_hdr_ptr_t)(first_tp_srch_status->buffaddr))->levl); return (sm_uc_ptr_t)NULL; } if ((!is_mm) && (first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle || first_tp_srch_status->blk_num != first_tp_srch_status->cr->blk)) { assert(CDB_STAGNATE > t_tries); rdfail_detail = cdb_sc_lostcr; /* should this be something else */ return (sm_uc_ptr_t)NULL; } if (certify_all_blocks && FALSE == cert_blk(gv_cur_region, blk, (blk_hdr_ptr_t)cse->new_buff, cse->blk_target->root)) GTMASSERT; } cse->done = TRUE; } *cycle = CYCLE_PVT_COPY; *cr_out = 0; return (sm_uc_ptr_t)cse->new_buff; } assert(!chain1.flag); } else first_tp_srch_status = (srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)blk, &duint4); ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr); if (!is_mm && first_tp_srch_status) { assert(first_tp_srch_status->cr && !first_tp_srch_status->ptr); if (first_tp_srch_status->cycle == first_tp_srch_status->cr->cycle) { *cycle = first_tp_srch_status->cycle; *cr_out = first_tp_srch_status->cr; first_tp_srch_status->cr->refer = TRUE; if (CDB_STAGNATE <= t_tries) /* mu_reorg doesn't use TP else should have an || 
for that */ CWS_INSERT(blk); return (sm_uc_ptr_t)first_tp_srch_status->buffaddr; } else { /* Block was already part of the read-set of this transaction, but got recycled. Allow for * recycling. But update the first_tp_srch_status (for this blk) in the si->first_tp_hist * array to reflect the new buffer, cycle and cache-record. Since we know those only at the end of * t_qread, set a variable here that will enable the updation before returning from t_qread(). */ reset_first_tp_srch_status = TRUE; } } } if ((blk >= csa->ti->total_blks) || (blk < 0)) { /* requested block out of range; could occur because of a concurrency conflict */ if ((&FILE_INFO(gv_cur_region)->s_addrs != csa) || (csd != cs_data)) GTMASSERT; assert(FALSE == csa->now_crit); rdfail_detail = cdb_sc_blknumerr; return (sm_uc_ptr_t)NULL; } if (is_mm) { *cycle = CYCLE_SHRD_COPY; *cr_out = 0; return (sm_uc_ptr_t)(mm_read(blk)); } assert(dba_bg == csd->acc_meth); assert(!first_tp_srch_status || !first_tp_srch_status->cr || first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle); if (FALSE == (clustered = csd->clustered)) bt = NULL; was_crit = csa->now_crit; ocnt = 0; set_wc_blocked = FALSE; /* to indicate whether csd->wc_blocked was set to TRUE by us */ do { if (NULL == (cr = db_csh_get(blk))) { /* not in memory */ if (clustered && (NULL != (bt = bt_get(blk))) && (FALSE == bt->flushing)) bt = NULL; if (FALSE == csa->now_crit) { if (NULL != bt) { /* at this point, bt is not NULL only if clustered and flushing - wait no crit */ assert(clustered); wait_for_block_flush(bt, blk); /* try for no other node currently writing the block */ } if (csd->flush_trigger <= csa->nl->wcs_active_lvl && FALSE == gv_cur_region->read_only) JNL_ENSURE_OPEN_WCS_WTSTART(csa, gv_cur_region, 0, dummy_errno); /* a macro that dclast's wcs_wtstart() and checks for errors etc. 
*/ grab_crit(gv_cur_region); cr = db_csh_get(blk); /* in case blk arrived before crit */ } if (clustered && (NULL != (bt = bt_get(blk))) && (TRUE == bt->flushing)) { /* Once crit, need to assure that if clustered, that flushing is [still] complete * If it isn't, we missed an entire WM cycle and have to wait for another node to finish */ wait_for_block_flush(bt, blk); /* ensure no other node currently writing the block */ } if (NULL == cr) { /* really not in memory - must get a new buffer */ assert(csa->now_crit); cr = db_csh_getn(blk); if (CR_NOTVALID == (sm_long_t)cr) { SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_getn_invalid_blk); set_wc_blocked = TRUE; break; } assert(0 <= cr->read_in_progress); *cycle = cr->cycle; cr->tn = csa->ti->curr_tn; if (FALSE == was_crit) rel_crit(gv_cur_region); /* read outside of crit may be of a stale block but should be detected by t_end or tp_tend */ assert(0 == cr->dirty); assert(cr->read_in_progress >= 0); INCR_DB_CSH_COUNTER(csa, n_dsk_reads, 1); if (SS_NORMAL != (status = dsk_read(blk, GDS_REL2ABS(cr->buffaddr)))) { RELEASE_BUFF_READ_LOCK(cr); assert(was_crit == csa->now_crit); if (FUTURE_READ == status) { /* in cluster, block can be in the "future" with respect to the local history */ assert(TRUE == clustered); assert(FALSE == csa->now_crit); rdfail_detail = cdb_sc_future_read; /* t_retry forces the history up to date */ return (sm_uc_ptr_t)NULL; } rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status); } assert(0 <= cr->read_in_progress); assert(0 == cr->dirty); cr->r_epid = 0; RELEASE_BUFF_READ_LOCK(cr); assert(-1 <= cr->read_in_progress); *cr_out = cr; assert(was_crit == csa->now_crit); if (reset_first_tp_srch_status) { /* keep the parantheses for the if (although single line) since the following is a macro */ RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle); } return (sm_uc_ptr_t)GDS_REL2ABS(cr->buffaddr); } else if ((FALSE == was_crit) && 
(BAD_LUCK_ABOUNDS > ocnt)) { assert(TRUE == csa->now_crit); assert(csa->nl->in_crit == process_id); rel_crit(gv_cur_region); } } if (CR_NOTVALID == (sm_long_t)cr) { SET_TRACEABLE_VAR(cs_data->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_get_invalid_blk); set_wc_blocked = TRUE; break; } for (lcnt = 1; ; lcnt++) { if (0 > cr->read_in_progress) { /* it's not being read */ if (clustered && (0 == cr->bt_index) && (cr->tn < ((th_rec *)((uchar_ptr_t)csa->th_base + csa->th_base->tnque.fl))->tn)) { /* can't rely on the buffer */ cr->cycle++; /* increment cycle whenever blk number changes (tp_hist depends on this) */ cr->blk = CR_BLKEMPTY; break; } *cycle = cr->cycle; *cr_out = cr; VMS_ONLY( /* If we were doing the db_csh_get() above (in t_qread itself) and located the cache-record * which, before coming here and taking a copy of cr->cycle a few lines above, was made an * older twin by another process in bg_update (note this can happen in VMS only) which has * already incremented the cycle, we will end up having a copy of the old cache-record with * its incremented cycle number and hence will succeed in tp_hist validation if we return * this <cr,cycle> combination although we don't want to since this "cr" is not current for * the given block as of now. Note that the "indexmod" optimization in tp_tend() relies on * an accurate intermediate validation by tp_hist() which in turn relies on the <cr,cycle> * value returned by t_qread() to be accurate for a given blk at the current point in time. * We detect the older-twin case by the following check. Note that here we depend on the * the fact that bg_update() sets cr->bt_index to 0 before incrementing cr->cycle. 
* Given that order, cr->bt_index can be guaranteed to be 0 if we read the incremented cycle */ if (cr->twin && (0 == cr->bt_index)) break; ) if (cr->blk != blk) break; if (was_crit != csa->now_crit) rel_crit(gv_cur_region); assert(was_crit == csa->now_crit); if (reset_first_tp_srch_status) { /* keep the parantheses for the if (although single line) since the following is a macro */ RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle); } /* Note that at this point we expect t_qread() to return a <cr,cycle> combination that * corresponds to "blk" passed in. It is crucial to get an accurate value for both the fields * since tp_hist() relies on this for its intermediate validation. */ return (sm_uc_ptr_t)GDS_ANY_REL2ABS(csa, cr->buffaddr); } if (blk != cr->blk) break; if (lcnt >= BUF_OWNER_STUCK && (0 == (lcnt % BUF_OWNER_STUCK))) { if (FALSE == csa->now_crit) grab_crit(gv_cur_region); if (cr->read_in_progress < -1) { /* outside of design; clear to known state */ BG_TRACE_PRO(t_qread_out_of_design); INTERLOCK_INIT(cr); assert(0 == cr->r_epid); cr->r_epid = 0; } else if (cr->read_in_progress >= 0) { BG_TRACE_PRO(t_qread_buf_owner_stuck); if (0 != (blocking_pid = cr->r_epid)) { if (FALSE == is_proc_alive(blocking_pid, cr->image_count)) { /* process gone: release that process's lock */ assert(0 == cr->bt_index); if (cr->bt_index) { SET_TRACEABLE_VAR(csd->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index1); set_wc_blocked = TRUE; break; } cr->cycle++; /* increment cycle for blk number changes (for tp_hist) */ cr->blk = CR_BLKEMPTY; RELEASE_BUFF_READ_LOCK(cr); } else { rel_crit(gv_cur_region); send_msg(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region)); send_msg(VARLSTCNT(9) ERR_BUFOWNERSTUCK, 7, process_id, blocking_pid, cr->blk, cr->blk, (lcnt / BUF_OWNER_STUCK), cr->read_in_progress, cr->rip_latch.latch_pid); if ((4 * BUF_OWNER_STUCK) <= lcnt) GTMASSERT; /* Kickstart the process taking a long time in case it was suspended */ 
UNIX_ONLY(continue_proc(blocking_pid)); } } else { /* process stopped before could set r_epid */ assert(0 == cr->bt_index); if (cr->bt_index) { SET_TRACEABLE_VAR(csd->wc_blocked, TRUE); BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_bad_bt_index2); set_wc_blocked = TRUE; break; } cr->cycle++; /* increment cycle for blk number changes (for tp_hist) */ cr->blk = CR_BLKEMPTY; RELEASE_BUFF_READ_LOCK(cr); if (cr->read_in_progress < -1) /* race: process released since if r_epid */ LOCK_BUFF_FOR_READ(cr, dummy); } } if (was_crit != csa->now_crit) rel_crit(gv_cur_region); } else wcs_sleep(lcnt); } if (set_wc_blocked) /* cannot use csd->wc_blocked here as we might not necessarily have crit */ break; ocnt++; if (BAD_LUCK_ABOUNDS <= ocnt) { if (BAD_LUCK_ABOUNDS < ocnt || csa->now_crit) { rel_crit(gv_cur_region); GTMASSERT; } if (FALSE == csa->now_crit) grab_crit(gv_cur_region); } } while (TRUE);