cache_rec_ptr_t db_csh_getn(block_id block)
{
	cache_rec_ptr_t		hdr, q0, start_cr, cr;
	bt_rec_ptr_t		bt;
	unsigned int		lcnt, ocnt;
	int			rip, max_ent, pass1, pass2, pass3;
	int4			flsh_trigger;
	uint4			r_epid, dummy;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		*tp_srch_status;

	error_def(ERR_BUFRDTIMEOUT);
	error_def(ERR_INVALIDRIP);

	csa = cs_addrs;
	csd = csa->hdr;
	assert(csa->now_crit);
	assert(csa == &FILE_INFO(gv_cur_region)->s_addrs);
	max_ent = csd->n_bts;
	cr = (cache_rec_ptr_t)GDS_REL2ABS(csa->nl->cur_lru_cache_rec_off);
	hdr = csa->acc_meth.bg.cache_state->cache_array + (block % csd->bt_buckets);
	start_cr = csa->acc_meth.bg.cache_state->cache_array + csd->bt_buckets;
	pass1 = max_ent;	/* skip referred or dirty or read-into cache records */
	pass2 = 2 * max_ent;	/* skip referred cache records */
	pass3 = 3 * max_ent;	/* skip nothing */
	INCR_DB_CSH_COUNTER(csa, n_db_csh_getns, 1);
	for (lcnt = 0; ; lcnt++)
	{
		if (lcnt > pass3)
		{
			BG_TRACE_PRO(wc_blocked_db_csh_getn_loopexceed);
			assert(FALSE);
			break;
		}
		cr++;
		if (cr == start_cr + max_ent)
			cr = start_cr;
		VMS_ONLY(
			if ((lcnt == pass1) || (lcnt == pass2))
				wcs_wtfini(gv_cur_region);
		)
		if (TRUE == cr->refer && lcnt < pass2)
		{	/* in passes 1 & 2, set refer to FALSE and skip; in the third pass attempt reuse even if TRUE == refer */
			cr->refer = FALSE;
			continue;
		}
		if (TRUE == cr->in_cw_set)
		{	/* this process already owns it - skip it */
			cr->refer = TRUE;
			continue;
		}
		if (CDB_STAGNATE <= t_tries || mu_reorg_process)
		{	/* Prevent stepping on self when holding crit for the entire transaction.
			 * This is done by looking up the block in sgm_info_ptr->blks_in_use and in cw_stagnate.
			 * The following two hashtable lookups are not equivalent, since in TP, sgm_info_ptr->blks_in_use
			 * is updated to the latest cw_stagnate list of blocks only in tp_hist().
			 * Also note that the lookup in sgm_info_ptr->blks_in_use reuses blocks that don't have cse's.
			 * This is to allow big-read TP transactions, which may use up more than the available global buffers.
			 * There is one issue here in that a block that has only been read so far may be stepped upon here
			 * but may later be needed for update. It is handled by updating the block's corresponding
			 * entry in the set of histories (the sgm_info_ptr->first_tp_hist[index] structure) to hold the
			 * "cr" and "cycle" of the t_qread done for the block when it was intended to be changed for the
			 * first time within the transaction, since otherwise the transaction would restart due to a
			 * cdb_sc_lostcr status. Note that "tn" (read_tn of the block) in the first_tp_hist will still
			 * remain the "tn" when the block was first read within this transaction, to ensure the block
			 * hasn't been modified since the start of the transaction. Once we intend to change the
			 * block, i.e. srch_blk_status->ptr is non-NULL, we ensure in the code below not to step on it.
			 * [tp_hist() is the routine that updates the "cr", "cycle" and "tn" of the block.]
			 * Note that usually in a transaction the first_tp_hist[] structure holds the "cr", "cycle" and "tn"
			 * of the first t_qread of the block within that transaction. The above is the only exception.
			 * Also note that for blocks in cw_stagnate (i.e. the current TP mini-action), we don't reuse any of
			 * them even if they don't have a cse. This is to ensure that the current action doesn't
			 * encounter a restart due to cdb_sc_lostcr in tp_hist(), even in the fourth retry.
			 */
			if (dollar_tlevel
				&& (tp_srch_status =
					(srch_blk_status *)lookup_hashtab_ent(sgm_info_ptr->blks_in_use, (void *)cr->blk, &dummy))
				&& tp_srch_status->ptr)
			{	/* this process is already using the block - skip it */
				cr->refer = TRUE;
				continue;
			}
			if (NULL != lookup_hashtab_ent(cw_stagnate, (void *)cr->blk, &dummy))
			{
				cr->refer = TRUE;
				continue;
			}
		}
		if (cr->dirty)
		{	/* Note that in Unix, it is possible that we see a stale value of cr->dirty (possible if a
			 * concurrent wcs_wtstart() has reset dirty to 0 but that update has not reached us yet). In this
			 * case the call to wcs_get_space() below will do the necessary memory barrier instructions
			 * (through calls to aswp()) which will allow us to see the non-stale value of cr->dirty.
			 *
			 * It is also possible that cr->dirty is non-zero but < cr->flushed_dirty_tn. In this case, the
			 * wcs_get_space() done below will return FALSE, forcing a cache-rebuild which will fix this situation.
			 *
			 * In VMS, another process cannot be concurrently resetting cr->dirty to 0, as the resetting routine
			 * is wcs_wtfini(), which is executed in crit, which no other process can be in as we are in crit now.
			 */
			if (gv_cur_region->read_only)
				continue;
			if (lcnt < pass1)
			{
				if (!csa->timer && (csa->nl->wcs_timers < 1))
					wcs_timer_start(gv_cur_region, FALSE);
				continue;
			}
			BG_TRACE_PRO(db_csh_getn_flush_dirty);
			if (FALSE == wcs_get_space(gv_cur_region, 0, cr))
			{	/* failed to flush it out - force a rebuild */
				BG_TRACE_PRO(wc_blocked_db_csh_getn_wcsstarvewrt);
				assert(FALSE);
				break;
			}
			assert(0 == cr->dirty);
		}
		UNIX_ONLY(
			/* The cache-record is not free for reuse until the write-latch value becomes LATCH_CLEAR.
			 * In VMS, resetting the write-latch value occurs in wcs_wtfini(), which runs in crit, so we are fine.
			 * In Unix, this resetting is done by wcs_wtstart(), which runs out-of-crit. Therefore, we need to
			 * wait for this value to be LATCH_CLEAR before reusing this cache-record.
			 * Note that we are examining the write-latch value without holding the interlock. It is ok to do
			 * this because the only two routines that modify the latch value are bg_update() and
			 * wcs_wtstart(). The former cannot be concurrently executing because we are in crit.
			 * The latter will not update the latch value unless this cache-record is dirty. But in that
			 * case we would most likely have gone through the if (cr->dirty) check above. Most likely,
			 * because there is one rare possibility where a concurrent wcs_wtstart() has set cr->dirty
			 * to 0 but not yet cleared the latch. In that case we wait for the latch to be cleared.
			 * In all other cases, nobody has been modifying the latch since we got crit and therefore
			 * it is safe to observe the value of the latch without holding the interlock.
			 */
			if (LATCH_CLEAR != WRITE_LATCH_VAL(cr))
			{	/* possible if a concurrent wcs_wtstart() has set cr->dirty to 0 but not yet
				 * cleared the latch; this should be very rare though
				 */
				if (lcnt < pass2)
					continue;	/* try to find some other cache-record to reuse until the 3rd pass */
				for (ocnt = 1; (MAXWRTLATCHWAIT >= ocnt) && (LATCH_CLEAR != WRITE_LATCH_VAL(cr)); ocnt++)
					wcs_sleep(SLEEP_WRTLATCHWAIT);	/* since it is a short lock, sleep the minimum */
				if (MAXWRTLATCHWAIT <= ocnt)
				{
					BG_TRACE_PRO(db_csh_getn_wrt_latch_stuck);
					assert(FALSE);
					continue;
				}
			}
		)
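/* A minimal sketch (not part of the original source) of the three-pass "second chance" sweep that
 * db_csh_getn() performs above. All names here (toy_rec_t, toy_getn) are hypothetical and exist only
 * for illustration; the real loop additionally flushes dirty records via wcs_get_space(), honors TP
 * block ownership, and waits on the write latch.
 */
typedef struct
{
	int	refer;	/* recently-referenced bit (like cr->refer) */
	int	owned;	/* held by this process (like cr->in_cw_set) */
	int	dirty;	/* needs a flush before reuse (like cr->dirty) */
} toy_rec_t;

/* Scan up to three times around the ring starting after `start':
 *   pass 1 (lcnt < n)     : skip referred, owned and dirty records;
 *   pass 2 (lcnt < 2 * n) : skip referred and owned records (the real code now flushes dirty ones);
 *   pass 3 (lcnt <= 3 * n): skip only owned records, reusing even recently-referred ones.
 * Returns an index into recs[], or -1 if even pass 3 found nothing (the real code then forces a
 * cache rebuild).
 */
int toy_getn(toy_rec_t *recs, int n, int start)
{
	int	i, lcnt, pass1, pass2, pass3;

	pass1 = n;
	pass2 = 2 * n;
	pass3 = 3 * n;
	for (lcnt = 0, i = start; lcnt <= pass3; lcnt++)
	{
		i = (i + 1) % n;
		if (recs[i].refer && (lcnt < pass2))
		{	/* second chance: clear the bit and move on */
			recs[i].refer = 0;
			continue;
		}
		if (recs[i].owned)
			continue;	/* never steal a record we ourselves hold */
		if (recs[i].dirty && (lcnt < pass1))
			continue;	/* on the first pass, prefer clean records over forcing a flush */
		return i;
	}
	return -1;
}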
/* go after a specific number of buffers or a particular buffer */
bool wcs_get_space(gd_region *reg, int needed, cache_rec *cr)
{
	unsigned int		lcnt, ocnt, status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	que_ent_ptr_t		base, q0;
	int4			dummy_errno;
	boolean_t		is_mm;

	assert((0 != needed) || (NULL != cr));
	csa = &(FILE_INFO(reg)->s_addrs);
	assert(csa == cs_addrs);
	csd = csa->hdr;
	is_mm = (dba_mm == csd->acc_meth);
	assert(is_mm || (dba_bg == csd->acc_meth));
	cnl = csa->nl;
	if (FALSE == csa->now_crit)
	{
		assert(0 != needed);	/* if needed == 0, then we should be in crit */
		for (lcnt = DIVIDE_ROUND_UP(needed, csd->n_wrt_per_flu); 0 < lcnt; lcnt--)
			JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				/* a macro that ensures jnl is open, dclast's wcs_wtstart and checks for errors etc. */
		return TRUE;
	}
	if (FALSE == wcs_wtfini(reg))
		return FALSE;
	/* while calculating flush_trigger, the decrement should be at least 1 if the minimum allowed has not yet been reached */
	csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger / STEP_FACTOR, 1), MIN_FLUSH_TRIGGER(csd->n_bts));
	if (0 == needed)
	{
		if (!is_mm)
		{	/* If another process is concurrently finishing up phase2 of its commit, wait for that to complete first. */
			if (cr->in_tend && !wcs_phase2_commit_wait(csa, cr))
				return FALSE;	/* assumption is that the caller will set wc_blocked and trigger cache recovery */
		}
		for (lcnt = 1; (MAXGETSPACEWAIT > lcnt) && (0 != cr->dirty); lcnt++)
		{	/* We want to flush a specific cache-record. We speed up the wait by moving the dirty cache-record
			 * to the head of the active queue. But to do this, we need exclusive access to the active queue.
			 * The only other processes outside of crit that can be touching this concurrently are wcs_wtstart
			 * (which can remove entries from the queue) and bg_update_phase2 (which can add entries to the queue).
			 * In the case of writers, we can wait for those to complete (by setting cnl->wc_blocked to TRUE)
			 * and then play with the queue. But in the case of bg_update_phase2, it is not easily possible to
			 * do a similar wait, so in that case we choose to do a plain wcs_wtstart (which uses interlocked
			 * queue operations and hence works well with a concurrent bg_update_phase2) and wait until the
			 * cache-record of interest becomes non-dirty. The consequence is we might wait a little longer
			 * than necessary, but that is considered acceptable for now.
			 */
			/* Check if cache recovery is needed (could be set by another process in
			 * secshr_db_clnup finishing off a phase2 commit). If so, there is no point invoking
			 * wcs_wtstart as it will return right away. Instead return FALSE so
			 * cache-recovery can be triggered by the caller.
			 */
			if (cnl->wc_blocked)
			{
				assert(gtm_white_box_test_case_enabled);
				return FALSE;
			}
			if (!is_mm && cnl->wcs_phase2_commit_pidcnt)
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				wcs_sleep(lcnt);
			} else if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
			{
				SIGNAL_WRITERS_TO_STOP(cnl);	/* to stop all active writers */
				WAIT_FOR_WRITERS_TO_STOP(cnl, ocnt, MAXGETSPACEWAIT);
				if (MAXGETSPACEWAIT <= ocnt)
				{
					assert(FALSE);
					return FALSE;
				}
				if (LATCH_CLEAR == WRITE_LATCH_VAL(cr))
				{	/* Check if the cache-record is part of the active queue. If so, remove it from the
					 * tail of the active queue and move it to the head to try and speed up the flush.
					 * If not, and if cr->dirty is non-zero, then the only way we know this is possible
					 * is if a concurrent process encountered an error in the midst of phase2 of its
					 * bg_update commit and finished the update but did not reinsert the cache-record in
					 * the active queue (see the comment in secshr_db_clnup about why the INSQ*I macros
					 * are not used in VMS). In this case, return FALSE as wcs_get_space cannot flush this
					 * cache-record. The caller will trigger appropriate error handling. We are guaranteed
					 * that cr cannot be part of the wip queue because WRITE_LATCH_VAL(cr) is LATCH_CLEAR
					 * (in the wip queue it would be > LATCH_CLEAR).
					 */
					if (0 != cr->state_que.fl)
					{	/* We are about to play with the queues without using interlocks.
						 * Assert that no one else could be concurrently playing with the queue.
						 */
						assert(!cnl->wcs_phase2_commit_pidcnt && !cnl->in_wtstart);
						base = &csa->acc_meth.bg.cache_state->cacheq_active;
						q0 = (que_ent_ptr_t)((sm_uc_ptr_t)&cr->state_que + cr->state_que.fl);
						shuffqth((que_ent_ptr_t)q0, (que_ent_ptr_t)base);
					} else if (cr->dirty)
					{
						assert(gtm_white_box_test_case_enabled);
						return FALSE;
					}
				}
				SIGNAL_WRITERS_TO_RESUME(cnl);
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				wcs_sleep(lcnt);
			} else if ((0 == cr->iosb.cond) || (WRT_STRT_PNDNG == cr->iosb.cond))
			{
				JNL_ENSURE_OPEN_WCS_WTSTART(csa, reg, 0, dummy_errno);
				wcs_sleep(lcnt);
			}
			if (FALSE == wcs_wtfini(reg))
				return FALSE;
		}
		if (0 == cr->dirty)
			return TRUE;
		assert(FALSE);
		return FALSE;
	}
	for (lcnt = 1; ((cnl->wc_in_free < needed) && (MAXGETSPACEWAIT > lcnt)); lcnt++)
	{
		DCLAST_WCS_WTSTART(reg, 0, dummy_errno);	/* a macro that dclast's wcs_wtstart and checks for errors etc. */
		wcs_sleep(lcnt);
		if (FALSE == wcs_wtfini(reg))
			return FALSE;
	}
	if (cnl->wc_in_free < needed)
	{
		assert(FALSE);
		return FALSE;
	}
	return TRUE;
}
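/* A minimal sketch (not part of the original source) of the flush_trigger decay that wcs_get_space()
 * applies above while in crit: on every call the trigger drops by roughly 1/STEP_FACTOR of its current
 * value (but by at least 1) and is floored at MIN_FLUSH_TRIGGER(n_bts). The constants below
 * (TOY_STEP_FACTOR of 16 and a floor of n_bts / 8) are assumptions for illustration only, not the
 * values of the real STEP_FACTOR / MIN_FLUSH_TRIGGER macros.
 */
#include <stdio.h>

#define TOY_STEP_FACTOR		16		/* assumed decay divisor */
#define TOY_MIN_TRIGGER(n_bts)	((n_bts) / 8)	/* assumed floor */
#define TOY_MAX(a, b)		((a) > (b) ? (a) : (b))

int main(void)
{
	int	n_bts = 1024;	/* total buffers, like csd->n_bts */
	int	trigger = 960;	/* starting flush trigger, like csd->flush_trigger */
	int	i;

	for (i = 1; i <= 5; i++)
	{	/* same shape as:
		 * csd->flush_trigger = MAX(csd->flush_trigger - MAX(csd->flush_trigger / STEP_FACTOR, 1),
		 *			    MIN_FLUSH_TRIGGER(csd->n_bts));
		 */
		trigger = TOY_MAX(trigger - TOY_MAX(trigger / TOY_STEP_FACTOR, 1), TOY_MIN_TRIGGER(n_bts));
		printf("after call %d: flush_trigger = %d\n", i, trigger);
	}
	return 0;	/* prints 900, 844, 792, 743, 697 with these toy constants */
}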