void heartbeat_timer(void)
{
	gd_addr			*addr_ptr;
	sgmnt_addrs		*csa;
	jnl_private_control	*jpc;
	gd_region		*r_local, *r_top;
	int			rc;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* It will take heartbeat_counter about 1014 years to overflow. */
	heartbeat_counter++;
	DEBUG_ONLY(set_enospc_if_needed());
	/* Check every 1 minute if we have an older generation journal file open. If so, close it.
	 * The only exceptions are
	 *   a) The source server can have older generations open and they should not be closed.
	 *   b) If we are in the process of switching to a new journal file when we get interrupted
	 *      by the heartbeat timer, we should not close the older generation journal file
	 *      as it will anyway be closed by the mainline code. But identifying that we are in
	 *      the midst of a journal file switch is tricky, so we check whether the process holds
	 *      crit for this region and, if so, skip the close this time and wait for the next heartbeat.
	 */
	if ((INTRPT_OK_TO_INTERRUPT == intrpt_ok_state) && !is_src_server
		&& (0 == heartbeat_counter % NUM_HEARTBEATS_FOR_OLDERJNL_CHECK))
	{
		for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
		{
			for (r_local = addr_ptr->regions, r_top = r_local + addr_ptr->n_regions; r_local < r_top; r_local++)
			{
				if (!r_local->open || r_local->was_open)
					continue;
				if ((dba_bg != r_local->dyn.addr->acc_meth) && (dba_mm != r_local->dyn.addr->acc_meth))
					continue;
				csa = &FILE_INFO(r_local)->s_addrs;
				if (csa->now_crit)
					continue;
				jpc = csa->jnl;
				if ((NULL != jpc) && (NOJNL != jpc->channel) && JNL_FILE_SWITCHED(jpc))
				{	/* The journal file we have open is not the latest generation journal file. Close it. */
					/* Assert that we never have an active write on a previous generation journal file. */
					assert(process_id != jpc->jnl_buff->io_in_prog_latch.u.parts.latch_pid);
					JNL_FD_CLOSE(jpc->channel, rc);	/* sets jpc->channel to NOJNL */
					jpc->pini_addr = 0;
				}
			}
		}
	}
	start_timer((TID)heartbeat_timer, HEARTBEAT_INTERVAL, heartbeat_timer, 0, NULL);
}
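/* Illustrative sketch (not taken from this codebase's startup path): heartbeat_timer() re-arms itself with the
 * start_timer() call at its end, so it only needs to be primed once when the process starts. Assuming the same
 * start_timer() signature and HEARTBEAT_INTERVAL used above, the initial arming would look like the call below;
 * where exactly GT.M issues this call is not shown in this section.
 */
start_timer((TID)heartbeat_timer, HEARTBEAT_INTERVAL, heartbeat_timer, 0, NULL);	/* prime the self-rescheduling timer */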
void set_enospc_if_needed()
{
	gd_addr		*addr_ptr;
	char		enospc_enable_list[MAX_REGIONS];
	boolean_t	ok_to_interrupt, is_time_to_act;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	if (TREF(gtm_test_fake_enospc) && is_jnlpool_creator && ANTICIPATORY_FREEZE_AVAILABLE)
	{
		ok_to_interrupt = (INTRPT_OK_TO_INTERRUPT == intrpt_ok_state) && (0 == gtmMallocDepth);
		is_time_to_act = (next_heartbeat_counter == heartbeat_counter);
		if (syslog_deferred && ok_to_interrupt)
		{
			send_msg_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_FAKENOSPCLEARED, 1, (heartbeat_counter - syslog_deferred));
			syslog_deferred = 0;
		}
		if (!is_time_to_act || syslog_deferred || (!ok_to_interrupt && !IS_REPL_INST_FROZEN))
		{	/* We have to skip this because we have just fallen into the deferred zone or are currently in it */
			if (is_time_to_act)
				next_heartbeat_counter++;	/* Try again in the next heartbeat */
			return;
		}
		assert(0 == syslog_deferred);
		srand(time(NULL));
		addr_ptr = get_next_gdr(NULL);
		if (NULL == addr_ptr)	/* Ensure that there is a global directory to operate on. */
			return;
		assert(NULL == get_next_gdr(addr_ptr));
		/* Randomly simulate ENOSPC or free space. No more than 50 regions are allowed, to avoid unnecessary
		 * malloc/frees in this debug-only code.
		 */
		assert(MAX_REGIONS >= addr_ptr->n_regions);
		if (!IS_REPL_INST_FROZEN)
		{	/* We are in an UNFROZEN state and about to be FROZEN due to ENOSPC */
			choose_random_reg_list(enospc_enable_list, addr_ptr->n_regions);
			next_heartbeat_counter = heartbeat_counter + ENOSPC_FROZEN_DURATION;
		} else
		{	/* We are in a FROZEN state and about to be UNFROZEN due to free space */
			memset(enospc_enable_list, 0, MAX_REGIONS);
			next_heartbeat_counter = heartbeat_counter + ENOSPC_UNFROZEN_DURATION;
			if (!ok_to_interrupt)
				syslog_deferred = heartbeat_counter;
		}
		set_enospc_flags(addr_ptr, enospc_enable_list, ok_to_interrupt);
	}
}
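/* Hypothetical sketch of choose_random_reg_list() as called above; the real implementation is not shown in this
 * section. Assuming it simply marks a random subset of regions for fake ENOSPC (one char flag per region, later
 * consumed by set_enospc_flags()), it could look like the following. rand() is already seeded by the
 * srand(time(NULL)) call in set_enospc_if_needed().
 */
void choose_random_reg_list(char *enospc_enable_list, int n_regions)
{
	int	i;

	memset(enospc_enable_list, 0, MAX_REGIONS);
	for (i = 0; i < n_regions; i++)
		enospc_enable_list[i] = (char)(rand() % 2);	/* 1 => fake ENOSPC on this region, 0 => leave it alone */
}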
void util_exit_handler()
{
	int		stat;
	gd_region	*r_top, *reg;
	sgmnt_addrs	*csa;
	gd_addr		*addr_ptr;

	if (exit_handler_active)	/* Don't recurse if exit handler exited */
		return;
	exit_handler_active = TRUE;
	SET_PROCESS_EXITING_TRUE;	/* set this BEFORE canceling timers as wcs_phase2_commit_wait relies on this */
	if (IS_DSE_IMAGE)
	{	/* Need to clear csa->hold_onto_crit in case it was set */
		for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
		{
			for (reg = addr_ptr->regions, r_top = reg + addr_ptr->n_regions; reg < r_top; reg++)
			{
				if (reg->open && !reg->was_open)
				{
					csa = &FILE_INFO(reg)->s_addrs;
					csa->hold_onto_crit = FALSE;	/* need to do this before the rel_crit */
					if (csa->now_crit)
						rel_crit(reg);
				}
			}
		}
	}
	CANCEL_TIMERS;		/* Cancel all unsafe timers - No unpleasant surprises */
	secshr_db_clnup(NORMAL_TERMINATION);
	assert(!dollar_tlevel);	/* MUPIP and GT.M are the only ones which can run TP and they have their own exit handlers.
				 * So no need to run op_trollback here like mupip_exit_handler and gtm_exit_handler.
				 */
	gv_rundown();
	print_exit_stats();
	util_out_close();
	GTMCRYPT_CLOSE;
	if (need_core && !created_core)
		DUMP_CORE;
}
void preemptive_db_clnup(int preemptive_severe)
{
	sgmnt_addrs	*csa;
	sgm_info	*si;
	gd_region	*r_top, *reg;
	gd_addr		*addr_ptr;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* Clear the "inctn_opcode" global variable now that any in-progress transaction is aborted at this point.
	 * Not doing so would cause future calls to "t_end" to get confused and skip writing logical jnl recs
	 * and instead incorrectly write an INCTN record (GTM-8425).
	 */
	if (bml_save_dollar_tlevel)
	{
		assert(!dollar_tlevel);
		dollar_tlevel = bml_save_dollar_tlevel;
		bml_save_dollar_tlevel = 0;
	}
	assert(!dollar_tlevel || (inctn_invalid_op == inctn_opcode) || (inctn_bmp_mark_free_gtm == inctn_opcode));
	assert(dollar_tlevel || update_trans || (inctn_invalid_op == inctn_opcode));
	inctn_opcode = inctn_invalid_op;
	if (!dollar_tlevel && update_trans)
	{	/* It is possible we hit an error in the middle of an update, at which point we have
		 * a valid clue and a non-NULL cse. However, this causes problems for subsequent
		 * transactions (see comment in t_begin). In particular we could end up pinning buffers
		 * unnecessarily. So clear the cse of any histories that may have been active during the update.
		 */
		CLEAR_CSE(gv_target);
		if ((NULL != gv_target) && (NULL != gv_target->gd_csa))
		{
			CLEAR_CSE(gv_target->gd_csa->dir_tree);
			GTMTRIG_ONLY(CLEAR_CSE(gv_target->gd_csa->hasht_tree));
		}
		/* Resetting this is necessary to avoid blowing an assert in t_begin that it is 0 at the start of a
		 * transaction.
		 */
		update_trans = 0;
	}
	if (INVALID_GV_TARGET != reset_gv_target)
	{
		if (SUCCESS != preemptive_severe && INFO != preemptive_severe)
		{	/* We know of a few cases in Unix where gv_target and gv_currkey could be out of sync at this point.
			 * a) If we are inside trigger code which in turn does an update that does
			 *    reads of the ^#t global and ends up in a restart. This restart would
			 *    in turn do a rts_error_csa(TPRETRY) which would invoke mdb_condition_handler,
			 *    which would in turn invoke preemptive_db_clnup, which invokes this macro.
			 *    In this tp restart case though, it is ok for gv_target and gv_currkey
			 *    to be out of sync because they are going to be reset by tp_clean_up anyway,
			 *    so skip the dbg-only in-sync check.
			 * b) If we are in gvtr_init reading the ^#t global and detect an error (e.g. TRIGINVCHSET),
			 *    gv_target after the reset would be pointing to a regular global whereas gv_currkey
			 *    would be pointing to ^#t. It is ok to be out of sync since in this case we expect
			 *    mdb_condition_handler to be calling us. That has code to reset gv_currkey (and
			 *    cs_addrs/cs_data etc.) to reflect gv_target (i.e. get them back in sync).
			 * Therefore in Unix we pass SKIP_GVT_GVKEY_CHECK to skip the gvtarget/gvcurrkey out-of-sync check
			 * in RESET_GV_TARGET. In VMS we pass DO_GVT_GVKEY_CHECK as we don't yet know of an out-of-sync
			 * situation there.
			 */
			RESET_GV_TARGET(UNIX_ONLY(SKIP_GVT_GVKEY_CHECK) VMS_ONLY(DO_GVT_GVKEY_CHECK));
		}
	}
	need_kip_incr = FALSE;	/* in case we got an error in t_end (e.g. GBLOFLOW), we don't want this global variable to get
				 * carried over to the next non-TP transaction that this process does (e.g. inside an error trap).
				 */
	TREF(expand_prev_key) = FALSE;	/* reset global (in case it is TRUE) so it does not get carried over to future operations */
	if (dollar_tlevel)
	{
		for (si = first_sgm_info; si != NULL; si = si->next_sgm_info)
		{
			if (NULL != si->kip_csa)
			{
				csa = si->tp_csa;
				assert(si->tp_csa == si->kip_csa);
				PROBE_DECR_KIP(csa->hdr, csa, si->kip_csa);
			}
		}
	} else if (NULL != kip_csa && (NULL != kip_csa->hdr) && (NULL != kip_csa->nl))
		PROBE_DECR_KIP(kip_csa->hdr, kip_csa, kip_csa);
	if (IS_DSE_IMAGE)
	{	/* Release crit on any region that was obtained for the current erroring DSE operation.
		 * Take care NOT to release crit obtained by a previous CRIT -SEIZE command.
		 */
		for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
		{
			for (reg = addr_ptr->regions, r_top = reg + addr_ptr->n_regions; reg < r_top; reg++)
			{
				if (reg->open && !reg->was_open)
				{
					csa = &FILE_INFO(reg)->s_addrs;
					assert(csa->hold_onto_crit || !csa->dse_crit_seize_done);
					assert(!csa->hold_onto_crit || csa->now_crit);
					if (csa->now_crit && (!csa->hold_onto_crit || !csa->dse_crit_seize_done))
					{
						rel_crit(reg);
						csa->hold_onto_crit = FALSE;
						t_abort(reg, csa);	/* cancel mini-transaction if any in progress */
					}
				}
			}
		}
	}
}
void gv_rundown(void)
{
	gd_region	*r_top, *r_save, *r_local;
	gd_addr		*addr_ptr;
	sgm_info	*si;
	int4		rundown_status = EXIT_NRM;	/* if gds_rundown went smoothly */
#	ifdef VMS
	vms_gds_info	*gds_info;
#	elif UNIX
	unix_db_info	*udi;
#	endif
#	if defined(DEBUG) && defined(UNIX)
	sgmnt_addrs	*csa;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	r_save = gv_cur_region;		/* Save for possible core dump */
	gvcmy_rundown();
	ENABLE_AST
	if (pool_init)
		rel_lock(jnlpool.jnlpool_dummy_reg);
	for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
	{
		for (r_local = addr_ptr->regions, r_top = r_local + addr_ptr->n_regions; r_local < r_top; r_local++)
		{
			if (r_local->open && !r_local->was_open && dba_cm != r_local->dyn.addr->acc_meth)
			{	/* Rundown has already occurred for GT.CM client regions through gvcmy_rundown() above.
				 * Hence the (dba_cm != ...) check in the if above. Note that for GT.CM client regions,
				 * region->open is TRUE although cs_addrs is NULL.
				 */
#				if defined(DEBUG) && defined(UNIX)
				if (is_jnlpool_creator && ANTICIPATORY_FREEZE_AVAILABLE && TREF(gtm_test_fake_enospc))
				{	/* Clear ENOSPC faking now that we are running down */
					csa = REG2CSA(r_local);
					if (csa->nl->fake_db_enospc || csa->nl->fake_jnl_enospc)
					{
						send_msg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TEXT, 2, DB_LEN_STR(r_local), ERR_TEXT, 2,
							LEN_AND_LIT("Resetting fake_db_enospc and fake_jnl_enospc"));
						csa->nl->fake_db_enospc = FALSE;
						csa->nl->fake_jnl_enospc = FALSE;
					}
				}
#				endif
				gv_cur_region = r_local;
				tp_change_reg();
				UNIX_ONLY(rundown_status |=) gds_rundown();
				/* Now that gds_rundown is done, free up the memory associated with the region.
				 * Ideally the following memory freeing code should go to gds_rundown, but
				 * GT.CM calls gds_rundown() and we want to reuse memory for GT.CM.
				 */
				if (NULL != cs_addrs)
				{
					if (NULL != cs_addrs->dir_tree)
						FREE_CSA_DIR_TREE(cs_addrs);
					if (cs_addrs->sgm_info_ptr)
					{
						si = cs_addrs->sgm_info_ptr;
						/* It is possible we got interrupted before initializing all fields of "si"
						 * completely, so account for NULL values while freeing/releasing those fields.
						 */
						assert((si->tp_csa == cs_addrs) || (NULL == si->tp_csa));
						if (si->jnl_tail)
						{
							CAREFUL_FREEUP_BUDDY_LIST(si->format_buff_list);
							CAREFUL_FREEUP_BUDDY_LIST(si->jnl_list);
						}
						CAREFUL_FREEUP_BUDDY_LIST(si->recompute_list);
						CAREFUL_FREEUP_BUDDY_LIST(si->new_buff_list);
						CAREFUL_FREEUP_BUDDY_LIST(si->tlvl_info_list);
						CAREFUL_FREEUP_BUDDY_LIST(si->tlvl_cw_set_list);
						CAREFUL_FREEUP_BUDDY_LIST(si->cw_set_list);
						if (NULL != si->blks_in_use)
						{
							free_hashtab_int4(si->blks_in_use);
							free(si->blks_in_use);
							si->blks_in_use = NULL;
						}
						if (si->cr_array_size)
						{
							assert(NULL != si->cr_array);
							if (NULL != si->cr_array)
								free(si->cr_array);
						}
						if (NULL != si->first_tp_hist)
							free(si->first_tp_hist);
						free(si);
					}
					if (cs_addrs->jnl)
					{
						assert(&FILE_INFO(cs_addrs->jnl->region)->s_addrs == cs_addrs);
						if (cs_addrs->jnl->jnllsb)
						{
							UNIX_ONLY(assert(FALSE));
							free(cs_addrs->jnl->jnllsb);
						}
						free(cs_addrs->jnl);
					}
					GTMCRYPT_ONLY(
						if (cs_addrs->encrypted_blk_contents)
							free(cs_addrs->encrypted_blk_contents);
					)
				}
				assert(gv_cur_region->dyn.addr->file_cntl->file_info);
				VMS_ONLY(
					gds_info = (vms_gds_info *)gv_cur_region->dyn.addr->file_cntl->file_info;
					if (gds_info->xabpro)
						free(gds_info->xabpro);
					if (gds_info->xabfhc)
						free(gds_info->xabfhc);
					if (gds_info->nam)
					{
						free(gds_info->nam->nam$l_esa);
						free(gds_info->nam);
					}
					if (gds_info->fab)
						free(gds_info->fab);
				)
				free(gv_cur_region->dyn.addr->file_cntl->file_info);
				free(gv_cur_region->dyn.addr->file_cntl);
			}
			r_local->open = r_local->was_open = FALSE;
		}
	}
/* Return the number of regions (including the jnlpool dummy region) in which we hold crit, are acquiring crit, or have
 * in_wtstart set.
 * ** NOTE ** This routine is called from signal handlers and is thus called asynchronously.
 * If the CRIT_IN_COMMIT bit is set, we check if we are in the middle of a commit (PHASE1 inside crit or PHASE2 outside crit)
 * on some region.
 * If the CRIT_RELEASE bit is set, we release crit on region(s) that:
 *   1) we hold crit on (when neither CRIT_IN_COMMIT nor CRIT_NOT_TRANS_REG is specified)
 *   2) are part of the current transaction, except those regions that are marked as valid to have crit in by virtue of
 *      their crit_check_cycle value being the same as crit_deadlock_check_cycle.
 * Note: CRIT_RELEASE implies CRIT_ALL_REGIONS.
 * If the CRIT_ALL_REGIONS bit is set, go through the entire list of regions.
 */
uint4 have_crit(uint4 crit_state)
{
	gd_region	*r_top, *r_local;
	gd_addr		*addr_ptr;
	sgmnt_addrs	*csa;
	uint4		crit_reg_cnt = 0;

	/* In order to properly release the necessary regions, CRIT_RELEASE implies going through all the regions */
	if (crit_state & CRIT_RELEASE)
	{
		UNIX_ONLY(assert(!jgbl.onlnrlbk));	/* should not request crit to be released during online rollback */
		crit_state |= CRIT_ALL_REGIONS;
	}
	if (0 != crit_count)
	{
		crit_reg_cnt++;
		if (0 == (crit_state & CRIT_ALL_REGIONS))
			return crit_reg_cnt;
	}
	for (addr_ptr = get_next_gdr(NULL); addr_ptr; addr_ptr = get_next_gdr(addr_ptr))
	{
		for (r_local = addr_ptr->regions, r_top = r_local + addr_ptr->n_regions; r_local < r_top; r_local++)
		{
			if (r_local->open && !r_local->was_open)
			{
				csa = &FILE_INFO(r_local)->s_addrs;
				if (NULL != csa)
				{
					if (csa->now_crit)
					{
						crit_reg_cnt++;
						/* It is possible that if DSE has done a CRIT REMOVE and stolen our crit, it
						 * could be given to someone else, which would cause this test to fail. The
						 * current thinking is that the state DSE put this process in is no longer
						 * viable and it should die at the earliest opportunity, there being no way
						 * to know if that is what happened anyway.
						 */
						if (csa->nl->in_crit != process_id)
							GTMASSERT;
						/* If we are releasing (all) regions with a critical section, or if this is the
						 * special TP case, release if the cycle number doesn't match, meaning this is
						 * a region we should not hold crit in (even if it is part of tp_reg_list).
						 */
						if ((0 != (crit_state & CRIT_RELEASE))
							&& (0 == (crit_state & CRIT_NOT_TRANS_REG)
								|| crit_deadlock_check_cycle != csa->crit_check_cycle))
						{
							assert(FALSE);
							assert(!csa->hold_onto_crit);
							rel_crit(r_local);
							send_msg(VARLSTCNT(8) ERR_MUTEXRELEASED, 6, process_id, process_id,
								DB_LEN_STR(r_local), dollar_tlevel, t_tries);
						}
						if (0 == (crit_state & CRIT_ALL_REGIONS))
							return crit_reg_cnt;
					}
					/* In-commit crit is defined as the time from when early_tn is 1 + curr_tn up to when
					 * t_commit_crit is set to FALSE. Note that the first check should be done only if we
					 * hold crit, as otherwise we could see inconsistent values.
					 */
					if ((crit_state & CRIT_IN_COMMIT)
						&& (csa->now_crit && (csa->ti->early_tn != csa->ti->curr_tn) || csa->t_commit_crit))
					{
						crit_reg_cnt++;
						if (0 == (crit_state & CRIT_ALL_REGIONS))
							return crit_reg_cnt;
					}
					if ((crit_state & CRIT_IN_WTSTART) && csa->in_wtstart)
					{
						crit_reg_cnt++;
						if (0 == (crit_state & CRIT_ALL_REGIONS))
							return crit_reg_cnt;
					}
				}
			}
		}
	}
	if (NULL != jnlpool.jnlpool_ctl)
	{
		csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;
		if (NULL != csa && csa->now_crit)
		{
			crit_reg_cnt++;
			if (0 != (crit_state & CRIT_RELEASE))
			{
				assert(!csa->hold_onto_crit);
				rel_lock(jnlpool.jnlpool_dummy_reg);
			}
		}
	}
	return crit_reg_cnt;
}
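/* Illustrative caller sketch (hypothetical, not taken from this codebase): an asynchronous handler that wants to defer
 * its real work while this process is mid-commit, in wcs_wtstart, or holding crit anywhere could poll have_crit() with
 * the flag bits tested above. Only have_crit() and its CRIT_* flags come from the code above; the surrounding handler
 * and the defer/retry decision are assumptions.
 */
if (0 < have_crit(CRIT_IN_COMMIT | CRIT_IN_WTSTART | CRIT_ALL_REGIONS))
{	/* Unsafe point: some region is in commit or in wtstart, or this process holds/is acquiring crit. */
	;	/* e.g. record that work is pending and retry from a later, safer point */
}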