void wcs_recover(gd_region *reg) { bt_rec_ptr_t bt; cache_rec_ptr_t cr, cr_alt, cr_alt_new, cr_lo, cr_top, hash_hdr; cache_que_head_ptr_t active_head, hq, wip_head, wq; gd_region *save_reg; que_ent_ptr_t back_link; /* should be crit & not need interlocked ops. */ sgmnt_addrs *csa; sgmnt_data_ptr_t csd; node_local_ptr_t cnl; int4 bml_full, dummy_errno, blk_size; uint4 jnl_status, epid, r_epid; int4 bt_buckets, bufindx; /* should be the same type as "csd->bt_buckets" */ inctn_opcode_t save_inctn_opcode; unsigned int bplmap, lcnt, total_blks, wait_in_rip; sm_uc_ptr_t buffptr; blk_hdr_ptr_t blk_ptr; INTPTR_T bp_lo, bp_top, old_block; boolean_t backup_block_saved, change_bmm; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; sgm_info *si; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; /* If this is the source server, do not invoke cache recovery as that implies touching the database file header * (incrementing the curr_tn etc.) and touching the journal file (writing INCTN records) both of which are better * avoided by the source server; It is best to keep it as read-only to the db/jnl as possible. It is ok to do * so because the source server anyways does not rely on the integrity of the database cache and so does not need * to fix it right away. Any other process that does rely on the cache integrity will fix it when it gets crit next. 
*/ if (is_src_server) return; save_reg = gv_cur_region; /* protect against [at least] M LOCK code which doesn't maintain cs_addrs and cs_data */ TP_CHANGE_REG(reg); /* which are needed by called routines such as wcs_wtstart and wcs_mm_recover */ if (dba_mm == reg->dyn.addr->acc_meth) /* MM uses wcs_recover to remap the database in case of a file extension */ { wcs_mm_recover(reg); TP_CHANGE_REG(save_reg); TREF(wcs_recover_done) = TRUE; return; } csa = &FILE_INFO(reg)->s_addrs; csd = csa->hdr; cnl = csa->nl; si = csa->sgm_info_ptr; /* If a mupip truncate operation was abruptly interrupted we have to correct any inconsistencies */ GTM_TRUNCATE_ONLY(recover_truncate(csa, csd, gv_cur_region);)
void wcs_recover(gd_region *reg) { bt_rec_ptr_t bt; cache_rec_ptr_t cr, cr_alt, cr_lo, cr_top, hash_hdr; cache_que_head_ptr_t active_head, hq, wip_head, wq; gd_region *save_reg; que_ent_ptr_t back_link; /* should be crit & not need interlocked ops. */ sgmnt_data_ptr_t csd; sgmnt_addrs *csa; bool blk_used, change_bmm; int4 bml_full, dummy_errno, blk_size; uint4 jnl_status, epid; int bt_buckets; inctn_opcode_t save_inctn_opcode; unsigned int bplmap, lcnt, total_blks; sm_uc_ptr_t buffptr; error_def(ERR_BUFRDTIMEOUT); error_def(ERR_DBCCERR); error_def(ERR_DBCNTRLERR); error_def(ERR_DBDANGER); error_def(ERR_ERRCALL); error_def(ERR_INVALIDRIP); error_def(ERR_STOPTIMEOUT); error_def(ERR_TEXT); save_reg = gv_cur_region; /* protect against [at least] M LOCK code which doesn't maintain cs_addrs and cs_data */ TP_CHANGE_REG(reg); /* which are needed by called routines such as wcs_wtstart and wcs_mm_recover */ if (dba_mm == reg->dyn.addr->acc_meth) /* MM uses wcs_recover to remap the database in case of a file extension */ { wcs_mm_recover(reg); TP_CHANGE_REG(save_reg); return; } csa = &FILE_INFO(reg)->s_addrs; csd = csa->hdr; assert(csa->now_crit || csd->clustered); DEBUG_ONLY(in_wcs_recover = TRUE;) /* used by bt_put() called below */
void gtcmd_rundown(connection_struct *cnx, bool clean_exit) { int4 link; cm_region_list *ptr, *last, *que_next, *que_last; cm_region_head *region; uint4 jnl_status; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; int refcnt; boolean_t was_crit; int4 rundown_status = EXIT_NRM; /* if gds_rundown went smoothly */ for (ptr = cnx->region_root; ptr;) { region = ptr->reghead; TP_CHANGE_REG(region->reg); jpc = cs_addrs->jnl; if (ptr->pini_addr && clean_exit && JNL_ENABLED(cs_data) && (NOJNL != jpc->channel)) { was_crit = cs_addrs->now_crit; if (!was_crit) grab_crit(gv_cur_region); if (JNL_ENABLED(cs_data)) { jpc->pini_addr = ptr->pini_addr; SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pfin needs this to be set */ jbp = jpc->jnl_buff; /* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write * journal records (if it decides to switch to a new journal file). */ ADJUST_GBL_JREC_TIME(jgbl, jbp); jnl_status = jnl_ensure_open(); if (0 == jnl_status) { if (0 != jpc->pini_addr) jnl_put_jrt_pfin(cs_addrs); } else send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region)); } if (!was_crit) rel_crit(gv_cur_region); } refcnt = --region->refcnt; /* Dont know how refcnt can become negative but in pro handle it by bypassing this region. The reason is the * following. refcnt should have originally been a positive value. Every time this function is invoked, it would * be decremented by one. There should have been one invocation that saw refcnt to be zero. That would have * done the rundown of the region or if it is still in the stack the rundown is still in progress. Therefore * it is not a good idea to try running down this region when we see refcnt to be negative (as otherwise we * will get confused and could potentially end up with SIG-11 or ACCVIO errors). 
The worst case is that we * would not have rundown the region in which case an externally issued MUPIP RUNDOWN would be enough. */ assert(0 <= refcnt); if (0 == refcnt) { /* free up only as little as needed to facilitate structure reuse when the region is opened again */ assert(region->head.fl == region->head.bl); VMS_ONLY(gtcm_ast_avail++); if (JNL_ALLOWED(cs_data)) jpc->pini_addr = 0; UNIX_ONLY(rundown_status |=) gds_rundown(); gd_ht_kill(region->reg_hash, TRUE); /* TRUE to free up the table and the gv_targets it holds too */ FREE_CSA_DIR_TREE(cs_addrs); cm_del_gdr_ptr(gv_cur_region); } que_next = (cm_region_list *)((unsigned char *)ptr + ptr->regque.fl); que_last = (cm_region_list *)((unsigned char *)ptr + ptr->regque.bl); link = (int4)((unsigned char *)que_next - (unsigned char *)que_last); que_last->regque.fl = link; que_next->regque.bl = -link; last = ptr; ptr = ptr->next; free(last); }
/* mu_reorg_upgrd_dwngrd() : implements MUPIP REORG -UPGRADE and MUPIP REORG -DOWNGRADE.
 * Walks every local bitmap of each region named by the REGION qualifier and, one block at a
 * time, rewrites each USED/RECYCLED GDS block through a null non-TP update so that the block
 * is re-laid-out in the desired database format (V6 for upgrade, V4 for downgrade).  Progress
 * is checkpointed in the file header (reorg_upgrd_dwngrd_restart_block) so an interrupted
 * reorg can resume.  Exits the process via mupip_exit(status); does not return to the caller.
 */
void mu_reorg_upgrd_dwngrd(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		*blkid_ptr, curblk, curbmp, start_blk, stop_blk, start_bmp, last_bmp;
	block_id		startblk_input, stopblk_input;
	boolean_t		upgrade, downgrade, safejnl, nosafejnl, region, first_reorg_in_this_db_fmt, reorg_entiredb;
	boolean_t		startblk_specified, stopblk_specified, set_fully_upgraded, db_got_to_v5_once, mark_blk_free;
	cache_rec_ptr_t		cr;
	char			*bml_lcl_buff = NULL, *command, *reorg_command;
	sm_uc_ptr_t		bptr = NULL;	/* process-private buffer for dsk_read of non-bitmap blocks */
	cw_set_element		*cse;		/* NOTE(review): cse/cdb_status/safejnl/nosafejnl appear unused in this body — confirm */
	enum cdb_sc		cdb_status;
	enum db_ver		new_db_format, ondsk_blkver;
	gd_region		*reg;
	int			cycle;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			blocks_left, expected_blks2upgrd, actual_blks2upgrd, total_blks, free_blks;
	int4			status, status1, mapsize, lcnt, bml_status;
	reorg_stats_t		reorg_stats;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	sm_uc_ptr_t		blkBase, bml_sm_buff;	/* shared memory pointer to the bitmap global buffer */
	srch_hist		alt_hist;
	srch_blk_status		*blkhist, bmlhist;
	tp_region		*rptr;
	trans_num		curr_tn;
	unsigned char		save_cw_set_depth;
	uint4			lcl_update_trans;

	/* Parse command-line qualifiers. Exactly one of UPGRADE/DOWNGRADE must be present. */
	region = (CLI_PRESENT == cli_present("REGION"));
	upgrade = (CLI_PRESENT == cli_present("UPGRADE"));
	downgrade = (CLI_PRESENT == cli_present("DOWNGRADE"));
	assert(upgrade && !downgrade || !upgrade && downgrade);
	command = upgrade ? "UPGRADE" : "DOWNGRADE";
	reorg_command = upgrade ? "MUPIP REORG UPGRADE" : "MUPIP REORG DOWNGRADE";
	reorg_entiredb = TRUE;	/* unless STARTBLK or STOPBLK is specified we are going to {up,down}grade the entire database */
	startblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STARTBLK")) && (cli_get_hex("STARTBLK", (uint4 *)&startblk_input)))
	{
		reorg_entiredb = FALSE;
		startblk_specified = TRUE;
	}
	stopblk_specified = FALSE;
	assert(SIZEOF(block_id) == SIZEOF(uint4));
	if ((CLI_PRESENT == cli_present("STOPBLK")) && (cli_get_hex("STOPBLK", (uint4 *)&stopblk_input)))
	{
		reorg_entiredb = FALSE;
		stopblk_specified = TRUE;
	}
	mu_reorg_upgrd_dwngrd_in_prog = TRUE;
	mu_reorg_nosafejnl = (CLI_NEGATED == cli_present("SAFEJNL")) ? TRUE : FALSE;
	assert(region);
	status = SS_NORMAL;
	error_mupip = FALSE;
	gvinit();	/* initialize gd_header (needed by the later call to mu_getlst) */
	mu_getlst("REG_NAME", SIZEOF(tp_region));	/* get the parameter corresponding to REGION qualifier */
	if (error_mupip)
	{
		util_out_print("!/MUPIP REORG !AD cannot proceed with above errors!/", TRUE, LEN_AND_STR(command));
		mupip_exit(ERR_MUNOACTION);
	}
	assert(DBKEYSIZE(MAX_KEY_SZ) == gv_keysize);	/* no need to invoke GVKEYSIZE_INIT_IF_NEEDED macro */
	gv_target = targ_alloc(gv_keysize, NULL, NULL);	/* t_begin needs this initialized */
	gv_target_list = NULL;
	memset(&alt_hist, 0, SIZEOF(alt_hist));	/* null-initialize history */
	blkhist = &alt_hist.h[0];
	/* ---------------- Per-region processing loop ---------------- */
	for (rptr = grlist; NULL != rptr; rptr = rptr->fPtr)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			break;
		reg = rptr->reg;
		util_out_print("!/Region !AD : MUPIP REORG !AD started", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
		if (reg_cmcheck(reg))
		{	/* remote (GT.CM) regions cannot be reorged from this process */
			util_out_print("Region !AD : MUPIP REORG !AD cannot run across network", TRUE, REG_LEN_STR(reg),
					LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		mu_reorg_process = TRUE;	/* gvcst_init will use this value to use gtm_poollimit settings. */
		gvcst_init(reg);
		mu_reorg_process = FALSE;
		assert(update_array != NULL);
		/* access method stored in global directory and database file header might be different in which case
		 * the database setting prevails. therefore, the access method check can be done only after opening
		 * the database (i.e. after the gvcst_init)
		 */
		if (dba_bg != REG_ACC_METH(reg))
		{
			util_out_print("Region !AD : MUPIP REORG !AD cannot continue as access method is not BG", TRUE,
					REG_LEN_STR(reg), LEN_AND_STR(command));
			status = ERR_MUNOFINISH;
			continue;
		}
		/* The mu_getlst call above uses insert_region to create the grlist, which ensures that duplicate regions mapping to
		 * the same db file correspond to only one grlist entry.
		 */
		assert(FALSE == reg->was_open);
		TP_CHANGE_REG(reg);	/* sets gv_cur_region, cs_addrs, cs_data */
		csa = cs_addrs;
		csd = cs_data;
		blk_size = csd->blk_size;	/* "blk_size" is used by the BLK_FINI macro */
		if (reg->read_only)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		assert(GDSVCURR == GDSV6);	/* so we trip this assert in case GDSVCURR changes without a change to this module */
		new_db_format = (upgrade ? GDSV6 : GDSV4);
		grab_crit(reg);
		curr_tn = csd->trans_hist.curr_tn;
		/* set the desired db format in the file header to the appropriate version, increment transaction number */
		status1 = desired_db_format_set(reg, new_db_format, reorg_command);
		assert(csa->now_crit);	/* desired_db_format_set() should not have released crit */
		first_reorg_in_this_db_fmt = TRUE;	/* with the current desired_db_format, this is the first reorg */
		if (SS_NORMAL != status1)
		{	/* "desired_db_format_set" would have printed appropriate error messages */
			if (ERR_MUNOACTION != status1)
			{	/* real error occurred while setting the db format. skip to next region */
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
			util_out_print("Region !AD : Desired DB Format remains at !AD after !AD", TRUE, REG_LEN_STR(reg),
					LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
			if (csd->reorg_db_fmt_start_tn == csd->desired_db_format_tn)
				first_reorg_in_this_db_fmt = FALSE;	/* a same-format reorg was already started; resume it */
		} else
			util_out_print("Region !AD : Desired DB Format set to !AD by !AD", TRUE, REG_LEN_STR(reg),
					LEN_AND_STR(gtm_dbversion_table[new_db_format]), LEN_AND_STR(reorg_command));
		assert(dba_bg == csd->acc_meth);
		/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete */
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		actual_blks2upgrd = csd->blks_to_upgrd;
		/* If MUPIP REORG UPGRADE and there is no block to upgrade in the database as indicated by BOTH
		 * "csd->blks_to_upgrd" and "csd->fully_upgraded", then we can skip processing.
		 * If MUPIP REORG UPGRADE and all non-free blocks need to be upgraded then again we can skip processing.
		 */
		if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
			|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
		{
			util_out_print("Region !AD : Blocks to Upgrade counter indicates no action needed for MUPIP REORG !AD",
					TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
					"Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks,
					actual_blks2upgrd);
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			rel_crit(reg);
			continue;
		}
		/* Determine the [start_blk, stop_blk) range to process for this region */
		stop_blk = total_blks;
		if (stopblk_specified && stopblk_input <= stop_blk)
			stop_blk = stopblk_input;
		if (first_reorg_in_this_db_fmt)
		{	/* Note down reorg start tn (in case we are interrupted, future reorg will know to resume) */
			csd->reorg_db_fmt_start_tn = csd->desired_db_format_tn;
			csd->reorg_upgrd_dwngrd_restart_block = 0;
			start_blk = (startblk_specified ? startblk_input : 0);
		} else
		{	/* Either a concurrent MUPIP REORG of the same type ({up,down}grade) is currently running
			 * or a previously running REORG of the same type was interrupted (Ctrl-Ced).
			 * In either case resume processing from whatever restart block number is stored in fileheader
			 * the only exception is if "STARTBLK" was specified in the input in which use that unconditionally.
			 */
			start_blk = (startblk_specified ? startblk_input : csd->reorg_upgrd_dwngrd_restart_block);
		}
		if (start_blk > stop_blk)
			start_blk = stop_blk;
		mu_reorg_upgrd_dwngrd_start_tn = csd->reorg_db_fmt_start_tn;
		/* Before releasing crit, flush the file-header and dirty buffers in cache to disk. This is because we are now
		 * going to read each GDS block directly from disk to determine if it needs to be upgraded/downgraded or not.
		 */
		if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
		{
			rel_crit(reg);
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4,
					LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
			status = ERR_MUNOFINISH;
			continue;
		}
		rel_crit(reg);
		/* Loop through entire database one GDS block at a time and upgrade/downgrade each of them */
		status1 = SS_NORMAL;
		start_bmp = ROUND_DOWN2(start_blk, BLKS_PER_LMAP);
		last_bmp = ROUND_DOWN2(stop_blk - 1, BLKS_PER_LMAP);
		curblk = start_blk;	/* curblk is the block to be upgraded/downgraded */
		util_out_print("Region !AD : Started processing from block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		if (NULL != bptr)
		{	/* malloc/free "bptr" for each region as GDS block-size can be different */
			free(bptr);
			bptr = NULL;
		}
		memset(&reorg_stats, 0, SIZEOF(reorg_stats));	/* initialize statistics for this region */
		/* ---------------- Per-bitmap loop (one local bitmap = BLKS_PER_LMAP blocks) ---------------- */
		for (curbmp = start_bmp; curbmp <= last_bmp; curbmp += BLKS_PER_LMAP)
		{
			if (mu_ctrly_occurred || mu_ctrlc_occurred)
			{
				status1 = ERR_MUNOFINISH;
				break;
			}
			/* --------------------------------------------------------------
			 * Read in current bitmap block
			 * --------------------------------------------------------------
			 */
			assert(!csa->now_crit);
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr);	/* bring block into the cache outside of crit */
			reorg_stats.blks_read_from_disk_bmp++;
			grab_crit_encr_cycle_sync(reg);	/* needed so t_qread does not return NULL below */
			if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
			{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
				/* see later comment on "csd->reorg_upgrd_dwngrd_restart_block" for why the assignment
				 * of this field should be done only if a db format change did not occur.
				 */
				rel_crit(reg);
				status1 = ERR_MUNOFINISH;
				/* This "start_tn" check is redone after the for-loop and an error message is printed there */
				break;
			} else if (reorg_entiredb)
			{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
				assert(csd->reorg_upgrd_dwngrd_restart_block <= MAX(start_blk, curbmp));
				csd->reorg_upgrd_dwngrd_restart_block = curbmp;	/* previous blocks have been upgraded/downgraded */
			}
			/* Check blks_to_upgrd counter to see if upgrade/downgrade is complete.
			 * Repeat check done a few steps earlier outside of this for loop.
			 */
			total_blks = csd->trans_hist.total_blks;
			free_blks = csd->trans_hist.free_blocks;
			actual_blks2upgrd = csd->blks_to_upgrd;
			if ((upgrade && (0 == actual_blks2upgrd) && csd->fully_upgraded)
				|| (!upgrade && ((total_blks - free_blks) == actual_blks2upgrd)))
			{
				rel_crit(reg);
				break;
			}
			bml_sm_buff = t_qread(curbmp, (sm_int_ptr_t)&cycle, &cr);	/* now that in crit, note down stable buffer */
			if (NULL == bml_sm_buff)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			ondsk_blkver = cr->ondsk_blkver;	/* note down db fmt on disk for bitmap block */
			/* Take a copy of the shared memory bitmap buffer into process-private memory before releasing crit.
			 * We are interested in those blocks that are currently marked as USED in the bitmap.
			 * It is possible that once we release crit, concurrent updates change the bitmap state of those blocks.
			 * In that case, those updates will take care of doing the upgrade/downgrade of those blocks in the
			 * format currently set in csd->desired_db_format i.e. accomplishing MUPIP REORG UPGRADE/DOWNGRADE's job.
			 * If the desired_db_format changes concurrently, we will stop doing REORG UPGRADE/DOWNGRADE processing.
			 */
			if (NULL == bml_lcl_buff)
				bml_lcl_buff = malloc(BM_SIZE(BLKS_PER_LMAP));
			memcpy(bml_lcl_buff, (blk_hdr_ptr_t)bml_sm_buff, BM_SIZE(BLKS_PER_LMAP));
			if (FALSE == cert_blk(reg, curbmp, (blk_hdr_ptr_t)bml_lcl_buff, 0, FALSE))
			{	/* certify the block while holding crit as cert_blk uses fields from file-header (shared memory) */
				assert(FALSE);	/* in pro, skip ugprading/downgarding all blks in this unreliable local bitmap */
				rel_crit(reg);
				util_out_print("Region !AD : Bitmap Block [0x!XL] has integrity errors. Skipping this bitmap.",
						TRUE, REG_LEN_STR(reg), curbmp);
				status1 = ERR_MUNOFINISH;
				continue;
			}
			rel_crit(reg);
			/* ------------------------------------------------------------------------
			 * Upgrade/Downgrade all BUSY blocks in the current bitmap
			 * ------------------------------------------------------------------------
			 */
			curblk = (curbmp == start_bmp) ? start_blk : curbmp;
			mapsize = (curbmp == last_bmp) ? (stop_blk - curbmp) : BLKS_PER_LMAP;
			assert(0 != mapsize);
			assert(mapsize <= BLKS_PER_LMAP);
			db_got_to_v5_once = csd->db_got_to_v5_once;
			/* ---------------- Per-block loop within the current bitmap ---------------- */
			for (lcnt = curblk - curbmp; lcnt < mapsize; lcnt++, curblk++)
			{
				if (mu_ctrly_occurred || mu_ctrlc_occurred)
				{
					status1 = ERR_MUNOFINISH;
					goto stop_reorg_on_this_reg;	/* goto needed because of nested FOR Loop */
				}
				GET_BM_STATUS(bml_lcl_buff, lcnt, bml_status);
				assert(BLK_MAPINVALID != bml_status);	/* cert_blk ran clean so we dont expect invalid entries */
				if (BLK_FREE == bml_status)
				{
					reorg_stats.blks_skipped_free++;
					continue;
				}
				/* MUPIP REORG UPGRADE/DOWNGRADE will convert USED & RECYCLED blocks */
				if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
				{	/* Do NOT read recycled V4 block from disk unless it is guaranteed NOT to be too full */
					if (lcnt)
					{	/* non-bitmap block */
						/* read in block from disk into private buffer. dont pollute the cache yet */
						if (NULL == bptr)
							bptr = (sm_uc_ptr_t)malloc(blk_size);
						status1 = dsk_read(curblk, bptr, &ondsk_blkver, FALSE);
						/* dsk_read on curblk could return an error (DYNUPGRDFAIL) if curblk needs to be
						 * upgraded and if its block size was too big to allow the extra block-header space
						 * requirements for a dynamic upgrade. a MUPIP REORG DOWNGRADE should not error out
						 * in that case as the block is already in the downgraded format.
						 */
						if (SS_NORMAL != status1)
						{
							if (!upgrade && (ERR_DYNUPGRDFAIL == status1))
							{
								assert(GDSV4 == new_db_format);
								ondsk_blkver = new_db_format;
							} else
							{
								gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_DBFILERR, 2,
										DB_LEN_STR(reg), status1);
								util_out_print("Region !AD : Error occurred while reading block "
										"[0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
								status1 = ERR_MUNOFINISH;
								goto stop_reorg_on_this_reg;/* goto needed due to nested FOR Loop */
							}
						}
						reorg_stats.blks_read_from_disk_nonbmp++;
					}	/* else bitmap block has been read in crit earlier and ondsk_blkver appropriately set */
					if (new_db_format == ondsk_blkver)
					{
						assert((SS_NORMAL == status1) || (!upgrade && (ERR_DYNUPGRDFAIL == status1)));
						status1 = SS_NORMAL;	/* treat DYNUPGRDFAIL as no error in case of downgrade */
						reorg_stats.blks_skipped_newfmtindisk++;
						continue;	/* current disk version is identical to what is desired */
					}
					assert(SS_NORMAL == status1);
				}
				/* Begin non-TP transaction to upgrade/downgrade the block.
				 * The way we do that is by updating the block using a null update array.
				 * Any update to a block will trigger an automatic upgrade/downgrade of the block based on
				 * the current fileheader desired_db_format setting and we use that here.
				 */
				t_begin(ERR_MUREORGFAIL, UPDTRNS_DB_UPDATED_MASK);
				for (; ;)
				{	/* restart loop: iterate until t_end succeeds or t_retry gives up (rts_error) */
					CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
					curr_tn = csd->trans_hist.curr_tn;
					db_got_to_v5_once = csd->db_got_to_v5_once;
					if (db_got_to_v5_once || (BLK_RECYCLED != bml_status))
					{	/* convert the block in place */
						blkhist->cse = NULL;	/* start afresh (do not use value from previous retry) */
						blkBase = t_qread(curblk, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
						if (NULL == blkBase)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						blkhist->blk_num = curblk;
						blkhist->buffaddr = blkBase;
						ondsk_blkver = blkhist->cr->ondsk_blkver;
						new_hdr = *(blk_hdr_ptr_t)blkBase;
						mu_reorg_upgrd_dwngrd_blktn = new_hdr.tn;
						mark_blk_free = FALSE;
						inctn_opcode = upgrade ? inctn_blkupgrd : inctn_blkdwngrd;
					} else
					{	/* RECYCLED block in a never-fully-V5 db: mark it FREE instead of converting */
						mark_blk_free = TRUE;
						inctn_opcode = inctn_blkmarkfree;
					}
					inctn_detail.blknum_struct.blknum = curblk;
					/* t_end assumes that the history it is passed does not contain a bitmap block.
					 * for bitmap block, the history validation information is passed through cse instead.
					 * therefore we need to handle bitmap and non-bitmap cases separately.
					 */
					if (!lcnt)
					{	/* Means a bitmap block.
						 * At this point we can do a "new_db_format != ondsk_blkver" check to determine
						 * if the block got converted since we did the dsk_read (see the non-bitmap case
						 * for a similar check done there), but in that case we will have a transaction
						 * which has read 1 bitmap block and is updating no block. "t_end" currently cannot
						 * handle this case as it expects any bitmap block that needs validation to also
						 * have a corresponding cse which will hold its history. Hence we avoid doing the
						 * new_db_format check. The only disadvantage of this is that we will end up
						 * modifying the bitmap block as part of this transaction (in an attempt to convert
						 * its ondsk_blkver) even though it is already in the right format. Since this
						 * overhead is going to be one per bitmap block and since the block is in the cache
						 * at this point, we should not lose much.
						 */
						assert(!mark_blk_free);
						BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
						*blkid_ptr = 0;
						t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
						assert(&alt_hist.h[0] == blkhist);
						alt_hist.h[0].blk_num = 0;	/* create empty history for bitmap block */
						assert(update_trans);
					} else
					{	/* non-bitmap block. fill in history for validation in t_end */
						assert(curblk);	/* we should never come here for block 0 (bitmap) */
						if (!mark_blk_free)
						{
							assert(blkhist->blk_num == curblk);
							assert(blkhist->buffaddr == blkBase);
							blkhist->tn = curr_tn;
							alt_hist.h[1].blk_num = 0;
						}
						/* Also need to pass the bitmap as history to detect if any concurrent M-kill
						 * is freeing up the same USED block that we are trying to convert OR if any
						 * concurrent M-set is reusing the same RECYCLED block that we are trying to
						 * convert. Because of t_end currently not being able to validate a bitmap
						 * without that simultaneously having a cse, we need to create a cse for the
						 * bitmap that is used only for bitmap history validation, but should not be
						 * used to update the contents of the bitmap block in bg_update.
						 */
						bmlhist.buffaddr = t_qread(curbmp, (sm_int_ptr_t)&bmlhist.cycle, &bmlhist.cr);
						if (NULL == bmlhist.buffaddr)
						{
							t_retry((enum cdb_sc)rdfail_detail);
							continue;
						}
						bmlhist.blk_num = curbmp;
						bmlhist.tn = curr_tn;
						GET_BM_STATUS(bmlhist.buffaddr, lcnt, bml_status);
						if (BLK_MAPINVALID == bml_status)
						{
							t_retry(cdb_sc_lostbmlcr);
							continue;
						}
						if (!mark_blk_free)
						{
							if ((new_db_format != ondsk_blkver) && (BLK_FREE != bml_status))
							{	/* block still needs to be converted. create cse */
								BLK_INIT(bs_ptr, bs1);
								BLK_SEG(bs_ptr, blkBase + SIZEOF(new_hdr),
									new_hdr.bsiz - SIZEOF(new_hdr));
								BLK_FINI(bs_ptr, bs1);
								t_write(blkhist, (unsigned char *)bs1, 0, 0,
									((blk_hdr_ptr_t)blkBase)->levl, FALSE, FALSE,
									GDS_WRITE_PLAIN);
								/* The directory tree status for now is only used to determine
								 * whether writing the block to snapshot file (see t_end_sysops.c).
								 * For reorg upgrade/downgrade process, the block is updated in a
								 * sequential way without changing the gv_target. In this case, we
								 * assume the block is in directory tree so as to have it written to
								 * the snapshot file.
								 */
								BIT_SET_DIR_TREE(cw_set[cw_set_depth-1].blk_prior_state);
								/* reset update_trans in case previous retry had set it to 0 */
								update_trans = UPDTRNS_DB_UPDATED_MASK;
								if (BLK_RECYCLED == bml_status)
								{	/* If block that we are upgarding is RECYCLED, indicate to
									 * bg_update that blks_to_upgrd counter should NOT be
									 * touched in this case by setting "mode" to a special value
									 */
									assert(cw_set[cw_set_depth-1].mode == gds_t_write);
									cw_set[cw_set_depth-1].mode = gds_t_write_recycled;
									/* we SET block as NOT RECYCLED, otherwise, the mm_update()
									 * or bg_update_phase2 may skip writing it to snapshot file
									 * when its level is 0
									 */
									BIT_CLEAR_RECYCLED(cw_set[cw_set_depth-1].blk_prior_state);
								}
							} else
							{	/* Block got converted by another process since we did the dsk_read.
								 * or this block became marked free in the bitmap.
								 * No need to update this block. just call t_end for validation of
								 * both the non-bitmap block as well as the bitmap block.
								 * Note down that this transaction is no longer updating any blocks.
								 */
								update_trans = 0;
							}
							/* Need to put bit maps on the end of the cw set for concurrency checking.
							 * We want to simulate t_write_map, except we want to update "cw_map_depth"
							 * instead of "cw_set_depth". Hence the save and restore logic below.
							 * This part of the code is similar to the one in mu_swap_blk.c
							 */
							save_cw_set_depth = cw_set_depth;
							assert(!cw_map_depth);
							t_write_map(&bmlhist, NULL, curr_tn, 0);	/* will increment cw_set_depth */
							cw_map_depth = cw_set_depth;	/* set cw_map_depth to latest cw_set_depth */
							cw_set_depth = save_cw_set_depth;/* restore cw_set_depth */
							/* t_write_map simulation end */
						} else
						{
							if (BLK_RECYCLED != bml_status)
							{	/* Block was RECYCLED at beginning but no longer so. Retry */
								t_retry(cdb_sc_bmlmod);
								continue;
							}
							/* Mark recycled block as FREE in bitmap */
							assert(lcnt == (curblk - curbmp));
							assert(update_array_ptr == update_array);
							*((block_id *)update_array_ptr) = lcnt;
							update_array_ptr += SIZEOF(block_id);
							/* the following assumes SIZEOF(block_id) == SIZEOF(int) */
							assert(SIZEOF(block_id) == SIZEOF(int));
							*(int *)update_array_ptr = 0;
							t_write_map(&bmlhist, (unsigned char *)update_array, curr_tn, 0);
							update_trans = UPDTRNS_DB_UPDATED_MASK;
						}
					}
					assert(SIZEOF(lcl_update_trans) == SIZEOF(update_trans));
					lcl_update_trans = update_trans;	/* take a copy before t_end modifies it */
					if ((trans_num)0 != t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
					{	/* In case this is MM and t_end() remapped an extended database, reset csd */
						assert(csd == cs_data);
						if (!lcl_update_trans)
						{	/* transaction validated but updated nothing: block was already converted */
							assert(lcnt);
							assert(!mark_blk_free);
							assert((new_db_format == ondsk_blkver) || (BLK_BUSY != bml_status));
							if (BLK_BUSY != bml_status)
								reorg_stats.blks_skipped_free++;
							else
								reorg_stats.blks_skipped_newfmtincache++;
						} else if (!lcnt)
							reorg_stats.blks_converted_bmp++;
						else
							reorg_stats.blks_converted_nonbmp++;
						break;
					}
					assert(csd == cs_data);
				}
			}
		}
	stop_reorg_on_this_reg:
		/* even though ctrl-c occurred, update file-header fields to store reorg's progress before exiting */
		grab_crit(reg);
		blocks_left = 0;
		assert(csd->trans_hist.total_blks >= csd->blks_to_upgrd);
		actual_blks2upgrd = csd->blks_to_upgrd;
		total_blks = csd->trans_hist.total_blks;
		free_blks = csd->trans_hist.free_blocks;
		/* Care should be taken not to set "csd->reorg_upgrd_dwngrd_restart_block" in case of a concurrent db fmt
		 * change. This is because let us say we are doing REORG UPGRADE. A concurrent REORG DOWNGRADE would
		 * have reset "csd->reorg_upgrd_dwngrd_restart_block" field to 0 and if that reorg is interrupted by a
		 * Ctrl-C (before this reorg came here) it would have updated "csd->reorg_upgrd_dwngrd_restart_block" to
		 * a non-zero value indicating how many blocks from 0 have been downgraded. We should not reset this
		 * field to "curblk" as it will be mis-interpreted as the number of blocks that have been DOWNgraded.
		 */
		set_fully_upgraded = FALSE;
		if (mu_reorg_upgrd_dwngrd_start_tn != csd->desired_db_format_tn)
		{	/* csd->desired_db_format changed since reorg started. discontinue the reorg */
			util_out_print("Region !AD : Desired DB Format changed during REORG. Stopping REORG.",
					TRUE, REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else if (reorg_entiredb)
		{	/* Change "csd->reorg_upgrd_dwngrd_restart_block" only if STARTBLK or STOPBLK was NOT specified */
			assert(csd->reorg_upgrd_dwngrd_restart_block <= curblk);
			csd->reorg_upgrd_dwngrd_restart_block = curblk;	/* blocks lesser than this have been upgraded/downgraded */
			expected_blks2upgrd = upgrade ? 0 : (total_blks - free_blks);
			blocks_left = upgrade ? actual_blks2upgrd : (expected_blks2upgrd - actual_blks2upgrd);
			/* If this reorg command went through all blocks in the database, then it should have
			 * correctly concluded at this point whether the reorg is complete or not.
			 * If this reorg command started from where a previous incomplete reorg left
			 * (i.e. first_reorg_in_this_db_fmt is FALSE), it cannot determine if the initial
			 * GDS blocks that it skipped are completely {up,down}graded or not.
			 */
			assert((0 == blocks_left) || (SS_NORMAL != status1) || !first_reorg_in_this_db_fmt);
			/* If this is a MUPIP REORG UPGRADE that did go through every block in the database (indicated by
			 * "reorg_entiredb" && "first_reorg_in_this_db_fmt") and the current count of "blks_to_upgrd" is
			 * 0 in the file-header and the desired_db_format did not change since the start of the REORG,
			 * we can be sure that the entire database has been upgraded. Set "csd->fully_upgraded" to TRUE.
			 */
			if ((SS_NORMAL == status1) && first_reorg_in_this_db_fmt && upgrade && (0 == actual_blks2upgrd))
			{
				csd->fully_upgraded = TRUE;
				csd->db_got_to_v5_once = TRUE;
				set_fully_upgraded = TRUE;
			}
			/* flush all changes noted down in the file-header */
			if (!wcs_flu(WCSFLU_FLUSH_HDR))	/* wcs_flu assumes gv_cur_region is set (which it is in this routine) */
			{
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4,
						LEN_AND_LIT("MUPIP REORG UPGRADE/DOWNGRADE"), DB_LEN_STR(reg));
				status = ERR_MUNOFINISH;
				rel_crit(reg);
				continue;
			}
		}
		curr_tn = csd->trans_hist.curr_tn;
		rel_crit(reg);
		util_out_print("Region !AD : Stopped processing at block number [0x!XL]", TRUE, REG_LEN_STR(reg), curblk);
		/* Print statistics */
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Bitmap) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_bmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (Free) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_free);
		util_out_print("Region !AD : Statistics : Blocks Read From Disk (Non-Bitmap) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_read_from_disk_nonbmp);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in disk) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtindisk);
		util_out_print("Region !AD : Statistics : Blocks Skipped (new fmt in cache) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_skipped_newfmtincache);
		util_out_print("Region !AD : Statistics : Blocks Converted (Bitmap) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_bmp);
		util_out_print("Region !AD : Statistics : Blocks Converted (Non-Bitmap) : 0x!XL",
				TRUE, REG_LEN_STR(reg), reorg_stats.blks_converted_nonbmp);
		if (reorg_entiredb && (SS_NORMAL == status1) && (0 != blocks_left))
		{	/* file-header counter does not match what reorg on the entire database expected to see */
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBBTUWRNG, 2, expected_blks2upgrd, actual_blks2upgrd);
			util_out_print("Region !AD : Run MUPIP INTEG (without FAST qualifier) to fix the counter", TRUE,
					REG_LEN_STR(reg));
			status1 = ERR_MUNOFINISH;
		} else
			util_out_print("Region !AD : Total Blocks = [0x!XL] : Free Blocks = [0x!XL] : "
					"Blocks to upgrade = [0x!XL]", TRUE, REG_LEN_STR(reg), total_blks, free_blks,
					actual_blks2upgrd);
		/* Issue success or failure message for this region */
		if (SS_NORMAL == status1)
		{	/* issue success only if REORG did not encounter any error in its processing */
			if (set_fully_upgraded)
				util_out_print("Region !AD : Database is now FULLY UPGRADED", TRUE, REG_LEN_STR(reg));
			util_out_print("Region !AD : MUPIP REORG !AD finished!/", TRUE, REG_LEN_STR(reg), LEN_AND_STR(command));
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUREUPDWNGRDEND, 5, REG_LEN_STR(reg),
					process_id, process_id, &curr_tn);
		} else
		{
			assert(ERR_MUNOFINISH == status1);
			assert((SS_NORMAL == status) || (ERR_MUNOFINISH == status));
			util_out_print("Region !AD : MUPIP REORG !AD incomplete. See above messages.!/", TRUE,
					REG_LEN_STR(reg), LEN_AND_STR(command));
			status = status1;
		}
	}
	/* Region loop done: release process-private buffers and report Ctrl-C/Ctrl-Y interruption if any */
	if (NULL != bptr)
		free(bptr);
	if (NULL != bml_lcl_buff)
		free(bml_lcl_buff);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REORGCTRLY);
		status = ERR_MUNOFINISH;
	}
	mupip_exit(status);	/* does not return */
}
uint4 mur_process_intrpt_recov() { jnl_ctl_list *jctl, *last_jctl; reg_ctl_list *rctl, *rctl_top; int rename_fn_len, save_name_len, idx; char prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1]; jnl_create_info jnl_info; uint4 status, status2; uint4 max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks; sgmnt_data_ptr_t csd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; boolean_t jfh_changed; jnl_record *jnlrec; jnl_file_header *jfh; jnl_tm_t now; for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { TP_CHANGE_REG(rctl->gd); csd = cs_data; /* MM logic after wcs_flu call requires this to be set */ assert(csd == rctl->csa->hdr); jctl = rctl->jctl_turn_around; max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0; for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen) { jfh = jctl->jfh; if (max_autoswitchlimit < jfh->autoswitchlimit) { /* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit. */ max_autoswitchlimit = jfh->autoswitchlimit; max_jnl_alq = jfh->jnl_alq; max_jnl_deq = jfh->jnl_deq; } /* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks * seen in INCTN records in backward processing. It is possible that backward processing might have * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-bak * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which * maintains exactly this count. Therefore adjust the rctl counter accordingly. 
*/ assert(!jfh->prev_recov_blks_to_upgrd_adjust || !jfh->recover_interrupted); assert(!jfh->prev_recov_blks_to_upgrd_adjust || jfh->prev_recov_end_of_data); rctl->blks_to_upgrd_adjust += jfh->prev_recov_blks_to_upgrd_adjust; } if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit) { csd->jnl_alq = max_jnl_alq; csd->jnl_deq = max_jnl_deq; csd->autoswitchlimit = max_autoswitchlimit; } else { assert(csd->jnl_alq == last_jctl->jfh->jnl_alq); assert(csd->jnl_deq == last_jctl->jfh->jnl_deq); assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit); } jctl = rctl->jctl_turn_around; /* Get a pointer to the turn around point EPOCH record */ jnlrec = rctl->mur_desc->jnlrec; assert(JRT_EPOCH == jnlrec->prefix.jrec_type); assert(jctl->turn_around_time == jnlrec->prefix.time); assert(jctl->turn_around_seqno == jnlrec->jrec_epoch.jnl_seqno); assert(jctl->turn_around_tn == jnlrec->prefix.tn); assert(jctl->rec_offset == jctl->turn_around_offset); /* Reset file-header "blks_to_upgrd" counter to the turn around point epoch value. Adjust this to include * the number of new V4 format bitmaps created by post-turnaround-point db file extensions. * The adjustment value is maintained in rctl->blks_to_upgrd_adjust. */ csd->blks_to_upgrd = jnlrec->jrec_epoch.blks_to_upgrd; csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust; # ifdef GTM_TRIGGER /* online rollback can potentially take the database to a point in the past where the triggers that were * previously installed are no longer a part of the current database state and so any process that restarts * AFTER online rollback completes SHOULD reload triggers and the only way to do that is by incrementing the * db_trigger_cycle in the file header. 
*/ if (jgbl.onlnrlbk && (0 < csd->db_trigger_cycle)) { /* check for non-zero db_trigger_cycle is to prevent other processes (continuing after online rollback) * to establish implicit TP (on seeing the trigger cycle mismatch) when there are actually no triggers * installed in the database (because there were none at the start of online rollback). */ csd->db_trigger_cycle++; if (0 == csd->db_trigger_cycle) csd->db_trigger_cycle = 1; /* Don't allow cycle set to 0 which means uninitialized */ } # endif assert((WBTEST_ALLOW_ARBITRARY_FULLY_UPGRADED == gtm_white_box_test_case_number) || (FALSE == jctl->turn_around_fullyupgraded) || (TRUE == jctl->turn_around_fullyupgraded)); /* Set csd->fully_upgraded to FALSE if: * a) The turn around EPOCH had the fully_upgraded field set to FALSE * OR * b) If csd->blks_to_upgrd counter is non-zero. This field can be non-zero even if the turnaround EPOCH's * fully_upgraded field is TRUE. This is possible if the database was downgraded to V4 (post turnaround EPOCH) * format and database extensions happened causing new V4 format bitmap blocks to be written. The count of V4 * format bitmap blocks is maintained ONLY as part of INCTN records (with INCTN opcode SET_JNL_FILE_CLOSE_EXTEND) * noted down in rctl->blks_to_upgrd_adjust counter as part of BACKWARD processing which are finally added to * csd->blks_to_upgrd. */ if (!jctl->turn_around_fullyupgraded || csd->blks_to_upgrd) csd->fully_upgraded = FALSE; csd->trans_hist.early_tn = jctl->turn_around_tn; csd->trans_hist.curr_tn = csd->trans_hist.early_tn; /* INCREMENT_CURR_TN macro not used but noted in comment * to identify all places that set curr_tn */ csd->jnl_eovtn = csd->trans_hist.curr_tn; csd->turn_around_point = TRUE; /* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header. * It is difficult for recovery to restore those fields to a correct partial value. 
* Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record * and that there was no more processing that happened. * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery. * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long). */ if (csd->desired_db_format_tn >= jctl->turn_around_tn) csd->desired_db_format_tn = jctl->turn_around_tn - 1; if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn) csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1; if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn) csd->tn_upgrd_blks_0 = (trans_num)-1; csd->reorg_upgrd_dwngrd_restart_block = 0; /* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch * record and the change in "total_blks" since that epoch to the present form of the database. Any difference * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback * undoes everything (including all updates) except file extensions (it does not truncate the file size). * Therefore every block that was newly allocated as part of those file extensions should be considered FREE * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created. */ assert(jnlrec->jrec_epoch.total_blks <= csd->trans_hist.total_blks); csd->trans_hist.free_blocks = jnlrec->jrec_epoch.free_blocks + (csd->trans_hist.total_blks - jnlrec->jrec_epoch.total_blks) - DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP) + DIVIDE_ROUND_UP(jnlrec->jrec_epoch.total_blks, BLKS_PER_LMAP); assert(!csd->blks_to_upgrd || !csd->fully_upgraded); assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks); /* Update strm_reg_seqno[] in db file header to reflect the turn around point. 
* Before updating "strm_reg_seqno", make sure value is saved into "save_strm_reg_seqno". * This is relied upon by the function "mur_get_max_strm_reg_seqno" in case of interrupted rollback. */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) { if (!csd->save_strm_reg_seqno[idx]) csd->save_strm_reg_seqno[idx] = csd->strm_reg_seqno[idx]; csd->strm_reg_seqno[idx] = jnlrec->jrec_epoch.strm_seqno[idx]; } wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB); assert(cs_addrs->ti->curr_tn == jctl->turn_around_tn); # ifdef UNIX if (jgbl.onlnrlbk) { if (dba_bg == cs_addrs->hdr->acc_meth) { /* dryclean the cache (basically reset the cycle fields in all teh cache records) so as to make * GT.M processes that only does 'reads' to require crit and hence realize that online rollback * is in progress */ bt_refresh(cs_addrs, FALSE); /* sets earliest bt TN to be the turn around TN */ } db_csh_ref(cs_addrs, FALSE); assert(NULL != cs_addrs->jnl); jpc = cs_addrs->jnl; assert(NULL != jpc->jnl_buff); jbp = jpc->jnl_buff; /* Since Rollback simulates the journal record along with the timestamp at which the update was made, it * sets jgbl.dont_reset_gbl_jrec_time to TRUE so that during forward processing t_end or tp_tend does not * reset the gbl_jrec_time to reflect the current time. But, with Online Rollback, one can have the shared * memory up and running and hence can have jbp->prev_jrec_time to be the time of the most recent journal * update made. Later in t_end/tp_tend, ADJUST_GBL_JREC_TIME is invoked which ensures that if ever * gbl_jrec_time (the time of the current update) is less than jbp->prev_jrec_time (time of the latest * journal update), dont_reset_gbl_jrec_time better be FALSE. But, this assert will trip since Rollback * sets the latter to TRUE. To fix this, set jbp->prev_jrec_time to the turn around time stamp. 
This way * we are guaranteed that all the updates done in the forward processing will have a timestamp that is * greater than the turn around timestamp */ SET_JNLBUFF_PREV_JREC_TIME(jbp, jctl->turn_around_time, DO_GBL_JREC_TIME_CHECK_FALSE); } else if (dba_bg == csd->acc_meth) { /* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */ SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1); } # else if (dba_bg == csd->acc_meth) { /* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */ SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1); } # endif csd->turn_around_point = FALSE; assert(OLDEST_HIST_TN(cs_addrs) == (cs_addrs->ti->curr_tn - 1)); /* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */ assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data)); rctl->csd = cs_data; } JNL_SHORT_TIME(now); for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { TP_CHANGE_REG_IF_NEEDED(rctl->gd); if (!rctl->jfh_recov_interrupted) jctl = rctl->jctl_turn_around; else { DEBUG_ONLY( for (jctl = rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen) ; /* check that latest gener file name does not match db header */ assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len) || (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len))); ) jctl = rctl->jctl_alt_head; } assert(NULL != jctl); for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen) ; assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); /* latest gener file name */ assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */ if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "", rename_fn, &rename_fn_len, now, &status2))) return status; jctl->jnl_fn_len = rename_fn_len; /* change the name in memory to the proposed name */ memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1); /* Rename hasn't happened yet 
at the filesystem level. In case current recover command is interrupted, * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for * all journal files from which PBLK records were applied. Create new journal files for forward play. */ assert(NULL != rctl->jctl_turn_around); jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */ assert(0 != jctl->turn_around_offset); jfh = jctl->jfh; jfh->turn_around_offset = jctl->turn_around_offset; /* save progress in file header for */ jfh->turn_around_time = jctl->turn_around_time; /* possible re-issue of recover */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) jfh->strm_end_seqno[idx] = csd->strm_reg_seqno[idx]; jfh_changed = TRUE; /* We are about to update the journal file header of the turnaround-point journal file to store the * non-zero jfh->turn_around_offset. Ensure corresponding database is considered updated. * This is needed in case journal recovery/rollback terminates abnormally and we go to mur_close_files. * We need to ensure csd->recov_interrupted does not get reset to FALSE even if this region did not have * have any updates to the corresponding database file otherwise. (GTM-8394) */ rctl->db_updated = TRUE; for ( ; NULL != jctl; jctl = jctl->next_gen) { /* setup the next_jnl links. note that in the case of interrupted recovery, next_jnl links * would have been already set starting from the turn-around point journal file of the * interrupted recovery but the new recovery MIGHT have taken us to a still previous * generation journal file that needs its next_jnl link set. this is why we do the next_jnl * link setup even in the case of interrupted recovery although in most cases it is unnecessary. 
*/ jfh = jctl->jfh; if (NULL != jctl->next_gen) { jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len; memcpy(jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len); jfh_changed = TRUE; } else assert(0 == jfh->next_jnl_file_name_length); /* null link from latest generation */ if (jfh->turn_around_offset && (jctl != rctl->jctl_turn_around)) { /* It is possible that the current recovery has a turn-around-point much before the * previously interrupted recovery. If it happens to be a previous generation journal * file then we have to reset the original turn-around-point to be zero in the journal * file header in order to ensure if this recovery gets interrupted we do interrupted * recovery processing until the new turn-around-point instead of stopping incorrectly * at the original turn-around-point itself. Note that there could be more than one * journal file with a non-zero turn_around_offset (depending on how many previous * recoveries got interrupted in this loop) that need to be reset. */ assert(!jctl->turn_around_offset); assert(rctl->recov_interrupted || rctl->jctl_apply_pblk); /* rctl->jfh_recov_interrupted can fail */ jfh->turn_around_offset = 0; jfh->turn_around_time = 0; jfh_changed = TRUE; } if (jfh_changed) { /* Since overwriting the journal file header (an already allocated block * in the file) should not cause ENOSPC, we dont take the trouble of * passing csa or jnl_fn (first two parameters). Instead we pass NULL. 
*/ JNL_DO_FILE_WRITE(NULL, NULL, jctl->channel, 0, jfh, REAL_JNL_HDR_LEN, jctl->status, jctl->status2); if (SS_NORMAL != jctl->status) { assert(FALSE); if (SS_NORMAL == jctl->status2) gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status); else gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2)); return jctl->status; } GTM_JNL_FSYNC(rctl->csa, jctl->channel, jctl->status); if (-1 == jctl->status) { jctl->status2 = errno; assert(FALSE); gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(9) ERR_JNLFSYNCERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2); return ERR_JNLFSYNCERR; } } jfh_changed = FALSE; } memset(&jnl_info, 0, SIZEOF(jnl_info)); jnl_info.status = jnl_info.status2 = SS_NORMAL; jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(rctl->gd, &jnl_info); jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len; memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len); jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0; jnl_info.jnl_len = rctl->csd->jnl_file_len; memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len); jnl_info.jnl[jnl_info.jnl_len] = 0; assert(!mur_options.rollback || jgbl.mur_rollback); jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno; jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time; /* time needed for cre_jnl_file_common() */ if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len)) { gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl); return jnl_info.status; } # ifdef UNIX if (jgbl.onlnrlbk) { cs_addrs = rctl->csa; /* Mimic what jnl_file_close in case of cleanly a closed journal file */ jpc = cs_addrs->jnl; /* the previous loop makes sure cs_addrs->jnl->jnl_buff is valid*/ NULLIFY_JNL_FILE_ID(cs_addrs); 
jpc->jnl_buff->cycle++; /* so that, all other processes knows to switch to newer journal file */ jpc->cycle--; /* decrement cycle so jnl_ensure_open() knows to reopen the journal */ } # endif if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */ { mur_rem_jctls(rctl); rctl->jctl_alt_head = NULL; } /* From this point on, journal records are written into the newly created journal file. However, we still read * from old journal files. */ }