/* Abandon the current journal file of "reg" and start a brand new one that is NOT linked to its predecessor.
 *
 * The journal channel (if one is open) is closed, "sts" is handed to jnl_send_oper() and a new journal
 * file is created through cre_jnl_file() with "no_prev_link" set.
 *
 * Parameters:
 *	reg - region whose journal file is being switched.
 *	sts - status that triggered the switch; ERR_JNLFILOPN is expected exactly when no journal channel is open.
 *
 * Returns:
 *	0 when the new journal file got created (jpc->status is SS_NORMAL and ERR_PREVJNLLINKCUT has been sent).
 *	ERR_JNLINVALID when the creation failed; jpc->status/jpc->status2 then hold the codes left by cre_jnl_file().
 */
uint4 jnl_file_open_switch(gd_region *reg, uint4 sts)
{
	sgmnt_addrs		*csa;
	jnl_private_control	*jpc;
	jnl_create_info		cre_info;
	char			prev_fn_buff[JNL_NAME_SIZE];

	csa = &FILE_INFO(reg)->s_addrs;
	jpc = csa->jnl;
	/* Either the open itself failed (no channel) or some other error hit an already open journal file */
	assert(((ERR_JNLFILOPN != sts) && (NOJNL != jpc->channel))
		|| ((ERR_JNLFILOPN == sts) && (NOJNL == jpc->channel)));
	if ((ERR_JNLFILOPN != sts) && (NOJNL != jpc->channel))
	{
		F_CLOSE(jpc->channel);
	}
	jpc->channel = NOJNL;
	jnl_send_oper(jpc, sts);
	/* Attempt to create a new journal file */
	memset(&cre_info, 0, sizeof(cre_info));
	cre_info.status2 = SS_NORMAL;
	cre_info.status = SS_NORMAL;
	cre_info.prev_jnl = prev_fn_buff;
	set_jnl_info(reg, &cre_info);
	cre_info.no_prev_link = TRUE;	/* the new file does not point back at the one being abandoned */
	cre_info.no_rename = FALSE;
	if (!jgbl.forw_phase_recovery)
	{
		JNL_SHORT_TIME(jgbl.gbl_jrec_time);	/* needed for cre_jnl_file() */
	}
	/* else mur_output_record() would have already set jgbl.gbl_jrec_time */
	assert(jgbl.gbl_jrec_time);
	if (EXIT_NRM != cre_jnl_file(&cre_info))
	{	/* Creation failed: pass the failure codes on to the caller through jpc */
		jpc->status = cre_info.status;
		jpc->status2 = cre_info.status2;
		return ERR_JNLINVALID;
	}
	jpc->status = SS_NORMAL;
	send_msg(VARLSTCNT(6) ERR_PREVJNLLINKCUT, 4, JNL_LEN_STR(csa->hdr), DB_LEN_STR(reg));
	/* The database file header is expected to name the journal file that was just created */
	assert(csa->hdr->jnl_file_len == cre_info.jnl_len);
	assert(0 == memcmp(csa->hdr->jnl_file_name, cre_info.jnl, cre_info.jnl_len));
	return 0;
}
/* Make room in the journal file of jpc->region for "total_jnl_rec_size" more bytes of journal records.
 *
 * The file is extended "virtually" (jb->filesize and the virtual_size field of the journal file header are
 * bumped). If the extension would take the file past csd->autoswitchlimit, the current journal file is
 * flushed and closed and a new journal file is created and opened instead (autoswitch); the new file may
 * in turn need one more extension to hold the current transaction.
 *
 * Parameters:
 *	jpc                - journal private control of the region (asserted to be the current region).
 *	total_jnl_rec_size - number of bytes of journal records that have to fit.
 *
 * Returns:
 *	A positive block count (the size of the last extension request) on success.
 *	-1 if no extension could be done (not enough disk space, new journal file could not be created or
 *	   nothing was requested); in that case jpc->status is set to ERR_JNLREADEOF and jnl_file_lost()
 *	   has been invoked.
 * Journal open/read/write failures are reported through rts_error().
 */
int jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t	jb;
	jnl_create_info		jnl_info;
	jnl_file_header		header;
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks;
	GTM_BAVAIL_TYPE		avail_blocks;
	uint4			aligned_tot_jrec_size, count;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(jpc->region == gv_cur_region);
	assert(csa->jnl_state == csd->jnl_state);
	if (!JNL_ENABLED(csa) || (NOJNL == jpc->channel) || (JNL_FILE_SWITCHED(jpc)))
		GTMASSERT;	/* crit and messing with the journal file - how could it have vanished? */
	if (!csd->jnl_deq)
	{	/* No extension size configured: the allocation is the whole file, so ask for one full allocation */
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* usually we will do the loop just once where we do the file extension.
		 * rarely we might need to do an autoswitch instead after which again rarely
		 * 	we might need to do an extension on the new journal to fit in the transaction's journal requirements.
		 * therefore we should do this loop a maximum of twice. hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			if ((new_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* if we cannot satisfy the request, it is an error */
					send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd), new_blocks, avail_blocks);
					new_blocks = 0;	/* makes the code after the loop take the failure path */
					jpc->status = SS_NORMAL;
					break;
				} else
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd), (avail_blocks - new_blocks));
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, sizeof(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal.
				 */
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
				/* In MM, wcs_flu() can remap an extended database which leaves the file header at a
				 * different address. "csd" is dereferenced many times below, so re-read it here.
				 */
				assert((dba_mm == cs_data->acc_meth) || (csd == cs_data));
				csd = cs_data;
			} else
				rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status);
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			if (jgbl.forw_phase_recovery && (NULL != jgbl.mur_pini_addr_reset_fnptr))
				(*jgbl.mur_pini_addr_reset_fnptr)();
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->trans_hist.header_open_tn = jnl_info.tn;	/* needed for successful jnl_file_open() */
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREATE, 2, JNL_LEN_STR(csd));
				/* Write the updated database file header out */
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
				assert(jb->filesize == csd->jnl_alq);
				aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
										csd->jnl_alq, csd->jnl_deq);
				if (aligned_tot_jrec_size > csd->jnl_alq)
				{	/* need to extend more than initial allocation in the new journal file
					 * to accommodate the current transaction.
					 */
					new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
					assert(new_blocks);
					assert(0 == new_blocks % csd->jnl_deq);
					need_extend = TRUE;
				}
			} else
			{
				send_msg(VARLSTCNT(4) ERR_JNLCREATERR, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jb->filesize = new_alq;	/* Actually this is virtual file size blocks */
			DO_FILE_READ(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			assert((header.virtual_size + new_blocks) == new_alq);
			header.virtual_size = new_alq;
			DO_FILE_WRITE(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 >= new_blocks)
	{	/* Nothing got extended: record the failure and invoke jnl_file_lost() for this region */
		jpc->status = ERR_JNLREADEOF;
		jnl_file_lost(jpc, ERR_JNLEXTEND);
		new_blocks = -1;
	}
	return new_blocks;
}
uint4 mur_process_intrpt_recov() { jnl_ctl_list *jctl, *last_jctl; reg_ctl_list *rctl, *rctl_top; int rename_fn_len, save_name_len; char prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1]; jnl_create_info jnl_info; uint4 status, status2; uint4 max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks; sgmnt_data_ptr_t csd; #if defined(VMS) io_status_block_disk iosb; #endif boolean_t jfh_changed; for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { gv_cur_region = rctl->gd; /* wcs_flu requires this to be set */ cs_addrs = rctl->csa; csd = cs_data = rctl->csd; /* MM logic after wcs_flu call requires this to be set */ assert(csd == rctl->csa->hdr); jctl = rctl->jctl_turn_around; max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0; for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen) { if (max_autoswitchlimit < jctl->jfh->autoswitchlimit) { /* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit. */ max_autoswitchlimit = jctl->jfh->autoswitchlimit; max_jnl_alq = jctl->jfh->jnl_alq; max_jnl_deq = jctl->jfh->jnl_deq; } /* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks * seen in INCTN records in backward processing. It is possible that backward processing might have * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-bak * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which * maintains exactly this count. Therefore adjust the rctl counter accordingly. 
*/ assert(!jctl->jfh->prev_recov_blks_to_upgrd_adjust || !jctl->jfh->recover_interrupted); assert(!jctl->jfh->prev_recov_blks_to_upgrd_adjust || jctl->jfh->prev_recov_end_of_data); rctl->blks_to_upgrd_adjust += jctl->jfh->prev_recov_blks_to_upgrd_adjust; } if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit) { csd->jnl_alq = max_jnl_alq; csd->jnl_deq = max_jnl_deq; csd->autoswitchlimit = max_autoswitchlimit; } else { assert(csd->jnl_alq == last_jctl->jfh->jnl_alq); assert(csd->jnl_deq == last_jctl->jfh->jnl_deq); assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit); } /* now that rctl->blks_to_upgrd_adjust is completely computed, use that to increment filehdr blks_to_upgrd. */ csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust; if (csd->blks_to_upgrd) csd->fully_upgraded = FALSE; jctl = rctl->jctl_turn_around; csd->trans_hist.early_tn = jctl->turn_around_tn; csd->trans_hist.curr_tn = csd->trans_hist.early_tn; /* INCREMENT_CURR_TN macro not used but noted in comment * to identify all places that set curr_tn */ csd->jnl_eovtn = csd->trans_hist.curr_tn; csd->turn_around_point = TRUE; /* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header. * It is difficult for recovery to restore those fields to a correct partial value. * Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record * and that there was no more processing that happened. * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery. * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long). 
*/ if (csd->desired_db_format_tn >= jctl->turn_around_tn) csd->desired_db_format_tn = jctl->turn_around_tn - 1; if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn) csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1; if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn) csd->tn_upgrd_blks_0 = (trans_num)-1; csd->reorg_upgrd_dwngrd_restart_block = 0; /* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch * record and the change in "total_blks" since that epoch to the present form of the database. Any difference * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback * undoes everything (including all updates) except file extensions (it does not truncate the file size). * Therefore every block that was newly allocated as part of those file extensions should be considered FREE * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created. */ assert(rctl->trnarnd_total_blks <= csd->trans_hist.total_blks); csd->trans_hist.free_blocks = rctl->trnarnd_free_blocks + (csd->trans_hist.total_blks - rctl->trnarnd_total_blks) - DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP) + DIVIDE_ROUND_UP(rctl->trnarnd_total_blks, BLKS_PER_LMAP); assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks); if (dba_bg == csd->acc_meth) /* This is taken from bt_refresh() */ ((th_rec *)((uchar_ptr_t)cs_addrs->th_base + cs_addrs->th_base->tnque.fl))->tn = jctl->turn_around_tn - 1; wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB); csd->turn_around_point = FALSE; /* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */ assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data)); rctl->csd = cs_data; } for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { if (!rctl->jfh_recov_interrupted) jctl = rctl->jctl_turn_around; else { DEBUG_ONLY( for (jctl = 
rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen) ; /* check that latest gener file name does not match db header */ assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len) || (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len))); ) jctl = rctl->jctl_alt_head; } assert(NULL != jctl); for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen) ; assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); /* latest gener file name */ assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */ if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "", rename_fn, &rename_fn_len, &status2))) return status; jctl->jnl_fn_len = rename_fn_len; /* change the name in memory to the proposed name */ memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1); /* Rename hasn't happened yet at the filesystem level. In case current recover command is interrupted, * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for * all journal files from which PBLK records were applied. Create new journal files for forward play. */ assert(NULL != rctl->jctl_turn_around); jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */ assert(0 != jctl->turn_around_offset); jctl->jfh->turn_around_offset = jctl->turn_around_offset; /* save progress in file header for */ jctl->jfh->turn_around_time = jctl->turn_around_time; /* possible re-issue of recover */ jfh_changed = TRUE; for ( ; NULL != jctl; jctl = jctl->next_gen) { /* setup the next_jnl links. note that in the case of interrupted recovery, next_jnl links * would have been already set starting from the turn-around point journal file of the * interrupted recovery but the new recovery MIGHT have taken us to a still previous * generation journal file that needs its next_jnl link set. 
this is why we do the next_jnl * link setup even in the case of interrupted recovery although in most cases it is unnecessary. */ if (NULL != jctl->next_gen) { jctl->jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len; memcpy(jctl->jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len); jfh_changed = TRUE; } else assert(0 == jctl->jfh->next_jnl_file_name_length); /* null link from latest generation */ if (jctl->jfh->turn_around_offset && (jctl != rctl->jctl_turn_around)) { /* It is possible that the current recovery has a turn-around-point much before the * previously interrupted recovery. If it happens to be a previous generation journal * file then we have to reset the original turn-around-point to be zero in the journal * file header in order to ensure if this recovery gets interrupted we do interrupted * recovery processing until the new turn-around-point instead of stopping incorrectly * at the original turn-around-point itself. Note that there could be more than one * journal file with a non-zero turn_around_offset (depending on how many previous * recoveries got interrupted in this loop) that need to be reset. 
*/ assert(!jctl->turn_around_offset); assert(rctl->recov_interrupted); /* rctl->jfh_recov_interrupted can fail */ jctl->jfh->turn_around_offset = 0; jctl->jfh->turn_around_time = 0; jfh_changed = TRUE; } if (jfh_changed) { DO_FILE_WRITE(jctl->channel, 0, jctl->jfh, REAL_JNL_HDR_LEN, jctl->status, jctl->status2); if (SS_NORMAL != jctl->status) { assert(FALSE); if (SS_NORMAL == jctl->status2) gtm_putmsg(VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status); else gtm_putmsg(VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2)); return jctl->status; } UNIX_ONLY( GTM_FSYNC(jctl->channel, jctl->status); if (-1 == jctl->status) { jctl->status2 = errno; assert(FALSE); gtm_putmsg(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2); return ERR_JNLFSYNCERR; } ) } jfh_changed = FALSE; } memset(&jnl_info, 0, SIZEOF(jnl_info)); jnl_info.status = jnl_info.status2 = SS_NORMAL; jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(rctl->gd, &jnl_info); jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len; memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len); jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0; jnl_info.jnl_len = rctl->csd->jnl_file_len; memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len); jnl_info.jnl[jnl_info.jnl_len] = 0; assert(!mur_options.rollback || jgbl.mur_rollback); jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno; jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time; /* time needed for cre_jnl_file_common() */ if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len)) { gtm_putmsg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl); return jnl_info.status; } if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */ { mur_rem_jctls(rctl); rctl->jctl_alt_head = 
NULL; } /* From this point on, journal records are written into the newly created journal file. However, we still read * from old journal files. */ }
/* Make room in the journal file of jpc->region for "total_jnl_rec_size" more bytes of journal records.
 *
 * The file is extended "virtually" (the virtual_size field of the journal file header and jb->filesize are
 * bumped). If the extension would take the file past csd->autoswitchlimit, the current journal file is
 * flushed and closed and a new journal file is created and opened instead (autoswitch); the new file may
 * in turn need one more extension to hold the current transaction.
 *
 * Parameters:
 *	jpc                - journal private control of the region (its csa is asserted to be cs_addrs).
 *	total_jnl_rec_size - number of bytes of journal records that have to fit.
 *
 * Returns:
 *	EXIT_NRM on success (the n_jnl_extends statistic is incremented).
 *	EXIT_ERR if no extension could be done (not enough disk space without anticipatory freeze, new journal
 *	   file could not be created or nothing was requested); jpc->status is then set to ERR_JNLREADEOF and
 *	   jnl_file_lost() has been invoked. EXIT_ERR is also the value given to ESTABLISH_RET for the
 *	   autoswitch condition handler.
 * Journal open/read/write failures are reported through rts_error().
 */
uint4 jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t	jb;
	jnl_create_info		jnl_info;
	jnl_file_header		*header;
	unsigned char		hdr_buff[REAL_JNL_HDR_LEN + MAX_IO_BLOCK_SIZE];
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks, warn_blocks;
	gtm_uint64_t		avail_blocks;
	uint4			aligned_tot_jrec_size, count;
	uint4			jnl_fs_block_size, read_write_size;
	DCL_THREADGBL_ACCESS;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(&FILE_INFO(jpc->region)->s_addrs == csa);
	assert(csa->jnl_state == csd->jnl_state);
	assertpro(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && (!JNL_FILE_SWITCHED(jpc)));
		/* crit and messing with the journal file - how could it have vanished? */
	if (!csd->jnl_deq || (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit))
	{	/* No room for even one extension below the autoswitchlimit: ask for one full allocation */
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* usually we will do the loop just once where we do the file extension.
		 * rarely we might need to do an autoswitch instead after which again rarely
		 * 	we might need to do an extension on the new journal to fit in the transaction's journal requirements.
		 * therefore we should do this loop a maximum of twice. hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			warn_blocks = (csd->jnl_alq + csd->jnl_deq > csd->autoswitchlimit)
					? ((csd->jnl_deq > csd->autoswitchlimit) ? csd->jnl_deq : csd->autoswitchlimit)
					: new_blocks;
			if ((warn_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* If we cannot satisfy the request, it is an error, unless the anticipatory freeze
					 * scheme is in effect in which case, we will assume space is available even if
					 * it is not and go ahead with writes to the disk. If the writes fail with ENOSPC
					 * we will freeze the instance and wait for space to become available and keep
					 * retrying the writes. Therefore, we make the NOSPACEEXT a warning in this case.
					 */
					SETUP_THREADGBL_ACCESS;
					if (!ANTICIPATORY_FREEZE_ENABLED(csa))
					{
						send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd),
							new_blocks, avail_blocks);
						new_blocks = 0;	/* makes the code after the loop take the failure path */
						jpc->status = SS_NORMAL;
						break;
					} else
						send_msg(VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							JNL_LEN_STR(csd), new_blocks, avail_blocks);
				} else
				{	/* "warn_blocks" can exceed "avail_blocks" here (e.g. jnl_deq > autoswitchlimit while
					 * new_blocks still fits). The subtraction is unsigned 64-bit, so it would wrap around
					 * to a huge bogus count; report 0 blocks left instead.
					 */
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd),
						((avail_blocks > (gtm_uint64_t)warn_blocks) ? (avail_blocks - warn_blocks) : 0));
				}
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, SIZEOF(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal. Set a global variable in_jnl_file_autoswitch
				 * so jnl_write can know not to do the padding check. But because this is a global
				 * variable, we also need to make sure it is reset in case of errors during the
				 * autoswitch (or else calls to jnl_write after we are out of the autoswitch logic
				 * will continue to incorrectly not do the padding check). Hence a condition handler.
				 */
				assert(!in_jnl_file_autoswitch);
				in_jnl_file_autoswitch = TRUE;
				/* Also make sure time is not changed. This way if "jnl_write" as part of writing a
				 * journal record invokes jnl_file_extend, when the autoswitch is done and writing
				 * of the parent jnl_write resumes, we want it to continue with the same timestamp
				 * and not have to reset its time (non-trivial task) to reflect any changes since then.
				 */
				assert(!jgbl.save_dont_reset_gbl_jrec_time);
				jgbl.save_dont_reset_gbl_jrec_time = jgbl.dont_reset_gbl_jrec_time;
				jgbl.dont_reset_gbl_jrec_time = TRUE;
				/* Establish a condition handler so we reset a few global variables that have
				 * temporarily been modified in case of errors inside wcs_flu/jnl_file_close.
				 */
				ESTABLISH_RET(jnl_file_autoswitch_ch, EXIT_ERR);
				/* It is possible we still have not written a PINI record in this journal file
				 * (e.g. mupip extend saw the need to do jnl_file_extend inside jnl_write while
				 * trying to write a PINI record). Write a PINI record in that case before closing
				 * the journal file that way the EOF record will have a non-zero pini_addr.
				 */
				if (0 == jpc->pini_addr)
					jnl_put_jrt_pini(csa);
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SPEEDUP_NOBEFORE);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
				REVERT;
				in_jnl_file_autoswitch = FALSE;
				jgbl.dont_reset_gbl_jrec_time = jgbl.save_dont_reset_gbl_jrec_time;
				DEBUG_ONLY(jgbl.save_dont_reset_gbl_jrec_time = FALSE);
				assert((dba_mm == cs_data->acc_meth) || (csd == cs_data));
				csd = cs_data;	/* In MM, wcs_flu() can remap an extended DB, so reset csd to be sure */
			} else
			{
				if (SS_NORMAL != jpc->status)
					rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region),
						jpc->status);
				else
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
			}
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			assert(jgbl.forw_phase_recovery || (NULL == jgbl.mur_pini_addr_reset_fnptr));
			if (NULL != jgbl.mur_pini_addr_reset_fnptr)
				(*jgbl.mur_pini_addr_reset_fnptr)(csa);
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->jnl_checksum = jnl_info.checksum;
				csd->jnl_eovtn = csd->trans_hist.curr_tn;
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREAT, 2, JNL_LEN_STR(csd));
				/* Write the updated database file header out */
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				fc->op_len = SGMNT_HDR_LEN;
				fc->op_pos = 1;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
				{
					if (jpc->status)
						rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd),
							DB_LEN_STR(gv_cur_region), jpc->status);
					else
						rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd),
							DB_LEN_STR(gv_cur_region));
				}
				assert(jb->filesize == csd->jnl_alq);
				if (csd->jnl_alq + csd->jnl_deq <= csd->autoswitchlimit)
				{
					aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
											csd->jnl_alq, csd->jnl_deq);
					if (aligned_tot_jrec_size > csd->jnl_alq)
					{	/* need to extend more than initial allocation in the new journal file
						 * to accommodate the current transaction.
						 */
						new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
						assert(new_blocks);
						assert(0 == new_blocks % csd->jnl_deq);
						need_extend = TRUE;
					}
				}
			} else
			{
				send_msg(VARLSTCNT(4) ERR_JNLNOCREATE, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jnl_fs_block_size = jb->fs_block_size;
			/* Carve a buffer aligned to jnl_fs_block_size out of hdr_buff and do the header I/O in
			 * multiples of that size.
			 */
			header = (jnl_file_header *)(ROUND_UP2((uintszofptr_t)hdr_buff, jnl_fs_block_size));
			read_write_size = ROUND_UP2(REAL_JNL_HDR_LEN, jnl_fs_block_size);
			assert((unsigned char *)header + read_write_size <= ARRAYTOP(hdr_buff));
			DO_FILE_READ(jpc->channel, 0, header, read_write_size, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
			assert((header->virtual_size + new_blocks) == new_alq);
			jb->filesize = new_alq;	/* Actually this is virtual file size blocks */
			header->virtual_size = new_alq;
			JNL_DO_FILE_WRITE(csa, csd->jnl_file_name, jpc->channel, 0, header, read_write_size, jpc->status,
				jpc->status2);
			if (SS_NORMAL != jpc->status)
			{
				assert(FALSE);
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
			}
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 < new_blocks)
	{
		INCR_GVSTATS_COUNTER(csa, csa->nl, n_jnl_extends, 1);
		return EXIT_NRM;
	}
	/* Nothing got extended: record the failure and invoke jnl_file_lost() for this region */
	jpc->status = ERR_JNLREADEOF;
	jnl_file_lost(jpc, ERR_JNLEXTEND);
	return EXIT_ERR;
}
uint4 mur_process_intrpt_recov() { jnl_ctl_list *jctl, *last_jctl; reg_ctl_list *rctl, *rctl_top; int rename_fn_len, save_name_len, idx; char prev_jnl_fn[MAX_FN_LEN + 1], rename_fn[MAX_FN_LEN + 1], save_name[MAX_FN_LEN + 1]; jnl_create_info jnl_info; uint4 status, status2; uint4 max_autoswitchlimit, max_jnl_alq, max_jnl_deq, freeblks; sgmnt_data_ptr_t csd; jnl_private_control *jpc; jnl_buffer_ptr_t jbp; boolean_t jfh_changed; jnl_record *jnlrec; jnl_file_header *jfh; jnl_tm_t now; for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { TP_CHANGE_REG(rctl->gd); csd = cs_data; /* MM logic after wcs_flu call requires this to be set */ assert(csd == rctl->csa->hdr); jctl = rctl->jctl_turn_around; max_jnl_alq = max_jnl_deq = max_autoswitchlimit = 0; for (last_jctl = NULL ; (NULL != jctl); last_jctl = jctl, jctl = jctl->next_gen) { jfh = jctl->jfh; if (max_autoswitchlimit < jfh->autoswitchlimit) { /* Note that max_jnl_alq, max_jnl_deq are not the maximum journal allocation/extensions across * generations, but rather the allocation/extension corresponding to the maximum autoswitchlimit. */ max_autoswitchlimit = jfh->autoswitchlimit; max_jnl_alq = jfh->jnl_alq; max_jnl_deq = jfh->jnl_deq; } /* Until now, "rctl->blks_to_upgrd_adjust" holds the number of V4 format newly created bitmap blocks * seen in INCTN records in backward processing. It is possible that backward processing might have * missed out on seeing those INCTN records which are part of virtually-truncated or completely-rolled-bak * journal files. The journal file-header has a separate field "prev_recov_blks_to_upgrd_adjust" which * maintains exactly this count. Therefore adjust the rctl counter accordingly. 
*/ assert(!jfh->prev_recov_blks_to_upgrd_adjust || !jfh->recover_interrupted); assert(!jfh->prev_recov_blks_to_upgrd_adjust || jfh->prev_recov_end_of_data); rctl->blks_to_upgrd_adjust += jfh->prev_recov_blks_to_upgrd_adjust; } if (max_autoswitchlimit > last_jctl->jfh->autoswitchlimit) { csd->jnl_alq = max_jnl_alq; csd->jnl_deq = max_jnl_deq; csd->autoswitchlimit = max_autoswitchlimit; } else { assert(csd->jnl_alq == last_jctl->jfh->jnl_alq); assert(csd->jnl_deq == last_jctl->jfh->jnl_deq); assert(csd->autoswitchlimit == last_jctl->jfh->autoswitchlimit); } jctl = rctl->jctl_turn_around; /* Get a pointer to the turn around point EPOCH record */ jnlrec = rctl->mur_desc->jnlrec; assert(JRT_EPOCH == jnlrec->prefix.jrec_type); assert(jctl->turn_around_time == jnlrec->prefix.time); assert(jctl->turn_around_seqno == jnlrec->jrec_epoch.jnl_seqno); assert(jctl->turn_around_tn == jnlrec->prefix.tn); assert(jctl->rec_offset == jctl->turn_around_offset); /* Reset file-header "blks_to_upgrd" counter to the turn around point epoch value. Adjust this to include * the number of new V4 format bitmaps created by post-turnaround-point db file extensions. * The adjustment value is maintained in rctl->blks_to_upgrd_adjust. */ csd->blks_to_upgrd = jnlrec->jrec_epoch.blks_to_upgrd; csd->blks_to_upgrd += rctl->blks_to_upgrd_adjust; # ifdef GTM_TRIGGER /* online rollback can potentially take the database to a point in the past where the triggers that were * previously installed are no longer a part of the current database state and so any process that restarts * AFTER online rollback completes SHOULD reload triggers and the only way to do that is by incrementing the * db_trigger_cycle in the file header. 
*/ if (jgbl.onlnrlbk && (0 < csd->db_trigger_cycle)) { /* check for non-zero db_trigger_cycle is to prevent other processes (continuing after online rollback) * to establish implicit TP (on seeing the trigger cycle mismatch) when there are actually no triggers * installed in the database (because there were none at the start of online rollback). */ csd->db_trigger_cycle++; if (0 == csd->db_trigger_cycle) csd->db_trigger_cycle = 1; /* Don't allow cycle set to 0 which means uninitialized */ } # endif assert((WBTEST_ALLOW_ARBITRARY_FULLY_UPGRADED == gtm_white_box_test_case_number) || (FALSE == jctl->turn_around_fullyupgraded) || (TRUE == jctl->turn_around_fullyupgraded)); /* Set csd->fully_upgraded to FALSE if: * a) The turn around EPOCH had the fully_upgraded field set to FALSE * OR * b) If csd->blks_to_upgrd counter is non-zero. This field can be non-zero even if the turnaround EPOCH's * fully_upgraded field is TRUE. This is possible if the database was downgraded to V4 (post turnaround EPOCH) * format and database extensions happened causing new V4 format bitmap blocks to be written. The count of V4 * format bitmap blocks is maintained ONLY as part of INCTN records (with INCTN opcode SET_JNL_FILE_CLOSE_EXTEND) * noted down in rctl->blks_to_upgrd_adjust counter as part of BACKWARD processing which are finally added to * csd->blks_to_upgrd. */ if (!jctl->turn_around_fullyupgraded || csd->blks_to_upgrd) csd->fully_upgraded = FALSE; csd->trans_hist.early_tn = jctl->turn_around_tn; csd->trans_hist.curr_tn = csd->trans_hist.early_tn; /* INCREMENT_CURR_TN macro not used but noted in comment * to identify all places that set curr_tn */ csd->jnl_eovtn = csd->trans_hist.curr_tn; csd->turn_around_point = TRUE; /* MUPIP REORG UPGRADE/DOWNGRADE stores its partially processed state in the database file header. * It is difficult for recovery to restore those fields to a correct partial value. 
* Hence reset the related fields as if the desired_db_format got set just ONE tn BEFORE the EPOCH record * and that there was no more processing that happened. * This might potentially mean some duplicate processing for MUPIP REORG UPGRADE/DOWNGRADE after the recovery. * But that will only be the case as long as the database is in compatibility (mixed) mode (hopefully not long). */ if (csd->desired_db_format_tn >= jctl->turn_around_tn) csd->desired_db_format_tn = jctl->turn_around_tn - 1; if (csd->reorg_db_fmt_start_tn >= jctl->turn_around_tn) csd->reorg_db_fmt_start_tn = jctl->turn_around_tn - 1; if (csd->tn_upgrd_blks_0 > jctl->turn_around_tn) csd->tn_upgrd_blks_0 = (trans_num)-1; csd->reorg_upgrd_dwngrd_restart_block = 0; /* Compute current value of "free_blocks" based on the value of "free_blocks" at the turnaround point epoch * record and the change in "total_blks" since that epoch to the present form of the database. Any difference * in "total_blks" implies database file extensions happened since the turnaround point. A backward rollback * undoes everything (including all updates) except file extensions (it does not truncate the file size). * Therefore every block that was newly allocated as part of those file extensions should be considered FREE * for the current calculations except for the local bitmap blocks which are BUSY the moment they are created. */ assert(jnlrec->jrec_epoch.total_blks <= csd->trans_hist.total_blks); csd->trans_hist.free_blocks = jnlrec->jrec_epoch.free_blocks + (csd->trans_hist.total_blks - jnlrec->jrec_epoch.total_blks) - DIVIDE_ROUND_UP(csd->trans_hist.total_blks, BLKS_PER_LMAP) + DIVIDE_ROUND_UP(jnlrec->jrec_epoch.total_blks, BLKS_PER_LMAP); assert(!csd->blks_to_upgrd || !csd->fully_upgraded); assert((freeblks = mur_blocks_free(rctl)) == csd->trans_hist.free_blocks); /* Update strm_reg_seqno[] in db file header to reflect the turn around point. 
* Before updating "strm_reg_seqno", make sure value is saved into "save_strm_reg_seqno". * This is relied upon by the function "mur_get_max_strm_reg_seqno" in case of interrupted rollback. */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) { if (!csd->save_strm_reg_seqno[idx]) csd->save_strm_reg_seqno[idx] = csd->strm_reg_seqno[idx]; csd->strm_reg_seqno[idx] = jnlrec->jrec_epoch.strm_seqno[idx]; } wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_FSYNC_DB); assert(cs_addrs->ti->curr_tn == jctl->turn_around_tn); # ifdef UNIX if (jgbl.onlnrlbk) { if (dba_bg == cs_addrs->hdr->acc_meth) { /* dryclean the cache (basically reset the cycle fields in all teh cache records) so as to make * GT.M processes that only does 'reads' to require crit and hence realize that online rollback * is in progress */ bt_refresh(cs_addrs, FALSE); /* sets earliest bt TN to be the turn around TN */ } db_csh_ref(cs_addrs, FALSE); assert(NULL != cs_addrs->jnl); jpc = cs_addrs->jnl; assert(NULL != jpc->jnl_buff); jbp = jpc->jnl_buff; /* Since Rollback simulates the journal record along with the timestamp at which the update was made, it * sets jgbl.dont_reset_gbl_jrec_time to TRUE so that during forward processing t_end or tp_tend does not * reset the gbl_jrec_time to reflect the current time. But, with Online Rollback, one can have the shared * memory up and running and hence can have jbp->prev_jrec_time to be the time of the most recent journal * update made. Later in t_end/tp_tend, ADJUST_GBL_JREC_TIME is invoked which ensures that if ever * gbl_jrec_time (the time of the current update) is less than jbp->prev_jrec_time (time of the latest * journal update), dont_reset_gbl_jrec_time better be FALSE. But, this assert will trip since Rollback * sets the latter to TRUE. To fix this, set jbp->prev_jrec_time to the turn around time stamp. 
This way * we are guaranteed that all the updates done in the forward processing will have a timestamp that is * greater than the turn around timestamp */ SET_JNLBUFF_PREV_JREC_TIME(jbp, jctl->turn_around_time, DO_GBL_JREC_TIME_CHECK_FALSE); } else if (dba_bg == csd->acc_meth) { /* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */ SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1); } # else if (dba_bg == csd->acc_meth) { /* set earliest bt TN to be the turn-around TN (taken from bt_refresh()) */ SET_OLDEST_HIST_TN(cs_addrs, cs_addrs->ti->curr_tn - 1); } # endif csd->turn_around_point = FALSE; assert(OLDEST_HIST_TN(cs_addrs) == (cs_addrs->ti->curr_tn - 1)); /* In case this is MM and wcs_flu() remapped an extended database, reset rctl->csd */ assert((dba_mm == cs_data->acc_meth) || (rctl->csd == cs_data)); rctl->csd = cs_data; } JNL_SHORT_TIME(now); for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++) { TP_CHANGE_REG_IF_NEEDED(rctl->gd); if (!rctl->jfh_recov_interrupted) jctl = rctl->jctl_turn_around; else { DEBUG_ONLY( for (jctl = rctl->jctl_turn_around; NULL != jctl->next_gen; jctl = jctl->next_gen) ; /* check that latest gener file name does not match db header */ assert((rctl->csd->jnl_file_len != jctl->jnl_fn_len) || (0 != memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len))); ) jctl = rctl->jctl_alt_head; } assert(NULL != jctl); for ( ; NULL != jctl->next_gen; jctl = jctl->next_gen) ; assert(rctl->csd->jnl_file_len == jctl->jnl_fn_len); /* latest gener file name */ assert(0 == memcmp(rctl->csd->jnl_file_name, jctl->jnl_fn, jctl->jnl_fn_len)); /* should match db header */ if (SS_NORMAL != (status = prepare_unique_name((char *)jctl->jnl_fn, jctl->jnl_fn_len, "", "", rename_fn, &rename_fn_len, now, &status2))) return status; jctl->jnl_fn_len = rename_fn_len; /* change the name in memory to the proposed name */ memcpy(jctl->jnl_fn, rename_fn, rename_fn_len + 1); /* Rename hasn't happened yet 
at the filesystem level. In case current recover command is interrupted, * we need to update jfh->next_jnl_file_name before mur_forward(). Update jfh->next_jnl_file_name for * all journal files from which PBLK records were applied. Create new journal files for forward play. */ assert(NULL != rctl->jctl_turn_around); jctl = rctl->jctl_turn_around; /* points to journal file which has current recover's turn around point */ assert(0 != jctl->turn_around_offset); jfh = jctl->jfh; jfh->turn_around_offset = jctl->turn_around_offset; /* save progress in file header for */ jfh->turn_around_time = jctl->turn_around_time; /* possible re-issue of recover */ for (idx = 0; idx < MAX_SUPPL_STRMS; idx++) jfh->strm_end_seqno[idx] = csd->strm_reg_seqno[idx]; jfh_changed = TRUE; /* We are about to update the journal file header of the turnaround-point journal file to store the * non-zero jfh->turn_around_offset. Ensure corresponding database is considered updated. * This is needed in case journal recovery/rollback terminates abnormally and we go to mur_close_files. * We need to ensure csd->recov_interrupted does not get reset to FALSE even if this region did not have * have any updates to the corresponding database file otherwise. (GTM-8394) */ rctl->db_updated = TRUE; for ( ; NULL != jctl; jctl = jctl->next_gen) { /* setup the next_jnl links. note that in the case of interrupted recovery, next_jnl links * would have been already set starting from the turn-around point journal file of the * interrupted recovery but the new recovery MIGHT have taken us to a still previous * generation journal file that needs its next_jnl link set. this is why we do the next_jnl * link setup even in the case of interrupted recovery although in most cases it is unnecessary. 
*/ jfh = jctl->jfh; if (NULL != jctl->next_gen) { jfh->next_jnl_file_name_length = jctl->next_gen->jnl_fn_len; memcpy(jfh->next_jnl_file_name, jctl->next_gen->jnl_fn, jctl->next_gen->jnl_fn_len); jfh_changed = TRUE; } else assert(0 == jfh->next_jnl_file_name_length); /* null link from latest generation */ if (jfh->turn_around_offset && (jctl != rctl->jctl_turn_around)) { /* It is possible that the current recovery has a turn-around-point much before the * previously interrupted recovery. If it happens to be a previous generation journal * file then we have to reset the original turn-around-point to be zero in the journal * file header in order to ensure if this recovery gets interrupted we do interrupted * recovery processing until the new turn-around-point instead of stopping incorrectly * at the original turn-around-point itself. Note that there could be more than one * journal file with a non-zero turn_around_offset (depending on how many previous * recoveries got interrupted in this loop) that need to be reset. */ assert(!jctl->turn_around_offset); assert(rctl->recov_interrupted || rctl->jctl_apply_pblk); /* rctl->jfh_recov_interrupted can fail */ jfh->turn_around_offset = 0; jfh->turn_around_time = 0; jfh_changed = TRUE; } if (jfh_changed) { /* Since overwriting the journal file header (an already allocated block * in the file) should not cause ENOSPC, we dont take the trouble of * passing csa or jnl_fn (first two parameters). Instead we pass NULL. 
*/ JNL_DO_FILE_WRITE(NULL, NULL, jctl->channel, 0, jfh, REAL_JNL_HDR_LEN, jctl->status, jctl->status2); if (SS_NORMAL != jctl->status) { assert(FALSE); if (SS_NORMAL == jctl->status2) gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(5) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status); else gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT1(6) ERR_JNLWRERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, jctl->status, PUT_SYS_ERRNO(jctl->status2)); return jctl->status; } GTM_JNL_FSYNC(rctl->csa, jctl->channel, jctl->status); if (-1 == jctl->status) { jctl->status2 = errno; assert(FALSE); gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(9) ERR_JNLFSYNCERR, 2, jctl->jnl_fn_len, jctl->jnl_fn, ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), jctl->status2); return ERR_JNLFSYNCERR; } } jfh_changed = FALSE; } memset(&jnl_info, 0, SIZEOF(jnl_info)); jnl_info.status = jnl_info.status2 = SS_NORMAL; jnl_info.prev_jnl = &prev_jnl_fn[0]; set_jnl_info(rctl->gd, &jnl_info); jnl_info.prev_jnl_len = rctl->jctl_turn_around->jnl_fn_len; memcpy(jnl_info.prev_jnl, rctl->jctl_turn_around->jnl_fn, rctl->jctl_turn_around->jnl_fn_len); jnl_info.prev_jnl[jnl_info.prev_jnl_len] = 0; jnl_info.jnl_len = rctl->csd->jnl_file_len; memcpy(jnl_info.jnl, rctl->csd->jnl_file_name, jnl_info.jnl_len); jnl_info.jnl[jnl_info.jnl_len] = 0; assert(!mur_options.rollback || jgbl.mur_rollback); jnl_info.reg_seqno = rctl->jctl_turn_around->turn_around_seqno; jgbl.gbl_jrec_time = rctl->jctl_turn_around->turn_around_time; /* time needed for cre_jnl_file_common() */ if (EXIT_NRM != cre_jnl_file_common(&jnl_info, rename_fn, rename_fn_len)) { gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_JNLNOCREATE, 2, jnl_info.jnl_len, jnl_info.jnl); return jnl_info.status; } # ifdef UNIX if (jgbl.onlnrlbk) { cs_addrs = rctl->csa; /* Mimic what jnl_file_close in case of cleanly a closed journal file */ jpc = cs_addrs->jnl; /* the previous loop makes sure cs_addrs->jnl->jnl_buff is valid*/ NULLIFY_JNL_FILE_ID(cs_addrs); 
jpc->jnl_buff->cycle++; /* so that, all other processes knows to switch to newer journal file */ jpc->cycle--; /* decrement cycle so jnl_ensure_open() knows to reopen the journal */ } # endif if (NULL != rctl->jctl_alt_head) /* remove the journal files created by last interrupted recover process */ { mur_rem_jctls(rctl); rctl->jctl_alt_head = NULL; } /* From this point on, journal records are written into the newly created journal file. However, we still read * from old journal files. */ }