/* Writes a logical update journal record. Called for TP and non-TP updates, but not for ZTP.
 *
 * csa : Database file addresses for the region being journaled; the caller must hold crit.
 * jfb : Format buffer whose ->buff already holds the formatted update record.
 *
 * The record prefix (pini_addr, tn, time, checksum) and the token/seqno are filled in here before
 * the record is handed to JNL_WRITE_APPROPRIATE.
 */
void jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb)
{
	jnl_private_control	*jpc;
	struct_jrec_upd		*upd_rec;

	jpc = csa->jnl;
	/* If REPL_WAS_ENABLED(csa) is TRUE, the code that initializes jgbl.gbl_jrec_time and jpc->pini_addr
	 * was not executed. In that case the record is not written to the journal buffer or journal file;
	 * it goes only to the journal pool, from where it is sent to the update process, which does not
	 * care about these fields. So it is ok to leave them as they are.
	 */
	assert(REPL_WAS_ENABLED(csa) || (0 != jpc->pini_addr));
	assert(REPL_WAS_ENABLED(csa) || jgbl.gbl_jrec_time);
	assert(csa->now_crit);
	assert(IS_SET_KILL_ZKILL(jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	upd_rec = (struct_jrec_upd *)jfb->buff;
	/* Fall back to JNL_HDR_LEN when no pini_addr has been established */
	if (0 != jpc->pini_addr)
		upd_rec->prefix.pini_addr = jpc->pini_addr;
	else
		upd_rec->prefix.pini_addr = JNL_HDR_LEN;
	upd_rec->prefix.tn = csa->ti->curr_tn;
	upd_rec->prefix.time = jgbl.gbl_jrec_time;
	upd_rec->prefix.checksum = jfb->checksum;
	if (!jgbl.forw_phase_recovery)
	{	/* t_end and tp_tend have already set the token or jnl_seqno into jnl_fence_ctl.token */
		QWASSIGN(upd_rec->token_seq.token, jnl_fence_ctl.token);
	} else
	{	/* Forward phase of recovery : use the token/seqno recorded in jgbl */
		QWASSIGN(upd_rec->token_seq, jgbl.mur_jrec_token_seq);
	}
	JNL_WRITE_APPROPRIATE(csa, jpc, jfb->rectype, (jnl_record *)upd_rec, NULL, jfb);
}
/* Writes a logical journal record (update or NULL record). Called for TP and non-TP, but not for ZTP.
 *
 * csa      : Database file addresses for the region being journaled; the caller must hold crit.
 * jfb      : Format buffer; ->buff holds the formatted record, ->alt_buff an alternate copy whose
 *            header fields are kept in sync when REPL_ALLOWED(csa) && USES_ANY_KEY(csa->hdr).
 * com_csum : Checksum input passed through to COMPUTE_LOGICAL_REC_CHECKSUM.
 * jplctx   : Journal pool write context, passed through unchanged to JNL_WRITE_APPROPRIATE.
 */
void jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb, uint4 com_csum, jnlpool_write_ctx_t *jplctx)
{
	jnl_private_control	*jpc;
	struct_jrec_upd		*upd_rec;
	struct_jrec_upd		*alt_rec;
	struct_jrec_null	*null_rec;	/* referenced only inside SIZEOF() in the layout asserts below */

	jpc = csa->jnl;
	/* If REPL_WAS_ENABLED(csa) is TRUE, the code that initializes jgbl.gbl_jrec_time and jpc->pini_addr
	 * was not executed. In that case the record is not written to the journal buffer or journal file;
	 * it goes only to the journal pool, from where it is sent to the update process, which does not
	 * care about these fields. So it is ok to leave them as they are.
	 */
	assert(REPL_WAS_ENABLED(csa) || (0 != jpc->pini_addr));
	assert(REPL_WAS_ENABLED(csa) || jgbl.gbl_jrec_time);
	assert(csa->now_crit);
	assert((JRT_NULL == jfb->rectype) || IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	upd_rec = (struct_jrec_upd *)jfb->buff;
	/* The buffer is accessed through struct_jrec_upd even for NULL records, so the fields touched
	 * below must have the same offset and size in both record layouts.
	 */
	assert(OFFSETOF(struct_jrec_upd, prefix) == OFFSETOF(struct_jrec_null, prefix));
	assert(SIZEOF(upd_rec->prefix) == SIZEOF(null_rec->prefix));
	/* Fall back to JNL_HDR_LEN when no pini_addr has been established */
	if (0 != jpc->pini_addr)
		upd_rec->prefix.pini_addr = jpc->pini_addr;
	else
		upd_rec->prefix.pini_addr = JNL_HDR_LEN;
	upd_rec->prefix.tn = csa->ti->curr_tn;
	upd_rec->prefix.time = jgbl.gbl_jrec_time;
	/* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */
	assert((0 != jnl_fence_ctl.token)
		|| (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa))
		|| (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state)));
	assert(OFFSETOF(struct_jrec_upd, token_seq) == OFFSETOF(struct_jrec_null, jnl_seqno));
	assert(SIZEOF(upd_rec->token_seq) == SIZEOF(null_rec->jnl_seqno));
	upd_rec->token_seq.token = jnl_fence_ctl.token;
	assert(OFFSETOF(struct_jrec_upd, strm_seqno) == OFFSETOF(struct_jrec_null, strm_seqno));
	assert(SIZEOF(upd_rec->strm_seqno) == SIZEOF(null_rec->strm_seqno));
	upd_rec->strm_seqno = jnl_fence_ctl.strm_seqno;
	/* All header fields are final at this point, so the checksum can be computed now */
	if (JRT_NULL == upd_rec->prefix.jrec_type)
	{
		upd_rec->prefix.checksum = compute_checksum(INIT_CHECKSUM_SEED, (unsigned char *)upd_rec,
								SIZEOF(struct_jrec_null));
	} else
	{
		COMPUTE_LOGICAL_REC_CHECKSUM(jfb->checksum, upd_rec, com_csum, upd_rec->prefix.checksum);
	}
	if (REPL_ALLOWED(csa) && USES_ANY_KEY(csa->hdr))
	{	/* Mirror the header fields filled in above into the alternate buffer */
		alt_rec = (struct_jrec_upd *)jfb->alt_buff;
		alt_rec->prefix = upd_rec->prefix;
		alt_rec->token_seq = upd_rec->token_seq;
		alt_rec->strm_seqno = upd_rec->strm_seqno;
		alt_rec->num_participants = upd_rec->num_participants;
	}
	JNL_WRITE_APPROPRIATE(csa, jpc, jfb->rectype, (jnl_record *)upd_rec, NULL, jfb, jplctx);
}
/* Writes one journal record of type "rectype".
 *
 * jpc     : Journal private control
 * rectype : Record type; must lie strictly between JRT_BAD and JRT_RECTYPES and must not be JRT_ALIGN
 * jnl_rec : This contains the fixed part of a variable size record or the complete fixed size record.
 *           Must not be NULL; its prefix.forwptr supplies the record length.
 * blk_ptr : For JRT_PBLK and JRT_AIMG this has the block image
 * jfb     : For SET/KILL/ZKILL/ZTWORM records the entire record is formatted in this.
 *           For JRT_PBLK and JRT_AIMG it contains partial records
 */
void jnl_write(jnl_private_control *jpc, enum jnl_record_type rectype, jnl_record *jnl_rec, blk_hdr_ptr_t blk_ptr,
	jnl_format_buffer *jfb)
{
	int4 align_rec_len, rlen, rlen_with_align, dstlen, lcl_size, lcl_free, lcl_orig_free;
	jnl_buffer_ptr_t jb;
	sgmnt_addrs *csa;
	sgmnt_data_ptr_t csd;
	node_local_ptr_t cnl;
	struct_jrec_align align_rec;
	uint4 status;
	jrec_suffix suffix;
	boolean_t nowrap, is_replicated;
	struct_jrec_blk *jrec_blk;
	uint4 checksum, jnlpool_size, lcl_freeaddr;
	sm_uc_ptr_t lcl_buff;
	gd_region *reg;
	char *ptr;
	int jnl_wrt_start_modulus, jnl_wrt_start_mask;
	uint4 jnl_fs_block_size, aligned_lcl_free, padding_size;
	uint4 tmp_csum1, tmp_csum2;
#	ifdef DEBUG
	uint4 lcl_dskaddr, mumps_node_sz;
	char *mumps_node_ptr;
#	endif

	/* NOTE(review): the increment below lives inside assert(), so it only happens in builds where
	 * assert() is active - presumably jnl_write_recursion_depth is a debug-only counter; confirm.
	 */
	assert(jnl_write_recursion_depth++ < MAX_JNL_WRITE_RECURSION_DEPTH);
	reg = jpc->region;
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	is_replicated = jrt_is_replicated[rectype];
	/* Ensure that no replicated journal record is written by this routine if REPL_WAS_ENABLED(csa) is TRUE */
	assert((JNL_ENABLED(csa) && !REPL_WAS_ENABLED(csa)) || !is_replicated);
	/* Assert that the only journal records that the source server ever writes are PINI/PFIN/EPOCH/EOF
	 * which it does at the very end when the database is about to be shut down
	 */
	assert(!is_src_server || (JRT_EOF == rectype) || (JRT_PINI == rectype) || (JRT_EPOCH == rectype)
		|| (JRT_PFIN == rectype));
	/* Caller must hold crit, unless the database is clustered and the ccp state is CCST_CLOSED */
	assert(csa->now_crit || (csd->clustered && csa->nl->ccp_state == CCST_CLOSED));
	assert(rectype > JRT_BAD && rectype < JRT_RECTYPES && JRT_ALIGN != rectype);
	jb = jpc->jnl_buff;
	/* Before taking a copy of jb->freeaddr, determine if both free and freeaddr are in sync. If not fix that first. */
	if (jb->free_update_pid)
	{
		FIX_NONZERO_FREE_UPDATE_PID(csa, jb);
	}
	/* Take local copies of the journal buffer's free offsets, size and data start */
	lcl_freeaddr = jb->freeaddr;
	lcl_free = jb->free;
	lcl_size = jb->size;
	lcl_buff = &jb->buff[jb->buff_off];
	DBG_CHECK_JNL_BUFF_FREEADDR(jb);
	++jb->reccnt[rectype];	/* per-record-type counter */
	assert(NULL != jnl_rec);
	rlen = jnl_rec->prefix.forwptr;	/* record length as stored in the record prefix */
	/* Do high-level check on rlen */
	assert(rlen <= jb->max_jrec_len);
	/* Do fine-grained checks on rlen */
	GTMTRIG_ONLY(assert(!IS_ZTWORM(rectype) || (MAX_ZTWORM_JREC_LEN >= rlen));)	/* ZTWORMHOLE */
/* Make sure that the journal file is available if appropriate.
 *
 * reg : Region whose journal file must be open; must be the region that owns "csa".
 * csa : Database file addresses of that region. The caller must hold crit and journaling must be
 *       enabled (JNL_ENABLED).
 *
 * If this process has no journal file open (NOJNL == jpc->channel), or the journal file has been
 * switched since it was opened, the (new) journal file is opened through jnl_file_open().
 *
 * Returns 0 if no open was needed, else the status returned by jnl_file_open(). In DEBUG builds the
 * WBTEST_JNL_FILE_OPEN_FAIL white-box test case may alter the status on the "no open needed" path.
 */
uint4 jnl_ensure_open(gd_region *reg, sgmnt_addrs *csa)
{
	uint4			jnl_status;
	jnl_private_control	*jpc;
	sgmnt_data_ptr_t	csd;
	boolean_t		first_open_of_jnl, need_to_open_jnl;
	int			close_res;

	csd = csa->hdr;
	assert(csa->now_crit);
	jpc = csa->jnl;
	assert(NULL != jpc);	/* must be checked before jpc is dereferenced by the asserts below */
	assert(&FILE_INFO(jpc->region)->s_addrs == csa);
	assert(&FILE_INFO(reg)->s_addrs == csa);
	assert(JNL_ENABLED(csd));
	/* The goal is to change the code below to do only one JNL_FILE_SWITCHED(jpc) check instead of the additional
	 * (NOJNL == jpc->channel) check done below. The assert below ensures that the NOJNL check can indeed
	 * be subsumed by the JNL_FILE_SWITCHED check (with the exception of the source-server which has a special case that
	 * needs to be fixed in C9D02-002241). Over time, this has to be changed to one check.
	 */
	assert((NOJNL != jpc->channel) || JNL_FILE_SWITCHED(jpc) || is_src_server);
	need_to_open_jnl = FALSE;
	jnl_status = 0;
	if (NOJNL == jpc->channel)
		need_to_open_jnl = TRUE;
	else if (JNL_FILE_SWITCHED(jpc))
	{	/* The journal file has been changed "on the fly"; close the old one and open the new one */
		JNL_FD_CLOSE(jpc->channel, close_res);	/* sets jpc->channel to NOJNL */
		need_to_open_jnl = TRUE;
	}
	if (need_to_open_jnl)
	{
		/* Whenever the journal file gets switched, reset the pini_addr and new_freeaddr. */
		jpc->pini_addr = 0;
		jpc->new_freeaddr = 0;
		if (IS_GTCM_GNP_SERVER_IMAGE)
			gtcm_jnl_switched(reg); /* Reset pini_addr of all clients that had any older journal file open */
		first_open_of_jnl = (0 == csa->nl->jnl_file.u.inode);
		jnl_status = jnl_file_open(reg, first_open_of_jnl);
	}
#	ifdef DEBUG
	else
		GTM_WHITE_BOX_TEST(WBTEST_JNL_FILE_OPEN_FAIL, jnl_status, ERR_JNLFILOPN);
#	endif
	assert((0 != jnl_status) || !JNL_FILE_SWITCHED(jpc)
		|| (is_src_server && !JNL_ENABLED(csa) && REPL_WAS_ENABLED(csa)));
	return jnl_status;
}
/* Writes a logical journal record (update or NULL record). This is called for TP and non-TP, but not for ZTP.
 *
 * csa      : Database file addresses for the region being journaled; the caller must hold crit.
 * jfb      : Format buffer whose ->buff already holds the formatted record.
 * com_csum : Checksum value supplied by the caller.
 */
void jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb, uint4 com_csum)
{
	struct_jrec_upd *jrec;
	struct_jrec_null *jrec_null;	/* referenced only inside SIZEOF() in the layout asserts below */
	GTMCRYPT_ONLY( struct_jrec_upd *jrec_alt; )
	jnl_private_control *jpc;

	/* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes
	 * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record
	 * to the journal buffer or journal file but write it only to the journal pool from where it gets
	 * sent across to the update process that does not care about these fields so it is ok to leave them as is.
	 */
	jpc = csa->jnl;
	assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa));
	assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa));
	assert(csa->now_crit);
	assert(IS_SET_KILL_ZKILL_ZTRIG_ZTWORM(jfb->rectype) || (JRT_NULL == jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	jrec = (struct_jrec_upd *)jfb->buff;
	/* The buffer is accessed through struct_jrec_upd even when it holds a NULL record, so the fields
	 * written below must have the same offset and size in both record layouts.
	 */
	assert(OFFSETOF(struct_jrec_null, prefix) == OFFSETOF(struct_jrec_upd, prefix));
	assert(SIZEOF(jrec_null->prefix) == SIZEOF(jrec->prefix));
	/* Use JNL_HDR_LEN as the pini_addr when none has been established */
	jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? JNL_HDR_LEN : jpc->pini_addr;
	jrec->prefix.tn = csa->ti->curr_tn;
	jrec->prefix.time = jgbl.gbl_jrec_time;
	/* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */
	assert((0 != jnl_fence_ctl.token)
		|| (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa))
		|| (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state)));
	assert(OFFSETOF(struct_jrec_null, jnl_seqno) == OFFSETOF(struct_jrec_upd, token_seq));
	assert(SIZEOF(jrec_null->jnl_seqno) == SIZEOF(jrec->token_seq));
int gtmsource_checkhealth(void) { uint4 gtmsource_pid; int status, semval, save_errno; boolean_t srv_alive, all_files_open; gtmsource_local_ptr_t gtmsourcelocal_ptr; int4 index, num_servers; seq_num reg_seqno, jnlseqno; gd_region *reg, *region_top; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; char errtxt[OUT_BUFF_SIZE]; char *modestr; assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]); if (NULL != jnlpool.gtmsource_local) /* Check health of a specific source server */ gtmsourcelocal_ptr = jnlpool.gtmsource_local; else gtmsourcelocal_ptr = &jnlpool.gtmsource_local_array[0]; num_servers = 0; status = SRV_ALIVE; for (index = 0; index < NUM_GTMSRC_LCL; index++, gtmsourcelocal_ptr++) { if ('\0' == gtmsourcelocal_ptr->secondary_instname[0]) { assert(NULL == jnlpool.gtmsource_local); continue; } gtmsource_pid = gtmsourcelocal_ptr->gtmsource_pid; /* If CHECKHEALTH on a specific secondary instance is requested, print the health information irrespective * of whether a source server for that instance is alive or not. For CHECKHEALTH on ALL secondary instances * print health information only for those instances that have an active or passive source server alive. */ if ((NULL == jnlpool.gtmsource_local) && (0 == gtmsource_pid)) continue; repl_log(stdout, TRUE, TRUE, "Initiating CHECKHEALTH operation on source server pid [%d] for secondary instance" " name [%s]\n", gtmsource_pid, gtmsourcelocal_ptr->secondary_instname); srv_alive = (0 == gtmsource_pid) ? 
FALSE : is_proc_alive(gtmsource_pid, 0); if (srv_alive) { if (GTMSOURCE_MODE_ACTIVE == gtmsourcelocal_ptr->mode) modestr = "ACTIVE"; else if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsourcelocal_ptr->mode) modestr = "ACTIVE REQUESTED"; else if (GTMSOURCE_MODE_PASSIVE == gtmsourcelocal_ptr->mode) modestr = "PASSIVE"; else if (GTMSOURCE_MODE_PASSIVE_REQUESTED == gtmsourcelocal_ptr->mode) modestr = "PASSIVE REQUESTED"; else { assert(gtmsourcelocal_ptr->mode != gtmsourcelocal_ptr->mode); modestr = "UNKNOWN"; } repl_log(stderr, FALSE, TRUE, FORMAT_STR1, gtmsource_pid, "Source server", "", modestr); status |= SRV_ALIVE; num_servers++; } else { repl_log(stderr, FALSE, TRUE, FORMAT_STR, gtmsource_pid, "Source server", " NOT"); gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_SRCSRVNOTEXIST, 2, LEN_AND_STR(gtmsourcelocal_ptr->secondary_instname)); status |= SRV_DEAD; } if (NULL != jnlpool.gtmsource_local) break; } if (NULL == jnlpool.gtmsource_local) { /* Compare number of servers that were found alive with the current value of the COUNT semaphore. * If they are not equal, report the discrepancy. */ semval = get_sem_info(SOURCE, SRC_SERV_COUNT_SEM, SEM_INFO_VAL); if (-1 == semval) { save_errno = errno; repl_log(stderr, FALSE, TRUE, "Error fetching source server count semaphore value : %s\n", STRERROR(save_errno)); status |= SRV_ERR; } else if (semval != num_servers) { repl_log(stderr, FALSE, FALSE, "Error : Expected %d source server(s) to be alive but found %d actually alive\n", semval, num_servers); repl_log(stderr, FALSE, TRUE, "Error : Check if any pid reported above is NOT a source server process\n"); status |= SRV_ERR; } } /* Check that there are no regions with replication state = WAS_ON (i.e. repl_was_open). If so report that. * But to determine that, we need to attach to all the database regions. */ gvinit(); /* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. 
*/ all_files_open = region_init(FALSE); if (!all_files_open) { gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN); status |= SRV_ERR; } else { for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++) { csa = &FILE_INFO(reg)->s_addrs; csd = csa->hdr; if (REPL_WAS_ENABLED(csd)) { assert(!JNL_ENABLED(csd) || REPL_ENABLED(csd)); /* || is for turning replication on concurrently */ reg_seqno = csd->reg_seqno; jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO; sgtm_putmsg(errtxt, VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(reg), ®_seqno, ®_seqno, &jnlseqno, &jnlseqno); repl_log(stderr, FALSE, TRUE, errtxt); status |= SRV_ERR; } } } if (jnlpool.jnlpool_ctl->freeze) { repl_log(stderr, FALSE, FALSE, "Warning: Instance Freeze is ON\n"); repl_log(stderr, FALSE, TRUE, " Freeze Comment: %s\n", jnlpool.jnlpool_ctl->freeze_comment); status |= SRV_ERR; } return (status + NORMAL_SHUTDOWN); }
boolean_t nowrap; struct_jrec_blk *jrec_blk; uint4 jnlpool_size; uchar_ptr_t jnlrecptr; DEBUG_ONLY(uint4 lcl_dskaddr;) error_def(ERR_JNLWRTNOWWRTR); error_def(ERR_JNLWRTDEFER); assert(NULL != jnl_rec); assert(rectype > JRT_BAD && rectype < JRT_RECTYPES && JRT_ALIGN != rectype); assert(jrt_is_replicated[rectype]); assert((NULL != jnlpool.jnlpool_ctl) && (NULL != jnlpool_ctl)); /* ensure we haven't yet detached from the jnlpool */ assert((&FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs)->now_crit); /* ensure we have the jnl pool lock */ csa = &FILE_INFO(jpc->region)->s_addrs; assert(!JNL_ENABLED(csa) && REPL_WAS_ENABLED(csa)); assert(csa->now_crit || (csa->hdr->clustered && csa->nl->ccp_state == CCST_CLOSED)); jb = jpc->jnl_buff; ++jb->reccnt[rectype]; rlen = jnl_rec->prefix.forwptr; assert(0 == rlen % JNL_REC_START_BNDRY); jb->bytcnt += rlen; DEBUG_ONLY(jgbl.cu_jnl_index++;) jnlpool_size = temp_jnlpool_ctl->jnlpool_size; dstlen = jnlpool_size - temp_jnlpool_ctl->write; jnlrecptr = (jrt_fixed_size[rectype] ? (uchar_ptr_t)jnl_rec : (uchar_ptr_t)jfb->buff); if (rlen <= dstlen) /* dstlen & srclen >= rlen (most frequent case) */ memcpy(jnldata_base + temp_jnlpool_ctl->write, jnlrecptr, rlen); else /* dstlen < rlen <= srclen */ { memcpy(jnldata_base + temp_jnlpool_ctl->write, jnlrecptr, dstlen);