/* Writes one logical (SET/KILL/ZKILL) journal record.  Called for TP and non-TP transactions; ZTP never
 * comes through here (asserted below).
 */
void	jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb)
{
	jnl_private_control	*priv_ctl;
	struct_jrec_upd		*upd_rec;

	/* When REPL_WAS_ENABLED(csa) is TRUE, the code that initializes jgbl.gbl_jrec_time and the pini_addr
	 * was never run.  In that case the record is written only to the journal pool (consumed by the update
	 * process, which ignores those fields) and never to the journal buffer/file, so leaving them as-is is ok.
	 */
	priv_ctl = csa->jnl;
	assert(csa->now_crit);
	assert((0 != priv_ctl->pini_addr) || REPL_WAS_ENABLED(csa));
	assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa));
	assert(IS_SET_KILL_ZKILL(jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	upd_rec = (struct_jrec_upd *)jfb->buff;
	/* Fill in the fixed prefix portion of the record */
	upd_rec->prefix.time = jgbl.gbl_jrec_time;
	upd_rec->prefix.tn = csa->ti->curr_tn;
	upd_rec->prefix.checksum = jfb->checksum;
	upd_rec->prefix.pini_addr = (0 == priv_ctl->pini_addr) ? JNL_HDR_LEN : priv_ctl->pini_addr;
	if (jgbl.forw_phase_recovery)
	{	/* Forward recovery replays the token/seqno captured from the original journal file */
		QWASSIGN(upd_rec->token_seq, jgbl.mur_jrec_token_seq);
	} else
	{	/* t_end and tp_tend have already set the token or jnl_seqno into jnl_fence_ctl.token */
		QWASSIGN(upd_rec->token_seq.token, jnl_fence_ctl.token);
	}
	JNL_WRITE_APPROPRIATE(csa, priv_ctl, jfb->rectype, (jnl_record *)upd_rec, NULL, jfb);
}
/* repl_sort_tr_buff: ensures the journal records of one replicated transaction (tr_buff, tr_bufflen bytes)
 * are ordered for the receiving side.  A buffer holding exactly one record (prefix->forwptr == tlen) must
 * be a SET/KILL/ZKILL/NULL record and needs no sorting.  Otherwise the buffer is a (non-ZTP, asserted) TP
 * transaction: it must begin with a TUPD record and end with a TCOM record, whose num_participants field
 * gives the region count; a malformed buffer returns immediately (assert fails in debug builds).  In pro
 * builds a single-region TP transaction is returned as-is since it is already sorted (PRO_ONLY block).
 * NOTE(review): the remainder of this function -- the actual sorting pass that presumably uses the static
 * private_tr_buff / reg_jrec_info_array state declared below -- is not visible in this chunk; the code here
 * is kept byte-identical with only this documentation added.
 */
void repl_sort_tr_buff(uchar_ptr_t tr_buff, uint4 tr_bufflen) { boolean_t already_sorted, is_set_kill_zkill_ztrig_ztworm, sorting_needed; uchar_ptr_t tb, dst_addr, this_jrec_addr, working_record_addr, next_record_addr, reg_top; static uchar_ptr_t private_tr_buff; static reg_jrec_info_t *reg_jrec_info_array; static uint4 private_tr_bufflen = 0, max_participants = 0; struct_jrec_tcom *last_tcom_rec_ptr; enum jnl_record_type rectype; int balanced, tlen; uint4 num_records, cur_rec_idx, reg_idx, reclen, cur_updnum, max_updnum = 0, prev_updnum = 0; uint4 working_record, copy_len, idx, min_updnum_reg, min_val, next_min_val, this_reg_updnum; uint4 participants; # ifdef DEBUG uint4 tmp_sum, tcom_num = 0, prev_updnum_this_reg; # endif jnl_record *rec; jrec_prefix *prefix; long first_tcom_offset = 0; tb = tr_buff; tlen = tr_bufflen; assert(0 != tr_bufflen); assert(0 == ((UINTPTR_T)tb % SIZEOF(uint4))); prefix = (jrec_prefix *)tb; rectype = (enum jnl_record_type)prefix->jrec_type; assert(!IS_ZTP(rectype)); if (prefix->forwptr == tlen) { /* there is only one journal record in this buffer. Make sure it is either JRT_SET/JRT_KILL/JRT_NULL */ assert((JRT_SET == rectype) || (JRT_KILL == rectype) || (JRT_ZKILL == rectype) || (JRT_NULL == rectype)); /* No sorting needed. */ return; } else /* We have a TP transaction buffer */ { if (!IS_TUPD(rectype)) { assert(FALSE); return; } /* We should have at least one TCOM record at the end. The check for balanced TSET/TCOM pairs will be done below */ last_tcom_rec_ptr = (struct_jrec_tcom *)(tb + tlen - SIZEOF(struct_jrec_tcom)); prefix = (jrec_prefix *)(last_tcom_rec_ptr); participants = last_tcom_rec_ptr->num_participants; if (JRT_TCOM != prefix->jrec_type) { assert(FALSE); return; } } PRO_ONLY( /* A single region TP transaction is always sorted. So, for pro, return without addition sorting */ if (1 == participants) return; )
/* Review notes (documentation only; the code below is byte-identical):
 * jnl_write_logical writes one logical journal record -- a SET/KILL/ZKILL/ZTWORM/LGTRIG/ZTRIG update or a
 * NULL record (both asserted) -- for TP and non-TP transactions; ZTP never comes through here.
 *  - Fills the record prefix (pini_addr, tn, time) from jpc/csa/jgbl state; JNL_HDR_LEN is substituted
 *    when no PINI record was written (pini_addr == 0).
 *  - Copies token and strm_seqno from jnl_fence_ctl; t_end/tp_tend/mur_output_record have already set them
 *    (asserted for the replicated / forward-recovery cases).
 *  - Checksum: COMPUTE_LOGICAL_REC_CHECKSUM for update records; for a NULL record, compute_checksum over
 *    the whole struct_jrec_null starting from INIT_CHECKSUM_SEED.
 *  - When replication is allowed and the database uses an encryption key (USES_ANY_KEY), the header fields
 *    (prefix, token_seq, strm_seqno, num_participants) are mirrored into the unencrypted copy of the record
 *    in jfb->alt_buff so both copies stay in sync.
 *  - Finally hands the record to JNL_WRITE_APPROPRIATE together with the journal-pool write context jplctx.
 * The OFFSETOF/SIZEOF asserts document that struct_jrec_null and struct_jrec_upd lay out the accessed
 * fields identically, which is why a NULL record can safely be populated through the struct_jrec_upd
 * pointer jrec.
 */
/* This called for TP and non-TP, but not for ZTP */ void jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb, uint4 com_csum, jnlpool_write_ctx_t *jplctx) { struct_jrec_upd *jrec; struct_jrec_null *jrec_null; struct_jrec_upd *jrec_alt; jnl_private_control *jpc; /* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record * to the journal buffer or journal file but write it only to the journal pool from where it gets * sent across to the update process that does not care about these fields so it is ok to leave them as is. */ jpc = csa->jnl; assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa)); assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa)); assert(csa->now_crit); assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(jfb->rectype) || (JRT_NULL == jfb->rectype)); assert(!IS_ZTP(jfb->rectype)); jrec = (struct_jrec_upd *)jfb->buff; assert(OFFSETOF(struct_jrec_null, prefix) == OFFSETOF(struct_jrec_upd, prefix)); assert(SIZEOF(jrec_null->prefix) == SIZEOF(jrec->prefix)); jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? 
JNL_HDR_LEN : jpc->pini_addr; jrec->prefix.tn = csa->ti->curr_tn; jrec->prefix.time = jgbl.gbl_jrec_time; /* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */ assert((0 != jnl_fence_ctl.token) || (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa)) || (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state))); assert(OFFSETOF(struct_jrec_null, jnl_seqno) == OFFSETOF(struct_jrec_upd, token_seq)); assert(SIZEOF(jrec_null->jnl_seqno) == SIZEOF(jrec->token_seq)); jrec->token_seq.token = jnl_fence_ctl.token; assert(OFFSETOF(struct_jrec_null, strm_seqno) == OFFSETOF(struct_jrec_upd, strm_seqno)); assert(SIZEOF(jrec_null->strm_seqno) == SIZEOF(jrec->strm_seqno)); jrec->strm_seqno = jnl_fence_ctl.strm_seqno; /* update checksum below */ if(JRT_NULL != jrec->prefix.jrec_type) { COMPUTE_LOGICAL_REC_CHECKSUM(jfb->checksum, jrec, com_csum, jrec->prefix.checksum); } else jrec->prefix.checksum = compute_checksum(INIT_CHECKSUM_SEED, (unsigned char *)jrec, SIZEOF(struct_jrec_null)); if (REPL_ALLOWED(csa) && USES_ANY_KEY(csa->hdr)) { jrec_alt = (struct_jrec_upd *)jfb->alt_buff; jrec_alt->prefix = jrec->prefix; jrec_alt->token_seq = jrec->token_seq; jrec_alt->strm_seqno = jrec->strm_seqno; jrec_alt->num_participants = jrec->num_participants; } JNL_WRITE_APPROPRIATE(csa, jpc, jfb->rectype, (jnl_record *)jrec, NULL, jfb, jplctx); }
/* NOTE(review): this chunk is a mid-function fragment.  The enclosing function signature starts before the
 * visible text and the tail is also cut off; judging by the identical statements it is presumably the body
 * of yet another variant of jnl_write_logical (a GTMCRYPT-conditional one, given the GTMCRYPT_ONLY wrapper
 * around jrec_alt) -- confirm against the full file.  It populates the record prefix (pini_addr/tn/time)
 * and copies token and strm_seqno from jnl_fence_ctl, with the same struct_jrec_null/struct_jrec_upd layout
 * asserts as the complete versions of this routine.  Code kept byte-identical; only this note added.
 */
struct_jrec_null *jrec_null; GTMCRYPT_ONLY( struct_jrec_upd *jrec_alt; ) jnl_private_control *jpc; /* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record * to the journal buffer or journal file but write it only to the journal pool from where it gets * sent across to the update process that does not care about these fields so it is ok to leave them as is. */ jpc = csa->jnl; assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa)); assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa)); assert(csa->now_crit); assert(IS_SET_KILL_ZKILL_ZTRIG_ZTWORM(jfb->rectype) || (JRT_NULL == jfb->rectype)); assert(!IS_ZTP(jfb->rectype)); jrec = (struct_jrec_upd *)jfb->buff; assert(OFFSETOF(struct_jrec_null, prefix) == OFFSETOF(struct_jrec_upd, prefix)); assert(SIZEOF(jrec_null->prefix) == SIZEOF(jrec->prefix)); jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? JNL_HDR_LEN : jpc->pini_addr; jrec->prefix.tn = csa->ti->curr_tn; jrec->prefix.time = jgbl.gbl_jrec_time; /* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */ assert((0 != jnl_fence_ctl.token) || (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa)) || (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state))); assert(OFFSETOF(struct_jrec_null, jnl_seqno) == OFFSETOF(struct_jrec_upd, token_seq)); assert(SIZEOF(jrec_null->jnl_seqno) == SIZEOF(jrec->token_seq)); jrec->token_seq.token = jnl_fence_ctl.token; assert(OFFSETOF(struct_jrec_null, strm_seqno) == OFFSETOF(struct_jrec_upd, strm_seqno)); assert(SIZEOF(jrec_null->strm_seqno) == SIZEOF(jrec->strm_seqno)); jrec->strm_seqno = jnl_fence_ctl.strm_seqno;
/* This routine formats and outputs journal extract records corresponding to M SET, KILL, ZKILL, TSTART,
 * ZTSTART, and ZTRIGGER commands, $ZTRIGGER function (LGTRIG) and $ZTWORMHOLE.
 *	jctl : journal file control block (used for error reporting and the detail-mode common prefix)
 *	fi   : extract output file descriptor handed to jnlext_write
 *	rec  : the journal record being extracted
 *	plst : pini list entry for this record (process id info, EXTPID)
 * Output is accumulated in murgbl.extr_buff / extract_len and flushed via jnlext_write.
 */
void	mur_extract_set(jnl_ctl_list *jctl, fi_type *fi, jnl_record *rec, pini_list_struct *plst)
{
	enum jnl_record_type	rectype;
	int			max_blen, extract_len, val_extr_len, val_len;
	char			*val_ptr, *ptr, *buff;
	jnl_string		*keystr;
	boolean_t		do_format2zwr, is_ztstart;

	if (!mur_options.detail)
		extract_len = 0;
	else
		EXT_DET_COMMON_PREFIX(jctl);	/* sets up extract_len with the common detail-mode prefix */
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	if (IS_FUPD_TUPD(rectype))
	{	/* First update of a TP (TUPD) or ZTP (FUPD) transaction : emit a TSTART/ZTSTART line first */
		if (!mur_options.detail)
		{
			if (IS_TUPD(rectype))
			{
				EXT2BYTES(&muext_code[MUEXT_TSTART][0]);	/* TSTART */
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				EXT2BYTES(&muext_code[MUEXT_ZTSTART][0]);	/* ZTSTART */
				is_ztstart = TRUE;
			}
		} else
		{
			if (IS_TUPD(rectype))
			{
				strcpy(murgbl.extr_buff + extract_len, "TSTART \\");
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				strcpy(murgbl.extr_buff + extract_len, "ZTSTART\\");
				is_ztstart = TRUE;
			}
			extract_len = STRLEN(murgbl.extr_buff);
		}
		EXTTIME(rec->prefix.time);
		EXTQW(rec->prefix.tn);
		if (mur_options.detail)
			EXTINT(rec->prefix.checksum);
		EXTPID(plst);
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
		if (!is_ztstart)	/* stream seqno is extracted only for TP (not ZTP) starts */
			EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
		jnlext_write(fi, murgbl.extr_buff, extract_len);
	}
	/* Output the SET or KILL or ZKILL or ZTWORMHOLE or LGTRIG or ZTRIG record */
	if (!mur_options.detail)
	{
		extract_len = 0;
		if (IS_SET(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_SET][0]);
		} else if (IS_KILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_KILL][0]);
		} else if (IS_ZKILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZKILL][0]);
		} else if (IS_ZTWORM(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTWORM][0]);
		} else if (IS_LGTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_LGTRIG][0]);
		} else if (IS_ZTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTRIG][0]);
		} else
			assert(FALSE);	/* The assert will disappear in pro but not the ";" to properly terminate the else */
	} else
	{
		if (IS_FUPD_TUPD(rectype))
		{	/* Fixed: the pad literal was a single space but 23 bytes were copied from it, reading past
			 * the end of the string literal (undefined behavior).  Use an actual 23-space literal so the
			 * detail-extract columns line up as intended.
			 */
			memcpy(murgbl.extr_buff, "                       ", 23);
			extract_len = 23;
		} else
			extract_len = STRLEN(murgbl.extr_buff);
		strcpy(murgbl.extr_buff + extract_len, " \\");	/* NOTE(review): immediately overwritten by the
								 * jrt_label memcpy below at the same offset (assuming
								 * LAB_LEN >= 3); looks like dead code -- confirm
								 * before removing. */
		memcpy(murgbl.extr_buff + extract_len, jrt_label[rectype], LAB_LEN);
		extract_len += LAB_LEN;
		memcpy(murgbl.extr_buff + extract_len, LAB_TERM, LAB_TERM_SZ);
		extract_len += LAB_TERM_SZ;
	}
	EXTTIME(rec->prefix.time);
	EXTQW(rec->prefix.tn);
	if (mur_options.detail)
		EXTINT(rec->prefix.checksum);
	EXTPID(plst);
	if (IS_ZTP(rectype))
	{	/* ZTP records carry a token; TP/non-TP carry the journal seqno */
		EXTQW(rec->jrec_set_kill.token_seq.token);
	} else
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
	assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
	/* strm_seqno and update_num live at the same offset in all three record layouts (asserted below),
	 * so they can be extracted uniformly through jrec_set_kill.
	 */
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_ztworm.strm_seqno);
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_lgtrig.strm_seqno);
	EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
	EXTINT(rec->jrec_set_kill.update_num);
	do_format2zwr = FALSE;
	if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
	{	/* Extract the global node (key) in ZWR format; for SET, also locate the value */
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		EXTINT(keystr->nodeflags);
		buff = &murgbl.extr_buff[extract_len];
		max_blen = MIN(MAX_ZWR_KEY_SZ, murgbl.max_extr_record_length - extract_len);
		assert(MAX_ZWR_KEY_SZ == max_blen);	/* We allocated enough for key and data expansion for ZWR format */
		ptr = (char *)format_targ_key((uchar_ptr_t)buff, max_blen, gv_currkey, TRUE);
		assert(NULL != ptr);
		if (NULL != ptr)
			extract_len += (int)(ptr - &murgbl.extr_buff[extract_len]);
		if (IS_SET(rectype))
		{
			murgbl.extr_buff[extract_len++] = '=';
			val_ptr = &keystr->text[keystr->length];
			GET_MSTR_LEN(val_len, val_ptr);
			val_ptr += SIZEOF(mstr_len_t);
			do_format2zwr = TRUE;
		}
	} else if (IS_ZTWORM(rectype) || IS_LGTRIG(rectype))
	{	/* $ZTWORMHOLE and LGTRIG payloads share the same layout (asserted) */
		assert(&rec->jrec_ztworm.ztworm_str == &rec->jrec_lgtrig.lgtrig_str);
		keystr = (jnl_string *)&rec->jrec_ztworm.ztworm_str;
		val_len = keystr->length;
		val_ptr = &keystr->text[0];
		do_format2zwr = TRUE;
	}
	if (do_format2zwr)
	{	/* Expand the value into ZWR format, provided the worst-case expansion fits in the buffer */
		if (ZWR_EXP_RATIO(val_len) <= murgbl.max_extr_record_length - extract_len)
		{
			ptr = &murgbl.extr_buff[extract_len];
			format2zwr((sm_uc_ptr_t)val_ptr, val_len, (unsigned char *)ptr, &val_extr_len);
			extract_len += val_extr_len;
		} else
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_JNLBADRECFMT, 3, jctl->jnl_fn_len, jctl->jnl_fn,
				jctl->rec_offset, ERR_TEXT, 2,
				LEN_AND_LIT("Length of the record is too high for zwr format"));
			if (mur_options.verbose || mur_options.detail)
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4,
					LEN_AND_LIT("After max expansion record length"),
					ZWR_EXP_RATIO(val_len), ZWR_EXP_RATIO(val_len));
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT("Buffer size"),
					murgbl.max_extr_record_length - extract_len,
					murgbl.max_extr_record_length - extract_len);
			}
			assert(FALSE);
		}
	}
	murgbl.extr_buff[extract_len++] = '\\';
	jnlext_write(fi, murgbl.extr_buff, extract_len);
}
/* Review notes (documentation only; the code below is byte-identical):
 * jnl_v11tov15 converts one transaction's worth of journal records from internal filter format v11
 * (V4.2-002) to v15 (V4.4-002).  It walks the input buffer record by record (v11_jnl_record_length gives
 * each record's length), re-emitting SET/KILL/ZKILL records (jnl seqno, key and, for SET, the data) and
 * TCOM records (zeroed TID + participant count) into conv_buff with the v15 prefix/suffix layout,
 * zero-filling the pad bytes before each suffix.  Returns SS_NORMAL on success; otherwise -1 with
 * repl_errno set to EREPL_INTLFILTER_NOSPC (output buffer too small), EREPL_INTLFILTER_INCMPLREC
 * (truncated input record) or EREPL_INTLFILTER_BADREC (non-positive record length).  ZTP records are not
 * supported (GTMASSERT).  On return, *jnl_len and *conv_len are updated to the consumed/produced byte
 * counts (so on error *jnl_len marks how far conversion got).
 * NOTE(review): if rectype is neither SET/KILL/ZKILL nor a COM record, conv_reclen is read uninitialized
 * at the clen_without_sfx computation.  Debug builds assert IS_REPLICATED(rectype) first; pro builds rely
 * on v11_jnl_record_length() rejecting any other record type -- confirm that guarantee holds.
 * NOTE(review): the emitted prefix's pini_addr/time/tn are deliberately zeroed and prefix.forwptr ==
 * suffix.backptr == conv_reclen, per the v15 fixed-format layout this filter targets.
 */
int jnl_v11tov15(uchar_ptr_t jnl_buff, uint4 *jnl_len, uchar_ptr_t conv_buff, uint4 *conv_len, uint4 conv_bufsiz) { /* Convert a transaction from jnl version 11 (V4.2-002) to 15 (V.4.4-002) */ unsigned char *jb, *cb, *cstart, *jstart, rectype; int status, reclen; unsigned short key_len; unsigned int long_data_len, jlen, total_data, nzeros, conv_reclen, clen_without_sfx, total_key; jrec_prefix prefix; jrec_suffix suffix; seq_num jsno; jb = jnl_buff; cb = conv_buff; status = SS_NORMAL; jlen = *jnl_len; while (0 < jlen) { if (0 < (reclen = v11_jnl_record_length((jnl_record *)jb, jlen))) { if (reclen <= jlen) { rectype = REF_CHAR(jb + V11_JREC_TYPE_OFFSET); total_key = total_data = 0; assert(IS_REPLICATED(rectype)); if (IS_ZTP(rectype)) GTMASSERT; /* ZTP not supported */ if (IS_SET_KILL_ZKILL(rectype)) { GET_USHORT(key_len, jb + V11_JREC_PREFIX_SIZE + v11_jnl_fixed_size[rectype]); total_key = key_len + sizeof(unsigned short); if (IS_SET(rectype)) { GET_MSTR_LEN(long_data_len, jb + V11_JREC_PREFIX_SIZE + v11_jnl_fixed_size[rectype] + total_key); total_data = long_data_len + sizeof(mstr_len_t); } conv_reclen = JREC_PREFIX_SIZE + FIXED_UPD_RECLEN + total_key + total_data + JREC_SUFFIX_SIZE; conv_reclen = ROUND_UP2(conv_reclen, JNL_REC_START_BNDRY); } else if (IS_COM(rectype)) conv_reclen = JREC_PREFIX_SIZE + TCOM_RECLEN + JREC_SUFFIX_SIZE; clen_without_sfx = conv_reclen - JREC_SUFFIX_SIZE; if (cb - conv_buff + conv_reclen > conv_bufsiz) { repl_errno = EREPL_INTLFILTER_NOSPC; status = -1; break; } cstart = cb; jstart = jb; prefix.jrec_type = rectype; suffix.backptr = prefix.forwptr = conv_reclen; prefix.pini_addr = 0; prefix.time = 0; prefix.tn = 0; suffix.suffix_code = JNL_REC_SUFFIX_CODE; memcpy(cb, (unsigned char*)&prefix, JREC_PREFIX_SIZE); cb += JREC_PREFIX_SIZE; memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_JNL_SEQNO_OFFSET, sizeof(seq_num)); cb += sizeof(seq_num); if (IS_SET_KILL_ZKILL(rectype)) { PUT_JNL_STR_LEN(cb, key_len); jb += (V11_JREC_PREFIX_SIZE + 
V11_MUMPS_NODE_OFFSET + sizeof(unsigned short)); if (IS_FENCED(rectype)) jb += TP_TOKEN_TID_SIZE; cb += sizeof(jnl_str_len_t); memcpy(cb, jb, key_len); cb += key_len; jb += key_len; if (IS_SET(rectype)) { PUT_MSTR_LEN(cb, long_data_len); cb += sizeof(mstr_len_t); jb += sizeof(mstr_len_t); memcpy(cb, jb, long_data_len); cb += long_data_len; } } else if (IS_COM(rectype)) { assert(JRT_TCOM == rectype); memset(cb, 0, TID_STR_SIZE); cb += TID_STR_SIZE; memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_TCOM_PARTICIPANTS_OFFSET, sizeof(uint4)); cb += sizeof(uint4); } else assert(FALSE); nzeros = (cstart + clen_without_sfx - cb); if (nzeros > 0) { memset(cb, 0, nzeros); cb += nzeros; } jb = jstart + reclen; memcpy(cb, (unsigned char*)&suffix, JREC_SUFFIX_SIZE); cb += JREC_SUFFIX_SIZE; assert(cb == cstart + conv_reclen); jlen -= reclen; continue; } repl_errno = EREPL_INTLFILTER_INCMPLREC; status = -1; break; } repl_errno = EREPL_INTLFILTER_BADREC; status = -1; break; } assert(0 == jlen || -1 == status); *jnl_len = jb - jnl_buff; *conv_len = cb - conv_buff; return(status); }
/* Review notes (documentation only; the code below is byte-identical):
 * jnl_format builds one SET/KILL/ZKILL journal record image into jfb->buff from jfb->ja (operation, key,
 * and -- for SET -- value).
 *  - subcode indexes jnl_opcode[ja->operation][subcode] to pick the record type: 0 = non-fenced non-TP;
 *    1 (ZTP "F") / 2 (TP "T") = first update of this region inside a fence; 3 ("G") / 4 ("U") = subsequent
 *    updates.  The first fenced update for a region also links csa onto jnl_fence_ctl.fence_list.
 *  - record size = fixed portion (FIXED_UPD_RECLEN for TP/non-TP, FIXED_ZTP_UPD_RECLEN for ZTP) + key
 *    length (+ value length for SET, each with its length field) + suffix, rounded up to
 *    JNL_REC_START_BNDRY; align_fill_size pad bytes are zeroed before the suffix is laid down.
 *  - for TP, jfb->buff is carved out of sgm_info_ptr->format_buff_list and total_jnl_rec_size is grown by
 *    jrec_size + MIN_ALIGN_RECLEN (allowing for a possible ALIGN record); for non-TP, jfb->buff was
 *    already malloc'ed in gvcst_init.
 *  - the key length is stored by direct assignment (the address is aligned) while the SET value length
 *    uses PUT_MSTR_LEN because its address may be unaligned.
 */
void jnl_format(jnl_format_buffer *jfb) { enum jnl_record_type rectype; sgmnt_addrs *csa; uint4 align_fill_size, jrec_size, tmp_jrec_size; int subcode; jnl_action *ja; char *local_buffer; jnl_str_len_t keystrlen; mstr_len_t valstrlen; csa = &FILE_INFO(gv_cur_region)->s_addrs; if (jnl_fence_ctl.level == 0 && dollar_tlevel == 0) { /* Non-TP */ subcode = 0; tmp_jrec_size = FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE; } else { if (NULL == csa->next_fenced) { /* F (or T) */ subcode = 1; csa->next_fenced = jnl_fence_ctl.fence_list; jnl_fence_ctl.fence_list = csa; } else /* G (or U) */ subcode = 3; if (0 != dollar_tlevel) { /* TP */ ++subcode; tmp_jrec_size = FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE; } else tmp_jrec_size = FIXED_ZTP_UPD_RECLEN + JREC_SUFFIX_SIZE; } ja = &(jfb->ja); rectype = jnl_opcode[ja->operation][subcode]; assert(rectype > JRT_BAD && rectype < JRT_RECTYPES); assert(IS_SET_KILL_ZKILL(rectype)); /* Compute actual record length */ assert(NULL != ja->key); keystrlen = ja->key->end; tmp_jrec_size += keystrlen + sizeof(jnl_str_len_t); if (JNL_SET == ja->operation) { assert(NULL != ja->val); valstrlen = ja->val->str.len; tmp_jrec_size += valstrlen + sizeof(mstr_len_t); } jrec_size = ROUND_UP2(tmp_jrec_size, JNL_REC_START_BNDRY); align_fill_size = jrec_size - tmp_jrec_size; /* For JNL_REC_START_BNDRY alignment */ if (dollar_tlevel) { assert((1 << JFB_ELE_SIZE_IN_BITS) == JNL_REC_START_BNDRY); assert(JFB_ELE_SIZE == JNL_REC_START_BNDRY); jfb->buff = (char *)get_new_element(sgm_info_ptr->format_buff_list, jrec_size >> JFB_ELE_SIZE_IN_BITS); /* assume an align record will be written while computing maximum jnl-rec size requirements */ sgm_info_ptr->total_jnl_rec_size += jrec_size + MIN_ALIGN_RECLEN; } /* else if (0 == dollar_tlevel) jfb->buff already malloced in gvcst_init */ jfb->record_size = jrec_size; jfb->rectype = rectype; /* PREFIX */ ((jrec_prefix *)jfb->buff)->jrec_type = rectype; ((jrec_prefix *)jfb->buff)->forwptr = jrec_size; if (IS_ZTP(rectype)) local_buffer = 
jfb->buff + FIXED_ZTP_UPD_RECLEN; else local_buffer = jfb->buff + FIXED_UPD_RECLEN; *(jnl_str_len_t *)local_buffer = keystrlen; /* direct assignment for already aligned address */ local_buffer += sizeof(jnl_str_len_t); memcpy(local_buffer, (uchar_ptr_t)ja->key->base, keystrlen); local_buffer += keystrlen; if (JNL_SET == ja->operation) { PUT_MSTR_LEN(local_buffer, valstrlen); /* SET command's data may not be aligned */ local_buffer += sizeof(jnl_str_len_t); memcpy(local_buffer, (uchar_ptr_t)ja->val->str.addr, valstrlen); local_buffer += valstrlen; } if (0 != align_fill_size) { memset(local_buffer, 0, align_fill_size); local_buffer += align_fill_size; } assert(0 == ((uint4)local_buffer % sizeof(jrec_suffix))); /* SUFFIX */ ((jrec_suffix *)local_buffer)->backptr = jrec_size; ((jrec_suffix *)local_buffer)->suffix_code = JNL_REC_SUFFIX_CODE; }
/* Review notes (documentation only; the code below is byte-identical):
 * mur_output_record is called only for recover and rollback (mur_options.update) and replays one journal
 * record during forward processing.  Visible behavior in this chunk:
 *  - ALIGN/EOF/EPOCH/PBLK/PINI/TRUNC records are skipped (return SS_NORMAL immediately).
 *  - jgbl.gbl_jrec_time is taken from the original record so the jnl_write routines reuse it instead of
 *    generating a new timestamp; likewise pini_addr is remapped via mur_get_pini when journaling is on.
 *  - For rollback on replicated records, jgbl.mur_jrec_seqno / murgbl.consist_jnl_seqno and the per-stream
 *    csd->strm_reg_seqno[] are maintained from the record's seqno/strm_seqno.
 *  - SET/KILL/ZKILL/ZTRIG records (TP and non-TP share the format) restore jnl_fence_ctl token/strm_seqno
 *    and jgbl update-num/nodeflags state, set up fences (FUPD/GUPD initialize a one-level fence; TP calls
 *    tp_set_sgm), bump csd trigger state for ^#t updates (GTM_TRIGGER builds), and finally apply the update
 *    via op_gvput / op_gvkill / op_ztrigger / op_gvzwithdraw.
 *  - ZTP records additionally reset jnl_fence_ctl.level and the fence list after the update.
 * NOTE(review): this chunk appears truncated/garbled -- several declared locals (dummy, null_record,
 * aimg_blk_ptr, ztworm_jfb, in_len, gtmcrypt_errno, use_new_key) are never used in the visible text and the
 * brace balance suggests the handling for other record types (e.g. TCOM/INCTN/AIMG/NULL) present in the
 * full routine is missing here.  Confirm against the complete source before editing.
 */
/* This routine is called only for recover and rollback (that is, mur_options.update). * It applies the set/kill/zkill, tcom, inctn, and aimg records during forward processing. * Some fields like jnl_seqno, rec_seqno and prefix.time are saved here from original journal files. * Later jnl_write routines copies them to journal records instead of generating them like the runtime system */ uint4 mur_output_record(reg_ctl_list *rctl) { mval mv; jnl_record *rec; char *val_ptr; int strm_num; uint4 dummy; off_jnl_t pini_addr; jnl_string *keystr; enum jnl_record_type rectype; uint4 jnl_status, status; pini_list_struct *plst; boolean_t jnl_enabled, was_crit; struct_jrec_null null_record; gd_region *reg; seq_num strm_seqno; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; jnl_ctl_list *jctl; jnl_format_buffer *ztworm_jfb; blk_hdr_ptr_t aimg_blk_ptr; int in_len, gtmcrypt_errno; boolean_t use_new_key; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(mur_options.update); rec = rctl->mur_desc->jnlrec; rectype = (enum jnl_record_type)rec->prefix.jrec_type; switch (rectype) { case JRT_ALIGN: case JRT_EOF: case JRT_EPOCH: case JRT_PBLK: case JRT_PINI: case JRT_TRUNC: return SS_NORMAL; break; default: break; } jgbl.gbl_jrec_time = rec->prefix.time; pini_addr = rec->prefix.pini_addr; reg = rctl->gd; jctl = rctl->jctl; assert(jctl->reg_ctl == rctl); assert(gv_cur_region == reg); csa = rctl->csa; assert(cs_addrs == csa); csd = csa->hdr; assert(cs_data == csd); jnl_enabled = JNL_ENABLED(csa); if (jnl_enabled) { status = mur_get_pini(jctl, pini_addr, &plst); if (SS_NORMAL != status) return status; prc_vec = &plst->jpv; csa->jnl->pini_addr = plst->new_pini_addr; rctl->mur_plst = plst; } if (mur_options.rollback && IS_REPLICATED(rectype)) { jgbl.mur_jrec_seqno = GET_JNL_SEQNO(rec); if (jgbl.mur_jrec_seqno >= murgbl.consist_jnl_seqno) { assert(murgbl.losttn_seqno >= (jgbl.mur_jrec_seqno + 1)); murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno + 1; } jgbl.mur_jrec_strm_seqno = GET_STRM_SEQNO(rec); 
strm_seqno = jgbl.mur_jrec_strm_seqno; if (strm_seqno) { /* maintain csd->strm_reg_seqno */ strm_num = GET_STRM_INDEX(strm_seqno); strm_seqno = GET_STRM_SEQ60(strm_seqno); assert(csd->strm_reg_seqno[strm_num] <= (strm_seqno + 1)); csd->strm_reg_seqno[strm_num] = strm_seqno + 1; } } /* Assert that TREF(gd_targ_gvnh_reg) is NULL for every update that journal recovery/rollback plays forward; * This is necessary to ensure every update is played in only the database file where the journal record is seen * instead of across all regions that span the particular global reference. For example if ^a(1) spans db files * a.dat and b.dat, and a KILL ^a(1) is done at the user level, we would see KILL ^a(1) journal records in a.mjl * and b.mjl. When journal recovery processes the journal record in a.mjl, it should do the kill only in a.dat * When it gets to the same journal record in b.mjl, it would do the same kill in b.dat and effectively complete * the user level KILL ^a(1). If instead recovery does the KILL across all spanned regions, we would be basically * doing duplicate work let alone do it out-of-order since recovery goes region by region for the most part. */ assert(NULL == TREF(gd_targ_gvnh_reg)); if (IS_SET_KILL_ZKILL_ZTRIG(rectype)) { /* TP and non-TP has same format */ keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node; if (jnl_enabled) { MUR_SET_JNL_FENCE_CTL_TOKEN(rec->jrec_set_kill.token_seq.token, rctl); jnl_fence_ctl.strm_seqno = rec->jrec_set_kill.strm_seqno; jgbl.tp_ztp_jnl_upd_num = rec->jrec_set_kill.update_num; DEBUG_ONLY(jgbl.max_tp_ztp_jnl_upd_num = MAX(jgbl.max_tp_ztp_jnl_upd_num, jgbl.tp_ztp_jnl_upd_num);) jgbl.mur_jrec_nodeflags = keystr->nodeflags; } if (IS_FENCED(rectype)) { /* Even for FENCE_NONE we apply fences. Otherwise an [F/G/T/U]UPD becomes UPD etc. 
*/ /* op_tstart is called in "mur_forward_play_cur_jrec" already */ if (IS_FUPD(rectype)) { jnl_fence_ctl.level = 1; if (jnl_enabled) { jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; csa->next_fenced = NULL; } } else if (IS_GUPD(rectype)) { jnl_fence_ctl.level = 1; if (jnl_enabled) { jnl_fence_ctl.fence_list = csa; csa->next_fenced = JNL_FENCE_LIST_END; } } else if (IS_TP(rectype)) tp_set_sgm(); } # ifdef GTM_TRIGGER /* Check if ^#t and if so need to increment trigger cycle in file header. Note that the below 'if' check could cause * csd->db_trigger_cycle to be incremented even for the region that actually did NOT get any trigger updates. This * is because some of the ^#t subscripts (like ^#t(#TNAME)) go to the DEFAULT region. So, even though a trigger was * loaded only for ^a (corresponding to AREG), csd->db_trigger_cycle will be incremented for DEFAULT region as well. * To avoid this, the below check should be modified to set csa->incr_db_trigger_cycle only if the ^#t subscript * does not begin with '#' (similar to what is done in UPD_GV_BIND_NAME_APPROPRIATE). However, since journal * recovery operates in standalone mode, the db_trigger_cycle increment to DEFAULT region should be okay since it * will NOT cause any restarts */ if (IS_GVKEY_HASHT_GBLNAME(keystr->length, keystr->text)) { assert(cs_addrs == csa); csa->incr_db_trigger_cycle = TRUE; } # endif if (IS_SET(rectype)) { val_ptr = &keystr->text[keystr->length]; GET_MSTR_LEN(mv.str.len, val_ptr); mv.str.addr = val_ptr + SIZEOF(mstr_len_t); mv.mvtype = MV_STR; op_gvput(&mv); } else if (IS_KILL(rectype)) { if (IS_TP(rectype)) tp_set_sgm(); op_gvkill(); # ifdef GTM_TRIGGER } else if (IS_ZTRIG(rectype)) { if (IS_TP(rectype)) tp_set_sgm(); op_ztrigger(); # endif } else { assert(IS_ZKILL(rectype)); if (IS_TP(rectype)) tp_set_sgm(); op_gvzwithdraw(); } if (IS_ZTP(rectype)) { /* Even for FENCE_NONE we apply fences. Otherwise an FUPD/GUPD becomes UPD etc. 
*/ assert(jnl_enabled || (JNL_FENCE_LIST_END == jnl_fence_ctl.fence_list && NULL == csa->next_fenced)); jnl_fence_ctl.level = 0; if (jnl_enabled) { jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; csa->next_fenced = NULL; } } return SS_NORMAL; }