/* jnl_v11tov15 : replication internal-filter routine.
 * Converts a buffer of journal records from journal format version 11 (V4.2-002) to version 15 (V4.4-002).
 *
 * Parameters:
 *   jnl_buff    - [in]     buffer holding one transaction's worth of V11-format journal records
 *   jnl_len     - [in/out] on input, number of bytes available in jnl_buff; on return, number of bytes consumed
 *   conv_buff   - [out]    buffer receiving the converted V15-format records
 *   conv_len    - [out]    number of bytes written into conv_buff
 *   conv_bufsiz - [in]     capacity of conv_buff in bytes
 *
 * Returns SS_NORMAL on success, or -1 with repl_errno set to one of:
 *   EREPL_INTLFILTER_NOSPC     - conv_buff too small for the next converted record
 *   EREPL_INTLFILTER_INCMPLREC - input record length exceeds the remaining input bytes
 *   EREPL_INTLFILTER_BADREC    - v11_jnl_record_length could not determine a valid record length
 */
int jnl_v11tov15(uchar_ptr_t jnl_buff, uint4 *jnl_len, uchar_ptr_t conv_buff, uint4 *conv_len, uint4 conv_bufsiz)
{	/* Convert a transaction from jnl version 11 (V4.2-002) to 15 (V.4.4-002) */
	unsigned char	*jb, *cb, *cstart, *jstart, rectype;	/* jb/cb are the input/output cursors; cstart/jstart mark record starts */
	int		status, reclen;
	unsigned short	key_len;
	unsigned int	long_data_len, jlen, total_data, nzeros, conv_reclen, clen_without_sfx, total_key;
	jrec_prefix	prefix;		/* V15 prefix, built locally and memcpy'd into the output */
	jrec_suffix	suffix;		/* V15 suffix, ditto */
	seq_num		jsno;		/* NOTE(review): appears unused in this routine */

	jb = jnl_buff;
	cb = conv_buff;
	status = SS_NORMAL;
	jlen = *jnl_len;
	while (0 < jlen)
	{	/* Process one V11 record per iteration until input is exhausted or an error stops us */
		if (0 < (reclen = v11_jnl_record_length((jnl_record *)jb, jlen)))
		{
			if (reclen <= jlen)
			{
				rectype = REF_CHAR(jb + V11_JREC_TYPE_OFFSET);
				total_key = total_data = 0;
				assert(IS_REPLICATED(rectype));
				if (IS_ZTP(rectype))
					GTMASSERT;	/* ZTP not supported */
				if (IS_SET_KILL_ZKILL(rectype))
				{	/* Key (and for SETs, data) lengths live immediately after the fixed-size portion */
					GET_USHORT(key_len, jb + V11_JREC_PREFIX_SIZE + v11_jnl_fixed_size[rectype]);
					total_key = key_len + sizeof(unsigned short);
					if (IS_SET(rectype))
					{
						GET_MSTR_LEN(long_data_len,
							jb + V11_JREC_PREFIX_SIZE + v11_jnl_fixed_size[rectype] + total_key);
						total_data = long_data_len + sizeof(mstr_len_t);
					}
					conv_reclen = JREC_PREFIX_SIZE + FIXED_UPD_RECLEN + total_key + total_data
							+ JREC_SUFFIX_SIZE;
					/* V15 records must start on a JNL_REC_START_BNDRY boundary; pad accordingly */
					conv_reclen = ROUND_UP2(conv_reclen, JNL_REC_START_BNDRY);
				} else if (IS_COM(rectype))
					conv_reclen = JREC_PREFIX_SIZE + TCOM_RECLEN + JREC_SUFFIX_SIZE;
				/* NOTE(review): conv_reclen is set only for SET/KILL/ZKILL and COM record types; any other
				 * type would use it uninitialized here. The assert(IS_REPLICATED(rectype)) above (and the
				 * assert(FALSE) below) indicate only those types are expected — confirm callers guarantee it.
				 */
				clen_without_sfx = conv_reclen - JREC_SUFFIX_SIZE;
				if (cb - conv_buff + conv_reclen > conv_bufsiz)
				{	/* Not enough room in the output buffer for this converted record */
					repl_errno = EREPL_INTLFILTER_NOSPC;
					status = -1;
					break;
				}
				cstart = cb;
				jstart = jb;
				/* Build the V15 prefix/suffix. pini_addr/time/tn are zeroed: the receiving side
				 * regenerates them; only type and length are meaningful in the converted stream.
				 */
				prefix.jrec_type = rectype;
				suffix.backptr = prefix.forwptr = conv_reclen;
				prefix.pini_addr = 0;
				prefix.time = 0;
				prefix.tn = 0;
				suffix.suffix_code = JNL_REC_SUFFIX_CODE;
				memcpy(cb, (unsigned char*)&prefix, JREC_PREFIX_SIZE);
				cb += JREC_PREFIX_SIZE;
				/* The journal seqno is carried over verbatim from the V11 record */
				memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_JNL_SEQNO_OFFSET, sizeof(seq_num));
				cb += sizeof(seq_num);
				if (IS_SET_KILL_ZKILL(rectype))
				{	/* Copy key (and value for SETs), re-encoding the length fields in V15 layout */
					PUT_JNL_STR_LEN(cb, key_len);
					jb += (V11_JREC_PREFIX_SIZE + V11_MUMPS_NODE_OFFSET + sizeof(unsigned short));
					if (IS_FENCED(rectype))
						jb += TP_TOKEN_TID_SIZE;	/* skip token/tid present only in fenced records */
					cb += sizeof(jnl_str_len_t);
					memcpy(cb, jb, key_len);
					cb += key_len;
					jb += key_len;
					if (IS_SET(rectype))
					{
						PUT_MSTR_LEN(cb, long_data_len);
						cb += sizeof(mstr_len_t);
						jb += sizeof(mstr_len_t);
						memcpy(cb, jb, long_data_len);
						cb += long_data_len;
					}
				} else if (IS_COM(rectype))
				{	/* TCOM: V15 carries a TID string (zero-filled here) followed by participant count */
					assert(JRT_TCOM == rectype);
					memset(cb, 0, TID_STR_SIZE);
					cb += TID_STR_SIZE;
					memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_TCOM_PARTICIPANTS_OFFSET, sizeof(uint4));
					cb += sizeof(uint4);
				} else
					assert(FALSE);
				/* Zero-fill alignment padding between payload end and the suffix */
				nzeros = (cstart + clen_without_sfx - cb);
				if (nzeros > 0)
				{
					memset(cb, 0, nzeros);
					cb += nzeros;
				}
				jb = jstart + reclen;	/* advance input cursor past the whole V11 record */
				memcpy(cb, (unsigned char*)&suffix, JREC_SUFFIX_SIZE);
				cb += JREC_SUFFIX_SIZE;
				assert(cb == cstart + conv_reclen);
				jlen -= reclen;
				continue;
			}
			/* Record claims more bytes than remain in the input buffer */
			repl_errno = EREPL_INTLFILTER_INCMPLREC;
			status = -1;
			break;
		}
		repl_errno = EREPL_INTLFILTER_BADREC;
		status = -1;
		break;
	}
	assert(0 == jlen || -1 == status);
	/* Report how much input was consumed and how much output was produced, even on error */
	*jnl_len = jb - jnl_buff;
	*conv_len = cb - conv_buff;
	return(status);
}
/* mur_forward_play_cur_jrec : process the current journal record of region "rctl" during the forward
 * phase of journal recovery/rollback. Decrypts logical records if needed, classifies the record as
 * GOOD_TN/LOST_TN/BROKEN_TN (using rctl->forw_multi or the multi hashtable for fenced TP records),
 * maintains -show state/statistics, and starts an implicit TP transaction for TUPD records.
 * Returns SS_NORMAL, a gtmcrypt error code, or a mur_pini_state failure status.
 * NOTE(review): the remainder of this function lies outside this chunk — only the visible portion
 * is documented here.
 */
uint4	mur_forward_play_cur_jrec(reg_ctl_list *rctl)
{
	boolean_t		process_losttn;
	boolean_t		is_set_kill_zkill_ztworm_lgtrig_ztrig, is_set_kill_zkill_ztrig;
	trans_num		curr_tn;
	enum jnl_record_type	rectype;
	enum rec_fence_type	rec_fence;
	enum broken_type	recstat;	/* GOOD_TN / LOST_TN / BROKEN_TN classification of this record */
	jnl_tm_t		rec_time;
	uint4			status;
	mval			mv;
	seq_num			rec_token_seq, rec_strm_seqno, resync_strm_seqno;
	jnl_record		*rec;
	jnl_string		*keystr;
	multi_struct		*multi;
	jnl_ctl_list		*jctl;
	ht_ent_mname		*tabent;
	mname_entry		gvent;
	gvnh_reg_t		*gvnh_reg;
	pini_list_struct	*plst;
	int4			gtmcrypt_errno;
	boolean_t		use_new_key;
	forw_multi_struct	*forw_multi;
#	if (defined(DEBUG) && defined(UNIX))
	int4			strm_idx;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(!rctl->forw_eof_seen);
	if (multi_proc_in_use)
	{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output.
		 * e.g. If this function ends up calling t_end/op_tcommit which in turn needs to do a jnl autoswitch
		 * inside jnl_file_extend and prints a GTM-I-FILERENAME message.
		 */
		MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
	}
	jctl = rctl->jctl;
	/* Ensure we never DOUBLE process the same journal record in the forward phase */
	assert((jctl != rctl->last_processed_jctl) || (jctl->rec_offset != rctl->last_processed_rec_offset));
#	ifdef DEBUG
	rctl->last_processed_jctl = jctl;
	rctl->last_processed_rec_offset = jctl->rec_offset;
#	endif
	rec = rctl->mur_desc->jnlrec;
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	rec_time = rec->prefix.time;
	assert(rec_time <= mur_options.before_time);
	assert(rec_time >= mur_options.after_time);
	assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
	is_set_kill_zkill_ztworm_lgtrig_ztrig = (boolean_t)(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
	if (is_set_kill_zkill_ztworm_lgtrig_ztrig)
	{	/* Logical update record: decrypt the mumps_node portion in place if the journal file is encrypted */
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		if (USES_ANY_KEY(jctl->jfh))
		{
			use_new_key = USES_NEW_KEY(jctl->jfh);
			assert(NEEDS_NEW_KEY(jctl->jfh, rec->prefix.tn) == use_new_key);
			MUR_DECRYPT_LOGICAL_RECS(keystr, (use_new_key ? TRUE : jctl->jfh->non_null_iv),
				rec->prefix.forwptr, (use_new_key ? jctl->encr_key_handle2 : jctl->encr_key_handle),
				gtmcrypt_errno);
			if (0 != gtmcrypt_errno)
			{
				GTMCRYPT_REPORT_ERROR(gtmcrypt_errno, gtm_putmsg, jctl->jnl_fn_len, jctl->jnl_fn);
				return gtmcrypt_errno;
			}
		}
	}
	if (mur_options.selection && !mur_select_rec(jctl))
		return SS_NORMAL;	/* record filtered out by user selection criteria */
	rec_token_seq = (REC_HAS_TOKEN_SEQ(rectype)) ? GET_JNL_SEQNO(rec) : 0;
	process_losttn = rctl->process_losttn;
	if (!process_losttn && mur_options.rollback)
	{	/* Rollback: once we reach losttn_seqno, every replicated record from here on is a lost transaction */
		if (IS_REPLICATED(rectype) && (rec_token_seq >= murgbl.losttn_seqno))
			process_losttn = rctl->process_losttn = TRUE;
#		if (defined(UNIX) && defined(DEBUG))
		/* Debug-only sanity check of supplementary-stream seqnos against the resync stream seqnos */
		if ((rec_token_seq < murgbl.losttn_seqno) && murgbl.resync_strm_seqno_nonzero && IS_REPLICATED(rectype))
		{
			assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype) || IS_COM(rectype)
				|| (JRT_NULL == (rectype)));
			assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_null.strm_seqno);
			assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_tcom.strm_seqno);
			rec_strm_seqno = GET_STRM_SEQNO(rec);
			if (rec_strm_seqno)
			{
				strm_idx = GET_STRM_INDEX(rec_strm_seqno);
				rec_strm_seqno = GET_STRM_SEQ60(rec_strm_seqno);
				resync_strm_seqno = murgbl.resync_strm_seqno[strm_idx];
				assert(!resync_strm_seqno || (rec_strm_seqno < resync_strm_seqno));
			}
		}
#		endif
	}
	/* Note: Broken transaction determination is done below only based on the records that got selected as
	 * part of the mur_options.selection criteria. Therefore depending on whether a broken transaction gets
	 * selected or not, future complete transactions might either go to the lost transaction or extract file.
	 */
	recstat = process_losttn ? LOST_TN : GOOD_TN;
	status = SS_NORMAL;
	if (FENCE_NONE != mur_options.fences)
	{
		if (IS_FENCED(rectype))
		{
			assert(rec_token_seq);
#			ifdef DEBUG
			/* assert that all TP records before min_broken_time are not broken */
			if (IS_TP(rectype) && ((!mur_options.rollback && rec_time < murgbl.min_broken_time)
					|| (mur_options.rollback && rec_token_seq < murgbl.min_broken_seqno)))
			{
				rec_fence = GET_REC_FENCE_TYPE(rectype);
				if (NULL != (multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, rec_fence)))
				{
					assert(0 == multi->partner);
					assert(FALSE == multi->this_is_broken);
				}
			}
#			endif
			/* In most cases, the fact whether a TP tn is broken or not would have been determined already in
			 * mur_forward. In this case, rctl->forw_multi would be set appropriately. So use that to get to
			 * "multi" and avoid a hashtable lookup. If forw_multi is NULL (e.g. for ZTP or single-region TP),
			 * the hash-table lookup cannot be avoided.
			 */
			multi = NULL;
			forw_multi = rctl->forw_multi;
			if (NULL != forw_multi)
			{
				multi = forw_multi->multi;
				/* Always honor the "recstat" from the forw_multi since that has been determined taking into
				 * consideration the BROKEN_TN status of ALL participating regions.
				 */
				assert((GOOD_TN != forw_multi->recstat) || (GOOD_TN == recstat));
				recstat = forw_multi->recstat;
			} else if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
			{	/* Possibly broken: look the token up; a non-zero partner count means not all
				 * regions' TCOMs were seen, i.e. the transaction is broken.
				 */
				assert(!mur_options.rollback || process_losttn);
				rec_fence = GET_REC_FENCE_TYPE(rectype);
				assert(rec_token_seq == ((struct_jrec_upd *)rec)->token_seq.token);
				multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, rec_fence);
				if ((NULL != multi) && (0 < multi->partner))
				{
					process_losttn = rctl->process_losttn = TRUE;
					recstat = BROKEN_TN;
				}
			}
			/* Check that if the hashtable reports a tn as GOOD, it better have had the same
			 * # of participants in the TCOM records across all the participating regions.
			 */
			assert((NULL == multi) || (BROKEN_TN == recstat) || (FALSE == multi->this_is_broken));
		} else if ((FENCE_ALWAYS == mur_options.fences) && is_set_kill_zkill_ztworm_lgtrig_ztrig)
		{	/* -fences=ALWAYS treats any unfenced logical update as broken */
			process_losttn = rctl->process_losttn = TRUE;
			recstat = BROKEN_TN;
		}
	} else
		forw_multi = NULL;
	if (mur_options.show)
	{	/* Maintain per-process state (ACTIVE/FINISHED/BROKEN) and per-record-type counts for -show */
		assert(SS_NORMAL == status);
		if (BROKEN_TN != recstat)
		{
			if (JRT_PFIN == rectype)
				status = mur_pini_state(jctl, rec->prefix.pini_addr, FINISHED_PROC);
			else if ((JRT_EOF != rectype)
					&& ((JRT_ALIGN != rectype) || (JNL_HDR_LEN != rec->prefix.pini_addr)))
			{	/* Note that it is possible that we have a PINI record followed by a PFIN record
				 * and later an ALIGN record with the pini_addr pointing to the original PINI
				 * record (see comment in jnl_write.c where pini_addr gets assigned to JNL_HDR_LEN)
				 * In this case we do not want the ALIGN record to cause the process to become
				 * ACTIVE although it has written a PFIN record. Hence the check above.
				 */
				status = mur_pini_state(jctl, rec->prefix.pini_addr, ACTIVE_PROC);
			}
		} else
			status = mur_pini_state(jctl, rec->prefix.pini_addr, BROKEN_PROC);
		if (SS_NORMAL != status)
			return status;	/* "mur_pini_state" failed due to bad pini_addr */
		++jctl->jnlrec_cnt[rectype];	/* for -show=STATISTICS */
	}
	if (!mur_options.update && !jgbl.mur_extract)
		return SS_NORMAL;	/* neither recovering/rolling back nor extracting: nothing more to do */
	if (murgbl.ok_to_update_db && IS_TUPD(rectype) && (GOOD_TN == recstat))
	{	/* Even for FENCE_NONE we apply fences. Otherwise a TUPD becomes UPD etc.
		 * If forw_multi is non-NULL, a multi-region TP transaction is being played as a SINGLE
		 * TP transaction across all the involved regions. Therefore only ONE op_tstart is done
		 * even though more than one TSET might be encountered. In this case, do not issue JNLTPNEST error.
		 */
		if (dollar_tlevel && (NULL == forw_multi))
		{	/* Unexpected nested TSTART: warn and roll back the stray transaction before restarting */
			assert(FALSE);
			murgbl.wrn_count++;
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(6) ERR_JNLTPNEST, 4, jctl->jnl_fn_len,
				jctl->jnl_fn, jctl->rec_offset, &rec->prefix.tn);
			OP_TROLLBACK(0);
		}
		if (!dollar_tlevel)
		{	/* Note: op_tstart resets gv_currkey. So set gv_currkey later. */
			/* mv is used to determine transaction id. But it is ignored by recover/rollback */
			mv.mvtype = MV_STR;
			mv.str.len = 0;
			mv.str.addr = NULL;
			op_tstart(IMPLICIT_TSTART, TRUE, &mv, -1);
			DEBUG_ONLY(jgbl.max_tp_ztp_jnl_upd_num = 0;)
		}
		tp_set_sgm();	/* needed to set "sgm_info_ptr" to correspond to "rctl" */
	}
/* This routine is called only for recover and rollback (that is, mur_options.update).
 * It applies the set/kill/zkill, tcom, inctn, and aimg records during forward processing.
 * Some fields like jnl_seqno, rec_seqno and prefix.time are saved here from original journal files.
 * Later jnl_write routines copies them to journal records instead of generating them like the runtime system.
 *
 * Parameter: rctl - region control block whose current journal record (rctl->mur_desc->jnlrec) is applied.
 * Returns SS_NORMAL, or a mur_get_pini failure status.
 * NOTE(review): the remainder of this function (handling of record types other than SET/KILL/ZKILL/ZTRIG)
 * lies outside this chunk — only the visible portion is documented here.
 */
uint4	mur_output_record(reg_ctl_list *rctl)
{
	mval			mv;
	jnl_record		*rec;
	char			*val_ptr;
	int			strm_num;
	uint4			dummy;
	off_jnl_t		pini_addr;
	jnl_string		*keystr;
	enum jnl_record_type	rectype;
	uint4			jnl_status, status;
	pini_list_struct	*plst;
	boolean_t		jnl_enabled, was_crit;
	struct_jrec_null	null_record;
	gd_region		*reg;
	seq_num			strm_seqno;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	jnl_ctl_list		*jctl;
	jnl_format_buffer	*ztworm_jfb;
	blk_hdr_ptr_t		aimg_blk_ptr;
	int			in_len, gtmcrypt_errno;
	boolean_t		use_new_key;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(mur_options.update);
	rec = rctl->mur_desc->jnlrec;
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	switch (rectype)
	{	/* Record types that carry no database update to replay */
		case JRT_ALIGN:
		case JRT_EOF:
		case JRT_EPOCH:
		case JRT_PBLK:
		case JRT_PINI:
		case JRT_TRUNC:
			return SS_NORMAL;
			break;
		default:
			break;
	}
	/* Replay uses the ORIGINAL record's timestamp, not current time */
	jgbl.gbl_jrec_time = rec->prefix.time;
	pini_addr = rec->prefix.pini_addr;
	reg = rctl->gd;
	jctl = rctl->jctl;
	assert(jctl->reg_ctl == rctl);
	assert(gv_cur_region == reg);
	csa = rctl->csa;
	assert(cs_addrs == csa);
	csd = csa->hdr;
	assert(cs_data == csd);
	jnl_enabled = JNL_ENABLED(csa);
	if (jnl_enabled)
	{	/* Map the original record's PINI to one in the new journal file being written during replay */
		status = mur_get_pini(jctl, pini_addr, &plst);
		if (SS_NORMAL != status)
			return status;
		prc_vec = &plst->jpv;
		csa->jnl->pini_addr = plst->new_pini_addr;
		rctl->mur_plst = plst;
	}
	if (mur_options.rollback && IS_REPLICATED(rectype))
	{	/* Track the highest replayed seqno so consist_jnl_seqno ends up one past it */
		jgbl.mur_jrec_seqno = GET_JNL_SEQNO(rec);
		if (jgbl.mur_jrec_seqno >= murgbl.consist_jnl_seqno)
		{
			assert(murgbl.losttn_seqno >= (jgbl.mur_jrec_seqno + 1));
			murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno + 1;
		}
		jgbl.mur_jrec_strm_seqno = GET_STRM_SEQNO(rec);
		strm_seqno = jgbl.mur_jrec_strm_seqno;
		if (strm_seqno)
		{	/* maintain csd->strm_reg_seqno */
			strm_num = GET_STRM_INDEX(strm_seqno);
			strm_seqno = GET_STRM_SEQ60(strm_seqno);
			assert(csd->strm_reg_seqno[strm_num] <= (strm_seqno + 1));
			csd->strm_reg_seqno[strm_num] = strm_seqno + 1;
		}
	}
	/* Assert that TREF(gd_targ_gvnh_reg) is NULL for every update that journal recovery/rollback plays forward;
	 * This is necessary to ensure every update is played in only the database file where the journal record is seen
	 * instead of across all regions that span the particular global reference. For example if ^a(1) spans db files
	 * a.dat and b.dat, and a KILL ^a(1) is done at the user level, we would see KILL ^a(1) journal records in a.mjl
	 * and b.mjl. When journal recovery processes the journal record in a.mjl, it should do the kill only in a.dat
	 * When it gets to the same journal record in b.mjl, it would do the same kill in b.dat and effectively complete
	 * the user level KILL ^a(1). If instead recovery does the KILL across all spanned regions, we would be basically
	 * doing duplicate work let alone do it out-of-order since recovery goes region by region for the most part.
	 */
	assert(NULL == TREF(gd_targ_gvnh_reg));
	if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
	{	/* TP and non-TP has same format */
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		if (jnl_enabled)
		{	/* Propagate original token/seqno/update-number/nodeflags into the records being rewritten */
			MUR_SET_JNL_FENCE_CTL_TOKEN(rec->jrec_set_kill.token_seq.token, rctl);
			jnl_fence_ctl.strm_seqno = rec->jrec_set_kill.strm_seqno;
			jgbl.tp_ztp_jnl_upd_num = rec->jrec_set_kill.update_num;
			DEBUG_ONLY(jgbl.max_tp_ztp_jnl_upd_num = MAX(jgbl.max_tp_ztp_jnl_upd_num,
				jgbl.tp_ztp_jnl_upd_num);)
			jgbl.mur_jrec_nodeflags = keystr->nodeflags;
		}
		if (IS_FENCED(rectype))
		{	/* Even for FENCE_NONE we apply fences. Otherwise an [F/G/T/U]UPD becomes UPD etc. */
			/* op_tstart is called in "mur_forward_play_cur_jrec" already */
			if (IS_FUPD(rectype))
			{	/* First update of a ZTP fence: initialize the fence list */
				jnl_fence_ctl.level = 1;
				if (jnl_enabled)
				{
					jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;
					csa->next_fenced = NULL;
				}
			} else if (IS_GUPD(rectype))
			{
				jnl_fence_ctl.level = 1;
				if (jnl_enabled)
				{
					jnl_fence_ctl.fence_list = csa;
					csa->next_fenced = JNL_FENCE_LIST_END;
				}
			} else if (IS_TP(rectype))
				tp_set_sgm();
		}
#		ifdef GTM_TRIGGER
		/* Check if ^#t and if so need to increment trigger cycle in file header. Note that the below 'if' check could
		 * cause csd->db_trigger_cycle to be incremented even for the region that actually did NOT get any trigger
		 * updates. This is because some of the ^#t subscripts (like ^#t(#TNAME)) go to the DEFAULT region. So, even
		 * though a trigger was loaded only for ^a (corresponding to AREG), csd->db_trigger_cycle will be incremented
		 * for DEFAULT region as well. To avoid this, the below check should be modified to set
		 * csa->incr_db_trigger_cycle only if the ^#t subscript does not begin with '#' (similar to what is done in
		 * UPD_GV_BIND_NAME_APPROPRIATE). However, since journal recovery operates in standalone mode, the
		 * db_trigger_cycle increment to DEFAULT region should be okay since it will NOT cause any restarts
		 */
		if (IS_GVKEY_HASHT_GBLNAME(keystr->length, keystr->text))
		{
			assert(cs_addrs == csa);
			csa->incr_db_trigger_cycle = TRUE;
		}
#		endif
		if (IS_SET(rectype))
		{	/* Value follows the key within the jnl_string; build an mval and apply the SET */
			val_ptr = &keystr->text[keystr->length];
			GET_MSTR_LEN(mv.str.len, val_ptr);
			mv.str.addr = val_ptr + SIZEOF(mstr_len_t);
			mv.mvtype = MV_STR;
			op_gvput(&mv);
		} else if (IS_KILL(rectype))
		{
			if (IS_TP(rectype))
				tp_set_sgm();
			op_gvkill();
#		ifdef GTM_TRIGGER
		} else if (IS_ZTRIG(rectype))
		{
			if (IS_TP(rectype))
				tp_set_sgm();
			op_ztrigger();
#		endif
		} else
		{
			assert(IS_ZKILL(rectype));
			if (IS_TP(rectype))
				tp_set_sgm();
			op_gvzwithdraw();
		}
		if (IS_ZTP(rectype))
		{	/* Even for FENCE_NONE we apply fences. Otherwise an FUPD/GUPD becomes UPD etc. */
			assert(jnl_enabled || (JNL_FENCE_LIST_END == jnl_fence_ctl.fence_list
				&& NULL == csa->next_fenced));
			jnl_fence_ctl.level = 0;
			if (jnl_enabled)
			{
				jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;
				csa->next_fenced = NULL;
			}
		}
		return SS_NORMAL;
	}