/* This routine formats and outputs journal extract records corresponding to M SET, KILL, ZKILL, TSTART, ZTSTART,
 * and ZTRIGGER commands, $ZTRIGGER function (LGTRIG) and $ZTWORMHOLE.
 *
 *	jctl	- journal control structure; handed to EXT_DET_COMMON_PREFIX and used to report the journal file
 *		  name and record offset if the value does not fit in the extract buffer in zwr format.
 *	fi	- extract file information, passed through to jnlext_write().
 *	rec	- the journal record to format.
 *	plst	- process initialization list entry, handed to EXTPID.
 *
 * If the record starts a fence (IS_FUPD_TUPD) a separate TSTART/ZTSTART line is written first, followed by the line
 * for the update itself. Each line is assembled in murgbl.extr_buff and written out with jnlext_write().
 */
void	mur_extract_set(jnl_ctl_list *jctl, fi_type *fi, jnl_record *rec, pini_list_struct *plst)
{
	enum jnl_record_type	rectype;
	/* NOTE(review): some of these locals (e.g. "actual", "ptr") look unused in this function but may be
	 * referenced by the EXT* macros, whose definitions are not visible here. Do not remove without checking.
	 */
	int			max_blen, actual, extract_len, val_extr_len, val_len;
	char			*val_ptr, *ptr, *buff;
	jnl_string		*keystr;
	boolean_t		do_format2zwr, is_ztstart;

	if (!mur_options.detail)
		extract_len = 0;
	else
		EXT_DET_COMMON_PREFIX(jctl);
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	if (IS_FUPD_TUPD(rectype))
	{	/* First emit the TSTART/ZTSTART line that opens the fence */
		if (!mur_options.detail)
		{
			if (IS_TUPD(rectype))
			{
				EXT2BYTES(&muext_code[MUEXT_TSTART][0]); /* TSTART */
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				EXT2BYTES(&muext_code[MUEXT_ZTSTART][0]); /* ZTSTART */
				is_ztstart = TRUE;
			}
		} else
		{
			if (IS_TUPD(rectype))
			{
				strcpy(murgbl.extr_buff + extract_len, "TSTART \\");
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				strcpy(murgbl.extr_buff + extract_len, "ZTSTART\\");
				is_ztstart = TRUE;
			}
			extract_len = STRLEN(murgbl.extr_buff);
		}
		EXTTIME(rec->prefix.time);
		EXTQW(rec->prefix.tn);
		if (mur_options.detail)
			EXTINT(rec->prefix.checksum);
		EXTPID(plst);
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
		if (!is_ztstart)
			EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
		jnlext_write(fi, murgbl.extr_buff, extract_len);
	}
	/* Output the SET or KILL or ZKILL or ZTWORMHOLE or LGTRIG or ZTRIG record */
	if (!mur_options.detail)
	{
		extract_len = 0;
		if (IS_SET(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_SET][0]);
		} else if (IS_KILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_KILL][0]);
		} else if (IS_ZKILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZKILL][0]);
		} else if (IS_ZTWORM(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTWORM][0]);
		} else if (IS_LGTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_LGTRIG][0]);
		} else if (IS_ZTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTRIG][0]);
		} else
			assert(FALSE);	/* The assert will disappear in pro but not the ";" to properly terminate the else */
	} else
	{
		if (IS_FUPD_TUPD(rectype))
		{	/* The buffer was already used for the TSTART/ZTSTART line above, so start this line with
			 * 23 blanks (presumably the width of the detail prefix -- TODO confirm). memset is used
			 * because copying 23 bytes out of a shorter string literal would read past its end.
			 */
			memset(murgbl.extr_buff, ' ', 23);
			extract_len = 23;
		} else
			extract_len = STRLEN(murgbl.extr_buff);
		/* NOTE(review): the bytes written by this strcpy are overwritten by the label/terminator copies
		 * below (to the extent LAB_LEN + LAB_TERM_SZ covers them); kept to preserve existing behavior.
		 */
		strcpy(murgbl.extr_buff + extract_len, " \\");
		memcpy(murgbl.extr_buff + extract_len, jrt_label[rectype], LAB_LEN);
		extract_len += LAB_LEN;
		memcpy(murgbl.extr_buff + extract_len, LAB_TERM, LAB_TERM_SZ);
		extract_len += LAB_TERM_SZ;
	}
	EXTTIME(rec->prefix.time);
	EXTQW(rec->prefix.tn);
	if (mur_options.detail)
		EXTINT(rec->prefix.checksum);
	EXTPID(plst);
	if (IS_ZTP(rectype))
	{
		EXTQW(rec->jrec_set_kill.token_seq.token);
	} else
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
	assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
	/* strm_seqno and update_num are read through jrec_set_kill for every record type handled here; the
	 * asserts verify that the ZTWORM and LGTRIG layouts keep those fields at the same address.
	 */
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_ztworm.strm_seqno);
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_lgtrig.strm_seqno);
	EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
	EXTINT(rec->jrec_set_kill.update_num);
	do_format2zwr = FALSE;
	if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
	{
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		EXTINT(keystr->nodeflags);
		buff = &murgbl.extr_buff[extract_len];
		max_blen = MIN(MAX_ZWR_KEY_SZ, murgbl.max_extr_record_length - extract_len);
		assert(MAX_ZWR_KEY_SZ == max_blen);	/* We allocated enough for key and data expansion for ZWR format */
		/* Note: the key that gets formatted is gv_currkey, not keystr */
		ptr = (char *)format_targ_key((uchar_ptr_t)buff, max_blen, gv_currkey, TRUE);
		assert(NULL != ptr);
		if (NULL != ptr)
			extract_len += (int)(ptr - &murgbl.extr_buff[extract_len]);
		if (IS_SET(rectype))
		{	/* The value follows the key text: a length (read with GET_MSTR_LEN) and then the data */
			murgbl.extr_buff[extract_len++] = '=';
			val_ptr = &keystr->text[keystr->length];
			GET_MSTR_LEN(val_len, val_ptr);
			val_ptr += SIZEOF(mstr_len_t);
			do_format2zwr = TRUE;
		}
	} else if (IS_ZTWORM(rectype) || IS_LGTRIG(rectype))
	{
		assert(&rec->jrec_ztworm.ztworm_str == &rec->jrec_lgtrig.lgtrig_str);
		keystr = (jnl_string *)&rec->jrec_ztworm.ztworm_str;
		val_len = keystr->length;
		val_ptr = &keystr->text[0];
		do_format2zwr = TRUE;
	}
	if (do_format2zwr)
	{
		if (ZWR_EXP_RATIO(val_len) <= murgbl.max_extr_record_length - extract_len)
		{
			ptr = &murgbl.extr_buff[extract_len];
			format2zwr((sm_uc_ptr_t)val_ptr, val_len, (unsigned char *)ptr, &val_extr_len);
			extract_len += val_extr_len;
		} else
		{	/* Value does not fit: report it and (outside of debug builds) emit the line without the value */
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_JNLBADRECFMT, 3, jctl->jnl_fn_len, jctl->jnl_fn,
				jctl->rec_offset, ERR_TEXT, 2,
				LEN_AND_LIT("Length of the record is too high for zwr format"));
			if (mur_options.verbose || mur_options.detail)
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4,
					LEN_AND_LIT("After max expansion record length"),
					ZWR_EXP_RATIO(val_len), ZWR_EXP_RATIO(val_len));
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4,
					LEN_AND_LIT("Buffer size"),
					murgbl.max_extr_record_length - extract_len,
					murgbl.max_extr_record_length - extract_len);
			}
			assert(FALSE);
		}
	}
	murgbl.extr_buff[extract_len++] = '\\';
	jnlext_write(fi, murgbl.extr_buff, extract_len);
}
char *jnl2ext(char *jnl_buff, char *ext_buff) { char *curr, *val_ptr, *ptr, rectype, key_buff[sizeof(gv_key) + MAX_KEY_SZ + 7]; jnl_record *rec; gv_key *key; jnl_string *keystr; int val_extr_len, val_len, rec_len; rec = (jnl_record *)jnl_buff; rectype = rec->prefix.jrec_type; rec_len = rec->prefix.forwptr; if (rec_len != REC_LEN_FROM_SUFFIX(jnl_buff, rec_len)) { assert(FALSE); return ext_buff; } if (!IS_REPLICATED(rectype)) { assert(FALSE); return ext_buff; } curr = ext_buff; if (IS_TUPD(rectype)) { if (FALSE == first_tstart) { GET_SHORTP(curr, &muext_code[MUEXT_TSTART][0]); curr += 2; DELIMIT_CURR; MEMCPY_LIT(curr, ZERO_TIME_DELIM); curr += STR_LIT_LEN(ZERO_TIME_DELIM); curr = (char *)i2asc((uchar_ptr_t)curr, rec->jrec_kill.prefix.tn); DELIMIT_CURR; MEMCPY_LIT(curr, PIDS_DELIM); curr += STR_LIT_LEN(PIDS_DELIM); curr = (char *)i2ascl((uchar_ptr_t)curr, rec->jrec_kill.token_seq.jnl_seqno); *curr++ = '\n'; *curr = '\0'; first_tstart = TRUE; } num_tstarts++; } else if (JRT_TCOM == rectype) { num_tcommits++; if (num_tcommits == num_tstarts) { num_tcommits = num_tstarts = 0; first_tstart = FALSE; GET_SHORTP(curr, &muext_code[MUEXT_TCOMMIT][0]); curr += 2; DELIMIT_CURR; MEMCPY_LIT(curr, ZERO_TIME_DELIM); curr += STR_LIT_LEN(ZERO_TIME_DELIM); curr = (char *)i2asc((uchar_ptr_t)curr, rec->jrec_tcom.prefix.tn); DELIMIT_CURR; MEMCPY_LIT(curr, PIDS_DELIM); curr += STR_LIT_LEN(PIDS_DELIM); curr = (char *)i2ascl((uchar_ptr_t)curr, rec->jrec_tcom.token_seq.jnl_seqno); DELIMIT_CURR; curr = (char *)i2ascl((uchar_ptr_t)curr, rec->jrec_tcom.participants); *curr++ = '\n'; *curr = '\0'; return curr; } return ext_buff; } if (IS_SET(rectype)) GET_SHORTP(curr, &muext_code[MUEXT_SET][0]); else if (IS_KILL(rectype)) GET_SHORTP(curr, &muext_code[MUEXT_KILL][0]); else if (IS_ZKILL(rectype)) GET_SHORTP(curr, &muext_code[MUEXT_ZKILL][0]); else /* if (JRT_NULL == rectype) */ { assert(JRT_NULL == rectype); GET_SHORTP(curr, &muext_code[MUEXT_NULL][0]); } curr += 2; DELIMIT_CURR; MEMCPY_LIT(curr, 
ZERO_TIME_DELIM); curr += STR_LIT_LEN(ZERO_TIME_DELIM); curr = (char *)i2asc((uchar_ptr_t)curr, rec->jrec_kill.prefix.tn); DELIMIT_CURR; MEMCPY_LIT(curr, PIDS_DELIM); curr += STR_LIT_LEN(PIDS_DELIM); curr = (char *)i2ascl((uchar_ptr_t)curr, rec->jrec_kill.token_seq.jnl_seqno); if (rectype == JRT_NULL) { *curr++ = '\n'; *curr='\0'; return curr; } assert(IS_SET_KILL_ZKILL(rectype)); DELIMIT_CURR; keystr = (jnl_string *)&rec->jrec_kill.mumps_node; ptr = (char *)ROUND_UP((uint4)key_buff, 8); key = (gv_key *)ptr; key->top = MAX_KEY_SZ; key->end = keystr->length; if (key->end > key->top) { assert(FALSE); return ext_buff; } memcpy(key->base, &keystr->text[0], keystr->length); key->base[key->end] = 0; curr = (char *)format_targ_key((uchar_ptr_t)curr, MAX_ZWR_KEY_SZ, key, TRUE); if (IS_SET(rectype)) { *curr++ = '='; val_ptr = &keystr->text[keystr->length]; GET_MSTR_LEN(val_len, val_ptr); val_ptr += sizeof(mstr_len_t); format2zwr((sm_uc_ptr_t)val_ptr, val_len, (uchar_ptr_t)curr, &val_extr_len); curr += val_extr_len; } *curr++ = '\n'; *curr='\0'; return curr; }
/* This routine is called only for recover and rollback (that is, mur_options.update). * It applies the set/kill/zkill, tcom, inctn, and aimg records during forward processing. * Some fields like jnl_seqno, rec_seqno and prefix.time are saved here from original journal files. * Later jnl_write routines copies them to journal records instead of generating them like the runtime system */ uint4 mur_output_record(reg_ctl_list *rctl) { mval mv; jnl_record *rec; char *val_ptr; int strm_num; uint4 dummy; off_jnl_t pini_addr; jnl_string *keystr; enum jnl_record_type rectype; uint4 jnl_status, status; pini_list_struct *plst; boolean_t jnl_enabled, was_crit; struct_jrec_null null_record; gd_region *reg; seq_num strm_seqno; sgmnt_addrs *csa; sgmnt_data_ptr_t csd; jnl_ctl_list *jctl; jnl_format_buffer *ztworm_jfb; blk_hdr_ptr_t aimg_blk_ptr; int in_len, gtmcrypt_errno; boolean_t use_new_key; DCL_THREADGBL_ACCESS; SETUP_THREADGBL_ACCESS; assert(mur_options.update); rec = rctl->mur_desc->jnlrec; rectype = (enum jnl_record_type)rec->prefix.jrec_type; switch (rectype) { case JRT_ALIGN: case JRT_EOF: case JRT_EPOCH: case JRT_PBLK: case JRT_PINI: case JRT_TRUNC: return SS_NORMAL; break; default: break; } jgbl.gbl_jrec_time = rec->prefix.time; pini_addr = rec->prefix.pini_addr; reg = rctl->gd; jctl = rctl->jctl; assert(jctl->reg_ctl == rctl); assert(gv_cur_region == reg); csa = rctl->csa; assert(cs_addrs == csa); csd = csa->hdr; assert(cs_data == csd); jnl_enabled = JNL_ENABLED(csa); if (jnl_enabled) { status = mur_get_pini(jctl, pini_addr, &plst); if (SS_NORMAL != status) return status; prc_vec = &plst->jpv; csa->jnl->pini_addr = plst->new_pini_addr; rctl->mur_plst = plst; } if (mur_options.rollback && IS_REPLICATED(rectype)) { jgbl.mur_jrec_seqno = GET_JNL_SEQNO(rec); if (jgbl.mur_jrec_seqno >= murgbl.consist_jnl_seqno) { assert(murgbl.losttn_seqno >= (jgbl.mur_jrec_seqno + 1)); murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno + 1; } jgbl.mur_jrec_strm_seqno = GET_STRM_SEQNO(rec); 
strm_seqno = jgbl.mur_jrec_strm_seqno; if (strm_seqno) { /* maintain csd->strm_reg_seqno */ strm_num = GET_STRM_INDEX(strm_seqno); strm_seqno = GET_STRM_SEQ60(strm_seqno); assert(csd->strm_reg_seqno[strm_num] <= (strm_seqno + 1)); csd->strm_reg_seqno[strm_num] = strm_seqno + 1; } } /* Assert that TREF(gd_targ_gvnh_reg) is NULL for every update that journal recovery/rollback plays forward; * This is necessary to ensure every update is played in only the database file where the journal record is seen * instead of across all regions that span the particular global reference. For example if ^a(1) spans db files * a.dat and b.dat, and a KILL ^a(1) is done at the user level, we would see KILL ^a(1) journal records in a.mjl * and b.mjl. When journal recovery processes the journal record in a.mjl, it should do the kill only in a.dat * When it gets to the same journal record in b.mjl, it would do the same kill in b.dat and effectively complete * the user level KILL ^a(1). If instead recovery does the KILL across all spanned regions, we would be basically * doing duplicate work let alone do it out-of-order since recovery goes region by region for the most part. */ assert(NULL == TREF(gd_targ_gvnh_reg)); if (IS_SET_KILL_ZKILL_ZTRIG(rectype)) { /* TP and non-TP has same format */ keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node; if (jnl_enabled) { MUR_SET_JNL_FENCE_CTL_TOKEN(rec->jrec_set_kill.token_seq.token, rctl); jnl_fence_ctl.strm_seqno = rec->jrec_set_kill.strm_seqno; jgbl.tp_ztp_jnl_upd_num = rec->jrec_set_kill.update_num; DEBUG_ONLY(jgbl.max_tp_ztp_jnl_upd_num = MAX(jgbl.max_tp_ztp_jnl_upd_num, jgbl.tp_ztp_jnl_upd_num);) jgbl.mur_jrec_nodeflags = keystr->nodeflags; } if (IS_FENCED(rectype)) { /* Even for FENCE_NONE we apply fences. Otherwise an [F/G/T/U]UPD becomes UPD etc. 
*/ /* op_tstart is called in "mur_forward_play_cur_jrec" already */ if (IS_FUPD(rectype)) { jnl_fence_ctl.level = 1; if (jnl_enabled) { jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; csa->next_fenced = NULL; } } else if (IS_GUPD(rectype)) { jnl_fence_ctl.level = 1; if (jnl_enabled) { jnl_fence_ctl.fence_list = csa; csa->next_fenced = JNL_FENCE_LIST_END; } } else if (IS_TP(rectype)) tp_set_sgm(); } # ifdef GTM_TRIGGER /* Check if ^#t and if so need to increment trigger cycle in file header. Note that the below 'if' check could cause * csd->db_trigger_cycle to be incremented even for the region that actually did NOT get any trigger updates. This * is because some of the ^#t subscripts (like ^#t(#TNAME)) go to the DEFAULT region. So, even though a trigger was * loaded only for ^a (corresponding to AREG), csd->db_trigger_cycle will be incremented for DEFAULT region as well. * To avoid this, the below check should be modified to set csa->incr_db_trigger_cycle only if the ^#t subscript * does not begin with '#' (similar to what is done in UPD_GV_BIND_NAME_APPROPRIATE). However, since journal * recovery operates in standalone mode, the db_trigger_cycle increment to DEFAULT region should be okay since it * will NOT cause any restarts */ if (IS_GVKEY_HASHT_GBLNAME(keystr->length, keystr->text)) { assert(cs_addrs == csa); csa->incr_db_trigger_cycle = TRUE; } # endif if (IS_SET(rectype)) { val_ptr = &keystr->text[keystr->length]; GET_MSTR_LEN(mv.str.len, val_ptr); mv.str.addr = val_ptr + SIZEOF(mstr_len_t); mv.mvtype = MV_STR; op_gvput(&mv); } else if (IS_KILL(rectype)) { if (IS_TP(rectype)) tp_set_sgm(); op_gvkill(); # ifdef GTM_TRIGGER } else if (IS_ZTRIG(rectype)) { if (IS_TP(rectype)) tp_set_sgm(); op_ztrigger(); # endif } else { assert(IS_ZKILL(rectype)); if (IS_TP(rectype)) tp_set_sgm(); op_gvzwithdraw(); } if (IS_ZTP(rectype)) { /* Even for FENCE_NONE we apply fences. Otherwise an FUPD/GUPD becomes UPD etc. 
*/ assert(jnl_enabled || (JNL_FENCE_LIST_END == jnl_fence_ctl.fence_list && NULL == csa->next_fenced)); jnl_fence_ctl.level = 0; if (jnl_enabled) { jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; csa->next_fenced = NULL; } } return SS_NORMAL; }