Esempio n. 1
0
/* This called for TP and non-TP, but not for ZTP */
void	jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb)
{
	struct_jrec_upd		*jrec;
	jnl_private_control	*jpc;

	/* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes
	 * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record
	 * to the journal buffer or journal file but write it only to the journal pool from where it gets
	 * sent across to the update process that does not care about these fields so it is ok to leave them as is.
	 */
	jpc = csa->jnl;
	assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa));
	assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa));
	assert(csa->now_crit);
	assert(IS_SET_KILL_ZKILL(jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	jrec = (struct_jrec_upd *)jfb->buff;
	jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? JNL_HDR_LEN : jpc->pini_addr;
	jrec->prefix.tn = csa->ti->curr_tn;
	jrec->prefix.time = jgbl.gbl_jrec_time;
	jrec->prefix.checksum = jfb->checksum;
	if (jgbl.forw_phase_recovery)
	{
		QWASSIGN(jrec->token_seq, jgbl.mur_jrec_token_seq);
	} else
	{	/* t_end and tp_tend already has set token or jnl_seqno into jnl_fence_ctl.token */
		QWASSIGN(jrec->token_seq.token, jnl_fence_ctl.token);
	}
	JNL_WRITE_APPROPRIATE(csa, jpc, jfb->rectype, (jnl_record *)jrec, NULL, jfb);
}
Esempio n. 2
0
void repl_sort_tr_buff(uchar_ptr_t tr_buff, uint4 tr_bufflen)
{
	boolean_t		already_sorted, is_set_kill_zkill_ztrig_ztworm, sorting_needed;
	uchar_ptr_t		tb, dst_addr, this_jrec_addr, working_record_addr, next_record_addr, reg_top;
	static uchar_ptr_t	private_tr_buff;
	static reg_jrec_info_t	*reg_jrec_info_array;
	static uint4		private_tr_bufflen = 0, max_participants = 0;
	struct_jrec_tcom	*last_tcom_rec_ptr;
	enum jnl_record_type	rectype;
	int			balanced, tlen;
	uint4			num_records, cur_rec_idx, reg_idx, reclen, cur_updnum, max_updnum = 0, prev_updnum = 0;
	uint4			working_record, copy_len, idx, min_updnum_reg, min_val, next_min_val, this_reg_updnum;
	uint4			participants;
#	ifdef DEBUG
	uint4			tmp_sum, tcom_num = 0, prev_updnum_this_reg;
#	endif
	jnl_record		*rec;
	jrec_prefix		*prefix;
	long			first_tcom_offset = 0;

	tb = tr_buff;
	tlen = tr_bufflen;
	assert(0 != tr_bufflen);
	assert(0 == ((UINTPTR_T)tb % SIZEOF(uint4)));
	prefix = (jrec_prefix *)tb;
	rectype = (enum jnl_record_type)prefix->jrec_type;
	assert(!IS_ZTP(rectype));
	if (prefix->forwptr == tlen)
	{	/* there is only one journal record in this buffer. Make sure it is either JRT_SET/JRT_KILL/JRT_NULL */
		assert((JRT_SET == rectype) || (JRT_KILL == rectype) || (JRT_ZKILL == rectype) || (JRT_NULL == rectype));
		/* No sorting needed. */
		return;
	} else /* We have a TP transaction buffer */
	{
		if (!IS_TUPD(rectype))
		{
			assert(FALSE);
			return;
		}
		/* We should have at least one TCOM record at the end. The check for balanced TSET/TCOM pairs will be done below */
		last_tcom_rec_ptr = (struct_jrec_tcom *)(tb + tlen - SIZEOF(struct_jrec_tcom));
		prefix = (jrec_prefix *)(last_tcom_rec_ptr);
		participants = last_tcom_rec_ptr->num_participants;
		if (JRT_TCOM != prefix->jrec_type)
		{
			assert(FALSE);
			return;
		}
	}
	PRO_ONLY(
		/* A single region TP transaction is always sorted. So, for pro, return without addition sorting */
		if (1 == participants)
			return;
	)
Esempio n. 3
0
/* This called for TP and non-TP, but not for ZTP */
void	jnl_write_logical(sgmnt_addrs *csa, jnl_format_buffer *jfb, uint4 com_csum, jnlpool_write_ctx_t *jplctx)
{
	struct_jrec_upd		*jrec;
	struct_jrec_null	*jrec_null;
	struct_jrec_upd		*jrec_alt;
	jnl_private_control	*jpc;
	/* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes
	 * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record
	 * to the journal buffer or journal file but write it only to the journal pool from where it gets
	 * sent across to the update process that does not care about these fields so it is ok to leave them as is.
	 */
	jpc = csa->jnl;
	assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa));
	assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa));
	assert(csa->now_crit);
	assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(jfb->rectype) || (JRT_NULL == jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	jrec = (struct_jrec_upd *)jfb->buff;
	assert(OFFSETOF(struct_jrec_null, prefix) == OFFSETOF(struct_jrec_upd, prefix));
	assert(SIZEOF(jrec_null->prefix) == SIZEOF(jrec->prefix));
	jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? JNL_HDR_LEN : jpc->pini_addr;
	jrec->prefix.tn = csa->ti->curr_tn;
	jrec->prefix.time = jgbl.gbl_jrec_time;
	/* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */
	assert((0 != jnl_fence_ctl.token) || (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa))
		|| (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state)));
	assert(OFFSETOF(struct_jrec_null, jnl_seqno) == OFFSETOF(struct_jrec_upd, token_seq));
	assert(SIZEOF(jrec_null->jnl_seqno) == SIZEOF(jrec->token_seq));
	jrec->token_seq.token = jnl_fence_ctl.token;
	assert(OFFSETOF(struct_jrec_null, strm_seqno) == OFFSETOF(struct_jrec_upd, strm_seqno));
	assert(SIZEOF(jrec_null->strm_seqno) == SIZEOF(jrec->strm_seqno));
	jrec->strm_seqno = jnl_fence_ctl.strm_seqno;
	/* update checksum below */
	if(JRT_NULL != jrec->prefix.jrec_type)
	{
		COMPUTE_LOGICAL_REC_CHECKSUM(jfb->checksum, jrec, com_csum, jrec->prefix.checksum);
	} else
		jrec->prefix.checksum = compute_checksum(INIT_CHECKSUM_SEED, (unsigned char *)jrec, SIZEOF(struct_jrec_null));
	if (REPL_ALLOWED(csa) && USES_ANY_KEY(csa->hdr))
	{
		jrec_alt = (struct_jrec_upd *)jfb->alt_buff;
		jrec_alt->prefix = jrec->prefix;
		jrec_alt->token_seq = jrec->token_seq;
		jrec_alt->strm_seqno = jrec->strm_seqno;
		jrec_alt->num_participants = jrec->num_participants;
	}
	JNL_WRITE_APPROPRIATE(csa, jpc, jfb->rectype, (jnl_record *)jrec, NULL, jfb, jplctx);
}
Esempio n. 4
0
	struct_jrec_null	*jrec_null;
	GTMCRYPT_ONLY(
		struct_jrec_upd	*jrec_alt;
	)
	jnl_private_control	*jpc;
	/* If REPL_WAS_ENABLED(csa) is TRUE, then we would not have gone through the code that initializes
	 * jgbl.gbl_jrec_time or jpc->pini_addr. But in this case, we are not writing the journal record
	 * to the journal buffer or journal file but write it only to the journal pool from where it gets
	 * sent across to the update process that does not care about these fields so it is ok to leave them as is.
	 */
	jpc = csa->jnl;
	assert((0 != jpc->pini_addr) || REPL_WAS_ENABLED(csa));
	assert(jgbl.gbl_jrec_time || REPL_WAS_ENABLED(csa));
	assert(csa->now_crit);
	assert(IS_SET_KILL_ZKILL_ZTRIG_ZTWORM(jfb->rectype) || (JRT_NULL == jfb->rectype));
	assert(!IS_ZTP(jfb->rectype));
	jrec = (struct_jrec_upd *)jfb->buff;
	assert(OFFSETOF(struct_jrec_null, prefix) == OFFSETOF(struct_jrec_upd, prefix));
	assert(SIZEOF(jrec_null->prefix) == SIZEOF(jrec->prefix));
	jrec->prefix.pini_addr = (0 == jpc->pini_addr) ? JNL_HDR_LEN : jpc->pini_addr;
	jrec->prefix.tn = csa->ti->curr_tn;
	jrec->prefix.time = jgbl.gbl_jrec_time;
	/* t_end/tp_tend/mur_output_record has already set token/jnl_seqno into jnl_fence_ctl.token */
	assert((0 != jnl_fence_ctl.token) || (!dollar_tlevel && !jgbl.forw_phase_recovery && !REPL_ENABLED(csa))
		|| (!dollar_tlevel && jgbl.forw_phase_recovery && (repl_open != csa->hdr->intrpt_recov_repl_state)));
	assert(OFFSETOF(struct_jrec_null, jnl_seqno) == OFFSETOF(struct_jrec_upd, token_seq));
	assert(SIZEOF(jrec_null->jnl_seqno) == SIZEOF(jrec->token_seq));
	jrec->token_seq.token = jnl_fence_ctl.token;
	assert(OFFSETOF(struct_jrec_null, strm_seqno) == OFFSETOF(struct_jrec_upd, strm_seqno));
	assert(SIZEOF(jrec_null->strm_seqno) == SIZEOF(jrec->strm_seqno));
	jrec->strm_seqno = jnl_fence_ctl.strm_seqno;
Esempio n. 5
0
/* This routine formats and outputs journal extract records
   corresponding to M SET, KILL, ZKILL, TSTART, ZTSTART, and ZTRIGGER commands, $ZTRIGGER function (LGTRIG) and $ZTWORMHOLE */
void	mur_extract_set(jnl_ctl_list *jctl, fi_type *fi, jnl_record *rec, pini_list_struct *plst)
{
	enum jnl_record_type	rectype;
	int			max_blen, actual, extract_len, val_extr_len, val_len;
	char			*val_ptr, *ptr, *buff;
	jnl_string		*keystr;
	boolean_t		do_format2zwr, is_ztstart;

	if (!mur_options.detail)
		extract_len = 0;
	else
		EXT_DET_COMMON_PREFIX(jctl);
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	if (IS_FUPD_TUPD(rectype))
	{
		if (!mur_options.detail)
		{
			if (IS_TUPD(rectype))
			{
				EXT2BYTES(&muext_code[MUEXT_TSTART][0]); /* TSTART */
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				EXT2BYTES(&muext_code[MUEXT_ZTSTART][0]); /* ZTSTART */
				is_ztstart = TRUE;
			}
		} else
		{
			if (IS_TUPD(rectype))
			{
				strcpy(murgbl.extr_buff + extract_len, "TSTART \\");
				is_ztstart = FALSE;
			} else /* if (IS_FUPD(rectype)) */
			{
				strcpy(murgbl.extr_buff + extract_len, "ZTSTART\\");
				is_ztstart = TRUE;
			}
			extract_len = STRLEN(murgbl.extr_buff);
		}
		EXTTIME(rec->prefix.time);
		EXTQW(rec->prefix.tn);
		if (mur_options.detail)
			EXTINT(rec->prefix.checksum);
		EXTPID(plst);
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
		if (!is_ztstart)
			EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
		jnlext_write(fi, murgbl.extr_buff, extract_len);
	}
	/* Output the SET or KILL or ZKILL or ZTWORMHOLE or LGTRIG or ZTRIG record */
	if (!mur_options.detail)
	{
		extract_len = 0;
		if (IS_SET(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_SET][0]);
		} else if (IS_KILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_KILL][0]);
		} else if (IS_ZKILL(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZKILL][0]);
		} else if (IS_ZTWORM(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTWORM][0]);
		} else if (IS_LGTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_LGTRIG][0]);
		} else if (IS_ZTRIG(rectype))
		{
			EXT2BYTES(&muext_code[MUEXT_ZTRIG][0]);
		} else
			assert(FALSE);	/* The assert will disappear in pro but not the ";" to properly terminate the else */
	} else
	{
		if (IS_FUPD_TUPD(rectype))
		{
			memcpy(murgbl.extr_buff, "                       ", 23);
			extract_len = 23;
		} else
			extract_len = STRLEN(murgbl.extr_buff);
		strcpy(murgbl.extr_buff + extract_len, "       \\");
		memcpy(murgbl.extr_buff + extract_len, jrt_label[rectype], LAB_LEN);
		extract_len += LAB_LEN;
		memcpy(murgbl.extr_buff + extract_len, LAB_TERM, LAB_TERM_SZ);
		extract_len += LAB_TERM_SZ;
	}
	EXTTIME(rec->prefix.time);
	EXTQW(rec->prefix.tn);
	if (mur_options.detail)
		EXTINT(rec->prefix.checksum);
	EXTPID(plst);
	if (IS_ZTP(rectype))
	{
		EXTQW(rec->jrec_set_kill.token_seq.token);
	} else
		EXTQW(rec->jrec_set_kill.token_seq.jnl_seqno);
	assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_ztworm.strm_seqno);
	assert(&rec->jrec_set_kill.strm_seqno == &rec->jrec_lgtrig.strm_seqno);
	EXT_STRM_SEQNO(rec->jrec_set_kill.strm_seqno);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
	assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
	EXTINT(rec->jrec_set_kill.update_num);
	do_format2zwr = FALSE;
	if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
	{
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		EXTINT(keystr->nodeflags);
		buff = &murgbl.extr_buff[extract_len];
		max_blen = MIN(MAX_ZWR_KEY_SZ, murgbl.max_extr_record_length - extract_len);
		assert(MAX_ZWR_KEY_SZ == max_blen);	/* We allocated enough for key and data expansion for ZWR format */
		ptr = (char *)format_targ_key((uchar_ptr_t)buff, max_blen, gv_currkey, TRUE);
		assert(NULL != ptr);
		if (NULL != ptr)
			extract_len += (int)(ptr - &murgbl.extr_buff[extract_len]);
		if (IS_SET(rectype))
		{
			murgbl.extr_buff[extract_len++] = '=';
			val_ptr = &keystr->text[keystr->length];
			GET_MSTR_LEN(val_len, val_ptr);
			val_ptr += SIZEOF(mstr_len_t);
			do_format2zwr = TRUE;
		}
	} else if (IS_ZTWORM(rectype) || IS_LGTRIG(rectype))
	{
		assert(&rec->jrec_ztworm.ztworm_str == &rec->jrec_lgtrig.lgtrig_str);
		keystr = (jnl_string *)&rec->jrec_ztworm.ztworm_str;
		val_len = keystr->length;
		val_ptr = &keystr->text[0];
		do_format2zwr = TRUE;
	}
	if (do_format2zwr)
	{
		if (ZWR_EXP_RATIO(val_len) <= murgbl.max_extr_record_length - extract_len)
		{
			ptr = &murgbl.extr_buff[extract_len];
			format2zwr((sm_uc_ptr_t)val_ptr, val_len, (unsigned char *)ptr, &val_extr_len);
			extract_len += val_extr_len;
		} else
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_JNLBADRECFMT,
				3, jctl->jnl_fn_len, jctl->jnl_fn, jctl->rec_offset,
				ERR_TEXT, 2, LEN_AND_LIT("Length of the record is too high for zwr format"));
			if (mur_options.verbose || mur_options.detail)
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4,
					LEN_AND_LIT("After max expansion record length"),
					ZWR_EXP_RATIO(val_len), ZWR_EXP_RATIO(val_len));
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_MUINFOUINT4, 4, LEN_AND_LIT("Buffer size"),
					murgbl.max_extr_record_length - extract_len,
					murgbl.max_extr_record_length - extract_len);
			}
			assert(FALSE);
		}
	}
	murgbl.extr_buff[extract_len++] = '\\';
	jnlext_write(fi, murgbl.extr_buff, extract_len);
}
Esempio n. 6
0
int jnl_v11tov15(uchar_ptr_t jnl_buff, uint4 *jnl_len, uchar_ptr_t conv_buff, uint4 *conv_len, uint4 conv_bufsiz)
{
	/* Convert a transaction from jnl version 11 (V4.2-002) to 15 (V.4.4-002)  */

	unsigned char		*jb, *cb, *cstart, *jstart, rectype;
	int			status, reclen;
	unsigned short		key_len;
	unsigned int		long_data_len, jlen, total_data, nzeros, conv_reclen, clen_without_sfx, total_key;
	jrec_prefix		prefix;
	jrec_suffix		suffix;
	seq_num			jsno;

	jb = jnl_buff;
	cb = conv_buff;
	status = SS_NORMAL;
	jlen = *jnl_len;
	while (0 < jlen)
	{
		if (0 < (reclen = v11_jnl_record_length((jnl_record *)jb, jlen)))
		{
			if (reclen <= jlen)
			{
				rectype = REF_CHAR(jb + V11_JREC_TYPE_OFFSET);
				total_key = total_data = 0;
				assert(IS_REPLICATED(rectype));
				if (IS_ZTP(rectype))
					GTMASSERT;	/* ZTP not supported */
				if (IS_SET_KILL_ZKILL(rectype))
				{
					GET_USHORT(key_len, jb + V11_JREC_PREFIX_SIZE + v11_jnl_fixed_size[rectype]);
					total_key = key_len + sizeof(unsigned short);
					if (IS_SET(rectype))
					{
						GET_MSTR_LEN(long_data_len, jb + V11_JREC_PREFIX_SIZE +
								   v11_jnl_fixed_size[rectype] + total_key);
						total_data = long_data_len + sizeof(mstr_len_t);
					}
					conv_reclen = JREC_PREFIX_SIZE + FIXED_UPD_RECLEN +
						total_key + total_data + JREC_SUFFIX_SIZE;
					conv_reclen = ROUND_UP2(conv_reclen, JNL_REC_START_BNDRY);
				} else if (IS_COM(rectype))
					conv_reclen = JREC_PREFIX_SIZE + TCOM_RECLEN + JREC_SUFFIX_SIZE;
				clen_without_sfx = conv_reclen - JREC_SUFFIX_SIZE;
				if (cb - conv_buff + conv_reclen > conv_bufsiz)
				{
					repl_errno = EREPL_INTLFILTER_NOSPC;
					status = -1;
					break;
				}
				cstart = cb;
				jstart = jb;
				prefix.jrec_type = rectype;
				suffix.backptr = prefix.forwptr = conv_reclen;
				prefix.pini_addr = 0;
				prefix.time = 0;
				prefix.tn = 0;
				suffix.suffix_code = JNL_REC_SUFFIX_CODE;
				memcpy(cb, (unsigned char*)&prefix, JREC_PREFIX_SIZE);
				cb += JREC_PREFIX_SIZE;
				memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_JNL_SEQNO_OFFSET, sizeof(seq_num));
				cb += sizeof(seq_num);
				if (IS_SET_KILL_ZKILL(rectype))
				{
					PUT_JNL_STR_LEN(cb, key_len);
					jb += (V11_JREC_PREFIX_SIZE + V11_MUMPS_NODE_OFFSET + sizeof(unsigned short));
					if (IS_FENCED(rectype))
						jb += TP_TOKEN_TID_SIZE;
					cb += sizeof(jnl_str_len_t);
					memcpy(cb, jb, key_len);
					cb += key_len;
					jb += key_len;
					if (IS_SET(rectype))
					{
						PUT_MSTR_LEN(cb, long_data_len);
						cb += sizeof(mstr_len_t);
						jb += sizeof(mstr_len_t);
						memcpy(cb, jb, long_data_len);
						cb += long_data_len;
					}
				} else if (IS_COM(rectype))
				{
					assert(JRT_TCOM == rectype);
					memset(cb, 0, TID_STR_SIZE);
					cb += TID_STR_SIZE;
					memcpy(cb, jb + V11_JREC_PREFIX_SIZE + V11_TCOM_PARTICIPANTS_OFFSET, sizeof(uint4));
					cb += sizeof(uint4);
				} else
					assert(FALSE);
				nzeros = (cstart + clen_without_sfx - cb);
				if (nzeros > 0)
				{
					memset(cb, 0, nzeros);
					cb += nzeros;
				}
				jb = jstart + reclen;
				memcpy(cb, (unsigned char*)&suffix, JREC_SUFFIX_SIZE);
				cb += JREC_SUFFIX_SIZE;
				assert(cb == cstart + conv_reclen);
				jlen -= reclen;
				continue;
			}
			repl_errno = EREPL_INTLFILTER_INCMPLREC;
			status = -1;
			break;
		}
		repl_errno = EREPL_INTLFILTER_BADREC;
		status = -1;
		break;
	}
	assert(0 == jlen || -1 == status);
	*jnl_len = jb - jnl_buff;
	*conv_len = cb - conv_buff;
	return(status);
}
Esempio n. 7
0
void	jnl_format(jnl_format_buffer *jfb)
{
	enum jnl_record_type	rectype;
	sgmnt_addrs		*csa;
	uint4			align_fill_size, jrec_size, tmp_jrec_size;
	int			subcode;
	jnl_action		*ja;
	char			*local_buffer;
	jnl_str_len_t		keystrlen;
	mstr_len_t		valstrlen;

	csa = &FILE_INFO(gv_cur_region)->s_addrs;
	if (jnl_fence_ctl.level == 0 && dollar_tlevel == 0)
	{
		/* Non-TP */
		subcode = 0;
		tmp_jrec_size = FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE;
	} else
	{
		if (NULL == csa->next_fenced)
		{
			/* F (or T) */
			subcode = 1;
			csa->next_fenced = jnl_fence_ctl.fence_list;
			jnl_fence_ctl.fence_list = csa;
		} else
			/* G (or U) */
			subcode = 3;
		if (0 != dollar_tlevel)
		{
			/* TP */
			++subcode;
			tmp_jrec_size = FIXED_UPD_RECLEN + JREC_SUFFIX_SIZE;
		} else
			tmp_jrec_size = FIXED_ZTP_UPD_RECLEN + JREC_SUFFIX_SIZE;
	}
	ja = &(jfb->ja);
	rectype = jnl_opcode[ja->operation][subcode];
	assert(rectype > JRT_BAD && rectype < JRT_RECTYPES);
	assert(IS_SET_KILL_ZKILL(rectype));
	/* Compute actual record length */
	assert(NULL != ja->key);
	keystrlen = ja->key->end;
	tmp_jrec_size += keystrlen + sizeof(jnl_str_len_t);
	if (JNL_SET == ja->operation)
	{
		assert(NULL != ja->val);
		valstrlen = ja->val->str.len;
		tmp_jrec_size += valstrlen + sizeof(mstr_len_t);
	}
	jrec_size = ROUND_UP2(tmp_jrec_size, JNL_REC_START_BNDRY);
	align_fill_size = jrec_size - tmp_jrec_size; /* For JNL_REC_START_BNDRY alignment */
	if (dollar_tlevel)
	{
		assert((1 << JFB_ELE_SIZE_IN_BITS) == JNL_REC_START_BNDRY);
		assert(JFB_ELE_SIZE == JNL_REC_START_BNDRY);
		jfb->buff = (char *)get_new_element(sgm_info_ptr->format_buff_list, jrec_size >> JFB_ELE_SIZE_IN_BITS);
		/* assume an align record will be written while computing maximum jnl-rec size requirements */
		sgm_info_ptr->total_jnl_rec_size += jrec_size + MIN_ALIGN_RECLEN;
	}
	/* else if (0 == dollar_tlevel) jfb->buff already malloced in gvcst_init */
	jfb->record_size = jrec_size;
	jfb->rectype = rectype;
	/* PREFIX */
	((jrec_prefix *)jfb->buff)->jrec_type = rectype;
	((jrec_prefix *)jfb->buff)->forwptr = jrec_size;
	if (IS_ZTP(rectype))
		local_buffer = jfb->buff + FIXED_ZTP_UPD_RECLEN;
	else
		local_buffer = jfb->buff + FIXED_UPD_RECLEN;
	*(jnl_str_len_t *)local_buffer = keystrlen; /* direct assignment for already aligned address */
	local_buffer += sizeof(jnl_str_len_t);
	memcpy(local_buffer, (uchar_ptr_t)ja->key->base, keystrlen);
	local_buffer += keystrlen;
	if (JNL_SET == ja->operation)
	{
		PUT_MSTR_LEN(local_buffer, valstrlen); /* SET command's data may not be aligned */
		local_buffer +=  sizeof(jnl_str_len_t);
		memcpy(local_buffer, (uchar_ptr_t)ja->val->str.addr, valstrlen);
		local_buffer += valstrlen;
	}
	if (0 != align_fill_size)
	{
		memset(local_buffer, 0, align_fill_size);
		local_buffer += align_fill_size;
	}
	assert(0 == ((uint4)local_buffer % sizeof(jrec_suffix)));
	/* SUFFIX */
	((jrec_suffix *)local_buffer)->backptr = jrec_size;
	((jrec_suffix *)local_buffer)->suffix_code = JNL_REC_SUFFIX_CODE;
}
Esempio n. 8
0
/* This routine is called only for recover and rollback (that is, mur_options.update).
 * It applies the set/kill/zkill, tcom, inctn, and aimg records during forward processing.
 * Some fields like jnl_seqno, rec_seqno and prefix.time are saved here from original journal files.
 * Later jnl_write routines copies them to journal records instead of generating them like the runtime system */
uint4	mur_output_record(reg_ctl_list *rctl)
{
	mval			mv;
	jnl_record		*rec;
	char			*val_ptr;
	int			strm_num;
	uint4			dummy;
	off_jnl_t		pini_addr;
	jnl_string		*keystr;
	enum jnl_record_type 	rectype;
	uint4			jnl_status, status;
	pini_list_struct	*plst;
	boolean_t		jnl_enabled, was_crit;
	struct_jrec_null	null_record;
	gd_region		*reg;
	seq_num			strm_seqno;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	jnl_ctl_list		*jctl;
	jnl_format_buffer	*ztworm_jfb;
	blk_hdr_ptr_t		aimg_blk_ptr;
	int			in_len, gtmcrypt_errno;
	boolean_t		use_new_key;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(mur_options.update);
	rec = rctl->mur_desc->jnlrec;
	rectype = (enum jnl_record_type)rec->prefix.jrec_type;
	switch (rectype)
	{
		case JRT_ALIGN:
		case JRT_EOF:
		case JRT_EPOCH:
		case JRT_PBLK:
		case JRT_PINI:
		case JRT_TRUNC:
			return SS_NORMAL;
			break;
		default:
			break;
	}
	jgbl.gbl_jrec_time = rec->prefix.time;
	pini_addr = rec->prefix.pini_addr;
	reg = rctl->gd;
	jctl = rctl->jctl;
	assert(jctl->reg_ctl == rctl);
	assert(gv_cur_region == reg);
	csa = rctl->csa;
	assert(cs_addrs == csa);
	csd = csa->hdr;
	assert(cs_data == csd);
	jnl_enabled = JNL_ENABLED(csa);
	if (jnl_enabled)
	{
		status = mur_get_pini(jctl, pini_addr, &plst);
		if (SS_NORMAL != status)
			return status;
		prc_vec = &plst->jpv;
		csa->jnl->pini_addr = plst->new_pini_addr;
		rctl->mur_plst = plst;
	}
	if (mur_options.rollback && IS_REPLICATED(rectype))
	{
		jgbl.mur_jrec_seqno = GET_JNL_SEQNO(rec);
		if (jgbl.mur_jrec_seqno >= murgbl.consist_jnl_seqno)
		{
			assert(murgbl.losttn_seqno >= (jgbl.mur_jrec_seqno + 1));
			murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno + 1;
		}
		jgbl.mur_jrec_strm_seqno = GET_STRM_SEQNO(rec);
		strm_seqno = jgbl.mur_jrec_strm_seqno;
		if (strm_seqno)
		{	/* maintain csd->strm_reg_seqno */
			strm_num = GET_STRM_INDEX(strm_seqno);
			strm_seqno = GET_STRM_SEQ60(strm_seqno);
			assert(csd->strm_reg_seqno[strm_num] <= (strm_seqno + 1));
			csd->strm_reg_seqno[strm_num] = strm_seqno + 1;
		}
	}
	/* Assert that TREF(gd_targ_gvnh_reg) is NULL for every update that journal recovery/rollback plays forward;
	 * This is necessary to ensure every update is played in only the database file where the journal record is seen
	 * instead of across all regions that span the particular global reference. For example if ^a(1) spans db files
	 * a.dat and b.dat, and a KILL ^a(1) is done at the user level, we would see KILL ^a(1) journal records in a.mjl
	 * and b.mjl. When journal recovery processes the journal record in a.mjl, it should do the kill only in a.dat
	 * When it gets to the same journal record in b.mjl, it would do the same kill in b.dat and effectively complete
	 * the user level KILL ^a(1). If instead recovery does the KILL across all spanned regions, we would be basically
	 * doing duplicate work let alone do it out-of-order since recovery goes region by region for the most part.
	 */
	assert(NULL == TREF(gd_targ_gvnh_reg));
	if (IS_SET_KILL_ZKILL_ZTRIG(rectype))
	{	/* TP and non-TP has same format */
		keystr = (jnl_string *)&rec->jrec_set_kill.mumps_node;
		if (jnl_enabled)
		{
			MUR_SET_JNL_FENCE_CTL_TOKEN(rec->jrec_set_kill.token_seq.token, rctl);
			jnl_fence_ctl.strm_seqno = rec->jrec_set_kill.strm_seqno;
			jgbl.tp_ztp_jnl_upd_num = rec->jrec_set_kill.update_num;
			DEBUG_ONLY(jgbl.max_tp_ztp_jnl_upd_num = MAX(jgbl.max_tp_ztp_jnl_upd_num, jgbl.tp_ztp_jnl_upd_num);)
			jgbl.mur_jrec_nodeflags = keystr->nodeflags;
		}
		if (IS_FENCED(rectype))
		{	/* Even for FENCE_NONE we apply fences. Otherwise an [F/G/T/U]UPD becomes UPD etc. */
			/* op_tstart is called in "mur_forward_play_cur_jrec" already */
			if (IS_FUPD(rectype))
			{
				jnl_fence_ctl.level = 1;
				if (jnl_enabled)
				{
					jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;
					csa->next_fenced = NULL;
				}
			} else if (IS_GUPD(rectype))
			{
				jnl_fence_ctl.level = 1;
				if (jnl_enabled)
				{
					jnl_fence_ctl.fence_list = csa;
					csa->next_fenced = JNL_FENCE_LIST_END;
				}
			} else if (IS_TP(rectype))
				tp_set_sgm();
		}
#		ifdef GTM_TRIGGER
		/* Check if ^#t and if so need to increment trigger cycle in file header. Note that the below 'if' check could cause
		 * csd->db_trigger_cycle to be incremented even for the region that actually did NOT get any trigger updates. This
		 * is because some of the ^#t subscripts (like ^#t(#TNAME)) go to the DEFAULT region. So, even though a trigger was
		 * loaded only for ^a (corresponding to AREG), csd->db_trigger_cycle will be incremented for DEFAULT region as well.
		 * To avoid this, the below check should be modified to set csa->incr_db_trigger_cycle only if the ^#t subscript
		 * does not begin with '#' (similar to what is done in UPD_GV_BIND_NAME_APPROPRIATE). However, since journal
		 * recovery operates in standalone mode, the db_trigger_cycle increment to DEFAULT region should be okay since it
		 * will NOT cause any restarts
		 */
		if (IS_GVKEY_HASHT_GBLNAME(keystr->length, keystr->text))
		{
			assert(cs_addrs == csa);
			csa->incr_db_trigger_cycle = TRUE;
		}
#		endif
		if (IS_SET(rectype))
		{
			val_ptr = &keystr->text[keystr->length];
			GET_MSTR_LEN(mv.str.len, val_ptr);
			mv.str.addr = val_ptr + SIZEOF(mstr_len_t);
			mv.mvtype = MV_STR;
			op_gvput(&mv);
		} else if (IS_KILL(rectype))
		{
			if (IS_TP(rectype))
				tp_set_sgm();
			op_gvkill();
#		ifdef GTM_TRIGGER
		} else if (IS_ZTRIG(rectype))
		{
			if (IS_TP(rectype))
				tp_set_sgm();
			op_ztrigger();
#		endif
		} else
		{
			assert(IS_ZKILL(rectype));
			if (IS_TP(rectype))
				tp_set_sgm();
			op_gvzwithdraw();
		}
		if (IS_ZTP(rectype))
		{	/* Even for FENCE_NONE we apply fences. Otherwise an FUPD/GUPD becomes UPD etc. */
			assert(jnl_enabled || (JNL_FENCE_LIST_END == jnl_fence_ctl.fence_list && NULL == csa->next_fenced));
			jnl_fence_ctl.level = 0;
			if (jnl_enabled)
			{
				jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END;
				csa->next_fenced = NULL;
			}
		}
		return SS_NORMAL;
	}