Example #1
boolean_t	gvcst_query(void)
{	/* Similar to gvcst_order and gvcst_zprevious. In each case we skip over hidden subscripts as needed.
	 *
	 *     1  2  3  NULL                           <--- order/zprev...
	 *     1  2  3  NULL  NULL
	 *     1  2  3  NULL  NULL  NULL                                <--- query from here...
	 *     1  2  3  NULL  NULL  NULL  hidden
	 *     1  2  3  NULL  NULL  hidden
	 *     1  2  3  NULL  hidden
	 *     1  2  3  hidden                         <--- ... skip this guy and go to bottom/top, respectively
	 *     1  2  3  7                                               <--- ... needs to end up here
	 */
	boolean_t	found, is_hidden, sn_tpwrapped;
	boolean_t	est_first_pass;
	gv_key		save_currkey[DBKEYALLOC(MAX_KEY_SZ)];
	int		i;
	int		save_dollar_tlevel;

	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_query2();
#	ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	CHECK_HIDDEN_SUBSCRIPT_AND_RETURN(found, gv_altkey, is_hidden);
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	assert(found && is_hidden);
	SAVE_GV_CURRKEY(save_currkey);
	if (!dollar_tlevel)
	{
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_query_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		sn_tpwrapped = FALSE;
	for (i = 0; i <= MAX_GVSUBSCRIPTS; i++)
	{
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, (gtm_uint64_t) -1);
		found = gvcst_query2();
		CHECK_HIDDEN_SUBSCRIPT_AND_BREAK(found, gv_altkey, is_hidden);
		assert(found && is_hidden);
		/* Replace last subscript to be the highest possible hidden subscript so another
		 * gvcst_query2 will give us the next non-hidden subscript.
		 */
		REPLACE_HIDDEN_SUB_TO_HIGHEST(gv_altkey, gv_currkey);	/* uses gv_altkey to modify gv_currkey */
	}
	if (sn_tpwrapped)
	{
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY(save_currkey);
	assert(save_dollar_tlevel == dollar_tlevel);
#	endif
	return found;
}
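The CHECK_HIDDEN_SUBSCRIPT* macros above decide whether the key that query/order landed on ends in a spanning-node ("hidden") subscript. Below is a minimal sketch of what such a test could look like, assuming a simplified key layout and a hypothetical SPAN_SUB_MARKER byte; the real macros and key format are defined in the GT.M headers.

/* Illustrative only: simplified stand-in for gv_key and the hidden-subscript test */
#define SPAN_SUB_MARKER	0x02	/* assumed marker byte; the real value lives in the GT.M headers */
#define KEY_DELIMITER	0x00

typedef struct
{
	unsigned short	top, end, prev;
	unsigned char	base[256];
} demo_gv_key;

/* Return 1 if the last subscript of "key" begins with the assumed marker byte */
int demo_is_hidden_subscript(const demo_gv_key *key)
{
	int	i, last_sub_start;

	last_sub_start = 0;
	for (i = 0; i + 1 < (int)key->end; i++)
	{	/* subscripts in the compressed key are separated by single KEY_DELIMITER bytes */
		if (KEY_DELIMITER == key->base[i])
			last_sub_start = i + 1;
	}
	return SPAN_SUB_MARKER == key->base[last_sub_start];
}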
Example #2
boolean_t	gvcst_order(void)
{	/* See gvcst_query.c */
	boolean_t	found, is_hidden, sn_tpwrapped;
	boolean_t	est_first_pass;
	gv_key		save_currkey[DBKEYALLOC(MAX_KEY_SZ)];
	int		end, prev, oldend;
	int		save_dollar_tlevel;

	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_order2();
#	ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	CHECK_HIDDEN_SUBSCRIPT_AND_RETURN(found, gv_altkey, is_hidden);
	assert(found && is_hidden);
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	SAVE_GV_CURRKEY_LAST_SUBSCRIPT(save_currkey, prev, oldend);
	if (!dollar_tlevel)
	{
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_order_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
		INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, (gtm_uint64_t) -1);
		found = gvcst_order2();
	} else
		sn_tpwrapped = FALSE;
	if (found)
	{
		CHECK_HIDDEN_SUBSCRIPT(gv_altkey, is_hidden);
		if (is_hidden)
		{	/* Replace last subscript to be the highest possible hidden subscript so another
			 * gvcst_order2 will give us the next non-hidden subscript.
			 */
			REPLACE_HIDDEN_SUB_TO_HIGHEST(gv_altkey, gv_currkey);	/* uses gv_altkey to modify gv_currkey */
			/* fix up since it should only be externally counted as one $order */
			INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_order, (gtm_uint64_t) -1);
			found = gvcst_order2();
		}
	}
	if (sn_tpwrapped)
	{
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY_LAST_SUBSCRIPT(save_currkey, prev, oldend);
	assert(save_dollar_tlevel == dollar_tlevel);
#	endif
	return found;
}
Example #3
boolean_t gvcst_queryget(mval *val)
{
	boolean_t	found, is_hidden, is_dummy = FALSE, sn_tpwrapped;
	boolean_t	est_first_pass;
	char		save_currkey[SIZEOF(gv_key) + DBKEYSIZE(MAX_KEY_SZ)];
	gv_key		*save_gv_currkey;
	int		save_dollar_tlevel;

	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	found = gvcst_queryget2(val, NULL);
#	ifdef UNIX
	assert(save_dollar_tlevel == dollar_tlevel);
	CHECK_HIDDEN_SUBSCRIPT(gv_altkey, is_hidden);
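	/* A spanning node keeps a short placeholder ("dummy") value at its base key; the actual value
	 * lives in chunks under hidden subscripts, hence the dummy-value check below.
	 */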
	if (found && IS_SN_DUMMY(val->str.len, val->str.addr))
		is_dummy = TRUE;
	if (!found || (!is_dummy && !is_hidden))
		return found;
	IF_SN_DISALLOWED_AND_NO_SPAN_IN_DB(return found);
	SAVE_GV_CURRKEY;
	if (!dollar_tlevel)
	{
		sn_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_queryget_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		sn_tpwrapped = FALSE;
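	/* Redo the operation as a $QUERY followed by a $GET so the spanning-node aware routines
	 * locate the next real key and assemble its full value.
	 */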
	found = gvcst_query();
	COPY_KEY(gv_currkey, gv_altkey); /* set gv_currkey to gv_altkey */
	found = gvcst_get(val);
	INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_get, (gtm_uint64_t) -1); /* only counted externally as one get */
	INCR_GVSTATS_COUNTER(cs_addrs, cs_addrs->nl, n_query, (gtm_uint64_t) -1);
	if (sn_tpwrapped)
	{
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	RESTORE_GV_CURRKEY;
	assert(save_dollar_tlevel == dollar_tlevel);
#	endif
	return found;
}
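All three routines in these examples share one shape: run the fast *2() variant, and only when the result lands on a hidden subscript, redo the work inside an implicit TP transaction guarded by a condition handler. Here is a condensed, self-contained sketch of that control flow; every demo_* name is a stand-in for the corresponding GT.M call, not the real API.

#include <stdio.h>

typedef int boolean_t;
#define TRUE	1
#define FALSE	0

static int	demo_dollar_tlevel;				/* stand-in for the global TP depth */

static void	demo_tstart(void)  { demo_dollar_tlevel++; }	/* op_tstart(IMPLICIT_TSTART, ...) */
static void	demo_tcommit(void) { demo_dollar_tlevel--; }	/* op_tcommit() */
static boolean_t demo_fast_path(void)  { return TRUE; }		/* gvcst_query2()/gvcst_order2() */
static boolean_t demo_slow_path(void)  { return TRUE; }		/* the spanning-node aware retry loop */
static boolean_t demo_hit_hidden(void) { return FALSE; }	/* CHECK_HIDDEN_SUBSCRIPT... */

static boolean_t demo_op(void)
{
	boolean_t	found, sn_tpwrapped;

	found = demo_fast_path();
	if (!found || !demo_hit_hidden())
		return found;		/* common case: no spanning node involved */
	/* Save gv_currkey, then wrap in TP only if the caller is not already in a transaction */
	if (!demo_dollar_tlevel)
	{
		sn_tpwrapped = TRUE;
		demo_tstart();		/* the real code also does ESTABLISH_NORET(handler) here */
	} else
		sn_tpwrapped = FALSE;
	found = demo_slow_path();
	if (sn_tpwrapped)
		demo_tcommit();		/* and REVERT the condition handler */
	/* then RESTORE_GV_CURRKEY before returning */
	return found;
}

int main(void)
{
	printf("found = %d\n", demo_op());
	return 0;
}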
Example #4
/* Upgrade ^#t global in "reg" region */
void	trigger_upgrade(gd_region *reg)
{
	boolean_t		est_first_pass, do_upgrade, is_defined;
	boolean_t		was_null = FALSE, is_null = FALSE;
	int			seq_num, trig_seq_num;
	int			currlabel;
	mval			tmpmval, xecuteimval, *gvname, *tmpmv, *tmpmv2;
	int4			result, tmpint4;
	uint4			curend, gvname_prev, xecute_curend;
	uint4			hash_code, kill_hash_code;
	int			count, i, xecutei, tncount;
	char			*trigname, *trigindex, *ptr;
	char			name_and_index[MAX_MIDENT_LEN + 1 + MAX_DIGITS_IN_INT];
	char			trigvn[MAX_MIDENT_LEN + 1 + MAX_DIGITS_IN_INT], nullbyte[1];
	uint4			trigname_len, name_index_len;
	int			ilen;
	sgmnt_addrs		*csa;
	jnl_private_control	*jpc;
	uint4			sts;
	int			close_res;
	hash128_state_t		hash_state, kill_hash_state;
	uint4			hash_totlen, kill_hash_totlen;
	int			trig_protected_mval_push_count;
#	ifdef DEBUG
	int			save_dollar_tlevel;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(gv_cur_region == reg);
	assert(!dollar_tlevel);	/* caller should have ensured this. this is needed as otherwise things get complicated. */
	assert(!is_replicator);	/* caller should have ensured this. this is needed so we don't bump jnl_seqno (if replicating) */
	csa = &FILE_INFO(reg)->s_addrs;
	assert(csa->hdr->hasht_upgrade_needed);
	/* If before-image journaling is turned on in this region (does not matter if replication is turned on or not),
	 * once this transaction is done, we need to switch to new journal file and cut the back link because
	 * otherwise it is possible for backward journal recovery (or rollback) or source server to encounter
	 * the journal records generated in this ^#t-upgrade-transaction in which case they don't know how to handle
	 * it properly (e.g. rollback or backward recovery does not know to restore csa->hdr->hasht_upgrade_needed
	 * if it rolls back this transaction). To achieve this, we set hold_onto_crit to TRUE and do the jnl link
	 * cut AFTER the transaction commits but before anyone else can sneak in to do any more updates.
	 * Since most often we expect databases to be journaled, we do this hold_onto_crit even for the non-journaled case.
	 */
	grab_crit(reg);
	csa->hold_onto_crit = TRUE;
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	assert(!donot_INVOKE_MUMTSTART);
	DEBUG_ONLY(donot_INVOKE_MUMTSTART = TRUE);
	op_tstart(IMPLICIT_TSTART, TRUE, &literal_batch, 0); /* 0 ==> save no locals but RESTART OK */
	ESTABLISH_NORET(trigger_upgrade_ch, est_first_pass);
	/* On a TP restart anywhere down below, this line is where the restart resumes execution from */
	assert(donot_INVOKE_MUMTSTART);	/* Make sure still set for every try/retry of TP transaction */
	change_reg(); /* TP_CHANGE_REG wont work as we need to set sgm_info_ptr */
	assert(NULL != cs_addrs);
	assert(csa == cs_addrs);
	SET_GVTARGET_TO_HASHT_GBL(csa);	/* sets up gv_target */
	assert(NULL != gv_target);
	INITIAL_HASHT_ROOT_SEARCH_IF_NEEDED;	/* Needed to do every retry in case restart was due to an online rollback.
						 * This also sets up gv_currkey */
	/* Do actual upgrade of ^#t global.
	 *
	 * Below is a sample layout of the label 2 ^#t global
	 * -------------------------------------------------------
	 * ^#t("#TNAME","x")="a"_$C(0)_"1"		(present in DEFAULT only)
	 * ^#t("#TRHASH",89771515,1)="a"_$C(0)_"1"	(present in DEFAULT only)
	 * ^#t("#TRHASH",106937755,1)="a"_$C(0)_"1"	(present in DEFAULT only)
	 * ^#t("a",1,"BHASH")="106937755"
	 * ^#t("a",1,"CHSET")="M"
	 * ^#t("a",1,"CMD")="S"
	 * ^#t("a",1,"LHASH")="89771515"
	 * ^#t("a",1,"TRIGNAME")="x#"
	 * ^#t("a",1,"XECUTE")=" do ^twork"
	 * ^#t("a","#COUNT")="1"
	 * ^#t("a","#CYCLE")="1"
	 * ^#t("a","#LABEL")="2"
	 *
	 * Below is a sample layout of the label 3 ^#t global
	 * -------------------------------------------------------
	 * ^#t("#LABEL")="3"				(present only after upgrade, not regular trigger load)
	 * ^#t("#TNAME","x")="a"_$C(0)_"1"		(present in CURRENT region)
	 * ^#t("a",1,"BHASH")="71945627"
	 * ^#t("a",1,"CHSET")="M"
	 * ^#t("a",1,"CMD")="S"
	 * ^#t("a",1,"LHASH")="71945627"
	 * ^#t("a",1,"TRIGNAME")="x#"
	 * ^#t("a",1,"XECUTE")=" do ^twork"
	 * ^#t("a","#COUNT")="1"
	 * ^#t("a","#CYCLE")="2"
	 * ^#t("a","#LABEL")="3"
	 * ^#t("a","#TRHASH",71945627,1)="a"_$C(0)_"1"
	 *
	 * Key aspects of the format change
	 * ----------------------------------
	 * 1) New ^#t("#LABEL")="3" to indicate the format of the ^#t global. This is in addition to
	 * 	^#t("a","#LABEL") etc. which is already there. This way we have a #LABEL for not just the installed
	 * 	triggers but also for the name information stored in the #TNAME nodes.
	 * 2) The BHASH and LHASH fields: the hash computation is different, so there are more chances of BHASH and LHASH
	 * 	matching, in which case we store only one #TRHASH entry (instead of two). So there are fewer ^#t records in
	 * 	the new format in most cases.
	 * 3) ^#t("a","#LABEL") bumps from 2 to 3. Similarly ^#t("a","#CYCLE") bumps by one (to make sure triggers for this
	 *	global get re-read if and when we implement an -ONLINE upgrade).
	 * 4) DEFAULT used to have ^#t("#TNAME",...) nodes corresponding to triggers across ALL regions in the gbldir and
	 * 	other regions used to have NO ^#t("#TNAME",...) nodes, whereas after the upgrade every region has
	 *	^#t("#TNAME",...) nodes corresponding to triggers installed in that region. So it is safer to kill ^#t("#TNAME")
	 *	nodes and add them as needed.
	 * 5) #TRHASH has moved from ^#t() to ^#t(<gbl>). So it is safer to kill ^#t("#TRHASH")	nodes and add them as needed.
	 *
	 * Below is a sample layout of the label 4 ^#t global
	 * -------------------------------------------------------
	 * ^#t("#TNAME","x")="a"_$C(0)_"1"		(present in CURRENT region)
	 * ^#t("a",1,"BHASH")="71945627"
	 * ^#t("a",1,"CHSET")="M"
	 * ^#t("a",1,"CMD")="S"
	 * ^#t("a",1,"LHASH")="71945627"
	 * ^#t("a",1,"TRIGNAME")="x#"
	 * ^#t("a",1,"XECUTE")=" do ^twork"
	 * ^#t("a","#COUNT")="1"
	 * ^#t("a","#CYCLE")="2"
	 * ^#t("a","#LABEL")="4"
	 * ^#t("a","#TRHASH",71945627,1)="a"_$C(0)_"1"
	 *
	 * Key aspects of the format change
	 * ----------------------------------
	 * 1) Removed ^#t("#LABEL") as it is redundant information and trigger load does not include it
	 * 2) Multiline triggers were incorrectly processed resulting in incorrect BHASH and LHASH values. Upgrade fixes this
	 * 3) ^#t("a","#LABEL") bumps from 3 to 4. Similarly ^#t("a","#CYCLE") bumps by one (to make sure
	 * 	triggers for this global get re-read if and when we implement an -ONLINE upgrade).
	 */
	tmpmv = &tmpmval;	/* At all points maintain this relationship. The two are used interchangeably below */
	do_upgrade = (0 != gv_target->root);
	/* The below logic assumes ^#t global does not have any integrity errors */
	assert(do_upgrade);	/* caller should not have invoked us otherwise */
	if (do_upgrade)
	{	/* kill ^#t("#TRHASH"), ^#t("#TNAME") and ^#t("#LABEL") first. Regenerate each again as we process ^#t(<gbl>,...) */
		csa->incr_db_trigger_cycle = TRUE; /* so that we increment csd->db_trigger_cycle at commit time.
							 * this forces concurrent processes to read upgraded triggers.
							 */
		if (JNL_WRITE_LOGICAL_RECS(csa))
		{	/* Note that the ^#t upgrade is a physical layout change. But it has no logical change (i.e. users
			 * see the same MUPIP TRIGGER -SELECT output as before). So write only a dummy LGTRIG journal
			 * record for this operation. Hence write a string that starts with a trigger comment character ";".
			 */
			assert(!gv_cur_region->read_only);
			jnl_format(JNL_LGTRIG, NULL, (mval *)&literal_trigjnlrec, 0);
		}
		/* KILL ^#t("#LABEL") unconditionally */
		BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHLABEL, STRLEN(LITERAL_HASHLABEL));
		if (0 != gvcst_data())
			gvcst_kill(TRUE);
		/* KILL ^#t("#TNAME") unconditionally and regenerate */
		BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTNAME, STRLEN(LITERAL_HASHTNAME));
		if (0 != gvcst_data())
			gvcst_kill(TRUE);
		/* KILL ^#t("#TRHASH") unconditionally and regenerate */
		BUILD_HASHT_SUB_CURRKEY(LITERAL_HASHTRHASH, STRLEN(LITERAL_HASHTRHASH));
		if (0 != gvcst_data())
			gvcst_kill(TRUE);
		/* Loop through all global names for which ^#t(<gvn>) exists. The only first-level subscripts of ^#t starting
		 * with # are #TNAME and #TRHASH in collation order. So after #TRHASH we expect to find subscripts that are
		 * global names. Hence the HASHTRHASH code is placed AFTER the HASHTNAME code above.
		 */
		TREF(gd_targ_gvnh_reg) = NULL;	/* needed so op_gvorder below goes through gvcst_order (i.e. focuses only
						 * on the current region) and NOT through gvcst_spr_order (which does not
						 * apply anyways in the case of ^#t).
						 */
		nullbyte[0] = '\0';
		trig_protected_mval_push_count = 0;
		INCR_AND_PUSH_MV_STENT(gvname); /* Protect gvname from garbage collection */
		do
		{
			op_gvorder(gvname);
			if (0 == gvname->str.len)
				break;
			assert(ARRAYSIZE(trigvn) > gvname->str.len);
			memcpy(&trigvn[0], gvname->str.addr, gvname->str.len);
			gvname->str.addr = &trigvn[0];	/* point away from stringpool to avoid stp_gcol issues */
			/* Save gv_currkey->prev so it is restored before next call to op_gvorder (which cares about this field).
			 * gv_currkey->prev gets tampered with in the for loop below (e.g. BUILD_HASHT_SUB_CURRKEY macro).
			 * No need to do this for gv_currkey->end since the body of the for loop takes care of restoring it.
			 */
			gvname_prev = gv_currkey->prev;
			BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len);
			/* At this point, gv_currkey is ^#t(<gvn>) */
			/* Increment ^#t(<gvn>,"#CYCLE") */
			is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashcycle, tmpmv);
			assert(is_defined);
			tmpint4 = mval2i(tmpmv);
			tmpint4++;
			i2mval(tmpmv, tmpint4);
			gvtr_set_hasht_gblsubs((mval *)&literal_hashcycle, tmpmv);
			/* Read ^#t(<gvn>,"#COUNT") */
			is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashcount, tmpmv);
			if (is_defined)
			{
				tmpint4 = mval2i(tmpmv);
				count = tmpint4;
				/* Get ^#t(<gvn>,"#LABEL"), error out for invalid values. Upgrade disallowed for label 1 triggers */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_hashlabel, tmpmv);
				assert(is_defined);
				currlabel = mval2i(tmpmv);
				if ((V19_HASHT_GBL_LABEL_INT >= currlabel) || (HASHT_GBL_CURLABEL_INT <= currlabel))
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_TRIGUPBADLABEL, 6, currlabel,
							HASHT_GBL_CURLABEL_INT, gvname->str.len, gvname->str.addr,
							REG_LEN_STR(reg));
				/* Set ^#t(<gvn>,"#LABEL")=HASHT_GBL_CURLABEL */
				gvtr_set_hasht_gblsubs((mval *)&literal_hashlabel, (mval *)&literal_curlabel);
			} else
				count = 0;
			/* Kill ^#t(<gvn>,"#TRHASH") unconditionally and regenerate */
			gvtr_kill_hasht_gblsubs((mval *)&literal_hashtrhash, TRUE);
			/* At this point, gv_currkey is ^#t(<gvn>) */
			for (i = 1; i <= count; i++)
			{
				/* At this point, gv_currkey is ^#t(<gvn>) */
				curend = gv_currkey->end; /* note gv_currkey->end before changing it so we can restore it later */
				assert(KEY_DELIMITER == gv_currkey->base[curend]);
				assert(gv_target->gd_csa == cs_addrs);
				i2mval(tmpmv, i);
				COPY_SUBS_TO_GVCURRKEY(tmpmv, gv_cur_region, gv_currkey, was_null, is_null);
				/* At this point, gv_currkey is ^#t(<gvn>,i) */
				/* Compute new LHASH and BHASH hash values.
				 *	LHASH uses : GVSUBS,                        XECUTE
				 *	BHASH uses : GVSUBS, DELIM, ZDELIM, PIECES, XECUTE
				 * So read each of these pieces and compute the hash along the way.
				 */
				STR_PHASH_INIT(hash_state, hash_totlen);
				STR_PHASH_PROCESS(hash_state, hash_totlen, gvname->str.addr, gvname->str.len);
				STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1);
				/* Read in ^#t(<gvn>,i,"GVSUBS") */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_gvsubs, tmpmv);
				if (is_defined)
				{
					STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
					STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1);
				}
				/* Copy over SET hash state (2-tuple <state,totlen>) to KILL hash state before adding
				 * the PIECES, DELIM, ZDELIM portions (those are only part of the SET hash).
				 */
				kill_hash_state = hash_state;
				kill_hash_totlen = hash_totlen;
				/* Read in ^#t(<gvn>,i,"PIECES") */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_pieces, tmpmv);
				if (is_defined)
				{
					STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
					STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1);
				}
				/* Read in ^#t(<gvn>,i,"DELIM") */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_delim, tmpmv);
				if (is_defined)
				{
					STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
					STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1);
				}
				/* Read in ^#t(<gvn>,i,"ZDELIM") */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_zdelim, tmpmv);
				if (is_defined)
				{
					STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
					STR_PHASH_PROCESS(hash_state, hash_totlen, nullbyte, 1);
				}
				/* Read in ^#t(<gvn>,i,"XECUTE").
				 * Note: The XECUTE portion of the trigger definition is used in SET and KILL hash.
				 * But since we have started maintaining "hash_state" and "kill_hash_state" separately
				 * (due to PIECES, DELIM, ZDELIM) we need to update the hash for both using same input string.
				 */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_xecute, tmpmv);
				if (is_defined)
				{
					STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
					STR_PHASH_PROCESS(kill_hash_state, kill_hash_totlen, tmpmval.str.addr, tmpmval.str.len);
				} else
				{	/* Multi-record XECUTE string */
					/* At this point, gv_currkey is ^#t(<gvn>,i) */
					xecute_curend = gv_currkey->end; /* note gv_currkey->end so we can restore it later */
					assert(KEY_DELIMITER == gv_currkey->base[xecute_curend]);
					tmpmv2 = (mval *)&literal_xecute;
					COPY_SUBS_TO_GVCURRKEY(tmpmv2, gv_cur_region, gv_currkey, was_null, is_null);
					xecutei = 1;
					do
					{
						i2mval(&xecuteimval, xecutei);
						is_defined = gvtr_get_hasht_gblsubs(&xecuteimval, tmpmv);
						if (!is_defined)
							break;
						STR_PHASH_PROCESS(hash_state, hash_totlen, tmpmval.str.addr, tmpmval.str.len);
						STR_PHASH_PROCESS(kill_hash_state, kill_hash_totlen,
									tmpmval.str.addr, tmpmval.str.len);
						xecutei++;
					} while (TRUE);
					/* Restore gv_currkey to ^#t(<gvn>,i) */
					gv_currkey->end = xecute_curend;
					gv_currkey->base[xecute_curend] = KEY_DELIMITER;
				}
				STR_PHASH_RESULT(hash_state, hash_totlen, hash_code);
				STR_PHASH_RESULT(kill_hash_state, kill_hash_totlen, kill_hash_code);
				/* Set ^#t(<gvn>,i,"LHASH") */
				MV_FORCE_UMVAL(tmpmv, kill_hash_code);
				gvtr_set_hasht_gblsubs((mval *)&literal_lhash, tmpmv);
				/* Set ^#t(<gvn>,i,"BHASH") */
				MV_FORCE_UMVAL(tmpmv, hash_code);
				gvtr_set_hasht_gblsubs((mval *)&literal_bhash, tmpmv);
				/* Read in ^#t(<gvn>,i,"TRIGNAME") to determine if #SEQNUM/#TNCOUNT needs to be maintained */
				is_defined = gvtr_get_hasht_gblsubs((mval *)&literal_trigname, tmpmv);
				assert(is_defined);
				assert('#' == tmpmval.str.addr[tmpmval.str.len - 1]);
				tmpmval.str.len--;
				if ((tmpmval.str.len <= ARRAYSIZE(name_and_index)) &&
						(NULL != (ptr = memchr(tmpmval.str.addr, '#', tmpmval.str.len))))
				{	/* Auto-generated name. Need to maintain #SEQNUM/#TNCOUNT */
					/* Take copy of trigger name into non-stringpool location to avoid stp_gcol issues */
					trigname_len = ptr - tmpmval.str.addr;
					ptr++;
					name_index_len = (tmpmval.str.addr + tmpmval.str.len) - ptr;
					assert(ARRAYSIZE(name_and_index) >= (trigname_len + 1 + name_index_len));
					trigname = &name_and_index[0];
					trigindex = ptr;
					memcpy(trigname, tmpmval.str.addr, tmpmval.str.len);
					A2I(ptr, ptr + name_index_len, trig_seq_num);
					/* At this point, gv_currkey is ^#t(<gvn>,i) */
					/* $get(^#t("#TNAME",<trigger name>,"#SEQNUM")) */
					BUILD_HASHT_SUB_SUB_SUB_CURRKEY(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME),
						trigname, trigname_len, LITERAL_HASHSEQNUM, STR_LIT_LEN(LITERAL_HASHSEQNUM));
					seq_num = gvcst_get(tmpmv) ? mval2i(tmpmv) : 0;
					if (trig_seq_num > seq_num)
					{	/* Set ^#t("#TNAME",<trigger name>,"#SEQNUM") = trig_seq_num */
						SET_TRIGGER_GLOBAL_SUB_SUB_SUB_STR(LITERAL_HASHTNAME,
							STR_LIT_LEN(LITERAL_HASHTNAME), trigname, trigname_len,
							LITERAL_HASHSEQNUM, STR_LIT_LEN(LITERAL_HASHSEQNUM),
							trigindex, name_index_len, result);
						assert(PUT_SUCCESS == result);
					}
					/* set ^#t("#TNAME",<trigger name>,"#TNCOUNT")++ */
					BUILD_HASHT_SUB_SUB_SUB_CURRKEY(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME),
						trigname, trigname_len, LITERAL_HASHTNCOUNT, STR_LIT_LEN(LITERAL_HASHTNCOUNT));
					tncount = gvcst_get(tmpmv) ? mval2i(tmpmv) + 1 : 1;
					i2mval(tmpmv, tncount);
					SET_TRIGGER_GLOBAL_SUB_SUB_SUB_MVAL(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME),
						trigname, trigname_len, LITERAL_HASHTNCOUNT, STR_LIT_LEN(LITERAL_HASHTNCOUNT),
						tmpmval, result);
					trigname_len += 1 + name_index_len; /* in preparation for ^#t("#TNAME") set below */
					assert(PUT_SUCCESS == result);
					BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len);
					/* At this point, gv_currkey is ^#t(<gvn>) */
				} else
				{
					/* Take copy of trigger name into non-stringpool location to avoid stp_gcol issues */
					trigname = &name_and_index[0];  /* in preparation for ^#t("#TNAME") set below */
					trigname_len = MIN(tmpmval.str.len, ARRAYSIZE(name_and_index));
					assert(ARRAYSIZE(name_and_index) >= trigname_len);
					memcpy(trigname, tmpmval.str.addr, trigname_len);
					/* Restore gv_currkey to what it was at beginning of for loop iteration */
					gv_currkey->end = curend;
					gv_currkey->base[curend] = KEY_DELIMITER;
				}
				/* At this point, gv_currkey is ^#t(<gvn>) */
				if (kill_hash_code != hash_code)
					gvtr_set_hashtrhash(gvname->str.addr, gvname->str.len, kill_hash_code, i);
				/* Set ^#t(<gvn>,"#TRHASH",hash_code,i) */
				gvtr_set_hashtrhash(gvname->str.addr, gvname->str.len, hash_code, i);
				/* Set ^#t("#TNAME",<trigname>)=<gvn>_$c(0)_<trigindx> */
				/* The upgrade assumes that the region does not contain two triggers with the same name.
				 * V62000 and before could potentially have this out-of-design case. Once implemented, the
				 * trigger integrity check will warn users of this edge case. */
				ptr = &trigvn[gvname->str.len];
				*ptr++ = '\0';
				ilen = 0;
				I2A(ptr, ilen, i);
				ptr += ilen;
				assert(ptr <= ARRAYTOP(trigvn));
				SET_TRIGGER_GLOBAL_SUB_SUB_STR(LITERAL_HASHTNAME, STR_LIT_LEN(LITERAL_HASHTNAME),
					trigname, trigname_len, trigvn, ptr - gvname->str.addr, result);
				assert(PUT_SUCCESS == result);
				BUILD_HASHT_SUB_CURRKEY(gvname->str.addr, gvname->str.len);
				/* At this point, gv_currkey is ^#t(<gvn>) */
			}
			/* At this point, gv_currkey is ^#t(<gvn>) i.e. gv_currkey->end is correct but gv_currkey->prev
			 * might have been tampered with. Restore it to proper value first.
			 */
			gv_currkey->prev = gvname_prev;
			gvname->mvtype = 0; /* can now be garbage collected in the next iteration */
		} while (TRUE);
	}
	op_tcommit();
	REVERT; /* remove our condition handler */
	DEBUG_ONLY(donot_INVOKE_MUMTSTART = FALSE;)
	if (csa->hold_onto_crit)
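The LHASH/BHASH computation in this example relies on snapshotting an incremental hash: both hashes share a common prefix (GVSUBS), so the running state is copied once, after which the SET hash alone consumes PIECES/DELIM/ZDELIM while XECUTE feeds both. A toy version of that trick follows; the demo_* names and the mixing step are illustrative stand-ins for the STR_PHASH_* macros, not the real hash.

#include <stdio.h>
#include <string.h>

typedef unsigned int	demo_hash_state;	/* stand-in for hash128_state_t */

void demo_hash_process(demo_hash_state *st, const char *buf, size_t len)
{
	size_t	i;

	for (i = 0; i < len; i++)
		*st = (*st * 31u) + (unsigned char)buf[i];	/* toy mixing step, not the real hash */
}

int main(void)
{
	demo_hash_state	set_state = 0, kill_state;

	demo_hash_process(&set_state, "GVSUBS", strlen("GVSUBS"));		/* common prefix of both hashes */
	kill_state = set_state;							/* snapshot: the KILL (LHASH) state forks here */
	demo_hash_process(&set_state, "PIECES:DELIM:ZDELIM",
		strlen("PIECES:DELIM:ZDELIM"));					/* SET (BHASH) hash only */
	demo_hash_process(&set_state, " do ^twork", strlen(" do ^twork"));	/* XECUTE feeds both states */
	demo_hash_process(&kill_state, " do ^twork", strlen(" do ^twork"));
	printf("BHASH=%u LHASH=%u\n", set_state, kill_state);
	return 0;
}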
Example #5
int4 gds_rundown(void)
{
	boolean_t		canceled_dbsync_timer, canceled_flush_timer, ok_to_write_pfin;
	boolean_t		have_standalone_access, ipc_deleted, err_caught;
	boolean_t		is_cur_process_ss_initiator, remove_shm, vermismatch, we_are_last_user, we_are_last_writer, is_mm;
	boolean_t		unsafe_last_writer;
	char			time_str[CTIME_BEFORE_NL + 2]; /* for GET_CUR_TIME macro */
	gd_region		*reg;
	int			save_errno, status, rc;
	int4			semval, ftok_semval, sopcnt, ftok_sopcnt;
	short			crash_count;
	sm_long_t		munmap_len;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	node_local_ptr_t	cnl;
	struct shmid_ds		shm_buf;
	struct sembuf		sop[2], ftok_sop[2];
	uint4           	jnl_status;
	unix_db_info		*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	shm_snapshot_t		*ss_shm_ptr;
	uint4			ss_pid, onln_rlbk_pid, holder_pid;
	boolean_t		was_crit;
	boolean_t		safe_mode; /* Do not flush or take down shared memory. */
	boolean_t		bypassed_ftok = FALSE, bypassed_access = FALSE, may_bypass_ftok, inst_is_frozen,
				ftok_counter_halted, access_counter_halted;
	int			secshrstat;
	intrpt_state_t		prev_intrpt_state;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	jnl_status = 0;
	reg = gv_cur_region;			/* Local copy */

	/* Early out for cluster regions to avoid tripping the assert below.
	 * Note: This early out is consistent with VMS. It has been noted that all of the gtcm
	 * assignments to gv_cur_region should use the TP_CHANGE_REG macro. This would also
	 * avoid the assert problem and should be done eventually.
	 */
	if (dba_cm == reg->dyn.addr->acc_meth)
		return EXIT_NRM;

	udi = FILE_INFO(reg);
	csa = &udi->s_addrs;
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	if ((reg->open) && (dba_usr == csd->acc_meth))
	{
		change_reg();
		gvusr_rundown();
		return EXIT_NRM;
	}
	/* If the process has standalone access, it has udi->grabbed_access_sem set to TRUE at this point. Note that down in a local
	 * variable as the udi->grabbed_access_sem is set to TRUE even for non-standalone access below and hence we can't rely on
	 * that later to determine if the process had standalone access or not when it entered this function.  We need to guarantee
	 * that no one else accesses the database file header when the semid/shmid fields are reset. We already created the
	 * ftok semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it. We do it in blocking mode.
	 */
	have_standalone_access = udi->grabbed_access_sem; /* process holds standalone access */
	DEFER_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
	ESTABLISH_NORET(gds_rundown_ch, err_caught);
	if (err_caught)
	{
		REVERT;
		WITH_CH(gds_rundown_ch, gds_rundown_err_cleanup(have_standalone_access), 0);
		ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
		DEBUG_ONLY(ok_to_UNWIND_in_exit_handling = FALSE);
		return EXIT_ERR;
	}
	assert(reg->open);			/* if we failed to open, dbinit_ch should have taken care of proper clean up */
	assert(!reg->opening);			/* see comment above */
	assert((dba_bg == csd->acc_meth) || (dba_mm == csd->acc_meth));
	is_mm = (dba_bg != csd->acc_meth);
	assert(!csa->hold_onto_crit || (csa->now_crit && jgbl.onlnrlbk));
	/* If we are online rollback, we should already be holding crit and should release it only at the end of this module. This
	 * is usually done by noting down csa->now_crit in a local variable (was_crit) and using it whenever we are about to
	 * grab_crit. But, there are instances (like mupip_set_journal.c) where we grab_crit but invoke gds_rundown without any
	 * preceeding rel_crit. Such code relies on the fact that gds_rundown does rel_crit unconditionally (to get locks to a known
	 * state). So, augment csa->now_crit with jgbl.onlnrlbk to track if we can rel_crit unconditionally or not in gds_rundown.
	 */
	was_crit = (csa->now_crit && jgbl.onlnrlbk);
	/* Cancel any pending flush timer for this region by this task */
	canceled_flush_timer = FALSE;
	canceled_dbsync_timer = FALSE;
	CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer);
	we_are_last_user = FALSE;
	inst_is_frozen = IS_REPL_INST_FROZEN && REPL_ALLOWED(csa->hdr);
	if (!csa->persistent_freeze)
		region_freeze(reg, FALSE, FALSE, FALSE);
	if (!was_crit)
	{
		rel_crit(reg);		/* get locks to known state */
		mutex_cleanup(reg);
	}
	/* The only process that can invoke gds_rundown while holding access control semaphore is RECOVER/ROLLBACK. All the others
	 * (like MUPIP SET -FILE/MUPIP EXTEND) would have invoked db_ipcs_reset() before invoking gds_rundown (from
	 * mupip_exit_handler). The only exception is when these processes encounter a terminate signal and they reach
	 * mupip_exit_handler while holding access control semaphore. Assert accordingly.
	 */
	assert(!have_standalone_access || mupip_jnl_recover || process_exiting);
	/* If we have standalone access, then ensure that a concurrent online rollback cannot be running at the same time as it
	 * needs the access control lock as well. The only exception is when we are online rollback and are currently running down.
	 */
	cnl = csa->nl;
	onln_rlbk_pid = cnl->onln_rlbk_pid;
	assert(!have_standalone_access || mupip_jnl_recover || !onln_rlbk_pid || !is_proc_alive(onln_rlbk_pid, 0));
	if (!have_standalone_access)
	{
		if (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))) /* Check # of procs counted on FTOK */
		{
			save_errno = errno;
			assert(FALSE);
			rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
				  RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno);
		}
		may_bypass_ftok = CAN_BYPASS(ftok_semval, csd, inst_is_frozen); /* Do we need a blocking wait? */
		/* We need to guarantee that no one else accesses the database file header when semid/shmid fields are reset.
		 * We already have created ftok semaphore in db_init or mu_rndwn_file and did not remove it. So just lock it.
		 */
		if (!ftok_sem_lock(reg, may_bypass_ftok))
		{
			if (may_bypass_ftok)
			{	/* We did a non-blocking wait. It's ok to proceed without locking */
				bypassed_ftok = TRUE;
				holder_pid = semctl(udi->ftok_semid, DB_CONTROL_SEM, GETPID);
				if ((uint4)-1 == holder_pid)
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
							ERR_SYSCALL, 5,
							RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"),
							CALLFROM, errno);
				if (!IS_GTM_IMAGE) /* MUMPS processes should not flood syslog with bypass messages. */
				{
					send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10,
						 LEN_AND_STR(gtmImageNames[image_type].imageName), process_id, LEN_AND_LIT("FTOK"),
						 REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid);
					send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2,
							LEN_AND_LIT("FTOK bypassed at rundown"));
				}
			} else
			{	/* We did a blocking wait but something bad happened. */
				FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_lock, process_id);
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
			}
		}
		sop[0].sem_num = DB_CONTROL_SEM; sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = DB_CONTROL_SEM; sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO | IPC_NOWAIT; /* Don't wait the first time thru */
		SEMOP(udi->semid, sop, sopcnt, status, NO_WAIT);
		if (0 != status)
		{
			save_errno = errno;
			/* Check # of processes counted on access sem. */
			if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL)))
			{
				assert(FALSE);
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
					  RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno);
			}
			bypassed_access = CAN_BYPASS(semval, csd, inst_is_frozen) || onln_rlbk_pid || csd->file_corrupt;
			/* Before attempting again in the blocking mode, see if the holding process is an online rollback.
			 * If so, it is likely we won't get the access control semaphore anytime soon. In that case, we
			 * are better off skipping rundown and continuing with sanity cleanup and exit.
			 */
			holder_pid = semctl(udi->semid, DB_CONTROL_SEM, GETPID);
			if ((uint4)-1 == holder_pid)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
					  RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get holder_pid"), CALLFROM, errno);
			if (!bypassed_access)
			{	/* We couldn't get it in one shot; see if we already have it */
				if (holder_pid == process_id)
				{
					send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) MAKE_MSG_INFO(ERR_CRITSEMFAIL), 2, DB_LEN_STR(reg),
							ERR_RNDWNSEMFAIL);
					REVERT;
					ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
					assert(FALSE);
					return EXIT_ERR;
				}
				if (EAGAIN != save_errno)
				{
					assert(FALSE);
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
							ERR_SYSCALL, 5,
							RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"),
							CALLFROM, save_errno);
				}
				sop[0].sem_flg = sop[1].sem_flg = SEM_UNDO;	/* Try again - blocking this time */
				SEMOP(udi->semid, sop, 2, status, FORCED_WAIT);
				if (-1 == status)			/* We couldn't get it at all.. */
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
							ERR_SYSCALL, 5,
							RTS_ERROR_TEXT("gds_rundown SEMOP on access control semaphore"),
							CALLFROM, errno);
			} else if (!IS_GTM_IMAGE)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_RESRCINTRLCKBYPAS, 10,
						LEN_AND_STR(gtmImageNames[image_type].imageName), process_id,
						LEN_AND_LIT("access control"), REG_LEN_STR(reg), DB_LEN_STR(reg), holder_pid);
				send_msg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_TEXT, 2,
						LEN_AND_LIT("Access control bypassed at rundown"));
			}
			udi->grabbed_access_sem = !bypassed_access;
		}
	} /* else we hold the access control semaphore and therefore have standalone access. We do not release it now - we
	   * release it later in mupip_exit_handler.c. Since we already hold the access control semaphore, we don't need the
	   * ftok semaphore and trying for it could cause a deadlock.
	   */
	/* Note that in the case of online rollback, "udi->grabbed_access_sem" (and in turn "have_standalone_access") is TRUE.
	 * But there could be other processes still having the database open so we cannot safely reset the halted fields.
	 */
	if (have_standalone_access && !jgbl.onlnrlbk)
		csd->ftok_counter_halted = csd->access_counter_halted = FALSE;
	ftok_counter_halted = csd->ftok_counter_halted;
	access_counter_halted = csd->access_counter_halted;
	/* If we bypassed any of the semaphores, activate safe mode.
	 * Also, if the replication instance is frozen and this db has replication turned on (which means
	 * no flushes of dirty buffers to this db can happen while the instance is frozen) activate safe mode.
	 */
	ok_to_write_pfin = !(bypassed_access || bypassed_ftok || inst_is_frozen);
	safe_mode = !ok_to_write_pfin || ftok_counter_halted || access_counter_halted;
	/* At this point we are guaranteed no one else is doing a db_init/rundown as we hold the access control semaphore */
	assert(csa->ref_cnt);	/* decrement private ref_cnt before shared ref_cnt decrement. */
	csa->ref_cnt--;		/* Currently journaling logic in gds_rundown() in VMS relies on this order to detect last writer */
	assert(!csa->ref_cnt);
	--cnl->ref_cnt;
	if (memcmp(cnl->now_running, gtm_release_name, gtm_release_name_len + 1))
	{	/* VERMISMATCH condition. Possible only if DSE */
		assert(dse_running);
		vermismatch = TRUE;
	} else
		vermismatch = FALSE;
	if (-1 == shmctl(udi->shmid, IPC_STAT, &shm_buf))
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
				RTS_ERROR_TEXT("gds_rundown shmctl"), CALLFROM, save_errno);
	} else
		we_are_last_user =  (1 == shm_buf.shm_nattch) && !vermismatch && !safe_mode;
	/* recover => one user except ONLINE ROLLBACK, or standalone with frozen instance */
	assert(!have_standalone_access || we_are_last_user || jgbl.onlnrlbk || inst_is_frozen);
	if (-1 == (semval = semctl(udi->semid, DB_COUNTER_SEM, GETVAL)))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
			  RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get semval"), CALLFROM, errno);
	/* There's one writer left and I am it */
	assert(reg->read_only || semval >= 0);
	unsafe_last_writer = (DB_COUNTER_SEM_INCR == semval) && (FALSE == reg->read_only) && !vermismatch;
	we_are_last_writer = unsafe_last_writer && !safe_mode;
	assert(!we_are_last_writer || !safe_mode);
	assert(!we_are_last_user || !safe_mode);
	/* recover + R/W region => one writer except ONLINE ROLLBACK, or standalone with frozen instance, leading to safe_mode */
	assert(!(have_standalone_access && !reg->read_only) || we_are_last_writer || jgbl.onlnrlbk || inst_is_frozen);
	GTM_WHITE_BOX_TEST(WBTEST_ANTIFREEZE_JNLCLOSE, we_are_last_writer, 1); /* Assume we are the last writer to invoke wcs_flu */
	if (!have_standalone_access && (-1 == (ftok_semval = semctl(udi->ftok_semid, DB_COUNTER_SEM, GETVAL))))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg), ERR_SYSCALL, 5,
			  RTS_ERROR_TEXT("gds_rundown SEMCTL failed to get ftok_semval"), CALLFROM, errno);
	if (NULL != csa->ss_ctx)
		ss_destroy_context(csa->ss_ctx);
	/* SS_MULTI: If multiple snapshots are supported, then we have to run through each of the snapshots */
	assert(1 == MAX_SNAPSHOTS);
	ss_shm_ptr = (shm_snapshot_ptr_t)SS_GETSTARTPTR(csa);
	ss_pid = ss_shm_ptr->ss_info.ss_pid;
	is_cur_process_ss_initiator = (process_id == ss_pid);
	if (ss_pid && (is_cur_process_ss_initiator || we_are_last_user))
	{
		/* Try getting snapshot crit latch. If we don't get latch, we won't hang for eternity and will skip
		 * doing the orphaned snapshot cleanup. It will be cleaned up eventually either by subsequent MUPIP
		 * INTEG or by a MUPIP RUNDOWN.
		 */
		if (ss_get_lock_nowait(reg) && (ss_pid == ss_shm_ptr->ss_info.ss_pid)
			&& (is_cur_process_ss_initiator || !is_proc_alive(ss_pid, 0)))
		{
			ss_release(NULL);
			ss_release_lock(reg);
		}
	}
	/* If cnl->donotflush_dbjnl is set, it means mupip recover/rollback was interrupted and therefore we need not flush
	 * shared memory contents to disk as they might be in an inconsistent state. Moreover, any more flushing will only cause
	 * future rollback to undo more journal records (PBLKs). In this case, we will go ahead and remove shared memory (without
	 * flushing the contents) in this routine. A reissue of the recover/rollback command will restore the database to a
	 * consistent state.
	 */
	if (!cnl->donotflush_dbjnl && !reg->read_only && !vermismatch)
	{	/* If we had an orphaned block and were interrupted, set wc_blocked so we can invoke wcs_recover. Do it ONLY
		 * if there is NO concurrent online rollback running (as we need crit to set wc_blocked)
		 */
		if (csa->wbuf_dqd && !is_mm)
		{	/* If we had an orphaned block and were interrupted, mupip_exit_handler will invoke secshr_db_clnup which
			 * will clear this field and so we should never come to gds_rundown with a non-zero wbuf_dqd. The only
			 * exception is if we are recover/rollback in which case gds_rundown (from mur_close_files) is invoked
			 * BEFORE secshr_db_clnup in mur_close_files.
			 * Note: It is NOT possible for online rollback to reach here with wbuf_dqd being non-zero. This is because
			 * the moment we apply the first PBLK, we stop all interrupts and hence can never be interrupted in
			 * wcs_wtstart or wcs_get_space. Assert accordingly.
			 */
			assert(mupip_jnl_recover && !jgbl.onlnrlbk && !safe_mode);
			if (!was_crit)
				grab_crit(reg);
			SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE);
			BG_TRACE_PRO_ANY(csa, wcb_gds_rundown);
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_WCBLOCKED, 6, LEN_AND_LIT("wcb_gds_rundown"),
				process_id, &csa->ti->curr_tn, DB_LEN_STR(reg));
			csa->wbuf_dqd = 0;
			wcs_recover(reg);
			BG_TRACE_PRO_ANY(csa, lost_block_recovery);
			if (!was_crit)
				rel_crit(reg);
		}
		if (JNL_ENABLED(csd) && IS_GTCM_GNP_SERVER_IMAGE)
			originator_prc_vec = NULL;
		/* If we are the last writing user, then everything must be flushed */
		if (we_are_last_writer)
		{	/* Time to flush out all of our buffers */
			assert(!safe_mode);
			if (is_mm)
			{
				MM_DBFILEXT_REMAP_IF_NEEDED(csa, reg);
				cnl->remove_shm = TRUE;
			}
			if (cnl->wc_blocked && jgbl.onlnrlbk)
			{	/* if the last update done by online rollback was not committed in the normal code-path but was
				 * completed by secshr_db_clnup, wc_blocked will be set to TRUE. But, since online rollback never
				 * invokes grab_crit (since csa->hold_onto_crit is set to TRUE), wcs_recover is never invoked. This
				 * could result in the last update never getting flushed to the disk and if online rollback happened
				 * to be the last writer then the shared memory will be flushed and removed and the last update will
				 * be lost. So, force wcs_recover if we find ourselves in such a situation. But, wc_blocked is
				 * possible only if phase1 or phase2 errors are induced using white box test cases
				 */
				assert(WB_COMMIT_ERR_ENABLED);
				wcs_recover(reg);
			}
			/* Note WCSFLU_SYNC_EPOCH ensures the epoch is synced to the journal and indirectly
			 * also ensures that the db is fsynced. We don't want to use it in the calls to
			 * wcs_flu() from t_end() and tp_tend() since we can defer it to out-of-crit there.
			 * In this case, since we are running down, we don't have any such option.
			 */
			cnl->remove_shm = wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH);
			/* Since we_are_last_writer, we should be guaranteed that wcs_flu() did not change csd, (in
			 * case of MM for potential file extension), even if it did a grab_crit().  Therefore, make
			 * sure that's true.
			 */
			assert(csd == csa->hdr);
			assert(0 == memcmp(csd->label, GDS_LABEL, GDS_LABEL_SZ - 1));
		} else if (((canceled_flush_timer && (0 > cnl->wcs_timers)) || canceled_dbsync_timer) && !inst_is_frozen)
		{	/* canceled pending db or jnl flush timers - flush database and journal buffers to disk */
			if (!was_crit)
				grab_crit(reg);
			/* we need to sync the epoch as the fact that there is no active pending flush timer implies
			 * there will be no one else who will flush the dirty buffers and EPOCH to disk in a timely fashion
			 */
			wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_SYNC_EPOCH);
			if (!was_crit)
				rel_crit(reg);
			assert((dba_mm == cs_data->acc_meth) || (csd == cs_data));
			csd = cs_data;	/* In case this is MM and wcs_flu() remapped an extended database, reset csd */
		}
		/* Do rundown journal processing after buffer flushes since they require jnl to be open */
		if (JNL_ENABLED(csd))
		{	/* the following tp_change_reg() is not needed due to the assert csa == cs_addrs at the beginning
			 * of gds_rundown(), but just to be safe. To be removed by 2002!! --- nars -- 2001/04/25.
			 */
			tp_change_reg();	/* call this because jnl_ensure_open checks cs_addrs rather than gv_cur_region */
			jpc = csa->jnl;
			jbp = jpc->jnl_buff;
			if (jbp->fsync_in_prog_latch.u.parts.latch_pid == process_id)
			{
				assert(FALSE);
				COMPSWAP_UNLOCK(&jbp->fsync_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0);
			}
			if (jbp->io_in_prog_latch.u.parts.latch_pid == process_id)
			{
				assert(FALSE);
				COMPSWAP_UNLOCK(&jbp->io_in_prog_latch, process_id, 0, LOCK_AVAILABLE, 0);
			}
			if ((((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc))
				|| we_are_last_writer && (0 != cnl->jnl_file.u.inode)) && ok_to_write_pfin)
			{	/* We need to close the journal file cleanly if we have the latest generation journal file open
				 *	or if we are the last writer and the journal file is open in shared memory (not necessarily
				 *	by ourselves e.g. the only process that opened the journal got shot abnormally)
				 * Note: we should not infer anything from the shared memory value of cnl->jnl_file.u.inode
				 * 	if we are not the last writer as it can be concurrently updated.
				 */
				if (!was_crit)
					grab_crit(reg);
				if (JNL_ENABLED(csd))
				{
					SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pini/pfin/jnl_file_close all need it */
					/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
					 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
					 * journal records (if it decides to switch to a new journal file).
					 */
					ADJUST_GBL_JREC_TIME(jgbl, jbp);
					jnl_status = jnl_ensure_open();
					if (0 == jnl_status)
					{	/* If we_are_last_writer, we would have already done a wcs_flu() which would
						 * have written an epoch record and we are guaranteed no further updates
						 * since we are the last writer. So, just close the journal.
						 * If the freeaddr == post_epoch_freeaddr, wcs_flu may have skipped writing
						 * a pini, so allow for that.
						 */
						assert(!jbp->before_images || is_mm
						    || !we_are_last_writer || (0 != jpc->pini_addr) || jgbl.mur_extract
						    || (jpc->jnl_buff->freeaddr == jpc->jnl_buff->post_epoch_freeaddr));
						/* If we haven't written a pini, let jnl_file_close write the pini/pfin. */
						if (!jgbl.mur_extract && (0 != jpc->pini_addr))
							jnl_put_jrt_pfin(csa);
						/* If not the last writer and no pending flush timer left, do jnl flush now */
						if (!we_are_last_writer && (0 > cnl->wcs_timers))
						{
							if (SS_NORMAL == (jnl_status = jnl_flush(reg)))
							{
								assert(jbp->freeaddr == jbp->dskaddr);
								jnl_fsync(reg, jbp->dskaddr);
								assert(jbp->fsync_dskaddr == jbp->dskaddr);
							} else
							{
								send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2,
									JNL_LEN_STR(csd), ERR_TEXT, 2,
									RTS_ERROR_TEXT("Error with journal flush in gds_rundown"),
									jnl_status);
								assert(NOJNL == jpc->channel);/* jnl file lost has been triggered */
								/* In this routine, all code that follows from here on does not
								 * assume anything about the journaling characteristics of this
								 * database so it is safe to continue execution even though
								 * journaling got closed in the middle.
								 */
							}
						}
						jnl_file_close(reg, we_are_last_writer, FALSE);
					} else
						send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd),
								DB_LEN_STR(reg));
				}
				if (!was_crit)
					rel_crit(reg);
			}
		}
		if (we_are_last_writer)			/* Flush the fileheader last and harden the file to disk */
		{
			if (!was_crit)
				grab_crit(reg);			/* To satisfy crit requirement in fileheader_sync() */
			memset(csd->machine_name, 0, MAX_MCNAMELEN); /* clear the machine_name field */
			if (!have_standalone_access && we_are_last_user)
			{	/* mupip_exit_handler will do this after mur_close_file */
				csd->semid = INVALID_SEMID;
				csd->shmid = INVALID_SHMID;
				csd->gt_sem_ctime.ctime = 0;
				csd->gt_shm_ctime.ctime = 0;
			}
			fileheader_sync(reg);
			if (!was_crit)
				rel_crit(reg);
			if (!is_mm)
			{
				GTM_DB_FSYNC(csa, udi->fd, rc);		/* Sync it all */
				if (-1 == rc)
				{
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						  ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno);
				}
			} else
			{	/* Now do final MM file sync before exit */
				assert(csa->ti->total_blks == csa->total_blks);
				#ifdef _AIX
				GTM_DB_FSYNC(csa, udi->fd, rc);
				if (-1 == rc)
				#else
				if (-1 == MSYNC((caddr_t)csa->db_addrs[0], (caddr_t)csa->db_addrs[1]))
				#endif
				{
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						  ERR_TEXT, 2, RTS_ERROR_TEXT("Error during file sync at close"), errno);
				}
			}
		} else if (unsafe_last_writer && !cnl->lastwriterbypas_msg_issued)
		{
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_LASTWRITERBYPAS, 2, DB_LEN_STR(reg));
			cnl->lastwriterbypas_msg_issued = TRUE;
		}
	} /* end if (!reg->read_only && !cnl->donotflush_dbjnl) */
	/* We had canceled all db timers at start of rundown. In case as part of rundown (wcs_flu above), we had started
	 * any timers, cancel them BEFORE setting reg->open to FALSE (assert in wcs_clean_dbsync relies on this).
	 */
	CANCEL_DB_TIMERS(reg, csa, canceled_flush_timer, canceled_dbsync_timer);
	if (reg->read_only && we_are_last_user && !have_standalone_access && cnl->remove_shm)
	{	/* mupip_exit_handler will do this after mur_close_file */
		db_ipcs.semid = INVALID_SEMID;
		db_ipcs.shmid = INVALID_SHMID;
		db_ipcs.gt_sem_ctime = 0;
		db_ipcs.gt_shm_ctime = 0;
		db_ipcs.fn_len = reg->dyn.addr->fname_len;
		memcpy(db_ipcs.fn, reg->dyn.addr->fname, reg->dyn.addr->fname_len);
		db_ipcs.fn[reg->dyn.addr->fname_len] = 0;
		/* request gtmsecshr to flush. read_only cannot flush itself */
		WAIT_FOR_REPL_INST_UNFREEZE_SAFE(csa);
		if (!csa->read_only_fs)
		{
			secshrstat = send_mesg2gtmsecshr(FLUSH_DB_IPCS_INFO, 0, (char *)NULL, 0);
			if (0 != secshrstat)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					  ERR_TEXT, 2, RTS_ERROR_TEXT("gtmsecshr failed to update database file header"));
		}
	}
	/* Done with file now, close it */
	CLOSEFILE_RESET(udi->fd, rc);	/* resets "udi->fd" to FD_INVALID */
	if (-1 == rc)
	{
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
			  ERR_TEXT, 2, LEN_AND_LIT("Error during file close"), errno);
	}
	/* Unmap storage if mm mode, but only the part that is not the fileheader (so it shows up in dumps) */
#	if !defined(_AIX)
	if (is_mm && (NULL != csa->db_addrs[0]))
	{
		assert(csa->db_addrs[1] > csa->db_addrs[0]);
		munmap_len = (sm_long_t)(csa->db_addrs[1] - csa->db_addrs[0]);
		if (0 < munmap_len)
			munmap((caddr_t)(csa->db_addrs[0]), (size_t)(munmap_len));
	}
#	endif
	/* Detach our shared memory while still under lock so reference counts will be correct for the next process to run down
	 * this region. In the process also get the remove_shm status from node_local before detaching.
	 * If cnl->donotflush_dbjnl is TRUE, it means we can safely remove shared memory without compromising data
	 * integrity as a reissue of recover will restore the database to a consistent state.
	 */
	remove_shm = !vermismatch && (cnl->remove_shm || cnl->donotflush_dbjnl);
	/* We are done with online rollback on this region. Indicate to other processes by setting the onln_rlbk_pid to 0.
	 * Do it before releasing crit (t_end relies on this ordering when accessing cnl->onln_rlbk_pid).
	 */
	if (jgbl.onlnrlbk)
		cnl->onln_rlbk_pid = 0;
	rel_crit(reg); /* Since we are about to detach from the shared memory, release crit and reset onln_rlbk_pid */
	/* If we had skipped flushing journal and database buffers due to a concurrent online rollback, increment the counter
	 * indicating that in the shared memory so that online rollback can report the # of such processes when it shuts down.
	 * The same thing is done for both FTOK and access control semaphores when there are too many MUMPS processes.
	 */
	if (safe_mode) /* indicates flushing was skipped */
	{
		if (bypassed_access)
			cnl->dbrndwn_access_skip++; /* Access semaphore can be bypassed during online rollback */
		if (bypassed_ftok)
			cnl->dbrndwn_ftok_skip++;
	}
	if (jgbl.onlnrlbk)
		csa->hold_onto_crit = FALSE;
	GTM_WHITE_BOX_TEST(WBTEST_HOLD_SEM_BYPASS, cnl->wbox_test_seq_num, 0);
	status = shmdt((caddr_t)cnl);
	csa->nl = NULL; /* dereferencing nl after detach is not right, so we set it to NULL so that we can test before dereference*/
	/* Note that although csa->nl is NULL, we use CSA_ARG(csa) below (not CSA_ARG(NULL)) to be consistent with similar
	 * usages before csa->nl became NULL. The "is_anticipatory_freeze_needed" function (which is in turn called by the
	 * CHECK_IF_FREEZE_ON_ERROR_NEEDED macro) does a check of csa->nl before dereferencing shared memory contents so
	 * we are safe passing "csa".
	 */
	if (-1 == status)
		send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg), ERR_TEXT, 2,
				LEN_AND_LIT("Error during shmdt"), errno);
	REMOVE_CSA_FROM_CSADDRSLIST(csa);	/* remove "csa" from list of open regions (cs_addrs_list) */
	reg->open = FALSE;
	/* If file is still not in good shape, die here and now before we get rid of our storage */
	assertpro(0 == csa->wbuf_dqd);
	ipc_deleted = FALSE;
	/* If we are the very last user, remove shared storage id and the semaphores */
	if (we_are_last_user)
	{	/* remove shared storage, only if last writer to rundown did a successful wcs_flu() */
		assert(!vermismatch);
		if (remove_shm)
		{
			ipc_deleted = TRUE;
			if (0 != shm_rmid(udi->shmid))
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove shared memory"));
			/* Note that we no longer have a new shared memory. Currently only used/usable for standalone rollback. */
			udi->new_shm = FALSE;
			/* mupip recover/rollback don't release the semaphore here, but do it later in db_ipcs_reset (invoked from
			 * mur_close_files())
			 */
			if (!have_standalone_access)
			{
				if (0 != sem_rmid(udi->semid))
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						      ERR_TEXT, 2, RTS_ERROR_TEXT("Unable to remove semaphore"));
				udi->new_sem = FALSE;			/* Note that we no longer have a new semaphore */
				udi->grabbed_access_sem = FALSE;
				udi->counter_acc_incremented = FALSE;
			}
		} else if (is_src_server || is_updproc)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id);
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id);
		} else
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_DBRNDWNWRN, 4, DB_LEN_STR(reg), process_id, process_id);
	} else
	{
		assert(!have_standalone_access || jgbl.onlnrlbk || safe_mode);
		if (!jgbl.onlnrlbk && !have_standalone_access)
		{ 	/* If we were writing, get rid of our writer access count semaphore */
			if (!reg->read_only)
			{
				if (!access_counter_halted)
				{
					save_errno = do_semop(udi->semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO);
					if (0 != save_errno)
						rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
								ERR_SYSCALL, 5,
								RTS_ERROR_TEXT("gds_rundown access control semaphore decrement"),
								CALLFROM, save_errno);
				}
				udi->counter_acc_incremented = FALSE;
			}
			assert(safe_mode || !bypassed_access);
			/* Now remove the rundown lock */
			if (!bypassed_access)
			{
				if (0 != (save_errno = do_semop(udi->semid, DB_CONTROL_SEM, -1, SEM_UNDO)))
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(12) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg),
							ERR_SYSCALL, 5,
							RTS_ERROR_TEXT("gds_rundown access control semaphore release"),
							CALLFROM, save_errno);
				udi->grabbed_access_sem = FALSE;
			}
		} /* else access control semaphore will be released in db_ipcs_reset */
	}
	if (!have_standalone_access)
	{
		if (bypassed_ftok)
		{
			if (!ftok_counter_halted)
				if (0 != (save_errno = do_semop(udi->ftok_semid, DB_COUNTER_SEM, -DB_COUNTER_SEM_INCR, SEM_UNDO)))
					rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
		} else if (!ftok_sem_release(reg, !ftok_counter_halted, FALSE))
		{
			FTOK_TRACE(csa, csa->ti->curr_tn, ftok_ops_release, process_id);
			rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
		}
		udi->grabbed_ftok_sem = FALSE;
		udi->counter_ftok_incremented = FALSE;
	}
	ENABLE_INTERRUPTS(INTRPT_IN_GDS_RUNDOWN, prev_intrpt_state);
	if (!ipc_deleted)
	{
		GET_CUR_TIME(time_str);
		if (is_src_server)
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str,
				LEN_AND_LIT("Source server"), REG_LEN_STR(reg));
		if (is_updproc)
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str,
				LEN_AND_LIT("Update process"), REG_LEN_STR(reg));
		if (mupip_jnl_recover && (!jgbl.onlnrlbk || !we_are_last_user))
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str,
				LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg));
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_IPCNOTDEL, 6, CTIME_BEFORE_NL, time_str,
				LEN_AND_LIT("Mupip journal process"), REG_LEN_STR(reg));
		}
	}
	REVERT;
	return EXIT_NRM;
}
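The rundown tail above leans on System V IPC: do_semop() with SEM_UNDO for the access counters, and shm_rmid()/sem_rmid() when the last user removes the segment and the semaphore set. Below is a minimal sketch of the underlying raw calls, assuming only <sys/sem.h> and <sys/shm.h>; the helper names are illustrative, not GT.M's actual wrappers.

#include <errno.h>
#include <sys/sem.h>
#include <sys/shm.h>

/* Decrement one access-counter slot of a SysV semaphore set. SEM_UNDO tells the
 * kernel to back the operation out automatically if this process exits before a
 * clean rundown, so the counter stays trustworthy for surviving processes.
 * Returns 0 on success, errno on failure (the save_errno convention above).
 */
static int counter_decr(int semid, int sem_num, int incr)
{
	struct sembuf	sop;

	sop.sem_num = (unsigned short)sem_num;	/* which slot, e.g. DB_COUNTER_SEM */
	sop.sem_op = (short)-incr;		/* negative value decrements the counter */
	sop.sem_flg = SEM_UNDO;			/* kernel-managed undo on abnormal exit */
	return (-1 == semop(semid, &sop, 1)) ? errno : 0;
}

/* Last user only: remove the shared memory segment, then the semaphore set. */
static int remove_ipc(int shmid, int semid)
{
	if (-1 == shmctl(shmid, IPC_RMID, NULL))
		return errno;
	if (-1 == semctl(semid, 0, IPC_RMID))	/* semnum is ignored for IPC_RMID */
		return errno;
	return 0;
}

SEM_UNDO is what makes the counter self-correcting: if a process is killed between incrementing the counter and running down, the kernel reverses its pending operations, which is why the counts can be trusted when deciding we_are_last_user.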
Example #6
0
/*
 * -----------------------------------------------
 * gvn2gds()
 * Converts a global variable name (GVN) into its internal database representation (GDS)
 *
 * Arguments:
 *	gvn	      - Pointer to source name string mval. Must be in GVN form.
 *	gvkey	      - Pointer to a gv_key buffer large enough to fit the whole GDS of the passed-in GVN.
 *	act	      - Alternate collation sequence number (0 means no alternate collation).
 * Return:
 *	unsigned char - Pointer to the end of the GDS written to gvkey.
 * -----------------------------------------------
 */
unsigned char *gvn2gds(mval *gvn, gv_key *gvkey, int act)
{
	boolean_t	est_first_pass, retn;
	collseq 	*csp;
	gd_region	tmpreg;
	gv_namehead	temp_gv_target;
	unsigned char 	*key, *key_top, *key_start;
	int		subscript, i, contains_env;
	int		*start, *stop;
	gv_name_and_subscripts 	start_buff, stop_buff;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* determine which buffer to use */
	DETERMINE_BUFFER(gvn, start_buff, stop_buff, start, stop);
	if (0 != act)
	{
		csp = ready_collseq(act);
		if (NULL == csp)
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(3) ERR_COLLATIONUNDEF, 1, act);
	} else
		csp = NULL;	/* Do not issue COLLATIONUNDEF for 0 collation */
	retn = TRUE;
	assert(MV_IS_STRING(gvn));
	key_start = &gvkey->base[0];
	key = key_start;
	gvkey->prev = 0;
	gvkey->top = DBKEYSIZE(MAX_KEY_SZ);
	key_top = key_start + gvkey->top;
	/* We will parse all of the components up front. */
	if (!parse_gv_name_and_subscripts(gvn, &subscript, start, stop, &contains_env))
		NOCANONICNAME_ERROR(gvn);
	if (stop[contains_env] - start[contains_env] > gvkey->top)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_GVSUBOFLOW);
	memcpy(key, gvn->str.addr + start[contains_env], stop[contains_env] - start[contains_env]);
	key += stop[contains_env] - start[contains_env];
	*key++ = KEY_DELIMITER;
	gvkey->end = key - key_start;
	/* Temporarily repoint global variables "gv_cur_region", "gv_target" and "transform".
	 * They are needed by mval2subsc for the following
	 *	"transform", "gv_target->nct", "gv_target->collseq" and "gv_cur_region->std_null_coll"
	 * Note that transform is usually ON, specifying that collation transformation is "enabled",
	 * and is only shut off for minor periods when something is being critically formatted (like
	 * we're doing here). Note that mval2subsc could issue an rts_error, so we establish a
	 * condition handler to restore the above.
	 */
	save_transform = TREF(transform);
	assert(save_transform);
	TREF(transform) = TRUE;
	reset_gv_target = gv_target;
	gv_target = &temp_gv_target;
	memset(gv_target, 0, SIZEOF(gv_namehead));
	gv_target->collseq = csp;
	memset(&tmpreg, 0, SIZEOF(gd_region));
	/* Assign "gv_cur_region" only after tmpreg has been fully initialized or timer interrupts can look at inconsistent copy */
	save_gv_cur_region = gv_cur_region;
	gv_cur_region = &tmpreg;
	gv_cur_region->std_null_coll = TRUE;
	ESTABLISH_NORET(gvn2gds_ch, est_first_pass);
	/* we know the number of subscripts, so we convert them all */
	for (i = 1 + contains_env; i <= contains_env + subscript; ++i)
	{
		if (!(retn = convert_key_to_db(gvn, start[i], stop[i], gvkey, &key)))
			break;
	}
	REVERT;
	RESTORE_GBL_VARS_BEFORE_FUN_RETURN;
	if (!retn || !CAN_APPEND_HIDDEN_SUBS(gvkey))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_GVSUBOFLOW);
	*key++ = KEY_DELIMITER;	/* add double terminating null byte */
	assert(key <= key_top);
	return key;
}
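Since gvn2gds() is the conversion entry point here, a hedged usage sketch may help. The mval field names (mvtype/str), MV_STR, and the DBKEYALLOC/DBKEYSIZE sizing idiom are assumptions about the usual GT.M headers, not code taken from this file.

/* Hedged usage sketch for gvn2gds(): convert the GVN ^GBL("a",1) to its GDS key. */
static unsigned char *example_gvn2gds(void)
{
	static const char	gvn_text[] = "^GBL(\"a\",1)";
	mval			gvn;
	gv_key			keybuf[DBKEYALLOC(MAX_KEY_SZ)];	/* assumed idiom: max-sized, aligned key buffer */
	unsigned char		*key_end;

	gvn.mvtype = MV_STR;			/* gvn2gds asserts MV_IS_STRING(gvn) */
	gvn.str.addr = (char *)gvn_text;
	gvn.str.len = SIZEOF(gvn_text) - 1;	/* exclude the trailing null */
	key_end = gvn2gds(&gvn, &keybuf[0], 0);	/* act == 0 ==> no alternate collation */
	return key_end;	/* points just past the double terminating null written to keybuf */
}

Passing act == 0 skips ready_collseq() entirely, matching the "do not issue COLLATIONUNDEF for 0 collation" branch above.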
void	gvcst_spr_kill(void)
{
	boolean_t	spr_tpwrapped;
	boolean_t	est_first_pass;
	int		reg_index;
	gd_binding	*start_map, *end_map, *map;
	gd_region	*reg, *gd_reg_start;
	gd_addr		*addr;
	gv_namehead	*start_map_gvt;
	gvnh_reg_t	*gvnh_reg;
	trans_num	gd_targ_tn, *tn_array;
#	ifdef DEBUG
	int		save_dollar_tlevel;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	start_map = TREF(gd_targ_map);	/* set up by the op_gvname/op_gvnaked/op_gvextnam call made just before invoking op_gvkill */
	start_map_gvt = gv_target;	/* save gv_target corresponding to start_map so we can restore at end */
	/* Find out if the next (in terms of $order) key maps to the same map entry as gv_currkey. If so, no spanning
	 * activity is needed.
	 */
	GVKEY_INCREMENT_ORDER(gv_currkey);
	end_map = gv_srch_map_linear(start_map, (char *)&gv_currkey->base[0], gv_currkey->end - 1);
	BACK_OFF_ONE_MAP_ENTRY_IF_EDGECASE(gv_currkey->base, gv_currkey->end - 1, end_map);
	GVKEY_UNDO_INCREMENT_ORDER(gv_currkey);
	if (start_map == end_map)
	{
		assert(gv_target == start_map_gvt);
		if (IS_OK_TO_INVOKE_GVCST_KILL(start_map_gvt))
			gvcst_kill(TRUE);
		return;
	}
	/* Do any initialization that is independent of retries BEFORE the op_tstart */
	addr = TREF(gd_targ_addr);
	assert(NULL != addr);
	gd_reg_start = &addr->regions[0];
	tn_array = TREF(gd_targ_reg_array);
	gvnh_reg = TREF(gd_targ_gvnh_reg);
	assert(NULL != gvnh_reg);
	assert(NULL != gvnh_reg->gvspan);
	/* Now that we know the keyrange maps to more than one region, go through each of them and do the kill.
	 * Since multiple regions are potentially involved, need a TP fence.
	 */
	DEBUG_ONLY(save_dollar_tlevel = dollar_tlevel);
	if (!dollar_tlevel)
	{
		spr_tpwrapped = TRUE;
		op_tstart((IMPLICIT_TSTART), TRUE, &literal_batch, 0);
		ESTABLISH_NORET(gvcst_spr_kill_ch, est_first_pass);
		GVCST_ROOT_SEARCH_AND_PREP(est_first_pass);
	} else
		spr_tpwrapped = FALSE;
	assert(gv_cur_region == start_map->reg.addr);
	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC(CHECK_CSA_TRUE);
	/* Do any initialization that is dependent on retries AFTER the op_tstart */
	map = start_map;
	INCREMENT_GD_TARG_TN(gd_targ_tn); /* takes a copy of incremented "TREF(gd_targ_tn)" into local variable "gd_targ_tn" */
	/* Verify that initializations that happened before op_tstart are still unchanged */
	assert(addr == TREF(gd_targ_addr));
	assert(tn_array == TREF(gd_targ_reg_array));
	assert(gvnh_reg == TREF(gd_targ_gvnh_reg));
	for ( ; map <= end_map; map++)
	{
		reg = map->reg.addr;
		GET_REG_INDEX(addr, gd_reg_start, reg, reg_index);	/* sets "reg_index" */
		assert((map != start_map) || (tn_array[reg_index] != gd_targ_tn));
		assert(TREF(gd_targ_reg_array_size) > reg_index);
		if (tn_array[reg_index] == gd_targ_tn)
			continue;
		if (map != start_map)
			GV_BIND_SUBSREG(addr, reg, gvnh_reg);	/* sets gv_target/gv_cur_region/cs_addrs */
		assert(reg->open);
		if (IS_OK_TO_INVOKE_GVCST_KILL(gv_target))
			gvcst_kill(TRUE);
		tn_array[reg_index] = gd_targ_tn;
	}
	if (gv_target != start_map_gvt)
	{	/* Restore gv_cur_region/gv_target etc. */
		gv_target = start_map_gvt;
		gv_cur_region = start_map->reg.addr;
		change_reg();
	}
	DBG_CHECK_GVTARGET_GVCURRKEY_IN_SYNC(CHECK_CSA_TRUE);
	if (spr_tpwrapped)
	{
		op_tcommit();
		REVERT; /* remove our condition handler */
	}
	assert(save_dollar_tlevel == dollar_tlevel);
	return;
}
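The tn_array[reg_index] == gd_targ_tn test above is a generation-counter de-duplication idiom: instead of clearing a visited-flag array at the start of every spanning kill, INCREMENT_GD_TARG_TN invalidates all previous stamps at once. A self-contained sketch of the same idiom with generic names follows; nothing below is GT.M code.

#include <stdio.h>

#define NUM_REGIONS	8

/* Visit each region referenced by a (possibly duplicated) list of map entries
 * exactly once per pass. Rather than memset()ing a boolean array on every call,
 * bump a generation counter; a stamp equal to the current generation means
 * "already visited this pass", and stale stamps from earlier passes are ignored.
 */
static unsigned long	visit_stamp[NUM_REGIONS];
static unsigned long	generation;

static void visit_regions_once(const int *map_region_index, int nmaps)
{
	int	i, reg_index;

	generation++;	/* new pass: all previous stamps instantly become stale */
	for (i = 0; i < nmaps; i++)
	{
		reg_index = map_region_index[i];
		if (visit_stamp[reg_index] == generation)
			continue;	/* region already handled in this pass */
		printf("visiting region %d\n", reg_index);
		visit_stamp[reg_index] = generation;
	}
}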