Ejemplo n.º 1
0
int4	add_inter(int val, sm_int_ptr_t addr, sm_global_latch_ptr_t latch)
{
	int4			cntrval, newcntrval, spins, maxspins, retries;
	boolean_t		cswpsuccess;
	sm_int_ptr_t volatile	cntrval_p;

	++fast_lock_count;
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	cntrval_p = addr;	/* Need volatile context especially on Itanium */
        for (retries = LOCK_TRIES - 1; 0 < retries; retries--)  /* - 1 so do rel_quant 3 times first */
        {	/* seems like a legitinate spin which could take advantage of transactional memory */
		for (spins = maxspins; 0 < spins; spins--)
		{
			cntrval = *cntrval_p;
			newcntrval = cntrval + val;
			/* This is (currently as of 08/2007) the only non-locking usage of compswap in GT.M. We
			   are not passing compswap an actual sm_global_latch_ptr_t addr like its function would
			   normally dictate. However, since the address of the field we want to deal with is the
			   first int in the global_latch_t, we just pass our int address properly cast to the
			   type that compswap is expecting. The assert below verifies that this assumption has
			   not changed (SE 08/2007)
			*/
			assert(0 == OFFSETOF(global_latch_t, u.parts.latch_pid));
			IA64_ONLY(cswpsuccess = compswap_unlock(RECAST(sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			NON_IA64_ONLY(cswpsuccess = compswap((sm_global_latch_ptr_t)cntrval_p, cntrval, newcntrval));
			if (cswpsuccess)
			{
				--fast_lock_count;
				assert(0 <= fast_lock_count);
				return newcntrval;
			}
		}
		if (retries & 0x3)
			/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();	/* Release processor to holder of lock (hopefully) */
		else
		{
			/* On every 4th pass, we bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			assert(0 == (LOCK_TRIES % 4)); /* assures there are 3 rel_quants prior to first wcs_sleep() */
		}
	}
	--fast_lock_count;
	assert(FALSE);
	rts_error_csa(CSA_ARG(NULL) VARLSTCNT(9) ERR_DBCCERR, 2, LEN_AND_LIT("*unknown*"), ERR_ERRCALL, 3, CALLFROM);
	return 0; /* To keep the compiler quiet */
}
Ejemplo n.º 2
0
/*
 * -----------------------------------------------
 * Maintain in parallel with op_zalloc2
 * Arguments:
 *	timeout	- max. time to wait for locks before giving up
 *      laflag - passed to gvcmx* routines as "laflag" argument;
 *		 originally indicated the request was a Lock or
 *		 zAllocate request (hence the name "laflag"), but
 *		 now capable of holding more values signifying
 *		 additional information
 *
 * Return:
 *	1 - if not timeout specified
 *	if timeout specified:
 *		!= 0 - all the locks int the list obtained, or
 *		0 - blocked
 *	The return result is suited to be placed directly into
 *	the $T variable by the caller if timeout is specified.
 * -----------------------------------------------
 */
int	op_lock2(int4 timeout, unsigned char laflag)	/* timeout is in seconds */
{
	boolean_t	blocked, timer_on;
	signed char	gotit;
	unsigned short	locks_bckout, locks_done;
	int4		msec_timeout;	/* timeout in milliseconds */
	mlk_pvtblk	*pvt_ptr1, *pvt_ptr2, **prior;
	unsigned char	action;
	ABS_TIME	cur_time, end_time, remain_time;
	mv_stent	*mv_zintcmd;

	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	gotit = -1;
	cm_action = laflag;
	out_of_time = FALSE;
	if (timeout < 0)
		timeout = 0;
	else if (TREF(tpnotacidtime) < timeout)
		TPNOTACID_CHECK(LOCKTIMESTR);
	if (!(timer_on = (NO_M_TIMEOUT != timeout)))	/* NOTE assignment */
		msec_timeout = NO_M_TIMEOUT;
	else
	{
		msec_timeout = timeout2msec(timeout);
		if (0 == msec_timeout)
		{
			out_of_time = TRUE;
			timer_on = FALSE;
		} else
		{
			mv_zintcmd = find_mvstent_cmd(ZINTCMD_LOCK, restart_pc, restart_ctxt, FALSE);
			if (mv_zintcmd)
			{
				remain_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
				if (0 <= remain_time.at_sec)
					msec_timeout = (int4)(remain_time.at_sec * 1000 + remain_time.at_usec / 1000);
				else
					msec_timeout = 0;
				TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last
					= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
				TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last
					= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
				TAREF1(zintcmd_active, ZINTCMD_LOCK).count--;
				assert(0 <= TAREF1(zintcmd_active, ZINTCMD_LOCK).count);
				if (mv_chain == mv_zintcmd)
					POP_MV_STENT(); /* just pop if top of stack */
				else
				{       /* flag as not active */
					mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
					mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
				}
			}
			if (0 < msec_timeout)
			{
				sys_get_curr_time(&cur_time);
				add_int_to_abs_time(&cur_time, msec_timeout, &end_time);
				start_timer((TID)&timer_on, msec_timeout, wake_alarm, 0, NULL);
			} else
			{
				out_of_time = TRUE;
				timer_on = FALSE;
			}
		}
	}
	lckclr();
	for (blocked = FALSE;  !blocked;)
	{	/* if this is a request for a remote node */
		if (remlkreq)
		{
			if (gotit >= 0)
				gotit = gvcmx_resremlk(cm_action);
			else
				gotit = gvcmx_reqremlk(cm_action, msec_timeout);	/* REQIMMED if 2nd arg == 0 */
			if (!gotit)
			{	/* only REQIMMED returns false */
				blocked = TRUE;
				break;
			}
		}
		for (pvt_ptr1 = mlk_pvt_root, locks_done = 0;  locks_done < lks_this_cmd;  pvt_ptr1 = pvt_ptr1->next, locks_done++)
		{	/* Go thru the list of all locks to be obtained attempting to lock
			 * each one. If any lock could not be obtained, break out of the loop
			 */
			if (!mlk_lock(pvt_ptr1, 0, TRUE))
			{	/* If lock is obtained */
				pvt_ptr1->granted = TRUE;
				switch (laflag)
				{
				case CM_LOCKS:
					pvt_ptr1->level = 1;
					break;
				case INCREMENTAL:
					if (pvt_ptr1->level < 511) /* The same lock can not be incremented more than 511 times. */
						pvt_ptr1->level += pvt_ptr1->translev;
					else
						level_err(pvt_ptr1);
					break;
				default:
					GTMASSERT;
					break;
				}
			} else
			{
				blocked = TRUE;
				break;
			}
		}
		/* If we did not get blocked, we are all done */
		if (!blocked)
			break;
		/* We got blocked and need to keep retrying after some time interval */
		if (remlkreq)
			gvcmx_susremlk(cm_action);
		switch (cm_action)
		{
		case CM_LOCKS:
			action = LOCKED;
			break;
		case INCREMENTAL:
			action = INCREMENTAL;
			break;
		default:
			GTMASSERT;
			break;
		}
		for (pvt_ptr2 = mlk_pvt_root, locks_bckout = 0;  locks_bckout < locks_done;
			pvt_ptr2 = pvt_ptr2->next, locks_bckout++)
		{
			assert(pvt_ptr2->granted && (pvt_ptr2 != pvt_ptr1));
			mlk_bckout(pvt_ptr2, action);
		}
		if (dollar_tlevel && (CDB_STAGNATE <= t_tries))
		{	/* upper TPNOTACID_CHECK conditioned on no short timeout; this one rel_crits to avoid potential deadlock */
			assert(TREF(tpnotacidtime) >= timeout);
			TPNOTACID_CHECK(LOCKTIMESTR);
		}
		for (;;)
		{
			if (out_of_time || outofband)
			{	/* if time expired || control-c, tptimeout, or jobinterrupt encountered */
				if (outofband || !lk_check_own(pvt_ptr1))
				{	/* If CTL-C, check lock owner */
					if (pvt_ptr1->nodptr)		/* Get off pending list to be sent a wake */
						mlk_unpend(pvt_ptr1);
					/* Cancel all remote locks obtained so far */
					if (remlkreq)
					{
						gvcmx_canremlk();
						gvcmz_clrlkreq();
						remlkreq = FALSE;
					}
					if (outofband)
					{
						if (timer_on && !out_of_time)
						{
							cancel_timer((TID)&timer_on);
							timer_on = FALSE;
						}
						if (!out_of_time && (NO_M_TIMEOUT != timeout))
						{	/* get remain = end_time - cur_time */
							sys_get_curr_time(&cur_time);
							remain_time = sub_abs_time(&end_time, &cur_time);
							if (0 <= remain_time.at_sec)
								msec_timeout = (int4)(remain_time.at_sec * 1000
									+ remain_time.at_usec / 1000);
							else
								msec_timeout = 0;	/* treat as out_of_time */
							if (0 >= msec_timeout)
							{
								out_of_time = TRUE;
								timer_on = FALSE;	/* as if LOCK :0 */
								break;
							}
							PUSH_MV_STENT(MVST_ZINTCMD);
							mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = remain_time;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
							/* save current information from zintcmd_active */
							mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior
								= TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last;
							mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior
								= TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_pc_last = restart_pc;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).restart_ctxt_last = restart_ctxt;
							TAREF1(zintcmd_active, ZINTCMD_LOCK).count++;
							mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_LOCK;
							outofband_action(FALSE);	/* no return */
						}
					}
					break;
				}
			}
			if (!mlk_lock(pvt_ptr1, 0, FALSE))
			{	/* If we got the lock, break out of timer loop */
				blocked = FALSE;
				if (pvt_ptr1 != mlk_pvt_root)
				{
					rel_quant();		/* attempt to get a full timeslice for maximum chance to get all */
					mlk_unlock(pvt_ptr1);
				}
				break;
			}
			if (pvt_ptr1->nodptr)
				lk_check_own(pvt_ptr1);		/* clear an abandoned owner */
			hiber_start_wait_any(LOCK_SELF_WAKE);
		}
		if (blocked && out_of_time)
			break;
	}
	if (remlkreq)
	{
		gvcmz_clrlkreq();
		remlkreq = FALSE;
	}
	if (NO_M_TIMEOUT != timeout)
	{	/* was timed or immediate */
		if (timer_on && !out_of_time)
			cancel_timer((TID)&timer_on);
		if (blocked)
		{
			for (prior = &mlk_pvt_root;  *prior;)
			{
				if (!(*prior)->granted)
				{	/* if entry was never granted, delete list entry */
					mlk_pvtblk_delete(prior);
				} else
					prior = &((*prior)->next);
			}
			mlk_stats.n_user_locks_fail++;
			return (FALSE);
		}
	}
	mlk_stats.n_user_locks_success++;
	return (TRUE);
}
Ejemplo n.º 3
0
int	mur_forward_multi_proc(reg_ctl_list *rctl)
{
	boolean_t		multi_proc, this_reg_stuck, release_latch, ok_to_play;
	boolean_t		cancelled_dbsync_timer, cancelled_timer;
	reg_ctl_list		*rctl_top, *prev_rctl;
	jnl_ctl_list		*jctl;
	gd_region		*reg;
	sgmnt_addrs		*csa;
	seq_num 		rec_token_seq;
	jnl_tm_t		rec_time;
	enum broken_type	recstat;
	jnl_record		*rec;
	enum jnl_record_type	rectype;
	char			errstr[256];
	int			i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck;
	uint4			status, regcnt_stuck, num_partners, start_hrtbt_cntr;
	forw_multi_struct	*forw_multi;
	shm_forw_multi_t	*sfm;
	multi_struct 		*multi;
	jnl_tm_t		adjusted_resolve_time;
	shm_reg_ctl_t		*shm_rctl_start, *shm_rctl, *first_shm_rctl;
	size_t			shm_size, reccnt, copy_size;
	int4			*size_ptr;
	char			*shmPtr; /* not using "shm_ptr" since it is already used in an AIX include file */
	int			shmid;
	multi_proc_shm_hdr_t	*mp_hdr;	/* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */

	status = 0;
	/* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc"
	 * invocation in "mur_forward"), it is possible one of the forked process finishes one invocation of
	 * "mur_forward_multi_proc" before even another forked process gets assigned one task in "gtm_multi_proc_helper".
	 * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff
	 * so return for later invocations.
	 */
	if (mur_forward_multi_proc_done)
		return 0;
	mur_forward_multi_proc_done = TRUE;
	/* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */
	prev_rctl = NULL;
	rctl_start = NULL;
	adjusted_resolve_time = murgbl.adjusted_resolve_time;
	assert(0 == murgbl.regcnt_remaining);
	multi_proc = multi_proc_in_use;	/* cache value in "local" to speed up access inside loops below */
	if (multi_proc)
	{
		mp_hdr = multi_proc_shm_hdr;
		shm_rctl_start = mur_shm_hdr->shm_rctl_start;
		if (jgbl.onlnrlbk)
		{
			for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
			{
				assert(rctl->csa->hold_onto_crit);	/* would have been set in parent process */
				rctl->csa->hold_onto_crit = FALSE;	/* reset since we dont own this region */
				assert(rctl->csa->now_crit);		/* would have been set in parent process */
				rctl->csa->now_crit = FALSE;		/* reset since we dont own this region */
			}
		}
		START_HEARTBEAT_IF_NEEDED; /* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */
	}
	first_shm_rctl = NULL;
	/* Phase1 of forward recovery starts */
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		/* Check if "rctl" is available for us or if some other concurrent process has taken it */
		if (multi_proc)
		{
			rctl_index = rctl - &mur_ctl[0];
			shm_rctl = &shm_rctl_start[rctl_index];
			if (shm_rctl->owning_pid)
			{
				assert(process_id != shm_rctl->owning_pid);
				continue;
			}
			GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			assert(release_latch);
			for ( ; rctl < rctl_top; rctl++, shm_rctl++)
			{
				if (shm_rctl->owning_pid)
				{
					assert(process_id != shm_rctl->owning_pid);
					continue;
				}
				shm_rctl->owning_pid = process_id;	/* Declare ownership */
				rctl->this_pid_is_owner = TRUE;
				if (jgbl.onlnrlbk)
				{	/* This is an online rollback and crit was grabbed on all regions by the parent rollback
					 * process. But this child process now owns this region and does the actual rollback on
					 * this region so borrow crit for the duration of this child process.
					 */
					csa = rctl->csa;
					csa->hold_onto_crit = TRUE;
					csa->now_crit = TRUE;
					assert(csa->nl->in_crit == mp_hdr->parent_pid);
					csa->nl->in_crit = process_id;
					assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid);
					csa->nl->onln_rlbk_pid = process_id;
				}
				if (NULL == first_shm_rctl)
					first_shm_rctl = shm_rctl;
				break;
			}
			REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			if (rctl >= rctl_top)
			{
				assert(rctl == rctl_top);
				break;
			}
			/* Set key to print this rctl'ss region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
#			ifdef MUR_DEBUG
			fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key);
#			endif
		} else
			rctl->this_pid_is_owner = TRUE;
		if (mur_options.forward)
		{
			assert(NULL == rctl->jctl_turn_around);
			jctl = rctl->jctl = rctl->jctl_head;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = JNL_HDR_LEN;
			jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; /* initialized to reflect journaling is not enabled */
			if (mur_options.rollback)
				jgbl.mur_jrec_seqno = jctl->jfh->start_seqno;
		} else
		{
			jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = jctl->turn_around_offset;
			jgbl.mur_jrec_seqno = jctl->turn_around_seqno;
			assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset));
		}
		if (mur_options.rollback)
		{
			if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno)
			{
				/* Assert that murgbl.losttn_seqno is never lesser than jgbl.mur_jrec_seqno (the turnaround
				 * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will
				 * eventually be the post-rollback seqno. If this condition is violated, the result of the
				 * recovery is a compromised database (the file header will indicate a Region Seqno which
				 * is not necessarily correct since seqnos prior to it might be absent in the database).
				 * Therefore, this is an out-of-design situation with respect to rollback and so stop it.
				 */
				assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno);
				murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno;
			}
			assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno);
		}
		if (mur_options.update || mur_options.extr[GOOD_TN])
		{
			reg = rctl->gd;
			gv_cur_region = reg;
			tp_change_reg();	/* note : sets cs_addrs to non-NULL value even if gv_cur_region->open is FALSE
						 * (cs_data could still be NULL). */
			rctl->csa = cs_addrs;
			cs_addrs->miscptr = (void *)rctl;
			rctl->csd = cs_data;
			rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr;
			assert(!reg->open || (NULL != cs_addrs->dir_tree));
			gv_target = cs_addrs->dir_tree;
		}
		jctl->after_end_of_data = FALSE;
		status = mur_next(jctl, jctl->rec_offset);
		assert(ERR_JNLREADEOF != status);	/* cannot get EOF at start of forward processing */
		if (SS_NORMAL != status)
			goto finish;
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start");
		rctl->process_losttn = FALSE;
		/* Any multi-region TP transaction will be processed as multiple single-region TP transactions up
		 * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP
		 * transaction. This is needed for proper lost-tn determination (any multi-region transaction that
		 * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn).
		 */
		do
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			if (rec_time >= adjusted_resolve_time)
				break;	/* Records after this adjusted resolve_time will be processed below in phase2 */
			/* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards,
			 * it is possible that if we encounter any broken transactions here we wont know they are broken
			 * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover)
			 * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more
			 * effective broken tn determination).
			 */
			status = mur_forward_play_cur_jrec(rctl);
			if (SS_NORMAL != status)
				break;
			status = mur_next_rec(&jctl);
		} while (SS_NORMAL == status);
		CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
		if (SS_NORMAL != status)
		{	/* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR	ERR_FORCEDHALT is possible
			 * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly.
			 */
			assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status));
			goto finish;
		}
		if (rctl->forw_eof_seen)
		{
			PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time");
			continue;	/* Reached EOF before even getting to tp_resolve_time.
					 * Do not even consider region for next processing loop */
		}
		rctl->last_tn = 0;
		murgbl.regcnt_remaining++;	/* # of regions participating in recovery at this point */
		if (NULL == rctl_start)
			rctl_start = rctl;
		if (NULL != prev_rctl)
		{
			prev_rctl->next_rctl = rctl;
			rctl->prev_rctl = prev_rctl;
		}
		prev_rctl = rctl;
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time");
	}
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	/* Note that it is possible for rctl_start to be NULL at this point. That is there is no journal record in any region
	 * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later
	 * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa.
	 */
	if (NULL != rctl_start)
	{
		assert(NULL != prev_rctl);
		prev_rctl->next_rctl = rctl_start;
		rctl_start->prev_rctl = prev_rctl;
	}
	rctl = rctl_start;
	regcnt_stuck = 0; /* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */
	assert((NULL == rctl) || (NULL == rctl->forw_multi));
	gv_cur_region = NULL;	/* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data
				 * all get set in sync by the MUR_CHANGE_REG macro below.
				 */
	/* Phase2 of forward recovery starts */
	while (NULL != rctl)
	{	/* while there is at least one region remaining with unprocessed journal records */
		assert(NULL != rctl_start);
		assert(0 < murgbl.regcnt_remaining);
		if (NULL != rctl->forw_multi)
		{	/* This region's current journal record is part of a TP transaction waiting for other regions */
			regcnt_stuck++;
			assert(regcnt_stuck <= murgbl.regcnt_remaining);
			if (regcnt_stuck == murgbl.regcnt_remaining)
			{
				assertpro(multi_proc_in_use); /* Else : Out-of-design situation. Stuck in ALL regions. */
				/* Check one last time if all regions are stuck waiting for another process to resolve the
				 * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed.
				 */
				rctl = rctl_start;
				start_hrtbt_cntr = heartbeat_counter;
				do
				{
					if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
					{	/* We are at a logical point. So exit if signaled by parent */
						status = ERR_FORCEDHALT;
						goto finish;
					}
					forw_multi = rctl->forw_multi;
					assert(NULL != forw_multi);
					sfm = forw_multi->shm_forw_multi;
					assert(NULL != sfm);
					assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward);
#					ifdef MUR_DEBUG
					fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n",
						process_id, __LINE__, (long long int)sfm->token,
						sfm->num_reg_seen_forward, sfm->num_reg_seen_backward);
#					endif
					if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward)
					{	/* We are no longer stuck in this region */
						assert(!forw_multi->no_longer_stuck);
						forw_multi->no_longer_stuck = TRUE;
						break;
					}
					rctl = rctl->next_rctl;	/* Move on to the next available region */
					assert(NULL != rctl);
					if (rctl == rctl_start)
					{	/* We went through all regions once and are still stuck.
						 * Sleep until at leat TWO heartbeats have elapsed after which check for deadlock.
						 * Do this only in the child process that owns the FIRST region in the region list.
						 * This way we dont have contention for the GRAB_MULTI_PROC_LATCH from
						 * all children at more or less the same time.
						 */
						if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2)))
						{	/* Check if all processes are stuck for a while. If so assertpro */
							GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							assert(release_latch);
							shm_rctl_start = mur_shm_hdr->shm_rctl_start;
							num_reg_stuck = 0;
							for (i = 0; i < murgbl.reg_total; i++)
							{
								shm_rctl = &shm_rctl_start[i];
								sfm = shm_rctl->shm_forw_multi;
								if (NULL != sfm)
								{
									if (sfm->num_reg_seen_forward != sfm->num_reg_seen_backward)
										num_reg_stuck++;
								}
							}
							REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							/* If everyone is stuck at this point, it is an out-of-design situation */
							assertpro(num_reg_stuck < murgbl.reg_total);
							start_hrtbt_cntr = heartbeat_counter;
						} else
						{	/* Sleep and recheck if any region we are stuck in got resolved.
							 * To minimize time spent sleeping, we just yield our timeslice.
							 */
							rel_quant();
							continue;
						}
					}
				} while (TRUE);
			} else
			{
				rctl = rctl->next_rctl;	/* Move on to the next available region */
				assert(NULL != rctl);
				continue;
			}
		}
		regcnt_stuck = 0;	/* restart the counter now that we found at least one non-stuck region */
		MUR_CHANGE_REG(rctl);
		jctl = rctl->jctl;
		this_reg_stuck = FALSE;
		for ( status = SS_NORMAL; SS_NORMAL == status; )
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify));
			assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			/* Check if current journal record can be played right away or need to wait for corresponding journal
			 * records from other participating TP regions to be reached. A non-TP or ZTP transaction can be played
			 * without issues (i.e. has no dependencies with any other regions). A single-region TP transaction too
			 * falls in the same category. A multi-region TP transaction needs to wait until all participating regions
			 * have played all journal records BEFORE this TP in order to ensure recover plays records in the exact
			 * same order that GT.M performed them in.
			 */
			/* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process for
			 * broken transaction processing. So we process multi-region TP transactions as multiple single-region
			 * TP transactions in forward phase.
			 */
			if (FENCE_NONE != mur_options.fences)
			{
				rectype = (enum jnl_record_type)rec->prefix.jrec_type;
				if (IS_TP(rectype) && IS_TUPD(rectype))
				{
					assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants);
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants);
					num_partners = rec->jrec_set_kill.num_participants;
					assert(0 < num_partners);
					if (1 < num_partners)
					{
						this_reg_stuck = TRUE;
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
					}
				}
			}
			if (this_reg_stuck)
			{
				rec_token_seq = GET_JNL_SEQNO(rec);
				MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time);
				if (NULL != forw_multi)
				{	/* This token has already been seen in another region in forward processing.
					 * Add current region as well. If all regions have been resolved, then play
					 * the entire transaction maintaining the exact same order of updates within.
					 */
					if (!forw_multi->no_longer_stuck)
						MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl);
				} else
				{	/* First time we are seeing this token in forward processing. Check if this
					 * has already been determined to be a broken transaction.
					 */
					recstat = GOOD_TN;
					multi = NULL;
					if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
					{
						multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE);
						if ((NULL != multi) && (0 < multi->partner))
							recstat = BROKEN_TN;
					}
					MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners,
								recstat, multi);
				}
				/* Check that "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP
				 * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below.
				 */
				assert(NULL != forw_multi->u.tabent);
				assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward);
				if (multi_proc)
				{
					sfm = forw_multi->shm_forw_multi;
					ok_to_play = (NULL == sfm) || (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward);
				} else
					ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward);
				assert(ok_to_play || !forw_multi->no_longer_stuck);
				if (ok_to_play )
				{	/* We have enough information to proceed with playing this multi-region TP in
					 * forward processing (even if we might not have seen all needed regions). Now play it.
					 * Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it.
					 */
					assert(forw_multi == rctl->forw_multi);
					status = mur_forward_play_multireg_tp(forw_multi, rctl);
					this_reg_stuck = FALSE;
					/* Note that as part of playing the TP transaction, we could have reached
					 * the EOF of rctl. In this case, we need to break out of the loop.
					 */
					if ((SS_NORMAL != status) || rctl->forw_eof_seen)
						break;
					assert(NULL == rctl->forw_multi);
					assert(!dollar_tlevel);
					jctl = rctl->jctl;	/* In case the first record after the most recently processed
								 * TP transaction is in the next generation journal file */
					continue;
				}
				break;
			} else
			{
				status = mur_forward_play_cur_jrec(rctl);
				if (SS_NORMAL != status)
					break;
			}
			assert(!this_reg_stuck);
			status = mur_next_rec(&jctl);
		}
		assert((NULL == rctl->forw_multi) || this_reg_stuck);
		assert((NULL != rctl->forw_multi) || !this_reg_stuck);
		if (!this_reg_stuck)
		{	/* We are not stuck in this region (to resolve a multi-region TP).
			 * This means we are done processing all the records of this region.
			 */
			assert(NULL == rctl->forw_multi);
			if (!rctl->forw_eof_seen)
			{
				CHECK_IF_EOF_REACHED(rctl, status);
					/* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
				if (SS_NORMAL != status)
				{
					assert(ERR_FILENOTCREATE == status);
					goto finish;
				}
				assert(!dollar_tlevel);
				DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl); /* since all of its records should have been processed */
			} else
			{	/* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed
				 * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl"
				 * field could have been pointing to another <rctl> that has since then also been
				 * removed inside the same function. Therefore the "next_rctl" field is not reliable
				 * in this case but instead we should rely on the global variable "rctl_start" which
				 * points to the list of unprocessed rctls. Set "next_rctl" accordingly.
				 */
				rctl->next_rctl = rctl_start;
				if (ERR_JNLREADEOF == status)
					status = SS_NORMAL;
			}
			assert(rctl->deleted_from_unprocessed_list);
		}
		assert(SS_NORMAL == status);
		assert(!this_reg_stuck || !rctl->forw_eof_seen);
		assert((NULL == rctl->next_rctl) || (NULL != rctl_start));
		assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining));
		rctl = rctl->next_rctl;	/* Note : even though "rctl" could have been deleted from the doubly linked list above,
					 * rctl->next_rctl is not touched so we can still use it to get to the next element. */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers by this child process for this region. This is because the
			 * child is not going to go through exit handling code (no gds_rundown etc.). And we need to
			 * clear up csa->nl->wcs_timers. (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
								recstat < TOT_EXTR_TYPES;
									recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file for example to
				 * write an INCTN record but decided to not write anything because it was not a -detail
				 * type of extract. So *sizeptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9) dont bother creating shmid to communicate back with parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmat() : shmid=%d shmsize=0x%llx", shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
									recstat < TOT_EXTR_TYPES;
										recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
												(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
Ejemplo n.º 4
0
/*
 * -----------------------------------------------
 * Maintain in parallel with op_zalloc2
 * Arguments:
 *	timeout	- max. time to wait for locks before giving up
 *      laflag - passed to gvcmx* routines as "laflag" argument;
 *		 originally indicated the request was a Lock or
 *		 zAllocate request (hence the name "laflag"), but
 *		 now capable of holding more values signifying
 *		 additional information
 *
 * Return:
 *	1 - if not timeout specified
 *	if timeout specified:
 *		!= 0 - all the locks int the list obtained, or
 *		0 - blocked
 *	The return result is suited to be placed directly into
 *	the $T variable by the caller if timeout is specified.
 * -----------------------------------------------
 */
int	op_lock2(int4 timeout, unsigned char laflag)	/* timeout is in seconds */
{
	bool		blocked, timer_on;
	signed char	gotit;
	unsigned short	locks_bckout, locks_done;
	int4		msec_timeout;	/* timeout in milliseconds */
	mlk_pvtblk	*pvt_ptr1, *pvt_ptr2, **prior;
	unsigned char	action;

	gotit = -1;
	cm_action = laflag;
	timer_on = (NO_M_TIMEOUT != timeout);
	out_of_time = FALSE;
	if (!timer_on)
		msec_timeout = NO_M_TIMEOUT;
	else
	{
		msec_timeout = timeout2msec(timeout);
		if (0 == msec_timeout)
			out_of_time = TRUE;
		else
			start_timer((TID)&timer_on, msec_timeout, wake_alarm, 0, NULL);
	}
	lckclr();
	for (blocked = FALSE;  !blocked;)
	{
		/* if this is a request for a remote node */
		if (remlkreq)
		{
			if (gotit >= 0)
				gotit = gvcmx_resremlk(cm_action);
			else
				gotit = gvcmx_reqremlk(cm_action, timeout);

			if (!gotit)
			{
				/* only REQIMMED returns false */
				blocked = TRUE;
				break;
			}
		}
		for (pvt_ptr1 = mlk_pvt_root, locks_done = 0;  locks_done < lks_this_cmd;  pvt_ptr1 = pvt_ptr1->next, locks_done++)
		{	/* Go thru the list of all locks to be obtained attempting to lock
			 * each one. If any lock could not be obtained, break out of the loop */
			if (!mlk_lock(pvt_ptr1, 0, TRUE))
			{	/* If lock is obtained */
				pvt_ptr1->granted = TRUE;
				switch (laflag)
				{
				case CM_LOCKS:
					pvt_ptr1->level = 1;
					break;
				case INCREMENTAL:
					pvt_ptr1->level += pvt_ptr1->translev;
					break;
				default:
					GTMASSERT;
					break;
				}
			} else
			{
				blocked = TRUE;
				break;
			}
		}
		/* If we did not get blocked, we are all done */
		if (!blocked)
			break;
		/* We got blocked and need to keep retrying after some time interval */
		if (remlkreq)
			gvcmx_susremlk(cm_action);
		switch (cm_action)
		{
		case CM_LOCKS:
			action = LOCKED;
			break;
		case INCREMENTAL:
			action = INCREMENTAL;
			break;
		default:
			GTMASSERT;
			break;
		}
		for (pvt_ptr2 = mlk_pvt_root, locks_bckout = 0;  locks_bckout < locks_done;
			pvt_ptr2 = pvt_ptr2->next, locks_bckout++)
		{
			assert(pvt_ptr2->granted && (pvt_ptr2 != pvt_ptr1));
			mlk_bckout(pvt_ptr2, action);
		}
		if (dollar_tlevel && (CDB_STAGNATE <= t_tries))
		{
			mlk_unpend(pvt_ptr1);		/* Eliminated the dangling request block */
			if (timer_on)
				cancel_timer((TID)&timer_on);
			t_retry(cdb_sc_needlock);	/* release crit to prevent a deadlock */
		}
		for (;;)
		{
			if (out_of_time || outofband)
			{	/* if time expired  ||  control-c encountered */
				if (outofband || !lk_check_own(pvt_ptr1))
				{	/* If CTL-C, check lock owner */
					if (pvt_ptr1->nodptr)		/* Get off pending list to be sent a wake */
						mlk_unpend(pvt_ptr1);
					/* Cancel all remote locks obtained so far */
					if (remlkreq)
					{
						gvcmx_canremlk();
						gvcmz_clrlkreq();
						remlkreq = FALSE;
					}
					if (outofband)
					{
						cancel_timer((TID)&timer_on);
						outofband_action(FALSE);
					}
					break;
				}
			}
			if (!mlk_lock(pvt_ptr1, 0, FALSE))
			{	/* If we got the lock, break out of timer loop */
				blocked = FALSE;
				if (pvt_ptr1 != mlk_pvt_root)
				{
					rel_quant();		/* attempt to get a full timeslice for maximum chance to get all */
					mlk_unlock(pvt_ptr1);
				}
				break;
			}
			if (pvt_ptr1->nodptr)
				lk_check_own(pvt_ptr1);		/* clear an abandoned owner */
			hiber_start_wait_any(LOCK_SELF_WAKE);
		}
		if (blocked  &&  out_of_time)
			break;
	}
	if (remlkreq)
	{
		gvcmz_clrlkreq();
		remlkreq = FALSE;
	}
	if (timer_on)
	{
		cancel_timer((TID)&timer_on);
		if (blocked)
		{
			for (prior = &mlk_pvt_root;  *prior;)
			{
				if (!(*prior)->granted)
				{	/* if entry was never granted, delete list entry */
					mlk_pvtblk_delete(prior);
				} else
					prior = &((*prior)->next);
			}
			mlk_stats.n_user_locks_fail++;
			return (FALSE);
		}
	}
	mlk_stats.n_user_locks_success++;
	return (TRUE);
}
Ejemplo n.º 5
0
/*
 * ------------------------------------------
 * Hang the process for a specified time.
 *
 *	Goes to sleep for a positive value.
 *	Any caught signal will terminate the sleep
 *	following the execution of that signal's catching routine.
 *
 * 	The actual hang duration should be NO LESS than the specified
 * 	duration for specified durations greater than .001 seconds.
 * 	Certain applications depend on this assumption.
 *
 * Arguments:
 *	num - time to sleep
 *
 * Return:
 *	none
 * ------------------------------------------
 */
void op_hang(mval* num)
{
	int		ms;
	double		tmp;
	mv_stent	*mv_zintcmd;
	ABS_TIME	cur_time, end_time;
#	ifdef VMS
	uint4 		time[2];
	int4		efn_mask, status;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	ms = 0;
	MV_FORCE_NUM(num);
	if (num->mvtype & MV_INT)
	{
		if (0 < num->m[1])
		{
			assert(MV_BIAS >= 1000);	/* if formats change overflow may need attention */
			ms = num->m[1] * (1000 / MV_BIAS);
		}
	} else if (0 == num->sgn) 		/* if sign is not 0 it means num is negative */
	{
		tmp = mval2double(num) * (double)1000;
		ms = ((double)MAXPOSINT4 >= tmp) ? (int)tmp : (int)MAXPOSINT4;
	}
	if (ms)
	{
		if (TREF(tpnotacidtime) * 1000 < ms)
			TPNOTACID_CHECK(HANGSTR);
#		if defined(DEBUG) && defined(UNIX)
		if (WBTEST_ENABLED(WBTEST_DEFERRED_TIMERS) && (3 > gtm_white_box_test_case_count) && (123000 == ms))
		{
			DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			DBGFPF((stderr, "OP_HANG: will sleep for 20 seconds\n"));
			LONG_SLEEP(20);
			DBGFPF((stderr, "OP_HANG: done sleeping\n"));
			ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_BREAKMPC)&& (0 == gtm_white_box_test_case_count) && (999 == ms))
		{
			frame_pointer->old_frame_pointer->mpc = (unsigned char *)GTM64_ONLY(0xdeadbeef12345678)
				NON_GTM64_ONLY(0xdead1234);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_UTIL_OUT_BUFFER_PROTECTION) && (0 == gtm_white_box_test_case_count) && (999 == ms))
		{	/* Upon seeing a .999s hang this white-box test launches a timer that pops with a period of
		 	 * UTIL_OUT_SYSLOG_INTERVAL and prints a long message via util_out_ptr.
			 */
			start_timer((TID)&util_out_syslog_dump, UTIL_OUT_SYSLOG_INTERVAL, util_out_syslog_dump, 0, NULL);
			return;
		}
#		endif
		sys_get_curr_time(&cur_time);
		mv_zintcmd = find_mvstent_cmd(ZINTCMD_HANG, restart_pc, restart_ctxt, FALSE);
		if (!mv_zintcmd)
			add_int_to_abs_time(&cur_time, ms, &end_time);
		else
		{
			end_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
			cur_time = sub_abs_time(&end_time, &cur_time);	/* get remaing time to sleep */
			if (0 <= cur_time.at_sec)
				ms = (int4)(cur_time.at_sec * 1000 + cur_time.at_usec / 1000);
			else
				ms = 0;		/* all done */
			/* restore/pop previous zintcmd_active[ZINTCMD_HANG] hints */
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last
				= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).count--;
			assert(0 <= TAREF1(zintcmd_active, ZINTCMD_HANG).count);
			if (mv_chain == mv_zintcmd)
				POP_MV_STENT();	/* just pop if top of stack */
			else
			{	/* flag as not active */
				mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
				mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
			}
			if (0 == ms)
				return;		/* done HANGing */
		}
#		ifdef UNIX
		if (ms < 10)
			SLEEP_USEC(ms * 1000, TRUE);	/* Finish the sleep if it is less than 10ms. */
		else
			hiber_start(ms);
#		elif defined(VMS)
		time[0] = -time_low_ms(ms);
		time[1] = -time_high_ms(ms) - 1;
		efn_mask = (1 << efn_outofband | 1 << efn_timer);
		if (SS$_NORMAL != (status = sys$setimr(efn_timer, &time, NULL, &time, 0)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$setimr"), CALLFROM, status);
		if (SS$_NORMAL != (status = sys$wflor(efn_outofband, efn_mask)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$wflor"), CALLFROM, status);
		if (outofband)
		{
			if (SS$_WASCLR == (status = sys$readef(efn_timer, &efn_mask)))
			{
				if (SS$_NORMAL != (status = sys$cantim(&time, 0)))
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$cantim"),
						CALLFROM, status);
			} else
				assertpro(SS$_WASSET == status);
		}
#		endif
	} else
		rel_quant();
	if (outofband)
	{
		PUSH_MV_STENT(MVST_ZINTCMD);
		mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = end_time;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
		/* save current information from zintcmd_active */
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = restart_pc;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last = restart_ctxt;
		TAREF1(zintcmd_active, ZINTCMD_HANG).count++;
		mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_HANG;
		outofband_action(FALSE);
	}
	return;
}
Ejemplo n.º 6
0
void	iosocket_tls(mval *optionmval, int4 timeoutarg, mval *tlsid, mval *password, mval *extraarg)
{	/* note extraarg is not currently used */
	int4			length, flags, timeout, msec_timeout, status, status2, len, errlen, devlen, tls_errno, save_errno;
	io_desc			*iod;
	d_socket_struct 	*dsocketptr;
	socket_struct		*socketptr;
	char			optionstr[MAX_TLSOPTION], idstr[MAX_TLSID_LEN], passwordstr[GTM_PASSPHRASE_MAX_ASCII + 1];
	const char		*errp;
	tls_option		option;
	gtm_tls_socket_t	*tlssocket;
	ABS_TIME		cur_time, end_time;
#	ifdef USE_POLL
	struct pollfd		fds;
#	else
	fd_set			fds, *readfds, *writefds;
	struct timeval		timeout_spec, *timeout_ptr;
#	endif

	iod = io_curr_device.out;
	assert(gtmsocket == iod->type);
	dsocketptr = (d_socket_struct *)iod->dev_sp;
	if (0 >= dsocketptr->n_socket)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOSOCKETINDEV);
		return;
	}
	if (dsocketptr->n_socket <= dsocketptr->current_socket)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_CURRSOCKOFR, 2, dsocketptr->current_socket, dsocketptr->n_socket);
		return;
	}
	if (dsocketptr->mupintr)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_ZINTRECURSEIO);
	socketptr = dsocketptr->socket[dsocketptr->current_socket];
	ENSURE_DATA_SOCKET(socketptr);
	if (socket_tcpip != socketptr->protocol)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, RTS_ERROR_MVAL(optionmval),
			LEN_AND_LIT("but socket is not TCP"));
		return;
	}
	if (socket_connected != socketptr->state)
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_LIT("/TLS"),
			LEN_AND_LIT("but socket not connected"));
		return;
	}
	if (NULL != tlsid)
	{
		length = tlsid->str.len;
		if (MAX_TLSID_LEN < (length + 1))	/* for null */
		{
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_LIT("TLSID"), LEN_AND_LIT("too long"));
			return;
		}
		STRNCPY_STR(idstr, tlsid->str.addr, length);
		idstr[length] = '\0';
	} else
		idstr[0] = '\0';
	if (NULL != password)
	{
		length = password->str.len;
		if (GTM_PASSPHRASE_MAX_ASCII < length)
		{
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_LIT("passphrase"),
				LEN_AND_LIT("too long"));
			return;
		}
		STRNCPY_STR(passwordstr, password->str.addr, length);
		passwordstr[length] = '\0';
	} else
		passwordstr[0] = '\0';
	length = MIN(MAX_TLSOPTION, optionmval->str.len);
	lower_to_upper((uchar_ptr_t)optionstr, (uchar_ptr_t)optionmval->str.addr, length);
	if (0 == memcmp(optionstr, "CLIENT", length))
		option = tlsopt_client;
	else if (0 == memcmp(optionstr, "SERVER", length))
		option = tlsopt_server;
	else if (0 == memcmp(optionstr, "RENEGOTIATE", length))
		option = tlsopt_renegotiate;
	else
		option = tlsopt_invalid;
	memcpy(iod->dollar.device, "0", SIZEOF("0"));
	if (NO_M_TIMEOUT != timeoutarg)
	{
		msec_timeout = timeout2msec(timeoutarg);
		sys_get_curr_time(&cur_time);
		add_int_to_abs_time(&cur_time, msec_timeout, &end_time);
	} else
		msec_timeout = -1;
	if ((tlsopt_client == option) || (tlsopt_server == option))
	{	/* most of the setup is common */
		if (socketptr->tlsenabled)
		{
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSPARAM, 4, LEN_AND_STR(optionstr),
				LEN_AND_LIT("but TLS already enabled"));
			return;
		}
		assertpro((0 >= socketptr->buffered_length) && (0 >= socketptr->obuffer_length));
		if (NULL == tls_ctx)
		{	/* first use of TLS */
			if (-1 == gtm_tls_loadlibrary())
			{
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSDLLNOOPEN, 0, ERR_TEXT, 2, LEN_AND_STR(dl_err));
				return;
			}
			if (NULL == (tls_ctx = (gtm_tls_init(GTM_TLS_API_VERSION, GTMTLS_OP_INTERACTIVE_MODE))))
			{
				errp = gtm_tls_get_error();
				len = SIZEOF(ONE_COMMA) - 1;
				memcpy(iod->dollar.device, ONE_COMMA, len);
				errlen = STRLEN(errp);
				devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen);
				memcpy(&iod->dollar.device[len], errp, devlen + 1);
				if (devlen < errlen)
					iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0';
				if (socketptr->ioerror)
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSINIT, 0, ERR_TEXT, 2, errlen, errp);
				if (NO_M_TIMEOUT != timeoutarg)
					dollar_truth = FALSE;
				return;
			}
		}
		socketptr->tlsenabled = TRUE;
		flags = GTMTLS_OP_SOCKET_DEV | ((tlsopt_client == option) ? GTMTLS_OP_CLIENT_MODE : 0);
		socketptr->tlssocket = gtm_tls_socket(tls_ctx, NULL, socketptr->sd, idstr, flags);
		if (NULL == socketptr->tlssocket)
		{
			socketptr->tlsenabled = FALSE;
			errp = gtm_tls_get_error();
			len = SIZEOF(ONE_COMMA) - 1;
			memcpy(iod->dollar.device, ONE_COMMA, len);
			errlen = STRLEN(errp);
			devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen);
			memcpy(&iod->dollar.device[len], errp, devlen + 1);
			if (devlen < errlen)
				iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0';
			if (socketptr->ioerror)
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSCONVSOCK, 0, ERR_TEXT, 2, errlen, errp);
			if (NO_M_TIMEOUT != timeoutarg)
				dollar_truth = FALSE;
			return;
		}
		status = 0;
#		ifndef	USE_POLL
		if (NO_M_TIMEOUT == timeoutarg)
			timeout_ptr = NULL;
		else
		{
			timeout_spec.tv_sec = msec_timeout / 1000;
			timeout_spec.tv_usec = (msec_timeout % 1000) * 1000;	/* remainder in millsecs to microsecs */
			timeout_ptr = &timeout_spec;
		}
#		endif
		do
		{
			status2 = 0;
			if (0 != status)
			{
#				ifdef USE_POLL
				fds.fd = socketptr->sd;
				fds.events = (GTMTLS_WANT_READ == status) ? POLLIN : POLLOUT;
#				else
				readfds = writefds = NULL;
				assertpro(FD_SETSIZE > socketptr->sd);
				FD_ZERO(&fds);
				FD_SET(socketptr->sd, &fds);
				writefds = (GTMTLS_WANT_WRITE == status) ? &fds : NULL;
				readfds = (GTMTLS_WANT_READ == status) ? &fds : NULL;
#				endif
				POLL_ONLY(if (-1 == (status2 = poll(&fds, 1, msec_timeout))))
				SELECT_ONLY(if (-1 == (status2 = select(socketptr->sd + 1, readfds, writefds, NULL, timeout_ptr))))
				{
					save_errno = errno;
					if (EAGAIN == save_errno)
					{
						rel_quant();	/* allow resources to become available */
						status2 = 0;	/* treat as timeout */
					} else if (EINTR == save_errno)
						status2 = 0;
				}
			} else
				status2 = 1;	/* do accept/connect first time */
			if (0 < status2)
			{
				if (tlsopt_server == option)
					status = gtm_tls_accept((gtm_tls_socket_t *)socketptr->tlssocket);
				else
					status = gtm_tls_connect((gtm_tls_socket_t *)socketptr->tlssocket);
			}
			if ((0 > status2) || ((status != 0) && ((GTMTLS_WANT_READ != status) && (GTMTLS_WANT_WRITE != status))))
			{
				if (0 != status)
				{
					tls_errno = gtm_tls_errno();
					if (0 > tls_errno)
						errp = gtm_tls_get_error();
					else
						errp = STRERROR(tls_errno);
				} else
					errp = STRERROR(save_errno);
				socketptr->tlsenabled = FALSE;
				len = SIZEOF(ONE_COMMA) - 1;
				memcpy(iod->dollar.device, ONE_COMMA, len);
				errlen = STRLEN(errp);
				devlen = MIN((SIZEOF(iod->dollar.device) - len - 1), errlen);
				memcpy(&iod->dollar.device[len], errp, devlen + 1);
				if (devlen < errlen)
					iod->dollar.device[SIZEOF(iod->dollar.device) - 1] = '\0';
				if (socketptr->ioerror)
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_TLSHANDSHAKE, 0,
						ERR_TEXT, 2, errlen, errp);
				return;
			}
			if ((0 != status) && (0 <= status2))	/* not accepted/connected and not error */
			{	/* check for timeout if not error or want read or write */
				if ((0 != timeoutarg) && (NO_M_TIMEOUT != timeoutarg))
				{
					sys_get_curr_time(&cur_time);
					cur_time = sub_abs_time(&end_time, &cur_time);
					if (0 >= cur_time.at_sec)
					{	/* no more time */
						gtm_tls_session_close((gtm_tls_socket_t **)&socketptr->tlssocket);
						socketptr->tlsenabled = FALSE;
						dollar_truth = FALSE;
						return;
					} else
					{	/* adjust msec_timeout for poll/select */
#					ifdef	USE_POLL
						msec_timeout = (cur_time.at_sec * 1000) + (cur_time.at_usec / 1000);
#					else
						timeout_spec.tv_sec = cur_time.at_sec;
						timeout_spec.tv_usec = (gtm_tv_usec_t)cur_time.at_usec;
#					endif
					}
				} else if (0 == timeoutarg)
				{	/* only one chance */
					gtm_tls_session_close((gtm_tls_socket_t **)&socketptr->tlssocket);
					socketptr->tlsenabled = FALSE;
					dollar_truth = FALSE;
					return;
				}
				continue;
			}
		} while ((GTMTLS_WANT_READ == status) || (GTMTLS_WANT_WRITE == status));
		/* turn on output buffering */
		if (0 == socketptr->obuffer_size)
			socketptr->obuffer_size = socketptr->buffer_size;
		socketptr->obuffer_length = socketptr->obuffer_offset = 0;
		socketptr->obuffer_wait_time = DEFAULT_WRITE_WAIT;
		socketptr->obuffer_flush_time = DEFAULT_WRITE_WAIT * 2;	/* until add device parameter */
		socketptr->obuffer = malloc(socketptr->obuffer_size);
	} else if (tlsopt_renegotiate == option)
Ejemplo n.º 7
0
/* Note we don't increment fast_lock_count as part of getting the latch and decrement it when releasing it because ROLLBACK
 * can hold onto this latch for a long while and can do updates in this duration and we should NOT have a non-zero fast_lock_count
 * as many places like t_begin/dsk_read have asserts to this effect. It is okay to NOT increment fast_lock_count as ROLLBACK
 * anyways have logic to disable interrupts the moment it starts doing database updates.
 */
boolean_t	grab_gtmsource_srv_latch(sm_global_latch_ptr_t latch, uint4 max_timeout_in_secs, uint4 onln_rlbk_action)
{
	int			spins, maxspins, retries, max_retries;
	unix_db_info		*udi;
	sgmnt_addrs		*repl_csa;
	boolean_t		cycle_mismatch;

	assert(!have_crit(CRIT_HAVE_ANY_REG));
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	repl_csa = &udi->s_addrs;
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	max_retries = max_timeout_in_secs * 4 * 1000; /* outer-loop : X minutes, 1 loop in 4 is sleep of 1 ms */
	for (retries = max_retries - 1; 0 < retries; retries--)
	{
		for (spins = maxspins; 0 < spins; spins--)
		{
			assert(latch->u.parts.latch_pid != process_id); /* We better not hold it if trying to get it */
			if (GET_SWAPLOCK(latch))
			{
				DEBUG_ONLY(locknl = repl_csa->nl); /* Use the journal pool to maintain lock history */
				LOCK_HIST("OBTN", latch, process_id, retries);
				DEBUG_ONLY(locknl = NULL);
				if (jnlpool.repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk)
				{
					/* Journal pool indicates an abnormally terminated online rollback. Cannot continue until
					 * the rollback command is re-run to bring the journal pool/file and instance file to a
					 * consistent state.
					 */
					/* No need to release the latch before rts_error (mupip_exit_handler will do it for us) */
					rts_error(VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn),
						ERR_TEXT, 2, LEN_AND_LIT("file_corrupt field in instance file header is set to"
										" TRUE"));
				}
				cycle_mismatch = (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle);
				assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || !cycle_mismatch);
				if ((HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action) && cycle_mismatch)
				{
					assert(is_src_server);
					SYNC_ONLN_RLBK_CYCLES;
					gtmsource_onln_rlbk_clnup(); /* side-effect : sets gtmsource_state */
					rel_gtmsource_srv_latch(latch);
				}
				return TRUE;
			}
		}
		if (retries & 0x3)
		{	/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();
		} else
		{
			/* On every 4th pass, we bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			if (RETRY_CASLATCH_CUTOFF == (retries % LOCK_TRIES))
				performCASLatchCheck(latch, TRUE);
		}
	}
	DUMP_LOCKHIST();
	assert(FALSE);
	assert(jnlpool.gtmsource_local && jnlpool.gtmsource_local->gtmsource_pid);
	rts_error(VARLSTCNT(5) ERR_SRVLCKWT2LNG, 2, max_timeout_in_secs, jnlpool.gtmsource_local->gtmsource_pid);
	return FALSE; /* to keep the compiler happy */
}