Beispiel #1
0
void jnl_fsync(gd_region *reg, uint4 fsync_addr)
{
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jb;
	uint4			lcnt, saved_dsk_addr, saved_status;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	int4			lck_state;
	int			fsync_ret, save_errno;

	error_def(ERR_JNLFSYNCERR);
	error_def(ERR_FSYNCTIMOUT);
	error_def(ERR_TEXT);
	error_def(ERR_JNLFRCDTERM);
	error_def(ERR_JNLFSYNCLSTCK);

	csa = &FILE_INFO(reg)->s_addrs;
	jpc = csa->jnl;
	jb  = jpc->jnl_buff;

	if ((NOJNL != jpc->channel) && !JNL_FILE_SWITCHED(jpc))
	{
		csd = csa->hdr;
		for (lcnt = 1; fsync_addr > jb->fsync_dskaddr && !JNL_FILE_SWITCHED(jpc); lcnt++)
		{
			if (MAX_FSYNC_WAIT_CNT / 2 == lcnt)	/* half way into max.patience*/
			{
				saved_status = jpc->status;
				jpc->status = SS_NORMAL;
				jnl_send_oper(jpc, ERR_JNLFSYNCLSTCK);
				jpc->status = saved_status ;
			}
			if (MAX_FSYNC_WAIT_CNT == lcnt)	/* tried a long */
			{
				saved_status = jpc->status;
				jpc->status = SS_NORMAL;
				jnl_send_oper(jpc, ERR_JNLFSYNCLSTCK);
				jpc->status = saved_status ;
				send_msg(VARLSTCNT(4) ERR_FSYNCTIMOUT, 2, JNL_LEN_STR(csd));
				GTMASSERT;
			}
			BG_TRACE_PRO_ANY(csa, n_jnl_fsync_tries);
			if (GET_SWAPLOCK(&jb->fsync_in_prog_latch))
				break;
			wcs_sleep(lcnt);
			performCASLatchCheck(&jb->fsync_in_prog_latch, lcnt);
		}
		if (fsync_addr > jb->fsync_dskaddr && !JNL_FILE_SWITCHED(jpc))
		{
			assert(process_id == jb->fsync_in_prog_latch.u.parts.latch_pid);  /* assert we have the lock */
			saved_dsk_addr = jb->dskaddr;
			if (jpc->sync_io)
			{
				/* We need to maintain the fsync control fields irrespective of the type of IO, because we might
				 * switch between these at any time.
				 */
				jb->fsync_dskaddr = saved_dsk_addr;
			} else
			{
				GTM_FSYNC(jpc->channel, fsync_ret);
				if (-1 == fsync_ret)
				{
					save_errno = errno;
					assert(FALSE);
					send_msg(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, JNL_LEN_STR(csd),
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), save_errno);
					rts_error(VARLSTCNT(9) ERR_JNLFSYNCERR, 2, JNL_LEN_STR(csd),
						ERR_TEXT, 2, RTS_ERROR_TEXT("Error with fsync"), save_errno);
				} else
				{
					jb->fsync_dskaddr = saved_dsk_addr;
					BG_TRACE_PRO_ANY(csa, n_jnl_fsyncs);
				}
			}
		}
		if (process_id == jb->fsync_in_prog_latch.u.parts.latch_pid)
			RELEASE_SWAPLOCK(&jb->fsync_in_prog_latch);
	}
	return;
}
/* Note we don't increment fast_lock_count as part of getting the latch and decrement it when releasing it because ROLLBACK
 * can hold onto this latch for a long while and can do updates in this duration and we should NOT have a non-zero fast_lock_count
 * as many places like t_begin/dsk_read have asserts to this effect. It is okay to NOT increment fast_lock_count as ROLLBACK
 * anyways have logic to disable interrupts the moment it starts doing database updates.
 */
boolean_t	grab_gtmsource_srv_latch(sm_global_latch_ptr_t latch, uint4 max_timeout_in_secs, uint4 onln_rlbk_action)
{
	int			spins, maxspins, retries, max_retries;
	unix_db_info		*udi;
	sgmnt_addrs		*repl_csa;
	boolean_t		cycle_mismatch;

	assert(!have_crit(CRIT_HAVE_ANY_REG));
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	repl_csa = &udi->s_addrs;
	maxspins = num_additional_processors ? MAX_LOCK_SPINS(LOCK_SPINS, num_additional_processors) : 1;
	max_retries = max_timeout_in_secs * 4 * 1000; /* outer-loop : X minutes, 1 loop in 4 is sleep of 1 ms */
	for (retries = max_retries - 1; 0 < retries; retries--)
	{
		for (spins = maxspins; 0 < spins; spins--)
		{
			assert(latch->u.parts.latch_pid != process_id); /* We better not hold it if trying to get it */
			if (GET_SWAPLOCK(latch))
			{
				DEBUG_ONLY(locknl = repl_csa->nl); /* Use the journal pool to maintain lock history */
				LOCK_HIST("OBTN", latch, process_id, retries);
				DEBUG_ONLY(locknl = NULL);
				if (jnlpool.repl_inst_filehdr->file_corrupt && !jgbl.onlnrlbk)
				{
					/* Journal pool indicates an abnormally terminated online rollback. Cannot continue until
					 * the rollback command is re-run to bring the journal pool/file and instance file to a
					 * consistent state.
					 */
					/* No need to release the latch before rts_error (mupip_exit_handler will do it for us) */
					rts_error(VARLSTCNT(8) ERR_REPLREQROLLBACK, 2, LEN_AND_STR(udi->fn),
						ERR_TEXT, 2, LEN_AND_LIT("file_corrupt field in instance file header is set to"
										" TRUE"));
				}
				cycle_mismatch = (repl_csa->onln_rlbk_cycle != jnlpool.jnlpool_ctl->onln_rlbk_cycle);
				assert((ASSERT_NO_ONLINE_ROLLBACK != onln_rlbk_action) || !cycle_mismatch);
				if ((HANDLE_CONCUR_ONLINE_ROLLBACK == onln_rlbk_action) && cycle_mismatch)
				{
					assert(is_src_server);
					SYNC_ONLN_RLBK_CYCLES;
					gtmsource_onln_rlbk_clnup(); /* side-effect : sets gtmsource_state */
					rel_gtmsource_srv_latch(latch);
				}
				return TRUE;
			}
		}
		if (retries & 0x3)
		{	/* On all but every 4th pass, do a simple rel_quant */
			rel_quant();
		} else
		{
			/* On every 4th pass, we bide for awhile */
			wcs_sleep(LOCK_SLEEP);
			if (RETRY_CASLATCH_CUTOFF == (retries % LOCK_TRIES))
				performCASLatchCheck(latch, TRUE);
		}
	}
	DUMP_LOCKHIST();
	assert(FALSE);
	assert(jnlpool.gtmsource_local && jnlpool.gtmsource_local->gtmsource_pid);
	rts_error(VARLSTCNT(5) ERR_SRVLCKWT2LNG, 2, max_timeout_in_secs, jnlpool.gtmsource_local->gtmsource_pid);
	return FALSE; /* to keep the compiler happy */
}