예제 #1
0
int continue_proc(pid_t pid)
{
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	DEBUG_ONLY(if (!TREF(gtm_usesecshr)))	/* Cause debug builds to talk to gtmsecshr more often */
	{
		if (WBTEST_ENABLED(WBTEST_HOLD_GTMSOURCE_SRV_LATCH))
		{
			/* Simulate the kill below, but ignore its return status so that we end up invoking gtmsecshr */
			kill(pid, SIGCONT);
			/* Wait until the target quits so that kill() call by gtmsecshr fails with ESRCH */
			while (is_proc_alive(pid, 0))
				LONG_SLEEP(1);
		}
		else if (0 == kill(pid, SIGCONT))
			return 0;
		else if (ESRCH == errno)
		{
			send_msg_csa(CSA_ARG(NULL) VARLSTCNT(5) ERR_NOSUCHPROC, 3, pid, RTS_ERROR_LITERAL("continue"));
			return ESRCH;
		} else
			assert(EINVAL != errno);
	}
	return send_mesg2gtmsecshr(CONTINUE_PROCESS, pid, NULL, 0);
}
예제 #2
0
파일: dse.c 프로젝트: CeperaCPP/fis-gtm
int main(int argc, char *argv[])
{
	DCL_THREADGBL_ACCESS;

	GTM_THREADGBL_INIT;
	set_blocksig();
	gtm_imagetype_init(DSE_IMAGE);
	gtm_wcswidth_fnptr = gtm_wcswidth;
	gtm_env_init();	/* read in all environment variables */
	licensed = TRUE;
	TREF(transform) = TRUE;
	op_open_ptr = op_open;
	patch_curr_blk = get_dir_root();
	err_init(util_base_ch);
	GTM_ICU_INIT_IF_NEEDED;	/* Note: should be invoked after err_init (since it may error out) and before CLI parsing */
	sig_init(generic_signal_handler, dse_ctrlc_handler, suspsigs_handler);
	atexit(util_exit_handler);
	SET_LATCH_GLOBAL(&defer_latch, LOCK_AVAILABLE);
	get_page_size();
	stp_init(STP_INITSIZE);
	rts_stringpool = stringpool;
	getjobname();
	INVOKE_INIT_SECSHR_ADDRS;
	getzdir();
	prealloc_gt_timers();
	initialize_pattern_table();
	gvinit();
	region_init(FALSE);
	INIT_GBL_ROOT(); /* Needed for GVT initialization */
	getjobnum();
	util_out_print("!/File  !_!AD", TRUE, DB_LEN_STR(gv_cur_region));
	util_out_print("Region!_!AD!/", TRUE, REG_LEN_STR(gv_cur_region));
	cli_lex_setup(argc, argv);
	CREATE_DUMMY_GBLDIR(gd_header, original_header, gv_cur_region, gd_map, gd_map_top);
	gtm_chk_dist(argv[0]);
#	ifdef DEBUG
	if ((gtm_white_box_test_case_enabled && (WBTEST_SEMTOOLONG_STACK_TRACE == gtm_white_box_test_case_number) ))
	{
		sgmnt_addrs     * csa;
		node_local_ptr_t cnl;
		csa = &FILE_INFO(gv_cur_region)->s_addrs;
		cnl = csa->nl;
		cnl->wbox_test_seq_num  = 1; /*Signal the first step and wait here*/
		while (2 != cnl->wbox_test_seq_num) /*Wait for another process to get hold of the semaphore and signal next step*/
			LONG_SLEEP(10);
	}
#	endif
	if (argc < 2)
                display_prompt();
	io_init(TRUE);
	while (1)
	{
		if (!dse_process(argc))
			break;
		display_prompt();
	}
	dse_exit();
	REVERT;
}
예제 #3
0
void deferred_signal_handler(void)
{
	void (*signal_routine)();
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* To avoid nested calls to this routine, we set forced_exit to FALSE at the very beginning */
	forced_exit = FALSE;

	if (exit_handler_active)
	{
		assert(FALSE);	/* at this point in time (June 2003) there is no way we know of to get here, hence the assert */
		return;	/* since anyway we are exiting currently, resume exit handling instead of reissuing another one */
	}
	/* For signals that get a delayed response so we can get out of crit, we also delay the messages.
	 * This routine will output those delayed messages from the appropriate structures to both the
	 * user and the system console.
	 */
	/* note can't use switch here because ERR_xxx are not defined as constants */
	if (ERR_KILLBYSIG == forced_exit_err)
	{
		send_msg(VARLSTCNT(6) ERR_KILLBYSIG, 4, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal);
		gtm_putmsg(VARLSTCNT(6) ERR_KILLBYSIG, 4, GTMIMAGENAMETXT(image_type), process_id, signal_info.signal);
	} else if (ERR_KILLBYSIGUINFO == forced_exit_err)
	{
		send_msg(VARLSTCNT(8) ERR_KILLBYSIGUINFO, 6, GTMIMAGENAMETXT(image_type), process_id,
						signal_info.signal, signal_info.send_pid, signal_info.send_uid);
		gtm_putmsg(VARLSTCNT(8) ERR_KILLBYSIGUINFO, 6, GTMIMAGENAMETXT(image_type), process_id,
						signal_info.signal, signal_info.send_pid, signal_info.send_uid);
	} else if (ERR_KILLBYSIGSINFO1 == forced_exit_err)
	{
		send_msg(VARLSTCNT(8) ERR_KILLBYSIGSINFO1, 6, GTMIMAGENAMETXT(image_type),
			 process_id, signal_info.signal, signal_info.int_iadr, signal_info.bad_vadr);
		gtm_putmsg(VARLSTCNT(8) ERR_KILLBYSIGSINFO1, 6, GTMIMAGENAMETXT(image_type),
			   process_id, signal_info.signal, signal_info.int_iadr, signal_info.bad_vadr);
	} else if (ERR_KILLBYSIGSINFO2 == forced_exit_err)
	{
		send_msg(VARLSTCNT(7) ERR_KILLBYSIGSINFO2, 5, GTMIMAGENAMETXT(image_type),
			 process_id, signal_info.signal, signal_info.int_iadr);
		gtm_putmsg(VARLSTCNT(7) ERR_KILLBYSIGSINFO2, 5, GTMIMAGENAMETXT(image_type),
			   process_id, signal_info.signal, signal_info.int_iadr);
	} else if (ERR_KILLBYSIGSINFO3 == forced_exit_err)
	{
		send_msg(VARLSTCNT(7) ERR_KILLBYSIGSINFO3, 5, GTMIMAGENAMETXT(image_type),
			 process_id, signal_info.signal, signal_info.bad_vadr);
		gtm_putmsg(VARLSTCNT(7) ERR_KILLBYSIGSINFO3, 5, GTMIMAGENAMETXT(image_type),
			   process_id, signal_info.signal, signal_info.bad_vadr);
	} else if (ERR_FORCEDHALT != forced_exit_err || !gtm_quiet_halt)
	{	/* No HALT messages if quiet halt is requested */
		send_msg(VARLSTCNT(1) forced_exit_err);
		gtm_putmsg(VARLSTCNT(1) forced_exit_err);
	}
	assert(OK_TO_INTERRUPT);
	/* Signal intent to exit BEFORE driving condition handlers. This avoids checks that will otherwise fail (for example
	 * if mdb_condition_handler/preemptive_ch gets called below, that could invoke the RESET_GV_TARGET macro which in turn
	 * would assert that gv_target->gd_csa is equal to cs_addrs. This could not be true in case we were in mainline code
	 * that was interrupted by the flush timer for a different region which in turn was interrupted by an external signal
	 * that would drive us to exit. Setting the "process_exiting" variable causes those csa checks to pass.
	 */
	SET_PROCESS_EXITING_TRUE;
#	ifdef DEBUG
	if (gtm_white_box_test_case_enabled && (WBTEST_DEFERRED_TIMERS == gtm_white_box_test_case_number)
		&& (2 == gtm_white_box_test_case_count))
	{
		DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
		DBGFPF((stderr, "DEFERRED_SIGNAL_HANDLER: will sleep for 20 seconds\n"));
		LONG_SLEEP(20);
		DBGFPF((stderr, "DEFERRED_SIGNAL_HANDLER: done sleeping\n"));
		ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
	}
#	endif
	/* If any special routines are registered to be driven on a signal, drive them now */
	if ((0 != exi_condition) && (NULL != call_on_signal))
	{
		signal_routine = call_on_signal;
		call_on_signal = NULL;		/* So we don't recursively call ourselves */
		(*signal_routine)();
	}
	/* Note, we do not drive create_fatal_error zshow_dmp() in this routine since any deferrable signals are
	 * by definition not fatal.
	 */
	exit(-exi_condition);
}
예제 #4
0
파일: gdsfilext.c 프로젝트: shabiel/YottaDB
uint4	 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog)
{
	sm_uc_ptr_t		old_base[2], mmap_retaddr;
	boolean_t		was_crit, is_mm;
	int			result, save_errno, status;
	DEBUG_ONLY(int		first_save_errno);
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total;
	uint4			jnl_status;
	gtm_uint64_t		avail_blocks, mmap_sz;
	off_t			new_eof, new_size;
	trans_num		curr_tn;
	unix_db_info		*udi;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	cache_rec_ptr_t         cr;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(!IS_DSE_IMAGE);
	assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */
	assert(!process_exiting);
	DEBUG_ONLY(old_base[0] = old_base[1] = NULL);
	assert(!gv_cur_region->read_only);
	udi = FILE_INFO(gv_cur_region);
	is_mm = (dba_mm == cs_addrs->hdr->acc_meth);
#	if !defined(MM_FILE_EXT_OK)
	if (!udi->grabbed_access_sem && is_mm)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#	endif
	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	   overflow and end up doing silly things.
	*/
	assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
#	if defined(__sun) || defined(__hpux)
	cs_data->defer_allocate = TRUE;
#	endif
	if (!blocks && (cs_data->defer_allocate || (TRANS_IN_PROG_TRUE == trans_in_prog)))
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired
	 * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
	 *      and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
	 *      manner as every non-bitmap block must have an associated bitmap)
	 * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	 * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed.
	 */
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
	assert((0 < (int)new_blocks) || (!cs_data->defer_allocate && (0 == new_blocks)));
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		assert(WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		if (!(gtmDebugLevel & GDL_IgnoreAvailSpace))
		{	/* Bypass this space check if debug flag above is on. Allows us to create a large sparce DB
			 * in space it could never fit it if wasn't sparse. Needed for some tests.
			 */
			avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
			if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (blocks > (uint4)avail_blocks)
				{
					if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs))
						return (uint4)(NO_FREE_SPACE);
					else
						send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks);
				} else
					send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
						 (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
			}
		}
	}
#	ifdef DEBUG
	if (WBTEST_ENABLED(WBTEST_MM_CONCURRENT_FILE_EXTEND) && dollar_tlevel && !MEMCMP_LIT(gv_cur_region->rname, "DEFAULT"))
	{
		SYSTEM("$gtm_dist/mumps -run $gtm_wbox_mrtn");
		assert(1 == cs_addrs->nl->wbox_test_seq_num);	/* should have been set by mubfilcpy */
		cs_addrs->nl->wbox_test_seq_num = 2;	/* signal mupip backup to stop sleeping in mubfilcpy */
	}
#	endif
	/* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */
	was_crit = cs_addrs->now_crit;
	assert(!cs_addrs->hold_onto_crit || was_crit);
	/* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur.
	 * If we are coming from online rollback (when that feature is available), we will come in holding crit and in
	 * 	the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself.
	 * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for
	 * freeze.
	 * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited
	 * 	for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this
	 *	function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that
	 *	at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region
	 *	to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused
	 *	op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which
	 *	we hold crit.
	 */
	assert(!was_crit || !FROZEN_HARD(cs_data) || (dollar_tlevel && (CDB_STAGNATE <= t_tries)));
	/*
	 * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE
	 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
	 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
	 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
	 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
	 * if we already hold crit.
	 */
	if (!was_crit)
	{
		for ( ; ; )
		{
			grab_crit(gv_cur_region);
			if (FROZEN_CHILLED(cs_data))
				DO_CHILLED_AUTORELEASE(cs_addrs, cs_data);
			if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
				break;
			rel_crit(gv_cur_region);
			while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
			{
				hiber_start(1000);
				if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
					break;
			}
		}
	} else if (FROZEN_HARD(cs_data) && dollar_tlevel)
	{	/* We don't want to continue with file extension as explained above. Hence return with an error code which
		 * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly.
		 */
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_FREEZE_PROG;
	} else
		WAIT_FOR_REGION_TO_UNCHILL(cs_addrs, cs_data);
	if (IS_REPL_INST_FROZEN && trans_in_prog)
	{
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_INST_FREEZE;
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	old_total = cs_data->trans_hist.total_blks;
	if (old_total != filesize)
	{	/* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM,
		 * we still need to remap the database
		 */
		assert((old_total > filesize) || !is_mm);
		/* For BG, someone else could have truncated or extended - we have no idea */
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
	if (trans_in_prog && SUSPICIOUS_EXTEND)
	{
		if (!was_crit)
		{
			GDSFILEXT_CLNUP;
			return (uint4)(EXTEND_SUSPECT);
		}
		/* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then it means we held
		 * crit through bm_getfree into gdsfilext and still didn't get it right.
		 */
		assertpro(!is_free_blks_ctr_ok() && !SUSPICIOUS_EXTEND);
	}
	if (JNL_ENABLED(cs_data))
	{
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = cs_addrs->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, cs_addrs);
		if (jnl_status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			return (uint4)(NO_FREE_SPACE);	/* should have better return status */
		}
	}
	if (is_mm)
	{
		cs_addrs->nl->mm_extender_pid = process_id;
		status = wcs_wtstart(gv_cur_region, 0, NULL, NULL);
		cs_addrs->nl->mm_extender_pid = 0;
		assertpro(SS_NORMAL == status);
		old_base[0] = cs_addrs->db_addrs[0];
		old_base[1] = cs_addrs->db_addrs[1];
		cs_addrs->db_addrs[0] = NULL; /* don't rely on it until the mmap below */
#		ifdef _AIX
		status = shmdt(old_base[0] - BLK_ZERO_OFF(cs_data->start_vbn));
#		else
		status = munmap((caddr_t)old_base[0], (size_t)(old_base[1] - old_base[0]));
#		endif
		if (0 != status)
		{
			save_errno = errno;
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region),
					ERR_SYSCALL, 5, LEN_AND_STR(MEM_UNMAP_SYSCALL), CALLFROM, save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	} else
	{	/* Due to concurrency issues, it is possible some process had issued a disk read of the GDS block# corresponding
		 * to "old_total" right after a truncate wrote a GDS-block of zeros on disk (to signal end of the db file).
		 * If so, the global buffer containing this block needs to be invalidated now as part of the extend. If not, it is
		 * possible the EOF block on disk is now going to be overwritten by a properly initialized bitmap block (as part
		 * of the gdsfilext below) while the global buffer continues to have an incorrect copy of that bitmap block and
		 * this in turn would cause XXXX failures due to a bad bitmap block in shared memory. (GTM-7519)
		 */
		cr = db_csh_get((block_id)old_total);
		if ((NULL != cr) && ((cache_rec_ptr_t)CR_NOTVALID != cr))
		{
			assert((0 == cr->dirty) && (0 == cr->bt_index) && !cr->stopped);
			cr->cycle++;
			cr->blk = CR_BLKEMPTY;
		}
	}
	CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
	new_total = old_total + new_blocks;
	new_eof = BLK_ZERO_OFF(cs_data->start_vbn) + ((off_t)new_total * cs_data->blk_size);
#	if !defined(__sun) && !defined(__hpux)
	if (!cs_data->defer_allocate)
	{
		new_size = new_eof + cs_data->blk_size;
		save_errno = posix_fallocate(udi->fd, 0, new_size);
		DEBUG_ONLY(first_save_errno = save_errno);
		if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
			save_errno = extend_wait_for_fallocate(udi, new_size);
		if (0 != save_errno)
		{
			GDSFILEXT_CLNUP;
			assert(ENOSPC == save_errno);
			if (ENOSPC != save_errno)
				send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_PREALLOCATEFAIL, 2, DB_LEN_STR(gv_cur_region),
					     save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	}
#	endif
	save_errno = db_write_eof_block(udi, udi->fd, cs_data->blk_size, new_eof, &(TREF(dio_buff)));
	if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
		save_errno = extend_wait_for_write(udi, cs_data->blk_size, new_eof);
	if (0 != save_errno)
	{
		GDSFILEXT_CLNUP;
		if (ENOSPC != save_errno)
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		return (uint4)(NO_FREE_SPACE);
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_1))
	{
		LONG_SLEEP(600);
		assert(FALSE);
	}
	/* Ensure the EOF and metadata get to disk BEFORE any bitmap writes. Otherwise, the file size could no longer reflect
	 * a proper extent and subsequent invocations of gdsfilext could corrupt the database.
	 */
	if (!IS_STATSDB_CSA(cs_addrs))
	{
		GTM_DB_FSYNC(cs_addrs, udi->fd, status);
		assert(0 == status);
		if (0 != status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(8) ERR_DBFILERR, 5,
						RTS_ERROR_LITERAL("fsync1()"), CALLFROM, status);
			return (uint4)(NO_FREE_SPACE);
		}
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_2))
	{
		LONG_SLEEP(600);
		assert(FALSE); /* Should be killed before that */
	}
	DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
예제 #5
0
void mu_int_reg(gd_region *reg, boolean_t *return_value, boolean_t return_after_open)
{
	boolean_t		read_only, was_crit;
	freeze_status		status;
	node_local_ptr_t	cnl;
	sgmnt_addrs     	*csa;
	sgmnt_data_ptr_t	csd;
	sgmnt_data		*csd_copy_ptr;
	gd_segment		*seg;
	int			gtmcrypt_errno;
#	ifdef DEBUG
	boolean_t		need_to_wait = FALSE;
	int			trynum;
	uint4			curr_wbox_seq_num;
#	endif

	*return_value = FALSE;
	jnlpool_init_needed = TRUE;
	ESTABLISH(mu_int_reg_ch);
	if (dba_usr == reg->dyn.addr->acc_meth)
	{
		util_out_print("!/Can't integ region !AD; not GDS format", TRUE,  REG_LEN_STR(reg));
		mu_int_skipreg_cnt++;
		return;
	}
	gv_cur_region = reg;
	if (reg_cmcheck(reg))
	{
		util_out_print("!/Can't integ region across network", TRUE);
		mu_int_skipreg_cnt++;
		return;
	}
	gvcst_init(gv_cur_region);
	if (gv_cur_region->was_open)
	{	/* already open under another name */
		gv_cur_region->open = FALSE;
		return;
	}
	if (return_after_open)
	{
		*return_value = TRUE;
		return;
	}
	change_reg();
	csa = &FILE_INFO(gv_cur_region)->s_addrs;
	cnl = csa->nl;
	csd = csa->hdr;
	read_only = gv_cur_region->read_only;
	assert(NULL != mu_int_master);
	/* Ensure that we don't see an increase in the file header and master map size compared to it's maximum values */
	assert(SGMNT_HDR_LEN >= SIZEOF(sgmnt_data) && (MASTER_MAP_SIZE_MAX >= MASTER_MAP_SIZE(csd)));
	/* ONLINE INTEG if asked for explicitly by specifying -ONLINE is an error if the db has partial V4 blocks.
	 * However, if -ONLINE is not explicitly specified but rather assumed implicitly (as default for -REG)
	 * then turn off ONLINE INTEG for this region and continue as if -NOONLINE was specified
	 */
	if (!csd->fully_upgraded)
	{
		ointeg_this_reg = FALSE; /* Turn off ONLINE INTEG for this region */
		if (online_specified)
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_SSV4NOALLOW, 2, DB_LEN_STR(gv_cur_region));
			util_out_print(NO_ONLINE_ERR_MSG, TRUE);
			mu_int_skipreg_cnt++;
			return;
		}
	}
	if (!ointeg_this_reg || read_only)
	{
		status = region_freeze(gv_cur_region, TRUE, FALSE, TRUE, FALSE, !read_only);
		switch (status)
		{
			case REG_ALREADY_FROZEN:
				if (csa->read_only_fs)
					break;
				util_out_print("!/Database for region !AD is already frozen, not integing",
					TRUE, REG_LEN_STR(gv_cur_region));
				mu_int_skipreg_cnt++;
				return;
			case REG_FLUSH_ERROR:
				gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT(MUPIP_INTEG),
					DB_LEN_STR(gv_cur_region));
				mu_int_skipreg_cnt++;
				return;
			case REG_HAS_KIP:
				/* We have already waited for KIP to reset. This time do not wait for KIP */
				status = region_freeze(gv_cur_region, TRUE, FALSE, FALSE, FALSE, !read_only);
				if (REG_ALREADY_FROZEN == status)
				{
					if (csa->read_only_fs)
						break;
					util_out_print("!/Database for region !AD is already frozen, not integing",
						TRUE, REG_LEN_STR(gv_cur_region));
					mu_int_skipreg_cnt++;
					return;
				} else if (REG_FLUSH_ERROR == status)
				{
					gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT(MUPIP_INTEG),
						DB_LEN_STR(gv_cur_region));
					mu_int_skipreg_cnt++;
					return;
				}
				assert(REG_FREEZE_SUCCESS == status);
				/* no break */
			case REG_FREEZE_SUCCESS:
				break;
			default:
				assert(FALSE);
				/* no break */
		}
		if (read_only && (dba_bg == csa->hdr->acc_meth) && !mu_int_wait_rdonly(csa, MUPIP_INTEG))
		{
			mu_int_skipreg_cnt++;
			return;
		}
	}
	if (!ointeg_this_reg)
	{	/* Take a copy of the file-header. To ensure it is consistent, do it while holding crit. */
		was_crit = csa->now_crit;
		if (!was_crit)
			grab_crit(gv_cur_region);
		memcpy((uchar_ptr_t)&mu_int_data, (uchar_ptr_t)csd, SIZEOF(sgmnt_data));
		if (!was_crit)
			rel_crit(gv_cur_region);
		memcpy(mu_int_master, MM_ADDR(csd), MASTER_MAP_SIZE(csd));
		csd_copy_ptr = &mu_int_data;
	} else
	{
		if (!ss_initiate(gv_cur_region, util_ss_ptr, &csa->ss_ctx, preserve_snapshot, MUPIP_INTEG))
		{
			mu_int_skipreg_cnt++;
			assert(NULL != csa->ss_ctx);
			ss_release(&csa->ss_ctx);
			ointeg_this_reg = FALSE; /* Turn off ONLINE INTEG for this region */
			assert(process_id != cnl->in_crit); /* Ensure ss_initiate released the crit before returning */
			assert(!FROZEN_HARD(csd)); /* Ensure region is unfrozen before returning from ss_initiate */
			assert(INTRPT_IN_SS_INITIATE != intrpt_ok_state); /* Ensure ss_initiate released intrpt_ok_state */
			return;
		}
		assert(process_id != cnl->in_crit); /* Ensure ss_initiate released the crit before returning */
		assert(INTRPT_IN_SS_INITIATE != intrpt_ok_state); /* Ensure ss_initiate released intrpt_ok_state */
		csd_copy_ptr = &csa->ss_ctx->ss_shm_ptr->shadow_file_header;
#		if defined(DEBUG)
		curr_wbox_seq_num = 1;
		cnl->wbox_test_seq_num = curr_wbox_seq_num; /* indicate we took the next step */
		GTM_WHITE_BOX_TEST(WBTEST_OINTEG_WAIT_ON_START, need_to_wait, TRUE);
		if (need_to_wait) /* wait for them to take next step */
		{
			trynum = 30; /* given 30 cycles to tell you to go */
			while ((curr_wbox_seq_num == cnl->wbox_test_seq_num) && trynum--)
				LONG_SLEEP(1);
			cnl->wbox_test_seq_num++; /* let them know we took the next step */
			assert(trynum);
		}
#		endif
	}
	if (USES_ANY_KEY(csd_copy_ptr))
	{ 	/* Initialize mu_int_encrypt_key_handle to be used in mu_int_read */
		seg = gv_cur_region->dyn.addr;
		INIT_DB_OR_JNL_ENCRYPTION(&mu_int_encr_handles, csd_copy_ptr, seg->fname_len, (char *)seg->fname, gtmcrypt_errno);
		if (0 != gtmcrypt_errno)
		{
			GTMCRYPT_REPORT_ERROR(gtmcrypt_errno, gtm_putmsg, seg->fname_len, seg->fname);
			mu_int_skipreg_cnt++;
			return;
		}
	}
	*return_value = mu_int_fhead();
	REVERT;
	return;
}
예제 #6
0
int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned char *buffp)
{
	static int		report_cnt = 1;
	static int		next_report_at = 1;
	static boolean_t	send_xoff = FALSE;
	static boolean_t	xoff_sent = FALSE;
	static seq_num		send_seqno;
	static boolean_t	log_draining_msg = FALSE;
	static boolean_t	send_badtrans = FALSE;
	static boolean_t	send_cmp2uncmp = FALSE;
	static boolean_t	upd_shut_too_early_logged = FALSE;
	static time_t		last_reap_time = 0;
	repl_msg_t		xoff_msg;
	repl_badtrans_msg_t	bad_trans_msg;
	boolean_t		alert = FALSE, info = FALSE;
	int			return_status;
	gd_region		*region_top;
	unsigned char		*msg_ptr;				/* needed for REPL_{SEND,RECV}_LOOP */
	int			tosend_len, sent_len, sent_this_iter;	/* needed for REPL_SEND_LOOP */
	int			torecv_len, recvd_len, recvd_this_iter;	/* needed for REPL_RECV_LOOP */
	int			status, poll_dir;			/* needed for REPL_{SEND,RECV}_LOOP */
	int			temp_len, pending_msg_size;
	int			upd_start_status, upd_start_attempts;
	int			buffered_data_len;
	int			upd_exit_status;
	seq_num			temp_send_seqno;
	boolean_t		bad_trans_detected = FALSE, onln_rlbk_flg_set = FALSE;
	recvpool_ctl_ptr_t	recvpool_ctl;
	upd_proc_local_ptr_t	upd_proc_local;
	gtmrecv_local_ptr_t	gtmrecv_local;
	upd_helper_ctl_ptr_t	upd_helper_ctl;
	pid_t			waitpid_res;
	int4			msg_type, msg_len;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	recvpool_ctl = recvpool.recvpool_ctl;
	upd_proc_local = recvpool.upd_proc_local;
	gtmrecv_local = recvpool.gtmrecv_local;
	upd_helper_ctl = recvpool.upd_helper_ctl;
	if (SHUTDOWN == gtmrecv_local->shutdown)
	{
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "Shutdown signalled\n");
		gtmrecv_end(); /* Won't return */
	}
#	ifdef GTM_TLS
	/* If we sent a REPL_RENEG_ACK, then we cannot afford to send anymore asynchronous messages (like XOFF_ACK_ME) until we
	 * receive a REPL_RENEG_COMPLETE from the source server. This ensures that while the source server attempts to do a SSL/TLS
	 * renegotiation, it doesn't have any application data (like XOFF_ACK_ME) sitting in the pipe.
	 */
	if (REPLTLS_WAITING_FOR_RENEG_COMPLETE == repl_tls.renegotiate_state)
		return STOP_POLL;
#	endif
	/* Reset report_cnt and next_report_at to 1 when a new upd proc is forked */
	if ((1 == report_cnt) || (report_cnt == next_report_at))
	{
		/* A comment on the usage of NO_SHUTDOWN below for the alert variable. Since upd_proc_local->upd_proc_shutdown is
		 * a shared memory field (and could be concurrently changed by either the receiver server or the update process),
		 * we want to make sure it is the same value BEFORE and AFTER checking whether the update process is alive or not.
		 * If it is not NO_SHUTDOWN (i.e. is SHUTDOWN or NORMAL_SHUTDOWN or ABNORMAL_SHUTDOWN) it has shut down due to
		 * an external request so we do want to send out a false update-process-is-not-alive alert.
		 */
		if ((alert = ((NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown) && (SRV_DEAD == is_updproc_alive())
				&& (NO_SHUTDOWN == upd_proc_local->upd_proc_shutdown)))
			|| (info = (((NORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown)
				|| (ABNORMAL_SHUTDOWN == upd_proc_local->upd_proc_shutdown)) && (SRV_DEAD == is_updproc_alive()))))
		{
			if (alert)
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
					"ALERT : Receiver Server detected that Update Process is not ALIVE\n");
			else
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
					"INFO : Update process not running due to user initiated shutdown\n");
			if (1 == report_cnt)
			{
				send_xoff = TRUE;
				recvpool_ctl->old_jnl_seqno = recvpool_ctl->jnl_seqno;
				recvpool_ctl->jnl_seqno = 0;
				/* Even though we have identified that the update process is NOT alive, a waitpid on the update
				 * process PID is necessary so that the system doesn't leave any zombie process lying around.
				 * This is possible since any child process that dies without the parent doing a waitpid on it
				 * will be defunct unless the parent dies at which point the "init" process takes the role of
				 * the parent and invokes waitpid to remove the zombies.
				 * NOTE: It is possible that the update process was killed before the receiver server got a
				 * chance to record it's PID in the recvpool.upd_proc_local structure. In such a case, don't
				 * invoke waitpid as that will block us (receiver server) if this instance of the receiver
				 * server was started with helper processes.
				 */
				if (0 < upd_proc_local->upd_proc_pid)
				{
					WAITPID(upd_proc_local->upd_proc_pid, &upd_exit_status, 0, waitpid_res);
					/* Since the update process as part of its shutdown does NOT reset the upd_proc_pid, reset
					 * it here ONLY if the update process was NOT kill -9ed. This is needed because receiver
					 * server as part of its shutdown relies on this field (upd_proc_pid) to determine if the
					 * update process was cleanly shutdown or was kill -9ed.
					 */
					if (!alert)
						upd_proc_local->upd_proc_pid = 0;
				}
				upd_proc_local->bad_trans = FALSE; /* No point in doing bad transaction processing */
				upd_proc_local->onln_rlbk_flg = FALSE; /* No point handling online rollback */
			}
			gtmrecv_wait_for_jnl_seqno = TRUE;
			REPL_DPRINT1(
			       "gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of upd crash/shutdown\n");
			next_report_at *= GTMRECV_NEXT_REPORT_FACTOR;
			report_cnt++;
		}
	} else
		report_cnt++;
	/* Check if REPL_CMP2UNCMP or REPL_BADTRANS message needs to be sent */
	if (upd_proc_local->onln_rlbk_flg)
	{	/* Update process detected an online rollback and is requesting us to restart the connection. But before that, send
		 * REPL_XOFF source side and drain the replication pipe
		 */
		onln_rlbk_flg_set = TRUE;
		send_xoff = TRUE;
	} else if (!send_cmp2uncmp && gtmrecv_send_cmp2uncmp)
	{
		send_xoff = TRUE;
		send_seqno = recvpool_ctl->jnl_seqno;
		send_cmp2uncmp = TRUE;
	} else if (!send_badtrans && upd_proc_local->bad_trans)
	{
		send_xoff = TRUE;
		send_seqno = upd_proc_local->read_jnl_seqno;
		send_badtrans = TRUE;
		bad_trans_detected = TRUE;
	} else if (!upd_proc_local->bad_trans && send_badtrans && 1 != report_cnt)
	{
		send_badtrans = FALSE;
		bad_trans_detected = FALSE;
	}
	if (send_xoff && !xoff_sent)
	{	/* Send XOFF_ACK_ME if the receiver has a connection to the source. Do not attempt to send it if we dont even
		 * know the endianness of the remote side. In that case, we are guaranteed no initial handshake occurred and
		 * so no point sending the XOFF too. This saves us lots of trouble in case of cross-endian replication connections.
		 */
		assert((FD_INVALID  != gtmrecv_sock_fd) || repl_connection_reset);
		if ((FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known)
		{
			send_seqno = upd_proc_local->read_jnl_seqno;
			if (!remote_side->cross_endian)
			{
				xoff_msg.type = REPL_XOFF_ACK_ME;
				xoff_msg.len = MIN_REPL_MSGLEN;
				memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&send_seqno, SIZEOF(seq_num));
			} else
			{
				xoff_msg.type = GTM_BYTESWAP_32(REPL_XOFF_ACK_ME);
				xoff_msg.len = GTM_BYTESWAP_32(MIN_REPL_MSGLEN);
				temp_send_seqno = GTM_BYTESWAP_64(send_seqno);
				memcpy((uchar_ptr_t)&xoff_msg.msg[0], (uchar_ptr_t)&temp_send_seqno, SIZEOF(seq_num));
			}
			REPL_SEND_LOOP(gtmrecv_sock_fd, &xoff_msg, MIN_REPL_MSGLEN, REPL_POLL_NOWAIT)
				; /* Empty Body */
			if (SS_NORMAL != status)
			{
				if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending XOFF_ACK_ME. "
							"Status = %d ; %s\n", status, STRERROR(status));
					repl_close(&gtmrecv_sock_fd);
					repl_connection_reset = TRUE;
					xoff_sent = FALSE;
					send_badtrans = FALSE;

				} else if (EREPL_SEND == repl_errno)
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. "
								"Error in send"), status);
				else
				{
					assert(EREPL_SELECT == repl_errno);
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. "
								"Error in select"), status);
				}
			} else
			{
				xoff_sent = TRUE;
				log_draining_msg = TRUE;
			}
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_XOFF_ACK_ME sent due to upd shutdown/crash or bad trans "
					"or ONLINE_ROLLBACK\n");
			send_xoff = FALSE;
		} else
		{	/* Connection has been lost OR initial handshake needs to happen again, so no point sending XOFF/BADTRANS */
			send_xoff = FALSE;
			send_badtrans = FALSE;
		}
	}
	/* Drain pipe */
	if (xoff_sent)
	{
		if (log_draining_msg)
		{	/* avoid multiple logs per instance */
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Draining replication pipe due to %s\n",
					send_cmp2uncmp ? "CMP2UNCMP" : (send_badtrans ? "BAD_TRANS" :
							(onln_rlbk_flg_set ? "ONLINE_ROLLBACK" : "UPD shutdown/crash")));
			log_draining_msg = FALSE;
		}
		if (0 != *buff_unprocessed)
		{	/* Throw away the current contents of the buffer */
			buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ? *pending_data_len : *buff_unprocessed);
			*buff_unprocessed -= buffered_data_len;
			buffp += buffered_data_len;
			*pending_data_len -= buffered_data_len;
			REPL_DPRINT2("gtmrecv_poll_actions : (1) Throwing away %d bytes from old buffer while draining\n",
				buffered_data_len);
			assert(remote_side->endianness_known);	/* only then is remote_side->cross_endian reliable */
			while (REPL_MSG_HDRLEN <= *buff_unprocessed)
			{
				assert(0 == (((unsigned long)buffp) % REPL_MSG_ALIGN));
				msg_len = ((repl_msg_ptr_t)buffp)->len;
				msg_type = ((repl_msg_ptr_t)buffp)->type;
		        	if (remote_side->cross_endian)
			        {
			                msg_len = GTM_BYTESWAP_32(msg_len);
			                msg_type = GTM_BYTESWAP_32(msg_type);
			        }
				msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK);
				assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN)));
				*pending_data_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN);
				buffered_data_len = ((*pending_data_len <= *buff_unprocessed) ?
								*pending_data_len : *buff_unprocessed);
				*buff_unprocessed -= buffered_data_len;
				buffp += buffered_data_len;
				*pending_data_len -= buffered_data_len;
				REPL_DPRINT3("gtmrecv_poll_actions : (1) Throwing away message of "
					"type %d and length %d from old buffer while draining\n", msg_type, buffered_data_len);
			}
			if (0 < *buff_unprocessed)
			{
				memmove((unsigned char *)gtmrecv_msgp, buffp, *buff_unprocessed);
				REPL_DPRINT2("gtmrecv_poll_actions : Incomplete header of length %d while draining\n",
					*buff_unprocessed);
			}
		}
		status = SS_NORMAL;
		if (0 != *buff_unprocessed || 0 == *pending_data_len)
		{	/* Receive the header of a message */
			assert(REPL_MSG_HDRLEN > *buff_unprocessed);	/* so we dont pass negative length in REPL_RECV_LOOP */
			REPL_RECV_LOOP(gtmrecv_sock_fd, ((unsigned char *)gtmrecv_msgp) + *buff_unprocessed,
				       (REPL_MSG_HDRLEN - *buff_unprocessed), REPL_POLL_WAIT)
				; /* Empty Body */
			if (SS_NORMAL == status)
			{
				assert(remote_side->endianness_known);	/* only then is remote_side->cross_endian reliable */
		        	if (!remote_side->cross_endian)
	        		{
			                msg_len = gtmrecv_msgp->len;
			                msg_type = gtmrecv_msgp->type;
			        } else
			        {
			                msg_len = GTM_BYTESWAP_32(gtmrecv_msgp->len);
			                msg_type = GTM_BYTESWAP_32(gtmrecv_msgp->type);
			        }
				msg_type = (msg_type & REPL_TR_CMP_MSG_TYPE_MASK);
				assert((REPL_TR_CMP_JNL_RECS == msg_type) || (0 == (msg_len % REPL_MSG_ALIGN)));
				msg_len = ROUND_UP2(msg_len, REPL_MSG_ALIGN);
				REPL_DPRINT3("gtmrecv_poll_actions : Received message of type %d and length %d while draining\n",
					msg_type, msg_len);
			}
		}
		if ((SS_NORMAL == status) && (0 != *buff_unprocessed || 0 == *pending_data_len) && (REPL_XOFF_ACK == msg_type))
		{	/* Receive the rest of the XOFF_ACK msg and signal the drain as complete */
			REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, (MIN_REPL_MSGLEN - REPL_MSG_HDRLEN), REPL_POLL_WAIT)
				; /* Empty Body */
			if (SS_NORMAL == status)
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE,
						"REPL INFO - XOFF_ACK received. Drained replication pipe completely\n");
				upd_shut_too_early_logged = FALSE;
				xoff_sent = FALSE;
				return_status = STOP_POLL;
			}
		} else if (SS_NORMAL == status)
		{	/* Drain the rest of the message */
			if (0 < *pending_data_len)
			{
				pending_msg_size = *pending_data_len;
				REPL_DPRINT2("gtmrecv_poll_actions : (2) Throwing away %d bytes from pipe\n", pending_msg_size);
			} else
			{
				pending_msg_size = msg_len - REPL_MSG_HDRLEN;
				REPL_DPRINT3("gtmrecv_poll_actions : (2) Throwing away message of "
					"type %d and length %d from pipe\n", msg_type, msg_len);
			}
			for ( ; SS_NORMAL == status && 0 < pending_msg_size; pending_msg_size -= gtmrecv_max_repl_msglen)
			{
				temp_len = (pending_msg_size < gtmrecv_max_repl_msglen)? pending_msg_size : gtmrecv_max_repl_msglen;
				REPL_RECV_LOOP(gtmrecv_sock_fd, gtmrecv_msgp, temp_len, REPL_POLL_WAIT)
					; /* Empty Body */
			}
			*buff_unprocessed = 0; *pending_data_len = 0;
			if (SS_NORMAL == status && info && !upd_shut_too_early_logged)
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "ALERT : User initiated shutdown of Update Process done "
						"when there was data in the replication pipe\n");
				upd_shut_too_early_logged = TRUE;
			}
			return_status = CONTINUE_POLL;
		}
		if (SS_NORMAL != status)
		{
			if (EREPL_RECV == repl_errno)
			{
				if (REPL_CONN_RESET(status))
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while receiving XOFF_ACK. "
							"Status = %d ; %s\n", status, STRERROR(status));
					repl_close(&gtmrecv_sock_fd);
					repl_connection_reset = TRUE;
					xoff_sent = FALSE;
					send_badtrans = FALSE;
					return_status = STOP_POLL;
				} else
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
						LEN_AND_LIT("Error while draining replication pipe. Error in recv"), status);
			} else
			{
				assert(EREPL_SELECT == repl_errno);
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error while draining replication pipe. Error in select"), status);
			}
		}
	} else
		return_status = STOP_POLL;
	/* Like was done before for the XOFF_ACK_ME message, send a BADTRANS/CMP2UNCMP message only if we know
	 * the endianness of the other side. If not, no point in sending one anyways and saves us trouble in
	 * case of cross-endian replication connections.
	 */
	if ((STOP_POLL == return_status) && (send_badtrans || send_cmp2uncmp)
		&& (FD_INVALID != gtmrecv_sock_fd) && remote_side->endianness_known)
	{	/* Send REPL_BADTRANS or REPL_CMP2UNCMP message */
		if (!remote_side->cross_endian)
		{
			bad_trans_msg.type = send_cmp2uncmp ? REPL_CMP2UNCMP : REPL_BADTRANS;
			bad_trans_msg.len  = MIN_REPL_MSGLEN;
			bad_trans_msg.start_seqno = send_seqno;
		} else
		{
			bad_trans_msg.type = send_cmp2uncmp ? GTM_BYTESWAP_32(REPL_CMP2UNCMP) : GTM_BYTESWAP_32(REPL_BADTRANS);
			bad_trans_msg.len  = GTM_BYTESWAP_32(MIN_REPL_MSGLEN);
			bad_trans_msg.start_seqno = GTM_BYTESWAP_64(send_seqno);
		}
		REPL_SEND_LOOP(gtmrecv_sock_fd, &bad_trans_msg, bad_trans_msg.len, REPL_POLL_NOWAIT)
			; /* Empty Body */
		if (SS_NORMAL == status)
		{
			if (send_cmp2uncmp)
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_CMP2UNCMP message sent with seqno %llu\n", send_seqno);
			else
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL_BADTRANS message sent with seqno %llu\n", send_seqno);
		} else
		{
			if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
			{
				if (send_cmp2uncmp)
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_CMP2UNCMP. "
							"Status = %d ; %s\n", status, STRERROR(status));
				} else
				{
					repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_BADTRANS. "
							"Status = %d ; %s\n", status, STRERROR(status));
				}
				repl_close(&gtmrecv_sock_fd);
				repl_connection_reset = TRUE;
				return_status = STOP_POLL;
			} else if (EREPL_SEND == repl_errno)
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in send"), status);
			else
			{
				assert(EREPL_SELECT == repl_errno);
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
					LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in select"), status);
			}
		}
		send_badtrans = FALSE;
		if (send_cmp2uncmp)
		{
			REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because this receiver"
				"server requested a fall-back from compressed to uncompressed operation\n");
			gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */
			gtmrecv_bad_trans_sent = TRUE;
			gtmrecv_send_cmp2uncmp = FALSE;
			send_cmp2uncmp = FALSE;
		}
	}
	if ((upd_proc_local->bad_trans && bad_trans_detected) || onln_rlbk_flg_set
		|| (UPDPROC_START == upd_proc_local->start_upd) && (1 != report_cnt))
	{
		if (UPDPROC_START == upd_proc_local->start_upd)
		{
			assert(is_updproc_alive() != SRV_ALIVE);
			upd_proc_local->upd_proc_shutdown = NO_SHUTDOWN;
		}
		recvpool_ctl->wrapped = FALSE;
		recvpool_ctl->write_wrap = recvpool_ctl->recvpool_size;
		recvpool_ctl->write = 0;
		/* Reset last_rcvd_histinfo, last_valid_histinfo etc. as they reflect context from unprocessed data
		 * in the receive pool and those are no longer valid because we have drained the receive pool.
		 */
		GTMRECV_CLEAR_CACHED_HISTINFO(recvpool.recvpool_ctl, jnlpool, jnlpool_ctl, INSERT_STRM_HISTINFO_FALSE);
		if (UPDPROC_START == upd_proc_local->start_upd)
		{
			/* Attempt starting the update process */
			for (upd_start_attempts = 0;
			     UPDPROC_START_ERR == (upd_start_status = gtmrecv_upd_proc_init(FALSE)) &&
			     GTMRECV_MAX_UPDSTART_ATTEMPTS > upd_start_attempts;
			     upd_start_attempts++)
			{
				if (EREPL_UPDSTART_SEMCTL == repl_errno || EREPL_UPDSTART_BADPATH == repl_errno)
				{
					gtmrecv_autoshutdown();
				} else if (EREPL_UPDSTART_FORK == repl_errno)
				{
					/* Couldn't start up update now, can try later */
					LONG_SLEEP(GTMRECV_WAIT_FOR_PROC_SLOTS);
					continue;
				} else if (EREPL_UPDSTART_EXEC == repl_errno)
				{
					/* In forked child, could not exec, should exit */
					gtmrecv_exit(ABNORMAL_SHUTDOWN);
				}
			}
			if (UPDPROC_STARTED == (upd_proc_local->start_upd = upd_start_status))
			{
				REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because of "
					     "upd restart\n");
				gtmrecv_wait_for_jnl_seqno = TRUE;
				report_cnt = next_report_at = 1;
				if (send_xoff && (FD_INVALID == gtmrecv_sock_fd))
				{
					/* Update start command was issued before connection was established,
					 * no point in sending XOFF.  */
					send_xoff = FALSE;
				}
			} else
			{
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "%d failed attempts to fork update process. Try later\n",
					 upd_start_attempts);
			}
		} else
		{
			gtmrecv_wait_for_jnl_seqno = TRUE;/* set this to TRUE to break out and go back to a fresh "do_main_loop" */
			if (onln_rlbk_flg_set)
			{
				assert(NULL != jnlpool_ctl);
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "Closing connection due to ONLINE ROLLBACK\n");
 				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
 						jnlpool_ctl->jnl_seqno);
				repl_log(gtmrecv_log_fp, TRUE, TRUE, "REPL INFO - Current Receive Pool Seqno : %llu\n",
						recvpool_ctl->jnl_seqno);
				repl_close(&gtmrecv_sock_fd);
				repl_connection_reset = TRUE;
				xoff_sent = FALSE;
				send_badtrans = FALSE;
				upd_proc_local->onln_rlbk_flg = FALSE;
				/* Before restarting afresh, sync the online rollback cycles. This way any future grab_lock that
				 * we do after restarting should not realize an unhandled online rollback.  For receiver, it is
				 * just syncing the journal pool cycles as the databases are not opened. But, to be safe, grab
				 * the lock and sync the cycles.
				 */
				grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, GRAB_LOCK_ONLY);
				SYNC_ONLN_RLBK_CYCLES;
				rel_lock(jnlpool.jnlpool_dummy_reg);
				return_status = STOP_POLL;
				recvpool_ctl->jnl_seqno = 0;
			} else
			{
				REPL_DPRINT1("gtmrecv_poll_actions : Setting gtmrecv_wait_for_jnl_seqno to TRUE because bad trans"
						"sent\n");
				gtmrecv_bad_trans_sent = TRUE;
				upd_proc_local->bad_trans = FALSE;
				recvpool_ctl->jnl_seqno = upd_proc_local->read_jnl_seqno;
			}
		}
	}
	if ((0 == *pending_data_len) && (0 != gtmrecv_local->changelog))
	{
		if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGINTERVAL)
		{
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log interval from %u to %u\n",
					log_interval, gtmrecv_local->log_interval);
			log_interval = gtmrecv_local->log_interval;
			gtmrecv_reinit_logseqno(); /* will force a LOG on the first recv following the interval change */
		}
		if (gtmrecv_local->changelog & REPLIC_CHANGE_LOGFILE)
		{
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Changing log file to %s\n", gtmrecv_local->log_file);
			repl_log_init(REPL_GENERAL_LOG, &gtmrecv_log_fd, gtmrecv_local->log_file);
			repl_log_fd2fp(&gtmrecv_log_fp, gtmrecv_log_fd);
			repl_log(gtmrecv_log_fp, TRUE, TRUE, "Change log to %s successful\n",gtmrecv_local->log_file);
		}
		/* NOTE: update process and receiver each ignore any setting specific to the other (REPLIC_CHANGE_UPD_LOGINTERVAL,
		 * REPLIC_CHANGE_LOGINTERVAL) */
		if (REPLIC_CHANGE_LOGINTERVAL == gtmrecv_local->changelog)
			upd_proc_local->changelog = 0;
		else
			upd_proc_local->changelog = gtmrecv_local->changelog; /* Pass changelog request to the update process */
		gtmrecv_local->changelog = 0;
	}
	if (0 == *pending_data_len && !gtmrecv_logstats && gtmrecv_local->statslog)
	{
		gtmrecv_logstats = TRUE;
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "Begin statistics logging\n");
	} else if (0 == *pending_data_len && gtmrecv_logstats && !gtmrecv_local->statslog)
	{
		gtmrecv_logstats = FALSE;
		/* Force all data out to the file before closing the file */
		repl_log(gtmrecv_log_fp, TRUE, TRUE, "End statistics logging\n");
	}
	if (0 == *pending_data_len)
	{
		if (upd_helper_ctl->start_helpers)
		{
			gtmrecv_helpers_init(upd_helper_ctl->start_n_readers, upd_helper_ctl->start_n_writers);
			upd_helper_ctl->start_helpers = FALSE;
		}
		if (HELPER_REAP_NONE != (status = upd_helper_ctl->reap_helpers) ||
			(double)GTMRECV_REAP_HELPERS_INTERVAL <= difftime(gtmrecv_now, last_reap_time))
		{
			gtmrecv_reap_helpers(HELPER_REAP_WAIT == status);
			last_reap_time = gtmrecv_now;
		}
	}
	return (return_status);
}
예제 #7
0
/*
 * ------------------------------------------
 * Hang the process for a specified time.
 *
 *	Goes to sleep for a positive value.
 *	Any caught signal will terminate the sleep
 *	following the execution of that signal's catching routine.
 *
 * Arguments:
 *	num - time to sleep
 *
 * Return:
 *	none
 * ------------------------------------------
 */
void op_hang(mval* num)
{
	int		ms;
	mv_stent	*mv_zintcmd;
	ABS_TIME	cur_time, end_time;
#	ifdef VMS
	uint4 		time[2];
	int4		efn_mask, status;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	ms = 0;
	MV_FORCE_NUM(num);
	if (num->mvtype & MV_INT)
	{
		if (0 < num->m[1])
		{
			assert(MV_BIAS >= 1000);	/* if formats change overflow may need attention */
			ms = num->m[1] * (1000 / MV_BIAS);
		}
	} else if (0 == num->sgn) 		/* if sign is not 0 it means num is negative */
		ms = mval2i(num) * 1000;	/* too big to care about fractional amounts */
	if (ms)
	{
		if (TREF(tpnotacidtime) * 1000 < ms)
			TPNOTACID_CHECK(HANGSTR);
#		if defined(DEBUG) && defined(UNIX)
		if (gtm_white_box_test_case_enabled
			&& (WBTEST_DEFERRED_TIMERS == gtm_white_box_test_case_number)
			&& (3 > gtm_white_box_test_case_count)
			&& (123000 == ms))
		{
			DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			DBGFPF((stderr, "OP_HANG: will sleep for 20 seconds\n"));
			LONG_SLEEP(20);
			DBGFPF((stderr, "OP_HANG: done sleeping\n"));
			ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			return;
		}
		if (gtm_white_box_test_case_enabled
			&& (WBTEST_BREAKMPC == gtm_white_box_test_case_number)
			&& (0 == gtm_white_box_test_case_count)
			&& (999 == ms))
		{
			frame_pointer->old_frame_pointer->mpc = (unsigned char *)GTM64_ONLY(0xdeadbeef12345678)
				NON_GTM64_ONLY(0xdead1234);
			return;
		}
		/* Upon seeing a .999s hang this white-box test launches a timer that pops with a period of UTIL_OUT_SYSLOG_INTERVAL
		 * and prints a long message via util_out_ptr.
		 */
		if (gtm_white_box_test_case_enabled
			&& (WBTEST_UTIL_OUT_BUFFER_PROTECTION == gtm_white_box_test_case_number)
			&& (0 == gtm_white_box_test_case_count)
			&& (999 == ms))
		{
			start_timer((TID)&util_out_syslog_dump, UTIL_OUT_SYSLOG_INTERVAL, util_out_syslog_dump, 0, NULL);
			return;
		}
#		endif
		sys_get_curr_time(&cur_time);
		mv_zintcmd = find_mvstent_cmd(ZINTCMD_HANG, restart_pc, restart_ctxt, FALSE);
		if (!mv_zintcmd)
			add_int_to_abs_time(&cur_time, ms, &end_time);
		else
		{
			end_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
			cur_time = sub_abs_time(&end_time, &cur_time);	/* get remaing time to sleep */
			if (0 <= cur_time.at_sec)
				ms = (int4)(cur_time.at_sec * 1000 + cur_time.at_usec / 1000);
			else
				ms = 0;		/* all done */
			/* restore/pop previous zintcmd_active[ZINTCMD_HANG] hints */
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last
				= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).count--;
			assert(0 <= TAREF1(zintcmd_active, ZINTCMD_HANG).count);
			if (mv_chain == mv_zintcmd)
				POP_MV_STENT();	/* just pop if top of stack */
			else
			{	/* flag as not active */
				mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
				mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
			}
			if (0 == ms)
				return;		/* done HANGing */
		}
		UNIX_ONLY(hiber_start(ms);)
		VMS_ONLY(
			time[0] = -time_low_ms(ms);
			time[1] = -time_high_ms(ms) - 1;
			efn_mask = (1 << efn_outofband | 1 << efn_timer);
			if (SS$_NORMAL != (status = sys$setimr(efn_timer, &time, NULL, &time, 0)))
				rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$setimr"), CALLFROM, status);
			if (SS$_NORMAL != (status = sys$wflor(efn_outofband, efn_mask)))
				rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$wflor"), CALLFROM, status);
		)
		if (outofband)
예제 #8
0
/*
 * ------------------------------------------
 * Hang the process for a specified time.
 *
 *	Goes to sleep for a positive value.
 *	Any caught signal will terminate the sleep
 *	following the execution of that signal's catching routine.
 *
 * 	The actual hang duration should be NO LESS than the specified
 * 	duration for specified durations greater than .001 seconds.
 * 	Certain applications depend on this assumption.
 *
 * Arguments:
 *	num - time to sleep
 *
 * Return:
 *	none
 * ------------------------------------------
 */
void op_hang(mval* num)
{
	int		ms;
	double		tmp;
	mv_stent	*mv_zintcmd;
	ABS_TIME	cur_time, end_time;
#	ifdef VMS
	uint4 		time[2];
	int4		efn_mask, status;
#	endif
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	ms = 0;
	MV_FORCE_NUM(num);
	if (num->mvtype & MV_INT)
	{
		if (0 < num->m[1])
		{
			assert(MV_BIAS >= 1000);	/* if formats change overflow may need attention */
			ms = num->m[1] * (1000 / MV_BIAS);
		}
	} else if (0 == num->sgn) 		/* if sign is not 0 it means num is negative */
	{
		tmp = mval2double(num) * (double)1000;
		ms = ((double)MAXPOSINT4 >= tmp) ? (int)tmp : (int)MAXPOSINT4;
	}
	if (ms)
	{
		if (TREF(tpnotacidtime) * 1000 < ms)
			TPNOTACID_CHECK(HANGSTR);
#		if defined(DEBUG) && defined(UNIX)
		if (WBTEST_ENABLED(WBTEST_DEFERRED_TIMERS) && (3 > gtm_white_box_test_case_count) && (123000 == ms))
		{
			DEFER_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			DBGFPF((stderr, "OP_HANG: will sleep for 20 seconds\n"));
			LONG_SLEEP(20);
			DBGFPF((stderr, "OP_HANG: done sleeping\n"));
			ENABLE_INTERRUPTS(INTRPT_NO_TIMER_EVENTS);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_BREAKMPC)&& (0 == gtm_white_box_test_case_count) && (999 == ms))
		{
			frame_pointer->old_frame_pointer->mpc = (unsigned char *)GTM64_ONLY(0xdeadbeef12345678)
				NON_GTM64_ONLY(0xdead1234);
			return;
		}
		if (WBTEST_ENABLED(WBTEST_UTIL_OUT_BUFFER_PROTECTION) && (0 == gtm_white_box_test_case_count) && (999 == ms))
		{	/* Upon seeing a .999s hang this white-box test launches a timer that pops with a period of
		 	 * UTIL_OUT_SYSLOG_INTERVAL and prints a long message via util_out_ptr.
			 */
			start_timer((TID)&util_out_syslog_dump, UTIL_OUT_SYSLOG_INTERVAL, util_out_syslog_dump, 0, NULL);
			return;
		}
#		endif
		sys_get_curr_time(&cur_time);
		mv_zintcmd = find_mvstent_cmd(ZINTCMD_HANG, restart_pc, restart_ctxt, FALSE);
		if (!mv_zintcmd)
			add_int_to_abs_time(&cur_time, ms, &end_time);
		else
		{
			end_time = mv_zintcmd->mv_st_cont.mvs_zintcmd.end_or_remain;
			cur_time = sub_abs_time(&end_time, &cur_time);	/* get remaing time to sleep */
			if (0 <= cur_time.at_sec)
				ms = (int4)(cur_time.at_sec * 1000 + cur_time.at_usec / 1000);
			else
				ms = 0;		/* all done */
			/* restore/pop previous zintcmd_active[ZINTCMD_HANG] hints */
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last
				= mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_ctxt_prior;
			TAREF1(zintcmd_active, ZINTCMD_HANG).count--;
			assert(0 <= TAREF1(zintcmd_active, ZINTCMD_HANG).count);
			if (mv_chain == mv_zintcmd)
				POP_MV_STENT();	/* just pop if top of stack */
			else
			{	/* flag as not active */
				mv_zintcmd->mv_st_cont.mvs_zintcmd.command = ZINTCMD_NOOP;
				mv_zintcmd->mv_st_cont.mvs_zintcmd.restart_pc_check = NULL;
			}
			if (0 == ms)
				return;		/* done HANGing */
		}
#		ifdef UNIX
		if (ms < 10)
			SLEEP_USEC(ms * 1000, TRUE);	/* Finish the sleep if it is less than 10ms. */
		else
			hiber_start(ms);
#		elif defined(VMS)
		time[0] = -time_low_ms(ms);
		time[1] = -time_high_ms(ms) - 1;
		efn_mask = (1 << efn_outofband | 1 << efn_timer);
		if (SS$_NORMAL != (status = sys$setimr(efn_timer, &time, NULL, &time, 0)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$setimr"), CALLFROM, status);
		if (SS$_NORMAL != (status = sys$wflor(efn_outofband, efn_mask)))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$wflor"), CALLFROM, status);
		if (outofband)
		{
			if (SS$_WASCLR == (status = sys$readef(efn_timer, &efn_mask)))
			{
				if (SS$_NORMAL != (status = sys$cantim(&time, 0)))
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("$cantim"),
						CALLFROM, status);
			} else
				assertpro(SS$_WASSET == status);
		}
#		endif
	} else
		rel_quant();
	if (outofband)
	{
		PUSH_MV_STENT(MVST_ZINTCMD);
		mv_chain->mv_st_cont.mvs_zintcmd.end_or_remain = end_time;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_check = restart_ctxt;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_check = restart_pc;
		/* save current information from zintcmd_active */
		mv_chain->mv_st_cont.mvs_zintcmd.restart_ctxt_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last;
		mv_chain->mv_st_cont.mvs_zintcmd.restart_pc_prior = TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_pc_last = restart_pc;
		TAREF1(zintcmd_active, ZINTCMD_HANG).restart_ctxt_last = restart_ctxt;
		TAREF1(zintcmd_active, ZINTCMD_HANG).count++;
		mv_chain->mv_st_cont.mvs_zintcmd.command = ZINTCMD_HANG;
		outofband_action(FALSE);
	}
	return;
}
예제 #9
0
int gtmsource_changelog(void)
{
	uint4	changelog_accepted = 0;
	int     log_fd = 0; /*used to indicate whether the new specified log file is writable*/
	int     close_status = 0; /*used to indicate if log file is successfully closed*/
	char*   err_code;
	int	save_errno = 0;
	int	retry_count = 5;

	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	repl_log(stderr, TRUE, TRUE, "Initiating CHANGELOG operation on source server pid [%d] for secondary instance [%s]\n",
		jnlpool.gtmsource_local->gtmsource_pid, jnlpool.gtmsource_local->secondary_instname);
	if (0 != jnlpool.gtmsource_local->changelog)
	{
		retry_count = 5;
		while (0 != retry_count--)
		{
			LONG_SLEEP(5);
			if (!jnlpool.gtmsource_local->changelog)
				break;
		}
	}
	if (0 != jnlpool.gtmsource_local->changelog)
	{
		util_out_print("Change log is already in progress. Not initiating change in log file or log interval", TRUE);
		return (ABNORMAL_SHUTDOWN);
	}
	if ('\0' != gtmsource_options.log_file[0]) /* trigger change in log file */
	{
		if (0 != STRCMP(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file))
		{	/*check if the new log file is writable*/
			OPENFILE3_CLOEXEC(gtmsource_options.log_file,
				      O_RDWR | O_CREAT | O_APPEND, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, log_fd);
			if (log_fd < 0)
			{
				save_errno = ERRNO;
				err_code = STRERROR(save_errno);
				if ('\0' != jnlpool.gtmsource_local->log_file[0])
					gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6,
						LEN_AND_STR(gtmsource_options.log_file),
						LEN_AND_STR(err_code),
						LEN_AND_STR(jnlpool.gtmsource_local->log_file));
				else
					gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_REPLLOGOPN, 6,
						LEN_AND_STR(gtmsource_options.log_file),
					   	LEN_AND_STR(err_code),
						LEN_AND_STR(NULL_DEVICE));
			} else {
				CLOSEFILE_IF_OPEN(log_fd, close_status);
				assert(close_status==0);
				changelog_accepted |= REPLIC_CHANGE_LOGFILE;
				STRCPY(jnlpool.gtmsource_local->log_file, gtmsource_options.log_file);
				util_out_print("Change log initiated with file !AD", TRUE, LEN_AND_STR(gtmsource_options.log_file));
			}
		} else
			util_out_print("Log file is already !AD. Not initiating change in log file", TRUE,
					LEN_AND_STR(gtmsource_options.log_file));
	}
	if (0 != gtmsource_options.src_log_interval) /* trigger change in log interval */
	{
		if (gtmsource_options.src_log_interval != jnlpool.gtmsource_local->log_interval)
		{
			changelog_accepted |= REPLIC_CHANGE_LOGINTERVAL;
			jnlpool.gtmsource_local->log_interval = gtmsource_options.src_log_interval;
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_CHANGELOGINTERVAL, 5,
					   LEN_AND_LIT("Source"),
					   LEN_AND_STR(jnlpool.gtmsource_local->log_file),
					   gtmsource_options.src_log_interval);
		} else
			util_out_print("Log interval is already !UL. Not initiating change in log interval", TRUE,
					gtmsource_options.src_log_interval);
	}
	if (0 != changelog_accepted)
		jnlpool.gtmsource_local->changelog = changelog_accepted;
	else
		util_out_print("No change to log file or log interval", TRUE);
	return (0 != save_errno) ? ABNORMAL_SHUTDOWN : NORMAL_SHUTDOWN;
}
예제 #10
0
파일: dse.c 프로젝트: mihawk/fis-gtm
int main(int argc, char *argv[])
{
	DCL_THREADGBL_ACCESS;

	GTM_THREADGBL_INIT;
	common_startup_init(DSE_IMAGE);
	licensed = TRUE;
	TREF(transform) = TRUE;
	TREF(no_spangbls) = TRUE;	/* dse operates on a per-region basis irrespective of global mapping in gld */
	TREF(skip_file_corrupt_check) = TRUE;	/* do not let csd->file_corrupt flag cause errors in dse */
	op_open_ptr = op_open;
	patch_curr_blk = get_dir_root();
	err_init(util_base_ch);
	UNICODE_ONLY(gtm_strToTitle_ptr = &gtm_strToTitle);
	GTM_ICU_INIT_IF_NEEDED;	/* Note: should be invoked after err_init (since it may error out) and before CLI parsing */
	sig_init(generic_signal_handler, dse_ctrlc_handler, suspsigs_handler, continue_handler);
	atexit(util_exit_handler);
	SET_LATCH_GLOBAL(&defer_latch, LOCK_AVAILABLE);
	stp_init(STP_INITSIZE);
	rts_stringpool = stringpool;
	getjobname();
	INVOKE_INIT_SECSHR_ADDRS;
	io_init(TRUE);
	getzdir();
	gtm_chk_dist(argv[0]);
	prealloc_gt_timers();
	gt_timers_add_safe_hndlrs();
	initialize_pattern_table();
	gvinit();
	region_init(FALSE);
	util_out_print("!/File  !_!AD", TRUE, DB_LEN_STR(gv_cur_region));
	util_out_print("Region!_!AD!/", TRUE, REG_LEN_STR(gv_cur_region));
	cli_lex_setup(argc, argv);
	/* Since DSE operates on a region-by-region basis (for the most part), do not use a global directory at all from now on */
	original_header = gd_header;
	gd_header = NULL;
	OPERATOR_LOG_MSG;
#	ifdef DEBUG
	if ((gtm_white_box_test_case_enabled && (WBTEST_SEMTOOLONG_STACK_TRACE == gtm_white_box_test_case_number) ))
	{
		sgmnt_addrs     * csa;
		node_local_ptr_t cnl;
		csa = &FILE_INFO(gv_cur_region)->s_addrs;
		cnl = csa->nl;
		cnl->wbox_test_seq_num  = 1; /*Signal the first step and wait here*/
		/* The signal to the shell. MUPIP must not start BEFORE DSE */
		util_out_print("DSE is ready. MUPIP can start. Note: This message is a part of WBTEST_SEMTOOLONG_STACK_TRACE test. "
			       "It will not appear in PRO version.", TRUE);
		while (2 != cnl->wbox_test_seq_num) /*Wait for another process to get hold of the semaphore and signal next step*/
			LONG_SLEEP(1);
	}
#	endif
	if (argc < 2)
                display_prompt();
	while (1)
	{
		if (!dse_process(argc))
			break;
		display_prompt();
	}
	dse_exit();
	REVERT;
	return 0;
}
예제 #11
0
int gtmsource()
{
	int			status, log_init_status, waitpid_res, save_errno;
	char			print_msg[1024], tmpmsg[1024];
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa, *repl_csa;
	boolean_t		all_files_open, isalive;
	pid_t			pid, ppid, procgp;
	seq_num			read_jnl_seqno, jnl_seqno;
	unix_db_info		*udi;
	gtmsource_local_ptr_t	gtmsource_local;
	boolean_t		this_side_std_null_coll;
	int			null_fd, rc;

	memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs));
	call_on_signal = gtmsource_sigstop;
	ESTABLISH_RET(gtmsource_ch, SS_NORMAL);
	if (-1 == gtmsource_get_opt())
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR);
	if (gtmsource_options.shut_down)
	{	/* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return
		 * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while
		 * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other
		 * MUPIP REPLIC commands that need these locks for their function.
		 */
		if (0 < gtmsource_options.shutdown_time)
		{
			repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n",
												gtmsource_options.shutdown_time);
			LONG_SLEEP(gtmsource_options.shutdown_time);
		} else
			repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n");
	} else if (gtmsource_options.start)
	{
		repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n",
			gtmsource_options.secondary_instname);
	}
	if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary))
	{	/* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now
		 * in case this is a secondary -> primary transition. This is so we can later switch journal files in all
		 * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since
		 * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we
		 * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always
		 * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has
		 * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic
		 * currently has assumptions that a process holds only one ftok at any point in time.
		 */
		assert(NULL == gd_header);
		gvinit();
		all_files_open = region_init(FALSE);
		if (!all_files_open)
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator);
	/* is_jnlpool_creator == TRUE ==> this process created the journal pool
	 * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it.
	 */
	if (gtmsource_options.shut_down)
		gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.activate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.deactivate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.checkhealth)
		gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.changelog)
		 gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showbacklog)
		gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.stopsourcefilter)
		gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.jnlpool)
		gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.losttncomplete)
		gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.needrestart)
		gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showfreeze)
		gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.setfreeze)
		gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN);
	else if (!gtmsource_options.start)
	{
		assert(CLI_PRESENT == cli_present("STATSLOG"));
		gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN);
	}
	assert(gtmsource_options.start);
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */
	gtmsource_local = jnlpool.gtmsource_local;
	gtmsource_local->child_server_running = FALSE;
	FORK(pid);
	if (0 > pid)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
			ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno);
	} else if (0 < pid)
	{	/* Parent. Wait until child sets "child_server_running" to FALSE. That is an indication that the child
		 * source server has completed its initialization phase and is all set so the parent command can return.
		 */
		while (isalive = is_proc_alive(pid, 0))	/* note : intended assignment */
		{
			if (gtmsource_local->child_server_running)
				break;
			/* To take care of reassignment of PIDs, the while condition should be && with the condition
			 * (PPID of pid == process_id)
			 */
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START);
			WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */
		}
		if (isalive)
		{	/* Child process is alive and started with no issues */
			if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)))
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
					ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno);
			ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE);
		} else
		{	/* Child source server process errored out at startup and is no longer alive.
			 * If we were the one who created the journal pool, let us clean it up.
			 */
			repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n");
			if (is_jnlpool_creator)
				status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN);
		}
		/* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup
		 * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the
		 * value of sem 1 before the child grabs sem 1.
		 */
		gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR);
	}
	/* Point stdin to /dev/null */
	OPENFILE("/dev/null", O_RDONLY, null_fd);
	if (0 > null_fd)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0);
	FCNTL3(null_fd, F_DUPFD, 0, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0);
	CLOSEFILE(null_fd, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0);
	/* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore
	 * at this point. The variables that indicate this would have been copied over to the child during the fork. This will
	 * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child
	 * to ensure they do not misrepresent the holder of those semaphores.
	 */
	ftok_sem_reg = NULL;
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(udi->grabbed_ftok_sem);
	udi->grabbed_ftok_sem = FALSE;
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
	assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
	/* Start child source server initialization */
	is_src_server = TRUE;
	OPERATOR_LOG_MSG;
	process_id = getpid();
	/* Reinvoke secshr related initialization with the child's pid */
	INVOKE_INIT_SECSHR_ADDRS;
	/* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool.
	 * Note that the initialization would already have been done by the parent receiver startup command but we need to
	 * redo the initialization with the child process id.
	 */
	assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id));
	mutex_per_process_init();
	START_HEARTBEAT_IF_NEEDED;
	ppid = getppid();
	log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file);
	assert(SS_NORMAL == log_init_status);
	repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd);
	if (-1 == (procgp = setsid()))
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Source server error in setsid"), errno);
#	endif /* REPL_DEBUG_NOBACKGROUND */
	if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level)
		gtm_zlib_init();	/* Open zlib shared library for compression/decompression */
	REPL_DPRINT1("Setting up regions\n");
	gvinit();

	/* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		gtmsource_exit(ABNORMAL_SHUTDOWN);
	}
	/* Determine primary side null subscripts collation order */
	/* Also check whether all regions have same null collation order */
	this_side_std_null_coll = -1;
	for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
	{
		csa = &FILE_INFO(reg)->s_addrs;
		if (this_side_std_null_coll != csa->hdr->std_null_coll)
		{
			if (-1 == this_side_std_null_coll)
				this_side_std_null_coll = csa->hdr->std_null_coll;
			else
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF);
				gtmsource_exit(ABNORMAL_SHUTDOWN);
			}
		}
		if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		if (reg->read_only && REPL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				   RTS_ERROR_LITERAL("Source Server does not have write permissions to one or "
					             "more database files that are replicated"));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	/* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */
	gtmsource_local->gtmsource_pid = process_id;
	gtmsource_local->gtmsource_state = GTMSOURCE_START;
	if (is_jnlpool_creator)
	{
		DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id);
		gtmsource_seqno_init(this_side_std_null_coll);
		if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)
		{	/* Created the journal pool as a root primary. Append a history record to the replication instance file.
			 * Invoke the function "gtmsource_rootprimary_init" to do that.
			 */
			gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
		}
	}
	/* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used
	 * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0.
	 * Note that the parent source server startup command would have done an increment of the ftok counter semaphore
	 * for the replication instance file. But the source server process (the child) that comes here would not have done
	 * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay.
	 */
	if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP);
	/* Increment the source server count semaphore */
	status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM);
	if (0 != status)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno);
	}
#	else
	if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM)))
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno);
	}
#	endif /* REPL_DEBUG_NOBACKGROUND */

	gtmsource_srv_count++;
	gtmsource_local->child_server_running = TRUE;	/* At this point, the parent startup command will stop waiting for child */
	gtm_event_log_init();
	/* Log source server startup command line first */
	SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str);
	repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg);

	SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]",
		process_id, gtmsource_local->secondary_instname);
	sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
	repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	if (is_jnlpool_creator)
	{
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	} else
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
#	ifdef GTM_TLS
	if (REPL_TLS_REQUESTED)
	{
		repl_do_tls_init(gtmsource_log_fp);
		assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK);
	}
#	endif
	if (jnlpool.jnlpool_ctl->freeze)
	{
		last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze;
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname);
		repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg);
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment);
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	}
	gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly;
	do
	{ 	/* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */
		gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START;
		if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN))
		{
			gtmsource_poll_actions(FALSE);
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE);
			continue;
		}
		if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode)
		{	/* Shutdown initiated */
			assert(gtmsource_local->shutdown == SHUTDOWN);
			sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2,
				    RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled"));
			repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
			gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
			break;
		}
		gtmsource_poll_actions(FALSE);
		if (GTMSOURCE_CHANGING_MODE == gtmsource_state)
			continue;
		if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode)
			gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE;
		SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port);
		sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
		gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
		assert(!repl_csa->hold_onto_crit);	/* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK);
		if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state)
		{
			repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n");
			repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
					jnlpool.jnlpool_ctl->jnl_seqno);
			continue;
		}
		QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr);
		gtmsource_local->read = jnlpool.jnlpool_ctl->write;
		gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL;
		read_jnl_seqno = gtmsource_local->read_jnl_seqno;
		assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno);
		if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno)
		{
			gtmsource_local->read_state = READ_FILE;
			QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno);
			gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
		if (SS_NORMAL != (status = gtmsource_alloc_tcombuff()))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
				  RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status);
		gtmsource_filter = NO_FILTER;
		if ('\0' != gtmsource_local->filter_cmd[0])
		{
			if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd)))
				gtmsource_filter |= EXTERNAL_FILTER;
			else
				gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		gtmsource_process();
		/* gtmsource_process returns only when mode needs to be changed to PASSIVE */
		assert(gtmsource_state == GTMSOURCE_CHANGING_MODE);
		gtmsource_ctl_close();
		gtmsource_free_msgbuff();
		gtmsource_free_tcombuff();
		gtmsource_free_filter_buff();
		gtmsource_stop_heartbeat();
		if (FD_INVALID != gtmsource_sock_fd)
			repl_close(&gtmsource_sock_fd);
		if (gtmsource_filter & EXTERNAL_FILTER)
			repl_stop_filter();
	} while (TRUE);
예제 #12
0
파일: t_qread.c 프로젝트: mihawk/fis-gtm
sm_uc_ptr_t t_qread(block_id blk, sm_int_ptr_t cycle, cache_rec_ptr_ptr_t cr_out)
	/* cycle is used in t_end to detect if the buffer has been refreshed since the t_qread */
{
	int4			status;
	uint4			blocking_pid;
	cache_rec_ptr_t		cr;
	bt_rec_ptr_t		bt;
	boolean_t		clustered, hold_onto_crit, was_crit, issued_db_init_crypt_warning, sync_needed;
	int			dummy, lcnt, ocnt;
	cw_set_element		*cse;
	off_chain		chain1;
	register sgmnt_addrs	*csa;
	register sgmnt_data_ptr_t	csd;
	enum db_ver		ondsk_blkver;
	int4			dummy_errno, gtmcrypt_errno;
	boolean_t		already_built, is_mm, reset_first_tp_srch_status, set_wc_blocked, sleep_invoked;
	ht_ent_int4		*tabent;
	srch_blk_status		*blkhist;
	trans_num		dirty, blkhdrtn;
	sm_uc_ptr_t		buffaddr;
	uint4			stuck_cnt = 0;
	boolean_t		lcl_blk_free;
	node_local_ptr_t	cnl;
	gd_segment		*seg;
	uint4			buffs_per_flush, flush_target;
	enc_info_t		*encr_ptr;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	lcl_blk_free = block_is_free;
	block_is_free = FALSE;	/* Reset to FALSE so that if t_qread fails below, we don't have an incorrect state of this var */
	first_tp_srch_status = NULL;
	reset_first_tp_srch_status = FALSE;
	csa = cs_addrs;
	csd = csa->hdr;
	INCR_DB_CSH_COUNTER(csa, n_t_qreads, 1);
	is_mm = (dba_mm == csd->acc_meth);
	/* We better hold crit in the final retry (TP & non-TP). Only exception is journal recovery */
	assert((t_tries < CDB_STAGNATE) || csa->now_crit || mupip_jnl_recover);
	if (dollar_tlevel)
	{
		assert(sgm_info_ptr);
		if (0 != sgm_info_ptr->cw_set_depth)
		{
			chain1 = *(off_chain *)&blk;
			if (1 == chain1.flag)
			{
				assert(sgm_info_ptr->cw_set_depth);
				if ((int)chain1.cw_index < sgm_info_ptr->cw_set_depth)
					tp_get_cw(sgm_info_ptr->first_cw_set, (int)chain1.cw_index, &cse);
				else
				{
					assert(FALSE == csa->now_crit);
					rdfail_detail = cdb_sc_blknumerr;
					return (sm_uc_ptr_t)NULL;
				}
			} else
			{
				if (NULL != (tabent = lookup_hashtab_int4(sgm_info_ptr->blks_in_use, (uint4 *)&blk)))
					first_tp_srch_status = tabent->value;
				else
					first_tp_srch_status = NULL;
				ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
				cse = first_tp_srch_status ? first_tp_srch_status->cse : NULL;
			}
			assert(!cse || !cse->high_tlevel);
			assert(!chain1.flag || cse);
			if (cse)
			{	/* transaction has modified the sought after block  */
				if ((gds_t_committed != cse->mode) || (n_gds_t_op < cse->old_mode))
				{	/* Changes have not been committed to shared memory, i.e. still in private memory.
					 * Build block in private buffer if not already done and return the same.
					 */
					assert(gds_t_writemap != cse->mode);
					if (FALSE == cse->done)
					{	/* out of date, so make it current */
						assert(gds_t_committed != cse->mode);
						already_built = (NULL != cse->new_buff);
						/* Validate the block's search history right after building a private copy.
						 * This is not needed in case gvcst_search is going to reuse the clue's search
						 * history and return (because tp_hist will do the validation of this block).
						 * But if gvcst_search decides to do a fresh traversal (because the clue does not
						 * cover the path of the current input key etc.) the block build that happened now
						 * will not get validated in tp_hist since it will instead be given the current
						 * key's search history path (a totally new path) for validation. Since a private
						 * copy of the block has been built, tp_tend would also skip validating this block
						 * so it is necessary that we validate the block right here. Since it is tricky to
						 * accurately differentiate between the two cases, we do the validation
						 * unconditionally here (besides it is only a few if checks done per block build
						 * so it is considered okay performance-wise).
						 */
						gvcst_blk_build(cse, (uchar_ptr_t)cse->new_buff, 0);
						assert(NULL != cse->blk_target);
						if (!already_built && !chain1.flag)
						{
							buffaddr = first_tp_srch_status->buffaddr;
							cr = first_tp_srch_status->cr;
							assert((is_mm || cr) && buffaddr);
							blkhdrtn = ((blk_hdr_ptr_t)buffaddr)->tn;
							if (TP_IS_CDB_SC_BLKMOD3(cr, first_tp_srch_status, blkhdrtn))
							{
								assert(CDB_STAGNATE > t_tries);
								rdfail_detail = cdb_sc_blkmod;	/* should this be something else */
								TP_TRACE_HIST_MOD(blk, gv_target, tp_blkmod_t_qread, cs_data,
									first_tp_srch_status->tn, blkhdrtn,
									((blk_hdr_ptr_t)buffaddr)->levl);
								return (sm_uc_ptr_t)NULL;
							}
							if (!is_mm && ((first_tp_srch_status->cycle != cr->cycle)
										|| (first_tp_srch_status->blk_num != cr->blk)))
							{
								assert(CDB_STAGNATE > t_tries);
								rdfail_detail = cdb_sc_lostcr; /* should this be something else */
								return (sm_uc_ptr_t)NULL;
							}
						}
						cse->done = TRUE;
					}
					*cycle = CYCLE_PVT_COPY;
					*cr_out = 0;
					return (sm_uc_ptr_t)cse->new_buff;
				} else
				{	/* Block changes are already committed to shared memory (possible if we are in TP
					 * in the 2nd phase of M-Kill in gvcst_expand_free_subtree.c). In this case, read
					 * block from shared memory; do not look at private memory (i.e. cse) as that might
					 * not be as uptodate as shared memory.
					 */
					assert(csa->now_crit);	/* gvcst_expand_free_subtree does t_qread in crit */
					/* If this block was newly created as part of the TP transaction, it should not be killed
					 * as part of the 2nd phase of M-kill. This is because otherwise the block's cse would
					 * have had an old_mode of kill_t_create in which case we would not have come into this
					 * else block. Assert accordingly.
					 */
					assert(!chain1.flag);
					first_tp_srch_status = NULL;	/* do not use any previous srch_hist information */
				}
			}
		} else
		{
			if (NULL != (tabent = lookup_hashtab_int4(sgm_info_ptr->blks_in_use, (uint4 *)&blk)))
				first_tp_srch_status = tabent->value;
			else
				first_tp_srch_status = NULL;
		}
		ASSERT_IS_WITHIN_TP_HIST_ARRAY_BOUNDS(first_tp_srch_status, sgm_info_ptr);
		if (!is_mm && first_tp_srch_status)
		{
			cr = first_tp_srch_status->cr;
			assert(cr && !first_tp_srch_status->cse);
			if (first_tp_srch_status->cycle == cr->cycle)
			{
				*cycle = first_tp_srch_status->cycle;
				*cr_out = cr;
				cr->refer = TRUE;
				if (CDB_STAGNATE <= t_tries)	/* mu_reorg doesn't use TP else should have an || for that */
					CWS_INSERT(blk);
				return (sm_uc_ptr_t)first_tp_srch_status->buffaddr;
			} else
			{	/* Block was already part of the read-set of this transaction, but got recycled in the cache.
				 * Allow block recycling by resetting first_tp_srch_status for this blk to reflect the new
				 * buffer, cycle and cache-record. tp_hist (invoked much later) has validation checks to detect
				 * if block recycling happened within the same mini-action and restart in that case.
				 * Updating first_tp_srch_status has to wait until the end of t_qread since only then do we know
				 * the values to update to. Set a variable that will enable the updation before returning.
				 * Also assert that if we are in the final retry, we are never in a situation where we have a
				 * block that got recycled since the start of the current mini-action. This is easily detected since
				 * as part of the final retry we maintain a hash-table "cw_stagnate" that holds the blocks that
				 * have been read as part of the current mini-action until now.
				 */
				assert(CDB_STAGNATE > t_tries || (NULL == lookup_hashtab_int4(&cw_stagnate, (uint4 *)&blk)));
				reset_first_tp_srch_status = TRUE;
			}
		}
	}
	if ((uint4)blk >= (uint4)csa->ti->total_blks)
	{	/* Requested block out of range; could occur because of a concurrency conflict. mm_read and dsk_read assume blk is
		 * never negative or greater than the maximum possible file size. If a concurrent REORG truncates the file, t_qread
		 * can proceed despite blk being greater than total_blks. But dsk_read handles this fine; see comments below.
		 */
		assert((&FILE_INFO(gv_cur_region)->s_addrs == csa) && (csd == cs_data));
		assert(!csa->now_crit);
		rdfail_detail = cdb_sc_blknumerr;
		return (sm_uc_ptr_t)NULL;
	}
	if (is_mm)
	{
		*cycle = CYCLE_SHRD_COPY;
		*cr_out = 0;
		return (sm_uc_ptr_t)(mm_read(blk));
	}
	was_crit = csa->now_crit;
	cnl = csa->nl;
	encr_ptr = csa->encr_ptr;
	if (NULL != encr_ptr)
	{
		/* If this is an encrypted database and we hold crit, make sure our private cycle matches the shared cycle.
		 * Or else we would need to call "process_reorg_encrypt_restart" below (a heavyweight operation) holding crit.
		 */
		assert(!was_crit || (cnl->reorg_encrypt_cycle == encr_ptr->reorg_encrypt_cycle));
		seg = gv_cur_region->dyn.addr;
		issued_db_init_crypt_warning = encr_ptr->issued_db_init_crypt_warning;
		if (!IS_BITMAP_BLK(blk) && issued_db_init_crypt_warning)
		{	/* A non-GT.M process is attempting to read a non-bitmap block, yet it has previously encountered an error
			 * during db_init (because it did not have access to the encryption keys) and reported it with a -W-
			 * severity. Since the block it is attempting to read can be in the unencrypted shared memory (read from
			 * disk by another process with access to the encryption keys), we cannot let it access it without a valid
			 * handle, so issue an rts_error.
			 *
			 * TODO: DSE and LKE could bypass getting the ftok semaphore. LKE is not an issue, but DSE does care about
			 *       the csa->reorg_encrypt_cycle. So it means DSE could get an inconsistent copy of reorg_encrypt_cycle
			 *       and associated hashes if it had done a bypass and a concurrent REORG -ENCRYPT is holding the ftok
			 *       semaphore and changing these values at the same time.
			 */
			assert(!IS_GTM_IMAGE);	/* GT.M would have error'ed out in db_init */
			gtmcrypt_errno = SET_REPEAT_MSG_MASK(SET_CRYPTERR_MASK(ERR_CRYPTBADCONFIG));
			GTMCRYPT_REPORT_ERROR(gtmcrypt_errno, rts_error, seg->fname_len, seg->fname);
		} else if (cnl->reorg_encrypt_cycle != encr_ptr->reorg_encrypt_cycle)
		{	/* A concurrent MUPIP REORG ENCRYPT occurred. Cannot proceed with the read even if the block is
			 * already loaded from disk into the unencrypted global buffers (security issue). Need to load the
			 * new encryption keys and only let those processes which are able to successfully do this proceed
			 * with the read. First, copy the key hashes from csd into csa->encr_ptr. That needs crit
			 * to ensure a concurrent MUPIP REORG ENCRYPT does not sneak in.
			 *
			 * Note: Even though we asserted a few lines above that if "was_crit" is TRUE, then we expect
			 * the encryption cycles to be in sync, we handle this out-of-design situation in "pro" by fixing
			 * the cycles while holding crit (hopefully rare case so it is okay to hold crit for a heavyweight call).
			 */
			if (!was_crit)
				grab_crit(gv_cur_region);
			/* Now that we have crit, sync them up by copying the new keys inside crit and opening the key handles
			 * outside crit (a potentially long running operation).
			 */
			SIGNAL_REORG_ENCRYPT_RESTART(mu_reorg_encrypt_in_prog, reorg_encrypt_restart_csa,
					cnl, csa, csd, rdfail_detail, process_id);
			assert(csa == reorg_encrypt_restart_csa);
			if (!was_crit)
				rel_crit(gv_cur_region);
			/* If we are inside a TP read-write transaction, it is possible we already used the old keys for
			 * prior calls to "jnl_format" so we have to restart (cannot sync up cycles). Do the same for
			 * TP read-only transaction as well as NON-TP read-write transaction. In all these cases we know
			 * the caller is capable of restarting. All other cases we dont know if the caller is capable so
			 * sync up the cycles and proceed using the new keys for the read.
			 *
			 * But since it is possible the caller does not call t_retry right away (e.g. mupip reorg which can
			 * choose to abandone this tree path and move on to another block without aborting this transaction)
			 * it is better we finish the pending call to "process_reorg_encrypt_restart" right here before returning.
			 */
			process_reorg_encrypt_restart();
			assert(NULL == reorg_encrypt_restart_csa);
			if (IS_NOT_SAFE_TO_SYNC_NEW_KEYS(dollar_tlevel, update_trans))
			{
				assert(cdb_sc_reorg_encrypt == rdfail_detail);	/* set by SIGNAL_REORG_ENCRYPT_RESTART macro */
				return (sm_uc_ptr_t)NULL;
			}
		}
	}
	assert(dba_bg == csd->acc_meth);
	assert(!first_tp_srch_status || !first_tp_srch_status->cr
					|| first_tp_srch_status->cycle != first_tp_srch_status->cr->cycle);
	if (FALSE == (clustered = csd->clustered))
		bt = NULL;
	ocnt = 0;
	set_wc_blocked = FALSE;	/* to indicate whether cnl->wc_blocked was set to TRUE by us */
	hold_onto_crit = csa->hold_onto_crit;	/* note down in local to avoid csa-> dereference in multiple usages below */
	do
	{
		if (NULL == (cr = db_csh_get(blk)))
		{	/* not in memory */
			if (clustered && (NULL != (bt = bt_get(blk))) && (FALSE == bt->flushing))
				bt = NULL;
			if (!csa->now_crit)
			{
				assert(!hold_onto_crit);
				if (NULL != bt)
				{	/* at this point, bt is not NULL only if clustered and flushing - wait no crit */
					assert(clustered);
					wait_for_block_flush(bt, blk);	/* try for no other node currently writing the block */
				}
				/* assume defaults for flush_target and buffs_per_flush */
				flush_target = csd->flush_trigger;
				buffs_per_flush = 0;
				if ((0 != csd->epoch_taper) && (FALSE == gv_cur_region->read_only) && JNL_ENABLED(csd) &&
						(0 != cnl->wcs_active_lvl) && (NOJNL != csa->jnl->channel) &&
						(0 != cnl->jnl_file.u.inode) && csd->jnl_before_image)
				{
					EPOCH_TAPER_IF_NEEDED(csa, csd, cnl, (gd_region *) 0, FALSE, buffs_per_flush, flush_target);
				}
				if ((flush_target <= cnl->wcs_active_lvl) && (FALSE == gv_cur_region->read_only))
					JNL_ENSURE_OPEN_WCS_WTSTART(csa, gv_cur_region, buffs_per_flush, dummy_errno);
						/* a macro that dclast's "wcs_wtstart" and checks for errors etc. */
				/* Get crit but also ensure encryption cycles are in sync ("dsk_read" relies on this).
				 * Note: "sync_needed" should be TRUE very rarely since we synced the cycles just a few lines
				 * above. But in case a MUPIP REORG ENCRYPT concurrently sneaked in between these lines we
				 * need to resync.
				 */
				sync_needed = grab_crit_encr_cycle_sync(gv_cur_region);
				assert(NULL == reorg_encrypt_restart_csa);
				assert(!sync_needed || (NULL != encr_ptr));
				if (sync_needed && IS_NOT_SAFE_TO_SYNC_NEW_KEYS(dollar_tlevel, update_trans))
				{
					assert(cnl->reorg_encrypt_cycle == encr_ptr->reorg_encrypt_cycle);
					rel_crit(gv_cur_region);
					rdfail_detail = cdb_sc_reorg_encrypt;	/* set by SIGNAL_REORG_ENCRYPT_RESTART macro */
					return (sm_uc_ptr_t)NULL;
				}
				cr = db_csh_get(blk);			/* in case blk arrived before crit */
			}
			if (clustered && (NULL != (bt = bt_get(blk))) && (TRUE == bt->flushing))
			{	/* Once crit, need to assure that if clustered, that flushing is [still] complete
				 * If it isn't, we missed an entire WM cycle and have to wait for another node to finish */
				wait_for_block_flush(bt, blk);	/* ensure no other node currently writing the block */
			}
			if (NULL == cr)
			{	/* really not in memory - must get a new buffer */
				assert(csa->now_crit);
				cr = db_csh_getn(blk);
				if (CR_NOTVALID == (sm_long_t)cr)
				{
					assert(cnl->wc_blocked); /* only reason we currently know wcs_get_space could fail */
					assert(gtm_white_box_test_case_enabled);
					SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE);
					BG_TRACE_PRO_ANY(csa, wc_blocked_t_qread_db_csh_getn_invalid_blk);
					set_wc_blocked = TRUE;
					break;
				}
				assert(0 <= cr->read_in_progress);
				*cycle = cr->cycle;
				cr->tn = csd->trans_hist.curr_tn;
				/* Record history of most recent disk reads only in dbg builds for now. Although the macro
				 * is just a couple dozen instructions, it is done while holding crit so we want to avoid
				 * delaying crit unless really necessary. Whoever wants this information can enable it
				 * by a build change to remove the DEBUG_ONLY part below.
				 */
				DEBUG_ONLY(DSKREAD_TRACE(csa, GDS_ANY_ABS2REL(csa,cr), cr->tn, process_id, blk, cr->cycle);)
				if (!was_crit && !hold_onto_crit)
					rel_crit(gv_cur_region);
				/* read outside of crit may be of a stale block but should be detected by t_end or tp_tend */
				assert(0 == cr->dirty);
				assert(cr->read_in_progress >= 0);
				CR_BUFFER_CHECK(gv_cur_region, csa, csd, cr);
				buffaddr = (sm_uc_ptr_t)GDS_REL2ABS(cr->buffaddr);
#				ifdef DEBUG
				/* stop self to test sechshr_db_clnup clears the read state */
				if (gtm_white_box_test_case_enabled
					&& (WBTEST_SIGTSTP_IN_T_QREAD == gtm_white_box_test_case_number))
				{	/* this should never fail, but because of the way we developed the test we got paranoid */
					dummy = kill(process_id, SIGTERM);
					assert(0 == dummy);
					for (dummy = 10; dummy; dummy--)
						LONG_SLEEP(10); /* time for sigterm to take hit before we clear block_now_locked */
				}
#				endif
				if (SS_NORMAL != (status = dsk_read(blk, buffaddr, &ondsk_blkver, lcl_blk_free)))
				{
					/* buffer does not contain valid data, so reset blk to be empty */
					cr->cycle++;	/* increment cycle for blk number changes (for tp_hist and others) */
					cr->blk = CR_BLKEMPTY;
					cr->r_epid = 0;
					RELEASE_BUFF_READ_LOCK(cr);
					TREF(block_now_locked) = NULL;
					assert(-1 <= cr->read_in_progress);
					assert(was_crit == csa->now_crit);
					if (ERR_DYNUPGRDFAIL == status)
					{	/* if we dont hold crit on the region, it is possible due to concurrency conflicts
						 * that this block is unused (i.e. marked free/recycled in bitmap, see comments in
						 * gds_blk_upgrade.h). in this case we should not error out but instead restart.
						 */
						if (was_crit)
						{
							assert(FALSE);
							rts_error_csa(CSA_ARG(csa) VARLSTCNT(5) status, 3, blk,
									DB_LEN_STR(gv_cur_region));
						} else
						{
							rdfail_detail = cdb_sc_lostcr;
							return (sm_uc_ptr_t)NULL;
						}
					}
					if ((-1 == status) && !was_crit)
					{	/* LSEEKREAD and, consequently, dsk_read return -1 in case pread is unable to fetch
						 * a full database block's length of data. This can happen if the requested read is
						 * past the end of the file, which can happen if a concurrent truncate occurred
						 * after the blk >= csa->ti->total_blks comparison above. Allow for this scenario
						 * by restarting. However, if we've had crit the whole time, no truncate could have
						 * happened. -1 indicates a problem with the file, so fall through to DBFILERR.
						 */
						rdfail_detail = cdb_sc_truncate;
						return (sm_uc_ptr_t)NULL;
					} else if (IS_CRYPTERR_MASK(status))
					{
						seg = gv_cur_region->dyn.addr;
						GTMCRYPT_REPORT_ERROR(status, rts_error, seg->fname_len, seg->fname);
					}
					else
					{	/* A DBFILERR can be thrown for two possible reasons:
						 * (1) LSEEKREAD returned an unexpected error due to a filesystem problem; or
						 * (2) csa/cs_addrs/csd/cs_data are out of sync, and we're trying to read a block
						 * number for one region from another region with fewer total_blks.
						 *    We suspect the former is what happened in GTM-7623. Apparently the latter
						 * has been an issue before, too. If either occurs again in pro, this assertpro
						 * distinguishes the two possibilities.
						 */
						assertpro((&FILE_INFO(gv_cur_region)->s_addrs == csa) && (csd == cs_data));
						rts_error_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region),
								status);
					}
				}
				disk_blk_read = TRUE;
				assert(0 <= cr->read_in_progress);
				assert(0 == cr->dirty);
				/* Only set in cache if read was success */
				cr->ondsk_blkver = (lcl_blk_free ? GDSVCURR : ondsk_blkver);
				cr->r_epid = 0;
				RELEASE_BUFF_READ_LOCK(cr);
				TREF(block_now_locked) = NULL;
				assert(-1 <= cr->read_in_progress);
				*cr_out = cr;
				assert(was_crit == csa->now_crit);
				if (reset_first_tp_srch_status)
					RESET_FIRST_TP_SRCH_STATUS(first_tp_srch_status, cr, *cycle);
				return buffaddr;
			} else  if (!was_crit && (BAD_LUCK_ABOUNDS > ocnt))
			{
				assert(!hold_onto_crit);
				assert(TRUE == csa->now_crit);
				assert(cnl->in_crit == process_id);
				rel_crit(gv_cur_region);
			}
		}