/* Waits for a concurrently running write (of a global buffer to disk) to complete.
 *
 * Returns TRUE if the write completes within a timeout of approximately 1 minute (2 minutes in a debug build).
 * Returns FALSE otherwise.
 */
boolean_t	wcs_write_in_progress_wait(node_local_ptr_t cnl, cache_rec_ptr_t cr, wbtest_code_t wbox_test_code)
{
	uint4	lcnt;
	int4	n;


	for (lcnt = 1; ; lcnt++)
	{	/* the design here is that either this process owns the block, or the writer does.
		 * if the writer does, it must be allowed to finish its write; then it will release the block
		 * and the next LOCK will establish ownership
		 */
		LOCK_BUFF_FOR_UPDATE(cr, n, &cnl->db_latch);
		/* This destroys evidence of writer ownership, but this is really a test that
		 * there was no prior owner. It will only be true if the writer has cleared it.
		 */
		if (OWN_BUFF(n))
			break;
		else
		{
			GTM_WHITE_BOX_TEST(wbox_test_code, lcnt, (2 * BUF_OWNER_STUCK));
			/* We have noticed the below assert to fail occasionally on some platforms
			 * We suspect it is because of waiting for another writer that is in jnl_fsync
			 * (as part of flushing a global buffer) which takes more than a minute to finish.
			 * To avoid false failures (where the other writer finishes its job in a little over
			 * a minute) we wait for twice the time in the debug version.
			 */
			DEBUG_ONLY(
				if ((BUF_OWNER_STUCK == lcnt) && cr->epid)
					GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
			)
			if (BUF_OWNER_STUCK DEBUG_ONLY( * 2) < lcnt)
			{	/* sick of waiting */
				if (0 == cr->dirty)
				{	/* someone dropped something; assume it was the writer and go on */
					LOCK_NEW_BUFF_FOR_UPDATE(cr);
					break;
				} else
				{
					if (cr->epid)
					{
#ifdef DEBUG
						GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, TWICE);
						send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, TWICE,
							cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#else
						GET_C_STACK_FROM_SCRIPT("WRITEWAITPID", process_id, cr->epid, ONCE);
						send_msg(VARLSTCNT(8) ERR_WRITEWAITPID, 6, process_id, ONCE,
							cr->epid, cr->blk, DB_LEN_STR(gv_cur_region));
#endif
					}
					return FALSE;
				}
			}
			if (WRITER_STILL_OWNS_BUFF(cr, n))
				wcs_sleep(lcnt);
		}
	}	/* end of for loop to control buffer */
	return TRUE;	/* write completed (or this process otherwise obtained ownership) within the timeout */
}
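A minimal caller sketch for the protocol above (hypothetical code, not from the original source; SET_TRACEABLE_VAR and cnl->wc_blocked are assumed to be available as in the surrounding codebase):

	/* Give the concurrent writer a chance to finish; on timeout, flag the cache as suspect
	 * rather than updating a buffer this process never managed to own.
	 */
	if (!wcs_write_in_progress_wait(cnl, cr, wbox_test_code))
	{
		SET_TRACEABLE_VAR(cnl->wc_blocked, TRUE);	/* request cache recovery */
		return FALSE;
	}
	/* On TRUE, this process owns "cr" and may safely update the buffer. */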
Example No. 2
void jnl_file_lost(jnl_private_control *jpc, uint4 jnl_stat)
{	/* Notify operator and terminate journaling */
	unsigned int	status;
	sgmnt_addrs	*csa;
	seq_num		reg_seqno, jnlseqno;

	error_def(ERR_REPLJNLCLOSED);
	error_def(ERR_JNLCLOSED);

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
#ifdef VMS
	/* The following assert has been removed as it could be FALSE if the caller is "jnl_file_extend"
	 *	assert(0 != memcmp(csa->nl->jnl_file.jnl_file_id.fid, zero_fid, sizeof(zero_fid)));
	 */
#endif
	assert(csa->now_crit);
	if (0 != jnl_stat)
		jnl_send_oper(jpc, jnl_stat);
	csa->hdr->jnl_state = jnl_closed;
	jpc->jnl_buff->cycle++; /* increment shared cycle so all future callers of jnl_ensure_open recognize journal switch */
	assert(jpc->cycle < jpc->jnl_buff->cycle);
	if (REPL_ENABLED(csa->hdr))
	{
		csa->hdr->repl_state = repl_was_open;
		reg_seqno = csa->hdr->reg_seqno;
		jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO;
		send_msg(VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(jpc->region), &reg_seqno, &reg_seqno, &jnlseqno, &jnlseqno);
	} else
		send_msg(VARLSTCNT(5) ERR_JNLCLOSED, 3, DB_LEN_STR(jpc->region), &csa->ti->curr_tn);
#ifdef VMS
	assert(0 != csa->jnl->jnllsb->lockid);
	status = gtm_enqw(EFN$C_ENF, LCK$K_EXMODE, csa->jnl->jnllsb, LCK$M_CONVERT | LCK$M_NODLCKBLK,
			NULL, 0, NULL, 0, NULL, PSL$C_USER, 0);
	if (SS$_NORMAL == status)
		status = csa->jnl->jnllsb->cond;
	jnl_file_close(jpc->region, FALSE, FALSE);
	if (SS$_NORMAL == status)
		status = gtm_deq(csa->jnl->jnllsb->lockid, NULL, PSL$C_USER, 0);
	if (SS$_NORMAL != status)
		GTMASSERT;
# else
	jnl_file_close(jpc->region, FALSE, FALSE);
#endif
}
/* input parameter "command_name" is a string that is either "MUPIP REORG UPGRADE/DOWNGRADE" or "MUPIP SET VERSION" */
int4	desired_db_format_set(gd_region *reg, enum db_ver new_db_format, char *command_name)
{
	boolean_t		was_crit;
	char			*db_fmt_str;
	char			*wcblocked_ptr;
	int4			status;
	uint4			jnl_status;
	inctn_opcode_t		save_inctn_opcode;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	trans_num		curr_tn;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;

	assert(reg->open);
	csa = &FILE_INFO(reg)->s_addrs;
	csd = csa->hdr;
	GTMCRYPT_ONLY(
		/* We don't allow databases to be encrypted if the version is V4 */
		if (csd->is_encrypted && (GDSV4 == new_db_format))
		{
			gtm_putmsg(VARLSTCNT(4) ERR_CRYPTNOV4, 2, DB_LEN_STR(reg));
			return ERR_CRYPTNOV4;
		}
	)
Example No. 4
boolean_t mur_report_error(jnl_ctl_list *jctl, enum mur_error code)
{
	error_def(ERR_BOVTMGTEOVTM);
	error_def(ERR_DUPTOKEN);
	error_def(ERR_JNLBADRECFMT);
	error_def(ERR_PREVJNLNOEOF);
	error_def(ERR_UNKNOWNRECTYPE);

	switch (code)
	{
	default:
		assert(FALSE);
		break;

	case MUR_DUPTOKEN:
		assert(FALSE);
		gtm_putmsg(VARLSTCNT(7) ERR_DUPTOKEN, 5, &((struct_jrec_tcom *)jctl->reg_ctl->mur_desc->jnlrec)->token_seq.token,
				jctl->jnl_fn_len, jctl->jnl_fn, DB_LEN_STR(jctl->reg_ctl->gd));
		break;

	case MUR_PREVJNLNOEOF:
		gtm_putmsg(VARLSTCNT(4) ERR_PREVJNLNOEOF, 2, jctl->jnl_fn_len, jctl->jnl_fn);
		break;

	case MUR_JNLBADRECFMT:
		gtm_putmsg(VARLSTCNT(5) ERR_JNLBADRECFMT, 3, jctl->jnl_fn_len, jctl->jnl_fn, jctl->rec_offset);
		break;

	case MUR_BOVTMGTEOVTM:
		gtm_putmsg(VARLSTCNT(6) ERR_BOVTMGTEOVTM, 4, jctl->jnl_fn_len, jctl->jnl_fn,
							&jctl->jfh->bov_timestamp, &jctl->jfh->eov_timestamp);
		break;
	}
	return MUR_WITHIN_ERROR_LIMIT(murgbl.err_cnt, mur_options.error_limit); /* side-effect : increments murgbl.err_cnt */
}
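A hedged usage note: since the return value reports whether murgbl.err_cnt is still within mur_options.error_limit, a scan loop typically stops processing the journal file once it returns FALSE. A sketch (the call site is illustrative, not from the original source):

	if (!mur_report_error(jctl, MUR_JNLBADRECFMT))
		return FALSE;	/* error limit exhausted; abandon this journal file */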
Example No. 5
boolean_t mur_report_error(enum mur_error code)
{
	error_def(ERR_UNKNOWNRECTYPE);
	error_def(ERR_DUPTOKEN);
	error_def(ERR_PREVJNLNOEOF);
	error_def(ERR_JNLBADRECFMT);

	switch (code)
	{
	default:
		assert(FALSE);
		break;

	case MUR_DUPTOKEN:
		assert(FALSE);
		gtm_putmsg(VARLSTCNT(7) ERR_DUPTOKEN, 5, &((struct_jrec_tcom *)mur_rab.jnlrec)->token_seq.token,
				mur_jctl->jnl_fn_len, mur_jctl->jnl_fn, DB_LEN_STR(mur_ctl[mur_regno].gd));
		break;

	case MUR_PREVJNLNOEOF:
		gtm_putmsg(VARLSTCNT(4) ERR_PREVJNLNOEOF, 2, mur_jctl->jnl_fn_len, mur_jctl->jnl_fn);
		break;

	case MUR_JNLBADRECFMT:
		gtm_putmsg(VARLSTCNT(5) ERR_JNLBADRECFMT, 3, mur_jctl->jnl_fn_len, mur_jctl->jnl_fn, mur_jctl->rec_offset);
		break;
	case MUR_BOVTMGTEOVTM:
		break;
	}
	return MUR_WITHIN_ERROR_LIMIT(murgbl.err_cnt, mur_options.error_limit); /* side-effect : increments murgbl.err_cnt */
}
Example No. 6
int main(int argc, char *argv[])
{
	DCL_THREADGBL_ACCESS;

	GTM_THREADGBL_INIT;
	set_blocksig();
	gtm_imagetype_init(DSE_IMAGE);
	gtm_wcswidth_fnptr = gtm_wcswidth;
	gtm_env_init();	/* read in all environment variables */
	licensed = TRUE;
	TREF(transform) = TRUE;
	op_open_ptr = op_open;
	patch_curr_blk = get_dir_root();
	err_init(util_base_ch);
	GTM_ICU_INIT_IF_NEEDED;	/* Note: should be invoked after err_init (since it may error out) and before CLI parsing */
	sig_init(generic_signal_handler, dse_ctrlc_handler, suspsigs_handler);
	atexit(util_exit_handler);
	SET_LATCH_GLOBAL(&defer_latch, LOCK_AVAILABLE);
	get_page_size();
	stp_init(STP_INITSIZE);
	rts_stringpool = stringpool;
	getjobname();
	INVOKE_INIT_SECSHR_ADDRS;
	getzdir();
	prealloc_gt_timers();
	initialize_pattern_table();
	gvinit();
	region_init(FALSE);
	INIT_GBL_ROOT(); /* Needed for GVT initialization */
	getjobnum();
	util_out_print("!/File  !_!AD", TRUE, DB_LEN_STR(gv_cur_region));
	util_out_print("Region!_!AD!/", TRUE, REG_LEN_STR(gv_cur_region));
	cli_lex_setup(argc, argv);
	CREATE_DUMMY_GBLDIR(gd_header, original_header, gv_cur_region, gd_map, gd_map_top);
	gtm_chk_dist(argv[0]);
#	ifdef DEBUG
	if ((gtm_white_box_test_case_enabled && (WBTEST_SEMTOOLONG_STACK_TRACE == gtm_white_box_test_case_number) ))
	{
		sgmnt_addrs     * csa;
		node_local_ptr_t cnl;
		csa = &FILE_INFO(gv_cur_region)->s_addrs;
		cnl = csa->nl;
		cnl->wbox_test_seq_num  = 1; /*Signal the first step and wait here*/
		while (2 != cnl->wbox_test_seq_num) /*Wait for another process to get hold of the semaphore and signal next step*/
			LONG_SLEEP(10);
	}
#	endif
	if (argc < 2)
		display_prompt();
	io_init(TRUE);
	while (1)
	{
		if (!dse_process(argc))
			break;
		display_prompt();
	}
	dse_exit();
	REVERT;
}
Example No. 7
uint4 mupip_set_journal_newstate(set_jnl_options *jnl_options, jnl_create_info *jnl_info, mu_set_rlist *rptr)
{
	enum jnl_state_codes	jnl_curr_state;
	enum repl_state_codes	repl_curr_state;
	boolean_t 		current_image;
	enum db_acc_method	acc_meth;

	error_def(ERR_REPLNOBEFORE);
	error_def(ERR_REPLJNLCNFLCT);
	error_def(ERR_JNLDISABLE);
	error_def(ERR_MMBEFOREJNL);
	error_def(ERR_MMNOBFORRPL);

	jnl_curr_state = (enum jnl_state_codes)rptr->sd->jnl_state;
	repl_curr_state = (enum repl_state_codes)rptr->sd->repl_state;
	acc_meth = rptr->sd->acc_meth;
	current_image = rptr->sd->jnl_before_image;
	if (CLI_ABSENT == jnl_options->cli_journal)
		rptr->jnl_new_state = jnl_curr_state;
	else if ((CLI_NEGATED == jnl_options->cli_journal) || (CLI_NEGATED == jnl_options->cli_enable))
		rptr->jnl_new_state = jnl_notallowed; /* DISABLE specified */
	else if ((jnl_notallowed != jnl_curr_state) || (CLI_PRESENT == jnl_options->cli_enable))
	{	/* journaling is already ENABLED or ENABLE is explicitly specified */
		if (CLI_NEGATED == jnl_options->cli_on)	/* OFF specified */
			rptr->jnl_new_state = jnl_closed;
		else if (repl_curr_state == repl_was_open && CLI_PRESENT != jnl_options->cli_replic_on)
		{ /* Journaling was turned OFF by jnl_file_lost(). Do not allow turning journaling ON without also
		     turning replication ON */
			gtm_putmsg(VARLSTCNT(10) ERR_REPLJNLCNFLCT, 8, LEN_AND_STR(jnl_state_lit[jnl_open]),
					DB_LEN_STR(gv_cur_region), LEN_AND_STR(repl_state_lit[repl_closed]),
					LEN_AND_STR(jnl_state_lit[jnl_open]));
			return EXIT_WRN;
		}
		else /* ON explicitly specified or present by default */
			rptr->jnl_new_state = jnl_open;
	} else	/* jnl_notallowed == jnl_curr_state && CLI_ABSENT == jnl_options->cli_enable */
	{
		if (CLI_PRESENT != jnl_options->cli_replic_on)
		{
			gtm_putmsg(VARLSTCNT(4) ERR_JNLDISABLE, 2, DB_LEN_STR(gv_cur_region));
			return EXIT_WRN;
		} else
			rptr->jnl_new_state = jnl_open;	/* turn journaling on for REPLICATION=ON */
	}
	VMS_ONLY(rptr->before_images = (jnl_options->image_type_specified ?  jnl_info->before_images : current_image);)
Example No. 8
void set_enospc_flags(gd_addr *addr_ptr, char enospc_enable_list[], boolean_t ok_to_interrupt)
{
	gd_region	*r_local, *r_top;
	int		i;
	sgmnt_addrs	*csa;
	const char	*syslog_msg;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;

	for (r_local = addr_ptr->regions, r_top = r_local + addr_ptr->n_regions, i = 0;
	     r_local < r_top; r_local++, i++)
	{
		if (!r_local->open || r_local->was_open)
			continue;
		if ((dba_bg != r_local->dyn.addr->acc_meth) && (dba_mm != r_local->dyn.addr->acc_meth))
			continue;
		csa = &FILE_INFO(r_local)->s_addrs;
		if (ANTICIPATORY_FREEZE_ENABLED(csa))
		{
			switch(enospc_enable_list[i])
			{
			case NONE:
				syslog_msg = "Turning off fake ENOSPC for both database and journal file.";
				csa->nl->fake_db_enospc = FALSE;
				csa->nl->fake_jnl_enospc = FALSE;
				break;
			case DB_ON:
				syslog_msg = "Turning on fake ENOSPC only for database file.";
				csa->nl->fake_db_enospc = TRUE;
				csa->nl->fake_jnl_enospc = FALSE;
				break;
			case JNL_ON:
				syslog_msg = "Turning on fake ENOSPC only for journal file.";
				csa->nl->fake_db_enospc = FALSE;
				csa->nl->fake_jnl_enospc = TRUE;
				break;
			case DB_AND_JNL_ON:
				syslog_msg = "Turning on fake ENOSPC for both database and journal file.";
				csa->nl->fake_db_enospc = TRUE;
				csa->nl->fake_jnl_enospc = TRUE;
				break;
			default:
				assert(FALSE);
			}
			if (ok_to_interrupt)
				send_msg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TEXT, 2, DB_LEN_STR(r_local), ERR_TEXT, 2,
					     LEN_AND_STR(syslog_msg));
		}
	}
}
Example No. 9
STATICFNDEF void mu_rndwn_all_helper(shm_parms *parm_buff, char *fname, int *exit_status, int *tmp_exit_status)
{
	replpool_identifier	replpool_id;
	boolean_t 		ret_status, jnlpool_sem_created;
	unsigned char		ipcs_buff[MAX_IPCS_ID_BUF], *ipcs_ptr;

	ESTABLISH(mu_rndwn_all_helper_ch);
	if (validate_db_shm_entry(parm_buff, fname, tmp_exit_status))
	{
		if (SS_NORMAL == *tmp_exit_status)
		{	/* shm still exists */
			mu_gv_cur_reg_init();
			gv_cur_region->dyn.addr->fname_len = strlen(fname);
			STRNCPY_STR(gv_cur_region->dyn.addr->fname, fname, gv_cur_region->dyn.addr->fname_len);
			if (mu_rndwn_file(gv_cur_region, FALSE))
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_MUFILRNDWNSUC, 2, DB_LEN_STR(gv_cur_region));
			else
			{	/* Save semid so that it will not be removed by mu_rndwn_sem_all() */
				add_to_semids_list(FILE_INFO(gv_cur_region)->semid);
				*exit_status = ERR_MUNOTALLSEC;
			}
			mu_gv_cur_reg_free();
		} else
		{	/* shm has been cleaned up by "validate_db_shm_entry" so no need of any more cleanup here */
			assert(ERR_SHMREMOVED == *tmp_exit_status);
			*tmp_exit_status = SS_NORMAL;	/* reset tmp_exit_status for below logic to treat this as normal */
		}
	} else if ((SS_NORMAL == *tmp_exit_status)
		&& validate_replpool_shm_entry(parm_buff, (replpool_id_ptr_t)&replpool_id, tmp_exit_status))
	{
		if (SS_NORMAL == *tmp_exit_status)
		{
			assert(JNLPOOL_SEGMENT == replpool_id.pool_type || RECVPOOL_SEGMENT == replpool_id.pool_type);
			ret_status = mu_rndwn_repl_instance(&replpool_id, TRUE, FALSE, &jnlpool_sem_created);
			ipcs_ptr = i2asc((uchar_ptr_t)ipcs_buff, parm_buff->shmid);
			*ipcs_ptr = '\0';
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) (JNLPOOL_SEGMENT == replpool_id.pool_type) ?
				(ret_status ? ERR_MUJPOOLRNDWNSUC : ERR_MUJPOOLRNDWNFL) :
				(ret_status ? ERR_MURPOOLRNDWNSUC : ERR_MURPOOLRNDWNFL),
				4, LEN_AND_STR(ipcs_buff), LEN_AND_STR(replpool_id.instfilename));
			if (!ret_status)
				*exit_status = ERR_MUNOTALLSEC;
		} else
		{	/* shm has been cleaned up by "validate_replpool_shm_entry" so no need of any more cleanup here */
			assert(ERR_SHMREMOVED == *tmp_exit_status);
			*tmp_exit_status = SS_NORMAL;	/* reset tmp_exit_status for below logic to treat this as normal */
		}
	}
	REVERT;
}
Example No. 10
/* This routine returns whether the free_blocks counter in the file-header is ok (TRUE) or not (FALSE).
 * If not, it corrects it. This assumes cs_addrs, cs_data and gv_cur_region to point to the region of interest.
 * It also assumes that the master-map is correct and finds out non-full local bitmaps and counts the number of
 * free blocks in each of them and sums them up to determine the perceived correct free_blocks count.
 * The reason why this is ok is that even if the master-map incorrectly reports a local bitmap as full, our new free_blocks
 * count will effectively make the free space in that local-bitmap invisible and make a gdsfilext necessary and valid.
 * A later mupip integ will scavenge that invisible space for us. The worst that can therefore happen is that we will transiently
 * not be using up existing space. But we will always ensure that the free_blocks counter goes in sync with the master-map.
 */
boolean_t	is_free_blks_ctr_ok(void)
{
	boolean_t	blk_used;
	block_id	bml, free_bit, free_bml, maxbitsthismap;
	cache_rec_ptr_t	cr;
	int		cycle;
	sm_uc_ptr_t	bmp;
	unsigned int	local_maps, total_blks, free_blocks;

	error_def(ERR_DBBADFREEBLKCTR);

	assert(&FILE_INFO(gv_cur_region)->s_addrs == cs_addrs && cs_addrs->hdr == cs_data && cs_addrs->now_crit);
	total_blks = (dba_mm == cs_data->acc_meth) ? cs_addrs->total_blks : cs_addrs->ti->total_blks;
	local_maps = DIVIDE_ROUND_UP(total_blks, BLKS_PER_LMAP);
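	/* Illustrative arithmetic (numbers not from the original source): with total_blks = 1000 and
	 * BLKS_PER_LMAP = 512, local_maps = DIVIDE_ROUND_UP(1000, 512) = 2, i.e. one full local bitmap
	 * plus one partial one covering the remaining 488 blocks.
	 */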
	for (free_blocks = 0, free_bml = 0; free_bml < local_maps; free_bml++)
	{
		bml = bmm_find_free((uint4)free_bml, (sm_uc_ptr_t)MM_ADDR(cs_data), local_maps);
		if (bml < free_bml)
			break;
		free_bml = bml;
		bml *= BLKS_PER_LMAP;
		if (!(bmp = t_qread(bml, (sm_int_ptr_t)&cycle, &cr))
				|| (BM_SIZE(BLKS_PER_LMAP) != ((blk_hdr_ptr_t)bmp)->bsiz)
				|| (LCL_MAP_LEVL != ((blk_hdr_ptr_t)bmp)->levl))
		{
			assert(FALSE);	/* In pro, we will simply skip counting this local bitmap. */
			continue;
		}
		assert(free_bml <= (local_maps - 1));
		maxbitsthismap = (free_bml != (local_maps - 1)) ? BLKS_PER_LMAP : total_blks - bml;
		for (free_bit = 0; free_bit < maxbitsthismap; free_bit++)
		{
			free_bit = bm_find_blk(free_bit, (sm_uc_ptr_t)bmp + sizeof(blk_hdr), maxbitsthismap, &blk_used);
			assert(NO_FREE_SPACE <= free_bit);
			if (0 > free_bit)
				break;
			free_blocks++;
		}
	}
	assert(cs_addrs->ti->free_blocks == free_blocks);
	if (cs_addrs->ti->free_blocks != free_blocks)
	{
		send_msg(VARLSTCNT(6) ERR_DBBADFREEBLKCTR, 4, DB_LEN_STR(gv_cur_region), cs_addrs->ti->free_blocks, free_blocks);
		cs_addrs->ti->free_blocks = free_blocks;
		return FALSE;
	}
	return TRUE;
}
Example No. 11
bool gtcmtr_kill(void)
{
	cm_region_list	*reg_ref;
	unsigned char	*ptr, regnum;
	unsigned short	len;
	static readonly	gds_file_id file;

	error_def(ERR_DBPRIVERR);

	ptr = curr_entry->clb_ptr->mbf;
	assert(*ptr == CMMS_Q_KILL);
	ptr++;
	GET_SHORT(len, ptr);
	ptr += sizeof(unsigned short);
	regnum = *ptr++;
	reg_ref = gtcm_find_region(curr_entry, regnum);
	len--; /* subtract size of regnum */
	CM_GET_GVCURRKEY(ptr, len);
	gtcm_bind_name(reg_ref->reghead, TRUE);
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (JNL_ALLOWED(cs_addrs))
	{	/* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines
		 *	do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not
		 *	easy to re-execute the following two assignments in case gvcst_kill()'s call to t_end() encounters a
		 *	cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment
		 *	if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE.
		 * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED
		 * 	might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead.
		 * this approach ensures that the jnl_put_jrt_pini() gets the appropriate prc_vec for writing into the
		 * 	journal record in case JNL_ENABLED turns out to be TRUE in t_end() time.
		 * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access
		 * 	and hence the correctness of prc_vec whenever it turns out necessary is guaranteed.
		 */
		originator_prc_vec = curr_entry->pvec;
		cs_addrs->jnl->pini_addr = reg_ref->pini_addr;
	}
	if (gv_target->root)
		gvcst_kill(TRUE);
	if (JNL_ALLOWED(cs_addrs))
		reg_ref->pini_addr = cs_addrs->jnl->pini_addr;  /* In case  journal switch occurred */
	ptr = curr_entry->clb_ptr->mbf;
	*ptr++ = CMMS_R_KILL;
	curr_entry->clb_ptr->cbl = S_HDRSIZE;
	return TRUE;
}
Example No. 12
uint4	set_jnl_file_close(set_jnl_file_close_opcode_t set_jnl_file_close_opcode)
{
	uint4 		jnl_status = 0;

	cs_addrs = &FILE_INFO(gv_cur_region)->s_addrs;
	jnl_status = jnl_ensure_open();
	if (0 == jnl_status)
	{
		if (0 == cs_addrs->jnl->pini_addr)
			jnl_put_jrt_pini(cs_addrs);
		wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH);
		jnl_put_jrt_pfin(cs_addrs);
		jnl_file_close(gv_cur_region, TRUE, TRUE);
	} else
		gtm_putmsg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_addrs->hdr),
			DB_LEN_STR(gv_cur_region));
	return jnl_status;
}
Example No. 13
void op_gvkill(void)
{	gd_region	*reg;
	error_def(ERR_DBPRIVERR);

	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (gv_curr_subsc_null && gv_cur_region->null_subs == FALSE)
		sgnl_gvnulsubsc();

	if (gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		if (gv_target->root)
		{	gvcst_kill(TRUE);
		}
	}
	else if (gv_cur_region->dyn.addr->acc_meth == dba_cm)
	{	gvcmx_kill(TRUE);
	} else
	{	gvusr_kill(TRUE);
	}

	if (gv_cur_region->dyn.addr->repl_list)
	{
		gv_replication_error = gv_replopen_error;
		gv_replopen_error = FALSE;
		reg = gv_cur_region;
		while (gv_cur_region = gv_cur_region->dyn.addr->repl_list)	/* set replicated segments */
		{
			if (gv_cur_region->open)
			{
				change_reg();
				kill_var();
			}
			else
				gv_replication_error = TRUE;
		}
		gv_cur_region = reg;
		change_reg();
		if (gv_replication_error)
			sgnl_gvreplerr();
	}
}
Example No. 14
uint4 jnl_file_open_switch(gd_region *reg, uint4 sts)
{
	sgmnt_addrs		*csa;
	jnl_private_control	*jpc;
	jnl_create_info		create;
	char			prev_jnl_fn[JNL_NAME_SIZE];

	csa = &FILE_INFO(reg)->s_addrs;
	jpc = csa->jnl;

	assert((ERR_JNLFILOPN != sts) && (NOJNL != jpc->channel) || (ERR_JNLFILOPN == sts) && (NOJNL == jpc->channel));
	if ((ERR_JNLFILOPN != sts) && (NOJNL != jpc->channel))
		F_CLOSE(jpc->channel);
	jpc->channel = NOJNL;
	jnl_send_oper(jpc, sts);
	/* attempt to create a new journal file */
	memset(&create, 0, sizeof(create));
	create.status = create.status2 = SS_NORMAL;
	create.prev_jnl = &prev_jnl_fn[0];
	set_jnl_info(reg, &create);
	create.no_prev_link = TRUE;
	create.no_rename = FALSE;
	if (!jgbl.forw_phase_recovery)
		JNL_SHORT_TIME(jgbl.gbl_jrec_time);	/* needed for cre_jnl_file() */
	/* else mur_output_record() would have already set jgbl.gbl_jrec_time */
	assert(jgbl.gbl_jrec_time);
	if (EXIT_NRM != cre_jnl_file(&create))
	{
		jpc->status = create.status;
		jpc->status2 = create.status2;
		return ERR_JNLINVALID;
	} else
	{
		jpc->status = SS_NORMAL;
		sts = 0;
	}
	send_msg(VARLSTCNT(6) ERR_PREVJNLLINKCUT, 4, JNL_LEN_STR(csa->hdr), DB_LEN_STR(reg));
	assert(csa->hdr->jnl_file_len == create.jnl_len);
	assert(0 == memcmp(csa->hdr->jnl_file_name, create.jnl, create.jnl_len));
	return sts;
}
Example No. 15
void	op_gvincr(mval *increment, mval *result)
{
	unsigned char	buff[MAX_ZWR_KEY_SZ], *end;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	/* If specified var name is global ^%Y*, the name is illegal to use in a SET or KILL command, only GETs are allowed */
	if ((RESERVED_NAMESPACE_LEN <= gv_currkey->end) && (0 == MEMCMP_LIT(gv_currkey->base, RESERVED_NAMESPACE)))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_PCTYRESERVED);
	if (gv_cur_region->read_only)
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if ((TREF(gv_last_subsc_null) || TREF(gv_some_subsc_null)) && (ALWAYS != gv_cur_region->null_subs))
		sgnl_gvnulsubsc();
	assert(gv_currkey->end + 1 <= gv_cur_region->max_key_size);
	MV_FORCE_NUM(increment);
	switch (gv_cur_region->dyn.addr->acc_meth)
	{
		case dba_bg:
		case dba_mm:
			gvcst_incr(increment, result);
			break;
		case dba_cm:
			gvcmx_increment(increment, result);
			break;
		case dba_usr:
			/* $INCR not supported for DDP/USR access method */
			if (0 == (end = format_targ_key(buff, MAX_ZWR_KEY_SZ, gv_currkey, TRUE)))
				end = &buff[MAX_ZWR_KEY_SZ - 1];
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(10) ERR_UNIMPLOP, 0,
				      ERR_TEXT, 2, LEN_AND_LIT("GTCM DDP server does not support $INCREMENT"),
				      ERR_GVIS, 2, end - buff, buff,
				      ERR_TEXT, 2, REG_LEN_STR(gv_cur_region));
			break;
		default:
			assertpro(FALSE);
	}
	assert(MV_DEFINED(result));
}
Example No. 16
void op_gvkill(void)
{
	gd_region	*reg;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (TREF(gv_last_subsc_null) && NEVER == gv_cur_region->null_subs)
		sgnl_gvnulsubsc();
	if (gv_cur_region->dyn.addr->acc_meth == dba_bg || gv_cur_region->dyn.addr->acc_meth == dba_mm)
	{
		if (IS_OK_TO_INVOKE_GVCST_KILL(gv_target))
			gvcst_kill(TRUE);
	} else if (gv_cur_region->dyn.addr->acc_meth == dba_cm)
		gvcmx_kill(TRUE);
	else
		gvusr_kill(TRUE);
	if (gv_cur_region->dyn.addr->repl_list)
	{
		gv_replication_error = gv_replopen_error;
		gv_replopen_error = FALSE;
		reg = gv_cur_region;
		while (gv_cur_region = gv_cur_region->dyn.addr->repl_list)	/* set replicated segments */
		{
			if (gv_cur_region->open)
			{
				change_reg();
				kill_var();
			} else
				gv_replication_error = TRUE;
		}
		gv_cur_region = reg;
		change_reg();
		if (gv_replication_error)
			sgnl_gvreplerr();
	}
}
Example No. 17
block_id dse_getblk(char *element, boolean_t nobml, boolean_t carry_curr)
{
	block_id	blk;

	if (!cli_get_hex(element, (uint4 *)&blk))
		blk = patch_curr_blk;
	else
		CLEAR_DSE_COMPRESS_KEY;
	if ((blk < 0) || (blk >= cs_addrs->ti->total_blks))
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_BLKINVALID, 4, blk, DB_LEN_STR(gv_cur_region),
			cs_addrs->ti->total_blks);
		return BADDSEBLK;
	}
	if (nobml && IS_BITMAP_BLK(blk))
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_CANTBITMAP);
		return BADDSEBLK;
	}
	if (carry_curr)
		patch_curr_blk = blk;
	return blk;
}
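A short usage sketch (the "BLOCK" qualifier and the surrounding command shape follow the usual DSE pattern but are illustrative here, not copied from the original source):

	block_id	blk;

	/* Resolve the BLOCK= qualifier; disallow bitmap blocks; make the result the new current block */
	if (BADDSEBLK == (blk = dse_getblk("BLOCK", TRUE, TRUE)))
		return;		/* dse_getblk has already issued BLKINVALID or CANTBITMAP */
	/* ... operate on "blk" ... */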
Example No. 18
uint4	 gdsfilext(uint4 blocks, uint4 filesize, boolean_t trans_in_prog)
{
	sm_uc_ptr_t		old_base[2], mmap_retaddr;
	boolean_t		was_crit, is_mm;
	int			result, save_errno, status;
	DEBUG_ONLY(int		first_save_errno);
	uint4			new_bit_maps, bplmap, map, new_blocks, new_total, max_tot_blks, old_total;
	uint4			jnl_status;
	gtm_uint64_t		avail_blocks, mmap_sz;
	off_t			new_eof, new_size;
	trans_num		curr_tn;
	unix_db_info		*udi;
	inctn_opcode_t		save_inctn_opcode;
	int4			prev_extend_blks_to_upgrd;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	cache_rec_ptr_t         cr;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(!IS_DSE_IMAGE);
	assert((cs_addrs->nl == NULL) || (process_id != cs_addrs->nl->trunc_pid)); /* mu_truncate shouldn't extend file... */
	assert(!process_exiting);
	DEBUG_ONLY(old_base[0] = old_base[1] = NULL);
	assert(!gv_cur_region->read_only);
	udi = FILE_INFO(gv_cur_region);
	is_mm = (dba_mm == cs_addrs->hdr->acc_meth);
#	if !defined(MM_FILE_EXT_OK)
	if (!udi->grabbed_access_sem && is_mm)
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not allowed ? */
#	endif
	/* Both blocks and total blocks are unsigned ints so make sure we aren't asking for huge numbers that will
	   overflow and end up doing silly things.
	*/
	assert((blocks <= (MAXTOTALBLKS(cs_data) - cs_data->trans_hist.total_blks)) || WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
#	if defined(__sun) || defined(__hpux)
	cs_data->defer_allocate = TRUE;
#	endif
	if (!blocks && (cs_data->defer_allocate || (TRANS_IN_PROG_TRUE == trans_in_prog)))
		return (uint4)(NO_FREE_SPACE); /* should this be changed to show extension not enabled ? */
	bplmap = cs_data->bplmap;
	/* New total of non-bitmap blocks will be number of current, non-bitmap blocks, plus new blocks desired
	 * There are (bplmap - 1) non-bitmap blocks per bitmap, so add (bplmap - 2) to number of non-bitmap blocks
	 *      and divide by (bplmap - 1) to get total number of bitmaps for expanded database. (must round up in this
	 *      manner as every non-bitmap block must have an associated bitmap)
	 * Current number of bitmaps is (total number of current blocks + bplmap - 1) / bplmap.
	 * Subtract current number of bitmaps from number needed for expanded database to get number of new bitmaps needed.
	 */
	new_bit_maps = DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap) + blocks, bplmap - 1)
			- DIVIDE_ROUND_UP(cs_data->trans_hist.total_blks, bplmap);
	new_blocks = blocks + new_bit_maps;
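	/* Worked example (illustrative numbers, not from the original source): with bplmap = 512,
	 * total_blks = 1000 and blocks = 600, the database currently has DIVIDE_ROUND_UP(1000, 512) = 2
	 * bitmaps and hence 998 non-bitmap blocks. The expanded database needs
	 * DIVIDE_ROUND_UP(998 + 600, 511) = 4 bitmaps, so new_bit_maps = 4 - 2 = 2 and
	 * new_blocks = 600 + 2 = 602.
	 */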
	assert((0 < (int)new_blocks) || (!cs_data->defer_allocate && (0 == new_blocks)));
	if (new_blocks + cs_data->trans_hist.total_blks > MAXTOTALBLKS(cs_data))
	{
		assert(WBTEST_ENABLED(WBTEST_FILE_EXTEND_ERROR));
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(1) ERR_TOTALBLKMAX);
		return (uint4)(NO_FREE_SPACE);
	}
	if (0 != (save_errno = disk_block_available(udi->fd, &avail_blocks, FALSE)))
	{
		send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
	} else
	{
		if (!(gtmDebugLevel & GDL_IgnoreAvailSpace))
		{	/* Bypass this space check if the debug flag above is on. Allows us to create a large sparse DB
			 * in space it could never fit in if it weren't sparse. Needed for some tests.
			 */
			avail_blocks = avail_blocks / (cs_data->blk_size / DISK_BLOCK_SIZE);
			if ((blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (blocks > (uint4)avail_blocks)
				{
					if (!INST_FREEZE_ON_NOSPC_ENABLED(cs_addrs))
						return (uint4)(NO_FREE_SPACE);
					else
						send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) MAKE_MSG_WARNING(ERR_NOSPACEEXT), 4,
							DB_LEN_STR(gv_cur_region), new_blocks, (uint4)avail_blocks);
				} else
					send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, DB_LEN_STR(gv_cur_region),
						 (uint4)(avail_blocks - ((new_blocks <= avail_blocks) ? new_blocks : 0)));
			}
		}
	}
#	ifdef DEBUG
	if (WBTEST_ENABLED(WBTEST_MM_CONCURRENT_FILE_EXTEND) && dollar_tlevel && !MEMCMP_LIT(gv_cur_region->rname, "DEFAULT"))
	{
		SYSTEM("$gtm_dist/mumps -run $gtm_wbox_mrtn");
		assert(1 == cs_addrs->nl->wbox_test_seq_num);	/* should have been set by mubfilcpy */
		cs_addrs->nl->wbox_test_seq_num = 2;	/* signal mupip backup to stop sleeping in mubfilcpy */
	}
#	endif
	/* From here on, we need to use GDSFILEXT_CLNUP before returning to the caller */
	was_crit = cs_addrs->now_crit;
	assert(!cs_addrs->hold_onto_crit || was_crit);
	/* If we are coming from mupip_extend (which gets crit itself) we better have waited for any unfreezes to occur.
	 * If we are coming from online rollback (when that feature is available), we will come in holding crit and in
	 * 	the final retry. In that case too, we expect to have waited for unfreezes to occur in the caller itself.
	 * Therefore if we are coming in holding crit from MUPIP, we expect the db to be unfrozen so no need to wait for
	 * freeze.
	 * If we are coming from GT.M and final retry (in which case we come in holding crit) we expect to have waited
	 * 	for any unfreezes (by invoking tp_crit_all_regions) to occur (TP or non-TP) before coming into this
	 *	function. However, there is one exception. In the final retry, if tp_crit_all_regions notices that
	 *	at least one of the participating regions did ONLY READs, it will not wait for any freeze on THAT region
	 *	to complete before grabbing crit. Later, in the final retry, if THAT region did an update which caused
	 *	op_tcommit to invoke bm_getfree->gdsfilext, then we would have come here with a frozen region on which
	 *	we hold crit.
	 */
	assert(!was_crit || !FROZEN_HARD(cs_data) || (dollar_tlevel && (CDB_STAGNATE <= t_tries)));
	/*
	 * If we are in the final retry and already hold crit, it is possible that csa->nl->wc_blocked is also set to TRUE
	 * (by a concurrent process in phase2 which encountered an error in the midst of commit and secshr_db_clnup
	 * finished the job for it). In this case we do NOT want to invoke wcs_recover as that will update the "bt"
	 * transaction numbers without correspondingly updating the history transaction numbers (effectively causing
	 * a cdb_sc_blkmod type of restart). Therefore do NOT call grab_crit (which unconditionally invokes wcs_recover)
	 * if we already hold crit.
	 */
	if (!was_crit)
	{
		for ( ; ; )
		{
			grab_crit(gv_cur_region);
			if (FROZEN_CHILLED(cs_data))
				DO_CHILLED_AUTORELEASE(cs_addrs, cs_data);
			if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
				break;
			rel_crit(gv_cur_region);
			while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
			{
				hiber_start(1000);
				if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
					break;
			}
		}
	} else if (FROZEN_HARD(cs_data) && dollar_tlevel)
	{	/* We don't want to continue with file extension as explained above. Hence return with an error code which
		 * op_tcommit will recognize (as a cdb_sc_needcrit/cdb_sc_instancefreeze type of restart) and restart accordingly.
		 */
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_FREEZE_PROG;
	} else
		WAIT_FOR_REGION_TO_UNCHILL(cs_addrs, cs_data);
	if (IS_REPL_INST_FROZEN && trans_in_prog)
	{
		assert(CDB_STAGNATE <= t_tries);
		GDSFILEXT_CLNUP;
		return (uint4)FINAL_RETRY_INST_FREEZE;
	}
	assert(cs_addrs->ti->total_blks == cs_data->trans_hist.total_blks);
	old_total = cs_data->trans_hist.total_blks;
	if (old_total != filesize)
	{	/* Somebody else has already extended it, since we are in crit, this is trust-worthy. However, in case of MM,
		 * we still need to remap the database
		 */
		assert((old_total > filesize) || !is_mm);
		/* For BG, someone else could have truncated or extended - we have no idea */
		GDSFILEXT_CLNUP;
		return (SS_NORMAL);
	}
	if (trans_in_prog && SUSPICIOUS_EXTEND)
	{
		if (!was_crit)
		{
			GDSFILEXT_CLNUP;
			return (uint4)(EXTEND_SUSPECT);
		}
		/* If free_blocks counter is not ok, then correct it. Do the check again. If still fails, then it means we held
		 * crit through bm_getfree into gdsfilext and still didn't get it right.
		 */
		assertpro(!is_free_blks_ctr_ok() && !SUSPICIOUS_EXTEND);
	}
	if (JNL_ENABLED(cs_data))
	{
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = cs_addrs->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, cs_addrs);
		if (jnl_status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			return (uint4)(NO_FREE_SPACE);	/* should have better return status */
		}
	}
	if (is_mm)
	{
		cs_addrs->nl->mm_extender_pid = process_id;
		status = wcs_wtstart(gv_cur_region, 0, NULL, NULL);
		cs_addrs->nl->mm_extender_pid = 0;
		assertpro(SS_NORMAL == status);
		old_base[0] = cs_addrs->db_addrs[0];
		old_base[1] = cs_addrs->db_addrs[1];
		cs_addrs->db_addrs[0] = NULL; /* don't rely on it until the mmap below */
#		ifdef _AIX
		status = shmdt(old_base[0] - BLK_ZERO_OFF(cs_data->start_vbn));
#		else
		status = munmap((caddr_t)old_base[0], (size_t)(old_base[1] - old_base[0]));
#		endif
		if (0 != status)
		{
			save_errno = errno;
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(12) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region),
					ERR_SYSCALL, 5, LEN_AND_STR(MEM_UNMAP_SYSCALL), CALLFROM, save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	} else
	{	/* Due to concurrency issues, it is possible some process had issued a disk read of the GDS block# corresponding
		 * to "old_total" right after a truncate wrote a GDS-block of zeros on disk (to signal end of the db file).
		 * If so, the global buffer containing this block needs to be invalidated now as part of the extend. If not, it is
		 * possible the EOF block on disk is now going to be overwritten by a properly initialized bitmap block (as part
		 * of the gdsfilext below) while the global buffer continues to have an incorrect copy of that bitmap block and
		 * this in turn would cause XXXX failures due to a bad bitmap block in shared memory. (GTM-7519)
		 */
		cr = db_csh_get((block_id)old_total);
		if ((NULL != cr) && ((cache_rec_ptr_t)CR_NOTVALID != cr))
		{
			assert((0 == cr->dirty) && (0 == cr->bt_index) && !cr->stopped);
			cr->cycle++;
			cr->blk = CR_BLKEMPTY;
		}
	}
	CHECK_TN(cs_addrs, cs_data, cs_data->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
	new_total = old_total + new_blocks;
	new_eof = BLK_ZERO_OFF(cs_data->start_vbn) + ((off_t)new_total * cs_data->blk_size);
#	if !defined(__sun) && !defined(__hpux)
	if (!cs_data->defer_allocate)
	{
		new_size = new_eof + cs_data->blk_size;
		save_errno = posix_fallocate(udi->fd, 0, new_size);
		DEBUG_ONLY(first_save_errno = save_errno);
		if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
			save_errno = extend_wait_for_fallocate(udi, new_size);
		if (0 != save_errno)
		{
			GDSFILEXT_CLNUP;
			assert(ENOSPC == save_errno);
			if (ENOSPC != save_errno)
				send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_PREALLOCATEFAIL, 2, DB_LEN_STR(gv_cur_region),
					     save_errno);
			return (uint4)(NO_FREE_SPACE);
		}
	}
#	endif
	save_errno = db_write_eof_block(udi, udi->fd, cs_data->blk_size, new_eof, &(TREF(dio_buff)));
	if ((ENOSPC == save_errno) && IS_GTM_IMAGE)
		save_errno = extend_wait_for_write(udi, cs_data->blk_size, new_eof);
	if (0 != save_errno)
	{
		GDSFILEXT_CLNUP;
		if (ENOSPC != save_errno)
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), save_errno);
		return (uint4)(NO_FREE_SPACE);
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_1))
	{
		LONG_SLEEP(600);
		assert(FALSE);
	}
	/* Ensure the EOF and metadata get to disk BEFORE any bitmap writes. Otherwise, the file size could no longer reflect
	 * a proper extent and subsequent invocations of gdsfilext could corrupt the database.
	 */
	if (!IS_STATSDB_CSA(cs_addrs))
	{
		GTM_DB_FSYNC(cs_addrs, udi->fd, status);
		assert(0 == status);
		if (0 != status)
		{
			GDSFILEXT_CLNUP;
			send_msg_csa(CSA_ARG(cs_addrs) VARLSTCNT(8) ERR_DBFILERR, 5,
						RTS_ERROR_LITERAL("fsync1()"), CALLFROM, status);
			return (uint4)(NO_FREE_SPACE);
		}
	}
	if (WBTEST_ENABLED(WBTEST_FILE_EXTEND_INTERRUPT_2))
	{
		LONG_SLEEP(600);
		assert(FALSE); /* Should be killed before that */
	}
	DEBUG_ONLY(prev_extend_blks_to_upgrd = cs_data->blks_to_upgrd;)
Example No. 19
void	lke_show(void)
{
	bool			locks, all = TRUE, wait = TRUE, interactive = FALSE, match = FALSE, memory = TRUE, nocrit = TRUE;
	boolean_t		exact = FALSE, was_crit;
	int4			pid;
	size_t			ls_len;
	int			n;
	char 			regbuf[MAX_RN_LEN], nodebuf[32], one_lockbuf[MAX_KEY_SZ];
	mlk_ctldata_ptr_t	ctl;
	mstr			reg, node, one_lock;
	int			shr_sub_len = 0;
	float			ls_free = 0;	/* Free space in bottleneck subspace */
	/* Get all command parameters */
	reg.addr = regbuf;
	reg.len = SIZEOF(regbuf);
	node.addr = nodebuf;
	node.len = SIZEOF(nodebuf);
	one_lock.addr = one_lockbuf;
	one_lock.len = SIZEOF(one_lockbuf);
	if (lke_getcli(&all, &wait, &interactive, &pid, &reg, &node, &one_lock, &memory, &nocrit, &exact) == 0)
		return;

	/* Search all regions specified on the command line */
	for (gv_cur_region = gd_header->regions, n = 0; n != gd_header->n_regions; ++gv_cur_region, ++n)
	{
		/* If region matches and is open */
		if ((reg.len == 0  ||
		     gv_cur_region->rname_len == reg.len  &&  memcmp(gv_cur_region->rname, reg.addr, reg.len) == 0)  &&
		    gv_cur_region->open)
		{
			match = TRUE;
			util_out_print("!/!AD!/", NOFLUSH, REG_LEN_STR(gv_cur_region));

			/* If distributed database, the region is located on another node */
			if (gv_cur_region->dyn.addr->acc_meth == dba_cm)
			{
#				if defined(LKE_WORKS_OK_WITH_CM)
				/* Obtain lock info from the remote node */
				locks = gtcmtr_lke_showreq(gv_cur_region->dyn.addr->cm_blk, gv_cur_region->cmx_regnum,
							   all, wait, pid, &node);
#				else
				gtm_putmsg(VARLSTCNT(10) ERR_UNIMPLOP, 0, ERR_TEXT, 2,
						LEN_AND_LIT("GT.CM region - locks must be displayed on the local node"),
						ERR_TEXT, 2, REG_LEN_STR(gv_cur_region));
				continue;
#				endif
			} else if (gv_cur_region->dyn.addr->acc_meth == dba_bg  || gv_cur_region->dyn.addr->acc_meth == dba_mm)
			{	/* Local region */
				cs_addrs = &FILE_INFO(gv_cur_region)->s_addrs;
				ls_len = (size_t)(cs_addrs->lock_addrs[1] - cs_addrs->lock_addrs[0]);
				ctl = (mlk_ctldata_ptr_t)malloc(ls_len);
				/* Prevent any modification of the lock space while we make a local copy of it */
				if (cs_addrs->critical != NULL)
					crash_count = cs_addrs->critical->crashcnt;
				was_crit = cs_addrs->now_crit;
				if (!nocrit && !was_crit)
					grab_crit(gv_cur_region);
				longcpy((uchar_ptr_t)ctl, (uchar_ptr_t)cs_addrs->lock_addrs[0], ls_len);
				assert((ctl->max_blkcnt > 0) && (ctl->max_prccnt > 0) && ((ctl->subtop - ctl->subbase) > 0));
				if (!nocrit && !was_crit)
					rel_crit(gv_cur_region);
				shr_sub_len = 0;
				locks = ctl->blkroot == 0 ?
						FALSE :
						lke_showtree(NULL, (mlk_shrblk_ptr_t)R2A(ctl->blkroot), all, wait, pid,
							     one_lock, memory, &shr_sub_len);
				/* Lock space usage consists of: control_block + nodes (locks) + processes + substrings.
				 * Any of those subspaces can be a bottleneck, so we report the subspace that is running out.
				 */
				ls_free = MIN(((float)ctl->blkcnt) / ctl->max_blkcnt, ((float)ctl->prccnt) / ctl->max_prccnt);
				ls_free = MIN(1-(((float)shr_sub_len) / (ctl->subtop - ctl->subbase)), ls_free);
				ls_free *= 100;	/* Scale to [0-100] range. (couldn't do this inside util_out_print) */
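				/* Illustrative numbers (not from the original source): with 40 of max_blkcnt = 100
				 * node slots free (0.4), 30 of max_prccnt = 50 process slots free (0.6), and
				 * shr_sub_len = 300 of (subtop - subbase) = 1000 substring bytes used (1 - 0.3 = 0.7),
				 * ls_free = MIN(0.4, 0.6, 0.7) * 100 = 40, i.e. the tightest subspace is 40% free.
				 */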
				if (ls_free < 1) /* No memory? Notify user. */
					gtm_putmsg(VARLSTCNT(4) ERR_LOCKSPACEFULL, 2, DB_LEN_STR(gv_cur_region));
				if (ls_free < 1 || memory)
				{
					if (ctl->subtop > ctl->subfree)
						gtm_putmsg(VARLSTCNT(10) ERR_LOCKSPACEINFO, 8, REG_LEN_STR(gv_cur_region),
							   (ctl->max_prccnt - ctl->prccnt), ctl->max_prccnt,
							   (ctl->max_blkcnt - ctl->blkcnt), ctl->max_blkcnt, LEN_AND_LIT(" not "));
					else
						gtm_putmsg(VARLSTCNT(10) ERR_LOCKSPACEINFO, 8, REG_LEN_STR(gv_cur_region),
							   (ctl->max_prccnt - ctl->prccnt), ctl->max_prccnt,
							   (ctl->max_blkcnt - ctl->blkcnt), ctl->max_blkcnt, LEN_AND_LIT(" "));
				}
				free(ctl);
			} else
			{
				gtm_putmsg(VARLSTCNT(2) ERR_BADREGION, 0);
				locks = TRUE;
			}
			if (!locks)
			{
				gtm_putmsg(VARLSTCNT(4) ERR_NOLOCKMATCH, 2, REG_LEN_STR(gv_cur_region));
			}
			assert((ls_free <= 100) && (ls_free >= 0));
			gtm_putmsg(VARLSTCNT(4) ERR_LOCKSPACEUSE, 2, ((int)ls_free),
				       cs_addrs->hdr->lock_space_size/OS_PAGELET_SIZE);
		}
	}

	if (!match  &&  reg.len != 0)
		rts_error(VARLSTCNT(4) ERR_NOREGION, 2, reg.len, reg.addr);

}
Example No. 20
uint4 jnl_file_lost(jnl_private_control *jpc, uint4 jnl_stat)
{	/* Notify operator and terminate journaling */
	unsigned int	status;
	sgmnt_addrs	*csa;
	seq_num		reg_seqno, jnlseqno;
	boolean_t	was_lockid = FALSE, instfreeze_environ;

	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		assertpro(FALSE && jpc->region->dyn.addr->acc_meth);
	}
#	ifdef VMS
	/* The following assert has been removed as it could be FALSE if the caller is "jnl_file_extend"
	 *	assert(0 != memcmp(csa->nl->jnl_file.jnl_file_id.fid, zero_fid, SIZEOF(zero_fid)));
	 */
#	endif
	assert(csa->now_crit);
	/* We issue an rts_error (instead of shutting off journaling) in the following cases :					{BYPASSOK}
	 * 1) $gtm_error_on_jnl_file_lost is set to issue runtime error (if not already issued) in case of journaling issues.
	 * 2) The process has the given message set in $gtm_custom_errors (indicative of instance freeze on error setup)
	 *    in which case the goal is to never shut-off journaling
	 */
	UNIX_ONLY(assert(jnlpool.jnlpool_ctl == jnlpool_ctl));
	UNIX_ONLY(instfreeze_environ = INST_FREEZE_ON_MSG_ENABLED(csa, jnl_stat));
	VMS_ONLY(instfreeze_environ = FALSE);
	if ((JNL_FILE_LOST_ERRORS == TREF(error_on_jnl_file_lost)) || instfreeze_environ)
	{
		VMS_ONLY(assert(FALSE)); /* Not fully implemented / supported on VMS. */
		if (!process_exiting || instfreeze_environ || !csa->jnl->error_reported)
		{
			csa->jnl->error_reported = TRUE;
			in_wcs_recover = FALSE;	/* in case we're called in wcs_recover() */
			if (SS_NORMAL != jpc->status)
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(7) jnl_stat, 4, JNL_LEN_STR(csa->hdr),
						DB_LEN_STR(gv_cur_region), jpc->status);
			else
				rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_stat, 4, JNL_LEN_STR(csa->hdr),
						DB_LEN_STR(gv_cur_region));
		}
		return jnl_stat;
	}
	if (0 != jnl_stat)
		jnl_send_oper(jpc, jnl_stat);
	csa->hdr->jnl_state = jnl_closed;
	jpc->jnl_buff->cycle++; /* increment shared cycle so all future callers of jnl_ensure_open recognize journal switch */
	assert(jpc->cycle < jpc->jnl_buff->cycle);
	if (REPL_ENABLED(csa->hdr))
	{
		csa->hdr->repl_state = repl_was_open;
		reg_seqno = csa->hdr->reg_seqno;
		jnlseqno = (NULL != jnlpool.jnlpool_ctl) ? jnlpool.jnlpool_ctl->jnl_seqno : MAX_SEQNO;
		send_msg_csa(CSA_ARG(csa) VARLSTCNT(8) ERR_REPLJNLCLOSED, 6, DB_LEN_STR(jpc->region), &reg_seqno, &reg_seqno,
				&jnlseqno, &jnlseqno);
	} else
		send_msg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_JNLCLOSED, 3, DB_LEN_STR(jpc->region), &csa->ti->curr_tn);
#ifdef VMS
	/* We can get a jnl_file_lost before the file is even created, so locking is done only if the lock exist */
	if (0 != csa->jnl->jnllsb->lockid)
	{
		was_lockid = TRUE;
		status = gtm_enqw(EFN$C_ENF, LCK$K_EXMODE, csa->jnl->jnllsb, LCK$M_CONVERT | LCK$M_NODLCKBLK,
				NULL, 0, NULL, 0, NULL, PSL$C_USER, 0);
		if (SS$_NORMAL == status)
			status = csa->jnl->jnllsb->cond;
	}
	jnl_file_close(jpc->region, FALSE, FALSE);
	if (was_lockid)
	{
		if (SS$_NORMAL == status)
			status = gtm_deq(csa->jnl->jnllsb->lockid, NULL, PSL$C_USER, 0);
		assertpro(SS$_NORMAL == status);
	}
# else
	jnl_file_close(jpc->region, FALSE, FALSE);
#endif
	return EXIT_NRM;
}
Example No. 21
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd)
{
	static boolean_t	mutex_init_done = FALSE;
	boolean_t       	is_bg, read_only;
	char            	machine_name[MAX_MCNAMELEN];
	file_control    	*fc;
	int			gethostname_res, stat_res, mm_prot;
	int			errno_save;	/* not declared in the original snippet; needed by the semop and LSEEKWRITE error paths below */
	int4            	status, semval, dblksize, fbwsize;
	sm_long_t       	status_l;
	sgmnt_addrs     	*csa;
	sgmnt_data_ptr_t        csd;
	struct sembuf   	sop[3];
	struct stat     	stat_buf;
	union semun		semarg;
	struct semid_ds		semstat;
	struct shmid_ds         shmstat;
	struct statvfs		dbvfs;
	uint4           	sopcnt;
	unix_db_info    	*udi;
#ifdef periodic_timer_removed
	void            	periodic_flush_check();
#endif

	error_def(ERR_CLSTCONFLICT);
	error_def(ERR_CRITSEMFAIL);
	error_def(ERR_DBNAMEMISMATCH);
	error_def(ERR_DBIDMISMATCH);
	error_def(ERR_NLMISMATCHCALC);
	error_def(ERR_REQRUNDOWN);
	error_def(ERR_SYSCALL);

	assert(tsd->acc_meth == dba_bg  ||  tsd->acc_meth == dba_mm);
	is_bg = (dba_bg == tsd->acc_meth);
	read_only = reg->read_only;
	new_dbinit_ipc = FALSE;	/* we did not create a new ipc resource */
	udi = FILE_INFO(reg);
	memset(machine_name, 0, sizeof(machine_name));
	if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
	assert(strlen(machine_name) < MAX_MCNAMELEN);
	csa = &udi->s_addrs;
	csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL;   /* to help in dbinit_ch  and gds_rundown */
	reg->opening = TRUE;
	/*
	 * Create the ftok semaphore for this region.
	 * We do not want the ftok counter semaphore to become 2 for the mupip journal recover process.
	 */
	if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE))
		rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
	/*
	 * At this point we have the ftok_semid semaphore based on the ftok key, so any region with a
	 * conflicting ftok will block here. Say a.dat and b.dat have the same ftok, with process A
	 * accessing a.dat and process B accessing b.dat: only one of them can proceed through db_init() at a time.
	 */
	fc = reg->dyn.addr->file_cntl;
	fc->file_type = reg->dyn.addr->acc_meth;
	fc->op = FC_READ;
	fc->op_buff = (sm_uc_ptr_t)tsd;
	fc->op_len = sizeof(*tsd);
	fc->op_pos = 1;
	dbfilop(fc);		/* Read file header */
	udi->shmid = tsd->shmid;
	udi->semid = tsd->semid;
	udi->sem_ctime = tsd->sem_ctime.ctime;
	udi->shm_ctime = tsd->shm_ctime.ctime;
	dbsecspc(reg, tsd); 	/* Find db segment size */
	if (!mupip_jnl_recover)
	{
		if (INVALID_SEMID == udi->semid)
		{
			if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* Something must be wrong in the protocol or the code if this happens */
				GTMASSERT;
			/*
			 * Create new semaphore using IPC_PRIVATE. System guarantees a unique id.
			 */
			if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
			{
				udi->semid = INVALID_SEMID;
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
			}
			udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
			new_dbinit_ipc = TRUE;
			tsd->semid = udi->semid;
			semarg.val = GTM_ID;
			/*
			 * The following sets the value of semaphore number 2 (= FTOK_SEM_PER_ID - 1) to GTM_ID.
			 * If a semaphore is ever orphaned for some reason, mupip rundown can then identify
			 * GTM semaphores from this value and remove them.
			 */
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
			/*
			 * Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it.
			 *	    We must NOT do any more SETVAL after this. Our design is to use
			 *	    sem_ctime as creation time of semaphore.
			 */
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno);
			tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime;
		} else
		{
			if (INVALID_SHMID == udi->shmid)
				/* if mu_rndwn_file gets standalone access of this region and
				 * somehow mupip process crashes, we can have semid != -1 but shmid == -1
				 */
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid"));
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg))
				/* file header has a valid semid but the semaphore does not exist */
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name));
			else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match"));
			if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
			else if (shmstat.shm_ctime != tsd->shm_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match"));
		}
		/* We already have ftok semaphore of this region, so just plainly do semaphore operation */
		/* This is the database access control semaphore for any region */
		sop[0].sem_num = 0; sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = 0; sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		if (!read_only)
		{
			sop[2].sem_num = 1; sop[2].sem_op  = 1;	 /* increment r/w access counter */
			sopcnt = 3;
		}
		sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT;
		SEMOP(udi->semid, sop, sopcnt, status);
		if (-1 == status)
		{
			errno_save = errno;
			gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg));
			rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save);
		}
	} else /* for mupip_jnl_recover we were already in mu_rndwn_file and got "semid" semaphore  */
	{
		if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime)
			/* make sure mu_rndwn_file() has reset created semaphore for standalone access */
			GTMASSERT;
		if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* make sure mu_rndwn_file() has reset shared memory */
			GTMASSERT;
		udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
		new_dbinit_ipc = TRUE;
	}
	sem_incremented = TRUE;
	if (new_dbinit_ipc)
	{
		/* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */
#ifdef __MVS__
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND),
			__IPC_MEGA | IPC_CREAT | RWDALL)))
#else
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT)))
#endif
		{
			udi->shmid = status_l = INVALID_SHMID;
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				  ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
		}
		tsd->shmid = udi->shmid;
		if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
		tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime;
	}
#ifdef DEBUG_DB64
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND));
	next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS);
#else
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND));
#endif
	if (-1 == status_l)
	{
		rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
			  ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno);
	}
	csa->nl = (node_local_ptr_t)csa->db_addrs[0];
	csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
	assert(((int)csa->critical & 0xf) == 0); 			/* critical should be 16-byte aligned */
#ifdef CACHELINE_SIZE
	assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1)));
#endif
	/* Note: Here we check jnl_state from the database file; its value cannot change without standalone access.
	 * The jnl_buff buffer should be initialized irrespective of whether this is a read-only or read-write process. */
	JNL_INIT(csa, reg, tsd);
	csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd));
	csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1;
	csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
	csa->total_blks = tsd->trans_hist.total_blks;   		/* For test to see if file has extended */
	if (new_dbinit_ipc)
	{
		memset(csa->nl, 0, sizeof(*csa->nl));			/* We allocated shared storage -- we have to init it */
		if (JNL_ALLOWED(csa))
		{	/* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not
			 * necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch
			 * check in order to determine whether to call jnl_file_open() or not. this is in preparation for that.
			 */
			csa->jnl->jnl_buff->cycle = 1;
		}
	}
	if (is_bg)
		csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
	else
	{
		csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1);
		FSTAT_FILE(udi->fd, &stat_buf, stat_res);
		if (-1 == stat_res)
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
#ifdef DEBUG_DB64
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size),
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size);
#else
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL,
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
#endif
		csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1;
		csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0];
	}
	if (!csa->nl->glob_sec_init)
	{
		assert(new_dbinit_ipc);
		if (is_bg)
			*csd = *tsd;
		if (csd->machine_name[0])                  /* crash occurred */
		{
			if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN))  /* crashed on some other node */
				rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
			else
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
		}
		if (is_bg)
		{
			bt_malloc(csa);
			csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd);
			db_csh_ini(csa);
		}
		db_csh_ref(csa);
		strcpy(csa->nl->machine_name, machine_name);					/* machine name */
		assert(MAX_REL_NAME > gtm_release_name_len);
		memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1);	/* GT.M release name */
		memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1);				/* GDS label */
		memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len);		/* database filename */
		csa->nl->creation_date_time = csd->creation.date_time;
		csa->nl->highest_lbm_blk_changed = -1;
		csa->nl->wcs_timers = -1;
		csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
		csa->nl->unique_id.uid = FILE_INFO(reg)->fileid;            /* save what file we initialized this storage for */
		/* save pointers in csa to access shared memory */
		csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
		if (JNL_ALLOWED(csa))
			csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
		csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl);
		csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
		csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
		if (!read_only || is_bg)
		{
			csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
			csd->max_update_array_size = csd->max_non_bm_update_array_size
				= ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE);
			csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE);
			/* add current db_csh counters into the cumulative counters and reset the current counters */
#define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2)					\
				csd->COUNTER.cumul_count += csd->COUNTER.curr_count;	\
				csd->COUNTER.curr_count = 0;
#include "tab_db_csh_acct_rec.h"
#undef TAB_DB_CSH_ACCT_REC
		}
		if (!read_only)
		{
			if (is_bg)
			{
				assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0);
				LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save);
				if (0 != errno_save)
				{
					rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						  ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save);
				}
			}
		}
		reg->dyn.addr->ext_blk_count = csd->extension_size;
		mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
		DEBUG_ONLY(locknl = csa->nl;)	/* for DEBUG_ONLY LOCK_HIST macro */
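/* A minimal standalone sketch (not GT.M code) of the technique used above: attach one
 * shared memory segment and carve it into sections by adding fixed offsets to the base
 * address. Note how the original stores section *offsets* (not raw pointers) inside
 * shared memory (e.g. csa->nl->critical), so each attaching process can rebuild its own
 * pointers no matter where shmat() maps the segment. The section sizes below are
 * illustrative assumptions only.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#define NODE_LOCAL_BYTES	1024	/* assumed size of the control section */
#define CRIT_BYTES		512	/* assumed size of the mutex section */
#define REST_BYTES		4096	/* assumed size of everything else */

int main(void)
{
	int		shmid;
	unsigned char	*base, *crit, *rest;

	/* Create a private segment; a real database would use a stable key instead. */
	shmid = shmget(IPC_PRIVATE, NODE_LOCAL_BYTES + CRIT_BYTES + REST_BYTES, IPC_CREAT | 0600);
	if (-1 == shmid)
		return 1;
	base = (unsigned char *)shmat(shmid, NULL, 0);
	if ((void *)-1 == (void *)base)
		return 1;
	memset(base, 0, NODE_LOCAL_BYTES + CRIT_BYTES + REST_BYTES);	/* creator initializes shared storage */
	crit = base + NODE_LOCAL_BYTES;		/* section pointer = base + cumulative offset */
	rest = crit + CRIT_BYTES;
	printf("base=%p crit=%p rest=%p\n", (void *)base, (void *)crit, (void *)rest);
	shmdt(base);
	shmctl(shmid, IPC_RMID, NULL);
	return 0;
}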
Example No. 22
gd_region *dbfilopn (gd_region *reg)
{
	unix_db_info    *udi;
	parse_blk       pblk;
	mstr            file;
	char            *fnptr, fbuff[MAX_FBUFF + 1];
	struct stat     buf;
	gd_region       *prev_reg;
	gd_segment      *seg;
	int             status;
	bool            raw;
	int		stat_res;
	int		errno_save;

	seg = reg->dyn.addr;
	assert(seg->acc_meth == dba_bg  ||  seg->acc_meth == dba_mm);
	if (NULL == seg->file_cntl)
	{
		seg->file_cntl = (file_control *)malloc(sizeof(*seg->file_cntl));
		memset(seg->file_cntl, 0, sizeof(*seg->file_cntl));
	}
	if (NULL == seg->file_cntl->file_info)
	{
		seg->file_cntl->file_info = (void *)malloc(sizeof(unix_db_info));
		memset(seg->file_cntl->file_info, 0, sizeof(unix_db_info));
	}
	file.addr = (char *)seg->fname;
	file.len = seg->fname_len;
	memset(&pblk, 0, sizeof(pblk));
	pblk.buffer = fbuff;
	pblk.buff_size = MAX_FBUFF;
	pblk.fop = (F_SYNTAXO | F_PARNODE);
	memcpy(fbuff,file.addr,file.len);
	*(fbuff + file.len) = '\0';
	if (is_raw_dev(fbuff))
	{
		raw = TRUE;
		pblk.def1_buf = DEF_NODBEXT;
		pblk.def1_size = sizeof(DEF_NODBEXT) - 1;
	} else
	{
		raw = FALSE;
		pblk.def1_buf = DEF_DBEXT;
		pblk.def1_size = sizeof(DEF_DBEXT) - 1;
	}
	status = parse_file(&file, &pblk);
	if (!(status & 1))
	{
		if (GTCM_GNP_SERVER_IMAGE != image_type)
		{
			free(seg->file_cntl->file_info);
			free(seg->file_cntl);
			seg->file_cntl = 0;
		}
		rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), status);
	}
	assert(pblk.b_esl < sizeof(seg->fname));
	memcpy(seg->fname, pblk.buffer, pblk.b_esl);
	pblk.buffer[pblk.b_esl] = 0;
	seg->fname[pblk.b_esl] = 0;
	seg->fname_len = pblk.b_esl;
	if (pblk.fnb & F_HAS_NODE)
	{	/* Remote node specification given */
		assert(pblk.b_node && pblk.l_node[pblk.b_node - 1] == ':');
		gvcmy_open(reg, &pblk);
		return (gd_region *)-1;
	}
	fnptr = (char *)seg->fname + pblk.b_node;
	udi = FILE_INFO(reg);
	udi->raw = raw;
	udi->fn = (char *)fnptr;
	OPENFILE(fnptr, O_RDWR, udi->fd);
	udi->ftok_semid = INVALID_SEMID;
	udi->semid = INVALID_SEMID;
	udi->shmid = INVALID_SHMID;
	udi->sem_ctime = 0;
	udi->shm_ctime = 0;
	reg->read_only = FALSE;		/* maintain csa->read_write simultaneously */
	udi->s_addrs.read_write = TRUE;	/* maintain reg->read_only simultaneously */
	if (udi->fd == -1)
	{
		OPENFILE(fnptr, O_RDONLY, udi->fd);
		if (udi->fd == -1)
		{
			errno_save = errno;
			if (GTCM_GNP_SERVER_IMAGE != image_type)
			{
				free(seg->file_cntl->file_info);
				free(seg->file_cntl);
				seg->file_cntl = 0;
			}
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno_save);
		}
		reg->read_only = TRUE;			/* maintain csa->read_write simultaneously */
		udi->s_addrs.read_write = FALSE;	/* maintain reg->read_only simultaneously */
	}
	STAT_FILE(fnptr, &buf, stat_res);
	set_gdid_from_stat(&udi->fileid, &buf);
	if (prev_reg = gv_match(reg))		/* WARNING: assignment */
	{
		close(udi->fd);
		free(seg->file_cntl->file_info);
		free(seg->file_cntl);
		seg->file_cntl = 0;
		return prev_reg;
	}
	return reg;
}
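/* Sketch of the open-for-write-then-fall-back-to-read-only pattern used by dbfilopn()
 * above, using plain open() in place of the OPENFILE macro. The helper name and return
 * convention are assumptions made for the sketch.
 */
#include <fcntl.h>

/* Returns an fd and sets *read_only; returns -1 (with errno set) if neither mode works. */
static int open_db_file(const char *fname, int *read_only)
{
	int	fd;

	fd = open(fname, O_RDWR);
	*read_only = 0;
	if (-1 == fd)
	{	/* No write access; retry read-only before giving up, mirroring the code above. */
		fd = open(fname, O_RDONLY);
		*read_only = (-1 != fd);
	}
	return fd;
}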
Example No. 23
boolean_t mu_truncate(int4 truncate_percent)
{
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t 	csd;
	int			num_local_maps;
	int 			lmap_num, lmap_blk_num;
	int			bml_status, sigkill;
	int			save_errno;
	int			ftrunc_status;
	uint4			jnl_status;
	uint4			old_total, new_total;
	uint4			old_free, new_free;
	uint4			end_blocks;
	int4			blks_in_lmap, blk;
	gtm_uint64_t		before_trunc_file_size;
	off_t			trunc_file_size;
	off_t			padding;
	uchar_ptr_t		lmap_addr;
	boolean_t		was_crit;
	uint4			found_busy_blk;
	srch_blk_status		bmphist;
	srch_blk_status 	*blkhist;
	srch_hist		alt_hist;
	trans_num		curr_tn;
	blk_hdr_ptr_t		lmap_blk_hdr;
	block_id		*blkid_ptr;
	unix_db_info    	*udi;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	char			*err_msg;
	intrpt_state_t		prev_intrpt_state;
	off_t			offset;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	csa = cs_addrs;
	csd = cs_data;
	if (dba_mm == csd->acc_meth)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOTBG, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if ((GDSVCURR != csd->desired_db_format) || (csd->blks_to_upgrd != 0))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		return TRUE;
	}
	if (csa->ti->free_blocks < (truncate_percent * csa->ti->total_blks / 100))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		return TRUE;
	}
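	/* For example, with total_blks = 1000000 and truncate_percent = 25, the truncate proceeds
	 * only if at least 250000 blocks are free; otherwise MUTRUNCNOSPACE is issued above.
	 */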
	/* already checked for parallel truncates on this region --- see mupip_reorg.c */
	gv_target = NULL;
	assert(csa->nl->trunc_pid == process_id);
	assert(dba_mm != csd->acc_meth);
	old_total = csa->ti->total_blks;
	old_free = csa->ti->free_blocks;
	sigkill = 0;
	found_busy_blk = 0;
	memset(&alt_hist, 0, SIZEOF(alt_hist)); /* null-initialize history */
	assert(csd->bplmap == BLKS_PER_LMAP);
	end_blocks = old_total % BLKS_PER_LMAP; /* blocks in the last lmap (first one we start scanning) */
	if (0 == end_blocks)
		end_blocks = BLKS_PER_LMAP;
	num_local_maps = DIVIDE_ROUND_UP(old_total, BLKS_PER_LMAP);
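	/* For example, old_total = 1537 with BLKS_PER_LMAP = 512 gives end_blocks = 1537 % 512 = 1
	 * and num_local_maps = DIVIDE_ROUND_UP(1537, 512) = 4 (three full local maps plus a partial one).
	 */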
	/* ======================================== PHASE 1 ======================================== */
	for (lmap_num = num_local_maps - 1; (lmap_num > 0 && !found_busy_blk); lmap_num--)
	{
		if (mu_ctrly_occurred || mu_ctrlc_occurred)
			return TRUE;
		assert(csa->ti->total_blks >= old_total); /* otherwise, a concurrent truncate happened... */
		if (csa->ti->total_blks != old_total) /* Extend (likely called by mupip extend) -- don't truncate */
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region),
					truncate_percent);
			return TRUE;
		}
		lmap_blk_num = lmap_num * BLKS_PER_LMAP;
		if (csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
		{
			found_busy_blk = lmap_blk_num;
			break;
		}
		blks_in_lmap = (lmap_num == num_local_maps - 1) ? end_blocks : BLKS_PER_LMAP;
		/* Loop through non-bitmap blocks of this lmap, do recycled2free */
		DBGEHND((stdout, "DBG:: lmap_num = [%lu], lmap_blk_num = [%lu], blks_in_lmap = [%lu]\n",
			lmap_num, lmap_blk_num, blks_in_lmap));
		for (blk = 1; blk < blks_in_lmap && blk != -1 && !found_busy_blk;)
		{
			t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;) /* retry loop for recycled to free transactions */
			{
				curr_tn = csd->trans_hist.curr_tn;
				/* Read the nth local bitmap into memory */
				bmphist.blk_num = lmap_blk_num;
				bmphist.buffaddr = t_qread(bmphist.blk_num, &bmphist.cycle, &bmphist.cr);
				lmap_blk_hdr = (blk_hdr_ptr_t)bmphist.buffaddr;
				if (!(bmphist.buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
				{ /* Could not read the block successfully. Retry. */
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				lmap_addr = bmphist.buffaddr + SIZEOF(blk_hdr);
				/* starting from the hint (blk itself), find the first busy or recycled block */
				blk = bml_find_busy_recycled(blk, lmap_addr, blks_in_lmap, &bml_status);
				assert(blk < BLKS_PER_LMAP);
				if (blk == -1 || blk >= blks_in_lmap)
				{ /* done with this lmap, continue to next */
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_BUSY == bml_status || csa->nl->highest_lbm_with_busy_blk >= lmap_blk_num)
				{ /* stop processing blocks... skip ahead to phase 2 */
					found_busy_blk = lmap_blk_num;
					t_abort(gv_cur_region, csa);
					break;
				}
				else if (BLK_RECYCLED == bml_status)
				{ /* Write PBLK records for recycled blocks only if before_image journaling is
				   * enabled. t_end() takes care of checking if journaling is enabled and
				   * writing PBLK record. We have to at least mark the recycled block as free.
				   */
					RESET_UPDATE_ARRAY;
					update_trans = UPDTRNS_DB_UPDATED_MASK;
					*((block_id *)update_array_ptr) = blk;
					update_array_ptr += SIZEOF(block_id);
					*(int *)update_array_ptr = 0;
					alt_hist.h[1].blk_num = 0;
					alt_hist.h[0].level = 0;
					alt_hist.h[0].cse = NULL;
					alt_hist.h[0].tn = curr_tn;
					alt_hist.h[0].blk_num = lmap_blk_num + blk;
					alt_hist.h[0].buffaddr = t_qread(alt_hist.h[0].blk_num,
							&alt_hist.h[0].cycle, &alt_hist.h[0].cr);
					if (!alt_hist.h[0].buffaddr)
					{
						t_retry((enum cdb_sc)rdfail_detail);
						continue;
					}
					if (!t_recycled2free(&alt_hist.h[0]))
					{
						t_retry(cdb_sc_lostbmlcr);
						continue;
					}
					t_write_map(&bmphist, (unsigned char *)update_array, curr_tn, 0);
					/* Set the opcode for INCTN record written by t_end() */
					inctn_opcode = inctn_blkmarkfree;
					if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
						continue;
					/* block processed, scan from the next one */
					blk++;
					break;
				} else
				{
					assert(t_tries < CDB_STAGNATE);
					t_retry(cdb_sc_badbitmap);
					continue;
				}
			} /* END recycled2free retry loop */
		} /* END scanning blocks of this particular lmap */
		/* Write PBLK for the bitmap block, in case it hasn't been written i.e. t_end() was never called above */
		/* Do a transaction that just increments the bitmap block's tn so that t_end() can do its thing */
		DBGEHND((stdout, "DBG:: bitmap block inctn -- lmap_blk_num = [%lu]\n", lmap_blk_num));
		t_begin(ERR_MUTRUNCFAIL, UPDTRNS_DB_UPDATED_MASK);
		for (;;)
		{
			RESET_UPDATE_ARRAY;
			BLK_ADDR(blkid_ptr, SIZEOF(block_id), block_id);
			*blkid_ptr = 0;
			update_trans = UPDTRNS_DB_UPDATED_MASK;
			inctn_opcode = inctn_mu_reorg; /* inctn_mu_truncate */
			curr_tn = csd->trans_hist.curr_tn;
			blkhist = &alt_hist.h[0];
			blkhist->blk_num = lmap_blk_num;
			blkhist->tn = curr_tn;
			blkhist->cse = NULL; /* start afresh (do not use value from previous retry) */
			/* Read the nth local bitmap into memory */
			blkhist->buffaddr = t_qread(lmap_blk_num, (sm_int_ptr_t)&blkhist->cycle, &blkhist->cr);
			lmap_blk_hdr = (blk_hdr_ptr_t)blkhist->buffaddr;
			if (!(blkhist->buffaddr) || (BM_SIZE(BLKS_PER_LMAP) != lmap_blk_hdr->bsiz))
			{ /* Could not read the block successfully. Retry. */
				t_retry((enum cdb_sc)rdfail_detail);
				continue;
			}
			t_write_map(blkhist, (unsigned char *)blkid_ptr, curr_tn, 0);
			blkhist->blk_num = 0; /* create empty history for bitmap block */
			if ((trans_num)0 == t_end(&alt_hist, NULL, TN_NOT_SPECIFIED))
				continue;
			break;
		}
	} /* END scanning lmaps */
	/* ======================================== PHASE 2 ======================================== */
	assert(!csa->now_crit);
	for (;;)
	{ /* wait for FREEZE, we don't want to truncate a frozen database */
		grab_crit(gv_cur_region);
		if (FROZEN_CHILLED(cs_data))
			DO_CHILLED_AUTORELEASE(csa, cs_data);
		if (!FROZEN(cs_data) && !IS_REPL_INST_FROZEN)
			break;
		rel_crit(gv_cur_region);
		while (FROZEN(cs_data) || IS_REPL_INST_FROZEN)
		{
			hiber_start(1000);
			if (FROZEN_CHILLED(cs_data) && CHILLED_AUTORELEASE(cs_data))
				break;
		}
	}
	assert(csa->nl->trunc_pid == process_id);
	/* Flush pending updates to disk. If this is not done, old updates can be flushed AFTER ftruncate, extending the file. */
	if (!wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH | WCSFLU_MSYNC_DB))
	{
		assert(FALSE);
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_BUFFLUFAILED, 4, LEN_AND_LIT("MUPIP REORG TRUNCATE"),
				DB_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return FALSE;
	}
	csa->nl->highest_lbm_with_busy_blk = MAX(found_busy_blk, csa->nl->highest_lbm_with_busy_blk);
	assert(IS_BITMAP_BLK(csa->nl->highest_lbm_with_busy_blk));
	new_total = MIN(old_total, csa->nl->highest_lbm_with_busy_blk + BLKS_PER_LMAP);
	if (mu_ctrly_occurred || mu_ctrlc_occurred)
	{
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (csa->ti->total_blks != old_total || new_total == old_total)
	{
		assert(csa->ti->total_blks >= old_total); /* Better have been an extend, not a truncate... */
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_MUTRUNCNOSPACE, 3, REG_LEN_STR(gv_cur_region), truncate_percent);
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (GDSVCURR != csd->desired_db_format || csd->blks_to_upgrd != 0 || !csd->fully_upgraded)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCNOV4, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (SNAPSHOTS_IN_PROG(csa->nl))
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCSSINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	} else if (BACKUP_NOT_IN_PROGRESS != cs_addrs->nl->nbb)
	{
		gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_MUTRUNCBACKINPROG, 2, REG_LEN_STR(gv_cur_region));
		rel_crit(gv_cur_region);
		return TRUE;
	}
	DEFER_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	if (JNL_ENABLED(csa))
	{ /* Write JRT_TRUNC and INCTN records */
		if (!jgbl.dont_reset_gbl_jrec_time)
			SET_GBL_JREC_TIME;	/* needed before jnl_ensure_open as that can write jnl records */
		jpc = csa->jnl;
		jbp = jpc->jnl_buff;
		/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
		 * of jnl records. This needs to be done BEFORE the jnl_ensure_open as that could write
		 * journal records (if it decides to switch to a new journal file).
		 */
		ADJUST_GBL_JREC_TIME(jgbl, jbp);
		jnl_status = jnl_ensure_open(gv_cur_region, csa);
		if (SS_NORMAL != jnl_status)
			send_msg_csa(CSA_ARG(csa) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
		else
		{
			if (0 == jpc->pini_addr)
				jnl_put_jrt_pini(csa);
			jnl_write_trunc_rec(csa, old_total, csa->ti->free_blocks, new_total);
			inctn_opcode = inctn_mu_reorg;
			jnl_write_inctn_rec(csa);
			jnl_status = jnl_flush(gv_cur_region);
			if (SS_NORMAL != jnl_status)
			{
				send_msg_csa(CSA_ARG(csa) VARLSTCNT(9) ERR_JNLFLUSH, 2, JNL_LEN_STR(csd),
					ERR_TEXT, 2, RTS_ERROR_TEXT("Error with journal flush during mu_truncate"),
					jnl_status);
				assert(NOJNL == jpc->channel); /* jnl file lost has been triggered */
			}
		}
	}
	/* Good to go ahead and REALLY truncate (reduce total_blks, clear cache_array, FTRUNCATE) */
	curr_tn = csa->ti->curr_tn;
	CHECK_TN(csa, csd, curr_tn);
	udi = FILE_INFO(gv_cur_region);
	/* Information used by recover_truncate to check if the file size and csa->ti->total_blks are INCONSISTENT */
	trunc_file_size = BLK_ZERO_OFF(csd->start_vbn) + ((off_t)csd->blk_size * (new_total + 1));
	csd->after_trunc_total_blks = new_total;
	csd->before_trunc_free_blocks = csa->ti->free_blocks;
	csd->before_trunc_total_blks = old_total; /* Flags interrupted truncate for recover_truncate */
	/* file size and total blocks: INCONSISTENT */
	csa->ti->total_blks = new_total;
	/* past the point of no return -- shared memory intact */
	assert(csa->ti->free_blocks >= DELTA_FREE_BLOCKS(old_total, new_total));
	csa->ti->free_blocks -= DELTA_FREE_BLOCKS(old_total, new_total);
	new_free = csa->ti->free_blocks;
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_1); /* 55 : Issue a kill -9 before 1st fsync */
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	CHECK_DBSYNC(gv_cur_region, save_errno);
	/* past the point of no return -- shared memory deleted */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_2); /* 56 : Issue a kill -9 after 1st fsync */
	clear_cache_array(csa, csd, gv_cur_region, new_total, old_total);
	offset = (off_t)BLK_ZERO_OFF(csd->start_vbn) + (off_t)new_total * csd->blk_size;
	save_errno = db_write_eof_block(udi, udi->fd, csd->blk_size, offset, &(TREF(dio_buff)));
	if (0 != save_errno)
	{
		err_msg = (char *)STRERROR(save_errno);	/* use the errno returned by db_write_eof_block */
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		return FALSE;
	}
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_3); /* 57 : Issue a kill -9 after reducing csa->ti->total_blks, before FTRUNCATE */
	/* Execute an ftruncate() and truncate the DB file
	 * ftruncate() is a SYSTEM CALL on almost all platforms (except SunOS)
	 * It ignores kill -9 signal till its operation is completed.
	 * So we can safely assume that the result of ftruncate() will be complete.
	 */
	FTRUNCATE(FILE_INFO(gv_cur_region)->fd, trunc_file_size, ftrunc_status);
	if (0 != ftrunc_status)
	{
		err_msg = (char *)STRERROR(errno);
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(6) ERR_MUTRUNCERROR, 4, REG_LEN_STR(gv_cur_region), LEN_AND_STR(err_msg));
		/* should go through recover_truncate now, which will again try to FTRUNCATE */
		return FALSE;
	}
	/* file size and total blocks: CONSISTENT (shrunk) */
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_4); /* 58 : Issue a kill -9 after FTRUNCATE, before 2nd fsync */
	csa->nl->root_search_cycle++;	/* Force concurrent processes to restart in t_end/tp_tend to make sure no one
					 * tries to commit updates past the end of the file. Bitmap validations together
					 * with highest_lbm_with_busy_blk should actually be sufficient, so this is
					 * just to be safe.
					 */
	csd->before_trunc_total_blks = 0; /* indicate CONSISTENT */
	/* Increment TN */
	assert(csa->ti->early_tn == csa->ti->curr_tn);
	csd->trans_hist.early_tn = csd->trans_hist.curr_tn + 1;
	INCREMENT_CURR_TN(csd);
	fileheader_sync(gv_cur_region);
	DB_FSYNC(gv_cur_region, udi, csa, db_fsync_in_prog, save_errno);
	KILL_TRUNC_TEST(WBTEST_CRASH_TRUNCATE_5); /* 59 : Issue a kill -9 after 2nd fsync */
	CHECK_DBSYNC(gv_cur_region, save_errno);
	ENABLE_INTERRUPTS(INTRPT_IN_TRUNC, prev_intrpt_state);
	curr_tn = csa->ti->curr_tn;
	rel_crit(gv_cur_region);
	send_msg_csa(CSA_ARG(csa) VARLSTCNT(7) ERR_MUTRUNCSUCCESS, 5, DB_LEN_STR(gv_cur_region), old_total, new_total, &curr_tn);
	util_out_print("Truncated region: !AD. Reduced total blocks from [!UL] to [!UL]. Reduced free blocks from [!UL] to [!UL].",
					FLUSH, REG_LEN_STR(gv_cur_region), old_total, new_total, old_free, new_free);
	return TRUE;
} /* END of mu_truncate() */
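/* A condensed sketch (illustrative only; all names hypothetical) of the crash-consistency
 * ordering mu_truncate() follows: persist the intent in the file header and fsync, then
 * shrink the file, then clear the intent and fsync again. A crash between the two fsyncs
 * leaves enough durable state for a recovery step (recover_truncate in the original) to
 * redo the ftruncate.
 */
#include <unistd.h>

typedef struct
{
	long	before_trunc_total_blks;	/* non-zero flags an interrupted truncate */
	long	total_blks;
} hdr_sketch;

static int truncate_with_intent(int fd, hdr_sketch *hdr, long new_total, long blk_size,
				long data_off, int (*write_hdr)(int, hdr_sketch *))
{
	hdr->before_trunc_total_blks = hdr->total_blks;		/* mark: truncate in progress */
	hdr->total_blks = new_total;
	if ((0 != write_hdr(fd, hdr)) || (0 != fsync(fd)))	/* 1st sync: intent is durable */
		return -1;
	if (0 != ftruncate(fd, data_off + (new_total * blk_size)))
		return -1;					/* recovery would retry this */
	hdr->before_trunc_total_blks = 0;			/* mark: consistent again */
	if ((0 != write_hdr(fd, hdr)) || (0 != fsync(fd)))	/* 2nd sync: intent cleared */
		return -1;
	return 0;
}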
Example No. 24
void dse_crit(void)
{
	int			util_len, dse_crit_count;
	char			util_buff[MAX_UTIL_LEN];
	boolean_t		crash = FALSE, cycle = FALSE, owner = FALSE;
	gd_region		*save_region, *r_local, *r_top;

	crash = ((cli_present("CRASH") == CLI_PRESENT) || (cli_present("RESET") == CLI_PRESENT));
	cycle = (CLI_PRESENT == cli_present("CYCLE"));
	if (cli_present("SEIZE") == CLI_PRESENT || cycle)
	{
		if (gv_cur_region->read_only && !cycle)
			rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		if (cs_addrs->now_crit)
		{
			util_out_print("!/Write critical section already seized.!/", TRUE);
			return;
		}
		crash_count = cs_addrs->critical->crashcnt;
		grab_crit_encr_cycle_sync(gv_cur_region);
		cs_addrs->hold_onto_crit = TRUE;	/* need to do this AFTER grab_crit */
		cs_addrs->dse_crit_seize_done = TRUE;
		util_out_print("!/Seized write critical section.!/", TRUE);
		if (!cycle)
			return;
	}
	if (cli_present("RELEASE") == CLI_PRESENT || cycle)
	{
		if (gv_cur_region->read_only && !cycle)
			rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		if (!cs_addrs->now_crit)
		{
			util_out_print("!/Critical section already released.!/", TRUE);
			return;
		}
		crash_count = cs_addrs->critical->crashcnt;
		if (cs_addrs->now_crit)
		{	/* user wants crit to be released unconditionally so "was_crit" not checked like everywhere else */
			assert(cs_addrs->hold_onto_crit && cs_addrs->dse_crit_seize_done);
			cs_addrs->dse_crit_seize_done = FALSE;
			cs_addrs->hold_onto_crit = FALSE;	/* need to do this before the rel_crit */
			rel_crit(gv_cur_region);
			util_out_print("!/Released write critical section.!/", TRUE);
		}
#		ifdef DEBUG
		else
			assert(!cs_addrs->hold_onto_crit && !cs_addrs->dse_crit_seize_done);
#		endif
		return;
	}
	if (cli_present("INIT") == CLI_PRESENT)
	{
		if (gv_cur_region->read_only)
			rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		cs_addrs->hdr->image_count = 0;
		UNIX_ONLY(gtm_mutex_init(gv_cur_region, NUM_CRIT_ENTRY(cs_addrs->hdr), crash));
		VMS_ONLY(mutex_init(cs_addrs->critical, NUM_CRIT_ENTRY(cs_addrs->hdr), crash));
		cs_addrs->nl->in_crit = 0;
		cs_addrs->now_crit = FALSE;
		util_out_print("!/Reinitialized critical section.!/", TRUE);
		return;
	}
	if (cli_present("REMOVE") == CLI_PRESENT)
	{
		if (gv_cur_region->read_only)
			rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
		if (cs_addrs->nl->in_crit == 0)
		{
			util_out_print("!/The write critical section is unowned!/", TRUE);
			return;
		}
		UNIX_ONLY(assert(LOCK_AVAILABLE != cs_addrs->critical->semaphore.u.parts.latch_pid);)
		VMS_ONLY(assert(cs_addrs->critical->semaphore >= 0);)
Example No. 25
void dse_chng_bhead(void)
{
	blk_hdr			new_hdr;
	blk_segment		*bs1, *bs_ptr;
	block_id		blk;
	boolean_t		chng_blk, ismap, was_hold_onto_crit;
	int4			blk_seg_cnt, blk_size;	/* needed for BLK_INIT,BLK_SEG and BLK_FINI macros */
	int4			x;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	srch_blk_status		blkhist;
	trans_num		tn;
	uint4			mapsize;

	csa = cs_addrs;
	if (gv_cur_region->read_only)
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(4) ERR_DBRDONLY, 2, DB_LEN_STR(gv_cur_region));
	CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
	chng_blk = FALSE;
	if (BADDSEBLK == (blk = dse_getblk("BLOCK", DSEBMLOK, DSEBLKCUR)))		/* WARNING: assignment */
		return;
	csd = csa->hdr;
	assert(csd == cs_data);
	blk_size = csd->blk_size;
	ismap = IS_BITMAP_BLK(blk);
	mapsize = BM_SIZE(csd->bplmap);
	t_begin_crit(ERR_DSEFAIL);
	blkhist.blk_num = blk;
	if (!(blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		rts_error_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
	new_hdr = *(blk_hdr_ptr_t)blkhist.buffaddr;
	if (CLI_PRESENT == cli_present("LEVEL"))
	{
		if (!cli_get_hex("LEVEL", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && (unsigned char)x != LCL_MAP_LEVL)
		{
			util_out_print("Error: invalid level for a bit map block.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (!ismap && (x < 0 || x > MAX_BT_DEPTH + 1))
		{
			util_out_print("Error: invalid level.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		new_hdr.levl = (unsigned char)x;
		chng_blk = TRUE;
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
	}
	if (CLI_PRESENT == cli_present("BSIZ"))
	{
		if (!cli_get_hex("BSIZ", (uint4 *)&x))
		{
			t_abort(gv_cur_region, csa);
			return;
		}
		if (ismap && x != mapsize)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		} else if (x < SIZEOF(blk_hdr) || x > blk_size)
		{
			util_out_print("Error: invalid bsiz.", TRUE);
			t_abort(gv_cur_region, csa);
			return;
		}
		chng_blk = TRUE;
		new_hdr.bsiz = x;
	}
	if (!chng_blk)
		t_abort(gv_cur_region, csa);
	else
	{
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		if (!BLK_FINI(bs_ptr, bs1))
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(5) ERR_AIMGBLKFAIL, 3, blk, DB_LEN_STR(gv_cur_region));
			t_abort(gv_cur_region, csa);
			return;
		}
		t_write(&blkhist, (unsigned char *)bs1, 0, 0, new_hdr.levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		BUILD_AIMG_IF_JNL_ENABLED(csd, csa->ti->curr_tn);
		t_end(&dummy_hist, NULL, TN_NOT_SPECIFIED);
	}
	if (CLI_PRESENT == cli_present("TN"))
	{
		if (!cli_get_hex64("TN", &tn))
			return;
		t_begin_crit(ERR_DSEFAIL);
		CHECK_TN(csa, csd, csd->trans_hist.curr_tn);	/* can issue rts_error TNTOOLARGE */
		assert(csa->ti->early_tn == csa->ti->curr_tn);
		if (NULL == (blkhist.buffaddr = t_qread(blkhist.blk_num, &blkhist.cycle, &blkhist.cr)))
		{
			gtm_putmsg_csa(CSA_ARG(csa) VARLSTCNT(1) ERR_DSEBLKRDFAIL);
			t_abort(gv_cur_region, csa);
			return;
		}
		if (new_hdr.bsiz < SIZEOF(blk_hdr))
			new_hdr.bsiz = SIZEOF(blk_hdr);
		if (new_hdr.bsiz  > blk_size)
			new_hdr.bsiz = blk_size;
		BLK_INIT(bs_ptr, bs1);
		BLK_SEG(bs_ptr, blkhist.buffaddr + SIZEOF(new_hdr), new_hdr.bsiz - SIZEOF(new_hdr));
		BLK_FINI(bs_ptr, bs1);
		t_write(&blkhist, (unsigned char *)bs1, 0, 0,
			((blk_hdr_ptr_t)blkhist.buffaddr)->levl, TRUE, FALSE, GDS_WRITE_KILLTN);
		/* Pass the desired tn as argument to bg_update/mm_update below */
		BUILD_AIMG_IF_JNL_ENABLED_AND_T_END_WITH_EFFECTIVE_TN(csa, csd, tn, &dummy_hist);
	}
	return;
}
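/* The bsiz clamp applied twice above (once per t_write() path), factored into a trivial
 * helper for illustration; parameter types are assumptions.
 */
static unsigned int clamp_bsiz(unsigned int bsiz, unsigned int hdr_size, unsigned int blk_size)
{
	if (bsiz < hdr_size)
		bsiz = hdr_size;	/* a block is never smaller than its header */
	if (bsiz > blk_size)
		bsiz = blk_size;	/* nor larger than the region's block size */
	return bsiz;
}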
Example No. 26
int jnl_file_extend(jnl_private_control *jpc, uint4 total_jnl_rec_size)
{
	file_control		*fc;
	boolean_t		need_extend;
	jnl_buffer_ptr_t     	jb;
	jnl_create_info 	jnl_info;
	jnl_file_header		header;
	uint4			new_alq;
	sgmnt_addrs		*csa;
	sgmnt_data_ptr_t	csd;
	char			prev_jnl_fn[JNL_NAME_SIZE];
	uint4			jnl_status = 0, status;
	int			new_blocks, result;
	GTM_BAVAIL_TYPE		avail_blocks;
	uint4			aligned_tot_jrec_size, count;

	switch(jpc->region->dyn.addr->acc_meth)
	{
	case dba_mm:
	case dba_bg:
		csa = &FILE_INFO(jpc->region)->s_addrs;
		break;
	default:
		GTMASSERT;
	}
	csd = csa->hdr;
	assert(csa == cs_addrs && csd == cs_data);
	assert(csa->now_crit || (csd->clustered && (CCST_CLOSED == csa->nl->ccp_state)));
	assert(jpc->region == gv_cur_region);
	assert(csa->jnl_state == csd->jnl_state);
	if (!JNL_ENABLED(csa) || (NOJNL == jpc->channel) || (JNL_FILE_SWITCHED(jpc)))
		GTMASSERT;	/* crit and messing with the journal file - how could it have vanished? */
	if (!csd->jnl_deq)
	{
		assert(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE) <= csd->jnl_alq);
		assert(csd->jnl_alq == csd->autoswitchlimit);
		new_blocks = csd->jnl_alq;
	} else
		/* May cause extension of  csd->jnl_deq * n blocks where n > 0 */
		new_blocks = ROUND_UP(DIVIDE_ROUND_UP(total_jnl_rec_size, DISK_BLOCK_SIZE), csd->jnl_deq);
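	/* For example, total_jnl_rec_size = 5000 bytes with DISK_BLOCK_SIZE = 512 gives
	 * DIVIDE_ROUND_UP(5000, 512) = 10 blocks, which with jnl_deq = 8 (illustrative
	 * values) rounds up to new_blocks = 16, i.e. two extension chunks.
	 */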
	jpc->status = SS_NORMAL;
	jb = jpc->jnl_buff;
	assert(0 <= new_blocks);
	DEBUG_ONLY(count = 0);
	for (need_extend = (0 != new_blocks); need_extend; )
	{
		DEBUG_ONLY(count++);
		/* Usually we will do the loop just once, doing the file extension. Rarely we might need to do an
		 * autoswitch instead, after which (again rarely) we might need an extension on the new journal to
		 * fit in the transaction's journal requirements. Therefore we should do this loop a maximum of
		 * twice; hence the assert below.
		 */
		assert(count <= 2);
		need_extend = FALSE;
		if (SS_NORMAL == (status = disk_block_available(jpc->channel, &avail_blocks, TRUE)))
		{
			if ((new_blocks * EXTEND_WARNING_FACTOR) > avail_blocks)
			{
				if (new_blocks > avail_blocks)
				{	/* if we cannot satisfy the request, it is an error */
					send_msg(VARLSTCNT(6) ERR_NOSPACEEXT, 4, JNL_LEN_STR(csd),
						new_blocks, avail_blocks);
					new_blocks = 0;
					jpc->status = SS_NORMAL;
					break;
				} else
					send_msg(VARLSTCNT(5) ERR_DSKSPACEFLOW, 3, JNL_LEN_STR(csd),
						(avail_blocks - new_blocks));
			}
		} else
			send_msg(VARLSTCNT(5) ERR_JNLFILEXTERR, 2, JNL_LEN_STR(csd), status);
		new_alq = jb->filesize + new_blocks;
		/* ensure current journal file size is well within autoswitchlimit --> design constraint */
		assert(csd->autoswitchlimit >= jb->filesize);
		if (csd->autoswitchlimit < (jb->filesize + (EXTEND_WARNING_FACTOR * new_blocks)))	/* close to max */
			send_msg(VARLSTCNT(5) ERR_JNLSPACELOW, 3, JNL_LEN_STR(csd), csd->autoswitchlimit - jb->filesize);
		if (csd->autoswitchlimit < new_alq)
		{	/* Reached max, need to autoswitch */
			/* Ensure new journal file can hold the entire current transaction's journal record requirements */
			assert(csd->autoswitchlimit >= MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size));
			memset(&jnl_info, 0, sizeof(jnl_info));
			jnl_info.prev_jnl = &prev_jnl_fn[0];
			set_jnl_info(gv_cur_region, &jnl_info);
			assert(JNL_ENABLED(csa) && (NOJNL != jpc->channel) && !(JNL_FILE_SWITCHED(jpc)));
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{	/* flush the cache and jnl-buffer-contents to current journal file before
				 * switching to a new journal. */
				wcs_flu(WCSFLU_FLUSH_HDR | WCSFLU_WRITE_EPOCH);
				jnl_file_close(gv_cur_region, TRUE, TRUE);
			} else
				rts_error(VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region), jpc->status);
			assert(!jgbl.forw_phase_recovery || (NULL != jgbl.mur_pini_addr_reset_fnptr));
			if (jgbl.forw_phase_recovery && (NULL != jgbl.mur_pini_addr_reset_fnptr))
				(*jgbl.mur_pini_addr_reset_fnptr)();
			assert(!jnl_info.no_rename);
			assert(!jnl_info.no_prev_link);
			if (EXIT_NRM == cre_jnl_file(&jnl_info))
			{
				assert(0 == memcmp(csd->jnl_file_name, jnl_info.jnl, jnl_info.jnl_len));
				assert(csd->jnl_file_name[jnl_info.jnl_len] == '\0');
				assert(csd->jnl_file_len == jnl_info.jnl_len);
				assert(csd->jnl_buffer_size == jnl_info.buffer);
				assert(csd->jnl_alq == jnl_info.alloc);
				assert(csd->jnl_deq == jnl_info.extend);
				assert(csd->jnl_before_image == jnl_info.before_images);
				csd->trans_hist.header_open_tn = jnl_info.tn;	/* needed for successful jnl_file_open() */
				send_msg(VARLSTCNT(4) ERR_NEWJNLFILECREATE, 2, JNL_LEN_STR(csd));
				fc = gv_cur_region->dyn.addr->file_cntl;
				fc->op = FC_WRITE;
				fc->op_buff = (sm_uc_ptr_t)csd;
				status = dbfilop(fc);
				if (SS_NORMAL != status)
					send_msg(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(gv_cur_region), status);
				assert(JNL_ENABLED(csa));
				/* call jnl_ensure_open instead of jnl_file_open to make sure jpc->pini_addr is set to 0 */
				jnl_status = jnl_ensure_open();	/* sets jpc->status */
				if (0 != jnl_status)
					rts_error(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(csd), DB_LEN_STR(gv_cur_region));
				assert(jb->filesize == csd->jnl_alq);
				aligned_tot_jrec_size = ALIGNED_ROUND_UP(MAX_REQD_JNL_FILE_SIZE(total_jnl_rec_size),
										csd->jnl_alq, csd->jnl_deq);
				if (aligned_tot_jrec_size > csd->jnl_alq)
				{	/* need to extend more than initial allocation in the new journal file
					 * to accommodate the current transaction.
					 */
					new_blocks = aligned_tot_jrec_size - csd->jnl_alq;
					assert(new_blocks);
					assert(0 == new_blocks % csd->jnl_deq);
					need_extend = TRUE;
				}
			} else
			{
				send_msg(VARLSTCNT(4) ERR_JNLCREATERR, 2, JNL_LEN_STR(csd));
				jpc->status = ERR_JNLNOCREATE;
				new_blocks = -1;
			}
		} else
		{
			assert(!need_extend);	/* ensure we won't go through the for loop again */
			/* Virtually extend currently used journal file */
			jb->filesize = new_alq;	/* actually the virtual file size in blocks */
			DO_FILE_READ(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLRDERR, 2, JNL_LEN_STR(csd), jpc->status);
			assert((header.virtual_size + new_blocks) == new_alq);
			header.virtual_size = new_alq;
			DO_FILE_WRITE(jpc->channel, 0, &header, JNL_HDR_LEN, jpc->status, jpc->status2);
			if (SS_NORMAL != jpc->status)
				rts_error(VARLSTCNT(5) ERR_JNLWRERR, 2, JNL_LEN_STR(csd), jpc->status);
		}
		if (0 >= new_blocks)
			break;
	}
	if (0 >= new_blocks)
	{
		jpc->status = ERR_JNLREADEOF;
		jnl_file_lost(jpc, ERR_JNLEXTEND);
		new_blocks = -1;
	}
	return new_blocks;
}
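/* The extend-vs-autoswitch decision from the loop above, distilled (names assumed): grow
 * the current journal file unless that would cross the autoswitch limit, in which case a
 * new journal file must be created first.
 */
typedef enum { JNL_EXTEND_CURRENT, JNL_AUTOSWITCH } jnl_grow_action;

static jnl_grow_action jnl_grow_decision(unsigned int filesize, unsigned int new_blocks,
					unsigned int autoswitchlimit)
{
	/* design constraint from the original code: filesize never exceeds autoswitchlimit */
	return (autoswitchlimit < (filesize + new_blocks)) ? JNL_AUTOSWITCH : JNL_EXTEND_CURRENT;
}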
Example No. 27
/* This function is called primarily to append a new histinfo record to the replication instance file by one of the following
 *	1) MUPIP REPLIC -SOURCE -START -ROOTPRIMARY command (after forking the child source server) if it created the journal pool.
 *	2) MUPIP REPLIC -SOURCE -ACTIVATE -ROOTPRIMARY command if this is a propagating primary to root primary transition.
 * In addition, this function also initializes the "lms_group_info" field in the instance file (from the "inst_info" field)
 *	if the current value is NULL.
 */
void	gtmsource_rootprimary_init(seq_num start_seqno)
{
	unix_db_info		*udi;
	repl_histinfo		histinfo;
	boolean_t		was_crit, switch_jnl;
	gd_region		*reg, *region_top;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	uint4			jnl_status;

	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(NULL != jnlpool.repl_inst_filehdr);
	/* Update journal pool fields to reflect this is a root primary startup and updates are enabled */
	assert(!udi->s_addrs.hold_onto_crit || jgbl.onlnrlbk);
	was_crit = udi->s_addrs.now_crit;
	if (!was_crit)
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, ASSERT_NO_ONLINE_ROLLBACK);
	jnlpool.repl_inst_filehdr->root_primary_cycle++;
	/* If this instance is transitioning from a non-rootprimary to rootprimary, switch journal files.
	 * This helps with maintaining accurate value of csd->zqgblmod_tn when the former primary connects
	 * to the current primary through a fetchresync-rollback or receiver-server-autorollback.
	 */
	switch_jnl = (!jnlpool.repl_inst_filehdr->was_rootprimary && (0 < jnlpool.repl_inst_filehdr->num_histinfo));
	jnlpool.repl_inst_filehdr->was_rootprimary = TRUE;
	assert(start_seqno >= jnlpool.jnlpool_ctl->start_jnl_seqno);
	assert(start_seqno == jnlpool.jnlpool_ctl->jnl_seqno);
	jnlpool.repl_inst_filehdr->jnl_seqno = start_seqno;
	assert(jgbl.onlnrlbk || jnlpool.jnlpool_ctl->upd_disabled);
	if (!jgbl.onlnrlbk)
		jnlpool.jnlpool_ctl->upd_disabled = FALSE;
	if (IS_REPL_INST_UUID_NULL(jnlpool.repl_inst_filehdr->lms_group_info))
	{	/* This is the first time this instance is being brought up either as a root primary or as a propagating
		 * primary. Initialize the "lms_group_info" fields in the instance file header in journal pool shared memory.
		 * They will be flushed to the instance file as part of the "repl_inst_histinfo_add -> repl_inst_flush_filehdr"
		 * function invocation below.
		 */
		assert('\0' == jnlpool.repl_inst_filehdr->lms_group_info.created_nodename[0]);
		assert('\0' == jnlpool.repl_inst_filehdr->lms_group_info.this_instname[0]);
		assert(!jnlpool.repl_inst_filehdr->lms_group_info.creator_pid);
		jnlpool.repl_inst_filehdr->lms_group_info = jnlpool.repl_inst_filehdr->inst_info;
		assert('\0' != jnlpool.repl_inst_filehdr->lms_group_info.created_nodename[0]);
		DBG_CHECK_CREATED_NODENAME(jnlpool.repl_inst_filehdr->lms_group_info.created_nodename);
		assert('\0' != jnlpool.repl_inst_filehdr->lms_group_info.this_instname[0]);
		assert(jnlpool.repl_inst_filehdr->lms_group_info.created_time);
		assert(jnlpool.repl_inst_filehdr->lms_group_info.creator_pid);
	}
	/* Initialize histinfo fields */
	memcpy(histinfo.root_primary_instname, jnlpool.repl_inst_filehdr->inst_info.this_instname, MAX_INSTNAME_LEN - 1);
	histinfo.root_primary_instname[MAX_INSTNAME_LEN - 1] = '\0';
	assert('\0' != histinfo.root_primary_instname[0]);
	histinfo.start_seqno = start_seqno;
	assert(jnlpool.jnlpool_ctl->strm_seqno[0] == jnlpool.repl_inst_filehdr->strm_seqno[0]);
	assert(jnlpool.repl_inst_filehdr->is_supplementary || (0 == jnlpool.jnlpool_ctl->strm_seqno[0]));
	histinfo.strm_seqno = (!jnlpool.repl_inst_filehdr->is_supplementary) ? 0 : jnlpool.jnlpool_ctl->strm_seqno[0];
	histinfo.root_primary_cycle = jnlpool.repl_inst_filehdr->root_primary_cycle;
	assert(process_id == getpid());
	histinfo.creator_pid = process_id;
	JNL_SHORT_TIME(histinfo.created_time);
	histinfo.strm_index = 0;
	histinfo.history_type = HISTINFO_TYPE_NORMAL;
	NULL_INITIALIZE_REPL_INST_UUID(histinfo.lms_group);
	/* The following fields will be initialized in the "repl_inst_histinfo_add" function call below.
	 *	histinfo.histinfo_num
	 *	histinfo.prev_histinfo_num
	 *	histinfo.last_histinfo_num[]
	 */
	/* Add the histinfo record to the instance file and flush the changes in the journal pool to the file header */
	repl_inst_histinfo_add(&histinfo);
	if (!was_crit)
		rel_lock(jnlpool.jnlpool_dummy_reg);
	if (switch_jnl)
	{
		SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_file_extend and its callees assume jgbl.gbl_jrec_time is set */
		for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
		{
			gv_cur_region = reg;
			change_reg();		/* sets cs_addrs/cs_data (needed by jnl_ensure_open) */
			if (!JNL_ENABLED(cs_addrs))
				continue;
			grab_crit(gv_cur_region);
			jpc = cs_addrs->jnl;
			/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order of jnl
			 * records. This needs to be done BEFORE the jnl_ensure_open as that could write journal records
			 * (if it decides to switch to a new journal file)
			 */
			jbp = jpc->jnl_buff;
			ADJUST_GBL_JREC_TIME(jgbl, jbp);
			jnl_status = jnl_ensure_open();
			if (0 == jnl_status)
			{
				if (EXIT_ERR == SWITCH_JNL_FILE(jpc))
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(4) ERR_JNLEXTEND, 2, JNL_LEN_STR(cs_data));
			} else
			{
				if (SS_NORMAL != jpc->status)
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(7) jnl_status, 4, JNL_LEN_STR(cs_data),
							DB_LEN_STR(gv_cur_region), jpc->status);
				else
					rts_error_csa(CSA_ARG(cs_addrs) VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data),
							DB_LEN_STR(gv_cur_region));
			}
			rel_crit(gv_cur_region);
		}
	}
}
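/* The bounded-copy-plus-forced-NUL idiom used above for root_primary_instname, shown
 * standalone. MAX_INSTNAME_LEN is an assumed constant, and src is assumed to be a
 * fixed-size field of at least MAX_INSTNAME_LEN - 1 bytes, as in the original.
 */
#include <string.h>

#define MAX_INSTNAME_LEN	16

static void copy_instname(char dst[MAX_INSTNAME_LEN], const char *src)
{
	memcpy(dst, src, MAX_INSTNAME_LEN - 1);	/* copy at most MAX_INSTNAME_LEN - 1 bytes */
	dst[MAX_INSTNAME_LEN - 1] = '\0';	/* guarantee NUL termination */
}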
Example No. 28
int	mur_forward_multi_proc(reg_ctl_list *rctl)
{
	boolean_t		multi_proc, this_reg_stuck, release_latch, ok_to_play;
	boolean_t		cancelled_dbsync_timer, cancelled_timer;
	reg_ctl_list		*rctl_top, *prev_rctl;
	jnl_ctl_list		*jctl;
	gd_region		*reg;
	sgmnt_addrs		*csa;
	seq_num 		rec_token_seq;
	jnl_tm_t		rec_time;
	enum broken_type	recstat;
	jnl_record		*rec;
	enum jnl_record_type	rectype;
	char			errstr[256];
	int			i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck;
	uint4			status, regcnt_stuck, num_partners, start_hrtbt_cntr;
	forw_multi_struct	*forw_multi;
	shm_forw_multi_t	*sfm;
	multi_struct 		*multi;
	jnl_tm_t		adjusted_resolve_time;
	shm_reg_ctl_t		*shm_rctl_start, *shm_rctl, *first_shm_rctl;
	size_t			shm_size, reccnt, copy_size;
	int4			*size_ptr;
	char			*shmPtr; /* not using "shm_ptr" since it is already used in an AIX include file */
	int			shmid;
	multi_proc_shm_hdr_t	*mp_hdr;	/* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */

	status = 0;
	/* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc"
	 * invocation in "mur_forward"), it is possible one of the forked processes finishes one invocation of
	 * "mur_forward_multi_proc" before even another forked process gets assigned one task in "gtm_multi_proc_helper".
	 * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff
	 * so return for later invocations.
	 */
	if (mur_forward_multi_proc_done)
		return 0;
	mur_forward_multi_proc_done = TRUE;
	/* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */
	prev_rctl = NULL;
	rctl_start = NULL;
	adjusted_resolve_time = murgbl.adjusted_resolve_time;
	assert(0 == murgbl.regcnt_remaining);
	multi_proc = multi_proc_in_use;	/* cache value in "local" to speed up access inside loops below */
	if (multi_proc)
	{
		mp_hdr = multi_proc_shm_hdr;
		shm_rctl_start = mur_shm_hdr->shm_rctl_start;
		if (jgbl.onlnrlbk)
		{
			for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
			{
				assert(rctl->csa->hold_onto_crit);	/* would have been set in parent process */
				rctl->csa->hold_onto_crit = FALSE;	/* reset since we don't own this region */
				assert(rctl->csa->now_crit);		/* would have been set in parent process */
				rctl->csa->now_crit = FALSE;		/* reset since we don't own this region */
			}
		}
		START_HEARTBEAT_IF_NEEDED; /* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */
	}
	first_shm_rctl = NULL;
	/* Phase1 of forward recovery starts */
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		/* Check if "rctl" is available for us or if some other concurrent process has taken it */
		if (multi_proc)
		{
			rctl_index = rctl - &mur_ctl[0];
			shm_rctl = &shm_rctl_start[rctl_index];
			if (shm_rctl->owning_pid)
			{
				assert(process_id != shm_rctl->owning_pid);
				continue;
			}
			GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			assert(release_latch);
			for ( ; rctl < rctl_top; rctl++, shm_rctl++)
			{
				if (shm_rctl->owning_pid)
				{
					assert(process_id != shm_rctl->owning_pid);
					continue;
				}
				shm_rctl->owning_pid = process_id;	/* Declare ownership */
				rctl->this_pid_is_owner = TRUE;
				if (jgbl.onlnrlbk)
				{	/* This is an online rollback and crit was grabbed on all regions by the parent rollback
					 * process. But this child process now owns this region and does the actual rollback on
					 * this region so borrow crit for the duration of this child process.
					 */
					csa = rctl->csa;
					csa->hold_onto_crit = TRUE;
					csa->now_crit = TRUE;
					assert(csa->nl->in_crit == mp_hdr->parent_pid);
					csa->nl->in_crit = process_id;
					assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid);
					csa->nl->onln_rlbk_pid = process_id;
				}
				if (NULL == first_shm_rctl)
					first_shm_rctl = shm_rctl;
				break;
			}
			REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			if (rctl >= rctl_top)
			{
				assert(rctl == rctl_top);
				break;
			}
			/* Set key to print this rctl's region-name as prefix in case this forked-off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
#			ifdef MUR_DEBUG
			fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key);
#			endif
		} else
			rctl->this_pid_is_owner = TRUE;
		if (mur_options.forward)
		{
			assert(NULL == rctl->jctl_turn_around);
			jctl = rctl->jctl = rctl->jctl_head;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = JNL_HDR_LEN;
			jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; /* initialized to reflect journaling is not enabled */
			if (mur_options.rollback)
				jgbl.mur_jrec_seqno = jctl->jfh->start_seqno;
		} else
		{
			jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = jctl->turn_around_offset;
			jgbl.mur_jrec_seqno = jctl->turn_around_seqno;
			assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset));
		}
		if (mur_options.rollback)
		{
			if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno)
			{
				/* Assert that murgbl.losttn_seqno is never less than jgbl.mur_jrec_seqno (the turnaround
				 * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will
				 * eventually be the post-rollback seqno. If this condition is violated, the result of the
				 * recovery is a compromised database (the file header will indicate a Region Seqno which
				 * is not necessarily correct since seqnos prior to it might be absent in the database).
				 * Therefore, this is an out-of-design situation with respect to rollback and so stop it.
				 */
				assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno);
				murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno;
			}
			assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno);
		}
		if (mur_options.update || mur_options.extr[GOOD_TN])
		{
			reg = rctl->gd;
			gv_cur_region = reg;
			tp_change_reg();	/* note : sets cs_addrs to non-NULL value even if gv_cur_region->open is FALSE
						 * (cs_data could still be NULL). */
			rctl->csa = cs_addrs;
			cs_addrs->miscptr = (void *)rctl;
			rctl->csd = cs_data;
			rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr;
			assert(!reg->open || (NULL != cs_addrs->dir_tree));
			gv_target = cs_addrs->dir_tree;
		}
		jctl->after_end_of_data = FALSE;
		status = mur_next(jctl, jctl->rec_offset);
		assert(ERR_JNLREADEOF != status);	/* cannot get EOF at start of forward processing */
		if (SS_NORMAL != status)
			goto finish;
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start");
		rctl->process_losttn = FALSE;
		/* Any multi-region TP transaction will be processed as multiple single-region TP transactions up
		 * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP
		 * transaction. This is needed for proper lost-tn determination (any multi-region transaction that
		 * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn).
		 */
		do
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			if (rec_time >= adjusted_resolve_time)
				break;	/* Records after this adjusted resolve_time will be processed below in phase2 */
			/* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards,
			 * it is possible that if we encounter any broken transactions here we won't know they are broken
			 * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover)
			 * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more
			 * effective broken tn determination).
			 */
			status = mur_forward_play_cur_jrec(rctl);
			if (SS_NORMAL != status)
				break;
			status = mur_next_rec(&jctl);
		} while (SS_NORMAL == status);
		CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
		if (SS_NORMAL != status)
		{	/* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR	ERR_FORCEDHALT is possible
			 * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly.
			 */
			assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status));
			goto finish;
		}
		if (rctl->forw_eof_seen)
		{
			PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time");
			continue;	/* Reached EOF before even getting to tp_resolve_time.
					 * Do not even consider region for next processing loop */
		}
		rctl->last_tn = 0;
		murgbl.regcnt_remaining++;	/* # of regions participating in recovery at this point */
		if (NULL == rctl_start)
			rctl_start = rctl;
		if (NULL != prev_rctl)
		{
			prev_rctl->next_rctl = rctl;
			rctl->prev_rctl = prev_rctl;
		}
		prev_rctl = rctl;
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time");
	}
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	/* Note that it is possible for rctl_start to be NULL at this point. That is, there is no journal record in any region
	 * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later
	 * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa.
	 */
	if (NULL != rctl_start)
	{
		assert(NULL != prev_rctl);
		prev_rctl->next_rctl = rctl_start;
		rctl_start->prev_rctl = prev_rctl;
	}
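	/* At this point the unprocessed rctls form a circular doubly-linked list (via
	 * prev_rctl/next_rctl), so phase 2 below can round-robin across regions and simply
	 * unlink a region from the ring once all of its journal records are processed.
	 */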
	rctl = rctl_start;
	regcnt_stuck = 0; /* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */
	assert((NULL == rctl) || (NULL == rctl->forw_multi));
	gv_cur_region = NULL;	/* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data
				 * all get set in sync by the MUR_CHANGE_REG macro below.
				 */
	/* Phase2 of forward recovery starts */
	while (NULL != rctl)
	{	/* while there is at least one region remaining with unprocessed journal records */
		assert(NULL != rctl_start);
		assert(0 < murgbl.regcnt_remaining);
		if (NULL != rctl->forw_multi)
		{	/* This region's current journal record is part of a TP transaction waiting for other regions */
			regcnt_stuck++;
			assert(regcnt_stuck <= murgbl.regcnt_remaining);
			if (regcnt_stuck == murgbl.regcnt_remaining)
			{
				assertpro(multi_proc_in_use); /* Else : Out-of-design situation. Stuck in ALL regions. */
				/* Check one last time if all regions are stuck waiting for another process to resolve the
				 * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed.
				 */
				rctl = rctl_start;
				start_hrtbt_cntr = heartbeat_counter;
				do
				{
					if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
					{	/* We are at a logical point. So exit if signaled by parent */
						status = ERR_FORCEDHALT;
						goto finish;
					}
					forw_multi = rctl->forw_multi;
					assert(NULL != forw_multi);
					sfm = forw_multi->shm_forw_multi;
					assert(NULL != sfm);
					assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward);
#					ifdef MUR_DEBUG
					fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n",
						process_id, __LINE__, (long long int)sfm->token,
						sfm->num_reg_seen_forward, sfm->num_reg_seen_backward);
#					endif
					if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward)
					{	/* We are no longer stuck in this region */
						assert(!forw_multi->no_longer_stuck);
						forw_multi->no_longer_stuck = TRUE;
						break;
					}
					rctl = rctl->next_rctl;	/* Move on to the next available region */
					assert(NULL != rctl);
					if (rctl == rctl_start)
					{	/* We went through all regions once and are still stuck.
						 * Sleep until at least TWO heartbeats have elapsed, after which check for deadlock.
						 * Do this only in the child process that owns the FIRST region in the region list.
						 * This way we don't have contention for the GRAB_MULTI_PROC_LATCH from
						 * all children at more or less the same time.
						 */
						if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2)))
						{	/* Check if all processes are stuck for a while. If so assertpro */
							GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							assert(release_latch);
							shm_rctl_start = mur_shm_hdr->shm_rctl_start;
							num_reg_stuck = 0;
							for (i = 0; i < murgbl.reg_total; i++)
							{
								shm_rctl = &shm_rctl_start[i];
								sfm = shm_rctl->shm_forw_multi;
								if (NULL != sfm)
								{
									if (sfm->num_reg_seen_forward != sfm->num_reg_seen_backward)
										num_reg_stuck++;
								}
							}
							REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							/* If everyone is stuck at this point, it is an out-of-design situation */
							assertpro(num_reg_stuck < murgbl.reg_total);
							start_hrtbt_cntr = heartbeat_counter;
						} else
						{	/* Sleep and recheck if any region we are stuck in got resolved.
							 * To minimize time spent sleeping, we just yield our timeslice.
							 */
							rel_quant();
							continue;
						}
					}
				} while (TRUE);
			} else
			{
				rctl = rctl->next_rctl;	/* Move on to the next available region */
				assert(NULL != rctl);
				continue;
			}
		}
		regcnt_stuck = 0;	/* restart the counter now that we found at least one non-stuck region */
		MUR_CHANGE_REG(rctl);
		jctl = rctl->jctl;
		this_reg_stuck = FALSE;
		for ( status = SS_NORMAL; SS_NORMAL == status; )
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify));
			assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			/* Check if current journal record can be played right away or need to wait for corresponding journal
			 * records from other participating TP regions to be reached. A non-TP or ZTP transaction can be played
			 * without issues (i.e. has no dependencies with any other regions). A single-region TP transaction too
			 * falls in the same category. A multi-region TP transaction needs to wait until all participating regions
			 * have played all journal records BEFORE this TP in order to ensure recover plays records in the exact
			 * same order that GT.M performed them in.
			 */
			/* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process for
			 * broken transaction processing. So we process multi-region TP transactions as multiple single-region
			 * TP transactions in forward phase.
			 */
			if (FENCE_NONE != mur_options.fences)
			{
				rectype = (enum jnl_record_type)rec->prefix.jrec_type;
				if (IS_TP(rectype) && IS_TUPD(rectype))
				{
					assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants);
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants);
					num_partners = rec->jrec_set_kill.num_participants;
					assert(0 < num_partners);
					if (1 < num_partners)
					{
						this_reg_stuck = TRUE;
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
					}
				}
			}
			if (this_reg_stuck)
			{
				rec_token_seq = GET_JNL_SEQNO(rec);
				MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time);
				if (NULL != forw_multi)
				{	/* This token has already been seen in another region in forward processing.
					 * Add current region as well. If all regions have been resolved, then play
					 * the entire transaction maintaining the exact same order of updates within.
					 */
					if (!forw_multi->no_longer_stuck)
						MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl);
				} else
				{	/* First time we are seeing this token in forward processing. Check if this
					 * has already been determined to be a broken transaction.
					 */
					recstat = GOOD_TN;
					multi = NULL;
					if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
					{
						multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE);
						if ((NULL != multi) && (0 < multi->partner))
							recstat = BROKEN_TN;
					}
					MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners,
								recstat, multi);
				}
				/* Check that "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP
				 * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below.
				 */
				assert(NULL != forw_multi->u.tabent);
				assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward);
				if (multi_proc)
				{
					sfm = forw_multi->shm_forw_multi;
					ok_to_play = (NULL == sfm) || (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward);
				} else
					ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward);
				assert(ok_to_play || !forw_multi->no_longer_stuck);
				if (ok_to_play)
				{	/* We have enough information to proceed with playing this multi-region TP in
					 * forward processing (even if we might not have seen all needed regions). Now play it.
					 * Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it.
					 */
					assert(forw_multi == rctl->forw_multi);
					status = mur_forward_play_multireg_tp(forw_multi, rctl);
					this_reg_stuck = FALSE;
					/* Note that as part of playing the TP transaction, we could have reached
					 * the EOF of rctl. In this case, we need to break out of the loop.
					 */
					if ((SS_NORMAL != status) || rctl->forw_eof_seen)
						break;
					assert(NULL == rctl->forw_multi);
					assert(!dollar_tlevel);
					jctl = rctl->jctl;	/* In case the first record after the most recently processed
								 * TP transaction is in the next generation journal file */
					continue;
				}
				break;
			} else
			{
				status = mur_forward_play_cur_jrec(rctl);
				if (SS_NORMAL != status)
					break;
			}
			assert(!this_reg_stuck);
			status = mur_next_rec(&jctl);
		}
		assert((NULL == rctl->forw_multi) || this_reg_stuck);
		assert((NULL != rctl->forw_multi) || !this_reg_stuck);
		if (!this_reg_stuck)
		{	/* We are not stuck in this region (to resolve a multi-region TP).
			 * This means we are done processing all the records of this region.
			 */
			assert(NULL == rctl->forw_multi);
			if (!rctl->forw_eof_seen)
			{
				CHECK_IF_EOF_REACHED(rctl, status);
					/* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
				if (SS_NORMAL != status)
				{
					assert(ERR_FILENOTCREATE == status);
					goto finish;
				}
				assert(!dollar_tlevel);
				DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl); /* since all of its records should have been processed */
			} else
			{	/* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed
				 * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl"
				 * field could have been pointing to another <rctl> that has since then also been
				 * removed inside the same function. Therefore the "next_rctl" field is not reliable
				 * in this case but instead we should rely on the global variable "rctl_start" which
				 * points to the list of unprocessed rctls. Set "next_rctl" accordingly.
				 */
				rctl->next_rctl = rctl_start;
				if (ERR_JNLREADEOF == status)
					status = SS_NORMAL;
			}
			assert(rctl->deleted_from_unprocessed_list);
		}
		assert(SS_NORMAL == status);
		assert(!this_reg_stuck || !rctl->forw_eof_seen);
		assert((NULL == rctl->next_rctl) || (NULL != rctl_start));
		assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining));
		rctl = rctl->next_rctl;	/* Note : even though "rctl" could have been deleted from the doubly linked list above,
					 * rctl->next_rctl is not touched so we can still use it to get to the next element. */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers set by this child process for this region. The child is not
			 * going to go through exit handling code (no gds_rundown etc.), so we need to clear up
			 * csa->nl->wcs_timers here (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
								recstat < TOT_EXTR_TYPES;
									recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file for example to
				 * write an INCTN record but decided to not write anything because it was not a -detail
				 * type of extract. So *size_ptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9), don't bother creating a shmid to communicate back
			 * with the parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmat() : shmid=%d shmsize=0x%llx", shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
									recstat < TOT_EXTR_TYPES;
										recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
												(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
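
/* A minimal standalone sketch (not GT.M source) of the gating idea mur_forward uses for
 * multi-region TP transactions above: backward processing records, per token, how many
 * regions participated (num_reg_seen_backward); forward processing counts regions as they
 * reach the token and plays the transaction only once the two counts match. All names here
 * (tp_token_t, token_reached_forward etc.) are invented for illustration; the real code
 * keeps these counters in a hash table (MUR_FORW_TOKEN_LOOKUP/MUR_FORW_TOKEN_ADD).
 */
#include <stdio.h>

#define MAX_TOKENS	16

typedef struct
{
	unsigned long	token;			/* transaction token (jnl_seqno) */
	int		num_reg_seen_backward;	/* participants counted in backward processing */
	int		num_reg_seen_forward;	/* regions that reached the token going forward */
} tp_token_t;

static tp_token_t	tokens[MAX_TOKENS];
static int		ntokens;

/* Returns 1 when every participating region has reached the token (transaction playable),
 * 0 when the calling region must stay stuck, -1 if this toy fixed-size table is full.
 */
int	token_reached_forward(unsigned long token, int num_partners)
{
	int	i;

	for (i = 0; i < ntokens; i++)
		if (tokens[i].token == token)
			break;
	if (i == ntokens)
	{	/* first region to see this token in forward processing */
		if (MAX_TOKENS == ntokens)
			return -1;
		tokens[ntokens].token = token;
		tokens[ntokens].num_reg_seen_backward = num_partners;
		tokens[ntokens].num_reg_seen_forward = 0;
		ntokens++;
	}
	tokens[i].num_reg_seen_forward++;
	return (tokens[i].num_reg_seen_forward == tokens[i].num_reg_seen_backward);
}

int	main(void)
{	/* a 3-region transaction with token 42: only the third region's arrival unblocks it */
	printf("%d %d %d\n", token_reached_forward(42, 3), token_reached_forward(42, 3),
		token_reached_forward(42, 3));	/* prints: 0 0 1 */
	return 0;
}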
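
/* A minimal sketch (hypothetical names, reduced error handling) of the shared-memory handoff
 * in the "finish:" block above: a forked-off child creates a private segment with
 * shmget(IPC_PRIVATE), copies its per-region extract list into it and publishes the shmid
 * (in the real code, shm_rctl->jnlext_shmid) for the parent to shmat() and merge later. The
 * parent is expected to shmctl(IPC_RMID) the segment once it has read it back.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int	publish_list(const void *list, size_t nbytes, int *shmid_out)
{
	int	shmid;
	char	*ptr;

	shmid = shmget(IPC_PRIVATE, nbytes, 0600 | IPC_CREAT);
	if (-1 == shmid)
	{
		perror("shmget");
		return -1;
	}
	ptr = (char *)shmat(shmid, NULL, 0);
	if ((char *)-1 == ptr)
	{
		perror("shmat");
		return -1;
	}
	memcpy(ptr, list, nbytes);	/* parent will shmat(shmid) and read this back */
	shmdt(ptr);
	*shmid_out = shmid;
	return 0;
}

int	main(void)
{
	int	shmid;
	long	list[4] = { 1, 2, 3, 4 };	/* stands in for the jnlext_multi_t array */

	if (0 == publish_list(list, sizeof(list), &shmid))
		printf("published shmid=%d\n", shmid);
	return 0;
}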
trans_num gvcst_bmp_mark_free(kill_set *ks)
{
	block_id		bit_map, next_bm, *updptr;
	blk_ident		*blk, *blk_top, *nextblk;
	trans_num		ctn, start_db_fmt_tn;
	unsigned int		len;
#	if defined(UNIX) && defined(DEBUG)
	unsigned int		lcl_t_tries;
#	endif
	int4			blk_prev_version;
	srch_hist		alt_hist;
	trans_num		ret_tn = 0;
	boolean_t		visit_blks;
	srch_blk_status		bmphist;
	cache_rec_ptr_t		cr;
	enum db_ver		ondsk_blkver;
	enum cdb_sc		status;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	TREF(in_gvcst_bmp_mark_free) = TRUE;
	assert(inctn_bmp_mark_free_gtm == inctn_opcode || inctn_bmp_mark_free_mu_reorg == inctn_opcode);
	/* Note down the desired_db_format_tn before you start relying on cs_data->fully_upgraded.
	 * If the db is fully_upgraded, take the optimal path that does not need to read each block being freed.
	 * But in order to detect concurrent desired_db_format changes, note down the tn (when the last format change occurred)
	 * 	before the fully_upgraded check	and after having noted down the database current_tn.
	 * If they are the same, then we are guaranteed no concurrent desired_db_format change occurred.
	 * If they are not, then fall through to the non-optimal path where each to-be-killed block has to be visited.
	 * The reason we need to visit every block in case desired_db_format changes is to take care of the case where
	 *	MUPIP REORG DOWNGRADE concurrently changes a block that we are about to free.
	 */
	start_db_fmt_tn = cs_data->desired_db_format_tn;
	visit_blks = (!cs_data->fully_upgraded);	/* Local evaluation */
	assert(!visit_blks || (visit_blks && dba_bg == cs_addrs->hdr->acc_meth)); /* must have blks_to_upgrd == 0 for non-BG */
	assert(!dollar_tlevel); 			/* Should NOT be in TP now */
	blk = &ks->blk[0];
	blk_top = &ks->blk[ks->used];
	if (!visit_blks)
	{	/* Database has been completely upgraded. Free all blocks in one bitmap as part of one transaction. */
		assert(cs_data->db_got_to_v5_once); /* assert all V4 fmt blocks (including RECYCLED) have space for V5 upgrade */
		inctn_detail.blknum_struct.blknum = 0; /* to indicate no adjustment to "blks_to_upgrd" necessary */
		/* If any of the mini transactions below restarts because of an online rollback, we don't want the application
		 * refresh to happen (like $ZONLNRLBK++ or rts_error(DBROLLEDBACK)). This is because, although we are currently
		 * in non-TP (dollar_tlevel = 0), we could actually be in a TP transaction and have merely faked dollar_tlevel.
		 * In such a case, we should NOT be issuing a DBROLLEDBACK error as TP transactions are supposed to just restart
		 * in case of an online rollback. So, set the global variable that gtm_onln_rlbk_clnup can check to skip the
		 * application refresh, but still reset the clues. The next update will see the cycle mismatch and accordingly
		 * take the right action.
		 */
		for ( ; blk < blk_top;  blk = nextblk)
		{
			if (0 != blk->flag)
			{
				nextblk = blk + 1;
				continue;
			}
			assert(0 < blk->block);
			assert((int4)blk->block < cs_addrs->ti->total_blks);
			bit_map = ROUND_DOWN2((int)blk->block, BLKS_PER_LMAP);
			next_bm = bit_map + BLKS_PER_LMAP;
			CHECK_AND_RESET_UPDATE_ARRAY;	/* reset update_array_ptr to update_array */
			/* Scan for the next local bitmap */
			updptr = (block_id *)update_array_ptr;
			for (nextblk = blk;
				(nextblk < blk_top) && (0 == nextblk->flag) && ((block_id)nextblk->block < next_bm);
				++nextblk)
			{
				assert((block_id)nextblk->block - bit_map);
				*updptr++ = (block_id)nextblk->block - bit_map;
			}
			len = (unsigned int)((char *)nextblk - (char *)blk);
			update_array_ptr = (char *)updptr;
			alt_hist.h[0].blk_num = 0;			/* need for calls to T_END for bitmaps */
			alt_hist.h[0].blk_target = NULL;		/* need to initialize for calls to T_END */
			/* the following assumes SIZEOF(blk_ident) == SIZEOF(int) */
			assert(SIZEOF(blk_ident) == SIZEOF(int));
			*(int *)update_array_ptr = 0;
			t_begin(ERR_GVKILLFAIL, UPDTRNS_DB_UPDATED_MASK);
			for (;;)
			{
				ctn = cs_addrs->ti->curr_tn;
				/* Need a read fence before reading fields from cs_data as we are reading outside
				 * of crit and relying on this value to detect desired db format state change.
				 */
				SHM_READ_MEMORY_BARRIER;
				if (start_db_fmt_tn != cs_data->desired_db_format_tn)
				{	/* Concurrent db format change has occurred. Need to visit every block to be killed
					 * to determine its block format. Fall through to the non-optimal path below
					 */
					ret_tn = 0;
					break;
				}
				bmphist.blk_num = bit_map;
				if (NULL == (bmphist.buffaddr = t_qread(bmphist.blk_num, (sm_int_ptr_t)&bmphist.cycle,
									&bmphist.cr)))
				{
					t_retry((enum cdb_sc)rdfail_detail);
					continue;
				}
				t_write_map(&bmphist, (uchar_ptr_t)update_array, ctn, -(int4)(nextblk - blk));
				UNIX_ONLY(DEBUG_ONLY(lcl_t_tries = t_tries));
				if ((trans_num)0 == (ret_tn = t_end(&alt_hist, NULL, TN_NOT_SPECIFIED)))
				{
#					ifdef UNIX
					assert((CDB_STAGNATE == t_tries) || (lcl_t_tries == t_tries - 1));
					status = LAST_RESTART_CODE;
					if ((cdb_sc_onln_rlbk1 == status) || (cdb_sc_onln_rlbk2 == status)
						|| TREF(rlbk_during_redo_root))
					{	/* t_end restarted due to online rollback. Discard bitmap free-up and return control
						 * to the application. But, before that reset only_reset_clues_if_onln_rlbk to FALSE
						 */
						TREF(in_gvcst_bmp_mark_free) = FALSE;
						send_msg(VARLSTCNT(6) ERR_IGNBMPMRKFREE, 4, REG_LEN_STR(gv_cur_region),
								DB_LEN_STR(gv_cur_region));
						t_abort(gv_cur_region, cs_addrs);
						return ret_tn; /* actually 0 */
					}
#					endif
					continue;
				}
				break;
			}
			if (0 == ret_tn) /* db format change occurred. Fall through to below for loop to visit each block */
			{
				/* Abort any active transaction to get rid of lingering Non-TP artifacts */
				t_abort(gv_cur_region, cs_addrs);
				break;
			}
		}	/* for all blocks in the kill_set */
	}
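
/* A minimal sketch (hypothetical values) of how the loop above batches a sorted kill list
 * per local bitmap: ROUND_DOWN2(b, BLKS_PER_LMAP) yields the bitmap block owning b, every
 * listed block below bit_map + BLKS_PER_LMAP is freed in the same mini-transaction, and the
 * update array holds offsets relative to the bitmap (never 0, since offset 0 is the bitmap
 * block itself).
 */
#include <stdio.h>

#define BLKS_PER_LMAP		512			/* blocks covered by one local bitmap */
#define ROUND_DOWN2(x, y)	((x) & ~((y) - 1))	/* y must be a power of two */

int	main(void)
{
	int	kill_list[] = { 3, 7, 511, 513, 1025 };	/* sorted, as the kill_set above is */
	int	n = 5, i = 0;

	while (i < n)
	{
		int	bit_map = ROUND_DOWN2(kill_list[i], BLKS_PER_LMAP);
		int	next_bm = bit_map + BLKS_PER_LMAP;

		printf("bitmap %d:", bit_map);
		for ( ; (i < n) && (kill_list[i] < next_bm); i++)
			printf(" +%d", kill_list[i] - bit_map);	/* offset within the local map */
		printf("\n");	/* one mini-transaction's worth of frees */
	}
	return 0;	/* prints: bitmap 0: +3 +7 +511, bitmap 512: +1, bitmap 1024: +1 */
}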
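
/* A minimal sketch of the optimistic validation used in the fast path above: sample
 * desired_db_format_tn once up front, then re-read it (after a read barrier) inside the
 * retry loop and abandon the fast path if it moved. The GCC/Clang builtin
 * __atomic_thread_fence stands in for SHM_READ_MEMORY_BARRIER; the variable layout is
 * invented for illustration.
 */
#include <stdio.h>

typedef unsigned long long	trans_num;

static volatile trans_num	desired_db_format_tn;	/* bumped by a concurrent MUPIP REORG UP/DOWNGRADE */

/* Returns 1 if the fast path is still valid, 0 if the caller must fall back and visit each block */
int	fast_path_still_valid(trans_num start_db_fmt_tn)
{
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* read fence before re-reading shared state */
	return (start_db_fmt_tn == desired_db_format_tn);
}

int	main(void)
{
	trans_num	start_db_fmt_tn = desired_db_format_tn;

	printf("%d\n", fast_path_still_valid(start_db_fmt_tn));	/* 1 : no concurrent change */
	desired_db_format_tn++;						/* simulate a format change */
	printf("%d\n", fast_path_still_valid(start_db_fmt_tn));	/* 0 : fall back */
	return 0;
}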
bool	gtcmtr_increment(void)
{
	cm_region_list	*reg_ref;
	mval		incr_delta, post_incr;
	unsigned char	buff[MAX_ZWR_KEY_SZ], *end;
	unsigned char	*ptr, regnum;
	short		n;
	unsigned short	top, len, temp_short;
	static readonly	gds_file_id file;

	error_def(ERR_KEY2BIG);
	error_def(ERR_GVIS);
	error_def(ERR_DBPRIVERR);

	ptr = curr_entry->clb_ptr->mbf;
	assert(*ptr == CMMS_Q_INCREMENT);
	ptr++;
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	regnum = *ptr++;
	reg_ref = gtcm_find_region(curr_entry, regnum);
	len--; /* subtract size of regnum */
	CM_GET_GVCURRKEY(ptr, len);
	gtcm_bind_name(reg_ref->reghead, TRUE);
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (JNL_ALLOWED(cs_addrs))
	{	/* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines
		 *	do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not
		 *	easy to re-execute the following two assignments in case gvcst_incr's call to t_end encounters a
		 *	cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment
		 *	if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE.
		 * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED
		 * 	might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead.
		 * this approach ensures that the jnl_put_jrt_pini gets the appropriate prc_vec for writing into the
		 * 	journal record in case JNL_ENABLED turns out to be TRUE in t_end time.
		 * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access
		 * 	and hence the correctness of prc_vec (whenever it turns out necessary) is guaranteed.
		 */
		originator_prc_vec = curr_entry->pvec;
		cs_addrs->jnl->pini_addr = reg_ref->pini_addr;
	}
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	incr_delta.mvtype = MV_STR;
	incr_delta.str.len = len;
	incr_delta.str.addr = (char *)ptr;
	if ((n = gv_currkey->end + 1) > gv_cur_region->max_key_size)
	{
		if (NULL == (end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)))
			end = &buff[MAX_ZWR_KEY_SZ - 1];
		rts_error(VARLSTCNT(11) ERR_KEY2BIG, 4, n, (int4)gv_cur_region->max_key_size,
			REG_LEN_STR(gv_cur_region), 0, ERR_GVIS, 2, end - buff, buff);
	}
	MV_FORCE_NUMD(&incr_delta);
	gvcst_incr(&incr_delta, &post_incr);
	if (JNL_ALLOWED(cs_addrs))
		reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case journal switch occurred */
	ptr = curr_entry->clb_ptr->mbf;
	if (MV_DEFINED(&post_incr))
	{
		temp_short = (unsigned short)post_incr.str.len;
		assert((int4)temp_short == post_incr.str.len); /* ushort <- int4 assignment lossy? */
		if (curr_entry->clb_ptr->mbl < 1 +			/* msg header */
					       SIZEOF(temp_short) +	/* size of length of $INCR return value */
					       temp_short) 		/* length of $INCR return value */
		{	/* resize buffer */
			cmi_realloc_mbf(curr_entry->clb_ptr, 1 + SIZEOF(temp_short) + temp_short);
			ptr = curr_entry->clb_ptr->mbf;
		}
		*ptr++ = CMMS_R_INCREMENT;
		PUT_USHORT(ptr, temp_short);
		ptr += SIZEOF(unsigned short);
		memcpy(ptr, post_incr.str.addr, temp_short);
		ptr += temp_short;
	} else
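
/* A minimal sketch (simplified, hypothetical layout) of the wire format gtcmtr_increment
 * walks above: the request is [1-byte CMMS_Q_INCREMENT][2-byte key len][1-byte regnum][key]
 * [2-byte delta len][delta], and the reply is [1-byte CMMS_R_INCREMENT][2-byte len][value].
 * Plain memcpy stands in for the GET_USHORT/PUT_USHORT macros; the real GT.CM code also
 * deals with buffer resizing and undefined ($INCREMENT error) results.
 */
#include <stdio.h>
#include <string.h>

static unsigned char	*put_counted(unsigned char *ptr, const char *val, unsigned short len)
{
	memcpy(ptr, &len, sizeof(len));		/* 2-byte length prefix (native byte order here) */
	ptr += sizeof(len);
	memcpy(ptr, val, len);
	return ptr + len;
}

int	main(void)
{
	unsigned char	msg[64], *ptr = msg;
	unsigned short	len;

	*ptr++ = 0x01;				/* stand-in for CMMS_R_INCREMENT */
	ptr = put_counted(ptr, "42", 2);	/* the post-increment value */
	/* parse it back the way the reply reader would */
	ptr = msg + 1;
	memcpy(&len, ptr, sizeof(len));
	ptr += sizeof(len);
	printf("$INCREMENT returned %.*s\n", (int)len, (char *)ptr);
	return 0;
}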