Ejemplo n.º 1
0
bool gtcmtr_kill(void)
{
	cm_region_list	*reg_ref;
	unsigned char	*ptr, regnum;
	unsigned short	len;
	static readonly	gds_file_id file;

	error_def(ERR_DBPRIVERR);

	ptr = curr_entry->clb_ptr->mbf;
	assert(*ptr == CMMS_Q_KILL);
	ptr++;
	GET_SHORT(len, ptr);
	ptr += sizeof(unsigned short);
	regnum = *ptr++;
	reg_ref = gtcm_find_region(curr_entry,regnum);
	len--; /* subtract size of regnum */
	CM_GET_GVCURRKEY(ptr, len);
	gtcm_bind_name(reg_ref->reghead, TRUE);
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (JNL_ALLOWED(cs_addrs))
	{	/* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines
		 *	do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not
		 *	easy to re-execute the following two assignments in case gvcst_kill()'s call to t_end() encounters a
		 *	cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment
		 *	if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE.
		 * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED
		 * 	might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead.
		 * this approach ensures that the jnl_put_jrt_pini() gets the appropriate prc_vec for writing into the
		 * 	journal record in case JNL_ENABLED turns out to be TRUE in t_end() time.
		 * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access
		 * 	and hence the correctness of prc_vec whenever it turns out necessary is guaranteed.
		 */
		originator_prc_vec = curr_entry->pvec;
		cs_addrs->jnl->pini_addr = reg_ref->pini_addr;
	}
	if (gv_target->root)
		gvcst_kill(TRUE);
	if (JNL_ALLOWED(cs_addrs))
		reg_ref->pini_addr = cs_addrs->jnl->pini_addr;  /* In case  journal switch occurred */
	ptr = curr_entry->clb_ptr->mbf;
	*ptr++ = CMMS_R_KILL;
	curr_entry->clb_ptr->cbl = S_HDRSIZE;
	return TRUE;
}
Ejemplo n.º 2
0
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd)
{
	static boolean_t	mutex_init_done = FALSE;
	boolean_t       	is_bg, read_only;
	char            	machine_name[MAX_MCNAMELEN];
	file_control    	*fc;
	int			gethostname_res, stat_res, mm_prot;
	int4            	status, semval, dblksize, fbwsize;
	sm_long_t       	status_l;
	sgmnt_addrs     	*csa;
	sgmnt_data_ptr_t        csd;
	struct sembuf   	sop[3];
	struct stat     	stat_buf;
	union semun		semarg;
	struct semid_ds		semstat;
	struct shmid_ds         shmstat;
	struct statvfs		dbvfs;
	uint4           	sopcnt;
	unix_db_info    	*udi;
#ifdef periodic_timer_removed
	void            	periodic_flush_check();
#endif

	error_def(ERR_CLSTCONFLICT);
	error_def(ERR_CRITSEMFAIL);
	error_def(ERR_DBNAMEMISMATCH);
	error_def(ERR_DBIDMISMATCH);
	error_def(ERR_NLMISMATCHCALC);
	error_def(ERR_REQRUNDOWN);
	error_def(ERR_SYSCALL);

	assert(tsd->acc_meth == dba_bg  ||  tsd->acc_meth == dba_mm);
	is_bg = (dba_bg == tsd->acc_meth);
	read_only = reg->read_only;
	new_dbinit_ipc = FALSE;	/* we did not create a new ipc resource */
	udi = FILE_INFO(reg);
	memset(machine_name, 0, sizeof(machine_name));
	if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
	assert(strlen(machine_name) < MAX_MCNAMELEN);
	csa = &udi->s_addrs;
	csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL;   /* to help in dbinit_ch  and gds_rundown */
	reg->opening = TRUE;
	/*
	 * Create ftok semaphore for this region.
	 * We do not want to make ftok counter semaphore to be 2 for on mupip journal recover process.
	 */
	if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE))
		rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
	/*
	 * At this point we have ftok_semid sempahore based on ftok key.
	 * Any ftok conflicted region will block at this point.
	 * Say, a.dat and b.dat both has same ftok and we have process A to access a.dat and
	 * process B to access b.dat. In this case only one can continue to do db_init()
	 */
	fc = reg->dyn.addr->file_cntl;
	fc->file_type = reg->dyn.addr->acc_meth;
	fc->op = FC_READ;
	fc->op_buff = (sm_uc_ptr_t)tsd;
	fc->op_len = sizeof(*tsd);
	fc->op_pos = 1;
	dbfilop(fc);		/* Read file header */
	udi->shmid = tsd->shmid;
	udi->semid = tsd->semid;
	udi->sem_ctime = tsd->sem_ctime.ctime;
	udi->shm_ctime = tsd->shm_ctime.ctime;
	dbsecspc(reg, tsd); 	/* Find db segment size */
	if (!mupip_jnl_recover)
	{
		if (INVALID_SEMID == udi->semid)
		{
			if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* We must have somthing wrong in protocol or, code, if this happens */
				GTMASSERT;
			/*
			 * Create new semaphore using IPC_PRIVATE. System guarantees a unique id.
			 */
			if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
			{
				udi->semid = INVALID_SEMID;
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
			}
			udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
			new_dbinit_ipc = TRUE;
			tsd->semid = udi->semid;
			semarg.val = GTM_ID;
			/*
			 * Following will set semaphore number 2 (=FTOK_SEM_PER_ID - 1)  value as GTM_ID.
			 * In case we have orphaned semaphore for some reason, mupip rundown will be
			 * able to identify GTM semaphores from the value and can remove.
			 */
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
			/*
			 * Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it.
			 *	    We must NOT do any more SETVAL after this. Our design is to use
			 *	    sem_ctime as creation time of semaphore.
			 */
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno);
			tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime;
		} else
		{
			if (INVALID_SHMID == udi->shmid)
				/* if mu_rndwn_file gets standalone access of this region and
				 * somehow mupip process crashes, we can have semid != -1 but shmid == -1
				 */
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid"));
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg))
				/* file header has valid semid but semaphore does not exists */
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name));
			else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match"));
			if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
			else if (shmstat.shm_ctime != tsd->shm_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match"));
		}
		/* We already have ftok semaphore of this region, so just plainly do semaphore operation */
		/* This is the database access control semaphore for any region */
		sop[0].sem_num = 0; sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = 0; sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		if (!read_only)
		{
			sop[2].sem_num = 1; sop[2].sem_op  = 1;	 /* increment r/w access counter */
			sopcnt = 3;
		}
		sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT;
		SEMOP(udi->semid, sop, sopcnt, status);
		if (-1 == status)
		{
			errno_save = errno;
			gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg));
			rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save);
		}
	} else /* for mupip_jnl_recover we were already in mu_rndwn_file and got "semid" semaphore  */
	{
		if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime)
			/* make sure mu_rndwn_file() has reset created semaphore for standalone access */
			GTMASSERT;
		if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* make sure mu_rndwn_file() has reset shared memory */
			GTMASSERT;
		udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
		new_dbinit_ipc = TRUE;
	}
	sem_incremented = TRUE;
	if (new_dbinit_ipc)
	{
		/* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */
#ifdef __MVS__
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND),
			__IPC_MEGA | IPC_CREAT | RWDALL)))
#else
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT)))
#endif
		{
			udi->shmid = status_l = INVALID_SHMID;
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				  ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
		}
		tsd->shmid = udi->shmid;
		if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
		tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime;
	}
#ifdef DEBUG_DB64
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND));
	next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS);
#else
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND));
#endif
	if (-1 == status_l)
	{
		rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
			  ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno);
	}
	csa->nl = (node_local_ptr_t)csa->db_addrs[0];
	csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
	assert(((int)csa->critical & 0xf) == 0); 			/* critical should be 16-byte aligned */
#ifdef CACHELINE_SIZE
	assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1)));
#endif
	/* Note: Here we check jnl_sate from database file and its value cannot change without standalone access.
	 * The jnl_buff buffer should be initialized irrespective of read/write process */
	JNL_INIT(csa, reg, tsd);
	csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd));
	csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1;
	csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
	csa->total_blks = tsd->trans_hist.total_blks;   		/* For test to see if file has extended */
	if (new_dbinit_ipc)
	{
		memset(csa->nl, 0, sizeof(*csa->nl));			/* We allocated shared storage -- we have to init it */
		if (JNL_ALLOWED(csa))
		{	/* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not
			 * necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch
			 * check in order to determine whether to call jnl_file_open() or not. this is in preparation for that.
			 */
			csa->jnl->jnl_buff->cycle = 1;
		}
	}
	if (is_bg)
		csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
	else
	{
		csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1);
		FSTAT_FILE(udi->fd, &stat_buf, stat_res);
		if (-1 == stat_res)
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
#ifdef DEBUG_DB64
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size),
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size);
#else
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL,
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
#endif
		csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1;
		csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0];
	}
	if (!csa->nl->glob_sec_init)
	{
		assert(new_dbinit_ipc);
		if (is_bg)
			*csd = *tsd;
		if (csd->machine_name[0])                  /* crash occured */
		{
			if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN))  /* crashed on some other node */
				rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
			else
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
		}
		if (is_bg)
		{
			bt_malloc(csa);
			csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd);
			db_csh_ini(csa);
		}
		db_csh_ref(csa);
		strcpy(csa->nl->machine_name, machine_name);					/* machine name */
		assert(MAX_REL_NAME > gtm_release_name_len);
		memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1);	/* GT.M release name */
		memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1);				/* GDS label */
		memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len);		/* database filename */
		csa->nl->creation_date_time = csd->creation.date_time;
		csa->nl->highest_lbm_blk_changed = -1;
		csa->nl->wcs_timers = -1;
		csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
		csa->nl->unique_id.uid = FILE_INFO(reg)->fileid;            /* save what file we initialized this storage for */
		/* save pointers in csa to access shared memory */
		csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
		if (JNL_ALLOWED(csa))
			csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
		csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl);
		csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
		csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
		if (!read_only || is_bg)
		{
			csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
			csd->max_update_array_size = csd->max_non_bm_update_array_size
				= ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE);
			csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE);
			/* add current db_csh counters into the cumulative counters and reset the current counters */
#define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2)					\
				csd->COUNTER.cumul_count += csd->COUNTER.curr_count;	\
				csd->COUNTER.curr_count = 0;
#include "tab_db_csh_acct_rec.h"
#undef TAB_DB_CSH_ACCT_REC
		}
		if (!read_only)
		{
			if (is_bg)
			{
				assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0);
				LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save);
				if (0 != errno_save)
				{
					rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						  ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save);
				}
			}
		}
		reg->dyn.addr->ext_blk_count = csd->extension_size;
		mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
		DEBUG_ONLY(locknl = csa->nl;)	/* for DEBUG_ONLY LOCK_HIST macro */
Ejemplo n.º 3
0
/* Initialize the TP structures we will be using for the successive TP operations */
void gvcst_tp_init(gd_region *greg)
{
	sgm_info		*si;
	sgmnt_addrs		*csa;

	csa = (sgmnt_addrs *)&FILE_INFO(greg)->s_addrs;
	if (NULL == csa->sgm_info_ptr)
	{
		si = csa->sgm_info_ptr = (sgm_info *)malloc(sizeof(sgm_info));
		assert(32768 > sizeof(sgm_info));
		memset(si, 0, sizeof(sgm_info));
		si->tp_hist_size = TP_MAX_MM_TRANSIZE;
		si->cur_tp_hist_size = INIT_CUR_TP_HIST_SIZE;	/* should be very much less than si->tp_hist_size */
		assert(si->cur_tp_hist_size <= si->tp_hist_size);
		si->blks_in_use = (hash_table_int4 *)malloc(sizeof(hash_table_int4));
		init_hashtab_int4(si->blks_in_use, BLKS_IN_USE_INIT_ELEMS);
		/* See comment in tp.h about cur_tp_hist_size for details */
		si->first_tp_hist = si->last_tp_hist =
			(srch_blk_status *)malloc(sizeof(srch_blk_status) * si->cur_tp_hist_size);
		si->cw_set_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->cw_set_list, sizeof(cw_set_element), CW_SET_LIST_INIT_ALLOC);
		si->tlvl_cw_set_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->tlvl_cw_set_list, sizeof(cw_set_element), TLVL_CW_SET_LIST_INIT_ALLOC);
		si->tlvl_info_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->tlvl_info_list, sizeof(tlevel_info), TLVL_INFO_LIST_INIT_ALLOC);
		si->new_buff_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->new_buff_list, SIZEOF(que_ent) + csa->hdr->blk_size, NEW_BUFF_LIST_INIT_ALLOC);
		si->recompute_list = (buddy_list *)malloc(sizeof(buddy_list));
		initialize_list(si->recompute_list, sizeof(key_cum_value), RECOMPUTE_LIST_INIT_ALLOC);
		/* The size of the si->cr_array can go up to TP_MAX_MM_TRANSIZE, but usually is quite less.
		 * Therefore, initially allocate a small array and expand as needed later.
		 */
		if (dba_bg == greg->dyn.addr->acc_meth)
		{
			si->cr_array_size = si->cur_tp_hist_size;
			si->cr_array = (cache_rec_ptr_ptr_t)malloc(sizeof(cache_rec_ptr_t) * si->cr_array_size);
		} else
		{
			si->cr_array_size = 0;
			si->cr_array = NULL;
		}
		si->fresh_start = TRUE;
	} else
		si = csa->sgm_info_ptr;
	si->gv_cur_region = greg;
	si->tp_csa = csa;
	si->tp_csd = csa->hdr;
	si->start_tn = csa->ti->curr_tn;
	if (JNL_ALLOWED(csa))
	{
		si->total_jnl_rec_size = csa->min_total_tpjnl_rec_size;	/* Reinitialize total_jnl_rec_size */
		/* Since the following jnl-mallocs are independent of any dynamically-changeable parameter of the
		 * database, we can as well use the existing malloced jnl structures if at all they exist.
		 */
		if (NULL == si->jnl_tail)
		{
			si->jnl_tail = &si->jnl_head;
			si->jnl_list = (buddy_list *)malloc(sizeof(buddy_list));
			initialize_list(si->jnl_list, sizeof(jnl_format_buffer), JNL_LIST_INIT_ALLOC);
			si->format_buff_list = (buddy_list *)malloc(sizeof(buddy_list));
			/* Minimum value of elemSize is 8 due to alignment requirements of the returned memory location.
			 * Therefore, we request an elemSize of 8 bytes for the format-buffer and will convert as much
			 * bytes as we need into as many 8-byte multiple segments (see code in jnl_format).
			 */
			initialize_list(si->format_buff_list, JFB_ELE_SIZE,
					DIVIDE_ROUND_UP(JNL_FORMAT_BUFF_INIT_ALLOC, JFB_ELE_SIZE));
		}
	} else if (NULL != si->jnl_tail)
	{	/* journaling is currently disallowed although it was allowed (non-zero si->jnl_tail)
		 * during the prior use of this region. Free up unnecessary region-specific structures now.
		 */
		FREEUP_BUDDY_LIST(si->jnl_list);
		FREEUP_BUDDY_LIST(si->format_buff_list);
		si->jnl_tail = NULL;
	}
}
Ejemplo n.º 4
0
bool	gtcmtr_increment(void)
{
	cm_region_list	*reg_ref;
	mval		incr_delta, post_incr;
	unsigned char	buff[MAX_ZWR_KEY_SZ], *end;
	unsigned char	*ptr, regnum;
	short		n;
	unsigned short	top, len, temp_short;
	static readonly	gds_file_id file;

	error_def(ERR_KEY2BIG);
	error_def(ERR_GVIS);
	error_def(ERR_DBPRIVERR);

	ptr = curr_entry->clb_ptr->mbf;
	assert(*ptr == CMMS_Q_INCREMENT);
	ptr++;
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	regnum = *ptr++;
	reg_ref = gtcm_find_region(curr_entry,regnum);
	len--; /* subtract size of regnum */
	CM_GET_GVCURRKEY(ptr, len);
	gtcm_bind_name(reg_ref->reghead, TRUE);
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (JNL_ALLOWED(cs_addrs))
	{	/* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines
		 *	do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not
		 *	easy to re-execute the following two assignments in case gvcst_incr's call to t_end encounters a
		 *	cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment
		 *	if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE.
		 * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED
		 * 	might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead.
		 * this approach ensures that the jnl_put_jrt_pini gets the appropriate prc_vec for writing into the
		 * 	journal record in case JNL_ENABLED turns out to be TRUE in t_end time.
		 * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access
		 * 	and hence the correctness of prc_vec (whenever it turns out necessary) is guaranteed.
		 */
		originator_prc_vec = curr_entry->pvec;
		cs_addrs->jnl->pini_addr = reg_ref->pini_addr;
	}
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	incr_delta.mvtype = MV_STR;
	incr_delta.str.len = len;
	incr_delta.str.addr = (char *)ptr;
	if ((n = gv_currkey->end + 1) > gv_cur_region->max_key_size)
	{
		if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0)
			end = &buff[MAX_ZWR_KEY_SZ - 1];
		rts_error(VARLSTCNT(11) ERR_KEY2BIG, 4, n, (int4)gv_cur_region->max_key_size,
			REG_LEN_STR(gv_cur_region), 0, ERR_GVIS, 2, end - buff, buff);
	}
	MV_FORCE_NUMD(&incr_delta);
	gvcst_incr(&incr_delta, &post_incr);
	if (JNL_ALLOWED(cs_addrs))
		reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case journal switch occurred */
	ptr = curr_entry->clb_ptr->mbf;
	if (MV_DEFINED(&post_incr))
	{
		temp_short = (unsigned short)post_incr.str.len;
		assert((int4)temp_short == post_incr.str.len); /* ushort <- int4 assignment lossy? */
		if (curr_entry->clb_ptr->mbl < 1 +			/* msg header */
					       SIZEOF(temp_short) +	/* size of length of $INCR return value */
					       temp_short) 		/* length of $INCR return value */
		{	/* resize buffer */
			cmi_realloc_mbf(curr_entry->clb_ptr, 1 + SIZEOF(temp_short) + temp_short);
			ptr = curr_entry->clb_ptr->mbf;
		}
		*ptr++ = CMMS_R_INCREMENT;
		PUT_USHORT(ptr, temp_short);
		ptr += SIZEOF(unsigned short);
		memcpy(ptr, post_incr.str.addr, temp_short);
		ptr += temp_short;
	} else
Ejemplo n.º 5
0
void gtcmd_rundown(connection_struct *cnx, bool clean_exit)
{
	int4			link;
	cm_region_list		*ptr, *last, *que_next, *que_last;
	cm_region_head		*region;
	uint4			jnl_status;
	jnl_private_control	*jpc;
	jnl_buffer_ptr_t	jbp;
	int			refcnt;
	boolean_t		was_crit;
	int4			rundown_status = EXIT_NRM;			/* if gds_rundown went smoothly */

	for (ptr = cnx->region_root;  ptr;)
	{
		region = ptr->reghead;
		TP_CHANGE_REG(region->reg);
		jpc = cs_addrs->jnl;
		if (ptr->pini_addr && clean_exit && JNL_ENABLED(cs_data) && (NOJNL != jpc->channel))
		{
			was_crit = cs_addrs->now_crit;
			if (!was_crit)
				grab_crit(gv_cur_region);
			if (JNL_ENABLED(cs_data))
			{
				jpc->pini_addr = ptr->pini_addr;
				SET_GBL_JREC_TIME; /* jnl_ensure_open/jnl_put_jrt_pfin needs this to be set */
				jbp = jpc->jnl_buff;
				/* Before writing to jnlfile, adjust jgbl.gbl_jrec_time if needed to maintain time order
				 * of jnl records.  This needs to be done BEFORE the jnl_ensure_open as that could write
				 * journal records (if it decides to switch to a new journal file).
				 */
				ADJUST_GBL_JREC_TIME(jgbl, jbp);
				jnl_status = jnl_ensure_open();
				if (0 == jnl_status)
				{
					if (0 != jpc->pini_addr)
						jnl_put_jrt_pfin(cs_addrs);
				} else
					send_msg(VARLSTCNT(6) jnl_status, 4, JNL_LEN_STR(cs_data), DB_LEN_STR(gv_cur_region));
			}
			if (!was_crit)
				rel_crit(gv_cur_region);
		}
		refcnt = --region->refcnt;
		/* Dont know how refcnt can become negative but in pro handle it by bypassing this region. The reason is the
		 * following. refcnt should have originally been a positive value. Every time this function is invoked, it would
		 * be decremented by one. There should have been one invocation that saw refcnt to be zero. That would have
		 * done the rundown of the region or if it is still in the stack the rundown is still in progress. Therefore
		 * it is not a good idea to try running down this region when we see refcnt to be negative (as otherwise we
		 * will get confused and could potentially end up with SIG-11 or ACCVIO errors). The worst case is that we
		 * would not have rundown the region in which case an externally issued MUPIP RUNDOWN would be enough.
		 */
		assert(0 <= refcnt);
		if (0 == refcnt)
		{	/* free up only as little as needed to facilitate structure reuse when the region is opened again */
			assert(region->head.fl == region->head.bl);
			VMS_ONLY(gtcm_ast_avail++);
			if (JNL_ALLOWED(cs_data))
				jpc->pini_addr = 0;
			UNIX_ONLY(rundown_status |=) gds_rundown();
			gd_ht_kill(region->reg_hash, TRUE);	/* TRUE to free up the table and the gv_targets it holds too */
			FREE_CSA_DIR_TREE(cs_addrs);
			cm_del_gdr_ptr(gv_cur_region);
		}
		que_next = (cm_region_list *)((unsigned char *)ptr + ptr->regque.fl);
		que_last = (cm_region_list *)((unsigned char *)ptr + ptr->regque.bl);
		link = (int4)((unsigned char *)que_next - (unsigned char *)que_last);
		que_last->regque.fl = link;
		que_next->regque.bl = -link;
		last = ptr;
		ptr = ptr->next;
		free(last);
	}
Ejemplo n.º 6
0
void mupip_upgrade(void)
{
	bool		rbno;
	unsigned char 	*upgrd_buff[2], upgrd_label[GDS_LABEL_SZ]="UPGRADE0304";
	char		fn[256];
	char		answer[4];
	unsigned short	fn_len;
	int4		fd, save_errno, old_hdr_size, new_hdr_size, status, bufsize, dsize, datasize[2];
	int4            old_hdr_size_vbn, new_hdr_size_vbn;
	int		fstat_res;
	off_t 		last_full_grp_startoff, old_file_len, old_file_len2, read_off, write_off, old_start_vbn_off;
	block_id	last_full_grp_startblk;
	v3_sgmnt_data	old_head_data, *old_head;
	sgmnt_data	new_head_data, *new_head;
 	struct stat    	stat_buf;

	error_def(ERR_MUNODBNAME);
	error_def(ERR_MUNOUPGRD);
	error_def(ERR_DBOPNERR);
	error_def(ERR_DBRDONLY);
	error_def(ERR_DBFILOPERR);
	error_def(ERR_DBPREMATEOF);

	ESTABLISH(mupip_upgrade_ch);
	fn_len = sizeof(fn);
	if (!cli_get_str("FILE", fn, &fn_len))
		rts_error(VARLSTCNT(1) ERR_MUNODBNAME);
	if (!(mupip_upgrade_standalone(fn, &upgrade_standalone_sems)))
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	if (-1 == (fd = OPEN(fn, O_RDWR)))
	{
		save_errno = errno;
		if (-1 != (fd = OPEN(fn, O_RDONLY)))
		{
			util_out_print("Cannot update read-only database.", FLUSH);
			rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, errno);
		}
		rts_error(VARLSTCNT(5) ERR_DBRDONLY, 2, fn_len, fn, save_errno);
	}
	/* Confirm before proceed */
	if (!mu_upgrd_confirmed(TRUE))
	{
		util_out_print("Upgrade canceled by user", FLUSH);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
	util_out_print("Do not interrupt to avoid damage in database!!", FLUSH);
	util_out_print("Mupip upgrade started ...!/", FLUSH);
	mu_upgrd_sig_init();
	/* get file status */
	FSTAT_FILE(fd, &stat_buf, fstat_res);
	if (-1 == fstat_res)
		rts_error(VARLSTCNT(5) ERR_DBOPNERR, 2, fn_len, fn, errno);
	old_file_len = stat_buf.st_size;

	/* Prepare v3.x file header buffer */
	old_hdr_size  = sizeof(*old_head);
	old_head = &old_head_data;
	/* Prepare v4.x file header buffer */
	new_hdr_size = sizeof(*new_head);
	new_head = &new_head_data;
	memset(new_head, 0, new_hdr_size);
	old_hdr_size_vbn = DIVIDE_ROUND_UP(old_hdr_size, DISK_BLOCK_SIZE);
	new_hdr_size_vbn = DIVIDE_ROUND_UP(new_hdr_size, DISK_BLOCK_SIZE);
	/* READ header from V3.x file */
	LSEEKREAD(fd, 0, old_head, old_hdr_size, status);
	if (0 != status)
		if (-1 == status)
			rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
		else
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
	/* Check version */
	if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 1))
	{
		if (memcmp(&old_head->label[0], GDS_LABEL, GDS_LABEL_SZ - 3))
		{	/* it is not a GTM database */
			close(fd);
			util_out_print("File !AD is not a GT.M database.!/", FLUSH, fn_len, fn);
			rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
		}else
		{	/* it is GTM database */
			/* is it not v3.x database?  */
			if (memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_V30,2) !=0  &&
		   	    memcmp(&old_head->label[GDS_LABEL_SZ - 3],GDS_ALT_V30,2) != 0)
			{
				close(fd);
				util_out_print("File !AD has an unrecognized database version!/", FLUSH, fn_len, fn);
				rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
			}
		}
	}
	else
	{	/* Note: We assume that if the V4.x header and current GT.M file header
		 *       has same field names, they are at same offset */
		/* READ the header from file again as V4.x header */
                LSEEKREAD(fd, 0, new_head, new_hdr_size, status);
                if (0 != status)
			if (-1 != status)
				rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
			else
				rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
                if (QWNE(new_head->reg_seqno, seq_num_zero) ||
                    QWNE(new_head->resync_seqno, seq_num_zero) ||
                    (new_head->resync_tn != 0) ||
                    new_head->repl_state != repl_closed)
                {
                        util_out_print("!AD might already have been upgraded", FLUSH, fn_len, fn);
                        util_out_print("Do you wish to continue with the upgrade? [y/n] ", FLUSH);
                        SCANF("%s", answer);
                        if (answer[0] != 'y' && answer[0] != 'Y')
                        {
                                close(fd);
                                util_out_print("Upgrade canceled by user", FLUSH);
                                rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
                        }
                }
                init_replication(new_head);
		new_head->max_update_array_size = new_head->max_non_bm_update_array_size
                                       = ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(new_head), UPDATE_ARRAY_ALIGN_SIZE);
		new_head->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE);
		new_head->mutex_spin_parms.mutex_hard_spin_count = MUTEX_HARD_SPIN_COUNT;
		new_head->mutex_spin_parms.mutex_sleep_spin_count = MUTEX_SLEEP_SPIN_COUNT;
		new_head->mutex_spin_parms.mutex_spin_sleep_mask = MUTEX_SPIN_SLEEP_MASK;
		new_head->semid = INVALID_SEMID;
		new_head->shmid = INVALID_SHMID;
		if (JNL_ALLOWED(new_head))
		{	/* Following 3 are new fields starting from V43001.
			 * Initialize them appropriately.
			 */
			new_head->epoch_interval = DEFAULT_EPOCH_INTERVAL;
			new_head->alignsize = DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE;
			if (!new_head->jnl_alq)
				new_head->jnl_alq = JNL_ALLOC_DEF;
			/* note new_head->jnl_deq is carried over without any change even if it is zero since a zero
			 * jnl file extension size is supported starting V43001
			 */
			new_head->autoswitchlimit = ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, new_head->jnl_alq, new_head->jnl_deq);
			/* following field is assumed as non-zero by set_jnl_info starting V43001A */
			if (JNL_ALLOWED(new_head) && !new_head->jnl_buffer_size)
				new_head->jnl_buffer_size = JNL_BUFFER_DEF;
		} else
		{
			new_head->epoch_interval = 0;
			new_head->alignsize = 0;
			new_head->autoswitchlimit = 0;
		}
		new_head->yield_lmt = DEFAULT_YIELD_LIMIT;
                /* writing header */
                LSEEKWRITE(fd, 0, new_head, new_hdr_size, status);
                if (0 != status)
                        rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
                close(fd);
                util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn);
		if (0 != sem_rmid(upgrade_standalone_sems))
		{
			util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems);
			rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
		}
                mupip_exit(SS_NORMAL);
	}
	util_out_print("Old header size: !SL", FLUSH, old_hdr_size);
	util_out_print("New header size: !SL", FLUSH, new_hdr_size);
	if (old_head->createinprogress)
	{
		close(fd);
		util_out_print("Database creation in progress on file !AD.!/", FLUSH, fn_len, fn);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
	if (old_head->file_corrupt)
	{
		close(fd);
		util_out_print("Database !AD is corrupted.!/", FLUSH, fn_len, fn);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
	if ((((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE +
		(off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)DISK_BLOCK_SIZE != old_file_len) &&
	   (((off_t)old_head->start_vbn - 1) * DISK_BLOCK_SIZE +
		(off_t)old_head->trans_hist.total_blks * old_head->blk_size + (off_t)old_head->blk_size != old_file_len))
	{
		util_out_print("Incorrect start_vbn !SL or, block size !SL or, total blocks !SL",
			FLUSH, old_head->start_vbn, old_head->blk_size, old_head->trans_hist.total_blks);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
	if (ROUND_DOWN(old_head->blk_size, DISK_BLOCK_SIZE) != old_head->blk_size)
	{
		util_out_print("Database block size !SL is not divisible by DISK_BLOCK_SIZE", FLUSH, old_head->blk_size);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
        mu_upgrd_header(old_head, new_head); /* Update header from v3.x to v4.x  */
        new_head->start_vbn = new_hdr_size_vbn + 1;
        new_head->free_space = 0;
        new_head->wc_blocked_t_end_hist.evnt_cnt = old_head->wc_blocked_t_end_hist2.evnt_cnt;
        new_head->wc_blocked_t_end_hist.evnt_tn  = old_head->wc_blocked_t_end_hist2.evnt_tn;
        init_replication(new_head);
	/*
	   A simple way of doing mupip upgrade is to move all the data after file header
	   towards the eof to make space and write down the header. This does not need any
	   computation or, change in data/index blocks.  This is a slow process because it
	   has mainly I/O, though no manipulation of database structures.  or index blocks.
	   This is okay for small database.

	   A time efficient way is to physically move second group of BLKS_PER_LMAP number of
	   blocks towards the eof and move first group of BLKS_PER_LMAP number of blocks in
	   place of 2nd group. Finally adjust all indices to point to the blocks correctly.
	   Also adjust master bit map.
	   (note: we cannot move first group from the beginning).

	   Detail algorithm as follows:
	   ---------------------------
	   // Allocate two buffers each to hold one group of data.
	   Read v3.x header and upgrade to v4.x
	   if file is big enough
	  	read group 1 in buff[0]
	   	read_off = offset of starting block of 2nd group.
	  	read group 2 in buff[1]
		write buff[0] at offset read_off

	        last_full_grp_startblk = points to the block where 2nd group of 512 blocks
			of old file will be written back.
		//Instead of searching for a free group we will write at the last full group
		//Say, we have 3000 blocks.  last_full_grp_startblk = 2048
		//			     (not 2560, because it is not full)
		//All data from that point upto eof will be read and saved in buffer
		read all remaining data from the point last_full_grp_startblk upto eof in buff[0]
		write buff[1] at the point of last_full_grp_startblk
		Now write buff[0] at the end of last write
		//Graphical Example: Each letter corresponds to a group of 512 blocks where first block
		// 	is local bit map. Last group U may be a group of less than 512 blocks.
		//      Extend towards right ------------------------------------------------------->
		//	old permutation:    [v3 head]  A B C D E F G H   I J K L M N O P  Q R S T U
		//	new permutation:    [v4 head ]   A C D E F G H   I J K L M N O P  Q R S T B U
		Finally traverse the tree and adjust block pointers
		Adjust master map
		write new v4.x header at bof

	    else
	    	bufsize = size of data for a group
		rbno = 0    // read buffer no. This switches between 0 and 1
		read_off = 0
		write_off = 0
		upgrd_buff[rbno] = new header
		data_size[rbno] = new header size
		rbno = INVERT(rbno);
		do while not eof
			data_size[rbno] = MIN(bufsize, remaining_data_size)
			Read data of size data_size[rbno] in upgrd_buff[rbno] and adjust read_off
			rbno = INVERT(rbno);
			Write upgrd_buff[rbno] of datasize[rbno] at write_off and increase write_off
		Enddo
		rbno = INVERT(rbno)
		Write upgrd_buff[rbno] of datasize[rbno] at write_off
	    endif
	*/
	bufsize = old_head->blk_size * BLKS_PER_LMAP;
	upgrd_buff[0] = (unsigned char*) malloc(bufsize);
	upgrd_buff[1] = (unsigned char*) malloc(bufsize);
	read_off =  old_start_vbn_off = (off_t)(old_head->start_vbn - 1) * DISK_BLOCK_SIZE; /* start vbn offset in bytes */
	last_full_grp_startblk = ROUND_DOWN(new_head->trans_hist.total_blks, BLKS_PER_LMAP); /* in block_id */
	last_full_grp_startoff = old_start_vbn_off + (off_t)last_full_grp_startblk * new_head->blk_size; /* offset in bytes */
	/* this calculation is used because some 3.2x database has GDS blk_size bytes at the end
	   instead of DISK_BLOCK_SIZE bytes. */
	old_file_len2 = old_head->start_vbn * DISK_BLOCK_SIZE + (off_t)old_head->blk_size * old_head->trans_hist.total_blks;
	/* Change Label to a temporary dummy value, so that other GTM process does not come
	while doing upgrade and corrupts database */
	LSEEKWRITE(fd, 0, upgrd_label, GDS_LABEL_SZ - 1, status);
	if (0 != status)
		rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
	if (old_head->trans_hist.total_blks > BLKS_PER_LMAP * 2)
	{
		/* recalculate start_vbn and free space, because there will be a gap after header */
		new_head->start_vbn = old_head->start_vbn + bufsize / DISK_BLOCK_SIZE;
		new_head->free_space = bufsize - (new_hdr_size_vbn - old_hdr_size_vbn) * DISK_BLOCK_SIZE;
		util_out_print("New starting VBN is: !SL !/", FLUSH, new_head->start_vbn);

		/* read 1st group of blocks */
		LSEEKREAD(fd, read_off, upgrd_buff[0], bufsize, status);
		if (0 != status)
			if (-1 == status)
				rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
			else
				rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		read_off = read_off + bufsize;
		/* read 2nd group of blocks */
		LSEEKREAD(fd, read_off, upgrd_buff[1], bufsize, status);
		if (0 != status)
			if (-1 == status)
				rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
			else
				rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		/* write 1st group of blocks in place of 2nd group */
		write_off = old_start_vbn_off + bufsize;
		LSEEKWRITE(fd, write_off, upgrd_buff[0], bufsize, status);
		if (0 != status)
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		/* read last group (# of blks <= BLKS_PER_LMAP) */
		dsize = old_file_len2 - last_full_grp_startoff;
		assert (dsize <= bufsize);
		LSEEKREAD(fd, last_full_grp_startoff, upgrd_buff[0], dsize, status);
		if (0 != status)
			if (-1 == status)
				rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
			else
				rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		/* write 2nd group of blocks */
		LSEEKWRITE(fd, last_full_grp_startoff, upgrd_buff[1], bufsize, status);
		if (0 != status)
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		 /* write last group read from old file */
		LSEEKWRITE(fd, last_full_grp_startoff + bufsize, upgrd_buff[0], dsize, status);
		if (0 != status)
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
		util_out_print("Please wait while index is being adjusted...!/", FLUSH);
		mu_upgrd_adjust_blkptr(1L, TRUE, new_head, fd, fn, fn_len);
		mu_upgrd_adjust_mm(new_head->master_map, DIVIDE_ROUND_UP(new_head->trans_hist.total_blks+1,BLKS_PER_LMAP));
		/* writing header */
		LSEEKWRITE(fd, 0, new_head, new_hdr_size, status);
		if (0 != status)
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
	}
	else /* very small database */
	{
		rbno = 0;
		write_off = 0;

		datasize[rbno] = new_hdr_size;
		memcpy(upgrd_buff[0], new_head, new_hdr_size);
		rbno = INVERT(rbno);

		while(read_off < old_file_len2)
		{
			datasize[rbno] = MIN (old_file_len2 - read_off, bufsize);
			LSEEKREAD(fd, read_off, upgrd_buff[rbno], datasize[rbno], status);
			if (0 != status)
				if (-1 == status)
					rts_error(VARLSTCNT(4) ERR_DBPREMATEOF, 2, fn_len, fn);
				else
					rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
			read_off += datasize[rbno];
			rbno = INVERT(rbno);


			LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status);
			if (0 != status)
				rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
			write_off+= datasize[rbno];
		}
		rbno = INVERT(rbno);
		LSEEKWRITE(fd, write_off, upgrd_buff[rbno], datasize[rbno], status);
		if (0 != status)
			rts_error(VARLSTCNT(5) ERR_DBFILOPERR, 2, fn_len, fn, status);
	} /* end if small database */
	free(upgrd_buff[0]);
	free(upgrd_buff[1]);
	close(fd);
	util_out_print("File !AD successfully upgraded.!/", FLUSH, fn_len, fn);
	REVERT;
	if (0 != sem_rmid(upgrade_standalone_sems))
	{
		util_out_print("Error with sem_rmid : %d [0x%x]", TRUE, upgrade_standalone_sems, upgrade_standalone_sems);
		rts_error(VARLSTCNT(1) ERR_MUNOUPGRD);
	}
	mupip_exit(SS_NORMAL);
}
Ejemplo n.º 7
0
void mucregini(int4 blk_init_size)
{
	int4			status;
	int4			i;
	th_index_ptr_t 		th;
	collseq			*csp;
	uint4			ustatus;
	mstr 			jnlfile, jnldef, tmpjnlfile;
	time_t			ctime;

	MEMCPY_LIT(cs_data->label, GDS_LABEL);
	cs_data->desired_db_format = GDSVCURR;
	cs_data->fully_upgraded = TRUE;
	cs_data->db_got_to_v5_once = TRUE;	/* no V4 format blocks that are non-upgradeable */
	cs_data->minor_dbver = GDSMVCURR;
	cs_data->certified_for_upgrade_to = GDSVCURR;
	cs_data->creation_db_ver = GDSVCURR;
	cs_data->creation_mdb_ver = GDSMVCURR;
	cs_data->master_map_len = MASTER_MAP_SIZE_DFLT;
	cs_data->bplmap = BLKS_PER_LMAP;
	assert(BLK_SIZE <= MAX_DB_BLK_SIZE);
	cs_data->blk_size = BLK_SIZE;
	i = cs_data->trans_hist.total_blks;
	cs_data->trans_hist.free_blocks = i - DIVIDE_ROUND_UP(i, BLKS_PER_LMAP) - 2;
	cs_data->max_rec_size = gv_cur_region->max_rec_size;
	cs_data->max_key_size = gv_cur_region->max_key_size;
	cs_data->null_subs = gv_cur_region->null_subs;
	cs_data->std_null_coll = gv_cur_region->std_null_coll;
#ifdef UNIX
	cs_data->freeze_on_fail = gv_cur_region->freeze_on_fail;
	cs_data->mumps_can_bypass = gv_cur_region->mumps_can_bypass;
#endif
	cs_data->reserved_bytes = gv_cur_region->dyn.addr->reserved_bytes;
	cs_data->clustered = FALSE;
	cs_data->file_corrupt = 0;
	if (gv_cur_region->dyn.addr->lock_space)
		cs_data->lock_space_size = gv_cur_region->dyn.addr->lock_space * OS_PAGELET_SIZE;
	else
		cs_data->lock_space_size = DEF_LOCK_SIZE;
	cs_data->staleness[0] = -300000000;	/* staleness timer = 30 seconds */
	cs_data->staleness[1] = -1;
	cs_data->ccp_quantum_interval[0] = -20000000;	/* 2 sec */
	cs_data->ccp_quantum_interval[1] = -1;
	cs_data->ccp_response_interval[0] = -600000000;	/* 1 min */
	cs_data->ccp_response_interval[1] = -1;
	cs_data->ccp_tick_interval[0] = -1000000;	/* 1/10 sec */
	cs_data->ccp_tick_interval[1] = -1;
	cs_data->last_com_backup = 1;
	cs_data->last_inc_backup = 1;
	cs_data->last_rec_backup = 1;
	cs_data->defer_time = gv_cur_region->dyn.addr->defer_time;
	cs_data->jnl_alq = gv_cur_region->jnl_alq;
	if (cs_data->jnl_state && !cs_data->jnl_alq)
		cs_data->jnl_alq = JNL_ALLOC_DEF;
	cs_data->jnl_deq = gv_cur_region->jnl_deq;
	cs_data->jnl_before_image = gv_cur_region->jnl_before_image;
	cs_data->jnl_state = gv_cur_region->jnl_state;
	cs_data->epoch_interval = JNL_ALLOWED(cs_data) ? DEFAULT_EPOCH_INTERVAL : 0;
	cs_data->alignsize = JNL_ALLOWED(cs_data) ? (DISK_BLOCK_SIZE * JNL_DEF_ALIGNSIZE) : 0;
	ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, gv_cur_region->jnl_buffer_size, cs_data);
#ifdef UNIX
	if (JNL_ALLOWED(cs_data))
	{
		if (cs_data->jnl_alq + cs_data->jnl_deq > gv_cur_region->jnl_autoswitchlimit)
		{
			cs_data->autoswitchlimit = gv_cur_region->jnl_autoswitchlimit;
			cs_data->jnl_alq = cs_data->autoswitchlimit;
		} else
			cs_data->autoswitchlimit = ALIGNED_ROUND_DOWN(gv_cur_region->jnl_autoswitchlimit,
							cs_data->jnl_alq, cs_data->jnl_deq);
	}
	else
		cs_data->autoswitchlimit = 0;
	assert(!(MAX_IO_BLOCK_SIZE % DISK_BLOCK_SIZE));
	if (cs_data->jnl_alq + cs_data->jnl_deq > cs_data->autoswitchlimit)
		cs_data->jnl_alq = cs_data->autoswitchlimit;
#else
	cs_data->autoswitchlimit = JNL_ALLOWED(cs_data) ? ALIGNED_ROUND_DOWN(JNL_ALLOC_MAX, cs_data->jnl_alq, cs_data->jnl_deq) : 0;
#endif
	if (!cs_data->jnl_buffer_size)
		ROUND_UP_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, JNL_BUFFER_DEF, cs_data);
	if (JNL_ALLOWED(cs_data))
		if (cs_data->jnl_buffer_size < JNL_BUFF_PORT_MIN(cs_data))
		{
			ROUND_UP_MIN_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data);
		} else if (cs_data->jnl_buffer_size > JNL_BUFFER_MAX)
		{
			ROUND_DOWN_MAX_JNL_BUFF_SIZE(cs_data->jnl_buffer_size, cs_data);
		}
	cs_data->def_coll = gv_cur_region->def_coll;
	if (cs_data->def_coll)
	{
		if (csp = ready_collseq((int)(cs_data->def_coll)))
		{
			cs_data->def_coll_ver = (csp->version)(cs_data->def_coll);
			if (!do_verify(csp, cs_data->def_coll, cs_data->def_coll_ver))
			{
				gtm_putmsg(VARLSTCNT(4) ERR_COLLTYPVERSION, 2, cs_data->def_coll, cs_data->def_coll_ver);
				mupip_exit(ERR_MUNOACTION);
			}
		} else
		{
			gtm_putmsg(VARLSTCNT(3) ERR_COLLATIONUNDEF, 1, cs_data->def_coll);
			mupip_exit(ERR_MUNOACTION);
		}
	}
	/* mupip_set_journal() relies on cs_data->jnl_file_len being 0 if cs_data->jnl_state is jnl_notallowed.
	 * Note that even though gv_cur_region->jnl_state is jnl_notallowed, gv_cur_region->jnl_file_len can be non-zero
	 */
	cs_data->jnl_file_len = JNL_ALLOWED(cs_data) ? gv_cur_region->jnl_file_len : 0;
	cs_data->reg_seqno = 1;
	VMS_ONLY(
		cs_data->resync_seqno = 1;
		cs_data->old_resync_seqno = 1;
		cs_data->resync_tn = 1;
	)
Ejemplo n.º 8
0
int gtmsource()
{
	int			status, log_init_status, waitpid_res, save_errno;
	char			print_msg[1024], tmpmsg[1024];
	gd_region		*reg, *region_top;
	sgmnt_addrs		*csa, *repl_csa;
	boolean_t		all_files_open, isalive;
	pid_t			pid, ppid, procgp;
	seq_num			read_jnl_seqno, jnl_seqno;
	unix_db_info		*udi;
	gtmsource_local_ptr_t	gtmsource_local;
	boolean_t		this_side_std_null_coll;
	int			null_fd, rc;

	memset((uchar_ptr_t)&jnlpool, 0, SIZEOF(jnlpool_addrs));
	call_on_signal = gtmsource_sigstop;
	ESTABLISH_RET(gtmsource_ch, SS_NORMAL);
	if (-1 == gtmsource_get_opt())
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_MUPCLIERR);
	if (gtmsource_options.shut_down)
	{	/* Wait till shutdown time nears even before going to "jnlpool_init". This is because the latter will return
		 * with the ftok semaphore and access semaphore held and we do not want to be holding those locks (while
		 * waiting for the user specified timeout to expire) as that will affect new GTM processes and/or other
		 * MUPIP REPLIC commands that need these locks for their function.
		 */
		if (0 < gtmsource_options.shutdown_time)
		{
			repl_log(stdout, TRUE, TRUE, "Waiting for %d seconds before signalling shutdown\n",
												gtmsource_options.shutdown_time);
			LONG_SLEEP(gtmsource_options.shutdown_time);
		} else
			repl_log(stdout, TRUE, TRUE, "Signalling shutdown immediate\n");
	} else if (gtmsource_options.start)
	{
		repl_log(stdout, TRUE, TRUE, "Initiating START of source server for secondary instance [%s]\n",
			gtmsource_options.secondary_instname);
	}
	if (gtmsource_options.activate && (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary))
	{	/* MUPIP REPLIC -SOURCE -ACTIVATE -UPDOK has been specified. We need to open the gld and db regions now
		 * in case this is a secondary -> primary transition. This is so we can later switch journal files in all
		 * journaled regions when the transition actually happens inside "gtmsource_rootprimary_init". But since
		 * we have not yet done a "jnlpool_init", we dont know if updates are disabled in it or not. Although we
		 * need to do the gld/db open only if updates are currently disabled in the jnlpool, we do this always
		 * because once we do a jnlpool_init, we will come back with the ftok on the jnlpool held and that has
		 * issues with later db open since we will try to hold the db ftok as part of db open and the ftok logic
		 * currently has assumptions that a process holds only one ftok at any point in time.
		 */
		assert(NULL == gd_header);
		gvinit();
		all_files_open = region_init(FALSE);
		if (!all_files_open)
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	jnlpool_init(GTMSOURCE, gtmsource_options.start, &is_jnlpool_creator);
	/* is_jnlpool_creator == TRUE ==> this process created the journal pool
	 * is_jnlpool_creator == FALSE ==> journal pool already existed and this process simply attached to it.
	 */
	if (gtmsource_options.shut_down)
		gtmsource_exit(gtmsource_shutdown(FALSE, NORMAL_SHUTDOWN) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.activate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_ACTIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.deactivate)
		gtmsource_exit(gtmsource_mode_change(GTMSOURCE_MODE_PASSIVE_REQUESTED) - NORMAL_SHUTDOWN);
	else if (gtmsource_options.checkhealth)
		gtmsource_exit(gtmsource_checkhealth() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.changelog)
		 gtmsource_exit(gtmsource_changelog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showbacklog)
		gtmsource_exit(gtmsource_showbacklog() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.stopsourcefilter)
		gtmsource_exit(gtmsource_stopfilter() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.jnlpool)
		gtmsource_exit(gtmsource_jnlpool() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.losttncomplete)
		gtmsource_exit(gtmsource_losttncomplete() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.needrestart)
		gtmsource_exit(gtmsource_needrestart() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.showfreeze)
		gtmsource_exit(gtmsource_showfreeze() - NORMAL_SHUTDOWN);
	else if (gtmsource_options.setfreeze)
		gtmsource_exit(gtmsource_setfreeze() - NORMAL_SHUTDOWN);
	else if (!gtmsource_options.start)
	{
		assert(CLI_PRESENT == cli_present("STATSLOG"));
		gtmsource_exit(gtmsource_statslog() - NORMAL_SHUTDOWN);
	}
	assert(gtmsource_options.start);
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* Set "child_server_running" to FALSE before forking off child. Wait for it to be set to TRUE by the child. */
	gtmsource_local = jnlpool.gtmsource_local;
	gtmsource_local->child_server_running = FALSE;
	FORK(pid);
	if (0 > pid)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
			ERR_TEXT, 2, RTS_ERROR_LITERAL("Could not fork source server"), save_errno);
	} else if (0 < pid)
	{	/* Parent. Wait until child sets "child_server_running" to FALSE. That is an indication that the child
		 * source server has completed its initialization phase and is all set so the parent command can return.
		 */
		while (isalive = is_proc_alive(pid, 0))	/* note : intended assignment */
		{
			if (gtmsource_local->child_server_running)
				break;
			/* To take care of reassignment of PIDs, the while condition should be && with the condition
			 * (PPID of pid == process_id)
			 */
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_SRV_START);
			WAITPID(pid, &status, WNOHANG, waitpid_res); /* Release defunct child if dead */
		}
		if (isalive)
		{	/* Child process is alive and started with no issues */
			if (0 != (save_errno = rel_sem(SOURCE, JNL_POOL_ACCESS_SEM)))
				rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0,
					ERR_TEXT, 2, RTS_ERROR_LITERAL("Error in rel_sem"), save_errno);
			ftok_sem_release(jnlpool.jnlpool_dummy_reg, TRUE, TRUE);
		} else
		{	/* Child source server process errored out at startup and is no longer alive.
			 * If we were the one who created the journal pool, let us clean it up.
			 */
			repl_log(stdout, TRUE, TRUE, "Source server startup failed. See source server log file\n");
			if (is_jnlpool_creator)
				status = gtmsource_shutdown(TRUE, NORMAL_SHUTDOWN);
		}
		/* If the parent is killed (or crashes) between the fork and exit, checkhealth may not detect that startup
		 * is in progress - parent forks and dies, the system will release sem 0 and 1, checkhealth might test the
		 * value of sem 1 before the child grabs sem 1.
		 */
		gtmsource_exit(isalive ? SRV_ALIVE : SRV_ERR);
	}
	/* Point stdin to /dev/null */
	OPENFILE("/dev/null", O_RDONLY, null_fd);
	if (0 > null_fd)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to open /dev/null for read"), errno, 0);
	FCNTL3(null_fd, F_DUPFD, 0, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to set stdin to /dev/null"), errno, 0);
	CLOSEFILE(null_fd, rc);
	if (0 > rc)
		rts_error_csa(CSA_ARG(NULL) ERR_REPLERR, RTS_ERROR_LITERAL("Failed to close /dev/null"), errno, 0);
	/* The parent process (source server startup command) will be holding the ftok semaphore and jnlpool access semaphore
	 * at this point. The variables that indicate this would have been copied over to the child during the fork. This will
	 * make the child think it is actually holding them as well when actually it is not. Reset those variables in the child
	 * to ensure they do not misrepresent the holder of those semaphores.
	 */
	ftok_sem_reg = NULL;
	udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
	assert(udi->grabbed_ftok_sem);
	udi->grabbed_ftok_sem = FALSE;
	assert(holds_sem[SOURCE][JNL_POOL_ACCESS_SEM]);
	holds_sem[SOURCE][JNL_POOL_ACCESS_SEM] = FALSE;
	assert(!holds_sem[SOURCE][SRC_SERV_COUNT_SEM]);
	/* Start child source server initialization */
	is_src_server = TRUE;
	OPERATOR_LOG_MSG;
	process_id = getpid();
	/* Reinvoke secshr related initialization with the child's pid */
	INVOKE_INIT_SECSHR_ADDRS;
	/* Initialize mutex socket, memory semaphore etc. before any "grab_lock" is done by this process on the journal pool.
	 * Note that the initialization would already have been done by the parent receiver startup command but we need to
	 * redo the initialization with the child process id.
	 */
	assert(mutex_per_process_init_pid && (mutex_per_process_init_pid != process_id));
	mutex_per_process_init();
	START_HEARTBEAT_IF_NEEDED;
	ppid = getppid();
	log_init_status = repl_log_init(REPL_GENERAL_LOG, &gtmsource_log_fd, gtmsource_options.log_file);
	assert(SS_NORMAL == log_init_status);
	repl_log_fd2fp(&gtmsource_log_fp, gtmsource_log_fd);
	if (-1 == (procgp = setsid()))
		send_msg_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				RTS_ERROR_LITERAL("Source server error in setsid"), errno);
#	endif /* REPL_DEBUG_NOBACKGROUND */
	if (ZLIB_CMPLVL_NONE != gtm_zlib_cmp_level)
		gtm_zlib_init();	/* Open zlib shared library for compression/decompression */
	REPL_DPRINT1("Setting up regions\n");
	gvinit();

	/* We use the same code dse uses to open all regions but we must make sure they are all open before proceeding. */
	all_files_open = region_init(FALSE);
	if (!all_files_open)
	{
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NOTALLDBOPN);
		gtmsource_exit(ABNORMAL_SHUTDOWN);
	}
	/* Determine primary side null subscripts collation order */
	/* Also check whether all regions have same null collation order */
	this_side_std_null_coll = -1;
	for (reg = gd_header->regions, region_top = gd_header->regions + gd_header->n_regions; reg < region_top; reg++)
	{
		csa = &FILE_INFO(reg)->s_addrs;
		if (this_side_std_null_coll != csa->hdr->std_null_coll)
		{
			if (-1 == this_side_std_null_coll)
				this_side_std_null_coll = csa->hdr->std_null_coll;
			else
			{
				gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_NULLCOLLDIFF);
				gtmsource_exit(ABNORMAL_SHUTDOWN);
			}
		}
		if (!REPL_ALLOWED(csa) && JNL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(4) ERR_REPLOFFJNLON, 2, DB_LEN_STR(reg));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		if (reg->read_only && REPL_ALLOWED(csa))
		{
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
				   RTS_ERROR_LITERAL("Source Server does not have write permissions to one or "
					             "more database files that are replicated"));
			gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
	}
	/* Initialize source server alive/dead state related fields in "gtmsource_local" before the ftok semaphore is released */
	gtmsource_local->gtmsource_pid = process_id;
	gtmsource_local->gtmsource_state = GTMSOURCE_START;
	if (is_jnlpool_creator)
	{
		DEBUG_ONLY(jnlpool.jnlpool_ctl->jnlpool_creator_pid = process_id);
		gtmsource_seqno_init(this_side_std_null_coll);
		if (ROOTPRIMARY_SPECIFIED == gtmsource_options.rootprimary)
		{	/* Created the journal pool as a root primary. Append a history record to the replication instance file.
			 * Invoke the function "gtmsource_rootprimary_init" to do that.
			 */
			gtmsource_rootprimary_init(jnlpool.jnlpool_ctl->jnl_seqno);
		}
	}
	/* after this point we can no longer have the case where all the regions are unreplicated/non-journaled. */
#	ifndef REPL_DEBUG_NOBACKGROUND
	/* It is necessary for every process that is using the ftok semaphore to increment the counter by 1. This is used
	 * by the last process that shuts down to delete the ftok semaphore when it notices the counter to be 0.
	 * Note that the parent source server startup command would have done an increment of the ftok counter semaphore
	 * for the replication instance file. But the source server process (the child) that comes here would not have done
	 * that. Do that while the parent is still holding on to the ftok semaphore waiting for our okay.
	 */
	if (!ftok_sem_incrcnt(jnlpool.jnlpool_dummy_reg))
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_JNLPOOLSETUP);
	/* Increment the source server count semaphore */
	status = incr_sem(SOURCE, SRC_SERV_COUNT_SEM);
	if (0 != status)
	{
		save_errno = errno;
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Counter semaphore increment failure in child source server"), save_errno);
	}
#	else
	if (0 != (save_errno = rel_sem_immediate(SOURCE, JNL_POOL_ACCESS_SEM)))
	{
		rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_JNLPOOLSETUP, 0, ERR_TEXT, 2,
			RTS_ERROR_LITERAL("Error in rel_sem_immediate"), save_errno);
	}
#	endif /* REPL_DEBUG_NOBACKGROUND */

	gtmsource_srv_count++;
	gtmsource_local->child_server_running = TRUE;	/* At this point, the parent startup command will stop waiting for child */
	gtm_event_log_init();
	/* Log source server startup command line first */
	SPRINTF(tmpmsg, "%s %s\n", cli_lex_in_ptr->argv[0], cli_lex_in_ptr->in_str);
	repl_log(gtmsource_log_fp, TRUE, TRUE, tmpmsg);

	SPRINTF(tmpmsg, "GTM Replication Source Server with Pid [%d] started for Secondary Instance [%s]",
		process_id, gtmsource_local->secondary_instname);
	sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
	repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	if (is_jnlpool_creator)
	{
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Created jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	} else
		repl_log(gtmsource_log_fp, TRUE, TRUE, "Attached to existing jnlpool with shmid = [%d] and semid = [%d]\n",
			jnlpool.repl_inst_filehdr->jnlpool_shmid, jnlpool.repl_inst_filehdr->jnlpool_semid);
	gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
#	ifdef GTM_TLS
	if (REPL_TLS_REQUESTED)
	{
		repl_do_tls_init(gtmsource_log_fp);
		assert(REPL_TLS_REQUESTED || PLAINTEXT_FALLBACK);
	}
#	endif
	if (jnlpool.jnlpool_ctl->freeze)
	{
		last_seen_freeze_flag = jnlpool.jnlpool_ctl->freeze;
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFROZEN, 1, jnlpool.repl_inst_filehdr->inst_info.this_instname);
		repl_log(gtmsource_log_fp, TRUE, FALSE, print_msg);
		sgtm_putmsg(print_msg, VARLSTCNT(3) ERR_REPLINSTFREEZECOMMENT, 1, jnlpool.jnlpool_ctl->freeze_comment);
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
	}
	gtmsource_local->jnlfileonly = gtmsource_options.jnlfileonly;
	do
	{ 	/* If mode is passive, go to sleep. Wakeup every now and then and check to see if I have to become active. */
		gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_START;
		if ((gtmsource_local->mode == GTMSOURCE_MODE_PASSIVE) && (gtmsource_local->shutdown == NO_SHUTDOWN))
		{
			gtmsource_poll_actions(FALSE);
			SHORT_SLEEP(GTMSOURCE_WAIT_FOR_MODE_CHANGE);
			continue;
		}
		if (GTMSOURCE_MODE_PASSIVE == gtmsource_local->mode)
		{	/* Shutdown initiated */
			assert(gtmsource_local->shutdown == SHUTDOWN);
			sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2,
				    RTS_ERROR_LITERAL("GTM Replication Source Server Shutdown signalled"));
			repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
			gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
			break;
		}
		gtmsource_poll_actions(FALSE);
		if (GTMSOURCE_CHANGING_MODE == gtmsource_state)
			continue;
		if (GTMSOURCE_MODE_ACTIVE_REQUESTED == gtmsource_local->mode)
			gtmsource_local->mode = GTMSOURCE_MODE_ACTIVE;
		SPRINTF(tmpmsg, "GTM Replication Source Server now in ACTIVE mode using port %d", gtmsource_local->secondary_port);
		sgtm_putmsg(print_msg, VARLSTCNT(4) ERR_REPLINFO, 2, LEN_AND_STR(tmpmsg));
		repl_log(gtmsource_log_fp, TRUE, TRUE, print_msg);
		gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "REPLINFO", print_msg);
		DEBUG_ONLY(repl_csa = &FILE_INFO(jnlpool.jnlpool_dummy_reg)->s_addrs;)
		assert(!repl_csa->hold_onto_crit);	/* so it is ok to invoke "grab_lock" and "rel_lock" unconditionally */
		grab_lock(jnlpool.jnlpool_dummy_reg, TRUE, HANDLE_CONCUR_ONLINE_ROLLBACK);
		if (GTMSOURCE_HANDLE_ONLN_RLBK == gtmsource_state)
		{
			repl_log(gtmsource_log_fp, TRUE, TRUE, "Starting afresh due to ONLINE ROLLBACK\n");
			repl_log(gtmsource_log_fp, TRUE, TRUE, "REPL INFO - Current Jnlpool Seqno : %llu\n",
					jnlpool.jnlpool_ctl->jnl_seqno);
			continue;
		}
		QWASSIGN(gtmsource_local->read_addr, jnlpool.jnlpool_ctl->write_addr);
		gtmsource_local->read = jnlpool.jnlpool_ctl->write;
		gtmsource_local->read_state = gtmsource_local->jnlfileonly ? READ_FILE : READ_POOL;
		read_jnl_seqno = gtmsource_local->read_jnl_seqno;
		assert(read_jnl_seqno <= jnlpool.jnlpool_ctl->jnl_seqno);
		if (read_jnl_seqno < jnlpool.jnlpool_ctl->jnl_seqno)
		{
			gtmsource_local->read_state = READ_FILE;
			QWASSIGN(gtmsource_save_read_jnl_seqno, jnlpool.jnlpool_ctl->jnl_seqno);
			gtmsource_pool2file_transition = TRUE; /* so that we read the latest gener jnl files */
		}
		rel_lock(jnlpool.jnlpool_dummy_reg);
		if (SS_NORMAL != (status = gtmsource_alloc_tcombuff()))
			rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
				  RTS_ERROR_LITERAL("Error allocating initial tcom buffer space. Malloc error"), status);
		gtmsource_filter = NO_FILTER;
		if ('\0' != gtmsource_local->filter_cmd[0])
		{
			if (SS_NORMAL == (status = repl_filter_init(gtmsource_local->filter_cmd)))
				gtmsource_filter |= EXTERNAL_FILTER;
			else
				gtmsource_exit(ABNORMAL_SHUTDOWN);
		}
		gtmsource_process();
		/* gtmsource_process returns only when mode needs to be changed to PASSIVE */
		assert(gtmsource_state == GTMSOURCE_CHANGING_MODE);
		gtmsource_ctl_close();
		gtmsource_free_msgbuff();
		gtmsource_free_tcombuff();
		gtmsource_free_filter_buff();
		gtmsource_stop_heartbeat();
		if (FD_INVALID != gtmsource_sock_fd)
			repl_close(&gtmsource_sock_fd);
		if (gtmsource_filter & EXTERNAL_FILTER)
			repl_stop_filter();
	} while (TRUE);
Ejemplo n.º 9
0
bool gtcmtr_put(void)
{
	cm_region_list	*reg_ref;
	mval		v;
	unsigned char	buff[MAX_ZWR_KEY_SZ], *end;
	unsigned char	*ptr, regnum;
	short		n;
	unsigned short	top, len;
	static readonly gds_file_id file;

	error_def(ERR_KEY2BIG);
	error_def(ERR_REC2BIG);
	error_def(ERR_GVIS);
	error_def(ERR_DBPRIVERR);

	ptr = curr_entry->clb_ptr->mbf;
	assert(*ptr == CMMS_Q_PUT);
	ptr++;
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	regnum = *ptr++;
	reg_ref = gtcm_find_region(curr_entry,regnum);
	len--; /* subtract size of regnum */
	CM_GET_GVCURRKEY(ptr, len);
	gtcm_bind_name(reg_ref->reghead, TRUE);
	if (gv_cur_region->read_only)
		rts_error(VARLSTCNT(4) ERR_DBPRIVERR, 2, DB_LEN_STR(gv_cur_region));
	if (JNL_ALLOWED(cs_addrs))
	{	/* we need to copy client's specific prc_vec into the global variable in order that the gvcst* routines
		 *	do the right job. actually we need to do this only if JNL_ENABLED(cs_addrs), but since it is not
		 *	easy to re-execute the following two assignments in case gvcst_put()'s call to t_end() encounters a
		 *	cdb_sc_jnlstatemod retry code, we choose the easier approach of executing the following segment
		 *	if JNL_ALLOWED(cs_addrs) is TRUE instead of checking for JNL_ENABLED(cs_addrs) to be TRUE.
		 * this approach has the overhead that we will be doing the following assignments even though JNL_ENABLED
		 * 	might not be TRUE but since the following two are just pointer copies, it is not considered a big overhead.
		 * this approach ensures that the jnl_put_jrt_pini() gets the appropriate prc_vec for writing into the
		 * 	journal record in case JNL_ENABLED turns out to be TRUE in t_end() time.
		 * note that the value of JNL_ALLOWED(cs_addrs) cannot be changed on the fly without obtaining standalone access
		 * 	and hence the correctness of prc_vec (whenever it turns out necessary) is guaranteed.
		 */
		originator_prc_vec = curr_entry->pvec;
		cs_addrs->jnl->pini_addr = reg_ref->pini_addr;
	}
	GET_USHORT(len, ptr);
	ptr += SIZEOF(unsigned short);
	v.mvtype = MV_STR;
	v.str.len = len;
	v.str.addr = (char *)ptr;
	if ((n = gv_currkey->end + 1) > gv_cur_region->max_key_size)
	{
		if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0)
			end = &buff[MAX_ZWR_KEY_SZ - 1];
		rts_error(VARLSTCNT(11) ERR_KEY2BIG, 4, n, (int4)gv_cur_region->max_key_size,
			REG_LEN_STR(gv_cur_region), 0, ERR_GVIS, 2, end - buff, buff);
	}
	if (n + v.str.len + SIZEOF(rec_hdr) > gv_cur_region->max_rec_size)
	{
		if ((end = format_targ_key(&buff[0], MAX_ZWR_KEY_SZ, gv_currkey, TRUE)) == 0)
			end = &buff[MAX_ZWR_KEY_SZ - 1];
		rts_error(VARLSTCNT(10) ERR_REC2BIG, 4, n + v.str.len + SIZEOF(rec_hdr),
			(int4)gv_cur_region->max_rec_size, REG_LEN_STR(gv_cur_region),
			ERR_GVIS, 2, end - buff, buff);
	}
	gvcst_put(&v);
	if (JNL_ALLOWED(cs_addrs))
		reg_ref->pini_addr = cs_addrs->jnl->pini_addr; /* In case  journal switch occurred */
	ptr = curr_entry->clb_ptr->mbf;
	*ptr++ = CMMS_R_PUT;
	curr_entry->clb_ptr->cbl = S_HDRSIZE;
	return TRUE;
}