Example #1
0
asmlinkage int sunos_shmsys(int op, unsigned long arg1, unsigned long arg2,
                            unsigned long arg3)
{
    unsigned long raddr;
    int rval;

    switch(op) {
    case 0:
        /* do_shmat(): attach a shared memory area */
        rval = do_shmat((int)arg1,(char __user *)arg2,(int)arg3,&raddr);
        if (!rval)
            rval = (int) raddr;
        break;
    case 1:
        /* sys_shmctl(): modify shared memory area attr. */
        rval = sys_shmctl((int)arg1,(int)arg2,(struct shmid_ds __user *)arg3);
        break;
    case 2:
        /* sys_shmdt(): detach a shared memory area */
        rval = sys_shmdt((char __user *)arg1);
        break;
    case 3:
        /* sys_shmget(): get a shared memory area */
        rval = sys_shmget((key_t)arg1,(int)arg2,(int)arg3);
        break;
    default:
        rval = -EINVAL;
        break;
    };
    return rval;
}
Example #2
0
asmlinkage long xtensa_shmat(int shmid, char __user *shmaddr, int shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
	if (err)
		return err;
	return (long)ret;
}
Example #3
0
long compat_sys_shmat(int shmid, compat_uptr_t shmaddr, int shmflg)
{
	unsigned long ret;
	long err;

	err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret);
	if (err)
		return err;
	force_successful_syscall_return();
	return (long)ret;
}
Example #4
0
/*
 * Native ABI that is O32 or N64 version
 */
asmlinkage long sys_shmat(int shmid, char __user *shmaddr,
                          int shmflg, unsigned long *addr)
{
	unsigned long raddr;
	int err;

	err = do_shmat(shmid, shmaddr, shmflg, &raddr);
	if (err)
		return err;

	return put_user(raddr, addr);
}
Example #5
0
asmlinkage unsigned long
ia64_shmat (int shmid, void __user *shmaddr, int shmflg)
{
	unsigned long raddr;
	int retval;

	retval = do_shmat(shmid, shmaddr, shmflg, &raddr);
	if (retval < 0)
		return retval;

	force_successful_syscall_return();
	return raddr;
}
Example #6
0
long compat_sys_shmat(int first, int second, compat_uptr_t third, int version,
			void __user *uptr)
{
	int err;
	unsigned long raddr;
	compat_ulong_t __user *uaddr;

	if (version == 1)
		return -EINVAL;
	err = do_shmat(first, uptr, second, &raddr);
	if (err < 0)
		return err;
	uaddr = compat_ptr(third);
	return put_user(raddr, uaddr);
}
Example #7
0
asmlinkage int sunos_shmsys(int op, u32 arg1, u32 arg2, u32 arg3)
{
	struct shmid_ds ksds;
	unsigned long raddr;
	mm_segment_t old_fs = get_fs();
	int rval;

	switch(op) {
	case 0:
		/* do_shmat(): attach a shared memory area */
		rval = do_shmat((int)arg1,(char __user *)(unsigned long)arg2,(int)arg3,&raddr);
		if (!rval)
			rval = (int) raddr;
		break;
	case 1:
		/* sys_shmctl(): modify shared memory area attr. */
		if (!sunos_shmid_get((struct shmid_ds32 __user *)(unsigned long)arg3, &ksds)) {
			set_fs(KERNEL_DS);
			rval = sys_shmctl((int) arg1,(int) arg2,
					  (struct shmid_ds __user *) &ksds);
			set_fs(old_fs);
			if (!rval)
				rval = sunos_shmid_put((struct shmid_ds32 __user *)(unsigned long)arg3,
						       &ksds);
		} else
			rval = -EFAULT;
		break;
	case 2:
		/* sys_shmdt(): detach a shared memory area */
		rval = sys_shmdt((char __user *)(unsigned long)arg1);
		break;
	case 3:
		/* sys_shmget(): get a shared memory area */
		rval = sys_shmget((key_t)arg1,(int)arg2,(int)arg3);
		break;
	default:
		rval = -EINVAL;
		break;
	};
	return rval;
}
int 	mu_rndwn_replpool(replpool_identifier *replpool_id, repl_inst_hdr_ptr_t repl_inst_filehdr, int shm_id, boolean_t *ipc_rmvd)
{
	int			semval, status, save_errno, nattch;
	char			*instfilename, pool_type;
	sm_uc_ptr_t		start_addr;
	struct shmid_ds		shm_buf;
	unix_db_info		*udi;
	sgmnt_addrs		*csa;
	boolean_t		anticipatory_freeze_available, force_attach;
	DCL_THREADGBL_ACCESS;

	SETUP_THREADGBL_ACCESS;
	assert(INVALID_SHMID != shm_id);
	instfilename = replpool_id->instfilename;
	pool_type = replpool_id->pool_type;
	assert((JNLPOOL_SEGMENT == pool_type) || (RECVPOOL_SEGMENT == pool_type));
	anticipatory_freeze_available = ANTICIPATORY_FREEZE_AVAILABLE;
	force_attach = (jgbl.onlnrlbk || (!jgbl.mur_rollback && !argumentless_rundown && anticipatory_freeze_available));
	if (-1 == shmctl(shm_id, IPC_STAT, &shm_buf))
	{
		save_errno = errno;
		ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmctl()");
	}
	nattch = shm_buf.shm_nattch;
	if ((0 != nattch) && !force_attach)
	{
		util_out_print("Replpool segment (id = !UL) for replication instance !AD is in use by another process.",
				TRUE, shm_id, LEN_AND_STR(instfilename));
		return -1;
	}
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t) do_shmat(shm_id, 0, 0)))
	{
		save_errno = errno;
		ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmat()");
	}
	ESTABLISH_RET(mu_rndwn_replpool_ch, -1);
	/* assert that the identifiers are at the top of replpool control structure */
	assert(0 == offsetof(jnlpool_ctl_struct, jnlpool_id));
	assert(0 == offsetof(recvpool_ctl_struct, recvpool_id));
	memcpy((void *)replpool_id, (void *)start_addr, SIZEOF(replpool_identifier));
	if (memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 3))
			util_out_print(
				"Incorrect version for the replpool segment (id = !UL) belonging to replication instance !AD",
					TRUE, shm_id, LEN_AND_STR(instfilename));
		else
			util_out_print("Incorrect replpool format for the segment (id = !UL) belonging to replication instance !AD",
					TRUE, shm_id, LEN_AND_STR(instfilename));
		DETACH_AND_RETURN(start_addr, shm_id, instfilename);
	}
	if (memcmp(replpool_id->now_running, gtm_release_name, gtm_release_name_len + 1))
	{
		util_out_print("Attempt to access with version !AD, while already using !AD for replpool segment (id = !UL)"
				" belonging to replication instance !AD.", TRUE, gtm_release_name_len, gtm_release_name,
				LEN_AND_STR(replpool_id->now_running), shm_id, LEN_AND_STR(instfilename));
		DETACH_AND_RETURN(start_addr, shm_id, instfilename);
	}
	/* Assert that if we haven't yet attached to the journal pool yet, we have the corresponding global vars set to NULL */
	assert((JNLPOOL_SEGMENT != pool_type) || ((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl)));
	if (JNLPOOL_SEGMENT == pool_type)
	{	/* Initialize variables to simulate a "jnlpool_init". This is required by "repl_inst_flush_jnlpool" called below */
		jnlpool_ctl = jnlpool.jnlpool_ctl = (jnlpool_ctl_ptr_t)start_addr;
		assert(NULL != jnlpool.jnlpool_dummy_reg);
		udi = FILE_INFO(jnlpool.jnlpool_dummy_reg);
		csa = &udi->s_addrs;
		csa->critical = (mutex_struct_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl + JNLPOOL_CTL_SIZE);
		csa->nl = (node_local_ptr_t)((sm_uc_ptr_t)csa->critical + CRIT_SPACE + SIZEOF(mutex_spin_parms_struct));
		/* secshr_db_clnup uses this relationship */
		assert(jnlpool.jnlpool_ctl->filehdr_off);
		assert(jnlpool.jnlpool_ctl->srclcl_array_off > jnlpool.jnlpool_ctl->filehdr_off);
		assert(jnlpool.jnlpool_ctl->sourcelocal_array_off > jnlpool.jnlpool_ctl->srclcl_array_off);
		/* Initialize "jnlpool.repl_inst_filehdr" and related fields as "repl_inst_flush_jnlpool" relies on that */
		jnlpool.repl_inst_filehdr = (repl_inst_hdr_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
									+ jnlpool.jnlpool_ctl->filehdr_off);
		jnlpool.gtmsrc_lcl_array = (gtmsrc_lcl_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
									+ jnlpool.jnlpool_ctl->srclcl_array_off);
		jnlpool.gtmsource_local_array = (gtmsource_local_ptr_t)((sm_uc_ptr_t)jnlpool.jnlpool_ctl
										+ jnlpool.jnlpool_ctl->sourcelocal_array_off);
		if (0 == nattch)
		{	/* No one attached. So, we can safely flush the journal pool so that the gtmsrc_lcl structures in the
			 * jnlpool and disk are in sync with each other. More importantly we are about to remove the jnlpool
			 * so we better get things in sync before that. If anticipatory freeze scheme is in effect, then we
			 * need to keep the journal pool up and running. So, don't reset the crash field in the instance file
			 * header (dictated by the second parameter to repl_inst_flush_jnlpool below).
			 * Note:
			 * If mu_rndwn_repl_instance created new semaphores (in mu_replpool_remove_sem), we need to flush those
			 * to the instance file as well. So, override the jnlpool_semid and jnlpool_semid_ctime with the new
			 * values.
			 */
			assert((INVALID_SEMID != repl_inst_filehdr->jnlpool_semid)
					&& (0 != repl_inst_filehdr->jnlpool_semid_ctime));
			jnlpool.repl_inst_filehdr->jnlpool_semid = repl_inst_filehdr->jnlpool_semid;
			jnlpool.repl_inst_filehdr->jnlpool_semid_ctime = repl_inst_filehdr->jnlpool_semid_ctime;
			repl_inst_flush_jnlpool(FALSE, !anticipatory_freeze_available);
			assert(!jnlpool.repl_inst_filehdr->crash || anticipatory_freeze_available);
			/* Refresh local copy (repl_inst_filehdr) with the copy that was just flushed (jnlpool.repl_inst_filehdr) */
			memcpy(repl_inst_filehdr, jnlpool.repl_inst_filehdr, SIZEOF(repl_inst_hdr));
			if (!anticipatory_freeze_available)
			{ 	/* Now that jnlpool has been flushed and there is going to be no journal pool, reset
				 * "jnlpool.repl_inst_filehdr" as otherwise other routines (e.g. "repl_inst_recvpool_reset") are
				 * affected by whether this is NULL or not.
				 */
				jnlpool.jnlpool_ctl = NULL;
				jnlpool_ctl = NULL;
				jnlpool.gtmsrc_lcl_array = NULL;
				jnlpool.gtmsource_local_array = NULL;
				jnlpool.jnldata_base = NULL;
				jnlpool.repl_inst_filehdr = NULL;
			}
		} /* else we are ONLINE ROLLBACK. repl_inst_flush_jnlpool will be done later after gvcst_init in mur_open_files */
	}
	if ((0 == nattch) && (!anticipatory_freeze_available || (RECVPOOL_SEGMENT == pool_type)))
	{
		if (-1 == shmdt((caddr_t)start_addr))
		{
			save_errno = errno;
			ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shmdt()");
		}
		if (0 != shm_rmid(shm_id))
		{
			save_errno = errno;
			ISSUE_REPLPOOLINST_AND_RETURN(save_errno, shm_id, instfilename, "shm_rmid()");
		}
		if (JNLPOOL_SEGMENT == pool_type)
		{
			repl_inst_filehdr->jnlpool_shmid = INVALID_SHMID;
			repl_inst_filehdr->jnlpool_shmid_ctime = 0;
			assert((NULL == jnlpool.jnlpool_ctl) && (NULL == jnlpool_ctl));
			*ipc_rmvd = TRUE;
		} else
		{
			repl_inst_filehdr->recvpool_shmid = INVALID_SHMID;
			repl_inst_filehdr->recvpool_shmid_ctime = 0;
			*ipc_rmvd = TRUE;
		}
	} else
	{	/* Else we are ONLINE ROLLBACK or anticipatory freeze is in effect and so we want to keep the journal pool available
		 * for the duration of the rollback. Do not remove and/or reset the fields in the file header
	   	 */
		assert((JNLPOOL_SEGMENT != pool_type) || ((NULL != jnlpool.jnlpool_ctl) && (NULL != jnlpool_ctl)));
		if (JNLPOOL_SEGMENT == pool_type)
			*ipc_rmvd = FALSE;
		if (RECVPOOL_SEGMENT == pool_type)
			*ipc_rmvd = FALSE;
	}
	REVERT;
	return 0;
}
Example #9
0
SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second,
		unsigned long, third, void __user *, ptr, long, fifth)
{
	long err;

	/* No need for backward compatibility. We can start fresh... */
	if (call <= SEMCTL) {
		switch (call) {
		case SEMOP:
			err = sys_semtimedop(first, ptr,
					     (unsigned)second, NULL);
			goto out;
		case SEMTIMEDOP:
			err = sys_semtimedop(first, ptr, (unsigned)second,
				(const struct timespec __user *)
					     (unsigned long) fifth);
			goto out;
		case SEMGET:
			err = sys_semget(first, (int)second, (int)third);
			goto out;
		case SEMCTL: {
			err = sys_semctl(first, second,
					 (int)third | IPC_64,
					 (union semun) ptr);
			goto out;
		}
		default:
			err = -ENOSYS;
			goto out;
		}
	}
	if (call <= MSGCTL) {
		switch (call) {
		case MSGSND:
			err = sys_msgsnd(first, ptr, (size_t)second,
					 (int)third);
			goto out;
		case MSGRCV:
			err = sys_msgrcv(first, ptr, (size_t)second, fifth,
					 (int)third);
			goto out;
		case MSGGET:
			err = sys_msgget((key_t)first, (int)second);
			goto out;
		case MSGCTL:
			err = sys_msgctl(first, (int)second | IPC_64, ptr);
			goto out;
		default:
			err = -ENOSYS;
			goto out;
		}
	}
	if (call <= SHMCTL) {
		switch (call) {
		case SHMAT: {
			ulong raddr;
			err = do_shmat(first, ptr, (int)second, &raddr);
			if (!err) {
				if (put_user(raddr,
					     (ulong __user *) third))
					err = -EFAULT;
			}
			goto out;
		}
		case SHMDT:
			err = sys_shmdt(ptr);
			goto out;
		case SHMGET:
			err = sys_shmget(first, (size_t)second, (int)third);
			goto out;
		case SHMCTL:
			err = sys_shmctl(first, (int)second | IPC_64, ptr);
			goto out;
		default:
			err = -ENOSYS;
			goto out;
		}
	} else {
		err = -ENOSYS;
	}
out:
	return err;
}
Example #10
0
/*
 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
 *
 * This is really horribly ugly. This will be remove with new toolchain.
 */
asmlinkage long
sys_ipc(uint call, int first, int second, int third, void *ptr, long fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	ret = -EINVAL;
	switch (call) {
	case SEMOP:
		ret = sys_semop(first, (struct sembuf *)ptr, second);
		break;
	case SEMGET:
		ret = sys_semget(first, second, third);
		break;
	case SEMCTL:
	{
		union semun fourth;

		if (!ptr)
			break;
		ret = (access_ok(VERIFY_READ, ptr, sizeof(long)) ? 0 : -EFAULT)
				|| (get_user(fourth.__pad, (void **)ptr)) ;
		if (ret)
			break;
		ret = sys_semctl(first, second, third, fourth);
		break;
	}
	case MSGSND:
		ret = sys_msgsnd(first, (struct msgbuf *) ptr, second, third);
		break;
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;

			if (!ptr)
				break;
			ret = (access_ok(VERIFY_READ, ptr, sizeof(tmp))
				? 0 : -EFAULT) || copy_from_user(&tmp,
				(struct ipc_kludge *) ptr, sizeof(tmp));
			if (ret)
				break;
			ret = sys_msgrcv(first, tmp.msgp, second, tmp.msgtyp,
					third);
			break;
			}
		default:
			ret = sys_msgrcv(first, (struct msgbuf *) ptr,
					second, fifth, third);
			break;
		}
		break;
	case MSGGET:
		ret = sys_msgget((key_t) first, second);
		break;
	case MSGCTL:
		ret = sys_msgctl(first, second, (struct msqid_ds *) ptr);
		break;
	case SHMAT:
		switch (version) {
		default: {
			ulong raddr;
			ret = access_ok(VERIFY_WRITE, (ulong *) third,
					sizeof(ulong)) ? 0 : -EFAULT;
			if (ret)
				break;
			ret = do_shmat(first, (char *) ptr, second, &raddr);
			if (ret)
				break;
			ret = put_user(raddr, (ulong *) third);
			break;
			}
		case 1:	/* iBCS2 emulator entry point */
			if (!segment_eq(get_fs(), get_ds()))
				break;
			ret = do_shmat(first, (char *) ptr, second,
					(ulong *) third);
			break;
		}
		break;
	case SHMDT:
		ret = sys_shmdt((char *)ptr);
		break;
	case SHMGET:
		ret = sys_shmget(first, second, third);
		break;
	case SHMCTL:
		ret = sys_shmctl(first, second, (struct shmid_ds *) ptr);
		break;
	}
	return ret;
}
Example #11
0
void db_init(gd_region *reg, sgmnt_data_ptr_t tsd)
{
	static boolean_t	mutex_init_done = FALSE;
	boolean_t       	is_bg, read_only;
	char            	machine_name[MAX_MCNAMELEN];
	file_control    	*fc;
	int			gethostname_res, stat_res, mm_prot;
	int4            	status, semval, dblksize, fbwsize;
	sm_long_t       	status_l;
	sgmnt_addrs     	*csa;
	sgmnt_data_ptr_t        csd;
	struct sembuf   	sop[3];
	struct stat     	stat_buf;
	union semun		semarg;
	struct semid_ds		semstat;
	struct shmid_ds         shmstat;
	struct statvfs		dbvfs;
	uint4           	sopcnt;
	unix_db_info    	*udi;
#ifdef periodic_timer_removed
	void            	periodic_flush_check();
#endif

	error_def(ERR_CLSTCONFLICT);
	error_def(ERR_CRITSEMFAIL);
	error_def(ERR_DBNAMEMISMATCH);
	error_def(ERR_DBIDMISMATCH);
	error_def(ERR_NLMISMATCHCALC);
	error_def(ERR_REQRUNDOWN);
	error_def(ERR_SYSCALL);

	assert(tsd->acc_meth == dba_bg  ||  tsd->acc_meth == dba_mm);
	is_bg = (dba_bg == tsd->acc_meth);
	read_only = reg->read_only;
	new_dbinit_ipc = FALSE;	/* we did not create a new ipc resource */
	udi = FILE_INFO(reg);
	memset(machine_name, 0, sizeof(machine_name));
	if (GETHOSTNAME(machine_name, MAX_MCNAMELEN, gethostname_res))
		rts_error(VARLSTCNT(5) ERR_TEXT, 2, LEN_AND_LIT("Unable to get the hostname"), errno);
	assert(strlen(machine_name) < MAX_MCNAMELEN);
	csa = &udi->s_addrs;
	csa->db_addrs[0] = csa->db_addrs[1] = csa->lock_addrs[0] = NULL;   /* to help in dbinit_ch  and gds_rundown */
	reg->opening = TRUE;
	/*
	 * Create ftok semaphore for this region.
	 * We do not want to make ftok counter semaphore to be 2 for on mupip journal recover process.
	 */
	if (!ftok_sem_get(reg, !mupip_jnl_recover, GTM_ID, FALSE))
		rts_error(VARLSTCNT(4) ERR_DBFILERR, 2, DB_LEN_STR(reg));
	/*
	 * At this point we have ftok_semid sempahore based on ftok key.
	 * Any ftok conflicted region will block at this point.
	 * Say, a.dat and b.dat both has same ftok and we have process A to access a.dat and
	 * process B to access b.dat. In this case only one can continue to do db_init()
	 */
	fc = reg->dyn.addr->file_cntl;
	fc->file_type = reg->dyn.addr->acc_meth;
	fc->op = FC_READ;
	fc->op_buff = (sm_uc_ptr_t)tsd;
	fc->op_len = sizeof(*tsd);
	fc->op_pos = 1;
	dbfilop(fc);		/* Read file header */
	udi->shmid = tsd->shmid;
	udi->semid = tsd->semid;
	udi->sem_ctime = tsd->sem_ctime.ctime;
	udi->shm_ctime = tsd->shm_ctime.ctime;
	dbsecspc(reg, tsd); 	/* Find db segment size */
	if (!mupip_jnl_recover)
	{
		if (INVALID_SEMID == udi->semid)
		{
			if (0 != udi->sem_ctime || INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* We must have somthing wrong in protocol or, code, if this happens */
				GTMASSERT;
			/*
			 * Create new semaphore using IPC_PRIVATE. System guarantees a unique id.
			 */
			if (-1 == (udi->semid = semget(IPC_PRIVATE, FTOK_SEM_PER_ID, RWDALL | IPC_CREAT)))
			{
				udi->semid = INVALID_SEMID;
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semget"), errno);
			}
			udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
			new_dbinit_ipc = TRUE;
			tsd->semid = udi->semid;
			semarg.val = GTM_ID;
			/*
			 * Following will set semaphore number 2 (=FTOK_SEM_PER_ID - 1)  value as GTM_ID.
			 * In case we have orphaned semaphore for some reason, mupip rundown will be
			 * able to identify GTM semaphores from the value and can remove.
			 */
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, SETVAL, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl SETVAL"), errno);
			/*
			 * Warning: We must read the sem_ctime using IPC_STAT after SETVAL, which changes it.
			 *	    We must NOT do any more SETVAL after this. Our design is to use
			 *	    sem_ctime as creation time of semaphore.
			 */
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, FTOK_SEM_PER_ID - 1, IPC_STAT, semarg))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control semctl IPC_STAT"), errno);
			tsd->sem_ctime.ctime = udi->sem_ctime = semarg.buf->sem_ctime;
		} else
		{
			if (INVALID_SHMID == udi->shmid)
				/* if mu_rndwn_file gets standalone access of this region and
				 * somehow mupip process crashes, we can have semid != -1 but shmid == -1
				 */
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("semid is valid but shmid is invalid"));
			semarg.buf = &semstat;
			if (-1 == semctl(udi->semid, 0, IPC_STAT, semarg))
				/* file header has valid semid but semaphore does not exists */
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name));
			else if (semarg.buf->sem_ctime != tsd->sem_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
						ERR_TEXT, 2, LEN_AND_LIT("sem_ctime does not match"));
			if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
				rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
					ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
			else if (shmstat.shm_ctime != tsd->shm_ctime.ctime)
				rts_error(VARLSTCNT(10) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(tsd->machine_name),
					ERR_TEXT, 2, LEN_AND_LIT("shm_ctime does not match"));
		}
		/* We already have ftok semaphore of this region, so just plainly do semaphore operation */
		/* This is the database access control semaphore for any region */
		sop[0].sem_num = 0; sop[0].sem_op = 0;	/* Wait for 0 */
		sop[1].sem_num = 0; sop[1].sem_op = 1;	/* Lock */
		sopcnt = 2;
		if (!read_only)
		{
			sop[2].sem_num = 1; sop[2].sem_op  = 1;	 /* increment r/w access counter */
			sopcnt = 3;
		}
		sop[0].sem_flg = sop[1].sem_flg = sop[2].sem_flg = SEM_UNDO | IPC_NOWAIT;
		SEMOP(udi->semid, sop, sopcnt, status);
		if (-1 == status)
		{
			errno_save = errno;
			gtm_putmsg(VARLSTCNT(4) ERR_CRITSEMFAIL, 2, DB_LEN_STR(reg));
			rts_error(VARLSTCNT(8) ERR_SYSCALL, 5, RTS_ERROR_LITERAL("semop()"), CALLFROM, errno_save);
		}
	} else /* for mupip_jnl_recover we were already in mu_rndwn_file and got "semid" semaphore  */
	{
		if (INVALID_SEMID == udi->semid || 0 == udi->sem_ctime)
			/* make sure mu_rndwn_file() has reset created semaphore for standalone access */
			GTMASSERT;
		if (INVALID_SHMID != udi->shmid || 0 != udi->shm_ctime)
			/* make sure mu_rndwn_file() has reset shared memory */
			GTMASSERT;
		udi->shmid = INVALID_SHMID;	/* reset shmid so dbinit_ch does not get confused in case we go there */
		new_dbinit_ipc = TRUE;
	}
	sem_incremented = TRUE;
	if (new_dbinit_ipc)
	{
		/* Create new shared memory using IPC_PRIVATE. System guarantees a unique id */
#ifdef __MVS__
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, ROUND_UP(reg->sec_size, MEGA_BOUND),
			__IPC_MEGA | IPC_CREAT | RWDALL)))
#else
		if (-1 == (status_l = udi->shmid = shmget(IPC_PRIVATE, reg->sec_size, RWDALL | IPC_CREAT)))
#endif
		{
			udi->shmid = status_l = INVALID_SHMID;
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				  ERR_TEXT, 2, LEN_AND_LIT("Error with database shmget"), errno);
		}
		tsd->shmid = udi->shmid;
		if (-1 == shmctl(udi->shmid, IPC_STAT, &shmstat))
			rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
				ERR_TEXT, 2, LEN_AND_LIT("Error with database control shmctl"), errno);
		tsd->shm_ctime.ctime = udi->shm_ctime = shmstat.shm_ctime;
	}
#ifdef DEBUG_DB64
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, next_smseg, SHM_RND));
	next_smseg = (sm_uc_ptr_t)ROUND_UP((sm_long_t)(next_smseg + reg->sec_size), SHMAT_ADDR_INCS);
#else
	status_l = (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)do_shmat(udi->shmid, 0, SHM_RND));
#endif
	if (-1 == status_l)
	{
		rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
			  ERR_TEXT, 2, LEN_AND_LIT("Error attaching to database shared memory"), errno);
	}
	csa->nl = (node_local_ptr_t)csa->db_addrs[0];
	csa->critical = (mutex_struct_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SIZE);
	assert(((int)csa->critical & 0xf) == 0); 			/* critical should be 16-byte aligned */
#ifdef CACHELINE_SIZE
	assert(0 == ((int)csa->critical & (CACHELINE_SIZE - 1)));
#endif
	/* Note: Here we check jnl_sate from database file and its value cannot change without standalone access.
	 * The jnl_buff buffer should be initialized irrespective of read/write process */
	JNL_INIT(csa, reg, tsd);
	csa->backup_buffer = (backup_buff_ptr_t)(csa->db_addrs[0] + NODE_LOCAL_SPACE + JNL_SHARE_SIZE(tsd));
	csa->lock_addrs[0] = (sm_uc_ptr_t)csa->backup_buffer + BACKUP_BUFFER_SIZE + 1;
	csa->lock_addrs[1] = csa->lock_addrs[0] + LOCK_SPACE_SIZE(tsd) - 1;
	csa->total_blks = tsd->trans_hist.total_blks;   		/* For test to see if file has extended */
	if (new_dbinit_ipc)
	{
		memset(csa->nl, 0, sizeof(*csa->nl));			/* We allocated shared storage -- we have to init it */
		if (JNL_ALLOWED(csa))
		{	/* initialize jb->cycle to a value different from initial value of jpc->cycle (0). although this is not
			 * necessary right now, in the future, the plan is to change jnl_ensure_open() to only do a cycle mismatch
			 * check in order to determine whether to call jnl_file_open() or not. this is in preparation for that.
			 */
			csa->jnl->jnl_buff->cycle = 1;
		}
	}
	if (is_bg)
		csd = csa->hdr = (sgmnt_data_ptr_t)(csa->lock_addrs[1] + 1 + CACHE_CONTROL_SIZE(tsd));
	else
	{
		csa->acc_meth.mm.mmblk_state = (mmblk_que_heads_ptr_t)(csa->lock_addrs[1] + 1);
		FSTAT_FILE(udi->fd, &stat_buf, stat_res);
		if (-1 == stat_res)
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		mm_prot = read_only ? PROT_READ : (PROT_READ | PROT_WRITE);
#ifdef DEBUG_DB64
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)get_mmseg((size_t)stat_buf.st_size),
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
		put_mmseg((caddr_t)(csa->db_addrs[0]), (size_t)stat_buf.st_size);
#else
		if (-1 == (sm_long_t)(csa->db_addrs[0] = (sm_uc_ptr_t)mmap((caddr_t)NULL,
									   (size_t)stat_buf.st_size,
									   mm_prot,
									   GTM_MM_FLAGS, udi->fd, (off_t)0)))
			rts_error(VARLSTCNT(5) ERR_DBFILERR, 2, DB_LEN_STR(reg), errno);
#endif
		csa->db_addrs[1] = csa->db_addrs[0] + stat_buf.st_size - 1;
		csd = csa->hdr = (sgmnt_data_ptr_t)csa->db_addrs[0];
	}
	if (!csa->nl->glob_sec_init)
	{
		assert(new_dbinit_ipc);
		if (is_bg)
			*csd = *tsd;
		if (csd->machine_name[0])                  /* crash occured */
		{
			if (0 != memcmp(csd->machine_name, machine_name, MAX_MCNAMELEN))  /* crashed on some other node */
				rts_error(VARLSTCNT(6) ERR_CLSTCONFLICT, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
			else
				rts_error(VARLSTCNT(6) ERR_REQRUNDOWN, 4, DB_LEN_STR(reg), LEN_AND_STR(csd->machine_name));
		}
		if (is_bg)
		{
			bt_malloc(csa);
			csa->nl->cache_off = -CACHE_CONTROL_SIZE(tsd);
			db_csh_ini(csa);
		}
		db_csh_ref(csa);
		strcpy(csa->nl->machine_name, machine_name);					/* machine name */
		assert(MAX_REL_NAME > gtm_release_name_len);
		memcpy(csa->nl->now_running, gtm_release_name, gtm_release_name_len + 1);	/* GT.M release name */
		memcpy(csa->nl->label, GDS_LABEL, GDS_LABEL_SZ - 1);				/* GDS label */
		memcpy(csa->nl->fname, reg->dyn.addr->fname, reg->dyn.addr->fname_len);		/* database filename */
		csa->nl->creation_date_time = csd->creation.date_time;
		csa->nl->highest_lbm_blk_changed = -1;
		csa->nl->wcs_timers = -1;
		csa->nl->nbb = BACKUP_NOT_IN_PROGRESS;
		csa->nl->unique_id.uid = FILE_INFO(reg)->fileid;            /* save what file we initialized this storage for */
		/* save pointers in csa to access shared memory */
		csa->nl->critical = (sm_off_t)((sm_uc_ptr_t)csa->critical - (sm_uc_ptr_t)csa->nl);
		if (JNL_ALLOWED(csa))
			csa->nl->jnl_buff = (sm_off_t)((sm_uc_ptr_t)csa->jnl->jnl_buff - (sm_uc_ptr_t)csa->nl);
		csa->nl->backup_buffer = (sm_off_t)((sm_uc_ptr_t)csa->backup_buffer - (sm_uc_ptr_t)csa->nl);
		csa->nl->hdr = (sm_off_t)((sm_uc_ptr_t)csd - (sm_uc_ptr_t)csa->nl);
		csa->nl->lock_addrs = (sm_off_t)((sm_uc_ptr_t)csa->lock_addrs[0] - (sm_uc_ptr_t)csa->nl);
		if (!read_only || is_bg)
		{
			csd->trans_hist.early_tn = csd->trans_hist.curr_tn;
			csd->max_update_array_size = csd->max_non_bm_update_array_size
				= ROUND_UP2(MAX_NON_BITMAP_UPDATE_ARRAY_SIZE(csd), UPDATE_ARRAY_ALIGN_SIZE);
			csd->max_update_array_size += ROUND_UP2(MAX_BITMAP_UPDATE_ARRAY_SIZE, UPDATE_ARRAY_ALIGN_SIZE);
			/* add current db_csh counters into the cumulative counters and reset the current counters */
#define TAB_DB_CSH_ACCT_REC(COUNTER, DUMMY1, DUMMY2)					\
				csd->COUNTER.cumul_count += csd->COUNTER.curr_count;	\
				csd->COUNTER.curr_count = 0;
#include "tab_db_csh_acct_rec.h"
#undef TAB_DB_CSH_ACCT_REC
		}
		if (!read_only)
		{
			if (is_bg)
			{
				assert(memcmp(csd, GDS_LABEL, GDS_LABEL_SZ - 1) == 0);
				LSEEKWRITE(udi->fd, (off_t)0, (sm_uc_ptr_t)csd, sizeof(sgmnt_data), errno_save);
				if (0 != errno_save)
				{
					rts_error(VARLSTCNT(9) ERR_DBFILERR, 2, DB_LEN_STR(reg),
						  ERR_TEXT, 2, LEN_AND_LIT("Error with database write"), errno_save);
				}
			}
		}
		reg->dyn.addr->ext_blk_count = csd->extension_size;
		mlk_shr_init(csa->lock_addrs[0], csd->lock_space_size, csa, (FALSE == read_only));
		DEBUG_ONLY(locknl = csa->nl;)	/* for DEBUG_ONLY LOCK_HIST macro */
Example #12
0
int	mur_forward_multi_proc(reg_ctl_list *rctl)
{
	boolean_t		multi_proc, this_reg_stuck, release_latch, ok_to_play;
	boolean_t		cancelled_dbsync_timer, cancelled_timer;
	reg_ctl_list		*rctl_top, *prev_rctl;
	jnl_ctl_list		*jctl;
	gd_region		*reg;
	sgmnt_addrs		*csa;
	seq_num 		rec_token_seq;
	jnl_tm_t		rec_time;
	enum broken_type	recstat;
	jnl_record		*rec;
	enum jnl_record_type	rectype;
	char			errstr[256];
	int			i, rctl_index, save_errno, num_procs_stuck, num_reg_stuck;
	uint4			status, regcnt_stuck, num_partners, start_hrtbt_cntr;
	forw_multi_struct	*forw_multi;
	shm_forw_multi_t	*sfm;
	multi_struct 		*multi;
	jnl_tm_t		adjusted_resolve_time;
	shm_reg_ctl_t		*shm_rctl_start, *shm_rctl, *first_shm_rctl;
	size_t			shm_size, reccnt, copy_size;
	int4			*size_ptr;
	char			*shmPtr; /* not using "shm_ptr" since it is already used in an AIX include file */
	int			shmid;
	multi_proc_shm_hdr_t	*mp_hdr;	/* Pointer to "multi_proc_shm_hdr_t" structure in shared memory */

	status = 0;
	/* Although we made sure the # of tasks is the same as the # of processes forked off (in the "gtm_multi_proc"
	 * invocation in "mur_forward"), it is possible one of the forked process finishes one invocation of
	 * "mur_forward_multi_proc" before even another forked process gets assigned one task in "gtm_multi_proc_helper".
	 * In this case, we would be invoked more than once. But the first invocation would have done all the needed stuff
	 * so return for later invocations.
	 */
	if (mur_forward_multi_proc_done)
		return 0;
	mur_forward_multi_proc_done = TRUE;
	/* Note: "rctl" is unused. But cannot avoid passing it since "gtm_multi_proc" expects something */
	prev_rctl = NULL;
	rctl_start = NULL;
	adjusted_resolve_time = murgbl.adjusted_resolve_time;
	assert(0 == murgbl.regcnt_remaining);
	multi_proc = multi_proc_in_use;	/* cache value in "local" to speed up access inside loops below */
	if (multi_proc)
	{
		mp_hdr = multi_proc_shm_hdr;
		shm_rctl_start = mur_shm_hdr->shm_rctl_start;
		if (jgbl.onlnrlbk)
		{
			for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
			{
				assert(rctl->csa->hold_onto_crit);	/* would have been set in parent process */
				rctl->csa->hold_onto_crit = FALSE;	/* reset since we dont own this region */
				assert(rctl->csa->now_crit);		/* would have been set in parent process */
				rctl->csa->now_crit = FALSE;		/* reset since we dont own this region */
			}
		}
		START_HEARTBEAT_IF_NEEDED; /* heartbeat timer needed later (in case not already started by "gtm_multi_proc") */
	}
	first_shm_rctl = NULL;
	/* Phase1 of forward recovery starts */
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		/* Check if "rctl" is available for us or if some other concurrent process has taken it */
		if (multi_proc)
		{
			rctl_index = rctl - &mur_ctl[0];
			shm_rctl = &shm_rctl_start[rctl_index];
			if (shm_rctl->owning_pid)
			{
				assert(process_id != shm_rctl->owning_pid);
				continue;
			}
			GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			assert(release_latch);
			for ( ; rctl < rctl_top; rctl++, shm_rctl++)
			{
				if (shm_rctl->owning_pid)
				{
					assert(process_id != shm_rctl->owning_pid);
					continue;
				}
				shm_rctl->owning_pid = process_id;	/* Declare ownership */
				rctl->this_pid_is_owner = TRUE;
				if (jgbl.onlnrlbk)
				{	/* This is an online rollback and crit was grabbed on all regions by the parent rollback
					 * process. But this child process now owns this region and does the actual rollback on
					 * this region so borrow crit for the duration of this child process.
					 */
					csa = rctl->csa;
					csa->hold_onto_crit = TRUE;
					csa->now_crit = TRUE;
					assert(csa->nl->in_crit == mp_hdr->parent_pid);
					csa->nl->in_crit = process_id;
					assert(csa->nl->onln_rlbk_pid == mp_hdr->parent_pid);
					csa->nl->onln_rlbk_pid = process_id;
				}
				if (NULL == first_shm_rctl)
					first_shm_rctl = shm_rctl;
				break;
			}
			REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
			if (rctl >= rctl_top)
			{
				assert(rctl == rctl_top);
				break;
			}
			/* Set key to print this rctl'ss region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
#			ifdef MUR_DEBUG
			fprintf(stderr, "pid = %d : Owns region %s\n", process_id, multi_proc_key);
#			endif
		} else
			rctl->this_pid_is_owner = TRUE;
		if (mur_options.forward)
		{
			assert(NULL == rctl->jctl_turn_around);
			jctl = rctl->jctl = rctl->jctl_head;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = JNL_HDR_LEN;
			jnl_fence_ctl.fence_list = JNL_FENCE_LIST_END; /* initialized to reflect journaling is not enabled */
			if (mur_options.rollback)
				jgbl.mur_jrec_seqno = jctl->jfh->start_seqno;
		} else
		{
			jctl = rctl->jctl = (NULL == rctl->jctl_turn_around) ? rctl->jctl_head : rctl->jctl_turn_around;
			assert(jctl->reg_ctl == rctl);
			jctl->rec_offset = jctl->turn_around_offset;
			jgbl.mur_jrec_seqno = jctl->turn_around_seqno;
			assert((NULL != rctl->jctl_turn_around) || (0 == jctl->rec_offset));
		}
		if (mur_options.rollback)
		{
			if (murgbl.consist_jnl_seqno < jgbl.mur_jrec_seqno)
			{
				/* Assert that murgbl.losttn_seqno is never lesser than jgbl.mur_jrec_seqno (the turnaround
				 * point seqno) as this is what murgbl.consist_jnl_seqno is going to be set to and will
				 * eventually be the post-rollback seqno. If this condition is violated, the result of the
				 * recovery is a compromised database (the file header will indicate a Region Seqno which
				 * is not necessarily correct since seqnos prior to it might be absent in the database).
				 * Therefore, this is an out-of-design situation with respect to rollback and so stop it.
				 */
				assert(murgbl.losttn_seqno >= jgbl.mur_jrec_seqno);
				murgbl.consist_jnl_seqno = jgbl.mur_jrec_seqno;
			}
			assert(murgbl.consist_jnl_seqno <= murgbl.losttn_seqno);
		}
		if (mur_options.update || mur_options.extr[GOOD_TN])
		{
			reg = rctl->gd;
			gv_cur_region = reg;
			tp_change_reg();	/* note : sets cs_addrs to non-NULL value even if gv_cur_region->open is FALSE
						 * (cs_data could still be NULL). */
			rctl->csa = cs_addrs;
			cs_addrs->miscptr = (void *)rctl;
			rctl->csd = cs_data;
			rctl->sgm_info_ptr = cs_addrs->sgm_info_ptr;
			assert(!reg->open || (NULL != cs_addrs->dir_tree));
			gv_target = cs_addrs->dir_tree;
		}
		jctl->after_end_of_data = FALSE;
		status = mur_next(jctl, jctl->rec_offset);
		assert(ERR_JNLREADEOF != status);	/* cannot get EOF at start of forward processing */
		if (SS_NORMAL != status)
			goto finish;
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at the start");
		rctl->process_losttn = FALSE;
		/* Any multi-region TP transaction will be processed as multiple single-region TP transactions up
		 * until the tp-resolve-time is reached. From then on, they will be treated as one multi-region TP
		 * transaction. This is needed for proper lost-tn determination (any multi-region transaction that
		 * gets played in a region AFTER it has already encountered a broken tn should treat this as a lost tn).
		 */
		do
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			if (rec_time >= adjusted_resolve_time)
				break;	/* Records after this adjusted resolve_time will be processed below in phase2 */
			/* Note: Since we do hashtable token processing only for records from tp_resolve_time onwards,
			 * it is possible that if we encounter any broken transactions here we wont know they are broken
			 * but will play them as is. That is unavoidable. Specify -SINCE_TIME (for -BACKWARD rollback/recover)
			 * and -VERIFY (for -FORWARD rollback/recover) to control tp_resolve_time (and in turn more
			 * effective broken tn determination).
			 */
			status = mur_forward_play_cur_jrec(rctl);
			if (SS_NORMAL != status)
				break;
			status = mur_next_rec(&jctl);
		} while (SS_NORMAL == status);
		CHECK_IF_EOF_REACHED(rctl, status); /* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
		if (SS_NORMAL != status)
		{	/* ERR_FILENOTCREATE is possible from "mur_cre_file_extfmt" OR	ERR_FORCEDHALT is possible
			 * from "mur_forward_play_cur_jrec". No other errors are known to occur here. Assert accordingly.
			 */
			assert((ERR_FILENOTCREATE == status) || (ERR_FORCEDHALT == status));
			goto finish;
		}
		if (rctl->forw_eof_seen)
		{
			PRINT_VERBOSE_STAT(jctl, "mur_forward:Reached EOF before tp_resolve_time");
			continue;	/* Reached EOF before even getting to tp_resolve_time.
					 * Do not even consider region for next processing loop */
		}
		rctl->last_tn = 0;
		murgbl.regcnt_remaining++;	/* # of regions participating in recovery at this point */
		if (NULL == rctl_start)
			rctl_start = rctl;
		if (NULL != prev_rctl)
		{
			prev_rctl->next_rctl = rctl;
			rctl->prev_rctl = prev_rctl;
		}
		prev_rctl = rctl;
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		PRINT_VERBOSE_STAT(jctl, "mur_forward:at tp_resolve_time");
	}
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	/* Note that it is possible for rctl_start to be NULL at this point. That is there is no journal record in any region
	 * AFTER the calculated tp-resolve-time. This is possible if for example -AFTER_TIME was used and has a time later
	 * than any journal record in all journal files. If rctl_start is NULL, prev_rctl should also be NULL and vice versa.
	 */
	if (NULL != rctl_start)
	{
		assert(NULL != prev_rctl);
		prev_rctl->next_rctl = rctl_start;
		rctl_start->prev_rctl = prev_rctl;
	}
	rctl = rctl_start;
	regcnt_stuck = 0; /* # of regions we are stuck in waiting for other regions to resolve a multi-region TP transaction */
	assert((NULL == rctl) || (NULL == rctl->forw_multi));
	gv_cur_region = NULL;	/* clear out any previous value to ensure gv_cur_region/cs_addrs/cs_data
				 * all get set in sync by the MUR_CHANGE_REG macro below.
				 */
	/* Phase2 of forward recovery starts */
	while (NULL != rctl)
	{	/* while there is at least one region remaining with unprocessed journal records */
		assert(NULL != rctl_start);
		assert(0 < murgbl.regcnt_remaining);
		if (NULL != rctl->forw_multi)
		{	/* This region's current journal record is part of a TP transaction waiting for other regions */
			regcnt_stuck++;
			assert(regcnt_stuck <= murgbl.regcnt_remaining);
			if (regcnt_stuck == murgbl.regcnt_remaining)
			{
				assertpro(multi_proc_in_use); /* Else : Out-of-design situation. Stuck in ALL regions. */
				/* Check one last time if all regions are stuck waiting for another process to resolve the
				 * multi-region TP transaction. If so, wait in a sleep loop. If not, we can proceed.
				 */
				rctl = rctl_start;
				start_hrtbt_cntr = heartbeat_counter;
				do
				{
					if (IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
					{	/* We are at a logical point. So exit if signaled by parent */
						status = ERR_FORCEDHALT;
						goto finish;
					}
					forw_multi = rctl->forw_multi;
					assert(NULL != forw_multi);
					sfm = forw_multi->shm_forw_multi;
					assert(NULL != sfm);
					assert(sfm->num_reg_seen_forward <= sfm->num_reg_seen_backward);
#					ifdef MUR_DEBUG
					fprintf(stderr, "Pid = %d : Line %d : token = %llu : forward = %d : backward = %d\n",
						process_id, __LINE__, (long long int)sfm->token,
						sfm->num_reg_seen_forward, sfm->num_reg_seen_backward);
#					endif
					if (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward)
					{	/* We are no longer stuck in this region */
						assert(!forw_multi->no_longer_stuck);
						forw_multi->no_longer_stuck = TRUE;
						break;
					}
					rctl = rctl->next_rctl;	/* Move on to the next available region */
					assert(NULL != rctl);
					if (rctl == rctl_start)
					{	/* We went through all regions once and are still stuck.
						 * Sleep until at leat TWO heartbeats have elapsed after which check for deadlock.
						 * Do this only in the child process that owns the FIRST region in the region list.
						 * This way we dont have contention for the GRAB_MULTI_PROC_LATCH from
						 * all children at more or less the same time.
						 */
						if ((rctl == mur_ctl) && (heartbeat_counter > (start_hrtbt_cntr + 2)))
						{	/* Check if all processes are stuck for a while. If so assertpro */
							GRAB_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							assert(release_latch);
							shm_rctl_start = mur_shm_hdr->shm_rctl_start;
							num_reg_stuck = 0;
							for (i = 0; i < murgbl.reg_total; i++)
							{
								shm_rctl = &shm_rctl_start[i];
								sfm = shm_rctl->shm_forw_multi;
								if (NULL != sfm)
								{
									if (sfm->num_reg_seen_forward != sfm->num_reg_seen_backward)
										num_reg_stuck++;
								}
							}
							REL_MULTI_PROC_LATCH_IF_NEEDED(release_latch);
							/* If everyone is stuck at this point, it is an out-of-design situation */
							assertpro(num_reg_stuck < murgbl.reg_total);
							start_hrtbt_cntr = heartbeat_counter;
						} else
						{	/* Sleep and recheck if any region we are stuck in got resolved.
							 * To minimize time spent sleeping, we just yield our timeslice.
							 */
							rel_quant();
							continue;
						}
					}
				} while (TRUE);
			} else
			{
				rctl = rctl->next_rctl;	/* Move on to the next available region */
				assert(NULL != rctl);
				continue;
			}
		}
		regcnt_stuck = 0;	/* restart the counter now that we found at least one non-stuck region */
		MUR_CHANGE_REG(rctl);
		jctl = rctl->jctl;
		this_reg_stuck = FALSE;
		for ( status = SS_NORMAL; SS_NORMAL == status; )
		{
			if (multi_proc && IS_FORCED_MULTI_PROC_EXIT(mp_hdr))
			{	/* We are at a logical point. So exit if signaled by parent */
				status = ERR_FORCEDHALT;
				goto finish;
			}
			assert(jctl == rctl->jctl);
			rec = rctl->mur_desc->jnlrec;
			rec_time = rec->prefix.time;
			if (rec_time > mur_options.before_time)
				break;	/* Records after -BEFORE_TIME do not go to extract or losttrans or brkntrans files */
			assert((rec_time >= adjusted_resolve_time) || (mur_options.notncheck && !mur_options.verify));
			assert((0 == mur_options.after_time) || (mur_options.forward && !rctl->db_updated));
			if (rec_time < mur_options.after_time)
			{
				status = mur_next_rec(&jctl);
				continue; /* Records before -AFTER_TIME do not go to extract or losttrans or brkntrans files */
			}
			/* Check if current journal record can be played right away or need to wait for corresponding journal
			 * records from other participating TP regions to be reached. A non-TP or ZTP transaction can be played
			 * without issues (i.e. has no dependencies with any other regions). A single-region TP transaction too
			 * falls in the same category. A multi-region TP transaction needs to wait until all participating regions
			 * have played all journal records BEFORE this TP in order to ensure recover plays records in the exact
			 * same order that GT.M performed them in.
			 */
			/* If FENCE_NONE is specified, we would not have maintained any multi hashtable in mur_back_process for
			 * broken transaction processing. So we process multi-region TP transactions as multiple single-region
			 * TP transactions in forward phase.
			 */
			if (FENCE_NONE != mur_options.fences)
			{
				rectype = (enum jnl_record_type)rec->prefix.jrec_type;
				if (IS_TP(rectype) && IS_TUPD(rectype))
				{
					assert(IS_SET_KILL_ZKILL_ZTWORM_LGTRIG_ZTRIG(rectype));
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_ztworm.num_participants);
					assert(&rec->jrec_set_kill.num_participants == &rec->jrec_lgtrig.num_participants);
					num_partners = rec->jrec_set_kill.num_participants;
					assert(0 < num_partners);
					if (1 < num_partners)
					{
						this_reg_stuck = TRUE;
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_ztworm.update_num);
						assert(&rec->jrec_set_kill.update_num == &rec->jrec_lgtrig.update_num);
					}
				}
			}
			if (this_reg_stuck)
			{
				rec_token_seq = GET_JNL_SEQNO(rec);
				MUR_FORW_TOKEN_LOOKUP(forw_multi, rec_token_seq, rec_time);
				if (NULL != forw_multi)
				{	/* This token has already been seen in another region in forward processing.
					 * Add current region as well. If all regions have been resolved, then play
					 * the entire transaction maintaining the exact same order of updates within.
					 */
					if (!forw_multi->no_longer_stuck)
						MUR_FORW_TOKEN_ONE_MORE_REG(forw_multi, rctl);
				} else
				{	/* First time we are seeing this token in forward processing. Check if this
					 * has already been determined to be a broken transaction.
					 */
					recstat = GOOD_TN;
					multi = NULL;
					if (IS_REC_POSSIBLY_BROKEN(rec_time, rec_token_seq))
					{
						multi = MUR_TOKEN_LOOKUP(rec_token_seq, rec_time, TPFENCE);
						if ((NULL != multi) && (0 < multi->partner))
							recstat = BROKEN_TN;
					}
					MUR_FORW_TOKEN_ADD(forw_multi, rec_token_seq, rec_time, rctl, num_partners,
								recstat, multi);
				}
				/* Check that "tabent" field has been initialized above (by either the MUR_FORW_TOKEN_LOOKUP
				 * or MUR_FORW_TOKEN_ADD macros). This is relied upon by "mur_forward_play_multireg_tp" below.
				 */
				assert(NULL != forw_multi->u.tabent);
				assert(forw_multi->num_reg_seen_forward <= forw_multi->num_reg_seen_backward);
				if (multi_proc)
				{
					sfm = forw_multi->shm_forw_multi;
					ok_to_play = (NULL == sfm) || (sfm->num_reg_seen_forward == sfm->num_reg_seen_backward);
				} else
					ok_to_play = (forw_multi->num_reg_seen_forward == forw_multi->num_reg_seen_backward);
				assert(ok_to_play || !forw_multi->no_longer_stuck);
				if (ok_to_play )
				{	/* We have enough information to proceed with playing this multi-region TP in
					 * forward processing (even if we might not have seen all needed regions). Now play it.
					 * Note that the TP could be BROKEN_TN or GOOD_TN. The callee handles it.
					 */
					assert(forw_multi == rctl->forw_multi);
					status = mur_forward_play_multireg_tp(forw_multi, rctl);
					this_reg_stuck = FALSE;
					/* Note that as part of playing the TP transaction, we could have reached
					 * the EOF of rctl. In this case, we need to break out of the loop.
					 */
					if ((SS_NORMAL != status) || rctl->forw_eof_seen)
						break;
					assert(NULL == rctl->forw_multi);
					assert(!dollar_tlevel);
					jctl = rctl->jctl;	/* In case the first record after the most recently processed
								 * TP transaction is in the next generation journal file */
					continue;
				}
				break;
			} else
			{
				status = mur_forward_play_cur_jrec(rctl);
				if (SS_NORMAL != status)
					break;
			}
			assert(!this_reg_stuck);
			status = mur_next_rec(&jctl);
		}
		assert((NULL == rctl->forw_multi) || this_reg_stuck);
		assert((NULL != rctl->forw_multi) || !this_reg_stuck);
		if (!this_reg_stuck)
		{	/* We are not stuck in this region (to resolve a multi-region TP).
			 * This means we are done processing all the records of this region.
			 */
			assert(NULL == rctl->forw_multi);
			if (!rctl->forw_eof_seen)
			{
				CHECK_IF_EOF_REACHED(rctl, status);
					/* sets rctl->forw_eof_seen if needed; resets "status" to SS_NORMAL */
				if (SS_NORMAL != status)
				{
					assert(ERR_FILENOTCREATE == status);
					goto finish;
				}
				assert(!dollar_tlevel);
				DELETE_RCTL_FROM_UNPROCESSED_LIST(rctl); /* since all of its records should have been processed */
			} else
			{	/* EOF was seen in rctl inside "mur_forward_play_multireg_tp" and it was removed
				 * from the unprocessed list of rctls. At the time rctl was removed, its "next_rctl"
				 * field could have been pointing to another <rctl> that has since then also been
				 * removed inside the same function. Therefore the "next_rctl" field is not reliable
				 * in this case but instead we should rely on the global variable "rctl_start" which
				 * points to the list of unprocessed rctls. Set "next_rctl" accordingly.
				 */
				rctl->next_rctl = rctl_start;
				if (ERR_JNLREADEOF == status)
					status = SS_NORMAL;
			}
			assert(rctl->deleted_from_unprocessed_list);
		}
		assert(SS_NORMAL == status);
		assert(!this_reg_stuck || !rctl->forw_eof_seen);
		assert((NULL == rctl->next_rctl) || (NULL != rctl_start));
		assert((NULL == rctl->next_rctl) || (0 < murgbl.regcnt_remaining));
		rctl = rctl->next_rctl;	/* Note : even though "rctl" could have been deleted from the doubly linked list above,
					 * rctl->next_rctl is not touched so we can still use it to get to the next element. */
	}
	assert(0 == murgbl.regcnt_remaining);
	jgbl.mur_pini_addr_reset_fnptr = NULL;	/* No more simulation of GT.M activity for any region */
	prc_vec = murgbl.prc_vec;	/* Use process-vector of MUPIP RECOVER (not any simulating GT.M process) now onwards */
	assert(0 == dollar_tlevel);
	for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++)
	{
		if (!rctl->this_pid_is_owner)
		{
			assert(multi_proc_in_use);
			continue;	/* in a parallel processing environment, process only regions we own */
		}
		if (multi_proc)
		{	/* Set key to print this rctl's region-name as prefix in case this forked off process prints any output */
			MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);
		}
		PRINT_VERBOSE_STAT(rctl->jctl, "mur_forward:at the end");
		assert(!mur_options.rollback || (0 != murgbl.consist_jnl_seqno));
		assert(mur_options.rollback || (0 == murgbl.consist_jnl_seqno));
		assert(!dollar_tlevel);	/* In case it applied a broken TUPD */
		assert(murgbl.ok_to_update_db || !rctl->db_updated);
		rctl->mur_plst = NULL;	/* reset now that simulation of GT.M updates is done */
		/* Ensure mur_block_count_correct is called if updates allowed */
		if (murgbl.ok_to_update_db && (SS_NORMAL != mur_block_count_correct(rctl)))
		{
			gtm_putmsg_csa(CSA_ARG(rctl->csa) VARLSTCNT(4) ERR_BLKCNTEDITFAIL, 2, DB_LEN_STR(rctl->gd));
			murgbl.wrn_count++;
		}
	}
finish:
	if (multi_proc)
		multi_proc_key = NULL;	/* reset key until it can be set to rctl's region-name again */
	if ((SS_NORMAL == status) && mur_options.show)
		mur_output_show();
	if (NULL != first_shm_rctl)
	{	/* Transfer needed process-private information to shared memory so parent process can later inherit this. */
		first_shm_rctl->err_cnt = murgbl.err_cnt;
		first_shm_rctl->wrn_count = murgbl.wrn_count;
		first_shm_rctl->consist_jnl_seqno = murgbl.consist_jnl_seqno;
		/* If extract files were created by this process for one or more regions, then copy that information to
		 * shared memory so parent process can use this information to do a merge sort.
		 */
		shm_rctl = mur_shm_hdr->shm_rctl_start;
		for (rctl = mur_ctl, rctl_top = mur_ctl + murgbl.reg_total; rctl < rctl_top; rctl++, shm_rctl++)
		{
			assert(multi_proc_in_use);
			if (!rctl->this_pid_is_owner)
				continue;	/* in a parallel processing environment, process only regions we own */
			/* Cancel any flush/dbsync timers by this child process for this region. This is because the
			 * child is not going to go through exit handling code (no gds_rundown etc.). And we need to
			 * clear up csa->nl->wcs_timers. (normally done by gds_rundown).
			 */
			if (NULL != rctl->csa)	/* rctl->csa can be NULL in case of "mupip journal -extract" etc. */
				CANCEL_DB_TIMERS(rctl->gd, rctl->csa, cancelled_timer, cancelled_dbsync_timer);
			reccnt = 0;
			for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
								recstat < TOT_EXTR_TYPES;
									recstat++, size_ptr++)
			{	/* Assert "extr_file_created" information is in sync between rctl and shm_rctl.
				 * This was done at the end of "mur_cre_file_extfmt".
				 */
				assert(shm_rctl->extr_file_created[recstat] == rctl->extr_file_created[recstat]);
				/* Assert that if *size_ptr is non-zero, then we better have created an extract file.
				 * Note that the converse is not true. It is possible we created a file for example to
				 * write an INCTN record but decided to not write anything because it was not a -detail
				 * type of extract. So *sizeptr could be 0 even though we created the extract file.
				 */
				assert(!*size_ptr || rctl->extr_file_created[recstat]);
				shm_rctl->jnlext_list_size[recstat] = *size_ptr;
				reccnt += *size_ptr;
			}
			assert(INVALID_SHMID == shm_rctl->jnlext_shmid);
			shm_size = reccnt * SIZEOF(jnlext_multi_t);
			/* If we are quitting because of an abnormal status OR a forced signal to terminate
			 * OR if the parent is dead (kill -9) dont bother creating shmid to communicate back with parent.
			 */
			if (mp_hdr->parent_pid != getppid())
			{
				SET_FORCED_MULTI_PROC_EXIT;	/* Also signal sibling children to stop processing */
				if (SS_NORMAL != status)
					status = ERR_FORCEDHALT;
			}
			if ((SS_NORMAL == status) && shm_size)
			{
				shmid = shmget(IPC_PRIVATE, shm_size, 0600 | IPC_CREAT);
				if (-1 == shmid)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmget() : shmsize=0x%llx", shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shmPtr = (char *)do_shmat(shmid, 0, 0);
				if (-1 == (sm_long_t)shmPtr)
				{
					save_errno = errno;
					SNPRINTF(errstr, SIZEOF(errstr),
						"shmat() : shmid=%d shmsize=0x%llx", shmid, shm_size);
					MUR_SET_MULTI_PROC_KEY(rctl, multi_proc_key);	/* to print region name prefix */
					rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8)
								ERR_SYSCALL, 5, LEN_AND_STR(errstr), CALLFROM, save_errno);
				}
				shm_rctl->jnlext_shmid = shmid;
				shm_rctl->jnlext_shm_size = shm_size;
				for (size_ptr = &rctl->jnlext_multi_list_size[0], recstat = 0;
									recstat < TOT_EXTR_TYPES;
										recstat++, size_ptr++)
				{
					shm_size = *size_ptr;
					if (shm_size)
					{
						copy_size = copy_list_to_buf(rctl->jnlext_multi_list[recstat],
												(int4)shm_size, shmPtr);
						assert(copy_size == (shm_size * SIZEOF(jnlext_multi_t)));
						shmPtr += copy_size;
					}
				}
			}
		}
	}
	mur_close_file_extfmt(IN_MUR_CLOSE_FILES_FALSE);	/* Need to flush buffered extract/losttrans/brokentrans files */
	return (int)status;
}
Example #13
0
/* Takes an entry from 'ipcs -am' and checks for its validity to be a GT.M replication segment.
 * Returns TRUE if the shared memory segment is a valid GT.M replication segment
 * (based on a check on some fields in the shared memory) else FALSE.
 * If the segment belongs to GT.M, it returns the replication id of the segment
 * by the second argument.
 * Sets exit_stat to ERR_MUNOTALLSEC if appropriate.
 */
boolean_t validate_replpool_shm_entry(shm_parms *parm_buff, replpool_id_ptr_t replpool_id, int *exit_stat)
{
	boolean_t		remove_shmid, jnlpool_segment;
	int			fd;
	repl_inst_hdr		repl_instance;
	sm_uc_ptr_t		start_addr;
	int			save_errno, status, shmid;
	struct shmid_ds		shmstat;
	char			msgbuff[OUT_BUFF_SIZE], *instfilename;

	if (NULL == parm_buff)
		return FALSE;
	/* Check for the bare minimum size of the replic shared segment that we expect */
	/* if (parm_buff->sgmnt_siz < (SIZEOF(replpool_identifier) + MIN(MIN_JNLPOOL_SIZE, MIN_RECVPOOL_SIZE))) */
	if (parm_buff->sgmnt_siz < MIN(MIN_JNLPOOL_SIZE, MIN_RECVPOOL_SIZE))
		return FALSE;
	if (IPC_PRIVATE != parm_buff->key)
		return FALSE;
	shmid = parm_buff->shmid;
	/* we do not need to lock the shm for reading the rundown information as
	 * the other rundowns (if any) can also be allowed to share reading the
	 * same info concurrently.
	 */
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t) do_shmat(shmid, 0, SHM_RND)))
		return FALSE;
	memcpy((void *)replpool_id, (void *)start_addr, SIZEOF(replpool_identifier));
	instfilename = replpool_id->instfilename;
	/* Even though we could be looking at a replication pool structure that has been created by an older version
	 * or newer version of GT.M, the format of the "replpool_identifier" structure is expected to be the same
	 * across all versions so we can safely dereference the "label" and "instfilename" fields in order to generate
	 * user-friendly error messages. Asserts for the layout are in "mu_rndwn_repl_instance" (not here) with a
	 * comment there as to why that location was chosen.
	 */
	if (memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(replpool_id->label, GDS_RPL_LABEL, GDS_LABEL_SZ - 3))
		{
			util_out_print("Cannot rundown replpool shmid = !UL as it has format !AD "
				"created by !AD but this mupip is version and uses format !AD",
				TRUE, shmid, GDS_LABEL_SZ - 1, replpool_id->label,
				LEN_AND_STR(replpool_id->now_running), gtm_release_name_len, gtm_release_name,
				GDS_LABEL_SZ - 1, GDS_RPL_LABEL);
			*exit_stat = ERR_MUNOTALLSEC;
		}
		shmdt((void *)start_addr);
		return FALSE;
	}
	assert(JNLPOOL_SEGMENT == replpool_id->pool_type || RECVPOOL_SEGMENT == replpool_id->pool_type);
	if(JNLPOOL_SEGMENT != replpool_id->pool_type && RECVPOOL_SEGMENT != replpool_id->pool_type)
	{
		shmdt((void *)start_addr);
		return FALSE;
	}
	jnlpool_segment = (JNLPOOL_SEGMENT == replpool_id->pool_type);
	if (-1 == shmctl(shmid, IPC_STAT, &shmstat))
	{
		save_errno = errno;
		assert(FALSE);/* we were able to attach to this shmid before so should be able to get stats on it */
		util_out_print("!AD -> Error with shmctl for shmid = !UL",
				TRUE, LEN_AND_STR(instfilename), shmid);
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
		*exit_stat = ERR_MUNOTALLSEC;
		shmdt((void *)start_addr);
		return FALSE;
	}
	/* Check if instance filename reported in shared memory still exists. If not, clean this
	 * shared memory section without even invoking "mu_rndwn_repl_instance" as that expects
	 * the instance file to exist. Same case if shared memory points back to an instance file
	 * whose file header does not have this shmid.
	 */
	OPENFILE(instfilename, O_RDONLY, fd);	/* check if we can open it */
	msgbuff[0] = '\0';
	remove_shmid = FALSE;
	if (FD_INVALID == fd)
	{
		if (ENOENT == errno)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "File %s does not exist", instfilename);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_REPLPOOL_FAILURE_MSG(msgbuff, replpool_id, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else
		{	/* open() errored out e.g. due to file permissions. Log that */
			save_errno = errno;
			util_out_print("Cannot rundown replpool shmid !UL for instance file"
				" !AD as open() on the file returned the following error",
				TRUE, shmid, LEN_AND_STR(instfilename));
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
	} else
	{
		LSEEKREAD(fd, 0, &repl_instance, SIZEOF(repl_inst_hdr), status);
		if (0 != status)
		{
			save_errno = errno;
			util_out_print("!AD -> Error with LSEEKREAD for shmid = !UL", TRUE,
				LEN_AND_STR(instfilename), shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		if ((jnlpool_segment && (repl_instance.jnlpool_shmid != shmid))
			|| (!jnlpool_segment && (repl_instance.recvpool_shmid != shmid)))
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "%s SHMID (%d) in the instance file header does not match with the"
					" one reported by \"ipcs\" command (%d)", jnlpool_segment ? "Journal Pool" : "Receive Pool",
					jnlpool_segment ? repl_instance.jnlpool_shmid : repl_instance.recvpool_shmid, shmid);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_REPLPOOL_FAILURE_MSG(msgbuff, replpool_id, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		}
		CLOSEFILE_RESET(fd, status);	/* resets "fd" to FD_INVALID */
	}
	shmdt((void *)start_addr);
	if (remove_shmid)
	{
		assert('\0' != msgbuff[0]);
		if (0 != shm_rmid(shmid))
		{
			save_errno = errno;
			util_out_print("!AD -> Error removing shared memory for shmid = !UL",
				TRUE, LEN_AND_STR(instfilename), shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			return FALSE;
		}
		PRINT_AND_SEND_SHMREMOVED_MSG(msgbuff, STRLEN(instfilename), instfilename, shmid);
		*exit_stat = ERR_SHMREMOVED;
	} else
		*exit_stat = SS_NORMAL;
	return TRUE;
}
Example #14
0
/* Takes an entry from 'ipcs -m' and checks for its validity to be a GT.M db segment.
 * Returns TRUE if the shared memory segment is a valid GT.M db segment
 * (based on a check on some fields in the shared memory) else FALSE.
 * If the segment belongs to GT.M it returns the database file name by the second argument.
 * Sets exit_stat to ERR_MUNOTALLSEC if appropriate.
 */
boolean_t validate_db_shm_entry(shm_parms *parm_buff, char *fname, int *exit_stat)
{
	boolean_t		remove_shmid;
	file_control		*fc;
	int			fname_len, save_errno, status, shmid;
	node_local_ptr_t	nl_addr;
	sm_uc_ptr_t		start_addr;
	struct stat		st_buff;
	struct shmid_ds		shmstat;
	sgmnt_data		tsd;
	unix_db_info		*udi;
	char			msgbuff[OUT_BUFF_SIZE];

	if (NULL == parm_buff)
		return FALSE;
	/* check for the bare minimum size of the shared memory segment that we expect
	 * (with no fileheader related information at hand) */
	if (MIN_NODE_LOCAL_SPACE + SHMPOOL_SECTION_SIZE > parm_buff->sgmnt_siz)
		return FALSE;
	if (IPC_PRIVATE != parm_buff->key)
		return FALSE;
	shmid = parm_buff->shmid;
	/* we do not need to lock the shm for reading the rundown information as
	 * the other rundowns (if any) can also be allowed to share reading the
	 * same info concurrently.
	 */
	if (-1 == (sm_long_t)(start_addr = (sm_uc_ptr_t) do_shmat(shmid, 0, SHM_RND)))
		return FALSE;
	nl_addr = (node_local_ptr_t)start_addr;
	memcpy(fname, nl_addr->fname, MAX_FN_LEN + 1);
	fname[MAX_FN_LEN] = '\0';			/* make sure the fname is null terminated */
	fname_len = STRLEN(fname);
	msgbuff[0] = '\0';
	if (memcmp(nl_addr->label, GDS_LABEL, GDS_LABEL_SZ - 1))
	{
		if (!memcmp(nl_addr->label, GDS_LABEL, GDS_LABEL_SZ - 3))
		{
			util_out_print("Cannot rundown shmid = !UL for database !AD as it has format !AD "
				"but this mupip uses format !AD", TRUE, shmid,
				fname_len, fname, GDS_LABEL_SZ - 1, nl_addr->label, GDS_LABEL_SZ - 1, GDS_LABEL);
			*exit_stat = ERR_MUNOTALLSEC;
		}
		shmdt((void *)start_addr);
		return FALSE;
	}
	if (-1 == shmctl(shmid, IPC_STAT, &shmstat))
	{
		save_errno = errno;
		assert(FALSE);/* we were able to attach to this shmid before so should be able to get stats on it */
		util_out_print("!AD -> Error with shmctl for shmid = !UL",
			TRUE, fname_len, fname, shmid);
		gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
		*exit_stat = ERR_MUNOTALLSEC;
		shmdt((void *)start_addr);
		return FALSE;
	}
	remove_shmid = FALSE;
	/* Check if db filename reported in shared memory still exists. If not, clean this shared memory section
	 * without even invoking "mu_rndwn_file" as that expects the db file to exist. Same case if shared memory
	 * points back to a database whose file header does not have this shmid.
	 */
	if (-1 == Stat(fname, &st_buff))
	{
		if (ENOENT == errno)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "File %s does not exist", fname);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else
		{	/* Stat errored out e.g. due to file permissions. Log that */
			save_errno = errno;
			util_out_print("Cannot rundown shmid !UL for database file !AD as stat() on the file"
				" returned the following error", TRUE, shmid, fname_len, fname);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
	} else
	{
		mu_gv_cur_reg_init();
		gv_cur_region->dyn.addr->fname_len = strlen(fname);
		STRNCPY_STR(gv_cur_region->dyn.addr->fname, fname, gv_cur_region->dyn.addr->fname_len);
		fc = gv_cur_region->dyn.addr->file_cntl;
		fc->op = FC_OPEN;
		status = dbfilop(fc);
		if (SS_NORMAL != status)
		{
			util_out_print("!AD -> Error with dbfilop for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(5) status, 2, DB_LEN_STR(gv_cur_region), errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		udi = FILE_INFO(gv_cur_region);
		LSEEKREAD(udi->fd, 0, &tsd, SIZEOF(sgmnt_data), status);
		if (0 != status)
		{
			save_errno = errno;
			util_out_print("!AD -> Error with LSEEKREAD for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			shmdt((void *)start_addr);
			return FALSE;
		}
		mu_gv_cur_reg_free();
		if (tsd.shmid != shmid)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "Shared memory ID (%d) in the DB file header does not match with the one"
					" reported by \"ipcs\" command (%d)", tsd.shmid, shmid);
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		} else if (tsd.gt_shm_ctime.ctime != shmstat.shm_ctime)
		{
			SNPRINTF(msgbuff, OUT_BUFF_SIZE, "Shared memory creation time in the DB file header does not match with"
					" the one reported by shmctl");
			if (1 < shmstat.shm_nattch)
			{
				PRINT_AND_SEND_DBRNDWN_FAILURE_MSG(msgbuff, fname, shmid);
				*exit_stat = ERR_MUNOTALLSEC;
				shmdt((void *)start_addr);
				return FALSE;
			}
			remove_shmid = TRUE;
		}
	}
	shmdt((void *)start_addr);
	if (remove_shmid)
	{
		assert('\0' != msgbuff[0]);
		if (0 != shm_rmid(shmid))
		{
			save_errno = errno;
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_DBFILERR, 2, fname_len, fname,
				   ERR_TEXT, 2, RTS_ERROR_TEXT("Error removing shared memory"));
			util_out_print("!AD -> Error removing shared memory for shmid = !UL", TRUE, fname_len, fname, shmid);
			gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(1) save_errno);
			*exit_stat = ERR_MUNOTALLSEC;
			return FALSE;
		}
		PRINT_AND_SEND_SHMREMOVED_MSG(msgbuff, fname_len, fname, shmid);
		*exit_stat = ERR_SHMREMOVED;
	} else
		*exit_stat = SS_NORMAL;
	return TRUE;
}
Example #15
0
SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
		unsigned long, third, void __user *, ptr, long, fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	switch (call) {
	case SEMOP:
		return sys_semtimedop(first, (struct sembuf __user *)ptr,
				      second, NULL);
	case SEMTIMEDOP:
		return sys_semtimedop(first, (struct sembuf __user *)ptr,
				      second,
				      (const struct timespec __user *)fifth);

	case SEMGET:
		return sys_semget(first, second, third);
	case SEMCTL: {
		unsigned long arg;
		if (!ptr)
			return -EINVAL;
		if (get_user(arg, (unsigned long __user *) ptr))
			return -EFAULT;
		return sys_semctl(first, second, third, arg);
	}

	case MSGSND:
		return sys_msgsnd(first, (struct msgbuf __user *) ptr,
				  second, third);
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;
			if (!ptr)
				return -EINVAL;

			if (copy_from_user(&tmp,
					   (struct ipc_kludge __user *) ptr,
					   sizeof(tmp)))
				return -EFAULT;
			return sys_msgrcv(first, tmp.msgp, second,
					   tmp.msgtyp, third);
		}
		default:
			return sys_msgrcv(first,
					   (struct msgbuf __user *) ptr,
					   second, fifth, third);
		}
	case MSGGET:
		return sys_msgget((key_t) first, second);
	case MSGCTL:
		return sys_msgctl(first, second, (struct msqid_ds __user *)ptr);

	case SHMAT:
		switch (version) {
		default: {
			unsigned long raddr;
			ret = do_shmat(first, (char __user *)ptr,
				       second, &raddr, SHMLBA);
			if (ret)
				return ret;
			return put_user(raddr, (unsigned long __user *) third);
		}
		case 1:
			/*
			 * This was the entry point for kernel-originating calls
			 * from iBCS2 in 2.2 days.
			 */
			return -EINVAL;
		}
	case SHMDT:
		return sys_shmdt((char __user *)ptr);
	case SHMGET:
		return sys_shmget(first, second, third);
	case SHMCTL:
		return sys_shmctl(first, second,
				   (struct shmid_ds __user *) ptr);
	default:
		return -ENOSYS;
	}
}
Example #16
0
/*
 * sys_ipc() is the de-multiplexer for the SysV IPC calls..
 *
 * This is really horribly ugly.
 */
asmlinkage int sys_ipc (uint call, int first, int second,
			unsigned long third, void *ptr, long fifth)
{
	int version, ret;

	version = call >> 16; /* hack for backward compatibility */
	call &= 0xffff;

	switch (call) {
	case SEMOP:
		return sys_semtimedop (first, (struct sembuf *)ptr, second,
		                       NULL);
	case SEMTIMEDOP:
		return sys_semtimedop (first, (struct sembuf *)ptr, second,
		                       (const struct timespec __user *)fifth);
	case SEMGET:
		return sys_semget (first, second, third);
	case SEMCTL: {
		union semun fourth;
		if (!ptr)
			return -EINVAL;
		if (get_user(fourth.__pad, (void **) ptr))
			return -EFAULT;
		return sys_semctl (first, second, third, fourth);
	}

	case MSGSND:
		return sys_msgsnd (first, (struct msgbuf *) ptr,
				   second, third);
	case MSGRCV:
		switch (version) {
		case 0: {
			struct ipc_kludge tmp;
			if (!ptr)
				return -EINVAL;

			if (copy_from_user(&tmp,
					   (struct ipc_kludge *) ptr,
					   sizeof (tmp)))
				return -EFAULT;
			return sys_msgrcv (first, tmp.msgp, second,
					   tmp.msgtyp, third);
		}
		default:
			return sys_msgrcv (first,
					   (struct msgbuf *) ptr,
					   second, fifth, third);
		}
	case MSGGET:
		return sys_msgget ((key_t) first, second);
	case MSGCTL:
		return sys_msgctl (first, second, (struct msqid_ds *) ptr);

	case SHMAT:
		switch (version) {
		default: {
			ulong raddr;
			ret = do_shmat (first, (char *) ptr, second, &raddr);
			if (ret)
				return ret;
			return put_user (raddr, (ulong *) third);
		}
		case 1:	/* iBCS2 emulator entry point */
			if (!segment_eq(get_fs(), get_ds()))
				return -EINVAL;
			return do_shmat (first, (char *) ptr, second, (ulong *) third);
		}
	case SHMDT:
		return sys_shmdt ((char *)ptr);
	case SHMGET:
		return sys_shmget (first, second, third);
	case SHMCTL:
		return sys_shmctl (first, second,
				   (struct shmid_ds *) ptr);
	default:
		return -ENOSYS;
	}
}