Example #1
static int
log_wput(queue_t *q, mblk_t *mp)
{
	log_t *lp = (log_t *)q->q_ptr;
	struct iocblk *iocp;
	mblk_t *mp2;
	cred_t *cr = msg_getcred(mp, NULL);
	zoneid_t zoneid;

	/*
	 * Default to global zone if dblk doesn't have a valid cred.
	 * Calls to syslog() go through putmsg(), which does set up
	 * the cred.
	 */
	zoneid = (cr != NULL) ? crgetzoneid(cr) : GLOBAL_ZONEID;

	switch (DB_TYPE(mp)) {
	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW) {
			flushq(q, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		if (*mp->b_rptr & FLUSHR) {
			flushq(RD(q), FLUSHALL);
			qreply(q, mp);
			return (0);
		}
		break;

	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;

		if (lp->log_major != LOG_LOGMIN) {
			/* write-only device */
			miocnak(q, mp, 0, EINVAL);
			return (0);
		}

		if (iocp->ioc_count == TRANSPARENT) {
			miocnak(q, mp, 0, EINVAL);
			return (0);
		}

		if (lp->log_flags) {
			miocnak(q, mp, 0, EBUSY);
			return (0);
		}

		freemsg(lp->log_data);
		lp->log_data = mp->b_cont;
		mp->b_cont = NULL;

		switch (iocp->ioc_cmd) {

		case I_CONSLOG:
			log_update(lp, RD(q), SL_CONSOLE, log_console);
			break;

		case I_TRCLOG:
			if (lp->log_data == NULL) {
				miocnak(q, mp, 0, EINVAL);
				return (0);
			}
			log_update(lp, RD(q), SL_TRACE, log_trace);
			break;

		case I_ERRLOG:
			log_update(lp, RD(q), SL_ERROR, log_error);
			break;

		default:
			miocnak(q, mp, 0, EINVAL);
			return (0);
		}
		miocack(q, mp, 0, 0);
		return (0);

	case M_PROTO:
		if (MBLKL(mp) == sizeof (log_ctl_t) && mp->b_cont != NULL) {
			log_ctl_t *lc = (log_ctl_t *)mp->b_rptr;
			/* This code is used by savecore to log dump msgs */
			if (mp->b_band != 0 &&
			    secpolicy_sys_config(CRED(), B_FALSE) == 0) {
				(void) putq(log_consq, mp);
				return (0);
			}
			if ((lc->pri & LOG_FACMASK) == LOG_KERN)
				lc->pri |= LOG_USER;
			mp2 = log_makemsg(LOG_MID, LOG_CONSMIN, lc->level,
			    lc->flags, lc->pri, mp->b_cont->b_rptr,
			    MBLKL(mp->b_cont) + 1, 0);
			if (mp2 != NULL)
				log_sendmsg(mp2, zoneid);
		}
		break;

	case M_DATA:
		mp2 = log_makemsg(LOG_MID, LOG_CONSMIN, 0, SL_CONSOLE,
		    LOG_USER | LOG_INFO, mp->b_rptr, MBLKL(mp) + 1, 0);
		if (mp2 != NULL)
			log_sendmsg(mp2, zoneid);
		break;
	}

	freemsg(mp);
	return (0);
}
Example #2
/*
 * iscsi_net_listen - listen to socket for peer connections
 */
static int
iscsi_net_listen(void *socket, int backlog)
{
	ksocket_t ks = (ksocket_t)socket;
	return (ksocket_listen(ks, backlog, CRED()));
}
Example #3
/* ARGSUSED */
int
mount(long *lp, rval_t *rp)
{
	vnode_t *vp = NULL;
	struct vfs *vfsp;	/* dummy argument */
	int error;
	struct mounta *uap;
#if defined(_LP64)
	struct mounta native;

	/*
	 * Make a struct mounta if we are DATAMODEL_LP64
	 */
	uap = &native;
	uap->spec = (char *)*lp++;
	uap->dir = (char *)*lp++;
	uap->flags = (int)*lp++;
	uap->fstype = (char *)*lp++;
	uap->dataptr = (char *)*lp++;
	uap->datalen = (int)*lp++;
	uap->optptr = (char *)*lp++;
	uap->optlen = (int)*lp++;
#else	/* !defined(_LP64) */
	/*
	 * 32 bit kernels can take a shortcut and just cast
	 * the args array to the structure.
	 */
	uap = (struct mounta *)lp;
#endif	/* _LP64 */
	/*
	 * Resolve second path name (mount point).
	 */
	if (error = lookupname(uap->dir, UIO_USERSPACE, FOLLOW, NULLVPP, &vp))
		return (set_errno(error));

	/*
	 * Some mount flags are disallowed through the system call interface.
	 */
	uap->flags &= MS_MASK;

	if ((vp->v_flag & VPXFS) && ((uap->flags & MS_GLOBAL) != MS_GLOBAL)) {
		/*
		 * Clustering: if we're doing a mount onto the global
		 * namespace, and the mount is not a global mount, return
		 * an error.
		 */
		error = ENOTSUP;
	} else if (uap->flags & MS_GLOBAL) {
		/*
		 * Clustering: global mount specified.
		 */
		if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
			/*
			 * If we're not booted as a cluster,
			 * global mounts are not allowed.
			 */
			error = ENOTSUP;
		} else {
			error = domount("pxfs", uap, vp, CRED(), &vfsp);
			if (!error)
				VFS_RELE(vfsp);
		}
	} else {
		error = domount(NULL, uap, vp, CRED(), &vfsp);
		if (!error)
			VFS_RELE(vfsp);
	}
	VN_RELE(vp);
	rp->r_val2 = error;
	return (error ? set_errno(error) : 0);
}
Example #4
static int
page_valid(struct seg *seg, caddr_t addr)
{
	struct segvn_data *svd;
	vnode_t *vp;
	vattr_t vattr;

	/*
	 * Fail if the page doesn't map to a page in the underlying
	 * mapped file, if an underlying mapped file exists.
	 */
	vattr.va_mask = AT_SIZE;
	if (seg->s_ops == &segvn_ops &&
	    SEGOP_GETVP(seg, addr, &vp) == 0 &&
	    vp != NULL && vp->v_type == VREG &&
	    VOP_GETATTR(vp, &vattr, 0, CRED()) == 0) {
		u_offset_t size = roundup(vattr.va_size, (u_offset_t)PAGESIZE);
		u_offset_t offset = SEGOP_GETOFFSET(seg, addr);

		if (offset >= size)
			return (0);
	}

	/*
	 * Fail if this is an ISM shared segment and the address is
	 * not within the real size of the spt segment that backs it.
	 */
	if (seg->s_ops == &segspt_shmops &&
	    addr >= seg->s_base + spt_realsize(seg))
		return (0);

	/*
	 * Fail if the segment is mapped from /dev/null.
	 * The key is that the mapping comes from segdev and the
	 * type is neither MAP_SHARED nor MAP_PRIVATE.
	 */
	if (seg->s_ops == &segdev_ops &&
	    ((SEGOP_GETTYPE(seg, addr) & (MAP_SHARED | MAP_PRIVATE)) == 0))
		return (0);

	/*
	 * Fail if the page is a MAP_NORESERVE page that has
	 * not actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	if (seg->s_ops == &segvn_ops &&
	    (svd = (struct segvn_data *)seg->s_data) != NULL &&
	    (svd->vp == NULL || svd->vp->v_type != VREG) &&
	    (svd->flags & MAP_NORESERVE)) {
		/*
		 * Guilty knowledge here.  We know that
		 * segvn_incore returns more than just the
		 * low-order bit that indicates the page is
		 * actually in memory.  If any bits are set,
		 * then there is backing store for the page.
		 */
		char incore = 0;
		(void) SEGOP_INCORE(seg, addr, PAGESIZE, &incore);
		if (incore == 0)
			return (0);
	}
	return (1);
}
Example #5
/*
 * File control.
 */
int
fcntl(int fdes, int cmd, intptr_t arg)
{
	int iarg;
	int error = 0;
	int retval;
	proc_t *p;
	file_t *fp;
	vnode_t *vp;
	u_offset_t offset;
	u_offset_t start;
	struct vattr vattr;
	int in_crit;
	int flag;
	struct flock sbf;
	struct flock64 bf;
	struct o_flock obf;
	struct flock64_32 bf64_32;
	struct fshare fsh;
	struct shrlock shr;
	struct shr_locowner shr_own;
	offset_t maxoffset;
	model_t datamodel;
	int fdres;

#if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
#endif
#if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
#endif

	/*
	 * First, for speed, deal with the subset of cases
	 * that do not require getf() / releasef().
	 */
	switch (cmd) {
	case F_GETFD:
		if ((error = f_getfd_error(fdes, &flag)) == 0)
			retval = flag;
		goto out;

	case F_SETFD:
		error = f_setfd_error(fdes, (int)arg);
		retval = 0;
		goto out;

	case F_GETFL:
		if ((error = f_getfl(fdes, &flag)) == 0)
			retval = (flag & (FMASK | FASYNC)) + FOPEN;
		goto out;

	case F_GETXFL:
		if ((error = f_getfl(fdes, &flag)) == 0)
			retval = flag + FOPEN;
		goto out;

	case F_BADFD:
		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
			retval = fdres;
		goto out;
	}

	/*
	 * Second, for speed, deal with the subset of cases that
	 * require getf() / releasef() but do not require copyin.
	 */
	if ((fp = getf(fdes)) == NULL) {
		error = EBADF;
		goto out;
	}
	iarg = (int)arg;

	switch (cmd) {
/* ONC_PLUS EXTRACT END */

	case F_DUPFD:
		p = curproc;
		if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EINVAL;
		} else if ((retval = ufalloc_file(iarg, fp)) == -1) {
			error = EMFILE;
		} else {
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
		}
		goto done;

	case F_DUP2FD:
		p = curproc;
		if (fdes == iarg) {
			retval = iarg;
		} else if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EBADF;
		} else {
			/*
			 * We can't hold our getf(fdes) across the call to
			 * closeandsetf() because it creates a window for
			 * deadlock: if one thread is doing dup2(a, b) while
			 * another is doing dup2(b, a), each one will block
			 * waiting for the other to call releasef().  The
			 * solution is to increment the file reference count
			 * (which we have to do anyway), then releasef(fdes),
			 * then closeandsetf().  Incrementing f_count ensures
			 * that fp won't disappear after we call releasef().
			 * When closeandsetf() fails, we try to avoid calling
			 * closef() because of all the side effects.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			releasef(fdes);
			if ((error = closeandsetf(iarg, fp)) == 0) {
				retval = iarg;
			} else {
				mutex_enter(&fp->f_tlock);
				if (fp->f_count > 1) {
					fp->f_count--;
					mutex_exit(&fp->f_tlock);
				} else {
					mutex_exit(&fp->f_tlock);
					(void) closef(fp);
				}
			}
			goto out;
		}
		goto done;

	case F_SETFL:
		vp = fp->f_vnode;
		flag = fp->f_flag;
		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			iarg &= ~FNDELAY;
		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred)) == 0) {
			iarg &= FMASK;
			mutex_enter(&fp->f_tlock);
			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
			mutex_exit(&fp->f_tlock);
		}
		retval = 0;
		goto done;
	}

	/*
	 * Finally, deal with the expensive cases.
	 */
	retval = 0;
	in_crit = 0;
	maxoffset = MAXOFF_T;
	datamodel = DATAMODEL_NATIVE;
#if defined(_SYSCALL32_IMPL)
	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
		maxoffset = MAXOFF32_T;
#endif

	vp = fp->f_vnode;
	flag = fp->f_flag;
	offset = fp->f_offset;

	switch (cmd) {
/* ONC_PLUS EXTRACT START */
	/*
	 * The file system and vnode layers understand and implement
	 * locking with flock64 structures. So here once we pass through
	 * the test for compatibility as defined by LFS API, (for F_SETLK,
	 * F_SETLKW, F_GETLK, F_GETLKW, F_FREESP) we transform
	 * the flock structure to a flock64 structure and send it to the
	 * lower layers. Similarly in case of GETLK the returned flock64
	 * structure is transformed to a flock structure if everything fits
	 * in nicely, otherwise we return EOVERFLOW.
	 */

	case F_GETLK:
	case F_O_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_NBMAND:

		/*
		 * Copy in input fields only.
		 */

		if (cmd == F_O_GETLK) {
			if (datamodel != DATAMODEL_ILP32) {
				error = EINVAL;
				break;
			}

			if (copyin((void *)arg, &obf, sizeof (obf))) {
				error = EFAULT;
				break;
			}
			bf.l_type = obf.l_type;
			bf.l_whence = obf.l_whence;
			bf.l_start = (off64_t)obf.l_start;
			bf.l_len = (off64_t)obf.l_len;
			bf.l_sysid = (int)obf.l_sysid;
			bf.l_pid = obf.l_pid;
		} else if (datamodel == DATAMODEL_NATIVE) {
			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
				error = EFAULT;
				break;
			}
			/*
			 * XXX	In an LP64 kernel with an LP64 application
			 *	there's no need to do a structure copy here
			 *	struct flock == struct flock64. However,
			 *	we did it this way to avoid more conditional
			 *	compilation.
			 */
			bf.l_type = sbf.l_type;
			bf.l_whence = sbf.l_whence;
			bf.l_start = (off64_t)sbf.l_start;
			bf.l_len = (off64_t)sbf.l_len;
			bf.l_sysid = sbf.l_sysid;
			bf.l_pid = sbf.l_pid;
		}
#if defined(_SYSCALL32_IMPL)
		else {
			struct flock32 sbf32;
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			}
			bf.l_type = sbf32.l_type;
			bf.l_whence = sbf32.l_whence;
			bf.l_start = (off64_t)sbf32.l_start;
			bf.l_len = (off64_t)sbf32.l_len;
			bf.l_sysid = sbf32.l_sysid;
			bf.l_pid = sbf32.l_pid;
		}
#endif /* _SYSCALL32_IMPL */

		/*
		 * 64-bit support: check for overflow for 32-bit lock ops
		 */
		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
			break;

		/*
		 * Not all of the filesystems understand F_O_GETLK, and
		 * there's no need for them to know.  Map it to F_GETLK.
		 */
		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
		    &bf, flag, offset, NULL, fp->f_cred)) != 0)
			break;

		/*
		 * If command is GETLK and no lock is found, only
		 * the type field is changed.
		 */
		if ((cmd == F_O_GETLK || cmd == F_GETLK) &&
		    bf.l_type == F_UNLCK) {
			/* l_type always first entry, always a short */
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_O_GETLK) {
			/*
			 * Return an SVR3 flock structure to the user.
			 */
			obf.l_type = (int16_t)bf.l_type;
			obf.l_whence = (int16_t)bf.l_whence;
			obf.l_start = (int32_t)bf.l_start;
			obf.l_len = (int32_t)bf.l_len;
			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
				/*
				 * One or both values for the above fields
				 * is too large to store in an SVR3 flock
				 * structure.
				 */
				error = EOVERFLOW;
				break;
			}
			obf.l_sysid = (int16_t)bf.l_sysid;
			obf.l_pid = (int16_t)bf.l_pid;
			if (copyout(&obf, (void *)arg, sizeof (obf)))
				error = EFAULT;
		} else if (cmd == F_GETLK) {
			/*
			 * Copy out SVR4 flock.
			 */
			int i;

			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
				error = EOVERFLOW;
				break;
			}

			if (datamodel == DATAMODEL_NATIVE) {
				for (i = 0; i < 4; i++)
					sbf.l_pad[i] = 0;
				/*
				 * XXX	In an LP64 kernel with an LP64
				 *	application there's no need to do a
				 *	structure copy here as currently
				 *	struct flock == struct flock64.
				 *	We did it this way to avoid more
				 *	conditional compilation.
				 */
				sbf.l_type = bf.l_type;
				sbf.l_whence = bf.l_whence;
				sbf.l_start = (off_t)bf.l_start;
				sbf.l_len = (off_t)bf.l_len;
				sbf.l_sysid = bf.l_sysid;
				sbf.l_pid = bf.l_pid;
				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
					error = EFAULT;
			}
#if defined(_SYSCALL32_IMPL)
			else {
				struct flock32 sbf32;
				if (bf.l_start > MAXOFF32_T ||
				    bf.l_len > MAXOFF32_T) {
					error = EOVERFLOW;
					break;
				}
				for (i = 0; i < 4; i++)
					sbf32.l_pad[i] = 0;
				sbf32.l_type = (int16_t)bf.l_type;
				sbf32.l_whence = (int16_t)bf.l_whence;
				sbf32.l_start = (off32_t)bf.l_start;
				sbf32.l_len = (off32_t)bf.l_len;
				sbf32.l_sysid = (int32_t)bf.l_sysid;
				sbf32.l_pid = (pid32_t)bf.l_pid;
				if (copyout(&sbf32,
				    (void *)arg, sizeof (sbf32)))
					error = EFAULT;
			}
#endif
		}
		break;
/* ONC_PLUS EXTRACT END */

	case F_CHKFL:
		/*
		 * This is for internal use only, to allow the vnode layer
		 * to validate a flags setting before applying it.  User
		 * programs can't issue it.
		 */
		error = EINVAL;
		break;

	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		if ((flag & FWRITE) == 0) {
			error = EBADF;
			break;
		}

		if (vp->v_type != VREG) {
			error = EINVAL;
			break;
		}

		if (datamodel != DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			error = EINVAL;
			break;
		}

#if defined(_ILP32) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			struct flock32 sbf32;
			/*
			 * For compatibility we overlay an SVR3 flock on an SVR4
			 * flock.  This works because the input field offsets
			 * in "struct flock" were preserved.
			 */
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			} else {
				bf.l_type = sbf32.l_type;
				bf.l_whence = sbf32.l_whence;
				bf.l_start = (off64_t)sbf32.l_start;
				bf.l_len = (off64_t)sbf32.l_len;
				bf.l_sysid = sbf32.l_sysid;
				bf.l_pid = sbf32.l_pid;
			}
		}
#endif /* _ILP32 || _SYSCALL32_IMPL */

#if defined(_LP64)
		if (datamodel == DATAMODEL_LP64 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			if (copyin((void *)arg, &bf, sizeof (bf))) {
				error = EFAULT;
				break;
			}
		}
#endif /* defined(_LP64) */

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
				error = EFAULT;
				break;
			} else {
				/*
				 * Note that the size of flock64 is different in
				 * the ILP32 and LP64 models, due to the l_pad
				 * field. We do not want to assume that the
				 * flock64 structure is laid out the same in
				 * ILP32 and LP64 environments, so we will
				 * copy in the ILP32 version of flock64
				 * explicitly and copy it to the native
				 * flock64 structure.
				 */
				bf.l_type = (short)bf64_32.l_type;
				bf.l_whence = (short)bf64_32.l_whence;
				bf.l_start = bf64_32.l_start;
				bf.l_len = bf64_32.l_len;
				bf.l_sysid = (int)bf64_32.l_sysid;
				bf.l_pid = (pid_t)bf64_32.l_pid;
			}
		}
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

		if (cmd == F_ALLOCSP || cmd == F_FREESP)
			error = flock_check(vp, &bf, offset, maxoffset);
		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
		if (error)
			break;

		if (vp->v_type == VREG && bf.l_len == 0 &&
		    bf.l_start > OFFSET_MAX(fp)) {
			error = EFBIG;
			break;
		}

		/*
		 * Make sure that there are no conflicting non-blocking
		 * mandatory locks in the region being manipulated. If
		 * there are such locks then return EACCES.
		 */
		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
			break;

		if (nbl_need_check(vp)) {
			u_offset_t	begin;
			ssize_t		length;

			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED())) != 0)
				break;
			begin = start > vattr.va_size ? vattr.va_size : start;
			length = vattr.va_size > start ? vattr.va_size - start :
				start - vattr.va_size;
			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0)) {
				error = EACCES;
				break;
			}
		}

		if (cmd == F_ALLOCSP64)
			cmd = F_ALLOCSP;
		else if (cmd == F_FREESP64)
			cmd = F_FREESP;

		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);

		break;

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
/* ONC_PLUS EXTRACT START */
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_SETLK64_NBMAND:
		/*
		 * Large Files: Here we set cmd as *LK and send it to
		 * lower layers. *LK64 is only for the user land.
		 * Most of the comments described above for F_SETLK
		 * apply here too.
		 * Large File support is only needed for ILP32 apps!
		 */
		if (datamodel != DATAMODEL_ILP32) {
			error = EINVAL;
			break;
		}

		if (cmd == F_GETLK64)
			cmd = F_GETLK;
		else if (cmd == F_SETLK64)
			cmd = F_SETLK;
		else if (cmd == F_SETLKW64)
			cmd = F_SETLKW;
		else if (cmd == F_SETLK64_NBMAND)
			cmd = F_SETLK_NBMAND;

		/*
		 * Note that the size of flock64 is different in the ILP32
		 * and LP64 models, due to the sucking l_pad field.
		 * We do not want to assume that the flock64 structure is
		 * laid out the same in ILP32 and LP64 environments, so
		 * we will copy in the ILP32 version of flock64 explicitly
		 * and copy it to the native flock64 structure.
		 */

		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
			error = EFAULT;
			break;
		}

		bf.l_type = (short)bf64_32.l_type;
		bf.l_whence = (short)bf64_32.l_whence;
		bf.l_start = bf64_32.l_start;
		bf.l_len = bf64_32.l_len;
		bf.l_sysid = (int)bf64_32.l_sysid;
		bf.l_pid = (pid_t)bf64_32.l_pid;

		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
			break;

		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
		    NULL, fp->f_cred)) != 0)
			break;

		if ((cmd == F_GETLK) && bf.l_type == F_UNLCK) {
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_GETLK) {
			int i;

			/*
			 * We do not want to assume that the flock64 structure
			 * is laid out the same in ILP32 and LP64
			 * environments, so we will copy out the ILP32 version
			 * of flock64 explicitly after copying the native
			 * flock64 structure to it.
			 */
			for (i = 0; i < 4; i++)
				bf64_32.l_pad[i] = 0;
			bf64_32.l_type = (int16_t)bf.l_type;
			bf64_32.l_whence = (int16_t)bf.l_whence;
			bf64_32.l_start = bf.l_start;
			bf64_32.l_len = bf.l_len;
			bf64_32.l_sysid = (int32_t)bf.l_sysid;
			bf64_32.l_pid = (pid32_t)bf.l_pid;
			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
				error = EFAULT;
		}
		break;
/* ONC_PLUS EXTRACT END */
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

/* ONC_PLUS EXTRACT START */
	case F_SHARE:
	case F_SHARE_NBMAND:
	case F_UNSHARE:

		/*
		 * Copy in input fields only.
		 */
		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
			error = EFAULT;
			break;
		}

		/*
		 * Local share reservations always have this simple form
		 */
		shr.s_access = fsh.f_access;
		shr.s_deny = fsh.f_deny;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fsh.f_id;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred);
/* ONC_PLUS EXTRACT END */
		break;

	default:
		error = EINVAL;
		break;
	}

	if (in_crit)
		nbl_end_crit(vp);

done:
	releasef(fdes);
out:
	if (error)
		return (set_errno(error));
	return (retval);
}
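
The F_GETLK path above only rewrites the l_type field when no conflicting lock exists. For context, here is a minimal user-level sketch of that convention (ordinary application code, not part of the kernel source; the file name /tmp/example is just an illustration):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd = open("/tmp/example", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return (1);

	/* Describe the region to test for a conflicting write lock. */
	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;		/* to end of file */

	if (fcntl(fd, F_GETLK, &fl) == -1) {
		perror("F_GETLK");
	} else if (fl.l_type == F_UNLCK) {
		/* The kernel changed only l_type: no conflicting lock. */
		(void) printf("region is unlocked\n");
	} else {
		(void) printf("lock held by pid %ld\n", (long)fl.l_pid);
	}
	(void) close(fd);
	return (0);
}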
Example #6
static int
nd_init(vnode_t *dumpvp, TIUSER **tiptr)
{
	int 		error;

	if (*tiptr)
		return (0);

	/*
	 * If dump info hasn't yet been initialized (because dump
	 * device was chosen at user-level, rather than at boot time
	 * in nfs_swapvp) fill it in now.
	 */
	if (nfsdump_maxcount == 0) {
		nfsdump_version = VTOMI(dumpvp)->mi_vers;
		switch (nfsdump_version) {
		case NFS_VERSION:
			nfsdump_fhandle2 = *VTOFH(dumpvp);
			break;
		case NFS_V3:
			nfsdump_fhandle3 = *VTOFH3(dumpvp);
			break;
		default:
			return (EIO);
		}
		nfsdump_maxcount = (int)dumpvp_size;
		nfsdump_addr = VTOMI(dumpvp)->mi_curr_serv->sv_addr;
		nfsdump_cf = *(VTOMI(dumpvp)->mi_curr_serv->sv_knconf);
		if (nfsdump_cf.knc_semantics != NC_TPI_CLTS) {
			int v6 = 1;
			nd_log("nfs_dump: not connectionless!\n");
			if ((strcmp(nfsdump_cf.knc_protofmly, NC_INET) == 0) ||
			    ((v6 = strcmp(nfsdump_cf.knc_protofmly, NC_INET6))
			    == 0)) {
				major_t clone_maj;

				nfsdump_cf.knc_proto = NC_UDP;
				nfsdump_cf.knc_semantics = NC_TPI_CLTS;
				nd_log("nfs_dump: grabbing UDP major number\n");
				clone_maj = ddi_name_to_major("clone");
				nd_log("nfs_dump: making UDP device\n");
				nfsdump_cf.knc_rdev = makedevice(clone_maj,
				    ddi_name_to_major(v6?"udp":"udp6"));
			} else {
				error = EIO;
				nfs_perror(error, "\nnfs_dump: cannot dump over"
				    " protocol %s: %m\n", nfsdump_cf.knc_proto);
				return (error);
			}
		}
	}

	nd_log("nfs_dump: calling t_kopen\n");

	if (error = t_kopen(NULL, nfsdump_cf.knc_rdev,
	    FREAD|FWRITE|FNDELAY, tiptr, CRED())) {
		nfs_perror(error, "\nnfs_dump: t_kopen failed: %m\n");
		return (EIO);
	}

	if ((strcmp(nfsdump_cf.knc_protofmly, NC_INET) == 0) ||
	    (strcmp(nfsdump_cf.knc_protofmly, NC_INET6) == 0)) {
		nd_log("nfs_dump: calling bindresvport\n");
		if (error = bindresvport(*tiptr, NULL, NULL, FALSE)) {
			nfs_perror(error,
			    "\nnfs_dump: bindresvport failed: %m\n");
			return (EIO);
		}
	} else {
		nd_log("nfs_dump: calling t_kbind\n");
		if ((error = t_kbind(*tiptr, NULL, NULL)) != 0) {
			nfs_perror(error, "\nnfs_dump: t_kbind failed: %m\n");
			return (EIO);
		}
	}
	return (0);
}
Example #7
/*
 * Return value:
 *   1 - exitlwps() failed, call (or continue) lwp_exit()
 *   0 - restarting init.  Return through system call path
 */
int
proc_exit(int why, int what)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	proc_t *p = ttoproc(t);
	zone_t *z = p->p_zone;
	timeout_id_t tmp_id;
	int rv;
	proc_t *q;
	task_t *tk;
	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
	sigqueue_t *sqp;
	lwpdir_t *lwpdir;
	uint_t lwpdir_sz;
	tidhash_t *tidhash;
	uint_t tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	refstr_t *cwd;
	hrtime_t hrutime, hrstime;
	int evaporate;
	brand_t *orig_brand = NULL;
	void *brand_data = NULL;

	/*
	 * Stop and discard the process's lwps except for the current one,
	 * unless some other lwp beat us to it.  If exitlwps() fails then
	 * return and the calling lwp will call (or continue in) lwp_exit().
	 */
	proc_is_exiting(p);
	if (exitlwps(0) != 0)
		return (1);

	mutex_enter(&p->p_lock);
	if (p->p_ttime > 0) {
		/*
		 * Account any remaining ticks charged to this process
		 * on its way out.
		 */
		(void) task_cpu_time_incr(p->p_task, p->p_ttime);
		p->p_ttime = 0;
	}
	mutex_exit(&p->p_lock);

	DTRACE_PROC(lwp__exit);
	DTRACE_PROC1(exit, int, why);

	/*
	 * Will perform any brand specific proc exit processing. Since this
	 * is always the last lwp, will also perform lwp_exit and free
	 * brand_data, except in the case that the brand has a b_exit_with_sig
	 * handler. In this case we free the brand_data later within this
	 * function.
	 */
	mutex_enter(&p->p_lock);
	if (PROC_IS_BRANDED(p)) {
		orig_brand = p->p_brand;
		if (p->p_brand_data != NULL && orig_brand->b_data_size > 0) {
			brand_data = p->p_brand_data;
		}
		lwp_detach_brand_hdlrs(lwp);
		brand_clearbrand(p, B_FALSE);
	}
	mutex_exit(&p->p_lock);

	/*
	 * Don't let init exit unless zone_start_init() failed its exec, or
	 * we are shutting down the zone or the machine.
	 *
	 * Since we are single threaded, we don't need to lock the
	 * following accesses to zone_proc_initpid.
	 */
	if (p->p_pid == z->zone_proc_initpid) {
		if (z->zone_boot_err == 0 &&
		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
			if (z->zone_restart_init == B_TRUE) {
				if (restart_init(what, why) == 0)
					return (0);
			}

			z->zone_init_status = wstat(why, what);
			(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL, CRED());
		}

		/*
		 * Since we didn't or couldn't restart init, we clear
		 * the zone's init state and proceed with exit
		 * processing.
		 */
		z->zone_proc_initpid = -1;
	}

	lwp_pcb_exit();

	/*
	 * Allocate a sigqueue now, before we grab locks.
	 * It will be given to sigcld(), below.
	 * Special case:  If we will be making the process disappear
	 * without a trace because it is either:
	 *	* an exiting SSYS process, or
	 *	* a posix_spawn() vfork child who requests it,
	 * we don't bother to allocate a useless sigqueue.
	 */
	evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
	    why == CLD_EXITED && what == _EVAPORATE);
	if (!evaporate)
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);

	/*
	 * revoke any doors created by the process.
	 */
	if (p->p_door_list)
		door_exit();

	/*
	 * Release schedctl data structures.
	 */
	if (p->p_pagep)
		schedctl_proc_cleanup();

	/*
	 * make sure all pending kaio has completed.
	 */
	if (p->p_aio)
		aio_cleanup_exit();

	/*
	 * discard the lwpchan cache.
	 */
	if (p->p_lcp != NULL)
		lwpchan_destroy_cache(0);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		ASSERT(dtrace_helpers_cleanup != NULL);
		(*dtrace_helpers_cleanup)();
	}

	/* untimeout the realtime timers */
	if (p->p_itimer != NULL)
		timer_exit();

	if ((tmp_id = p->p_alarmid) != 0) {
		p->p_alarmid = 0;
		(void) untimeout(tmp_id);
	}

	/*
	 * Remove any fpollinfo_t's for this (last) thread from our file
	 * descriptors so closeall() can ASSERT() that they're all gone.
	 */
	pollcleanup();

	if (p->p_rprof_cyclic != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);
		cyclic_remove(p->p_rprof_cyclic);
		mutex_exit(&cpu_lock);
	}

	mutex_enter(&p->p_lock);

	/*
	 * Clean up any DTrace probes associated with this process.
	 */
	if (p->p_dtrace_probes) {
		ASSERT(dtrace_fasttrap_exit_ptr != NULL);
		dtrace_fasttrap_exit_ptr(p);
	}

	while ((tmp_id = p->p_itimerid) != 0) {
		p->p_itimerid = 0;
		mutex_exit(&p->p_lock);
		(void) untimeout(tmp_id);
		mutex_enter(&p->p_lock);
	}

	lwp_cleanup();

	/*
	 * We are about to exit; prevent our resource associations from
	 * being changed.
	 */
	pool_barrier_enter();

	/*
	 * Block the process against /proc now that we have really
	 * acquired p->p_lock (to manipulate p_tlist at least).
	 */
	prbarrier(p);

	sigfillset(&p->p_ignore);
	sigemptyset(&p->p_siginfo);
	sigemptyset(&p->p_sig);
	sigemptyset(&p->p_extsig);
	sigemptyset(&t->t_sig);
	sigemptyset(&t->t_extsig);
	sigemptyset(&p->p_sigmask);
	sigdelq(p, t, 0);
	lwp->lwp_cursig = 0;
	lwp->lwp_extsig = 0;
	p->p_flag &= ~(SKILLED | SEXTKILLED);
	if (lwp->lwp_curinfo) {
		siginfofree(lwp->lwp_curinfo);
		lwp->lwp_curinfo = NULL;
	}

	t->t_proc_flag |= TP_LWPEXIT;
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	prlwpexit(t);		/* notify /proc */
	lwp_hash_out(p, t->t_tid);
	prexit(p);

	p->p_lwpcnt = 0;
	p->p_tlist = NULL;
	sigqfree(p);
	term_mstate(t);
	p->p_mterm = gethrtime();

	exec_vp = p->p_exec;
	execdir_vp = p->p_execdir;
	p->p_exec = NULLVP;
	p->p_execdir = NULLVP;
	mutex_exit(&p->p_lock);

	pr_free_watched_pages(p);

	closeall(P_FINFO(p));

	/* Free the controlling tty.  (freectty() always assumes curproc.) */
	ASSERT(p == curproc);
	(void) freectty(B_TRUE);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif
	if (p->p_semacct)			/* IPC semaphore exit */
		semexit(p);
	rv = wstat(why, what);

	acct(rv & 0xff);
	exacct_commit_proc(p, rv);

	/*
	 * Release any resources associated with C2 auditing
	 */
	if (AU_AUDITING()) {
		/*
		 * audit exit system call
		 */
		audit_exit(why, what);
	}

	/*
	 * Free address space.
	 */
	relvm();

	if (exec_vp) {
		/*
		 * Close this executable which has been opened when the process
		 * was created by getproc().
		 */
		(void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(exec_vp);
	}
	if (execdir_vp)
		VN_RELE(execdir_vp);

	/*
	 * Release held contracts.
	 */
	contract_exit(p);

	/*
	 * Depart our encapsulating process contract.
	 */
	if ((p->p_flag & SSYS) == 0) {
		ASSERT(p->p_ct_process);
		contract_process_exit(p->p_ct_process, p, rv);
	}

	/*
	 * Remove pool association, and block if requested by pool_do_bind.
	 */
	mutex_enter(&p->p_lock);
	ASSERT(p->p_pool->pool_ref > 0);
	atomic_dec_32(&p->p_pool->pool_ref);
	p->p_pool = pool_default;
	/*
	 * Now that our address space has been freed and all other threads
	 * in this process have exited, set the PEXITED pool flag.  This
	 * tells the pools subsystems to ignore this process if it was
	 * requested to rebind this process to a new pool.
	 */
	p->p_poolflag |= PEXITED;
	pool_barrier_exit();
	mutex_exit(&p->p_lock);

	mutex_enter(&pidlock);

	/*
	 * Delete this process from the newstate list of its parent. We
	 * will put it in the right place in the sigcld in the end.
	 */
	delete_ns(p->p_parent, p);

	/*
	 * Reassign the orphans to the next of kin.
	 * Don't rearrange init's orphanage.
	 */
	if ((q = p->p_orphan) != NULL && p != proc_init) {

		proc_t *nokp = p->p_nextofkin;

		for (;;) {
			q->p_nextofkin = nokp;
			if (q->p_nextorph == NULL)
				break;
			q = q->p_nextorph;
		}
		q->p_nextorph = nokp->p_orphan;
		nokp->p_orphan = p->p_orphan;
		p->p_orphan = NULL;
	}

	/*
	 * Reassign the children to init.
	 * Don't try to assign init's children to init.
	 */
	if ((q = p->p_child) != NULL && p != proc_init) {
		struct proc	*np;
		struct proc	*initp = proc_init;
		pid_t		zone_initpid = 1;
		struct proc	*zoneinitp = NULL;
		boolean_t	setzonetop = B_FALSE;

		if (!INGLOBALZONE(curproc)) {
			zone_initpid = curproc->p_zone->zone_proc_initpid;

			ASSERT(MUTEX_HELD(&pidlock));
			zoneinitp = prfind(zone_initpid);
			if (zoneinitp != NULL) {
				initp = zoneinitp;
			} else {
				zone_initpid = 1;
				setzonetop = B_TRUE;
			}
		}

		pgdetach(p);

		do {
			np = q->p_sibling;
			/*
			 * Delete it from its current parent new state
			 * list and add it to init new state list
			 */
			delete_ns(q->p_parent, q);

			q->p_ppid = zone_initpid;

			q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
			if (setzonetop) {
				mutex_enter(&q->p_lock);
				q->p_flag |= SZONETOP;
				mutex_exit(&q->p_lock);
			}
			q->p_parent = initp;

			/*
			 * Since q will be the first child,
			 * it will not have a previous sibling.
			 */
			q->p_psibling = NULL;
			if (initp->p_child) {
				initp->p_child->p_psibling = q;
			}
			q->p_sibling = initp->p_child;
			initp->p_child = q;
			if (q->p_proc_flag & P_PR_PTRACE) {
				mutex_enter(&q->p_lock);
				sigtoproc(q, NULL, SIGKILL);
				mutex_exit(&q->p_lock);
			}
			/*
			 * sigcld() will add the child to parents
			 * newstate list.
			 */
			if (q->p_stat == SZOMB)
				sigcld(q, NULL);
		} while ((q = np) != NULL);

		p->p_child = NULL;
		ASSERT(p->p_child_ns == NULL);
	}

	TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);

	mutex_enter(&p->p_lock);
	CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */

	/*
	 * Have our task accumulate our resource usage data before they
	 * become contaminated by p_cacct etc., and before we renounce
	 * membership of the task.
	 *
	 * We do this regardless of whether or not task accounting is active.
	 * This is to avoid having nonsense data reported for this task if
	 * task accounting is subsequently enabled. The overhead is minimal;
	 * by this point, this process has accounted for the usage of all its
	 * LWPs. We nonetheless do the work here, and under the protection of
	 * pidlock, so that the movement of the process's usage to the task
	 * happens at the same time as the removal of the process from the
	 * task, from the point of view of exacct_snapshot_task_usage().
	 */
	exacct_update_task_mstate(p);

	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
	p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;

	p->p_acct[LMS_USER]	+= p->p_cacct[LMS_USER];
	p->p_acct[LMS_SYSTEM]	+= p->p_cacct[LMS_SYSTEM];
	p->p_acct[LMS_TRAP]	+= p->p_cacct[LMS_TRAP];
	p->p_acct[LMS_TFAULT]	+= p->p_cacct[LMS_TFAULT];
	p->p_acct[LMS_DFAULT]	+= p->p_cacct[LMS_DFAULT];
	p->p_acct[LMS_KFAULT]	+= p->p_cacct[LMS_KFAULT];
	p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
	p->p_acct[LMS_SLEEP]	+= p->p_cacct[LMS_SLEEP];
	p->p_acct[LMS_WAIT_CPU]	+= p->p_cacct[LMS_WAIT_CPU];
	p->p_acct[LMS_STOPPED]	+= p->p_cacct[LMS_STOPPED];

	p->p_ru.minflt	+= p->p_cru.minflt;
	p->p_ru.majflt	+= p->p_cru.majflt;
	p->p_ru.nswap	+= p->p_cru.nswap;
	p->p_ru.inblock	+= p->p_cru.inblock;
	p->p_ru.oublock	+= p->p_cru.oublock;
	p->p_ru.msgsnd	+= p->p_cru.msgsnd;
	p->p_ru.msgrcv	+= p->p_cru.msgrcv;
	p->p_ru.nsignals += p->p_cru.nsignals;
	p->p_ru.nvcsw	+= p->p_cru.nvcsw;
	p->p_ru.nivcsw	+= p->p_cru.nivcsw;
	p->p_ru.sysc	+= p->p_cru.sysc;
	p->p_ru.ioch	+= p->p_cru.ioch;

	p->p_stat = SZOMB;
	p->p_proc_flag &= ~P_PR_PTRACE;
	p->p_wdata = what;
	p->p_wcode = (char)why;

	cdir = PTOU(p)->u_cdir;
	rdir = PTOU(p)->u_rdir;
	cwd = PTOU(p)->u_cwd;

	ASSERT(cdir != NULL || p->p_parent == &p0);

	/*
	 * Release resource controls, as they are no longer enforceable.
	 */
	rctl_set_free(p->p_rctls);

	/*
	 * Decrement tk_nlwps counter for our task.max-lwps resource control.
	 * An extended accounting record, if that facility is active, is
	 * scheduled to be written.  We cannot give up task and project
	 * membership at this point because that would allow zombies to escape
	 * from the max-processes resource controls.  Zombies stay in their
	 * current task and project until the process table slot is released
	 * in freeproc().
	 */
	tk = p->p_task;

	mutex_enter(&p->p_zone->zone_nlwps_lock);
	tk->tk_nlwps--;
	tk->tk_proj->kpj_nlwps--;
	p->p_zone->zone_nlwps--;
	mutex_exit(&p->p_zone->zone_nlwps_lock);

	/*
	 * Clear the lwp directory and the lwpid hash table
	 * now that /proc can't bother us any more.
	 * We free the memory below, after dropping p->p_lock.
	 */
	lwpdir = p->p_lwpdir;
	lwpdir_sz = p->p_lwpdir_sz;
	tidhash = p->p_tidhash;
	tidhash_sz = p->p_tidhash_sz;
	ret_tidhash = p->p_ret_tidhash;
	p->p_lwpdir = NULL;
	p->p_lwpfree = NULL;
	p->p_lwpdir_sz = 0;
	p->p_tidhash = NULL;
	p->p_tidhash_sz = 0;
	p->p_ret_tidhash = NULL;

	/*
	 * If the process has context ops installed, call the exit routine
	 * on behalf of this last remaining thread. Normally exitpctx() is
	 * called during thread_exit() or lwp_exit(), but because this is the
	 * last thread in the process, we must call it here. By the time
	 * thread_exit() is called (below), the association with the relevant
	 * process has been lost.
	 *
	 * We also free the context here.
	 */
	if (p->p_pctx) {
		kpreempt_disable();
		exitpctx(p);
		kpreempt_enable();

		freepctx(p, 0);
	}

	/*
	 * curthread's proc pointer is changed to point to the 'sched'
	 * process for the corresponding zone, except in the case when
	 * the exiting process is in fact a zsched instance, in which
	 * case the proc pointer is set to p0.  We do so, so that the
	 * process still points at the right zone when we call the VN_RELE()
	 * below.
	 *
	 * This is because curthread's original proc pointer can be freed as
	 * soon as the child sends a SIGCLD to its parent.  We use zsched so
	 * that for user processes, even in the final moments of death, the
	 * process is still associated with its zone.
	 */
	if (p != t->t_procp->p_zone->zone_zsched)
		t->t_procp = t->t_procp->p_zone->zone_zsched;
	else
		t->t_procp = &p0;

	mutex_exit(&p->p_lock);
	if (!evaporate) {
		/*
		 * The brand specific code only happens when the brand has a
		 * function to call in place of sigcld, the data itself still
		 * existed, and the parent of the exiting process is not the
		 * global zone init. If the parent is the global zone init,
		 * then the process was reparented, and we don't want brand
		 * code delivering possibly strange signals to init. Also, init
		 * is not branded, so any brand specific exit data will not be
		 * picked up by init anyway.
		 * It is assumed by this code that any brand where
		 * b_exit_with_sig == NULL, will free its own brand_data rather
		 * than letting this piece of code free it.
		 */
		if (orig_brand != NULL &&
		    orig_brand->b_ops->b_exit_with_sig != NULL &&
		    brand_data != NULL && p->p_ppid != 1) {
			/*
			 * The code for _fini that could unload the brand_t
			 * blocks until the count of zones using the module
			 * reaches zero. Zones decrement the refcount on their
			 * brands only after all user tasks in that zone have
			 * exited and been waited on. The decrement on the
			 * brand's refcount happen in zone_destroy(). That
			 * depends on zone_shutdown() having been completed.
			 * zone_shutdown() includes a call to zone_empty(),
			 * where the zone waits for itself to reach the state
			 * ZONE_IS_EMPTY. This state is only set in either
			 * zone_shutdown(), when there are no user processes as
			 * the zone enters this function, or in
			 * zone_task_rele(). zone_task_rele() is called from
			 * code triggered by waiting on processes, not by the
			 * processes exiting through proc_exit().  This means
			 * all the branded processes that could exist for a
			 * specific brand_t must exit and get reaped before the
			 * refcount on the brand_t can reach 0. _fini will
			 * never unload the corresponding brand module before
			 * proc_exit finishes execution for all processes
			 * branded with a particular brand_t, which makes the
			 * operation below safe to do. Brands that wish to use
			 * this mechanism must wait in _fini as described
			 * above.
			 */
			orig_brand->b_ops->b_exit_with_sig(p,
			    sqp, brand_data);
		} else {
			p->p_pidflag &= ~CLDPEND;
			sigcld(p, sqp);
		}
		if (brand_data != NULL) {
			kmem_free(brand_data, orig_brand->b_data_size);
			brand_data = NULL;
			orig_brand = NULL;
		}

	} else {
		/*
		 * Do what sigcld() would do if the disposition
		 * of the SIGCHLD signal were set to be ignored.
		 */
		cv_broadcast(&p->p_srwchan_cv);
		freeproc(p);
	}
	mutex_exit(&pidlock);

	/*
	 * We don't release u_cdir and u_rdir until SZOMB is set.
	 * This protects us against dofusers().
	 */
	if (cdir)
		VN_RELE(cdir);
	if (rdir)
		VN_RELE(rdir);
	if (cwd)
		refstr_rele(cwd);

	/*
	 * task_rele() may ultimately cause the zone to go away (or
	 * may cause the last user process in a zone to go away, which
	 * signals zsched to go away).  So prior to this call, we must
	 * no longer point at zsched.
	 */
	t->t_procp = &p0;

	kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
	kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	thread_exit();
	/* NOTREACHED */
}
Example #8
static int
chdirec(vnode_t *vp, int ischroot, int do_traverse)
{
	int error;
	vnode_t *oldvp;
	proc_t *pp = curproc;
	vnode_t **vpp;
	refstr_t *cwd;
	int newcwd = 1;

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	if (error = VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL))
		goto bad;

	/*
	 * The VOP_ACCESS() may have covered 'vp' with a new filesystem,
	 * if 'vp' is an autoFS vnode. Traverse the mountpoint so
	 * that we don't end up with a covered current directory.
	 */
	if (vn_mountedvfs(vp) != NULL && do_traverse) {
		if (error = traverse(&vp))
			goto bad;
	}

	/*
	 * Special chroot semantics: chroot is allowed if privileged
	 * or if the target is really a loopback mount of the root (or
	 * root of the zone) as determined by comparing dev and inode
	 * numbers
	 */
	if (ischroot) {
		struct vattr tattr;
		struct vattr rattr;
		vnode_t *zonevp = curproc->p_zone->zone_rootvp;

		tattr.va_mask = AT_FSID|AT_NODEID;
		if (error = VOP_GETATTR(vp, &tattr, 0, CRED(), NULL))
			goto bad;

		rattr.va_mask = AT_FSID|AT_NODEID;
		if (error = VOP_GETATTR(zonevp, &rattr, 0, CRED(), NULL))
			goto bad;

		if ((tattr.va_fsid != rattr.va_fsid ||
		    tattr.va_nodeid != rattr.va_nodeid) &&
		    (error = secpolicy_chroot(CRED())) != 0)
			goto bad;

		vpp = &PTOU(pp)->u_rdir;
	} else {
		vpp = &PTOU(pp)->u_cdir;
	}

	/* update abs cwd/root path see c2/audit.c */
	if (AU_AUDITING())
		audit_chdirec(vp, vpp);

	mutex_enter(&pp->p_lock);
	/*
	 * This bit of logic prevents us from overwriting u_cwd if we are
	 * changing to the same directory.  We set the cwd to NULL so that we
	 * don't try to do the lookup on the next call to getcwd().
	 */
	if (!ischroot && *vpp != NULL && vp != NULL && VN_CMP(*vpp, vp))
		newcwd = 0;

	oldvp = *vpp;
	*vpp = vp;
	if ((cwd = PTOU(pp)->u_cwd) != NULL && newcwd)
		PTOU(pp)->u_cwd = NULL;
	mutex_exit(&pp->p_lock);

	if (cwd && newcwd)
		refstr_rele(cwd);
	if (oldvp)
		VN_RELE(oldvp);
	return (0);

bad:
	VN_RELE(vp);
	return (error);
}
/*
 * Set the audit state information for the current process.
 * Return EFAULT if copyin fails.
 */
static int
setaudit_addr(caddr_t info_p, int len)
{
	STRUCT_DECL(auditinfo_addr, info);
	proc_t *p;
	cred_t	*newcred;
	model_t	model;
	int i;
	int type;
	auditinfo_addr_t *ainfo;

	if (secpolicy_audit_config(CRED()) != 0)
		return (EPERM);

	model = get_udatamodel();
	STRUCT_INIT(info, model);

	if (len < STRUCT_SIZE(info))
		return (EOVERFLOW);

	if (copyin(info_p, STRUCT_BUF(info), STRUCT_SIZE(info)))
		return (EFAULT);

	type = STRUCT_FGET(info, ai_termid.at_type);
	if ((type != AU_IPv4) && (type != AU_IPv6))
		return (EINVAL);

	newcred = cralloc();
	if ((ainfo = crgetauinfo_modifiable(newcred)) == NULL) {
		crfree(newcred);
		return (EINVAL);
	}

	/* grab p_crlock and switch to new cred */
	p = curproc;
	mutex_enter(&p->p_crlock);
	crcopy_to(p->p_cred, newcred);
	p->p_cred = newcred;

	/* Set audit mask, id, termid and session id as specified */
	ainfo->ai_auid = STRUCT_FGET(info, ai_auid);
	ainfo->ai_mask = STRUCT_FGET(info, ai_mask);
#ifdef _LP64
	/* only convert to 64 bit if coming from a 32 bit binary */
	if (model == DATAMODEL_ILP32)
		ainfo->ai_termid.at_port =
		    DEVEXPL(STRUCT_FGET(info, ai_termid.at_port));
	else
		ainfo->ai_termid.at_port = STRUCT_FGET(info, ai_termid.at_port);
#else
	ainfo->ai_termid.at_port = STRUCT_FGET(info, ai_termid.at_port);
#endif
	ainfo->ai_termid.at_type = type;
	bzero(&ainfo->ai_termid.at_addr[0], sizeof (ainfo->ai_termid.at_addr));
	for (i = 0; i < (type/sizeof (int)); i++)
		ainfo->ai_termid.at_addr[i] =
		    STRUCT_FGET(info, ai_termid.at_addr[i]);

	if (ainfo->ai_termid.at_type == AU_IPv6 &&
	    IN6_IS_ADDR_V4MAPPED(((in6_addr_t *)ainfo->ai_termid.at_addr))) {
		ainfo->ai_termid.at_type = AU_IPv4;
		ainfo->ai_termid.at_addr[0] = ainfo->ai_termid.at_addr[3];
		ainfo->ai_termid.at_addr[1] = 0;
		ainfo->ai_termid.at_addr[2] = 0;
		ainfo->ai_termid.at_addr[3] = 0;
	}

	ainfo->ai_asid = STRUCT_FGET(info, ai_asid);

	/* unlock and broadcast the cred changes */
	mutex_exit(&p->p_crlock);
	crset(p, newcred);

	return (0);
}
/*
 * The audit system call. Trust what the user has sent down and save it
 * away in the audit file. User passes a complete audit record and its
 * length.  We will fill in the time stamp, check the header and the length,
 * and put a trailer and a sequence token if policy requires.
 * In the future length might become size_t instead of an int.
 *
 * The call is valid whether or not AUDIT_PERZONE is set (think of
 * login to a zone).  When the local audit state (auk_auditstate) is
 * AUC_INIT_AUDIT, records are accepted even though auditd isn't
 * running.
 */
int
audit(caddr_t record, int length)
{
	char	c;
	int	count, l;
	token_t	*m, *n, *s, *ad;
	int	hdrlen, delta;
	adr_t	hadr;
	adr_t	sadr;
	int	size;	/* 0: 32 bit utility  1: 64 bit utility */
	int	host_len;
	size_t	zlen;
	au_kcontext_t	*kctx = GET_KCTX_PZ;

	/* if auditing not enabled, then don't generate an audit record */
	if (kctx->auk_auditstate != AUC_AUDITING &&
	    kctx->auk_auditstate != AUC_INIT_AUDIT)
		return (0);

	/* Only privileged processes can audit */
	if (secpolicy_audit_modify(CRED()) != 0)
		return (EPERM);

	/* Max user record size is 32K */
	if (length > AUDIT_REC_SIZE)
		return (E2BIG);

	/*
	 * The specified length must be at least as big as the smallest
	 * possible header token. Later after beginning to scan the
	 * header we'll determine the true minimum length according to
	 * the header type and attributes.
	 */
#define	AU_MIN_HEADER_LEN	(sizeof (char) + sizeof (int32_t) + \
	sizeof (char) + sizeof (short) + sizeof (short) + \
	(sizeof (int32_t) * 2))

	if (length < AU_MIN_HEADER_LEN)
		return (EINVAL);

	/* Read in user's audit record */
	count = length;
	m = n = s = ad = NULL;
	while (count) {
		m = au_getclr();
		if (!s)
			s = n = m;
		else {
			n->next_buf = m;
			n = m;
		}
		l = MIN(count, AU_BUFSIZE);
		if (copyin(record, memtod(m, caddr_t), (size_t)l)) {
			/* copyin failed release au_membuf */
			au_free_rec(s);
			return (EFAULT);
		}
		record += l;
		count -= l;
		m->len = (uchar_t)l;
	}

	/* Now attach the entire thing to ad */
	au_write((caddr_t *)&(ad), s);

	/* validate header token type. trust everything following it */
	adr_start(&hadr, memtod(s, char *));
	(void) adr_getchar(&hadr, &c);
	switch (c) {
	case AUT_HEADER32:
		/* size vers+event_ID+event_modifier fields */
		delta = 1 + 2 + 2;
		hdrlen = 1 + 4 + delta + (sizeof (int32_t) * 2);
		size = HEADER_SIZE32;
		break;

#ifdef _LP64
	case AUT_HEADER64:
		/* size vers+event_ID+event_modifier fields */
		delta = 1 + 2 + 2;
		hdrlen = 1 + 4 + delta + (sizeof (int64_t) * 2);
		size = HEADER_SIZE64;
		break;
#endif

	case AUT_HEADER32_EX:
		/*
		 * Skip over the length/version/type/mod fields and
		 * grab the host address type (length), then rewind.
		 * This is safe per the previous minimum length check.
		 */
		hadr.adr_now += 9;
		(void) adr_getint32(&hadr, &host_len);
		hadr.adr_now -= 9 + sizeof (int32_t);

		/* size: vers+event_ID+event_modifier+IP_type+IP_addr_array */
		delta = 1 + 2 + 2 + 4 + host_len;
		hdrlen = 1 + 4 + delta + (sizeof (int32_t) * 2);
		size = HEADER_SIZE32;
		break;

#ifdef _LP64
	case AUT_HEADER64_EX:
		/*
		 * Skip over the length/version/type/mod fields and grab
		 * the host address type (length), then rewind.
		 * This is safe per the previous minimum length check.
		 */
		hadr.adr_now += 9;
		(void) adr_getint32(&hadr, &host_len);
		hadr.adr_now -= 9 + sizeof (int32_t);

		/* size: vers+event_ID+event_modifier+IP_type+IP_addr_array */
		delta = 1 + 2 + 2 + 4 + host_len;
		hdrlen = 1 + 4 + delta + (sizeof (int64_t) * 2);
		size = HEADER_SIZE64;
		break;
#endif

	default:
		/* Header is wrong, reject message */
		au_free_rec(s);
		return (EINVAL);
	}

	if (length < hdrlen) {
		au_free_rec(s);
		return (0);
	}

	/* advance over header token length field */
	hadr.adr_now += 4;

	/* validate version */
	(void) adr_getchar(&hadr, &c);
	if (c != TOKEN_VERSION) {
		/* version is wrong, reject message */
		au_free_rec(s);
		return (EINVAL);
	}

	/* backup to header length field (including version field) */
	hadr.adr_now -= 5;

	/*
	 * add on the zonename token if policy AUDIT_ZONENAME is set
	 */
	if (kctx->auk_policy & AUDIT_ZONENAME) {
		zlen = au_zonename_length(NULL);
		if (zlen > 0) {
			length += zlen;
			m = au_to_zonename(zlen, NULL);
			(void) au_append_rec(ad, m, AU_PACK);
		}
	}
	/* Add an (optional) sequence token. NULL offset if none */
	if (kctx->auk_policy & AUDIT_SEQ) {
		/* get the sequence token */
		m = au_to_seq();

		/* sequence token 5 bytes long */
		length += 5;

		/* link to audit record (i.e. don't pack the data) */
		(void) au_append_rec(ad, m, AU_LINK);

		/* advance to count field of token */
		adr_start(&sadr, memtod(m, char *));
		sadr.adr_now += 1;
	} else
Example #11
/*
 * iscsi_net_close - shutdown socket connection and release resources
 */
static void
iscsi_net_close(void *socket)
{
	ksocket_t ks = (ksocket_t)socket;
	(void) ksocket_close(ks, CRED());
}
Example #12
/*
 * iscsi_net_shutdown - shutdown socket connection
 */
static int
iscsi_net_shutdown(void *socket, int how)
{
	ksocket_t ks = (ksocket_t)socket;
	return (ksocket_shutdown(ks, how, CRED()));
}
Example #13
/*
 * iscsi_net_getsockname -
 */
static int
iscsi_net_getsockname(void *socket, struct sockaddr *addr, socklen_t *addrlen)
{
	ksocket_t ks = (ksocket_t)socket;
	return (ksocket_getsockname(ks, addr, addrlen, CRED()));
}
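
Examples #2, #11, #12, and #13 are all one-line wrappers that forward the kernel credential from CRED() to the matching ksocket routine. A hypothetical companion for the peer address would follow the same shape (iscsi_net_getpeername does not appear in this listing; the sketch assumes ksocket_getpeername() takes the same arguments as ksocket_getsockname()):

/*
 * iscsi_net_getpeername - hypothetical illustration of the wrapper
 * pattern; assumes ksocket_getpeername() mirrors ksocket_getsockname().
 */
static int
iscsi_net_getpeername(void *socket, struct sockaddr *addr, socklen_t *addrlen)
{
	ksocket_t ks = (ksocket_t)socket;
	return (ksocket_getpeername(ks, addr, addrlen, CRED()));
}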
Example #14
int
cpr_alloc_statefile(int alloc_retry)
{
	register int rc = 0;
	char *str;

	/*
	 * Statefile size validation. If this is the first checkpoint, disk
	 * block allocation will be done; otherwise, just do a file size check.
	 * If statefile allocation is being retried, C_VP will already be
	 * initialized.
	 */
	if (alloc_retry) {
		str = "\n-->Retrying statefile allocation...";
		if (cpr_debug & (CPR_DEBUG1 | CPR_DEBUG7))
			prom_printf(str);
		if (C_VP->v_type != VBLK)
			(void) VOP_DUMPCTL(C_VP, DUMP_FREE, NULL, NULL);
	} else {
		/*
		 * Open an existing file for writing; the state file needs to be
		 * pre-allocated since we can't and don't want to do allocation
		 * during checkpoint (too much of the OS is disabled).
		 *    - do a preliminary size checking here, if it is too small,
		 *	allocate more space internally and retry.
		 *    - check the vp to make sure it's the right type.
		 */
		char *path = cpr_build_statefile_path();

		if (path == NULL)
			return (ENXIO);
		else if (rc = cpr_verify_statefile_path())
			return (rc);

		if (rc = vn_open(path, UIO_SYSSPACE,
		    FCREAT|FWRITE, 0600, &C_VP, CRCREAT, 0)) {
			cpr_err(CE_WARN, "cannot open statefile %s", path);
			return (rc);
		}
	}

	/*
	 * Only ufs and block special statefiles supported
	 */
	if (C_VP->v_type != VREG && C_VP->v_type != VBLK) {
		cpr_err(CE_CONT,
		    "Statefile must be regular file or block special file.");
		return (EACCES);
	}

	if (rc = cpr_statefile_ok(C_VP, alloc_retry))
		return (rc);

	if (C_VP->v_type != VBLK) {
		/*
		 * sync out the fs change due to the statefile reservation.
		 */
		(void) VFS_SYNC(C_VP->v_vfsp, 0, CRED());

		/*
		 * Validate disk blocks allocation for the state file.
		 * Ask the file system prepare itself for the dump operation.
		 */
		if (rc = VOP_DUMPCTL(C_VP, DUMP_ALLOC, NULL, NULL)) {
			cpr_err(CE_CONT, "Error allocating "
			    "blocks for cpr statefile.");
			return (rc);
		}
	}
	return (0);
}
Example #15
static struct dentry *
#ifdef HAVE_LOOKUP_NAMEIDATA
zpl_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
#else
zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
#endif
{
	cred_t *cr = CRED();
	struct inode *ip;
	int error;
	fstrans_cookie_t cookie;
	pathname_t *ppn = NULL;
	pathname_t pn;
	int zfs_flags = 0;
	zfs_sb_t *zsb = dentry->d_sb->s_fs_info;

	if (dlen(dentry) > ZFS_MAXNAMELEN)
		return (ERR_PTR(-ENAMETOOLONG));

	crhold(cr);
	cookie = spl_fstrans_mark();

	/* If we are a case insensitive fs, we need the real name */
	if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
		zfs_flags = FIGNORECASE;
		pn_alloc(&pn);
		ppn = &pn;
	}

	error = -zfs_lookup(dir, dname(dentry), &ip, zfs_flags, cr, NULL, ppn);
	spl_fstrans_unmark(cookie);
	ASSERT3S(error, <=, 0);
	crfree(cr);

	spin_lock(&dentry->d_lock);
	dentry->d_time = jiffies;
#ifndef HAVE_S_D_OP
	d_set_d_op(dentry, &zpl_dentry_operations);
#endif /* HAVE_S_D_OP */
	spin_unlock(&dentry->d_lock);

	if (error) {
		/*
		 * If we have a case sensitive fs, we do not want to
		 * insert negative entries, so return NULL for ENOENT.
		 * Fall through if the error is not ENOENT. Also free memory.
		 */
		if (ppn) {
			pn_free(ppn);
			if (error == -ENOENT)
				return (NULL);
		}

		if (error == -ENOENT)
			return (d_splice_alias(NULL, dentry));
		else
			return (ERR_PTR(error));
	}

	/*
	 * If we are case insensitive, call the correct function
	 * to install the name.
	 */
	if (ppn) {
		struct dentry *new_dentry;
		struct qstr ci_name;

		ci_name.name = pn.pn_buf;
		ci_name.len = strlen(pn.pn_buf);
		new_dentry = d_add_ci(dentry, ip, &ci_name);
		pn_free(ppn);
		return (new_dentry);
	} else {
		return (d_splice_alias(ip, dentry));
	}
}
Example #16
static uint32_t
smb_authsock_open(smb_user_t *user)
{
	smb_server_t *sv = user->u_server;
	ksocket_t so = NULL;
	uint32_t status;
	int rc;

	/*
	 * If the auth. service is busy, wait our turn.
	 * This may be frequent, so don't log.
	 */
	if ((rc = smb_threshold_enter(&sv->sv_ssetup_ct)) != 0)
		return (NT_STATUS_NO_LOGON_SERVERS);

	rc = ksocket_socket(&so, AF_UNIX, SOCK_STREAM, 0,
	    KSOCKET_SLEEP, CRED());
	if (rc != 0) {
		cmn_err(CE_NOTE, "smb_authsock_open: socket, rc=%d", rc);
		status = NT_STATUS_INSUFF_SERVER_RESOURCES;
		goto errout;
	}

	/*
	 * Set the send/recv timeouts.
	 */
	(void) ksocket_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO,
	    &smb_auth_send_tmo, sizeof (smb_auth_send_tmo), CRED());
	(void) ksocket_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO,
	    &smb_auth_recv_tmo, sizeof (smb_auth_recv_tmo), CRED());

	/*
	 * Connect to the smbd auth. service.
	 *
	 * Would like to set the connect timeout too, but there's
	 * apparently no easy way to do that for AF_UNIX.
	 */
	rc = ksocket_connect(so, (struct sockaddr *)&smbauth_sockname,
	    sizeof (smbauth_sockname), CRED());
	if (rc != 0) {
		DTRACE_PROBE1(error, int, rc);
		status = NT_STATUS_NETLOGON_NOT_STARTED;
		goto errout;
	}

	/* Note: u_authsock cleanup in smb_authsock_close() */
	mutex_enter(&user->u_mutex);
	if (user->u_authsock != NULL) {
		mutex_exit(&user->u_mutex);
		status = NT_STATUS_INTERNAL_ERROR;
		goto errout;
	}
	user->u_authsock = so;
	mutex_exit(&user->u_mutex);
	return (0);

errout:
	if (so != NULL)
		(void) ksocket_close(so, CRED());
	smb_threshold_exit(&sv->sv_ssetup_ct);

	return (status);
}
Example #17
static int
copen(int startfd, char *fname, int filemode, int createmode)
{
	struct pathname pn;
	vnode_t *vp, *sdvp;
	file_t *fp, *startfp;
	enum vtype type;
	int error;
	int fd, dupfd;
	vnode_t *startvp;
	proc_t *p = curproc;
	uio_seg_t seg = UIO_USERSPACE;
	char *open_filename = fname;
	uint32_t auditing = AU_AUDITING();
	char startchar;

	if (filemode & (FSEARCH|FEXEC)) {
		/*
		 * Must be one or the other and neither FREAD nor FWRITE
		 * Must not be any of FAPPEND FCREAT FTRUNC FXATTR FXATTRDIROPEN
		 * XXX: Should these just be silently ignored?
		 */
		if ((filemode & (FREAD|FWRITE)) ||
		    (filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
		    (filemode & (FAPPEND|FCREAT|FTRUNC|FXATTR|FXATTRDIROPEN)))
			return (set_errno(EINVAL));
	}

	if (startfd == AT_FDCWD) {
		/*
		 * Regular open()
		 */
		startvp = NULL;
	} else {
		/*
		 * We're here via openat()
		 */
		if (copyin(fname, &startchar, sizeof (char)))
			return (set_errno(EFAULT));

		/*
		 * if startchar is / then startfd is ignored
		 */
		if (startchar == '/')
			startvp = NULL;
		else {
			if ((startfp = getf(startfd)) == NULL)
				return (set_errno(EBADF));
			startvp = startfp->f_vnode;
			VN_HOLD(startvp);
			releasef(startfd);
		}
	}

	/*
	 * Handle __openattrdirat() requests
	 */
	if (filemode & FXATTRDIROPEN) {
		if (auditing && startvp != NULL)
			audit_setfsat_path(1);
		if (error = lookupnameat(fname, seg, FOLLOW,
		    NULLVPP, &vp, startvp))
			return (set_errno(error));
		if (startvp != NULL)
			VN_RELE(startvp);

		startvp = vp;
	}

	/*
	 * Do we need to go into extended attribute space?
	 */
	if (filemode & FXATTR) {
		if (startfd == AT_FDCWD) {
			if (copyin(fname, &startchar, sizeof (char)))
				return (set_errno(EFAULT));

			/*
			 * If startchar == '/' then no extended attributes
			 * are looked up.
			 */
			if (startchar == '/') {
				startvp = NULL;
			} else {
				mutex_enter(&p->p_lock);
				startvp = PTOU(p)->u_cdir;
				VN_HOLD(startvp);
				mutex_exit(&p->p_lock);
			}
		}

		/*
		 * Make sure we have a valid extended attribute request.
		 * We must either have a real fd or AT_FDCWD and a relative
		 * pathname.
		 */
		if (startvp == NULL) {
			goto noxattr;
		}
	}

	if (filemode & (FXATTR|FXATTRDIROPEN)) {
		vattr_t vattr;

		if (error = pn_get(fname, UIO_USERSPACE, &pn)) {
			goto out;
		}

		/*
		 * In order to access the hidden attribute directory, the
		 * user must be able to stat() the file.
		 */
		vattr.va_mask = AT_ALL;
		if (error = VOP_GETATTR(startvp, &vattr, 0, CRED(), NULL)) {
			pn_free(&pn);
			goto out;
		}

		if ((startvp->v_vfsp->vfs_flag & VFS_XATTR) != 0 ||
		    vfs_has_feature(startvp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
			error = VOP_LOOKUP(startvp, "", &sdvp, &pn,
			    (filemode & FXATTRDIROPEN) ? LOOKUP_XATTR :
			    LOOKUP_XATTR|CREATE_XATTR_DIR, rootvp, CRED(),
			    NULL, NULL, NULL);
		} else {
			error = EINVAL;
		}

		/*
		 * For __openattrdirat() use "." as filename to open
		 * as part of vn_openat()
		 */
		if (error == 0 && (filemode & FXATTRDIROPEN)) {
			open_filename = ".";
			seg = UIO_SYSSPACE;
		}

		pn_free(&pn);
		if (error != 0)
			goto out;

		VN_RELE(startvp);
		startvp = sdvp;
	}

noxattr:
	if ((filemode & (FREAD|FWRITE|FSEARCH|FEXEC|FXATTRDIROPEN)) != 0) {
		if ((filemode & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			filemode &= ~FNDELAY;
		error = falloc((vnode_t *)NULL, filemode, &fp, &fd);
		if (error == 0) {
			if (auditing && startvp != NULL)
				audit_setfsat_path(1);
			/*
			 * Last arg is a don't-care term if
			 * !(filemode & FCREAT).
			 */
			error = vn_openat(open_filename, seg, filemode,
			    (int)(createmode & MODEMASK),
			    &vp, CRCREAT, PTOU(curproc)->u_cmask,
			    startvp, fd);

			if (startvp != NULL)
				VN_RELE(startvp);
			if (error == 0) {
				if ((vp->v_flag & VDUP) == 0) {
					fp->f_vnode = vp;
					mutex_exit(&fp->f_tlock);
					/*
					 * We must now fill in the slot
					 * falloc reserved.
					 */
					setf(fd, fp);
					return (fd);
				} else {
					/*
					 * Special handling for /dev/fd.
					 * Give up the file pointer
					 * and dup the indicated file descriptor
					 * (in v_rdev). This is ugly, but I've
					 * seen worse.
					 */
					unfalloc(fp);
					dupfd = getminor(vp->v_rdev);
					type = vp->v_type;
					mutex_enter(&vp->v_lock);
					vp->v_flag &= ~VDUP;
					mutex_exit(&vp->v_lock);
					VN_RELE(vp);
					if (type != VCHR)
						return (set_errno(EINVAL));
					if ((fp = getf(dupfd)) == NULL) {
						setf(fd, NULL);
						return (set_errno(EBADF));
					}
					mutex_enter(&fp->f_tlock);
					fp->f_count++;
					mutex_exit(&fp->f_tlock);
					setf(fd, fp);
					releasef(dupfd);
				}
				return (fd);
			} else {
				setf(fd, NULL);
				unfalloc(fp);
				return (set_errno(error));
			}
		}
	} else {
		error = EINVAL;
	}
out:
	if (startvp != NULL)
		VN_RELE(startvp);
	return (set_errno(error));
}
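
For context, copen() is the common kernel path behind open(2) and openat(2). Below is a minimal user-space sketch of the openat() case that arrives here with startfd set to a real descriptor; the directory and file names are arbitrary examples.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int dirfd, fd;

	dirfd = open("/tmp", O_RDONLY);
	if (dirfd < 0) {
		perror("open /tmp");
		return (1);
	}

	/* A relative name is resolved against dirfd; "/..." would ignore it. */
	fd = openat(dirfd, "example.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
	if (fd < 0) {
		perror("openat");
		(void) close(dirfd);
		return (1);
	}

	(void) write(fd, "hello\n", 6);
	(void) close(fd);
	(void) close(dirfd);
	return (0);
}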
Example #18
0
/*
 * msgctl system call.
 *
 * gets q lock (via ipc_lookup), releases before return.
 * may call users of msg_lock
 */
static int
msgctl(int msgid, int cmd, void *arg)
{
	STRUCT_DECL(msqid_ds, ds);		/* SVR4 queue work area */
	kmsqid_t		*qp;		/* ptr to associated q */
	int			error;
	struct	cred		*cr;
	model_t	mdl = get_udatamodel();
	struct msqid_ds64	ds64;
	kmutex_t		*lock;
	proc_t			*pp = curproc;

	STRUCT_INIT(ds, mdl);
	cr = CRED();

	/*
	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
	 */
	switch (cmd) {
	case IPC_SET:
		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;

	case IPC_SET64:
		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;

	case IPC_RMID:
		if (error = ipc_rmid(msq_svc, msgid, cr))
			return (set_errno(error));
		return (0);
	}

	/*
	 * get msqid_ds for this msgid
	 */
	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	switch (cmd) {
	case IPC_SET:
		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
		    secpolicy_ipc_config(cr) != 0) {
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
		qp->msg_ctime = gethrestime_sec();
		break;

	case IPC_STAT:
		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
			mutex_exit(lock);
			return (set_errno(error));
		}

		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
		STRUCT_FSETP(ds, msg_first, NULL); 	/* kernel addr */
		STRUCT_FSETP(ds, msg_last, NULL);
		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
		break;

	case IPC_SET64:
		mutex_enter(&pp->p_lock);
		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
		    secpolicy_ipc_config(cr) != 0 &&
		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		mutex_exit(&pp->p_lock);
		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
		    &ds64.msgx_perm)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = ds64.msgx_qbytes;
		qp->msg_ctime = gethrestime_sec();
		break;

	case IPC_STAT64:
		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
		ds64.msgx_cbytes = qp->msg_cbytes;
		ds64.msgx_qnum = qp->msg_qnum;
		ds64.msgx_qbytes = qp->msg_qbytes;
		ds64.msgx_lspid = qp->msg_lspid;
		ds64.msgx_lrpid = qp->msg_lrpid;
		ds64.msgx_stime = qp->msg_stime;
		ds64.msgx_rtime = qp->msg_rtime;
		ds64.msgx_ctime = qp->msg_ctime;
		break;

	default:
		mutex_exit(lock);
		return (set_errno(EINVAL));
	}

	mutex_exit(lock);

	/*
	 * Do copyout last (after releasing mutex).
	 */
	switch (cmd) {
	case IPC_STAT:
		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;

	case IPC_STAT64:
		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;
	}

	return (0);
}
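
A minimal user-space sketch of the msgctl(2) commands handled above (the IPC_STAT and IPC_RMID branches); the key value is an arbitrary example.

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <stdio.h>

int
main(void)
{
	struct msqid_ds ds;
	int msgid;

	/* Create (or find) a queue; the key is an arbitrary example value. */
	msgid = msgget((key_t)0x1234, IPC_CREAT | 0600);
	if (msgid == -1) {
		perror("msgget");
		return (1);
	}

	/* IPC_STAT: the kernel fills in msqid_ds and copies it out. */
	if (msgctl(msgid, IPC_STAT, &ds) == -1) {
		perror("msgctl(IPC_STAT)");
		return (1);
	}
	(void) printf("qbytes=%lu qnum=%lu\n",
	    (unsigned long)ds.msg_qbytes, (unsigned long)ds.msg_qnum);

	/* IPC_RMID: remove the queue again. */
	(void) msgctl(msgid, IPC_RMID, NULL);
	return (0);
}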
Example #19
0
void *
osi_UfsOpen(afs_dcache_id_t *ainode)
{
#ifdef AFS_CACHE_VNODE_PATH
    struct vnode *vp;
#else
    struct inode *ip;
#endif
    struct osi_file *afile = NULL;
    afs_int32 code = 0;
    int dummy;
#ifdef AFS_CACHE_VNODE_PATH
    char namebuf[1024];
    struct pathname lookpn;
#endif
    struct osi_stat tstat;
    afile = osi_AllocSmallSpace(sizeof(struct osi_file));
    AFS_GUNLOCK();

    /*
     * AFS_CACHE_VNODE_PATH can be used with any file system, including ZFS or tmpfs.
     * The ainode is not an inode number but a path.
     */
#ifdef AFS_CACHE_VNODE_PATH
    /*
     * Cannot use vn_open() or lookupname(); they use the user's CRED().
     * We need to run as root, so we must use the low-level lookuppnvp()
     * and assume fname starts with '/'.
     */

    code = pn_get_buf(ainode->ufs, AFS_UIOSYS, &lookpn, namebuf, sizeof(namebuf));
    if (code != 0)
	osi_Panic("UfsOpen: pn_get_buf failed %ld %s", code, ainode->ufs);

    VN_HOLD(rootdir);		/* released in lookuppnvp */
    code = lookuppnvp(&lookpn, NULL, FOLLOW, NULL, &vp,
		      rootdir, rootdir, afs_osi_credp);
    if (code != 0)
	osi_Panic("UfsOpen: lookuppnvp failed %ld %s", code, ainode->ufs);

#ifdef AFS_SUN511_ENV
    code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp, NULL);
#else
    code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp);
#endif

    if (code != 0)
        osi_Panic("UfsOpen: VOP_OPEN failed %ld %s", code, ainode->ufs);

#else
    code =
	igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, ainode->ufs, &ip,
		  CRED(), &dummy);
#endif
    AFS_GLOCK();
    if (code) {
	osi_FreeSmallSpace(afile);
	osi_Panic("UfsOpen: igetinode failed %ld %s", code, ainode->ufs);
    }
#ifdef AFS_CACHE_VNODE_PATH
    afile->vnode = vp;
    code = afs_osi_Stat(afile, &tstat);
    afile->size = tstat.size;
#else
    afile->vnode = ITOV(ip);
    afile->size = VTOI(afile->vnode)->i_size;
#endif
    afile->offset = 0;
    afile->proc = (int (*)())0;
    return (void *)afile;
}
Example #20
0
/* ONC_PLUS EXTRACT START */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this?
		 * Because it allows easy specification of
		 * the last n bytes of the file.
		 */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start  > end)
		return (EINVAL);
	return (0);
}
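
The l_whence/l_start/l_len combinations validated above map directly onto the user-visible struct flock used with fcntl(2). A minimal user-space sketch (the lock-file path and contents are arbitrary examples) that exercises the negative-length case described in the comment, locking the last 10 bytes of a file:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/lockfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return (1);
	}

	/* Give the file some contents so an EOF-relative lock range is valid. */
	(void) write(fd, "0123456789ABCDEFGHIJ", 20);

	/*
	 * Negative l_len with SEEK_END: lock the 10 bytes that end at EOF,
	 * i.e. the "last n bytes of the file" case handled above.
	 */
	(void) memset(&fl, 0, sizeof (fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_END;
	fl.l_start = 0;
	fl.l_len = -10;

	if (fcntl(fd, F_SETLK, &fl) == -1)
		perror("fcntl(F_SETLK)");
	else
		(void) printf("locked the last 10 bytes\n");

	(void) close(fd);	/* closing the descriptor releases the lock */
	return (0);
}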
Example #21
0
int
corectl(int subcode, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
{
	int error = 0;
	proc_t *p;
	refstr_t *rp;
	size_t size;
	char *path;
	core_content_t content = CC_CONTENT_INVALID;
	struct core_globals *cg;
	zone_t *zone = curproc->p_zone;

	cg = zone_getspecific(core_zone_key, zone);
	ASSERT(cg != NULL);

	switch (subcode) {
	case CC_SET_OPTIONS:
		if ((error = secpolicy_coreadm(CRED())) == 0) {
			if (arg1 & ~CC_OPTIONS)
				error = EINVAL;
			else
				cg->core_options = (uint32_t)arg1;
		}
		break;

	case CC_GET_OPTIONS:
		return (cg->core_options);

	case CC_GET_GLOBAL_PATH:
	case CC_GET_DEFAULT_PATH:
	case CC_GET_PROCESS_PATH:
		if (subcode == CC_GET_GLOBAL_PATH) {
			mutex_enter(&cg->core_lock);
			if ((rp = cg->core_file) != NULL)
				refstr_hold(rp);
			mutex_exit(&cg->core_lock);
		} else if (subcode == CC_GET_DEFAULT_PATH) {
			rp = corectl_path_value(cg->core_default_path);
		} else {
			rp = NULL;
			mutex_enter(&pidlock);
			if ((p = prfind((pid_t)arg3)) == NULL ||
			    p->p_stat == SIDL) {
				mutex_exit(&pidlock);
				error = ESRCH;
			} else {
				mutex_enter(&p->p_lock);
				mutex_exit(&pidlock);
				mutex_enter(&p->p_crlock);
				if (!hasprocperm(p->p_cred, CRED()))
					error = EPERM;
				else if (p->p_corefile != NULL)
					rp = corectl_path_value(p->p_corefile);
				mutex_exit(&p->p_crlock);
				mutex_exit(&p->p_lock);
			}
		}
		if (rp == NULL) {
			if (error == 0 && suword8((void *)arg1, 0))
				error = EFAULT;
		} else {
			error = copyoutstr(refstr_value(rp), (char *)arg1,
			    (size_t)arg2, NULL);
			refstr_rele(rp);
		}
		break;

	case CC_SET_GLOBAL_PATH:
	case CC_SET_DEFAULT_PATH:
		if ((error = secpolicy_coreadm(CRED())) != 0)
			break;

		/* FALLTHROUGH */
	case CC_SET_PROCESS_PATH:
		if ((size = MIN((size_t)arg2, MAXPATHLEN)) == 0) {
			error = EINVAL;
			break;
		}
		path = kmem_alloc(size, KM_SLEEP);
		error = copyinstr((char *)arg1, path, size, NULL);
		if (error == 0) {
			if (subcode == CC_SET_PROCESS_PATH) {
				error = set_proc_info((pid_t)arg3, path, 0);
			} else if (subcode == CC_SET_DEFAULT_PATH) {
				corectl_path_set(cg->core_default_path, path);
			} else if (*path != '\0' && *path != '/') {
				error = EINVAL;
			} else {
				refstr_t *nrp = refstr_alloc(path);

				mutex_enter(&cg->core_lock);
				rp = cg->core_file;
				if (*path == '\0')
					cg->core_file = NULL;
				else
					refstr_hold(cg->core_file = nrp);
				mutex_exit(&cg->core_lock);

				if (rp != NULL)
					refstr_rele(rp);

				refstr_rele(nrp);
			}
		}
		kmem_free(path, size);
		break;

	case CC_SET_GLOBAL_CONTENT:
	case CC_SET_DEFAULT_CONTENT:
		if ((error = secpolicy_coreadm(CRED())) != 0)
			break;

		/* FALLTHROUGH */
	case CC_SET_PROCESS_CONTENT:
		error = copyin((void *)arg1, &content, sizeof (content));
		if (error != 0)
			break;

		/*
		 * If any unknown bits are set, don't let this charade
		 * continue.
		 */
		if (content & ~CC_CONTENT_ALL) {
			error = EINVAL;
			break;
		}

		if (subcode == CC_SET_PROCESS_CONTENT) {
			error = set_proc_info((pid_t)arg2, NULL, content);
		} else if (subcode == CC_SET_DEFAULT_CONTENT) {
			corectl_content_set(cg->core_default_content, content);
		} else {
			mutex_enter(&cg->core_lock);
			cg->core_content = content;
			mutex_exit(&cg->core_lock);
		}

		break;

	case CC_GET_GLOBAL_CONTENT:
		content = cg->core_content;
		error = copyout(&content, (void *)arg1, sizeof (content));
		break;

	case CC_GET_DEFAULT_CONTENT:
		content = corectl_content_value(cg->core_default_content);
		error = copyout(&content, (void *)arg1, sizeof (content));
		break;

	case CC_GET_PROCESS_CONTENT:
		mutex_enter(&pidlock);
		if ((p = prfind((pid_t)arg2)) == NULL || p->p_stat == SIDL) {
			mutex_exit(&pidlock);
			error = ESRCH;
			break;
		}

		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);
		mutex_enter(&p->p_crlock);
		if (!hasprocperm(p->p_cred, CRED()))
			error = EPERM;
		else if (p->p_content == NULL)
			content = CC_CONTENT_NONE;
		else
			content = corectl_content_value(p->p_content);
		mutex_exit(&p->p_crlock);
		mutex_exit(&p->p_lock);

		if (error == 0)
			error = copyout(&content, (void *)arg1,
			    sizeof (content));
		break;

	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (set_errno(error));
	return (0);
}
Example #22
0
int
rsm_get_controller(const char *name, uint_t number,
    rsm_controller_object_t *controller, uint_t version)
{
	rsmops_ctrl_t *p_ctrl;
	rsmops_drv_t *p_drv;
	vnode_t *vp;
	int error;
	int (*rsm_get_controller_handler)
	    (const char *name, uint_t number,
	    rsm_controller_object_t *pcontroller, uint_t version);

	mutex_enter(&rsmops_lock);

	/* check if the controller is already registered */
	if ((p_ctrl = find_rsmpi_controller(name, number)) == NULL) {
		/*
		 * The controller is not registered.  We should try to load it.
		 * First check if the driver is registered.
		 */
		if ((p_drv = find_rsmpi_driver(name)) == NULL) {
			/* Cannot find the driver.  Try to load it */
			mutex_exit(&rsmops_lock);
			if ((error = modload("drv", (char *)name)) == -1) {
				return (RSMERR_CTLR_NOT_PRESENT);
			}
			mutex_enter(&rsmops_lock);
			if ((p_drv = find_rsmpi_driver(name)) == NULL) {
				mutex_exit(&rsmops_lock);
				/*
				 * Cannot find it yet - maybe the driver we
				 * loaded was not an RSMPI driver at all.
				 * We'll just fail this call.
				 */
				return (RSMERR_CTLR_NOT_PRESENT);
			}
		}
		ASSERT(p_drv);
		p_ctrl = find_rsmpi_controller(name, number);
		if (p_ctrl == NULL) {
			/*
			 * The controller is not registered.
			 * Try a VOP_OPEN to force it to get registered.
			 */
			mutex_exit(&rsmops_lock);
			vp = rsmops_device_open(name, number);
			mutex_enter(&rsmops_lock);
			if (vp != NULL) {
				(void) VOP_CLOSE(vp, FREAD|FWRITE, 0, 0,
				    CRED(), NULL);
				VN_RELE(vp);
			}
			p_ctrl = find_rsmpi_controller(name, number);
			if (p_ctrl == NULL) {
				mutex_exit(&rsmops_lock);
				return (RSMERR_CTLR_NOT_PRESENT);
			}
		}
		ASSERT(p_ctrl);
	} else {
		p_drv = p_ctrl->p_drv;
	}
	ASSERT(p_drv);
	ASSERT(p_drv == p_ctrl->p_drv);

	rsm_get_controller_handler = p_drv->drv.rsm_get_controller_handler;
	/*
	 * Increase the refcnt right now, so that attempts to deregister
	 * while we are using this entry will fail
	 */
	p_ctrl->refcnt++;
	mutex_exit(&rsmops_lock);

	error = (*rsm_get_controller_handler)(name, number, controller,
	    version);
	if (error != RSM_SUCCESS) {
		/* We failed - drop the refcnt back */
		mutex_enter(&rsmops_lock);
		/*
		 * Even though we released the global lock, we can guarantee
		 * that p_ctrl is still meaningful (and has not been
		 * deregistered or freed) because we were holding a refcnt
		 * on it.  So it is okay to just use p_ctrl here after
		 * re-acquiring the global lock.
		 */
		p_ctrl->refcnt--;
		mutex_exit(&rsmops_lock);
	} else {
		/*
		 * Initialize the controller handle field
		 */
		mutex_enter(&rsmops_lock);
		if ((p_ctrl = find_rsmpi_controller(name, number)) == NULL) {
			mutex_exit(&rsmops_lock);
			return (RSMERR_CTLR_NOT_PRESENT);
		}

		p_ctrl->handle = controller->handle;
		mutex_exit(&rsmops_lock);
	}
	return (error);
}
Example #23
0
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
	dmu_buf_t *db = sa_get_db(hdl);
	znode_t *zp = sa_get_userdata(hdl);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	int count = 0;
	sa_bulk_attr_t *bulk, *sa_attrs;
	zfs_acl_locator_cb_t locate = { 0 };
	uint64_t uid, gid, mode, rdev, xattr, parent, tmp_gen;
	uint64_t crtime[2], mtime[2], ctime[2], atime[2];
	uint64_t links;
	zfs_acl_phys_t znode_acl;
	char scanstamp[AV_SCANSTAMP_SZ];
	boolean_t drop_lock = B_FALSE;

	/*
	 * No upgrade if the ACL isn't cached, since we won't know
	 * which locks are held and reading the ACL would require
	 * special "locked" interfaces that would be messy.
	 */
	if (zp->z_acl_cached == NULL || S_ISLNK(ZTOI(zp)->i_mode))
		return;

	/*
	 * If z_lock is held and we aren't the owner, just return, since
	 * we don't want to deadlock trying to update the status of
	 * z_is_sa.  The file can then be upgraded at a later time.
	 *
	 * Otherwise, we know we are doing the
	 * sa_update() that caused us to enter this function.
	 */
	if (mutex_owner(&zp->z_lock) != curthread) {
		if (mutex_tryenter(&zp->z_lock) == 0)
			return;
		else
			drop_lock = B_TRUE;
	}

	/* First do a bulk query of the attributes that aren't cached */
	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
	    &znode_acl, 88);

	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) {
		kmem_free(bulk, sizeof (sa_bulk_attr_t) * 20);
		goto done;
	}

	/*
	 * While the order here doesn't matter, it's best to try to organize
	 * it in such a way as to pick up an already existing layout number.
	 */
	count = 0;
	sa_attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
	    NULL, &tmp_gen, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
	    NULL, &parent, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &atime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    &mtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
	    &crtime, 16);
	links = ZTOI(zp)->i_nlink;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &links, 8);
	if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
	    &zp->z_acl_cached->z_acl_count, 8);

	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
		zfs_acl_xform(zp, zp->z_acl_cached, CRED());

	locate.cb_aclp = zp->z_acl_cached;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

	if (xattr)
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
		    NULL, &xattr, 8);

	/* If there is a scanstamp, add it. */

	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
		bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
		    scanstamp, AV_SCANSTAMP_SZ);
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
		    NULL, scanstamp, AV_SCANSTAMP_SZ);
		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
	}

	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
	    count, tx) == 0);
	if (znode_acl.z_acl_extern_obj)
		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
		    znode_acl.z_acl_extern_obj, tx));

	zp->z_is_sa = B_TRUE;
	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * 20);
	kmem_free(bulk, sizeof (sa_bulk_attr_t) * 20);
done:
	if (drop_lock)
		mutex_exit(&zp->z_lock);
}
Example #24
0
static int
cfutimesat(int fd, char *fname, int nmflag, vattr_t *vap, int flags, int follow)
{
	file_t *fp;
	vnode_t *startvp, *vp;
	int error;
	char startchar;

	if (fd == AT_FDCWD && fname == NULL)
		return (set_errno(EFAULT));

	if (nmflag == 1 || (nmflag == 2 && fname != NULL)) {
		if (copyin(fname, &startchar, sizeof (char)))
			return (set_errno(EFAULT));
	} else {
		startchar = '\0';
	}

	if (fd == AT_FDCWD) {
		startvp = NULL;
	} else {
		/*
		 * Is this an absolute path?
		 */
		if (startchar != '/') {
			if ((fp = getf(fd)) == NULL)
				return (set_errno(EBADF));
			startvp = fp->f_vnode;
			VN_HOLD(startvp);
			releasef(fd);
		} else {
			startvp = NULL;
		}
	}

	if (audit_active)
		audit_setfsat_path(1);

	if ((nmflag == 1) || ((nmflag == 2) && (fname != NULL))) {
		if ((error = lookupnameat(fname, UIO_USERSPACE,
		    follow, NULLVPP, &vp, startvp)) != 0) {
			if (startvp != NULL)
				VN_RELE(startvp);
			return (set_errno(error));
		}
	} else {
		vp = startvp;
		VN_HOLD(vp);
	}

	if (startvp != NULL) {
		VN_RELE(startvp);
	}

	if (vn_is_readonly(vp)) {
		error = EROFS;
	} else {
		error = VOP_SETATTR(vp, vap, flags, CRED(), NULL);
	}

	VN_RELE(vp);
	if (error != 0)
		return (set_errno(error));
	return (0);
}
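
Calls in the utimes()/futimesat()/utimensat() family funnel into a helper like this. A minimal user-space sketch using the standard utimensat(2) interface (the path is an arbitrary example and the file must already exist):

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <time.h>

int
main(void)
{
	struct timespec times[2];

	times[0].tv_sec = time(NULL);	/* new access time */
	times[0].tv_nsec = 0;
	times[1] = times[0];		/* new modification time */

	/* AT_FDCWD: resolve the path relative to the current directory. */
	if (utimensat(AT_FDCWD, "/tmp/example.txt", times, 0) == -1) {
		perror("utimensat");
		return (1);
	}
	return (0);
}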
Example #25
0
static int
in_sync_sys(char *pathname, uint_t flags)
{
	struct vnode *vp;
	int error;

	/*
	 * We must have sufficient privilege to do this, since we lock critical
	 * data structures whilst we're doing it ..
	 */
	if ((error = secpolicy_sys_devices(CRED())) != 0)
		return (set_errno(error));

	if (flags != INST_SYNC_ALWAYS && flags != INST_SYNC_IF_REQUIRED)
		return (set_errno(EINVAL));

	/*
	 * Only one process is allowed to get the state of the instance
	 * number assignments on the system at any given time.
	 */
	e_ddi_enter_instance();

	/*
	 * Recreate the instance file only if the device tree has changed
	 * or if the caller explicitly requests so.
	 */
	if (e_ddi_instance_is_clean() && flags != INST_SYNC_ALWAYS) {
		error = EALREADY;
		goto end;
	}

	/*
	 * Create an instance file for writing, giving it a mode that
	 * will only permit reading.  Note that we refuse to overwrite
	 * an existing file.
	 */
	if ((error = vn_open(pathname, UIO_USERSPACE,
	    FCREAT, 0444, &vp, CRCREAT, 0)) != 0) {
		if (error == EISDIR)
			error = EACCES;	/* SVID compliance? */
		goto end;
	}

	/*
	 * So far so good.  We're singly threaded, the vnode is beckoning
	 * so let's get on with it.  Any error, and we just give up and
	 * hand the first error we get back to userland.
	 */
	error = in_write_instance(vp);

	/*
	 * If there was any sort of error, we deliberately go and
	 * remove the file we just created so that any attempts to
	 * use it will quickly fail.
	 */
	if (error)
		(void) vn_remove(pathname, UIO_USERSPACE, RMFILE);
	else
		e_ddi_instance_set_clean();
end:
	e_ddi_exit_instance();
	return (error ? set_errno(error) : 0);
}
Example #26
0
int
signotify(int cmd, siginfo_t *siginfo, signotify_id_t *sn_id)
{
	k_siginfo_t	info;
	signotify_id_t	id;
	proc_t		*p;
	proc_t		*cp = curproc;
	signotifyq_t	*snqp;
	struct cred	*cr;
	sigqueue_t	*sqp;
	sigqhdr_t	*sqh;
	u_longlong_t	sid;
	model_t 	datamodel = get_udatamodel();

	if (copyin(sn_id, &id, sizeof (signotify_id_t)))
		return (set_errno(EFAULT));

	if (id.sn_index >= _SIGNOTIFY_MAX || id.sn_index < 0)
		return (set_errno(EINVAL));

	switch (cmd) {
	case SN_PROC:
		/* get snid for the given user address of signotifyid_t */
		sid = get_sigid(cp, (caddr_t)sn_id);

		if (id.sn_pid > 0) {
			mutex_enter(&pidlock);
			if ((p = prfind(id.sn_pid)) != NULL) {
				mutex_enter(&p->p_lock);
				if (p->p_signhdr != NULL) {
					snqp = SIGN_PTR(p, id.sn_index);
					if (snqp->sn_snid == sid) {
						mutex_exit(&p->p_lock);
						mutex_exit(&pidlock);
						return (set_errno(EBUSY));
					}
				}
				mutex_exit(&p->p_lock);
			}
			mutex_exit(&pidlock);
		}

		if (copyin_siginfo(datamodel, siginfo, &info))
			return (set_errno(EFAULT));

		/* The si_code value must indicate the signal will be queued */
		if (!sigwillqueue(info.si_signo, info.si_code))
			return (set_errno(EINVAL));

		if (cp->p_signhdr == NULL) {
			/* Allocate signotify pool first time */
			sqh = sigqhdralloc(sizeof (signotifyq_t),
			    _SIGNOTIFY_MAX);
			mutex_enter(&cp->p_lock);
			if (cp->p_signhdr == NULL) {
				/* hang the pool head on proc */
				cp->p_signhdr = sqh;
			} else {
				/* another lwp allocated the pool, free ours */
				sigqhdrfree(sqh);
			}
		} else {
			mutex_enter(&cp->p_lock);
		}

		sqp = sigqalloc(cp->p_signhdr);
		if (sqp == NULL) {
			mutex_exit(&cp->p_lock);
			return (set_errno(EAGAIN));
		}
		cr = CRED();
		sqp->sq_info = info;
		sqp->sq_info.si_pid = cp->p_pid;
		sqp->sq_info.si_ctid = PRCTID(cp);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetruid(cr);

		/* fill the signotifyq_t fields */
		((signotifyq_t *)sqp)->sn_snid = sid;

		mutex_exit(&cp->p_lock);

		/* complete the signotify_id_t fields */
		id.sn_index = (signotifyq_t *)sqp - SIGN_PTR(cp, 0);
		id.sn_pid = cp->p_pid;

		break;

	case SN_CANCEL:
	case SN_SEND:

		sid =  get_sigid(cp, (caddr_t)sn_id);
		mutex_enter(&pidlock);
		if ((id.sn_pid <= 0) || ((p = prfind(id.sn_pid)) == NULL)) {
			mutex_exit(&pidlock);
			return (set_errno(EINVAL));
		}
		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		if (p->p_signhdr == NULL) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		snqp = SIGN_PTR(p, id.sn_index);

		if (snqp->sn_snid == 0) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		if (snqp->sn_snid != sid) {
			mutex_exit(&p->p_lock);
			return (set_errno(EINVAL));
		}

		snqp->sn_snid = 0;

		/* cmd == SN_CANCEL or signo == 0 (SIGEV_NONE) */
		if (((sigqueue_t *)snqp)->sq_info.si_signo <= 0)
			cmd = SN_CANCEL;

		sigqsend(cmd, p, 0, (sigqueue_t *)snqp);
		mutex_exit(&p->p_lock);

		id.sn_pid = 0;
		id.sn_index = 0;

		break;

	default:
		return (set_errno(EINVAL));
	}

	if (copyout(&id, sn_id, sizeof (signotify_id_t)))
		return (set_errno(EFAULT));

	return (0);
}
Example #27
0
/*
 * Perform process accounting functions.
 */
int
sysacct(char *fname)
{
	struct acct_globals *ag;
	struct vnode *vp;
	int error = 0;

	if (secpolicy_acct(CRED()) != 0)
		return (set_errno(EPERM));

	ag = zone_getspecific(acct_zone_key, curproc->p_zone);
	ASSERT(ag != NULL);

	if (fname == NULL) {
		/*
		 * Close the file and stop accounting.
		 */
		mutex_enter(&ag->aclock);
		vp = ag->acctvp;
		ag->acctvp = NULL;
		mutex_exit(&ag->aclock);
		if (vp) {
			error = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(),
			    NULL);
			VN_RELE(vp);
		}
		return (error == 0 ? 0 : set_errno(error));
	}

	/*
	 * Either (a) open a new file and begin accounting -or- (b)
	 * switch accounting from an old to a new file.
	 *
	 * (Open the file without holding aclock in case it
	 * sleeps (holding the lock prevents process exit).)
	 */
	if ((error = vn_open(fname, UIO_USERSPACE, FWRITE,
	    0, &vp, (enum create)0, 0)) != 0) {
		/* SVID  compliance */
		if (error == EISDIR)
			error = EACCES;
		return (set_errno(error));
	}

	if (vp->v_type != VREG) {
		error = EACCES;
	} else {
		mutex_enter(&acct_list_lock);
		if (acct_find(vp, B_FALSE)) {
			error = EBUSY;
		} else {
			mutex_enter(&ag->aclock);
			if (ag->acctvp) {
				vnode_t *oldvp;

				/*
				 * close old acctvp, and point acct()
				 * at new file by swapping vp and acctvp
				 */
				oldvp = ag->acctvp;
				ag->acctvp = vp;
				vp = oldvp;
			} else {
				/*
				 * no existing file, start accounting ..
				 */
				ag->acctvp = vp;
				vp = NULL;
			}
			mutex_exit(&ag->aclock);
		}
		mutex_exit(&acct_list_lock);
	}

	if (vp) {
		(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
		VN_RELE(vp);
	}
	return (error == 0 ? 0 : set_errno(error));
}
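
A minimal user-space sketch of the acct(2) interface this syscall implements: a pathname turns accounting on, NULL turns it off. Privilege is required, and the accounting-file path is an arbitrary example.

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Start accounting to a file (the path is just an example). */
	if (acct("/var/adm/pacct") == -1) {
		perror("acct(enable)");
		return (1);
	}

	/* ... processes that exit now get accounting records appended ... */

	/* A NULL argument turns accounting off again. */
	if (acct(NULL) == -1) {
		perror("acct(disable)");
		return (1);
	}
	return (0);
}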
Example #28
0
static int
zpl_xattr_set(struct inode *ip, const char *name, const void *value,
    size_t size, int flags)
{
	znode_t *zp = ITOZ(ip);
	zfs_sb_t *zsb = ZTOZSB(zp);
	cred_t *cr = CRED();
	fstrans_cookie_t cookie;
	int where;
	int error;

	crhold(cr);
	cookie = spl_fstrans_mark();
	rrm_enter_read(&(zsb)->z_teardown_lock, FTAG);
	rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);

	/*
	 * Before setting the xattr check to see if it already exists.
	 * This is done to ensure the following optional flags are honored.
	 *
	 *   XATTR_CREATE: fail if xattr already exists
	 *   XATTR_REPLACE: fail if xattr does not exist
	 *
	 * We also want to know if it resides in sa or dir, so we can make
	 * sure we don't end up with a duplicate in both places.
	 */
	error = __zpl_xattr_where(ip, name, &where, cr);
	if (error < 0) {
		if (error != -ENODATA)
			goto out;
		if (flags & XATTR_REPLACE)
			goto out;

		/* The xattr to be removed already doesn't exist */
		error = 0;
		if (value == NULL)
			goto out;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto out;
	}

	/* Preferentially store the xattr as a SA for better performance */
	if (zsb->z_use_sa && zp->z_is_sa &&
	    (zsb->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
		error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir.
			 */
			if (where & XATTR_IN_DIR)
				zpl_xattr_set_dir(ip, name, NULL, 0, 0, cr);
			goto out;
		}
	}

	error = zpl_xattr_set_dir(ip, name, value, size, flags, cr);
	/*
	 * Successfully put into dir, we need to clear the one in SA.
	 */
	if (error == 0 && (where & XATTR_IN_SA))
		zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
out:
	rw_exit(&ITOZ(ip)->z_xattr_lock);
	rrm_exit(&(zsb)->z_teardown_lock, FTAG);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}
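
On the user side (Linux), this handler is reached through the setxattr(2) family. The sketch below exercises the XATTR_CREATE and XATTR_REPLACE flags honored above, plus the value == NULL removal path via removexattr(2); the file path and attribute name are arbitrary examples, and the file must exist on an xattr-capable filesystem such as ZFS.

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int
main(void)
{
	const char *path = "/tmp/example.txt";	/* arbitrary example file */
	const char *val = "some value";

	/* XATTR_CREATE: fail with EEXIST if the attribute already exists. */
	if (setxattr(path, "user.comment", val, strlen(val), XATTR_CREATE) == -1)
		perror("setxattr(XATTR_CREATE)");

	/* XATTR_REPLACE: fail with ENODATA if the attribute does not exist. */
	if (setxattr(path, "user.comment", val, strlen(val), XATTR_REPLACE) == -1)
		perror("setxattr(XATTR_REPLACE)");

	/* Removal corresponds to the value == NULL path seen above. */
	if (removexattr(path, "user.comment") == -1)
		perror("removexattr");

	return (0);
}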
Example #29
0
/*
 * taskid_t tasksys_settaskid(projid_t projid, uint_t flags);
 *
 * Overview
 *   Place the calling process in a new task if sufficiently privileged.  If the
 *   present task is finalized, the process may not create a new task.
 *
 * Return values
 *   0 on success, errno on failure.
 */
static long
tasksys_settaskid(projid_t projid, uint_t flags)
{
	proc_t *p = ttoproc(curthread);
	kproject_t *oldpj;
	kproject_t *kpj;
	task_t *tk, *oldtk;
	rctl_entity_p_t e;
	zone_t *zone;
	int rctlfail = 0;

	if (secpolicy_tasksys(CRED()) != 0)
		return (set_errno(EPERM));

	if (projid < 0 || projid > MAXPROJID)
		return (set_errno(EINVAL));

	if (flags & ~TASK_FINAL)
		return (set_errno(EINVAL));

	mutex_enter(&pidlock);
	if (p->p_task->tk_flags & TASK_FINAL) {
		mutex_exit(&pidlock);
		return (set_errno(EACCES));
	}
	mutex_exit(&pidlock);

	/*
	 * Try to stop all other lwps in the process while we're changing
	 * our project.  This way, curthread doesn't need to grab its own
	 * thread_lock to find its project ID (see curprojid()).  If this
	 * is the /proc agent lwp, we know that the other lwps are already
	 * held.  If we failed to hold all lwps, bail out and return EINTR.
	 */
	if (curthread != p->p_agenttp && !holdlwps(SHOLDFORK1))
		return (set_errno(EINTR));
	/*
	 * Put a hold on our new project and make sure that nobody is
	 * trying to bind it to a pool while we're joining.
	 */
	kpj = project_hold_by_id(projid, p->p_zone, PROJECT_HOLD_INSERT);
	e.rcep_p.proj = kpj;
	e.rcep_t = RCENTITY_PROJECT;

	mutex_enter(&p->p_lock);
	oldpj = p->p_task->tk_proj;
	zone = p->p_zone;

	mutex_enter(&zone->zone_nlwps_lock);
	mutex_enter(&zone->zone_mem_lock);

	if (kpj->kpj_nlwps + p->p_lwpcnt > kpj->kpj_nlwps_ctl)
		if (rctl_test_entity(rc_project_nlwps, kpj->kpj_rctls, p, &e,
		    p->p_lwpcnt, 0) & RCT_DENY)
			rctlfail = 1;

	if (kpj->kpj_ntasks + 1 > kpj->kpj_ntasks_ctl)
		if (rctl_test_entity(rc_project_ntasks, kpj->kpj_rctls, p, &e,
		    1, 0) & RCT_DENY)
			rctlfail = 1;

	if (kpj->kpj_data.kpd_locked_mem + p->p_locked_mem >
	    kpj->kpj_data.kpd_locked_mem_ctl)
		if (rctl_test_entity(rc_project_locked_mem, kpj->kpj_rctls, p,
		    &e, p->p_locked_mem, 0) & RCT_DENY)
			rctlfail = 1;

	mutex_enter(&(kpj->kpj_data.kpd_crypto_lock));
	if (kpj->kpj_data.kpd_crypto_mem + p->p_crypto_mem >
	    kpj->kpj_data.kpd_crypto_mem_ctl)
		if (rctl_test_entity(rc_project_crypto_mem, kpj->kpj_rctls, p,
		    &e, p->p_crypto_mem, 0) & RCT_DENY)
			rctlfail = 1;

	if (rctlfail) {
		mutex_exit(&(kpj->kpj_data.kpd_crypto_lock));
		mutex_exit(&zone->zone_mem_lock);
		mutex_exit(&zone->zone_nlwps_lock);
		if (curthread != p->p_agenttp)
			continuelwps(p);
		mutex_exit(&p->p_lock);
		return (set_errno(EAGAIN));
	}
	kpj->kpj_data.kpd_crypto_mem += p->p_crypto_mem;
	mutex_exit(&(kpj->kpj_data.kpd_crypto_lock));
	kpj->kpj_data.kpd_locked_mem += p->p_locked_mem;
	kpj->kpj_nlwps += p->p_lwpcnt;
	kpj->kpj_ntasks++;

	oldpj->kpj_data.kpd_locked_mem -= p->p_locked_mem;
	mutex_enter(&(oldpj->kpj_data.kpd_crypto_lock));
	oldpj->kpj_data.kpd_crypto_mem -= p->p_crypto_mem;
	mutex_exit(&(oldpj->kpj_data.kpd_crypto_lock));
	oldpj->kpj_nlwps -= p->p_lwpcnt;

	mutex_exit(&zone->zone_mem_lock);
	mutex_exit(&zone->zone_nlwps_lock);
	mutex_exit(&p->p_lock);

	mutex_enter(&kpj->kpj_poolbind);
	tk = task_create(projid, curproc->p_zone);
	mutex_enter(&cpu_lock);
	/*
	 * Returns with p_lock held.
	 */
	oldtk = task_join(tk, flags);
	if (curthread != p->p_agenttp)
		continuelwps(p);
	mutex_exit(&p->p_lock);
	mutex_exit(&cpu_lock);
	mutex_exit(&kpj->kpj_poolbind);
	task_rele(oldtk);
	project_rele(kpj);
	return (tk->tk_tkid);
}
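
A user-space sketch, assuming the usual Solaris interfaces settaskid(2) in <sys/task.h> and getprojidbyname(3PROJECT) in <project.h> are available with the signatures shown; the project name is a hypothetical example.

#include <project.h>
#include <stdio.h>
#include <sys/task.h>
#include <sys/types.h>

int
main(void)
{
	projid_t projid;
	taskid_t tkid;

	/* "user.myproj" is a hypothetical project name. */
	projid = getprojidbyname("user.myproj");
	if (projid == (projid_t)-1) {
		perror("getprojidbyname");
		return (1);
	}

	/*
	 * Join a new task in that project.  Passing TASK_FINAL instead of 0
	 * would forbid any further settaskid() calls from this process.
	 */
	tkid = settaskid(projid, 0);
	if (tkid == (taskid_t)-1) {
		perror("settaskid");
		return (1);
	}
	(void) printf("new task id: %ld\n", (long)tkid);
	return (0);
}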