Exemplo n.º 1
0
/*
 * The close() system call uses it's own audit call to capture the path/vnode
 * information because those pieces are not easily obtained within the system
 * call itself.
 */
void
audit_sysclose(struct thread *td, int fd)
{
	struct kaudit_record *ar;
	struct vnode *vp;
	struct file *fp;
	int vfslocked;

	KASSERT(td != NULL, ("audit_sysclose: td == NULL"));

	ar = currecord();
	if (ar == NULL)
		return;

	audit_arg_fd(fd);

	if (getvnode(td->td_proc->p_fd, fd, &fp) != 0)
		return;

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	audit_arg_vnode1(vp);
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	fdrop(fp, td);
}
Exemplo n.º 2
0
int
kobj_read_file_vnode(struct _buf *file, char *buf, unsigned size, unsigned off)
{
	struct vnode *vp = file->ptr;
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error, vfslocked;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	aiov.iov_base = buf;
	aiov.iov_len = size;

	auio.uio_iov = &aiov;
	auio.uio_offset = (off_t)off;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_iovcnt = 1;
	auio.uio_resid = size;
	auio.uio_td = td;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_READ(vp, &auio, IO_UNIT | IO_SYNC, td->td_ucred);
	VOP_UNLOCK(vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error != 0 ? -1 : size - auio.uio_resid);
}
Exemplo n.º 3
0
/*
 * Audit information about a file, either the file's vnode info, or its
 * socket address info.
 */
void
audit_arg_file(struct proc *p, struct file *fp)
{
	struct kaudit_record *ar;
	struct socket *so;
	struct inpcb *pcb;
	struct vnode *vp;
	int vfslocked;

	ar = currecord();
	if (ar == NULL)
		return;

	switch (fp->f_type) {
	case DTYPE_VNODE:
	case DTYPE_FIFO:
		/*
		 * XXXAUDIT: Only possibly to record as first vnode?
		 */
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_SHARED | LK_RETRY);
		audit_arg_vnode1(vp);
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;

	case DTYPE_SOCKET:
		so = (struct socket *)fp->f_data;
		if (INP_CHECK_SOCKAF(so, PF_INET)) {
			SOCK_LOCK(so);
			ar->k_ar.ar_arg_sockinfo.so_type =
			    so->so_type;
			ar->k_ar.ar_arg_sockinfo.so_domain =
			    INP_SOCKAF(so);
			ar->k_ar.ar_arg_sockinfo.so_protocol =
			    so->so_proto->pr_protocol;
			SOCK_UNLOCK(so);
			pcb = (struct inpcb *)so->so_pcb;
			INP_RLOCK(pcb);
			ar->k_ar.ar_arg_sockinfo.so_raddr =
			    pcb->inp_faddr.s_addr;
			ar->k_ar.ar_arg_sockinfo.so_laddr =
			    pcb->inp_laddr.s_addr;
			ar->k_ar.ar_arg_sockinfo.so_rport =
			    pcb->inp_fport;
			ar->k_ar.ar_arg_sockinfo.so_lport =
			    pcb->inp_lport;
			INP_RUNLOCK(pcb);
			ARG_SET_VALID(ar, ARG_SOCKINFO);
		}
		break;

	default:
		/* XXXAUDIT: else? */
		break;
	}
}
Exemplo n.º 4
0
void
kobj_close_file(struct _buf *file)
{

	if (file->mounted) {
		struct vnode *vp = file->ptr;
		struct thread *td = curthread;
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_close(vp, FREAD, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	kmem_free(file, sizeof(*file));
}
Exemplo n.º 5
0
static int
kobj_get_filesize_vnode(struct _buf *file, uint64_t *size)
{
	struct vnode *vp = file->ptr;
	struct vattr va;
	int error, vfslocked;

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &va, curthread->td_ucred);
	VOP_UNLOCK(vp, 0);
	if (error == 0)
		*size = (uint64_t)va.va_size;
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
Exemplo n.º 6
0
void
zfs_rmnode(znode_t *zp)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os = zfsvfs->z_os;
	znode_t		*xzp = NULL;
	dmu_tx_t	*tx;
	uint64_t	acl_obj;
	int		error;
	int		vfslocked;

	vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);

	ASSERT(zp->z_phys->zp_links == 0);

	/*
	 * If this is a ZIL replay then leave the object in the unlinked set.
	 * Otherwise we can get a deadlock, because the delete can be
	 * quite large and span multiple tx's and txgs, but each replay
	 * creates a tx to atomically run the replay function and mark the
	 * replay record as complete. We deadlock trying to start a tx in
	 * a new txg to further the deletion but can't because the replay
	 * tx hasn't finished.
	 *
	 * We actually delete the object if we get a failure to create an
	 * object in zil_replay_log_record(), or after calling zil_replay().
	 */
	if (zfsvfs->z_assign >= TXG_INITIAL) {
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If this is an attribute directory, purge its contents.
	 */
	if (ZTOV(zp) != NULL && ZTOV(zp)->v_type == VDIR &&
	    (zp->z_phys->zp_flags & ZFS_XATTR)) {
		if (zfs_purgedir(zp) != 0) {
			/*
			 * Not enough space to delete some xattrs.
			 * Leave it in the unlinked set.
			 */
			zfs_znode_dmu_fini(zp);
			zfs_znode_free(zp);
			VFS_UNLOCK_GIANT(vfslocked);
			return;
		}
	}

	/*
	 * Free up all the data in the file.
	 */
	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
	if (error) {
		/*
		 * Not enough space.  Leave the file in the unlinked set.
		 */
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		return;
	}

	/*
	 * If the file has extended attributes, we're going to unlink
	 * the xattr dir.
	 */
	if (zp->z_phys->zp_xattr) {
		error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
		ASSERT(error == 0);
	}

	acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	/*
	 * Set up the final transaction.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	if (xzp) {
		dmu_tx_hold_bonus(tx, xzp->z_id);
		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
	}
	if (acl_obj)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		/*
		 * Not enough space to delete the file.  Leave it in the
		 * unlinked set, leaking it until the fs is remounted (at
		 * which point we'll call zfs_unlinked_drain() to process it).
		 */
		dmu_tx_abort(tx);
		zfs_znode_dmu_fini(zp);
		zfs_znode_free(zp);
		goto out;
	}

	if (xzp) {
		dmu_buf_will_dirty(xzp->z_dbuf, tx);
		mutex_enter(&xzp->z_lock);
		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
		xzp->z_phys->zp_links = 0;	/* no more links to it */
		mutex_exit(&xzp->z_lock);
		zfs_unlinked_add(xzp, tx);
	}

	/* Remove this znode from the unlinked set */
	VERIFY3U(0, ==,
	    zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));

	zfs_znode_delete(zp, tx);

	dmu_tx_commit(tx);
out:
	if (xzp)
		VN_RELE(ZTOV(xzp));
	VFS_UNLOCK_GIANT(vfslocked);
}
Exemplo n.º 7
0
/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
    struct thread *td;
    struct mount *mp;
    struct vnode *vp;
    struct uio auio;
    struct iovec aiov[2];
    int totlen;
    int iov;
    int vfslocked;
    int wrapearly;

    KASSERT((HAS_PENDING_DATA(alq)), ("%s: queue empty!", __func__));

    vp = alq->aq_vp;
    td = curthread;
    totlen = 0;
    iov = 1;
    wrapearly = alq->aq_wrapearly;

    bzero(&aiov, sizeof(aiov));
    bzero(&auio, sizeof(auio));

    /* Start the write from the location of our buffer tail pointer. */
    aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;

    if (alq->aq_writetail < alq->aq_writehead) {
        /* Buffer not wrapped. */
        totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
    } else if (alq->aq_writehead == 0) {
        /* Buffer not wrapped (special case to avoid an empty iov). */
        totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
                                   wrapearly;
    } else {
        /*
         * Buffer wrapped, requires 2 aiov entries:
         * - first is from writetail to end of buffer
         * - second is from start of buffer to writehead
         */
        aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail -
                          wrapearly;
        iov++;
        aiov[1].iov_base = alq->aq_entbuf;
        aiov[1].iov_len =  alq->aq_writehead;
        totlen = aiov[0].iov_len + aiov[1].iov_len;
    }

    alq->aq_flags |= AQ_FLUSHING;
    ALQ_UNLOCK(alq);

    auio.uio_iov = &aiov[0];
    auio.uio_offset = 0;
    auio.uio_segflg = UIO_SYSSPACE;
    auio.uio_rw = UIO_WRITE;
    auio.uio_iovcnt = iov;
    auio.uio_resid = totlen;
    auio.uio_td = td;

    /*
     * Do all of the junk required to write now.
     */
    vfslocked = VFS_LOCK_GIANT(vp->v_mount);
    vn_start_write(vp, &mp, V_WAIT);
    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    /*
     * XXX: VOP_WRITE error checks are ignored.
     */
#ifdef MAC
    if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
        VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
    VOP_UNLOCK(vp, 0);
    vn_finished_write(mp);
    VFS_UNLOCK_GIANT(vfslocked);

    ALQ_LOCK(alq);
    alq->aq_flags &= ~AQ_FLUSHING;

    /* Adjust writetail as required, taking into account wrapping. */
    alq->aq_writetail = (alq->aq_writetail + totlen + wrapearly) %
                        alq->aq_buflen;
    alq->aq_freebytes += totlen + wrapearly;

    /*
     * If we just flushed part of the buffer which wrapped, reset the
     * wrapearly indicator.
     */
    if (wrapearly)
        alq->aq_wrapearly = 0;

    /*
     * If we just flushed the buffer completely, reset indexes to 0 to
     * minimise buffer wraps.
     * This is also required to ensure alq_getn() can't wedge itself.
     */
    if (!HAS_PENDING_DATA(alq))
        alq->aq_writehead = alq->aq_writetail = 0;

    KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
            ("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__));

    if (alq->aq_flags & AQ_WANTED) {
        alq->aq_flags &= ~AQ_WANTED;
        return (1);
    }

    return(0);
}
Exemplo n.º 8
0
/*
 * Main code to turn off disk quotas for a filesystem. Does not change
 * flags.
 */
static int
quotaoff1(struct thread *td, struct mount *mp, int type)
{
	struct vnode *vp;
	struct vnode *qvp, *mvp;
	struct ufsmount *ump;
	struct dquot *dq;
	struct inode *ip;
	struct ucred *cr;
	int vfslocked;
	int error;

	ump = VFSTOUFS(mp);

	UFS_LOCK(ump);
	KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0,
		("quotaoff1: flags are invalid"));
	if ((qvp = ump->um_quotas[type]) == NULLVP) {
		UFS_UNLOCK(ump);
		return (0);
	}
	cr = ump->um_cred[type];
	UFS_UNLOCK(ump);

	/*
	 * Search vnodes associated with this mount point,
	 * deleting any references to quota file being closed.
	 */
again:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto again;
		}
		ip = VTOI(vp);
		dq = ip->i_dquot[type];
		ip->i_dquot[type] = NODQUOT;
		dqrele(vp, dq);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}

	dqflush(qvp);
	/* Clear um_quotas before closing the quota vnode to prevent
	 * access to the closed vnode from dqget/dqsync
	 */
	UFS_LOCK(ump);
	ump->um_quotas[type] = NULLVP;
	ump->um_cred[type] = NOCRED;
	UFS_UNLOCK(ump);

	vfslocked = VFS_LOCK_GIANT(qvp->v_mount);
	vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY);
	qvp->v_vflag &= ~VV_SYSTEM;
	VOP_UNLOCK(qvp, 0);
	error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td);
	VFS_UNLOCK_GIANT(vfslocked);
	crfree(cr);

	return (error);
}
Exemplo n.º 9
0
/*
 * Update the disk quota in the quota file.
 */
static int
dqsync(struct vnode *vp, struct dquot *dq)
{
	uint8_t buf[sizeof(struct dqblk64)];
	off_t base, recsize;
	struct vnode *dqvp;
	struct iovec aiov;
	struct uio auio;
	int vfslocked, error;
	struct mount *mp;
	struct ufsmount *ump;

#ifdef DEBUG_VFS_LOCKS
	if (vp != NULL)
		ASSERT_VOP_ELOCKED(vp, "dqsync");
#endif

	mp = NULL;
	error = 0;
	if (dq == NODQUOT)
		panic("dqsync: dquot");
	if ((ump = dq->dq_ump) == NULL)
		return (0);
	UFS_LOCK(ump);
	if ((dqvp = ump->um_quotas[dq->dq_type]) == NULLVP)
		panic("dqsync: file");
	vref(dqvp);
	UFS_UNLOCK(ump);

	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	DQI_LOCK(dq);
	if ((dq->dq_flags & DQ_MOD) == 0) {
		DQI_UNLOCK(dq);
		vrele(dqvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (0);
	}
	DQI_UNLOCK(dq);

	(void) vn_start_secondary_write(dqvp, &mp, V_WAIT);
	if (vp != dqvp)
		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY);

	VFS_UNLOCK_GIANT(vfslocked);
	DQI_LOCK(dq);
	DQI_WAIT(dq, PINOD+2, "dqsync");
	if ((dq->dq_flags & DQ_MOD) == 0)
		goto out;
	dq->dq_flags |= DQ_LOCK;
	DQI_UNLOCK(dq);

	/*
	 * Write the quota record to the quota file, performing any
	 * necessary conversions.  See dqget() for additional details.
	 */
	if (ump->um_qflags[dq->dq_type] & QTF_64BIT) {
		dq_dqb64(dq, (struct dqblk64 *)buf);
		recsize = sizeof(struct dqblk64);
		base = sizeof(struct dqhdr64);
	} else {
		dq_dqb32(dq, (struct dqblk32 *)buf);
		recsize = sizeof(struct dqblk32);
		base = 0;
	}

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = buf;
	aiov.iov_len = recsize;
	auio.uio_resid = recsize;
	auio.uio_offset = base + dq->dq_id * recsize;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = (struct thread *)0;
	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]);
	VFS_UNLOCK_GIANT(vfslocked);
	if (auio.uio_resid && error == 0)
		error = EIO;

	DQI_LOCK(dq);
	DQI_WAKEUP(dq);
	dq->dq_flags &= ~DQ_MOD;
out:
	DQI_UNLOCK(dq);
	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	if (vp != dqvp)
		vput(dqvp);
	else
		vrele(dqvp);
	vn_finished_secondary_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
Exemplo n.º 10
0
/*
 * Obtain a dquot structure for the specified identifier and quota file
 * reading the information from the file if necessary.
 */
static int
dqget(struct vnode *vp, u_long id, struct ufsmount *ump, int type,
    struct dquot **dqp)
{
	uint8_t buf[sizeof(struct dqblk64)];
	off_t base, recsize;
	struct dquot *dq, *dq1;
	struct dqhash *dqh;
	struct vnode *dqvp;
	struct iovec aiov;
	struct uio auio;
	int vfslocked, dqvplocked, error;

#ifdef DEBUG_VFS_LOCKS
	if (vp != NULLVP)
		ASSERT_VOP_ELOCKED(vp, "dqget");
#endif

	if (vp != NULLVP && *dqp != NODQUOT) {
		return (0);
	}

	/* XXX: Disallow negative id values to prevent the
	* creation of 100GB+ quota data files.
	*/
	if ((int)id < 0)
		return (EINVAL);

	UFS_LOCK(ump);
	dqvp = ump->um_quotas[type];
	if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) {
		*dqp = NODQUOT;
		UFS_UNLOCK(ump);
		return (EINVAL);
	}
	vref(dqvp);
	UFS_UNLOCK(ump);
	error = 0;
	dqvplocked = 0;

	/*
	 * Check the cache first.
	 */
	dqh = DQHASH(dqvp, id);
	DQH_LOCK();
	dq = dqhashfind(dqh, id, dqvp);
	if (dq != NULL) {
		DQH_UNLOCK();
hfound:		DQI_LOCK(dq);
		DQI_WAIT(dq, PINOD+1, "dqget");
		DQI_UNLOCK(dq);
		if (dq->dq_ump == NULL) {
			dqrele(vp, dq);
			dq = NODQUOT;
			error = EIO;
		}
		*dqp = dq;
		vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
		if (dqvplocked)
			vput(dqvp);
		else
			vrele(dqvp);
		VFS_UNLOCK_GIANT(vfslocked);
		return (error);
	}

	/*
	 * Quota vnode lock is before DQ_LOCK. Acquire dqvp lock there
	 * since new dq will appear on the hash chain DQ_LOCKed.
	 */
	if (vp != dqvp) {
		DQH_UNLOCK();
		vn_lock(dqvp, LK_SHARED | LK_RETRY);
		dqvplocked = 1;
		DQH_LOCK();
		/*
		 * Recheck the cache after sleep for quota vnode lock.
		 */
		dq = dqhashfind(dqh, id, dqvp);
		if (dq != NULL) {
			DQH_UNLOCK();
			goto hfound;
		}
	}

	/*
	 * Not in cache, allocate a new one or take it from the
	 * free list.
	 */
	if (TAILQ_FIRST(&dqfreelist) == NODQUOT &&
	    numdquot < MAXQUOTAS * desiredvnodes)
		desireddquot += DQUOTINC;
	if (numdquot < desireddquot) {
		numdquot++;
		DQH_UNLOCK();
		dq1 = malloc(sizeof *dq1, M_DQUOT, M_WAITOK | M_ZERO);
		mtx_init(&dq1->dq_lock, "dqlock", NULL, MTX_DEF);
		DQH_LOCK();
		/*
		 * Recheck the cache after sleep for memory.
		 */
		dq = dqhashfind(dqh, id, dqvp);
		if (dq != NULL) {
			numdquot--;
			DQH_UNLOCK();
			mtx_destroy(&dq1->dq_lock);
			free(dq1, M_DQUOT);
			goto hfound;
		}
		dq = dq1;
	} else {
		if ((dq = TAILQ_FIRST(&dqfreelist)) == NULL) {
			DQH_UNLOCK();
			tablefull("dquot");
			*dqp = NODQUOT;
			vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
			if (dqvplocked)
				vput(dqvp);
			else
				vrele(dqvp);
			VFS_UNLOCK_GIANT(vfslocked);
			return (EUSERS);
		}
		if (dq->dq_cnt || (dq->dq_flags & DQ_MOD))
			panic("dqget: free dquot isn't %p", dq);
		TAILQ_REMOVE(&dqfreelist, dq, dq_freelist);
		if (dq->dq_ump != NULL)
			LIST_REMOVE(dq, dq_hash);
	}

	/*
	 * Dq is put into hash already locked to prevent parallel
	 * usage while it is being read from file.
	 */
	dq->dq_flags = DQ_LOCK;
	dq->dq_id = id;
	dq->dq_type = type;
	dq->dq_ump = ump;
	LIST_INSERT_HEAD(dqh, dq, dq_hash);
	DQREF(dq);
	DQH_UNLOCK();

	/*
	 * Read the requested quota record from the quota file, performing
	 * any necessary conversions.
	 */
	if (ump->um_qflags[type] & QTF_64BIT) {
		recsize = sizeof(struct dqblk64);
		base = sizeof(struct dqhdr64);
	} else {
		recsize = sizeof(struct dqblk32);
		base = 0;
	}
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	aiov.iov_base = buf;
	aiov.iov_len = recsize;
	auio.uio_resid = recsize;
	auio.uio_offset = base + id * recsize;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = (struct thread *)0;

	vfslocked = VFS_LOCK_GIANT(dqvp->v_mount);
	error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]);
	if (auio.uio_resid == recsize && error == 0) {
		bzero(&dq->dq_dqb, sizeof(dq->dq_dqb));
	} else {
		if (ump->um_qflags[type] & QTF_64BIT)
			dqb64_dq((struct dqblk64 *)buf, dq);
		else
			dqb32_dq((struct dqblk32 *)buf, dq);
	}
	if (dqvplocked)
		vput(dqvp);
	else
		vrele(dqvp);
	VFS_UNLOCK_GIANT(vfslocked);
	/*
	 * I/O error in reading quota file, release
	 * quota structure and reflect problem to caller.
	 */
	if (error) {
		DQH_LOCK();
		dq->dq_ump = NULL;
		LIST_REMOVE(dq, dq_hash);
		DQH_UNLOCK();
		DQI_LOCK(dq);
		if (dq->dq_flags & DQ_WANT)
			wakeup(dq);
		dq->dq_flags = 0;
		DQI_UNLOCK(dq);
		dqrele(vp, dq);
		*dqp = NODQUOT;
		return (error);
	}
	DQI_LOCK(dq);
	/*
	 * Check for no limit to enforce.
	 * Initialize time values if necessary.
	 */
	if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
	    dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
		dq->dq_flags |= DQ_FAKE;
	if (dq->dq_id != 0) {
		if (dq->dq_btime == 0) {
			dq->dq_btime = time_second + ump->um_btime[type];
			if (dq->dq_bsoftlimit &&
			    dq->dq_curblocks >= dq->dq_bsoftlimit)
				dq->dq_flags |= DQ_MOD;
		}
		if (dq->dq_itime == 0) {
			dq->dq_itime = time_second + ump->um_itime[type];
			if (dq->dq_isoftlimit &&
			    dq->dq_curinodes >= dq->dq_isoftlimit)
				dq->dq_flags |= DQ_MOD;
		}
	}
	DQI_WAKEUP(dq);
	DQI_UNLOCK(dq);
	*dqp = dq;
	return (0);
}
Exemplo n.º 11
0
/*
 * Clean up the unionfs node.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0, td);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0, td);
	vp->v_object = NULL;

	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}
Exemplo n.º 12
0
/* this call, unlike osi_FlushText, is supposed to discard caches that may
   contain invalid information if a file is written remotely, but that may
   contain valid information that needs to be written back if the file is
   being written locally.  It doesn't subsume osi_FlushText, since the latter
   function may be needed to flush caches that are invalidated by local writes.

   avc->pvnLock is already held, avc->lock is guaranteed not to be held (by
   us, of course).
*/
void
osi_FlushPages(struct vcache *avc, afs_ucred_t *credp)
{
#ifdef AFS_FBSD70_ENV
    int vfslocked;
#endif
    afs_hyper_t origDV;
#if defined(AFS_CACHE_BYPASS)
    /* The optimization to check DV under read lock below is identical a
     * change in CITI cache bypass work.  The problem CITI found in 1999
     * was that this code and background daemon doing prefetching competed
     * for the vcache entry shared lock.  It's not clear to me from the
     * tech report, but it looks like CITI fixed the general prefetch code
     * path as a bonus when experimenting on prefetch for cache bypass, see
     * citi-tr-01-3.
     */
#endif
    ObtainReadLock(&avc->lock);
    /* If we've already purged this version, or if we're the ones
     * writing this version, don't flush it (could lose the
     * data we're writing). */
    if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
	|| ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
	ReleaseReadLock(&avc->lock);
	return;
    }
    ReleaseReadLock(&avc->lock);
    ObtainWriteLock(&avc->lock, 10);
    /* Check again */
    if ((hcmp((avc->f.m.DataVersion), (avc->mapDV)) <= 0)
	|| ((avc->execsOrWriters > 0) && afs_DirtyPages(avc))) {
	ReleaseWriteLock(&avc->lock);
	return;
    }
    if (hiszero(avc->mapDV)) {
	hset(avc->mapDV, avc->f.m.DataVersion);
	ReleaseWriteLock(&avc->lock);
	return;
    }

    AFS_STATCNT(osi_FlushPages);
    hset(origDV, avc->f.m.DataVersion);
    afs_Trace3(afs_iclSetp, CM_TRACE_FLUSHPAGES, ICL_TYPE_POINTER, avc,
	       ICL_TYPE_INT32, origDV.low, ICL_TYPE_INT32, avc->f.m.Length);

    ReleaseWriteLock(&avc->lock);
#ifdef AFS_FBSD70_ENV
    vfslocked = VFS_LOCK_GIANT(AFSTOV(avc)->v_mount);
#endif
#ifndef AFS_FBSD70_ENV
    AFS_GUNLOCK();
#endif
    osi_VM_FlushPages(avc, credp);
#ifndef AFS_FBSD70_ENV
    AFS_GLOCK();
#endif
#ifdef AFS_FBSD70_ENV
    VFS_UNLOCK_GIANT(vfslocked);
#endif
    ObtainWriteLock(&avc->lock, 88);

    /* do this last, and to original version, since stores may occur
     * while executing above PUTPAGE call */
    hset(avc->mapDV, origDV);
    ReleaseWriteLock(&avc->lock);
}
Exemplo n.º 13
0
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * If pubflag is set, this call is done for a lookup operation on the
 * public filehandle. In that case we allow crossing mountpoints and
 * absolute pathnames. However, the caller is expected to check that
 * the lookup result is within the public fs, and deny access if
 * it is not.
 *
 * nfs_namei() clears out garbage fields that namei() might leave garbage.
 * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no
 * error occurs but the parent was not requested.
 *
 * dirp may be set whether an error is returned or not, and must be
 * released by the caller.
 */
int
nfs_namei(struct nameidata *ndp, struct nfsrv_descript *nfsd,
    fhandle_t *fhp, int len, struct nfssvc_sock *slp,
    struct sockaddr *nam, struct mbuf **mdp,
    caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp,
    int *retdirattr_retp, int pubflag)
{
	int i, rem;
	struct mbuf *md;
	char *fromcp, *tocp, *cp;
	struct iovec aiov;
	struct uio auio;
	struct vnode *dp;
	int error, rdonly, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0;
	int dvfslocked;
	int vfslocked;

	vfslocked = 0;
	dvfslocked = 0;
	*retdirp = NULL;
	cnp->cn_flags |= NOMACCHECK;
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);

	/*
	 * Copy the name from the mbuf list to ndp->ni_pnbuf
	 * and set the various ndp fields appropriately.
	 */
	fromcp = *dposp;
	tocp = cnp->cn_pnbuf;
	md = *mdp;
	rem = mtod(md, caddr_t) + md->m_len - fromcp;
	for (i = 0; i < len; i++) {
		while (rem == 0) {
			md = md->m_next;
			if (md == NULL) {
				error = EBADRPC;
				goto out;
			}
			fromcp = mtod(md, caddr_t);
			rem = md->m_len;
		}
		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
			error = EACCES;
			goto out;
		}
		*tocp++ = *fromcp++;
		rem--;
	}
	*tocp = '\0';
	*mdp = md;
	*dposp = fromcp;
	len = nfsm_rndup(len)-len;
	if (len > 0) {
		if (rem >= len)
			*dposp += len;
		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
			goto out;
	}

	/*
	 * Extract and set starting directory.
	 */
	error = nfsrv_fhtovp(fhp, FALSE, &dp, &dvfslocked,
	    nfsd, slp, nam, &rdonly, pubflag);
	if (error)
		goto out;
	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
	if (dp->v_type != VDIR) {
		vrele(dp);
		error = ENOTDIR;
		goto out;
	}

	if (rdonly)
		cnp->cn_flags |= RDONLY;

	/*
	 * Set return directory.  Reference to dp is implicitly transfered
	 * to the returned pointer
	 */
	*retdirp = dp;
	if (v3) {
		vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
		*retdirattr_retp = VOP_GETATTR(dp, retdirattrp,
			ndp->ni_cnd.cn_cred);
		VOP_UNLOCK(dp, 0);
	}

	if (pubflag) {
		/*
		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
		 * and the 'native path' indicator.
		 */
		cp = uma_zalloc(namei_zone, M_WAITOK);
		fromcp = cnp->cn_pnbuf;
		tocp = cp;
		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
			switch ((unsigned char)*fromcp) {
			case WEBNFS_NATIVE_CHAR:
				/*
				 * 'Native' path for us is the same
				 * as a path according to the NFS spec,
				 * just skip the escape char.
				 */
				fromcp++;
				break;
			/*
			 * More may be added in the future, range 0x80-0xff
			 */
			default:
				error = EIO;
				uma_zfree(namei_zone, cp);
				goto out;
			}
		}
		/*
		 * Translate the '%' escapes, URL-style.
		 */
		while (*fromcp != '\0') {
			if (*fromcp == WEBNFS_ESC_CHAR) {
				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
					fromcp++;
					*tocp++ = HEXSTRTOI(fromcp);
					fromcp += 2;
					continue;
				} else {
					error = ENOENT;
					uma_zfree(namei_zone, cp);
					goto out;
				}
			} else
				*tocp++ = *fromcp++;
		}
		*tocp = '\0';
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		cnp->cn_pnbuf = cp;
	}

	ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (pubflag) {
		ndp->ni_rootdir = rootvnode;
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			int tvfslocked;

			tvfslocked = VFS_LOCK_GIANT(rootvnode->v_mount);
			VFS_UNLOCK_GIANT(vfslocked);
			dp = rootvnode;
			vfslocked = tvfslocked;
		}
	} else {
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = curthread;
	VREF(dp);
	ndp->ni_startdir = dp;

	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		if (vfslocked)
			ndp->ni_cnd.cn_flags |= GIANTHELD;
		error = lookup(ndp);
		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 * Note: zfree is safe because error is 0, so we will
		 * not zfree it again when we break.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if (cnp->cn_flags & (SAVENAME | SAVESTART))
				cnp->cn_flags |= HASBUF;
			else
				uma_zfree(namei_zone, cnp->cn_pnbuf);
			if (ndp->ni_vp && !lockleaf)
				VOP_UNLOCK(ndp->ni_vp, 0);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			VOP_UNLOCK(ndp->ni_dvp, 0);
		if (!pubflag) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vput(ndp->ni_vp);
			vrele(ndp->ni_dvp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;
	if (cnp->cn_flags & GIANTHELD) {
		mtx_unlock(&Giant);
		cnp->cn_flags &= ~GIANTHELD;
	}

	/*
	 * nfs_namei() guarentees that fields will not contain garbage
	 * whether an error occurs or not.  This allows the caller to track
	 * cleanup state trivially.
	 */
out:
	if (error) {
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
		cnp->cn_flags &= ~HASBUF;
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}
	/*
	 * This differs from normal namei() in that even on failure we may
	 * return with Giant held due to the dirp return.  Make sure we only
	 * have not recursed however.  The calling code only expects to drop
	 * one acquire.
	 */
	if (vfslocked || dvfslocked)
		ndp->ni_cnd.cn_flags |= GIANTHELD;
	if (vfslocked && dvfslocked)
		VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
Exemplo n.º 14
0
static void
mac_proc_vm_revoke_recurse(struct thread *td, struct ucred *cred,
    struct vm_map *map)
{
	vm_map_entry_t vme;
	int vfslocked, result;
	vm_prot_t revokeperms;
	vm_object_t backing_object, object;
	vm_ooffset_t offset;
	struct vnode *vp;
	struct mount *mp;

	if (!mac_mmap_revocation)
		return;

	vm_map_lock(map);
	for (vme = map->header.next; vme != &map->header; vme = vme->next) {
		if (vme->eflags & MAP_ENTRY_IS_SUB_MAP) {
			mac_proc_vm_revoke_recurse(td, cred,
			    vme->object.sub_map);
			continue;
		}
		/*
		 * Skip over entries that obviously are not shared.
		 */
		if (vme->eflags & (MAP_ENTRY_COW | MAP_ENTRY_NOSYNC) ||
		    !vme->max_protection)
			continue;
		/*
		 * Drill down to the deepest backing object.
		 */
		offset = vme->offset;
		object = vme->object.vm_object;
		if (object == NULL)
			continue;
		VM_OBJECT_LOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			offset += object->backing_object_offset;
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		VM_OBJECT_UNLOCK(object);
		/*
		 * At the moment, vm_maps and objects aren't considered by
		 * the MAC system, so only things with backing by a normal
		 * object (read: vnodes) are checked.
		 */
		if (object->type != OBJT_VNODE)
			continue;
		vp = (struct vnode *)object->handle;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		result = vme->max_protection;
		mac_vnode_check_mmap_downgrade(cred, vp, &result);
		VOP_UNLOCK(vp, 0);
		/*
		 * Find out what maximum protection we may be allowing now
		 * but a policy needs to get removed.
		 */
		revokeperms = vme->max_protection & ~result;
		if (!revokeperms) {
			VFS_UNLOCK_GIANT(vfslocked);
			continue;
		}
		printf("pid %ld: revoking %s perms from %#lx:%ld "
		    "(max %s/cur %s)\n", (long)td->td_proc->p_pid,
		    prot2str(revokeperms), (u_long)vme->start,
		    (long)(vme->end - vme->start),
		    prot2str(vme->max_protection), prot2str(vme->protection));
		/*
		 * This is the really simple case: if a map has more
		 * max_protection than is allowed, but it's not being
		 * actually used (that is, the current protection is still
		 * allowed), we can just wipe it out and do nothing more.
		 */
		if ((vme->protection & revokeperms) == 0) {
			vme->max_protection -= revokeperms;
		} else {
			if (revokeperms & VM_PROT_WRITE) {
				/*
				 * In the more complicated case, flush out all
				 * pending changes to the object then turn it
				 * copy-on-write.
				 */
				vm_object_reference(object);
				(void) vn_start_write(vp, &mp, V_WAIT);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VM_OBJECT_LOCK(object);
				vm_object_page_clean(object, offset, offset +
				    vme->end - vme->start, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(object);
				VOP_UNLOCK(vp, 0);
				vn_finished_write(mp);
				vm_object_deallocate(object);
				/*
				 * Why bother if there's no read permissions
				 * anymore?  For the rest, we need to leave
				 * the write permissions on for COW, or
				 * remove them entirely if configured to.
				 */
				if (!mac_mmap_revocation_via_cow) {
					vme->max_protection &= ~VM_PROT_WRITE;
					vme->protection &= ~VM_PROT_WRITE;
				} if ((revokeperms & VM_PROT_READ) == 0)
					vme->eflags |= MAP_ENTRY_COW |
					    MAP_ENTRY_NEEDS_COPY;
			}
			if (revokeperms & VM_PROT_EXECUTE) {
				vme->max_protection &= ~VM_PROT_EXECUTE;
				vme->protection &= ~VM_PROT_EXECUTE;
			}
			if (revokeperms & VM_PROT_READ) {
				vme->max_protection = 0;
				vme->protection = 0;
			}
			pmap_protect(map->pmap, vme->start, vme->end,
			    vme->protection & ~revokeperms);
			vm_map_simplify_entry(map, vme);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	vm_map_unlock(map);
}
Exemplo n.º 15
0
/*
 * Exit: deallocate address space and other resources, change proc state to
 * zombie, and unlink proc from allproc and parent's lists.  Save exit status
 * and rusage for wait().  Check for child processes and orphan them.
 */
void
exit1(struct thread *td, int rv)
{
	struct proc *p, *nq, *q;
	struct vnode *vtmp;
	struct vnode *ttyvp = NULL;
#ifdef KTRACE
	struct vnode *tracevp;
	struct ucred *tracecred;
#endif
	struct plimit *plim;
	int locked;

	mtx_assert(&Giant, MA_NOTOWNED);

	p = td->td_proc;
	/*
	 * XXX in case we're rebooting we just let init die in order to
	 * work around an unsolved stack overflow seen very late during
	 * shutdown on sparc64 when the gmirror worker process exists.
	 */ 
	if (p == initproc && rebooting == 0) {
		printf("init died (signal %d, exit %d)\n",
		    WTERMSIG(rv), WEXITSTATUS(rv));
		panic("Going nowhere without my init!");
	}

	/*
	 * MUST abort all other threads before proceeding past here.
	 */
	PROC_LOCK(p);
	while (p->p_flag & P_HADTHREADS) {
		/*
		 * First check if some other thread got here before us..
		 * if so, act apropriatly, (exit or suspend);
		 */
		thread_suspend_check(0);

		/*
		 * Kill off the other threads. This requires
		 * some co-operation from other parts of the kernel
		 * so it may not be instantaneous.  With this state set
		 * any thread entering the kernel from userspace will
		 * thread_exit() in trap().  Any thread attempting to
		 * sleep will return immediately with EINTR or EWOULDBLOCK
		 * which will hopefully force them to back out to userland
		 * freeing resources as they go.  Any thread attempting
		 * to return to userland will thread_exit() from userret().
		 * thread_exit() will unsuspend us when the last of the
		 * other threads exits.
		 * If there is already a thread singler after resumption,
		 * calling thread_single will fail; in that case, we just
		 * re-check all suspension request, the thread should
		 * either be suspended there or exit.
		 */
		if (! thread_single(SINGLE_EXIT))
			break;

		/*
		 * All other activity in this process is now stopped.
		 * Threading support has been turned off.
		 */
	}
	KASSERT(p->p_numthreads == 1,
	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
	/*
	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
	 * on our vmspace, so we should block below until they have
	 * released their reference to us.  Note that if they have
	 * requested S_EXIT stops we will block here until they ack
	 * via PIOCCONT.
	 */
	_STOPEVENT(p, S_EXIT, rv);

	/*
	 * Note that we are exiting and do another wakeup of anyone in
	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
	 * decided to wait again after we told them we are exiting.
	 */
	p->p_flag |= P_WEXIT;
	wakeup(&p->p_stype);

	/*
	 * Wait for any processes that have a hold on our vmspace to
	 * release their reference.
	 */
	while (p->p_lock > 0)
		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);

	PROC_UNLOCK(p);
	/* Drain the limit callout while we don't have the proc locked */
	callout_drain(&p->p_limco);

#ifdef AUDIT
	/*
	 * The Sun BSM exit token contains two components: an exit status as
	 * passed to exit(), and a return value to indicate what sort of exit
	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
	 * what the return value is.
	 */
	AUDIT_ARG_EXIT(WEXITSTATUS(rv), 0);
	AUDIT_SYSCALL_EXIT(0, td);
#endif

	/* Are we a task leader? */
	if (p == p->p_leader) {
		mtx_lock(&ppeers_lock);
		q = p->p_peers;
		while (q != NULL) {
			PROC_LOCK(q);
			psignal(q, SIGKILL);
			PROC_UNLOCK(q);
			q = q->p_peers;
		}
		while (p->p_peers != NULL)
			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
		mtx_unlock(&ppeers_lock);
	}

	/*
	 * Check if any loadable modules need anything done at process exit.
	 * E.g. SYSV IPC stuff
	 * XXX what if one of these generates an error?
	 */
	EVENTHANDLER_INVOKE(process_exit, p);

	/*
	 * If parent is waiting for us to exit or exec,
	 * P_PPWAIT is set; we will wakeup the parent below.
	 */
	PROC_LOCK(p);
	stopprofclock(p);
	p->p_flag &= ~(P_TRACED | P_PPWAIT);

	/*
	 * Stop the real interval timer.  If the handler is currently
	 * executing, prevent it from rearming itself and let it finish.
	 */
	if (timevalisset(&p->p_realtimer.it_value) &&
	    callout_stop(&p->p_itcallout) == 0) {
		timevalclear(&p->p_realtimer.it_interval);
		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
		KASSERT(!timevalisset(&p->p_realtimer.it_value),
		    ("realtime timer is still armed"));
	}
	PROC_UNLOCK(p);

	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pid.
	 */
	funsetownlst(&p->p_sigiolst);

	/*
	 * If this process has an nlminfo data area (for lockd), release it
	 */
	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
		(*nlminfo_release_p)(p);

	/*
	 * Close open files and release open-file table.
	 * This may block!
	 */
	fdfree(td);

	/*
	 * If this thread tickled GEOM, we need to wait for the giggling to
	 * stop before we return to userland
	 */
	if (td->td_pflags & TDP_GEOM)
		g_waitidle();

	/*
	 * Remove ourself from our leader's peer list and wake our leader.
	 */
	mtx_lock(&ppeers_lock);
	if (p->p_leader->p_peers) {
		q = p->p_leader;
		while (q->p_peers != p)
			q = q->p_peers;
		q->p_peers = p->p_peers;
		wakeup(p->p_leader);
	}
	mtx_unlock(&ppeers_lock);

	vmspace_exit(td);

	sx_xlock(&proctree_lock);
	if (SESS_LEADER(p)) {
		struct session *sp = p->p_session;
		struct tty *tp;

		/*
		 * s_ttyp is not zero'd; we use this to indicate that
		 * the session once had a controlling terminal. (for
		 * logging and informational purposes)
		 */
		SESS_LOCK(sp);
		ttyvp = sp->s_ttyvp;
		tp = sp->s_ttyp;
		sp->s_ttyvp = NULL;
		sp->s_leader = NULL;
		SESS_UNLOCK(sp);

		/*
		 * Signal foreground pgrp and revoke access to
		 * controlling terminal if it has not been revoked
		 * already.
		 *
		 * Because the TTY may have been revoked in the mean
		 * time and could already have a new session associated
		 * with it, make sure we don't send a SIGHUP to a
		 * foreground process group that does not belong to this
		 * session.
		 */

		if (tp != NULL) {
			tty_lock(tp);
			if (tp->t_session == sp)
				tty_signal_pgrp(tp, SIGHUP);
			tty_unlock(tp);
		}

		if (ttyvp != NULL) {
			sx_xunlock(&proctree_lock);
			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
				VOP_REVOKE(ttyvp, REVOKEALL);
				VOP_UNLOCK(ttyvp, 0);
			}
			sx_xlock(&proctree_lock);
		}
	}
	fixjobc(p, p->p_pgrp, 0);
	sx_xunlock(&proctree_lock);
	(void)acct_process(td);

	/* Release the TTY now we've unlocked everything. */
	if (ttyvp != NULL)
		vrele(ttyvp);
#ifdef KTRACE
	/*
	 * Disable tracing, then drain any pending records and release
	 * the trace file.
	 */
	if (p->p_traceflag != 0) {
		PROC_LOCK(p);
		mtx_lock(&ktrace_mtx);
		p->p_traceflag = 0;
		mtx_unlock(&ktrace_mtx);
		PROC_UNLOCK(p);
		ktrprocexit(td);
		PROC_LOCK(p);
		mtx_lock(&ktrace_mtx);
		tracevp = p->p_tracevp;
		p->p_tracevp = NULL;
		tracecred = p->p_tracecred;
		p->p_tracecred = NULL;
		mtx_unlock(&ktrace_mtx);
		PROC_UNLOCK(p);
		if (tracevp != NULL) {
			locked = VFS_LOCK_GIANT(tracevp->v_mount);
			vrele(tracevp);
			VFS_UNLOCK_GIANT(locked);
		}
		if (tracecred != NULL)
			crfree(tracecred);
	}
#endif
	/*
	 * Release reference to text vnode
	 */
	if ((vtmp = p->p_textvp) != NULL) {
		p->p_textvp = NULL;
		locked = VFS_LOCK_GIANT(vtmp->v_mount);
		vrele(vtmp);
		VFS_UNLOCK_GIANT(locked);
	}

	/*
	 * Release our limits structure.
	 */
	PROC_LOCK(p);
	plim = p->p_limit;
	p->p_limit = NULL;
	PROC_UNLOCK(p);
	lim_free(plim);

	/*
	 * Remove proc from allproc queue and pidhash chain.
	 * Place onto zombproc.  Unlink from parent's child list.
	 */
	sx_xlock(&allproc_lock);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	sx_xunlock(&allproc_lock);

	/*
	 * Call machine-dependent code to release any
	 * machine-dependent resources other than the address space.
	 * The address space is released by "vmspace_exitfree(p)" in
	 * vm_waitproc().
	 */
	cpu_exit(td);

	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);

	/*
	 * Reparent all of our children to init.
	 */
	sx_xlock(&proctree_lock);
	q = LIST_FIRST(&p->p_children);
	if (q != NULL)		/* only need this if any child is S_ZOMB */
		wakeup(initproc);
	for (; q != NULL; q = nq) {
		nq = LIST_NEXT(q, p_sibling);
		PROC_LOCK(q);
		proc_reparent(q, initproc);
		q->p_sigparent = SIGCHLD;
		/*
		 * Traced processes are killed
		 * since their existence means someone is screwing up.
		 */
		if (q->p_flag & P_TRACED) {
			struct thread *temp;

			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
			FOREACH_THREAD_IN_PROC(q, temp)
				temp->td_dbgflags &= ~TDB_SUSPEND;
			psignal(q, SIGKILL);
		}
		PROC_UNLOCK(q);
	}

	/* Save exit status. */
	PROC_LOCK(p);
	p->p_xstat = rv;
	p->p_xthread = td;

	/* Tell the prison that we are gone. */
	prison_proc_free(p->p_ucred->cr_prison);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the exit if it
	 * has declared an interest.
	 */
	if (dtrace_fasttrap_exit)
		dtrace_fasttrap_exit(p);
#endif

	/*
	 * Notify interested parties of our demise.
	 */
	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);

#ifdef KDTRACE_HOOKS
	int reason = CLD_EXITED;
	if (WCOREDUMP(rv))
		reason = CLD_DUMPED;
	else if (WIFSIGNALED(rv))
		reason = CLD_KILLED;
	SDT_PROBE(proc, kernel, , exit, reason, 0, 0, 0, 0);
#endif

	/*
	 * Just delete all entries in the p_klist. At this point we won't
	 * report any more events, and there are nasty race conditions that
	 * can beat us if we don't.
	 */
	knlist_clear(&p->p_klist, 1);

	/*
	 * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
	 * flag set, or if the handler is set to SIG_IGN, notify process
	 * 1 instead (and hope it will handle this situation).
	 */
	PROC_LOCK(p->p_pptr);
	mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
	if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
		struct proc *pp;

		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
		pp = p->p_pptr;
		PROC_UNLOCK(pp);
		proc_reparent(p, initproc);
		p->p_sigparent = SIGCHLD;
		PROC_LOCK(p->p_pptr);

		/*
		 * Notify parent, so in case he was wait(2)ing or
		 * executing waitpid(2) with our pid, he will
		 * continue.
		 */
		wakeup(pp);
	} else
		mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);

	if (p->p_pptr == initproc)
		psignal(p->p_pptr, SIGCHLD);
	else if (p->p_sigparent != 0) {
		if (p->p_sigparent == SIGCHLD)
			childproc_exited(p);
		else	/* LINUX thread */
			psignal(p->p_pptr, p->p_sigparent);
	}
	sx_xunlock(&proctree_lock);

	/*
	 * The state PRS_ZOMBIE prevents other proesses from sending
	 * signal to the process, to avoid memory leak, we free memory
	 * for signal queue at the time when the state is set.
	 */
	sigqueue_flush(&p->p_sigqueue);
	sigqueue_flush(&td->td_sigqueue);

	/*
	 * We have to wait until after acquiring all locks before
	 * changing p_state.  We need to avoid all possible context
	 * switches (including ones from blocking on a mutex) while
	 * marked as a zombie.  We also have to set the zombie state
	 * before we release the parent process' proc lock to avoid
	 * a lost wakeup.  So, we first call wakeup, then we grab the
	 * sched lock, update the state, and release the parent process'
	 * proc lock.
	 */
	wakeup(p->p_pptr);
	cv_broadcast(&p->p_pwait);
	sched_exit(p->p_pptr, td);
	PROC_SLOCK(p);
	p->p_state = PRS_ZOMBIE;
	PROC_UNLOCK(p->p_pptr);

	/*
	 * Hopefully no one will try to deliver a signal to the process this
	 * late in the game.
	 */
	knlist_destroy(&p->p_klist);

	/*
	 * Save our children's rusage information in our exit rusage.
	 */
	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);

	/*
	 * Make sure the scheduler takes this thread out of its tables etc.
	 * This will also release this thread's reference to the ucred.
	 * Other thread parts to release include pcb bits and such.
	 */
	thread_exit();
}
Exemplo n.º 16
0
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_doprocmap(PFS_FILL_ARGS)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_map_entry_t entry, tmp_entry;
	struct vnode *vp;
	char *fullpath, *freepath;
	struct uidinfo *uip;
	int error, vfslocked;
	unsigned int last_timestamp;
#ifdef COMPAT_FREEBSD32
	int wrap32 = 0;
#endif

	PROC_LOCK(p);
	error = p_candebug(td, p);
	PROC_UNLOCK(p);
	if (error)
		return (error);

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

#ifdef COMPAT_FREEBSD32
        if (curproc->p_sysent->sv_flags & SV_ILP32) {
                if (!(p->p_sysent->sv_flags & SV_ILP32))
                        return (EOPNOTSUPP);
                wrap32 = 1;
        }
#endif

	vm = vmspace_acquire_ref(p);
	if (vm == NULL)
		return (ESRCH);
	map = &vm->vm_map;
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	     entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t e_start, e_end, addr;
		int resident, privateresident;
		char *type;
		vm_eflags_t e_eflags;
		vm_prot_t e_prot;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		e_eflags = entry->eflags;
		e_prot = entry->protection;
		e_start = entry->start;
		e_end = entry->end;
		privateresident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_LOCK(obj);
			if (obj->shadow_count == 1)
				privateresident = obj->resident_page_count;
		}
		uip = (entry->uip) ? entry->uip : (obj ? obj->uip : NULL);

		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}

		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_LOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);
			lobj = tobj;
		}
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch (lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_SG:
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);

			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			VM_OBJECT_UNLOCK(obj);
			if (vp != NULL) {
				vn_fullpath(td, vp, &fullpath, &freepath);
				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
				vrele(vp);
				VFS_UNLOCK_GIANT(vfslocked);
			}
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, resident, private resident, cow, access, type,
		 *         charged, charged uid.
		 */
		error = sbuf_printf(sb,
		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s %s %s %d\n",
			(u_long)e_start, (u_long)e_end,
			resident, privateresident,
#ifdef COMPAT_FREEBSD32
			wrap32 ? NULL : obj,	/* Hide 64 bit value */
#else
			obj,
#endif
			(e_prot & VM_PROT_READ)?"r":"-",
			(e_prot & VM_PROT_WRITE)?"w":"-",
			(e_prot & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(e_eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(e_eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath,
			uip ? "CH":"NCH", uip ? uip->ui_uid : -1);

		if (freepath != NULL)
			free(freepath, M_TEMP);
		vm_map_lock_read(map);
		if (error == -1) {
			error = 0;
			break;
		}
		if (last_timestamp != map->timestamp) {
			/*
			 * Look again for the entry because the map was
			 * modified while it was unlocked.  Specifically,
			 * the entry may have been clipped, merged, or deleted.
			 */
			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	vmspace_free(vm);
	return (error);
}
Exemplo n.º 17
0
/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the junk required to write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return(0);
}