/*
 * Flush any pages left on this rnode.
 */
static void
r4flushpages(rnode4_t *rp, cred_t *cr)
{
	vnode_t *vp;
	int error;

	/*
	 * Before freeing anything, wait until all asynchronous
	 * activity is done on this rnode.  This will allow all
	 * asynchronous read ahead and write behind i/o's to
	 * finish.
	 */
	mutex_enter(&rp->r_statelock);
	while (rp->r_count > 0)
		cv_wait(&rp->r_cv, &rp->r_statelock);
	mutex_exit(&rp->r_statelock);

	/*
	 * Flush and invalidate all pages associated with the vnode.
	 */
	vp = RTOV4(rp);
	if (nfs4_has_pages(vp)) {
		ASSERT(vp->v_type != VCHR);
		if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
			if (error && (error == ENOSPC || error == EDQUOT)) {
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = error;
				mutex_exit(&rp->r_statelock);
			}
		}
		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
	}
}
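The r_count/r_statelock/r_cv trio above is a standard drain pattern: callers bump r_count while asynchronous I/O is in flight, and r4flushpages() sleeps on the condition variable until the count drops to zero. Below is a minimal user-space sketch of the same pattern using POSIX threads; the refwait type and function names are invented for illustration and are not part of the NFS code.

/*
 * Minimal user-space sketch of the "wait for in-flight work" pattern
 * used above. Hypothetical names; fields are assumed to have been set
 * up with pthread_mutex_init()/pthread_cond_init().
 */
#include <pthread.h>

struct refwait {
	pthread_mutex_t	lock;
	pthread_cond_t	cv;
	int		count;		/* number of in-flight operations */
};

/* Called by each asynchronous operation when it starts. */
void
refwait_enter(struct refwait *rw)
{
	pthread_mutex_lock(&rw->lock);
	rw->count++;
	pthread_mutex_unlock(&rw->lock);
}

/* Called by each asynchronous operation when it finishes. */
void
refwait_exit(struct refwait *rw)
{
	pthread_mutex_lock(&rw->lock);
	if (--rw->count == 0)
		pthread_cond_broadcast(&rw->cv);
	pthread_mutex_unlock(&rw->lock);
}

/* Block until every in-flight operation has drained (cf. r4flushpages). */
void
refwait_drain(struct refwait *rw)
{
	pthread_mutex_lock(&rw->lock);
	while (rw->count > 0)
		pthread_cond_wait(&rw->cv, &rw->lock);
	pthread_mutex_unlock(&rw->lock);
}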
Example 2
/*ARGSUSED*/
int
spec_sync(struct vfs *vfsp,
	short	flag,
	struct cred *cr)
{
	struct snode *sync_list;
	register struct snode **spp, *sp, *spnext;
	register struct vnode *vp;

	if (mutex_tryenter(&spec_syncbusy) == 0)
		return (0);

	if (flag & SYNC_ATTR) {
		mutex_exit(&spec_syncbusy);
		return (0);
	}
	mutex_enter(&stable_lock);
	sync_list = NULL;
	/*
	 * Find all the snodes that are dirty and add them to the sync_list
	 */
	for (spp = stable; spp < &stable[STABLESIZE]; spp++) {
		for (sp = *spp; sp != NULL; sp = sp->s_next) {
			vp = STOV(sp);
			/*
			 * Don't bother sync'ing a vp if it's
			 * part of a virtual swap device.
			 */
			if (IS_SWAPVP(vp))
				continue;

			if (vp->v_type == VBLK && vn_has_cached_data(vp)) {
				/*
			 * Prevent vp from going away before we get
			 * a chance to do a VOP_PUTPAGE via
			 * sync_list processing.
				 */
				VN_HOLD(vp);
				sp->s_list = sync_list;
				sync_list = sp;
			}
		}
	}
	mutex_exit(&stable_lock);
	/*
	 * Now write out all the snodes we marked asynchronously.
	 */
	for (sp = sync_list; sp != NULL; sp = spnext) {
		spnext = sp->s_list;
		vp = STOV(sp);
		(void) VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_ASYNC, cr);
		VN_RELE(vp);		/* Release our hold on vnode */
	}
	mutex_exit(&spec_syncbusy);
	return (0);
}
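spec_sync() shows a common two-phase idiom: while stable_lock is held it only takes a hold on each dirty snode and threads it onto a private list through the snode's own s_list field, and only after dropping the lock does it issue the potentially slow VOP_PUTPAGE calls. A compilable sketch of that collect-then-process shape, with invented types and names (struct node, sync_all, flush_one) standing in for the snode and vnode machinery, might look like this:

#include <pthread.h>
#include <stddef.h>

struct node {
	struct node	*next;		/* hash-chain link */
	struct node	*work_next;	/* private work-list link (s_list) */
	int		dirty;
	int		holds;		/* simplified reference count */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *table_head;

/* Stand-in for the real write-back (VOP_PUTPAGE with B_ASYNC). */
static void
flush_one(struct node *n)
{
	(void) n;
}

void
sync_all(void)
{
	struct node *work = NULL, *n, *next;

	/* Phase 1: under the table lock, hold and collect dirty nodes. */
	pthread_mutex_lock(&table_lock);
	for (n = table_head; n != NULL; n = n->next) {
		if (!n->dirty)
			continue;
		n->holds++;			/* keep it alive (VN_HOLD) */
		n->work_next = work;
		work = n;
	}
	pthread_mutex_unlock(&table_lock);

	/* Phase 2: issue the slow writes without holding the lock. */
	for (n = work; n != NULL; n = next) {
		next = n->work_next;
		flush_one(n);
		n->holds--;			/* drop the hold (VN_RELE) */
	}
}

Reusing a link field inside the object itself, as spec_sync() does with s_list, avoids allocating memory while scanning under the lock.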
Example 3
int
syncpcp(struct pcnode *pcp, int flags)
{
	int err;
	if (!vn_has_cached_data(PCTOV(pcp)))
		err = 0;
	else
		err = VOP_PUTPAGE(PCTOV(pcp), 0, 0, flags,
		    kcred, NULL);

	return (err);
}
Example 4
static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL);
		VN_RELE(vf->vf_vnode);
	}

	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
Example 5
static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vd->vdev_reopening || vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
		    kcred, NULL);
	}

	vd->vdev_delayed_close = B_FALSE;
	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
Example 6
/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn  = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (ie. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit, leaving the bit alone in hardware;
		 * it will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}
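The per-page loop above relies on a check/trylock/recheck discipline: cheap unlocked tests weed out most pages, page_trylock(SE_EXCL) keeps the scan from ever blocking, and every unlocked test is repeated once the lock is held because the page may have changed state in the meantime. A small stand-alone sketch of that discipline, with an invented xpage type standing in for page_t, is shown below.

#include <stdbool.h>
#include <pthread.h>

struct xpage {
	pthread_mutex_t	lock;
	bool		free;
	bool		dirty;
};

/* Stand-in for the real write-back (the async VOP_PUTPAGE above). */
static void
write_back(struct xpage *p)
{
	(void) p;
}

void
maybe_flush(struct xpage *p)
{
	/* Cheap, racy tests first: skip obviously uninteresting pages. */
	if (p->free || !p->dirty)
		return;

	/* Only then pay for the lock, and never block the scan. */
	if (pthread_mutex_trylock(&p->lock) != 0)
		return;

	/* The state may have changed while we were unlocked: recheck. */
	if (!p->free && p->dirty)
		write_back(p);

	pthread_mutex_unlock(&p->lock);
}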
Example 7
/*
 * ufs_alloc_data - supports allocating space and reads or writes
 * that involve changes to file length or space allocation.
 *
 * This function is more expensive, because of the UFS log transaction,
 * so ufs_rdwr_data() should be used when space or file length changes
 * will not occur.
 *
 * Inputs:
 * fdb - A null pointer instructs this function to only allocate
 *	space for the specified offset and length.
 *	An actual fdbuffer instructs this function to perform
 *	the read or write operation.
 * flags - defaults (zero value) to synchronous write
 *	B_READ - indicates read operation
 *	B_ASYNC - indicates perform operation asynchronously
 */
int
ufs_alloc_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		*len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	size_t		done_len, io_len;
	int		contig;
	u_offset_t	uoff, io_off;
	int		error = 0;		/* No error occurred */
	int		offsetn;		/* Start point this IO */
	int		nbytes;			/* Number bytes in this IO */
	daddr_t		bn;
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	int		i_size_changed = 0;
	u_offset_t	old_i_size;
	struct ulockfs	*ulp;
	int		trans_size;
	int		issync;			/* UFS Log transaction */
						/* synchronous when non-zero */

	int		io_started = 0;		/* No IO started */
	uint_t		protp = PROT_ALL;

	ASSERT((flags & B_WRITE) == 0);

	/*
	 * Obey the lockfs protocol
	 */
	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
	if (error) {
		if ((fdbp != NULL) && (flags & B_ASYNC)) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	if (ulp) {
		/*
		 * Try to begin a UFS log transaction
		 */
		trans_size = TOP_GETPAGE_SIZE(ip);
		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
		    trans_size, error);
		if (error == EWOULDBLOCK) {
			ufs_lockfs_end(ulp);
			if ((fdbp != NULL) && (flags & B_ASYNC)) {
				fdb_ioerrdone(fdbp, EDEADLK);
			}
			return (EDEADLK);
		}
	}

	uoff = offset;
	io_off = offset;
	io_len = *len;
	done_len = 0;

	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ASSERT((ip->i_mode & IFMT) == IFREG);

	fs = ip->i_fs;

	while (error == 0 && done_len < io_len) {
		uoff = (u_offset_t)(io_off + done_len);
		offsetn = (int)blkoff(fs, uoff);
		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);

		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
		    uoff, nbytes));

		if (uoff + nbytes > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
			    ip->i_size, uoff + nbytes));

			error = bmap_write(ip, uoff, (offsetn + nbytes),
			    BI_ALLOC_ONLY, NULL, credp);
			if (ip->i_flag & (ICHG|IUPD))
				ip->i_seq++;
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
				    "failed err: %d\n", error));
				break;
			}
			if (fdbp != NULL) {
				if (uoff >= ip->i_size) {
					/*
					 * Desired offset is past end of bytes
					 * in file, so we have a hole.
					 */
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				} else {
					int contig;
					buf_t *bp;

					error = bmap_read(ip, uoff, &bn,
					    &contig);
					if (error) {
						break;
					}

					contig = ip->i_size - uoff;
					contig = P2ROUNDUP(contig, DEV_BSIZE);

					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
					if (contig > (ip->i_size - uoff)) {
						contig -= ip->i_size - uoff;

						fdb_add_hole(fdbp,
						    ip->i_size - offset,
						    contig);
					}
				}
			}

			i_size_changed = 1;
			old_i_size = ip->i_size;
			UFS_SET_ISIZE(uoff + nbytes, ip);
			TRANS_INODE(ip->i_ufsvfs, ip);
			/*
			 * file has grown larger than 2GB. Set flag
			 * in superblock to indicate this, if it
			 * is not already set.
			 */
			if ((ip->i_size > MAXOFF32_T) &&
			    !(fs->fs_flags & FSLARGEFILES)) {
				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
				mutex_enter(&ufsvfsp->vfs_lock);
				fs->fs_flags |= FSLARGEFILES;
				ufs_sbwrite(ufsvfsp);
				mutex_exit(&ufsvfsp->vfs_lock);
			}
		} else {
			/*
			 * The file length is not being extended.
			 */
			error = bmap_read(ip, uoff, &bn, &contig);
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: "
				    "bmap_read err: %d\n", error));
				break;
			}

			if (bn != UFS_HOLE) {
				/*
				 * Did not map a hole in the file
				 */
				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
				buf_t	*bp;

				if (fdbp != NULL) {
					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
				}
			} else {
				/*
				 * We read a hole in the file.
				 * We have to allocate blocks for the hole.
				 */
				error = bmap_write(ip, uoff, (offsetn + nbytes),
				    BI_ALLOC_ONLY, NULL, credp);
				if (ip->i_flag & (ICHG|IUPD))
					ip->i_seq++;
				if (error) {
					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
					    " hole failed error: %d\n", error));
					break;
				}
				if (fdbp != NULL) {
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				}
			}
		}
		done_len += nbytes;
	}

	if (error) {
		if (i_size_changed) {
			/*
			 * Allocation of the blocks for the file failed.
			 * So truncate the file size back to its original size.
			 */
			(void) ufs_itrunc(ip, old_i_size, 0, credp);
		}
	}

	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
	    uoff, (io_len - done_len)));

	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
	} else {
		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
	}

	/*
	 * Flush cached pages.
	 *
	 * XXX - There should be no pages involved, since the I/O was performed
	 * through the device strategy routine and the page cache was bypassed.
	 * However, testing has demonstrated that this VOP_PUTPAGE is
	 * necessary. Without this, data might not always be read back as it
	 * was written.
	 *
	 */
	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp);

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if ((fdbp != NULL) && (flags & B_ASYNC)) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		/*
		 * End the UFS Log transaction
		 */
		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
		    trans_size);
		ufs_lockfs_end(ulp);
	}
	if (io_started && (flags & B_ASYNC)) {
		return (0);
	} else {
		return (error);
	}
}
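One detail worth noting is the length adjustment near the end of ufs_alloc_data(): on return, *len is rounded up to a fragment boundary while the request still fits within the inode's direct blocks, and to a full block boundary otherwise. The stand-alone program below reproduces that rounding; NDADDR, the block size, and the fragment size are typical UFS values assumed here for illustration, and alloc_len is an invented name.

#include <stdio.h>
#include <stdint.h>

#define	NDADDR		12		/* direct blocks per inode */
#define	FS_BSIZE	8192		/* logical block size */
#define	FS_FSIZE	1024		/* fragment size */

#define	ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

static size_t
alloc_len(uint64_t offset, size_t len)
{
	if (offset + len < (uint64_t)NDADDR * FS_BSIZE)
		return (ROUNDUP(offset + len, FS_FSIZE) - offset);
	return (ROUNDUP(offset + len, FS_BSIZE) - offset);
}

int
main(void)
{
	printf("%zu\n", alloc_len(0, 100));	/* 1024: fragment rounding */
	printf("%zu\n", alloc_len(0, 200000));	/* 204800: block rounding */
	return (0);
}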
/*
 * Set various fields of the dqblk according to the command.
 * Q_SETQUOTA - assign an entire dqblk structure.
 * Q_SETQLIM - assign a dqblk structure except for the usage.
 */
static int
setquota(int cmd, uid_t uid, struct ufsvfs *ufsvfsp,
    caddr_t addr, struct cred *cr)
{
	struct dquot *dqp;
	struct inode	*qip;
	struct dquot *xdqp;
	struct dqblk newlim;
	int error;
	int scan_type = SQD_TYPE_NONE;
	daddr_t bn;
	int contig;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * Quotas are not enabled on this file system so there is
	 * nothing more to do.
	 */
	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (ESRCH);
	}

	/*
	 * At this point, the quota subsystem is quiescent on this file
	 * system so we can do all the work necessary to modify the quota
	 * information for this user.
	 */

	if (copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)) != 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (EFAULT);
	}
	error = getdiskquota(uid, ufsvfsp, 0, &xdqp);
	if (error) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (error);
	}
	dqp = xdqp;
	/*
	 * Don't change disk usage on Q_SETQLIM
	 */
	mutex_enter(&dqp->dq_lock);
	if (cmd == Q_SETQLIM) {
		newlim.dqb_curblocks = dqp->dq_curblocks;
		newlim.dqb_curfiles = dqp->dq_curfiles;
	}
	if (uid == 0) {
		/*
		 * Timelimits for uid 0 set the relative time
		 * the other users can be over quota for this file system.
		 * If it is zero a default is used (see quota.h).
		 */
		ufsvfsp->vfs_btimelimit =
		    newlim.dqb_btimelimit? newlim.dqb_btimelimit: DQ_BTIMELIMIT;
		ufsvfsp->vfs_ftimelimit =
		    newlim.dqb_ftimelimit? newlim.dqb_ftimelimit: DQ_FTIMELIMIT;
	} else {
		if (newlim.dqb_bsoftlimit &&
		    newlim.dqb_curblocks >= newlim.dqb_bsoftlimit) {
			if (dqp->dq_bsoftlimit == 0 ||
			    dqp->dq_curblocks < dqp->dq_bsoftlimit) {
				/* If we're suddenly over the limit(s),	*/
				/* start the timer(s)			*/
				newlim.dqb_btimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_btimelimit;
				dqp->dq_flags &= ~DQ_BLKS;
			} else {
				/* If we're currently over the soft	*/
				/* limit and were previously over the	*/
				/* soft limit then preserve the old	*/
				/* time limit but make sure the DQ_BLKS	*/
				/* flag is set since we must have been	*/
				/* previously warned.			*/
				newlim.dqb_btimelimit = dqp->dq_btimelimit;
				dqp->dq_flags |= DQ_BLKS;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_btimelimit = 0;
			dqp->dq_flags &= ~DQ_BLKS;
		}

		if (newlim.dqb_fsoftlimit &&
		    newlim.dqb_curfiles >= newlim.dqb_fsoftlimit) {
			if (dqp->dq_fsoftlimit == 0 ||
			    dqp->dq_curfiles < dqp->dq_fsoftlimit) {
				/* If we're suddenly over the limit(s),	*/
				/* start the timer(s)			*/
				newlim.dqb_ftimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_ftimelimit;
				dqp->dq_flags &= ~DQ_FILES;
			} else {
				/* If we're currently over the soft	*/
				/* limit and were previously over the	*/
				/* soft limit then preserve the old	*/
				/* time limit but make sure the		*/
				/* DQ_FILES flag is set since we must	*/
				/* have been previously warned.		*/
				newlim.dqb_ftimelimit = dqp->dq_ftimelimit;
				dqp->dq_flags |= DQ_FILES;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_ftimelimit = 0;
			dqp->dq_flags &= ~DQ_FILES;
		}
	}

	/*
	 * If there was previously no limit and there is now at least
	 * one limit, then any inodes in the cache have NULL d_iquot
	 * fields (getinoquota() returns NULL when there are no limits).
	 */
	if ((dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) &&
	    (newlim.dqb_fhardlimit || newlim.dqb_fsoftlimit ||
	    newlim.dqb_bhardlimit || newlim.dqb_bsoftlimit)) {
		scan_type = SQD_TYPE_LIMIT;
	}

	/*
	 * If there was previously at least one limit and there is now
	 * no limit, then any inodes in the cache with non-NULL d_iquot
	 * fields need to be reset to NULL.
	 */
	else if ((dqp->dq_fhardlimit || dqp->dq_fsoftlimit ||
	    dqp->dq_bhardlimit || dqp->dq_bsoftlimit) &&
	    (newlim.dqb_fhardlimit == 0 && newlim.dqb_fsoftlimit == 0 &&
	    newlim.dqb_bhardlimit == 0 && newlim.dqb_bsoftlimit == 0)) {
		scan_type = SQD_TYPE_NO_LIMIT;
	}

	dqp->dq_dqb = newlim;
	dqp->dq_flags |= DQ_MOD;

	/*
	 *  push the new quota to disk now.  If this is a trans device
	 *  then force the page out with ufs_putpage so it will be deltaed
	 *  by ufs_startio.
	 */
	qip = ufsvfsp->vfs_qinod;
	rw_enter(&qip->i_contents, RW_WRITER);
	(void) ufs_rdwri(UIO_WRITE, FWRITE | FSYNC, qip, (caddr_t)&dqp->dq_dqb,
	    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
	    (int *)NULL, kcred);
	rw_exit(&qip->i_contents);

	(void) VOP_PUTPAGE(ITOV(qip), dqoff(dqp->dq_uid) & ~qip->i_fs->fs_bmask,
	    qip->i_fs->fs_bsize, B_INVAL, kcred, NULL);

	/*
	 * We must set the dq_mof even if we are not logging, in case
	 * we later remount to logging.
	 */
	contig = 0;
	rw_enter(&qip->i_contents, RW_WRITER);
	error = bmap_read(qip, dqoff(dqp->dq_uid), &bn, &contig);
	rw_exit(&qip->i_contents);
	if (error || (bn == UFS_HOLE)) {
		dqp->dq_mof = UFS_HOLE;
	} else {
		dqp->dq_mof = ldbtob(bn) +
		    (offset_t)((dqoff(dqp->dq_uid)) & (DEV_BSIZE - 1));
	}

	dqp->dq_flags &= ~DQ_MOD;
	dqput(dqp);
	mutex_exit(&dqp->dq_lock);
	if (scan_type) {
		struct setquota_data sqd;

		sqd.sqd_type = scan_type;
		sqd.sqd_ufsvfsp = ufsvfsp;
		sqd.sqd_uid = uid;
		(void) ufs_scan_inodes(0, setquota_scan_inode, &sqd, ufsvfsp);
	}
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (0);
}
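The nested conditionals in the uid != 0 branch implement the usual soft-limit grace period: a timer is started only when the user newly crosses the soft limit, an existing deadline is kept while the user stays over it, and the timer is cleared once usage drops back under the limit. A simplified, compilable restatement of that decision for the block limit follows; the file-count limit is symmetric, the DQ_BLKS warning flag and the struct dqblk packaging are omitted, and new_btimelimit is an invented name.

#include <stdint.h>
#include <time.h>

uint32_t
new_btimelimit(uint32_t softlimit, uint32_t curblocks,
    uint32_t old_softlimit, uint32_t old_curblocks,
    uint32_t old_deadline, uint32_t grace)
{
	if (softlimit == 0 || curblocks < softlimit)
		return (0);		/* no quota or under quota: no timer */

	if (old_softlimit == 0 || old_curblocks < old_softlimit)
		return ((uint32_t)time(NULL) + grace);	/* newly over: start */

	return (old_deadline);		/* still over: keep the old deadline */
}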
/*
 * Set the quota file up for a particular file system.
 * Called as the result of a quotaon (Q_QUOTAON) ioctl.
 */
static int
opendq(
	struct ufsvfs *ufsvfsp,
	struct vnode *vp,		/* quota file */
	struct cred *cr)
{
	struct inode *qip;
	struct dquot *dqp;
	int error;
	int quotaon = 0;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	VN_HOLD(vp);

	/*
	 * Check to be sure it's a regular file.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		return (EACCES);
	}

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * We have vfs_dqrwlock as writer, so if quotas are disabled,
	 * then vfs_qinod should be NULL or we have a race somewhere.
	 */
	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) || (ufsvfsp->vfs_qinod == 0));

	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) != 0) {
		/*
		 * Quotas are already enabled on this file system.
		 *
		 * If the "quotas" file was replaced (different inode)
		 * while quotas were enabled we don't want to re-enable
		 * them with a new "quotas" file. Simply print a warning
		 * message to the console, release the new vnode, and
		 * return.
		 * XXX - The right way to fix this is to return EBUSY
		 * for the ioctl() issued by 'quotaon'.
		 */
		if (VTOI(vp) != ufsvfsp->vfs_qinod) {
			cmn_err(CE_WARN, "Previous quota file still in use."
			    " Disable quotas on %s before enabling.\n",
			    VTOI(vp)->i_fs->fs_fsmnt);
			VN_RELE(vp);
			rw_exit(&ufsvfsp->vfs_dqrwlock);
			return (0);
		}
		(void) quotasync(ufsvfsp, /* do_lock */ 0);
		/* remove extra hold on quota file */
		VN_RELE(vp);
		quotaon++;
		qip = ufsvfsp->vfs_qinod;
	} else {
		int qlen;

		ufsvfsp->vfs_qinod = VTOI(vp);
		qip = ufsvfsp->vfs_qinod;
		/*
		 * Force the file to have no partially allocated blocks
		 * to prevent a realloc from changing the location of
		 * the data. We must do this even if not logging, in
		 * case we later remount to logging.
		 */
		qlen = qip->i_fs->fs_bsize * NDADDR;

		/*
		 * Largefiles: i_size needs to be atomically accessed now.
		 */
		rw_enter(&qip->i_contents, RW_WRITER);
		if (qip->i_size < qlen) {
			if (ufs_itrunc(qip, (u_offset_t)qlen, (int)0, cr) != 0)
				cmn_err(CE_WARN, "opendq failed to remove frags"
				    " from quota file\n");
			rw_exit(&qip->i_contents);
			(void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)qip->i_size,
			    B_INVAL, kcred, NULL);
		} else {
			rw_exit(&qip->i_contents);
		}
		TRANS_MATA_IGET(ufsvfsp, qip);
	}

	/*
	 * The file system time limits are in the dquot for uid 0.
	 * The time limits set the relative time the other users
	 * can be over quota for this file system.
	 * If it is zero a default is used (see quota.h).
	 */
	error = getdiskquota((uid_t)0, ufsvfsp, 1, &dqp);
	if (error == 0) {
		mutex_enter(&dqp->dq_lock);
		ufsvfsp->vfs_btimelimit =
		    (dqp->dq_btimelimit? dqp->dq_btimelimit: DQ_BTIMELIMIT);
		ufsvfsp->vfs_ftimelimit =
		    (dqp->dq_ftimelimit? dqp->dq_ftimelimit: DQ_FTIMELIMIT);

		ufsvfsp->vfs_qflags = MQ_ENABLED;	/* enable quotas */
		vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_QUOTA, NULL, 0);
		dqput(dqp);
		mutex_exit(&dqp->dq_lock);
	} else if (!quotaon) {
		/*
		 * Some sort of I/O error on the quota file, and quotas were
		 * not already on when we got here so clean up.
		 */
		ufsvfsp->vfs_qflags = 0;
		ufsvfsp->vfs_qinod = NULL;
		VN_RELE(ITOV(qip));
	}

	/*
	 * If quotas are enabled update all valid inodes in the
	 * cache with quota information.
	 */
	if (ufsvfsp->vfs_qflags & MQ_ENABLED) {
		(void) ufs_scan_inodes(0, opendq_scan_inode, ufsvfsp, ufsvfsp);
	}

	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (error);
}
Example 10
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
	vdev_file_t *vf;
#ifdef __APPLE__
	struct vnode *vp, *rootdir;
	struct vnode_attr vattr;
	vfs_context_t context;
#else
	vnode_t *vp;
	vattr_t vattr;
#endif
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);

	/*
	 * We always open the files from the root of the global zone, even if
	 * we're in a local zone.  If the user has gotten to this point, the
	 * administrator has already decided that the pool should be available
	 * to local zone users, so the underlying devices should be as well.
	 */
	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
#ifdef __APPLE__
	rootdir = getrootdir();
#endif
	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, spa_mode | FOFFMAX,
	    0, &vp, 0, 0, rootdir);
	
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	vf->vf_vnode = vp;

#ifdef _KERNEL
	/*
	 * Make sure it's a regular file.
	 */
#ifdef __APPLE__
	if (!vnode_isreg(vp)) {
#else
	if (vp->v_type != VREG) {
#endif
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (ENODEV);
	}
#endif

	/*
	 * Determine the physical size of the file.
	 */
#ifdef __APPLE__
	VATTR_INIT(&vattr);
	VATTR_WANTED(&vattr, va_data_size);

	context = vfs_context_create((vfs_context_t)0);
	error = vnode_getattr(vp, &vattr, context);
	(void) vfs_context_rele(context);

	if (error || !VATTR_IS_SUPPORTED(&vattr, va_data_size)) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	*psize = vattr.va_data_size;
#else
	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, kcred);
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	*psize = vattr.va_size;
#endif
	*ashift = SPA_MINBLOCKSHIFT;

	return (0);
}
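Outside the kernel, the two checks vdev_file_open() applies to its backing store, verifying that it is a regular file and reading its size, map directly onto a stat(2) call. The short program below is only a user-space illustration of that analogue and is not part of the ZFS sources.

#include <stdio.h>
#include <sys/stat.h>

int
main(int argc, char **argv)
{
	struct stat st;

	if (argc < 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return (1);
	}
	if (stat(argv[1], &st) != 0) {
		perror("stat");
		return (1);
	}
	if (!S_ISREG(st.st_mode)) {	/* mirrors the VREG/vnode_isreg check */
		fprintf(stderr, "%s: not a regular file\n", argv[1]);
		return (1);
	}
	printf("psize = %lld\n", (long long)st.st_size);	/* va_size analogue */
	return (0);
}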

static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
#ifdef __APPLE__
		vfs_context_t context;

		context = vfs_context_create((vfs_context_t)0);
		/* ### APPLE TODO #### */
	//	(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) vnode_close(vf->vf_vnode, spa_mode, context);
		(void) vfs_context_rele(context);
#else
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred);
		VN_RELE(vf->vf_vnode);
#endif
	}

	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
Example 11
/*
 * Flush all vnodes in this (or every) vfs.
 * Used by nfs_sync and by nfs_unmount.
 */
void
r4flush(struct vfs *vfsp, cred_t *cr)
{
	int index;
	rnode4_t *rp;
	vnode_t *vp, **vplist;
	long num, cnt;

	/*
	 * Check to see whether there is anything to do.
	 */
	num = rnode4_new;
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active rnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * Walk the hash queues looking for rnodes with page
	 * lists associated with them.  Make a list of these
	 * files.
	 */
	for (index = 0; index < rtable4size; index++) {
		rw_enter(&rtable4[index].r_lock, RW_READER);
		for (rp = rtable4[index].r_hashf;
		    rp != (rnode4_t *)(&rtable4[index]);
		    rp = rp->r_hashf) {
			vp = RTOV4(rp);
			/*
			 * Don't bother sync'ing a vp if it
			 * is part of virtual swap device or
			 * if VFS is read-only
			 */
			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
				continue;
			/*
			 * If flushing all mounted file systems or
			 * the vnode belongs to this vfs, has pages
			 * and is marked as either dirty or mmap'd,
			 * hold and add this vnode to the list of
			 * vnodes to flush.
			 */
			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
			    nfs4_has_pages(vp) &&
			    ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
				VN_HOLD(vp);
				vplist[cnt++] = vp;
				if (cnt == num) {
					rw_exit(&rtable4[index].r_lock);
					goto toomany;
				}
			}
		}
		rw_exit(&rtable4[index].r_lock);
	}
toomany:

	/*
	 * Flush and release all of the files on the list.
	 */
	while (cnt-- > 0) {
		vp = vplist[cnt];
		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
		VN_RELE(vp);
	}

	/*
	 * Free the space allocated to hold the list.
	 */
	kmem_free(vplist, num * sizeof (*vplist));
}
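r4flush() copes with the race between counting rnodes and walking the hash table by treating rnode4_new purely as a sizing hint: vplist is allocated from that snapshot, and collection simply stops (the toomany label) if more candidates turn up than there are slots. The toy program below shows that bounded-collection shape with invented data; missing a few dirty files on one pass is acceptable because the flush is best-effort and will run again.

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int	dirty;
};

#define	NOBJS	8
static struct obj objs[NOBJS] = {
	{ 1 }, { 0 }, { 1 }, { 1 }, { 0 }, { 1 }, { 0 }, { 1 }
};

int
main(void)
{
	long num = 3;		/* racy snapshot; may undercount */
	long cnt = 0;
	struct obj **list;
	int i;

	list = malloc(num * sizeof (*list));
	if (list == NULL)
		return (1);

	for (i = 0; i < NOBJS; i++) {
		if (!objs[i].dirty)
			continue;
		list[cnt++] = &objs[i];
		if (cnt == num)	/* list full: stop early ("goto toomany") */
			break;
	}

	while (cnt-- > 0)	/* flush and release each collected entry */
		printf("flush obj %ld\n", (long)(list[cnt] - objs));

	free(list);
	return (0);
}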