/*
 * Flush any pages left on this rnode.
 */
static void
r4flushpages(rnode4_t *rp, cred_t *cr)
{
	vnode_t *vp;
	int error;

	/*
	 * Before freeing anything, wait until all asynchronous
	 * activity is done on this rnode.  This will allow all
	 * asynchronous read ahead and write behind i/o's to
	 * finish.
	 */
	mutex_enter(&rp->r_statelock);
	while (rp->r_count > 0)
		cv_wait(&rp->r_cv, &rp->r_statelock);
	mutex_exit(&rp->r_statelock);

	/*
	 * Flush and invalidate all pages associated with the vnode.
	 */
	vp = RTOV4(rp);
	if (nfs4_has_pages(vp)) {
		ASSERT(vp->v_type != VCHR);
		if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
			if (error && (error == ENOSPC || error == EDQUOT)) {
				mutex_enter(&rp->r_statelock);
				if (!rp->r_error)
					rp->r_error = error;
				mutex_exit(&rp->r_statelock);
			}
		}
		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
	}
}
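A minimal caller sketch, not from the source tree, to show where a routine like this fits: pages are quiesced and written back before the rnode's other resources are torn down. The helper name my_r4teardown() is invented for illustration only.

/*
 * Hypothetical sketch: flush an rnode's cached pages before releasing it.
 * r4flushpages() waits for async I/O, writes back dirty pages, and then
 * invalidates them, latching any ENOSPC/EDQUOT error in r_error.
 */
static void
my_r4teardown(rnode4_t *rp, cred_t *cr)
{
	r4flushpages(rp, cr);
	/* ... proceed to free the rnode's remaining resources ... */
}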
/*ARGSUSED*/
int
spec_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
	struct snode *sync_list;
	register struct snode **spp, *sp, *spnext;
	register struct vnode *vp;

	if (mutex_tryenter(&spec_syncbusy) == 0)
		return (0);

	if (flag & SYNC_ATTR) {
		mutex_exit(&spec_syncbusy);
		return (0);
	}
	mutex_enter(&stable_lock);
	sync_list = NULL;
	/*
	 * Find all the snodes that are dirty and add them to the sync_list
	 */
	for (spp = stable; spp < &stable[STABLESIZE]; spp++) {
		for (sp = *spp; sp != NULL; sp = sp->s_next) {
			vp = STOV(sp);
			/*
			 * Don't bother sync'ing a vp if it's
			 * part of a virtual swap device.
			 */
			if (IS_SWAPVP(vp))
				continue;

			if (vp->v_type == VBLK && vn_has_cached_data(vp)) {
				/*
				 * Prevent vp from going away before we
				 * get a chance to do a VOP_PUTPAGE
				 * via sync_list processing
				 */
				VN_HOLD(vp);
				sp->s_list = sync_list;
				sync_list = sp;
			}
		}
	}
	mutex_exit(&stable_lock);
	/*
	 * Now write out all the snodes we marked asynchronously.
	 */
	for (sp = sync_list; sp != NULL; sp = spnext) {
		spnext = sp->s_list;
		vp = STOV(sp);
		(void) VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_ASYNC, cr);
		VN_RELE(vp);		/* Release our hold on vnode */
	}
	mutex_exit(&spec_syncbusy);
	return (0);
}
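A minimal sketch of the collect-then-flush idiom used above, with hypothetical names (struct my_item, my_flush_list): holds are taken while the table lock is held, and the asynchronous VOP_PUTPAGE calls are issued only after the lock has been dropped.

/*
 * Illustrative only: flush a list of held vnodes collected earlier.
 * Each entry was VN_HOLD'd at collect time, so the vnode cannot go
 * away before the putpage is issued here.
 */
struct my_item {
	struct vnode	*mi_vp;
	struct my_item	*mi_next;
};

static void
my_flush_list(struct my_item *list, struct cred *cr)
{
	struct my_item *mi, *next;

	for (mi = list; mi != NULL; mi = next) {
		next = mi->mi_next;
		/* offset 0 / length 0 means "all cached pages" */
		(void) VOP_PUTPAGE(mi->mi_vp, (offset_t)0, (uint_t)0,
		    B_ASYNC, cr);
		VN_RELE(mi->mi_vp);	/* drop the hold taken at collect time */
	}
}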
int
syncpcp(struct pcnode *pcp, int flags)
{
	int err;

	if (!vn_has_cached_data(PCTOV(pcp)))
		err = 0;
	else
		err = VOP_PUTPAGE(PCTOV(pcp), 0, 0, flags, kcred, NULL);

	return (err);
}
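A hypothetical usage sketch (the wrapper name my_pcnode_flush is invented): the flags argument is passed straight through to VOP_PUTPAGE, so callers choose between waiting for the writeback or starting it asynchronously.

static int
my_pcnode_flush(struct pcnode *pcp, int async)
{
	/* B_ASYNC starts the writeback without waiting; 0 waits for it */
	return (syncpcp(pcp, async ? B_ASYNC : 0));
}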
static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL);
		VN_RELE(vf->vf_vnode);
	}

	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vd->vdev_reopening || vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0,
		    kcred, NULL);
	}

	vd->vdev_delayed_close = B_FALSE;
	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
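The flush-before-close pattern shared by both vdev_file_close() variants, isolated as a hypothetical helper (my_flush_and_close is not a real function in the tree): cached pages for the backing file are invalidated before the final close so stale data is not left in the page cache across a reopen.

static void
my_flush_and_close(vnode_t *vp, int mode, cred_t *cr)
{
	/* write back and invalidate every cached page of the file */
	(void) VOP_PUTPAGE(vp, 0, 0, B_INVAL, cr, NULL);
	(void) VOP_CLOSE(vp, mode, 1, 0, cr, NULL);
}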
/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr)) / v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (ie. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}

		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;

		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}

		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit. Leaving the bit alone in hardware.
		 * It will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}
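The per-page writeback idiom from the loop above, isolated as a hypothetical helper (my_flush_one_page is invented for illustration): the vnode hold must be taken before the page lock is dropped, otherwise the vnode could be freed and reused before VOP_PUTPAGE() runs.

static void
my_flush_one_page(page_t *pp)
{
	vnode_t		*vp = pp->p_vnode;
	u_offset_t	offset = pp->p_offset;

	VN_HOLD(vp);			/* keep vp alive across the unlock */
	page_unlock(pp);
	/* write back just this one page, asynchronously */
	(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC, kcred, NULL);
	VN_RELE(vp);
}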
/*
 * ufs_alloc_data - supports allocating space and reads or writes
 * that involve changes to file length or space allocation.
 *
 * This function is more expensive, because of the UFS log transaction,
 * so ufs_rdwr_data() should be used when space or file length changes
 * will not occur.
 *
 * Inputs:
 * fdb - A null pointer instructs this function to only allocate
 *	 space for the specified offset and length.
 *	 An actual fdbuffer instructs this function to perform
 *	 the read or write operation.
 * flags - defaults (zero value) to synchronous write
 *	   B_READ - indicates read operation
 *	   B_ASYNC - indicates perform operation asynchronously
 */
int
ufs_alloc_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		*len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	size_t		done_len, io_len;
	int		contig;
	u_offset_t	uoff, io_off;
	int		error = 0;	/* No error occurred */
	int		offsetn;	/* Start point this IO */
	int		nbytes;		/* Number bytes in this IO */
	daddr_t		bn;
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	int		i_size_changed = 0;
	u_offset_t	old_i_size;
	struct ulockfs	*ulp;
	int		trans_size;
	int		issync;		/* UFS Log transaction */
					/* synchronous when non-zero */
	int		io_started = 0;	/* No IO started */
	uint_t		protp = PROT_ALL;

	ASSERT((flags & B_WRITE) == 0);

	/*
	 * Obey the lockfs protocol
	 */
	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
	if (error) {
		if ((fdbp != NULL) && (flags & B_ASYNC)) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	if (ulp) {
		/*
		 * Try to begin a UFS log transaction
		 */
		trans_size = TOP_GETPAGE_SIZE(ip);
		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
		    trans_size, error);
		if (error == EWOULDBLOCK) {
			ufs_lockfs_end(ulp);
			if ((fdbp != NULL) && (flags & B_ASYNC)) {
				fdb_ioerrdone(fdbp, EDEADLK);
			}
			return (EDEADLK);
		}
	}

	uoff = offset;
	io_off = offset;
	io_len = *len;
	done_len = 0;

	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ASSERT((ip->i_mode & IFMT) == IFREG);

	fs = ip->i_fs;

	while (error == 0 && done_len < io_len) {
		uoff = (u_offset_t)(io_off + done_len);
		offsetn = (int)blkoff(fs, uoff);
		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);

		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
		    uoff, nbytes));

		if (uoff + nbytes > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
			    ip->i_size, uoff + nbytes));

			error = bmap_write(ip, uoff, (offsetn + nbytes),
			    BI_ALLOC_ONLY, NULL, credp);
			if (ip->i_flag & (ICHG|IUPD))
				ip->i_seq++;
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
				    "failed err: %d\n", error));
				break;
			}
			if (fdbp != NULL) {
				if (uoff >= ip->i_size) {
					/*
					 * Desired offset is past end of bytes
					 * in file, so we have a hole.
					 */
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				} else {
					int contig;
					buf_t *bp;

					error = bmap_read(ip, uoff, &bn,
					    &contig);
					if (error) {
						break;
					}

					contig = ip->i_size - uoff;
					contig = P2ROUNDUP(contig, DEV_BSIZE);

					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
					if (contig > (ip->i_size - uoff)) {
						contig -= ip->i_size - uoff;

						fdb_add_hole(fdbp,
						    ip->i_size - offset,
						    contig);
					}
				}
			}

			i_size_changed = 1;
			old_i_size = ip->i_size;
			UFS_SET_ISIZE(uoff + nbytes, ip);
			TRANS_INODE(ip->i_ufsvfs, ip);
			/*
			 * file has grown larger than 2GB. Set flag
			 * in superblock to indicate this, if it
			 * is not already set.
			 */
			if ((ip->i_size > MAXOFF32_T) &&
			    !(fs->fs_flags & FSLARGEFILES)) {
				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
				mutex_enter(&ufsvfsp->vfs_lock);
				fs->fs_flags |= FSLARGEFILES;
				ufs_sbwrite(ufsvfsp);
				mutex_exit(&ufsvfsp->vfs_lock);
			}
		} else {
			/*
			 * The file length is not being extended.
			 */
			error = bmap_read(ip, uoff, &bn, &contig);
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: "
				    "bmap_read err: %d\n", error));
				break;
			}

			if (bn != UFS_HOLE) {
				/*
				 * Did not map a hole in the file
				 */
				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
				buf_t	*bp;

				if (fdbp != NULL) {
					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
				}
			} else {
				/*
				 * We read a hole in the file.
				 * We have to allocate blocks for the hole.
				 */
				error = bmap_write(ip, uoff, (offsetn + nbytes),
				    BI_ALLOC_ONLY, NULL, credp);
				if (ip->i_flag & (ICHG|IUPD))
					ip->i_seq++;
				if (error) {
					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
					    " hole failed error: %d\n", error));
					break;
				}
				if (fdbp != NULL) {
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				}
			}
		}
		done_len += nbytes;
	}

	if (error) {
		if (i_size_changed) {
			/*
			 * Allocation of the blocks for the file failed.
			 * So truncate the file size back to its original size.
			 */
			(void) ufs_itrunc(ip, old_i_size, 0, credp);
		}
	}

	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
	    uoff, (io_len - done_len)));

	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
	} else {
		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
	}

	/*
	 * Flush cached pages.
	 *
	 * XXX - There should be no pages involved, since the I/O was performed
	 * through the device strategy routine and the page cache was bypassed.
	 * However, testing has demonstrated that this VOP_PUTPAGE is
	 * necessary. Without this, data might not always be read back as it
	 * was written.
	 */
	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp);

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if ((fdbp != NULL) && (flags & B_ASYNC)) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		/*
		 * End the UFS Log transaction
		 */
		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
		    trans_size);
		ufs_lockfs_end(ulp);
	}
	if (io_started && (flags & B_ASYNC)) {
		return (0);
	} else {
		return (error);
	}
}
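A hypothetical caller sketch based on the header comment above: passing a NULL fdbuffer asks ufs_alloc_data() to allocate space only, with no data transfer, and the function rounds *len up to a fragment or block boundary before returning. The helper name my_ufs_prealloc is invented for illustration.

static int
my_ufs_prealloc(vnode_t *vp, u_offset_t off, size_t len, cred_t *cr)
{
	size_t alen = len;

	/* NULL fdbuffer: allocate blocks for [off, off + len) without I/O */
	return (ufs_alloc_data(vp, off, &alen, NULL, 0, cr));
}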
/*
 * Set various fields of the dqblk according to the command.
 * Q_SETQUOTA - assign an entire dqblk structure.
 * Q_SETQLIM - assign a dqblk structure except for the usage.
 */
static int
setquota(int cmd, uid_t uid, struct ufsvfs *ufsvfsp,
    caddr_t addr, struct cred *cr)
{
	struct dquot *dqp;
	struct inode *qip;
	struct dquot *xdqp;
	struct dqblk newlim;
	int error;
	int scan_type = SQD_TYPE_NONE;
	daddr_t bn;
	int contig;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * Quotas are not enabled on this file system so there is
	 * nothing more to do.
	 */
	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (ESRCH);
	}

	/*
	 * At this point, the quota subsystem is quiescent on this file
	 * system so we can do all the work necessary to modify the quota
	 * information for this user.
	 */

	if (copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)) != 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (EFAULT);
	}
	error = getdiskquota(uid, ufsvfsp, 0, &xdqp);
	if (error) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (error);
	}
	dqp = xdqp;
	/*
	 * Don't change disk usage on Q_SETQLIM
	 */
	mutex_enter(&dqp->dq_lock);
	if (cmd == Q_SETQLIM) {
		newlim.dqb_curblocks = dqp->dq_curblocks;
		newlim.dqb_curfiles = dqp->dq_curfiles;
	}
	if (uid == 0) {
		/*
		 * Timelimits for uid 0 set the relative time
		 * the other users can be over quota for this file system.
		 * If it is zero a default is used (see quota.h).
		 */
		ufsvfsp->vfs_btimelimit = newlim.dqb_btimelimit ?
		    newlim.dqb_btimelimit : DQ_BTIMELIMIT;
		ufsvfsp->vfs_ftimelimit = newlim.dqb_ftimelimit ?
		    newlim.dqb_ftimelimit : DQ_FTIMELIMIT;
	} else {
		if (newlim.dqb_bsoftlimit &&
		    newlim.dqb_curblocks >= newlim.dqb_bsoftlimit) {
			if (dqp->dq_bsoftlimit == 0 ||
			    dqp->dq_curblocks < dqp->dq_bsoftlimit) {
				/* If we're suddenly over the limit(s), */
				/* start the timer(s) */
				newlim.dqb_btimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_btimelimit;
				dqp->dq_flags &= ~DQ_BLKS;
			} else {
				/* If we're currently over the soft */
				/* limit and were previously over the */
				/* soft limit then preserve the old */
				/* time limit but make sure the DQ_BLKS */
				/* flag is set since we must have been */
				/* previously warned. */
				newlim.dqb_btimelimit = dqp->dq_btimelimit;
				dqp->dq_flags |= DQ_BLKS;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_btimelimit = 0;
			dqp->dq_flags &= ~DQ_BLKS;
		}

		if (newlim.dqb_fsoftlimit &&
		    newlim.dqb_curfiles >= newlim.dqb_fsoftlimit) {
			if (dqp->dq_fsoftlimit == 0 ||
			    dqp->dq_curfiles < dqp->dq_fsoftlimit) {
				/* If we're suddenly over the limit(s), */
				/* start the timer(s) */
				newlim.dqb_ftimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_ftimelimit;
				dqp->dq_flags &= ~DQ_FILES;
			} else {
				/* If we're currently over the soft */
				/* limit and were previously over the */
				/* soft limit then preserve the old */
				/* time limit but make sure the */
				/* DQ_FILES flag is set since we must */
				/* have been previously warned. */
				newlim.dqb_ftimelimit = dqp->dq_ftimelimit;
				dqp->dq_flags |= DQ_FILES;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_ftimelimit = 0;
			dqp->dq_flags &= ~DQ_FILES;
		}
	}

	/*
	 * If there was previously no limit and there is now at least
	 * one limit, then any inodes in the cache have NULL d_iquot
	 * fields (getinoquota() returns NULL when there are no limits).
	 */
	if ((dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) &&
	    (newlim.dqb_fhardlimit || newlim.dqb_fsoftlimit ||
	    newlim.dqb_bhardlimit || newlim.dqb_bsoftlimit)) {
		scan_type = SQD_TYPE_LIMIT;
	}

	/*
	 * If there was previously at least one limit and there is now
	 * no limit, then any inodes in the cache that have non-NULL
	 * d_iquot fields need to be reset to NULL.
	 */
	else if ((dqp->dq_fhardlimit || dqp->dq_fsoftlimit ||
	    dqp->dq_bhardlimit || dqp->dq_bsoftlimit) &&
	    (newlim.dqb_fhardlimit == 0 && newlim.dqb_fsoftlimit == 0 &&
	    newlim.dqb_bhardlimit == 0 && newlim.dqb_bsoftlimit == 0)) {
		scan_type = SQD_TYPE_NO_LIMIT;
	}

	dqp->dq_dqb = newlim;
	dqp->dq_flags |= DQ_MOD;

	/*
	 * push the new quota to disk now.  If this is a trans device
	 * then force the page out with ufs_putpage so it will be deltaed
	 * by ufs_startio.
	 */
	qip = ufsvfsp->vfs_qinod;
	rw_enter(&qip->i_contents, RW_WRITER);
	(void) ufs_rdwri(UIO_WRITE, FWRITE | FSYNC, qip, (caddr_t)&dqp->dq_dqb,
	    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
	    (int *)NULL, kcred);
	rw_exit(&qip->i_contents);

	(void) VOP_PUTPAGE(ITOV(qip),
	    dqoff(dqp->dq_uid) & ~qip->i_fs->fs_bmask,
	    qip->i_fs->fs_bsize, B_INVAL, kcred, NULL);

	/*
	 * We must set the dq_mof even if we are not logging, in case
	 * we later remount to logging.
	 */
	contig = 0;
	rw_enter(&qip->i_contents, RW_WRITER);
	error = bmap_read(qip, dqoff(dqp->dq_uid), &bn, &contig);
	rw_exit(&qip->i_contents);
	if (error || (bn == UFS_HOLE)) {
		dqp->dq_mof = UFS_HOLE;
	} else {
		dqp->dq_mof = ldbtob(bn) +
		    (offset_t)((dqoff(dqp->dq_uid)) & (DEV_BSIZE - 1));
	}

	dqp->dq_flags &= ~DQ_MOD;
	dqput(dqp);
	mutex_exit(&dqp->dq_lock);
	if (scan_type) {
		struct setquota_data sqd;

		sqd.sqd_type = scan_type;
		sqd.sqd_ufsvfsp = ufsvfsp;
		sqd.sqd_uid = uid;

		(void) ufs_scan_inodes(0, setquota_scan_inode, &sqd, ufsvfsp);
	}
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (0);
}
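The page range flushed by the VOP_PUTPAGE above, restated as a hypothetical helper (my_push_quota_record is invented): dqoff(uid) is this uid's byte offset in the quotas file, and masking with ~fs_bmask rounds that offset down to the file system block containing the record, so exactly one block is pushed out and invalidated.

static void
my_push_quota_record(struct inode *qip, uid_t uid)
{
	/* block-aligned offset of the dqblk record for this uid */
	offset_t blk_off = dqoff(uid) & ~qip->i_fs->fs_bmask;

	(void) VOP_PUTPAGE(ITOV(qip), blk_off, qip->i_fs->fs_bsize,
	    B_INVAL, kcred, NULL);
}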
/*
 * Set the quota file up for a particular file system.
 * Called as the result of a quotaon (Q_QUOTAON) ioctl.
 */
static int
opendq(
	struct ufsvfs *ufsvfsp,
	struct vnode *vp,		/* quota file */
	struct cred *cr)
{
	struct inode *qip;
	struct dquot *dqp;
	int error;
	int quotaon = 0;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	VN_HOLD(vp);

	/*
	 * Check to be sure it's a regular file.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		return (EACCES);
	}

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * We have vfs_dqrwlock as writer, so if quotas are disabled,
	 * then vfs_qinod should be NULL or we have a race somewhere.
	 */
	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) || (ufsvfsp->vfs_qinod == 0));

	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) != 0) {
		/*
		 * Quotas are already enabled on this file system.
		 *
		 * If the "quotas" file was replaced (different inode)
		 * while quotas were enabled we don't want to re-enable
		 * them with a new "quotas" file. Simply print a warning
		 * message to the console, release the new vnode, and
		 * return.
		 * XXX - The right way to fix this is to return EBUSY
		 * for the ioctl() issued by 'quotaon'.
		 */
		if (VTOI(vp) != ufsvfsp->vfs_qinod) {
			cmn_err(CE_WARN, "Previous quota file still in use."
			    " Disable quotas on %s before enabling.\n",
			    VTOI(vp)->i_fs->fs_fsmnt);
			VN_RELE(vp);
			rw_exit(&ufsvfsp->vfs_dqrwlock);
			return (0);
		}
		(void) quotasync(ufsvfsp, /* do_lock */ 0);
		/* remove extra hold on quota file */
		VN_RELE(vp);
		quotaon++;
		qip = ufsvfsp->vfs_qinod;
	} else {
		int qlen;

		ufsvfsp->vfs_qinod = VTOI(vp);
		qip = ufsvfsp->vfs_qinod;
		/*
		 * Force the file to have no partially allocated blocks
		 * to prevent a realloc from changing the location of
		 * the data. We must do this even if not logging in
		 * case we later remount to logging.
		 */
		qlen = qip->i_fs->fs_bsize * NDADDR;

		/*
		 * Largefiles: i_size needs to be atomically accessed now.
		 */
		rw_enter(&qip->i_contents, RW_WRITER);
		if (qip->i_size < qlen) {
			if (ufs_itrunc(qip, (u_offset_t)qlen, (int)0, cr) != 0)
				cmn_err(CE_WARN, "opendq failed to remove frags"
				    " from quota file\n");
			rw_exit(&qip->i_contents);
			(void) VOP_PUTPAGE(vp, (offset_t)0,
			    (size_t)qip->i_size, B_INVAL, kcred, NULL);
		} else {
			rw_exit(&qip->i_contents);
		}
		TRANS_MATA_IGET(ufsvfsp, qip);
	}

	/*
	 * The file system time limits are in the dquot for uid 0.
	 * The time limits set the relative time the other users
	 * can be over quota for this file system.
	 * If it is zero a default is used (see quota.h).
	 */
	error = getdiskquota((uid_t)0, ufsvfsp, 1, &dqp);
	if (error == 0) {
		mutex_enter(&dqp->dq_lock);
		ufsvfsp->vfs_btimelimit =
		    (dqp->dq_btimelimit ? dqp->dq_btimelimit : DQ_BTIMELIMIT);
		ufsvfsp->vfs_ftimelimit =
		    (dqp->dq_ftimelimit ? dqp->dq_ftimelimit : DQ_FTIMELIMIT);

		ufsvfsp->vfs_qflags = MQ_ENABLED;	/* enable quotas */
		vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_QUOTA, NULL, 0);
		dqput(dqp);
		mutex_exit(&dqp->dq_lock);
	} else if (!quotaon) {
		/*
		 * Some sort of I/O error on the quota file, and quotas were
		 * not already on when we got here so clean up.
		 */
		ufsvfsp->vfs_qflags = 0;
		ufsvfsp->vfs_qinod = NULL;
		VN_RELE(ITOV(qip));
	}

	/*
	 * If quotas are enabled update all valid inodes in the
	 * cache with quota information.
	 */
	if (ufsvfsp->vfs_qflags & MQ_ENABLED) {
		(void) ufs_scan_inodes(0, opendq_scan_inode, ufsvfsp, ufsvfsp);
	}

	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (error);
}
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
{
	vdev_file_t *vf;
#ifdef __APPLE__
	struct vnode *vp, *rootdir;
	struct vnode_attr vattr;
	vfs_context_t context;
#else
	vnode_t *vp;
	vattr_t vattr;
#endif
	int error;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);

	/*
	 * We always open the files from the root of the global zone, even if
	 * we're in a local zone.  If the user has gotten to this point, the
	 * administrator has already decided that the pool should be available
	 * to local zone users, so the underlying devices should be as well.
	 */
	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');

#ifdef __APPLE__
	rootdir = getrootdir();
#endif
	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
	    spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir);

	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	vf->vf_vnode = vp;

#ifdef _KERNEL
	/*
	 * Make sure it's a regular file.
	 */
#ifdef __APPLE__
	if (!vnode_isreg(vp)) {
#else
	if (vp->v_type != VREG) {
#endif
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (ENODEV);
	}
#endif

	/*
	 * Determine the physical size of the file.
	 */
#ifdef __APPLE__
	VATTR_INIT(&vattr);
	VATTR_WANTED(&vattr, va_data_size);

	context = vfs_context_create((vfs_context_t)0);

	error = vnode_getattr(vp, &vattr, context);

	(void) vfs_context_rele(context);

	if (error || !VATTR_IS_SUPPORTED(&vattr, va_data_size)) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	*psize = vattr.va_data_size;
#else
	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, kcred);
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}
	*psize = vattr.va_size;
#endif

	*ashift = SPA_MINBLOCKSHIFT;

	return (0);
}

static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *vf = vd->vdev_tsd;

	if (vf == NULL)
		return;

	if (vf->vf_vnode != NULL) {
#ifdef __APPLE__
		vfs_context_t context;

		context = vfs_context_create((vfs_context_t)0);
		/* ### APPLE TODO #### */
		// (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) vnode_close(vf->vf_vnode, spa_mode, context);
		(void) vfs_context_rele(context);
#else
		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred);
		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred);
		VN_RELE(vf->vf_vnode);
#endif
	}

	kmem_free(vf, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
/*
 * Flush all vnodes in this (or every) vfs.
 * Used by nfs_sync and by nfs_unmount.
 */
void
r4flush(struct vfs *vfsp, cred_t *cr)
{
	int index;
	rnode4_t *rp;
	vnode_t *vp, **vplist;
	long num, cnt;

	/*
	 * Check to see whether there is anything to do.
	 */
	num = rnode4_new;
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active rnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * Walk the hash queues looking for rnodes with page
	 * lists associated with them.  Make a list of these
	 * files.
	 */
	for (index = 0; index < rtable4size; index++) {
		rw_enter(&rtable4[index].r_lock, RW_READER);
		for (rp = rtable4[index].r_hashf;
		    rp != (rnode4_t *)(&rtable4[index]);
		    rp = rp->r_hashf) {
			vp = RTOV4(rp);
			/*
			 * Don't bother sync'ing a vp if it
			 * is part of virtual swap device or
			 * if VFS is read-only
			 */
			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
				continue;
			/*
			 * If flushing all mounted file systems or
			 * the vnode belongs to this vfs, has pages
			 * and is marked as either dirty or mmap'd,
			 * hold and add this vnode to the list of
			 * vnodes to flush.
			 */
			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
			    nfs4_has_pages(vp) &&
			    ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
				VN_HOLD(vp);
				vplist[cnt++] = vp;
				if (cnt == num) {
					rw_exit(&rtable4[index].r_lock);
					goto toomany;
				}
			}
		}
		rw_exit(&rtable4[index].r_lock);
	}
toomany:

	/*
	 * Flush and release all of the files on the list.
	 */
	while (cnt-- > 0) {
		vp = vplist[cnt];
		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
		VN_RELE(vp);
	}

	/*
	 * Free the space allocated to hold the list.
	 */
	kmem_free(vplist, num * sizeof (*vplist));
}
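A hypothetical usage sketch based on the header comment: passing a specific vfsp limits the flush to that mount, while passing NULL walks every NFSv4 rnode in the hash table. The wrapper name my_nfs4_sync_all is invented for illustration.

static void
my_nfs4_sync_all(cred_t *cr)
{
	/* NULL vfsp: flush dirty or mmap'd rnodes on every NFSv4 mount */
	r4flush(NULL, cr);
}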