/* * Real work associated with removing an extended attribute from a vnode. * Assumes the attribute lock has already been grabbed. */ static int ufs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name, struct ucred *cred, struct thread *td) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; int error = 0, ioflag; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (!ufs_extattr_valid_attrname(attrnamespace, name)) return (EINVAL); error = extattr_check_cred(vp, attrnamespace, cred, td, VWRITE); if (error) return (error); attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) return (ENOATTR); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Check to see if currently defined. */ bzero(&ueh, sizeof(struct ufs_extattr_header)); local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_READ; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_td = td; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); /* * Don't need to get the lock on the backing vnode if the vnode we're * modifying is it, as we already hold the lock. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(attribute->uele_backing_vnode, &local_aio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; /* Defined? */ if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { error = ENOATTR; goto vopunlock_exit; } /* Valid for the current inode generation? */ if (ueh.ueh_i_gen != ip->i_gen) { /* * The inode itself has a different generation number than * the attribute data. For now, the best solution is to * coerce this to undefined, and let it get cleaned up by * the next write or extattrctl clean. */ printf("ufs_extattr_rm (%s): inode number inconsistency (%d, %jd)\n", mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen); error = ENOATTR; goto vopunlock_exit; } /* Flag it as not in use. */ ueh.ueh_flags = 0; ueh.ueh_len = 0; local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_WRITE; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_td = td; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; if (local_aio.uio_resid != 0) error = ENXIO; vopunlock_exit: VOP_UNLOCK(attribute->uele_backing_vnode, 0); return (error); }
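/*
 * Illustrative sketch (not kernel code): the backing file for an extended
 * attribute holds one fixed-size record per inode number after a file
 * header, which is why ufs_extattr_rm() and ufs_extattr_get() compute the
 * same base_offset.  The header sizes below are made-up stand-ins for
 * sizeof(struct ufs_extattr_fileheader) and sizeof(struct ufs_extattr_header).
 */
#include <stdint.h>
#include <stdio.h>

#define FILEHEADER_SZ   8       /* assumed file header size */
#define RECHEADER_SZ    16      /* assumed per-record header size */

static int64_t
attr_record_offset(uint64_t ino, uint32_t max_data_size)
{
    /* one record per inode number, after the file header */
    return FILEHEADER_SZ + (int64_t)ino * (RECHEADER_SZ + max_data_size);
}

int
main(void)
{
    printf("inode 42 -> offset %lld\n",
        (long long)attr_record_offset(42, 1024));
    return 0;
}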
/* * Q_QUOTAON - set up a quota file for a particular filesystem. */ int quotaon(struct thread *td, struct mount *mp, int type, void *fname) { struct ufsmount *ump; struct vnode *vp, **vpp; struct vnode *mvp; struct dquot *dq; int error, flags; struct nameidata nd; error = priv_check(td, PRIV_UFS_QUOTAON); if (error != 0) { vfs_unbusy(mp); return (error); } if ((mp->mnt_flag & MNT_RDONLY) != 0) { vfs_unbusy(mp); return (EROFS); } ump = VFSTOUFS(mp); dq = NODQUOT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td); flags = FREAD | FWRITE; vfs_ref(mp); vfs_unbusy(mp); error = vn_open(&nd, &flags, 0, NULL); if (error != 0) { vfs_rel(mp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; error = vfs_busy(mp, MBF_NOWAIT); vfs_rel(mp); if (error == 0) { if (vp->v_type != VREG) { error = EACCES; vfs_unbusy(mp); } } if (error != 0) { VOP_UNLOCK(vp, 0); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); return (error); } UFS_LOCK(ump); if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { UFS_UNLOCK(ump); VOP_UNLOCK(vp, 0); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); vfs_unbusy(mp); return (EALREADY); } ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING; UFS_UNLOCK(ump); if ((error = dqopen(vp, ump, type)) != 0) { VOP_UNLOCK(vp, 0); UFS_LOCK(ump); ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING); UFS_UNLOCK(ump); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); vfs_unbusy(mp); return (error); } VOP_UNLOCK(vp, 0); MNT_ILOCK(mp); mp->mnt_flag |= MNT_QUOTA; MNT_IUNLOCK(mp); vpp = &ump->um_quotas[type]; if (*vpp != vp) quotaoff1(td, mp, type); /* * When the directory vnode containing the quota file is * inactivated, due to the shared lookup of the quota file * vput()ing the dvp, the qsyncvp() call for the containing * directory would try to acquire the quota lock exclusive. * At the same time, lookup already locked the quota vnode * shared. Mark the quota vnode lock as allowing recursion * and automatically converting shared locks to exclusive. * * Also mark quota vnode as system. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_SYSTEM; VN_LOCK_AREC(vp); VN_LOCK_DSHARE(vp); VOP_UNLOCK(vp, 0); *vpp = vp; /* * Save the credential of the process that turned on quotas. * Set up the time limits for this quota. */ ump->um_cred[type] = crhold(td->td_ucred); ump->um_btime[type] = MAX_DQ_TIME; ump->um_itime[type] = MAX_IQ_TIME; if (dqget(NULLVP, 0, ump, type, &dq) == 0) { if (dq->dq_btime > 0) ump->um_btime[type] = dq->dq_btime; if (dq->dq_itime > 0) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } /* * Allow the getdq from getinoquota below to read the quota * from file. */ UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_CLOSING; UFS_UNLOCK(ump); /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ again: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto again; } if (vp->v_type == VNON || vp->v_writecount == 0) { VOP_UNLOCK(vp, 0); vrele(vp); continue; } error = getinoquota(VTOI(vp)); VOP_UNLOCK(vp, 0); vrele(vp); if (error) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); break; } } if (error) quotaoff_inchange(td, mp, type); UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_OPENING; KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0, ("quotaon: leaking flags")); UFS_UNLOCK(ump); vfs_unbusy(mp); return (error); }
/* * load_inode_bitmap loads the inode bitmap for a blocks group * * It maintains a cache for the last bitmaps loaded. This cache is managed * with a LRU algorithm. * * Notes: * 1/ There is one cache per mounted file system. * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, * this function reads the bitmap without maintaining a LRU cache. */ static int load_inode_bitmap (struct mount * mp, unsigned int block_group) { struct ext2_sb_info *sb = VFSTOUFS(mp)->um_e2fs; int i, j; unsigned long inode_bitmap_number; struct buffer_head * inode_bitmap; if (block_group >= sb->s_groups_count) panic ("load_inode_bitmap:" "block_group >= groups_count - " "block_group = %d, groups_count = %lu", block_group, sb->s_groups_count); if (sb->s_loaded_inode_bitmaps > 0 && sb->s_inode_bitmap_number[0] == block_group) return 0; if (sb->s_groups_count <= EXT2_MAX_GROUP_LOADED) { if (sb->s_inode_bitmap[block_group]) { if (sb->s_inode_bitmap_number[block_group] != block_group) panic ( "load_inode_bitmap:" "block_group != inode_bitmap_number"); else return block_group; } else { read_inode_bitmap (mp, block_group, block_group); return block_group; } } for (i = 0; i < sb->s_loaded_inode_bitmaps && sb->s_inode_bitmap_number[i] != block_group; i++) ; if (i < sb->s_loaded_inode_bitmaps && sb->s_inode_bitmap_number[i] == block_group) { inode_bitmap_number = sb->s_inode_bitmap_number[i]; inode_bitmap = sb->s_inode_bitmap[i]; for (j = i; j > 0; j--) { sb->s_inode_bitmap_number[j] = sb->s_inode_bitmap_number[j - 1]; sb->s_inode_bitmap[j] = sb->s_inode_bitmap[j - 1]; } sb->s_inode_bitmap_number[0] = inode_bitmap_number; sb->s_inode_bitmap[0] = inode_bitmap; } else { if (sb->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) sb->s_loaded_inode_bitmaps++; else ULCK_BUF(sb->s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]) for (j = sb->s_loaded_inode_bitmaps - 1; j > 0; j--) { sb->s_inode_bitmap_number[j] = sb->s_inode_bitmap_number[j - 1]; sb->s_inode_bitmap[j] = sb->s_inode_bitmap[j - 1]; } read_inode_bitmap (mp, block_group, 0); } return 0; }
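/*
 * Illustrative sketch (not kernel code): the move-to-front step that
 * load_inode_bitmap() performs on its parallel arrays when a cached group
 * bitmap is hit.  Names and sizes are made up; the real cache is
 * s_inode_bitmap_number[]/s_inode_bitmap[] bounded by EXT2_MAX_GROUP_LOADED.
 */
#include <stdio.h>

static void
lru_touch(unsigned long nr[], void *buf[], int i)
{
    unsigned long n = nr[i];
    void *b = buf[i];
    int j;

    /* shift earlier entries down by one, then put the hit entry first */
    for (j = i; j > 0; j--) {
        nr[j] = nr[j - 1];
        buf[j] = buf[j - 1];
    }
    nr[0] = n;
    buf[0] = b;
}

int
main(void)
{
    unsigned long nr[4] = { 3, 7, 1, 9 };
    void *buf[4] = { 0, 0, 0, 0 };

    lru_touch(nr, buf, 2);          /* group 1 was just used */
    printf("%lu %lu %lu %lu\n", nr[0], nr[1], nr[2], nr[3]);    /* 1 3 7 9 */
    return 0;
}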
/* * Attempt to build up a hash table for the directory contents in * inode 'ip'. Returns 0 on success, or -1 of the operation failed. */ int ufsdirhash_build(struct inode *ip) { struct dirhash *dh; struct buf *bp = NULL; struct direct *ep; struct vnode *vp; doff_t bmask, pos; int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; /* Check if we can/should use dirhash. */ if (ip->i_dirhash == NULL) { if (DIP(ip, size) < ufs_mindirhashsize || OFSFMT(ip->i_vnode)) return (-1); } else { /* Hash exists, but sysctls could have changed. */ if (DIP(ip, size) < ufs_mindirhashsize || ufs_dirhashmem > ufs_dirhashmaxmem) { ufsdirhash_free(ip); return (-1); } /* Check if hash exists and is intact (note: unlocked read). */ if (ip->i_dirhash->dh_hash != NULL) return (0); /* Free the old, recycled hash and build a new one. */ ufsdirhash_free(ip); } /* Don't hash removed directories. */ if (ip->i_effnlink == 0) return (-1); vp = ip->i_vnode; /* Allocate 50% more entries than this dir size could ever need. */ DIRHASH_ASSERT(DIP(ip, size) >= DIRBLKSIZ, ("ufsdirhash_build size")); nslots = DIP(ip, size) / DIRECTSIZ(1); nslots = (nslots * 3 + 1) / 2; narrays = howmany(nslots, DH_NBLKOFF); nslots = narrays * DH_NBLKOFF; dirblocks = howmany(DIP(ip, size), DIRBLKSIZ); nblocks = (dirblocks * 3 + 1) / 2; memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + nblocks * sizeof(*dh->dh_blkfree); DIRHASHLIST_LOCK(); if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { DIRHASHLIST_UNLOCK(); if (memreqd > ufs_dirhashmaxmem / 2) return (-1); /* Try to free some space. */ if (ufsdirhash_recycle(memreqd) != 0) return (-1); /* Enough was freed, and list has been locked. */ } ufs_dirhashmem += memreqd; DIRHASHLIST_UNLOCK(); /* * Use non-blocking mallocs so that we will revert to a linear * lookup on failure rather than potentially blocking forever. */ dh = malloc(sizeof(*dh), M_DIRHASH, M_NOWAIT|M_ZERO); if (dh == NULL) { DIRHASHLIST_LOCK(); ufs_dirhashmem -= memreqd; DIRHASHLIST_UNLOCK(); return (-1); } dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]), M_DIRHASH, M_NOWAIT|M_ZERO); dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]), M_DIRHASH, M_NOWAIT | M_ZERO); if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) goto fail; for (i = 0; i < narrays; i++) { if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL) goto fail; for (j = 0; j < DH_NBLKOFF; j++) dh->dh_hash[i][j] = DIRHASH_EMPTY; } /* Initialise the hash table and block statistics. */ mtx_init(&dh->dh_mtx, IPL_NONE); dh->dh_narrays = narrays; dh->dh_hlen = nslots; dh->dh_nblk = nblocks; dh->dh_dirblks = dirblocks; for (i = 0; i < dirblocks; i++) dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; for (i = 0; i < DH_NFSTATS; i++) dh->dh_firstfree[i] = -1; dh->dh_firstfree[DH_NFSTATS] = 0; dh->dh_seqopt = 0; dh->dh_seqoff = 0; dh->dh_score = DH_SCOREINIT; ip->i_dirhash = dh; bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; pos = 0; while (pos < DIP(ip, size)) { /* If necessary, get the next directory block. */ if ((pos & bmask) == 0) { if (bp != NULL) brelse(bp); if (UFS_BUFATOFF(ip, (off_t)pos, NULL, &bp) != 0) goto fail; } /* Add this entry to the hash. */ ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); if (ep->d_reclen == 0 || ep->d_reclen > DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { /* Corrupted directory. */ brelse(bp); goto fail; } if (ep->d_ino != 0) { /* Add the entry (simplified ufsdirhash_add). 
*/ slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) slot = WRAPINCR(slot, dh->dh_hlen); dh->dh_hused++; DH_ENTRY(dh, slot) = pos; ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); } pos += ep->d_reclen; } if (bp != NULL) brelse(bp); DIRHASHLIST_LOCK(); TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); dh->dh_onlist = 1; DIRHASHLIST_UNLOCK(); return (0); fail: if (dh->dh_hash != NULL) { for (i = 0; i < narrays; i++) if (dh->dh_hash[i] != NULL) DIRHASH_BLKFREE(dh->dh_hash[i]); free(dh->dh_hash, M_DIRHASH); } if (dh->dh_blkfree != NULL) free(dh->dh_blkfree, M_DIRHASH); free(dh, M_DIRHASH); ip->i_dirhash = NULL; DIRHASHLIST_LOCK(); ufs_dirhashmem -= memreqd; DIRHASHLIST_UNLOCK(); return (-1); }
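/*
 * Illustrative sketch (not kernel code): the sizing arithmetic used by
 * ufsdirhash_build() -- allow 50% more slots than the directory could ever
 * hold, then round up to whole hash arrays.  DH_NBLKOFF and the minimum
 * entry size are assumed values for this example only.
 */
#include <stdio.h>

#define DH_NBLKOFF      1024    /* assumed slots per hash array */
#define MIN_ENTRY_SIZE  12      /* assumed DIRECTSIZ(1) */

int
main(void)
{
    long dirsize = 16384;       /* bytes of directory data */
    long nslots = dirsize / MIN_ENTRY_SIZE;
    long narrays;

    nslots = (nslots * 3 + 1) / 2;                      /* +50% */
    narrays = (nslots + DH_NBLKOFF - 1) / DH_NBLKOFF;   /* round up */
    nslots = narrays * DH_NBLKOFF;
    printf("narrays=%ld nslots=%ld\n", narrays, nslots);    /* 2, 2048 */
    return 0;
}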
static int quota_handle_cmd_get(struct mount *mp, struct lwp *l, prop_dictionary_t cmddict, int type, prop_array_t datas) { prop_array_t replies; prop_object_iterator_t iter; prop_dictionary_t data; uint32_t id; struct ufsmount *ump = VFSTOUFS(mp); int error, defaultq = 0; const char *idstr; if ((ump->um_flags & (UFS_QUOTA|UFS_QUOTA2)) == 0) return EOPNOTSUPP; replies = prop_array_create(); if (replies == NULL) return ENOMEM; iter = prop_array_iterator(datas); if (iter == NULL) { prop_object_release(replies); return ENOMEM; } while ((data = prop_object_iterator_next(iter)) != NULL) { if (!prop_dictionary_get_uint32(data, "id", &id)) { if (!prop_dictionary_get_cstring_nocopy(data, "id", &idstr)) continue; if (strcmp(idstr, "default")) { error = EINVAL; goto err; } id = 0; defaultq = 1; } else { defaultq = 0; } error = quota_get_auth(mp, l, id); if (error == EPERM) continue; if (error != 0) goto err; #ifdef QUOTA if (ump->um_flags & UFS_QUOTA) error = quota1_handle_cmd_get(ump, type, id, defaultq, replies); else #endif #ifdef QUOTA2 if (ump->um_flags & UFS_QUOTA2) { error = quota2_handle_cmd_get(ump, type, id, defaultq, replies); } else #endif panic("quota_handle_cmd_get: no support ?"); if (error == ENOENT) continue; if (error != 0) goto err; } prop_object_iterator_release(iter); if (!prop_dictionary_set_and_rel(cmddict, "data", replies)) { error = ENOMEM; } else { error = 0; } return error; err: prop_object_iterator_release(iter); prop_object_release(replies); return error; }
/* * If the superblock doesn't already have a recorded journal location * then we allocate the journal in one of two positions: * * - At the end of the partition after the filesystem if there's * enough space. "Enough space" is defined as >= 1MB of journal * per 1GB of filesystem or 64MB, whichever is smaller. * * - Inside the filesystem. We try to allocate a contiguous journal * based on the total filesystem size - the target is 1MB of journal * per 1GB of filesystem, up to a maximum journal size of 64MB. As * a worst case allowing for fragmentation, we'll allocate a journal * 1/4 of the desired size but never smaller than 1MB. * * XXX In the future if we allow for non-contiguous journal files we * can tighten the above restrictions. * * XXX * This seems like a lot of duplication both here and in some of * the userland tools (fsck_ffs, dumpfs, tunefs) with similar * "switch (fs_journal_location)" constructs. Can we centralise * this sort of code somehow/somewhere? */ int wapbl_log_position(struct mount *mp, struct fs *fs, struct vnode *devvp, daddr_t *startp, size_t *countp, size_t *blksizep, uint64_t *extradatap) { struct ufsmount *ump = VFSTOUFS(mp); daddr_t logstart, logend, desired_logsize; uint64_t numsecs; unsigned secsize; int error, location; if (fs->fs_journal_version == UFS_WAPBL_VERSION) { switch (fs->fs_journal_location) { case UFS_WAPBL_JOURNALLOC_END_PARTITION: DPRINTF("found existing end-of-partition log\n"); *startp = fs->fs_journallocs[UFS_WAPBL_EPART_ADDR]; *countp = fs->fs_journallocs[UFS_WAPBL_EPART_COUNT]; *blksizep = fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ]; DPRINTF(" start = %lld, size = %zu, " "blksize = %zu\n", *startp, *countp, *blksizep); return 0; case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM: DPRINTF("found existing in-filesystem log\n"); *startp = fs->fs_journallocs[UFS_WAPBL_INFS_ADDR]; *countp = fs->fs_journallocs[UFS_WAPBL_INFS_COUNT]; *blksizep = fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ]; DPRINTF(" start = %lld, size = %zu, " "blksize = %zu\n", *startp, *countp, *blksizep); return 0; default: printf("ffs_wapbl: unknown journal type %d\n", fs->fs_journal_location); return EINVAL; } } desired_logsize = lfragtosize(fs, fs->fs_size) / UFS_WAPBL_JOURNAL_SCALE; DPRINTF("desired log size = %lld kB\n", desired_logsize / 1024); desired_logsize = max(desired_logsize, UFS_WAPBL_MIN_JOURNAL_SIZE); desired_logsize = min(desired_logsize, UFS_WAPBL_MAX_JOURNAL_SIZE); DPRINTF("adjusted desired log size = %lld kB\n", desired_logsize / 1024); /* Is there space after the filesystem on the partition for the log?
*/ logstart = fsbtodb(fs, fs->fs_size); error = wapbl_getdisksize(devvp, &numsecs, &secsize); if (error) return error; KDASSERT(secsize != 0); logend = btodb(numsecs * secsize); if (dbtob(logend - logstart) >= desired_logsize) { DPRINTF("enough space, use end-of-partition log\n"); location = UFS_WAPBL_JOURNALLOC_END_PARTITION; *blksizep = secsize; *startp = logstart; *countp = (logend - logstart); *extradatap = 0; /* convert to physical block numbers */ *startp = dbtob(*startp) / secsize; *countp = dbtob(*countp) / secsize; fs->fs_journallocs[UFS_WAPBL_EPART_ADDR] = *startp; fs->fs_journallocs[UFS_WAPBL_EPART_COUNT] = *countp; fs->fs_journallocs[UFS_WAPBL_EPART_BLKSZ] = *blksizep; fs->fs_journallocs[UFS_WAPBL_EPART_UNUSED] = *extradatap; } else { DPRINTF("end-of-partition has only %lld free\n", logend - logstart); location = UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM; *blksizep = secsize; error = wapbl_create_infs_log(mp, fs, devvp, startp, countp, extradatap); ffs_sync(mp, MNT_WAIT, FSCRED, curproc); /* convert to physical block numbers */ *startp = dbtob(*startp) / secsize; *countp = dbtob(*countp) / secsize; fs->fs_journallocs[UFS_WAPBL_INFS_ADDR] = *startp; fs->fs_journallocs[UFS_WAPBL_INFS_COUNT] = *countp; fs->fs_journallocs[UFS_WAPBL_INFS_BLKSZ] = *blksizep; fs->fs_journallocs[UFS_WAPBL_INFS_INO] = *extradatap; } if (error == 0) { /* update superblock with log location */ fs->fs_journal_version = UFS_WAPBL_VERSION; fs->fs_journal_location = location; fs->fs_journal_flags = 0; error = ffs_sbupdate(ump, MNT_WAIT); } return error; }
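/*
 * Illustrative sketch (not kernel code): the journal sizing policy described
 * above -- roughly 1 MB of log per 1 GB of filesystem, clamped between
 * assumed 1 MB and 64 MB bounds standing in for UFS_WAPBL_MIN_JOURNAL_SIZE
 * and UFS_WAPBL_MAX_JOURNAL_SIZE.
 */
#include <stdint.h>
#include <stdio.h>

#define MIN_JOURNAL     (1ULL << 20)    /* assumed 1 MB minimum */
#define MAX_JOURNAL     (64ULL << 20)   /* assumed 64 MB maximum */

static uint64_t
desired_log_size(uint64_t fs_bytes)
{
    uint64_t want = fs_bytes / 1024;    /* 1 MB of log per 1 GB of fs */

    if (want < MIN_JOURNAL)
        want = MIN_JOURNAL;
    if (want > MAX_JOURNAL)
        want = MAX_JOURNAL;
    return want;
}

int
main(void)
{
    printf("8 GB fs -> %llu MB log\n",
        (unsigned long long)(desired_log_size(8ULL << 30) >> 20));
    return 0;
}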
/* * Vnode op for reading directories. * * This routine handles converting from the on-disk directory format * "struct direct" to the in-memory format "struct dirent" as well as * byte swapping the entries if necessary. */ int ufs_readdir(void *v) { struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; kauth_cred_t a_cred; int *a_eofflag; off_t **a_cookies; int *ncookies; } */ *ap = v; struct vnode *vp = ap->a_vp; struct direct *cdp, *ecdp; struct dirent *ndp; char *cdbuf, *ndbuf, *endp; struct uio auio, *uio; struct iovec aiov; int error; size_t count, ccount, rcount, cdbufsz, ndbufsz; off_t off, *ccp; off_t startoff; size_t skipbytes; struct ufsmount *ump = VFSTOUFS(vp->v_mount); int nswap = UFS_MPNEEDSWAP(ump); #if BYTE_ORDER == LITTLE_ENDIAN int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0; #else int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0; #endif uio = ap->a_uio; count = uio->uio_resid; rcount = count - ((uio->uio_offset + count) & (ump->um_dirblksiz - 1)); if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp)) return EINVAL; startoff = uio->uio_offset & ~(ump->um_dirblksiz - 1); skipbytes = uio->uio_offset - startoff; rcount += skipbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = startoff; auio.uio_resid = rcount; UIO_SETUP_SYSSPACE(&auio); auio.uio_rw = UIO_READ; cdbufsz = rcount; cdbuf = kmem_alloc(cdbufsz, KM_SLEEP); aiov.iov_base = cdbuf; aiov.iov_len = rcount; error = VOP_READ(vp, &auio, 0, ap->a_cred); if (error != 0) { kmem_free(cdbuf, cdbufsz); return error; } rcount -= auio.uio_resid; cdp = (struct direct *)(void *)cdbuf; ecdp = (struct direct *)(void *)&cdbuf[rcount]; ndbufsz = count; ndbuf = kmem_alloc(ndbufsz, KM_SLEEP); ndp = (struct dirent *)(void *)ndbuf; endp = &ndbuf[count]; off = uio->uio_offset; if (ap->a_cookies) { ccount = rcount / _DIRENT_RECLEN(cdp, 1); ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp), M_TEMP, M_WAITOK); } else { /* XXX: GCC */ ccount = 0; ccp = NULL; } while (cdp < ecdp) { cdp->d_reclen = ufs_rw16(cdp->d_reclen, nswap); if (skipbytes > 0) { if (cdp->d_reclen <= skipbytes) { skipbytes -= cdp->d_reclen; cdp = _DIRENT_NEXT(cdp); continue; } /* * invalid cookie. */ error = EINVAL; goto out; } if (cdp->d_reclen == 0) { struct dirent *ondp = ndp; ndp->d_reclen = _DIRENT_MINSIZE(ndp); ndp = _DIRENT_NEXT(ndp); ondp->d_reclen = 0; cdp = ecdp; break; } if (needswap) { ndp->d_type = cdp->d_namlen; ndp->d_namlen = cdp->d_type; } else { ndp->d_type = cdp->d_type; ndp->d_namlen = cdp->d_namlen; } ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen); if ((char *)(void *)ndp + ndp->d_reclen + _DIRENT_MINSIZE(ndp) > endp) break; ndp->d_fileno = ufs_rw32(cdp->d_ino, nswap); (void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen); memset(&ndp->d_name[ndp->d_namlen], 0, ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen); off += cdp->d_reclen; if (ap->a_cookies) { KASSERT(ccp - *(ap->a_cookies) < ccount); *(ccp++) = off; } ndp = _DIRENT_NEXT(ndp); cdp = _DIRENT_NEXT(cdp); } count = ((char *)(void *)ndp - ndbuf); error = uiomove(ndbuf, count, uio); out: if (ap->a_cookies) { if (error) { free(*(ap->a_cookies), M_TEMP); *(ap->a_cookies) = NULL; *(ap->a_ncookies) = 0; } else { *ap->a_ncookies = ccp - *(ap->a_cookies); } } uio->uio_offset = off; kmem_free(ndbuf, ndbufsz); kmem_free(cdbuf, cdbufsz); *ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset; return error; }
/* * Read an inode from disk and initialize this vnode / inode pair. * Caller assures no other thread will try to load this inode. */ int ext2fs_loadvnode(struct mount *mp, struct vnode *vp, const void *key, size_t key_len, const void **new_key) { ino_t ino; struct m_ext2fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; dev_t dev; int error; KASSERT(key_len == sizeof(ino)); memcpy(&ino, key, key_len); ump = VFSTOUFS(mp); dev = ump->um_dev; fs = ump->um_e2fs; /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ino)), (int)fs->e2fs_bsize, 0, &bp); if (error) return error; /* Allocate and initialize inode. */ ip = pool_get(&ext2fs_inode_pool, PR_WAITOK); memset(ip, 0, sizeof(struct inode)); vp->v_tag = VT_EXT2FS; vp->v_op = ext2fs_vnodeop_p; vp->v_vflag |= VV_LOCKSWORK; vp->v_data = ip; ip->i_vnode = vp; ip->i_ump = ump; ip->i_e2fs = fs; ip->i_dev = dev; ip->i_number = ino; ip->i_e2fs_last_lblk = 0; ip->i_e2fs_last_blk = 0; /* Initialize genfs node. */ genfs_node_init(vp, &ext2fs_genfsops); error = ext2fs_loadvnode_content(fs, ino, bp, ip); brelse(bp, 0); if (error) return error; /* If the inode was deleted, reset all fields */ if (ip->i_e2fs_dtime != 0) { ip->i_e2fs_mode = 0; (void)ext2fs_setsize(ip, 0); (void)ext2fs_setnblock(ip, 0); memset(ip->i_e2fs_blocks, 0, sizeof(ip->i_e2fs_blocks)); } /* Initialize the vnode from the inode. */ ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp); /* Finish inode initialization. */ ip->i_devvp = ump->um_devvp; vref(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_e2fs_gen == 0) { if (++ext2gennumber < (u_long)time_second) ext2gennumber = time_second; ip->i_e2fs_gen = ext2gennumber; if ((mp->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } uvm_vnp_setsize(vp, ext2fs_size(ip)); *new_key = &ip->i_number; return 0; }
/* * Mkdir system call */ int ext2fs_mkdir(void *v) { struct vop_mkdir_args *ap = v; struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct ext2fs_dirtemplate dirtemplate; mode_t dmode; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ext2fs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_e2fs_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & ACCESSPERMS; dmode |= IFDIR; /* * Must simulate part of ext2fs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ if ((error = ext2fs_inode_alloc(dp, dmode, cnp->cn_cred, &tvp)) != 0) goto out; ip = VTOI(tvp); ip->i_e2fs_uid = cnp->cn_cred->cr_uid; ip->i_e2fs_gid = dp->i_e2fs_gid; ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_e2fs_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_e2fs_nlink = 2; error = ext2fs_update(ip, NULL, NULL, 1); /* * Bump link count in parent directory * to reflect work done below. Should * be done before reference is created * so reparation is possible if we crash. */ dp->i_e2fs_nlink++; dp->i_flag |= IN_CHANGE; if ((error = ext2fs_update(dp, NULL, NULL, 1)) != 0) goto bad; /* Initialize directory with "." and ".." from static template. */ bzero(&dirtemplate, sizeof(dirtemplate)); dirtemplate.dot_ino = h2fs32(ip->i_number); dirtemplate.dot_reclen = h2fs16(12); dirtemplate.dot_namlen = 1; if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 && (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) { dirtemplate.dot_type = EXT2_FT_DIR; } dirtemplate.dot_name[0] = '.'; dirtemplate.dotdot_ino = h2fs32(dp->i_number); dirtemplate.dotdot_reclen = h2fs16(VTOI(dvp)->i_e2fs->e2fs_bsize - 12); dirtemplate.dotdot_namlen = 2; if (ip->i_e2fs->e2fs.e2fs_rev > E2FS_REV0 && (ip->i_e2fs->e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE)) { dirtemplate.dotdot_type = EXT2_FT_DIR; } dirtemplate.dotdot_name[0] = dirtemplate.dotdot_name[1] = '.'; error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, cnp->cn_cred, NULL, curproc); if (error) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; goto bad; } if (VTOI(dvp)->i_e2fs->e2fs_bsize > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) panic("ext2fs_mkdir: blksize"); /* XXX should grow with balloc() */ else { error = ext2fs_setsize(ip, VTOI(dvp)->i_e2fs->e2fs_bsize); if (error) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; goto bad; } ip->i_flag |= IN_CHANGE; } /* Directory set up, now install its entry in the parent directory. */ error = ext2fs_direnter(ip, dvp, cnp); if (error != 0) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; } bad: /* * No need to do an explicit VOP_TRUNCATE here, vrele will do this * for us because we set the link count to 0. */ if (error) { ip->i_e2fs_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } else *ap->a_vpp = tvp; out: pool_put(&namei_pool, cnp->cn_pnbuf); vput(dvp); return (error); }
/* * VFS Operations. * * mount system call */ int ext2fs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) { struct lwp *l = curlwp; struct vnode *devvp; struct ufs_args *args = data; struct ufsmount *ump = NULL; struct m_ext2fs *fs; int error = 0, flags, update; mode_t accessmode; if (args == NULL) return EINVAL; if (*data_len < sizeof *args) return EINVAL; if (mp->mnt_flag & MNT_GETARGS) { ump = VFSTOUFS(mp); if (ump == NULL) return EIO; memset(args, 0, sizeof *args); args->fspec = NULL; *data_len = sizeof *args; return 0; } update = mp->mnt_flag & MNT_UPDATE; /* Check arguments */ if (args->fspec != NULL) { /* * Look up the name and verify that it's sane. */ error = namei_simple_user(args->fspec, NSM_FOLLOW_NOEMULROOT, &devvp); if (error != 0) return error; if (!update) { /* * Be sure this is a valid block device */ if (devvp->v_type != VBLK) error = ENOTBLK; else if (bdevsw_lookup(devvp->v_rdev) == NULL) error = ENXIO; } else { /* * Be sure we're still naming the same device * used for our initial mount */ ump = VFSTOUFS(mp); if (devvp != ump->um_devvp) { if (devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; else { vrele(devvp); devvp = ump->um_devvp; vref(devvp); } } } } else { if (!update) { /* New mounts must have a filename for the device */ return EINVAL; } else { ump = VFSTOUFS(mp); devvp = ump->um_devvp; vref(devvp); } } /* * If mount by non-root, then verify that user has necessary * permissions on the device. * * Permission to update a mount is checked higher, so here we presume * updating the mount is okay (for example, as far as securelevel goes) * which leaves us with the normal check. */ if (error == 0) { accessmode = VREAD; if (update ? (mp->mnt_iflag & IMNT_WANTRDWR) != 0 : (mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(accessmode)); VOP_UNLOCK(devvp); } if (error) { vrele(devvp); return error; } if (!update) { int xflags; if (mp->mnt_flag & MNT_RDONLY) xflags = FREAD; else xflags = FREAD|FWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_OPEN(devvp, xflags, FSCRED); VOP_UNLOCK(devvp); if (error) goto fail; error = ext2fs_mountfs(devvp, mp); if (error) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); (void)VOP_CLOSE(devvp, xflags, NOCRED); VOP_UNLOCK(devvp); goto fail; } ump = VFSTOUFS(mp); fs = ump->um_e2fs; } else { /* * Update the mount. */ /* * The initial mount got a reference on this * device, so drop the one obtained via * namei(), above. 
*/ vrele(devvp); ump = VFSTOUFS(mp); fs = ump->um_e2fs; if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { /* * Changing from r/w to r/o */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = ext2fs_flushfiles(mp, flags); if (error == 0 && ext2fs_cgupdate(ump, MNT_WAIT) == 0 && (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) { fs->e2fs.e2fs_state = E2FS_ISCLEAN; (void) ext2fs_sbupdate(ump, MNT_WAIT); } if (error) return error; fs->e2fs_ronly = 1; } if (mp->mnt_flag & MNT_RELOAD) { error = ext2fs_reload(mp, l->l_cred, l); if (error) return error; } if (fs->e2fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { /* * Changing from read-only to read/write */ fs->e2fs_ronly = 0; if (fs->e2fs.e2fs_state == E2FS_ISCLEAN) fs->e2fs.e2fs_state = 0; else fs->e2fs.e2fs_state = E2FS_ERRORS; fs->e2fs_fmod = 1; } if (args->fspec == NULL) return 0; } error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l); if (error == 0) ext2fs_sb_setmountinfo(fs, mp); if (fs->e2fs_fmod != 0) { /* XXX */ fs->e2fs_fmod = 0; if (fs->e2fs.e2fs_state == 0) fs->e2fs.e2fs_wtime = time_second; else printf("%s: file system not clean; please fsck(8)\n", mp->mnt_stat.f_mntfromname); (void) ext2fs_cgupdate(ump, MNT_WAIT); } return error; fail: vrele(devvp); return error; }
/* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ int ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) { struct vnode *vp, *devvp; struct inode *ip; struct buf *bp; struct m_ext2fs *fs; struct ext2fs *newfs; int i, error; struct ufsmount *ump; struct vnode_iterator *marker; if ((mp->mnt_flag & MNT_RDONLY) == 0) return EINVAL; ump = VFSTOUFS(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(devvp, 0, cred, l, 0, 0); VOP_UNLOCK(devvp); if (error) panic("ext2fs_reload: dirty1"); fs = ump->um_e2fs; /* * Step 2: re-read superblock from disk. Copy in new superblock, and compute * in-memory values. */ error = bread(devvp, SBLOCK, SBSIZE, 0, &bp); if (error) return error; newfs = (struct ext2fs *)bp->b_data; e2fs_sbload(newfs, &fs->e2fs); brelse(bp, 0); error = ext2fs_sbfill(fs, (mp->mnt_flag & MNT_RDONLY) != 0); if (error) return error; /* * Step 3: re-read summary information from disk. */ for (i = 0; i < fs->e2fs_ngdb; i++) { error = bread(devvp , EXT2_FSBTODB(fs, fs->e2fs.e2fs_first_dblock + 1 /* superblock */ + i), fs->e2fs_bsize, 0, &bp); if (error) { return error; } e2fs_cgload((struct ext2_gd *)bp->b_data, &fs->e2fs_gd[i * fs->e2fs_bsize / sizeof(struct ext2_gd)], fs->e2fs_bsize); brelse(bp, 0); } vfs_vnode_iterator_init(mp, &marker); while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp)) continue; /* * Step 5: invalidate all cached file data. */ if (vn_lock(vp, LK_EXCLUSIVE)) { vrele(vp); continue; } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ext2fs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, 0, &bp); if (error) { vput(vp); break; } error = ext2fs_loadvnode_content(fs, ip->i_number, bp, ip); brelse(bp, 0); if (error) { vput(vp); break; } vput(vp); } vfs_vnode_iterator_destroy(marker); return error; }
/* * Real work associated with retrieving a named attribute--assumes that * the attribute lock has already been grabbed. */ static int ufs_extattr_get(struct vnode *vp, int attrnamespace, const char *name, struct uio *uio, size_t *size, struct ucred *cred, struct thread *td) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; size_t len, old_len; int error = 0; if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (strlen(name) == 0) return (EINVAL); error = extattr_check_cred(vp, attrnamespace, cred, td, VREAD); if (error) return (error); attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) return (ENOATTR); /* * Allow only offsets of zero to encourage the read/replace * extended attribute semantic. Otherwise we can't guarantee * atomicity, as we don't provide locks for extended attributes. */ if (uio != NULL && uio->uio_offset != 0) return (ENXIO); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Read in the data header to see if the data is defined, and if so * how much. */ bzero(&ueh, sizeof(struct ufs_extattr_header)); local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_READ; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_td = td; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); /* * Acquire locks. * * Don't need to get a lock on the backing file if the getattr is * being applied to the backing file, as the lock is already held. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_SHARED | LK_RETRY); error = VOP_READ(attribute->uele_backing_vnode, &local_aio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; /* Defined? */ if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { error = ENOATTR; goto vopunlock_exit; } /* Valid for the current inode generation? */ if (ueh.ueh_i_gen != ip->i_gen) { /* * The inode itself has a different generation number * than the attribute data. For now, the best solution * is to coerce this to undefined, and let it get cleaned * up by the next write or extattrctl clean. */ printf("ufs_extattr_get (%s): inode number inconsistency (%d, %jd)\n", mp->mnt_stat.f_mntonname, ueh.ueh_i_gen, (intmax_t)ip->i_gen); error = ENOATTR; goto vopunlock_exit; } /* Local size consistency check. */ if (ueh.ueh_len > attribute->uele_fileheader.uef_size) { error = ENXIO; goto vopunlock_exit; } /* Return full data size if caller requested it. */ if (size != NULL) *size = ueh.ueh_len; /* Return data if the caller requested it. */ if (uio != NULL) { /* Allow for offset into the attribute data. */ uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); /* * Figure out maximum to transfer -- use buffer size and * local data limit. 
*/ len = MIN(uio->uio_resid, ueh.ueh_len); old_len = uio->uio_resid; uio->uio_resid = len; error = VOP_READ(attribute->uele_backing_vnode, uio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; uio->uio_resid = old_len - (len - uio->uio_resid); } vopunlock_exit: if (uio != NULL) uio->uio_offset = 0; if (attribute->uele_backing_vnode != vp) VOP_UNLOCK(attribute->uele_backing_vnode, 0); return (error); }
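/*
 * Illustrative sketch (not kernel code): the uio_resid bookkeeping at the end
 * of ufs_extattr_get().  The transfer is clamped to the stored attribute
 * length, and only the bytes actually moved are charged against the caller's
 * residual count.  All values are made up.
 */
#include <stdio.h>

#define MIN(a, b)   ((a) < (b) ? (a) : (b))

int
main(void)
{
    size_t resid = 4096;        /* caller's buffer space */
    size_t attr_len = 100;      /* bytes stored for this attribute */
    size_t len, old_len;

    len = MIN(resid, attr_len); /* clamp the transfer */
    old_len = resid;
    resid = len;
    resid -= len;               /* pretend the read transferred everything */
    resid = old_len - (len - resid);
    printf("resid after get = %zu\n", resid);   /* 3996 */
    return 0;
}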
/* * VFS call to manage extended attributes in UFS. If filename_vp is * non-NULL, it must be passed in locked, and regardless of errors in * processing, will be unlocked. */ int ufs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { struct ufsmount *ump = VFSTOUFS(mp); struct thread *td = curthread; int error; /* * Processes with privilege, but in jail, are not allowed to * configure extended attributes. */ error = priv_check(td, PRIV_UFS_EXTATTRCTL); if (error) { if (filename_vp != NULL) VOP_UNLOCK(filename_vp, 0); return (error); } /* * We only allow extattrctl(2) on UFS1 file systems, as UFS2 uses * native extended attributes. */ if (ump->um_fstype != UFS1) { if (filename_vp != NULL) VOP_UNLOCK(filename_vp, 0); return (EOPNOTSUPP); } switch(cmd) { case UFS_EXTATTR_CMD_START: if (filename_vp != NULL) { VOP_UNLOCK(filename_vp, 0); return (EINVAL); } if (attrname != NULL) return (EINVAL); error = ufs_extattr_start(mp, td); return (error); case UFS_EXTATTR_CMD_STOP: if (filename_vp != NULL) { VOP_UNLOCK(filename_vp, 0); return (EINVAL); } if (attrname != NULL) return (EINVAL); error = ufs_extattr_stop(mp, td); return (error); case UFS_EXTATTR_CMD_ENABLE: if (filename_vp == NULL) return (EINVAL); if (attrname == NULL) { VOP_UNLOCK(filename_vp, 0); return (EINVAL); } /* * ufs_extattr_enable_with_open() will always unlock the * vnode, regardless of failure. */ ufs_extattr_uepm_lock(ump); error = ufs_extattr_enable_with_open(ump, filename_vp, attrnamespace, attrname, td); ufs_extattr_uepm_unlock(ump); return (error); case UFS_EXTATTR_CMD_DISABLE: if (filename_vp != NULL) { VOP_UNLOCK(filename_vp, 0); return (EINVAL); } if (attrname == NULL) return (EINVAL); ufs_extattr_uepm_lock(ump); error = ufs_extattr_disable(ump, attrnamespace, attrname, td); ufs_extattr_uepm_unlock(ump); return (error); default: return (EINVAL); } }
static int ufs_extattr_autostart_locked(struct mount *mp, struct thread *td) { struct vnode *rvp, *attr_dvp, *attr_system_dvp, *attr_user_dvp; struct ufsmount *ump = VFSTOUFS(mp); int error; /* * UFS_EXTATTR applies only to UFS1, as UFS2 uses native extended * attributes, so don't autostart. */ if (ump->um_fstype != UFS1) return (0); /* * Does UFS_EXTATTR_FSROOTSUBDIR exist off the filesystem root? * If so, automatically start EA's. */ error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp); if (error) { printf("ufs_extattr_autostart.VFS_ROOT() returned %d\n", error); return (error); } error = ufs_extattr_lookup(rvp, UE_GETDIR_LOCKPARENT_DONT, UFS_EXTATTR_FSROOTSUBDIR, &attr_dvp, td); if (error) { /* rvp ref'd but now unlocked */ vrele(rvp); return (error); } if (rvp == attr_dvp) { /* Should never happen. */ vput(rvp); vrele(attr_dvp); return (EINVAL); } vrele(rvp); if (attr_dvp->v_type != VDIR) { printf("ufs_extattr_autostart: %s != VDIR\n", UFS_EXTATTR_FSROOTSUBDIR); goto return_vput_attr_dvp; } error = ufs_extattr_start_locked(ump, td); if (error) { printf("ufs_extattr_autostart: ufs_extattr_start failed (%d)\n", error); goto return_vput_attr_dvp; } /* * Look for two subdirectories: UFS_EXTATTR_SUBDIR_SYSTEM, * UFS_EXTATTR_SUBDIR_USER. For each, iterate over the sub-directory, * and start with appropriate type. Failures in either don't * result in an over-all failure. attr_dvp is left locked to * be cleaned up on exit. */ error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, UFS_EXTATTR_SUBDIR_SYSTEM, &attr_system_dvp, td); if (!error) { error = ufs_extattr_iterate_directory(VFSTOUFS(mp), attr_system_dvp, EXTATTR_NAMESPACE_SYSTEM, td); if (error) printf("ufs_extattr_iterate_directory returned %d\n", error); vput(attr_system_dvp); } error = ufs_extattr_lookup(attr_dvp, UE_GETDIR_LOCKPARENT, UFS_EXTATTR_SUBDIR_USER, &attr_user_dvp, td); if (!error) { error = ufs_extattr_iterate_directory(VFSTOUFS(mp), attr_user_dvp, EXTATTR_NAMESPACE_USER, td); if (error) printf("ufs_extattr_iterate_directory returned %d\n", error); vput(attr_user_dvp); } /* Mask startup failures in sub-directories. */ error = 0; return_vput_attr_dvp: vput(attr_dvp); return (error); }
int wapbl_remove_log(struct mount *mp) { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs = ump->um_fs; struct vnode *vp; struct inode *ip; ino_t log_ino; int error; /* If super block layout is too old to support WAPBL, return */ if (ffs_superblock_layout(fs) < 2) return 0; /* If all the log locators are 0, just clean up */ if (fs->fs_journallocs[0] == 0 && fs->fs_journallocs[1] == 0 && fs->fs_journallocs[2] == 0 && fs->fs_journallocs[3] == 0) { DPRINTF("empty locators, just clear\n"); goto done; } switch (fs->fs_journal_location) { case UFS_WAPBL_JOURNALLOC_NONE: /* nothing! */ DPRINTF("no log\n"); break; case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM: log_ino = fs->fs_journallocs[UFS_WAPBL_INFS_INO]; DPRINTF("in-fs log, ino = %lld\n",log_ino); /* if no existing log inode, just clear all fields and bail */ if (log_ino == 0) goto done; error = VFS_VGET(mp, log_ino, &vp); if (error != 0) { printf("ffs_wapbl: vget failed %d\n", error); /* clear out log info on error */ goto done; } ip = VTOI(vp); KASSERT(log_ino == ip->i_number); if ((DIP(ip, flags) & SF_LOG) == 0) { printf("ffs_wapbl: try to clear non-log inode " "%lld\n", log_ino); vput(vp); /* clear out log info on error */ goto done; } /* * remove the log inode by setting its link count back * to zero and bail. */ ip->i_effnlink = 0; DIP_ASSIGN(ip, nlink, 0); vput(vp); case UFS_WAPBL_JOURNALLOC_END_PARTITION: DPRINTF("end-of-partition log\n"); /* no extra work required */ break; default: printf("ffs_wapbl: unknown journal type %d\n", fs->fs_journal_location); break; } done: /* Clear out all previous knowledge of journal */ fs->fs_journal_version = 0; fs->fs_journal_location = 0; fs->fs_journal_flags = 0; fs->fs_journallocs[0] = 0; fs->fs_journallocs[1] = 0; fs->fs_journallocs[2] = 0; fs->fs_journallocs[3] = 0; (void) ffs_sbupdate(ump, MNT_WAIT); return 0; }
/* * Attempt to build up a hash table for the directory contents in * inode 'ip'. Returns 0 on success, or -1 of the operation failed. */ int ufsdirhash_build(struct inode *ip) { struct dirhash *dh; struct buf *bp = NULL; struct direct *ep; struct vnode *vp; doff_t bmask, pos; int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; const int needswap = UFS_MPNEEDSWAP(ip->i_ump); int dirblksiz = ip->i_ump->um_dirblksiz; /* Check if we can/should use dirhash. */ if (ip->i_dirhash == NULL) { if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip)) return (-1); } else { /* Hash exists, but sysctls could have changed. */ if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || ufs_dirhashmem > ufs_dirhashmaxmem) { ufsdirhash_free(ip); return (-1); } /* Check if hash exists and is intact (note: unlocked read). */ if (ip->i_dirhash->dh_hash != NULL) return (0); /* Free the old, recycled hash and build a new one. */ ufsdirhash_free(ip); } /* Don't hash removed directories. */ if (ip->i_nlink == 0) return (-1); vp = ip->i_vnode; /* Allocate 50% more entries than this dir size could ever need. */ KASSERT(ip->i_size >= dirblksiz); nslots = ip->i_size / UFS_DIRECTSIZ(1); nslots = (nslots * 3 + 1) / 2; narrays = howmany(nslots, DH_NBLKOFF); nslots = narrays * DH_NBLKOFF; dirblocks = howmany(ip->i_size, dirblksiz); nblocks = (dirblocks * 3 + 1) / 2; memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + nblocks * sizeof(*dh->dh_blkfree); while (atomic_add_int_nv(&ufs_dirhashmem, memreqd) > ufs_dirhashmaxmem) { atomic_add_int(&ufs_dirhashmem, -memreqd); if (memreqd > ufs_dirhashmaxmem / 2) return (-1); /* Try to free some space. */ if (ufsdirhash_recycle(memreqd) != 0) return (-1); else DIRHASHLIST_UNLOCK(); } /* * Use non-blocking mallocs so that we will revert to a linear * lookup on failure rather than potentially blocking forever. */ dh = pool_cache_get(ufsdirhash_cache, PR_NOWAIT); if (dh == NULL) { atomic_add_int(&ufs_dirhashmem, -memreqd); return (-1); } memset(dh, 0, sizeof(*dh)); mutex_init(&dh->dh_lock, MUTEX_DEFAULT, IPL_NONE); DIRHASH_LOCK(dh); dh->dh_hashsz = narrays * sizeof(dh->dh_hash[0]); dh->dh_hash = kmem_zalloc(dh->dh_hashsz, KM_NOSLEEP); dh->dh_blkfreesz = nblocks * sizeof(dh->dh_blkfree[0]); dh->dh_blkfree = kmem_zalloc(dh->dh_blkfreesz, KM_NOSLEEP); if (dh->dh_hash == NULL || dh->dh_blkfree == NULL) goto fail; for (i = 0; i < narrays; i++) { if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL) goto fail; for (j = 0; j < DH_NBLKOFF; j++) dh->dh_hash[i][j] = DIRHASH_EMPTY; } /* Initialise the hash table and block statistics. */ dh->dh_narrays = narrays; dh->dh_hlen = nslots; dh->dh_nblk = nblocks; dh->dh_dirblks = dirblocks; for (i = 0; i < dirblocks; i++) dh->dh_blkfree[i] = dirblksiz / DIRALIGN; for (i = 0; i < DH_NFSTATS; i++) dh->dh_firstfree[i] = -1; dh->dh_firstfree[DH_NFSTATS] = 0; dh->dh_seqopt = 0; dh->dh_seqoff = 0; dh->dh_score = DH_SCOREINIT; ip->i_dirhash = dh; bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; pos = 0; while (pos < ip->i_size) { if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { preempt(); } /* If necessary, get the next directory block. */ if ((pos & bmask) == 0) { if (bp != NULL) brelse(bp, 0); if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp, false) != 0) goto fail; } /* Add this entry to the hash. */ ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); if (ep->d_reclen == 0 || ep->d_reclen > dirblksiz - (pos & (dirblksiz - 1))) { /* Corrupted directory. 
*/ brelse(bp, 0); goto fail; } if (ep->d_ino != 0) { /* Add the entry (simplified ufsdirhash_add). */ slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) slot = WRAPINCR(slot, dh->dh_hlen); dh->dh_hused++; DH_ENTRY(dh, slot) = pos; ufsdirhash_adjfree(dh, pos, -UFS_DIRSIZ(0, ep, needswap), dirblksiz); } pos += ep->d_reclen; } if (bp != NULL) brelse(bp, 0); DIRHASHLIST_LOCK(); TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); dh->dh_onlist = 1; DIRHASH_UNLOCK(dh); DIRHASHLIST_UNLOCK(); return (0); fail: DIRHASH_UNLOCK(dh); if (dh->dh_hash != NULL) { for (i = 0; i < narrays; i++) if (dh->dh_hash[i] != NULL) DIRHASH_BLKFREE(dh->dh_hash[i]); kmem_free(dh->dh_hash, dh->dh_hashsz); } if (dh->dh_blkfree != NULL) kmem_free(dh->dh_blkfree, dh->dh_blkfreesz); mutex_destroy(&dh->dh_lock); pool_cache_put(ufsdirhash_cache, dh); ip->i_dirhash = NULL; atomic_add_int(&ufs_dirhashmem, -memreqd); return (-1); }
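/*
 * Illustrative sketch (not kernel code): the open-addressed insert with
 * linear probing used in the "simplified ufsdirhash_add" step above.  The
 * table size, stored values and WRAPINCR are simplified stand-ins.
 */
#include <stdio.h>

#define HLEN            8
#define EMPTY           (-1)
#define WRAPINCR(i, n)  (((i) + 1) % (n))

static void
hash_insert(int table[], int slot, int value)
{
    /* probe forward, wrapping, until a free slot is found */
    while (table[slot] != EMPTY)
        slot = WRAPINCR(slot, HLEN);
    table[slot] = value;
}

int
main(void)
{
    int i, table[HLEN];

    for (i = 0; i < HLEN; i++)
        table[i] = EMPTY;
    hash_insert(table, 3, 100);     /* lands in slot 3 */
    hash_insert(table, 3, 200);     /* collides, lands in slot 4 */
    printf("%d %d\n", table[3], table[4]);
    return 0;
}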
int ffs_wapbl_start(struct mount *mp) { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs = ump->um_fs; struct vnode *devvp = ump->um_devvp; daddr_t off; size_t count; size_t blksize; uint64_t extradata; int error; if (mp->mnt_wapbl == NULL) { if (fs->fs_journal_flags & UFS_WAPBL_FLAGS_CLEAR_LOG) { /* Clear out any existing journal file */ error = wapbl_remove_log(mp); if (error != 0) return error; } if (mp->mnt_flag & MNT_LOG) { KDASSERT(fs->fs_ronly == 0); /* WAPBL needs UFS2 format super block */ if (ffs_superblock_layout(fs) < 2) { printf("%s fs superblock in old format, " "not journaling\n", VFSTOUFS(mp)->um_fs->fs_fsmnt); mp->mnt_flag &= ~MNT_LOG; return EINVAL; } error = wapbl_log_position(mp, fs, devvp, &off, &count, &blksize, &extradata); if (error) return error; error = wapbl_start(&mp->mnt_wapbl, mp, devvp, off, count, blksize, mp->mnt_wapbl_replay, ffs_wapbl_sync_metadata, ffs_wapbl_abort_sync_metadata); if (error) return error; mp->mnt_wapbl_op = &wapbl_ops; #ifdef WAPBL_DEBUG printf("%s: enabling logging\n", fs->fs_fsmnt); #endif if ((fs->fs_flags & FS_DOWAPBL) == 0) { UFS_WAPBL_BEGIN(mp); fs->fs_flags |= FS_DOWAPBL; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { UFS_WAPBL_END(mp); ffs_wapbl_stop(mp, MNT_FORCE); return error; } UFS_WAPBL_END(mp); error = wapbl_flush(mp->mnt_wapbl, 1); if (error) { ffs_wapbl_stop(mp, MNT_FORCE); return error; } } } else if (fs->fs_flags & FS_DOWAPBL) { fs->fs_fmod = 1; fs->fs_flags &= ~FS_DOWAPBL; } } /* * It is recommended that you finish replay with logging enabled. * However, even if logging is not enabled, the remaining log * replay should be safely recoverable with an fsck, so perform * it anyway. */ if ((fs->fs_ronly == 0) && mp->mnt_wapbl_replay) { int saveflag = mp->mnt_flag & MNT_RDONLY; /* * Make sure MNT_RDONLY is not set so that the inode * cleanup in ufs_inactive will actually do its work. */ mp->mnt_flag &= ~MNT_RDONLY; ffs_wapbl_replay_finish(mp); mp->mnt_flag |= saveflag; KASSERT(fs->fs_ronly == 0); } return 0; }
/* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ffs_ib and * once with the offset into the page itself. */ int ufs_getlbns(struct vnode *vp, daddr64_t bn, struct indir *ap, int *nump) { daddr64_t metalbn, realbn; struct ufsmount *ump; int64_t blockcnt; int i, numlevels, off; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if (bn < 0) bn = -bn; #ifdef DIAGNOSTIC if (realbn < 0 && realbn > -NDADDR) { panic ("ufs_getlbns: Invalid indirect block %lld specified", realbn); } #endif /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the given level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); blockcnt *= MNINDIR(ump); if (bn < blockcnt) break; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; blockcnt /= MNINDIR(ump); off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; } #ifdef DIAGNOSTIC if (realbn < 0 && metalbn != realbn) { panic("ufs_getlbns: indirect block %lld not found", realbn); } #endif if (nump) *nump = numlevels; return (0); }
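/*
 * Illustrative sketch (not kernel code): counting the levels of indirection
 * needed for a logical block number, mirroring the loop at the top of
 * ufs_getlbns().  The NDADDR/NIADDR values are assumed, and nindir stands in
 * for MNINDIR(ump), the number of pointers per indirect block.
 */
#include <stdio.h>
#include <stdint.h>

#define NDADDR  12      /* assumed direct blocks per inode */
#define NIADDR  3       /* assumed levels of indirect blocks */

static int
indir_levels(int64_t bn, int64_t nindir)
{
    int64_t blockcnt = 1;
    int i;

    if (bn < NDADDR)
        return 0;       /* direct block, no indirection */
    for (i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
        if (i == 0)
            return -1;  /* EFBIG in the kernel */
        blockcnt *= nindir;
        if (bn < blockcnt)
            break;
    }
    return NIADDR - i + 1;
}

int
main(void)
{
    printf("bn=10 -> %d levels\n", indir_levels(10, 2048));     /* 0 */
    printf("bn=5000 -> %d levels\n", indir_levels(5000, 2048)); /* 2 */
    return 0;
}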
/* * Find a suitable location for the journal in the filesystem. * * Our strategy here is to look for a contiguous block of free space * at least "logfile" MB in size (plus room for any indirect blocks). * We start at the middle of the filesystem and check each cylinder * group working outwards. If "logfile" MB is not available as a * single contiguous chunk, then return the address and size of the * largest chunk found. * * XXX * At what stage should the search fail? Is it reasonable to give up * if the largest space we can find is less than a quarter of the * requested space? If the search fails entirely, return a block * address of 0 to indicate this. */ void wapbl_find_log_start(struct mount *mp, struct vnode *vp, off_t logsize, daddr_t *addr, daddr_t *indir_addr, size_t *size) { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs = ump->um_fs; struct vnode *devvp = ump->um_devvp; struct cg *cgp; struct buf *bp; uint8_t *blksfree; daddr_t blkno, best_addr, start_addr; daddr_t desired_blks, min_desired_blks; daddr_t freeblks, best_blks; int bpcg, cg, error, fixedsize, indir_blks, n, s; #ifdef FFS_EI const int needswap = UFS_FSNEEDSWAP(fs); #endif if (logsize == 0) { fixedsize = 0; /* We can adjust the size if tight */ logsize = lfragtosize(fs, fs->fs_dsize) / UFS_WAPBL_JOURNAL_SCALE; DPRINTF("suggested log size = %lld\n", logsize); logsize = max(logsize, UFS_WAPBL_MIN_JOURNAL_SIZE); logsize = min(logsize, UFS_WAPBL_MAX_JOURNAL_SIZE); DPRINTF("adjusted log size = %lld\n", logsize); } else { fixedsize = 1; DPRINTF("fixed log size = %lld\n", logsize); } desired_blks = logsize / fs->fs_bsize; DPRINTF("desired blocks = %lld\n", desired_blks); /* add in number of indirect blocks needed */ indir_blks = 0; if (desired_blks >= NDADDR) { struct indir indirs[NIADDR + 2]; int num; error = ufs_getlbns(vp, desired_blks, indirs, &num); if (error) { printf("%s: ufs_getlbns failed, error %d!\n", __func__, error); goto bad; } switch (num) { case 2: indir_blks = 1; /* 1st level indirect */ break; case 3: indir_blks = 1 + /* 1st level indirect */ 1 + /* 2nd level indirect */ indirs[1].in_off + 1; /* extra 1st level indirect */ break; default: printf("%s: unexpected numlevels %d from ufs_getlbns\n", __func__, num); *size = 0; goto bad; } desired_blks += indir_blks; } DPRINTF("desired blocks = %lld (including indirect)\n", desired_blks); /* * If a specific size wasn't requested, allow for a smaller log * if we're really tight for space... */ min_desired_blks = desired_blks; if (!fixedsize) min_desired_blks = desired_blks / 4; /* Look at number of blocks per CG. If it's too small, bail early. */ bpcg = fragstoblks(fs, fs->fs_fpg); if (min_desired_blks > bpcg) { printf("ffs_wapbl: cylinder group size of %lld MB " "is not big enough for journal\n", lblktosize(fs, bpcg) / (1024 * 1024)); goto bad; } /* * Start with the middle cylinder group, and search outwards in * both directions until we either find the requested log size * or reach the start/end of the file system. If we reach the * start/end without finding enough space for the full requested * log size, use the largest extent found if it is large enough * to satisfy our minimum size. * * XXX * Can we just use the cluster contigsum stuff (esp on UFS2) * here to simplify this search code?
*/ best_addr = 0; best_blks = 0; for (cg = fs->fs_ncg / 2, s = 0, n = 1; best_blks < desired_blks && cg >= 0 && cg < fs->fs_ncg; s++, n = -n, cg += n * s) { DPRINTF("check cg %d of %d\n", cg, fs->fs_ncg); error = bread(devvp, fsbtodb(fs, cgtod(fs, cg)), fs->fs_cgsize, &bp); if (error) { continue; } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); continue; } blksfree = cg_blksfree(cgp); for (blkno = 0; blkno < bpcg;) { /* look for next free block */ /* XXX use scanc() and fragtbl[] here? */ for (; blkno < bpcg - min_desired_blks; blkno++) if (ffs_isblock(fs, blksfree, blkno)) break; /* past end of search space in this CG? */ if (blkno >= bpcg - min_desired_blks) break; /* count how many free blocks in this extent */ start_addr = blkno; for (freeblks = 0; blkno < bpcg; blkno++, freeblks++) if (!ffs_isblock(fs, blksfree, blkno)) break; if (freeblks > best_blks) { best_blks = freeblks; best_addr = blkstofrags(fs, start_addr) + cgbase(fs, cg); if (freeblks >= desired_blks) { DPRINTF("found len %lld" " at offset %lld in gc\n", freeblks, start_addr); break; } } } brelse(bp); } DPRINTF("best found len = %lld, wanted %lld" " at addr %lld\n", best_blks, desired_blks, best_addr); if (best_blks < min_desired_blks) { *addr = 0; *indir_addr = 0; } else { /* put indirect blocks at start, and data blocks after */ *addr = best_addr + blkstofrags(fs, indir_blks); *indir_addr = best_addr; } *size = min(desired_blks, best_blks) - indir_blks; return; bad: *addr = 0; *indir_addr = 0; *size = 0; return; }
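/*
 * Illustrative sketch (not kernel code): the middle-out cylinder group
 * visiting order used by the search loop in wapbl_find_log_start().  The
 * group count ncg is a made-up value for the example.
 */
#include <stdio.h>

int
main(void)
{
    int ncg = 8;        /* assumed number of cylinder groups */
    int cg, s, n;

    /* start in the middle, then alternate one step left, one step right */
    for (cg = ncg / 2, s = 0, n = 1;
        cg >= 0 && cg < ncg;
        s++, n = -n, cg += n * s)
            printf("%d ", cg);
    printf("\n");       /* prints: 4 3 5 2 6 1 7 0 */
    return 0;
}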
/* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(struct vnode *vp, daddr64_t bn, daddr64_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; daddr64_t daddr, metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { /* * XXX * If MAXBSIZE is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; if (!nump) nump = &num; if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, DIP(ip, db[bn])); if (*bnp == 0) *bnp = -1; else if (runp) for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, DIP(ip, db[bn - 1]), DIP(ip, db[bn])); ++bn, ++*runp); return (0); } /* Get disk address out of indirect block array */ daddr = DIP(ip, ib[xap->in_off]); devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) brelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if (bp->b_flags & (B_DONE | B_DELWRI)) { ; } #ifdef DIAGNOSTIC else if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif else { bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); curproc->p_ru.ru_inblock++; /* XXX */ if ((error = biowait(bp)) != 0) { brelse(bp); return (error); } } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { daddr = ((int64_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int64_t *)bp->b_data)[bn - 1], ((int64_t *)bp->b_data)[bn]); ++bn, ++*runp); continue; } #endif /* FFS2 */ daddr = ((int32_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int32_t *)bp->b_data)[bn - 1], ((int32_t *)bp->b_data)[bn]); ++bn, ++*runp); } if (bp) brelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); }
/* * whiteout vnode call */ int ufs_whiteout(void *v) { struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap = v; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct *newdir; int error; struct ufsmount *ump = VFSTOUFS(dvp->v_mount); struct ufs_lookup_results *ulr; /* XXX should handle this material another way */ ulr = &VTOI(dvp)->i_crap; UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (ump->um_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ fstrans_start(dvp->v_mount, FSTRANS_SHARED); error = UFS_WAPBL_BEGIN(dvp->v_mount); if (error) break; #ifdef DIAGNOSTIC if (ump->um_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); newdir->d_ino = UFS_WINO; newdir->d_namlen = cnp->cn_namelen; memcpy(newdir->d_name, cnp->cn_nameptr, (size_t)cnp->cn_namelen); newdir->d_name[cnp->cn_namelen] = '\0'; newdir->d_type = DT_WHT; error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL); pool_cache_put(ufs_direct_cache, newdir); break; case DELETE: /* remove an existing directory whiteout */ fstrans_start(dvp->v_mount, FSTRANS_SHARED); error = UFS_WAPBL_BEGIN(dvp->v_mount); if (error) break; #ifdef DIAGNOSTIC if (ump->um_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); /* NOTREACHED */ } UFS_WAPBL_END(dvp->v_mount); fstrans_done(dvp->v_mount); return (error); }
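/*
 * Illustrative sketch (not kernel code): the shape of the whiteout entry
 * created in the CREATE case above.  In the 4.4BSD directory format a
 * whiteout is an ordinary struct direct whose inode field holds the
 * reserved whiteout inode and whose type byte is DT_WHT; the structure and
 * constants below are simplified stand-ins for the real definitions.
 */
#include <stdint.h>
#include <stdbool.h>

#define SKETCH_WINO	1	/* stand-in for UFS_WINO */
#define SKETCH_DT_WHT	14	/* stand-in for DT_WHT */

struct sketch_direct {
	uint32_t d_ino;
	uint16_t d_reclen;
	uint8_t	 d_type;
	uint8_t	 d_namlen;
	char	 d_name[255 + 1];
};

static bool
sketch_is_whiteout(const struct sketch_direct *dp)
{
	return dp->d_ino == SKETCH_WINO && dp->d_type == SKETCH_DT_WHT;
}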
/* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Should always be called with the mount point locked. */ int ffs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, allerror = 0, count; struct ffs_sync_args fsa; fs = ump->um_fs; /* * Write back modified superblock. * Consistency check that the superblock * is still in the buffer cache. */ if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { printf("fs = %s\n", fs->fs_fsmnt); panic("update: rofs mod"); } loop: /* * Write back each (modified) inode. */ fsa.allerror = 0; fsa.p = p; fsa.cred = cred; fsa.waitfor = waitfor; /* * Don't traverse the vnode list if we want to skip all of them. */ if (waitfor != MNT_LAZY) { vfs_mount_foreach_vnode(mp, ffs_sync_vnode, &fsa); allerror = fsa.allerror; } /* * Force stale file system control information to be flushed. */ if ((ump->um_mountp->mnt_flag & MNT_SOFTDEP) && waitfor == MNT_WAIT) { if ((error = softdep_flushworklist(ump->um_mountp, &count, p))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (count) goto loop; } if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } qsync(mp); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) allerror = error; return (allerror); }
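/*
 * Illustrative sketch (not kernel code) of the retry shape used above: a
 * flush pass may itself create new work (softdep_flushworklist() reports
 * how many items it pushed, which can leave new vnodes to clean), so the
 * sync scans again until a pass completes without producing anything new.
 * The callback names below are hypothetical.
 */
static void
sketch_sync_until_stable(void (*scan_vnodes)(void),
    int (*flush_worklist)(void))
{
	int count;

	do {
		scan_vnodes();			/* write back modified inodes */
		count = flush_worklist();	/* may queue new vnodes to clean */
	} while (count != 0);
}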
/* * Find the offset of the specified name within the given inode. * Returns 0 on success, ENOENT if the entry does not exist, or * EJUSTRETURN if the caller should revert to a linear search. * * If successful, the directory offset is stored in *offp, and a * pointer to a struct buf containing the entry is stored in *bpp. If * prevoffp is non-NULL, the offset of the previous entry within * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry * is the first in a block, the start of the block is used). */ int ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, struct buf **bpp, doff_t *prevoffp) { struct dirhash *dh, *dh_next; struct direct *dp; struct vnode *vp; struct buf *bp; doff_t blkoff, bmask, offset, prevoff; int i, slot; if ((dh = ip->i_dirhash) == NULL) return (EJUSTRETURN); /* * Move this dirhash towards the end of the list if it has a * score higher than the next entry, and acquire the dh_mtx. * Optimise the case where it's already the last by performing * an unlocked read of the TAILQ_NEXT pointer. * * In both cases, end up holding just dh_mtx. */ if (TAILQ_NEXT(dh, dh_list) != NULL) { DIRHASHLIST_LOCK(); DIRHASH_LOCK(dh); /* * If the new score will be greater than that of the next * entry, then move this entry past it. With both mutexes * held, dh_next won't go away, but its dh_score could * change; that's not important since it is just a hint. */ if (dh->dh_hash != NULL && (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && dh->dh_score >= dh_next->dh_score) { DIRHASH_ASSERT(dh->dh_onlist, ("dirhash: not on list")); TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, dh_list); } DIRHASHLIST_UNLOCK(); } else { /* Already the last, though that could change as we wait. */ DIRHASH_LOCK(dh); } if (dh->dh_hash == NULL) { DIRHASH_UNLOCK(dh); ufsdirhash_free(ip); return (EJUSTRETURN); } /* Update the score. */ if (dh->dh_score < DH_SCOREMAX) dh->dh_score++; vp = ip->i_vnode; bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; blkoff = -1; bp = NULL; restart: slot = ufsdirhash_hash(dh, name, namelen); if (dh->dh_seqopt) { /* * Sequential access optimisation. dh_seqoff contains the * offset of the directory entry immediately following * the last entry that was looked up. Check if this offset * appears in the hash chain for the name we are looking for. */ for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; i = WRAPINCR(i, dh->dh_hlen)) if (offset == dh->dh_seqoff) break; if (offset == dh->dh_seqoff) { /* * We found an entry with the expected offset. This * is probably the entry we want, but if not, the * code below will turn off seqopt and retry. */ slot = i; } else dh->dh_seqopt = 0; } for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; slot = WRAPINCR(slot, dh->dh_hlen)) { if (offset == DIRHASH_DEL) continue; DIRHASH_UNLOCK(dh); if (offset < 0 || offset >= DIP(ip, size)) panic("ufsdirhash_lookup: bad offset in hash array"); if ((offset & ~bmask) != blkoff) { if (bp != NULL) brelse(bp); blkoff = offset & ~bmask; if (UFS_BUFATOFF(ip, (off_t)blkoff, NULL, &bp) != 0) return (EJUSTRETURN); } dp = (struct direct *)(bp->b_data + (offset & bmask)); if (dp->d_reclen == 0 || dp->d_reclen > DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { /* Corrupted directory. */ brelse(bp); return (EJUSTRETURN); } if (dp->d_namlen == namelen && bcmp(dp->d_name, name, namelen) == 0) { /* Found. Get the prev offset if needed. 
*/ if (prevoffp != NULL) { if (offset & (DIRBLKSIZ - 1)) { prevoff = ufsdirhash_getprev(dp, offset); if (prevoff == -1) { brelse(bp); return (EJUSTRETURN); } } else prevoff = offset; *prevoffp = prevoff; } /* Check for sequential access, and update offset. */ if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset) dh->dh_seqopt = 1; dh->dh_seqoff = offset + DIRSIZ(0, dp); *bpp = bp; *offp = offset; return (0); } DIRHASH_LOCK(dh); if (dh->dh_hash == NULL) { DIRHASH_UNLOCK(dh); if (bp != NULL) brelse(bp); ufsdirhash_free(ip); return (EJUSTRETURN); } /* * When the name doesn't match in the seqopt case, go back * and search normally. */ if (dh->dh_seqopt) { dh->dh_seqopt = 0; goto restart; } } DIRHASH_UNLOCK(dh); if (bp != NULL) brelse(bp); return (ENOENT); }
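/*
 * Illustrative, user-space sketch (not kernel code) of the open-addressing
 * probe ufsdirhash_lookup() performs above.  Slots hold directory offsets;
 * one sentinel marks never-used slots (which end the chain) and another
 * marks deleted slots (which are skipped but do not end it), and the probe
 * wraps around the table the way WRAPINCR does.  In the real code a
 * candidate offset is then verified by comparing the on-disk name; here we
 * just match a wanted offset.  Names and sentinel values are hypothetical.
 */
#define SKETCH_EMPTY	(-1)	/* stand-in for DIRHASH_EMPTY */
#define SKETCH_DEL	(-2)	/* stand-in for DIRHASH_DEL */

static int
sketch_probe(const int *table, int hlen, int slot, int wanted_off)
{
	int off;

	for (; (off = table[slot]) != SKETCH_EMPTY;
	    slot = (slot + 1 == hlen) ? 0 : slot + 1) {
		if (off == SKETCH_DEL)
			continue;
		if (off == wanted_off)
			return slot;	/* found */
	}
	return -1;			/* hit an empty slot: not present */
}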
/* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ int ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) { struct fs *fs; struct inode *ip; struct ufs1_dinode *dp1; #ifdef FFS2 struct ufs2_dinode *dp2; #endif struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; if (ino > (ufsino_t)-1) panic("ffs_vget: alien ino_t %llu", (unsigned long long)ino); ump = VFSTOUFS(mp); dev = ump->um_dev; retry: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* Allocate a new vnode/inode. */ if ((error = getnewvnode(VT_UFS, mp, &ffs_vops, &vp)) != 0) { *vpp = NULL; return (error); } #ifdef VFSLCKDEBUG vp->v_flag |= VLOCKSWORK; #endif ip = pool_get(&ffs_ino_pool, PR_WAITOK|PR_ZERO); lockinit(&ip->i_lock, PINOD, "inode", 0, 0); ip->i_ump = ump; vref(ip->i_devvp); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; ip->i_vtbl = &ffs_vtbl; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ error = ufs_ihashins(ip); if (error) { /* * VOP_INACTIVE will treat this as a stale file * and recycle it quickly */ vrele(vp); if (error == EEXIST) goto retry; return (error); } /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); brelse(bp); *vpp = NULL; return (error); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { ip->i_din2 = pool_get(&ffs_dinode2_pool, PR_WAITOK); dp2 = (struct ufs2_dinode *) bp->b_data + ino_to_fsbo(fs, ino); *ip->i_din2 = *dp2; } else #endif { ip->i_din1 = pool_get(&ffs_dinode1_pool, PR_WAITOK); dp1 = (struct ufs1_dinode *) bp->b_data + ino_to_fsbo(fs, ino); *ip->i_din1 = *dp1; } brelse(bp); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = DIP(ip, nlink); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, &ffs_specvops, FFS_FIFOOPS, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (DIP(ip, gen) == 0) { DIP_ASSIGN(ip, gen, arc4random() & INT_MAX); if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1) DIP_ASSIGN(ip, gen, 1); /* Shouldn't happen */ if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { ip->i_ffs1_uid = ip->i_din1->di_ouid; ip->i_ffs1_gid = ip->i_din1->di_ogid; } *vpp = vp; return (0); }
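/*
 * Illustrative sketch (not kernel code) of the index arithmetic behind the
 * bread()/ino_to_fsbo() step above: once the filesystem block containing
 * the inode has been read, the dinode is located by indexing into that
 * block with the inode number modulo the number of inodes per block.  This
 * helper is a simplified stand-in; the real ino_to_fsba() also accounts
 * for the cylinder-group layout when picking the block itself.
 */
#include <stdint.h>

static unsigned
sketch_ino_slot_in_block(uint32_t ino, unsigned inodes_per_block)
{
	/* e.g. ino 1234 with 64 inodes per block lands in slot 18 */
	return ino % inodes_per_block;
}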
static int
quota_handle_cmd_clear(struct mount *mp, struct lwp *l,
    prop_dictionary_t cmddict, int type, prop_array_t datas)
{
	prop_array_t replies;
	prop_object_iterator_t iter;
	prop_dictionary_t data;
	uint32_t id;
	struct ufsmount *ump = VFSTOUFS(mp);
	int error, defaultq = 0;
	const char *idstr;

	if ((ump->um_flags & UFS_QUOTA2) == 0)
		return EOPNOTSUPP;

	replies = prop_array_create();
	if (replies == NULL)
		return ENOMEM;

	iter = prop_array_iterator(datas);
	if (iter == NULL) {
		prop_object_release(replies);
		return ENOMEM;
	}
	while ((data = prop_object_iterator_next(iter)) != NULL) {
		if (!prop_dictionary_get_uint32(data, "id", &id)) {
			if (!prop_dictionary_get_cstring_nocopy(data, "id",
			    &idstr))
				continue;
			if (strcmp(idstr, "default"))
				continue;
			id = 0;
			defaultq = 1;
		} else {
			defaultq = 0;
		}
		error = kauth_authorize_system(l->l_cred,
		    KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_MANAGE,
		    mp, KAUTH_ARG(id), NULL);
		if (error != 0)
			goto err;
#ifdef QUOTA2
		if (ump->um_flags & UFS_QUOTA2) {
			error = quota2_handle_cmd_clear(ump, type, id,
			    defaultq, data);
		} else
#endif
			panic("quota_handle_cmd_clear: no support ?");

		if (error && error != ENOENT)
			goto err;
	}
	prop_object_iterator_release(iter);
	if (!prop_dictionary_set_and_rel(cmddict, "data", replies)) {
		error = ENOMEM;
	} else {
		error = 0;
	}
	return error;
err:
	prop_object_iterator_release(iter);
	prop_object_release(replies);
	return error;
}
/* * VFS Operations. * * mount system call */ int ffs_mount(struct mount *mp, const char *path, void *data, struct nameidata *ndp, struct proc *p) { struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = NULL; struct fs *fs; char fname[MNAMELEN]; char fspec[MNAMELEN]; int error = 0, flags; int ronly; mode_t accessmode; error = copyin(data, &args, sizeof(struct ufs_args)); if (error) return (error); #ifndef FFS_SOFTUPDATES if (mp->mnt_flag & MNT_SOFTDEP) { printf("WARNING: soft updates isn't compiled in\n"); mp->mnt_flag &= ~MNT_SOFTDEP; } #endif /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag. */ if ((mp->mnt_flag & (MNT_SOFTDEP | MNT_ASYNC)) == (MNT_SOFTDEP | MNT_ASYNC)) { return (EINVAL); } /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; error = 0; ronly = fs->fs_ronly; if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { /* Flush any dirty data */ mp->mnt_flag &= ~MNT_RDONLY; VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); mp->mnt_flag |= MNT_RDONLY; /* * Get rid of files open for writing. */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (fs->fs_flags & FS_DOSOFTDEP) { error = softdep_flushfiles(mp, flags, p); mp->mnt_flag &= ~MNT_SOFTDEP; } else error = ffs_flushfiles(mp, flags, p); ronly = 1; } /* * Flush soft dependencies if disabling it via an update * mount. This may leave some items to be processed, * so don't do this yet XXX. */ if ((fs->fs_flags & FS_DOSOFTDEP) && !(mp->mnt_flag & MNT_SOFTDEP) && !(mp->mnt_flag & MNT_RDONLY) && fs->fs_ronly == 0) { #if 0 flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = softdep_flushfiles(mp, flags, p); #elif FFS_SOFTUPDATES mp->mnt_flag |= MNT_SOFTDEP; #endif } /* * When upgrading to a softdep mount, we must first flush * all vnodes. (not done yet -- see above) */ if (!(fs->fs_flags & FS_DOSOFTDEP) && (mp->mnt_flag & MNT_SOFTDEP) && fs->fs_ronly == 0) { #if 0 flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = ffs_flushfiles(mp, flags, p); #else mp->mnt_flag &= ~MNT_SOFTDEP; #endif } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (error) goto error_1; if (ronly && (mp->mnt_flag & MNT_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (suser(p, 0)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); VOP_UNLOCK(devvp, 0, p); if (error) goto error_1; } if (fs->fs_clean == 0) { #if 0 /* * It is safe to mount an unclean file system * if it was previously mounted with softdep * but we may lose space and must * sometimes run fsck manually. */ if (fs->fs_flags & FS_DOSOFTDEP) printf( "WARNING: %s was not properly unmounted\n", fs->fs_fsmnt); else #endif if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly unmounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); error = EROFS; goto error_1; } } if ((fs->fs_flags & FS_DOSOFTDEP)) { error = softdep_mount(devvp, mp, fs, p->p_ucred); if (error) goto error_1; } fs->fs_contigdirs = malloc((u_long)fs->fs_ncg, M_UFSMNT, M_WAITOK|M_ZERO); ronly = 0; } if (args.fspec == NULL) { /* * Process export requests. 
*/ error = vfs_export(mp, &ump->um_export, &args.export_info); if (error) goto error_1; else goto success; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ error = copyinstr(args.fspec, fspec, sizeof(fspec), NULL); if (error) goto error_1; if (disk_map(fspec, fname, MNAMELEN, DM_OPENBLCK) == -1) memcpy(fname, fspec, sizeof(fname)); NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fname, p); if ((error = namei(ndp)) != 0) goto error_1; devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { error = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { error = ENXIO; goto error_2; } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (suser(p, 0)) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); VOP_UNLOCK(devvp, 0, p); if (error) goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { /* * UPDATE * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp != ump->um_devvp) { if (devvp->v_rdev == ump->um_devvp->v_rdev) { vrele(devvp); } else { error = EINVAL; /* needs translation */ } } else vrele(devvp); /* * Update device name only on success */ if (!error) { /* * Save "mounted from" info for mount point (NULL pad) */ memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN); memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN); } } else { /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ memset(mp->mnt_stat.f_mntonname, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntonname, path, MNAMELEN); memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fname, MNAMELEN); memset(mp->mnt_stat.f_mntfromspec, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromspec, fspec, MNAMELEN); error = ffs_mountfs(devvp, mp, p); } if (error) goto error_2; /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ memcpy(&mp->mnt_stat.mount_info.ufs_args, &args, sizeof(args)); VFS_STATFS(mp, &mp->mnt_stat, p); success: if (path && (mp->mnt_flag & MNT_UPDATE)) { /* Update clean flag after changing read-onlyness. */ fs = ump->um_fs; if (ronly != fs->fs_ronly) { fs->fs_ronly = ronly; fs->fs_clean = ronly && (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0; if (ronly) free(fs->fs_contigdirs, M_UFSMNT, 0); } if (!ronly) { if (mp->mnt_flag & MNT_SOFTDEP) fs->fs_flags |= FS_DOSOFTDEP; else fs->fs_flags &= ~FS_DOSOFTDEP; } ffs_sbupdate(ump, MNT_WAIT); } return (0); error_2: /* error with devvp held */ vrele (devvp); error_1: /* no state to back out */ return (error); }
/* * Main code to turn off disk quotas for a filesystem. Does not change * flags. */ static int quotaoff1(struct thread *td, struct mount *mp, int type) { struct vnode *vp; struct vnode *qvp, *mvp; struct ufsmount *ump; struct dquot *dq; struct inode *ip; struct ucred *cr; int error; ump = VFSTOUFS(mp); UFS_LOCK(ump); KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0, ("quotaoff1: flags are invalid")); if ((qvp = ump->um_quotas[type]) == NULLVP) { UFS_UNLOCK(ump); return (0); } cr = ump->um_cred[type]; UFS_UNLOCK(ump); /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ again: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto again; } ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); VOP_UNLOCK(vp, 0); vrele(vp); } error = dqflush(qvp); if (error != 0) return (error); /* * Clear um_quotas before closing the quota vnode to prevent * access to the closed vnode from dqget/dqsync */ UFS_LOCK(ump); ump->um_quotas[type] = NULLVP; ump->um_cred[type] = NOCRED; UFS_UNLOCK(ump); vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY); qvp->v_vflag &= ~VV_SYSTEM; VOP_UNLOCK(qvp, 0); error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td); crfree(cr); return (error); }
/* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ int ffs_reload(struct mount *mountp, struct ucred *cred, struct proc *p) { struct vnode *devvp; caddr_t space; struct fs *fs, *newfs; int i, blks, size, error; int32_t *lp; struct buf *bp = NULL; struct ffs_reload_args fra; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, 0, cred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) panic("ffs_reload: dirty1"); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mountp)->um_fs; error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, SBSIZE, &bp); if (error) { brelse(bp); return (error); } newfs = (struct fs *)bp->b_data; if (ffs_validate(newfs) == 0) { brelse(bp); return (EINVAL); } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_ronly = fs->fs_ronly; memcpy(fs, newfs, fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); mountp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs1_compat_read(fs, VFSTOUFS(mountp), fs->fs_sblockloc); ffs_oldfscompat(fs); (void)ffs_statfs(mountp, &mountp->mnt_stat, p); /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, &bp); if (error) { brelse(bp); return (error); } memcpy(space, bp->b_data, size); space += size; brelse(bp); } if ((fs->fs_flags & FS_DOSOFTDEP)) (void) softdep_mount(devvp, mountp, fs, cred); /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } fra.p = p; fra.cred = cred; fra.fs = fs; fra.devvp = devvp; error = vfs_mount_foreach_vnode(mountp, ffs_reload_vnode, &fra); return (error); }
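/*
 * Illustrative, user-space sketch (not kernel code) of the chunking used
 * when the summary information is re-read above: the csum area is cssize
 * bytes long and is read block by block, with a possibly shorter final
 * chunk.  howmany() is the usual round-up division; the other names are
 * hypothetical stand-ins for the fs fields.
 */
#include <stdio.h>

#define sketch_howmany(x, y)	(((x) + ((y) - 1)) / (y))

static void
sketch_read_csum_chunks(int cssize, int fsize, int frag)
{
	int bsize = fsize * frag;
	int blks = sketch_howmany(cssize, fsize);
	int i, size;

	/*
	 * e.g. cssize 40000, fsize 2048, frag 8 produces reads of
	 * 16384, 16384 and 8192 bytes.
	 */
	for (i = 0; i < blks; i += frag) {
		size = bsize;
		if (i + frag > blks)
			size = (blks - i) * fsize;
		printf("read %d bytes at fragment %d\n", size, i);
	}
}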
/* * Real work associated with setting a vnode's extended attributes; * assumes that the attribute lock has already been grabbed. */ static int ufs_extattr_set(struct vnode *vp, int attrnamespace, const char *name, struct uio *uio, struct ucred *cred, struct thread *td) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; int error = 0, ioflag; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (!ufs_extattr_valid_attrname(attrnamespace, name)) return (EINVAL); error = extattr_check_cred(vp, attrnamespace, cred, td, VWRITE); if (error) return (error); attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) return (ENOATTR); /* * Early rejection of invalid offsets/length. * Reject: any offset but 0 (replace) * Any size greater than attribute size limit */ if (uio->uio_offset != 0 || uio->uio_resid > attribute->uele_fileheader.uef_size) return (ENXIO); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Write out a data header for the data. */ ueh.ueh_len = uio->uio_resid; ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE; ueh.ueh_i_gen = ip->i_gen; local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_WRITE; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_td = td; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); /* * Acquire locks. * * Don't need to get a lock on the backing file if the setattr is * being applied to the backing file, as the lock is already held. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_RETRY); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; if (local_aio.uio_resid != 0) { error = ENXIO; goto vopunlock_exit; } /* * Write out user data. */ uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag, ump->um_extattr.uepm_ucred); vopunlock_exit: uio->uio_offset = 0; if (attribute->uele_backing_vnode != vp) VOP_UNLOCK(attribute->uele_backing_vnode, 0); return (error); }
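/*
 * Illustrative sketch (not kernel code) of the backing-file layout implied
 * by the base_offset computation above: the file starts with one file
 * header, after which every inode owns a fixed-size slot holding a
 * per-entry header plus up to the attribute's maximum data size.  The
 * helper and its parameters are hypothetical stand-ins for the struct
 * sizes and uef_size used in the real code.
 */
#include <stdint.h>

static uint64_t
sketch_easlot_offset(uint64_t fileheader_sz, uint64_t entryheader_sz,
    uint64_t maxdata, uint64_t ino)
{
	/* e.g. sketch_easlot_offset(16, 24, 64, 10) == 16 + 10 * 88 == 896 */
	return fileheader_sz + ino * (entryheader_sz + maxdata);
}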
/* * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ int ext2fs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) { struct m_ext2fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; void *cp; ump = VFSTOUFS(mp); dev = ump->um_dev; retry: if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) return (0); /* Allocate a new vnode/inode. */ error = getnewvnode(VT_EXT2FS, mp, ext2fs_vnodeop_p, NULL, &vp); if (error) { *vpp = NULL; return (error); } ip = pool_get(&ext2fs_inode_pool, PR_WAITOK); mutex_enter(&ufs_hashlock); if ((*vpp = ufs_ihashget(dev, ino, 0)) != NULL) { mutex_exit(&ufs_hashlock); ungetnewvnode(vp); pool_put(&ext2fs_inode_pool, ip); goto retry; } vp->v_vflag |= VV_LOCKSWORK; memset(ip, 0, sizeof(struct inode)); vp->v_data = ip; ip->i_vnode = vp; ip->i_ump = ump; ip->i_e2fs = fs = ump->um_e2fs; ip->i_dev = dev; ip->i_number = ino; ip->i_e2fs_last_lblk = 0; ip->i_e2fs_last_blk = 0; genfs_node_init(vp, &ext2fs_genfsops); /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); mutex_exit(&ufs_hashlock); /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ino)), (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); *vpp = NULL; return (error); } cp = (char *)bp->b_data + (ino_to_fsbo(fs, ino) * EXT2_DINODE_SIZE(fs)); ip->i_din.e2fs_din = pool_get(&ext2fs_dinode_pool, PR_WAITOK); e2fs_iload((struct ext2fs_dinode *)cp, ip->i_din.e2fs_din); ext2fs_set_inode_guid(ip); brelse(bp, 0); /* If the inode was deleted, reset all fields */ if (ip->i_e2fs_dtime != 0) { ip->i_e2fs_mode = 0; (void)ext2fs_setsize(ip, 0); (void)ext2fs_setnblock(ip, 0); memset(ip->i_e2fs_blocks, 0, sizeof(ip->i_e2fs_blocks)); } /* * Initialize the vnode from the inode, check for aliases. */ error = ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; vref(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_e2fs_gen == 0) { if (++ext2gennumber < (u_long)time_second) ext2gennumber = time_second; ip->i_e2fs_gen = ext2gennumber; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } uvm_vnp_setsize(vp, ext2fs_size(ip)); *vpp = vp; return (0); }
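/*
 * Illustrative, user-space sketch (not kernel code) of the generation
 * seeding used above for old inodes whose i_e2fs_gen is still zero: a
 * global counter is incremented but never allowed to fall below the
 * current time, so freshly assigned generations keep moving forward across
 * mounts.  Names are hypothetical.
 */
#include <time.h>

static unsigned long sketch_gennumber;

static unsigned long
sketch_next_gen(void)
{
	time_t now = time(NULL);

	if (++sketch_gennumber < (unsigned long)now)
		sketch_gennumber = (unsigned long)now;
	return sketch_gennumber;
}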