int fusefs_rmdir(void *v) { struct vop_rmdir_args *ap = v; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct fusefs_node *ip, *dp; struct fusefs_mnt *fmp; struct fusebuf *fbuf; int error; ip = VTOI(vp); dp = VTOI(dvp); fmp = (struct fusefs_mnt *)ip->ufs_ino.i_ump; if (!fmp->sess_init) { error = ENXIO; goto out; } if (fmp->undef_op & UNDEF_RMDIR) { error = ENOSYS; goto out; } /* * No rmdir "." please. */ if (dp == ip) { vrele(dvp); vput(vp); return (EINVAL); } VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); fbuf = fb_setup(cnp->cn_namelen + 1, dp->ufs_ino.i_number, FBT_RMDIR, p); memcpy(fbuf->fb_dat, cnp->cn_nameptr, cnp->cn_namelen); fbuf->fb_dat[cnp->cn_namelen] = '\0'; error = fb_queue(fmp->dev, fbuf); if (error) { if (error == ENOSYS) fmp->undef_op |= UNDEF_RMDIR; if (error != ENOTEMPTY) VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); fb_delete(fbuf); goto out; } cache_purge(dvp); vput(dvp); dvp = NULL; cache_purge(ITOV(ip)); fb_delete(fbuf); out: if (dvp) vput(dvp); VN_KNOTE(vp, NOTE_DELETE); vput(vp); return (error); }
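/*
 * Illustrative user-space sketch (not the fusefs code itself): packing a
 * length-counted directory-entry name into a NUL-terminated message payload,
 * the same pattern fusefs_rmdir() uses when it sizes the fusebuf as
 * cn_namelen + 1 and terminates the copied name.  The struct and function
 * names here are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct namebuf {
	size_t	len;	/* payload size, including the trailing NUL */
	char	dat[];	/* NUL-terminated component name */
};

static struct namebuf *
namebuf_setup(const char *name, size_t namelen)
{
	struct namebuf *nb;

	/* One extra byte so the consumer can treat dat as a C string. */
	nb = malloc(sizeof(*nb) + namelen + 1);
	if (nb == NULL)
		return (NULL);
	nb->len = namelen + 1;
	memcpy(nb->dat, name, namelen);
	nb->dat[namelen] = '\0';
	return (nb);
}

int
main(void)
{
	struct namebuf *nb = namebuf_setup("subdir", 6);

	if (nb == NULL)
		return (1);
	printf("payload \"%s\", %zu bytes\n", nb->dat, nb->len);
	free(nb);
	return (0);
}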
/* * Update disk usage, and take corrective action. */ int chkdq(struct inode *ip, ufs2_daddr_t change, struct ucred *cred, int flags) { struct dquot *dq; ufs2_daddr_t ncurblocks; struct vnode *vp = ITOV(ip); int i, error, warn, do_check; /* * Disk quotas must be turned off for system files. Currently * snapshot and quota files. */ if ((vp->v_vflag & VV_SYSTEM) != 0) return (0); /* * XXX: Turn off quotas for files with a negative UID or GID. * This prevents the creation of 100GB+ quota files. */ if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0) return (0); #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkdq1"); ncurblocks = dq->dq_curblocks + change; if (ncurblocks >= 0) dq->dq_curblocks = ncurblocks; else dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); } return (0); } if ((flags & FORCE) == 0 && priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) do_check = 1; else do_check = 0; for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; warn = 0; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkdq2"); if (do_check) { error = chkdqchg(ip, change, cred, i, &warn); if (error) { /* * Roll back user quota changes when * group quota failed. */ while (i > 0) { --i; dq = ip->i_dquot[i]; if (dq == NODQUOT) continue; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkdq3"); ncurblocks = dq->dq_curblocks - change; if (ncurblocks >= 0) dq->dq_curblocks = ncurblocks; else dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); } return (error); } } /* Reset timer when crossing soft limit */ if (dq->dq_curblocks + change >= dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_btime = time_second + ITOUMP(ip)->um_btime[i]; dq->dq_curblocks += change; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); if (warn) uprintf("\n%s: warning, %s disk quota exceeded\n", ITOVFS(ip)->mnt_stat.f_mntonname, quotatypes[i]); } return (0); }
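/*
 * Minimal user-space sketch of the soft-limit grace-timer rule used in
 * chkdq(): the timer is (re)armed only when an allocation crosses the soft
 * limit from below, so a user who stays over quota keeps the original
 * deadline.  The types and the 7-day default below are assumptions for the
 * example, not values taken from the kernel code above.
 */
#include <stdio.h>
#include <time.h>

struct blkquota {
	long long	curblocks;	/* current usage, in blocks */
	long long	bsoftlimit;	/* soft limit, 0 = none */
	time_t		btime;		/* grace deadline once over soft limit */
};

#define GRACE_SECONDS	(7 * 24 * 60 * 60)	/* assumed default grace period */

static void
quota_add_blocks(struct blkquota *q, long long change, time_t now)
{
	/* Arm the timer only on the transition across the soft limit. */
	if (q->bsoftlimit != 0 &&
	    q->curblocks + change >= q->bsoftlimit &&
	    q->curblocks < q->bsoftlimit)
		q->btime = now + GRACE_SECONDS;
	q->curblocks += change;
}

int
main(void)
{
	struct blkquota q = { .curblocks = 90, .bsoftlimit = 100, .btime = 0 };
	time_t now = time(NULL);

	quota_add_blocks(&q, 20, now);		/* crosses the limit: timer armed */
	quota_add_blocks(&q, 5, now + 60);	/* already over: deadline unchanged */
	printf("usage %lld, deadline %ld\n", q.curblocks, (long)q.btime);
	return (0);
}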
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ ffs_balloc( register struct inode *ip, register ufs_daddr_t lbn, int size, kauth_cred_t cred, struct buf **bpp, int flags, int * blk_alloc) { register struct fs *fs; register ufs_daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int devBlockSize=0; int alloc_buffer = 1; struct mount *mp=vp->v_mount; #if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ *bpp = NULL; if (lbn < 0) return (EFBIG); fs = ip->i_fs; if (flags & B_NOBUFF) alloc_buffer = 0; if (blk_alloc) *blk_alloc = 0; /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { /* the filesize prior to this write can fit in direct * blocks (ie. fragmentaion is possibly done) * we are now extending the file write beyond * the block which has end of file prior to this write */ osize = blksize(fs, ip, nb); /* osize gives disk allocated size in the last block. It is * either in fragments or a file system block size */ if (osize < fs->fs_bsize && osize > 0) { /* few fragments are already allocated,since the * current extends beyond this block * allocate the complete block as fragments are only * in last block */ error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); /* adjust the inode size we just grew */ /* it is in nb+1 as nb starts from 0 */ ip->i_size = (nb + 1) * fs->fs_bsize; ubc_setsize(vp, (off_t)ip->i_size); ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; if ((flags & B_SYNC) || (!alloc_buffer)) { if (!alloc_buffer) buf_setflags(bp, B_NOCACHE); buf_bwrite(bp); } else buf_bdwrite(bp); /* note that bp is already released here */ } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { if (alloc_buffer) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); return (error); } *bpp = bp; } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { if (alloc_buffer) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp); if (error) { buf_brelse(bp); return (error); } ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } } else { error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; /* adjust the inode size we just grew */ ip->i_size = (lbn * fs->fs_bsize) + size; ubc_setsize(vp, (off_t)ip->i_size); if (!alloc_buffer) { buf_setflags(bp, B_NOCACHE); if (flags & B_SYNC) buf_bwrite(bp); else buf_bdwrite(bp); } else *bpp = bp; return (0); } } else { if (ip->i_size < (lbn + 1) * fs->fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); if (alloc_buffer) { bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE); buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb))); if (flags & B_CLRBUF) buf_clear(bp); } ip->i_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (blk_alloc) { *blk_alloc = nsize; } if (alloc_buffer) *bpp = bp; return (0); } } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, lbn, indirs, &num)) return(error); #if DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) return (error); nb = newb; *allocblk++ = nb; bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(bp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(bp); } else if ((error = buf_bwrite(bp)) != 0) { goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); goto fail; } bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ if (i == num) break; i += 1; if (nb != 0) { buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(nbp); /* * Write synchronously conditional on mount flags. 
*/ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(nbp); } else if (error = buf_bwrite(nbp)) { buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if ((flags & B_SYNC)) { buf_bwrite(bp); } else { buf_bdwrite(bp); } if(alloc_buffer ) { nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); if (flags & B_CLRBUF) buf_clear(nbp); } if (blk_alloc) { *blk_alloc = fs->fs_bsize; } if(alloc_buffer) *bpp = nbp; return (0); } buf_brelse(bp); if (alloc_buffer) { if (flags & B_CLRBUF) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp); if (error) { buf_brelse(nbp); goto fail; } } else { nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); #endif /* QUOTA */ ip->i_blocks -= btodb(deallocated, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); }
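/*
 * Standalone sketch of the indirection-depth computation that ufs_getlbns()
 * performs for ffs_balloc(): given a logical block number, decide whether it
 * is a direct block or how many levels of indirect blocks must be walked.
 * The NDADDR/NINDIR values below (12 direct pointers, 2048 pointers per
 * 8K indirect block with 32-bit entries) are assumed example geometry.
 */
#include <stdio.h>

#define EX_NDADDR	12	/* direct block pointers in the inode */
#define EX_NINDIR	2048	/* block pointers per indirect block */

static int
indirection_levels(long long lbn)
{
	long long span = EX_NINDIR;
	int level;

	if (lbn < EX_NDADDR)
		return (0);		/* direct block */
	lbn -= EX_NDADDR;
	for (level = 1; level <= 3; level++) {
		if (lbn < span)
			return (level);
		lbn -= span;
		span *= EX_NINDIR;
	}
	return (-1);			/* beyond triple indirect: EFBIG */
}

int
main(void)
{
	long long samples[] = { 0, 11, 12, 2059, 2060, 4196363, 4196364 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("lbn %lld -> %d level(s) of indirection\n",
		    samples[i], indirection_levels(samples[i]));
	return (0);
}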
/* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED * flag is used to specify that the inode needs to be updated but that the * times have already been set. The access and modified times are taken from * the second and third parameters; the inode change time is always taken * from the current time. If waitfor is set, then wait for the disk write * of the inode to complete. */ int ffs_update(struct inode *ip, struct timespec *atime, struct timespec *mtime, int waitfor) { struct vnode *vp; struct fs *fs; struct buf *bp; int error; struct timespec ts; vp = ITOV(ip); if (vp->v_mount->mnt_flag & MNT_RDONLY) { ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); } if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && waitfor != MNT_WAIT) return (0); getnanotime(&ts); if (ip->i_flag & IN_ACCESS) { DIP_ASSIGN(ip, atime, atime ? atime->tv_sec : ts.tv_sec); DIP_ASSIGN(ip, atimensec, atime ? atime->tv_nsec : ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_ASSIGN(ip, mtime, mtime ? mtime->tv_sec : ts.tv_sec); DIP_ASSIGN(ip, mtimensec, mtime ? mtime->tv_nsec : ts.tv_nsec); ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) { DIP_ASSIGN(ip, ctime, ts.tv_sec); DIP_ASSIGN(ip, ctimensec, ts.tv_nsec); } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); fs = ip->i_fs; /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { ip->i_din1->di_ouid = ip->i_ffs1_uid; ip->i_din1->di_ogid = ip->i_ffs1_gid; } error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != DIP(ip, nlink)) panic("ffs_update: bad link cnt"); #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; else #endif *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; if (waitfor && !DOINGASYNC(vp)) { return (bwrite(bp)); } else { bdwrite(bp); return (0); } }
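/*
 * Sketch of the timestamp policy ffs_update() implements: access and
 * modification times come from the caller-supplied timespecs when present,
 * otherwise from the current time, while the change time always reflects the
 * current time.  The flag names and struct here are illustrative stand-ins,
 * not the kernel's inode fields.
 */
#include <stdio.h>
#include <time.h>

#define EX_ACCESS	0x1
#define EX_UPDATE	0x2
#define EX_CHANGE	0x4

struct ex_times {
	struct timespec atime, mtime, ctime;
};

static void
apply_times(struct ex_times *t, int flags,
    const struct timespec *atime, const struct timespec *mtime)
{
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);
	if (flags & EX_ACCESS)
		t->atime = atime ? *atime : now;
	if (flags & EX_UPDATE)
		t->mtime = mtime ? *mtime : now;
	if (flags & EX_CHANGE)
		t->ctime = now;		/* ctime is always "now" */
}

int
main(void)
{
	struct ex_times t = { 0 };
	struct timespec old = { .tv_sec = 1000000, .tv_nsec = 0 };

	apply_times(&t, EX_ACCESS | EX_UPDATE | EX_CHANGE, &old, NULL);
	printf("atime %ld, mtime %ld, ctime %ld\n",
	    (long)t.atime.tv_sec, (long)t.mtime.tv_sec, (long)t.ctime.tv_sec);
	return (0);
}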
/* * Check the inode limit, applying corrective action. */ int chkiq(struct inode *ip, int change, struct ucred *cred, int flags) { struct dquot *dq; ino_t ncurinodes; int i, error, warn, do_check; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkiq1"); ncurinodes = dq->dq_curinodes + change; /* XXX: ncurinodes is unsigned */ if (dq->dq_curinodes != 0 && ncurinodes >= 0) dq->dq_curinodes = ncurinodes; else dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); } return (0); } if ((flags & FORCE) == 0 && priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0)) do_check = 1; else do_check = 0; for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; warn = 0; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkiq2"); if (do_check) { error = chkiqchg(ip, change, cred, i, &warn); if (error) { /* * Roll back user quota changes when * group quota failed. */ while (i > 0) { --i; dq = ip->i_dquot[i]; if (dq == NODQUOT) continue; DQI_LOCK(dq); DQI_WAIT(dq, PINOD+1, "chkiq3"); ncurinodes = dq->dq_curinodes - change; /* XXX: ncurinodes is unsigned */ if (dq->dq_curinodes != 0 && ncurinodes >= 0) dq->dq_curinodes = ncurinodes; else dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); } return (error); } } /* Reset timer when crossing soft limit */ if (dq->dq_curinodes + change >= dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_itime = time_second + ip->i_ump->um_itime[i]; dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; DQI_UNLOCK(dq); if (warn) uprintf("\n%s: warning, %s inode quota exceeded\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[i]); } return (0); }
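/*
 * The XXX comments in chkiq() note that the inode counter is unsigned, so a
 * plain ">= 0" test cannot detect wraparound when releasing inodes.  Below is
 * a user-space sketch of one way such a clamp can be written; it is an
 * illustration of the intended guard, not the kernel's code.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t
release_inodes(uint64_t curinodes, int64_t change)
{
	uint64_t ncur = curinodes + (uint64_t)change;	/* change < 0: may wrap */

	/*
	 * Accept the new value only if the old count was nonzero and the
	 * addition did not wrap past zero; otherwise clamp to zero.
	 */
	if (curinodes != 0 && ncur <= curinodes)
		return (ncur);
	return (0);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)release_inodes(10, -3));	/* 7 */
	printf("%llu\n", (unsigned long long)release_inodes(2, -5));	/* clamped to 0 */
	printf("%llu\n", (unsigned long long)release_inodes(0, -1));	/* stays 0 */
	return (0);
}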
DENT_T * vnode_iop_lookup( INODE_T *dir, struct dentry *dent, struct nameidata *nd ) { char *name; mdki_boolean_t rele = FALSE; int err; VNODE_T *dvp; VNODE_T *rt_vnode; /* returned vnode */ INODE_T *rt_inode = NULL; /* returned inode ptr */ DENT_T * real_dentry; DENT_T *found_dentry = dent; VATTR_T *vap; struct lookup_ctx ctx; CALL_DATA_T cd; ASSERT_I_SEM_MINE(dir); /* We can find our parent entry via the dentry provided to us. */ ASSERT(dent->d_parent->d_inode == dir); if (dent->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); name = /* drop the const */(char *) dent->d_name.name; mdki_linux_init_call_data(&cd); /* We pass along the dentry, as well as the parent inode so that * mvop_linux_lookup_* has everything it needs, even if it is passed in * the realvp, and it gets back a negative dentry. */ dvp = ITOV(dir); ctx.dentrypp = &found_dentry; ctx.flags = LOOKUP_CTX_VALID; err = VOP_LOOKUP(dvp, name, &rt_vnode, (struct pathname *)NULL, VNODE_LF_LOOKUP, NULL, &cd, &ctx); err = mdki_errno_unix_to_linux(err); if (!err) { ASSERT(rt_vnode != NULL); if (MDKI_INOISCLRVN(VTOI(rt_vnode))) { /* unwrap to the real object */ ASSERT(CVN_TO_DENT(rt_vnode)); rt_inode = CVN_TO_INO(rt_vnode); if (MDKI_INOISMVFS(rt_inode)) { VN_HOLD(ITOV(rt_inode)); VN_RELE(rt_vnode); rt_vnode = ITOV(rt_inode); } else { igrab(rt_inode); VN_RELE(rt_vnode); rt_vnode = NULL; } } else rt_inode = VTOI(rt_vnode); } if (!err && (found_dentry != dent)) { mdki_linux_destroy_call_data(&cd); /* The hold was granted in makeloopnode() in the 'nocover' case. */ if (rt_vnode != NULL) VN_RELE(rt_vnode); else iput(rt_inode); /* * found_dentry is the real socket/block/char device node's dentry. * See mvop_linux_lookup_component(). * * For sockets, we use a dentry in our tree (we fill in the * provided dentry "dent") linked to the inode of the real * object. This lets file name operations work in our * namespace, and lets socket connections all work (as they're * keyed off of the inode address) from inside to outside & * v.v. * * We also do this for VCHR, VBLK devices, and it seems to work OK * (e.g. make a node the same as /dev/tty, you can write to it) */ switch (found_dentry->d_inode->i_mode & S_IFMT) { case S_IFSOCK: case S_IFCHR: case S_IFBLK: ASSERT(dent->d_inode == NULL); MDKI_SET_DOPS(dent, &vnode_shadow_dentry_ops); igrab(found_dentry->d_inode); VNODE_D_ADD(dent, found_dentry->d_inode); VNODE_DPUT(found_dentry); found_dentry = NULL; /* tell caller to use original dentry */ break; default: /* use returned dentry */ break; } return(found_dentry); } /* We need to pass back dentry ops even for negative dentries, I think. * Shadow inodes will have been taken care of in lookup_component. 
*/ if (dent->d_op != &vnode_shadow_dentry_ops) { if (dent->d_parent->d_op == &vnode_setview_dentry_ops) MDKI_SET_DOPS(dent, &vnode_setview_dentry_ops); else MDKI_SET_DOPS(dent, &vnode_dentry_ops); } vap = VATTR_ALLOC(); if (vap == NULL) { err = -ENOMEM; goto alloc_err; } if (!err && MDKI_INOISMVFS(rt_inode)) { /* fetch attributes & place in inode */ VATTR_SET_MASK(vap, AT_ALL); err = VOP_GETATTR(rt_vnode, vap, GETATTR_FLAG_UPDATE_ATTRS, &cd); err = mdki_errno_unix_to_linux(err); if (err == -EOPNOTSUPP) /* ignore it */ err = 0; else if (err) rele = TRUE; else if ((rt_vnode->v_flag & VLOOPROOT) != 0 && rt_inode == vnlayer_get_urdir_inode()) { /* return the real root */ VN_RELE(rt_vnode); VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); return VNODE_DGET(vnlayer_get_root_dentry()); } else if (vnlayer_looproot_vp != NULL && rt_vnode == vnlayer_looproot_vp && (real_dentry = MVOP_DENT(rt_inode, &vnode_dentry_ops)) != NULL) { /* return the real /view */ VN_RELE(rt_vnode); VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); return real_dentry; } } VATTR_FREE(vap); alloc_err: mdki_linux_destroy_call_data(&cd); /* It's an mnode-based object, set up a dentry for it */ /* We don't return ENOENT. For Linux, the negative dentry is enough */ switch (err) { case -ENOENT: err = 0; ASSERT(rt_inode == NULL); VNODE_D_ADD(dent, rt_inode); break; case 0: /* We will consume the count on rt_inode as a reference for dent */ /* * For VOB vnodes, we maintain two separate dentry trees for * the vnodes. One tree is for setview-mode names (process * sets to a view context, then looks directly at the VOB * mountpoint without any cover vnodes in the path). The * other tree is for view-extended naming into a VOB, with * dentries starting at the view tag and covering non-VOB * objects until crossing a mount point into a VOB. * * Mostly the system doesn't care, as long as it goes down the * tree from parent to child, since it will be traversing only one * of the dentry trees. But when the cache misses, the system calls * this lookup method and wants to get a dentry in return. * There are standard interfaces ( d_splice_alias() in 2.6) * which can find a good dentry referencing the inode returned * by the file system's lookup method, but these methods don't * work right when we have VOB directory vnodes with both setview * and view-extended dentries. We implement our own function * [vnlayer_inode2dentry_internal()] which knows the * distinctions and the rules for determining that an existing * attached dentry is valid for the lookup request. * * We have our own d_compare() function which forces all VOB * lookups to come to the inode lookup method (this function), * and then we get to choose the right dentry to return. We * have our own lookup cache inside MVFS so we don't care that * the dentry cache is always missing on our names. * * If we have to make a new dentry, we may need to merge it * with an NFS-created temporary dentry using d_move() * (d_splice_alias() would do this for us, but we can't use it * for reasons listed above). */ /* * We want to find the "right" dentry (if there is one), so * look for one that has a d_parent with the same dentry ops * (indicating it's in the same dentry tree). */ if (S_ISDIR(rt_inode->i_mode)) { /* * It has been empirically shown that we have to check the * parent of the dentry. If the parent has been checked out * it is possible for the cache lookup to return an inode * from the tree below the old parent directory. 
If this * happens on a rename, the system will panic because the * Linux rename code checks the parent of the returned * dentry to see that it matches what it has for a parent. */ found_dentry = vnlayer_inode2dentry_internal(rt_inode, dent->d_parent, NULL, dent->d_op); } else { /* * For non-directories, we also need to consider the * parent & the requested name so that * vnlayer_inode2dentry_internal() finds the right dentry. * (There may be multiple hard links; we want the one in * the same directory with the same name) */ found_dentry = vnlayer_inode2dentry_internal(rt_inode, dent->d_parent, &dent->d_name, dent->d_op); } if (found_dentry != NULL) { ASSERT(found_dentry->d_inode == rt_inode); /* * If the existing one is a disconnected dentry, we need * to move the old one to the new one (just like * d_splice_alias) to get the proper name/parent attached * in the dcache. */ if ((found_dentry->d_flags & DCACHE_DISCONNECTED) != 0) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ASSERT((dent->d_flags & DCACHE_UNHASHED) != 0); #else ASSERT((dent->d_vfs_flags & DCACHE_UNHASHED) != 0); #endif d_rehash(dent); d_move(found_dentry, dent); } /* Release our count. found_dentry also references inode. */ iput(rt_inode); return found_dentry; } /* * Nothing suitable, wire it up to the proposed dentry. */ VNODE_D_ADD(dent, rt_inode); break; default: /* some other error case */ if (rele) VN_RELE(rt_vnode); break; } if (err) return ERR_PTR(err); else return NULL; }
/* * Truncate the inode oip to at most length size, freeing the * disk blocks. */ int ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp; daddr64_t lastblock, datablocks; daddr64_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; daddr64_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; int i, aflags, error, allerror, needextclean = 0; off_t osize; #ifdef FFS2 daddr64_t extblocks; int softdepslowdown; #endif if (length < 0) return (EINVAL); ovp = ITOV(oip); fs = oip->i_fs; if (ovp->v_type != VREG && ovp->v_type != VDIR && ovp->v_type != VLNK) return (0); /* * Historically clients did not have to specify which data they were * truncating. So, if not specified, we assume traditional behavior, * e.g., just the normal data. */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; if (DIP(oip, size) == length && !(flags & IO_EXT)) return (0); datablocks = DIP(oip, blocks); #ifdef FFS2 /* * If we are truncating the extended-attributes, and cannot do it with * soft updates, then do it slowly here. If we are truncating both the * extended attributes and the file contents (e.g., the file is being * unlinked), then pick it off with soft updates below. */ needextclean = 0; softdepslowdown = DOINGSOFTDEP(ovp) && softdep_slowdown(ovp); extblocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_ffs2_extsize > 0) { extblocks = btodb(fragroundup(fs, oip->i_ffs2_extsize)); datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { if (DOINGSOFTDEP(ovp) && softdepslowdown == 0 && length == 0) { if ((flags & IO_NORMAL) == 0) { softdep_setup_freeblocks(oip, length, IO_EXT); return (0); } needextclean = 1; } else { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncation of " "extended attributes"); #endif error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc); if (error) return (error); osize = oip->i_ffs2_extsize; oip->i_ffs2_blocks -= extblocks; (void)ufs_quota_free_blocks(oip, extblocks, NOCRED); (void) vinvalbuf(ovp, V_EXT, cred, curproc, 0, 0); oip->i_ffs2_extsize = 0; for (i = 0; i < NXADDR; i++) { oldblks[i] = oip->i_ffs2_extb[i]; oip->i_ffs2_extb[i] = 0; } oip->i_flag |= IN_CHANGE | IN_UPDATE; error = UFS_UPDATE(oip, MNT_WAIT); if (error) return (error); for (i = 0; i < NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(oip, oldblks[i], sblksize(fs, osize, i)); } } } if (!(flags & IO_NORMAL)) return (0); /* Nothing else to do. */ #endif /* FFS2 */ if (ovp->v_type == VLNK && (DIP(oip, size) < ovp->v_mount->mnt_maxsymlinklen || (ovp->v_mount->mnt_maxsymlinklen == 0 && datablocks == 0))) { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); DIP_ASSIGN(oip, size, 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; #ifdef FFS2 if (needextclean) softdep_setup_freeblocks(oip, length, IO_EXT); #endif return (UFS_UPDATE(oip, MNT_WAIT)); } if ((error = getinoquota(oip)) != 0) return (error); uvm_vnp_setsize(ovp, length); oip->i_ci.ci_lasta = oip->i_ci.ci_clen = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* * If a file is only partially truncated, then * we have to clean up the data structures * describing the allocation past the truncation * point. Finding and deallocating those structures * is a lot of work. 
Since partial truncation occurs * rarely, we solve the problem by syncing the file * so that it will have no data structures left. */ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) return (error); } else { (void)ufs_quota_free_blocks(oip, datablocks, NOCRED); softdep_setup_freeblocks(oip, length, needextclean ? IO_EXT | IO_NORMAL : IO_NORMAL); (void) vinvalbuf(ovp, needextclean ? 0 : V_NORMAL, cred, curproc, 0, 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, 0)); } } osize = DIP(oip, size); /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { if (length > fs->fs_maxfilesize) return (EFBIG); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; error = UFS_BUF_ALLOC(oip, length - 1, 1, cred, aflags, &bp); if (error) return (error); DIP_ASSIGN(oip, size, length); uvm_vnp_setsize(ovp, length); (void) uvm_vnp_uncache(ovp); if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, MNT_WAIT)); } uvm_vnp_setsize(ovp, length); /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the * partial block following the end of the file must be * zero'ed in case it ever becomes accessible again because * of subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. */ offset = blkoff(fs, length); if (offset == 0) { DIP_ASSIGN(oip, size, length); } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; error = UFS_BUF_ALLOC(oip, length - 1, 1, cred, aflags, &bp); if (error) return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(ovp) && lbn < NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) return (error); DIP_ASSIGN(oip, size, length); size = blksize(fs, oip, lbn); (void) uvm_vnp_uncache(ovp); if (ovp->v_type != VDIR) bzero((char *)bp->b_data + offset, (u_int)(size - offset)); bp->b_bcount = size; if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. 
*/ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[NDADDR + level] = DIP(oip, ib[level]); if (lastiblock[level] < 0) { DIP_ASSIGN(oip, ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < NDADDR; i++) { oldblks[i] = DIP(oip, db[i]); if (i > lastblock) DIP_ASSIGN(oip, db[i], 0); } oip->i_flag |= IN_CHANGE | IN_UPDATE; if ((error = UFS_UPDATE(oip, MNT_WAIT)) != 0) allerror = error; /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < NDADDR; i++) { newblks[i] = DIP(oip, db[i]); DIP_ASSIGN(oip, db[i], oldblks[i]); } for (i = 0; i < NIADDR; i++) { newblks[NDADDR + i] = DIP(oip, ib[i]); DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); } DIP_ASSIGN(oip, size, osize); vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0); /* * Indirect blocks first. */ indir_lbn[SINGLE] = -NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(oip, ib[level]); if (bn != 0) { error = ffs_indirtrunc(oip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_ASSIGN(oip, ib[level], 0); ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(oip, db[i]); if (bn == 0) continue; DIP_ASSIGN(oip, db[i], 0); bsize = blksize(fs, oip, i); ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(oip, db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, oip, lastblock); DIP_ASSIGN(oip, size, length); newspace = blksize(fs, oip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if (newblks[NDADDR + level] != DIP(oip, ib[level])) panic("ffs_truncate1"); for (i = 0; i < NDADDR; i++) if (newblks[i] != DIP(oip, db[i])) panic("ffs_truncate2"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ DIP_ASSIGN(oip, size, length); DIP_ADD(oip, blocks, -blocksreleased); if (DIP(oip, blocks) < 0) /* Sanity */ DIP_ASSIGN(oip, blocks, 0); oip->i_flag |= IN_CHANGE; (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); }
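/*
 * Sketch of the index arithmetic ffs_truncate() does before freeing blocks:
 * from the new length it derives the last direct block to keep and, per
 * indirection level, the last indirect entry to keep (a negative value means
 * that whole level is freed).  The block geometry is an assumed example
 * (8K blocks, 32-bit pointers), not values from a real superblock.
 */
#include <stdio.h>

#define EX_BSIZE	8192
#define EX_NDADDR	12
#define EX_NINDIR	(EX_BSIZE / 4)

int
main(void)
{
	long long length = 1000000;		/* new file size in bytes */
	long long lastblock, last_single, last_double, last_triple;

	/* Last logical block that still holds data, -1 if truncating to 0. */
	lastblock = (length + EX_BSIZE - 1) / EX_BSIZE - 1;
	last_single = lastblock - EX_NDADDR;
	last_double = last_single - EX_NINDIR;
	last_triple = last_double - (long long)EX_NINDIR * EX_NINDIR;

	printf("keep direct blocks through %lld\n", lastblock);
	printf("single/double/triple indirect: %lld %lld %lld\n",
	    last_single, last_double, last_triple);
	return (0);
}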
/* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2_balloc(struct inode *ip, e2fs_lbn_t lbn, int size, struct ucred *cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; struct ext2mount *ump; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2_NIADDR + 2]; e4fs_daddr_t nb, newb; e2fs_daddr_t *bap, pref; int osize, nsize, num, i, error; *bpp = NULL; if (lbn < 0) return (EFBIG); fs = ip->i_e2fs; ump = ip->i_ump; /* * check if this is a sequential block allocation. * If so, increment next_alloc fields to allow ext2_blkpref * to make a good guess */ if (lbn == ip->i_next_alloc_block + 1) { ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } if (ip->i_flag & IN_E4EXTENTS) return (ext2_ext_balloc(ip, lbn, size, cred, bpp, flags)); /* * The first EXT2_NDADDR blocks are direct blocks */ if (lbn < EXT2_NDADDR) { nb = ip->i_db[lbn]; /* * no new block is to be allocated, and no need to expand * the file */ if (nb != 0 && ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { /* * Godmar thinks: this shouldn't happen w/o * fragments */ printf("nsize %d(%d) > osize %d(%d) nb %d\n", (int)nsize, (int)size, (int)osize, (int)ip->i_size, (int)nb); panic( "ext2_balloc: Something is terribly wrong"); /* * please note there haven't been any changes from here on - * FFS seems to work. */ } } else { if (ip->i_size < (lbn + 1) * fs->e2fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->e2fs_bsize; EXT2_LOCK(ump); error = ext2_alloc(ip, lbn, ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0), nsize, cred, &newb); if (error) return (error); /* * If the newly allocated block exceeds 32-bit limit, * we can not use it in file block maps. */ if (newb > UINT_MAX) return (EFBIG); bp = getblk(vp, lbn, nsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0) return (error); #ifdef INVARIANTS if (num < 1) panic("ext2_balloc: ext2_getlbns returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[0].in_off + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); if ((error = ext2_alloc(ip, lbn, pref, fs->e2fs_bsize, cred, &newb))) return (error); if (newb > UINT_MAX) return (EFBIG); nb = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); vfs_bio_clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); return (error); } ip->i_ib[indirs[0].in_off] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. 
*/ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bap = (e2fs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } EXT2_LOCK(ump); if (pref == 0) pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, bp->b_lblkno); error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb); if (error) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); EXT2_UNLOCK(ump); brelse(bp); return (error); } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], bp->b_lblkno); if ((error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->e2fs_bsize, NOCRED, MAXBSIZE, seqcount, 0, &nbp); } else { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); } if (error) { brelse(nbp); return (error); } } else {
static int
ext2_ext_balloc(struct inode *ip, uint32_t lbn, int size,
    struct ucred *cred, struct buf **bpp, int flags)
{
	struct m_ext2fs *fs;
	struct buf *bp = NULL;
	struct vnode *vp = ITOV(ip);
	daddr_t newblk;
	int osize, nsize, blks, error, allocated;

	fs = ip->i_e2fs;
	blks = howmany(size, fs->e2fs_bsize);

	error = ext4_ext_get_blocks(ip, lbn, blks, cred, NULL, &allocated,
	    &newblk);
	if (error)
		return (error);

	if (allocated) {
		if (ip->i_size < (lbn + 1) * fs->e2fs_bsize)
			nsize = fragroundup(fs, size);
		else
			nsize = fs->e2fs_bsize;

		bp = getblk(vp, lbn, nsize, 0, 0, 0);
		if (!bp)
			return (EIO);

		bp->b_blkno = fsbtodb(fs, newblk);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(bp);
	} else {
		if (ip->i_size >= (lbn + 1) * fs->e2fs_bsize) {
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, newblk);
			*bpp = bp;
			return (0);
		}

		/*
		 * Consider need to reallocate a fragment.
		 */
		osize = fragroundup(fs, blkoff(fs, ip->i_size));
		nsize = fragroundup(fs, size);
		if (nsize <= osize)
			error = bread(vp, lbn, osize, NOCRED, &bp);
		else
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		bp->b_blkno = fsbtodb(fs, newblk);
	}

	*bpp = bp;

	return (error);
}
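/*
 * Sketch of the osize/nsize computation used repeatedly in the balloc
 * routines above: blkoff() isolates the offset within the last block and
 * fragroundup() rounds it up to a whole number of fragments, which tells the
 * allocator whether the existing fragment already covers the requested size.
 * The geometry values are assumed for the example.
 */
#include <stdio.h>

#define EX_BSIZE	8192			/* file system block size */
#define EX_FSIZE	1024			/* fragment size */

static long blkoff(long long off)  { return (long)(off & (EX_BSIZE - 1)); }
static long fragroundup(long size) { return ((size + EX_FSIZE - 1) & ~(EX_FSIZE - 1)); }

int
main(void)
{
	long long isize = 5000;	/* current EOF: partial last block */
	int request = 6500;	/* bytes wanted in that block */
	long osize = fragroundup(blkoff(isize));
	long nsize = fragroundup(request);

	if (nsize <= osize)
		printf("existing %ld-byte fragment is enough\n", osize);
	else
		printf("grow fragment from %ld to %ld bytes\n", osize, nsize);
	return (0);
}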
/* * Set the quota file up for a particular file system. * Called as the result of a quotaon (Q_QUOTAON) ioctl. */ static int opendq( struct ufsvfs *ufsvfsp, struct vnode *vp, /* quota file */ struct cred *cr) { struct inode *qip; struct dquot *dqp; int error; int quotaon = 0; if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0) return (EPERM); VN_HOLD(vp); /* * Check to be sure its a regular file. */ if (vp->v_type != VREG) { VN_RELE(vp); return (EACCES); } rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER); /* * We have vfs_dqrwlock as writer, so if quotas are disabled, * then vfs_qinod should be NULL or we have a race somewhere. */ ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) || (ufsvfsp->vfs_qinod == 0)); if ((ufsvfsp->vfs_qflags & MQ_ENABLED) != 0) { /* * Quotas are already enabled on this file system. * * If the "quotas" file was replaced (different inode) * while quotas were enabled we don't want to re-enable * them with a new "quotas" file. Simply print a warning * message to the console, release the new vnode, and * return. * XXX - The right way to fix this is to return EBUSY * for the ioctl() issued by 'quotaon'. */ if (VTOI(vp) != ufsvfsp->vfs_qinod) { cmn_err(CE_WARN, "Previous quota file still in use." " Disable quotas on %s before enabling.\n", VTOI(vp)->i_fs->fs_fsmnt); VN_RELE(vp); rw_exit(&ufsvfsp->vfs_dqrwlock); return (0); } (void) quotasync(ufsvfsp, /* do_lock */ 0); /* remove extra hold on quota file */ VN_RELE(vp); quotaon++; qip = ufsvfsp->vfs_qinod; } else { int qlen; ufsvfsp->vfs_qinod = VTOI(vp); qip = ufsvfsp->vfs_qinod; /* * Force the file to have no partially allocated blocks * to prevent a realloc from changing the location of * the data. We must do this even if not logging in * case we later remount to logging. */ qlen = qip->i_fs->fs_bsize * NDADDR; /* * Largefiles: i_size needs to be atomically accessed now. */ rw_enter(&qip->i_contents, RW_WRITER); if (qip->i_size < qlen) { if (ufs_itrunc(qip, (u_offset_t)qlen, (int)0, cr) != 0) cmn_err(CE_WARN, "opendq failed to remove frags" " from quota file\n"); rw_exit(&qip->i_contents); (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)qip->i_size, B_INVAL, kcred, NULL); } else { rw_exit(&qip->i_contents); } TRANS_MATA_IGET(ufsvfsp, qip); } /* * The file system time limits are in the dquot for uid 0. * The time limits set the relative time the other users * can be over quota for this file system. * If it is zero a default is used (see quota.h). */ error = getdiskquota((uid_t)0, ufsvfsp, 1, &dqp); if (error == 0) { mutex_enter(&dqp->dq_lock); ufsvfsp->vfs_btimelimit = (dqp->dq_btimelimit? dqp->dq_btimelimit: DQ_BTIMELIMIT); ufsvfsp->vfs_ftimelimit = (dqp->dq_ftimelimit? dqp->dq_ftimelimit: DQ_FTIMELIMIT); ufsvfsp->vfs_qflags = MQ_ENABLED; /* enable quotas */ vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_QUOTA, NULL, 0); dqput(dqp); mutex_exit(&dqp->dq_lock); } else if (!quotaon) { /* * Some sort of I/O error on the quota file, and quotas were * not already on when we got here so clean up. */ ufsvfsp->vfs_qflags = 0; ufsvfsp->vfs_qinod = NULL; VN_RELE(ITOV(qip)); } /* * If quotas are enabled update all valid inodes in the * cache with quota information. */ if (ufsvfsp->vfs_qflags & MQ_ENABLED) { (void) ufs_scan_inodes(0, opendq_scan_inode, ufsvfsp, ufsvfsp); } rw_exit(&ufsvfsp->vfs_dqrwlock); return (error); }
/* * Set various fields of the dqblk according to the command. * Q_SETQUOTA - assign an entire dqblk structure. * Q_SETQLIM - assign a dqblk structure except for the usage. */ static int setquota(int cmd, uid_t uid, struct ufsvfs *ufsvfsp, caddr_t addr, struct cred *cr) { struct dquot *dqp; struct inode *qip; struct dquot *xdqp; struct dqblk newlim; int error; int scan_type = SQD_TYPE_NONE; daddr_t bn; int contig; if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0) return (EPERM); rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER); /* * Quotas are not enabled on this file system so there is * nothing more to do. */ if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { rw_exit(&ufsvfsp->vfs_dqrwlock); return (ESRCH); } /* * At this point, the quota subsystem is quiescent on this file * system so we can do all the work necessary to modify the quota * information for this user. */ if (copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)) != 0) { rw_exit(&ufsvfsp->vfs_dqrwlock); return (EFAULT); } error = getdiskquota(uid, ufsvfsp, 0, &xdqp); if (error) { rw_exit(&ufsvfsp->vfs_dqrwlock); return (error); } dqp = xdqp; /* * Don't change disk usage on Q_SETQLIM */ mutex_enter(&dqp->dq_lock); if (cmd == Q_SETQLIM) { newlim.dqb_curblocks = dqp->dq_curblocks; newlim.dqb_curfiles = dqp->dq_curfiles; } if (uid == 0) { /* * Timelimits for uid 0 set the relative time * the other users can be over quota for this file system. * If it is zero a default is used (see quota.h). */ ufsvfsp->vfs_btimelimit = newlim.dqb_btimelimit? newlim.dqb_btimelimit: DQ_BTIMELIMIT; ufsvfsp->vfs_ftimelimit = newlim.dqb_ftimelimit? newlim.dqb_ftimelimit: DQ_FTIMELIMIT; } else { if (newlim.dqb_bsoftlimit && newlim.dqb_curblocks >= newlim.dqb_bsoftlimit) { if (dqp->dq_bsoftlimit == 0 || dqp->dq_curblocks < dqp->dq_bsoftlimit) { /* If we're suddenly over the limit(s), */ /* start the timer(s) */ newlim.dqb_btimelimit = (uint32_t)gethrestime_sec() + ufsvfsp->vfs_btimelimit; dqp->dq_flags &= ~DQ_BLKS; } else { /* If we're currently over the soft */ /* limit and were previously over the */ /* soft limit then preserve the old */ /* time limit but make sure the DQ_BLKS */ /* flag is set since we must have been */ /* previously warned. */ newlim.dqb_btimelimit = dqp->dq_btimelimit; dqp->dq_flags |= DQ_BLKS; } } else { /* Either no quota or under quota, clear time limit */ newlim.dqb_btimelimit = 0; dqp->dq_flags &= ~DQ_BLKS; } if (newlim.dqb_fsoftlimit && newlim.dqb_curfiles >= newlim.dqb_fsoftlimit) { if (dqp->dq_fsoftlimit == 0 || dqp->dq_curfiles < dqp->dq_fsoftlimit) { /* If we're suddenly over the limit(s), */ /* start the timer(s) */ newlim.dqb_ftimelimit = (uint32_t)gethrestime_sec() + ufsvfsp->vfs_ftimelimit; dqp->dq_flags &= ~DQ_FILES; } else { /* If we're currently over the soft */ /* limit and were previously over the */ /* soft limit then preserve the old */ /* time limit but make sure the */ /* DQ_FILES flag is set since we must */ /* have been previously warned. */ newlim.dqb_ftimelimit = dqp->dq_ftimelimit; dqp->dq_flags |= DQ_FILES; } } else { /* Either no quota or under quota, clear time limit */ newlim.dqb_ftimelimit = 0; dqp->dq_flags &= ~DQ_FILES; } } /* * If there was previously no limit and there is now at least * one limit, then any inodes in the cache have NULL d_iquot * fields (getinoquota() returns NULL when there are no limits). 
*/ if ((dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 && dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) && (newlim.dqb_fhardlimit || newlim.dqb_fsoftlimit || newlim.dqb_bhardlimit || newlim.dqb_bsoftlimit)) { scan_type = SQD_TYPE_LIMIT; } /* * If there was previously at least one limit and there is now * no limit, then any inodes in the cache have non-NULL d_iquot * fields need to be reset to NULL. */ else if ((dqp->dq_fhardlimit || dqp->dq_fsoftlimit || dqp->dq_bhardlimit || dqp->dq_bsoftlimit) && (newlim.dqb_fhardlimit == 0 && newlim.dqb_fsoftlimit == 0 && newlim.dqb_bhardlimit == 0 && newlim.dqb_bsoftlimit == 0)) { scan_type = SQD_TYPE_NO_LIMIT; } dqp->dq_dqb = newlim; dqp->dq_flags |= DQ_MOD; /* * push the new quota to disk now. If this is a trans device * then force the page out with ufs_putpage so it will be deltaed * by ufs_startio. */ qip = ufsvfsp->vfs_qinod; rw_enter(&qip->i_contents, RW_WRITER); (void) ufs_rdwri(UIO_WRITE, FWRITE | FSYNC, qip, (caddr_t)&dqp->dq_dqb, sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE, (int *)NULL, kcred); rw_exit(&qip->i_contents); (void) VOP_PUTPAGE(ITOV(qip), dqoff(dqp->dq_uid) & ~qip->i_fs->fs_bmask, qip->i_fs->fs_bsize, B_INVAL, kcred, NULL); /* * We must set the dq_mof even if not we are not logging in case * we are later remount to logging. */ contig = 0; rw_enter(&qip->i_contents, RW_WRITER); error = bmap_read(qip, dqoff(dqp->dq_uid), &bn, &contig); rw_exit(&qip->i_contents); if (error || (bn == UFS_HOLE)) { dqp->dq_mof = UFS_HOLE; } else { dqp->dq_mof = ldbtob(bn) + (offset_t)((dqoff(dqp->dq_uid)) & (DEV_BSIZE - 1)); } dqp->dq_flags &= ~DQ_MOD; dqput(dqp); mutex_exit(&dqp->dq_lock); if (scan_type) { struct setquota_data sqd; sqd.sqd_type = scan_type; sqd.sqd_ufsvfsp = ufsvfsp; sqd.sqd_uid = uid; (void) ufs_scan_inodes(0, setquota_scan_inode, &sqd, ufsvfsp); } rw_exit(&ufsvfsp->vfs_dqrwlock); return (0); }
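/*
 * Sketch of the quota-file layout setquota() relies on: each uid's dqblk
 * record lives at a fixed offset (uid times the record size), so pushing one
 * record to disk means writing at that offset and invalidating the file
 * system block that contains it.  The record layout below is a simplified
 * stand-in, not the on-disk struct dqblk.
 */
#include <stdio.h>
#include <stdint.h>

struct ex_dqblk {
	uint32_t bhardlimit, bsoftlimit, curblocks;
	uint32_t fhardlimit, fsoftlimit, curfiles;
	uint32_t btimelimit, ftimelimit;
};

#define EX_BSIZE	8192		/* assumed file system block size */
#define DQOFF(uid)	((long long)(uid) * sizeof(struct ex_dqblk))

int
main(void)
{
	unsigned uid = 1234;
	long long off = DQOFF(uid);
	long long blkstart = off & ~((long long)EX_BSIZE - 1);

	printf("uid %u: record at offset %lld, inside block starting at %lld\n",
	    uid, off, blkstart);
	return (0);
}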
/* * NFS access to vnode file systems. * * We provide dentry/inode_to_fh() and fh_to_dentry() methods so that the * vnode-based file system can hook up its VOP_FID() and VFS_VGET() * methods. The Linux NFS server calls these methods when encoding an * object into a file handle to be passed to the client for future * use, and when decoding a file handle and looking for the file * system object it describes. * * VOP_FID() takes a vnode and provides a file ID (fid) that can later * be presented (in a pair with a VFS pointer) to VFS_VGET() to * reconstitute that vnode. In a Sun ONC-NFS style kernel, VOP_FID() * is used twice per file handle, once for the exported directory and * once for the object itself. In Linux, the NFS layer itself handles * the export tree checking (depending on the status of * NFSEXP_NOSUBTREECHECK), so the file system only needs to fill in * the file handle with details for the object itself. We always * provide both object and parent in the file handle to be sure that * we don't end up short on file handle space in a future call that * requires both. * * On a call from the NFS client, the Linux NFS layer finds a * superblock pointer from the file handle passed by the NFS client, * then calls the fh_to_dentry() method to get a dentry. Sun ONC-NFS * kernels call VFS_VGET() on a vfsp, passing the FID portion of the * file handle. In this layer, we unpack the file handle, determine * whether the parent or the object is needed, and pass the info along * to a VFS_VGET() call. Once that returns, we look for an attached * dentry and use it, or fabricate a new one which NFS will attempt to * reconnect to the namespace. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) int vnlayer_inode_to_fh( struct inode *inode, __u32 *fh, int *lenp, struct inode *parent ) #else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */ int vnlayer_dentry_to_fh( struct dentry *dent, __u32 *fh, int *lenp, int need_parent ) #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */ { int error; int type; int mylen; MDKI_FID_T *lfidp = NULL; MDKI_FID_T *parent_fidp = NULL; mdki_boolean_t bailout_needed = TRUE; /* Assume we'll fail. */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) SUPER_T *sbp; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) struct inode *inode = dent->d_inode; struct inode *parent = dent->d_parent->d_inode; #endif /* * We use the type byte (return value) to encode the FH length. Since we * always include two FIDs of the same size, the type must be even, so * that's how we "encode" the length of each FID (i.e. it is half the total * length). * * Always include parent entry; this makes sure that we only work with NFS * protocols that have enough room for our file handles. (Without this, we * may return a directory file handle OK yet be unable to return a plain * file handle.) Currently, we can just barely squeeze two standard * 10-byte vnode FIDs into the NFS v2 file handle. The NFS v3 handle has * plenty of room. 
*/ ASSERT(ITOV(inode)); error = VOP_FID(ITOV(inode), &lfidp); if (error != 0) { ASSERT(lfidp == NULL); goto bailout; } #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) /* we may be called with a NULL parent */ if (parent == NULL) { /* in this case, fabricate a fake parent */ parent_fidp = (MDKI_FID_T *) KMEM_ALLOC(MDKI_FID_LEN(lfidp), KM_SLEEP); if (parent_fidp == NULL) { MDKI_VFS_LOG(VFS_LOG_ERR, "%s: can't allocate %d bytes\n", __func__, (int) MDKI_FID_LEN(lfidp)); goto bailout; } memset(parent_fidp, 0xff, MDKI_FID_LEN(lfidp)); parent_fidp->fid_len = lfidp->fid_len; } else #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */ { error = VOP_FID(ITOV(parent), &parent_fidp); if (error != 0) { ASSERT(parent_fidp == NULL); goto bailout; } } /* * Our encoding scheme can't tolerate different length FIDs * (because otherwise the type wouldn't be guaranteed to be even). */ if (parent_fidp->fid_len != lfidp->fid_len) { MDKI_VFS_LOG(VFS_LOG_ERR, "%s: unbalanced parent/child fid lengths: %d, %d\n", __func__, parent_fidp->fid_len, lfidp->fid_len); goto bailout; } /* * vnode layer needs to release the storage for a fid on * Linux. The VOP_FID() function allocates its own fid in * non-error cases. Other UNIX systems release this storage * in the caller of VOP_FID, so we have to do it here. We * copy the vnode-style fid into the caller-allocated space, * then free our allocated version here. * * Remember: vnode lengths are counting bytes, Linux lengths count __u32 * units. */ type = parent_fidp->fid_len + lfidp->fid_len; /* Guaranteed even. */ mylen = roundup(type + MDKI_FID_EXTRA_SIZE, sizeof(*fh)); if (mylen == VNODE_NFS_FH_TYPE_RESERVED || mylen >= VNODE_NFS_FH_TYPE_ERROR) { MDKI_VFS_LOG(VFS_LOG_ESTALE, "%s: required length %d out of range (%d,%d)\n", __func__, mylen, VNODE_NFS_FH_TYPE_RESERVED, VNODE_NFS_FH_TYPE_ERROR); goto bailout; } if (((*lenp) * sizeof(*fh)) < mylen) { MDKI_VFS_LOG(VFS_LOG_ESTALE, "%s: need %d bytes for FH, have %d\n", __func__, mylen, (int) (sizeof(*fh) * (*lenp))); goto bailout; } /* Copy FIDs into file handle. */ *lenp = mylen / sizeof(*fh); /* No remainder because of roundup above. */ BZERO(fh, mylen); /* Zero whole fh to round up to __u32 boundary */ BCOPY(lfidp->fid_data, fh, lfidp->fid_len); BCOPY(parent_fidp->fid_data, ((caddr_t)fh) + (type / 2), parent_fidp->fid_len); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) /* * For 64 bits OS, use a 32 bits hash of the SB pointer. * For 32 bits OS, use the pointer itself. */ if (ITOV(inode) == NULL || ITOV(inode)->v_vfsmnt == NULL) { MDKI_VFS_LOG(VFS_LOG_ESTALE, "%s: %p is this a MVFS inode?\n", __func__, inode); goto bailout; } else { sbp = ((struct vfsmount *)ITOV(inode)->v_vfsmnt)->mnt_sb; } MDKI_FID_SET_SB_HASH(fh, type / 2, MDKI_FID_CALC_HASH(sbp)); #endif bailout_needed = FALSE; /* We're home free now. */ if (bailout_needed) { bailout: type = VNODE_NFS_FH_TYPE_ERROR; *lenp = 0; } #ifdef KMEMDEBUG if (lfidp != NULL) REAL_KMEM_FREE(lfidp, MDKI_FID_LEN(lfidp)); if (parent_fidp != NULL) REAL_KMEM_FREE(parent_fidp, MDKI_FID_LEN(parent_fidp)); #else if (lfidp != NULL) KMEM_FREE(lfidp, MDKI_FID_LEN(lfidp)); if (parent_fidp != NULL) KMEM_FREE(parent_fidp, MDKI_FID_LEN(parent_fidp)); #endif return type; }
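/*
 * User-space sketch of the file-handle packing scheme described above: two
 * fixed, equal-length FIDs are copied back to back into a __u32 array, the
 * "type" returned to the NFS layer is the combined byte length (guaranteed
 * even, so each FID occupies type/2 bytes), and the array length is rounded
 * up to whole 32-bit words.  The sizes and the extra hash word are
 * assumptions for the example, not the MVFS definitions.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define FID_LEN		10	/* assumed per-object FID size in bytes */
#define EXTRA_BYTES	4	/* e.g. room for a superblock hash word */

static int
pack_fh(const unsigned char *obj_fid, const unsigned char *parent_fid,
    uint32_t *fh, int *lenp /* in/out: __u32 units */)
{
	int type = 2 * FID_LEN;				/* even by construction */
	int bytes = ((type + EXTRA_BYTES + 3) / 4) * 4;	/* round up to __u32 */

	if (*lenp * 4 < bytes)
		return (-1);				/* caller's handle too small */
	memset(fh, 0, bytes);
	memcpy(fh, obj_fid, FID_LEN);
	memcpy((unsigned char *)fh + type / 2, parent_fid, FID_LEN);
	*lenp = bytes / 4;
	return (type);
}

int
main(void)
{
	unsigned char obj[FID_LEN] = "OBJECTFID", parent[FID_LEN] = "PARENTFID";
	uint32_t fh[16];
	int len = 16;
	int type = pack_fh(obj, parent, fh, &len);

	printf("type %d, %d __u32 words used\n", type, len);
	return (0);
}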
extern void mvfs_linux_umount_begin( struct vfsmount * mnt, int flags ) #endif { VNODE_T *vp; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \ LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) struct vfsmount *mnt; #else /* * Since 2.6.18 and before 2.6.27 we have mnt as a parameter. * But we still need super_p. */ SUPER_T *super_p = mnt->mnt_sb; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) int mount_count = 0; #endif ASSERT(super_p != NULL); ASSERT(super_p->s_root != NULL); vp = ITOV(super_p->s_root->d_inode); ASSERT(vp != NULL); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \ LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) mnt = VTOVFSMNT(vp); #else /* Check that the mountpoint passed in matches the one * from the vp that we are going to clear. Skip it otherwise. * We know from experience that this can happen when unmounting * loopback (bind) mounts. */ if (mnt != VTOVFSMNT(vp)) return; #endif /* Note that there is no mechanism for restoring the mount pointer * in the vnode if an error happens later on in the umount. This is * the only callback into the mvfs during umount. So far this has not * been a problem and if we don't do this here, the umount will never * succeed because the Linux code expects the mnt_count to be 2. * The count is 3 at this point from the initial allocation of the * vfsmnt structure, the path_lookup call in this umount call and * from when we placed the pointer in the vp. */ if (mnt == NULL) { MDKI_VFS_LOG(VFS_LOG_ERR, "%s: mnt is NULL\n", __FUNCTION__); return; } #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) mount_count = MDKI_READ_MNT_COUNT(mnt); if (mount_count == 3) { MDKI_MNTPUT(mnt); SET_VTOVFSMNT(vp, NULL); } #else /* * may_umount returns !0 when the ref counter is 2 (and other conditions). * We took an extra ref, I'll drop it to test may_umount. If it is not * ready to be unmounted, the put is reverted. */ MDKI_MNTPUT(mnt); if (may_umount(mnt)) { SET_VTOVFSMNT(vp, NULL); } else { /* not ready yet */ MDKI_MNTGET(mnt); } #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) */ }
/* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ int ext2fs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) { struct vnode *vp; int error, rootino, namlen; struct ext2fs_dirtemplate dirbuf; u_int32_t ino; vp = ITOV(target); if (target->i_number == source->i_number) { error = EEXIST; goto out; } rootino = ROOTINO; error = 0; if (target->i_number == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct ext2fs_dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, curproc); if (error != 0) break; namlen = dirbuf.dotdot_namlen; if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { error = ENOTDIR; break; } ino = fs2h32(dirbuf.dotdot_ino); if (ino == source->i_number) { error = EINVAL; break; } if (ino == rootino) break; vput(vp); error = VFS_VGET(vp->v_mount, ino, &vp); if (error != 0) { vp = NULL; break; } } out: if (error == ENOTDIR) { printf("checkpath: .. not a directory\n"); panic("checkpath"); } if (vp != NULL) vput(vp); return (error); }
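/*
 * Sketch of the ancestry check ext2fs_checkpath() performs for rename:
 * starting from the target directory, follow ".." toward the root and fail if
 * the source directory is encountered, since that would move a directory
 * underneath itself.  Here the directory tree is a simple parent[] table
 * instead of on-disk ".." entries.
 */
#include <stdio.h>

#define ROOTINO	2

static int
checkpath(int source, int target, const int *parent, int nino)
{
	int ino = target;

	if (source == target)
		return (-1);			/* EEXIST in the real code */
	while (ino != ROOTINO) {
		if (ino <= 0 || ino >= nino)
			return (-1);		/* corrupt ".." chain */
		ino = parent[ino];		/* follow ".." one level up */
		if (ino == source)
			return (-1);		/* source is an ancestor: EINVAL */
	}
	return (0);
}

int
main(void)
{
	/* parent[inode] = inode of its parent directory; inode 2 is the root. */
	int parent[8] = { 0, 0, 2, 2, 3, 4, 2, 6 };

	printf("move 3 under 5: %s\n",
	    checkpath(3, 5, parent, 8) ? "rejected" : "ok");
	printf("move 7 under 4: %s\n",
	    checkpath(7, 4, parent, 8) ? "rejected" : "ok");
	return (0);
}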
/* This is really VOP_SETATTR() in sheep's clothing */ int vnode_iop_notify_change( DENT_T *dent_p, struct iattr * iattr_p ) { VNODE_T *vp; VATTR_T *vap; VNODE_T *cvp; int err = 0; DENT_T *rdent; CALL_DATA_T cd; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) mdki_boolean_t tooksem = FALSE; #endif if (iattr_p->ia_valid & ATTR_SIZE) { ASSERT_I_SEM_MINE(dent_p->d_inode); } if (MDKI_INOISMVFS(dent_p->d_inode)) { vap = VATTR_ALLOC(); if (vap != NULL) { vnode_iop_iattr2vattr(iattr_p, vap); /* reject attempts to use setattr to change object type */ vap->va_mask &= ~AT_TYPE; mdki_linux_init_call_data(&cd); vp = ITOV(dent_p->d_inode); err = VOP_SETATTR(vp, vap, 0, &cd); err = mdki_errno_unix_to_linux(err); /* Any underlying cleartxt got its inode truncated via changeattr * if there's a need to change its size. */ if (!err) mdki_linux_vattr_pullup(vp, vap, vap->va_mask); VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); } else { err = -ENOMEM; } } else { rdent = REALDENTRY_LOCKED(dent_p, &cvp); VNODE_DGET(rdent); if (rdent && rdent->d_inode) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) err = inode_setattr(dent_p->d_inode, iattr_p); if (err == 0) { if (iattr_p->ia_valid & ATTR_SIZE) { LOCK_INODE(rdent->d_inode); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) #if !defined RHEL_UPDATE || RHEL_UPDATE < 5 down_write(&rdent->d_inode->i_alloc_sem); #endif #endif /* * be paranoid and record the 'taken'ness in case * the called function squashes ia_valid (as is * done in nfs_setattr). */ tooksem = TRUE; } err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p); if (tooksem) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) #if !defined(RHEL_UPDATE) || RHEL_UPDATE < 5 up_write(&rdent->d_inode->i_alloc_sem); #endif #endif UNLOCK_INODE(rdent->d_inode); } } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */ err = simple_setattr(dent_p, iattr_p); if (err == 0) err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p); #endif /* else LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */ } else { /* It looks as though someone removed the realdentry on us. * I am not sure why this should happen. */ err = -ENOENT; } if (rdent) { VNODE_DPUT(rdent); REALDENTRY_UNLOCK(dent_p, cvp); } } return err; }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, int level, daddr_t *countp, daddr_t *rcountp, long *lastsegp, size_t *bcp) { int i; struct buf *bp; struct lfs *fs = ip->i_lfs; int32_t *bap; /* XXX ondisk32 */ struct vnode *vp; daddr_t nb, nlbn, last; int32_t *copy = NULL; /* XXX ondisk32 */ daddr_t blkcount, rblkcount, factor; int nblocks; daddr_t blocksreleased = 0, real_released = 0; int error = 0, allerror = 0; ASSERT_SEGLOCK(fs); /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= LFS_NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0); if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn); } else { trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn); curlwp->l_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("lfs_indirtrunc: bad buffer size"); bp->b_blkno = LFS_FSBTODB(fs, dbn); VOP_STRATEGY(vp, bp); error = biowait(bp); } if (error) { brelse(bp, 0); *countp = *rcountp = 0; return (error); } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ if (lastbn >= 0) { copy = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK); memcpy((void *)copy, (void *)bap, lfs_sb_getbsize(fs)); memset((void *)&bap[last + 1], 0, /* XXX ondisk32 */ (u_int)(LFS_NINDIR(fs) - (last + 1)) * sizeof (int32_t)); error = VOP_BWRITE(bp->b_vp, bp); if (error) allerror = error; bap = copy; } /* * Recursively free totally unused blocks. */ for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { error = lfs_indirtrunc(ip, nlbn, nb, (daddr_t)-1, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; blocksreleased += blkcount; real_released += rblkcount; } lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp); if (bap[i] > 0) real_released += nblocks; blocksreleased += nblocks; } /* * Recursively free last partial block. 
*/ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { error = lfs_indirtrunc(ip, nlbn, nb, last, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; real_released += rblkcount; blocksreleased += blkcount; } } if (copy != NULL) { lfs_free(fs, copy, LFS_NB_IBLOCK); } else { mutex_enter(&bufcache_lock); if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); wakeup(&fs->lfs_availsleep); } brelsel(bp, BC_INVAL); mutex_exit(&bufcache_lock); } *countp = blocksreleased; *rcountp = real_released; return (allerror); }
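/*
 * Illustrative sketch of the arithmetic at the top of the indirect-truncate
 * routines above: "factor" is the number of file blocks addressed by each
 * pointer at the current indirection level, and "last" is the index of the
 * last pointer in this indirect block that must be kept.  SINGLE is 0, as in
 * the BSD ffs/lfs sources; nindir stands in for NINDIR(fs)/LFS_NINDIR(fs).
 */
#define SINGLE	0			/* index of single indirect block */

static void
indir_keep_index(long nindir, int level, long lastbn,
		 long *factorp, long *lastp)
{
	long factor = 1;
	int i;

	for (i = SINGLE; i < level; i++)
		factor *= nindir;	/* blocks covered per pointer at this level */
	*factorp = factor;
	*lastp = (lastbn > 0) ? lastbn / factor : lastbn; /* -1 keeps nothing */
}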
extern int vnode_iop_create( INODE_T * parent, struct dentry * dentry, int mode, struct nameidata *nd ) { int err = 0; VATTR_T *vap; VNODE_T *newvp; struct create_ctx ctx; CALL_DATA_T cd; ASSERT_I_SEM_MINE(parent); ASSERT(MDKI_INOISMVFS(parent)); vap = VATTR_ALLOC(); if (vap == NULL) return -ENOMEM; VATTR_NULL(vap); mdki_linux_init_call_data(&cd); /* * Solaris sends only type, mode, size, so we will too. */ vap->va_type = VREG; vap->va_mode = mode & ~S_IFMT; vap->va_size = 0; vap->va_mask = AT_TYPE|AT_MODE|AT_SIZE; newvp = NULL; dentry->d_inode = NULL; ctx.dentry = dentry; ctx.parent = parent; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38) /* break any rcu-walk in progress */ # if defined(MRG) write_seqlock_barrier(&dentry->d_lock); # else /* defined (MRG) */ write_seqcount_barrier(&dentry->d_seq); # endif /* else defined (MRG) */ #endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38) */ err = VOP_CREATE(ITOV(parent), (/* drop const */ char *) dentry->d_name.name, vap, NONEXCL, /* XXX handled by generic layer? */ mode, /* not used except for passthrough, see vap->va_mode */ &newvp, &cd, &ctx); err = mdki_errno_unix_to_linux(err); /* dentry reference uses the hold count from a successful create */ if (!err) { if (dentry->d_inode == NULL) { /* Not a shadow object */ ASSERT(newvp != NULL); ASSERT(VTOI(newvp) != NULL); VNODE_D_INSTANTIATE(dentry, VTOI(newvp)); VATTR_SET_MASK(vap, AT_ALL); if (VOP_GETATTR(newvp, vap, 0, &cd) == 0) mdki_linux_vattr_pullup(newvp, vap, AT_ALL); } else { /* drop the extra ref returned in newvp */ VN_RELE(newvp); } /* I nuked the code checking not VCHR, VREG--we are always VREG */ } else { ASSERT(!dentry->d_inode); ASSERT(!newvp); } VATTR_FREE(vap); mdki_linux_destroy_call_data(&cd); return(err); }
/* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ int ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) { struct vnode *vp; int error, rootino, namlen; struct dirtemplate dirbuf; vp = ITOV(target); if (target->i_number == source->i_number) { error = EEXIST; goto out; } rootino = ROOTINO; error = 0; if (target->i_number == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL, (struct proc *)0); if (error != 0) break; # if (BYTE_ORDER == LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen > 0) namlen = dirbuf.dotdot_namlen; else namlen = dirbuf.dotdot_type; # else namlen = dirbuf.dotdot_namlen; # endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { error = ENOTDIR; break; } if (dirbuf.dotdot_ino == source->i_number) { error = EINVAL; break; } if (dirbuf.dotdot_ino == rootino) break; vput(vp); error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); if (error) { vp = NULL; break; } } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); if (vp != NULL) vput(vp); return (error); }
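/*
 * Illustrative sketch of the byte-order conditional above: the old
 * (pre-4.4BSD) directory format stored the name length as a 16-bit field in
 * the space that the new format splits into d_type and d_namlen.  On a
 * little-endian machine the significant low-order byte of an old-format name
 * length therefore sits where d_type now lives, which is why the code reads
 * dotdot_type when mnt_maxsymlinklen indicates the old format.
 */
#include <stdint.h>

struct direct_tail {		/* the two bytes following d_ino/d_reclen */
	uint8_t	d_type;		/* new format: file type */
	uint8_t	d_namlen;	/* new format: name length */
};

static unsigned
entry_namlen(const struct direct_tail *d, int new_format, int little_endian)
{
	if (new_format || !little_endian)
		return (d->d_namlen);
	return (d->d_type);	/* old format on a little-endian machine */
}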
extern int vnode_iop_link( DENT_T * olddent, INODE_T * parent, DENT_T * newdent ) { int err = 0; struct link_ctx ctx; VATTR_T *vap; VNODE_T *parentvp; ASSERT_I_SEM_MINE(olddent->d_inode); ASSERT_I_SEM_MINE(parent); ASSERT(MDKI_INOISMVFS(parent)); if (!vnlayer_link_eligible(olddent)) return -EXDEV; /* VOP_REALVP will check that the parent is a loopback directory and * return EINVAL if it isn't. */ if (VOP_REALVP(ITOV(parent), &parentvp) == 0) { /* We are creating a shadow link so bypass the mvfs for the rest */ err = vnlayer_do_linux_link(parentvp, olddent, parent, newdent); err = mdki_errno_unix_to_linux(err); } else { /* This needs to be passed on to the mvfs to deal with */ CALL_DATA_T cd; INODE_T *inode; if (!MDKI_INOISOURS(olddent->d_inode)) return -EXDEV; ctx.parent = parent; ctx.newdent = newdent; ctx.olddent = olddent; ctx.done = FALSE; mdki_linux_init_call_data(&cd); if (MDKI_INOISMVFS(olddent->d_inode)) { err = VOP_LINK(ITOV(parent), ITOV(olddent->d_inode), (char *)newdent->d_name.name, &cd, &ctx); err = mdki_errno_unix_to_linux(err); if (err == 0 && !ctx.done) { /* Again, a heavy handed way of bumping the inode count and * handling the locking (This will use the inode lock) */ inode = igrab(olddent->d_inode); VNODE_D_INSTANTIATE(newdent, inode); if ((vap = VATTR_ALLOC()) != NULL) { VATTR_SET_MASK(vap, AT_ALL); if (VOP_GETATTR(ITOV(inode), vap, 0, &cd) == 0) mdki_linux_vattr_pullup(ITOV(inode), vap, AT_ALL); VATTR_FREE(vap); } } } else { err = -EXDEV; } mdki_linux_destroy_call_data(&cd); } return err; }
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2fs_balloc(struct inode *ip, daddr_t bn, int size, kauth_cred_t cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2FS_NIADDR + 2]; daddr_t newb, lbn, pref; int32_t *bap; /* XXX ondisk32 */ int num, i, error; u_int deallocated; daddr_t *blkp, *allocblk, allociblk[EXT2FS_NIADDR + 1]; int32_t *allocib; /* XXX ondisk32 */ int unwindidx = -1; UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); if (bpp != NULL) { *bpp = NULL; } if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * The first EXT2FS_NDADDR blocks are direct blocks */ if (bn < EXT2FS_NDADDR) { /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { /* * the block is already allocated, just read it. */ if (bpp != NULL) { error = bread(vp, bn, fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (error) { return (error); } *bpp = bp; } return (0); } /* * allocate a new direct block. */ error = ext2fs_alloc(ip, bn, ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ ip->i_e2fs_blocks[bn] = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]); allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) return (error); nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]; /* XXX ondisk32 */ *allocib = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { goto fail; } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ nb = fs2h32(bap[indirs[i].in_off]); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; /* XXX ondisk32 */ bap[indirs[i - 1].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ bap[indirs[num].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } if (bpp != NULL) { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &nbp); if (error) { goto fail; } } else { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ext2fs_blkfree(ip, *blkp); deallocated += fs->e2fs_bsize; } if (unwindidx >= 0) { if (unwindidx == 0) { *allocib = 0; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, 0, 0); brelse(bp, BC_INVAL); } } if (deallocated) { ext2fs_setnblock(ip, ext2fs_nblock(ip) - btodb(deallocated)); ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE; } return error; }
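/*
 * Illustrative sketch, not ufs_getlbns() itself: how many levels of
 * indirection a logical block number needs, given ndaddr direct pointers in
 * the inode and nindir pointers per indirect block.  The balloc routines
 * above rely on ufs_getlbns() to compute the corresponding chain of
 * indirect-block addresses in indirs[].
 */
static int
indirection_levels(long bn, long ndaddr, long nindir)
{
	long span = nindir;	/* blocks reachable through one single indirect block */
	int level;

	if (bn < ndaddr)
		return (0);	/* direct block */
	bn -= ndaddr;
	for (level = 1; level <= 3; level++) {
		if (bn < span)
			return (level);	/* 1 = single, 2 = double, 3 = triple */
		bn -= span;
		span *= nindir;
	}
	return (-1);		/* beyond triple indirect; the caller returns EFBIG */
}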
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ int ffs_indirtrunc(struct inode *ip, daddr64_t lbn, daddr64_t dbn, daddr64_t lastbn, int level, long *countp) { int i; struct buf *bp; struct fs *fs = ip->i_fs; struct vnode *vp; void *copy = NULL; daddr64_t nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; int error = 0, allerror = 0; int32_t *bap1 = NULL; #ifdef FFS2 int64_t *bap2 = NULL; #endif /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); if (!(bp->b_flags & (B_DONE | B_DELWRI))) { curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ bcstats.pendingreads++; bcstats.numreads++; bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; VOP_STRATEGY(bp); error = biowait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)bp->b_data; else #endif bap1 = (int32_t *)bp->b_data; if (lastbn != -1) { copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); bcopy(bp->b_data, copy, (u_int) fs->fs_bsize); for (i = last + 1; i < NINDIR(fs); i++) BAP_ASSIGN(ip, i, 0); if (!DOINGASYNC(vp)) { error = bwrite(bp); if (error) allerror = error; } else { bawrite(bp); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)copy; else #endif bap1 = (int32_t *)copy; } /* * Recursively free totally unused blocks. */ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = BAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (daddr64_t)-1, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = BAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { free(copy, M_TEMP); } else { bp->b_flags |= B_INVAL; brelse(bp); } *countp = blocksreleased; return (allerror); }
/* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ext2fs_rename(void *v) { struct vop_rename_args *ap = v; struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct inode *ip, *xp, *dp; struct proc *p = fcnp->cn_proc; struct ext2fs_dirtemplate dirbuf; /* struct timespec ts; */ int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ext2fs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } /* * Check if just deleting a link name. */ if (tvp && ((VTOI(tvp)->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND)) || (VTOI(tdvp)->i_e2fs_flags & EXT2_APPEND))) { error = EPERM; goto abortit; } if (fvp == tvp) { if (fvp->v_type == VDIR) { error = EINVAL; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* Delete source. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ext2fs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; (void) vfs_relookup(fdvp, &fvp, fcnp); return (VOP_REMOVE(fdvp, fvp, fcnp)); } if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((nlink_t)ip->i_e2fs_nlink >= LINK_MAX) { VOP_UNLOCK(fvp, 0); error = EMLINK; goto abortit; } if ((ip->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND)) || (dp->i_e2fs_flags & EXT2_APPEND)) { VOP_UNLOCK(fvp, 0); error = EPERM; goto abortit; } if ((ip->i_e2fs_mode & IFMT) == IFDIR) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); if (!error && tvp) error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred); if (error) { VOP_UNLOCK(fvp, 0); error = EACCES; goto abortit; } /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags&ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. 
*/ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_e2fs_nlink++; ip->i_flag |= IN_CHANGE; if ((error = ext2fs_update(ip, NULL, NULL, 1)) != 0) { VOP_UNLOCK(fvp, 0); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); VOP_UNLOCK(fvp, 0); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ext2fs_checkpath(ip, dp, tcnp->cn_cred); if (error != 0) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ext2fs_rename: lost to startdir"); if ((error = vfs_relookup(tdvp, &tvp, tcnp)) != 0) goto out; dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_e2fs_nlink >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_e2fs_nlink++; dp->i_flag |= IN_CHANGE; if ((error = ext2fs_update(dp, NULL, NULL, 1)) != 0) goto bad; } error = ext2fs_direnter(ip, tdvp, tcnp); if (error != 0) { if (doingdirectory && newparent) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; (void)ext2fs_update(dp, NULL, NULL, 1); } goto bad; } vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_e2fs_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_e2fs_uid && xp->i_e2fs_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_e2fs_mode & IFMT) == IFDIR) { if (!ext2fs_dirempty(xp, dp->i_number, tcnp->cn_cred) || xp->i_e2fs_nlink > 2) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ext2fs_dirrewrite(dp, ip, tcnp); if (error != 0) goto bad; /* * If the target directory is in the same * directory as the source directory, * decrement the link count on the parent * of the target directory. */ if (doingdirectory && !newparent) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; } vput(tdvp); /* * Adjust the link count of the target to * reflect the dirrewrite above. 
If this is * a directory it is empty and there are * no links to it, so we can squash the inode and * any space associated with it. We disallowed * renaming over top of a directory with links to * it above, as the remaining link would point to * a directory without "." or ".." entries. */ xp->i_e2fs_nlink--; if (doingdirectory) { if (--xp->i_e2fs_nlink != 0) panic("rename: linked directory"); error = ext2fs_truncate(xp, (off_t)0, IO_SYNC, tcnp->cn_cred); } xp->i_flag |= IN_CHANGE; vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ext2fs_rename: lost from startdir"); (void) vfs_relookup(fdvp, &fvp, fcnp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ext2fs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. * The IRENAME flag ensures that it cannot be moved by another * rename. */ if (xp != ip) { if (doingdirectory) panic("ext2fs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, sizeof (struct ext2fs_dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, tcnp->cn_cred, NULL, curproc); if (error == 0) { namlen = dirbuf.dotdot_namlen; if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { ufs_dirbad(xp, (doff_t)12, "ext2fs_rename: mangled dir"); } else { dirbuf.dotdot_ino = h2fs32(newparent); (void) vn_rdwr(UIO_WRITE, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, tcnp->cn_cred, NULL, curproc); cache_purge(fdvp); } } } error = ext2fs_dirremove(fdvp, fcnp); if (!error) { xp->i_e2fs_nlink--; xp->i_flag |= IN_CHANGE; } xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_e2fs_nlink--; ip->i_flag |= IN_CHANGE; vput(fvp); } else vrele(fvp); return (error); }
void *
osi_UfsOpen(afs_dcache_id_t *ainode)
{
#ifdef AFS_CACHE_VNODE_PATH
    struct vnode *vp;
#else
    struct inode *ip;
#endif
    struct osi_file *afile = NULL;
    afs_int32 code = 0;
    int dummy;
#ifdef AFS_CACHE_VNODE_PATH
    char namebuf[1024];
    struct pathname lookpn;
#endif
    struct osi_stat tstat;

    afile = osi_AllocSmallSpace(sizeof(struct osi_file));
    AFS_GUNLOCK();

    /*
     * AFS_CACHE_VNODE_PATH can be used with any file system, including ZFS
     * or tmpfs.  The ainode is not an inode number but a path.
     */
#ifdef AFS_CACHE_VNODE_PATH
    /*
     * Cannot use vn_open or lookupname: they use the user's CRED().  We
     * need to run as root, so we must use the low-level lookuppnvp and
     * assume fname starts with '/'.
     */
    code = pn_get_buf(ainode->ufs, AFS_UIOSYS, &lookpn, namebuf,
		      sizeof(namebuf));
    if (code != 0)
	osi_Panic("UfsOpen: pn_get_buf failed %ld %s", code, ainode->ufs);

    VN_HOLD(rootdir);		/* released in lookuppnvp */
    code = lookuppnvp(&lookpn, NULL, FOLLOW, NULL, &vp,
		      rootdir, rootdir, afs_osi_credp);
    if (code != 0)
	osi_Panic("UfsOpen: lookuppnvp failed %ld %s", code, ainode->ufs);
#ifdef AFS_SUN511_ENV
    code = VOP_OPEN(&vp, FREAD | FWRITE, afs_osi_credp, NULL);
#else
    code = VOP_OPEN(&vp, FREAD | FWRITE, afs_osi_credp);
#endif
    if (code != 0)
	osi_Panic("UfsOpen: VOP_OPEN failed %ld %s", code, ainode->ufs);
#else
    code = igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, ainode->ufs, &ip,
		     CRED(), &dummy);
#endif
    AFS_GLOCK();
    if (code) {
	osi_FreeSmallSpace(afile);
	osi_Panic("UfsOpen: igetinode failed %ld %s", code, ainode->ufs);
    }
#ifdef AFS_CACHE_VNODE_PATH
    afile->vnode = vp;
    code = afs_osi_Stat(afile, &tstat);
    afile->size = tstat.size;
#else
    afile->vnode = ITOV(ip);
    afile->size = VTOI(afile->vnode)->i_size;
#endif
    afile->offset = 0;
    afile->proc = (int (*)())0;
    return (void *)afile;
}
/* * Rmdir system call. */ int ext2fs_rmdir(void *v) { struct vop_rmdir_args *ap = v; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * No rmdir "." please. */ if (dp == ip) { vrele(dvp); vput(vp); return (EINVAL); } /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (ip->i_e2fs_nlink != 2 || !ext2fs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_e2fs_flags & EXT2_APPEND) || (ip->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ error = ext2fs_dirremove(dvp, cnp); if (error != 0) goto out; dp->i_e2fs_nlink--; dp->i_flag |= IN_CHANGE; cache_purge(dvp); vput(dvp); dvp = NULL; /* * Truncate inode. The only stuff left * in the directory is "." and "..". The * "." reference is inconsequential since * we're quashing it. The ".." reference * has already been adjusted above. We've * removed the "." reference and the reference * in the parent directory, but there may be * other hard links so decrement by 2 and * worry about them later. */ ip->i_e2fs_nlink -= 2; error = ext2fs_truncate(ip, (off_t)0, IO_SYNC, cnp->cn_cred); cache_purge(ITOV(ip)); out: if (dvp) vput(dvp); vput(vp); return (error); }
static int ext2_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, int level, e4fs_daddr_t *countp) { struct buf *bp; struct m_ext2fs *fs = ip->i_e2fs; struct vnode *vp; e2fs_daddr_t *bap, *copy; int i, nblocks, error = 0, allerror = 0; e2fs_lbn_t nb, nlbn, last; e4fs_daddr_t blkcount, factor, blocksreleased = 0; /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->e2fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0, 0); if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { bp->b_iocmd = BIO_READ; if (bp->b_bcount > bp->b_bufsize) panic("ext2_indirtrunc: bad buffer size"); bp->b_blkno = dbn; vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); error = bufwait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } bap = (e2fs_daddr_t *)bp->b_data; copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK); bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize); bzero((caddr_t)&bap[last + 1], (NINDIR(fs) - (last + 1)) * sizeof(e2fs_daddr_t)); if (last == -1) bp->b_flags |= B_INVAL; if (DOINGASYNC(vp)) { bdwrite(bp); } else { error = bwrite(bp); if (error) allerror = error; } bap = copy; /* * Recursively free totally unused blocks. */ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (int32_t)-1, level - 1, &blkcount)) != 0) allerror = error; blocksreleased += blkcount; } ext2_blkfree(ip, nb, fs->e2fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount)) != 0) allerror = error; blocksreleased += blkcount; } } free(copy, M_TEMP); *countp = blocksreleased; return (allerror); }
/* * Find a cylinder to place a directory. * * The policy implemented by this algorithm is to allocate a * directory inode in the same cylinder group as its parent * directory, but also to reserve space for its files inodes * and data. Restrict the number of directories which may be * allocated one after another in the same cylinder group * without intervening allocation of files. * * If we allocate a first level directory then force allocation * in another cylinder group. * */ static u_long ext2_dirpref(struct inode *pip) { struct m_ext2fs *fs; int cg, prefcg, cgsize; u_int avgifree, avgbfree, avgndir, curdirsize; u_int minifree, minbfree, maxndir; u_int mincg, minndir; u_int dirsize, maxcontigdirs; mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); fs = pip->i_e2fs; avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount; avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; /* * Force allocation in another cg if creating a first level dir. */ ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); if (ITOV(pip)->v_vflag & VV_ROOT) { prefcg = arc4random() % fs->e2fs_gcount; mincg = prefcg; minndir = fs->e2fs_ipg; for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { mincg = cg; minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; } for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { mincg = cg; minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; } return (mincg); } /* * Count various limits which used for * optimal allocation of a directory inode. */ maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); minifree = avgifree - avgifree / 4; if (minifree < 1) minifree = 1; minbfree = avgbfree - avgbfree / 4; if (minbfree < 1) minbfree = 1; cgsize = fs->e2fs_fsize * fs->e2fs_fpg; dirsize = AVGDIRSIZE; curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; if (dirsize < curdirsize) dirsize = curdirsize; maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); if (maxcontigdirs == 0) maxcontigdirs = 1; /* * Limit number of dirs in one cg and reserve space for * regular files, but only if we have no deficit in * inodes or space. */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } /* * This is a backstop when we have deficit in space. */ for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) return (cg); for (cg = 0; cg < prefcg; cg++) if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) break; return (cg); }
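/*
 * Illustrative sketch of the cylinder-group scan order used twice above:
 * start at the preferred group (the parent's group, or a randomly chosen one
 * for top-level directories), scan forward to the last group, then wrap and
 * scan the groups before the starting point.  accept() stands in for the
 * free-inode, free-block, and directory-count thresholds.
 */
static int
scan_groups_from(unsigned prefcg, unsigned ngroups,
		 int (*accept)(unsigned cg, void *arg), void *arg)
{
	unsigned cg;

	for (cg = prefcg; cg < ngroups; cg++)
		if (accept(cg, arg))
			return ((int)cg);
	for (cg = 0; cg < prefcg; cg++)
		if (accept(cg, arg))
			return ((int)cg);
	return (-1);	/* no group met the policy; the code falls back further */
}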
/* * ffs_blkalloc allocates a disk block for ffs_pageout(), as a consequence * it does no buf_breads (that could lead to deadblock as the page may be already * marked busy as it is being paged out. Also important to note that we are not * growing the file in pageouts. So ip->i_size cannot increase by this call * due to the way UBC works. * This code is derived from ffs_balloc and many cases of that are dealt * in ffs_balloc are not applicable here * Do not call with B_CLRBUF flags as this should only be called only * from pageouts */ ffs_blkalloc( struct inode *ip, ufs_daddr_t lbn, int size, kauth_cred_t cred, int flags) { register struct fs *fs; register ufs_daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int devBlockSize=0; struct mount *mp=vp->v_mount; #if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ fs = ip->i_fs; if(size > fs->fs_bsize) panic("ffs_blkalloc: too large for allocation"); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { panic("ffs_blkalloc():cannot extend file: i_size %d, lbn %d", ip->i_size, lbn); } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { /* TBD: trivial case; the block is already allocated */ return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize > osize) { panic("ffs_allocblk: trying to extend a fragment"); } return(0); } else { if (ip->i_size < (lbn + 1) * fs->fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); ip->i_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, lbn, indirs, &num)) return(error); if(num == 0) { panic("ffs_blkalloc: file with direct blocks only"); } /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) return (error); nb = newb; *allocblk++ = nb; bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(bp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(bp); } else if (error = buf_bwrite(bp)) { goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. 
*/ for (i = 1;;) { error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); goto fail; } bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ if (i == num) break; i += 1; if (nb != 0) { buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(nbp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(nbp); } else if (error = buf_bwrite(nbp)) { buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } return (0); } buf_brelse(bp); return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); #endif /* QUOTA */ ip->i_blocks -= btodb(deallocated, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); }
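/*
 * Illustrative sketch of the fragment rounding used in the fragment checks
 * above: fragroundup() in the real code rounds a byte count up to a whole
 * number of fragments (the fragment size is a power of two), and blkoff()
 * gives the offset of i_size within its last block.
 */
static long
frag_roundup_sketch(long fsize, long nbytes)	/* fsize: fragment size, power of two */
{
	return ((nbytes + fsize - 1) & ~(fsize - 1));
}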
/* * Allocate a block in the file system. * * this takes the framework from ffs_alloc. To implement the * actual allocation, it calls ext2_new_block, the ported version * of the same Linux routine. * * we note that this is always called in connection with ext2_blkpref * * preallocation is done as Linux does it */ int ext2_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, struct ucred *cred, daddr_t *bnp) { struct ext2_sb_info *fs; daddr_t bno; #if QUOTA int error; #endif *bnp = 0; fs = ip->i_e2fs; #if DIAGNOSTIC if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) { kprintf("dev = %s, bsize = %lu, size = %d, fs = %s\n", devtoname(ip->i_dev), fs->s_blocksize, size, fs->fs_fsmnt); panic("ext2_alloc: bad size"); } if (cred == NOCRED) panic("ext2_alloc: missing credential"); #endif /* DIAGNOSTIC */ if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0) goto nospace; if (cred->cr_uid != 0 && fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count) goto nospace; #if QUOTA if ((error = ext2_chkdq(ip, (long)btodb(size), cred, 0)) != 0) return (error); #endif if (bpref >= fs->s_es->s_blocks_count) bpref = 0; /* call the Linux code */ #ifdef EXT2_PREALLOCATE /* To have a preallocation hit, we must * - have at least one block preallocated * - and our preferred block must have that block number or one below */ if (ip->i_prealloc_count && (bpref == ip->i_prealloc_block || bpref + 1 == ip->i_prealloc_block)) { bno = ip->i_prealloc_block++; ip->i_prealloc_count--; /* ext2_debug ("preallocation hit (%lu/%lu).\n", ++alloc_hits, ++alloc_attempts); */ /* Linux gets, clears, and releases the buffer at this point - we don't have to that; we leave it to the caller */ } else { ext2_discard_prealloc (ip); /* ext2_debug ("preallocation miss (%lu/%lu).\n", alloc_hits, ++alloc_attempts); */ if (S_ISREG(ip->i_mode)) bno = ext2_new_block (ITOV(ip)->v_mount, bpref, &ip->i_prealloc_count, &ip->i_prealloc_block); else bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); } #else bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0); #endif if (bno > 0) { /* set next_alloc fields as done in block_getblk */ ip->i_next_alloc_block = lbn; ip->i_next_alloc_goal = bno; ip->i_blocks += btodb(size); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bnp = bno; return (0); } #if QUOTA /* * Restore user's disk quota because allocation failed. */ ext2_chkdq(ip, (long)-btodb(size), cred, FORCE); #endif nospace: ext2_fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); }
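/*
 * Illustrative sketch of the preallocation-hit test described in the comment
 * above: the inode records a run of preallocated blocks starting at
 * prealloc_block, and a request whose preferred block is that block (or the
 * block just before it) simply consumes the next block of the run instead of
 * calling ext2_new_block().
 */
static int
prealloc_hit(unsigned *prealloc_block, unsigned *prealloc_count,
	     unsigned bpref, unsigned *bnop)
{
	if (*prealloc_count != 0 &&
	    (bpref == *prealloc_block || bpref + 1 == *prealloc_block)) {
		*bnop = (*prealloc_block)++;	/* hand out the next preallocated block */
		(*prealloc_count)--;
		return (1);			/* hit */
	}
	return (0);				/* miss: fall back to ext2_new_block() */
}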
/* * this functino has been reduced to the actual 'find the inode number' part */ ino_t ext2_new_inode(const struct inode *dir, int mode) { struct ext2_sb_info * sb; struct buffer_head * bh; struct buffer_head * bh2; int i, j, avefreei; int bitmap_nr; struct ext2_group_desc * gdp; struct ext2_group_desc * tmp; struct ext2_super_block * es; if (!dir) return 0; sb = dir->i_e2fs; lock_super (DEVVP(dir)); es = sb->s_es; repeat: gdp = NULL; i=0; if (S_ISDIR(mode)) { avefreei = es->s_free_inodes_count / sb->s_groups_count; /* I am not yet convinced that this next bit is necessary. i = dir->u.ext2_i.i_block_group; for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) { tmp = get_group_desc (sb, i, &bh2); if ((tmp->bg_used_dirs_count << 8) < tmp->bg_free_inodes_count) { gdp = tmp; break; } else i = ++i % sb->u.ext2_sb.s_groups_count; } */ if (!gdp) { for (j = 0; j < sb->s_groups_count; j++) { tmp = get_group_desc(ITOV(dir)->v_mount,j,&bh2); if (tmp->bg_free_inodes_count && tmp->bg_free_inodes_count >= avefreei) { if (!gdp || (tmp->bg_free_blocks_count > gdp->bg_free_blocks_count)) { i = j; gdp = tmp; } } } } } else { /* * Try to place the inode in its parent directory */ i = dir->i_block_group; tmp = get_group_desc (ITOV(dir)->v_mount, i, &bh2); if (tmp->bg_free_inodes_count) gdp = tmp; else { /* * Use a quadratic hash to find a group with a * free inode */ for (j = 1; j < sb->s_groups_count; j <<= 1) { i += j; if (i >= sb->s_groups_count) i -= sb->s_groups_count; tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); if (tmp->bg_free_inodes_count) { gdp = tmp; break; } } } if (!gdp) { /* * That failed: try linear search for a free inode */ i = dir->i_block_group + 1; for (j = 2; j < sb->s_groups_count; j++) { if (++i >= sb->s_groups_count) i = 0; tmp = get_group_desc(ITOV(dir)->v_mount,i,&bh2); if (tmp->bg_free_inodes_count) { gdp = tmp; break; } } } } if (!gdp) { unlock_super (DEVVP(dir)); return 0; } bitmap_nr = load_inode_bitmap (ITOV(dir)->v_mount, i); bh = sb->s_inode_bitmap[bitmap_nr]; if ((j = find_first_zero_bit ((unsigned long *) bh->b_data, EXT2_INODES_PER_GROUP(sb))) < EXT2_INODES_PER_GROUP(sb)) { if (set_bit (j, bh->b_data)) { kprintf ( "ext2_new_inode:" "bit already set for inode %d", j); goto repeat; } /* Linux now does the following: mark_buffer_dirty(bh); if (sb->s_flags & MS_SYNCHRONOUS) { ll_rw_block (WRITE, 1, &bh); wait_on_buffer (bh); } */ mark_buffer_dirty(bh); } else { if (gdp->bg_free_inodes_count != 0) { kprintf ( "ext2_new_inode:" "Free inodes count corrupted in group %d", i); unlock_super (DEVVP(dir)); return 0; } goto repeat; } j += i * EXT2_INODES_PER_GROUP(sb) + 1; if (j < EXT2_FIRST_INO(sb) || j > es->s_inodes_count) { kprintf ( "ext2_new_inode:" "reserved inode or inode > inodes count - " "block_group = %d,inode=%d", i, j); unlock_super (DEVVP(dir)); return 0; } gdp->bg_free_inodes_count--; if (S_ISDIR(mode)) gdp->bg_used_dirs_count++; mark_buffer_dirty(bh2); es->s_free_inodes_count--; /* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */ sb->s_dirt = 1; unlock_super (DEVVP(dir)); return j; }
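/*
 * Illustrative sketch of the group search order used above for
 * non-directory inodes: first the parent directory's group, then a
 * quadratic probe (steps of 1, 2, 4, 8, ...), then a plain linear scan.
 * has_free_inode() stands in for the bg_free_inodes_count test.
 */
static int
find_group_sketch(unsigned parent_group, unsigned ngroups,
		  int (*has_free_inode)(unsigned g, void *arg), void *arg)
{
	unsigned i = parent_group, j;

	if (has_free_inode(i, arg))
		return ((int)i);
	for (j = 1; j < ngroups; j <<= 1) {	/* quadratic hash over the groups */
		i += j;
		if (i >= ngroups)
			i -= ngroups;
		if (has_free_inode(i, arg))
			return ((int)i);
	}
	i = parent_group + 1;
	for (j = 2; j < ngroups; j++) {		/* last resort: linear search */
		if (++i >= ngroups)
			i = 0;
		if (has_free_inode(i, arg))
			return ((int)i);
	}
	return (-1);				/* no free inodes anywhere */
}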
int ud_dircheckforname(struct ud_inode *tdp, char *namep, int32_t namelen, struct slot *slotp, struct ud_inode **ipp, uint8_t *buf, struct cred *cr) { struct udf_vfs *udf_vfsp; uint32_t dirsize, offset; struct fbuf *fbp; struct file_id *fid; int32_t sz, error = 0, sz_req, matched = 0; uint8_t *nm; uint8_t *dname; int32_t id_len; ud_printf("ud_dircheckforname\n"); ASSERT(RW_WRITE_HELD(&tdp->i_rwlock)); fbp = NULL; dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP); udf_vfsp = tdp->i_udf; offset = 0; dirsize = tdp->i_size; if (slotp->status != FOUND) { int32_t temp; temp = 1024; /* set to size of dname allocated above */ if ((error = ud_compress(namelen, &temp, (uint8_t *)namep, dname)) != 0) { goto end; } sz_req = F_LEN + temp; sz_req = (sz_req + 3) & ~3; } while (offset < dirsize) { if ((error = ud_get_next_fid(tdp, &fbp, offset, &fid, &nm, buf)) != 0) { break; } if ((error = ud_uncompress(fid->fid_idlen, &id_len, nm, dname)) != 0) { break; } if ((fid->fid_flags & FID_DELETED) == 0) { /* Check for name match */ if (((namelen == id_len) && (strncmp(namep, (caddr_t)dname, namelen) == 0)) || ((fid->fid_flags & FID_PARENT) && (namep[0] == '.' && (namelen == 1 || (namelen == 2 && namep[1] == '.'))))) { tdp->i_diroff = offset; if ((fid->fid_flags & FID_PARENT) && (namelen == 1) && (namep[0] == '.')) { struct vnode *vp = ITOV(tdp); *ipp = tdp; VN_HOLD(vp); } else { uint16_t prn; uint32_t loc; prn = SWAP_16(fid->fid_icb.lad_ext_prn); loc = SWAP_32(fid->fid_icb.lad_ext_loc); if ((error = ud_iget(tdp->i_vfs, prn, loc, ipp, NULL, cr)) != 0) { fbrelse(fbp, S_OTHER); goto end; } } slotp->status = EXIST; slotp->offset = offset; slotp->size = FID_LEN(fid); slotp->fbp = fbp; slotp->ep = fid; slotp->endoff = 0; goto end; } } else { /* * see if we need to find an * empty slot and the current slot * matches */ if ((slotp->status != FOUND) || (matched == 0)) { sz = FID_LEN(fid); if (sz == sz_req) { slotp->status = FOUND; slotp->offset = offset; slotp->size = sz; } if (matched == 0) { if ((namelen == id_len) && (strncmp(namep, (caddr_t)dname, namelen) == 0)) { matched = 1; slotp->status = FOUND; slotp->offset = offset; slotp->size = sz; } } } } offset += FID_LEN(fid); } if (fbp) { fbrelse(fbp, S_OTHER); } if (slotp->status == NONE) { /* * We didn't find a slot; the new directory entry should be put * at the end of the directory. Return an indication of where * this is, and set "endoff" to zero; since we're going to have * to extend the directory, we're certainly not going to * trucate it. */ slotp->offset = dirsize; if (tdp->i_desc_type == ICB_FLAG_ONE_AD) { slotp->size = tdp->i_max_emb - tdp->i_size; } else { slotp->size = udf_vfsp->udf_lbsize - slotp->offset & udf_vfsp->udf_lbmask; } slotp->endoff = 0; } *ipp = NULL; end: kmem_free((caddr_t)dname, 1024); return (error); }
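/*
 * Illustrative sketch, with assumed macro semantics: the sz_req computation
 * above pads the entry size to a 4-byte boundary, since UDF file identifier
 * descriptors are 4-byte aligned on disk.  fixed_len stands in for F_LEN
 * (the fixed part of a file identifier descriptor) and name_len for the
 * compressed name length returned by ud_compress().
 */
static int
fid_slot_size(int fixed_len, int name_len)
{
	return ((fixed_len + name_len + 3) & ~3);	/* round up to a multiple of 4 */
}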