int fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize) { struct fuse_vnode_data *fvdat = VTOFUD(vp); off_t oldsize; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); oldsize = fvdat->filesize; fvdat->filesize = newsize; fvdat->flag |= FN_SIZECHANGE; if (newsize < oldsize) { err = vtruncbuf(vp, cred, newsize, fuse_iosize(vp)); } vnode_pager_setsize(vp, newsize); return err; }
int fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize) { struct fuse_vnode_data *fvdat = VTOFUD(vp); off_t oldsize; int err = 0; FS_DEBUG("inode=%ju oldsize=%ju newsize=%ju\n", (uintmax_t)VTOI(vp), (uintmax_t)fvdat->filesize, (uintmax_t)newsize); ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); oldsize = fvdat->filesize; fvdat->filesize = newsize; fvdat->flag |= FN_SIZECHANGE; if (newsize < oldsize) { err = vtruncbuf(vp, cred, newsize, fuse_iosize(vp)); } vnode_pager_setsize(vp, newsize); fuse_invalidate_attr(vp); return err; }
int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; int error = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); FS_DEBUG("inode=%ju offset=%jd resid=%ld\n", (uintmax_t)VTOI(vp), (intmax_t)(((off_t)bp->b_blkno) * biosize), bp->b_bcount); error = fuse_filehandle_getrw(vp, (bp->b_iocmd == BIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; return (error); } cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); if ((!error && uiop->uio_resid) || (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && uiop->uio_offset >= fvdat->cached_attrs.va_size)) { /* * If we had a short read with no error, we must have * hit a file hole. We should zero-fill the remainder. * This can also occur if the server hits the file EOF. * * Holes used to be able to occur due to pending * writes, but that is not possible any longer. */ int nread = bp->b_bcount - uiop->uio_resid; int left = uiop->uio_resid; if (error != 0) { printf("FUSE: Fix broken io: offset %ju, " " resid %zd, file size %ju/%ju\n", (uintmax_t)uiop->uio_offset, uiop->uio_resid, fvdat->filesize, fvdat->cached_attrs.va_size); error = 0; } if (left > 0) bzero((char *)bp->b_data + nread, left); uiop->uio_resid = 0; } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * If we only need to commit, try to commit */ if (bp->b_flags & B_NEEDCOMMIT) { FS_DEBUG("write: B_NEEDCOMMIT flags set\n"); } /* * Setup for actual write */ if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > fvdat->filesize) bp->b_dirtyend = fvdat->filesize - (off_t)bp->b_blkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_blkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = fuse_write_directbackend(vp, uiop, cred, fufh, 0); if (error == EINTR || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); }
static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; int bcount; int n, on, err = 0; const int biosize = fuse_iosize(vp); KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, fvdat->filesize); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, fvdat->filesize); /* * Find all of this file's B_NEEDCOMMIT buffers. If our writes * would exceed the local maximum per-file write commit size when * combined with those, we must decide whether to flush, * go synchronous, or return err. We don't bother checking * IO_UNIT -- we just make all writes atomic anyway, as there's * no point optimizing for something that really won't ever happen. */ do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); FS_DEBUG2G("lbn %ju, on %d, n %d, uio offset %ju, uio resid %zd\n", (uintmax_t)lbn, on, n, (uintmax_t)uio->uio_offset, uio->uio_resid); again: /* * Handle direct append and file extension cases, calculate * unaligned buffer size. */ if (uio->uio_offset == fvdat->filesize && n) { /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). Resize the * nfsnode after we have locked the buffer to prevent * readers from reading garbage. */ bcount = on; FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp != NULL) { long save; err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } save = bp->b_flags & B_CACHE; bcount += n; allocbuf(bp, bcount); bp->b_flags |= save; } } else { /* * Obtain the locked cache block first, and then * adjust the file's size as appropriate. */ bcount = on + n; if ((off_t)lbn * biosize + bcount < fvdat->filesize) { if ((off_t)(lbn + 1) * biosize < fvdat->filesize) bcount = biosize; else bcount = fvdat->filesize - (off_t)lbn *biosize; } FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp && uio->uio_offset + n > fvdat->filesize) { err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } } } if (!bp) { err = EINTR; break; } /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { FS_DEBUG("FUSE append race @%lx:%d\n", (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediately, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) */ bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); /* * Since this block is being modified, it must be written * again and not just committed. Since write clustering does * not work for the stage 1 data write, only the stage 2 * commit rpc, we have to clear B_CLUSTEROK as well. */ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } err = bwrite(bp); if (err) break; } while (uio->uio_resid > 0 && n > 0); if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) fuse_vnode_savesize(vp, cred); return (err); }
static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct buf *bp; daddr_t lbn; int bcount; int err = 0, n = 0, on = 0; off_t filesize; const int biosize = fuse_iosize(vp); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, VTOFUD(vp)->filesize); if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bcount = MIN(MAXBSIZE, biosize); filesize = VTOFUD(vp)->filesize; do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); FS_DEBUG2G("biosize %d, lbn %d, on %d\n", biosize, (int)lbn, on); /* * Obtain the buffer cache block. Figure out the buffer size * when we are at EOF. If we are modifying the size of the * buffer based on an EOF condition we need to hold * nfs_rslock() through obtaining the buffer to prevent * a potential writer-appender from messing with n_size. * Otherwise we may accidentally truncate the buffer and * lose dirty data. * * Note that bcount is *not* DEV_BSIZE aligned. */ if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (!bp) return (EINTR); /* * If B_CACHE is not set, we must issue the read. If this * fails, we return an error. */ if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); err = fuse_io_strategy(vp, bp); if (err) { brelse(bp); return (err); } } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); if (n > 0) { FS_DEBUG2G("feeding buffeater with %d bytes of buffer %p," " saying %d was asked for\n", n, bp->b_data + on, n + (int)bp->b_resid); err = uiomove(bp->b_data + on, n, uio); } brelse(bp); FS_DEBUG2G("end of turn, err %d, uio->uio_resid %zd, n %d\n", err, uio->uio_resid, n); } while (err == 0 && uio->uio_resid > 0 && n > 0); return (err); }