/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn,
    int level, int64_t *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	int32_t *bap1 = NULL;
	int64_t *bap2 = NULL;
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	char *copy = NULL;
	int64_t blkcount, factor, blocksreleased = 0;
	int nblocks;
	int error = 0, allerror = 0;
	const int needswap = UFS_FSNEEDSWAP(fs);

#define RBAP(ip, i)	(((ip)->i_ump->um_fstype == UFS1) ?		\
	    ufs_rw32(bap1[i], needswap) : ufs_rw64(bap2[i], needswap))
#define BAP_ASSIGN(ip, i, value)					\
	do {								\
		if ((ip)->i_ump->um_fstype == UFS1)			\
			bap1[i] = (value);				\
		else							\
			bap2[i] = (value);				\
	} while (0)

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= FFS_NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update the on-disk copy first.  Since
	 * we free the double (triple) indirect blocks before the single
	 * (double) indirect blocks, calls to bmap on these blocks will fail.
	 * However, we already have the on-disk address, so we have to set
	 * the b_blkno field explicitly instead of letting bread do
	 * everything for us.
	 */
	vp = ITOV(ip);
	error = ffs_getblk(vp, lbn, FFS_NOBLK, fs->fs_bsize, false, &bp);
	if (error) {
		*countp = 0;
		return error;
	}
	if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn);
		curlwp->l_ru.ru_inblock++;	/* pay for read */
		bp->b_flags |= B_READ;
		bp->b_flags &= ~B_COWDONE;	/* we change blkno below */
		if (bp->b_bcount > bp->b_bufsize)
			panic("ffs_indirtrunc: bad buffer size");
		bp->b_blkno = dbn;
		BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
		if (error == 0)
			error = fscow_run(bp, true);
	}
	if (error) {
		brelse(bp, 0);
		*countp = 0;
		return (error);
	}

	if (ip->i_ump->um_fstype == UFS1)
		bap1 = (int32_t *)bp->b_data;
	else
		bap2 = (int64_t *)bp->b_data;
	if (lastbn >= 0) {
		copy = kmem_alloc(fs->fs_bsize, KM_SLEEP);
		memcpy((void *)copy, bp->b_data, (u_int)fs->fs_bsize);
		for (i = last + 1; i < FFS_NINDIR(fs); i++)
			BAP_ASSIGN(ip, i, 0);
		error = bwrite(bp);
		if (error)
			allerror = error;
		if (ip->i_ump->um_fstype == UFS1)
			bap1 = (int32_t *)copy;
		else
			bap2 = (int64_t *)copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = FFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = RBAP(ip, i);
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb),
			    (daddr_t)-1, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		if ((ip->i_ump->um_mountp->mnt_wapbl) &&
		    ((level > SINGLE) || (ITOV(ip)->v_type != VREG))) {
			UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp,
			    FFS_FSBTODB(fs, nb), fs->fs_bsize);
		} else
			ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize,
			    ip->i_number);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = RBAP(ip, i);
		if (nb != 0) {
			error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb),
			    last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		kmem_free(copy, fs->fs_bsize);
	} else {
		brelse(bp, BC_INVAL);
	}

	*countp = blocksreleased;
	return (allerror);
}
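
/*
 * Illustrative sketch (not compiled): how the truncation path typically
 * drives ffs_indirtrunc(), one whole indirection tree per call, outermost
 * level first.  This is a condensed, hypothetical rendering of the loop in
 * ffs_truncate(); "indir_lbn[]", "lastiblock[]", "bn" and "count" stand in
 * for the real locals there, and error and journal handling are omitted.
 *
 * Worked example of the index arithmetic above: with an 8 KB block size on
 * UFS2, FFS_NINDIR(fs) == 8192 / 8 == 1024.  A DOUBLE-level call then has
 * factor == 1024, so lastbn == 5000 gives last == 4: entries 5..1023 are
 * freed outright, and entry 4 is cleansed recursively with
 * lastbn % factor == 904 as its new lastbn.
 */
#if 0
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = DIP(oip, ib[level]);   /* this level's indirect block */
		if (bn != 0) {
			error = ffs_indirtrunc(oip, indir_lbn[level],
			    FFS_FSBTODB(fs, bn), lastiblock[level],
			    level, &count);
			blocksreleased += count;
			/* Entire tree released: clear the inode pointer. */
			if (lastiblock[level] < 0) {
				DIP_ASSIGN(oip, ib[level], 0);
				ffs_blkfree(fs, oip->i_devvp, bn,
				    fs->fs_bsize, oip->i_number);
				blocksreleased += nblocks;
			}
		}
	}
#endif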
/* * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly * from the raw device to user buffers, and bypasses the buffer cache. * * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. */ int physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, void (*min_phys)(struct buf *), struct uio *uio) { struct iovec *iovp; struct lwp *l = curlwp; struct proc *p = l->l_proc; int i, error; struct buf *bp = NULL; struct physio_stat *ps; int concurrency = PHYSIO_CONCURRENCY - 1; error = RUN_ONCE(&physio_initialized, physio_init); if (__predict_false(error != 0)) { return error; } DPRINTF(("%s: called: off=%" PRIu64 ", resid=%zu\n", __func__, uio->uio_offset, uio->uio_resid)); flags &= B_READ | B_WRITE; if ((ps = kmem_zalloc(sizeof(*ps), KM_SLEEP)) == NULL) return ENOMEM; /* ps->ps_running = 0; */ /* ps->ps_error = 0; */ /* ps->ps_failed = 0; */ ps->ps_orig_bp = obp; ps->ps_endoffset = -1; mutex_init(&ps->ps_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&ps->ps_cv, "physio"); /* Make sure we have a buffer, creating one if necessary. */ if (obp != NULL) { /* [raise the processor priority level to splbio;] */ mutex_enter(&bufcache_lock); /* Mark it busy, so nobody else will use it. */ while (bbusy(obp, false, 0, NULL) == EPASSTHROUGH) ; mutex_exit(&bufcache_lock); concurrency = 0; /* see "XXXkludge" comment below */ } uvm_lwp_hold(l); for (i = 0; i < uio->uio_iovcnt; i++) { bool sync = true; iovp = &uio->uio_iov[i]; while (iovp->iov_len > 0) { size_t todo; vaddr_t endp; mutex_enter(&ps->ps_lock); if (ps->ps_failed != 0) { goto done_locked; } physio_wait(ps, sync ? 0 : concurrency); mutex_exit(&ps->ps_lock); if (obp != NULL) { /* * XXXkludge * some drivers use "obp" as an identifier. */ bp = obp; } else { bp = getiobuf(NULL, true); bp->b_cflags = BC_BUSY; } bp->b_dev = dev; bp->b_proc = p; bp->b_private = ps; /* * [mark the buffer busy for physical I/O] * (i.e. set B_PHYS (because it's an I/O to user * memory, and B_RAW, because B_RAW is to be * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ bp->b_oflags = 0; bp->b_cflags = BC_BUSY; bp->b_flags = flags | B_PHYS | B_RAW; bp->b_iodone = physio_biodone; /* [set up the buffer for a maximum-sized transfer] */ bp->b_blkno = btodb(uio->uio_offset); if (dbtob(bp->b_blkno) != uio->uio_offset) { error = EINVAL; goto done; } bp->b_bcount = MIN(MAXPHYS, iovp->iov_len); bp->b_data = iovp->iov_base; /* * [call minphys to bound the transfer size] * and remember the amount of data to transfer, * for later comparison. */ (*min_phys)(bp); todo = bp->b_bufsize = bp->b_bcount; #if defined(DIAGNOSTIC) if (todo > MAXPHYS) panic("todo(%zu) > MAXPHYS; minphys broken", todo); #endif /* defined(DIAGNOSTIC) */ sync = false; endp = (vaddr_t)bp->b_data + todo; if (trunc_page(endp) != endp) { /* * following requests can overlap. * note that uvm_vslock does round_page. */ sync = true; } /* * [lock the part of the user address space involved * in the transfer] * Beware vmapbuf(); it clobbers b_data and * saves it in b_saveaddr. However, vunmapbuf() * restores it. */ error = uvm_vslock(p->p_vmspace, bp->b_data, todo, (flags & B_READ) ? 
VM_PROT_WRITE : VM_PROT_READ); if (error) { goto done; } vmapbuf(bp, todo); BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); mutex_enter(&ps->ps_lock); ps->ps_running++; mutex_exit(&ps->ps_lock); /* [call strategy to start the transfer] */ (*strategy)(bp); bp = NULL; iovp->iov_len -= todo; iovp->iov_base = (char *)iovp->iov_base + todo; uio->uio_offset += todo; uio->uio_resid -= todo; } } done: mutex_enter(&ps->ps_lock); done_locked: physio_wait(ps, 0); mutex_exit(&ps->ps_lock); if (ps->ps_failed != 0) { off_t delta; delta = uio->uio_offset - ps->ps_endoffset; KASSERT(delta > 0); uio->uio_resid += delta; /* uio->uio_offset = ps->ps_endoffset; */ } else { KASSERT(ps->ps_endoffset == -1); } if (bp != NULL && bp != obp) { putiobuf(bp); } if (error == 0) { error = ps->ps_error; } mutex_destroy(&ps->ps_lock); cv_destroy(&ps->ps_cv); kmem_free(ps, sizeof(*ps)); /* * [clean up the state of the buffer] * Remember if somebody wants it, so we can wake them up below. * Also, if we had to steal it, give it back. */ if (obp != NULL) { KASSERT((obp->b_cflags & BC_BUSY) != 0); /* * [if another process is waiting for the raw I/O buffer, * wake up processes waiting to do physical I/O; */ mutex_enter(&bufcache_lock); obp->b_cflags &= ~(BC_BUSY | BC_WANTED); obp->b_flags &= ~(B_PHYS | B_RAW); obp->b_iodone = NULL; cv_broadcast(&obp->b_busy); mutex_exit(&bufcache_lock); } uvm_lwp_rele(l); DPRINTF(("%s: done: off=%" PRIu64 ", resid=%zu\n", __func__, uio->uio_offset, uio->uio_resid)); return error; }
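
/*
 * Illustrative sketch (hypothetical, not part of this file): the usual way
 * a character device exposes raw I/O on top of physio().  The driver names
 * "mydev_strategy" and "mydev_read" are assumptions; minphys is the system
 * default bound on transfer size.  Passing a NULL buf lets physio allocate
 * its own buffers and keep up to PHYSIO_CONCURRENCY transfers in flight.
 */
#if 0
static int
mydev_read(dev_t dev, struct uio *uio, int ioflag)
{

	/* Transfer directly between the device and the user's buffers. */
	return physio(mydev_strategy, NULL, dev, B_READ, minphys, uio);
}
#endif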