/* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int cd9660_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap = v; struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; struct iso_node *ip; int error; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("cd9660_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); if (error) { bp->b_error = error; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_mnt->im_devvp; return (VOP_STRATEGY(vp, bp)); }
/* * Synchronous write. * Release buffer on completion. */ int bwrite(register struct buf *bp) { int rv; if(bp->b_flags & B_INVAL) { brelse(bp); return (0); } else { int wasdelayed; if(!(bp->b_flags & B_BUSY)) panic("bwrite: not busy"); wasdelayed = bp->b_flags & B_DELWRI; bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_ASYNC|B_DELWRI); if(wasdelayed) reassignbuf(bp, bp->b_vp); bp->b_flags |= B_DIRTY; bp->b_vp->v_numoutput++; VOP_STRATEGY(bp); rv = biowait(bp); brelse(bp); return (rv); } }
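/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */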
int sysvbfs_strategy(void *arg) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *a = arg; struct buf *b = a->a_bp; struct vnode *v = a->a_vp; struct sysvbfs_node *bnode = v->v_data; struct sysvbfs_mount *bmp = bnode->bmp; int error; DPRINTF("%s:\n", __func__); KDASSERT(v->v_type == VREG); if (b->b_blkno == b->b_lblkno) { error = VOP_BMAP(v, b->b_lblkno, NULL, &b->b_blkno, NULL); if (error) { b->b_error = error; biodone(b); return error; } if ((long)b->b_blkno == -1) clrbuf(b); } if ((long)b->b_blkno == -1) { biodone(b); return 0; } return VOP_STRATEGY(bmp->devvp, b); }
/* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int filecore_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap = v; struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; struct filecore_node *ip; int error; ip = VTOI(vp); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); if (error) { bp->b_error = error; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_devvp; return (VOP_STRATEGY(vp, bp)); }
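/*
 * Read the given logical block of a vnode.  The block is looked up in
 * the buffer cache first; if it is not cached, a read is started and
 * waited for.  The buffer is returned through bpp.
 */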
int bread ( struct vnode * vp, lblkno_t blkno, int size, struct ucred * cred, struct buf ** bpp ) { struct buf *bp; bp = buf_getblk (vp, blkno, size); *bpp = bp; /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); bp->b_bio->bio_flags = BIO_READ; VOP_STRATEGY (vp, bp); return (buf_wait (bp)); } return (OK); }
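/*
 * Write a buffer to its vnode.  Invalid buffers are simply released.
 * The write is synchronous unless the caller set B_ASYNC, in which
 * case the routine returns without waiting for completion.
 */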
int bwrite ( struct buf * bp ) { int rtval; int oldflags = bp->b_flags; if(bp->b_flags & B_INVAL) { brelse (bp); return (OK); } if((bp->b_flags & B_BUSY) == 0) { #ifdef DIAGNOSTIC logMsg ("bwrite: buffer is not busy", 0, 0, 0, 0, 0, 0); #endif return (ERROR); } bp->b_flags &= ~(B_READ | B_DONE | B_ERROR); bp->b_bio->bio_flags = BIO_WRITE; VOP_STRATEGY (bp->b_vp, bp); if ((oldflags & B_ASYNC) == 0) { rtval = buf_wait (bp); brelse (bp); return (rtval); } return(OK); }
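/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */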
int v7fs_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *a = v; struct buf *b = a->a_bp; struct vnode *vp = a->a_vp; struct v7fs_node *v7node = vp->v_data; struct v7fs_mount *v7fsmount = v7node->v7fsmount; int error; DPRINTF("%p\n", vp); KDASSERT(vp->v_type == VREG); if (b->b_blkno == b->b_lblkno) { error = VOP_BMAP(vp, b->b_lblkno, NULL, &b->b_blkno, NULL); if (error) { b->b_error = error; biodone(b); return error; } if ((long)b->b_blkno == -1) clrbuf(b); } if ((long)b->b_blkno == -1) { biodone(b); return 0; } return VOP_STRATEGY(v7fsmount->devvp, b); }
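/*
 * Start I/O on a buffer for the cryptographic disk driver: allocate a
 * nested buffer, encrypt the outgoing data first if this is a write,
 * and hand the nested buffer to the backing vnode's strategy routine.
 */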
static int cgd_diskstart(device_t dev, struct buf *bp) { struct cgd_softc *cs = device_private(dev); struct dk_softc *dksc = &cs->sc_dksc; struct buf *nbp; void * addr; void * newaddr; daddr_t bn; struct vnode *vp; DPRINTF_FOLLOW(("cgd_diskstart(%p, %p)\n", dksc, bp)); bn = bp->b_rawblkno; /* * We attempt to allocate all of our resources up front, so that * we can fail quickly if they are unavailable. */ nbp = getiobuf(cs->sc_tvn, false); if (nbp == NULL) return EAGAIN; /* * If we are writing, then we need to encrypt the outgoing * block into a new block of memory. */ newaddr = addr = bp->b_data; if ((bp->b_flags & B_READ) == 0) { newaddr = cgd_getdata(dksc, bp->b_bcount); if (!newaddr) { putiobuf(nbp); return EAGAIN; } cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn, DEV_BSIZE, CGD_CIPHER_ENCRYPT); } nbp->b_data = newaddr; nbp->b_flags = bp->b_flags; nbp->b_oflags = bp->b_oflags; nbp->b_cflags = bp->b_cflags; nbp->b_iodone = cgdiodone; nbp->b_proc = bp->b_proc; nbp->b_blkno = bn; nbp->b_bcount = bp->b_bcount; nbp->b_private = bp; BIO_COPYPRIO(nbp, bp); if ((nbp->b_flags & B_READ) == 0) { vp = nbp->b_vp; mutex_enter(vp->v_interlock); vp->v_numoutput++; mutex_exit(vp->v_interlock); } VOP_STRATEGY(cs->sc_tvn, nbp); return 0; }
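/*
 * Rump kernel wrapper for VOP_STRATEGY: enter and leave the rump
 * kernel context around the call.
 */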
int RUMP_VOP_STRATEGY(struct vnode *vp, struct buf *bp) { int error; rump_schedule(); error = VOP_STRATEGY(vp, bp); rump_unschedule(); return error; }
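/*
 * Bootstrap strategy routine: pass the nested read buffer straight to
 * the device vnode.
 */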
static void udf_queuebuf_bootstrap(struct udf_strat_args *args) { struct udf_mount *ump = args->ump; struct buf *buf = args->nestbuf; KASSERT(ump); KASSERT(buf); KASSERT(buf->b_iodone == nestiobuf_iodone); KASSERT(buf->b_flags & B_READ); VOP_STRATEGY(ump->devvp, buf); }
/* * Operates like bread, but also starts I/O on the specified * read-ahead block. [See page 55 of Bach's Book] */ int breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize, struct ucred *cred, struct buf **bpp) { struct buf *bp, *rabp; int rv = 0, needwait = 0; bp = getblk (vp, blkno, size); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ bp->b_rcred = cred; VOP_STRATEGY(bp); needwait++; } rabp = getblk (vp, rablkno, rabsize); /* if not found in cache, do some I/O (overlapped with first) */ if ((rabp->b_flags & B_CACHE) == 0 || (rabp->b_flags & B_INVAL) != 0) { rabp->b_flags |= B_READ | B_ASYNC; rabp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ rabp->b_rcred = cred; VOP_STRATEGY(rabp); } else brelse(rabp); /* wait for original I/O */ if (needwait) rv = biowait (bp); *bpp = bp; return (rv); }
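/*
 * Issue the queued component buffers, counting pending writes against
 * each component vnode before handing the buffer to its strategy routine.
 */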
static void ld_ataraid_start_vstrategy(void *arg) { struct ld_ataraid_softc *sc = arg; struct cbuf *cbp; while ((cbp = SIMPLEQ_FIRST(&sc->sc_cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&sc->sc_cbufq, cb_q); if ((cbp->cb_buf.b_flags & B_READ) == 0) { mutex_enter(cbp->cb_buf.b_vp->v_interlock); cbp->cb_buf.b_vp->v_numoutput++; mutex_exit(cbp->cb_buf.b_vp->v_interlock); } VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); } }
/* * Just call the device strategy routine */ int adosfs_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *sp = v; struct buf *bp; struct anode *ap; struct vnode *vp; int error; #ifdef ADOSFS_DIAGNOSTIC advopprint(sp); #endif bp = sp->a_bp; if (bp->b_vp == NULL) { bp->b_error = EIO; biodone(bp); error = EIO; goto reterr; } vp = sp->a_vp; ap = VTOA(vp); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); if (error) { bp->b_error = error; biodone(bp); goto reterr; } } if ((long)bp->b_blkno == -1) { biodone(bp); error = 0; goto reterr; } vp = ap->amp->devvp; error = VOP_STRATEGY(vp, bp); reterr: #ifdef ADOSFS_DIAGNOSTIC printf(" %d)", error); #endif return(error); }
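/*
 * Pass the buffer to the upper vnode if one exists, otherwise to the
 * lower vnode.  Writes must never reach the lower layer.
 */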
static int unionfs_strategy(void *v) { struct vop_strategy_args *ap = v; struct unionfs_node *unp; struct vnode *vp; unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); #ifdef DIAGNOSTIC if (vp == NULLVP) panic("unionfs_strategy: nullvp"); if ((ap->a_bp->b_flags & B_READ) == 0 && vp == unp->un_lowervp) panic("unionfs_strategy: writing to lowervp"); #endif return (VOP_STRATEGY(vp, ap->a_bp)); }
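/*
 * Get a buffer for the given block and start a read into it unless it
 * already holds valid data.  Read statistics are charged to the file
 * system the vnode belongs to.
 */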
struct buf * bio_doread(struct vnode *vp, daddr_t blkno, int size, int async) { struct buf *bp; struct mount *mp; bp = getblk(vp, blkno, size, 0, 0); /* * If buffer does not have valid data, start a read. * Note that if buffer is B_INVAL, getblk() won't return it. * Therefore, it's valid if its I/O has completed or been delayed. */ if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { SET(bp->b_flags, B_READ | async); bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); /* Pay for the read. */ curproc->p_ru.ru_inblock++; /* XXX */ } else if (async) { brelse(bp); } mp = vp->v_type == VBLK? vp->v_specmountpoint : vp->v_mount; /* * Collect statistics on synchronous and asynchronous reads. * Reads from block devices are charged to their associated * filesystem (if any). */ if (mp != NULL) { if (async == 0) mp->mnt_stat.f_syncreads++; else mp->mnt_stat.f_asyncreads++; } return (bp); }
/* * Asynchronous write. * Start I/O on a buffer, but do not wait for it to complete. * The buffer is released when the I/O completes. */ void bawrite(register struct buf *bp) { if(!(bp->b_flags & B_BUSY)) panic("bawrite: not busy"); if(bp->b_flags & B_INVAL) brelse(bp); else { int wasdelayed; wasdelayed = bp->b_flags & B_DELWRI; bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if(wasdelayed) reassignbuf(bp, bp->b_vp); bp->b_flags |= B_DIRTY | B_ASYNC; bp->b_vp->v_numoutput++; VOP_STRATEGY(bp); } }
/* * Find the block in the buffer pool. * If the buffer is not present, allocate a new buffer and load * its contents according to the filesystem fill routine. */ int bread(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred, struct buf **bpp) { struct buf *bp; int rv = 0; bp = getblk (vp, blkno, size); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0 || (bp->b_flags & B_INVAL) != 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE|B_ERROR|B_INVAL); if (cred != NOCRED) crhold(cred); /* 25 Apr 92*/ bp->b_rcred = cred; VOP_STRATEGY(bp); rv = biowait (bp); } *bpp = bp; return (rv); }
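/*
 * Illustrative only: a minimal sketch of how a caller typically uses the
 * bread() shown above, assuming the 4.4BSD-style interface (NOCRED, brelse).
 * The function and its arguments are hypothetical and not part of the
 * sources collected here.
 */
int
example_read_block(struct vnode *vp, daddr_t lbn, int bsize)
{
	struct buf *bp;
	int error;

	/* Look the block up in the cache, reading it from disk if needed. */
	error = bread(vp, lbn, bsize, NOCRED, &bp);
	if (error) {
		/* bread() still hands back the buffer; release it. */
		brelse(bp);
		return (error);
	}

	/* ... examine or modify bp->b_data here ... */

	brelse(bp);
	return (0);
}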
static __inline struct buf * bio_doread(struct vnode *vp, daddr_t blkno, int size, int async) { struct buf *bp; bp = getblk(vp, blkno, size, 0, 0); /* * If buffer does not have valid data, start a read. * Note that if buffer is B_INVAL, getblk() won't return it. * Therefore, it's valid if its I/O has completed or been delayed. */ if (!ISSET(bp->b_flags, (B_DONE | B_DELWRI))) { SET(bp->b_flags, B_READ | async); VOP_STRATEGY(bp); /* Pay for the read. */ curproc->p_stats->p_ru.ru_inblock++; /* XXX */ } else if (async) { brelse(bp); } return (bp); }
/* * miscfs/genfs getpages routine. This is a fair bit simpler than the * kernel counterpart since we're not being executed from a fault handler * and generally don't need to care about PGO_LOCKED or other cruft. * We do, however, need to care about page locking and we keep trying until * we get all the pages within the range. The object locking protocol * is the same as for the kernel: enter with the object lock held, * return with it released. */ int genfs_getpages(void *v) { struct vop_getpages_args /* { struct vnode *a_vp; voff_t a_offset; struct vm_page **a_m; int *a_count; int a_centeridx; vm_prot_t a_access_type; int a_advice; int a_flags; } */ *ap = v; struct vnode *vp = ap->a_vp; struct uvm_object *uobj = (struct uvm_object *)vp; struct vm_page *pg; voff_t curoff, endoff; off_t diskeof; size_t bufsize, remain, bufoff, xfersize; uint8_t *tmpbuf; int bshift = vp->v_mount->mnt_fs_bshift; int bsize = 1<<bshift; int count = *ap->a_count; int async; int i, error; /* * Ignore async for now, the structure of this routine * doesn't exactly allow for it ... */ async = 0; if (ap->a_centeridx != 0) panic("%s: centeridx != not supported", __func__); if (ap->a_access_type & VM_PROT_WRITE) vp->v_iflag |= VI_ONWORKLST; curoff = ap->a_offset & ~PAGE_MASK; for (i = 0; i < count; i++, curoff += PAGE_SIZE) { retrylookup: pg = uvm_pagelookup(uobj, curoff); if (pg == NULL) break; /* page is busy? we need to wait until it's released */ if (pg->flags & PG_BUSY) { pg->flags |= PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "getpg",0); mutex_enter(&uobj->vmobjlock); goto retrylookup; } pg->flags |= PG_BUSY; if (pg->flags & PG_FAKE) break; ap->a_m[i] = pg; } /* got everything? if so, just return */ if (i == count) { mutex_exit(&uobj->vmobjlock); return 0; } /* * didn't? Ok, allocate backing pages. Start from the first * one we missed. */ for (; i < count; i++, curoff += PAGE_SIZE) { retrylookup2: pg = uvm_pagelookup(uobj, curoff); /* found? busy it and be happy */ if (pg) { if (pg->flags & PG_BUSY) { pg->flags = PG_WANTED; UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "getpg2", 0); mutex_enter(&uobj->vmobjlock); goto retrylookup2; } else { pg->flags |= PG_BUSY; } /* not found? make a new page */ } else { pg = rumpvm_makepage(uobj, curoff); } ap->a_m[i] = pg; } /* * We have done all the clerical work and have all pages busied. * Release the vm object for other consumers. */ mutex_exit(&uobj->vmobjlock); /* * Now, we have all the pages here & busy. Transfer the range * starting from the missing offset and transfer into the * page buffers. 
*/ GOP_SIZE(vp, vp->v_size, &diskeof, 0); /* align to boundaries */ endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT); endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1))); curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1); remain = endoff - curoff; if (diskeof > curoff) remain = MIN(remain, diskeof - curoff); DPRINTF(("a_offset: %llx, startoff: 0x%llx, endoff 0x%llx\n", (unsigned long long)ap->a_offset, (unsigned long long)curoff, (unsigned long long)endoff)); /* read everything into a buffer */ bufsize = round_page(remain); tmpbuf = kmem_zalloc(bufsize, KM_SLEEP); for (bufoff = 0; remain; remain -= xfersize, bufoff+=xfersize) { struct buf *bp; struct vnode *devvp; daddr_t lbn, bn; int run; lbn = (curoff + bufoff) >> bshift; /* XXX: assume eof */ error = VOP_BMAP(vp, lbn, &devvp, &bn, &run); if (error) panic("%s: VOP_BMAP & lazy bum: %d", __func__, error); DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn, (int)(curoff+bufoff), (int)bn, run)); xfersize = MIN(((lbn+1+run)<<bshift)-(curoff+bufoff), remain); /* hole? */ if (bn == -1) { memset(tmpbuf + bufoff, 0, xfersize); continue; } bp = getiobuf(vp, true); bp->b_data = tmpbuf + bufoff; bp->b_bcount = xfersize; bp->b_blkno = bn; bp->b_lblkno = 0; bp->b_flags = B_READ; bp->b_cflags = BC_BUSY; if (async) { bp->b_flags |= B_ASYNC; bp->b_iodone = uvm_aio_biodone; } VOP_STRATEGY(devvp, bp); if (bp->b_error) panic("%s: VOP_STRATEGY, lazy bum", __func__); if (!async) putiobuf(bp); } /* skip to beginning of pages we're interested in */ bufoff = 0; while (round_page(curoff + bufoff) < trunc_page(ap->a_offset)) bufoff += PAGE_SIZE; DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff))); for (i = 0; i < count; i++, bufoff += PAGE_SIZE) { /* past our prime? */ if (curoff + bufoff >= endoff) break; pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff); KASSERT(pg); DPRINTF(("got page %p (off 0x%x)\n", pg, (int)(curoff+bufoff))); if (pg->flags & PG_FAKE) { memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE); pg->flags &= ~PG_FAKE; pg->flags |= PG_CLEAN; } ap->a_m[i] = pg; } *ap->a_count = i; kmem_free(tmpbuf, bufsize); return 0; }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, int level, daddr_t *countp, daddr_t *rcountp, long *lastsegp, size_t *bcp) { int i; struct buf *bp; struct lfs *fs = ip->i_lfs; int32_t *bap; /* XXX ondisk32 */ struct vnode *vp; daddr_t nb, nlbn, last; int32_t *copy = NULL; /* XXX ondisk32 */ daddr_t blkcount, rblkcount, factor; int nblocks; daddr_t blocksreleased = 0, real_released = 0; int error = 0, allerror = 0; ASSERT_SEGLOCK(fs); /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= LFS_NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0); if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn); } else { trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn); curlwp->l_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("lfs_indirtrunc: bad buffer size"); bp->b_blkno = LFS_FSBTODB(fs, dbn); VOP_STRATEGY(vp, bp); error = biowait(bp); } if (error) { brelse(bp, 0); *countp = *rcountp = 0; return (error); } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ if (lastbn >= 0) { copy = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK); memcpy((void *)copy, (void *)bap, lfs_sb_getbsize(fs)); memset((void *)&bap[last + 1], 0, /* XXX ondisk32 */ (u_int)(LFS_NINDIR(fs) - (last + 1)) * sizeof (int32_t)); error = VOP_BWRITE(bp->b_vp, bp); if (error) allerror = error; bap = copy; } /* * Recursively free totally unused blocks. */ for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { error = lfs_indirtrunc(ip, nlbn, nb, (daddr_t)-1, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; blocksreleased += blkcount; real_released += rblkcount; } lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp); if (bap[i] > 0) real_released += nblocks; blocksreleased += nblocks; } /* * Recursively free last partial block. 
*/ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { error = lfs_indirtrunc(ip, nlbn, nb, last, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; real_released += rblkcount; blocksreleased += blkcount; } } if (copy != NULL) { lfs_free(fs, copy, LFS_NB_IBLOCK); } else { mutex_enter(&bufcache_lock); if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); wakeup(&fs->lfs_availsleep); } brelsel(bp, BC_INVAL); mutex_exit(&bufcache_lock); } *countp = blocksreleased; *rcountp = real_released; return (allerror); }
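/*
 * Start I/O on the concatenated disk: translate the partition-relative
 * block number to an absolute one, split the request into per-component
 * buffers, and fire them off with VOP_STRATEGY.
 */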
void ccdstart(struct ccd_softc *cs, struct buf *bp) { long bcount, rcount; struct ccdbuf **cbpp, *cbp; caddr_t addr; daddr_t bn; struct partition *pp; int i, old_io = cs->sc_cflags & CCDF_OLD; CCD_DPRINTF(CCDB_FOLLOW, ("ccdstart(%p, %p, %s)\n", cs, bp, bp->b_flags & B_READ? "read" : "write")); /* Instrumentation. */ disk_busy(&cs->sc_dkdev); /* * Translate the partition-relative block number to an absolute. */ bn = bp->b_blkno; if (DISKPART(bp->b_dev) != RAW_PART) { pp = &cs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; bn += pp->p_offset; } /* * Allocate component buffers */ cbpp = malloc(2 * cs->sc_nccdisks * sizeof(struct ccdbuf *), M_DEVBUF, M_WAITOK); bzero(cbpp, 2 * cs->sc_nccdisks * sizeof(struct ccdbuf *)); addr = bp->b_data; old_io = old_io || ((vaddr_t)addr & PAGE_MASK); for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { rcount = ccdbuffer(cs, bp, bn, addr, bcount, cbpp, old_io); /* * This is the old, slower, but less restrictive, mode of * operation. It allows interleaves which are not multiples * of PAGE_SIZE and mirroring. */ if (old_io) { if ((cbpp[0]->cb_buf.b_flags & B_READ) == 0) cbpp[0]->cb_buf.b_vp->v_numoutput++; VOP_STRATEGY(&cbpp[0]->cb_buf); if ((cs->sc_cflags & CCDF_MIRROR) && ((cbpp[0]->cb_buf.b_flags & B_READ) == 0)) { cbpp[1]->cb_buf.b_vp->v_numoutput++; VOP_STRATEGY(&cbpp[1]->cb_buf); } } bn += btodb(rcount); addr += rcount; } /* The new leaner mode of operation */ if (!old_io) /* * Fire off the requests */ for (i = 0; i < 2*cs->sc_nccdisks; i++) { cbp = cbpp[i]; if (cbp) { if ((cbp->cb_buf.b_flags & B_READ) == 0) cbp->cb_buf.b_vp->v_numoutput++; VOP_STRATEGY(&cbp->cb_buf); } } free(cbpp, M_DEVBUF); }
/* * Block write. Described in Bach (p.56) */ int bwrite(struct buf *bp) { int rv, async, wasdelayed, s; struct vnode *vp; struct mount *mp; /* * Remember buffer type, to switch on it later. If the write was * synchronous, but the file system was mounted with MNT_ASYNC, * convert it to a delayed write. * XXX note that this relies on delayed tape writes being converted * to async, not sync writes (which is safe, but ugly). */ async = ISSET(bp->b_flags, B_ASYNC); if (!async && bp->b_vp && bp->b_vp->v_mount && ISSET(bp->b_vp->v_mount->mnt_flag, MNT_ASYNC)) { bdwrite(bp); return (0); } /* * Collect statistics on synchronous and asynchronous writes. * Writes to block devices are charged to their associated * filesystem (if any). */ if ((vp = bp->b_vp) != NULL) { if (vp->v_type == VBLK) mp = vp->v_specmountpoint; else mp = vp->v_mount; if (mp != NULL) { if (async) mp->mnt_stat.f_asyncwrites++; else mp->mnt_stat.f_syncwrites++; } } wasdelayed = ISSET(bp->b_flags, B_DELWRI); CLR(bp->b_flags, (B_READ | B_DONE | B_ERROR | B_DELWRI)); s = splbio(); /* * If not synchronous, pay for the I/O operation and make * sure the buf is on the correct vnode queue. We have * to do this now, because if we don't, the vnode may not * be properly notified that its I/O has completed. */ if (wasdelayed) { reassignbuf(bp); } else curproc->p_stats->p_ru.ru_oublock++; /* Initiate disk write. Make sure the appropriate party is charged. */ bp->b_vp->v_numoutput++; splx(s); SET(bp->b_flags, B_WRITEINPROG); VOP_STRATEGY(bp); if (async) return (0); /* * If I/O was synchronous, wait for it to complete. */ rv = biowait(bp); /* Release the buffer. */ brelse(bp); return (rv); }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, int level, int64_t *countp) { int i; struct buf *bp; struct fs *fs = ip->i_fs; int32_t *bap1 = NULL; int64_t *bap2 = NULL; struct vnode *vp; daddr_t nb, nlbn, last; char *copy = NULL; int64_t blkcount, factor, blocksreleased = 0; int nblocks; int error = 0, allerror = 0; const int needswap = UFS_FSNEEDSWAP(fs); #define RBAP(ip, i) (((ip)->i_ump->um_fstype == UFS1) ? \ ufs_rw32(bap1[i], needswap) : ufs_rw64(bap2[i], needswap)) #define BAP_ASSIGN(ip, i, value) \ do { \ if ((ip)->i_ump->um_fstype == UFS1) \ bap1[i] = (value); \ else \ bap2[i] = (value); \ } while(0) /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= FFS_NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); error = ffs_getblk(vp, lbn, FFS_NOBLK, fs->fs_bsize, false, &bp); if (error) { *countp = 0; return error; } if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); } else { trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); curlwp->l_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; bp->b_flags &= ~B_COWDONE; /* we change blkno below */ if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); VOP_STRATEGY(vp, bp); error = biowait(bp); if (error == 0) error = fscow_run(bp, true); } if (error) { brelse(bp, 0); *countp = 0; return (error); } if (ip->i_ump->um_fstype == UFS1) bap1 = (int32_t *)bp->b_data; else bap2 = (int64_t *)bp->b_data; if (lastbn >= 0) { copy = kmem_alloc(fs->fs_bsize, KM_SLEEP); memcpy((void *)copy, bp->b_data, (u_int)fs->fs_bsize); for (i = last + 1; i < FFS_NINDIR(fs); i++) BAP_ASSIGN(ip, i, 0); error = bwrite(bp); if (error) allerror = error; if (ip->i_ump->um_fstype == UFS1) bap1 = (int32_t *)copy; else bap2 = (int64_t *)copy; } /* * Recursively free totally unused blocks. */ for (i = FFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = RBAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), (daddr_t)-1, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } if ((ip->i_ump->um_mountp->mnt_wapbl) && ((level > SINGLE) || (ITOV(ip)->v_type != VREG))) { UFS_WAPBL_REGISTER_DEALLOCATION(ip->i_ump->um_mountp, FFS_FSBTODB(fs, nb), fs->fs_bsize); } else ffs_blkfree(fs, ip->i_devvp, nb, fs->fs_bsize, ip->i_number); blocksreleased += nblocks; } /* * Recursively free last partial block. 
*/ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = RBAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, FFS_FSBTODB(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { kmem_free(copy, fs->fs_bsize); } else { brelse(bp, BC_INVAL); } *countp = blocksreleased; return (allerror); }
/* * Called at interrupt time. Mark the component as done and if all * components are done, take an "interrupt". */ static void ld_ataraid_iodone_raid0(struct buf *vbp) { struct cbuf *cbp = (struct cbuf *) vbp, *other_cbp; struct buf *bp = cbp->cb_obp; struct ld_ataraid_softc *sc = cbp->cb_sc; struct ataraid_array_info *aai = sc->sc_aai; struct ataraid_disk_info *adi; long count; int s, iodone; s = splbio(); iodone = cbp->cb_flags & CBUF_IODONE; other_cbp = cbp->cb_other; if (other_cbp != NULL) /* You are alone */ other_cbp->cb_other = NULL; if (cbp->cb_buf.b_error != 0) { /* * Mark this component broken. */ adi = &aai->aai_disks[cbp->cb_comp]; adi->adi_status &= ~ADI_S_ONLINE; printf("%s: error %d on component %d (%s)\n", device_xname(sc->sc_ld.sc_dv), bp->b_error, cbp->cb_comp, device_xname(adi->adi_dev)); /* * If we didn't see an error yet and we are reading * RAID1 disk, try another component. */ if (bp->b_error == 0 && (cbp->cb_buf.b_flags & B_READ) != 0 && (aai->aai_level & AAI_L_RAID1) != 0 && cbp->cb_comp < aai->aai_width) { cbp->cb_comp += aai->aai_width; adi = &aai->aai_disks[cbp->cb_comp]; if (adi->adi_status & ADI_S_ONLINE) { cbp->cb_buf.b_error = 0; VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); goto out; } } if (iodone || other_cbp != NULL) /* * If I/O on other component successfully done * or the I/O is still in progress, no need * to tell an error to upper layer. */ ; else { bp->b_error = cbp->cb_buf.b_error ? cbp->cb_buf.b_error : EIO; } /* XXX Update component config blocks. */ } else { /* * If other I/O is still in progress, tell it that * our I/O is successfully done. */ if (other_cbp != NULL) other_cbp->cb_flags |= CBUF_IODONE; } count = cbp->cb_buf.b_bcount; buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); if (other_cbp != NULL) goto out; /* If all done, "interrupt". */ bp->b_resid -= count; if (bp->b_resid < 0) panic("ld_ataraid_iodone_raid0: count"); if (bp->b_resid == 0) lddone(&sc->sc_ld, bp); out: splx(s); }
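/*
 * Set up the block of the file containing the given offset: extend the
 * last fragment if necessary, create any missing indirect blocks, do
 * the free-block accounting, and return the data buffer through bpp
 * when one is requested.
 */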
/* VOP_BWRITE ULFS_NIADDR+2 times */ int lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, int flags, struct buf **bpp) { int offset; daddr_t daddr, idaddr; struct buf *ibp, *bp; struct inode *ip; struct lfs *fs; struct indir indirs[ULFS_NIADDR+2], *idp; daddr_t lbn, lastblock; int bcount; int error, frags, i, nsize, osize, num; ip = VTOI(vp); fs = ip->i_lfs; offset = lfs_blkoff(fs, startoffset); KASSERT(iosize <= lfs_sb_getbsize(fs)); lbn = lfs_lblkno(fs, startoffset); /* (void)lfs_check(vp, lbn, 0); */ ASSERT_MAYBE_SEGLOCK(fs); /* * Three cases: it's a block beyond the end of file, it's a block in * the file that may or may not have been assigned a disk address or * we're writing an entire block. * * Note, if the daddr is UNWRITTEN, the block already exists in * the cache (it was read or written earlier). If so, make sure * we don't count it as a new block or zero out its contents. If * it did not, make sure we allocate any necessary indirect * blocks. * * If we are writing a block beyond the end of the file, we need to * check if the old last block was a fragment. If it was, we need * to rewrite it. */ if (bpp) *bpp = NULL; /* Check for block beyond end of file and fragment extension needed. */ lastblock = lfs_lblkno(fs, ip->i_size); if (lastblock < ULFS_NDADDR && lastblock < lbn) { osize = lfs_blksize(fs, ip, lastblock); if (osize < lfs_sb_getbsize(fs) && osize > 0) { if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs), lastblock, (bpp ? &bp : NULL), cred))) return (error); ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs); lfs_dino_setsize(fs, ip->i_din, ip->i_size); uvm_vnp_setsize(vp, ip->i_size); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) (void) VOP_BWRITE(bp->b_vp, bp); } } /* * If the block we are writing is a direct block, it's the last * block in the file, and offset + iosize is less than a full * block, we can write one or more fragments. There are two cases: * the block is brand new and we should allocate it the correct * size or it already exists and contains some fragments and * may need to extend it. */ if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) { osize = lfs_blksize(fs, ip, lbn); nsize = lfs_fragroundup(fs, offset + iosize); if (lfs_lblktosize(fs, lbn) >= ip->i_size) { /* Brand new block or fragment */ frags = lfs_numfrags(fs, nsize); if (!ISSPACE(fs, frags, cred)) return ENOSPC; if (bpp) { *bpp = bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = UNWRITTEN; if (flags & B_CLRBUF) clrbuf(bp); } ip->i_lfs_effnblks += frags; mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, frags); mutex_exit(&lfs_lock); lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); } else { if (nsize <= osize) { /* No need to extend */ if (bpp && (error = bread(vp, lbn, osize, 0, &bp))) return error; } else { /* Extend existing block */ if ((error = lfs_fragextend(vp, osize, nsize, lbn, (bpp ? &bp : NULL), cred))) return error; } if (bpp) *bpp = bp; } return 0; } error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL); if (error) return (error); KASSERT(daddr <= LFS_MAX_DADDR(fs)); /* * Do byte accounting all at once, so we can gracefully fail *before* * we start assigning blocks. 
*/ frags = fs->um_seqinc; bcount = 0; if (daddr == UNASSIGNED) { bcount = frags; } for (i = 1; i < num; ++i) { if (!indirs[i].in_exists) { bcount += frags; } } if (ISSPACE(fs, bcount, cred)) { mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, bcount); mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bcount; } else { return ENOSPC; } if (daddr == UNASSIGNED) { if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) { lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); } /* * Create new indirect blocks if necessary */ if (num > 1) { idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off); for (i = 1; i < num; ++i) { ibp = getblk(vp, indirs[i].in_lbn, lfs_sb_getbsize(fs), 0,0); if (!indirs[i].in_exists) { clrbuf(ibp); ibp->b_blkno = UNWRITTEN; } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { ibp->b_blkno = LFS_FSBTODB(fs, idaddr); ibp->b_flags |= B_READ; VOP_STRATEGY(vp, ibp); biowait(ibp); } /* * This block exists, but the next one may not. * If that is the case mark it UNWRITTEN to keep * the accounting straight. */ /* XXX ondisk32 */ if (((int32_t *)ibp->b_data)[indirs[i].in_off] == 0) ((int32_t *)ibp->b_data)[indirs[i].in_off] = UNWRITTEN; /* XXX ondisk32 */ idaddr = ((int32_t *)ibp->b_data)[indirs[i].in_off]; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, indirs[i].in_lbn, ibp->b_flags, curproc->p_pid); } #endif if ((error = VOP_BWRITE(ibp->b_vp, ibp))) return error; } } } /* * Get the existing block from the cache, if requested. */ if (bpp) *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0); /* * Do accounting on blocks that represent pages. */ if (!bpp) lfs_register_block(vp, lbn); /* * The block we are writing may be a brand new block * in which case we need to do accounting. * * We can tell a truly new block because ulfs_bmaparray will say * it is UNASSIGNED. Once we allocate it we will assign it the * disk address UNWRITTEN. */ if (daddr == UNASSIGNED) { if (bpp) { if (flags & B_CLRBUF) clrbuf(bp); /* Note the new address */ bp->b_blkno = UNWRITTEN; } switch (num) { case 0: lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); break; case 1: lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); break; default: idp = &indirs[num - 1]; if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs), B_MODIFY, &ibp)) panic("lfs_balloc: bread bno %lld", (long long)idp->in_lbn); /* XXX ondisk32 */ ((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, idp->in_lbn, ibp->b_flags, curproc->p_pid); } #endif VOP_BWRITE(ibp->b_vp, ibp); } } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. */ if (iosize == lfs_sb_getbsize(fs)) /* Optimization: I/O is unnecessary. */ bp->b_blkno = daddr; else { /* * We need to read the block to preserve the * existing bytes. */ bp->b_blkno = daddr; bp->b_flags |= B_READ; VOP_STRATEGY(vp, bp); return (biowait(bp)); } } return (0); }
/* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(struct vnode *vp, daddr64_t bn, daddr64_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; daddr64_t daddr, metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { /* * XXX * If MAXBSIZE is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; if (!nump) nump = &num; if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, DIP(ip, db[bn])); if (*bnp == 0) *bnp = -1; else if (runp) for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, DIP(ip, db[bn - 1]), DIP(ip, db[bn])); ++bn, ++*runp); return (0); } /* Get disk address out of indirect block array */ daddr = DIP(ip, ib[xap->in_off]); devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) brelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if (bp->b_flags & (B_DONE | B_DELWRI)) { ; } #ifdef DIAGNOSTIC else if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif else { bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); curproc->p_ru.ru_inblock++; /* XXX */ if ((error = biowait(bp)) != 0) { brelse(bp); return (error); } } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { daddr = ((int64_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int64_t *)bp->b_data)[bn - 1], ((int64_t *)bp->b_data)[bn]); ++bn, ++*runp); continue; } #endif /* FFS2 */ daddr = ((int32_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int32_t *)bp->b_data)[bn - 1], ((int32_t *)bp->b_data)[bn]); ++bn, ++*runp); } if (bp) brelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); }
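/*
 * Break the request up into per-component buffers for a spanned array,
 * advancing to the next component as each one is exhausted, then issue
 * the buffers.
 */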
static int ld_ataraid_start_span(struct ld_softc *ld, struct buf *bp) { struct ld_ataraid_softc *sc = (void *) ld; struct ataraid_array_info *aai = sc->sc_aai; struct ataraid_disk_info *adi; SIMPLEQ_HEAD(, cbuf) cbufq; struct cbuf *cbp; char *addr; daddr_t bn; long bcount, rcount; u_int comp; /* Allocate component buffers. */ SIMPLEQ_INIT(&cbufq); addr = bp->b_data; /* Find the first component. */ comp = 0; adi = &aai->aai_disks[comp]; bn = bp->b_rawblkno; while (bn >= adi->adi_compsize) { bn -= adi->adi_compsize; adi = &aai->aai_disks[++comp]; } bp->b_resid = bp->b_bcount; for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { rcount = bp->b_bcount; if ((adi->adi_compsize - bn) < btodb(rcount)) rcount = dbtob(adi->adi_compsize - bn); cbp = ld_ataraid_make_cbuf(sc, bp, comp, bn, addr, rcount); if (cbp == NULL) { /* Free the already allocated component buffers. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (EAGAIN); } /* * For a span, we always know we advance to the next disk, * and always start at offset 0 on that disk. */ adi = &aai->aai_disks[++comp]; bn = 0; SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); addr += rcount; } /* Now fire off the requests. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); if ((cbp->cb_buf.b_flags & B_READ) == 0) { mutex_enter(&cbp->cb_buf.b_vp->v_interlock); cbp->cb_buf.b_vp->v_numoutput++; mutex_exit(&cbp->cb_buf.b_vp->v_interlock); } VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); } return (0); }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ int ffs_indirtrunc(struct inode *ip, daddr64_t lbn, daddr64_t dbn, daddr64_t lastbn, int level, long *countp) { int i; struct buf *bp; struct fs *fs = ip->i_fs; struct vnode *vp; void *copy = NULL; daddr64_t nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; int error = 0, allerror = 0; int32_t *bap1 = NULL; #ifdef FFS2 int64_t *bap2 = NULL; #endif /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); if (!(bp->b_flags & (B_DONE | B_DELWRI))) { curproc->p_ru.ru_inblock++; /* pay for read */ bcstats.pendingreads++; bcstats.numreads++; bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; VOP_STRATEGY(bp); error = biowait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)bp->b_data; else #endif bap1 = (int32_t *)bp->b_data; if (lastbn != -1) { copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); bcopy(bp->b_data, copy, (u_int) fs->fs_bsize); for (i = last + 1; i < NINDIR(fs); i++) BAP_ASSIGN(ip, i, 0); if (!DOINGASYNC(vp)) { error = bwrite(bp); if (error) allerror = error; } else { bawrite(bp); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)copy; else #endif bap1 = (int32_t *)copy; } /* * Recursively free totally unused blocks. */ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = BAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (daddr64_t)-1, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = BAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { free(copy, M_TEMP); } else { bp->b_flags |= B_INVAL; brelse(bp); } *countp = blocksreleased; return (allerror); }
/* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int ufs_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap = v; struct buf *bp; struct vnode *vp; struct inode *ip; struct mount *mp; int error; bp = ap->a_bp; vp = ap->a_vp; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ufs_strategy: spec"); KASSERT(bp->b_bcount != 0); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); if (error) { bp->b_error = error; biodone(bp); return (error); } if (bp->b_blkno == -1) /* no valid data */ clrbuf(bp); } if (bp->b_blkno < 0) { /* block is not on disk */ biodone(bp); return (0); } vp = ip->i_devvp; error = VOP_STRATEGY(vp, bp); if (error) return error; if (!BUF_ISREAD(bp)) return 0; mp = wapbl_vptomp(vp); if (mp == NULL || mp->mnt_wapbl_replay == NULL || !WAPBL_REPLAY_ISOPEN(mp) || !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount)) return 0; error = biowait(bp); if (error) return error; error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount); if (error) { mutex_enter(&bufcache_lock); SET(bp->b_cflags, BC_INVAL); mutex_exit(&bufcache_lock); } return error; }
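/*
 * Break the request up into per-component buffers for a striped array,
 * duplicating writes to the mirror component when RAID-1 is in effect,
 * then issue the buffers.
 */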
static int ld_ataraid_start_raid0(struct ld_softc *ld, struct buf *bp) { struct ld_ataraid_softc *sc = (void *) ld; struct ataraid_array_info *aai = sc->sc_aai; struct ataraid_disk_info *adi; SIMPLEQ_HEAD(, cbuf) cbufq; struct cbuf *cbp, *other_cbp; char *addr; daddr_t bn, cbn, tbn, off; long bcount, rcount; u_int comp; const int read = bp->b_flags & B_READ; const int mirror = aai->aai_level & AAI_L_RAID1; int error; /* Allocate component buffers. */ SIMPLEQ_INIT(&cbufq); addr = bp->b_data; bn = bp->b_rawblkno; bp->b_resid = bp->b_bcount; for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { tbn = bn / aai->aai_interleave; off = bn % aai->aai_interleave; if (__predict_false(tbn == aai->aai_capacity / aai->aai_interleave)) { /* Last stripe. */ daddr_t sz = (aai->aai_capacity - (tbn * aai->aai_interleave)) / aai->aai_width; comp = off / sz; cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + (off % sz); rcount = min(bcount, dbtob(sz)); } else { comp = tbn % aai->aai_width; cbn = ((tbn / aai->aai_width) * aai->aai_interleave) + off; rcount = min(bcount, dbtob(aai->aai_interleave - off)); } /* * See if a component is valid. */ try_mirror: adi = &aai->aai_disks[comp]; if ((adi->adi_status & ADI_S_ONLINE) == 0) { if (mirror && comp < aai->aai_width) { comp += aai->aai_width; goto try_mirror; } /* * No component available. */ error = EIO; goto free_and_exit; } cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount); if (cbp == NULL) { resource_shortage: error = EAGAIN; free_and_exit: /* Free the already allocated component buffers. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); buf_destroy(&cbp->cb_buf); CBUF_PUT(cbp); } return (error); } SIMPLEQ_INSERT_TAIL(&cbufq, cbp, cb_q); if (mirror && !read && comp < aai->aai_width) { comp += aai->aai_width; adi = &aai->aai_disks[comp]; if (adi->adi_status & ADI_S_ONLINE) { other_cbp = ld_ataraid_make_cbuf(sc, bp, comp, cbn, addr, rcount); if (other_cbp == NULL) goto resource_shortage; SIMPLEQ_INSERT_TAIL(&cbufq, other_cbp, cb_q); other_cbp->cb_other = cbp; cbp->cb_other = other_cbp; } } bn += btodb(rcount); addr += rcount; } /* Now fire off the requests. */ while ((cbp = SIMPLEQ_FIRST(&cbufq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&cbufq, cb_q); if ((cbp->cb_buf.b_flags & B_READ) == 0) { mutex_enter(&cbp->cb_buf.b_vp->v_interlock); cbp->cb_buf.b_vp->v_numoutput++; mutex_exit(&cbp->cb_buf.b_vp->v_interlock); } VOP_STRATEGY(cbp->cb_buf.b_vp, &cbp->cb_buf); } return (0); }