static int
nvtruncbuf_bp_trunc(struct buf *bp, void *data)
{
    struct truncbuf_info *info = data;

    /*
     * Do not try to use a buffer we cannot immediately lock,
     * but sleep anyway to prevent a livelock.  The code will
     * loop until all buffers can be acted upon.
     */
    if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
        atomic_add_int(&bp->b_refs, 1);
        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) == 0)
            BUF_UNLOCK(bp);
        atomic_subtract_int(&bp->b_refs, 1);
    } else if ((info->clean && (bp->b_flags & B_DELWRI)) ||
               (info->clean == 0 && (bp->b_flags & B_DELWRI) == 0) ||
               bp->b_vp != info->vp ||
               nvtruncbuf_bp_trunc_cmp(bp, data)) {
        BUF_UNLOCK(bp);
    } else {
        bremfree(bp);
        bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
        brelse(bp);
    }
    lwkt_yield();
    return(1);
}
/*
 * Release a buffer, with no I/O implied.
 */
void
brelse(struct buf *bp)
{
    ASSERT(ISSET(bp->b_flags, B_BUSY));
    DPRINTF(VFSDB_BIO, ("brelse: bp=%p dev=%llx blkno=%d\n",
                        bp, (long long)bp->b_dev, bp->b_blkno));

    BIO_LOCK();
    CLR(bp->b_flags, B_BUSY);
    BUF_UNLOCK(bp);
    if (ISSET(bp->b_flags, B_INVAL))
        bio_insert_head(bp);
    else
        bio_insert_tail(bp);
    BIO_UNLOCK();
}
/*
 * Flush out all dirty (delayed-write) buffers.
 * This is called at unmount time.
 */
void
bio_sync(void)
{
    struct buf *bp;
    int i;

start:
    BIO_LOCK();
    for (i = 0; i < NBUFS; i++) {
        bp = &buf_table[i];
        if (ISSET(bp->b_flags, B_BUSY)) {
            /* Wait for the busy buffer, then rescan from the start. */
            BIO_UNLOCK();
            BUF_LOCK(bp);
            BUF_UNLOCK(bp);
            goto start;
        }
        if (ISSET(bp->b_flags, B_DELWRI))
            bwrite(bp);
    }
    BIO_UNLOCK();
}
int
XFS_bwrite(xfs_buf_t *bp)
{
    int error;

    if (bp->b_vp == NULL) {
        error = xfs_buf_iorequest(bp);

        if ((bp->b_flags & B_ASYNC) == 0) {
            error = bufwait(bp);
#if 0
            if (BUF_LOCKRECURSED(bp))
                BUF_UNLOCK(bp);
            else
                brelse(bp);
#endif
            brelse(bp);
        }
        return (error);
    }
    error = bwrite(bp);
    return (error);
}
/*
 * Assign a buffer for the given block.
 *
 * The block is selected from the buffer list with an LRU
 * algorithm.  If the appropriate block already exists in the
 * block list, return it.  Otherwise, the least recently used
 * block is reused.
 */
struct buf *
getblk(dev_t dev, int blkno)
{
    struct buf *bp;

    DPRINTF(VFSDB_BIO, ("getblk: dev=%llx blkno=%d\n",
                        (long long)dev, blkno));
start:
    BIO_LOCK();
    bp = incore(dev, blkno);
    if (bp != NULL) {
        /* Block found in cache. */
        if (ISSET(bp->b_flags, B_BUSY)) {
            /*
             * Wait until the buffer is ready.
             */
            BIO_UNLOCK();
            BUF_LOCK(bp);
            BUF_UNLOCK(bp);
            /* Scan again if it's busy */
            goto start;
        }
        bio_remove(bp);
        SET(bp->b_flags, B_BUSY);
    } else {
        bp = bio_remove_head();
        if (ISSET(bp->b_flags, B_DELWRI)) {
            BIO_UNLOCK();
            bwrite(bp);
            goto start;
        }
        bp->b_flags = B_BUSY;
        bp->b_dev = dev;
        bp->b_blkno = blkno;
    }
    BUF_LOCK(bp);
    BIO_UNLOCK();
    DPRINTF(VFSDB_BIO, ("getblk: done bp=%p\n", bp));
    return bp;
}
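For context, a consumer of this interface typically follows a bread()-style pattern: obtain the buffer with getblk(), fill it from the device on a cache miss, then hand it back with brelse(). The sketch below is a hypothetical illustration, not code from the source tree; device_read() and the B_DONE valid-data flag are assumed helpers, while getblk(), brelse() and the B_INVAL handling match the functions shown above.

/*
 * Hypothetical bread()-style caller (illustrative sketch only).
 */
static int
read_block(dev_t dev, int blkno, void *data, size_t size)
{
    struct buf *bp;
    int error;

    bp = getblk(dev, blkno);                /* returns a locked, B_BUSY buffer */
    if (!ISSET(bp->b_flags, B_DONE)) {
        /* Cache miss: fill the buffer from the device first. */
        error = device_read(dev, bp->b_data, size, blkno);  /* placeholder */
        if (error) {
            SET(bp->b_flags, B_INVAL);      /* do not keep bad data cached */
            brelse(bp);                     /* B_INVAL buffers go to the head */
            return error;
        }
        SET(bp->b_flags, B_DONE);
    }
    memcpy(data, bp->b_data, size);         /* copy out while we still hold it */
    brelse(bp);                             /* clears B_BUSY, requeues for LRU */
    return 0;
}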
/* I/O on subdisk completed */
void
sdio_done(struct bio *bio)
{
    struct sdbuf *sbp;

    get_mplock();

    sbp = (struct sdbuf *) bio->bio_buf;
    if (sbp->b.b_flags & B_ERROR) {                 /* had an error */
        sbp->bio->bio_buf->b_flags |= B_ERROR;      /* propagate upwards */
        sbp->bio->bio_buf->b_error = sbp->b.b_error;
    }
#ifdef VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
        logrq(loginfo_sdiodone, (union rqinfou)bio, bio);
#endif
    sbp->bio->bio_buf->b_resid = sbp->b.b_resid;    /* copy the resid field */

    /* Now update the statistics */
    if (sbp->b.b_cmd == BUF_CMD_READ) {             /* read operation */
        DRIVE[sbp->driveno].reads++;
        DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount;
        SD[sbp->sdno].reads++;
        SD[sbp->sdno].bytes_read += sbp->b.b_bcount;
    } else {                                        /* write operation */
        DRIVE[sbp->driveno].writes++;
        DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount;
        SD[sbp->sdno].writes++;
        SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
    }
    biodone_sync(bio);
    biodone(sbp->bio);                              /* complete the caller's I/O */
    BUF_UNLOCK(&sbp->b);
    uninitbufbio(&sbp->b);
    Free(sbp);
    rel_mplock();
}
/* Free a request block and anything hanging off it */
void
freerq(struct request *rq)
{
    struct rqgroup *rqg;
    struct rqgroup *nrqg;                           /* next in chain */
    int rqno;

    for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) {  /* through the whole request chain */
        if (rqg->lock)                              /* got a lock? */
            unlockrange(rqg->plexno, rqg->lock);    /* yes, free it */
        for (rqno = 0; rqno < rqg->count; rqno++) {
            if ((rqg->rqe[rqno].flags & XFR_MALLOCED)   /* data buffer was malloced, */
                && rqg->rqe[rqno].b.b_data)         /* and the allocation succeeded */
                Free(rqg->rqe[rqno].b.b_data);      /* free it */
            if (rqg->rqe[rqno].flags & XFR_BUFLOCKED) { /* locked this buffer, */
                BUF_UNLOCK(&rqg->rqe[rqno].b);      /* unlock it again */
                uninitbufbio(&rqg->rqe[rqno].b);
            }
        }
        nrqg = rqg->next;                           /* note the next one */
        Free(rqg);                                  /* and free this one */
    }
    Free(rq);                                       /* free the request itself */
}
/*
 * Read data to a buf, including read-ahead if we find this to be beneficial.
 * cluster_read replaces bread.
 */
int
cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
    struct ucred *cred, long totread, int seqcount, int gbflags,
    struct buf **bpp)
{
    struct buf *bp, *rbp, *reqbp;
    struct bufobj *bo;
    daddr_t blkno, origblkno;
    int maxra, racluster;
    int error, ncontig;
    int i;

    error = 0;
    bo = &vp->v_bufobj;
    if (!unmapped_buf_allowed)
        gbflags &= ~GB_UNMAPPED;

    /*
     * Try to limit the amount of read-ahead by a few
     * ad-hoc parameters.  This needs work!!!
     */
    racluster = vp->v_mount->mnt_iosize_max / size;
    maxra = seqcount;
    maxra = min(read_max, maxra);
    maxra = min(nbuf/8, maxra);
    if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize)
        maxra = (filesize / size) - lblkno;

    /*
     * get the requested block
     */
    *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
    if (bp == NULL)
        return (EBUSY);
    origblkno = lblkno;

    /*
     * if it is in the cache, then check to see if the reads have been
     * sequential.  If they have, then try some read-ahead, otherwise
     * back-off on prospective read-aheads.
     */
    if (bp->b_flags & B_CACHE) {
        if (!seqcount) {
            return 0;
        } else if ((bp->b_flags & B_RAM) == 0) {
            return 0;
        } else {
            bp->b_flags &= ~B_RAM;
            BO_RLOCK(bo);
            for (i = 1; i < maxra; i++) {
                /*
                 * Stop if the buffer does not exist or it
                 * is invalid (about to go away?)
                 */
                rbp = gbincore(&vp->v_bufobj, lblkno+i);
                if (rbp == NULL || (rbp->b_flags & B_INVAL))
                    break;

                /*
                 * Set another read-ahead mark so we know
                 * to check again. (If we can lock the
                 * buffer without waiting)
                 */
                if ((((i % racluster) == (racluster - 1)) ||
                    (i == (maxra - 1))) &&
                    (0 == BUF_LOCK(rbp,
                        LK_EXCLUSIVE | LK_NOWAIT, NULL))) {
                    rbp->b_flags |= B_RAM;
                    BUF_UNLOCK(rbp);
                }
            }
            BO_RUNLOCK(bo);
            if (i >= maxra) {
                return 0;
            }
            lblkno += i;
        }
        reqbp = bp = NULL;
    /*
     * If it isn't in the cache, then get a chunk from
     * disk if sequential, otherwise just get the block.
     */
    } else {
        off_t firstread = bp->b_offset;
        int nblks;
        long minread;

        KASSERT(bp->b_offset != NOOFFSET,
            ("cluster_read: no buffer offset"));

        ncontig = 0;

        /*
         * Adjust totread if needed
         */
        minread = read_min * size;
        if (minread > totread)
            totread = minread;

        /*
         * Compute the total number of blocks that we should read
         * synchronously.
         */
        if (firstread + totread > filesize)
            totread = filesize - firstread;
        nblks = howmany(totread, size);
        if (nblks > racluster)
            nblks = racluster;

        /*
         * Now compute the number of contiguous blocks.
         */
        if (nblks > 1) {
            error = VOP_BMAP(vp, lblkno, NULL,
                &blkno, &ncontig, NULL);
            /*
             * If this failed to map just do the original block.
             */
            if (error || blkno == -1)
                ncontig = 0;
        }

        /*
         * If we have contiguous data available do a cluster
         * otherwise just read the requested block.
         */
        if (ncontig) {
            /* Account for our first block. */
            ncontig = min(ncontig + 1, nblks);
            if (ncontig < nblks)
                nblks = ncontig;
            bp = cluster_rbuild(vp, filesize, lblkno,
                blkno, size, nblks, gbflags, bp);
            lblkno += (bp->b_bufsize / size);
        } else {
            bp->b_flags |= B_RAM;
            bp->b_iocmd = BIO_READ;
            lblkno += 1;
        }
    }

    /*
     * handle the synchronous read so that it is available ASAP.
     */
    if (bp) {
        if ((bp->b_flags & B_CLUSTER) == 0) {
            vfs_busy_pages(bp, 0);
        }
        bp->b_flags &= ~B_INVAL;
        bp->b_ioflags &= ~BIO_ERROR;
        if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
            BUF_KERNPROC(bp);
        bp->b_iooffset = dbtob(bp->b_blkno);
        bstrategy(bp);
#ifdef RACCT
        if (racct_enable) {
            PROC_LOCK(curproc);
            racct_add_buf(curproc, bp, 0);
            PROC_UNLOCK(curproc);
        }
#endif /* RACCT */
        curthread->td_ru.ru_inblock++;
    }

    /*
     * If we have been doing sequential I/O, then do some read-ahead.
     */
    while (lblkno < (origblkno + maxra)) {
        error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL);
        if (error)
            break;

        if (blkno == -1)
            break;

        /*
         * We could throttle ncontig here by maxra but we might as
         * well read the data if it is contiguous.  We're throttled
         * by racluster anyway.
         */
        if (ncontig) {
            ncontig = min(ncontig + 1, racluster);
            rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
                size, ncontig, gbflags, NULL);
            lblkno += (rbp->b_bufsize / size);
            if (rbp->b_flags & B_DELWRI) {
                bqrelse(rbp);
                continue;
            }
        } else {
            rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
            lblkno += 1;
            if (rbp->b_flags & B_DELWRI) {
                bqrelse(rbp);
                continue;
            }
            rbp->b_flags |= B_ASYNC | B_RAM;
            rbp->b_iocmd = BIO_READ;
            rbp->b_blkno = blkno;
        }
        if (rbp->b_flags & B_CACHE) {
            rbp->b_flags &= ~B_ASYNC;
            bqrelse(rbp);
            continue;
        }
        if ((rbp->b_flags & B_CLUSTER) == 0) {
            vfs_busy_pages(rbp, 0);
        }
        rbp->b_flags &= ~B_INVAL;
        rbp->b_ioflags &= ~BIO_ERROR;
        if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
            BUF_KERNPROC(rbp);
        rbp->b_iooffset = dbtob(rbp->b_blkno);
        bstrategy(rbp);
#ifdef RACCT
        if (racct_enable) {
            PROC_LOCK(curproc);
            racct_add_buf(curproc, rbp, 0);
            PROC_UNLOCK(curproc);
        }
#endif /* RACCT */
        curthread->td_ru.ru_inblock++;
    }

    if (reqbp) {
        /*
         * Like bread, always brelse() the buffer when
         * returning an error.
         */
        error = bufwait(reqbp);
        if (error != 0) {
            brelse(reqbp);
            *bpp = NULL;
        }
    }
    return (error);
}
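For context, cluster_read() is normally driven from a filesystem's VOP_READ path. The fragment below is a hedged sketch of such a caller, loosely modeled on the usual FFS-style read loop; my_fs_read(), the bsize parameter, and the flag handling are illustrative assumptions rather than code from this file. Only the cluster_read(), uiomove() and bqrelse() calls are taken from interfaces shown or referenced above.

/*
 * Hypothetical VOP_READ-style caller (illustrative sketch only): let
 * cluster_read() bring in the block plus read-ahead, copy the user's
 * portion out, and release the buffer back to the cache.
 */
static int
my_fs_read(struct vnode *vp, struct uio *uio, int ioflag, u_quad_t filesize,
    long bsize)
{
    struct buf *bp;
    daddr_t lbn;
    long blkoffset, xfersize;
    int error, seqcount;

    seqcount = ioflag >> IO_SEQSHIFT;       /* sequential-access hint */
    error = 0;

    while (uio->uio_resid > 0 && uio->uio_offset < filesize) {
        lbn = uio->uio_offset / bsize;
        blkoffset = uio->uio_offset % bsize;
        xfersize = bsize - blkoffset;
        if (xfersize > uio->uio_resid)
            xfersize = uio->uio_resid;
        if (xfersize > (long)(filesize - uio->uio_offset))
            xfersize = (long)(filesize - uio->uio_offset);

        /* Synchronous read of lbn plus opportunistic read-ahead. */
        error = cluster_read(vp, filesize, lbn, bsize, NOCRED,
            blkoffset + uio->uio_resid, seqcount, 0, &bp);
        if (error)
            break;

        error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
        bqrelse(bp);                        /* keep the buffer cached */
        if (error)
            break;
    }
    return (error);
}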
/*
 * Chunked up transfer completion routine - chain transfers until done
 *
 * NOTE: MPSAFE callback.
 */
static void
devfs_spec_strategy_done(struct bio *nbio)
{
    struct buf *nbp = nbio->bio_buf;
    struct bio *bio = nbio->bio_caller_info1.ptr;   /* original bio */
    struct buf *bp = bio->bio_buf;                  /* original bp */
    int chunksize = nbio->bio_caller_info2.index;   /* chunking */
    int boffset = nbp->b_data - bp->b_data;

    if (nbp->b_flags & B_ERROR) {
        /*
         * An error terminates the chain, propagate the error back
         * to the original bp
         */
        bp->b_flags |= B_ERROR;
        bp->b_error = nbp->b_error;
        bp->b_resid = bp->b_bcount - boffset +
                      (nbp->b_bcount - nbp->b_resid);
#if SPEC_CHAIN_DEBUG & 1
        devfs_debug(DEVFS_DEBUG_DEBUG,
                    "spec_strategy: chain %p error %d bcount %d/%d\n",
                    bp, bp->b_error, bp->b_bcount,
                    bp->b_bcount - bp->b_resid);
#endif
    } else if (nbp->b_resid) {
        /*
         * A short read or write terminates the chain
         */
        bp->b_error = nbp->b_error;
        bp->b_resid = bp->b_bcount - boffset +
                      (nbp->b_bcount - nbp->b_resid);
#if SPEC_CHAIN_DEBUG & 1
        devfs_debug(DEVFS_DEBUG_DEBUG,
                    "spec_strategy: chain %p short read(1) "
                    "bcount %d/%d\n",
                    bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
#endif
    } else if (nbp->b_bcount != nbp->b_bufsize) {
        /*
         * A short read or write can also occur by truncating b_bcount
         */
#if SPEC_CHAIN_DEBUG & 1
        devfs_debug(DEVFS_DEBUG_DEBUG,
                    "spec_strategy: chain %p short read(2) "
                    "bcount %d/%d\n",
                    bp, nbp->b_bcount + boffset, bp->b_bcount);
#endif
        bp->b_error = 0;
        bp->b_bcount = nbp->b_bcount + boffset;
        bp->b_resid = nbp->b_resid;
    } else if (nbp->b_bcount + boffset == bp->b_bcount) {
        /*
         * No more data terminates the chain
         */
#if SPEC_CHAIN_DEBUG & 1
        devfs_debug(DEVFS_DEBUG_DEBUG,
                    "spec_strategy: chain %p finished bcount %d\n",
                    bp, bp->b_bcount);
#endif
        bp->b_error = 0;
        bp->b_resid = 0;
    } else {
        /*
         * Continue the chain
         */
        boffset += nbp->b_bcount;
        nbp->b_data = bp->b_data + boffset;
        nbp->b_bcount = bp->b_bcount - boffset;
        if (nbp->b_bcount > chunksize)
            nbp->b_bcount = chunksize;
        nbp->b_bio1.bio_done = devfs_spec_strategy_done;
        nbp->b_bio1.bio_offset = bio->bio_offset + boffset;

#if SPEC_CHAIN_DEBUG & 1
        devfs_debug(DEVFS_DEBUG_DEBUG,
                    "spec_strategy: chain %p offset %d/%d bcount %d\n",
                    bp, boffset, bp->b_bcount, nbp->b_bcount);
#endif
        dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
        return;
    }

    /*
     * Fall through to here on termination.  biodone(bp) and
     * clean up and free nbp.
     */
    biodone(bio);
    BUF_UNLOCK(nbp);
    uninitbufbio(nbp);
    kfree(nbp, M_DEVBUF);
}