/*
 * XXX - vop_strategy must be hand coded because it has no
 *       vnode in its arguments.
 * YYY - and it is not coherent with anything.
 *
 * This goes away with a merged VM/buffer cache.
 *
 * union_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
static int
union_strategy(struct vop_strategy_args *ap)
{
    struct bio *bio = ap->a_bio;
    struct buf *bp = bio->bio_buf;
    struct vnode *othervp = OTHERVP(ap->a_vp);

#ifdef DIAGNOSTIC
    if (othervp == NULLVP)
        panic("union_strategy: nil vp");
    if (bp->b_cmd != BUF_CMD_READ && (othervp == LOWERVP(ap->a_vp)))
        panic("union_strategy: writing to lowervp");
#endif
    return (vn_strategy(othervp, bio));
}
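/*
 * Illustrative sketch (not part of the original sources): the pass-through
 * above assumes the caller has fully prepared the buffer/BIO.  A minimal
 * synchronous read through vn_strategy() typically looks like the fragment
 * below, assuming an already-locked buffer and a hypothetical device vnode
 * `devvp`.  The biodone_sync/BIO_SYNC pairing is the same convention used
 * by ffs_indirtrunc() and ext2_bmaparray() later in this collection.
 */
#if 0   /* example only */
static int
example_sync_read(struct vnode *devvp, struct buf *bp, off_t doffset)
{
    int error;

    bp->b_cmd = BUF_CMD_READ;
    bp->b_flags &= ~(B_ERROR | B_INVAL);
    bp->b_bio1.bio_offset = doffset;        /* already-translated offset */
    bp->b_bio1.bio_done = biodone_sync;     /* wakes up biowait() below */
    bp->b_bio1.bio_flags |= BIO_SYNC;
    vn_strategy(devvp, &bp->b_bio1);
    error = biowait(&bp->b_bio1, "exread");
    return (error);
}
#endif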
/*
 * Do IO operation, called from dmstrategy routine.
 */
static int
dm_target_linear_strategy(dm_table_entry_t *table_en, struct buf *bp)
{
    dm_target_linear_config_t *tlc;

    tlc = table_en->target_config;

/*  printf("Linear target read function called %" PRIu64 "!!\n",
    tlc->offset);*/

#if 0
    bp->b_blkno += tlc->offset;
#endif

    bp->b_bio1.bio_offset += tlc->offset * DEV_BSIZE;

    vn_strategy(tlc->pdev->pdev_vnode, &bp->b_bio1);

    return 0;
}
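/*
 * Illustrative sketch (not from the original sources): the linear target
 * remaps a request simply by shifting its byte offset by the target's
 * starting block, i.e. bio_offset' = bio_offset + tlc->offset * DEV_BSIZE.
 * The hypothetical helper below just isolates that arithmetic; with an
 * assumed tlc->offset of 2048 (512-byte blocks), byte offset 0 on the
 * mapped device lands at byte offset 1 MiB on the backing vnode.
 */
#if 0   /* example only */
static __inline off_t
example_dm_linear_remap(const dm_target_linear_config_t *tlc, off_t bio_offset)
{
    /* shift the request by the target's start, expressed in bytes */
    return (bio_offset + (off_t)tlc->offset * DEV_BSIZE);
}
#endif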
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to be able to swap to a file, the VOP_BMAP operation may not
 * deadlock on memory.  See hpfs_bmap() for details. XXXXXXX (not impl)
 *
 * hpfs_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
int
hpfs_strategy(struct vop_strategy_args *ap)
{
    struct bio *bio = ap->a_bio;
    struct bio *nbio;
    struct buf *bp = bio->bio_buf;
    struct vnode *vp = ap->a_vp;
    struct hpfsnode *hp;
    int error;

    dprintf(("hpfs_strategy(): \n"));

    if (vp->v_type == VBLK || vp->v_type == VCHR)
        panic("hpfs_strategy: spec");

    nbio = push_bio(bio);
    if (nbio->bio_offset == NOOFFSET) {
        error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset,
                         NULL, NULL, bp->b_cmd);
        if (error) {
            kprintf("hpfs_strategy: VOP_BMAP FAILED %d\n", error);
            bp->b_error = error;
            bp->b_flags |= B_ERROR;
            /* I/O was never started on nbio, must biodone(bio) */
            biodone(bio);
            return (error);
        }
        if (nbio->bio_offset == NOOFFSET)
            vfs_bio_clrbuf(bp);
    }
    if (nbio->bio_offset == NOOFFSET) {
        /* I/O was never started on nbio, must biodone(bio) */
        biodone(bio);
        return (0);
    }
    hp = VTOHP(ap->a_vp);
    vn_strategy(hp->h_devvp, nbio);
    return (0);
}
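/*
 * Illustrative sketch (not from the original sources): most filesystem
 * strategy routines in this collection follow the same shape as
 * hpfs_strategy() above: push a new BIO level, translate the logical file
 * offset with VOP_BMAP if it has not been translated yet, complete holes
 * and errors locally with biodone(), and forward everything else to the
 * device vnode.  The skeleton below condenses that pattern; example_devvp()
 * is a hypothetical accessor standing in for hp->h_devvp, ip->i_devvp, etc.
 */
#if 0   /* example only */
static int
example_fs_strategy(struct vop_strategy_args *ap)
{
    struct bio *bio = ap->a_bio;
    struct bio *nbio = push_bio(bio);           /* translated layer */
    struct buf *bp = bio->bio_buf;
    int error;

    if (nbio->bio_offset == NOOFFSET) {
        /* map the logical file offset to a device offset */
        error = VOP_BMAP(ap->a_vp, bio->bio_offset, &nbio->bio_offset,
                         NULL, NULL, bp->b_cmd);
        if (error) {
            bp->b_error = error;
            bp->b_flags |= B_ERROR;
            biodone(bio);                       /* I/O never started on nbio */
            return (error);
        }
    }
    if (nbio->bio_offset == NOOFFSET) {
        biodone(bio);                           /* hole: nothing to transfer */
        return (0);
    }
    vn_strategy(example_devvp(ap->a_vp), nbio); /* hypothetical accessor */
    return (0);
}
#endif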
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t dbn,
               ufs_daddr_t lastbn, int level, long *countp)
{
    int i;
    struct buf *bp;
    struct fs *fs = ip->i_fs;
    ufs_daddr_t *bap;
    struct vnode *vp;
    ufs_daddr_t *copy = NULL, nb, nlbn, last;
    long blkcount, factor;
    int nblocks, blocksreleased = 0;
    int error = 0, allerror = 0;

    /*
     * Calculate index in current block of last
     * block to be kept.  -1 indicates the entire
     * block so we need not calculate the index.
     */
    factor = 1;
    for (i = SINGLE; i < level; i++)
        factor *= NINDIR(fs);
    last = lastbn;
    if (lastbn > 0)
        last /= factor;
    nblocks = btodb(fs->fs_bsize);

    /*
     * Get buffer of block pointers, zero those entries corresponding
     * to blocks to be free'd, and update on disk copy first.  Since
     * double(triple) indirect before single(double) indirect, calls
     * to bmap on these blocks will fail.  However, we already have
     * the on disk address, so we have to set the bio_offset field
     * explicitly instead of letting bread do everything for us.
     */
    vp = ITOV(ip);
    bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, 0, 0);
    if ((bp->b_flags & B_CACHE) == 0) {
        bp->b_flags &= ~(B_ERROR|B_INVAL);
        bp->b_cmd = BUF_CMD_READ;
        if (bp->b_bcount > bp->b_bufsize)
            panic("ffs_indirtrunc: bad buffer size");
        /*
         * BIO is bio2 which chains back to bio1.  We wait
         * on bio1.
         */
        bp->b_bio2.bio_offset = dbtodoff(fs, dbn);
        bp->b_bio1.bio_done = biodone_sync;
        bp->b_bio1.bio_flags |= BIO_SYNC;
        vfs_busy_pages(vp, bp);
        /*
         * Access the block device layer using the device vnode
         * and the translated block number (bio2) instead of the
         * file vnode (vp) and logical block number (bio1).
         *
         * Even though we are bypassing the vnode layer, we still
         * want the vnode state to indicate that an I/O on its behalf
         * is in progress.
         */
        bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
        vn_strategy(ip->i_devvp, &bp->b_bio2);
        error = biowait(&bp->b_bio1, "biord");
    }
    if (error) {
        brelse(bp);
        *countp = 0;
        return (error);
    }

    bap = (ufs_daddr_t *)bp->b_data;
    if (lastbn != -1) {
        copy = kmalloc(fs->fs_bsize, M_TEMP, M_WAITOK);
        bcopy((caddr_t)bap, (caddr_t)copy, (uint)fs->fs_bsize);
        bzero((caddr_t)&bap[last + 1],
              (uint)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
        if (DOINGASYNC(vp)) {
            bawrite(bp);
        } else {
            error = bwrite(bp);
            if (error)
                allerror = error;
        }
        bap = copy;
    }

    /*
     * Recursively free totally unused blocks.
     */
    for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
         i--, nlbn += factor) {
        nb = bap[i];
        if (nb == 0)
            continue;
        if (level > SINGLE) {
            if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
                 (ufs_daddr_t)-1, level - 1, &blkcount)) != 0)
                allerror = error;
            blocksreleased += blkcount;
        }
        ffs_blkfree(ip, nb, fs->fs_bsize);
        blocksreleased += nblocks;
    }

    /*
     * Recursively free last partial block.
     */
    if (level > SINGLE && lastbn >= 0) {
        last = lastbn % factor;
        nb = bap[i];
        if (nb != 0) {
            error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
                                   last, level - 1, &blkcount);
            if (error)
                allerror = error;
            blocksreleased += blkcount;
        }
    }
    if (copy != NULL) {
        kfree(copy, M_TEMP);
    } else {
        bp->b_flags |= B_INVAL | B_NOCACHE;
        brelse(bp);
    }

    *countp = blocksreleased;
    return (allerror);
}
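/*
 * Illustrative sketch (not from the original sources): the `factor`
 * computed at the top of ffs_indirtrunc() is the number of logical file
 * blocks governed by one pointer at the given indirection level, which is
 * why the free loop can step nlbn by `factor` and pass
 * nlbn = lbn + 1 - i * factor down to the recursion.  The hypothetical
 * helper below just restates that computation on its own.
 */
#if 0   /* example only */
static long
example_indir_factor(struct fs *fs, int level)
{
    long factor = 1;
    int i;

    /* SINGLE -> 1, DOUBLE -> NINDIR(fs), TRIPLE -> NINDIR(fs)^2 */
    for (i = SINGLE; i < level; i++)
        factor *= NINDIR(fs);
    return (factor);
}
#endif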
static int
ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t loffset,
                      size_t len, struct buf *bp)
{
    int error;
    int iolen;
    int blockoff;
    int bsize;
    struct vnode *dp;
    int bforwards;

    bsize = vp->v_mount->mnt_stat.f_iosize;

    /*
     * Make sure it fits into the pbuf
     */
    iolen = (int)(intptr_t)udata & PAGE_MASK;
    if (len + iolen > bp->b_kvasize) {
        len = bp->b_kvasize;
        if (iolen != 0)
            len -= PAGE_SIZE;
    }

    /*
     * Raw disk address is in bio2, but we wait for it to
     * chain to bio1.
     */
    bp->b_flags &= ~B_ERROR;
    bp->b_loffset = loffset;
    bp->b_bio2.bio_offset = NOOFFSET;
    bp->b_bio1.bio_done = biodone_sync;
    bp->b_bio1.bio_flags |= BIO_SYNC;

    blockoff = (loffset % bsize) / DEV_BSIZE;

    error = VOP_BMAP(vp, bp->b_loffset, &bp->b_bio2.bio_offset,
                     &bforwards, NULL, BUF_CMD_READ);
    if (error != 0)
        return error;
    dp = VTOI(vp)->i_devvp;
    if (bp->b_bio2.bio_offset == NOOFFSET) {
        /*
         * Fill holes with NULs to preserve semantics
         */
        if (len + blockoff * DEV_BSIZE > bsize)
            len = bsize - blockoff * DEV_BSIZE;

        if (vmapbuf(bp, udata, len) < 0)
            return EFAULT;

        lwkt_user_yield();
        bzero(bp->b_data, bp->b_bcount);

        /* Mark operation completed (similar to bufdone()) */
        bp->b_resid = 0;
        return 0;
    }

    if (len + blockoff * DEV_BSIZE > bforwards)
        len = bforwards - blockoff * DEV_BSIZE;
    bp->b_bio2.bio_offset += blockoff * DEV_BSIZE;

    if (vmapbuf(bp, udata, len) < 0)
        return EFAULT;

    /*
     * Access the block device layer using the device vnode (dp) and
     * the translated block number (bio2) instead of the logical block
     * number (bio1).
     *
     * Even though we are bypassing the vnode layer, we still
     * want the vnode state to indicate that an I/O on its behalf
     * is in progress.
     */
    bp->b_cmd = BUF_CMD_READ;
    bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
    vn_strategy(dp, &bp->b_bio2);
    return 0;
}
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block whose logical address is lbn (the ext2 counterpart of
 * ffs_indirtrunc() above).
 */
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, off_t doffset, daddr_t lastbn,
                int level, long *countp)
{
    int i;
    struct buf *bp;
    struct ext2_sb_info *fs = ip->i_e2fs;
    daddr_t *bap;
    struct vnode *vp;
    daddr_t *copy, nb, nlbn, last;
    long blkcount, factor;
    int nblocks, blocksreleased = 0;
    int error = 0, allerror = 0;

    /*
     * Calculate index in current block of last
     * block to be kept.  -1 indicates the entire
     * block so we need not calculate the index.
     */
    factor = 1;
    for (i = SINGLE; i < level; i++)
        factor *= NINDIR(fs);
    last = lastbn;
    if (lastbn > 0)
        last /= factor;
    nblocks = btodb(fs->s_blocksize);

    /*
     * Get buffer of block pointers, zero those entries corresponding
     * to blocks to be free'd, and update on disk copy first.  Since
     * double(triple) indirect before single(double) indirect, calls
     * to bmap on these blocks will fail.  However, we already have
     * the on disk address, so we have to set the bio_offset field
     * explicitly instead of letting bread do everything for us.
     */
    vp = ITOV(ip);
    bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->s_blocksize, 0, 0);
    if ((bp->b_flags & B_CACHE) == 0) {
        bp->b_flags &= ~(B_ERROR | B_INVAL);
        bp->b_cmd = BUF_CMD_READ;
        if (bp->b_bcount > bp->b_bufsize)
            panic("ext2_indirtrunc: bad buffer size");
        bp->b_bio2.bio_offset = doffset;
        bp->b_bio1.bio_done = biodone_sync;
        bp->b_bio1.bio_flags |= BIO_SYNC;
        vfs_busy_pages(bp->b_vp, bp);
        vn_strategy(vp, &bp->b_bio1);
        error = biowait(&bp->b_bio1, "biord");
    }
    if (error) {
        brelse(bp);
        *countp = 0;
        return (error);
    }

    bap = (daddr_t *)bp->b_data;
    MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK);
    bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize);
    bzero((caddr_t)&bap[last + 1],
          (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
    if (last == -1)
        bp->b_flags |= B_INVAL;
    error = bwrite(bp);
    if (error)
        allerror = error;
    bap = copy;

    /*
     * Recursively free totally unused blocks.
     */
    for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
         i--, nlbn += factor) {
        nb = bap[i];
        if (nb == 0)
            continue;
        if (level > SINGLE) {
            if ((error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
                 (daddr_t)-1, level - 1, &blkcount)) != 0)
                allerror = error;
            blocksreleased += blkcount;
        }
        ext2_blkfree(ip, nb, fs->s_blocksize);
        blocksreleased += nblocks;
    }

    /*
     * Recursively free last partial block.
     */
    if (level > SINGLE && lastbn >= 0) {
        last = lastbn % factor;
        nb = bap[i];
        if (nb != 0) {
            error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
                                    last, level - 1, &blkcount);
            if (error)
                allerror = error;
            blocksreleased += blkcount;
        }
    }
    FREE(copy, M_TEMP);
    *countp = blocksreleased;
    return (allerror);
}
/*
 * Strategy routine called from dm_strategy.
 */
static int
dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
{
    dm_target_stripe_config_t *tsc;
    struct bio *bio = &bp->b_bio1;
    struct buf *nestbuf;
    uint64_t blkno, blkoff;
    uint64_t stripe, blknr;
    uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
    int devnr;

    tsc = table_en->target_config;
    if (tsc == NULL)
        return 0;

    /* calculate extent of request */
    KKASSERT(bp->b_resid % DEV_BSIZE == 0);

    switch(bp->b_cmd) {
    case BUF_CMD_READ:
    case BUF_CMD_WRITE:
    case BUF_CMD_FREEBLKS:
        /*
         * Loop through to individual operations
         */
        blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
        blkoff = 0;
        num_blks = bp->b_resid / DEV_BSIZE;
        nestiobuf_init(bio);

        while (num_blks > 0) {
            /* blockno to strip piece nr */
            stripe = blkno / tsc->stripe_chunksize;
            stripe_off = blkno % tsc->stripe_chunksize;

            /* where we are inside the strip */
            devnr = stripe % tsc->stripe_num;
            blknr = stripe / tsc->stripe_num;

            /* how much is left before we hit a boundary */
            stripe_rest = tsc->stripe_chunksize - stripe_off;

            /* issue this piece on stripe `stripe' */
            issue_blks = MIN(stripe_rest, num_blks);
            nestbuf = getpbuf(NULL);
            nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

            nestiobuf_add(bio, nestbuf, blkoff,
                          issue_blks * DEV_BSIZE, NULL);

            /* I need number of bytes. */
            nestbuf->b_bio1.bio_offset = blknr * tsc->stripe_chunksize +
                                         stripe_off;
            nestbuf->b_bio1.bio_offset += tsc->stripe_devs[devnr].offset;
            nestbuf->b_bio1.bio_offset *= DEV_BSIZE;

            vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
                        &nestbuf->b_bio1);

            blkno += issue_blks;
            blkoff += issue_blks * DEV_BSIZE;
            num_blks -= issue_blks;
        }
        nestiobuf_start(bio);
        break;
    case BUF_CMD_FLUSH:
        nestiobuf_init(bio);
        for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
            nestbuf = getpbuf(NULL);
            nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

            nestiobuf_add(bio, nestbuf, 0, 0, NULL);
            nestbuf->b_bio1.bio_offset = 0;
            vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
                        &nestbuf->b_bio1);
        }
        nestiobuf_start(bio);
        break;
    default:
        bp->b_flags |= B_ERROR;
        bp->b_error = EIO;
        biodone(bio);
        break;
    }
    return 0;
}
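/*
 * Illustrative worked example (not from the original sources): with a
 * hypothetical stripe_chunksize of 8 blocks and stripe_num of 2 devices,
 * a request starting at blkno 21 maps as
 *
 *      stripe     = 21 / 8 = 2       (third chunk overall)
 *      stripe_off = 21 % 8 = 5       (5 blocks into that chunk)
 *      devnr      = 2 % 2  = 0       (the chunk lives on device 0)
 *      blknr      = 2 / 2  = 1       (second chunk stored on that device)
 *
 * and at most stripe_rest = 8 - 5 = 3 blocks can be issued before the
 * request crosses a chunk boundary and the loop above recomputes the
 * mapping for the remainder.
 */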
/* Start the second phase of a RAID-4 or RAID-5 group write operation. */
void
complete_raid5_write(struct rqelement *rqe)
{
    int *sdata;                 /* source */
    int *pdata;                 /* and parity block data */
    int length;                 /* and count involved */
    int count;                  /* loop counter */
    int rqno;                   /* request index */
    int rqoffset;               /* offset of request data from parity data */
    struct bio *ubio;           /* user buffer header */
    struct request *rq;         /* pointer to our request */
    struct rqgroup *rqg;        /* and to the request group */
    struct rqelement *prqe;     /* point to the parity block */
    struct drive *drive;        /* drive to access */

    rqg = rqe->rqg;             /* and to our request group */
    rq = rqg->rq;               /* point to our request */
    ubio = rq->bio;             /* user's buffer header */
    prqe = &rqg->rqe[0];        /* point to the parity block */

    /*
     * If we get to this function, we have normal or
     * degraded writes, or a combination of both.  We do
     * the same thing in each case: we perform an
     * exclusive or to the parity block.  The only
     * difference is the origin of the data and the
     * address range.
     */
    if (rqe->flags & XFR_DEGRADED_WRITE) {      /* do the degraded write stuff */
        pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */
        bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */

        /* Now get what data we need from each block */
        for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
            rqe = &rqg->rqe[rqno];              /* this request */
            sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */
            length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */

            /*
             * Add the data block to the parity block.  Before
             * we started the request, we zeroed the parity
             * block, so the result of adding all the other
             * blocks and the block we want to write will be
             * the correct parity block.
             */
            for (count = 0; count < length; count++)
                pdata[count] ^= sdata[count];
            if ((rqe->flags & XFR_MALLOCED)     /* the buffer was malloced, */
            &&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */
                Free(rqe->b.b_data);            /* free it now */
                rqe->flags &= ~XFR_MALLOCED;
            }
        }
    }
    if (rqg->flags & XFR_NORMAL_WRITE) {        /* do normal write stuff */
        /* Get what data we need from each block */
        for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
            rqe = &rqg->rqe[rqno];              /* this request */
            if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE))
                == (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */
                sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */
                rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */
                pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */
                length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */

                /*
                 * "remove" the old data block
                 * from the parity block
                 */
                if ((pdata < ((int *) prqe->b.b_data))
                    || (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount)))
                    || (sdata < ((int *) rqe->b.b_data))
                    || (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount))))
                    panic("complete_raid5_write: bounds overflow");
                for (count = 0; count < length; count++)
                    pdata[count] ^= sdata[count];

                /* "add" the new data block */
                sdata = (int *) (&ubio->bio_buf->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */
                if ((sdata < ((int *) ubio->bio_buf->b_data))
                    || (&sdata[length] > ((int *) (ubio->bio_buf->b_data + ubio->bio_buf->b_bcount))))
                    panic("complete_raid5_write: bounds overflow");
                for (count = 0; count < length; count++)
                    pdata[count] ^= sdata[count];

                /* Free the malloced buffer */
                if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */
                    Free(rqe->b.b_data);         /* free it */
                    rqe->flags &= ~XFR_MALLOCED;
                } else
                    panic("complete_raid5_write: malloc conflict");

                if ((rqe->b.b_cmd == BUF_CMD_READ) /* this was a read */
                &&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */
                    rqe->b.b_cmd = BUF_CMD_WRITE; /* we're writing now */
                    rqe->b.b_bio1.bio_done = complete_rqe; /* by calling us here */
                    rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
                    rqe->b.b_data = &ubio->bio_buf->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */
                    rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */
                    rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
                    rqe->b.b_bio1.bio_offset += (off_t)rqe->dataoffset << DEV_BSHIFT; /* point to the correct block */
                    drive = &DRIVE[rqe->driveno]; /* drive to access */
                    rqe->b.b_bio1.bio_driver_info = drive->dev;
                    rqg->active++;              /* another active request */

                    /* We can't sleep here, so we just increment the counters. */
                    drive->active++;
                    if (drive->active >= drive->maxactive)
                        drive->maxactive = drive->active;
                    vinum_conf.active++;
                    if (vinum_conf.active >= vinum_conf.maxactive)
                        vinum_conf.maxactive = vinum_conf.active;
#if VINUMDEBUG
                    if (debug & DEBUG_ADDRESSES)
                        log(LOG_DEBUG,
                            " %s dev %s, sd %d, offset 0x%jx, devoffset 0x%jx, length %d\n",
                            (rqe->b.b_cmd == BUF_CMD_READ) ? "Read" : "Write",
                            drive->devicename,
                            rqe->sdno,
                            (uintmax_t)(rqe->b.b_bio1.bio_offset - ((off_t)SD[rqe->sdno].driveoffset << DEV_BSHIFT)),
                            (uintmax_t)rqe->b.b_bio1.bio_offset,
                            rqe->b.b_bcount);
                    if (debug & DEBUG_LASTREQS)
                        logrq(loginfo_raid5_data, (union rqinfou) rqe, ubio);
#endif
                    vn_strategy(drive->vp, &rqe->b.b_bio1);
                }
            }
        }
    }
    /* Finally, write the parity block */
    rqe = &rqg->rqe[0];
    rqe->b.b_cmd = BUF_CMD_WRITE;               /* we're writing now */
    rqe->b.b_bio1.bio_done = complete_rqe;      /* by calling us here */
    rqg->flags &= ~XFR_PARITYOP;                /* reset flags that brought us here */
    rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */
    rqe->b.b_resid = rqe->b.b_bcount;           /* nothing transferred */
    drive = &DRIVE[rqe->driveno];               /* drive to access */
    rqe->b.b_bio1.bio_driver_info = drive->dev;
    rqg->active++;                              /* another active request */

    /* We can't sleep here, so we just increment the counters. */
    drive->active++;
    if (drive->active >= drive->maxactive)
        drive->maxactive = drive->active;
    vinum_conf.active++;
    if (vinum_conf.active >= vinum_conf.maxactive)
        vinum_conf.maxactive = vinum_conf.active;
#if VINUMDEBUG
    if (debug & DEBUG_ADDRESSES)
        log(LOG_DEBUG,
            " %s dev %s, sd %d, offset 0x%jx, devoffset 0x%jx, length %d\n",
            (rqe->b.b_cmd == BUF_CMD_READ) ? "Read" : "Write",
            drive->devicename,
            rqe->sdno,
            (uintmax_t)(rqe->b.b_bio1.bio_offset - ((off_t)SD[rqe->sdno].driveoffset << DEV_BSHIFT)),
            (uintmax_t)rqe->b.b_bio1.bio_offset,
            rqe->b.b_bcount);
    if (debug & DEBUG_LASTREQS)
        logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubio);
#endif
    vn_strategy(drive->vp, &rqe->b.b_bio1);
}
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */
static int
ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp,
               struct indir *ap, int *nump, int *runp, int *runb)
{
    struct inode *ip;
    struct buf *bp;
    struct ext2_mount *ump;
    struct mount *mp;
    struct ext2_sb_info *fs;
    struct indir a[NIADDR+1], *xap;
    ext2_daddr_t daddr;
    long metalbn;
    int error, maxrun, num;

    ip = VTOI(vp);
    mp = vp->v_mount;
    ump = VFSTOEXT2(mp);
    fs = ip->i_e2fs;
#ifdef DIAGNOSTIC
    if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
        panic("ext2_bmaparray: invalid arguments");
#endif

    if (runp) {
        *runp = 0;
    }

    if (runb) {
        *runb = 0;
    }

    maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;

    xap = ap == NULL ? a : ap;
    if (!nump)
        nump = &num;
    error = ext2_getlbns(vp, bn, xap, nump);
    if (error)
        return (error);

    num = *nump;
    if (num == 0) {
        *bnp = blkptrtodb(ump, ip->i_db[bn]);
        if (*bnp == 0)
            *bnp = -1;
        else if (runp) {
            daddr_t bnb = bn;
            for (++bn; bn < NDADDR && *runp < maxrun &&
                 is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
                 ++bn, ++*runp)
                ;
            bn = bnb;
            if (runb && (bn > 0)) {
                for (--bn; (bn >= 0) && (*runb < maxrun) &&
                     is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]);
                     --bn, ++*runb)
                    ;
            }
        }
        return (0);
    }

    /* Get disk address out of indirect block array */
    daddr = ip->i_ib[xap->in_off];

    for (bp = NULL, ++xap; --num; ++xap) {
        /*
         * Exit the loop if there is no disk address assigned yet and
         * the indirect block isn't in the cache, or if we were
         * looking for an indirect block and we've found it.
         */
        metalbn = xap->in_lbn;
        if ((daddr == 0 &&
             !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
            metalbn == bn) {
            break;
        }
        /*
         * If we get here, we've either got the block in the cache
         * or we have a disk address for it, go fetch it.
         */
        if (bp)
            bqrelse(bp);

        xap->in_exists = 1;
        bp = getblk(vp, lblktodoff(fs, metalbn),
                    mp->mnt_stat.f_iosize, 0, 0);
        if ((bp->b_flags & B_CACHE) == 0) {
#ifdef DIAGNOSTIC
            if (!daddr)
                panic("ext2_bmaparray: indirect block not in cache");
#endif
            /*
             * This runs through ext2_strategy using bio2 to
             * cache the disk offset, then comes back through
             * bio1.  So we want to wait on bio1.
             */
            bp->b_bio1.bio_done = biodone_sync;
            bp->b_bio1.bio_flags |= BIO_SYNC;
            bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
            bp->b_flags &= ~(B_INVAL|B_ERROR);
            bp->b_cmd = BUF_CMD_READ;
            vfs_busy_pages(bp->b_vp, bp);
            vn_strategy(bp->b_vp, &bp->b_bio1);
            error = biowait(&bp->b_bio1, "biord");
            if (error) {
                brelse(bp);
                return (error);
            }
        }

        daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off];
        if (num == 1 && daddr && runp) {
            for (bn = xap->in_off + 1;
                 bn < MNINDIR(ump) && *runp < maxrun &&
                 is_sequential(ump,
                               ((ext2_daddr_t *)bp->b_data)[bn - 1],
                               ((ext2_daddr_t *)bp->b_data)[bn]);
                 ++bn, ++*runp)
                ;
            bn = xap->in_off;
            if (runb && bn) {
                for (--bn; bn >= 0 && *runb < maxrun &&
                     is_sequential(ump,
                                   ((daddr_t *)bp->b_data)[bn],
                                   ((daddr_t *)bp->b_data)[bn+1]);
                     --bn, ++*runb)
                    ;
            }
        }
    }
    if (bp)
        bqrelse(bp);

    daddr = blkptrtodb(ump, daddr);
    *bnp = daddr == 0 ? -1 : daddr;
    return (0);
}
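/*
 * Illustrative worked example (not from the original sources): assuming a
 * hypothetical NDADDR of 12 direct blocks and NINDIR(fs) of 1024 pointers
 * per block, the first data block reached through the single indirect block
 * is logical block 12, so that indirect block lives at metadata lbn -12.
 * The double indirect block governs data blocks starting at 12 + 1024 =
 * 1036; its first child indirect block is therefore addressed as -1036, and
 * the double indirect block itself as one less, -1037, matching the
 * addressing scheme described in the comment above ext2_bmaparray().
 */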
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
    vm_offset_t kva;
    int error;
    int i, pcount, size;
    struct buf *bp;
    vm_page_t m;
    vm_ooffset_t offset;
    int toff, nextoff, nread;
    struct vnode *vp = ap->a_vp;
    int blksiz;
    int gotreqpage;

    error = 0;
    pcount = round_page(ap->a_count) / PAGE_SIZE;

    /*
     * Calculate the offset of the transfer and do sanity check.
     */
    offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

    /*
     * Round up physical size for real devices.  We cannot round using
     * v_mount's block size data because v_mount has nothing to do with
     * the device.  i.e. it's usually '/dev'.  We need the physical block
     * size for the device itself.
     *
     * We can't use v_rdev->si_mountpoint because it only exists when the
     * block device is mounted.  However, we can use v_rdev.
     */
    if (vn_isdisk(vp, NULL))
        blksiz = vp->v_rdev->si_bsize_phys;
    else
        blksiz = DEV_BSIZE;

    size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

    bp = getpbuf_kva(NULL);
    kva = (vm_offset_t)bp->b_data;

    /*
     * Map the pages to be read into the kva.
     */
    pmap_qenter(kva, ap->a_m, pcount);

    /* Build a minimal buffer header. */
    bp->b_cmd = BUF_CMD_READ;
    bp->b_bcount = size;
    bp->b_resid = 0;
    bsetrunningbufspace(bp, size);

    bp->b_bio1.bio_offset = offset;
    bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

    mycpu->gd_cnt.v_vnodein++;
    mycpu->gd_cnt.v_vnodepgsin += pcount;

    /* Do the input. */
    vn_strategy(ap->a_vp, &bp->b_bio1);

    crit_enter();

    /* We definitely need to be at splbio here. */
    while (bp->b_cmd != BUF_CMD_DONE)
        tsleep(bp, 0, "spread", 0);

    crit_exit();

    if (bp->b_flags & B_ERROR) {
        if (bp->b_error)
            error = bp->b_error;
        else
            error = EIO;
    }

    /*
     * If EOF is encountered we must zero-extend the result in order
     * to ensure that the page does not contain garbage.  When no
     * error occurs, an early EOF is indicated if b_bcount got truncated.
     * b_resid is relative to b_bcount and should be 0, but some devices
     * might indicate an EOF with b_resid instead of truncating b_bcount.
     */
    nread = bp->b_bcount - bp->b_resid;
    if (nread < ap->a_count)
        bzero((caddr_t)kva + nread, ap->a_count - nread);
    pmap_qremove(kva, pcount);

    gotreqpage = 0;
    for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
        nextoff = toff + PAGE_SIZE;
        m = ap->a_m[i];

        m->flags &= ~PG_ZERO;

        /*
         * NOTE: vm_page_undirty/clear_dirty etc do not clear the
         *       pmap modified bit.  pmap modified bit should have
         *       already been cleared.
         */
        if (nextoff <= nread) {
            m->valid = VM_PAGE_BITS_ALL;
            vm_page_undirty(m);
        } else if (toff < nread) {
            /*
             * Since this is a VM request, we have to supply the
             * unaligned offset to allow vm_page_set_valid()
             * to zero sub-DEV_BSIZE'd portions of the page.
             */
            vm_page_set_valid(m, 0, nread - toff);
            vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
        } else {
            m->valid = 0;
            vm_page_undirty(m);
        }

        if (i != ap->a_reqpage) {
            /*
             * Just in case someone was asking for this page we
             * now tell them that it is ok to use.
             */
            if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
                if (m->valid) {
                    if (m->flags & PG_REFERENCED) {
                        vm_page_activate(m);
                    } else {
                        vm_page_deactivate(m);
                    }
                    vm_page_wakeup(m);
                } else {
                    vm_page_free(m);
                }
            } else {
                vm_page_free(m);
            }
        } else if (m->valid) {
            gotreqpage = 1;
            /*
             * Since this is a VM request, we need to make the
             * entire page presentable by zeroing invalid sections.
             */
            if (m->valid != VM_PAGE_BITS_ALL)
                vm_page_zero_invalid(m, FALSE);
        }
    }
    if (!gotreqpage) {
        m = ap->a_m[ap->a_reqpage];
        devfs_debug(DEVFS_DEBUG_WARNING,
            "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
            devtoname(vp->v_rdev), error, bp, bp->b_vp);
        devfs_debug(DEVFS_DEBUG_WARNING,
            " size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
            size, bp->b_resid, ap->a_count, m->valid);
        devfs_debug(DEVFS_DEBUG_WARNING,
            " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
            nread, ap->a_reqpage, (u_long)m->pindex, pcount);
        /*
         * Free the buffer header back to the swap buffer pool.
         */
        relpbuf(bp, NULL);
        return VM_PAGER_ERROR;
    }
    /*
     * Free the buffer header back to the swap buffer pool.
     */
    relpbuf(bp, NULL);
    if (DEVFS_NODE(ap->a_vp))
        nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
    return VM_PAGER_OK;
}
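/*
 * Illustrative worked example (not from the original sources): assume a
 * PAGE_SIZE of 4096 and a 4-page request (pcount = 4, a_count = 16384) in
 * which the device returns only nread = 10240 bytes.  The classification
 * loop above then treats the pages as
 *
 *      page 0: nextoff  4096 <= 10240  -> fully valid
 *      page 1: nextoff  8192 <= 10240  -> fully valid
 *      page 2: toff     8192 <  10240  -> valid for 10240 - 8192 = 2048 bytes
 *      page 3: toff    12288 >= 10240  -> invalid (its contents were zeroed
 *                                         by the bzero() that padded the
 *                                         short read)
 */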