/*
 * Common code for reading a buffer with various options.
 *
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		return (bp);
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}
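
/*
 * Illustrative sketch, not part of the original source: the plain
 * bread()-style wrappers are assumed to reduce to a call of the form
 * below, passing a NULL ufsvfs pointer when no UFS-specific routing
 * (logging or snapshots) is wanted.
 *
 *	struct buf *
 *	bread(dev_t dev, daddr_t blkno, long bsize)
 *	{
 *		return (bread_common(NULL, dev, blkno, bsize));
 *	}
 */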
/*
 * Perform a synchronous indirect write of the given block number
 * on the given device, using the given fbuf.  Upon return the fbp
 * is invalid.
 */
int
fbiwrite(struct fbuf *fbp, vnode_t *devvp, daddr_t bn, int bsize)
{
	struct buf *bp;
	int error, fberror;

	/*
	 * Allocate a temp bp using pageio_setup, but then use it
	 * for physio to the area mapped by fbuf which is currently
	 * all locked down in place.
	 *
	 * XXX - need to have a generalized bp header facility
	 * which we build up pageio_setup on top of.  Other places
	 * (like here and in device drivers for the raw I/O case)
	 * could then use these new facilities in a more straight
	 * forward fashion instead of playing all these games.
	 */
	bp = pageio_setup((struct page *)NULL, fbp->fb_count, devvp, B_WRITE);

	bp->b_flags &= ~B_PAGEIO;		/* XXX */
	bp->b_un.b_addr = fbp->fb_addr;

	bp->b_blkno = bn * btod(bsize);
	bp->b_dev = cmpdev(devvp->v_rdev);	/* store in old dev format */
	bp->b_edev = devvp->v_rdev;
	bp->b_proc = NULL;			/* i.e. the kernel */

	(void) bdev_strategy(bp);
	error = biowait(bp);
	pageio_done(bp);

	/*CSTYLED*/
	FBCOMMON(fbp, S_OTHER, 0, fberror = )

	return (error ? error : fberror);
}
/*
 * Common code for writing a buffer with various options.
 *
 * force_wait  - wait for write completion regardless of B_ASYNC flag
 * do_relse    - release the buffer when we are done
 * clear_flags - flags to clear from the buffer
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
    int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
		/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
		/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}
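
/*
 * Illustrative sketch, not part of the original source: a synchronous
 * bwrite()-style wrapper is assumed to call bwrite_common() with
 * force_wait = 0 and do_relse = 1, roughly as below; the clear_flags
 * value shown is an assumption for illustration only.
 *
 *	void
 *	bwrite(struct buf *bp)
 *	{
 *		bwrite_common(NULL, bp, 0, 1,
 *		    B_READ | B_DONE | B_ERROR | B_DELWRI);
 *	}
 */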
/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}
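
/*
 * Illustrative use, not part of the original source: a sequential reader
 * that has just consumed logical block lbn would typically issue
 *
 *	bp = breada(dev, lbn + 1, lbn + 2, bsize);
 *
 * so that block lbn + 1 is returned synchronously while I/O on
 * lbn + 2 proceeds asynchronously in the background.
 */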
/*
 * Called from roll thread;
 *	buffer set for reading master
 * Returns
 *	0 - success, can continue with next buffer
 *	1 - failure due to logmap deltas being in use
 */
int
top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
{
	buf_t		*bp	= &rbp->rb_bh;
	offset_t	mof	= ldbtob(bp->b_blkno);

	/*
	 * get a list of deltas
	 */
	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
		/* logmap deltas are in use */
		return (1);
	}

	/*
	 * no deltas were found, nothing to roll
	 */
	if (rbp->rb_age == NULL) {
		bp->b_flags |= B_INVAL;
		return (0);
	}

	/*
	 * If there is one cached roll buffer that covers all the deltas then
	 * we can use that instead of copying to a separate roll buffer.
	 */
	if (rbp->rb_crb) {
		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
		return (0);
	}

	/*
	 * Set up the read.
	 * If no read is needed logmap_setup_read() returns 0.
	 */
	if (logmap_setup_read(rbp->rb_age, rbp)) {
		/*
		 * async read the data from master
		 */
		logstats.ls_rreads.value.ui64++;
		bp->b_bcount = MAPBLOCKSIZE;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
	} else {
		sema_v(&bp->b_io);	/* mark read as complete */
	}
	return (0);
}
/*ARGSUSED*/
int
ufs_rdwr_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	struct buf	*bp;
	krw_t		rwtype = RW_READER;
	u_offset_t	offset1 = offset;	/* Initial offset */
	size_t		iolen;
	int		curlen = 0;
	int		pplen;
	daddr_t		bn;
	int		contig = 0;
	int		error = 0;
	int		nbytes;			/* Number bytes this IO */
	int		offsetn;		/* Start point this IO */
	int		iswrite = flags & B_WRITE;
	int		io_started = 0;		/* No IO started */
	struct ulockfs	*ulp;
	uint_t		protp = PROT_ALL;

	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap,
	    !iswrite, &protp);
	if (error) {
		if (flags & B_ASYNC) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	fs = ufsvfsp->vfs_fs;
	iolen = len;

	DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p off %llx len %lx"
	    " isize: %llx fdb: %p\n",
	    flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
	    (void *)vnodep->v_pages, offset1, iolen, ip->i_size,
	    (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, rwtype);

	ASSERT(offset1 < ip->i_size);

	if ((offset1 + iolen) > ip->i_size) {
		iolen = ip->i_size - offset1;
	}
	while (!error && curlen < iolen) {

		contig = 0;

		if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
			break;
		}
		ASSERT(!(bn == UFS_HOLE && iswrite));
		if (bn == UFS_HOLE) {
			/*
			 * If the above assertion is true,
			 * then the following if statement can never be true.
			 */
			if (iswrite && (rwtype == RW_READER)) {
				rwtype = RW_WRITER;
				if (!rw_tryupgrade(&ip->i_contents)) {
					rw_exit(&ip->i_contents);
					rw_enter(&ip->i_contents, rwtype);
					continue;
				}
			}
			offsetn = blkoff(fs, offset1);
			pplen = P2ROUNDUP(len, PAGESIZE);
			nbytes = MIN((pplen - curlen),
			    (fs->fs_bsize - offsetn));
			ASSERT(nbytes > 0);

			/*
			 * We may be reading or writing.
			 */
			DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
			    offset1, (iolen - curlen)));

			if (iswrite) {
				printf("**WARNING: ignoring hole in write\n");
				error = ENOSPC;
			} else {
				fdb_add_hole(fdbp, offset1 - offset, nbytes);
			}
			offset1 += nbytes;
			curlen += nbytes;
			continue;
		}
		ASSERT(contig > 0);
		pplen = P2ROUNDUP(len, PAGESIZE);

		contig = MIN(contig, len - curlen);
		contig = P2ROUNDUP(contig, DEV_BSIZE);

		bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_file = ip->i_vnode;
		bp->b_offset = (offset_t)offset1;

		if (ufsvfsp->vfs_snapshot) {
			fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
		} else {
			(void) bdev_strategy(bp);
		}
		io_started = 1;

		offset1 += contig;
		curlen += contig;
		if (iswrite)
			lwp_stat_update(LWP_STAT_OUBLK, 1);
		else
			lwp_stat_update(LWP_STAT_INBLK, 1);

		if ((flags & B_ASYNC) == 0) {
			error = biowait(bp);
			fdb_iodone(bp);
		}
		DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
		    offset1, (iolen - curlen)));
	}

	DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
	    offset1, (iolen - curlen), (void *)vnodep->v_pages));

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if (flags & B_ASYNC) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		ufs_lockfs_end(ulp);
	}
	if (io_started && flags & B_ASYNC) {
		return (0);
	} else {
		return (error);
	}
}
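
/*
 * Added worked example for the hole-sizing arithmetic above (assuming
 * fs_bsize = 8K and PAGESIZE = 4K; values are illustrative only): for a
 * request of len = 10K that starts 3K into a block, pplen =
 * P2ROUNDUP(10K, 4K) = 12K, so the first hole chunk is
 * MIN(12K - 0, 8K - 3K) = 5K.  That realigns the next iteration to a
 * block boundary, after which chunks are full blocks or the remaining
 * pplen (here 7K).
 */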
/*
 * ufs_alloc_data - supports allocating space and reads or writes
 * that involve changes to file length or space allocation.
 *
 * This function is more expensive, because of the UFS log transaction,
 * so ufs_rdwr_data() should be used when space or file length changes
 * will not occur.
 *
 * Inputs:
 * fdb - A null pointer instructs this function to only allocate
 *	space for the specified offset and length.
 *	An actual fdbuffer instructs this function to perform
 *	the read or write operation.
 * flags - defaults (zero value) to synchronous write
 *	B_READ - indicates read operation
 *	B_ASYNC - indicates perform operation asynchronously
 */
int
ufs_alloc_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		*len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	size_t		done_len, io_len;
	int		contig;
	u_offset_t	uoff, io_off;
	int		error = 0;		/* No error occurred */
	int		offsetn;		/* Start point this IO */
	int		nbytes;			/* Number bytes in this IO */
	daddr_t		bn;
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	int		i_size_changed = 0;
	u_offset_t	old_i_size;
	struct ulockfs	*ulp;
	int		trans_size;
	int		issync;			/* UFS Log transaction */
						/* synchronous when non-zero */
	int		io_started = 0;		/* No IO started */
	uint_t		protp = PROT_ALL;

	ASSERT((flags & B_WRITE) == 0);

	/*
	 * Obey the lockfs protocol
	 */
	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
	if (error) {
		if ((fdbp != NULL) && (flags & B_ASYNC)) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	if (ulp) {
		/*
		 * Try to begin a UFS log transaction
		 */
		trans_size = TOP_GETPAGE_SIZE(ip);
		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
		    trans_size, error);
		if (error == EWOULDBLOCK) {
			ufs_lockfs_end(ulp);
			if ((fdbp != NULL) && (flags & B_ASYNC)) {
				fdb_ioerrdone(fdbp, EDEADLK);
			}
			return (EDEADLK);
		}
	}

	uoff = offset;
	io_off = offset;
	io_len = *len;
	done_len = 0;

	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ASSERT((ip->i_mode & IFMT) == IFREG);

	fs = ip->i_fs;

	while (error == 0 && done_len < io_len) {
		uoff = (u_offset_t)(io_off + done_len);
		offsetn = (int)blkoff(fs, uoff);
		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);

		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
		    uoff, nbytes));

		if (uoff + nbytes > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
			    ip->i_size, uoff + nbytes));

			error = bmap_write(ip, uoff, (offsetn + nbytes),
			    BI_ALLOC_ONLY, NULL, credp);
			if (ip->i_flag & (ICHG|IUPD))
				ip->i_seq++;
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
				    "failed err: %d\n", error));
				break;
			}
			if (fdbp != NULL) {
				if (uoff >= ip->i_size) {
					/*
					 * Desired offset is past end of bytes
					 * in file, so we have a hole.
					 */
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				} else {
					int contig;
					buf_t *bp;

					error = bmap_read(ip, uoff, &bn,
					    &contig);
					if (error) {
						break;
					}

					contig = ip->i_size - uoff;
					contig = P2ROUNDUP(contig, DEV_BSIZE);

					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
					if (contig > (ip->i_size - uoff)) {
						contig -= ip->i_size - uoff;

						fdb_add_hole(fdbp,
						    ip->i_size - offset,
						    contig);
					}
				}
			}

			i_size_changed = 1;
			old_i_size = ip->i_size;
			UFS_SET_ISIZE(uoff + nbytes, ip);
			TRANS_INODE(ip->i_ufsvfs, ip);
			/*
			 * file has grown larger than 2GB. Set flag
			 * in superblock to indicate this, if it
			 * is not already set.
			 */
			if ((ip->i_size > MAXOFF32_T) &&
			    !(fs->fs_flags & FSLARGEFILES)) {
				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
				mutex_enter(&ufsvfsp->vfs_lock);
				fs->fs_flags |= FSLARGEFILES;
				ufs_sbwrite(ufsvfsp);
				mutex_exit(&ufsvfsp->vfs_lock);
			}
		} else {
			/*
			 * The file length is not being extended.
			 */
			error = bmap_read(ip, uoff, &bn, &contig);
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: "
				    "bmap_read err: %d\n", error));
				break;
			}

			if (bn != UFS_HOLE) {
				/*
				 * Did not map a hole in the file
				 */
				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
				buf_t	*bp;

				if (fdbp != NULL) {
					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
				}
			} else {
				/*
				 * We read a hole in the file.
				 * We have to allocate blocks for the hole.
				 */
				error = bmap_write(ip, uoff, (offsetn + nbytes),
				    BI_ALLOC_ONLY, NULL, credp);
				if (ip->i_flag & (ICHG|IUPD))
					ip->i_seq++;
				if (error) {
					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
					    " hole failed error: %d\n", error));
					break;
				}
				if (fdbp != NULL) {
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				}
			}
		}
		done_len += nbytes;
	}

	if (error) {
		if (i_size_changed) {
			/*
			 * Allocation of the blocks for the file failed.
			 * So truncate the file size back to its original size.
			 */
			(void) ufs_itrunc(ip, old_i_size, 0, credp);
		}
	}

	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
	    uoff, (io_len - done_len)));

	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
	} else {
		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
	}

	/*
	 * Flush cached pages.
	 *
	 * XXX - There should be no pages involved, since the I/O was performed
	 * through the device strategy routine and the page cache was bypassed.
	 * However, testing has demonstrated that this VOP_PUTPAGE is
	 * necessary.  Without this, data might not always be read back as it
	 * was written.
	 */
	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp);

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if ((fdbp != NULL) && (flags & B_ASYNC)) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		/*
		 * End the UFS Log transaction
		 */
		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
		    trans_size);
		ufs_lockfs_end(ulp);
	}
	if (io_started && (flags & B_ASYNC)) {
		return (0);
	} else {
		return (error);
	}
}
/*
 * Read a cluster from the snapshotted block device to the cache.
 */
static int
fss_read_cluster(struct fss_softc *sc, u_int32_t cl)
{
	int error, todo, offset, len;
	daddr_t dblk;
	struct buf *bp, *mbp;
	struct fss_cache *scp, *scl;

	/*
	 * Get a free cache slot.
	 */
	scl = sc->sc_cache+sc->sc_cache_size;

	mutex_enter(&sc->sc_slock);

restart:
	if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) {
		mutex_exit(&sc->sc_slock);
		return 0;
	}

	for (scp = sc->sc_cache; scp < scl; scp++)
		if (scp->fc_cluster == cl) {
			if (scp->fc_type == FSS_CACHE_VALID) {
				mutex_exit(&sc->sc_slock);
				return 0;
			} else if (scp->fc_type == FSS_CACHE_BUSY) {
				cv_wait(&scp->fc_state_cv, &sc->sc_slock);
				goto restart;
			}
		}

	for (scp = sc->sc_cache; scp < scl; scp++)
		if (scp->fc_type == FSS_CACHE_FREE) {
			scp->fc_type = FSS_CACHE_BUSY;
			scp->fc_cluster = cl;
			break;
		}
	if (scp >= scl) {
		cv_wait(&sc->sc_cache_cv, &sc->sc_slock);
		goto restart;
	}

	mutex_exit(&sc->sc_slock);

	/*
	 * Start the read.
	 */
	dblk = btodb(FSS_CLTOB(sc, cl));
	if (cl == sc->sc_clcount-1) {
		todo = sc->sc_clresid;
		memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo);
	} else
		todo = FSS_CLSIZE(sc);
	offset = 0;
	mbp = getiobuf(NULL, true);
	mbp->b_bufsize = todo;
	mbp->b_data = scp->fc_data;
	mbp->b_resid = mbp->b_bcount = todo;
	mbp->b_flags = B_READ;
	mbp->b_cflags = BC_BUSY;
	mbp->b_dev = sc->sc_bdev;
	while (todo > 0) {
		len = todo;
		if (len > MAXPHYS)
			len = MAXPHYS;
		if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo)
			bp = mbp;
		else {
			bp = getiobuf(NULL, true);
			nestiobuf_setup(mbp, bp, offset, len);
		}
		bp->b_lblkno = 0;
		bp->b_blkno = dblk;
		bdev_strategy(bp);
		dblk += btodb(len);
		offset += len;
		todo -= len;
	}
	error = biowait(mbp);
	putiobuf(mbp);

	mutex_enter(&sc->sc_slock);
	scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID);
	cv_broadcast(&scp->fc_state_cv);
	if (error == 0) {
		setbit(sc->sc_copied, scp->fc_cluster);
		cv_signal(&sc->sc_work_cv);
	}
	mutex_exit(&sc->sc_slock);

	return error;
}
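
/*
 * Added note on the transfer split above, not part of the original
 * source: when the whole cluster fits in a single MAXPHYS transfer the
 * master buffer mbp is submitted directly; otherwise each chunk gets its
 * own nested buffer via nestiobuf_setup(), which is expected to account
 * the child against mbp so that biowait(mbp) returns only after every
 * chunk has completed and any child error has been propagated to the
 * master.
 */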
/*
 * The kernel thread (one for every active snapshot).
 *
 * After wakeup it cleans the cache and runs the I/O requests.
 */
static void
fss_bs_thread(void *arg)
{
	bool thread_idle, is_valid;
	int error, i, todo, len, crotor, is_read;
	long off;
	char *addr;
	u_int32_t c, cl, ch, *indirp;
	struct buf *bp, *nbp;
	struct fss_softc *sc;
	struct fss_cache *scp, *scl;

	sc = arg;
	scl = sc->sc_cache+sc->sc_cache_size;
	crotor = 0;
	thread_idle = false;

	mutex_enter(&sc->sc_slock);

	for (;;) {
		if (thread_idle)
			cv_wait(&sc->sc_work_cv, &sc->sc_slock);
		thread_idle = true;
		if ((sc->sc_flags & FSS_BS_THREAD) == 0) {
			mutex_exit(&sc->sc_slock);
			kthread_exit(0);
		}

		/*
		 * Process I/O requests (persistent)
		 */
		if (sc->sc_flags & FSS_PERSISTENT) {
			if ((bp = bufq_get(sc->sc_bufq)) == NULL)
				continue;
			is_valid = FSS_ISVALID(sc);
			is_read = (bp->b_flags & B_READ);
			thread_idle = false;
			mutex_exit(&sc->sc_slock);

			if (is_valid) {
				disk_busy(sc->sc_dkdev);
				error = fss_bs_io(sc, FSS_READ, 0,
				    dbtob(bp->b_blkno), bp->b_bcount,
				    bp->b_data);
				disk_unbusy(sc->sc_dkdev,
				    (error ? 0 : bp->b_bcount), is_read);
			} else
				error = ENXIO;

			bp->b_error = error;
			bp->b_resid = (error ? bp->b_bcount : 0);
			biodone(bp);

			mutex_enter(&sc->sc_slock);
			continue;
		}

		/*
		 * Clean the cache
		 */
		for (i = 0; i < sc->sc_cache_size; i++) {
			crotor = (crotor + 1) % sc->sc_cache_size;
			scp = sc->sc_cache + crotor;
			if (scp->fc_type != FSS_CACHE_VALID)
				continue;
			mutex_exit(&sc->sc_slock);

			thread_idle = false;
			indirp = fss_bs_indir(sc, scp->fc_cluster);
			if (indirp != NULL) {
				error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext,
				    0, FSS_CLSIZE(sc), scp->fc_data);
			} else
				error = EIO;

			mutex_enter(&sc->sc_slock);
			if (error == 0) {
				*indirp = sc->sc_clnext++;
				sc->sc_indir_dirty = 1;
			} else
				fss_error(sc, "write error on backing store");

			scp->fc_type = FSS_CACHE_FREE;
			cv_broadcast(&sc->sc_cache_cv);
			break;
		}

		/*
		 * Process I/O requests
		 */
		if ((bp = bufq_get(sc->sc_bufq)) == NULL)
			continue;
		is_valid = FSS_ISVALID(sc);
		is_read = (bp->b_flags & B_READ);
		thread_idle = false;

		if (!is_valid) {
			mutex_exit(&sc->sc_slock);

			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);

			mutex_enter(&sc->sc_slock);
			continue;
		}

		disk_busy(sc->sc_dkdev);

		/*
		 * First read from the snapshotted block device unless
		 * this request is completely covered by backing store.
		 */
		cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
		off = FSS_CLOFF(sc, dbtob(bp->b_blkno));
		ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
		error = 0;
		bp->b_resid = 0;
		bp->b_error = 0;
		for (c = cl; c <= ch; c++) {
			if (isset(sc->sc_copied, c))
				continue;
			mutex_exit(&sc->sc_slock);

			/* Not on backing store, read from device. */
			nbp = getiobuf(NULL, true);
			nbp->b_flags = B_READ;
			nbp->b_resid = nbp->b_bcount = bp->b_bcount;
			nbp->b_bufsize = bp->b_bcount;
			nbp->b_data = bp->b_data;
			nbp->b_blkno = bp->b_blkno;
			nbp->b_lblkno = 0;
			nbp->b_dev = sc->sc_bdev;
			SET(nbp->b_cflags, BC_BUSY);	/* mark buffer busy */

			bdev_strategy(nbp);

			error = biowait(nbp);
			if (error != 0) {
				bp->b_resid = bp->b_bcount;
				bp->b_error = nbp->b_error;
				disk_unbusy(sc->sc_dkdev, 0, is_read);
				biodone(bp);
			}
			putiobuf(nbp);

			mutex_enter(&sc->sc_slock);
			break;
		}
		if (error)
			continue;

		/*
		 * Replace those parts that have been saved to backing store.
		 */
		addr = bp->b_data;
		todo = bp->b_bcount;
		for (c = cl; c <= ch; c++, off = 0, todo -= len, addr += len) {
			len = FSS_CLSIZE(sc)-off;
			if (len > todo)
				len = todo;
			if (isclr(sc->sc_copied, c))
				continue;
			mutex_exit(&sc->sc_slock);

			indirp = fss_bs_indir(sc, c);
			if (indirp == NULL || *indirp == 0) {
				/*
				 * Not on backing store.  Either in cache
				 * or hole in the snapshotted block device.
				 */

				mutex_enter(&sc->sc_slock);
				for (scp = sc->sc_cache; scp < scl; scp++)
					if (scp->fc_type == FSS_CACHE_VALID &&
					    scp->fc_cluster == c)
						break;
				if (scp < scl)
					memcpy(addr, (char *)scp->fc_data+off,
					    len);
				else
					memset(addr, 0, len);
				continue;
			}

			/*
			 * Read from backing store.
			 */
			error = fss_bs_io(sc, FSS_READ, *indirp, off, len,
			    addr);

			mutex_enter(&sc->sc_slock);
			if (error) {
				bp->b_resid = bp->b_bcount;
				bp->b_error = error;
				break;
			}
		}
		mutex_exit(&sc->sc_slock);

		disk_unbusy(sc->sc_dkdev, (error ? 0 : bp->b_bcount), is_read);
		biodone(bp);

		mutex_enter(&sc->sc_slock);
	}
}
void
lqfs_write_strategy(ml_unit_t *ul, buf_t *bp)
{
	offset_t mof = ldbtob(bp->b_blkno);
	off_t nb = bp->b_bcount;
	char *va;
	mapentry_t *me;
	uchar_t ord;
	qfsvfs_t *qfsvfsp = ul->un_qfsvfs;
#ifdef LUFS
#else
	caddr_t buf;

	va = bp_mapin_common(bp, VM_SLEEP);
	buf = bp->b_un.b_addr;
#endif /* LUFS */

	ASSERT((nb & DEV_BMASK) == 0);
	ul->un_logmap->mtm_ref = 1;

	/*
	 * if there are deltas, move into log
	 */
	ord = lqfs_find_ord(qfsvfsp, bp);
#ifdef LUFS
	me = deltamap_remove(ul->un_deltamap, mof, ord, nb);
	if (me) {
		va = bp_mapin_common(bp, VM_SLEEP);

		ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
		    (ul->un_matamap == NULL) ||
		    matamap_within(ul->un_matamap, mof, ord, nb));

		/*
		 * move to logmap
		 */
		if (qfs_crb_enable) {
			logmap_add_buf(ul, va, mof, ord, me,
			    bp->b_un.b_addr, nb);
		} else {
			logmap_add(ul, va, mof, ord, me);
		}

		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
		}
		biodone(bp);
		return;
	}
#else
	if (buf && qfs_crb_enable) {
		uint32_t bufsz;
		offset_t vamof;
		offset_t hmof;
		uchar_t vaord;
		uchar_t hord;
		uint32_t hnb, nb1;

		bufsz = bp->b_bcount;
		ASSERT((bufsz & DEV_BMASK) == 0);
		vamof = mof;
		vaord = ord;

		/*
		 * Move any deltas to the logmap.  Split requests that
		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
		 */
		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
			if (hnb > bufsz) {
				hnb = bufsz;
			}
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): Removing "
			    "deltamap deltas within mof 0x%llx ord %d nb %d\n",
			    MAX(hmof, vamof), vaord, MIN(hnb, nb1));
			me = deltamap_remove(ul->un_deltamap,
			    MAX(hmof, vamof), vaord, MIN(hnb, nb1));
			hord = vaord;
			if (me) {
				logmap_add_buf(ul, va, hmof, hord, me, buf,
				    hnb);

				if (ul->un_flags & LDL_ERROR) {
					bp->b_flags |= B_ERROR;
					bp->b_error = EIO;
				}
				biodone(bp);
				return;
			}
		}
	} else {
		/*
		 * if there are deltas
		 */
		LQFS_MSG(CE_WARN, "lqfs_write_strategy(): Removing "
		    "deltamap deltas within mof 0x%x ord %d nb %d\n",
		    mof, ord, nb);
		me = deltamap_remove(ul->un_deltamap, mof, ord, nb);
		if (me) {
			ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
			    (ul->un_matamap == NULL) ||
			    matamap_within(ul->un_matamap, mof, ord, nb));

			/*
			 * move to logmap
			 */
			logmap_add(ul, va, mof, ord, me);

			if (ul->un_flags & LDL_ERROR) {
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bp);
			return;
		}
	}
#endif /* LUFS */

	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	/*
	 * Check that we are not updating metadata, or if so then via B_PHYS.
	 */
	ASSERT((ul->un_matamap == NULL) ||
	    !(matamap_overlap(ul->un_matamap, mof, ord, nb) &&
	    ((bp->b_flags & B_PHYS) == 0)));

	LQFS_SET_IOTSTAMP(ul->un_qfsvfs, ddi_get_lbolt());
	logstats.ls_lwrites.value.ui64++;

#ifdef LQFS_TODO_SNAPSHOT
	/* If snapshots are enabled, write through the snapshot driver */
	if (ul->un_qfsvfs->vfs_snapshot) {
		fssnap_strategy(&ul->un_qfsvfs->vfs_snapshot, bp);
	} else {
#endif /* LQFS_TODO_SNAPSHOT */
		if ((bp->b_flags & B_READ) == 0) {
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): "
			    "bdev_strategy writing mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		} else {
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): "
			    "bdev_strategy reading mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		}
		(void) bdev_strategy(bp);
#ifdef LQFS_TODO_SNAPSHOT
	}
#endif /* LQFS_TODO_SNAPSHOT */

#ifdef LQFS_TODO_STATS
	lwp_stat_update(LWP_STAT_OUBLK, 1);
#endif /* LQFS_TODO_STATS */
}
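
/*
 * Added illustration of the MAPBLOCKSIZE split above, not part of the
 * original source (assuming MAPBLOCKSIZE = 8K for the example): a 12K
 * buffer whose mapped offset starts 6K into a map block is processed as
 * chunks of 2K, 8K and 2K, since
 *
 *	hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF)
 *
 * yields 2K for the first pass and is then capped by the remaining
 * bufsz, so no deltamap_remove() call straddles a MAPBLOCKSIZE boundary.
 */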
void
lqfs_read_strategy(ml_unit_t *ul, buf_t *bp)
{
	mt_map_t *logmap = ul->un_logmap;
	offset_t mof = ldbtob(bp->b_blkno);
	off_t nb = bp->b_bcount;
	mapentry_t *age;
	char *va;
	int (*saviodone)();
	int entire_range;
	uchar_t ord;
	qfsvfs_t *qfsvfsp = ul->un_qfsvfs;

	/*
	 * get a linked list of overlapping deltas
	 * returns with &mtm->mtm_rwlock held
	 */
	ord = lqfs_find_ord(qfsvfsp, bp);
	entire_range = logmap_list_get(logmap, mof, ord, nb, &age);

	/*
	 * no overlapping deltas were found; read master
	 */
	if (age == NULL) {
		rw_exit(&logmap->mtm_rwlock);
		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
			biodone(bp);
		} else {
			LQFS_SET_IOTSTAMP(ul->un_qfsvfs, ddi_get_lbolt());
			logstats.ls_lreads.value.ui64++;
			if ((bp->b_flags & B_READ) == 0) {
				LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
				    "bdev_strategy writing mof 0x%x "
				    "edev %ld nb %d\n", bp->b_blkno * 512,
				    bp->b_edev, bp->b_bcount);
			} else {
				LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
				    "bdev_strategy reading mof 0x%x "
				    "edev %ld nb %d\n", bp->b_blkno * 512,
				    bp->b_edev, bp->b_bcount);
			}
			(void) bdev_strategy(bp);
#ifdef LQFS_TODO_STATS
			lwp_stat_update(LWP_STAT_INBLK, 1);
#endif /* LQFS_TODO_STATS */
		}
		return;
	}

	va = bp_mapin_common(bp, VM_SLEEP);

	/*
	 * if necessary, sync read the data from master
	 *	errors are returned in bp
	 */
	if (!entire_range) {
		saviodone = bp->b_iodone;
		bp->b_iodone = trans_not_done;
		logstats.ls_mreads.value.ui64++;
		if ((bp->b_flags & B_READ) == 0) {
			LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
			    "bdev_strategy writing mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		} else {
			LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
			    "bdev_strategy reading mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		}
		(void) bdev_strategy(bp);
#ifdef LQFS_TODO_STATS
		lwp_stat_update(LWP_STAT_INBLK, 1);
#endif /* LQFS_TODO_STATS */
		if (trans_not_wait(bp)) {
			ldl_seterror(ul, "Error reading master");
		}
		bp->b_iodone = saviodone;
	}

	/*
	 * sync read the data from the log
	 *	errors are returned inline
	 */
	if (ldl_read(ul, va, mof, ord, nb, age)) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}

	/*
	 * unlist the deltas
	 */
	logmap_list_put(logmap, age);

	/*
	 * all done
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	biodone(bp);
}