static int
vdev_file_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			zio->io_error = VOP_FSYNC(vf->vf_vnode,
			    FSYNC | FDSYNC, kcred, NULL);
			break;
		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy,
	    zio, TQ_PUSHPAGE), !=, 0);

	return (ZIO_PIPELINE_STOP);
}
static int
vdev_file_io_start(zio_t *zio)
{
	spa_t *spa = zio->io_spa;
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			zio->io_error = VOP_FSYNC(vf->vf_vnode,
			    FSYNC | FDSYNC, kcred, NULL);
			break;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	spa_taskq_dispatch_ent(spa, ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE,
	    vdev_file_io_strategy, zio, 0, &zio->io_tqent);

	return (ZIO_PIPELINE_STOP);
}
static void
vdev_file_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;
	ssize_t resid = 0;

	if (zio->io_type == ZIO_TYPE_IOCTL) {

		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			if (!vnode_getwithvid(vf->vf_vnode, vf->vf_vid)) {
				zio->io_error = VOP_FSYNC(vf->vf_vnode,
				    FSYNC | FDSYNC, kcred, NULL);
				vnode_put(vf->vf_vnode);
			}
			break;
		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		zio_interrupt(zio);
		return;
	}

	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE);

	if (!vnode_getwithvid(vf->vf_vnode, vf->vf_vid)) {
		/*
		 * VERIFY3U(taskq_dispatch(vdev_file_taskq,
		 *     vdev_file_io_strategy, zio, TQ_PUSHPAGE), !=, 0);
		 */
		zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
		    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
		    zio->io_size, zio->io_offset, UIO_SYSSPACE, 0,
		    RLIM64_INFINITY, kcred, &resid);
		vnode_put(vf->vf_vnode);
	}

	if (resid != 0 && zio->io_error == 0)
		zio->io_error = SET_ERROR(ENOSPC);

	zio_interrupt(zio);
}
static int
vdev_file_io_start(zio_t *zio)
{
	spa_t *spa = zio->io_spa;
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;
	vdev_buf_t *vb;
	buf_t *bp;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			zio->io_error = VOP_FSYNC(vf->vf_vnode,
			    FSYNC | FDSYNC, kcred, NULL);
			break;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);

	vb->vb_io = zio;
	bp = &vb->vb_buf;

	bioinit(bp);
	bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_private = vf->vf_vnode;
	bp->b_iodone = (int (*)())vdev_file_io_intr;

	taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE],
	    vdev_file_io_strategy, bp, 0, &zio->io_tqent);

	return (ZIO_PIPELINE_STOP);
}
static int
vdev_file_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;
	ssize_t resid = 0;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			vnode_getwithvid(vf->vf_vnode, vf->vf_vid);
			zio->io_error = VOP_FSYNC(vf->vf_vnode,
			    FSYNC | FDSYNC, kcred, NULL);
			vnode_put(vf->vf_vnode);
			break;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	vnode_getwithvid(vf->vf_vnode, vf->vf_vid);
	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
	    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, zio->io_size,
	    zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	vnode_put(vf->vf_vnode);

	if (resid != 0 && zio->io_error == 0)
		zio->io_error = ENOSPC;

	zio_interrupt(zio);

	return (ZIO_PIPELINE_STOP);
}
static int
vdev_file_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;
#ifdef LINUX_AIO
	struct iocb *iocbp = &zio->io_aio;
#endif
	ssize_t resid;
	int error;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		zio_vdev_io_bypass(zio);

		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			if (zfs_nocacheflush)
				break;

			/* This doesn't actually do much with O_DIRECT... */
			zio->io_error = VOP_FSYNC(vf->vf_vnode,
			    FSYNC | FDSYNC, kcred, NULL);
			dprintf("fsync(%s) = %d\n", vdev_description(vd),
			    zio->io_error);

			if (vd->vdev_nowritecache) {
				zio->io_error = ENOTSUP;
				break;
			}

			/* Flush the write cache */
			error = flushwc(vf->vf_vnode);
			dprintf("flushwc(%s) = %d\n", vdev_description(vd),
			    error);
			if (error) {
#ifdef _KERNEL
				cmn_err(CE_WARN, "Failed to flush write cache "
				    "on device '%s'. Data on pool '%s' may be "
				    "lost if power fails. No further warnings "
				    "will be given.", vdev_description(vd),
				    spa_name(vd->vdev_spa));
#endif
				vd->vdev_nowritecache = B_TRUE;
				zio->io_error = error;
			}
			break;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	/*
	 * In the kernel, don't bother double-caching, but in userland,
	 * we want to test the vdev_cache code.
	 */
	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
		return (ZIO_PIPELINE_STOP);

	if ((zio = vdev_queue_io(zio)) == NULL)
		return (ZIO_PIPELINE_STOP);

	/* XXPOLICY */
	if (zio->io_type == ZIO_TYPE_WRITE)
		error = vdev_writeable(vd) ?
		    vdev_error_inject(vd, zio) : ENXIO;
	else
		error = vdev_readable(vd) ?
		    vdev_error_inject(vd, zio) : ENXIO;
	error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ?
	    ENXIO : error;

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}

#ifdef LINUX_AIO
	if (zio->io_aio_ctx && zio->io_aio_ctx->zac_enabled) {
		if (zio->io_type == ZIO_TYPE_READ)
			io_prep_pread(&zio->io_aio, vf->vf_vnode->v_fd,
			    zio->io_data, zio->io_size, zio->io_offset);
		else
			io_prep_pwrite(&zio->io_aio, vf->vf_vnode->v_fd,
			    zio->io_data, zio->io_size, zio->io_offset);

		zio->io_aio.data = zio;

		do {
			error = io_submit(zio->io_aio_ctx->zac_ctx, 1,
			    &iocbp);
		} while (error == -EINTR);

		if (error < 0) {
			zio->io_error = -error;
			zio_interrupt(zio);
		} else
			VERIFY(error == 1);

		return (ZIO_PIPELINE_STOP);
	}
#endif

	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
	    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, zio->io_size,
	    zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	if (resid != 0 && zio->io_error == 0)
		zio->io_error = ENOSPC;

	zio_interrupt(zio);

	return (ZIO_PIPELINE_STOP);
}
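/*
 * Illustrative sketch, not from any of the ports above: several of the
 * file-vdev variants call vn_rdwr() and then treat a nonzero residual
 * count with no error as ENOSPC, i.e. the contract is "full transfer or
 * error". A self-contained userland analogue of that convention, using
 * POSIX pread(), might look like the function below. The function name
 * file_read_fully() is hypothetical.
 */
#include <errno.h>
#include <sys/types.h>
#include <unistd.h>

static int
file_read_fully(int fd, void *buf, size_t size, off_t offset)
{
	char *p = buf;
	size_t resid = size;

	while (resid > 0) {
		ssize_t n = pread(fd, p, resid, offset);

		if (n < 0) {
			if (errno == EINTR)
				continue;	/* retry interrupted I/O */
			return (errno);		/* genuine I/O error */
		}
		if (n == 0)
			break;			/* EOF: leaves resid != 0 */
		p += n;
		offset += n;
		resid -= n;
	}

	/* Mirror the vdev_file convention: short transfer, no error -> ENOSPC. */
	return (resid != 0 ? ENOSPC : 0);
}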
static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct buf *bp;
	vfs_context_t context;
	int flags, error = 0;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_offline) || dvd->vd_devvp == NULL) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	switch (zio->io_type) {
	case ZIO_TYPE_IOCTL:

		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			zio_interrupt(zio);
			return;
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			context = vfs_context_create(spl_vfs_context_kernel());
			error = VNOP_IOCTL(dvd->vd_devvp,
			    DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
			(void) vfs_context_rele(context);

			if (error == 0)
				vdev_disk_ioctl_done(zio, error);
			else
				error = ENOTSUP;

			if (error == 0) {
				/*
				 * The ioctl completed synchronously and
				 * vdev_disk_ioctl_done() has already been
				 * called above, so nothing is left to do.
				 */
				return;
			} else if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;
			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		} /* io_cmd */

		zio_execute(zio);
		return;

	case ZIO_TYPE_WRITE:
		if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
			flags = B_WRITE;
		else
			flags = B_WRITE | B_ASYNC;
		break;

	case ZIO_TYPE_READ:
		if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
			flags = B_READ;
		else
			flags = B_READ | B_ASYNC;
		break;

	default:
		zio->io_error = SET_ERROR(ENOTSUP);
		zio_interrupt(zio);
		return;
	} /* io_type */

	ASSERT(zio->io_type == ZIO_TYPE_READ ||
	    zio->io_type == ZIO_TYPE_WRITE);

	/* Stop OS X from also caching our data */
	flags |= B_NOCACHE;

	if (zio->io_flags & ZIO_FLAG_FAILFAST)
		flags |= B_FAILFAST;

	zio->io_target_timestamp = zio_handle_io_delay(zio);

	bp = buf_alloc(dvd->vd_devvp);

	ASSERT(bp != NULL);
	ASSERT(zio->io_data != NULL);
	ASSERT(zio->io_size != 0);

	buf_setflags(bp, flags);
	buf_setcount(bp, zio->io_size);
	buf_setdataptr(bp, (uintptr_t)zio->io_data);

	/*
	 * Map the offset to a block number in units of the device's
	 * physical block size (512, 4096, ...). lbtodb() is fixed at
	 * 512 bytes, so shift by the vdev's ashift instead.
	 */
	buf_setblkno(bp, zio->io_offset >> dvd->vd_ashift);
	buf_setlblkno(bp, zio->io_offset >> dvd->vd_ashift);

	buf_setsize(bp, zio->io_size);

	if (buf_setcallback(bp, vdev_disk_io_intr, zio) != 0)
		panic("vdev_disk_io_start: buf_setcallback failed\n");

	if (zio->io_type == ZIO_TYPE_WRITE)
		vnode_startwrite(dvd->vd_devvp);

	error = VNOP_STRATEGY(bp);
	ASSERT(error == 0);

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
	}
}
/*
 * Avoid inlining the function to keep vdev_mirror_io_start(), which
 * is this function's only caller, as small as possible on the stack.
 */
noinline static mirror_map_t *
vdev_mirror_map_alloc(zio_t *zio)
{
	mirror_map_t *mm = NULL;
	mirror_child_t *mc;
	vdev_t *vd = zio->io_vd;
	int c, d;

	if (vd == NULL) {
		dva_t *dva = zio->io_bp->blk_dva;
		spa_t *spa = zio->io_spa;

		c = BP_GET_NDVAS(zio->io_bp);

		mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]),
		    KM_PUSHPAGE);
		mm->mm_children = c;
		mm->mm_replacing = B_FALSE;
		mm->mm_preferred = spa_get_random(c);
		mm->mm_root = B_TRUE;

		/*
		 * Check the other, lower-index DVAs to see if they're on
		 * the same vdev as the child we picked.  If they are, use
		 * them since they are likely to have been allocated from
		 * the primary metaslab in use at the time, and hence are
		 * more likely to have locality with single-copy data.
		 */
		for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) {
			if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c]))
				mm->mm_preferred = d;
		}

		for (c = 0; c < mm->mm_children; c++) {
			mc = &mm->mm_child[c];

			mc->mc_vd = vdev_lookup_top(spa,
			    DVA_GET_VDEV(&dva[c]));
			mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
		}
	} else {
		int lowest_pending = INT_MAX;
		int lowest_nr = 1;

		c = vd->vdev_children;

		mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]),
		    KM_PUSHPAGE);
		mm->mm_children = c;
		mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
		    vd->vdev_ops == &vdev_spare_ops);
		mm->mm_preferred = 0;
		mm->mm_root = B_FALSE;

		for (c = 0; c < mm->mm_children; c++) {
			mc = &mm->mm_child[c];
			mc->mc_vd = vd->vdev_child[c];
			mc->mc_offset = zio->io_offset;

			if (mm->mm_replacing)
				continue;

			if (!vdev_readable(mc->mc_vd)) {
				mc->mc_error = SET_ERROR(ENXIO);
				mc->mc_tried = 1;
				mc->mc_skipped = 1;
				mc->mc_pending = INT_MAX;
				continue;
			}

			mc->mc_pending = vdev_mirror_pending(mc->mc_vd);
			if (mc->mc_pending < lowest_pending) {
				lowest_pending = mc->mc_pending;
				lowest_nr = 1;
			} else if (mc->mc_pending == lowest_pending) {
				lowest_nr++;
			}
		}

		d = gethrtime() / (NSEC_PER_USEC * zfs_vdev_mirror_switch_us);
		d = (d % lowest_nr) + 1;

		for (c = 0; c < mm->mm_children; c++) {
			mc = &mm->mm_child[c];

			if (mm->mm_child[c].mc_pending == lowest_pending) {
				if (--d == 0) {
					mm->mm_preferred = c;
					break;
				}
			}
		}
	}

	zio->io_vsd = mm;
	zio->io_vsd_ops = &vdev_mirror_vsd_ops;

	return (mm);
}
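/*
 * Illustrative sketch, not part of the function above: the non-root branch
 * of vdev_mirror_map_alloc() prefers the child with the fewest pending
 * I/Os and, when several children tie, rotates among them once per
 * zfs_vdev_mirror_switch_us-microsecond time slice. The standalone helper
 * below restates just that selection over plain arrays; the name
 * mirror_pick_child() and its parameters are hypothetical.
 */
#include <limits.h>
#include <stdint.h>

static int
mirror_pick_child(const int *pending, int children, uint64_t now_ns,
    uint64_t switch_us)
{
	int lowest = INT_MAX, ties = 0, c, d;

	if (children <= 0)
		return (-1);

	/* Find the lowest pending count and how many children share it. */
	for (c = 0; c < children; c++) {
		if (pending[c] < lowest) {
			lowest = pending[c];
			ties = 1;
		} else if (pending[c] == lowest) {
			ties++;
		}
	}

	/* Rotate among the tied children once per switch_us time slice. */
	d = (int)((now_ns / (1000 * switch_us)) % (uint64_t)ties) + 1;

	for (c = 0; c < children; c++) {
		if (pending[c] == lowest && --d == 0)
			return (c);
	}
	return (0);	/* not reached */
}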
static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd;
	vdev_buf_t *vb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	rw_enter(&vd->vdev_tsd_lock, RW_READER);
	dvd = vd->vdev_tsd;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || dvd->vd_lh == NULL) {
		zio->io_error = ENXIO;
		rw_exit(&vd->vdev_tsd_lock);
		zio_interrupt(zio);
		return;
	}

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			rw_exit(&vd->vdev_tsd_lock);
			zio_interrupt(zio);
			return;
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc),
			    KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				rw_exit(&vd->vdev_tsd_lock);
				return;
			}

			if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;
			break;

		case DKIOCFREE:
			/*
			 * We perform device support checks here instead of
			 * in zio_trim(), as zio_trim() might be invoked on
			 * top of a top-level vdev, whereas vdev_disk_io_start
			 * is guaranteed to be operating on a leaf vdev.
			 */
			if (vd->vdev_notrim &&
			    spa_get_force_trim(vd->vdev_spa) !=
			    SPA_FORCE_TRIM_ON) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			/*
			 * zio->io_private contains a dkioc_free_list_t
			 * specifying which offsets are to be freed.
			 */
			ASSERT(zio->io_private != NULL);
			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)zio->io_private, FKIOCTL, kcred, NULL);

			if (error == ENOTSUP || error == ENOTTY)
				vd->vdev_notrim = B_TRUE;
			zio->io_error = error;
			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		rw_exit(&vd->vdev_tsd_lock);
		zio_execute(zio);
		return;
	}

	vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);

	vb->vb_io = zio;
	bp = &vb->vb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = (int (*)())vdev_disk_io_intr;

	/* ldi_strategy() will return non-zero only on programming errors */
	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);

	rw_exit(&vd->vdev_tsd_lock);
}
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vdev_buf_t *vb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
		zio->io_error = ENXIO;
		return (ZIO_PIPELINE_CONTINUE);
	}

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = SET_ERROR(ENXIO);
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc),
			    KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return (ZIO_PIPELINE_STOP);
			}

			if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;
			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);

	vb->vb_io = zio;
	bp = &vb->vb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = (int (*)())vdev_disk_io_intr;

	zfs_zone_zio_start(zio);

	/* ldi_strategy() will return non-zero only on programming errors */
	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);

	return (ZIO_PIPELINE_STOP);
}
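/*
 * Illustrative sketch, with assumed names: every disk variant above latches
 * vd->vdev_nowritecache when the cache-flush ioctl returns ENOTSUP or
 * ENOTTY, so a known-unsupported ioctl is never reissued. The pattern in
 * isolation, with issue_flush() standing in for the
 * DKIOCFLUSHWRITECACHE ioctl call, might look like this:
 */
#include <errno.h>

struct dev_state {
	int nowritecache;	/* set once flush is known unsupported */
};

/* Hypothetical stand-in for ldi_ioctl(..., DKIOCFLUSHWRITECACHE, ...). */
extern int issue_flush(struct dev_state *dev);

static int
dev_flush_write_cache(struct dev_state *dev)
{
	int error;

	if (dev->nowritecache)
		return (ENOTSUP);	/* known unsupported: fail fast */

	error = issue_flush(dev);
	if (error == ENOTSUP || error == ENOTTY) {
		/* No future attempt can succeed; remember that. */
		dev->nowritecache = 1;
	}
	return (error);
}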