/*
 * Open a disk-backed vdev: resolve vd->vdev_path to a block-device vnode,
 * validate it, and report the device's capacity and alignment.
 *
 * On success, *psize and *max_psize receive the device size in bytes and
 * *ashift receives log2 of the logical block size (floored at
 * SPA_MINBLOCKSIZE).  Returns 0 or an errno; on failure vs_aux records
 * the cause for the spa layer.
 *
 * Fixes vs. previous revision:
 *  - `isssd` is now initialized; it was printed via dprintf even when the
 *    DKIOCISSOLIDSTATE ioctl failed (read of an uninitialized variable).
 *  - The vfs context is created before the reopen fast path, so the
 *    VNOP_IOCTL calls reached via `skip_open` (and vnode_close on that
 *    error path) no longer run with a NULL context.
 */
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vnode_t *devvp = NULLVP;
	vfs_context_t context = NULL;
	uint64_t blkcnt;
	uint32_t blksize;
	int fmode = 0;
	int error = 0;
	int isssd = 0;	/* must be initialized: the SSD ioctl may fail below */

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Every VNOP call below needs a valid context, including the ioctls
	 * reached through the reopen fast path (skip_open), so create it
	 * up front.  It is released in the `out` path.
	 */
	context = vfs_context_create(spl_vfs_context_kernel());

	/*
	 * Reopen the device if it's not currently open.  Otherwise,
	 * just update the physical size of the device.
	 */
	if (dvd != NULL) {
		if (dvd->vd_offline) {
			/*
			 * If we are opening a device in its offline notify
			 * context, the LDI handle was just closed.  Clean
			 * up the LDI event callbacks and free vd->vdev_tsd.
			 */
			vdev_disk_free(vd);
		} else {
			ASSERT(vd->vdev_reopening);
			devvp = dvd->vd_devvp;
			goto skip_open;
		}
	}

	/*
	 * Create vd->vdev_tsd.
	 */
	vdev_disk_alloc(vd);
	dvd = vd->vdev_tsd;

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 */
	/* ### APPLE TODO ### */
#ifdef illumos
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			/* goto out (not return) so the context is released */
			error = SET_ERROR(EINVAL);
			goto out;
		}
	}
#endif

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {
		/* Obtain an opened/referenced vnode for the device. */
		if ((error = vnode_open(vd->vdev_path, spa_mode(spa), 0, 0,
		    &devvp, context))) {
			goto out;
		}
		if (!vnode_isblk(devvp)) {
			error = ENOTBLK;
			goto out;
		}

		/*
		 * ### APPLE TODO ###
		 * vnode_authorize devvp for KAUTH_VNODE_READ_DATA and
		 * KAUTH_VNODE_WRITE_DATA
		 */

		/*
		 * Disallow opening of a device that is currently in use.
		 * Flush out any old buffers remaining from a previous use.
		 */
		if ((error = vfs_mountedon(devvp))) {
			goto out;
		}
		if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) {
			error = ENOTBLK;
			goto out;
		}
		if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA,
		    0, 0))) {
			goto out;
		}
	} else {
		/* unreachable: vdev_path was validated above */
		goto out;
	}

	/*
	 * Remember the resolved pathname of the vnode for later diagnostics;
	 * fall back to an empty string if it cannot be resolved.
	 */
	int len = MAXPATHLEN;
	if (vn_getpath(devvp, dvd->vd_readlinkname, &len) == 0) {
		dprintf("ZFS: '%s' resolved name is '%s'\n",
		    vd->vdev_path, dvd->vd_readlinkname);
	} else {
		dvd->vd_readlinkname[0] = 0;
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0,
	    context) != 0 ||
	    VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0,
	    context) != 0) {
		error = EINVAL;
		goto out;
	}

	*psize = blkcnt * (uint64_t)blksize;
	*max_psize = *psize;

	dvd->vd_ashift = highbit(blksize) - 1;
	dprintf("vdev_disk: Device %p ashift set to %d\n", devvp,
	    dvd->vd_ashift);

	*ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1;

	/*
	 * ### APPLE TODO ###
	 */
#ifdef illumos
	if (vd->vdev_wholedisk == 1) {
		int wce = 1;
		if (error == 0) {
			/*
			 * If we have the capability to expand, we'd have
			 * found out via success from DKIOCGMEDIAINFO{,EXT}.
			 * Adjust max_psize upward accordingly since we know
			 * we own the whole disk now.
			 */
			*max_psize = capacity * blksz;
		}

		/*
		 * Since we own the whole disk, try to enable disk write
		 * caching.  We ignore errors because it's OK if we can't
		 * do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);
	}
#endif

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational */
	vd->vdev_nonrot = B_FALSE;
	if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd,
	    0, context) == 0) {
		if (isssd)
			vd->vdev_nonrot = B_TRUE;
	}
	dprintf("ZFS: vdev_disk(%s) isSSD %d\n",
	    vd->vdev_path ? vd->vdev_path : "", isssd);

	dvd->vd_devvp = devvp;

out:
	if (error) {
		if (devvp) {
			vnode_close(devvp, fmode, context);
			dvd->vd_devvp = NULL;
		}
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
	}
	if (context)
		(void) vfs_context_rele(context);

	if (error)
		printf("ZFS: vdev_disk_open('%s') failed error %d\n",
		    vd->vdev_path ? vd->vdev_path : "", error);
	return (error);
}
/*
 * Device close routine
 *
 * VNOP close entry point for special (device) vnodes.  Dispatches to the
 * character or block device's d_close routine, but only on the *last*
 * reference (or on revoke for character devices), per the vcount() checks
 * below.  Returning 0 early means "nothing to do yet" — the driver close
 * is deferred until the final reference goes away.
 */
int
spec_close(struct vnop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	dev_t dev = vp->v_rdev;
	int (*devclose)(dev_t, int, int, struct proc *);
	int mode, error;
	int flags = ap->a_fflag;
	struct proc *p = vfs_context_proc(ap->a_context);
	struct session *sessp;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 1 (this is the very
		 * last close)
		 */
		sessp = proc_session(p);
		if (sessp != SESSION_NULL) {
			if ((vcount(vp) == 1) &&
			    (vp == sessp->s_ttyvp)) {
				/*
				 * Detach the tty from the session under the
				 * session lock, then drop the reference the
				 * session held on this vnode.
				 */
				session_lock(sessp);
				sessp->s_ttyvp = NULL;
				sessp->s_ttyvid = 0;
				sessp->s_ttyp = TTY_NULL;
				sessp->s_ttypgrpid = NO_PID;
				session_unlock(sessp);
				vnode_rele(vp);
			}
			session_rele(sessp);
		}

		devclose = cdevsw[major(dev)].d_close;
		mode = S_IFCHR;
		/*
		 * close on last reference or on vnode revoke call
		 */
		if ((flags & IO_REVOKE) != 0)
			break;
		if (vcount(vp) > 0)
			return (0);
		break;

	case VBLK:
		/*
		 * Since every use (buffer, vnode, swap, blockmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to zero, we are on last close.
		 */
		if (vcount(vp) > 0)
			return (0);

		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		if ((error = spec_fsync_internal(vp, MNT_WAIT, ap->a_context)))
			return (error);

		error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
		if (error)
			return (error);

		devclose = bdevsw[major(dev)].d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
		return (EBADF);
	}

	/* Last close (or revoke): hand off to the driver's close routine. */
	return ((*devclose)(dev, flags, mode, p));
}
/*
 * Open a disk-backed vdev (simpler variant): open vd->vdev_path as a
 * block-device vnode, verify it is unused, and report its size and
 * minimum transfer alignment.
 *
 * On success, *size receives the capacity in bytes and *ashift log2 of
 * the logical block size (floored at SPA_MINBLOCKSIZE); vd->vdev_tsd is
 * populated.  On failure vd->vdev_tsd is left NULL so the rest of the
 * spa will not try to reopen/close this device.
 * NOTE(review): max_size is accepted but never written here — callers
 * presumably pre-initialize it; confirm against the vdev ops contract.
 */
static int
vdev_disk_open(vdev_t *vd, uint64_t *size, uint64_t *max_size,
    uint64_t *ashift)
{
	vdev_disk_t *dvd = NULL;
	vnode_t *devvp = NULLVP;
	vfs_context_t context = NULL;
	uint64_t blkcnt;
	uint32_t blksize;
	int fmode = 0;
	int error = 0;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/* KM_SLEEP allocations block rather than fail, so the NULL check
	 * below is defensive only. */
	dvd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
	if (dvd == NULL)
		return ENOMEM;

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 *
	 */
	/* ### APPLE TODO ### */
	/* ddi_devid_str_decode */

	context = vfs_context_create((vfs_context_t)0);

	/* Obtain an opened/referenced vnode for the device. */
	error = vnode_open(vd->vdev_path, spa_mode(vd->vdev_spa), 0, 0,
	    &devvp, context);
	if (error) {
		goto out;
	}
	if (!vnode_isblk(devvp)) {
		error = ENOTBLK;
		goto out;
	}

	/* ### APPLE TODO ### */
	/* vnode_authorize devvp for KAUTH_VNODE_READ_DATA and
	 * KAUTH_VNODE_WRITE_DATA
	 */

	/*
	 * Disallow opening of a device that is currently in use.
	 * Flush out any old buffers remaining from a previous use.
	 */
	if ((error = vfs_mountedon(devvp))) {
		goto out;
	}
	if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) {
		error = ENOTBLK;
		goto out;
	}
	if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
		goto out;
	}

	/*
	 * Determine the actual size of the device.
	 */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0,
	    context) != 0 ||
	    VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0,
	    context) != 0) {
		error = EINVAL;
		goto out;
	}

	*size = blkcnt * (uint64_t)blksize;

	/*
	 *  ### APPLE TODO ###
	 * If we own the whole disk, try to enable disk write caching.
	 */

	/*
	 * Take the device's minimum transfer size into account.
	 */
	*ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1;

	/*
	 * Setting the vdev_ashift did in fact break the pool for import
	 * on ZEVO.  This puts the logic into question.  It appears that
	 * vdev_top will also then change.  It then panics in space_map
	 * from metaslab_alloc — hence the assignment stays disabled.
	 */
	//vd->vdev_ashift = *ashift;
	dvd->vd_ashift = *ashift;

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Success: publish the per-vdev state.  vd->vdev_tsd is only set
	 * here, so on any error path it remains NULL. */
	vd->vdev_tsd = dvd;
	dvd->vd_devvp = devvp;

out:
	if (error) {
		if (devvp)
			vnode_close(devvp, fmode, context);
		if (dvd)
			kmem_free(dvd, sizeof (vdev_disk_t));

		/*
		 * Since the open has failed, vd->vdev_tsd should
		 * be NULL when we get here, signaling to the
		 * rest of the spa not to try and reopen or close this device
		 */
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
	}
	if (context) {
		(void) vfs_context_rele(context);
	}
	return (error);
}
/*
 * Function: devfs_kernel_mount
 * Purpose:
 *	Mount devfs at the given mount point from within the kernel.
 *
 * Looks up `mntname`, validates it as an unmounted directory, builds a
 * struct mount by hand (mirroring what the regular mount path would do),
 * and calls devfs_mount() on it.  Returns 0 on success or an errno; on
 * failure all partially-initialized mount state is torn down.
 */
int
devfs_kernel_mount(char * mntname)
{
	struct mount *mp;
	int error;
	struct nameidata nd;
	struct vnode * vp;
	vfs_context_t ctx = vfs_context_kernel();
	struct vfstable *vfsp;

	/* Find our vfstable entry */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name)))
			break;

	if (!vfsp) {
		/* devfs is compiled in; missing entry is unrecoverable. */
		panic("Could not find entry in vfsconf for devfs.\n");
	}

	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(mntname), ctx);
	if ((error = namei(&nd))) {
		printf("devfs_kernel_mount: failed to find directory '%s', %d",
		    mntname, error);
		return (error);
	}
	nameidone(&nd);
	vp = nd.ni_vp;

	/* Flush and invalidate any cached data for the covered vnode
	 * before mounting over it. */
	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
		printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error);
		vnode_put(vp);
		return (error);
	}
	if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
		printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n",
		    error);
		vnode_put(vp);
		return (error);
	}
	if (vnode_isdir(vp) == 0) {
		printf("devfs_kernel_mount: '%s' is not a directory\n",
		    mntname);
		vnode_put(vp);
		return (ENOTDIR);
	}
	if ((vnode_mountedhere(vp))) {
		/* Something is already mounted on this directory. */
		vnode_put(vp);
		return (EBUSY);
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	MALLOC_ZONE(mp, struct mount *, sizeof(struct mount), M_MOUNT,
	    M_WAITOK);
	bzero((char *)mp, sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_ioflags = 0;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	mount_lock_init(mp);
	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	/* Mark the mount busy while we finish wiring it up; dropped via
	 * vfs_unbusy() on both the success and failure paths below. */
	(void)vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = &devfs_vfsops;
	mp->mnt_vtable = vfsp;
	mp->mnt_flag = 0;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	vp->v_mountedhere = mp;
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
#if CONFIG_MACF
	mac_mount_label_init(mp);
	mac_mount_label_associate(ctx, mp);
#endif

	error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx);
	if (error) {
		/* Unwind in reverse order of setup: refcount, busy state,
		 * lock, MAC label, then the mount structure itself. */
		printf("devfs_kernel_mount: mount %s failed: %d", mntname,
		    error);
		mp->mnt_vtable->vfc_refcount--;

		vfs_unbusy(mp);
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
		vnode_put(vp);
		return (error);
	}
	/* Keep a long-term reference on the covered vnode for the lifetime
	 * of the mount, then drop the iocount from namei(). */
	vnode_ref(vp);
	vnode_put(vp);
	vfs_unbusy(mp);
	mount_list_add(mp);
	return (0);
}