/* * Write out a history event. */ int spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what) { history_arg_t *ha; int err = 0; dmu_tx_t *tx; ASSERT(what != LOG_INTERNAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } ha = kmem_alloc(sizeof (history_arg_t), KM_PUSHPAGE); ha->ha_history_str = spa_strdup(history_str); ha->ha_zone = spa_strdup(spa_history_zone()); ha->ha_log_type = what; ha->ha_uid = crgetuid(CRED()); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL, spa_history_log_sync, spa, ha, 0, tx); dmu_tx_commit(tx); /* spa_history_log_sync will free ha and strings */ return (err); }
/* * Determine if the underlying device is accessible by reading and writing * to a known location. We must be able to do this during syncing context * and thus we cannot set the vdev state directly. */ static int vdev_file_probe(vdev_t *vd) { vdev_t *nvd; char *vl_boot; uint64_t offset; int l, error = 0, retries = 0; if (vd == NULL) return (EINVAL); /* Hijack the current vdev */ nvd = vd; /* * Pick a random label to rewrite. */ l = spa_get_random(VDEV_LABELS); ASSERT(l < VDEV_LABELS); offset = vdev_label_offset(vd->vdev_psize, l, offsetof(vdev_label_t, vl_boot_header)); vl_boot = kmem_alloc(VDEV_BOOT_HEADER_SIZE, KM_SLEEP); while ((error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, offset, UIO_READ)) != 0 && retries == 0) { /* * If we failed with the vdev that was passed in then * try allocating a new one and try again. */ nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); if (vd->vdev_path) nvd->vdev_path = spa_strdup(vd->vdev_path); nvd->vdev_guid = vd->vdev_guid; retries++; if (vdev_file_open_common(nvd) != 0) break; } if ((spa_mode & FWRITE) && !error) { error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE, offset, UIO_WRITE); } if (retries) { vdev_file_close(nvd); if (nvd->vdev_path) spa_strfree(nvd->vdev_path); kmem_free(nvd, sizeof (vdev_t)); } kmem_free(vl_boot, VDEV_BOOT_HEADER_SIZE); if (!error) vd->vdev_is_failing = B_FALSE; return (error); }
static dsl_prop_record_t * dsl_prop_record_create(dsl_dir_t *dd, const char *propname) { dsl_prop_record_t *pr; ASSERT(MUTEX_HELD(&dd->dd_lock)); pr = kmem_alloc(sizeof (dsl_prop_record_t), KM_SLEEP); pr->pr_propname = spa_strdup(propname); list_create(&pr->pr_cbs, sizeof (dsl_prop_cb_record_t), offsetof(dsl_prop_cb_record_t, cbr_pr_node)); list_insert_head(&dd->dd_props, pr); return (pr); }
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd = vd->vdev_tsd; ldi_ev_cookie_t ecookie; vdev_disk_ldi_cb_t *lcb; union { struct dk_minfo_ext ude; struct dk_minfo ud; } dks; struct dk_minfo_ext *dkmext = &dks.ude; struct dk_minfo *dkm = &dks.ud; int error; /* XXX Apple - must leave devid unchanged */ #ifdef illumos dev_t dev; int otyp; boolean_t validate_devid = B_FALSE; ddi_devid_t devid; #endif uint64_t capacity = 0, blksz = 0, pbsize; #ifdef __APPLE__ int isssd; #endif /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (dvd != NULL) { if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) { /* * If we are opening a device in its offline notify * context, the LDI handle was just closed. Clean * up the LDI event callbacks and free vd->vdev_tsd. */ vdev_disk_free(vd); } else { ASSERT(vd->vdev_reopening); goto skip_open; } } /* * Create vd->vdev_tsd. */ vdev_disk_alloc(vd); dvd = vd->vdev_tsd; /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the same device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ /* * XXX We must not set or modify the devid as this check would prevent * import on Solaris/illumos. */ #ifdef illumos if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; vdev_dbgmsg(vd, "vdev_disk_open: invalid " "vdev_devid '%s'", vd->vdev_devid); return (SET_ERROR(EINVAL)); } } #endif error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { /* * XXX This assumes that if vdev_path refers to a device path /dev/dsk/cNtNdN, * then the whole disk can be found by slice 0 at path /dev/dsk/cNtNdNs0. */ #ifdef illumos if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); (void) snprintf(buf, len, "%ss0", vd->vdev_path); error = ldi_open_by_name(buf, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); if (error == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; } else { kmem_free(buf, len); } } #endif /* * If we have not yet opened the device, try to open it by the * specified path. */ if (error != 0) { error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } /* XXX Apple - must leave devid unchanged */ #ifdef illumos /* * Compare the devid to the stored value. */ if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { error = SET_ERROR(EINVAL); (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; } ddi_devid_free(devid); } #endif /* * If we succeeded in opening the device, but 'vdev_wholedisk' * is not yet set, then this must be a slice. */ if (error == 0 && vd->vdev_wholedisk == -1ULL) vd->vdev_wholedisk = 0; } /* XXX Apple - must leave devid unchanged */ #ifdef illumos /* * If we were unable to open by path, or the devid check fails, open by * devid instead. */ if (error != 0 && vd->vdev_devid != NULL) { error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } #endif /* * If all else fails, then try opening by physical path (if available) * or the logical path (if we failed due to the devid check). While not * as reliable as the devid, this will give us something, and the higher * level vdev validation will prevent us from opening the wrong device. */ if (error) { /* XXX Apple - must leave devid unchanged */ #ifdef illumos if (vd->vdev_devid != NULL) validate_devid = B_TRUE; #endif /* XXX Apple to do - make ddi_ interface for this, using IORegistry path */ #ifdef illumos if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); #endif /* * Note that we don't support the legacy auto-wholedisk support * as above. This hasn't been used in a very long time and we * don't need to propagate its oddities to this edge condition. */ if (error && vd->vdev_path != NULL) error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]", error); return (error); } /* * XXX Apple - We must not set or modify the devid. Import on Solaris/illumos * expects a valid devid and fails if it cannot be decoded. */ #ifdef illumos /* * Now that the device has been successfully opened, update the devid * if necessary. */ if (validate_devid && spa_writeable(spa) && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { char *vd_devid; vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor); vdev_dbgmsg(vd, "vdev_disk_open: update devid from " "'%s' to '%s'", vd->vdev_devid, vd_devid); spa_strfree(vd->vdev_devid); vd->vdev_devid = spa_strdup(vd_devid); ddi_devid_str_free(vd_devid); } ddi_devid_free(devid); } #endif /* XXX Apple to do, needs IORegistry physpath interface */ #ifdef illumos /* * Once a device is opened, verify that the physical device path (if * available) is up to date. */ if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { char *physpath, *minorname; physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); minorname = NULL; if (ddi_dev_pathname(dev, otyp, physpath) == 0 && ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && (vd->vdev_physpath == NULL || strcmp(vd->vdev_physpath, physpath) != 0)) { if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); (void) strlcat(physpath, ":", MAXPATHLEN); (void) strlcat(physpath, minorname, MAXPATHLEN); vd->vdev_physpath = spa_strdup(physpath); } if (minorname) kmem_free(minorname, strlen(minorname) + 1); kmem_free(physpath, MAXPATHLEN); } #endif /* * Register callbacks for the LDI offline event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); } /* XXX Apple to do - we could support the degrade event, or just no-op */ #ifdef illumos /* * Register callbacks for the LDI degrade event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id); } #endif #if 0 int len = MAXPATHLEN; if (vn_getpath(devvp, dvd->vd_readlinkname, &len) == 0) { dprintf("ZFS: '%s' resolved name is '%s'\n", vd->vdev_path, dvd->vd_readlinkname); } else { dvd->vd_readlinkname[0] = 0; } #endif skip_open: /* * Determine the actual size of the device. */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; vdev_dbgmsg(vd, "vdev_disk_open: failed to get size"); return (SET_ERROR(EINVAL)); } *max_psize = *psize; /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. */ if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) { capacity = dkmext->dki_capacity - 1; blksz = dkmext->dki_lbsize; pbsize = dkmext->dki_pbsize; } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) { VDEV_DEBUG( "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n", vd->vdev_path); capacity = dkm->dki_capacity - 1; blksz = dkm->dki_lbsize; pbsize = blksz; } else { VDEV_DEBUG("vdev_disk_open(\"%s\"): " "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", vd->vdev_path, error); pbsize = DEV_BSIZE; } *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; /* XXX Now that we opened the device, determine if it is a whole disk. */ #ifdef __APPLE__ /* * XXX Apple to do - provide an ldi_ mechanism * to report whether this is a whole disk or a * partition. * Return 0 (no), 1 (yes), or -1 (error). */ // vd->vdev_wholedisk = ldi_is_wholedisk(vd->vd_lh); #endif if (vd->vdev_wholedisk == 1) { int wce = 1; /* Gets information about the disk if it has GPT partitions */ #ifdef illumos if (error == 0) { /* * If we have the capability to expand, we'd have * found out via success from DKIOCGMEDIAINFO{,EXT}. * Adjust max_psize upward accordingly since we know * we own the whole disk now. */ *max_psize = capacity * blksz; } #endif /* * Since we own the whole disk, try to enable disk write * caching. We ignore errors because it's OK if we can't do it. */ (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; #ifdef __APPLE__ /* Inform the ZIO pipeline that we are non-rotational */ vd->vdev_nonrot = B_FALSE; if (ldi_ioctl(dvd->vd_lh, DKIOCISSOLIDSTATE, (intptr_t)&isssd, FKIOCTL, kcred, NULL) == 0) { vd->vdev_nonrot = (isssd ? B_TRUE : B_FALSE); } #endif //__APPLE__ return (0); }
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd; ldi_ev_cookie_t ecookie; vdev_disk_ldi_cb_t *lcb; union { struct dk_minfo_ext ude; struct dk_minfo ud; } dks; struct dk_minfo_ext *dkmext = &dks.ude; struct dk_minfo *dkm = &dks.ud; int error; dev_t dev; int otyp; boolean_t validate_devid = B_FALSE; ddi_devid_t devid; uint64_t capacity = 0, blksz = 0, pbsize; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } rw_enter(&vd->vdev_tsd_lock, RW_WRITER); dvd = vd->vdev_tsd; /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (dvd != NULL) { ASSERT(vd->vdev_reopening); /* * Here vd_lh is protected by vdev_tsd_lock */ ASSERT(dvd->vd_lh != NULL); /* This should not happen, but let's be safe */ if (dvd->vd_lh == NULL) { /* What are we going to do here??? */ rw_exit(&vd->vdev_tsd_lock); return (SET_ERROR(ENXIO)); } goto skip_open; } /* * Create dvd to be used as vd->vdev_tsd. */ vd->vdev_tsd = dvd = vdev_disk_alloc(); /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the same device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; vdev_disk_free_locked(vd); rw_exit(&vd->vdev_tsd_lock); return (SET_ERROR(EINVAL)); } } error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); (void) snprintf(buf, len, "%ss0", vd->vdev_path); error = ldi_open_by_name(buf, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); if (error == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; } else { kmem_free(buf, len); } } /* * If we have not yet opened the device, try to open it by the * specified path. */ if (error != 0) { error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } /* * Compare the devid to the stored value. */ if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { error = SET_ERROR(EINVAL); (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; } ddi_devid_free(devid); } /* * If we succeeded in opening the device, but 'vdev_wholedisk' * is not yet set, then this must be a slice. */ if (error == 0 && vd->vdev_wholedisk == -1ULL) vd->vdev_wholedisk = 0; } /* * If we were unable to open by path, or the devid check fails, open by * devid instead. */ if (error != 0 && vd->vdev_devid != NULL) { error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } /* * If all else fails, then try opening by physical path (if available) * or the logical path (if we failed due to the devid check). While not * as reliable as the devid, this will give us something, and the higher * level vdev validation will prevent us from opening the wrong device. */ if (error) { if (vd->vdev_devid != NULL) validate_devid = B_TRUE; if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Note that we don't support the legacy auto-wholedisk support * as above. This hasn't been used in a very long time and we * don't need to propagate its oddities to this edge condition. */ if (error && vd->vdev_path != NULL) error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; vdev_disk_free_locked(vd); rw_exit(&vd->vdev_tsd_lock); return (error); } /* * Now that the device has been successfully opened, update the devid * if necessary. */ if (validate_devid && spa_writeable(spa) && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { char *vd_devid; vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor); zfs_dbgmsg("vdev %s: update devid from %s, " "to %s", vd->vdev_path, vd->vdev_devid, vd_devid); spa_strfree(vd->vdev_devid); vd->vdev_devid = spa_strdup(vd_devid); ddi_devid_str_free(vd_devid); } ddi_devid_free(devid); } /* * Once a device is opened, verify that the physical device path (if * available) is up to date. */ if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { char *physpath, *minorname; physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); minorname = NULL; if (ddi_dev_pathname(dev, otyp, physpath) == 0 && ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && (vd->vdev_physpath == NULL || strcmp(vd->vdev_physpath, physpath) != 0)) { if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); (void) strlcat(physpath, ":", MAXPATHLEN); (void) strlcat(physpath, minorname, MAXPATHLEN); vd->vdev_physpath = spa_strdup(physpath); } if (minorname) kmem_free(minorname, strlen(minorname) + 1); kmem_free(physpath, MAXPATHLEN); } /* * Register callbacks for the LDI offline event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id); } /* * Register callbacks for the LDI degrade event. */ if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) == LDI_EV_SUCCESS) { lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP); list_insert_tail(&dvd->vd_ldi_cbs, lcb); (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie, &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id); } /* Reset TRIM flag, as underlying device support may have changed */ vd->vdev_notrim = B_FALSE; skip_open: ASSERT(dvd != NULL); /* * Determine the actual size of the device. */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; vdev_disk_free_locked(vd); rw_exit(&vd->vdev_tsd_lock); return (SET_ERROR(EINVAL)); } *max_psize = *psize; /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. */ if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) { capacity = dkmext->dki_capacity - 1; blksz = dkmext->dki_lbsize; pbsize = dkmext->dki_pbsize; } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) { VDEV_DEBUG( "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n", vd->vdev_path); capacity = dkm->dki_capacity - 1; blksz = dkm->dki_lbsize; pbsize = blksz; } else { VDEV_DEBUG("vdev_disk_open(\"%s\"): " "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", vd->vdev_path, error); pbsize = DEV_BSIZE; } *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; if (vd->vdev_wholedisk == 1) { int wce = 1; if (error == 0) { /* * If we have the capability to expand, we'd have * found out via success from DKIOCGMEDIAINFO{,EXT}. * Adjust max_psize upward accordingly since we know * we own the whole disk now. */ *max_psize += vdev_disk_get_space(vd, capacity, blksz); zfs_dbgmsg("capacity change: vdev %s, psize %llu, " "max_psize %llu", vd->vdev_path, *psize, *max_psize); } /* * Since we own the whole disk, try to enable disk write * caching. We ignore errors because it's OK if we can't do it. */ (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } /* * We are done with vd_lh and vdev_tsd, release the vdev_tsd_lock */ rw_exit(&vd->vdev_tsd_lock); /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; return (0); }
static int vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) { spa_t *spa = vd->vdev_spa; vdev_disk_t *dvd; struct dk_minfo_ext dkmext; int error; dev_t dev; int otyp; /* * We must have a pathname, and it must be absolute. */ if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } /* * Reopen the device if it's not currently open. Otherwise, * just update the physical size of the device. */ if (vd->vdev_tsd != NULL) { ASSERT(vd->vdev_reopening); dvd = vd->vdev_tsd; goto skip_open; } dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); /* * When opening a disk device, we want to preserve the user's original * intent. We always want to open the device by the path the user gave * us, even if it is one of multiple paths to the save device. But we * also want to be able to survive disks being removed/recabled. * Therefore the sequence of opening devices is: * * 1. Try opening the device by path. For legacy pools without the * 'whole_disk' property, attempt to fix the path by appending 's0'. * * 2. If the devid of the device matches the stored value, return * success. * * 3. Otherwise, the device may have moved. Try opening the device * by the devid instead. */ if (vd->vdev_devid != NULL) { if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid, &dvd->vd_minor) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (EINVAL); } } error = EINVAL; /* presume failure */ if (vd->vdev_path != NULL) { ddi_devid_t devid; if (vd->vdev_wholedisk == -1ULL) { size_t len = strlen(vd->vdev_path) + 3; char *buf = kmem_alloc(len, KM_SLEEP); ldi_handle_t lh; (void) snprintf(buf, len, "%ss0", vd->vdev_path); if (ldi_open_by_name(buf, spa_mode(spa), kcred, &lh, zfs_li) == 0) { spa_strfree(vd->vdev_path); vd->vdev_path = buf; vd->vdev_wholedisk = 1ULL; (void) ldi_close(lh, spa_mode(spa), kcred); } else { kmem_free(buf, len); } } error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Compare the devid to the stored value. */ if (error == 0 && vd->vdev_devid != NULL && ldi_get_devid(dvd->vd_lh, &devid) == 0) { if (ddi_devid_compare(devid, dvd->vd_devid) != 0) { error = EINVAL; (void) ldi_close(dvd->vd_lh, spa_mode(spa), kcred); dvd->vd_lh = NULL; } ddi_devid_free(devid); } /* * If we succeeded in opening the device, but 'vdev_wholedisk' * is not yet set, then this must be a slice. */ if (error == 0 && vd->vdev_wholedisk == -1ULL) vd->vdev_wholedisk = 0; } /* * If we were unable to open by path, or the devid check fails, open by * devid instead. */ if (error != 0 && vd->vdev_devid != NULL) error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * If all else fails, then try opening by physical path (if available) * or the logical path (if we failed due to the devid check). While not * as reliable as the devid, this will give us something, and the higher * level vdev validation will prevent us from opening the wrong device. */ if (error) { if (vd->vdev_physpath != NULL && (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); /* * Note that we don't support the legacy auto-wholedisk support * as above. This hasn't been used in a very long time and we * don't need to propagate its oddities to this edge condition. */ if (error && vd->vdev_path != NULL) error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred, &dvd->vd_lh, zfs_li); } if (error) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (error); } /* * Once a device is opened, verify that the physical device path (if * available) is up to date. */ if (ldi_get_dev(dvd->vd_lh, &dev) == 0 && ldi_get_otyp(dvd->vd_lh, &otyp) == 0) { char *physpath, *minorname; physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); minorname = NULL; if (ddi_dev_pathname(dev, otyp, physpath) == 0 && ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 && (vd->vdev_physpath == NULL || strcmp(vd->vdev_physpath, physpath) != 0)) { if (vd->vdev_physpath) spa_strfree(vd->vdev_physpath); (void) strlcat(physpath, ":", MAXPATHLEN); (void) strlcat(physpath, minorname, MAXPATHLEN); vd->vdev_physpath = spa_strdup(physpath); } if (minorname) kmem_free(minorname, strlen(minorname) + 1); kmem_free(physpath, MAXPATHLEN); } skip_open: /* * Determine the actual size of the device. */ if (ldi_get_size(dvd->vd_lh, psize) != 0) { vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; return (EINVAL); } /* * If we own the whole disk, try to enable disk write caching. * We ignore errors because it's OK if we can't do it. */ if (vd->vdev_wholedisk == 1) { int wce = 1; (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL, kcred, NULL); } /* * Determine the device's minimum transfer size. * If the ioctl isn't supported, assume DEV_BSIZE. */ if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext, FKIOCTL, kcred, NULL) != 0) dkmext.dki_pbsize = DEV_BSIZE; *ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1; /* * Clear the nowritecache bit, so that on a vdev_reopen() we will * try again. */ vd->vdev_nowritecache = B_FALSE; return (0); }
/* * Allocate a new vdev. The 'alloctype' is used to control whether we are * creating a new vdev or loading an existing one - the behavior is slightly * different for each case. */ int vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id, int alloctype) { vdev_ops_t *ops; char *type; uint64_t guid = 0; vdev_t *vd; ASSERT(spa_config_held(spa, RW_WRITER)); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) return (EINVAL); if ((ops = vdev_getops(type)) == NULL) return (EINVAL); /* * If this is a load, get the vdev guid from the nvlist. * Otherwise, vdev_alloc_common() will generate one for us. */ if (alloctype == VDEV_ALLOC_LOAD) { uint64_t label_id; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &label_id) || label_id != id) return (EINVAL); if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) return (EINVAL); } else if (alloctype == VDEV_ALLOC_SPARE) { if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) return (EINVAL); } /* * The first allocated vdev must be of type 'root'. */ if (ops != &vdev_root_ops && spa->spa_root_vdev == NULL) return (EINVAL); vd = vdev_alloc_common(spa, id, guid, ops); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0) vd->vdev_path = spa_strdup(vd->vdev_path); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0) vd->vdev_devid = spa_strdup(vd->vdev_devid); /* * Set the nparity propery for RAID-Z vdevs. */ if (ops == &vdev_raidz_ops) { if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &vd->vdev_nparity) == 0) { /* * Currently, we can only support 2 parity devices. */ if (vd->vdev_nparity > 2) return (EINVAL); /* * Older versions can only support 1 parity device. */ if (vd->vdev_nparity == 2 && spa_version(spa) < ZFS_VERSION_RAID6) return (ENOTSUP); } else { /* * We require the parity to be specified for SPAs that * support multiple parity levels. */ if (spa_version(spa) >= ZFS_VERSION_RAID6) return (EINVAL); /* * Otherwise, we default to 1 parity device for RAID-Z. */ vd->vdev_nparity = 1; } } else { vd->vdev_nparity = 0; } /* * Set the whole_disk property. If it's not specified, leave the value * as -1. */ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &vd->vdev_wholedisk) != 0) vd->vdev_wholedisk = -1ULL; /* * Look for the 'not present' flag. This will only be set if the device * was not present at the time of import. */ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &vd->vdev_not_present); /* * Get the alignment requirement. */ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &vd->vdev_ashift); /* * If we're a top-level vdev, try to load the allocation parameters. */ if (parent && !parent->vdev_parent && alloctype == VDEV_ALLOC_LOAD) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY, &vd->vdev_ms_array); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT, &vd->vdev_ms_shift); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASIZE, &vd->vdev_asize); } /* * If we're a leaf vdev, try to load the DTL object and offline state. */ if (vd->vdev_ops->vdev_op_leaf && alloctype == VDEV_ALLOC_LOAD) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL, &vd->vdev_dtl.smo_object); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); } /* * Add ourselves to the parent's list of children. */ vdev_add_child(parent, vd); *vdp = vd; return (0); }