/* ARGSUSED3 */ static int return_a_hs( set_t setno, mddb_recid_t id, mddb_recid_t *hs_id, mdkey_t key, diskaddr_t sblock, uint64_t size, hotspare_states_t new_state) { hot_spare_pool_t *hsp; hot_spare_t *hs; int i; /* * NOTE: sblock/size are not currently being used. * That is because we always allocate the whole hs. * Later if we choose to allocate only what is needed * then the sblock/size can be used to determine * which part is being unreseved. */ *hs_id = 0; hsp = find_hot_spare_pool(setno, id); if (hsp == NULL) return (-1); for (i = 0; i < hsp->hsp_nhotspares; i++) { hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1); if (hs->hs_key != key) continue; set_hot_spare_state(hs, new_state); *hs_id = hs->hs_record_id; if (new_state == HSS_BROKEN) { SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS, setno, hs->hs_devnum); } if (new_state == HSS_AVAILABLE) { SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, setno, hs->hs_devnum); } /* NOTE: Mirror/Raid code commits the hs record */ return (0); } return (-1); }
/*
 * NAME:	raid_replay_error
 *
 * DESCRIPTION: RAID metadevice replay error handling routine.  Puts the
 *		given column into the RCS_ERRED state, commits the unit
 *		and posts a sysevent matching the resulting unit state.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit being replayed
 *		int    column - index of the column that failed
 *
 * RETURNS:	RAID_RPLY_READONLY when the unit ends up in RUS_LAST_ERRED,
 *		RAID_RPLY_COMPREPLAY otherwise.
 */
static int
raid_replay_error(mr_unit_t *un, int column)
{
	raid_set_state(un, column, RCS_ERRED, 0);
	raid_commit(un, NULL);

	if (UNIT_STATE(un) == RUS_LAST_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
		return (RAID_RPLY_READONLY);
	}

	if (UNIT_STATE(un) == RUS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	return (RAID_RPLY_COMPREPLAY);
}
/*
 * NAME:	raid_resync_unit
 *
 * DESCRIPTION: RAID metadevice specific resync routine.
 *		Opens the unit and starts resync_unit as a separate thread.
 *
 * PARAMETERS:	minor_t	  mnum - minor number identity of metadevice
 *		md_error_t *ep - output error parameter
 *
 * RETURN:	0 once the resync thread has been created.  Otherwise the
 *		value handed back by mdmddberror() (set is stale) or
 *		mdmderror() (unit unavailable or open failed), with the
 *		error recorded in *ep.
 *
 * LOCKS:	Acquires and releases Unit Writer Lock (only on the
 *		open-failure path, around release_resync_request()).
 */
int
raid_resync_unit(
	minor_t			mnum,
	md_error_t		*ep
)
{
	set_t		setno = MD_MIN2SET(mnum);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mr_unit_t	*un = MD_UNIT(mnum);

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

	ASSERT(un->un_column[un->un_resync_index].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/* Don't start a resync if the device is not available */
	if (ui == NULL)
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
	if (ui->ui_tstate & MD_DEV_ERRORED)
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));

	if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0) == 0) {
		/* start resync_unit thread */
		(void) thread_create(NULL, 0, resync_unit,
		    (void *)(uintptr_t)mnum, 0, &p0, TS_RUN, minclsyspri);
		return (0);
	}

	/* the open failed: withdraw the pending resync request and report */
	(void) md_unit_writerlock(ui);
	release_resync_request(mnum);
	md_unit_writerexit(ui);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
	    setno, MD_SID(un));

	return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
}
/*
 * NAME:	resync_comp
 *
 * DESCRIPTION: Resync the component.  Iterate through the raid unit a line at
 *		a time, read from the good device(s) and write the resync
 *		device.
 *
 *		The routine runs in three phases:
 *		  1. under the io + unit writer locks, pick the device to
 *		     write (the alternate device if one is configured,
 *		     otherwise the column's own device), allocate the
 *		     copy buffers and initialize the pre-write area;
 *		  2. with the writer locks dropped, resync the column
 *		     line_count segments at a time via raid_resync_region();
 *		  3. under the writer locks again, update column and unit
 *		     state according to the outcome (err), commit, and
 *		     post the matching sysevents.
 *
 * PARAMETERS:	minor_t	   mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save struct
 *
 * RETURN:	None (the function is void).  The outcome is reflected in
 *		the committed column/unit state and in the sysevent posted
 *		(ESC_SVM_RESYNC_DONE or ESC_SVM_RESYNC_FAILED).
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call; it is
 *		dropped on entry and held again on every return path.
 *		Acquires and releases Line Reader Lock for per-line I/O.
 */
static void
resync_comp(
	minor_t		mnum,
	md_raidcs_t	*cs
)
{
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	mddb_recid_t	recids[2];
	rcs_state_t	state;
	md_dev64_t	dev_to_write;
	diskaddr_t	write_pwstart;
	diskaddr_t	write_devstart;
	md_dev64_t	dev;
	int		resync;
	int		i;
	int		single_read = 0;
	/*
	 * NOTE(review): err has no initializer.  It is assigned either on
	 * the init_pw_area() failure path or inside the per-line loop, so
	 * the switch below relies on the loop running at least once
	 * (un_segsincolumn > 0) -- confirm that always holds.
	 */
	int		err;
	int		err_cnt;
	int		last_err;
	diskaddr_t	line;
	diskaddr_t	segsincolumn;
	size_t		bsize;
	uint_t		line_count;

	/*
	 * hs_state is the state of the hotspare on the column being resynced
	 * dev_state is the state of the resync target
	 */
	hs_cmds_t	hs_state;
	int		err_col = -1;
	diskaddr_t	resync_end_pos;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = cs->cs_un;

	/* trade the caller's unit reader lock for the io + unit writer locks */
	md_unit_readerexit(ui);
	un = (mr_unit_t *)md_io_writerlock(ui);
	un = (mr_unit_t *)md_unit_writerlock(ui);
	resync = un->un_resync_index;
	state = un->un_column[resync].un_devstate;
	line_count = un->un_maxio / un->un_segsize;
	if (line_count == 0) {
		/* handle the case of segsize > maxio */
		line_count = 1;
		bsize = un->un_maxio;
	} else
		bsize = line_count * un->un_segsize;

	un->un_resync_copysize = (uint_t)bsize;

	ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE);
	ASSERT(un->un_column[resync].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/*
	 * if the column is not in resync then just bail out.
	 */
	if (! (un->un_column[resync].un_devstate & RCS_RESYNC)) {
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		return;
	}
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	/* identify device to write and its start block */

	if (un->un_column[resync].un_alt_dev != NODEV64) {
		/* an alternate device is configured: it is the copy target */
		if (raid_open_alt(un, resync)) {
			/* restore the column state sampled on entry */
			raid_set_state(un, resync, state, 0);
			md_unit_writerexit(ui);
			md_io_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			cmn_err(CE_WARN, "md: %s: %s open failed replace "
			    "terminated", md_shortname(MD_SID(un)),
			    md_devname(MD_UN2SET(un),
			    un->un_column[resync].un_alt_dev,
			    NULL, 0));
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
			return;
		}
		ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC);
		dev_to_write = un->un_column[resync].un_alt_dev;
		write_devstart = un->un_column[resync].un_alt_devstart;
		write_pwstart = un->un_column[resync].un_alt_pwstart;
		if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) {
			single_read = 0;
			hs_state = HS_BAD;
		} else {
			hs_state = HS_FREE;
			single_read = 1;
		}
		un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT;
	} else {
		/* no alternate device: regenerate onto the column itself */
		dev_to_write = un->un_column[resync].un_dev;
		write_devstart = un->un_column[resync].un_devstart;
		write_pwstart = un->un_column[resync].un_pwstart;
		single_read = 0;
		hs_state = HS_FREE;
		ASSERT(un->un_column[resync].un_devflags &
		    MD_RAID_REGEN_RESYNC);
	}

	/* from here on every exit goes through free_bufs() at the bottom */
	alloc_bufs(cs, dbtob(bsize));
	/* initialize pre-write area */
	if (init_pw_area(un, dev_to_write, write_pwstart, resync)) {
		un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
		if (un->un_column[resync].un_alt_dev != NODEV64) {
			raid_close_alt(un, resync);
		}
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		/*
		 * NOTE(review): un is read here after both writer locks
		 * were released just above -- confirm this is safe.
		 */
		if (dev_to_write == un->un_column[resync].un_dev)
			hs_state = HS_BAD;
		err = RAID_RESYNC_WRERROR;
		goto resync_comp_error;
	}

	un->c.un_status &= ~MD_UN_RESYNC_CANCEL;
	segsincolumn = un->un_segsincolumn;
	/* baseline count of errored columns, compared after each pass */
	err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED);

	/* drop the writer locks for the duration of the copy */

	md_unit_writerexit(ui);
	md_io_writerexit(ui);

	/* resync each line of the unit */
	for (line = 0; line < segsincolumn; line += line_count) {
		/*
		 * Update address range in child struct and lock the line.
		 *
		 * The reader version of the line lock is used since only
		 * resync will use data beyond un_resync_line_index on the
		 * resync device.
		 */
		un = (mr_unit_t *)md_io_readerlock(ui);
		/* clamp the final pass to the end of the column */
		if (line + line_count > segsincolumn)
			line_count = segsincolumn - line;
		resync_end_pos = raid_resync_fillin_cs(line, line_count, cs);
		(void) md_unit_readerlock(ui);
		ASSERT(un->un_resync_line_index == resync_end_pos);
		err = raid_resync_region(cs, line, (int)line_count,
		    &single_read, &hs_state, &err_col, dev_to_write,
		    write_devstart);

		/*
		 * if the column failed to resync then stop writing directly
		 * to the column.
		 */
		if (err)
			un->un_resync_line_index = 0;

		md_unit_readerexit(ui);
		raid_line_exit(cs);
		md_io_readerexit(ui);

		if (err)
			break;

		un = (mr_unit_t *)md_unit_writerlock(ui);

		/* abort if the number of errored columns changed meanwhile */
		if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) {
			err = RAID_RESYNC_STATE;
			md_unit_writerexit(ui);
			break;
		}
		md_unit_writerexit(ui);
	} /* for */

resync_comp_error:
	/* wrap-up phase: reacquire both writer locks (neither is held here) */
	un = (mr_unit_t *)md_io_writerlock(ui);
	(void) md_unit_writerlock(ui);
	un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;

	recids[0] = 0;
	recids[1] = 0;
	switch (err) {
		/*
		 * successful resync
		 */
	case RAID_RESYNC_OKAY:
		/* was the copy target the column's original device? */
		if ((un->un_column[resync].un_orig_dev != NODEV64) &&
		    (un->un_column[resync].un_orig_dev ==
		    un->un_column[resync].un_alt_dev)) {
			/*
			 * replacing a hot spare
			 * release the hot spare, which will close the hotspare
			 * and mark it closed.
			 */
			raid_hs_release(hs_state, un, &recids[0], resync);
			/*
			 * make the resync target the main device and
			 * mark open
			 */
			un->un_column[resync].un_hs_id = 0;
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
			un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN;
			/* alt becomes the device so don't close it */
			un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
			un->un_column[resync].un_devflags &=
			    ~MD_RAID_ALT_ISOPEN;
			un->un_column[resync].un_alt_dev = NODEV64;
		}
		raid_set_state(un, resync, RCS_OKAY, 0);
		break;

	case RAID_RESYNC_WRERROR:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: %s write error, replace "
			    "terminated", md_shortname(MD_SID(un)),
			    md_devname(MD_UN2SET(un),
			    un->un_column[resync].un_orig_dev,
			    NULL, 0));
			break;
		}
		if (HOTSPARED(un, resync)) {
			/* give the hot spare back and revert to the original */
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		raid_set_state(un, resync, RCS_ERRED, 0);
		/* name the device that was actually being written */
		if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC)
			dev = un->un_column[resync].un_dev;
		else
			dev = un->un_column[resync].un_alt_dev;
		cmn_err(CE_WARN, "md: %s: %s write error replace terminated",
		    md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev,
		    NULL, 0));
		break;

	case RAID_RESYNC_STATE:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: needs maintenance, replace "
			    "terminated", md_shortname(MD_SID(un)));
			break;
		}
		if (HOTSPARED(un, resync)) {
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		break;

	case RAID_RESYNC_RDERROR:
		if (HOTSPARED(un, resync)) {
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}

		/* the read failed on some other column: mark that one erred */
		if ((resync != err_col) && (err_col != NOCOLUMN))
			raid_set_state(un, err_col, RCS_ERRED, 0);
		break;

	default:
		ASSERT(0);
	}
	if (un->un_column[resync].un_alt_dev != NODEV64) {
		raid_close_alt(un, resync);
	}

	/*
	 * an io operation may have gotten an error and placed a
	 * column in erred state.  This will abort the resync, which
	 * will end up in last erred.  This is ugly so go through
	 * the columns and do cleanup
	 */
	err_cnt = 0;
	last_err = 0;
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (un->un_column[i].un_devstate & RCS_OKAY)
			continue;
		if (i == resync) {
			raid_set_state(un, i, RCS_ERRED, 1);
			err_cnt++;
		} else if (err == RAID_RESYNC_OKAY) {
			err_cnt++;
		} else {
			raid_set_state(un, i, RCS_LAST_ERRED, 1);
			last_err++;
		}
	}
	/* derive the unit state from the per-column tallies */
	if ((err_cnt == 0) && (last_err == 0))
		un->un_state = RUS_OKAY;
	else if (last_err == 0) {
		un->un_state = RUS_ERRED;
		ASSERT(err_cnt == 1);
	} else if (last_err > 0) {
		un->un_state = RUS_LAST_ERRED;
	}

	uniqtime32(&un->un_column[resync].un_devtimestamp);
	un->un_resync_copysize = 0;
	un->un_column[resync].un_devflags &=
	    ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC);
	raid_commit(un, recids);
	/* release unit writer lock and acquire unit reader lock */
	md_unit_writerexit(ui);
	md_io_writerexit(ui);
	(void) md_unit_readerlock(ui);
	if (err == RAID_RESYNC_OKAY) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) > 1) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	free_bufs(dbtob(bsize), cs);
}
/*
 * NAME:	check_comp_4_hs
 *
 * DESCRIPTION: Check whether the input component has an error and can be
 *		backed with a hot spare (RCS_ERRED state), and initiate
 *		a resync if so.
 *
 *		A hot spare is only requested when the column is in
 *		RCS_ERRED, it is the only column in RCS_ERRED or
 *		RCS_LAST_ERRED, and no column is resyncing.  If the
 *		hot spare obtained cannot be opened it is released as
 *		HS_BAD and another one is requested (the "again" loop).
 *
 * PARAMETERS:	mr_unit_t *un - raid unit
 *		int hs_index	- component to check
 *
 * LOCKS:	The routine calls md_unit_readerexit() on entry, so the
 *		caller is expected to hold the Unit Reader Lock.  It works
 *		under the IO and Unit Writer Locks, drops them before
 *		calling raid_resync_unit, and holds the Unit Reader Lock
 *		again on every return path.
 */
static void
check_comp_4_hs(
	mr_unit_t *un,
	int hs_index
)
{
	mddb_recid_t	recids[3];
	minor_t		mnum = MD_SID(un);
	mdi_unit_t	*ui;
	rcs_state_t	state;
	diskaddr_t	size;
	int		err;
	mr_column_t	*col;
	md_error_t	mde = mdnullerror;
	char		devname[MD_MAX_CTDLEN];
	char		hs_devname[MD_MAX_CTDLEN];
	set_t		setno;
	md_dev64_t	tmpdev;
	diskaddr_t	tmpdaddr;

	/* initialize: swap the reader lock for the io + unit writer locks */
	setno = MD_UN2SET(un);
	ui = MDI_UNIT(mnum);
	md_unit_readerexit(ui);
	(void) md_io_writerlock(ui);
	un = (mr_unit_t *)md_unit_writerlock(ui);
	col = &un->un_column[hs_index];

	/*
	 * add a hotspare for erred column only if not resyncing
	 */
	if ((!(COLUMN_STATE(un, hs_index) & RCS_ERRED)) ||
	    (raid_state_cnt(un, (RCS_ERRED | RCS_LAST_ERRED)) != 1) ||
	    (raid_state_cnt(un, RCS_RESYNC) > 0)) {
		goto errout;
	}

	recids[0] = 0;
	recids[1] = 0;
	/*
	 * if the erred column is already hot spared, the hot spare itself
	 * has failed: release it as bad, fall back to the original device
	 * and carry on to look for a replacement hot spare.
	 */
	if (HOTSPARED(un, hs_index) && (col->un_devstate & RCS_ERRED)) {
		raid_hs_release(HS_BAD, un, &recids[0], hs_index);
		cmn_err(CE_WARN, "md: %s: %s hotspare errored and released",
		    md_shortname(mnum),
		    md_devname(MD_MIN2SET(mnum), col->un_dev, NULL, 0));
		col->un_dev = col->un_orig_dev;
		col->un_pwstart = col->un_orig_pwstart;
		col->un_devstart = col->un_orig_devstart;
		raid_commit(un, recids);

		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_METADEVICE,
		    setno, MD_SID(un));
	}
	ASSERT(!HOTSPARED(un, hs_index));

	/* remember the column state so failures below can restore it */
	state = col->un_devstate;
	/* space the hot spare must provide: pre-write area plus column data */
	size = col->un_pwstart + un->un_pwsize +
	    (un->un_segsize * un->un_segsincolumn);

again:
	/* quit if resync is already active */
	col->un_devflags |= MD_RAID_REGEN_RESYNC;
	if (resync_request(mnum, hs_index, 0, NULL))
		goto errout;

	recids[0] = 0;
	recids[1] = 0;

	tmpdev = col->un_dev;
	tmpdaddr = col->un_hs_pwstart;

	/* get a hotspare */
	if (md_hot_spare_ifc(HS_GET, un->un_hsp_id, size,
	    ((col->un_orig_pwstart >= 1) &&
	    (col->un_orig_pwstart != MD_DISKADDR_ERROR)),
	    &col->un_hs_id, &col->un_hs_key, &tmpdev, &tmpdaddr) != 0) {
		/* none available: undo the resync request and give up */
		col->un_dev = tmpdev;
		col->un_hs_pwstart = tmpdaddr;
		release_resync_request(mnum);
		raid_set_state(un, hs_index, state, 1);
		goto errout;
	}

	col->un_hs_pwstart = tmpdaddr;

	/*
	 * record id is filled in by raid_commit, recids[0] filled in by
	 * md_hot_spare_ifc if needed
	 */
	recids[0] = col->un_hs_id;
	recids[1] = 0;

	/*
	 * close the device and open the hot spare.  The device should
	 * never be a hotspare here.
	 */
	if (col->un_devflags & MD_RAID_DEV_ISOPEN) {
		md_layered_close(col->un_orig_dev, MD_OFLG_NULL);
		col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
	}
	/*
	 * Try open by device id
	 */
	tmpdev = md_resolve_bydevid(mnum, tmpdev, col->un_hs_key);
	if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
		/* keep the failed device number for the warning below */
		md_dev64_t hs_dev = tmpdev;
		/* cannot open return to orig */
		raid_hs_release(HS_BAD, un, &recids[0], hs_index);
		release_resync_request(mnum);
		raid_set_state(un, hs_index, state, 1);
		col->un_dev = col->un_orig_dev;
		col->un_devstart = col->un_orig_devstart;
		col->un_pwstart = col->un_orig_pwstart;
		col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
		raid_commit(un, recids);
		cmn_err(CE_WARN, "md: %s: open error of hotspare %s",
		    md_shortname(mnum),
		    md_devname(MD_MIN2SET(mnum), hs_dev, NULL, 0));
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, setno,
		    MD_SID(un));
		/* try the next hot spare in the pool */
		goto again;
	}

	col->un_dev = tmpdev;

	col->un_devflags |= MD_RAID_DEV_ISOPEN;

	/*
	 * move the values into the device fields.  Since in some cases
	 * the pwstart is not zero this must be added into the start of
	 * the hotspare to avoid overwriting the label
	 */
	col->un_hs_pwstart += col->un_orig_pwstart;
	col->un_pwstart = col->un_hs_pwstart;
	col->un_hs_devstart = col->un_hs_pwstart + un->un_pwsize;
	col->un_devstart = col->un_hs_devstart;

	/* commit unit and hotspare records and release lock */
	raid_commit(un, recids);
	md_unit_writerexit(ui);
	md_io_writerexit(ui);

	err = raid_resync_unit(mnum, &mde);

	/* if resync fails, transition back to erred state and reset */
	if (err) {
		/* reacquire unit writer lock (the io writer lock is not retaken) */
		un = (mr_unit_t *)md_unit_writerlock(ui);

		raid_set_state(un, hs_index, RCS_ERRED, 0);

		/*
		 * close the hotspare and return it.  Then restore the
		 * original device back to the original state
		 */
		raid_hs_release(HS_FREE, un, &recids[0], hs_index);
		col->un_dev = col->un_orig_dev;
		col->un_devstart = col->un_orig_devstart;
		col->un_pwstart = col->un_orig_pwstart;
		raid_commit(un, recids);
		md_unit_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		return;
	}

	setno = MD_MIN2SET(mnum);

	(void) md_devname(setno, col->un_orig_dev, devname,
	    sizeof (devname));
	(void) md_devname(setno, col->un_dev, hs_devname,
	    sizeof (hs_devname));

	cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s",
	    md_shortname(mnum), devname, hs_devname);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HOTSPARED, SVM_TAG_HS, setno,
	    MD_SID(un));
	/* return with the unit reader lock held again */
	(void) md_unit_readerlock(ui);
	return;

errout:
	/* common bail-out while both writer locks are held */
	md_unit_writerexit(ui);
	md_io_writerexit(ui);
	un = (mr_unit_t *)md_unit_readerlock(ui);
}
/*
 * stripe_change: change the hot spare pool associated with a stripe.
 *
 * msp  - request: unit minor number, the new parameters (only hsp_id is
 *        acted upon, and only when change_hsp_id is set) and the error
 *        structure that receives any failure.
 * lock - ioctl lock handle used to take the unit writer lock.
 *        NOTE(review): the early error returns after the lock is taken
 *        do not release it here; presumably the ioctl framework drops
 *        locks recorded in `lock` -- confirm.
 *
 * The request is refused (MDE_HS_IN_USE) while any component of the
 * stripe is currently hot spared.  Otherwise the reference count of the
 * new pool is incremented, that of the old pool is decremented, and the
 * unit and pool records are committed together.
 *
 * Returns 0 on success (including the no-op case where change_hsp_id is
 * not set), otherwise the value produced by mdmderror()/mdmddberror()/
 * mdhsperror() with the details stored in msp->mde.
 */
static int
stripe_change(
	md_stripe_params_t	*msp,
	IOLOCK			*lock
)
{
	ms_params_t		*pp = &msp->params;
	minor_t			mnum = msp->mnum;
	ms_unit_t		*un;
	mdi_unit_t		*ui;
	int			r, c, i;
	struct ms_row		*mdr;
	ms_comp_t		*mdcomp, *mdc;
	mddb_recid_t		recids[4];
	int			irecid;
	int			inc_new_hsp = 0;
	int			err;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&msp->mde);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(&msp->mde, MDE_INVAL_UNIT, mnum));

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(&msp->mde, MDE_DB_STALE, mnum, setno));

	if ((ui = MDI_UNIT(mnum)) == NULL) {
		return (mdmderror(&msp->mde, MDE_UNIT_NOT_SETUP, mnum));
	}

	/* the hot spare pool is the only parameter that can be changed */
	if (!pp->change_hsp_id)
		return (0);

	un = (ms_unit_t *)md_ioctl_writerlock(lock, ui);

	/* verify that no hot spares are in use */
	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
	for (r = 0; r < un->un_nrows; r++) {
		mdr = &un->un_row[r];
		for (c = 0, i = mdr->un_icomp; c < mdr->un_ncomp; c++) {
			mdc = &mdcomp[i++];
			if (mdc->un_mirror.ms_hs_id != 0) {
				return (mdmderror(&msp->mde, MDE_HS_IN_USE,
				    mnum));
			}
		}
	}

	/*
	 * recids[0] is reserved for the unit record; the pool records
	 * follow from index 1, and recids[3] is the end marker.
	 */
	recids[1] = 0;
	recids[2] = 0;
	irecid = 1;
	if (pp->hsp_id != -1) {
		/* increment the reference count of the new hsp */
		err = md_hot_spare_ifc(HSP_INCREF, pp->hsp_id, 0, 0,
		    &recids[1], NULL, NULL, NULL);
		if (err) {
			return (mdhsperror(&msp->mde, MDE_INVAL_HSP,
			    pp->hsp_id));
		}
		inc_new_hsp = 1;
		irecid++;
	}

	if (un->un_hsp_id != -1) {
		/* decrement the reference count of the old hsp */
		err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[irecid], NULL, NULL, NULL);
		if (err) {
			/*
			 * It is the OLD pool whose DECREF failed, so that
			 * is the pool the error must name (pp->hsp_id is
			 * the new pool, which was valid, or -1).
			 */
			err = mdhsperror(&msp->mde, MDE_INVAL_HSP,
			    un->un_hsp_id);
			if (inc_new_hsp) {
				/* undo the INCREF taken on the new pool */
				(void) md_hot_spare_ifc(HSP_DECREF,
				    pp->hsp_id, 0, 0,
				    &recids[1], NULL, NULL, NULL);
				/*
				 * Don't need to commit the record,
				 * because it was never committed before
				 */
			}
			return (err);
		}
	}

	un->un_hsp_id = pp->hsp_id;

	recids[0] = un->c.un_record_id;
	recids[3] = 0;
	mddb_commitrecs_wrapper(recids);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}
/*
 * stripe_replace: replace one component device of a stripe.
 *
 * params - request: unit minor number, the device being replaced
 *          (old_dev), the description of the replacement (new_dev,
 *          new_key, number_blks, start_blk, has_label), option flags
 *          and the error structure.
 *
 * The component is located by comparing old_dev against both the
 * current and the original device of every component.  With
 * MDIOCTL_DRYRUN set only the checks are performed.
 *
 * Returns 0 on success (or a successful dry run), the value of
 * mdmderror() with MDE_RESYNC_ACTIVE recorded in params->mde when a
 * resync is active, or EINVAL when old_dev is not a component of the
 * stripe (params->mde is not filled in for that case).
 */
static int
stripe_replace(replace_params_t *params)
{
	minor_t		mnum = params->mnum;
	ms_unit_t	*un;
	mddb_recid_t	recids[6];
	ms_new_dev_t	nd;
	ms_cd_info_t	cd;
	int		ci;
	int		cmpcnt;
	void		*repl_data;
	md_dev64_t	fake_devt;
	void		(*repl_done)();

	mdclrerror(&params->mde);

	un = (ms_unit_t *)MD_UNIT(mnum);

	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
		return (mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	/* describe the replacement device; it is never a hot spare here */
	nd.nd_dev = params->new_dev;
	nd.nd_key = params->new_key;
	nd.nd_nblks = params->number_blks;
	nd.nd_start_blk = params->start_blk;
	nd.nd_labeled = params->has_label;
	nd.nd_hs_id = 0;

	/*
	 * stripe_component_count and stripe_get_dev only care about the
	 * minor number associated with the first argument which is a
	 * md_dev64_t
	 *
	 * The comments section for these two routines have been updated
	 * to indicate that this routine calls with fake major numbers.
	 */
	fake_devt = md_makedevice(0, mnum);
	cmpcnt = stripe_component_count(fake_devt, NULL);
	for (ci = 0; ci < cmpcnt; ci++) {
		(void) stripe_get_dev(fake_devt, NULL, ci, &cd);
		if ((cd.cd_dev == params->old_dev) ||
		    (cd.cd_orig_dev == params->old_dev))
			break;
	}
	/* ran off the end: old_dev is not part of this stripe */
	if (ci == cmpcnt) {
		return (EINVAL);
	}

	/* In case of a dryrun we're done here */
	if (params->options & MDIOCTL_DRYRUN) {
		return (0);
	}

	/*
	 * Swap the device, commit the records it touched, then run the
	 * completion callback handed back by stripe_replace_dev.
	 */
	(void) stripe_replace_dev(fake_devt, 0, ci, &nd, recids, 6,
	    &repl_done, &repl_data);
	mddb_commitrecs_wrapper(recids);
	(*repl_done)(fake_devt, repl_data);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}
/*
 * stripe_grow: replace a stripe's unit record with a larger one that
 * carries additional rows.
 *
 * d     - md_grow_params_t request: unit minor number, current row
 *         count (nrows), size and user address of the new unit
 *         structure (size, mdp), record options, and the list of parent
 *         units (npar, par) that must be locked as well.
 * mode  - copyin mode flags passed through to ddi_copyin().
 * lockp - ioctl lock handle used for this unit's writer lock.
 *
 * A new database record is created and filled from the user's unit
 * structure, the fixed part and existing components of the old unit are
 * copied over it, the new components get their parent set (and are
 * opened if the unit is open), the new record is committed, and the old
 * record is deleted.
 *
 * Returns 0 on success; EFAULT when a copyin fails; EINVAL when
 * mgp->nrows does not match the current unit; otherwise the value
 * produced by mdmderror()/mdmddberror()/mddbstatus2error() with details
 * in mgp->mde.
 */
static int
stripe_grow(void *d, int mode, IOLOCK *lockp)
{
	minor_t			mnum;
	ms_unit_t		*un, *new_un;
	mdi_unit_t		*ui;
	minor_t			*par = NULL;
	IOLOCK			*plock = NULL;
	ms_comp_t		*mdcomp, *new_comp;
	int			row, i, c;
	mddb_recid_t		ms_recid;
	mddb_recid_t		old_vtoc = 0;
	mddb_recid_t		*recids;
	md_create_rec_option_t	options;
	mddb_type_t		typ1;
	int			err;
	int64_t			tb, atb;
	uint_t			nr, oc;
	int			opened;
	int			rval = 0;
	set_t			setno;
	md_error_t		*mdep;
	int			npar;
	int			rid;
	int			num_recs;
	u_longlong_t		rev;
	md_grow_params_t	*mgp = d;

	mnum = mgp->mnum;
	mdep = &mgp->mde;
	setno = MD_MIN2SET(mnum);
	npar = mgp->npar;

	mdclrerror(mdep);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	ui = MDI_UNIT(mnum);
	if (ui == NULL) {
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
	}

	/* fetch the list of parent minors; one IOLOCK is needed per parent */
	if (npar >= 1) {
		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
		if (ddi_copyin((caddr_t)(uintptr_t)mgp->par, (caddr_t)par,
		    (npar * sizeof (*par)), mode) != 0) {
			kmem_free(par, npar * sizeof (*par));
			kmem_free(plock, npar * sizeof (*plock));
			return (EFAULT);
		}
	}

	/*
	 * we grab unit reader/writer first, then parent locks,
	 * then our own.
	 * we expect parent units to be sorted to avoid deadlock
	 */
	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
	for (i = 0; i < npar; ++i) {
		(void) md_ioctl_writerlock(&plock[i], MDI_UNIT(par[i]));
	}
	un = (ms_unit_t *)md_ioctl_writerlock(lockp, ui);

	/* the caller's idea of the current row count must match ours */
	if (un->un_nrows != mgp->nrows) {
		rval = EINVAL;
		goto out;
	}

	typ1 = (mddb_type_t)md_getshared_key(setno,
	    stripe_md_ops.md_driver.md_drivername);

	/*
	 * Preserve the friendly name nature of growing device.
	 */
	options = MD_CRO_STRIPE;
	if (un->c.un_revision & MD_FN_META_DEV)
		options |= MD_CRO_FN;
	if (mgp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
		goto out;
#else
		ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
		    MD_CRO_64BIT | options, setno);
#endif
	} else {
		ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
		    MD_CRO_32BIT | options, setno);
	}
	if (ms_recid < 0) {
		rval = mddbstatus2error(mdep, (int)ms_recid, mnum, setno);
		goto out;
	}

	/* get the address of the new unit */
	new_un = (ms_unit_t *)mddb_getrecaddr(ms_recid);

	/*
	 * It is okay that we muck with the new unit here,
	 * since no one else will know about the unit struct
	 * until we commit it. If we crash, the record will
	 * be automatically purged, since we haven't
	 * committed it yet and the old unit struct will be found.
	 */

	/* copy in the user's unit struct */
	err = ddi_copyin((caddr_t)(uintptr_t)mgp->mdp, (caddr_t)new_un,
	    (size_t)mgp->size, mode);
	if (err) {
		mddb_deleterec_wrapper(ms_recid);
		rval = EFAULT;
		goto out;
	}
	if (options & MD_CRO_FN)
		new_un->c.un_revision |= MD_FN_META_DEV;

	/*
	 * allocate the real recids array.  since we may have to
	 * commit underlying metadevice records, we need an
	 * array of size: total number of new components being
	 * attached + 2 (one for the stripe itself, one for the
	 * end marker).
	 */
	num_recs = 2;
	rid = 0;
	for (row = 0; row < new_un->un_nrows; row++) {
		struct ms_row *mdr = &new_un->un_row[row];
		num_recs += mdr->un_ncomp;
	}
	recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
	recids[rid++] = ms_recid;

	/*
	 * Save a few of the new unit structs fields.
	 * Before they get clobbered.
	 */
	tb = new_un->c.un_total_blocks;
	atb = new_un->c.un_actual_tb;
	nr = new_un->un_nrows;
	oc = new_un->un_ocomp;
	rev = new_un->c.un_revision;

	/*
	 * Copy the old unit struct (static stuff)
	 * into new unit struct
	 *
	 * NOTE(review): the length is derived from nr, the NEW row count
	 * supplied by the user.  (nr - 2) equals un->un_nrows - 1 only
	 * when exactly one row is being added, and nr is unsigned, so a
	 * value below 2 would wrap -- confirm that callers always add
	 * exactly one row.
	 */
	bcopy((caddr_t)un, (caddr_t)new_un, sizeof (ms_unit_t) +
	    ((nr - 2) * (sizeof (struct ms_row))));

	/*
	 * Restore the saved stuff.
	 */
	new_un->c.un_total_blocks = tb;
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	new_un->c.un_actual_tb = atb;
	new_un->un_nrows = nr;
	new_un->un_ocomp = oc;
	new_un->c.un_revision = rev;

	new_un->c.un_record_id = ms_recid;
	new_un->c.un_size = mgp->size;

	/* All 64 bit metadevices only support EFI labels. */
	if (mgp->options & MD_CRO_64BIT) {
		new_un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device was previously smaller than a terabyte,
		 * and had a vtoc record attached to it, we remove the
		 * vtoc record, because the layout has changed completely.
		 */
		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
		    (un->c.un_vtoc_id != 0)) {
			old_vtoc = un->c.un_vtoc_id;
			new_un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
	}

	/*
	 * Copy the old component structs into the new unit struct.
	 * The component arrays live un_ocomp bytes into each unit.
	 */
	mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]);
	new_comp = (ms_comp_t *)((void *)&((char *)new_un)[new_un->un_ocomp]);
	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++, c++) {
			bcopy((caddr_t)&mdcomp[c], (caddr_t)&new_comp[c],
			    sizeof (ms_comp_t));
		}
	}

	opened = md_unit_isopen(ui);

	/*
	 * Set parent on metadevices being added.
	 * Open the new devices being added.
	 * NOTE: currently soft partitions are the only metadevices
	 * which can appear within a stripe.
	 */
	for (row = un->un_nrows; row < new_un->un_nrows; row++) {
		struct ms_row *mdr = &new_un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp	*mdc = &new_comp[c++];
			md_dev64_t comp_dev;
			md_unit_t *comp_un;

			comp_dev = mdc->un_dev;
			/* set parent on any metadevices */
			if (md_getmajor(comp_dev) == md_major) {
				comp_un = MD_UNIT(md_getminor(comp_dev));
				/* the component's record is committed too */
				recids[rid++] = MD_RECID(comp_un);
				md_set_parent(comp_dev, MD_SID(new_un));
			}

			if (opened) {
				md_dev64_t tmpdev = mdc->un_dev;
				/*
				 * Open by device id
				 * Check if this comp is hotspared and
				 * if it is then use the key for hotspare
				 */
				tmpdev = md_resolve_bydevid(mnum, tmpdev,
				    mdc->un_mirror.ms_hs_id ?
				    mdc->un_mirror.ms_hs_key :
				    mdc->un_key);
				/* open failure is deliberately not treated as fatal */
				(void) md_layered_open(mnum, &tmpdev,
				    MD_OFLG_NULL);
				mdc->un_dev = tmpdev;
				mdc->un_mirror.ms_flags |= MDM_S_ISOPEN;
			}
		}
	}

	/* set end marker */
	recids[rid] = 0;
	/* commit new unit struct */
	mddb_commitrecs_wrapper(recids);

	/* delete old unit struct */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* place new unit in in-core array */
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	MD_UNIT(mnum) = new_un;

	/*
	 * If old_vtoc has a non zero value, we know:
	 * - This unit crossed the border from smaller to larger one TB
	 * - There was a vtoc record for the unit,
	 * - This vtoc record is no longer needed, because
	 *   a new efi record has been created for this un.
	 */
	if (old_vtoc != 0) {
		mddb_deleterec_wrapper(old_vtoc);
	}

	/* free recids array */
	kmem_free(recids, num_recs * sizeof (mddb_recid_t));

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(new_un), MD_SID(new_un));

	/*
	 * release locks and return; reached both on success (rval == 0)
	 * and from the error paths above.  Parent locks are dropped in
	 * the reverse of the order in which they were taken.
	 */
out:
	for (i = npar - 1; (i >= 0); --i)
		md_ioctl_writerexit(&plock[i]);
	rw_exit(&md_unit_array_rw.lock);
	if (plock != NULL)
		kmem_free(plock, npar * sizeof (*plock));
	if (par != NULL)
		kmem_free(par, npar * sizeof (*par));
	return (rval);
}
static int stripe_set(void *d, int mode) { minor_t mnum; ms_unit_t *un; void *p; mddb_recid_t ms_recid; mddb_recid_t *recids; mddb_type_t typ1; int err; set_t setno; md_error_t *mdep; struct ms_comp *mdcomp; int row; int rid; int num_recs; int i, c; md_set_params_t *msp = d; mnum = msp->mnum; setno = MD_MIN2SET(mnum); mdep = &msp->mde; mdclrerror(mdep); if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { return (mdmderror(mdep, MDE_INVAL_UNIT, mnum)); } if (md_get_setstatus(setno) & MD_SET_STALE) return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno)); un = MD_UNIT(mnum); if (un != NULL) { return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum)); } typ1 = (mddb_type_t)md_getshared_key(setno, stripe_md_ops.md_driver.md_drivername); /* create the db record for this mdstruct */ if (msp->options & MD_CRO_64BIT) { #if defined(_ILP32) return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum)); #else ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, MD_CRO_64BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); #endif } else { ms_recid = mddb_createrec((size_t)msp->size, typ1, 0, MD_CRO_32BIT | MD_CRO_STRIPE | MD_CRO_FN, setno); } if (ms_recid < 0) return (mddbstatus2error(mdep, ms_recid, mnum, setno)); /* get the address of the mdstruct */ p = (void *) mddb_getrecaddr(ms_recid); /* * It is okay that we muck with the mdstruct here, * since no one else will know about the mdstruct * until we commit it. If we crash, the record will * be automatically purged, since we haven't * committed it yet. */ /* copy in the user's mdstruct */ if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, (caddr_t)p, (size_t)msp->size, mode)) { mddb_deleterec_wrapper(ms_recid); return (EFAULT); } un = (ms_unit_t *)p; /* All 64 bit metadevices only support EFI labels. */ if (msp->options & MD_CRO_64BIT) { un->c.un_flag |= MD_EFILABEL; } /* * allocate the real recids array. 
since we may have to commit * underlying metadevice records, we need an array * of size: total number of components in stripe + 3 * (1 for the stripe itself, one for the hotspare, one * for the end marker). */ num_recs = 3; rid = 0; for (row = 0; row < un->un_nrows; row++) { struct ms_row *mdr = &un->un_row[row]; num_recs += mdr->un_ncomp; } recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP); recids[rid++] = ms_recid; MD_SID(un) = mnum; MD_RECID(un) = recids[0]; MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SUB_MIRROR | MD_CAN_SP; MD_PARENT(un) = MD_NO_PARENT; un->c.un_revision |= MD_FN_META_DEV; if (err = stripe_build_incore(p, 0)) { md_nblocks_set(mnum, -1ULL); MD_UNIT(mnum) = NULL; mddb_deleterec_wrapper(recids[0]); kmem_free(recids, num_recs * sizeof (mddb_recid_t)); return (err); } /* * Update unit availability */ md_set[setno].s_un_avail--; recids[rid] = 0; if (un->un_hsp_id != -1) err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0, &recids[rid++], NULL, NULL, NULL); if (err) { md_nblocks_set(mnum, -1ULL); MD_UNIT(mnum) = NULL; mddb_deleterec_wrapper(recids[0]); kmem_free(recids, num_recs * sizeof (mddb_recid_t)); return (mdhsperror(mdep, MDE_INVAL_HSP, un->un_hsp_id)); } /* * set the parent on any metadevice components. * NOTE: currently soft partitions are the only metadevices * which can appear within a stripe. 
*/ mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]); for (row = 0; row < un->un_nrows; row++) { struct ms_row *mdr = &un->un_row[row]; for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { ms_comp_t *mdc = &mdcomp[c++]; md_dev64_t comp_dev; md_unit_t *comp_un; comp_dev = mdc->un_dev; if (md_getmajor(comp_dev) == md_major) { /* set parent and disallow soft partitioning */ comp_un = MD_UNIT(md_getminor(comp_dev)); recids[rid++] = MD_RECID(comp_un); md_set_parent(mdc->un_dev, MD_SID(un)); } } } /* set end marker */ recids[rid] = 0; mddb_commitrecs_wrapper(recids); md_create_unit_incore(mnum, &stripe_md_ops, 0); kmem_free(recids, (num_recs * sizeof (mddb_recid_t))); SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un)); return (0); }
/*
 * seths_enable:
 *	Return a broken hot spare to service.
 *
 *	The hot spare is identified by the namespace key that belongs to
 *	shs->shs_component_old rather than by its devt, because the
 *	component may not currently be present in the system.  The key
 *	must be unique; zero or multiple matches fail the request so a
 *	hot spare is never enabled on a slice that may be in use by
 *	another metadevice.
 *
 *	Returns 0 on success (or on a dry run that passed validation),
 *	otherwise the value produced by mddeverror() with shs->mde set.
 */
static int
seths_enable(set_hs_params_t *shs)
{
	set_t		setno = shs->md_driver.md_setno;
	side_t		side = mddb_getsidenum(setno);
	hot_spare_t	*spare;
	mdkey_t		old_key;
	int		nkeys = 0;
	mddb_recid_t	recids[2];

	/* Translate the old component into its namespace key. */
	if (md_getkeyfromdev(setno, side, shs->shs_component_old,
	    &old_key, &nkeys) != 0) {
		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
		    shs->shs_component_old));
	}

	/* No key at all: the entry should have had one. */
	if (nkeys == 0) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Several keys: impossible to tell which one is meant. */
	if (nkeys > 1) {
		return (mddeverror(&shs->mde, MDE_MULTNM,
		    shs->shs_component_old));
	}

	/* Walk the set's hot spare list looking for that key. */
	for (spare = (hot_spare_t *)md_set[setno].s_hs; spare != NULL;
	    spare = spare->hs_next) {
		if (spare->hs_key == old_key)
			break;
	}
	if (spare == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Only a broken hot spare can be enabled. */
	if (spare->hs_state != HSS_BROKEN) {
		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
		    spare->hs_devnum));
	}

	/* A dry run stops once validation has succeeded. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/* Mark it available and refresh its geometry from the request. */
	set_hot_spare_state(spare, HSS_AVAILABLE);
	spare->hs_start_blk = shs->shs_start_blk;
	spare->hs_has_label = shs->shs_has_label;
	spare->hs_number_blks = shs->shs_number_blks;

	/* Commit the hot spare record; the list is zero terminated. */
	recids[1] = 0;
	recids[0] = spare->hs_record_id;
	mddb_commitrecs_wrapper(recids);

	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
	    shs->shs_component_old);

	return (0);
}
/*
 * seths_create_hsp:
 *	Create an empty hot spare pool named by shs->shs_hot_spare_pool.
 *
 *	If the pool already exists this is a no-op that returns 0.
 *	Otherwise a pool record is created, initialized, linked at the
 *	head of the set's pool list, committed, and finally linked into
 *	the hotspares_md_ops link list under its writer lock.
 *
 *	Returns 0 on success, otherwise the value of mdhsperror() with
 *	shs->mde set.
 */
static int
seths_create_hsp(set_hs_params_t *shs)
{
	set_t			setno = HSP_SET(shs->shs_hot_spare_pool);
	hot_spare_pool_t	*pool;
	mddb_type_t		rectype;
	mddb_recid_t		rec;

	/* Nothing to do when the pool is already known. */
	pool = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (pool != NULL)
		return (0);

	rectype = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

#if defined(_ILP32)
	/* A 64-bit pool record is refused in an ILP32 build. */
	if (shs->shs_options & MD_CRO_64BIT) {
		return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE,
		    shs->shs_hot_spare_pool));
	}
#endif

	/* Create the pool record with the requested size flag. */
	rec = mddb_createrec(sizeof (hot_spare_pool_ond_t), rectype, HSP_REC,
	    ((shs->shs_options & MD_CRO_64BIT) ? MD_CRO_64BIT : MD_CRO_32BIT) |
	    MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno);
	if (rec < 0) {
		return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
		    shs->shs_hot_spare_pool));
	}

	/* Obtain the record address and fill in the new pool. */
	pool = (hot_spare_pool_t *)mddb_getrecaddr_resize(rec,
	    sizeof (*pool), HSP_ONDSK_STR_OFF);
	pool->hsp_record_id = rec;
	pool->hsp_self_id = shs->shs_hot_spare_pool;
	pool->hsp_nhotspares = 0;
	pool->hsp_refcount = 0;
	pool->hsp_revision |= MD_FN_META_DEV;

	/* Push the pool onto the head of the set's pool list. */
	pool->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
	md_set[setno].s_hsp = (void *)pool;

	mddb_commitrec_wrapper(rec);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
	    md_expldev(pool->hsp_self_id));

	/* Publish the pool on the driver's link list. */
	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
	pool->hsp_link.ln_setno = setno;
	pool->hsp_link.ln_id = pool->hsp_self_id;
	pool->hsp_link.ln_next = hotspares_md_ops.md_head;
	hotspares_md_ops.md_head = &pool->hsp_link;
	rw_exit(&hotspares_md_ops.md_link_rw.lock);

	return (0);
}
/*
 * seths_replace:
 *	Replace the hot spare shs->shs_component_old with
 *	shs->shs_component_new in the pool shs->shs_hot_spare_pool.
 *
 *	The old hot spare is located by devnum or, failing that, by its
 *	namespace key (only for entries whose devnum is NODEV64).  The
 *	new hot spare is reused if it is already on the set's hot spare
 *	list, otherwise a new hot spare record is created for it.
 *
 *	If the old hot spare's reference count drops to zero it is
 *	unlinked from the in-core list, left out of the commit list and
 *	its record is deleted after the commit.
 *
 *	Returns 0 on success (or on a dry run that passed validation),
 *	otherwise the value of mddeverror()/mdhserror()/mdhsperror()
 *	with shs->mde set.
 */
static int
seths_replace(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_t		*prev_hs;
	hot_spare_t		*new_hs;
	hot_spare_pool_t	*hsp;
	int			new_found = 0;
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];
	int			i;
	sv_dev_t		sv;
	int			delete_hs = 0;
	set_t			setno;
	mddb_type_t		typ1;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/*
	 * Scan the hot spare list for the old device by devnum,
	 * remembering the predecessor so the entry can be unlinked later.
	 */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	prev_hs = (hot_spare_t *)0;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		prev_hs = hs;
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Unable to find device using devnum so use
		 * key associated with shs_component_old instead.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we're unable to determine
		 * which key is the valid match for shs_component_old.
		 *
		 * Only need to compare keys when hs_devnum is NODEV.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since unable to determine which key is correct.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again, this time by key */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		prev_hs = (hot_spare_t *)0;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			prev_hs = hs;
			hs = hs->hs_next;
		}
	}

	/* Neither the devnum nor the key matched a hot spare */
	if (hs == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* A reserved hot spare may only be replaced with the force flag */
	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
	    (hs->hs_state == HSS_RESERVED)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool, hs->hs_devnum));
	}

	/* Look up the hot spare pool */
	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (hsp == (hot_spare_pool_t *)0) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/*
	 * Make sure the old device is in the pool.
	 */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			break;
		}
	}

	if (i >= hsp->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    hs->hs_devnum));
	}

	/* Scan the hot spare list for the new hs */
	new_hs = (hot_spare_t *)md_set[setno].s_hs;
	new_found = 0;
	while (new_hs) {
		if (new_hs->hs_devnum == shs->shs_component_new) {
			new_found = 1;
			break;
		}
		new_hs = new_hs->hs_next;
	}

	/*
	 * Make sure the new device is not already in the pool.
	 * The pool only needs to be searched when the new hot spare
	 * already exists; a hot spare that is about to be created
	 * cannot be a member of any pool yet.
	 */
	if (new_found) {
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
				    shs->shs_hot_spare_pool,
				    new_hs->hs_devnum));
			}
	}

	/* In case of a dryrun, we're done here */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/*
	 * Create the new hotspare
	 */
	if (!new_found) {
		/* create a hot spare record */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			/* 64-bit hot spare records are refused under ILP32 */
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_new));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
			    MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
			    MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_new));
		}

		/* get the addr */
		new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid,
		    sizeof (*new_hs), 0);

		/* initialize the new hot spare from the request */
		new_hs->hs_record_id = recid;
		new_hs->hs_devnum = shs->shs_component_new;
		new_hs->hs_key = shs->shs_key_new;
		new_hs->hs_start_blk = shs->shs_start_blk;
		new_hs->hs_has_label = shs->shs_has_label;
		new_hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(new_hs, HSS_AVAILABLE);
		new_hs->hs_refcount = 0;
		new_hs->hs_isopen = 1;
	}

	/*
	 * Build the zero-terminated list of records to commit:
	 * old hot spare, new hot spare, pool.
	 */
	recids[0] = hs->hs_record_id;
	recids[1] = new_hs->hs_record_id;
	recids[2] = hsp->hsp_record_id;
	recids[3] = 0;

	/* remember the old hot spare's name key for md_rem_names() below */
	sv.setno = setno;
	sv.key = hs->hs_key;

	/* the pool drops its reference on the old hot spare */
	hs->hs_refcount--;
	if (hs->hs_refcount == 0) {
		/*
		 * NOTE: We do not commit the previous hot spare record.
		 * There is no need, the link gets rebuilt at boot time.
		 */
		if (prev_hs) {
			prev_hs->hs_next = hs->hs_next;
		} else
			md_set[setno].s_hs = (void *) hs->hs_next;

		/*
		 * Mark hs to be deleted in the correct order: its record
		 * is dropped from the commit list and deleted only after
		 * the remaining records have been committed.
		 */
		delete_hs = 1;

		recids[0] = new_hs->hs_record_id;
		recids[1] = hsp->hsp_record_id;
		recids[2] = 0;
	}

	/* the pool takes a reference on the new hot spare */
	new_hs->hs_refcount++;
	if (!new_found) {
		/*
		 * Link a newly created hot spare at the head of the list.
		 * Do this AFTER the old dev is possibly removed, so that
		 * prev_hs (captured during the scan) is still accurate
		 * for the unlink above.
		 */
		new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) new_hs;
	}

	/* overwrite the old hs's slot in the pool with the new hs */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			hsp->hsp_hotspares[i] = new_hs->hs_record_id;
			break;
		}
	}

	/* make the revision's 64-bit flag follow the requested size option */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		new_hs->hs_revision |= MD_64BIT_META_DEV;
	} else {
		new_hs->hs_revision &= ~MD_64BIT_META_DEV;
	}

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	/* the old hot spare record goes away only after the commit */
	if (delete_hs)
		mddb_deleterec_wrapper(hs->hs_record_id);

	/* release the old hot spare's name using the key saved above */
	md_rem_names(&sv, 1);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));
	return (0);
}
/*
 * seths_delete:
 *	Remove the hot spare shs->shs_component_old from the pool
 *	shs->shs_hot_spare_pool, or delete the pool itself when
 *	HS_OPT_POOL is set (delegated to seths_delete_hsp()).
 *
 *	The hot spare is located by devnum or, failing that, by its
 *	namespace key (only for entries whose devnum is NODEV64).
 *	When its reference count reaches zero it is unlinked from the
 *	set's hot spare list and its record is deleted after the pool
 *	record has been committed.
 *
 *	Returns 0 on success (or on a dry run that passed validation),
 *	otherwise the value of mddeverror()/mdhserror()/mdhsperror()
 *	with shs->mde set.
 */
static int
seths_delete(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_t		*prev_hs;
	hot_spare_pool_t	*hsp;
	mddb_recid_t		recids[4];
	int			nrec;
	int			slot;
	int			i;
	int			delete_hs;
	int			num_keys_old = 0;
	mdkey_t			key_old;
	set_t			setno;
	sv_dev_t		sv;

	/* Deleting a whole pool is handled elsewhere. */
	if (shs->shs_options & HS_OPT_POOL)
		return (seths_delete_hsp(shs));

	setno = HSP_SET(shs->shs_hot_spare_pool);

	/* First pass over the hot spare list: match on devnum. */
	prev_hs = NULL;
	for (hs = (hot_spare_t *)md_set[setno].s_hs; hs != NULL;
	    prev_hs = hs, hs = hs->hs_next) {
		if (hs->hs_devnum == shs->shs_component_old)
			break;
	}

	if (hs == NULL) {
		/*
		 * The devnum did not match, so fall back to the namespace
		 * key of shs_component_old.  The key has to be unique:
		 * with several namespace entries for the component there
		 * is no way to tell which one is the valid match.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/* More than one key: cannot tell which one is correct. */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}

		/* No key: one should exist for this entry. */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}

		/*
		 * Second pass: keys only need to be compared for
		 * entries whose devnum is NODEV.
		 */
		prev_hs = NULL;
		for (hs = (hot_spare_t *)md_set[setno].s_hs; hs != NULL;
		    prev_hs = hs, hs = hs->hs_next) {
			if (hs->hs_devnum == NODEV64 &&
			    hs->hs_key == key_old)
				break;
		}
	}

	if (hs == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Look up the hot spare pool. */
	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (hsp == NULL) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/*
	 * A reserved hot spare is refused when the force flag is absent,
	 * and also whenever the pool itself is still referenced.
	 */
	if (hs->hs_state == HSS_RESERVED &&
	    ((shs->shs_options & HS_OPT_FORCE) == 0 || hsp->hsp_refcount)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool, shs->shs_component_old));
	}

	/* Locate the hot spare's slot; it must be a member of the pool. */
	for (slot = 0; slot < hsp->hsp_nhotspares; slot++) {
		if (hsp->hsp_hotspares[slot] == hs->hs_record_id)
			break;
	}
	if (slot >= hsp->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    hs->hs_devnum));
	}

	/* A dry run stops once validation has succeeded. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/* Remember the name key for md_rem_names() below. */
	sv.setno = setno;
	sv.key = hs->hs_key;

	/* The pool drops its reference on the hot spare. */
	hs->hs_refcount--;
	delete_hs = (hs->hs_refcount == 0);
	if (delete_hs) {
		/*
		 * NOTE: The previous hot spare record is not committed.
		 * There is no need, the link gets rebuilt at boot time.
		 */
		if (prev_hs != NULL)
			prev_hs->hs_next = hs->hs_next;
		else
			md_set[setno].s_hs = (void *)hs->hs_next;
	}

	/*
	 * Records to commit, zero terminated.  A hot spare that is
	 * about to be deleted is left out of the list.
	 */
	nrec = 0;
	if (!delete_hs)
		recids[nrec++] = hs->hs_record_id;
	recids[nrec++] = hsp->hsp_record_id;
	recids[nrec] = 0;

	/* Close the gap left by the hot spare in the pool's array. */
	for (i = slot; i + 1 < hsp->hsp_nhotspares; i++)
		hsp->hsp_hotspares[i] = hsp->hsp_hotspares[i + 1];
	hsp->hsp_nhotspares--;

	/* Commit, then delete the hot spare record if it is unused. */
	mddb_commitrecs_wrapper(recids);

	if (delete_hs)
		mddb_deleterec_wrapper(hs->hs_record_id);

	md_rem_names(&sv, 1);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));
	return (0);
}
/*
 * seths_delete_hsp:
 *	Delete the hot spare pool shs->shs_hot_spare_pool.
 *
 *	The pool must exist, contain no hot spares and have no
 *	references.  On success it is unlinked from the set's pool
 *	list and from the hotspares_md_ops link list, and its record
 *	is deleted.
 *
 *	Returns 0 on success (or on a dry run that passed validation),
 *	otherwise the value of mdhsperror() with shs->mde set.
 */
static int
seths_delete_hsp(set_hs_params_t *shs)
{
	set_t			setno = HSP_SET(shs->shs_hot_spare_pool);
	hot_spare_pool_t	*pool;
	hot_spare_pool_t	*before = NULL;
	hsp_t			hspid;

	/* Find the pool and the entry that precedes it on the list. */
	for (pool = (hot_spare_pool_t *)md_set[setno].s_hsp; pool != NULL;
	    before = pool, pool = pool->hsp_next) {
		if (pool->hsp_self_id == shs->shs_hot_spare_pool)
			break;
	}

	if (pool == NULL) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/* A pool that still holds hot spares cannot be deleted. */
	if (pool->hsp_nhotspares != 0) {
		return (mdhsperror(&shs->mde, MDE_HSP_BUSY,
		    shs->shs_hot_spare_pool));
	}

	/* Neither can a pool that is still referenced. */
	if (pool->hsp_refcount != 0) {
		return (mdhsperror(&shs->mde, MDE_HSP_REF,
		    shs->shs_hot_spare_pool));
	}

	/* A dry run stops once validation has succeeded. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/*
	 * NOTE: The previous hot spare pool record is not committed.
	 * There is no need, the link gets rebuilt at boot time.
	 */
	if (before != NULL)
		before->hsp_next = pool->hsp_next;
	else
		md_set[setno].s_hsp = (void *)pool->hsp_next;

	/* Keep the id; it is still needed after the record is deleted. */
	hspid = pool->hsp_self_id;

	md_rem_link(setno, hspid,
	    &hotspares_md_ops.md_link_rw.lock,
	    &hotspares_md_ops.md_head);

	mddb_deleterec_wrapper(pool->hsp_record_id);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno,
	    md_expldev(hspid));
	return (0);
}
/*
 * seths_add:
 *	Add the hot spare shs->shs_component_old to the pool
 *	shs->shs_hot_spare_pool, or create an empty pool when
 *	HS_OPT_POOL is set (delegated to seths_create_hsp()).
 *
 *	The hot spare is reused if it is already on the set's hot spare
 *	list (matched by devnum, or by key for NODEV64 entries);
 *	otherwise a hot spare record is created.  If the pool does not
 *	exist it is created.  If it does exist, a new pool record with
 *	room for one more hot spare is created, the old contents are
 *	copied into it, and the old record is deleted after the commit.
 *
 *	Note that a dry run returns 0 before any validation is done.
 *
 *	Returns 0 on success, otherwise the value of
 *	mddeverror()/mdhserror()/mdhsperror() with shs->mde set.
 */
static int
seths_add(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_pool_t	*hsp;
	hot_spare_pool_t	*prev_hsp;
	hot_spare_pool_t	*new_hsp;
	hot_spare_pool_t	*old_hsp;
	md_create_rec_option_t	options;
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];
	size_t			new_size;
	int			i;
	int			delete_hsp = 0;
	int			irecid;
	set_t			setno;
	mddb_type_t		typ1;
	int			hsp_created = 0;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	/* Not much to do here in case of a dryrun */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/* create an empty hot spare pool */
	if (shs->shs_options & HS_OPT_POOL) {
		return (seths_create_hsp(shs));
	}

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/* Scan the hot spare list for the device by devnum */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Did not find match for device using devnum so use
		 * key associated with shs_component_old just
		 * in case there is a match but the match's dev is NODEV.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we shouldn't allow
		 * an addition of a hotspare in this case.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since shouldn't add new hotspare if namespace has
		 * multiple entries.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again, this time by key */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			hs = hs->hs_next;
		}
	}

	/* No existing hot spare matched: create one */
	if (hs == NULL) {
		/* create a hot spare record */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			/* 64-bit hot spare records are refused under ILP32 */
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_old));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
			    MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
			    MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_old));
		}

		/* get the addr */
		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
		    0);

		/* initialize the new hot spare from the request */
		hs->hs_record_id = recid;

		hs->hs_devnum = shs->shs_component_old;
		hs->hs_key = shs->shs_key_old;
		hs->hs_start_blk = shs->shs_start_blk;
		hs->hs_has_label = shs->shs_has_label;
		hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(hs, HSS_AVAILABLE);
		hs->hs_refcount = 0;

		/* link it at the head of the set's hot spare list */
		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) hs;
	}

	/*
	 * Scan the hot spare pool list, remembering the predecessor so
	 * a replacement pool record can be spliced in at the same place.
	 */
	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
	prev_hsp = (hot_spare_pool_t *)0;
	while (hsp) {
		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
			break;
		}
		prev_hsp = hsp;
		hsp = hsp->hsp_next;
	}

	if (hsp == NULL) {
		/* create a hot spare pool record */
		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
		    typ1, HSP_REC,
		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    shs->shs_hot_spare_pool));
		}

		/* get the record addr */
		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
		    sizeof (*hsp), HSP_ONDSK_STR_OFF);

		hsp->hsp_self_id = shs->shs_hot_spare_pool;
		hsp->hsp_record_id = recid;
		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
		hsp->hsp_refcount = 0;
		hsp->hsp_nhotspares = 0;
		hsp->hsp_revision |= MD_FN_META_DEV;

		/*
		 * force prev_hsp to NULL, this will cause hsp to be
		 * linked at the head of the set's pool list below
		 */
		prev_hsp = (hot_spare_pool_t *)0;

		/* publish the new pool on the driver's link list */
		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		hsp->hsp_link.ln_setno = setno;
		hsp->hsp_link.ln_id = hsp->hsp_self_id;
		hotspares_md_ops.md_head = &hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);
		hsp_created = 1;
	} else {

		/*
		 * Make sure the hot spare is not already in the pool.
		 */
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
				    shs->shs_hot_spare_pool,
				    hs->hs_devnum));
			}
		/*
		 * Create a new hot spare pool record
		 * This gives us the one extra hs slot,
		 * because there is one slot in the
		 * hot_spare_pool struct
		 */
		new_size = sizeof (hot_spare_pool_ond_t) +
		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares);

		/*
		 * The Friendly Name status of the new HSP should duplicate
		 * the status of the existing one.
		 */
		if (hsp->hsp_revision & MD_FN_META_DEV) {
			options =
			    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN;
		} else {
			options = MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL;
		}
		recid = mddb_createrec(new_size, typ1, HSP_REC, options, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    hsp->hsp_self_id));
		}
		/*
		 * The size is recomputed from hot_spare_pool_t (rather
		 * than hot_spare_pool_ond_t) for mddb_getrecaddr_resize().
		 */
		new_size = sizeof (hot_spare_pool_t) +
		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares);

		/* get the record addr */
		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
		    new_size, HSP_ONDSK_STR_OFF);

		/*
		 * copy the old record into the new one: the struct
		 * (which already holds one slot) plus the remaining
		 * hsp_nhotspares - 1 slots
		 */
		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
		    (size_t)((sizeof (hot_spare_pool_t) +
		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
		    - sizeof (mddb_recid_t))));
		/* the copy carried over the old record id; fix it up */
		new_hsp->hsp_record_id = recid;

		/* swap the old pool for the new one on the link list */
		md_rem_link(setno, hsp->hsp_self_id,
		    &hotspares_md_ops.md_link_rw.lock,
		    &hotspares_md_ops.md_head);

		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		new_hsp->hsp_link.ln_setno = setno;
		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
		hotspares_md_ops.md_head = &new_hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);

		/* mark the old hsp to be deleted */
		delete_hsp = 1;
		old_hsp = hsp;
		hsp = new_hsp;
	}

	/* make the revision's 64-bit flag follow the requested size option */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		hs->hs_revision |= MD_64BIT_META_DEV;
	} else {
		hs->hs_revision &= ~MD_64BIT_META_DEV;
	}

	/*
	 * Build the zero-terminated list of records to commit: the hot
	 * spare, the (new) pool and, when replaced, the old pool.
	 */
	recids[0] = hs->hs_record_id;
	recids[1] = hsp->hsp_record_id;
	irecid = 2;
	if (delete_hsp)
		recids[irecid++] = old_hsp->hsp_record_id;
	recids[irecid] = 0;

	/* increment the reference count */
	hs->hs_refcount++;

	/* add the hs at the end of the hot spare pool */
	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
	hsp->hsp_nhotspares++;

	/*
	 * NOTE: We do not commit the previous hot spare pool record.
	 * There is no need, the link gets rebuilt at boot time.
	 */
	if (prev_hsp)
		prev_hsp->hsp_next = hsp;
	else
		md_set[setno].s_hsp = (void *) hsp;

	/* the old pool record gives up its id before it is committed */
	if (delete_hsp)
		old_hsp->hsp_self_id = MD_HSP_NONE;

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	if (delete_hsp) {
		/* delete the old hot spare pool record */
		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
	}

	/* a freshly created pool is announced before the add itself */
	if (hsp_created) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
		    md_expldev(hsp->hsp_self_id));
	}
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));

	return (0);
}
/*
 * NAME:	reset_stripe
 *
 * DESCRIPTION: Tear down a stripe metadevice.  The in-core unit is always
 *		destroyed and the minor node released.  When 'removing' is
 *		nonzero the persistent state is removed as well: hot spares
 *		in use are freed, child metadevices are de-parented, the
 *		hot spare pool reference is dropped, the unit (and vtoc)
 *		records are deleted and the component names are removed.
 *
 * PARAMETERS:	ms_unit_t *un	- stripe unit structure
 *		minor_t mnum	- minor number identity of metadevice
 *		int removing	- nonzero to also delete persistent records
 *
 * RETURN:	none
 */
void
reset_stripe(ms_unit_t *un, minor_t mnum, int removing)
{
	ms_comp_t	*mdcomp;
	struct ms_row	*mdr;
	int		i, c;
	int		row;
	int		nsv;
	int		isv;
	sv_dev_t	*sv;
	mddb_recid_t	*recids;
	mddb_recid_t	vtoc_id;
	int		rid = 0;

	md_destroy_unit_incore(mnum, &stripe_md_ops);

	md_nblocks_set(mnum, -1ULL);
	MD_UNIT(mnum) = NULL;

	/*
	 * Attempt release of its minor node
	 */
	md_remove_minor_node(mnum);

	/* Without 'removing' only the in-core state is torn down */
	if (!removing)
		return;

	nsv = 0;
	/* Count the number of devices */
	for (row = 0; row < un->un_nrows; row++) {
		mdr = &un->un_row[row];
		nsv += mdr->un_ncomp;
	}
	sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP);

	/*
	 * allocate recids array.  since we may have to commit
	 * underlying soft partition records, we need an array
	 * of size: total number of components in stripe + 3
	 * (one for the stripe itself, one for the hotspare, one
	 * for the end marker).
	 */
	recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP);

	/*
	 * Save the md_dev64_t's and driver nm indexes.
	 * Because after the mddb_deleterec() we will
	 * not be able to access the unit structure.
	 *
	 * NOTE: Deleting the names before deleting the
	 *	 unit structure would cause problems if
	 *	 the machine crashed in between the two.
	 */
	isv = 0;
	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);

	for (row = 0; row < un->un_nrows; row++) {
		mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp	*mdc;
			md_dev64_t	child_dev;
			md_unit_t	*child_un;

			mdc = &mdcomp[c++];
			if (mdc->un_mirror.ms_hs_id != 0) {
				mdkey_t		hs_key;
				mddb_recid_t	hs_recids[3];

				hs_key = mdc->un_mirror.ms_hs_key;

				/* put the original device back in place */
				mdc->un_dev = mdc->un_mirror.ms_orig_dev;
				mdc->un_start_block =
				    mdc->un_mirror.ms_orig_blk;
				mdc->un_mirror.ms_hs_id = 0;
				mdc->un_mirror.ms_hs_key = 0;
				mdc->un_mirror.ms_orig_dev = 0;

				/*
				 * Free the hot spare and commit its record
				 * through a private, zero-terminated list.
				 * The shared recids[] array must not be
				 * used here: earlier components may already
				 * have stored child record ids in
				 * recids[0..rid-1], and overwriting them
				 * would drop those records from the final
				 * commit below.
				 */
				hs_recids[0] = 0;
				hs_recids[1] = 0;
				hs_recids[2] = 0;
				(void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id,
				    0, 0, &hs_recids[0], &hs_key, NULL, NULL);
				mddb_commitrecs_wrapper(hs_recids);
			}

			/*
			 * check if we've got metadevice below us and
			 * deparent it if we do.
			 * NOTE: currently soft partitions are the
			 * only metadevices stripes can be
			 * built on top of.
			 */
			child_dev = mdc->un_dev;
			if (md_getmajor(child_dev) == md_major) {
				child_un = MD_UNIT(md_getminor(child_dev));
				md_reset_parent(child_dev);
				recids[rid++] = MD_RECID(child_un);
			}

			/* remember the component's name key for removal */
			sv[isv].setno = MD_MIN2SET(mnum);
			sv[isv++].key = mdc->un_key;
		}
	}

	recids[rid++] = un->c.un_record_id;
	recids[rid] = 0;	/* filled in below */

	/*
	 * Decrement the HSP reference count and
	 * remove the knowledge of the HSP from the unit struct.
	 * This is done atomically to remove a window.
	 */
	if (un->un_hsp_id != -1) {
		(void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[rid++], NULL, NULL, NULL);
		un->un_hsp_id = -1;
	}

	/* set end marker and commit records */
	recids[rid] = 0;
	mddb_commitrecs_wrapper(recids);

	/* save the vtoc id; the unit record is deleted below */
	vtoc_id = un->c.un_vtoc_id;

	/*
	 * Remove self from the namespace
	 */
	if (un->c.un_revision & MD_FN_META_DEV) {
		(void) md_rem_selfname(un->c.un_self_id);
	}

	/* Remove the unit structure */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE,
	    MD_MIN2SET(mnum), MD_MIN2UNIT(mnum));

	/* names go last, after the unit record is gone */
	md_rem_names(sv, nsv);
	kmem_free(sv, sizeof (sv_dev_t) * nsv);
	kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3));
}