/*
 * NAMES:	raid_replay_error
 * DESCRIPTION: RAID metadevice replay error handling routine (TBD)
 * PARAMETERS:
 * RETURNS:
 */
static int
raid_replay_error(mr_unit_t *un, int column)
{
	int	error = RAID_RPLY_COMPREPLAY;

	raid_set_state(un, column, RCS_ERRED, 0);
	raid_commit(un, NULL);

	if (UNIT_STATE(un) == RUS_LAST_ERRED) {
		error = RAID_RPLY_READONLY;
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	} else if (UNIT_STATE(un) == RUS_ERRED) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	return (error);
}
static int
raid_open_alt(mr_unit_t *un, int index)
{
	mr_column_t	*column = &un->un_column[index];
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	md_dev64_t	tmpdev = column->un_alt_dev;

	/* correct locks */
	ASSERT(UNIT_WRITER_HELD(un));
	/* not already writing to */
	ASSERT(! (column->un_devflags & MD_RAID_WRITE_ALT));
	/* not already open */
	ASSERT(! (column->un_devflags & MD_RAID_ALT_ISOPEN));

	if (tmpdev != NODEV64) {
		/*
		 * Open by device id. We use orig_key since alt_dev
		 * has been set by the caller to be the same as orig_dev.
		 */
		if ((md_getmajor(tmpdev) != md_major) &&
			md_devid_found(setno, side, column->un_orig_key) == 1) {
			tmpdev = md_resolve_bydevid(MD_SID(un), tmpdev,
				column->un_orig_key);
		}
		if (md_layered_open(MD_SID(un), &tmpdev, MD_OFLG_NULL)) {
			/* failed open */
			column->un_alt_dev = tmpdev;
			return (1);
		} else {
			/* open suceeded */
			column->un_alt_dev = tmpdev;
			column->un_devflags |= MD_RAID_ALT_ISOPEN;
			return (0);
		}
	} else
		/* no alt device to open */
		return (1);
}
/*
 * NAME:	raid_resync_unit
 *
 * DESCRIPTION: RAID metadevice specific resync routine.
 *		Open the unit and start resync_unit as a separate thread.
 *
 * PARAMETERS:	minor_t	  mnum - minor number identity of metadevice
 *		md_error_t *ep - output error parameter
 *
 * RETURN:	On error return 1 or set ep to nonzero, otherwise return 0.
 *
 * LOCKS:	Acquires and releases Unit Writer Lock.
 */
int
raid_resync_unit(
	minor_t			mnum,
	md_error_t		*ep
)
{
	mdi_unit_t	*ui;
	set_t		setno = MD_MIN2SET(mnum);
	mr_unit_t	*un;

	ui = MDI_UNIT(mnum);
	un = MD_UNIT(mnum);

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(ep, MDE_DB_STALE, mnum, setno));

	ASSERT(un->un_column[un->un_resync_index].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/* Don't start a resync if the device is not available */
	if ((ui == NULL) || (ui->ui_tstate & MD_DEV_ERRORED)) {
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
	}

	if (raid_internal_open(mnum, FREAD | FWRITE, OTYP_LYR, 0)) {
		(void) md_unit_writerlock(ui);
		release_resync_request(mnum);
		md_unit_writerexit(ui);
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, SVM_TAG_METADEVICE,
		    setno, MD_SID(un));
		return (mdmderror(ep, MDE_RAID_OPEN_FAILURE, mnum));
	}

	/* start resync_unit thread */
	(void) thread_create(NULL, 0, resync_unit, (void *)(uintptr_t)mnum,
	    0, &p0, TS_RUN, minclsyspri);

	return (0);
}
/*
 * NAME:	resync_comp
 *
 * DESCRIPTION: Resync the component.  Iterate through the raid unit a line at
 *		a time, read from the good device(s) and write the resync
 *		device.
 *
 * PARAMETERS:	minor_t	   mnum - minor number identity of metadevice
 *		md_raidcs_t *cs - child save struct
 *
 * RETURN:	 0 - successfull
 *		 1 - failed
 *		-1 - aborted
 *
 * LOCKS:	Expects Unit Reader Lock to be held across call.  Acquires and
 *		releases Line Reader Lock for per-line I/O.
 */
static void
resync_comp(
	minor_t		mnum,
	md_raidcs_t	*cs
)
{
	mdi_unit_t	*ui;
	mr_unit_t	*un;
	mddb_recid_t	recids[2];
	rcs_state_t	state;
	md_dev64_t	dev_to_write;
	diskaddr_t	write_pwstart;
	diskaddr_t	write_devstart;
	md_dev64_t	dev;
	int		resync;
	int		i;
	int		single_read = 0;
	int		err;
	int		err_cnt;
	int		last_err;
	diskaddr_t	line;
	diskaddr_t	segsincolumn;
	size_t		bsize;
	uint_t		line_count;

	/*
	 * hs_state is the state of the hotspare on the column being resynced
	 * dev_state is the state of the resync target
	 */
	hs_cmds_t	hs_state;
	int		err_col = -1;
	diskaddr_t	resync_end_pos;

	ui = MDI_UNIT(mnum);
	ASSERT(ui != NULL);

	un = cs->cs_un;

	md_unit_readerexit(ui);
	un = (mr_unit_t *)md_io_writerlock(ui);
	un = (mr_unit_t *)md_unit_writerlock(ui);
	resync = un->un_resync_index;
	state = un->un_column[resync].un_devstate;
	line_count = un->un_maxio / un->un_segsize;
	if (line_count == 0) { /* handle the case of segsize > maxio */
		line_count = 1;
		bsize = un->un_maxio;
	} else
		bsize = line_count * un->un_segsize;

	un->un_resync_copysize = (uint_t)bsize;

	ASSERT(un->c.un_status & MD_UN_RESYNC_ACTIVE);
	ASSERT(un->un_column[resync].un_devflags &
	    (MD_RAID_COPY_RESYNC | MD_RAID_REGEN_RESYNC));

	/*
	 * if the column is not in resync then just bail out.
	 */
	if (! (un->un_column[resync].un_devstate & RCS_RESYNC)) {
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		return;
	}
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_START, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	/* identify device to write and its start block */

	if (un->un_column[resync].un_alt_dev != NODEV64) {
		if (raid_open_alt(un, resync)) {
			raid_set_state(un, resync, state, 0);
			md_unit_writerexit(ui);
			md_io_writerexit(ui);
			un = (mr_unit_t *)md_unit_readerlock(ui);
			cmn_err(CE_WARN, "md: %s: %s open failed replace "
				"terminated", md_shortname(MD_SID(un)),
				md_devname(MD_UN2SET(un),
					un->un_column[resync].un_alt_dev,
					NULL, 0));
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
			return;
		}
		ASSERT(un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC);
		dev_to_write = un->un_column[resync].un_alt_dev;
		write_devstart = un->un_column[resync].un_alt_devstart;
		write_pwstart = un->un_column[resync].un_alt_pwstart;
		if (un->un_column[resync].un_devflags & MD_RAID_DEV_ERRED) {
			single_read = 0;
			hs_state = HS_BAD;
		} else {
			hs_state = HS_FREE;
			single_read = 1;
		}
		un->un_column[resync].un_devflags |= MD_RAID_WRITE_ALT;
	} else {
		dev_to_write = un->un_column[resync].un_dev;
		write_devstart = un->un_column[resync].un_devstart;
		write_pwstart = un->un_column[resync].un_pwstart;
		single_read = 0;
		hs_state = HS_FREE;
		ASSERT(un->un_column[resync].un_devflags &
		    MD_RAID_REGEN_RESYNC);
	}

	alloc_bufs(cs, dbtob(bsize));
	/* initialize pre-write area */
	if (init_pw_area(un, dev_to_write, write_pwstart, resync)) {
		un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
		if (un->un_column[resync].un_alt_dev != NODEV64) {
			raid_close_alt(un, resync);
		}
		md_unit_writerexit(ui);
		md_io_writerexit(ui);
		if (dev_to_write == un->un_column[resync].un_dev)
			hs_state = HS_BAD;
		err = RAID_RESYNC_WRERROR;
		goto resync_comp_error;
	}

	un->c.un_status &= ~MD_UN_RESYNC_CANCEL;
	segsincolumn = un->un_segsincolumn;
	err_cnt = raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED);

	/* commit the record */

	md_unit_writerexit(ui);
	md_io_writerexit(ui);


	/* resync each line of the unit */
	for (line = 0; line <  segsincolumn; line += line_count) {
		/*
		 * Update address range in child struct and lock the line.
		 *
		 * The reader version of the line lock is used since only
		 * resync will use data beyond un_resync_line_index on the
		 * resync device.
		 */
		un = (mr_unit_t *)md_io_readerlock(ui);
		if (line + line_count > segsincolumn)
			line_count = segsincolumn - line;
		resync_end_pos = raid_resync_fillin_cs(line, line_count, cs);
		(void) md_unit_readerlock(ui);
		ASSERT(un->un_resync_line_index == resync_end_pos);
		err = raid_resync_region(cs, line, (int)line_count,
		    &single_read, &hs_state, &err_col, dev_to_write,
		    write_devstart);

		/*
		 * if the column failed to resync then stop writing directly
		 * to the column.
		 */
		if (err)
			un->un_resync_line_index = 0;

		md_unit_readerexit(ui);
		raid_line_exit(cs);
		md_io_readerexit(ui);

		if (err)
			break;

		un = (mr_unit_t *)md_unit_writerlock(ui);

		if (raid_state_cnt(un, RCS_ERRED | RCS_LAST_ERRED) != err_cnt) {
			err = RAID_RESYNC_STATE;
			md_unit_writerexit(ui);
			break;
		}
		md_unit_writerexit(ui);
	} /* for */

resync_comp_error:
	un = (mr_unit_t *)md_io_writerlock(ui);
	(void) md_unit_writerlock(ui);
	un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;

	recids[0] = 0;
	recids[1] = 0;
	switch (err) {
		/*
		 * successful resync
		 */
	    case RAID_RESYNC_OKAY:
		/* initialize pre-write area */
		if ((un->un_column[resync].un_orig_dev != NODEV64) &&
		    (un->un_column[resync].un_orig_dev ==
		    un->un_column[resync].un_alt_dev)) {
			/*
			 * replacing a hot spare
			 * release the hot spare, which will close the hotspare
			 * and mark it closed.
			 */
			raid_hs_release(hs_state, un, &recids[0], resync);
			/*
			 * make the resync target the main device and
			 * mark open
			 */
			un->un_column[resync].un_hs_id = 0;
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
			un->un_column[resync].un_devflags |= MD_RAID_DEV_ISOPEN;
			/* alt becomes the device so don't close it */
			un->un_column[resync].un_devflags &= ~MD_RAID_WRITE_ALT;
			un->un_column[resync].un_devflags &=
			    ~MD_RAID_ALT_ISOPEN;
			un->un_column[resync].un_alt_dev = NODEV64;
		}
		raid_set_state(un, resync, RCS_OKAY, 0);
		break;

	    case RAID_RESYNC_WRERROR:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: %s write error, replace "
				"terminated", md_shortname(MD_SID(un)),
				md_devname(MD_UN2SET(un),
					un->un_column[resync].un_orig_dev,
					NULL, 0));
			break;
		}
		if (HOTSPARED(un, resync)) {
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		raid_set_state(un, resync, RCS_ERRED, 0);
		if (un->un_column[resync].un_devflags & MD_RAID_REGEN_RESYNC)
			dev = un->un_column[resync].un_dev;
		else
			dev = un->un_column[resync].un_alt_dev;
		cmn_err(CE_WARN, "md: %s: %s write error replace terminated",
		    md_shortname(MD_SID(un)), md_devname(MD_UN2SET(un), dev,
		    NULL, 0));
		break;

	    case RAID_RESYNC_STATE:
		if (HOTSPARED(un, resync) && single_read &&
		    (un->un_column[resync].un_devflags & MD_RAID_COPY_RESYNC)) {
			/*
			 * this is the case where the resync target is
			 * bad but there is a good hotspare.  In this
			 * case keep the hotspare, and go back to okay.
			 */
			raid_set_state(un, resync, RCS_OKAY, 0);
			cmn_err(CE_WARN, "md: %s: needs maintenance, replace "
			    "terminated", md_shortname(MD_SID(un)));
			break;
		}
		if (HOTSPARED(un, resync)) {
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}
		break;
	    case RAID_RESYNC_RDERROR:
		if (HOTSPARED(un, resync)) {
			raid_hs_release(hs_state, un, &recids[0], resync);
			un->un_column[resync].un_dev =
			    un->un_column[resync].un_orig_dev;
			un->un_column[resync].un_devstart =
			    un->un_column[resync].un_orig_devstart;
			un->un_column[resync].un_pwstart =
			    un->un_column[resync].un_orig_pwstart;
		}

		if ((resync != err_col) && (err_col != NOCOLUMN))
			raid_set_state(un, err_col, RCS_ERRED, 0);
		break;

	    default:
		ASSERT(0);
	}
	if (un->un_column[resync].un_alt_dev != NODEV64) {
		raid_close_alt(un, resync);
	}

	/*
	 * an io operation may have gotten an error and placed a
	 * column in erred state.  This will abort the resync, which
	 * will end up in last erred.  This is ugly so go through
	 * the columns and do cleanup
	 */
	err_cnt = 0;
	last_err = 0;
	for (i = 0; i < un->un_totalcolumncnt; i++) {
		if (un->un_column[i].un_devstate & RCS_OKAY)
			continue;
		if (i == resync) {
			raid_set_state(un, i, RCS_ERRED, 1);
			err_cnt++;
		} else if (err == RAID_RESYNC_OKAY) {
			err_cnt++;
		} else {
			raid_set_state(un, i, RCS_LAST_ERRED, 1);
			last_err++;
		}
	}
	if ((err_cnt == 0) && (last_err == 0))
		un->un_state = RUS_OKAY;
	else if (last_err == 0) {
		un->un_state = RUS_ERRED;
		ASSERT(err_cnt == 1);
	} else if (last_err > 0) {
		un->un_state = RUS_LAST_ERRED;
	}

	uniqtime32(&un->un_column[resync].un_devtimestamp);
	un->un_resync_copysize = 0;
	un->un_column[resync].un_devflags &=
	    ~(MD_RAID_REGEN_RESYNC | MD_RAID_COPY_RESYNC);
	raid_commit(un, recids);
	/* release unit writer lock and acquire unit reader lock */
	md_unit_writerexit(ui);
	md_io_writerexit(ui);
	(void) md_unit_readerlock(ui);
	if (err == RAID_RESYNC_OKAY) {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_DONE,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
	} else {
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_RESYNC_FAILED,
		    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		if (raid_state_cnt(un, RCS_ERRED |
			RCS_LAST_ERRED) > 1) {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		} else {
			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED,
			    SVM_TAG_METADEVICE, MD_UN2SET(un), MD_SID(un));
		}
	}

	free_bufs(dbtob(bsize), cs);
}
static int
stripe_change(
	md_stripe_params_t	*msp,
	IOLOCK			*lock
)
{
	ms_params_t		*pp = &msp->params;
	minor_t			mnum = msp->mnum;
	ms_unit_t		*un;
	mdi_unit_t		*ui;
	int			r, c, i;
	struct ms_row		*mdr;
	ms_comp_t		*mdcomp, *mdc;
	mddb_recid_t		recids[4];
	int			irecid;
	int			inc_new_hsp = 0;
	int			err;
	set_t			setno = MD_MIN2SET(mnum);

	mdclrerror(&msp->mde);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(&msp->mde, MDE_INVAL_UNIT, mnum));

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(&msp->mde, MDE_DB_STALE, mnum, setno));

	if ((ui = MDI_UNIT(mnum)) == NULL) {
		return (mdmderror(&msp->mde, MDE_UNIT_NOT_SETUP, mnum));
	}

	if (!pp->change_hsp_id)
		return (0);

	un = (ms_unit_t *)md_ioctl_writerlock(lock, ui);

	/* verify that no hot spares are in use */
	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
	for (r = 0; r < un->un_nrows; r++) {
		mdr = &un->un_row[r];
		for (c = 0, i = mdr->un_icomp; c < mdr->un_ncomp; c++) {
			mdc = &mdcomp[i++];
			if (mdc->un_mirror.ms_hs_id != 0) {
				return (mdmderror(&msp->mde, MDE_HS_IN_USE,
				    mnum));
			}
		}
	}

	recids[1] = 0;
	recids[2] = 0;
	irecid = 1;
	if (pp->hsp_id != -1) {
		/* increment the reference count of the new hsp */
		err = md_hot_spare_ifc(HSP_INCREF, pp->hsp_id, 0, 0,
		    &recids[1], NULL, NULL, NULL);
		if (err) {
			return (mdhsperror(&msp->mde, MDE_INVAL_HSP,
			    pp->hsp_id));
		}
		inc_new_hsp = 1;
		irecid++;
	}

	if (un->un_hsp_id != -1) {
		/* decrement the reference count of the old hsp */
		err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
		    &recids[irecid], NULL, NULL, NULL);
		if (err) {
			err = mdhsperror(&msp->mde, MDE_INVAL_HSP,
			    pp->hsp_id);
			if (inc_new_hsp) {
				(void) md_hot_spare_ifc(HSP_DECREF,
				    pp->hsp_id, 0, 0,
				    &recids[1], NULL, NULL, NULL);
				/*
				 * Don't need to commit the record,
				 * cause it never got commit before
				 */
			}
			return (err);
		}
	}

	un->un_hsp_id = pp->hsp_id;

	recids[0] = un->c.un_record_id;
	recids[3] = 0;
	mddb_commitrecs_wrapper(recids);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));

	return (0);
}
示例#6
0
/*
 * NAME:	check_comp_4_hs
 *
 * DESCRIPTION: Check whether the input component has an error and can be
 *		backed with a hot spare (RCS_ERRED state), and initiate
 *		a resync if so.
 *
 * PARAMETERS:	mr_unit_t *un - raid unit
 *		int hs_index	- component to check
 *
 * LOCKS:	Expects Unit Writer Lock to be held upon entrance.  Releases
 *		the lock prior to calling raid_resync_unit, then reacquires
 *		it before returning.
 */
static void
check_comp_4_hs(
	mr_unit_t *un,
	int hs_index
)
{
	mddb_recid_t	recids[3];
	minor_t		mnum = MD_SID(un);
	mdi_unit_t	*ui;
	rcs_state_t	state;
	diskaddr_t	size;
	int		err;
	mr_column_t	*col;
	md_error_t	mde = mdnullerror;
	char		devname[MD_MAX_CTDLEN];
	char		hs_devname[MD_MAX_CTDLEN];
	set_t		setno;
	md_dev64_t	tmpdev;
	diskaddr_t	tmpdaddr;


	/* initialize */
	setno = MD_UN2SET(un);
	ui = MDI_UNIT(mnum);
	md_unit_readerexit(ui);
	(void) md_io_writerlock(ui);
	un = (mr_unit_t *)md_unit_writerlock(ui);
	col = &un->un_column[hs_index];

	/*
	 * add a hotspare for erred column only if not resyncing
	 */
	if ((!(COLUMN_STATE(un, hs_index) & RCS_ERRED)) ||
	    (raid_state_cnt(un, (RCS_ERRED | RCS_LAST_ERRED)) != 1) ||
	    (raid_state_cnt(un, RCS_RESYNC) > 0)) {
		goto errout;
	}

	recids[0] = 0;
	recids[1] = 0;
	/* if there is already a hotspare then just return */
	if (HOTSPARED(un, hs_index) && (col->un_devstate & RCS_ERRED)) {
		raid_hs_release(HS_BAD, un, &recids[0], hs_index);
		cmn_err(CE_WARN, "md: %s: %s hotspare errored and released",
		    md_shortname(mnum),
		    md_devname(MD_MIN2SET(mnum), col->un_dev, NULL, 0));
		col->un_dev = col->un_orig_dev;
		col->un_pwstart = col->un_orig_pwstart;
		col->un_devstart = col->un_orig_devstart;
		raid_commit(un, recids);

		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_METADEVICE,
		    setno, MD_SID(un));
	}
	ASSERT(!HOTSPARED(un, hs_index));

	state = col->un_devstate;
	size = col->un_pwstart + un->un_pwsize +
	    (un->un_segsize * un->un_segsincolumn);

again:
	/* quit if resync is already active */
	col->un_devflags |= MD_RAID_REGEN_RESYNC;
	if (resync_request(mnum, hs_index, 0, NULL))
		goto errout;

	recids[0] = 0;
	recids[1] = 0;

	tmpdev = col->un_dev;
	tmpdaddr = col->un_hs_pwstart;

	/* get a hotspare */
	if (md_hot_spare_ifc(HS_GET, un->un_hsp_id, size,
	    ((col->un_orig_pwstart >= 1) &&
	    (col->un_orig_pwstart != MD_DISKADDR_ERROR)),
	    &col->un_hs_id, &col->un_hs_key, &tmpdev, &tmpdaddr) != 0) {
		col->un_dev = tmpdev;
		col->un_hs_pwstart = tmpdaddr;
		release_resync_request(mnum);
		raid_set_state(un, hs_index, state, 1);
		goto errout;
	}

	col->un_hs_pwstart = tmpdaddr;

	/*
	 * record id is filled in by raid_commit, recids[0] filled in by
	 * md_hot_spare_ifc if needed
	 */
	recids[0] = col->un_hs_id;
	recids[1] = 0;

	/*
	 * close the device and open the hot spare.  The device should
	 * never be a hotspare here.
	 */
	if (col->un_devflags & MD_RAID_DEV_ISOPEN) {
		md_layered_close(col->un_orig_dev, MD_OFLG_NULL);
		col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
	}
	/*
	 * Try open by device id
	 */
	tmpdev = md_resolve_bydevid(mnum, tmpdev, col->un_hs_key);
	if (md_layered_open(mnum, &tmpdev, MD_OFLG_NULL)) {
		md_dev64_t hs_dev = tmpdev;
		/* cannot open return to orig */
		raid_hs_release(HS_BAD, un, &recids[0], hs_index);
		release_resync_request(mnum);
		raid_set_state(un, hs_index, state, 1);
		col->un_dev = col->un_orig_dev;
		col->un_devstart = col->un_orig_devstart;
		col->un_pwstart = col->un_orig_pwstart;
		col->un_devflags &= ~MD_RAID_DEV_ISOPEN;
		raid_commit(un, recids);
		cmn_err(CE_WARN, "md: %s: open error of hotspare %s",
		    md_shortname(mnum),
		    md_devname(MD_MIN2SET(mnum), hs_dev, NULL, 0));
		SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS, setno,
		    MD_SID(un));
		goto again;
	}

	col->un_dev = tmpdev;

	col->un_devflags |= MD_RAID_DEV_ISOPEN;

	/*
	 * move the values into the device fields.  Since in some cases
	 * the pwstart is not zero this must be added into the start of
	 * the hotspare to avoid over writting the label
	 */
	col->un_hs_pwstart += col->un_orig_pwstart;
	col->un_pwstart = col->un_hs_pwstart;
	col->un_hs_devstart = col->un_hs_pwstart + un->un_pwsize;
	col->un_devstart = col->un_hs_devstart;

	/* commit unit and hotspare records and release lock */
	raid_commit(un, recids);
	md_unit_writerexit(ui);
	md_io_writerexit(ui);

	err = raid_resync_unit(mnum, &mde);

	/* if resync fails, transition back to erred state and reset */
	if (err) {
		/* reaquire unit writerr lock */
		un = (mr_unit_t *)md_unit_writerlock(ui);

		raid_set_state(un, hs_index, RCS_ERRED, 0);

		/*
		 * close the hotspare and return it.  Then restore the
		 * original device back to the original state
		 */
		raid_hs_release(HS_FREE, un, &recids[0], hs_index);
		col->un_dev = col->un_orig_dev;
		col->un_devstart = col->un_orig_devstart;
		col->un_pwstart = col->un_orig_pwstart;
		raid_commit(un, recids);
		md_unit_writerexit(ui);
		un = (mr_unit_t *)md_unit_readerlock(ui);
		return;
	}

	setno = MD_MIN2SET(mnum);

	(void) md_devname(setno, col->un_orig_dev, devname,
		sizeof (devname));
	(void) md_devname(setno, col->un_dev, hs_devname,
		sizeof (hs_devname));

	cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s",
	    md_shortname(mnum), devname, hs_devname);
	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HOTSPARED, SVM_TAG_HS, setno,
	    MD_SID(un));
	(void) md_unit_readerlock(ui);
	return;

errout:
	md_unit_writerexit(ui);
	md_io_writerexit(ui);
	un = (mr_unit_t *)md_unit_readerlock(ui);
}
static int
stripe_replace(replace_params_t *params)
{
	minor_t		mnum = params->mnum;
	ms_unit_t	*un;
	mddb_recid_t	recids[6];
	ms_new_dev_t	nd;
	ms_cd_info_t	cd;
	int		ci;
	int		cmpcnt;
	void		*repl_data;
	md_dev64_t	fake_devt;
	void		(*repl_done)();

	mdclrerror(&params->mde);

	un = (ms_unit_t *)MD_UNIT(mnum);

	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
		return (mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum));
	}

	nd.nd_dev = params->new_dev;
	nd.nd_key = params->new_key;
	nd.nd_nblks = params->number_blks;
	nd.nd_start_blk = params->start_blk;
	nd.nd_labeled = params->has_label;
	nd.nd_hs_id = 0;

	/*
	 * stripe_component_count and stripe_get_dev only care about the
	 * minor number associated with the first argument which is a
	 * md_dev64_t
	 *
	 * The comments section for these two routines have been updated
	 * to indicate that this routine calls with fake major numbers.
	 */
	fake_devt = md_makedevice(0, mnum);
	cmpcnt = stripe_component_count(fake_devt, NULL);
	for (ci = 0; ci < cmpcnt; ci++) {
		(void) stripe_get_dev(fake_devt, NULL, ci, &cd);
		if ((cd.cd_dev == params->old_dev) ||
		    (cd.cd_orig_dev == params->old_dev))
			break;
	}
	if (ci == cmpcnt) {
		return (EINVAL);
	}

	/*  In case of a dryrun we're done here */
	if (params->options & MDIOCTL_DRYRUN) {
		return (0);
	}

	(void) stripe_replace_dev(fake_devt, 0, ci, &nd, recids, 6,
	    &repl_done, &repl_data);
	mddb_commitrecs_wrapper(recids);
	(*repl_done)(fake_devt, repl_data);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}
static int
stripe_grow(void *d, int mode, IOLOCK *lockp)
{
	minor_t		mnum;
	ms_unit_t	*un, *new_un;
	mdi_unit_t	*ui;
	minor_t		*par = NULL;
	IOLOCK		*plock = NULL;
	ms_comp_t	*mdcomp, *new_comp;
	int		row, i, c;
	mddb_recid_t	ms_recid;
	mddb_recid_t	old_vtoc = 0;
	mddb_recid_t	*recids;
	md_create_rec_option_t options;
	mddb_type_t	typ1;
	int		err;
	int64_t		tb, atb;
	uint_t		nr, oc;
	int		opened;
	int		rval = 0;
	set_t		setno;
	md_error_t	*mdep;
	int		npar;
	int		rid;
	int		num_recs;
	u_longlong_t	rev;
	md_grow_params_t	*mgp = d;


	mnum = mgp->mnum;
	mdep = &mgp->mde;
	setno = MD_MIN2SET(mnum);
	npar = mgp->npar;

	mdclrerror(mdep);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	ui = MDI_UNIT(mnum);
	if (ui == NULL) {
		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
	}

	if (npar >= 1) {
		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
		if (ddi_copyin((caddr_t)(uintptr_t)mgp->par, (caddr_t)par,
		    (npar * sizeof (*par)), mode) != 0) {
			kmem_free(par, npar * sizeof (*par));
			kmem_free(plock, npar * sizeof (*plock));
			return (EFAULT);
		}
	}

	/*
	 * we grab unit reader/writer first, then parent locks,
	 * then our own.
	 * we expect parent units to be sorted to avoid deadlock
	 */
	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
	for (i = 0; i < npar; ++i) {
		(void) md_ioctl_writerlock(&plock[i],
		    MDI_UNIT(par[i]));
	}
	un = (ms_unit_t *)md_ioctl_writerlock(lockp, ui);

	if (un->un_nrows != mgp->nrows) {
		rval = EINVAL;
		goto out;
	}

	typ1 = (mddb_type_t)md_getshared_key(setno,
	    stripe_md_ops.md_driver.md_drivername);

	/*
	 * Preserve the friendly name nature of growing device.
	 */
	options = MD_CRO_STRIPE;
	if (un->c.un_revision & MD_FN_META_DEV)
		options |= MD_CRO_FN;
	if (mgp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
		goto out;
#else
		ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
		    MD_CRO_64BIT | options, setno);
#endif
	} else {
		ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
		    MD_CRO_32BIT | options, setno);
	}


	if (ms_recid < 0) {
		rval = mddbstatus2error(mdep, (int)ms_recid, mnum, setno);
		goto out;
	}

	/* get the address of the new unit */
	new_un = (ms_unit_t *)mddb_getrecaddr(ms_recid);

	/*
	 * It is okay that we muck with the new unit here,
	 * since no one else will know about the unit struct
	 * until we commit it. If we crash, the record will
	 * be automatically purged, since we haven't
	 * committed it yet and the old unit struct will be found.
	 */

	/* copy in the user's unit struct */
	err = ddi_copyin((caddr_t)(uintptr_t)mgp->mdp, (caddr_t)new_un,
	    (size_t)mgp->size, mode);
	if (err) {
		mddb_deleterec_wrapper(ms_recid);
		rval = EFAULT;
		goto out;
	}
	if (options & MD_CRO_FN)
		new_un->c.un_revision |= MD_FN_META_DEV;

	/*
	 * allocate the real recids array.  since we may have to
	 * commit underlying metadevice records, we need an
	 * array of size: total number of new components being
	 * attached + 2 (one for the stripe itself, one for the
	 * end marker).
	 */
	num_recs = 2;
	rid = 0;
	for (row = 0; row < new_un->un_nrows; row++) {
		struct ms_row *mdr = &new_un->un_row[row];
		num_recs += mdr->un_ncomp;
	}
	recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
	recids[rid++] = ms_recid;

	/*
	 * Save a few of the new unit structs fields.
	 * Before they get clobbered.
	 */
	tb = new_un->c.un_total_blocks;
	atb = new_un->c.un_actual_tb;
	nr = new_un->un_nrows;
	oc = new_un->un_ocomp;
	rev = new_un->c.un_revision;

	/*
	 * Copy the old unit struct (static stuff)
	 * into new unit struct
	 */
	bcopy((caddr_t)un, (caddr_t)new_un,
	    sizeof (ms_unit_t) + ((nr - 2) * (sizeof (struct ms_row))));

	/*
	 * Restore the saved stuff.
	 */
	new_un->c.un_total_blocks = tb;
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	new_un->c.un_actual_tb = atb;
	new_un->un_nrows = nr;
	new_un->un_ocomp = oc;
	new_un->c.un_revision = rev;

	new_un->c.un_record_id = ms_recid;
	new_un->c.un_size = mgp->size;

	/* All 64 bit metadevices only support EFI labels. */
	if (mgp->options & MD_CRO_64BIT) {
		new_un->c.un_flag |= MD_EFILABEL;
		/*
		 * If the device was previously smaller than a terabyte,
		 * and had a vtoc record attached to it, we remove the
		 * vtoc record, because the layout has changed completely.
		 */
		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
		    (un->c.un_vtoc_id != 0)) {
			old_vtoc = un->c.un_vtoc_id;
			new_un->c.un_vtoc_id =
			    md_vtoc_to_efi_record(old_vtoc, setno);
		}
	}

	/*
	 * Copy the old component structs into the new unit struct.
	 */
	mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]);
	new_comp = (ms_comp_t *)((void *)&((char *)new_un)[new_un->un_ocomp]);
	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++, c++) {
			bcopy((caddr_t)&mdcomp[c], (caddr_t)&new_comp[c],
			    sizeof (ms_comp_t));
		}
	}

	opened = md_unit_isopen(ui);

	/*
	 * Set parent on metadevices being added.
	 * Open the new devices being added.
	 * NOTE: currently soft partitions are the only metadevices
	 * which can appear within a stripe.
	 */
	for (row = un->un_nrows; row < new_un->un_nrows; row++) {
		struct ms_row *mdr = &new_un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp	*mdc = &new_comp[c++];
			md_dev64_t comp_dev;
			md_unit_t *comp_un;

			comp_dev = mdc->un_dev;
			/* set parent on any metadevices */
			if (md_getmajor(comp_dev) == md_major) {
				comp_un = MD_UNIT(md_getminor(comp_dev));
				recids[rid++] = MD_RECID(comp_un);
				md_set_parent(comp_dev, MD_SID(new_un));
			}

			if (opened) {
				md_dev64_t tmpdev = mdc->un_dev;
				/*
				 * Open by device id
				 * Check if this comp is hotspared and
				 * if it is then use the key for hotspare
				 */
				tmpdev = md_resolve_bydevid(mnum, tmpdev,
				    mdc->un_mirror.ms_hs_id ?
				    mdc->un_mirror.ms_hs_key : mdc->un_key);
				(void) md_layered_open(mnum, &tmpdev,
				    MD_OFLG_NULL);
				mdc->un_dev = tmpdev;
				mdc->un_mirror.ms_flags |= MDM_S_ISOPEN;
			}
		}
	}

	/* set end marker */
	recids[rid] = 0;
	/* commit new unit struct */
	mddb_commitrecs_wrapper(recids);

	/* delete old unit struct */
	mddb_deleterec_wrapper(un->c.un_record_id);

	/* place new unit in in-core array */
	md_nblocks_set(mnum, new_un->c.un_total_blocks);
	MD_UNIT(mnum) = new_un;

	/*
	 * If old_vtoc has a non zero value, we know:
	 * - This unit crossed the border from smaller to larger one TB
	 * - There was a vtoc record for the unit,
	 * - This vtoc record is no longer needed, because
	 *   a new efi record has been created for this un.
	 */
	if (old_vtoc != 0) {
		mddb_deleterec_wrapper(old_vtoc);
	}

	/* free recids array */
	kmem_free(recids, num_recs * sizeof (mddb_recid_t));

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
	    MD_UN2SET(new_un), MD_SID(new_un));

	/* release locks, return success */
out:
	for (i =  npar - 1; (i >= 0); --i)
		md_ioctl_writerexit(&plock[i]);
	rw_exit(&md_unit_array_rw.lock);
	if (plock != NULL)
		kmem_free(plock, npar * sizeof (*plock));
	if (par != NULL)
		kmem_free(par, npar * sizeof (*par));
	return (rval);
}
static int
stripe_set(void *d, int mode)
{
	minor_t		mnum;
	ms_unit_t	*un;
	void		*p;
	mddb_recid_t	ms_recid;
	mddb_recid_t	*recids;
	mddb_type_t	typ1;
	int		err;
	set_t		setno;
	md_error_t	*mdep;
	struct ms_comp	*mdcomp;
	int		row;
	int		rid;
	int		num_recs;
	int		i, c;
	md_set_params_t	*msp = d;

	mnum = msp->mnum;
	setno = MD_MIN2SET(mnum);

	mdep = &msp->mde;

	mdclrerror(mdep);

	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
	}

	if (md_get_setstatus(setno) & MD_SET_STALE)
		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));

	un = MD_UNIT(mnum);
	if (un != NULL) {
		return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));
	}


	typ1 = (mddb_type_t)md_getshared_key(setno,
	    stripe_md_ops.md_driver.md_drivername);

	/* create the db record for this mdstruct */
	if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
		return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
#else
		ms_recid = mddb_createrec((size_t)msp->size, typ1, 0,
		    MD_CRO_64BIT | MD_CRO_STRIPE | MD_CRO_FN, setno);
#endif
	} else {
		ms_recid = mddb_createrec((size_t)msp->size, typ1, 0,
		    MD_CRO_32BIT | MD_CRO_STRIPE | MD_CRO_FN, setno);
	}
	if (ms_recid < 0)
		return (mddbstatus2error(mdep, ms_recid, mnum, setno));

	/* get the address of the mdstruct */
	p = (void *) mddb_getrecaddr(ms_recid);
	/*
	 * It is okay that we muck with the mdstruct here,
	 * since no one else will know about the mdstruct
	 * until we commit it. If we crash, the record will
	 * be automatically purged, since we haven't
	 * committed it yet.
	 */

	/* copy in the user's mdstruct */
	if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, (caddr_t)p,
	    (size_t)msp->size, mode)) {
		mddb_deleterec_wrapper(ms_recid);
		return (EFAULT);
	}

	un = (ms_unit_t *)p;

	/* All 64 bit metadevices only support EFI labels. */
	if (msp->options & MD_CRO_64BIT) {
		un->c.un_flag |= MD_EFILABEL;
	}

	/*
	 * allocate the real recids array.  since we may have to commit
	 * underlying metadevice records, we need an array
	 * of size: total number of components in stripe + 3
	 * (1 for the stripe itself, one for the hotspare, one
	 * for the end marker).
	 */
	num_recs = 3;
	rid = 0;
	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		num_recs += mdr->un_ncomp;
	}
	recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
	recids[rid++] = ms_recid;

	MD_SID(un) = mnum;
	MD_RECID(un) = recids[0];
	MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SUB_MIRROR | MD_CAN_SP;
	MD_PARENT(un) = MD_NO_PARENT;
	un->c.un_revision |= MD_FN_META_DEV;

	if (err = stripe_build_incore(p, 0)) {
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recids[0]);
		kmem_free(recids, num_recs * sizeof (mddb_recid_t));
		return (err);
	}

	/*
	 * Update unit availability
	 */
	md_set[setno].s_un_avail--;

	recids[rid] = 0;
	if (un->un_hsp_id != -1)
		err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0,
		    &recids[rid++], NULL, NULL, NULL);


	if (err) {
		md_nblocks_set(mnum, -1ULL);
		MD_UNIT(mnum) = NULL;

		mddb_deleterec_wrapper(recids[0]);
		kmem_free(recids, num_recs * sizeof (mddb_recid_t));
		return (mdhsperror(mdep, MDE_INVAL_HSP, un->un_hsp_id));
	}

	/*
	 * set the parent on any metadevice components.
	 * NOTE: currently soft partitions are the only metadevices
	 * which can appear within a stripe.
	 */
	mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]);
	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			ms_comp_t *mdc = &mdcomp[c++];
			md_dev64_t comp_dev;
			md_unit_t *comp_un;

			comp_dev = mdc->un_dev;
			if (md_getmajor(comp_dev) == md_major) {
				/* set parent and disallow soft partitioning */
				comp_un = MD_UNIT(md_getminor(comp_dev));
				recids[rid++] = MD_RECID(comp_un);
				md_set_parent(mdc->un_dev, MD_SID(un));
			}
		}
	}

	/* set end marker */
	recids[rid] = 0;
	mddb_commitrecs_wrapper(recids);

	md_create_unit_incore(mnum, &stripe_md_ops, 0);
	kmem_free(recids, (num_recs * sizeof (mddb_recid_t)));
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE,
	    MD_UN2SET(un), MD_SID(un));
	return (0);
}
示例#10
0
int
stripe_build_incore(void *p, int snarfing)
{
	ms_unit_t *un = (ms_unit_t *)p;
	struct ms_comp	*mdcomp;
	minor_t		mnum;
	int		row;
	int		i;
	int		c;
	int		ncomps;

	mnum = MD_SID(un);

	if (MD_UNIT(mnum) != NULL)
		return (0);

	MD_STATUS(un) = 0;

	/*
	 * Reset all the is_open flags, these are probably set
	 * cause they just came out of the database.
	 */
	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);

	ncomps = 0;
	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		ncomps += mdr->un_ncomp;
	}

	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];
		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp		*mdc;
			set_t			setno;
			md_dev64_t		tmpdev;

			mdc = &mdcomp[c++];
			mdc->un_mirror.ms_flags &=
			    ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED);

			if (!snarfing)
				continue;

			setno = MD_MIN2SET(mnum);

			tmpdev = md_getdevnum(setno, mddb_getsidenum(setno),
			    mdc->un_key, MD_NOTRUST_DEVT);
			mdc->un_dev = tmpdev;
			/*
			 * Check for hotspares. If the hotspares haven't been
			 * snarfed yet, stripe_open_all_devs() will do the
			 * remapping of the dev's later.
			 */
			if (mdc->un_mirror.ms_hs_id != 0) {
				mdc->un_mirror.ms_orig_dev = mdc->un_dev;
				(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
				    0, &mdc->un_mirror.ms_hs_id, NULL,
				    &tmpdev, NULL);
				mdc->un_dev = tmpdev;
			}
		}
	}

	/* place various information in the in-core data structures */
	md_nblocks_set(mnum, un->c.un_total_blocks);
	MD_UNIT(mnum) = un;

	return (0);
}
示例#11
0
static int
stripe_open_all_devs(ms_unit_t *un, int md_oflags)
{
	minor_t		mnum = MD_SID(un);
	int		row;
	int		i;
	int		c;
	struct ms_comp	*mdcomp;
	int		err;
	int		cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS);
	int		probe_err_cnt = 0;
	int		total_comp_cnt = 0;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);
	mdkey_t		key;

	mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);

	/*
	 * For a probe call, if any component of a stripe or a concat
	 * can be opened, it is considered to be a success. The total number
	 * of components in a stripe are computed prior to starting a probe.
	 * This number is then compared against the number of components
	 * that could be be successfully opened. If none of the components
	 * in a stripe can be opened, only then an ENXIO is returned for a
	 * probe type open.
	 */

	for (row = 0; row < un->un_nrows; row++) {
		struct ms_row *mdr = &un->un_row[row];

		if (md_oflags & MD_OFLG_PROBEDEV)
			total_comp_cnt += mdr->un_ncomp;

		for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
			struct ms_comp	*mdc;
			md_dev64_t tmpdev;

			mdc = &mdcomp[c++];
			tmpdev = mdc->un_dev;
			/*
			 * Do the open by device id
			 * Check if this comp is hotspared and
			 * if it is then use the key for hotspare.
			 * MN disksets don't use devids, so we better don't use
			 * md_devid_found/md_resolve_bydevid there. Rather do,
			 * what's done in stripe_build_incore()
			 */
			if (MD_MNSET_SETNO(setno)) {
				if (mdc->un_mirror.ms_hs_id != 0) {
					(void) md_hot_spare_ifc(HS_MKDEV, 0, 0,
					    0, &mdc->un_mirror.ms_hs_id, NULL,
					    &tmpdev, NULL);
				}
			} else {
				key = mdc->un_mirror.ms_hs_id ?
				    mdc->un_mirror.ms_hs_key : mdc->un_key;
				if ((md_getmajor(tmpdev) != md_major) &&
				    md_devid_found(setno, side, key) == 1) {
					tmpdev = md_resolve_bydevid(mnum,
					    tmpdev, key);
				}
			}

			/*
			 * For a submirror, we only want to open those devices
			 * that are not errored. If the device is errored then
			 * then there is no reason to open it and leaving it
			 * closed allows the RCM/DR code to work so that the
			 * errored device can be replaced.
			 */
			if ((md_oflags & MD_OFLG_PROBEDEV) ||
			    ! (mdc->un_mirror.ms_state & CS_ERRED)) {

				err = md_layered_open(mnum, &tmpdev, md_oflags);
			} else {
				err = ENXIO;
			}

			/*
			 * Only set the un_dev if the tmpdev != NODEV64. If
			 * it is NODEV64 then the md_layered_open() will have
			 * failed in some manner.
			 */
			if (tmpdev != NODEV64)
				mdc->un_dev = tmpdev;

			if (err) {
				if (!cont_on_errors) {
					stripe_close_all_devs(un, md_oflags);
					return (ENXIO);
				}

				if (md_oflags & MD_OFLG_PROBEDEV)
					probe_err_cnt++;
			} else {
				if (md_oflags & MD_OFLG_PROBEDEV) {
					mdc->un_mirror.ms_flags |=
					    MDM_S_PROBEOPEN;
				} else
					mdc->un_mirror.ms_flags |= MDM_S_ISOPEN;
			}
		}
	}

	/* If every component in a stripe could not be opened fail */
	if ((md_oflags & MD_OFLG_PROBEDEV) &&
	    (probe_err_cnt == total_comp_cnt))
		return (ENXIO);
	else
		return (0);
}