Ejemplo n.º 1
0
/*
 * FUNCTION:	meta_replicaslice()
 * INPUT:	dnp	- the name of the drive to check
 * OUTPUT:	slicep	- where the chosen slice number is stored;
 *			  may be NULL
 *		ep	- pointer to an md_error_t structure in which
 *			  to return errors to the caller
 * RETURNS:	int	-  0 - the slice number was determined, and
 *			       was stored in *slicep if slicep is
 *			       not NULL
 *			  -1 - otherwise
 *
 * PURPOSE:	Work out which slice of the given drive should be
 *		set aside, presumably for metadb replica usage.  The
 *		choice depends on the label type, which is probed
 *		with the DKIOCGGEOM ioctl: slice 7 when the ioctl
 *		succeeds (VTOC label), slice 6 when it fails with
 *		ENOTSUP (EFI label).
 *
 * NOTE:	Passing a NULL slicep turns this into a pure check:
 *		the return code alone says whether or not the slice
 *		number could be determined.
 */
int
meta_replicaslice(
	mddrivename_t	*dnp,
	uint_t		*slicep,
	md_error_t	*ep
)
{
	struct dk_geom	geom;
	char		*devpath = dnp->rname;
	uint_t		chosen;
	int		geom_rc;
	int		geom_errno = 0;
	int		fd;

	fd = open(devpath, (O_RDONLY|O_NDELAY), 0);
	if (fd < 0) {
		size_t	buflen;
		char	*s0path;
		int	saved_errno;

		/* Only a missing node is worth a second attempt. */
		if (errno != ENOENT)
			return (mdsyserror(ep, errno, devpath));

		/*
		 * Retry with "s0" appended to the raw name.  The
		 * buffer holds the name, the two extra characters and
		 * the terminating NUL.
		 */
		buflen = strlen(devpath) + 3;
		s0path = Zalloc(buflen);
		(void) snprintf(s0path, buflen, "%ss0", devpath);
		fd = open(s0path, (O_RDONLY|O_NDELAY), 0);
		/* Capture errno before Free() gets a chance to alter it. */
		saved_errno = errno;
		Free(s0path);
		if (fd < 0)
			return (mdsyserror(ep, saved_errno, devpath));
	}

	/*
	 * Probe the label type.  A device that does not support
	 * DKIOCGGEOM carries an EFI label.  The descriptor is only
	 * needed for this one ioctl, so errno is saved and the
	 * descriptor closed straight away.
	 */
	errno = 0;
	geom_rc = ioctl(fd, DKIOCGGEOM, &geom);
	geom_errno = errno;
	(void) close(fd);

	if (geom_rc == 0) {
		/* VTOC style label: slice 7 is used. */
		chosen = MD_SLICE7;
	} else if (geom_errno == ENOTSUP) {
		/*
		 * EFI style label: slice 7 cannot be used because its
		 * minor number is reserved, so slice 6 is used instead.
		 */
		chosen = MD_SLICE6;
	} else if (geom_errno == ENOTTY) {
		/* The name did not refer to a disk at all. */
		return (mddeverror(ep, MDE_NOT_DISK, NODEV, devpath));
	} else {
		/* The ioctl failed for some other reason. */
		return (mdsyserror(ep, geom_errno, devpath));
	}

	if (slicep != NULL)
		*slicep = chosen;

	return (0);
}
Ejemplo n.º 2
0
/*
 * FUNCTION:	meta_repartition_drive()
 * INPUT:	sp	- the set name for the device to check
 *		dnp	- the name of the drive to partition
 *		options	- option flags (see NOTES)
 * OUTPUT:	vtocp	- pointer to an mdvtoc_t structure in which
 *			  to return the new VTOC to the caller; may be
 *			  NULL.  Only written when a proposed VTOC is
 *			  actually built (see NOTES).
 *		ep	- pointer to an md_error_t structure in which
 *			  to return errors to the caller
 * RETURNS:	int	-  0 - drive was or can be repartitioned
 *			  -1 - drive could not or should not be
 *			       repartitioned
 * PURPOSE:	Repartition a disk for use in a disk set or in order
 *		to create soft partitions on it.  Alternatively,
 *		return the VTOC that the disk would have if it were
 *		repartitioned without actually repartitioning it.
 *
 * NOTES:
 *
 *     This routine will repartition a drive to make it suitable for
 *     inclusion in a diskset.  Specifically, it will create a
 *     proposed VTOC that specifies a replica slice that begins at the
 *     first valid lba, is large enough to hold a label and a metadb
 *     replica, does not overlap any other slices, and is unmountable.
 *     If the current replica slice already satisfies those criteria,
 *     the routine will neither create a proposed VTOC nor repartition
 *     the drive unless the MD_REPART_FORCE flag is passed into the
 *     routine in the options argument.  If the routine does create a
 *     proposed VTOC, it will return the proposed VTOC in *vtocp if
 *     vtocp isn't NULL.
 *
 *     The slice to be used as the replica slice is determined by the
 *     function meta_replicaslice().
 *
 *     If the replica slice does not satisfy the above criteria or the
 *     MD_REPART_FORCE flag is set, the proposed VTOC will specify a
 *     replica slice that satisfies the above criteria, a slice zero
 *     that contains the remaining space on the disk, and no other
 *     slices.  If that repartitioning would cause the replica slice
 *     to move or shrink, and the MD_REPART_LEAVE_REP option is set,
 *     the routine will return -1 without creating or returning a
 *     proposed vtoc, and without repartitioning the disk.  Otherwise
 *     the routine will repartition the disk unless the
 *     MD_REPART_DONT_LABEL flag is set in the options argument.
 *
 *     If the MD_REPART_DONT_LABEL flag is set in the options argument,
 *     but the routine would otherwise repartition the drive, the
 *     routine won't repartition the drive, but will create a proposed
 *     VTOC that satisfies the criteria defined above and return it
 *     in *vtocp if vtocp isn't NULL.  The MD_REPART_DONT_LABEL
 *     option allows calling routines to determine what the contents of
 *     the drive's VTOC would be if the drive were repartitioned without
 *     actually repartitioning the drive.
 */
int
meta_repartition_drive(
	mdsetname_t	*sp,
	mddrivename_t	*dnp,
	int		options,
	mdvtoc_t	*vtocp,
	md_error_t	*ep
)
{
	uint_t			 replicaslice;
	diskaddr_t		 first_lba, last_lba;
	int			 round_sizes = 1;	/* 0 for EFI disks */
	unsigned long long	 cylsize;
	unsigned long long	 drvsize;
	int			 i;
	mdgeom_t		*mdgp;		/* set only if round_sizes */
	mdvtoc_t		*mdvp;
	mdvtoc_t		 proposed_vtoc;
	uint_t			 reservedcyl;	/* set only if round_sizes */
	ushort_t		 resflag;
	mdname_t		*resnp;
	unsigned long long	 ressize;
	md_set_desc		*sd;
	daddr_t			 dbsize;
	diskaddr_t		 replica_start;
	diskaddr_t		 replica_size;
	diskaddr_t		 replica_end;
	diskaddr_t		 data_start;
	diskaddr_t		 data_size;

	/* Find out which slice is to hold the replica. */
	if (meta_replicaslice(dnp, &replicaslice, ep) != 0) {
		return (-1);
	}

	/*
	 * Don't round for EFI disks.  Slice 6 being chosen as the
	 * replica slice is what identifies an EFI disk here.
	 */
	if (replicaslice == MD_SLICE6)
		round_sizes = 0;

	/*
	 * We took as argument a drive name pointer, but we need a
	 * slice name pointer to retrieve vtoc information.  So get
	 * the name pointer for slice zero first, then use it to get
	 * the vtoc info for the disk.
	 */
	if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL)
		return (-1);

	if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL)
		return (-1);

	/*
	 * Determine the metadb size.  MD_DBSIZE is the default;
	 * MD_MN_DBSIZE is used when the set is not the local set and
	 * MD_MNSET_DESC() is true for its set descriptor.
	 */
	dbsize = MD_DBSIZE;
	if (!metaislocalset(sp)) {
		if ((sd = metaget_setdesc(sp, ep)) == NULL)
			return (-1);

		if (MD_MNSET_DESC(sd))
			dbsize = MD_MN_DBSIZE;
	}

	/* If we've got an efi disk, we better have lba info */
	first_lba = mdvp->first_lba;
	last_lba = mdvp->last_lba;
	ASSERT((round_sizes != 0) || (last_lba > 0));

	/*
	 * At this point, ressize is used as a minimum value.  Later
	 * it will be rounded up to a cylinder boundary if
	 * appropriate.  ressize is in units of disk sectors.
	 */
	ressize = dbsize + VTOC_SIZE;
	resflag = V_UNMNT;

	/*
	 * If we're forcing the repartition, we can skip the replica
	 * slice and overlap tests.
	 */
	if (options & MD_REPART_FORCE) {
		goto do_repartition;
	}

	/*
	 * Replica slice tests: it must begin at first_lba, be long
	 * enough, have the right flags, and not overlap any other
	 * slices.  If any of these conditions is violated, we need to
	 * repartition the disk.
	 */
	if (mdvp->parts[replicaslice].start != first_lba) {
		goto do_repartition;
	}

	if (mdvp->parts[replicaslice].size < ressize) {
		goto do_repartition;
	}

	if (mdvp->parts[replicaslice].flag != resflag) {
		goto do_repartition;
	}

	/*
	 * Check for overlap: this test should use the actual size of
	 * the replica slice, as contained in the vtoc, and NOT the
	 * minimum size calculated above.  Since the replica slice is
	 * known to start at first_lba here, any other non-empty slice
	 * that starts before replica_end is treated as overlapping.
	 */
	replica_end = first_lba + mdvp->parts[replicaslice].size;
	for (i = 0; i < mdvp->nparts; i++) {
		if (i != replicaslice) {
			if ((mdvp->parts[i].size > 0) &&
			    (mdvp->parts[i].start < replica_end)) {
				goto do_repartition;
			}
		}
	}

	/*
	 * If we passed the above tests, then the disk is already
	 * partitioned appropriately, and we're not being told to
	 * force a change.  Note that *vtocp is left untouched on
	 * this path.
	 */
	return (0);

do_repartition:

	/*
	 * Retrieve disk geometry info and round to cylinder sizes.
	 * mdgp and reservedcyl are assigned only in this branch; they
	 * are read again below only under the same round_sizes test.
	 */
	if (round_sizes != 0) {

		if ((mdgp = metagetgeom(resnp, ep)) == NULL)
			return (-1);

		/*
		 * Both cylsize and drvsize are in units of disk
		 * sectors.
		 *
		 * The intended results are of type unsigned long
		 * long.  Since each operand of the first
		 * multiplication is of type unsigned int, we risk
		 * overflow by multiplying and then converting the
		 * result.  Therefore we explicitly cast (at least)
		 * one of the operands, forcing conversion BEFORE
		 * multiplication, and avoiding overflow.  The second
		 * assignment is OK, since one of the operands is
		 * already of the desired type.
		 */
		cylsize =
		    ((unsigned long long)mdgp->nhead) * mdgp->nsect;
		drvsize = cylsize * mdgp->ncyl;

		/*
		 * How many cylinders must we reserve for the replica
		 * slice to ensure that it meets the previously
		 * calculated minimum size?  (Round up, then convert
		 * the cylinder count back into sectors.)
		 *
		 * NOTE(review): cylsize is used as a divisor without
		 * being checked for zero; presumably metagetgeom()
		 * never reports zero heads or sectors -- confirm.
		 */
		reservedcyl = (ressize + cylsize - 1) / cylsize;
		ressize = reservedcyl * cylsize;
	} else {
		/* EFI: no rounding, the usable span comes from the lbas */
		drvsize = last_lba - first_lba;
	}

	/*
	 * Would this require a forbidden change?  With
	 * MD_REPART_LEAVE_REP set, fail if the current replica slice
	 * does not start at first_lba or is smaller than the (now
	 * possibly rounded) ressize.
	 */
	if (options & MD_REPART_LEAVE_REP) {
		if ((mdvp->parts[replicaslice].start != first_lba) ||
		    (mdvp->parts[replicaslice].size < ressize)) {
			return (mddeverror(ep, MDE_REPART_REPLICA,
			    resnp->dev, NULL));
		}
	}

	/*
	 * It seems unlikely that someone would pass us too small a
	 * disk, but it's still worth checking for...
	 *
	 * With rounding the test is made in cylinders; without it
	 * (EFI) the test is made in sectors against last_lba.
	 */
	if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) ||
	    ((round_sizes == 0) && (ressize + first_lba >= last_lba))) {
		return (mdmddberror(ep, MDE_DB_TOOSMALL,
		    meta_getminor(resnp->dev), sp->setno, 0, NULL));
	}

	/*
	 * Lay out the two slices: the replica slice at the start of
	 * the usable space, the data slice immediately after it and
	 * covering the rest of drvsize.
	 */
	replica_start = first_lba;
	replica_size = ressize;
	data_start = first_lba + ressize;
	data_size = drvsize - ressize;

	/*
	 * Create the proposed VTOC.  First copy the current VTOC
	 * into the proposed VTOC to duplicate the values that don't
	 * need to change.  Then change the partition table and set
	 * the flag value for the replica slice to resflag to reserve it
	 * for metadata.
	 */
	proposed_vtoc = *mdvp;
	/* The proposed vtoc needs at least replicaslice + 1 partitions */
	if (replicaslice >= proposed_vtoc.nparts) {
		proposed_vtoc.nparts = replicaslice + 1;
	}
	for (i = 0; i < proposed_vtoc.nparts; i++) {
		/*
		 * don't change the reserved partition of an EFI device;
		 * instead cut the data slice back so that it ends where
		 * the reserved partition starts.  Every other partition
		 * entry is cleared.
		 */
		if (proposed_vtoc.parts[i].tag == V_RESERVED)
			data_size = proposed_vtoc.parts[i].start - data_start;
		else
			(void) memset(&proposed_vtoc.parts[i], '\0',
				sizeof (proposed_vtoc.parts[i]));
	}

	/* Slice zero gets the data area; its flag stays zeroed. */
	proposed_vtoc.parts[MD_SLICE0].start = data_start;
	proposed_vtoc.parts[MD_SLICE0].size = data_size;
	proposed_vtoc.parts[MD_SLICE0].tag = V_USR;
	proposed_vtoc.parts[replicaslice].start = replica_start;
	proposed_vtoc.parts[replicaslice].size = replica_size;
	proposed_vtoc.parts[replicaslice].flag = resflag;
	proposed_vtoc.parts[replicaslice].tag = V_USR;

	if (!(options & MD_REPART_DONT_LABEL)) {
		/*
		 * Label the disk with the proposed VTOC.  The proposal
		 * is first copied over the structure obtained from
		 * metagetvtoc(); presumably metasetvtoc() writes that
		 * structure out -- TODO confirm.  If metasetvtoc()
		 * fails, *vtocp is not updated.
		 */
		*mdvp = proposed_vtoc;
		if (metasetvtoc(resnp, ep) != 0) {
			return (-1);
		}
	}

	if (vtocp != NULL) {
		/*
		 * Return the proposed VTOC.
		 */
		*vtocp = proposed_vtoc;
	}

	return (0);
}
Ejemplo n.º 3
0
/*
 * seths_enable()
 *
 * Bring the broken hot spare identified by shs->shs_component_old back
 * to the HSS_AVAILABLE state, taking its start block, label flag and
 * block count from shs.
 *
 * The hot spare is looked up by namespace key rather than by device
 * number, since the component may or may not currently be present in
 * the system.  The key must be unique: if the namespace holds several
 * entries for the component we cannot tell which one is meant, and
 * failing keeps a hot spare from being enabled on a slice that may
 * already be in use by another metadevice.
 *
 * With HS_OPT_DRYRUN set only the checks are performed.
 *
 * Returns 0 on success, otherwise the result of the mddeverror() call
 * that recorded the failure in shs->mde.
 */
static int
seths_enable(set_hs_params_t *shs)
{
	set_t		setno = shs->md_driver.md_setno;
	hot_spare_t	*spare;
	mdkey_t		old_key;
	int		nkeys = 0;
	mddb_recid_t	commit_ids[2];

	/* Translate the old component into its namespace key. */
	if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
	    shs->shs_component_old, &old_key, &nkeys) != 0) {
		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
		    shs->shs_component_old));
	}

	/* More than one matching key: no way to pick the right one. */
	if (nkeys > 1) {
		return (mddeverror(&shs->mde, MDE_MULTNM,
		    shs->shs_component_old));
	}

	/* No key at all: one should exist for this entry. */
	if (nkeys == 0) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Walk the set's hot spare list looking for that key. */
	for (spare = (hot_spare_t *)md_set[setno].s_hs; spare != NULL;
	    spare = spare->hs_next) {
		if (spare->hs_key == old_key)
			break;
	}

	if (spare == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Only a broken hot spare can be enabled. */
	if (spare->hs_state != HSS_BROKEN) {
		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
		    spare->hs_devnum));
	}

	/* A dry run stops once the checks have passed. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/* Repair the in-core record ... */
	set_hot_spare_state(spare, HSS_AVAILABLE);
	spare->hs_start_blk = shs->shs_start_blk;
	spare->hs_has_label = shs->shs_has_label;
	spare->hs_number_blks = shs->shs_number_blks;

	/* ... and commit it; the id list is zero-terminated. */
	commit_ids[0] = spare->hs_record_id;
	commit_ids[1] = 0;
	mddb_commitrecs_wrapper(commit_ids);

	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
	    shs->shs_component_old);

	return (0);
}
Ejemplo n.º 4
0
/*
 * seths_replace()
 *
 * Replace the hot spare shs->shs_component_old with
 * shs->shs_component_new in the hot spare pool
 * shs->shs_hot_spare_pool.
 *
 * The old hot spare is looked up by device number first.  If that
 * fails it is looked up by namespace key among the hot spares whose
 * hs_devnum is NODEV64; the key must be unique.  The old hot spare
 * must be a member of the pool and, unless HS_OPT_FORCE is given, must
 * not be in the HSS_RESERVED state.  The new component must not
 * already be a member of the pool; if no hot spare record exists for
 * it on the set's list, one is created.
 *
 * With HS_OPT_DRYRUN set only the checks are performed.
 *
 * Returns 0 on success, otherwise the result of the md*error() routine
 * that recorded the failure in shs->mde.
 */
static int
seths_replace(set_hs_params_t *shs)
{
	set_t			setno;
	mddb_type_t		rectype;
	hot_spare_pool_t	*pool;
	hot_spare_t		*old_hs;
	hot_spare_t		*old_prev;
	hot_spare_t		*repl_hs;
	int			repl_exists = 0;
	int			drop_old = 0;
	int			slot;
	mddb_recid_t		new_recid;
	mddb_recid_t		commit_ids[5];
	sv_dev_t		old_name;
	mdkey_t			old_key;
	int			nkeys = 0;

	setno = HSP_SET(shs->shs_hot_spare_pool);
	rectype = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/*
	 * First pass over the hot spare list: match on device number.
	 * old_prev trails one element behind so that the entry can be
	 * unlinked later.
	 */
	old_prev = NULL;
	for (old_hs = (hot_spare_t *)md_set[setno].s_hs; old_hs != NULL;
	    old_prev = old_hs, old_hs = old_hs->hs_next) {
		if (old_hs->hs_devnum == shs->shs_component_old)
			break;
	}

	if (old_hs == NULL) {
		/*
		 * The device number did not match anything, so fall
		 * back to the namespace key of shs_component_old.  The
		 * key has to be unique: with several namespace entries
		 * for the old component there is no telling which key
		 * is the right one.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &old_key, &nkeys) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/* Ambiguous: more than one key matches the old device. */
		if (nkeys > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}

		/* Missing: a key for this entry should exist. */
		if (nkeys == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}

		/*
		 * Second pass: keys only need comparing for entries
		 * whose device number is NODEV64.
		 */
		old_prev = NULL;
		for (old_hs = (hot_spare_t *)md_set[setno].s_hs;
		    old_hs != NULL;
		    old_prev = old_hs, old_hs = old_hs->hs_next) {
			if ((old_hs->hs_devnum == NODEV64) &&
			    (old_hs->hs_key == old_key))
				break;
		}
	}

	if (old_hs == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* A reserved hot spare may only be replaced when forced. */
	if ((old_hs->hs_state == HSS_RESERVED) &&
	    ((shs->shs_options & HS_OPT_FORCE) == 0)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool,
		    old_hs->hs_devnum));
	}

	/* Look up the pool itself. */
	pool = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (pool == NULL) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/* The old hot spare has to be a member of that pool. */
	for (slot = 0; slot < pool->hsp_nhotspares; slot++) {
		if (pool->hsp_hotspares[slot] == old_hs->hs_record_id)
			break;
	}
	if (slot >= pool->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    old_hs->hs_devnum));
	}

	/* Is there already a hot spare record for the new device? */
	for (repl_hs = (hot_spare_t *)md_set[setno].s_hs; repl_hs != NULL;
	    repl_hs = repl_hs->hs_next) {
		if (repl_hs->hs_devnum == shs->shs_component_new)
			break;
	}
	repl_exists = (repl_hs != NULL);

	/*
	 * An existing record might already be in this pool, which is
	 * not allowed.  A record that is about to be created cannot
	 * be, so the search is skipped in that case.
	 */
	if (repl_exists) {
		for (slot = 0; slot < pool->hsp_nhotspares; slot++) {
			if (pool->hsp_hotspares[slot] !=
			    repl_hs->hs_record_id)
				continue;
			return (mdhserror(&shs->mde, MDE_HS_INUSE,
			    shs->shs_hot_spare_pool,
			    repl_hs->hs_devnum));
		}
	}

	/* A dry run stops once the checks have passed. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/*
	 * Create the record for the new hot spare if there is none.
	 */
	if (!repl_exists) {
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_new));
#else
			new_recid = mddb_createrec(HS_ONDSK_STR_SIZE, rectype,
			    HS_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			new_recid = mddb_createrec(HS_ONDSK_STR_SIZE, rectype,
			    HS_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (new_recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_new));
		}

		/* Obtain the record's address and fill it in. */
		repl_hs = (hot_spare_t *)mddb_getrecaddr_resize(new_recid,
		    sizeof (*repl_hs), 0);

		repl_hs->hs_record_id = new_recid;
		repl_hs->hs_devnum = shs->shs_component_new;
		repl_hs->hs_key = shs->shs_key_new;
		repl_hs->hs_start_blk = shs->shs_start_blk;
		repl_hs->hs_has_label = shs->shs_has_label;
		repl_hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(repl_hs, HSS_AVAILABLE);
		repl_hs->hs_refcount = 0;
		repl_hs->hs_isopen = 1;
	}

	/*
	 * Records to commit (zero-terminated): the old hot spare, the
	 * new hot spare and the pool.
	 */
	commit_ids[0] = old_hs->hs_record_id;
	commit_ids[1] = repl_hs->hs_record_id;
	commit_ids[2] = pool->hsp_record_id;
	commit_ids[3] = 0;

	/* Remember the old name so it can be released at the end. */
	old_name.setno = setno;
	old_name.key = old_hs->hs_key;

	/* This pool gives up its reference to the old hot spare. */
	old_hs->hs_refcount--;
	if (old_hs->hs_refcount == 0) {
		/*
		 * That was the last reference: take the old hot spare
		 * off the list.  The predecessor's record is not
		 * committed; there is no need, since the link gets
		 * rebuilt at boot time.
		 */
		if (old_prev != NULL)
			old_prev->hs_next = old_hs->hs_next;
		else
			md_set[setno].s_hs = (void *) old_hs->hs_next;

		/* Deletion has to wait until after the commit below. */
		drop_old = 1;

		/* The old record is going away, so don't commit it. */
		commit_ids[0] = repl_hs->hs_record_id;
		commit_ids[1] = pool->hsp_record_id;
		commit_ids[2] = 0;
	}

	/* The pool now holds a reference to the new hot spare. */
	repl_hs->hs_refcount++;
	if (!repl_exists) {
		/*
		 * Push the new record onto the list.  This must come
		 * AFTER the old entry has possibly been unlinked.
		 */
		repl_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) repl_hs;
	}

	/* Put the new record id into the old one's slot in the pool. */
	for (slot = 0; slot < pool->hsp_nhotspares; slot++) {
		if (pool->hsp_hotspares[slot] != old_hs->hs_record_id)
			continue;
		pool->hsp_hotspares[slot] = repl_hs->hs_record_id;
		break;
	}

	/* Make the revision flag agree with the requested size option. */
	if (shs->shs_size_option & MD_CRO_64BIT)
		repl_hs->hs_revision |= MD_64BIT_META_DEV;
	else
		repl_hs->hs_revision &= ~MD_64BIT_META_DEV;

	/* Commit first, then delete the old record if it was orphaned. */
	mddb_commitrecs_wrapper(commit_ids);

	if (drop_old)
		mddb_deleterec_wrapper(old_hs->hs_record_id);

	md_rem_names(&old_name, 1);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
	    md_expldev(pool->hsp_self_id));
	return (0);
}
Ejemplo n.º 5
0
/*
 * seths_delete()
 *
 * Remove the hot spare shs->shs_component_old from the hot spare pool
 * shs->shs_hot_spare_pool.  When HS_OPT_POOL is set the whole pool is
 * to be deleted instead, and the request is handed to
 * seths_delete_hsp().
 *
 * The hot spare is looked up by device number first.  If that fails it
 * is looked up by namespace key among the hot spares whose hs_devnum
 * is NODEV64; the key must be unique.  A hot spare in the HSS_RESERVED
 * state is refused unless HS_OPT_FORCE is given, and is always refused
 * while the pool's hsp_refcount is non-zero.
 *
 * With HS_OPT_DRYRUN set only the checks are performed.
 *
 * Returns 0 on success, otherwise the result of the md*error() routine
 * that recorded the failure in shs->mde.
 */
static int
seths_delete(set_hs_params_t *shs)
{
	set_t			setno;
	hot_spare_pool_t	*pool;
	hot_spare_t		*victim;
	hot_spare_t		*before;
	int			slot;
	int			drop_record = 0;
	mddb_recid_t		commit_ids[4];
	sv_dev_t		name;
	mdkey_t			old_key;
	int			nkeys = 0;

	/* Deleting the pool itself is handled elsewhere. */
	if (shs->shs_options & HS_OPT_POOL)
		return (seths_delete_hsp(shs));

	setno = HSP_SET(shs->shs_hot_spare_pool);

	/*
	 * First pass over the hot spare list: match on device number.
	 * "before" trails one element behind so that the entry can be
	 * unlinked later.
	 */
	before = NULL;
	for (victim = (hot_spare_t *)md_set[setno].s_hs; victim != NULL;
	    before = victim, victim = victim->hs_next) {
		if (victim->hs_devnum == shs->shs_component_old)
			break;
	}

	if (victim == NULL) {
		/*
		 * The device number did not match anything, so fall
		 * back to the namespace key of shs_component_old.  The
		 * key has to be unique: with several namespace entries
		 * for the old component there is no telling which key
		 * is the right one.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &old_key, &nkeys) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/* Ambiguous: more than one key matches the old device. */
		if (nkeys > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}

		/* Missing: a key for this entry should exist. */
		if (nkeys == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}

		/*
		 * Second pass: keys only need comparing for entries
		 * whose device number is NODEV64.
		 */
		before = NULL;
		for (victim = (hot_spare_t *)md_set[setno].s_hs;
		    victim != NULL;
		    before = victim, victim = victim->hs_next) {
			if ((victim->hs_devnum == NODEV64) &&
			    (victim->hs_key == old_key))
				break;
		}
	}

	if (victim == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* Look up the pool itself. */
	pool = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (pool == NULL) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/*
	 * A reserved hot spare is refused when the request is not
	 * forced, and also whenever the pool's reference count is
	 * non-zero.
	 */
	if ((victim->hs_state == HSS_RESERVED) &&
	    (((shs->shs_options & HS_OPT_FORCE) == 0) ||
	    pool->hsp_refcount)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool, shs->shs_component_old));
	}

	/* The hot spare has to be a member of the pool. */
	for (slot = 0; slot < pool->hsp_nhotspares; slot++) {
		if (pool->hsp_hotspares[slot] == victim->hs_record_id)
			break;
	}
	if (slot >= pool->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    victim->hs_devnum));
	}

	/* A dry run stops once the checks have passed. */
	if (shs->shs_options & HS_OPT_DRYRUN)
		return (0);

	/*
	 * Records to commit (zero-terminated): the hot spare and the
	 * pool.
	 */
	commit_ids[0] = victim->hs_record_id;
	commit_ids[1] = pool->hsp_record_id;
	commit_ids[2] = 0;

	/* Remember the name so it can be released at the end. */
	name.setno = setno;
	name.key = victim->hs_key;

	/* This pool gives up its reference to the hot spare. */
	victim->hs_refcount--;
	if (victim->hs_refcount == 0) {
		/*
		 * That was the last reference: take the hot spare off
		 * the list.  The predecessor's record is not
		 * committed; there is no need, since the link gets
		 * rebuilt at boot time.
		 */
		if (before != NULL)
			before->hs_next = victim->hs_next;
		else
			md_set[setno].s_hs = (void *) victim->hs_next;

		/* The record is deleted after the commit below ... */
		drop_record = 1;

		/* ... so only the pool record is left to commit. */
		commit_ids[0] = pool->hsp_record_id;
		commit_ids[1] = 0;
	}

	/* Locate the hot spare's slot in the pool. */
	for (slot = 0; slot < pool->hsp_nhotspares; slot++) {
		if (pool->hsp_hotspares[slot] == victim->hs_record_id)
			break;
	}

	/* Close the gap by moving the following entries down by one. */
	for (slot = slot + 1; slot < pool->hsp_nhotspares; slot++)
		pool->hsp_hotspares[slot - 1] = pool->hsp_hotspares[slot];

	pool->hsp_nhotspares--;

	/* Commit first, then delete the record if it was orphaned. */
	mddb_commitrecs_wrapper(commit_ids);

	if (drop_record)
		mddb_deleterec_wrapper(victim->hs_record_id);

	md_rem_names(&name, 1);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
	    md_expldev(pool->hsp_self_id));

	return (0);
}
Ejemplo n.º 6
0
/*
 * seths_add()
 *
 * Add the hot spare shs->shs_component_old to the hot spare pool
 * shs->shs_hot_spare_pool, creating the hot spare record and/or the
 * pool record when they do not exist yet.
 *
 *   HS_OPT_DRYRUN - return 0 immediately; no checks are performed.
 *   HS_OPT_POOL   - only an empty pool is wanted; the request is
 *		     handed to seths_create_hsp().
 *
 * An existing pool record is not grown in place.  Instead a new record
 * with room for one more hot spare is created, the old contents are
 * copied into it, it takes the old record's place on the lists, and
 * the old record is deleted after the commit.
 *
 * Returns 0 on success, otherwise the result of the md*error() routine
 * that recorded the failure in shs->mde.
 */
static int
seths_add(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_pool_t	*hsp;
	hot_spare_pool_t	*prev_hsp;
	hot_spare_pool_t	*new_hsp;
	hot_spare_pool_t	*old_hsp;	/* valid only if delete_hsp */
	md_create_rec_option_t	options;
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];
	size_t			new_size;
	int			i;
	int			delete_hsp = 0;
	int			irecid;
	set_t			setno;
	mddb_type_t		typ1;
	int			hsp_created = 0;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	/* Not much to do here in case of a dryrun */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/* create an empty hot spare pool */
	if (shs->shs_options & HS_OPT_POOL) {
		return (seths_create_hsp(shs));
	}

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/* Scan the hot spare list for a match on device number */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Did not find match for device using devnum so use
		 * key associated with shs_component_old just
		 * in case there is a match but the match's dev is NODEV.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we shouldn't allow
		 * an addition of a hotspare in this case.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since shouldn't add new hotspare if namespace has
		 * multiple entries.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again, this time by key */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			hs = hs->hs_next;
		}
	}

	/*
	 * Neither scan found a hot spare record for the component, so
	 * create one and put it at the head of the set's hot spare list.
	 */
	if (hs == NULL) {
		/*
		 * create a hot spare record; a 64-bit request is
		 * rejected outright when built for _ILP32
		 */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_old));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_old));
		}

		/* get the addr */
		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
			0);

		hs->hs_record_id = recid;

		hs->hs_devnum = shs->shs_component_old;
		hs->hs_key = shs->shs_key_old;
		hs->hs_start_blk = shs->shs_start_blk;
		hs->hs_has_label = shs->shs_has_label;
		hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(hs, HSS_AVAILABLE);
		/* the reference for this pool is added further down */
		hs->hs_refcount = 0;
		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) hs;
	}

	/*
	 * Scan the hot spare pool list; prev_hsp trails one element
	 * behind so that a replacement record can be linked in place.
	 */
	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
	prev_hsp = (hot_spare_pool_t *)0;
	while (hsp) {
		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
			break;
		}
		prev_hsp = hsp;
		hsp = hsp->hsp_next;
	}

	if (hsp == NULL) {
		/*
		 * create a hot spare pool record
		 *
		 * NOTE(review): if this fails after a hot spare record
		 * was created above, that record stays on the set's hot
		 * spare list with a zero reference count; no cleanup of
		 * it is visible in this function -- confirm that this is
		 * acceptable.
		 */
		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
		    typ1, HSP_REC,
		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    shs->shs_hot_spare_pool));
		}

		/* get the record addr */
		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
			sizeof (*hsp), HSP_ONDSK_STR_OFF);

		hsp->hsp_self_id = shs->shs_hot_spare_pool;
		hsp->hsp_record_id = recid;
		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
		hsp->hsp_refcount = 0;
		hsp->hsp_nhotspares = 0;
		hsp->hsp_revision |= MD_FN_META_DEV;

		/*
		 * force prev_hsp to NULL, this will cause hsp to be linked
		 * at the head of the set's pool list further down
		 */
		prev_hsp = (hot_spare_pool_t *)0;

		/* put the new pool at the head of the driver's link list */
		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		hsp->hsp_link.ln_setno = setno;
		hsp->hsp_link.ln_id = hsp->hsp_self_id;
		hotspares_md_ops.md_head = &hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);
		hsp_created = 1;
	} else {

		/*
		 * Make sure the hot spare is not already in the pool.
		 */
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
					shs->shs_hot_spare_pool,
					hs->hs_devnum));
			}
		/*
		 * Create a new hot spare pool record.
		 * Adding hsp_nhotspares entries to the base size
		 * gives us the one extra hs slot, because there
		 * is already one slot in the hot_spare_pool struct.
		 */
		new_size = sizeof (hot_spare_pool_ond_t) +
			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);

		/*
		 * The Friendly Name status of the new HSP should duplicate
		 * the status of the existing one.
		 */
		if (hsp->hsp_revision & MD_FN_META_DEV) {
			options =
				MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN;
		} else {
			options = MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL;
		}
		recid = mddb_createrec(new_size, typ1, HSP_REC, options, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    hsp->hsp_self_id));
		}
		/*
		 * Same number of slots, but now sized from
		 * hot_spare_pool_t rather than hot_spare_pool_ond_t
		 * (presumably the in-core versus the on-disk layout --
		 * TODO confirm).
		 */
		new_size = sizeof (hot_spare_pool_t) +
			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);

		/* get the record addr */
		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
			new_size, HSP_ONDSK_STR_OFF);

		/*
		 * copy the old record into the new one; the length is
		 * the struct (which includes one slot) plus the
		 * hsp_nhotspares - 1 slots that follow it
		 */
		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
		    (size_t)((sizeof (hot_spare_pool_t) +
		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
		    - sizeof (mddb_recid_t))));
		new_hsp->hsp_record_id = recid;

		/* take the old pool off the driver's link list ... */
		md_rem_link(setno, hsp->hsp_self_id,
		    &hotspares_md_ops.md_link_rw.lock,
		    &hotspares_md_ops.md_head);

		/* ... and put the new one at its head */
		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		new_hsp->hsp_link.ln_setno = setno;
		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
		hotspares_md_ops.md_head = &new_hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);

		/* mark the old hsp to be deleted */
		delete_hsp = 1;
		old_hsp = hsp;
		hsp = new_hsp;
	}

	/* make the revision flag agree with the requested size option */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		hs->hs_revision |= MD_64BIT_META_DEV;
	} else {
		hs->hs_revision &= ~MD_64BIT_META_DEV;
	}

	/*
	 * lock the db records: the hot spare, the (new) pool and, when a
	 * pool record is being replaced, the old pool; the list is
	 * zero-terminated
	 */
	recids[0] = hs->hs_record_id;
	recids[1] = hsp->hsp_record_id;
	irecid = 2;
	if (delete_hsp)
		recids[irecid++] = old_hsp->hsp_record_id;
	recids[irecid] = 0;

	/* increment the reference count */
	hs->hs_refcount++;

	/* add the hs at the end of the hot spare pool */
	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
	hsp->hsp_nhotspares++;

	/*
	 * Link hsp into the set's pool list: after prev_hsp when an
	 * existing pool that was not first on the list is being
	 * replaced, otherwise at the head.
	 *
	 * NOTE: We do not commit the previous hot spare pool record.
	 *	 There is no need, the link gets rebuilt at boot time.
	 */
	if (prev_hsp)
		prev_hsp->hsp_next = hsp;
	else
		md_set[setno].s_hsp = (void *) hsp;

	/* the old record gives up the pool id before it is committed */
	if (delete_hsp)
		old_hsp->hsp_self_id = MD_HSP_NONE;

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	if (delete_hsp) {
		/* delete the old hot spare pool record */
		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
	}

	/* a brand new pool is announced before the addition itself */
	if (hsp_created) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
		    md_expldev(hsp->hsp_self_id));
	}
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));

	return (0);
}