Exemple #1
0
int Build(char *mddev, struct mddev_dev *devlist,
	  struct shape *s, struct context *c)
{
	/* Build a linear or raid0 arrays without superblocks
	 * We cannot really do any checks, we just do it.
	 * For md_version < 0.90.0, we call REGISTER_DEV
	 * with the device numbers, and then
	 * START_MD giving the "geometry"
	 * geometry is 0xpp00cc
	 * where pp is personality: 1==linear, 2=raid0
	 * cc = chunk size factor: 0==4k, 1==8k etc.
	 *
	 * For md_version >= 0.90.0 we call
	 * SET_ARRAY_INFO,  ADD_NEW_DISK, RUN_ARRAY
	 *
	 */
	int i;
	int vers;
	struct stat stb;
	int subdevs = 0, missing_disks = 0;
	struct mddev_dev *dv;
	int bitmap_fd;
	unsigned long long bitmapsize;
	int mdfd;
	char chosen_name[1024];
	int uuid[4] = {0,0,0,0};
	struct map_ent *map = NULL;

	/* scan all devices, make sure they really are block devices */
	for (dv = devlist; dv; dv=dv->next) {
		subdevs++;
		if (strcmp("missing", dv->devname) == 0) {
			missing_disks++;
			continue;
		}
		if (stat(dv->devname, &stb)) {
			pr_err("Cannot find %s: %s\n",
				dv->devname, strerror(errno));
			return 1;
		}
		if ((stb.st_mode & S_IFMT) != S_IFBLK) {
			pr_err("%s is not a block device.\n",
				dv->devname);
			return 1;
		}
	}

	if (s->raiddisks != subdevs) {
		pr_err("requested %d devices in array but listed %d\n",
			s->raiddisks, subdevs);
		return 1;
	}

	if (s->layout == UnSet)
		switch(s->level) {
		default: /* no layout */
			s->layout = 0;
			break;
		case 10:
			s->layout = 0x102; /* near=2, far=1 */
			if (c->verbose > 0)
				pr_err("layout defaults to n1\n");
			break;
		case 5:
		case 6:
			s->layout = map_name(r5layout, "default");
			if (c->verbose > 0)
				pr_err("layout defaults to %s\n", map_num(r5layout, s->layout));
			break;
		case LEVEL_FAULTY:
			s->layout = map_name(faultylayout, "default");

			if (c->verbose > 0)
				pr_err("layout defaults to %s\n", map_num(faultylayout, s->layout));
			break;
		}

	/* We need to create the device.  It can have no name. */
	map_lock(&map);
	mdfd = create_mddev(mddev, NULL, c->autof, LOCAL,
			    chosen_name);
	if (mdfd < 0) {
		map_unlock(&map);
		return 1;
	}
	mddev = chosen_name;

	map_update(&map, fd2devnm(mdfd), "none", uuid, chosen_name);
	map_unlock(&map);

	vers = md_get_version(mdfd);

	/* looks Ok, go for it */
	if (vers >= 9000) {
		mdu_array_info_t array;
		array.level = s->level;
		if (s->size == MAX_SIZE)
			s->size = 0;
		array.size = s->size;
		array.nr_disks = s->raiddisks;
		array.raid_disks = s->raiddisks;
		array.md_minor = 0;
		if (fstat(mdfd, &stb)==0)
			array.md_minor = minor(stb.st_rdev);
		array.not_persistent = 1;
		array.state = 0; /* not clean, but no errors */
		if (s->assume_clean)
			array.state |= 1;
		array.active_disks = s->raiddisks - missing_disks;
		array.working_disks = s->raiddisks - missing_disks;
		array.spare_disks = 0;
		array.failed_disks = missing_disks;
		if (s->chunk == 0 && (s->level==0 || s->level==LEVEL_LINEAR))
			s->chunk = 64;
		array.chunk_size = s->chunk*1024;
		array.layout = s->layout;
		if (ioctl(mdfd, SET_ARRAY_INFO, &array)) {
			pr_err("SET_ARRAY_INFO failed for %s: %s\n",
				mddev, strerror(errno));
			goto abort;
		}
	} else if (s->bitmap_file) {
		pr_err("bitmaps not supported with this kernel\n");
		goto abort;
	}

	if (s->bitmap_file && strcmp(s->bitmap_file, "none") == 0)
		s->bitmap_file = NULL;
	if (s->bitmap_file && s->level <= 0) {
		pr_err("bitmaps not meaningful with level %s\n",
			map_num(pers, s->level)?:"given");
		goto abort;
	}
Exemple #2
0
/*
 * btt_write -- write a block to a btt namespace
 *
 * bttp -- BTT handle the write is issued against
 * lane -- caller's lane; indexes the arena's flog table and is handed to
 *         the namespace callbacks and to the map lock/unlock helpers
 * lba  -- external LBA to write
 * buf  -- source data; bttp->lbasize bytes are written from it
 *
 * The data is first written to the free block recorded in flogs[lane],
 * then the flog and the map entry are updated so that the LBA refers to
 * the newly written block.
 *
 * Returns 0 on success, otherwise -1/errno.  A negative result from
 * write_layout() is returned unchanged.
 */
int
btt_write(struct btt *bttp, int lane, uint64_t lba, const void *buf)
{
	/*
	 * Cast the arguments so they match the conversion specifiers:
	 * lba is a uint64_t (not a size_t) and lane is a signed int.
	 */
	LOG(3, "bttp %p lane %u lba %llu", bttp, (unsigned)lane,
			(unsigned long long)lba);

	if (invalid_lba(bttp, lba))
		return -1;

	/* first write through here will initialize the metadata layout */
	if (!bttp->laidout) {
		int err = 0;

		/*
		 * Re-test laidout with the mutex held so the layout is
		 * written only if it is still missing at that point.
		 */
		pthread_mutex_lock(&bttp->layout_write_mutex);
		if (!bttp->laidout)
			err = write_layout(bttp, lane, 1);
		pthread_mutex_unlock(&bttp->layout_write_mutex);

		if (err < 0)
			return err;
	}

	/* find which arena LBA lives in, and the offset to the map entry */
	struct arena *arenap;
	uint32_t premap_lba;
	if (lba_to_arena_lba(bttp, lba, &arenap, &premap_lba) < 0)
		return -1;

	/* if the arena is in an error state, writing is not allowed */
	if (arenap->flags & BTTINFO_FLAG_ERROR_MASK) {
		LOG(1, "EIO due to btt_info error flags 0x%x",
			arenap->flags & BTTINFO_FLAG_ERROR_MASK);
		errno = EIO;
		return -1;
	}

	/*
	 * This routine was passed a unique "lane" which is an index
	 * into the flog.  That means the free block held by flog[lane]
	 * is assigned to this thread and to no other threads (no additional
	 * locking required).  So start by performing the write to the
	 * free block.  It is only safe to write to a free block if it
	 * doesn't appear in the read tracking table, so scan that first
	 * and if found, wait for the thread reading from it to finish.
	 */
	uint32_t free_entry =
		arenap->flogs[lane].flog.old_map & BTT_MAP_ENTRY_LBA_MASK;

	LOG(3, "free_entry %u (before mask %u)", free_entry,
				arenap->flogs[lane].flog.old_map);

	/* wait for other threads to finish any reads on free block */
	for (int i = 0; i < bttp->nlane; i++)
		while (arenap->rtt[i] == free_entry)
			;

	/*
	 * It is now safe to perform write to the free block.
	 *
	 * Widen free_entry before multiplying: it is a 32-bit value, and
	 * if internal_lbasize is also 32 bits wide (presumably it is --
	 * its declaration is not visible here) the product would wrap
	 * for blocks located 4GiB or more into the data area.
	 */
	off_t data_block_off = arenap->dataoff +
			(uint64_t)free_entry * arenap->internal_lbasize;
	if ((*bttp->ns_cbp->nswrite)(bttp->ns, lane, buf,
				bttp->lbasize, data_block_off) < 0)
		return -1;

	/*
	 * Make the new block active atomically by updating the on-media flog
	 * and then updating the map.
	 */
	uint32_t old_entry;
	if (map_lock(bttp, lane, arenap, &old_entry, premap_lba) < 0)
		return -1;

	/* map_lock() hands back the entry in little-endian byte order */
	old_entry = le32toh(old_entry);

	/* update the flog */
	if (flog_update(bttp, lane, arenap, premap_lba,
					old_entry, free_entry) < 0) {
		/* give up the map entry taken by map_lock() before failing */
		map_abort(bttp, lane, arenap, premap_lba);
		return -1;
	}

	if (map_unlock(bttp, lane, arenap, htole32(free_entry),
					premap_lba) < 0) {
		/* XXX retry? revert the flog? */
		return -1;
	}

	return 0;
}
Exemple #3
0
/*
 * map_entry_setf -- (internal) set a given flag on a map entry
 *
 * bttp -- BTT handle
 * lane -- caller's lane, handed to write_layout() and the map helpers
 * lba  -- external LBA whose map entry is updated
 * setf -- flag bit(s) OR-ed into the current map entry
 *
 * Setting BTT_MAP_ENTRY_ZERO is a no-op when no layout has been written
 * yet, and when the entry already has that flag set.
 *
 * Returns 0 on success, otherwise -1/errno.  A negative result from
 * write_layout() is returned unchanged.
 */
static int
map_entry_setf(struct btt *bttp, int lane, uint64_t lba, uint32_t setf)
{
	/*
	 * Cast the arguments so they match the conversion specifiers:
	 * lba is a uint64_t (not a size_t) and lane is a signed int.
	 */
	LOG(3, "bttp %p lane %u lba %llu setf 0x%x", bttp, (unsigned)lane,
			(unsigned long long)lba, setf);

	if (invalid_lba(bttp, lba))
		return -1;

	if (!bttp->laidout) {
		/*
		 * No layout is written yet.  If the flag being set
		 * is the zero flag, it is superfluous since all blocks
		 * read as zero at this point.
		 */
		if (setf == BTT_MAP_ENTRY_ZERO)
			return 0;

		/*
		 * Treat this like the first write and write out
		 * the metadata layout at this point.  laidout is
		 * re-tested with the mutex held so the layout is written
		 * only if it is still missing at that point.
		 */
		int err = 0;
		pthread_mutex_lock(&bttp->layout_write_mutex);
		if (!bttp->laidout)
			err = write_layout(bttp, lane, 1);
		pthread_mutex_unlock(&bttp->layout_write_mutex);

		if (err < 0)
			return err;
	}

	/* find which arena LBA lives in, and the offset to the map entry */
	struct arena *arenap;
	uint32_t premap_lba;
	if (lba_to_arena_lba(bttp, lba, &arenap, &premap_lba) < 0)
		return -1;

	/* if the arena is in an error state, writing is not allowed */
	if (arenap->flags & BTTINFO_FLAG_ERROR_MASK) {
		LOG(1, "EIO due to btt_info error flags 0x%x",
			arenap->flags & BTTINFO_FLAG_ERROR_MASK);
		errno = EIO;
		return -1;
	}

	/*
	 * Set the flags in the map entry.  To do this, read the
	 * current map entry, set the flags, and write out the update.
	 */
	uint32_t old_entry;
	uint32_t new_entry;

	if (map_lock(bttp, lane, arenap, &old_entry, premap_lba) < 0)
		return -1;

	/* map_lock() hands back the entry in little-endian byte order */
	old_entry = le32toh(old_entry);

	if (setf == BTT_MAP_ENTRY_ZERO && (old_entry & BTT_MAP_ENTRY_ZERO)) {
		/* nothing will be written, so give up the map entry */
		map_abort(bttp, lane, arenap, premap_lba);
		return 0;	/* block already zero, nothing to do */
	}

	/* create the new map entry */
	new_entry = old_entry | setf;

	if (map_unlock(bttp, lane, arenap, htole32(new_entry), premap_lba) < 0)
		return -1;

	return 0;
}
Exemple #4
0
int Manage_runstop(char *devname, int fd, int runstop, int quiet)
{
	/* Run or stop the array. array must already be configured
	 * required >= 0.90.0
	 *
	 * devname: path of the md device, used for messages and for
	 *          re-opening the device when stopping
	 * fd:      open descriptor on the array.  When stopping, this
	 *          descriptor is closed here and the device is re-opened
	 *          with O_EXCL; the re-opened descriptor is not closed
	 *          on the success path.
	 * runstop: > 0 runs the array, < 0 stops it, 0 does nothing
	 *
	 * Only print failure messages if quiet == 0;
	 * quiet > 0 means really be quiet
	 * quiet < 0 means we will try again if it fails.
	 *
	 * Returns 0 on success, 1 on failure.
	 */
	mdu_param_t param; /* unused */

	/* NOTE(review): after a successful STOP_MD on a pre-0.90.0 driver
	 * control still reaches the version check below, which prints an
	 * error and returns 1 - confirm this is intended.
	 */
	if (runstop == -1 && md_get_version(fd) < 9000) {
		if (ioctl(fd, STOP_MD, 0)) {
			if (quiet == 0) fprintf(stderr,
						Name ": stopping device %s "
						"failed: %s\n",
						devname, strerror(errno));
			return 1;
		}
	}

	if (md_get_version(fd) < 9000) {
		fprintf(stderr, Name ": need md driver version 0.90.0 or later\n");
		return 1;
	}
	/*
	if (ioctl(fd, GET_ARRAY_INFO, &array)) {
		fprintf(stderr, Name ": %s does not appear to be active.\n",
			devname);
		return 1;
	}
	*/
	if (runstop>0) {
		if (ioctl(fd, RUN_ARRAY, &param)) {
			fprintf(stderr, Name ": failed to run array %s: %s\n",
				devname, strerror(errno));
			return 1;
		}
		if (quiet <= 0)
			fprintf(stderr, Name ": started %s\n", devname);
	} else if (runstop < 0){
		struct map_ent *map = NULL;
		struct stat stb;
		struct mdinfo *mdi;
		int devnum;
		int err;
		int count;
		/* Result of the stop path; only set to 0 once every step
		 * has succeeded, so every 'goto out' reports failure.
		 */
		int rv = 1;

		/* If this is an mdmon managed array, just write 'inactive'
		 * to the array state and let mdmon clear up.
		 */
		devnum = fd2devnum(fd);
		/* Get EXCL access first.  If this fails, then attempting
		 * to stop is probably a bad idea.
		 */
		close(fd);
		fd = open(devname, O_RDONLY|O_EXCL);
		if (fd < 0 || fd2devnum(fd) != devnum) {
			if (fd >= 0)
				close(fd);
			fprintf(stderr,
				Name ": Cannot get exclusive access to %s:"
				"Perhaps a running "
				"process, mounted filesystem "
				"or active volume group?\n",
				devname);
			return 1;
		}
		mdi = sysfs_read(fd, -1, GET_LEVEL|GET_VERSION);
		if (mdi &&
		    mdi->array.level > 0 &&
		    is_subarray(mdi->text_version)) {
			/* This is mdmon managed. */
			close(fd);

			/* Retry while the write fails with EBUSY: up to
			 * 25 attempts, 200ms apart.
			 */
			count = 25;
			while (count &&
			       (err = sysfs_set_str(mdi, NULL,
						    "array_state",
						    "inactive")) < 0
			       && errno == EBUSY) {
				usleep(200000);
				count--;
			}
			if (err) {
				/* 'quiet' only silences the message; the
				 * failure itself must always be reported
				 * to the caller.
				 */
				if (!quiet)
					fprintf(stderr, Name
						": failed to stop array %s: %s\n",
						devname, strerror(errno));
				goto out;
			}

			/* Give monitor a chance to act */
			ping_monitor(mdi->text_version);

			fd = open_dev_excl(devnum);
			if (fd < 0) {
				fprintf(stderr, Name
					": failed to completely stop %s"
					": Device is busy\n",
					devname);
				goto out;
			}
		} else if (mdi &&
			   mdi->array.major_version == -1 &&
			   mdi->array.minor_version == -2 &&
			   !is_subarray(mdi->text_version)) {
			struct mdstat_ent *mds, *m;
			/* container, possibly mdmon-managed.
			 * Make sure mdmon isn't opening it, which
			 * would interfere with the 'stop'
			 */
			ping_monitor(mdi->sys_name);

			/* now check that there are no existing arrays
			 * which are members of this array
			 */
			mds = mdstat_read(0, 0);
			for (m=mds; m; m=m->next)
				if (m->metadata_version &&
				    strncmp(m->metadata_version, "external:", 9)==0 &&
				    is_subarray(m->metadata_version+9) &&
				    devname2devnum(m->metadata_version+10) == devnum) {
					if (!quiet)
						fprintf(stderr, Name
							": Cannot stop container %s: "
							"member %s still active\n",
							devname, m->dev);
					free_mdstat(mds);
					goto out;
				}
			/* no active member found: the mdstat list is no
			 * longer needed
			 */
			if (mds)
				free_mdstat(mds);
		}

		/* As we have an O_EXCL open, any use of the device
		 * which blocks STOP_ARRAY is probably a transient use,
		 * so it is reasonable to retry for a while - 5 seconds.
		 */
		count = 25; err = 0;
		while (count && fd >= 0
		       && (err = ioctl(fd, STOP_ARRAY, NULL)) < 0
		       && errno == EBUSY) {
			usleep(200000);
			count --;
		}
		if (fd >= 0 && err) {
			if (quiet == 0) {
				fprintf(stderr, Name
					": failed to stop array %s: %s\n",
					devname, strerror(errno));
				if (errno == EBUSY)
					fprintf(stderr, "Perhaps a running "
						"process, mounted filesystem "
						"or active volume group?\n");
			}
			goto out;
		}
		/* prior to 2.6.28, KOBJ_CHANGE was not sent when an md array
		 * was stopped, so We'll do it here just to be sure.  Drop any
		 * partitions as well...
		 */
		if (fd >= 0)
			ioctl(fd, BLKRRPART, 0);
		if (mdi)
			sysfs_uevent(mdi, "change");

		/* Remove the device nodes ourselves when /dev/.udev is
		 * absent or MDADM_NO_UDEV is set in the environment.
		 */
		if (devnum != NoMdDev &&
		    (stat("/dev/.udev", &stb) != 0 ||
		     check_env("MDADM_NO_UDEV"))) {
			struct map_ent *mp = map_by_devnum(&map, devnum);
			remove_devices(devnum, mp ? mp->path : NULL);
		}

		if (quiet <= 0)
			fprintf(stderr, Name ": stopped %s\n", devname);
		map_lock(&map);
		map_remove(&map, devnum);
		map_unlock(&map);
		rv = 0;
	out:
		/* single exit for the stop path once sysfs_read() has
		 * been called, so mdi is released on success and failure
		 */
		if (mdi)
			sysfs_free(mdi);
		return rv;
	}
	return 0;
}