예제 #1
0
/*
 * Attempt to read a 32-bit disk label from a device.
 *
 * A single sector of info->d_media_blksize bytes is read synchronously
 * from sector LABELSECTOR32 of dev.  The sector is then probed at every
 * sizeof(long) byte offset at which a whole struct disklabel32 still fits:
 *
 *  - offsets whose two magic fields are not both DISKMAGIC32 are skipped
 *    ("no disk label" is reported if nothing else was found),
 *  - candidates with too many partitions or a non-zero checksum are
 *    remembered as "disk label corrupted" and the scan continues,
 *  - the first candidate passing both tests is handed to l32_fixlabel()
 *    and ends the scan; if l32_fixlabel() accepts it, a kmalloc()'d copy
 *    is stored in lpp->lab32.
 *
 * Returns NULL on success, and an error string on failure.
 */
static const char *
l32_readdisklabel(cdev_t dev, struct diskslice *sp, disklabel_t *lpp,
		struct disk_info *info)
{
	const int secsize = info->d_media_blksize;
	const char *msg = NULL;
	struct disklabel32 *dlp;
	struct buf *bp;
	disklabel_t lpx;
	char *cursor;
	char *last;

	/* Set up and issue a synchronous read of the label sector. */
	bp = geteblk(secsize);
	bp->b_bcount = secsize;
	bp->b_flags &= ~B_INVAL;
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bio1.bio_offset = (off_t)LABELSECTOR32 * secsize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	dev_dstrategy(dev, &bp->b_bio1);

	if (biowait(&bp->b_bio1, "labrd")) {
		msg = "I/O error";
	} else {
		/* Last offset at which a complete label still fits. */
		last = (char *)bp->b_data + secsize - sizeof(*dlp);

		for (cursor = (char *)bp->b_data; cursor <= last;
		     cursor += sizeof(long)) {
			dlp = (struct disklabel32 *)cursor;

			if (dlp->d_magic != DISKMAGIC32 ||
			    dlp->d_magic2 != DISKMAGIC32) {
				/*
				 * NOTE! dsreadandsetlabel() does a strcmp()
				 * on this string.
				 */
				if (msg == NULL)
					msg = "no disk label";
				continue;
			}
			if (dlp->d_npartitions > MAXPARTITIONS32 ||
			    dkcksum32(dlp) != 0) {
				msg = "disk label corrupted";
				continue;
			}

			/*
			 * Plausible label: let l32_fixlabel() judge it and
			 * stop scanning whatever the verdict is.
			 */
			lpx.lab32 = dlp;
			msg = l32_fixlabel(NULL, sp, lpx, FALSE);
			if (msg == NULL) {
				lpp->lab32 = kmalloc(sizeof(*dlp), M_DEVBUF,
						     M_WAITOK|M_ZERO);
				*lpp->lab32 = *dlp;
			}
			break;
		}
	}

	/* The sector contents are not worth caching. */
	bp->b_flags |= B_INVAL | B_AGE;
	brelse(bp);
	return (msg);
}
예제 #2
0
/*
 * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
 *
 * Hand a BUF_CMD_FREEBLKS request for the range given by ap->a_offset and
 * ap->a_length to the device behind ap->a_vp.  Devices that do not
 * advertise SI_CANFREE are silently skipped.
 *
 * Always returns 0.
 */
static int
devfs_spec_freeblks(struct vop_freeblks_args *ap)
{
	struct buf *fbp;

	KKASSERT(ap->a_vp->v_rdev != NULL);

	/* Nothing to do when the device cannot free blocks. */
	if (!(ap->a_vp->v_rdev->si_flags & SI_CANFREE))
		return (0);

	/*
	 * XXX: This assumes that strategy does the deed right away.
	 * XXX: this may not be TRTTD.
	 *
	 * NOTE(review): the buffer is neither waited on nor released in
	 * this function; confirm who disposes of it after the strategy
	 * call.
	 */
	fbp = geteblk(ap->a_length);
	fbp->b_bcount = ap->a_length;
	fbp->b_bio1.bio_offset = ap->a_offset;
	fbp->b_cmd = BUF_CMD_FREEBLKS;
	dev_dstrategy(ap->a_vp->v_rdev, &fbp->b_bio1);

	return (0);
}
예제 #3
0
/*
 * Read the DOS master boot record from dev and build the slice table
 * it describes.
 *
 * dev   - device to read the partition table from
 * info  - media parameters; the CHS fields (d_secpertrack, d_nheads,
 *	   d_secpercyl, d_ncylinders) are updated from the guessed geometry
 *	   when the table was read from DOSBBSECTOR
 * sspp  - in/out slice table; once the table is accepted the old
 *	   structure is freed and replaced by a MAX_SLICES sized one
 *
 * If the first entry is of type DOSPTYP_PMBR or DOSPTYP_GPT the work is
 * delegated to gptinit().  If any entry is of type DOSPTYP_ONTRACK the
 * table is re-read from sector 63.
 *
 * Returns EIO when the media parameters are unusable or the read fails.
 * EINVAL (missing magic, historical bogus table) is converted to 0 before
 * returning, so such disks are reported as success without the slice
 * table having been replaced here.
 */
int
mbrinit(cdev_t dev, struct disk_info *info, struct diskslices **sspp)
{
	struct buf *bp;
	u_char	*cp;
	int	dospart;
	struct dos_partition *dp;
	struct dos_partition *dp0;
	struct dos_partition dpcopy[NDOSPART];
	int	error;
	int	max_ncyls;
	int	max_nsectors;
	int	max_ntracks;
	u_int64_t mbr_offset;
	char	partname[2];
	u_long	secpercyl;
	char	*sname = "tempname";
	struct diskslice *sp;
	struct diskslices *ssp;
	cdev_t wdev;

	/* Start with the standard boot sector; Ontrack may redirect us. */
	mbr_offset = DOSBBSECTOR;
reread_mbr:
	/*
	 * Don't bother if the block size is weird or the
	 * media size is 0 (probably means no media present).
	 */
	if (info->d_media_blksize & DEV_BMASK)
		return (EIO);
	if (info->d_media_size == 0)
		return (EIO);

	/*
	 * Read master boot record.
	 */
	wdev = dev;
	bp = geteblk((int)info->d_media_blksize);
	bp->b_bio1.bio_offset = (off_t)mbr_offset * info->d_media_blksize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	bp->b_bcount = info->d_media_blksize;
	bp->b_cmd = BUF_CMD_READ;
	bp->b_flags |= B_FAILONDIS;
	dev_dstrategy(wdev, &bp->b_bio1);
	if (biowait(&bp->b_bio1, "mbrrd") != 0) {
		/* DSO_MBRQUIET suppresses the diagnostic, not the error. */
		if ((info->d_dsflags & DSO_MBRQUIET) == 0) {
			diskerr(&bp->b_bio1, wdev,
				"reading primary partition table: error",
				LOG_PRINTF, 0);
			kprintf("\n");
		}
		error = EIO;
		goto done;
	}

	/* Weakly verify it. */
	cp = bp->b_data;
	sname = dsname(dev, 0, 0, 0, NULL);
	if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
		if (bootverbose)
			kprintf("%s: invalid primary partition table: no magic\n",
			       sname);
		error = EINVAL;
		goto done;
	}

	/* Make a copy of the partition table to avoid alignment problems. */
	memcpy(&dpcopy[0], cp + DOSPARTOFF, sizeof(dpcopy));

	dp0 = &dpcopy[0];

	/*
	 * Check for "Ontrack Diskmanager" or GPT.  If a GPT is found in
	 * the first dos partition, ignore the rest of the MBR and go
	 * to GPT processing.
	 */
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		if (dospart == 0 &&
		    (dp->dp_typ == DOSPTYP_PMBR || dp->dp_typ == DOSPTYP_GPT)) {
			if (bootverbose)
				kprintf(
	    "%s: Found GPT in slice #%d\n", sname, dospart + 1);
			error = gptinit(dev, info, sspp);
			goto done;
		}

		if (dp->dp_typ == DOSPTYP_ONTRACK) {
			if (bootverbose)
				kprintf(
	    "%s: Found \"Ontrack Disk Manager\" on this disk.\n", sname);
			/*
			 * Release the current buffer before jumping back;
			 * the re-read allocates a fresh one.
			 */
			bp->b_flags |= B_INVAL | B_AGE;
			brelse(bp);
			mbr_offset = 63;
			goto reread_mbr;
		}
	}

	/* Reject the two known "dangerously dedicated" table images. */
	if (bcmp(dp0, historical_bogus_partition_table,
		 sizeof historical_bogus_partition_table) == 0 ||
	    bcmp(dp0, historical_bogus_partition_table_fixed,
		 sizeof historical_bogus_partition_table_fixed) == 0) {
#if 0
		TRACE(("%s: invalid primary partition table: historical\n",
		       sname));
#endif /* 0 */
		if (bootverbose)
			kprintf(
     "%s: invalid primary partition table: Dangerously Dedicated (ignored)\n",
			       sname);
		error = EINVAL;
		goto done;
	}

	/* Guess the geometry. */
	/*
	 * TODO:
	 * Perhaps skip entries with 0 size.
	 * Perhaps only look at entries of type DOSPTYP_386BSD.
	 */
	max_ncyls = 0;
	max_nsectors = 0;
	max_ntracks = 0;
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		int	ncyls;
		int	nsectors;
		int	ntracks;

		/* Take the largest ending C/H/S values over all entries. */
		ncyls = DPCYL(dp->dp_ecyl, dp->dp_esect) + 1;
		if (max_ncyls < ncyls)
			max_ncyls = ncyls;
		nsectors = DPSECT(dp->dp_esect);
		if (max_nsectors < nsectors)
			max_nsectors = nsectors;
		ntracks = dp->dp_ehd + 1;
		if (max_ntracks < ntracks)
			max_ntracks = ntracks;
	}

	/*
	 * Check that we have guessed the geometry right by checking the
	 * partition entries.
	 */
	/*
	 * TODO:
	 * As above.
	 * Check for overlaps.
	 * Check against d_secperunit if the latter is reliable.
	 */
	error = 0;
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		/* Skip entries that are entirely zero. */
		if (dp->dp_scyl == 0 && dp->dp_shd == 0 && dp->dp_ssect == 0
		    && dp->dp_start == 0 && dp->dp_size == 0)
			continue;
		//sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
		//	       WHOLE_SLICE_PART, partname);

		/*
		 * Temporarily ignore errors from this check.  We could
		 * simplify things by accepting the table earlier if we
		 * always ignore errors here.  Perhaps we should always
		 * accept the table if the magic is right but not let
		 * bad entries affect the geometry.
		 *
		 * Because the return value is discarded, error is still 0
		 * after this loop and the test below never fires.
		 */
		check_part(sname, dp, mbr_offset, max_nsectors, max_ntracks,
			   mbr_offset);
	}
	if (error != 0)
		goto done;

	/*
	 * Accept the DOS partition table.
	 *
	 * Adjust the disk information structure with updated CHS
	 * conversion parameters, but only use values extracted from
	 * the primary partition table.
	 *
	 * NOTE!  Regardless of our having to deal with this old cruft,
	 * we do not screw around with the info->d_media* parameters.
	 */
	secpercyl = (u_long)max_nsectors * max_ntracks;
	if (secpercyl != 0 && mbr_offset == DOSBBSECTOR) {
		info->d_secpertrack = max_nsectors;
		info->d_nheads = max_ntracks;
		info->d_secpercyl = secpercyl;
		info->d_ncylinders = info->d_media_blocks / secpercyl;
	}

	/*
	 * We are passed a pointer to a suitably initialized minimal
	 * slices "struct" with no dangling pointers in it.  Replace it
	 * by a maximal one.  This usually oversizes the "struct", but
	 * enlarging it while searching for logical drives would be
	 * inconvenient.
	 */
	kfree(*sspp, M_DEVBUF);
	ssp = dsmakeslicestruct(MAX_SLICES, info);
	*sspp = ssp;

	/* Initialize normal slices. */
	sp = &ssp->dss_slices[BASE_SLICE];
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++, sp++) {
		sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
			       WHOLE_SLICE_PART, partname);
		(void)mbr_setslice(sname, info, sp, dp, mbr_offset);
	}
	ssp->dss_nslices = BASE_SLICE + NDOSPART;

	/* Handle extended partitions. */
	sp -= NDOSPART;		/* rewind to the first normal slice */
	for (dospart = 0; dospart < NDOSPART; dospart++, sp++) {
		if (sp->ds_type == DOSPTYP_EXTENDED ||
		    sp->ds_type == DOSPTYP_EXTENDEDX) {
			mbr_extended(wdev, info, ssp,
				     sp->ds_offset, sp->ds_size, sp->ds_offset,
				     max_nsectors, max_ntracks, mbr_offset, 1);
		}
	}

	/*
	 * mbr_extended() abuses ssp->dss_nslices for the number of slices
	 * that would be found if there were no limit on the number of slices
	 * in *ssp.  Cut it back now.
	 */
	if (ssp->dss_nslices > MAX_SLICES)
		ssp->dss_nslices = MAX_SLICES;

done:
	/* Common exit: discard the sector buffer and hide EINVAL. */
	bp->b_flags |= B_INVAL | B_AGE;
	brelse(bp);
	if (error == EINVAL)
		error = 0;
	return (error);
}
예제 #4
0
/*
 * Scan one extended boot record and add the logical slices it describes
 * to ssp.
 *
 * The sector at ext_offset is read from dev and weakly verified via its
 * 0x55 0xAA signature.  Every non-empty entry is then either
 *
 *  - an extended-type link, which is checked relative to base_ext_offset
 *    and remembered for a recursive scan, or
 *  - an ordinary slice, which is checked relative to ext_offset and
 *    stored in ssp->dss_slices[] while there is room.
 *
 * ssp->dss_nslices is set to the running slice count, which keeps
 * counting past MAX_SLICES when the table is full.  nsectors, ntracks and
 * mbr_offset are only passed through to check_part(); ext_size is not
 * referenced in the body.  level guards against runaway recursion: the
 * scan is abandoned once it reaches 16.
 */
static
void
mbr_extended(cdev_t dev, struct disk_info *info, struct diskslices *ssp,
	    u_int64_t ext_offset, u_int64_t ext_size, u_int64_t base_ext_offset,
	    int nsectors, int ntracks, u_int64_t mbr_offset, int level)
{
	static char extname[32];
	struct dos_partition dpcopy[NDOSPART];
	u_int64_t ext_offsets[NDOSPART];
	u_int64_t ext_sizes[NDOSPART];
	struct dos_partition *dp;
	struct diskslice *sp;
	struct buf *bp;
	u_char	*sector;
	char	partname[2];
	char	*sname;
	int	next_slice;
	int	n;

	if (level >= 16) {
		kprintf("%s: excessive recursion in search for slices; "
			"aborting search\n",
			devtoname(dev));
		return;
	}

	/* Read extended boot record. */
	bp = geteblk((int)info->d_media_blksize);
	bp->b_bcount = info->d_media_blksize;
	bp->b_cmd = BUF_CMD_READ;
	bp->b_flags |= B_FAILONDIS;
	bp->b_bio1.bio_offset = (off_t)ext_offset * info->d_media_blksize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	dev_dstrategy(dev, &bp->b_bio1);
	if (biowait(&bp->b_bio1, "mbrrd") != 0) {
		diskerr(&bp->b_bio1, dev,
			"reading extended partition table: error",
			LOG_PRINTF, 0);
		kprintf("\n");
		goto done;
	}

	/* Weakly verify it. */
	sector = bp->b_data;
	if (sector[0x1FE] != 0x55 || sector[0x1FF] != 0xAA) {
		sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE,
			       WHOLE_SLICE_PART, partname);
		if (bootverbose)
			kprintf("%s: invalid extended partition table: "
				"no magic\n",
				sname);
		goto done;
	}

	/* Work on a copy of the table to avoid alignment problems. */
	memcpy(&dpcopy[0], sector + DOSPARTOFF, sizeof(dpcopy));

	next_slice = ssp->dss_nslices;
	for (n = 0; n < NDOSPART; n++) {
		dp = &dpcopy[n];
		ext_sizes[n] = 0;

		/* Entries that are entirely zero are unused. */
		if (dp->dp_scyl == 0 && dp->dp_shd == 0 &&
		    dp->dp_ssect == 0 && dp->dp_start == 0 &&
		    dp->dp_size == 0)
			continue;

		if (dp->dp_typ != DOSPTYP_EXTENDED &&
		    dp->dp_typ != DOSPTYP_EXTENDEDX) {
			/* Ordinary logical slice. */
			sname = dsname(dev, dkunit(dev), next_slice,
				       WHOLE_SLICE_PART, partname);
			check_part(sname, dp, ext_offset, nsectors, ntracks,
				   mbr_offset);
			if (next_slice >= MAX_SLICES) {
				/* Keep counting so the caller can clamp. */
				kprintf("%s: too many slices\n", sname);
				next_slice++;
				continue;
			}
			sp = &ssp->dss_slices[next_slice];
			if (mbr_setslice(sname, info, sp, dp, ext_offset) == 0)
				next_slice++;
			continue;
		}

		/*
		 * Link to a further extended boot record: build a
		 * "<name><extended>" tag for the diagnostics and remember
		 * where to look once this table has been processed.
		 */
		sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE,
			       WHOLE_SLICE_PART, partname);
		ksnprintf(extname, sizeof(extname), "%s", sname);
		if (strlen(extname) < sizeof extname - 11)
			strcat(extname, "<extended>");
		check_part(extname, dp, base_ext_offset, nsectors, ntracks,
			   mbr_offset);
		ext_offsets[n] = base_ext_offset + dp->dp_start;
		ext_sizes[n] = dp->dp_size;
	}
	ssp->dss_nslices = next_slice;

	/*
	 * If we found any more slices, recursively find all the subslices.
	 * level is pre-incremented in place, so each further link found in
	 * this same table is scanned with a higher level than the previous
	 * one.
	 */
	for (n = 0; n < NDOSPART; n++) {
		if (ext_sizes[n] == 0)
			continue;
		mbr_extended(dev, info, ssp, ext_offsets[n], ext_sizes[n],
			     base_ext_offset, nsectors, ntracks, mbr_offset,
			     ++level);
	}

done:
	bp->b_flags |= B_INVAL | B_AGE;
	brelse(bp);
}
예제 #5
0
/*
 * Handle GPT on raw disk.  Note that GPTs are not recursive.  The MBR is
 * ignored once a GPT has been detected.
 *
 * GPTs always start at block #1, regardless of how the MBR has been set up.
 * In fact, the MBR's starting block might be pointing to the boot partition
 * in the GPT rather than to the start of the GPT.
 *
 * This routine is called from mbrinit() when a GPT has been detected.
 *
 * dev   - device to read the GPT from
 * info  - media parameters (block size, block count) used for validation
 * sspp  - in/out slice table; freed and replaced by a BASE_SLICE+128 entry
 *	   table once the header and the partition table have been read
 *
 * Returns 0 on success and EIO when a read fails.  Validation failures are
 * reported with kprintf() and flagged as EINVAL internally, which is
 * converted to 0 before returning; *sspp is left untouched in that case.
 */
int
gptinit(cdev_t dev, struct disk_info *info, struct diskslices **sspp)
{
	struct buf *bp1 = NULL;
	struct buf *bp2 = NULL;
	struct gpt_hdr *gpt;
	struct gpt_ent *ent;
	struct diskslice *sp;
	struct diskslices *ssp;
	cdev_t wdev;
	int error;
	uint32_t len;
	uint32_t entries;
	uint32_t entsz;
	uint32_t crc;
	uint64_t table_lba;
	uint32_t table_blocks;
	int i = 0, j;
	const char *dname;

	/*
	 * The GPT starts in sector 1.
	 */
	wdev = dev;
	dname = dev_dname(wdev);
	bp1 = geteblk((int)info->d_media_blksize);
	bp1->b_bio1.bio_offset = info->d_media_blksize;
	bp1->b_bio1.bio_done = biodone_sync;
	bp1->b_bio1.bio_flags |= BIO_SYNC;
	bp1->b_bcount = info->d_media_blksize;
	bp1->b_cmd = BUF_CMD_READ;
	dev_dstrategy(wdev, &bp1->b_bio1);
	if (biowait(&bp1->b_bio1, "gptrd") != 0) {
		kprintf("%s: reading GPT @ block 1: error %d\n",
			dname, bp1->b_error);
		error = EIO;
		goto done;
	}

	/*
	 * Header sanity check
	 */
	gpt = (void *)bp1->b_data;
	len = le32toh(gpt->hdr_size);
	if (len < GPT_MIN_HDR_SIZE || len > info->d_media_blksize) {
		kprintf("%s: Illegal GPT header size %u\n", dname, len);
		error = EINVAL;
		goto done;
	}

	/* The header CRC is computed with its own field zeroed. */
	crc = le32toh(gpt->hdr_crc_self);
	gpt->hdr_crc_self = 0;
	if (crc32(gpt, len) != crc) {
		kprintf("%s: GPT CRC32 did not match\n", dname);
		error = EINVAL;
		goto done;
	}

	/*
	 * Validate the partition table and its location, then read it
	 * into a buffer.
	 *
	 * The table LBA is a 64-bit on-disk field and is decoded as such
	 * so that a location beyond 2^32 blocks is rejected instead of
	 * being silently truncated.
	 */
	entries = le32toh(gpt->hdr_entries);
	entsz = le32toh(gpt->hdr_entsz);
	table_lba = le64toh(gpt->hdr_lba_table);
	if (entries < 1 || entries > 128 ||
	    entsz < 128 || (entsz & 7) || entsz > MAXBSIZE / entries) {
		kprintf("%s: GPT partition table is out of bounds\n", dname);
		error = EINVAL;
		goto done;
	}

	/*
	 * entries * entsz is bounded by MAXBSIZE at this point, so the
	 * size computation cannot wrap.  The location test is written as
	 * a subtraction so that it cannot wrap either.
	 */
	table_blocks = (entries * entsz + info->d_media_blksize - 1) /
		       info->d_media_blksize;
	if (table_lba < 2 || table_lba >= info->d_media_blocks ||
	    table_blocks > info->d_media_blocks - table_lba) {
		kprintf("%s: GPT partition table is out of bounds\n", dname);
		error = EINVAL;
		goto done;
	}

	/*
	 * XXX subject to device dma size limitations
	 */
	bp2 = geteblk((int)(table_blocks * info->d_media_blksize));
	bp2->b_bio1.bio_offset = (off_t)table_lba * info->d_media_blksize;
	bp2->b_bio1.bio_done = biodone_sync;
	bp2->b_bio1.bio_flags |= BIO_SYNC;
	bp2->b_bcount = table_blocks * info->d_media_blksize;
	bp2->b_cmd = BUF_CMD_READ;
	dev_dstrategy(wdev, &bp2->b_bio1);
	if (biowait(&bp2->b_bio1, "gptrd") != 0) {
		kprintf("%s: reading GPT partition table @ %lld: error %d\n",
			dname,
			(long long)bp2->b_bio1.bio_offset,
			bp2->b_error);
		error = EIO;
		goto done;
	}

	/*
	 * We are passed a pointer to a minimal slices struct.  Replace
	 * it with a maximal one (128 slices + special slices).  Well,
	 * really there is only one special slice (the WHOLE_DISK_SLICE)
	 * since we use the compatibility slice for s0, but don't quibble.
	 */
	kfree(*sspp, M_DEVBUF);
	ssp = *sspp = dsmakeslicestruct(BASE_SLICE+128, info);

	/*
	 * Create a slice for each partition.
	 */
	for (i = 0; i < (int)entries && i < 128; ++i) {
		struct gpt_ent sent;
		char partname[2];
		char *sname;

		/* Decode the little-endian on-disk entry into sent. */
		ent = (void *)((char *)bp2->b_data + i * entsz);
		le_uuid_dec(&ent->ent_type, &sent.ent_type);
		le_uuid_dec(&ent->ent_uuid, &sent.ent_uuid);
		sent.ent_lba_start = le64toh(ent->ent_lba_start);
		sent.ent_lba_end = le64toh(ent->ent_lba_end);
		sent.ent_attr = le64toh(ent->ent_attr);

		for (j = 0; j < NELEM(ent->ent_name); ++j)
			sent.ent_name[j] = le16toh(ent->ent_name[j]);

		/*
		 * The COMPATIBILITY_SLICE is actually slice 0 (s0).  This
		 * is a bit weird because the whole-disk slice is #1, so
		 * slice 1 (s1) starts at BASE_SLICE.
		 */
		if (i == 0)
			sp = &ssp->dss_slices[COMPATIBILITY_SLICE];
		else
			sp = &ssp->dss_slices[BASE_SLICE+i-1];
		sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE,
			       WHOLE_SLICE_PART, partname);

		/* A nil type UUID marks an unused entry. */
		if (kuuid_is_nil(&sent.ent_type))
			continue;

		/*
		 * The partition must lie after the partition table,
		 * inside the media, and must not be empty or inverted.
		 */
		if (sent.ent_lba_start < table_lba + table_blocks ||
		    sent.ent_lba_end >= info->d_media_blocks ||
		    sent.ent_lba_start >= sent.ent_lba_end) {
			kprintf("%s part %d: unavailable, bad start or "
				"ending lba\n",
				sname, i);
		} else {
			gpt_setslice(sname, info, sp, &sent);
		}
	}
	ssp->dss_nslices = BASE_SLICE + i;

	error = 0;
done:
	if (bp1) {
		bp1->b_flags |= B_INVAL | B_AGE;
		brelse(bp1);
	}
	if (bp2) {
		bp2->b_flags |= B_INVAL | B_AGE;
		brelse(bp2);
	}
	/* A malformed GPT is not treated as a hard error by the caller. */
	if (error == EINVAL)
		error = 0;
	return (error);
}
예제 #6
0
/*
 * Write disk label back to device after modification.
 *
 * The label sector is read first and scanned at every sizeof(long) byte
 * offset for an existing valid label (both magics and a zero checksum).
 * The new label replaces the first one found, is passed through
 * l32_fixlabel(), and the sector is written back.
 *
 * Returns 0 on success, EXDEV when the raw partition of the new label
 * does not start at offset 0, the biowait() error when the read or the
 * write fails, EINVAL when l32_fixlabel() rejects the label, and ESRCH
 * when the sector holds no valid label to overwrite.
 */
static int
l32_writedisklabel(cdev_t dev, struct diskslices *ssp, struct diskslice *sp,
		   disklabel_t lpx)
{
	struct disklabel32 *lp = lpx.lab32;
	struct disklabel32 *dlp;
	struct buf *bp;
	const char *msg;
	char *cursor;
	char *last;
	int error = 0;

	if (lp->d_partitions[RAW_PART].p_offset != 0)
		return (EXDEV);			/* not quite right */

	bp = geteblk((int)lp->d_secsize);
	bp->b_bcount = lp->d_secsize;
	bp->b_bio1.bio_offset = (off_t)LABELSECTOR32 * lp->d_secsize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;

#if 1
	/*
	 * We read the label first to see if it's there,
	 * in which case we will put ours at the same offset into the block..
	 * (I think this is stupid [Julian])
	 * Note that you can't write a label out over a corrupted label!
	 * (also stupid.. how do you write the first one? by raw writes?)
	 */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_flags &= ~B_INVAL;
	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
	dev_dstrategy(dev, &bp->b_bio1);
	error = biowait(&bp->b_bio1, "labrd");
	if (error)
		goto done;

	/* Last offset at which a complete label still fits. */
	last = (char *)bp->b_data + lp->d_secsize - sizeof(*dlp);

	for (cursor = (char *)bp->b_data; cursor <= last;
	     cursor += sizeof(long)) {
		dlp = (struct disklabel32 *)cursor;
		if (dlp->d_magic != DISKMAGIC32 ||
		    dlp->d_magic2 != DISKMAGIC32 || dkcksum32(dlp) != 0)
			continue;

		/* Found the slot: drop the new label in and validate it. */
		*dlp = *lp;
		lpx.lab32 = dlp;
		msg = l32_fixlabel(NULL, sp, lpx, TRUE);
		if (msg != NULL) {
			error = EINVAL;
			goto done;
		}

		/* Re-arm the bio and push the sector back out. */
		bp->b_cmd = BUF_CMD_WRITE;
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
		dev_dstrategy(dev, &bp->b_bio1);
		error = biowait(&bp->b_bio1, "labwr");
		goto done;
	}

	/* No existing label to overwrite. */
	error = ESRCH;
done:
#else
	bzero(bp->b_data, lp->d_secsize);
	dlp = (struct disklabel32 *)bp->b_data;
	*dlp = *lp;
	bp->b_flags &= ~B_INVAL;
	bp->b_cmd = BUF_CMD_WRITE;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	BUF_STRATEGY(bp, 1);
	error = biowait(&bp->b_bio1, "labwr");
#endif
	bp->b_flags |= B_INVAL | B_AGE;
	brelse(bp);
	return (error);
}
예제 #7
0
/*
 * Chunked up transfer completion routine - chain transfers until done
 *
 * Installed by devfs_spec_strategy() as the bio_done callback of the
 * clone buffer.  nbio belongs to the clone (nbp); the original bio is
 * carried in bio_caller_info1.ptr and the chunk size in
 * bio_caller_info2.index.  boffset, the distance between the clone's and
 * the original's data pointers, is the number of bytes handled by the
 * chunks before this one.
 *
 * The chain ends on an error, on a short transfer, or when the original
 * request is fully covered; otherwise the clone is re-aimed at the next
 * chunk and reissued.  On termination the original bio is completed with
 * biodone() and the clone is unlocked and freed.
 *
 * NOTE: MPSAFE callback.
 */
static
void
devfs_spec_strategy_done(struct bio *nbio)
{
	struct buf *nbp = nbio->bio_buf;
	struct bio *bio = nbio->bio_caller_info1.ptr;	/* original bio */
	struct buf *bp = bio->bio_buf;			/* original bp */
	int chunksize = nbio->bio_caller_info2.index;	/* chunking */
	int boffset = nbp->b_data - bp->b_data;

	if (nbp->b_flags & B_ERROR) {
		/*
		 * An error terminates the chain, propagate the error back
		 * to the original bp.  The residual is whatever was not
		 * transferred: the total minus the completed chunks minus
		 * the part of this chunk that did go through.
		 */
		bp->b_flags |= B_ERROR;
		bp->b_error = nbp->b_error;
		bp->b_resid = bp->b_bcount -
			      (boffset + (nbp->b_bcount - nbp->b_resid));
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "spec_strategy: chain %p error %d bcount %d/%d\n",
			    bp, bp->b_error, bp->b_bcount,
			    bp->b_bcount - bp->b_resid);
#endif
	} else if (nbp->b_resid) {
		/*
		 * A short read or write terminates the chain.  The
		 * residual is computed as in the error case.
		 */
		bp->b_error = nbp->b_error;
		bp->b_resid = bp->b_bcount -
			      (boffset + (nbp->b_bcount - nbp->b_resid));
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "spec_strategy: chain %p short read(1) "
			    "bcount %d/%d\n",
			    bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
#endif
	} else if (nbp->b_bcount != nbp->b_bufsize) {
		/*
		 * A short read or write can also occur by truncating b_bcount
		 * (b_bufsize still holds the size the chunk was issued with).
		 */
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "spec_strategy: chain %p short read(2) "
			    "bcount %d/%d\n",
			    bp, nbp->b_bcount + boffset, bp->b_bcount);
#endif
		bp->b_error = 0;
		bp->b_bcount = nbp->b_bcount + boffset;
		bp->b_resid = nbp->b_resid;
	} else if (nbp->b_bcount + boffset == bp->b_bcount) {
		/*
		 * No more data terminates the chain
		 */
#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "spec_strategy: chain %p finished bcount %d\n",
			    bp, bp->b_bcount);
#endif
		bp->b_error = 0;
		bp->b_resid = 0;
	} else {
		/*
		 * Continue the chain: advance past the chunk just
		 * completed and issue the next one, clipped to chunksize.
		 */
		boffset += nbp->b_bcount;
		nbp->b_data = bp->b_data + boffset;
		nbp->b_bcount = bp->b_bcount - boffset;
		if (nbp->b_bcount > chunksize)
			nbp->b_bcount = chunksize;
		nbp->b_bio1.bio_done = devfs_spec_strategy_done;
		nbp->b_bio1.bio_offset = bio->bio_offset + boffset;

#if SPEC_CHAIN_DEBUG & 1
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "spec_strategy: chain %p offset %d/%d bcount %d\n",
			    bp, boffset, bp->b_bcount, nbp->b_bcount);
#endif

		dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
		return;
	}

	/*
	 * Fall through to here on termination.  biodone(bp) and
	 * clean up and free nbp.
	 */
	biodone(bio);
	BUF_UNLOCK(nbp);
	uninitbufbio(nbp);
	kfree(nbp, M_DEVBUF);
}
예제 #8
0
/*
 * Convert a vnode strategy call into a device strategy call.  Vnode strategy
 * calls are not limited to device DMA limits so we have to deal with the
 * case.
 *
 * Requests that fit within the device's si_iosize_max (MAXPHYS when that
 * is unset), and all requests other than reads and writes, are passed
 * straight through with dev_dstrategy_chain().  Larger reads and writes
 * are issued one chunk at a time through a clone buffer whose completion
 * handler, devfs_spec_strategy_done(), chains the following chunks.
 *
 * Always returns 0.
 *
 * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
static int
devfs_spec_strategy(struct vop_strategy_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	struct buf *nbp;
	struct mount *mp;
	int maxiosize;
	int chunksize;

	if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
		buf_start(bp);

	KKASSERT(vp->v_rdev != NULL);	/* XXX */

	/*
	 * Collect statistics on synchronous and asynchronous read
	 * and write counts for disks that have associated filesystems.
	 *
	 * NOTE(review): BIO_SYNC is tested against b_flags here; confirm
	 * that this is the intended flag word.
	 */
	if (vn_isdisk(vp, NULL)) {
		mp = vp->v_rdev->si_mountpoint;
		if (mp != NULL) {
			if (bp->b_flags & BIO_SYNC) {
				if (bp->b_cmd == BUF_CMD_READ)
					mp->mnt_stat.f_syncreads++;
				else
					mp->mnt_stat.f_syncwrites++;
			} else {
				if (bp->b_cmd == BUF_CMD_READ)
					mp->mnt_stat.f_asyncreads++;
				else
					mp->mnt_stat.f_asyncwrites++;
			}
		}
	}

	/*
	 * Device iosize limitations only apply to read and write.  Shortcut
	 * the I/O if it fits.
	 */
	maxiosize = vp->v_rdev->si_iosize_max;
	if (maxiosize == 0) {
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "%s: si_iosize_max not set!\n",
			    dev_dname(vp->v_rdev));
		maxiosize = MAXPHYS;
	}
#if SPEC_CHAIN_DEBUG & 2
	maxiosize = 4096;
#endif
	if (bp->b_bcount <= maxiosize ||
	    (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
		dev_dstrategy_chain(vp->v_rdev, bio);
		return (0);
	}

	/*
	 * Clone the buffer and set up an I/O chain to chunk up the I/O.
	 * The clone shares the original's data area and remembers the
	 * original bio for the completion handler.
	 */
	nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
	initbufbio(nbp);
	buf_dep_init(nbp);
	BUF_LOCK(nbp, LK_EXCLUSIVE);
	BUF_KERNPROC(nbp);
	nbp->b_vp = vp;
	nbp->b_data = bp->b_data;
	nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
	nbp->b_bio1.bio_caller_info1.ptr = bio;
	nbp->b_bio1.bio_offset = bio->bio_offset;
	nbp->b_bio1.bio_done = devfs_spec_strategy_done;

	/*
	 * Start the first transfer.  The chunk size is maxiosize rounded
	 * down to a multiple of the block size in use.
	 */
	chunksize = vn_isdisk(vp, NULL) ? vp->v_rdev->si_bsize_phys
					: DEV_BSIZE;
	chunksize = maxiosize / chunksize * chunksize;
#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG,
		    "spec_strategy chained I/O chunksize=%d\n",
		    chunksize);
#endif
	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = chunksize;	/* used to detect a short I/O */
	nbp->b_bcount = chunksize;
	nbp->b_bio1.bio_caller_info2.index = chunksize;

#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG,
		    "spec_strategy: chain %p offset %d/%d bcount %d\n",
		    bp, 0, bp->b_bcount, nbp->b_bcount);
#endif

	dev_dstrategy(vp->v_rdev, &nbp->b_bio1);

	/* Stamp the devfs node, if there is one, after issuing the I/O. */
	if (DEVFS_NODE(vp)) {
		nanotime(&DEVFS_NODE(vp)->atime);
		nanotime(&DEVFS_NODE(vp)->mtime);
	}

	return (0);
}