Example #1
0
static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct buf *bp;
	vfs_context_t context;
	int flags, error = 0;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_offline) || dvd->vd_devvp == NULL) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	switch (zio->io_type) {
		case ZIO_TYPE_IOCTL:

			if (!vdev_readable(vd)) {
				zio->io_error = SET_ERROR(ENXIO);
				zio_interrupt(zio);
				return;
			}

			switch (zio->io_cmd) {

				case DKIOCFLUSHWRITECACHE:

					if (zfs_nocacheflush)
						break;

					if (vd->vdev_nowritecache) {
						zio->io_error = SET_ERROR(ENOTSUP);
						break;
					}

					context = vfs_context_create(spl_vfs_context_kernel());
					error = VNOP_IOCTL(dvd->vd_devvp, DKIOCSYNCHRONIZECACHE,
									   NULL, FWRITE, context);
					(void) vfs_context_rele(context);

					if (error == 0)
						vdev_disk_ioctl_done(zio, error);
					else
						error = ENOTSUP;

					if (error == 0) {
						/*
						 * The ioctl will be done asynchronously,
						 * and will call vdev_disk_ioctl_done()
						 * upon completion.
						 */
						return;
					} else if (error == ENOTSUP || error == ENOTTY) {
						/*
						 * If we get ENOTSUP or ENOTTY, we know that
						 * no future attempts will ever succeed.
						 * In this case we set a persistent bit so
						 * that we don't bother with the ioctl in the
						 * future.
						 */
						vd->vdev_nowritecache = B_TRUE;
					}
					zio->io_error = error;

					break;

				default:
					zio->io_error = SET_ERROR(ENOTSUP);
			} /* io_cmd */

			zio_execute(zio);
			return;

	case ZIO_TYPE_WRITE:
		if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
			flags = B_WRITE;
		else
			flags = B_WRITE | B_ASYNC;
		break;

	case ZIO_TYPE_READ:
		if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
			flags = B_READ;
		else
			flags = B_READ | B_ASYNC;
		break;

	default:
		zio->io_error = SET_ERROR(ENOTSUP);
		zio_interrupt(zio);
		return;
	} /* io_type */

	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);

	/* Stop OSX from also caching our data */
	flags |= B_NOCACHE;

	if (zio->io_flags & ZIO_FLAG_FAILFAST)
		flags |= B_FAILFAST;

	zio->io_target_timestamp = zio_handle_io_delay(zio);

	bp = buf_alloc(dvd->vd_devvp);

	ASSERT(bp != NULL);
	ASSERT(zio->io_data != NULL);
	ASSERT(zio->io_size != 0);

	buf_setflags(bp, flags);
	buf_setcount(bp, zio->io_size);
	buf_setdataptr(bp, (uintptr_t)zio->io_data);

	/*
	 * Map the offset to a block number, based on the physical block
	 * size (512, 4096, ...). If we fail to map, fall back to the
	 * standard 512. lbtodb() is fixed at 512.
	 */
	buf_setblkno(bp, zio->io_offset >> dvd->vd_ashift);
	buf_setlblkno(bp, zio->io_offset >> dvd->vd_ashift);

	buf_setsize(bp, zio->io_size);
	if (buf_setcallback(bp, vdev_disk_io_intr, zio) != 0)
		panic("vdev_disk_io_start: buf_setcallback failed\n");

	if (zio->io_type == ZIO_TYPE_WRITE) {
		vnode_startwrite(dvd->vd_devvp);
	}
	error = VNOP_STRATEGY(bp);
	ASSERT(error == 0);

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return;
	}
}
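
The strategy call above completes asynchronously: buf_setcallback() registers vdev_disk_io_intr, which the buffer layer invokes when the I/O finishes. The following is a minimal sketch of what such a completion handler looks like; it illustrates the pattern rather than reproducing the exact body from the original source, and assumes the XNU buf accessors buf_error()/buf_resid()/buf_free() and the ZFS pipeline entry zio_delay_interrupt().

static void
vdev_disk_io_intr(struct buf *bp, void *arg)
{
	zio_t *zio = (zio_t *)arg;

	/* Propagate the device error; a short transfer also counts as EIO. */
	zio->io_error = buf_error(bp);
	if (zio->io_error == 0 && buf_resid(bp) != 0)
		zio->io_error = SET_ERROR(EIO);

	/* The buf was allocated in vdev_disk_io_start(), so free it here. */
	buf_free(bp);

	/* Hand the zio back to the pipeline, honoring io_target_timestamp. */
	zio_delay_interrupt(zio);
}
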
Example #2
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ffs_balloc(
	register struct inode *ip,
	register ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	struct buf **bpp,
	int flags,
	int * blk_alloc)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	int alloc_buffer = 1;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_fs;
	if (flags & B_NOBUFF) 
		alloc_buffer = 0;

	if (blk_alloc)
		*blk_alloc = 0;

	/*
	 * If the next write will extend the file into a new block,
 * and the file is currently composed of a fragment,
 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/* The file size prior to this write fits in the direct
		 * blocks (i.e. fragmentation may have been done);
		 * we are now extending the file beyond
		 * the block that held end-of-file prior to this write.
		 */
		osize = blksize(fs, ip, nb); 
		/* osize gives the disk-allocated size in the last block; it is
		 * either some number of fragments or a full file system block. */
		if (osize < fs->fs_bsize && osize > 0) {
			/* A few fragments are already allocated; since the
			 * current write extends beyond this block,
			 * allocate the complete block, as fragments are only
			 * allowed in the last block.
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			/* adjust the inode size we just grew */
			/* it is in nb+1 as nb starts from 0 */
			ip->i_size = (nb + 1) * fs->fs_bsize;
			ubc_setsize(vp, (off_t)ip->i_size);

			ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if ((flags & B_SYNC) || (!alloc_buffer)) {
				if (!alloc_buffer) 
					buf_setflags(bp, B_NOCACHE);
				buf_bwrite(bp);
			} else
				buf_bdwrite(bp);
			/* note that bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			if (alloc_buffer) {
				error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp);
				if (error) {
					buf_brelse(bp);
					return (error);
				}
				*bpp = bp;
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				if (alloc_buffer) {
					error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp);
					if (error) {
						buf_brelse(bp);
						return (error);
					}
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					*bpp = bp;
					return (0);
				} else {
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					return (0);
				}
			} else {
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
				ip->i_flag |= IN_CHANGE | IN_UPDATE;

				/* adjust the inode size we just grew */
				ip->i_size = (lbn * fs->fs_bsize) + size;
				ubc_setsize(vp, (off_t)ip->i_size);

				if (!alloc_buffer) {
					buf_setflags(bp, B_NOCACHE);
					if (flags & B_SYNC)
						buf_bwrite(bp);
					else
						buf_bdwrite(bp);
				} else
					*bpp = bp;
				return (0);

			}
		} else {
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (alloc_buffer) {
				bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE);
				buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb)));

				if (flags & B_CLRBUF)
					buf_clear(bp);
			}
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (blk_alloc) {
				*blk_alloc = nsize;
			}
			if (alloc_buffer)
				*bpp = bp;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if (error = ufs_getlbns(vp, lbn, indirs, &num))
		return(error);
#if DIAGNOSTIC
	if (num < 1)
		panic("ffs_balloc: ufs_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
	        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb))
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if ((error = buf_bwrite(bp)) != 0) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if	REV_ENDIAN_FS
	if (rev_endian)
		nb = OSSwapInt32(bap[indirs[i].in_off]);
	else {
#endif	/* REV_ENDIAN_FS */
		nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if (error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if (error = buf_bwrite(nbp)) {
			buf_brelse(bp);
			goto fail;
		}
#if	REV_ENDIAN_FS
	if (rev_endian)
		bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
	else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i - 1].in_off] = nb;
#if	REV_ENDIAN_FS
	}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if (error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if	REV_ENDIAN_FS
	if (rev_endian)
		bap[indirs[i].in_off] = OSSwapInt32(nb);
	else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i].in_off] = nb;
#if	REV_ENDIAN_FS
	}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if ((flags & B_SYNC)) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		if (alloc_buffer) {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));

			if (flags & B_CLRBUF)
				buf_clear(nbp);
		}
		if (blk_alloc) {
			*blk_alloc = fs->fs_bsize;
		}
		if (alloc_buffer)
			*bpp = nbp;

		return (0);
	}
	buf_brelse(bp);
	if (alloc_buffer) {
		if (flags & B_CLRBUF) {
			error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				buf_brelse(nbp);
				goto fail;
			}
		} else {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		}
		*bpp = nbp;
	}
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
	        devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
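
For context, a caller normally uses ffs_balloc() to make sure backing storage exists for a logical block before writing into it. The fragment below is a hypothetical, simplified caller (example_write_block() is not part of the original source) that shows the intended calling convention: pass the inode, logical block, requested size, and credentials, then write through the returned buffer.

/* Hypothetical caller sketch, not from the original source. */
static int
example_write_block(struct inode *ip, ufs_daddr_t lbn, int size,
	kauth_cred_t cred, struct uio *uio)
{
	struct buf *bp;
	int error;

	/* B_CLRBUF asks ffs_balloc() to zero a freshly allocated buffer. */
	error = ffs_balloc(ip, lbn, size, cred, &bp, B_CLRBUF, NULL);
	if (error)
		return (error);

	/* Copy the user data into the buffer and schedule a delayed write. */
	error = uiomove((caddr_t)buf_dataptr(bp), size, uio);
	if (error) {
		buf_brelse(bp);
		return (error);
	}
	return (buf_bdwrite(bp));
}
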
Example #3
0
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct buf *bp;
	vfs_context_t context;
	int flags, error = 0;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		zio_vdev_io_bypass(zio);

		/* XXPOLICY */
		if (vdev_is_dead(vd)) {
			zio->io_error = ENXIO;
			//zio_next_stage_async(zio);
			return (ZIO_PIPELINE_CONTINUE);
			//return;
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			context = vfs_context_create((vfs_context_t)0);
			error = VNOP_IOCTL(dvd->vd_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
			(void) vfs_context_rele(context);

			if (error == 0)
				vdev_disk_ioctl_done(zio, error);
			else
				error = ENOTSUP;

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return ZIO_PIPELINE_STOP;
			} else if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		//zio_next_stage_async(zio);
		return (ZIO_PIPELINE_CONTINUE);
	}

	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
		return (ZIO_PIPELINE_STOP);
		//return;

	if ((zio = vdev_queue_io(zio)) == NULL)
		return (ZIO_PIPELINE_CONTINUE);
		//return;

	flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	//flags |= B_NOCACHE;

	if (zio->io_flags & ZIO_FLAG_FAILFAST)
		flags |= B_FAILFAST;

	/*
	 * Check the state of this device to see if it has been offlined or
	 * is in an error state.  If the device was offlined or closed,
	 * dvd will be NULL and buf_alloc below will fail
	 */
	//error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
	if (vdev_is_dead(vd)) {
		error = ENXIO;
	}

	if (error) {
		zio->io_error = error;
		//zio_next_stage_async(zio);
		return (ZIO_PIPELINE_CONTINUE);
	}

	bp = buf_alloc(dvd->vd_devvp);

	ASSERT(bp != NULL);
	ASSERT(zio->io_data != NULL);
	ASSERT(zio->io_size != 0);

	buf_setflags(bp, flags);
	buf_setcount(bp, zio->io_size);
	buf_setdataptr(bp, (uintptr_t)zio->io_data);
	if (dvd->vd_ashift) {
		buf_setlblkno(bp, zio->io_offset >> dvd->vd_ashift);
		buf_setblkno(bp, zio->io_offset >> dvd->vd_ashift);
	} else {
Example #4
0
int
physio( void (*f_strategy)(buf_t), 
	buf_t bp,
	dev_t dev,
	int flags,
	u_int (*f_minphys)(buf_t),
	struct uio *uio,
	int blocksize)
{
	struct proc *p = current_proc();
	int error, i, buf_allocated, todo, iosize;
	int orig_bflags = 0;
	int64_t done;

	error = 0;
	flags &= B_READ | B_WRITE;
	buf_allocated = 0;

	/*
	 * [check user read/write access to the data buffer]
	 *
	 * Check each iov one by one.  Note that we know if we're reading or
	 * writing, so we ignore the uio's rw parameter.  Also note that if
	 * we're doing a read, that's a *write* to user-space.
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
			user_addr_t base;
			user_size_t len;
			
			if (uio_getiov(uio, i, &base, &len) ||
			    !useracc(base, len,
			    (flags == B_READ) ? B_WRITE : B_READ))
				return (EFAULT);
		}
	}
	/*
	 * Make sure we have a buffer, creating one if necessary.
	 */
	if (bp == NULL) {
		bp = buf_alloc((vnode_t)0);
		buf_allocated = 1;
	} else
	        orig_bflags = buf_flags(bp);
	/*
	 * at this point we should have a buffer
	 * that is marked BL_BUSY... we either 
	 * acquired it via buf_alloc, or it was
	 * passed into us... if it was passed
	 * in, it needs to already be owned by
	 * the caller (i.e. BL_BUSY is set)
	 */
	assert(bp->b_lflags & BL_BUSY);

	/*
	 * [set up the fixed part of the buffer for a transfer]
	 */
	bp->b_dev = dev;
	bp->b_proc = p;

	/*
	 * [mark the buffer busy for physical I/O]
	 * (i.e. set B_PHYS (because it's an I/O to user
	 * memory, and B_RAW, because B_RAW is to be
	 * "Set by physio for raw transfers.", in addition
	 * to the read/write flag.)
	 */
	buf_setflags(bp, B_PHYS | B_RAW);

	/*
	 * [while there is data to transfer and no I/O error]
	 * Note that I/O errors are handled with a 'goto' at the bottom
	 * of the 'while' loop.
	 */
	while (uio_resid(uio) > 0) {
			
			if ( (iosize = uio_curriovlen(uio)) > MAXPHYSIO_WIRED)
			        iosize = MAXPHYSIO_WIRED;
			/*
			 * make sure we're set to issue a fresh I/O
			 * in the right direction
			 */
			buf_reset(bp, flags);

			/* [set up the buffer for a maximum-sized transfer] */
 			buf_setblkno(bp, uio_offset(uio) / blocksize);
			buf_setcount(bp, iosize);
			buf_setdataptr(bp, (uintptr_t)CAST_DOWN(caddr_t, uio_curriovbase(uio)));
			
			/*
			 * [call f_minphys to bound the transfer size]
			 * and remember the amount of data to transfer,
			 * for later comparison.
			 */
			(*f_minphys)(bp);
			todo = buf_count(bp);

			/*
			 * [lock the part of the user address space involved
			 *    in the transfer]
			 */

			if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
				error = vslock(CAST_USER_ADDR_T(buf_dataptr(bp)),
					       (user_size_t)todo);
				if (error)
					goto done;
			}
			
			/* [call f_strategy to start the transfer] */
			(*f_strategy)(bp);


			/* [wait for the transfer to complete] */
			error = (int)buf_biowait(bp);

			/*
			 * [unlock the part of the address space previously
			 *    locked]
			 */
			if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
				vsunlock(CAST_USER_ADDR_T(buf_dataptr(bp)),
					 (user_size_t)todo,
					 (flags & B_READ));

			/*
			 * [deduct the transfer size from the total number
			 *    of data to transfer]
			 */
			done = buf_count(bp) - buf_resid(bp);
			uio_update(uio, done);

			/*
			 * Now, check for an error.
			 * Also, handle weird end-of-disk semantics.
			 */
			if (error || done < todo)
				goto done;
	}

done:
	if (buf_allocated)
	        buf_free(bp);
	else
		buf_setflags(bp, orig_bflags);

	return (error);
}
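
physio() is normally invoked from a character (raw) device's read or write entry point, which passes in the driver's strategy routine and lets physio() handle buffer setup, user-memory wiring, and completion. A hypothetical raw read entry point might look like the sketch below; exampleread() and example_strategy() are illustrative names, not part of the original source, while minphys() is the kernel's default transfer-size clamp.

int
exampleread(dev_t dev, struct uio *uio, __unused int ioflag)
{
	/*
	 * Pass a NULL buf so physio() allocates and frees its own buffer;
	 * B_READ means the transfer is a write into user memory.
	 */
	return (physio(example_strategy, NULL, dev, B_READ, minphys, uio,
	    DEV_BSIZE));
}
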