Exemple #1
0
static int
vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx)
{
	int			error = 0;
	char *			iov_base;
	caddr_t 		vaddr;

	if (buf_map(bp, &vaddr)) 
	        panic("vn device: buf_map failed");
	iov_base = (char *)vaddr;

	if (vn->sc_shadow_vp == NULL) {
	        user_ssize_t		temp_resid;

		error = file_io(vn->sc_vp, ctx,
				buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
				iov_base,
				(off_t)buf_blkno(bp) * vn->sc_secsize,
				buf_resid(bp), &temp_resid);
		buf_setresid(bp, temp_resid);
	}
	else {
		if (buf_flags(bp) & B_READ)
			error = shadow_read(vn, bp, iov_base, ctx);
		else
			error = shadow_write(vn, bp, iov_base, ctx);
	}
	buf_unmap(bp);

	return (error);
}
Exemple #2
0
static int
vnop_strategy_9p(struct vnop_strategy_args *ap)
{
	mount_t mp;
	struct buf *bp;
	node_9p *np;
	caddr_t addr;
	uio_t uio;
	int e, flags;

	TRACE();
	bp = ap->a_bp;
	np = NTO9P(buf_vnode(bp));
	flags = buf_flags(bp);
	uio = NULL;
	addr = NULL;

	mp = vnode_mount(buf_vnode(bp));
	if (mp == NULL)
		return ENXIO;

	if ((e=buf_map(bp, &addr)))
		goto error;

	uio = uio_create(1, buf_blkno(bp) * vfs_statfs(mp)->f_bsize, UIO_SYSSPACE,
					 ISSET(flags, B_READ)? UIO_READ: UIO_WRITE);
	if (uio == NULL) {
		e = ENOMEM;
		goto error;
	}
	
	uio_addiov(uio, CAST_USER_ADDR_T(addr), buf_count(bp));
	if (ISSET(flags, B_READ)) {
		if((e=nread_9p(np, uio)))
			goto error;
		/* zero the rest of the page if we reached EOF */
		if (uio_resid(uio) > 0) {
			bzero(addr+buf_count(bp)-uio_resid(uio), uio_resid(uio));
			uio_update(uio, uio_resid(uio));
		}
	} else {
		if ((e=nwrite_9p(np, uio)))
			goto error;
	}
	buf_setresid(bp, uio_resid(uio));
error:
	if (uio)
		uio_free(uio);
	if (addr)
		buf_unmap(bp);
	buf_seterror(bp, e);
	buf_biodone(bp);
	return e;
}
Exemple #3
0
static int
shadow_write(struct vn_softc * vn, struct buf * bp, char * base, 
	     vfs_context_t ctx)
{
	u_int32_t		blocksize = vn->sc_secsize;
	int 		error = 0;
	u_int32_t		offset;
	boolean_t	shadow_grew;
	u_int32_t		resid;
	u_int32_t		start = 0;

	offset = buf_blkno(bp);
	resid =  buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_int32_t		this_offset;
		u_int32_t		this_resid;

		shadow_grew = shadow_map_write(vn->sc_shadow_map, 
					       offset, resid, 
					       &this_offset, &this_resid);
		if (shadow_grew) {
#if 0
			off_t	size;
			/* truncate the file to its new length before write */
			size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) 
				* blocksize;
			vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
#endif
		}
		error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE, 
				base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize, 
				&temp_resid);
		if (error) {
			break;
		}
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			printf("vn device: shadow_write zero length write\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
Exemple #4
0
static int
shadow_read(struct vn_softc * vn, struct buf * bp, char * base,
	vfs_context_t ctx)
{
	u_int32_t		blocksize = vn->sc_secsize;
	int 		error = 0;
	u_int32_t		offset;
	boolean_t	read_shadow;
	u_int32_t		resid;
	u_int32_t		start = 0;

	offset = buf_blkno(bp);
	resid =  buf_resid(bp) / blocksize;
	while (resid > 0) {
		user_ssize_t	temp_resid;
		u_int32_t		this_offset;
		u_int32_t		this_resid;
		struct vnode *	vp;

		read_shadow = shadow_map_read(vn->sc_shadow_map,
					      offset, resid,
					      &this_offset, &this_resid);
		if (read_shadow) {
			vp = vn->sc_shadow_vp;
		}
		else {
			vp = vn->sc_vp;
		}
		error = file_io(vp, ctx, UIO_READ, base + start,
				(off_t)this_offset * blocksize,
				(user_ssize_t)this_resid * blocksize, 
				&temp_resid);
		if (error) {
			break;
		}
		this_resid -= (temp_resid / blocksize);
		if (this_resid == 0) {
			printf("vn device: shadow_read zero length read\n");
			break;
		}
		resid -= this_resid;
		offset += this_resid;
		start += this_resid * blocksize;
	}
	buf_setresid(bp, resid * blocksize);
	return (error);
}
Exemple #5
0
static void mdevstrategy(struct buf *bp) {
	unsigned int left, lop, csize;
	vm_offset_t vaddr, blkoff;
	int devid;
	addr64_t paddr, fvaddr;
	ppnum_t pp;

	devid = minor(buf_device(bp));							/* Get minor device number */

	if ((mdev[devid].mdFlags & mdInited) == 0) {		/* Have we actually been defined yet? */
	        buf_seterror(bp, ENXIO);
		buf_biodone(bp);
		return;
	}

	buf_setresid(bp, buf_count(bp));						/* Set byte count */
	
	blkoff = buf_blkno(bp) * mdev[devid].mdSecsize;		/* Get offset into file */

/*
 *	Note that reading past end is an error, but reading at end is an EOF.  For these
 *	we just return with resid == count.
 */

	if (blkoff >= (mdev[devid].mdSize << 12)) {			/* Are they trying to read/write at/after end? */
		if(blkoff != (mdev[devid].mdSize << 12)) {		/* Are we trying to read after EOF? */
		        buf_seterror(bp, EINVAL);						/* Yeah, this is an error */
		}
		buf_biodone(bp);								/* Return */
		return;
	}

	if ((blkoff + buf_count(bp)) > (mdev[devid].mdSize << 12)) {		/* Will this read go past end? */
		buf_setcount(bp, ((mdev[devid].mdSize << 12) - blkoff));	/* Yes, trim to max */
	}
	/*
	 * make sure the buffer's data area is
	 * accessible
	 */
	if (buf_map(bp, (caddr_t *)&vaddr))
	        panic("ramstrategy: buf_map failed\n");

	fvaddr = (mdev[devid].mdBase << 12) + blkoff;		/* Point to offset into ram disk */
	
	if (buf_flags(bp) & B_READ) {					/* Is this a read? */
		if(!(mdev[devid].mdFlags & mdPhys)) {			/* Physical mapped disk? */
			bcopy((void *)((uintptr_t)fvaddr),
				(void *)vaddr, (size_t)buf_count(bp));	/* This is virtual, just get the data */
		}
		else {
			left = buf_count(bp);						/* Init the amount left to copy */
			while(left) {								/* Go until it is all copied */
				
				lop = min((4096 - (vaddr & 4095)), (4096 - (fvaddr & 4095)));	/* Get smallest amount left on sink and source */
				csize = min(lop, left);					/* Don't move more than we need to */
				
				pp = pmap_find_phys(kernel_pmap, (addr64_t)((uintptr_t)vaddr));	/* Get the sink physical address */
				if(!pp) {								/* Not found, what gives? */
					panic("mdevstrategy: sink address %016llX not mapped\n", (addr64_t)((uintptr_t)vaddr));
				}
				paddr = (addr64_t)(((addr64_t)pp << 12) | (addr64_t)(vaddr & 4095));	/* Get actual address */
				bcopy_phys(fvaddr, paddr, csize);		/* Copy this on in */
				mapping_set_mod(paddr >> 12);			/* Make sure we know that it is modified */
				
				left = left - csize;					/* Calculate what is left */
				vaddr = vaddr + csize;					/* Move to next sink address */
				fvaddr = fvaddr + csize;				/* Bump to next physical address */
			}
		}
	}
	else {												/* This is a write */
		if(!(mdev[devid].mdFlags & mdPhys)) {			/* Physical mapped disk? */
Exemple #6
0
static void
vnstrategy(struct buf *bp)
{
	struct vn_softc *vn;
	int error = 0;
	long sz;	/* in sc_secsize chunks */
	daddr64_t blk_num;
	struct vnode *		shadow_vp = NULL;
	struct vnode *		vp = NULL;
	struct vfs_context  	context; 

	vn = vn_table + vnunit(buf_device(bp));
	if ((vn->sc_flags & VNF_INITED) == 0) {
		error = ENXIO;
		goto done;
	}

	context.vc_thread = current_thread();
	context.vc_ucred = vn->sc_cred;

	buf_setresid(bp, buf_count(bp));
	/*
	 * Check for required alignment.  Transfers must be a valid
	 * multiple of the sector size.
	 */
	blk_num = buf_blkno(bp);
	if (buf_count(bp) % vn->sc_secsize != 0) {
		error = EINVAL;
		goto done;
	}
	sz = howmany(buf_count(bp), vn->sc_secsize);

	/*
	 * If out of bounds return an error.  If at the EOF point,
	 * simply read or write less.
	 */
	if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
		if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
			error = EINVAL;
		}
		goto done;
	}
	/*
	 * If the request crosses EOF, truncate the request.
	 */
	if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
		buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
		buf_setresid(bp, buf_count(bp));
	}
	vp = vn->sc_vp;
	if (vp == NULL) {
		error = ENXIO;
		goto done;
	}

	error = vnode_getwithvid(vp, vn->sc_vid);
	if (error != 0) {
		/* the vnode is no longer available, abort */
		error = ENXIO;
		vnclear(vn, &context);
		goto done;
	}
	shadow_vp = vn->sc_shadow_vp;
	if (shadow_vp != NULL) {
		error = vnode_getwithvid(shadow_vp,
					 vn->sc_shadow_vid);
		if (error != 0) {
			/* the vnode is no longer available, abort */
			error = ENXIO;
			vnode_put(vn->sc_vp);
			vnclear(vn, &context);
			goto done;
		}
	}

	error = vn_readwrite_io(vn, bp, &context);
	vnode_put(vp);
	if (shadow_vp != NULL) {
		vnode_put(shadow_vp);
	}

 done:
	if (error) {
	        buf_seterror(bp, error);
	}
	buf_biodone(bp);
	return;
}
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be "."., but the caller must check to ensure it does an vrele and iput
 * instead of two iputs.
 *
 * Overall outline of ufs_lookup:
 *
 *	check accessibility of directory
 *	look for name in cache, if found, then if at end of path
 *	  and deleting or creating, drop it, else return name
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
 */
int
cd9660_lookup(struct vnop_lookup_args *ap)
{
	register struct vnode *vdp;	/* vnode for directory being searched */
	register struct iso_node *dp;	/* inode for directory being searched */
	register struct iso_mnt *imp;	/* file system that directory is in */
	struct buf *bp;			/* a buffer of directory entries */
	struct iso_directory_record *ep = NULL;/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	int saveoffset = 0;		/* offset of last directory entry in dir */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by cd9660_vget_internal */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int wantassoc;
	int error;
	ino_t ino = 0;
	int reclen;
	u_short namelen;
	int isoflags;
	char altname[ISO_RRIP_NAMEMAX];
	int res;
	int len;
	char *name;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	vfs_context_t ctx = cnp->cn_context;
	size_t altlen;
	
	bp = NULL;
	*vpp = NULL;
	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	imp = dp->i_mnt;
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);
	wantassoc = 0;

	
	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 */
	if ((error = cache_lookup(vdp, vpp, cnp))) {
		if (error == ENOENT)
			return (error);
		return (0);
	}
	
	len = cnp->cn_namelen;
	name = cnp->cn_nameptr;
	altname[0] = '\0';
	/*
	 * A "._" prefix means, we are looking for an associated file
	 */
	if (imp->iso_ftype != ISO_FTYPE_RRIP &&
	    *name == ASSOCCHAR1 && *(name+1) == ASSOCCHAR2) {
		wantassoc = 1;
		len -= 2;
		name += 2;
	}
	/*
	 * Decode search name into UCS-2 (Unicode)
	 */
	if ((imp->iso_ftype == ISO_FTYPE_JOLIET) &&
	    !((len == 1 && *name == '.') || (flags & ISDOTDOT))) {
		int flags1 = UTF_PRECOMPOSED;

		(void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen,
					sizeof(altname), 0, flags1);
		name = altname;
		len = altlen;
	}
	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = imp->im_sector_size - 1;
	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff > dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp)))
				return (error);
		numdirpasses = 2;
		iso_nchstats.ncs_2passes++;
	}
	endsearch = dp->i_size;
	
searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If offset is on a block boundary,
		 * read the next directory block.
		 * Release previous if it exists.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				buf_brelse(bp);
			if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp,dp->i_offset), NULL, &bp)) )
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * Get pointer to next entry.
		 */
		ep = (struct iso_directory_record *)
			((char *)0 + buf_dataptr(bp) + entryoffsetinblock);
		
		reclen = isonum_711(ep->length);
		if (reclen == 0) {
			/* skip to next block, if any */
			dp->i_offset =
			    (dp->i_offset & ~bmask) + imp->im_sector_size;
			continue;
		}
		
		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
			/* illegal entry, stop */
			break;
		}
		if (entryoffsetinblock + reclen > imp->im_sector_size) {
			/* entries are not allowed to cross sector boundaries */
			break;
		}
		namelen = isonum_711(ep->name_len);
		isoflags = isonum_711(ep->flags);
		
		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
			/* illegal entry, stop */
			break;
		/*
		 * Check for a name match.
		 */
		if (imp->iso_ftype == ISO_FTYPE_RRIP) {
			if (isoflags & directoryBit)
				ino = isodirino(ep, imp);
			else
				ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
			dp->i_ino = ino;
			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
			if (namelen == cnp->cn_namelen
			    && !bcmp(name,altname,namelen))
				goto found;
			ino = 0;
		} else {
			if ((!(isoflags & associatedBit)) == !wantassoc) {
				if ((len == 1
				     && *name == '.')
				    || (flags & ISDOTDOT)) {
					if (namelen == 1
					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
						/*
						 * Save directory entry's inode number and
						 * release directory buffer.
						 */
						dp->i_ino = isodirino(ep, imp);
						goto found;
					}
					if (namelen != 1
					    || ep->name[0] != 0)
						goto notfound;
				} else if (imp->iso_ftype != ISO_FTYPE_JOLIET && !(res = isofncmp(name, len, ep->name, namelen))) {
					if ( isoflags & directoryBit )
						ino = isodirino(ep, imp);
					else
						ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
					saveoffset = dp->i_offset;
				} else if (imp->iso_ftype == ISO_FTYPE_JOLIET && !(res = ucsfncmp((u_int16_t*)name, len,
							    (u_int16_t*) ep->name, namelen))) {
					if ( isoflags & directoryBit )
						ino = isodirino(ep, imp);
					else
						ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
					saveoffset = dp->i_offset;
				} else if (ino)
					goto foundino;
#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
				else if (res < 0)
					goto notfound;
				else if (res > 0 && numdirpasses == 2)
					numdirpasses++;
#endif
			}
		}
		dp->i_offset += reclen;
		entryoffsetinblock += reclen;
	} /* endwhile */
	
	if (ino) {
foundino:
		dp->i_ino = ino;
		if (saveoffset != dp->i_offset) {
			if (lblkno(imp, dp->i_offset) !=
			    lblkno(imp, saveoffset)) {
				if (bp != NULL)
					buf_brelse(bp);
				if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) )
					return (error);
			}
			entryoffsetinblock = saveoffset & bmask;
			ep = (struct iso_directory_record *)
				((char *)0 + buf_dataptr(bp) + entryoffsetinblock);
			dp->i_offset = saveoffset;
		}
		goto found;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		buf_brelse(bp);

	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	return (ENOENT);
	
found:
	if (numdirpasses == 2)
		iso_nchstats.ncs_pass2++;
	
	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		dp->i_diroff = dp->i_offset;
	
	/*
	 * Step through the translation in the name.  We do not `iput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the `iget' for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	/*
	 * If ino is different from dp->i_ino,
	 * it's a relocated directory.
	 */
	if (flags & ISDOTDOT) {
		error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, NULL, NULL,
					     dp->i_ino != ino, ep,
					     vfs_context_proc(ctx));
		VTOI(tdp)->i_parent = VTOI(pdp)->i_number;
		buf_brelse(bp);

		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		buf_brelse(bp);
		vnode_get(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
	        error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, vdp, cnp,
			     dp->i_ino != ino, ep, vfs_context_proc(ctx));
		/* save parent inode number */
		VTOI(tdp)->i_parent = VTOI(pdp)->i_number;
		buf_brelse(bp);
		if (error)
			return (error);
		*vpp = tdp;
	}
	return (0);
}
Exemple #8
0
int
spec_strategy(struct vnop_strategy_args *ap)
{
        buf_t	bp;
	int	bflags;
	int 	policy;
	dev_t	bdev;
	uthread_t ut;
	size_t devbsdunit;
	mount_t mp;

        bp = ap->a_bp;
	bdev = buf_device(bp);
	bflags = buf_flags(bp);
	mp = buf_vnode(bp)->v_mount;

        if (kdebug_enable) {
	        int    code = 0;

		if (bflags & B_READ)
		        code |= DKIO_READ;
		if (bflags & B_ASYNC)
		        code |= DKIO_ASYNC;

		if (bflags & B_META)
		        code |= DKIO_META;
		else if (bflags & B_PAGEIO)
		        code |= DKIO_PAGING;

		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
				      bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
        }
	if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
	    mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
	        hard_throttle_on_root = 1;


	if (mp != NULL)
		devbsdunit = mp->mnt_devbsdunit;
	else
		devbsdunit = LOWPRI_MAX_NUM_DEV - 1;

	throttle_info_update(&_throttle_io_info[devbsdunit], bflags);
	if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
		bp->b_flags |= B_THROTTLED_IO;
	}


	if ((bflags & B_READ) == 0) {
		microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
		if (mp) {
			INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
		}
	} else if (mp) {
		INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
	}

	(*bdevsw[major(bdev)].d_strategy)(bp);
	
	return (0);
}
Exemple #9
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
ffs_balloc(
	register struct inode *ip,
	register ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	struct buf **bpp,
	int flags,
	int * blk_alloc)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	int alloc_buffer = 1;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_fs;
	if (flags & B_NOBUFF) 
		alloc_buffer = 0;

	if (blk_alloc)
		*blk_alloc = 0;

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/* the filesize prior to this write  can fit in direct 
		 * blocks (ie.  fragmentaion is possibly done)
		 * we are now extending the file write beyond 
		 * the block which has end of file prior to this write 
		 */
		osize = blksize(fs, ip, nb); 
		/* osize gives disk allocated size in the last block. It is 
		 * either in fragments or a file system block size */
		if (osize < fs->fs_bsize && osize > 0) {
			/* few fragments are already allocated,since the
			 * current extends beyond this block 
			 * allocate the complete block as fragments are only
			 * in last block
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			/* adjust the inode size we just grew */
			/* it is in nb+1 as nb starts from 0 */
			ip->i_size = (nb + 1) * fs->fs_bsize;
			ubc_setsize(vp, (off_t)ip->i_size);

			ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if ((flags & B_SYNC) || (!alloc_buffer)) {
				if (!alloc_buffer) 
					buf_setflags(bp, B_NOCACHE);
				buf_bwrite(bp);
			} else
				buf_bdwrite(bp);
			/* note that bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			if (alloc_buffer) {
			error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp);
			if (error) {
				buf_brelse(bp);
				return (error);
			}
			*bpp = bp;
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				if (alloc_buffer) {
				error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp);
				if (error) {
					buf_brelse(bp);
					return (error);
				}
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				*bpp = bp;
				return (0);
				}
				else {
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					return (0);
				}
			} else {
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
				ip->i_flag |= IN_CHANGE | IN_UPDATE;

				/* adjust the inode size we just grew */
				ip->i_size = (lbn * fs->fs_bsize) + size;
				ubc_setsize(vp, (off_t)ip->i_size);

				if (!alloc_buffer) {
					buf_setflags(bp, B_NOCACHE);
					if (flags & B_SYNC)
						buf_bwrite(bp);
					else
						buf_bdwrite(bp);
				 } else
					*bpp = bp;
				return (0);

			}
		} else {
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (alloc_buffer) {
			        bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE);
				buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb)));

				if (flags & B_CLRBUF)
				        buf_clear(bp);
			}
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (blk_alloc) {
				*blk_alloc = nsize;
			}
			if (alloc_buffer)
				*bpp = bp;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if (error = ufs_getlbns(vp, lbn, indirs, &num))
		return(error);
#if DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
	        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb))
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if ((error = buf_bwrite(bp)) != 0) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if	REV_ENDIAN_FS
	if (rev_endian)
		nb = OSSwapInt32(bap[indirs[i].in_off]);
	else {
#endif	/* REV_ENDIAN_FS */
		nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if (error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if (error = buf_bwrite(nbp)) {
			buf_brelse(bp);
			goto fail;
		}
#if	REV_ENDIAN_FS
	if (rev_endian)
		bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
	else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i - 1].in_off] = nb;
#if	REV_ENDIAN_FS
	}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if (error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if	REV_ENDIAN_FS
	if (rev_endian)
		bap[indirs[i].in_off] = OSSwapInt32(nb);
	else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i].in_off] = nb;
#if	REV_ENDIAN_FS
	}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if ((flags & B_SYNC)) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		if(alloc_buffer ) {
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));

		if (flags & B_CLRBUF)
			buf_clear(nbp);
		}
		if (blk_alloc) {
			*blk_alloc = fs->fs_bsize;
		}
		if(alloc_buffer) 
			*bpp = nbp;

		return (0);
	}
	buf_brelse(bp);
	if (alloc_buffer) {
	        if (flags & B_CLRBUF) {
		        error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
			        buf_brelse(nbp);
				goto fail;
			}
		} else {
		        nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		}
		*bpp = nbp;
	}
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
	        devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
Exemple #10
0
__private_extern__
int
fuse_internal_strategy(vnode_t vp, buf_t bp)
{
    size_t biosize;
    size_t chunksize;
    size_t respsize;

    int mapped = FALSE;
    int mode;
    int op;
    int vtype = vnode_vtype(vp);

    int err = 0;

    caddr_t bufdat;
    off_t   left;
    off_t   offset;
    int32_t bflags = buf_flags(bp);

    fufh_type_t             fufh_type;
    struct fuse_dispatcher  fdi;
    struct fuse_data       *data;
    struct fuse_vnode_data *fvdat = VTOFUD(vp);
    struct fuse_filehandle *fufh = NULL;
    mount_t mp = vnode_mount(vp);

    data = fuse_get_mpdata(mp);

    biosize = data->blocksize;

    if (!(vtype == VREG || vtype == VDIR)) {
        return ENOTSUP;
    }
 
    if (bflags & B_READ) {
        mode = FREAD;
        fufh_type = FUFH_RDONLY; /* FUFH_RDWR will also do */
    } else {
        mode = FWRITE;
        fufh_type = FUFH_WRONLY; /* FUFH_RDWR will also do */
    }

    if (fvdat->flag & FN_CREATING) {
        fuse_lck_mtx_lock(fvdat->createlock);
        if (fvdat->flag & FN_CREATING) {
            (void)fuse_msleep(fvdat->creator, fvdat->createlock,
                              PDROP | PINOD | PCATCH, "fuse_internal_strategy",
                              NULL);
        } else {
            fuse_lck_mtx_unlock(fvdat->createlock);
        }
    }

    fufh = &(fvdat->fufh[fufh_type]);

    if (!FUFH_IS_VALID(fufh)) {
        fufh_type = FUFH_RDWR;
        fufh = &(fvdat->fufh[fufh_type]);
        if (!FUFH_IS_VALID(fufh)) {
            fufh = NULL;
        } else {
            /* We've successfully fallen back to FUFH_RDWR. */
        }
    }

    if (!fufh) {

        if (mode == FREAD) {
            fufh_type = FUFH_RDONLY;
        } else {
            fufh_type = FUFH_RDWR;
        }

        /*
         * Lets NOT do the filehandle preflight check here.
         */

        err = fuse_filehandle_get(vp, NULL, fufh_type, 0 /* mode */);

        if (!err) {
            fufh = &(fvdat->fufh[fufh_type]);
            FUFH_AUX_INC(fufh);
            /* We've created a NEW fufh of type fufh_type. open_count is 1. */
        }

    } else { /* good fufh */

        FUSE_OSAddAtomic(1, (SInt32 *)&fuse_fh_reuse_count);

        /* We're using an existing fufh of type fufh_type. */
    }

    if (err) {

         /* A more typical error case. */
         if ((err == ENOTCONN) || fuse_isdeadfs(vp)) {
             buf_seterror(bp, EIO);
             buf_biodone(bp);
             return EIO;
         }

         IOLog("MacFUSE: strategy failed to get fh "
               "(vtype=%d, fufh_type=%d, err=%d)\n", vtype, fufh_type, err);

         if (!vfs_issynchronous(mp)) {
             IOLog("MacFUSE: asynchronous write failed!\n");
         }

         buf_seterror(bp, EIO);
         buf_biodone(bp);
         return EIO;
    }

    if (!fufh) {
        panic("MacFUSE: tried everything but still no fufh");
        /* NOTREACHED */
    }

#define B_INVAL 0x00040000 /* Does not contain valid info. */
#define B_ERROR 0x00080000 /* I/O error occurred. */

    if (bflags & B_INVAL) {
        IOLog("MacFUSE: buffer does not contain valid information\n");
    } 

    if (bflags & B_ERROR) {
        IOLog("MacFUSE: an I/O error has occured\n");
    }

    if (buf_count(bp) == 0) {
        return 0;
    }

    fdisp_init(&fdi, 0);

    if (mode == FREAD) {

        struct fuse_read_in *fri;

        buf_setresid(bp, buf_count(bp));
        offset = (off_t)((off_t)buf_blkno(bp) * biosize);

        if (offset >= fvdat->filesize) {
            /* Trying to read at/after EOF? */           
            if (offset != fvdat->filesize) {
                /* Trying to read after EOF? */
                buf_seterror(bp, EINVAL);
            }
            buf_biodone(bp);
            return 0;
        }

        /* Note that we just made sure that offset < fvdat->filesize. */
        if ((offset + buf_count(bp)) > fvdat->filesize) {
            /* Trimming read */
            buf_setcount(bp, (uint32_t)(fvdat->filesize - offset));
        }

        if (buf_map(bp, &bufdat)) {
            IOLog("MacFUSE: failed to map buffer in strategy\n");
            return EFAULT;
        } else {
            mapped = TRUE;
        }

        while (buf_resid(bp) > 0) {

            chunksize = min((size_t)buf_resid(bp), data->iosize);

            fdi.iosize = sizeof(*fri);

            op = FUSE_READ;
            if (vtype == VDIR) {
                op = FUSE_READDIR;
            }
            fdisp_make_vp(&fdi, op, vp, (vfs_context_t)0);
        
            fri = fdi.indata;
            fri->fh = fufh->fh_id;

            /*
             * Historical note:
             *
             * fri->offset = ((off_t)(buf_blkno(bp))) * biosize;
             *
             * This wasn't being incremented!?
             */

            fri->offset = offset;
            fri->size = (typeof(fri->size))chunksize;
            fdi.tick->tk_aw_type = FT_A_BUF;
            fdi.tick->tk_aw_bufdata = bufdat;
        
            if ((err = fdisp_wait_answ(&fdi))) {
                /* There was a problem with reading. */
                goto out;
            }

            respsize = fdi.tick->tk_aw_bufsize;

            if (respsize < 0) { /* Cannot really happen... */
                err = EIO;
                goto out;
            }

            buf_setresid(bp, (uint32_t)(buf_resid(bp) - respsize));
            bufdat += respsize;
            offset += respsize;

            /* Did we hit EOF before being done? */
            if ((respsize == 0) && (buf_resid(bp) > 0)) {
                 /*
                  * Historical note:
                  * If we don't get enough data, just fill the rest with zeros.
                  * In NFS context, this would mean a hole in the file.
                  */

                 /* Zero-pad the incomplete buffer. */
                 bzero(bufdat, buf_resid(bp));
                 buf_setresid(bp, 0);
                 break;
            }
        } /* while (buf_resid(bp) > 0) */
    } else {
        /* write */
        struct fuse_write_in  *fwi;
        struct fuse_write_out *fwo;
        int merr = 0;
        off_t diff;

        if (buf_map(bp, &bufdat)) {
            IOLog("MacFUSE: failed to map buffer in strategy\n");
            return EFAULT;
        } else {
            mapped = TRUE;
        }

        /* Write begin */

        buf_setresid(bp, buf_count(bp));
        offset = (off_t)((off_t)buf_blkno(bp) * biosize);

        /* XXX: TBD -- Check here for extension (writing past end) */

        left = buf_count(bp);

        while (left) {

            fdi.iosize = sizeof(*fwi);
            op = FUSE_WRITE;

            fdisp_make_vp(&fdi, op, vp, (vfs_context_t)0);
            chunksize = min((size_t)left, data->iosize);

            fwi = fdi.indata;
            fwi->fh = fufh->fh_id;
            fwi->offset = offset;
            fwi->size = (typeof(fwi->size))chunksize;

            fdi.tick->tk_ms_type = FT_M_BUF;
            fdi.tick->tk_ms_bufdata = bufdat;
            fdi.tick->tk_ms_bufsize = chunksize;

            /* About to write <chunksize> at <offset> */

            if ((err = fdisp_wait_answ(&fdi))) {
                merr = 1;
                break;
            }
    
            fwo = fdi.answ;
            diff = chunksize - fwo->size;
            if (diff < 0) {
                err = EINVAL;
                break;
            }
    
            left -= fwo->size;
            bufdat += fwo->size;
            offset += fwo->size;
            buf_setresid(bp, buf_resid(bp) - fwo->size);
        }

        if (merr) {
            goto out;
        }
    }

    if (fdi.tick) {
        fuse_ticket_drop(fdi.tick);
    } else {
        /* No ticket upon leaving */
    }

out:

    if (err) {
        buf_seterror(bp, err);
    }

    if (mapped == TRUE) {
        buf_unmap(bp);
    }

    buf_biodone(bp);

    return err;
}    
Exemple #11
0
__private_extern__
errno_t
fuse_internal_strategy_buf(struct vnop_strategy_args *ap)
{
    int32_t   bflags;
    upl_t     bupl;
    daddr64_t blkno, lblkno;
    int       bmap_flags;
    buf_t     bp    = ap->a_bp;
    vnode_t   vp    = buf_vnode(bp);
    int       vtype = vnode_vtype(vp);

    struct fuse_data *data;

    if (!vp || vtype == VCHR || vtype == VBLK) {
        panic("MacFUSE: buf_strategy: b_vp == NULL || vtype == VCHR | VBLK\n");
    }

    bflags = buf_flags(bp);

    if (bflags & B_READ) {
        bmap_flags = VNODE_READ;
    } else {
        bmap_flags = VNODE_WRITE;
    }

    bupl = buf_upl(bp);
    blkno = buf_blkno(bp);
    lblkno = buf_lblkno(bp);

    if (!(bflags & B_CLUSTER)) {

        if (bupl) {
            return cluster_bp(bp);
        }

        if (blkno == lblkno) {
            off_t  f_offset;
            size_t contig_bytes;

            data = fuse_get_mpdata(vnode_mount(vp));

            // Still think this is a kludge?
            f_offset = lblkno * data->blocksize;
            blkno = f_offset / data->blocksize;

            buf_setblkno(bp, blkno);

            contig_bytes = buf_count(bp);

            if (blkno == -1) {
                buf_clear(bp);
            }
                        
            /*
             * Our "device" is always /all contiguous/. We don't wanna be
             * doing things like:
             *
             * ...
             *     else if ((long)contig_bytes < buf_count(bp)) {
             *         ret = buf_strategy_fragmented(devvp, bp, f_offset,
             *                                       contig_bytes));
             *         return ret;
             *      }
             */
        }

        if (blkno == -1) {
            buf_biodone(bp);
            return 0;
        }
    }

    // Issue the I/O

    return fuse_internal_strategy(vp, bp);
}