static int
vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx)
{
    int         error = 0;
    char *      iov_base;
    caddr_t     vaddr;

    if (buf_map(bp, &vaddr))
        panic("vn device: buf_map failed");
    iov_base = (char *)vaddr;

    if (vn->sc_shadow_vp == NULL) {
        user_ssize_t    temp_resid;

        error = file_io(vn->sc_vp, ctx,
                        buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE,
                        iov_base,
                        (off_t)buf_blkno(bp) * vn->sc_secsize,
                        buf_resid(bp), &temp_resid);
        buf_setresid(bp, temp_resid);
    } else {
        if (buf_flags(bp) & B_READ)
            error = shadow_read(vn, bp, iov_base, ctx);
        else
            error = shadow_write(vn, bp, iov_base, ctx);
    }
    buf_unmap(bp);

    return (error);
}
static int
vnop_strategy_9p(struct vnop_strategy_args *ap)
{
    mount_t mp;
    struct buf *bp;
    node_9p *np;
    caddr_t addr;
    uio_t uio;
    int e, flags;

    TRACE();
    bp = ap->a_bp;
    np = NTO9P(buf_vnode(bp));
    flags = buf_flags(bp);
    uio = NULL;
    addr = NULL;

    mp = vnode_mount(buf_vnode(bp));
    if (mp == NULL)
        return ENXIO;

    if ((e = buf_map(bp, &addr)))
        goto error;

    uio = uio_create(1, buf_blkno(bp) * vfs_statfs(mp)->f_bsize, UIO_SYSSPACE,
                     ISSET(flags, B_READ) ? UIO_READ : UIO_WRITE);
    if (uio == NULL) {
        e = ENOMEM;
        goto error;
    }

    uio_addiov(uio, CAST_USER_ADDR_T(addr), buf_count(bp));
    if (ISSET(flags, B_READ)) {
        if ((e = nread_9p(np, uio)))
            goto error;
        /* zero the rest of the page if we reached EOF */
        if (uio_resid(uio) > 0) {
            bzero(addr + buf_count(bp) - uio_resid(uio), uio_resid(uio));
            uio_update(uio, uio_resid(uio));
        }
    } else {
        if ((e = nwrite_9p(np, uio)))
            goto error;
    }
    buf_setresid(bp, uio_resid(uio));

error:
    if (uio)
        uio_free(uio);
    if (addr)
        buf_unmap(bp);
    buf_seterror(bp, e);
    buf_biodone(bp);
    return e;
}
static int
shadow_write(struct vn_softc * vn, struct buf * bp, char * base,
             vfs_context_t ctx)
{
    u_int32_t   blocksize = vn->sc_secsize;
    int         error = 0;
    u_int32_t   offset;
    boolean_t   shadow_grew;
    u_int32_t   resid;
    u_int32_t   start = 0;

    offset = buf_blkno(bp);
    resid = buf_resid(bp) / blocksize;
    while (resid > 0) {
        user_ssize_t    temp_resid;
        u_int32_t       this_offset;
        u_int32_t       this_resid;

        shadow_grew = shadow_map_write(vn->sc_shadow_map,
                                       offset, resid,
                                       &this_offset, &this_resid);
        if (shadow_grew) {
#if 0
            off_t   size;
            /* truncate the file to its new length before write */
            size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map)
                * blocksize;
            vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx);
#endif
        }
        error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE,
                        base + start,
                        (off_t)this_offset * blocksize,
                        (user_ssize_t)this_resid * blocksize,
                        &temp_resid);
        if (error) {
            break;
        }
        this_resid -= (temp_resid / blocksize);
        if (this_resid == 0) {
            printf("vn device: shadow_write zero length write\n");
            break;
        }
        resid -= this_resid;
        offset += this_resid;
        start += this_resid * blocksize;
    }
    buf_setresid(bp, resid * blocksize);
    return (error);
}
static int
shadow_read(struct vn_softc * vn, struct buf * bp, char * base,
            vfs_context_t ctx)
{
    u_int32_t   blocksize = vn->sc_secsize;
    int         error = 0;
    u_int32_t   offset;
    boolean_t   read_shadow;
    u_int32_t   resid;
    u_int32_t   start = 0;

    offset = buf_blkno(bp);
    resid = buf_resid(bp) / blocksize;
    while (resid > 0) {
        user_ssize_t    temp_resid;
        u_int32_t       this_offset;
        u_int32_t       this_resid;
        struct vnode *  vp;

        read_shadow = shadow_map_read(vn->sc_shadow_map,
                                      offset, resid,
                                      &this_offset, &this_resid);
        if (read_shadow) {
            vp = vn->sc_shadow_vp;
        } else {
            vp = vn->sc_vp;
        }
        error = file_io(vp, ctx, UIO_READ,
                        base + start,
                        (off_t)this_offset * blocksize,
                        (user_ssize_t)this_resid * blocksize,
                        &temp_resid);
        if (error) {
            break;
        }
        this_resid -= (temp_resid / blocksize);
        if (this_resid == 0) {
            printf("vn device: shadow_read zero length read\n");
            break;
        }
        resid -= this_resid;
        offset += this_resid;
        start += this_resid * blocksize;
    }
    buf_setresid(bp, resid * blocksize);
    return (error);
}
static void
mdevstrategy(struct buf *bp)
{
    unsigned int left, lop, csize;
    vm_offset_t vaddr, blkoff;
    int devid;
    addr64_t paddr, fvaddr;
    ppnum_t pp;

    devid = minor(buf_device(bp));                          /* Get minor device number */

    if ((mdev[devid].mdFlags & mdInited) == 0) {            /* Have we actually been defined yet? */
        buf_seterror(bp, ENXIO);
        buf_biodone(bp);
        return;
    }

    buf_setresid(bp, buf_count(bp));                        /* Set byte count */
    blkoff = buf_blkno(bp) * mdev[devid].mdSecsize;         /* Get offset into file */

    /*
     * Note that reading past end is an error, but reading at end is an EOF. For these
     * we just return with resid == count.
     */
    if (blkoff >= (mdev[devid].mdSize << 12)) {             /* Are they trying to read/write at/after end? */
        if (blkoff != (mdev[devid].mdSize << 12)) {         /* Are we trying to read after EOF? */
            buf_seterror(bp, EINVAL);                       /* Yeah, this is an error */
        }
        buf_biodone(bp);                                    /* Return */
        return;
    }

    if ((blkoff + buf_count(bp)) > (mdev[devid].mdSize << 12)) {    /* Will this read go past end? */
        buf_setcount(bp, ((mdev[devid].mdSize << 12) - blkoff));    /* Yes, trim to max */
    }

    /*
     * make sure the buffer's data area is
     * accessible
     */
    if (buf_map(bp, (caddr_t *)&vaddr))
        panic("ramstrategy: buf_map failed\n");

    fvaddr = (mdev[devid].mdBase << 12) + blkoff;           /* Point to offset into ram disk */

    if (buf_flags(bp) & B_READ) {                           /* Is this a read? */
        if (!(mdev[devid].mdFlags & mdPhys)) {              /* Physical mapped disk? */
            bcopy((void *)((uintptr_t)fvaddr), (void *)vaddr,
                  (size_t)buf_count(bp));                   /* This is virtual, just get the data */
        } else {
            left = buf_count(bp);                           /* Init the amount left to copy */

            while (left) {                                  /* Go until it is all copied */
                lop = min((4096 - (vaddr & 4095)),
                          (4096 - (fvaddr & 4095)));        /* Get smallest amount left on sink and source */
                csize = min(lop, left);                     /* Don't move more than we need to */

                pp = pmap_find_phys(kernel_pmap,
                                    (addr64_t)((uintptr_t)vaddr));  /* Get the sink physical address */
                if (!pp) {                                  /* Not found, what gives? */
                    panic("mdevstrategy: sink address %016llX not mapped\n",
                          (addr64_t)((uintptr_t)vaddr));
                }
                paddr = (addr64_t)(((addr64_t)pp << 12) |
                                   (addr64_t)(vaddr & 4095));   /* Get actual address */
                bcopy_phys(fvaddr, paddr, csize);           /* Copy this on in */
                mapping_set_mod(paddr >> 12);               /* Make sure we know that it is modified */

                left = left - csize;                        /* Calculate what is left */
                vaddr = vaddr + csize;                      /* Move to next sink address */
                fvaddr = fvaddr + csize;                    /* Bump to next physical address */
            }
        }
    } else {                                                /* This is a write */
        if (!(mdev[devid].mdFlags & mdPhys)) {              /* Physical mapped disk? */
static void
vnstrategy(struct buf *bp)
{
    struct vn_softc *vn;
    int error = 0;
    long sz;    /* in sc_secsize chunks */
    daddr64_t blk_num;
    struct vnode *      shadow_vp = NULL;
    struct vnode *      vp = NULL;
    struct vfs_context  context;

    vn = vn_table + vnunit(buf_device(bp));
    if ((vn->sc_flags & VNF_INITED) == 0) {
        error = ENXIO;
        goto done;
    }

    context.vc_thread = current_thread();
    context.vc_ucred = vn->sc_cred;

    buf_setresid(bp, buf_count(bp));

    /*
     * Check for required alignment.  Transfers must be a valid
     * multiple of the sector size.
     */
    blk_num = buf_blkno(bp);
    if (buf_count(bp) % vn->sc_secsize != 0) {
        error = EINVAL;
        goto done;
    }
    sz = howmany(buf_count(bp), vn->sc_secsize);

    /*
     * If out of bounds return an error.  If at the EOF point,
     * simply read or write less.
     */
    if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) {
        if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) {
            error = EINVAL;
        }
        goto done;
    }
    /*
     * If the request crosses EOF, truncate the request.
     */
    if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) {
        buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize);
        buf_setresid(bp, buf_count(bp));
    }
    vp = vn->sc_vp;
    if (vp == NULL) {
        error = ENXIO;
        goto done;
    }
    error = vnode_getwithvid(vp, vn->sc_vid);
    if (error != 0) {
        /* the vnode is no longer available, abort */
        error = ENXIO;
        vnclear(vn, &context);
        goto done;
    }
    shadow_vp = vn->sc_shadow_vp;
    if (shadow_vp != NULL) {
        error = vnode_getwithvid(shadow_vp, vn->sc_shadow_vid);
        if (error != 0) {
            /* the vnode is no longer available, abort */
            error = ENXIO;
            vnode_put(vn->sc_vp);
            vnclear(vn, &context);
            goto done;
        }
    }

    error = vn_readwrite_io(vn, bp, &context);
    vnode_put(vp);
    if (shadow_vp != NULL) {
        vnode_put(shadow_vp);
    }

done:
    if (error) {
        buf_seterror(bp, error);
    }
    buf_biodone(bp);
    return;
}
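/*
 * A minimal, hypothetical sketch (not taken from any of the drivers above) of
 * the pattern these strategy routines share: mark the whole transfer as
 * outstanding, map the buffer, move the data, record the residual, and
 * complete the buffer exactly once with buf_biodone().  The per-device state
 * my_softc and the helper my_transfer() are placeholders for illustration.
 */
static struct { u_int32_t my_secsize; } my_softc = { 512 };
static int my_transfer(caddr_t va, off_t offset, u_int32_t len, int is_read);

static void
my_strategy(struct buf *bp)
{
    caddr_t vaddr;
    off_t offset;
    int error;

    /* every byte is outstanding until the transfer says otherwise */
    buf_setresid(bp, buf_count(bp));

    /* make the buffer's data area addressable in the kernel */
    if (buf_map(bp, &vaddr)) {
        buf_seterror(bp, ENOMEM);
        buf_biodone(bp);
        return;
    }

    /* byte offset of the request, derived from the block number */
    offset = (off_t)buf_blkno(bp) * my_softc.my_secsize;

    /* direction comes from the buffer flags */
    error = my_transfer(vaddr, offset, buf_count(bp),
                        (buf_flags(bp) & B_READ) != 0);
    if (error == 0)
        buf_setresid(bp, 0);    /* everything was moved */

    buf_unmap(bp);
    if (error)
        buf_seterror(bp, error);
    buf_biodone(bp);            /* complete the I/O exactly once */
}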
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and iput
 * instead of two iputs.
 *
 * Overall outline of ufs_lookup:
 *
 *  check accessibility of directory
 *  look for name in cache, if found, then if at end of path
 *    and deleting or creating, drop it, else return name
 *  search for name in directory, to found or notfound
 *  notfound:
 *    if creating, return locked directory, leaving info on available slots
 *    else return error
 *  found:
 *    if at end of path and deleting, return information to allow delete
 *    if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *      inode and return info to allow rewrite
 *    if not at end, add name to cache; if at end and neither creating
 *      nor deleting, add name to cache
 *
 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
 */
int
cd9660_lookup(struct vnop_lookup_args *ap)
{
    register struct vnode *vdp;     /* vnode for directory being searched */
    register struct iso_node *dp;   /* inode for directory being searched */
    register struct iso_mnt *imp;   /* file system that directory is in */
    struct buf *bp;                 /* a buffer of directory entries */
    struct iso_directory_record *ep = NULL; /* the current directory entry */
    int entryoffsetinblock;         /* offset of ep in bp's buffer */
    int saveoffset = 0;             /* offset of last directory entry in dir */
    int numdirpasses;               /* strategy for directory search */
    doff_t endsearch;               /* offset to end directory search */
    struct vnode *pdp;              /* saved dp during symlink work */
    struct vnode *tdp;              /* returned by cd9660_vget_internal */
    u_long bmask;                   /* block offset mask */
    int lockparent;                 /* 1 => lockparent flag is set */
    int wantparent;                 /* 1 => wantparent or lockparent flag */
    int wantassoc;
    int error;
    ino_t ino = 0;
    int reclen;
    u_short namelen;
    int isoflags;
    char altname[ISO_RRIP_NAMEMAX];
    int res;
    int len;
    char *name;
    struct vnode **vpp = ap->a_vpp;
    struct componentname *cnp = ap->a_cnp;
    int flags = cnp->cn_flags;
    int nameiop = cnp->cn_nameiop;
    vfs_context_t ctx = cnp->cn_context;
    size_t altlen;

    bp = NULL;
    *vpp = NULL;
    vdp = ap->a_dvp;
    dp = VTOI(vdp);
    imp = dp->i_mnt;
    lockparent = flags & LOCKPARENT;
    wantparent = flags & (LOCKPARENT|WANTPARENT);
    wantassoc = 0;

    /*
     * We now have a segment name to search for, and a directory to search.
     *
     * Before tediously performing a linear scan of the directory,
     * check the name cache to see if the directory/name pair
     * we are looking for is known already.
     */
    if ((error = cache_lookup(vdp, vpp, cnp))) {
        if (error == ENOENT)
            return (error);
        return (0);
    }

    len = cnp->cn_namelen;
    name = cnp->cn_nameptr;
    altname[0] = '\0';
    /*
     * A "._" prefix means, we are looking for an associated file
     */
    if (imp->iso_ftype != ISO_FTYPE_RRIP &&
        *name == ASSOCCHAR1 && *(name+1) == ASSOCCHAR2) {
        wantassoc = 1;
        len -= 2;
        name += 2;
    }

    /*
     * Decode search name into UCS-2 (Unicode)
     */
    if ((imp->iso_ftype == ISO_FTYPE_JOLIET) &&
        !((len == 1 && *name == '.') || (flags & ISDOTDOT))) {
        int flags1 = UTF_PRECOMPOSED;

        (void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen,
                              sizeof(altname), 0, flags1);
        name = altname;
        len = altlen;
    }

    /*
     * If there is cached information on a previous search of
     * this directory, pick up where we last left off.
     * We cache only lookups as these are the most common
     * and have the greatest payoff. Caching CREATE has little
     * benefit as it usually must search the entire directory
     * to determine that the entry does not exist. Caching the
     * location of the last DELETE or RENAME has not reduced
     * profiling time and hence has been removed in the interest
     * of simplicity.
     */
    bmask = imp->im_sector_size - 1;
    if (nameiop != LOOKUP || dp->i_diroff == 0 ||
        dp->i_diroff > dp->i_size) {
        entryoffsetinblock = 0;
        dp->i_offset = 0;
        numdirpasses = 1;
    } else {
        dp->i_offset = dp->i_diroff;

        if ((entryoffsetinblock = dp->i_offset & bmask) &&
            (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp)))
            return (error);
        numdirpasses = 2;
        iso_nchstats.ncs_2passes++;
    }
    endsearch = dp->i_size;

searchloop:
    while (dp->i_offset < endsearch) {
        /*
         * If offset is on a block boundary,
         * read the next directory block.
         * Release previous if it exists.
         */
        if ((dp->i_offset & bmask) == 0) {
            if (bp != NULL)
                buf_brelse(bp);
            if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp)) )
                return (error);
            entryoffsetinblock = 0;
        }
        /*
         * Get pointer to next entry.
         */
        ep = (struct iso_directory_record *)
            ((char *)0 + buf_dataptr(bp) + entryoffsetinblock);

        reclen = isonum_711(ep->length);
        if (reclen == 0) {
            /* skip to next block, if any */
            dp->i_offset =
                (dp->i_offset & ~bmask) + imp->im_sector_size;
            continue;
        }

        if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
            /* illegal entry, stop */
            break;
        }
        if (entryoffsetinblock + reclen > imp->im_sector_size) {
            /* entries are not allowed to cross sector boundaries */
            break;
        }
        namelen = isonum_711(ep->name_len);
        isoflags = isonum_711(ep->flags);

        if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
            /* illegal entry, stop */
            break;
        /*
         * Check for a name match.
         */
        if (imp->iso_ftype == ISO_FTYPE_RRIP) {
            if (isoflags & directoryBit)
                ino = isodirino(ep, imp);
            else
                ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
            dp->i_ino = ino;
            cd9660_rrip_getname(ep, altname, &namelen, &dp->i_ino, imp);
            if (namelen == cnp->cn_namelen &&
                !bcmp(name, altname, namelen))
                goto found;
            ino = 0;
        } else {
            if ((!(isoflags & associatedBit)) == !wantassoc) {
                if ((len == 1 && *name == '.') || (flags & ISDOTDOT)) {
                    if (namelen == 1 &&
                        ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
                        /*
                         * Save directory entry's inode number and
                         * release directory buffer.
                         */
                        dp->i_ino = isodirino(ep, imp);
                        goto found;
                    }
                    if (namelen != 1 || ep->name[0] != 0)
                        goto notfound;
                } else if (imp->iso_ftype != ISO_FTYPE_JOLIET &&
                           !(res = isofncmp(name, len, ep->name, namelen))) {
                    if (isoflags & directoryBit)
                        ino = isodirino(ep, imp);
                    else
                        ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
                    saveoffset = dp->i_offset;
                } else if (imp->iso_ftype == ISO_FTYPE_JOLIET &&
                           !(res = ucsfncmp((u_int16_t*)name, len,
                                            (u_int16_t*) ep->name, namelen))) {
                    if (isoflags & directoryBit)
                        ino = isodirino(ep, imp);
                    else
                        ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock;
                    saveoffset = dp->i_offset;
                } else if (ino)
                    goto foundino;
#ifdef NOSORTBUG /* On some CDs directory entries are not sorted correctly */
                else if (res < 0)
                    goto notfound;
                else if (res > 0 && numdirpasses == 2)
                    numdirpasses++;
#endif
            }
        }
        dp->i_offset += reclen;
        entryoffsetinblock += reclen;
    } /* endwhile */

    if (ino) {
foundino:
        dp->i_ino = ino;
        if (saveoffset != dp->i_offset) {
            if (lblkno(imp, dp->i_offset) != lblkno(imp, saveoffset)) {
                if (bp != NULL)
                    buf_brelse(bp);
                if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) )
                    return (error);
            }
            entryoffsetinblock = saveoffset & bmask;
            ep = (struct iso_directory_record *)
                ((char *)0 + buf_dataptr(bp) + entryoffsetinblock);
            dp->i_offset = saveoffset;
        }
        goto found;
    }
notfound:
    /*
     * If we started in the middle of the directory and failed
     * to find our target, we must check the beginning as well.
     */
    if (numdirpasses == 2) {
        numdirpasses--;
        dp->i_offset = 0;
        endsearch = dp->i_diroff;
        goto searchloop;
    }
    if (bp != NULL)
        buf_brelse(bp);

    /*
     * Insert name into cache (as non-existent) if appropriate.
     */
    if (cnp->cn_flags & MAKEENTRY)
        cache_enter(vdp, *vpp, cnp);
    return (ENOENT);

found:
    if (numdirpasses == 2)
        iso_nchstats.ncs_pass2++;

    /*
     * Found component in pathname.
     * If the final component of path name, save information
     * in the cache as to where the entry was found.
     */
    if ((flags & ISLASTCN) && nameiop == LOOKUP)
        dp->i_diroff = dp->i_offset;

    /*
     * Step through the translation in the name.  We do not `iput' the
     * directory because we may need it again if a symbolic link
     * is relative to the current directory.  Instead we save it
     * unlocked as "pdp".  We must get the target inode before unlocking
     * the directory to ensure that the inode will not be removed
     * before we get it.  We prevent deadlock by always fetching
     * inodes from the root, moving down the directory tree. Thus
     * when following backward pointers ".." we must unlock the
     * parent directory before getting the requested directory.
     * There is a potential race condition here if both the current
     * and parent directories are removed before the `iget' for the
     * inode associated with ".." returns. We hope that this occurs
     * infrequently since we cannot avoid this race condition without
     * implementing a sophisticated deadlock detection algorithm.
     * Note also that this simple deadlock detection scheme will not
     * work if the file system has any hard links other than ".."
     * that point backwards in the directory structure.
     */
    pdp = vdp;

    /*
     * If ino is different from dp->i_ino,
     * it's a relocated directory.
     */
    if (flags & ISDOTDOT) {
        error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp,
                                     NULL, NULL, dp->i_ino != ino, ep,
                                     vfs_context_proc(ctx));
        VTOI(tdp)->i_parent = VTOI(pdp)->i_number;
        buf_brelse(bp);

        *vpp = tdp;
    } else if (dp->i_number == dp->i_ino) {
        buf_brelse(bp);
        vnode_get(vdp); /* we want ourself, ie "." */
        *vpp = vdp;
    } else {
        error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp,
                                     vdp, cnp, dp->i_ino != ino, ep,
                                     vfs_context_proc(ctx));
        /* save parent inode number */
        VTOI(tdp)->i_parent = VTOI(pdp)->i_number;
        buf_brelse(bp);
        if (error)
            return (error);
        *vpp = tdp;
    }
    return (0);
}
int
spec_strategy(struct vnop_strategy_args *ap)
{
    buf_t   bp;
    int     bflags;
    int     policy;
    dev_t   bdev;
    uthread_t ut;
    size_t  devbsdunit;
    mount_t mp;

    bp = ap->a_bp;
    bdev = buf_device(bp);
    bflags = buf_flags(bp);
    mp = buf_vnode(bp)->v_mount;

    if (kdebug_enable) {
        int code = 0;

        if (bflags & B_READ)
            code |= DKIO_READ;
        if (bflags & B_ASYNC)
            code |= DKIO_ASYNC;

        if (bflags & B_META)
            code |= DKIO_META;
        else if (bflags & B_PAGEIO)
            code |= DKIO_PAGING;

        KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
                              bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
    }
    if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
        mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
        hard_throttle_on_root = 1;

    if (mp != NULL)
        devbsdunit = mp->mnt_devbsdunit;
    else
        devbsdunit = LOWPRI_MAX_NUM_DEV - 1;

    throttle_info_update(&_throttle_io_info[devbsdunit], bflags);
    if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
        bp->b_flags |= B_THROTTLED_IO;
    }

    if ((bflags & B_READ) == 0) {
        microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
        if (mp) {
            INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
        }
    } else if (mp) {
        INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
    }

    (*bdevsw[major(bdev)].d_strategy)(bp);

    return (0);
}
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ffs_balloc(
    register struct inode *ip,
    register ufs_daddr_t lbn,
    int size,
    kauth_cred_t cred,
    struct buf **bpp,
    int flags,
    int * blk_alloc)
{
    register struct fs *fs;
    register ufs_daddr_t nb;
    struct buf *bp, *nbp;
    struct vnode *vp = ITOV(ip);
    struct indir indirs[NIADDR + 2];
    ufs_daddr_t newb, *bap, pref;
    int deallocated, osize, nsize, num, i, error;
    ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
    int devBlockSize = 0;
    int alloc_buffer = 1;
    struct mount *mp = vp->v_mount;
#if REV_ENDIAN_FS
    int rev_endian = (mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

    *bpp = NULL;
    if (lbn < 0)
        return (EFBIG);
    fs = ip->i_fs;
    if (flags & B_NOBUFF)
        alloc_buffer = 0;

    if (blk_alloc)
        *blk_alloc = 0;

    /*
     * If the next write will extend the file into a new block,
     * and the file is currently composed of a fragment
     * this fragment has to be extended to be a full block.
     */
    nb = lblkno(fs, ip->i_size);
    if (nb < NDADDR && nb < lbn) {
        /*
         * The file size prior to this write fits in direct
         * blocks (i.e. fragmentation is possibly done);
         * we are now extending the file write beyond
         * the block which held end of file prior to this write.
         */
        osize = blksize(fs, ip, nb);
        /*
         * osize gives the disk-allocated size in the last block.
         * It is either in fragments or a file system block size.
         */
        if (osize < fs->fs_bsize && osize > 0) {
            /*
             * A few fragments are already allocated; since the
             * current write extends beyond this block,
             * allocate the complete block, as fragments are only
             * allowed in the last block.
             */
            error = ffs_realloccg(ip, nb,
                ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
                osize, (int)fs->fs_bsize, cred, &bp);
            if (error)
                return (error);
            /* adjust the inode size we just grew */
            /* it is in nb+1 as nb starts from 0 */
            ip->i_size = (nb + 1) * fs->fs_bsize;
            ubc_setsize(vp, (off_t)ip->i_size);

            ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
            ip->i_flag |= IN_CHANGE | IN_UPDATE;

            if ((flags & B_SYNC) || (!alloc_buffer)) {
                if (!alloc_buffer)
                    buf_setflags(bp, B_NOCACHE);
                buf_bwrite(bp);
            } else
                buf_bdwrite(bp);
            /* note that bp is already released here */
        }
    }
    /*
     * The first NDADDR blocks are direct blocks
     */
    if (lbn < NDADDR) {
        nb = ip->i_db[lbn];
        if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
            if (alloc_buffer) {
                error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp);
                if (error) {
                    buf_brelse(bp);
                    return (error);
                }
                *bpp = bp;
            }
            return (0);
        }
        if (nb != 0) {
            /*
             * Consider need to reallocate a fragment.
             */
            osize = fragroundup(fs, blkoff(fs, ip->i_size));
            nsize = fragroundup(fs, size);
            if (nsize <= osize) {
                if (alloc_buffer) {
                    error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp);
                    if (error) {
                        buf_brelse(bp);
                        return (error);
                    }
                    ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    *bpp = bp;
                    return (0);
                } else {
                    ip->i_flag |= IN_CHANGE | IN_UPDATE;
                    return (0);
                }
            } else {
                error = ffs_realloccg(ip, lbn,
                    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
                    osize, nsize, cred, &bp);
                if (error)
                    return (error);
                ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
                ip->i_flag |= IN_CHANGE | IN_UPDATE;

                /* adjust the inode size we just grew */
                ip->i_size = (lbn * fs->fs_bsize) + size;
                ubc_setsize(vp, (off_t)ip->i_size);

                if (!alloc_buffer) {
                    buf_setflags(bp, B_NOCACHE);
                    if (flags & B_SYNC)
                        buf_bwrite(bp);
                    else
                        buf_bdwrite(bp);
                } else
                    *bpp = bp;
                return (0);
            }
        } else {
            if (ip->i_size < (lbn + 1) * fs->fs_bsize)
                nsize = fragroundup(fs, size);
            else
                nsize = fs->fs_bsize;
            error = ffs_alloc(ip, lbn,
                ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
                nsize, cred, &newb);
            if (error)
                return (error);
            if (alloc_buffer) {
                bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE);
                buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb)));

                if (flags & B_CLRBUF)
                    buf_clear(bp);
            }
            ip->i_db[lbn] = newb;
            ip->i_flag |= IN_CHANGE | IN_UPDATE;
            if (blk_alloc) {
                *blk_alloc = nsize;
            }
            if (alloc_buffer)
                *bpp = bp;
            return (0);
        }
    }
    /*
     * Determine the number of levels of indirection.
     */
    pref = 0;
    if (error = ufs_getlbns(vp, lbn, indirs, &num))
        return (error);
#if DIAGNOSTIC
    if (num < 1)
        panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
    /*
     * Fetch the first indirect block allocating if necessary.
     */
    --num;
    nb = ip->i_ib[indirs[0].in_off];
    allocib = NULL;
    allocblk = allociblk;
    if (nb == 0) {
        pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb))
            return (error);
        nb = newb;
        *allocblk++ = nb;
        bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
        buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
        buf_clear(bp);
        /*
         * Write synchronously conditional on mount flags.
         */
        if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
            error = 0;
            buf_bdwrite(bp);
        } else if ((error = buf_bwrite(bp)) != 0) {
            goto fail;
        }
        allocib = &ip->i_ib[indirs[0].in_off];
        *allocib = nb;
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    /*
     * Fetch through the indirect blocks, allocating as necessary.
     */
    for (i = 1;;) {
        error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
        if (error) {
            buf_brelse(bp);
            goto fail;
        }
        bap = (ufs_daddr_t *)buf_dataptr(bp);
#if REV_ENDIAN_FS
        if (rev_endian)
            nb = OSSwapInt32(bap[indirs[i].in_off]);
        else {
#endif /* REV_ENDIAN_FS */
            nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
        }
#endif /* REV_ENDIAN_FS */
        if (i == num)
            break;
        i += 1;
        if (nb != 0) {
            buf_brelse(bp);
            continue;
        }
        if (pref == 0)
            pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
            buf_brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
        nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
        buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
        buf_clear(nbp);
        /*
         * Write synchronously conditional on mount flags.
         */
        if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
            error = 0;
            buf_bdwrite(nbp);
        } else if (error = buf_bwrite(nbp)) {
            buf_brelse(bp);
            goto fail;
        }
#if REV_ENDIAN_FS
        if (rev_endian)
            bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
        else {
#endif /* REV_ENDIAN_FS */
            bap[indirs[i - 1].in_off] = nb;
#if REV_ENDIAN_FS
        }
#endif /* REV_ENDIAN_FS */
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if (flags & B_SYNC) {
            buf_bwrite(bp);
        } else {
            buf_bdwrite(bp);
        }
    }
    /*
     * Get the data block, allocating if necessary.
     */
    if (nb == 0) {
        pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
            buf_brelse(bp);
            goto fail;
        }
        nb = newb;
        *allocblk++ = nb;
#if REV_ENDIAN_FS
        if (rev_endian)
            bap[indirs[i].in_off] = OSSwapInt32(nb);
        else {
#endif /* REV_ENDIAN_FS */
            bap[indirs[i].in_off] = nb;
#if REV_ENDIAN_FS
        }
#endif /* REV_ENDIAN_FS */
        /*
         * If required, write synchronously, otherwise use
         * delayed write.
         */
        if ((flags & B_SYNC)) {
            buf_bwrite(bp);
        } else {
            buf_bdwrite(bp);
        }
        if (alloc_buffer) {
            nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
            buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));

            if (flags & B_CLRBUF)
                buf_clear(nbp);
        }
        if (blk_alloc) {
            *blk_alloc = fs->fs_bsize;
        }
        if (alloc_buffer)
            *bpp = nbp;

        return (0);
    }
    buf_brelse(bp);
    if (alloc_buffer) {
        if (flags & B_CLRBUF) {
            error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp);
            if (error) {
                buf_brelse(nbp);
                goto fail;
            }
        } else {
            nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
            buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
        }
        *bpp = nbp;
    }
    return (0);
fail:
    /*
     * If we have failed part way through block allocation, we
     * have to deallocate any indirect blocks that we have allocated.
     */
    for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
        ffs_blkfree(ip, *blkp, fs->fs_bsize);
        deallocated += fs->fs_bsize;
    }
    if (allocib != NULL)
        *allocib = 0;
    if (deallocated) {
        devBlockSize = vfs_devblocksize(mp);
#if QUOTA
        /*
         * Restore user's disk quota because allocation failed.
         */
        (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
        ip->i_blocks -= btodb(deallocated, devBlockSize);
        ip->i_flag |= IN_CHANGE | IN_UPDATE;
    }
    return (error);
}
__private_extern__
int
fuse_internal_strategy(vnode_t vp, buf_t bp)
{
    size_t biosize;
    size_t chunksize;
    size_t respsize;

    int mapped = FALSE;
    int mode;
    int op;
    int vtype = vnode_vtype(vp);

    int err = 0;

    caddr_t bufdat;
    off_t   left;
    off_t   offset;
    int32_t bflags = buf_flags(bp);

    fufh_type_t             fufh_type;
    struct fuse_dispatcher  fdi;
    struct fuse_data       *data;
    struct fuse_vnode_data *fvdat = VTOFUD(vp);
    struct fuse_filehandle *fufh = NULL;
    mount_t mp = vnode_mount(vp);

    data = fuse_get_mpdata(mp);

    biosize = data->blocksize;

    if (!(vtype == VREG || vtype == VDIR)) {
        return ENOTSUP;
    }

    if (bflags & B_READ) {
        mode = FREAD;
        fufh_type = FUFH_RDONLY; /* FUFH_RDWR will also do */
    } else {
        mode = FWRITE;
        fufh_type = FUFH_WRONLY; /* FUFH_RDWR will also do */
    }

    if (fvdat->flag & FN_CREATING) {
        fuse_lck_mtx_lock(fvdat->createlock);
        if (fvdat->flag & FN_CREATING) {
            (void)fuse_msleep(fvdat->creator, fvdat->createlock,
                              PDROP | PINOD | PCATCH,
                              "fuse_internal_strategy", NULL);
        } else {
            fuse_lck_mtx_unlock(fvdat->createlock);
        }
    }

    fufh = &(fvdat->fufh[fufh_type]);

    if (!FUFH_IS_VALID(fufh)) {
        fufh_type = FUFH_RDWR;
        fufh = &(fvdat->fufh[fufh_type]);
        if (!FUFH_IS_VALID(fufh)) {
            fufh = NULL;
        } else {
            /* We've successfully fallen back to FUFH_RDWR. */
        }
    }

    if (!fufh) {
        if (mode == FREAD) {
            fufh_type = FUFH_RDONLY;
        } else {
            fufh_type = FUFH_RDWR;
        }

        /*
         * Let's not do the filehandle preflight check here.
         */
        err = fuse_filehandle_get(vp, NULL, fufh_type, 0 /* mode */);
        if (!err) {
            fufh = &(fvdat->fufh[fufh_type]);
            FUFH_AUX_INC(fufh);
            /* We've created a NEW fufh of type fufh_type. open_count is 1. */
        }
    } else { /* good fufh */
        FUSE_OSAddAtomic(1, (SInt32 *)&fuse_fh_reuse_count);
        /* We're using an existing fufh of type fufh_type. */
    }

    if (err) {
        /* A more typical error case. */
        if ((err == ENOTCONN) || fuse_isdeadfs(vp)) {
            buf_seterror(bp, EIO);
            buf_biodone(bp);
            return EIO;
        }

        IOLog("MacFUSE: strategy failed to get fh "
              "(vtype=%d, fufh_type=%d, err=%d)\n", vtype, fufh_type, err);

        if (!vfs_issynchronous(mp)) {
            IOLog("MacFUSE: asynchronous write failed!\n");
        }

        buf_seterror(bp, EIO);
        buf_biodone(bp);
        return EIO;
    }

    if (!fufh) {
        panic("MacFUSE: tried everything but still no fufh");
        /* NOTREACHED */
    }

#define B_INVAL 0x00040000 /* Does not contain valid info. */
#define B_ERROR 0x00080000 /* I/O error occurred. */

    if (bflags & B_INVAL) {
        IOLog("MacFUSE: buffer does not contain valid information\n");
    }

    if (bflags & B_ERROR) {
        IOLog("MacFUSE: an I/O error has occurred\n");
    }

    if (buf_count(bp) == 0) {
        return 0;
    }

    fdisp_init(&fdi, 0);

    if (mode == FREAD) {

        struct fuse_read_in *fri;

        buf_setresid(bp, buf_count(bp));
        offset = (off_t)((off_t)buf_blkno(bp) * biosize);

        if (offset >= fvdat->filesize) {
            /* Trying to read at/after EOF? */
            if (offset != fvdat->filesize) {
                /* Trying to read after EOF? */
                buf_seterror(bp, EINVAL);
            }
            buf_biodone(bp);
            return 0;
        }

        /* Note that we just made sure that offset < fvdat->filesize. */
        if ((offset + buf_count(bp)) > fvdat->filesize) {
            /* Trimming read */
            buf_setcount(bp, (uint32_t)(fvdat->filesize - offset));
        }

        if (buf_map(bp, &bufdat)) {
            IOLog("MacFUSE: failed to map buffer in strategy\n");
            return EFAULT;
        } else {
            mapped = TRUE;
        }

        while (buf_resid(bp) > 0) {

            chunksize = min((size_t)buf_resid(bp), data->iosize);

            fdi.iosize = sizeof(*fri);

            op = FUSE_READ;
            if (vtype == VDIR) {
                op = FUSE_READDIR;
            }

            fdisp_make_vp(&fdi, op, vp, (vfs_context_t)0);

            fri = fdi.indata;
            fri->fh = fufh->fh_id;

            /*
             * Historical note:
             *
             * fri->offset = ((off_t)(buf_blkno(bp))) * biosize;
             *
             * This wasn't being incremented!?
             */

            fri->offset = offset;
            fri->size = (typeof(fri->size))chunksize;
            fdi.tick->tk_aw_type = FT_A_BUF;
            fdi.tick->tk_aw_bufdata = bufdat;

            if ((err = fdisp_wait_answ(&fdi))) {
                /* There was a problem with reading. */
                goto out;
            }

            respsize = fdi.tick->tk_aw_bufsize;

            if (respsize < 0) { /* Cannot really happen... */
                err = EIO;
                goto out;
            }

            buf_setresid(bp, (uint32_t)(buf_resid(bp) - respsize));
            bufdat += respsize;
            offset += respsize;

            /* Did we hit EOF before being done? */
            if ((respsize == 0) && (buf_resid(bp) > 0)) {
                /*
                 * Historical note:
                 * If we don't get enough data, just fill the rest with zeros.
                 * In NFS context, this would mean a hole in the file.
                 */

                /* Zero-pad the incomplete buffer. */
                bzero(bufdat, buf_resid(bp));
                buf_setresid(bp, 0);
                break;
            }

        } /* while (buf_resid(bp) > 0) */

    } else {
        /* write */

        struct fuse_write_in  *fwi;
        struct fuse_write_out *fwo;
        int merr = 0;
        off_t diff;

        if (buf_map(bp, &bufdat)) {
            IOLog("MacFUSE: failed to map buffer in strategy\n");
            return EFAULT;
        } else {
            mapped = TRUE;
        }

        /* Write begin */

        buf_setresid(bp, buf_count(bp));
        offset = (off_t)((off_t)buf_blkno(bp) * biosize);

        /* XXX: TBD -- Check here for extension (writing past end) */

        left = buf_count(bp);

        while (left) {

            fdi.iosize = sizeof(*fwi);
            op = FUSE_WRITE;

            fdisp_make_vp(&fdi, op, vp, (vfs_context_t)0);

            chunksize = min((size_t)left, data->iosize);

            fwi = fdi.indata;
            fwi->fh = fufh->fh_id;
            fwi->offset = offset;
            fwi->size = (typeof(fwi->size))chunksize;

            fdi.tick->tk_ms_type = FT_M_BUF;
            fdi.tick->tk_ms_bufdata = bufdat;
            fdi.tick->tk_ms_bufsize = chunksize;

            /* About to write <chunksize> at <offset> */

            if ((err = fdisp_wait_answ(&fdi))) {
                merr = 1;
                break;
            }

            fwo = fdi.answ;
            diff = chunksize - fwo->size;
            if (diff < 0) {
                err = EINVAL;
                break;
            }

            left -= fwo->size;
            bufdat += fwo->size;
            offset += fwo->size;
            buf_setresid(bp, buf_resid(bp) - fwo->size);
        }

        if (merr) {
            goto out;
        }
    }

    if (fdi.tick) {
        fuse_ticket_drop(fdi.tick);
    } else {
        /* No ticket upon leaving */
    }

out:

    if (err) {
        buf_seterror(bp, err);
    }

    if (mapped == TRUE) {
        buf_unmap(bp);
    }

    buf_biodone(bp);

    return err;
}
__private_extern__
errno_t
fuse_internal_strategy_buf(struct vnop_strategy_args *ap)
{
    int32_t   bflags;
    upl_t     bupl;
    daddr64_t blkno, lblkno;
    int       bmap_flags;
    buf_t     bp = ap->a_bp;
    vnode_t   vp = buf_vnode(bp);
    int       vtype = vnode_vtype(vp);

    struct fuse_data *data;

    if (!vp || vtype == VCHR || vtype == VBLK) {
        panic("MacFUSE: buf_strategy: b_vp == NULL || vtype == VCHR | VBLK\n");
    }

    bflags = buf_flags(bp);

    if (bflags & B_READ) {
        bmap_flags = VNODE_READ;
    } else {
        bmap_flags = VNODE_WRITE;
    }

    bupl = buf_upl(bp);
    blkno = buf_blkno(bp);
    lblkno = buf_lblkno(bp);

    if (!(bflags & B_CLUSTER)) {

        if (bupl) {
            return cluster_bp(bp);
        }

        if (blkno == lblkno) {
            off_t  f_offset;
            size_t contig_bytes;

            data = fuse_get_mpdata(vnode_mount(vp));

            // Still think this is a kludge?
            f_offset = lblkno * data->blocksize;
            blkno = f_offset / data->blocksize;

            buf_setblkno(bp, blkno);

            contig_bytes = buf_count(bp);

            if (blkno == -1) {
                buf_clear(bp);
            }

            /*
             * Our "device" is always /all contiguous/. We don't wanna be
             * doing things like:
             *
             * ...
             *     else if ((long)contig_bytes < buf_count(bp)) {
             *         ret = buf_strategy_fragmented(devvp, bp, f_offset,
             *                                       contig_bytes));
             *         return ret;
             *      }
             */
        }

        if (blkno == -1) {
            buf_biodone(bp);
            return 0;
        }
    }

    // Issue the I/O

    return fuse_internal_strategy(vp, bp);
}