/*
 * Read the directory block of "vp" that contains byte "offset" and hand
 * the buffer back through "bpp".  When "res" is non-NULL it receives a
 * pointer to the byte at "offset" inside that buffer's data.
 *
 * Returns 0 on success.  On a read error the buffer is released, *bpp is
 * set to NULL and the error from buf_bread() is returned.
 */
int
cd9660_blkatoff(vnode_t vp, off_t offset, char **res, buf_t *bpp)
{
	struct iso_node *node = VTOI(vp);
	register struct iso_mnt *mnt = node->i_mnt;
	daddr_t blkno = lblkno(mnt, offset);
	int nbytes = blksize(mnt, node, blkno);
	buf_t blk;
	int rc;

	/*
	 * A sector-aligned offset whose computed block size differs from
	 * the sector size is read as exactly one sector.
	 */
	if (nbytes != imp_sector_size_placeholder_guard(mnt, nbytes, offset)) {
		nbytes = mnt->im_sector_size;
	}

	rc = (int)buf_bread(vp, (daddr64_t)((unsigned)blkno), nbytes, NOCRED, &blk);
	if (rc) {
		buf_brelse(blk);
		*bpp = NULL;
		return (rc);
	}

	if (res)
		*res = (char *)0 + buf_dataptr(blk) + blkoff(mnt, offset);
	*bpp = blk;

	return (0);
}
//
// ReadMultipleNodes
//
// Refill the scanner's buffer starting at theScanStatePtr->nodeNum.
// Any buffer currently held is invalidated and released first.  The
// read is limited to the smaller of the scanner's bufferSize and the
// contiguous run reported by hfs_bmap.
//
// Returns E_NONE on success, otherwise the error from hfs_bmap or
// buf_bread.  On success nodesLeftInBuffer and currentNodePtr describe
// the freshly read buffer.
//
static int ReadMultipleNodes( BTScanState *theScanStatePtr )
{
	int						status;
	BTreeControlBlockPtr	btcb;
	daddr64_t				physBlock;
	u_int32_t				readSize;
	struct vnode *			devVnode;
	unsigned int			runLength;
	u_int32_t				nodesPerBuffer;

	// drop whatever we were holding from the previous read
	if ( theScanStatePtr->bufferPtr != NULL )
	{
		buf_markinvalid(theScanStatePtr->bufferPtr);
		buf_brelse( theScanStatePtr->bufferPtr );
		theScanStatePtr->bufferPtr = NULL;
		theScanStatePtr->currentNodePtr = NULL;
	}

	btcb = theScanStatePtr->btcb;

	// translate the btree file's logical node number into a physical block
	status = hfs_bmap(btcb->fileRefNum, theScanStatePtr->nodeNum,
	                  &devVnode, &physBlock, &runLength);
	if ( status != E_NONE )
		return status;

	// hfs_bmap reports the number of valid blocks *after* the first one,
	// so a run of 10 blocks comes back as 9.  Blocks and nodes are treated
	// as the same unit here.
	nodesPerBuffer = (theScanStatePtr->bufferSize / btcb->nodeSize );
	if ( (runLength + 1) < nodesPerBuffer )
		readSize = (runLength + 1) * btcb->nodeSize;
	else
		readSize = theScanStatePtr->bufferSize;

	// pull the blocks in from the device
	status = (int)buf_bread(devVnode, physBlock, readSize, NOCRED,
	                        &theScanStatePtr->bufferPtr );
	if ( status != E_NONE )
		return status;

	theScanStatePtr->nodesLeftInBuffer =
		buf_count(theScanStatePtr->bufferPtr) / theScanStatePtr->btcb->nodeSize;
	theScanStatePtr->currentNodePtr =
		(BTNodeDescriptor *) buf_dataptr(theScanStatePtr->bufferPtr);

	return status;

} /* ReadMultipleNodes */
/*
;_______________________________________________________________________
;
; Routine:	ReadBitmapBlock
;
; Function:	Read in a bitmap block corresponding to a given allocation
;			block (bit).  Return a pointer to the bitmap block.
;
; Inputs:
;	vcb			--	Pointer to ExtendedVCB
;	bit			--	Allocation block whose bitmap block is desired
;
; Outputs:
;	buffer		--	Pointer to bitmap block corresponding to "bit",
;					or NULL when no block is returned
;	blockRef	--	Reference to the underlying buffer (the buf pointer
;					cast to u_int32_t), or 0 when no block is returned
;
; Result:	The value returned by buf_meta_bread (0 on success).
;_______________________________________________________________________
*/
static OSErr ReadBitmapBlock(
	ExtendedVCB		*vcb,
	u_int32_t		bit,
	u_int32_t		**buffer,
	u_int32_t		*blockRef)
{
	OSErr			err;
	struct buf *bp = NULL;
	struct vnode *vp = NULL;
	daddr64_t block;
	u_int32_t blockSize;

	/*
	 * volume bitmap blocks are protected by the allocation file lock
	 */
	REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false);

	/*
	 * Give the outputs a defined value on every path.  Previously a
	 * failed read that returned no buffer left *buffer and *blockRef
	 * untouched, so a caller could see (and later release) stale values.
	 */
	*blockRef = 0;
	*buffer = NULL;

	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));

	if (vcb->vcbSigWord == kHFSPlusSigWord) {
		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */

	} else /* hfs */ {
		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
		block += vcb->vcbVBMSt;			/* map to physical block */
	}

	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);

	if (bp) {
		if (err) {
			/* the read failed: give the buffer back, outputs stay cleared */
			buf_brelse(bp);
		} else {
			/*
			 * NOTE(review): the buf pointer is narrowed to u_int32_t
			 * because that is the type of the out-parameter; confirm
			 * this is only built where pointers fit in 32 bits.
			 */
			*blockRef = (u_int32_t)bp;
			*buffer = (u_int32_t *)buf_dataptr(bp);
		}
	}

	return err;
}
/* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and iput * instead of two iputs. * * Overall outline of ufs_lookup: * * check accessibility of directory * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache * * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked. 
*/ int cd9660_lookup(struct vnop_lookup_args *ap) { register struct vnode *vdp; /* vnode for directory being searched */ register struct iso_node *dp; /* inode for directory being searched */ register struct iso_mnt *imp; /* file system that directory is in */ struct buf *bp; /* a buffer of directory entries */ struct iso_directory_record *ep = NULL;/* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ int saveoffset = 0; /* offset of last directory entry in dir */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by cd9660_vget_internal */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int wantassoc; int error; ino_t ino = 0; int reclen; u_short namelen; int isoflags; char altname[ISO_RRIP_NAMEMAX]; int res; int len; char *name; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; vfs_context_t ctx = cnp->cn_context; size_t altlen; bp = NULL; *vpp = NULL; vdp = ap->a_dvp; dp = VTOI(vdp); imp = dp->i_mnt; lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); wantassoc = 0; /* * We now have a segment name to search for, and a directory to search. * * Before tediously performing a linear scan of the directory, * check the name cache to see if the directory/name pair * we are looking for is known already. 
*/ if ((error = cache_lookup(vdp, vpp, cnp))) { if (error == ENOENT) return (error); return (0); } len = cnp->cn_namelen; name = cnp->cn_nameptr; altname[0] = '\0'; /* * A "._" prefix means, we are looking for an associated file */ if (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR1 && *(name+1) == ASSOCCHAR2) { wantassoc = 1; len -= 2; name += 2; } /* * Decode search name into UCS-2 (Unicode) */ if ((imp->iso_ftype == ISO_FTYPE_JOLIET) && !((len == 1 && *name == '.') || (flags & ISDOTDOT))) { int flags1 = UTF_PRECOMPOSED; (void) utf8_decodestr(name, len, (u_int16_t*) altname, &altlen, sizeof(altname), 0, flags1); name = altname; len = altlen; } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ bmask = imp->im_sector_size - 1; if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = cd9660_blkatoff(vdp, SECTOFF(imp, dp->i_offset), NULL, &bp))) return (error); numdirpasses = 2; iso_nchstats.ncs_2passes++; } endsearch = dp->i_size; searchloop: while (dp->i_offset < endsearch) { /* * If offset is on a block boundary, * read the next directory block. * Release previous if it exists. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) buf_brelse(bp); if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp,dp->i_offset), NULL, &bp)) ) return (error); entryoffsetinblock = 0; } /* * Get pointer to next entry. 
*/ ep = (struct iso_directory_record *) ((char *)0 + buf_dataptr(bp) + entryoffsetinblock); reclen = isonum_711(ep->length); if (reclen == 0) { /* skip to next block, if any */ dp->i_offset = (dp->i_offset & ~bmask) + imp->im_sector_size; continue; } if (reclen < ISO_DIRECTORY_RECORD_SIZE) { /* illegal entry, stop */ break; } if (entryoffsetinblock + reclen > imp->im_sector_size) { /* entries are not allowed to cross sector boundaries */ break; } namelen = isonum_711(ep->name_len); isoflags = isonum_711(ep->flags); if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen) /* illegal entry, stop */ break; /* * Check for a name match. */ if (imp->iso_ftype == ISO_FTYPE_RRIP) { if (isoflags & directoryBit) ino = isodirino(ep, imp); else ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; dp->i_ino = ino; cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp); if (namelen == cnp->cn_namelen && !bcmp(name,altname,namelen)) goto found; ino = 0; } else { if ((!(isoflags & associatedBit)) == !wantassoc) { if ((len == 1 && *name == '.') || (flags & ISDOTDOT)) { if (namelen == 1 && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) { /* * Save directory entry's inode number and * release directory buffer. 
*/ dp->i_ino = isodirino(ep, imp); goto found; } if (namelen != 1 || ep->name[0] != 0) goto notfound; } else if (imp->iso_ftype != ISO_FTYPE_JOLIET && !(res = isofncmp(name, len, ep->name, namelen))) { if ( isoflags & directoryBit ) ino = isodirino(ep, imp); else ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; saveoffset = dp->i_offset; } else if (imp->iso_ftype == ISO_FTYPE_JOLIET && !(res = ucsfncmp((u_int16_t*)name, len, (u_int16_t*) ep->name, namelen))) { if ( isoflags & directoryBit ) ino = isodirino(ep, imp); else ino = ((daddr_t)buf_blkno(bp) << imp->im_bshift) + entryoffsetinblock; saveoffset = dp->i_offset; } else if (ino) goto foundino; #ifdef NOSORTBUG /* On some CDs directory entries are not sorted correctly */ else if (res < 0) goto notfound; else if (res > 0 && numdirpasses == 2) numdirpasses++; #endif } } dp->i_offset += reclen; entryoffsetinblock += reclen; } /* endwhile */ if (ino) { foundino: dp->i_ino = ino; if (saveoffset != dp->i_offset) { if (lblkno(imp, dp->i_offset) != lblkno(imp, saveoffset)) { if (bp != NULL) buf_brelse(bp); if ( (error = cd9660_blkatoff(vdp, SECTOFF(imp, saveoffset), NULL, &bp)) ) return (error); } entryoffsetinblock = saveoffset & bmask; ep = (struct iso_directory_record *) ((char *)0 + buf_dataptr(bp) + entryoffsetinblock); dp->i_offset = saveoffset; } goto found; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) buf_brelse(bp); /* * Insert name into cache (as non-existent) if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (ENOENT); found: if (numdirpasses == 2) iso_nchstats.ncs_pass2++; /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. 
*/ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = dp->i_offset; /* * Step through the translation in the name. We do not `iput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the `iget' for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; /* * If ino is different from dp->i_ino, * it's a relocated directory. */ if (flags & ISDOTDOT) { error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, NULL, NULL, dp->i_ino != ino, ep, vfs_context_proc(ctx)); VTOI(tdp)->i_parent = VTOI(pdp)->i_number; buf_brelse(bp); *vpp = tdp; } else if (dp->i_number == dp->i_ino) { buf_brelse(bp); vnode_get(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = cd9660_vget_internal(vnode_mount(vdp), dp->i_ino, &tdp, vdp, cnp, dp->i_ino != ino, ep, vfs_context_proc(ctx)); /* save parent inode number */ VTOI(tdp)->i_parent = VTOI(pdp)->i_number; buf_brelse(bp); if (error) return (error); *vpp = tdp; } return (0); }
/* * Vnode op for write */ int spec_write(struct vnop_write_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; daddr64_t bn; int bsize, blkmask, bscale; int io_sync; int devBlockSize=0; int n, on; int error = 0; dev_t dev; #if DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); return (error); case VBLK: if (uio_resid(uio) == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); io_sync = (ap->a_ioflag & IO_SYNC); dev = (vp->v_rdev); devBlockSize = vp->v_specsize; if (devBlockSize > PAGE_SIZE) return(EINVAL); bscale = PAGE_SIZE / devBlockSize; blkmask = bscale - 1; bsize = bscale * devBlockSize; do { bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ blkmask); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio_resid(uio)); /* * Use buf_getblk() as an optimization IFF: * * 1) We are reading exactly a block on a block * aligned boundary * 2) We know the size of the device from spec_open * 3) The read doesn't span the end of the device * * Otherwise, we fall back on buf_bread(). 
*/ if (n == bsize && vp->v_specdevsize != (u_int64_t)0 && (uio->uio_offset + (u_int64_t)n) > vp->v_specdevsize) { /* reduce the size of the read to what is there */ n = (uio->uio_offset + (u_int64_t)n) - vp->v_specdevsize; } if (n == bsize) bp = buf_getblk(vp, bn, bsize, 0, 0, BLK_WRITE); else error = (int)buf_bread(vp, bn, bsize, NOCRED, &bp); /* Translate downstream error for upstream, if needed */ if (!error) error = (int)buf_error(bp); if (error) { buf_brelse(bp); return (error); } n = min(n, bsize - buf_resid(bp)); error = uiomove((char *)0 + buf_dataptr(bp) + on, n, uio); if (error) { buf_brelse(bp); return (error); } buf_markaged(bp); if (io_sync) error = buf_bwrite(bp); else { if ((n + on) == bsize) error = buf_bawrite(bp); else error = buf_bdwrite(bp); } } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ return (0); }
/* * Vnode op for read */ int spec_read(struct vnop_read_args *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; daddr64_t bn, nextbn; long bsize, bscale; int devBlockSize=0; int n, on; int error = 0; dev_t dev; #if DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) panic("spec_read proc"); #endif if (uio_resid(uio) == 0) return (0); switch (vp->v_type) { case VCHR: error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); dev = vp->v_rdev; devBlockSize = vp->v_specsize; if (devBlockSize > PAGE_SIZE) return (EINVAL); bscale = PAGE_SIZE / devBlockSize; bsize = bscale * devBlockSize; do { on = uio->uio_offset % bsize; bn = (daddr64_t)((uio->uio_offset / devBlockSize) &~ (bscale - 1)); if (vp->v_speclastr + bscale == bn) { nextbn = bn + bscale; error = buf_breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = buf_bread(vp, bn, (int)bsize, NOCRED, &bp); vnode_lock(vp); vp->v_speclastr = bn; vnode_unlock(vp); n = bsize - buf_resid(bp); if ((on > n) || error) { if (!error) error = EINVAL; buf_brelse(bp); return (error); } n = min((unsigned)(n - on), uio_resid(uio)); error = uiomove((char *)0 + buf_dataptr(bp) + on, n, uio); if (n + on == bsize) buf_markaged(bp); buf_brelse(bp); } while (error == 0 && uio_resid(uio) > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ return (0); }
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 *
 *	ip		inode of the file being extended or written
 *	lbn		logical block number within the file
 *	size		number of bytes wanted in that block
 *	cred		credential passed to the allocator
 *	bpp		out: buffer for the block; set to NULL on entry and
 *			left NULL when B_NOBUFF is given or on error
 *	flags		B_SYNC, B_CLRBUF and B_NOBUFF are examined
 *	blk_alloc	optional out: number of bytes newly allocated for the
 *			data block (0 when nothing was allocated)
 *
 * Returns 0 on success, EFBIG for a negative lbn, or the error from the
 * allocator or buffer layer.  When an indirect-block allocation fails part
 * way through, the blocks allocated by this call are freed again.
 */
int
ffs_balloc(
	register struct inode *ip,
	register ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	struct buf **bpp,
	int flags,
	int * blk_alloc)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	int alloc_buffer = 1;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_fs;
	if (flags & B_NOBUFF)
		alloc_buffer = 0;

	if (blk_alloc)
		*blk_alloc = 0;

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/* the filesize prior to this write can fit in direct
		 * blocks (ie.  fragmentaion is possibly done)
		 * we are now extending the file write beyond
		 * the block which has end of file prior to this write
		 */
		osize = blksize(fs, ip, nb);
		/* osize gives disk allocated size in the last block. It is
		 * either in fragments or a file system block size
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/* few fragments are already allocated,since the
			 * current extends beyond this block
			 * allocate the complete block as fragments are only
			 * in last block
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			/* adjust the inode size we just grew */
			/* it is in nb+1 as nb starts from 0 */
			ip->i_size = (nb + 1) * fs->fs_bsize;
			ubc_setsize(vp, (off_t)ip->i_size);

			ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if ((flags & B_SYNC) || (!alloc_buffer)) {
				if (!alloc_buffer)
					buf_setflags(bp, B_NOCACHE);
				buf_bwrite(bp);
			} else
				buf_bdwrite(bp);
			/* note that bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			/* full block already allocated: just read it if wanted */
			if (alloc_buffer) {
				error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp);
				if (error) {
					buf_brelse(bp);
					return (error);
				}
				*bpp = bp;
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/* the existing fragment is already big enough */
				if (alloc_buffer) {
					error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp);
					if (error) {
						buf_brelse(bp);
						return (error);
					}
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					*bpp = bp;
					return (0);
				}
				else {
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					return (0);
				}
			} else {
				error = ffs_realloccg(ip, lbn,
					ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
				ip->i_flag |= IN_CHANGE | IN_UPDATE;

				/* adjust the inode size we just grew */
				ip->i_size = (lbn * fs->fs_bsize) + size;
				ubc_setsize(vp, (off_t)ip->i_size);

				if (!alloc_buffer) {
					buf_setflags(bp, B_NOCACHE);
					if (flags & B_SYNC)
						buf_bwrite(bp);
					else
						buf_bdwrite(bp);
				} else
					*bpp = bp;
				return (0);

			}
		} else {
			/* nothing allocated yet for this direct block */
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
				ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
				nsize, cred, &newb);
			if (error)
				return (error);
			if (alloc_buffer) {
				bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE);
				buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb)));

				if (flags & B_CLRBUF)
					buf_clear(bp);
			}
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (blk_alloc) {
				*blk_alloc = nsize;
			}
			if (alloc_buffer)
				*bpp = bp;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#if DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;	/* allociblk[] records every block allocated here */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if ((error = buf_bwrite(bp)) != 0) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if	REV_ENDIAN_FS
		if (rev_endian)
			nb = OSSwapInt32(bap[indirs[i].in_off]);
		else {
#endif	/* REV_ENDIAN_FS */
		nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
		}
#endif /* REV_ENDIAN_FS */
		/* last level reached: bp stays held for the data-block step */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if ((error = buf_bwrite(nbp)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		/* link the new indirect block into its parent */
#if	REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
		else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i - 1].in_off] = nb;
#if	REV_ENDIAN_FS
		}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if ((error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if	REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i].in_off] = OSSwapInt32(nb);
		else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i].in_off] = nb;
#if	REV_ENDIAN_FS
		}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if ((flags & B_SYNC)) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		if(alloc_buffer ) {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));

			if (flags & B_CLRBUF)
				buf_clear(nbp);
		}
		if (blk_alloc) {
			*blk_alloc = fs->fs_bsize;
		}
		if(alloc_buffer)
			*bpp = nbp;

		return (0);
	}
	/* data block already exists */
	buf_brelse(bp);
	if (alloc_buffer) {
		if (flags & B_CLRBUF) {
			error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				buf_brelse(nbp);
				goto fail;
			}
		} else {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		}
		*bpp = nbp;
	}
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
		devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
/*
 * ffs_blkalloc allocates a disk block for ffs_pageout(), as a consequence
 * it does no buf_breads of data blocks (that could lead to deadlock as the
 * page may be already marked busy as it is being paged out).  Also important
 * to note that we are not growing the file in pageouts.  So ip->i_size cannot
 * increase by this call due to the way UBC works.
 * This code is derived from ffs_balloc and many cases that are dealt with
 * in ffs_balloc are not applicable here.
 * Do not call with B_CLRBUF flags as this should only be called
 * from pageouts.
 *
 *	ip	inode of the file
 *	lbn	logical block number within the file
 *	size	number of bytes wanted in that block; larger than the file
 *		system block size panics
 *	cred	credential passed to the allocator
 *	flags	only B_SYNC is examined
 *
 * Returns 0 on success or the error from the allocator or buffer layer.
 * When an indirect-block allocation fails part way through, the blocks
 * allocated by this call are freed again.
 */
int
ffs_blkalloc(
	struct inode *ip,
	ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	int flags)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;

	if(size > fs->fs_bsize)
		panic("ffs_blkalloc: too large for allocation");

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * NOTE(review): i_size is printed with %d; its declared type
		 * is not visible here -- confirm the format matches.
		 */
		panic("ffs_blkalloc():cannot extend file: i_size %d, lbn %d", ip->i_size, lbn);
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			/* TBD: trivial case; the block  is already allocated */
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize > osize) {
				panic("ffs_allocblk: trying to extend a fragment");
			}
			return(0);
		} else {
			/* nothing allocated yet for this direct block */
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
				ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
				nsize, cred, &newb);
			if (error)
				return (error);
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);

	if(num == 0) {
		panic("ffs_blkalloc: file with direct blocks only");
	}

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;	/* allociblk[] records every block allocated here */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if ((error = buf_bwrite(bp)) != 0) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if	REV_ENDIAN_FS
		if (rev_endian)
			nb = OSSwapInt32(bap[indirs[i].in_off]);
		else {
#endif	/* REV_ENDIAN_FS */
		nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
		}
#endif /* REV_ENDIAN_FS */
		/* last level reached: bp stays held for the data-block step */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if ((error = buf_bwrite(nbp)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		/* link the new indirect block into its parent */
#if	REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
		else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i - 1].in_off] = nb;
#if	REV_ENDIAN_FS
		}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if ((error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if	REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i].in_off] = OSSwapInt32(nb);
		else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i].in_off] = nb;
#if	REV_ENDIAN_FS
		}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		return (0);
	}
	/* data block already exists */
	buf_brelse(bp);
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
		devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
/*
 * Perform raw I/O between a device and the memory described by "uio",
 * one bounded chunk at a time.
 *
 *	f_strategy	routine called to start each transfer
 *	bp		buffer to use, or NULL to have one allocated here
 *	dev		device number stored in the buffer
 *	flags		only B_READ / B_WRITE are kept
 *	f_minphys	routine called to bound each transfer's size
 *	uio		describes the memory and the device offset
 *	blocksize	divisor turning the uio offset into a block number
 *
 * Returns 0, EFAULT when a user-space iovec fails the access check, or
 * the error from vslock() or buf_biowait().  The loop stops at the first
 * error or short transfer.
 */
int
physio( void (*f_strategy)(buf_t),
	buf_t bp,
	dev_t dev,
	int flags,
	u_int (*f_minphys)(buf_t),
	struct uio *uio,
	int blocksize)
{
	struct proc *curp = current_proc();
	int rc = 0;
	int own_buf = 0;
	int saved_bflags = 0;
	int iov, want, chunk;
	int64_t moved;

	flags &= B_READ | B_WRITE;

	/*
	 * [check user read/write access to the data buffer]
	 *
	 * Every iovec is checked.  The direction is known from "flags", so
	 * the uio's own rw field is not consulted; a device read is a
	 * *write* to the user's memory and vice versa.
	 */
	if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
		int access = (flags == B_READ) ? B_WRITE : B_READ;

		for (iov = 0; iov < uio->uio_iovcnt; iov++) {
			user_addr_t base;
			user_size_t len;

			if (uio_getiov(uio, iov, &base, &len) ||
			    !useracc(base, len, access))
				return (EFAULT);
		}
	}

	/*
	 * Make sure we have a buffer, creating one if necessary.
	 */
	if (bp != NULL) {
		saved_bflags = buf_flags(bp);
	} else {
		bp = buf_alloc((vnode_t)0);
		own_buf = 1;
	}

	/*
	 * Whether the buffer came from buf_alloc or from the caller, it is
	 * expected to be marked BL_BUSY by now (a caller-supplied buffer
	 * must already be owned by the caller).
	 */
	assert(bp->b_lflags & BL_BUSY);

	/*
	 * [set up the fixed part of the buffer for a transfer]
	 */
	bp->b_dev = dev;
	bp->b_proc = curp;

	/*
	 * [mark the buffer busy for physical I/O]
	 * B_PHYS because the I/O targets user memory, B_RAW because it is
	 * "Set by physio for raw transfers."
	 */
	buf_setflags(bp, B_PHYS | B_RAW);

	/*
	 * [while there is data to transfer and no I/O error]
	 */
	while (uio_resid(uio) > 0) {

		chunk = uio_curriovlen(uio);
		if (chunk > MAXPHYSIO_WIRED)
			chunk = MAXPHYSIO_WIRED;

		/* start each pass with a fresh buffer in the right direction */
		buf_reset(bp, flags);

		/* [set up the buffer for a maximum-sized transfer] */
		buf_setblkno(bp, uio_offset(uio) / blocksize);
		buf_setcount(bp, chunk);
		buf_setdataptr(bp, (uintptr_t)CAST_DOWN(caddr_t, uio_curriovbase(uio)));

		/*
		 * [call f_minphys to bound the transfer size]
		 * and remember the resulting count for the short-transfer
		 * test below.
		 */
		(*f_minphys)(bp);
		want = buf_count(bp);

		/*
		 * [lock the part of the user address space involved
		 *    in the transfer]
		 */
		if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) {
			rc = vslock(CAST_USER_ADDR_T(buf_dataptr(bp)),
				    (user_size_t)want);
			if (rc)
				break;
		}

		/* [call f_strategy to start the transfer] */
		(*f_strategy)(bp);

		/* [wait for the transfer to complete] */
		rc = (int)buf_biowait(bp);

		/*
		 * [unlock the part of the address space previously
		 *    locked]
		 */
		if (UIO_SEG_IS_USER_SPACE(uio->uio_segflg))
			vsunlock(CAST_USER_ADDR_T(buf_dataptr(bp)),
				 (user_size_t)want, (flags & B_READ));

		/*
		 * [deduct the transfer size from the total number
		 *    of data to transfer]
		 */
		moved = buf_count(bp) - buf_resid(bp);
		uio_update(uio, moved);

		/*
		 * Stop on an error, or on a short transfer (weird
		 * end-of-disk semantics).
		 */
		if (rc || moved < want)
			break;
	}

	/* free our own buffer, or hand the caller's flags back */
	if (own_buf)
		buf_free(bp);
	else
		buf_setflags(bp, saved_bflags);

	return (rc);
}