static int
nvtruncbuf_bp_trunc(struct buf *bp, void *data)
{
	struct truncbuf_info *info = data;

	/*
	 * Do not try to use a buffer we cannot immediately lock,
	 * but sleep anyway to prevent a livelock.  The code will
	 * loop until all buffers can be acted upon.
	 */
	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
		atomic_add_int(&bp->b_refs, 1);
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) == 0)
			BUF_UNLOCK(bp);
		atomic_subtract_int(&bp->b_refs, 1);
	} else if ((info->clean && (bp->b_flags & B_DELWRI)) ||
		   (info->clean == 0 && (bp->b_flags & B_DELWRI) == 0) ||
		   bp->b_vp != info->vp ||
		   nvtruncbuf_bp_trunc_cmp(bp, data)) {
		BUF_UNLOCK(bp);
	} else {
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
		brelse(bp);
	}
	lwkt_yield();
	return (1);
}
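This routine is a scan callback rather than an entry point. A hedged sketch of the kind of loop that drives it, assuming DragonFly's RB_SCAN buffer-tree iteration and an info structure carrying the clean flag (names such as v_rbdirty_tree and the exact info layout are assumptions here, not taken from the excerpt):

	/*
	 * Assumed driver sketch: repeatedly scan the vnode's dirty-buffer
	 * tree, handing each buffer that passes the cmp filter to
	 * nvtruncbuf_bp_trunc().  Because the callback can sleep, the
	 * scan is restarted until a pass finds nothing left to do.
	 */
	info.clean = 0;			/* act on dirty buffers this pass */
	do {
		count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
				nvtruncbuf_bp_trunc_cmp,
				nvtruncbuf_bp_trunc, &info);
	} while (count);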
/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to insure that the
 * cached blocks be of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if (bp = incore(vp, blkno)) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep (bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {
			if ((bp = getnewbuf(size)) == 0)
				continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
/*
 * Buffer cleaning daemon.
 */
void
buf_daemon(struct proc *p)
{
	int s;
	struct buf *bp;
	struct timeval starttime, timediff;

	cleanerproc = curproc;

	for (;;) {
		if (numdirtypages < hidirtypages) {
			tsleep(&bd_req, PRIBIO - 7, "cleaner", 0);
		}

		starttime = time;
		s = splbio();
		while ((bp = TAILQ_FIRST(&bufqueues[BQ_DIRTY]))) {
			bremfree(bp);
			SET(bp->b_flags, B_BUSY);
			splx(s);

			if (ISSET(bp->b_flags, B_INVAL)) {
				brelse(bp);
				s = splbio();
				continue;
			}
#ifdef DIAGNOSTIC
			if (!ISSET(bp->b_flags, B_DELWRI))
				panic("Clean buffer on BQ_DIRTY");
#endif
			if (LIST_FIRST(&bp->b_dep) != NULL &&
			    !ISSET(bp->b_flags, B_DEFERRED) &&
			    buf_countdeps(bp, 0, 1)) {
				SET(bp->b_flags, B_DEFERRED);
				s = splbio();
				numfreepages += btoc(bp->b_bufsize);
				numdirtypages += btoc(bp->b_bufsize);
				binstailfree(bp, &bufqueues[BQ_DIRTY]);
				CLR(bp->b_flags, B_BUSY);
				continue;
			}

			bawrite(bp);

			if (numdirtypages < lodirtypages)
				break;
			/* Never allow processing to run for more than 1 sec */
			timersub(&time, &starttime, &timediff);
			if (timediff.tv_sec)
				break;

			s = splbio();
		}
	}
}
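The tsleep() on bd_req above is only half of the handshake; the other half is a wakeup from the buffer-allocation path when dirty pages cross the high-water mark. The getnewbuf() later in this section contains it, shown here in isolation for reference:

	/*
	 * Producer side (from getnewbuf() below): wake the cleaner once
	 * the dirty-page count reaches the high-water mark.
	 */
	if (numdirtypages >= hidirtypages)
		wakeup(&bd_req);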
/*
 * Synch buffers associated with a block device
 */
int
spec_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct buf *nbp;
	int s;

	if (vp->v_type == VCHR)
		return (0);

#ifdef WAPBL
	if (vp->v_type == VBLK && vp->v_specmountpoint != NULL &&
	    vp->v_specmountpoint->mnt_wapbl != NULL) {
		int error = ffs_wapbl_fsync_vfs(vp, ap->a_waitfor);
		if (error)
			return (error);
	}
#endif

	/*
	 * Flush all dirty buffers associated with a block device.
	 */
loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
	    bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("spec_fsync: not dirty");
		bremfree(bp);
		buf_acquire(bp);
		splx(s);
		bawrite(bp);
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "spec_fsync", 0);
#ifdef DIAGNOSTIC
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			vprint("spec_fsync: dirty", vp);
			goto loop;
		}
#endif
	}
	splx(s);
	return (0);
}
/*
 * Synch an open file.
 */
int
ffs_fsync(void *v)
{
	struct vop_fsync_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct buf *bp, *nbp;
	int s, error, passes, skipmeta;

	if (vp->v_type == VBLK &&
	    vp->v_specmountpoint != NULL &&
	    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
		softdep_fsync_mountdev(vp, ap->a_waitfor);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR + 1;
	skipmeta = 0;
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;
	s = splbio();
loop:
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
	    bp = LIST_NEXT(bp, b_vnbufs))
		bp->b_flags &= ~B_SCANNED;
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, this pass is the first time through on a
		 * synchronous flush request and the buffer being considered
		 * is metadata, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if (bp->b_flags & (B_BUSY | B_SCANNED))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		if (skipmeta && bp->b_lblkno < 0)
			continue;
		if (ap->a_waitfor != MNT_WAIT &&
		    LIST_FIRST(&bp->b_dep) != NULL &&
		    (bp->b_flags & B_DEFERRED) == 0 &&
		    buf_countdeps(bp, 0, 1)) {
			bp->b_flags |= B_DEFERRED;
			continue;
		}

		bremfree(bp);
		buf_acquire(bp);
		bp->b_flags |= B_SCANNED;
		splx(s);
		/*
		 * On our final pass through, do all I/O synchronously
		 * so that we can find out if our flush is failing
		 * because of write errors.
		 */
		if (passes > 0 || ap->a_waitfor != MNT_WAIT)
			(void) bawrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return (error);
		s = splbio();
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		nbp = LIST_FIRST(&vp->v_dirtyblkhd);
	}
	if (skipmeta) {
		skipmeta = 0;
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		vwaitforio(vp, 0, "ffs_fsync", 0);

		/*
		 * Ensure that any filesystem metadata associated
		 * with the vnode has been written.
		 */
		splx(s);
		if ((error = softdep_sync_metadata(ap)) != 0)
			return (error);
		s = splbio();
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
	}
	splx(s);
	return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
}
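ffs_fsync() is entered through the VOP_FSYNC interface rather than called directly. A hedged sketch of a typical caller, assuming the OpenBSD-era signatures vn_lock(vp, flags, p), VOP_FSYNC(vp, cred, waitfor, p), and VOP_UNLOCK(vp, flags, p); this is illustrative, not the excerpt's own call site:

	/*
	 * Illustrative caller (assumed): synchronously flush one vnode,
	 * holding the vnode lock across the VOP_FSYNC call, the way
	 * fsync(2)-style code does.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
	VOP_UNLOCK(vp, 0, p);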
/*
 * Find a buffer which is available for use.
 *
 * We must notify getblk if we slept during the buffer allocation. When
 * that happens, we allocate a buffer anyway (unless tsleep is interrupted
 * or times out) and return !0.
 */
int
getnewbuf(int slpflag, int slptimeo, struct buf **bpp)
{
	struct buf *bp;
	int s, ret, error;

	*bpp = NULL;
	ret = 0;

start:
	s = splbio();
	/*
	 * Wake up cleaner if we're getting low on buffers.
	 */
	if (numdirtypages >= hidirtypages)
		wakeup(&bd_req);

	if ((numcleanpages <= locleanpages) &&
	    curproc != syncerproc && curproc != cleanerproc) {
		needbuffer++;
		error = tsleep(&needbuffer, slpflag | (PRIBIO + 1),
		    "getnewbuf", slptimeo);
		splx(s);
		if (error)
			return (1);
		ret = 1;
		goto start;
	}

	if ((bp = TAILQ_FIRST(&bufqueues[BQ_CLEAN])) == NULL) {
		/* wait for a free buffer of any kind */
		nobuffers = 1;
		error = tsleep(&nobuffers, slpflag | (PRIBIO - 3),
		    "getnewbuf", slptimeo);
		splx(s);
		if (error)
			return (1);
		ret = 1;
		goto start;
	}

	bremfree(bp);

	/* Buffer is no longer on free lists. */
	SET(bp->b_flags, B_BUSY);

#ifdef DIAGNOSTIC
	if (ISSET(bp->b_flags, B_DELWRI))
		panic("Dirty buffer on BQ_CLEAN");
#endif

	/* disassociate us from our vnode, if we had one... */
	if (bp->b_vp)
		brelvp(bp);

	splx(s);

#ifdef DIAGNOSTIC
	/* CLEAN buffers must have no dependencies */
	if (LIST_FIRST(&bp->b_dep) != NULL)
		panic("BQ_CLEAN has buffer with dependencies");
#endif

	/* clear out various other fields */
	bp->b_flags = B_BUSY;
	bp->b_dev = NODEV;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_bcount = 0;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;

	bremhash(bp);
	*bpp = bp;
	return (ret);
}
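Both tsleep() calls above rely on brelse() waking the sleepers once buffers return to the free queues. A hedged sketch of that wakeup side follows; the exact conditions inside brelse() are assumptions and are not shown in the excerpt:

	/*
	 * Assumed sketch of brelse(): after requeueing the buffer on a
	 * free list, wake anyone stalled in getnewbuf() waiting either
	 * for any buffer at all or for clean pages to recover.
	 */
	if (nobuffers) {
		nobuffers = 0;
		wakeup(&nobuffers);
	}
	if (needbuffer && numcleanpages > locleanpages) {
		needbuffer = 0;
		wakeup(&needbuffer);
	}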
/*
 * Expand or contract the actual memory allocated to a buffer.
 *
 * If the buffer shrinks, data is lost, so it's up to the
 * caller to have written it out *first*; this routine will not
 * start a write.  If the buffer grows, it's the callers
 * responsibility to fill out the buffer's additional contents.
 */
void
allocbuf(struct buf *bp, int size)
{
	struct buf *nbp;
	vsize_t desired_size;
	int s;

	desired_size = round_page(size);
	if (desired_size > MAXBSIZE)
		panic("allocbuf: buffer larger than MAXBSIZE requested");

	if (bp->b_bufsize == desired_size)
		goto out;

	/*
	 * If the buffer is smaller than the desired size, we need to snarf
	 * it from other buffers.  Get buffers (via getnewbuf()), and
	 * steal their pages.
	 */
	while (bp->b_bufsize < desired_size) {
		int amt;

		/* find a buffer */
		getnewbuf(0, 0, &nbp);
		SET(nbp->b_flags, B_INVAL);
		binshash(nbp, &invalhash);

		/* and steal its pages, up to the amount we need */
		amt = MIN(nbp->b_bufsize, (desired_size - bp->b_bufsize));
		pagemove((nbp->b_data + nbp->b_bufsize - amt),
		    bp->b_data + bp->b_bufsize, amt);
		bp->b_bufsize += amt;
		nbp->b_bufsize -= amt;

		/* reduce transfer count if we stole some data */
		if (nbp->b_bcount > nbp->b_bufsize)
			nbp->b_bcount = nbp->b_bufsize;

#ifdef DIAGNOSTIC
		if (nbp->b_bufsize < 0)
			panic("allocbuf: negative bufsize");
#endif
		brelse(nbp);
	}

	/*
	 * If we want a buffer smaller than the current size,
	 * shrink this buffer.  Grab a buf head from the EMPTY queue,
	 * move a page onto it, and put it on front of the AGE queue.
	 * If there are no free buffer headers, leave the buffer alone.
	 */
	if (bp->b_bufsize > desired_size) {
		s = splbio();
		if ((nbp = bufqueues[BQ_EMPTY].tqh_first) == NULL) {
			/* No free buffer head */
			splx(s);
			goto out;
		}
		bremfree(nbp);
		SET(nbp->b_flags, B_BUSY);
		splx(s);

		/* move the page to it and note this change */
		pagemove(bp->b_data + desired_size, nbp->b_data,
		    bp->b_bufsize - desired_size);
		nbp->b_bufsize = bp->b_bufsize - desired_size;
		bp->b_bufsize = desired_size;
		nbp->b_bcount = 0;
		SET(nbp->b_flags, B_INVAL);

		/* release the newly-filled buffer and leave */
		brelse(nbp);
	}

out:
	bp->b_bcount = size;
}
/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset. If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it. Otherwise, return an empty block of the
 * correct size. It is up to the caller to insure that the
 * cached blocks be of the correct size.
 */
struct buf *
getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	struct bufhashhdr *bh;
	struct buf *bp, *nbp = NULL;
	int s, err;

	/*
	 * XXX
	 * The following is an inlined version of 'incore()', but with
	 * the 'invalid' test moved to after the 'busy' test.  It's
	 * necessary because there are some cases in which the NFS
	 * code sets B_INVAL prior to writing data to the server, but
	 * in which the buffers actually contain valid data.  In this
	 * case, we can't allow the system to allocate a new buffer for
	 * the block until the write is finished.
	 */
	bh = BUFHASH(vp, blkno);
start:
	LIST_FOREACH(bp, BUFHASH(vp, blkno), b_hash) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;

		s = splbio();
		if (ISSET(bp->b_flags, B_BUSY)) {
			SET(bp->b_flags, B_WANTED);
			err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
			    slptimeo);
			splx(s);
			if (err)
				return (NULL);
			goto start;
		}

		if (!ISSET(bp->b_flags, B_INVAL)) {
			SET(bp->b_flags, (B_BUSY | B_CACHE));
			bremfree(bp);
			splx(s);
			break;
		}
		splx(s);
	}

	if (bp == NULL) {
		if (nbp == NULL && getnewbuf(slpflag, slptimeo, &nbp) != 0) {
			goto start;
		}
		bp = nbp;
		binshash(bp, bh);
		bp->b_blkno = bp->b_lblkno = blkno;
		s = splbio();
		bgetvp(vp, bp);
		splx(s);
	} else if (nbp != NULL) {
		/*
		 * Set B_AGE so that buffer appear at BQ_CLEAN head
		 * and gets reused ASAP.
		 */
		SET(nbp->b_flags, B_AGE);
		brelse(nbp);
	}

	allocbuf(bp, size);

	return (bp);
}
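A hedged sketch of how a read path typically drives this getblk(): if the returned buffer does not already hold valid data, start a device read and wait for it. This mirrors the classic bread() pattern; the exact validity test and strategy call vary between BSDs, and read_block_sketch is a hypothetical name introduced here for illustration:

struct buf *
read_block_sketch(struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp;

	/* getblk() returns NULL only if its tsleep was interrupted */
	if ((bp = getblk(vp, blkno, size, 0, 0)) == NULL)
		return (NULL);

	/*
	 * A cached buffer with valid contents needs no I/O; otherwise
	 * issue the read and wait for completion.
	 */
	if (!ISSET(bp->b_flags, B_DONE | B_DELWRI)) {
		SET(bp->b_flags, B_READ);
		VOP_STRATEGY(bp);
		if (biowait(bp) != 0) {
			brelse(bp);
			return (NULL);
		}
	}
	return (bp);
}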
/*
 * Find a buffer which is available for use.
 * If free memory for buffer space and an empty header from the empty list,
 * use that. Otherwise, select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
static struct buf *
getnewbuf(int sz)
{
	struct buf *bp;
	int x;

	x = splbio();
start:
	/* can we constitute a new buffer? */
	if (freebufspace > sz &&
	    bfreelist[BQ_EMPTY].av_forw != (struct buf *)bfreelist+BQ_EMPTY) {
		caddr_t addr;

/*#define notyet*/
#ifndef notyet
		if ((addr = malloc (sz, M_TEMP, M_WAITOK)) == 0)
			goto tryfree;
#else /* notyet */
		/* get new memory buffer */
		if (round_page(sz) == sz)
			addr = (caddr_t) kmem_alloc_wired_wait(buffer_map, sz);
		else
			addr = (caddr_t) malloc (sz, M_TEMP, M_WAITOK);
		/*if ((addr = malloc (sz, M_TEMP, M_NOWAIT)) == 0)
			goto tryfree;*/
		bzero(addr, sz);
#endif /* notyet */
		freebufspace -= sz;
		allocbufspace += sz;

		bp = bfreelist[BQ_EMPTY].av_forw;
		bp->b_flags = B_BUSY | B_INVAL;
		bremfree(bp);
		bp->b_un.b_addr = addr;
		bp->b_bufsize = sz;	/* 20 Aug 92*/
		goto fillin;
	}

tryfree:
	if (bfreelist[BQ_AGE].av_forw != (struct buf *)bfreelist+BQ_AGE) {
		bp = bfreelist[BQ_AGE].av_forw;
		bremfree(bp);
	} else if (bfreelist[BQ_LRU].av_forw != (struct buf *)bfreelist+BQ_LRU) {
		bp = bfreelist[BQ_LRU].av_forw;
		bremfree(bp);
	} else {
		/* wait for a free buffer of any kind */
		(bfreelist + BQ_AGE)->b_flags |= B_WANTED;
		sleep(bfreelist, PRIBIO);
		splx(x);
		return (0);
	}

	/* if we are a delayed write, convert to an async write! */
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_BUSY;
		bawrite (bp);
		goto start;
	}

	if (bp->b_vp)
		brelvp(bp);

	/* we are not free, nor do we contain interesting data */
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);	/* 25 Apr 92*/
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	bp->b_flags = B_BUSY;
fillin:
	bremhash(bp);
	splx(x);
	bp->b_dev = NODEV;
	bp->b_vp = NULL;
	bp->b_blkno = bp->b_lblkno = 0;
	bp->b_iodone = 0;
	bp->b_error = 0;
	bp->b_wcred = bp->b_rcred = NOCRED;
	if (bp->b_bufsize != sz)
		allocbuf(bp, sz);
	bp->b_bcount = bp->b_bufsize = sz;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}
static __inline int
_vnode_validate(hammer_dedup_cache_t dcp, void *data, int *errorp)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	struct vnode *vp;
	struct buf *bp;
	off_t dooffset;
	int result, error;

	result = error = 0;
	*errorp = 0;

	hammer_simple_transaction(&trans, dcp->hmp);

	ip = hammer_get_inode(&trans, NULL, dcp->obj_id, HAMMER_MAX_TID,
	    dcp->localization, 0, &error);
	if (ip == NULL) {
		hkprintf("dedup: unable to find objid %016jx:%08x\n",
		    (intmax_t)dcp->obj_id, dcp->localization);
		*errorp = 1;
		goto failed2;
	}

	error = hammer_get_vnode(ip, &vp);
	if (error) {
		hkprintf("dedup: unable to acquire vnode for %016jx:%08x\n",
		    (intmax_t)dcp->obj_id, dcp->localization);
		*errorp = 2;
		goto failed;
	}

	if ((bp = findblk(ip->vp, dcp->file_offset, FINDBLK_NBLOCK)) != NULL) {
		bremfree(bp);

		/* XXX if (mapped to userspace) goto done, *errorp = 4 */
		if ((bp->b_flags & B_CACHE) == 0 || bp->b_flags & B_DIRTY) {
			*errorp = 5;
			goto done;
		}

		if (bp->b_bio2.bio_offset != dcp->data_offset) {
			error = VOP_BMAP(ip->vp, dcp->file_offset, &dooffset,
			    NULL, NULL, BUF_CMD_READ);
			if (error) {
				*errorp = 6;
				goto done;
			}

			if (dooffset != dcp->data_offset) {
				*errorp = 7;
				goto done;
			}
			hammer_live_dedup_bmap_saves++;
		}

		if (bcmp(data, bp->b_data, dcp->bytes) == 0)
			result = 1;

done:
		bqrelse(bp);
	} else {
		*errorp = 3;
	}

	vput(vp);
failed:
	hammer_rel_inode(ip, 0);
failed2:
	hammer_done_transaction(&trans);
	return (result);
}