/*
 * Initialize buffer headers and related structures.
 */
void
bufinit()
{
	struct bufhd *bh;
	struct buf *bp;

	/* first, make a null hash table */
	for (bh = bufhash; bh < bufhash + BUFHSZ; bh++) {
		bh->b_flags = 0;
		bh->b_forw = (struct buf *)bh;
		bh->b_back = (struct buf *)bh;
	}

	/* next, make a null set of free lists */
	for (bp = bfreelist; bp < bfreelist + BQUEUES; bp++) {
		bp->b_flags = 0;
		bp->av_forw = bp;
		bp->av_back = bp;
		bp->b_forw = bp;
		bp->b_back = bp;
	}

	/* finally, initialize each buffer header and stick on empty q */
	for (bp = buf; bp < buf + nbuf; bp++) {
		bp->b_flags = B_HEAD | B_INVAL;	/* we're just an empty header */
		bp->b_dev = NODEV;
		bp->b_vp = 0;
		binstailfree(bp, bfreelist + BQ_EMPTY);
		binshash(bp, bfreelist + BQ_EMPTY);
	}
}
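/*
 * Illustrative sketch (assumed; these helpers are defined elsewhere):
 * given that bufinit() above points every list head at itself, binshash()
 * and binstailfree() are presumably plain circular doubly-linked list
 * insertions on the hash links (b_forw/b_back) and the free-list links
 * (av_forw/av_back) respectively, roughly as follows.  binshash() links
 * at the chain head; binstailfree() links at the list tail.
 */
#define	binshash(bp, dp) {				\
	(bp)->b_forw = (dp)->b_forw;			\
	(bp)->b_back = (dp);				\
	(dp)->b_forw->b_back = (bp);			\
	(dp)->b_forw = (bp);				\
}

#define	binstailfree(bp, dp) {				\
	(bp)->av_forw = (dp);				\
	(bp)->av_back = (dp)->av_back;			\
	(dp)->av_back->av_forw = (bp);			\
	(dp)->av_back = (bp);				\
}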
/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit(void)
{
	struct buf *bp;
	struct bqueues *dp;
	int i;
	int base, residual;

	pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", NULL);
	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		TAILQ_INIT(dp);
	bufhashtbl = hashinit(nbuf, M_CACHE, M_WAITOK, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers + i * MAXBSIZE;
		LIST_INIT(&bp->b_dep);
		if (i < residual)
			bp->b_bufsize = (base + 1) * PAGE_SIZE;
		else
			bp->b_bufsize = base * PAGE_SIZE;
		bp->b_flags = B_INVAL;
		if (bp->b_bufsize) {
			dp = &bufqueues[BQ_CLEAN];
			numfreepages += btoc(bp->b_bufsize);
			numcleanpages += btoc(bp->b_bufsize);
		} else {
			dp = &bufqueues[BQ_EMPTY];
			numemptybufs++;
		}
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}

	hidirtypages = bufpages / 4;
	lodirtypages = hidirtypages / 2;

	/*
	 * Reserve 5% of bufpages for the syncer's needs, but not more
	 * than 25% and, if possible, not less than 2 * MAXBSIZE.  The
	 * locleanpages value must not be too small, but there is
	 * probably no reason to set it higher than 1-2 MB.
	 */
	locleanpages = bufpages / 20;
	if (locleanpages < btoc(2 * MAXBSIZE))
		locleanpages = btoc(2 * MAXBSIZE);
	if (locleanpages > bufpages / 4)
		locleanpages = bufpages / 4;
	if (locleanpages > btoc(2 * 1024 * 1024))
		locleanpages = btoc(2 * 1024 * 1024);

#ifdef DEBUG
	mincleanpages = locleanpages;
#endif
}
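/*
 * Worked example (illustrative, not from the source): the base/residual
 * arithmetic above spreads bufpages as evenly as possible over nbuf
 * buffers, with the first 'residual' buffers getting one extra page.
 * This standalone demo uses hypothetical values to show that every
 * page is accounted for.
 */
#include <stdio.h>

int
main(void)
{
	int bufpages = 205, nbuf = 100;		/* hypothetical values */
	int base = bufpages / nbuf;		/* 2 */
	int residual = bufpages % nbuf;		/* 5 */
	int i, total = 0;

	/* buffers 0-4 get base+1 = 3 pages; buffers 5-99 get base = 2 */
	for (i = 0; i < nbuf; i++)
		total += (i < residual) ? base + 1 : base;
	printf("distributed %d of %d pages\n", total, bufpages);
	return 0;				/* prints 205 of 205 */
}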
/*
 * Patiently await operations to complete on this buffer.
 * When they do, extract and return any error value
 * associated with the I/O.  If the block is invalid,
 * force it off the lookup hash chains.
 */
int
biowait(register struct buf *bp)
{
	int x;

	x = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	if ((bp->b_flags & B_ERROR) || bp->b_error) {
		if ((bp->b_flags & B_INVAL) == 0) {
			bp->b_flags |= B_INVAL;
			bremhash(bp);
			binshash(bp, bfreelist + BQ_AGE);
		}
		if (!bp->b_error)
			bp->b_error = EIO;
		else
			bp->b_flags |= B_ERROR;
		splx(x);
		return (bp->b_error);
	} else {
		splx(x);
		return (0);
	}
}
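/*
 * Usage sketch (assumed, not from this file): a synchronous read of
 * this era typically pairs the strategy call with biowait().  The
 * bread() shape and the VOP_STRATEGY entry point follow the 4.3/4.4BSD
 * convention and are assumptions here.
 */
int
bread(struct vnode *vp, daddr_t blkno, int size, struct buf **bpp)
{
	struct buf *bp;

	bp = getblk(vp, blkno, size);
	*bpp = bp;
	if (bp->b_flags & B_CACHE)	/* cache hit: contents already valid */
		return (0);
	bp->b_flags |= B_READ;
	VOP_STRATEGY(bp);		/* start the device transfer */
	return (biowait(bp));		/* sleep until B_DONE, return error */
}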
/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset.  If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it.  Otherwise, return an empty block of the
 * correct size.  It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(register struct vnode *vp, daddr_t blkno, int size)
{
	struct buf *bp, *bh;
	int x;

	for (;;) {
		if ((bp = incore(vp, blkno)) != NULL) {
			x = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep(bp, PRIBIO);
				splx(x);
				continue;
			}
			bp->b_flags |= B_BUSY | B_CACHE;
			bremfree(bp);
			if (size > bp->b_bufsize)
				panic("now what do we do?");
			/* if (bp->b_bufsize != size) allocbuf(bp, size); */
		} else {
			if ((bp = getnewbuf(size)) == 0)
				continue;
			bp->b_blkno = bp->b_lblkno = blkno;
			bgetvp(vp, bp);
			x = splbio();
			bh = BUFHASH(vp, blkno);
			binshash(bp, bh);
			bp->b_flags = B_BUSY;
		}
		splx(x);
		return (bp);
	}
}
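/*
 * Illustrative sketch (assumed): the incore() lookup that getblk()
 * relies on above is the classic hash-chain walk -- find the buffer
 * for (vp, blkno) if it is cached, without changing its state.  The
 * body below is a plausible reconstruction, not the original.
 */
struct buf *
incore(struct vnode *vp, daddr_t blkno)
{
	struct buf *bp;
	struct bufhd *bh;

	bh = BUFHASH(vp, blkno);
	for (bp = bh->b_forw; bp != (struct buf *)bh; bp = bp->b_forw)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}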
/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;

	getnewbuf(0, 0, &bp);
	SET(bp->b_flags, B_INVAL);
	binshash(bp, &invalhash);
	allocbuf(bp, size);

	return (bp);
}
/*
 * Get an empty, disassociated buffer of given size.
 */
struct buf *
geteblk(int size)
{
	struct buf *bp;
	int x;

	while ((bp = getnewbuf(size)) == 0)
		;
	x = splbio();
	binshash(bp, bfreelist + BQ_AGE);
	splx(x);

	return (bp);
}
/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device buffer lists to empty.
 */
static void
binit()
{
	register struct buf *bp;
	register int i;
	caddr_t paddr;

	for (bp = bfreelist; bp < &bfreelist[BQUEUES]; bp++)
		bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	paddr = bufdata;
	for (i = 0; i < NBUF; i++, paddr += MAXBSIZE) {
		bp = &buf[i];
		bp->b_dev = NODEV;
		bp->b_bcount = 0;
		bp->b_addr = paddr;
		binshash(bp, &bfreelist[BQ_AGE]);
		bp->b_flags = B_BUSY|B_INVAL;
		brelse(bp);
	}
}
/*
 * Expand or contract the actual memory allocated to a buffer.
 *
 * If the buffer shrinks, data is lost, so it's up to the
 * caller to have written it out *first*; this routine will not
 * start a write.  If the buffer grows, it's the caller's
 * responsibility to fill out the buffer's additional contents.
 */
void
allocbuf(struct buf *bp, int size)
{
	struct buf *nbp;
	vsize_t desired_size;
	int s;

	desired_size = round_page(size);
	if (desired_size > MAXBSIZE)
		panic("allocbuf: buffer larger than MAXBSIZE requested");

	if (bp->b_bufsize == desired_size)
		goto out;

	/*
	 * If the buffer is smaller than the desired size, we need to snarf
	 * it from other buffers.  Get buffers (via getnewbuf()), and
	 * steal their pages.
	 */
	while (bp->b_bufsize < desired_size) {
		int amt;

		/* find a buffer */
		getnewbuf(0, 0, &nbp);
		SET(nbp->b_flags, B_INVAL);
		binshash(nbp, &invalhash);

		/* and steal its pages, up to the amount we need */
		amt = MIN(nbp->b_bufsize, (desired_size - bp->b_bufsize));
		pagemove((nbp->b_data + nbp->b_bufsize - amt),
		    bp->b_data + bp->b_bufsize, amt);
		bp->b_bufsize += amt;
		nbp->b_bufsize -= amt;

		/* reduce transfer count if we stole some data */
		if (nbp->b_bcount > nbp->b_bufsize)
			nbp->b_bcount = nbp->b_bufsize;

#ifdef DIAGNOSTIC
		if (nbp->b_bufsize < 0)
			panic("allocbuf: negative bufsize");
#endif
		brelse(nbp);
	}

	/*
	 * If we want a buffer smaller than the current size,
	 * shrink this buffer.  Grab a buf head from the EMPTY queue,
	 * move the excess pages onto it, and put it on the front of
	 * the AGE queue.  If there are no free buffer headers, leave
	 * the buffer alone.
	 */
	if (bp->b_bufsize > desired_size) {
		s = splbio();
		if ((nbp = bufqueues[BQ_EMPTY].tqh_first) == NULL) {
			/* No free buffer head */
			splx(s);
			goto out;
		}
		bremfree(nbp);
		SET(nbp->b_flags, B_BUSY);
		splx(s);

		/* move the pages to it and note this change */
		pagemove(bp->b_data + desired_size, nbp->b_data,
		    bp->b_bufsize - desired_size);
		nbp->b_bufsize = bp->b_bufsize - desired_size;
		bp->b_bufsize = desired_size;
		nbp->b_bcount = 0;
		SET(nbp->b_flags, B_INVAL);

		/* release the newly-filled buffer and leave */
		brelse(nbp);
	}

out:
	bp->b_bcount = size;
}
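/*
 * Worked example (illustrative, not from the source): allocbuf()
 * rounds the allocation up to whole pages but leaves the transfer
 * count at the requested byte size.  This standalone demo assumes a
 * 4 KB page and the usual kernel round_page() definition.
 */
#include <stdio.h>

#define PAGE_SIZE	4096
#define round_page(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
	int size = 6144;	/* requested b_bcount */

	/* b_bufsize becomes 8192 (two pages); b_bcount stays 6144 */
	printf("b_bufsize = %ld, b_bcount = %d\n",
	    (long)round_page(size), size);
	return 0;
}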
/*
 * Get a block of requested size that is associated with
 * a given vnode and block offset.  If it is found in the
 * block cache, mark it as having been found, make it busy
 * and return it.  Otherwise, return an empty block of the
 * correct size.  It is up to the caller to ensure that the
 * cached blocks are of the correct size.
 */
struct buf *
getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo)
{
	struct bufhashhdr *bh;
	struct buf *bp, *nbp = NULL;
	int s, err;

	/*
	 * XXX
	 * The following is an inlined version of 'incore()', but with
	 * the 'invalid' test moved to after the 'busy' test.  It's
	 * necessary because there are some cases in which the NFS
	 * code sets B_INVAL prior to writing data to the server, but
	 * in which the buffers actually contain valid data.  In this
	 * case, we can't allow the system to allocate a new buffer for
	 * the block until the write is finished.
	 */
	bh = BUFHASH(vp, blkno);
start:
	LIST_FOREACH(bp, bh, b_hash) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;

		s = splbio();
		if (ISSET(bp->b_flags, B_BUSY)) {
			SET(bp->b_flags, B_WANTED);
			err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
			    slptimeo);
			splx(s);
			if (err)
				return (NULL);
			goto start;
		}

		if (!ISSET(bp->b_flags, B_INVAL)) {
			SET(bp->b_flags, (B_BUSY | B_CACHE));
			bremfree(bp);
			splx(s);
			break;
		}
		splx(s);
	}

	if (bp == NULL) {
		if (nbp == NULL && getnewbuf(slpflag, slptimeo, &nbp) != 0)
			goto start;
		bp = nbp;
		binshash(bp, bh);
		bp->b_blkno = bp->b_lblkno = blkno;
		s = splbio();
		bgetvp(vp, bp);
		splx(s);
	} else if (nbp != NULL) {
		/*
		 * Set B_AGE so that the buffer appears at the BQ_CLEAN
		 * head and gets reused ASAP.
		 */
		SET(nbp->b_flags, B_AGE);
		brelse(nbp);
	}

	allocbuf(bp, size);

	return (bp);
}
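/*
 * For reference (assumed; defined in the corresponding buf.h, not in
 * this file): the BUFHASH macro used above presumably pairs with the
 * hashinit() call in bufinit(), where bufhashtbl is the table and
 * 'bufhash' holds the power-of-two size mask.  The 4.4BSD-derived
 * definition looks roughly like this:
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((long)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])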
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk_common(void *arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
{
	ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	struct buf *dp;
	struct buf *nbp = NULL;
	struct buf *errbp;
	uint_t index;
	kmutex_t *hmp;
	struct hbuf *hp;

	if (getmajor(dev) >= devcnt)
		cmn_err(CE_PANIC, "blkdev");

	biostats.bio_lookup.value.ui32++;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	mutex_enter(hmp);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Avoid holding the hash lock in the event that
		 * the buffer is locked by someone.  Since the hash chain
		 * may change when we drop the hash lock
		 * we have to start at the beginning of the chain if the
		 * buffer identity/contents aren't valid.
		 */
		if (!sema_tryp(&bp->b_sem)) {
			biostats.bio_bufbusy.value.ui32++;
			mutex_exit(hmp);
			/*
			 * OK, we are dealing with a busy buffer.
			 * In the case that we are panicking and we
			 * got called from bread(), we have some chance
			 * for error recovery.  So better bail out from
			 * here since sema_p() won't block.  If we got
			 * called directly from ufs routines, there is
			 * no way to report an error yet.
			 */
			if (panicstr && errflg)
				goto errout;

			/*
			 * For the following line of code to work
			 * correctly never kmem_free the buffer "header".
			 */
			sema_p(&bp->b_sem);
			if (bp->b_blkno != blkno || bp->b_edev != dev ||
			    (bp->b_flags & B_STALE)) {
				sema_v(&bp->b_sem);
				mutex_enter(hmp);
				goto loop;	/* start over */
			}
			mutex_enter(hmp);
		}

		/* Found */
		biostats.bio_hit.value.ui32++;
		bp->b_flags &= ~B_AGE;

		/*
		 * Yank it off the free/delayed write lists
		 */
		hp->b_length--;
		notavail(bp);
		mutex_exit(hmp);

		ASSERT((bp->b_flags & B_NOCACHE) == 0);

		if (nbp == NULL) {
			/*
			 * Make the common path short.
			 */
			ASSERT(SEMA_HELD(&bp->b_sem));
			return (bp);
		}

		biostats.bio_bufdup.value.ui32++;

		/*
		 * The buffer must have entered during the lock upgrade
		 * so free the new buffer we allocated and return the
		 * found buffer.
		 */
		kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
		nbp->b_un.b_addr = NULL;

		/*
		 * Account for the memory
		 */
		mutex_enter(&bfree_lock);
		bfreelist.b_bufsize += nbp->b_bufsize;
		mutex_exit(&bfree_lock);

		/*
		 * Destroy buf identity, and place on avail list
		 */
		nbp->b_dev = (o_dev_t)NODEV;
		nbp->b_edev = NODEV;
		nbp->b_flags = 0;
		nbp->b_file = NULL;
		nbp->b_offset = -1;

		sema_v(&nbp->b_sem);
		bio_bhdr_free(nbp);

		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	}

	/*
	 * bio_getfreeblk may block so check the hash chain again.
	 */
	if (nbp == NULL) {
		mutex_exit(hmp);
		nbp = bio_getfreeblk(bsize);
		mutex_enter(hmp);
		goto loop;
	}

	/*
	 * New buffer.  Assign nbp and stick it on the hash.
	 */
	nbp->b_flags = B_BUSY;
	nbp->b_edev = dev;
	nbp->b_dev = (o_dev_t)cmpdev(dev);
	nbp->b_blkno = blkno;
	nbp->b_iodone = NULL;
	nbp->b_bcount = bsize;

	/*
	 * If we are given a ufsvfsp and the vfs_root field is NULL
	 * then this must be I/O for a superblock.  A superblock's
	 * buffer is set up in mountfs() and there is no root vnode
	 * at that point.
	 */
	if (ufsvfsp && ufsvfsp->vfs_root)
		nbp->b_vp = ufsvfsp->vfs_root;
	else
		nbp->b_vp = NULL;

	ASSERT((nbp->b_flags & B_NOCACHE) == 0);

	binshash(nbp, dp);
	mutex_exit(hmp);

	ASSERT(SEMA_HELD(&nbp->b_sem));

	return (nbp);

	/*
	 * Come here in case of an internal error.  At this point we
	 * couldn't get a buffer, but we have to return one.  Hence we
	 * allocate some kind of error reply buffer on the fly.  This
	 * buffer is marked as B_NOCACHE | B_AGE | B_ERROR | B_DONE to
	 * ensure the following:
	 *	- B_ERROR will indicate error to the caller.
	 *	- B_DONE will prevent us from reading the buffer from
	 *	  the device.
	 *	- B_NOCACHE will cause the buffer to be freed in
	 *	  brelse().
	 */
errout:
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}
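/*
 * Usage sketch (illustrative, not from the source): a bread()-style
 * caller of getblk_common() only needs to test B_ERROR on the result;
 * the errout buffer above is crafted so that B_DONE suppresses the
 * device read and brelse() later frees it via B_NOCACHE.  The function
 * name bread_sketch() and the use of bdev_strategy() as the device
 * entry point are assumptions.
 */
struct buf *
bread_sketch(dev_t dev, daddr_t blkno, long bsize)
{
	struct buf *bp;

	bp = getblk_common(NULL, dev, blkno, bsize, 1 /* errflg */);
	if ((bp->b_flags & (B_DONE | B_ERROR)) == 0) {
		bp->b_flags |= B_READ;
		(void) bdev_strategy(bp);	/* start the transfer */
		(void) biowait(bp);		/* wait for completion */
	}
	return (bp);	/* caller checks bp->b_flags & B_ERROR */
}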