/**
 * @fn struct buf *getblk(int dev, bc_daddr_t blkno)
 * Retrieves a buffer for a given device and block number.
 * If it is not already present in the hash table of previously allocated
 * buffers, a buffer is taken from the free list and assigned to the
 * corresponding device and block.
 *
 * Assign a buffer for the given block. If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 * @param dev : device (major+minor)
 * @param blkno : block number
 * @return the buffer associated with device dev and block number blkno
 */
struct buf *getblk(int dev, bc_daddr_t blkno)
{
	struct buf *bp;
	struct buf *dp;

loop:
	dp = bhash(dev, blkno);
	if (dp == NULL) {
		/*
		 * Should not happen: bhash() is expected to map every
		 * (dev, blkno) pair to a hash-chain header.
		 */
	}
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev) {
			continue;
		}
		spl(BDINHB);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			tsleep(PRIBIO + 1, (caddr_t)bp);
			goto loop;
		}
		spl(NORMAL);
		notavail(bp);
		return bp;
	}
	spl(BDINHB);
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		tsleep(PRIBIO + 1, (caddr_t)&bfreelist);
		goto loop;
	}
	spl(NORMAL);
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	bp->b_flags = B_BUSY;
	/* Unlink the buffer from its old hash chain... */
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	/* ...and insert it at the head of the new one. */
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	return bp;
}
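For context, here is a minimal sketch of the classic bread()-style reader that traditionally sits on top of a getblk() like the one above. B_READ, B_DONE, bdevsw[], major() and iowait() do not appear in the excerpt and are assumed from the same family of kernels; treat the whole routine as illustrative rather than the author's implementation.

/*
 * Illustrative sketch only: a classic bread()-style caller for the
 * getblk() above.  B_READ, B_DONE, bdevsw[], major() and iowait() are
 * assumptions borrowed from the traditional buffer-cache environment.
 */
struct buf *
bread(int dev, bc_daddr_t blkno)
{
	struct buf *bp;

	bp = getblk(dev, blkno);	/* find or claim a buffer */
	if (bp->b_flags & B_DONE)	/* cache hit: contents already valid */
		return bp;
	bp->b_flags |= B_READ;			/* schedule the read ... */
	(*bdevsw[major(dev)].d_strategy)(bp);
	iowait(bp);				/* ... and wait for completion */
	return bp;
}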
/*
 * Ensure that a specified block is up-to-date on disk.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set. This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {			/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		sema_v(&sbp->b_sem);
	}
}
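A hedged usage sketch: a caller that batches an update with bdwrite() and later wants a durability guarantee for that one block. bread() and bdwrite() are the standard buffer-cache entry points; the modification step is elided and the arguments are placeholders.

/*
 * Illustrative sketch only: delay a write for batching, then force that
 * one block to stable storage with blkflush().
 */
void
example_update_then_commit(dev_t dev, daddr_t blkno, long bsize)
{
	struct buf *bp;

	bp = bread(dev, blkno, bsize);	/* read (or find) the block */
	/* ... modify bp->b_un.b_addr here ... */
	bdwrite(bp);			/* mark B_DELWRI and release */

	/*
	 * Later: if the delayed-write copy is still cached and still
	 * belongs to (dev, blkno), blkflush() writes it out synchronously;
	 * otherwise it is a no-op.
	 */
	blkflush(dev, blkno);
}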
/*
 * Return a buffer w/o sleeping
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf *bp;
	struct buf *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				sema_v(&bp->b_sem);
				break;
			}
		}
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}
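A hedged sketch of the opportunistic-lookup pattern trygetblk() enables: take the cached buffer if it is valid and uncontended, otherwise fall back to the blocking bread() path. The wrapper name and the bsize parameter are illustrative; the buffer is released with brelse() in either case.

/*
 * Illustrative sketch only: fast path via trygetblk(), blocking
 * fallback via bread().
 */
struct buf *
example_read_block_fast(dev_t dev, daddr_t blkno, long bsize)
{
	struct buf *bp;

	if ((bp = trygetblk(dev, blkno)) != NULL)
		return (bp);			/* cached, B_DONE, no sleeping */
	return (bread(dev, blkno, bsize));	/* may sleep for I/O */
}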
/*
 * As part of file system hardening, this daemon is awakened
 * every second to flush cached data which includes the
 * buffer cache, the inode cache and mapped pages.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t bcount;
	kmutex_t *hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime = 0;
	proc_fsflush->p_cutime = 0;
	proc_fsflush->p_utime = 0;
	bcopy("fsflush", curproc->p_user.u_psargs, 8);
	bcopy("fsflush", curproc->p_user.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing.
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

	autoup = v.v_autoup * hz;
	icount = v.v_autoup / tune.t_fsflushr;
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);	/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {
		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

			if ((bp->b_flags & B_DELWRI) &&
			    (ddi_get_lbolt() - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
					    bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 * There is no need to wakeup any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (ddi_get_lbolt64() / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes).
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
				    SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}
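The cv_wait() above ("wait for clock") implies that a periodic caller signals fsflush_cv roughly every tune.t_fsflushr seconds. Below is a sketch of what that wake-up side might look like; the per-second tick hook and the fsflushcnt counter are assumptions, and only fsflush_cv, fsflush_lock and tune.t_fsflushr come from the code above.

/*
 * Illustrative sketch only: the wake-up half of the handshake.  The tick
 * hook and fsflushcnt are assumed names, not part of the excerpt.
 */
static int fsflushcnt;

void
example_per_second_tick(void)
{
	if (--fsflushcnt <= 0) {
		fsflushcnt = tune.t_fsflushr;
		mutex_enter(&fsflush_lock);
		cv_signal(&fsflush_cv);		/* let fsflush() run one pass */
		mutex_exit(&fsflush_lock);
	}
}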
/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffer for device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffer via the delayed write list,
	 * it may be busy and we would block trying to get the
	 * b_sem lock while holding hash lock. So transfer all the
	 * candidates on the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		vfs_syncprogress();
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		vfs_syncprogress();
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doingflush--;
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}
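Two typical callers, as a hedged sketch: a sync(2)-style sweep that flushes every device by passing NODEV, and a pre-unmount path that restricts the sweep to the device being given up. The wrapper names are illustrative.

/*
 * Illustrative sketch only: NODEV flushes write-behind blocks for all
 * devices; a specific dev_t restricts the sweep to that device.
 */
void
example_sync_all(void)
{
	bflush(NODEV);
}

void
example_pre_unmount(dev_t dev)
{
	bflush(dev);
}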
/*
 * Assign a buffer for the given block. If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk_common(void *arg, dev_t dev, daddr_t blkno, long bsize, int errflg)
{
	ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	struct buf *dp;
	struct buf *nbp = NULL;
	struct buf *errbp;
	uint_t index;
	kmutex_t *hmp;
	struct hbuf *hp;

	if (getmajor(dev) >= devcnt)
		cmn_err(CE_PANIC, "blkdev");

	biostats.bio_lookup.value.ui32++;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	mutex_enter(hmp);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Avoid holding the hash lock in the event that
		 * the buffer is locked by someone. Since the hash chain
		 * may change when we drop the hash lock
		 * we have to start at the beginning of the chain if the
		 * buffer identity/contents aren't valid.
		 */
		if (!sema_tryp(&bp->b_sem)) {
			biostats.bio_bufbusy.value.ui32++;
			mutex_exit(hmp);
			/*
			 * OK, we are dealing with a busy buffer.
			 * In the case that we are panicking and we
			 * got called from bread(), we have some chance
			 * for error recovery. So better bail out from
			 * here since sema_p() won't block. If we got
			 * called directly from ufs routines, there is
			 * no way to report an error yet.
			 */
			if (panicstr && errflg)
				goto errout;

			/*
			 * For the following line of code to work
			 * correctly never kmem_free the buffer "header".
			 */
			sema_p(&bp->b_sem);
			if (bp->b_blkno != blkno || bp->b_edev != dev ||
			    (bp->b_flags & B_STALE)) {
				sema_v(&bp->b_sem);
				mutex_enter(hmp);
				goto loop;	/* start over */
			}
			mutex_enter(hmp);
		}

		/* Found */
		biostats.bio_hit.value.ui32++;
		bp->b_flags &= ~B_AGE;

		/*
		 * Yank it off the free/delayed write lists
		 */
		hp->b_length--;
		notavail(bp);
		mutex_exit(hmp);

		ASSERT((bp->b_flags & B_NOCACHE) == NULL);

		if (nbp == NULL) {
			/*
			 * Make the common path short.
			 */
			ASSERT(SEMA_HELD(&bp->b_sem));
			return (bp);
		}

		biostats.bio_bufdup.value.ui32++;

		/*
		 * The buffer must have entered during the lock upgrade
		 * so free the new buffer we allocated and return the
		 * found buffer.
		 */
		kmem_free(nbp->b_un.b_addr, nbp->b_bufsize);
		nbp->b_un.b_addr = NULL;

		/*
		 * Account for the memory
		 */
		mutex_enter(&bfree_lock);
		bfreelist.b_bufsize += nbp->b_bufsize;
		mutex_exit(&bfree_lock);

		/*
		 * Destroy buf identity, and place on avail list
		 */
		nbp->b_dev = (o_dev_t)NODEV;
		nbp->b_edev = NODEV;
		nbp->b_flags = 0;
		nbp->b_file = NULL;
		nbp->b_offset = -1;

		sema_v(&nbp->b_sem);
		bio_bhdr_free(nbp);

		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	}

	/*
	 * bio_getfreeblk may block so check the hash chain again.
	 */
	if (nbp == NULL) {
		mutex_exit(hmp);
		nbp = bio_getfreeblk(bsize);
		mutex_enter(hmp);
		goto loop;
	}

	/*
	 * New buffer. Assign nbp and stick it on the hash.
	 */
	nbp->b_flags = B_BUSY;
	nbp->b_edev = dev;
	nbp->b_dev = (o_dev_t)cmpdev(dev);
	nbp->b_blkno = blkno;
	nbp->b_iodone = NULL;
	nbp->b_bcount = bsize;
	/*
	 * If we are given a ufsvfsp and the vfs_root field is NULL
	 * then this must be I/O for a superblock. A superblock's
	 * buffer is set up in mountfs() and there is no root vnode
	 * at that point.
	 */
	if (ufsvfsp && ufsvfsp->vfs_root) {
		nbp->b_vp = ufsvfsp->vfs_root;
	} else {
		nbp->b_vp = NULL;
	}

	ASSERT((nbp->b_flags & B_NOCACHE) == NULL);

	binshash(nbp, dp);
	mutex_exit(hmp);

	ASSERT(SEMA_HELD(&nbp->b_sem));

	return (nbp);

	/*
	 * Come here in case of an internal error. At this point we couldn't
	 * get a buffer, but we have to return one. Hence we allocate some
	 * kind of error reply buffer on the fly. This buffer is marked as
	 * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following:
	 *	- B_ERROR will indicate the error to the caller.
	 *	- B_DONE will prevent us from reading the buffer from
	 *	  the device.
	 *	- B_NOCACHE will cause this buffer to be freed in
	 *	  brelse().
	 */
errout:
	errbp = geteblk();
	sema_p(&errbp->b_sem);
	errbp->b_flags &= ~B_BUSY;
	errbp->b_flags |= (B_ERROR | B_DONE);
	return (errbp);
}
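getblk_common() is normally reached through a thin front end. Here is a sketch of what such a wrapper looks like, assuming the NULL-ufsvfsp / errflg == 0 convention visible above for callers that have no ufsvfs and no use for the panic-time error path; the wrapper name is illustrative.

/*
 * Illustrative sketch only: a public getblk()-style front end for
 * non-UFS callers.
 */
struct buf *
example_getblk(dev_t dev, daddr_t blkno, long bsize)
{
	return (getblk_common(NULL, dev, blkno, bsize, /* errflg */ 0));
}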
/*
 * Same as binval, except it can force-invalidate delayed-write buffers
 * (which may not have been flushed because of device errors). Also
 * makes sure that the retry write flag is cleared.
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* Gather bp's */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	mutex_enter(&blist_lock);
	bio_doinginval--;
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}
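A hedged sketch of the flush-then-invalidate sequence a file system might run while detaching from a device: push whatever can still be written, then force-invalidate the rest, treating EIO from bfinval() as "a delayed write was lost". The wrapper name is illustrative.

/*
 * Illustrative sketch only: flush, then force-invalidate.  bfinval()
 * returns EIO if a delayed-write buffer could not be cleared.
 */
int
example_detach_device(dev_t dev)
{
	bflush(dev);			/* push remaining delayed writes */
	return (bfinval(dev, /* force */ 1));
}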