/*
 * Asynchronous I/O daemons for client nfs.
 * They do read-ahead and write-behind operations on the block I/O cache.
 * Returns when the idle timeout set by the iodmaxidle sysctl expires.
 */
static void
nfssvc_iod(void *instance)
{
	struct buf *bp;
	struct nfsmount *nmp;
	int myiod, timo;
	int error = 0;

	mtx_lock(&nfs_iod_mtx);
	myiod = (int *)instance - nfs_asyncdaemon;
	/*
	 * Main loop
	 */
	for (;;) {
	    while (((nmp = nfs_iodmount[myiod]) == NULL)
		   || !TAILQ_FIRST(&nmp->nm_bufq)) {
		if (myiod >= nfs_iodmax)
			goto finish;
		if (nmp)
			nmp->nm_bufqiods--;
		if (nfs_iodwant[myiod] == NFSIOD_NOT_AVAILABLE)
			nfs_iodwant[myiod] = NFSIOD_AVAILABLE;
		nfs_iodmount[myiod] = NULL;
		/*
		 * Always keep at least nfs_iodmin kthreads.
		 */
		timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
		error = msleep(&nfs_iodwant[myiod], &nfs_iod_mtx,
		    PWAIT | PCATCH, "-", timo);
		if (error) {
			nmp = nfs_iodmount[myiod];
			/*
			 * Rechecking the nm_bufq closes a rare race where the
			 * nfsiod is woken up at the exact time the idle
			 * timeout fires.
			 */
			if (nmp && TAILQ_FIRST(&nmp->nm_bufq))
				error = 0;
			break;
		}
	    }
	    if (error)
		    break;
	    while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
		int giant_locked = 0;

		/* Take one off the front of the list */
		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen--;
		if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
			nmp->nm_bufqwant = 0;
			wakeup(&nmp->nm_bufq);
		}
		mtx_unlock(&nfs_iod_mtx);
		if (NFS_ISV4(bp->b_vp)) {
			giant_locked = 1;
			mtx_lock(&Giant);
		}
		if (bp->b_flags & B_DIRECT) {
			KASSERT((bp->b_iocmd == BIO_WRITE),
			    ("nfssvc_iod: BIO_WRITE not set"));
			(void)nfs_doio_directwrite(bp);
		} else {
			if (bp->b_iocmd == BIO_READ)
				(void) nfs_doio(bp->b_vp, bp, bp->b_rcred, NULL);
			else
				(void) nfs_doio(bp->b_vp, bp, bp->b_wcred, NULL);
		}
		if (giant_locked)
			mtx_unlock(&Giant);
		mtx_lock(&nfs_iod_mtx);
		/*
		 * If there is more than one iod on this mount, then defect
		 * so that the iods can be shared out fairly between the
		 * mounts.
		 */
		if (nfs_defect && nmp->nm_bufqiods > 1) {
			NFS_DPF(ASYNCIO,
				("nfssvc_iod: iod %d defecting from mount %p\n",
				 myiod, nmp));
			nfs_iodmount[myiod] = NULL;
			nmp->nm_bufqiods--;
			break;
		}
	    }
	}
finish:
	nfs_asyncdaemon[myiod] = 0;
	if (nmp)
		nmp->nm_bufqiods--;
	nfs_iodwant[myiod] = NFSIOD_NOT_AVAILABLE;
	nfs_iodmount[myiod] = NULL;
	/* Someone may be waiting for the last nfsiod to terminate. */
	if (--nfs_numasync == 0)
		wakeup(&nfs_numasync);
	mtx_unlock(&nfs_iod_mtx);
	if ((error == 0) || (error == EWOULDBLOCK))
		kproc_exit(0);
	/* Abnormal termination */
	kproc_exit(1);
}
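/*
 * Illustrative sketch (not part of the driver): a minimal userland model of
 * the handoff above, using pthreads in place of msleep()/wakeup().  The
 * producer plays the role of nfs_asyncio() queueing a buffer on nm_bufq and
 * waking an idle iod; the consumer mirrors the iod's timed sleep, with
 * ETIMEDOUT standing in for msleep() returning EWOULDBLOCK after iodmaxidle
 * expires.  Rechecking the queue length after a timeout mirrors the race
 * noted in the comment above.  All names here (iod_queue, iod_run, the 2s
 * idle limit) are hypothetical.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

struct iod_queue {
	pthread_mutex_t	mtx;	/* plays the role of nfs_iod_mtx */
	pthread_cond_t	cv;	/* plays the role of the iodwant channel */
	int		len;	/* plays the role of nm_bufqlen */
};

/* Consumer: sleep until work arrives or the idle timeout fires. */
static void *
iod_run(void *arg)
{
	struct iod_queue *q = arg;
	struct timespec deadline;
	int error;

	pthread_mutex_lock(&q->mtx);
	for (;;) {
		while (q->len == 0) {
			clock_gettime(CLOCK_REALTIME, &deadline);
			deadline.tv_sec += 2;	/* stand-in for iodmaxidle */
			error = pthread_cond_timedwait(&q->cv, &q->mtx,
			    &deadline);
			/* Recheck the queue even on timeout (the race). */
			if (error == ETIMEDOUT && q->len == 0)
				goto finish;	/* idle too long: exit */
		}
		q->len--;			/* "TAILQ_REMOVE" one buffer */
		pthread_mutex_unlock(&q->mtx);
		printf("iod: servicing one buffer\n");	/* nfs_doio() here */
		pthread_mutex_lock(&q->mtx);
	}
finish:
	pthread_mutex_unlock(&q->mtx);
	return (NULL);
}

int
main(void)
{
	struct iod_queue q = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
	};
	pthread_t td;

	pthread_create(&td, NULL, iod_run, &q);

	/* Producer: the nfs_asyncio() side of the handoff. */
	pthread_mutex_lock(&q.mtx);
	q.len++;			/* "TAILQ_INSERT_TAIL" a buffer */
	pthread_cond_signal(&q.cv);	/* wakeup() the sleeping iod */
	pthread_mutex_unlock(&q.mtx);

	pthread_join(td, NULL);	/* returns after the idle timeout */
	return (0);
}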
/*
 * Look for the request in the cache.
 * If found then
 *    return action and optionally reply
 * else
 *    insert it in the cache.
 *
 * The rules are as follows:
 * - if in progress, return RC_DROPIT (drop the request)
 * - if completed within DELAY of the current time, return RC_DROPIT
 * - if completed longer ago, return RC_REPLY if the reply was cached,
 *   otherwise return RC_DOIT
 *
 * Update/add new request at end of lru list.
 */
int
nfsrv_getcache(struct nfsrv_descript *nd, struct nfssvc_sock *slp,
	       struct mbuf **repp)
{
	struct nfsrvcache *rp;
	struct mbuf *mb;
	struct sockaddr_in *saddr;
	caddr_t bpos;
	int ret;

	/*
	 * Don't cache recent requests for reliable transport protocols.
	 * (Maybe we should for the case of a reconnect, but..)
	 */
	if (!nd->nd_nam2)
		return (RC_DOIT);

	lwkt_gettoken(&srvcache_token);
loop:
	for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != NULL;
	    rp = rp->rc_hash.le_next) {
		if (nd->nd_retxid == rp->rc_xid &&
		    nd->nd_procnum == rp->rc_proc &&
		    netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			NFS_DPF(RC, ("H%03x", rp->rc_xid & 0xfff));
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				tsleep((caddr_t)rp, 0, "nfsrc", 0);
				goto loop;
			}
			rp->rc_flag |= RC_LOCKED;
			/* If not at end of LRU chain, move it there */
			if (TAILQ_NEXT(rp, rc_lru) != NULL) {
				TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
				TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);
			}
			if (rp->rc_state == RC_UNUSED)
				panic("nfsrv cache");
			if (rp->rc_state == RC_INPROG) {
				nfsstats.srvcache_inproghits++;
				ret = RC_DROPIT;
			} else if (rp->rc_flag & RC_REPSTATUS) {
				nfsstats.srvcache_nonidemdonehits++;
				nfs_rephead(0, nd, slp, rp->rc_status,
				    repp, &mb, &bpos);
				ret = RC_REPLY;
			} else if (rp->rc_flag & RC_REPMBUF) {
				nfsstats.srvcache_nonidemdonehits++;
				*repp = m_copym(rp->rc_reply, 0, M_COPYALL,
				    MB_WAIT);
				ret = RC_REPLY;
			} else {
				nfsstats.srvcache_idemdonehits++;
				rp->rc_state = RC_INPROG;
				ret = RC_DOIT;
			}
			rp->rc_flag &= ~RC_LOCKED;
			if (rp->rc_flag & RC_WANTED) {
				rp->rc_flag &= ~RC_WANTED;
				wakeup((caddr_t)rp);
			}
			lwkt_reltoken(&srvcache_token);
			return (ret);
		}
	}

	nfsstats.srvcache_misses++;
	NFS_DPF(RC, ("M%03x", nd->nd_retxid & 0xfff));
	if (numnfsrvcache < desirednfsrvcache) {
		rp = kmalloc((u_long)sizeof *rp, M_NFSD, M_WAITOK | M_ZERO);
		numnfsrvcache++;
		rp->rc_flag = RC_LOCKED;
	} else {
		rp = TAILQ_FIRST(&nfsrvlruhead);
		while ((rp->rc_flag & RC_LOCKED) != 0) {
			rp->rc_flag |= RC_WANTED;
			tsleep((caddr_t)rp, 0, "nfsrc", 0);
			rp = TAILQ_FIRST(&nfsrvlruhead);
		}
		rp->rc_flag |= RC_LOCKED;
		LIST_REMOVE(rp, rc_hash);
		TAILQ_REMOVE(&nfsrvlruhead, rp, rc_lru);
		if (rp->rc_flag & RC_REPMBUF) {
			m_freem(rp->rc_reply);
			rp->rc_reply = NULL;
			rp->rc_flag &= ~RC_REPMBUF;
		}
		if (rp->rc_flag & RC_NAM) {
			kfree(rp->rc_nam, M_SONAME);
			rp->rc_nam = NULL;
			rp->rc_flag &= ~RC_NAM;
		}
	}
	TAILQ_INSERT_TAIL(&nfsrvlruhead, rp, rc_lru);

	rp->rc_state = RC_INPROG;
	rp->rc_xid = nd->nd_retxid;
	saddr = (struct sockaddr_in *)nd->nd_nam;
	switch (saddr->sin_family) {
	case AF_INET:
		rp->rc_flag |= RC_INETADDR;
		rp->rc_inetaddr = saddr->sin_addr.s_addr;
		break;
	case AF_ISO:
	default:
		rp->rc_flag |= RC_NAM;
		rp->rc_nam = dup_sockaddr(nd->nd_nam);
		break;
	}
	rp->rc_proc = nd->nd_procnum;
	LIST_INSERT_HEAD(NFSRCHASH(nd->nd_retxid), rp, rc_hash);

	rp->rc_flag &= ~RC_LOCKED;
	if (rp->rc_flag & RC_WANTED) {
		rp->rc_flag &= ~RC_WANTED;
		wakeup((caddr_t)rp);
	}
	lwkt_reltoken(&srvcache_token);
	return (RC_DOIT);
}
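/*
 * Illustrative sketch (not part of the server): the cache-hit decision
 * logic above, distilled into a pure function over a simplified entry.
 * The names mirror the driver's rc_state and RC_REP* flags, but the types
 * and constants here are hypothetical stand-ins, not the kernel's.
 */
#include <assert.h>

enum { RCS_UNUSED, RCS_INPROG, RCS_DONE };	/* simplified rc_state */
#define RCF_REPSTATUS	0x01	/* a status code was cached */
#define RCF_REPMBUF	0x02	/* a full reply mbuf chain was cached */

enum action { DOIT, DROPIT, REPLY };

/* What nfsrv_getcache() decides for a retransmission it finds cached. */
static enum action
cache_hit_action(int state, int flags)
{
	if (state == RCS_INPROG)
		return (DROPIT);	/* first copy is still being served */
	if (flags & (RCF_REPSTATUS | RCF_REPMBUF))
		return (REPLY);		/* non-idempotent: replay the reply */
	return (DOIT);			/* idempotent: safe to redo the op */
}

int
main(void)
{
	assert(cache_hit_action(RCS_INPROG, 0) == DROPIT);
	assert(cache_hit_action(RCS_DONE, RCF_REPMBUF) == REPLY);
	assert(cache_hit_action(RCS_DONE, 0) == DOIT);
	return (0);
}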
/*
 * Update a request cache entry after the rpc has been done.
 */
void
nfsrv_updatecache(struct nfsrv_descript *nd, int repvalid, struct mbuf *repmbuf)
{
	struct nfsrvcache *rp;

	if (!nd->nd_nam2)
		return;

	lwkt_gettoken(&srvcache_token);
loop:
	for (rp = NFSRCHASH(nd->nd_retxid)->lh_first; rp != NULL;
	    rp = rp->rc_hash.le_next) {
		if (nd->nd_retxid == rp->rc_xid &&
		    nd->nd_procnum == rp->rc_proc &&
		    netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
			NFS_DPF(RC, ("U%03x", rp->rc_xid & 0xfff));
			if ((rp->rc_flag & RC_LOCKED) != 0) {
				rp->rc_flag |= RC_WANTED;
				tsleep((caddr_t)rp, 0, "nfsrc", 0);
				goto loop;
			}
			rp->rc_flag |= RC_LOCKED;
			if (rp->rc_state == RC_DONE) {
				/*
				 * This can occur if the cache is too small.
				 * Retransmits of the same request aren't
				 * dropped, so we may see the operation
				 * complete more than once.
				 */
				if (rp->rc_flag & RC_REPMBUF) {
					m_freem(rp->rc_reply);
					rp->rc_reply = NULL;
					rp->rc_flag &= ~RC_REPMBUF;
				}
			}
			rp->rc_state = RC_DONE;

			/*
			 * If we have a valid reply, update the status and
			 * save the reply for non-idempotent rpc's.
			 */
			if (repvalid && nonidempotent[nd->nd_procnum]) {
				if ((nd->nd_flag & ND_NFSV3) == 0 &&
				    nfsv2_repstat[nfsv2_procid[nd->nd_procnum]]) {
					rp->rc_status = nd->nd_repstat;
					rp->rc_flag |= RC_REPSTATUS;
				} else {
					if (rp->rc_flag & RC_REPMBUF) {
						m_freem(rp->rc_reply);
						rp->rc_reply = NULL;
						rp->rc_flag &= ~RC_REPMBUF;
					}
					rp->rc_reply = m_copym(repmbuf, 0,
					    M_COPYALL, MB_WAIT);
					rp->rc_flag |= RC_REPMBUF;
				}
			}
			rp->rc_flag &= ~RC_LOCKED;
			if (rp->rc_flag & RC_WANTED) {
				rp->rc_flag &= ~RC_WANTED;
				wakeup((caddr_t)rp);
			}
			break;
		}
	}
	lwkt_reltoken(&srvcache_token);
	NFS_DPF(RC, ("L%03x", nd->nd_retxid & 0xfff));
}
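/*
 * Illustrative sketch (not part of the server): how a dispatch loop is
 * expected to pair nfsrv_getcache() with nfsrv_updatecache().  The request,
 * reply, and service routines below (getcache, updatecache, service) are
 * hypothetical stubs; only the RC_* control flow reflects the code above.
 */
#include <stdio.h>

enum { RC_DOIT, RC_DROPIT, RC_REPLY };

struct request { int id; };
struct reply { int status; };

/* Stub stand-ins for nfsrv_getcache() / nfsrv_updatecache(). */
static int
getcache(struct request *req, struct reply **cached)
{
	(void)req;
	*cached = NULL;
	return (RC_DOIT);		/* pretend it's a fresh request */
}

static void
updatecache(struct request *req, int repvalid, struct reply *rep)
{
	(void)req; (void)repvalid; (void)rep;
}

static int
service(struct request *req, struct reply *rep)
{
	(void)req;
	rep->status = 0;
	return (0);			/* the actual NFS procedure */
}

int
main(void)
{
	struct request req = { 1 };
	struct reply rep, *cached;

	switch (getcache(&req, &cached)) {
	case RC_DOIT:			/* new request: run it, then record */
		service(&req, &rep);	/* the outcome for retransmits */
		updatecache(&req, 1, &rep);
		printf("served and cached\n");
		break;
	case RC_REPLY:			/* duplicate: resend the cached reply */
		printf("replayed cached reply\n");
		break;
	case RC_DROPIT:			/* duplicate of an in-flight request */
		break;			/* silently dropped */
	}
	return (0);
}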
/*
 * Asynchronous I/O daemons for client nfs.
 * They do read-ahead and write-behind operations on the block I/O cache.
 * Returns when the idle timeout set by the iodmaxidle sysctl expires.
 */
static void
nfssvc_iod(void *instance)
{
	struct buf *bp;
	struct nfsmount *nmp;
	int myiod, timo;
	int error = 0;

	mtx_lock(&Giant);
	myiod = (int *)instance - nfs_asyncdaemon;
	/*
	 * Main loop
	 */
	for (;;) {
	    while (((nmp = nfs_iodmount[myiod]) == NULL
		    || !TAILQ_FIRST(&nmp->nm_bufq)) && error == 0) {
		if (myiod >= nfs_iodmax)
			goto finish;
		if (nmp)
			nmp->nm_bufqiods--;
		nfs_iodwant[myiod] = curthread->td_proc;
		nfs_iodmount[myiod] = NULL;
		/*
		 * Always keep at least nfs_iodmin kthreads.
		 */
		timo = (myiod < nfs_iodmin) ? 0 : nfs_iodmaxidle * hz;
		error = tsleep((caddr_t)&nfs_iodwant[myiod], PWAIT | PCATCH,
		    "nfsidl", timo);
	    }
	    if (error)
		    break;
	    while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
		/* Take one off the front of the list */
		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen--;
		if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
			nmp->nm_bufqwant = 0;
			wakeup(&nmp->nm_bufq);
		}
		if (bp->b_iocmd == BIO_READ)
			(void) nfs_doio(bp, bp->b_rcred, NULL);
		else
			(void) nfs_doio(bp, bp->b_wcred, NULL);
		/*
		 * If there is more than one iod on this mount, then defect
		 * so that the iods can be shared out fairly between the
		 * mounts.
		 */
		if (nfs_defect && nmp->nm_bufqiods > 1) {
			NFS_DPF(ASYNCIO,
				("nfssvc_iod: iod %d defecting from mount %p\n",
				 myiod, nmp));
			nfs_iodmount[myiod] = NULL;
			nmp->nm_bufqiods--;
			break;
		}
	    }
	}
finish:
	nfs_asyncdaemon[myiod] = 0;
	if (nmp)
		nmp->nm_bufqiods--;
	nfs_iodwant[myiod] = NULL;
	nfs_iodmount[myiod] = NULL;
	nfs_numasync--;
	if ((error == 0) || (error == EWOULDBLOCK))
		kthread_exit(0);
	/* Abnormal termination */
	kthread_exit(1);
}
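/*
 * Illustrative sketch (not part of the driver): the timeout arithmetic and
 * exit convention above, modeled in plain C.  With timo == 0, tsleep()
 * sleeps forever, so the first nfs_iodmin daemons never idle out; the rest
 * wake with EWOULDBLOCK after iodmaxidle seconds and exit normally, while
 * a signal caught via PCATCH yields a nonzero, "abnormal" exit.  The
 * constants below (HZ, NFS_IODMIN, NFS_IODMAXIDLE) are assumed values.
 */
#include <errno.h>
#include <stdio.h>

#define HZ		1000	/* assumed clock tick rate */
#define NFS_IODMIN	4	/* assumed minimum daemon count */
#define NFS_IODMAXIDLE	120	/* assumed idle limit, seconds */

/* Mirror of the driver's timeout choice: 0 means "sleep forever". */
static int
iod_timeout(int myiod)
{
	return ((myiod < NFS_IODMIN) ? 0 : NFS_IODMAXIDLE * HZ);
}

/* Mirror of the exit convention at the "finish" label. */
static int
iod_exit_status(int error)
{
	return ((error == 0 || error == EWOULDBLOCK) ? 0 : 1);
}

int
main(void)
{
	printf("iod 0 timo=%d ticks\n", iod_timeout(0));	/* 0: pinned */
	printf("iod 9 timo=%d ticks\n", iod_timeout(9));	/* 120000 */
	printf("timed out -> exit %d\n", iod_exit_status(EWOULDBLOCK));
	printf("signalled -> exit %d\n", iod_exit_status(EINTR));
	return (0);
}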