/**
 * @brief Comparison function for entries in a shared DRC
 *
 * @param[in] lhs Left-hand-side
 * @param[in] rhs Right-hand-side
 *
 * @return -1,0,1.
 */
static inline int dupreq_shared_cmpf(const struct opr_rbtree_node *lhs,
				     const struct opr_rbtree_node *rhs)
{
	dupreq_entry_t *lk, *rk;

	lk = opr_containerof(lhs, dupreq_entry_t, rbt_k);
	rk = opr_containerof(rhs, dupreq_entry_t, rbt_k);

	switch (sockaddr_cmpf(&lk->hin.addr, &rk->hin.addr, false)) {
	case -1:
		return -1;
	case 0:
		switch (uint32_cmpf(lk->hin.tcp.rq_xid, rk->hin.tcp.rq_xid)) {
		case -1:
			return -1;
		case 0:
			return uint64_cmpf(lk->hk, rk->hk);
		default:
			break;
		}		/* xid */
		break;
	default:
		break;
	}			/* addr+port */

	return 1;
}
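/* A minimal sketch of the three-way comparator convention every *_cmpf
 * above relies on: return -1, 0, or 1 for less-than, equal, greater-than.
 * The body below is an assumption inferred from the call sites (the _sketch
 * suffix marks it as illustrative); uint64_cmpf would be identical over
 * uint64_t, and sockaddr_cmpf extends the same convention to socket
 * addresses. */
static inline int uint32_cmpf_sketch(uint32_t lhs, uint32_t rhs)
{
	if (lhs < rhs)
		return -1;
	if (lhs == rhs)
		return 0;
	return 1;
}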
/**
 * @brief Comparison function for recycled per-connection (TCP) DRCs
 *
 * @param[in] lhs Left-hand-side
 * @param[in] rhs Right-hand-side
 *
 * @return -1,0,1.
 */
static inline int drc_recycle_cmpf(const struct opr_rbtree_node *lhs,
				   const struct opr_rbtree_node *rhs)
{
	drc_t *lk, *rk;

	lk = opr_containerof(lhs, drc_t, d_u.tcp.recycle_k);
	rk = opr_containerof(rhs, drc_t, d_u.tcp.recycle_k);

	return sockaddr_cmpf(&lk->d_u.tcp.addr, &rk->d_u.tcp.addr, false);
}
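/* Usage sketch (a hypothetical init helper, not the actual setup path):
 * the recycle comparator is installed once per tree partition, after which
 * lookups pass a stack key carrying only the address field the comparator
 * reads. Assumes opr_rbtree_init() takes the tree head and a comparator,
 * as the lookup/insert calls in nfs_dupreq_get_drc() below suggest. */
static void drc_recycle_trees_init_sketch(struct rbtree_x_part *parts,
					  int nparts)
{
	int i;

	for (i = 0; i < nparts; i++)
		opr_rbtree_init(&parts[i].t, drc_recycle_cmpf);
}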
bool rpc_ctx_xfer_replymsg(struct x_vc_data *xd, struct rpc_msg *msg)
{
	rpc_ctx_t ctx_k, *ctx;
	struct opr_rbtree_node *nv;
	rpc_dplx_lock_t *lk = &xd->rec->recv.lock;

	ctx_k.xid = msg->rm_xid;
	mutex_lock(&xd->rec->mtx);
	nv = opr_rbtree_lookup(&xd->cx.calls.t, &ctx_k.node_k);
	if (nv) {
		ctx = opr_containerof(nv, rpc_ctx_t, node_k);
		opr_rbtree_remove(&xd->cx.calls.t, &ctx->node_k);
		free_rpc_msg(ctx->msg);	/* free call header */
		ctx->msg = msg;		/* and stash reply header */
		ctx->flags |= RPC_CTX_FLAG_SYNCDONE;
		mutex_unlock(&xd->rec->mtx);
		cond_signal(&lk->we.cv);	/* XXX we hold lk->we.mtx */

		/* now, we must ourselves wait for the other side to run */
		while (!(ctx->flags & RPC_CTX_FLAG_ACKSYNC))
			cond_wait(&lk->we.cv, &lk->we.mtx);

		/* ctx-specific signal--indicates we will make no further
		 * references to ctx whatsoever */
		mutex_lock(&ctx->we.mtx);
		ctx->flags &= ~RPC_CTX_FLAG_WAITSYNC;
		cond_signal(&ctx->we.cv);
		mutex_unlock(&ctx->we.mtx);
		return (TRUE);
	}
	mutex_unlock(&xd->rec->mtx);
	return (FALSE);
}
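/* Sketch of the waiter side of the handshake above (inferred from the flag
 * protocol, not copied from the ntirpc source; the helper name is
 * hypothetical): the caller already holds lk->we.mtx, sleeps until
 * rpc_ctx_xfer_replymsg() posts SYNCDONE, then acknowledges with ACKSYNC so
 * the transfer side may drop its references to ctx. */
static void rpc_ctx_await_reply_sketch(rpc_ctx_t *ctx, rpc_dplx_lock_t *lk)
{
	/* wait for the reply header to be stashed in ctx->msg */
	while (!(ctx->flags & RPC_CTX_FLAG_SYNCDONE))
		cond_wait(&lk->we.cv, &lk->we.mtx);

	/* release the transfer side, which blocks on this ack */
	ctx->flags |= RPC_CTX_FLAG_ACKSYNC;
	cond_signal(&lk->we.cv);
}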
/**
 * @brief Comparison function for entries in a per-connection (TCP) DRC
 *
 * @param[in] lhs Left-hand-side
 * @param[in] rhs Right-hand-side
 *
 * @return -1,0,1.
 */
static inline int dupreq_tcp_cmpf(const struct opr_rbtree_node *lhs,
				  const struct opr_rbtree_node *rhs)
{
	dupreq_entry_t *lk, *rk;

	LogDebug(COMPONENT_DUPREQ, "%s", __func__);

	lk = opr_containerof(lhs, dupreq_entry_t, rbt_k);
	rk = opr_containerof(rhs, dupreq_entry_t, rbt_k);

	if (lk->hin.tcp.rq_xid < rk->hin.tcp.rq_xid)
		return -1;

	if (lk->hin.tcp.rq_xid == rk->hin.tcp.rq_xid) {
		LogDebug(COMPONENT_DUPREQ,
			 "xids eq (%u), ck1 %" PRIu64 " ck2 %" PRIu64,
			 lk->hin.tcp.rq_xid, lk->hk, rk->hk);
		return uint64_cmpf(lk->hk, rk->hk);
	}

	return 1;
}
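/* Illustration (hypothetical helper): because dupreq_tcp_cmpf orders
 * entries by xid and then by checksum, a lookup needs only those two
 * fields, so a stack key suffices. This mirrors the lookup performed in
 * nfs_dupreq_start() below. */
static dupreq_entry_t *drc_tcp_find_sketch(drc_t *drc, uint32_t xid,
					   uint64_t cksum)
{
	dupreq_entry_t dk, *dv = NULL;
	struct opr_rbtree_node *nv;
	struct rbtree_x_part *t;

	dk.hin.tcp.rq_xid = xid;
	dk.hk = cksum;

	t = rbtx_partition_of_scalar(&drc->xt, dk.hk);
	PTHREAD_MUTEX_lock(&t->mtx);	/* partition lock */
	nv = rbtree_x_cached_lookup(&drc->xt, t, &dk.rbt_k, dk.hk);
	if (nv)
		dv = opr_containerof(nv, dupreq_entry_t, rbt_k);
	PTHREAD_MUTEX_unlock(&t->mtx);

	return dv;
}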
/**
 * @brief Start a duplicate request transaction
 *
 * Finds any matching request entry in the cache, if one exists, else
 * creates one in the START state. On any non-error return, the refcnt
 * of the corresponding entry is incremented.
 *
 * @param[in] reqnfs The NFS request data
 * @param[in] req    The request to be cached
 *
 * @retval DUPREQ_SUCCESS if successful.
 * @retval DUPREQ_INSERT_MALLOC_ERROR if an error occurred during insertion.
 */
dupreq_status_t nfs_dupreq_start(nfs_request_t *reqnfs, struct svc_req *req)
{
	dupreq_status_t status = DUPREQ_SUCCESS;
	dupreq_entry_t *dv, *dk = NULL;
	bool release_dk = true;
	nfs_res_t *res = NULL;
	drc_t *drc;

	/* Disabled? */
	if (nfs_param.core_param.drc.disabled) {
		req->rq_u1 = (void *)DUPREQ_NOCACHE;
		res = alloc_nfs_res();
		goto out;
	}

	req->rq_u1 = (void *)DUPREQ_BAD_ADDR1;
	req->rq_u2 = (void *)DUPREQ_BAD_ADDR1;

	drc = nfs_dupreq_get_drc(req);
	if (!drc) {
		status = DUPREQ_INSERT_MALLOC_ERROR;
		goto out;
	}

	switch (drc->type) {
	case DRC_TCP_V4:
		if (reqnfs->funcdesc->service_function == nfs4_Compound) {
			if (!nfs_dupreq_v4_cacheable(reqnfs)) {
				/* for such requests, we merely thread
				 * the request through for later
				 * cleanup--all v41 caching is handled
				 * by the v41 slot reply cache */
				req->rq_u1 = (void *)DUPREQ_NOCACHE;
				res = alloc_nfs_res();
				goto out;
			}
		}
		break;
	default:
		/* likewise for other protocol requests we may not or choose
		 * not to cache */
		if (!(reqnfs->funcdesc->dispatch_behaviour & CAN_BE_DUP)) {
			req->rq_u1 = (void *)DUPREQ_NOCACHE;
			res = alloc_nfs_res();
			goto out;
		}
		break;
	}

	dk = alloc_dupreq();
	if (dk == NULL) {
		release_dk = false;
		status = DUPREQ_ERROR;
		goto release_dk;
	}

	dk->hin.drc = drc;	/* trans. call path ref to dv */

	switch (drc->type) {
	case DRC_TCP_V4:
	case DRC_TCP_V3:
		dk->hin.tcp.rq_xid = req->rq_xid;
		/* XXX needed? */
		dk->hin.rq_prog = req->rq_prog;
		dk->hin.rq_vers = req->rq_vers;
		dk->hin.rq_proc = req->rq_proc;
		break;
	case DRC_UDP_V234:
		dk->hin.tcp.rq_xid = req->rq_xid;
		if (unlikely(!copy_xprt_addr(&dk->hin.addr, req->rq_xprt))) {
			status = DUPREQ_INSERT_MALLOC_ERROR;
			goto release_dk;
		}
		dk->hin.rq_prog = req->rq_prog;
		dk->hin.rq_vers = req->rq_vers;
		dk->hin.rq_proc = req->rq_proc;
		break;
	default:
		/* error */
		status = DUPREQ_ERROR;
		goto release_dk;
	}

	/* TI-RPC computed checksum */
	dk->hk = req->rq_cksum;

	dk->state = DUPREQ_START;
	dk->timestamp = time(NULL);

	{
		struct opr_rbtree_node *nv;
		struct rbtree_x_part *t =
			rbtx_partition_of_scalar(&drc->xt, dk->hk);

		PTHREAD_MUTEX_lock(&t->mtx);	/* partition lock */
		nv = rbtree_x_cached_lookup(&drc->xt, t, &dk->rbt_k, dk->hk);
		if (nv) {
			/* cached request */
			dv = opr_containerof(nv, dupreq_entry_t, rbt_k);
			PTHREAD_MUTEX_lock(&dv->mtx);
			if (unlikely(dv->state == DUPREQ_START)) {
				status = DUPREQ_BEING_PROCESSED;
			} else {
				/* satisfy req from the DRC, incref,
				 * extend window */
				res = dv->res;
				PTHREAD_MUTEX_lock(&drc->mtx);
				drc_inc_retwnd(drc);
				PTHREAD_MUTEX_unlock(&drc->mtx);
				status = DUPREQ_EXISTS;
				(dv->refcnt)++;
			}
			LogDebug(COMPONENT_DUPREQ,
				 "dupreq hit dv=%p, dv xid=%u cksum %" PRIu64
				 " state=%s",
				 dv, dv->hin.tcp.rq_xid, dv->hk,
				 dupreq_state_table[dv->state]);
			req->rq_u1 = dv;
			PTHREAD_MUTEX_unlock(&dv->mtx);
		} else {
			/* new request */
			res = req->rq_u2 = dk->res = alloc_nfs_res();
			(void)rbtree_x_cached_insert(&drc->xt, t, &dk->rbt_k,
						     dk->hk);
			(dk->refcnt)++;
			/* add to q tail */
			PTHREAD_MUTEX_lock(&drc->mtx);
			TAILQ_INSERT_TAIL(&drc->dupreq_q, dk, fifo_q);
			++(drc->size);
			PTHREAD_MUTEX_unlock(&drc->mtx);
			req->rq_u1 = dk;
			release_dk = false;
			dv = dk;
		}
		PTHREAD_MUTEX_unlock(&t->mtx);
	}

	LogFullDebug(COMPONENT_DUPREQ,
		     "starting dv=%p xid=%u on DRC=%p state=%s, status=%s, "
		     "refcnt=%d",
		     dv, dk->hin.tcp.rq_xid, drc,
		     dupreq_state_table[dv->state],
		     dupreq_status_table[status], dv->refcnt);

 release_dk:
	if (release_dk)
		nfs_dupreq_free_dupreq(dk);

	nfs_dupreq_put_drc(req->rq_xprt, drc, DRC_FLAG_NONE);	/* dk ref */

 out:
	if (res)
		reqnfs->res_nfs = req->rq_u2 = res;

	return status;
}
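/* Caller-side sketch (a hypothetical dispatch fragment, not the actual
 * request-processing path): how the interesting outcomes of
 * nfs_dupreq_start() are typically handled. */
static void nfs_rpc_dispatch_sketch(nfs_request_t *reqnfs,
				    struct svc_req *req)
{
	switch (nfs_dupreq_start(reqnfs, req)) {
	case DUPREQ_SUCCESS:
		/* new entry in START state: execute the request, record
		 * the result via nfs_dupreq_finish(), send the reply */
		break;
	case DUPREQ_EXISTS:
		/* retransmission: resend the cached reqnfs->res_nfs
		 * without re-executing the operation */
		break;
	case DUPREQ_BEING_PROCESSED:
		/* the original request is still in flight: drop this
		 * duplicate without replying */
		break;
	default:
		/* allocation or DRC failure: drop the request */
		break;
	}
}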
/**
 * @brief Find and reference a DRC to process the supplied svc_req.
 *
 * @param[in] req The svc_req being processed.
 *
 * @return The ref'd DRC if successfully located, else NULL.
 */
static /* inline */ drc_t *nfs_dupreq_get_drc(struct svc_req *req)
{
	enum drc_type dtype = get_drc_type(req);
	drc_t *drc = NULL;
	bool drc_check_expired = false;

	switch (dtype) {
	case DRC_UDP_V234:
		LogFullDebug(COMPONENT_DUPREQ, "ref shared UDP DRC");
		drc = &(drc_st->udp_drc);
		DRC_ST_LOCK();
		(void)nfs_dupreq_ref_drc(drc);
		DRC_ST_UNLOCK();
		goto out;
	case DRC_TCP_V4:
	case DRC_TCP_V3:
		/* Idempotent address, no need for lock;
		 * xprt will be valid as long as svc_req. */
		drc = (drc_t *)req->rq_xprt->xp_u2;
		if (drc) {
			/* found, no danger of removal */
			LogFullDebug(COMPONENT_DUPREQ,
				     "ref DRC=%p for xprt=%p",
				     drc, req->rq_xprt);
			PTHREAD_MUTEX_lock(&drc->mtx);	/* LOCKED */
		} else {
			drc_t drc_k;
			struct rbtree_x_part *t = NULL;
			struct opr_rbtree_node *ndrc = NULL;
			drc_t *tdrc = NULL;

			memset(&drc_k, 0, sizeof(drc_k));
			drc_k.type = dtype;

			/* Since the drc can last longer than the xprt,
			 * copy the address. Read operation of constant data,
			 * no xprt lock required. */
			(void)copy_xprt_addr(&drc_k.d_u.tcp.addr,
					     req->rq_xprt);

			drc_k.d_u.tcp.hk =
				CityHash64WithSeed((char *)&drc_k.d_u.tcp.addr,
						   sizeof(sockaddr_t), 911);
			{
				char str[SOCK_NAME_MAX];

				sprint_sockaddr(&drc_k.d_u.tcp.addr, str,
						sizeof(str));
				LogFullDebug(COMPONENT_DUPREQ,
					     "get drc for addr: %s", str);
			}

			t = rbtx_partition_of_scalar(
				&drc_st->tcp_drc_recycle_t,
				drc_k.d_u.tcp.hk);
			DRC_ST_LOCK();
			ndrc = opr_rbtree_lookup(&t->t,
						 &drc_k.d_u.tcp.recycle_k);
			if (ndrc) {
				/* reuse old DRC */
				tdrc = opr_containerof(ndrc, drc_t,
						       d_u.tcp.recycle_k);
				PTHREAD_MUTEX_lock(&tdrc->mtx);	/* LOCKED */
				if (tdrc->flags & DRC_FLAG_RECYCLE) {
					TAILQ_REMOVE(
						&drc_st->tcp_drc_recycle_q,
						tdrc, d_u.tcp.recycle_q);
					--(drc_st->tcp_drc_recycle_qlen);
					tdrc->flags &= ~DRC_FLAG_RECYCLE;
				}
				drc = tdrc;
				LogFullDebug(COMPONENT_DUPREQ,
					     "recycle TCP DRC=%p for xprt=%p",
					     tdrc, req->rq_xprt);
			}
			if (!drc) {
				drc = alloc_tcp_drc(dtype);
				LogFullDebug(COMPONENT_DUPREQ,
					     "alloc new TCP DRC=%p for xprt=%p",
					     drc, req->rq_xprt);
				/* assign addr */
				memcpy(&drc->d_u.tcp.addr,
				       &drc_k.d_u.tcp.addr,
				       sizeof(sockaddr_t));
				/* assign already-computed hash */
				drc->d_u.tcp.hk = drc_k.d_u.tcp.hk;
				PTHREAD_MUTEX_lock(&drc->mtx);	/* LOCKED */
				/* xprt ref */
				drc->refcnt = 1;
				/* insert dict */
				opr_rbtree_insert(&t->t,
						  &drc->d_u.tcp.recycle_k);
			}
			DRC_ST_UNLOCK();
			drc->d_u.tcp.recycle_time = 0;

			(void)nfs_dupreq_ref_drc(drc);	/* xprt ref */

			/* try to expire unused DRCs somewhat in proportion to
			 * new connection arrivals */
			drc_check_expired = true;

			LogFullDebug(COMPONENT_DUPREQ,
				     "after ref drc %p refcnt==%u ",
				     drc, drc->refcnt);

			/* Idempotent address, no need for lock;
			 * set once here, never changes.
			 * No other fields are modified.
			 * Assumes address stores are atomic. */
			req->rq_xprt->xp_u2 = (void *)drc;
		}
		break;
	default:
		/* XXX error */
		break;
	}

	/* call path ref */
	(void)nfs_dupreq_ref_drc(drc);
	PTHREAD_MUTEX_unlock(&drc->mtx);

	if (drc_check_expired)
		drc_free_expired();

 out:
	return drc;
}
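/* nfs_dupreq_ref_drc() is always called above with the relevant lock held
 * (drc->mtx, or DRC_ST_LOCK for the shared UDP DRC); a minimal sketch of
 * the assumed body -- a bare increment serialized by the caller's lock: */
static inline uint32_t nfs_dupreq_ref_drc_sketch(drc_t *drc)
{
	return ++(drc->refcnt);	/* caller holds drc->mtx or DRC_ST_LOCK */
}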