static void
ping_client_done_rpc (sfw_test_unit_t *tsu, srpc_client_rpc_t *rpc)
{
        sfw_test_instance_t *tsi = tsu->tsu_instance;
        sfw_session_t       *sn = tsi->tsi_batch->bat_session;
        srpc_ping_reqst_t   *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
        srpc_ping_reply_t   *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
        struct timeval       tv;

        LASSERT (sn != NULL);

        if (rpc->crpc_status != 0) {
                if (!tsi->tsi_stopping) /* rpc could have been aborted */
                        atomic_inc(&sn->sn_ping_errors);
                CERROR ("Unable to ping %s (%d): %d\n",
                        libcfs_id2str(rpc->crpc_dest),
                        reqst->pnr_seq, rpc->crpc_status);
                return;
        }

        if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
                __swab32s(&reply->pnr_seq);
                __swab32s(&reply->pnr_magic);
                __swab32s(&reply->pnr_status);
        }

        if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
                rpc->crpc_status = -EBADMSG;
                atomic_inc(&sn->sn_ping_errors);
                CERROR ("Bad magic %u from %s, %u expected.\n",
                        reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
                        LST_PING_TEST_MAGIC);
                return;
        }

        if (reply->pnr_seq != reqst->pnr_seq) {
                rpc->crpc_status = -EBADMSG;
                atomic_inc(&sn->sn_ping_errors);
                CERROR ("Bad seq %u from %s, %u expected.\n",
                        reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
                        reqst->pnr_seq);
                return;
        }

        cfs_fs_timeval(&tv);
        CDEBUG (D_NET, "%d reply in %u usec\n", reply->pnr_seq,
                (unsigned)((tv.tv_sec - (unsigned)reqst->pnr_time_sec) * 1000000
                           + (tv.tv_usec - reqst->pnr_time_usec)));
        return;
}
/**
 * Helper function. Sends \a len bytes from \a base at offset \a offset
 * over \a conn connection to portal \a portal.
 * Returns 0 on success or error code.
 */
static int ptl_send_buf(lnet_handle_md_t *mdh, void *base, int len,
                        lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
                        struct ptlrpc_connection *conn, int portal, __u64 xid,
                        unsigned int offset)
{
        int rc;
        lnet_md_t md;

        LASSERT(portal != 0);
        LASSERT(conn != NULL);
        CDEBUG(D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
        md.start = base;
        md.length = len;
        md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
        md.options = PTLRPC_MD_OPTIONS;
        md.user_ptr = cbid;
        md.eq_handle = ptlrpc_eq_h;

        if (unlikely(ack == LNET_ACK_REQ &&
                     OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, OBD_FAIL_ONCE))) {
                /* don't ask for the ack to simulate failing client */
                ack = LNET_NOACK_REQ;
        }

        rc = LNetMDBind(md, LNET_UNLINK, mdh);
        if (unlikely(rc != 0)) {
                CERROR("LNetMDBind failed: %d\n", rc);
                LASSERT(rc == -ENOMEM);
                return -ENOMEM;
        }

        CDEBUG(D_NET, "Sending %d bytes to portal %d, xid %lld, offset %u\n",
               len, portal, xid, offset);

        rc = LNetPut(conn->c_self, *mdh, ack,
                     conn->c_peer, portal, xid, offset, 0);
        if (unlikely(rc != 0)) {
                int rc2;
                /* We're going to get an UNLINK event when I unlink below,
                 * which will complete just like any other failed send, so
                 * I fall through and return success here! */
                CERROR("LNetPut(%s, %d, %lld) failed: %d\n",
                       libcfs_id2str(conn->c_peer), portal, xid, rc);
                rc2 = LNetMDUnlink(*mdh);
                LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
        }

        return 0;
}
static void
ping_client_done_rpc(struct sfw_test_unit *tsu, struct srpc_client_rpc *rpc)
{
        struct sfw_test_instance *tsi = tsu->tsu_instance;
        struct sfw_session *sn = tsi->tsi_batch->bat_session;
        struct srpc_ping_reqst *reqst = &rpc->crpc_reqstmsg.msg_body.ping_reqst;
        struct srpc_ping_reply *reply = &rpc->crpc_replymsg.msg_body.ping_reply;
        struct timespec64 ts;

        LASSERT(sn);

        if (rpc->crpc_status) {
                if (!tsi->tsi_stopping) /* rpc could have been aborted */
                        atomic_inc(&sn->sn_ping_errors);
                CERROR("Unable to ping %s (%d): %d\n",
                       libcfs_id2str(rpc->crpc_dest),
                       reqst->pnr_seq, rpc->crpc_status);
                return;
        }

        if (rpc->crpc_replymsg.msg_magic != SRPC_MSG_MAGIC) {
                __swab32s(&reply->pnr_seq);
                __swab32s(&reply->pnr_magic);
                __swab32s(&reply->pnr_status);
        }

        if (reply->pnr_magic != LST_PING_TEST_MAGIC) {
                rpc->crpc_status = -EBADMSG;
                atomic_inc(&sn->sn_ping_errors);
                CERROR("Bad magic %u from %s, %u expected.\n",
                       reply->pnr_magic, libcfs_id2str(rpc->crpc_dest),
                       LST_PING_TEST_MAGIC);
                return;
        }

        if (reply->pnr_seq != reqst->pnr_seq) {
                rpc->crpc_status = -EBADMSG;
                atomic_inc(&sn->sn_ping_errors);
                CERROR("Bad seq %u from %s, %u expected.\n",
                       reply->pnr_seq, libcfs_id2str(rpc->crpc_dest),
                       reqst->pnr_seq);
                return;
        }

        ktime_get_real_ts64(&ts);
        CDEBUG(D_NET, "%d reply in %u usec\n", reply->pnr_seq,
               (unsigned int)((ts.tv_sec - reqst->pnr_time_sec) * 1000000 +
                              (ts.tv_nsec / NSEC_PER_USEC -
                               reqst->pnr_time_usec)));
}
static int
ping_server_handle(struct srpc_server_rpc *rpc)
{
        struct srpc_service *sv = rpc->srpc_scd->scd_svc;
        srpc_msg_t *reqstmsg = &rpc->srpc_reqstbuf->buf_msg;
        srpc_msg_t *replymsg = &rpc->srpc_replymsg;
        srpc_ping_reqst_t *req = &reqstmsg->msg_body.ping_reqst;
        srpc_ping_reply_t *rep = &rpc->srpc_replymsg.msg_body.ping_reply;

        LASSERT (sv->sv_id == SRPC_SERVICE_PING);

        if (reqstmsg->msg_magic != SRPC_MSG_MAGIC) {
                LASSERT (reqstmsg->msg_magic == __swab32(SRPC_MSG_MAGIC));

                __swab32s(&req->pnr_seq);
                __swab32s(&req->pnr_magic);
                __swab64s(&req->pnr_time_sec);
                __swab64s(&req->pnr_time_usec);
        }
        LASSERT (reqstmsg->msg_type == srpc_service2request(sv->sv_id));

        if (req->pnr_magic != LST_PING_TEST_MAGIC) {
                CERROR ("Unexpected magic %08x from %s\n",
                        req->pnr_magic, libcfs_id2str(rpc->srpc_peer));
                return -EINVAL;
        }

        rep->pnr_seq   = req->pnr_seq;
        rep->pnr_magic = LST_PING_TEST_MAGIC;

        if ((reqstmsg->msg_ses_feats & ~LST_FEATS_MASK) != 0) {
                replymsg->msg_ses_feats = LST_FEATS_MASK;
                rep->pnr_status = EPROTO;
                return 0;
        }

        replymsg->msg_ses_feats = reqstmsg->msg_ses_feats;

        CDEBUG(D_NET, "Get ping %d from %s\n",
               req->pnr_seq, libcfs_id2str(rpc->srpc_peer));
        return 0;
}
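/*
 * The ping client and server handlers above share one wire-format convention:
 * the sender writes a known magic constant in host byte order, and the
 * receiver byte-swaps the payload fields only when the magic arrives
 * swapped. Below is a minimal, self-contained sketch of that idea in plain C;
 * PING_MAGIC, ping_wire, and ping_decode are hypothetical names for
 * illustration (the real code uses the kernel's __swab32s helpers in place).
 */
#include <stdint.h>
#include <stdio.h>

#define PING_MAGIC 0xbabeface

struct ping_wire {
        uint32_t magic;
        uint32_t seq;
};

static uint32_t swab32(uint32_t v)
{
        return ((v & 0x000000ffU) << 24) | ((v & 0x0000ff00U) << 8) |
               ((v & 0x00ff0000U) >> 8)  | ((v & 0xff000000U) >> 24);
}

/* Returns 0 on success, -1 if the message can't be recognized. */
static int ping_decode(struct ping_wire *msg)
{
        if (msg->magic == swab32(PING_MAGIC)) {
                /* peer has opposite endianness: fix up every field */
                msg->magic = swab32(msg->magic);
                msg->seq = swab32(msg->seq);
        }
        return msg->magic == PING_MAGIC ? 0 : -1;
}

int main(void)
{
        /* simulate a message produced by an opposite-endian peer */
        struct ping_wire msg = { swab32(PING_MAGIC), swab32(7) };

        if (ping_decode(&msg) == 0)
                printf("seq %u decoded from opposite-endian peer\n", msg.seq);
        return 0;
}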
int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
                        lnet_process_id_t *peer, lnet_nid_t *self)
{
        int best_dist = 0;
        __u32 best_order = 0;
        int count = 0;
        int rc = -ENOENT;
        int portals_compatibility;
        int dist;
        __u32 order;
        lnet_nid_t dst_nid;
        lnet_nid_t src_nid;

        portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL);

        peer->pid = LUSTRE_SRV_LNET_PID;

        /* Choose the matching UUID that's closest */
        while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) {
                dist = LNetDist(dst_nid, &src_nid, &order);
                if (dist < 0)
                        continue;

                if (dist == 0) {                /* local! use loopback LND */
                        peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0);
                        rc = 0;
                        break;
                }

                if (rc < 0 ||
                    dist < best_dist ||
                    (dist == best_dist && order < best_order)) {
                        best_dist = dist;
                        best_order = order;

                        if (portals_compatibility > 1) {
                                /* Strong portals compatibility: Zero the nid's
                                 * NET, so if I'm reading new config logs, or
                                 * getting configured by (new) lconf I can
                                 * still talk to old servers. */
                                dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid));
                                src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid));
                        }
                        peer->nid = dst_nid;
                        *self = src_nid;
                        rc = 0;
                }
        }

        CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer));
        return rc;
}
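/*
 * The selection loop above is a two-key minimization with a sentinel:
 * rc < 0 means "no candidate accepted yet", dist is the primary key, and
 * order breaks ties. Here is a standalone sketch of just that comparison
 * logic (omitting the dist == 0 loopback shortcut); the candidate array and
 * pick_closest are hypothetical stand-ins for lustre_uuid_to_peer()/LNetDist().
 */
#include <stdio.h>

struct candidate {
        int dist;               /* primary key: smaller is closer */
        unsigned int order;     /* secondary key: breaks dist ties */
};

static int pick_closest(const struct candidate *c, int n)
{
        int best = -1;          /* sentinel: nothing accepted yet */
        int i;

        for (i = 0; i < n; i++) {
                if (c[i].dist < 0)
                        continue;       /* unreachable: skip */
                if (best < 0 ||
                    c[i].dist < c[best].dist ||
                    (c[i].dist == c[best].dist &&
                     c[i].order < c[best].order))
                        best = i;
        }
        return best;            /* -1 if no candidate was reachable */
}

int main(void)
{
        struct candidate c[] = { { 2, 1 }, { 1, 5 }, { 1, 3 }, { -1, 0 } };

        printf("best = %d\n", pick_closest(c, 4));      /* prints 2 */
        return 0;
}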
static inline void ldlm_callback_errmsg(struct ptlrpc_request *req,
                                        const char *msg, int rc,
                                        struct lustre_handle *handle)
{
        DEBUG_REQ((req->rq_no_reply || rc) ? D_WARNING : D_DLMTRACE, req,
                  "%s: [nid %s] [rc %d] [lock "LPX64"]",
                  msg, libcfs_id2str(req->rq_peer), rc,
                  handle ? handle->cookie : 0);
        if (req->rq_no_reply)
                CWARN("No reply was sent, maybe cause bug 21636.\n");
        else if (rc)
                CWARN("Send reply failed, maybe cause bug 21636.\n");
}
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
static int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
        lnet_process_id_t peer;
        int rc = 0;
        int rc2;
        int posted_md;
        int total_md;
        __u64 xid;
        lnet_handle_me_t me_h;
        lnet_md_t md;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
                return 0;

        /* NB no locking required until desc is on the network */
        LASSERT(desc->bd_nob > 0);
        LASSERT(desc->bd_md_count == 0);
        LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
        LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
        LASSERT(desc->bd_req != NULL);
        LASSERT(desc->bd_type == BULK_PUT_SINK ||
                desc->bd_type == BULK_GET_SOURCE);

        /* cleanup the state of the bulk for it will be reused */
        if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
                desc->bd_nob_transferred = 0;
        else
                LASSERT(desc->bd_nob_transferred == 0);

        desc->bd_failure = 0;

        peer = desc->bd_import->imp_connection->c_peer;

        LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
        LASSERT(desc->bd_cbid.cbid_arg == desc);

        /* An XID is only used for a single request from the client.
         * For retried bulk transfers, a new XID will be allocated in
         * ptlrpc_check_set() if it needs to be resent, so it is not
         * using the same RDMA match bits after an error.
         *
         * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
         * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
        xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
        LASSERTF(!(desc->bd_registered &&
                   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
                 xid != desc->bd_last_xid,
                 "registered: %d rq_xid: %llu bd_last_xid: %llu\n",
                 desc->bd_registered, xid, desc->bd_last_xid);

        total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
        desc->bd_registered = 1;
        desc->bd_last_xid = xid;
        desc->bd_md_count = total_md;
        md.user_ptr = &desc->bd_cbid;
        md.eq_handle = ptlrpc_eq_h;
        md.threshold = 1;                       /* PUT or GET */

        for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
                md.options = PTLRPC_MD_OPTIONS |
                             ((desc->bd_type == BULK_GET_SOURCE) ?
                              LNET_MD_OP_GET : LNET_MD_OP_PUT);
                ptlrpc_fill_bulk_md(&md, desc, posted_md);

                rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &me_h);
                if (rc != 0) {
                        CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, xid,
                               posted_md, rc);
                        break;
                }

                /* About to let the network at it... */
                rc = LNetMDAttach(me_h, md, LNET_UNLINK,
                                  &desc->bd_mds[posted_md]);
                if (rc != 0) {
                        CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, xid,
                               posted_md, rc);
                        rc2 = LNetMEUnlink(me_h);
                        LASSERT(rc2 == 0);
                        break;
                }
        }

        if (rc != 0) {
                LASSERT(rc == -ENOMEM);
                spin_lock(&desc->bd_lock);
                desc->bd_md_count -= total_md - posted_md;
                spin_unlock(&desc->bd_lock);
                LASSERT(desc->bd_md_count >= 0);
                mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
                req->rq_status = -ENOMEM;
                return -ENOMEM;
        }

        /* Set rq_xid to matchbits of the final bulk so that server can
         * infer the number of bulks that were prepared */
        req->rq_xid = --xid;
        LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
                 "bd_last_xid = x%llu, rq_xid = x%llu\n",
                 desc->bd_last_xid, req->rq_xid);

        spin_lock(&desc->bd_lock);
        /* Holler if peer manages to touch buffers before he knows the xid */
        if (desc->bd_md_count != total_md)
                CWARN("%s: Peer %s touched %d buffers while I registered\n",
                      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
                      total_md - desc->bd_md_count);
        spin_unlock(&desc->bd_lock);

        CDEBUG(D_NET,
               "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
               desc->bd_md_count,
               desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
               desc->bd_iov_count, desc->bd_nob,
               desc->bd_last_xid, req->rq_xid, desc->bd_portal);

        return 0;
}
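/*
 * The LU-1431 match-bits scheme above is pure mask arithmetic: the first
 * bulk XID is rq_xid rounded down to a multiple of bd_md_max_brw (a power
 * of two), and advertising the last XID used lets the peer recover the MD
 * count by re-applying the mask, as ptlrpc_start_bulk_transfer() does on the
 * server side below. A standalone sketch of the arithmetic, with made-up
 * values standing in for the bulk descriptor fields:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t rq_xid = 0x100e5;      /* XID assigned to the RPC */
        uint64_t max_brw = 8;           /* bd_md_max_brw: a power of two */
        int total_md = 4;               /* MDs actually needed */

        /* Client: round rq_xid down to a multiple of max_brw. */
        uint64_t first = rq_xid & ~(max_brw - 1);

        /* Client posts MDs at first, first+1, ... and advertises the last
         * one used; the server recovers first and the count by masking. */
        uint64_t last = first + total_md - 1;
        uint64_t recovered_first = last & ~(max_brw - 1);
        int recovered_count = (int)(last - recovered_first + 1);

        printf("first=%#llx last=%#llx count=%d\n",
               (unsigned long long)first, (unsigned long long)last,
               recovered_count);        /* first=0x100e0 last=0x100e3 count=4 */
        return 0;
}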
/*
 * Server's incoming request callback
 */
void request_in_callback(lnet_event_t *ev)
{
        struct ptlrpc_cb_id *cbid = ev->md.user_ptr;
        struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg;
        struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt;
        struct ptlrpc_service *service = svcpt->scp_service;
        struct ptlrpc_request *req;

        LASSERT(ev->type == LNET_EVENT_PUT ||
                ev->type == LNET_EVENT_UNLINK);
        LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer);
        LASSERT((char *)ev->md.start + ev->offset + ev->mlength <=
                rqbd->rqbd_buffer + service->srv_buf_size);

        CDEBUG((ev->status == 0) ? D_NET : D_ERROR,
               "event type %d, status %d, service %s\n",
               ev->type, ev->status, service->srv_name);

        if (ev->unlinked) {
                /* If this is the last request message to fit in the
                 * request buffer we can use the request object embedded in
                 * rqbd. Note that if we failed to allocate a request,
                 * we'd have to re-post the rqbd, which we can't do in this
                 * context. */
                req = &rqbd->rqbd_req;
                memset(req, 0, sizeof(*req));
        } else {
                LASSERT(ev->type == LNET_EVENT_PUT);
                if (ev->status != 0) {
                        /* We moaned above already... */
                        return;
                }
                OBD_ALLOC_GFP(req, sizeof(*req), ALLOC_ATOMIC_TRY);
                if (req == NULL) {
                        CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n",
                               service->srv_name,
                               libcfs_id2str(ev->initiator));
                        return;
                }
        }

        /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL,
         * flags are reset and scalars are zero. We only set the message
         * size to non-zero if this was a successful receive. */
        req->rq_xid = ev->match_bits;
        req->rq_reqbuf = ev->md.start + ev->offset;
        if (ev->type == LNET_EVENT_PUT && ev->status == 0)
                req->rq_reqdata_len = ev->mlength;
        do_gettimeofday(&req->rq_arrival_time);
        req->rq_peer = ev->initiator;
        req->rq_self = ev->target.nid;
        req->rq_rqbd = rqbd;
        req->rq_phase = RQ_PHASE_NEW;
        spin_lock_init(&req->rq_lock);
        INIT_LIST_HEAD(&req->rq_timed_list);
        INIT_LIST_HEAD(&req->rq_exp_list);
        atomic_set(&req->rq_refcount, 1);
        if (ev->type == LNET_EVENT_PUT)
                CDEBUG(D_INFO, "incoming req@%p x"LPU64" msgsize %u\n",
                       req, req->rq_xid, ev->mlength);

        CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer));

        spin_lock(&svcpt->scp_lock);

        ptlrpc_req_add_history(svcpt, req);

        if (ev->unlinked) {
                svcpt->scp_nrqbds_posted--;
                CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n",
                       svcpt->scp_nrqbds_posted);

                /* Normally, don't complain about 0 buffers posted; LNET won't
                 * drop incoming reqs since we set the portal lazy */
                if (test_req_buffer_pressure &&
                    ev->type != LNET_EVENT_UNLINK &&
                    svcpt->scp_nrqbds_posted == 0)
                        CWARN("All %s request buffers busy\n",
                              service->srv_name);

                /* req takes over the network's ref on rqbd */
        } else {
                /* req takes a ref on rqbd */
                rqbd->rqbd_refcount++;
        }

        list_add_tail(&req->rq_list, &svcpt->scp_req_incoming);
        svcpt->scp_nreqs_incoming++;

        /* NB everything can disappear under us once the request
         * has been queued and we unlock, so do the wake now... */
        wake_up(&svcpt->scp_waitq);

        spin_unlock(&svcpt->scp_lock);
}
int
kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                    unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
                    unsigned int offset, int nob)
{
        kptl_tx_t       *tx;
        ptl_err_t        ptlrc;
        kptl_msg_t      *rxmsg = rx->rx_msg;
        kptl_peer_t     *peer = rx->rx_peer;
        unsigned long    flags;
        ptl_handle_md_t  mdh;

        LASSERT (type == TX_TYPE_PUT_RESPONSE ||
                 type == TX_TYPE_GET_RESPONSE);

        tx = kptllnd_get_idle_tx(type);
        if (tx == NULL) {
                CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
                        type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
                        libcfs_id2str(peer->peer_id));
                return -ENOMEM;
        }

        kptllnd_set_tx_peer(tx, peer);
        kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);

        ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md,
                          PTL_UNLINK, &mdh);
        if (ptlrc != PTL_OK) {
                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
                       libcfs_id2str(peer->peer_id),
                       kptllnd_errtype2str(ptlrc), ptlrc);
                tx->tx_status = -EIO;
                kptllnd_tx_decref(tx);
                return -EIO;
        }

        cfs_spin_lock_irqsave(&peer->peer_lock, flags);

        tx->tx_lnet_msg = lntmsg;
        /* lnet_finalize() will be called when tx is torn down, so I must
         * return success from here on... */

        tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * CFS_HZ);
        tx->tx_rdma_mdh = mdh;
        tx->tx_active = 1;
        cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);

        /* peer has now got my ref on 'tx' */

        cfs_spin_unlock_irqrestore(&peer->peer_lock, flags);

        tx->tx_tposted = jiffies;

        if (type == TX_TYPE_GET_RESPONSE)
                ptlrc = PtlPut(mdh,
                               tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0,                     /* offset */
                               (lntmsg != NULL) ?     /* header data */
                               PTLLND_RDMA_OK :
                               PTLLND_RDMA_FAIL);
        else
                ptlrc = PtlGet(mdh,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0);                    /* offset */

        if (ptlrc != PTL_OK) {
                CERROR("Ptl%s failed: %s(%d)\n",
                       (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
                       kptllnd_errtype2str(ptlrc), ptlrc);

                kptllnd_peer_close(peer, -EIO);
                /* Everything (including this RDMA) queued on the peer will
                 * be completed with failure */
                kptllnd_schedule_ptltrace_dump();
        }

        return 0;
}
void
lnet_finalize (__unusedx lnet_ni_t *ni, lnet_msg_t *msg, int status)
{
#ifdef __KERNEL__
        int                i;
        int                my_slot;
#endif
        lnet_libmd_t      *md;

        LASSERT (!in_interrupt ());

        if (msg == NULL)
                return;
#if 0
        CDEBUG(D_WARNING, "%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n",
               lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target),
               msg->msg_target_is_router ? "t" : "",
               msg->msg_routing ? "X" : "",
               msg->msg_ack ? "A" : "",
               msg->msg_sending ? "S" : "",
               msg->msg_receiving ? "R" : "",
               msg->msg_delayed ? "d" : "",
               msg->msg_txcredit ? "C" : "",
               msg->msg_peertxcredit ? "c" : "",
               msg->msg_rtrcredit ? "F" : "",
               msg->msg_peerrtrcredit ? "f" : "",
               msg->msg_onactivelist ? "!" : "",
               msg->msg_txpeer == NULL ? "<none>" :
               libcfs_nid2str(msg->msg_txpeer->lp_nid),
               msg->msg_rxpeer == NULL ? "<none>" :
               libcfs_nid2str(msg->msg_rxpeer->lp_nid));
#endif
        LNET_LOCK();

        LASSERT (msg->msg_onactivelist);

        msg->msg_ev.status = status;

        md = msg->msg_md;
        if (md != NULL) {
                int      unlink;

                /* Now it's safe to drop my caller's ref */
                md->md_refcount--;
                LASSERT (md->md_refcount >= 0);

                unlink = lnet_md_unlinkable(md);

                msg->msg_ev.unlinked = unlink;

                if (md->md_eq != NULL)
                        lnet_enq_event_locked(md->md_eq, &msg->msg_ev);

                if (unlink)
                        lnet_md_unlink(md);

                msg->msg_md = NULL;
        }

        list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq);

        /* Recursion breaker. Don't complete the message here if I am (or
         * enough other threads are) already completing messages */

#ifdef __KERNEL__
        my_slot = -1;
        for (i = 0; i < the_lnet.ln_nfinalizers; i++) {
                if (the_lnet.ln_finalizers[i] == cfs_current())
                        goto out;
                if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL)
                        my_slot = i;
        }
        if (my_slot < 0)
                goto out;

        the_lnet.ln_finalizers[my_slot] = cfs_current();
#else
        if (the_lnet.ln_finalizing)
                goto out;

        the_lnet.ln_finalizing = 1;
#endif

        while (!list_empty(&the_lnet.ln_finalizeq)) {
                msg = list_entry(the_lnet.ln_finalizeq.next,
                                 lnet_msg_t, msg_list);

                list_del(&msg->msg_list);

                /* NB drops and regains the lnet lock if it actually does
                 * anything, so my finalizing friends can chomp along too */
                lnet_complete_msg_locked(msg);
        }

#ifdef __KERNEL__
        the_lnet.ln_finalizers[my_slot] = NULL;
#else
        the_lnet.ln_finalizing = 0;
#endif

 out:
        LNET_UNLOCK();
}
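/*
 * The finalizer-slot logic above is a general recursion breaker: callers
 * enqueue completed work, a bounded number of threads drain the queue, and
 * a thread already draining never re-enters itself. A minimal user-space
 * sketch of the pattern follows; the names (finalize, slot_used, queued) are
 * illustrative, and a single mutex stands in for the LNET lock that guards
 * both the queue and the slot table in the real code.
 */
#include <pthread.h>
#include <stdbool.h>

#define NFINALIZERS 2

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t finalizer[NFINALIZERS];        /* threads currently draining */
static bool slot_used[NFINALIZERS];
static int queued;                              /* stand-in for a real queue */

static void complete_one_locked(void)
{
        /* the real code may drop and retake the lock here, which is why a
         * thread that is already draining must not re-enter */
        queued--;
}

void finalize(void)
{
        int i, my_slot = -1;

        pthread_mutex_lock(&lock);
        queued++;                               /* "enqueue" completed work */

        /* claim a drain slot unless this thread is already draining */
        for (i = 0; i < NFINALIZERS; i++) {
                if (slot_used[i] &&
                    pthread_equal(finalizer[i], pthread_self()))
                        goto out;               /* recursion: caller above drains */
                if (my_slot < 0 && !slot_used[i])
                        my_slot = i;
        }
        if (my_slot < 0)
                goto out;                       /* enough drainers already */

        slot_used[my_slot] = true;
        finalizer[my_slot] = pthread_self();

        while (queued > 0)
                complete_one_locked();

        slot_used[my_slot] = false;
out:
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        finalize();     /* single-threaded demo: drains its own work */
        return 0;
}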
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
        struct ptlrpc_bulk_desc *desc = req->rq_bulk;
        lnet_process_id_t peer;
        int rc = 0;
        int rc2;
        int posted_md;
        int total_md;
        __u64 mbits;
        lnet_handle_me_t me_h;
        lnet_md_t md;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
                RETURN(0);

        /* NB no locking required until desc is on the network */
        LASSERT(desc->bd_nob > 0);
        LASSERT(desc->bd_md_count == 0);
        LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
        LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
        LASSERT(desc->bd_req != NULL);
        LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type));

        /* cleanup the state of the bulk for it will be reused */
        if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
                desc->bd_nob_transferred = 0;
        else
                LASSERT(desc->bd_nob_transferred == 0);

        desc->bd_failure = 0;

        peer = desc->bd_import->imp_connection->c_peer;

        LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
        LASSERT(desc->bd_cbid.cbid_arg == desc);

        total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
        /* rq_mbits is matchbits of the final bulk */
        mbits = req->rq_mbits - total_md + 1;

        LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK),
                 "first mbits = x"LPU64", last mbits = x"LPU64"\n",
                 mbits, req->rq_mbits);
        LASSERTF(!(desc->bd_registered &&
                   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
                 mbits != desc->bd_last_mbits,
                 "registered: %d rq_mbits: "LPU64" bd_last_mbits: "LPU64"\n",
                 desc->bd_registered, mbits, desc->bd_last_mbits);

        desc->bd_registered = 1;
        desc->bd_last_mbits = mbits;
        desc->bd_md_count = total_md;
        md.user_ptr = &desc->bd_cbid;
        md.eq_handle = ptlrpc_eq_h;
        md.threshold = 1;                       /* PUT or GET */

        for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) {
                md.options = PTLRPC_MD_OPTIONS |
                             (ptlrpc_is_bulk_op_get(desc->bd_type) ?
                              LNET_MD_OP_GET : LNET_MD_OP_PUT);
                ptlrpc_fill_bulk_md(&md, desc, posted_md);

                rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
                                  LNET_UNLINK, LNET_INS_AFTER, &me_h);
                if (rc != 0) {
                        CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, mbits,
                               posted_md, rc);
                        break;
                }

                /* About to let the network at it... */
                rc = LNetMDAttach(me_h, md, LNET_UNLINK,
                                  &desc->bd_mds[posted_md]);
                if (rc != 0) {
                        CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n",
                               desc->bd_import->imp_obd->obd_name, mbits,
                               posted_md, rc);
                        rc2 = LNetMEUnlink(me_h);
                        LASSERT(rc2 == 0);
                        break;
                }
        }

        if (rc != 0) {
                LASSERT(rc == -ENOMEM);
                spin_lock(&desc->bd_lock);
                desc->bd_md_count -= total_md - posted_md;
                spin_unlock(&desc->bd_lock);
                LASSERT(desc->bd_md_count >= 0);
                mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
                req->rq_status = -ENOMEM;
                RETURN(-ENOMEM);
        }

        spin_lock(&desc->bd_lock);
        /* Holler if peer manages to touch buffers before he knows the mbits */
        if (desc->bd_md_count != total_md)
                CWARN("%s: Peer %s touched %d buffers while I registered\n",
                      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
                      total_md - desc->bd_md_count);
        spin_unlock(&desc->bd_lock);

        CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
               "mbits x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count,
               ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink",
               desc->bd_iov_count, desc->bd_nob,
               desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);

        RETURN(0);
}
/**
 * Starts bulk transfer for descriptor \a desc on the server.
 * Returns 0 on success or error code.
 */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
        struct obd_export *exp = desc->bd_export;
        struct ptlrpc_connection *conn = exp->exp_connection;
        int rc = 0;
        __u64 mbits;
        int posted_md;
        int total_md;
        lnet_md_t md;
        ENTRY;

        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_PUT_NET))
                RETURN(0);

        /* NB no locking required until desc is on the network */
        LASSERT(desc->bd_md_count == 0);
        LASSERT(ptlrpc_is_bulk_op_active(desc->bd_type));

        LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback);
        LASSERT(desc->bd_cbid.cbid_arg == desc);

        /* NB total length may be 0 for a read past EOF, so we send 0
         * length bulks, since the client expects bulk events.
         *
         * The client may not need all of the bulk mbits for the RPC. The RPC
         * used the mbits of the highest bulk mbits needed, and the server
         * masks off high bits to get bulk count for this RPC. LU-1431 */
        mbits = desc->bd_req->rq_mbits & ~((__u64)desc->bd_md_max_brw - 1);
        total_md = desc->bd_req->rq_mbits - mbits + 1;

        desc->bd_md_count = total_md;
        desc->bd_failure = 0;

        md.user_ptr = &desc->bd_cbid;
        md.eq_handle = ptlrpc_eq_h;
        md.threshold = 2;                       /* SENT and ACK/REPLY */

        for (posted_md = 0; posted_md < total_md; mbits++) {
                md.options = PTLRPC_MD_OPTIONS;

                /* NB it's assumed that source and sink buffer frags are
                 * page-aligned. Otherwise we'd have to send client bulk
                 * sizes over and split server buffer accordingly */
                ptlrpc_fill_bulk_md(&md, desc, posted_md);
                rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_mds[posted_md]);
                if (rc != 0) {
                        CERROR("%s: LNetMDBind failed for MD %u: rc = %d\n",
                               exp->exp_obd->obd_name, posted_md, rc);
                        LASSERT(rc == -ENOMEM);
                        if (posted_md == 0) {
                                desc->bd_md_count = 0;
                                RETURN(-ENOMEM);
                        }
                        break;
                }

                /* LU-6441: last md is not sent and desc->bd_md_count == 1 */
                if (OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB3,
                                         CFS_FAIL_ONCE) &&
                    posted_md == desc->bd_md_max_brw - 1) {
                        posted_md++;
                        continue;
                }

                /* Network is about to get at the memory */
                if (ptlrpc_is_bulk_put_source(desc->bd_type))
                        rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
                                     LNET_ACK_REQ, conn->c_peer,
                                     desc->bd_portal, mbits, 0, 0);
                else
                        rc = LNetGet(conn->c_self, desc->bd_mds[posted_md],
                                     conn->c_peer, desc->bd_portal,
                                     mbits, 0);

                posted_md++;
                if (rc != 0) {
                        CERROR("%s: failed bulk transfer with %s:%u x"LPU64
                               ": rc = %d\n", exp->exp_obd->obd_name,
                               libcfs_id2str(conn->c_peer), desc->bd_portal,
                               mbits, rc);
                        break;
                }
        }

        if (rc != 0) {
                /* Can't send, so we unlink the MD bound above. The UNLINK
                 * event this creates will signal completion with failure,
                 * so we return SUCCESS here! */
                spin_lock(&desc->bd_lock);
                desc->bd_md_count -= total_md - posted_md;
                spin_unlock(&desc->bd_lock);
                LASSERT(desc->bd_md_count >= 0);

                mdunlink_iterate_helper(desc->bd_mds, posted_md);
                RETURN(0);
        }

        CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
               "id %s mbits "LPX64"-"LPX64"\n", desc->bd_iov_count,
               desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer),
               mbits - posted_md, mbits - 1);

        RETURN(0);
}
/**
 * Helper function. Sends \a len bytes from \a base at offset \a offset
 * over \a conn connection to portal \a portal.
 * Returns 0 on success or error code.
 */
static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
                         lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
                         struct ptlrpc_connection *conn, int portal,
                         __u64 xid, unsigned int offset)
{
        int              rc;
        lnet_md_t        md;
        /* ziqi: customized client 2 process id */
        lnet_process_id_t c_client2;
        c_client2.nid = 562952840151517;
        c_client2.pid = 12345;
        ENTRY;

        LASSERT (portal != 0);
        LASSERT (conn != NULL);
        CDEBUG (D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer));
        md.start = base;
        md.length = len;
        md.threshold = (ack == LNET_ACK_REQ) ? 2 : 1;
        md.options = PTLRPC_MD_OPTIONS;
        md.user_ptr = cbid;
        md.eq_handle = ptlrpc_eq_h;

        if (unlikely(ack == LNET_ACK_REQ &&
                     OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_ACK, OBD_FAIL_ONCE))) {
                /* don't ask for the ack to simulate failing client */
                ack = LNET_NOACK_REQ;
        }

        rc = LNetMDBind (md, LNET_UNLINK, mdh);
        if (unlikely(rc != 0)) {
                CERROR ("LNetMDBind failed: %d\n", rc);
                LASSERT (rc == -ENOMEM);
                RETURN (-ENOMEM);
        }

        CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64", offset %u\n",
               len, portal, xid, offset);

        rc = LNetPut (conn->c_self, *mdh, ack,
                      conn->c_peer, portal, xid, offset, 0);

        /* ziqi: test LNetPut */
        printk("ptl_send_buf(): in LNetPut conn->c_self = %llu\n",
               conn->c_self);
        if (conn->c_self == 562952840151540) {
                LNetPut (conn->c_self, *mdh, ack,
                         c_client2, portal, xid, offset, 0);
                printk("client 1 just uses LNetPut to send sth to client 2\n");
        }

        if (unlikely(rc != 0)) {
                int rc2;
                /* We're going to get an UNLINK event when I unlink below,
                 * which will complete just like any other failed send, so
                 * I fall through and return success here! */
                CERROR("LNetPut(%s, %d, "LPD64") failed: %d\n",
                       libcfs_id2str(conn->c_peer), portal, xid, rc);
                rc2 = LNetMDUnlink(*mdh);
                LASSERTF(rc2 == 0, "rc2 = %d\n", rc2);
        }

        RETURN (0);
}
/**
 * rsx_bulkserver - Setup a source or sink for a server.
 * @rq: RPC request associated with GET.
 * @type: GET_SINK receive from client or PUT_SOURCE to push to a client.
 * @ptl: portal to issue bulk xfer across.
 * @iov: iovec array of receive buffer.
 * @n: #iovecs.
 * Returns: 0 or negative errno on error.
 */
int
rsx_bulkserver(struct pscrpc_request *rq, int type, int ptl,
    struct iovec *iov, int n)
{
        int sum, i, rc, comms_error;
        struct pscrpc_bulk_desc *desc;
        struct l_wait_info lwi;
        uint64_t *v8;
        uint8_t *v1;

        psc_assert(type == BULK_GET_SINK || type == BULK_PUT_SOURCE);

        desc = pscrpc_prep_bulk_exp(rq, n, type, ptl);
        if (desc == NULL) {
                psclog_warnx("pscrpc_prep_bulk_exp returned a null desc");
                return (-ENOMEM); // XXX errno
        }
        desc->bd_nob = 0;
        desc->bd_iov_count = n;
        memcpy(desc->bd_iov, iov, n * sizeof(*iov));
        for (i = 0; i < n; i++)
                desc->bd_nob += iov[i].iov_len;

        /* Check for client eviction during previous I/O before proceeding. */
        if (desc->bd_export->exp_failed)
                rc = -ENOTCONN;
        else
                rc = pscrpc_start_bulk_transfer(desc);
        if (rc == 0) {
                lwi = LWI_TIMEOUT_INTERVAL(OBD_TIMEOUT / 2, 100,
                    pfl_rsx_timeout, desc);
                rc = pscrpc_svr_wait_event(&desc->bd_waitq,
                    (!pscrpc_bulk_active(desc) ||
                     desc->bd_export->exp_failed), &lwi, NULL);
                LASSERT(rc == 0 || rc == -ETIMEDOUT);
                if (rc == -ETIMEDOUT) {
                        DEBUG_REQ(PLL_ERROR, rq, "timeout on bulk GET");
                        pscrpc_abort_bulk(desc);
                } else if (desc->bd_export->exp_failed) {
                        DEBUG_REQ(PLL_ERROR, rq, "eviction on bulk GET");
                        rc = -ENOTCONN;
                        pscrpc_abort_bulk(desc);
                } else if (!desc->bd_success ||
                    desc->bd_nob_transferred != desc->bd_nob) {
                        DEBUG_REQ(PLL_ERROR, rq, "%s bulk GET %d(%d)",
                            desc->bd_success ?
                            "truncated" : "network error on",
                            desc->bd_nob_transferred, desc->bd_nob);
                        rc = -ETIMEDOUT;
                }
        } else {
                DEBUG_REQ(PLL_ERROR, rq,
                    "pscrpc I/O bulk get failed: rc=%d", rc);
        }
        comms_error = (rc != 0);

        /* count the number of bytes received, and hold for later... */
        if (rc == 0) {
                v1 = desc->bd_iov[0].iov_base;
                v8 = desc->bd_iov[0].iov_base;
                if (v1 == NULL) {
                        DEBUG_REQ(PLL_ERROR, rq,
                            "desc->bd_iov[0].iov_base is NULL");
                        rc = -ENXIO;
                        goto out;
                }

                DEBUG_REQ(PLL_DIAG, rq,
                    "got %u bytes of bulk data across %d IOVs: "
                    "first byte is %#x (%"PRIx64")",
                    desc->bd_nob, desc->bd_iov_count, *v1, *v8);

                sum = 0;
                for (i = 0; i < desc->bd_iov_count; i++)
                        sum += desc->bd_iov[i].iov_len;
                if (sum != desc->bd_nob)
                        DEBUG_REQ(PLL_WARN, rq,
                            "sum (%d) does not match bd_nob (%d)",
                            sum, desc->bd_nob);
                //rc = pscrpc_reply(rq);
        }

 out:
        if (rc == 0)
                ;
        else if (!comms_error) {
                /* Only reply if there were no comm problems with bulk. */
                rq->rq_status = rc;
                pscrpc_error(rq);
        } else {
#if 0
                // For now let's not free the reply state..
                if (rq->rq_reply_state != NULL) {
                        /* reply out callback would free */
                        pscrpc_rs_decref(rq->rq_reply_state);
                        rq->rq_reply_state = NULL;
                        rq->rq_repmsg = NULL;
                }
#endif
                DEBUG_REQ(PLL_WARN, rq, "ignoring bulk I/O comm error; "
                    "id %s - client will retry", libcfs_id2str(rq->rq_peer));
        }
        pscrpc_free_bulk(desc);
        return (rc);
}
static int
ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
                           struct ksock_tx *tx_ack, __u64 cookie)
{
        struct ksock_tx *tx;

        if (conn->ksnc_type != SOCKLND_CONN_ACK)
                return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);

        /* non-blocking ZC-ACK (to router) */
        LASSERT(!tx_ack || tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

        tx = conn->ksnc_tx_carrier;
        if (!tx) {
                if (tx_ack) {
                        list_add_tail(&tx_ack->tx_list,
                                      &conn->ksnc_tx_queue);
                        conn->ksnc_tx_carrier = tx_ack;
                }
                return 0;
        }

        /* conn->ksnc_tx_carrier */

        if (tx_ack)
                cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

        if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
                return 1;

        if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
                /* replace the keepalive PING with a real ACK */
                LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
                tx->tx_msg.ksm_zc_cookies[1] = cookie;
                return 1;
        }

        if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
            cookie == tx->tx_msg.ksm_zc_cookies[1]) {
                CWARN("%s: duplicated ZC cookie: %llu\n",
                      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
                return 1; /* XXX return error in the future */
        }

        if (!tx->tx_msg.ksm_zc_cookies[0]) {
                /*
                 * NOOP tx has only one ZC-ACK cookie,
                 * can carry at least one more
                 */
                if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
                        tx->tx_msg.ksm_zc_cookies[0] =
                                tx->tx_msg.ksm_zc_cookies[1];
                        tx->tx_msg.ksm_zc_cookies[1] = cookie;
                } else {
                        tx->tx_msg.ksm_zc_cookies[0] = cookie;
                }

                if (tx->tx_msg.ksm_zc_cookies[0] -
                    tx->tx_msg.ksm_zc_cookies[1] > 2) {
                        /*
                         * not likely to carry more ACKs, skip it
                         * to simplify logic
                         */
                        ksocknal_next_tx_carrier(conn);
                }

                return 1;
        }

        /* takes two or more cookies already */

        if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
                __u64 tmp = 0;

                /* two separated cookies: (a+2, a) or (a+1, a) */
                LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
                        tx->tx_msg.ksm_zc_cookies[1] <= 2);

                if (tx->tx_msg.ksm_zc_cookies[0] -
                    tx->tx_msg.ksm_zc_cookies[1] == 2) {
                        if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
                                tmp = cookie;
                } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
                        tmp = tx->tx_msg.ksm_zc_cookies[1];
                } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
                        tmp = tx->tx_msg.ksm_zc_cookies[0];
                }

                if (tmp) {
                        /* range of cookies */
                        tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
                        tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
                        return 1;
                }
        } else {
                /*
                 * ksm_zc_cookies[0] < ksm_zc_cookies[1],
                 * it is range of cookies
                 */
                if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
                    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
                        CWARN("%s: duplicated ZC cookie: %llu\n",
                              libcfs_id2str(conn->ksnc_peer->ksnp_id),
                              cookie);
                        return 1; /* XXX: return error in the future */
                }

                if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
                        tx->tx_msg.ksm_zc_cookies[1] = cookie;
                        return 1;
                }

                if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
                        tx->tx_msg.ksm_zc_cookies[0] = cookie;
                        return 1;
                }
        }

        /* failed to piggyback ZC-ACK */
        if (tx_ack) {
                list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
                /* the next tx can piggyback at least 1 ACK */
                ksocknal_next_tx_carrier(conn);
        }

        return 0;
}
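/*
 * The two-cookie bookkeeping above is compact but subtle:
 * ksm_zc_cookies[0] > ksm_zc_cookies[1] encodes two separate cookies, while
 * ksm_zc_cookies[0] < ksm_zc_cookies[1] encodes an inclusive range. Below is
 * a standalone sketch of just the merge rules, mirroring the branch
 * structure above without the socket plumbing; merge_cookie is an
 * illustrative name, not part of the socklnd API.
 */
#include <stdint.h>
#include <stdio.h>

/* c[0] > c[1]: two separate cookies; c[0] < c[1]: inclusive range */
static int merge_cookie(uint64_t c[2], uint64_t cookie)
{
        if (c[0] > c[1]) {                      /* (a+1, a) or (a+2, a) */
                uint64_t tmp = 0;

                if (c[0] - c[1] == 2) {
                        if (cookie == c[1] + 1) /* fills the hole */
                                tmp = cookie;
                } else if (cookie == c[1] - 1) {
                        tmp = c[1];
                } else if (cookie == c[0] + 1) {
                        tmp = c[0];
                }
                if (tmp) {                      /* three consecutive cookies */
                        c[0] = tmp - 1;         /* become range [tmp-1, tmp+1] */
                        c[1] = tmp + 1;
                        return 1;
                }
                return 0;                       /* can't merge */
        }
        /* already a range: extend it at either end */
        if (cookie == c[1] + 1) {
                c[1] = cookie;
                return 1;
        }
        if (cookie == c[0] - 1) {
                c[0] = cookie;
                return 1;
        }
        return 0;
}

int main(void)
{
        uint64_t c[2] = { 12, 10 };             /* two cookies: 12 and 10 */

        merge_cookie(c, 11);                    /* 10, 11, 12 collapse */
        printf("range [%llu, %llu]\n",          /* prints "range [10, 12]" */
               (unsigned long long)c[0], (unsigned long long)c[1]);
        return 0;
}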