/**
 * Server side bulk abort. Idempotent. Not thread-safe (i.e. only
 * serialises with completion callback)
 */
void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
{
	struct l_wait_info lwi;
	int rc;

	LASSERT(!in_interrupt());		/* might sleep */

	if (!ptlrpc_server_bulk_active(desc))	/* completed or */
		return;				/* never started */

	/* We used to poison the pages with 0xab here because we did not want
	 * to send any meaningful data over the wire for evicted clients (bug
	 * 9297). However, this is no longer safe now that we use the page
	 * cache on the OSS (bug 20560) */

	/* The unlink ensures the callback happens ASAP and is the last
	 * one. If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case, to give liblustre
	 * a chance to run server_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	for (;;) {
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish NALs */
		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
					   cfs_time_seconds(1), NULL, NULL);
		rc = l_wait_event(desc->bd_waitq,
				  !ptlrpc_server_bulk_active(desc), &lwi);
		if (rc == 0)
			return;

		LASSERT(rc == -ETIMEDOUT);
		CWARN("Unexpectedly long timeout: desc %p\n", desc);
	}
}
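/*
 * For reference, the mdunlink_iterate_helper() used above and in the
 * unregister paths below is assumed to do no more than unlink each MD
 * handle in turn; a minimal sketch, not necessarily the exact helper in
 * this tree. LNetMDUnlink() on a handle whose MD has already completed
 * simply fails harmlessly, which is what keeps the callers idempotent.
 */
static void mdunlink_iterate_helper(lnet_handle_md_t *bd_mds, int count)
{
	int i;

	/* unlink every MD posted for this descriptor; each unlink (or the
	 * completion that raced with it) fires the bulk callback */
	for (i = 0; i < count; i++)
		LNetMDUnlink(bd_mds[i]);
}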
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	wait_queue_head_t *wq;
	int rc;

	LASSERT(!in_interrupt());		/* might sleep */

	/* Let's setup deadline for reply unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
		req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	LASSERT(desc->bd_req == req);	/* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one. If it fails, it must be because completion just happened,
	 * but we must still wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
	ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);

	/* Do not wait for unlink to finish. */
	if (async)
		return 0;

	if (req->rq_set)
		wq = &req->rq_set->set_waitq;
	else
		wq = &req->rq_reply_waitq;

	for (;;) {
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish LNDs */
		int cnt = 0;

		while (cnt < LONG_UNLINK &&
		       (rc = wait_event_idle_timeout(*wq,
						     !ptlrpc_client_bulk_active(req),
						     HZ)) == 0)
			cnt += 1;
		if (rc > 0) {
			ptlrpc_rqphase_move(req, req->rq_next_phase);
			return 1;
		}

		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
			  desc);
	}
	return 0;
}
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	struct l_wait_info lwi;
	int rc;
	ENTRY;

	LASSERT(!in_interrupt());		/* might sleep */

	/* Let's setup deadline for reply unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0)
		req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	LASSERT(desc->bd_req == req);	/* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one. If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
	ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);

	/* Do not wait for unlink to finish. */
	if (async)
		RETURN(0);

	for (;;) {
#ifdef __KERNEL__
		/* The wq argument is ignored by user-space wait_event macros */
		wait_queue_head_t *wq = (req->rq_set != NULL) ?
					&req->rq_set->set_waitq :
					&req->rq_reply_waitq;
#endif
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish NALs */
		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
					   cfs_time_seconds(1), NULL, NULL);
		rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
		if (rc == 0) {
			ptlrpc_rqphase_move(req, req->rq_next_phase);
			RETURN(1);
		}

		LASSERT(rc == -ETIMEDOUT);
		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
			  desc);
	}
	RETURN(0);
}
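/*
 * For context, "bulk active" in both versions above boils down to: is the
 * fail_loc-injected unlink deadline still in the future, or are any MDs
 * still attached to the network? A sketch of the predicate, assuming it
 * simply reports bd_md_count under bd_lock (a minimal reconstruction, not
 * necessarily the exact helper in this tree):
 */
static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	int rc;

	/* OBD_FAIL_PTLRPC_LONG_BULK_UNLINK keeps the bulk "active" until
	 * the injected deadline expires */
	if (req->rq_bulk_deadline > cfs_time_current_sec())
		return 1;

	if (desc == NULL)
		return 0;

	/* bd_md_count is decremented by client_bulk_callback() as each
	 * MD's completion event arrives; zero means fully unlinked */
	spin_lock(&desc->bd_lock);
	rc = desc->bd_md_count;
	spin_unlock(&desc->bd_lock);

	return rc;
}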
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
static int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 xid;
	lnet_handle_me_t me_h;
	lnet_md_t md;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		return 0;

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(desc->bd_type == BULK_PUT_SINK ||
		desc->bd_type == BULK_GET_SOURCE);

	/* cleanup the state of the bulk for it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* An XID is only used for a single request from the client.
	 * For retried bulk transfers, a new XID will be allocated in
	 * ptlrpc_check_set() if it needs to be resent, so it is not
	 * using the same RDMA match bits after an error.
	 *
	 * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
	 * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
	xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 xid != desc->bd_last_xid,
		 "registered: %d rq_xid: %llu bd_last_xid: %llu\n",
		 desc->bd_registered, xid, desc->bd_last_xid);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
	desc->bd_registered = 1;
	desc->bd_last_xid = xid;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;			/* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
		md.options = PTLRPC_MD_OPTIONS |
			     ((desc->bd_type == BULK_GET_SOURCE) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		return -ENOMEM;
	}

	/* Set rq_xid to matchbits of the final bulk so that server can
	 * infer the number of bulks that were prepared */
	req->rq_xid = --xid;
	LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
		 "bd_last_xid = x%llu, rq_xid = x%llu\n",
		 desc->bd_last_xid, req->rq_xid);

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the xid */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET,
	       "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
	       desc->bd_md_count,
	       desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_xid, req->rq_xid, desc->bd_portal);

	return 0;
}
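/*
 * A standalone userspace sketch (not part of this file) of the LU-1431
 * match-bit arithmetic above, with hypothetical values: bd_md_max_brw is
 * taken as PTLRPC_BULK_OPS_COUNT == 4 and LNET_MAX_IOV == 256, so a
 * 1024-page BRW needs four MDs. The mask trick requires bd_md_max_brw to
 * be a power of two.
 */
#include <assert.h>

int main(void)
{
	unsigned long long rq_xid = 0x58a9d2b6ULL;	/* hypothetical XID */
	unsigned long long md_max_brw = 4;		/* power of two */
	unsigned long long first, last;
	int iov_count = 1024, max_iov = 256;
	int total_md = (iov_count + max_iov - 1) / max_iov;	/* 4 MDs */

	/* first bulk XID: rq_xid aligned down to a multiple of md_max_brw */
	first = rq_xid & ~(md_max_brw - 1);		/* 0x58a9d2b4 */

	/* MEs are attached at first, first + 1, ..., first + total_md - 1 */
	last = first + total_md - 1;			/* 0x58a9d2b7 */

	/* req->rq_xid is then rewritten to 'last' so the server can
	 * recover total_md by aligning down again (see
	 * ptlrpc_start_bulk_transfer() below) */
	assert(total_md == 4);
	assert((last & ~(md_max_brw - 1)) == first);
	return 0;
}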
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 mbits;
	lnet_handle_me_t me_h;
	lnet_md_t md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type));

	/* cleanup the state of the bulk for it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
	/* rq_mbits is matchbits of the final bulk */
	mbits = req->rq_mbits - total_md + 1;

	LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK),
		 "first mbits = x"LPU64", last mbits = x"LPU64"\n",
		 mbits, req->rq_mbits);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 mbits != desc->bd_last_mbits,
		 "registered: %d rq_mbits: "LPU64" bd_last_mbits: "LPU64"\n",
		 desc->bd_registered, mbits, desc->bd_last_mbits);

	desc->bd_registered = 1;
	desc->bd_last_mbits = mbits;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;			/* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) {
		md.options = PTLRPC_MD_OPTIONS |
			     (ptlrpc_is_bulk_op_get(desc->bd_type) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, mbits,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, mbits,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		RETURN(-ENOMEM);
	}

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the mbits */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
	       "mbits x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count,
	       ptlrpc_is_bulk_op_get(desc->bd_type) ?
	       "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);

	RETURN(0);
}
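/*
 * Both client register paths above slice the bulk into MDs of at most
 * LNET_MAX_IOV pages, one match bit apart. A simplified sketch of what
 * ptlrpc_fill_bulk_md() is assumed to do for the kiov case; the bd_iov
 * field name and the min_t() clamp are assumptions here (field names vary
 * across the versions shown, and the real helper also handles encrypted
 * buffers and iovec-based descriptors):
 */
void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc,
			 int mdidx)
{
	unsigned int start = mdidx * LNET_MAX_IOV;

	LASSERT(mdidx < desc->bd_md_max_brw);

	/* MD #mdidx covers pages [start, start + LNET_MAX_IOV); the last
	 * MD is shorter when bd_iov_count is not a multiple of
	 * LNET_MAX_IOV */
	md->options |= LNET_MD_KIOV;
	md->start = &desc->bd_iov[start];
	md->length = min_t(unsigned int, LNET_MAX_IOV,
			   desc->bd_iov_count - start);
}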
/**
 * Starts bulk transfer for descriptor \a desc on the server.
 * Returns 0 on success or error code.
 */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
	struct obd_export *exp = desc->bd_export;
	struct ptlrpc_connection *conn = exp->exp_connection;
	int rc = 0;
	__u64 mbits;
	int posted_md;
	int total_md;
	lnet_md_t md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_PUT_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_md_count == 0);
	LASSERT(ptlrpc_is_bulk_op_active(desc->bd_type));

	LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* NB total length may be 0 for a read past EOF, so we send 0
	 * length bulks, since the client expects bulk events.
	 *
	 * The client may not need all of the bulk mbits for the RPC. The RPC
	 * used the mbits of the highest bulk mbits needed, and the server
	 * masks off high bits to get bulk count for this RPC. LU-1431 */
	mbits = desc->bd_req->rq_mbits & ~((__u64)desc->bd_md_max_brw - 1);
	total_md = desc->bd_req->rq_mbits - mbits + 1;

	desc->bd_md_count = total_md;
	desc->bd_failure = 0;

	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 2;			/* SENT and ACK/REPLY */

	for (posted_md = 0; posted_md < total_md; mbits++) {
		md.options = PTLRPC_MD_OPTIONS;

		/* NB it's assumed that source and sink buffer frags are
		 * page-aligned. Otherwise we'd have to send client bulk
		 * sizes over and split server buffer accordingly */
		ptlrpc_fill_bulk_md(&md, desc, posted_md);
		rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDBind failed for MD %u: rc = %d\n",
			       exp->exp_obd->obd_name, posted_md, rc);
			LASSERT(rc == -ENOMEM);
			if (posted_md == 0) {
				desc->bd_md_count = 0;
				RETURN(-ENOMEM);
			}
			break;
		}

		/* LU-6441: last md is not sent and desc->bd_md_count == 1 */
		if (OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB3,
					 CFS_FAIL_ONCE) &&
		    posted_md == desc->bd_md_max_brw - 1) {
			posted_md++;
			continue;
		}

		/* Network is about to get at the memory */
		if (ptlrpc_is_bulk_put_source(desc->bd_type))
			rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
				     LNET_ACK_REQ, conn->c_peer,
				     desc->bd_portal, mbits, 0, 0);
		else
			rc = LNetGet(conn->c_self, desc->bd_mds[posted_md],
				     conn->c_peer, desc->bd_portal,
				     mbits, 0);

		posted_md++;
		if (rc != 0) {
			CERROR("%s: failed bulk transfer with %s:%u x"LPU64
			       ": rc = %d\n", exp->exp_obd->obd_name,
			       libcfs_id2str(conn->c_peer), desc->bd_portal,
			       mbits, rc);
			break;
		}
	}

	if (rc != 0) {
		/* Can't send, so we unlink the MD bound above. The UNLINK
		 * event this creates will signal completion with failure,
		 * so we return SUCCESS here! */
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);

		mdunlink_iterate_helper(desc->bd_mds, posted_md);
		RETURN(0);
	}

	CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
	       "id %s mbits "LPX64"-"LPX64"\n", desc->bd_iov_count,
	       desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer),
	       mbits - posted_md, mbits - 1);

	RETURN(0);
}
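/*
 * Continuing the hypothetical example from the client register path: the
 * server sees only rq_mbits (the match bits of the *last* bulk) and
 * recovers both the first match bits and the MD count by aligning down,
 * exactly as at the top of ptlrpc_start_bulk_transfer(). A standalone
 * userspace sketch (all values are assumptions):
 */
#include <assert.h>

int main(void)
{
	unsigned long long rq_mbits = 0x58a9d2b7ULL;	/* last bulk mbits */
	unsigned long long md_max_brw = 4;		/* power of two */
	unsigned long long first;
	int total_md;

	/* mask off the low bits to find where the client started... */
	first = rq_mbits & ~(md_max_brw - 1);		/* 0x58a9d2b4 */

	/* ...and the inclusive distance is the number of bulks posted */
	total_md = rq_mbits - first + 1;		/* 4 */

	assert(total_md >= 1 && total_md <= (int)md_max_brw);
	assert(first + total_md - 1 == rq_mbits);
	return 0;
}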