Code example #1
File: niobuf.c  Project: Keeper-of-the-Keys/Lustre
/**
 * Server side bulk abort. Idempotent. Not thread-safe (i.e. it only
 * serialises with the completion callback).
 */
void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
{
	struct l_wait_info       lwi;
	int                      rc;

	LASSERT(!in_interrupt());           /* might sleep */

	if (!ptlrpc_server_bulk_active(desc))   /* completed or */
		return;                         /* never started */

	/* We used to poison the pages with 0xab here because we did not want
	 * to send any meaningful data over the wire for evicted clients
	 * (bug 9297).  However, this is no longer safe now that we use the
	 * page cache on the OSS (bug 20560). */

	/* The unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case, to give liblustre
	 * a chance to run server_bulk_callback(). */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	for (;;) {
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish NALs */
		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
					   cfs_time_seconds(1), NULL, NULL);
		rc = l_wait_event(desc->bd_waitq,
				  !ptlrpc_server_bulk_active(desc), &lwi);
		if (rc == 0)
			return;

		LASSERT(rc == -ETIMEDOUT);
		CWARN("Unexpectedly long timeout: desc %p\n", desc);
	}
}
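The pattern worth lifting out of this function is "force the callback with an unlink, then wait with a deliberately huge timeout that exists only to produce a warning". Below is a minimal user-space sketch of that idiom, assuming LONG_UNLINK is Lustre's usual 300-second bound; the flag and callback below are illustrative stand-ins, not Lustre APIs.

/* Minimal sketch of the unlink-then-wait idiom in plain C11 + pthreads.
 * Assumption: LONG_UNLINK is 300 seconds; bulk_active stands in for
 * ptlrpc_server_bulk_active() and is NOT a Lustre API. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define LONG_UNLINK 300			/* seconds (assumed value) */

static atomic_int bulk_active = 1;	/* cleared by the "callback" */

static void *completion_callback(void *arg)
{
	(void)arg;
	sleep(2);			/* network completes in finite time */
	atomic_store(&bulk_active, 0);	/* what server_bulk_callback() does */
	return NULL;
}

int main(void)
{
	pthread_t cb;
	int waited = 0;

	pthread_create(&cb, NULL, completion_callback, NULL);
	/* the real code forces the callback via mdunlink_iterate_helper(),
	 * then waits forever, warning every LONG_UNLINK seconds */
	while (atomic_load(&bulk_active)) {
		sleep(1);
		if (++waited % LONG_UNLINK == 0)
			fprintf(stderr, "Unexpectedly long timeout\n");
	}
	printf("bulk went inactive after ~%ds\n", waited);
	pthread_join(cb, NULL);
	return 0;
}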
Code example #2
File: niobuf.c  Project: the-snowwhite/linux-socfpga
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	wait_queue_head_t *wq;
	int rc;

	LASSERT(!in_interrupt());     /* might sleep */

	/* Let's set up a deadline for bulk unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
		req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback()
	 */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		return 1;				/* never registered */

	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
	ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);

	/* Do not wait for unlink to finish. */
	if (async)
		return 0;

	if (req->rq_set)
		wq = &req->rq_set->set_waitq;
	else
		wq = &req->rq_reply_waitq;

	for (;;) {
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish LNDs
		 */
		int cnt = 0;
		while (cnt < LONG_UNLINK &&
		       (rc = wait_event_idle_timeout(*wq,
						     !ptlrpc_client_bulk_active(req),
						     HZ)) == 0)
			cnt += 1;
		if (rc > 0) {
			ptlrpc_rqphase_move(req, req->rq_next_phase);
			return 1;
		}

		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
			  desc);
	}
	return 0;
}
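A detail here compared with example #1: the async path only arms rq_bulk_deadline (under a fail-injection check) and returns 0, leaving the caller to retry later. A hedged sketch of that "arm once, check later" bookkeeping, using wall-clock time() and made-up helper names:

/* Sketch of the async-unlink deadline bookkeeping; names are
 * illustrative, not Lustre API. Assumption: LONG_UNLINK is 300s. */
#include <stdio.h>
#include <time.h>

#define LONG_UNLINK 300

static time_t bulk_deadline;		/* stands in for rq_bulk_deadline */

static void arm_bulk_deadline(void)
{
	if (bulk_deadline == 0)		/* arm only once, as the code does */
		bulk_deadline = time(NULL) + LONG_UNLINK;
}

static int bulk_unlink_expired(void)
{
	return bulk_deadline != 0 && time(NULL) > bulk_deadline;
}

int main(void)
{
	arm_bulk_deadline();
	printf("armed; expired now? %d\n", bulk_unlink_expired());
	return 0;
}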
Code example #3
File: niobuf.c  Project: hocks/lustre-release
/**
 * Disconnect a bulk desc from the network. Idempotent. Not
 * thread-safe (i.e. only interlocks with completion callback).
 * Returns 1 on success or 0 if network unregistration failed for whatever
 * reason.
 */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	struct l_wait_info       lwi;
	int                      rc;
	ENTRY;

	LASSERT(!in_interrupt());     /* might sleep */

	/* Let's set up a deadline for bulk unlink. */
	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
	    async && req->rq_bulk_deadline == 0)
		req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	LASSERT(desc->bd_req == req);  /* bd_req NULL until registered */

	/* the unlink ensures the callback happens ASAP and is the last
	 * one.  If it fails, it must be because completion just happened,
	 * but we must still l_wait_event() in this case to give liblustre
	 * a chance to run client_bulk_callback() */
	mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);

	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
		RETURN(1);				/* never registered */

	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
	ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);

	/* Do not wait for unlink to finish. */
	if (async)
		RETURN(0);

	for (;;) {
#ifdef __KERNEL__
		/* The wq argument is ignored by user-space wait_event macros */
		wait_queue_head_t *wq = (req->rq_set != NULL) ?
					&req->rq_set->set_waitq :
					&req->rq_reply_waitq;
#endif
		/* Network access will complete in finite time but the HUGE
		 * timeout lets us CWARN for visibility of sluggish NALs */
		lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
					   cfs_time_seconds(1), NULL, NULL);
		rc = l_wait_event(*wq, !ptlrpc_client_bulk_active(req), &lwi);
		if (rc == 0) {
			ptlrpc_rqphase_move(req, req->rq_next_phase);
			RETURN(1);
		}

		LASSERT(rc == -ETIMEDOUT);
		DEBUG_REQ(D_WARNING, req, "Unexpectedly long timeout: desc %p",
			  desc);
	}
	RETURN(0);
}
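Both client variants park the request in an "Unregistering" phase and later resume it via rq_next_phase. A toy model of that save/restore follows; the enum values and struct are illustrative, and the real ptlrpc_rqphase_move() does more (e.g. debug tracing).

/* Toy model of the rq_phase save/restore used above; not the real
 * ptlrpc_request. */
#include <stdio.h>

enum rq_phase { RQ_PHASE_RPC, RQ_PHASE_UNREGISTERING, RQ_PHASE_INTERPRET };

struct toy_req {
	enum rq_phase rq_phase;
	enum rq_phase rq_next_phase;	/* where to resume after unlink */
};

static void rqphase_move(struct toy_req *req, enum rq_phase new_phase)
{
	if (new_phase == RQ_PHASE_UNREGISTERING)
		req->rq_next_phase = req->rq_phase;	/* save resume point */
	req->rq_phase = new_phase;
}

int main(void)
{
	struct toy_req req = { .rq_phase = RQ_PHASE_RPC };

	rqphase_move(&req, RQ_PHASE_UNREGISTERING);	/* bulk still linked */
	rqphase_move(&req, req.rq_next_phase);		/* unlink completed */
	printf("resumed in phase %d\n", (int)req.rq_phase);
	return 0;
}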
Code example #4
File: niobuf.c  Project: a2hojsjsjs/linux
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
static int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 xid;
	lnet_handle_me_t me_h;
	lnet_md_t md;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		return 0;

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(desc->bd_type == BULK_PUT_SINK ||
		desc->bd_type == BULK_GET_SOURCE);

	/* clean up the state of the bulk, since it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* An XID is only used for a single request from the client.
	 * For retried bulk transfers, a new XID will be allocated in
	 * ptlrpc_check_set() if it needs to be resent, so it is not
	 * using the same RDMA match bits after an error.
	 *
	 * For multi-bulk RPCs, rq_xid is the last XID needed for bulks. The
	 * first bulk XID is power-of-two aligned before rq_xid. LU-1431 */
	xid = req->rq_xid & ~((__u64)desc->bd_md_max_brw - 1);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 xid != desc->bd_last_xid,
		 "registered: %d  rq_xid: %llu bd_last_xid: %llu\n",
		 desc->bd_registered, xid, desc->bd_last_xid);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
	desc->bd_registered = 1;
	desc->bd_last_xid = xid;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;		       /* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, xid++) {
		md.options = PTLRPC_MD_OPTIONS |
			     ((desc->bd_type == BULK_GET_SOURCE) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, xid, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, xid,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		return -ENOMEM;
	}

	/* Set rq_xid to matchbits of the final bulk so that the server can
	 * infer the number of bulks that were prepared */
	req->rq_xid = --xid;
	LASSERTF(desc->bd_last_xid == (req->rq_xid & PTLRPC_BULK_OPS_MASK),
		 "bd_last_xid = x%llu, rq_xid = x%llu\n",
		 desc->bd_last_xid, req->rq_xid);

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the xid */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, xid x%#llx-%#llx, portal %u\n",
	       desc->bd_md_count,
	       desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_xid, req->rq_xid, desc->bd_portal);

	return 0;
}
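The LU-1431 trick is pure bit arithmetic: bd_md_max_brw is a power of two, so clearing the low bits of rq_xid yields the first bulk XID, and after the posting loop rq_xid is rewritten to the last one, letting the server recover the bulk count. A standalone sketch of that arithmetic; the concrete values are made up.

/* Standalone sketch of the LU-1431 XID arithmetic; only the masking
 * mirrors the code above, the values are illustrative. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_brw = 8;			/* power of two by design */
	uint64_t rq_xid  = 0x123457;		/* XID of the last bulk */
	uint64_t first   = rq_xid & ~(max_brw - 1);	/* first bulk XID */
	uint64_t count   = rq_xid - first + 1;	/* server-derived bulk count */

	printf("bulk XIDs x%llx..x%llx => %llu MDs\n",
	       (unsigned long long)first, (unsigned long long)rq_xid,
	       (unsigned long long)count);
	return 0;
}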
Code example #5
File: niobuf.c  Project: Keeper-of-the-Keys/Lustre
/**
 * Register bulk at the sender for later transfer.
 * Returns 0 on success or error code.
 */
int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
	struct ptlrpc_bulk_desc *desc = req->rq_bulk;
	lnet_process_id_t peer;
	int rc = 0;
	int rc2;
	int posted_md;
	int total_md;
	__u64 mbits;
	lnet_handle_me_t  me_h;
	lnet_md_t         md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_GET_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_nob > 0);
	LASSERT(desc->bd_md_count == 0);
	LASSERT(desc->bd_md_max_brw <= PTLRPC_BULK_OPS_COUNT);
	LASSERT(desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
	LASSERT(desc->bd_req != NULL);
	LASSERT(ptlrpc_is_bulk_op_passive(desc->bd_type));

	/* clean up the state of the bulk, since it will be reused */
	if (req->rq_resend || req->rq_send_state == LUSTRE_IMP_REPLAY)
		desc->bd_nob_transferred = 0;
	else
		LASSERT(desc->bd_nob_transferred == 0);

	desc->bd_failure = 0;

	peer = desc->bd_import->imp_connection->c_peer;

	LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	total_md = (desc->bd_iov_count + LNET_MAX_IOV - 1) / LNET_MAX_IOV;
	/* rq_mbits is matchbits of the final bulk */
	mbits = req->rq_mbits - total_md + 1;

	LASSERTF(mbits == (req->rq_mbits & PTLRPC_BULK_OPS_MASK),
		 "first mbits = x"LPU64", last mbits = x"LPU64"\n",
		 mbits, req->rq_mbits);
	LASSERTF(!(desc->bd_registered &&
		   req->rq_send_state != LUSTRE_IMP_REPLAY) ||
		 mbits != desc->bd_last_mbits,
		 "registered: %d  rq_mbits: "LPU64" bd_last_mbits: "LPU64"\n",
		 desc->bd_registered, mbits, desc->bd_last_mbits);

	desc->bd_registered = 1;
	desc->bd_last_mbits = mbits;
	desc->bd_md_count = total_md;
	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 1;                       /* PUT or GET */

	for (posted_md = 0; posted_md < total_md; posted_md++, mbits++) {
		md.options = PTLRPC_MD_OPTIONS |
			     (ptlrpc_is_bulk_op_get(desc->bd_type) ?
			      LNET_MD_OP_GET : LNET_MD_OP_PUT);
		ptlrpc_fill_bulk_md(&md, desc, posted_md);

		rc = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
				  LNET_UNLINK, LNET_INS_AFTER, &me_h);
		if (rc != 0) {
			CERROR("%s: LNetMEAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, mbits,
			       posted_md, rc);
			break;
		}

		/* About to let the network at it... */
		rc = LNetMDAttach(me_h, md, LNET_UNLINK,
				  &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDAttach failed x"LPU64"/%d: rc = %d\n",
			       desc->bd_import->imp_obd->obd_name, mbits,
			       posted_md, rc);
			rc2 = LNetMEUnlink(me_h);
			LASSERT(rc2 == 0);
			break;
		}
	}

	if (rc != 0) {
		LASSERT(rc == -ENOMEM);
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);
		mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
		req->rq_status = -ENOMEM;
		RETURN(-ENOMEM);
	}

	spin_lock(&desc->bd_lock);
	/* Holler if peer manages to touch buffers before he knows the mbits */
	if (desc->bd_md_count != total_md)
		CWARN("%s: Peer %s touched %d buffers while I registered\n",
		      desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
		      total_md - desc->bd_md_count);
	spin_unlock(&desc->bd_lock);

	CDEBUG(D_NET, "Setup %u bulk %s buffers: %u pages %u bytes, "
	       "mbits x"LPX64"-"LPX64", portal %u\n", desc->bd_md_count,
	       ptlrpc_is_bulk_op_get(desc->bd_type) ? "get-source" : "put-sink",
	       desc->bd_iov_count, desc->bd_nob,
	       desc->bd_last_mbits, req->rq_mbits, desc->bd_portal);

	RETURN(0);
}
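In this newer variant rq_mbits already names the last bulk, so the first one is recovered by subtraction, and the LASSERTF re-checks the alignment invariant. A sketch of just that invariant; the PTLRPC_BULK_OPS_COUNT value and the mask definition below are assumptions mirroring ~(COUNT - 1), not the real headers.

/* Sketch of the mbits invariant asserted above; constants are assumed
 * for illustration. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PTLRPC_BULK_OPS_COUNT 8		/* assumption: power of two */
#define PTLRPC_BULK_OPS_MASK (~((uint64_t)PTLRPC_BULK_OPS_COUNT - 1))

int main(void)
{
	uint64_t rq_mbits = 0x9005;	/* match bits of the last bulk */
	int      total_md = 6;		/* MDs this RPC needs */
	uint64_t first    = rq_mbits - total_md + 1;

	/* the invariant the LASSERTF checks in ptlrpc_register_bulk() */
	assert(first == (rq_mbits & PTLRPC_BULK_OPS_MASK));
	printf("mbits x%llx..x%llx\n",
	       (unsigned long long)first, (unsigned long long)rq_mbits);
	return 0;
}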
Code example #6
File: niobuf.c  Project: Keeper-of-the-Keys/Lustre
/**
 * Starts bulk transfer for descriptor \a desc on the server.
 * Returns 0 on success or error code.
 */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
	struct obd_export        *exp = desc->bd_export;
	struct ptlrpc_connection *conn = exp->exp_connection;
	int                       rc = 0;
	__u64                     mbits;
	int                       posted_md;
	int                       total_md;
	lnet_md_t                 md;
	ENTRY;

	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_PUT_NET))
		RETURN(0);

	/* NB no locking required until desc is on the network */
	LASSERT(desc->bd_md_count == 0);
	LASSERT(ptlrpc_is_bulk_op_active(desc->bd_type));

	LASSERT(desc->bd_cbid.cbid_fn == server_bulk_callback);
	LASSERT(desc->bd_cbid.cbid_arg == desc);

	/* NB total length may be 0 for a read past EOF, so we send 0
	 * length bulks, since the client expects bulk events.
	 *
	 * The client may not need all of the bulk mbits for the RPC. The RPC
	 * carries the mbits of the highest bulk needed, and the server masks
	 * off the high bits to get the bulk count for this RPC. LU-1431 */
	mbits = desc->bd_req->rq_mbits & ~((__u64)desc->bd_md_max_brw - 1);
	total_md = desc->bd_req->rq_mbits - mbits + 1;

	desc->bd_md_count = total_md;
	desc->bd_failure = 0;

	md.user_ptr = &desc->bd_cbid;
	md.eq_handle = ptlrpc_eq_h;
	md.threshold = 2; /* SENT and ACK/REPLY */

	for (posted_md = 0; posted_md < total_md; mbits++) {
		md.options = PTLRPC_MD_OPTIONS;

		/* NB it's assumed that source and sink buffer frags are
		 * page-aligned. Otherwise we'd have to send client bulk
		 * sizes over and split server buffer accordingly */
		ptlrpc_fill_bulk_md(&md, desc, posted_md);
		rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_mds[posted_md]);
		if (rc != 0) {
			CERROR("%s: LNetMDBind failed for MD %u: rc = %d\n",
			       exp->exp_obd->obd_name, posted_md, rc);
			LASSERT(rc == -ENOMEM);
			if (posted_md == 0) {
				desc->bd_md_count = 0;
				RETURN(-ENOMEM);
			}
			break;
		}

		/* LU-6441: last md is not sent and desc->bd_md_count == 1 */
		if (OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB3,
					 CFS_FAIL_ONCE) &&
		    posted_md == desc->bd_md_max_brw - 1) {
			posted_md++;
			continue;
		}

		/* Network is about to get at the memory */
		if (ptlrpc_is_bulk_put_source(desc->bd_type))
			rc = LNetPut(conn->c_self, desc->bd_mds[posted_md],
				     LNET_ACK_REQ, conn->c_peer,
				     desc->bd_portal, mbits, 0, 0);
		else
			rc = LNetGet(conn->c_self, desc->bd_mds[posted_md],
				     conn->c_peer, desc->bd_portal, mbits, 0);

		posted_md++;
		if (rc != 0) {
			CERROR("%s: failed bulk transfer with %s:%u x"LPU64": "
			       "rc = %d\n", exp->exp_obd->obd_name,
			       libcfs_id2str(conn->c_peer), desc->bd_portal,
			       mbits, rc);
			break;
		}
	}

	if (rc != 0) {
		/* Can't send, so we unlink the MD bound above.  The UNLINK
		 * event this creates will signal completion with failure,
		 * so we return SUCCESS here! */
		spin_lock(&desc->bd_lock);
		desc->bd_md_count -= total_md - posted_md;
		spin_unlock(&desc->bd_lock);
		LASSERT(desc->bd_md_count >= 0);

		mdunlink_iterate_helper(desc->bd_mds, posted_md);
		RETURN(0);
	}

	CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
	       "id %s mbits "LPX64"-"LPX64"\n", desc->bd_iov_count,
	       desc->bd_nob, desc->bd_portal, libcfs_id2str(conn->c_peer),
	       mbits - posted_md, mbits - 1);

	RETURN(0);
}
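On the failure path above, note the accounting: bd_md_count is pre-charged with total_md, MDs that were never posted are subtracted immediately, and each posted MD still owes exactly one callback (forced via mdunlink_iterate_helper()), which is why the function can report success even on a send failure. A toy sketch of that bookkeeping, with illustrative names and values:

/* Toy sketch of the bd_md_count bookkeeping on the failure path; not a
 * Lustre API, just the counting shape. */
#include <stdio.h>

int main(void)
{
	int total_md    = 4;	/* pre-charged at descriptor setup */
	int posted_md   = 2;	/* send failed after posting two MDs */
	int bd_md_count = total_md;

	bd_md_count -= total_md - posted_md;	/* drop never-posted MDs */

	/* mdunlink_iterate_helper(): each posted MD's unlink/completion
	 * callback decrements the count exactly once */
	for (int i = 0; i < posted_md; i++)
		bd_md_count--;

	printf("descriptor active: %s\n", bd_md_count ? "yes" : "no");
	return 0;
}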