Example #1
static int
usocklnd_send_tx_immediately(usock_conn_t *conn, usock_tx_t *tx)
{
        int           rc;
        int           rc2;
        int           partial_send = 0;
        usock_peer_t *peer         = conn->uc_peer;

        LASSERT (peer != NULL);

        /* usocklnd_enqueue_tx() turned it on for us */
        LASSERT(conn->uc_sending);

        //counter_imm_start++;
        rc = usocklnd_send_tx(conn, tx);
        if (rc == 0) { /* partial send or connection closed */
                pthread_mutex_lock(&conn->uc_lock);
                cfs_list_add(&tx->tx_list, &conn->uc_tx_list);
                conn->uc_sending = 0;
                pthread_mutex_unlock(&conn->uc_lock);
                partial_send = 1;
        } else {
                usocklnd_destroy_tx(peer->up_ni, tx);
                /* NB: lnetmsg was finalized, so we *must* return 0 */

                if (rc < 0) { /* real error */
                        usocklnd_conn_kill(conn);
                        return 0;
                }

                /* rc == 1: tx was sent completely */
                rc = 0; /* report 'OK' to the caller */
                //counter_imm_complete++;
        }

        pthread_mutex_lock(&conn->uc_lock);
        conn->uc_sending = 0;

        /* schedule write handler */
        if (partial_send ||
            (conn->uc_state == UC_READY &&
             (!cfs_list_empty(&conn->uc_tx_list) ||
              !cfs_list_empty(&conn->uc_zcack_list)))) {
                conn->uc_tx_deadline =
                        cfs_time_shift(usock_tuns.ut_timeout);
                conn->uc_tx_flag = 1;
                rc2 = usocklnd_add_pollrequest(conn, POLL_TX_SET_REQUEST, POLLOUT);
                if (rc2 != 0)
                        usocklnd_conn_kill_locked(conn);
                else
                        usocklnd_wakeup_pollthread(conn->uc_pt_idx);
        }

        pthread_mutex_unlock(&conn->uc_lock);

        return rc;
}
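
All of the examples on this page center on cfs_time_shift(), which converts a relative number of seconds into an absolute time value comparable with cfs_time_current(). A minimal sketch of what these helpers look like, assuming a jiffies-backed kernel clock (the real libcfs definitions differ between Lustre releases and between kernel and userspace builds):

#include <linux/jiffies.h>

typedef unsigned long cfs_time_t;

/* illustrative assumption only, not the verbatim libcfs implementation */
static inline cfs_time_t cfs_time_current(void)
{
        return jiffies;                 /* current absolute time, in ticks */
}

static inline cfs_time_t cfs_time_shift(int seconds)
{
        /* negative values yield a time already in the past, which some
         * examples below use to force immediate expiry */
        return jiffies + (long)seconds * HZ;
}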
Example #2
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	struct task_struct		*task;

	ENTRY;

	OBD_ALLOC_PTR(d->opd_pre);
	if (d->opd_pre == NULL)
		RETURN(-ENOMEM);

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	init_waitqueue_head(&d->opd_pre_waitq);
	init_waitqueue_head(&d->opd_pre_user_waitq);
	init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	task = kthread_run(osp_precreate_thread, d,
			   "osp-pre-%u-%u", d->opd_index, d->opd_group);
	if (IS_ERR(task)) {
		CERROR("can't start precreate thread %ld\n", PTR_ERR(task));
		RETURN(PTR_ERR(task));
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
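
The error check above relies on a standard kernel idiom: kthread_run() never returns NULL - on failure it returns a small negative errno encoded as a pointer, which IS_ERR() detects and PTR_ERR() decodes. A condensed sketch (start_worker() is a hypothetical helper, not part of OSP):

#include <linux/err.h>
#include <linux/kthread.h>

/* hypothetical helper, shown only to isolate the IS_ERR()/PTR_ERR() idiom */
static int start_worker(int (*fn)(void *), void *arg)
{
        struct task_struct *task = kthread_run(fn, arg, "worker");

        if (IS_ERR(task))               /* the pointer encodes a -errno */
                return PTR_ERR(task);   /* e.g. -ENOMEM */

        return 0;                       /* thread is running */
}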
Example #3
/*
 * XXX: there might be a case where removed object(s) do not add free
 * space (empty objects). If the number of such deletions is high, we
 * can start to update statfs too often - an RPC storm.
 * TODO: some throttling is needed
 */
void osp_statfs_need_now(struct osp_device *d)
{
	if (!d->opd_statfs_update_in_progress) {
		/*
		 * if the current status is -ENOSPC (lack of free space on
		 * the OST), we should poll the OST immediately once the
		 * object destroy is acknowledged
		 */
		d->opd_statfs_fresh_till = cfs_time_shift(-1);
		cfs_timer_disarm(&d->opd_statfs_timer);
		wake_up(&d->opd_pre_waitq);
	}
}
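
Setting opd_statfs_fresh_till one second into the past is what invalidates the cached statfs data: any consumer comparing the stamp against the current time now sees it as expired. A hedged sketch of such a consumer (osp_statfs_is_fresh() is a hypothetical name, used only for illustration):

/* hypothetical freshness check, sketching how opd_statfs_fresh_till is
 * meant to be consumed; not taken from the OSP sources */
static int osp_statfs_is_fresh(struct osp_device *d)
{
        return cfs_time_before(cfs_time_current(),
                               d->opd_statfs_fresh_till);
}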
Example #4
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	int			 rc;

	ENTRY;

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	cfs_waitq_init(&d->opd_pre_waitq);
	cfs_waitq_init(&d->opd_pre_user_waitq);
	cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	rc = cfs_create_thread(osp_precreate_thread, d, 0);
	if (rc < 0) {
		CERROR("can't start precreate thread %d\n", rc);
		RETURN(rc);
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
Example #5
static int osp_statfs_interpret(const struct lu_env *env,
				struct ptlrpc_request *req,
				union ptlrpc_async_args *aa, int rc)
{
	struct obd_import	*imp = req->rq_import;
	struct obd_statfs	*msfs;
	struct osp_device	*d;

	ENTRY;

	aa = ptlrpc_req_async_args(req);
	d = aa->pointer_arg[0];
	LASSERT(d);

	if (rc != 0)
		GOTO(out, rc);

	msfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
	if (msfs == NULL)
		GOTO(out, rc = -EPROTO);

	d->opd_statfs = *msfs;

	osp_pre_update_status(d, rc);

	/* schedule next update */
	d->opd_statfs_fresh_till = cfs_time_shift(d->opd_statfs_maxage);
	cfs_timer_arm(&d->opd_statfs_timer, d->opd_statfs_fresh_till);
	d->opd_statfs_update_in_progress = 0;

	CDEBUG(D_CACHE, "updated statfs %p\n", d);

	RETURN(0);
out:
	/* couldn't update statfs, try again as soon as possible */
	if (d->opd_pre != NULL && osp_precreate_running(d))
		wake_up(&d->opd_pre_waitq);

	if (req->rq_import_generation == imp->imp_generation)
		CDEBUG(D_CACHE, "%s: couldn't update statfs: rc = %d\n",
		       d->opd_obd->obd_name, rc);
	RETURN(rc);
}
Example #6
static int osp_statfs_update(struct osp_device *d)
{
	struct ptlrpc_request	*req;
	struct obd_import	*imp;
	union ptlrpc_async_args	*aa;
	int			 rc;

	ENTRY;

	CDEBUG(D_CACHE, "going to update statfs\n");

	imp = d->opd_obd->u.cli.cl_import;
	LASSERT(imp);

	req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
	if (req == NULL)
		RETURN(-ENOMEM);

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
	if (rc) {
		ptlrpc_request_free(req);
		RETURN(rc);
	}
	ptlrpc_request_set_replen(req);
	req->rq_request_portal = OST_CREATE_PORTAL;
	ptlrpc_at_set_req_timeout(req);

	req->rq_interpret_reply = (ptlrpc_interpterer_t)osp_statfs_interpret;
	aa = ptlrpc_req_async_args(req);
	aa->pointer_arg[0] = d;

	/*
	 * no updates till reply
	 */
	cfs_timer_disarm(&d->opd_statfs_timer);
	d->opd_statfs_fresh_till = cfs_time_shift(obd_timeout * 1000);
	d->opd_statfs_update_in_progress = 1;

	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);

	RETURN(0);
}
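
Examples #5 and #6 form one cycle: osp_statfs_update() disarms the timer, pushes opd_statfs_fresh_till far into the future so no concurrent update is started, and fires the RPC; osp_statfs_interpret() then installs the real freshness window and rearms the timer. A hedged sketch of the guard a caller would apply before kicking an update (osp_statfs_maybe_update() is a hypothetical name):

/* hypothetical guard combining the two fields maintained above;
 * illustration only, not taken from the OSP sources */
static void osp_statfs_maybe_update(struct osp_device *d)
{
        if (!d->opd_statfs_update_in_progress &&
            cfs_time_aftereq(cfs_time_current(), d->opd_statfs_fresh_till))
                osp_statfs_update(d);
}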
Example #7
/*
 * called to reserve object in the pool
 * return codes:
 *  ENOSPC - no space on the corresponding OST
 *  EAGAIN - precreation is in progress, try later
 *  EIO    - no access to OST
 */
int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
{
	struct l_wait_info	 lwi;
	cfs_time_t		 expire = cfs_time_shift(obd_timeout);
	int			 precreated, rc;

	ENTRY;

	LASSERTF(osp_objs_precreated(env, d) >= 0, "Last created FID "DFID
		 "Next FID "DFID"\n", PFID(&d->opd_pre_last_created_fid),
		 PFID(&d->opd_pre_used_fid));

	/*
	 * wait till:
	 *  - preallocation is done
	 *  - no free space expected soon
	 *  - can't connect to OST for too long (obd_timeout)
	 *  - OST can allocate fid sequence.
	 */
	while ((rc = d->opd_pre_status) == 0 || rc == -ENOSPC ||
		rc == -ENODEV || rc == -EAGAIN || rc == -ENOTCONN) {

		/*
		 * increase number of precreations
		 */
		precreated = osp_objs_precreated(env, d);
		if (d->opd_pre_grow_count < d->opd_pre_max_grow_count &&
		    d->opd_pre_grow_slow == 0 &&
		    precreated <= (d->opd_pre_grow_count / 4 + 1)) {
			spin_lock(&d->opd_pre_lock);
			d->opd_pre_grow_slow = 1;
			d->opd_pre_grow_count *= 2;
			spin_unlock(&d->opd_pre_lock);
		}

		spin_lock(&d->opd_pre_lock);
		precreated = osp_objs_precreated(env, d);
		if (precreated > d->opd_pre_reserved &&
		    !d->opd_pre_recovering) {
			d->opd_pre_reserved++;
			spin_unlock(&d->opd_pre_lock);
			rc = 0;

			/* XXX: don't wake up if precreation is in progress */
			if (osp_precreate_near_empty_nolock(env, d) &&
			   !osp_precreate_end_seq_nolock(env, d))
				wake_up(&d->opd_pre_waitq);

			break;
		}
		spin_unlock(&d->opd_pre_lock);

		/*
		 * all precreated objects have been used, and the no-space
		 * status leaves us no chance to succeed soon; but if a
		 * destroy is in progress, we should wait until it is done -
		 * some space might be released
		 */
		if (unlikely(rc == -ENOSPC)) {
			if (d->opd_syn_changes) {
				/* force local commit to release space */
				dt_commit_async(env, d->opd_storage);
			}
			if (d->opd_syn_rpc_in_progress) {
				/* just wait till destroys are done */
				/* see l_wait_event() a few lines below */
			}
			if (d->opd_syn_changes +
			    d->opd_syn_rpc_in_progress == 0) {
				/* no hope for free space */
				break;
			}
		}

		/* XXX: don't wake up if precreation is in progress */
		wake_up(&d->opd_pre_waitq);

		lwi = LWI_TIMEOUT(expire - cfs_time_current(),
				osp_precreate_timeout_condition, d);
		if (cfs_time_aftereq(cfs_time_current(), expire)) {
			rc = -ETIMEDOUT;
			break;
		}

		l_wait_event(d->opd_pre_user_waitq,
			     osp_precreate_ready_condition(env, d), &lwi);
	}

	RETURN(rc);
}
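
The loop above is the canonical absolute-deadline pattern: expire = cfs_time_shift(obd_timeout) is computed once, and every iteration compares the current time against it, so repeated wakeups cannot stretch the total wait. A standalone, runnable sketch of the same pattern in plain C, with time(2) standing in for the libcfs clock:

#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* stand-in condition; in the example above this role is played by
 * osp_precreate_ready_condition() */
static int resource_ready(void)
{
        return 0;                       /* never ready: demonstrates timeout */
}

static int wait_with_deadline(int timeout_sec)
{
        time_t expire = time(NULL) + timeout_sec;   /* cfs_time_shift() */

        while (!resource_ready()) {
                if (time(NULL) >= expire)           /* cfs_time_aftereq() */
                        return -1;                  /* -ETIMEDOUT */
                sleep(1);                           /* l_wait_event() stand-in */
        }
        return 0;
}

int main(void)
{
        printf("rc = %d\n", wait_with_deadline(3));
        return 0;
}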
Ejemplo n.º 8
0
File: pinger.c Project: hpc/lustre
        ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);

        RETURN(0);
}

void ptlrpc_update_next_ping(struct obd_import *imp, int soon)
{
#ifdef ENABLE_PINGER
        int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL;
        if (imp->imp_state == LUSTRE_IMP_DISCON) {
                int dtime = max_t(int, CONNECTION_SWITCH_MIN,
                                  AT_OFF ? 0 :
                                  at_get(&imp->imp_at.iat_net_latency));
                time = min(time, dtime);
        }
        imp->imp_next_ping = cfs_time_shift(time);
#endif /* ENABLE_PINGER */
}

void ptlrpc_ping_import_soon(struct obd_import *imp)
{
        imp->imp_next_ping = cfs_time_current();
}

static inline int imp_is_deactive(struct obd_import *imp)
{
        return (imp->imp_deactive ||
                OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE));
}

static inline int ptlrpc_next_reconnect(struct obd_import *imp)
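
ptlrpc_update_next_ping() schedules the next ping a full interval ahead, but while the import is disconnected the interval is clamped down so reconnect attempts are not delayed; ptlrpc_ping_import_soon() simply stamps 'now'. A runnable sketch of the clamping with assumed stand-in values (the real PING_INTERVAL and CONNECTION_SWITCH_MIN are configuration-dependent):

#include <stdio.h>

/* assumed stand-in values, for illustration only */
#define PING_INTERVAL          25
#define CONNECTION_SWITCH_MIN   5

static int next_ping_delay(int disconnected, int net_latency)
{
        int time = PING_INTERVAL;

        if (disconnected) {
                int dtime = CONNECTION_SWITCH_MIN > net_latency ?
                            CONNECTION_SWITCH_MIN : net_latency;
                time = time < dtime ? time : dtime;
        }
        return time;
}

int main(void)
{
        printf("connected: %ds, disconnected: %ds\n",
               next_ping_delay(0, 2), next_ping_delay(1, 2));
        return 0;
}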
Ejemplo n.º 9
/* All actions that we need after sending hello on passive conn:
 * 1) Cope with 1st easy case: conn is already linked to a peer
 * 2) Cope with 2nd easy case: remove zombie conn
 * 3) Resolve race:
 *    a) find the peer
 *    b) link the conn to the peer if conn[idx] is empty
 *    c) if the conn[idx] isn't empty and is in READY state,
 *       remove the conn as duplicated
 *    d) if the conn[idx] isn't empty and isn't in READY state,
 *       override conn[idx] with the conn
 */
int
usocklnd_passiveconn_hellosent(usock_conn_t *conn)
{
        usock_conn_t    *conn2;
        usock_peer_t    *peer;
        cfs_list_t       tx_list;
        cfs_list_t       zcack_list;
        int              idx;
        int              rc = 0;

        /* almost nothing to do if conn is already linked to peer hash table */
        if (conn->uc_peer != NULL)
                goto passive_hellosent_done;

        /* conn->uc_peer == NULL, so the conn isn't accessible via
         * peer hash list, so nobody can touch the conn but us */

        if (conn->uc_ni == NULL) /* remove zombie conn */
                goto passive_hellosent_connkill;

        /* all code below is race resolution: normally a passive conn
         * is linked to its peer just after receiving hello */
        CFS_INIT_LIST_HEAD (&tx_list);
        CFS_INIT_LIST_HEAD (&zcack_list);

        /* conn is passive and isn't linked to any peer,
           so its tx and zc_ack lists have to be empty */
        LASSERT (cfs_list_empty(&conn->uc_tx_list) &&
                 cfs_list_empty(&conn->uc_zcack_list) &&
                 conn->uc_sending == 0);

        rc = usocklnd_find_or_create_peer(conn->uc_ni, conn->uc_peerid, &peer);
        if (rc)
                return rc;

        idx = usocklnd_type2idx(conn->uc_type);

        /* try to link conn to peer */
        pthread_mutex_lock(&peer->up_lock);
        if (peer->up_conns[idx] == NULL) {
                usocklnd_link_conn_to_peer(conn, peer, idx);
                usocklnd_conn_addref(conn);
                conn->uc_peer = peer;
                usocklnd_peer_addref(peer);
        } else {
                conn2 = peer->up_conns[idx];
                pthread_mutex_lock(&conn2->uc_lock);

                if (conn2->uc_state == UC_READY) {
                        /* conn2 is in READY state, so conn is "duplicated" */
                        pthread_mutex_unlock(&conn2->uc_lock);
                        pthread_mutex_unlock(&peer->up_lock);
                        usocklnd_peer_decref(peer);
                        goto passive_hellosent_connkill;
                }

                /* uc_state != UC_READY => switch conn and conn2 */
                /* Relink txs and zc_acks from conn2 to conn.
                 * We're sure that nobody but us can access the conn;
                 * nevertheless we take the mutex (if we're wrong, a
                 * deadlock is easier to spot than a corrupted list) */
                cfs_list_add(&tx_list, &conn2->uc_tx_list);
                cfs_list_del_init(&conn2->uc_tx_list);
                cfs_list_add(&zcack_list, &conn2->uc_zcack_list);
                cfs_list_del_init(&conn2->uc_zcack_list);

                pthread_mutex_lock(&conn->uc_lock);
                cfs_list_add_tail(&conn->uc_tx_list, &tx_list);
                cfs_list_del_init(&tx_list);
                cfs_list_add_tail(&conn->uc_zcack_list, &zcack_list);
                cfs_list_del_init(&zcack_list);
                conn->uc_peer = peer;
                pthread_mutex_unlock(&conn->uc_lock);

                conn2->uc_peer = NULL; /* make conn2 zombie */
                pthread_mutex_unlock(&conn2->uc_lock);
                usocklnd_conn_decref(conn2);

                usocklnd_link_conn_to_peer(conn, peer, idx);
                usocklnd_conn_addref(conn);
                conn->uc_peer = peer;
        }

        lnet_ni_decref(conn->uc_ni);
        conn->uc_ni = NULL;
        pthread_mutex_unlock(&peer->up_lock);
        usocklnd_peer_decref(peer);

  passive_hellosent_done:
        /* safely transit to UC_READY state */
        /* rc == 0 */
        pthread_mutex_lock(&conn->uc_lock);
        if (conn->uc_state != UC_DEAD) {
                usocklnd_rx_ksmhdr_state_transition(conn);

                /* we're ready to receive incoming packets and may
                   already have something to transmit */
                LASSERT (conn->uc_sending == 0);
                if ( cfs_list_empty(&conn->uc_tx_list) &&
                     cfs_list_empty(&conn->uc_zcack_list) ) {
                        conn->uc_tx_flag = 0;
                        rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST,
                                                 POLLIN);
                } else {
                        conn->uc_tx_deadline =
                                cfs_time_shift(usock_tuns.ut_timeout);
                        conn->uc_tx_flag = 1;
                        rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST,
                                                      POLLIN | POLLOUT);
                }

                if (rc == 0)
                        conn->uc_state = UC_READY;
        }
        pthread_mutex_unlock(&conn->uc_lock);
        return rc;

  passive_hellosent_connkill:
        usocklnd_conn_kill(conn);
        return 0;
}
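
A subtlety in the relinking above: cfs_list_add(&tx_list, &conn2->uc_tx_list) followed by cfs_list_del_init(&conn2->uc_tx_list) moves the whole list, not a single node - inserting the new head into the circular ring and then unlinking the old head leaves every element chained to tx_list. With the stock Linux list API the same move is a one-liner; a short sketch:

#include <linux/list.h>

/* equivalent whole-list move with the standard API, assuming the
 * destination list starts empty (as tx_list does above) */
static void move_all(struct list_head *dst_empty, struct list_head *src)
{
        list_splice_init(src, dst_empty);   /* src is left empty */
}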
Example #10
/* All actions that we need after receiving hello on passive conn:
 * 1) Stash peer's nid, pid, incarnation and conn type
 * 2) Cope with easy case: conn[idx] is empty - just save conn there
 * 3) Resolve race:
 *    a) if our nid is higher - reply with CONN_NONE and make us zombie
 *    b) if peer's nid is higher - postpone race resolution till
 *       READY state
 * 4) Anyhow, send reply hello
 */
int
usocklnd_passiveconn_hellorecv(usock_conn_t *conn)
{
        ksock_hello_msg_t *hello = conn->uc_rx_hello;
        int                type;
        int                idx;
        int                rc;
        usock_peer_t      *peer;
        lnet_ni_t         *ni        = conn->uc_ni;
        __u32              peer_ip   = conn->uc_peer_ip;
        __u16              peer_port = conn->uc_peer_port;

        /* don't know parent peer yet and not zombie */
        LASSERT (conn->uc_peer == NULL &&
                 ni != NULL);

        /* don't know peer's nid and incarnation yet */
        if (peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
                /* do not trust liblustre clients */
                conn->uc_peerid.pid = peer_port | LNET_PID_USERFLAG;
                conn->uc_peerid.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
                                                 peer_ip);
                if (hello->kshm_ctype != SOCKLND_CONN_ANY) {
                        lnet_ni_decref(ni);
                        conn->uc_ni = NULL;
                        CERROR("Refusing to accept connection of type=%d from "
                               "userspace process %u.%u.%u.%u:%d\n", hello->kshm_ctype,
                               HIPQUAD(peer_ip), peer_port);
                        return -EINVAL;
                }
        } else {
                conn->uc_peerid.pid = hello->kshm_src_pid;
                conn->uc_peerid.nid = hello->kshm_src_nid;
        }
        conn->uc_type = type = usocklnd_invert_type(hello->kshm_ctype);

        rc = usocklnd_find_or_create_peer(ni, conn->uc_peerid, &peer);
        if (rc) {
                lnet_ni_decref(ni);
                conn->uc_ni = NULL;
                return rc;
        }

        peer->up_last_alive = cfs_time_current();

        idx = usocklnd_type2idx(conn->uc_type);

        /* safely check whether we're first */
        pthread_mutex_lock(&peer->up_lock);

        usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation, NULL);

        if (peer->up_conns[idx] == NULL) {
                peer->up_last_alive = cfs_time_current();
                conn->uc_peer = peer;
                conn->uc_ni = NULL;
                usocklnd_link_conn_to_peer(conn, peer, idx);
                usocklnd_conn_addref(conn);
        } else {
                usocklnd_peer_decref(peer);

                /* Resolve race in favour of higher NID */
                if (conn->uc_peerid.nid < conn->uc_ni->ni_nid) {
                        /* make us zombie */
                        conn->uc_ni = NULL;
                        type = SOCKLND_CONN_NONE;
                }

                /* if conn->uc_peerid.nid > conn->uc_ni->ni_nid,
                 * postpone race resolution till READY state
                 * (hopefully that conn[idx] will die because of
                 * incoming hello of CONN_NONE type) */
        }
        pthread_mutex_unlock(&peer->up_lock);

        /* allocate and initialize fake tx with hello */
        conn->uc_tx_hello = usocklnd_create_hello_tx(ni, type,
                                                     conn->uc_peerid.nid);
        if (conn->uc_ni == NULL)
                lnet_ni_decref(ni);

        if (conn->uc_tx_hello == NULL)
                return -ENOMEM;

        /* rc == 0 */
        pthread_mutex_lock(&conn->uc_lock);
        if (conn->uc_state == UC_DEAD)
                goto passive_hellorecv_done;

        conn->uc_state = UC_SENDING_HELLO;
        conn->uc_tx_deadline = cfs_time_shift(usock_tuns.ut_timeout);
        conn->uc_tx_flag = 1;
        rc = usocklnd_add_pollrequest(conn, POLL_SET_REQUEST, POLLOUT);

  passive_hellorecv_done:
        pthread_mutex_unlock(&conn->uc_lock);
        return rc;
}
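
The tiebreak at step 3 is deterministic because both peers evaluate the same comparison with the operands swapped, so exactly one side demotes its connection to SOCKLND_CONN_NONE. A minimal sketch of the symmetric decision (i_win_race() is a hypothetical name):

/* hypothetical helper: the higher-NID side keeps its own active conn
 * and rejects the peer's incoming conn by replying CONN_NONE */
static int i_win_race(lnet_nid_t my_nid, lnet_nid_t peer_nid)
{
        return my_nid > peer_nid;
}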
Example #11
/* All actions that we need after receiving hello on active conn:
 * 1) Schedule removing if we're zombie
 * 2) Restart active conn if we lost the race
 * 3) Else: update RX part to receive KSM header
 */
int
usocklnd_activeconn_hellorecv(usock_conn_t *conn)
{
        int                rc    = 0;
        ksock_hello_msg_t *hello = conn->uc_rx_hello;
        usock_peer_t      *peer  = conn->uc_peer;

        /* Active conn with peer==NULL is zombie.
         * Don't try to link it to peer because the conn
         * has already had a chance to proceed at the beginning */
        if (peer == NULL) {
                LASSERT(cfs_list_empty(&conn->uc_tx_list) &&
                        cfs_list_empty(&conn->uc_zcack_list));

                usocklnd_conn_kill(conn);
                return 0;
        }

        peer->up_last_alive = cfs_time_current();

        /* peer says that we lost the race */
        if (hello->kshm_ctype == SOCKLND_CONN_NONE) {
                /* Start new active conn, relink txs and zc_acks from
                 * the conn to new conn, schedule removing the conn.
                 * Actually, we're expecting that a passive conn will
                 * make us zombie soon and take care of our txs and
                 * zc_acks */

                cfs_list_t tx_list, zcack_list;
                usock_conn_t *conn2;
                int idx = usocklnd_type2idx(conn->uc_type);

                CFS_INIT_LIST_HEAD (&tx_list);
                CFS_INIT_LIST_HEAD (&zcack_list);

                /* Block usocklnd_send() from checking peer->up_conns[idx]
                 * and enqueueing more txs */
                pthread_mutex_lock(&peer->up_lock);
                pthread_mutex_lock(&conn->uc_lock);

                /* usocklnd_shutdown() could kill us */
                if (conn->uc_state == UC_DEAD) {
                        pthread_mutex_unlock(&conn->uc_lock);
                        pthread_mutex_unlock(&peer->up_lock);
                        return 0;
                }

                LASSERT (peer == conn->uc_peer);
                LASSERT (peer->up_conns[idx] == conn);

                rc = usocklnd_create_active_conn(peer, conn->uc_type, &conn2);
                if (rc) {
                        conn->uc_errored = 1;
                        pthread_mutex_unlock(&conn->uc_lock);
                        pthread_mutex_unlock(&peer->up_lock);
                        return rc;
                }

                usocklnd_link_conn_to_peer(conn2, peer, idx);
                conn2->uc_peer = peer;

                /* unlink txs and zcack from the conn */
                cfs_list_add(&tx_list, &conn->uc_tx_list);
                cfs_list_del_init(&conn->uc_tx_list);
                cfs_list_add(&zcack_list, &conn->uc_zcack_list);
                cfs_list_del_init(&conn->uc_zcack_list);

                /* link them to conn2 */
                cfs_list_add(&conn2->uc_tx_list, &tx_list);
                cfs_list_del_init(&tx_list);
                cfs_list_add(&conn2->uc_zcack_list, &zcack_list);
                cfs_list_del_init(&zcack_list);

                /* make conn zombie */
                conn->uc_peer = NULL;
                usocklnd_peer_decref(peer);

                /* schedule conn2 for processing */
                rc = usocklnd_add_pollrequest(conn2, POLL_ADD_REQUEST, POLLOUT);
                if (rc) {
                        peer->up_conns[idx] = NULL;
                        usocklnd_conn_decref(conn2); /* should destroy conn */
                } else {
                        usocklnd_conn_kill_locked(conn);
                }

                pthread_mutex_unlock(&conn->uc_lock);
                pthread_mutex_unlock(&peer->up_lock);
                usocklnd_conn_decref(conn);

        } else { /* hello->kshm_ctype != SOCKLND_CONN_NONE */
                if (conn->uc_type != usocklnd_invert_type(hello->kshm_ctype))
                        return -EPROTO;

                pthread_mutex_lock(&peer->up_lock);
                usocklnd_cleanup_stale_conns(peer, hello->kshm_src_incarnation,
                                             conn);
                pthread_mutex_unlock(&peer->up_lock);

                /* safely transit to UC_READY state */
                /* rc == 0 */
                pthread_mutex_lock(&conn->uc_lock);
                if (conn->uc_state != UC_DEAD) {
                        usocklnd_rx_ksmhdr_state_transition(conn);

                        /* POLLIN is already set because we just
                         * received hello, but maybe we have something
                         * to send? */
                        LASSERT (conn->uc_sending == 0);
                        if ( !cfs_list_empty(&conn->uc_tx_list) ||
                             !cfs_list_empty(&conn->uc_zcack_list) ) {

                                conn->uc_tx_deadline =
                                        cfs_time_shift(usock_tuns.ut_timeout);
                                conn->uc_tx_flag = 1;
                                rc = usocklnd_add_pollrequest(conn,
                                                              POLL_SET_REQUEST,
                                                              POLLIN | POLLOUT);
                        }

                        if (rc == 0)
                                conn->uc_state = UC_READY;
                }
                pthread_mutex_unlock(&conn->uc_lock);
        }

        return rc;
}