示例#1
0
/*
 * Put sctp at the head of the connection fanout bucket tf.
 *
 * If sctp is already recorded as belonging to a bucket (sctp_conn_tfp is
 * set), it is taken out of that bucket first via sctp_conn_hash_remove().
 *
 * caller_holds_lock: non-zero means the caller already owns tf->tf_lock
 * (this is only asserted); zero means the lock is acquired and released
 * here around the list manipulation.
 */
void
sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
{
	sctp_t	*old_head;

	if (sctp->sctp_conn_tfp != NULL)
		sctp_conn_hash_remove(sctp);

	if (caller_holds_lock) {
		ASSERT(MUTEX_HELD(&tf->tf_lock));
	} else {
		mutex_enter(&tf->tf_lock);
	}

	/* Splice in front of whatever currently heads the bucket. */
	old_head = tf->tf_sctp;
	sctp->sctp_conn_hash_next = old_head;
	if (old_head != NULL)
		old_head->sctp_conn_hash_prev = sctp;
	sctp->sctp_conn_hash_prev = NULL;

	/* Publish the new head and remember which bucket we live in. */
	tf->tf_sctp = sctp;
	sctp->sctp_conn_tfp = tf;

	if (caller_holds_lock)
		return;
	mutex_exit(&tf->tf_lock);
}
/*
 * Connect to a peer - this function inserts the sctp in the
 * bind and conn fanouts, sends the INIT, and replies to the client
 * with an OK ack.
 *
 * Arguments:
 *	sctp	- the endpoint being connected.
 *	dst	- peer address; must be an AF_INET or AF_INET6 sockaddr.
 *	addrlen	- length of *dst in bytes.
 *	cr, pid	- credential and process id of the caller; both are stored
 *		  in the conn_t and cached in its conn_ixa.
 *
 * Returns 0 on success, including the case where the INIT could not be
 * built but source addresses remain (the retransmission timer retries).
 * Otherwise:
 *	EINVAL		- addrlen too short, non-unicast destination,
 *			  address family differs from conn_family, the
 *			  conn is closing, or the endpoint is in a state
 *			  other than IDLE or BOUND.
 *	EAFNOSUPPORT	- unknown address family, or an IPv4/v4-mapped
 *			  destination on a v6-only conn.
 *	EADDRINUSE	- sctp_lookup() found a duplicate connection.
 *	EADDRNOTAVAIL	- the peer address is marked unreachable, or no
 *			  source addresses are left after sctp_init_mp().
 *	other		- whatever sctp_bind(), sctp_add_faddr(),
 *			  sctp_set_hdraddrs() or sctp_build_hdrs() returned.
 */
int
sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
    cred_t *cr, pid_t pid)
{
	sin_t		*sin;
	sin6_t		*sin6;
	in6_addr_t	dstaddr;
	in_port_t	dstport;
	mblk_t		*initmp;
	sctp_tf_t	*tbf;
	sctp_t		*lsctp;
	char		buf[INET6_ADDRSTRLEN];
	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
	int		err;
	sctp_faddr_t	*cur_fp;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	conn_t		*connp = sctp->sctp_connp;
	uint_t		scope_id = 0;
	ip_xmit_attr_t	*ixa;

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down.
	 */
	/* Anything shorter than a sin_t is rejected before sa_family is read */
	if (addrlen < sizeof (sin_t)) {
		return (EINVAL);
	}
	switch (dst->sa_family) {
	case AF_INET:
		sin = (sin_t *)dst;

		/* Check for attempt to connect to non-unicast */
		if (CLASSD(sin->sin_addr.s_addr) ||
		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
			ip0dbg(("sctp_connect: non-unicast\n"));
			return (EINVAL);
		}
		if (connp->conn_ipv6_v6only)
			return (EAFNOSUPPORT);

		/* convert to v6 mapped */
		/* Check for attempt to connect to INADDR_ANY */
		if (sin->sin_addr.s_addr == INADDR_ANY)  {
			struct in_addr v4_addr;
			/*
			 * SunOS 4.x and 4.3 BSD allow an application
			 * to connect a TCP socket to INADDR_ANY.
			 * When they do this, the kernel picks the
			 * address of one interface and uses it
			 * instead.  The kernel usually ends up
			 * picking the address of the loopback
			 * interface.  This is an undocumented feature.
			 * However, we provide the same thing here
			 * in case any TCP apps that use this feature
			 * are being ported to SCTP...
			 */
			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
		} else {
			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
		}
		dstport = sin->sin_port;
		break;
	case AF_INET6:
		sin6 = (sin6_t *)dst;
		/*
		 * Check for attempt to connect to non-unicast.  The length
		 * is re-checked here because a sin6_t is needed for this
		 * family; the earlier check only guaranteed a sin_t.
		 */
		if ((addrlen < sizeof (sin6_t)) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
			ip0dbg(("sctp_connect: non-unicast\n"));
			return (EINVAL);
		}
		if (connp->conn_ipv6_v6only &&
		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			return (EAFNOSUPPORT);
		}
		/* check for attempt to connect to unspec */
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			dstaddr = ipv6_loopback;
		} else {
			dstaddr = sin6->sin6_addr;
			/*
			 * Remember the scope id of a link-local peer; it is
			 * applied to the peer's ixa further down.
			 */
			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
				sctp->sctp_linklocal = 1;
				scope_id = sin6->sin6_scope_id;
			}
		}
		dstport = sin6->sin6_port;
		connp->conn_flowinfo = sin6->sin6_flowinfo;
		break;
	default:
		dprint(1, ("sctp_connect: unknown family %d\n",
		    dst->sa_family));
		return (EAFNOSUPPORT);
	}

	/* buf is only used for the debug message below */
	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));

	/*
	 * From here on every return path issues a matching WAKE_SCTP()
	 * before leaving the function.
	 */
	RUN_SCTP(sctp);

	if (connp->conn_family != dst->sa_family ||
	    (connp->conn_state_flags & CONN_CLOSING)) {
		WAKE_SCTP(sctp);
		return (EINVAL);
	}

	/*
	 * We update our cred/cpid based on the caller of connect.
	 * The new cred is held before the old one is released.
	 */
	if (connp->conn_cred != cr) {
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;

	/* Cache things in conn_ixa without any refhold */
	ixa = connp->conn_ixa;
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	switch (sctp->sctp_state) {
	case SCTPS_IDLE: {
		struct sockaddr_storage	ss;

		/*
		 * We support a quick connect capability here, allowing
		 * clients to transition directly from IDLE to COOKIE_WAIT.
		 * sctp_bindi will pick an unused port, insert the connection
		 * in the bind hash and transition to BOUND state. SCTP
		 * picks and uses what it considers the optimal local address
		 * set (just like specifying INADDR_ANY to bind()).
		 */
		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
		ASSERT(sctp->sctp_nsaddrs == 0);

		/* An all-zero address of our own family: wildcard bind */
		bzero(&ss, sizeof (ss));
		ss.ss_family = connp->conn_family;
		/*
		 * The sctp is released around the sctp_bind() call and
		 * re-acquired afterwards (presumably because sctp_bind()
		 * does its own RUN_SCTP/WAKE_SCTP - confirm).
		 * NOTE(review): sctp_state is not re-checked after the
		 * RUN_SCTP() below; confirm nothing can change it in the
		 * window where the sctp is not held.
		 */
		WAKE_SCTP(sctp);
		if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
		    sizeof (ss))) != 0) {
			return (err);
		}
		RUN_SCTP(sctp);
		/* FALLTHRU */
	}

	case SCTPS_BOUND:
		ASSERT(sctp->sctp_nsaddrs > 0);

		/* do the connect */
		/* XXX check for attempt to connect to self */
		connp->conn_fport = dstport;

		ASSERT(sctp->sctp_iphc);
		ASSERT(sctp->sctp_iphc6);

		/*
		 * Don't allow this connection to completely duplicate
		 * an existing connection.
		 *
		 * Ensure that the duplicate check and insertion is atomic.
		 * The bucket lock taken here is held until the sctp has
		 * been inserted (or an error return drops it).  Any old
		 * hash entry is removed before the lock is taken.
		 */
		sctp_conn_hash_remove(sctp);
		tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
		    connp->conn_ports)];
		mutex_enter(&tbf->tf_lock);
		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
		    SCTPS_COOKIE_WAIT);
		if (lsctp != NULL) {
			/* found a duplicate connection */
			mutex_exit(&tbf->tf_lock);
			SCTP_REFRELE(lsctp);
			WAKE_SCTP(sctp);
			return (EADDRINUSE);
		}

		/*
		 * OK; set up the peer addr (this may grow after we get
		 * the INIT ACK from the peer with additional addresses).
		 */
		if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
		    B_FALSE)) != 0) {
			mutex_exit(&tbf->tf_lock);
			WAKE_SCTP(sctp);
			return (err);
		}
		cur_fp = sctp->sctp_faddrs;
		ASSERT(cur_fp->ixa != NULL);

		/* No valid src addr, return. */
		if (cur_fp->state == SCTP_FADDRS_UNREACH) {
			mutex_exit(&tbf->tf_lock);
			WAKE_SCTP(sctp);
			return (EADDRNOTAVAIL);
		}

		/* The single peer address is both primary and current */
		sctp->sctp_primary = cur_fp;
		sctp->sctp_current = cur_fp;
		sctp->sctp_mss = cur_fp->sfa_pmss;
		/* Third argument 1: we already hold tbf->tf_lock */
		sctp_conn_hash_insert(tbf, sctp, 1);
		mutex_exit(&tbf->tf_lock);

		/* From here on ixa refers to the peer address' ixa */
		ixa = cur_fp->ixa;
		ASSERT(ixa->ixa_cred != NULL);

		/* Apply the scope id saved for a link-local destination */
		if (scope_id != 0) {
			ixa->ixa_flags |= IXAF_SCOPEID_SET;
			ixa->ixa_scopeid = scope_id;
		} else {
			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		}

		/*
		 * initialize composite headers; on failure undo the conn
		 * hash insertion done above.
		 */
		if ((err = sctp_set_hdraddrs(sctp)) != 0) {
			sctp_conn_hash_remove(sctp);
			WAKE_SCTP(sctp);
			return (err);
		}

		/*
		 * NOTE(review): KM_SLEEP is passed unconditionally here,
		 * whereas sctp_add_faddr() above honours sctp_cansleep via
		 * `sleep' - confirm this is intended.
		 */
		if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
			sctp_conn_hash_remove(sctp);
			WAKE_SCTP(sctp);
			return (err);
		}

		/*
		 * Turn off the don't fragment bit on the (only) faddr,
		 * so that if one of the messages exchanged during the
		 * initialization sequence exceeds the path mtu, it
		 * at least has a chance to get there. SCTP does no
		 * fragmentation of initialization messages.  The DF bit
		 * will be turned on again in sctp_send_cookie_echo()
		 * (but the cookie echo will still be sent with the df bit
		 * off).
		 */
		cur_fp->df = B_FALSE;

		/* Mark this address as alive */
		cur_fp->state = SCTP_FADDRS_ALIVE;

		/*
		 * Send the INIT to the peer.  The timer is started and the
		 * state is moved to COOKIE_WAIT before the INIT is built.
		 */
		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto);
		sctp->sctp_state = SCTPS_COOKIE_WAIT;
		/*
		 * sctp_init_mp() could result in modifying the source
		 * address list, so take the hash lock.
		 */
		mutex_enter(&tbf->tf_lock);
		initmp = sctp_init_mp(sctp, cur_fp);
		if (initmp == NULL) {
			mutex_exit(&tbf->tf_lock);
			/*
			 * It may happen that all the source addresses
			 * (loopback/link local) are removed.  In that case,
			 * fail the connect.
			 *
			 * NOTE(review): sctp_state was already set to
			 * SCTPS_COOKIE_WAIT above and is not reset on this
			 * error return - confirm callers cope with that.
			 */
			if (sctp->sctp_nsaddrs == 0) {
				sctp_conn_hash_remove(sctp);
				SCTP_FADDR_TIMER_STOP(cur_fp);
				WAKE_SCTP(sctp);
				return (EADDRNOTAVAIL);
			}

			/*
			 * Otherwise, let the retransmission timer retry.
			 * Nothing is sent now; skip straight to the common
			 * success exit.
			 */
			WAKE_SCTP(sctp);
			goto notify_ulp;
		}
		mutex_exit(&tbf->tf_lock);

		/*
		 * On a clustered node send this notification to the clustering
		 * subsystem.
		 */
		if (cl_sctp_connect != NULL) {
			uchar_t		*slist;
			uchar_t		*flist;
			size_t		ssize;
			size_t		fsize;

			/* One in6_addr_t per peer / source address */
			fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
			ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
			slist = kmem_alloc(ssize, KM_SLEEP);
			flist = kmem_alloc(fsize, KM_SLEEP);
			/* The clustering module frees the lists */
			sctp_get_saddr_list(sctp, slist, ssize);
			sctp_get_faddr_list(sctp, flist, fsize);
			(*cl_sctp_connect)(connp->conn_family, slist,
			    sctp->sctp_nsaddrs, connp->conn_lport,
			    flist, sctp->sctp_nfaddrs, connp->conn_fport,
			    B_TRUE, (cl_sctp_handle_t)sctp);
		}
		ASSERT(ixa->ixa_cred != NULL);
		ASSERT(ixa->ixa_ire != NULL);

		/* The result of the transmit is deliberately ignored */
		(void) conn_ip_output(initmp, ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		WAKE_SCTP(sctp);

		/*
		 * Common success exit; reached with the sctp already
		 * released via WAKE_SCTP() on both paths.
		 */
notify_ulp:
		sctp_set_ulp_prop(sctp);

		return (0);
	default:
		/* Only IDLE and BOUND endpoints may initiate a connect */
		ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
		WAKE_SCTP(sctp);
		return (EINVAL);
	}
}