/*
 * Insert 'sctp' into the listen fanout bucket 'tf', first removing it from
 * any bucket it is already hashed in.
 */
void
sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
{
	if (sctp->sctp_listen_tfp) {
		sctp_listen_hash_remove(sctp);
	}

	mutex_enter(&tf->tf_lock);
	sctp->sctp_listen_hash_next = tf->tf_sctp;
	if (tf->tf_sctp) {
		tf->tf_sctp->sctp_listen_hash_prev = sctp;
	}
	sctp->sctp_listen_hash_prev = NULL;
	tf->tf_sctp = sctp;
	sctp->sctp_listen_tfp = tf;
	mutex_exit(&tf->tf_lock);

	/*
	 * On a clustered node, send this notification to the clustering
	 * subsystem.
	 */
	if (cl_sctp_listen != NULL) {
		uchar_t	*slist;
		ssize_t	ssize;

		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		slist = kmem_alloc(ssize, KM_SLEEP);
		sctp_get_saddr_list(sctp, slist, ssize);
		(*cl_sctp_listen)(sctp->sctp_family, slist,
		    sctp->sctp_nsaddrs, sctp->sctp_lport);
		/* list will be freed by the clustering module */
	}
}
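
/*
 * Remove 'sctp' from the listen fanout bucket it was inserted into by
 * sctp_listen_hash_insert(); a no-op if the endpoint is not currently
 * hashed.
 */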
void
sctp_listen_hash_remove(sctp_t *sctp)
{
	sctp_tf_t *tf = sctp->sctp_listen_tfp;

	if (!tf) {
		return;
	}

	/*
	 * On a clustered node, send this notification to the clustering
	 * subsystem.
	 */
	if (cl_sctp_unlisten != NULL) {
		uchar_t	*slist;
		ssize_t	ssize;

		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		slist = kmem_alloc(ssize, KM_SLEEP);
		sctp_get_saddr_list(sctp, slist, ssize);
		(*cl_sctp_unlisten)(sctp->sctp_family, slist,
		    sctp->sctp_nsaddrs, sctp->sctp_lport);
		/* list will be freed by the clustering module */
	}

	mutex_enter(&tf->tf_lock);
	ASSERT(tf->tf_sctp);
	if (tf->tf_sctp == sctp) {
		tf->tf_sctp = sctp->sctp_listen_hash_next;
		if (sctp->sctp_listen_hash_next) {
			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
			tf->tf_sctp->sctp_listen_hash_prev = NULL;
		}
	} else {
		ASSERT(sctp->sctp_listen_hash_prev);
		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
		    sctp);
		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
		    sctp->sctp_listen_hash_next;

		if (sctp->sctp_listen_hash_next) {
			ASSERT(
			    sctp->sctp_listen_hash_next->sctp_listen_hash_prev
			    == sctp);
			sctp->sctp_listen_hash_next->sctp_listen_hash_prev =
			    sctp->sctp_listen_hash_prev;
		}
	}
	sctp->sctp_listen_hash_next = NULL;
	sctp->sctp_listen_hash_prev = NULL;
	sctp->sctp_listen_tfp = NULL;
	mutex_exit(&tf->tf_lock);
}

/*
 * Connect to a peer - this function inserts the sctp into the bind and conn
 * fanouts, sends the INIT, and replies to the client with an OK ack.
 */
int
sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
    cred_t *cr, pid_t pid)
{
	sin_t		*sin;
	sin6_t		*sin6;
	in6_addr_t	dstaddr;
	in_port_t	dstport;
	mblk_t		*initmp;
	sctp_tf_t	*tbf;
	sctp_t		*lsctp;
	char		buf[INET6_ADDRSTRLEN];
	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
	int		err;
	sctp_faddr_t	*cur_fp;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	conn_t		*connp = sctp->sctp_connp;
	uint_t		scope_id = 0;
	ip_xmit_attr_t	*ixa;

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.  Make sure
	 * that the address family matches the family of the address passed
	 * down.
	 */
	if (addrlen < sizeof (sin_t)) {
		return (EINVAL);
	}
	switch (dst->sa_family) {
	case AF_INET:
		sin = (sin_t *)dst;

		/* Check for attempt to connect to non-unicast */
		if (CLASSD(sin->sin_addr.s_addr) ||
		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
			ip0dbg(("sctp_connect: non-unicast\n"));
			return (EINVAL);
		}
		if (connp->conn_ipv6_v6only)
			return (EAFNOSUPPORT);

		/* convert to v6 mapped */
		/* Check for attempt to connect to INADDR_ANY */
		if (sin->sin_addr.s_addr == INADDR_ANY) {
			struct in_addr v4_addr;
			/*
			 * SunOS 4.x and 4.3 BSD allow an application
			 * to connect a TCP socket to INADDR_ANY.
			 * When they do this, the kernel picks the
			 * address of one interface and uses it
			 * instead.  The kernel usually ends up
			 * picking the address of the loopback
			 * interface.  This is an undocumented feature.
			 * However, we provide the same thing here
			 * in case any TCP apps that use this feature
			 * are being ported to SCTP...
			 */
			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
		} else {
			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
		}
		dstport = sin->sin_port;
		break;
	case AF_INET6:
		sin6 = (sin6_t *)dst;

		/* Check for attempt to connect to non-unicast. */
		if ((addrlen < sizeof (sin6_t)) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
			ip0dbg(("sctp_connect: non-unicast\n"));
			return (EINVAL);
		}
		if (connp->conn_ipv6_v6only &&
		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			return (EAFNOSUPPORT);
		}

		/* check for attempt to connect to unspec */
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			dstaddr = ipv6_loopback;
		} else {
			dstaddr = sin6->sin6_addr;
			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
				sctp->sctp_linklocal = 1;
				scope_id = sin6->sin6_scope_id;
			}
		}
		dstport = sin6->sin6_port;
		connp->conn_flowinfo = sin6->sin6_flowinfo;
		break;
	default:
		dprint(1, ("sctp_connect: unknown family %d\n",
		    dst->sa_family));
		return (EAFNOSUPPORT);
	}

	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));

	RUN_SCTP(sctp);

	if (connp->conn_family != dst->sa_family ||
	    (connp->conn_state_flags & CONN_CLOSING)) {
		WAKE_SCTP(sctp);
		return (EINVAL);
	}

	/* We update our cred/cpid based on the caller of connect */
	if (connp->conn_cred != cr) {
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;

	/* Cache things in conn_ixa without any refhold */
	ixa = connp->conn_ixa;
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	switch (sctp->sctp_state) {
	case SCTPS_IDLE: {
		struct sockaddr_storage	ss;

		/*
		 * We support a quick connect capability here, allowing
		 * clients to transition directly from IDLE to COOKIE_WAIT.
		 * sctp_bindi will pick an unused port, insert the connection
		 * in the bind hash and transition to BOUND state.  SCTP
		 * picks and uses what it considers the optimal local address
		 * set (just like specifying INADDR_ANY to bind()).
		 */
		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
		ASSERT(sctp->sctp_nsaddrs == 0);

		bzero(&ss, sizeof (ss));
		ss.ss_family = connp->conn_family;
		WAKE_SCTP(sctp);
		if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
		    sizeof (ss))) != 0) {
			return (err);
		}
		RUN_SCTP(sctp);
		/* FALLTHRU */
	}

	case SCTPS_BOUND:
		ASSERT(sctp->sctp_nsaddrs > 0);

		/* do the connect */
		/* XXX check for attempt to connect to self */
		connp->conn_fport = dstport;

		ASSERT(sctp->sctp_iphc);
		ASSERT(sctp->sctp_iphc6);

		/*
		 * Don't allow this connection to completely duplicate
		 * an existing connection.
		 *
		 * Ensure that the duplicate check and insertion is atomic.
		 */
		sctp_conn_hash_remove(sctp);
		tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
		    connp->conn_ports)];
		mutex_enter(&tbf->tf_lock);
		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
		    SCTPS_COOKIE_WAIT);
		if (lsctp != NULL) {
			/* found a duplicate connection */
			mutex_exit(&tbf->tf_lock);
			SCTP_REFRELE(lsctp);
			WAKE_SCTP(sctp);
			return (EADDRINUSE);
		}

		/*
		 * OK; set up the peer addr (this may grow after we get
		 * the INIT ACK from the peer with additional addresses).
		 */
		if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
		    B_FALSE)) != 0) {
			mutex_exit(&tbf->tf_lock);
			WAKE_SCTP(sctp);
			return (err);
		}
		cur_fp = sctp->sctp_faddrs;
		ASSERT(cur_fp->ixa != NULL);

		/* No valid src addr, return. */
		if (cur_fp->state == SCTP_FADDRS_UNREACH) {
			mutex_exit(&tbf->tf_lock);
			WAKE_SCTP(sctp);
			return (EADDRNOTAVAIL);
		}

		sctp->sctp_primary = cur_fp;
		sctp->sctp_current = cur_fp;
		sctp->sctp_mss = cur_fp->sfa_pmss;
		sctp_conn_hash_insert(tbf, sctp, 1);
		mutex_exit(&tbf->tf_lock);

		ixa = cur_fp->ixa;
		ASSERT(ixa->ixa_cred != NULL);

		if (scope_id != 0) {
			ixa->ixa_flags |= IXAF_SCOPEID_SET;
			ixa->ixa_scopeid = scope_id;
		} else {
			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		}

		/* initialize composite headers */
		if ((err = sctp_set_hdraddrs(sctp)) != 0) {
			sctp_conn_hash_remove(sctp);
			WAKE_SCTP(sctp);
			return (err);
		}

		if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
			sctp_conn_hash_remove(sctp);
			WAKE_SCTP(sctp);
			return (err);
		}

		/*
		 * Turn off the don't fragment bit on the (only) faddr,
		 * so that if one of the messages exchanged during the
		 * initialization sequence exceeds the path mtu, it
		 * at least has a chance to get there.  SCTP does no
		 * fragmentation of initialization messages.  The DF bit
		 * will be turned on again in sctp_send_cookie_echo()
		 * (but the cookie echo will still be sent with the df bit
		 * off).
		 */
		cur_fp->df = B_FALSE;

		/* Mark this address as alive */
		cur_fp->state = SCTP_FADDRS_ALIVE;

		/* Send the INIT to the peer */
		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto);
		sctp->sctp_state = SCTPS_COOKIE_WAIT;

		/*
		 * sctp_init_mp() could result in modifying the source
		 * address list, so take the hash lock.
		 */
		mutex_enter(&tbf->tf_lock);
		initmp = sctp_init_mp(sctp, cur_fp);
		if (initmp == NULL) {
			mutex_exit(&tbf->tf_lock);
			/*
			 * It may happen that all the source addresses
			 * (loopback/link local) are removed.  In that case,
			 * fail the connect.
			 */
			if (sctp->sctp_nsaddrs == 0) {
				sctp_conn_hash_remove(sctp);
				SCTP_FADDR_TIMER_STOP(cur_fp);
				WAKE_SCTP(sctp);
				return (EADDRNOTAVAIL);
			}

			/* Otherwise, let the retransmission timer retry */
			WAKE_SCTP(sctp);
			goto notify_ulp;
		}
		mutex_exit(&tbf->tf_lock);

		/*
		 * On a clustered node, send this notification to the
		 * clustering subsystem.
		 */
		if (cl_sctp_connect != NULL) {
			uchar_t	*slist;
			uchar_t	*flist;
			size_t	ssize;
			size_t	fsize;

			fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
			ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
			slist = kmem_alloc(ssize, KM_SLEEP);
			flist = kmem_alloc(fsize, KM_SLEEP);

			/* The clustering module frees the lists */
			sctp_get_saddr_list(sctp, slist, ssize);
			sctp_get_faddr_list(sctp, flist, fsize);
			(*cl_sctp_connect)(connp->conn_family, slist,
			    sctp->sctp_nsaddrs, connp->conn_lport,
			    flist, sctp->sctp_nfaddrs, connp->conn_fport,
			    B_TRUE, (cl_sctp_handle_t)sctp);
		}
		ASSERT(ixa->ixa_cred != NULL);
		ASSERT(ixa->ixa_ire != NULL);

		(void) conn_ip_output(initmp, ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		WAKE_SCTP(sctp);

notify_ulp:
		sctp_set_ulp_prop(sctp);

		return (0);
	default:
		ip0dbg(("sctp_connect: invalid state. %d\n",
		    sctp->sctp_state));
		WAKE_SCTP(sctp);
		return (EINVAL);
	}
}

/*
 * Exported routine for extracting active SCTP associations.
 * Like TCP, we terminate the walk if the callback returns non-zero.
 */
int
cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *arg,
    boolean_t cansleep)
{
	sctp_t		*sctp;
	sctp_t		*sctp_prev;
	cl_sctp_info_t	cl_sctpi;
	uchar_t		*slist;
	uchar_t		*flist;

	sctp = gsctp;
	sctp_prev = NULL;
	mutex_enter(&sctp_g_lock);
	while (sctp != NULL) {
		size_t	ssize;
		size_t	fsize;

		mutex_enter(&sctp->sctp_reflock);
		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
			mutex_exit(&sctp->sctp_reflock);
			sctp = list_next(&sctp_g_list, sctp);
			continue;
		}
		sctp->sctp_refcnt++;
		mutex_exit(&sctp->sctp_reflock);
		mutex_exit(&sctp_g_lock);
		if (sctp_prev != NULL)
			SCTP_REFRELE(sctp_prev);
		RUN_SCTP(sctp);
		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;

		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		if (slist == NULL || flist == NULL) {
			WAKE_SCTP(sctp);
			if (slist != NULL)
				kmem_free(slist, ssize);
			if (flist != NULL)
				kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
		sctp_get_saddr_list(sctp, slist, ssize);
		sctp_get_faddr_list(sctp, flist, fsize);
		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
		cl_sctpi.cl_sctpi_family = sctp->sctp_family;
		cl_sctpi.cl_sctpi_ipversion = sctp->sctp_ipversion;
		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
		cl_sctpi.cl_sctpi_lport = sctp->sctp_lport;
		cl_sctpi.cl_sctpi_fport = sctp->sctp_fport;
		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
		WAKE_SCTP(sctp);
		cl_sctpi.cl_sctpi_laddrp = slist;
		cl_sctpi.cl_sctpi_faddrp = flist;
		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
			kmem_free(slist, ssize);
			kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		/* list will be freed by cl_callback */
		sctp_prev = sctp;
		mutex_enter(&sctp_g_lock);
		sctp = list_next(&sctp_g_list, sctp);
	}
	mutex_exit(&sctp_g_lock);
	if (sctp_prev != NULL)
		SCTP_REFRELE(sctp_prev);
	return (0);
}

/* Process the COOKIE packet, mp, directed at the listener 'sctp' */
sctp_t *
sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
    sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
{
	sctp_t		*eager;
	ip6_t		*ip6h;
	int		err;
	conn_t		*connp, *econnp;
	sctp_stack_t	*sctps;
	struct sock_proto_props sopp;
	cred_t		*cr;
	pid_t		cpid;
	in6_addr_t	faddr, laddr;
	ip_xmit_attr_t	*ixa;

	/*
	 * No need to check for a duplicate as this is the listener
	 * and we are holding the lock.  This means that no new
	 * connection can be created out of it.  And since the fanout
	 * lookup already done cannot find a match, there is no
	 * duplicate.
	 */
	ASSERT(OK_32PTR(mp->b_rptr));

	if ((eager = sctp_create_eager(sctp)) == NULL) {
		return (NULL);
	}

	connp = sctp->sctp_connp;
	sctps = sctp->sctp_sctps;
	econnp = eager->sctp_connp;

	if (connp->conn_policy != NULL) {
		/* Inherit the policy from the listener; use actions from ira */
		if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
			sctp_close_eager(eager);
			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
			return (NULL);
		}
	}

	ip6h = (ip6_t *)mp->b_rptr;
	if (ira->ira_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha;

		ipha = (ipha_t *)ip6h;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
	} else {
		laddr = ip6h->ip6_dst;
		faddr = ip6h->ip6_src;
	}

	if (ira->ira_flags & IRAF_IPSEC_SECURE) {
		/*
		 * XXX need to fix the cached policy issue here.
		 * We temporarily set the conn_laddr/conn_faddr here so
		 * that IPsec can use it for the latched policy
		 * selector.  This is obviously wrong as SCTP can
		 * use different addresses...
		 */
		econnp->conn_laddr_v6 = laddr;
		econnp->conn_faddr_v6 = faddr;
		econnp->conn_saddr_v6 = laddr;
	}
	if (ipsec_conn_cache_policy(econnp,
	    (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
		sctp_close_eager(eager);
		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
		return (NULL);
	}

	/* Save for getpeerucred */
	cr = ira->ira_cred;
	cpid = ira->ira_cpid;

	if (is_system_labeled()) {
		ip_xmit_attr_t *ixa = econnp->conn_ixa;

		ASSERT(ira->ira_tsl != NULL);

		/* Discard any old label */
		if (ixa->ixa_free_flags & IXA_FREE_TSL) {
			ASSERT(ixa->ixa_tsl != NULL);
			label_rele(ixa->ixa_tsl);
			ixa->ixa_free_flags &= ~IXA_FREE_TSL;
			ixa->ixa_tsl = NULL;
		}

		if ((connp->conn_mlp_type != mlptSingle ||
		    connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
		    ira->ira_tsl != NULL) {
			/*
			 * If this is an MLP connection or a MAC-Exempt
			 * connection with an unlabeled node, packets are to be
			 * exchanged using the security label of the received
			 * Cookie packet instead of the server application's
			 * label.
			 * tsol_check_dest, called from ip_set_destination,
			 * might later update TSF_UNLABELED by replacing
			 * ixa_tsl with a new label.
			 */
			label_hold(ira->ira_tsl);
			ip_xmit_attr_replace_tsl(ixa, ira->ira_tsl);
		} else {
			ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
		}
	}

	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
	if (err != 0) {
		sctp_close_eager(eager);
		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
		return (NULL);
	}

	ASSERT(eager->sctp_current->ixa != NULL);

	ixa = eager->sctp_current->ixa;
	if (!(ira->ira_flags & IXAF_IS_IPV4)) {
		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));

		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
			eager->sctp_linklocal = 1;

			ixa->ixa_flags |= IXAF_SCOPEID_SET;
			ixa->ixa_scopeid = ifindex;
			econnp->conn_incoming_ifindex = ifindex;
		}
	}

	/*
	 * On a clustered node, send this notification to the clustering
	 * subsystem.
	 */
	if (cl_sctp_connect != NULL) {
		uchar_t	*slist;
		uchar_t	*flist;
		size_t	fsize;
		size_t	ssize;

		fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs;
		ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs;
		slist = kmem_alloc(ssize, KM_NOSLEEP);
		flist = kmem_alloc(fsize, KM_NOSLEEP);
		if (slist == NULL || flist == NULL) {
			if (slist != NULL)
				kmem_free(slist, ssize);
			if (flist != NULL)
				kmem_free(flist, fsize);
			sctp_close_eager(eager);
			BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
			SCTP_KSTAT(sctps, sctp_cl_connect);
			return (NULL);
		}
		/* The clustering module frees these lists */
		sctp_get_saddr_list(eager, slist, ssize);
		sctp_get_faddr_list(eager, flist, fsize);
		(*cl_sctp_connect)(econnp->conn_family, slist,
		    eager->sctp_nsaddrs, econnp->conn_lport,
		    flist, eager->sctp_nfaddrs, econnp->conn_fport, B_FALSE,
		    (cl_sctp_handle_t)eager);
	}

	/* Connection established, so send up the conn_ind */
	if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
	    (sock_lower_handle_t)eager, NULL, cr, cpid,
	    &eager->sctp_upcalls)) == NULL) {
		sctp_close_eager(eager);
		BUMP_MIB(&sctps->sctps_mib, sctpListenDrop);
		return (NULL);
	}
	ASSERT(SCTP_IS_DETACHED(eager));
	eager->sctp_detached = B_FALSE;
	bzero(&sopp, sizeof (sopp));
	sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
	sopp.sopp_maxblk = strmsgsz;
	if (econnp->conn_family == AF_INET) {
		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr_len;
	} else {
		sopp.sopp_wroff = sctps->sctps_wroff_xtra +
		    sizeof (sctp_data_hdr_t) + sctp->sctp_hdr6_len;
	}
	eager->sctp_ulp_prop(eager->sctp_ulpd, &sopp);
	return (eager);
}