Example #1
/*
 * Allocate a default DCE and a hash table for per-IP address DCEs
 */
void
dce_stack_init(ip_stack_t *ipst)
{
	int	i;

	ipst->ips_dce_default = kmem_cache_alloc(dce_cache, KM_SLEEP);
	bzero(ipst->ips_dce_default, sizeof (dce_t));
	ipst->ips_dce_default->dce_flags = DCEF_DEFAULT;
	ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL;
	ipst->ips_dce_default->dce_last_change_time =
	    TICK_TO_SEC(ddi_get_lbolt64());
	ipst->ips_dce_default->dce_refcnt = 1;	/* Should never go away */
	ipst->ips_dce_default->dce_ipst = ipst;

	/* This must be a power of two since we are using IRE_ADDR_HASH macro */
	ipst->ips_dce_hashsize = 256;
	ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
	    sizeof (dcb_t), KM_SLEEP);
	ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
	    sizeof (dcb_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
		rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
		    NULL);
		rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
		    NULL);
	}
}
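A note on the fixed bucket count above: the comment ties 256 to the IRE_ADDR_HASH macro, and with a power-of-two table an address can be folded into a bucket index with a simple mask so every bucket stays reachable. Below is a minimal sketch of such a mask-based hash; EXAMPLE_ADDR_HASH and example_dce_bucket are hypothetical names for illustration, not the real IRE_ADDR_HASH definition from the source.

/*
 * Sketch only: fold an IPv4 address into a power-of-two sized table.
 * EXAMPLE_ADDR_HASH is an assumed, illustrative macro; the real
 * IRE_ADDR_HASH may be defined differently.
 */
#define	EXAMPLE_ADDR_HASH(addr, size)	\
	((((addr) >> 16) ^ (addr)) & ((size) - 1))

uint_t
example_dce_bucket(ipaddr_t dst, uint_t hashsize)
{
	ASSERT(ISP2(hashsize));		/* the mask trick needs 2^n buckets */
	return (EXAMPLE_ADDR_HASH(dst, hashsize));
}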
Example #2
void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
	int64_t now = ddi_get_lbolt64();

	fp->strikes = 0;
	sctp->sctp_strikes = 0;
	fp->lastactive = now;
	fp->hb_expiry = now + SET_HB_INTVL(fp);
	fp->hb_pending = B_FALSE;
	if (fp->state != SCTP_FADDRS_ALIVE) {
		fp->state = SCTP_FADDRS_ALIVE;
		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);
		/* Should have a full IRE now */
		sctp_get_dest(sctp, fp);

		/*
		 * If this is the primary, switch back to it now.  And
		 * we probably want to reset the source addr used to reach
		 * it.
		 * Note that if we didn't find a source in sctp_get_dest
		 * then we'd be unreachable at this point in time.
		 */
		if (fp == sctp->sctp_primary &&
		    fp->state != SCTP_FADDRS_UNREACH) {
			sctp_set_faddr_current(sctp, fp);
			return;
		}
	}
}
Example #3
void
spa_handle_ignored_writes(spa_t *spa)
{
	inject_handler_t *handler;

	if (zio_injection_enabled == 0)
		return;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		if (spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		if (handler->zi_record.zi_duration > 0) {
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    handler->zi_record.zi_timer +
			    handler->zi_record.zi_duration * hz >
			    ddi_get_lbolt64());
		} else {
			/* duration is negative so the subtraction here adds */
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    handler->zi_record.zi_timer -
			    handler->zi_record.zi_duration >=
			    spa_syncing_txg(spa));
		}
	}

	rw_exit(&inject_lock);
}
Example #4
/*
 * Simulate hardware that ignores cache flushes.  For requested number
 * of seconds nix the actual writing to disk.
 */
void
zio_handle_ignored_writes(zio_t *zio)
{
	inject_handler_t *handler;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		/* Ignore errors not destined for this pool */
		if (zio->io_spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		/*
		 * Positive duration implies # of seconds, negative
		 * a number of txgs
		 */
		if (handler->zi_record.zi_timer == 0) {
			if (handler->zi_record.zi_duration > 0)
				handler->zi_record.zi_timer = ddi_get_lbolt64();
			else
				handler->zi_record.zi_timer = zio->io_txg;
		}

		/* Have a "problem" writing 60% of the time */
		if (spa_get_random(100) < 60)
			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
		break;
	}

	rw_exit(&inject_lock);
}
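Both injection routines above rely on the same sign convention for zi_duration: a positive value is a number of seconds, tracked against ddi_get_lbolt64() in ticks, while a negative value is a number of txgs, tracked against the syncing txg. A hedged sketch of a predicate mirroring the checks in spa_handle_ignored_writes(); ignored_writes_active is a hypothetical helper, not part of the source.

/*
 * Sketch only: B_TRUE while the injected "ignored writes" window is still
 * open (or has not been armed yet), matching the VERIFY()s above.
 */
static boolean_t
ignored_writes_active(inject_handler_t *handler, spa_t *spa)
{
	if (handler->zi_record.zi_timer == 0)
		return (B_TRUE);	/* not armed yet, nothing to check */

	if (handler->zi_record.zi_duration > 0) {
		/* positive duration: seconds, compared in lbolt ticks */
		return (handler->zi_record.zi_timer +
		    handler->zi_record.zi_duration * hz > ddi_get_lbolt64());
	}

	/* negative duration: txg count; subtracting a negative value adds */
	return (handler->zi_record.zi_timer -
	    handler->zi_record.zi_duration >= spa_syncing_txg(spa));
}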
Example #5
/*
 * Atomically looks for a non-default DCE, and if not found tries to create one.
 * If there is no memory it returns NULL.
 * When an entry is created we increase the generation number on
 * the default DCE so that conn_ip_output will detect there is a new DCE.
 * ifindex should only be used with link-local addresses.
 */
dce_t *
dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst)
{
	uint_t		hash;
	dcb_t		*dcb;
	dce_t		*dce;

	/* We should not create entries for link-locals w/o an ifindex */
	ASSERT(!(IN6_IS_ADDR_LINKSCOPE(dst)) || ifindex != 0);

	hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize);
	dcb = &ipst->ips_dce_hash_v6[hash];
	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
		if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) &&
		    dce->dce_ifindex == ifindex) {
			mutex_enter(&dce->dce_lock);
			if (!DCE_IS_CONDEMNED(dce)) {
				dce_refhold(dce);
				mutex_exit(&dce->dce_lock);
				rw_exit(&dcb->dcb_lock);
				return (dce);
			}
			mutex_exit(&dce->dce_lock);
		}
	}

	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
	if (dce == NULL) {
		rw_exit(&dcb->dcb_lock);
		return (NULL);
	}
	bzero(dce, sizeof (dce_t));
	dce->dce_ipst = ipst;	/* No netstack_hold */
	dce->dce_v6addr = *dst;
	dce->dce_ifindex = ifindex;
	dce->dce_generation = DCE_GENERATION_INITIAL;
	dce->dce_ipversion = IPV6_VERSION;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	dce_refhold(dce);	/* For the hash list */

	/* Link into list */
	if (dcb->dcb_dce != NULL)
		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
	dce->dce_next = dcb->dcb_dce;
	dce->dce_ptpn = &dcb->dcb_dce;
	dcb->dcb_dce = dce;
	dce->dce_bucket = dcb;
	atomic_add_32(&dcb->dcb_cnt, 1);
	dce_refhold(dce);	/* For the caller */
	rw_exit(&dcb->dcb_lock);

	/* Initialize dce_ident to be different than for the last packet */
	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;
	dce_increment_generation(ipst->ips_dce_default);
	return (dce);
}
Example #6
static void
sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
    mblk_t *timer_mp)
{
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(fp->ixa != NULL);

	bcopy(addr, &fp->faddr, sizeof (*addr));
	if (IN6_IS_ADDR_V4MAPPED(addr)) {
		fp->isv4 = 1;
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss =
		    (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
		    ~(SCTP_ALIGN - 1);
		fp->ixa->ixa_flags |= IXAF_IS_IPV4;
	} else {
		fp->isv4 = 0;
		fp->sfa_pmss =
		    (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
		    ~(SCTP_ALIGN - 1);
		fp->ixa->ixa_flags &= ~IXAF_IS_IPV4;
	}
	fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss;
	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
	SCTP_MAX_RTO(sctp, fp);
	fp->srtt = -1;
	fp->rtt_updates = 0;
	fp->strikes = 0;
	fp->max_retr = sctp->sctp_pp_max_rxt;
	/* Mark it as not confirmed. */
	fp->state = SCTP_FADDRS_UNCONFIRMED;
	fp->hb_interval = sctp->sctp_hb_interval;
	fp->ssthresh = sctps->sctps_initial_ssthresh;
	fp->suna = 0;
	fp->pba = 0;
	fp->acked = 0;
	fp->lastactive = fp->hb_expiry = ddi_get_lbolt64();
	fp->timer_mp = timer_mp;
	fp->hb_pending = B_FALSE;
	fp->hb_enabled = B_TRUE;
	fp->df = 1;
	fp->pmtu_discovered = 0;
	fp->next = NULL;
	fp->T3expire = 0;
	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
	    sizeof (fp->hb_secret));
	fp->rxt_unacked = 0;

	sctp_get_dest(sctp, fp);
}
Example #7
/*
 * Atomically looks for a non-default DCE, and if not found tries to create one.
 * If there is no memory it returns NULL.
 * When an entry is created we increase the generation number on
 * the default DCE so that conn_ip_output will detect there is a new DCE.
 */
dce_t *
dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst)
{
	uint_t		hash;
	dcb_t		*dcb;
	dce_t		*dce;

	hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize);
	dcb = &ipst->ips_dce_hash_v4[hash];
	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
		if (dce->dce_v4addr == dst) {
			mutex_enter(&dce->dce_lock);
			if (!DCE_IS_CONDEMNED(dce)) {
				dce_refhold(dce);
				mutex_exit(&dce->dce_lock);
				rw_exit(&dcb->dcb_lock);
				return (dce);
			}
			mutex_exit(&dce->dce_lock);
		}
	}
	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
	if (dce == NULL) {
		rw_exit(&dcb->dcb_lock);
		return (NULL);
	}
	bzero(dce, sizeof (dce_t));
	dce->dce_ipst = ipst;	/* No netstack_hold */
	dce->dce_v4addr = dst;
	dce->dce_generation = DCE_GENERATION_INITIAL;
	dce->dce_ipversion = IPV4_VERSION;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	dce_refhold(dce);	/* For the hash list */

	/* Link into list */
	if (dcb->dcb_dce != NULL)
		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
	dce->dce_next = dcb->dcb_dce;
	dce->dce_ptpn = &dcb->dcb_dce;
	dcb->dcb_dce = dce;
	dce->dce_bucket = dcb;
	dce_refhold(dce);	/* For the caller */
	rw_exit(&dcb->dcb_lock);

	/* Initialize dce_ident to be different than for the last packet */
	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;

	dce_increment_generation(ipst->ips_dce_default);
	return (dce);
}
Example #8
/*
 * Reclaim a fraction of dce's in the dcb.
 * For now we have a higher probability of deleting DCEs without DCEF_PMTU.
 */
static void
dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction)
{
	uint_t	fraction_pmtu = fraction * 4;
	uint_t	hash;
	dce_t	*dce, *nextdce;

	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
		nextdce = dce->dce_next;
		/* Clear DCEF_PMTU if the pmtu is too old */
		mutex_enter(&dce->dce_lock);
		if ((dce->dce_flags & DCEF_PMTU) &&
		    TICK_TO_SEC(ddi_get_lbolt64()) - dce->dce_last_change_time >
		    ipst->ips_ip_pathmtu_interval) {
			dce->dce_flags &= ~DCEF_PMTU;
			mutex_exit(&dce->dce_lock);
			dce_increment_generation(dce);
		} else {
			mutex_exit(&dce->dce_lock);
		}
		hash = RANDOM_HASH((uint64_t)(uintptr_t)dce);
		if (dce->dce_flags & DCEF_PMTU) {
			if (hash % fraction_pmtu != 0)
				continue;
		} else {
			if (hash % fraction != 0)
				continue;
		}

		IP_STAT(ipst, ip_dce_reclaim_deleted);
		dce_delete_locked(dcb, dce);
		dce_refrele(dce);
	}
	rw_exit(&dcb->dcb_lock);
}
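Because RANDOM_HASH() spreads entries roughly uniformly, the modulo tests above translate directly into deletion odds: about one in fraction plain entries is reclaimed per pass, but only one in fraction * 4 entries that still carry DCEF_PMTU. A worked example with an assumed fraction value:

/*
 * Illustrative arithmetic only (assumed value, not from any caller): with
 * fraction = 8, a DCE without DCEF_PMTU is deleted when hash % 8 == 0,
 * i.e. with probability ~1/8, while a DCE still holding fresh PMTU state
 * uses fraction_pmtu = 32 and is deleted with probability ~1/32.
 */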
Example #9
static uint_t
ntwdt_cyclic_softint(caddr_t arg)
{
	/*LINTED E_BAD_PTR_CAST_ALIGN*/
	ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg;
	ntwdt_runstate_t *ntwdt_state;

	ntwdt_state = ntwdt_ptr->ntwdt_run_state;

	mutex_enter(&ntwdt_state->ntwdt_runstate_mutex);

	if ((ntwdt_state->ntwdt_watchdog_flags & NTWDT_FLAG_SKIP_CYCLIC) != 0) {
		ntwdt_state->ntwdt_watchdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC;
		goto end;
	}

	if ((ntwdt_state->ntwdt_timer_running == 0) ||
	    (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) ||
	    (ntwdt_state->ntwdt_watchdog_enabled == 0)) {
		goto end;
	}

	NTWDT_DBG(NTWDT_DBG_IOCTL, ("cyclic_softint: %d, "
	    "ddi_get_lbolt64(): %d\n", ntwdt_state->ntwdt_watchdog_timeout,
	    (int)TICK_TO_MSEC(ddi_get_lbolt64())));

	/*
	 * Decrement the virtual watchdog timer and check if it has expired.
	 */
	ntwdt_state->ntwdt_time_remaining -= NTWDT_DECREMENT_INTERVAL;

	if (ntwdt_state->ntwdt_time_remaining == 0) {
		cmn_err(CE_WARN, "application-watchdog expired");
		ntwdt_state->ntwdt_watchdog_expired = 1;

		if (ntwdt_state->ntwdt_reset_enabled != 0) {
			/*
			 * The user wants to reset the system.
			 */
			mutex_exit(&ntwdt_state->ntwdt_runstate_mutex);

			NTWDT_DBG(NTWDT_DBG_NTWDT, ("recovery being done"));
			ntwdt_enforce_timeout();
		} else {
			NTWDT_DBG(NTWDT_DBG_NTWDT, ("no recovery being done"));
			ntwdt_state->ntwdt_watchdog_enabled = 0;
		}

		/*
		 * Schedule Callout to stop the cyclic.
		 */
		(void) timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0);
	} else {
		_NOTE(EMPTY)
		NTWDT_DBG(NTWDT_DBG_NTWDT, ("time remaining in AWDT: %d secs",
		    (int)TICK_TO_MSEC(ntwdt_state->ntwdt_time_remaining)));
	}

end:
	mutex_exit(&ntwdt_state->ntwdt_runstate_mutex);
	return (DDI_INTR_CLAIMED);
}
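The countdown above implements a software watchdog: each cyclic firing subtracts NTWDT_DECREMENT_INTERVAL from the time remaining, and only an intervening keep-alive from the application (presumably via the driver's ioctl path, which refills ntwdt_time_remaining) keeps it from reaching zero. A worked example with assumed values:

/*
 * Illustrative arithmetic only (assumed values): with an application
 * watchdog timeout of 10 seconds and NTWDT_DECREMENT_INTERVAL equal to
 * 1 second, this softint must run ten times without an intervening
 * keep-alive before ntwdt_time_remaining hits 0, at which point the
 * system is reset if ntwdt_reset_enabled is set, or the watchdog is
 * simply disabled otherwise.
 */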
Example #10
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, it's better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list. So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
Example #11
/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice.  Hence it cannot be 0.  Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death().  So check if
		 * a timer is needed.
		 */
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		/*
		 * The list is not empty, so a timer must be running.  If not,
		 * tcp_time_wait_collector() must be running on this
		 * tcp_time_wait list at the same time.
		 */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
		    tcp_time_wait->tcp_time_wait_running);
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;

	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
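The expiry stamp is absolute lbolt time: the current 64-bit tick count plus the TIME_WAIT interval converted from milliseconds to ticks. A worked example with assumed tunable values:

/*
 * Illustrative arithmetic only (assumed values): with
 * tcps_time_wait_interval = 60000 ms and hz = 100,
 * MSEC_TO_TICK(60000) = 6000 ticks, so a connection appended at
 * lbolt 1234567 gets tcp_time_wait_expire = 1240567 and is reaped by
 * tcp_time_wait_collector() roughly one minute later.
 */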
Example #12
/*
 * As part of file system hardening, this daemon is awakened
 * every second to flush cached data which includes the
 * buffer cache, the inode cache and mapped pages.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t		bcount;
	kmutex_t	*hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime =  0;
	proc_fsflush->p_cutime =  0;
	proc_fsflush->p_utime = 0;
	bcopy("fsflush", curproc->p_user.u_psargs, 8);
	bcopy("fsflush", curproc->p_user.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing.
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

	autoup = v.v_autoup * hz;
	icount = v.v_autoup / tune.t_fsflushr;
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);		/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {

		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

			if ((bp->b_flags & B_DELWRI) &&
			    (ddi_get_lbolt() - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
					    bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 * There is no need to wake up any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (ddi_get_lbolt64() / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes).
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
				    SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}
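The flush cadence is driven by two tunables: autoup converts v_autoup seconds into lbolt ticks for the delayed-write age test, and icount is the number of wakeups between attribute (inode) sync passes. A worked example with assumed tunable values:

/*
 * Illustrative arithmetic only (assumed values): with v.v_autoup = 30,
 * hz = 100 and tune.t_fsflushr = 1,
 *
 *	autoup = 30 * 100 = 3000 ticks	-> a B_DELWRI buffer is written
 *	back once it has been dirty for about 30 seconds of lbolt time;
 *	icount = 30 / 1 = 30		-> the SYNC_ATTR pass over vfssw
 *	runs on every 30th wakeup of the daemon.
 */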
Example #13
int
sctp_listen(sctp_t *sctp)
{
	sctp_tf_t	*tf;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	conn_t		*connp = sctp->sctp_connp;

	RUN_SCTP(sctp);
	/*
	 * TCP handles listen() by increasing the backlog; need to check
	 * whether it should be handled here too.
	 */
	if (sctp->sctp_state > SCTPS_BOUND ||
	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
		WAKE_SCTP(sctp);
		return (EINVAL);
	}

	/* Do an anonymous bind for unbound socket doing listen(). */
	if (sctp->sctp_nsaddrs == 0) {
		struct sockaddr_storage ss;
		int ret;

		bzero(&ss, sizeof (ss));
		ss.ss_family = connp->conn_family;

		WAKE_SCTP(sctp);
		if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
		    sizeof (ss))) != 0)
			return (ret);
		RUN_SCTP(sctp)
	}

	/* Cache things in the ixa without any refhold */
	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	connp->conn_ixa->ixa_cred = connp->conn_cred;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
	if (is_system_labeled())
		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);

	sctp->sctp_state = SCTPS_LISTEN;
	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
	sctp->sctp_last_secret_update = ddi_get_lbolt64();
	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);

	/*
	 * If there is an association limit, allocate and initialize
	 * the counter struct.  Note that since listen can be called
	 * multiple times, the struct may already have been allocated.
	 */
	if (!list_is_empty(&sctps->sctps_listener_conf) &&
	    sctp->sctp_listen_cnt == NULL) {
		sctp_listen_cnt_t *slc;
		uint32_t ratio;

		ratio = sctp_find_listener_conf(sctps,
		    ntohs(connp->conn_lport));
		if (ratio != 0) {
			uint32_t mem_ratio, tot_buf;

			slc = kmem_alloc(sizeof (sctp_listen_cnt_t), KM_SLEEP);
			/*
			 * Calculate the connection limit based on
			 * the configured ratio and maxusers.  Maxusers
			 * are calculated based on memory size,
			 * ~ 1 user per MB.  Note that the conn_rcvbuf
			 * and conn_sndbuf may change after a
			 * connection is accepted.  So what we have
			 * is only an approximation.
			 */
			if ((tot_buf = connp->conn_rcvbuf +
			    connp->conn_sndbuf) < MB) {
				mem_ratio = MB / tot_buf;
				slc->slc_max = maxusers / ratio * mem_ratio;
			} else {
				mem_ratio = tot_buf / MB;
				slc->slc_max = maxusers / ratio / mem_ratio;
			}
			/* At least we should allow some associations! */
			if (slc->slc_max < sctp_min_assoc_listener)
				slc->slc_max = sctp_min_assoc_listener;
			slc->slc_cnt = 1;
			slc->slc_drop = 0;
			sctp->sctp_listen_cnt = slc;
		}
	}


	tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(
	    ntohs(connp->conn_lport))];
	sctp_listen_hash_insert(tf, sctp);

	WAKE_SCTP(sctp);
	return (0);
}
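The listener limit above scales with memory: maxusers approximates one user per MB of memory, and mem_ratio rescales that by how much socket buffering each association is expected to consume. A worked example with assumed values:

/*
 * Illustrative arithmetic only (assumed values): with conn_rcvbuf +
 * conn_sndbuf = 256 KB (< 1 MB), ratio = 2 and maxusers = 2048,
 *
 *	mem_ratio = MB / tot_buf = 4
 *	slc_max   = maxusers / ratio * mem_ratio = 2048 / 2 * 4 = 4096
 *
 * whereas with 2 MB of buffering per association,
 *
 *	mem_ratio = tot_buf / MB = 2
 *	slc_max   = maxusers / ratio / mem_ratio = 2048 / 2 / 2 = 512
 */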
/*
 * Common accept code.  Called by sctp_conn_request.
 * cr_pkt is the INIT / INIT ACK packet.
 */
static int
sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
    uint_t ip_hdr_len, sctp_init_chunk_t *iack)
{

	sctp_hdr_t		*sctph;
	sctp_chunk_hdr_t	*ich;
	sctp_init_chunk_t	*init;
	int			err;
	uint_t			sctp_options;
	conn_t			*aconnp;
	conn_t			*lconnp;
	sctp_stack_t	*sctps = listener->sctp_sctps;

	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
	ASSERT(OK_32PTR(sctph));

	aconnp = acceptor->sctp_connp;
	lconnp = listener->sctp_connp;
	aconnp->conn_lport = lconnp->conn_lport;
	aconnp->conn_fport = sctph->sh_sport;

	ich = (sctp_chunk_hdr_t *)(iack + 1);
	init = (sctp_init_chunk_t *)(ich + 1);

	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
	ASSERT(acceptor->sctp_faddrs == NULL);
	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
	    &sctp_options);
	if (err != 0)
		return (err);

	if ((err = sctp_set_hdraddrs(acceptor)) != 0)
		return (err);

	if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
		return (err);

	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
	    listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
		acceptor->sctp_prsctp_aware = B_TRUE;
	} else {
		acceptor->sctp_prsctp_aware = B_FALSE;
	}

	/* Get initial TSNs */
	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
	    acceptor->sctp_ltsn - 1;
	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
	/* Serial numbers are initialized to the same value as the TSNs */
	acceptor->sctp_lcsn = acceptor->sctp_ltsn;

	if (!sctp_initialize_params(acceptor, init, iack))
		return (ENOMEM);

	/*
	 * Copy sctp_secret from the listener in case we need to validate
	 * a possibly delayed cookie.
	 */
	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
	    SCTP_SECRET_LEN);
	acceptor->sctp_last_secret_update = ddi_get_lbolt64();

	/*
	 * After acceptor is inserted in the hash list, it can be found.
	 * So we need to lock it here.
	 */
	RUN_SCTP(acceptor);

	sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
	    SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);

	/*
	 * No need to check for multicast destination since ip will only pass
	 * up multicasts to those that have expressed interest
	 * TODO: what about rejecting broadcasts?
	 * Also check that source is not a multicast or broadcast address.
	 */
	/* XXXSCTP */
	acceptor->sctp_state = SCTPS_ESTABLISHED;
	acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt();
	/*
	 * listener->sctp_rwnd should be the default window size or a
	 * window size changed via SO_RCVBUF option.
	 */
	acceptor->sctp_rwnd = listener->sctp_rwnd;
	acceptor->sctp_irwnd = acceptor->sctp_rwnd;
	acceptor->sctp_pd_point = acceptor->sctp_rwnd;
	acceptor->sctp_upcalls = listener->sctp_upcalls;

	return (0);
}