/*
 * Allocate a default DCE and a hash table for per-IP address DCEs
 */
void
dce_stack_init(ip_stack_t *ipst)
{
	int	i;

	ipst->ips_dce_default = kmem_cache_alloc(dce_cache, KM_SLEEP);
	bzero(ipst->ips_dce_default, sizeof (dce_t));
	ipst->ips_dce_default->dce_flags = DCEF_DEFAULT;
	ipst->ips_dce_default->dce_generation = DCE_GENERATION_INITIAL;
	ipst->ips_dce_default->dce_last_change_time =
	    TICK_TO_SEC(ddi_get_lbolt64());
	ipst->ips_dce_default->dce_refcnt = 1;	/* Should never go away */
	ipst->ips_dce_default->dce_ipst = ipst;

	/* This must be a power of two since we are using IRE_ADDR_HASH macro */
	ipst->ips_dce_hashsize = 256;

	ipst->ips_dce_hash_v4 = kmem_zalloc(ipst->ips_dce_hashsize *
	    sizeof (dcb_t), KM_SLEEP);
	ipst->ips_dce_hash_v6 = kmem_zalloc(ipst->ips_dce_hashsize *
	    sizeof (dcb_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_dce_hashsize; i++) {
		rw_init(&ipst->ips_dce_hash_v4[i].dcb_lock, NULL, RW_DEFAULT,
		    NULL);
		rw_init(&ipst->ips_dce_hash_v6[i].dcb_lock, NULL, RW_DEFAULT,
		    NULL);
	}
}
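/*
 * Illustrative sketch (not from the source tree): why the bucket count
 * above must be a power of two. Hash macros in the IRE_ADDR_HASH family
 * fold an address into a bucket index, and folding with a bitwise AND
 * of (size - 1) only covers every bucket uniformly when size is 2^n.
 * hash_bucket() below is a hypothetical stand-in, not the kernel macro.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned
hash_bucket(uint32_t addr, unsigned hashsize)
{
	/* Only sound when hashsize is a nonzero power of two. */
	assert(hashsize != 0 && (hashsize & (hashsize - 1)) == 0);
	return ((addr ^ (addr >> 8) ^ (addr >> 16)) & (hashsize - 1));
}

int
main(void)
{
	/* 256 buckets: the mask 0xff yields an index in [0, 255]. */
	printf("%u\n", hash_bucket(0xc0a80001, 256));
	return (0);
}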
void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
	int64_t now = ddi_get_lbolt64();

	fp->strikes = 0;
	sctp->sctp_strikes = 0;
	fp->lastactive = now;
	fp->hb_expiry = now + SET_HB_INTVL(fp);
	fp->hb_pending = B_FALSE;
	if (fp->state != SCTP_FADDRS_ALIVE) {
		fp->state = SCTP_FADDRS_ALIVE;
		sctp_intf_event(sctp, fp->faddr, SCTP_ADDR_AVAILABLE, 0);
		/* Should have a full IRE now */
		sctp_get_dest(sctp, fp);

		/*
		 * If this is the primary, switch back to it now.  And
		 * we probably want to reset the source addr used to reach
		 * it.
		 * Note that if we didn't find a source in sctp_get_dest
		 * then we'd be unreachable at this point in time.
		 */
		if (fp == sctp->sctp_primary &&
		    fp->state != SCTP_FADDRS_UNREACH) {
			sctp_set_faddr_current(sctp, fp);
			return;
		}
	}
}
void
spa_handle_ignored_writes(spa_t *spa)
{
	inject_handler_t *handler;

	if (zio_injection_enabled == 0)
		return;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		if (spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		if (handler->zi_record.zi_duration > 0) {
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    handler->zi_record.zi_timer +
			    handler->zi_record.zi_duration * hz >
			    ddi_get_lbolt64());
		} else {
			/* duration is negative so the subtraction here adds */
			VERIFY(handler->zi_record.zi_timer == 0 ||
			    handler->zi_record.zi_timer -
			    handler->zi_record.zi_duration >=
			    spa_syncing_txg(spa));
		}
	}

	rw_exit(&inject_lock);
}
/*
 * Simulate hardware that ignores cache flushes.  For requested number
 * of seconds nix the actual writing to disk.
 */
void
zio_handle_ignored_writes(zio_t *zio)
{
	inject_handler_t *handler;

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		/* Ignore errors not destined for this pool */
		if (zio->io_spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
			continue;

		/*
		 * Positive duration implies # of seconds, negative
		 * a number of txgs
		 */
		if (handler->zi_record.zi_timer == 0) {
			if (handler->zi_record.zi_duration > 0)
				handler->zi_record.zi_timer = ddi_get_lbolt64();
			else
				handler->zi_record.zi_timer = zio->io_txg;
		}

		/* Have a "problem" writing 60% of the time */
		if (spa_get_random(100) < 60)
			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
		break;
	}

	rw_exit(&inject_lock);
}
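/*
 * Illustrative sketch (not from the source tree): the signed-duration
 * convention shared by the two injection routines above.  A positive
 * zi_duration counts seconds from the tick value latched in zi_timer;
 * a negative one counts txgs from the txg latched instead, which is
 * why spa_handle_ignored_writes() can verify the window with a
 * subtraction ("duration is negative so the subtraction here adds").
 * All names below are local stand-ins; HZ plays the role of the
 * kernel's tick rate hz.
 */
#include <stdio.h>
#include <stdint.h>

#define	HZ	100	/* assumed tick rate */

static int
window_expired(int64_t duration, int64_t timer, int64_t now_ticks,
    uint64_t syncing_txg)
{
	if (timer == 0)
		return (0);		/* window not armed yet */
	if (duration > 0)		/* seconds, measured in ticks */
		return (timer + duration * HZ <= now_ticks);
	return (timer - duration < (int64_t)syncing_txg); /* -duration txgs */
}

int
main(void)
{
	/* Armed at tick 1000 for 2 seconds: expires at tick 1200. */
	printf("%d\n", window_expired(2, 1000, 1199, 0));	/* 0 */
	printf("%d\n", window_expired(2, 1000, 1200, 0));	/* 1 */
	/* Armed at txg 50 for 3 txgs: expires once syncing txg > 53. */
	printf("%d\n", window_expired(-3, 50, 0, 53));		/* 0 */
	printf("%d\n", window_expired(-3, 50, 0, 54));		/* 1 */
	return (0);
}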
/*
 * Atomically looks for a non-default DCE, and if not found tries to create one.
 * If there is no memory it returns NULL.
 * When an entry is created we increase the generation number on
 * the default DCE so that conn_ip_output will detect there is a new DCE.
 * ifindex should only be used with link-local addresses.
 */
dce_t *
dce_lookup_and_add_v6(const in6_addr_t *dst, uint_t ifindex, ip_stack_t *ipst)
{
	uint_t hash;
	dcb_t *dcb;
	dce_t *dce;

	/* We should not create entries for link-locals w/o an ifindex */
	ASSERT(!(IN6_IS_ADDR_LINKSCOPE(dst)) || ifindex != 0);

	hash = IRE_ADDR_HASH_V6(*dst, ipst->ips_dce_hashsize);
	dcb = &ipst->ips_dce_hash_v6[hash];
	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
		if (IN6_ARE_ADDR_EQUAL(&dce->dce_v6addr, dst) &&
		    dce->dce_ifindex == ifindex) {
			mutex_enter(&dce->dce_lock);
			if (!DCE_IS_CONDEMNED(dce)) {
				dce_refhold(dce);
				mutex_exit(&dce->dce_lock);
				rw_exit(&dcb->dcb_lock);
				return (dce);
			}
			mutex_exit(&dce->dce_lock);
		}
	}

	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
	if (dce == NULL) {
		rw_exit(&dcb->dcb_lock);
		return (NULL);
	}
	bzero(dce, sizeof (dce_t));
	dce->dce_ipst = ipst;	/* No netstack_hold */
	dce->dce_v6addr = *dst;
	dce->dce_ifindex = ifindex;
	dce->dce_generation = DCE_GENERATION_INITIAL;
	dce->dce_ipversion = IPV6_VERSION;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	dce_refhold(dce);	/* For the hash list */

	/* Link into list */
	if (dcb->dcb_dce != NULL)
		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
	dce->dce_next = dcb->dcb_dce;
	dce->dce_ptpn = &dcb->dcb_dce;
	dcb->dcb_dce = dce;
	dce->dce_bucket = dcb;
	atomic_add_32(&dcb->dcb_cnt, 1);
	dce_refhold(dce);	/* For the caller */
	rw_exit(&dcb->dcb_lock);

	/* Initialize dce_ident to be different than for the last packet */
	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;

	dce_increment_generation(ipst->ips_dce_default);
	return (dce);
}
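/*
 * Illustrative sketch (not from the source tree): the lookup-or-insert
 * shape used by dce_lookup_and_add_v6() above (and its v4 twin below),
 * reduced to a user-space bucket protected by a pthread rwlock.
 * Holding the bucket lock as a writer across the whole operation is
 * what makes the lookup-then-insert atomic: no second thread can slip
 * in a duplicate between the failed search and the insertion.  All
 * types and names here are stand-ins for the demo.
 */
#include <pthread.h>
#include <stdlib.h>

typedef struct entry {
	struct entry	*next;
	unsigned	key;
	int		refcnt;
} entry_t;

typedef struct bucket {
	pthread_rwlock_t	lock;
	entry_t			*head;
} bucket_t;

static entry_t *
lookup_and_add(bucket_t *b, unsigned key)
{
	entry_t *e;

	(void) pthread_rwlock_wrlock(&b->lock);
	for (e = b->head; e != NULL; e = e->next) {
		if (e->key == key) {
			e->refcnt++;		/* hold for the caller */
			(void) pthread_rwlock_unlock(&b->lock);
			return (e);
		}
	}
	if ((e = calloc(1, sizeof (*e))) == NULL) {
		(void) pthread_rwlock_unlock(&b->lock);
		return (NULL);		/* no memory: fail like the kernel */
	}
	e->key = key;
	e->refcnt = 2;			/* one for the list, one for caller */
	e->next = b->head;
	b->head = e;
	(void) pthread_rwlock_unlock(&b->lock);
	return (e);
}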
static void
sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
    mblk_t *timer_mp)
{
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ASSERT(fp->ixa != NULL);

	bcopy(addr, &fp->faddr, sizeof (*addr));
	if (IN6_IS_ADDR_V4MAPPED(addr)) {
		fp->isv4 = 1;
		/* Make sure that sfa_pmss is a multiple of SCTP_ALIGN. */
		fp->sfa_pmss =
		    (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
		    ~(SCTP_ALIGN - 1);
		fp->ixa->ixa_flags |= IXAF_IS_IPV4;
	} else {
		fp->isv4 = 0;
		fp->sfa_pmss =
		    (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
		    ~(SCTP_ALIGN - 1);
		fp->ixa->ixa_flags &= ~IXAF_IS_IPV4;
	}
	fp->cwnd = sctps->sctps_slow_start_initial * fp->sfa_pmss;
	fp->rto = MIN(sctp->sctp_rto_initial, sctp->sctp_init_rto_max);
	SCTP_MAX_RTO(sctp, fp);
	fp->srtt = -1;
	fp->rtt_updates = 0;
	fp->strikes = 0;
	fp->max_retr = sctp->sctp_pp_max_rxt;
	/* Mark it as not confirmed. */
	fp->state = SCTP_FADDRS_UNCONFIRMED;
	fp->hb_interval = sctp->sctp_hb_interval;
	fp->ssthresh = sctps->sctps_initial_ssthresh;
	fp->suna = 0;
	fp->pba = 0;
	fp->acked = 0;
	fp->lastactive = fp->hb_expiry = ddi_get_lbolt64();
	fp->timer_mp = timer_mp;
	fp->hb_pending = B_FALSE;
	fp->hb_enabled = B_TRUE;
	fp->df = 1;
	fp->pmtu_discovered = 0;
	fp->next = NULL;
	fp->T3expire = 0;
	(void) random_get_pseudo_bytes((uint8_t *)&fp->hb_secret,
	    sizeof (fp->hb_secret));
	fp->rxt_unacked = 0;

	sctp_get_dest(sctp, fp);
}
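/*
 * Illustrative sketch (not from the source tree): the masking idiom
 * used above to keep sfa_pmss a multiple of SCTP_ALIGN.  Clearing the
 * low bits with ~(align - 1) rounds down to the nearest multiple,
 * provided align is a power of two.  The value 4 for SCTP_ALIGN below
 * is only a stand-in for the demo.
 */
#include <stdio.h>

#define	SCTP_ALIGN	4	/* assumed power of two */

int
main(void)
{
	int mtu_minus_hdrs = 1461;

	/* 1461 & ~3 == 1460, the largest multiple of 4 not above 1461. */
	printf("%d\n", mtu_minus_hdrs & ~(SCTP_ALIGN - 1));
	return (0);
}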
/*
 * Atomically looks for a non-default DCE, and if not found tries to create one.
 * If there is no memory it returns NULL.
 * When an entry is created we increase the generation number on
 * the default DCE so that conn_ip_output will detect there is a new DCE.
 */
dce_t *
dce_lookup_and_add_v4(ipaddr_t dst, ip_stack_t *ipst)
{
	uint_t hash;
	dcb_t *dcb;
	dce_t *dce;

	hash = IRE_ADDR_HASH(dst, ipst->ips_dce_hashsize);
	dcb = &ipst->ips_dce_hash_v4[hash];
	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = dce->dce_next) {
		if (dce->dce_v4addr == dst) {
			mutex_enter(&dce->dce_lock);
			if (!DCE_IS_CONDEMNED(dce)) {
				dce_refhold(dce);
				mutex_exit(&dce->dce_lock);
				rw_exit(&dcb->dcb_lock);
				return (dce);
			}
			mutex_exit(&dce->dce_lock);
		}
	}
	dce = kmem_cache_alloc(dce_cache, KM_NOSLEEP);
	if (dce == NULL) {
		rw_exit(&dcb->dcb_lock);
		return (NULL);
	}
	bzero(dce, sizeof (dce_t));
	dce->dce_ipst = ipst;	/* No netstack_hold */
	dce->dce_v4addr = dst;
	dce->dce_generation = DCE_GENERATION_INITIAL;
	dce->dce_ipversion = IPV4_VERSION;
	dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
	dce_refhold(dce);	/* For the hash list */

	/* Link into list */
	if (dcb->dcb_dce != NULL)
		dcb->dcb_dce->dce_ptpn = &dce->dce_next;
	dce->dce_next = dcb->dcb_dce;
	dce->dce_ptpn = &dcb->dcb_dce;
	dcb->dcb_dce = dce;
	dce->dce_bucket = dcb;
	dce_refhold(dce);	/* For the caller */
	rw_exit(&dcb->dcb_lock);

	/* Initialize dce_ident to be different than for the last packet */
	dce->dce_ident = ipst->ips_dce_default->dce_ident + 1;

	dce_increment_generation(ipst->ips_dce_default);
	return (dce);
}
/*
 * Reclaim a fraction of dce's in the dcb.
 * For now we have a higher probability to delete DCEs without DCE_PMTU.
 */
static void
dcb_reclaim(dcb_t *dcb, ip_stack_t *ipst, uint_t fraction)
{
	uint_t	fraction_pmtu = fraction*4;
	uint_t	hash;
	dce_t	*dce, *nextdce;

	rw_enter(&dcb->dcb_lock, RW_WRITER);
	for (dce = dcb->dcb_dce; dce != NULL; dce = nextdce) {
		nextdce = dce->dce_next;
		/* Clear DCEF_PMTU if the pmtu is too old */
		mutex_enter(&dce->dce_lock);
		if ((dce->dce_flags & DCEF_PMTU) &&
		    TICK_TO_SEC(ddi_get_lbolt64()) -
		    dce->dce_last_change_time >
		    ipst->ips_ip_pathmtu_interval) {
			dce->dce_flags &= ~DCEF_PMTU;
			mutex_exit(&dce->dce_lock);
			dce_increment_generation(dce);
		} else {
			mutex_exit(&dce->dce_lock);
		}
		hash = RANDOM_HASH((uint64_t)(uintptr_t)dce);
		if (dce->dce_flags & DCEF_PMTU) {
			if (hash % fraction_pmtu != 0)
				continue;
		} else {
			if (hash % fraction != 0)
				continue;
		}
		IP_STAT(ipst, ip_dce_reclaim_deleted);
		dce_delete_locked(dcb, dce);
		dce_refrele(dce);
	}
	rw_exit(&dcb->dcb_lock);
}
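/*
 * Illustrative sketch (not from the source tree): the probabilistic
 * reclaim used by dcb_reclaim() above.  Deleting only entries whose
 * hash satisfies hash % fraction == 0 removes roughly 1/fraction of
 * them; PMTU-bearing entries use fraction * 4 and so survive about
 * four times as often.  mix() is a stand-in for RANDOM_HASH.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned
mix(uint64_t v)		/* stand-in mixer for RANDOM_HASH */
{
	v ^= v >> 33;
	v *= 0xff51afd7ed558ccdULL;
	v ^= v >> 33;
	return ((unsigned)v);
}

int
main(void)
{
	unsigned fraction = 8, deleted = 0, deleted_pmtu = 0, i;

	for (i = 0; i < 100000; i++) {
		unsigned h = mix(i);

		if (h % fraction == 0)
			deleted++;		/* ~1/8 of plain entries */
		if (h % (fraction * 4) == 0)
			deleted_pmtu++;		/* ~1/32 of PMTU entries */
	}
	printf("plain: %u, pmtu: %u\n", deleted, deleted_pmtu);
	return (0);
}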
static uint_t
ntwdt_cyclic_softint(caddr_t arg)
{
	/*LINTED E_BAD_PTR_CAST_ALIGN*/
	ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg;
	ntwdt_runstate_t *ntwdt_state;

	ntwdt_state = ntwdt_ptr->ntwdt_run_state;

	mutex_enter(&ntwdt_state->ntwdt_runstate_mutex);

	if ((ntwdt_state->ntwdt_watchdog_flags & NTWDT_FLAG_SKIP_CYCLIC) !=
	    0) {
		ntwdt_state->ntwdt_watchdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC;
		goto end;
	}

	if ((ntwdt_state->ntwdt_timer_running == 0) ||
	    (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) ||
	    (ntwdt_state->ntwdt_watchdog_enabled == 0)) {
		goto end;
	}

	NTWDT_DBG(NTWDT_DBG_IOCTL, ("cyclic_softint: %d"
	    "ddi_get_lbolt64(): %d\n", ntwdt_state->ntwdt_watchdog_timeout,
	    (int)TICK_TO_MSEC(ddi_get_lbolt64())));

	/*
	 * Decrement the virtual watchdog timer and check if it has expired.
	 */
	ntwdt_state->ntwdt_time_remaining -= NTWDT_DECREMENT_INTERVAL;

	if (ntwdt_state->ntwdt_time_remaining == 0) {
		cmn_err(CE_WARN, "application-watchdog expired");
		ntwdt_state->ntwdt_watchdog_expired = 1;

		if (ntwdt_state->ntwdt_reset_enabled != 0) {
			/*
			 * The user wants to reset the system.
			 */
			mutex_exit(&ntwdt_state->ntwdt_runstate_mutex);
			NTWDT_DBG(NTWDT_DBG_NTWDT, ("recovery being done"));
			ntwdt_enforce_timeout();
		} else {
			NTWDT_DBG(NTWDT_DBG_NTWDT, ("no recovery being done"));
			ntwdt_state->ntwdt_watchdog_enabled = 0;
		}

		/*
		 * Schedule Callout to stop the cyclic.
		 */
		(void) timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0);
	} else {
		_NOTE(EMPTY)
		NTWDT_DBG(NTWDT_DBG_NTWDT, ("time remaining in AWDT: %d secs",
		    (int)TICK_TO_MSEC(ntwdt_state->ntwdt_time_remaining)));
	}

end:
	mutex_exit(&ntwdt_state->ntwdt_runstate_mutex);

	return (DDI_INTR_CLAIMED);
}
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list.  If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled.  If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path.  Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, its better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list.  So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(
				    &tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
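/*
 * Illustrative sketch (not from the source tree): the rescheduling
 * clamp at the end of tcp_time_wait_collector() above.  The next fire
 * time is derived from the earliest unexpired entry but clamped to a
 * floor so a nearly-expired head cannot make the collector spin.  The
 * value chosen for TCP_TIME_WAIT_DELAY below is a stand-in, not the
 * kernel's.
 */
#include <stdio.h>
#include <stdint.h>

#define	NANOSEC			1000000000LL
#define	TCP_TIME_WAIT_DELAY	(NANOSEC / 10)	/* assumed 100ms floor */
#define	MAX(a, b)		((a) > (b) ? (a) : (b))

int
main(void)
{
	int64_t until_expiry = 3 * NANOSEC;	/* head expires in 3s */
	int64_t firetime = MAX(TCP_TIME_WAIT_DELAY, until_expiry);

	printf("sleep %lld ns\n", (long long)firetime);	/* 3s */

	until_expiry = 1000;			/* head expires almost now */
	firetime = MAX(TCP_TIME_WAIT_DELAY, until_expiry);
	printf("sleep %lld ns\n", (long long)firetime);	/* 100ms floor */
	return (0);
}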
/*
 * Add a connection to the list of detached TIME_WAIT connections
 * and set its time to expire.
 */
void
tcp_time_wait_append(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	squeue_t	*sqp = tcp->tcp_connp->conn_sqp;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	tcp_timers_stop(tcp);

	/* Freed above */
	ASSERT(tcp->tcp_timer_tid == 0);
	ASSERT(tcp->tcp_ack_tid == 0);

	/* must have happened at the time of detaching the tcp */
	ASSERT(tcp->tcp_ptpahn == NULL);
	ASSERT(tcp->tcp_flow_stopped == 0);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	ASSERT(tcp->tcp_time_wait_expire == 0);
	ASSERT(tcp->tcp_listener == NULL);

	tcp->tcp_time_wait_expire = ddi_get_lbolt64();
	/*
	 * Since tcp_time_wait_expire is lbolt64, it should not wrap around
	 * in practice.  Hence it cannot be 0.  Note that zero means that the
	 * tcp_t is not in the TIME_WAIT list.
	 */
	tcp->tcp_time_wait_expire += MSEC_TO_TICK(
	    tcps->tcps_time_wait_interval);

	ASSERT(TCP_IS_DETACHED(tcp));
	ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
	ASSERT(tcp->tcp_time_wait_next == NULL);
	ASSERT(tcp->tcp_time_wait_prev == NULL);
	TCP_DBGSTAT(tcps, tcp_time_wait);

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	if (tcp_time_wait->tcp_time_wait_head == NULL) {
		ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
		tcp_time_wait->tcp_time_wait_head = tcp;

		/*
		 * Even if the list was empty before, there may be a timer
		 * running since a tcp_t can be removed from the list
		 * in other places, such as tcp_clean_death().  So check if
		 * a timer is needed.
		 */
		if (tcp_time_wait->tcp_time_wait_tid == 0) {
			tcp_time_wait->tcp_time_wait_tid =
			    timeout_generic(CALLOUT_NORMAL,
			    tcp_time_wait_collector, sqp,
			    (hrtime_t)(tcps->tcps_time_wait_interval + 1) *
			    MICROSEC, CALLOUT_TCP_RESOLUTION,
			    CALLOUT_FLAG_ROUNDUP);
		}
	} else {
		/*
		 * The list is not empty, so a timer must be running.  If not,
		 * tcp_time_wait_collector() must be running on this
		 * tcp_time_wait list at the same time.
		 */
		ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
		    tcp_time_wait->tcp_time_wait_running);
		ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
		ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
		    TCPS_TIME_WAIT);
		tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = tcp;
		tcp->tcp_time_wait_prev = tcp_time_wait->tcp_time_wait_tail;
	}
	tcp_time_wait->tcp_time_wait_tail = tcp;
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
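/*
 * Illustrative sketch (not from the source tree): why the code above
 * can treat a 64-bit lbolt as non-wrapping and reserve 0 to mean "not
 * on the TIME_WAIT list".  At 100 ticks per second (the usual hz), a
 * signed 64-bit tick counter lasts on the order of billions of years.
 */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int64_t hz = 100;				/* assumed tick rate */
	int64_t secs_per_year = 365LL * 24 * 60 * 60;
	int64_t years = INT64_MAX / hz / secs_per_year;

	printf("64-bit lbolt wraps after ~%lld years\n", (long long)years);
	return (0);
}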
/*
 * As part of file system hardening, this daemon is awakened
 * every second to flush cached data which includes the
 * buffer cache, the inode cache and mapped pages.
 */
void
fsflush()
{
	struct buf *bp, *dwp;
	struct hbuf *hp;
	int autoup;
	unsigned int ix, icount, count = 0;
	callb_cpr_t cprinfo;
	uint_t		bcount;
	kmutex_t	*hmp;
	struct vfssw *vswp;

	proc_fsflush = ttoproc(curthread);
	proc_fsflush->p_cstime = 0;
	proc_fsflush->p_stime = 0;
	proc_fsflush->p_cutime = 0;
	proc_fsflush->p_utime = 0;
	bcopy("fsflush", curproc->p_user.u_psargs, 8);
	bcopy("fsflush", curproc->p_user.u_comm, 7);

	mutex_init(&fsflush_lock, NULL, MUTEX_DEFAULT, NULL);
	sema_init(&fsflush_sema, 0, NULL, SEMA_DEFAULT, NULL);

	/*
	 * Setup page coalescing.
	 */
	fsf_npgsz = page_num_pagesizes();
	ASSERT(fsf_npgsz < MAX_PAGESIZES);
	for (ix = 0; ix < fsf_npgsz - 1; ++ix) {
		fsf_pgcnt[ix] =
		    page_get_pagesize(ix + 1) / page_get_pagesize(ix);
		fsf_mask[ix] = page_get_pagecnt(ix + 1) - 1;
	}

	autoup = v.v_autoup * hz;
	icount = v.v_autoup / tune.t_fsflushr;
	CALLB_CPR_INIT(&cprinfo, &fsflush_lock, callb_generic_cpr, "fsflush");
loop:
	sema_v(&fsflush_sema);
	mutex_enter(&fsflush_lock);
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	cv_wait(&fsflush_cv, &fsflush_lock);	/* wait for clock */
	CALLB_CPR_SAFE_END(&cprinfo, &fsflush_lock);
	mutex_exit(&fsflush_lock);
	sema_p(&fsflush_sema);

	/*
	 * Write back all old B_DELWRI buffers on the freelist.
	 */
	bcount = 0;
	for (ix = 0; ix < v.v_hbuf; ix++) {

		hp = &hbuf[ix];
		dwp = (struct buf *)&dwbuf[ix];

		bcount += (hp->b_length);

		if (dwp->av_forw == dwp) {
			continue;
		}

		hmp = &hbuf[ix].b_lock;
		mutex_enter(hmp);
		bp = dwp->av_forw;

		/*
		 * Go down only on the delayed write lists.
		 */
		while (bp != dwp) {

			ASSERT(bp->b_flags & B_DELWRI);

			if ((bp->b_flags & B_DELWRI) &&
			    (ddi_get_lbolt() - bp->b_start >= autoup) &&
			    sema_tryp(&bp->b_sem)) {
				bp->b_flags |= B_ASYNC;
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				if (bp->b_vp == NULL) {
					BWRITE(bp);
				} else {
					UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs,
					    bp);
				}
				mutex_enter(hmp);
				bp = dwp->av_forw;
			} else {
				bp = bp->av_forw;
			}
		}
		mutex_exit(hmp);
	}

	/*
	 *
	 * There is no need to wakeup any thread waiting on bio_mem_cv
	 * since brelse will wake them up as soon as IO is complete.
	 */
	bfreelist.b_bcount = bcount;

	if (dopageflush)
		fsflush_do_pages();

	if (!doiflush)
		goto loop;

	/*
	 * If the system was not booted to single user mode, skip the
	 * inode flushing until after fsflush_iflush_delay secs have elapsed.
	 */
	if ((boothowto & RB_SINGLE) == 0 &&
	    (ddi_get_lbolt64() / hz) < fsflush_iflush_delay)
		goto loop;

	/*
	 * Flush cached attribute information (e.g. inodes).
	 */
	if (++count >= icount) {
		count = 0;

		/*
		 * Sync back cached data.
		 */
		RLOCK_VFSSW();
		for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
			if (ALLOCATED_VFSSW(vswp) && VFS_INSTALLED(vswp)) {
				vfs_refvfssw(vswp);
				RUNLOCK_VFSSW();
				(void) fsop_sync_by_kind(vswp - vfssw,
				    SYNC_ATTR, kcred);
				vfs_unrefvfssw(vswp);
				RLOCK_VFSSW();
			}
		}
		RUNLOCK_VFSSW();
	}
	goto loop;
}
int
sctp_listen(sctp_t *sctp)
{
	sctp_tf_t	*tf;
	sctp_stack_t	*sctps = sctp->sctp_sctps;
	conn_t		*connp = sctp->sctp_connp;

	RUN_SCTP(sctp);
	/*
	 * TCP handles listen() increasing the backlog, need to check
	 * if it should be handled here too
	 */
	if (sctp->sctp_state > SCTPS_BOUND ||
	    (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
		WAKE_SCTP(sctp);
		return (EINVAL);
	}

	/* Do an anonymous bind for unbound socket doing listen(). */
	if (sctp->sctp_nsaddrs == 0) {
		struct sockaddr_storage ss;
		int ret;

		bzero(&ss, sizeof (ss));
		ss.ss_family = connp->conn_family;

		WAKE_SCTP(sctp);
		if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
		    sizeof (ss))) != 0)
			return (ret);
		RUN_SCTP(sctp)
	}

	/* Cache things in the ixa without any refhold */
	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	connp->conn_ixa->ixa_cred = connp->conn_cred;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
	if (is_system_labeled())
		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);

	sctp->sctp_state = SCTPS_LISTEN;
	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
	sctp->sctp_last_secret_update = ddi_get_lbolt64();
	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);

	/*
	 * If there is an association limit, allocate and initialize
	 * the counter struct.  Note that since listen can be called
	 * multiple times, the struct may have already been allocated.
	 */
	if (!list_is_empty(&sctps->sctps_listener_conf) &&
	    sctp->sctp_listen_cnt == NULL) {
		sctp_listen_cnt_t *slc;
		uint32_t ratio;

		ratio = sctp_find_listener_conf(sctps,
		    ntohs(connp->conn_lport));
		if (ratio != 0) {
			uint32_t mem_ratio, tot_buf;

			slc = kmem_alloc(sizeof (sctp_listen_cnt_t), KM_SLEEP);
			/*
			 * Calculate the connection limit based on
			 * the configured ratio and maxusers.  Maxusers
			 * are calculated based on memory size,
			 * ~ 1 user per MB.  Note that the conn_rcvbuf
			 * and conn_sndbuf may change after a
			 * connection is accepted.  So what we have
			 * is only an approximation.
			 */
			if ((tot_buf = connp->conn_rcvbuf +
			    connp->conn_sndbuf) < MB) {
				mem_ratio = MB / tot_buf;
				slc->slc_max = maxusers / ratio * mem_ratio;
			} else {
				mem_ratio = tot_buf / MB;
				slc->slc_max = maxusers / ratio / mem_ratio;
			}
			/* At least we should allow some associations! */
			if (slc->slc_max < sctp_min_assoc_listener)
				slc->slc_max = sctp_min_assoc_listener;
			slc->slc_cnt = 1;
			slc->slc_drop = 0;
			sctp->sctp_listen_cnt = slc;
		}
	}

	tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(
	    ntohs(connp->conn_lport))];
	sctp_listen_hash_insert(tf, sctp);

	WAKE_SCTP(sctp);
	return (0);
}
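/*
 * Illustrative sketch (not from the source tree): the association
 * limit arithmetic from sctp_listen() above, pulled out so the two
 * branches are easy to compare.  Below MB of combined buffer space the
 * limit scales up; above it, down.  Input values are stand-ins for the
 * demo.
 */
#include <stdio.h>
#include <stdint.h>

#define	MB	(1024 * 1024)

static uint32_t
slc_max(uint32_t maxusers, uint32_t ratio, uint32_t tot_buf)
{
	uint32_t mem_ratio;

	if (tot_buf < MB) {
		mem_ratio = MB / tot_buf;
		return (maxusers / ratio * mem_ratio);
	}
	mem_ratio = tot_buf / MB;
	return (maxusers / ratio / mem_ratio);
}

int
main(void)
{
	/* 2048 maxusers, ratio 2: 128KB of buffers allows 8x more. */
	printf("%u\n", slc_max(2048, 2, 128 * 1024));	/* 8192 */
	/* 4MB of buffers allows 4x fewer. */
	printf("%u\n", slc_max(2048, 2, 4 * MB));	/* 256 */
	return (0);
}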
/*
 * Common accept code.  Called by sctp_conn_request.
 * cr_pkt is the INIT / INIT ACK packet.
 */
static int
sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
    uint_t ip_hdr_len, sctp_init_chunk_t *iack)
{
	sctp_hdr_t		*sctph;
	sctp_chunk_hdr_t	*ich;
	sctp_init_chunk_t	*init;
	int			err;
	uint_t			sctp_options;
	conn_t			*aconnp;
	conn_t			*lconnp;
	sctp_stack_t	*sctps = listener->sctp_sctps;

	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
	ASSERT(OK_32PTR(sctph));

	aconnp = acceptor->sctp_connp;
	lconnp = listener->sctp_connp;
	aconnp->conn_lport = lconnp->conn_lport;
	aconnp->conn_fport = sctph->sh_sport;

	ich = (sctp_chunk_hdr_t *)(iack + 1);
	init = (sctp_init_chunk_t *)(ich + 1);

	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
	ASSERT(acceptor->sctp_faddrs == NULL);
	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
	    &sctp_options);
	if (err != 0)
		return (err);

	if ((err = sctp_set_hdraddrs(acceptor)) != 0)
		return (err);

	if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
		return (err);

	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
	    listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
		acceptor->sctp_prsctp_aware = B_TRUE;
	} else {
		acceptor->sctp_prsctp_aware = B_FALSE;
	}

	/* Get initial TSNs */
	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
	    acceptor->sctp_ltsn - 1;
	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;

	/* Serial numbers are initialized to the same value as the TSNs */
	acceptor->sctp_lcsn = acceptor->sctp_ltsn;

	if (!sctp_initialize_params(acceptor, init, iack))
		return (ENOMEM);

	/*
	 * Copy sctp_secret from the listener in case we need to validate
	 * a possibly delayed cookie.
	 */
	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
	    SCTP_SECRET_LEN);
	acceptor->sctp_last_secret_update = ddi_get_lbolt64();

	/*
	 * After acceptor is inserted in the hash list, it can be found.
	 * So we need to lock it here.
	 */
	RUN_SCTP(acceptor);

	sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
	    SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
	sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
	    SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);

	/*
	 * No need to check for multicast destination since ip will only pass
	 * up multicasts to those that have expressed interest
	 * TODO: what about rejecting broadcasts?
	 * Also check that source is not a multicast or broadcast address.
	 */
	/* XXXSCTP */
	acceptor->sctp_state = SCTPS_ESTABLISHED;
	acceptor->sctp_assoc_start_time = (uint32_t)ddi_get_lbolt();

	/*
	 * listener->sctp_rwnd should be the default window size or a
	 * window size changed via SO_RCVBUF option.
	 */
	acceptor->sctp_rwnd = listener->sctp_rwnd;
	acceptor->sctp_irwnd = acceptor->sctp_rwnd;
	acceptor->sctp_pd_point = acceptor->sctp_rwnd;
	acceptor->sctp_upcalls = listener->sctp_upcalls;

	return (0);
}