/* * Initialize expirations and counters based on lifetime payload. */ void import_lifetime(struct tdb *tdb, struct sadb_lifetime *sadb_lifetime, int type) { struct timeval tv; if (!sadb_lifetime) return; getmicrotime(&tv); switch (type) { case PFKEYV2_LIFETIME_HARD: if ((tdb->tdb_exp_allocations = sadb_lifetime->sadb_lifetime_allocations) != 0) tdb->tdb_flags |= TDBF_ALLOCATIONS; else tdb->tdb_flags &= ~TDBF_ALLOCATIONS; if ((tdb->tdb_exp_bytes = sadb_lifetime->sadb_lifetime_bytes) != 0) tdb->tdb_flags |= TDBF_BYTES; else tdb->tdb_flags &= ~TDBF_BYTES; if ((tdb->tdb_exp_timeout = sadb_lifetime->sadb_lifetime_addtime) != 0) { tdb->tdb_flags |= TDBF_TIMER; if (tv.tv_sec + tdb->tdb_exp_timeout < tv.tv_sec) tv.tv_sec = ((unsigned long) -1) / 2; /* XXX */ else tv.tv_sec += tdb->tdb_exp_timeout; timeout_add(&tdb->tdb_timer_tmo, hzto(&tv)); } else tdb->tdb_flags &= ~TDBF_TIMER; if ((tdb->tdb_exp_first_use = sadb_lifetime->sadb_lifetime_usetime) != 0) tdb->tdb_flags |= TDBF_FIRSTUSE; else tdb->tdb_flags &= ~TDBF_FIRSTUSE; break; case PFKEYV2_LIFETIME_SOFT: if ((tdb->tdb_soft_allocations = sadb_lifetime->sadb_lifetime_allocations) != 0) tdb->tdb_flags |= TDBF_SOFT_ALLOCATIONS; else tdb->tdb_flags &= ~TDBF_SOFT_ALLOCATIONS; if ((tdb->tdb_soft_bytes = sadb_lifetime->sadb_lifetime_bytes) != 0) tdb->tdb_flags |= TDBF_SOFT_BYTES; else tdb->tdb_flags &= ~TDBF_SOFT_BYTES; if ((tdb->tdb_soft_timeout = sadb_lifetime->sadb_lifetime_addtime) != 0) { tdb->tdb_flags |= TDBF_SOFT_TIMER; if (tv.tv_sec + tdb->tdb_soft_timeout < tv.tv_sec) tv.tv_sec = ((unsigned long) -1) / 2; /* XXX */ else tv.tv_sec += tdb->tdb_soft_timeout; timeout_add(&tdb->tdb_stimer_tmo, hzto(&tv)); } else tdb->tdb_flags &= ~TDBF_SOFT_TIMER; if ((tdb->tdb_soft_first_use = sadb_lifetime->sadb_lifetime_usetime) != 0) tdb->tdb_flags |= TDBF_SOFT_FIRSTUSE; else tdb->tdb_flags &= ~TDBF_SOFT_FIRSTUSE; break; case PFKEYV2_LIFETIME_CURRENT: /* Nothing fancy here. */ tdb->tdb_cur_allocations = sadb_lifetime->sadb_lifetime_allocations; tdb->tdb_cur_bytes = sadb_lifetime->sadb_lifetime_bytes; tdb->tdb_established = sadb_lifetime->sadb_lifetime_addtime; tdb->tdb_first_use = sadb_lifetime->sadb_lifetime_usetime; } }
/* * Write out process accounting information, on process exit. * Data to be written out is specified in Leffler, et al. * and are enumerated below. (They're also noted in the system * "acct.h" header file.) */ int acct_process(struct proc *p) { struct acct acct; struct rusage *r; struct timeval ut, st, tmp; int t; struct vnode *vp; struct plimit *oplim = NULL; int error; /* If accounting isn't enabled, don't bother */ vp = acctp; if (vp == NULL) return (0); /* * Raise the file limit so that accounting can't be stopped by the * user. (XXX - we should think about the cpu limit too). */ if (p->p_p->ps_limit->p_refcnt > 1) { oplim = p->p_p->ps_limit; p->p_p->ps_limit = limcopy(p->p_p->ps_limit); } p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* * Get process accounting information. */ /* (1) The name of the command that ran */ bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm); /* (2) The amount of user and system time that was used */ calcru(p, &ut, &st, NULL); acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec); acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec); /* (3) The elapsed time the command ran (and its starting time) */ acct.ac_btime = p->p_stats->p_start.tv_sec; getmicrotime(&tmp); timersub(&tmp, &p->p_stats->p_start, &tmp); acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec); /* (4) The average amount of memory used */ r = &p->p_stats->p_ru; timeradd(&ut, &st, &tmp); t = tmp.tv_sec * hz + tmp.tv_usec / tick; if (t) acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; else acct.ac_mem = 0; /* (5) The number of disk I/O operations done */ acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); /* (6) The UID and GID of the process */ acct.ac_uid = p->p_cred->p_ruid; acct.ac_gid = p->p_cred->p_rgid; /* (7) The terminal from which the process was started */ if ((p->p_p->ps_flags & PS_CONTROLT) && p->p_p->ps_pgrp->pg_session->s_ttyp) acct.ac_tty = p->p_p->ps_pgrp->pg_session->s_ttyp->t_dev; else acct.ac_tty = NODEV; /* (8) The boolean flags that tell how the process terminated, etc. */ acct.ac_flag = p->p_acflag; /* * Now, just write the accounting information to the file. */ error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct), (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, NULL, p); if (oplim) { limfree(p->p_p->ps_limit); p->p_p->ps_limit = oplim; } return error; }
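/*
 * [Illustrative sketch, not part of the excerpt above.]  acct_process()
 * compresses the times and I/O counts with encode_comp_t(), whose body is
 * not shown here.  The following is the traditional 4.4BSD-style encoding
 * a comp_t is commonly assumed to use: a 13-bit fraction in units of
 * 1/AHZ seconds with a 3-bit base-8 exponent.  The constants below are
 * the conventional values and are an assumption, not taken from this file.
 */
#define AHZ		64			/* assumed acct(5) granularity */
#define MANTSIZE	13			/* 13 bit mantissa. */
#define EXPSIZE		3			/* Base 8 (3 bit) exponent. */
#define MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */

static comp_t
encode_comp_t(u_long s, u_long us)
{
	int exp = 0, rnd = 0;

	s *= AHZ;			/* whole seconds to AHZ ticks */
	s += us / (1000000 / AHZ);	/* fold in the microseconds */

	while (s > MAXFRACT) {
		rnd = s & (1 << (EXPSIZE - 1));	/* high bit about to drop? */
		s >>= EXPSIZE;			/* base 8: shift by 3 bits */
		exp++;
	}

	/* If we need to round up, do it (and handle overflow correctly). */
	if (rnd && (++s > MAXFRACT)) {
		s >>= EXPSIZE;
		exp++;
	}

	/* Shift the exponent into place and add on the mantissa. */
	exp <<= MANTSIZE;
	exp += s;
	return (exp);
}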
/* * Queue a packet. Start transmission if not active. * Packet is placed in Information field of PPP frame. * Called at splnet as the if->if_output handler. * Called at splnet from pppwrite(). */ static int pppoutput_serialized(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst, struct rtentry *rtp) { struct ppp_softc *sc = &ppp_softc[ifp->if_dunit]; int protocol, address, control; u_char *cp; int error; #ifdef INET struct ip *ip; #endif struct ifqueue *ifq; enum NPmode mode; int len; struct mbuf *m; struct altq_pktattr pktattr; if (sc->sc_devp == NULL || (ifp->if_flags & IFF_RUNNING) == 0 || ((ifp->if_flags & IFF_UP) == 0 && dst->sa_family != AF_UNSPEC)) { error = ENETDOWN; /* sort of */ goto bad; } ifq_classify(&ifp->if_snd, m0, dst->sa_family, &pktattr); /* * Compute PPP header. */ m0->m_flags &= ~M_HIGHPRI; switch (dst->sa_family) { #ifdef INET case AF_INET: address = PPP_ALLSTATIONS; control = PPP_UI; protocol = PPP_IP; mode = sc->sc_npmode[NP_IP]; /* * If this packet has the "low delay" bit set in the IP header, * put it on the fastq instead. */ ip = mtod(m0, struct ip *); if (ip->ip_tos & IPTOS_LOWDELAY) m0->m_flags |= M_HIGHPRI; break; #endif #ifdef IPX case AF_IPX: /* * This is pretty bogus.. We dont have an ipxcp module in pppd * yet to configure the link parameters. Sigh. I guess a * manual ifconfig would do.... -Peter */ address = PPP_ALLSTATIONS; control = PPP_UI; protocol = PPP_IPX; mode = NPMODE_PASS; break; #endif case AF_UNSPEC: address = PPP_ADDRESS(dst->sa_data); control = PPP_CONTROL(dst->sa_data); protocol = PPP_PROTOCOL(dst->sa_data); mode = NPMODE_PASS; break; default: kprintf("%s: af%d not supported\n", ifp->if_xname, dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Drop this packet, or return an error, if necessary. */ if (mode == NPMODE_ERROR) { error = ENETDOWN; goto bad; } if (mode == NPMODE_DROP) { error = 0; goto bad; } /* * Add PPP header. If no space in first mbuf, allocate another. * (This assumes M_LEADINGSPACE is always 0 for a cluster mbuf.) */ if (M_LEADINGSPACE(m0) < PPP_HDRLEN) { m0 = m_prepend(m0, PPP_HDRLEN, MB_DONTWAIT); if (m0 == NULL) { error = ENOBUFS; goto bad; } m0->m_len = 0; } else m0->m_data -= PPP_HDRLEN; cp = mtod(m0, u_char *); *cp++ = address; *cp++ = control; *cp++ = protocol >> 8; *cp++ = protocol & 0xff; m0->m_len += PPP_HDRLEN; len = 0; for (m = m0; m != NULL; m = m->m_next) len += m->m_len; if (sc->sc_flags & SC_LOG_OUTPKT) { kprintf("%s output: ", ifp->if_xname); pppdumpm(m0); } if ((protocol & 0x8000) == 0) { #ifdef PPP_FILTER /* * Apply the pass and active filters to the packet, * but only if it is a data packet. */ *mtod(m0, u_char *) = 1; /* indicates outbound */ if (sc->sc_pass_filt.bf_insns != 0 && bpf_filter(sc->sc_pass_filt.bf_insns, (u_char *) m0, len, 0) == 0) { error = 0; /* drop this packet */ goto bad; } /* * Update the time we sent the most recent packet. */ if (sc->sc_active_filt.bf_insns == 0 || bpf_filter(sc->sc_active_filt.bf_insns, (u_char *) m0, len, 0)) sc->sc_last_sent = time_second; *mtod(m0, u_char *) = address; #else /* * Update the time we sent the most recent data packet. */ sc->sc_last_sent = time_second; #endif /* PPP_FILTER */ } BPF_MTAP(ifp, m0); /* * Put the packet on the appropriate queue. 
*/ crit_enter(); if (mode == NPMODE_QUEUE) { /* XXX we should limit the number of packets on this queue */ *sc->sc_npqtail = m0; m0->m_nextpkt = NULL; sc->sc_npqtail = &m0->m_nextpkt; } else { /* fastq and if_snd are emptied at spl[soft]net now */ if ((m0->m_flags & M_HIGHPRI) && !ifq_is_enabled(&sc->sc_if.if_snd)) { ifq = &sc->sc_fastq; if (IF_QFULL(ifq) && dst->sa_family != AF_UNSPEC) { IF_DROP(ifq); m_freem(m0); error = ENOBUFS; } else { IF_ENQUEUE(ifq, m0); error = 0; } } else { ASSERT_IFNET_SERIALIZED_TX(&sc->sc_if); error = ifq_enqueue(&sc->sc_if.if_snd, m0, &pktattr); } if (error) { crit_exit(); sc->sc_if.if_oerrors++; sc->sc_stats.ppp_oerrors++; return (error); } (*sc->sc_start)(sc); } getmicrotime(&ifp->if_lastchange); ifp->if_opackets++; ifp->if_obytes += len; crit_exit(); return (0); bad: m_freem(m0); return (error); }
/* * we're emulating a mousesystems serial mouse here.. */ void msintr(void *arg) { static const char to_one[] = { 1, 2, 2, 4, 4, 4, 4 }; static const int to_id[] = { MS_RIGHT, MS_MIDDLE, 0, MS_LEFT }; struct ms_port *ms = arg; struct firm_event *fe; int mb, ub, d, get, put, any, port; u_char pra, *horc, *verc; u_short pot, count; short dx, dy; port = ms->ms_portno; horc = ((u_char *) &count) + 1; verc = (u_char *) &count; /* * first read the three buttons. */ pot = custom.potgor; pra = ciaa.pra; pot >>= port == 0 ? 8 : 12; /* contains right and middle button */ pra >>= port == 0 ? 6 : 7; /* contains left button */ mb = (pot & 4) / 4 + (pot & 1) * 2 + (pra & 1) * 4; mb ^= 0x07; /* * read current values of counter registers */ if (port == 0) count = custom.joy0dat; else count = custom.joy1dat; /* * take care of wraparound */ dx = *horc - ms->ms_horc; if (dx < -127) dx += 255; else if (dx > 127) dx -= 255; dy = *verc - ms->ms_verc; if (dy < -127) dy += 255; else if (dy > 127) dy -= 255; /* * remember current values for next scan */ ms->ms_horc = *horc; ms->ms_verc = *verc; ms->ms_dx = dx; ms->ms_dy = dy; ms->ms_mb = mb; #if NWSMOUSE > 0 /* * If we have attached wsmouse and we are not opened * directly then pass events to wscons. */ if (ms->ms_wsmousedev && ms->ms_wsenabled) { int buttons = 0; if (mb & 4) buttons |= 1; if (mb & 2) buttons |= 2; if (mb & 1) buttons |= 4; wsmouse_input(ms->ms_wsmousedev, buttons, dx, -dy, 0, 0, WSMOUSE_INPUT_DELTA); } else #endif if (dx || dy || ms->ms_ub != ms->ms_mb) { /* * We have at least one event (mouse button, delta-X, or * delta-Y; possibly all three, and possibly three separate * button events). Deliver these events until we are out of * changes or out of room. As events get delivered, mark them * `unchanged'. */ any = 0; get = ms->ms_events.ev_get; put = ms->ms_events.ev_put; fe = &ms->ms_events.ev_q[put]; mb = ms->ms_mb; ub = ms->ms_ub; while ((d = mb ^ ub) != 0) { /* * Mouse button change. Convert up to three changes * to the `first' change, and drop it into the event * queue. */ if ((++put) % EV_QSIZE == get) { put--; goto out; } d = to_one[d - 1]; /* from 1..7 to {1,2,4} */ fe->id = to_id[d - 1]; /* from {1,2,4} to ID */ fe->value = mb & d ? VKEY_DOWN : VKEY_UP; getmicrotime(&fe->time); fe++; if (put >= EV_QSIZE) { put = 0; fe = &ms->ms_events.ev_q[0]; } any = 1; ub ^= d; } if (ms->ms_dx) { if ((++put) % EV_QSIZE == get) { put--; goto out; } fe->id = LOC_X_DELTA; fe->value = ms->ms_dx; getmicrotime(&fe->time); fe++; if (put >= EV_QSIZE) { put = 0; fe = &ms->ms_events.ev_q[0]; } any = 1; ms->ms_dx = 0; } if (ms->ms_dy) { if ((++put) % EV_QSIZE == get) { put--; goto out; } fe->id = LOC_Y_DELTA; fe->value = ms->ms_dy; getmicrotime(&fe->time); fe++; if (put >= EV_QSIZE) { put = 0; fe = &ms->ms_events.ev_q[0]; } any = 1; ms->ms_dy = 0; } out: if (any) { ms->ms_ub = ub; ms->ms_events.ev_put = put; EV_WAKEUP(&ms->ms_events); } } /* * reschedule handler, or if terminating, * handshake with ms_disable */ if (ms->ms_ready) callout_reset(&ms->ms_intr_ch, 2, msintr, ms); else wakeup(ms); }
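/*
 * [Illustrative sketch, not part of the driver above.]  The delta logic in
 * msintr() folds the difference of two successive 8-bit hardware counter
 * readings back into the -127..127 range.  The helper name below is
 * hypothetical; it only restates that idiom in isolation.
 */
static short
ms_counter_delta(u_char cur, u_char prev)
{
	short d = (short)cur - (short)prev;

	if (d < -127)
		d += 255;
	else if (d > 127)
		d -= 255;
	return (d);
}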
/* * FDDI output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ static int fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t type; int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); #if defined(INET) || defined(INET6) if (ro != NULL) is_gw = (ro->ro_flags & RT_HAS_GW) != 0; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: { error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; } case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = -1; /* if this is for us, don't do it */ switch (ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN); else bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif /* INET6 */ case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN); /* FALLTHROUGH */ } case AF_UNSPEC: { const struct ether_header *eh; loop_copy = -1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN); if (*edst & 1) m->m_flags |= (M_BCAST|M_MCAST); type = eh->ether_type; break; } case AF_IMPLINK: { fh = mtod(m, struct fddi_header *); error = EPROTONOSUPPORT; switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) { case FDDIFC_LLC_ASYNC: { /* legal priorities are 0 through 7 */ if ((fh->fddi_fc & FDDIFC_Z) > 7) goto bad; break; } case FDDIFC_LLC_SYNC: { /* FDDIFC_Z bits reserved, must be zero */ if (fh->fddi_fc & FDDIFC_Z) goto bad; break; } case FDDIFC_SMT: { /* FDDIFC_Z bits must be non zero */ if ((fh->fddi_fc & FDDIFC_Z) == 0) goto bad; break; } default: { /* anything else is too dangerous */ goto bad; } } error = 0; if (fh->fddi_dhost[0] & 1) m->m_flags |= (M_BCAST|M_MCAST); goto queue_it; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } /* * Add LLC header. */ if (type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(type); } /* * Add local net header. If no space in first mbuf, * allocate another. 
*/ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); fh = mtod(m, struct fddi_header *); fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4; bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN); queue_it: if (hdrcmplt) bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); else bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, FDDI_HDR_LEN); } else if (bcmp(fh->fddi_dhost, fh->fddi_shost, FDDI_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, FDDI_HDR_LEN); return (0); /* XXX */ } } error = (ifp->if_transmit)(ifp, m); if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); }
/*
 * bfq_queue(): .queue callback of the bfq policy.
 *
 * A thread calls this function to hand in its I/O requests (bio).
 * Their bios are stored in the per-thread queue, in the tdio structure.
 * Currently, the sync/async bios are queued together, which may cause
 * some performance issues.
 *
 * Besides queueing bios, this function also calculates the average
 * thinking time and average seek distance of a thread, using the
 * information in the bio structure.
 *
 * If the calling thread is being waited on by the bfq scheduler due to
 * the AS feature, this function will cancel the callout alarm
 * and resume the scheduler to continue serving this thread.
 *
 * lock:
 *	THREAD_IO_LOCK: protect from queue iteration in bfq_dequeue()
 *	BFQ_LOCK: protect from other insertions/deletions in wf2q_augtree
 *	in bfq_queue() or bfq_dequeue().
 *
 * refcount:
 *	If the calling thread is being waited on by the scheduler, the
 *	refcount of the related tdio will decrease by 1 after this function.
 *	The counterpart increase is in bfq_dequeue(), before resetting the
 *	callout alarm.
 *
 * Return value:
 *	EINVAL: if bio->bio_buf->b_cmd == BUF_CMD_FLUSH
 *	0: bio is queued successfully.
 */
static int
bfq_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	struct bfq_disk_ctx *bfq_diskctx = (struct bfq_disk_ctx *)diskctx;
	struct bfq_thread_io *bfq_tdio = (struct bfq_thread_io *)tdio;
	int original_qlength;

	/* we do not handle flush requests; push them down to dsched */
	if (__predict_false(bio->bio_buf->b_cmd == BUF_CMD_FLUSH))
		return (EINVAL);

	DSCHED_THREAD_IO_LOCK(tdio);
	KKASSERT(tdio->debug_priv == 0xF00FF00F);
	dsched_debug(BFQ_DEBUG_NORMAL, "bfq: tdio %p pushes bio %p\n",
	    bfq_tdio, bio);

	dsched_set_bio_priv(bio, tdio);
	dsched_thread_io_ref(tdio);

	if ((bio->bio_buf->b_cmd == BUF_CMD_READ) ||
	    (bio->bio_buf->b_cmd == BUF_CMD_WRITE)) {
		bfq_update_tdio_seek_avg(bfq_tdio, bio);
	}

	bfq_update_tdio_ttime_avg(bfq_tdio);

	/* update last_bio_pushed_time */
	getmicrotime(&bfq_tdio->last_bio_pushed_time);

	if ((bfq_tdio->seek_samples > BFQ_VALID_MIN_SAMPLES) &&
	    BFQ_TDIO_SEEKY(bfq_tdio))
		dsched_debug(BFQ_DEBUG_NORMAL, "BFQ: tdio %p is seeky\n",
		    bfq_tdio);

	/*
	 * If a tdio takes too long to think, we disable its AS feature.
	 */
	if ((bfq_tdio->ttime_samples > BFQ_VALID_MIN_SAMPLES) &&
	    (bfq_tdio->ttime_avg > BFQ_T_WAIT * (1000 / hz) * 1000) &&
	    (bfq_tdio->service_received > bfq_tdio->budget / 8)) {
		dsched_debug(BFQ_DEBUG_NORMAL,
		    "BFQ: tdio %p takes too long time to think\n", bfq_tdio);
		bfq_tdio->tdio_as_switch = 0;
	} else {
		bfq_tdio->tdio_as_switch = 1;
	}

	/* insert the bio into the tdio's own queue */
	KKASSERT(lockstatus(&tdio->lock, curthread) == LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&tdio->queue, bio, link);
#if 0
	tdio->qlength++;
#endif
	original_qlength = atomic_fetchadd_int(&tdio->qlength, 1);
	DSCHED_THREAD_IO_UNLOCK(tdio);

	/*
	 * A new thread:
	 * In the dequeue function, we remove the thread
	 * from the aug-tree if it has no further bios.
	 * Therefore "new" means a really new thread (a
	 * newly created thread or a thread that pushed no more
	 * bios when the scheduler was waiting for it) or
	 * one that was removed from the aug-tree earlier.
	 */
	if (original_qlength == 0) {
		/*
		 * a really new thread
		 */
		BFQ_LOCK(bfq_diskctx);
		if (bfq_tdio != bfq_diskctx->bfq_active_tdio) {
			/* insert the tdio into the wf2q queue */
			wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, bfq_tdio);
		} else {
			/*
			 * the thread being waited on by the scheduler
			 */
			if (bfq_diskctx->bfq_blockon == bfq_tdio) {
				/*
				 * XXX: possible race condition here:
				 * if the callout function is triggered when
				 * the following code is executed, then after
				 * releasing the TDIO lock, the callout function
				 * will set the thread inactive and it will never
				 * be inserted into the aug-tree (so its bio pushed
				 * this time will not be dispatched) until it pushes
				 * further bios
				 */
				bfq_diskctx->bfq_as_hit++;
				bfq_update_as_avg_wait(bfq_diskctx, bfq_tdio,
				    BFQ_AS_STAT_ALL);

				if (callout_pending(&bfq_diskctx->bfq_callout))
					callout_stop(&bfq_diskctx->bfq_callout);
				bfq_diskctx->bfq_blockon = NULL;

				/* ref'ed in dequeue(), before resetting callout */
				dsched_thread_io_unref(&bfq_tdio->head);

				dsched_debug(BFQ_DEBUG_VERBOSE,
				    "BFQ: %p pushes a new bio when AS\n",
				    bfq_tdio);
			}
		}
		BFQ_UNLOCK(bfq_diskctx);
	}

	helper_msg_dequeue(bfq_diskctx);

	return 0;
}
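/*
 * [Illustrative sketch, not part of the excerpt above.]  The bodies of
 * bfq_update_tdio_ttime_avg() and bfq_update_tdio_seek_avg() are not shown
 * here.  The sketch below is only a generic exponentially weighted
 * "think time" update in the spirit of what the comment on bfq_queue()
 * describes; the 7:1 weighting and the helper name are assumptions, not
 * the actual BFQ implementation.
 */
static void
example_update_ttime_avg(struct bfq_thread_io *bfq_tdio)
{
	struct timeval now;
	int64_t think_us;

	getmicrotime(&now);
	think_us = (int64_t)(now.tv_sec -
	    bfq_tdio->last_bio_pushed_time.tv_sec) * 1000000 +
	    (now.tv_usec - bfq_tdio->last_bio_pushed_time.tv_usec);
	if (think_us < 0)
		think_us = 0;

	/* weight old samples 7:1 against the new one (assumed ratio) */
	bfq_tdio->ttime_avg = (bfq_tdio->ttime_avg * 7 + think_us) / 8;
	bfq_tdio->ttime_samples++;
}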
/* * bfq_dequeue(): dispatch bios to the disk driver. * * This function will push as many bios as the number of free slots * in the tag queue. * * In the progress of dispatching, the following events may happen: * - Current thread is timeout: Expire the current thread for * BFQ_REASON_TIMEOUT, and select a new thread to serve in the * wf2q tree. * * - Current thread runs out of its budget: Expire the current thread * for BFQ_REASON_OUT_OF_BUDGET, and select a new thread to serve * * - Current thread has no further bios in its queue: if the AS feature * is turned on, the bfq scheduler sets an alarm and starts to suspend. * The bfq_timeout() or bfq_queue() calls may resume the scheduler. * * Implementation note: The bios selected to be dispatched will first * be stored in an array bio_do_dispatch. After this function releases * all the locks it holds, it will call dsched_strategy_request_polling() * for each bio stored. * * With the help of bfq_disk_ctx->pending_dequeue, * there will be only one bfq_dequeue pending on the BFQ_LOCK. * * lock: * BFQ_LOCK: protect from wf2q_augtree operations in bfq_queue() * THREAD_IO_LOCK: locks the active_tdio. Protect from queue insertions * in bfq_queue; Protect the active_tdio->budget * * refcount: * If the scheduler decides to suspend, the refcount of active_tdio * increases by 1. The counterpart decreasing is in bfq_queue() and * bfq_timeout() * blocking: * May be blocking on the disk driver lock. It depends on drivers. * * Calling path: * The callers could be: * bfq_queue(), bfq_timeout() and the registered polling function. * * caller --> helper_msg_dequeue --lwkt_msg--> helper_thread-> me * */ void bfq_dequeue(struct dsched_disk_ctx *diskctx) { int free_slots, bio_index = 0, i, remaining_budget = 0;/* remaining budget of current active process */ struct bio *bio, *bio_to_dispatch[33]; struct bfq_thread_io *active_tdio = NULL; struct bfq_disk_ctx *bfq_diskctx = (struct bfq_disk_ctx *)diskctx; BFQ_LOCK(bfq_diskctx); atomic_cmpset_int(&bfq_diskctx->pending_dequeue, 1, 0); /* * The whole scheduler is waiting for further bios * from process currently being served */ if (bfq_diskctx->bfq_blockon != NULL) goto rtn; remaining_budget = bfq_diskctx->bfq_remaining_budget; active_tdio = bfq_diskctx->bfq_active_tdio; dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: dequeue: Im in. active_tdio = %p\n", active_tdio); free_slots = diskctx->max_tag_queue_depth - diskctx->current_tag_queue_depth; KKASSERT(free_slots >= 0 && free_slots <= 32); if (active_tdio) DSCHED_THREAD_IO_LOCK(&active_tdio->head); while (free_slots) { /* Here active_tdio must be locked ! 
		 */
		if (active_tdio) {
			/*
			 * the bio_done function has marked the current
			 * tdio as timed out
			 */
			if (active_tdio->maybe_timeout) {
				dsched_debug(BFQ_DEBUG_VERBOSE,
				    "BFQ: %p time out in dequeue()\n",
				    active_tdio);
				wf2q_update_vd(active_tdio,
				    active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio,
				    BFQ_REASON_TIMEOUT);

				/*
				 * There still exist bios not dispatched;
				 * reinsert the tdio into the aug-tree.
				 */
				if (active_tdio->head.qlength > 0) {
					wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q,
					    active_tdio);
					KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count);
				}

				active_tdio->maybe_timeout = 0;
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
				continue;
			}

			/* select next bio to dispatch */
			/* TODO: a wiser selection */
			KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
			bio = TAILQ_FIRST(&active_tdio->head.queue);
			dsched_debug(BFQ_DEBUG_NORMAL,
			    "bfq: the first bio in queue of active_tdio %p is %p\n",
			    active_tdio, bio);

			dsched_debug(BFQ_DEBUG_VERBOSE,
			    "bfq: active_tdio %p exists, remaining budget = %d, tdio budget = %d\n, qlength = %d, first bio = %p, first bio cmd = %d, first bio size = %d\n",
			    active_tdio, remaining_budget, active_tdio->budget,
			    active_tdio->head.qlength, bio,
			    bio ? bio->bio_buf->b_cmd : -1,
			    bio ? bio->bio_buf->b_bcount : -1);

			/*
			 * The bio is not read or write, just
			 * push it down.
			 */
			if (bio && (bio->bio_buf->b_cmd != BUF_CMD_READ) &&
			    (bio->bio_buf->b_cmd != BUF_CMD_WRITE)) {
				dsched_debug(BFQ_DEBUG_NORMAL,
				    "bfq: remove bio %p from the queue of %p\n",
				    bio, active_tdio);
				KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
				TAILQ_REMOVE(&active_tdio->head.queue, bio, link);
				active_tdio->head.qlength--;
				free_slots--;
#if 0
				dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
#endif
				bio_to_dispatch[bio_index++] = bio;
				KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth);
				continue;
			}

			/*
			 * Run out of budget.
			 * But this is not because the size of the bio is larger
			 * than the complete budget.
			 * If the size of the bio is larger than the complete
			 * budget, then use a complete budget to cover it.
			 */
			if (bio && (remaining_budget < BIO_SIZE(bio)) &&
			    (remaining_budget != active_tdio->budget)) {
				/* charge budget used */
				wf2q_update_vd(active_tdio,
				    active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio,
				    BFQ_REASON_OUT_OF_BUDGET);
				wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q,
				    active_tdio);
				dsched_debug(BFQ_DEBUG_VERBOSE,
				    "BFQ: thread %p ran out of budget\n",
				    active_tdio);
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
			} else { /* if (bio && remaining_budget < BIO_SIZE(bio) && remaining_budget != active_tdio->budget) */
				/*
				 * Having enough budget,
				 * or having a complete budget and the size of
				 * the bio is larger than that.
*/ if (bio) { /* dispatch */ remaining_budget -= BIO_SIZE(bio); /* * The size of the first bio is larger * than the whole budget, we should * charge the extra part */ if (remaining_budget < 0) wf2q_update_vd(active_tdio, -remaining_budget); /* compensate */ wf2q_update_vd(active_tdio, -remaining_budget); /* * remaining_budget may be < 0, * but to prevent the budget of current tdio * to substract a negative number, * the remaining_budget has to be >= 0 */ remaining_budget = MAX(0, remaining_budget); dsched_debug(BFQ_DEBUG_NORMAL, "bfq: remove bio %p from the queue of %p\n", bio, active_tdio); KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE); TAILQ_REMOVE(&active_tdio->head.queue, bio, link); free_slots--; active_tdio->head.qlength--; active_tdio->bio_dispatched++; wf2q_inc_tot_service(&bfq_diskctx->bfq_wf2q, BIO_SIZE(bio)); dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: %p's bio dispatched, size=%d, remaining_budget = %d\n", active_tdio, BIO_SIZE(bio), remaining_budget); #if 0 dsched_strategy_request_polling(diskctx->dp, bio, diskctx); #endif bio_to_dispatch[bio_index++] = bio; KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth); } else { /* if (bio) */ KKASSERT(active_tdio); /* * If AS feature is switched off, * expire the tdio as well */ if ((remaining_budget <= 0) || !(bfq_diskctx->bfq_flag & BFQ_FLAG_AS) || !active_tdio->tdio_as_switch) { active_tdio->budget -= remaining_budget; wf2q_update_vd(active_tdio, active_tdio->budget); bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_OUT_OF_BUDGET); DSCHED_THREAD_IO_UNLOCK(&active_tdio->head); active_tdio = NULL; } else { /* no further bio, wait for a while */ bfq_diskctx->bfq_blockon = active_tdio; /* * Increase ref count to ensure that * tdio will not be destroyed during waiting. 
					 */
					dsched_thread_io_ref(&active_tdio->head);

					/*
					 * If the tdio is seeky but not thinking
					 * for too long, we wait for it a little
					 * less.
					 */
					if (active_tdio->seek_samples >= BFQ_VALID_MIN_SAMPLES &&
					    BFQ_TDIO_SEEKY(active_tdio))
						callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT_MIN,
						    (void (*)(void *))helper_msg_as_timeout, bfq_diskctx);
					else
						callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT,
						    (void (*)(void *))helper_msg_as_timeout, bfq_diskctx);

					/* save the start time of blocking */
					getmicrotime(&active_tdio->as_start_time);

					dsched_debug(BFQ_DEBUG_VERBOSE,
					    "BFQ: blocked on %p, remaining_budget = %d\n",
					    active_tdio, remaining_budget);
					DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
					goto save_and_rtn;
				}
			}
			}
		} else { /* if (active_tdio) */
			/* there is no active tdio */
			/* no pending bios at all */
			active_tdio = wf2q_get_next_thread_io(&bfq_diskctx->bfq_wf2q);

			if (!active_tdio) {
				KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count == 0);
				dsched_debug(BFQ_DEBUG_VERBOSE,
				    "BFQ: no more eligible tdio!\n");
				goto save_and_rtn;
			}

			/*
			 * A new tdio is picked,
			 * initialize the service related statistic data
			 */
			DSCHED_THREAD_IO_LOCK(&active_tdio->head);
			active_tdio->service_received = 0;

			/*
			 * Reset the maybe_timeout flag, which
			 * may be set by a biodone after the service is done
			 */
			getmicrotime(&active_tdio->service_start_time);
			active_tdio->maybe_timeout = 0;

			remaining_budget = active_tdio->budget;
			dsched_debug(BFQ_DEBUG_VERBOSE,
			    "bfq: active_tdio %p selected, remaining budget = %d, tdio budget = %d\n, qlength = %d\n",
			    active_tdio, remaining_budget, active_tdio->budget,
			    active_tdio->head.qlength);
		}
	} /* while (free_slots) */

	/* reach here only when free_slots == 0 */
	if (active_tdio) /* && lockcount(&active_tdio->head.lock) > 0) */
		DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);

save_and_rtn:
	/* save the remaining budget */
	bfq_diskctx->bfq_remaining_budget = remaining_budget;
	bfq_diskctx->bfq_active_tdio = active_tdio;
rtn:
	BFQ_UNLOCK(bfq_diskctx);

	/* dispatch the planned bios */
	for (i = 0; i < bio_index; i++)
		dsched_strategy_request_polling(diskctx->dp,
		    bio_to_dispatch[i], diskctx);
}
/** * mpssas_SSU_to_SATA_devices * @sc: per adapter object * * Looks through the target list and issues a StartStopUnit SCSI command to each * SATA direct-access device. This helps to ensure that data corruption is * avoided when the system is being shut down. This must be called after the IR * System Shutdown RAID Action is sent if in IR mode. * * Return nothing. */ static void mpssas_SSU_to_SATA_devices(struct mps_softc *sc) { struct mpssas_softc *sassc = sc->sassc; union ccb *ccb; path_id_t pathid = cam_sim_path(sassc->sim); target_id_t targetid; struct mpssas_target *target; char path_str[64]; struct timeval cur_time, start_time; /* * For each target, issue a StartStopUnit command to stop the device. */ sc->SSU_started = TRUE; sc->SSU_refcount = 0; for (targetid = 0; targetid < sc->facts->MaxTargets; targetid++) { target = &sassc->targets[targetid]; if (target->handle == 0x0) { continue; } ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { mps_dprint(sc, MPS_FAULT, "Unable to alloc CCB to stop " "unit.\n"); return; } /* * The stop_at_shutdown flag will be set if this device is * a SATA direct-access end device. */ if (target->stop_at_shutdown) { if (xpt_create_path(&ccb->ccb_h.path, xpt_periph, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { mps_dprint(sc, MPS_FAULT, "Unable to create " "LUN path to stop unit.\n"); xpt_free_ccb(ccb); return; } xpt_path_string(ccb->ccb_h.path, path_str, sizeof(path_str)); mps_dprint(sc, MPS_INFO, "Sending StopUnit: path %s " "handle %d\n", path_str, target->handle); /* * Issue a START STOP UNIT command for the target. * Increment the SSU counter to be used to count the * number of required replies. */ mps_dprint(sc, MPS_INFO, "Incrementing SSU count\n"); sc->SSU_refcount++; ccb->ccb_h.target_id = xpt_path_target_id(ccb->ccb_h.path); ccb->ccb_h.ppriv_ptr1 = sassc; scsi_start_stop(&ccb->csio, /*retries*/0, mpssas_stop_unit_done, MSG_SIMPLE_Q_TAG, /*start*/FALSE, /*load/eject*/0, /*immediate*/FALSE, MPS_SENSE_LEN, /*timeout*/10000); xpt_action(ccb); } } /* * Wait until all of the SSU commands have completed or time has * expired (60 seconds). Pause for 100ms each time through. If any * command times out, the target will be reset in the SCSI command * timeout routine. */ getmicrotime(&start_time); while (sc->SSU_refcount) { pause("mpswait", hz/10); getmicrotime(&cur_time); if ((cur_time.tv_sec - start_time.tv_sec) > 60) { mps_dprint(sc, MPS_FAULT, "Time has expired waiting " "for SSU commands to complete.\n"); break; } } }
/* * General fork call. Note that another LWP in the process may call exec() * or exit() while we are forking. It's safe to continue here, because * neither operation will complete until all LWPs have exited the process. */ int fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize, void (*func)(void *), void *arg, register_t *retval, struct proc **rnewprocp) { struct proc *p1, *p2, *parent; struct plimit *p1_lim; uid_t uid; struct lwp *l2; int count; vaddr_t uaddr; int tnprocs; int tracefork; int error = 0; p1 = l1->l_proc; uid = kauth_cred_getuid(l1->l_cred); tnprocs = atomic_inc_uint_nv(&nprocs); /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. */ if (__predict_false(tnprocs >= maxproc)) error = -1; else error = kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL); if (error) { static struct timeval lasttfm; atomic_dec_uint(&nprocs); if (ratecheck(&lasttfm, &fork_tfmrate)) tablefull("proc", "increase kern.maxproc or NPROC"); if (forkfsleep) kpause("forkmx", false, forkfsleep, NULL); return EAGAIN; } /* * Enforce limits. */ count = chgproccnt(uid, 1); if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) { if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT, p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0) { (void)chgproccnt(uid, -1); atomic_dec_uint(&nprocs); if (forkfsleep) kpause("forkulim", false, forkfsleep, NULL); return EAGAIN; } } /* * Allocate virtual address space for the U-area now, while it * is still easy to abort the fork operation if we're out of * kernel virtual address space. */ uaddr = uvm_uarea_alloc(); if (__predict_false(uaddr == 0)) { (void)chgproccnt(uid, -1); atomic_dec_uint(&nprocs); return ENOMEM; } /* * We are now committed to the fork. From here on, we may * block on resources, but resource allocation may NOT fail. */ /* Allocate new proc. */ p2 = proc_alloc(); /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ memset(&p2->p_startzero, 0, (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero)); memcpy(&p2->p_startcopy, &p1->p_startcopy, (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy)); TAILQ_INIT(&p2->p_sigpend.sp_info); LIST_INIT(&p2->p_lwps); LIST_INIT(&p2->p_sigwaiters); /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * Inherit flags we want to keep. The flags related to SIGCHLD * handling are important in order to keep a consistent behaviour * for the child after the fork. If we are a 32-bit process, the * child will be too. */ p2->p_flag = p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32); p2->p_emul = p1->p_emul; p2->p_execsw = p1->p_execsw; if (flags & FORK_SYSTEM) { /* * Mark it as a system process. Set P_NOCLDWAIT so that * children are reparented to init(8) when they exit. * init(8) can easily wait them out for us. */ p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT); } mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH); mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE); rw_init(&p2->p_reflock); cv_init(&p2->p_waitcv, "wait"); cv_init(&p2->p_lwpcv, "lwpwait"); /* * Share a lock between the processes if they are to share signal * state: we must synchronize access to it. 
*/ if (flags & FORK_SHARESIGS) { p2->p_lock = p1->p_lock; mutex_obj_hold(p1->p_lock); } else p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); kauth_proc_fork(p1, p2); p2->p_raslist = NULL; #if defined(__HAVE_RAS) ras_fork(p1, p2); #endif /* bump references to the text vnode (for procfs) */ p2->p_textvp = p1->p_textvp; if (p2->p_textvp) vref(p2->p_textvp); if (flags & FORK_SHAREFILES) fd_share(p2); else if (flags & FORK_CLEANFILES) p2->p_fd = fd_init(NULL); else p2->p_fd = fd_copy(); /* XXX racy */ p2->p_mqueue_cnt = p1->p_mqueue_cnt; if (flags & FORK_SHARECWD) cwdshare(p2); else p2->p_cwdi = cwdinit(); /* * Note: p_limit (rlimit stuff) is copy-on-write, so normally * we just need increase pl_refcnt. */ p1_lim = p1->p_limit; if (!p1_lim->pl_writeable) { lim_addref(p1_lim); p2->p_limit = p1_lim; } else { p2->p_limit = lim_copy(p1_lim); } if (flags & FORK_PPWAIT) { /* Mark ourselves as waiting for a child. */ l1->l_pflag |= LP_VFORKWAIT; p2->p_lflag = PL_PPWAIT; p2->p_vforklwp = l1; } else { p2->p_lflag = 0; } p2->p_sflag = 0; p2->p_slflag = 0; parent = (flags & FORK_NOWAIT) ? initproc : p1; p2->p_pptr = parent; p2->p_ppid = parent->p_pid; LIST_INIT(&p2->p_children); p2->p_aio = NULL; #ifdef KTRACE /* * Copy traceflag and tracefile if enabled. * If not inherited, these were zeroed above. */ if (p1->p_traceflag & KTRFAC_INHERIT) { mutex_enter(&ktrace_lock); p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracep = p1->p_tracep) != NULL) ktradref(p2); mutex_exit(&ktrace_lock); } #endif /* * Create signal actions for the child process. */ p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS); mutex_enter(p1->p_lock); p2->p_sflag |= (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP)); sched_proc_fork(p1, p2); mutex_exit(p1->p_lock); p2->p_stflag = p1->p_stflag; /* * p_stats. * Copy parts of p_stats, and zero out the rest. */ p2->p_stats = pstatscopy(p1->p_stats); /* * Set up the new process address space. */ uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false); /* * Finish creating the child process. * It will return through a different path later. */ lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0, stack, stacksize, (func != NULL) ? func : child_return, arg, &l2, l1->l_class); /* * Inherit l_private from the parent. * Note that we cannot use lwp_setprivate() here since that * also sets the CPU TLS register, which is incorrect if the * process has changed that without letting the kernel know. */ l2->l_private = l1->l_private; /* * If emulation has a process fork hook, call it now. */ if (p2->p_emul->e_proc_fork) (*p2->p_emul->e_proc_fork)(p2, l1, flags); /* * ...and finally, any other random fork hooks that subsystems * might have registered. */ doforkhooks(p2, p1); SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0); /* * It's now safe for the scheduler and other processes to see the * child process. */ mutex_enter(proc_lock); if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT) p2->p_lflag |= PL_CONTROLT; LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling); p2->p_exitsig = exitsig; /* signal for parent on exit */ /* * We don't want to tracefork vfork()ed processes because they * will not receive the SIGTRAP until it is too late. 
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;

		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}

	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, sleep until it clears LP_VFORKWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}

	mutex_exit(proc_lock);

	return 0;
}
int fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize, void (*func)(void *), void *arg, register_t *retval, struct proc **rnewprocp) { struct proc *p2; uid_t uid; struct vmspace *vm; int count; vaddr_t uaddr; int s; extern void endtsleep(void *); extern void realitexpire(void *); /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. We reserve * the last 5 processes to root. The variable nprocs is the current * number of processes, maxproc is the limit. */ uid = p1->p_cred->p_ruid; if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) { static struct timeval lasttfm; if (ratecheck(&lasttfm, &fork_tfmrate)) tablefull("proc"); return (EAGAIN); } nprocs++; /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. */ count = chgproccnt(uid, 1); if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) { (void)chgproccnt(uid, -1); nprocs--; return (EAGAIN); } uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1); if (uaddr == 0) { chgproccnt(uid, -1); nprocs--; return (ENOMEM); } /* * From now on, we're committed to the fork and cannot fail. */ /* Allocate new proc. */ p2 = pool_get(&proc_pool, PR_WAITOK); p2->p_stat = SIDL; /* protect against others */ p2->p_exitsig = exitsig; p2->p_forw = p2->p_back = NULL; #ifdef RTHREADS if (flags & FORK_THREAD) { atomic_setbits_int(&p2->p_flag, P_THREAD); p2->p_p = p1->p_p; TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link); } else { process_new(p2, p1); } #else process_new(p2, p1); #endif /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ bzero(&p2->p_startzero, (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero)); bcopy(&p1->p_startcopy, &p2->p_startcopy, (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy)); /* * Initialize the timeouts. */ timeout_set(&p2->p_sleep_to, endtsleep, p2); timeout_set(&p2->p_realit_to, realitexpire, p2); #if defined(__HAVE_CPUINFO) p2->p_cpu = p1->p_cpu; #endif /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ p2->p_flag = 0; p2->p_emul = p1->p_emul; if (p1->p_flag & P_PROFIL) startprofclock(p2); atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC)); if (flags & FORK_PTRACE) atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED); #ifdef RTHREADS if (flags & FORK_THREAD) { /* nothing */ } else #endif { p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK); bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred)); p2->p_p->ps_cred->p_refcnt = 1; crhold(p1->p_ucred); } TAILQ_INIT(&p2->p_selects); /* bump references to the text vnode (for procfs) */ p2->p_textvp = p1->p_textvp; if (p2->p_textvp) VREF(p2->p_textvp); if (flags & FORK_CLEANFILES) p2->p_fd = fdinit(p1); else if (flags & FORK_SHAREFILES) p2->p_fd = fdshare(p1); else p2->p_fd = fdcopy(p1); /* * If ps_limit is still copy-on-write, bump refcnt, * otherwise get a copy that won't be modified. * (If PL_SHAREMOD is clear, the structure is shared * copy-on-write.) 
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXX should move p_estcpu into the region of struct proc which gets
	 * copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);

	if (p2->p_flag & P_TRACED) {
		p2->p_oppid = p1->p_pid;
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.
If waiting for * child to exec or exit, set P_PPWAIT on child, and sleep on our * proc (in case of exit). */ if (flags & FORK_PPWAIT) while (p2->p_flag & P_PPWAIT) tsleep(p1, PWAIT, "ppwait", 0); /* * If we're tracing the child, alert the parent too. */ if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED)) psignal(p1, SIGTRAP); /* * Return child pid to parent process, * marking us as parent via retval[1]. */ if (retval != NULL) { retval[0] = p2->p_pid; retval[1] = 0; } return (0); }
__private_extern__ errno_t arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0, route_t *out_route) { struct timeval timenow; route_t rt = hint0, hint = hint0; errno_t error = 0; *out_route = NULL; /* * Next hop determination. Because we may involve the gateway route * in addition to the original route, locking is rather complicated. * The general concept is that regardless of whether the route points * to the original route or to the gateway route, this routine takes * an extra reference on such a route. This extra reference will be * released at the end. * * Care must be taken to ensure that the "hint0" route never gets freed * via rtfree(), since the caller may have stored it inside a struct * route with a reference held for that placeholder. */ if (rt != NULL) { unsigned int ifindex; RT_LOCK_SPIN(rt); ifindex = rt->rt_ifp->if_index; RT_ADDREF_LOCKED(rt); if (!(rt->rt_flags & RTF_UP)) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); /* route is down, find a new one */ hint = rt = rtalloc1_scoped((struct sockaddr *) (size_t)net_dest, 1, 0, ifindex); if (hint != NULL) { RT_LOCK_SPIN(rt); ifindex = rt->rt_ifp->if_index; } else { senderr(EHOSTUNREACH); } } /* * We have a reference to "rt" by now; it will either * be released or freed at the end of this routine. */ RT_LOCK_ASSERT_HELD(rt); if (rt->rt_flags & RTF_GATEWAY) { struct rtentry *gwrt = rt->rt_gwroute; struct sockaddr_in gw; /* If there's no gateway rt, look it up */ if (gwrt == NULL) { gw = *((struct sockaddr_in *)rt->rt_gateway); RT_UNLOCK(rt); goto lookup; } /* Become a regular mutex */ RT_CONVERT_LOCK(rt); /* * Take gwrt's lock while holding route's lock; * this is okay since gwrt never points back * to "rt", so no lock ordering issues. */ RT_LOCK_SPIN(gwrt); if (!(gwrt->rt_flags & RTF_UP)) { struct rtentry *ogwrt; rt->rt_gwroute = NULL; RT_UNLOCK(gwrt); gw = *((struct sockaddr_in *)rt->rt_gateway); RT_UNLOCK(rt); rtfree(gwrt); lookup: gwrt = rtalloc1_scoped( (struct sockaddr *)&gw, 1, 0, ifindex); RT_LOCK(rt); /* * Bail out if the route is down, no route * to gateway, circular route, or if the * gateway portion of "rt" has changed. */ if (!(rt->rt_flags & RTF_UP) || gwrt == NULL || gwrt == rt || !equal(SA(&gw), rt->rt_gateway)) { if (gwrt == rt) { RT_REMREF_LOCKED(gwrt); gwrt = NULL; } RT_UNLOCK(rt); if (gwrt != NULL) rtfree(gwrt); senderr(EHOSTUNREACH); } /* Remove any existing gwrt */ ogwrt = rt->rt_gwroute; if ((rt->rt_gwroute = gwrt) != NULL) RT_ADDREF(gwrt); /* Clean up "rt" now while we can */ if (rt == hint0) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); } else { RT_UNLOCK(rt); rtfree(rt); } rt = gwrt; /* Now free the replaced gwrt */ if (ogwrt != NULL) rtfree(ogwrt); /* If still no route to gateway, bail out */ if (rt == NULL) senderr(EHOSTUNREACH); } else { RT_ADDREF_LOCKED(gwrt); RT_UNLOCK(gwrt); /* Clean up "rt" now while we can */ if (rt == hint0) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); } else { RT_UNLOCK(rt); rtfree(rt); } rt = gwrt; } /* rt == gwrt; if it is now down, give up */ RT_LOCK_SPIN(rt); if (!(rt->rt_flags & RTF_UP)) { RT_UNLOCK(rt); senderr(EHOSTUNREACH); } } if (rt->rt_flags & RTF_REJECT) { getmicrotime(&timenow); if (rt->rt_rmx.rmx_expire == 0 || timenow.tv_sec < rt->rt_rmx.rmx_expire) { RT_UNLOCK(rt); senderr(rt == hint ? 
EHOSTDOWN : EHOSTUNREACH); } } /* Become a regular mutex */ RT_CONVERT_LOCK(rt); /* Caller is responsible for cleaning up "rt" */ *out_route = rt; } return (0); bad: /* Clean up route (either it is "rt" or "gwrt") */ if (rt != NULL) { RT_LOCK_SPIN(rt); if (rt == hint0) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); } else { RT_UNLOCK(rt); rtfree(rt); } } return (error); }
/* * Parallel to llc_rtrequest. */ static void arp_rtrequest( int req, struct rtentry *rt, __unused struct sockaddr *sa) { struct sockaddr *gate = rt->rt_gateway; struct llinfo_arp *la = rt->rt_llinfo; static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}}; struct timeval timenow; if (!arpinit_done) { panic("%s: ARP has not been initialized", __func__); /* NOTREACHED */ } lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); if (rt->rt_flags & RTF_GATEWAY) return; getmicrotime(&timenow); switch (req) { case RTM_ADD: /* * XXX: If this is a manually added route to interface * such as older version of routed or gated might provide, * restore cloning bit. */ if ((rt->rt_flags & RTF_HOST) == 0 && SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) rt->rt_flags |= RTF_CLONING; if (rt->rt_flags & RTF_CLONING) { /* * Case 1: This route should come from a route to iface. */ if (rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl) == 0) { gate = rt->rt_gateway; SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; /* * In case we're called before 1.0 sec. * has elapsed. */ rt->rt_expire = MAX(timenow.tv_sec, 1); } break; } /* Announce a new entry if requested. */ if (rt->rt_flags & RTF_ANNOUNCE) { RT_UNLOCK(rt); dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST, SDL(gate), rt_key(rt), NULL, rt_key(rt)); RT_LOCK(rt); } /*FALLTHROUGH*/ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || gate->sa_len < sizeof(null_sdl)) { if (log_arp_warnings) log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n"); break; } SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; if (la != 0) break; /* This happens on a route change */ /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ rt->rt_llinfo = la = arp_llinfo_alloc(); if (la == NULL) { if (log_arp_warnings) log(LOG_DEBUG, "%s: malloc failed\n", __func__); break; } rt->rt_llinfo_free = arp_llinfo_free; arp_inuse++, arp_allocated++; Bzero(la, sizeof(*la)); la->la_rt = rt; rt->rt_flags |= RTF_LLINFO; LIST_INSERT_HEAD(&llinfo_arp, la, la_le); /* * This keeps the multicast addresses from showing up * in `arp -a' listings as unresolved. It's not actually * functional. Then the same for broadcast. */ if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { RT_UNLOCK(rt); dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate, sizeof(struct sockaddr_dl)); RT_LOCK(rt); rt->rt_expire = 0; } else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { struct sockaddr_dl *gate_ll = SDL(gate); size_t broadcast_len; ifnet_llbroadcast_copy_bytes(rt->rt_ifp, LLADDR(gate_ll), sizeof(gate_ll->sdl_data), &broadcast_len); gate_ll->sdl_alen = broadcast_len; gate_ll->sdl_family = AF_LINK; gate_ll->sdl_len = sizeof(struct sockaddr_dl); /* In case we're called before 1.0 sec. has elapsed */ rt->rt_expire = MAX(timenow.tv_sec, 1); } if (SIN(rt_key(rt))->sin_addr.s_addr == (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) { /* * This test used to be * if (loif.if_flags & IFF_UP) * It allowed local traffic to be forced * through the hardware by configuring the loopback down. * However, it causes problems during network configuration * for boards that can't receive packets they send. * It is now necessary to clear "useloopback" and remove * the route to force traffic out to the hardware. 
*/ rt->rt_expire = 0; ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6); if (useloopback) { #if IFNET_ROUTE_REFCNT /* Adjust route ref count for the interfaces */ if (rt->rt_if_ref_fn != NULL && rt->rt_ifp != lo_ifp) { rt->rt_if_ref_fn(lo_ifp, 1); rt->rt_if_ref_fn(rt->rt_ifp, -1); } #endif /* IFNET_ROUTE_REFCNT */ rt->rt_ifp = lo_ifp; } } break; case RTM_DELETE: if (la == 0) break; arp_inuse--; /* * Unchain it but defer the actual freeing until the route * itself is to be freed. rt->rt_llinfo still points to * llinfo_arp, and likewise, la->la_rt still points to this * route entry, except that RTF_LLINFO is now cleared. */ LIST_REMOVE(la, la_le); la->la_le.le_next = NULL; la->la_le.le_prev = NULL; rt->rt_flags &= ~RTF_LLINFO; if (la->la_hold != NULL) m_freem(la->la_hold); la->la_hold = NULL; } }