Example #1
/*
 * Initialize expirations and counters based on lifetime payload.
 */
void
import_lifetime(struct tdb *tdb, struct sadb_lifetime *sadb_lifetime, int type)
{
	struct timeval tv;

	if (!sadb_lifetime)
		return;

	getmicrotime(&tv);

	switch (type) {
	case PFKEYV2_LIFETIME_HARD:
		if ((tdb->tdb_exp_allocations =
		    sadb_lifetime->sadb_lifetime_allocations) != 0)
			tdb->tdb_flags |= TDBF_ALLOCATIONS;
		else
			tdb->tdb_flags &= ~TDBF_ALLOCATIONS;

		if ((tdb->tdb_exp_bytes =
		    sadb_lifetime->sadb_lifetime_bytes) != 0)
			tdb->tdb_flags |= TDBF_BYTES;
		else
			tdb->tdb_flags &= ~TDBF_BYTES;

		if ((tdb->tdb_exp_timeout =
		    sadb_lifetime->sadb_lifetime_addtime) != 0) {
			tdb->tdb_flags |= TDBF_TIMER;
			if (tv.tv_sec + tdb->tdb_exp_timeout < tv.tv_sec)
				tv.tv_sec = ((unsigned long) -1) / 2; /* XXX */
			else
				tv.tv_sec += tdb->tdb_exp_timeout;
			timeout_add(&tdb->tdb_timer_tmo, hzto(&tv));
		} else
			tdb->tdb_flags &= ~TDBF_TIMER;

		if ((tdb->tdb_exp_first_use =
		    sadb_lifetime->sadb_lifetime_usetime) != 0)
			tdb->tdb_flags |= TDBF_FIRSTUSE;
		else
			tdb->tdb_flags &= ~TDBF_FIRSTUSE;
		break;

	case PFKEYV2_LIFETIME_SOFT:
		if ((tdb->tdb_soft_allocations =
		    sadb_lifetime->sadb_lifetime_allocations) != 0)
			tdb->tdb_flags |= TDBF_SOFT_ALLOCATIONS;
		else
			tdb->tdb_flags &= ~TDBF_SOFT_ALLOCATIONS;

		if ((tdb->tdb_soft_bytes =
		    sadb_lifetime->sadb_lifetime_bytes) != 0)
			tdb->tdb_flags |= TDBF_SOFT_BYTES;
		else
			tdb->tdb_flags &= ~TDBF_SOFT_BYTES;

		if ((tdb->tdb_soft_timeout =
		    sadb_lifetime->sadb_lifetime_addtime) != 0) {
			tdb->tdb_flags |= TDBF_SOFT_TIMER;
			if (tv.tv_sec + tdb->tdb_soft_timeout < tv.tv_sec)
				tv.tv_sec = ((unsigned long) -1) / 2; /* XXX */
			else
				tv.tv_sec += tdb->tdb_soft_timeout;
			timeout_add(&tdb->tdb_stimer_tmo, hzto(&tv));
		} else
			tdb->tdb_flags &= ~TDBF_SOFT_TIMER;

		if ((tdb->tdb_soft_first_use =
		    sadb_lifetime->sadb_lifetime_usetime) != 0)
			tdb->tdb_flags |= TDBF_SOFT_FIRSTUSE;
		else
			tdb->tdb_flags &= ~TDBF_SOFT_FIRSTUSE;
		break;

	case PFKEYV2_LIFETIME_CURRENT:  /* Nothing fancy here. */
		tdb->tdb_cur_allocations =
		    sadb_lifetime->sadb_lifetime_allocations;
		tdb->tdb_cur_bytes = sadb_lifetime->sadb_lifetime_bytes;
		tdb->tdb_established = sadb_lifetime->sadb_lifetime_addtime;
		tdb->tdb_first_use = sadb_lifetime->sadb_lifetime_usetime;
	}
}
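The expiry computation above guards against "now + timeout" wrapping around (the lines marked XXX). Below is a minimal standalone sketch of that saturating addition, using a plain time_t instead of the kernel's struct timeval and hzto(); the function name is invented for this illustration.

#include <time.h>

/*
 * Saturating "now + timeout", mirroring the XXX'd guard above: if the
 * sum would wrap, clamp it to the same half-range value the kernel uses.
 */
static time_t
expire_at(time_t now, unsigned long timeout)
{
	if (now + (time_t)timeout < now)	/* wrapped around */
		return (time_t)(((unsigned long)-1) / 2);
	return now + (time_t)timeout;
}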
Example #2
/*
 * Write out process accounting information, on process exit.
 * The data to be written out are specified in Leffler, et al.,
 * and are enumerated below.  (They're also noted in the system
 * "acct.h" header file.)
 */
int
acct_process(struct proc *p)
{
	struct acct acct;
	struct rusage *r;
	struct timeval ut, st, tmp;
	int t;
	struct vnode *vp;
	struct plimit *oplim = NULL;
	int error;

	/* If accounting isn't enabled, don't bother */
	vp = acctp;
	if (vp == NULL)
		return (0);

	/*
	 * Raise the file limit so that accounting can't be stopped by the
	 * user. (XXX - we should think about the cpu limit too).
	 */
	if (p->p_p->ps_limit->p_refcnt > 1) {
		oplim = p->p_p->ps_limit;
		p->p_p->ps_limit = limcopy(p->p_p->ps_limit);
	}
	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;

	/*
	 * Get process accounting information.
	 */

	/* (1) The name of the command that ran */
	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);

	/* (2) The amount of user and system time that was used */
	calcru(p, &ut, &st, NULL);
	acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec);
	acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec);

	/* (3) The elapsed time the command ran (and its starting time) */
	acct.ac_btime = p->p_stats->p_start.tv_sec;
	getmicrotime(&tmp);
	timersub(&tmp, &p->p_stats->p_start, &tmp);
	acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec);

	/* (4) The average amount of memory used */
	r = &p->p_stats->p_ru;
	timeradd(&ut, &st, &tmp);
	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
	if (t)
		acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t;
	else
		acct.ac_mem = 0;

	/* (5) The number of disk I/O operations done */
	acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0);

	/* (6) The UID and GID of the process */
	acct.ac_uid = p->p_cred->p_ruid;
	acct.ac_gid = p->p_cred->p_rgid;

	/* (7) The terminal from which the process was started */
	if ((p->p_p->ps_flags & PS_CONTROLT) &&
	    p->p_p->ps_pgrp->pg_session->s_ttyp)
		acct.ac_tty = p->p_p->ps_pgrp->pg_session->s_ttyp->t_dev;
	else
		acct.ac_tty = NODEV;

	/* (8) The boolean flags that tell how the process terminated, etc. */
	acct.ac_flag = p->p_acflag;

	/*
	 * Now, just write the accounting information to the file.
	 */
	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct),
	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, NULL, p);

	if (oplim) {
		limfree(p->p_p->ps_limit);
		p->p_p->ps_limit = oplim;
	}

	return error;
}
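encode_comp_t() above packs the times into the traditional comp_t pseudo-floating-point format. A minimal user-space sketch of that encoding follows, assuming the classic layout (a 3-bit base-8 exponent over a 13-bit fraction counted in 1/AHZ seconds, with AHZ = 64); the macro and function names here are local to the sketch, not the kernel's.

#include <stdint.h>

#define AHZ      64			/* fraction units per second (assumed) */
#define MANTSIZE 13			/* 13-bit fraction */
#define EXPSIZE  3			/* 3-bit base-8 exponent */
#define MAXFRACT ((1 << MANTSIZE) - 1)

static uint16_t
encode_comp_t_sketch(unsigned long sec, unsigned long usec)
{
	unsigned long t = sec * AHZ + usec / (1000000 / AHZ);
	int exp = 0;

	while (t > MAXFRACT) {		/* shift the fraction, bump the octal exponent */
		t >>= EXPSIZE;
		exp++;
	}
	return (uint16_t)((exp << MANTSIZE) | t);
}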
Example #3
/*
 * Queue a packet.  Start transmission if not active.
 * Packet is placed in Information field of PPP frame.
 * Called at splnet as the if->if_output handler.
 * Called at splnet from pppwrite().
 */
static int
pppoutput_serialized(struct ifnet *ifp, struct mbuf *m0, struct sockaddr *dst,
		     struct rtentry *rtp)
{
    struct ppp_softc *sc = &ppp_softc[ifp->if_dunit];
    int protocol, address, control;
    u_char *cp;
    int error;
#ifdef INET
    struct ip *ip;
#endif
    struct ifqueue *ifq;
    enum NPmode mode;
    int len;
    struct mbuf *m;
    struct altq_pktattr pktattr;

    if (sc->sc_devp == NULL || (ifp->if_flags & IFF_RUNNING) == 0
	|| ((ifp->if_flags & IFF_UP) == 0 && dst->sa_family != AF_UNSPEC)) {
	error = ENETDOWN;	/* sort of */
	goto bad;
    }

    ifq_classify(&ifp->if_snd, m0, dst->sa_family, &pktattr);

    /*
     * Compute PPP header.
     */
    m0->m_flags &= ~M_HIGHPRI;
    switch (dst->sa_family) {
#ifdef INET
    case AF_INET:
	address = PPP_ALLSTATIONS;
	control = PPP_UI;
	protocol = PPP_IP;
	mode = sc->sc_npmode[NP_IP];

	/*
	 * If this packet has the "low delay" bit set in the IP header,
	 * put it on the fastq instead.
	 */
	ip = mtod(m0, struct ip *);
	if (ip->ip_tos & IPTOS_LOWDELAY)
	    m0->m_flags |= M_HIGHPRI;
	break;
#endif
#ifdef IPX
    case AF_IPX:
	/*
	 * This is pretty bogus: we don't have an ipxcp module in pppd
	 * yet to configure the link parameters.  Sigh. I guess a
	 * manual ifconfig would do....  -Peter
	 */
	address = PPP_ALLSTATIONS;
	control = PPP_UI;
	protocol = PPP_IPX;
	mode = NPMODE_PASS;
	break;
#endif
    case AF_UNSPEC:
	address = PPP_ADDRESS(dst->sa_data);
	control = PPP_CONTROL(dst->sa_data);
	protocol = PPP_PROTOCOL(dst->sa_data);
	mode = NPMODE_PASS;
	break;
    default:
	kprintf("%s: af%d not supported\n", ifp->if_xname, dst->sa_family);
	error = EAFNOSUPPORT;
	goto bad;
    }

    /*
     * Drop this packet, or return an error, if necessary.
     */
    if (mode == NPMODE_ERROR) {
	error = ENETDOWN;
	goto bad;
    }
    if (mode == NPMODE_DROP) {
	error = 0;
	goto bad;
    }

    /*
     * Add PPP header.  If no space in first mbuf, allocate another.
     * (This assumes M_LEADINGSPACE is always 0 for a cluster mbuf.)
     */
    if (M_LEADINGSPACE(m0) < PPP_HDRLEN) {
	m0 = m_prepend(m0, PPP_HDRLEN, MB_DONTWAIT);
	if (m0 == NULL) {
	    error = ENOBUFS;
	    goto bad;
	}
	m0->m_len = 0;
    } else
	m0->m_data -= PPP_HDRLEN;

    cp = mtod(m0, u_char *);
    *cp++ = address;
    *cp++ = control;
    *cp++ = protocol >> 8;
    *cp++ = protocol & 0xff;
    m0->m_len += PPP_HDRLEN;

    len = 0;
    for (m = m0; m != NULL; m = m->m_next)
	len += m->m_len;

    if (sc->sc_flags & SC_LOG_OUTPKT) {
	kprintf("%s output: ", ifp->if_xname);
	pppdumpm(m0);
    }

    if ((protocol & 0x8000) == 0) {
#ifdef PPP_FILTER
	/*
	 * Apply the pass and active filters to the packet,
	 * but only if it is a data packet.
	 */
	*mtod(m0, u_char *) = 1;	/* indicates outbound */
	if (sc->sc_pass_filt.bf_insns != 0
	    && bpf_filter(sc->sc_pass_filt.bf_insns, (u_char *) m0,
			  len, 0) == 0) {
	    error = 0;		/* drop this packet */
	    goto bad;
	}

	/*
	 * Update the time we sent the most recent packet.
	 */
	if (sc->sc_active_filt.bf_insns == 0
	    || bpf_filter(sc->sc_active_filt.bf_insns, (u_char *) m0, len, 0))
	    sc->sc_last_sent = time_second;

	*mtod(m0, u_char *) = address;
#else
	/*
	 * Update the time we sent the most recent data packet.
	 */
	sc->sc_last_sent = time_second;
#endif /* PPP_FILTER */
    }

    BPF_MTAP(ifp, m0);

    /*
     * Put the packet on the appropriate queue.
     */
    crit_enter();
    if (mode == NPMODE_QUEUE) {
	/* XXX we should limit the number of packets on this queue */
	*sc->sc_npqtail = m0;
	m0->m_nextpkt = NULL;
	sc->sc_npqtail = &m0->m_nextpkt;
    } else {
	/* fastq and if_snd are emptied at spl[soft]net now */
	if ((m0->m_flags & M_HIGHPRI) && !ifq_is_enabled(&sc->sc_if.if_snd)) {
	    ifq = &sc->sc_fastq;
	    if (IF_QFULL(ifq) && dst->sa_family != AF_UNSPEC) {
	        IF_DROP(ifq);
	        m_freem(m0);
	        error = ENOBUFS;
	    } else {
	        IF_ENQUEUE(ifq, m0);
	        error = 0;
	    }
	} else {
	    ASSERT_IFNET_SERIALIZED_TX(&sc->sc_if);
	    error = ifq_enqueue(&sc->sc_if.if_snd, m0, &pktattr);
	}
	if (error) {
	    crit_exit();
	    sc->sc_if.if_oerrors++;
	    sc->sc_stats.ppp_oerrors++;
	    return (error);
	}
	(*sc->sc_start)(sc);
    }
    getmicrotime(&ifp->if_lastchange);
    ifp->if_opackets++;
    ifp->if_obytes += len;

    crit_exit();
    return (0);

bad:
    m_freem(m0);
    return (error);
}
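The four header bytes written through *cp++ above (address, control, then the protocol in big-endian order) follow the standard HDLC-like PPP framing. A tiny standalone illustration for an IPv4 payload, using the well-known framing constants; the SKETCH_ names are local to this example.

#include <stdint.h>

#define SKETCH_PPP_ALLSTATIONS	0xff	/* address: all stations */
#define SKETCH_PPP_UI		0x03	/* control: Unnumbered Information */
#define SKETCH_PPP_IP		0x0021	/* protocol: IPv4 */

static void
fill_ppp_header(uint8_t hdr[4])
{
	hdr[0] = SKETCH_PPP_ALLSTATIONS;
	hdr[1] = SKETCH_PPP_UI;
	hdr[2] = SKETCH_PPP_IP >> 8;	/* protocol, high byte first */
	hdr[3] = SKETCH_PPP_IP & 0xff;
}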
Example #4
/*
 * We're emulating a Mouse Systems serial mouse here.
 */
void
msintr(void *arg)
{
	static const char to_one[] = { 1, 2, 2, 4, 4, 4, 4 };
	static const int to_id[] = { MS_RIGHT, MS_MIDDLE, 0, MS_LEFT };
	struct ms_port *ms = arg;
	struct firm_event *fe;
	int mb, ub, d, get, put, any, port;
	u_char pra, *horc, *verc;
	u_short pot, count;
	short dx, dy;

	port = ms->ms_portno;

	horc = ((u_char *) &count) + 1;
	verc = (u_char *) &count;

	/*
	 * first read the three buttons.
	 */
	pot  = custom.potgor;
	pra  = ciaa.pra;
	pot >>= port == 0 ? 8 : 12;	/* contains right and middle button */
	pra >>= port == 0 ? 6 : 7;	/* contains left button */
	mb = (pot & 4) / 4 + (pot & 1) * 2 + (pra & 1) * 4;
	mb ^= 0x07;

	/*
	 * read current values of counter registers
	 */
	if (port == 0)
		count = custom.joy0dat;
	else
		count = custom.joy1dat;

	/*
	 * take care of wraparound
	 */
	dx = *horc - ms->ms_horc;
	if (dx < -127)
		dx += 255;
	else if (dx > 127)
		dx -= 255;
	dy = *verc - ms->ms_verc;
	if (dy < -127)
		dy += 255;
	else if (dy > 127)
		dy -= 255;

	/*
	 * remember current values for next scan
	 */
	ms->ms_horc = *horc;
	ms->ms_verc = *verc;

	ms->ms_dx = dx;
	ms->ms_dy = dy;
	ms->ms_mb = mb;

#if NWSMOUSE > 0
	/*
	 * If wsmouse is attached and we are not opened
	 * directly, pass events to wscons.
	 */
	if (ms->ms_wsmousedev && ms->ms_wsenabled)
	{
		int buttons = 0;

		if (mb & 4)
			buttons |= 1;
		if (mb & 2)
			buttons |= 2;
		if (mb & 1)
			buttons |= 4;

		wsmouse_input(ms->ms_wsmousedev, 
			      buttons,
			      dx, -dy, 0, 0,
			      WSMOUSE_INPUT_DELTA);

	} else
#endif
	if (dx || dy || ms->ms_ub != ms->ms_mb) {
		/*
		 * We have at least one event (mouse button, delta-X, or
		 * delta-Y; possibly all three, and possibly three separate
		 * button events).  Deliver these events until we are out of
		 * changes or out of room.  As events get delivered, mark them
		 * `unchanged'.
		 */
		any = 0;
		get = ms->ms_events.ev_get;
		put = ms->ms_events.ev_put;
		fe = &ms->ms_events.ev_q[put];

		mb = ms->ms_mb;
		ub = ms->ms_ub;
		while ((d = mb ^ ub) != 0) {
			/*
			 * Mouse button change.  Convert up to three changes
			 * to the `first' change, and drop it into the event
			 * queue.
			 */
			if ((++put) % EV_QSIZE == get) {
				put--;
				goto out;
			}

			d = to_one[d - 1];	/* from 1..7 to {1,2,4} */
			fe->id = to_id[d - 1];	/* from {1,2,4} to ID */
			fe->value = mb & d ? VKEY_DOWN : VKEY_UP;
			getmicrotime(&fe->time);
			fe++;

			if (put >= EV_QSIZE) {
				put = 0;
				fe = &ms->ms_events.ev_q[0];
			}
			any = 1;

			ub ^= d;
		}
		if (ms->ms_dx) {
			if ((++put) % EV_QSIZE == get) {
				put--;
				goto out;
			}

			fe->id = LOC_X_DELTA;
			fe->value = ms->ms_dx;
			getmicrotime(&fe->time);
			fe++;

			if (put >= EV_QSIZE) {
				put = 0;
				fe = &ms->ms_events.ev_q[0];
			}
			any = 1;

			ms->ms_dx = 0;
		}
		if (ms->ms_dy) {
			if ((++put) % EV_QSIZE == get) {
				put--;
				goto out;
			}

			fe->id = LOC_Y_DELTA;
			fe->value = ms->ms_dy;
			getmicrotime(&fe->time);
			fe++;

			if (put >= EV_QSIZE) {
				put = 0;
				fe = &ms->ms_events.ev_q[0];
			}
			any = 1;

			ms->ms_dy = 0;
		}
out:
		if (any) {
			ms->ms_ub = ub;
			ms->ms_events.ev_put = put;
			EV_WAKEUP(&ms->ms_events);
		}
	}

	/*
	 * reschedule handler, or if terminating,
	 * handshake with ms_disable
	 */
	if (ms->ms_ready)
		callout_reset(&ms->ms_intr_ch, 2, msintr, ms);
	else
		wakeup(ms);
}
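The delta computation above corrects for the 8-bit hardware counters wrapping around. The same logic as a standalone helper, with simplified types; the correction constant 255 mirrors the driver code.

/* Signed delta between two 8-bit counter samples, as computed above. */
static short
counter_delta(unsigned char cur, unsigned char prev)
{
	short d = (short)cur - (short)prev;

	if (d < -127)
		d += 255;
	else if (d > 127)
		d -= 255;
	return d;
}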
Example #5
/*
 * FDDI output routine.
 * Encapsulate a packet of type family for the local net.
 * Use trailer local net encapsulation if enough data in first
 * packet leaves a multiple of 512 bytes of data in remainder.
 */
static int
fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
	struct route *ro)
{
	u_int16_t type;
	int loop_copy = 0, error = 0, hdrcmplt = 0;
 	u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
	struct fddi_header *fh;
#if defined(INET) || defined(INET6)
	int is_gw = 0;
#endif

#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m);
	if (error)
		senderr(error);
#endif

	if (ifp->if_flags & IFF_MONITOR)
		senderr(ENETDOWN);
	if (!((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
		senderr(ENETDOWN);
	getmicrotime(&ifp->if_lastchange);

#if defined(INET) || defined(INET6)
	if (ro != NULL)
		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
#endif

	switch (dst->sa_family) {
#ifdef INET
	case AF_INET: {
		error = arpresolve(ifp, is_gw, m, dst, edst, NULL, NULL);
		if (error)
			return (error == EWOULDBLOCK ? 0 : error);
		type = htons(ETHERTYPE_IP);
		break;
	}
	case AF_ARP:
	{
		struct arphdr *ah;
		ah = mtod(m, struct arphdr *);
		ah->ar_hrd = htons(ARPHRD_ETHER);

		loop_copy = -1; /* if this is for us, don't do it */

		switch (ntohs(ah->ar_op)) {
		case ARPOP_REVREQUEST:
		case ARPOP_REVREPLY:
			type = htons(ETHERTYPE_REVARP);
			break;
		case ARPOP_REQUEST:
		case ARPOP_REPLY:
		default:
			type = htons(ETHERTYPE_ARP);
			break;
		}

		if (m->m_flags & M_BCAST)
			bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN);
		else
			bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN);

	}
	break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL, NULL);
		if (error)
			return (error == EWOULDBLOCK ? 0 : error);
		type = htons(ETHERTYPE_IPV6);
		break;
#endif /* INET6 */
	case pseudo_AF_HDRCMPLT:
	{
		const struct ether_header *eh;

		hdrcmplt = 1;
		eh = (const struct ether_header *)dst->sa_data;
		bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
		/* FALLTHROUGH */
	}

	case AF_UNSPEC:
	{
		const struct ether_header *eh;

		loop_copy = -1;
		eh = (const struct ether_header *)dst->sa_data;
		bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
		if (*edst & 1)
			m->m_flags |= (M_BCAST|M_MCAST);
		type = eh->ether_type;
		break;
	}

	case AF_IMPLINK:
	{
		fh = mtod(m, struct fddi_header *);
		error = EPROTONOSUPPORT;
		switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) {
			case FDDIFC_LLC_ASYNC: {
				/* legal priorities are 0 through 7 */
				if ((fh->fddi_fc & FDDIFC_Z) > 7)
					goto bad;
				break;
			}
			case FDDIFC_LLC_SYNC: {
				/* FDDIFC_Z bits reserved, must be zero */
				if (fh->fddi_fc & FDDIFC_Z)
					goto bad;
				break;
			}
			case FDDIFC_SMT: {
				/* FDDIFC_Z bits must be non zero */
				if ((fh->fddi_fc & FDDIFC_Z) == 0)
					goto bad;
				break;
			}
			default: {
				/* anything else is too dangerous */
				goto bad;
			}
		}
		error = 0;
		if (fh->fddi_dhost[0] & 1)
			m->m_flags |= (M_BCAST|M_MCAST);
		goto queue_it;
	}
	default:
		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
		senderr(EAFNOSUPPORT);
	}

	/*
	 * Add LLC header.
	 */
	if (type != 0) {
		struct llc *l;
		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
		if (m == NULL)
			senderr(ENOBUFS);
		l = mtod(m, struct llc *);
		l->llc_control = LLC_UI;
		l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
		l->llc_snap.org_code[0] =
			l->llc_snap.org_code[1] =
			l->llc_snap.org_code[2] = 0;
		l->llc_snap.ether_type = htons(type);
	}

	/*
	 * Add local net header.  If no space in first mbuf,
	 * allocate another.
	 */
	M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
	if (m == NULL)
		senderr(ENOBUFS);
	fh = mtod(m, struct fddi_header *);
	fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
	bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN);
  queue_it:
	if (hdrcmplt)
		bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN);
	else
		bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost,
			FDDI_ADDR_LEN);

	/*
	 * If a simplex interface, and the packet is being sent to our
	 * Ethernet address or a broadcast address, loopback a copy.
	 * XXX To make a simplex device behave exactly like a duplex
	 * device, we should copy in the case of sending to our own
	 * ethernet address (thus letting the original actually appear
	 * on the wire). However, we don't do that here for security
	 * reasons and compatibility with the original behavior.
	 */
	if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
		if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
			struct mbuf *n;
			n = m_copy(m, 0, (int)M_COPYALL);
			(void) if_simloop(ifp, n, dst->sa_family,
					  FDDI_HDR_LEN);
		} else if (bcmp(fh->fddi_dhost, fh->fddi_shost,
				FDDI_ADDR_LEN) == 0) {
			(void) if_simloop(ifp, m, dst->sa_family,
					  FDDI_HDR_LEN);
			return (0);	/* XXX */
		}
	}

	error = (ifp->if_transmit)(ifp, m);
	if (error)
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

	return (error);

bad:
	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
	if (m)
		m_freem(m);
	return (error);
}
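The LLC header added above is the 8-byte 802.2 LLC/SNAP encapsulation (LLC_SNAPFRAMELEN): DSAP and SSAP 0xAA, UI control 0x03, a zero OUI, then the 16-bit EtherType. A standalone sketch with the constants spelled out; the function name is local to this example, and the EtherType is expected already in network byte order, as with htons(type) above.

#include <stdint.h>
#include <string.h>

static void
fill_llc_snap(uint8_t llc[8], uint16_t ethertype_be)
{
	llc[0] = 0xaa;				/* DSAP: SNAP */
	llc[1] = 0xaa;				/* SSAP: SNAP */
	llc[2] = 0x03;				/* control: UI */
	llc[3] = llc[4] = llc[5] = 0x00;	/* OUI: encapsulated Ethernet */
	memcpy(&llc[6], &ethertype_be, sizeof(ethertype_be));
}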
Example #6
/*
 * bfq_queue(): .queue callback of the bfq policy.
 *
 * A thread calls this function to hand in its I/O requests (bios).
 * The bios are stored in the per-thread queue, in the tdio structure.
 * Currently, sync and async bios are queued together, which may cause
 * some performance issues.
 *
 * Besides queueing bios, this function also calculates the average
 * thinking time and average seek distance of a thread, using the
 * information in bio structure.
 *
 * If the bfq scheduler is waiting on the calling thread due to
 * the AS feature, this function cancels the callout alarm
 * and resumes the scheduler so it continues serving this thread.
 *
 * lock:
 *   THREAD_IO_LOCK: protect from queue iteration in bfq_dequeue()
 *   BFQ_LOCK: protect from other insertions/deletions in wf2q_augtree
 *   in bfq_queue() or bfq_dequeue().
 *
 * refcount:
 *   If the scheduler is waiting on the calling thread, the refcount
 *   of the related tdio is decreased by 1 in this function.  The
 *   matching increase is in bfq_dequeue(), before the callout alarm
 *   is reset.
 *
 * Return value:
 *  EINVAL: if bio->bio_buf->b_cmd == BUF_CMD_FLUSH
 *  0: bio is queued successfully.
 */
static int
bfq_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
		struct  bio *bio)
{
	struct bfq_disk_ctx *bfq_diskctx = (struct bfq_disk_ctx *)diskctx;
	struct bfq_thread_io *bfq_tdio = (struct bfq_thread_io *)tdio;
	int original_qlength;

	/* we do not handle flush requests; push them down to dsched */
	if (__predict_false(bio->bio_buf->b_cmd == BUF_CMD_FLUSH))
		return (EINVAL);

	DSCHED_THREAD_IO_LOCK(tdio);
	KKASSERT(tdio->debug_priv == 0xF00FF00F);
	dsched_debug(BFQ_DEBUG_NORMAL, "bfq: tdio %p pushes bio %p\n", bfq_tdio, bio);

	dsched_set_bio_priv(bio, tdio);
	dsched_thread_io_ref(tdio);

	if ((bio->bio_buf->b_cmd == BUF_CMD_READ) ||
	    (bio->bio_buf->b_cmd == BUF_CMD_WRITE)) {
		bfq_update_tdio_seek_avg(bfq_tdio, bio);
	}

	bfq_update_tdio_ttime_avg(bfq_tdio);

	/* update last_bio_pushed_time */
	getmicrotime(&bfq_tdio->last_bio_pushed_time);

	if ((bfq_tdio->seek_samples > BFQ_VALID_MIN_SAMPLES) &&
	    BFQ_TDIO_SEEKY(bfq_tdio))
		dsched_debug(BFQ_DEBUG_NORMAL, "BFQ: tdio %p is seeky\n", bfq_tdio);

	/*
	 * If a tdio takes too long to think, we disable its AS feature.
	 */
	if ((bfq_tdio->ttime_samples > BFQ_VALID_MIN_SAMPLES) &&
	    (bfq_tdio->ttime_avg > BFQ_T_WAIT * (1000 / hz) * 1000) &&
	    (bfq_tdio->service_received > bfq_tdio->budget / 8)) {
		dsched_debug(BFQ_DEBUG_NORMAL, "BFQ: tdio %p takes too long time to think\n", bfq_tdio);
		bfq_tdio->tdio_as_switch = 0;
	} else {
		bfq_tdio->tdio_as_switch = 1;
	}

	/* insert the bio into the tdio's own queue */
	KKASSERT(lockstatus(&tdio->lock, curthread) == LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&tdio->queue, bio, link);
#if 0
	tdio->qlength++;
#endif
	original_qlength = atomic_fetchadd_int(&tdio->qlength, 1);
	DSCHED_THREAD_IO_UNLOCK(tdio);
	/*
	 * A new thread:
	 * In the dequeue function, we remove the thread
	 * from the aug-tree if it has no further bios.
	 * Therefore "new" means a really new thread (a
	 * newly created thread or a thread that pushed no more
	 * bios when the scheduler was waiting for it) or
	 * one that was removed from the aug-tree earlier.
	 */
	if (original_qlength == 0) {
		/*
		 * a really new thread
		 */
		BFQ_LOCK(bfq_diskctx);
		if (bfq_tdio != bfq_diskctx->bfq_active_tdio) {
			/* insert the tdio into the wf2q queue */
			wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, bfq_tdio);
		} else {
			/*
			 * the thread the scheduler is waiting on
			 */
			if (bfq_diskctx->bfq_blockon == bfq_tdio) {
				/*
				 * XXX: possible race condition here:
				 * if the callout function is triggered when
				 * the following code is executed, then after
				 * releasing the TDIO lock, the callout function
				 * will set the thread inactive and it will never
				 * be inserted into the aug-tree (so its bio pushed
				 * this time will not be dispatched) until it pushes
				 * further bios
				 */
				bfq_diskctx->bfq_as_hit++;
				bfq_update_as_avg_wait(bfq_diskctx, bfq_tdio, BFQ_AS_STAT_ALL);

				if (callout_pending(&bfq_diskctx->bfq_callout))
					callout_stop(&bfq_diskctx->bfq_callout);
				bfq_diskctx->bfq_blockon = NULL;

				/* ref'ed in dequeue(), before resetting callout */
				dsched_thread_io_unref(&bfq_tdio->head);

				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: %p pushes a new bio when AS\n", bfq_tdio);
			}
		}

		BFQ_UNLOCK(bfq_diskctx);
	}

	helper_msg_dequeue(bfq_diskctx);

	return 0;
}
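The "thinks too long" test above compares ttime_avg, kept in microseconds, against BFQ_T_WAIT ticks converted with (1000 / hz) * 1000. A standalone helper making that conversion explicit; hz is passed in here rather than read from the kernel global, and the integer 1000 / hz division is kept exactly as the driver writes it.

/* Convert a tick count to microseconds the way the threshold above does. */
static long
ticks_to_usec(int ticks, int hz)
{
	return (long)ticks * (1000 / hz) * 1000;
}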
Example #7
/*
 * bfq_dequeue(): dispatch bios to the disk driver.
 *
 * This function will push as many bios as the number of free slots
 * in the tag queue.
 *
 * In the progress of dispatching, the following events may happen:
 *  - Current thread is timeout: Expire the current thread for
 *    BFQ_REASON_TIMEOUT, and select a new thread to serve in the
 *    wf2q tree.
 *
 *  - Current thread runs out of its budget: Expire the current thread
 *    for BFQ_REASON_OUT_OF_BUDGET, and select a new thread to serve
 *
 *  - Current thread has no further bios in its queue: if the AS feature
 *    is turned on, the bfq scheduler sets an alarm and starts to suspend.
 *    The bfq_timeout() or bfq_queue() calls may resume the scheduler.
 *
 * Implementation note: The bios selected to be dispatched will first
 * be stored in an array bio_do_dispatch. After this function releases
 * all the locks it holds, it will call dsched_strategy_request_polling()
 * for each bio stored.
 *
 * With the help of bfq_disk_ctx->pending_dequeue,
 * there will be only one bfq_dequeue pending on the BFQ_LOCK.
 *
 * lock:
 *	BFQ_LOCK: protect from wf2q_augtree operations in bfq_queue()
 *	THREAD_IO_LOCK: locks the active_tdio. Protect from queue insertions
 *	in bfq_queue; Protect the active_tdio->budget
 *
 * refcount:
 *  If the scheduler decides to suspend, the refcount of active_tdio
 *  increases by 1.  The matching decrease is in bfq_queue() and
 *  bfq_timeout().
 * blocking:
 *  May block on the disk driver lock, depending on the driver.
 *
 * Calling path:
 * The callers could be:
 *	bfq_queue(), bfq_timeout() and the registered polling function.
 *
 *	caller --> helper_msg_dequeue --lwkt_msg--> helper_thread-> me
 *
 */
void
bfq_dequeue(struct dsched_disk_ctx *diskctx)
{
	int free_slots,
	    bio_index = 0, i,
	    remaining_budget = 0;/* remaining budget of current active process */

	struct bio *bio, *bio_to_dispatch[33];
	struct bfq_thread_io *active_tdio = NULL;
	struct bfq_disk_ctx *bfq_diskctx = (struct bfq_disk_ctx *)diskctx;

	BFQ_LOCK(bfq_diskctx);
	atomic_cmpset_int(&bfq_diskctx->pending_dequeue, 1, 0);

	/*
	 * The whole scheduler is waiting for further bios
	 * from the process currently being served
	 */
	if (bfq_diskctx->bfq_blockon != NULL)
		goto rtn;

	remaining_budget = bfq_diskctx->bfq_remaining_budget;
	active_tdio = bfq_diskctx->bfq_active_tdio;
	dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: dequeue: Im in. active_tdio = %p\n", active_tdio);

	free_slots = diskctx->max_tag_queue_depth - diskctx->current_tag_queue_depth;
	KKASSERT(free_slots >= 0 && free_slots <= 32);

	if (active_tdio)
		DSCHED_THREAD_IO_LOCK(&active_tdio->head);

	while (free_slots) {
		/* Here active_tdio must be locked ! */
		if (active_tdio) {
			/*
			 * the bio_done function has marked the current
			 * tdio timeout
			 */
			if (active_tdio->maybe_timeout) {
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: %p time out in dequeue()\n", active_tdio);
				wf2q_update_vd(active_tdio, active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_TIMEOUT);

				/*
				 * There are still undispatched bios;
				 * reinsert the tdio into the aug-tree.
				 */
				if (active_tdio->head.qlength > 0) {
					wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, active_tdio);
					KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count);
				}

				active_tdio->maybe_timeout = 0;
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
				continue;
			}

			/* select next bio to dispatch */
			/* TODO: a wiser selection */
			KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
			bio = TAILQ_FIRST(&active_tdio->head.queue);
			dsched_debug(BFQ_DEBUG_NORMAL, "bfq: the first bio in queue of active_tdio %p is %p\n", active_tdio, bio);

			dsched_debug(BFQ_DEBUG_VERBOSE, "bfq: active_tdio %p exists, remaining budget = %d, tdio budget = %d\n, qlength = %d, first bio = %p, first bio cmd = %d, first bio size = %d\n", active_tdio, remaining_budget, active_tdio->budget, active_tdio->head.qlength, bio, bio?bio->bio_buf->b_cmd:-1, bio?bio->bio_buf->b_bcount:-1);

			/*
			 * The bio is not read or write, just
			 * push it down.
			 */
			if (bio && (bio->bio_buf->b_cmd != BUF_CMD_READ) &&
			    (bio->bio_buf->b_cmd != BUF_CMD_WRITE)) {
				dsched_debug(BFQ_DEBUG_NORMAL, "bfq: remove bio %p from the queue of %p\n", bio, active_tdio);
				KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
				TAILQ_REMOVE(&active_tdio->head.queue, bio, link);
				active_tdio->head.qlength--;
				free_slots--;

#if 0
				dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
#endif
				bio_to_dispatch[bio_index++] = bio;
				KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth);
				continue;
			}
			/*
			 * Run out of budget
			 * But this is not because the size of bio is larger
			 * than the complete budget.
			 * If the size of bio is larger than the complete
			 * budget, then use a complete budget to cover it.
			 */
			if (bio && (remaining_budget < BIO_SIZE(bio)) &&
			    (remaining_budget != active_tdio->budget)) {
				/* charge budget used */
				wf2q_update_vd(active_tdio, active_tdio->budget - remaining_budget);
				bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_OUT_OF_BUDGET);
				wf2q_insert_thread_io(&bfq_diskctx->bfq_wf2q, active_tdio);
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: thread %p ran out of budget\n", active_tdio);
				DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
				active_tdio = NULL;
			} else { /* if (bio && remaining_budget < BIO_SIZE(bio) && remaining_budget != active_tdio->budget) */

				/*
				 * Either we have enough budget, or we have
				 * a complete budget and the bio is larger
				 * than that.
				 */
				if (bio) {
					/* dispatch */
					remaining_budget -= BIO_SIZE(bio);
					/*
					 * The size of the first bio is larger
					 * than the whole budget; charge the
					 * extra part.
					 */
					if (remaining_budget < 0)
						wf2q_update_vd(active_tdio, -remaining_budget);
					/* compensate */
					wf2q_update_vd(active_tdio, -remaining_budget);
					/*
					 * remaining_budget may be < 0,
					 * but to keep the budget of the current tdio
					 * from having a negative number subtracted,
					 * the remaining_budget has to be >= 0
					 */
					remaining_budget = MAX(0, remaining_budget);
					dsched_debug(BFQ_DEBUG_NORMAL, "bfq: remove bio %p from the queue of %p\n", bio, active_tdio);
					KKASSERT(lockstatus(&active_tdio->head.lock, curthread) == LK_EXCLUSIVE);
					TAILQ_REMOVE(&active_tdio->head.queue, bio, link);
					free_slots--;
					active_tdio->head.qlength--;
					active_tdio->bio_dispatched++;
					wf2q_inc_tot_service(&bfq_diskctx->bfq_wf2q, BIO_SIZE(bio));
					dsched_debug(BFQ_DEBUG_VERBOSE,
					    "BFQ: %p's bio dispatched, size=%d, remaining_budget = %d\n",
					    active_tdio, BIO_SIZE(bio), remaining_budget);
#if 0
					dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
#endif
					bio_to_dispatch[bio_index++] = bio;
					KKASSERT(bio_index <= bfq_diskctx->head.max_tag_queue_depth);

				} else { /* if (bio) */

					KKASSERT(active_tdio);
					/*
					 * If AS feature is switched off,
					 * expire the tdio as well
					 */
					if ((remaining_budget <= 0) ||
					    !(bfq_diskctx->bfq_flag & BFQ_FLAG_AS) ||
					    !active_tdio->tdio_as_switch) {
						active_tdio->budget -= remaining_budget;
						wf2q_update_vd(active_tdio, active_tdio->budget);
						bfq_expire(bfq_diskctx, active_tdio, BFQ_REASON_OUT_OF_BUDGET);
						DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
						active_tdio = NULL;
					} else {

						/* no further bio, wait for a while */
						bfq_diskctx->bfq_blockon = active_tdio;
						/*
						 * Increase ref count to ensure that
						 * tdio will not be destroyed during waiting.
						 */
						dsched_thread_io_ref(&active_tdio->head);
						/*
						 * If the tdio is seeky but does not think for
						 * too long, we wait a little less for it.
						 */
						if (active_tdio->seek_samples >= BFQ_VALID_MIN_SAMPLES && BFQ_TDIO_SEEKY(active_tdio))
							callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT_MIN, (void (*) (void *))helper_msg_as_timeout, bfq_diskctx);
						else
							callout_reset(&bfq_diskctx->bfq_callout, BFQ_T_WAIT, (void (*) (void *))helper_msg_as_timeout, bfq_diskctx);

						/* save the start time of blocking */
						getmicrotime(&active_tdio->as_start_time);

						dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: blocked on %p, remaining_budget = %d\n", active_tdio, remaining_budget);
						DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);
						goto save_and_rtn;
					}
				}
			}
		} else { /* if (active_tdio) */
			/* there is no active tdio */

			/* no pending bios at all */
			active_tdio = wf2q_get_next_thread_io(&bfq_diskctx->bfq_wf2q);

			if (!active_tdio) {
				KKASSERT(bfq_diskctx->bfq_wf2q.wf2q_tdio_count == 0);
				dsched_debug(BFQ_DEBUG_VERBOSE, "BFQ: no more eligible tdio!\n");
				goto save_and_rtn;
			}

			/*
			 * A new tdio is picked,
			 * initialize the service related statistic data
			 */
			DSCHED_THREAD_IO_LOCK(&active_tdio->head);
			active_tdio->service_received = 0;

			/*
			 * Reset the maybe_timeout flag, which
			 * may be set by a biodone after the service is done
			 */
			getmicrotime(&active_tdio->service_start_time);
			active_tdio->maybe_timeout = 0;

			remaining_budget = active_tdio->budget;
			dsched_debug(BFQ_DEBUG_VERBOSE, "bfq: active_tdio %p selected, remaining budget = %d, tdio budget = %d\n, qlength = %d\n", active_tdio, remaining_budget, active_tdio->budget, active_tdio->head.qlength);
		}

	}/* while (free_slots) */

	/* reach here only when free_slots == 0 */
	if (active_tdio) /* && lockcount(&active_tdio->head.lock) > 0) */
		DSCHED_THREAD_IO_UNLOCK(&active_tdio->head);

save_and_rtn:
	/* save the remaining budget */
	bfq_diskctx->bfq_remaining_budget = remaining_budget;
	bfq_diskctx->bfq_active_tdio = active_tdio;
rtn:
	BFQ_UNLOCK(bfq_diskctx);
	/* dispatch the planned bios */
	for (i = 0; i < bio_index; i++)
		dsched_strategy_request_polling(diskctx->dp, bio_to_dispatch[i], diskctx);

}
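A miniature of the dispatch pattern used above: select work while holding the lock, remember it in a bounded local array, and only issue the potentially blocking requests after the lock is dropped. Everything below (struct request, bfq_lock(), pick_next_locked(), issue()) is a placeholder standing in for the driver's actual types and calls.

#define MAX_BATCH 33				/* mirrors bio_to_dispatch[33] above */

struct request;					/* placeholder */
extern void bfq_lock(void);			/* placeholder for BFQ_LOCK() */
extern void bfq_unlock(void);			/* placeholder for BFQ_UNLOCK() */
extern struct request *pick_next_locked(void);	/* placeholder for the selection loop */
extern void issue(struct request *);		/* placeholder for dsched_strategy_request_polling() */

static void
dispatch_batch(void)
{
	struct request *batch[MAX_BATCH];
	int n = 0, i;

	bfq_lock();
	while (n < MAX_BATCH) {
		struct request *r = pick_next_locked();
		if (r == NULL)
			break;
		batch[n++] = r;
	}
	bfq_unlock();

	for (i = 0; i < n; i++)			/* possibly blocking, so done unlocked */
		issue(batch[i]);
}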
Example #8
/**
 * mpssas_SSU_to_SATA_devices 
 * @sc: per adapter object
 *
 * Looks through the target list and issues a StartStopUnit SCSI command to each
 * SATA direct-access device.  This helps to ensure that data corruption is
 * avoided when the system is being shut down.  This must be called after the IR
 * System Shutdown RAID Action is sent if in IR mode.
 *
 * Return nothing.
 */
static void
mpssas_SSU_to_SATA_devices(struct mps_softc *sc)
{
	struct mpssas_softc *sassc = sc->sassc;
	union ccb *ccb;
	path_id_t pathid = cam_sim_path(sassc->sim);
	target_id_t targetid;
	struct mpssas_target *target;
	char path_str[64];
	struct timeval cur_time, start_time;

	/*
	 * For each target, issue a StartStopUnit command to stop the device.
	 */
	sc->SSU_started = TRUE;
	sc->SSU_refcount = 0;
	for (targetid = 0; targetid < sc->facts->MaxTargets; targetid++) {
		target = &sassc->targets[targetid];
		if (target->handle == 0x0) {
			continue;
		}

		ccb = xpt_alloc_ccb_nowait();
		if (ccb == NULL) {
			mps_dprint(sc, MPS_FAULT, "Unable to alloc CCB to stop "
			    "unit.\n");
			return;
		}

		/*
		 * The stop_at_shutdown flag will be set if this device is
		 * a SATA direct-access end device.
		 */
		if (target->stop_at_shutdown) {
			if (xpt_create_path(&ccb->ccb_h.path,
			    xpt_periph, pathid, targetid,
			    CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
				mps_dprint(sc, MPS_FAULT, "Unable to create "
				    "LUN path to stop unit.\n");
				xpt_free_ccb(ccb);
				return;
			}
			xpt_path_string(ccb->ccb_h.path, path_str,
			    sizeof(path_str));

			mps_dprint(sc, MPS_INFO, "Sending StopUnit: path %s "
			    "handle %d\n", path_str, target->handle);
			
			/*
			 * Issue a START STOP UNIT command for the target.
			 * Increment the SSU counter to be used to count the
			 * number of required replies.
			 */
			mps_dprint(sc, MPS_INFO, "Incrementing SSU count\n");
			sc->SSU_refcount++;
			ccb->ccb_h.target_id =
			    xpt_path_target_id(ccb->ccb_h.path);
			ccb->ccb_h.ppriv_ptr1 = sassc;
			scsi_start_stop(&ccb->csio,
			    /*retries*/0,
			    mpssas_stop_unit_done,
			    MSG_SIMPLE_Q_TAG,
			    /*start*/FALSE,
			    /*load/eject*/0,
			    /*immediate*/FALSE,
			    MPS_SENSE_LEN,
			    /*timeout*/10000);
			xpt_action(ccb);
		}
	}

	/*
	 * Wait until all of the SSU commands have completed or time has
	 * expired (60 seconds).  Pause for 100ms each time through.  If any
	 * command times out, the target will be reset in the SCSI command
	 * timeout routine.
	 */
	getmicrotime(&start_time);
	while (sc->SSU_refcount) {
		pause("mpswait", hz/10);
		
		getmicrotime(&cur_time);
		if ((cur_time.tv_sec - start_time.tv_sec) > 60) {
			mps_dprint(sc, MPS_FAULT, "Time has expired waiting "
			    "for SSU commands to complete.\n");
			break;
		}
	}
}
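The wait loop above polls SSU_refcount every 100 ms and gives up after 60 seconds. A user-space sketch of the same bounded wait, substituting nanosleep() and clock_gettime() for the kernel's pause() and getmicrotime(); the function name and return convention are invented for this illustration.

#include <time.h>

static int
wait_for_zero(volatile int *counter, time_t timeout_sec)
{
	struct timespec start, now;
	struct timespec tick = { 0, 100 * 1000 * 1000 };	/* 100 ms */

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (*counter != 0) {
		nanosleep(&tick, NULL);
		clock_gettime(CLOCK_MONOTONIC, &now);
		if (now.tv_sec - start.tv_sec > timeout_sec)
			return -1;				/* timed out */
	}
	return 0;
}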
Example #9
/*
 * General fork call.  Note that another LWP in the process may call exec()
 * or exit() while we are forking.  It's safe to continue here, because
 * neither operation will complete until all LWPs have exited the process.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc	*p1, *p2, *parent;
	struct plimit   *p1_lim;
	uid_t		uid;
	struct lwp	*l2;
	int		count;
	vaddr_t		uaddr;
	int		tnprocs;
	int		tracefork;
	int		error = 0;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		static struct timeval lasttfm;
		atomic_dec_uint(&nprocs);
		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc", "increase kern.maxproc or NPROC");
		if (forkfsleep)
			kpause("forkmx", false, forkfsleep, NULL);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
		    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0) {
			(void)chgproccnt(uid, -1);
			atomic_dec_uint(&nprocs);
			if (forkfsleep)
				kpause("forkulim", false, forkfsleep, NULL);
			return EAGAIN;
		}
	}

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	/*
	 * We are now committed to the fork.  From here on, we may
	 * block on resources, but resource allocation may NOT fail.
	 */

	/* Allocate new proc. */
	p2 = proc_alloc();

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	if (flags & FORK_SYSTEM) {
		/*
		 * Mark it as a system process.  Set P_NOCLDWAIT so that
		 * children are reparented to init(8) when they exit.
		 * init(8) can easily wait them out for us.
		 */
		p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT);
	}

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	/*
	 * Share a lock between the processes if they are to share signal
	 * state: we must synchronize access to it.
	 */
	if (flags & FORK_SHARESIGS) {
		p2->p_lock = p1->p_lock;
		mutex_obj_hold(p1->p_lock);
	} else
		p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
#if defined(__HAVE_RAS)
	ras_fork(p1, p2);
#endif

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vref(p2->p_textvp);

	if (flags & FORK_SHAREFILES)
		fd_share(p2);
	else if (flags & FORK_CLEANFILES)
		p2->p_fd = fd_init(NULL);
	else
		p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	if (flags & FORK_SHARECWD)
		cwdshare(p2);
	else
		p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need increase pl_refcnt.
	 */
	p1_lim = p1->p_limit;
	if (!p1_lim->pl_writeable) {
		lim_addref(p1_lim);
		p2->p_limit = p1_lim;
	} else {
		p2->p_limit = lim_copy(p1_lim);
	}

	if (flags & FORK_PPWAIT) {
		/* Mark ourselves as waiting for a child. */
		l1->l_pflag |= LP_VFORKWAIT;
		p2->p_lflag = PL_PPWAIT;
		p2->p_vforklwp = l1;
	} else {
		p2->p_lflag = 0;
	}
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	parent = (flags & FORK_NOWAIT) ? initproc : p1;
	p2->p_pptr = parent;
	p2->p_ppid = parent->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/*
	 * Set up the new process address space.
	 */
	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false);

	/*
	 * Finish creating the child process.
	 * It will return through a different path later.
	 */
	lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0,
	    stack, stacksize, (func != NULL) ? func : child_return, arg, &l2,
	    l1->l_class);

	/*
	 * Inherit l_private from the parent.
	 * Note that we cannot use lwp_setprivate() here since that
	 * also sets the CPU TLS register, which is incorrect if the
	 * process has changed that without letting the kernel know.
	 */
	l2->l_private = l1->l_private;

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, l1, flags);

	/*
	 * ...and finally, any other random fork hooks that subsystems
	 * might have registered.
	 */
	doforkhooks(p2, p1);

	SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0);

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
	p2->p_exitsig = exitsig;		/* signal for parent on exit */

	/*
	 * We don't want to tracefork vfork()ed processes because they
	 * will not receive the SIGTRAP until it is too late.
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;
		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, sleep until it clears LP_VFORKWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}
	mutex_exit(proc_lock);

	return 0;
}
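The top of fork1() reserves resources optimistically (nprocs, the per-user process count, the U-area) and unwinds them in reverse order on each failure path before returning EAGAIN or ENOMEM. A miniature of that reserve/unwind shape, with plain counters standing in for the kernel's atomics and chgproccnt(); all names here are invented for the sketch.

static int nprocs_sketch;		/* stands in for nprocs */
static int peruser_sketch;		/* stands in for the chgproccnt() count */

static int
reserve_proc(int maxproc, int peruser_limit)
{
	if (++nprocs_sketch >= maxproc) {
		nprocs_sketch--;	/* undo the global count */
		return -1;		/* EAGAIN in the kernel */
	}
	if (++peruser_sketch > peruser_limit) {
		peruser_sketch--;	/* undo the per-user count */
		nprocs_sketch--;	/* and the global one */
		return -1;
	}
	return 0;
}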
Example #10
int
fork1(struct proc *p1, int exitsig, int flags, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p2;
	uid_t uid;
	struct vmspace *vm;
	int count;
	vaddr_t uaddr;
	int s;
	extern void endtsleep(void *);
	extern void realitexpire(void *);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create. We reserve
	 * the last 5 processes to root. The variable nprocs is the current
	 * number of processes, maxproc is the limit.
	 */
	uid = p1->p_cred->p_ruid;
	if ((nprocs >= maxproc - 5 && uid != 0) || nprocs >= maxproc) {
		static struct timeval lasttfm;

		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc");
		return (EAGAIN);
	}
	nprocs++;

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 */
	count = chgproccnt(uid, 1);
	if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		(void)chgproccnt(uid, -1);
		nprocs--;
		return (EAGAIN);
	}

	uaddr = uvm_km_alloc1(kernel_map, USPACE, USPACE_ALIGN, 1);
	if (uaddr == 0) {
		chgproccnt(uid, -1);
		nprocs--;
		return (ENOMEM);
	}

	/*
	 * From now on, we're committed to the fork and cannot fail.
	 */

	/* Allocate new proc. */
	p2 = pool_get(&proc_pool, PR_WAITOK);

	p2->p_stat = SIDL;			/* protect against others */
	p2->p_exitsig = exitsig;
	p2->p_forw = p2->p_back = NULL;

#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		atomic_setbits_int(&p2->p_flag, P_THREAD);
		p2->p_p = p1->p_p;
		TAILQ_INSERT_TAIL(&p2->p_p->ps_threads, p2, p_thr_link);
	} else {
		process_new(p2, p1);
	}
#else
	process_new(p2, p1);
#endif

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	bzero(&p2->p_startzero,
	    (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

	/*
	 * Initialize the timeouts.
	 */
	timeout_set(&p2->p_sleep_to, endtsleep, p2);
	timeout_set(&p2->p_realit_to, realitexpire, p2);

#if defined(__HAVE_CPUINFO)
	p2->p_cpu = p1->p_cpu;
#endif

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * The p_stats and p_sigacts substructs are set in vm_fork.
	 */
	p2->p_flag = 0;
	p2->p_emul = p1->p_emul;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);
	atomic_setbits_int(&p2->p_flag, p1->p_flag & (P_SUGID | P_SUGIDEXEC));
	if (flags & FORK_PTRACE)
		atomic_setbits_int(&p2->p_flag, p1->p_flag & P_TRACED);
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		p2->p_p->ps_cred = pool_get(&pcred_pool, PR_WAITOK);
		bcopy(p1->p_p->ps_cred, p2->p_p->ps_cred, sizeof(*p2->p_p->ps_cred));
		p2->p_p->ps_cred->p_refcnt = 1;
		crhold(p1->p_ucred);
	}

	TAILQ_INIT(&p2->p_selects);

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		VREF(p2->p_textvp);

	if (flags & FORK_CLEANFILES)
		p2->p_fd = fdinit(p1);
	else if (flags & FORK_SHAREFILES)
		p2->p_fd = fdshare(p1);
	else
		p2->p_fd = fdcopy(p1);

	/*
	 * If ps_limit is still copy-on-write, bump refcnt,
	 * otherwise get a copy that won't be modified.
	 * (If PL_SHAREMOD is clear, the structure is shared
	 * copy-on-write.)
	 */
#ifdef RTHREADS
	if (flags & FORK_THREAD) {
		/* nothing */
	} else
#endif
	{
		if (p1->p_p->ps_limit->p_lflags & PL_SHAREMOD)
			p2->p_p->ps_limit = limcopy(p1->p_p->ps_limit);
		else {
			p2->p_p->ps_limit = p1->p_p->ps_limit;
			p2->p_p->ps_limit->p_refcnt++;
		}
	}

	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		atomic_setbits_int(&p2->p_flag, P_CONTROLT);
	if (flags & FORK_PPWAIT)
		atomic_setbits_int(&p2->p_flag, P_PPWAIT);
	p2->p_pptr = p1;
	if (flags & FORK_NOZOMBIE)
		atomic_setbits_int(&p2->p_flag, P_NOZOMBIE);
	LIST_INIT(&p2->p_children);

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			VREF(p2->p_tracep);
	}
#endif

	/*
	 * set priority of child to be that of parent
	 * XXX should move p_estcpu into the region of struct proc which gets
	 * copied.
	 */
	scheduler_fork_hook(p1, p2);

	/*
	 * Create signal actions for the child process.
	 */
	if (flags & FORK_SIGHAND)
		sigactsshare(p1, p2);
	else
		p2->p_sigacts = sigactsinit(p1);

	/*
	 * If emulation has process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, p1);

	p2->p_addr = (struct user *)uaddr;

	/*
	 * Finish creating the child process.  It will return through a
	 * different path later.
	 */
	uvm_fork(p1, p2, ((flags & FORK_SHAREVM) ? TRUE : FALSE), stack,
	    stacksize, func ? func : child_return, arg ? arg : p2);

	timeout_set(&p2->p_stats->p_virt_to, virttimer_trampoline, p2);
	timeout_set(&p2->p_stats->p_prof_to, proftimer_trampoline, p2);

	vm = p2->p_vmspace;

	if (flags & FORK_FORK) {
		forkstat.cntfork++;
		forkstat.sizfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_VFORK) {
		forkstat.cntvfork++;
		forkstat.sizvfork += vm->vm_dsize + vm->vm_ssize;
	} else if (flags & FORK_RFORK) {
		forkstat.cntrfork++;
		forkstat.sizrfork += vm->vm_dsize + vm->vm_ssize;
	} else {
		forkstat.cntkthread++;
		forkstat.sizkthread += vm->vm_dsize + vm->vm_ssize;
	}

	/* Find an unused pid satisfying 1 <= lastpid <= PID_MAX */
	do {
		lastpid = 1 + (randompid ? arc4random() : lastpid) % PID_MAX;
	} while (pidtaken(lastpid));
	p2->p_pid = lastpid;

	LIST_INSERT_HEAD(&allproc, p2, p_list);
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	if (p2->p_flag & P_TRACED) {
		p2->p_oppid = p1->p_pid;
		if (p2->p_pptr != p1->p_pptr)
			proc_reparent(p2, p1->p_pptr);

		/*
		 * Set ptrace status.
		 */
		if (flags & FORK_FORK) {
			p2->p_ptstat = malloc(sizeof(*p2->p_ptstat),
			    M_SUBPROC, M_WAITOK);
			p1->p_ptstat->pe_report_event = PTRACE_FORK;
			p2->p_ptstat->pe_report_event = PTRACE_FORK;
			p1->p_ptstat->pe_other_pid = p2->p_pid;
			p2->p_ptstat->pe_other_pid = p1->p_pid;
		}
	}

#if NSYSTRACE > 0
	if (ISSET(p1->p_flag, P_SYSTRACE))
		systrace_fork(p1, p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue.
	 */
	SCHED_LOCK(s);
 	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	p2->p_stat = SRUN;
	setrunqueue(p2);
	SCHED_UNLOCK(s);

	/*
	 * Notify any interested parties about the new process.
	 */
	KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, set P_PPWAIT on child, and sleep on our
	 * proc (in case of exit).
	 */
	if (flags & FORK_PPWAIT)
		while (p2->p_flag & P_PPWAIT)
			tsleep(p1, PWAIT, "ppwait", 0);

	/*
	 * If we're tracing the child, alert the parent too.
	 */
	if ((flags & FORK_PTRACE) && (p1->p_flag & P_TRACED))
		psignal(p1, SIGTRAP);

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	return (0);
}
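The pid selection above keeps advancing (or randomizing) lastpid until pidtaken() reports the value is free. The same loop as a standalone sketch; pid_is_taken() and SKETCH_PID_MAX are placeholders for the kernel's pidtaken() and PID_MAX, the limit value is illustrative, and rand() stands in for arc4random().

#include <stdlib.h>

#define SKETCH_PID_MAX 32766		/* illustrative limit */

extern int pid_is_taken(int pid);	/* placeholder for pidtaken() */

static int
alloc_pid(int lastpid, int randompid)
{
	do {
		lastpid = 1 + (randompid ? rand() : lastpid) % SKETCH_PID_MAX;
	} while (pid_is_taken(lastpid));
	return lastpid;
}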
Example #11
__private_extern__ errno_t
arp_route_to_gateway_route(const struct sockaddr *net_dest, route_t hint0,
     route_t *out_route)
{
	struct timeval timenow;
	route_t rt = hint0, hint = hint0;
	errno_t error = 0;

	*out_route = NULL;

	/*
	 * Next hop determination.  Because we may involve the gateway route
	 * in addition to the original route, locking is rather complicated.
	 * The general concept is that regardless of whether the route points
	 * to the original route or to the gateway route, this routine takes
	 * an extra reference on such a route.  This extra reference will be
	 * released at the end.
	 *
	 * Care must be taken to ensure that the "hint0" route never gets freed
	 * via rtfree(), since the caller may have stored it inside a struct
	 * route with a reference held for that placeholder.
	 */
	if (rt != NULL) {
		unsigned int ifindex;

		RT_LOCK_SPIN(rt);
		ifindex = rt->rt_ifp->if_index;
		RT_ADDREF_LOCKED(rt);
		if (!(rt->rt_flags & RTF_UP)) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			/* route is down, find a new one */
			hint = rt = rtalloc1_scoped((struct sockaddr *)
			    (size_t)net_dest, 1, 0, ifindex);
			if (hint != NULL) {
				RT_LOCK_SPIN(rt);
				ifindex = rt->rt_ifp->if_index;
			} else {
				senderr(EHOSTUNREACH);
			}
		}

		/*
		 * We have a reference to "rt" by now; it will either
		 * be released or freed at the end of this routine.
		 */
		RT_LOCK_ASSERT_HELD(rt);
		if (rt->rt_flags & RTF_GATEWAY) {
			struct rtentry *gwrt = rt->rt_gwroute;
			struct sockaddr_in gw;

			/* If there's no gateway rt, look it up */
			if (gwrt == NULL) {
				gw = *((struct sockaddr_in *)rt->rt_gateway);
				RT_UNLOCK(rt);
				goto lookup;
			}
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);

			/*
			 * Take gwrt's lock while holding route's lock;
			 * this is okay since gwrt never points back
			 * to "rt", so no lock ordering issues.
			 */
			RT_LOCK_SPIN(gwrt);
			if (!(gwrt->rt_flags & RTF_UP)) {
				struct rtentry *ogwrt;

				rt->rt_gwroute = NULL;
				RT_UNLOCK(gwrt);
				gw = *((struct sockaddr_in *)rt->rt_gateway);
				RT_UNLOCK(rt);
				rtfree(gwrt);
lookup:
				gwrt = rtalloc1_scoped(
				    (struct sockaddr *)&gw, 1, 0, ifindex);

				RT_LOCK(rt);
				/*
				 * Bail out if the route is down, no route
				 * to gateway, circular route, or if the
				 * gateway portion of "rt" has changed.
				 */
				if (!(rt->rt_flags & RTF_UP) ||
				    gwrt == NULL || gwrt == rt ||
				    !equal(SA(&gw), rt->rt_gateway)) {
					if (gwrt == rt) {
						RT_REMREF_LOCKED(gwrt);
						gwrt = NULL;
					}
					RT_UNLOCK(rt);
					if (gwrt != NULL)
						rtfree(gwrt);
					senderr(EHOSTUNREACH);
				}

				/* Remove any existing gwrt */
				ogwrt = rt->rt_gwroute;
				if ((rt->rt_gwroute = gwrt) != NULL)
					RT_ADDREF(gwrt);

				/* Clean up "rt" now while we can */
				if (rt == hint0) {
					RT_REMREF_LOCKED(rt);
					RT_UNLOCK(rt);
				} else {
					RT_UNLOCK(rt);
					rtfree(rt);
				}
				rt = gwrt;
				/* Now free the replaced gwrt */
				if (ogwrt != NULL)
					rtfree(ogwrt);
				/* If still no route to gateway, bail out */
				if (rt == NULL)
					senderr(EHOSTUNREACH);
			} else {
				RT_ADDREF_LOCKED(gwrt);
				RT_UNLOCK(gwrt);
				/* Clean up "rt" now while we can */
				if (rt == hint0) {
					RT_REMREF_LOCKED(rt);
					RT_UNLOCK(rt);
				} else {
					RT_UNLOCK(rt);
					rtfree(rt);
				}
				rt = gwrt;
			}

			/* rt == gwrt; if it is now down, give up */
			RT_LOCK_SPIN(rt);
			if (!(rt->rt_flags & RTF_UP)) {
				RT_UNLOCK(rt);
				senderr(EHOSTUNREACH);
			}
		}

		if (rt->rt_flags & RTF_REJECT) {
			getmicrotime(&timenow);
			if (rt->rt_rmx.rmx_expire == 0 ||
			    timenow.tv_sec < rt->rt_rmx.rmx_expire) {
				RT_UNLOCK(rt);
				senderr(rt == hint ? EHOSTDOWN : EHOSTUNREACH);
			}
		}

		/* Become a regular mutex */
		RT_CONVERT_LOCK(rt);

		/* Caller is responsible for cleaning up "rt" */
		*out_route = rt;
	}
	return (0);

bad:
	/* Clean up route (either it is "rt" or "gwrt") */
	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		if (rt == hint0) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		} else {
			RT_UNLOCK(rt);
			rtfree(rt);
		}
	}
	return (error);
}
Example #12
/*
 * Parallel to llc_rtrequest.
 */
static void
arp_rtrequest(
	int req,
	struct rtentry *rt,
	__unused struct sockaddr *sa)
{
	struct sockaddr *gate = rt->rt_gateway;
	struct llinfo_arp *la = rt->rt_llinfo;
	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}};
	struct timeval timenow;

	if (!arpinit_done) {
		panic("%s: ARP has not been initialized", __func__);
		/* NOTREACHED */
	}
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (rt->rt_flags & RTF_GATEWAY)
		return;
	getmicrotime(&timenow);
	switch (req) {

	case RTM_ADD:
		/*
		 * XXX: If this is a manually added route to an interface,
		 * such as an older version of routed or gated might provide,
		 * restore the cloning bit.
		 */
		if ((rt->rt_flags & RTF_HOST) == 0 &&
		    SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
			rt->rt_flags |= RTF_CLONING;
		if (rt->rt_flags & RTF_CLONING) {
			/*
			 * Case 1: This route should come from a route to iface.
			 */
			if (rt_setgate(rt, rt_key(rt),
			    (struct sockaddr *)&null_sdl) == 0) {
				gate = rt->rt_gateway;
				SDL(gate)->sdl_type = rt->rt_ifp->if_type;
				SDL(gate)->sdl_index = rt->rt_ifp->if_index;
				/*
				 * In case we're called before 1.0 sec.
				 * has elapsed.
				 */
				rt->rt_expire = MAX(timenow.tv_sec, 1);
			}
			break;
		}
		/* Announce a new entry if requested. */
		if (rt->rt_flags & RTF_ANNOUNCE) {
			RT_UNLOCK(rt);
			dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST,
			    SDL(gate), rt_key(rt), NULL, rt_key(rt));
			RT_LOCK(rt);
		}
		/*FALLTHROUGH*/
	case RTM_RESOLVE:
		if (gate->sa_family != AF_LINK ||
		    gate->sa_len < sizeof(null_sdl)) {
		        if (log_arp_warnings)
				log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n");
			break;
		}
		SDL(gate)->sdl_type = rt->rt_ifp->if_type;
		SDL(gate)->sdl_index = rt->rt_ifp->if_index;
		if (la != 0)
			break; /* This happens on a route change */
		/*
		 * Case 2:  This route may come from cloning, or a manual route
		 * add with a LL address.
		 */
		rt->rt_llinfo = la = arp_llinfo_alloc();
		if (la == NULL) {
			if (log_arp_warnings)
				log(LOG_DEBUG, "%s: malloc failed\n", __func__);
			break;
		}
		rt->rt_llinfo_free = arp_llinfo_free;

		arp_inuse++, arp_allocated++;
		Bzero(la, sizeof(*la));
		la->la_rt = rt;
		rt->rt_flags |= RTF_LLINFO;
		LIST_INSERT_HEAD(&llinfo_arp, la, la_le);

		/*
		 * This keeps the multicast addresses from showing up
		 * in `arp -a' listings as unresolved.  It's not actually
		 * functional.  The same is then done for broadcast.
		 */
		if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) {
			RT_UNLOCK(rt);
			dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate,
			    sizeof(struct sockaddr_dl));
			RT_LOCK(rt);
			rt->rt_expire = 0;
		}
		else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) {
			struct sockaddr_dl	*gate_ll = SDL(gate);
			size_t	broadcast_len;
			ifnet_llbroadcast_copy_bytes(rt->rt_ifp,
			    LLADDR(gate_ll), sizeof(gate_ll->sdl_data),
			    &broadcast_len);
			gate_ll->sdl_alen = broadcast_len;
			gate_ll->sdl_family = AF_LINK;
			gate_ll->sdl_len = sizeof(struct sockaddr_dl);
			/* In case we're called before 1.0 sec. has elapsed */
			rt->rt_expire = MAX(timenow.tv_sec, 1);
		}

		if (SIN(rt_key(rt))->sin_addr.s_addr ==
		    (IA_SIN(rt->rt_ifa))->sin_addr.s_addr) {
		    /*
		     * This test used to be
		     *	if (loif.if_flags & IFF_UP)
		     * It allowed local traffic to be forced
		     * through the hardware by configuring the loopback down.
		     * However, it causes problems during network configuration
		     * for boards that can't receive packets they send.
		     * It is now necessary to clear "useloopback" and remove
		     * the route to force traffic out to the hardware.
		     */
			rt->rt_expire = 0;
			ifnet_lladdr_copy_bytes(rt->rt_ifp, LLADDR(SDL(gate)), SDL(gate)->sdl_alen = 6);
			if (useloopback) {
#if IFNET_ROUTE_REFCNT
				/* Adjust route ref count for the interfaces */
				if (rt->rt_if_ref_fn != NULL &&
				    rt->rt_ifp != lo_ifp) {
					rt->rt_if_ref_fn(lo_ifp, 1);
					rt->rt_if_ref_fn(rt->rt_ifp, -1);
				}
#endif /* IFNET_ROUTE_REFCNT */
				rt->rt_ifp = lo_ifp;
			}

		}
		break;

	case RTM_DELETE:
		if (la == 0)
			break;
		arp_inuse--;
		/*
		 * Unchain it but defer the actual freeing until the route
		 * itself is to be freed.  rt->rt_llinfo still points to
		 * llinfo_arp, and likewise, la->la_rt still points to this
		 * route entry, except that RTF_LLINFO is now cleared.
		 */
		LIST_REMOVE(la, la_le);
		la->la_le.le_next = NULL;
		la->la_le.le_prev = NULL;
		rt->rt_flags &= ~RTF_LLINFO;
		if (la->la_hold != NULL)
			m_freem(la->la_hold);
		la->la_hold = NULL;
	}
}