/* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int ofp_sbappendaddr_locked(struct sockbuf *sb, odp_packet_t pkt, odp_packet_t control) { SOCKBUF_LOCK_ASSERT(sb); if (control != ODP_PACKET_INVALID) odp_packet_free(control); sb->sb_mb[sb->sb_put++] = pkt; if (sb->sb_put >= SOCKBUF_LEN) sb->sb_put = 0; if (sb->sb_put == sb->sb_get) { sb->sb_put--; if (sb->sb_put < 0) sb->sb_put = SOCKBUF_LEN-1; OFP_ERR("Buffers full, sb_get=%d max_num=%d", sb->sb_get, SOCKBUF_LEN); return 0; } sballoc(sb, pkt); return (1); }
/* XXX: handle_ddp_data code duplication */ void insert_ddp_data(struct toepcb *toep, uint32_t n) { struct inpcb *inp = toep->inp; struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb = &inp->inp_socket->so_rcv; struct mbuf *m; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); m = get_ddp_mbuf(n); tp->rcv_nxt += n; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__)); tp->rcv_wnd -= n; #endif KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= n; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); }
/* * Mark ready "count" mbufs starting with "m". */ int sbready(struct sockbuf *sb, struct mbuf *m, int count) { u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; for (int i = 0; i < count; i++, m = m->m_next) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; } if (!blocker) return (EINPROGRESS); /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { KASSERT(m->m_flags & M_BLOCKED, ("%s: m %p !M_BLOCKED", __func__, m)); m->m_flags &= ~M_BLOCKED; sb->sb_acc += m->m_len; } sb->sb_fnrdy = m; return (0); }
/* * Drop data from (the front of) a sockbuf. */ void ofp_sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); sbdrop_internal(sb, len); }
void handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, struct sockbuf *sb, __be32 rcv_nxt) { struct mbuf *m; int len; SOCKBUF_LOCK_ASSERT(sb); INP_WLOCK_ASSERT(toep->inp); len = be32toh(rcv_nxt) - tp->rcv_nxt; /* Signal handle_ddp() to break out of its sleep loop. */ toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); if (len == 0) return; tp->rcv_nxt += len; KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= len; /* adjust for F_RX_FC_DDP */ #endif m = get_ddp_mbuf(len); sbappendstream_locked(sb, m, 0); toep->sb_cc = sbused(sb); }
/* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, and no merging of data types * will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. */ void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { int eor = 0; struct mbuf *o; SOCKBUF_LOCK_ASSERT(sb); while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } if (n && (n->m_flags & M_EOR) == 0 && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { if (n->m_flags & M_HOLE) { n->m_len += m->m_len; sb->sb_cc += m->m_len; m = m_free(m); continue; } else if (m->m_len <= MCLBYTES / 4) { /* XXX: Don't copy too much */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) /* XXX: Probably don't need.*/ sb->sb_ctl += m->m_len; m = m_free(m); continue; } } if (n) n->m_next = m; else sb->sb_mb = m; sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); n->m_flags |= eor; } SBLASTMBUFCHK(sb); }
/* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ int sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, struct thread *td) { rlim_t sbsize_limit; SOCKBUF_LOCK_ASSERT(sb); /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. */ if (cc > sb_max_adj) return (0); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) return (0); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); }
/* * Drop data from (the front of) a sockbuf, * and return it to caller. */ struct mbuf * sbcut_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); return (sbcut_internal(sb, len)); }
/* * Adjust sockbuf state reflecting allocation of m. */ void sballoc(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) { if (m->m_flags & M_NOTREADY) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; } else m->m_flags |= M_BLOCKED; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; sb->sb_mbcnt += MSIZE; sb->sb_mcnt += 1; if (m->m_flags & M_EXT) { sb->sb_mbcnt += m->m_ext.ext_size; sb->sb_ccnt += 1; } }
void sbflush_locked(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sbflush_internal(sb); }
void t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; struct sockbuf *sb = &so->so_rcv; struct toepcb *toep = tp->t_toe; int credits; INP_WLOCK_ASSERT(inp); SOCKBUF_LOCK_ASSERT(sb); KASSERT(toep->sb_cc >= sbused(sb), ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sbused(sb), toep->sb_cc)); toep->rx_credits += toep->sb_cc - sbused(sb); toep->sb_cc = sbused(sb); if (toep->rx_credits > 0 && (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { credits = send_rx_credits(sc, toep, toep->rx_credits); toep->rx_credits -= credits; tp->rcv_wnd += credits; tp->rcv_adv += credits; } }
/* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, and no merging of data types * will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. */ void ofp_sbcompress(struct sockbuf *sb, odp_packet_t pkt, int n) { (void)n; SOCKBUF_LOCK_ASSERT(sb); ofp_sockbuf_put_last(sb, pkt); }
/* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ void sowakeup(struct socket *so, struct sockbuf *sb) { int ret = 0; SOCKBUF_LOCK_ASSERT(sb); so_wake_poll(so, sb); if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_cc); } if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_DONTWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; SOCKBUF_UNLOCK(sb); if (ret == SU_ISCONNECTED) soisconnected(so); mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); }
int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { struct mbuf *m, *n, *mlast; int space; SOCKBUF_LOCK_ASSERT(sb); if (control == 0) panic("sbappendcontrol_locked"); space = m_length(control, &n) + m_length(m0, NULL); if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); }
/* * Allocate page pods for DDP buffer 1 (the user buffer) and set up the tag in * the TCB. We allocate page pods in multiples of PPOD_CLUSTER_SIZE. First we * try to allocate enough page pods to accommodate the whole buffer, subject to * the MAX_PPODS limit. If that fails we try to allocate PPOD_CLUSTER_SIZE page * pods before failing entirely. */ static int alloc_buf1_ppods(struct toepcb *toep, struct ddp_state *p, unsigned long addr, unsigned int len) { int err, tag, npages, nppods; struct tom_data *d = TOM_DATA(toep->tp_toedev); #if 0 SOCKBUF_LOCK_ASSERT(&so->so_rcv); #endif npages = ((addr & PAGE_MASK) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; nppods = min(pages2ppods(npages), MAX_PPODS); nppods = roundup2(nppods, PPOD_CLUSTER_SIZE); err = t3_alloc_ppods(d, nppods, &tag); if (err && nppods > PPOD_CLUSTER_SIZE) { nppods = PPOD_CLUSTER_SIZE; err = t3_alloc_ppods(d, nppods, &tag); } if (err) return (ENOMEM); p->ubuf_nppods = nppods; p->ubuf_tag = tag; #if NUM_DDP_KBUF == 1 t3_set_ddp_tag(toep, 1, tag << 6); #endif return (0); }
void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } }
void ofp_sbrelease_locked(struct sockbuf *sb, struct socket *so) { SOCKBUF_LOCK_ASSERT(sb); ofp_sbrelease_internal(sb, so); }
/* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ int ofp_sbreserve_locked(struct sockbuf *sb, uint64_t cc, struct socket *so, struct thread *td) { (void)so; (void)td; SOCKBUF_LOCK_ASSERT(sb); long mclbytes = global_param->pkt_pool.buffer_size; /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. */ uint64_t ofp_sb_max_adj = (int64_t)SB_MAX * global_param->pkt_pool.buffer_size / (MSIZE + mclbytes); /* adjusted ofp_sb_max */ if (cc > ofp_sb_max_adj) return (0); sb->sb_hiwat = cc; sb->sb_mbmax = min(cc * sb_efficiency, ofp_sb_max); if (sb->sb_lowat > (int)sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); }
/* * As above, except the mbuf chain begins a new record. */ void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); if (m0 == NULL) return; m_clrprotoflags(m0); /* * Put the first mbuf on the queue. Note this permits zero length * records. */ sballoc(sb, m0); SBLASTRECORDCHK(sb); SBLINKRECORD(sb, m0); sb->sb_mbtail = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } /* always call sbcompress() so it can do SBLASTMBUFCHK() */ sbcompress(sb, m, m0); }
/* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ void sowakeup(struct socket *so, struct sockbuf *sb) { int ret; SOCKBUF_LOCK_ASSERT(sb); selwakeuppri(sb->sb_sel, PSOCK); if (!SEL_WAITING(sb->sb_sel)) sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_acc); } KNOTE_LOCKED(&sb->sb_sel->si_note, 0); if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; if (sb->sb_flags & SB_AIO) sowakeup_aio(so, sb); SOCKBUF_UNLOCK(sb); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); }
/* * Drop data from (the front of) a sockbuf. */ void sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); m_freem(sbcut_internal(sb, len)); }
void ofp_socantrcvmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); so->so_rcv.sb_state |= SBS_CANTRCVMORE; sorwakeup_locked(so); }
void ofp_socantsendmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); }
/* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; return (msleep(&sb->sb_cc, &sb->sb_mtx, 0, "sbwait", sb->sb_timeo)); }
/* * Socantsendmore indicates that no more data will be sent on the socket; it * would normally be applied to a socket when the user informs the system * that no more data is to be sent, by the protocol code (in case * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be * received, and will normally be applied to the socket by a protocol when it * detects that the peer will send no more data. Data queued for reading in * the socket may yet be read. */ void socantsendmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_snd); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); }
void socantrcvmore_locked(struct socket *so) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); so->so_rcv.sb_state |= SBS_CANTRCVMORE; sorwakeup_locked(so); mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); }
/* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); }
int ofp_sbwait(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; return (ofp_msleep(&sb->sb_cc, &sb->sb_mtx, 0 /*HJo (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH*/, "sbwait", 1000000UL/HZ*sb->sb_timeo)); }
/* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); }
/* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void ofp_sbappendstream_locked(struct sockbuf *sb, odp_packet_t m) { SOCKBUF_LOCK_ASSERT(sb); SBLASTMBUFCHK(sb); sb->sb_lastrecord = sb->sb_put; ofp_sbcompress(sb, m, sb->sb_mbtail); SBLASTRECORDCHK(sb); }