int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { bus_dma_segment_t *segs; int error; int nsegs; if ((flags & BUS_DMA_NOWAIT) == 0) _bus_dmamap_waitok(dmat, map, mem, callback, callback_arg); nsegs = -1; error = 0; switch (mem->md_type) { case MEMDESC_VADDR: error = _bus_dmamap_load_buffer(dmat, map, mem->u.md_vaddr, mem->md_opaque, kernel_pmap, flags, NULL, &nsegs); break; case MEMDESC_PADDR: error = _bus_dmamap_load_phys(dmat, map, mem->u.md_paddr, mem->md_opaque, flags, NULL, &nsegs); break; case MEMDESC_VLIST: error = _bus_dmamap_load_vlist(dmat, map, mem->u.md_list, mem->md_opaque, kernel_pmap, &nsegs, flags); break; case MEMDESC_PLIST: error = _bus_dmamap_load_plist(dmat, map, mem->u.md_list, mem->md_opaque, &nsegs, flags); break; case MEMDESC_BIO: error = _bus_dmamap_load_bio(dmat, map, mem->u.md_bio, &nsegs, flags); break; case MEMDESC_UIO: error = _bus_dmamap_load_uio(dmat, map, mem->u.md_uio, &nsegs, flags); break; case MEMDESC_MBUF: error = _bus_dmamap_load_mbuf_sg(dmat, map, mem->u.md_mbuf, NULL, &nsegs, flags); break; case MEMDESC_CCB: error = _bus_dmamap_load_ccb(dmat, map, mem->u.md_ccb, &nsegs, flags); break; } nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, flags, error, nsegs); if (error == EINPROGRESS) return (error); segs = _bus_dmamap_complete(dmat, map, NULL, nsegs, error); if (error) (*callback)(callback_arg, segs, 0, error); else (*callback)(callback_arg, segs, nsegs, 0); /* * Return ENOMEM to the caller so that it can pass it up the stack. * This error only happens when NOWAIT is set, so deferral is disabled. */ if (error == ENOMEM) return (error); return (0); }
static int do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1); u_int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct socket *so; struct sockbuf *sb; struct tcpcb *tp; struct icl_cxgbei_conn *icc; struct icl_conn *ic; struct icl_cxgbei_pdu *icp = toep->ulpcb2; struct icl_pdu *ip; u_int pdu_len, val; MPASS(m == NULL); /* Must already be assembling a PDU. */ MPASS(icp != NULL); MPASS(icp->pdu_flags & SBUF_ULP_FLAG_HDR_RCVD); /* Data is optional. */ ip = &icp->ip; icp->pdu_flags |= SBUF_ULP_FLAG_STATUS_RCVD; val = ntohl(cpl->ddpvld); if (val & F_DDP_PADDING_ERR) icp->pdu_flags |= SBUF_ULP_FLAG_PAD_ERROR; if (val & F_DDP_HDRCRC_ERR) icp->pdu_flags |= SBUF_ULP_FLAG_HCRC_ERROR; if (val & F_DDP_DATACRC_ERR) icp->pdu_flags |= SBUF_ULP_FLAG_DCRC_ERROR; if (ip->ip_data_mbuf == NULL) { /* XXXNP: what should ip->ip_data_len be, and why? */ icp->pdu_flags |= SBUF_ULP_FLAG_DATA_DDPED; } pdu_len = ntohs(cpl->len); /* includes everything. */ INP_WLOCK(inp); if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, pdu_len, inp->inp_flags); INP_WUNLOCK(inp); icl_cxgbei_conn_pdu_free(NULL, ip); #ifdef INVARIANTS toep->ulpcb2 = NULL; #endif return (0); } tp = intotcpcb(inp); MPASS(icp->pdu_seq == tp->rcv_nxt); MPASS(tp->rcv_wnd >= pdu_len); tp->rcv_nxt += pdu_len; tp->rcv_wnd -= pdu_len; tp->t_rcvtime = ticks; /* update rx credits */ toep->rx_credits += pdu_len; t4_rcvd(&toep->td->tod, tp); /* XXX: sc->tom_softc.tod */ so = inp->inp_socket; sb = &so->so_rcv; SOCKBUF_LOCK(sb); icc = toep->ulpcb; if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) { CTR5(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x", __func__, tid, pdu_len, icc, sb->sb_state); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); icl_cxgbei_conn_pdu_free(NULL, ip); #ifdef INVARIANTS toep->ulpcb2 = NULL; #endif return (0); } MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE); ic = &icc->ic; icl_cxgbei_new_pdu_set_conn(ip, ic); MPASS(m == NULL); /* was unused, we'll use it now. */ m = sbcut_locked(sb, sbused(sb)); /* XXXNP: toep->sb_cc accounting? */ if (__predict_false(m != NULL)) { int len = m_length(m, NULL); /* * PDUs were received before the tid transitioned to ULP mode. * Convert them to icl_cxgbei_pdus and send them to ICL before * the PDU in icp/ip. */ CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, tid, len); /* XXXNP: needs to be rewritten. */ if (len == sizeof(struct iscsi_bhs) || len == 4 + sizeof(struct iscsi_bhs)) { struct icl_cxgbei_pdu *icp0; struct icl_pdu *ip0; ip0 = icl_cxgbei_new_pdu(M_NOWAIT); icl_cxgbei_new_pdu_set_conn(ip0, ic); if (ip0 == NULL) CXGBE_UNIMPLEMENTED("PDU allocation failure"); icp0 = ip_to_icp(ip0); icp0->pdu_seq = 0; /* XXX */ icp0->pdu_flags = SBUF_ULP_FLAG_HDR_RCVD | SBUF_ULP_FLAG_STATUS_RCVD; m_copydata(m, 0, sizeof(struct iscsi_bhs), (void *)ip0->ip_bhs); STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip0, ip_next); } m_freem(m); } #if 0 CTR4(KTR_CXGBE, "%s: tid %u, pdu_len %u, pdu_flags 0x%x", __func__, tid, pdu_len, icp->pdu_flags); #endif STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next); if ((icc->rx_flags & RXF_ACTIVE) == 0) { struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt]; mtx_lock(&cwt->cwt_lock); icc->rx_flags |= RXF_ACTIVE; TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link); if (cwt->cwt_state == CWT_SLEEPING) { cwt->cwt_state = CWT_RUNNING; cv_signal(&cwt->cwt_cv); } mtx_unlock(&cwt->cwt_lock); } SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); #ifdef INVARIANTS toep->ulpcb2 = NULL; #endif return (0); }
static int handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len) { uint32_t report = be32toh(ddp_report); unsigned int db_flag; struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; struct mbuf *m; db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE; if (__predict_false(!(report & F_DDP_INV))) CXGBE_UNIMPLEMENTED("DDP buffer still valid"); INP_WLOCK(inp); so = inp_inpcbtosocket(inp); sb = &so->so_rcv; if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) { /* * XXX: think a bit more. * tcpcb probably gone, but socket should still be around * because we always wait for DDP completion in soreceive no * matter what. Just wake it up and let it clean up. */ CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x", __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags); SOCKBUF_LOCK(sb); goto wakeup; } tp = intotcpcb(inp); len += be32toh(rcv_nxt) - tp->rcv_nxt; tp->rcv_nxt += len; tp->t_rcvtime = ticks; #ifndef USE_DDP_RX_FLOW_CONTROL KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; #endif m = get_ddp_mbuf(len); SOCKBUF_LOCK(sb); if (report & F_DDP_BUF_COMPLETE) toep->ddp_score = DDP_HIGH_SCORE; else discourage_ddp(toep); /* receive buffer autosize */ if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } KASSERT(toep->sb_cc >= sb->sb_cc, ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sb->sb_cc, toep->sb_cc)); toep->rx_credits += toep->sb_cc - sb->sb_cc; #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m); toep->sb_cc = sb->sb_cc; wakeup: KASSERT(toep->ddp_flags & db_flag, ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x", __func__, toep, toep->ddp_flags, report)); toep->ddp_flags &= ~db_flag; sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); return (0); }
/* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) != 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } continue; } } else if (SX_SHARERS(x) && spintries < ASX_RETRIES) { GIANT_SAVE(); spintries++; for (i = 0; i < ASX_LOOPS; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } if (i != ASX_LOOPS) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * than loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } GIANT_RESTORE(); if (!error) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt)); #endif return (error); }
/* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. */ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef KDTRACE_HOOKS uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif while (!_rw_write_lock(rw, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested, waittime, file, line); }
static int do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_rx_data *cpl = mtod(m, const void *); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp; struct socket *so; struct sockbuf *sb; int len; uint32_t ddp_placed = 0; if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); m_freem(m); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); /* strip off CPL header */ m_adj(m, sizeof(*cpl)); len = m->m_pkthdr.len; INP_WLOCK(inp); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", __func__, tid, len, inp->inp_flags); INP_WUNLOCK(inp); m_freem(m); return (0); } tp = intotcpcb(inp); if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; tp->rcv_nxt += len; KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); tp->rcv_wnd -= len; tp->t_rcvtime = ticks; so = inp_inpcbtosocket(inp); sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", __func__, tid, len); m_freem(m); SOCKBUF_UNLOCK(sb); INP_WUNLOCK(inp); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); } /* receive buffer autosize */ if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && len > (sbspace(sb) / 8 * 7)) { unsigned int hiwat = sb->sb_hiwat; unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); if (!sbreserve_locked(sb, newsize, so, NULL)) sb->sb_flags &= ~SB_AUTOSIZE; else toep->rx_credits += newsize - hiwat; } if (toep->ulp_mode == ULP_MODE_TCPDDP) { int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; if (changed) { if (toep->ddp_flags & DDP_SC_REQ) toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; else { KASSERT(cpl->ddp_off == 1, ("%s: DDP switched on by itself.", __func__)); /* Fell out of DDP mode */ toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); if (ddp_placed) insert_ddp_data(toep, ddp_placed); } } if ((toep->ddp_flags & DDP_OK) == 0 && time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) { toep->ddp_score = DDP_LOW_SCORE; toep->ddp_flags |= DDP_OK; CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u", __func__, tid, time_uptime); } if (toep->ddp_flags & DDP_ON) { /* * CPL_RX_DATA with DDP on can only be an indicate. Ask * soreceive to post a buffer or disable DDP. The * payload that arrived in this indicate is appended to * the socket buffer as usual. */ #if 0 CTR5(KTR_CXGBE, "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)", __func__, tid, toep->flags, be32toh(cpl->seq), len); #endif sb->sb_flags |= SB_DDP_INDICATE; } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK && tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) { /* * DDP allowed but isn't on (and a request to switch it * on isn't pending either), and conditions are ripe for * it to work. Switch it on. */ enable_ddp(sc, toep); } } KASSERT(toep->sb_cc >= sb->sb_cc, ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sb->sb_cc, toep->sb_cc)); toep->rx_credits += toep->sb_cc - sb->sb_cc; sbappendstream_locked(sb, m); toep->sb_cc = sb->sb_cc; sorwakeup_locked(so); SOCKBUF_UNLOCK_ASSERT(sb); INP_WUNLOCK(inp); return (0); }
/* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; struct sockbuf *sb; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_WLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; sb = &so->so_rcv; SOCKBUF_LOCK(sb); if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) { m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) CXGBE_UNIMPLEMENTED("mbuf alloc failure"); m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt; m->m_flags |= M_DDP; /* Data is already where it should be */ m->m_data = "nothing to see here"; tp->rcv_nxt = be32toh(cpl->rcv_nxt); toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE); KASSERT(toep->sb_cc >= sb->sb_cc, ("%s: sb %p has more data (%d) than last time (%d).", __func__, sb, sb->sb_cc, toep->sb_cc)); toep->rx_credits += toep->sb_cc - sb->sb_cc; #ifdef USE_DDP_RX_FLOW_CONTROL toep->rx_credits -= m->m_len; /* adjust for F_RX_FC_DDP */ #endif sbappendstream_locked(sb, m); toep->sb_cc = sb->sb_cc; } socantrcvmore_locked(so); /* unlocks the sockbuf */ KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_WUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); return (0); }
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; int ret; int eqsize; struct wrqe *wr; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->rq.qid) goto err1; if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); if (!wq->sq.sw_sq) goto err2; wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); if (!wq->rq.sw_rq) goto err3; } /* RQT must be a power of 2. */ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) goto err4; if (alloc_host_sq(rdev, &wq->sq)) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = contigmalloc(wq->rq.memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (wq->rq.queue) wq->rq.dma_addr = vtophys(wq->rq.queue); else goto err6; CTR5(KTR_IW_CXGBE, "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__, wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue), wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) + sc->sge_kdoorbell_reg); wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res) + sc->sge_gts_reg); if (user) { wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->sq.qid << rdev->qpshift)); wq->sq.udb &= PAGE_MASK; wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->rq.qid << rdev->qpshift)); wq->rq.udb &= PAGE_MASK; } wq->rdev = rdev; wq->rq.msn = 1; /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; wr = alloc_wrqe(wr_len, &sc->sge.mgmtq); if (wr == NULL) return (0); res_wr = wrtod(wr); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( V_FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | F_FW_WR_COMPL); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + (sc->params.sge.spg_len / EQ_ESIZE); res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + (sc->params.sge.spg_len / EQ_ESIZE); res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(&wr_wait); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; CTR6(KTR_IW_CXGBE, "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx", __func__, wq->sq.qid, wq->rq.qid, wq->db, (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); return 0; err7: contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); err6: dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: kfree(wq->rq.sw_rq); err3: kfree(wq->sq.sw_sq); err2: c4iw_put_qpid(rdev, wq->rq.qid, uctx); err1: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return -ENOMEM; }
/* * Peer has sent us a FIN. */ static int do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_peer_close *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct toepcb *toep = lookup_tid(sc, tid); struct inpcb *inp = toep->inp; struct tcpcb *tp = NULL; struct socket *so; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PEER_CLOSE, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); if (__predict_false(toep->flags & TPF_SYNQE)) { #ifdef INVARIANTS struct synq_entry *synqe = (void *)toep; INP_WLOCK(synqe->lctx->inp); if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } else { /* * do_pass_accept_req is still running and will * eventually take care of this tid. */ } INP_WUNLOCK(synqe->lctx->inp); #endif CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, toep, toep->flags); return (0); } KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); if (toep->flags & TPF_ABORT_SHUTDOWN) goto done; tp->rcv_nxt++; /* FIN */ so = inp->inp_socket; if (toep->ulp_mode == ULP_MODE_TCPDDP) { DDP_LOCK(toep); if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) handle_ddp_close(toep, tp, cpl->rcv_nxt); DDP_UNLOCK(toep); } socantrcvmore(so); if (toep->ulp_mode != ULP_MODE_RDMA) { KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, be32toh(cpl->rcv_nxt))); } switch (tp->t_state) { case TCPS_SYN_RECEIVED: tp->t_starttime = ticks; /* FALLTHROUGH */ case TCPS_ESTABLISHED: tp->t_state = TCPS_CLOSE_WAIT; break; case TCPS_FIN_WAIT_1: tp->t_state = TCPS_CLOSING; break; case TCPS_FIN_WAIT_2: tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); INP_WLOCK(inp); final_cpl_received(toep); return (0); default: log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", __func__, tid, tp->t_state); } done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (0); }
static int write_page_pods(struct adapter *sc, struct toepcb *toep, struct ddp_buffer *db) { struct wrqe *wr; struct ulp_mem_io *ulpmc; struct ulptx_idata *ulpsc; struct pagepod *ppod; int i, j, k, n, chunk, len, ddp_pgsz, idx; u_int ppod_addr; uint32_t cmd; cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE)); if (is_t4(sc)) cmd |= htobe32(F_ULP_MEMIO_ORDER); else cmd |= htobe32(F_T5_ULP_MEMIO_IMM); ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(db->tag)]; ppod_addr = db->ppod_addr; for (i = 0; i < db->nppods; ppod_addr += chunk) { /* How many page pods are we writing in this cycle */ n = min(db->nppods - i, NUM_ULP_TX_SC_IMM_PPODS); chunk = PPOD_SZ(n); len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); wr = alloc_wrqe(len, toep->ctrlq); if (wr == NULL) return (ENOMEM); /* ok to just bail out */ ulpmc = wrtod(wr); INIT_ULPTX_WR(ulpmc, len, 0, 0); ulpmc->cmd = cmd; ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(chunk); ppod = (struct pagepod *)(ulpsc + 1); for (j = 0; j < n; i++, j++, ppod++) { ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | V_PPOD_TID(toep->tid) | db->tag); ppod->len_offset = htobe64(V_PPOD_LEN(db->len) | V_PPOD_OFST(db->offset)); ppod->rsvd = 0; idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); for (k = 0; k < nitems(ppod->addr); k++) { if (idx < db->npages) { ppod->addr[k] = htobe64(db->pages[idx]->phys_addr); idx += ddp_pgsz / PAGE_SIZE; } else ppod->addr[k] = 0; #if 0 CTR5(KTR_CXGBE, "%s: tid %d ppod[%d]->addr[%d] = %p", __func__, toep->tid, i, k, htobe64(ppod->addr[k])); #endif } } t4_wrq_tx(sc, wr); } return (0); }
struct tte * tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va, u_long sz, u_long data) { struct tte *bucket; struct tte *rtp; struct tte *tp; vm_offset_t ova; int b0; int i; if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) { CTR5(KTR_SPARE2, "tsb_tte_enter: off colour va=%#lx pa=%#lx o=%p ot=%d pi=%#lx", va, VM_PAGE_TO_PHYS(m), m->object, m->object ? m->object->type : -1, m->pindex); if (pm == kernel_pmap) PMAP_STATS_INC(tsb_nenter_k_oc); else PMAP_STATS_INC(tsb_nenter_u_oc); } mtx_assert(&vm_page_queue_mtx, MA_OWNED); PMAP_LOCK_ASSERT(pm, MA_OWNED); if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nenter_k); tp = tsb_kvtotte(va); KASSERT((tp->tte_data & TD_V) == 0, ("tsb_tte_enter: replacing valid kernel mapping")); goto enter; } PMAP_STATS_INC(tsb_nenter_u); bucket = tsb_vtobucket(pm, sz, va); tp = NULL; rtp = NULL; b0 = rd(tick) & (TSB_BUCKET_SIZE - 1); i = b0; do { if ((bucket[i].tte_data & TD_V) == 0) { tp = &bucket[i]; break; } if (tp == NULL) { if ((bucket[i].tte_data & TD_REF) == 0) tp = &bucket[i]; else if (rtp == NULL) rtp = &bucket[i]; } } while ((i = (i + 1) & (TSB_BUCKET_SIZE - 1)) != b0); if (tp == NULL) tp = rtp; if ((tp->tte_data & TD_V) != 0) { PMAP_STATS_INC(tsb_nrepl); ova = TTE_GET_VA(tp); pmap_remove_tte(pm, NULL, tp, ova); tlb_page_demap(pm, ova); } enter: if ((m->flags & PG_FICTITIOUS) == 0) { data |= TD_CP; if ((m->flags & PG_UNMANAGED) == 0) { pm->pm_stats.resident_count++; data |= TD_PV; } if (pmap_cache_enter(m, va) != 0) data |= TD_CV; TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link); } else data |= TD_FAKE | TD_E; tp->tte_vpn = TV_VPN(va, sz); tp->tte_data = data; return (tp); }
/* * active open (soconnect). * * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. */ int t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct tom_data *td = tod_td(tod); struct toepcb *toep = NULL; struct wrqe *wr = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct port_info *pi; int mtu_idx, rscale, qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int reason; INP_WLOCK_ASSERT(inp); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) pi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_COOKIE(rt_ifp); pi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->tid = alloc_atid(sc, toep); if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->l2te = t4_l2t_get(pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); isipv6 = nam->sa_family == AF_INET6; wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) : sizeof(struct cpl_act_open_req), toep->ctrlq); if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) set_tcpddp_ulp_mode(toep); else toep->ulp_mode = ULP_MODE_NONE; SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a * no-op as MAX_RCV_WND is much larger than the default sb_max. */ if (tp->t_flags & TF_REQ_SCALE) rscale = tp->request_r_scale = select_rcv_wscale(); else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid; if (isipv6) { struct cpl_act_open_req6 *cpl = wrtod(wr); if ((inp->inp_vflag & INP_IPV6) == 0) { /* XXX think about this a bit more */ log(LOG_ERR, "%s: time to think about AF_INET6 + vflag 0x%x.\n", __func__, inp->inp_vflag); DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); } toep->ce = hold_lip(td, &inp->in6p_laddr); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); INIT_TP_WR(cpl, 0); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); cpl->opt2 = calc_opt2a(so, toep); } else { struct cpl_act_open_req *cpl = wrtod(wr); INIT_TP_WR(cpl, 0); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, toep->rx_credits, toep->ulp_mode); cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); cpl->opt2 = calc_opt2a(so, toep); } CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); offload_socket(so, toep); rc = t4_l2t_send(sc, wr, toep->l2te); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); reason = __LINE__; failed: CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); if (wr) free_wrqe(wr); if (toep) { if (toep->tid >= 0) free_atid(sc, toep->tid); if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) release_lip(td, toep->ce); free_toepcb(toep); } return (rc); }