static int thc_is_thr_name_taken(tcp_helper_cluster_t* thc, char* name)
{
  int i = 0;
  tcp_helper_resource_t* thr_walk = thc->thc_thr_head;

  while( thr_walk != NULL && i < thc->thc_cluster_size ) {
    if( strncmp(name, thr_walk->name, CI_CFG_STACK_NAME_LEN) == 0 )
      return 1;
    thr_walk = thr_walk->thc_thr_next;
    ++i;
  }
  ci_assert_le(i, thc->thc_cluster_size);
  return 0;
}
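/* A caller might use the check above to pick an unused stack name within the
 * cluster.  The helper below is a hypothetical sketch: thc_pick_free_name()
 * and the "stack-N" naming scheme are illustrative, not part of the source.
 */
static int thc_pick_free_name(tcp_helper_cluster_t* thc,
                              char name[CI_CFG_STACK_NAME_LEN])
{
  int n;
  for( n = 0; n < thc->thc_cluster_size; ++n ) {
    snprintf(name, CI_CFG_STACK_NAME_LEN, "stack-%d", n);
    if( ! thc_is_thr_name_taken(thc, name) )
      return 0;        /* name is free to use */
  }
  return -ENOSPC;      /* every candidate name was already taken */
}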
/* Initialise the IP timer scheduler. */
void ci_ip_timer_state_init(ci_netif* netif, unsigned cpu_khz)
{
  ci_ip_timer_state* ipts = IPTIMER_STATE(netif);
  int i;
  int us2isn;

  /* Initialise the cycle-to-tick constants. */
  ipts->khz = cpu_khz;
  ipts->ci_ip_time_frc2tick = shift_for_gran(CI_IP_TIME_APP_GRANULARITY,
                                             ipts->khz);
  ipts->ci_ip_time_frc2us = shift_for_gran(1, ipts->khz);

  /* The Linux kernel ticks the initial sequence number that it would use for
   * a given tuple every 64 ns.  Onload does the same when using
   * EF_TCP_ISN_MODE=clocked.  In EF_TCP_ISN_MODE=clocked+cache, however, our
   * use of the clock-driven ISN is slightly different: we remember old
   * sequence numbers in the case where the clock-driven ISN is not known to
   * be safe.  As such, we don't need it to tick so fast, and so we let it
   * tick at most every 256 ns.  This means that it takes more than eight
   * minutes to wrap by half, while four minutes is our assumed maximum
   * peer-MSL.  In practice this reduces the cases in which we have to
   * remember old sequence numbers. */
  us2isn = NI_OPTS(netif).tcp_isn_mode != 0 ? 2 : 4;
  ipts->ci_ip_time_frc2isn = ipts->ci_ip_time_frc2us > us2isn ?
                             ipts->ci_ip_time_frc2us - us2isn : 0;

  ci_ip_time_initial_sync(ipts);
  ipts->sched_ticks = ci_ip_time_now(netif);
  ipts->closest_timer = ipts->sched_ticks + IPTIME_INFINITY;

  /* To convert ms to ticks we use fixed-point arithmetic.  Calculate the
   * conversion factor, which is expected to be in the range (0.5, 1]. */
  ipts->ci_ip_time_ms2tick_fxp =
    (((ci_uint64)ipts->khz) << 32) /
    (1u << ipts->ci_ip_time_frc2tick);
  ci_assert_gt(ipts->ci_ip_time_ms2tick_fxp, 1ull<<31);
  ci_assert_le(ipts->ci_ip_time_ms2tick_fxp, 1ull<<32);

  /* Set module-specific time constants dependent on frc2tick. */
  ci_tcp_timer_init(netif);

  ci_ni_dllist_init(netif, &ipts->fire_list,
                    oo_ptr_to_statep(netif, &ipts->fire_list), "fire");

  /* Initialise the wheel lists. */
  for( i = 0; i < CI_IPTIME_WHEELSIZE; i++ )
    ci_ni_dllist_init(netif, &ipts->warray[i],
                      oo_ptr_to_statep(netif, &ipts->warray[i]), "timw");
}
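/* The ms-to-tick conversion above is plain Q32 fixed-point arithmetic: one
 * tick is 2^frc2tick CPU cycles, so ticks-per-millisecond = khz / 2^frc2tick,
 * stored scaled by 2^32.  The standalone sketch below reproduces that
 * calculation with assumed example values; ms_to_ticks() is a hypothetical
 * helper showing how such a factor is typically applied, not the real
 * Onload macro. */
#include <stdint.h>
#include <stdio.h>

static uint64_t ms2tick_fxp(uint64_t khz, unsigned frc2tick)
{
  /* Same expression as ci_ip_time_ms2tick_fxp above. */
  return (khz << 32) / (1u << frc2tick);
}

static uint32_t ms_to_ticks(uint64_t fxp, uint32_t ms)
{
  /* Multiply by the Q32 factor and drop the 32 fractional bits. */
  return (uint32_t)(((uint64_t)ms * fxp) >> 32);
}

int main(void)
{
  /* Assumed example: a 3 GHz CPU (khz = 3000000) with frc2tick = 22, i.e.
   * one tick is 2^22 cycles (~1.4 ms), which keeps ticks/ms in (0.5, 1]
   * as the asserts in ci_ip_timer_state_init() require. */
  uint64_t fxp = ms2tick_fxp(3000000, 22);
  printf("fxp = %llu, 100 ms = %u ticks\n",
         (unsigned long long)fxp, ms_to_ticks(fxp, 100));
  return 0;
}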
void ci_netif_filter_init(ci_netif_filter_table* tbl, int size_lg2)
{
  unsigned i;
  unsigned size = ci_pow2(size_lg2);

  ci_assert(tbl);
  ci_assert_gt(size_lg2, 0);
  ci_assert_le(size_lg2, 32);

  tbl->table_size_mask = size - 1;

  for( i = 0; i < size; ++i ) {
    tbl->table[i].id = EMPTY;
    tbl->table[i].route_count = 0;
    tbl->table[i].laddr = 0;
  }
}
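/* Because the table size is a power of two, masking with table_size_mask is
 * equivalent to taking the index modulo the table size.  The toy probe below
 * is a hypothetical sketch of how such a mask is typically used (matching on
 * laddr only, and assuming the table is never full and hash2 is odd); it is
 * not the real Onload lookup, which matches the full address/port tuple. */
static int toy_filter_lookup(const ci_netif_filter_table* tbl,
                             unsigned hash1, unsigned hash2, unsigned laddr)
{
  unsigned idx = hash1 & tbl->table_size_mask;
  while( tbl->table[idx].id != EMPTY ) {
    if( tbl->table[idx].laddr == laddr )
      return tbl->table[idx].id;
    /* Double-hashing probe: step by hash2, wrap with the mask. */
    idx = (idx + hash2) & tbl->table_size_mask;
  }
  return -1;  /* no matching entry */
}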
int onload_zc_send(struct onload_zc_mmsg* msgs, int mlen, int flags)
{
  int done = 0, last_fd = -1, i;
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi = NULL;

  Log_CALL(ci_log("%s(%p, %d, %x)", __FUNCTION__, msgs, mlen, flags));

  citp_enter_lib(&lib_context);

  for( i = 0; i < mlen; ++i ) {
    if( msgs[i].fd != last_fd ) {
      if( fdi != NULL )
        citp_fdinfo_release_ref(fdi, 0);
      fdi = citp_fdtable_lookup(msgs[i].fd);
      if( fdi == NULL ) {
        msgs[i].rc = -ESOCKTNOSUPPORT;
        ++done;
        goto out;
      }
      last_fd = msgs[i].fd;
    }

    CI_TRY_EQ( citp_fdinfo_get_ops(fdi)->zc_send(fdi, &msgs[i], flags), 1 );
    /* If we got an error, return the number of msgs that have had rc set
     * and exit.  fd_op should have updated msgs.rc appropriately. */
    ++done;
    if( msgs[i].rc < 0 )
      goto out;
  }

 out:
  if( fdi != NULL )
    citp_fdinfo_release_ref(fdi, 0);
  citp_exit_lib(&lib_context, TRUE);
  ci_assert_gt(done, 0);
  ci_assert_le(done, mlen);
  Log_CALL_RESULT(done);
  return done;
}
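/* onload_zc_send() returns the number of entries in msgs[] whose rc has been
 * set, so callers must check both the return value and each per-message rc.
 * The sketch below shows only that error-handling convention: it assumes
 * msgs[] has already been populated with fds, iovecs and zero-copy buffers,
 * that onload/extensions_zc.h provides the declarations, and
 * report_zc_send() itself is a hypothetical helper. */
#include <stdio.h>
#include <onload/extensions_zc.h>

static int report_zc_send(struct onload_zc_mmsg* msgs, int mlen)
{
  int n = onload_zc_send(msgs, mlen, 0);
  int i;
  for( i = 0; i < n; ++i ) {
    if( msgs[i].rc < 0 )
      fprintf(stderr, "msg %d (fd %d) failed: rc=%d\n",
              i, msgs[i].fd, msgs[i].rc);
  }
  /* Entries from n onwards were not attempted; their rc is not meaningful. */
  return n == mlen ? 0 : -1;
}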
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page,
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page.  - seen on 2.6.18, but not on 2.6.26 or later.
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif, SOCK_TO_TCP(s), page, offset, size,
                         flags);
  else
    /* Closed or listening.  Return EPIPE.  Do not send SIGPIPE, because
    ** Linux will do it for us.
    */
    return -s->tx_errno;
}
void ci_ip_send_tcp_slow(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt)
{
  /* We're here because the ipcache is not valid. */
  int rc, prev_mtu = ts->s.pkt.mtu;

  cicp_user_retrieve(ni, &ts->s.pkt, &ts->s.cp);

  if( ts->s.pkt.status == retrrc_success ) {
    if( ts->s.pkt.mtu != prev_mtu )
      CI_PMTU_TIMER_NOW(ni, &ts->pmtus);
    ci_ip_set_mac_and_port(ni, &ts->s.pkt, pkt);
    ci_netif_send(ni, pkt);
    return;
  }
  else if( ts->s.pkt.status == retrrc_localroute &&
           (ts->s.pkt.flags & CI_IP_CACHE_IS_LOCALROUTE) )
    ci_ip_local_send(ni, pkt, &ts->s, OO_SP_NULL);

  /* For TCP, we want the ipcache to only be valid when onloadable. */
  ci_ip_cache_invalidate(&ts->s.pkt);

  switch( ts->s.pkt.status ) {
  case retrrc_nomac:
    rc = 0;

    /* If we resend SYN and there is no MAC, it means ARP failed.
     * connect() should return with EHOSTUNREACH.
     * We verify twice - on the first and the second retransmit.
     * Very hackish. */
    if( ts->s.b.state == CI_TCP_SYN_SENT ) {
      if( ts->retransmits == 1 )
        ts->tcpflags |= CI_TCPT_FLAG_NO_ARP;
      else if( (ts->tcpflags & CI_TCPT_FLAG_NO_ARP) &&
               ts->retransmits == 2 ) {
        ci_tcp_drop(ni, ts, EHOSTUNREACH);
        return;
      }
    }

    cicp_user_defer_send(ni, retrrc_nomac, &rc, OO_PKT_P(pkt),
                         ts->s.pkt.ifindex);
    ++ts->stats.tx_nomac_defer;
    return;

  case retrrc_noroute:
    rc = -EHOSTUNREACH;
    break;

  case retrrc_alienroute:
  case retrrc_localroute:
    /* ?? TODO: inc some stat */
    return;

  default:
    ci_assert_lt(ts->s.pkt.status, 0);
    if( ts->s.pkt.status < 0 )
      rc = ts->s.pkt.status;
    else
      /* belt and braces... */
      rc = 0;
  }

  ci_assert_le(rc, 0);

  /* In most cases we should ignore the return code; the packet will be
   * resent later because of the RTO.  However, in SYN-SENT we should pass
   * errors to the user.  At the same time, we should not pass ENOBUFS to
   * the user - it is an internal problem of the cplane, so we should try
   * again.  Possibly there are other internal problems, such as ENOMEM.
   *
   * Also, do not break the connection when the first SYN fails:
   * - Linux does not do it;
   * - the cplane has some latency, so we get false positives here;
   * - ci_tcp_connect() does not expect it. */
  if( ts->s.b.state == CI_TCP_SYN_SENT && rc < 0 && ts->retransmits > 0 &&
      (rc == -EHOSTUNREACH || rc == -ENETUNREACH || rc == -ENETDOWN) )
    ci_tcp_drop(ni, ts, -rc);
}
int oo_iobufset_resource_alloc(struct oo_buffer_pages * pages,
                               struct efrm_pd *pd,
                               struct oo_iobufset **iobrs_out,
                               uint64_t *hw_addrs, int reset_pending)
{
  struct oo_iobufset *iobrs;
  int rc;
  int gfp_flag = (in_atomic() || in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
  int size = sizeof(struct oo_iobufset) + pages->n_bufs * sizeof(dma_addr_t);
  int nic_order;
  void **addrs;
  unsigned int i;

  ci_assert(iobrs_out);
  ci_assert(pd);

  if( size <= PAGE_SIZE ) {
    iobrs = kmalloc(size, gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    iobrs->dma_addrs = (void *)(iobrs + 1);
  }
  else {
    /* Avoid multi-page allocations */
    iobrs = kmalloc(sizeof(struct oo_iobufset), gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    ci_assert_le(pages->n_bufs * sizeof(dma_addr_t), PAGE_SIZE);
    iobrs->dma_addrs = kmalloc(pages->n_bufs * sizeof(dma_addr_t), gfp_flag);
    if( iobrs->dma_addrs == NULL ) {
      kfree(iobrs);
      return -ENOMEM;
    }
  }

  oo_atomic_set(&iobrs->ref_count, 1);
  iobrs->pd = pd;
  iobrs->pages = pages;
  nic_order = EFHW_GFP_ORDER_TO_NIC_ORDER(compound_order(pages->pages[0]));

  ci_assert_le(sizeof(void *) * pages->n_bufs, PAGE_SIZE);
  addrs = kmalloc(sizeof(void *) * pages->n_bufs, gfp_flag);
  if( addrs == NULL ) {
    rc = -ENOMEM;
    goto fail;
  }
  for( i = 0; i < pages->n_bufs; i++ ) {
    addrs[i] = page_address(pages->pages[i]);
  }

  rc = efrm_pd_dma_map(iobrs->pd, pages->n_bufs, nic_order, addrs,
                       sizeof(addrs[0]), &iobrs->dma_addrs[0],
                       sizeof(iobrs->dma_addrs[0]), hw_addrs,
                       sizeof(hw_addrs[0]), put_user_fake,
                       &iobrs->buf_tbl_alloc, reset_pending);
  kfree(addrs);
  if( rc < 0 )
    goto fail;

  OO_DEBUG_VERB(ci_log("%s: [%p] %d pages", __FUNCTION__, iobrs,
                       iobrs->pages->n_bufs));

  efrm_resource_ref(efrm_pd_to_resource(pd));
  oo_atomic_inc(&pages->ref_count);
  *iobrs_out = iobrs;
  return 0;

fail:
  oo_iobufset_free_memory(iobrs);
  return rc;
}
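/* Both oo_iobufset_resource_alloc() above and oo_bufpage_alloc() below use
 * the same trick to keep kmalloc() requests within a single page: if the
 * header plus its trailing array fits in PAGE_SIZE, allocate them together
 * and point the array just past the header; otherwise fall back to two
 * separate allocations, each no larger than a page (the callers above also
 * assert that the array alone fits in a page).  A generic, hypothetical
 * rendering of the pattern ('struct hdr' and alloc_hdr_and_array() are made
 * up for illustration):
 */
#include <linux/slab.h>   /* kmalloc, kfree */
#include <linux/mm.h>     /* PAGE_SIZE */

struct hdr {
  size_t n;
  void **arr;
};

static struct hdr *alloc_hdr_and_array(size_t n, gfp_t gfp)
{
  size_t size = sizeof(struct hdr) + n * sizeof(void *);
  struct hdr *h;

  if( size <= PAGE_SIZE ) {
    /* Everything fits in one page: one allocation, the array lives
     * immediately after the header. */
    h = kmalloc(size, gfp);
    if( h == NULL )
      return NULL;
    h->arr = (void *)(h + 1);
  }
  else {
    /* Keep each allocation at or below a page: high-order allocations are
     * more likely to fail once memory becomes fragmented. */
    h = kmalloc(sizeof(*h), gfp);
    if( h == NULL )
      return NULL;
    h->arr = kmalloc(n * sizeof(void *), gfp);
    if( h->arr == NULL ) {
      kfree(h);
      return NULL;
    }
  }
  h->n = n;
  return h;
}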
static int oo_bufpage_alloc(struct oo_buffer_pages **pages_out, int user_order,
                            int low_order, int *flags, int gfp_flag)
{
  int i;
  struct oo_buffer_pages *pages;
  int n_bufs = 1 << (user_order - low_order);
  int size = sizeof(struct oo_buffer_pages) + n_bufs * sizeof(struct page *);

  if( size < PAGE_SIZE ) {
    pages = kmalloc(size, gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    pages->pages = (void *)(pages + 1);
  }
  else {
    /* Avoid multi-page allocations */
    pages = kmalloc(sizeof(struct oo_buffer_pages), gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    ci_assert_le(n_bufs * sizeof(struct page *), PAGE_SIZE);
    pages->pages = kmalloc(n_bufs * sizeof(struct page *), gfp_flag);
    if( pages->pages == NULL ) {
      kfree(pages);
      return -ENOMEM;
    }
  }

  pages->n_bufs = n_bufs;
  oo_atomic_set(&pages->ref_count, 1);

#ifdef OO_DO_HUGE_PAGES
  if( (*flags & (OO_IOBUFSET_FLAG_HUGE_PAGE_TRY |
                 OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE)) &&
      gfp_flag == GFP_KERNEL && low_order == HPAGE_SHIFT - PAGE_SHIFT ) {
    if( oo_bufpage_huge_alloc(pages, flags) == 0 ) {
      *pages_out = pages;
      return 0;
    }
  }
  pages->shmid = -1;
  if( *flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE ) {
    ci_assert_equal(low_order, HPAGE_SHIFT - PAGE_SHIFT);
    return -ENOMEM;
  }
#endif

  if( low_order > 0 ) {
#ifdef OO_HAVE_COMPOUND_PAGES
    /* __GFP_COMP hint stolen from http://samirdas.blog.com/
     * __GFP_NOWARN is necessary because we properly handle high-order page
     * allocation failure by allocating pages one-by-one. */
    gfp_flag |= __GFP_COMP | __GFP_NOWARN;
#else
    return -EINVAL;
#endif
  }

  for( i = 0; i < n_bufs; ++i ) {
    pages->pages[i] = alloc_pages_node(numa_node_id(), gfp_flag, low_order);
    if( pages->pages[i] == NULL ) {
      OO_DEBUG_VERB(ci_log("%s: failed to allocate page (i=%u) "
                           "user_order=%d page_order=%d", __FUNCTION__, i,
                           user_order, low_order));
      pages->n_bufs = i;
      oo_iobufset_free_pages(pages);
      return -ENOMEM;
    }
    memset(page_address(pages->pages[i]), 0, PAGE_SIZE << low_order);
  }

  *pages_out = pages;
  return 0;
}