ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page,
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* "flags" is really "more".  Convert it. */
  if( flags )
    flags = MSG_MORE;

  /* [more] is sometimes true even for the last page.  We get a little
  ** closer to the truth by spotting that we're not reading to the end of
  ** the page.  - seen on 2.6.18, but not on 2.6.26 or later
  */
  if( offset + size < CI_PAGE_SIZE && flags )
    flags = 0;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if(CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ))
    return sendpage_copy(&trs->netif, SOCK_TO_TCP(s), page, offset, size,
                         flags);
  else
    /* Closed or listening.  Return EPIPE.  Do not send SIGPIPE, because
    ** Linux will do it for us. */
    return -s->tx_errno;
}
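/* Illustrative sketch, not part of the driver source: a handler with the
 * signature above is wired into a file_operations table through the
 * .sendpage member, which kernels provided up until the sendpage
 * infrastructure was removed.  The table name and the omitted members are
 * hypothetical; the driver's real table is defined elsewhere. */
static const struct file_operations example_onload_fops = {
  .owner    = THIS_MODULE,
  .sendpage = linux_tcp_helper_fop_sendpage,
  /* .read, .write, .poll, .unlocked_ioctl, ... omitted */
};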
static int setup_trampoline(struct pt_regs *regs, int opcode, int arg,
                            int bits)
{
  struct mm_hash *p;
  ci_uintptr_t trampoline_entry = 0, trampoline_exclude = 0,
               trampoline_toc = 0, trampoline_fixup = 0;
  int rc = -EBADF;

  read_lock(&oo_mm_tbl_lock);
  p = oo_mm_tbl_lookup(current->mm);
  if (p) {
    trampoline_entry = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_entry);
    trampoline_exclude = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_exclude);
    trampoline_toc = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_toc);
    trampoline_fixup = (ci_uintptr_t) CI_USER_PTR_GET(p->trampoline_user_fixup);
  }
  read_unlock(&oo_mm_tbl_lock);

  TRAMP_DEBUG("%s: trampoline_entry = %p\n", __func__,
              (void *)trampoline_entry);

  /* OK.  We have the entry - set up a trampoline to user space. */
  if (trampoline_entry) {
    if (!access_ok(VERIFY_READ, trampoline_entry, 1)) {
      /* Can't read this address.  Fail! */
      ci_log("Pid %d (mm=%p) has bad trampoline entry: %p",
             current->tgid, current->mm, (void *)trampoline_entry);
      return -EBADF;
    }

    /* Check for the excluded address. */
    if (regs->nip == trampoline_exclude) {
      TRAMP_DEBUG("Ignoring call from excluded address 0x%08lx",
                  (unsigned long)trampoline_exclude);
      return -EBUSY;
    }

    TRAMP_DEBUG("%s: bits = %d; set up trampoline.\n", __func__, bits);
    if (bits == TRAMPOLINE_BITS_64) {
      setup_trampoline64(regs, opcode, arg, (void *)trampoline_entry,
                         (void *)trampoline_toc, (void *)trampoline_fixup);
    }
#ifdef CONFIG_COMPAT
    else {
      setup_trampoline32(regs, opcode, arg, (void *)trampoline_entry,
                         (void *)trampoline_toc, (void *)trampoline_fixup);
    }
#endif
    rc = 0;
  }
  else {
    OO_DEBUG_VERB(ci_log("Error -- attempt to trampoline for unknown process"));
    rc = -ENOENT;
  }
  return rc;
}
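/* Illustrative sketch of the calling pattern, assuming a hypothetical
 * intercept path (the "example_" names are not part of the driver): a
 * syscall whose tail must complete in user space asks setup_trampoline()
 * to rewrite the saved registers so that the return to user mode lands on
 * the registered trampoline entry, carrying the opcode and argument. */
static int example_intercept(struct pt_regs *regs, int opcode, int arg)
{
  int rc = setup_trampoline(regs, opcode, arg, TRAMPOLINE_BITS_64);
  if (rc == -ENOENT) {
    /* This mm never registered a trampoline: handle entirely in kernel. */
  }
  else if (rc == -EBUSY) {
    /* Call came from the excluded address: do not re-enter user code. */
  }
  return rc;
}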
int oo_iobufset_pages_alloc(int nic_order, int *flags,
                            struct oo_buffer_pages **pages_out)
{
  int rc;
  int gfp_flag = (in_atomic() || in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
  int order = nic_order - fls(EFHW_NIC_PAGES_IN_OS_PAGE) + 1;

  ci_assert(pages_out);

#if CI_CFG_PKTS_AS_HUGE_PAGES
  if( *flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE ) {
# ifdef OO_DO_HUGE_PAGES
    rc = oo_bufpage_alloc(pages_out, order, order, flags, gfp_flag);
# else
    rc = -ENOMEM;
# endif
  }
  else
#endif
  {
#ifdef OO_HAVE_COMPOUND_PAGES
    int low_order = order;
    if( *flags & OO_IOBUFSET_FLAG_COMPOUND_PAGE_LIMIT )
      low_order -= 3;
    else if( *flags & OO_IOBUFSET_FLAG_COMPOUND_PAGE_NONE )
      low_order = 0;
    do {
      /* It is better to allocate high-order pages for many reasons:
       * - in theory, access to contiguous memory is faster;
       * - with high-order pages, the dma_addrs array is small and fits
       *   into one or two pages.
       *
       * So, if one-compound-page-for-all fails, we try a lower order in
       * the hope of keeping both the dma_addrs array and the packet
       * buffers themselves in not-very-high-order allocations.
       *
       * TODO: it may be useful to go through EF10 page orders:
       * x86: 9(hugepage),8,4,0
       * ppc: 4(max,=9nic),3(=8nic),0(=5nic)
       */
      rc = oo_bufpage_alloc(pages_out, order, low_order, flags, gfp_flag);
      if( rc == 0 || low_order == 0 )
        break;
      low_order -= 3;
      if( low_order < 0 )
        low_order = 0;
    } while( 1 );
#elif defined(OO_DO_HUGE_PAGES) && CI_CFG_PKTS_AS_HUGE_PAGES
    rc = -ENOMEM;
    if( *flags & (OO_IOBUFSET_FLAG_HUGE_PAGE_TRY |
                  OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE) )
      rc = oo_bufpage_alloc(pages_out, order, order, flags, gfp_flag);
    if( rc != 0 )
      rc = oo_bufpage_alloc(pages_out, order, 0, flags, gfp_flag);
#else
    rc = oo_bufpage_alloc(pages_out, order, 0, flags, gfp_flag);
#endif
  }

  OO_DEBUG_VERB(ci_log("%s: [%p] order %d", __FUNCTION__, *pages_out, order));
  return rc;
}
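/* Illustrative sketch (not part of the driver) of the order conversion
 * above, assuming EFHW_NIC_PAGES_IN_OS_PAGE is a power of two: for such
 * values fls(n) == log2(n) + 1, so the expression reduces to
 * nic_order - log2(EFHW_NIC_PAGES_IN_OS_PAGE).  On a host whose OS page
 * size equals the NIC page size the ratio is 1, fls(1) == 1, and the OS
 * order equals the NIC order. */
static inline int example_nic_to_os_order(int nic_order,
                                          int nic_pages_per_os_page)
{
  return nic_order - fls(nic_pages_per_os_page) + 1;
}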
int oo_iobufset_resource_alloc(struct oo_buffer_pages *pages,
                               struct efrm_pd *pd,
                               struct oo_iobufset **iobrs_out,
                               uint64_t *hw_addrs, int reset_pending)
{
  struct oo_iobufset *iobrs;
  int rc;
  int gfp_flag = (in_atomic() || in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
  int size = sizeof(struct oo_iobufset) + pages->n_bufs * sizeof(dma_addr_t);
  int nic_order;
  void **addrs;
  unsigned int i;

  ci_assert(iobrs_out);
  ci_assert(pd);

  if( size <= PAGE_SIZE ) {
    iobrs = kmalloc(size, gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    iobrs->dma_addrs = (void *)(iobrs + 1);
  }
  else {
    /* Avoid multi-page allocations */
    iobrs = kmalloc(sizeof(struct oo_iobufset), gfp_flag);
    if( iobrs == NULL )
      return -ENOMEM;
    ci_assert_le(pages->n_bufs * sizeof(dma_addr_t), PAGE_SIZE);
    iobrs->dma_addrs = kmalloc(pages->n_bufs * sizeof(dma_addr_t), gfp_flag);
    if( iobrs->dma_addrs == NULL ) {
      kfree(iobrs);
      return -ENOMEM;
    }
  }

  oo_atomic_set(&iobrs->ref_count, 1);
  iobrs->pd = pd;
  iobrs->pages = pages;

  nic_order = EFHW_GFP_ORDER_TO_NIC_ORDER(compound_order(pages->pages[0]));

  ci_assert_le(sizeof(void *) * pages->n_bufs, PAGE_SIZE);
  addrs = kmalloc(sizeof(void *) * pages->n_bufs, gfp_flag);
  if (addrs == NULL) {
    rc = -ENOMEM;
    goto fail;
  }
  for (i = 0; i < pages->n_bufs; i++) {
    addrs[i] = page_address(pages->pages[i]);
  }

  rc = efrm_pd_dma_map(iobrs->pd, pages->n_bufs, nic_order,
                       addrs, sizeof(addrs[0]),
                       &iobrs->dma_addrs[0], sizeof(iobrs->dma_addrs[0]),
                       hw_addrs, sizeof(hw_addrs[0]),
                       put_user_fake, &iobrs->buf_tbl_alloc, reset_pending);
  kfree(addrs);
  if( rc < 0 )
    goto fail;

  OO_DEBUG_VERB(ci_log("%s: [%p] %d pages", __FUNCTION__, iobrs,
                       iobrs->pages->n_bufs));

  efrm_resource_ref(efrm_pd_to_resource(pd));
  oo_atomic_inc(&pages->ref_count);
  *iobrs_out = iobrs;
  return 0;

fail:
  oo_iobufset_free_memory(iobrs);
  return rc;
}
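/* Illustrative sketch (hypothetical "example_vec" type, not part of the
 * driver) of the allocation idiom used both above and in
 * oo_bufpage_alloc() below: keep a variable-length array inline after its
 * header when everything fits in one page, otherwise make two separate
 * single-page-or-smaller allocations so kmalloc() never has to find
 * physically contiguous multi-page memory. */
struct example_vec {
  int n;
  dma_addr_t *addrs;
};

static struct example_vec *example_vec_alloc(int n, gfp_t gfp)
{
  size_t size = sizeof(struct example_vec) + n * sizeof(dma_addr_t);
  struct example_vec *v;

  if( size <= PAGE_SIZE ) {
    v = kmalloc(size, gfp);
    if( v == NULL )
      return NULL;
    v->addrs = (dma_addr_t *)(v + 1);  /* array shares the header's block */
  }
  else {
    v = kmalloc(sizeof(*v), gfp);
    if( v == NULL )
      return NULL;
    v->addrs = kmalloc(n * sizeof(dma_addr_t), gfp);
    if( v->addrs == NULL ) {
      kfree(v);
      return NULL;
    }
  }
  v->n = n;
  return v;
}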
static int oo_bufpage_alloc(struct oo_buffer_pages **pages_out,
                            int user_order, int low_order, int *flags,
                            int gfp_flag)
{
  int i;
  struct oo_buffer_pages *pages;
  int n_bufs = 1 << (user_order - low_order);
  int size = sizeof(struct oo_buffer_pages) + n_bufs * sizeof(struct page *);

  if( size < PAGE_SIZE ) {
    pages = kmalloc(size, gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    pages->pages = (void *)(pages + 1);
  }
  else {
    /* Avoid multi-page allocations */
    pages = kmalloc(sizeof(struct oo_buffer_pages), gfp_flag);
    if( pages == NULL )
      return -ENOMEM;
    ci_assert_le(n_bufs * sizeof(struct page *), PAGE_SIZE);
    pages->pages = kmalloc(n_bufs * sizeof(struct page *), gfp_flag);
    if( pages->pages == NULL ) {
      kfree(pages);
      return -ENOMEM;
    }
  }

  pages->n_bufs = n_bufs;
  oo_atomic_set(&pages->ref_count, 1);

#ifdef OO_DO_HUGE_PAGES
  if( (*flags & (OO_IOBUFSET_FLAG_HUGE_PAGE_TRY |
                 OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE)) &&
      gfp_flag == GFP_KERNEL &&
      low_order == HPAGE_SHIFT - PAGE_SHIFT ) {
    if (oo_bufpage_huge_alloc(pages, flags) == 0) {
      *pages_out = pages;
      return 0;
    }
  }
  pages->shmid = -1;
  if( *flags & OO_IOBUFSET_FLAG_HUGE_PAGE_FORCE ) {
    ci_assert_equal(low_order, HPAGE_SHIFT - PAGE_SHIFT);
    /* No pages have been allocated yet, so only the bookkeeping
     * allocated above needs releasing before we fail. */
    if( (void *)pages->pages != (void *)(pages + 1) )
      kfree(pages->pages);
    kfree(pages);
    return -ENOMEM;
  }
#endif

  if( low_order > 0 ) {
#ifdef OO_HAVE_COMPOUND_PAGES
    /* __GFP_COMP hint stolen from http://samirdas.blog.com/
     * __GFP_NOWARN is necessary because we properly handle high-order page
     * allocation failure by allocating pages one-by-one. */
    gfp_flag |= __GFP_COMP | __GFP_NOWARN;
#else
    /* Compound pages unavailable: release the bookkeeping and fail. */
    if( (void *)pages->pages != (void *)(pages + 1) )
      kfree(pages->pages);
    kfree(pages);
    return -EINVAL;
#endif
  }

  for( i = 0; i < n_bufs; ++i ) {
    pages->pages[i] = alloc_pages_node(numa_node_id(), gfp_flag, low_order);
    if( pages->pages[i] == NULL ) {
      OO_DEBUG_VERB(ci_log("%s: failed to allocate page (i=%u) "
                           "user_order=%d page_order=%d", __FUNCTION__, i,
                           user_order, low_order));
      pages->n_bufs = i;
      oo_iobufset_free_pages(pages);
      return -ENOMEM;
    }
    memset(page_address(pages->pages[i]), 0, PAGE_SIZE << low_order);
  }

  *pages_out = pages;
  return 0;
}
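/* Illustrative sketch (not part of the driver) of the high-order
 * allocation pattern above: __GFP_COMP makes the result a compound page,
 * so compound_order() and a single page_address() on the head page behave
 * as callers such as oo_iobufset_resource_alloc() expect, while
 * __GFP_NOWARN suppresses the allocator's warning because failure here is
 * expected and handled by retrying at a lower order. */
static struct page *example_alloc_compound(int order, gfp_t gfp)
{
  struct page *pg = alloc_pages_node(numa_node_id(),
                                     gfp | __GFP_COMP | __GFP_NOWARN, order);
  if( pg != NULL )
    memset(page_address(pg), 0, PAGE_SIZE << order);  /* zero the buffer */
  return pg;
}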