static void free_pagelist(PAGELIST_T *pagelist, int actual) { vm_page_t *pages; unsigned int num_pages, i; vcos_log_trace("free_pagelist - %x, %d", (unsigned int)pagelist, actual); num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / PAGE_SIZE; pages = (vm_page_t *)(pagelist->addrs + num_pages); /* Deal with any partial cache lines (fragments) */ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { FRAGMENTS_T *fragments = g_fragments_base + (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS); int head_bytes, tail_bytes; if (actual >= 0) { /* XXXBSD: might be inefficient */ void *page_address = pmap_mapdev(VM_PAGE_TO_PHYS(pages[0]), PAGE_SIZE*num_pages); if ((head_bytes = (CACHE_LINE_SIZE - pagelist->offset) & (CACHE_LINE_SIZE - 1)) != 0) { if (head_bytes > actual) head_bytes = actual; memcpy((char *)page_address + pagelist->offset, fragments->headbuf, head_bytes); } if ((head_bytes < actual) && (tail_bytes = (pagelist->offset + actual) & (CACHE_LINE_SIZE - 1)) != 0) { memcpy((char *)page_address + PAGE_SIZE*(num_pages - 1) + ((pagelist->offset + actual) & (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)), fragments->tailbuf, tail_bytes); } pmap_qremove((vm_offset_t)page_address, PAGE_SIZE*num_pages); } mtx_lock(&g_free_fragments_mutex); *(FRAGMENTS_T **) fragments = g_free_fragments; g_free_fragments = fragments; mtx_unlock(&g_free_fragments_mutex); sema_post(&g_free_fragments_sema); } for (i = 0; i < num_pages; i++) { if (pagelist->type != PAGELIST_WRITE) vm_page_dirty(pages[i]); } vm_page_unhold_pages(pages, num_pages); free(pagelist, M_VCPAGELIST); }
/* * Reuse, or allocate (and program the page pods for) a new DDP buffer. The * "pages" array is handed over to this function and should not be used in any * way by the caller after that. */ static int select_ddp_buffer(struct adapter *sc, struct toepcb *toep, vm_page_t *pages, int npages, int db_off, int db_len) { struct ddp_buffer *db; struct tom_data *td = sc->tom_softc; int i, empty_slot = -1; /* Try to reuse */ for (i = 0; i < nitems(toep->db); i++) { if (bufcmp(toep->db[i], pages, npages, db_off, db_len) == 0) { free(pages, M_CXGBE); return (i); /* pages still held */ } else if (toep->db[i] == NULL && empty_slot < 0) empty_slot = i; } /* Allocate new buffer, write its page pods. */ db = alloc_ddp_buffer(td, pages, npages, db_off, db_len); if (db == NULL) { vm_page_unhold_pages(pages, npages); free(pages, M_CXGBE); return (-1); } if (write_page_pods(sc, toep, db) != 0) { vm_page_unhold_pages(pages, npages); free_ddp_buffer(td, db); return (-1); } i = empty_slot; if (i < 0) { i = arc4random() % nitems(toep->db); free_ddp_buffer(td, toep->db[i]); } toep->db[i] = db; CTR5(KTR_CXGBE, "%s: tid %d, DDP buffer[%d] = %p (tag 0x%x)", __func__, toep->tid, i, db, db->tag); return (i); }
static int create_pagelist(char __user *buf, size_t count, unsigned short type, struct proc *p, PAGELIST_T ** ppagelist) { PAGELIST_T *pagelist; vm_page_t* pages; unsigned long *addrs; unsigned int num_pages, offset, i; int pagelist_size; char *addr, *base_addr, *next_addr; int run, addridx, actual_pages; offset = (unsigned int)buf & (PAGE_SIZE - 1); num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE; *ppagelist = NULL; /* Allocate enough storage to hold the page pointers and the page ** list */ pagelist_size = sizeof(PAGELIST_T) + (num_pages * sizeof(unsigned long)) + (num_pages * sizeof(vm_page_t)); pagelist = malloc(pagelist_size, M_VCPAGELIST, M_WAITOK | M_ZERO); vchiq_log_trace(vchiq_arm_log_level, "create_pagelist - %x", (unsigned int)pagelist); if (!pagelist) return -ENOMEM; addrs = pagelist->addrs; pages = (vm_page_t*)(addrs + num_pages); actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map, (vm_offset_t)buf, count, (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages); if (actual_pages != num_pages) { vm_page_unhold_pages(pages, actual_pages); free(pagelist, M_VCPAGELIST); return (-ENOMEM); } pagelist->length = count; pagelist->type = type; pagelist->offset = offset; /* Group the pages into runs of contiguous pages */ base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0])); next_addr = base_addr + PAGE_SIZE; addridx = 0; run = 0; for (i = 1; i < num_pages; i++) { addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i])); if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) { next_addr += PAGE_SIZE; run++; } else { addrs[addridx] = (unsigned long)base_addr + run; addridx++; base_addr = addr; next_addr = addr + PAGE_SIZE; run = 0; } } addrs[addridx] = (unsigned long)base_addr + run; addridx++; /* Partial cache lines (fragments) require special measures */ if ((type == PAGELIST_READ) && ((pagelist->offset & (CACHE_LINE_SIZE - 1)) || ((pagelist->offset + pagelist->length) & (CACHE_LINE_SIZE - 1)))) { FRAGMENTS_T *fragments; if (down_interruptible(&g_free_fragments_sema) != 0) { free(pagelist, M_VCPAGELIST); return -EINTR; } WARN_ON(g_free_fragments == NULL); down(&g_free_fragments_mutex); fragments = (FRAGMENTS_T *) g_free_fragments; WARN_ON(fragments == NULL); g_free_fragments = *(FRAGMENTS_T **) g_free_fragments; up(&g_free_fragments_mutex); pagelist->type = PAGELIST_READ_WITH_FRAGMENTS + (fragments - g_fragments_base); } cpu_dcache_wbinv_range((vm_offset_t)pagelist, pagelist_size); *ppagelist = pagelist; return 0; }
int physio(struct cdev *dev, struct uio *uio, int ioflag) { struct cdevsw *csw; struct buf *pbuf; struct bio *bp; struct vm_page **pages; caddr_t sa; u_int iolen, poff; int error, i, npages, maxpages; vm_prot_t prot; csw = dev->si_devsw; npages = 0; sa = NULL; /* check if character device is being destroyed */ if (csw == NULL) return (ENXIO); /* XXX: sanity check */ if(dev->si_iosize_max < PAGE_SIZE) { printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n", devtoname(dev), dev->si_iosize_max); dev->si_iosize_max = DFLTPHYS; } /* * If the driver does not want I/O to be split, that means that we * need to reject any requests that will not fit into one buffer. */ if (dev->si_flags & SI_NOSPLIT && (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS || uio->uio_iovcnt > 1)) { /* * Tell the user why his I/O was rejected. */ if (uio->uio_resid > dev->si_iosize_max) uprintf("%s: request size=%zd > si_iosize_max=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, dev->si_iosize_max); if (uio->uio_resid > MAXPHYS) uprintf("%s: request size=%zd > MAXPHYS=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, MAXPHYS); if (uio->uio_iovcnt > 1) uprintf("%s: request vectors=%d > 1; " "cannot split request\n", devtoname(dev), uio->uio_iovcnt); return (EFBIG); } /* * Keep the process UPAGES from being swapped. Processes swapped * out while holding pbufs, used by swapper, may lead to deadlock. */ PHOLD(curproc); bp = g_alloc_bio(); if (uio->uio_segflg != UIO_USERSPACE) { pbuf = NULL; pages = NULL; } else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { pbuf = NULL; maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1; pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK); } else { pbuf = uma_zalloc(pbuf_zone, M_WAITOK); sa = pbuf->b_data; maxpages = btoc(MAXPHYS); pages = pbuf->b_pages; } prot = VM_PROT_READ; if (uio->uio_rw == UIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ error = 0; for (i = 0; i < uio->uio_iovcnt; i++) { #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); if (uio->uio_rw == UIO_READ) { racct_add_force(curproc, RACCT_READBPS, uio->uio_iov[i].iov_len); racct_add_force(curproc, RACCT_READIOPS, 1); } else { racct_add_force(curproc, RACCT_WRITEBPS, uio->uio_iov[i].iov_len); racct_add_force(curproc, RACCT_WRITEIOPS, 1); } PROC_UNLOCK(curproc); } #endif /* RACCT */ while (uio->uio_iov[i].iov_len) { g_reset_bio(bp); if (uio->uio_rw == UIO_READ) { bp->bio_cmd = BIO_READ; curthread->td_ru.ru_inblock++; } else { bp->bio_cmd = BIO_WRITE; curthread->td_ru.ru_oublock++; } bp->bio_offset = uio->uio_offset; bp->bio_data = uio->uio_iov[i].iov_base; bp->bio_length = uio->uio_iov[i].iov_len; if (bp->bio_length > dev->si_iosize_max) bp->bio_length = dev->si_iosize_max; if (bp->bio_length > MAXPHYS) bp->bio_length = MAXPHYS; /* * Make sure the pbuf can map the request. * The pbuf has kvasize = MAXPHYS, so a request * larger than MAXPHYS - PAGE_SIZE must be * page aligned or it will be fragmented. */ poff = (vm_offset_t)bp->bio_data & PAGE_MASK; if (pbuf && bp->bio_length + poff > pbuf->b_kvasize) { if (dev->si_flags & SI_NOSPLIT) { uprintf("%s: request ptr %p is not " "on a page boundary; cannot split " "request\n", devtoname(dev), bp->bio_data); error = EFBIG; goto doerror; } bp->bio_length = pbuf->b_kvasize; if (poff != 0) bp->bio_length -= PAGE_SIZE; } bp->bio_bcount = bp->bio_length; bp->bio_dev = dev; if (pages) { if ((npages = vm_fault_quick_hold_pages( &curproc->p_vmspace->vm_map, (vm_offset_t)bp->bio_data, bp->bio_length, prot, pages, maxpages)) < 0) { error = EFAULT; goto doerror; } if (pbuf && sa) { pmap_qenter((vm_offset_t)sa, pages, npages); bp->bio_data = sa + poff; } else { bp->bio_ma = pages; bp->bio_ma_n = npages; bp->bio_ma_offset = poff; bp->bio_data = unmapped_buf; bp->bio_flags |= BIO_UNMAPPED; } } csw->d_strategy(bp); if (uio->uio_rw == UIO_READ) biowait(bp, "physrd"); else biowait(bp, "physwr"); if (pages) { if (pbuf) pmap_qremove((vm_offset_t)sa, npages); vm_page_unhold_pages(pages, npages); } iolen = bp->bio_length - bp->bio_resid; if (iolen == 0 && !(bp->bio_flags & BIO_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base = (char *)uio->uio_iov[i].iov_base + iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; if (bp->bio_flags & BIO_ERROR) { error = bp->bio_error; goto doerror; } } } doerror: if (pbuf) uma_zfree(pbuf_zone, pbuf); else if (pages) free(pages, M_DEVBUF); g_destroy_bio(bp); PRELE(curproc); return (error); }
static void free_pagelist(BULKINFO_T *bi, int actual) { vm_page_t*pages; unsigned int num_pages, i; void *page_address; PAGELIST_T *pagelist; pagelist = bi->pagelist; vchiq_log_trace(vchiq_arm_log_level, "free_pagelist - %x, %d", (unsigned int)pagelist, actual); num_pages = (pagelist->length + pagelist->offset + PAGE_SIZE - 1) / PAGE_SIZE; pages = (vm_page_t*)(pagelist->addrs + num_pages); /* Deal with any partial cache lines (fragments) */ if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { FRAGMENTS_T *fragments = g_fragments_base + (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS); int head_bytes, tail_bytes; head_bytes = (CACHE_LINE_SIZE - pagelist->offset) & (CACHE_LINE_SIZE - 1); tail_bytes = (pagelist->offset + actual) & (CACHE_LINE_SIZE - 1); if ((actual >= 0) && (head_bytes != 0)) { if (head_bytes > actual) head_bytes = actual; memcpy((char *)bi->buf, fragments->headbuf, head_bytes); } if ((actual >= 0) && (head_bytes < actual) && (tail_bytes != 0)) { memcpy((char *)bi->buf + actual - tail_bytes, fragments->tailbuf, tail_bytes); } down(&g_free_fragments_mutex); *(FRAGMENTS_T **) fragments = g_free_fragments; g_free_fragments = fragments; up(&g_free_fragments_mutex); up(&g_free_fragments_sema); } for (i = 0; i < num_pages; i++) { if (pagelist->type != PAGELIST_WRITE) vm_page_dirty(pages[i]); } vm_page_unhold_pages(pages, num_pages); bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map); bus_dmamem_free(bi->pagelist_dma_tag, bi->pagelist, bi->pagelist_dma_map); bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map); bus_dma_tag_destroy(bi->pagelist_dma_tag); free(bi, M_VCPAGELIST); }
static int create_pagelist(char __user *buf, size_t count, unsigned short type, struct proc *p, BULKINFO_T *bi) { PAGELIST_T *pagelist; vm_page_t* pages; unsigned long *addrs; unsigned int num_pages, i; vm_offset_t offset; int pagelist_size; char *addr, *base_addr, *next_addr; int run, addridx, actual_pages; int err; vm_paddr_t pagelist_phys; offset = (vm_offset_t)buf & (PAGE_SIZE - 1); num_pages = (count + offset + PAGE_SIZE - 1) / PAGE_SIZE; bi->pagelist = NULL; bi->buf = buf; bi->size = count; /* Allocate enough storage to hold the page pointers and the page ** list */ pagelist_size = sizeof(PAGELIST_T) + (num_pages * sizeof(unsigned long)) + (num_pages * sizeof(pages[0])); err = bus_dma_tag_create( NULL, PAGE_SIZE, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ pagelist_size, 1, /* maxsize, nsegments */ pagelist_size, 0, /* maxsegsize, flags */ NULL, NULL, /* lockfunc, lockarg */ &bi->pagelist_dma_tag); err = bus_dmamem_alloc(bi->pagelist_dma_tag, (void **)&pagelist, BUS_DMA_COHERENT | BUS_DMA_WAITOK, &bi->pagelist_dma_map); if (err) { vchiq_log_error(vchiq_core_log_level, "Unable to allocate pagelist memory"); err = -ENOMEM; goto failed_alloc; } err = bus_dmamap_load(bi->pagelist_dma_tag, bi->pagelist_dma_map, pagelist, pagelist_size, vchiq_dmamap_cb, &pagelist_phys, 0); if (err) { vchiq_log_error(vchiq_core_log_level, "cannot load DMA map for pagelist memory"); err = -ENOMEM; goto failed_load; } vchiq_log_trace(vchiq_arm_log_level, "create_pagelist - %x", (unsigned int)pagelist); if (!pagelist) return -ENOMEM; addrs = pagelist->addrs; pages = (vm_page_t*)(addrs + num_pages); actual_pages = vm_fault_quick_hold_pages(&p->p_vmspace->vm_map, (vm_offset_t)buf, count, (type == PAGELIST_READ ? VM_PROT_WRITE : 0 ) | VM_PROT_READ, pages, num_pages); if (actual_pages != num_pages) { vm_page_unhold_pages(pages, actual_pages); free(pagelist, M_VCPAGELIST); return (-ENOMEM); } pagelist->length = count; pagelist->type = type; pagelist->offset = offset; /* Group the pages into runs of contiguous pages */ base_addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[0])); next_addr = base_addr + PAGE_SIZE; addridx = 0; run = 0; for (i = 1; i < num_pages; i++) { addr = (void *)PHYS_TO_VCBUS(VM_PAGE_TO_PHYS(pages[i])); if ((addr == next_addr) && (run < (PAGE_SIZE - 1))) { next_addr += PAGE_SIZE; run++; } else { addrs[addridx] = (unsigned long)base_addr + run; addridx++; base_addr = addr; next_addr = addr + PAGE_SIZE; run = 0; } } addrs[addridx] = (unsigned long)base_addr + run; addridx++; /* Partial cache lines (fragments) require special measures */ if ((type == PAGELIST_READ) && ((pagelist->offset & (CACHE_LINE_SIZE - 1)) || ((pagelist->offset + pagelist->length) & (CACHE_LINE_SIZE - 1)))) { FRAGMENTS_T *fragments; if (down_interruptible(&g_free_fragments_sema) != 0) { free(pagelist, M_VCPAGELIST); return -EINTR; } WARN_ON(g_free_fragments == NULL); down(&g_free_fragments_mutex); fragments = (FRAGMENTS_T *) g_free_fragments; WARN_ON(fragments == NULL); g_free_fragments = *(FRAGMENTS_T **) g_free_fragments; up(&g_free_fragments_mutex); pagelist->type = PAGELIST_READ_WITH_FRAGMENTS + (fragments - g_fragments_base); } /* XXX: optimize? INV operation for read WBINV for write? */ cpu_dcache_wbinv_range((vm_offset_t)buf, count); bi->pagelist = pagelist; return 0; failed_load: bus_dmamap_unload(bi->pagelist_dma_tag, bi->pagelist_dma_map); failed_alloc: bus_dmamap_destroy(bi->pagelist_dma_tag, bi->pagelist_dma_map); bus_dma_tag_destroy(bi->pagelist_dma_tag); return err; }
static int handle_ddp(struct socket *so, struct uio *uio, int flags, int error) { struct sockbuf *sb = &so->so_rcv; struct tcpcb *tp = so_sototcpcb(so); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); vm_page_t *pages; int npages, db_idx, rc, buf_flag; struct ddp_buffer *db; struct wrqe *wr; uint64_t ddp_flags; SOCKBUF_LOCK_ASSERT(sb); #if 0 if (sbused(sb) + sc->tt.ddp_thres > uio->uio_resid) { CTR4(KTR_CXGBE, "%s: sb_cc %d, threshold %d, resid %d", __func__, sbused(sb), sc->tt.ddp_thres, uio->uio_resid); } #endif /* XXX: too eager to disable DDP, could handle NBIO better than this. */ if (sbused(sb) >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 || so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) || error || so->so_error || sb->sb_state & SBS_CANTRCVMORE) goto no_ddp; /* * Fault in and then hold the pages of the uio buffers. We'll wire them * a bit later if everything else works out. */ SOCKBUF_UNLOCK(sb); if (hold_uio(uio, &pages, &npages) != 0) { SOCKBUF_LOCK(sb); goto no_ddp; } SOCKBUF_LOCK(sb); if (__predict_false(so->so_error || sb->sb_state & SBS_CANTRCVMORE)) { vm_page_unhold_pages(pages, npages); free(pages, M_CXGBE); goto no_ddp; } /* * Figure out which one of the two DDP buffers to use this time. */ db_idx = select_ddp_buffer(sc, toep, pages, npages, (uintptr_t)uio->uio_iov->iov_base & PAGE_MASK, uio->uio_resid); pages = NULL; /* handed off to select_ddp_buffer */ if (db_idx < 0) goto no_ddp; db = toep->db[db_idx]; buf_flag = db_idx == 0 ? DDP_BUF0_ACTIVE : DDP_BUF1_ACTIVE; /* * Build the compound work request that tells the chip where to DMA the * payload. */ ddp_flags = select_ddp_flags(so, flags, db_idx); wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sbused(sb), ddp_flags); if (wr == NULL) { /* * Just unhold the pages. The DDP buffer's software state is * left as-is in the toep. The page pods were written * successfully and we may have an opportunity to use it in the * future. */ vm_page_unhold_pages(db->pages, db->npages); goto no_ddp; } /* Wire (and then unhold) the pages, and give the chip the go-ahead. */ wire_ddp_buffer(db); t4_wrq_tx(sc, wr); sb->sb_flags &= ~SB_DDP_INDICATE; toep->ddp_flags |= buf_flag; /* * Wait for the DDP operation to complete and then unwire the pages. * The return code from the sbwait will be the final return code of this * function. But we do need to wait for DDP no matter what. */ rc = sbwait(sb); while (toep->ddp_flags & buf_flag) { /* XXXGL: shouldn't here be sbwait() call? */ sb->sb_flags |= SB_WAIT; msleep(&sb->sb_acc, &sb->sb_mtx, PSOCK , "sbwait", 0); } unwire_ddp_buffer(db); return (rc); no_ddp: disable_ddp(sc, toep); discourage_ddp(toep); sb->sb_flags &= ~SB_DDP_INDICATE; return (0); }