static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
				 bool coherent)
{
	if (dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs))
		return pgprot_noncached(prot);
	else if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		return pgprot_writecombine(prot);
	return prot;
}
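/*
 * Example (illustrative, not from this source tree; assumes
 * <linux/dma-mapping.h> and <linux/dma-attrs.h>): a minimal caller-side
 * sketch of how a driver picks the memory type that __get_dma_pgprot()
 * resolves.  DMA_ATTR_STRONGLY_ORDERED is a vendor extension, so the
 * pgprot_noncached() branch only exists on kernels carrying it.
 */
static int example_mmap_writecombine(struct device *dev,
				     struct vm_area_struct *vma,
				     void *cpu_addr, dma_addr_t dma_addr,
				     size_t size)
{
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	/* selects the pgprot_writecombine() branch above */
	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);

	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs);
}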
/**
 * sn_dma_map_single_attrs - map a single page for DMA
 * @dev: device to map for
 * @cpu_addr: kernel virtual address of the region to map
 * @size: size of the region
 * @direction: DMA direction
 * @attrs: optional dma attributes
 *
 * Map the region pointed to by @cpu_addr for DMA and return the
 * DMA address.
 *
 * We map this to the one step pcibr_dmamap_trans interface rather than
 * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have
 * no way of saving the dmamap handle from the alloc to later free
 * (which is pretty much unacceptable).
 *
 * Mappings with the DMA_ATTR_WRITE_BARRIER attribute get mapped with
 * dma_map_consistent() so that writes force a flush of pending DMA.
 * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
 * Document Number: 007-4763-001)
 *
 * TODO: simplify our interface;
 *       figure out how to save dmamap handle so can use two step.
 */
dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr,
				   size_t size, int direction,
				   struct dma_attrs *attrs)
{
	dma_addr_t dma_addr;
	unsigned long phys_addr;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
	int dmabarr;

	dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);

	BUG_ON(dev->bus != &pci_bus_type);

	phys_addr = __pa(cpu_addr);
	if (dmabarr)
		dma_addr = provider->dma_map_consistent(pdev, phys_addr,
							size, SN_DMA_ADDR_PHYS);
	else
		dma_addr = provider->dma_map(pdev, phys_addr, size,
					     SN_DMA_ADDR_PHYS);

	if (!dma_addr) {
		printk(KERN_ERR "%s: out of ATEs\n", __func__);
		return 0;
	}
	return dma_addr;
}
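/*
 * Example (illustrative, not from the Altix sources): how a driver would
 * request the write-barrier semantics documented above.  On kernels of
 * this vintage the attribute is attached with init_dma_attrs()/
 * dma_set_attr() and handed to the generic *_attrs mapping call.
 */
static dma_addr_t example_map_with_barrier(struct device *dev, void *buf,
					   size_t size)
{
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	/* writes to this region force a flush of pending DMA */
	dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);

	return dma_map_single_attrs(dev, buf, size, DMA_TO_DEVICE, &attrs);
}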
static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma,
	void *cpu_addr, dma_addr_t dma_addr, size_t size,
	struct dma_attrs *attrs)
{
	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long addr = (unsigned long)cpu_addr;
	unsigned long off = vma->vm_pgoff;
	unsigned long pfn;
	int ret = -ENXIO;

	if (!plat_device_is_coherent(dev) && !hw_coherentio)
		addr = CAC_ADDR(addr);

	pfn = page_to_pfn(virt_to_page((void *)addr));

	if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	else
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < count && user_count <= (count - off)) {
		ret = remap_pfn_range(vma, vma->vm_start, pfn + off,
				      user_count << PAGE_SHIFT,
				      vma->vm_page_prot);
	}

	return ret;
}
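/*
 * Example (hypothetical driver code; "struct my_dev" and its fields are
 * assumptions for illustration): a file_operations.mmap handler that ends
 * up in mips_dma_mmap() via dma_mmap_attrs().  Note vma->vm_pgoff is a
 * page offset into the buffer, which the range check above enforces.
 */
struct my_dev {
	struct device *dev;
	void *cpu_addr;
	dma_addr_t dma_handle;
	size_t size;
};

static int my_drv_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct my_dev *md = file->private_data;
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);

	return dma_mmap_attrs(md->dev, vma, md->cpu_addr, md->dma_handle,
			      md->size, &attrs);
}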
static void __dma_set_pages(struct page *page, unsigned int count,
			    struct dma_attrs *attrs)
{
	int ret = 0;

	if (attrs == NULL)
		ret = set_pages_uc(page, count);
	else if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		ret = set_pages_wc(page, count);
	else if (!dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
		pr_warn("%s:DMA attrs %p not supported\n",
			__func__, attrs->flags);

	if (ret)
		pr_err("%s failed\n", __func__);
}
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		return pgprot_writecombine(prot);
	return prot;
}
/**
 * arm_dma_map_page - map a portion of a page for streaming DMA
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @page: page that buffer resides in
 * @offset: offset into page for start of buffer
 * @size: size of buffer to map
 * @dir: DMA transfer direction
 *
 * Ensure that any data held in the cache is appropriately discarded
 * or written back.
 *
 * The device owns this memory once this call has completed.  The CPU
 * can regain ownership by calling dma_unmap_page().
 */
static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
	     unsigned long offset, size_t size, enum dma_data_direction dir,
	     struct dma_attrs *attrs)
{
	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__dma_page_cpu_to_dev(page, offset, size, dir);
	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
}
/**
 * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
 * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
 * @handle: DMA address of buffer
 * @size: size of buffer (same as passed to dma_map_page)
 * @dir: DMA transfer direction (same as passed to dma_map_page)
 *
 * Unmap a page streaming mode DMA translation.  The handle and size
 * must match what was provided in the previous dma_map_page() call.
 * All other usages are undefined.
 *
 * After this call, reads by the CPU to the buffer are guaranteed to see
 * whatever the device wrote there.
 */
static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
		size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
{
	if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
				      handle & ~PAGE_MASK, size, dir);
}
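/*
 * Example (a sketch, not kernel source): a driver that does its own cache
 * maintenance can skip the implicit sync in arm_dma_map_page()/
 * arm_dma_unmap_page() with DMA_ATTR_SKIP_CPU_SYNC, e.g. when a mapping is
 * created once but the buffer is recycled many times.
 */
static dma_addr_t example_map_no_sync(struct device *dev, void *buf,
				      size_t size)
{
	struct dma_attrs attrs;
	dma_addr_t handle;

	init_dma_attrs(&attrs);
	/* the driver takes over CPU<->device cache maintenance */
	dma_set_attr(DMA_ATTR_SKIP_CPU_SYNC, &attrs);

	handle = dma_map_single_attrs(dev, buf, size, DMA_TO_DEVICE, &attrs);
	if (dma_mapping_error(dev, handle))
		return handle;

	/* sync explicitly, and only when the buffer contents changed */
	dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
	return handle;
}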
void *rockchip_gem_prime_vmap(struct drm_gem_object *obj)
{
	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);

	if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs))
		return NULL;

	return rk_obj->kvaddr;
}
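/*
 * Example (a sketch of the allocation side implied above; the field names
 * follow the rockchip driver but the helper itself is hypothetical): when
 * a buffer is allocated with DMA_ATTR_NO_KERNEL_MAPPING, the value stored
 * in kvaddr is an opaque cookie rather than a usable kernel address, which
 * is exactly why rockchip_gem_prime_vmap() refuses to hand it out.
 */
static int example_rockchip_alloc(struct device *dev,
				  struct rockchip_gem_object *rk_obj,
				  size_t size, bool alloc_kmap)
{
	init_dma_attrs(&rk_obj->dma_attrs);
	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &rk_obj->dma_attrs);
	if (!alloc_kmap)
		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &rk_obj->dma_attrs);

	rk_obj->kvaddr = dma_alloc_attrs(dev, size, &rk_obj->dma_addr,
					 GFP_KERNEL | __GFP_NOWARN,
					 &rk_obj->dma_attrs);
	return rk_obj->kvaddr ? 0 : -ENOMEM;
}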
void removed_free(struct device *dev, size_t size, void *cpu_addr,
		  dma_addr_t handle, struct dma_attrs *attrs)
{
	bool no_kernel_mapping = dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING,
					      attrs);

	if (!no_kernel_mapping)
		iounmap(cpu_addr);
	dma_release_from_contiguous(dev, __phys_to_pfn(handle),
				    size >> PAGE_SHIFT);
}
void *removed_alloc(struct device *dev, size_t size, dma_addr_t *handle,
		    gfp_t gfp, struct dma_attrs *attrs)
{
	bool no_kernel_mapping = dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING,
					      attrs);
	bool skip_zeroing = dma_get_attr(DMA_ATTR_SKIP_ZEROING, attrs);
	unsigned long pfn;
	unsigned long order = get_order(size);
	void *addr = NULL;

	size = PAGE_ALIGN(size);

	if (!(gfp & __GFP_WAIT))
		return NULL;

	pfn = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, order);

	if (pfn) {
		if (no_kernel_mapping && skip_zeroing) {
			*handle = __pfn_to_phys(pfn);
			return (void *)NO_KERNEL_MAPPING_DUMMY;
		}

		addr = ioremap(__pfn_to_phys(pfn), size);
		if (WARN_ON(!addr)) {
			dma_release_from_contiguous(dev, pfn, order);
		} else {
			if (!skip_zeroing)
				memset_io(addr, 0, size);
			if (no_kernel_mapping) {
				iounmap(addr);
				addr = (void *)NO_KERNEL_MAPPING_DUMMY;
			}
			*handle = __pfn_to_phys(pfn);
		}
	}

	return addr;
}
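/*
 * Example (sketch; DMA_ATTR_SKIP_ZEROING is a vendor attribute like the
 * one handled above, so this only applies on kernels that carry it): an
 * allocation that neither maps nor zeroes the carveout memory, taking the
 * fast path at the top of removed_alloc().
 */
static void *example_secure_alloc(struct device *dev, size_t size,
				  dma_addr_t *handle)
{
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
	dma_set_attr(DMA_ATTR_SKIP_ZEROING, &attrs);	/* vendor attr */

	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL, &attrs);
}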
static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_addr, gfp_t flag,
			 struct dma_attrs *attrs, bool is_coherent)
{
	unsigned long dma_mask;
	struct page *page;
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	dma_addr_t addr;

	dma_mask = dma_alloc_coherent_mask(dev, flag);

	flag |= __GFP_ZERO;
again:
	page = NULL;
	/* CMA can be used only in the context which permits sleeping */
	if (flag & __GFP_WAIT)
#ifdef CONFIG_CMA_EXPLICIT_USE
		if (dma_get_attr(DMA_ATTR_CMA, attrs)) {
#endif
			page = dma_alloc_from_contiguous(dev, count,
							 get_order(size));
#ifdef CONFIG_CMA_EXPLICIT_USE
			if (!page)
				return NULL;
		}
#endif
	/* fallback */
	if (!page)
		page = alloc_pages_node(dev_to_node(dev), flag,
					get_order(size));
	if (!page)
		return NULL;

	addr = page_to_phys(page);
	if (addr + size > dma_mask) {
		__free_pages(page, get_order(size));

		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
			flag = (flag & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}

		return NULL;
	}

	if (!is_coherent)
		__dma_set_pages(page, count, attrs);

	*dma_addr = addr;
	return page_address(page);
}
static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  struct dma_attrs *attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
		struct page *page;
		void *addr;

		size = PAGE_ALIGN(size);
		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);

		if (dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs) ||
		    dma_get_attr(DMA_ATTR_STRONGLY_ORDERED, attrs)) {
			/*
			 * flush the caches here because we can't do it later
			 */
			__dma_flush_range(addr, addr + size);
			__dma_remap(page, size, 0, true);
		}

		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}
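/*
 * Example (sketch; DMA_ATTR_STRONGLY_ORDERED is a vendor extension, so
 * this only works on kernels that wire it up as above): request a buffer
 * whose cacheable alias is flushed and remapped at allocation time.
 */
static void *example_alloc_strongly_ordered(struct device *dev, size_t size,
					    dma_addr_t *handle)
{
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_STRONGLY_ORDERED, &attrs);

	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL, &attrs);
}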
static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
			 dma_addr_t dma_handle, struct dma_attrs *attrs)
{
	struct page *page = virt_to_page(dma_handle);
	int is_non_coh = 1;

	is_non_coh = dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs) ||
			(is_isa_arcv2() && ioc_exists);

	if (PageHighMem(page) || !is_non_coh)
		iounmap((void __force __iomem *)vaddr);

	__free_pages(page, get_order(size));
}
static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
		unsigned long uaddr, enum dma_data_direction direction,
		struct dma_attrs *attrs)
{
	int i;
	unsigned long *io_pte, base_pte;
	struct iommu_window *window =
		container_of(tbl, struct iommu_window, table);

	/* implementing proper protection causes problems with the spidernet
	 * driver - check mapping directions later, but allow read & write by
	 * default for now. */
#ifdef CELL_IOMMU_STRICT_PROTECTION
	/* to avoid referencing a global, we use a trick here to set up the
	 * protection bits. "prot" is set up to be 3 fields of 4 bits appended
	 * together for each of the 3 supported direction values. It is then
	 * shifted left so that the fields matching the desired direction
	 * land on the appropriate bits, and other bits are masked out.
	 */
	const unsigned long prot = 0xc48;
	base_pte =
		((prot << (52 + 4 * direction)) &
		 (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) |
		CBE_IOPTE_M | CBE_IOPTE_SO_RW |
		(window->ioid & CBE_IOPTE_IOID_Mask);
#else
	base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
		CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
#endif
	if (unlikely(dma_get_attr(DMA_ATTR_WEAK_ORDERING, attrs)))
		base_pte &= ~CBE_IOPTE_SO_RW;

	io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);

	for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE)
		io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);

	mb();

	invalidate_tce_cache(window->iommu, io_pte, npages);

	pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
		 index, npages, direction, base_pte);
	return 0;
}
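/*
 * Example (illustrative caller, not from the Cell sources): a device that
 * tolerates reordered DMA can relax the IOMMU's strong ordering per
 * mapping; the attribute reaches tce_build_cell() above, where it clears
 * CBE_IOPTE_SO_RW in the page-table entries.
 */
static int example_map_sg_weakly_ordered(struct device *dev,
					 struct scatterlist *sgl, int nents)
{
	struct dma_attrs attrs;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);

	return dma_map_sg_attrs(dev, sgl, nents, DMA_BIDIRECTIONAL, &attrs);
}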
/**
 * scsi_dma_map - perform DMA mapping against command's sg lists
 * @cmd: scsi command
 *
 * Returns the number of sg lists actually used, zero if the sg list
 * is NULL, or -ENOMEM if the mapping failed.
 */
int scsi_dma_map(struct scsi_cmnd *cmd)
{
	int nseg = 0;
	struct dma_attrs *attrs = &scsi_direct_attrs;

	if (scsi_sg_count(cmd)) {
		struct device *dev = cmd->device->host->dma_dev;

		if (dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
			attrs = (cmd->request->cmd_flags & REQ_KERNEL) ?
				&scsi_direct_attrs : NULL;

		nseg = dma_map_sg_attrs(dev, scsi_sglist(cmd),
					scsi_sg_count(cmd),
					cmd->sc_data_direction, attrs);
		if (unlikely(!nseg))
			return -ENOMEM;
	}
	return nseg;
}
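/*
 * Example (the standard pattern for a SCSI low-level driver; the function
 * name and hardware hook are hypothetical): queuecommand maps the
 * command's sg list through scsi_dma_map() and programs each resulting
 * segment into the HBA.
 */
static int example_queue_rw(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int nseg, i;

	nseg = scsi_dma_map(cmd);
	if (nseg < 0)
		return SCSI_MLQUEUE_HOST_BUSY;

	scsi_for_each_sg(cmd, sg, nseg, i) {
		/* hand sg_dma_address(sg) / sg_dma_len(sg) to the HBA */
	}
	return 0;
}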
/* this code was heavily inspired by _ump_ukk_msync() in
 * drivers/amlogic/gpu/ump/common/ump_kernel_api.c
 */
int meson_ioctl_msync(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_gem_object *gem_obj;
	struct drm_gem_cma_object *cma_obj;
	struct drm_meson_msync *args = data;
	struct meson_drm_session_data *session_data = file->driver_priv;
	void *virtual = NULL;
	u32 size = 0;
	u32 offset = 0;

	if (!args || !session_data)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(dev, file, args->handle);
	if (NULL == gem_obj) {
		DBG_MSG(1, ("meson_ioctl_msync(): %02u Failed to look up mapping\n",
			    args->handle));
		return -EFAULT;
	}

	cma_obj = to_drm_gem_cma_obj(gem_obj);
	if (NULL == cma_obj) {
		DBG_MSG(1, ("meson_ioctl_msync(): %02u Failed to get gem_cma_obj containing gem_obj\n",
			    args->handle));
		return -EFAULT;
	}

	/* Returns the cache settings back to Userspace */
	args->is_cached = dma_get_attr(DMA_ATTR_NON_CONSISTENT,
				       &cma_obj->dma_attrs);

	DBG_MSG(3, ("meson_ioctl_msync(): %02u cache_enabled %d\n op %d address 0x%08x mapping 0x%08x\n",
		    args->handle, args->is_cached, args->op,
		    args->address, args->mapping));

	/* Nothing to do in these cases */
	if ((DRM_MESON_MSYNC_READOUT_CACHE_ENABLED == args->op) ||
	    (!args->is_cached))
		return 0;

	if (args->address) {
		virtual = (void *)((u32)args->address);
		offset = (u32)((args->address) - (args->mapping));
	} else {
static void mips_dma_free_coherent(struct device *dev, size_t size,
				   void *vaddr, dma_addr_t dma_handle,
				   struct dma_attrs *attrs)
{
	unsigned long addr = (unsigned long)vaddr;
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct page *page = NULL;

	if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs)) {
		mips_dma_free_noncoherent(dev, size, vaddr, dma_handle);
		return;
	}

	plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL);

	if (!plat_device_is_coherent(dev) && !hw_coherentio)
		addr = CAC_ADDR(addr);

	page = virt_to_page((void *)addr);

	if (!dma_release_from_contiguous(dev, page, count))
		__free_pages(page, get_order(size));
}
static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
				     dma_addr_t *dma_handle, gfp_t gfp,
				     struct dma_attrs *attrs)
{
	void *ret;
	struct page *page = NULL;
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;

	/*
	 * XXX: seems like the coherent and non-coherent implementations
	 * could be consolidated.
	 */
	if (dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
		return mips_dma_alloc_noncoherent(dev, size, dma_handle, gfp);

	gfp = massage_gfp_flags(dev, gfp);

	if (IS_ENABLED(CONFIG_DMA_CMA) && !(gfp & GFP_ATOMIC))
		page = dma_alloc_from_contiguous(dev, count, get_order(size));
	if (!page)
		page = alloc_pages(gfp, get_order(size));

	if (!page)
		return NULL;

	ret = page_address(page);
	memset(ret, 0, size);
	*dma_handle = plat_map_dma_mem(dev, ret, size);
	if (!plat_device_is_coherent(dev)) {
		dma_cache_wback_inv((unsigned long)ret, size);
		if (!hw_coherentio)
			ret = UNCAC_ADDR(ret);
	}

	return ret;
}
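/*
 * Example (sketch): a driver opting into the cheaper non-coherent path
 * taken at the top of mips_dma_alloc_coherent().  The driver then owns
 * the cache maintenance and must bracket device accesses with
 * dma_cache_sync().
 */
static void *example_alloc_noncoherent(struct device *dev, size_t size,
				       dma_addr_t *handle)
{
	struct dma_attrs attrs;
	void *vaddr;

	init_dma_attrs(&attrs);
	dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs);

	vaddr = dma_alloc_attrs(dev, size, handle, GFP_KERNEL, &attrs);
	if (vaddr)
		/* flush CPU writes before the device reads the buffer */
		dma_cache_sync(dev, vaddr, size, DMA_TO_DEVICE);
	return vaddr;
}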
static void *arc_dma_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t gfp,
			   struct dma_attrs *attrs)
{
	unsigned long order = get_order(size);
	struct page *page;
	phys_addr_t paddr;
	void *kvaddr;
	int need_coh = 1, need_kvaddr = 0;

	page = alloc_pages(gfp, order);
	if (!page)
		return NULL;

	/*
	 * IOC relies on all data (even coherent DMA data) being in cache
	 * Thus allocate normal cached memory
	 *
	 * The gains with IOC are two pronged:
	 *   -For streaming data, elides need for cache maintenance, saving
	 *    cycles in flush code, and bus bandwidth as all the lines of a
	 *    buffer need to be flushed out to memory
	 *   -For coherent data, Read/Write to buffers terminate early in cache
	 *    (vs. always going to memory - thus are faster)
	 */
	if ((is_isa_arcv2() && ioc_exists) ||
	    dma_get_attr(DMA_ATTR_NON_CONSISTENT, attrs))
		need_coh = 0;

	/*
	 * - A coherent buffer needs MMU mapping to enforce non-cachability
	 * - A highmem page needs a virtual handle (hence MMU mapping)
	 *   independent of cachability
	 */
	if (PageHighMem(page) || need_coh)
		need_kvaddr = 1;

	/* This is linear addr (0x8000_0000 based) */
	paddr = page_to_phys(page);

	*dma_handle = plat_phys_to_dma(dev, paddr);

	/* This is kernel Virtual address (0x7000_0000 based) */
	if (need_kvaddr) {
		kvaddr = ioremap_nocache(paddr, size);
		if (kvaddr == NULL) {
			__free_pages(page, order);
			return NULL;
		}
	} else {
		kvaddr = (void *)(u32)paddr;
	}

	/*
	 * Evict any existing L1 and/or L2 lines for the backing page
	 * in case it was used earlier as a normal "cached" page.
	 * Yeah this bit us - STAR 9000898266
	 *
	 * Although core does call flush_cache_vmap(), it gets kvaddr hence
	 * can't be used to efficiently flush L1 and/or L2 which need paddr.
	 * Currently flush_cache_vmap nukes the L1 cache completely which
	 * will be optimized as a separate commit.
	 */
	if (need_coh)
		dma_cache_wback_inv(paddr, size);

	return kvaddr;
}