/**
 * memremap() - remap an iomem_resource as cacheable memory
 * @offset: iomem resource start address
 * @size: size of remap
 * @flags: any combination of MEMREMAP_WB, MEMREMAP_WT and MEMREMAP_WC
 *
 * memremap() is the "ioremap" to use when the resource being mapped is
 * known to have no i/o side effects, so the __iomem annotation does not
 * apply.  When several flags are passed the mapping types are tried in
 * the order WB, WT, WC and the first one that succeeds is returned.
 *
 * MEMREMAP_WB - matches the default mapping for System RAM on the
 * architecture, usually a read-allocate write-back cache.  Moreover, if
 * the requested region is RAM, memremap() first tries to hand back a
 * pointer obtained from try_ram_remap() rather than establishing a new
 * mapping.
 *
 * MEMREMAP_WT - establish a mapping whereby writes either bypass the
 * cache or are written through to memory and never exist in a
 * cache-dirty state with respect to program visibility.  Attempts to
 * map System RAM with this mapping type will fail.
 *
 * MEMREMAP_WC - establish a writecombine mapping, whereby writes may be
 * coalesced together (e.g. in the CPU's write buffers), but is
 * otherwise uncached.  Attempts to map System RAM with this mapping
 * type will fail.
 *
 * Return: the mapped address, or NULL when @flags is zero, when the
 * range is only partially System RAM, when a non-WB mapping of System
 * RAM would be required, or when every requested mapping type failed.
 */
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
	const int ram_state = region_intersects(offset, size,
						IORESOURCE_SYSTEM_RAM,
						IORES_DESC_NONE);
	void *mapping = NULL;

	/* Nothing requested, nothing to map. */
	if (flags == 0)
		return NULL;

	/* A range that straddles RAM and non-RAM cannot be mapped sanely. */
	if (ram_state == REGION_MIXED) {
		WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	if (flags & MEMREMAP_WB) {
		/*
		 * MEMREMAP_WB is special in that it can be satisfied
		 * without a new mapping when the range is System RAM.
		 * Some archs depend on memremap() autodetecting that
		 * case.  Fall back to the arch write-back remap if the
		 * RAM shortcut is unavailable or does not apply.
		 */
		if (ram_state == REGION_INTERSECTS)
			mapping = try_ram_remap(offset, size);
		if (mapping == NULL)
			mapping = arch_memremap_wb(offset, size);
		if (mapping != NULL)
			return mapping;
	}

	/*
	 * No mapping yet.  Anything other than a pure MEMREMAP_WB request
	 * would now have to create a new virtual mapping; refuse to let
	 * that alias System RAM.
	 */
	if (ram_state == REGION_INTERSECTS && flags != MEMREMAP_WB) {
		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	if (flags & MEMREMAP_WT) {
		mapping = ioremap_wt(offset, size);
		if (mapping != NULL)
			return mapping;
	}

	if (flags & MEMREMAP_WC)
		mapping = ioremap_wc(offset, size);

	return mapping;
}
/**
 * memremap() - remap an iomem_resource as cacheable memory
 * @offset: iomem resource start address
 * @size: size of remap
 * @flags: MEMREMAP_WB and/or MEMREMAP_WT
 *
 * memremap() is the "ioremap" to use when the resource being mapped is
 * known to have no i/o side effects, so the __iomem annotation does not
 * apply.  MEMREMAP_WB is attempted before MEMREMAP_WT when both are set.
 *
 * MEMREMAP_WB - matches the default mapping for "System RAM" on the
 * architecture, usually a read-allocate write-back cache.  Moreover, if
 * the requested region is RAM, memremap() skips establishing a new
 * mapping and returns __va(@offset) instead.
 *
 * MEMREMAP_WT - establish a mapping whereby writes either bypass the
 * cache or are written through to memory and never exist in a
 * cache-dirty state with respect to program visibility.  Attempts to
 * map "System RAM" with this mapping type will fail.
 *
 * Return: the mapped address, or NULL when the range is only partially
 * "System RAM", when a non-WB mapping of "System RAM" would be needed,
 * or when no requested mapping type succeeded.
 */
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
	const int ram_state = region_intersects(offset, size, "System RAM");
	/* Mapping types that have been requested but not yet attempted. */
	unsigned long pending = flags;
	void *mapping = NULL;

	if (ram_state == REGION_MIXED) {
		WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	if (pending & MEMREMAP_WB) {
		pending &= ~MEMREMAP_WB;
		/*
		 * MEMREMAP_WB is special in that it can be satisfied
		 * from the direct map.  Some archs depend on memremap()
		 * autodetecting that the requested range is
		 * "System RAM".
		 */
		if (ram_state != REGION_INTERSECTS)
			mapping = ioremap_cache(offset, size);
		else
			mapping = __va(offset);
		if (mapping != NULL)
			return mapping;
	}

	/*
	 * No mapping yet.  If further mapping types are still pending
	 * they would create a new virtual mapping; refuse to let that
	 * alias "System RAM".
	 */
	if (ram_state == REGION_INTERSECTS && pending) {
		WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
				&offset, (unsigned long) size);
		return NULL;
	}

	if (pending & MEMREMAP_WT)
		mapping = ioremap_wt(offset, size);

	return mapping;
}
void *devm_memremap_pages(struct device *dev, struct resource *res) { int is_ram = region_intersects(res->start, resource_size(res), "System RAM"); struct page_map *page_map; int error, nid; if (is_ram == REGION_MIXED) { WARN_ONCE(1, "%s attempted on mixed region %pr\n", __func__, res); return ERR_PTR(-ENXIO); } if (is_ram == REGION_INTERSECTS) return __va(res->start); page_map = devres_alloc(devm_memremap_pages_release, sizeof(*page_map), GFP_KERNEL); if (!page_map) return ERR_PTR(-ENOMEM); memcpy(&page_map->res, res, sizeof(*res)); nid = dev_to_node(dev); if (nid < 0) nid = 0; error = arch_add_memory(nid, res->start, resource_size(res), true); if (error) { devres_free(page_map); return ERR_PTR(error); } devres_add(dev, page_map); return __va(res->start); }
/**
 * devm_memremap_pages - remap and provide memmap backing for the given resource
 * @dev: hosting device for @res
 * @res: "host memory" address range
 * @ref: a live per-cpu reference count
 * @altmap: optional descriptor for allocating the memmap from @res
 *
 * Notes:
 * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
 *    (or devm release event).
 *
 * 2/ @res is expected to be a host memory range that could feasibly be
 *    treated as a "System RAM" range, i.e. not a device mmio range, but
 *    this is not enforced.
 *
 * 3/ All range checks and the memory add operate on @res expanded outward
 *    to SECTION_SIZE boundaries, not on @res itself.
 *
 * Return: __va(@res->start) on success.  If the section-aligned range is
 * already entirely System RAM that address is returned immediately, before
 * @ref is checked and without registering anything.  On failure an
 * ERR_PTR(): -ENXIO when the aligned range is only partially System RAM,
 * -EINVAL when @ref is NULL, -ENOMEM when the devres record cannot be
 * allocated, -EBUSY when a section is already claimed by another
 * dev_pagemap, or the error returned by radix_tree_insert(),
 * track_pfn_remap() or arch_add_memory().
 */
void *devm_memremap_pages(struct device *dev, struct resource *res,
		struct percpu_ref *ref, struct vmem_altmap *altmap)
{
	resource_size_t key, align_start, align_size, align_end;
	pgprot_t pgprot = PAGE_KERNEL;
	struct dev_pagemap *pgmap;
	struct page_map *page_map;
	int error, nid, is_ram;
	unsigned long pfn;

	/*
	 * Expand the range to whole sections: round the start down and
	 * the end up, then derive the size from the two aligned ends.
	 */
	align_start = res->start & ~(SECTION_SIZE - 1);
	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
		- align_start;
	is_ram = region_intersects(align_start, align_size,
		IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

	if (is_ram == REGION_MIXED) {
		WARN_ONCE(1, "%s attempted on mixed region %pr\n",
				__func__, res);
		return ERR_PTR(-ENXIO);
	}

	/* Entirely System RAM already: just hand back the direct-map address */
	if (is_ram == REGION_INTERSECTS)
		return __va(res->start);

	if (!ref)
		return ERR_PTR(-EINVAL);

	page_map = devres_alloc_node(devm_memremap_pages_release,
			sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
	if (!page_map)
		return ERR_PTR(-ENOMEM);
	pgmap = &page_map->pgmap;

	/*
	 * Copy @res (and @altmap, if given) into the devres record and
	 * point the dev_pagemap at those copies rather than at the
	 * caller's objects.
	 */
	memcpy(&page_map->res, res, sizeof(*res));

	pgmap->dev = dev;
	if (altmap) {
		memcpy(&page_map->altmap, altmap, sizeof(*altmap));
		pgmap->altmap = &page_map->altmap;
	}
	pgmap->ref = ref;
	pgmap->res = &page_map->res;

	/*
	 * Claim every section of the aligned range in pgmap_radix, one
	 * entry per section, bailing out on the first collision or
	 * insertion failure.
	 */
	mutex_lock(&pgmap_lock);
	error = 0;
	align_end = align_start + align_size - 1;
	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
		struct dev_pagemap *dup;

		rcu_read_lock();
		dup = find_dev_pagemap(key);
		rcu_read_unlock();
		if (dup) {
			dev_err(dev, "%s: %pr collides with mapping for %s\n",
					__func__, res, dev_name(dup->dev));
			error = -EBUSY;
			break;
		}
		error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
				page_map);
		if (error) {
			dev_err(dev, "%s: failed: %d\n", __func__, error);
			break;
		}
	}
	mutex_unlock(&pgmap_lock);
	if (error)
		goto err_radix;

	/* Devices without a node assignment use numa_mem_id() */
	nid = dev_to_node(dev);
	if (nid < 0)
		nid = numa_mem_id();

	error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
			align_size);
	if (error)
		goto err_pfn_remap;

	/* The memory add is bracketed by mem_hotplug_begin()/done() */
	mem_hotplug_begin();
	error = arch_add_memory(nid, align_start, align_size, true);
	mem_hotplug_done();
	if (error)
		goto err_add_memory;

	for_each_device_pfn(pfn, page_map) {
		struct page *page = pfn_to_page(pfn);

		/*
		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
		 * pointer.  It is a bug if a ZONE_DEVICE page is ever
		 * freed or placed on a driver-private list.  Seed the
		 * storage with LIST_POISON* values.
		 */
		list_del(&page->lru);
		page->pgmap = pgmap;
	}
	devres_add(dev, page_map);
	return __va(res->start);

	/*
	 * Unwind in reverse order of setup.  err_pfn_remap and err_radix
	 * share the same tail: drop the radix entries, then free the
	 * not-yet-registered devres record.
	 */
 err_add_memory:
	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 err_pfn_remap:
 err_radix:
	pgmap_radix_release(res);
	devres_free(page_map);
	return ERR_PTR(error);
}
/** * devm_memremap_pages - remap and provide memmap backing for the given resource * @dev: hosting device for @res * @res: "host memory" address range * @ref: a live per-cpu reference count * @altmap: optional descriptor for allocating the memmap from @res * * Notes: * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time * (or devm release event). * * 2/ @res is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but * this is not enforced. */ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { int is_ram = region_intersects(res->start, resource_size(res), "System RAM"); resource_size_t key, align_start, align_size, align_end; struct dev_pagemap *pgmap; struct page_map *page_map; unsigned long pfn; int error, nid; if (is_ram == REGION_MIXED) { WARN_ONCE(1, "%s attempted on mixed region %pr\n", __func__, res); return ERR_PTR(-ENXIO); } if (is_ram == REGION_INTERSECTS) return __va(res->start); if (altmap && !IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) { dev_err(dev, "%s: altmap requires CONFIG_SPARSEMEM_VMEMMAP=y\n", __func__); return ERR_PTR(-ENXIO); } if (!ref) return ERR_PTR(-EINVAL); page_map = devres_alloc_node(devm_memremap_pages_release, sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); if (!page_map) return ERR_PTR(-ENOMEM); pgmap = &page_map->pgmap; memcpy(&page_map->res, res, sizeof(*res)); pgmap->dev = dev; if (altmap) { memcpy(&page_map->altmap, altmap, sizeof(*altmap)); pgmap->altmap = &page_map->altmap; } pgmap->ref = ref; pgmap->res = &page_map->res; mutex_lock(&pgmap_lock); error = 0; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); align_end = align_start + align_size - 1; for (key = align_start; key <= align_end; key += SECTION_SIZE) { struct dev_pagemap *dup; rcu_read_lock(); dup = find_dev_pagemap(key); rcu_read_unlock(); if (dup) { 
dev_err(dev, "%s: %pr collides with mapping for %s\n", __func__, res, dev_name(dup->dev)); error = -EBUSY; break; } error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT, page_map); if (error) { dev_err(dev, "%s: failed: %d\n", __func__, error); break; } } mutex_unlock(&pgmap_lock); if (error) goto err_radix; nid = dev_to_node(dev); if (nid < 0) nid = numa_mem_id(); error = arch_add_memory(nid, align_start, align_size, true); if (error) goto err_add_memory; for_each_device_pfn(pfn, page_map) { struct page *page = pfn_to_page(pfn); /* ZONE_DEVICE pages must never appear on a slab lru */ list_force_poison(&page->lru); page->pgmap = pgmap; } devres_add(dev, page_map); return __va(res->start); err_add_memory: err_radix: pgmap_radix_release(res); devres_free(page_map); return ERR_PTR(error); }
/*
 * nd_pfn_init() - validate, or create and write, the pfn/dax info block
 * @nd_pfn: pfn (or device-dax) instance whose namespace is being set up
 *
 * Allocates the in-memory info block and asks nd_pfn_validate() to check
 * the one on media.  Any result other than -ENODEV (success or a different
 * error) is returned unchanged.  On -ENODEV a fresh info block is built:
 * padding/truncation against 'System RAM' section collisions is computed,
 * the data offset is chosen according to the mode and alignment, and the
 * block is written at offset SZ_4K of the namespace.
 *
 * Return: the nd_pfn_validate() result when it is not -ENODEV; -ENOMEM if
 * the info block cannot be allocated; -ENXIO if the region is read-only,
 * the mode is neither PFN_MODE_PMEM nor PFN_MODE_RAM, or the requested
 * alignment leaves no room for data; otherwise the result of
 * nvdimm_write_bytes().
 */
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
	/* device-dax instances reserve an extra SZ_128K ahead of the data */
	u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	u32 start_pad = 0, end_trunc = 0;
	resource_size_t start, size;
	struct nd_namespace_io *nsio;
	struct nd_region *nd_region;
	struct nd_pfn_sb *pfn_sb;
	unsigned long npfns;
	phys_addr_t offset;
	const char *sig;
	u64 checksum;
	int rc;

	/* device-managed allocation, tied to the lifetime of nd_pfn->dev */
	pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
	if (!pfn_sb)
		return -ENOMEM;

	nd_pfn->pfn_sb = pfn_sb;
	/* the signature distinguishes device-dax from pfn info blocks */
	if (is_nd_dax(&nd_pfn->dev))
		sig = DAX_SIG;
	else
		sig = PFN_SIG;
	rc = nd_pfn_validate(nd_pfn, sig);
	if (rc != -ENODEV)
		return rc;

	/* -ENODEV: no info block found, so initialize a new one */;
	nd_region = to_nd_region(nd_pfn->dev.parent);
	if (nd_region->ro) {
		dev_info(&nd_pfn->dev,
				"%s is read-only, unable to init metadata\n",
				dev_name(&nd_region->dev));
		return -ENXIO;
	}

	/* start from a clean block before filling in the fields below */
	memset(pfn_sb, 0, sizeof(*pfn_sb));

	/*
	 * Check if pmem collides with 'System RAM' when section aligned and
	 * trim it accordingly.
	 *
	 * First the head: probe from the section-aligned-down start.  On a
	 * mixed result, pad the start up to the next section boundary.
	 */
	nsio = to_nd_namespace_io(&ndns->dev);
	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
	size = resource_size(&nsio->res);
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {
		start = nsio->res.start;
		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
	}

	/*
	 * Then the tail: probe from the real start to the
	 * section-aligned-up end.  On a mixed result, truncate the end
	 * down to the previous section boundary.
	 */
	start = nsio->res.start;
	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {
		size = resource_size(&nsio->res);
		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
	}

	/* NOTE(review): the sum is u32 but is printed with %d - confirm */
	if (start_pad + end_trunc)
		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
				dev_name(&ndns->dev), start_pad + end_trunc);

	/*
	 * Note, we use 64 here for the standard size of struct page,
	 * debugging options may cause it to be larger in which case the
	 * implementation will limit the pfns advertised through
	 * ->direct_access() to those that are included in the memmap.
	 *
	 * This npfns is only an estimate used to size the memmap
	 * reservation below; it is recomputed once the data offset is
	 * known.
	 */
	start += start_pad;
	size = resource_size(&nsio->res);
	npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
			/ PAGE_SIZE);
	if (nd_pfn->mode == PFN_MODE_PMEM) {
		/*
		 * The altmap should be padded out to the block size used
		 * when populating the vmemmap. This *should* be equal to
		 * PMD_SIZE for most architectures.
		 */
		offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
				max(nd_pfn->align, PMD_SIZE)) - start;
	} else if (nd_pfn->mode == PFN_MODE_RAM)
		/* no memmap reserved here: only the SZ_8K header and label area */
		offset = ALIGN(start + SZ_8K + dax_label_reserve,
				nd_pfn->align) - start;
	else
		return -ENXIO;

	/* metadata plus padding must leave at least some room for data */
	if (offset + start_pad + end_trunc >= size) {
		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
				dev_name(&ndns->dev));
		return -ENXIO;
	}

	/*
	 * Final pfn count for the data area.
	 * NOTE(review): this divides by SZ_4K while the estimate above
	 * divides by PAGE_SIZE - confirm this is intended when PAGE_SIZE
	 * is not 4K.
	 */
	npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
	/* multi-byte fields are stored little-endian */
	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
	pfn_sb->dataoff = cpu_to_le64(offset);
	pfn_sb->npfns = cpu_to_le64(npfns);
	memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
	pfn_sb->version_major = cpu_to_le16(1);
	pfn_sb->version_minor = cpu_to_le16(2);
	pfn_sb->start_pad = cpu_to_le32(start_pad);
	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
	pfn_sb->align = cpu_to_le32(nd_pfn->align);
	/* the checksum is computed after every other field has been set */
	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
	pfn_sb->checksum = cpu_to_le64(checksum);

	/* write the info block at offset SZ_4K within the namespace */
	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
}