/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
        int i, j;

        for (i = 0; i < slit->locality_count; i++)
                for (j = 0; j < slit->locality_count; j++)
                        numa_set_distance(pxm_to_node(i), pxm_to_node(j),
                                slit->entry[slit->locality_count * i + j]);
}
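/*
 * Every caller in this collection funnels through pxm_to_node(). For
 * reference, a minimal sketch of that helper follows, modeled on the
 * common drivers/acpi/numa.c layout; the pxm_to_node_map[] array name
 * and its initialization are illustrative assumptions, not a verbatim
 * copy of any particular kernel release.
 */
static int pxm_to_node_map[MAX_PXM_DOMAINS]
                        = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };

int pxm_to_node(int pxm)
{
        /* Negative or never-mapped proximity domains have no node. */
        if (pxm < 0)
                return NUMA_NO_NODE;
        return pxm_to_node_map[pxm];
}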
struct pci_bus * __devinit
pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
{
        struct pci_root_info info;
        struct pci_controller *controller;
        unsigned int windows = 0;
        struct pci_bus *pbus;
        char *name;
        int pxm;

        controller = alloc_pci_controller(domain);
        if (!controller)
                goto out1;

        controller->acpi_handle = device->handle;

        pxm = acpi_get_pxm(controller->acpi_handle);
#ifdef CONFIG_NUMA
        if (pxm >= 0)
                controller->node = pxm_to_node(pxm);
#endif

        acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
                        &windows);
        if (windows) {
                controller->window =
                        kmalloc_node(sizeof(*controller->window) * windows,
                                     GFP_KERNEL, controller->node);
                if (!controller->window)
                        goto out2;
        }

        name = kmalloc(16, GFP_KERNEL);
        if (!name)
                goto out3;

        sprintf(name, "PCI Bus %04x:%02x", domain, bus);
        info.controller = controller;
        info.name = name;
        acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
                        &info);

        /*
         * See arch/x86/pci/acpi.c.
         * The desired pci bus might already have been scanned in a quirk.
         * We should handle that case here, but it appears that IA64 has no
         * such quirk, so we just ignore it for now.
         */
        pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
        if (pbus)
                pcibios_setup_root_windows(pbus, controller);

        return pbus;

out3:
        kfree(controller->window);
out2:
        kfree(controller);
out1:
        return NULL;
}
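/*
 * Several of these scan paths obtain the proximity domain via
 * acpi_get_pxm(). A minimal sketch follows, assuming the historical
 * behavior of walking up the ACPI namespace until a _PXM method
 * evaluates successfully; treat it as illustrative, not authoritative.
 */
int acpi_get_pxm(acpi_handle h)
{
        unsigned long long pxm;
        acpi_status status;
        acpi_handle handle;
        acpi_handle phandle = h;

        do {
                handle = phandle;
                /* _PXM on this object, or inherited from an ancestor. */
                status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
                if (ACPI_SUCCESS(status))
                        return pxm;
                status = acpi_get_parent(handle, &phandle);
        } while (ACPI_SUCCESS(status));

        return -1;
}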
unsigned int __init dom0_max_vcpus(void)
{
    unsigned int i, max_vcpus, limit;
    nodeid_t node;

    /* Map the requested dom0 proximity domains to online NUMA nodes. */
    for ( i = 0; i < dom0_nr_pxms; ++i )
        if ( (node = pxm_to_node(dom0_pxms[i])) != NUMA_NO_NODE )
            node_set(node, dom0_nodes);
    nodes_and(dom0_nodes, dom0_nodes, node_online_map);
    if ( nodes_empty(dom0_nodes) )
        dom0_nodes = node_online_map;

    /* Collect the CPUs of those nodes, restricted to cpupool0. */
    for_each_node_mask ( node, dom0_nodes )
        cpumask_or(&dom0_cpus, &dom0_cpus, &node_to_cpumask(node));
    cpumask_and(&dom0_cpus, &dom0_cpus, cpupool0->cpu_valid);
    if ( cpumask_empty(&dom0_cpus) )
        cpumask_copy(&dom0_cpus, cpupool0->cpu_valid);

    /* Clamp the vCPU count between the command-line bounds and the limit. */
    max_vcpus = cpumask_weight(&dom0_cpus);
    if ( opt_dom0_max_vcpus_min > max_vcpus )
        max_vcpus = opt_dom0_max_vcpus_min;
    if ( opt_dom0_max_vcpus_max < max_vcpus )
        max_vcpus = opt_dom0_max_vcpus_max;
    limit = dom0_pvh ? HVM_MAX_VCPUS : MAX_VIRT_CPUS;
    if ( max_vcpus > limit )
        max_vcpus = limit;

    return max_vcpus;
}
static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header,
                                      const unsigned long end)
{
        struct acpi_srat_gicc_affinity *pa;
        int cpu, pxm, node;

        if (srat_disabled())
                return -EINVAL;

        pa = (struct acpi_srat_gicc_affinity *)header;
        if (!pa)
                return -EINVAL;

        if (!(pa->flags & ACPI_SRAT_GICC_ENABLED))
                return 0;

        pxm = pa->proximity_domain;
        node = pxm_to_node(pxm);

        /*
         * If we can't map the UID to a logical cpu, the UID is not part
         * of the possible cpus, so we don't need a NUMA mapping for it;
         * skip the SRAT entry and keep parsing.
         */
        cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid);
        if (cpu < 0)
                return 0;

        acpi_early_node_map[cpu] = node;
        pr_info("SRAT: PXM %d -> MPIDR 0x%llx -> Node %d\n", pxm,
                cpu_logical_map(cpu), node);
        return 0;
}
/*
 * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
 * I/O localities since SRAT does not list them. I/O localities are
 * not supported at this point.
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
        int i, j;

        for (i = 0; i < slit->locality_count; i++) {
                const int from_node = pxm_to_node(i);

                if (from_node == NUMA_NO_NODE)
                        continue;

                for (j = 0; j < slit->locality_count; j++) {
                        const int to_node = pxm_to_node(j);

                        if (to_node == NUMA_NO_NODE)
                                continue;

                        numa_set_distance(from_node, to_node,
                                slit->entry[slit->locality_count * i + j]);
                }
        }
}
static acpi_status __devinit
acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
{
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *obj;
        struct acpi_madt_io_sapic *iosapic;
        unsigned int gsi_base;
        int pxm, node;

        /* Only care about objects w/ a method that returns the MADT */
        if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
                return AE_OK;

        if (!buffer.length || !buffer.pointer)
                return AE_OK;

        obj = buffer.pointer;
        if (obj->type != ACPI_TYPE_BUFFER ||
            obj->buffer.length < sizeof(*iosapic)) {
                kfree(buffer.pointer);
                return AE_OK;
        }

        iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;

        if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
                kfree(buffer.pointer);
                return AE_OK;
        }

        gsi_base = iosapic->global_irq_base;

        kfree(buffer.pointer);

        /*
         * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
         * us which node to associate this with.
         */
        pxm = acpi_get_pxm(handle);
        if (pxm < 0)
                return AE_OK;

        node = pxm_to_node(pxm);

        if (node >= MAX_NUMNODES || !node_online(node) ||
            cpus_empty(node_to_cpumask(node)))
                return AE_OK;

        /* We know a gsi to node mapping! */
        map_iosapic_to_node(gsi_base, node);
        return AE_OK;
}
struct pci_bus * __devinit
pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
{
        struct pci_root_info info;
        struct pci_controller *controller;
        unsigned int windows = 0;
        struct pci_bus *pbus;
        char *name;
        int pxm;

        controller = alloc_pci_controller(domain);
        if (!controller)
                goto out1;

        controller->acpi_handle = device->handle;

        /* Attach the controller to its NUMA node via the _PXM value. */
        pxm = acpi_get_pxm(controller->acpi_handle);
#ifdef CONFIG_NUMA
        if (pxm >= 0)
                controller->node = pxm_to_node(pxm);
#endif

        acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
                        &windows);
        controller->window = kmalloc_node(sizeof(*controller->window) * windows,
                        GFP_KERNEL, controller->node);
        if (!controller->window)
                goto out2;

        name = kmalloc(16, GFP_KERNEL);
        if (!name)
                goto out3;

        sprintf(name, "PCI Bus %04x:%02x", domain, bus);
        info.controller = controller;
        info.name = name;
        acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
                        &info);

        pbus = pci_scan_bus_parented(NULL, bus, &pci_root_ops, controller);
        if (pbus)
                pcibios_setup_root_windows(pbus, controller);

        return pbus;

out3:
        kfree(controller->window);
out2:
        kfree(controller);
out1:
        return NULL;
}
/* Translate a proximity domain to the NASID of its first memory block. */
static int __init pxm_to_nasid(int pxm)
{
        int i;
        int nid;

        nid = pxm_to_node(pxm);
        for (i = 0; i < num_node_memblks; i++) {
                if (node_memblk[i].nid == nid)
                        return NASID_GET(node_memblk[i].start_paddr);
        }
        return -1;
}
static acpi_status __devinit
acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
{
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *obj;
        struct acpi_madt_io_sapic *iosapic;
        unsigned int gsi_base;
        int pxm, node;

        /* Only care about objects w/ a method that returns the MADT */
        if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
                return AE_OK;

        if (!buffer.length || !buffer.pointer)
                return AE_OK;

        obj = buffer.pointer;
        if (obj->type != ACPI_TYPE_BUFFER ||
            obj->buffer.length < sizeof(*iosapic)) {
                kfree(buffer.pointer);
                return AE_OK;
        }

        iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;

        if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
                kfree(buffer.pointer);
                return AE_OK;
        }

        gsi_base = iosapic->global_irq_base;

        kfree(buffer.pointer);

        /*
         * It's an IOSAPIC MADT entry; look for a _PXM value to tell
         * us which node to associate this with.
         */
        pxm = acpi_get_pxm(handle);
        if (pxm < 0)
                return AE_OK;

        node = pxm_to_node(pxm);

        if (node >= MAX_NUMNODES || !node_online(node) ||
            cpumask_empty(cpumask_of_node(node)))
                return AE_OK;

        /* We know a gsi to node mapping! */
        map_iosapic_to_node(gsi_base, node);
        return AE_OK;
}
static int
acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
{
#ifdef CONFIG_ACPI_NUMA
        int pxm_id;

        pxm_id = acpi_get_pxm(handle);

        /*
         * Assume the container driver has set the proximity domain and
         * initialized the pxm_to_node(pxm_id) mapping and pxm_flag.
         */
        node_cpuid[cpu].nid = (pxm_id < 0) ? 0 : pxm_to_node(pxm_id);

        node_cpuid[cpu].phys_id = physid;
#endif
        return (0);
}
int __init get_memcfg_from_srat(void)
{
        int i, j, nid;

        if (srat_disabled())
                goto out_fail;

        if (acpi_numa_init() < 0)
                goto out_fail;

        if (num_memory_chunks == 0) {
                printk(KERN_DEBUG
                        "could not find any ACPI SRAT memory areas.\n");
                goto out_fail;
        }

        /* Calculate total number of nodes in system from PXM bitmap and create
         * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
         * to specify the range of _PXM values.)
         */
        /*
         * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
         * numbers could go as high as 256, and MAX_NUMNODES for i386 is
         * typically 32, so we will continue numbering them in this manner
         * until MAX_NUMNODES approaches MAX_PXM_DOMAINS for i386.
         */
        nodes_clear(node_online_map);
        for (i = 0; i < MAX_PXM_DOMAINS; i++) {
                if (BMAP_TEST(pxm_bitmap, i)) {
                        int nid = acpi_map_pxm_to_node(i);
                        node_set_online(nid);
                }
        }
        BUG_ON(num_online_nodes() == 0);

        /* set cnode id in memory chunk structure */
        for (i = 0; i < num_memory_chunks; i++)
                node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);

        printk(KERN_DEBUG "pxm bitmap: ");
        for (i = 0; i < sizeof(pxm_bitmap); i++) {
                printk(KERN_CONT "%02x ", pxm_bitmap[i]);
        }
        printk(KERN_CONT "\n");
        printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
                num_online_nodes());
        printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
                num_memory_chunks);

        for (i = 0; i < MAX_LOCAL_APIC; i++)
                set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));

        for (j = 0; j < num_memory_chunks; j++) {
                struct node_memory_chunk_s *chunk = &node_memory_chunk[j];

                printk(KERN_DEBUG
                        "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
                        j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
                if (node_read_chunk(chunk->nid, chunk))
                        continue;

                memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
                                                     min(chunk->end_pfn, max_pfn));
        }
        /* for out of order entries in SRAT */
        sort_node_map();

        for_each_online_node(nid) {
                unsigned long start = node_start_pfn[nid];
                unsigned long end = min(node_end_pfn[nid], max_pfn);

                memory_present(nid, start, end);
                node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
        }
        return 1;
out_fail:
        printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
                        " table\n");
        return 0;
}
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
        struct acpi_device *device = root->device;
        struct pci_root_info *info = NULL;
        int domain = root->segment;
        int busnum = root->secondary.start;
        LIST_HEAD(resources);
        struct pci_bus *bus = NULL;
        struct pci_sysdata *sd;
        int node;
#ifdef CONFIG_ACPI_NUMA
        int pxm;
#endif

        if (pci_ignore_seg)
                domain = 0;

        if (domain && !pci_domains_supported) {
                printk(KERN_WARNING "pci_bus %04x:%02x: "
                       "ignored (multiple domains not supported)\n",
                       domain, busnum);
                return NULL;
        }

        node = -1;
#ifdef CONFIG_ACPI_NUMA
        pxm = acpi_get_pxm(device->handle);
        if (pxm >= 0)
                node = pxm_to_node(pxm);
        if (node != -1)
                set_mp_bus_to_node(busnum, node);
        else
#endif
                node = get_mp_bus_to_node(busnum);

        if (node != -1 && !node_online(node))
                node = -1;

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                printk(KERN_WARNING "pci_bus %04x:%02x: "
                       "ignored (out of memory)\n", domain, busnum);
                return NULL;
        }

        sd = &info->sd;
        sd->domain = domain;
        sd->node = node;
        sd->companion = device;

        /*
         * Maybe the desired pci bus has already been scanned. In that case
         * it is unnecessary to scan the pci bus with the given domain,busnum.
         */
        bus = pci_find_bus(domain, busnum);
        if (bus) {
                /*
                 * If the desired bus exists, the content of bus->sysdata
                 * will be replaced by sd.
                 */
                memcpy(bus->sysdata, sd, sizeof(*sd));
                kfree(info);
        } else {
                probe_pci_root_info(info, device, busnum, domain);

                /* insert busn res at first */
                pci_add_resource(&resources, &root->secondary);
                /*
                 * _CRS with no apertures is normal, so only fall back to
                 * defaults or native bridge info if we're ignoring _CRS.
                 */
                if (pci_use_crs)
                        add_resources(info, &resources);
                else {
                        free_pci_root_info_res(info);
                        x86_pci_root_bus_resources(busnum, &resources);
                }

                if (!setup_mcfg_map(info, domain, (u8)root->secondary.start,
                                    (u8)root->secondary.end, root->mcfg_addr))
                        bus = pci_create_root_bus(NULL, busnum, &pci_root_ops,
                                                  sd, &resources);

                if (bus) {
                        pci_scan_child_bus(bus);
                        pci_set_host_bridge_release(
                                to_pci_host_bridge(bus->bridge),
                                release_pci_root_info, info);
                } else {
                        pci_free_resource_list(&resources);
                        __release_pci_root_info(info);
                }
        }

        /* After the PCI-E bus has been walked and all devices discovered,
         * configure any settings of the fabric that might be necessary.
         */
        if (bus) {
                struct pci_bus *child;

                list_for_each_entry(child, &bus->children, node)
                        pcie_bus_configure_settings(child);
        }

        if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA
                if (pxm >= 0)
                        dev_printk(KERN_DEBUG, &bus->dev,
                                   "on NUMA node %d (pxm %d)\n", node, pxm);
#else
                dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
#endif
        }

        return bus;
}
void __init acpi_numa_arch_fixup(void)
{
        int i, j, node_from, node_to;

        /* If there's no SRAT, fix the phys_id and mark node 0 online */
        if (srat_num_cpus == 0) {
                node_set_online(0);
                node_cpuid[0].phys_id = hard_smp_processor_id();
                return;
        }

        /*
         * MCD - This can probably be dropped now.  No need for pxm ID to
         * node ID mapping with sparse node numbering iff
         * MAX_PXM_DOMAINS <= MAX_NUMNODES.
         */
        nodes_clear(node_online_map);
        for (i = 0; i < MAX_PXM_DOMAINS; i++) {
                if (pxm_bit_test(i)) {
                        int nid = acpi_map_pxm_to_node(i);
                        node_set_online(nid);
                }
        }

        /* set logical node id in memory chunk structure */
        for (i = 0; i < num_node_memblks; i++)
                node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);

        /* assign memory bank numbers for each chunk on each node */
        for_each_online_node(i) {
                int bank;

                bank = 0;
                for (j = 0; j < num_node_memblks; j++)
                        if (node_memblk[j].nid == i)
                                node_memblk[j].bank = bank++;
        }

        /* set logical node id in cpu structure */
        for_each_possible_early_cpu(i)
                node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);

        printk(KERN_INFO "Number of logical nodes in system = %d\n",
               num_online_nodes());
        printk(KERN_INFO "Number of memory chunks in system = %d\n",
               num_node_memblks);

        if (!slit_table)
                return;

        memset(numa_slit, -1, sizeof(numa_slit));
        for (i = 0; i < slit_table->locality_count; i++) {
                if (!pxm_bit_test(i))
                        continue;
                node_from = pxm_to_node(i);
                for (j = 0; j < slit_table->locality_count; j++) {
                        if (!pxm_bit_test(j))
                                continue;
                        node_to = pxm_to_node(j);
                        node_distance(node_from, node_to) =
                                slit_table->entry[i * slit_table->locality_count + j];
                }
        }

#ifdef SLIT_DEBUG
        printk("ACPI 2.0 SLIT locality table:\n");
        for_each_online_node(i) {
                for_each_online_node(j)
                        printk("%03d ", node_distance(i, j));
                printk("\n");
        }
#endif
}
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
{
        struct acpi_device *device = root->device;
        int domain = root->segment;
        int busnum = root->secondary.start;
        struct pci_bus *bus;
        struct pci_sysdata *sd;
        int node;
#ifdef CONFIG_ACPI_NUMA
        int pxm;
#endif

        if (domain && !pci_domains_supported) {
                printk(KERN_WARNING "pci_bus %04x:%02x: "
                       "ignored (multiple domains not supported)\n",
                       domain, busnum);
                return NULL;
        }

        node = -1;
#ifdef CONFIG_ACPI_NUMA
        pxm = acpi_get_pxm(device->handle);
        if (pxm >= 0)
                node = pxm_to_node(pxm);
        if (node != -1)
                set_mp_bus_to_node(busnum, node);
        else
#endif
                node = get_mp_bus_to_node(busnum);

        if (node != -1 && !node_online(node))
                node = -1;

        /* Allocate per-root-bus (not per bus) arch-specific data.
         * TODO: leak; this memory is never freed.
         * It's arguable whether it's worth the trouble to care.
         */
        sd = kzalloc(sizeof(*sd), GFP_KERNEL);
        if (!sd) {
                printk(KERN_WARNING "pci_bus %04x:%02x: "
                       "ignored (out of memory)\n", domain, busnum);
                return NULL;
        }

        sd->domain = domain;
        sd->node = node;
        /*
         * Maybe the desired pci bus has already been scanned. In that case
         * it is unnecessary to scan the pci bus with the given domain,busnum.
         */
        bus = pci_find_bus(domain, busnum);
        if (bus) {
                /*
                 * If the desired bus exists, the content of bus->sysdata
                 * will be replaced by sd.
                 */
                memcpy(bus->sysdata, sd, sizeof(*sd));
                kfree(sd);
        } else {
                bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd);
                if (bus) {
                        get_current_resources(device, busnum, domain, bus);
                        bus->subordinate = pci_scan_child_bus(bus);
                }
        }

        if (!bus)
                kfree(sd);

        if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA
                if (pxm >= 0)
                        dev_printk(KERN_DEBUG, &bus->dev,
                                   "on NUMA node %d (pxm %d)\n", node, pxm);
#else
                dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
#endif
        }

        return bus;
}
/* Parse the ACPI System Resource Affinity Table (SRAT) */
static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
{
        u8 *start, *end, *p;
        int i, j, nid;

        start = (u8 *)(&(sratp->reserved) + 1);        /* skip header */
        p = start;
        end = (u8 *)sratp + sratp->header.length;

        memset(pxm_bitmap, 0, sizeof(pxm_bitmap));     /* init proximity domain bitmap */
        memset(node_memory_chunk, 0, sizeof(node_memory_chunk));

        num_memory_chunks = 0;
        while (p < end) {
                switch (*p) {
                case ACPI_SRAT_TYPE_CPU_AFFINITY:
                        parse_cpu_affinity_structure(p);
                        break;
                case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
                        parse_memory_affinity_structure(p);
                        break;
                default:
                        printk("ACPI 2.0 SRAT: unknown entry skipped: type=0x%02X, len=%d\n",
                               p[0], p[1]);
                        break;
                }
                p += p[1];
                if (p[1] == 0) {
                        printk("acpi20_parse_srat: Entry length value is zero;"
                               " can't parse any further!\n");
                        break;
                }
        }

        if (num_memory_chunks == 0) {
                printk("could not find any ACPI SRAT memory areas.\n");
                goto out_fail;
        }

        /* Calculate total number of nodes in system from PXM bitmap and create
         * a set of sequential node IDs starting at zero.  (ACPI doesn't seem
         * to specify the range of _PXM values.)
         */
        /*
         * MCD - we no longer HAVE to number nodes sequentially.  PXM domain
         * numbers could go as high as 256, and MAX_NUMNODES for i386 is
         * typically 32, so we will continue numbering them in this manner
         * until MAX_NUMNODES approaches MAX_PXM_DOMAINS for i386.
         */
        nodes_clear(node_online_map);
        for (i = 0; i < MAX_PXM_DOMAINS; i++) {
                if (BMAP_TEST(pxm_bitmap, i)) {
                        int nid = acpi_map_pxm_to_node(i);
                        node_set_online(nid);
                }
        }
        BUG_ON(num_online_nodes() == 0);

        /* set cnode id in memory chunk structure */
        for (i = 0; i < num_memory_chunks; i++)
                node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);

        printk("pxm bitmap: ");
        for (i = 0; i < sizeof(pxm_bitmap); i++) {
                printk("%02X ", pxm_bitmap[i]);
        }
        printk("\n");
        printk("Number of logical nodes in system = %d\n", num_online_nodes());
        printk("Number of memory chunks in system = %d\n", num_memory_chunks);

        for (i = 0; i < MAX_APICID; i++)
                apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);

        for (j = 0; j < num_memory_chunks; j++) {
                struct node_memory_chunk_s *chunk = &node_memory_chunk[j];

                printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
                       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
                node_read_chunk(chunk->nid, chunk);
                add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
        }

        for_each_online_node(nid) {
                unsigned long start = node_start_pfn[nid];
                unsigned long end = node_end_pfn[nid];

                memory_present(nid, start, end);
                node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
        }
        return 1;
out_fail:
        return 0;
}
void __init acpi_numa_arch_fixup(void)
{
        int i, j, node_from, node_to;

        /* If there's no SRAT, fix the phys_id and mark node 0 online */
        if (srat_num_cpus == 0) {
                node_set_online(0);
                node_cpuid[0].phys_id = hard_smp_processor_id();
                return;
        }

        /* Map every proximity domain seen in the SRAT to a logical node. */
        nodes_clear(node_online_map);
        for (i = 0; i < MAX_PXM_DOMAINS; i++) {
                if (pxm_bit_test(i)) {
                        int nid = acpi_map_pxm_to_node(i);
                        node_set_online(nid);
                }
        }

        /* set logical node id in memory chunk structure */
        for (i = 0; i < num_node_memblks; i++)
                node_memblk[i].nid = pxm_to_node(node_memblk[i].nid);

        /* assign memory bank numbers for each chunk on each node */
        for_each_online_node(i) {
                int bank;

                bank = 0;
                for (j = 0; j < num_node_memblks; j++)
                        if (node_memblk[j].nid == i)
                                node_memblk[j].bank = bank++;
        }

        /* set logical node id in cpu structure */
        for_each_possible_early_cpu(i)
                node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);

        printk(KERN_INFO "Number of logical nodes in system = %d\n",
               num_online_nodes());
        printk(KERN_INFO "Number of memory chunks in system = %d\n",
               num_node_memblks);

        if (!slit_table) {
                /* No SLIT: fall back to default local/remote distances. */
                for (i = 0; i < MAX_NUMNODES; i++)
                        for (j = 0; j < MAX_NUMNODES; j++)
                                node_distance(i, j) = i == j ?
                                        LOCAL_DISTANCE : REMOTE_DISTANCE;
                return;
        }

        memset(numa_slit, -1, sizeof(numa_slit));
        for (i = 0; i < slit_table->locality_count; i++) {
                if (!pxm_bit_test(i))
                        continue;
                node_from = pxm_to_node(i);
                for (j = 0; j < slit_table->locality_count; j++) {
                        if (!pxm_bit_test(j))
                                continue;
                        node_to = pxm_to_node(j);
                        node_distance(node_from, node_to) =
                                slit_table->entry[i * slit_table->locality_count + j];
                }
        }

#ifdef SLIT_DEBUG
        printk("ACPI 2.0 SLIT locality table:\n");
        for_each_online_node(i) {
                for_each_online_node(j)
                        printk("%03d ", node_distance(i, j));
                printk("\n");
        }
#endif
}
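/*
 * The fixup paths above allocate logical node IDs with
 * acpi_map_pxm_to_node(). A minimal sketch follows, assuming the usual
 * first-fit allocation over a nodes_found_map nodemask as in
 * drivers/acpi/numa.c, and reusing the illustrative pxm_to_node_map[]
 * from the sketch near the top; names and bounds checks are assumptions.
 */
int acpi_map_pxm_to_node(int pxm)
{
        int node;

        if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
                return NUMA_NO_NODE;

        node = pxm_to_node_map[pxm];
        if (node == NUMA_NO_NODE) {
                /* Hand out the lowest node ID not yet claimed by a PXM. */
                if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
                        return NUMA_NO_NODE;
                node = first_unset_node(nodes_found_map);
                pxm_to_node_map[pxm] = node;
                node_set(node, nodes_found_map);
        }

        return node;
}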