int __cpuinit cpu_up(unsigned int cpu) { int err = 0; #ifdef CONFIG_MEMORY_HOTPLUG int nid; pg_data_t *pgdat; #endif if (!cpu_possible(cpu)) { printk(KERN_ERR "can't online cpu %d because it is not " "configured as may-hotadd at boot time\n", cpu); #if defined(CONFIG_IA64) printk(KERN_ERR "please check additional_cpus= boot " "parameter\n"); #endif return -EINVAL; } #ifdef CONFIG_MEMORY_HOTPLUG nid = cpu_to_node(cpu); if (!node_online(nid)) { err = mem_online_node(nid); if (err) return err; } pgdat = NODE_DATA(nid); if (!pgdat) { printk(KERN_ERR "Can't online cpu %d due to NULL pgdat\n", cpu); return -ENOMEM; } if (pgdat->node_zonelists->_zonerefs->zone == NULL) { mutex_lock(&zonelists_mutex); build_all_zonelists(NULL); mutex_unlock(&zonelists_mutex); } #endif cpu_maps_update_begin(); if (cpu_hotplug_disabled) { err = -EBUSY; goto out; } err = _cpu_up(cpu, 0); out: cpu_maps_update_done(); return err; }
void arch_unregister_cpu(int num) { struct node *parent = NULL; #ifdef CONFIG_NUMA int node = cpu_to_node(num); if (node_online(node)) parent = &node_devices[node].node; #endif /* CONFIG_NUMA */ return unregister_cpu(&cpu_devices[num].cpu, parent); }
static int __init topology_init(void) { int i; for (i = 0; i < MAX_NUMNODES; i++) { if (node_online(i)) arch_register_node(i); } for (i = 0; i < NR_CPUS; i++) if (cpu_possible(i)) arch_register_cpu(i); return 0; }
static acpi_status __devinit acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_madt_io_sapic *iosapic; unsigned int gsi_base; int pxm, node; /* Only care about objects w/ a method that returns the MADT */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return AE_OK; if (!buffer.length || !buffer.pointer) return AE_OK; obj = buffer.pointer; if (obj->type != ACPI_TYPE_BUFFER || obj->buffer.length < sizeof(*iosapic)) { kfree(buffer.pointer); return AE_OK; } iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { kfree(buffer.pointer); return AE_OK; } gsi_base = iosapic->global_irq_base; kfree(buffer.pointer); /* * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell * us which node to associate this with. */ pxm = acpi_get_pxm(handle); if (pxm < 0) return AE_OK; node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || cpus_empty(node_to_cpumask(node))) return AE_OK; /* We know a gsi to node mapping! */ map_iosapic_to_node(gsi_base, node); return AE_OK; }
/* * register cpu under node */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { if (node_online(nid)) { struct sys_device *obj = get_cpu_sysdev(cpu); if (!obj) return 0; return sysfs_create_link(&node_devices[nid].sysdev.kobj, &obj->kobj, kobject_name(&obj->kobj)); } return 0; }
int add_memory(int nid, u64 start, u64 size) { pg_data_t *pgdat = NULL; int new_pgdat = 0; struct resource *res; int ret; res = register_memory_resource(start, size); if (!res) return -EEXIST; if (!node_online(nid)) { pgdat = hotadd_new_pgdat(nid, start); if (!pgdat) return -ENOMEM; new_pgdat = 1; } /* call arch's memory hotadd */ ret = arch_add_memory(nid, start, size); if (ret < 0) goto error; /* we online node here. we can't roll back from here. */ node_set_online(nid); cpuset_track_online_nodes(); if (new_pgdat) { ret = register_one_node(nid); /* * If sysfs file of new node can't create, cpu on the node * can't be hot-added. There is no rollback way now. * So, check by BUG_ON() to catch it reluctantly.. */ BUG_ON(ret); } return ret; error: /* rollback pgdat allocation and others */ if (new_pgdat) rollback_node_hotadd(nid, pgdat); if (res) release_memory_resource(res); return ret; }
static void __init MP_translation_info(struct mpc_trans *m) { printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); if (mpc_record >= MAX_MPC_ENTRY) printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); else translation_table[mpc_record] = m; /* stash this for later */ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) node_set_online(m->trans_quad); }
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = apicid_to_node[apicid]; if (node == NUMA_NO_NODE || !node_online(node)) node = first_node(node_online_map); numa_set_node(cpu, node); #endif }
static acpi_status __devinit acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_madt_io_sapic *iosapic; unsigned int gsi_base; int pxm, node; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return AE_OK; if (!buffer.length || !buffer.pointer) return AE_OK; obj = buffer.pointer; if (obj->type != ACPI_TYPE_BUFFER || obj->buffer.length < sizeof(*iosapic)) { kfree(buffer.pointer); return AE_OK; } iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { kfree(buffer.pointer); return AE_OK; } gsi_base = iosapic->global_irq_base; kfree(buffer.pointer); pxm = acpi_get_pxm(handle); if (pxm < 0) return AE_OK; node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || cpumask_empty(cpumask_of_node(node))) return AE_OK; map_iosapic_to_node(gsi_base, node); return AE_OK; }
static void register_nodes(void) { int i; for (i = 0; i < MAX_NUMNODES; i++) { if (node_online(i)) { int p_node = parent_node(i); struct node *parent = NULL; if (p_node != i) parent = &node_devices[p_node]; register_node(&node_devices[i], i, parent); } } }
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA unsigned node; int cpu = smp_processor_id(); /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE || !node_online(node)) { /* reuse the value from init_cpu_to_node() */ node = cpu_to_node(cpu); } numa_set_node(cpu, node); #endif }
static void __cpuinit srat_detect_node(void) { #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); int apicid = hard_smp_processor_id(); /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = apicid_to_node[apicid]; if (node == NUMA_NO_NODE || !node_online(node)) node = first_node(node_online_map); numa_set_node(cpu, node); printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif }
/* * Setup early cpu_to_node. * * Populate cpu_to_node[] only if x86_cpu_to_apicid[], * and apicid_to_node[] tables have valid entries for a CPU. * This means we skip cpu_to_node[] initialisation for NUMA * emulation and faking node case (when running a kernel compiled * for NUMA on a non NUMA box), which is OK as cpu_to_node[] * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. * * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { int cpu; u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); for_each_possible_cpu(cpu) { int node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) continue; if (!node_online(node)) node = find_near_online_node(node); numa_set_node(cpu, node); } }
/** * percpu_populate - populate per-cpu data for given cpu * @__pdata: per-cpu data to populate further * @size: size of per-cpu object * @gfp: may sleep or not etc. * @cpu: populate per-data for this cpu * * Populating per-cpu data for a cpu coming online would be a typical * use case. You need to register a cpu hotplug handler for that purpose. * Per-cpu object is populated with zeroed buffer. */ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) { struct percpu_data *pdata = __percpu_disguise(__pdata); int node = cpu_to_node(cpu); /* * We should make sure each CPU gets private memory. */ size = roundup(size, cache_line_size()); BUG_ON(pdata->ptrs[cpu]); if (node_online(node)) pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node); else pdata->ptrs[cpu] = kzalloc(size, gfp); return pdata->ptrs[cpu]; }
/* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. * Always point %gs to its beginning */ void __init setup_per_cpu_areas(void) { ssize_t size = PERCPU_ENOUGH_ROOM; char *ptr; int cpu; /* Setup cpu_pda map */ setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ size = PERCPU_ENOUGH_ROOM; printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { ptr = alloc_bootmem_pages(size); printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); } else ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", NR_CPUS, nr_cpu_ids, nr_node_ids); /* Setup percpu data maps */ setup_per_cpu_maps(); /* Setup node to cpumask map */ setup_node_to_cpumask_map(); /* Setup cpumask_of_cpu map */ setup_cpumask_of_cpu(); }
/** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * * If NUMA is not configured or there is only one NUMA node available, * there is no reason to consider NUMA. This function determines * whether percpu allocation should consider NUMA or not. * * RETURNS: * true if NUMA should be considered; otherwise, false. */ static bool __init pcpu_need_numa(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES pg_data_t *last = NULL; unsigned int cpu; for_each_possible_cpu(cpu) { int node = early_cpu_to_node(cpu); if (node_online(node) && NODE_DATA(node) && last && last != NODE_DATA(node)) return true; last = NODE_DATA(node); } #endif return false; }
int get_mp_bus_to_node(int busnum) { int node = -1; if (busnum < 0 || busnum > (BUS_NR - 1)) return node; node = mp_bus_to_node[busnum]; /* * let numa_node_id to decide it later in dma_alloc_pages * if there is no ram on that node */ if (node != -1 && !node_online(node)) node = -1; return node; }
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { struct device *obj; if (!node_online(nid)) return 0; obj = get_cpu_device(cpu); if (!obj) return 0; sysfs_remove_link(&node_devices[nid]->dev.kobj, kobject_name(&obj->kobj)); sysfs_remove_link(&obj->kobj, kobject_name(&node_devices[nid]->dev.kobj)); return 0; }
static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_madt_io_sapic *iosapic; unsigned int gsi_base; int node; /* Only care about objects w/ a method that returns the MADT */ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return AE_OK; if (!buffer.length || !buffer.pointer) return AE_OK; obj = buffer.pointer; if (obj->type != ACPI_TYPE_BUFFER || obj->buffer.length < sizeof(*iosapic)) { kfree(buffer.pointer); return AE_OK; } iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { kfree(buffer.pointer); return AE_OK; } gsi_base = iosapic->global_irq_base; kfree(buffer.pointer); /* OK, it's an IOSAPIC MADT entry; associate it with a node */ node = acpi_get_node(handle); if (node == NUMA_NO_NODE || !node_online(node) || cpumask_empty(cpumask_of_node(node))) return AE_OK; /* We know a gsi to node mapping! */ map_iosapic_to_node(gsi_base, node); return AE_OK; }
/* register memory section under specified node if it spans that node */ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) { int ret; unsigned long pfn, sect_start_pfn, sect_end_pfn; if (!mem_blk) return -EFAULT; if (!node_online(nid)) return 0; sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); sect_end_pfn += PAGES_PER_SECTION - 1; for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int page_nid; /* * memory block could have several absent sections from start. * skip pfn range from absent section */ if (!pfn_present(pfn)) { pfn = round_down(pfn + PAGES_PER_SECTION, PAGES_PER_SECTION) - 1; continue; } page_nid = get_nid_for_pfn(pfn); if (page_nid < 0) continue; if (page_nid != nid) continue; ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); if (ret) return ret; return sysfs_create_link_nowarn(&mem_blk->dev.kobj, &node_devices[nid]->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); } /* mem section does not span the specified node */ return 0; }
/** * fill_mp_bus_to_cpumask() * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ __init static int fill_mp_bus_to_cpumask(void) { struct pci_dev *nb_dev = NULL; int i, j; u32 ldtbus, nid; static int lbnr[3] = { LDT_BUS_NUMBER_REGISTER_0, LDT_BUS_NUMBER_REGISTER_1, LDT_BUS_NUMBER_REGISTER_2 }; while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); /* * if there are no busses hanging off of the current * ldt link then both the secondary and subordinate * bus number fields are set to 0. * * RED-PEN * This is slightly broken because it assumes * HT node IDs == Linux node ids, which is not always * true. However it is probably mostly true. */ if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) { int node = NODE_ID(nid); if (!node_online(node)) node = 0; pci_bus_to_node[j] = node; } } } } return 0; }
/** * acpi_map_pxm_to_online_node - Map proximity ID to online node * @pxm: ACPI proximity ID * * This is similar to acpi_map_pxm_to_node(), but always returns an online * node. When the mapped node from a given proximity ID is offline, it * looks up the node distance table and returns the nearest online node. * * ACPI device drivers, which are called after the NUMA initialization has * completed in the kernel, can call this interface to obtain their device * NUMA topology from ACPI tables. Such drivers do not have to deal with * offline nodes. A node may be offline when a device proximity ID is * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. * "numa=off" on x86. */ int acpi_map_pxm_to_online_node(int pxm) { int node, n, dist, min_dist; node = acpi_map_pxm_to_node(pxm); if (node == NUMA_NO_NODE) node = 0; if (!node_online(node)) { min_dist = INT_MAX; for_each_online_node(n) { dist = node_distance(node, n); if (dist < min_dist) { min_dist = dist; node = n; } } }
void __cpuinit map_cpu_to_node(int cpu, int nid) { int oldnid; if (nid < 0) { /* just initialize by zero */ cpu_to_node_map[cpu] = 0; return; } /* sanity check first */ oldnid = cpu_to_node_map[cpu]; if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) { return; /* nothing to do */ } /* we don't have cpu-driven node hot add yet... In usual case, node is created from SRAT at boot time. */ if (!node_online(nid)) nid = first_online_node; cpu_to_node_map[cpu] = nid; cpu_set(cpu, node_to_cpu_mask[nid]); return; }
/* * register cpu under node */ int register_cpu_under_node(unsigned int cpu, unsigned int nid) { int ret; struct device *obj; if (!node_online(nid)) return 0; obj = get_cpu_device(cpu); if (!obj) return 0; ret = sysfs_create_link(&node_devices[nid]->dev.kobj, &obj->kobj, kobject_name(&obj->kobj)); if (ret) return ret; return sysfs_create_link(&obj->kobj, &node_devices[nid]->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); }
/* Set correct numa_node information for AMD NB functions */ static void quirk_amd_nb_node(struct pci_dev *dev) { struct pci_dev *nb_ht; unsigned int devfn; u32 node; u32 val; devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); nb_ht = pci_get_slot(dev->bus, devfn); if (!nb_ht) return; pci_read_config_dword(nb_ht, 0x60, &val); node = pcibus_to_node(dev->bus) | (val & 7); /* * Some hardware may return an invalid node ID, * so check it first: */ if (node_online(node)) set_dev_node(&dev->dev, node); pci_dev_put(nb_ht); }
/* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. */ static int numa_setup_cpu(unsigned long lcpu) { int nid = -1; struct device_node *cpu; /* * If a valid cpu-to-node mapping is already available, use it * directly instead of querying the firmware, since it represents * the most recent mapping notified to us by the platform (eg: VPHN). */ if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) { map_cpu_to_node(lcpu, nid); return nid; } cpu = of_get_cpu_node(lcpu, NULL); if (!cpu) { WARN_ON(1); if (cpu_present(lcpu)) goto out_present; else goto out; } nid = of_node_to_nid_single(cpu); out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; map_cpu_to_node(lcpu, nid); of_node_put(cpu); out: return nid; }
static void nlm_init_bootmem_node (unsigned long mapstart, unsigned long min_pfn, unsigned long max_pfn) { int i; for (i = 0; i < NLM_MAX_CPU_NODE; i++) { unsigned long map_pfn, start_pfn, end_pfn, bootmem_size; int j; if(!node_online(i)) continue; start_pfn = NODE_MEM_DATA(i)->low_pfn; end_pfn = NODE_MEM_DATA(i)->high_pfn; if (start_pfn && start_pfn < min_pfn) start_pfn = min_pfn; if (end_pfn > max_pfn) end_pfn = max_pfn; /* in general, never hit the condition */ if (start_pfn && start_pfn >= end_pfn) { NODE_MEM_DATA(i)->map_pfn = 0; /* indicate a bad map_pfn */ continue; } if (start_pfn > mapstart) map_pfn = start_pfn; else map_pfn = mapstart; if((start_pfn == 0) && (end_pfn == 0)) { map_pfn = _low_virt_to_phys(&_node_map_mem[i][0]) >> PAGE_SHIFT; __node_data[i] = __va(map_pfn << PAGE_SHIFT); } else {
void __devinit igb_check_options(struct igb_adapter *adapter) { int bd = adapter->bd_number; struct e1000_hw *hw = &adapter->hw; if (bd >= IGB_MAX_NIC) { DPRINTK(PROBE, NOTICE, "Warning: no configuration for board #%d\n", bd); DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); #ifndef module_param_array bd = IGB_MAX_NIC; #endif } { /* Interrupt Throttling Rate */ struct igb_option opt = { .type = range_option, .name = "Interrupt Throttling Rate (ints/sec)", .err = "using default of " __MODULE_STRING(DEFAULT_ITR), .def = DEFAULT_ITR, .arg = { .r = { .min = MIN_ITR, .max = MAX_ITR } } }; #ifdef module_param_array if (num_InterruptThrottleRate > bd) { #endif unsigned int itr = InterruptThrottleRate[bd]; switch (itr) { case 0: DPRINTK(PROBE, INFO, "%s turned off\n", opt.name); if(hw->mac.type >= e1000_i350) adapter->dmac = IGB_DMAC_DISABLE; adapter->rx_itr_setting = itr; break; case 1: DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", opt.name); adapter->rx_itr_setting = itr; break; case 3: DPRINTK(PROBE, INFO, "%s set to dynamic conservative mode\n", opt.name); adapter->rx_itr_setting = itr; break; default: igb_validate_option(&itr, &opt, adapter); /* Save the setting, because the dynamic bits * change itr. In case of invalid user value, * default to conservative mode, else need to * clear the lower two bits because they are * used as control */ if (itr == 3) { adapter->rx_itr_setting = itr; } else { adapter->rx_itr_setting = 1000000000 / (itr * 256); adapter->rx_itr_setting &= ~3; } break; } #ifdef module_param_array } else { adapter->rx_itr_setting = opt.def; } #endif adapter->tx_itr_setting = adapter->rx_itr_setting; } { /* Interrupt Mode */ struct igb_option opt = { .type = range_option, .name = "Interrupt Mode", .err = "defaulting to 2 (MSI-X)", .def = IGB_INT_MODE_MSIX, .arg = { .r = { .min = MIN_INTMODE, .max = MAX_INTMODE } } }; #ifdef module_param_array if (num_IntMode > bd) { #endif unsigned int int_mode = IntMode[bd]; igb_validate_option(&int_mode, &opt, adapter); adapter->int_mode = int_mode; #ifdef module_param_array } else { adapter->int_mode = opt.def; } #endif } { /* Low Latency Interrupt TCP Port */ struct igb_option opt = { .type = range_option, .name = "Low Latency Interrupt TCP Port", .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT), .def = DEFAULT_LLIPORT, .arg = { .r = { .min = MIN_LLIPORT, .max = MAX_LLIPORT } } }; #ifdef module_param_array if (num_LLIPort > bd) { #endif adapter->lli_port = LLIPort[bd]; if (adapter->lli_port) { igb_validate_option(&adapter->lli_port, &opt, adapter); } else { DPRINTK(PROBE, INFO, "%s turned off\n", opt.name); } #ifdef module_param_array } else { adapter->lli_port = opt.def; } #endif } { /* Low Latency Interrupt on Packet Size */ struct igb_option opt = { .type = range_option, .name = "Low Latency Interrupt on Packet Size", .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE), .def = DEFAULT_LLISIZE, .arg = { .r = { .min = MIN_LLISIZE, .max = MAX_LLISIZE } } }; #ifdef module_param_array if (num_LLISize > bd) { #endif adapter->lli_size = LLISize[bd]; if (adapter->lli_size) { igb_validate_option(&adapter->lli_size, &opt, adapter); } else { DPRINTK(PROBE, INFO, "%s turned off\n", opt.name); } #ifdef module_param_array } else { adapter->lli_size = opt.def; } #endif } { /* Low Latency Interrupt on TCP Push flag */ struct igb_option opt = { .type = enable_option, .name = "Low Latency Interrupt on TCP Push flag", .err = "defaulting to Disabled", .def = OPTION_DISABLED }; #ifdef module_param_array if (num_LLIPush > bd) { #endif unsigned int lli_push = LLIPush[bd]; igb_validate_option(&lli_push, &opt, adapter); adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0; #ifdef module_param_array } else { adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0; } #endif } { /* SRIOV - Enable SR-IOV VF devices */ struct igb_option opt = { .type = range_option, .name = "max_vfs - SR-IOV VF devices", .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV), .def = DEFAULT_SRIOV, .arg = { .r = { .min = MIN_SRIOV, .max = MAX_SRIOV } } }; #ifdef module_param_array if (num_max_vfs > bd) { #endif adapter->vfs_allocated_count = max_vfs[bd]; igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter); #ifdef module_param_array } else { adapter->vfs_allocated_count = opt.def; } #endif if (adapter->vfs_allocated_count) { switch (hw->mac.type) { case e1000_82575: case e1000_82580: adapter->vfs_allocated_count = 0; DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n"); default: break; } } } { /* VMDQ - Enable VMDq multiqueue receive */ struct igb_option opt = { .type = range_option, .name = "VMDQ - VMDq multiqueue queue count", .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ), .def = DEFAULT_VMDQ, .arg = { .r = { .min = MIN_VMDQ, .max = (MAX_VMDQ - adapter->vfs_allocated_count) } } }; #ifdef module_param_array if (num_VMDQ > bd) { #endif adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]); if (adapter->vfs_allocated_count && !adapter->vmdq_pools) { DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n"); adapter->vmdq_pools = 1; } igb_validate_option(&adapter->vmdq_pools, &opt, adapter); #ifdef module_param_array } else { if (!adapter->vfs_allocated_count) adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def); else adapter->vmdq_pools = 1; } #endif #ifdef CONFIG_IGB_VMDQ_NETDEV if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) { DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n"); adapter->vmdq_pools = 0; } #endif } { /* RSS - Enable RSS multiqueue receives */ struct igb_option opt = { .type = range_option, .name = "RSS - RSS multiqueue receive count", .err = "using default of " __MODULE_STRING(DEFAULT_RSS), .def = DEFAULT_RSS, .arg = { .r = { .min = MIN_RSS, .max = MAX_RSS } } }; if (adapter->vmdq_pools) { switch (hw->mac.type) { #ifndef CONFIG_IGB_VMDQ_NETDEV case e1000_82576: opt.arg.r.max = 2; break; case e1000_82575: if (adapter->vmdq_pools == 2) opt.arg.r.max = 3; if (adapter->vmdq_pools <= 2) break; #endif default: opt.arg.r.max = 1; break; } } #ifdef module_param_array if (num_RSS > bd) { #endif adapter->rss_queues = RSS[bd]; switch (adapter->rss_queues) { case 1: break; default: igb_validate_option(&adapter->rss_queues, &opt, adapter); if (adapter->rss_queues) break; case 0: adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus()); break; } #ifdef module_param_array } else { adapter->rss_queues = opt.def ?: min_t(u32, opt.arg.r.max, num_online_cpus()); } #endif } { /* QueuePairs - Enable TX/RX queue pairs for interrupt handling */ struct igb_option opt = { .type = enable_option, .name = "QueuePairs - TX/RX queue pairs for interrupt handling", .err = "defaulting to Enabled", .def = OPTION_ENABLED }; #ifdef module_param_array if (num_QueuePairs > bd) { #endif unsigned int qp = QueuePairs[bd]; /* * we must enable queue pairs if the number of queues * exceeds the number of avaialble interrupts. We are * limited to 10, or 3 per unallocated vf. */ if ((adapter->rss_queues > 4) || (adapter->vmdq_pools > 4) || ((adapter->rss_queues > 1) && ((adapter->vmdq_pools > 3) || (adapter->vfs_allocated_count > 6)))) { if (qp == OPTION_DISABLED) { qp = OPTION_ENABLED; DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n",opt.err); } } igb_validate_option(&qp, &opt, adapter); adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0; #ifdef module_param_array } else { adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0; } #endif } { /* EEE - Enable EEE for capable adapters */ if (hw->mac.type >= e1000_i350) { struct igb_option opt = { .type = enable_option, .name = "EEE Support", .err = "defaulting to Enabled", .def = OPTION_ENABLED }; #ifdef module_param_array if (num_EEE > bd) { #endif unsigned int eee = EEE[bd]; igb_validate_option(&eee, &opt, adapter); adapter->flags |= eee ? IGB_FLAG_EEE : 0; if (eee) hw->dev_spec._82575.eee_disable = false; else hw->dev_spec._82575.eee_disable = true; #ifdef module_param_array } else { adapter->flags |= opt.def ? IGB_FLAG_EEE : 0; if (adapter->flags & IGB_FLAG_EEE) hw->dev_spec._82575.eee_disable = false; else hw->dev_spec._82575.eee_disable = true; } #endif } } { /* DMAC - Enable DMA Coalescing for capable adapters */ if (hw->mac.type >= e1000_i350) { struct igb_opt_list list [] = { { IGB_DMAC_DISABLE, "DMAC Disable"}, { IGB_DMAC_MIN, "DMAC 250 usec"}, { IGB_DMAC_500, "DMAC 500 usec"}, { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"}, { IGB_DMAC_2000, "DMAC 2000 usec"}, { IGB_DMAC_3000, "DMAC 3000 usec"}, { IGB_DMAC_4000, "DMAC 4000 usec"}, { IGB_DMAC_5000, "DMAC 5000 usec"}, { IGB_DMAC_6000, "DMAC 6000 usec"}, { IGB_DMAC_7000, "DMAC 7000 usec"}, { IGB_DMAC_8000, "DMAC 8000 usec"}, { IGB_DMAC_9000, "DMAC 9000 usec"}, { IGB_DMAC_MAX, "DMAC 10000 usec"} }; struct igb_option opt = { .type = list_option, .name = "DMA Coalescing", .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE), .def = IGB_DMAC_DISABLE, .arg = { .l = { .nr = 13, .p = list } } }; #ifdef module_param_array if (num_DMAC > bd) { #endif unsigned int dmac = DMAC[bd]; if (adapter->rx_itr_setting == IGB_DMAC_DISABLE) dmac = IGB_DMAC_DISABLE; igb_validate_option(&dmac, &opt, adapter); switch (dmac) { case IGB_DMAC_DISABLE: adapter->dmac = dmac; break; case IGB_DMAC_MIN: adapter->dmac = dmac; break; case IGB_DMAC_500: adapter->dmac = dmac; break; case IGB_DMAC_EN_DEFAULT: adapter->dmac = dmac; break; case IGB_DMAC_2000: adapter->dmac = dmac; break; case IGB_DMAC_3000: adapter->dmac = dmac; break; case IGB_DMAC_4000: adapter->dmac = dmac; break; case IGB_DMAC_5000: adapter->dmac = dmac; break; case IGB_DMAC_6000: adapter->dmac = dmac; break; case IGB_DMAC_7000: adapter->dmac = dmac; break; case IGB_DMAC_8000: adapter->dmac = dmac; break; case IGB_DMAC_9000: adapter->dmac = dmac; break; case IGB_DMAC_MAX: adapter->dmac = dmac; break; default: adapter->dmac = opt.def; DPRINTK(PROBE, INFO, "Invalid DMAC setting, " "resetting DMAC to %d\n", opt.def); } #ifdef module_param_array } else adapter->dmac = opt.def; #endif } } #ifndef IGB_NO_LRO { /* LRO - Enable Large Receive Offload */ struct igb_option opt = { .type = enable_option, .name = "LRO - Large Receive Offload", .err = "defaulting to Disabled", .def = OPTION_DISABLED }; struct net_device *netdev = adapter->netdev; #ifdef module_param_array if (num_LRO > bd) { #endif unsigned int lro = LRO[bd]; igb_validate_option(&lro, &opt, adapter); netdev->features |= lro ? NETIF_F_LRO : 0; #ifdef module_param_array } else if (opt.def == OPTION_ENABLED) { netdev->features |= NETIF_F_LRO; } #endif } #endif /* IGB_NO_LRO */ { /* Node assignment */ static struct igb_option opt = { .type = range_option, .name = "Node to start on", .err = "defaulting to -1", #ifdef HAVE_EARLY_VMALLOC_NODE .def = 0, #else .def = -1, #endif .arg = { .r = { .min = 0, .max = (MAX_NUMNODES - 1)}} }; int node_param = opt.def; /* if the default was zero then we need to set the * default value to an online node, which is not * necessarily zero, and the constant initializer * above can't take first_online_node */ if (node_param == 0) /* must set opt.def for validate */ opt.def = node_param = first_online_node; #ifdef module_param_array if (num_Node > bd) { #endif node_param = Node[bd]; igb_validate_option((uint *)&node_param, &opt, adapter); if (node_param != OPTION_UNSET) { DPRINTK(PROBE, INFO, "node set to %d\n", node_param); } #ifdef module_param_array } #endif /* check sanity of the value */ if (node_param != -1 && !node_online(node_param)) { DPRINTK(PROBE, INFO, "ignoring node set to invalid value %d\n", node_param); node_param = opt.def; } adapter->node = node_param; } { /* MDD - Enable Malicious Driver Detection. Only available when SR-IOV is enabled. */ struct igb_option opt = { .type = enable_option, .name = "Malicious Driver Detection", .err = "defaulting to 1", .def = OPTION_ENABLED, .arg = { .r = { .min = OPTION_DISABLED, .max = OPTION_ENABLED } } }; #ifdef module_param_array if (num_MDD > bd) { #endif adapter->mdd = MDD[bd]; igb_validate_option((uint *)&adapter->mdd, &opt, adapter); #ifdef module_param_array } else { adapter->mdd = opt.def; } #endif } }
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; struct pci_root_info *info = NULL; int domain = root->segment; int busnum = root->secondary.start; LIST_HEAD(resources); struct pci_bus *bus = NULL; struct pci_sysdata *sd; int node; #ifdef CONFIG_ACPI_NUMA int pxm; #endif if (pci_ignore_seg) domain = 0; if (domain && !pci_domains_supported) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (multiple domains not supported)\n", domain, busnum); return NULL; } node = -1; #ifdef CONFIG_ACPI_NUMA pxm = acpi_get_pxm(device->handle); if (pxm >= 0) node = pxm_to_node(pxm); if (node != -1) set_mp_bus_to_node(busnum, node); else #endif node = get_mp_bus_to_node(busnum); if (node != -1 && !node_online(node)) node = -1; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (out of memory)\n", domain, busnum); return NULL; } sd = &info->sd; sd->domain = domain; sd->node = node; sd->companion = device; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. */ bus = pci_find_bus(domain, busnum); if (bus) { /* * If the desired bus exits, the content of bus->sysdata will * be replaced by sd. */ memcpy(bus->sysdata, sd, sizeof(*sd)); kfree(info); } else { probe_pci_root_info(info, device, busnum, domain); /* insert busn res at first */ pci_add_resource(&resources, &root->secondary); /* * _CRS with no apertures is normal, so only fall back to * defaults or native bridge info if we're ignoring _CRS. */ if (pci_use_crs) add_resources(info, &resources); else { free_pci_root_info_res(info); x86_pci_root_bus_resources(busnum, &resources); } if (!setup_mcfg_map(info, domain, (u8)root->secondary.start, (u8)root->secondary.end, root->mcfg_addr)) bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); if (bus) { pci_scan_child_bus(bus); pci_set_host_bridge_release( to_pci_host_bridge(bus->bridge), release_pci_root_info, info); } else { pci_free_resource_list(&resources); __release_pci_root_info(info); } } /* After the PCI-E bus has been walked and all devices discovered, * configure any settings of the fabric that might be necessary. */ if (bus) { struct pci_bus *child; list_for_each_entry(child, &bus->children, node) pcie_bus_configure_settings(child); } if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA if (pxm >= 0) dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d (pxm %d)\n", node, pxm); #else dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); #endif } return bus; }
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) { struct acpi_device *device = root->device; int domain = root->segment; int busnum = root->secondary.start; struct pci_bus *bus; struct pci_sysdata *sd; int node; #ifdef CONFIG_ACPI_NUMA int pxm; #endif if (domain && !pci_domains_supported) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (multiple domains not supported)\n", domain, busnum); return NULL; } node = -1; #ifdef CONFIG_ACPI_NUMA pxm = acpi_get_pxm(device->handle); if (pxm >= 0) node = pxm_to_node(pxm); if (node != -1) set_mp_bus_to_node(busnum, node); else #endif node = get_mp_bus_to_node(busnum); if (node != -1 && !node_online(node)) node = -1; /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. * It's arguable whether it's worth the trouble to care. */ sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { printk(KERN_WARNING "pci_bus %04x:%02x: " "ignored (out of memory)\n", domain, busnum); return NULL; } sd->domain = domain; sd->node = node; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. */ bus = pci_find_bus(domain, busnum); if (bus) { /* * If the desired bus exits, the content of bus->sysdata will * be replaced by sd. */ memcpy(bus->sysdata, sd, sizeof(*sd)); kfree(sd); } else { bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); if (bus) { get_current_resources(device, busnum, domain, bus); bus->subordinate = pci_scan_child_bus(bus); } } if (!bus) kfree(sd); if (bus && node != -1) { #ifdef CONFIG_ACPI_NUMA if (pxm >= 0) dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d (pxm %d)\n", node, pxm); #else dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); #endif } return bus; }