int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

#ifdef	CONFIG_MEMORY_HOTPLUG
	int nid;
	pg_data_t	*pgdat;
#endif

	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

#ifdef	CONFIG_MEMORY_HOTPLUG
	nid = cpu_to_node(cpu);
	if (!node_online(nid)) {
		err = mem_online_node(nid);
		if (err)
			return err;
	}

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		printk(KERN_ERR
			"Can't online cpu %d due to NULL pgdat\n", cpu);
		return -ENOMEM;
	}

	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL);
		mutex_unlock(&zonelists_mutex);
	}
#endif

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
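A hedged sketch of the typical boot-time caller: smp_init() walks the present CPUs and calls cpu_up() on each one that is not yet online. setup_max_cpus and the helper name below are assumptions for illustration, not part of the excerpt above.

/* Sketch only: bring up all present secondary CPUs, as smp_init() does. */
static void __init bring_up_secondary_cpus(void)
{
	unsigned int cpu;

	for_each_present_cpu(cpu) {
		if (num_online_cpus() >= setup_max_cpus)	/* boot limit */
			break;
		if (!cpu_online(cpu))
			cpu_up(cpu);
	}
}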
Example #2
void arch_unregister_cpu(int num)
{
	struct node *parent = NULL;

#ifdef CONFIG_NUMA
	int node = cpu_to_node(num);

	if (node_online(node))
		parent = &node_devices[node].node;
#endif /* CONFIG_NUMA */

	unregister_cpu(&cpu_devices[num].cpu, parent);
}
Example #3
static int __init topology_init(void)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		if (node_online(i))
			arch_register_node(i);
	}

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_possible(i))
			arch_register_cpu(i);
	}

	return 0;
}
Example #4
static acpi_status __devinit
acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
{
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *obj;
	struct acpi_madt_io_sapic *iosapic;
	unsigned int gsi_base;
	int pxm, node;

	/* Only care about objects w/ a method that returns the MADT */
	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
		return AE_OK;

	if (!buffer.length || !buffer.pointer)
		return AE_OK;

	obj = buffer.pointer;
	if (obj->type != ACPI_TYPE_BUFFER ||
	    obj->buffer.length < sizeof(*iosapic)) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;

	if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	gsi_base = iosapic->global_irq_base;

	kfree(buffer.pointer);

	/*
	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
	 * us which node to associate this with.
	 */
	pxm = acpi_get_pxm(handle);
	if (pxm < 0)
		return AE_OK;

	node = pxm_to_node(pxm);

	if (node >= MAX_NUMNODES || !node_online(node) ||
	    cpus_empty(node_to_cpumask(node)))
		return AE_OK;

	/* We know a gsi to node mapping! */
	map_iosapic_to_node(gsi_base, node);
	return AE_OK;
}
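For context, a hedged sketch of how a walker like this is driven: an initcall asks ACPICA to visit every device handle in the namespace. This mirrors the ia64 acpi_map_iosapics() wrapper; treat it as illustrative of the call site, not as part of the excerpt.

static int __init acpi_map_iosapics(void)
{
	/* Walk all device handles; acpi_map_iosapic() filters by _MAT. */
	acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
	return 0;
}

fs_initcall(acpi_map_iosapics);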
Example #5
File: node.c Project: E-LLP/n900
/*
 * register cpu under node
 */
int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	if (node_online(nid)) {
		struct sys_device *obj = get_cpu_sysdev(cpu);
		if (!obj)
			return 0;
		return sysfs_create_link(&node_devices[nid].sysdev.kobj,
					 &obj->kobj,
					 kobject_name(&obj->kobj));
	 }

	return 0;
}
Example #6
int add_memory(int nid, u64 start, u64 size)
{
	pg_data_t *pgdat = NULL;
	int new_pgdat = 0;
	struct resource *res;
	int ret;

	res = register_memory_resource(start, size);
	if (!res)
		return -EEXIST;

	if (!node_online(nid)) {
		pgdat = hotadd_new_pgdat(nid, start);
		if (!pgdat)
			return -ENOMEM;
		new_pgdat = 1;
	}

	/* call arch's memory hotadd */
	ret = arch_add_memory(nid, start, size);

	if (ret < 0)
		goto error;

	/* We online the node here; we can't roll back past this point. */
	node_set_online(nid);

	cpuset_track_online_nodes();

	if (new_pgdat) {
		ret = register_one_node(nid);
		/*
		 * If the sysfs file for the new node cannot be created,
		 * CPUs on that node cannot be hot-added. There is no
		 * rollback path at this point, so catch the failure with
		 * BUG_ON(), reluctantly.
		 */
		BUG_ON(ret);
	}

	return ret;
error:
	/* rollback pgdat allocation and others */
	if (new_pgdat)
		rollback_node_hotadd(nid, pgdat);
	if (res)
		release_memory_resource(res);

	return ret;
}
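A hedged caller sketch, loosely following the ACPI memory-hotplug path: hand a firmware-described range to add_memory() and tolerate ranges the resource tree already knows about. The helper name is an assumption for illustration.

/* Sketch only: hot-add one firmware-described memory range to @nid. */
static int hotadd_range(int nid, u64 start, u64 size)
{
	int ret = add_memory(nid, start, size);

	/* An already-registered range is not treated as a failure here. */
	if (ret == -EEXIST)
		ret = 0;
	return ret;
}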
Example #7
static void __init MP_translation_info(struct mpc_trans *m)
{
	printk(KERN_INFO
	    "Translation: record %d, type %d, quad %d, global %d, local %d\n",
	       mpc_record, m->trans_type, m->trans_quad, m->trans_global,
	       m->trans_local);

	if (mpc_record >= MAX_MPC_ENTRY)
		printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
	else
		translation_table[mpc_record] = m; /* stash this for later */

	if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
		node_set_online(m->trans_quad);
}
Example #8
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
	unsigned node;
	int cpu = smp_processor_id();
	int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;

	/* Don't do the funky fallback heuristics the AMD version employs
	   for now. */
	node = apicid_to_node[apicid];
	if (node == NUMA_NO_NODE || !node_online(node))
		node = first_node(node_online_map);
	numa_set_node(cpu, node);
#endif
}
Example #9
static acpi_status __devinit
acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
{
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *obj;
	struct acpi_madt_io_sapic *iosapic;
	unsigned int gsi_base;
	int pxm, node;

	
	/* Only care about objects w/ a method that returns the MADT */
	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
		return AE_OK;

	if (!buffer.length || !buffer.pointer)
		return AE_OK;

	obj = buffer.pointer;
	if (obj->type != ACPI_TYPE_BUFFER ||
	    obj->buffer.length < sizeof(*iosapic)) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;

	if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	gsi_base = iosapic->global_irq_base;

	kfree(buffer.pointer);

	pxm = acpi_get_pxm(handle);
	if (pxm < 0)
		return AE_OK;

	node = pxm_to_node(pxm);

	if (node >= MAX_NUMNODES || !node_online(node) ||
	    cpumask_empty(cpumask_of_node(node)))
		return AE_OK;

	
	/* We know a gsi to node mapping! */
	map_iosapic_to_node(gsi_base, node);
	return AE_OK;
}
Example #10
static void register_nodes(void)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; i++) {
		if (node_online(i)) {
			int p_node = parent_node(i);
			struct node *parent = NULL;

			if (p_node != i)
				parent = &node_devices[p_node];

			register_node(&node_devices[i], i, parent);
		}
	}
}
Example #11
File: intel.c Project: 08opt/linux
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
	unsigned node;
	int cpu = smp_processor_id();

	/* Don't do the funky fallback heuristics the AMD version employs
	   for now. */
	node = numa_cpu_node(cpu);
	if (node == NUMA_NO_NODE || !node_online(node)) {
		/* reuse the value from init_cpu_to_node() */
		node = cpu_to_node(cpu);
	}
	numa_set_node(cpu, node);
#endif
}
Example #12
static void __cpuinit srat_detect_node(void)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
	unsigned node;
	int cpu = smp_processor_id();
	int apicid = hard_smp_processor_id();

	/* Don't do the funky fallback heuristics the AMD version employs
	   for now. */
	node = apicid_to_node[apicid];
	if (node == NUMA_NO_NODE || !node_online(node))
		node = first_node(node_online_map);
	numa_set_node(cpu, node);

	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
#endif
}
Example #13
/*
 * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
 * and apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for NUMA
 * emulation and faking node case (when running a kernel compiled
 * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
 * is already initialized in a round robin manner at numa_init_array,
 * prior to this call, and this initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are setup.
 */
void __init init_cpu_to_node(void)
{
	int cpu;
	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

	BUG_ON(cpu_to_apicid == NULL);

	for_each_possible_cpu(cpu) {
		int node = numa_cpu_node(cpu);

		if (node == NUMA_NO_NODE)
			continue;
		if (!node_online(node))
			node = find_near_online_node(node);
		numa_set_node(cpu, node);
	}
}
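find_near_online_node() is used above but not shown. A plausible sketch, modeled on the x86 numa code: pick the online node with the smallest node_distance() to the offline one.

static int find_near_online_node(int node)
{
	int n, val, best_node = -1, min_val = INT_MAX;

	for_each_online_node(n) {
		val = node_distance(node, n);
		if (val < min_val) {
			min_val = val;
			best_node = n;
		}
	}

	return best_node;
}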
Example #14
/**
 * percpu_populate - populate per-cpu data for given cpu
 * @__pdata: per-cpu data to populate further
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @cpu: populate per-data for this cpu
 *
 * Populating per-cpu data for a cpu coming online would be a typical
 * use case. You need to register a cpu hotplug handler for that purpose.
 * Per-cpu object is populated with zeroed buffer.
 */
void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
{
	struct percpu_data *pdata = __percpu_disguise(__pdata);
	int node = cpu_to_node(cpu);

	/*
	 * We should make sure each CPU gets private memory.
	 */
	size = roundup(size, cache_line_size());

	BUG_ON(pdata->ptrs[cpu]);
	if (node_online(node))
		pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
	else
		pdata->ptrs[cpu] = kzalloc(size, gfp);
	return pdata->ptrs[cpu];
}
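A hedged sketch of the hotplug handler the kerneldoc above alludes to: populate per-cpu storage when a CPU is coming up and release it if bring-up fails or the CPU dies. my_pdata and my_size are placeholders, not names from the original source; percpu_depopulate() is the companion call from the same allocpercpu-era interface.

static void *my_pdata;	/* placeholder: the caller's per-cpu object */
static size_t my_size;	/* placeholder: size of each per-cpu element */

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
		if (!percpu_populate(my_pdata, my_size, GFP_KERNEL, cpu))
			return NOTIFY_BAD;	/* allocation failed */
		break;
	case CPU_UP_CANCELED:
	case CPU_DEAD:
		percpu_depopulate(my_pdata, cpu);
		break;
	}

	return NOTIFY_OK;
}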
Example #15
/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size = PERCPU_ENOUGH_ROOM;
	char *ptr;
	int cpu;

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/* Copy section for each CPU (we discard the original) */
	size = PERCPU_ENOUGH_ROOM;
	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
			  size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = alloc_bootmem_pages(size);
#else
		int node = early_cpu_to_node(cpu);
		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = alloc_bootmem_pages(size);
			printk(KERN_INFO
			       "cpu %d has no node %d or node-local memory\n",
				cpu, node);
		}
		else
			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);

	}

	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
		NR_CPUS, nr_cpu_ids, nr_node_ids);

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpumask_of_cpu map */
	setup_cpumask_of_cpu();
}
Example #16
/**
 * pcpu_need_numa - determine percpu allocation needs to consider NUMA
 *
 * If NUMA is not configured or there is only one NUMA node available,
 * there is no reason to consider NUMA.  This function determines
 * whether percpu allocation should consider NUMA or not.
 *
 * RETURNS:
 * true if NUMA should be considered; otherwise, false.
 */
static bool __init pcpu_need_numa(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	pg_data_t *last = NULL;
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		int node = early_cpu_to_node(cpu);

		if (node_online(node) && NODE_DATA(node) &&
		    last && last != NODE_DATA(node))
			return true;

		last = NODE_DATA(node);
	}
#endif
	return false;
}
Example #17
int get_mp_bus_to_node(int busnum)
{
	int node = -1;

	if (busnum < 0 || busnum > (BUS_NR - 1))
		return node;

	node = mp_bus_to_node[busnum];

	/*
	 * Let numa_node_id() decide later, in dma_alloc_pages(),
	 * if there is no RAM on that node.
	 */
	if (node != -1 && !node_online(node))
		node = -1;

	return node;
}
Example #18
File: node.c Project: jthatch12/STi
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	struct device *obj;

	if (!node_online(nid))
		return 0;

	obj = get_cpu_device(cpu);
	if (!obj)
		return 0;

	sysfs_remove_link(&node_devices[nid]->dev.kobj,
			  kobject_name(&obj->kobj));
	sysfs_remove_link(&obj->kobj,
			  kobject_name(&node_devices[nid]->dev.kobj));

	return 0;
}
Example #19
static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth,
				    void *context, void **ret)
{
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *obj;
	struct acpi_madt_io_sapic *iosapic;
	unsigned int gsi_base;
	int node;

	/* Only care about objects w/ a method that returns the MADT */
	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
		return AE_OK;

	if (!buffer.length || !buffer.pointer)
		return AE_OK;

	obj = buffer.pointer;
	if (obj->type != ACPI_TYPE_BUFFER ||
	    obj->buffer.length < sizeof(*iosapic)) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer;

	if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) {
		kfree(buffer.pointer);
		return AE_OK;
	}

	gsi_base = iosapic->global_irq_base;

	kfree(buffer.pointer);

	/* OK, it's an IOSAPIC MADT entry; associate it with a node */
	node = acpi_get_node(handle);
	if (node == NUMA_NO_NODE || !node_online(node) ||
	    cpumask_empty(cpumask_of_node(node)))
		return AE_OK;

	/* We know a gsi to node mapping! */
	map_iosapic_to_node(gsi_base, node);
	return AE_OK;
}
Example #20
/* register memory section under specified node if it spans that node */
int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
{
	int ret;
	unsigned long pfn, sect_start_pfn, sect_end_pfn;

	if (!mem_blk)
		return -EFAULT;
	if (!node_online(nid))
		return 0;

	sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
	sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
	sect_end_pfn += PAGES_PER_SECTION - 1;
	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
		int page_nid;

		/*
		 * memory block could have several absent sections from start.
		 * skip pfn range from absent section
		 */
		if (!pfn_present(pfn)) {
			pfn = round_down(pfn + PAGES_PER_SECTION,
					 PAGES_PER_SECTION) - 1;
			continue;
		}

		page_nid = get_nid_for_pfn(pfn);
		if (page_nid < 0)
			continue;
		if (page_nid != nid)
			continue;
		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
					&mem_blk->dev.kobj,
					kobject_name(&mem_blk->dev.kobj));
		if (ret)
			return ret;

		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
				&node_devices[nid]->dev.kobj,
				kobject_name(&node_devices[nid]->dev.kobj));
	}
	/* mem section does not span the specified node */
	return 0;
}
Example #21
/**
 * fill_mp_bus_to_cpumask()
 * fills the mp_bus_to_cpumask array based according to the LDT Bus Number
 * Registers found in the K8 northbridge
 */
__init static int
fill_mp_bus_to_cpumask(void)
{
	struct pci_dev *nb_dev = NULL;
	int i, j;
	u32 ldtbus, nid;
	static int lbnr[3] = {
		LDT_BUS_NUMBER_REGISTER_0,
		LDT_BUS_NUMBER_REGISTER_1,
		LDT_BUS_NUMBER_REGISTER_2
	};

	while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
			PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) {
		pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);

		for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
			pci_read_config_dword(nb_dev, lbnr[i], &ldtbus);
			/*
			 * if there are no busses hanging off of the current
			 * ldt link then both the secondary and subordinate
			 * bus number fields are set to 0.
			 * 
			 * RED-PEN
			 * This is slightly broken because it assumes
 			 * HT node IDs == Linux node ids, which is not always
			 * true. However it is probably mostly true.
			 */
			if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0
				&& SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) {
				for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus);
				     j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
				     j++) { 
					int node = NODE_ID(nid);
					if (!node_online(node))
						node = 0;
					pci_bus_to_node[j] = node;
				}		
			}
		}
	}

	return 0;
}
Example #22
/**
 * acpi_map_pxm_to_online_node - Map proximity ID to online node
 * @pxm: ACPI proximity ID
 *
 * This is similar to acpi_map_pxm_to_node(), but always returns an online
 * node.  When the mapped node from a given proximity ID is offline, it
 * looks up the node distance table and returns the nearest online node.
 *
 * ACPI device drivers, which are called after the NUMA initialization has
 * completed in the kernel, can call this interface to obtain their device
 * NUMA topology from ACPI tables.  Such drivers do not have to deal with
 * offline nodes.  A node may be offline when a device proximity ID is
 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
 * "numa=off" on x86.
 */
int acpi_map_pxm_to_online_node(int pxm)
{
	int node, n, dist, min_dist;

	node = acpi_map_pxm_to_node(pxm);

	if (node == NUMA_NO_NODE)
		node = 0;

	if (!node_online(node)) {
		min_dist = INT_MAX;
		for_each_online_node(n) {
			dist = node_distance(node, n);
			if (dist < min_dist) {
				min_dist = dist;
				node = n;
			}
		}
	}

	return node;
}
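A hedged usage sketch: a driver translating a firmware proximity ID into an online node before a node-local allocation. The pxm value would come from an SRAT-style table entry; the wrapper itself is illustrative.

static void *alloc_on_pxm(int pxm, size_t size)
{
	int nid = acpi_map_pxm_to_online_node(pxm);

	/* nid is guaranteed online, so node-local allocation is safe. */
	return kmalloc_node(size, GFP_KERNEL, nid);
}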
Example #23
void __cpuinit map_cpu_to_node(int cpu, int nid)
{
	int oldnid;
	if (nid < 0) { /* just initialize to zero */
		cpu_to_node_map[cpu] = 0;
		return;
	}
	/* sanity check first */
	oldnid = cpu_to_node_map[cpu];
	if (cpu_isset(cpu, node_to_cpu_mask[oldnid])) {
		return; /* nothing to do */
	}
	/* We don't have CPU-driven node hot-add yet. In the usual case,
	   the node is created from SRAT at boot time. */
	if (!node_online(nid))
		nid = first_online_node;
	cpu_to_node_map[cpu] = nid;
	cpu_set(cpu, node_to_cpu_mask[nid]);
	return;
}
Example #24
File: node.c Project: jthatch12/STi
/*
 * register cpu under node
 */
int register_cpu_under_node(unsigned int cpu, unsigned int nid)
{
	int ret;
	struct device *obj;

	if (!node_online(nid))
		return 0;

	obj = get_cpu_device(cpu);
	if (!obj)
		return 0;

	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
				&obj->kobj,
				kobject_name(&obj->kobj));
	if (ret)
		return ret;

	return sysfs_create_link(&obj->kobj,
				 &node_devices[nid]->dev.kobj,
				 kobject_name(&node_devices[nid]->dev.kobj));
}
Example #25
/* Set correct numa_node information for AMD NB functions */
static void quirk_amd_nb_node(struct pci_dev *dev)
{
	struct pci_dev *nb_ht;
	unsigned int devfn;
	u32 node;
	u32 val;

	devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
	nb_ht = pci_get_slot(dev->bus, devfn);
	if (!nb_ht)
		return;

	pci_read_config_dword(nb_ht, 0x60, &val);
	node = pcibus_to_node(dev->bus) | (val & 7);
	/*
	 * Some hardware may return an invalid node ID,
	 * so check it first:
	 */
	if (node_online(node))
		set_dev_node(&dev->dev, node);
	pci_dev_put(nb_ht);
}
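Quirks like this are wired up with a PCI fixup declaration. The pairing below follows the real AMD northbridge hook-up, though the exact set of device IDs covered varies by kernel version; treat it as illustrative.

DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
			quirk_amd_nb_node);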
Example #26
File: numa.c Project: Gaffey/linux
/*
 * Figure out to which domain a cpu belongs and stick it there.
 * Return the id of the domain used.
 */
static int numa_setup_cpu(unsigned long lcpu)
{
	int nid = -1;
	struct device_node *cpu;

	/*
	 * If a valid cpu-to-node mapping is already available, use it
	 * directly instead of querying the firmware, since it represents
	 * the most recent mapping notified to us by the platform (eg: VPHN).
	 */
	if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
		map_cpu_to_node(lcpu, nid);
		return nid;
	}

	cpu = of_get_cpu_node(lcpu, NULL);

	if (!cpu) {
		WARN_ON(1);
		if (cpu_present(lcpu))
			goto out_present;
		else
			goto out;
	}

	nid = of_node_to_nid_single(cpu);

out_present:
	if (nid < 0 || !node_online(nid))
		nid = first_online_node;

	map_cpu_to_node(lcpu, nid);
	of_node_put(cpu);
out:
	return nid;
}
Example #27
static void nlm_init_bootmem_node (unsigned long mapstart, unsigned long min_pfn, unsigned long max_pfn)
{
	int i;

	for (i = 0; i < NLM_MAX_CPU_NODE; i++) {
		unsigned long map_pfn, start_pfn, end_pfn, bootmem_size;
		int j;

		if (!node_online(i))
			continue;

		start_pfn = NODE_MEM_DATA(i)->low_pfn;
		end_pfn   = NODE_MEM_DATA(i)->high_pfn;

		if (start_pfn && start_pfn < min_pfn)
			start_pfn = min_pfn;

		if (end_pfn > max_pfn)
			end_pfn = max_pfn;

		/* in general, never hit the condition */
		if (start_pfn && start_pfn >= end_pfn) {
			NODE_MEM_DATA(i)->map_pfn = 0; /* indicate a bad map_pfn */
			continue;
		}

		if (start_pfn > mapstart)
			map_pfn = start_pfn;
		else
			map_pfn = mapstart;

		if ((start_pfn == 0) && (end_pfn == 0)) {
			map_pfn = _low_virt_to_phys(&_node_map_mem[i][0]) >>
					PAGE_SHIFT;
			__node_data[i] = __va(map_pfn << PAGE_SHIFT);
		} else {
Example #28
void __devinit igb_check_options(struct igb_adapter *adapter)
{
	int bd = adapter->bd_number;
	struct e1000_hw *hw = &adapter->hw;

	if (bd >= IGB_MAX_NIC) {
		DPRINTK(PROBE, NOTICE,
		       "Warning: no configuration for board #%d\n", bd);
		DPRINTK(PROBE, NOTICE, "Using defaults for all values\n");
#ifndef module_param_array
		bd = IGB_MAX_NIC;
#endif
	}

	{ /* Interrupt Throttling Rate */
		struct igb_option opt = {
			.type = range_option,
			.name = "Interrupt Throttling Rate (ints/sec)",
			.err  = "using default of " __MODULE_STRING(DEFAULT_ITR),
			.def  = DEFAULT_ITR,
			.arg  = { .r = { .min = MIN_ITR,
					 .max = MAX_ITR } }
		};

#ifdef module_param_array
		if (num_InterruptThrottleRate > bd) {
#endif
			unsigned int itr = InterruptThrottleRate[bd];

			switch (itr) {
			case 0:
				DPRINTK(PROBE, INFO, "%s turned off\n",
				        opt.name);
				if (hw->mac.type >= e1000_i350)
					adapter->dmac = IGB_DMAC_DISABLE;
				adapter->rx_itr_setting = itr;
				break;
			case 1:
				DPRINTK(PROBE, INFO, "%s set to dynamic mode\n",
					opt.name);
				adapter->rx_itr_setting = itr;
				break;
			case 3:
				DPRINTK(PROBE, INFO,
				        "%s set to dynamic conservative mode\n",
					opt.name);
				adapter->rx_itr_setting = itr;
				break;
			default:
				igb_validate_option(&itr, &opt, adapter);
				/* Save the setting because the dynamic bits
				 * change itr. On an invalid user value,
				 * default to conservative mode; otherwise
				 * clear the lower two bits, which are used
				 * as control flags. */
				if (itr == 3) {
					adapter->rx_itr_setting = itr;
				} else {
					adapter->rx_itr_setting = 1000000000 /
					                          (itr * 256);
					adapter->rx_itr_setting &= ~3;
				}
				break;
			}
#ifdef module_param_array
		} else {
			adapter->rx_itr_setting = opt.def;
		}
#endif
		adapter->tx_itr_setting = adapter->rx_itr_setting;
	}
	{ /* Interrupt Mode */
		struct igb_option opt = {
			.type = range_option,
			.name = "Interrupt Mode",
			.err  = "defaulting to 2 (MSI-X)",
			.def  = IGB_INT_MODE_MSIX,
			.arg  = { .r = { .min = MIN_INTMODE,
					 .max = MAX_INTMODE } }
		};

#ifdef module_param_array
		if (num_IntMode > bd) {
#endif
			unsigned int int_mode = IntMode[bd];
			igb_validate_option(&int_mode, &opt, adapter);
			adapter->int_mode = int_mode;
#ifdef module_param_array
		} else {
			adapter->int_mode = opt.def;
		}
#endif
	}
	{ /* Low Latency Interrupt TCP Port */
		struct igb_option opt = {
			.type = range_option,
			.name = "Low Latency Interrupt TCP Port",
			.err  = "using default of " __MODULE_STRING(DEFAULT_LLIPORT),
			.def  = DEFAULT_LLIPORT,
			.arg  = { .r = { .min = MIN_LLIPORT,
					 .max = MAX_LLIPORT } }
		};

#ifdef module_param_array
		if (num_LLIPort > bd) {
#endif
			adapter->lli_port = LLIPort[bd];
			if (adapter->lli_port) {
				igb_validate_option(&adapter->lli_port, &opt,
				        adapter);
			} else {
				DPRINTK(PROBE, INFO, "%s turned off\n",
					opt.name);
			}
#ifdef module_param_array
		} else {
			adapter->lli_port = opt.def;
		}
#endif
	}
	{ /* Low Latency Interrupt on Packet Size */
		struct igb_option opt = {
			.type = range_option,
			.name = "Low Latency Interrupt on Packet Size",
			.err  = "using default of " __MODULE_STRING(DEFAULT_LLISIZE),
			.def  = DEFAULT_LLISIZE,
			.arg  = { .r = { .min = MIN_LLISIZE,
					 .max = MAX_LLISIZE } }
		};

#ifdef module_param_array
		if (num_LLISize > bd) {
#endif
			adapter->lli_size = LLISize[bd];
			if (adapter->lli_size) {
				igb_validate_option(&adapter->lli_size, &opt,
				        adapter);
			} else {
				DPRINTK(PROBE, INFO, "%s turned off\n",
					opt.name);
			}
#ifdef module_param_array
		} else {
			adapter->lli_size = opt.def;
		}
#endif
	}
	{ /* Low Latency Interrupt on TCP Push flag */
		struct igb_option opt = {
			.type = enable_option,
			.name = "Low Latency Interrupt on TCP Push flag",
			.err  = "defaulting to Disabled",
			.def  = OPTION_DISABLED
		};

#ifdef module_param_array
		if (num_LLIPush > bd) {
#endif
			unsigned int lli_push = LLIPush[bd];
			igb_validate_option(&lli_push, &opt, adapter);
			adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0;
#ifdef module_param_array
		} else {
			adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0;
		}
#endif
	}
	{ /* SRIOV - Enable SR-IOV VF devices */
		struct igb_option opt = {
			.type = range_option,
			.name = "max_vfs - SR-IOV VF devices",
			.err  = "using default of " __MODULE_STRING(DEFAULT_SRIOV),
			.def  = DEFAULT_SRIOV,
			.arg  = { .r = { .min = MIN_SRIOV,
					 .max = MAX_SRIOV } }
		};

#ifdef module_param_array
		if (num_max_vfs > bd) {
#endif
			adapter->vfs_allocated_count = max_vfs[bd];
			igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter);

#ifdef module_param_array
		} else {
			adapter->vfs_allocated_count = opt.def;
		}
#endif
		if (adapter->vfs_allocated_count) {
			switch (hw->mac.type) {
			case e1000_82575:
			case e1000_82580:
				adapter->vfs_allocated_count = 0;
				DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n");
			default:
				break;
			}
		}
	}
	{ /* VMDQ - Enable VMDq multiqueue receive */
		struct igb_option opt = {
			.type = range_option,
			.name = "VMDQ - VMDq multiqueue queue count",
			.err  = "using default of " __MODULE_STRING(DEFAULT_VMDQ),
			.def  = DEFAULT_VMDQ,
			.arg  = { .r = { .min = MIN_VMDQ,
					 .max = (MAX_VMDQ - adapter->vfs_allocated_count) } }
		};
#ifdef module_param_array
		if (num_VMDQ > bd) {
#endif
			adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]);
			if (adapter->vfs_allocated_count && !adapter->vmdq_pools) {
				DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n");
				adapter->vmdq_pools = 1;
			}
			igb_validate_option(&adapter->vmdq_pools, &opt, adapter);

#ifdef module_param_array
		} else {
			if (!adapter->vfs_allocated_count)
				adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def);
			else
				adapter->vmdq_pools = 1;
		}
#endif
#ifdef CONFIG_IGB_VMDQ_NETDEV
		if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) {
			DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n");
			adapter->vmdq_pools = 0;
		}
#endif
	}
	{ /* RSS - Enable RSS multiqueue receives */
		struct igb_option opt = {
			.type = range_option,
			.name = "RSS - RSS multiqueue receive count",
			.err  = "using default of " __MODULE_STRING(DEFAULT_RSS),
			.def  = DEFAULT_RSS,
			.arg  = { .r = { .min = MIN_RSS,
					 .max = MAX_RSS } }
		};

		if (adapter->vmdq_pools) {
			switch (hw->mac.type) {
#ifndef CONFIG_IGB_VMDQ_NETDEV
			case e1000_82576:
				opt.arg.r.max = 2;
				break;
			case e1000_82575:
				if (adapter->vmdq_pools == 2)
					opt.arg.r.max = 3;
				if (adapter->vmdq_pools <= 2)
					break;
#endif
			default:
				opt.arg.r.max = 1;
				break;
			}
		}

#ifdef module_param_array
		if (num_RSS > bd) {
#endif
			adapter->rss_queues = RSS[bd];
			switch (adapter->rss_queues) {
			case 1:
				break;
			default:
				igb_validate_option(&adapter->rss_queues, &opt, adapter);
				if (adapter->rss_queues)
					break;
			case 0:
				adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus());
				break;
			}
#ifdef module_param_array
		} else {
			adapter->rss_queues = opt.def ?:
				min_t(u32, opt.arg.r.max, num_online_cpus());
		}
#endif
	}
	{ /* QueuePairs - Enable TX/RX queue pairs for interrupt handling */
		struct igb_option opt = {
			.type = enable_option,
			.name = "QueuePairs - TX/RX queue pairs for interrupt handling",
			.err  = "defaulting to Enabled",
			.def  = OPTION_ENABLED
		};
#ifdef module_param_array
		if (num_QueuePairs > bd) {
#endif
			unsigned int qp = QueuePairs[bd];
			/*
			 * we must enable queue pairs if the number of queues
			 * exceeds the number of available interrupts. We are
			 * limited to 10, or 3 per unallocated VF.
			 */
			if ((adapter->rss_queues > 4) ||
			    (adapter->vmdq_pools > 4) ||
			    ((adapter->rss_queues > 1) &&
			     ((adapter->vmdq_pools > 3) ||
			      (adapter->vfs_allocated_count > 6)))) {
				if (qp == OPTION_DISABLED) {
					qp = OPTION_ENABLED;
					DPRINTK(PROBE, INFO,
					        "Number of queues exceeds available interrupts, %s\n",opt.err);
				}
			}
			igb_validate_option(&qp, &opt, adapter);
			adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0;
#ifdef module_param_array
		} else {
			adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0;
		}
#endif
	}
	{ /* EEE -  Enable EEE for capable adapters */

		if (hw->mac.type >= e1000_i350) {
			struct igb_option opt = {
				.type = enable_option,
				.name = "EEE Support",
				.err  = "defaulting to Enabled",
				.def  = OPTION_ENABLED
			};
#ifdef module_param_array
			if (num_EEE > bd) {
#endif
				unsigned int eee = EEE[bd];
				igb_validate_option(&eee, &opt, adapter);
				adapter->flags |= eee ? IGB_FLAG_EEE : 0;
				if (eee)
					hw->dev_spec._82575.eee_disable = false;
				else
					hw->dev_spec._82575.eee_disable = true;

#ifdef module_param_array
			} else {
				adapter->flags |= opt.def ? IGB_FLAG_EEE : 0;
				if (adapter->flags & IGB_FLAG_EEE)
					hw->dev_spec._82575.eee_disable = false;
				else
					hw->dev_spec._82575.eee_disable = true;
			}
#endif
		}
	}
	{ /* DMAC -  Enable DMA Coalescing for capable adapters */

		if (hw->mac.type >= e1000_i350) {
			struct igb_opt_list list [] = {
				{ IGB_DMAC_DISABLE, "DMAC Disable"},
				{ IGB_DMAC_MIN, "DMAC 250 usec"},
				{ IGB_DMAC_500, "DMAC 500 usec"},
				{ IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"},
				{ IGB_DMAC_2000, "DMAC 2000 usec"},
				{ IGB_DMAC_3000, "DMAC 3000 usec"},
				{ IGB_DMAC_4000, "DMAC 4000 usec"},
				{ IGB_DMAC_5000, "DMAC 5000 usec"},
				{ IGB_DMAC_6000, "DMAC 6000 usec"},
				{ IGB_DMAC_7000, "DMAC 7000 usec"},
				{ IGB_DMAC_8000, "DMAC 8000 usec"},
				{ IGB_DMAC_9000, "DMAC 9000 usec"},
				{ IGB_DMAC_MAX, "DMAC 10000 usec"}
			};
			struct igb_option opt = {
				.type = list_option,
				.name = "DMA Coalescing",
				.err  = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE),
				.def  = IGB_DMAC_DISABLE,
				.arg  = { .l = { .nr = 13,
						 .p = list } }
			};
#ifdef module_param_array
			if (num_DMAC > bd) {
#endif
				unsigned int dmac = DMAC[bd];
				if (adapter->rx_itr_setting == IGB_DMAC_DISABLE)
					dmac = IGB_DMAC_DISABLE;
				igb_validate_option(&dmac, &opt, adapter);
				switch (dmac) {
				case IGB_DMAC_DISABLE:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_MIN:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_500:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_EN_DEFAULT:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_2000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_3000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_4000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_5000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_6000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_7000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_8000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_9000:
					adapter->dmac = dmac;
					break;
				case IGB_DMAC_MAX:
					adapter->dmac = dmac;
					break;
				default:
					adapter->dmac = opt.def;
					DPRINTK(PROBE, INFO,
					"Invalid DMAC setting, "
					"resetting DMAC to %d\n", opt.def);
				}
#ifdef module_param_array
			} else
				adapter->dmac = opt.def;
#endif
		}
	}
#ifndef IGB_NO_LRO
	{ /* LRO - Enable Large Receive Offload */
		struct igb_option opt = {
			.type = enable_option,
			.name = "LRO - Large Receive Offload",
			.err  = "defaulting to Disabled",
			.def  = OPTION_DISABLED
		};
		struct net_device *netdev = adapter->netdev;
#ifdef module_param_array
		if (num_LRO > bd) {
#endif
			unsigned int lro = LRO[bd];
			igb_validate_option(&lro, &opt, adapter);
			netdev->features |= lro ? NETIF_F_LRO : 0;
#ifdef module_param_array
		} else if (opt.def == OPTION_ENABLED) {
			netdev->features |= NETIF_F_LRO;
		}
#endif
	}
#endif /* IGB_NO_LRO */
	{ /* Node assignment */
		static struct igb_option opt = {
			.type = range_option,
			.name = "Node to start on",
			.err  = "defaulting to -1",
#ifdef HAVE_EARLY_VMALLOC_NODE
			.def  = 0,
#else
			.def  = -1,
#endif
			.arg  = { .r = { .min = 0,
					 .max = (MAX_NUMNODES - 1)}}
		};
		int node_param = opt.def;

		/* if the default was zero then we need to set the
		 * default value to an online node, which is not
		 * necessarily zero, and the constant initializer
		 * above can't take first_online_node */
		if (node_param == 0)
			/* must set opt.def for validate */
			opt.def = node_param = first_online_node;

#ifdef module_param_array
		if (num_Node > bd) {
#endif
			node_param = Node[bd];
			igb_validate_option((uint *)&node_param, &opt, adapter);

			if (node_param != OPTION_UNSET) {
				DPRINTK(PROBE, INFO, "node set to %d\n", node_param);
			}
#ifdef module_param_array
		}
#endif

		/* check sanity of the value */
		if (node_param != -1 && !node_online(node_param)) {
			DPRINTK(PROBE, INFO,
			        "ignoring node set to invalid value %d\n",
			        node_param);
			node_param = opt.def;
		}

		adapter->node = node_param;
	}
	{ /* MDD - Enable Malicious Driver Detection. Only available when
	     SR-IOV is enabled. */
		struct igb_option opt = {
			.type = enable_option,
			.name = "Malicious Driver Detection",
			.err  = "defaulting to 1",
			.def  = OPTION_ENABLED,
			.arg  = { .r = { .min = OPTION_DISABLED,
					 .max = OPTION_ENABLED } }
		};

#ifdef module_param_array
		if (num_MDD > bd) {
#endif
			adapter->mdd = MDD[bd];
			igb_validate_option((uint *)&adapter->mdd, &opt,
					    adapter);
#ifdef module_param_array
		} else {
			adapter->mdd = opt.def;
		}
#endif
	}
}
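For reference, a minimal sketch of the range check igb_validate_option() performs for range_option entries, assuming the struct igb_option layout used above; the real driver also handles enable_option and list_option types and prints opt->err when it falls back.

/* Sketch only: clamp a range_option value, falling back to the default. */
static int validate_range_option(unsigned int *value,
				 const struct igb_option *opt)
{
	if (*value >= opt->arg.r.min && *value <= opt->arg.r.max)
		return 0;		/* accepted as-is */

	*value = opt->def;		/* out of range: use the default */
	return -1;
}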
Example #29
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
	struct acpi_device *device = root->device;
	struct pci_root_info *info = NULL;
	int domain = root->segment;
	int busnum = root->secondary.start;
	LIST_HEAD(resources);
	struct pci_bus *bus = NULL;
	struct pci_sysdata *sd;
	int node;
#ifdef CONFIG_ACPI_NUMA
	int pxm;
#endif

	if (pci_ignore_seg)
		domain = 0;

	if (domain && !pci_domains_supported) {
		printk(KERN_WARNING "pci_bus %04x:%02x: "
		       "ignored (multiple domains not supported)\n",
		       domain, busnum);
		return NULL;
	}

	node = -1;
#ifdef CONFIG_ACPI_NUMA
	pxm = acpi_get_pxm(device->handle);
	if (pxm >= 0)
		node = pxm_to_node(pxm);
	if (node != -1)
		set_mp_bus_to_node(busnum, node);
	else
#endif
		node = get_mp_bus_to_node(busnum);

	if (node != -1 && !node_online(node))
		node = -1;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		printk(KERN_WARNING "pci_bus %04x:%02x: "
		       "ignored (out of memory)\n", domain, busnum);
		return NULL;
	}

	sd = &info->sd;
	sd->domain = domain;
	sd->node = node;
	sd->companion = device;
	/*
	 * Maybe the desired PCI bus has already been scanned. In that case
	 * there is no need to scan it again for this domain and bus number.
	 */
	bus = pci_find_bus(domain, busnum);
	if (bus) {
		/*
		 * If the desired bus exists, the contents of bus->sysdata
		 * will be replaced by sd.
		 */
		memcpy(bus->sysdata, sd, sizeof(*sd));
		kfree(info);
	} else {
		probe_pci_root_info(info, device, busnum, domain);

		/* insert busn res at first */
		pci_add_resource(&resources,  &root->secondary);
		/*
		 * _CRS with no apertures is normal, so only fall back to
		 * defaults or native bridge info if we're ignoring _CRS.
		 */
		if (pci_use_crs)
			add_resources(info, &resources);
		else {
			free_pci_root_info_res(info);
			x86_pci_root_bus_resources(busnum, &resources);
		}

		if (!setup_mcfg_map(info, domain, (u8)root->secondary.start,
				    (u8)root->secondary.end, root->mcfg_addr))
			bus = pci_create_root_bus(NULL, busnum, &pci_root_ops,
						  sd, &resources);

		if (bus) {
			pci_scan_child_bus(bus);
			pci_set_host_bridge_release(
				to_pci_host_bridge(bus->bridge),
				release_pci_root_info, info);
		} else {
			pci_free_resource_list(&resources);
			__release_pci_root_info(info);
		}
	}

	/* After the PCI-E bus has been walked and all devices discovered,
	 * configure any settings of the fabric that might be necessary.
	 */
	if (bus) {
		struct pci_bus *child;
		list_for_each_entry(child, &bus->children, node)
			pcie_bus_configure_settings(child);
	}

	if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA
		if (pxm >= 0)
			dev_printk(KERN_DEBUG, &bus->dev,
				   "on NUMA node %d (pxm %d)\n", node, pxm);
#else
		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
#endif
	}

	return bus;
}
Example #30
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
{
	struct acpi_device *device = root->device;
	int domain = root->segment;
	int busnum = root->secondary.start;
	struct pci_bus *bus;
	struct pci_sysdata *sd;
	int node;
#ifdef CONFIG_ACPI_NUMA
	int pxm;
#endif

	if (domain && !pci_domains_supported) {
		printk(KERN_WARNING "pci_bus %04x:%02x: "
		       "ignored (multiple domains not supported)\n",
		       domain, busnum);
		return NULL;
	}

	node = -1;
#ifdef CONFIG_ACPI_NUMA
	pxm = acpi_get_pxm(device->handle);
	if (pxm >= 0)
		node = pxm_to_node(pxm);
	if (node != -1)
		set_mp_bus_to_node(busnum, node);
	else
#endif
		node = get_mp_bus_to_node(busnum);

	if (node != -1 && !node_online(node))
		node = -1;

	/* Allocate per-root-bus (not per bus) arch-specific data.
	 * TODO: leak; this memory is never freed.
	 * It's arguable whether it's worth the trouble to care.
	 */
	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
	if (!sd) {
		printk(KERN_WARNING "pci_bus %04x:%02x: "
		       "ignored (out of memory)\n", domain, busnum);
		return NULL;
	}

	sd->domain = domain;
	sd->node = node;
	/*
	 * Maybe the desired PCI bus has already been scanned. In that case
	 * there is no need to scan it again for this domain and bus number.
	 */
	bus = pci_find_bus(domain, busnum);
	if (bus) {
		/*
		 * If the desired bus exists, the contents of bus->sysdata
		 * will be replaced by sd.
		 */
		memcpy(bus->sysdata, sd, sizeof(*sd));
		kfree(sd);
	} else {
		bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd);
		if (bus) {
			get_current_resources(device, busnum, domain, bus);
			bus->subordinate = pci_scan_child_bus(bus);
		}
	}

	if (!bus)
		kfree(sd);

	if (bus && node != -1) {
#ifdef CONFIG_ACPI_NUMA
		if (pxm >= 0)
			dev_printk(KERN_DEBUG, &bus->dev,
				   "on NUMA node %d (pxm %d)\n", node, pxm);
#else
		dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node);
#endif
	}

	return bus;
}