/* * Determine the type of allocation constraint. */ static enum oom_constraint constrained_alloc(struct oom_control *oc) { struct zone *zone; struct zoneref *z; enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); bool cpuset_limited = false; int nid; if (is_memcg_oom(oc)) { oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1; return CONSTRAINT_MEMCG; } /* Default to all available memory */ oc->totalpages = totalram_pages() + total_swap_pages; if (!IS_ENABLED(CONFIG_NUMA)) return CONSTRAINT_NONE; if (!oc->zonelist) return CONSTRAINT_NONE; /* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ if (oc->gfp_mask & __GFP_THISNODE) return CONSTRAINT_NONE; /* * This is not a __GFP_THISNODE allocation, so a truncated nodemask in * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). */ if (oc->nodemask && !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { oc->totalpages = total_swap_pages; for_each_node_mask(nid, *oc->nodemask) oc->totalpages += node_spanned_pages(nid); return CONSTRAINT_MEMORY_POLICY; } /* Check this allocation failure is caused by cpuset's wall function */ for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, high_zoneidx, oc->nodemask) if (!cpuset_zone_allowed(zone, oc->gfp_mask)) cpuset_limited = true; if (cpuset_limited) { oc->totalpages = total_swap_pages; for_each_node_mask(nid, cpuset_current_mems_allowed) oc->totalpages += node_spanned_pages(nid); return CONSTRAINT_CPUSET; } return CONSTRAINT_NONE; }
/*
 * Determine the type of allocation constraint.
 *
 * @zonelist:   zonelist the allocation was attempted on; NULL yields
 *              CONSTRAINT_NONE.
 * @gfp_mask:   gfp flags of the allocation.
 * @nodemask:   nodemask passed to the page allocator, may be NULL.
 * @totalpages: out parameter, set to the number of pages the returned
 *              constraint domain is sized with.
 *
 * Returns CONSTRAINT_MEMORY_POLICY when @nodemask does not cover every node
 * in node_states[N_HIGH_MEMORY], CONSTRAINT_CPUSET when some zone of the
 * zonelist is not allowed by the cpuset, and CONSTRAINT_NONE otherwise.
 */
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
				gfp_t gfp_mask, nodemask_t *nodemask,
				unsigned long *totalpages)
{
	struct zone *zone;
	struct zoneref *z;
	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
	bool cpuset_limited = false;
	int nid;

	/* Default to all available memory */
	*totalpages = totalram_pages + total_swap_pages;

	if (!zonelist)
		return CONSTRAINT_NONE;

	/*
	 * Reach here only when __GFP_NOFAIL is used. So, we should avoid
	 * killing current. We have to kill a random task in this case.
	 * Ideally this would be CONSTRAINT_THISNODE, but there is no way to
	 * handle that for now.
	 */
	if (gfp_mask & __GFP_THISNODE)
		return CONSTRAINT_NONE;

	/*
	 * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
	 * the page allocator means a mempolicy is in effect.  Cpuset policy
	 * is enforced in get_page_from_freelist().
	 *
	 * Size the domain with node_present_pages(): node_spanned_pages()
	 * also counts holes inside the node's physical range and would
	 * overstate the memory that actually backs a sparse node.
	 */
	if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
		*totalpages = total_swap_pages;
		for_each_node_mask(nid, *nodemask)
			*totalpages += node_present_pages(nid);
		return CONSTRAINT_MEMORY_POLICY;
	}

	/* Check this allocation failure is caused by cpuset's wall function */
	for_each_zone_zonelist_nodemask(zone, z, zonelist,
			high_zoneidx, nodemask)
		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
			cpuset_limited = true;

	if (cpuset_limited) {
		*totalpages = total_swap_pages;
		/* Present pages only, for the same reason as above. */
		for_each_node_mask(nid, cpuset_current_mems_allowed)
			*totalpages += node_present_pages(nid);
		return CONSTRAINT_CPUSET;
	}

	return CONSTRAINT_NONE;
}
/*
 * Work out how many vCPUs dom0 should get.
 *
 * Fills dom0_nodes from the dom0_pxms list (falling back to all online
 * nodes), derives dom0_cpus from those nodes restricted to cpupool0's valid
 * CPUs (falling back to all of them), and returns the weight of dom0_cpus
 * after applying opt_dom0_max_vcpus_min, then opt_dom0_max_vcpus_max, then
 * the HVM_MAX_VCPUS / MAX_VIRT_CPUS cap selected by dom0_pvh.
 */
unsigned int __init dom0_max_vcpus(void)
{
    unsigned int idx, nr_vcpus, cap;
    nodeid_t node;

    /* Translate each requested PXM; ignore those without a node. */
    for ( idx = 0; idx < dom0_nr_pxms; ++idx )
    {
        node = pxm_to_node(dom0_pxms[idx]);
        if ( node == NUMA_NO_NODE )
            continue;
        node_set(node, dom0_nodes);
    }

    /* Only online nodes count; with none left, use every online node. */
    nodes_and(dom0_nodes, dom0_nodes, node_online_map);
    if ( nodes_empty(dom0_nodes) )
        dom0_nodes = node_online_map;

    /* Accumulate the CPUs of all selected nodes. */
    for_each_node_mask ( node, dom0_nodes )
        cpumask_or(&dom0_cpus, &dom0_cpus, &node_to_cpumask(node));

    /* Restrict to cpupool0; with none left, use all of cpupool0's CPUs. */
    cpumask_and(&dom0_cpus, &dom0_cpus, cpupool0->cpu_valid);
    if ( cpumask_empty(&dom0_cpus) )
        cpumask_copy(&dom0_cpus, cpupool0->cpu_valid);

    nr_vcpus = cpumask_weight(&dom0_cpus);

    /* Lower bound first, then upper bound: the upper bound wins. */
    if ( nr_vcpus < opt_dom0_max_vcpus_min )
        nr_vcpus = opt_dom0_max_vcpus_min;
    if ( nr_vcpus > opt_dom0_max_vcpus_max )
        nr_vcpus = opt_dom0_max_vcpus_max;

    if ( dom0_pvh )
        cap = HVM_MAX_VCPUS;
    else
        cap = MAX_VIRT_CPUS;

    return nr_vcpus > cap ? cap : nr_vcpus;
}
/*
 * Allocate the NUMA distance table and fill it with default distances.
 *
 * The table is a square matrix whose dimension is one more than the highest
 * node id found in numa_nodes_parsed combined with the nodes from
 * numa_meminfo.  Returns 0 on success.  On allocation failure returns
 * -ENOMEM and leaves numa_distance set to the marker value (void *)1LU.
 */
static int __init numa_alloc_distance(void)
{
	nodemask_t parsed;
	size_t bytes;
	int row, col, nr = 0;
	u64 pa;

	/* Start from the already parsed nodes and add those from meminfo. */
	parsed = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&parsed, &numa_meminfo);

	/* After the walk nr holds the last (highest) node id; +1 gives the dimension. */
	for_each_node_mask(row, parsed)
		nr = row;
	nr++;
	bytes = nr * nr * sizeof(numa_distance[0]);

	/* Look for a free physical range large enough for the table. */
	pa = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
				    bytes, PAGE_SIZE);
	if (!pa) {
		pr_warning("NUMA: Warning: can't allocate distance table!\n");
		/* don't retry until explicitly reset */
		numa_distance = (void *)1LU;
		return -ENOMEM;
	}

	/* Reserve the range that was found. */
	memblock_reserve(pa, bytes);

	/* Publish the table and its dimension. */
	numa_distance = __va(pa);
	numa_distance_cnt = nr;

	/*
	 * Default distances: LOCAL_DISTANCE from a node to itself,
	 * REMOTE_DISTANCE between different nodes.
	 */
	for (row = 0; row < nr; row++) {
		for (col = 0; col < nr; col++) {
			if (row == col)
				numa_distance[row * nr + col] = LOCAL_DISTANCE;
			else
				numa_distance[row * nr + col] = REMOTE_DISTANCE;
		}
	}
	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", nr);

	return 0;
}
/*
 * Allocate one cpumask per node id in node_to_cpumask_map.
 *
 * If nr_node_ids still equals MAX_NUMNODES it is first set to one more than
 * the last node id in node_possible_map.
 */
void __init setup_node_to_cpumask_map(void)
{
	unsigned int nid;

	if (nr_node_ids == MAX_NUMNODES) {
		unsigned int last = 0;

		/* The walk leaves the last node id of the mask in 'last'. */
		for_each_node_mask(nid, node_possible_map)
			last = nid;
		nr_node_ids = last + 1;
	}

	nid = 0;
	while (nid < nr_node_ids) {
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[nid]);
		nid++;
	}

	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
void __init setup_node_to_cpumask_map(void)
{
	unsigned int nid;

	/* Derive nr_node_ids from the possible map if it is still unset. */
	if (nr_node_ids == MAX_NUMNODES) {
		unsigned int last = 0;

		/* The walk leaves the last node id of the mask in 'last'. */
		for_each_node_mask(nid, node_possible_map)
			last = nid;
		nr_node_ids = last + 1;
	}

	/* One cpumask allocation per node id. */
	nid = 0;
	while (nid < nr_node_ids) {
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[nid]);
		nid++;
	}

	/* From here on cpumask_of_node() is usable. */
	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: node_to_cpumask() is not valid until after this is done.
 */
static void __init setup_node_to_cpumask_map(void)
{
	unsigned int nid, last = 0;
	cpumask_t *table;

	/* Derive nr_node_ids from the possible map if it is still unset. */
	if (nr_node_ids == MAX_NUMNODES) {
		/* The walk leaves the last node id of the mask in 'last'. */
		for_each_node_mask(nid, node_possible_map)
			last = nid;
		nr_node_ids = last + 1;
	}

	/* A single allocation holding one cpumask_t per node id. */
	table = alloc_bootmem_low(nr_node_ids * sizeof(*table));

	Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n",
		table, nr_node_ids);

	/* Publishing the pointer is what makes node_to_cpumask() usable. */
	node_to_cpumask_map = table;
}
/*
 * Size, allocate and initialise the NUMA distance table.
 *
 * The table dimension is one more than the last node id in the union of
 * numa_nodes_parsed and the nodes described by numa_meminfo.  Every entry
 * is preset to LOCAL_DISTANCE on the diagonal and REMOTE_DISTANCE elsewhere.
 *
 * Returns 0 on success.  When no memory can be found it returns -ENOMEM and
 * sets numa_distance to the marker value (void *)1LU.
 */
static int __init numa_alloc_distance(void)
{
	nodemask_t node_set;
	size_t table_bytes;
	int from, to, dim = 0;
	u64 table_pa;

	node_set = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&node_set, &numa_meminfo);

	/* The last id visited is the highest one; the dimension is that + 1. */
	for_each_node_mask(from, node_set)
		dim = from;
	dim++;
	table_bytes = dim * dim * sizeof(numa_distance[0]);

	table_pa = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
					  table_bytes, PAGE_SIZE);
	if (!table_pa) {
		pr_warning("NUMA: Warning: can't allocate distance table!\n");
		/* Leave a non-NULL marker behind instead of a usable table. */
		numa_distance = (void *)1LU;
		return -ENOMEM;
	}
	memblock_reserve(table_pa, table_bytes);

	numa_distance = __va(table_pa);
	numa_distance_cnt = dim;

	/* Fill in the defaults row by row. */
	for (from = 0; from < dim; from++) {
		for (to = 0; to < dim; to++) {
			if (from != to) {
				numa_distance[from * dim + to] = REMOTE_DISTANCE;
				continue;
			}
			numa_distance[from * dim + to] = LOCAL_DISTANCE;
		}
	}

	printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", dim);

	return 0;
}
/*
 * Compute the number of pages to give dom0 initially.
 *
 * d:          the dom0 domain; d->max_vcpus is read and d->max_pages is set.
 * parms:      parsed kernel image parameters (p2m_base, virt_base, virt_kend
 *             and unmapped_initrd are read).
 * initrd_len: initrd length, passed through round_pgup() when the initrd is
 *             not flagged as unmapped.
 *
 * Starts from the memory available on the nodes in dom0_nodes, subtracts
 * several reservations, applies the dom0_nrpages / dom0_min_nrpages /
 * dom0_max_nrpages settings, and finally clips the result for PV kernels
 * without a p2m_base.  Returns the resulting page count.
 *
 * NOTE(review): avail is unsigned and the reservations are subtracted
 * without an underflow check - confirm that callers guarantee enough memory.
 */
unsigned long __init dom0_compute_nr_pages(
    struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
{
    nodeid_t node;
    unsigned long avail = 0, nr_pages, min_pages, max_pages;
    bool_t need_paging;

    /* Sum heap pages and initial image pages over all dom0 nodes. */
    for_each_node_mask ( node, dom0_nodes )
        avail += avail_domheap_pages_region(node, 0, 0) +
                 initial_images_nrpages(node);

    /* Reserve memory for further dom0 vcpu-struct allocations... */
    avail -= (d->max_vcpus - 1UL) <<
             get_order_from_bytes(sizeof(struct vcpu));
    /* ...and compat_l4's, if needed. */
    if ( is_pv_32bit_domain(d) )
        avail -= d->max_vcpus - 1;

    /* Reserve memory for iommu_dom0_init() (rough estimate). */
    if ( iommu_enabled )
    {
        unsigned int s;

        /* Subtract max_pdx shifted right by 9, 18, 27, ... bits. */
        for ( s = 9; s < BITS_PER_LONG; s += 9 )
            avail -= max_pdx >> s;
    }

    need_paging = is_hvm_domain(d) &&
        (!iommu_hap_pt_share || !paging_mode_hap(d));
    /*
     * At most two passes: when need_paging is set, the first pass only
     * produces the nr_pages used to size the paging reservation, and the
     * second pass (need_paging cleared by the loop step) recomputes the
     * values against the reduced avail.
     */
    for ( ; ; need_paging = 0 )
    {
        nr_pages = dom0_nrpages;
        min_pages = dom0_min_nrpages;
        max_pages = dom0_max_nrpages;

        /*
         * If allocation isn't specified, reserve 1/16th of available memory
         * for things like DMA buffers. This reservation is clamped to a
         * maximum of 128MB.
         */
        if ( nr_pages == 0 )
            nr_pages = -min(avail / 16, 128UL << (20 - PAGE_SHIFT));

        /* Negative specification means "all memory - specified amount". */
        if ( (long)nr_pages  < 0 ) nr_pages  += avail;
        if ( (long)min_pages < 0 ) min_pages += avail;
        if ( (long)max_pages < 0 ) max_pages += avail;

        /* Clamp according to min/max limits and available memory. */
        nr_pages = max(nr_pages, min_pages);
        nr_pages = min(nr_pages, max_pages);
        nr_pages = min(nr_pages, avail);

        if ( !need_paging )
            break;

        /* Reserve memory for shadow or HAP. */
        avail -= dom0_paging_pages(d, nr_pages);
    }

    /*
     * Only clip when no positive dom0_nrpages was given and either no
     * positive minimum was given or the result is above the minimum.
     */
    if ( is_pv_domain(d) &&
         (parms->p2m_base == UNSET_ADDR) && (dom0_nrpages <= 0) &&
         ((dom0_min_nrpages <= 0) || (nr_pages > min_pages)) )
    {
        /*
         * Legacy Linux kernels (i.e. such without a XEN_ELFNOTE_INIT_P2M
         * note) require that there is enough virtual space beyond the initial
         * allocation to set up their initial page tables. This space is
         * roughly the same size as the p2m table, so make sure the initial
         * allocation doesn't consume more than about half the space that's
         * available between params.virt_base and the address space end.
         */
        unsigned long vstart, vend, end;
        /* Size of one entry per page: int for 32-bit PV, long otherwise. */
        size_t sizeof_long = is_pv_32bit_domain(d) ? sizeof(int) : sizeof(long);

        vstart = parms->virt_base;
        vend = round_pgup(parms->virt_kend);
        if ( !parms->unmapped_initrd )
            vend += round_pgup(initrd_len);
        end = vend + nr_pages * sizeof_long;

        /* Add the span from vstart to end once more on top of end. */
        if ( end > vstart )
            end += end - vstart;
        /*
         * Clip if end did not get past vstart, or if it lies beyond the
         * range addressable with a sizeof_long-byte word.
         */
        if ( end <= vstart ||
             (sizeof_long < sizeof(end) && end > (1UL << (8 * sizeof_long))) )
        {
            /* 0 here stands for the wrap-around end of the address space. */
            end = sizeof_long >= sizeof(end) ? 0 : 1UL << (8 * sizeof_long);
            nr_pages = (end - vend) / (2 * sizeof_long);
            /* An explicitly requested minimum still takes precedence. */
            if ( dom0_min_nrpages > 0 && nr_pages < min_pages )
                nr_pages = min_pages;
            printk("Dom0 memory clipped to %lu pages\n", nr_pages);
        }
    }

    /* Record the upper limit on the domain, capped at UINT_MAX. */
    d->max_pages = min_t(unsigned long, max_pages, UINT_MAX);

    return nr_pages;
}