/*
 * Move the lookup tables filled in by early init code into per-cpu
 * storage.
 *
 * Each possible CPU gets its entry of every early map stored into the
 * matching per-cpu variable.  Once the copy is complete the early
 * pointers are set to NULL, signalling that the static arrays must no
 * longer be used.
 */
static void __init setup_per_cpu_maps(void)
{
	int i;

	for_each_possible_cpu(i) {
		per_cpu(x86_cpu_to_apicid, i) =
			early_per_cpu_map(x86_cpu_to_apicid, i);
		per_cpu(x86_bios_cpu_apicid, i) =
			early_per_cpu_map(x86_bios_cpu_apicid, i);
#ifdef X86_64_NUMA
		/* The node map is only copied when X86_64_NUMA is defined. */
		per_cpu(x86_cpu_to_node_map, i) =
			early_per_cpu_map(x86_cpu_to_node_map, i);
#endif
	}

	/* From here on the early static arrays are considered gone. */
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#ifdef X86_64_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
}
/*
 * Allocate the percpu first chunk and fill in the per-cpu state that
 * early boot code gathered in static arrays.
 *
 * Allocator selection: the embedding allocator is tried unless the page
 * allocator has been chosen.  On 32bit an "auto" choice is turned into
 * "page" when pcpu_need_numa() is true, because embedding on NUMA can
 * yield a very sparse unit mapping and the 32bit vmalloc area is not
 * spacious enough for that.  A failed embed attempt falls back to the
 * page allocator; if that fails as well the kernel panics.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned long base_delta;
	unsigned int cpu;
	int err = -EINVAL;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

#ifdef CONFIG_X86_32
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
					PERCPU_DYNAMIC_RESERVE -
					PERCPU_FIRST_CHUNK_RESERVE;
		size_t atom_size;

		/* Use PMD sized atoms only when PSE is available. */
		if (cpu_has_pse)
			atom_size = PMD_SIZE;
		else
			atom_size = PAGE_SIZE;

		err = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					     dyn_size, atom_size,
					     pcpu_cpu_distance,
					     pcpu_fc_alloc, pcpu_fc_free);
		if (err < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], err);
	}

	/*
	 * Reached with err < 0 either because embedding was skipped (the
	 * initial -EINVAL is still in place) or because it failed.
	 */
	if (err < 0) {
		err = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    pcpu_fc_alloc, pcpu_fc_free,
					    pcpup_populate_pte);
		if (err < 0)
			panic("cannot initialize percpu area (err=%d)", err);
	}

	/* The percpu areas exist now; compute where each unit lives. */
	base_delta = (unsigned long)pcpu_base_addr -
		     (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu) {
		/* The offset must be in place before any per_cpu() access. */
		per_cpu_offset(cpu) = base_delta + pcpu_unit_offsets[cpu];
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/*
		 * Early init routines kept their data in static arrays.
		 * Transfer it into the per cpu areas; the arrays become
		 * expendable and their *_early_ptr's are cleared after
		 * the loop to show that they are gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/* Points 64 bytes below the end of this CPU's irq stack. */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/*
		 * The boot cpu may sit on a node without installed memory,
		 * so its numa_node has to be made correct here.  With
		 * MEMORY_HOTPLUG, cpu_up() also calls cpu_to_node() for APs
		 * before per_cpu(numa_node) is set up later by c_init
		 * (intel_init/amd_init).  Hence every CPU, boot cpu and
		 * APs alike, is set here.
		 */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/*
		 * The boot CPU has been running on the .init.data area
		 * until now; reload any state that changed for it.
		 */
		if (cpu == 0)
			switch_to_new_gdt(cpu);
	}

	/* Mark the early static arrays as about to disappear. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Build the node -> cpumask map. */
	setup_node_to_cpumask_map();

	/* Set up the cpu initialized, callin and callout masks. */
	setup_cpu_local_masks();
}
/*
 * Create the percpu first chunk and initialise the per-cpu variables
 * that depend on it.
 *
 * The embedding allocator is attempted unless pcpu_chosen_fc selects the
 * page allocator (on 32bit, an auto choice becomes page when
 * pcpu_need_numa() returns true).  If embedding is skipped or fails, the
 * page allocator is used; if that fails too, the function panics.
 * Afterwards every possible CPU gets its offset, cpu number and the
 * values recorded in the early maps, and the early map pointers are set
 * to NULL.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned long base_delta;
	unsigned int cpu;
	int err = -EINVAL;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

#ifdef CONFIG_X86_32
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
					PERCPU_DYNAMIC_RESERVE -
					PERCPU_FIRST_CHUNK_RESERVE;
		size_t atom_size;

		/* PMD_SIZE atoms on 64bit builds, PAGE_SIZE otherwise. */
#ifdef CONFIG_X86_64
		atom_size = PMD_SIZE;
#else
		atom_size = PAGE_SIZE;
#endif
		err = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					     dyn_size, atom_size,
					     pcpu_cpu_distance,
					     pcpu_fc_alloc, pcpu_fc_free);
		if (err < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], err);
	}

	/* err is still negative if embedding was skipped or did not work. */
	if (err < 0) {
		err = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    pcpu_fc_alloc, pcpu_fc_free,
					    pcpup_populate_pte);
		if (err < 0)
			panic("cannot initialize percpu area (err=%d)", err);
	}

	/* Distance between the first chunk base and the static percpu image. */
	base_delta = (unsigned long)pcpu_base_addr -
		     (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu) {
		/* Record the offset first; the per_cpu() stores follow it. */
		per_cpu_offset(cpu) = base_delta + pcpu_unit_offsets[cpu];
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/* Copy this CPU's entries out of the early maps. */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/* Points 64 bytes below the end of this CPU's irq stack. */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/* Only CPU 0 additionally calls switch_to_new_gdt(). */
		if (cpu == 0)
			switch_to_new_gdt(cpu);
	}

	/* The early maps have been copied; drop the pointers to them. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	setup_node_to_cpumask_map();
	setup_cpu_local_masks();
}
/*
 * Study note: x86 goes through this implementation of
 * setup_per_cpu_areas().
 *
 * Allocates the percpu first chunk (embed allocator first, page
 * allocator as the fallback, panic if both fail) and then initialises
 * the per-cpu state of every possible CPU from the data collected by
 * early init code.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned long base_delta;
	unsigned int cpu;
	int err = -EINVAL;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * Allocate percpu area.  The embedding allocator is preferred, but
	 * on NUMA configurations it can produce a very sparse unit mapping
	 * and the vmalloc area is not spacious enough on 32bit.  The page
	 * allocator is used in that case.
	 */
#ifdef CONFIG_X86_32
	/*
	 * 32bit only: when the first chunk choice is "auto" and NUMA is
	 * needed, switch to "page".
	 * Study note: the embed method (annotated as working in 2MB units)
	 * is presumably a poor fit for memory allocation on 32bit.
	 */
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif

	/*
	 * A choice other than PAGE is either auto or embed.  Auto tries
	 * embed first and page second, so at this point auto and embed are
	 * handled the same way.
	 */
	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		/* Study note: annotated as 8KB + 20KB - 8KB - TODO confirm. */
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
					PERCPU_DYNAMIC_RESERVE -
					PERCPU_FIRST_CHUNK_RESERVE;
		size_t atom_size;

		/*
		 * On 64bit, PMD_SIZE is used for atom_size so that embedded
		 * percpu areas are aligned to PMD.  In the future this can
		 * also allow PMD mappings in the vmalloc area.  32bit uses
		 * PAGE_SIZE because vmalloc space is highly contended there
		 * and large vmalloc area allocations can easily fail.
		 */
#ifdef CONFIG_X86_64
		/*
		 * Study note: on 64bit this appears to allocate in 2MB units
		 * (PS bit) in order to obtain a contiguous, PMD-size-aligned
		 * range of vmalloc space.  On 32bit, 2MB requests apparently
		 * kept failing, so the idea was given up there.
		 */
		atom_size = PMD_SIZE;	/* annotated as 2MB */
#else
		atom_size = PAGE_SIZE;
#endif
		/*
		 * Allocate the first chunk with the embed method.
		 * Study note: the arguments were annotated as reserve
		 * "8 << 10", dyn_size 20KB, atom_size 2MB; the remaining
		 * three arguments are functions.
		 */
		err = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					     dyn_size, atom_size,
					     pcpu_cpu_distance,
					     pcpu_fc_alloc, pcpu_fc_free);
		if (err < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], err);
	}

	/*
	 * If the embed method failed (or was never attempted), allocate the
	 * first chunk with the page method instead.
	 */
	if (err < 0) {
		err = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    pcpu_fc_alloc, pcpu_fc_free,
					    pcpup_populate_pte);
		if (err < 0)
			panic("cannot initialize percpu area (err=%d)", err);
	}

	/* The percpu areas are up and running from here on. */
	base_delta = (unsigned long)pcpu_base_addr -
		     (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu) {
		/*
		 * per_cpu_offset() is the offset that has to be added to a
		 * percpu variable to reach the copy belonging to a certain
		 * processor.
		 * Study note: most architectures are said to use the
		 * __per_cpu_offset array while x86_64 has its own way -
		 * TODO confirm.
		 *
		 * The delta is added to the unit offset obtained when the
		 * first chunk was initialised, giving this CPU's offset.
		 */
		per_cpu_offset(cpu) = base_delta + pcpu_unit_offsets[cpu];
		/* Keep the same offset in this_cpu_off. */
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		/* Store the cpu number alongside it. */
		per_cpu(cpu_number, cpu) = cpu;

		/*
		 * Study note: x86_64 reportedly uses neither the percpu
		 * segment nor the stack canary segment - TODO confirm.  An
		 * explanation of the canary can be found at
		 * http://studyfoss.egloos.com/5279959
		 */
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/*
		 * Early init routines kept their data in static arrays.
		 * Transfer it into the per cpu areas; the arrays become
		 * expendable and their *_early_ptr's are cleared after
		 * the loop to show that they are gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		/* Move the apicids determined early on into percpu data. */
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/*
		 * Assign the irq stack pointer of each CPU; it ends up 64
		 * bytes below the end of the irq stack.
		 * NOTE(review): the original annotation guessed that 48 of
		 * those bytes are the gs+canary area and the remainder is
		 * padding protecting the irq stack; it was marked as
		 * unverified - confirm before relying on it.
		 */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		/* The NUMA data determined early on moves to percpu too. */
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/*
		 * The boot cpu may sit on a node without installed memory,
		 * so its numa_node has to be made correct here.  With
		 * MEMORY_HOTPLUG, cpu_up() also calls cpu_to_node() for APs
		 * before per_cpu(numa_node) is set up later by c_init
		 * (intel_init/amd_init).  Hence every CPU, boot cpu and
		 * APs alike, is set here.
		 */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/*
		 * The boot CPU has been running on the .init.data area
		 * until now; reload any state that changed for it.
		 */
		if (cpu == 0)
			switch_to_new_gdt(cpu);
	}

	/* Mark the early static arrays as about to disappear. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Build the node -> cpumask map. */
	setup_node_to_cpumask_map();

	/* Set up the cpu initialized, callin and callout masks. */
	setup_cpu_local_masks();
}
/*
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 *
 * Sets up the static percpu area by trying setup_pcpu_remap(),
 * setup_pcpu_embed() and setup_pcpu_4k() in that order and panics when
 * none of them succeeds.  The non-negative result of the successful
 * allocator is used as the per-cpu unit size when computing each CPU's
 * offset.
 */
void __init setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	unsigned long base_delta;
	size_t unit_size;
	unsigned int cpu;
	ssize_t res;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * Allocate percpu area.  Large page mappings are attempted when
	 * PSE is supported; the comments on top of each allocator hold
	 * the details.  Each allocator is only tried if the previous one
	 * returned an error.
	 */
	res = setup_pcpu_remap(static_size);
	if (res < 0) {
		res = setup_pcpu_embed(static_size);
		if (res < 0) {
			res = setup_pcpu_4k(static_size);
			if (res < 0)
				panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
				      static_size, res);
		}
	}
	unit_size = res;

	/* The percpu areas are up and running from here on. */
	base_delta = (unsigned long)pcpu_base_addr -
		     (unsigned long)__per_cpu_start;

	for_each_possible_cpu(cpu) {
		/* Units are laid out back to back, unit_size bytes apart. */
		per_cpu_offset(cpu) = base_delta + cpu * unit_size;
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/*
		 * Early init routines kept their data in static arrays.
		 * Transfer it into the per cpu areas; the arrays become
		 * expendable and their *_early_ptr's are cleared after
		 * the loop to show that they are gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/* Points 64 bytes below the end of this CPU's irq stack. */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
#endif
		/*
		 * The boot CPU has been running on the .data.init area
		 * until now; reload any state that changed for it.
		 */
		if (cpu == boot_cpu_id)
			switch_to_new_gdt(cpu);
	}

	/* Mark the early static arrays as about to disappear. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Build the node -> cpumask map. */
	setup_node_to_cpumask_map();

	/* Set up the cpu initialized, callin and callout masks. */
	setup_cpu_local_masks();
}