static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, u16 *new_asid, bool *need_flush) { u16 asid; if (!static_cpu_has(X86_FEATURE_PCID)) { *new_asid = 0; *need_flush = true; return; } for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) != next->context.ctx_id) continue; *new_asid = asid; *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) < next_tlb_gen); return; } /* * We don't currently own an ASID slot on this CPU. * Allocate a slot. */ *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; if (*new_asid >= TLB_NR_DYN_ASIDS) { *new_asid = 0; this_cpu_write(cpu_tlbstate.next_asid, 1); } *need_flush = true; }
/* * TLB flush funcation: * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. */ static void flush_tlb_func(void *info) { struct flush_tlb_info *f = info; inc_irq_stat(irq_tlb_count); if (f->flush_mm && f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) { local_flush_tlb(); trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, TLB_FLUSH_ALL); } else { unsigned long addr; unsigned long nr_pages = (f->flush_end - f->flush_start) / PAGE_SIZE; addr = f->flush_start; while (addr < f->flush_end) { __flush_tlb_single(addr); addr += PAGE_SIZE; } trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, nr_pages); } } else leave_mm(smp_processor_id()); }
void init_espfix_ap(void) { unsigned int cpu, page; unsigned long addr; pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; int n; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ if (likely(this_cpu_read(espfix_stack))) return; /* Already initialized */ cpu = smp_processor_id(); addr = espfix_base_addr(cpu); page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ stack_page = ACCESS_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? */ stack_page = ACCESS_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); }
/* * Call this when reinitializing a CPU. It fixes the following potential * problems: * * - The ASID changed from what cpu_tlbstate thinks it is (most likely * because the CPU was taken down and came back up with CR3's PCID * bits clear. CPU hotplug can do this. * * - The TLB contains junk in slots corresponding to inactive ASIDs. * * - The CPU went so far out to lunch that it may have missed a TLB * flush. */ void initialize_tlbstate_and_flush(void) { int i; struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm); u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen); unsigned long cr3 = __read_cr3(); /* Assert that CR3 already references the right mm. */ WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); /* * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization * doesn't work like other CR4 bits because it can only be set from * long mode.) */ WARN_ON(boot_cpu_has(X86_FEATURE_PCID) && !(cr4_read_shadow() & X86_CR4_PCIDE)); /* Force ASID 0 and force a TLB flush. */ write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen); for (i = 1; i < TLB_NR_DYN_ASIDS; i++) this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0); }
enum paravirt_lazy_mode paravirt_get_lazy_mode(void) { if (in_interrupt()) return PARAVIRT_LAZY_NONE; return this_cpu_read(paravirt_lazy_mode); }
void l4x_pte_check_empty(struct mm_struct *mm) { struct unmap_log_t *log; int i; WARN_ON(!irqs_disabled()); // otherwise we need to go non-preemtible log = this_cpu_ptr(&unmap_log); if (likely(this_cpu_read(unmap_log.cnt) == 0)) return; for (i = 0; i < log->cnt; ++i) { if (mm != log->log[i].mm) continue; l4x_printf("L4x: exiting with non-flushed entry: %lx:%lx[sz=%d,r=%x,from=%lx,cpu=%d,num=%d]\n", log->log[i].mm->context.task, log->log[i].addr, log->log[i].size, log->log[i].rights, log->log[i].dbg1, raw_smp_processor_id(), i); } l4x_unmap_log_flush(); }
int __sbitmap_queue_get(struct sbitmap_queue *sbq) { unsigned int hint, depth; int nr; hint = this_cpu_read(*sbq->alloc_hint); depth = READ_ONCE(sbq->sb.depth); if (unlikely(hint >= depth)) { hint = depth ? prandom_u32() % depth : 0; this_cpu_write(*sbq->alloc_hint, hint); } nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); if (nr == -1) { /* If the map is full, a hint won't do us much good. */ this_cpu_write(*sbq->alloc_hint, 0); } else if (nr == hint || unlikely(sbq->round_robin)) { /* Only update the hint if we used it. */ hint = nr + 1; if (hint >= depth - 1) hint = 0; this_cpu_write(*sbq->alloc_hint, hint); } return nr; }
/* * Please ignore the name of this function. It should be called * switch_to_kernel_thread(). * * enter_lazy_tlb() is a hint from the scheduler that we are entering a * kernel thread or other context without an mm. Acceptable implementations * include doing nothing whatsoever, switching to init_mm, or various clever * lazy tricks to try to minimize TLB flushes. * * The scheduler reserves the right to call enter_lazy_tlb() several times * in a row. It will notify us that we're going back to a real mm by * calling switch_mm_irqs_off(). */ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm) return; this_cpu_write(cpu_tlbstate.is_lazy, true); }
static void xen_restore_fl(unsigned long flags) { struct vcpu_info *vcpu; /* convert from IF type flag */ flags = !(flags & X86_EFLAGS_IF); /* There's a one instruction preempt window here. We need to make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); /* Doesn't matter if we get preempted here, because any pending event will get dealt with anyway. */ if (flags == 0) { preempt_check_resched(); barrier(); /* unmask then check (avoid races) */ if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); } }
static void do_flush_tlb_all(void *info) { count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); }
/* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the * necessary invalidation by clearing out the 'ctx_id' which * forces a TLB flush when the context is loaded. */ static void clear_asid_other(void) { u16 asid; /* * This is only expected to be set if we have disabled * kernel _PAGE_GLOBAL pages. */ if (!static_cpu_has(X86_FEATURE_PTI)) { WARN_ON_ONCE(1); return; } for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) { /* Do not need to flush the current asid */ if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid)) continue; /* * Make sure the next time we go to switch to * this asid, we do a flush: */ this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0); } this_cpu_write(cpu_tlbstate.invalidate_other, false); }
void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { if (this_cpu_read(nf_skb_duplicated)) return; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_reset(skb); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); #endif if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } if (nf_dup_ipv6_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip6_local_out(net, skb->sk, skb); __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); } }
void cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; preempt_disable_notrace(); do { seq = this_cpu_read(cyc2ns.seq.sequence); idx = seq & 1; data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); }
/* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. */ void leave_mm(int cpu) { struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) { cpumask_clear_cpu(cpu, mm_cpumask(active_mm)); load_cr3(swapper_pg_dir); /* * This gets called in the idle path where RCU * functions differently. Tracing normally * uses RCU, so we have to call the tracepoint * specially here. */ trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); } }
static void xen_irq_disable(void) { /* There's a one instruction preempt window here. We need to make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); }
/* * If a preemption happens between this_cpu_read and this_cpu_write, the only * side effect is that we'll give a few allocated in different contexts objects * the same tag. Since tag-based KASAN is meant to be used a probabilistic * bug-detection debug feature, this doesn't have significant negative impact. * * Ideally the tags use strong randomness to prevent any attempts to predict * them during explicit exploit attempts. But strong randomness is expensive, * and we did an intentional trade-off to use a PRNG. This non-atomic RMW * sequence has in fact positive effect, since interrupts that randomly skew * PRNG at unpredictable points do only good. */ u8 random_tag(void) { u32 state = this_cpu_read(prng_state); state = 1664525 * state + 1013904223; this_cpu_write(prng_state, state); return (u8)(state % (KASAN_TAG_MAX + 1)); }
void paravirt_start_context_switch(struct task_struct *prev) { BUG_ON(preemptible()); if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); } enter_lazy(PARAVIRT_LAZY_CPU); }
static void set_cpuid_faulting(bool on) { u64 msrval; msrval = this_cpu_read(msr_misc_features_shadow); msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT); this_cpu_write(msr_misc_features_shadow, msrval); wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval); }
/* We don't care if we get preempted and read/write IVs from the next CPU. */ static void echainiv_read_iv(u8 *dst, unsigned size) { u32 *a = (u32 *)dst; u32 __percpu *b = echainiv_iv; for (; size >= 4; size -= 4) { *a++ = this_cpu_read(*b); b++; } }
__visible void trace_hardirqs_off_caller(unsigned long caller_addr) { if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, caller_addr); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); } lockdep_hardirqs_off(CALLER_ADDR0); }
void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { if (!in_nmi()) trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } lockdep_hardirqs_on(CALLER_ADDR0); }
void leave_mm(int cpu) { struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm); /* * It's plausible that we're in lazy TLB mode while our mm is init_mm. * If so, our callers still expect us to flush the TLB, but there * aren't any user TLB entries in init_mm to worry about. * * This needs to happen before any other sanity checks due to * intel_idle's shenanigans. */ if (loaded_mm == &init_mm) return; /* Warn if we're not lazy. */ WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy)); switch_mm(NULL, &init_mm, NULL); }
static unsigned long xen_read_cr0(void) { unsigned long cr0 = this_cpu_read(xen_cr0_value); if (unlikely(cr0 == 0)) { cr0 = native_read_cr0(); this_cpu_write(xen_cr0_value, cr0); } return cr0; }
void trace_hardirqs_off(void) { if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); } lockdep_hardirqs_off(CALLER_ADDR0); }
/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. */ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { struct action_flow_keys *keys = this_cpu_ptr(flow_keys); int level = this_cpu_read(exec_actions_level); struct sw_flow_key *key = NULL; if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { key = &keys->key[level - 1]; *key = *key_; } return key; }
static int irqtime_account_si_update(void) { u64 *cpustat = kcpustat_this_cpu->cpustat; unsigned long flags; u64 latest_ns; int ret = 0; local_irq_save(flags); latest_ns = this_cpu_read(cpu_softirq_time); if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) ret = 1; local_irq_restore(flags); return ret; }
void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif) { struct iphdr *iph; if (this_cpu_read(nf_skb_duplicated)) return; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has * happened. The copy should be independently delivered to the gateway. */ skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) return; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ nf_conntrack_put(skb->nfct); skb->nfct = &nf_ct_untracked_get()->ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); #endif /* * If we are in PREROUTING/INPUT, the checksum must be recalculated * since the length could have changed as a result of defragmentation. * * We also decrease the TTL to mitigate potential loops between two * hosts. * * Set %IP_DF so that the original source is notified of a potentially * decreased MTU on the clone route. IPv6 does this too. */ iph = ip_hdr(skb); iph->frag_off |= htons(IP_DF); if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) --iph->ttl; ip_send_check(iph); if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); ip_local_out(net, skb->sk, skb); __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); } }
static unsigned long xen_save_fl(void) { struct vcpu_info *vcpu; unsigned long flags; vcpu = this_cpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; /* convert to IF type flag -0 -> 0x00000000 -1 -> 0xffffffff */ return (-flags) & X86_EFLAGS_IF; }
void l4x_pte_check_empty(struct mm_struct *mm) { if (this_cpu_read(unmap_log.cnt)) { struct unmap_log_t *log = &__get_cpu_var(unmap_log); int i; for (i = 0; i < log->cnt; ++i) { if (mm != log->log[i].mm) continue; l4x_printf("L4x: exiting with non-flushed entry: %lx:%lx[%d,%x]\n", log->log[i].mm->context.task, log->log[i].addr, log->log[i].size, log->log[i].rights); } } }
RTCCUINTREG VBOXCALL supdrvOSChangeCR4(RTCCUINTREG fOrMask, RTCCUINTREG fAndMask) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 20, 0) RTCCUINTREG uOld = this_cpu_read(cpu_tlbstate.cr4); RTCCUINTREG uNew = (uOld & fAndMask) | fOrMask; if (uNew != uOld) { this_cpu_write(cpu_tlbstate.cr4, uNew); __write_cr4(uNew); } #else RTCCUINTREG uOld = ASMGetCR4(); RTCCUINTREG uNew = (uOld & fAndMask) | fOrMask; if (uNew != uOld) ASMSetCR4(uNew); #endif return uOld; }