/** * trace_call_bpf - invoke BPF program * @prog: BPF program * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. * Can be used from static tracepoints in the future. * * Return: BPF programs always return an integer which is interpreted by * kprobe handler as: * 0 - return from kprobe (event is filtered out) * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) { unsigned int ret; if (in_nmi()) /* not supported yet */ return 1; preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { /* * since some bpf program is already running on this cpu, * don't call into another bpf program (same or different) * and don't send kprobe event into ring-buffer, * so return zero here */ ret = 0; goto out; } rcu_read_lock(); ret = BPF_PROG_RUN(prog, ctx); rcu_read_unlock(); out: __this_cpu_dec(bpf_prog_active); preempt_enable(); return ret; }
static inline void get_seq(__u32 *ts, int *cpu) { preempt_disable(); *ts = __this_cpu_inc_return(proc_event_counts) -1; *cpu = smp_processor_id(); preempt_enable(); }
/* * Decode and save high level MCE information into per cpu buffer which * is an array of machine_check_event structure. */ void save_mce_event(struct pt_regs *regs, long handled, struct mce_error_info *mce_err, uint64_t nip, uint64_t addr) { uint64_t srr1; int index = __this_cpu_inc_return(mce_nest_count) - 1; struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]); /* * Return if we don't have enough space to log mce event. * mce_nest_count may go beyond MAX_MC_EVT but that's ok, * the check below will stop buffer overrun. */ if (index >= MAX_MC_EVT) return; /* Populate generic machine check info */ mce->version = MCE_V1; mce->srr0 = nip; mce->srr1 = regs->msr; mce->gpr3 = regs->gpr[3]; mce->in_use = 1; mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; mce->severity = MCE_SEV_ERROR_SYNC; srr1 = regs->msr; /* * Populate the mce error_type and type-specific error_type. */ mce_set_error_info(mce, mce_err); if (!addr) return; if (mce->error_type == MCE_ERROR_TYPE_TLB) { mce->u.tlb_error.effective_address_provided = true; mce->u.tlb_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_SLB) { mce->u.slb_error.effective_address_provided = true; mce->u.slb_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; } return; }
static void queue_ue_paddr(unsigned long paddr) { int index; index = __this_cpu_inc_return(rtas_ue_count) - 1; if (index >= MAX_MC_EVT) { __this_cpu_dec(rtas_ue_count); return; } this_cpu_write(rtas_ue_paddr[index], paddr); schedule_work(&hwpoison_work); }
/* * Optimized increment and decrement functions. * * These are only for a single page and therefore can take a struct page * * argument instead of struct zone *. This allows the inclusion of the code * generated for page_zone(page) into the optimized functions. * * No overflow check is necessary and therefore the differential can be * incremented or decremented in place which may allow the compilers to * generate better code. * The increment or decrement is known and therefore one boundary check can * be omitted. * * NOTE: These functions are very performance sensitive. Change only * with care. * * Some processors have inc/dec instructions that are atomic vs an interrupt. * However, the code must first determine the differential location in a zone * based on the processor number and then inc/dec the counter. There is no * guarantee without disabling preemption that the processor will not change * in between and therefore the atomicity vs. interrupt cannot be exploited * in a useful way here. */ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) { struct per_cpu_pageset __percpu *pcp = zone->pageset; s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { s8 overstep = t >> 1; zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); }
/* * Queue up the MCE event which then can be handled later. */ void machine_check_ue_event(struct machine_check_event *evt) { int index; index = __this_cpu_inc_return(mce_ue_count) - 1; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { __this_cpu_dec(mce_ue_count); return; } memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt)); /* Queue work to process this event later. */ schedule_work(&mce_ue_event_work); }
static inline void send_msg(struct cn_msg *msg) { preempt_disable(); msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; ((struct proc_event *)msg->data)->cpu = smp_processor_id(); /* * Preemption remains disabled during send to ensure the messages are * ordered according to their sequence numbers. * * If cn_netlink_send() fails, the data is not sent. */ cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); preempt_enable(); }
/* * Queue up the MCE event which then can be handled later. */ void machine_check_queue_event(void) { int index; struct machine_check_event evt; if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) return; index = __this_cpu_inc_return(mce_queue_count) - 1; /* If queue is full, just return for now. */ if (index >= MAX_MC_EVT) { __this_cpu_dec(mce_queue_count); return; } memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt)); /* Queue irq work to process this event later. */ irq_work_queue(&mce_event_process_work); }
/** * trace_call_bpf - invoke BPF program * @call: tracepoint event * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. * Can be used from static tracepoints in the future. * * Return: BPF programs always return an integer which is interpreted by * kprobe handler as: * 0 - return from kprobe (event is filtered out) * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; if (in_nmi()) /* not supported yet */ return 1; preempt_disable(); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { /* * since some bpf program is already running on this cpu, * don't call into another bpf program (same or different) * and don't send kprobe event into ring-buffer, * so return zero here */ ret = 0; goto out; } /* * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock * to all call sites, we did a bpf_prog_array_valid() there to check * whether call->prog_array is empty or not, which is * a heurisitc to speed up execution. * * If bpf_prog_array_valid() fetched prog_array was * non-NULL, we go into trace_call_bpf() and do the actual * proper rcu_dereference() under RCU lock. * If it turns out that prog_array is NULL then, we bail out. * For the opposite, if the bpf_prog_array_valid() fetched pointer * was NULL, you'll skip the prog_array with the risk of missing * out of events when it was updated in between this and the * rcu_dereference() which is accepted risk. */ ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); out: __this_cpu_dec(bpf_prog_active); preempt_enable(); return ret; }