int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs; struct task_struct *tsk; int err; childregs = task_pt_regs(p); *childregs = *regs; childregs->ax = 0; childregs->sp = sp; p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(regs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; return -ENOMEM; } set_tsk_thread_flag(p, TIF_IO_BITMAP); } err = 0; if (clone_flags & CLONE_SETTLS) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); p->thread.debugctlmsr = 0; return err; }
int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs; struct task_struct *tsk; int err; childregs = task_pt_regs(p); *childregs = *regs; childregs->ax = 0; childregs->sp = sp; p->thread.sp = (unsigned long) childregs; p->thread.sp0 = (unsigned long) (childregs+1); p->thread.ip = (unsigned long) ret_from_fork; task_user_gs(p) = get_user_gs(regs); p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; return -ENOMEM; } set_tsk_thread_flag(p, TIF_IO_BITMAP); } err = 0; /* * Set a new TLS for the child thread? */ if (clone_flags & CLONE_SETTLS) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } return err; }
/* * Enable single or block step. */ static void enable_step(struct task_struct *child, bool block) { /* * Make sure block stepping (BTF) is not enabled unless it should be. * Note that we don't try to worry about any is_setting_trap_flag() * instructions after the first when using block stepping. * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ if (enable_single_step(child) && block) set_task_blockstep(child, true); else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) set_task_blockstep(child, false); }
static int test_task_flag(struct task_struct *p, int flag) { struct task_struct *t = p; rcu_read_lock(); for_each_thread(p, t) { task_lock(t); if (test_tsk_thread_flag(t, flag)) { task_unlock(t); rcu_read_unlock(); return 1; } task_unlock(t); }
/* * Replace the return address with the trampoline address. Returns * the original return address. */ static unsigned long arch_hijack_uret_addr(unsigned long trampoline_address, struct pt_regs *regs, struct uprobe_task *utask) { unsigned long orig_ret_addr; #ifdef CONFIG_COMPAT if (test_tsk_thread_flag(utask->tsk, TIF_31BIT)) orig_ret_addr = regs->gprs[14]&0x7FFFFFFFUL; else #endif orig_ret_addr = regs->gprs[14]; regs->gprs[14] = trampoline_address; return orig_ret_addr; }
static int test_task_flag(struct task_struct *p, int flag) { struct task_struct *t = p; do { task_lock(t); if (test_tsk_thread_flag(t, flag)) { task_unlock(t); return 1; } task_unlock(t); } while_each_thread(p, t); return 0; }
void user_disable_single_step(struct task_struct *child) { /* * Make sure block stepping (BTF) is disabled. */ if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); /* But touch TF only if it was set by us.. */ if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) task_pt_regs(child)->flags &= ~X86_EFLAGS_TF; }
/** * freezing_slow_path - slow path for testing whether a task needs to be frozen * @p: task to be tested * * This function is called by freezing() if system_freezing_cnt isn't zero * and tests whether @p needs to enter and stay in frozen state. Can be * called under any context. The freezers are responsible for ensuring the * target tasks see the updated state. */ bool freezing_slow_path(struct task_struct *p) { if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK)) return false; if (test_tsk_thread_flag(p, TIF_MEMDIE)) return false; if (pm_nosig_freezing || cgroup_freezing(p)) return true; if (pm_freezing && !(p->flags & PF_KTHREAD)) return true; return false; }
RTDECL(bool) RTThreadPreemptIsPending(RTTHREAD hThread) { Assert(hThread == NIL_RTTHREAD); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 4) return !!test_tsk_thread_flag(current, TIF_NEED_RESCHED); #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 20) return !!need_resched(); #elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 110) return current->need_resched != 0; #else return need_resched != 0; #endif }
int ptrace_set_watch_regs(struct task_struct *child, struct pt_watch_regs __user *addr) { int i; int watch_active = 0; unsigned long lt[NUM_WATCH_REGS]; u16 ht[NUM_WATCH_REGS]; if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0) return -EIO; if (!access_ok(VERIFY_READ, addr, sizeof(struct pt_watch_regs))) return -EIO; /* Check the values. */ for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { __get_user(lt[i], &addr->WATCH_STYLE.watchlo[i]); #ifdef CONFIG_32BIT if (lt[i] & __UA_LIMIT) return -EINVAL; #else if (test_tsk_thread_flag(child, TIF_32BIT_ADDR)) { if (lt[i] & 0xffffffff80000000UL) return -EINVAL; } else { if (lt[i] & __UA_LIMIT) return -EINVAL; } #endif __get_user(ht[i], &addr->WATCH_STYLE.watchhi[i]); if (ht[i] & ~MIPS_WATCHHI_MASK) return -EINVAL; } /* Install them. */ for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) { if (lt[i] & MIPS_WATCHLO_IRW) watch_active = 1; child->thread.watch.mips3264.watchlo[i] = lt[i]; /* Set the G bit. */ child->thread.watch.mips3264.watchhi[i] = ht[i]; } if (watch_active) set_tsk_thread_flag(child, TIF_LOAD_WATCH); else clear_tsk_thread_flag(child, TIF_LOAD_WATCH); return 0; }
/* * arch_uprobe_pre_xol - prepare to execute out of line. * @auprobe: the probepoint information. * @regs: reflects the saved user state of current task. */ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct arch_uprobe_task *autask; autask = ¤t->utask->autask; autask->saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; regs->ip = current->utask->xol_vaddr; pre_xol_rip_insn(auprobe, regs, autask); autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); regs->flags |= X86_EFLAGS_TF; if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); return 0; }
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { struct thread_struct *prev, *next; prev = &prev_p->thread; next = &next_p->thread; if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); debugctl &= ~DEBUGCTLMSR_BTF; if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) debugctl |= DEBUGCTLMSR_BTF; update_debugctlmsr(debugctl); } if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ if (test_tsk_thread_flag(next_p, TIF_NOTSC)) hard_disable_TSC(); else hard_enable_TSC(); } if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. * Normally this is 128 bytes or less: */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, max(prev->io_bitmap_max, next->io_bitmap_max)); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } propagate_user_return_notify(prev_p, next_p); }
enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill) { if (task->exit_state) return OOM_SCAN_CONTINUE; if (oom_unkillable_task(task, NULL, nodemask)) return OOM_SCAN_CONTINUE; /* * This task already has access to memory reserves and is being killed. * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { if (unlikely(frozen(task))) __thaw_task(task); if (!force_kill) return OOM_SCAN_ABORT; } if (!task->mm) return OOM_SCAN_CONTINUE; if (task->flags & PF_EXITING) { /* * If task is current and is in the process of releasing memory, * allow the "kill" to set TIF_MEMDIE, which will allow it to * access memory reserves. Otherwise, it may stall forever. * * The iteration isn't broken here, however, in case other * threads are found to have already been oom killed. */ if (task == current) return OOM_SCAN_SELECT; else if (!force_kill) { /* * If this task is not being ptraced on exit, then wait * for it to finish before killing some other task * unnecessarily. */ if (!(task->group_leader->ptrace & PT_TRACE_EXIT)) return OOM_SCAN_ABORT; } } return OOM_SCAN_OK; }
static long _fm_syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { long ret = -1; #ifdef CONFIG_X86_64 /* Check if this is a 32 bit process running on 64 bit kernel */ int ia32 = test_tsk_thread_flag(task, TIF_IA32); if (ia32) { long ia32_id = syscall_get_nr(task, regs); if (ia32_id >= 0 && ia32_id < FILEMON_SYSCALLS_IA32_SIZE) ret = _ia32_to_syscall_map[ia32_id]; } else { ret = syscall_get_nr(task, regs); } #else ret = syscall_get_nr(task, regs); #endif return ret; }
/* * Ensure that task->thread.sve_state is up to date with respect to * the task->thread.uw.fpsimd_state. * * This should only be called by ptrace to merge new FPSIMD register * values into a task for which SVE is currently active. * task must be non-runnable. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must already have been initialised with * the new FPSIMD register values to be merged in. */ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; if (!test_tsk_thread_flag(task, TIF_SVE)) return; vq = sve_vq_from_vl(task->thread.sve_vl); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); for (i = 0; i < 32; ++i) memcpy(ZREG(sst, vq, i), &fst->vregs[i], sizeof(fst->vregs[i])); }
enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill) { if (task->exit_state) { #ifdef CONFIG_OOM_SCAN_WA_PREVENT_WRONG_SEARCH if (task->pid == task->tgid) return OOM_SCAN_SKIP_SEARCH_THREAD; #endif return OOM_SCAN_CONTINUE; } if (oom_unkillable_task(task, NULL, nodemask)) return OOM_SCAN_CONTINUE; /* * This task already has access to memory reserves and is being killed. * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { if (unlikely(frozen(task))) __thaw_task(task); if (!force_kill) return OOM_SCAN_ABORT; } if (!task->mm) return OOM_SCAN_CONTINUE; /* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ if (oom_task_origin(task)) return OOM_SCAN_SELECT; if (task->flags & PF_EXITING && !force_kill) { /* * If this task is not being ptraced on exit, then wait for it * to finish before killing some other task unnecessarily. */ if (!(task->group_leader->ptrace & PT_TRACE_EXIT)) return OOM_SCAN_ABORT; } return OOM_SCAN_OK; }
int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct pt_regs *childregs; struct task_struct *tsk; childregs = task_pt_regs(p); *childregs = *regs; childregs->ax = 0; childregs->sp = sp; p->thread.sp = (unsigned long)childregs; p->thread.sp0 = (unsigned long)(childregs + 1); p->thread.ip = (unsigned long)ret_from_fork; task_user_gs(p) = get_user_gs(regs); p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (test_tsk_thread_flag(tsk, TIF_IO_BITMAP)) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; return -ENOMEM; } set_tsk_thead_flag(p, TIF_IO_BITMAP); } err = 0; if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } return err; }
/* * The transition to the target patch state is complete. Clean up the data * structures. */ static void klp_complete_transition(void) { struct klp_object *obj; struct klp_func *func; struct task_struct *g, *task; unsigned int cpu; pr_debug("'%s': completing %s transition\n", klp_transition_patch->mod->name, klp_target_state == KLP_PATCHED ? "patching" : "unpatching"); if (klp_target_state == KLP_UNPATCHED) { /* * All tasks have transitioned to KLP_UNPATCHED so we can now * remove the new functions from the func_stack. */ klp_unpatch_objects(klp_transition_patch); /* * Make sure klp_ftrace_handler() can no longer see functions * from this patch on the ops->func_stack. Otherwise, after * func->transition gets cleared, the handler may choose a * removed function. */ klp_synchronize_transition(); } klp_for_each_object(klp_transition_patch, obj) klp_for_each_func(obj, func) func->transition = false; /* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */ if (klp_target_state == KLP_PATCHED) klp_synchronize_transition(); read_lock(&tasklist_lock); for_each_process_thread(g, task) { WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING)); task->patch_state = KLP_UNDEFINED; }
/* Return (user) module in which the the given addr falls. Returns NULL when no module can be found that contains the addr. Fills in vm_start (addr where module is mapped in) and (base) name of module when given. Note that user modules always have exactly one section (.dynamic or .absolute). */ static struct _stp_module *_stp_umod_lookup(unsigned long addr, struct task_struct *task, const char **name, unsigned long *vm_start, unsigned long *vm_end) { void *user = NULL; #ifdef CONFIG_COMPAT /* Handle 32bit signed values in 64bit longs, chop off top bits. */ if (test_tsk_thread_flag(task, TIF_32BIT)) addr &= ((compat_ulong_t) ~0); #endif if (stap_find_vma_map_info(task->group_leader, addr, vm_start, vm_end, name, &user) == 0) if (user != NULL) { struct _stp_module *m = (struct _stp_module *)user; dbug_sym(1, "found module %s at 0x%lx\n", m->path, vm_start ? *vm_start : 0); return m; } return NULL; }
void ocd_disable(struct task_struct *child) { u32 dc; if (!child) pr_debug("ocd_disable (no child)\n"); else if (test_tsk_thread_flag(child, TIF_DEBUG)) pr_debug("ocd_disable: child=%s [%u]\n", child->comm, child->pid); if (!child || test_and_clear_tsk_thread_flag(child, TIF_DEBUG)) { spin_lock(&ocd_lock); ocd_count--; WARN_ON(ocd_count < 0); if (ocd_count <= 0) { dc = ocd_read(DC); dc &= ~((1 << OCD_DC_MM_BIT) | (1 << OCD_DC_DBE_BIT)); ocd_write(DC, dc); } spin_unlock(&ocd_lock); } }
static int signal_return(struct mm_struct *mm, struct pt_regs *regs, unsigned long address, unsigned long error_code) { u16 instruction; int rc; #ifdef CONFIG_COMPAT int compat; #endif pagefault_disable(); rc = __get_user(instruction, (u16 __user *) regs->psw.addr); pagefault_enable(); if (rc) return -EFAULT; up_read(&mm->mmap_sem); clear_tsk_thread_flag(current, TIF_SINGLE_STEP); #ifdef CONFIG_COMPAT compat = test_tsk_thread_flag(current, TIF_31BIT); if (compat && instruction == 0x0a77) sys32_sigreturn(regs); else if (compat && instruction == 0x0aad) sys32_rt_sigreturn(regs); else #endif if (instruction == 0x0a77) sys_sigreturn(regs); else if (instruction == 0x0aad) sys_rt_sigreturn(regs); else { current->thread.prot_addr = address; current->thread.trap_no = error_code; do_sigsegv(regs, error_code, SEGV_MAPERR, address); } return 0; }
static const char *_stp_kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, const char **modname, /* char ** secname? */ struct task_struct *task) { struct _stp_module *m = NULL; struct _stp_section *sec = NULL; struct _stp_symbol *s = NULL; unsigned end, begin = 0; unsigned long rel_addr = 0; if (addr == 0) return NULL; if (task) { unsigned long vm_start = 0; unsigned long vm_end = 0; #ifdef CONFIG_COMPAT /* Handle 32bit signed values in 64bit longs, chop off top bits. _stp_umod_lookup does the same, but we need it here for the binary search on addr below. */ if (test_tsk_thread_flag(task, TIF_32BIT)) addr &= ((compat_ulong_t) ~0); #endif m = _stp_umod_lookup(addr, task, modname, &vm_start, &vm_end); if (m) { sec = &m->sections[0]; /* XXX .absolute sections really shouldn't be here... */ if (strcmp(".dynamic", m->sections[0].name) == 0) rel_addr = addr - vm_start; else rel_addr = addr; } if (modname && *modname) { /* In case no symbol is found, fill in based on module. */ if (offset) *offset = addr - vm_start; if (symbolsize) *symbolsize = vm_end - vm_start; } } else { m = _stp_kmod_sec_lookup(addr, &sec); if (m) { rel_addr = addr - sec->static_addr; if (modname) *modname = m->name; } } if (unlikely (m == NULL || sec == NULL)) return NULL; /* NB: relativize the address to the section. */ addr = rel_addr; end = sec->num_symbols; /* binary search for symbols within the module */ do { unsigned mid = (begin + end) / 2; if (addr < sec->symbols[mid].addr) end = mid; else begin = mid; } while (begin + 1 < end); /* result index in $begin */ s = & sec->symbols[begin]; if (likely(addr >= s->addr)) { if (offset) *offset = addr - s->addr; /* We could also pass sec->name here. */ if (symbolsize) { if ((begin + 1) < sec->num_symbols) *symbolsize = sec->symbols[begin + 1].addr - s->addr; else *symbolsize = 0; // NB: This is only a heuristic. Sometimes there are large // gaps between text areas of modules. } return s->symbol; } return NULL; }
/* * Simple selection loop. We chose the process with the highest * number of 'points'. We expect the caller will lock the tasklist. * * (not docbooked, we don't want this one cluttering up the manual) */ static struct task_struct *select_bad_process(unsigned int *ppoints, unsigned long totalpages, struct mem_cgroup *memcg, const nodemask_t *nodemask, bool force_kill) { struct task_struct *g, *p; struct task_struct *chosen = NULL; *ppoints = 0; do_each_thread(g, p) { unsigned int points; if (p->exit_state) continue; if (oom_unkillable_task(p, memcg, nodemask)) continue; /* * This task already has access to memory reserves and is * being killed. Don't allow any other task access to the * memory reserve. * * Note: this may have a chance of deadlock if it gets * blocked waiting for another task which itself is waiting * for memory. Is there a better alternative? */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) { if (unlikely(frozen(p))) __thaw_task(p); if (!force_kill) return ERR_PTR(-1UL); } if (!p->mm) continue; if (p->flags & PF_EXITING) { /* * If p is the current task and is in the process of * releasing memory, we allow the "kill" to set * TIF_MEMDIE, which will allow it to gain access to * memory reserves. Otherwise, it may stall forever. * * The loop isn't broken here, however, in case other * threads are found to have already been oom killed. */ if (p == current) { chosen = p; *ppoints = 1000; } else if (!force_kill) { /* * If this task is not being ptraced on exit, * then wait for it to finish before killing * some other task unnecessarily. */ if (!(p->group_leader->ptrace & PT_TRACE_EXIT)) return ERR_PTR(-1UL); } } points = oom_badness(p, memcg, nodemask, totalpages); if (points > *ppoints) { chosen = p; *ppoints = points; } } while_each_thread(g, p);
/** * kthread_create_on_node - create a kthread. * @threadfn: the function to run until signal_pending(current). * @data: data ptr for @threadfn. * @node: memory node number. * @namefmt: printf-style name for the thread. * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start * it. See also kthread_run(). * * If thread is going to be bound on a particular cpu, give its node * in @node, to get NUMA affinity for kthread stack, or else give -1. * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a * standalone thread for which no one will call kthread_stop(), or * return when 'kthread_should_stop()' is true (which means * kthread_stop() has been called). The return value should be zero * or a negative error number; it will be passed to kthread_stop(). * * Returns a task_struct or ERR_PTR(-ENOMEM). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, const char namefmt[], ...) { DECLARE_COMPLETION_ONSTACK(done); struct task_struct *task; struct kthread_create_info *create = kmalloc(sizeof(*create), GFP_KERNEL); if (!create) return ERR_PTR(-ENOMEM); create->threadfn = threadfn; create->data = data; create->node = node; create->done = &done; spin_lock(&kthread_create_lock); list_add_tail(&create->list, &kthread_create_list); spin_unlock(&kthread_create_lock); wake_up_process(kthreadd_task); /* * Wait for completion in killable state, for I might be chosen by * the OOM killer while kthreadd is trying to allocate memory for * new kernel thread. */ if (unlikely(wait_for_completion_killable(&done))) { int i = 0; /* * I got SIGKILL, but wait for 10 more seconds for completion * unless chosen by the OOM killer. This delay is there as a * workaround for boot failure caused by SIGKILL upon device * driver initialization timeout. */ while (i++ < 10 && !test_tsk_thread_flag(current, TIF_MEMDIE)) if (wait_for_completion_timeout(&done, HZ)) goto ready; /* * If I was SIGKILLed before kthreadd (or new kernel thread) * calls complete(), leave the cleanup of this structure to * that thread. */ if (xchg(&create->done, NULL)) return ERR_PTR(-ENOMEM); /* * kthreadd (or new kernel thread) will call complete() * shortly. */ wait_for_completion(&done); } ready: task = create->result; if (!IS_ERR(task)) { static const struct sched_param param = { .sched_priority = 0 }; va_list args; va_start(args, namefmt); vsnprintf(task->comm, sizeof(task->comm), namefmt, args); va_end(args); /* * root may have changed our (kthreadd's) priority or CPU mask. * The kernel thread should not inherit these properties. */ sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); set_cpus_allowed_ptr(task, cpu_all_mask); } kfree(create); return task; } EXPORT_SYMBOL(kthread_create_on_node); static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state) { /* Must have done schedule() in kthread() before we set_task_cpu */ if (!wait_task_inactive(p, state)) { WARN_ON(1); return; } /* It's safe because the task is inactive. */ do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_NO_SETAFFINITY; } /** * kthread_bind - bind a just-created kthread to a cpu. * @p: thread created by kthread_create(). * @cpu: cpu (might not be online, must be possible) for @k to run on. * * Description: This function is equivalent to set_cpus_allowed(), * except that @cpu doesn't need to be online, and the thread must be * stopped (i.e., just returned from kthread_create()). */ void kthread_bind(struct task_struct *p, unsigned int cpu) { __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE); }
unsigned long _stp_linenumber_lookup(unsigned long addr, struct task_struct *task, char ** filename, int need_filename) { struct _stp_module *m; struct _stp_section *sec; const char *modname = NULL; uint8_t *linep, *enddatap; int compat_task = _stp_is_compat_task(); int user = (task ? 1 : 0); // the portion below is encased in this conditional because some of the functions // and constants needed are encased in a similar condition #ifdef STP_NEED_LINE_DATA if (addr == 0) return 0; if (task) { unsigned long vm_start = 0; unsigned long vm_end = 0; #ifdef CONFIG_COMPAT /* Handle 32bit signed values in 64bit longs, chop off top bits. */ if (test_tsk_thread_flag(task, TIF_32BIT)) addr &= ((compat_ulong_t) ~0); #endif m = _stp_umod_lookup(addr, task, &modname, &vm_start, &vm_end); } else m = _stp_kmod_sec_lookup(addr, &sec); if (m == NULL || m->debug_line == NULL) return 0; // if addr is a kernel address, it will need to be adjusted if (!task) { int i; unsigned long offset = 0; // have to factor in the load_offset of (specifically) the .text section for (i=0; i<m->num_sections; i++) if (!strcmp(m->sections[i].name, ".text")) { offset = (m->sections[i].static_addr - m->sections[i].sec_load_offset); break; } if (addr < offset) return 0; addr = addr - offset; } linep = m->debug_line; enddatap = m->debug_line + m->debug_line_len; while (linep < enddatap) { // similar to print_debug_line_section() in elfutils unsigned int length = 4, curr_file_idx = 1, prev_file_idx = 1; unsigned int skip_to_seq_end = 0, op_index = 0; uint64_t unit_length, hdr_length, curr_addr = 0; uint8_t *endunitp, *endhdrp, *dirsecp, *stdopcode_lens_secp; uint16_t version; uint8_t opcode_base, line_range, min_instr_len = 0, max_ops = 1; unsigned long curr_linenum = 1; int8_t line_base; long cumm_line_adv = 0; unit_length = (uint64_t) read_pointer ((const uint8_t **) &linep, enddatap, DW_EH_PE_data4, user, compat_task); if (unit_length == 0xffffffff) { if (unlikely (linep + 8 > enddatap)) return 0; unit_length = (uint64_t) read_pointer ((const uint8_t **) &linep, enddatap, DW_EH_PE_data8, user, compat_task); length = 8; } if (unit_length < (length + 2) || (linep + unit_length) > enddatap) return 0; endunitp = linep + unit_length; version = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data2, user, compat_task); if (length == 4) hdr_length = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data4, user, compat_task); else hdr_length = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data8, user, compat_task); if ((linep + hdr_length) > endunitp || hdr_length < (version >= 4 ? 6 : 5)) return 0; endhdrp = linep + hdr_length; // minimum instruction length min_instr_len = *linep++; // max operations per instruction if (version >= 4) { max_ops = *linep++; if (max_ops == 0) return 0; // max operations per instruction is supposed to > 0; } // default value of the is_stmt register ++linep; // line base. this is a signed value. line_base = *linep++; // line range line_range = *linep++; if (line_range == 0) return 0; // opcode base opcode_base = *linep++; // opcodes stdopcode_lens_secp = linep - 1; // need this check if the header length check covers this region? if ((linep + opcode_base - 1) >= endhdrp) return 0; linep += opcode_base - 1; // at the directory table. don't need an other information from the header // in order to find the desired line number, so we will save a pointer to // this point and skip ahead to the end of the header. this portion of the // header will be visited again after a line number has been found if a // filename is needed. dirsecp = linep; linep = endhdrp; // iterating through the opcodes. will deal with three defined types of // opcode: special, extended and standard. there is also a portion at // the end of this loop that will deal with unknown (standard) opcodes. while (linep < endunitp) { uint8_t opcode = *linep++; long addr_adv = 0; if (opcode >= opcode_base) // special opcode { // line range was checked before this point. this variable is not altered after it is initialized. cumm_line_adv += (line_base + ((opcode - opcode_base) % line_range)); addr_adv = ((opcode - opcode_base) / line_range); } else if (opcode == 0) // extended opcode { int len; uint8_t subopcode; if (linep + 1 > endunitp) return 0; len = *linep++; if (linep + len > endunitp || len < 1) return 0; subopcode = *linep++; // the sub opcode switch (subopcode) { case DW_LNE_end_sequence: // reset the line and address. cumm_line_adv = 1 - curr_linenum; addr_adv = 0 - curr_addr; skip_to_seq_end = 0; op_index = 0; break; case DW_LNE_set_address: if ((len - 1) == 4) // account for the opcode (the -1) curr_addr = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data4, user, compat_task); else if ((len - 1) == 8) curr_addr = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data8, user, compat_task); else return 0; // if the set address is past the address we want, iterate // to the end of the sequence without doing more address // and linenumber calcs than necessary if (curr_addr > addr) skip_to_seq_end = 1; op_index = 0; break; default: // advance the ptr by the specified amount linep += len-1; break; } } else if (opcode <= DW_LNS_set_isa) // known standard opcode { uint8_t *linep_before = linep; switch (opcode) { case DW_LNS_advance_pc: addr_adv = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task); break; case DW_LNS_fixed_advance_pc: addr_adv = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data2, user, compat_task); if (linep_before == linep) // the read failed return 0; op_index = 0; break; case DW_LNS_advance_line: cumm_line_adv += read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128+DW_EH_PE_signed, user, compat_task); break; case DW_LNS_set_file: curr_file_idx = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task); break; case DW_LNS_set_column: read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task); break; case DW_LNS_const_add_pc: addr_adv = ((255 - opcode_base) / line_range); break; case DW_LNS_set_isa: read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task); break; } if (linep > endunitp) // reading in the leb128 failed return 0; } else { int i; for (i=stdopcode_lens_secp[opcode]; i>0; --i) { read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task); if (linep > endunitp) return 0; } } // don't worry about doing line/address advances since we are waiting // till we hit the end of the sequence or the end of the unit at which // point the address and linenumber will be reset if (skip_to_seq_end == 1) continue; // calculate actual address advance if (opcode != 0 && opcode != DW_LNS_fixed_advance_pc) { addr_adv = min_instr_len * (op_index + addr_adv) / max_ops; op_index = (op_index + addr_adv) % max_ops; } // found an address that at least sort of matches the address we // were looking for if ((curr_addr <= addr && addr < (curr_addr + addr_adv)) || (curr_addr == addr && addr_adv != 0)) { if (need_filename) _stp_filename_lookup(m, filename, dirsecp, endhdrp, prev_file_idx, user, compat_task); return curr_linenum; } // update the linenumber and file index if the curr_addr is to be updated if (addr_adv != 0) { prev_file_idx = curr_file_idx; } if (prev_file_idx == curr_file_idx) { curr_linenum += cumm_line_adv; cumm_line_adv = 0; } curr_addr += addr_adv; } } #endif /* STP_NEED_LINE_DATA */ // no linenumber was found otherwise this function would have returned before this point return 0; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int selected_tasksize = 0; int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (!spin_trylock(&lowmem_shrink_lock)){ lowmem_print(4, "lowmem_shrink lock faild\n"); return -1; } /* For JB: FOREGROUND is adj0 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */ /* For ICS: HOME is adj6 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */ if (other_free <= lowmem_minfree[1]) { /* Notify Kernel that we should consider Android threshold */ lmk_adjz_minfree = lowmem_minfree[0]; } else { lmk_adjz_minfree = 0; } if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); /* * disable indication if low memory */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) if (in_lowmem) { in_lowmem = 0; lowmem_indicator = 1; DAL_LowMemoryOff(); } #endif spin_unlock(&lowmem_shrink_lock); return rem; } selected_oom_score_adj = min_score_adj; // add debug log #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) { lowmem_print(1, "======low memory killer=====\n"); lowmem_print(1, "Free memory other_free: %d, other_file:%d pages\n", other_free, other_file); } #endif rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { lowmem_print(1, "lowmem_shrink return directly, due to %d (%s) is dying\n", p->pid, p->comm); task_unlock(p); rcu_read_unlock(); spin_unlock(&lowmem_shrink_lock); return 0; } /* We use oom_score_adj to represent oom_adj here although the later has been deprecated by kernel. */ /* This is because that JB AMS still uses oom_adj to stand for the importantance of activities. */ /* oom_score_adj = p->signal->oom_score_adj; - 2012.07.16 - */ oom_score_adj = p->signal->oom_adj; #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) lowmem_print(1, "Candidate %d (%s), adj %d, rss %lu, to kill\n", p->pid, p->comm, oom_score_adj, get_mm_rss(p->mm)); #endif if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) { lowmem_print(1, "low memory info:\n"); show_free_areas_minimum(); } #endif /* * when kill adj=0 process trigger kernel warning, only in MTK internal eng/non_LCA load */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) && \ !defined (MTK_LCA_SUPPORT) && !defined (PARTIAL_BUILD) //mtk internal if (selected_oom_score_adj <= 0) { // can set 16 for test lowmem_print(1, "low memory trigger kernel warning\n"); aee_kernel_warning_api("LMK", 0, DB_OPT_DEFAULT|DB_OPT_DUMPSYS_ACTIVITY, "Framework low memory", "please contact AP/AF memory module owner\n"); } #endif /* * show an indication if low memory */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) if (lowmem_indicator && selected_oom_score_adj <= 1) { lowmem_print(5, "low memory: raise aee warning\n"); in_lowmem = 1; lowmem_indicator = 0; DAL_LowMemoryOn(); //aee_kernel_warning(module_name, lowmem_warning); } #endif send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); spin_unlock(&lowmem_shrink_lock); return rem; }
/* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ static inline int do_exception(struct pt_regs *regs, int access) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long trans_exc_code; unsigned long address; unsigned int flags; int fault; if (notify_page_fault(regs)) return 0; tsk = current; mm = tsk->mm; trans_exc_code = regs->int_parm_long; /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ fault = VM_FAULT_BADCONTEXT; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); flags = FAULT_FLAG_ALLOW_RETRY; if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } if (address == -ENOMEM) { fault = VM_FAULT_OOM; goto out_up; } } #endif retry: fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) goto out_up; if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; if (expand_stack(vma, address)) goto out_up; } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. */ fault = VM_FAULT_BADACCESS; if (unlikely(!(vma->vm_flags & access))) goto out_up; if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ fault = handle_mm_fault(mm, vma, address, flags); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; /* * Major/minor page fault accounting is only done on the * initial attempt. If we go through a retry, it is extremely * likely that the page will be found in page cache at that point. */ if (flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); } else { tsk->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } if (fault & VM_FAULT_RETRY) { /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; } } /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_PER_TRAP); fault = 0; out_up: up_read(&mm->mmap_sem); out: return fault; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; short min_score_adj = OOM_SCORE_ADJ_MAX + 1; int minfree = 0; int selected_tasksize = 0; short selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; /* For request of unmovable pages, take no account of free CMA pages*/ if(IS_ENABLED(CONFIG_CMA) && (allocflags_to_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE)) other_free -= global_page_state(NR_FREE_CMA_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; short oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); rcu_read_unlock(); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \ " to free %ldkB on behalf of '%s' (%d) because\n" \ " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \ " Free memory is %ldkB above reserved\n", selected->comm, selected->pid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, other_file * (long)(PAGE_SIZE / 1024), minfree * (long)(PAGE_SIZE / 1024), min_score_adj, other_free * (long)(PAGE_SIZE / 1024)); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int selected_tasksize = 0; int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); rcu_read_unlock(); return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); if (selected) compact_nodes(false); return rem; }
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; #ifdef ENHANCED_LMK_ROUTINE struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE static DEFINE_RATELIMIT_STATE(lmk_rs, DEFAULT_RATELIMIT_INTERVAL, 0); #else static DEFINE_RATELIMIT_STATE(lmk_rs, 6*DEFAULT_RATELIMIT_INTERVAL, 0); #endif #endif int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; #ifdef ENHANCED_LMK_ROUTINE int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; int max_selected_oom_idx = 0; #else int selected_tasksize = 0; int selected_oom_score_adj; #endif int array_size = ARRAY_SIZE(lowmem_adj); #if (!defined(CONFIG_MACH_JF) \ && !defined(CONFIG_SEC_PRODUCT_8960)\ ) unsigned long nr_to_scan = sc->nr_to_scan; #endif #ifndef CONFIG_CMA int other_free = global_page_state(NR_FREE_PAGES); #else int other_free = global_page_state(NR_FREE_PAGES) - global_page_state(NR_FREE_CMA_PAGES); #endif int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); #ifdef CONFIG_ZRAM_FOR_ANDROID other_file -= total_swapcache_pages; #endif /* CONFIG_ZRAM_FOR_ANDROID */ if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) selected_oom_score_adj[i] = min_score_adj; #else selected_oom_score_adj = min_score_adj; #endif #ifdef CONFIG_ZRAM_FOR_ANDROID atomic_set(&s_reclaim.lmk_running, 1); #endif /* CONFIG_ZRAM_FOR_ANDROID */ read_lock(&tasklist_lock); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; #ifdef ENHANCED_LMK_ROUTINE int is_exist_oom_task = 0; #endif if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { task_unlock(p); read_unlock(&tasklist_lock); #ifdef CONFIG_ZRAM_FOR_ANDROID atomic_set(&s_reclaim.lmk_running, 0); #endif /* CONFIG_ZRAM_FOR_ANDROID */ return 0; } oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; #ifdef ENHANCED_LMK_ROUTINE if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (!selected[i]) { is_exist_oom_task = 1; max_selected_oom_idx = i; break; } } } else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj || (selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj && selected_tasksize[max_selected_oom_idx] < tasksize)) { is_exist_oom_task = 1; } if (is_exist_oom_task) { selected[max_selected_oom_idx] = p; selected_tasksize[max_selected_oom_idx] = tasksize; selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj; if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) all_selected_oom++; if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx]) max_selected_oom_idx = i; else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] && selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) max_selected_oom_idx = i; } } lowmem_print(2, "select %d (%s), adj %d, \ size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } #else if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); #endif } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected[i]) { lowmem_print(1, "send sigkill to %d (%s), adj %d,\ size %d, free memory = %d, reclaimable memory = %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i], other_free, other_file); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected[i], 0); set_tsk_thread_flag(selected[i], TIF_MEMDIE); rem -= selected_tasksize[i]; #ifdef LMK_COUNT_READ lmk_count++; #endif } } #else if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; #ifdef LMK_COUNT_READ lmk_count++; #endif } #endif #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO if (__ratelimit(&lmk_rs)) { lowmem_print(1, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); #ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE show_mem(SHOW_MEM_FILTER_NODES); dump_tasks_info(); #endif } #endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); #ifdef CONFIG_ZRAM_FOR_ANDROID atomic_set(&s_reclaim.lmk_running, 0); #endif /* CONFIG_ZRAM_FOR_ANDROID */ return rem; }