int copy_thread(unsigned long clone_flags, unsigned long sp,
	unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	p->thread.io_bitmap_ptr = NULL;
	tsk = current;
	err = -ENOMEM;

	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
	unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	tsk = current;
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
	p->thread.ds_ctx = NULL;

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	return err;
}
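/*
 * Side note (illustrative, not part of the kernel code above): both
 * copy_thread() variants copy the parent's register frame and force
 * childregs->ax to 0, which is exactly why fork() is seen to return 0
 * in the child.  A minimal userspace demonstration:
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {
		/* the zeroed ->ax surfaces here as fork()'s return value */
		printf("child: fork() returned 0\n");
		_exit(0);
	}
	printf("parent: fork() returned %d\n", (int)pid);	/* child's pid */
	waitpid(pid, NULL, 0);
	return 0;
}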
static void set_single_step(struct task_struct *task)
{
	struct pt_regs *regs = task->thread.regs;

	if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
		task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
		regs->msr |= MSR_DE;
#else
		regs->msr |= MSR_SE;
#endif
	}
	set_tsk_thread_flag(task, TIF_SINGLESTEP);
}
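/*
 * Tracer-side counterpart (a sketch using the standard ptrace(2) API;
 * the helper name and step count are ours): the MSR_SE / DBCR0_IC bits
 * set in set_single_step() above are what make PTRACE_SINGLESTEP stop
 * the child after one instruction.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>

/* single-step an already-attached, stopped child a few times */
static void step_child(pid_t pid, int steps)
{
	int status;

	while (steps-- > 0) {
		if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
			perror("PTRACE_SINGLESTEP");
			return;
		}
		waitpid(pid, &status, 0);	/* child stops with SIGTRAP */
		if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP)
			break;
	}
}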
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT)
		return -EINVAL;
	if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT)
		return -EINVAL;
	clear_pt_regs_flag(regs, PIF_PER_TRAP);
	auprobe->saved_per = psw_bits(regs->psw).per;
	auprobe->saved_int_code = regs->int_code;
	regs->int_code = UPROBE_TRAP_NR;
	regs->psw.addr = current->utask->xol_vaddr;
	set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
	update_cr_regs(current);
	return 0;
}
int ptrace_set_watch_regs(struct task_struct *child,
			  struct pt_watch_regs __user *addr)
{
	int i;
	int watch_active = 0;
	unsigned long lt[NUM_WATCH_REGS];
	u16 ht[NUM_WATCH_REGS];

	if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0)
		return -EIO;
	if (!access_ok(VERIFY_READ, addr, sizeof(struct pt_watch_regs)))
		return -EIO;

	/* Check the values. */
	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
		__get_user(lt[i], &addr->WATCH_STYLE.watchlo[i]);
#ifdef CONFIG_32BIT
		if (lt[i] & __UA_LIMIT)
			return -EINVAL;
#else
		if (test_tsk_thread_flag(child, TIF_32BIT_ADDR)) {
			if (lt[i] & 0xffffffff80000000UL)
				return -EINVAL;
		} else {
			if (lt[i] & __UA_LIMIT)
				return -EINVAL;
		}
#endif
		__get_user(ht[i], &addr->WATCH_STYLE.watchhi[i]);
		if (ht[i] & ~MIPS_WATCHHI_MASK)
			return -EINVAL;
	}
	/* Install them. */
	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
		if (lt[i] & MIPS_WATCHLO_IRW)
			watch_active = 1;
		child->thread.watch.mips3264.watchlo[i] = lt[i];
		/* Set the G bit. */
		child->thread.watch.mips3264.watchhi[i] = ht[i];
	}

	if (watch_active)
		set_tsk_thread_flag(child, TIF_LOAD_WATCH);
	else
		clear_tsk_thread_flag(child, TIF_LOAD_WATCH);

	return 0;
}
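/*
 * Rough tracer-side sketch (our assumption, not taken from this file):
 * a debugger fills struct pt_watch_regs and hands it to the
 * PTRACE_SET_WATCH_REGS request, which lands in the routine above.
 * The mips32 layout and the bit 0 = "trap on write" encoding of
 * watchlo are assumptions; check asm/ptrace.h on a real target.
 */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h>		/* struct pt_watch_regs, PTRACE_SET_WATCH_REGS */

#define WATCHLO_W 0x1		/* assumed: WatchLo W bit (trap on stores) */

static long set_write_watch(pid_t pid, unsigned long addr)
{
	struct pt_watch_regs wr = { 0 };

	wr.style = pt_watch_style_mips32;
	wr.mips32.watchlo[0] = addr | WATCHLO_W;
	wr.mips32.watchhi[0] = 0;	/* no address mask bits */
	return ptrace(PTRACE_SET_WATCH_REGS, pid, &wr, NULL);
}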
static inline void seccomp_assign_mode(struct task_struct *task,
				       unsigned long seccomp_mode,
				       unsigned long flags)
{
	assert_spin_locked(&task->sighand->siglock);

	task->seccomp.mode = seccomp_mode;
	/*
	 * Make sure TIF_SECCOMP cannot be set before the mode (and
	 * filter) is set.
	 */
	smp_mb__before_atomic();
	/* Assume default seccomp processes want spec flaw mitigation. */
	if ((flags & SECCOMP_FILTER_FLAG_SPEC_ALLOW) == 0)
		arch_seccomp_spec_mitigate(task);
	set_tsk_thread_flag(task, TIF_SECCOMP);
}
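/*
 * Userspace counterpart (illustrative): the SECCOMP_FILTER_FLAG_SPEC_ALLOW
 * flag tested above arrives via the seccomp(2) syscall.  A minimal
 * allow-everything filter installed with the flag set, so
 * arch_seccomp_spec_mitigate() is skipped:
 */
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(filter) / sizeof(filter[0]),
		.filter = filter,
	};

	/* required so an unprivileged task may install a filter */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		perror("prctl");

	if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER,
		    SECCOMP_FILTER_FLAG_SPEC_ALLOW, &prog))
		perror("seccomp");
	else
		printf("filter installed; TIF_SECCOMP is now set for this task\n");
	return 0;
}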
void user_enable_single_step(struct task_struct *task)
{
	clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
	set_tsk_thread_flag(task, TIF_SINGLESTEP);

	if (pa_psw(task)->n) {
		struct siginfo si;

		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
		pa_psw(task)->n = 0;
		pa_psw(task)->x = 0;
		pa_psw(task)->y = 0;
		pa_psw(task)->z = 0;
		pa_psw(task)->b = 0;
		ptrace_disable(task);
		si.si_code = TRAP_TRACE;
		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
		si.si_signo = SIGTRAP;
		si.si_errno = 0;
		force_sig_info(SIGTRAP, &si, task);
		return;
	}

	pa_psw(task)->r = 1;
	pa_psw(task)->t = 0;
	pa_psw(task)->h = 0;
	pa_psw(task)->l = 0;
}
void user_enable_single_step(struct task_struct *task)
{
	clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
	set_tsk_thread_flag(task, TIF_SINGLESTEP);

	if (pa_psw(task)->n) {
		struct siginfo si;

		/* Nullified, just crank over the queue. */
		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
		task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4;
		pa_psw(task)->n = 0;
		pa_psw(task)->x = 0;
		pa_psw(task)->y = 0;
		pa_psw(task)->z = 0;
		pa_psw(task)->b = 0;
		ptrace_disable(task);
		/* Don't wake up the task, but let the
		   parent know something happened. */
		si.si_code = TRAP_TRACE;
		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
		si.si_signo = SIGTRAP;
		si.si_errno = 0;
		force_sig_info(SIGTRAP, &si, task);
		/* notify_parent(task, SIGCHLD); */
		return;
	}

	/* Enable recovery counter traps.  The recovery counter
	 * itself will be set to zero on a task switch.  If the
	 * task is suspended on a syscall then the syscall return
	 * path will overwrite the recovery counter with a suitable
	 * value such that it traps once back in user space.  We
	 * disable interrupts in the tasks PSW here also, to avoid
	 * interrupts while the recovery counter is decrementing.
	 */
	pa_psw(task)->r = 1;
	pa_psw(task)->t = 0;
	pa_psw(task)->h = 0;
	pa_psw(task)->l = 0;
}
SYSCALL_DEFINE2(smunch, int, pid, unsigned long, bit_pattern)
{
	unsigned long flags;
	struct task_struct *task;
	long ret;

	rcu_read_lock();
	task = pid_task(find_vpid(pid), PIDTYPE_PID);
	rcu_read_unlock();
	if (!task)
		return -1;	/* Process not present */

	if (!lock_task_sighand(task, &flags)) {
		/* Process refuses to give the lock: either dead or dying.
		 * Nothing to unlock since we never acquired it. */
		return -1;
	}

	if (!thread_group_empty(task)) {
		printk(KERN_ALERT "\nMulti-threaded process, exiting without processing");
		ret = -1;
		goto return_path;
	}

	/* Info to user */
	printk(KERN_ALERT "\nExit State:%XH,State=%XH\n",
	       task->exit_state, (unsigned int)task->state);
	if (task->state & TASK_UNINTERRUPTIBLE)
		printk(KERN_ALERT "\nProcess is in uninterruptible wait (deep sleep)!!");

	if (bit_pattern & (1UL << (SIGKILL - 1)) &&
	    (task->exit_state & EXIT_ZOMBIE)) {
		printk(KERN_ALERT "\nSIGKILL present while process is a zombie, releasing task!!");
		unlock_task_sighand(task, &flags);
		release_task(task);	/* detach_pid() is called from release_task() */
		return 0;
	}

	/* If !SIGKILL || ordinary process || deep sleep, send all signals.
	 * It is the user's responsibility to note that signals will be
	 * handled in 1-64 order. */
	printk(KERN_ALERT "!SIGKILL || (ordinary process) || DeepSleep, sending all signals!");
	task->signal->shared_pending.signal.sig[0] = bit_pattern;
	set_tsk_thread_flag(task, TIF_SIGPENDING);
	signal_wake_up(task, 1);
	ret = 0;

return_path:
	unlock_task_sighand(task, &flags);
	return ret;
}
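/*
 * Caller's view (hypothetical: smunch is a custom syscall, so the
 * __NR_smunch number below is a placeholder assumption for whichever
 * table slot it was wired into).  Bit N-1 of bit_pattern corresponds
 * to signal N, matching the 1UL << (SIGKILL - 1) test above.
 */
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_smunch
#define __NR_smunch 337			/* assumed syscall number */
#endif

int main(void)
{
	int pid = 1234;				/* target pid */
	unsigned long pattern = 1UL << (SIGKILL - 1);	/* just SIGKILL */

	long ret = syscall(__NR_smunch, pid, pattern);
	printf("smunch returned %ld\n", ret);
	return ret ? 1 : 0;
}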
int sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	int ret = -EPERM;

	lock_kernel();
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		ret = security_ptrace(current->parent, current);
		if (ret)
			goto out;
		/* set the ptrace bit in the process flags. */
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}
	ret = -ESRCH;
	read_lock(&tasklist_lock);
	child = find_task_by_pid(pid);
	if (child)
		get_task_struct(child);
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;

	ret = -EPERM;
	if (pid == 1)		/* you may not mess with init */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	switch (request) {
	/* when I and D space are separate, these will need to be fixed. */
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		unsigned long tmp;
		int copied;

		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		ret = -EIO;
		if (copied != sizeof(tmp))
			break;
		ret = put_user(tmp, (unsigned long __user *) data);
		break;
	}

	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long index;
		unsigned long tmp;

		ret = -EIO;
		/* convert to index and check */
		index = (unsigned long) addr >> 3;
		if ((addr & 7) || (index > PT_FPSCR))
			break;

		if (index < PT_FPR0) {
			tmp = get_reg(child, (int)index);
		} else {
			flush_fp_to_thread(child);
			tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
		}
		ret = put_user(tmp, (unsigned long __user *) data);
		break;
	}

	/* If I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1)
		    == sizeof(data))
			break;
		ret = -EIO;
		break;

	/* write the word at location addr in the USER area */
	case PTRACE_POKEUSR: {
		unsigned long index;

		ret = -EIO;
		/* convert to index and check */
		index = (unsigned long) addr >> 3;
		if ((addr & 7) || (index > PT_FPSCR))
			break;

		if (index == PT_ORIG_R3)
			break;
		if (index < PT_FPR0) {
			ret = put_reg(child, index, data);
		} else {
			flush_fp_to_thread(child);
			((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
			ret = 0;
		}
		break;
	}

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT: { /* restart after signal. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		/* make sure the single step bit is not set. */
		clear_single_step(child);
		wake_up_process(child);
		ret = 0;
		break;
	}

	/*
	 * make the child exit.  Best I can do is send it a sigkill.
	 * perhaps it should be put in the status that it wants to
	 * exit.
	 */
	case PTRACE_KILL: {
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		/* make sure the single step bit is not set. */
		clear_single_step(child);
		wake_up_process(child);
		break;
	}

	case PTRACE_SINGLESTEP: { /* set the trap flag. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		set_single_step(child);
		child->exit_code = data;
		/* give it a chance to run. */
		wake_up_process(child);
		ret = 0;
		break;
	}

	case PTRACE_DETACH:
		ret = ptrace_detach(child, data);
		break;

	case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
		int i;
		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
		unsigned long __user *tmp = (unsigned long __user *)addr;

		for (i = 0; i < 32; i++) {
			ret = put_user(*reg, tmp);
			if (ret)
				break;
			reg++;
			tmp++;
		}
		break;
	}

	case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
		int i;
		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
		unsigned long __user *tmp = (unsigned long __user *)addr;

		for (i = 0; i < 32; i++) {
			ret = get_user(*reg, tmp);
			if (ret)
				break;
			reg++;
			tmp++;
		}
		break;
	}

	case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
		int i;
		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
		unsigned long __user *tmp = (unsigned long __user *)addr;

		flush_fp_to_thread(child);
		for (i = 0; i < 32; i++) {
			ret = put_user(*reg, tmp);
			if (ret)
				break;
			reg++;
			tmp++;
		}
		break;
	}

	case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */
		int i;
		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
		unsigned long __user *tmp = (unsigned long __user *)addr;

		flush_fp_to_thread(child);
		for (i = 0; i < 32; i++) {
			ret = get_user(*reg, tmp);
			if (ret)
				break;
			reg++;
			tmp++;
		}
		break;
	}

	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}
out_tsk:
	put_task_struct(child);
out:
	unlock_kernel();
	return ret;
}
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	int ret;

	switch (request) {
	case PTRACE_PEEKTEXT: /* read word at location addr. */
	case PTRACE_PEEKDATA: {
		unsigned long tmp;

		ret = read_long(child, addr, &tmp);
		if (ret < 0)
			break;
		ret = put_user(tmp, (unsigned long *) data);
		break;
	}

	/* read the word at location addr in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long tmp = 0;

		if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) {
			ret = -EIO;
			break;
		}

		ret = 0;	/* Default return condition */
		addr = addr >> 2;	/* temporary hack. */

		if (addr < H8300_REGS_NO)
			tmp = h8300_get_reg(child, addr);
		else {
			switch (addr) {
			case 49:
				tmp = child->mm->start_code;
				break;
			case 50:
				tmp = child->mm->start_data;
				break;
			case 51:
				tmp = child->mm->end_code;
				break;
			case 52:
				tmp = child->mm->end_data;
				break;
			default:
				ret = -EIO;
			}
		}
		if (!ret)
			ret = put_user(tmp, (unsigned long *) data);
		break;
	}

	/* when I and D space are separate, this will have to be fixed. */
	case PTRACE_POKETEXT: /* write the word at location addr. */
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1)
		    == sizeof(data))
			break;
		ret = -EIO;
		break;

	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
		if ((addr & 3) || addr < 0 || addr >= sizeof(struct user)) {
			ret = -EIO;
			break;
		}
		addr = addr >> 2;	/* temporary hack. */

		if (addr == PT_ORIG_ER0) {
			ret = -EIO;
			break;
		}
		if (addr < H8300_REGS_NO) {
			ret = h8300_put_reg(child, addr, data);
			break;
		}
		ret = -EIO;
		break;

	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
	case PTRACE_CONT: { /* restart after signal. */
		ret = -EIO;
		if ((unsigned long) data >= _NSIG)
			break;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		wake_up_process(child);
		/* make sure the single step bit is not set. */
		h8300_disable_trace(child);
		ret = 0;
		break;
	}

	/*
	 * make the child exit.  Best I can do is send it a sigkill.
	 * perhaps it should be put in the status that it wants to
	 * exit.
	 */
	case PTRACE_KILL: {
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
			break;
		child->exit_code = SIGKILL;
		h8300_disable_trace(child);
		wake_up_process(child);
		break;
	}

	case PTRACE_SINGLESTEP: { /* set the trap flag. */
		ret = -EIO;
		if ((unsigned long) data > _NSIG)
			break;
		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;
		h8300_enable_trace(child);
		wake_up_process(child);
		ret = 0;
		break;
	}

	case PTRACE_DETACH: /* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		break;

	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
		int i;
		unsigned long tmp;

		ret = 0;
		for (i = 0; i < H8300_REGS_NO; i++) {
			tmp = h8300_get_reg(child, i);
			if (put_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			data += sizeof(long);
		}
		break;
	}

	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
		int i;
		unsigned long tmp;

		ret = 0;
		for (i = 0; i < H8300_REGS_NO; i++) {
			if (get_user(tmp, (unsigned long *) data)) {
				ret = -EFAULT;
				break;
			}
			h8300_put_reg(child, i, tmp);
			data += sizeof(long);
		}
		break;
	}

	default:
		ret = -EIO;
		break;
	}
	return ret;
}
/*
 * Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	     unsigned long stack_start,
	     struct pt_regs *regs,
	     unsigned long stack_size,
	     int __user *parent_tidptr,
	     int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long pid = alloc_pidmap();

	if (pid < 0)
		return -EAGAIN;
	if (unlikely(current->ptrace)) {
		trace = fork_traceflag(clone_flags);
		if (trace)
			clone_flags |= CLONE_PTRACE;
	}

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 parent_tidptr, child_tidptr, pid);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		if (!(clone_flags & CLONE_STOPPED))
			wake_up_new_task(p, clone_flags);
		else
			p->state = TASK_STOPPED;

		if (unlikely(trace)) {
			current->ptrace_message = pid;
			ptrace_notify((trace << 8) | SIGTRAP);
		}

		if (clone_flags & CLONE_VFORK) {
			wait_for_completion(&vfork);
			if (unlikely(current->ptrace & PT_TRACE_VFORK_DONE))
				ptrace_notify((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
		}
	} else {
		free_pidmap(pid);
		pid = PTR_ERR(p);
	}
	return pid;
}
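/*
 * The CLONE_VFORK branch above is what makes vfork(2) block: do_fork()
 * sleeps on the vfork completion until the child exits or execs, and the
 * child completes it from mm_release().  Illustrative userspace behaviour:
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = vfork();	/* clone with CLONE_VFORK | CLONE_VM */

	if (pid == 0)
		_exit(0);	/* completes the parent's wait_for_completion() */

	/* only reached once the child has exited or exec'd */
	printf("parent resumed after child %d\n", (int)pid);
	waitpid(pid, NULL, 0);
	return 0;
}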
int thread_function(void *data)
{
	unsigned int readPos = 0, min_pos, i, j;
	unsigned long min_rss, tmp_rss;
	struct task_struct *g, *p;
	struct task_struct *tasks_ptr[TASKS_PTR_SIZE];
	/* unsigned long *sys_call_table = (unsigned long *)(0xc07992b0); */
	/* sys_tkill = (sys_call_table[__NR_tkill]); */

	do_each_thread(g, p) {
		struct mm_struct *mm;

		if (!thread_group_leader(p))
			continue;
		task_lock(p);
		mm = p->mm;
		/* only user processes have an mm_struct; skip kernel threads,
		 * and guard against overflowing the fixed-size array */
		if (mm && (p->real_parent->pid != 2) && readPos < TASKS_PTR_SIZE) {
			tasks_ptr[readPos++] = p;
			printk(KERN_INFO "(Origin) PID:[%-5d]| Name:%-20s| VM:%-8lu| RSS:%-8lu| OOM_adj: %-3d\n",
			       p->pid, p->comm, mm->total_vm, get_mm_rss(mm),
			       p->signal->oom_adj);
		}
		task_unlock(p);
	} while_each_thread(g, p);

	if (readPos == 0)
		return 0;

	/*
	 * Sort the tasks by RSS using selection sort.
	 */
	for (i = 0; i < readPos; ++i) {
		min_rss = get_mm_rss(tasks_ptr[i]->mm);
		min_pos = i;
		for (j = i + 1; j < readPos; ++j) {
			tmp_rss = get_mm_rss(tasks_ptr[j]->mm);
			if (tmp_rss < min_rss) {
				min_rss = tmp_rss;
				min_pos = j;
			}
		}
		p = tasks_ptr[i];
		tasks_ptr[i] = tasks_ptr[min_pos];
		tasks_ptr[min_pos] = p;
	}

	for (i = 0; i < readPos; ++i) {
		printk(KERN_INFO "(Sorted) PID:[%-5d]| Name:%-20s| VM:%-8lu| RSS:%-8lu| OOM_adj: %-3d\n",
		       tasks_ptr[i]->pid, tasks_ptr[i]->comm,
		       tasks_ptr[i]->mm->total_vm, get_mm_rss(tasks_ptr[i]->mm),
		       tasks_ptr[i]->signal->oom_adj);
	}

	printk(KERN_INFO "Kill PID[%-5d], Name:%-20s, RSS:%-8lu",
	       tasks_ptr[readPos-1]->pid, tasks_ptr[readPos-1]->comm,
	       get_mm_rss(tasks_ptr[readPos-1]->mm));

	p = tasks_ptr[readPos-1];
	p->rt.time_slice = HZ;
	set_tsk_thread_flag(p, TIF_MEMDIE);
	force_sig(SIGKILL, p);
	return 0;
}
/*
 * Note that this implementation of ptrace behaves differently from vanilla
 * ptrace.  Contrary to what the man page says, in the PTRACE_PEEKTEXT,
 * PTRACE_PEEKDATA, and PTRACE_PEEKUSER requests the data variable is not
 * ignored.  Instead, the data variable is expected to point at a location
 * (in user space) where the result of the ptrace call is written (instead of
 * being returned).
 */
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
	int ret;
	unsigned long __user *datap = (unsigned long __user *)data;

	switch (request) {
	/* Read word at location address. */
	case PTRACE_PEEKTEXT:
	case PTRACE_PEEKDATA: {
		unsigned long tmp;
		int copied;

		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
		ret = -EIO;
		if (copied != sizeof(tmp))
			break;

		ret = put_user(tmp, datap);
		break;
	}

	/* Read the word at location address in the USER area. */
	case PTRACE_PEEKUSR: {
		unsigned long tmp;

		ret = -EIO;
		if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
			break;

		tmp = get_reg(child, addr >> 2);
		ret = put_user(tmp, datap);
		break;
	}

	/* Write the word at location address. */
	case PTRACE_POKETEXT:
	case PTRACE_POKEDATA:
		ret = 0;
		if (access_process_vm(child, addr, &data, sizeof(data), 1)
		    == sizeof(data))
			break;
		ret = -EIO;
		break;

	/* Write the word at location address in the USER area. */
	case PTRACE_POKEUSR:
		ret = -EIO;
		if ((addr & 3) || addr < 0 || addr > PT_MAX << 2)
			break;

		addr >>= 2;

		if (addr == PT_DCCR) {
			/* don't allow the tracing process to change stuff like
			 * interrupt enable, kernel/user bit, dma enables etc.
			 */
			data &= DCCR_MASK;
			data |= get_reg(child, PT_DCCR) & ~DCCR_MASK;
		}
		if (put_reg(child, addr, data))
			break;
		ret = 0;
		break;

	case PTRACE_SYSCALL:
	case PTRACE_CONT:
		ret = -EIO;
		if (!valid_signal(data))
			break;

		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

		child->exit_code = data;

		/* TODO: make sure any pending breakpoint is killed */
		wake_up_process(child);
		ret = 0;
		break;

	/* Make the child exit by sending it a sigkill. */
	case PTRACE_KILL:
		ret = 0;
		if (child->exit_state == EXIT_ZOMBIE)
			break;

		child->exit_code = SIGKILL;

		/* TODO: make sure any pending breakpoint is killed */
		wake_up_process(child);
		break;

	/* Set the trap flag. */
	case PTRACE_SINGLESTEP:
		ret = -EIO;
		if (!valid_signal(data))
			break;

		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);

		/* TODO: set some clever breakpoint mechanism... */
		child->exit_code = data;
		wake_up_process(child);
		ret = 0;
		break;

	case PTRACE_DETACH:
		ret = ptrace_detach(child, data);
		break;

	/* Get all GP registers from the child. */
	case PTRACE_GETREGS: {
		int i;
		unsigned long tmp;

		for (i = 0; i <= PT_MAX; i++) {
			tmp = get_reg(child, i);
			if (put_user(tmp, datap)) {
				ret = -EFAULT;
				goto out_tsk;
			}
			datap++;
		}
		ret = 0;
		break;
	}

	/* Set all GP registers in the child. */
	case PTRACE_SETREGS: {
		int i;
		unsigned long tmp;

		for (i = 0; i <= PT_MAX; i++) {
			if (get_user(tmp, datap)) {
				ret = -EFAULT;
				goto out_tsk;
			}
			if (i == PT_DCCR) {
				tmp &= DCCR_MASK;
				tmp |= get_reg(child, PT_DCCR) & ~DCCR_MASK;
			}
			put_reg(child, i, tmp);
			datap++;
		}
		ret = 0;
		break;
	}

	default:
		ret = ptrace_request(child, request, addr, data);
		break;
	}

out_tsk:
	return ret;
}
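/*
 * Tracer-side consequence of the comment above (a sketch): on this port
 * the data argument is an output pointer for the PEEK requests, so a
 * tracer reads a word like this instead of using ptrace()'s return value:
 */
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>

static int peek_word(pid_t pid, unsigned long addr, unsigned long *out)
{
	/* the result is written to *out rather than returned */
	if (ptrace(PTRACE_PEEKDATA, pid, (void *)addr, out) < 0) {
		perror("PTRACE_PEEKDATA");
		return -1;
	}
	return 0;
}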
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags).  The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	struct task_struct *p;
	int cgroup_callbacks_done = 0;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	/*
	 * Siblings of global init remain as zombies on exit since they are
	 * not reaped by their parent (swapper). To solve this and to avoid
	 * multi-rooted process trees, prevent global and container-inits
	 * from creating siblings.
	 */
	if ((clone_flags & CLONE_PARENT) &&
				current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->real_cred->user->processes) >=
			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->real_cred->user != INIT_USER)
			goto bad_fork_free;
	}

	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	retval = -EAGAIN;
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
	rcu_copy_process(p);
	INIT_RCU_HEAD(&p->rcu);
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	init_sigpending(&p->pending);

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->gtime = cputime_zero;
	p->utimescaled = cputime_zero;
	p->stimescaled = cputime_zero;
	p->prev_utime = cputime_zero;
	p->prev_stime = cputime_zero;

	p->default_timer_slack_ns = current->timer_slack_ns;

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	posix_cpu_timers_init(p);

	p->lock_depth = -1;	/* -1 = no lock */
	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
	p->io_context = NULL;
	p->audit_context = NULL;
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
	if (IS_ERR(p->mempolicy)) {
		retval = PTR_ERR(p->mempolicy);
		p->mempolicy = NULL;
		goto bad_fork_cleanup_cgroup;
	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	p->hardirqs_enabled = 1;
#else
	p->hardirqs_enabled = 0;
#endif
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif

	p->bts = NULL;

	p->stack_start = stack_start;

	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p, clone_flags);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;

	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_policy;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_namespaces(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	if ((retval = copy_io(clone_flags, p)))
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		pid = alloc_pid(p->nsproxy->pid_ns);
		if (!pid)
			goto bad_fork_cleanup_io;

		if (clone_flags & CLONE_NEWPID) {
			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
			if (retval < 0)
				goto bad_fork_free_pid;
		}
	}

	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	if (current->nsproxy != p->nsproxy) {
		retval = ns_cgroup_clone(p, pid);
		if (retval)
			goto bad_fork_free_pid;
	}

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	/*
	 * Ok, make it visible to the rest of the system.
	 * We don't wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);

	/* Now that the task is set up, run cgroup callbacks if
	 * necessary. We need to run them before the task is visible
	 * on the tasklist. */
	cgroup_fork_callbacks(p);
	cgroup_callbacks_done = 1;

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/*
	 * The task hasn't been attached yet, so its cpus_allowed mask will
	 * not be changed, nor will its assigned CPU.
	 *
	 * The cpus_allowed mask of the parent may have changed after it was
	 * copied first time - so re-copy it here, then check the child's CPU
	 * to ensure it is on a valid CPU (and if not, just force it back to
	 * parent's CPU). This avoids a lot of nasty races.
	 */
	p->cpus_allowed = current->cpus_allowed;
	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
			!cpu_online(task_cpu(p))))
		set_task_cpu(p, smp_processor_id());

	/*
	 * The state of the parent's TIF_KERNEL_TRACE flag may have changed
	 * since it was copied in dup_task_struct() so we re-copy it here.
	 */
	if (test_thread_flag(TIF_KERNEL_TRACE))
		set_tsk_thread_flag(p, TIF_KERNEL_TRACE);
	else
		clear_tsk_thread_flag(p, TIF_KERNEL_TRACE);

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * its process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		atomic_inc(&current->signal->sigcnt);
		atomic_inc(&current->signal->count);
		atomic_inc(&current->signal->live);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
	}

	if (likely(p->pid)) {
		list_add_tail(&p->sibling, &p->real_parent->children);
		tracehook_finish_clone(p, clone_flags, trace);

		if (thread_group_leader(p)) {
			if (clone_flags & CLONE_NEWPID)
				p->nsproxy->pid_ns->child_reaper = p;

			p->signal->leader_pid = pid;
			tty_kref_put(p->signal->tty);
			p->signal->tty = tty_kref_get(current->signal->tty);
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__get_cpu_var(process_counts)++;
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	perf_event_fork(p);
	return p;

bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	put_io_context(p->io_context);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_policy:
	perf_event_free_task(p);
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	atomic_dec(&p->cred->user->processes);
	exit_creds(p);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_score_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}
	selected_oom_score_adj = min_score_adj;

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			task_unlock(p);
			rcu_read_unlock();
			return 0;
		}
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	rcu_read_unlock();
	if (selected)
		compact_nodes(false);
	return rem;
}
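/*
 * The victim-selection rule shared by the lowmem_shrink() variants in
 * this section, factored out for clarity (a standalone sketch, not
 * kernel code): prefer the largest oom_score_adj and break ties by the
 * larger RSS, exactly mirroring the two "continue" tests above.
 */
struct lmk_candidate {
	int oom_score_adj;
	int tasksize;		/* RSS in pages */
};

/* return nonzero if candidate a should replace current selection b */
static int lmk_better_victim(const struct lmk_candidate *a,
			     const struct lmk_candidate *b)
{
	if (a->oom_score_adj != b->oom_score_adj)
		return a->oom_score_adj > b->oom_score_adj;
	return a->tasksize > b->tasksize;
}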
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
#ifdef ENHANCED_LMK_ROUTINE
	struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE
	static DEFINE_RATELIMIT_STATE(lmk_rs, DEFAULT_RATELIMIT_INTERVAL, 0);
#else
	static DEFINE_RATELIMIT_STATE(lmk_rs, 6*DEFAULT_RATELIMIT_INTERVAL, 0);
#endif
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
#ifdef ENHANCED_LMK_ROUTINE
	int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,};
	int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
	int max_selected_oom_idx = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_score_adj;
#endif
	int array_size = ARRAY_SIZE(lowmem_adj);
#if (!defined(CONFIG_MACH_JF) \
	&& !defined(CONFIG_SEC_PRODUCT_8960) \
	)
	unsigned long nr_to_scan = sc->nr_to_scan;
#endif
#ifndef CONFIG_CMA
	int other_free = global_page_state(NR_FREE_PAGES);
#else
	int other_free = global_page_state(NR_FREE_PAGES) -
				global_page_state(NR_FREE_CMA_PAGES);
#endif
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

#ifdef CONFIG_ZRAM_FOR_ANDROID
	other_file -= total_swapcache_pages;
#endif /* CONFIG_ZRAM_FOR_ANDROID */

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}

#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++)
		selected_oom_score_adj[i] = min_score_adj;
#else
	selected_oom_score_adj = min_score_adj;
#endif

#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 1);
#endif /* CONFIG_ZRAM_FOR_ANDROID */

	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;
#ifdef ENHANCED_LMK_ROUTINE
		int is_exist_oom_task = 0;
#endif

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			task_unlock(p);
			read_unlock(&tasklist_lock);
#ifdef CONFIG_ZRAM_FOR_ANDROID
			atomic_set(&s_reclaim.lmk_running, 0);
#endif /* CONFIG_ZRAM_FOR_ANDROID */
			return 0;
		}
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

#ifdef ENHANCED_LMK_ROUTINE
		if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) {
			for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
				if (!selected[i]) {
					is_exist_oom_task = 1;
					max_selected_oom_idx = i;
					break;
				}
			}
		} else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj ||
			(selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj &&
			 selected_tasksize[max_selected_oom_idx] < tasksize)) {
			is_exist_oom_task = 1;
		}

		if (is_exist_oom_task) {
			selected[max_selected_oom_idx] = p;
			selected_tasksize[max_selected_oom_idx] = tasksize;
			selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj;

			if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH)
				all_selected_oom++;

			if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) {
				for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
					if (selected_oom_score_adj[i] <
					    selected_oom_score_adj[max_selected_oom_idx])
						max_selected_oom_idx = i;
					else if (selected_oom_score_adj[i] ==
						 selected_oom_score_adj[max_selected_oom_idx] &&
						 selected_tasksize[i] <
						 selected_tasksize[max_selected_oom_idx])
						max_selected_oom_idx = i;
				}
			}

			lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
				     p->pid, p->comm, oom_score_adj, tasksize);
		}
#else
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#endif
	}

#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
		if (selected[i]) {
			lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d, free memory = %d, reclaimable memory = %d\n",
				     selected[i]->pid, selected[i]->comm,
				     selected_oom_score_adj[i],
				     selected_tasksize[i],
				     other_free, other_file);
			lowmem_deathpending_timeout = jiffies + HZ;
			send_sig(SIGKILL, selected[i], 0);
			set_tsk_thread_flag(selected[i], TIF_MEMDIE);
			rem -= selected_tasksize[i];
#ifdef LMK_COUNT_READ
			lmk_count++;
#endif
		}
	}
#else
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
#ifdef LMK_COUNT_READ
		lmk_count++;
#endif
	}
#endif

#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	if (__ratelimit(&lmk_rs)) {
		lowmem_print(1, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE
		show_mem(SHOW_MEM_FILTER_NODES);
		dump_tasks_info();
#endif
	}
#endif

	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	read_unlock(&tasklist_lock);
#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 0);
#endif /* CONFIG_ZRAM_FOR_ANDROID */
	return rem;
}
static int android_oom_handler(struct notifier_block *nb,
			       unsigned long val, void *data)
{
	struct task_struct *tsk;
#ifdef MULTIPLE_OOM_KILLER
	struct task_struct *selected[OOM_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
#ifdef MULTIPLE_OOM_KILLER
	int selected_tasksize[OOM_DEPTH] = {0,};
	int selected_oom_score_adj[OOM_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
	int max_selected_oom_idx = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_score_adj;
#endif
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL/5, 1);
#endif
	unsigned long *freed = data;
#if defined(CONFIG_CMA_PAGE_COUNTING)
	unsigned long nr_cma_free;
	unsigned long nr_cma_inactive_file;
	unsigned long nr_cma_active_file;
	int other_free;
	int other_file;

	nr_cma_free = global_page_state(NR_FREE_CMA_PAGES);
	other_free = global_page_state(NR_FREE_PAGES) - nr_cma_free;

	nr_cma_inactive_file = global_page_state(NR_CMA_INACTIVE_FILE);
	nr_cma_active_file = global_page_state(NR_CMA_ACTIVE_FILE);
	other_file = global_page_state(NR_FILE_PAGES) -
			global_page_state(NR_SHMEM) -
			total_swapcache_pages -
			nr_cma_inactive_file -
			nr_cma_active_file;
#endif

	/* show status */
	pr_warning("%s invoked Android-oom-killer: oom_adj=%d, oom_score_adj=%d\n",
		   current->comm, current->signal->oom_adj,
		   current->signal->oom_score_adj);
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	if (__ratelimit(&oom_rs)) {
		dump_stack();
		show_mem(SHOW_MEM_FILTER_NODES);
		dump_tasks_info();
	}
#endif

	min_score_adj = 0;
#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++)
		selected_oom_score_adj[i] = min_score_adj;
#else
	selected_oom_score_adj = min_score_adj;
#endif

	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;
#ifdef MULTIPLE_OOM_KILLER
		int is_exist_oom_task = 0;
#endif

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

		lowmem_print(2, "oom: ------ %d (%s), adj %d, size %d\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#ifdef MULTIPLE_OOM_KILLER
		if (all_selected_oom < OOM_DEPTH) {
			for (i = 0; i < OOM_DEPTH; i++) {
				if (!selected[i]) {
					is_exist_oom_task = 1;
					max_selected_oom_idx = i;
					break;
				}
			}
		} else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj ||
			(selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj &&
			 selected_tasksize[max_selected_oom_idx] < tasksize)) {
			is_exist_oom_task = 1;
		}

		if (is_exist_oom_task) {
			selected[max_selected_oom_idx] = p;
			selected_tasksize[max_selected_oom_idx] = tasksize;
			selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj;

			if (all_selected_oom < OOM_DEPTH)
				all_selected_oom++;

			if (all_selected_oom == OOM_DEPTH) {
				for (i = 0; i < OOM_DEPTH; i++) {
					if (selected_oom_score_adj[i] <
					    selected_oom_score_adj[max_selected_oom_idx])
						max_selected_oom_idx = i;
					else if (selected_oom_score_adj[i] ==
						 selected_oom_score_adj[max_selected_oom_idx] &&
						 selected_tasksize[i] <
						 selected_tasksize[max_selected_oom_idx])
						max_selected_oom_idx = i;
				}
			}

			lowmem_print(2, "oom: max_selected_oom_idx(%d) select %d (%s), adj %d, size %d, to kill\n",
				     max_selected_oom_idx, p->pid, p->comm,
				     oom_score_adj, tasksize);
		}
#else
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "oom: select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#endif
	}

#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++) {
		if (selected[i]) {
#if defined(CONFIG_CMA_PAGE_COUNTING)
			lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, "
				"size %d ofree %d ofile %d "
				"cma_free %lu cma_i_file %lu cma_a_file %lu\n",
				selected[i]->pid, selected[i]->comm,
				selected_oom_score_adj[i],
				selected_tasksize[i],
				other_free, other_file,
				nr_cma_free, nr_cma_inactive_file,
				nr_cma_active_file);
#else
			lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n",
				     selected[i]->pid, selected[i]->comm,
				     selected_oom_score_adj[i],
				     selected_tasksize[i]);
#endif
			send_sig(SIGKILL, selected[i], 0);
			rem -= selected_tasksize[i];
			*freed += (unsigned long)selected_tasksize[i];
#ifdef OOM_COUNT_READ
			oom_count++;
#endif
		}
	}
#else
	if (selected) {
		lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
		*freed += (unsigned long)selected_tasksize;
#ifdef OOM_COUNT_READ
		oom_count++;
#endif
	}
#endif

	read_unlock(&tasklist_lock);

	lowmem_print(2, "oom: get memory %lu", *freed);
	return rem;
}
static int android_oom_handler(struct notifier_block *nb,
			       unsigned long val, void *data)
{
	struct task_struct *tsk;
#ifdef MULTIPLE_OOM_KILLER
	struct task_struct *selected[OOM_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
#ifdef MULTIPLE_OOM_KILLER
	int selected_tasksize[OOM_DEPTH] = {0,};
	int selected_oom_score_adj[OOM_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
	int max_selected_oom_idx = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_score_adj;
#endif
	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL/5, 1);
	unsigned long *freed = data;

	/* show status */
	pr_warning("%s invoked Android-oom-killer: oom_adj=%d, oom_score_adj=%d\n",
		   current->comm, current->signal->oom_adj,
		   current->signal->oom_score_adj);
	dump_stack();
	show_mem(SHOW_MEM_FILTER_NODES);
	if (__ratelimit(&oom_rs))
		dump_tasks_info();

	min_score_adj = 0;
#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++)
		selected_oom_score_adj[i] = min_score_adj;
#else
	selected_oom_score_adj = min_score_adj;
#endif

#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 1);
#endif

	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;
#ifdef MULTIPLE_OOM_KILLER
		int is_exist_oom_task = 0;
#endif

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

		lowmem_print(2, "oom: ------ %d (%s), adj %d, size %d\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#ifdef MULTIPLE_OOM_KILLER
		if (all_selected_oom < OOM_DEPTH) {
			for (i = 0; i < OOM_DEPTH; i++) {
				if (!selected[i]) {
					is_exist_oom_task = 1;
					max_selected_oom_idx = i;
					break;
				}
			}
		} else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj ||
			(selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj &&
			 selected_tasksize[max_selected_oom_idx] < tasksize)) {
			is_exist_oom_task = 1;
		}

		if (is_exist_oom_task) {
			selected[max_selected_oom_idx] = p;
			selected_tasksize[max_selected_oom_idx] = tasksize;
			selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj;

			if (all_selected_oom < OOM_DEPTH)
				all_selected_oom++;

			if (all_selected_oom == OOM_DEPTH) {
				for (i = 0; i < OOM_DEPTH; i++) {
					if (selected_oom_score_adj[i] <
					    selected_oom_score_adj[max_selected_oom_idx])
						max_selected_oom_idx = i;
					else if (selected_oom_score_adj[i] ==
						 selected_oom_score_adj[max_selected_oom_idx] &&
						 selected_tasksize[i] <
						 selected_tasksize[max_selected_oom_idx])
						max_selected_oom_idx = i;
				}
			}

			lowmem_print(2, "oom: max_selected_oom_idx(%d) select %d (%s), adj %d, size %d, to kill\n",
				     max_selected_oom_idx, p->pid, p->comm,
				     oom_score_adj, tasksize);
		}
#else
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "oom: select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#endif
	}

#ifdef MULTIPLE_OOM_KILLER
	for (i = 0; i < OOM_DEPTH; i++) {
		if (selected[i]) {
			lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n",
				     selected[i]->pid, selected[i]->comm,
				     selected_oom_score_adj[i],
				     selected_tasksize[i]);
			send_sig(SIGKILL, selected[i], 0);
			rem -= selected_tasksize[i];
			*freed += (unsigned long)selected_tasksize[i];
#ifdef OOM_COUNT_READ
			oom_count++;
#endif
		}
	}
#else
	if (selected) {
		lowmem_print(1, "oom: send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
		*freed += (unsigned long)selected_tasksize;
#ifdef OOM_COUNT_READ
		oom_count++;
#endif
	}
#endif

	read_unlock(&tasklist_lock);
#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 0);
#endif
	lowmem_print(2, "oom: get memory %lu", *freed);
	return rem;
}
/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel.  Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses.  Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space.  Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
 *
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
 *
 * May run on IST stack.
 */
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk = current;
	unsigned long condition;
	int si_code;

	get_debugreg(condition, 6);

	/*
	 * The processor cleared BTF, so don't mark that we need it set.
	 */
	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
	tsk->thread.debugctlmsr = 0;

	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
						SIGTRAP) == NOTIFY_STOP)
		return;

	/* It's safe to allow irq's after DR6 has been saved */
	preempt_conditional_sti(regs);

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
		if (!tsk->thread.debugreg7)
			goto clear_dr7;
	}

#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK)
		goto debug_vm86;
#endif

	/* Save debug status register where ptrace can see it */
	tsk->thread.debugreg6 = condition;

	/*
	 * Single-stepping through TF: make sure we ignore any events in
	 * kernel space (but re-enable TF when returning to user mode).
	 */
	if (condition & DR_STEP) {
		if (!user_mode(regs))
			goto clear_TF_reenable;
	}

	si_code = get_si_code(condition);
	/* Ok, finally something we can handle */
	send_sigtrap(tsk, regs, error_code, si_code);

	/*
	 * Disable additional traps. They'll be re-enabled when
	 * the signal is delivered.
	 */
clear_dr7:
	set_debugreg(0, 7);
	preempt_conditional_cli(regs);
	return;

#ifdef CONFIG_X86_32
debug_vm86:
	/* reenable preemption: handle_vm86_trap() might sleep */
	dec_preempt_count();
	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
	conditional_cli(regs);
	return;
#endif

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
	regs->flags &= ~X86_EFLAGS_TF;
	preempt_conditional_cli(regs);
	return;
}
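/*
 * For context (illustrative, standard x86 ptrace interface): the DR6
 * value read above is normally set because a debugger armed DR0-DR3/DR7
 * through the u_debugreg slots of struct user, the only path ptrace
 * permits, and only for userspace addresses as the comment notes.  The
 * DR7 bit layout below (L0 enable, R/W0 = write, LEN0 = 4 bytes) is the
 * architectural encoding.
 */
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

static int set_write_watchpoint(pid_t pid, unsigned long addr)
{
	/* DR7: L0 (bit 0), R/W0 = 01 (bits 16-17), LEN0 = 11 (bits 18-19) */
	unsigned long dr7 = 0x1 | (0x1UL << 16) | (0x3UL << 18);

	if (ptrace(PTRACE_POKEUSER, pid,
		   offsetof(struct user, u_debugreg[0]), addr) < 0)
		return -1;
	return ptrace(PTRACE_POKEUSER, pid,
		      offsetof(struct user, u_debugreg[7]), dr7);
}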
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int minfree = 0;
	int selected_tasksize = 0;
	short selected_oom_score_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

	/* For requests for unmovable pages, take no account of free CMA pages */
	if (IS_ENABLED(CONFIG_CMA) &&
	    (allocflags_to_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE))
		other_free -= global_page_state(NR_FREE_CMA_PAGES);

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		minfree = lowmem_minfree[i];
		if (other_free < minfree && other_file < minfree) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}
	selected_oom_score_adj = min_score_adj;

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		short oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			task_unlock(p);
			rcu_read_unlock();
			return 0;
		}
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
			     p->comm, p->pid, oom_score_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "Killing '%s' (%d), adj %hd,\n"
				"   to free %ldkB on behalf of '%s' (%d) because\n"
				"   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n"
				"   Free memory is %ldkB above reserved\n",
			     selected->comm, selected->pid,
			     selected_oom_score_adj,
			     selected_tasksize * (long)(PAGE_SIZE / 1024),
			     current->comm, current->pid,
			     other_file * (long)(PAGE_SIZE / 1024),
			     minfree * (long)(PAGE_SIZE / 1024),
			     min_score_adj,
			     other_free * (long)(PAGE_SIZE / 1024));
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	rcu_read_unlock();
	return rem;
}
long arch_ptrace(struct task_struct *child, long request, long addr, long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKDATA: pr_debug("ptrace: PEEKDATA\n"); /* fall through */ case PTRACE_PEEKTEXT: /* read word at location addr. */ { unsigned long tmp = 0; int copied; ret = -EIO; pr_debug("ptrace: PEEKTEXT at addr 0x%08lx + %ld\n", addr, sizeof(data)); if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) break; pr_debug("ptrace: user address is valid\n"); if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start() && addr + sizeof(tmp) <= get_l1_code_start() + L1_CODE_LENGTH) { safe_dma_memcpy (&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (L1_DATA_A_LENGTH != 0 && addr >= L1_DATA_A_START && addr + sizeof(tmp) <= L1_DATA_A_START + L1_DATA_A_LENGTH) { memcpy(&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (L1_DATA_B_LENGTH != 0 && addr >= L1_DATA_B_START && addr + sizeof(tmp) <= L1_DATA_B_START + L1_DATA_B_LENGTH) { memcpy(&tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else if (addr >= FIXED_CODE_START && addr + sizeof(tmp) <= FIXED_CODE_END) { copy_from_user_page(0, 0, 0, &tmp, (const void *)(addr), sizeof(tmp)); copied = sizeof(tmp); } else copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); pr_debug("ptrace: copied size %d [0x%08lx]\n", copied, tmp); if (copied != sizeof(tmp)) break; ret = put_user(tmp, datap); break; } /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp; ret = -EIO; tmp = 0; if ((addr & 3) || (addr > (sizeof(struct pt_regs) + 16))) { printk(KERN_WARNING "ptrace error : PEEKUSR : temporarily returning " "0 - %x sizeof(pt_regs) is %lx\n", (int)addr, sizeof(struct pt_regs)); break; } if (addr == sizeof(struct pt_regs)) { /* PT_TEXT_ADDR */ tmp = child->mm->start_code + TEXT_OFFSET; } else if (addr == (sizeof(struct pt_regs) + 4)) { /* PT_TEXT_END_ADDR */ tmp = child->mm->end_code; } else if (addr == (sizeof(struct pt_regs) + 8)) { /* PT_DATA_ADDR */ tmp = child->mm->start_data; #ifdef CONFIG_BINFMT_ELF_FDPIC } else if (addr == (sizeof(struct pt_regs) + 12)) { tmp = child->mm->context.exec_fdpic_loadmap; } else if (addr == (sizeof(struct pt_regs) + 16)) { tmp = child->mm->context.interp_fdpic_loadmap; #endif } else { tmp = get_reg(child, addr); } ret = put_user(tmp, datap); break; } /* when I and D space are separate, this will have to be fixed. */ case PTRACE_POKEDATA: pr_debug("ptrace: PTRACE_PEEKDATA\n"); /* fall through */ case PTRACE_POKETEXT: /* write the word at location addr. 
*/ { int copied; ret = -EIO; pr_debug("ptrace: POKETEXT at addr 0x%08lx + %ld bytes %lx\n", addr, sizeof(data), data); if (is_user_addr_valid(child, addr, sizeof(data)) < 0) break; pr_debug("ptrace: user address is valid\n"); if (L1_CODE_LENGTH != 0 && addr >= get_l1_code_start() && addr + sizeof(data) <= get_l1_code_start() + L1_CODE_LENGTH) { safe_dma_memcpy ((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (L1_DATA_A_LENGTH != 0 && addr >= L1_DATA_A_START && addr + sizeof(data) <= L1_DATA_A_START + L1_DATA_A_LENGTH) { memcpy((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (L1_DATA_B_LENGTH != 0 && addr >= L1_DATA_B_START && addr + sizeof(data) <= L1_DATA_B_START + L1_DATA_B_LENGTH) { memcpy((void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else if (addr >= FIXED_CODE_START && addr + sizeof(data) <= FIXED_CODE_END) { copy_to_user_page(0, 0, 0, (void *)(addr), &data, sizeof(data)); copied = sizeof(data); } else copied = access_process_vm(child, addr, &data, sizeof(data), 1); pr_debug("ptrace: copied size %d\n", copied); if (copied != sizeof(data)) break; ret = 0; break; } case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & 3) || (addr > (sizeof(struct pt_regs) + 16))) { printk(KERN_WARNING "ptrace error : POKEUSR: temporarily returning 0\n"); break; } if (addr >= (sizeof(struct pt_regs))) { ret = 0; break; } if (addr == PT_SYSCFG) { data &= SYSCFG_MASK; data |= get_reg(child, PT_SYSCFG); } ret = put_reg(child, addr, data); break; case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ pr_debug("ptrace: syscall/cont\n"); ret = -EIO; if (!valid_signal(data)) break; if (request == PTRACE_SYSCALL) set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->exit_code = data; ptrace_disable(child); pr_debug("ptrace: before wake_up_process\n"); wake_up_process(child); ret = 0; break; /* * make the child exit. Best I can do is send it a sigkill. * perhaps it should be put in the status that it wants to * exit. */ case PTRACE_KILL: ret = 0; if (child->exit_state == EXIT_ZOMBIE) /* already dead */ break; child->exit_code = SIGKILL; ptrace_disable(child); wake_up_process(child); break; case PTRACE_SINGLESTEP: /* set the trap flag. */ pr_debug("ptrace: single step\n"); ret = -EIO; if (!valid_signal(data)) break; clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); ptrace_enable(child); child->exit_code = data; wake_up_process(child); ret = 0; break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ ret = ptrace_getregs(child, datap); break; case PTRACE_SETREGS: printk(KERN_WARNING "ptrace: SETREGS: **** NOT IMPLEMENTED ***\n"); /* Set all gp regs in the child. */ ret = 0; break; default: ret = ptrace_request(child, request, addr, data); break; } return ret; }
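The PEEKTEXT/POKETEXT arms above are the kernel side of plain ptrace(2) requests. A small tracer-side sketch of the calling convention this handler services; pid and addr are hypothetical, and the tracee must already be stopped under trace:

#include <errno.h>
#include <sys/ptrace.h>
#include <sys/types.h>

/* Read one word of the tracee's text, then overwrite it.
 * PEEKTEXT returns the word itself, so errno is what
 * distinguishes a legitimate -1 value from an error. */
static int patch_word(pid_t pid, unsigned long addr, long new_word, long *old_word)
{
	errno = 0;
	*old_word = ptrace(PTRACE_PEEKTEXT, pid, (void *)addr, NULL);
	if (*old_word == -1 && errno)
		return -1;
	if (ptrace(PTRACE_POKETEXT, pid, (void *)addr, (void *)new_word))
		return -1;
	return 0;
}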
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int selected_tasksize = 0; int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (!spin_trylock(&lowmem_shrink_lock)) { lowmem_print(4, "lowmem_shrink lock failed\n"); return -1; } /* For JB: FOREGROUND is adj0 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */ /* For ICS: HOME is adj6 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */ if (other_free <= lowmem_minfree[1]) { /* Notify the kernel that we should consider the Android threshold */ lmk_adjz_minfree = lowmem_minfree[0]; } else { lmk_adjz_minfree = 0; } if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); /* * Disable the low-memory indication */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) if (in_lowmem) { in_lowmem = 0; lowmem_indicator = 1; DAL_LowMemoryOff(); } #endif spin_unlock(&lowmem_shrink_lock); return rem; } selected_oom_score_adj = min_score_adj; // add debug log #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) { lowmem_print(1, "======low memory killer=====\n"); lowmem_print(1, "Free memory other_free: %d, other_file:%d pages\n", other_free, other_file); } #endif rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; if (test_tsk_thread_flag(p, TIF_MEMDIE) && time_before_eq(jiffies, lowmem_deathpending_timeout)) { lowmem_print(1, "lowmem_shrink returning early, because %d (%s) is dying\n", p->pid, p->comm); task_unlock(p); rcu_read_unlock(); spin_unlock(&lowmem_shrink_lock); return 0; } /* We use oom_score_adj to represent oom_adj here, although the latter has been deprecated by the kernel. */ /* This is because JB AMS still uses oom_adj to stand for the importance of activities. 
*/ /* oom_score_adj = p->signal->oom_score_adj; - 2012.07.16 - */ oom_score_adj = p->signal->oom_adj; #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) lowmem_print(1, "Candidate %d (%s), adj %d, rss %lu, to kill\n", p->pid, p->comm, oom_score_adj, get_mm_rss(p->mm)); #endif if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending = selected; lowmem_deathpending_timeout = jiffies + HZ; #ifdef CONFIG_MT_ENG_BUILD if (min_score_adj <= lowmem_debug_adj) { lowmem_print(1, "low memory info:\n"); show_free_areas_minimum(); } #endif /* * Killing an adj=0 process triggers a kernel warning, only on MTK internal eng/non_LCA loads */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) && \ !defined (MTK_LCA_SUPPORT) && !defined (PARTIAL_BUILD) //mtk internal if (selected_oom_score_adj <= 0) { // can set 16 for test lowmem_print(1, "low memory: triggering kernel warning\n"); aee_kernel_warning_api("LMK", 0, DB_OPT_DEFAULT|DB_OPT_DUMPSYS_ACTIVITY, "Framework low memory", "please contact AP/AF memory module owner\n"); } #endif /* * Show an indication of low memory */ #if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) if (lowmem_indicator && selected_oom_score_adj <= 1) { lowmem_print(5, "low memory: raise aee warning\n"); in_lowmem = 1; lowmem_indicator = 0; DAL_LowMemoryOn(); //aee_kernel_warning(module_name, lowmem_warning); } #endif send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); spin_unlock(&lowmem_shrink_lock); return rem; }
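This variant deliberately reads p->signal->oom_adj instead of oom_score_adj, so it helps to recall how the two scales map onto each other. A reference sketch of the conversion fs/proc applies when userspace writes the legacy oom_adj value; this is not part of the driver, and the constants come from linux/oom.h:

#include <linux/oom.h>

/* Scale the legacy oom_adj range (OOM_DISABLE..OOM_ADJUST_MAX,
 * i.e. -17..15) onto oom_score_adj (-1000..1000). */
static int oom_adj_to_score_adj(int oom_adj)
{
	if (oom_adj == OOM_ADJUST_MAX)
		return OOM_SCORE_ADJ_MAX;
	return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
}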
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *p; struct task_struct *selected[MANAGED_PROCESS_TYPES] = {NULL}; int rem = 0; int tasksize; int i; int min_adj = OOM_ADJUST_MAX + 1; int minfree = 0; enum lowmem_process_type proc_type = KILLABLE_PROCESS; int selected_tasksize[MANAGED_PROCESS_TYPES] = {0}; int selected_oom_adj[MANAGED_PROCESS_TYPES]; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * */ if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_adj == OOM_ADJUST_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } /* Set the initial oom_adj for each managed process type */ for (proc_type = KILLABLE_PROCESS; proc_type < MANAGED_PROCESS_TYPES; proc_type++) selected_oom_adj[proc_type] = min_adj; read_lock(&tasklist_lock); for_each_process(p) { struct mm_struct *mm; struct signal_struct *sig; int oom_adj; task_lock(p); mm = p->mm; sig = p->signal; if (!mm || !sig) { task_unlock(p); continue; } oom_adj = sig->oom_adj; if (oom_adj < min_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(mm); task_unlock(p); if (tasksize <= 0) continue; /* Initially consider the process as killable */ proc_type = KILLABLE_PROCESS; #ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_DO_NOT_KILL_PROCESS /* Check whether the process name is in one of the do-not-kill lists */ if (is_in_donotkill_proc_list(p->comm)) { /* This user process must be preserved from killing */ proc_type = DO_NOT_KILL_PROCESS; lowmem_print(2, "The process '%s' is in donotkill_proc_names\n", p->comm); } else if (is_in_donotkill_sysproc_list(p->comm)) { /* This system process must be preserved from killing */ proc_type = DO_NOT_KILL_SYSTEM_PROCESS; lowmem_print(2, "The process '%s' is in donotkill_sysproc_names\n", p->comm); } #endif if (selected[proc_type]) { if (oom_adj < selected_oom_adj[proc_type]) continue; if (oom_adj == selected_oom_adj[proc_type] && tasksize <= selected_tasksize[proc_type]) continue; } selected[proc_type] = p; selected_tasksize[proc_type] = tasksize; selected_oom_adj[proc_type] = oom_adj; lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_adj, tasksize); } /* For each managed process type check if a process to be killed has been found: * - check first if a standard killable process has been found, if so kill it * - if there is no killable process, then check if a user process has been found, * if so kill it to prevent system slowdowns, hangs, etc. 
* - if there is neither a killable nor a user process, then check if a system process has been found, * if so kill it to prevent system slowdowns, hangs, etc. */ for (proc_type = KILLABLE_PROCESS; proc_type < MANAGED_PROCESS_TYPES; proc_type++) { if (selected[proc_type]) { lowmem_print(1, "Killing '%s' (%d), adj %d,\n" \ " to free %ldkB on behalf of '%s' (%d) because\n" \ " cache %ldkB is below limit %ldkB for oom_adj %d\n" \ " Free memory is %ldkB above reserved\n", selected[proc_type]->comm, selected[proc_type]->pid, selected_oom_adj[proc_type], selected_tasksize[proc_type] * (long)(PAGE_SIZE / 1024), current->comm, current->pid, other_file * (long)(PAGE_SIZE / 1024), minfree * (long)(PAGE_SIZE / 1024), min_adj, other_free * (long)(PAGE_SIZE / 1024)); lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected[proc_type], 0); set_tsk_thread_flag(selected[proc_type], TIF_MEMDIE); rem -= selected_tasksize[proc_type]; break; } #ifdef CONFIG_HUAWEI_FEATURE_LOW_MEMORY_KILLER_STUB sysLowKernel_write(selected); #endif } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); return rem; }
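is_in_donotkill_proc_list() and is_in_donotkill_sysproc_list() are not part of this excerpt. A plausible shape for them, assuming the protected names are kept in a module-parameter string array; donotkill_proc_names and its count are hypothetical storage, not the driver's actual symbols:

#include <linux/string.h>

static char *donotkill_proc_names[32];	/* hypothetical: filled from module params */
static int donotkill_proc_count;

static int is_in_donotkill_proc_list(const char *comm)
{
	int i;

	/* comm is the task's short name (TASK_COMM_LEN bytes),
	 * so a plain string compare is enough */
	for (i = 0; i < donotkill_proc_count; i++)
		if (strcmp(comm, donotkill_proc_names[i]) == 0)
			return 1;
	return 0;
}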
void user_enable_single_step(struct task_struct *child) { set_tsk_thread_flag(child, TIF_SINGLESTEP); }
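On an architecture where single-stepping is tracked purely through the thread flag, the matching disable hook is typically just the inverse; a minimal sketch under that assumption:

void user_disable_single_step(struct task_struct *child)
{
	/* assumes no MSR bits or debug registers need clearing */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
}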
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int minfree = 0; int selected_tasksize = 0; int selected_oom_score_adj; int array_size = ARRAY_SIZE(lowmem_adj); int other_free; int other_file; unsigned long nr_to_scan = sc->nr_to_scan; if (nr_to_scan > 0) { if (mutex_lock_interruptible(&scan_mutex) < 0) return 0; } other_free = global_page_state(NR_FREE_PAGES); other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); tune_lmk_param(&other_free, &other_file, sc); if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } } if (nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", nr_to_scan, sc->gfp_mask, rem); if (nr_to_scan > 0) mutex_unlock(&scan_mutex); return rem; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; /* if task no longer has any memory ignore it */ if (test_task_flag(tsk, TIF_MM_RELEASED)) continue; if (time_before_eq(jiffies, lowmem_deathpending_timeout)) { if (test_task_flag(tsk, TIF_MEMDIE)) { rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); mutex_unlock(&scan_mutex); return 0; } } p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; lowmem_print(3, "select '%s' (%d), adj %d, size %d, to kill\n", p->comm, p->pid, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "Killing '%s' (%d), adj %d,\n" \ " to free %ldkB on behalf of '%s' (%d) because\n" \ " cache %ldkB is below limit %ldkB for oom_score_adj %d\n" \ " Free memory is %ldkB above reserved.\n" \ " Free CMA is %ldkB\n" \ " Total reserve is %ldkB\n" \ " Total free pages is %ldkB\n" \ " Total file cache is %ldkB\n" \ " GFP mask is 0x%x\n", selected->comm, selected->pid, selected_oom_score_adj, selected_tasksize * (long)(PAGE_SIZE / 1024), current->comm, current->pid, other_file * (long)(PAGE_SIZE / 1024), minfree * (long)(PAGE_SIZE / 1024), min_score_adj, other_free * (long)(PAGE_SIZE / 1024), global_page_state(NR_FREE_CMA_PAGES) * (long)(PAGE_SIZE / 1024), totalreserve_pages * (long)(PAGE_SIZE / 1024), global_page_state(NR_FREE_PAGES) * (long)(PAGE_SIZE / 1024), global_page_state(NR_FILE_PAGES) * (long)(PAGE_SIZE / 1024), sc->gfp_mask); if (lowmem_debug_level >= 2 && selected_oom_score_adj == 0) { show_mem(SHOW_MEM_FILTER_NODES); dump_tasks(NULL, NULL); } 
lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; rcu_read_unlock(); /* give the system time to free up the memory */ msleep_interruptible(20); } else rcu_read_unlock(); lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", nr_to_scan, sc->gfp_mask, rem); mutex_unlock(&scan_mutex); return rem; }
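Unlike the other variants, this one tests TIF_MM_RELEASED and TIF_MEMDIE via test_task_flag() rather than test_tsk_thread_flag(). The helper is not shown in this excerpt; its usual shape walks every thread in the group so that a flag set on any thread counts:

static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *t = p;

	do {
		task_lock(t);
		if (test_tsk_thread_flag(t, flag)) {
			task_unlock(t);
			return 1;
		}
		task_unlock(t);
	} while_each_thread(p, t);

	return 0;
}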
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long nr; /* * Do some preliminary argument and permissions checking before we * actually start allocating stuff */ if (clone_flags & CLONE_NEWUSER) { if (clone_flags & CLONE_THREAD) return -EINVAL; /* hopefully this check will go away when userns support is * complete */ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) || !capable(CAP_SETGID)) return -EPERM; } /* * We hope to recycle these flags after 2.6.26 */ if (unlikely(clone_flags & CLONE_STOPPED)) { static int __read_mostly count = 100; if (count > 0 && printk_ratelimit()) { char comm[TASK_COMM_LEN]; count--; printk(KERN_INFO "fork(): process `%s' used deprecated " "clone flags 0x%lx\n", get_task_comm(comm, current), clone_flags & CLONE_STOPPED); } } /* * When called from kernel_thread, don't do user tracing stuff. */ if (likely(user_mode(regs))) trace = tracehook_prepare_clone(clone_flags); p = copy_process(clone_flags, stack_start, regs, stack_size, child_tidptr, NULL, trace); /* * Do this prior to waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ if (!IS_ERR(p)) { struct completion vfork; trace_sched_process_fork(current, p); nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to * use this to distinguish a fully live task from one that * hasn't gotten to tracehook_report_clone() yet. Now we * clear it and set the child going. */ p->flags &= ~PF_STARTING; if (unlikely(clone_flags & CLONE_STOPPED)) { /* * We'll start up with an immediate SIGSTOP. */ sigaddset(&p->pending.signal, SIGSTOP); set_tsk_thread_flag(p, TIF_SIGPENDING); __set_task_state(p, TASK_STOPPED); } else { wake_up_new_task(p, clone_flags); } tracehook_report_clone_complete(trace, regs, clone_flags, nr, p); if (clone_flags & CLONE_VFORK) { freezer_do_not_count(); wait_for_completion(&vfork); freezer_count(); tracehook_report_vfork_done(p, nr); } } else { nr = PTR_ERR(p); } return nr; }
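On this kernel generation the fork-family syscalls are thin arch wrappers that pick the clone flags and the user stack pointer out of the trap frame and funnel into do_fork(). A rough, arch-neutral sketch; the register name sp is illustrative rather than any particular architecture's frame layout:

asmlinkage long sys_fork(struct pt_regs *regs)
{
	/* plain fork: child reports exit to the parent with SIGCHLD */
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long sys_vfork(struct pt_regs *regs)
{
	/* vfork: share the VM and suspend the parent until exec/exit */
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs,
		       0, NULL, NULL);
}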
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; struct task_struct *selected = NULL; int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; int selected_tasksize = 0; int selected_oom_score_adj; int selected_oom_adj = 0; int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES); int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM) - global_page_state(NR_MLOCK); int fork_boost = 0; size_t *min_array; if (lowmem_fork_boost && time_before_eq(jiffies, lowmem_fork_boost_timeout)) { for (i = 0; i < lowmem_minfree_size; i++) minfree_tmp[i] = lowmem_minfree[i] + lowmem_fork_boost_minfree[i]; min_array = minfree_tmp; } else min_array = lowmem_minfree; if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < min_array[i] && other_file < min_array[i]) { min_score_adj = lowmem_adj[i]; fork_boost = lowmem_fork_boost_minfree[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } selected_oom_score_adj = min_score_adj; rcu_read_lock(); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; if (tsk->flags & PF_KTHREAD) continue; if (time_before_eq(jiffies, lowmem_deathpending_timeout)) { if (test_task_flag(tsk, TIF_MEMDIE)) { rcu_read_unlock(); return 0; } } p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); task_unlock(p); if (tasksize <= 0) continue; if (selected) { if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; selected_oom_adj = p->signal->oom_adj; lowmem_print(2, "select %d (%s), oom_adj %d score_adj %d, size %d, to kill\n", p->pid, p->comm, selected_oom_adj, oom_score_adj, tasksize); } if (selected) { lowmem_print(1, "[%s] send sigkill to %d (%s), oom_adj %d, score_adj %d," " min_score_adj %d, size %dK, free %dK, file %dK, fork_boost %dK\n", current->comm, selected->pid, selected->comm, selected_oom_adj, selected_oom_score_adj, min_score_adj, selected_tasksize << 2, other_free << 2, other_file << 2, fork_boost << 2); lowmem_deathpending_timeout = jiffies + HZ; if (selected_oom_adj < 7) { show_meminfo(); dump_tasks(); } send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; } lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); rcu_read_unlock(); return rem; }
/* * This creates a new process as a copy of the old one, * but does not actually start it yet. * * It copies the registers, and all the appropriate * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ static task_t *copy_process(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr, int pid) { int retval; struct task_struct *p = NULL; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. */ if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) return ERR_PTR(-EINVAL); /* * Shared signal handlers imply shared VM. By way of the above, * thread groups also imply shared VM. Blocking this case allows * for various simplifications in other code. */ if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) return ERR_PTR(-EINVAL); retval = security_task_create(clone_flags); if (retval) goto fork_out; retval = -ENOMEM; p = dup_task_struct(current); if (!p) goto fork_out; retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && p->user != &root_user) goto bad_fork_free; } atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); get_group_info(p->group_info); /* * If multiple threads are within copy_process(), then this check * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs. */ if (nr_threads >= max_threads) goto bad_fork_cleanup_count; if (!try_module_get(p->thread_info->exec_domain->module)) goto bad_fork_cleanup_count; if (p->binfmt && !try_module_get(p->binfmt->module)) goto bad_fork_cleanup_put_domain; p->did_exec = 0; copy_flags(clone_flags, p); p->pid = pid; retval = -EFAULT; if (clone_flags & CLONE_PARENT_SETTID) if (put_user(p->pid, parent_tidptr)) goto bad_fork_cleanup; p->proc_dentry = NULL; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); spin_lock_init(&p->proc_lock); clear_tsk_thread_flag(p, TIF_SIGPENDING); init_sigpending(&p->pending); p->it_real_value = 0; p->it_real_incr = 0; p->it_virt_value = cputime_zero; p->it_virt_incr = cputime_zero; p->it_prof_value = cputime_zero; p->it_prof_incr = cputime_zero; init_timer(&p->real_timer); p->real_timer.data = (unsigned long) p; p->utime = cputime_zero; p->stime = cputime_zero; p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ acct_clear_integrals(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); p->security = NULL; p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; #ifdef CONFIG_NUMA p->mempolicy = mpol_copy(p->mempolicy); if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; goto bad_fork_cleanup; } #endif p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; if ((retval = security_task_alloc(p))) goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; if ((retval = copy_files(clone_flags, p))) goto 
bad_fork_cleanup_semundo; if ((retval = copy_fs(clone_flags, p))) goto bad_fork_cleanup_files; if ((retval = copy_sighand(clone_flags, p))) goto bad_fork_cleanup_fs; if ((retval = copy_signal(clone_flags, p))) goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; if ((retval = copy_keys(clone_flags, p))) goto bad_fork_cleanup_mm; if ((retval = copy_namespace(clone_flags, p))) goto bad_fork_cleanup_keys; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_namespace; p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; /* * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL; /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. */ clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); /* Our parent execution domain becomes current domain. These must match for thread signalling to apply */ p->parent_exec_id = p->self_exec_id; /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; p->exit_state = 0; /* Perform scheduler related setup */ sched_fork(p); /* * Ok, make it visible to the rest of the system. * We don't wake it up yet. */ p->group_leader = p; INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); /* Need tasklist lock for parent etc handling! */ write_lock_irq(&tasklist_lock); /* * The task hasn't been attached yet, so cpus_allowed mask cannot * have changed. The cpus_allowed mask of the parent may have * changed after it was copied first time, and it may then move to * another CPU - so we re-copy it here and set the child's CPU to * the parent's CPU. This avoids a lot of nasty races. */ p->cpus_allowed = current->cpus_allowed; set_task_cpu(p, smp_processor_id()); /* * Check for pending SIGKILL! The new thread should not be allowed * to slip out of an OOM kill. (or normal SIGKILL.) */ if (sigismember(&current->pending.signal, SIGKILL)) { write_unlock_irq(&tasklist_lock); retval = -EINTR; goto bad_fork_cleanup_namespace; } /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; else p->real_parent = current; p->parent = p->real_parent; if (clone_flags & CLONE_THREAD) { spin_lock(&current->sighand->siglock); /* * Important: if an exit-all has been started then * do not create this new thread - the whole thread * group is supposed to exit anyway. */ if (current->signal->flags & SIGNAL_GROUP_EXIT) { spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -EAGAIN; goto bad_fork_cleanup_namespace; } p->group_leader = current->group_leader; if (current->signal->group_stop_count > 0) { /* * There is an all-stop in progress for the group. * We ourselves will stop as soon as we check signals. * Make the new thread part of that group stop too. 
*/ current->signal->group_stop_count++; set_tsk_thread_flag(p, TIF_SIGPENDING); } spin_unlock(&current->sighand->siglock); } SET_LINKS(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); attach_pid(p, PIDTYPE_PID, p->pid); attach_pid(p, PIDTYPE_TGID, p->tgid); if (thread_group_leader(p)) { attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); if (p->pid) __get_cpu_var(process_counts)++; } nr_threads++; total_forks++; write_unlock_irq(&tasklist_lock); retval = 0; fork_out: if (retval) return ERR_PTR(retval); return p; bad_fork_cleanup_namespace: exit_namespace(p); bad_fork_cleanup_keys: exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: exit_files(p); /* blocking */ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_free(p->mempolicy); #endif bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); bad_fork_cleanup_put_domain: module_put(p->thread_info->exec_domain->module); bad_fork_cleanup_count: put_group_info(p->group_info); atomic_dec(&p->user->processes); free_uid(p->user); bad_fork_free: free_task(p); goto fork_out; }
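Note that this older copy_process() receives the pid from its caller, unlike the later do_fork() above, which passes a trace argument instead. In this era the caller allocated the pid up front, roughly as sketched below; do_fork_pid_sketch is a name made up for illustration, and alloc_pidmap()/free_pidmap() are the era's pid-bitmap helpers:

static long do_fork_pid_sketch(unsigned long clone_flags, unsigned long stack_start,
			       struct pt_regs *regs, unsigned long stack_size,
			       int __user *parent_tidptr, int __user *child_tidptr)
{
	struct task_struct *p;
	long pid = alloc_pidmap();	/* reserve a pid before copying */

	if (pid < 0)
		return -EAGAIN;
	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 parent_tidptr, child_tidptr, pid);
	if (IS_ERR(p)) {
		free_pidmap(pid);	/* hand the pid back on failure */
		return PTR_ERR(p);
	}
	/* ... wake the child, wait for vfork completion, etc. ... */
	return pid;
}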
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; #ifdef ENHANCED_LMK_ROUTINE struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,}; #else struct task_struct *selected = NULL; #endif int rem = 0; int tasksize; int i; int min_score_adj = OOM_SCORE_ADJ_MAX + 1; #ifdef ENHANCED_LMK_ROUTINE int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,}; int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,}; int all_selected_oom = 0; int max_selected_oom_idx = 0; #else int selected_tasksize = 0; int selected_oom_score_adj; #endif #ifdef CONFIG_SAMP_HOTNESS int selected_hotness_adj = 0; #endif int array_size = ARRAY_SIZE(lowmem_adj); int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM); struct zone *zone; #if defined(CONFIG_ZRAM_FOR_ANDROID) || defined(CONFIG_ZSWAP) other_file -= total_swapcache_pages; #endif if (offlining) { /* Discount all free space in the section being offlined */ for_each_zone(zone) { if (zone_idx(zone) == ZONE_MOVABLE) { other_free -= zone_page_state(zone, NR_FREE_PAGES); lowmem_print(4, "lowmem_shrink discounted " "%lu pages in movable zone\n", zone_page_state(zone, NR_FREE_PAGES)); } } } /* * If we already have a death outstanding, then * bail out right away; indicating to vmscan * that we have nothing further to offer on * this pass. * * Note: Currently you need CONFIG_PROFILING * for this to work correctly. */ #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (lowmem_deathpending[i] && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; } #else if (lowmem_deathpending && time_before_eq(jiffies, lowmem_deathpending_timeout)) return 0; #endif if (lowmem_adj_size < array_size) array_size = lowmem_adj_size; if (lowmem_minfree_size < array_size) array_size = lowmem_minfree_size; for (i = 0; i < array_size; i++) { if (other_free < lowmem_minfree[i] && other_file < lowmem_minfree[i]) { min_score_adj = lowmem_adj[i]; break; } } if (sc->nr_to_scan > 0) lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n", sc->nr_to_scan, sc->gfp_mask, other_free, other_file, min_score_adj); rem = global_page_state(NR_ACTIVE_ANON) + global_page_state(NR_ACTIVE_FILE) + global_page_state(NR_INACTIVE_ANON) + global_page_state(NR_INACTIVE_FILE); if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) { lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); return rem; } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) selected_oom_score_adj[i] = min_score_adj; #else selected_oom_score_adj = min_score_adj; #endif #ifdef CONFIG_ZRAM_FOR_ANDROID atomic_set(&s_reclaim.lmk_running, 1); #endif read_lock(&tasklist_lock); for_each_process(tsk) { struct task_struct *p; int oom_score_adj; #ifdef ENHANCED_LMK_ROUTINE int is_exist_oom_task = 0; #endif #ifdef CONFIG_SAMP_HOTNESS int hotness_adj = 0; #endif if (tsk->flags & PF_KTHREAD) continue; p = find_lock_task_mm(tsk); if (!p) continue; oom_score_adj = p->signal->oom_score_adj; if (oom_score_adj < min_score_adj) { task_unlock(p); continue; } tasksize = get_mm_rss(p->mm); #ifdef CONFIG_SAMP_HOTNESS hotness_adj = p->signal->hotness_adj; #endif task_unlock(p); if (tasksize <= 0) continue; #ifdef ENHANCED_LMK_ROUTINE if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (!selected[i]) { is_exist_oom_task = 1; 
max_selected_oom_idx = i; break; } } } else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj || (selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj && selected_tasksize[max_selected_oom_idx] < tasksize)) { is_exist_oom_task = 1; } if (is_exist_oom_task) { selected[max_selected_oom_idx] = p; selected_tasksize[max_selected_oom_idx] = tasksize; selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj; if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) all_selected_oom++; if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) { for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx]) max_selected_oom_idx = i; else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] && selected_tasksize[i] < selected_tasksize[max_selected_oom_idx]) max_selected_oom_idx = i; } } lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); } #else if (selected) { #ifdef CONFIG_SAMP_HOTNESS if (min_score_adj <= lowmem_adj[4]) { #endif if (oom_score_adj < selected_oom_score_adj) continue; if (oom_score_adj == selected_oom_score_adj && tasksize <= selected_tasksize) continue; #ifdef CONFIG_SAMP_HOTNESS } else { if (hotness_adj > selected_hotness_adj) continue; if (hotness_adj == selected_hotness_adj && tasksize <= selected_tasksize) continue; } #endif } selected = p; selected_tasksize = tasksize; selected_oom_score_adj = oom_score_adj; #ifdef CONFIG_SAMP_HOTNESS selected_hotness_adj = hotness_adj; #endif lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n", p->pid, p->comm, oom_score_adj, tasksize); #endif } #ifdef ENHANCED_LMK_ROUTINE for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) { if (selected[i]) { #ifdef CONFIG_SAMP_HOTNESS lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d, hotness %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i], selected_hotness_adj); #else lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected[i]->pid, selected[i]->comm, selected_oom_score_adj[i], selected_tasksize[i]); #endif lowmem_deathpending[i] = selected[i]; lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected[i], 0); rem -= selected_tasksize[i]; #ifdef LMK_COUNT_READ lmk_count++; #endif } } #else if (selected) { #ifdef CONFIG_SAMP_HOTNESS lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d, hotness %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize, selected_hotness_adj); #else lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n", selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); #endif lowmem_deathpending_timeout = jiffies + HZ; send_sig(SIGKILL, selected, 0); set_tsk_thread_flag(selected, TIF_MEMDIE); rem -= selected_tasksize; #ifdef LMK_COUNT_READ lmk_count++; #endif } #endif lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n", sc->nr_to_scan, sc->gfp_mask, rem); read_unlock(&tasklist_lock); #ifdef CONFIG_ZRAM_FOR_ANDROID atomic_set(&s_reclaim.lmk_running, 0); #endif return rem; }
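All of these variants consult the same lowmem_adj/lowmem_minfree tables. In the stock driver those are plain arrays exposed as writable module parameters so that Android's init can tune the thresholds at runtime; a sketch matching the int-typed variants above, using the driver's conventional defaults (the size comments assume 4 KiB pages):

static int lowmem_adj[6] = { 0, 1, 6, 12 };
static int lowmem_adj_size = 4;
static int lowmem_minfree[6] = {
	3 * 512,	/*  6 MB */
	2 * 1024,	/*  8 MB */
	4 * 1024,	/* 16 MB */
	16 * 1024,	/* 64 MB */
};
static int lowmem_minfree_size = 4;

module_param_array_named(adj, lowmem_adj, int, &lowmem_adj_size,
			 S_IRUGO | S_IWUSR);
module_param_array_named(minfree, lowmem_minfree, int, &lowmem_minfree_size,
			 S_IRUGO | S_IWUSR);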