Example #1
int copy_thread(unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	tsk = current;
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
	p->thread.ds_ctx = NULL;

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	return err;
}
Example #2
int copy_thread(unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;
	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	p->thread.io_bitmap_ptr = NULL;
	tsk = current;
	err = -ENOMEM;

	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
/*
 * Enable single or block step.
 */
static void enable_step(struct task_struct *child, bool block)
{
	/*
	 * Make sure block stepping (BTF) is not enabled unless it should be.
	 * Note that we don't try to worry about any is_setting_trap_flag()
	 * instructions after the first when using block stepping.
	 * So no one should try to use debugger block stepping in a program
	 * that uses user-mode single stepping itself.
	 */
	if (enable_single_step(child) && block)
		set_task_blockstep(child, true);
	else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
		set_task_blockstep(child, false);
}
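For context, the ptrace entry points that drive enable_step() are thin wrappers; a sketch matching arch/x86/kernel/step.c:

/* The ptrace-facing wrappers around enable_step(); the block argument
 * selects BTF block stepping over plain TF single stepping. */
void user_enable_single_step(struct task_struct *child)
{
	enable_step(child, 0);
}

void user_enable_block_step(struct task_struct *child)
{
	enable_step(child, 1);
}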
static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *t = p;

	rcu_read_lock();
	for_each_thread(p, t) {
		task_lock(t);
		if (test_tsk_thread_flag(t, flag)) {
			task_unlock(t);
			rcu_read_unlock();
			return 1;
		}
		task_unlock(t);
	}
	rcu_read_unlock();

	return 0;
}
Example #5
/*
 * Replace the return address with the trampoline address.  Returns
 * the original return address.
 */
static
unsigned long arch_hijack_uret_addr(unsigned long trampoline_address,
                struct pt_regs *regs, struct uprobe_task *utask)
{
	unsigned long orig_ret_addr;
#ifdef CONFIG_COMPAT
	if (test_tsk_thread_flag(utask->tsk, TIF_31BIT))
		orig_ret_addr = regs->gprs[14] & 0x7FFFFFFFUL;
	else
#endif
		orig_ret_addr = regs->gprs[14];
	regs->gprs[14] = trampoline_address;
	return orig_ret_addr;
}
static int test_task_flag(struct task_struct *p, int flag)
{
	struct task_struct *t = p;

	do {
		task_lock(t);
		if (test_tsk_thread_flag(t, flag)) {
			task_unlock(t);
			return 1;
		}
		task_unlock(t);
	} while_each_thread(p, t);

	return 0;
}
void user_disable_single_step(struct task_struct *child)
{
	/*
	 * Make sure block stepping (BTF) is disabled.
	 */
	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
		set_task_blockstep(child, false);

	/* Always clear TIF_SINGLESTEP... */
	clear_tsk_thread_flag(child, TIF_SINGLESTEP);

	/* But touch TF only if it was set by us.. */
	if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF))
		task_pt_regs(child)->flags &= ~X86_EFLAGS_TF;
}
Example #8
/**
 * freezing_slow_path - slow path for testing whether a task needs to be frozen
 * @p: task to be tested
 *
 * This function is called by freezing() if system_freezing_cnt isn't zero
 * and tests whether @p needs to enter and stay in frozen state.  Can be
 * called under any context.  The freezers are responsible for ensuring the
 * target tasks see the updated state.
 */
bool freezing_slow_path(struct task_struct *p)
{
	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
		return false;

	if (test_tsk_thread_flag(p, TIF_MEMDIE))
		return false;

	if (pm_nosig_freezing || cgroup_freezing(p))
		return true;

	if (pm_freezing && !(p->flags & PF_KTHREAD))
		return true;

	return false;
}
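The kernel-doc above says this slow path is reached from freezing() only when system_freezing_cnt is nonzero; a sketch of that fast-path caller, as it appears in include/linux/freezer.h of the same era:

/*
 * Fast path: check the global counter first so the common case (nothing
 * is freezing) costs one atomic read; defer to the slow path otherwise.
 */
static inline bool freezing(struct task_struct *p)
{
	if (likely(!atomic_read(&system_freezing_cnt)))
		return false;

	return freezing_slow_path(p);
}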
RTDECL(bool) RTThreadPreemptIsPending(RTTHREAD hThread)
{
    Assert(hThread == NIL_RTTHREAD);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 5, 4)
    return !!test_tsk_thread_flag(current, TIF_NEED_RESCHED);

#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 4, 20)
    return !!need_resched();

#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2, 1, 110)
    return current->need_resched != 0;

#else
    return need_resched != 0;
#endif
}
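A usage sketch for the helper above: a work loop that voluntarily yields when a reschedule is pending. haveWork() and doOneWorkItem() are assumed placeholders, not part of IPRT; RTThreadYield() is the IPRT yield primitive.

/* Sketch only: drain a work queue but give up the CPU as soon as the
 * scheduler has marked the current thread for rescheduling. */
static void processWorkQueue(void)
{
    while (haveWork())                        /* assumed helper */
    {
        doOneWorkItem();                      /* assumed helper */
        if (RTThreadPreemptIsPending(NIL_RTTHREAD))
            RTThreadYield();                  /* IPRT: yield the CPU */
    }
}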
Example #10
int ptrace_set_watch_regs(struct task_struct *child,
			  struct pt_watch_regs __user *addr)
{
	int i;
	int watch_active = 0;
	unsigned long lt[NUM_WATCH_REGS];
	u16 ht[NUM_WATCH_REGS];

	if (!cpu_has_watch || boot_cpu_data.watch_reg_use_cnt == 0)
		return -EIO;
	if (!access_ok(VERIFY_READ, addr, sizeof(struct pt_watch_regs)))
		return -EIO;
	/* Check the values. */
	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
		__get_user(lt[i], &addr->WATCH_STYLE.watchlo[i]);
#ifdef CONFIG_32BIT
		if (lt[i] & __UA_LIMIT)
			return -EINVAL;
#else
		if (test_tsk_thread_flag(child, TIF_32BIT_ADDR)) {
			if (lt[i] & 0xffffffff80000000UL)
				return -EINVAL;
		} else {
			if (lt[i] & __UA_LIMIT)
				return -EINVAL;
		}
#endif
		__get_user(ht[i], &addr->WATCH_STYLE.watchhi[i]);
		if (ht[i] & ~MIPS_WATCHHI_MASK)
			return -EINVAL;
	}
	/* Install them. */
	for (i = 0; i < boot_cpu_data.watch_reg_use_cnt; i++) {
		if (lt[i] & MIPS_WATCHLO_IRW)
			watch_active = 1;
		child->thread.watch.mips3264.watchlo[i] = lt[i];
		/* Set the G bit. */
		child->thread.watch.mips3264.watchhi[i] = ht[i];
	}

	if (watch_active)
		set_tsk_thread_flag(child, TIF_LOAD_WATCH);
	else
		clear_tsk_thread_flag(child, TIF_LOAD_WATCH);

	return 0;
}
Example #11
/*
 * arch_uprobe_pre_xol - prepare to execute out of line.
 * @auprobe: the probepoint information.
 * @regs: reflects the saved user state of current task.
 */
int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct arch_uprobe_task *autask;

	autask = &current->utask->autask;
	autask->saved_trap_nr = current->thread.trap_nr;
	current->thread.trap_nr = UPROBE_TRAP_NR;
	regs->ip = current->utask->xol_vaddr;
	pre_xol_rip_insn(auprobe, regs, autask);

	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
	regs->flags |= X86_EFLAGS_TF;
	if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
		set_task_blockstep(current, false);

	return 0;
}
Example #12
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl &= ~DEBUGCTLMSR_BTF;
		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
			debugctl |= DEBUGCTLMSR_BTF;

		update_debugctlmsr(debugctl);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
	propagate_user_return_notify(prev_p, next_p);
}
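The XOR of the two test_tsk_thread_flag() results above fires only when a flag differs between the outgoing and incoming task. Factored into a standalone predicate for clarity (this helper is ours, not in the kernel source):

/* Hypothetical helper: true when a per-task thread flag differs between
 * the outgoing and incoming task, i.e. when __switch_to_xtra() has
 * hardware state to reprogram. */
static inline bool tsk_flag_changed(struct task_struct *prev_p,
				    struct task_struct *next_p, int flag)
{
	return test_tsk_thread_flag(prev_p, flag) ^
	       test_tsk_thread_flag(next_p, flag);
}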
Example #13
enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	if (task->exit_state)
		return OOM_SCAN_CONTINUE;
	if (oom_unkillable_task(task, NULL, nodemask))
		return OOM_SCAN_CONTINUE;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves.
	 */
	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
		if (unlikely(frozen(task)))
			__thaw_task(task);
		if (!force_kill)
			return OOM_SCAN_ABORT;
	}
	if (!task->mm)
		return OOM_SCAN_CONTINUE;

	if (task->flags & PF_EXITING) {
		/*
		 * If task is current and is in the process of releasing memory,
		 * allow the "kill" to set TIF_MEMDIE, which will allow it to
		 * access memory reserves.  Otherwise, it may stall forever.
		 *
		 * The iteration isn't broken here, however, in case other
		 * threads are found to have already been oom killed.
		 */
		if (task == current)
			return OOM_SCAN_SELECT;
		else if (!force_kill) {
			/*
			 * If this task is not being ptraced on exit, then wait
			 * for it to finish before killing some other task
			 * unnecessarily.
			 */
			if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
				return OOM_SCAN_ABORT;
		}
	}
	return OOM_SCAN_OK;
}
Example #14
static long _fm_syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
	long ret = -1;
#ifdef CONFIG_X86_64
	/* Check if this is a 32 bit process running on 64 bit kernel */
	int ia32 = test_tsk_thread_flag(task, TIF_IA32);
	if (ia32) {
		long ia32_id = syscall_get_nr(task, regs);
		if (ia32_id >= 0 && ia32_id < FILEMON_SYSCALLS_IA32_SIZE)
			ret = _ia32_to_syscall_map[ia32_id];
	} else {
		ret = syscall_get_nr(task, regs);
	}
#else
	ret = syscall_get_nr(task, regs);
#endif
	return ret;
}
Example #15
/*
 * Ensure that task->thread.sve_state is up to date with respect to
 * the task->thread.uw.fpsimd_state.
 *
 * This should only be called by ptrace to merge new FPSIMD register
 * values into a task for which SVE is currently active.
 * task must be non-runnable.
 * task->thread.sve_state must point to at least sve_state_size(task)
 * bytes of allocated kernel memory.
 * task->thread.uw.fpsimd_state must already have been initialised with
 * the new FPSIMD register values to be merged in.
 */
void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
{
	unsigned int vq;
	void *sst = task->thread.sve_state;
	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
	unsigned int i;

	if (!test_tsk_thread_flag(task, TIF_SVE))
		return;

	vq = sve_vq_from_vl(task->thread.sve_vl);

	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));

	for (i = 0; i < 32; ++i)
		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
		       sizeof(fst->vregs[i]));
}
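The ZREG() helper used above is not shown in this excerpt; in arch/arm64/kernel/fpsimd.c it is defined alongside this function, roughly as follows, locating Z-register n inside the packed sve_state buffer:

/* Offset of Z-register n within the task's sve_state buffer; relies on
 * the SVE signal-frame layout macros from <asm/fpsimd.h>. */
#define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))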
enum oom_scan_t oom_scan_process_thread(struct task_struct *task,
		unsigned long totalpages, const nodemask_t *nodemask,
		bool force_kill)
{
	if (task->exit_state) {
#ifdef CONFIG_OOM_SCAN_WA_PREVENT_WRONG_SEARCH
		if (task->pid == task->tgid)
			return OOM_SCAN_SKIP_SEARCH_THREAD;
#endif
		return OOM_SCAN_CONTINUE;
	}
	if (oom_unkillable_task(task, NULL, nodemask))
		return OOM_SCAN_CONTINUE;

	/*
	 * This task already has access to memory reserves and is being killed.
	 * Don't allow any other task to have access to the reserves.
	 */
	if (test_tsk_thread_flag(task, TIF_MEMDIE)) {
		if (unlikely(frozen(task)))
			__thaw_task(task);
		if (!force_kill)
			return OOM_SCAN_ABORT;
	}
	if (!task->mm)
		return OOM_SCAN_CONTINUE;

	/*
	 * If task is allocating a lot of memory and has been marked to be
	 * killed first if it triggers an oom, then select it.
	 */
	if (oom_task_origin(task))
		return OOM_SCAN_SELECT;

	if (task->flags & PF_EXITING && !force_kill) {
		/*
		 * If this task is not being ptraced on exit, then wait for it
		 * to finish before killing some other task unnecessarily.
		 */
		if (!(task->group_leader->ptrace & PT_TRACE_EXIT))
			return OOM_SCAN_ABORT;
	}
	return OOM_SCAN_OK;
}
Example #17
int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	childregs = task_pt_regs(p);
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;

	p->thread.sp = (unsigned long)childregs;
	p->thread.sp0 = (unsigned long)(childregs + 1);

	p->thread.ip = (unsigned long)ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	p->thread.io_bitmap_ptr = NULL;
	tsk = current;
	err = -ENOMEM;

	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (test_tsk_thread_flag(tsk, TIF_IO_BITMAP)) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}
Example #18
/*
 * The transition to the target patch state is complete.  Clean up the data
 * structures.
 */
static void klp_complete_transition(void)
{
	struct klp_object *obj;
	struct klp_func *func;
	struct task_struct *g, *task;
	unsigned int cpu;

	pr_debug("'%s': completing %s transition\n",
		 klp_transition_patch->mod->name,
		 klp_target_state == KLP_PATCHED ? "patching" : "unpatching");

	if (klp_target_state == KLP_UNPATCHED) {
		/*
		 * All tasks have transitioned to KLP_UNPATCHED so we can now
		 * remove the new functions from the func_stack.
		 */
		klp_unpatch_objects(klp_transition_patch);

		/*
		 * Make sure klp_ftrace_handler() can no longer see functions
		 * from this patch on the ops->func_stack.  Otherwise, after
		 * func->transition gets cleared, the handler may choose a
		 * removed function.
		 */
		klp_synchronize_transition();
	}

	klp_for_each_object(klp_transition_patch, obj)
		klp_for_each_func(obj, func)
			func->transition = false;

	/* Prevent klp_ftrace_handler() from seeing KLP_UNDEFINED state */
	if (klp_target_state == KLP_PATCHED)
		klp_synchronize_transition();

	read_lock(&tasklist_lock);
	for_each_process_thread(g, task) {
		WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING));
		task->patch_state = KLP_UNDEFINED;
	}
	read_unlock(&tasklist_lock);

	/* Likewise, reset the per-cpu idle "swapper" tasks. */
	for_each_possible_cpu(cpu) {
		task = idle_task(cpu);
		WARN_ON_ONCE(test_tsk_thread_flag(task, TIF_PATCH_PENDING));
		task->patch_state = KLP_UNDEFINED;
	}
}
Example #19
/* Return (user) module in which the given addr falls.  Returns
   NULL when no module can be found that contains the addr.  Fills in
   vm_start (addr where module is mapped in) and (base) name of module
   when given.  Note that user modules always have exactly one section
   (.dynamic or .absolute). */
static struct _stp_module *_stp_umod_lookup(unsigned long addr,
					    struct task_struct *task,
					    const char **name,
					    unsigned long *vm_start,
					    unsigned long *vm_end)
{
  void *user = NULL;
#ifdef CONFIG_COMPAT
        /* Handle 32bit signed values in 64bit longs, chop off top bits. */
        if (test_tsk_thread_flag(task, TIF_32BIT))
          addr &= ((compat_ulong_t) ~0);
#endif
  if (stap_find_vma_map_info(task->group_leader, addr,
			     vm_start, vm_end, name, &user) == 0)
    if (user != NULL)
      {
	struct _stp_module *m = (struct _stp_module *)user;
	dbug_sym(1, "found module %s at 0x%lx\n", m->path,
		 vm_start ? *vm_start : 0);
	return m;
      }
  return NULL;
}
Example #20
void ocd_disable(struct task_struct *child)
{
	u32 dc;

	if (!child)
		pr_debug("ocd_disable (no child)\n");
	else if (test_tsk_thread_flag(child, TIF_DEBUG))
		pr_debug("ocd_disable: child=%s [%u]\n",
				child->comm, child->pid);

	if (!child || test_and_clear_tsk_thread_flag(child, TIF_DEBUG)) {
		spin_lock(&ocd_lock);
		ocd_count--;

		WARN_ON(ocd_count < 0);

		if (ocd_count <= 0) {
			dc = ocd_read(DC);
			dc &= ~((1 << OCD_DC_MM_BIT) | (1 << OCD_DC_DBE_BIT));
			ocd_write(DC, dc);
		}
		spin_unlock(&ocd_lock);
	}
}
Example #21
static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
			 unsigned long address, unsigned long error_code)
{
	u16 instruction;
	int rc;
#ifdef CONFIG_COMPAT
	int compat;
#endif

	pagefault_disable();
	rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
	pagefault_enable();
	if (rc)
		return -EFAULT;

	up_read(&mm->mmap_sem);
	clear_tsk_thread_flag(current, TIF_SINGLE_STEP);
#ifdef CONFIG_COMPAT
	compat = test_tsk_thread_flag(current, TIF_31BIT);
	if (compat && instruction == 0x0a77)
		sys32_sigreturn(regs);
	else if (compat && instruction == 0x0aad)
		sys32_rt_sigreturn(regs);
	else
#endif
	if (instruction == 0x0a77)
		sys_sigreturn(regs);
	else if (instruction == 0x0aad)
		sys_rt_sigreturn(regs);
	else {
		current->thread.prot_addr = address;
		current->thread.trap_no = error_code;
		do_sigsegv(regs, error_code, SEGV_MAPERR, address);
	}
	return 0;
}
Example #22
static const char *_stp_kallsyms_lookup(unsigned long addr,
                                        unsigned long *symbolsize,
                                        unsigned long *offset, 
                                        const char **modname, 
                                        /* char ** secname? */
					struct task_struct *task)
{
	struct _stp_module *m = NULL;
	struct _stp_section *sec = NULL;
	struct _stp_symbol *s = NULL;
	unsigned end, begin = 0;
	unsigned long rel_addr = 0;

	if (addr == 0)
	  return NULL;

	if (task)
	  {
	    unsigned long vm_start = 0;
	    unsigned long vm_end = 0;
#ifdef CONFIG_COMPAT
        /* Handle 32bit signed values in 64bit longs, chop off top bits.
           _stp_umod_lookup does the same, but we need it here for the
           binary search on addr below. */
        if (test_tsk_thread_flag(task, TIF_32BIT))
          addr &= ((compat_ulong_t) ~0);
#endif
	    m = _stp_umod_lookup(addr, task, modname, &vm_start, &vm_end);
	    if (m)
	      {
		sec = &m->sections[0];
		/* XXX .absolute sections really shouldn't be here... */
		if (strcmp(".dynamic", m->sections[0].name) == 0)
		  rel_addr = addr - vm_start;
		else
		  rel_addr = addr;
	      }
	    if (modname && *modname)
	      {
		/* In case no symbol is found, fill in based on module. */
		if (offset)
		  *offset = addr - vm_start;
		if (symbolsize)
		  *symbolsize = vm_end - vm_start;
	      }
	  }
	else
	  {
	    m = _stp_kmod_sec_lookup(addr, &sec);
	    if (m)
	      {
	        rel_addr = addr - sec->static_addr;
		if (modname)
		  *modname = m->name;
	      }
	  }

        if (unlikely (m == NULL || sec == NULL))
          return NULL;
        
        /* NB: relativize the address to the section. */
        addr = rel_addr;
	end = sec->num_symbols;

	/* binary search for symbols within the module */
	do {
		unsigned mid = (begin + end) / 2;
		if (addr < sec->symbols[mid].addr)
			end = mid;
		else
			begin = mid;
	} while (begin + 1 < end);
	/* result index in $begin */

	s = & sec->symbols[begin];
	if (likely(addr >= s->addr)) {
		if (offset)
			*offset = addr - s->addr;
                /* We could also pass sec->name here. */
		if (symbolsize) {
			if ((begin + 1) < sec->num_symbols)
				*symbolsize = sec->symbols[begin + 1].addr - s->addr;
			else
				*symbolsize = 0;
			// NB: This is only a heuristic.  Sometimes there are large
			// gaps between text areas of modules.
		}
		return s->symbol;
	}
	return NULL;
}
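The predecessor search above (greatest symbol address <= target) can be exercised in isolation; a minimal standalone sketch with the same loop structure, assuming at least one element:

#include <stdio.h>

/* Standalone illustration of the predecessor search used above: find
 * the greatest index whose address is <= the target, in a sorted
 * array.  Requires n >= 1; the caller still checks target >= addrs[i]. */
static unsigned find_symbol_index(const unsigned long *addrs, unsigned n,
                                  unsigned long target)
{
    unsigned begin = 0, end = n;
    do {
        unsigned mid = (begin + end) / 2;
        if (target < addrs[mid])
            end = mid;
        else
            begin = mid;
    } while (begin + 1 < end);
    return begin;
}

int main(void)
{
    unsigned long addrs[] = { 0x100, 0x180, 0x200, 0x340 };
    printf("%u\n", find_symbol_index(addrs, 4, 0x1f0));   /* prints 1 */
    return 0;
}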
Example #23
/*
 * Simple selection loop. We chose the process with the highest
 * number of 'points'. We expect the caller will lock the tasklist.
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
		unsigned long totalpages, struct mem_cgroup *memcg,
		const nodemask_t *nodemask, bool force_kill)
{
	struct task_struct *g, *p;
	struct task_struct *chosen = NULL;
	*ppoints = 0;

	do_each_thread(g, p) {
		unsigned int points;

		if (p->exit_state)
			continue;
		if (oom_unkillable_task(p, memcg, nodemask))
			continue;

		/*
		 * This task already has access to memory reserves and is
		 * being killed. Don't allow any other task access to the
		 * memory reserve.
		 *
		 * Note: this may have a chance of deadlock if it gets
		 * blocked waiting for another task which itself is waiting
		 * for memory. Is there a better alternative?
		 */
		if (test_tsk_thread_flag(p, TIF_MEMDIE)) {
			if (unlikely(frozen(p)))
				__thaw_task(p);
			if (!force_kill)
				return ERR_PTR(-1UL);
		}
		if (!p->mm)
			continue;

		if (p->flags & PF_EXITING) {
			/*
			 * If p is the current task and is in the process of
			 * releasing memory, we allow the "kill" to set
			 * TIF_MEMDIE, which will allow it to gain access to
			 * memory reserves.  Otherwise, it may stall forever.
			 *
			 * The loop isn't broken here, however, in case other
			 * threads are found to have already been oom killed.
			 */
			if (p == current) {
				chosen = p;
				*ppoints = 1000;
			} else if (!force_kill) {
				/*
				 * If this task is not being ptraced on exit,
				 * then wait for it to finish before killing
				 * some other task unnecessarily.
				 */
				if (!(p->group_leader->ptrace & PT_TRACE_EXIT))
					return ERR_PTR(-1UL);
			}
		}

		points = oom_badness(p, memcg, nodemask, totalpages);
		if (points > *ppoints) {
			chosen = p;
			*ppoints = points;
		}
	} while_each_thread(g, p);

	return chosen;
}
/**
 * kthread_create_on_node - create a kthread.
 * @threadfn: the function to run until signal_pending(current).
 * @data: data ptr for @threadfn.
 * @node: memory node number.
 * @namefmt: printf-style name for the thread.
 *
 * Description: This helper function creates and names a kernel
 * thread.  The thread will be stopped: use wake_up_process() to start
 * it.  See also kthread_run().
 *
 * If thread is going to be bound on a particular cpu, give its node
 * in @node, to get NUMA affinity for kthread stack, or else give -1.
 * When woken, the thread will run @threadfn() with @data as its
 * argument. @threadfn() can either call do_exit() directly if it is a
 * standalone thread for which no one will call kthread_stop(), or
 * return when 'kthread_should_stop()' is true (which means
 * kthread_stop() has been called).  The return value should be zero
 * or a negative error number; it will be passed to kthread_stop().
 *
 * Returns a task_struct or ERR_PTR(-ENOMEM).
 */
struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
					   void *data, int node,
					   const char namefmt[],
					   ...)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct task_struct *task;
	struct kthread_create_info *create = kmalloc(sizeof(*create),
						     GFP_KERNEL);

	if (!create)
		return ERR_PTR(-ENOMEM);
	create->threadfn = threadfn;
	create->data = data;
	create->node = node;
	create->done = &done;

	spin_lock(&kthread_create_lock);
	list_add_tail(&create->list, &kthread_create_list);
	spin_unlock(&kthread_create_lock);

	wake_up_process(kthreadd_task);
	/*
	 * Wait for completion in killable state, for I might be chosen by
	 * the OOM killer while kthreadd is trying to allocate memory for
	 * new kernel thread.
	 */
	if (unlikely(wait_for_completion_killable(&done))) {
		int i = 0;

		/*
		 * I got SIGKILL, but wait for 10 more seconds for completion
		 * unless chosen by the OOM killer. This delay is there as a
		 * workaround for boot failure caused by SIGKILL upon device
		 * driver initialization timeout.
		 */
		while (i++ < 10 && !test_tsk_thread_flag(current, TIF_MEMDIE))
			if (wait_for_completion_timeout(&done, HZ))
				goto ready;
		/*
		 * If I was SIGKILLed before kthreadd (or new kernel thread)
		 * calls complete(), leave the cleanup of this structure to
		 * that thread.
		 */
		if (xchg(&create->done, NULL))
			return ERR_PTR(-ENOMEM);
		/*
		 * kthreadd (or new kernel thread) will call complete()
		 * shortly.
		 */
		wait_for_completion(&done);
	}
ready:
	task = create->result;
	if (!IS_ERR(task)) {
		static const struct sched_param param = { .sched_priority = 0 };
		va_list args;

		va_start(args, namefmt);
		vsnprintf(task->comm, sizeof(task->comm), namefmt, args);
		va_end(args);
		/*
		 * root may have changed our (kthreadd's) priority or CPU mask.
		 * The kernel thread should not inherit these properties.
		 */
		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
		set_cpus_allowed_ptr(task, cpu_all_mask);
	}
	kfree(create);
	return task;
}
EXPORT_SYMBOL(kthread_create_on_node);

static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
{
	/* Must have done schedule() in kthread() before we set_task_cpu */
	if (!wait_task_inactive(p, state)) {
		WARN_ON(1);
		return;
	}
	/* It's safe because the task is inactive. */
	do_set_cpus_allowed(p, cpumask_of(cpu));
	p->flags |= PF_NO_SETAFFINITY;
}

/**
 * kthread_bind - bind a just-created kthread to a cpu.
 * @p: thread created by kthread_create().
 * @cpu: cpu (might not be online, must be possible) for @k to run on.
 *
 * Description: This function is equivalent to set_cpus_allowed(),
 * except that @cpu doesn't need to be online, and the thread must be
 * stopped (i.e., just returned from kthread_create()).
 */
void kthread_bind(struct task_struct *p, unsigned int cpu)
{
	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
}
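A usage sketch tying the two APIs above together: create a kthread on the CPU's NUMA node, pin it, then wake it (the thread is created stopped, per the kernel-doc). The names my_thread_fn and start_pinned_worker are assumed, not from the source.

static int my_thread_fn(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);	/* placeholder work */
	return 0;
}

static struct task_struct *start_pinned_worker(unsigned int cpu)
{
	struct task_struct *t;

	t = kthread_create_on_node(my_thread_fn, NULL, cpu_to_node(cpu),
				   "worker/%u", cpu);
	if (IS_ERR(t))
		return t;

	kthread_bind(t, cpu);	/* must precede the first wakeup */
	wake_up_process(t);	/* the thread is created stopped */
	return t;
}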
Example #25
unsigned long _stp_linenumber_lookup(unsigned long addr, struct task_struct *task, char ** filename, int need_filename)
{
  struct _stp_module *m;
  struct _stp_section *sec;
  const char *modname = NULL;
  uint8_t *linep, *enddatap;
  int compat_task = _stp_is_compat_task();
  int user = (task ? 1 : 0);

// the portion below is guarded by this conditional because some of the
// functions and constants it needs are defined under a similar condition
#ifdef STP_NEED_LINE_DATA
  if (addr == 0)
      return 0;

  if (task)
    {
	    unsigned long vm_start = 0;
	    unsigned long vm_end = 0;
#ifdef CONFIG_COMPAT
      /* Handle 32bit signed values in 64bit longs, chop off top bits. */
      if (test_tsk_thread_flag(task, TIF_32BIT))
        addr &= ((compat_ulong_t) ~0);
#endif
	    m = _stp_umod_lookup(addr, task, &modname, &vm_start, &vm_end);
    }
  else
    m = _stp_kmod_sec_lookup(addr, &sec);

  if (m == NULL || m->debug_line == NULL)
    return 0;

  // if addr is a kernel address, it will need to be adjusted
  if (!task)
    {
      int i;
      unsigned long offset = 0;
      // have to factor in the load_offset of (specifically) the .text section
      for (i=0; i<m->num_sections; i++)
        if (!strcmp(m->sections[i].name, ".text"))
          {
            offset = (m->sections[i].static_addr - m->sections[i].sec_load_offset);
            break;
          }

      if (addr < offset)
        return 0;
      addr = addr - offset;
    }


  linep = m->debug_line;
  enddatap = m->debug_line + m->debug_line_len;

  while (linep < enddatap)
    {
      // similar to print_debug_line_section() in elfutils
      unsigned int length = 4, curr_file_idx = 1, prev_file_idx = 1;
      unsigned int skip_to_seq_end = 0, op_index = 0;
      uint64_t unit_length, hdr_length, curr_addr = 0;
      uint8_t *endunitp, *endhdrp, *dirsecp, *stdopcode_lens_secp;
      uint16_t version;
      uint8_t opcode_base, line_range, min_instr_len = 0, max_ops = 1;
      unsigned long curr_linenum = 1;
      int8_t line_base;
      long cumm_line_adv = 0;

      unit_length = (uint64_t) read_pointer ((const uint8_t **) &linep, enddatap, DW_EH_PE_data4, user, compat_task);
      if (unit_length == 0xffffffff)
        {
          if (unlikely (linep + 8 > enddatap))
            return 0;
          unit_length = (uint64_t) read_pointer ((const uint8_t **) &linep, enddatap, DW_EH_PE_data8, user, compat_task);
          length = 8;
        }
      if (unit_length < (length + 2) || (linep + unit_length) > enddatap)
        return 0;

      endunitp = linep + unit_length;

      version = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data2, user, compat_task);

      if (length == 4)
        hdr_length = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data4, user, compat_task);
      else
        hdr_length = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data8, user, compat_task);

      if ((linep + hdr_length) > endunitp || hdr_length < (version >= 4 ? 6 : 5))
        return 0;

      endhdrp = linep + hdr_length;

      // minimum instruction length
      min_instr_len = *linep++;
      // max operations per instruction
      if (version >= 4)
        {
          max_ops = *linep++;
          if (max_ops == 0)
              return 0; // max operations per instruction is supposed to be > 0
        }
      // default value of the is_stmt register
      ++linep;
      // line base. this is a signed value.
      line_base = *linep++;
      // line range
      line_range = *linep++;
      if (line_range == 0)
        return 0;
      // opcode base
      opcode_base = *linep++;
      // opcodes
      stdopcode_lens_secp = linep - 1;
      // is this check still needed if the header length check already covers this region?
      if ((linep + opcode_base - 1) >= endhdrp)
        return 0;
      linep += opcode_base - 1;

      // at the directory table. we don't need any other information from the header
      // in order to find the desired line number, so we will save a pointer to
      // this point and skip ahead to the end of the header. this portion of the
      // header will be visited again after a line number has been found if a
      // filename is needed.
      dirsecp = linep;
      linep = endhdrp;

      // iterating through the opcodes. will deal with three defined types of
      // opcode: special, extended and standard. there is also a portion at
      // the end of this loop that will deal with unknown (standard) opcodes.
      while (linep < endunitp)
        {
          uint8_t opcode = *linep++;
          long addr_adv = 0;

          if (opcode >= opcode_base) // special opcode
            {
              // line range was checked before this point. this variable is not altered after it is initialized.
              cumm_line_adv += (line_base + ((opcode - opcode_base) % line_range));
              addr_adv = ((opcode - opcode_base) / line_range);
            }

          else if (opcode == 0) // extended opcode
            {
              int len;
              uint8_t subopcode;

              if (linep + 1 > endunitp)
                return 0;

              len = *linep++;
              if (linep + len > endunitp || len < 1)
                return 0;

              subopcode = *linep++; // the sub opcode
              switch (subopcode)
                {
                  case DW_LNE_end_sequence:
                    // reset the line and address.
                    cumm_line_adv = 1 - curr_linenum;
                    addr_adv = 0 - curr_addr;
                    skip_to_seq_end = 0;
                    op_index = 0;
                    break;
                  case DW_LNE_set_address:
                    if ((len - 1) == 4) // account for the opcode (the -1)
                      curr_addr = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data4, user, compat_task);
                    else if ((len - 1) == 8)
                      curr_addr = (uint64_t) read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data8, user, compat_task);
                    else
                      return 0;

                    // if the set address is past the address we want, iterate
                    // to the end of the sequence without doing more address
                    // and linenumber calcs than necessary
                    if (curr_addr > addr)
                      skip_to_seq_end = 1;
                    op_index = 0;
                    break;
                  default: // advance the ptr by the specified amount
                    linep += len-1;
                    break;
                }
            }
          else if (opcode <= DW_LNS_set_isa) // known standard opcode
            {
              uint8_t *linep_before = linep;
              switch (opcode)
                {
                  case DW_LNS_advance_pc:
                    addr_adv = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task);
                    break;
                  case DW_LNS_fixed_advance_pc:
                    addr_adv = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_data2, user, compat_task);
                    if (linep_before == linep) // the read failed
                      return 0;
                    op_index = 0;
                    break;
                  case DW_LNS_advance_line:
                    cumm_line_adv += read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128+DW_EH_PE_signed, user, compat_task);
                    break;
                  case DW_LNS_set_file:
                    curr_file_idx = read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task);
                    break;
                  case DW_LNS_set_column:
                    read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task);
                    break;
                  case DW_LNS_const_add_pc:
                    addr_adv = ((255 - opcode_base) / line_range);
                    break;
                  case DW_LNS_set_isa:
                    read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task);
                    break;
                }
                if (linep > endunitp) // reading in the leb128 failed
                  return 0;
            }
          else
            {
              int i;
              for (i=stdopcode_lens_secp[opcode]; i>0; --i)
                {
                  read_pointer ((const uint8_t **) &linep, endunitp, DW_EH_PE_leb128, user, compat_task);
                  if (linep > endunitp)
                    return 0;
                }
            }

          // don't worry about doing line/address advances since we are waiting
          // till we hit the end of the sequence or the end of the unit at which
          // point the address and linenumber will be reset
          if (skip_to_seq_end == 1)
            continue;

          // calculate actual address advance
          if (opcode != 0 && opcode != DW_LNS_fixed_advance_pc)
            {
              addr_adv = min_instr_len * (op_index + addr_adv) / max_ops;
              op_index =  (op_index + addr_adv) % max_ops;
            }

          // found an address that at least sort of matches the address we
          // were looking for
          if ((curr_addr <= addr && addr < (curr_addr + addr_adv))
              || (curr_addr == addr && addr_adv != 0))
            {

              if (need_filename)
                _stp_filename_lookup(m, filename, dirsecp, endhdrp,
                                     prev_file_idx, user, compat_task);
              return curr_linenum;
            }

          // update the linenumber and file index if the curr_addr is to be updated
          if (addr_adv != 0)
            {
              prev_file_idx = curr_file_idx;
            }
          if (prev_file_idx == curr_file_idx)
            {
              curr_linenum += cumm_line_adv;
              cumm_line_adv = 0;
            }

          curr_addr += addr_adv;
        }
    }
#endif /* STP_NEED_LINE_DATA */

  // no linenumber was found otherwise this function would have returned before this point
  return 0;
}
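To make the special-opcode arithmetic in the loop above concrete, here is a tiny standalone program applying the same formulas with typical DWARF header constants (opcode_base 13, line_base -5, line_range 14, min_instr_len 1); the constants are illustrative, not read from a real .debug_line section:

#include <stdio.h>

/* Standalone illustration of the special-opcode formulas used in the
 * line-number loop above. */
int main(void)
{
	const int opcode_base = 13, line_base = -5, line_range = 14;
	const int min_instr_len = 1;
	int opcode = 32;	/* a sample special opcode */
	int line_adv = line_base + (opcode - opcode_base) % line_range;
	int addr_adv = min_instr_len * ((opcode - opcode_base) / line_range);

	printf("line += %d, addr += %d\n", line_adv, addr_adv);	/* 0 and 1 */
	return 0;
}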
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_score_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);
						
	/*
	 * If we already have a death outstanding, then bail out right
	 * away, indicating to vmscan that we have nothing further to
	 * offer on this pass.
	 */
	if (lowmem_deathpending &&
	    time_before_eq(jiffies, lowmem_deathpending_timeout))
		return 0;

	if (!spin_trylock(&lowmem_shrink_lock)) {
		lowmem_print(4, "lowmem_shrink lock failed\n");
		return -1;
	}

	/* For JB: FOREGROUND is adj0 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */
	/* For ICS: HOME is adj6 (Default lowmem_adj of AMS is 0, 1, 2, 4, 9, 15) */
	if (other_free <= lowmem_minfree[1]) {
		/* Notify Kernel that we should consider Android threshold */
		lmk_adjz_minfree = lowmem_minfree[0];
	} else {
		lmk_adjz_minfree = 0;
	}

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;

	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		/*
		 * disable indication if low memory
		 */
#if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD)
		if (in_lowmem) {
			in_lowmem = 0;
			lowmem_indicator = 1;
			DAL_LowMemoryOff();
		}
#endif
		spin_unlock(&lowmem_shrink_lock);
		return rem;
	}

	selected_oom_score_adj = min_score_adj;
	// add debug log
#ifdef CONFIG_MT_ENG_BUILD
	if (min_score_adj <= lowmem_debug_adj) {
		lowmem_print(1, "======low memory killer=====\n");
		lowmem_print(1, "Free memory other_free: %d, other_file:%d pages\n", other_free, other_file);
	}		
#endif

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			lowmem_print(1, "lowmem_shrink returning directly, because %d (%s) is dying\n",
				     p->pid, p->comm);
			task_unlock(p);
			rcu_read_unlock();
			spin_unlock(&lowmem_shrink_lock);
			return 0;
		}

		/* We use oom_score_adj to represent oom_adj here although the latter has been deprecated by the kernel. */
		/* This is because JB AMS still uses oom_adj to stand for the importance of activities. */
		/* oom_score_adj = p->signal->oom_score_adj;					 - 2012.07.16 - */
		oom_score_adj = p->signal->oom_adj;
#ifdef CONFIG_MT_ENG_BUILD
		if (min_score_adj <= lowmem_debug_adj)
			lowmem_print(1, "Candidate %d (%s), adj %d, rss %lu, to kill\n",
				     p->pid, p->comm, oom_score_adj, get_mm_rss(p->mm));
#endif
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		lowmem_deathpending = selected;
		lowmem_deathpending_timeout = jiffies + HZ;
#ifdef CONFIG_MT_ENG_BUILD
		if (min_score_adj <= lowmem_debug_adj) {
			lowmem_print(1, "low memory info:\n");
			show_free_areas_minimum();
		}
#endif

		/*
		 * When killing an adj=0 process, trigger a kernel warning
		 * (only in MTK internal eng/non-LCA loads).
		 */
#if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD) && \
    !defined (MTK_LCA_SUPPORT) && !defined (PARTIAL_BUILD) /* mtk internal */
		if (selected_oom_score_adj <= 0) { /* can set 16 for test */
			lowmem_print(1, "low memory trigger kernel warning\n");
			aee_kernel_warning_api("LMK", 0, DB_OPT_DEFAULT|DB_OPT_DUMPSYS_ACTIVITY,
				"Framework low memory", "please contact AP/AF memory module owner\n");
		}
#endif
		/*
		 * show an indication if low memory
		 */
#if defined (CONFIG_MTK_AEE_FEATURE) && defined (CONFIG_MT_ENG_BUILD)
		if (lowmem_indicator && selected_oom_score_adj <= 1) {
			lowmem_print(5, "low memory: raise aee warning\n");
			in_lowmem = 1;
			lowmem_indicator = 0;
			DAL_LowMemoryOn();
			//aee_kernel_warning(module_name, lowmem_warning);
		}
#endif
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	rcu_read_unlock();
	spin_unlock(&lowmem_shrink_lock);
	return rem;
}
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * interruption code (int_code):
 *   04       Protection           ->  Write-Protection  (suppression)
 *   10       Segment translation  ->  Not present       (nullification)
 *   11       Page translation     ->  Not present       (nullification)
 *   3b       Region third trans.  ->  Not present       (nullification)
 */
static inline int do_exception(struct pt_regs *regs, int access)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long trans_exc_code;
	unsigned long address;
	unsigned int flags;
	int fault;

	if (notify_page_fault(regs))
		return 0;

	tsk = current;
	mm = tsk->mm;
	trans_exc_code = regs->int_parm_long;

	/*
	 * Verify that the fault happened in user space, that
	 * we are not in an interrupt and that there is a 
	 * user context.
	 */
	fault = VM_FAULT_BADCONTEXT;
	if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
		goto out;

	address = trans_exc_code & __FAIL_ADDR_MASK;
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	flags = FAULT_FLAG_ALLOW_RETRY;
	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
		flags |= FAULT_FLAG_WRITE;
	down_read(&mm->mmap_sem);

#ifdef CONFIG_PGSTE
	if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) {
		address = __gmap_fault(address,
				     (struct gmap *) S390_lowcore.gmap);
		if (address == -EFAULT) {
			fault = VM_FAULT_BADMAP;
			goto out_up;
		}
		if (address == -ENOMEM) {
			fault = VM_FAULT_OOM;
			goto out_up;
		}
	}
#endif

retry:
	fault = VM_FAULT_BADMAP;
	vma = find_vma(mm, address);
	if (!vma)
		goto out_up;

	if (unlikely(vma->vm_start > address)) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out_up;
		if (expand_stack(vma, address))
			goto out_up;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
	fault = VM_FAULT_BADACCESS;
	if (unlikely(!(vma->vm_flags & access)))
		goto out_up;

	if (is_vm_hugetlb_page(vma))
		address &= HPAGE_MASK;
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR))
		goto out_up;

	/*
	 * Major/minor page fault accounting is only done on the
	 * initial attempt. If we go through a retry, it is extremely
	 * likely that the page will be found in page cache at that point.
	 */
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
			 * of starvation. */
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;
			down_read(&mm->mmap_sem);
			goto retry;
		}
	}
	/*
	 * The instruction that caused the program check will
	 * be repeated. Don't signal single step via SIGTRAP.
	 */
	clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
	fault = 0;
out_up:
	up_read(&mm->mmap_sem);
out:
	return fault;
}
Example #28
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int minfree = 0;
	int selected_tasksize = 0;
	short selected_oom_score_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

	/* For requests for unmovable pages, take no account of free CMA pages. */
	if (IS_ENABLED(CONFIG_CMA) &&
	    allocflags_to_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE)
		other_free -= global_page_state(NR_FREE_CMA_PAGES);

	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		minfree = lowmem_minfree[i];
		if (other_free < minfree && other_file < minfree) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}
	selected_oom_score_adj = min_score_adj;

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		short oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			task_unlock(p);
			rcu_read_unlock();
			return 0;
		}
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n",
			     p->comm, p->pid, oom_score_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "Killing '%s' (%d), adj %hd,\n" \
				"   to free %ldkB on behalf of '%s' (%d) because\n" \
				"   cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \
				"   Free memory is %ldkB above reserved\n",
			     selected->comm, selected->pid,
			     selected_oom_score_adj,
			     selected_tasksize * (long)(PAGE_SIZE / 1024),
			     current->comm, current->pid,
			     other_file * (long)(PAGE_SIZE / 1024),
			     minfree * (long)(PAGE_SIZE / 1024),
			     min_score_adj,
			     other_free * (long)(PAGE_SIZE / 1024));
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	rcu_read_unlock();
	return rem;
}
Example #29
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
	struct task_struct *selected = NULL;
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
	int selected_tasksize = 0;
	int selected_oom_score_adj;
	int array_size = ARRAY_SIZE(lowmem_adj);
	int other_free = global_page_state(NR_FREE_PAGES);
	int other_file = global_page_state(NR_FILE_PAGES) -
						global_page_state(NR_SHMEM);

	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}
	selected_oom_score_adj = min_score_adj;

	rcu_read_lock();
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
		    time_before_eq(jiffies, lowmem_deathpending_timeout)) {
			task_unlock(p);
			rcu_read_unlock();
			return 0;
		}
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
	}
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
	}
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	rcu_read_unlock();
	if (selected)
		compact_nodes(false);
	return rem;
}
static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
{
	struct task_struct *tsk;
#ifdef ENHANCED_LMK_ROUTINE
	struct task_struct *selected[LOWMEM_DEATHPENDING_DEPTH] = {NULL,};
#else
	struct task_struct *selected = NULL;
#endif
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE
	static DEFINE_RATELIMIT_STATE(lmk_rs, DEFAULT_RATELIMIT_INTERVAL, 0);
#else
	static DEFINE_RATELIMIT_STATE(lmk_rs, 6*DEFAULT_RATELIMIT_INTERVAL, 0);
#endif
#endif
	int rem = 0;
	int tasksize;
	int i;
	int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
#ifdef ENHANCED_LMK_ROUTINE
	int selected_tasksize[LOWMEM_DEATHPENDING_DEPTH] = {0,};
	int selected_oom_score_adj[LOWMEM_DEATHPENDING_DEPTH] = {OOM_ADJUST_MAX,};
	int all_selected_oom = 0;
	int max_selected_oom_idx = 0;
#else
	int selected_tasksize = 0;
	int selected_oom_score_adj;
#endif
	int array_size = ARRAY_SIZE(lowmem_adj);
#if (!defined(CONFIG_MACH_JF) \
	&& !defined(CONFIG_SEC_PRODUCT_8960)\
	)
	unsigned long nr_to_scan = sc->nr_to_scan;
#endif
#ifndef CONFIG_CMA
	int other_free = global_page_state(NR_FREE_PAGES);
#else
	int other_free = global_page_state(NR_FREE_PAGES) -
				global_page_state(NR_FREE_CMA_PAGES);
#endif
	int other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM);
#ifdef CONFIG_ZRAM_FOR_ANDROID
	other_file -= total_swapcache_pages;
#endif /* CONFIG_ZRAM_FOR_ANDROID */
	if (lowmem_adj_size < array_size)
		array_size = lowmem_adj_size;
	if (lowmem_minfree_size < array_size)
		array_size = lowmem_minfree_size;
	for (i = 0; i < array_size; i++) {
		if (other_free < lowmem_minfree[i] &&
		    other_file < lowmem_minfree[i]) {
			min_score_adj = lowmem_adj[i];
			break;
		}
	}
	if (sc->nr_to_scan > 0)
		lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				sc->nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
	rem = global_page_state(NR_ACTIVE_ANON) +
		global_page_state(NR_ACTIVE_FILE) +
		global_page_state(NR_INACTIVE_ANON) +
		global_page_state(NR_INACTIVE_FILE);
	if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
		lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
			     sc->nr_to_scan, sc->gfp_mask, rem);
		return rem;
	}

#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++)
		selected_oom_score_adj[i] = min_score_adj;
#else
	selected_oom_score_adj = min_score_adj;
#endif

#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 1);
#endif /* CONFIG_ZRAM_FOR_ANDROID */
	read_lock(&tasklist_lock);
	for_each_process(tsk) {
		struct task_struct *p;
		int oom_score_adj;
#ifdef ENHANCED_LMK_ROUTINE
		int is_exist_oom_task = 0;
#endif

		if (tsk->flags & PF_KTHREAD)
			continue;

		p = find_lock_task_mm(tsk);
		if (!p)
			continue;

		if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
			time_before_eq(jiffies, lowmem_deathpending_timeout)) {
				task_unlock(p);
				read_unlock(&tasklist_lock);
#ifdef CONFIG_ZRAM_FOR_ANDROID
				atomic_set(&s_reclaim.lmk_running, 0);
#endif /* CONFIG_ZRAM_FOR_ANDROID */
				return 0;
		}
		
		oom_score_adj = p->signal->oom_score_adj;
		if (oom_score_adj < min_score_adj) {
			task_unlock(p);
			continue;
		}
		tasksize = get_mm_rss(p->mm);
		task_unlock(p);
		if (tasksize <= 0)
			continue;

#ifdef ENHANCED_LMK_ROUTINE
		if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH) {
			for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
				if (!selected[i]) {
					is_exist_oom_task = 1;
					max_selected_oom_idx = i;
					break;
				}
			}
		} else if (selected_oom_score_adj[max_selected_oom_idx] < oom_score_adj ||
			(selected_oom_score_adj[max_selected_oom_idx] == oom_score_adj &&
			selected_tasksize[max_selected_oom_idx] < tasksize)) {
			is_exist_oom_task = 1;
		}

		if (is_exist_oom_task) {
			selected[max_selected_oom_idx] = p;
			selected_tasksize[max_selected_oom_idx] = tasksize;
			selected_oom_score_adj[max_selected_oom_idx] = oom_score_adj;

			if (all_selected_oom < LOWMEM_DEATHPENDING_DEPTH)
				all_selected_oom++;

			if (all_selected_oom == LOWMEM_DEATHPENDING_DEPTH) {
				for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
					if (selected_oom_score_adj[i] < selected_oom_score_adj[max_selected_oom_idx])
						max_selected_oom_idx = i;
					else if (selected_oom_score_adj[i] == selected_oom_score_adj[max_selected_oom_idx] &&
						selected_tasksize[i] < selected_tasksize[max_selected_oom_idx])
						max_selected_oom_idx = i;
				}
			}

			lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
				     p->pid, p->comm, oom_score_adj, tasksize);
		}
#else
		if (selected) {
			if (oom_score_adj < selected_oom_score_adj)
				continue;
			if (oom_score_adj == selected_oom_score_adj &&
			    tasksize <= selected_tasksize)
				continue;
		}
		selected = p;
		selected_tasksize = tasksize;
		selected_oom_score_adj = oom_score_adj;
		lowmem_print(2, "select %d (%s), adj %d, size %d, to kill\n",
			     p->pid, p->comm, oom_score_adj, tasksize);
#endif
	}
#ifdef ENHANCED_LMK_ROUTINE
	for (i = 0; i < LOWMEM_DEATHPENDING_DEPTH; i++) {
		if (selected[i]) {
			lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d, free memory = %d, reclaimable memory = %d\n",
				     selected[i]->pid, selected[i]->comm,
				     selected_oom_score_adj[i],
				     selected_tasksize[i],
				     other_free, other_file);
			lowmem_deathpending_timeout = jiffies + HZ;
			send_sig(SIGKILL, selected[i], 0);
			set_tsk_thread_flag(selected[i], TIF_MEMDIE);
			rem -= selected_tasksize[i];
#ifdef LMK_COUNT_READ
			lmk_count++;
#endif
		}
	}
#else
	if (selected) {
		lowmem_print(1, "send sigkill to %d (%s), adj %d, size %d\n",
			     selected->pid, selected->comm,
			     selected_oom_score_adj, selected_tasksize);
		lowmem_deathpending_timeout = jiffies + HZ;
		send_sig(SIGKILL, selected, 0);
		set_tsk_thread_flag(selected, TIF_MEMDIE);
		rem -= selected_tasksize;
#ifdef LMK_COUNT_READ
		lmk_count++;
#endif
	}
#endif
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO
	if (__ratelimit(&lmk_rs)) {
		lowmem_print(1, "lowmem_shrink %lu, %x, ofree %d %d, ma %d\n",
				nr_to_scan, sc->gfp_mask, other_free,
				other_file, min_score_adj);
#ifdef CONFIG_SEC_DEBUG_LMK_MEMINFO_VERBOSE
		show_mem(SHOW_MEM_FILTER_NODES);
		dump_tasks_info();
#endif
	}
#endif
	lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
		     sc->nr_to_scan, sc->gfp_mask, rem);
	read_unlock(&tasklist_lock);
#ifdef CONFIG_ZRAM_FOR_ANDROID
	atomic_set(&s_reclaim.lmk_running, 0);
#endif /* CONFIG_ZRAM_FOR_ANDROID */
	return rem;
}
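All of the lowmem_shrink variants above share one victim-selection rule, shown here factored into a standalone predicate (this helper is ours, not in any of the drivers): prefer the higher oom adjustment, and break ties toward the larger resident set.

/* Hypothetical distillation of the selection logic repeated above: a
 * candidate replaces the current victim if its adj is strictly higher,
 * or equal with a strictly larger RSS. */
static bool lowmem_better_victim(short adj, int tasksize,
				 short selected_adj, int selected_tasksize)
{
	if (adj != selected_adj)
		return adj > selected_adj;
	return tasksize > selected_tasksize;
}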