/*
 * Signal sysrq helper function.  Sends a signal to all user processes.
 */
static void send_sig_all(int sig)
{
	struct task_struct *p;

	for_each_process(p) {
		if (p->mm && !is_global_init(p))
			/* Not the swapper, init, or a kernel thread */
			force_sig(sig, p);
	}
}
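Example #2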
unsigned long __copy_to_user_ll(void __user *to, const void *from,
				unsigned long n)
{
#ifndef CONFIG_X86_WP_WORKS_OK
	if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
			((unsigned long)to) < TASK_SIZE) {
		if (in_atomic())
			return n;

		while (n) {
			unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
			unsigned long len = PAGE_SIZE - offset;
			int retval;
			struct page *pg;
			void *maddr;

			if (len > n)
				len = n;

survive:
			down_read(&current->mm->mmap_sem);
			retval = get_user_pages(current, current->mm,
					(unsigned long)to, 1, 1, 0, &pg, NULL);

			if (retval == -ENOMEM && is_global_init(current)) {
				up_read(&current->mm->mmap_sem);
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				goto survive;
			}

			if (retval != 1) {
				up_read(&current->mm->mmap_sem);
				break;
			}

			maddr = kmap_atomic(pg);
			memcpy(maddr + offset, from, len);
			kunmap_atomic(maddr);
			set_page_dirty_lock(pg);
			put_page(pg);
			up_read(&current->mm->mmap_sem);

			from += len;
			to += len;
			n -= len;
		}
		return n;
	}
#endif
	if (movsl_is_ok(to, from, n))
		__copy_user(to, from, n);
	else
		n = __copy_user_intel(to, from, n);
	return n;
}
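The return convention here is easy to misread: the function returns the number of bytes that could NOT be copied, so zero means success. A minimal hedged sketch of a caller (read_kernel_buf is a hypothetical name, not a kernel API):

/* Sketch only: hypothetical caller of __copy_to_user_ll(). */
static long read_kernel_buf(void __user *dst, const void *src,
			    unsigned long len)
{
	unsigned long uncopied;

	uncopied = __copy_to_user_ll(dst, src, len);
	if (uncopied)
		return -EFAULT;		/* partial copy: report failure */
	return len;			/* everything reached user space */
}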
Example #3
static int opal_recover_mce(struct pt_regs *regs,
					struct machine_check_event *evt)
{
	int recovered = 0;
	uint64_t ea = get_mce_fault_addr(evt);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;
	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;
	} else if (ea && !is_kernel_addr(ea)) {
		/*
		 * Faulting address is not in kernel text. We should be fine.
		 * We need to find which process uses this address.
		 * For now, kill the task if we received the exception while
		 * in userspace.
		 *
		 * TODO: Queue up this address for hwpoisoning later.
		 */
		if (user_mode(regs) && !is_global_init(current)) {
			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
			recovered = 1;
		} else
			recovered = 0;
	} else if (user_mode(regs) && !is_global_init(current) &&
		evt->severity == MCE_SEV_ERROR_SYNC) {
		/*
		 * If we have received a synchronous error when in userspace,
		 * kill the task.
		 */
		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}
	return recovered;
}
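For orientation, a hedged sketch of how a PowerNV machine-check entry point might consume the return value; it is modelled loosely on arch/powerpc, and the exact signatures of get_mce_event() and machine_check_print_event_info() vary by kernel version, so treat them as assumptions:

/* Sketch: MCE entry point, heavily simplified. */
int opal_machine_check(struct pt_regs *regs)
{
	struct machine_check_event evt;

	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
		return 0;

	machine_check_print_event_info(&evt, user_mode(regs));

	if (opal_recover_mce(regs, &evt))
		return 1;	/* recovered; resume interrupted context */

	panic("PowerNV Unrecoverable Machine Check");
}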
Example #4
File: sysrq.c Project: kprog/linux
/*
 * Signal sysrq helper function.  Sends a signal to all user processes.
 */
static void send_sig_all(int sig)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		if (p->flags & PF_KTHREAD)
			continue;
		if (is_global_init(p))
			continue;

		force_sig(sig, p);
	}
	read_unlock(&tasklist_lock);
}
Example #5
/*
 * Signal sysrq helper function.  Sends a signal to all user processes.
 */
static void send_sig_all(int sig)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_process(p) {
		if (p->flags & PF_KTHREAD)
			continue;
		if (is_global_init(p))
			continue;

		do_send_sig_info(sig, SEND_SIG_FORCED, p, true);
	}
	read_unlock(&tasklist_lock);
}
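For completeness, a sketch of how such a helper is wired into the sysrq key table. The field names follow struct sysrq_key_op in drivers/tty/sysrq.c; the single-argument handler signature is an assumption about kernel version (older kernels also passed a tty pointer):

/* Sketch: the sysrq 'e' (terminate-all-tasks) registration. */
static void sysrq_handle_term(int key)
{
	send_sig_all(SIGTERM);
	console_loglevel = 8;	/* make the fallout visible on console */
}

static struct sysrq_key_op sysrq_term_op = {
	.handler	= sysrq_handle_term,
	.help_msg	= "terminate-all-tasks(e)",
	.action_msg	= "Terminate All Tasks",
	.enable_mask	= SYSRQ_ENABLE_SIGNAL,
};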
Example #6
void trace_unhandled_signal(const char *type, struct pt_regs *regs,
			    unsigned long address, int sig)
{
	struct task_struct *tsk = current;

	if (show_unhandled_signals == 0)
		return;

	/* If the signal is handled, don't show it here. */
	if (!is_global_init(tsk)) {
		void __user *handler =
			tsk->sighand->action[sig-1].sa.sa_handler;
		if (handler != SIG_IGN && handler != SIG_DFL)
			return;
	}

	/* Rate-limit the one-line output, not the detailed output. */
	if (show_unhandled_signals <= 1 && !printk_ratelimit())
		return;

	printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d",
	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
	       tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig);

	print_vma_addr(KERN_CONT " in ", regs->pc);

	printk(KERN_CONT "\n");

	if (show_unhandled_signals > 1) {
		switch (sig) {
		case SIGILL:
		case SIGFPE:
		case SIGSEGV:
		case SIGBUS:
			pr_err("User crash: signal %d,"
			       " trap %ld, address 0x%lx\n",
			       sig, regs->faultnum, address);
			show_regs(regs);
			dump_mem((void __user *)address);
			break;
		default:
			pr_err("User crash: signal %d, trap %ld\n",
			       sig, regs->faultnum);
			break;
		}
	}
}
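A hedged sketch of a typical call site: the tracer runs before the signal is actually delivered, so the rate-limited diagnostics and the SIGSEGV arrive together (report_segfault is a hypothetical wrapper; the two-argument force_sig() matches the older API used elsewhere in these examples):

/* Sketch: log an unhandled fault, then deliver the signal. */
static void report_segfault(struct pt_regs *regs, unsigned long address)
{
	trace_unhandled_signal("segfault", regs, address, SIGSEGV);
	force_sig(SIGSEGV, current);
}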
Example #7
int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in the do_exit() path, one for each of
	 * these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}
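The predicate is consumed from the architecture oops paths; a minimal sketch of the usual pattern (oops_end_sketch is a hypothetical name):

/* Sketch: hand control to the crash kernel when an oops qualifies. */
static void oops_end_sketch(struct pt_regs *regs)
{
	if (kexec_should_crash(current))
		crash_kexec(regs);
	do_exit(SIGSEGV);
}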
Example #8
File: ras.c Project: avagin/linux
/*
 * See if we can recover from a machine check exception.
 * This is only called on power4 (or above) and only via
 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 * which provides the error analysis for us.
 *
 * Return 1 if corrected (or delivered a signal).
 * Return 0 if there is nothing we can do.
 */
static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
{
	int recovered = 0;
	int disposition = rtas_error_disposition(err);

	pseries_print_mce_info(regs, err);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
		recovered = 0;

	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;

	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
		/* Platform corrected itself but could be degraded */
		printk(KERN_ERR "MCE: limited recovery, system may "
		       "be degraded\n");
		recovered = 1;

	} else if (user_mode(regs) && !is_global_init(current) &&
		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {

		/*
		 * If we received a synchronous error when in userspace,
		 * kill the task. Firmware may report details of the failure
		 * asynchronously, so we can't rely on the target and type
		 * fields being valid here.
		 */
		printk(KERN_ERR "MCE: uncorrectable error, killing task "
		       "%s:%d\n", current->comm, current->pid);

		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}

	pseries_process_ue(regs, err);

	/* Queue irq work to log this rtas event later. */
	irq_work_queue(&mce_errlog_process_work);

	return recovered;
}
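A hedged sketch of the fwnmi entry point that feeds this routine, loosely modelled on arch/powerpc/platforms/pseries/ras.c; the fwnmi_* helper names are assumptions about that file's internals:

/* Sketch: fetch the RTAS error log and attempt recovery. */
int pSeries_machine_check_exception(struct pt_regs *regs)
{
	struct rtas_error_log *errp;
	int recovered = 0;

	if (fwnmi_active) {
		errp = fwnmi_get_errinfo(regs);
		if (errp)
			recovered = recover_mce(regs, errp);
		fwnmi_release_errinfo();
	}
	return recovered;
}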
Example #9
/* Return true if the task is not suitable as an OOM victim candidate. */
static bool oom_unkillable_task(struct task_struct *p,
		const struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
	if (is_global_init(p))
		return true;
	if (p->flags & PF_KTHREAD)
		return true;

	/* When mem_cgroup_out_of_memory() and p is not member of the group */
	if (memcg && !task_in_mem_cgroup(p, memcg))
		return true;

	/* p may not have freeable memory in nodemask */
	if (!has_intersects_mems_allowed(p, nodemask))
		return true;

	return false;
}
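To show the filter in context, a hedged sketch of the victim-selection loop that pairs it with oom_badness() (pick_victim is a hypothetical name; the real loop in mm/oom_kill.c also handles OOM_SCORE_ADJ_MIN and already-dying tasks):

/* Sketch: choose the process with the highest badness score. */
static struct task_struct *pick_victim(struct mem_cgroup *memcg,
				       const nodemask_t *nodemask,
				       unsigned long totalpages)
{
	struct task_struct *p, *chosen = NULL;
	unsigned int points, chosen_points = 0;

	for_each_process(p) {
		if (oom_unkillable_task(p, memcg, nodemask))
			continue;
		points = oom_badness(p, memcg, nodemask, totalpages);
		if (points > chosen_points) {
			chosen = p;
			chosen_points = points;
		}
	}
	return chosen;
}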
Example #10
/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
			    unsigned long address)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;

	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		return 1;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (regs->psw.mask & PSW_MASK_PSTATE)
		do_group_exit(SIGKILL);
	do_no_context(regs, error_code, address);
	return 0;
}
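Note the contract: on the return-1 path the helper comes back with mmap_sem re-acquired, so the caller can jump straight back into the fault retry. A hedged sketch of the dispatch (the labels and surrounding flow are assumptions, simplified from the s390 fault handler):

	down_read(&mm->mmap_sem);
survive:
	/* ... find the vma and check access rights ... */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_OOM)) {
		/* returns 1, with mmap_sem re-taken, only for init */
		if (do_out_of_memory(regs, error_code, address))
			goto survive;
		return;
	}
	up_read(&mm->mmap_sem);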
Example #11
/*
 * See if we can recover from a machine check exception.
 * This is only called on power4 (or above) and only via
 * the Firmware Non-Maskable Interrupts (fwnmi) handler
 * which provides the error analysis for us.
 *
 * Return 1 if corrected (or delivered a signal).
 * Return 0 if there is nothing we can do.
 */
static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
{
	int recovered = 0;
	int disposition = rtas_error_disposition(err);

	if (!(regs->msr & MSR_RI)) {
		/* If MSR_RI isn't set, we cannot recover */
		recovered = 0;

	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
		/* Platform corrected itself */
		recovered = 1;

	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
		/* Platform corrected itself but could be degraded */
		printk(KERN_ERR "MCE: limited recovery, system may "
		       "be degraded\n");
		recovered = 1;

	} else if (user_mode(regs) && !is_global_init(current) &&
		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {

		/*
		 * If we received a synchronous error when in userspace,
		 * kill the task. Firmware may report details of the failure
		 * asynchronously, so we can't rely on the target and type
		 * fields being valid here.
		 */
		printk(KERN_ERR "MCE: uncorrectable error, killing task "
		       "%s:%d\n", current->comm, current->pid);

		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
		recovered = 1;
	}

	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);

	return recovered;
}
Example #12
void do_page_fault(struct pt_regs *regs, int write, unsigned long address,
		   unsigned long cause_code)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	siginfo_t info;
	int fault, ret;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (address >= VMALLOC_START && address <= VMALLOC_END) {
		ret = handle_vmalloc_fault(address);
		if (unlikely(ret))
			goto bad_area_nosemaphore;
		else
			return;
	}

	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto no_context;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;
retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	info.si_code = SEGV_ACCERR;

	/* Handle protection violation, execute on heap or stack */

	if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH))
		goto bad_area;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);

	/* If Pagefault was interrupted by SIGKILL, exit page fault "early" */
	if (unlikely(fatal_signal_pending(current))) {
		if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY))
			up_read(&mm->mmap_sem);
		if (user_mode(regs))
			return;
	}

	if (likely(!(fault & VM_FAULT_ERROR))) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			/* To avoid updating stats twice for retry case */
			if (fault & VM_FAULT_MAJOR)
				tsk->maj_flt++;
			else
				tsk->min_flt++;

			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;
				goto retry;
			}
		}

		/* Fault Handled Gracefully */
		up_read(&mm->mmap_sem);
		return;
	}

	/* TBD: switch to pagefault_out_of_memory() */
	if (fault & VM_FAULT_OOM)
		goto out_of_memory;
	else if (fault & VM_FAULT_SIGBUS)
		goto do_sigbus;

	/* no man's land */
	BUG();

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.fault_address = address;
		tsk->thread.cause_code = cause_code;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */
	if (fixup_exception(regs))
		return;

	die("Oops", regs, address, cause_code);

out_of_memory:
	if (is_global_init(tsk)) {
		yield();
		goto survive;
	}
	up_read(&mm->mmap_sem);

	if (user_mode(regs))
		do_group_exit(SIGKILL);	/* This will never return */

	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	if (!user_mode(regs))
		goto no_context;

	tsk->thread.fault_address = address;
	tsk->thread.cause_code = cause_code;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *)address;
	force_sig_info(SIGBUS, &info, tsk);
}
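The no_context comment above leans on the exception-table mechanism; a hedged sketch of what fixup_exception() does conceptually (fixup_exception_sketch is hypothetical; on ARC the saved program counter really is regs->ret, as in arch/arc/mm/extable.c):

/* Sketch: resume at the fixup stub registered for the faulting pc. */
static int fixup_exception_sketch(struct pt_regs *regs)
{
	const struct exception_table_entry *fixup;

	fixup = search_exception_tables(instruction_pointer(regs));
	if (fixup) {
		regs->ret = fixup->fixup;	/* jump to the fixup code */
		return 1;
	}
	return 0;
}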
Example #13
/*
 * This routine is responsible for faulting in user pages.
 * It passes the work off to one of the appropriate routines.
 * It returns true if the fault was successfully handled.
 */
static int handle_page_fault(struct pt_regs *regs,
			     int fault_num,
			     int is_page_fault,
			     unsigned long address,
			     int write)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long stack_offset;
	int fault;
	int si_code;
	int is_kernel_mode;
	pgd_t *pgd;

	/* on TILE, protection faults are always writes */
	if (!is_page_fault)
		write = 1;

	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);

	tsk = validate_current();

	/*
	 * Check to see if we might be overwriting the stack, and bail
	 * out if so.  The page fault code is a relatively likely
	 * place to get trapped in an infinite regress, and once we
	 * overwrite the whole stack, it becomes very hard to recover.
	 */
	stack_offset = stack_pointer & (THREAD_SIZE-1);
	if (stack_offset < THREAD_SIZE / 8) {
		pr_alert("Potential stack overrun: sp %#lx\n",
		       stack_pointer);
		show_regs(regs);
		pr_alert("Killing current process %d/%s\n",
		       tsk->pid, tsk->comm);
		do_group_exit(SIGKILL);
	}

	/*
	 * Early on, we need to check for migrating PTE entries;
	 * see homecache.c.  If we find a migrating PTE, we wait until
	 * the backing page claims to be done migrating, then we proceed.
	 * For kernel PTEs, we rewrite the PTE and return and retry.
	 * Otherwise, we treat the fault like a normal "no PTE" fault,
	 * rather than trying to patch up the existing PTE.
	 */
	pgd = get_current_pgd();
	if (handle_migrating_pte(pgd, fault_num, address,
				 is_kernel_mode, write))
		return 1;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection fault.
	 */
	if (unlikely(address >= TASK_SIZE &&
		     !is_arch_mappable_range(address, 0))) {
		if (is_kernel_mode && is_page_fault &&
		    vmalloc_fault(pgd, address) >= 0)
			return 1;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		mm = NULL;  /* happy compiler */
		vma = NULL;
		goto bad_area_nosemaphore;
	}

	/*
	 * If we're trying to touch user-space addresses, we must
	 * be either at PL0, or else with interrupts enabled in the
	 * kernel, so either way we can re-enable interrupts here.
	 */
	local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm) {
		vma = NULL;  /* happy compiler */
		goto bad_area_nosemaphore;
	}

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (is_kernel_mode &&
		    !search_exception_tables(regs->pc)) {
			vma = NULL;  /* happy compiler */
			goto bad_area_nosemaphore;
		}
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (regs->sp < PAGE_OFFSET) {
		/*
		 * accessing the stack below sp is always a bug.
		 */
		if (address < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	if (fault_num == INT_ITLB_MISS) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (write) {
#ifdef TEST_VERIFY_AREA
		if (!is_page_fault && regs->cs == KERNEL_CS)
			pr_err("WP fault at "REGFMT"\n", regs->eip);
#endif
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!is_page_fault || !(vma->vm_flags & VM_READ))
			goto bad_area;
	}

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	/*
	 * If this was an asynchronous fault,
	 * restart the appropriate engine.
	 */
	switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		break;
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
		__insn_mtspr(SPR_SNCTL,
			     __insn_mfspr(SPR_SNCTL) &
			     ~SPR_SNCTL__FRZPROC_MASK);
		break;
#endif
	}
#endif

	up_read(&mm->mmap_sem);
	return 1;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (!is_kernel_mode) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
				     fault_num, tsk, regs);
		return 0;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return 0;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	bust_spinlocks(1);

	/* FIXME: no lookup_address() yet */
#ifdef SUPPORT_LOOKUP_ADDRESS
	if (fault_num == INT_ITLB_MISS) {
		pte_t *pte = lookup_address(address);

		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
			pr_crit("kernel tried to execute"
			       " non-executable page - exploit attempt?"
			       " (uid: %d)\n", current->uid);
	}
#endif
	if (address < PAGE_SIZE)
		pr_alert("Unable to handle kernel NULL pointer dereference\n");
	else
		pr_alert("Unable to handle kernel paging request\n");
	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
		 address, regs->pc);

	show_regs(regs);

	if (unlikely(tsk->pid < 2)) {
		panic("Kernel page fault running %s!",
		      tsk->pid ? "init" : "the idle task");
	}

	/*
	 * More FIXME: we should probably copy the i386 here and
	 * implement a generic die() routine.  Not today.
	 */
#ifdef SUPPORT_DIE
	die("Oops", regs);
#endif
	bust_spinlocks(0);	/* re-enable console output before we kill the task */

	do_group_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	pr_alert("VM: killing process %s\n", tsk->comm);
	if (!is_kernel_mode)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (is_kernel_mode)
		goto no_context;

	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
			     fault_num, tsk, regs);
	return 0;
}
Example #14
File: fault.c Project: 274914765/C
void do_page_fault(struct pt_regs *regs)
{
    struct vm_area_struct * vma;
    struct mm_struct *mm = current->mm;
    unsigned int exccause = regs->exccause;
    unsigned int address = regs->excvaddr;
    siginfo_t info;

    int is_write, is_exec;
    int fault;

    info.si_code = SEGV_MAPERR;

    /* We fault-in kernel-space virtual memory on-demand. The
     * 'reference' page table is init_mm.pgd.
     */
    if (address >= TASK_SIZE && !user_mode(regs))
        goto vmalloc_fault;

    /* If we're in an interrupt or have no user
     * context, we must not take the fault..
     */
    if (in_atomic() || !mm) {
        bad_page_fault(regs, address, SIGSEGV);
        return;
    }

    is_write = (exccause == EXCCAUSE_STORE_CACHE_ATTRIBUTE) ? 1 : 0;
    is_exec =  (exccause == EXCCAUSE_ITLB_PRIVILEGE ||
            exccause == EXCCAUSE_ITLB_MISS ||
            exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0;

#ifdef DEBUG_PAGE_FAULT
    printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid,
           address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
#endif

    down_read(&mm->mmap_sem);
    vma = find_vma(mm, address);

    if (!vma)
        goto bad_area;
    if (vma->vm_start <= address)
        goto good_area;
    if (!(vma->vm_flags & VM_GROWSDOWN))
        goto bad_area;
    if (expand_stack(vma, address))
        goto bad_area;

    /* Ok, we have a good vm_area for this memory access, so
     * we can handle it..
     */

good_area:
    info.si_code = SEGV_ACCERR;

    if (is_write) {
        if (!(vma->vm_flags & VM_WRITE))
            goto bad_area;
    } else if (is_exec) {
        if (!(vma->vm_flags & VM_EXEC))
            goto bad_area;
    } else    /* Allow read even from write-only pages. */
        if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
            goto bad_area;

    /* If for any reason at all we couldn't handle the fault,
     * make sure we exit gracefully rather than endlessly redo
     * the fault.
     */
survive:
    fault = handle_mm_fault(mm, vma, address, is_write);
    if (unlikely(fault & VM_FAULT_ERROR)) {
        if (fault & VM_FAULT_OOM)
            goto out_of_memory;
        else if (fault & VM_FAULT_SIGBUS)
            goto do_sigbus;
        BUG();
    }
    if (fault & VM_FAULT_MAJOR)
        current->maj_flt++;
    else
        current->min_flt++;

    up_read(&mm->mmap_sem);
    return;

    /* Something tried to access memory that isn't in our memory map..
     * Fix it, but check if it's kernel or user first..
     */
bad_area:
    up_read(&mm->mmap_sem);
    if (user_mode(regs)) {
        current->thread.bad_vaddr = address;
        current->thread.error_code = is_write;
        info.si_signo = SIGSEGV;
        info.si_errno = 0;
        /* info.si_code has been set above */
        info.si_addr = (void *) address;
        force_sig_info(SIGSEGV, &info, current);
        return;
    }
    bad_page_fault(regs, address, SIGSEGV);
    return;


    /* We ran out of memory, or some other thing happened to us that made
     * us unable to handle the page fault gracefully.
     */
out_of_memory:
    up_read(&mm->mmap_sem);
    if (is_global_init(current)) {
        yield();
        down_read(&mm->mmap_sem);
        goto survive;
    }
    printk("VM: killing process %s\n", current->comm);
    if (user_mode(regs))
        do_group_exit(SIGKILL);
    bad_page_fault(regs, address, SIGKILL);
    return;

do_sigbus:
    up_read(&mm->mmap_sem);

    /* Send a sigbus, regardless of whether we were in kernel
     * or user mode.
     */
    current->thread.bad_vaddr = address;
    info.si_signo = SIGBUS;
    info.si_errno = 0;
    info.si_code = BUS_ADRERR;
    info.si_addr = (void *) address;
    force_sig_info(SIGBUS, &info, current);

    /* Kernel mode? Handle exceptions or die */
    if (!user_mode(regs))
        bad_page_fault(regs, address, SIGBUS);
    return;

vmalloc_fault:
    {
        /* Synchronize this task's top level page-table
         * with the 'reference' page table.
         */
        struct mm_struct *act_mm = current->active_mm;
        int index = pgd_index(address);
        pgd_t *pgd, *pgd_k;
        pmd_t *pmd, *pmd_k;
        pte_t *pte_k;

        if (act_mm == NULL)
            goto bad_page_fault;

        pgd = act_mm->pgd + index;
        pgd_k = init_mm.pgd + index;

        if (!pgd_present(*pgd_k))
            goto bad_page_fault;

        pgd_val(*pgd) = pgd_val(*pgd_k);

        pmd = pmd_offset(pgd, address);
        pmd_k = pmd_offset(pgd_k, address);
        if (!pmd_present(*pmd) || !pmd_present(*pmd_k))
            goto bad_page_fault;

        pmd_val(*pmd) = pmd_val(*pmd_k);
        pte_k = pte_offset_kernel(pmd_k, address);

        if (!pte_present(*pte_k))
            goto bad_page_fault;
        return;
    }
bad_page_fault:
    bad_page_fault(regs, address, SIGKILL);
    return;
}
Example #15
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
				unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	int code = SEGV_MAPERR;
	int fault;
	unsigned int flags = 0;

	cause >>= 2;

	/* Restart the instruction */
	regs->ea -= 4;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto vmalloc_fault;
	}

	if (unlikely(address >= TASK_SIZE))
		goto bad_area_nosemaphore;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->ea))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	code = SEGV_ACCERR;

	switch (cause) {
	case EXC_SUPERV_INSN_ACCESS:
		goto bad_area;
	case EXC_SUPERV_DATA_ACCESS:
		goto bad_area;
	case EXC_X_PROTECTION_FAULT:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case EXC_R_PROTECTION_FAULT:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case EXC_W_PROTECTION_FAULT:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags = FAULT_FLAG_WRITE;
		break;
	}

survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		if (unhandled_signal(current, SIGSEGV) && printk_ratelimit()) {
			pr_info("%s: unhandled page fault (%d) at 0x%08lx, "
				"cause %ld\n", current->comm, SIGSEGV, address, cause);
			show_regs(regs);
		}
		_exception(SIGSEGV, regs, code, address);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	pr_alert("Unable to handle kernel %s at virtual address %08lx",
		address < PAGE_SIZE ? "NULL pointer dereference" :
		"paging request", address);
	pr_alert("ea = %08lx, ra = %08lx, cause = %ld\n", regs->ea, regs->ra,
		cause);
	panic("Oops");
	return;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	_exception(SIGBUS, regs, BUS_ADRERR, address);
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Do _not_ use "tsk" here. We might be inside
		 * an interrupt in the middle of a task switch..
		 */
		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = pgd_current + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;
		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		flush_tlb_one(address);
		return;
	}
}
Example #16
/*
 * If this is a system OOM (not a memcg OOM) and the task selected to be
 * killed is not already running at high (RT) priorities, speed up the
 * recovery by boosting the dying task to the lowest FIFO priority.
 * That helps with the recovery and avoids interfering with RT tasks.
 */
static void boost_dying_task_prio(struct task_struct *p,
				  struct mem_cgroup *mem)
{
	struct sched_param param = { .sched_priority = 1 };

	if (mem)
		return;

	if (!rt_task(p))
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
}

/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer.  Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
	struct task_struct *t = p;

	do {
		task_lock(t);
		if (likely(t->mm))
			return t;
		task_unlock(t);
	} while_each_thread(p, t);

	return NULL;
}

/* Return true if the task is not suitable as an OOM victim candidate. */
static bool oom_unkillable_task(struct task_struct *p,
		const struct mem_cgroup *mem, const nodemask_t *nodemask)
{
	if (is_global_init(p))
		return true;
	if (p->flags & PF_KTHREAD)
		return true;

	/* When mem_cgroup_out_of_memory() and p is not member of the group */
	if (mem && !task_in_mem_cgroup(p, mem))
		return true;

	/* p may not have freeable memory in nodemask */
	if (!has_intersects_mems_allowed(p, nodemask))
		return true;

	return false;
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task struct of which task we should calculate
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible.  The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
		      const nodemask_t *nodemask, unsigned long totalpages)
{
	long points;

	if (oom_unkillable_task(p, mem, nodemask))
		return 0;

	p = find_lock_task_mm(p);
	if (!p)
		return 0;

	/*
	 * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
	 * so the entire heuristic doesn't need to be executed for something
	 * that cannot be killed.
	 */
	if (atomic_read(&p->mm->oom_disable_count)) {
		task_unlock(p);
		return 0;
	}

	/*
	 * The memory controller may have a limit of 0 bytes, so avoid a divide
	 * by zero, if necessary.
	 */
	if (!totalpages)
		totalpages = 1;

	/*
	 * The baseline for the badness score is the proportion of RAM that each
	 * task's rss, pagetable and swap space use.
	 */
	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
	points += get_mm_counter(p->mm, swap_usage);

	points *= 1000;
	points /= totalpages;
	task_unlock(p);

	/*
	 * Root processes get 3% bonus, just like the __vm_enough_memory()
	 * implementation used by LSMs.
	 */
	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
		points -= 30;

	/*
	 * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
	 * either completely disable oom killing or always prefer a certain
	 * task.
	 */
	points += p->signal->oom_score_adj;

	/*
	 * Never return 0 for an eligible task that may be killed since it's
	 * possible that no single user task uses more than 0.1% of memory and
	 * no single admin tasks uses more than 3.0%.
	 */
	if (points <= 0)
		return 1;
	return (points < 1000) ? points : 1000;
}
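The scoring arithmetic is easier to audit in isolation; a hedged, self-contained sketch of the same calculation (badness_sketch is hypothetical and mirrors the steps above):

/* Sketch: points = (rss + ptes + swap) * 1000 / totalpages,
 * minus a 3% root bonus, plus oom_score_adj, clamped to [1, 1000]. */
static long badness_sketch(unsigned long rss, unsigned long nr_ptes,
			   unsigned long swapents, unsigned long totalpages,
			   bool is_root, int oom_score_adj)
{
	long points;

	if (!totalpages)
		totalpages = 1;		/* avoid a divide by zero */

	points = (rss + nr_ptes + swapents) * 1000 / totalpages;
	if (is_root)
		points -= 30;		/* 3% of the 0..1000 scale */
	points += oom_score_adj;

	if (points <= 0)
		return 1;		/* eligible tasks never score 0 */
	return points < 1000 ? points : 1000;
}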
Example #17
/*
 * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
 * segv().
 */
int handle_page_fault(unsigned long address, unsigned long ip,
                      int is_write, int is_user, int *code_out)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    pgd_t *pgd;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    int err = -EFAULT;

    *code_out = SEGV_MAPERR;

    /*
     * If the fault was during atomic operation, don't take the fault, just
     * fail.
     */
    if (in_atomic())
        goto out_nosemaphore;

    down_read(&mm->mmap_sem);
    vma = find_vma(mm, address);
    if (!vma)
        goto out;
    else if (vma->vm_start <= address)
        goto good_area;
    else if (!(vma->vm_flags & VM_GROWSDOWN))
        goto out;
    else if (is_user && !ARCH_IS_STACKGROW(address))
        goto out;
    else if (expand_stack(vma, address))
        goto out;

good_area:
    *code_out = SEGV_ACCERR;
    if (is_write && !(vma->vm_flags & VM_WRITE))
        goto out;

    /* Don't require VM_READ|VM_EXEC for write faults! */
    if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
        goto out;

    do {
        int fault;
survive:
        fault = handle_mm_fault(mm, vma, address, is_write);
        if (unlikely(fault & VM_FAULT_ERROR)) {
            if (fault & VM_FAULT_OOM) {
                err = -ENOMEM;
                goto out_of_memory;
            } else if (fault & VM_FAULT_SIGBUS) {
                err = -EACCES;
                goto out;
            }
            BUG();
        }
        if (fault & VM_FAULT_MAJOR)
            current->maj_flt++;
        else
            current->min_flt++;

        pgd = pgd_offset(mm, address);
        pud = pud_offset(pgd, address);
        pmd = pmd_offset(pud, address);
        pte = pte_offset_kernel(pmd, address);
    } while (!pte_present(*pte));
    err = 0;
    /*
     * The below warning was added in place of
     *	pte_mkyoung(); if (is_write) pte_mkdirty();
     * If it's triggered, we'd normally see a hang here (a clean pte is
     * marked read-only to emulate the dirty bit).
     * However, the generic code can mark a PTE writable but clean on a
     * concurrent read fault, triggering this harmlessly. So comment it out.
     */
#if 0
    WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
#endif
    flush_tlb_page(vma, address);
out:
    up_read(&mm->mmap_sem);
out_nosemaphore:
    return err;

    /*
     * We ran out of memory, or some other thing happened to us that made
     * us unable to handle the page fault gracefully.
     */
out_of_memory:
    if (is_global_init(current)) {
        up_read(&mm->mmap_sem);
        yield();
        down_read(&mm->mmap_sem);
        goto survive;
    }
    goto out;
}
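Per the header comment, the error codes are consumed by segv(); a hedged sketch of that dispatch (segv_sketch is hypothetical, and the two-argument force_sig() matches the older API used in these examples):

/* Sketch: turn handle_page_fault() errors into signals. */
static int segv_sketch(unsigned long address, unsigned long ip,
                       int is_write, int is_user)
{
    int si_code, err;

    err = handle_page_fault(address, ip, is_write, is_user, &si_code);
    if (!err)
        return 0;            /* handled; retry the faulting access */
    if (err == -ENOMEM && is_user)
        pagefault_out_of_memory();
    else if (err == -EACCES)
        force_sig(SIGBUS, current);
    else
        force_sig(SIGSEGV, current);
    return err;
}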
Example #18
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
                              unsigned long address)
{
    struct vm_area_struct *vma = NULL;
    struct task_struct *tsk = current;
    struct mm_struct *mm = tsk->mm;
    const int field = sizeof(unsigned long) * 2;
    siginfo_t info;
    int fault;

    info.si_code = SEGV_MAPERR;

    /*
    * We fault-in kernel-space virtual memory on-demand. The
    * 'reference' page table is init_mm.pgd.
    *
    * NOTE! We MUST NOT take any locks for this case. We may
    * be in an interrupt or a critical region, and should
    * only copy the information from the master page table,
    * nothing more.
    */
    if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
        goto vmalloc_fault;
#ifdef MODULE_START
    if (unlikely(address >= MODULE_START && address < MODULE_END))
        goto vmalloc_fault;
#endif

    /*
    * If we're in an interrupt or have no user
    * context, we must not take the fault..
    */
    if (in_atomic() || !mm)
        goto bad_area_nosemaphore;

    down_read(&mm->mmap_sem);
    vma = find_vma(mm, address);
    if (!vma)
        goto bad_area;
    if (vma->vm_start <= address)
        goto good_area;
    if (!(vma->vm_flags & VM_GROWSDOWN))
        goto bad_area;
    if (expand_stack(vma, address))
        goto bad_area;
    /*
    * Ok, we have a good vm_area for this memory access, so
    * we can handle it..
     */
good_area:
    info.si_code = SEGV_ACCERR;

    if (write) {
        if (!(vma->vm_flags & VM_WRITE))
            goto bad_area;
    } else {
        if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
            goto bad_area;
    }

survive:
    /*
    * If for any reason at all we couldn't handle the fault,
    * make sure we exit gracefully rather than endlessly redo
    * the fault.
    */
    fault = handle_mm_fault(mm, vma, address, write);
    if (unlikely(fault & VM_FAULT_ERROR)) {
        if (fault & VM_FAULT_OOM)
            goto out_of_memory;
        else if (fault & VM_FAULT_SIGSEGV)
            goto bad_area;
        else if (fault & VM_FAULT_SIGBUS)
            goto do_sigbus;
        BUG();
    }
    if (fault & VM_FAULT_MAJOR)
        tsk->maj_flt++;
    else
        tsk->min_flt++;

    up_read(&mm->mmap_sem);
    return;

    /*
    * Something tried to access memory that isn't in our memory map..
    * Fix it, but check if it's kernel or user first..
     */
bad_area:
    up_read(&mm->mmap_sem);

bad_area_nosemaphore:
    /* User mode accesses just cause a SIGSEGV */
    if (user_mode(regs)) {
        tsk->thread.cp0_badvaddr = address;
        tsk->thread.error_code = write;
        info.si_signo = SIGSEGV;
        info.si_errno = 0;
        /* info.si_code has been set above */
        info.si_addr = (void __user *) address;
        force_sig_info(SIGSEGV, &info, tsk);
        return;
    }

no_context:
    /* Are we prepared to handle this kernel fault? */
    if (fixup_exception(regs)) {
        current->thread.cp0_baduaddr = address;
        return;
    }

    /*
    * Oops. The kernel tried to access some bad page. We'll have to
    * terminate things with extreme prejudice.
    */
    bust_spinlocks(1);

    printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
           "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
           0, field, address, field, regs->cp0_epc,
           field, regs->regs[3]);
    die("Oops", regs);

    /*
    * We ran out of memory, or some other thing happened to us that made
    * us unable to handle the page fault gracefully.
    */
out_of_memory:
    up_read(&mm->mmap_sem);
    if (is_global_init(tsk)) {
        yield();
        down_read(&mm->mmap_sem);
        goto survive;
    }
    printk("VM: killing process %s\n", tsk->comm);
    if (user_mode(regs))
        do_group_exit(SIGKILL);
    goto no_context;

do_sigbus:
    up_read(&mm->mmap_sem);
    /* Kernel mode? Handle exceptions or die */
    if (!user_mode(regs))
        goto no_context;
    else
        /*
        * Send a sigbus, regardless of whether we were in kernel
        * or user mode.
        */
        tsk->thread.cp0_badvaddr = address;
    info.si_signo = SIGBUS;
    info.si_errno = 0;
    info.si_code = BUS_ADRERR;
    info.si_addr = (void __user *) address;
    force_sig_info(SIGBUS, &info, tsk);
    return;
vmalloc_fault:
    {
        /*
        * Synchronize this task's top level page-table
        * with the 'reference' page table.
        *
        * Do _not_ use "tsk" here. We might be inside
        * an interrupt in the middle of a task switch..
        */
        int offset = __pgd_offset(address);
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;
        pte_t *pte_k;

        pgd = (pgd_t *) pgd_current + offset;
        pgd_k = init_mm.pgd + offset;

        if (!pgd_present(*pgd_k))
            goto no_context;
        set_pgd(pgd, *pgd_k);

        pud = pud_offset(pgd, address);
        pud_k = pud_offset(pgd_k, address);
        if (!pud_present(*pud_k))
            goto no_context;

        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
            goto no_context;
        set_pmd(pmd, *pmd_k);

        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
            goto no_context;
        return;
    }
}
Example #19
unsigned long __copy_to_user_ll(void __user *to, const void *from,
				unsigned long n)
{
#ifndef CONFIG_X86_WP_WORKS_OK
	if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
			((unsigned long)to) < TASK_SIZE) {
		/*
		 * When we are in an atomic section (see
		 * mm/filemap.c:file_read_actor), return the full
		 * length to take the slow path.
		 */
		if (in_atomic())
			return n;

		/*
		 * CPU does not honor the WP bit when writing
		 * from supervisory mode, and due to preemption or SMP,
		 * the page tables can change at any time.
		 * Do it manually.	Manfred <*****@*****.**>
		 */
		while (n) {
			unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
			unsigned long len = PAGE_SIZE - offset;
			int retval;
			struct page *pg;
			void *maddr;

			if (len > n)
				len = n;

survive:
			down_read(&current->mm->mmap_sem);
			retval = get_user_pages(current, current->mm,
					(unsigned long)to, 1, 1, 0, &pg, NULL);

			if (retval == -ENOMEM && is_global_init(current)) {
				up_read(&current->mm->mmap_sem);
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				goto survive;
			}

			if (retval != 1) {
				up_read(&current->mm->mmap_sem);
				break;
			}

			maddr = kmap_atomic(pg, KM_USER0);
			memcpy(maddr + offset, from, len);
			kunmap_atomic(maddr, KM_USER0);
			set_page_dirty_lock(pg);
			put_page(pg);
			up_read(&current->mm->mmap_sem);

			from += len;
			to += len;
			n -= len;
		}
		return n;
	}
#endif
	if (movsl_is_ok(to, from, n))
		__copy_user(to, from, n);
	else
		n = __copy_user_intel(to, from, n);
	return n;
}
Example #20
static void __oom_kill_process(struct task_struct *victim, const char *message)
{
	struct task_struct *p;
	struct mm_struct *mm;
	bool can_oom_reap = true;

	p = find_lock_task_mm(victim);
	if (!p) {
		put_task_struct(victim);
		return;
	} else if (victim != p) {
		get_task_struct(p);
		put_task_struct(victim);
		victim = p;
	}

	/* Get a reference to safely compare mm after task_unlock(victim) */
	mm = victim->mm;
	mmgrab(mm);

	/* Raise event before sending signal: task reaper must see this */
	count_vm_event(OOM_KILL);
	memcg_memory_event_mm(mm, MEMCG_OOM_KILL);

	/*
	 * We should send SIGKILL before granting access to memory reserves
	 * in order to prevent the OOM victim from depleting the memory
	 * reserves from the user space under its control.
	 */
	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
	mark_oom_victim(victim);
	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
		message, task_pid_nr(victim), victim->comm,
		K(victim->mm->total_vm),
		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
	task_unlock(victim);

	/*
	 * Kill all user processes sharing victim->mm in other thread groups, if
	 * any.  They don't get access to memory reserves, though, to avoid
	 * depletion of all memory.  This prevents mm->mmap_sem livelock when an
	 * oom killed thread cannot exit because it requires the semaphore and
	 * it's contended by another thread trying to allocate memory itself.
	 * That thread will now get access to memory reserves since it has a
	 * pending fatal signal.
	 */
	rcu_read_lock();
	for_each_process(p) {
		if (!process_shares_mm(p, mm))
			continue;
		if (same_thread_group(p, victim))
			continue;
		if (is_global_init(p)) {
			can_oom_reap = false;
			set_bit(MMF_OOM_SKIP, &mm->flags);
			pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
					task_pid_nr(victim), victim->comm,
					task_pid_nr(p), p->comm);
			continue;
		}
		/*
		 * No use_mm() user needs to read from the userspace so we are
		 * ok to reap it.
		 */
		if (unlikely(p->flags & PF_KTHREAD))
			continue;
		do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
	}
	rcu_read_unlock();

	if (can_oom_reap)
		wake_oom_reaper(victim);

	mmdrop(mm);
	put_task_struct(victim);
}
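The reaping loop above assumes a process_shares_mm() helper; a hedged sketch of what it checks, modelled on the mm/oom_kill.c helper of the same vintage:

/* Sketch: does any live thread of p run on the given mm? */
static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
{
	struct task_struct *t;

	for_each_thread(p, t) {
		struct mm_struct *t_mm = READ_ONCE(t->mm);

		/* the first thread with a non-NULL mm decides */
		if (t_mm)
			return t_mm == mm;
	}
	return false;
}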