Example No. 1
void 
show_registers(struct pt_regs * regs)
{
	/* We either use rdusp() - the USP register, which might not
	   correspond to the current process for all cases we're called,
	   or we use the current->thread.usp, which is not up to date for
	   the current process.  Experience shows we want the USP
	   register.  */
	unsigned long usp = rdusp();

	raw_printk("IRP: %08lx SRP: %08lx DCCR: %08lx USP: %08lx MOF: %08lx\n",
	       regs->irp, regs->srp, regs->dccr, usp, regs->mof );
	raw_printk(" r0: %08lx  r1: %08lx   r2: %08lx  r3: %08lx\n",
	       regs->r0, regs->r1, regs->r2, regs->r3);
	raw_printk(" r4: %08lx  r5: %08lx   r6: %08lx  r7: %08lx\n",
	       regs->r4, regs->r5, regs->r6, regs->r7);
	raw_printk(" r8: %08lx  r9: %08lx  r10: %08lx r11: %08lx\n",
	       regs->r8, regs->r9, regs->r10, regs->r11);
	raw_printk("r12: %08lx r13: %08lx oR10: %08lx  sp: %08lx\n",
	       regs->r12, regs->r13, regs->orig_r10, regs);
	raw_printk("R_MMU_CAUSE: %08lx\n", (unsigned long)*R_MMU_CAUSE);
	raw_printk("Process %s (pid: %d, stackpage=%08lx)\n",
	       current->comm, current->pid, (unsigned long)current);

	/*
         * When in-kernel, we also print out the stack and code at the
         * time of the fault..
         */
        if (! user_mode(regs)) {
	  	int i;

                show_stack(NULL, (unsigned long*)usp);

		/* Dump kernel stack if the previous dump wasn't one.  */
		if (usp != 0)
			show_stack (NULL, NULL);

                raw_printk("\nCode: ");
                if(regs->irp < PAGE_OFFSET)
                        goto bad;

		/* Often enough the value at regs->irp does not point to
		   the interesting instruction, which is most often the
		   _previous_ instruction.  So we dump at an offset large
		   enough that instruction decoding should be in sync at
		   the interesting point, but small enough to fit on a row
		   (sort of).  We point out the regs->irp location in a
		   ksymoops-friendly way by wrapping the byte for that
		   address in parentheses.  */
                for(i = -12; i < 12; i++)
                {
                        unsigned char c;
                        if(__get_user(c, &((unsigned char*)regs->irp)[i])) {
bad:
                                raw_printk(" Bad IP value.");
                                break;
                        }

			if (i == 0)
			  raw_printk("(%02x) ", c);
			else
			  raw_printk("%02x ", c);
                }
		raw_printk("\n");
        }
}
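
The dump loop above prints the bytes around regs->irp and wraps the byte at the faulting address in parentheses so ksymoops can pick it out. Below is a minimal userspace sketch of the same formatting idea; dump_around() is a made-up helper, and there is no __get_user() guard because we only read a local buffer.

#include <stdio.h>

/* Hypothetical helper: hex-dump the bytes around `center`, marking offset 0. */
static void dump_around(const unsigned char *center, int before, int after)
{
	for (int i = -before; i < after; i++) {
		if (i == 0)
			printf("(%02x) ", center[i]);
		else
			printf("%02x ", center[i]);
	}
	printf("\n");
}

int main(void)
{
	static const unsigned char buf[32] = {
		0x90, 0x90, 0xcc, 0x0f, 0x0b, 0x90, 0x90, 0x90,
	};

	/* Pretend buf + 4 is the faulting instruction pointer. */
	dump_around(buf + 4, 4, 8);
	return 0;
}
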
Example No. 2
File: time.c  Project: 24hours/linux
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
	unsigned long new_itm;

	if (cpu_is_offline(smp_processor_id())) {
		return IRQ_HANDLED;
	}

	platform_timer_interrupt(irq, dev_id);

	new_itm = local_cpu_data->itm_next;

	if (!time_after(ia64_get_itc(), new_itm))
		printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
		       ia64_get_itc(), new_itm);

	profile_tick(CPU_PROFILING);

	if (paravirt_do_steal_accounting(&new_itm))
		goto skip_process_time_accounting;

	while (1) {
		update_process_times(user_mode(get_irq_regs()));

		new_itm += local_cpu_data->itm_delta;

		if (smp_processor_id() == time_keeper_id)
			xtime_update(1);

		local_cpu_data->itm_next = new_itm;

		if (time_after(new_itm, ia64_get_itc()))
			break;

		/*
		 * Allow IPIs to interrupt the timer loop.
		 */
		local_irq_enable();
		local_irq_disable();
	}

skip_process_time_accounting:

	do {
		/*
		 * If we're too close to the next clock tick for
		 * comfort, we increase the safety margin by
		 * intentionally dropping the next tick(s).  We do NOT
		 * update itm.next because that would force us to call
		 * xtime_update() which in turn would let our clock run
		 * too fast (with the potentially devastating effect
		 * of losing monotony of time).
		 */
		while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
			new_itm += local_cpu_data->itm_delta;
		ia64_set_itm(new_itm);
		/* double check, in case we got hit by a (slow) PMI: */
	} while (time_after_eq(ia64_get_itc(), new_itm));
	return IRQ_HANDLED;
}
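
The tick loop above relies on time_after() to compare the free-running ITC against the programmed ITM safely across counter wrap-around. In Linux that macro boils down to a signed comparison of the unsigned difference; here is a standalone sketch of that trick (simplified, without the typecheck() the real macro also performs).

#include <stdio.h>

/* Wrap-safe "a is after b" for free-running unsigned counters,
 * mirroring the kernel's time_after() minus its typecheck(). */
#define time_after(a, b)  ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long near_wrap = (unsigned long)-10;	/* just before wrap-around */
	unsigned long wrapped   = 5;			/* just after wrap-around */

	/* A plain ">" gets this wrong; the signed-difference trick does not. */
	printf("plain compare: %d\n", wrapped > near_wrap);		/* prints 0 */
	printf("time_after:    %d\n", time_after(wrapped, near_wrap));	/* prints 1 */
	return 0;
}
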
Example No. 3
static int watchpoint_handler(unsigned long addr, unsigned int esr,
			      struct pt_regs *regs)
{
	int i, step = 0, *kernel_step, access;
	u32 ctrl_reg;
	u64 val, alignment_mask;
	struct perf_event *wp, **slots;
	struct debug_info *debug_info;
	struct arch_hw_breakpoint *info;
	struct arch_hw_breakpoint_ctrl ctrl;

	slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
	debug_info = &current->thread.debug;

	for (i = 0; i < core_num_wrps; ++i) {
		rcu_read_lock();

		wp = slots[i];

		if (wp == NULL)
			goto unlock;

		info = counter_arch_bp(wp);
		/* AArch32 watchpoints are either 4 or 8 bytes aligned. */
		if (is_compat_task()) {
			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
				alignment_mask = 0x7;
			else
				alignment_mask = 0x3;
		} else {
			alignment_mask = 0x7;
		}

		/* Check if the watchpoint value matches. */
		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
		if (val != (addr & ~alignment_mask))
			goto unlock;

		/* Possible match, check the byte address select to confirm. */
		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
		decode_ctrl_reg(ctrl_reg, &ctrl);
		if (!((1 << (addr & alignment_mask)) & ctrl.len))
			goto unlock;

		/*
		 * Check that the access type matches.
		 * 0 => load, otherwise => store
		 */
		access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
			 HW_BREAKPOINT_R;
		if (!(access & hw_breakpoint_type(wp)))
			goto unlock;

		info->trigger = addr;
		perf_bp_event(wp, regs);

		/* Do we need to handle the stepping? */
		if (!wp->overflow_handler)
			step = 1;

unlock:
		rcu_read_unlock();
	}

	if (!step)
		return 0;

	/*
	 * We always disable EL0 watchpoints because the kernel can
	 * cause these to fire via an unprivileged access.
	 */
	toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0);

	if (user_mode(regs)) {
		debug_info->wps_disabled = 1;

		/* If we're already stepping a breakpoint, just return. */
		if (debug_info->bps_disabled)
			return 0;

		if (test_thread_flag(TIF_SINGLESTEP))
			debug_info->suspended_step = 1;
		else
			user_enable_single_step(current);
	} else {
		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0);
		kernel_step = &__get_cpu_var(stepping_kernel_bp);

		if (*kernel_step != ARM_KERNEL_STEP_NONE)
			return 0;

		if (kernel_active_single_step()) {
			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
		} else {
			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
			kernel_enable_single_step(regs);
		}
	}

	return 0;
}
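
The match above is two checks: the access address must fall in the watchpoint's aligned doubleword (the WVR value), and the byte-address-select bits in the control register must cover that particular byte. A small userspace sketch of the same test; the function and variable names are illustrative, not the arm64 structures.

#include <stdio.h>

/* Does a watchpoint programmed at `wvr` with byte-select mask `bas`
 * (one bit per byte of the doubleword) cover the access at `addr`? */
static int watchpoint_covers(unsigned long wvr, unsigned int bas,
			     unsigned long addr)
{
	const unsigned long alignment_mask = 0x7;	/* 8-byte aligned WVR */

	if ((addr & ~alignment_mask) != wvr)
		return 0;				/* different doubleword */
	return (bas >> (addr & alignment_mask)) & 1;	/* is this byte selected? */
}

int main(void)
{
	unsigned long wvr = 0x1000;	/* watch the doubleword at 0x1000 */
	unsigned int bas = 0x0c;	/* ...but only bytes 2 and 3 of it */

	printf("%d %d %d\n",
	       watchpoint_covers(wvr, bas, 0x1001),	/* 0: byte not selected */
	       watchpoint_covers(wvr, bas, 0x1002),	/* 1: selected byte */
	       watchpoint_covers(wvr, bas, 0x1009));	/* 0: other doubleword */
	return 0;
}
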
Example No. 4
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * We hope to recycle these flags after 2.6.26
	 */
	if (unlikely(clone_flags & CLONE_STOPPED)) {
		static int __read_mostly count = 100;

		if (count > 0 && printk_ratelimit()) {
			char comm[TASK_COMM_LEN];

			count--;
			printk(KERN_INFO "fork(): process `%s' used deprecated "
					"clone flags 0x%lx\n",
				get_task_comm(comm, current),
				clone_flags & CLONE_STOPPED);
		}
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior to waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		if (unlikely(clone_flags & CLONE_STOPPED)) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
			__set_task_state(p, TASK_STOPPED);
		} else {
			wake_up_new_task(p, clone_flags);
		}

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
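
Seen from userspace, the CLONE_VFORK handling above is why vfork() blocks the parent until the child exits or execs, while a plain fork() returns immediately in both processes: 0 in the child and the child's pid in the parent. A minimal demo of that return-value convention, using only standard POSIX calls.

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		/* Child: the kernel arranged for us to see a 0 return value. */
		printf("child:  pid=%d ppid=%d\n", getpid(), getppid());
		_exit(42);
	}

	/* Parent: fork() returned the child's pid (task_pid_vnr(p) above). */
	int status;
	waitpid(pid, &status, 0);
	printf("parent: child %d exited with %d\n", pid, WEXITSTATUS(status));
	return 0;
}
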
Example No. 5
/*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init even with a SIGKILL even by
 * mistake.
 *
 * Note that we go through the signals twice: once to check the signals that
 * the kernel can handle, and then we build all the user-level signal handling
 * stack-frames in one go after that.
 */
static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall)
{
	struct k_sigaction ka;
	siginfo_t info;
	int signr;

#ifdef CONFIG_PREEMPT_RT
	/*
	 * Fully-preemptible kernel does not need interrupts disabled:
	 */
	local_irq_enable();
	preempt_check_resched();
#endif

	/*
	 * We want the common case to go fast, which
	 * is why we may in certain cases get here from
	 * kernel mode. Just return without doing anything
	 * if so.
	 */
	if (!user_mode(regs))
		return 0;

	if (try_to_freeze())
		goto no_signal;

	if (current->ptrace & PT_SINGLESTEP)
		ptrace_cancel_bpt(current);

	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
	if (signr > 0) {
		handle_signal(signr, &ka, &info, oldset, regs, syscall);
		if (current->ptrace & PT_SINGLESTEP)
			ptrace_set_bpt(current);
		return 1;
	}

 no_signal:
	/*
	 * No signal to deliver to the process - restart the syscall.
	 */
	if (syscall) {
		if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
			if (thumb_mode(regs)) {
				regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE;
				regs->ARM_pc -= 2;
			} else {
#if defined(CONFIG_AEABI) && !defined(CONFIG_OABI_COMPAT)
				regs->ARM_r7 = __NR_restart_syscall;
				regs->ARM_pc -= 4;
#else
				u32 __user *usp;
				u32 swival = __NR_restart_syscall;

				regs->ARM_sp -= 12;
				usp = (u32 __user *)regs->ARM_sp;

				/*
				 * Either we support OABI only, or we have
				 * EABI with the OABI compat layer enabled.
				 * In the latter case we don't know if user
				 * space is EABI or not, and if not we must
				 * not clobber r7.  Always using the OABI
				 * syscall solves that issue and works for
				 * all those cases.
				 */
				swival = swival - __NR_SYSCALL_BASE + __NR_OABI_SYSCALL_BASE;

				put_user(regs->ARM_pc, &usp[0]);
				/* swi __NR_restart_syscall */
				put_user(0xef000000 | swival, &usp[1]);
				/* ldr	pc, [sp], #12 */
				put_user(0xe49df00c, &usp[2]);

				flush_icache_range((unsigned long)usp,
						   (unsigned long)(usp + 3));

				regs->ARM_pc = regs->ARM_sp + 4;
#endif
			}
		}
		if (regs->ARM_r0 == -ERESTARTNOHAND ||
		    regs->ARM_r0 == -ERESTARTSYS ||
		    regs->ARM_r0 == -ERESTARTNOINTR) {
			restart_syscall(regs);
		}
	}
	if (current->ptrace & PT_SINGLESTEP)
		ptrace_set_bpt(current);
	return 0;
}
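
The -ERESTARTSYS / -ERESTARTNOHAND rewinding above is what decides whether an interrupted system call is transparently restarted or fails with EINTR: with SA_RESTART the kernel restarts it, without it the error is passed up to the caller. A small demo that makes the EINTR side visible (standard POSIX only).

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void on_alarm(int sig) { (void)sig; }	/* just interrupt the read */

int main(void)
{
	struct sigaction sa;
	int pipefd[2];
	char c;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_alarm;
	sa.sa_flags = 0;	/* no SA_RESTART: read() will fail with EINTR */
	sigaction(SIGALRM, &sa, NULL);

	pipe(pipefd);
	alarm(1);		/* nothing ever writes, so read() blocks */

	if (read(pipefd[0], &c, 1) < 0 && errno == EINTR)
		printf("read interrupted (EINTR); with SA_RESTART the kernel "
		       "would have restarted it instead\n");
	return 0;
}
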
Example No. 6
/*
 * Canonical page fault handler
 */
void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	siginfo_t info;
	int si_code = SEGV_MAPERR;
	int fault;
	const struct exception_table_entry *fixup;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
				 (cause > 0 ? FAULT_FLAG_WRITE : 0);

	/*
	 * If we're in an interrupt or have no user context,
	 * then must not take the fault.
	 */
	if (unlikely(in_interrupt() || !mm))
		goto no_context;

	local_irq_enable();

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;

	if (vma->vm_start <= address)
		goto good_area;

	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;

	if (expand_stack(vma, address))
		goto bad_area;

good_area:
	/* Address space is OK.  Now check access rights. */
	si_code = SEGV_ACCERR;

	switch (cause) {
	case FLT_IFETCH:
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
		break;
	case FLT_LOAD:
		if (!(vma->vm_flags & VM_READ))
			goto bad_area;
		break;
	case FLT_STORE:
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		break;
	}

	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	/* The most common case -- we are done. */
	if (likely(!(fault & VM_FAULT_ERROR))) {
		if (flags & FAULT_FLAG_ALLOW_RETRY) {
			if (fault & VM_FAULT_MAJOR)
				current->maj_flt++;
			else
				current->min_flt++;
			if (fault & VM_FAULT_RETRY) {
				flags &= ~FAULT_FLAG_ALLOW_RETRY;
				flags |= FAULT_FLAG_TRIED;
				goto retry;
			}
		}

		up_read(&mm->mmap_sem);
		return;
	}

	up_read(&mm->mmap_sem);

	/* Handle copyin/out exception cases */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		pagefault_out_of_memory();
		return;
	}

	/* User-mode address is in the memory map, but we are
	 * unable to fix up the page fault.
	 */
	if (fault & VM_FAULT_SIGBUS) {
		info.si_signo = SIGBUS;
		info.si_code = BUS_ADRERR;
	}
	/* Address is not in the memory map */
	else {
		info.si_signo = SIGSEGV;
		info.si_code = SEGV_ACCERR;
	}
	info.si_errno = 0;
	info.si_addr = (void __user *)address;
	force_sig_info(info.si_code, &info, current);
	return;

bad_area:
	up_read(&mm->mmap_sem);

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, current);
		return;
	}
	/* Kernel-mode fault falls through */

no_context:
	fixup = search_exception_tables(pt_elr(regs));
	if (fixup) {
		pt_set_elr(regs, fixup->fixup);
		return;
	}

	/* Things are looking very, very bad now */
	bust_spinlocks(1);
	printk(KERN_EMERG "Unable to handle kernel paging request at "
		"virtual address 0x%08lx, regs %p\n", address, regs);
	die("Bad Kernel VA", regs, SIGKILL);
}
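
On the receiving end, the si_signo/si_code/si_addr values filled in by force_sig_info() above are exactly what an SA_SIGINFO handler gets to see. A tiny demo that provokes a fault and prints them; it _exit()s from the handler because returning would simply re-fault (and printf from a handler is not async-signal-safe, which is fine for a throwaway demo).

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void on_segv(int sig, siginfo_t *info, void *ucontext)
{
	(void)ucontext;
	/* si_addr is the faulting address the kernel recorded for us. */
	printf("signal %d, si_code %d, fault address %p\n",
	       sig, info->si_code, info->si_addr);
	_exit(0);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = on_segv;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);

	*(volatile int *)0x10 = 1;	/* deliberately fault */
	return 0;
}
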
Example No. 7
asmlinkage void
do_page_fault(unsigned long address, struct pt_regs *regs,
	      int protection, int writeaccess)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct * vma;
	siginfo_t info;
	int fault;

	D(printk(KERN_DEBUG
		 "Page fault for %lX on %X at %lX, prot %d write %d\n",
		 address, smp_processor_id(), instruction_pointer(regs),
		 protection, writeaccess));

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings we don't have to walk all processes pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error (error_code & 1).
	 */

	if (address >= VMALLOC_START &&
	    !protection &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* When stack execution is not allowed we store the signal
	 * trampolines in the reserved cris_signal_return_page.
	 * Handle this in the exact same way as vmalloc (we know
	 * that the mapping is there and is valid so no need to
	 * call handle_mm_fault).
	 */
	if (cris_signal_return_page &&
	    address == cris_signal_return_page &&
	    !protection && user_mode(regs))
		goto vmalloc_fault;

	/* we can and should enable interrupts at this point */
	local_irq_enable();

	mm = tsk->mm;
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */

	if (in_interrupt() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (user_mode(regs)) {
		/*
		 * accessing the stack below usp is always a bug.
		 * we get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < rdusp())
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

 good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

	if (writeaccess == 2){
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (writeaccess == 1) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(mm, vma, address, writeaccess & 1);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

 bad_area:
	up_read(&mm->mmap_sem);

 bad_area_nosemaphore:
	DPG(show_registers(regs));

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		printk(KERN_NOTICE "%s (pid %d) segfaults for page "
		       "address %08lx at pc %08lx\n",
		       tsk->comm, tsk->pid, address, instruction_pointer(regs));
		return;
	}

 no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it accesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	if (find_fixup_code(regs))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	if (!oops_in_progress) {
		oops_in_progress = 1;
		if ((unsigned long) (address) < PAGE_SIZE)
			printk(KERN_ALERT "Unable to handle kernel NULL "
				"pointer dereference");
		else
			printk(KERN_ALERT "Unable to handle kernel access"
				" at virtual address %08lx\n", address);

		die_if_kernel("Oops", regs, (writeaccess << 1) | protection);
		oops_in_progress = 0;
	}

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

 out_of_memory:
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", tsk->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;

 do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in a misfortunately run irq
		 * (like inside schedule() between switch_mm and
		 *  switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *)per_cpu(current_pgd, smp_processor_id()) + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}
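
The vmalloc_fault path above fixes the fault by copying one top-level entry from init_mm.pgd, located by indexing both page tables with pgd_index(address). The sketch below shows how such indices fall out of a virtual address; the PGDIR_SHIFT/PAGE_SHIFT values here are made up for a generic 32-bit two-level layout, the real constants are per-architecture.

#include <stdio.h>

/* Hypothetical 32-bit, two-level layout: 4 KiB pages, 4 MiB per pgd entry. */
#define PAGE_SHIFT	12
#define PGDIR_SHIFT	22
#define PTRS_PER_PTE	(1 << (PGDIR_SHIFT - PAGE_SHIFT))

static unsigned int pgd_index(unsigned long addr)
{
	return addr >> PGDIR_SHIFT;
}

static unsigned int pte_index(unsigned long addr)
{
	return (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

int main(void)
{
	unsigned long addr = 0xd0801234UL;	/* some vmalloc-ish address */

	printf("addr %#lx -> pgd slot %u, pte slot %u, page offset %#lx\n",
	       addr, pgd_index(addr), pte_index(addr), addr & 0xfff);
	return 0;
}
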
Example No. 8
/*
 * This routine handles page faults.  It determines the problem, and
 * then passes it off to one of the appropriate routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *
 * If this routine detects a bad access, it returns 1, otherwise it
 * returns 0.
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
			      unsigned long error_code)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct * vma;
	int write, fault;

#ifdef DEBUG
	printk ("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
		regs->sr, regs->pc, address, error_code,
		current->mm->pgd);
#endif

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (in_interrupt() || !mm)
		goto no_context;

	down(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (!vma)
		goto map_err;
	if (vma->vm_flags & VM_IO)
		goto acc_err;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto map_err;
	if (user_mode(regs)) {
		/* Accessing the stack below usp is always a bug.  The
		   "+ 256" is there due to some instructions doing
		   pre-decrement on the stack and that doesn't show up
		   until later.  */
		if (address + 256 < rdusp())
			goto map_err;
	}
	if (expand_stack(vma, address))
		goto map_err;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
#ifdef DEBUG
	printk("do_page_fault: good_area\n");
#endif
	write = 0;
	switch (error_code & 3) {
		default:	/* 3: write, present */
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto acc_err;
			write++;
			break;
		case 1:		/* read, present */
			goto acc_err;
		case 0:		/* read, not present */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
				goto acc_err;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
#ifdef DEBUG
 	printk("handle_mm_fault returns %d\n",fault);
#endif
	if (fault < 0)
		goto out_of_memory;
	if (!fault)
		goto bus_err;

	/* There seems to be a missing invalidate somewhere in do_no_page.
	 * Until I find it, this one cures the problem and makes
	 * 1.2 run on the 68040 (Martin Apel).
	 */
	#warning should be obsolete now...
	if (CPU_IS_040_OR_060)
		flush_tlb_page(vma, address);
	up(&mm->mmap_sem);
	return 0;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);

no_context:
	current->thread.signo = SIGBUS;
	current->thread.faddr = address;
	return send_fault_sig(regs);

bus_err:
	current->thread.signo = SIGBUS;
	current->thread.code = BUS_ADRERR;
	current->thread.faddr = address;
	goto send_sig;

map_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_MAPERR;
	current->thread.faddr = address;
	goto send_sig;

acc_err:
	current->thread.signo = SIGSEGV;
	current->thread.code = SEGV_ACCERR;
	current->thread.faddr = address;

send_sig:
	up(&mm->mmap_sem);
	return send_fault_sig(regs);
}
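
The switch on (error_code & 3) above implements the bit layout documented in the header comment: bit 0 distinguishes a missing page from a protection fault, bit 1 a read from a write. A trivial standalone decoder for those two bits.

#include <stdio.h>

static void decode_fault(unsigned long error_code)
{
	printf("%s fault on a %s page (error_code=%lu)\n",
	       (error_code & 2) ? "write" : "read",
	       (error_code & 1) ? "present (protection)" : "not-present",
	       error_code);
}

int main(void)
{
	for (unsigned long code = 0; code < 4; code++)
		decode_fault(code);
	return 0;
}
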
Example No. 9
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if ((clone_flags & CSIGNAL) != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);
		atomic_notifier_call_chain(&task_fork_notifier, 0, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
			get_task_struct(p);
		}

		wake_up_new_task(p);

		
		if (unlikely(trace))
			ptrace_event(trace, nr);

		if (clone_flags & CLONE_VFORK) {
			if (!wait_for_vfork_done(p, &vfork))
				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
Example No. 10
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * Determine whether and which event to report to ptracer.  When
	 * called from kernel_thread or CLONE_UNTRACED is explicitly
	 * requested, no event is reported; otherwise, report if the event
	 * for the type of forking is enabled.
	 */
	if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if ((clone_flags & CSIGNAL) != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior to waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
			get_task_struct(p);
		}

		wake_up_new_task(p);

		/* forking complete and child started to run, tell ptracer */
		if (unlikely(trace))
			ptrace_event(trace, nr);

		if (clone_flags & CLONE_VFORK) {
			if (!wait_for_vfork_done(p, &vfork))
				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
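
The put_user(nr, parent_tidptr) step above is what CLONE_PARENT_SETTID asks for: the new task's id is written back to a user-supplied pointer before clone() returns in the parent. A userspace sketch using the glibc clone() wrapper (Linux-specific, error handling kept minimal).

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

#define STACK_SIZE (1024 * 1024)

static int child_fn(void *arg)
{
	(void)arg;
	printf("child:  my pid is %d\n", getpid());
	return 0;
}

int main(void)
{
	char *stack = malloc(STACK_SIZE);
	pid_t child_tid = 0;

	if (!stack)
		return 1;

	/* CLONE_PARENT_SETTID asks the kernel to store the new TID at
	 * &child_tid -- the put_user(nr, parent_tidptr) step above. */
	pid_t pid = clone(child_fn, stack + STACK_SIZE,
			  CLONE_PARENT_SETTID | SIGCHLD,
			  NULL, &child_tid);
	if (pid < 0) {
		perror("clone");
		return 1;
	}

	printf("parent: clone() returned %d, CLONE_PARENT_SETTID wrote %d\n",
	       pid, child_tid);
	waitpid(pid, NULL, 0);
	free(stack);
	return 0;
}
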
Example No. 11
void do_page_fault(struct pt_regs *regs, unsigned long code,
			      unsigned long address)
{
	struct vm_area_struct * vma;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const struct exception_table_entry *fix;
	unsigned long acc_type;

	if (in_interrupt() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);
	vma = pa_find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (address < vma->vm_end)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSUP) || expand_stackup(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access. We still need to
 * check the access permissions.
 */

good_area:

	acc_type = parisc_acctyp(code,regs->iir);

	if ((vma->vm_flags & acc_type) != acc_type)
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault, make
	 * sure we exit gracefully rather than endlessly redo the
	 * fault.
	 */

	switch (handle_mm_fault(mm, vma, address, (acc_type & VM_WRITE) != 0)) {
	      case 1:
		++current->min_flt;
		break;
	      case 2:
		++current->maj_flt;
		break;
	      case 0:
		/*
		 * We ran out of memory, or some other thing happened
		 * to us that made us unable to handle the page fault
		 * gracefully.
		 */
		goto bad_area;
	      default:
		goto out_of_memory;
	}
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 */
bad_area:
	up_read(&mm->mmap_sem);

	if (user_mode(regs)) {
		struct siginfo si;

		printk("\ndo_page_fault() pid=%d command='%s'\n",
		    tsk->pid, tsk->comm);
		show_regs(regs);
		/* FIXME: actually we need to get the signo and code correct */
		si.si_signo = SIGSEGV;
		si.si_errno = 0;
		si.si_code = SEGV_MAPERR;
		si.si_addr = (void *) address;
		force_sig_info(SIGSEGV, &si, current);
		return;
	}

no_context:

	if (!user_mode(regs)) {

		fix = search_exception_table(regs->iaoq[0]);

		if (fix) {

			if (fix->skip & 1) 
				regs->gr[8] = -EFAULT;
			if (fix->skip & 2)
				regs->gr[9] = 0;

			regs->iaoq[0] += ((fix->skip) & ~3);

			/*
			 * NOTE: In some cases the faulting instruction
			 * may be in the delay slot of a branch. We
			 * don't want to take the branch, so we don't
			 * increment iaoq[1], instead we set it to be
			 * iaoq[0]+4, and clear the B bit in the PSW
			 */

			regs->iaoq[1] = regs->iaoq[0] + 4;
			regs->gr[0] &= ~PSW_B; /* IPSW in gr[0] */

			return;
		}
	}

	parisc_terminate("Bad Address (null pointer deref?)",regs,code,address);

  out_of_memory:
	up_read(&mm->mmap_sem);
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;
}
Example No. 12
/*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init even with a SIGKILL even by
 * mistake.
 *
 * Note that we go through the signals twice: once to check the signals that
 * the kernel can handle, and then we build all the user-level signal handling
 * stack-frames in one go after that.
 */
int do_signal(struct pt_regs *regs, sigset_t *oldset)
{
	siginfo_t info;
	struct k_sigaction *ka;

	/*
	 * We want the common case to go fast, which
	 * is why we may in certain cases get here from
	 * kernel mode. Just return without doing anything
	 * if so.
	 */
	if (!user_mode(regs))
		return 1;

	if (!oldset)
		oldset = &current->blocked;

	for (;;) {
		unsigned long signr;

		spin_lock_irq(&current->sigmask_lock);
		signr = dequeue_signal(&current->blocked, &info);
		spin_unlock_irq(&current->sigmask_lock);

		if (!signr)
			break;

		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
			/* Let the debugger run.  */
			current->exit_code = signr;
			current->state = TASK_STOPPED;
			notify_parent(current, SIGCHLD);
			schedule();

			/* We're back.  Did the debugger cancel the sig?  */
			if (!(signr = current->exit_code))
				continue;
			current->exit_code = 0;

			/* The debugger continued.  Ignore SIGSTOP.  */
			if (signr == SIGSTOP)
				continue;

			/* Update the siginfo structure.  Is this good?  */
			if (signr != info.si_signo) {
				info.si_signo = signr;
				info.si_errno = 0;
				info.si_code = SI_USER;
				info.si_pid = current->p_pptr->pid;
				info.si_uid = current->p_pptr->uid;
			}

			/* If the (new) signal is now blocked, requeue it.  */
			if (sigismember(&current->blocked, signr)) {
				send_sig_info(signr, &info, current);
				continue;
			}
		}

		ka = &current->sig->action[signr-1];
		if (ka->sa.sa_handler == SIG_IGN) {
			if (signr != SIGCHLD)
				continue;
			/* Check for SIGCHLD: it's special.  */
			while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
				/* nothing */;
			continue;
		}

		if (ka->sa.sa_handler == SIG_DFL) {
			int exit_code = signr;

			/* Init gets no signals it doesn't want.  */
			if (current->pid == 1)
				continue;

			switch (signr) {
			case SIGCONT: case SIGCHLD: case SIGWINCH:
				continue;

			case SIGTSTP: case SIGTTIN: case SIGTTOU:
				if (is_orphaned_pgrp(current->pgrp))
					continue;
				/* FALLTHRU */

			case SIGSTOP:
				current->state = TASK_STOPPED;
				current->exit_code = signr;
				if (!(current->p_pptr->sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
					notify_parent(current, SIGCHLD);
				schedule();
				continue;

			case SIGQUIT: case SIGILL: case SIGTRAP:
			case SIGABRT: case SIGFPE: case SIGSEGV:
			case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
				if (do_coredump(signr, regs))
					exit_code |= 0x80;
				/* FALLTHRU */

			default:
				sigaddset(&current->pending.signal, signr);
				recalc_sigpending(current);
				current->flags |= PF_SIGNALED;
				do_exit(exit_code);
				/* NOTREACHED */
			}
		}

		/* Whee!  Actually deliver the signal.  */
		handle_signal(signr, ka, &info, oldset, regs);
		return 1;
	}

	/* Did we come from a system call? */
	if (PT_REGS_SYSCALL (regs)) {
		/* Restart the system call - no handlers present */
		if (regs->gpr[GPR_RVAL] == -ERESTARTNOHAND ||
		    regs->gpr[GPR_RVAL] == -ERESTARTSYS ||
		    regs->gpr[GPR_RVAL] == -ERESTARTNOINTR) {
			regs->gpr[12] = PT_REGS_SYSCALL (regs);
				regs->pc -= 12; /* 4 + 8 for microblaze return offset weirdness */
		}
	}
	return 0;
}
Example No. 13
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior to waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		wake_up_new_task(p);

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
Example No. 14
static void timer_notify(struct pt_regs *regs, int cpu)
{
	struct trace_array_cpu *data;
	struct stack_frame frame;
	struct trace_array *tr;
	const void __user *fp;
	int is_user;
	int i;

	if (!regs)
		return;

	tr = sysprof_trace;
	data = tr->data[cpu];
	is_user = user_mode(regs);

	if (!current || current->pid == 0)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	__trace_special(tr, data, 0, 0, current->pid);

	if (!is_user)
		i = trace_kernel(regs, tr, data);
	else
		i = 0;

	/*
	 * Trace user stack if we are not a kernel thread
	 */
	if (current->mm && i < sample_max_depth) {
		regs = (struct pt_regs *)current->thread.sp0 - 1;

		fp = (void __user *)regs->bp;

		__trace_special(tr, data, 2, regs->ip, 0);

		while (i < sample_max_depth) {
			frame.next_fp = NULL;
			frame.return_address = 0;
			if (!copy_stack_frame(fp, &frame))
				break;
			if ((unsigned long)fp < regs->sp)
				break;

			__trace_special(tr, data, 2, frame.return_address,
					(unsigned long)fp);
			fp = frame.next_fp;

			i++;
		}

	}

	/*
	 * Special trace entry if we overflow the max depth:
	 */
	if (i == sample_max_depth)
		__trace_special(tr, data, -1, -1, -1);

	__trace_special(tr, data, 3, current->pid, i);
}
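
The sampling loop above follows saved frame pointers (next_fp / return_address pairs) up the user stack. Ordinary userspace code can get a similar list of return addresses from glibc's backtrace(); a minimal example is below (build with frame pointers and -rdynamic if you want readable symbol names).

#include <execinfo.h>
#include <stdio.h>

static void leaf(void)
{
	void *frames[16];
	int n = backtrace(frames, 16);	/* return addresses, innermost first */

	printf("captured %d frames:\n", n);
	backtrace_symbols_fd(frames, n, 1);	/* write symbol names to stdout */
}

static void middle(void) { leaf(); }

int main(void)
{
	middle();
	return 0;
}
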
Example No. 15
/*
 * Copy architecture-specific thread state
 */
int copy_thread(unsigned long clone_flags, unsigned long usp,
		unsigned long unused, struct task_struct *p,
		struct pt_regs *regs)
{
	struct thread_info *ti = task_thread_info(p);
	struct hexagon_switch_stack *ss;
	struct pt_regs *childregs;
	asmlinkage void ret_from_fork(void);

	childregs = (struct pt_regs *) (((unsigned long) ti + THREAD_SIZE) -
					sizeof(*childregs));

	memcpy(childregs, regs, sizeof(*childregs));
	ti->regs = childregs;

	/*
	 * Establish kernel stack pointer and initial PC for new thread
	 */
	ss = (struct hexagon_switch_stack *) ((unsigned long) childregs -
						    sizeof(*ss));
	ss->lr = (unsigned long)ret_from_fork;
	p->thread.switch_sp = ss;

	/* If User mode thread, set pt_reg stack pointer as per parameter */
	if (user_mode(childregs)) {
		pt_set_rte_sp(childregs, usp);

		/* Child sees zero return value */
		childregs->r00 = 0;

		/*
		 * The clone syscall has the C signature:
		 * int [r0] clone(int flags [r0],
		 *           void *child_frame [r1],
		 *           void *parent_tid [r2],
		 *           void *child_tid [r3],
		 *           void *thread_control_block [r4]);
		 * ugp is used to provide TLS support.
		 */
		if (clone_flags & CLONE_SETTLS)
			childregs->ugp = childregs->r04;

		/*
		 * Parent sees new pid -- not necessary, not even possible at
		 * this point in the fork process
		 * Might also want to set things like ti->addr_limit
		 */
	} else {
		/*
		 * If kernel thread, resume stack is kernel stack base.
		 * Note that this is pointer arithmetic on pt_regs *
		 */
		pt_set_rte_sp(childregs, (unsigned long)(childregs + 1));
		/*
		 * We need the current thread_info fast path pointer
		 * set up in pt_regs.  The register to be used is
		 * parametric for assembler code, but the mechanism
		 * doesn't drop neatly into C.  Needs to be fixed.
		 */
		childregs->THREADINFO_REG = (unsigned long) ti;
	}

	/*
	 * thread_info pointer is pulled out of task_struct "stack"
	 * field on switch_to.
	 */
	p->stack = (void *)ti;

	return 0;
}
Example No. 16
asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
				 unsigned long esr0)
{
	static DEFINE_SPINLOCK(atomic_op_lock);
	unsigned long x, y, z;
	unsigned long __user *p;
	mm_segment_t oldfs;
	siginfo_t info;
	int ret;

	y = 0;
	z = 0;

	oldfs = get_fs();
	if (!user_mode(__frame))
		set_fs(KERNEL_DS);

	switch (__frame->tbr & TBR_TT) {
		/* emulate an atomic compare-and-exchange on a 32-bit user word */
	case TBR_TT_ATOMIC_CMPXCHG32:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;
		y = __frame->gr10;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			if (z != x)
				goto done;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				if (z != x)
					goto done2;

				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic exchange of a 32-bit user word */
	case TBR_TT_ATOMIC_XCHG32:
		p = (unsigned long __user *) __frame->gr8;
		y = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic XOR on a 32-bit user word */
	case TBR_TT_ATOMIC_XOR:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				y = x ^ z;
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic OR on a 32-bit user word */
	case TBR_TT_ATOMIC_OR:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				y = x | z;
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic AND on a 32-bit user word */
	case TBR_TT_ATOMIC_AND:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				y = x & z;
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic subtract on a 32-bit user word */
	case TBR_TT_ATOMIC_SUB:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				y = z - x;
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

		/* emulate an atomic add on a 32-bit user word */
	case TBR_TT_ATOMIC_ADD:
		p = (unsigned long __user *) __frame->gr8;
		x = __frame->gr9;

		for (;;) {
			ret = get_user(z, p);
			if (ret < 0)
				goto error;

			spin_lock_irq(&atomic_op_lock);

			if (__get_user(z, p) == 0) {
				y = z + x;
				if (__put_user(y, p) == 0)
					goto done2;
				goto error2;
			}

			spin_unlock_irq(&atomic_op_lock);
		}

	default:
		BUG();
	}

done2:
	spin_unlock_irq(&atomic_op_lock);
done:
	if (!user_mode(__frame))
		set_fs(oldfs);
	__frame->gr5 = z;
	__frame->gr9 = y;
	return;

error2:
	spin_unlock_irq(&atomic_op_lock);
error:
	if (!user_mode(__frame))
		set_fs(oldfs);
	__frame->pc -= 4;

	die_if_kernel("-- Atomic Op Error --\n");

	info.si_signo	= SIGSEGV;
	info.si_code	= SEGV_ACCERR;
	info.si_errno	= 0;
	info.si_addr	= (void __user *) __frame->pc;

	force_sig_info(info.si_signo, &info, current);
}
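
Each case above emulates, under a spinlock, what a single atomic instruction would do in one step. On hosts that do have such instructions, the compiler exposes them as __atomic builtins; the sketch below builds an atomic add out of a compare-and-exchange retry loop, the same shape as the CMPXCHG32 emulation, just one instruction per attempt.

#include <stdio.h>

/* Add `delta` to *p atomically using a compare-and-exchange retry loop. */
static unsigned long atomic_add_return(unsigned long *p, unsigned long delta)
{
	unsigned long old = __atomic_load_n(p, __ATOMIC_RELAXED);

	/* If *p still equals old, replace it with old + delta; otherwise
	 * `old` is refreshed with the current value and we retry. */
	while (!__atomic_compare_exchange_n(p, &old, old + delta,
					    0, __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST))
		;
	return old + delta;
}

int main(void)
{
	unsigned long counter = 40;

	printf("counter is now %lu\n", atomic_add_return(&counter, 2));
	return 0;
}
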
Example No. 17
/*
 * Main implementation of the fork system call, invoked by the fork, vfork
 * and clone families of functions (process.c); they differ only in the
 * clone_flags they pass to do_fork.
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long nr;

	/*
	 * Do some preliminary argument and permissions checking before we
	 * actually start allocating stuff
	 */
	if (clone_flags & CLONE_NEWUSER) {
		if (clone_flags & CLONE_THREAD)
			return -EINVAL;
		/* hopefully this check will go away when userns support is
		 * complete
		 */
		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
				!capable(CAP_SETGID))
			return -EPERM;
	}

	/*
	 * When called from kernel_thread, don't do user tracing stuff.
	 */
	if (likely(user_mode(regs)))
		trace = tracehook_prepare_clone(clone_flags);
	/* copy_process() does the bulk of the work for do_fork */
	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior to waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	if (!IS_ERR(p)) {
		struct completion vfork;

		trace_sched_process_fork(current, p);

		nr = task_pid_vnr(p);

		if (clone_flags & CLONE_PARENT_SETTID)
			put_user(nr, parent_tidptr);

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		audit_finish_fork(p);
		tracehook_report_clone(regs, clone_flags, nr, p);

		/*
		 * We set PF_STARTING at creation in case tracing wants to
		 * use this to distinguish a fully live task from one that
		 * hasn't gotten to tracehook_report_clone() yet.  Now we
		 * clear it and set the child going.
		 */
		p->flags &= ~PF_STARTING;

		wake_up_new_task(p, clone_flags);

		tracehook_report_clone_complete(trace, regs,
						clone_flags, nr, p);

		if (clone_flags & CLONE_VFORK) {
			freezer_do_not_count();
			wait_for_completion(&vfork);
			freezer_count();
			tracehook_report_vfork_done(p, nr);
		}
	} else {
		nr = PTR_ERR(p);
	}
	return nr;
}
Example No. 18
static void __kprobes
do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
	long error_code, siginfo_t *info)
{
	struct task_struct *tsk = current;

#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK) {
		/*
		 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
		 * On nmi (interrupt 2), do_trap should not be called.
		 */
		if (trapnr < 6)
			goto vm86_trap;
		goto trap_signal;
	}
#endif

	if (!user_mode(regs))
		goto kernel_trap;

#ifdef CONFIG_X86_32
trap_signal:
#endif
	/*
	 * We want error_code and trap_no set for userspace faults and
	 * kernelspace faults which result in die(), but not
	 * kernelspace faults which are fixed up.  die() gives the
	 * process no chance to handle the signal and notice the
	 * kernel fault information, so that won't result in polluting
	 * the information about previously queued, but not yet
	 * delivered, faults.  See also do_general_protection below.
	 */
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = trapnr;

#ifdef CONFIG_X86_64
	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
	    printk_ratelimit()) {
		printk(KERN_INFO
		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
		       tsk->comm, tsk->pid, str,
		       regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}
#endif

	if (info)
		force_sig_info(signr, info, tsk);
	else
		force_sig(signr, tsk);
	return;

kernel_trap:
	if (!fixup_exception(regs)) {
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = trapnr;
		die(str, regs, error_code);
	}
	return;

#ifdef CONFIG_X86_32
vm86_trap:
	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
						error_code, trapnr))
		goto trap_signal;
	return;
#endif
}
Example No. 19
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled and timer->base->cpu_base->lock held.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
    struct tick_sched *ts =
        container_of(timer, struct tick_sched, sched_timer);
    struct pt_regs *regs = get_irq_regs();
    ktime_t now = ktime_get();
    int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ
    /*
     * Check if the do_timer duty was dropped. We don't care about
     * concurrency: This happens only when the cpu in charge went
     * into a long sleep. If two cpus happen to assign themself to
     * this duty, then the jiffies update is still serialized by
     * xtime_lock.
     */
    if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
        tick_do_timer_cpu = cpu;
#endif

    /* Check, if the jiffies need an update */
    if (tick_do_timer_cpu == cpu)
        tick_do_update_jiffies64(now);

    /*
     * Do not call, when we are not in irq context and have
     * no valid regs pointer
     */
    if (regs) {
        /*
         * When we are idle and the tick is stopped, we have to touch
         * the watchdog as we might not schedule for a really long
         * time. This happens on complete idle SMP systems while
         * waiting on the login prompt. We also increment the "start of
         * idle" jiffy stamp so the idle accounting adjustment we do
         * when we go busy again does not account for too many ticks.
         */
        if (ts->tick_stopped) {
            touch_softlockup_watchdog();
            ts->idle_jiffies++;
        }
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);

        if ((rq_info.init == 1) && (tick_do_timer_cpu == cpu)) {

            /*
             * update run queue statistics
             */
            update_rq_stats();

            /*
             * wakeup user if needed
             */
            wakeup_user();
        }
    }

    hrtimer_forward(timer, now, tick_period);

    return HRTIMER_RESTART;
}
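
tick_sched_timer() re-arms itself with hrtimer_forward(), so missed periods are counted rather than lost. Userspace gets the same periodic, catch-up-counted behaviour from the Linux timerfd API, where read() reports how many expirations have elapsed; a short sketch.

#include <stdint.h>
#include <stdio.h>
#include <sys/timerfd.h>
#include <unistd.h>

int main(void)
{
	int fd = timerfd_create(CLOCK_MONOTONIC, 0);
	struct itimerspec its = {
		.it_value    = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 },
		.it_interval = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 },
	};

	timerfd_settime(fd, 0, &its, NULL);	/* fire every 100 ms */

	for (int i = 0; i < 5; i++) {
		uint64_t expirations;

		/* Like hrtimer_forward(), missed periods accumulate; read()
		 * reports how many fired since the last read. */
		read(fd, &expirations, sizeof(expirations));
		printf("tick %d: %llu expiration(s)\n", i,
		       (unsigned long long)expirations);
	}
	close(fd);
	return 0;
}
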
Example No. 20
/*
 * Our handling of the processor debug registers is non-trivial.
 * We do not clear them on entry and exit from the kernel. Therefore
 * it is possible to get a watchpoint trap here from inside the kernel.
 * However, the code in ./ptrace.c has ensured that the user can
 * only set watchpoints on userspace addresses. Therefore the in-kernel
 * watchpoint trap can only occur in code which is reading/writing
 * from user space. Such code must not hold kernel locks (since it
 * can equally take a page fault), therefore it is safe to call
 * force_sig_info even though that claims and releases locks.
 *
 * Code in ./signal.c ensures that the debug control register
 * is restored before we deliver any signal, and therefore that
 * user code runs with the correct debug control register even though
 * we clear it here.
 *
 * Being careful here means that we don't have to be as careful in a
 * lot of more complicated places (task switching can be a bit lazy
 * about restoring all the debug state, and ptrace doesn't have to
 * find every occurrence of the TF bit that could be saved away even
 * by user code)
 *
 * May run on IST stack.
 */
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk = current;
	unsigned long condition;
	int si_code;

	get_debugreg(condition, 6);

	/* Catch kmemcheck conditions first of all! */
	if (condition & DR_STEP && kmemcheck_trap(regs))
		return;

	/*
	 * The processor cleared BTF, so don't mark that we need it set.
	 */
	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
	tsk->thread.debugctlmsr = 0;

#ifdef  CONFIG_KDB
	if (kdb(KDB_REASON_DEBUG, error_code, regs))
		return;
#endif  /* CONFIG_KDB */

	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
						SIGTRAP) == NOTIFY_STOP)
		return;

	/* It's safe to allow irq's after DR6 has been saved */
	preempt_conditional_sti(regs);

	/* Mask out spurious debug traps due to lazy DR7 setting */
	if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
		if (!tsk->thread.debugreg7)
			goto clear_dr7;
	}

#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK)
		goto debug_vm86;
#endif

	/* Save debug status register where ptrace can see it */
	tsk->thread.debugreg6 = condition;

	/*
	 * Single-stepping through TF: make sure we ignore any events in
	 * kernel space (but re-enable TF when returning to user mode).
	 */
	if (condition & DR_STEP) {
		if (!user_mode(regs))
			goto clear_TF_reenable;
	}

	si_code = get_si_code(condition);
	/* Ok, finally something we can handle */
	send_sigtrap(tsk, regs, error_code, si_code);

	/*
	 * Disable additional traps. They'll be re-enabled when
	 * the signal is delivered.
	 */
clear_dr7:
	set_debugreg(0, 7);
	preempt_conditional_cli(regs);
	return;

#ifdef CONFIG_X86_32
debug_vm86:
	/* reenable preemption: handle_vm86_trap() might sleep */
	dec_preempt_count();
	handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
	conditional_cli(regs);
	return;
#endif

clear_TF_reenable:
	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
	regs->flags &= ~X86_EFLAGS_TF;
	preempt_conditional_cli(regs);
	return;
}
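
The TF/TIF_SINGLESTEP handling above is the kernel half of PTRACE_SINGLESTEP: the tracer asks for one instruction, the tracee traps, and the debug exception comes back to the tracer as a SIGTRAP stop. A compact tracer/tracee demo using the standard ptrace(2) requests (Linux-specific).

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* Tracee: opt in to tracing, then stop so the parent syncs up. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		for (volatile int i = 0; i < 1000; i++)
			;			/* something to step through */
		_exit(0);
	}

	int status;
	waitpid(child, &status, 0);		/* child stopped with SIGSTOP */

	for (int i = 0; i < 5; i++) {
		/* One instruction, then a SIGTRAP stop back to us. */
		ptrace(PTRACE_SINGLESTEP, child, NULL, NULL);
		waitpid(child, &status, 0);
		printf("step %d: stop signal %d (SIGTRAP=%d)\n",
		       i, WSTOPSIG(status), SIGTRAP);
	}

	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, &status, 0);
	return 0;
}
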
Example No. 21
/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 * We set it up to overflow again in 1/HZ seconds.
 */
void timer_interrupt(struct pt_regs * regs)
{
	int next_dec;
	unsigned long cpu = smp_processor_id();
	unsigned jiffy_stamp = last_jiffy_stamp(cpu);
	extern void do_IRQ(struct pt_regs *);

	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);
#if 0   /* added by 2.6.17 lttng patch */
 	trace_kernel_trap_entry(regs->trap, instruction_pointer(regs));
#endif
	irq_enter();

	while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) {
		jiffy_stamp += tb_ticks_per_jiffy;
		
		profile_tick(CPU_PROFILING, regs);
		update_process_times(user_mode(regs));

	  	if (smp_processor_id())
			continue;

		/* We are in an interrupt, no need to save/restore flags */
		write_seqlock(&xtime_lock);
		tb_last_stamp = jiffy_stamp;
#ifdef CONFIG_LTT
		ltt_reset_timestamp();
#endif //CONFIG_LTT
		do_timer(regs);

		/*
		 * update the rtc when needed, this should be performed on the
		 * right fraction of a second. Half or full second ?
		 * Full second works on mk48t59 clocks, others need testing.
		 * Note that this update is basically only used through
		 * the adjtimex system calls. Setting the HW clock in
		 * any other way is a /dev/rtc and userland business.
		 * This is still wrong by -0.5/+1.5 jiffies because of the
		 * timer interrupt resolution and possible delay, but here we
		 * hit a quantization limit which can only be solved by higher
		 * resolution timers and decoupling time management from timer
		 * interrupts. This is also wrong on the clocks
		 * which require being written at the half second boundary.
		 * We should have an rtc call that only sets the minutes and
		 * seconds like on Intel to avoid problems with non UTC clocks.
		 */
		if ( ppc_md.set_rtc_time && ntp_synced() &&
		     xtime.tv_sec - last_rtc_update >= 659 &&
		     abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ &&
		     jiffies - wall_jiffies == 1) {
		  	if (ppc_md.set_rtc_time(xtime.tv_sec+1 + timezone_offset) == 0)
				last_rtc_update = xtime.tv_sec+1;
			else
				/* Try again one minute later */
				last_rtc_update += 60;
		}
		write_sequnlock(&xtime_lock);
	}
	if ( !disarm_decr[smp_processor_id()] )
		set_dec(next_dec);
	last_jiffy_stamp(cpu) = jiffy_stamp;

	if (ppc_md.heartbeat && !ppc_md.heartbeat_count--)
		ppc_md.heartbeat();

	irq_exit();
#if 0   /* added by 2.6.17 lttng patch */
 	trace_kernel_trap_exit();
#endif
}
Example No. 22
static int do_grant(struct Client *source_p, struct Client *target_p, const char *new_privset, int deoper)
{
    int dooper = 0, dodeoper = 0;
    struct PrivilegeSet *privset = 0;

    if (deoper)
    {
        if (!IsOper(target_p))
        {
            sendto_one_notice(source_p, ":You can't deoper someone who isn't an oper.");
            return 0;
        }
        new_privset = "default";
        dodeoper = 1;

        sendto_one_notice(target_p, ":%s is deopering you.", source_p->name);
        sendto_realops_snomask(SNO_GENERAL, L_NETWIDE, "%s is deopering %s.",
                               get_oper_name(source_p), target_p->name);
    }
    else
    {
        if (!(privset = privilegeset_get(new_privset)))
        {
            sendto_one_notice(source_p, ":There is no privilege set named '%s'.",
                              new_privset);
            return 0;
        }

        if (privset == target_p->localClient->privset)
        {
            sendto_one_notice(source_p, ":%s already has privilege set %s.",
                              target_p->name, target_p->localClient->privset->name);
            return 0;
        }
    }

    if (!dodeoper)
    {
        if (!IsOper(target_p))
        {
            sendto_one_notice(target_p, ":%s is opering you with privilege set %s",
                              source_p->name, privset->name);
            sendto_realops_snomask(SNO_GENERAL, L_NETWIDE,
                                   "%s is opering %s with privilege set %s",
                                   get_oper_name(source_p), target_p->name,
                                   privset->name);
            dooper = 1;
        }
        else
        {
            sendto_one_notice(target_p, ":%s is changing your privilege set to %s",
                              source_p->name, privset->name);
            sendto_realops_snomask(SNO_GENERAL, L_NETWIDE,
                                   "%s is changing the privilege set of %s to %s",
                                   get_oper_name(source_p), target_p->name,
                                   privset->name);
        }
    }

    if (dodeoper)
    {
        const char *modeparv[4];
        modeparv[0] = modeparv[1] = target_p->name;
        modeparv[2] = "-o";
        modeparv[3] = NULL;
        user_mode(target_p, target_p, 3, modeparv);
    }

    if (dooper)
    {
        struct oper_conf oper;
        memset(&oper, '\0', sizeof(oper));

        oper.name = "<grant>";
        oper.umodes = 0;
        oper.snomask = 0;
        oper.privset = privset;

        oper_up(target_p, &oper);
    }

    target_p->localClient->privset = privset;
    const char *modeparv[4];
    modeparv[0] = modeparv[1] = target_p->name;
    modeparv[2] = "+";
    modeparv[3] = NULL;
    user_mode(target_p, target_p, 3, modeparv);

    return 0;
}
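Note that user_mode() in this example is the ircd's user-mode handler (this is charybdis/ratbox-style code), not the kernel's register-state predicate used in the other examples. It takes an argv-style parameter vector: the snippet fills parv[0] and parv[1] with the nick, puts the mode string in parv[2], and NULL-terminates the array. A hedged sketch of just that array-building step, with a hypothetical apply_umode() standing in for the real user_mode() machinery:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for the ircd's user_mode()/umode machinery. */
static void apply_umode(const char *parv[], int parc)
{
	printf("target=%s modes=%s (parc=%d)\n", parv[0], parv[2], parc);
}

/* Build the argv-style parameter vector the way do_grant() does. */
static void strip_oper(const char *nick)
{
	const char *modeparv[4];

	modeparv[0] = modeparv[1] = nick;	/* nick, as in the snippet */
	modeparv[2] = "-o";			/* drop operator status */
	modeparv[3] = NULL;			/* NULL-terminated like argv */

	apply_umode(modeparv, 3);
}

int main(void)
{
	strip_oper("someoper");
	return 0;
}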
Exemplo n.º 23
0
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int fault, sig, code;
	unsigned long vm_flags = VM_READ | VM_WRITE;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;
	mm  = tsk->mm;

	
	if (interrupts_enabled(regs))
		local_irq_enable();

	if (in_atomic() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (esr & ESR_LNX_EXEC) {
		vm_flags = VM_EXEC;
	} else if (esr & ESR_EL1_WRITE) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;


	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}
		if (fault & VM_FAULT_RETRY) {
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);

	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS))))
		return 0;

	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		pagefault_out_of_memory();
		return 0;
	}

	if (fault & VM_FAULT_SIGBUS) {
		sig = SIGBUS;
		code = BUS_ADRERR;
	} else {
		sig = SIGSEGV;
		code = fault == VM_FAULT_BADACCESS ?
			SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(tsk, addr, esr, sig, code, regs);
	return 0;

no_context:
	__do_kernel_fault(mm, addr, esr, regs);
	return 0;
}
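The locking/retry dance above follows the generic FAULT_FLAG_ALLOW_RETRY protocol: the first attempt may drop mmap_sem and report VM_FAULT_RETRY, in which case the caller clears the retry flag and tries exactly once more. A compressed, userspace-only model of that control flow (the flag values are illustrative, not the kernel's):

#include <stdio.h>

#define FAULT_FLAG_ALLOW_RETRY	0x01	/* illustrative, not the kernel value */
#define VM_FAULT_RETRY		0x02	/* illustrative, not the kernel value */

/* Pretend fault handler: asks for a retry until retries are disallowed. */
static int fake_handle_fault(unsigned int flags)
{
	return (flags & FAULT_FLAG_ALLOW_RETRY) ? VM_FAULT_RETRY : 0;
}

int main(void)
{
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY;
	int fault, attempts = 0;

retry:
	attempts++;
	fault = fake_handle_fault(flags);
	if (fault & VM_FAULT_RETRY) {
		flags &= ~FAULT_FLAG_ALLOW_RETRY;	/* the second try is final */
		goto retry;
	}
	printf("fault handled after %d attempt(s)\n", attempts);
	return 0;
}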
Exemplo n.º 24
0
/*
 * Note that `init' is a special process: it doesn't get signals it doesn't want to
 * handle.  Thus you cannot kill init, even with SIGKILL, even by mistake.
 */
void
ia64_do_signal (struct sigscratch *scr, long in_syscall)
{
	struct k_sigaction ka;
	sigset_t *oldset;
	siginfo_t info;
	long restart = in_syscall;
	long errno = scr->pt.r8;
#	define ERR_CODE(c)	(IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))

	/*
	 * In the ia64_leave_kernel code path, we want the common case to go fast, which
	 * is why we may in certain cases get here from kernel mode. Just return without
	 * doing anything if so.
	 */
	if (!user_mode(&scr->pt))
		return;

	if (test_thread_flag(TIF_RESTORE_SIGMASK))
		oldset = &current->saved_sigmask;
	else
		oldset = &current->blocked;

	/*
	 * This only loops in the rare cases of handle_signal() failing, in which case we
	 * need to push through a forced SIGSEGV.
	 */
	while (1) {
		int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL);

		/*
		 * get_signal_to_deliver() may have run a debugger (via notify_parent())
		 * and the debugger may have modified the state (e.g., to arrange for an
		 * inferior call), thus it's important to check for restarting _after_
		 * get_signal_to_deliver().
		 */
		if (IS_IA32_PROCESS(&scr->pt)) {
			if (in_syscall) {
				if (errno >= 0)
					restart = 0;
				else
					errno = -errno;
			}
		} else if ((long) scr->pt.r10 != -1)
			/*
			 * A system call has to be restarted only if one of the error codes
			 * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned.  If r10
			 * isn't -1 then r8 doesn't hold an error code and we don't need to
			 * restart the syscall, so we can clear the "restart" flag here.
			 */
			restart = 0;

		if (signr <= 0)
			break;

		if (unlikely(restart)) {
			switch (errno) {
			      case ERESTART_RESTARTBLOCK:
			      case ERESTARTNOHAND:
				scr->pt.r8 = ERR_CODE(EINTR);
				/* note: scr->pt.r10 is already -1 */
				break;

			      case ERESTARTSYS:
				if ((ka.sa.sa_flags & SA_RESTART) == 0) {
					scr->pt.r8 = ERR_CODE(EINTR);
					/* note: scr->pt.r10 is already -1 */
					break;
				}
			      case ERESTARTNOINTR:
				if (IS_IA32_PROCESS(&scr->pt)) {
					scr->pt.r8 = scr->pt.r1;
					scr->pt.cr_iip -= 2;
				} else
					ia64_decrement_ip(&scr->pt);
				restart = 0; /* don't restart twice if handle_signal() fails... */
			}
		}

		/*
		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
		 * continue to iterate in this loop so we can deliver the SIGSEGV...
		 */
		if (handle_signal(signr, &ka, &info, oldset, scr)) {
			/* a signal was successfully delivered; the saved
			 * sigmask will have been stored in the signal frame,
			 * and will be restored by sigreturn, so we can simply
			 * clear the TIF_RESTORE_SIGMASK flag */
			if (test_thread_flag(TIF_RESTORE_SIGMASK))
				clear_thread_flag(TIF_RESTORE_SIGMASK);
			return;
		}
	}

	/* Did we come from a system call? */
	if (restart) {
		/* Restart the system call - no handlers present */
		if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
		    || errno == ERESTART_RESTARTBLOCK)
		{
			if (IS_IA32_PROCESS(&scr->pt)) {
				scr->pt.r8 = scr->pt.r1;
				scr->pt.cr_iip -= 2;
				if (errno == ERESTART_RESTARTBLOCK)
					scr->pt.r8 = 0;	/* x86 version of __NR_restart_syscall */
			} else {
				/*
				 * Note: the syscall number is in r15 which is saved in
				 * pt_regs so all we need to do here is adjust ip so that
				 * the "break" instruction gets re-executed.
				 */
				ia64_decrement_ip(&scr->pt);
				if (errno == ERESTART_RESTARTBLOCK)
					scr->pt.r15 = __NR_restart_syscall;
			}
		}
	}

	/* if there's no signal to deliver, we just put the saved sigmask
	 * back */
	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
		clear_thread_flag(TIF_RESTORE_SIGMASK);
		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
	}
}
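The switch on errno encodes the usual syscall-restart policy once a handler is about to run: ERESTARTNOHAND and ERESTART_RESTARTBLOCK turn into EINTR, ERESTARTSYS restarts only if the handler was installed with SA_RESTART, and ERESTARTNOINTR always restarts. A small standalone sketch of that decision (the numeric codes mirror the kernel-internal values; the function itself is only a model):

#include <stdbool.h>
#include <stdio.h>

/* Kernel-internal restart codes (never visible to userspace). */
#define ERESTARTSYS		512
#define ERESTARTNOINTR		513
#define ERESTARTNOHAND		514
#define ERESTART_RESTARTBLOCK	516

enum restart_action { DELIVER_EINTR, RESTART_SYSCALL };

/* Sketch of the policy applied when a signal handler is about to run. */
static enum restart_action restart_policy(int err, bool sa_restart)
{
	switch (err) {
	case ERESTART_RESTARTBLOCK:
	case ERESTARTNOHAND:
		return DELIVER_EINTR;			/* handler sees EINTR */
	case ERESTARTSYS:
		return sa_restart ? RESTART_SYSCALL	/* SA_RESTART set */
				  : DELIVER_EINTR;
	case ERESTARTNOINTR:
		return RESTART_SYSCALL;			/* always restarted */
	default:
		return DELIVER_EINTR;
	}
}

int main(void)
{
	printf("ERESTARTSYS without SA_RESTART -> %s\n",
	       restart_policy(ERESTARTSYS, false) == RESTART_SYSCALL ?
	       "restart" : "EINTR");
	return 0;
}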
Exemplo n.º 25
0
/*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init, even with SIGKILL, even by
 * mistake.
 *
 * Note that we go through the signals twice: once to check the signals that
 * the kernel can handle, and then we build all the user-level signal handling
 * stack-frames in one go after that.
 */
int do_signal(struct pt_regs *regs, sigset_t *oldset)
{
	unsigned long retval = 0, continue_addr = 0, restart_addr = 0;
	siginfo_t info;
	int signr;
	struct k_sigaction ka;

	/*
	 * We want the common case to go fast, which
	 * is why we may in certain cases get here from
	 * kernel mode. Just return without doing anything
	 * if so.
	 */
	if (!user_mode(regs))
		return 1;

	if (!oldset)
		oldset = &current->blocked;

	/* Are we from a system call? */
	if (regs->trap == __LC_SVC_OLD_PSW) {
		continue_addr = regs->psw.addr;
		restart_addr = continue_addr - regs->ilc;
		retval = regs->gprs[2];

		/* Prepare for system call restart.  We do this here so that a
		   debugger will see the already changed PSW. */
		if (retval == -ERESTARTNOHAND ||
		    retval == -ERESTARTSYS ||
		    retval == -ERESTARTNOINTR) {
			regs->gprs[2] = regs->orig_gpr2;
			regs->psw.addr = restart_addr;
		} else if (retval == -ERESTART_RESTARTBLOCK) {
			regs->gprs[2] = -EINTR;
		}
	}

	/* Get signal to deliver.  When running under ptrace, at this point
	   the debugger may change all our registers ... */
	signr = get_signal_to_deliver(&info, &ka, regs, NULL);

	/* Depending on the signal settings we may need to revert the
	   decision to restart the system call. */
	if (signr > 0 && regs->psw.addr == restart_addr) {
		if (retval == -ERESTARTNOHAND
		    || (retval == -ERESTARTSYS
			 && !(current->sighand->action[signr-1].sa.sa_flags
			      & SA_RESTART))) {
			regs->gprs[2] = -EINTR;
			regs->psw.addr = continue_addr;
		}
	}

	if (signr > 0) {
		/* Whee!  Actually deliver the signal.  */
#ifdef CONFIG_S390_SUPPORT
		if (test_thread_flag(TIF_31BIT)) {
			extern void handle_signal32(unsigned long sig,
						    struct k_sigaction *ka,
						    siginfo_t *info,
						    sigset_t *oldset,
						    struct pt_regs *regs);
			handle_signal32(signr, &ka, &info, oldset, regs);
			return 1;
	        }
#endif
		handle_signal(signr, &ka, &info, oldset, regs);
		return 1;
	}

	/* Restart a different system call. */
	if (retval == -ERESTART_RESTARTBLOCK
	    && regs->psw.addr == continue_addr) {
		regs->gprs[2] = __NR_restart_syscall;
		set_thread_flag(TIF_RESTART_SVC);
	}
	return 0;
}
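A distinctive detail here is that the PSW is rewound speculatively, by the instruction length code (ilc), before get_signal_to_deliver() runs, so a ptrace debugger already observes the restarted state; the decision is then reverted if the chosen handler should not restart. A toy model of just that address bookkeeping (all values are made up):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned long psw_addr = 0x80001234UL;	/* address after the SVC */
	unsigned long ilc = 2;			/* length of the SVC instruction */
	bool sa_restart = false;		/* pretend handler lacks SA_RESTART */

	unsigned long continue_addr = psw_addr;		   /* keep going here */
	unsigned long restart_addr = continue_addr - ilc;  /* re-issue the SVC */

	/* Speculatively arrange the restart (a debugger sees this state)... */
	psw_addr = restart_addr;

	/* ...then revert it if the chosen handler should not restart. */
	if (!sa_restart)
		psw_addr = continue_addr;

	printf("resume at %#lx\n", psw_addr);
	return 0;
}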
Exemplo n.º 26
0
/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
				unsigned long address)
{
	struct vm_area_struct *vma = NULL;
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	const int field = sizeof(unsigned long) * 2;
	unsigned long flags = 0;
	siginfo_t info;
	int fault;

	info.si_code = SEGV_MAPERR;

	/*
	* We fault-in kernel-space virtual memory on-demand. The
	* 'reference' page table is init_mm.pgd.
	*
	* NOTE! We MUST NOT take any locks for this case. We may
	* be in an interrupt or a critical region, and should
	* only copy the information from the master page table,
	* nothing more.
	*/
	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
		goto vmalloc_fault;
#ifdef MODULE_START
	if (unlikely(address >= MODULE_START && address < MODULE_END))
		goto vmalloc_fault;
#endif

	/*
	* If we're in an interrupt or have no user
	* context, we must not take the fault..
	*/
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	if (user_mode(regs))
		flags |= FAULT_FLAG_USER;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;
	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	info.si_code = SEGV_ACCERR;

	if (write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
			goto bad_area;
	}

survive:
	/*
	* If for any reason at all we couldn't handle the fault,
	* make sure we exit gracefully rather than endlessly redo
	* the fault.
	*/
	fault = handle_mm_fault(mm, vma, address, flags);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		tsk->thread.cp0_badvaddr = address;
		tsk->thread.error_code = write;
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void __user *) address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs)) {
		current->thread.cp0_baduaddr = address;
		return;
	}

	/*
	* Oops. The kernel tried to access some bad page. We'll have to
	* terminate things with extreme prejudice.
	*/
	bust_spinlocks(1);

	printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
			"virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
			0, field, address, field, regs->cp0_epc,
			field, regs->regs[3]);
	die("Oops", regs);

	/*
	* We ran out of memory, or some other thing happened to us that made
	* us unable to handle the page fault gracefully.
	*/
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* Kernel mode? Handle exceptions or die. */
	if (!user_mode(regs))
		goto no_context;

	/*
	 * Send a SIGBUS, regardless of whether we were in kernel
	 * or user mode.
	 */
	tsk->thread.cp0_badvaddr = address;
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *) address;
	force_sig_info(SIGBUS, &info, tsk);
	return;
vmalloc_fault:
	{
		/*
		* Synchronize this task's top level page-table
		* with the 'reference' page table.
		*
		* Do _not_ use "tsk" here. We might be inside
		* an interrupt in the middle of a task switch..
		*/
		int offset = __pgd_offset(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

		pgd = (pgd_t *) pgd_current + offset;
		pgd_k = init_mm.pgd + offset;

		if (!pgd_present(*pgd_k))
			goto no_context;
		set_pgd(pgd, *pgd_k);

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);
		if (!pmd_present(*pmd_k))
			goto no_context;
		set_pmd(pmd, *pmd_k);

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;
		return;
	}
}
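The vmalloc_fault block copies the missing upper-level entries from the reference page table (init_mm.pgd) into the current one instead of taking the normal fault path. A toy single-level model of that synchronization, using plain arrays in place of real pgd/pmd types:

#include <stdbool.h>
#include <stdio.h>

#define PTRS_PER_PGD 8	/* toy size */

/* 0 means "not present" in this toy model. */
static unsigned long reference_pgd[PTRS_PER_PGD] = { 0, 0, 0, 0xbeef, 0, 0, 0, 0 };
static unsigned long current_pgd[PTRS_PER_PGD];

/* Copy one top-level slot from the reference table, as the
 * vmalloc_fault path does for the faulting address. */
static bool sync_pgd_entry(int offset)
{
	if (!reference_pgd[offset])
		return false;		/* genuinely bad access */
	current_pgd[offset] = reference_pgd[offset];
	return true;
}

int main(void)
{
	printf("slot 3 synced: %s (value %#lx)\n",
	       sync_pgd_entry(3) ? "yes" : "no", current_pgd[3]);
	printf("slot 5 synced: %s\n", sync_pgd_entry(5) ? "yes" : "no");
	return 0;
}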
Exemplo n.º 27
0
/*
 * Debug exception handlers.
 */
static int breakpoint_handler(unsigned long unused, unsigned int esr,
			      struct pt_regs *regs)
{
	int i, step = 0, *kernel_step;
	u32 ctrl_reg;
	u64 addr, val;
	struct perf_event *bp, **slots;
	struct debug_info *debug_info;
	struct arch_hw_breakpoint_ctrl ctrl;

	slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
	addr = instruction_pointer(regs);
	debug_info = &current->thread.debug;

	for (i = 0; i < core_num_brps; ++i) {
		rcu_read_lock();

		bp = slots[i];

		if (bp == NULL)
			goto unlock;

		/* Check if the breakpoint value matches. */
		val = read_wb_reg(AARCH64_DBG_REG_BVR, i);
		if (val != (addr & ~0x3))
			goto unlock;

		/* Possible match, check the byte address select to confirm. */
		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i);
		decode_ctrl_reg(ctrl_reg, &ctrl);
		if (!((1 << (addr & 0x3)) & ctrl.len))
			goto unlock;

		counter_arch_bp(bp)->trigger = addr;
		perf_bp_event(bp, regs);

		/* Do we need to handle the stepping? */
		if (!bp->overflow_handler)
			step = 1;
unlock:
		rcu_read_unlock();
	}

	if (!step)
		return 0;

	if (user_mode(regs)) {
		debug_info->bps_disabled = 1;
		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0);

		/* If we're already stepping a watchpoint, just return. */
		if (debug_info->wps_disabled)
			return 0;

		if (test_thread_flag(TIF_SINGLESTEP))
			debug_info->suspended_step = 1;
		else
			user_enable_single_step(current);
	} else {
		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0);
		kernel_step = &__get_cpu_var(stepping_kernel_bp);

		if (*kernel_step != ARM_KERNEL_STEP_NONE)
			return 0;

		if (kernel_active_single_step()) {
			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
		} else {
			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
			kernel_enable_single_step(regs);
		}
	}

	return 0;
}
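The match test combines two checks: the breakpoint value register must name the same 4-byte window as the faulting address, and the byte-address-select mask in the control register must enable the specific byte lane. A standalone version of that predicate with made-up register values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does the breakpoint at bvr, with byte-address-select mask bas,
 * cover the address addr?  Mirrors the check in breakpoint_handler();
 * the register values below are made up. */
static bool bp_matches(uint64_t bvr, unsigned int bas, uint64_t addr)
{
	if (bvr != (addr & ~(uint64_t)0x3))
		return false;			/* wrong 4-byte window */
	return (1u << (addr & 0x3)) & bas;	/* is that byte lane enabled? */
}

int main(void)
{
	uint64_t bvr = 0x400080;	/* hypothetical breakpoint value register */
	unsigned int bas = 0x5;		/* byte lanes 0 and 2 enabled */

	printf("0x400080 hit: %d\n", bp_matches(bvr, bas, 0x400080));
	printf("0x400081 hit: %d\n", bp_matches(bvr, bas, 0x400081));
	return 0;
}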
Exemplo n.º 28
0
/**
 * __do_IRQ - original all in one highlevel IRQ handler
 * @irq:	the interrupt number
 *
 * __do_IRQ handles all normal device IRQ's (the special
 * SMP cross-CPU interrupts have their own specific
 * handlers).
 *
 * This is the original x86 implementation which is used for every
 * interrupt type.
 */
fastcall notrace unsigned int __do_IRQ(unsigned int irq)
{
	struct irq_desc *desc = irq_desc + irq;
	struct irqaction *action;
	unsigned int status;

	kstat_this_cpu.irqs[irq]++;
	if (CHECK_IRQ_PER_CPU(desc->status)) {
		irqreturn_t action_ret;

		/*
		 * No locking required for CPU-local interrupts:
		 */
		if (desc->chip->ack)
			desc->chip->ack(irq);
		action_ret = handle_IRQ_event(irq, desc->action);
		desc->chip->end(irq);
		return 1;
	}
	/*
	 * If the task is currently running in user mode, don't
	 * detect soft lockups.  If CONFIG_DETECT_SOFTLOCKUP is not
	 * configured, this should be optimized out.
	 */
	if (user_mode(get_irq_regs()))
		touch_softlockup_watchdog();

	spin_lock(&desc->lock);
	if (desc->chip->ack)
		desc->chip->ack(irq);
	/*
	 * REPLAY is when Linux resends an IRQ that was dropped earlier
	 * WAITING is used by probe to mark irqs that are being tested
	 */
	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
	status |= IRQ_PENDING; /* we _want_ to handle it */

	/*
	 * If the IRQ is disabled for whatever reason, we cannot
	 * use the action we have.
	 */
	action = NULL;
	if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
		action = desc->action;
		status &= ~IRQ_PENDING; /* we commit to handling */
		status |= IRQ_INPROGRESS; /* we are handling it */
	}
	desc->status = status;

	/*
	 * If there is no IRQ handler or it was disabled, exit early.
	 * Since we set PENDING, if another processor is handling
	 * a different instance of this same irq, the other processor
	 * will take care of it.
	 */
	if (unlikely(!action))
		goto out;

	/*
	 * Edge triggered interrupts need to remember
	 * pending events.
	 * This applies to any hw interrupts that allow a second
	 * instance of the same irq to arrive while we are in do_IRQ
	 * or in the handler. But the code here only handles the _second_
	 * instance of the irq, not the third or fourth. So it is mostly
	 * useful for irq hardware that does not mask cleanly in an
	 * SMP environment.
	 */
	for (;;) {
		irqreturn_t action_ret;

		spin_unlock(&desc->lock);

		action_ret = handle_IRQ_event(irq, action);
		if (!noirqdebug)
			note_interrupt(irq, desc, action_ret);

		spin_lock(&desc->lock);
		if (likely(!(desc->status & IRQ_PENDING)))
			break;
		desc->status &= ~IRQ_PENDING;
	}
	desc->status &= ~IRQ_INPROGRESS;

out:
	/*
	 * The ->end() handler has to deal with interrupts which got
	 * disabled while the handler was running.
	 */
	desc->chip->end(irq);
	spin_unlock(&desc->lock);

	return 1;
}
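The IRQ_PENDING/IRQ_INPROGRESS handshake lets another CPU that races on the same edge-triggered irq merely mark it pending and leave, while the CPU already inside the handler loops until nothing is pending. A compact model of that replay loop, with a fake "another edge arrived" event:

#include <stdio.h>

#define IRQ_PENDING	0x1	/* illustrative values */
#define IRQ_INPROGRESS	0x2

static unsigned int status;
static int edges_left = 2;	/* pretend two more edges arrive mid-handler */

static void handle_one(void)
{
	printf("handling one irq instance\n");
	if (edges_left-- > 0)
		status |= IRQ_PENDING;	/* another CPU noted a new edge */
}

int main(void)
{
	status = IRQ_INPROGRESS;	/* this CPU owns the irq now */

	for (;;) {
		handle_one();
		if (!(status & IRQ_PENDING))
			break;
		status &= ~IRQ_PENDING;	/* consume the replayed edge */
	}
	status &= ~IRQ_INPROGRESS;
	printf("done, status=%#x\n", status);
	return 0;
}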
Exemplo n.º 29
0
/*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init, even with SIGKILL, even by
 * mistake.
 */
static void do_signal(void)
{
	struct k_sigaction ka;
	siginfo_t info;
	sigset_t *oldset;
	int signr;

	/*
	 * We want the common case to go fast, which
	 * is why we may in certain cases get here from
	 * kernel mode. Just return without doing anything
	 * if so.
	 */
	if (!user_mode(__frame))
		return;

	if (try_to_freeze())
		goto no_signal;

	if (test_thread_flag(TIF_RESTORE_SIGMASK))
		oldset = &current->saved_sigmask;
	else
		oldset = &current->blocked;

	signr = get_signal_to_deliver(&info, &ka, __frame, NULL);
	if (signr > 0) {
		if (handle_signal(signr, &info, &ka, oldset) == 0) {
			/* a signal was successfully delivered; the saved
			 * sigmask will have been stored in the signal frame,
			 * and will be restored by sigreturn, so we can simply
			 * clear the TIF_RESTORE_SIGMASK flag */
			if (test_thread_flag(TIF_RESTORE_SIGMASK))
				clear_thread_flag(TIF_RESTORE_SIGMASK);
		}

		return;
	}

no_signal:
	/* Did we come from a system call? */
	if (__frame->syscallno >= 0) {
		/* Restart the system call - no handlers present */
		switch (__frame->gr8) {
		case -ERESTARTNOHAND:
		case -ERESTARTSYS:
		case -ERESTARTNOINTR:
			__frame->gr8 = __frame->orig_gr8;
			__frame->pc -= 4;
			break;

		case -ERESTART_RESTARTBLOCK:
			__frame->gr8 = __NR_restart_syscall;
			__frame->pc -= 4;
			break;
		}
	}

	/* if there's no signal to deliver, we just put the saved sigmask
	 * back */
	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
		clear_thread_flag(TIF_RESTORE_SIGMASK);
		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
	}

} /* end do_signal() */
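The TIF_RESTORE_SIGMASK handling at the end is the standard pattern used with pselect/ppoll-style calls: the temporarily installed signal mask is either captured into the signal frame when a handler runs, or put back here when no signal is delivered. A toy model of just that bookkeeping (the structure and flag are stand-ins, not kernel types):

#include <stdbool.h>
#include <stdio.h>

struct toy_task {
	unsigned long blocked;		/* current signal mask */
	unsigned long saved_sigmask;	/* mask to restore after pselect/ppoll */
	bool restore_sigmask;		/* models TIF_RESTORE_SIGMASK */
};

/* Called on the way back to user space when no signal was delivered. */
static void restore_saved_sigmask(struct toy_task *t)
{
	if (t->restore_sigmask) {
		t->restore_sigmask = false;
		t->blocked = t->saved_sigmask;	/* sigprocmask(SIG_SETMASK, ...) */
	}
}

int main(void)
{
	struct toy_task t = {
		.blocked = 0xff, .saved_sigmask = 0x0f, .restore_sigmask = true,
	};

	restore_saved_sigmask(&t);
	printf("blocked=%#lx restore flag=%d\n", t.blocked, t.restore_sigmask);
	return 0;
}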
Exemplo n.º 30
0
asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs)
{
	/*
	 * We ack quickly, we don't want the irq controller
	 * thinking we're snobs just because some other CPU has
	 * disabled global interrupts (we have already done the
	 * INT_ACK cycles, it's too late to try to pretend to the
	 * controller that we aren't taking the interrupt).
	 *
	 * 0 return value means that this irq is already being
	 * handled by some other CPU. (or is disabled)
	 */
	int cpu = smp_processor_id();
	irq_desc_t *desc = irq_desc + irq;
	struct irqaction * action;
	unsigned int status;

	ILATENCY_INTERRUPT_OVERHEAD_START();

	TRACE_IRQ_ENTRY(irq, !user_mode(regs));

	kstat.irqs[cpu][irq]++;
	spin_lock(&desc->lock);
	desc->handler->ack(irq);
	/*
	   REPLAY is when Linux resends an IRQ that was dropped earlier
	   WAITING is used by probe to mark irqs that are being tested
	   */
	status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
	status |= IRQ_PENDING; /* we _want_ to handle it */

	/*
	 * If the IRQ is disabled for whatever reason, we cannot
	 * use the action we have.
	 */
	action = NULL;
	if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
		action = desc->action;
		status &= ~IRQ_PENDING; /* we commit to handling */
		status |= IRQ_INPROGRESS; /* we are handling it */
	}
	desc->status = status;

	/*
	 * If there is no IRQ handler or it was disabled, exit early.
	 * Since we set PENDING, if another processor is handling
	 * a different instance of this same irq, the other processor
	 * will take care of it.
	 */
	if (!action)
		goto out;
	ILATENCY_INTERRUPT_OVERHEAD_STOP();
	/*
	 * Edge triggered interrupts need to remember
	 * pending events.
	 * This applies to any hw interrupts that allow a second
	 * instance of the same irq to arrive while we are in do_IRQ
	 * or in the handler. But the code here only handles the _second_
	 * instance of the irq, not the third or fourth. So it is mostly
	 * useful for irq hardware that does not mask cleanly in an
	 * SMP environment.
	 */
	for (;;) {
		spin_unlock(&desc->lock);
		handle_IRQ_event(irq, regs, action);
		spin_lock(&desc->lock);

		if (!(desc->status & IRQ_PENDING))
			break;
		desc->status &= ~IRQ_PENDING;
	}
	desc->status &= ~IRQ_INPROGRESS;
out:
	/*
	 * The ->end() handler has to deal with interrupts which got
	 * disabled while the handler was running.
	 */
	desc->handler->end(irq);
	spin_unlock(&desc->lock);

	TRACE_IRQ_EXIT();

	if (softirq_pending(cpu))
		do_softirq();
	ILATENCY_RET_FROM_EXCEPTION();
	return 1;
}
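In this last example user_mode(regs) only feeds the tracer: TRACE_IRQ_ENTRY records whether the interrupt preempted kernel or user code. A minimal model of that tagging (all names here are invented stand-ins):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the saved register frame; only the mode bit matters here. */
struct toy_regs { bool from_user; };

static bool user_mode(const struct toy_regs *regs)
{
	return regs->from_user;
}

static void trace_irq_entry(int irq, bool in_kernel)
{
	printf("irq %d entered, preempted %s code\n",
	       irq, in_kernel ? "kernel" : "user");
}

int main(void)
{
	struct toy_regs r = { .from_user = false };

	trace_irq_entry(7, !user_mode(&r));
	return 0;
}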