Пример #1
0
void
intr_irq_handler(struct trapframe *frame)
{
	struct intr_event *event;
	int i;

	VM_CNT_INC(v_intr);
	i = -1;
	while ((i = arm_get_next_irq(i)) != -1) {
		intrcnt[i]++;
		event = intr_events[i];
		if (intr_event_handle(event, frame) != 0) {
			/* XXX: Log stray IRQs */
			arm_mask_irq(i);
		}
	}
#ifdef HWPMC_HOOKS
	if (pmc_hook && (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN))
		pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, frame);
#endif
}
Пример #2
0
void
undefinedinstruction(struct trapframe *frame)
{
	struct thread *td;
	u_int fault_pc;
	int fault_instruction;
	int fault_code;
	int coprocessor;
	struct undefined_handler *uh;
	int error;
#ifdef VERBOSE_ARM32
	int s;
#endif
	ksiginfo_t ksi;

	/* Enable interrupts if they were enabled before the exception. */
	if (__predict_true(frame->tf_spsr & PSR_I) == 0)
		enable_interrupts(PSR_I);
	if (__predict_true(frame->tf_spsr & PSR_F) == 0)
		enable_interrupts(PSR_F);

	VM_CNT_INC(v_trap);

	fault_pc = frame->tf_pc;

	/*
	 * Get the current thread/proc structure or thread0/proc0 if there is
	 * none.
	 */
	td = curthread == NULL ? &thread0 : curthread;

	coprocessor = 0;
	if ((frame->tf_spsr & PSR_T) == 0) {
		/*
		 * Make sure the program counter is correctly aligned so we
		 * don't take an alignment fault trying to read the opcode.
		 */
		if (__predict_false((fault_pc & 3) != 0)) {
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGILL;
			ksi.ksi_code = ILL_ILLADR;
			ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
			trapsignal(td, &ksi);
			userret(td, frame);
			return;
		}

		/*
		 * Should use fuword() here .. but in the interests of
		 * squeezing every bit of speed we will just use ReadWord().
		 * We know the instruction can be read as was just executed
		 * so this will never fail unless the kernel is screwed up
		 * in which case it does not really matter does it ?
		 */

		fault_instruction = *(u_int32_t *)fault_pc;

		/* Check for coprocessor instruction */

		/*
		 * According to the datasheets you only need to look at bit
		 * 27 of the instruction to tell the difference between and
		 * undefined instruction and a coprocessor instruction
		 * following an undefined instruction trap.
		 */

		if (ARM_COPROC_INSN(fault_instruction))
			coprocessor = ARM_COPROC(fault_instruction);
		else {          /* check for special instructions */
			if (ARM_VFP_INSN(fault_instruction))
				coprocessor = COPROC_VFP; /* vfp / simd */
		}
	} else {
#if __ARM_ARCH >= 7
		fault_instruction = *(uint16_t *)fault_pc;
		if (THUMB_32BIT_INSN(fault_instruction)) {
			fault_instruction <<= 16;
			fault_instruction |= *(uint16_t *)(fault_pc + 2);

			/*
			 * Is it a Coprocessor, Advanced SIMD, or
			 * Floating-point instruction.
			 */
			if (THUMB_COPROC_INSN(fault_instruction)) {
				if (THUMB_COPROC_UNDEFINED(fault_instruction)) {
					/* undefined insn */
				} else if (THUMB_VFP_INSN(fault_instruction))
					coprocessor = COPROC_VFP;
				else
					coprocessor =
					    THUMB_COPROC(fault_instruction);
			}
		}
#else
		/*
		 * No support for Thumb-2 on this cpu
		 */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLADR;
		ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
		trapsignal(td, &ksi);
		userret(td, frame);
		return;
#endif
	}

	if ((frame->tf_spsr & PSR_MODE) == PSR_USR32_MODE) {
		/*
		 * Modify the fault_code to reflect the USR/SVC state at
		 * time of fault.
		 */
		fault_code = FAULT_USER;
		td->td_frame = frame;
	} else
		fault_code = 0;

	/* OK this is were we do something about the instruction. */
	LIST_FOREACH(uh, &undefined_handlers[coprocessor], uh_link)
	    if (uh->uh_handler(fault_pc, fault_instruction, frame,
			       fault_code) == 0)
		    break;

	if (fault_code & FAULT_USER) {
		/* TODO: No support for ptrace from Thumb-2 */
		if ((frame->tf_spsr & PSR_T) == 0 &&
		    fault_instruction == PTRACE_BREAKPOINT) {
			PROC_LOCK(td->td_proc);
			_PHOLD(td->td_proc);
			error = ptrace_clear_single_step(td);
			_PRELE(td->td_proc);
			PROC_UNLOCK(td->td_proc);
			if (error != 0) {
				ksiginfo_init_trap(&ksi);
				ksi.ksi_signo = SIGILL;
				ksi.ksi_code = ILL_ILLOPC;
				ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
				trapsignal(td, &ksi);
			}
			return;
		}
	}

	if (uh == NULL && (fault_code & FAULT_USER)) {
		/* Fault has not been handled */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_code = ILL_ILLOPC;
		ksi.ksi_addr = (u_int32_t *)(intptr_t) fault_pc;
		trapsignal(td, &ksi);
	}

	if ((fault_code & FAULT_USER) == 0) {
		if (fault_instruction == KERNEL_BREAKPOINT) {
#ifdef KDB
			kdb_trap(T_BREAKPOINT, 0, frame);
#else
			printf("No debugger in kernel.\n");
#endif
			return;
		}
		else
			panic("Undefined instruction in kernel.\n");
	}

	userret(td, frame);
}
Пример #3
0
/*
 * Populate the already-allocated child process p2 and its initial thread
 * td2 from the forking thread td, following the rfork flags in
 * fr->fr_flags, then link the child into the process tree and (unless
 * RFSTOPPED was requested) put td2 on a run queue.
 *
 * Locking: asserts on entry that proctree_lock is held shared and
 * allproc_lock is held exclusively; both are released in here.
 *
 * vm2 is passed through to vm_forkproc().  fp_procdesc is only used when
 * RFPROCDESC is set: it is handed to procdesc_finit() and then dropped.
 *
 * Output: when RFSTOPPED is clear and fr->fr_pidp is non-NULL, the child
 * pid is stored through it; when RFSTOPPED is set, p2 is stored through
 * fr->fr_procp.
 */
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
    struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	/* Publish p2 on the global lists while it is still marked PRS_NEW. */
	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	/* Copy the p_startcopy..p_endcopy range verbatim from the parent. */
	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 * RFCFDG: fresh table; RFFDG: private copy; neither: share the
	 * parent's table.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/*
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	/* The child thread's name starts out as the process command name. */
	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * Whilst the proc lock is held, copy the VM domain data out
	 * using the VM domain method.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	/* newsigacts is NULL exactly when RFSIGSHARE is set (see above). */
	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	/* Select the signal stored in p_sigparent for this child. */
	if (fr->fr_flags & RFTSIGZMB)
	        p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
	        p2->p_sigparent = SIGUSR1;
	else
	        p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

	thread_cow_get_proc(td2, p2);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	/* Lock order used here: proctree_lock, pgrp, child, parent. */
	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 * NOTE(review): the code below attaches to p1->p_reaper, which
	 * presumably is init unless a reaper was configured - confirm.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

	/*
	 * Statistics: classify the call by its exact flag combination
	 * (plain fork, vfork, fork from proc0, or any other rfork).
	 */
	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed. We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
		 * P_FOLLOWFORK settings, but it does not make a sense
		 * for runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		/* Caller asked for a stopped child: hand back the proc. */
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until debugger is attached to child.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}
Пример #4
0
void
trap(struct trapframe *frame)
{
	struct thread	*td;
	struct proc	*p;
#ifdef KDTRACE_HOOKS
	uint32_t inst;
#endif
	int		sig, type, user;
	u_int		ucode;
	ksiginfo_t	ksi;
	register_t 	fscr;

	VM_CNT_INC(v_trap);

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		return;
	}
#endif

	td = curthread;
	p = td->td_proc;

	type = ucode = frame->exc;
	sig = 0;
	user = frame->srr1 & PSL_PR;

	CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name,
	    trapname(type), user ? "user" : "kernel");

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0)
		return;
#endif

	if (user) {
		td->td_pticks = 0;
		td->td_frame = frame;
		if (td->td_cowgen != p->p_cowgen)
			thread_cow_update(td);

		/* User Mode Traps */
		switch (type) {
		case EXC_RUNMODETRC:
		case EXC_TRC:
			frame->srr1 &= ~PSL_SE;
			sig = SIGTRAP;
			ucode = TRAP_TRACE;
			break;

#if defined(__powerpc64__) && defined(AIM)
		case EXC_ISE:
		case EXC_DSE:
			if (handle_user_slb_spill(&p->p_vmspace->vm_pmap,
			    (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){
				sig = SIGSEGV;
				ucode = SEGV_MAPERR;
			}
			break;
#endif
		case EXC_DSI:
		case EXC_ISI:
			sig = trap_pfault(frame, 1);
			if (sig == SIGSEGV)
				ucode = SEGV_MAPERR;
			break;

		case EXC_SC:
			syscall(frame);
			break;

		case EXC_FPU:
			KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
			    ("FPU already enabled for thread"));
			enable_fpu(td);
			break;

		case EXC_VEC:
			KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC,
			    ("Altivec already enabled for thread"));
			enable_vec(td);
			break;

		case EXC_VSX:
			KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX,
			    ("VSX already enabled for thread"));
			if (!(td->td_pcb->pcb_flags & PCB_VEC))
				enable_vec(td);
			if (!(td->td_pcb->pcb_flags & PCB_FPU))
				save_fpu(td);
			td->td_pcb->pcb_flags |= PCB_VSX;
			enable_fpu(td);
			break;

		case EXC_FAC:
			fscr = mfspr(SPR_FSCR);
			switch (fscr & FSCR_IC_MASK) {
			case FSCR_IC_HTM:
				CTR0(KTR_TRAP,
				    "Hardware Transactional Memory subsystem disabled");
				sig = SIGILL;
				ucode =	ILL_ILLOPC;
				break;
			case FSCR_IC_DSCR:
				td->td_pcb->pcb_flags |= PCB_CFSCR | PCB_CDSCR;
				fscr |= FSCR_DSCR;
				mtspr(SPR_DSCR, 0);
				break;
			case FSCR_IC_EBB:
				td->td_pcb->pcb_flags |= PCB_CFSCR;
				fscr |= FSCR_EBB;
				mtspr(SPR_EBBHR, 0);
				mtspr(SPR_EBBRR, 0);
				mtspr(SPR_BESCR, 0);
				break;
			case FSCR_IC_TAR:
				td->td_pcb->pcb_flags |= PCB_CFSCR;
				fscr |= FSCR_TAR;
				mtspr(SPR_TAR, 0);
				break;
			case FSCR_IC_LM:
				td->td_pcb->pcb_flags |= PCB_CFSCR;
				fscr |= FSCR_LM;
				mtspr(SPR_LMRR, 0);
				mtspr(SPR_LMSER, 0);
				break;
			default:
				sig = SIGILL;
				ucode =	ILL_ILLOPC;
			}
			mtspr(SPR_FSCR, fscr & ~FSCR_IC_MASK);
			break;
		case EXC_HEA:
			sig = SIGILL;
			ucode =	ILL_ILLOPC;
			break;

		case EXC_VECAST_E:
		case EXC_VECAST_G4:
		case EXC_VECAST_G5:
			/*
			 * We get a VPU assist exception for IEEE mode
			 * vector operations on denormalized floats.
			 * Emulating this is a giant pain, so for now,
			 * just switch off IEEE mode and treat them as
			 * zero.
			 */

			save_vec(td);
			td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ;
			enable_vec(td);
			break;

		case EXC_ALI:
			if (fix_unaligned(td, frame) != 0) {
				sig = SIGBUS;
				ucode = BUS_ADRALN;
			}
			else
				frame->srr0 += 4;
			break;

		case EXC_DEBUG:	/* Single stepping */
			mtspr(SPR_DBSR, mfspr(SPR_DBSR));
			frame->srr1 &= ~PSL_DE;
			frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC);
			sig = SIGTRAP;
			ucode = TRAP_TRACE;
			break;

		case EXC_PGM:
			/* Identify the trap reason */
			if (frame_is_trap_inst(frame)) {
#ifdef KDTRACE_HOOKS
				inst = fuword32((const void *)frame->srr0);
				if (inst == 0x0FFFDDDD &&
				    dtrace_pid_probe_ptr != NULL) {
					(*dtrace_pid_probe_ptr)(frame);
					break;
				}
#endif
 				sig = SIGTRAP;
				ucode = TRAP_BRKPT;
			} else {
				sig = ppc_instr_emulate(frame, td);
				if (sig == SIGILL) {
					if (frame->srr1 & EXC_PGM_PRIV)
						ucode = ILL_PRVOPC;
					else if (frame->srr1 & EXC_PGM_ILLEGAL)
						ucode = ILL_ILLOPC;
				} else if (sig == SIGFPE)
					ucode = FPE_FLTINV;	/* Punt for now, invalid operation. */
			}
			break;

		case EXC_MCHK:
			/*
			 * Note that this may not be recoverable for the user
			 * process, depending on the type of machine check,
			 * but it at least prevents the kernel from dying.
			 */
			sig = SIGBUS;
			ucode = BUS_OBJERR;
			break;

#if defined(__powerpc64__) && defined(AIM)
		case EXC_SOFT_PATCH:
			/*
			 * Point to the instruction that generated the exception to execute it again,
			 * and normalize the register values.
			 */
			frame->srr0 -= 4;
			normalize_inputs();
			break;
#endif

		default:
			trap_fatal(frame);
		}
	} else {
		/* Kernel Mode Traps */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case EXC_PGM:
#ifdef KDTRACE_HOOKS
			if (frame_is_trap_inst(frame)) {
				if (*(uint32_t *)frame->srr0 == EXC_DTRACE) {
					if (dtrace_invop_jump_addr != NULL) {
						dtrace_invop_jump_addr(frame);
						return;
					}
				}
			}
#endif
#ifdef KDB
			if (db_trap_glue(frame))
				return;
#endif
			break;
#if defined(__powerpc64__) && defined(AIM)
		case EXC_DSE:
			if (td->td_pcb->pcb_cpu.aim.usr_vsid != 0 &&
			    (frame->dar & SEGMENT_MASK) == USER_ADDR) {
				__asm __volatile ("slbmte %0, %1" ::
					"r"(td->td_pcb->pcb_cpu.aim.usr_vsid),
					"r"(USER_SLB_SLBE));
				return;
			}
			break;
#endif
		case EXC_DSI:
			if (trap_pfault(frame, 0) == 0)
 				return;
			break;
		case EXC_MCHK:
			if (handle_onfault(frame))
 				return;
			break;
		default:
			break;
		}
		trap_fatal(frame);
	}
Пример #5
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 *
 * Reads the pages m[0..count-1] of vnode vp from its backing store,
 * optionally extending the transfer with read-behind / read-ahead pages.
 *
 *   vp         vnode to read from; must not be a VCHR/VBLK device.
 *   m, count   array of requested pages and its length.
 *   a_rbehind  in: read-behind pages wanted; out: pages actually added.
 *   a_rahead   likewise for read-ahead.  Either pointer may be NULL.
 *   iodone     NULL for a synchronous read; otherwise the completion
 *              callback for an asynchronous read.
 *   arg        opaque argument handed to iodone.
 *
 * Returns VM_PAGER_BAD for a doomed vnode, VM_PAGER_ERROR when VOP_BMAP()
 * or the synchronous read fails, and VM_PAGER_OK otherwise.  The two
 * fallback paths (no VOP_BMAP support, block size below the page size)
 * return whatever vnode_pager_input_old() / vnode_pager_input_smlfs()
 * returned.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	off_t foff;
#ifdef INVARIANTS
	off_t blkno0;
#endif
	int bsize, pagesperblock, *freecnt;
	int error, before, after, rbehind, rahead, poff, i;
	int bytecount, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("%s does not support devices", __func__));

	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	foff = IDX_TO_OFF(m[0]->pindex);
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;

	KASSERT(foff < object->un_pager.vnp.vnp_size,
	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
	/*
	 * The request must fit into the pbuf's page array.  Compare against
	 * the number of array slots, not the array's size in bytes.
	 */
	KASSERT(count <= nitems(bp->b_pages),
	    ("%s: requested %d pages", __func__, count));

	/*
	 * The last page has valid blocks.  Invalid part can only
	 * exist at the end of file, and the page is made fully valid
	 * by zeroing in vm_pager_get_pages().
	 */
	if (m[count - 1]->valid != 0 && --count == 0) {
		if (iodone != NULL)
			iodone(arg, m, 1, 0);
		return (VM_PAGER_OK);
	}

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++) {
			VM_CNT_INC(v_vnodein);
			VM_CNT_INC(v_vnodepgsin);
			error = vnode_pager_input_old(object, m[i]);
			if (error)
				break;
		}
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		return (VM_PAGER_ERROR);
	}

	/*
	 * If the file system supports BMAP, but blocksize is smaller
	 * than a page size, then use special small filesystem code.
	 */
	if (pagesperblock == 0) {
		relpbuf(bp, freecnt);
		for (i = 0; i < count; i++) {
			VM_CNT_INC(v_vnodein);
			VM_CNT_INC(v_vnodepgsin);
			error = vnode_pager_input_smlfs(object, m[i]);
			if (error)
				break;
		}
		return (error);
	}

	/*
	 * A sparse file can be encountered only for a single page request,
	 * which may not be preceded by call to vm_pager_haspage().
	 */
	if (bp->b_blkno == -1) {
		KASSERT(count == 1,
		    ("%s: array[%d] request to a sparse file %p", __func__,
		    count, vp));
		relpbuf(bp, freecnt);
		pmap_zero_page(m[0]);
		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
		    __func__, m[0]));
		VM_OBJECT_WLOCK(object);
		m[0]->valid = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	}

#ifdef INVARIANTS
	blkno0 = bp->b_blkno;
#endif
	/* Advance from the start of the block to the first requested page. */
	bp->b_blkno += (foff % bsize) / DEV_BSIZE;

	/* Recalculate blocks available after/before to pages. */
	poff = (foff % bsize) / PAGE_SIZE;
	before *= pagesperblock;
	before += poff;
	after *= pagesperblock;
	after += pagesperblock - (poff + 1);
	if (m[0]->pindex + after >= object->size)
		after = object->size - 1 - m[0]->pindex;
	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
	    __func__, count, after + 1));
	after -= count - 1;

	/* Trim requested rbehind/rahead to possible values. */
	rbehind = a_rbehind ? *a_rbehind : 0;
	rahead = a_rahead ? *a_rahead : 0;
	rbehind = min(rbehind, before);
	rbehind = min(rbehind, m[0]->pindex);
	rahead = min(rahead, after);
	rahead = min(rahead, object->size - m[count - 1]->pindex);
	/*
	 * Check that total amount of pages fit into buf.  Trim rbehind and
	 * rahead evenly if not.
	 */
	if (rbehind + rahead + count > nitems(bp->b_pages)) {
		int trim, sum;

		trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
		sum = rbehind + rahead;
		if (rbehind == before) {
			/* Roundup rbehind trim to block size. */
			rbehind -= roundup(trim * rbehind / sum, pagesperblock);
			if (rbehind < 0)
				rbehind = 0;
		} else
			rbehind -= trim * rbehind / sum;
		rahead -= trim * rahead / sum;
	}
	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
	    ("%s: behind %d ahead %d count %d", __func__,
	    rbehind, rahead, count));

	/*
	 * Fill in the bp->b_pages[] array with requested and optional
	 * read behind or read ahead pages.  Read behind pages are looked
	 * up in a backward direction, down to a first cached page.  Same
	 * for read ahead pages, but there is no need to shift the array
	 * in case of encountering a cached page.
	 */
	i = bp->b_npages = 0;
	if (rbehind) {
		vm_pindex_t startpindex, tpindex;
		vm_page_t p;

		VM_OBJECT_WLOCK(object);
		startpindex = m[0]->pindex - rbehind;
		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
		    p->pindex >= startpindex)
			startpindex = p->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (tpindex = m[0]->pindex - 1;
		    tpindex >= startpindex && tpindex < m[0]->pindex;
		    tpindex--, i++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					bp->b_pages[j] = bp->b_pages[j +
					    tpindex + 1 - startpindex];
				break;
			}
			bp->b_pages[tpindex - startpindex] = p;
		}

		bp->b_pgbefore = i;
		bp->b_npages += i;
		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
	} else
		bp->b_pgbefore = 0;

	/* Requested pages. */
	for (int j = 0; j < count; j++, i++)
		bp->b_pages[i] = m[j];
	bp->b_npages += count;

	if (rahead) {
		vm_pindex_t endpindex, tpindex;
		vm_page_t p;

		/* The read-behind branch may already hold the object lock. */
		if (!VM_OBJECT_WOWNED(object))
			VM_OBJECT_WLOCK(object);
		endpindex = m[count - 1]->pindex + rahead + 1;
		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
		    p->pindex < endpindex)
			endpindex = p->pindex;
		if (endpindex > object->size)
			endpindex = object->size;

		for (tpindex = m[count - 1]->pindex + 1;
		    tpindex < endpindex; i++, tpindex++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				break;
			bp->b_pages[i] = p;
		}

		bp->b_pgafter = i - bp->b_npages;
		bp->b_npages = i;
	} else
		bp->b_pgafter = 0;

	if (VM_OBJECT_WOWNED(object))
		VM_OBJECT_WUNLOCK(object);

	/* Report back actual behind/ahead read. */
	if (a_rbehind)
		*a_rbehind = bp->b_pgbefore;
	if (a_rahead)
		*a_rahead = bp->b_pgafter;

#ifdef INVARIANTS
	KASSERT(bp->b_npages <= nitems(bp->b_pages),
	    ("%s: buf %p overflowed", __func__, bp));
	for (int j = 1, prev = 0; j < bp->b_npages; j++) {
		if (bp->b_pages[j] == bogus_page)
			continue;
		KASSERT(bp->b_pages[j]->pindex - bp->b_pages[prev]->pindex ==
		    j - prev, ("%s: pages array not consecutive, bp %p",
		     __func__, bp));
		prev = j;
	}
#endif

	/*
	 * Recalculate first offset and bytecount with regards to read behind.
	 * Truncate bytecount to vnode real size and round up physical size
	 * for real devices.
	 */
	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	bytecount = bp->b_npages << PAGE_SHIFT;
	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
		bytecount = object->un_pager.vnp.vnp_size - foff;
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("%s: sector size %d too large", __func__, secmask + 1));
	bytecount = (bytecount + secmask) & ~secmask;

	/*
	 * And map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
	}

	/* Build a minimal buffer header. */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
	bp->b_iooffset = dbtob(bp->b_blkno);
	KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) ==
	    (blkno0 - bp->b_blkno) * DEV_BSIZE +
	    IDX_TO_OFF(m[0]->pindex) % bsize,
	    ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju "
	    "blkno0 %ju b_blkno %ju", bsize,
	    (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex,
	    (uintmax_t)blkno0, (uintmax_t)bp->b_blkno));

	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	VM_CNT_INC(v_vnodein);
	VM_CNT_ADD(v_vnodepgsin, bp->b_npages);

	if (iodone != NULL) { /* async */
		bp->b_pgiodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		return (VM_PAGER_OK);
	} else {
		/* Synchronous: wait for the I/O, then tear the pbuf down. */
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
	}
}
Пример #6
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occurred, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * than delayed.
 *
 *   vp         vnode to write to.
 *   ma         array of bytecount / PAGE_SIZE pages to write.
 *   bytecount  size of the request in bytes.
 *   flags      pager flags, translated by vnode_pager_putpages_ioflags().
 *   rtvals     per-page result array, filled in here.
 *
 * Per-page results: VM_PAGER_OK for pages covered by the bytes VOP_WRITE()
 * consumed, VM_PAGER_BAD for pages entirely beyond EOF, VM_PAGER_ERROR for
 * in-file pages that were not written.  Returns rtvals[0].
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
    int flags, int *rtvals)
{
	vm_object_t object;
	vm_page_t m;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int count, error, i, maxsize, ncount, nwritten, pgoff, ppscheck;
	static struct timeval lastfail;
	static int curfail;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	/* Pessimistic default; successfully written pages are fixed up below. */
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_ERROR;

	if ((int64_t)ma[0]->pindex < 0) {
		printf("vnode_pager_generic_putpages: "
		    "attempt to write meta-data 0x%jx(%lx)\n",
		    (uintmax_t)ma[0]->pindex, (u_long)ma[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return (VM_PAGER_BAD);
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(ma[0]->pindex);

	/*
	 * If the page-aligned write is larger than the actual file we
	 * have to invalidate pages occurring beyond the file EOF.  However,
	 * there is an edge case where a file may not be page-aligned where
	 * the last page is partially invalid.  In this case the filesystem
	 * may not properly clear the dirty bits for the entire page (which
	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
	 * With the page locked we are free to fix-up the dirty bits here.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 */
	VM_OBJECT_WLOCK(object);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset) {
			maxsize = object->un_pager.vnp.vnp_size - poffset;
			ncount = btoc(maxsize);
			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
		("vnode_pager_generic_putpages: page %p is not read-only", m));
				MPASS(m->dirty != 0);
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			maxsize = 0;
			ncount = 0;
		}
		for (i = ncount; i < count; i++)
			rtvals[i] = VM_PAGER_BAD;
	}
	/*
	 * NOTE(review): "btoc(maxsize) & PAGE_MASK" masks a page count, not
	 * a byte count; it looks like "maxsize & PAGE_MASK" was intended so
	 * that only a partial last page is exempted.  Left unchanged because
	 * it only affects this debugging assertion - confirm before fixing.
	 */
	for (i = 0; i < ncount - ((btoc(maxsize) & PAGE_MASK) != 0); i++)
		MPASS(ma[i]->dirty == VM_PAGE_BITS_ALL);
	VM_OBJECT_WUNLOCK(object);

	/* UIO_NOCOPY write: no source buffer, only offset and length. */
	aiov.iov_base = NULL;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_td = NULL;
	error = VOP_WRITE(vp, &auio, vnode_pager_putpages_ioflags(flags),
	    curthread->td_ucred);
	VM_CNT_INC(v_vnodeout);
	VM_CNT_ADD(v_vnodepgsout, ncount);

	/* Rate-limited diagnostics; one ppsratecheck() slot covers both. */
	ppscheck = 0;
	if (error != 0 && (ppscheck = ppsratecheck(&lastfail, &curfail, 1))
	    != 0)
		printf("vnode_pager_putpages: I/O error %d\n", error);
	if (auio.uio_resid != 0 && (ppscheck != 0 ||
	    ppsratecheck(&lastfail, &curfail, 1) != 0))
		printf("vnode_pager_putpages: residual I/O %zd at %ju\n",
		    auio.uio_resid, (uintmax_t)ma[0]->pindex);

	/*
	 * Report success only for pages the filesystem actually consumed.
	 * With no residual every in-file page (including a partial EOF
	 * page) was written.  Otherwise only the pages completely covered
	 * by the bytes consumed are OK; the remaining in-file pages keep
	 * the VM_PAGER_ERROR set on entry, so they are not reported as
	 * written.
	 */
	if (auio.uio_resid == 0)
		nwritten = ncount;
	else if (auio.uio_resid >= maxsize)
		nwritten = 0;
	else
		nwritten = (maxsize - auio.uio_resid) / PAGE_SIZE;
	for (i = 0; i < nwritten; i++)
		rtvals[i] = VM_PAGER_OK;
	return (rtvals[0]);
}
Пример #7
0
/*
 * Abort handler.
 *
 * Common C-level entry point for data aborts and prefetch aborts.
 *
 * Parameters:
 *   tf       - trapframe saved when the abort was taken.
 *   prefetch - non-zero for a prefetch (instruction) abort; selects the
 *              instruction fault status/address registers instead of the
 *              data ones and adds VM_PROT_EXECUTE to the fault type.
 *
 * The function returns nothing.  Depending on the fault it either returns
 * with the fault resolved, redirects tf->tf_pc to the registered
 * pcb_onfault handler, posts a signal via call_trapsignal(), or hands the
 * fault to abort_fatal().
 *
 * FAR, FSR, and anything else that can be lost once interrupts are
 * enabled must be read before interrupts are enabled.  Note that once
 * interrupts are enabled, we could even migrate to another CPU ...
 *
 * TODO: move quick cases to ASM
 */
void
abort_handler(struct trapframe *tf, int prefetch)
{
	struct thread *td;
	vm_offset_t far, va;
	int idx, rv;
	uint32_t fsr;
	struct ksig ksig;
	struct proc *p;
	struct pcb *pcb;
	struct vm_map *map;
	struct vmspace *vm;
	vm_prot_t ftype;
	bool usermode;
#ifdef INVARIANTS
	void *onfault;
#endif

	VM_CNT_INC(v_trap);
	td = curthread;
	/*
	 * Fetch the pcb up front: the "nogo" path below reads
	 * pcb->pcb_onfault and is reachable (via the KERN_INVALID_ADDRESS
	 * check after pmap_fault()) before any later point in the function.
	 */
	pcb = td->td_pcb;

	/* Snapshot the fault status and fault address registers first. */
	fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get();
#if __ARM_ARCH >= 7
	far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get();
#else
	far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get();
#endif

	idx = FSR_TO_FAULT(fsr);
	usermode = TRAPF_USERMODE(tf);	/* Abort came from user mode? */
	if (usermode)
		td->td_frame = tf;

	CTR6(KTR_TRAP, "%s: fsr %#x (idx %u) far %#x prefetch %u usermode %d",
	    __func__, fsr, idx, far, prefetch, usermode);

	/*
	 * Firstly, handle aborts that are not directly related to mapping.
	 */
	if (__predict_false(idx == FAULT_EA_IMPREC)) {
		abort_imprecise(tf, fsr, prefetch, usermode);
		return;
	}

	if (__predict_false(idx == FAULT_DEBUG)) {
		abort_debug(tf, fsr, prefetch, usermode, far);
		return;
	}

	/*
	 * ARM has a set of unprivileged load and store instructions
	 * (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used in other
	 * than user mode and OS should recognize their aborts and behave
	 * appropriately. However, there is no way how to do that reasonably
	 * in general unless we restrict the handling somehow.
	 *
	 * For now, these instructions are used only in copyin()/copyout()
	 * like functions where usermode buffers are checked in advance that
	 * they are not from KVA space. Thus, no action is needed here.
	 */

	/*
	 * (1) Handle access and R/W hardware emulation aborts.
	 * (2) Check that abort is not on pmap essential address ranges.
	 *     There is no way how to fix it, so we don't even try.
	 */
	rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode);
	if (rv == KERN_SUCCESS)
		return;
#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif
	if (rv == KERN_INVALID_ADDRESS)
		goto nogo;

	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
		/*
		 * Due to both processor errata and lazy TLB invalidation when
		 * access restrictions are removed from virtual pages, memory
		 * accesses that are allowed by the physical mapping layer may
		 * nonetheless cause one spurious page fault per virtual page.
		 * When the thread is executing a "no faulting" section that
		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
		 * every page fault is treated as a spurious page fault,
		 * unless it accesses the same virtual address as the most
		 * recent page fault within the same "no faulting" section.
		 */
		if (td->td_md.md_spurflt_addr != far ||
		    (td->td_pflags & TDP_RESETSPUR) != 0) {
			td->td_md.md_spurflt_addr = far;
			td->td_pflags &= ~TDP_RESETSPUR;

			tlb_flush_local(far & ~PAGE_MASK);
			return;
		}
	} else {
		/*
		 * If we get a page fault while in a critical section, then
		 * it is most likely a fatal kernel page fault.  The kernel
		 * is already going to panic trying to get a sleep lock to
		 * do the VM lookup, so just consider it a fatal trap so the
		 * kernel can print out a useful trap message and even get
		 * to the debugger.
		 *
		 * If we get a page fault while holding a non-sleepable
		 * lock, then it is most likely a fatal kernel page fault.
		 * If WITNESS is enabled, then it's going to whine about
		 * bogus LORs with various VM locks, so just skip to the
		 * fatal trap handling directly.
		 */
		if (td->td_critnest != 0 ||
		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
		    "Kernel page fault") != 0) {
			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
			return;
		}
	}

	/*
	 * Re-enable interrupts if they were enabled previously.  A clear
	 * PSR_I/PSR_F bit in the saved SPSR means the corresponding
	 * interrupt class was enabled; the hint covers the whole test.
	 */
	if (td->td_md.md_spinlock_count == 0) {
		if (__predict_true((tf->tf_spsr & PSR_I) == 0))
			enable_interrupts(PSR_I);
		if (__predict_true((tf->tf_spsr & PSR_F) == 0))
			enable_interrupts(PSR_F);
	}

	p = td->td_proc;
	if (usermode) {
		td->td_pticks = 0;
		if (td->td_cowgen != p->p_cowgen)
			thread_cow_update(td);
	}

	/*
	 * Invoke the appropriate handler, if necessary.  A non-zero return
	 * from the handler means ksig has been filled in and a signal must
	 * be posted.
	 */
	if (__predict_false(aborts[idx].func != NULL)) {
		if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig))
			goto do_trapsignal;
		goto out;
	}

	/*
	 * At this point, we're dealing with one of the following aborts:
	 *
	 *  FAULT_ICACHE   - I-cache maintenance
	 *  FAULT_TRAN_xx  - Translation
	 *  FAULT_PERM_xx  - Permission
	 */

	/*
	 * Don't pass faulting cache operation to vm_fault(). We don't want
	 * to handle all vm stuff at this moment.
	 */
	if (__predict_false(pcb->pcb_onfault == cachebailout)) {
		tf->tf_r0 = far;		/* return failing address */
		tf->tf_pc = (register_t)pcb->pcb_onfault;
		return;
	}

	/* Handle remaining I-cache aborts. */
	if (idx == FAULT_ICACHE) {
		if (abort_icache(tf, idx, fsr, far, prefetch, td, &ksig))
			goto do_trapsignal;
		goto out;
	}

	va = trunc_page(far);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory. If curproc
		 * is NULL or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = (p != NULL) ? p->p_vmspace : NULL;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
		/*
		 * A kernel-mode access to a user address is only tolerated
		 * outside interrupt context and with an onfault handler
		 * registered.
		 */
		if (!usermode && (td->td_intr_nesting_level != 0 ||
		    pcb->pcb_onfault == NULL)) {
			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
			return;
		}
	}

	ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ;
	if (prefetch)
		ftype |= VM_PROT_EXECUTE;

#ifdef DEBUG
	last_fault_code = fsr;
#endif

#ifdef INVARIANTS
	/* Hide the onfault handler for the duration of vm_fault(). */
	onfault = pcb->pcb_onfault;
	pcb->pcb_onfault = NULL;
#endif

	/* Fault in the page. */
	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);

#ifdef INVARIANTS
	pcb->pcb_onfault = onfault;
#endif

	if (__predict_true(rv == KERN_SUCCESS))
		goto out;
nogo:
	/*
	 * The fault could not be resolved.  For a kernel-mode fault, resume
	 * at the registered onfault handler (with rv in r0) when there is
	 * one and we are not in interrupt context; otherwise it is fatal.
	 */
	if (!usermode) {
		if (td->td_intr_nesting_level == 0 &&
		    pcb->pcb_onfault != NULL) {
			tf->tf_r0 = rv;
			tf->tf_pc = (register_t)pcb->pcb_onfault;
			return;
		}
		CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv);
		abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
		return;
	}

	/* User-mode fault: report it as SIGSEGV at the faulting address. */
	ksig.sig = SIGSEGV;
	ksig.code = (rv == KERN_PROTECTION_FAILURE) ? SEGV_ACCERR : SEGV_MAPERR;
	ksig.addr = far;

do_trapsignal:
	call_trapsignal(td, ksig.sig, ksig.code, ksig.addr);
out:
	if (usermode)
		userret(td, tf);
}
Пример #8
0
/*
 * trap: C-level handler for the processor trap described by `frame`.
 *
 * The trap number is taken from frame->tf_trapno.  NMI, reserved and
 * machine-check traps are dispatched first.  After that the handling
 * splits on TRAPF_USERMODE(frame):
 *
 *  - user traps are translated into a signal number (`i`) and a signal
 *    code (`ucode`) and posted to the current thread with trapsignal(),
 *    unless a case resolves the trap itself and jumps to a label;
 *  - kernel traps are either recovered from (typically by rewriting
 *    frame->tf_rip or frame->tf_rflags) or passed to trap_fatal().
 *
 * Exit labels: `user` runs userret() before returning; `userout` and
 * `out` return immediately.
 */
void
trap(struct trapframe *frame)
{
#ifdef KDTRACE_HOOKS
	struct reg regs;
#endif
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
#ifdef KDB
	register_t dr6;
#endif
	/* i: signal number to post; ucode: signal code (si_code). */
	int i = 0, ucode = 0;
	u_int type;
	/* addr: address reported in the signal (ksi_addr). */
	register_t addr = 0;
	ksiginfo_t ksi;

	VM_CNT_INC(v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
	         if (ipi_nmi_handler() == 0)
	                   goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

	if (type == T_NMI) {
#ifdef HWPMC_HOOKS
		/*
		 * CPU PMCs interrupt using an NMI.  If the PMC module is
		 * active, pass the 'rip' value to the PMC module's interrupt
		 * handler.  A non-zero return value from the handler means that
		 * the NMI was consumed by it and we can return immediately.
		 */
		if (pmc_intr != NULL &&
		    (*pmc_intr)(PCPU_GET(cpuid), frame) != 0)
			goto out;
#endif

#ifdef STACK
		if (stack_nmi_handler(frame) != 0)
			goto out;
#endif
	}

	if (type == T_MCHK) {
		mca_intr();
		goto out;
	}

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (TRAPF_USERMODE(frame))
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);

			/*
			 * We shouldn't enable interrupts while holding a
			 * spin lock.
			 */
			if (td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}

	if (TRAPF_USERMODE(frame)) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		/* Default signal address is the trapping %rip. */
		addr = frame->tf_rip;
		if (td->td_cowgen != p->p_cowgen)
			thread_cow_update(td);

		/*
		 * Each case either sets i/ucode and breaks out to the
		 * signal delivery code after the switch, or jumps to one
		 * of the exit labels when no signal is to be posted.
		 */
		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
#ifdef KDTRACE_HOOKS
			if (type == T_BPTFLT) {
				fill_frame_regs(frame, &regs);
				if (dtrace_pid_probe_ptr != NULL &&
				    dtrace_pid_probe_ptr(&regs) == 0)
					goto out;
			}
#endif
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = fputrap_x87();
			/* -1 from fputrap_x87() means no signal is posted. */
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_ALIGNFLT:
			i = SIGBUS;
			ucode = BUS_ADRALN;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			/* Any trap type without its own case gets SIGBUS. */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			/*
			 * Emulator can take care about this trap?
			 */
			if (*p->p_sysent->sv_trap != NULL &&
			    (*p->p_sysent->sv_trap)(td) == 0)
				goto userout;

			addr = frame->tf_addr;
			/*
			 * trap_pfault() result as used here: -1 returns
			 * without userret(), 0 returns via userret() with
			 * no signal, anything else is the signal to post.
			 */
			i = trap_pfault(frame, TRUE);
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				/*
				 * prot_fault_translation selects which
				 * signal is reported for the remaining
				 * page fault failures.
				 */
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
			nmi_handle_intr(type, frame);
			/*
			 * NOTE(review): i is still at its initial 0 here,
			 * so unless sv_transtrap remaps it the code after
			 * the switch calls trapsignal() with ksi_signo == 0.
			 * Confirm that this is intended.
			 */
			break;
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/* transparent fault (due to context switch "late") */
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			fpudna();
			goto userout;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = fputrap_sse();
			/* -1 from fputrap_sse() means no signal is posted. */
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;
#ifdef KDTRACE_HOOKS
		case T_DTRACE_RET:
			enable_intr();
			fill_frame_regs(frame, &regs);
			if (dtrace_return_probe_ptr != NULL &&
			    dtrace_return_probe_ptr(&regs) == 0)
				goto out;
			/*
			 * NOTE(review): as with T_NMI above, this break
			 * leaves i at 0 before the trapsignal() call below.
			 * Confirm that this is intended.
			 */
			break;
#endif
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		/*
		 * Cases that recover jump to `out`; a `break` falls
		 * through to the trap_fatal() call after the switch.
		 */
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE);
			goto out;

		case T_DNA:
			if (PCB_USER_FPU(td->td_pcb))
				panic("Unregistered use of FPU in kernel");
			fpudna();
			goto out;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * For now, supporting kernel handler
			 * registration for FPU traps is overkill.
			 */
			trap_fatal(frame, 0);
			goto out;

		case T_STKFLT:		/* stack fault */
		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/* No recovery is attempted in interrupt context. */
			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			/*
			 * Each check below matches the faulting %rip against
			 * a known instruction address and, on a match,
			 * resumes at the corresponding *_fault address.
			 */
			if (frame->tf_rip == (long)doreti_iret) {
				frame->tf_rip = (long)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_ds) {
				frame->tf_rip = (long)ds_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_es) {
				frame->tf_rip = (long)es_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fs) {
				frame->tf_rip = (long)fs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gs) {
				frame->tf_rip = (long)gs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gsbase) {
				frame->tf_rip = (long)gsbase_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fsbase) {
				frame->tf_rip = (long)fsbase_load_fault;
				goto out;
			}
			/* Otherwise resume at the onfault handler, if any. */
			if (curpcb->pcb_onfault != NULL) {
				frame->tf_rip = (long)curpcb->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & ~0xf);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			/* XXX %dr6 is not quite reentrant. */
			dr6 = rdr6();
			load_dr6(dr6 & ~0x4000);
			if (kdb_trap(type, dr6, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
			nmi_handle_intr(type, frame);
			goto out;
#endif /* DEV_ISA */
		}

		/* Unrecovered kernel trap. */
		trap_fatal(frame, 0);
		goto out;
	}

	/*
	 * Only user traps reach this point: build the siginfo from
	 * i/ucode/type/addr and post the signal to the current thread.
	 */

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_trapno = type;
	ksi.ksi_addr = (void *)addr;
	/*
	 * Optional diagnostic line, including the eight bytes read from
	 * user memory at the trapping %rip.
	 */
	if (uprintf_signal) {
		uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
		    "addr 0x%lx rsp 0x%lx rip 0x%lx "
		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
		    frame->tf_rsp, frame->tf_rip,
		    fubyte((void *)(frame->tf_rip + 0)),
		    fubyte((void *)(frame->tf_rip + 1)),
		    fubyte((void *)(frame->tf_rip + 2)),
		    fubyte((void *)(frame->tf_rip + 3)),
		    fubyte((void *)(frame->tf_rip + 4)),
		    fubyte((void *)(frame->tf_rip + 5)),
		    fubyte((void *)(frame->tf_rip + 6)),
		    fubyte((void *)(frame->tf_rip + 7)));
	}
	KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
	trapsignal(td, &ksi);

user:
	userret(td, frame);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}