/**
 * create_image - Create a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Execute device drivers' "late" and "noirq" freeze callbacks, create a
 * hibernation image and run the drivers' "noirq" and "early" thaw callbacks.
 *
 * Control reappears in this routine after the subsequent restore.
 */
static int create_image(int platform_mode)
{
	int error;

	error = dpm_suspend_end(PMSG_FREEZE);
	if (error) {
		printk(KERN_ERR "PM: Some devices failed to power down, "
			"aborting hibernation\n");
		return error;
	}

	error = platform_pre_snapshot(platform_mode);
	if (error || hibernation_test(TEST_PLATFORM))
		goto Platform_finish;

	error = disable_nonboot_cpus();
	if (error || hibernation_test(TEST_CPUS))
		goto Enable_cpus;

	local_irq_disable();

	error = syscore_suspend();
	if (error) {
		printk(KERN_ERR "PM: Some system devices failed to power down, "
			"aborting hibernation\n");
		goto Enable_irqs;
	}

	if (hibernation_test(TEST_CORE) || pm_wakeup_pending())
		goto Power_up;

	in_suspend = 1;
	save_processor_state();
	error = swsusp_arch_suspend();
	if (error)
		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
			error);
	/* Restore control flow magically appears here */
	restore_processor_state();
	if (!in_suspend) {
		events_check_enabled = false;
		platform_leave(platform_mode);
	}

 Power_up:
	syscore_resume();

 Enable_irqs:
	local_irq_enable();

 Enable_cpus:
	enable_nonboot_cpus();

 Platform_finish:
	platform_finish(platform_mode);

	dpm_resume_start(in_suspend ?
		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);

	return error;
}
Example #2
0
/**
 * acpi_idle_enter_simple - enters an ACPI state without BM handling
 * @dev: the target CPU
 * @drv: cpuidle driver with cpuidle state information
 * @index: the index of suggested state
 */
static int acpi_idle_enter_simple(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor *pr;
	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
	ktime_t  kt1, kt2;
	s64 idle_time_ns;
	s64 idle_time;

	pr = __this_cpu_read(processors);
	dev->last_residency = 0;

	if (unlikely(!pr))
		return -EINVAL;

	local_irq_disable();


	if (cx->entry_method != ACPI_CSTATE_FFH) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we test
		 * NEED_RESCHED:
		 */
		smp_mb();

		if (unlikely(need_resched())) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return -EINVAL;
		}
	}

	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	lapic_timer_state_broadcast(pr, cx, 1);

	if (cx->type == ACPI_STATE_C3)
		ACPI_FLUSH_CPU_CACHE();

	kt1 = ktime_get_real();
	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	acpi_idle_do_entry(cx);
	kt2 = ktime_get_real();
	idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
	idle_time = idle_time_ns;
	do_div(idle_time, NSEC_PER_USEC);

	/* Update device last_residency*/
	dev->last_residency = (int)idle_time;

	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(idle_time_ns);

	local_irq_enable();
	if (cx->entry_method != ACPI_CSTATE_FFH)
		current_thread_info()->status |= TS_POLLING;

	lapic_timer_state_broadcast(pr, cx, 0);
	return index;
}
Example #3
0
/**
 * cpuidle_idle_call - the main idle loop
 *
 * NOTE: no locks or semaphores should be used here
 * return non-zero on failure
 */
int cpuidle_idle_call(void)
{
	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
	struct cpuidle_driver *drv = cpuidle_get_driver();
	int next_state, entered_state;

	if (off)
		return -ENODEV;

	if (!initialized)
		return -ENODEV;

	/* check if the device is ready */
	if (!dev || !dev->enabled)
		return -EBUSY;

#if 0
	/* shows regressions, re-enable for 2.6.29 */
	/*
	 * run any timers that can be run now, at this point
	 * before calculating the idle duration etc.
	 */
	hrtimer_peek_ahead_timers();
#endif

	/* ask the governor for the next state */
	next_state = cpuidle_curr_governor->select(drv, dev);
	if (need_resched()) {
		dev->last_residency = 0;
		/* give the governor an opportunity to reflect on the outcome */
		if (cpuidle_curr_governor->reflect)
			cpuidle_curr_governor->reflect(dev, next_state);
		local_irq_enable();
		return 0;
	}

	trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu);
	trace_cpu_idle_rcuidle(next_state, dev->cpu);

	entered_state = cpuidle_enter_ops(dev, drv, next_state);

	trace_power_end_rcuidle(dev->cpu);
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);

	if (entered_state >= 0) {
		/* Update cpuidle counters */
		/* This can be moved to within driver enter routine
		 * but that results in multiple copies of same code.
		 */
		dev->states_usage[entered_state].time +=
				(unsigned long long)dev->last_residency;
		dev->states_usage[entered_state].usage++;
	} else {
		dev->last_residency = 0;
	}

	/* give the governor an opportunity to reflect on the outcome */
	if (cpuidle_curr_governor->reflect)
		cpuidle_curr_governor->reflect(dev, entered_state);

	return 0;
}
Example #4
0
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
	inc_preempt_count();
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}
Example #5
0
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
					start, len)))
		goto slow_irqon;

	pr_devel("  aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		pr_devel("  %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				goto slow;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();
slow_irqon:
		pr_devel("  slow path ! nr = %d\n", nr);

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
Example #6
0
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
{
	/*
	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
	 * convention.  Adjust regs so it looks like we entered using int80.
	 */

	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
		vdso_image_32.sym_int80_landing_pad;

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
	 * Fix it up.
	 */
	regs->ip = landing_pad;

	enter_from_user_mode();

	local_irq_enable();

	/* Fetch EBP from where the vDSO stashed it. */
	if (
#ifdef CONFIG_X86_64
		/*
		 * Micro-optimization: the pointer we're following is explicitly
		 * 32 bits, so it can't be out of range.
		 */
		__get_user(*(u32 *)&regs->bp,
			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
		get_user(*(u32 *)&regs->bp,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
		) {

		/* User code screwed up. */
		local_irq_disable();
		regs->ax = -EFAULT;
		prepare_exit_to_usermode(regs);
		return 0;	/* Keep it simple: use IRET. */
	}

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs);

#ifdef CONFIG_X86_64
	/*
	 * Opportunistic SYSRETL: if possible, try to return using SYSRETL.
	 * SYSRETL is available on all 64-bit CPUs, so we don't need to
	 * bother with SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 */
	return regs->cs == __USER32_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
#else
	/*
	 * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
	 *
	 * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
	 * because the ECX fixup above will ensure that this is essentially
	 * never the case.
	 *
	 * We don't allow syscalls at all from VM86 mode, but we still
	 * need to check VM, because we might be returning from sys_vm86.
	 */
	return static_cpu_has(X86_FEATURE_SEP) &&
		regs->cs == __USER_CS && regs->ss == __USER_DS &&
		regs->ip == landing_pad &&
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
Example #7
0
static inline void gpio_unlock(void)
{
	local_irq_enable();
}
Example #8
0
/*
 * This thread processes interrupts reported by the Primary Interrupt Handler.
 */
static int twl6030_irq_thread(void *data)
{
    long irq = (long)data;
    static unsigned i2c_errors;
    static const unsigned max_i2c_errors = 100;
    int ret;

    while (!kthread_should_stop()) {
        int i;
        union {
            u8 bytes[4];
            u32 int_sts;
        } sts;

        /* Wait for IRQ, then read PIH irq status (also blocking) */
        wait_for_completion_interruptible(&irq_event);

        /* read INT_STS_A, B and C in one shot using a burst read */
        ret = twl_i2c_read(TWL_MODULE_PIH, sts.bytes,
                           REG_INT_STS_A, 3);
        if (ret) {
            pr_warning("twl6030: I2C error %d reading PIH ISR\n",
                       ret);
            if (++i2c_errors >= max_i2c_errors) {
                printk(KERN_ERR "Maximum I2C error count"
                       " exceeded.  Terminating %s.\n",
                       __func__);
                break;
            }
            complete(&irq_event);
            continue;
        }



        sts.bytes[3] = 0; /* Only 24 bits are valid*/

        /*
         * Since VBUS status bit is not reliable for VBUS disconnect
         * use CHARGER VBUS detection status bit instead.
         */
        if (sts.bytes[2] & 0x10)
            sts.bytes[2] |= 0x08;

        for (i = 0; sts.int_sts; sts.int_sts >>= 1, i++) {
            local_irq_disable();
            if (sts.int_sts & 0x1) {
                int module_irq = twl6030_irq_base +
                                 twl6030_interrupt_mapping[i];
                generic_handle_irq(module_irq);

            }
            local_irq_enable();
        }

        /*
         * NOTE:
         * Simulation confirms that documentation is wrong w.r.t the
         * interrupt status clear operation. A single *byte* write to
         * any one of STS_A to STS_C register results in all three
         * STS registers being reset. Since it does not matter which
         * value is written, all three registers are cleared on a
         * single byte write, so we just use 0x0 to clear.
         */
        ret = twl_i2c_write_u8(TWL_MODULE_PIH, 0x00, REG_INT_STS_A);
        if (ret)
            pr_warning("twl6030: I2C error in clearing PIH ISR\n");

        enable_irq(irq);
    }

    return 0;
}
Example #9
0
/* default implementation */
void __attribute__ ((weak)) arch_suspend_enable_irqs(void)
{
	local_irq_enable();
}
Example #10
0
/*
 * This routine handles page faults.  It determines the address, and the
 * problem, and then passes it handle_page_fault() for normal DTLB and
 * ITLB issues, and for DMA or SN processor faults when we are in user
 * space.  For the latter, if we're in kernel mode, we just save the
 * interrupt away appropriately and return immediately.  We can't do
 * page faults for user code while in kernel mode.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write)
{
	int is_page_fault;

	/* This case should have been handled by do_page_fault_ics(). */
	BUG_ON(write & ~1);

#if CHIP_HAS_TILE_DMA()
	/*
	 * If it's a DMA fault, suspend the transfer while we're
	 * handling the miss; we'll restart after it's handled.  If we
	 * don't suspend, it's possible that this process could swap
	 * out and back in, and restart the engine since the DMA is
	 * still 'running'.
	 */
	if (fault_num == INT_DMATLB_MISS ||
	    fault_num == INT_DMATLB_ACCESS ||
	    fault_num == INT_DMATLB_MISS_DWNCL ||
	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}
#endif

	/* Validate fault num and decide if this is a first-time page fault. */
	switch (fault_num) {
	case INT_ITLB_MISS:
	case INT_DTLB_MISS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
#endif
#if CHIP_HAS_SN_PROC()
	case INT_SNITLB_MISS:
	case INT_SNITLB_MISS_DWNCL:
#endif
		is_page_fault = 1;
		break;

	case INT_DTLB_ACCESS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
#endif
		is_page_fault = 0;
		break;

	default:
		panic("Bad fault number %d in do_page_fault", fault_num);
	}

#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	if (EX1_PL(regs->ex1) != USER_PL) {
		struct async_tlb *async;
		switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
		case INT_DMATLB_MISS:
		case INT_DMATLB_ACCESS:
		case INT_DMATLB_MISS_DWNCL:
		case INT_DMATLB_ACCESS_DWNCL:
			async = &current->thread.dma_async_tlb;
			break;
#endif
#if CHIP_HAS_SN_PROC()
		case INT_SNITLB_MISS:
		case INT_SNITLB_MISS_DWNCL:
			async = &current->thread.sn_async_tlb;
			break;
#endif
		default:
			async = NULL;
		}
		if (async) {

			/*
			 * No vmalloc check required, so we can allow
			 * interrupts immediately at this point.
			 */
			local_irq_enable();

			set_thread_flag(TIF_ASYNC_TLB);
			if (async->fault_num != 0) {
				panic("Second async fault %d;"
				      " old fault was %d (%#lx/%ld)",
				      fault_num, async->fault_num,
				      address, write);
			}
			BUG_ON(fault_num == 0);
			async->fault_num = fault_num;
			async->is_fault = is_page_fault;
			async->is_write = write;
			async->address = address;
			return;
		}
	}
#endif

	handle_page_fault(regs, fault_num, is_page_fault, address, write);
}
/**
 * nohz_restart_sched_tick - restart the idle tick from the idle task
 *
 * Restart the idle tick when the CPU is woken up from idle
 */
void tick_nohz_restart_sched_tick(void)
{
	int cpu = smp_processor_id();
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	unsigned long ticks;
	ktime_t now, delta;

	if (!ts->tick_stopped)
		return;

	/* Update jiffies first */
	now = ktime_get();

	local_irq_disable();
	tick_do_update_jiffies64(now);
	cpu_clear(cpu, nohz_cpu_mask);

	/* Account the idle time */
	delta = ktime_sub(now, ts->idle_entrytime);
	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);

	/*
	 * We stopped the tick in idle. Update process times would miss the
	 * time we slept as update_process_times does only a 1 tick
	 * accounting. Enforce that this is accounted to idle !
	 */
	ticks = jiffies - ts->idle_jiffies;
	/*
	 * We might be one off. Do not randomly account a huge number of ticks!
	 */
	if (ticks && ticks < LONG_MAX) {
		add_preempt_count(HARDIRQ_OFFSET);
		account_system_time(current, HARDIRQ_OFFSET,
				    jiffies_to_cputime(ticks));
		sub_preempt_count(HARDIRQ_OFFSET);
	}

	/*
	 * Cancel the scheduled timer and restore the tick
	 */
	ts->tick_stopped  = 0;
	hrtimer_cancel(&ts->sched_timer);
	ts->sched_timer.expires = ts->idle_tick;

	while (1) {
		/* Forward the time to expire in the future */
		hrtimer_forward(&ts->sched_timer, now, tick_period);

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer,
				      ts->sched_timer.expires,
				      HRTIMER_MODE_ABS);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				break;
		} else {
			if (!tick_program_event(ts->sched_timer.expires, 0))
				break;
		}
		/* Update jiffies and reread time */
		tick_do_update_jiffies64(now);
		now = ktime_get();
	}
	local_irq_enable();
}
Example #12
0
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
				   struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	int fault, sig, code;
	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;
	mm  = tsk->mm;

	/* Enable interrupts if they were enabled in the parent context. */
	if (interrupts_enabled(regs))
		local_irq_enable();

	/*
	 * If we're in an interrupt or have no user context, we must not take
	 * the fault.
	 */
	if (in_atomic() || !mm)
		goto no_context;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (esr & ESR_LNX_EXEC) {
		vm_flags = VM_EXEC;
	} else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	}

	/*
	 * PAN bit set implies the fault happened in kernel space, but not
	 * in the arch's user access functions.
	 */
	if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
		goto no_context;

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
retry:
		down_read(&mm->mmap_sem);
	} else {
		/*
		 * The above down_read_trylock() might have succeeded in which
		 * case, we'll have missed the might_sleep() from down_read().
		 */
		might_sleep();
#ifdef CONFIG_DEBUG_VM
		if (!user_mode(regs) && !search_exception_tables(regs->pc))
			goto no_context;
#endif
	}

	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_sem because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;

	/*
	 * Major/minor page fault accounting is only done on the initial
	 * attempt. If we go through a retry, it is extremely likely that the
	 * page will be found in page cache at that point.
	 */

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
				      addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
				      addr);
		}
		if (fault & VM_FAULT_RETRY) {
			/*
			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
			 * starvation.
			 */
			mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
			mm_flags |= FAULT_FLAG_TRIED;
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);

	/*
	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
	 */
	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
			      VM_FAULT_BADACCESS))))
		return 0;

	/*
	 * If we are in kernel mode at this point, we have no context to
	 * handle this fault with.
	 */
	if (!user_mode(regs))
		goto no_context;

	if (fault & VM_FAULT_OOM) {
		/*
		 * We ran out of memory, call the OOM killer, and return to
		 * userspace (which will retry the fault, or kill us if we got
		 * oom-killed).
		 */
		pagefault_out_of_memory();
		return 0;
	}

	if (fault & VM_FAULT_SIGBUS) {
		/*
		 * We had some memory, but were unable to successfully fix up
		 * this page fault.
		 */
		sig = SIGBUS;
		code = BUS_ADRERR;
	} else {
		/*
		 * Something tried to access memory that isn't in our memory
		 * map.
		 */
		sig = SIGSEGV;
		code = fault == VM_FAULT_BADACCESS ?
			SEGV_ACCERR : SEGV_MAPERR;
	}

	__do_user_fault(tsk, addr, esr, sig, code, regs);
	return 0;

no_context:
	__do_kernel_fault(mm, addr, esr, regs);
	return 0;
}
/**
 * hibernation_platform_enter - Power off the system using the platform driver.
 */
int hibernation_platform_enter(void)
{
	int error;

	if (!hibernation_ops)
		return -ENOSYS;

	/*
	 * We have cancelled the power transition by running
	 * hibernation_ops->finish() before saving the image, so we should let
	 * the firmware know that we're going to enter the sleep state after all
	 */
	error = hibernation_ops->begin();
	if (error)
		goto Close;

	entering_platform_hibernation = true;
	suspend_console();
	ftrace_stop();
	error = dpm_suspend_start(PMSG_HIBERNATE);
	if (error) {
		if (hibernation_ops->recover)
			hibernation_ops->recover();
		goto Resume_devices;
	}

	error = dpm_suspend_end(PMSG_HIBERNATE);
	if (error)
		goto Resume_devices;

	error = hibernation_ops->prepare();
	if (error)
		goto Platform_finish;

	error = disable_nonboot_cpus();
	if (error)
		goto Platform_finish;

	local_irq_disable();
	syscore_suspend();
	if (pm_wakeup_pending()) {
		error = -EAGAIN;
		goto Power_up;
	}

	hibernation_ops->enter();
	/* We should never get here */
	while (1);

 Power_up:
	syscore_resume();
	local_irq_enable();
	enable_nonboot_cpus();

 Platform_finish:
	hibernation_ops->finish();

	dpm_resume_start(PMSG_RESTORE);

 Resume_devices:
	entering_platform_hibernation = false;
	dpm_resume_end(PMSG_RESTORE);
	ftrace_start();
	resume_console();

 Close:
	hibernation_ops->end();

	return error;
}
/**
 * resume_target_kernel - Restore system state from a hibernation image.
 * @platform_mode: Whether or not to use the platform driver.
 *
 * Execute device drivers' "noirq" and "late" freeze callbacks, restore the
 * contents of highmem that have not been restored yet from the image and run
 * the low-level code that will restore the remaining contents of memory and
 * switch to the just restored target kernel.
 */
static int resume_target_kernel(bool platform_mode)
{
	int error;

	error = dpm_suspend_end(PMSG_QUIESCE);
	if (error) {
		printk(KERN_ERR "PM: Some devices failed to power down, "
			"aborting resume\n");
		return error;
	}

	error = platform_pre_restore(platform_mode);
	if (error)
		goto Cleanup;

	error = disable_nonboot_cpus();
	if (error)
		goto Enable_cpus;

	local_irq_disable();

	error = syscore_suspend();
	if (error)
		goto Enable_irqs;

	save_processor_state();
	error = restore_highmem();
	if (!error) {
		error = swsusp_arch_resume();
		/*
		 * The code below is only ever reached in case of a failure.
		 * Otherwise, execution continues at the place where
		 * swsusp_arch_suspend() was called.
		 */
		BUG_ON(!error);
		/*
		 * This call to restore_highmem() reverts the changes made by
		 * the previous one.
		 */
		restore_highmem();
	}
	/*
	 * The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures.
	 */
	swsusp_free();
	restore_processor_state();
	touch_softlockup_watchdog();

	syscore_resume();

 Enable_irqs:
	local_irq_enable();

 Enable_cpus:
	enable_nonboot_cpus();

 Cleanup:
	platform_restore_cleanup(platform_mode);

	dpm_resume_start(PMSG_RECOVER);

	return error;
}
Example #15
0
void _spin_unlock_irq(spinlock_t *lock)
{
    _spin_unlock(lock);
    local_irq_enable();
}
Example #16
0
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	extern struct kernel_param __start___param[], __stop___param[];
#ifdef CONFIG_RTAI_RTSPMM
        unsigned int indice_part;
        /* Size of the needed memory block by the configuration */
        unsigned long rt_mem_block_size = 0;
#endif
/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
	lock_kernel();
	page_address_init();
	printk(linux_banner);
	setup_arch(&command_line);
	setup_per_cpu_areas();

	/*
	 * Mark the boot cpu "online" so that it can call console drivers in
	 * printk() and can access its per-cpu storage.
	 */
	smp_prepare_boot_cpu();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	build_all_zonelists();
	page_alloc_init();
	early_init_hardirqs();
	printk("Kernel command line: %s\n", saved_command_line);
	parse_early_param();
	parse_args("Booting kernel", command_line, __start___param,
		   __stop___param - __start___param,
		   &unknown_bootoption);
	sort_main_extable();
	trap_init();
	rcu_init();
	init_IRQ();
	pidhash_init();
	init_timers();
	softirq_init();
	time_init();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic(panic_later, panic_param);

#ifdef CONFIG_RTAI_RTSPMM
        /* Allocate a big and continuous memory block for the module SPMM
           included in the RTAI functionalities */
        printk("--- Memory Allocation for the module rt_spmm ---\n");
        /* WARNING
           We need to add some space for the structures vrtxptext and vrtxpt and the partitions bitmap
           that the module rt_spmm uses to handle the blocks in each partition */
        /* for each defined partitions */
        for(indice_part = 0; indice_part <  RT_MAX_PART_NUM; indice_part ++)
          {
            if ((rt_partitions_table[indice_part].block_size != 0) &&
                (rt_partitions_table[indice_part].num_of_blocks != 0))
              {
                rt_partitions_table[indice_part].part_size =
                  (rt_partitions_table[indice_part].block_size + XN_NBBY)
                  *rt_partitions_table[indice_part].num_of_blocks +
                  + sizeof(vrtxptext_t)+sizeof(vrtxpt_t);
                rt_mem_block_size += rt_partitions_table[indice_part].part_size;
              }
          }
#ifdef CONFIG_RTAI_PART_DMA
        printk("Allocate memory in the low part of memory\n");
        rt_mem_block_ptr=(void*)alloc_bootmem_low(rt_mem_block_size + PAGE_SIZE-1);
#else
        printk("Allocate memory in the standard part of memory\n");
        rt_mem_block_ptr=(void*)alloc_bootmem(rt_mem_block_size + PAGE_SIZE-1);
#endif /* CONFIG_PART_DMA */
        printk("Needed Memory Size : %lu\n", rt_mem_block_size);
        printk("Allocated Memory Size : %lu\n", rt_mem_block_size + PAGE_SIZE-1);
        printk("Memory block address : 0x%x\n", (unsigned int)rt_mem_block_ptr);
        printk("-----------------------------------------------\n");
#endif /* CONFIG_RTAI_RTSPMM */

	profile_init();
	local_irq_enable();
#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start && !initrd_below_start_ok &&
			initrd_start < min_low_pfn << PAGE_SHIFT) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
		initrd_start = 0;
	}
#endif
	vfs_caches_init_early();
	mem_init();
	kmem_cache_init();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	calibrate_delay();
	pidmap_init();
	pgtable_cache_init();
	prio_tree_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled)
		efi_enter_virtual_mode();
#endif
	fork_init(num_physpages);
	proc_caches_init();
	buffer_init();
	unnamed_dev_init();
	security_init();
	vfs_caches_init(num_physpages);

#ifdef CONFIG_MOT_FEAT_DEVICE_TREE
	mothwcfg_init();
#endif /* CONFIG_MOT_FEAT_DEVICE_TREE */

	radix_tree_init();
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
#ifdef CONFIG_PROC_FS
	proc_root_init();
#endif
	check_bugs();

	acpi_early_init(); /* before LAPIC and SMP init */

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}
Example #17
0
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
	enter_from_user_mode();
	local_irq_enable();
	do_syscall_32_irqs_on(regs);
}
Example #18
0
asmlinkage unsigned long asm_do_sig(unsigned long esr, struct pt_regs *regs)
{
	struct siginfo info;
	struct task_struct *tsk;
	struct mm_struct *mm;
	int sig;
 	int trapidx = 0;
	unsigned long addr;
	//register unsigned long sp asm("sp");

	switch(esr & ESR_CODE)
	{
		case TCT_PRIVILEGED:
			addr = regs->elkr;
			printk("Privileged Instruction Exception @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 1;
			break;
		case TCT_UNDEFINED:
			addr = regs->elkr;
			printk("Undefined Instruction Exception @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 2;
			break;
		case TCT_ITLB:
			addr = regs->elkr;
			printk("Instruction TLB Exception @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 3;
			break;
		case TCT_DTLB:
			addr = regs->elkr;
			printk("Data TLB Exception @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 4;
			break;
		case TCT_FETCH_ABORT:
			addr = regs->elkr;
			printk("Fetch Abort Exception @ address %lx\n", addr);
			sig = SIGABRT;
			trapidx= 5;
			break;
		case TCT_DATA_ABORT:/*interrupt exception */
			addr = regs->elkr;
			printk("Data Abort Exception @ %lx\n", addr);
			sig = SIGABRT;
			trapidx = 6;
			break; 
		case TCT_ZERO_DIV:/*interrupt exception */
			addr = regs->elkr;
			printk("Zero Division Exception @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 7;
			break;	
		default:
			addr = regs->elkr;
			printk("Unknown exception, defaulting to SIGSEGV @ %lx\n", addr);
			sig = SIGSEGV;
			trapidx = 8;
			break;		 	
	}

	memset(&info, 0, sizeof(info));
	info.si_signo = sig;
	info.si_addr = (void*)addr;

	tsk = current;
	mm = tsk->mm;

	/*
	 * If we're on the kernel stack or during disabled interrupts
	 * or in an atomic section or if we have no user context, the kernel gets the fault.
	 */
	if( !mm || in_atomic())
	{
		/* the kernel gets the fault */
		bust_spinlocks(1);
		printk(KERN_ALERT "%s: unhandled kernel space fault (%d) @ %lx\n", tsk->comm, sig, addr);
		bust_spinlocks(0);
		local_irq_enable();
		do_exit(SIGKILL);
		return 0;
	} else {
		/* the user space gets the fault */
		if( sig != SIGTRAP )
			printk(KERN_ERR "%s: unhandled user space fault (%d) @ %lx\n", tsk->comm, sig, addr);
		force_sig_info(sig, &info, tsk);
		return regs->r1;
	}
}
Example #19
0
void kgdb_roundup_cpus(unsigned long flags)
{
       local_irq_enable();
       smp_call_function(kgdb_call_nmi_hook, NULL, 0);
       local_irq_disable();
}
Example #20
0
File: smp.c Project: E-LLP/n900
irqreturn_t
ipi_interrupt(int irq, void *dev_id) 
{
	int this_cpu = smp_processor_id();
	struct cpuinfo_parisc *p = &cpu_data[this_cpu];
	unsigned long ops;
	unsigned long flags;

	/* Count this now; we may make a call that never returns. */
	p->ipi_count++;

	mb();	/* Order interrupt and bit testing. */

	for (;;) {
		spinlock_t *lock = &per_cpu(ipi_lock, this_cpu);
		spin_lock_irqsave(lock, flags);
		ops = p->pending_ipi;
		p->pending_ipi = 0;
		spin_unlock_irqrestore(lock, flags);

		mb(); /* Order bit clearing and data access. */

		if (!ops)
		    break;

		while (ops) {
			unsigned long which = ffz(~ops);

			ops &= ~(1 << which);

			switch (which) {
			case IPI_NOP:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_NOP\n", this_cpu);
				break;
				
			case IPI_RESCHEDULE:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
				/*
				 * Reschedule callback.  Everything to be
				 * done is done by the interrupt return path.
				 */
				break;

			case IPI_CALL_FUNC:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC\n", this_cpu);
				generic_smp_call_function_interrupt();
				break;

			case IPI_CALL_FUNC_SINGLE:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_CALL_FUNC_SINGLE\n", this_cpu);
				generic_smp_call_function_single_interrupt();
				break;

			case IPI_CPU_START:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_CPU_START\n", this_cpu);
				break;

			case IPI_CPU_STOP:
				smp_debug(100, KERN_DEBUG "CPU%d IPI_CPU_STOP\n", this_cpu);
				halt_processor();
				break;

			case IPI_CPU_TEST:
				smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu);
				break;

			default:
				printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n",
					this_cpu, which);
				return IRQ_NONE;
			} /* Switch */
		/* let in any pending interrupts */
		local_irq_enable();
		local_irq_disable();
		} /* while (ops) */
	}
	return IRQ_HANDLED;
}
Example #21
0
static inline void conditional_sti(struct pt_regs *regs)
{
	if (regs->flags & X86_EFLAGS_IF)
		local_irq_enable();
}
Example #22
0
File: core.c Project: 168519/linux
/*H:030
 * Let's jump straight to the the main loop which runs the Guest.
 * Remember, this is called by the Launcher reading /dev/lguest, and we keep
 * going around and around until something interesting happens.
 */
int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
{
	/* If the launcher asked for a register with LHREQ_GETREG */
	if (cpu->reg_read) {
		if (put_user(*cpu->reg_read, user))
			return -EFAULT;
		cpu->reg_read = NULL;
		return sizeof(*cpu->reg_read);
	}

	/* We stop running once the Guest is dead. */
	while (!cpu->lg->dead) {
		unsigned int irq;
		bool more;

		/* First we run any hypercalls the Guest wants done. */
		if (cpu->hcall)
			do_hypercalls(cpu);

		/* Do we have to tell the Launcher about a trap? */
		if (cpu->pending.trap) {
			if (copy_to_user(user, &cpu->pending,
					 sizeof(cpu->pending)))
				return -EFAULT;
			return sizeof(cpu->pending);
		}

		/*
		 * All long-lived kernel loops need to check with this horrible
		 * thing called the freezer.  If the Host is trying to suspend,
		 * it stops us.
		 */
		try_to_freeze();

		/* Check for signals */
		if (signal_pending(current))
			return -ERESTARTSYS;

		/*
		 * Check if there are any interrupts which can be delivered now:
		 * if so, this sets up the hander to be executed when we next
		 * run the Guest.
		 */
		irq = interrupt_pending(cpu, &more);
		if (irq < LGUEST_IRQS)
			try_deliver_interrupt(cpu, irq, more);

		/*
		 * Just make absolutely sure the Guest is still alive.  One of
		 * those hypercalls could have been fatal, for example.
		 */
		if (cpu->lg->dead)
			break;

		/*
		 * If the Guest asked to be stopped, we sleep.  The Guest's
		 * clock timer will wake us.
		 */
		if (cpu->halted) {
			set_current_state(TASK_INTERRUPTIBLE);
			/*
			 * Just before we sleep, make sure no interrupt snuck in
			 * which we should be doing.
			 */
			if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
				set_current_state(TASK_RUNNING);
			else
				schedule();
			continue;
		}

		/*
		 * OK, now we're ready to jump into the Guest.  First we put up
		 * the "Do Not Disturb" sign:
		 */
		local_irq_disable();

		/* Actually run the Guest until something happens. */
		lguest_arch_run_guest(cpu);

		/* Now we're ready to be interrupted or moved to other CPUs */
		local_irq_enable();

		/* Now we deal with whatever happened to the Guest. */
		lguest_arch_handle_trap(cpu);
	}

	/* Special case: Guest is 'dead' but wants a reboot. */
	if (cpu->lg->dead == ERR_PTR(-ERESTART))
		return -ERESTART;

	/* The Guest is dead => "No such file or directory" */
	return -ENOENT;
}
Example #23
0
/*
 * Update the 
 */
int ide_driveid_update (ide_drive_t *drive)
{
	ide_hwif_t *hwif	= HWIF(drive);
	struct hd_driveid *id;
#if 0
	id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
	if (!id)
		return 0;

	taskfile_lib_get_identify(drive, (char *)&id);

	ide_fix_driveid(id);
	if (id) {
		drive->id->dma_ultra = id->dma_ultra;
		drive->id->dma_mword = id->dma_mword;
		drive->id->dma_1word = id->dma_1word;
		/* anything more ? */
		kfree(id);
	}
	return 1;
#else
	/*
	 * Re-read drive->id for possible DMA mode
	 * change (copied from ide-probe.c)
	 */
	unsigned long timeout, flags;

	SELECT_MASK(drive, 1);
	if (IDE_CONTROL_REG)
		hwif->OUTB(drive->ctl,IDE_CONTROL_REG);
	msleep(50);
	hwif->OUTB(WIN_IDENTIFY, IDE_COMMAND_REG);
	timeout = jiffies + WAIT_WORSTCASE;
	do {
		if (time_after(jiffies, timeout)) {
			SELECT_MASK(drive, 0);
			return 0;	/* drive timed-out */
		}
		msleep(50);	/* give drive a breather */
	} while (hwif->INB(IDE_ALTSTATUS_REG) & BUSY_STAT);
	msleep(50);	/* wait for IRQ and DRQ_STAT */
	if (!OK_STAT(hwif->INB(IDE_STATUS_REG),DRQ_STAT,BAD_R_STAT)) {
		SELECT_MASK(drive, 0);
		printk("%s: CHECK for good STATUS\n", drive->name);
		return 0;
	}
	local_irq_save(flags);
	SELECT_MASK(drive, 0);
	id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
	if (!id) {
		local_irq_restore(flags);
		return 0;
	}
	ata_input_data(drive, id, SECTOR_WORDS);
	(void) hwif->INB(IDE_STATUS_REG);	/* clear drive IRQ */
	local_irq_enable();
	local_irq_restore(flags);
	ide_fix_driveid(id);
	if (id) {
		drive->id->dma_ultra = id->dma_ultra;
		drive->id->dma_mword = id->dma_mword;
		drive->id->dma_1word = id->dma_1word;
		/* anything more ? */
		kfree(id);
	}

	return 1;
#endif
}
Example #24
0
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
			      unsigned long vector, int write_acc)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	siginfo_t info;
	int fault;
	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	tsk = current;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * NOTE2: This is done so that, when updating the vmalloc
	 * mappings we don't have to walk all processes pgdirs and
	 * add the high mappings all at once. Instead we do it as they
	 * are used. However vmalloc'ed page entries have the PAGE_GLOBAL
	 * bit set so sometimes the TLB can use a lingering entry.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection error.
	 */

	if (address >= VMALLOC_START &&
	    (vector != 0x300 && vector != 0x400) &&
	    !user_mode(regs))
		goto vmalloc_fault;

	/* If exceptions were enabled, we can reenable them here */
	if (user_mode(regs)) {
		/* Exception was in userspace: reenable interrupts */
		local_irq_enable();
		flags |= FAULT_FLAG_USER;
	} else {
		/* If exception was in a syscall, then IRQ's may have
		 * been enabled or disabled.  If they were enabled,
		 * reenable them.
		 */
		if (regs->sr && (SPR_SR_IEE | SPR_SR_TEE))
			local_irq_enable();
	}

	mm = tsk->mm;
	info.si_code = SEGV_MAPERR;

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */

	if (in_interrupt() || !mm)
		goto no_context;

retry:
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);

	if (!vma)
		goto bad_area;

	if (vma->vm_start <= address)
		goto good_area;

	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;

	if (user_mode(regs)) {
		/*
		 * accessing the stack below usp is always a bug.
		 * we get page-aligned addresses so we can only check
		 * if we're within a page from usp, but that might be
		 * enough to catch brutal errors at least.
		 */
		if (address + PAGE_SIZE < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	info.si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */

	if (write_acc) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		/* not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
			goto bad_area;
	}

	/* are we trying to execute nonexecutable area */
	if ((vector == 0x400) && !(vma->vm_page_prot.pgprot & _PAGE_EXEC))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGSEGV)
			goto bad_area;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}

	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		/*RGD modeled on Cris */
		if (fault & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			 /* No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */

			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */

bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		/* info.si_code has been set above */
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 *  when it acesses user-memory. When it fails in one
	 *  of those points, we find it in a table and do a jump
	 *  to some fixup code that loads an appropriate error
	 *  code)
	 */

	{
		const struct exception_table_entry *entry;

		__asm__ __volatile__("l.nop 42");

		if ((entry = search_exception_tables(regs->pc)) != NULL) {
			/* Adjust the instruction pointer in the stackframe */
			regs->pc = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	if ((unsigned long)(address) < PAGE_SIZE)
		printk(KERN_ALERT
		       "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel access");
	printk(" at virtual address 0x%08lx\n", address);

	die("Oops", regs, write_acc);

	do_exit(SIGKILL);

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	__asm__ __volatile__("l.nop 42");
	__asm__ __volatile__("l.nop 1");

	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in a misfortunately run irq
		 * (like inside schedule() between switch_mm and
		 *  switch_to...).
		 */

		int offset = pgd_index(address);
		pgd_t *pgd, *pgd_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

/*
		phx_warn("do_page_fault(): vmalloc_fault will not work, "
			 "since current_pgd assign a proper value somewhere\n"
			 "anyhow we don't need this at the moment\n");

		phx_mmu("vmalloc_fault");
*/
		pgd = (pgd_t *)current_pgd[smp_processor_id()] + offset;
		pgd_k = init_mm.pgd + offset;

		/* Since we're two-level, we don't need to do both
		 * set_pgd and set_pmd (they do the same thing). If
		 * we go three-level at some point, do the right thing
		 * with pgd_present and set_pgd here.
		 *
		 * Also, since the vmalloc area is global, we don't
		 * need to copy individual PTE's, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		pud = pud_offset(pgd, address);
		pud_k = pud_offset(pgd_k, address);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, address);
		pmd_k = pmd_offset(pud_k, address);

		if (!pmd_present(*pmd_k))
			goto bad_area_nosemaphore;

		set_pmd(pmd, *pmd_k);

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, address);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}
Example #25
0
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	extern struct kernel_param __start___param[], __stop___param[];

	smp_setup_processor_id();

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	lockdep_init();
	debug_objects_early_init();

	/*
	 * Set up the the initial canary ASAP:
	 */
	boot_init_stack_canary();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_off();
	early_init_irq_lock_class();

/*
 * Interrupts are still disabled. Do necessary setups, then
 * enable them
 */
	lock_kernel();
	tick_init();
	boot_cpu_init();
	page_address_init();
	printk(KERN_NOTICE "%s", linux_banner);
	setup_arch(&command_line);
	mm_init_owner(&init_mm, &init_task);
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL);
	page_alloc_init();

	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
	parse_early_param();
	parse_args("Booting kernel", static_command_line, __start___param,
		   __stop___param - __start___param,
		   &unknown_bootoption);
	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	pidhash_init();
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_init();
	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();
	if (!irqs_disabled()) {
		printk(KERN_WARNING "start_kernel(): bug: interrupts were "
				"enabled *very* early, fixing it\n");
		local_irq_disable();
	}
	rcu_init();
	radix_tree_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	prio_tree_init();
	init_timers();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();
	profile_init();
	if (!irqs_disabled())
		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
				 "enabled early\n");
	early_boot_irqs_on();
	local_irq_enable();

	/* Interrupts are enabled now so all GFP allocations are safe. */
	gfp_allowed_mask = __GFP_BITS_MASK;

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	if (panic_later)
		panic(panic_later, panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_cgroup_init();
	enable_debug_pagealloc();
	kmemtrace_init();
	kmemleak_init();
	debug_objects_mem_init();
	idr_init_cache();
	setup_per_cpu_pageset();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled)
		efi_enter_virtual_mode();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init(totalram_pages);
	proc_caches_init();
	buffer_init();
	key_init();
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);
	signals_init();
	/* rootfs populating might need page-writeback */
	page_writeback_init();
#ifdef CONFIG_PROC_FS
	proc_root_init();
#endif
	cgroup_init();
	cpuset_init();
	taskstats_init_early();
	delayacct_init();

	check_bugs();

	acpi_early_init(); /* before LAPIC and SMP init */
	sfi_init_late();

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();
}
Example #26
0
/*
 * This routine is responsible for faulting in user pages.
 * It passes the work off to one of the appropriate routines.
 * It returns true if the fault was successfully handled.
 */
static int handle_page_fault(struct pt_regs *regs,
			     int fault_num,
			     int is_page_fault,
			     unsigned long address,
			     int write)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long stack_offset;
	int fault;
	int si_code;
	int is_kernel_mode;
	pgd_t *pgd;
	unsigned int flags;

	/* on TILE, protection faults are always writes */
	if (!is_page_fault)
		write = 1;

	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;

	is_kernel_mode = !user_mode(regs);

	tsk = validate_current();

	/*
	 * Check to see if we might be overwriting the stack, and bail
	 * out if so.  The page fault code is a relatively likely
	 * place to get trapped in an infinite regress, and once we
	 * overwrite the whole stack, it becomes very hard to recover.
	 */
	stack_offset = stack_pointer & (THREAD_SIZE-1);
	if (stack_offset < THREAD_SIZE / 8) {
		pr_alert("Potential stack overrun: sp %#lx\n",
		       stack_pointer);
		show_regs(regs);
		pr_alert("Killing current process %d/%s\n",
		       tsk->pid, tsk->comm);
		do_group_exit(SIGKILL);
	}

	/*
	 * Early on, we need to check for migrating PTE entries;
	 * see homecache.c.  If we find a migrating PTE, we wait until
	 * the backing page claims to be done migrating, then we proceed.
	 * For kernel PTEs, we rewrite the PTE and return and retry.
	 * Otherwise, we treat the fault like a normal "no PTE" fault,
	 * rather than trying to patch up the existing PTE.
	 */
	pgd = get_current_pgd();
	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
				 is_kernel_mode, write))
		return 1;

	si_code = SEGV_MAPERR;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * and that the fault was not a protection fault.
	 */
	if (unlikely(address >= TASK_SIZE &&
		     !is_arch_mappable_range(address, 0))) {
		if (is_kernel_mode && is_page_fault &&
		    vmalloc_fault(pgd, address) >= 0)
			return 1;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		mm = NULL;  /* happy compiler */
		vma = NULL;
		goto bad_area_nosemaphore;
	}

	/*
	 * If we're trying to touch user-space addresses, we must
	 * be either at PL0, or else with interrupts enabled in the
	 * kernel, so either way we can re-enable interrupts here
	 * unless we are doing atomic access to user space with
	 * interrupts disabled.
	 */
	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
		local_irq_enable();

	mm = tsk->mm;

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm) {
		vma = NULL;  /* happy compiler */
		goto bad_area_nosemaphore;
	}

	if (!is_kernel_mode)
		flags |= FAULT_FLAG_USER;

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space.  All other faults represent errors in the
	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space.  Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source.  If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if (is_kernel_mode &&
		    !search_exception_tables(regs->pc)) {
			vma = NULL;  /* happy compiler */
			goto bad_area_nosemaphore;
		}

retry:
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (regs->sp < PAGE_OFFSET) {
		/*
		 * accessing the stack below sp is always a bug.
		 */
		if (address < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	if (fault_num == INT_ITLB_MISS) {
		if (!(vma->vm_flags & VM_EXEC))
			goto bad_area;
	} else if (write) {
#ifdef TEST_VERIFY_AREA
		if (!is_page_fault && regs->cs == KERNEL_CS)
			pr_err("WP fault at "REGFMT"\n", regs->eip);
#endif
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		flags |= FAULT_FLAG_WRITE;
	} else {
		if (!is_page_fault || !(vma->vm_flags & VM_READ))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR)
			tsk->maj_flt++;
		else
			tsk->min_flt++;
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

			 /*
			  * No need to up_read(&mm->mmap_sem) as we would
			  * have already released it in __lock_page_or_retry
			  * in mm/filemap.c.
			  */
			goto retry;
		}
	}

#if CHIP_HAS_TILE_DMA()
	/* If this was a DMA TLB fault, restart the DMA engine. */
	switch (fault_num) {
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		break;
	}
#endif

	up_read(&mm->mmap_sem);
	return 1;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (!is_kernel_mode) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
				     fault_num, tsk, regs);
		return 0;
	}

no_context:
	/* Are we prepared to handle this kernel fault?  */
	if (fixup_exception(regs))
		return 0;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	bust_spinlocks(1);

	/* FIXME: no lookup_address() yet */
#ifdef SUPPORT_LOOKUP_ADDRESS
	if (fault_num == INT_ITLB_MISS) {
		pte_t *pte = lookup_address(address);

		if (pte && pte_present(*pte) && !pte_exec_kernel(*pte))
			pr_crit("kernel tried to execute"
			       " non-executable page - exploit attempt?"
			       " (uid: %d)\n", current->uid);
	}
#endif
	if (address < PAGE_SIZE)
		pr_alert("Unable to handle kernel NULL pointer dereference\n");
	else
		pr_alert("Unable to handle kernel paging request\n");
	pr_alert(" at virtual address "REGFMT", pc "REGFMT"\n",
		 address, regs->pc);

	show_regs(regs);

	if (unlikely(tsk->pid < 2)) {
		panic("Kernel page fault running %s!",
		      is_idle_task(tsk) ? "the idle task" : "init");
	}

	/*
	 * More FIXME: we should probably copy the i386 here and
	 * implement a generic die() routine.  Not today.
	 */
#ifdef SUPPORT_DIE
	die("Oops", regs);
#endif
	bust_spinlocks(1);

	do_group_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_kernel_mode)
		goto no_context;
	pagefault_out_of_memory();
	return 0;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (is_kernel_mode)
		goto no_context;

	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
			     fault_num, tsk, regs);
	return 0;
}
Example #27
0
/**
 * acpi_idle_enter_bm - enters C3 with proper BM handling
 * @dev: the target CPU
 * @drv: cpuidle driver containing state data
 * @index: the index of suggested state
 *
 * If BM is detected, the deepest non-C3 idle state is entered instead.
 */
static int acpi_idle_enter_bm(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int index)
{
	struct acpi_processor *pr;
	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
	ktime_t  kt1, kt2;
	s64 idle_time_ns;
	s64 idle_time;


	pr = __this_cpu_read(processors);
	dev->last_residency = 0;

	if (unlikely(!pr))
		return -EINVAL;

	if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
		if (drv->safe_state_index >= 0) {
			return drv->states[drv->safe_state_index].enter(dev,
						drv, drv->safe_state_index);
		} else {
			local_irq_disable();
			acpi_safe_halt();
			local_irq_enable();
			return -EBUSY;
		}
	}

	local_irq_disable();


	if (cx->entry_method != ACPI_CSTATE_FFH) {
		current_thread_info()->status &= ~TS_POLLING;
		/*
		 * TS_POLLING-cleared state must be visible before we test
		 * NEED_RESCHED:
		 */
		smp_mb();

		if (unlikely(need_resched())) {
			current_thread_info()->status |= TS_POLLING;
			local_irq_enable();
			return -EINVAL;
		}
	}

	acpi_unlazy_tlb(smp_processor_id());

	/* Tell the scheduler that we are going deep-idle: */
	sched_clock_idle_sleep_event();
	/*
	 * Must be done before busmaster disable as we might need to
	 * access HPET !
	 */
	lapic_timer_state_broadcast(pr, cx, 1);

	kt1 = ktime_get_real();
	/*
	 * disable bus master
	 * bm_check implies we need ARB_DIS
	 * !bm_check implies we need cache flush
	 * bm_control implies whether we can do ARB_DIS
	 *
	 * That leaves a case where bm_check is set and bm_control is
	 * not set. In that case we cannot do much, we enter C3
	 * without doing anything.
	 */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		c3_cpu_count++;
		/* Disable bus master arbitration when all CPUs are in C3 */
		if (c3_cpu_count == num_online_cpus())
			acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
		raw_spin_unlock(&c3_lock);
	} else if (!pr->flags.bm_check) {
		ACPI_FLUSH_CPU_CACHE();
	}

	acpi_idle_do_entry(cx);

	/* Re-enable bus master arbitration */
	if (pr->flags.bm_check && pr->flags.bm_control) {
		raw_spin_lock(&c3_lock);
		acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
		c3_cpu_count--;
		raw_spin_unlock(&c3_lock);
	}
	kt2 = ktime_get_real();
	idle_time_ns = ktime_to_ns(ktime_sub(kt2, kt1));
	idle_time = idle_time_ns;
	do_div(idle_time, NSEC_PER_USEC);

	/* Update device last_residency*/
	dev->last_residency = (int)idle_time;

	/* Tell the scheduler how much we idled: */
	sched_clock_idle_wakeup_event(idle_time_ns);

	local_irq_enable();
	if (cx->entry_method != ACPI_CSTATE_FFH)
		current_thread_info()->status |= TS_POLLING;

	lapic_timer_state_broadcast(pr, cx, 0);
	return index;
}
Example #28
0
/*
 * This routine handles page faults.  It determines the address, and the
 * problem, and then passes it handle_page_fault() for normal DTLB and
 * ITLB issues, and for DMA or SN processor faults when we are in user
 * space.  For the latter, if we're in kernel mode, we just save the
 * interrupt away appropriately and return immediately.  We can't do
 * page faults for user code while in kernel mode.
 */
void do_page_fault(struct pt_regs *regs, int fault_num,
		   unsigned long address, unsigned long write)
{
	int is_page_fault;

#ifdef CONFIG_KPROBES
	/*
	 * This is to notify the fault handler of the kprobes.  The
	 * exception code is redundant as it is also carried in REGS,
	 * but we pass it anyhow.
	 */
	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
		return;
#endif

#ifdef __tilegx__
	/*
	 * We don't need early do_page_fault_ics() support, since unlike
	 * Pro we don't need to worry about unlocking the atomic locks.
	 * There is only one current case in GX where we touch any memory
	 * under ICS other than our own kernel stack, and we handle that
	 * here.  (If we crash due to trying to touch our own stack,
	 * we're in too much trouble for C code to help out anyway.)
	 */
	if (write & ~1) {
		unsigned long pc = write & ~1;
		if (pc >= (unsigned long) __start_unalign_asm_code &&
		    pc < (unsigned long) __end_unalign_asm_code) {
			struct thread_info *ti = current_thread_info();
			/*
			 * Our EX_CONTEXT is still what it was from the
			 * initial unalign exception, but now we've faulted
			 * on the JIT page.  We would like to complete the
			 * page fault however is appropriate, and then retry
			 * the instruction that caused the unalign exception.
			 * Our state has been "corrupted" by setting the low
			 * bit in "sp", and stashing r0..r3 in the
			 * thread_info area, so we revert all of that, then
			 * continue as if this were a normal page fault.
			 */
			regs->sp &= ~1UL;
			regs->regs[0] = ti->unalign_jit_tmp[0];
			regs->regs[1] = ti->unalign_jit_tmp[1];
			regs->regs[2] = ti->unalign_jit_tmp[2];
			regs->regs[3] = ti->unalign_jit_tmp[3];
			write &= 1;
		} else {
			pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
				 current->comm, current->pid, pc, address);
			show_regs(regs);
			do_group_exit(SIGKILL);
			return;
		}
	}
#else
	/* This case should have been handled by do_page_fault_ics(). */
	BUG_ON(write & ~1);
#endif

#if CHIP_HAS_TILE_DMA()
	/*
	 * If it's a DMA fault, suspend the transfer while we're
	 * handling the miss; we'll restart after it's handled.  If we
	 * don't suspend, it's possible that this process could swap
	 * out and back in, and restart the engine since the DMA is
	 * still 'running'.
	 */
	if (fault_num == INT_DMATLB_MISS ||
	    fault_num == INT_DMATLB_ACCESS ||
	    fault_num == INT_DMATLB_MISS_DWNCL ||
	    fault_num == INT_DMATLB_ACCESS_DWNCL) {
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}
#endif

	/* Validate fault num and decide if this is a first-time page fault. */
	switch (fault_num) {
	case INT_ITLB_MISS:
	case INT_DTLB_MISS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_MISS:
	case INT_DMATLB_MISS_DWNCL:
#endif
		is_page_fault = 1;
		break;

	case INT_DTLB_ACCESS:
#if CHIP_HAS_TILE_DMA()
	case INT_DMATLB_ACCESS:
	case INT_DMATLB_ACCESS_DWNCL:
#endif
		is_page_fault = 0;
		break;

	default:
		panic("Bad fault number %d in do_page_fault", fault_num);
	}

#if CHIP_HAS_TILE_DMA()
	if (!user_mode(regs)) {
		struct async_tlb *async;
		switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
		case INT_DMATLB_MISS:
		case INT_DMATLB_ACCESS:
		case INT_DMATLB_MISS_DWNCL:
		case INT_DMATLB_ACCESS_DWNCL:
			async = &current->thread.dma_async_tlb;
			break;
#endif
		default:
			async = NULL;
		}
		if (async) {

			/*
			 * No vmalloc check required, so we can allow
			 * interrupts immediately at this point.
			 */
			local_irq_enable();

			set_thread_flag(TIF_ASYNC_TLB);
			if (async->fault_num != 0) {
				panic("Second async fault %d;"
				      " old fault was %d (%#lx/%ld)",
				      fault_num, async->fault_num,
				      address, write);
			}
			BUG_ON(fault_num == 0);
			async->fault_num = fault_num;
			async->is_fault = is_page_fault;
			async->is_write = write;
			async->address = address;
			return;
		}
	}
#endif

	handle_page_fault(regs, fault_num, is_page_fault, address, write);
}
Example #29
0
/*
 * This thread processes interrupts reported by the Primary Interrupt Handler.
 */
static int twl6030_irq_thread(void *data)
{
	long irq = (long)data;
	static unsigned i2c_errors;
	static const unsigned max_i2c_errors = 100;
	int ret;

	current->flags |= PF_NOFREEZE;

	while (!kthread_should_stop()) {
		int i;
		union {
		u8 bytes[4];
		u32 int_sts;
		} sts;

		/* Wait for IRQ, then read PIH irq status (also blocking) */
		wait_for_completion_interruptible(&irq_event);

		/* read INT_STS_A, B and C in one shot using a burst read */
		ret = twl_i2c_read(TWL_MODULE_PIH, sts.bytes,
				REG_INT_STS_A, 3);
		if (ret) {
			pr_warning("twl6030: I2C error %d reading PIH ISR\n",
					ret);
			if (++i2c_errors >= max_i2c_errors) {
				printk(KERN_ERR "Maximum I2C error count"
						" exceeded.  Terminating %s.\n",
						__func__);
				break;
			}
			complete(&irq_event);
			continue;
		}



		sts.bytes[3] = 0; /* Only 24 bits are valid*/

		for (i = 0; sts.int_sts; sts.int_sts >>= 1, i++) {
			local_irq_disable();
			if (sts.int_sts & 0x1) {
				int module_irq = twl6030_irq_base +
					twl6030_interrupt_mapping[i];
				struct irq_desc *d = irq_to_desc(module_irq);

				if (!d) {
					pr_err("twl6030: Invalid SIH IRQ: %d\n",
					       module_irq);
					return -EINVAL;
				}

				/* These can't be masked ... always warn
				 * if we get any surprises.
				 */
				if (d->status & IRQ_DISABLED)
					note_interrupt(module_irq, d,
							IRQ_NONE);
				else
					d->handle_irq(module_irq, d);

			}
		local_irq_enable();
		}
		ret = twl_i2c_write(TWL_MODULE_PIH, sts.bytes,
				REG_INT_STS_A, 3); /* clear INT_STS_A */
		if (ret)
			pr_warning("twl6030: I2C error in clearing PIH ISR\n");

		enable_irq(irq);
	}

	return 0;
}
Example #30
0
/*
 * The driver enables interrupts as much as possible.  In order to do this,
 * (a) the device-interrupt is disabled before entering hd_request(),
 * and (b) the timeout-interrupt is disabled before the sti().
 *
 * Interrupts are still masked (by default) whenever we are exchanging
 * data/cmds with a drive, because some drives seem to have very poor
 * tolerance for latency during I/O. The IDE driver has support to unmask
 * interrupts for non-broken hardware, so use that driver if required.
 */
static void hd_request(void)
{
	unsigned int block, nsect, sec, track, head, cyl;
	struct hd_i_struct *disk;
	struct request *req;

	if (do_hd)
		return;
repeat:
	del_timer(&device_timer);
	local_irq_enable();

	req = CURRENT;
	if (!req) {
		do_hd = NULL;
		return;
	}

	if (reset) {
		local_irq_disable();
		reset_hd();
		return;
	}
	disk = req->rq_disk->private_data;
	block = req->sector;
	nsect = req->nr_sectors;
	if (block >= get_capacity(req->rq_disk) ||
	    ((block+nsect) > get_capacity(req->rq_disk))) {
		printk("%s: bad access: block=%d, count=%d\n",
			req->rq_disk->disk_name, block, nsect);
		end_request(req, 0);
		goto repeat;
	}

	if (disk->special_op) {
		if (do_special_op(disk, req))
			goto repeat;
		return;
	}
	sec   = block % disk->sect + 1;
	track = block / disk->sect;
	head  = track % disk->head;
	cyl   = track / disk->head;
#ifdef DEBUG
	printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
		req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
		cyl, head, sec, nsect, req->buffer);
#endif
	if (req->flags & REQ_CMD) {
		switch (rq_data_dir(req)) {
		case READ:
			hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
			if (reset)
				goto repeat;
			break;
		case WRITE:
			hd_out(disk,nsect,sec,head,cyl,WIN_WRITE,&write_intr);
			if (reset)
				goto repeat;
			if (wait_DRQ()) {
				bad_rw_intr();
				goto repeat;
			}
			outsw(HD_DATA,req->buffer,256);
			break;
		default:
			printk("unknown hd-command\n");
			end_request(req, 0);
			break;
		}
	}
}