asmlinkage int vprintk(const char *fmt, va_list args)
{
    unsigned long flags;
    int printed_len;
    char *p;
    static char printk_buf[1024];
    static int log_level_unknown = 1;

    preempt_disable();
    if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
        /* If a crash is occurring during printk() on this CPU,
         * make sure we can't deadlock */
        zap_locks();

    /* This stops the holder of console_sem just where we want him */
    raw_local_irq_save(flags);
    lockdep_off();
    spin_lock(&logbuf_lock);
    printk_cpu = smp_processor_id();

    /* Emit the output into the temporary buffer */
    printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);

    if (printed_len > 0) {
        unsigned int loglevel;
        int mark_len;
        char *mark_buf;
        char saved_char;

        if (printk_buf[0] == '<' && printk_buf[1] >= '0' &&
                printk_buf[1] <= '7' && printk_buf[2] == '>') {
            loglevel = printk_buf[1] - '0';
            mark_buf = &printk_buf[3];
            mark_len = printed_len - 3;
        } else {
            loglevel = default_message_loglevel;
            mark_buf = printk_buf;
            mark_len = printed_len;
        }
        if (mark_buf[mark_len - 1] == '\n')
            mark_len--;
        saved_char = mark_buf[mark_len];
        mark_buf[mark_len] = '\0';
        _trace_mark(kernel_vprintk, "loglevel %c string %s ip %p",
                    loglevel, mark_buf, __builtin_return_address(0));
        mark_buf[mark_len] = saved_char;
    }

    /*
     * Copy the output into log_buf.  If the caller didn't provide
     * appropriate log level tags, we insert them here
     */
    for (p = printk_buf; *p; p++) {
        if (log_level_unknown) {
            /* log_level_unknown signals the start of a new line */
            if (printk_time) {
                int loglev_char;
                char tbuf[50], *tp;
                unsigned tlen;
                unsigned long long t;
                unsigned long nanosec_rem;

                /*
                 * force the log level token to be
                 * before the time output.
                 */
                if (p[0] == '<' && p[1] >='0' &&
                        p[1] <= '7' && p[2] == '>') {
                    loglev_char = p[1];
                    p += 3;
                    printed_len -= 3;
                } else {
                    loglev_char = default_message_loglevel
                                  + '0';
                }
                t = printk_clock();
                nanosec_rem = do_div(t, 1000000000);
                tlen = sprintf(tbuf,
                               "<%c>[%5lu.%06lu] ",
                               loglev_char,
                               (unsigned long)t,
                               nanosec_rem/1000);

                for (tp = tbuf; tp < tbuf + tlen; tp++)
                    emit_log_char(*tp);
                printed_len += tlen;
            } else {
                if (p[0] != '<' || p[1] < '0' ||
                        p[1] > '7' || p[2] != '>') {
                    emit_log_char('<');
                    emit_log_char(default_message_loglevel
                                  + '0');
                    emit_log_char('>');
                    printed_len += 3;
                }
            }
            log_level_unknown = 0;
            if (!*p)
                break;
        }
        emit_log_char(*p);
        if (*p == '\n')
            log_level_unknown = 1;
    }

    if (!down_trylock(&console_sem)) {
        /*
         * We own the drivers.  We can drop the spinlock and
         * let release_console_sem() print the text, maybe ...
         */
        console_locked = 1;
        printk_cpu = UINT_MAX;
        spin_unlock(&logbuf_lock);

        /*
         * Console drivers may assume that per-cpu resources have
         * been allocated. So unless they're explicitly marked as
         * being able to cope (CON_ANYTIME) don't call them until
         * this CPU is officially up.
         */
        if (cpu_online(smp_processor_id()) || have_callable_console()) {
            console_may_schedule = 0;
            release_console_sem();
        } else {
            /* Release by hand to avoid flushing the buffer. */
            console_locked = 0;
            up(&console_sem);
        }
        lockdep_on();
        raw_local_irq_restore(flags);
    } else {
        /*
         * Someone else owns the drivers.  We drop the spinlock, which
         * allows the semaphore holder to proceed and to call the
         * console drivers with the output which we just produced.
         */
        printk_cpu = UINT_MAX;
        spin_unlock(&logbuf_lock);
        lockdep_on();
        raw_local_irq_restore(flags);
    }

    preempt_enable();
    return printed_len;
}
static int do_vperfctr_control(struct vperfctr *perfctr,
			       const struct vperfctr_control __user *argp,
			       unsigned int argbytes,
			       struct task_struct *tsk)
{
	struct vperfctr_control *control;
	int err;
	unsigned int next_cstatus;
	unsigned int nrctrs, i;

	if (!tsk) {
		return -ESRCH;	/* attempt to update unlinked perfctr */
	}

	/* The control object can be large (over 300 bytes on i386),
	   so kmalloc() it instead of storing it on the stack.
	   We must use task-private storage to prevent racing with a
	   monitor process attaching to us before the non-preemptible
	   perfctr update step. Therefore we cannot store the copy
	   in the perfctr object itself. */
	control = kmalloc(sizeof(*control), GFP_USER);
	if (!control) {
		return -ENOMEM;
	}

	err = -EINVAL;
	if (argbytes > sizeof *control) {
		goto out_kfree;
	}

	err = -EFAULT;
	if (copy_from_user(control, argp, argbytes)) {
		goto out_kfree;
	}

	if (argbytes < sizeof *control)
		memset((char*)control + argbytes, 0, sizeof *control - argbytes);

	// figure out what is happening in the following 'if' loop

	if (control->cpu_control.nractrs || control->cpu_control.nrictrs) {
		cpumask_t old_mask, new_mask;

		old_mask = tsk->cpus_allowed;
		cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask);

		err = -EINVAL;
		if (cpus_empty(new_mask)) {
			goto out_kfree;
		}
		if (!cpus_equal(new_mask, old_mask))
			set_cpus_allowed(tsk, new_mask);
	}

	/* PREEMPT note: preemption is disabled over the entire
	   region since we're updating an active perfctr. */
	preempt_disable();

	// the task whose control register I am changing might actually be
	// in suspended state. That can happen when the other is executing
	// under the control of another task as in the case of debugging
	// or ptrace. However, if the write_control is done for the current
	// executing process, first suspend them and then do the update
	// why are we resetting 'perfctr->cpu_state.cstatus' ?

	if (IS_RUNNING(perfctr)) {
		if (tsk == current)
			vperfctr_suspend(perfctr);
	
		// not sure why we are zeroing out the following explicitly
		perfctr->cpu_state.cstatus = 0;
		vperfctr_clear_iresume_cstatus(perfctr);
	}

	// coying the user-specified control values to 'state'
	perfctr->cpu_state.control = control->cpu_control;

	/* remote access note: perfctr_cpu_update_control() is ok */
	err = perfctr_cpu_update_control(&perfctr->cpu_state, 0);
	if (err < 0) {
		goto out;
	}
	next_cstatus = perfctr->cpu_state.cstatus;
	if (!perfctr_cstatus_enabled(next_cstatus))
		goto out;

	/* XXX: validate si_signo? */
	perfctr->si_signo = control->si_signo;

	if (!perfctr_cstatus_has_tsc(next_cstatus))
		perfctr->cpu_state.tsc_sum = 0;

	nrctrs = perfctr_cstatus_nrctrs(next_cstatus);
	for(i = 0; i < nrctrs; ++i)
		if (!(control->preserve & (1<<i)))
			perfctr->cpu_state.pmc[i].sum = 0;

	// I am not sure why we are removing the inheritance just because
	// we updated the control information. True, because the children might
	// be performing something else. So, the control will have to be set
	// before spawning any children

	spin_lock(&perfctr->children_lock);
	perfctr->inheritance_id = new_inheritance_id();
	memset(&perfctr->children, 0, sizeof perfctr->children);
	spin_unlock(&perfctr->children_lock);

	if (tsk == current) {
		vperfctr_resume(perfctr);
	}

 out:
	preempt_enable();
 out_kfree:
	kfree(control);
	return err;
}
/*
 * Package up a bounce condition.
 */
void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
	u32 fpscr, orig_fpscr, fpsid, exceptions;

	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);

	/*
	 * At this point, FPEXC can have the following configuration:
	 *
	 *  EX DEX IXE
	 *  0   1   x   - synchronous exception
	 *  1   x   0   - asynchronous exception
	 *  1   x   1   - sychronous on VFP subarch 1 and asynchronous on later
	 *  0   0   1   - synchronous on VFP9 (non-standard subarch 1
	 *                implementation), undefined otherwise
	 *
	 * Clear various bits and enable access to the VFP so we can
	 * handle the bounce.
	 */
	fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK));

	fpsid = fmrx(FPSID);
	orig_fpscr = fpscr = fmrx(FPSCR);

	/*
	 * Check for the special VFP subarch 1 and FPSCR.IXE bit case
	 */
	if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
	    && (fpscr & FPSCR_IXE)) {
		/*
		 * Synchronous exception, emulate the trigger instruction
		 */
		goto emulate;
	}

	if (fpexc & FPEXC_EX) {
#ifndef CONFIG_CPU_FEROCEON
		/*
		 * Asynchronous exception. The instruction is read from FPINST
		 * and the interrupted instruction has to be restarted.
		 */
		trigger = fmrx(FPINST);
		regs->ARM_pc -= 4;
#endif
	} else if (!(fpexc & FPEXC_DEX)) {
		/*
		 * Illegal combination of bits. It can be caused by an
		 * unallocated VFP instruction but with FPSCR.IXE set and not
		 * on VFP subarch 1.
		 */
		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
		goto exit;
	}

	/*
	 * Modify fpscr to indicate the number of iterations remaining.
	 * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
	 * whether FPEXC.VECITR or FPSCR.LEN is used.
	 */
	if (fpexc & (FPEXC_EX | FPEXC_VV)) {
		u32 len;

		len = fpexc + (1 << FPEXC_LENGTH_BIT);

		fpscr &= ~FPSCR_LENGTH_MASK;
		fpscr |= (len & FPEXC_LENGTH_MASK) << (FPSCR_LENGTH_BIT - FPEXC_LENGTH_BIT);
	}

	/*
	 * Handle the first FP instruction.  We used to take note of the
	 * FPEXC bounce reason, but this appears to be unreliable.
	 * Emulate the bounced instruction instead.
	 */
	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
	if (exceptions)
		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);

	/*
	 * If there isn't a second FP instruction, exit now. Note that
	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
	 */
	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
		goto exit;

	/*
	 * The barrier() here prevents fpinst2 being read
	 * before the condition above.
	 */
	barrier();
	trigger = fmrx(FPINST2);

 emulate:
	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
	if (exceptions)
		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
 exit:
	preempt_enable();
}
Beispiel #4
0
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage)
{
	struct address_space *swap_space;
	struct file *swap_storage;
	struct page *from_page;
	struct page *to_page;
	void *from_virtual;
	void *to_virtual;
	int i;
	int ret = -ENOMEM;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	/*
	 * For user buffers, just unpin the pages, as there should be
	 * vma references.
	 */

	if (ttm->page_flags & TTM_PAGE_FLAG_USER) {
		ttm_tt_free_user_pages(ttm);
		ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
		ttm->swap_storage = NULL;
		return 0;
	}

	if (!persistant_swap_storage) {
		swap_storage = shmem_file_setup("ttm swap",
						ttm->num_pages << PAGE_SHIFT,
						0);
		if (unlikely(IS_ERR(swap_storage))) {
			printk(KERN_ERR "Failed allocating swap storage.\n");
			return PTR_ERR(swap_storage);
		}
	} else
		swap_storage = persistant_swap_storage;

	swap_space = swap_storage->f_path.dentry->d_inode->i_mapping;

	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = read_mapping_page(swap_space, i, NULL);
		if (unlikely(IS_ERR(to_page))) {
			ret = PTR_ERR(to_page);
			goto out_err;
		}
		preempt_disable();
		from_virtual = kmap_atomic(from_page, KM_USER0);
		to_virtual = kmap_atomic(to_page, KM_USER1);
		memcpy(to_virtual, from_virtual, PAGE_SIZE);
		kunmap_atomic(to_virtual, KM_USER1);
		kunmap_atomic(from_virtual, KM_USER0);
		preempt_enable();
		set_page_dirty(to_page);
		mark_page_accessed(to_page);
		page_cache_release(to_page);
	}

	ttm_tt_free_alloced_pages(ttm);
	ttm->swap_storage = swap_storage;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistant_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP;

	return 0;
out_err:
	if (!persistant_swap_storage)
		fput(swap_storage);

	return ret;
}
int
trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
{
	unsigned long save_max = tracing_max_latency;
	unsigned long count;
	int ret;

	/*
	 * Now that the big kernel lock is no longer preemptable,
	 * and this is called with the BKL held, it will always
	 * fail. If preemption is already disabled, simply
	 * pass the test. When the BKL is removed, or becomes
	 * preemptible again, we will once again test this,
	 * so keep it in.
	 */
	if (preempt_count()) {
		printk(KERN_CONT "can not test ... force ");
		return 0;
	}

	/* start the tracing */
	ret = trace->init(tr);
	if (ret) {
		warn_failed_init_tracer(trace, ret);
		goto out;
	}

	/* reset the max latency */
	tracing_max_latency = 0;

	/* disable preemption and interrupts for a bit */
	preempt_disable();
	local_irq_disable();
	udelay(100);
	preempt_enable();
	/* reverse the order of preempt vs irqs */
	local_irq_enable();

	/* stop the tracing. */
	tracing_stop();
	/* check both trace buffers */
	ret = trace_test_buffer(tr, NULL);
	if (ret) {
		tracing_start();
		goto out;
	}

	ret = trace_test_buffer(&max_tr, &count);
	if (ret) {
		tracing_start();
		goto out;
	}

	if (!ret && !count) {
		printk(KERN_CONT ".. no entries found ..");
		ret = -1;
		tracing_start();
		goto out;
	}

	/* do the test by disabling interrupts first this time */
	tracing_max_latency = 0;
	tracing_start();
	preempt_disable();
	local_irq_disable();
	udelay(100);
	preempt_enable();
	/* reverse the order of preempt vs irqs */
	local_irq_enable();

	/* stop the tracing. */
	tracing_stop();
	/* check both trace buffers */
	ret = trace_test_buffer(tr, NULL);
	if (ret)
		goto out;

	ret = trace_test_buffer(&max_tr, &count);

	if (!ret && !count) {
		printk(KERN_CONT ".. no entries found ..");
		ret = -1;
		goto out;
	}

 out:
	trace->reset(tr);
	tracing_start();
	tracing_max_latency = save_max;

	return ret;
}
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	preempt_enable();
}
static int mcsdl_download(const UINT8 *pBianry, const UINT16 unLength,
		INT8 IdxNum)
{
	int nRet;

	//---------------------------------
	// Check Binary Size
	//---------------------------------
	if (unLength >= MELFAS_FIRMWARE_MAX_SIZE)
	{

		nRet = MCSDL_RET_PROGRAM_SIZE_IS_WRONG;
		goto MCSDL_DOWNLOAD_FINISH;
	}

#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" - Starting download...\n");
#endif

	//---------------------------------
	// Make it ready
	//---------------------------------
#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" > Ready\n");
#endif

	mcsdl_set_ready();

	//---------------------------------
	// Erase Flash
	//---------------------------------
#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" > Erase\n");
#endif

	printk(" > Erase - Start \n");
	preempt_disable();
	nRet = mcsdl_erase_flash(IdxNum);
	preempt_enable();
	printk(" > Erase- End \n");

	if (nRet != MCSDL_RET_SUCCESS)
		goto MCSDL_DOWNLOAD_FINISH;

	//goto MCSDL_DOWNLOAD_FINISH; //lcs_test

	//---------------------------------
	// Program Flash
	//---------------------------------
#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" > Program   ");
#endif

	preempt_disable();
	nRet = mcsdl_program_flash((UINT8*) pBianry, (UINT16) unLength, IdxNum);
	preempt_enable();
	if (nRet != MCSDL_RET_SUCCESS)
		goto MCSDL_DOWNLOAD_FINISH;
	//---------------------------------
	// Verify flash
	//---------------------------------

#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" > Verify    ");
#endif

	preempt_disable();
	nRet = mcsdl_verify_flash((UINT8*) pBianry, (UINT16) unLength, IdxNum);
	preempt_enable();

	if (nRet != MCSDL_RET_SUCCESS)
		goto MCSDL_DOWNLOAD_FINISH;

	nRet = MCSDL_RET_SUCCESS;

	MCSDL_DOWNLOAD_FINISH:

#if MELFAS_ENABLE_DBG_PRINT
	mcsdl_print_result( nRet ); // Show result
#endif

#if MELFAS_ENABLE_DBG_PROGRESS_PRINT
	printk(" > Rebooting\n");
	printk(" - Fin.\n\n");
#endif

	mcsdl_reboot_mcs();

	return nRet;
}
Beispiel #8
0
static inline void __unlock_kernel(void)
{
	_raw_spin_unlock(&kernel_flag);
	preempt_enable();
}
Beispiel #9
0
/**
 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 * @timer:	the timer to be added
 * @tim:	expiry time
 * @delta_ns:	"slack" range for the timer
 * @mode:	expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 *
 * Returns:
 *  0 on success
 *  1 when the timer was active
 */
int
hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
			const enum hrtimer_mode mode)
{
	struct hrtimer_clock_base *base, *new_base;
	unsigned long flags;
	int ret, raise;

	base = lock_hrtimer_base(timer, &flags);

	/* Remove an active timer from the queue: */
	ret = remove_hrtimer(timer, base);

	/* Switch the timer base, if necessary: */
	new_base = switch_hrtimer_base(timer, base);

	if (mode == HRTIMER_MODE_REL) {
		tim = ktime_add_safe(tim, new_base->get_time());
		/*
		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
		 * to signal that they simply return xtime in
		 * do_gettimeoffset(). In this case we want to round up by
		 * resolution when starting a relative timer, to avoid short
		 * timeouts. This will go away with the GTOD framework.
		 */
#ifdef CONFIG_TIME_LOW_RES
		tim = ktime_add_safe(tim, base->resolution);
#endif
	}

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	timer_stats_hrtimer_set_start_info(timer);

	/*
	 * Only allow reprogramming if the new base is on this CPU.
	 * (it might still be on another CPU if the timer was pending)
	 */
	enqueue_hrtimer(timer, new_base,
			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));

	/*
	 * The timer may be expired and moved to the cb_pending
	 * list. We can not raise the softirq with base lock held due
	 * to a possible deadlock with runqueue lock.
	 */
	raise = timer->state == HRTIMER_STATE_PENDING;

	/*
	 * We use preempt_disable to prevent this task from migrating after
	 * setting up the softirq and raising it. Otherwise, if me migrate
	 * we will raise the softirq on the wrong CPU.
	 */
	preempt_disable();

	unlock_hrtimer_base(timer, &flags);

	if (raise)
		hrtimer_raise_softirq();
	preempt_enable();

	return ret;
}
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
				enum kfd_preempt_type reset_type,
				unsigned int utimeout, uint32_t pipe_id,
				uint32_t queue_id)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t temp;
	enum hqd_dequeue_request_type type;
	unsigned long flags, end_jiffies;
	int retry;
	struct vi_mqd *m = get_mqd(mqd);

	if (adev->in_gpu_reset)
		return -EIO;

	acquire_queue(kgd, pipe_id, queue_id);

	if (m->cp_hqd_vmid == 0)
		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);

	switch (reset_type) {
	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
		type = DRAIN_PIPE;
		break;
	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
		type = RESET_WAVES;
		break;
	default:
		type = DRAIN_PIPE;
		break;
	}

	/* Workaround: If IQ timer is active and the wait time is close to or
	 * equal to 0, dequeueing is not safe. Wait until either the wait time
	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
	 * cleared before continuing. Also, ensure wait times are set to at
	 * least 0x3.
	 */
	local_irq_save(flags);
	preempt_disable();
	retry = 5000; /* wait for 500 usecs at maximum */
	while (true) {
		temp = RREG32(mmCP_HQD_IQ_TIMER);
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
			pr_debug("HW is processing IQ\n");
			goto loop;
		}
		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
					== 3) /* SEM-rearm is safe */
				break;
			/* Wait time 3 is safe for CP, but our MMIO read/write
			 * time is close to 1 microsecond, so check for 10 to
			 * leave more buffer room
			 */
			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
					>= 10)
				break;
			pr_debug("IQ timer is active\n");
		} else
			break;
loop:
		if (!retry) {
			pr_err("CP HQD IQ timer status time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	retry = 1000;
	while (true) {
		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
			break;
		pr_debug("Dequeue request is pending\n");

		if (!retry) {
			pr_err("CP HQD dequeue request time out\n");
			break;
		}
		ndelay(100);
		--retry;
	}
	local_irq_restore(flags);
	preempt_enable();

	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);

	end_jiffies = (utimeout * HZ / 1000) + jiffies;
	while (true) {
		temp = RREG32(mmCP_HQD_ACTIVE);
		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
			break;
		if (time_after(jiffies, end_jiffies)) {
			pr_err("cp queue preemption time out.\n");
			release_queue(kgd);
			return -ETIME;
		}
		usleep_range(500, 1000);
	}

	release_queue(kgd);
	return 0;
}
Beispiel #11
0
/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
struct xpc_rsvd_page *
xpc_rsvd_page_init(void)
{
    struct xpc_rsvd_page *rp;
    AMO_t *amos_page;
    u64 rp_pa, nasid_array = 0;
    int i, ret;

    /* get the local reserved page's address */

    preempt_disable();
    rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
    preempt_enable();
    if (rp_pa == 0) {
        dev_err(xpc_part, "SAL failed to locate the reserved page\n");
        return NULL;
    }
    rp = (struct xpc_rsvd_page *)__va(rp_pa);

    if (rp->partid != sn_partition_id) {
        dev_err(xpc_part, "the reserved page's partid of %d should be "
            "%d\n", rp->partid, sn_partition_id);
        return NULL;
    }

    rp->version = XPC_RP_VERSION;

    /* establish the actual sizes of the nasid masks */
    if (rp->SAL_version == 1) {
        /* SAL_version 1 didn't set the nasids_size field */
        rp->nasids_size = 128;
    }
    xp_nasid_mask_bytes = rp->nasids_size;
    xp_nasid_mask_words = xp_nasid_mask_bytes / 8;

    /* setup the pointers to the various items in the reserved page */
    xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
    xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
    xpc_vars = XPC_RP_VARS(rp);
    xpc_vars_part = XPC_RP_VARS_PART(rp);

    /*
     * Before clearing xpc_vars, see if a page of AMOs had been previously
     * allocated. If not we'll need to allocate one and set permissions
     * so that cross-partition AMOs are allowed.
     *
     * The allocated AMO page needs MCA reporting to remain disabled after
     * XPC has unloaded.  To make this work, we keep a copy of the pointer
     * to this page (i.e., amos_page) in the struct xpc_vars structure,
     * which is pointed to by the reserved page, and re-use that saved copy
     * on subsequent loads of XPC. This AMO page is never freed, and its
     * memory protections are never restricted.
     */
    amos_page = xpc_vars->amos_page;
    if (amos_page == NULL) {
        amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1));
        if (amos_page == NULL) {
            dev_err(xpc_part, "can't allocate page of AMOs\n");
            return NULL;
        }

        /*
         * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
         * when xpc_allow_IPI_ops() is called via xpc_hb_init().
         */
        if (!enable_shub_wars_1_1()) {
            ret = sn_change_memprotect(ia64_tpa((u64)amos_page),
                           PAGE_SIZE,
                           SN_MEMPROT_ACCESS_CLASS_1,
                           &nasid_array);
            if (ret != 0) {
                dev_err(xpc_part, "can't change memory "
                    "protections\n");
                uncached_free_page(__IA64_UNCACHED_OFFSET |
                           TO_PHYS((u64)amos_page), 1);
                return NULL;
            }
        }
    } else if (!IS_AMO_ADDRESS((u64)amos_page)) {
        /*
         * EFI's XPBOOT can also set amos_page in the reserved page,
         * but it happens to leave it as an uncached physical address
         * and we need it to be an uncached virtual, so we'll have to
         * convert it.
         */
        if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) {
            dev_err(xpc_part, "previously used amos_page address "
                "is bad = 0x%p\n", (void *)amos_page);
            return NULL;
        }
        amos_page = (AMO_t *)TO_AMO((u64)amos_page);
    }

    /* clear xpc_vars */
    memset(xpc_vars, 0, sizeof(struct xpc_vars));

    xpc_vars->version = XPC_V_VERSION;
    xpc_vars->act_nasid = cpuid_to_nasid(0);
    xpc_vars->act_phys_cpuid = cpu_physical_id(0);
    xpc_vars->vars_part_pa = __pa(xpc_vars_part);
    xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page);
    xpc_vars->amos_page = amos_page;    /* save for next load of XPC */

    /* clear xpc_vars_part */
    memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
           XP_MAX_PARTITIONS);

    /* initialize the activate IRQ related AMO variables */
    for (i = 0; i < xp_nasid_mask_words; i++)
        (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);

    /* initialize the engaged remote partitions related AMO variables */
    (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
    (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);

    /* timestamp of when reserved page was setup by XPC */
    rp->stamp = CURRENT_TIME;

    /*
     * This signifies to the remote partition that our reserved
     * page is initialized.
     */
    rp->vars_pa = __pa(xpc_vars);

    return rp;
}
Beispiel #12
0
static int clamp_thread(void *arg)
{
	int cpunr = (unsigned long)arg;
	DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
	static const struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,
	};
	unsigned int count = 0;
	unsigned int target_ratio;

	set_bit(cpunr, cpu_clamping_mask);
	set_freezable();
	init_timer_on_stack(&wakeup_timer);
	sched_setscheduler(current, SCHED_FIFO, &param);

	while (true == clamping && !kthread_should_stop() &&
		cpu_online(cpunr)) {
		int sleeptime;
		unsigned long target_jiffies;
		unsigned int guard;
		unsigned int compensation = 0;
		int interval; /* jiffies to sleep for each attempt */
		unsigned int duration_jiffies = msecs_to_jiffies(duration);
		unsigned int window_size_now;

		try_to_freeze();
		/*
		 * make sure user selected ratio does not take effect until
		 * the next round. adjust target_ratio if user has changed
		 * target such that we can converge quickly.
		 */
		target_ratio = set_target_ratio;
		guard = 1 + target_ratio/20;
		window_size_now = window_size;
		count++;

		/*
		 * systems may have different ability to enter package level
		 * c-states, thus we need to compensate the injected idle ratio
		 * to achieve the actual target reported by the HW.
		 */
		compensation = get_compensation(target_ratio);
		interval = duration_jiffies*100/(target_ratio+compensation);

		/* align idle time */
		target_jiffies = roundup(jiffies, interval);
		sleeptime = target_jiffies - jiffies;
		if (sleeptime <= 0)
			sleeptime = 1;
		schedule_timeout_interruptible(sleeptime);
		/*
		 * only elected controlling cpu can collect stats and update
		 * control parameters.
		 */
		if (cpunr == control_cpu && !(count%window_size_now)) {
			should_skip =
				powerclamp_adjust_controls(target_ratio,
							guard, window_size_now);
			smp_mb();
		}

		if (should_skip)
			continue;

		target_jiffies = jiffies + duration_jiffies;
		mod_timer(&wakeup_timer, target_jiffies);
		if (unlikely(local_softirq_pending()))
			continue;
		/*
		 * stop tick sched during idle time, interrupts are still
		 * allowed. thus jiffies are updated properly.
		 */
		preempt_disable();
		/* mwait until target jiffies is reached */
		while (time_before(jiffies, target_jiffies)) {
			unsigned long ecx = 1;
			unsigned long eax = target_mwait;

			/*
			 * REVISIT: may call enter_idle() to notify drivers who
			 * can save power during cpu idle. same for exit_idle()
			 */
			local_touch_nmi();
			stop_critical_timings();
			mwait_idle_with_hints(eax, ecx);
			start_critical_timings();
			atomic_inc(&idle_wakeup_counter);
		}
		preempt_enable();
	}
	del_timer_sync(&wakeup_timer);
	clear_bit(cpunr, cpu_clamping_mask);

	return 0;
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;
	static unsigned long jiffies_last;

	u64 msr_now;
	unsigned long jiffies_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	rdtscll(tsc_now);
	jiffies_now = jiffies;

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	jiffies_last = jiffies_now;
	tsc_last = tsc_now;

	if (true == clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

static int start_power_clamp(void)
{
	unsigned long cpu;
	struct task_struct *thread;

	/* check if pkg cstate counter is completely 0, abort in this case */
	if (!has_pkg_state_counter()) {
		pr_err("pkg cstate counter not functional, abort\n");
		return -EINVAL;
	}

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = 0;
	if (!cpu_online(control_cpu))
		control_cpu = smp_processor_id();

	clamping = true;
	schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one thread per online cpu */
	for_each_online_cpu(cpu) {
		struct task_struct **p =
			per_cpu_ptr(powerclamp_thread, cpu);

		thread = kthread_create_on_node(clamp_thread,
						(void *) cpu,
						cpu_to_node(cpu),
						"kidle_inject/%ld", cpu);
		/* bind to cpu here */
		if (likely(!IS_ERR(thread))) {
			kthread_bind(thread, cpu);
			wake_up_process(thread);
			*p = thread;
		}

	}
	put_online_cpus();

	return 0;
}
Beispiel #13
0
void unlock_vm_area(struct vm_struct *area)
{
	preempt_enable();
}
Beispiel #14
0
/*
 * Fill the partition reserved page with the information needed by
 * other partitions to discover we are alive and establish initial
 * communications.
 */
int
xpc_setup_rsvd_page(void)
{
	int ret;
	struct xpc_rsvd_page *rp;
	unsigned long rp_pa;
	unsigned long new_ts_jiffies;

	/* get the local reserved page's address */

	preempt_disable();
	rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
	preempt_enable();
	if (rp_pa == 0) {
		dev_err(xpc_part, "SAL failed to locate the reserved page\n");
		return -ESRCH;
	}
	rp = (struct xpc_rsvd_page *)__va(rp_pa);

	if (rp->SAL_version < 3) {
		/* SAL_versions < 3 had a SAL_partid defined as a u8 */
		rp->SAL_partid &= 0xff;
	}
	BUG_ON(rp->SAL_partid != xp_partition_id);

	if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
		dev_err(xpc_part, "the reserved page's partid of %d is outside "
			"supported range (< 0 || >= %d)\n", rp->SAL_partid,
			xp_max_npartitions);
		return -EINVAL;
	}

	rp->version = XPC_RP_VERSION;
	rp->max_npartitions = xp_max_npartitions;

	/* establish the actual sizes of the nasid masks */
	if (rp->SAL_version == 1) {
		/* SAL_version 1 didn't set the nasids_size field */
		rp->SAL_nasids_size = 128;
	}
	xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
	xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
					      BITS_PER_BYTE);

	/* setup the pointers to the various items in the reserved page */
	xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);

	ret = xpc_setup_rsvd_page_sn(rp);
	if (ret != 0)
		return ret;

	/*
	 * Set timestamp of when reserved page was setup by XPC.
	 * This signifies to the remote partition that our reserved
	 * page is initialized.
	 */
	new_ts_jiffies = jiffies;
	if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
		new_ts_jiffies++;
	rp->ts_jiffies = new_ts_jiffies;

	xpc_rsvd_page = rp;
	return 0;
}
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then pdflush is woken to perform some
 * writeout.
 */
static void balance_dirty_pages(struct address_space *mapping)
{
	long nr_reclaimable, bdi_nr_reclaimable;
	long nr_writeback, bdi_nr_writeback;
	long background_thresh;
	long dirty_thresh;
	long bdi_thresh;
	unsigned long pages_written = 0;
	unsigned long write_chunk = sync_writeback_pages();

	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		struct writeback_control wbc = {
			.bdi		= bdi,
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= write_chunk,
			.range_cyclic	= 1,
		};

		get_dirty_limits(&background_thresh, &dirty_thresh,
				&bdi_thresh, bdi);

		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);

		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the bdi limits are ramping up.
		 */
		if (nr_reclaimable + nr_writeback <
				(background_thresh + dirty_thresh) / 2)
			break;

		if (!bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 */
		if (bdi_nr_reclaimable) {
			writeback_inodes(&wbc);
			pages_written += write_chunk - wbc.nr_to_write;
			get_dirty_limits(&background_thresh, &dirty_thresh,
				       &bdi_thresh, bdi);
		}

		/*
		 * In order to avoid the stacked BDI deadlock we need
		 * to ensure we accurately count the 'dirty' pages when
		 * the threshold is low.
		 *
		 * Otherwise it would be possible to get thresh+n pages
		 * reported dirty, even though there are thresh-m pages
		 * actually dirty; with m+n sitting in the percpu
		 * deltas.
		 */
		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
		} else if (bdi_nr_reclaimable) {
			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
		}

		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
			break;
		if (pages_written >= write_chunk)
			break;		/* We've done our duty */

		congestion_wait(WRITE, HZ/10);
	}

	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
			bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;		/* pdflush is already working this queue */

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if ((laptop_mode && pages_written) ||
			(!laptop_mode && (global_page_state(NR_FILE_DIRTY)
					  + global_page_state(NR_UNSTABLE_NFS)
					  > background_thresh)))
		pdflush_operation(background_writeout, 0);
}

void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	if (set_page_dirty(page) || page_mkwrite) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}

/**
 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
 * @mapping: address_space which was dirtied
 * @nr_pages_dirtied: number of pages which the caller has just dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
					unsigned long nr_pages_dirtied)
{
	static DEFINE_PER_CPU(unsigned long, ratelimits) = 0;
	unsigned long ratelimit;
	unsigned long *p;

	ratelimit = ratelimit_pages;
	if (mapping->backing_dev_info->dirty_exceeded)
		ratelimit = 8;

	/*
	 * Check the rate limiting. Also, we do not want to throttle real-time
	 * tasks in balance_dirty_pages(). Period.
	 */
	preempt_disable();
	p =  &__get_cpu_var(ratelimits);
	*p += nr_pages_dirtied;
	if (unlikely(*p >= ratelimit)) {
		*p = 0;
		preempt_enable();
		balance_dirty_pages(mapping);
		return;
	}
	preempt_enable();
}
EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);

void throttle_vm_writeout(gfp_t gfp_mask)
{
	long background_thresh;
	long dirty_thresh;

        for ( ; ; ) {
		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);

                /*
                 * Boost the allowable dirty threshold a bit for page
                 * allocators so they don't get DoS'ed by heavy writers
                 */
                dirty_thresh += dirty_thresh / 10;      /* wheeee... */

                if (global_page_state(NR_UNSTABLE_NFS) +
			global_page_state(NR_WRITEBACK) <= dirty_thresh)
                        	break;
                congestion_wait(WRITE, HZ/10);

		/*
		 * The caller might hold locks which can prevent IO completion
		 * or progress in the filesystem.  So we cannot just sit here
		 * waiting for IO to complete.
		 */
		if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO))
			break;
        }
}
Beispiel #16
0
/*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
 * the caller to perform writeback if the system is over `vm_dirty_ratio'.
 * If we're over `background_thresh' then the writeback threads are woken to
 * perform some writeout.
 */
static void balance_dirty_pages(struct address_space *mapping,
				unsigned long write_chunk)
{
	long nr_reclaimable, bdi_nr_reclaimable;
	long nr_writeback, bdi_nr_writeback;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long pages_written = 0;
	unsigned long pause = 1;
	bool dirty_exceeded = false;
	struct backing_dev_info *bdi = mapping->backing_dev_info;

	for (;;) {
		struct writeback_control wbc = {
			.sync_mode	= WB_SYNC_NONE,
			.older_than_this = NULL,
			.nr_to_write	= write_chunk,
			.range_cyclic	= 1,
		};

		nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
					global_page_state(NR_UNSTABLE_NFS);
		nr_writeback = global_page_state(NR_WRITEBACK);

		global_dirty_limits(&background_thresh, &dirty_thresh);

		/*
		 * Throttle it only when the background writeback cannot
		 * catch-up. This avoids (excessively) small writeouts
		 * when the bdi limits are ramping up.
		 */
		if (nr_reclaimable + nr_writeback <
				(background_thresh + dirty_thresh) / 2)
			break;

		bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
		bdi_thresh = task_dirty_limit(current, bdi_thresh);

		/*
		 * In order to avoid the stacked BDI deadlock we need
		 * to ensure we accurately count the 'dirty' pages when
		 * the threshold is low.
		 *
		 * Otherwise it would be possible to get thresh+n pages
		 * reported dirty, even though there are thresh-m pages
		 * actually dirty; with m+n sitting in the percpu
		 * deltas.
		 */
		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
		} else {
			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
		}

		/*
		 * The bdi thresh is somehow "soft" limit derived from the
		 * global "hard" limit. The former helps to prevent heavy IO
		 * bdi or process from holding back light ones; The latter is
		 * the last resort safeguard.
		 */
		dirty_exceeded =
			(bdi_nr_reclaimable + bdi_nr_writeback >= bdi_thresh)
			|| (nr_reclaimable + nr_writeback >= dirty_thresh);

		if (!dirty_exceeded)
			break;

		if (!bdi->dirty_exceeded)
			bdi->dirty_exceeded = 1;

		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
		 * Unstable writes are a feature of certain networked
		 * filesystems (i.e. NFS) in which data may have been
		 * written to the server's write cache, but has not yet
		 * been flushed to permanent storage.
		 * Only move pages to writeback if this bdi is over its
		 * threshold otherwise wait until the disk writes catch
		 * up.
		 */
		if (bdi_nr_reclaimable > bdi_thresh) {
			writeback_inodes_wb(&bdi->wb, &wbc);
			pages_written += write_chunk - wbc.nr_to_write;
			if (pages_written >= write_chunk)
				break;		/* We've done our duty */
		}
		__set_current_state(TASK_INTERRUPTIBLE);
		io_schedule_timeout(pause);

		/*
		 * Increase the delay for each loop, up to our previous
		 * default of taking a 100ms nap.
		 */
		pause <<= 1;
		if (pause > HZ / 10)
			pause = HZ / 10;
	}

	if (!dirty_exceeded && bdi->dirty_exceeded)
		bdi->dirty_exceeded = 0;

	if (writeback_in_progress(bdi))
		return;

	/*
	 * In laptop mode, we wait until hitting the higher threshold before
	 * starting background writeout, and then write out all the way down
	 * to the lower threshold.  So slow writers cause minimal disk activity.
	 *
	 * In normal mode, we start background writeout at the lower
	 * background_thresh, to keep the amount of dirty memory low.
	 */
	if ((laptop_mode && pages_written) ||
	    (!laptop_mode && (nr_reclaimable > background_thresh)))
		bdi_start_background_writeback(bdi);
}

void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
	if (set_page_dirty(page) || page_mkwrite) {
		struct address_space *mapping = page_mapping(page);

		if (mapping)
			balance_dirty_pages_ratelimited(mapping);
	}
}

static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0;

/**
 * balance_dirty_pages_ratelimited_nr - balance dirty memory state
 * @mapping: address_space which was dirtied
 * @nr_pages_dirtied: number of pages which the caller has just dirtied
 *
 * Processes which are dirtying memory should call in here once for each page
 * which was newly dirtied.  The function will periodically check the system's
 * dirty state and will initiate writeback if needed.
 *
 * On really big machines, get_writeback_state is expensive, so try to avoid
 * calling it too often (ratelimiting).  But once we're over the dirty memory
 * limit we decrease the ratelimiting by a lot, to prevent individual processes
 * from overshooting the limit by (ratelimit_pages) each.
 */
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
					unsigned long nr_pages_dirtied)
{
	unsigned long ratelimit;
	unsigned long *p;

	ratelimit = ratelimit_pages;
	if (mapping->backing_dev_info->dirty_exceeded)
		ratelimit = 8;

	/*
	 * Check the rate limiting. Also, we do not want to throttle real-time
	 * tasks in balance_dirty_pages(). Period.
	 */
	preempt_disable();
	p =  &__get_cpu_var(bdp_ratelimits);
	*p += nr_pages_dirtied;
	if (unlikely(*p >= ratelimit)) {
		ratelimit = sync_writeback_pages(*p);
		*p = 0;
		preempt_enable();
		balance_dirty_pages(mapping, ratelimit);
		return;
	}
	preempt_enable();
}
static void spe_end(void)
{
	disable_kernel_spe();
	/* reenable preemption */
	preempt_enable();
}
Beispiel #18
0
/*
 * All calls to this function will be optimised into tail calls. We are
 * passed a pointer to the destination which we return as required by a
 * memcpy implementation.
 */
void *exit_vmx_copy(void *dest)
{
	preempt_enable();
	return dest;
}
Beispiel #19
0
void jrcu_read_unlock(void)
{
       if (preempt_count() == 1)
               rcu_eob(rcu_cpu());
       preempt_enable();
}
Beispiel #20
0
static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
			sigset_t *set, struct pt_regs *regs)
{
	struct rt_sigframe __user *frame;
	int err = 0;
	int signal;
	unsigned long address = 0;
#ifdef CONFIG_MMU
	pmd_t *pmdp;
	pte_t *ptep;
#endif

	frame = get_sigframe(ka, regs, sizeof(*frame));

	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
		goto give_sigsegv;

	signal = current_thread_info()->exec_domain
		&& current_thread_info()->exec_domain->signal_invmap
		&& sig < 32
		? current_thread_info()->exec_domain->signal_invmap[sig]
		: sig;

	if (info)
		err |= copy_siginfo_to_user(&frame->info, info);

	/* Create the ucontext. */
	err |= __put_user(0, &frame->uc.uc_flags);
	err |= __put_user(0, &frame->uc.uc_link);
	err |= __put_user((void *)current->sas_ss_sp,
			&frame->uc.uc_stack.ss_sp);
	err |= __put_user(sas_ss_flags(regs->r1),
			&frame->uc.uc_stack.ss_flags);
	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
	err |= setup_sigcontext(&frame->uc.uc_mcontext,
			regs, set->sig[0]);
	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));

	/* Set up to return from userspace. If provided, use a stub
	 already in userspace. */
	/* minus 8 is offset to cater for "rtsd r15,8" */
	if (ka->sa.sa_flags & SA_RESTORER) {
		regs->r15 = ((unsigned long)ka->sa.sa_restorer)-8;
	} else {
		/* addi r12, r0, __NR_sigreturn */
		err |= __put_user(0x31800000 | __NR_rt_sigreturn ,
				frame->tramp + 0);
		/* brki r14, 0x8 */
		err |= __put_user(0xb9cc0008, frame->tramp + 1);

		/* Return from sighandler will jump to the tramp.
		 Negative 8 offset because return is rtsd r15, 8 */
		regs->r15 = ((unsigned long)frame->tramp)-8;

		address = ((unsigned long)frame->tramp);
#ifdef CONFIG_MMU
		pmdp = pmd_offset(pud_offset(
				pgd_offset(current->mm, address),
						address), address);

		preempt_disable();
		ptep = pte_offset_map(pmdp, address);
		if (pte_present(*ptep)) {
			address = (unsigned long) page_address(pte_page(*ptep));
			/* MS: I need add offset in page */
			address += ((unsigned long)frame->tramp) & ~PAGE_MASK;
			/* MS address is virtual */
			address = virt_to_phys(address);
			invalidate_icache_range(address, address + 8);
			flush_dcache_range(address, address + 8);
		}
		pte_unmap(ptep);
		preempt_enable();
#else
		invalidate_icache_range(address, address + 8);
		flush_dcache_range(address, address + 8);
#endif
	}

	if (err)
		goto give_sigsegv;

	/* Set up registers for signal handler */
	regs->r1 = (unsigned long) frame - STATE_SAVE_ARG_SPACE;

	/* Signal handler args: */
	regs->r5 = signal; /* arg 0: signum */
	regs->r6 = (unsigned long) &frame->info; /* arg 1: siginfo */
	regs->r7 = (unsigned long) &frame->uc; /* arg2: ucontext */
	/* Offset to handle microblaze rtid r14, 0 */
	regs->pc = (unsigned long)ka->sa.sa_handler;

	set_fs(USER_DS);

	/* the tracer may want to single-step inside the handler */
	if (test_thread_flag(TIF_SINGLESTEP))
		ptrace_notify(SIGTRAP);

#ifdef DEBUG_SIG
	printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n",
		current->comm, current->pid, frame, regs->pc);
#endif

	return;

give_sigsegv:
	if (sig == SIGSEGV)
		ka->sa.sa_handler = SIG_DFL;
	force_sig(SIGSEGV, current);
}
Beispiel #21
0
inline bool skb_recycler_consume(struct sk_buff *skb) {
	unsigned long flags;
	struct sk_buff_head *h;

	/* Can we recycle this skb?  If not, simply return that we cannot */
	if (unlikely(!consume_skb_can_recycle(skb, SKB_RECYCLE_MIN_SIZE,
					      SKB_RECYCLE_MAX_SIZE)))
		return false;

	/*
	 * If we can, then it will be much faster for us to recycle this one
	 * later than to allocate a new one from scratch.
	 */
	preempt_disable();
	h = &__get_cpu_var(recycle_list);
	local_irq_save(flags);
	/* Attempt to enqueue the CPU hot recycle list first */
	if (likely(skb_queue_len(h) < SKB_RECYCLE_MAX_SKBS)) {
		__skb_queue_head(h, skb);
		local_irq_restore(flags);
		preempt_enable();
		return true;
	}
#ifdef CONFIG_SKB_RECYCLER_MULTI_CPU
	h = &__get_cpu_var(recycle_spare_list);

	/* The CPU hot recycle list was full; if the spare list is also full,
	 * attempt to move the spare list to the global list for other CPUs to
	 * use.
	 */
	if (unlikely(skb_queue_len(h) >= SKB_RECYCLE_SPARE_MAX_SKBS)) {
		uint8_t cur_tail, next_tail;
		spin_lock(&glob_recycler.lock);
		cur_tail = glob_recycler.tail;
		next_tail = (cur_tail + 1) & SKB_RECYCLE_MAX_SHARED_POOLS_MASK;
		if (next_tail != glob_recycler.head) {
			struct sk_buff_head *p = &glob_recycler.pool[cur_tail];

			/* Optimized, inlined SKB List splice */
			p->next = h->next;
			h->next->prev = (struct sk_buff *)p;
			p->prev = h->prev;
			h->prev->next = (struct sk_buff *)p;
			p->qlen = SKB_RECYCLE_SPARE_MAX_SKBS;

			/* Done with global list init */
			glob_recycler.tail = next_tail;
			spin_unlock(&glob_recycler.lock);

			/* Optimized, inlined spare SKB list init */
			h->next = (struct sk_buff *)h;
			h->prev = (struct sk_buff *)h;
			h->qlen = 0;

			/* We have now cleared room in the spare; enqueue */
			__skb_queue_head(h, skb);
			local_irq_restore(flags);
			preempt_enable();
			return true;
		}
		/* We still have a full spare because the global is also full */
		spin_unlock(&glob_recycler.lock);
	} else {
		/* We have room in the spare list; enqueue to spare list */
		__skb_queue_head(h, skb);
		local_irq_restore(flags);
		preempt_enable();
		return true;
	}
#endif

	local_irq_restore(flags);
	preempt_enable();

	return false;
}
Beispiel #22
0
int copy_thread(unsigned long clone_flags, unsigned long usp,
	unsigned long unused, struct task_struct *p, struct pt_regs *regs)
{
	struct thread_info *ti = task_thread_info(p);
	struct pt_regs *childregs;
	unsigned long childksp;
	p->set_child_tid = p->clear_child_tid = NULL;

	childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;

	preempt_disable();

	if (is_fpu_owner())
		save_fp(p);

	if (cpu_has_dsp)
		save_dsp(p);

	preempt_enable();

	/* set up new TSS. */
	childregs = (struct pt_regs *) childksp - 1;
	/*  Put the stack after the struct pt_regs.  */
	childksp = (unsigned long) childregs;
	*childregs = *regs;
	childregs->regs[7] = 0;	/* Clear error flag */

	childregs->regs[2] = 0;	/* Child gets zero as return value */

	if (childregs->cp0_status & ST0_CU0) {
		childregs->regs[28] = (unsigned long) ti;
		childregs->regs[29] = childksp;
		ti->addr_limit = KERNEL_DS;
	} else {
		childregs->regs[29] = usp;
		ti->addr_limit = USER_DS;
	}
	p->thread.reg29 = (unsigned long) childregs;
	p->thread.reg31 = (unsigned long) ret_from_fork;

	/*
	 * New tasks lose permission to use the fpu. This accelerates context
	 * switching for most programs since they don't use the fpu.
	 */
	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);

#ifdef CONFIG_MIPS_MT_SMTC
	/*
	 * SMTC restores TCStatus after Status, and the CU bits
	 * are aliased there.
	 */
	childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1);
#endif
	clear_tsk_thread_flag(p, TIF_USEDFPU);

#ifdef CONFIG_MIPS_MT_FPAFF
	clear_tsk_thread_flag(p, TIF_FPUBOUND);
#endif /* CONFIG_MIPS_MT_FPAFF */

	if (clone_flags & CLONE_SETTLS)
		ti->tp_value = regs->regs[7];

	return 0;
}
Beispiel #23
0
/*
 * Lock a mutex (possibly interruptible), slowpath:
 */
static inline int __sched
__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
		    struct lockdep_map *nest_lock, unsigned long ip)
{
	struct task_struct *task = current;
	struct mutex_waiter waiter;
	unsigned long flags;

	preempt_disable();
	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
	/*
	 * Optimistic spinning.
	 *
	 * We try to spin for acquisition when we find that there are no
	 * pending waiters and the lock owner is currently running on a
	 * (different) CPU.
	 *
	 * The rationale is that if the lock owner is running, it is likely to
	 * release the lock soon.
	 *
	 * Since this needs the lock owner, and this mutex implementation
	 * doesn't track the owner atomically in the lock field, we need to
	 * track it non-atomically.
	 *
	 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
	 * to serialize everything.
	 */

	for (;;) {
		struct task_struct *owner;

		/*
		 * If there's an owner, wait for it to either
		 * release the lock or go to sleep.
		 */
		owner = ACCESS_ONCE(lock->owner);
		if (owner && !mutex_spin_on_owner(lock, owner))
			break;

		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
			lock_acquired(&lock->dep_map, ip);
			mutex_set_owner(lock);
			preempt_enable();
			return 0;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(task)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		arch_mutex_cpu_relax();
	}
#endif
	spin_lock_mutex(&lock->wait_lock, flags);

	debug_mutex_lock_common(lock, &waiter);
	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));

	/* add waiting tasks to the end of the waitqueue (FIFO): */
	list_add_tail(&waiter.list, &lock->wait_list);
	waiter.task = task;

	if (atomic_xchg(&lock->count, -1) == 1)
		goto done;

	lock_contended(&lock->dep_map, ip);

	for (;;) {
		/*
		 * Lets try to take the lock again - this is needed even if
		 * we get here for the first time (shortly after failing to
		 * acquire the lock), to make sure that we get a wakeup once
		 * it's unlocked. Later on, if we sleep, this is the
		 * operation that gives us the lock. We xchg it to -1, so
		 * that when we release the lock, we properly wake up the
		 * other waiters:
		 */
		if (atomic_xchg(&lock->count, -1) == 1)
			break;

		/*
		 * got a signal? (This code gets eliminated in the
		 * TASK_UNINTERRUPTIBLE case.)
		 */
		if (unlikely(signal_pending_state(state, task))) {
			mutex_remove_waiter(lock, &waiter,
					    task_thread_info(task));
			mutex_release(&lock->dep_map, 1, ip);
			spin_unlock_mutex(&lock->wait_lock, flags);

			debug_mutex_free_waiter(&waiter);
			preempt_enable();
			return -EINTR;
		}
		__set_task_state(task, state);

		/* didn't get the lock, go to sleep: */
		spin_unlock_mutex(&lock->wait_lock, flags);
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
		spin_lock_mutex(&lock->wait_lock, flags);
	}

done:
	lock_acquired(&lock->dep_map, ip);
	/* got the lock - rejoice! */
	mutex_remove_waiter(lock, &waiter, current_thread_info());
	mutex_set_owner(lock);

	/* set it to 0 if there are no waiters left: */
	if (likely(list_empty(&lock->wait_list)))
		atomic_set(&lock->count, 0);

	spin_unlock_mutex(&lock->wait_lock, flags);

	debug_mutex_free_waiter(&waiter);
	preempt_enable();

	return 0;
}
/*
 * Add a new chunk of uncached memory pages to the specified pool.
 *
 * @pool: pool to add new chunk of uncached memory to
 * @nid: node id of node to allocate memory from, or -1
 *
 * This is accomplished by first allocating a granule of cached memory pages
 * and then converting them to uncached memory pages.
 */
static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
{
	struct page *page;
	int status, i, nchunks_added = uc_pool->nchunks_added;
	unsigned long c_addr, uc_addr;

	if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0)
		return -1;	/* interrupted by a signal */

	if (uc_pool->nchunks_added > nchunks_added) {
		/* someone added a new chunk while we were waiting */
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return 0;
	}

	if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) {
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return -1;
	}

	/* attempt to allocate a granule's worth of cached memory pages */

	page = alloc_pages_exact_node(nid,
				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
				IA64_GRANULE_SHIFT-PAGE_SHIFT);
	if (!page) {
		mutex_unlock(&uc_pool->add_chunk_mutex);
		return -1;
	}

	/* convert the memory pages from cached to uncached */

	c_addr = (unsigned long)page_address(page);
	uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET;

	/*
	 * There's a small race here where it's possible for someone to
	 * access the page through /dev/mem halfway through the conversion
	 * to uncached - not sure it's really worth bothering about
	 */
	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
		SetPageUncached(&page[i]);

	flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);

	status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
	if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
		atomic_set(&uc_pool->status, 0);
		status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
		if (status || atomic_read(&uc_pool->status))
			goto failed;
	} else if (status != PAL_VISIBILITY_OK)
		goto failed;

	preempt_disable();

	if (ia64_platform_is("sn2"))
		sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE);
	else
		flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);

	/* flush the just introduced uncached translation from the TLB */
	local_flush_tlb_all();

	preempt_enable();

	status = ia64_pal_mc_drain();
	if (status != PAL_STATUS_SUCCESS)
		goto failed;
	atomic_set(&uc_pool->status, 0);
	status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
	if (status || atomic_read(&uc_pool->status))
		goto failed;

	/*
	 * The chunk of memory pages has been converted to uncached so now we
	 * can add it to the pool.
	 */
	status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid);
	if (status)
		goto failed;

	uc_pool->nchunks_added++;
	mutex_unlock(&uc_pool->add_chunk_mutex);
	return 0;

	/* failed to convert or add the chunk so give it back to the kernel */
failed:
	for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
		ClearPageUncached(&page[i]);

	free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT);
	mutex_unlock(&uc_pool->add_chunk_mutex);
	return -1;
}
Beispiel #25
0
/*
 * Free the current cpus reserved DSR/CBR resources.
 */
static void gru_free_cpu_resources(void *cb, void *dsr)
{
	gru_unlock_kernel_context(uv_numa_blade_id());
	preempt_enable();
}
static long
restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
{
	unsigned long ip, flags, nat, um, cfm, rsc;
	long err;

	/* Always make any pending restarted system calls return -EINTR */
	current_thread_info()->restart_block.fn = do_no_restart_syscall;

	/* restore scratch that always needs gets updated during signal delivery: */
	err  = __get_user(flags, &sc->sc_flags);
	err |= __get_user(nat, &sc->sc_nat);
	err |= __get_user(ip, &sc->sc_ip);			/* instruction pointer */
	err |= __get_user(cfm, &sc->sc_cfm);
	err |= __get_user(um, &sc->sc_um);			/* user mask */
	err |= __get_user(rsc, &sc->sc_ar_rsc);
	err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
	err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
	err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
	err |= __get_user(scr->pt.pr, &sc->sc_pr);		/* predicates */
	err |= __get_user(scr->pt.b0, &sc->sc_br[0]);		/* b0 (rp) */
	err |= __get_user(scr->pt.b6, &sc->sc_br[6]);		/* b6 */
	err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8);	/* r1 */
	err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8);	/* r8-r11 */
	err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8);	/* r12-r13 */
	err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8);	/* r15 */

	scr->pt.cr_ifs = cfm | (1UL << 63);
	scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */

	/* establish new instruction pointer: */
	scr->pt.cr_iip = ip & ~0x3UL;
	ia64_psr(&scr->pt)->ri = ip & 0x3;
	scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);

	scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);

	if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
		/* Restore most scratch-state only when not in syscall. */
		err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv);		/* ar.ccv */
		err |= __get_user(scr->pt.b7, &sc->sc_br[7]);			/* b7 */
		err |= __get_user(scr->pt.r14, &sc->sc_gr[14]);			/* r14 */
		err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
		err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8);	/* r2-r3 */
		err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8);	/* r16-r31 */
	}

	if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
		struct ia64_psr *psr = ia64_psr(&scr->pt);

		err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
		psr->mfh = 0;	/* drop signal handler's fph contents... */
		preempt_disable();
		if (psr->dfh)
			ia64_drop_fpu(current);
		else {
			/* We already own the local fph, otherwise psr->dfh wouldn't be 0.  */
			__ia64_load_fpu(current->thread.fph);
			ia64_set_local_fpu_owner(current);
		}
		preempt_enable();
	}
	return err;
}
// sys_vperfctr_read() -> this()
static int do_vperfctr_read(struct vperfctr *perfctr,
			    unsigned int cmd,
			    void __user *argp,
			    unsigned int argbytes,
			    struct task_struct *tsk)
{
	union {
		struct perfctr_sum_ctrs sum;
		struct vperfctr_control control;
		struct perfctr_sum_ctrs children;
	} *tmp;
	unsigned int tmpbytes;
	int ret;

	/* The state snapshot can be large, so kmalloc() it instead of storing it on the stack.
	   We must use task-private storage to prevent racing with a monitor process attaching to 
	   us during the preemptible copy_to_user() step. Therefore we cannot store the snapshot
	   in the perfctr object itself. 
    */
	tmp = kmalloc(sizeof(*tmp), GFP_USER);
	if (!tmp)
		return -ENOMEM;

	/* PREEMPT note: While we're reading our own control, another process may ptrace ATTACH 
       to us and update our control. Disable preemption to ensure we get a consistent copy.
	   Not needed for other cases since the perfctr is either unlinked or its owner is ptrace 
       ATTACH suspended by us. 
	*/
	if (tsk == current)
		preempt_disable();

	switch (cmd) {
    	case VPERFCTR_READ_SUM: {
    		int j;
    
    		vperfctr_sample(perfctr);
    		tmp->sum.tsc = perfctr->cpu_state.tsc_sum;
    		for(j = 0; j < ARRAY_SIZE(tmp->sum.pmc); ++j)
    			tmp->sum.pmc[j] = perfctr->cpu_state.pmc[j].sum;
    		tmpbytes = sizeof(tmp->sum);
    	}
    	break;
    	case VPERFCTR_READ_CONTROL:
    		tmp->control.si_signo = perfctr->si_signo;
    		tmp->control.cpu_control = perfctr->cpu_state.control;
    		tmp->control.preserve = 0;
    		tmpbytes = sizeof(tmp->control);
    	break;
    	case VPERFCTR_READ_CHILDREN:
    		if (tsk)
    			spin_lock(&perfctr->children_lock);
    		tmp->children = perfctr->children;
    		if (tsk)
    			spin_unlock(&perfctr->children_lock);
    		tmpbytes = sizeof(tmp->children);
    	break;
    	default:
    		tmpbytes = 0;
	}

	if (tsk == current)
		preempt_enable();

	ret = -EINVAL;
	if (tmpbytes > argbytes)
		tmpbytes = argbytes;
	if (tmpbytes > 0) {
		ret = tmpbytes;
		if (copy_to_user(argp, tmp, tmpbytes))
			ret = -EFAULT;
	}
	kfree(tmp);
	return ret;
}
NORET_TYPE void panic(const char * fmt, ...)
{
	long i;
	static char buf[1024];
	va_list args;
#if defined(CONFIG_S390)
	unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif
    int count,chr_count;

	/*
	 * It's possible to come here directly from a panic-assertion and not
	 * have preempt disabled. Some functions called from here want
	 * preempt to be disabled. No point enabling it later though...
	 */
	preempt_disable();

	bust_spinlocks(1);
	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
	bust_spinlocks(0);

#ifdef CONFIG_KERNEL_PANIC_DUMP
	PANIC_Current += sprintf(PANIC_Current,"Kernel panic - not syncing: %s\n",buf);
	if (!multiple_panic) {
		multiple_panic++;	
		printk(KERN_EMERG "PANIC_DUMP Test : \n");
		count = PANIC_Current - PANIC_Base;
		printk("count : %d\n",count);
		chr_count = 0;
		while(count) 
		{
			memset(buf,0x0,1024);
			if(count > 1024) 
			{
				memcpy(buf,PANIC_Base+chr_count,1024);
				printk("%s",buf);
				chr_count += 1024;
				count -= 1024;
			} 
			else 
			{
				memcpy(buf,PANIC_Base+chr_count,count);
				printk("%s",buf);
				chr_count += count;
				count -= count;
			}
		}

		{
			mm_segment_t old_fs;
			struct file *filp;
			int writelen;
			loff_t pos =0;
			fl_owner_t id = current->files;

			preempt_enable();

			old_fs = get_fs();
			set_fs(KERNEL_DS);

			struct timeval val;
			struct tm *ptm,ptmTemp;
			char dt[35];


			do_gettimeofday(&val);


			ptm = localtime_r(&val.tv_sec,&ptmTemp);

			memset(dt , 0x00 , sizeof(dt));

			// format : YYMMDDhhmm
			sprintf(dt, "/data/KERNEL_PANIC%04d%02d%02d%02d%02d.txt"
					, ptm->tm_year+1900 , ptm->tm_mon+1 , ptm->tm_mday
					, ptm->tm_hour, ptm->tm_min);

			printk("Panic log file is %s \n",dt);

			count = PANIC_Current - PANIC_Base;
			chr_count = 0;
			filp = filp_open(dt,O_CREAT|O_WRONLY,0666);
			if(filp<0)
				printk("Sorry. Can't creat panic file\n");
			else {
				//				vfs_write(filp, PANIC_Base, strlen(PANIC_Base),
				while(count) {
					memset(buf,0x0,1024);
					if(count > 1024) {
						memcpy(buf,PANIC_Base+chr_count,1024);
						writelen = filp->f_op->write(filp,buf,1024,&filp->f_pos);
						if(writelen == 0)
							printk("Write Error!!\n");
						else
							filp->f_op->flush(filp,id);
						chr_count += 1024;
						count -= 1024;
					} else {
						memcpy(buf,PANIC_Base+chr_count,count);
						writelen = filp->f_op->write(filp,buf,count,&filp->f_pos);
						if(writelen == 0)
							printk("Write Error\n");
						else
							filp->f_op->flush(filp,id);
						chr_count += count;
						count -= count;
					}   
				}
			}
			set_fs(old_fs);
			preempt_disable();
		}

		count = PANIC_Current - PANIC_Base;
		printk("\nPanic Dump END,  panic message size is : %d\n",count);
	}
	else {
#if 0
		/* Reset Target */
#else		
		while (1);
#endif
	}
#endif

	/*
	 * If we have crashed and we have a crash kernel loaded let it handle
	 * everything else.
	 * Do we want to call this before we try to display a message?
	 */
	crash_kexec(NULL);

#ifdef CONFIG_SMP
	/*
	 * Note smp_send_stop is the usual smp shutdown function, which
	 * unfortunately means it may not be hardened to work in a panic
	 * situation.
	 */
	smp_send_stop();
#endif

	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

	if (!panic_blink)
		panic_blink = no_blink;

	if (panic_timeout > 0) {
		/*
	 	 * Delay timeout seconds before rebooting the machine. 
		 * We can't use the "normal" timers since we just panicked..
	 	 */
		printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
		for (i = 0; i < panic_timeout*1000; ) {
			touch_nmi_watchdog();
			i += panic_blink(i);
			mdelay(1);
			i++;
		}
		/*	This will not be a clean reboot, with everything
		 *	shutting down.  But if there is a chance of
		 *	rebooting the system it will be rebooted.
		 */
		emergency_restart();
	}
#ifdef __sparc__
	{
		extern int stop_a_enabled;
		/* Make sure the user can actually press Stop-A (L1-A) */
		stop_a_enabled = 1;
		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
	}
#endif
#if defined(CONFIG_S390)
	disabled_wait(caller);
#endif
	local_irq_enable();
	for (i = 0;;) {
		touch_softlockup_watchdog();
		i += panic_blink(i);
		mdelay(1);
		i++;
	}
}
Beispiel #29
0
static void unlock_rtas(unsigned long flags)
{
	arch_spin_unlock(&rtas.lock);
	local_irq_restore(flags);
	preempt_enable();
}
asmlinkage int vprintk(const char *fmt, va_list args)
{
	unsigned long flags;
	int printed_len;
	char *p;
	static char printk_buf[1024];
	static int log_level_unknown = 1;

	preempt_disable();
	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
		/* If a crash is occurring during printk() on this CPU,
		 * make sure we can't deadlock */
		zap_locks();

	/* This stops the holder of console_sem just where we want him */
	spin_lock_irqsave(&logbuf_lock, flags);
	printk_cpu = smp_processor_id();

	/* Emit the output into the temporary buffer */
	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
#if defined(CONFIG_FEROCEON) && defined(CONFIG_DEBUG_LL)
	if (ll_debug) putstr(printk_buf);
#endif
	/*
	 * Copy the output into log_buf.  If the caller didn't provide
	 * appropriate log level tags, we insert them here
	 */
	for (p = printk_buf; *p; p++) {
		if (log_level_unknown) {
                        /* log_level_unknown signals the start of a new line */
			if (printk_time) {
				int loglev_char;
				char tbuf[50], *tp;
				unsigned tlen;
				unsigned long long t;
				unsigned long nanosec_rem;

				/*
				 * force the log level token to be
				 * before the time output.
				 */
				if (p[0] == '<' && p[1] >='0' &&
				   p[1] <= '7' && p[2] == '>') {
					loglev_char = p[1];
					p += 3;
					printed_len -= 3;
				} else {
					loglev_char = default_message_loglevel
						+ '0';
				}
				t = printk_clock();
				nanosec_rem = do_div(t, 1000000000);
				tlen = sprintf(tbuf,
						"<%c>[%5lu.%06lu] ",
						loglev_char,
						(unsigned long)t,
						nanosec_rem/1000);

				for (tp = tbuf; tp < tbuf + tlen; tp++)
					emit_log_char(*tp);
				printed_len += tlen;
			} else {
				if (p[0] != '<' || p[1] < '0' ||
				   p[1] > '7' || p[2] != '>') {
					emit_log_char('<');
					emit_log_char(default_message_loglevel
						+ '0');
					emit_log_char('>');
					printed_len += 3;
				}
			}
			log_level_unknown = 0;
			if (!*p)
				break;
		}
		emit_log_char(*p);
		if (*p == '\n')
			log_level_unknown = 1;
	}

	if (!cpu_online(smp_processor_id())) {
		/*
		 * Some console drivers may assume that per-cpu resources have
		 * been allocated.  So don't allow them to be called by this
		 * CPU until it is officially up.  We shouldn't be calling into
		 * random console drivers on a CPU which doesn't exist yet..
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
		goto out;
	}
	if (!down_trylock(&console_sem)) {
		console_locked = 1;
		/*
		 * We own the drivers.  We can drop the spinlock and let
		 * release_console_sem() print the text
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
		console_may_schedule = 0;
#if defined(CONFIG_FEROCEON) && defined(CONFIG_MV_PRINTK_SLICE_SUPPORT)
		local_irq_restore(flags);
		release_console_sem();
		local_irq_save(flags);
#else
			release_console_sem();
#endif
	} else {
		/*
		 * Someone else owns the drivers.  We drop the spinlock, which
		 * allows the semaphore holder to proceed and to call the
		 * console drivers with the output which we just produced.
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
	}
out:
	preempt_enable();
	return printed_len;
}