Example #1
/*
 * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
 * (normally the time-keeper CPU).  We use a closed loop to eliminate the possibility of
 * unaccounted-for errors (such as getting a machine check in the middle of a calibration
 * step).  The basic idea is for the slave to ask the master what itc value it has and to
 * read its own itc before and after the master responds.  Each iteration gives us three
 * timestamps:
 *
 *	slave		master
 *
 *	t0 ---\
 *             ---\
 *		   --->
 *			tm
 *		   /---
 *	       /---
 *	t1 <---
 *
 *
 * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
 * and t1.  If we achieve this, the clocks are synchronized provided the interconnect
 * between the slave and the master is symmetric.  Even if the interconnect were
 * asymmetric, we would still know that the synchronization error is smaller than the
 * roundtrip latency (t1 - t0).
 *
 * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
 * within one or two cycles.  However, we can only *guarantee* that the synchronization is
 * accurate to within a round-trip time, which is typically in the range of several
 * hundred cycles (e.g., ~500 cycles).  In practice, this means that the itc's are usually
 * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
 * than half a microsecond or so.
 */
void
ia64_sync_itc (unsigned int master)
{
    long i, delta, adj, adjust_latency = 0, done = 0;
    unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_ITC_SYNC
    struct {
        long rt;	/* roundtrip time */
        long master;	/* master's timestamp */
        long diff;	/* difference between midpoint and master's timestamp */
        long lat;	/* estimate of itc adjustment latency */
    } t[NUM_ROUNDS];
#endif

    /*
     * Make sure local timer ticks are disabled while we sync.  If
     * they were enabled, we'd have to worry about nasty issues
     * like setting the ITC ahead of (or a long time before) the
     * next scheduled tick.
     */
    BUG_ON((ia64_get_itv() & (1 << 16)) == 0);

    go[MASTER] = 1;

    if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
        printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
        return;
    }

    while (go[MASTER])
        cpu_relax();	/* wait for master to be ready */

    spin_lock_irqsave(&itc_sync_lock, flags);
    {
        for (i = 0; i < NUM_ROUNDS; ++i) {
            delta = get_delta(&rt, &master_time_stamp);
            if (delta == 0) {
                done = 1;	/* let's lock on to this... */
                bound = rt;
            }

            if (!done) {
                if (i > 0) {
                    adjust_latency += -delta;
                    adj = -delta + adjust_latency/4;
                } else
                    adj = -delta;

                ia64_set_itc(ia64_get_itc() + adj);
            }
#if DEBUG_ITC_SYNC
            t[i].rt = rt;
            t[i].master = master_time_stamp;
            t[i].diff = delta;
            t[i].lat = adjust_latency/4;
#endif
        }
    }
    spin_unlock_irqrestore(&itc_sync_lock, flags);

#if DEBUG_ITC_SYNC
    for (i = 0; i < NUM_ROUNDS; ++i)
        printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
               t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

    printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
           "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
}
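
The helper get_delta() used in the loop above is not shown here. As a minimal
sketch of the midpoint arithmetic the header comment describes (the function
name and exact form are invented for illustration; this is not the kernel's
helper), one round of timestamps t0, tm, t1 yields the roundtrip time and the
slave's offset from the master at the midpoint:

static long
midpoint_delta (unsigned long t0, unsigned long tm, unsigned long t1,
		unsigned long *rt)
{
	unsigned long tcenter;

	*rt = t1 - t0;			/* roundtrip time seen by the slave */
	tcenter = t0 + (t1 - t0)/2;	/* midpoint between t0 and t1 */
	return (long) (tcenter - tm);	/* > 0: slave's itc reads ahead of the master */
}

A positive result means the slave's itc is ahead at the midpoint, which is why
the loop above applies adj = -delta before re-measuring.
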
/*
 * Check whether the newly registered device should be used.
 */
static int tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu, ret = NOTIFY_OK;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_device_lock, flags);

	cpu = smp_processor_id();
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		goto out_bc;

	td = &per_cpu(tick_cpu_device, cpu);
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {

		/*
		 * If the cpu affinity of the device interrupt can not
		 * be set, ignore it.
		 */
		if (!irq_can_set_affinity(newdev->irq))
			goto out_bc;

		/*
		 * If we have a cpu local device already, do not replace it
		 * by a non cpu local device
		 */
		if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
			goto out_bc;
	}

	/*
	 * If we have an active device, then check the rating and the oneshot
	 * feature.
	 */
	if (curdev) {
		/*
		 * Prefer one shot capable devices !
		 */
		if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
		    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
			goto out_bc;
		/*
		 * Check the rating
		 */
		if (curdev->rating >= newdev->rating)
			goto out_bc;
	}

	/*
	 * Replace the eventually existing device by the new
	 * device. If the current device is the broadcast device, do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();

	raw_spin_unlock_irqrestore(&tick_device_lock, flags);
	return NOTIFY_STOP;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	if (tick_check_broadcast_device(newdev))
		ret = NOTIFY_STOP;

	raw_spin_unlock_irqrestore(&tick_device_lock, flags);

	return ret;
}
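
Condensed into a single predicate, the preference applied above (ignoring the
per-cpu affinity and broadcast handling) is: take the new device only if it
does not lose the oneshot capability and has a strictly higher rating. A small
restatement with an invented name, not a kernel helper:

static bool tick_prefer_new(const struct clock_event_device *curdev,
			    const struct clock_event_device *newdev)
{
	if (!curdev)
		return true;
	/* Never trade a oneshot capable device for a periodic-only one. */
	if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;
	return newdev->rating > curdev->rating;
}
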
Example #3
/*
 * The kernel is already mapped with a linear mapping at kseg_c, so there's no
 * need to map it with a page table. However, head.S also temporarily mapped it
 * at kseg_4, so the ksegs are set up again here. Also clear the TLB and do
 * various other paging setup.
 */
void __init
cris_mmu_init(void)
{
	unsigned long mmu_config;
	unsigned long mmu_kbase_hi;
	unsigned long mmu_kbase_lo;
	unsigned short mmu_page_id;

	/*
	 * Make sure the current pgd table points to something sane, even if it
	 * is most probably not used until the next switch_mm.
	 */
	per_cpu(current_pgd, smp_processor_id()) = init_mm.pgd;

#ifdef CONFIG_SMP
	{
		pgd_t **pgd;
		pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
		SUPP_BANK_SEL(1);
		SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
		SUPP_BANK_SEL(2);
		SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
	}
#endif

	/* Initialise the TLB. Function found in tlb.c. */
	tlb_init();

	/* Enable exceptions and initialize the kernel segments. */
	mmu_config = ( REG_STATE(mmu, rw_mm_cfg, we, on)        |
		       REG_STATE(mmu, rw_mm_cfg, acc, on)       |
		       REG_STATE(mmu, rw_mm_cfg, ex, on)        |
		       REG_STATE(mmu, rw_mm_cfg, inv, on)       |
		       REG_STATE(mmu, rw_mm_cfg, seg_f, linear) |
		       REG_STATE(mmu, rw_mm_cfg, seg_e, linear) |
		       REG_STATE(mmu, rw_mm_cfg, seg_d, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_c, linear) |
		       REG_STATE(mmu, rw_mm_cfg, seg_b, linear) |
#ifndef CONFIG_ETRAXFS_SIM
                       REG_STATE(mmu, rw_mm_cfg, seg_a, page)   |
#else
		       REG_STATE(mmu, rw_mm_cfg, seg_a, linear) |
#endif
		       REG_STATE(mmu, rw_mm_cfg, seg_9, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_8, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_7, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_6, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_5, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_4, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_3, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_2, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_1, page)   |
		       REG_STATE(mmu, rw_mm_cfg, seg_0, page));

	mmu_kbase_hi = ( REG_FIELD(mmu, rw_mm_kbase_hi, base_f, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_e, 0x8) |
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_d, 0x0) |
#ifndef CONFIG_ETRAXFS_SIM
                         REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 0x4) |
#else
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_c, 0x0) |
#endif
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_b, 0xb) |
#ifndef CONFIG_ETRAXFS_SIM
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0x0) |
#else
                         REG_FIELD(mmu, rw_mm_kbase_hi, base_a, 0xa) |
#endif
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_9, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_hi, base_8, 0x0));

	mmu_kbase_lo = ( REG_FIELD(mmu, rw_mm_kbase_lo, base_7, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_6, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_5, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_4, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_3, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_2, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_1, 0x0) |
			 REG_FIELD(mmu, rw_mm_kbase_lo, base_0, 0x0));

	mmu_page_id = REG_FIELD(mmu, rw_mm_tlb_hi, pid, 0);

	/* Update the instruction MMU. */
	SUPP_BANK_SEL(BANK_IM);
	SUPP_REG_WR(RW_MM_CFG, mmu_config);
	SUPP_REG_WR(RW_MM_KBASE_HI, mmu_kbase_hi);
	SUPP_REG_WR(RW_MM_KBASE_LO, mmu_kbase_lo);
	SUPP_REG_WR(RW_MM_TLB_HI, mmu_page_id);

	/* Update the data MMU. */
	SUPP_BANK_SEL(BANK_DM);
	SUPP_REG_WR(RW_MM_CFG, mmu_config);
	SUPP_REG_WR(RW_MM_KBASE_HI, mmu_kbase_hi);
	SUPP_REG_WR(RW_MM_KBASE_LO, mmu_kbase_lo);
	SUPP_REG_WR(RW_MM_TLB_HI, mmu_page_id);

	SPEC_REG_WR(SPEC_REG_PID, 0);

	/*
	 * The MMU has been enabled ever since head.S but just to make it
	 * totally obvious enable it here as well.
	 */
	SUPP_BANK_SEL(BANK_GC);
	SUPP_REG_WR(RW_GC_CFG, 0xf); /* IMMU, DMMU, ICache, DCache on */
}
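
As a worked example of the kseg setup above (assuming the usual ETRAX FS memory
map with DRAM at physical 0x40000000): seg_c is configured as "linear" with
base_c = 0x4, so a virtual address in kseg_c maps to physical 0x40000000 plus
its offset within the 256 MB segment. A small user-space check of that
arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long virt = 0xc0001234UL;		/* an address in kseg_c */
	unsigned long base_c = 0x4UL;			/* from rw_mm_kbase_hi above */
	unsigned long phys = (base_c << 28) | (virt & 0x0fffffffUL);

	printf("virt 0x%08lx -> phys 0x%08lx\n", virt, phys);	/* -> 0x40001234 */
	return 0;
}
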
Example #4
asmlinkage int vprintk(const char *fmt, va_list args)
{
	int printed_len = 0;
	int current_log_level = default_message_loglevel;
	unsigned long flags;
	int this_cpu;
	char *p;

	boot_delay_msec();

	preempt_disable();
	/* This stops the holder of console_sem just where we want him */
	raw_local_irq_save(flags);
	this_cpu = smp_processor_id();

	/*
	 * Ouch, printk recursed into itself!
	 */
	if (unlikely(printk_cpu == this_cpu)) {
		/*
		 * If a crash is occurring during printk() on this CPU,
		 * then try to get the crash message out but make sure
		 * we can't deadlock. Otherwise just return to avoid the
		 * recursion and return - but flag the recursion so that
		 * it can be printed at the next appropriate moment:
		 */
		if (!oops_in_progress) {
			recursion_bug = 1;
			goto out_restore_irqs;
		}
		zap_locks();
	}

	lockdep_off();
	spin_lock(&logbuf_lock);
	printk_cpu = this_cpu;

	if (recursion_bug) {
		recursion_bug = 0;
		strcpy(printk_buf, recursion_bug_msg);
		printed_len = strlen(recursion_bug_msg);
	}
	/* Emit the output into the temporary buffer */
	printed_len += vscnprintf(printk_buf + printed_len,
				  sizeof(printk_buf) - printed_len, fmt, args);


	/*
	 * Copy the output into log_buf.  If the caller didn't provide
	 * appropriate log level tags, we insert them here
	 */
	for (p = printk_buf; *p; p++) {
		if (new_text_line) {
			/* If a token, set current_log_level and skip over */
			if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
			    p[2] == '>') {
				current_log_level = p[1] - '0';
				p += 3;
				printed_len -= 3;
			}

			/* Always output the token */
			emit_log_char('<');
			emit_log_char(current_log_level + '0');
			emit_log_char('>');
#ifdef CONFIG_KERNEL_LOGGING
			{
				char tbuf[4];	/* "<N>" plus the terminating NUL */

				// check kernel start
				if( (b_first_call_after_booting == 0) &&
					(logging_mode & LOGGING_RAM_MASK) && 
					(!ioremapped && log_buf_base  ) )
				{
//					char tempStar[] = "********************************************\n";
					char tempChar[] = "============== start kernel logging !! ==============\n";
					
					b_first_call_after_booting = 1;

//					emit_log_char_RAMbuf(tempStar, sizeof(tempStar));
					emit_log_char_RAMbuf(tempChar, sizeof(tempChar));
//					emit_log_char_RAMbuf(tempStar, sizeof(tempStar));
				}
				
				sprintf(tbuf, "<%1d>", default_message_loglevel);
				emit_log_char_RAMbuf(tbuf, 3);
			}
#endif
			printed_len += 3;
			new_text_line = 0;

			if (printk_time) {
				/* Follow the token with the time */
				char tbuf[50], *tp;
				unsigned tlen;
				unsigned long long t;
				unsigned long nanosec_rem;

				t = cpu_clock(printk_cpu);
				nanosec_rem = do_div(t, 1000000000);
				tlen = sprintf(tbuf, "[%5lu.%06lu] ",
						(unsigned long) t,
						nanosec_rem / 1000);

				for (tp = tbuf; tp < tbuf + tlen; tp++)
					emit_log_char(*tp);
#ifdef CONFIG_KERNEL_LOGGING				
				emit_log_char_RAMbuf(tbuf, tlen);
#endif
				printed_len += tlen;
			}

			if (!*p)
				break;
		}

		emit_log_char(*p);
		if (*p == '\n')
			new_text_line = 1;
	}

#ifdef CONFIG_KERNEL_LOGGING
	emit_log_char_RAMbuf(printk_buf, strlen(printk_buf));
#endif

	/*
	 * Try to acquire and then immediately release the
	 * console semaphore. The release will do all the
	 * actual magic (print out buffers, wake up klogd,
	 * etc). 
	 *
	 * The acquire_console_semaphore_for_printk() function
	 * will release 'logbuf_lock' regardless of whether it
	 * actually gets the semaphore or not.
	 */
	if (acquire_console_semaphore_for_printk(this_cpu))
		release_console_sem();

	lockdep_on();
out_restore_irqs:
	raw_local_irq_restore(flags);

	preempt_enable();
	return printed_len;
}
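
The loglevel token handled above is normally supplied by the caller through the
KERN_* macros, e.g. printk(KERN_ERR "...") expands to a string starting with
"<3>". A standalone restatement of the token check (not a kernel helper, name
invented):

#include <stdbool.h>

static bool parse_loglevel(const char *p, int *level)
{
	if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' && p[2] == '>') {
		*level = p[1] - '0';	/* "<3>" -> 3 */
		return true;
	}
	return false;
}
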
Example #5
File: mce.c Project: sheep/xen
/* Shared #MC handler. */
void mcheck_cmn_handler(const struct cpu_user_regs *regs)
{
    struct mca_banks *bankmask = mca_allbanks;
    struct mca_banks *clear_bank = __get_cpu_var(mce_clear_banks);
    uint64_t gstatus;
    mctelem_cookie_t mctc = NULL;
    struct mca_summary bs;

    mce_spin_lock(&mce_logout_lock);

    if (clear_bank != NULL) {
        memset( clear_bank->bank_map, 0x0,
            sizeof(long) * BITS_TO_LONGS(clear_bank->num));
    }
    mctc = mcheck_mca_logout(MCA_MCE_SCAN, bankmask, &bs, clear_bank);

    if (bs.errcnt) {
        /*
         * Uncorrected errors must be dealt with in softirq context.
         */
        if (bs.uc || bs.pcc) {
            add_taint(TAINT_MACHINE_CHECK);
            if (mctc != NULL)
                mctelem_defer(mctc);
            /*
             * For PCC=1 and can't be recovered, context is lost, so
             * reboot now without clearing the banks, and deal with
             * the telemetry after reboot (the MSRs are sticky)
             */
            if (bs.pcc || !bs.recoverable)
                cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
        } else {
            if (mctc != NULL)
                mctelem_commit(mctc);
        }
        atomic_set(&found_error, 1);

        /* The last CPU will take care of the check/clean-up etc. */
        atomic_set(&severity_cpu, smp_processor_id());

        mce_printk(MCE_CRITICAL, "MCE: clear_bank map %lx on CPU%d\n",
                *((unsigned long*)clear_bank), smp_processor_id());
        if (clear_bank != NULL)
            mcheck_mca_clearbanks(clear_bank);
    } else {
        if (mctc != NULL)
            mctelem_dismiss(mctc);
    }
    mce_spin_unlock(&mce_logout_lock);

    mce_barrier_enter(&mce_trap_bar);
    if ( mctc != NULL && mce_urgent_action(regs, mctc))
        cpumask_set_cpu(smp_processor_id(), &mce_fatal_cpus);
    mce_barrier_exit(&mce_trap_bar);

    /*
     * Wait until everybody has processed the trap.
     */
    mce_barrier_enter(&mce_trap_bar);
    if (atomic_read(&severity_cpu) == smp_processor_id())
    {
        /* According to the SDM, if no error bank is found on any CPU,
         * something unexpected is happening; we can't do any recovery
         * work and have to reset the system.
         */
        if (atomic_read(&found_error) == 0)
            mc_panic("MCE: No CPU found valid MCE, need reset");
        if (!cpumask_empty(&mce_fatal_cpus))
        {
            char *ebufp, ebuf[96] = "MCE: Fatal error happened on CPUs ";
            ebufp = ebuf + strlen(ebuf);
            cpumask_scnprintf(ebufp, 95 - strlen(ebuf), &mce_fatal_cpus);
            mc_panic(ebuf);
        }
        atomic_set(&found_error, 0);
    }
    mce_barrier_exit(&mce_trap_bar);

    /* Clear flags after above fatal check */
    mce_barrier_enter(&mce_trap_bar);
    gstatus = mca_rdmsr(MSR_IA32_MCG_STATUS);
    if ((gstatus & MCG_STATUS_MCIP) != 0) {
        mce_printk(MCE_CRITICAL, "MCE: Clear MCIP@ last step");
        mca_wrmsr(MSR_IA32_MCG_STATUS, 0);
    }
    mce_barrier_exit(&mce_trap_bar);

    raise_softirq(MACHINE_CHECK_SOFTIRQ);
}
Example #6
/* Interrupts are disabled. */
void xics_migrate_irqs_away(void)
{
	int cpu = smp_processor_id(), hw_cpu = hard_smp_processor_id();
	unsigned int irq, virq;
	struct irq_desc *desc;

	/* If we used to be the default server, move to the new "boot_cpuid" */
	if (hw_cpu == xics_default_server)
		xics_update_irq_servers();

	/* Reject any interrupt that was queued to us... */
	icp_ops->set_priority(0);

	/* Remove ourselves from the global interrupt queue */
	xics_set_cpu_giq(xics_default_distrib_server, 0);

	/* Allow IPIs again... */
	icp_ops->set_priority(DEFAULT_PRIORITY);

	for_each_irq_desc(virq, desc) {
		struct irq_chip *chip;
		long server;
		unsigned long flags;
		struct ics *ics;

		/* We can't set affinity on ISA interrupts */
		if (virq < NUM_ISA_INTERRUPTS)
			continue;
		/* We only need to migrate enabled IRQS */
		if (!desc->action)
			continue;
		if (desc->irq_data.domain != xics_host)
			continue;
		irq = desc->irq_data.hwirq;
		/* We need to get IPIs still. */
		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
			continue;
		chip = irq_desc_get_chip(desc);
		if (!chip || !chip->irq_set_affinity)
			continue;

		raw_spin_lock_irqsave(&desc->lock, flags);

		/* Locate interrupt server */
		server = -1;
		ics = irq_get_chip_data(virq);
		if (ics)
			server = ics->get_server(ics, irq);
		if (server < 0) {
			printk(KERN_ERR "%s: Can't find server for irq %d\n",
			       __func__, irq);
			goto unlock;
		}

		/* We only support delivery to all cpus or to one cpu.
		 * The irq has to be migrated only in the single cpu
		 * case.
		 */
		if (server != hw_cpu)
			goto unlock;

		/* This is expected during cpu offline. */
		if (cpu_online(cpu))
			pr_warning("IRQ %u affinity broken off cpu %u\n",
			       virq, cpu);

		/* Reset affinity to all cpus */
		raw_spin_unlock_irqrestore(&desc->lock, flags);
		irq_set_affinity(virq, cpu_all_mask);
		continue;
unlock:
		raw_spin_unlock_irqrestore(&desc->lock, flags);
	}
}
Example #7
/**
 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick.
 * Called either from the idle loop or from irq_exit() when an idle period was
 * just interrupted by an interrupt which did not cause a reschedule.
 */
void tick_nohz_stop_sched_tick(int inidle)
{
	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
	struct tick_sched *ts;
	ktime_t last_update, expires, now;
	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
	u64 time_delta;
	int cpu;

	local_irq_save(flags);

	cpu = smp_processor_id();
	ts = &per_cpu(tick_cpu_sched, cpu);

	/*
	 * Call to tick_nohz_start_idle stops the last_update_time from being
	 * updated. Thus, it must not be called in the event we are called from
	 * irq_exit() with the prior state different than idle.
	 */
	if (!inidle && !ts->inidle)
		goto end;

	/*
	 * Set ts->inidle unconditionally. Even if the system did not
	 * switch to NOHZ mode the cpu frequency governors rely on the
	 * update of the idle time accounting in tick_nohz_start_idle().
	 */
	ts->inidle = 1;

	now = tick_nohz_start_idle(cpu, ts);

	/*
	 * If this cpu is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the cpu which runs the tick timer next. If we don't drop
	 * this here the jiffies might be stale and do_timer() never
	 * invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		goto end;

	if (need_resched())
		goto end;

	if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
		static int ratelimit;

		if (ratelimit < 10) {
			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
			       (unsigned int) local_softirq_pending());
			ratelimit++;
		}
		goto end;
	}

	ts->idle_calls++;
	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqbegin(&xtime_lock);
		last_update = last_jiffies_update;
		last_jiffies = jiffies;
		time_delta = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_lock, seq));

	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
	    arch_needs_cpu(cpu)) {
		next_jiffies = last_jiffies + 1;
		delta_jiffies = 1;
	} else {
		/* Get the next timer wheel timer */
		next_jiffies = get_next_timer_interrupt(last_jiffies);
		delta_jiffies = next_jiffies - last_jiffies;
	}
	/*
	 * Do not stop the tick if we are only one jiffy off,
	 * or if this cpu is required for RCU.
	 */
	if (!ts->tick_stopped && delta_jiffies == 1)
		goto out;

	/* Schedule the tick, if we are at least one jiffie off */
	if ((long)delta_jiffies >= 1) {

		/*
		 * If this cpu is the one which updates jiffies, then
		 * give up the assignment and let it be taken by the
		 * cpu which runs the tick timer next, which might be
		 * this cpu as well. If we don't drop this here the
		 * jiffies might be stale and do_timer() never
		 * invoked. Keep track of the fact that it was the one
		 * which had the do_timer() duty last. If this cpu is
		 * the one which had the do_timer() duty last, we
		 * limit the sleep time to the timekeeping
		 * max_deferment value which we retrieved
		 * above. Otherwise we can sleep as long as we want.
		 */
		if (cpu == tick_do_timer_cpu) {
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			ts->do_timer_last = 1;
		} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
			time_delta = KTIME_MAX;
			ts->do_timer_last = 0;
		} else if (!ts->do_timer_last) {
			time_delta = KTIME_MAX;
		}

		/*
		 * calculate the expiry time for the next timer wheel
		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
		 * that there is no timer pending or at least extremely
		 * far into the future (12 days for HZ=1000). In this
		 * case we set the expiry to the end of time.
		 */
		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
			/*
			 * Calculate the time delta for the next timer event.
			 * If the time delta exceeds the maximum time delta
			 * permitted by the current clocksource then adjust
			 * the time delta accordingly to ensure the
			 * clocksource does not wrap.
			 */
			time_delta = min_t(u64, time_delta,
					   tick_period.tv64 * delta_jiffies);
		}

		if (time_delta < KTIME_MAX)
			expires = ktime_add_ns(last_update, time_delta);
		else
			expires.tv64 = KTIME_MAX;

		if (delta_jiffies > 1)
			cpumask_set_cpu(cpu, nohz_cpu_mask);

		/* Skip reprogram of event if its not changed */
		if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
			goto out;

		/*
		 * nohz_stop_sched_tick can be called several times before
		 * the nohz_restart_sched_tick is called. This happens when
		 * interrupts arrive which do not cause a reschedule. In the
		 * first call we save the current tick time, so we can restart
		 * the scheduler tick in nohz_restart_sched_tick.
		 */
		if (!ts->tick_stopped) {
			select_nohz_load_balancer(1);

			ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
			ts->tick_stopped = 1;
			ts->idle_jiffies = last_jiffies;
			rcu_enter_nohz();
		}

		ts->idle_sleeps++;

		/* Mark expires */
		ts->idle_expires = expires;

		/*
		 * If the expiration time == KTIME_MAX, then
		 * in this case we simply stop the tick timer.
		 */
		 if (unlikely(expires.tv64 == KTIME_MAX)) {
			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
				hrtimer_cancel(&ts->sched_timer);
			goto out;
		}

		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
			hrtimer_start(&ts->sched_timer, expires,
				      HRTIMER_MODE_ABS_PINNED);
			/* Check, if the timer was already in the past */
			if (hrtimer_active(&ts->sched_timer))
				goto out;
		} else if (!tick_program_event(expires, 0))
				goto out;
		/*
		 * We are past the event already. So we crossed a
		 * jiffie boundary. Update jiffies and raise the
		 * softirq.
		 */
		tick_do_update_jiffies64(ktime_get());
		cpumask_clear_cpu(cpu, nohz_cpu_mask);
	}
	raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
	ts->next_jiffies = next_jiffies;
	ts->last_jiffies = last_jiffies;
	ts->sleep_length = ktime_sub(dev->next_event, now);
end:
	local_irq_restore(flags);
}
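
To make the expiry calculation above concrete (assumed numbers, not taken from
the kernel): with HZ=1000 the tick period is 1,000,000 ns, so a next timer wheel
event 50 jiffies away allows a 50 ms sleep unless the clocksource's maximum
deferment is smaller:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tick_period_ns = 1000000;		/* HZ = 1000 */
	uint64_t delta_jiffies = 50;			/* next timer wheel event */
	uint64_t max_deferment_ns = 4000000000ULL;	/* clocksource limit (example) */
	uint64_t sleep_ns = tick_period_ns * delta_jiffies;

	if (sleep_ns > max_deferment_ns)
		sleep_ns = max_deferment_ns;
	printf("stop the tick for %llu ns\n", (unsigned long long) sleep_ns);
	return 0;
}
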
Example #8
/*
 * timer_interrupt - gets called when the decrementer overflows,
 * with interrupts disabled.
 * We set it up to overflow again in 1/HZ seconds.
 */
void timer_interrupt(struct pt_regs * regs)
{
	int next_dec;
	unsigned long cpu = smp_processor_id();
	unsigned jiffy_stamp = last_jiffy_stamp(cpu);
	extern void do_IRQ(struct pt_regs *);

	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);

	irq_enter();

	while ((next_dec = tb_ticks_per_jiffy - tb_delta(&jiffy_stamp)) <= 0) {
		jiffy_stamp += tb_ticks_per_jiffy;
		
		profile_tick(CPU_PROFILING, regs);
		update_process_times(user_mode(regs));

	  	if (smp_processor_id())
			continue;

		/* We are in an interrupt, no need to save/restore flags */
		write_seqlock(&xtime_lock);
		tb_last_stamp = jiffy_stamp;
		do_timer(regs);

		/*
		 * update the rtc when needed; this should be performed on the
		 * right fraction of a second. Half or full second?
		 * Full second works on mk48t59 clocks, others need testing.
		 * Note that this update is basically only used through
		 * the adjtimex system calls. Setting the HW clock in
		 * any other way is a /dev/rtc and userland business.
		 * This is still wrong by -0.5/+1.5 jiffies because of the
		 * timer interrupt resolution and possible delay, but here we
		 * hit a quantization limit which can only be solved by higher
		 * resolution timers and decoupling time management from timer
		 * interrupts. This is also wrong on the clocks
		 * which require being written at the half second boundary.
		 * We should have an rtc call that only sets the minutes and
		 * seconds like on Intel to avoid problems with non-UTC clocks.
		 */
		if ( ppc_md.set_rtc_time && ntp_synced() &&
		     xtime.tv_sec - last_rtc_update >= 659 &&
		     abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ &&
		     jiffies - wall_jiffies == 1) {
		  	if (ppc_md.set_rtc_time(xtime.tv_sec+1 + timezone_offset) == 0)
				last_rtc_update = xtime.tv_sec+1;
			else
				/* Try again one minute later */
				last_rtc_update += 60;
		}
		write_sequnlock(&xtime_lock);
	}
	if ( !disarm_decr[smp_processor_id()] )
		set_dec(next_dec);
	last_jiffy_stamp(cpu) = jiffy_stamp;

	if (ppc_md.heartbeat && !ppc_md.heartbeat_count--)
		ppc_md.heartbeat();

	irq_exit();
}
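
The RTC update condition above is easier to see with numbers (HZ=100 assumed for
illustration): the target offset is 1000000 - 1000000/HZ = 990000 us and the
tolerance is 500000/HZ = 5000 us, so the write is attempted on the last tick of
the current second and therefore stores xtime.tv_sec + 1:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const long hz = 100;
	long usec = 992500;			/* xtime.tv_nsec / 1000 on this tick */
	long target = 1000000 - 1000000 / hz;	/* 990000 us */

	if (labs(usec - target) < 500000 / hz)
		printf("write the RTC now, with tv_sec + 1\n");
	else
		printf("not on this tick\n");
	return 0;
}
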
asmlinkage int vprintk(const char *fmt, va_list args)
{
	int printed_len = 0;
	int current_log_level = default_message_loglevel;
	unsigned long flags;
	int this_cpu;
	char *p;

	boot_delay_msec();
	printk_delay();

	preempt_disable();
	/* This stops the holder of console_sem just where we want him */
	raw_local_irq_save(flags);
	this_cpu = smp_processor_id();

	/*
	 * Ouch, printk recursed into itself!
	 */
	if (unlikely(printk_cpu == this_cpu)) {
		/*
		 * If a crash is occurring during printk() on this CPU,
		 * then try to get the crash message out but make sure
		 * we can't deadlock. Otherwise just return to avoid the
		 * recursion and return - but flag the recursion so that
		 * it can be printed at the next appropriate moment:
		 */
		if (!oops_in_progress) {
			recursion_bug = 1;
			goto out_restore_irqs;
		}
		zap_locks();
	}

	lockdep_off();
	spin_lock(&logbuf_lock);
	printk_cpu = this_cpu;

	if (recursion_bug) {
		recursion_bug = 0;
		strcpy(printk_buf, recursion_bug_msg);
		printed_len = strlen(recursion_bug_msg);
	}
	/* Emit the output into the temporary buffer */
	printed_len += vscnprintf(printk_buf + printed_len,
				  sizeof(printk_buf) - printed_len, fmt, args);

#ifdef	CONFIG_DEBUG_LL
	printascii(printk_buf);
#endif

	p = printk_buf;

	/* Do we have a loglevel in the string? */
	if (p[0] == '<') {
		unsigned char c = p[1];
		if (c && p[2] == '>') {
			switch (c) {
			case '0' ... '7': /* loglevel */
				current_log_level = c - '0';
			/* Fallthrough - make sure we're on a new line */
			case 'd': /* KERN_DEFAULT */
				if (!new_text_line) {
					emit_log_char('\n');
					new_text_line = 1;
				}
			/* Fallthrough - skip the loglevel */
			case 'c': /* KERN_CONT */
				p += 3;
				break;
			}
		}
	}
Example #10
/*
 * This cpu is going to be removed and its vectors migrated to the remaining
 * online cpus.  Check to see if there are enough vectors in the remaining cpus.
 * This function is protected by stop_machine().
 */
int check_irq_vectors_for_cpu_disable(void)
{
	unsigned int this_cpu, vector, this_count, count;
	struct irq_desc *desc;
	struct irq_data *data;
	int cpu;

	this_cpu = smp_processor_id();
	cpumask_copy(&online_new, cpu_online_mask);
	cpumask_clear_cpu(this_cpu, &online_new);

	this_count = 0;
	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
		desc = __this_cpu_read(vector_irq[vector]);
		if (IS_ERR_OR_NULL(desc))
			continue;
		/*
		 * Protect against concurrent action removal, affinity
		 * changes etc.
		 */
		raw_spin_lock(&desc->lock);
		data = irq_desc_get_irq_data(desc);
		cpumask_copy(&affinity_new,
			     irq_data_get_affinity_mask(data));
		cpumask_clear_cpu(this_cpu, &affinity_new);

		/* Do not count inactive or per-cpu irqs. */
		if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) {
			raw_spin_unlock(&desc->lock);
			continue;
		}

		raw_spin_unlock(&desc->lock);
		/*
		 * A single irq may be mapped to multiple cpu's
		 * vector_irq[] (for example IOAPIC cluster mode).  In
		 * this case we have two possibilities:
		 *
		 * 1) the resulting affinity mask is empty; that is,
		 * the down'd cpu is the last cpu in the irq's
		 * affinity mask, or
		 *
		 * 2) the resulting affinity mask is no longer a
		 * subset of the online cpus but the affinity mask is
		 * not zero; that is, the down'd cpu is the last online
		 * cpu in a user-set affinity mask.
		 */
		if (cpumask_empty(&affinity_new) ||
		    !cpumask_subset(&affinity_new, &online_new))
			this_count++;
	}

	count = 0;
	for_each_online_cpu(cpu) {
		if (cpu == this_cpu)
			continue;
		/*
		 * We scan from FIRST_EXTERNAL_VECTOR to first system
		 * vector. If the vector is marked in the used vectors
		 * bitmap or an irq is assigned to it, we don't count
		 * it as available.
		 *
		 * As this is an inaccurate snapshot anyway, we can do
		 * this w/o holding vector_lock.
		 */
		for (vector = FIRST_EXTERNAL_VECTOR;
		     vector < first_system_vector; vector++) {
			if (!test_bit(vector, used_vectors) &&
			    IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector]))
			    count++;
		}
	}

	if (count < this_count) {
		pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n",
			this_cpu, this_count, count);
		return -ERANGE;
	}
	return 0;
}
Example #11
/*
 * Each cpu has a pair of open-addressed hashtables for pending
 * profile hits. read_profile() IPI's all cpus to request them
 * to flip buffers and flushes their contents to prof_buffer itself.
 * Flip requests are serialized by the profile_flip_mutex. The sole
 * use of having a second hashtable is for avoiding cacheline
 * contention that would otherwise happen during flushes of pending
 * profile hits required for the accuracy of reported profile hits
 * and so resurrect the interrupt livelock issue.
 *
 * The open-addressed hashtables are indexed by profile buffer slot
 * and hold the number of pending hits to that profile buffer slot on
 * a cpu in an entry. When the hashtable overflows, all pending hits
 * are accounted to their corresponding profile buffer slots with
 * atomic_add() and the hashtable emptied. As numerous pending hits
 * may be accounted to a profile buffer slot in a hashtable entry,
 * this amortizes a number of atomic profile buffer increments likely
 * to be far larger than the number of entries in the hashtable,
 * particularly given that the number of distinct profile buffer
 * positions to which hits are accounted during short intervals (e.g.
 * several seconds) is usually very small. Exclusion from buffer
 * flipping is provided by interrupt disablement (note that for
 * SCHED_PROFILING profile_hit() may be called from process context).
 * The hash function is meant to be lightweight as opposed to strong,
 * and was vaguely inspired by ppc64 firmware-supported inverted
 * pagetable hash functions, but uses a full hashtable full of finite
 * collision chains, not just pairs of them.
 *
 * -- wli
 */
static void __profile_flip_buffers(void *unused)
{
	int cpu = smp_processor_id();

	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
}
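
A simplified sketch of the per-cpu open-addressed hashtable described in the
comment above (field and constant names invented; the kernel's actual structure
differs): each entry pairs a profile buffer slot with the number of hits pending
for it, and linear probing runs until an empty or matching entry is found.

#define SKETCH_TABLE_SLOTS 64

struct pending_hit {
	unsigned int pc;	/* profile buffer slot */
	unsigned int hits;	/* hits pending for that slot */
};

static void account_hit(struct pending_hit *table, unsigned int pc)
{
	unsigned int i, idx = pc % SKETCH_TABLE_SLOTS;

	for (i = 0; i < SKETCH_TABLE_SLOTS; i++) {
		struct pending_hit *e = &table[(idx + i) % SKETCH_TABLE_SLOTS];

		if (e->hits == 0 || e->pc == pc) {	/* empty or matching entry */
			e->pc = pc;
			e->hits++;
			return;
		}
	}
	/* Table full: this is where the pending hits would be flushed to
	 * prof_buffer with atomic_add() and the table emptied. */
}
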
Example #12
static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
{
	play_dead_common();
	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
	cpu_bringup();
}
/*
 * We are going deep-idle (irqs are disabled):
 */
void sched_clock_idle_sleep_event(void)
{
	sched_clock_cpu(smp_processor_id());
}
Example #14
static void __cpuinit
smp_callin (void)
{
    int cpuid, phys_id, itc_master;
    struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
    extern void ia64_init_itm(void);
    extern volatile int time_keeper_id;

#ifdef CONFIG_PERFMON
    extern void pfm_init_percpu(void);
#endif

    cpuid = smp_processor_id();
    phys_id = hard_smp_processor_id();
    itc_master = time_keeper_id;

    if (cpu_online(cpuid)) {
        printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
               phys_id, cpuid);
        BUG();
    }

    fix_b0_for_bsp();

    lock_ipi_calllock();
    spin_lock(&vector_lock);
    /* Setup the per cpu irq handling data structures */
    __setup_vector_irq(cpuid);
    cpu_set(cpuid, cpu_online_map);
    unlock_ipi_calllock();
    per_cpu(cpu_state, cpuid) = CPU_ONLINE;
    spin_unlock(&vector_lock);

    smp_setup_percpu_timer();

    ia64_mca_cmc_vector_setup();	/* Setup vector on AP */

#ifdef CONFIG_PERFMON
    pfm_init_percpu();
#endif

    local_irq_enable();

    if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
        /*
         * Synchronize the ITC with the BP.  Need to do this after irqs are
         * enabled because ia64_sync_itc() calls smp_call_function_single(),
         * which calls spin_unlock_bh(), which calls local_bh_enable(), which
         * bugs out if irqs are not enabled...
         */
        Dprintk("Going to syncup ITC with ITC Master.\n");
        ia64_sync_itc(itc_master);
    }

    /*
     * Get our bogomips.
     */
    ia64_init_itm();

    /*
     * Delay calibration can be skipped if new processor is identical to the
     * previous processor.
     */
    last_cpuinfo = cpu_data(cpuid - 1);
    this_cpuinfo = local_cpu_data;
    if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
            last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
            last_cpuinfo->features != this_cpuinfo->features ||
            last_cpuinfo->revision != this_cpuinfo->revision ||
            last_cpuinfo->family != this_cpuinfo->family ||
            last_cpuinfo->archrev != this_cpuinfo->archrev ||
            last_cpuinfo->model != this_cpuinfo->model)
        calibrate_delay();
    local_cpu_data->loops_per_jiffy = loops_per_jiffy;

#ifdef CONFIG_IA32_SUPPORT
    ia32_gdt_init();
#endif

    /*
     * Allow the master to continue.
     */
    cpu_set(cpuid, cpu_callin_map);
    Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
}
Example #15
static void nmi_cpu_setup(void * dummy)
{
	int cpu = smp_processor_id();
	struct op_msrs * msrs = &cpu_msrs[cpu];
	model->setup_ctrs(msrs);
}
Example #16
static void nmi_save_registers(void *dummy)
{
    int cpu = smp_processor_id();
    struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
    nmi_cpu_save_registers(msrs);
}
Example #17
static void nmi_cpu_shutdown(void * dummy)
{
	int cpu = smp_processor_id();
	struct op_msrs * msrs = &cpu_msrs[cpu];
	nmi_restore_registers(msrs);
}
Example #18
/*
 * This creates a new process as a copy of the old one,
 * but does not actually start it yet.
 *
 * It copies the registers, and all the appropriate
 * parts of the process environment (as per the clone
 * flags). The actual kick-off is left to the caller.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
	int retval;
	struct task_struct *p;
	int cgroup_callbacks_done = 0;

	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
		return ERR_PTR(-EINVAL);

	/*
	 * Thread groups must share signals as well, and detached threads
	 * can only be started up within the thread group.
	 */
	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	/*
	 * Shared signal handlers imply shared VM. By way of the above,
	 * thread groups also imply shared VM. Blocking this case allows
	 * for various simplifications in other code.
	 */
	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	rt_mutex_init_task(p);

#ifdef CONFIG_PROVE_LOCKING
	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
	retval = -EAGAIN;
	if (atomic_read(&p->user->processes) >=
			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
		    p->user != current->nsproxy->user_ns->root_user)
			goto bad_fork_free;
	}

	atomic_inc(&p->user->__count);
	atomic_inc(&p->user->processes);
	get_group_info(p->group_info);

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
	 * to stop root fork bombs.
	 */
	if (nr_threads >= max_threads)
		goto bad_fork_cleanup_count;

	if (!try_module_get(task_thread_info(p)->exec_domain->module))
		goto bad_fork_cleanup_count;

	if (p->binfmt && !try_module_get(p->binfmt->module))
		goto bad_fork_cleanup_put_domain;

	p->did_exec = 0;
	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
	copy_flags(clone_flags, p);
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);
#ifdef CONFIG_PREEMPT_RCU
	p->rcu_read_lock_nesting = 0;
	p->rcu_flipctr_idx = 0;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
	p->vfork_done = NULL;
	spin_lock_init(&p->alloc_lock);

	clear_tsk_thread_flag(p, TIF_SIGPENDING);
	init_sigpending(&p->pending);

	p->utime = cputime_zero;
	p->stime = cputime_zero;
	p->gtime = cputime_zero;
	p->utimescaled = cputime_zero;
	p->stimescaled = cputime_zero;
	p->prev_utime = cputime_zero;
	p->prev_stime = cputime_zero;

#ifdef CONFIG_DETECT_SOFTLOCKUP
	p->last_switch_count = 0;
	p->last_switch_timestamp = 0;
#endif

	task_io_accounting_init(&p->ioac);
	acct_clear_integrals(p);

	p->it_virt_expires = cputime_zero;
	p->it_prof_expires = cputime_zero;
	p->it_sched_expires = 0;
	INIT_LIST_HEAD(&p->cpu_timers[0]);
	INIT_LIST_HEAD(&p->cpu_timers[1]);
	INIT_LIST_HEAD(&p->cpu_timers[2]);

	p->lock_depth = -1;		/* -1 = no lock */
	do_posix_clock_monotonic_gettime(&p->start_time);
	p->real_start_time = p->start_time;
	monotonic_to_bootbased(&p->real_start_time);
#ifdef CONFIG_SECURITY
	p->security = NULL;
#endif
	p->cap_bset = current->cap_bset;
	p->io_context = NULL;
	p->audit_context = NULL;
	cgroup_fork(p);
#ifdef CONFIG_NUMA
	p->mempolicy = mpol_dup(p->mempolicy);
 	if (IS_ERR(p->mempolicy)) {
 		retval = PTR_ERR(p->mempolicy);
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup_cgroup;
 	}
	mpol_fix_fork_child_flag(p);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	p->irq_events = 0;
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
	p->hardirqs_enabled = 1;
#else
	p->hardirqs_enabled = 0;
#endif
	p->hardirq_enable_ip = 0;
	p->hardirq_enable_event = 0;
	p->hardirq_disable_ip = _THIS_IP_;
	p->hardirq_disable_event = 0;
	p->softirqs_enabled = 1;
	p->softirq_enable_ip = _THIS_IP_;
	p->softirq_enable_event = 0;
	p->softirq_disable_ip = 0;
	p->softirq_disable_event = 0;
	p->hardirq_context = 0;
	p->softirq_context = 0;
#endif
#ifdef CONFIG_LOCKDEP
	p->lockdep_depth = 0; /* no locks held yet */
	p->curr_chain_key = 0;
	p->lockdep_recursion = 0;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
	p->blocked_on = NULL; /* not blocked yet */
#endif

	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p, clone_flags);

	if ((retval = security_task_alloc(p)))
		goto bad_fork_cleanup_policy;
	if ((retval = audit_alloc(p)))
		goto bad_fork_cleanup_security;
	/* copy all the process information */
	if ((retval = copy_semundo(clone_flags, p)))
		goto bad_fork_cleanup_audit;
	if ((retval = copy_files(clone_flags, p)))
		goto bad_fork_cleanup_semundo;
	if ((retval = copy_fs(clone_flags, p)))
		goto bad_fork_cleanup_files;
	if ((retval = copy_sighand(clone_flags, p)))
		goto bad_fork_cleanup_fs;
	if ((retval = copy_signal(clone_flags, p)))
		goto bad_fork_cleanup_sighand;
	if ((retval = copy_mm(clone_flags, p)))
		goto bad_fork_cleanup_signal;
	if ((retval = copy_keys(clone_flags, p)))
		goto bad_fork_cleanup_mm;
	if ((retval = copy_namespaces(clone_flags, p)))
		goto bad_fork_cleanup_keys;
	if ((retval = copy_io(clone_flags, p)))
		goto bad_fork_cleanup_namespaces;
	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
	if (retval)
		goto bad_fork_cleanup_io;

	if (pid != &init_struct_pid) {
		retval = -ENOMEM;
		pid = alloc_pid(task_active_pid_ns(p));
		if (!pid)
			goto bad_fork_cleanup_io;

		if (clone_flags & CLONE_NEWPID) {
			retval = pid_ns_prepare_proc(task_active_pid_ns(p));
			if (retval < 0)
				goto bad_fork_free_pid;
		}
	}

	p->pid = pid_nr(pid);
	p->tgid = p->pid;
	if (clone_flags & CLONE_THREAD)
		p->tgid = current->tgid;

	if (current->nsproxy != p->nsproxy) {
		retval = ns_cgroup_clone(p, pid);
		if (retval)
			goto bad_fork_free_pid;
	}

	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
	/*
	 * Clear TID on mm_release()?
	 */
	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
#ifdef CONFIG_FUTEX
	p->robust_list = NULL;
#ifdef CONFIG_COMPAT
	p->compat_robust_list = NULL;
#endif
	INIT_LIST_HEAD(&p->pi_state_list);
	p->pi_state_cache = NULL;
#endif
	/*
	 * sigaltstack should be cleared when sharing the same VM
	 */
	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
		p->sas_ss_sp = p->sas_ss_size = 0;

	/*
	 * Syscall tracing should be turned off in the child regardless
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
	clear_all_latency_tracing(p);

	/* ok, now we should be set up.. */
	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
	p->pdeath_signal = 0;
	p->exit_state = 0;

	/*
	 * Ok, make it visible to the rest of the system.
	 * We dont wake it up yet.
	 */
	p->group_leader = p;
	INIT_LIST_HEAD(&p->thread_group);

	/* Now that the task is set up, run cgroup callbacks if
	 * necessary. We need to run them before the task is visible
	 * on the tasklist. */
	cgroup_fork_callbacks(p);
	cgroup_callbacks_done = 1;

	/* Need tasklist lock for parent etc handling! */
	write_lock_irq(&tasklist_lock);

	/*
	 * The task hasn't been attached yet, so its cpus_allowed mask will
	 * not be changed, nor will its assigned CPU.
	 *
	 * The cpus_allowed mask of the parent may have changed after it was
	 * copied first time - so re-copy it here, then check the child's CPU
	 * to ensure it is on a valid CPU (and if not, just force it back to
	 * parent's CPU). This avoids a lot of nasty races.
	 */
	p->cpus_allowed = current->cpus_allowed;
	p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
			!cpu_online(task_cpu(p))))
		set_task_cpu(p, smp_processor_id());

	/* CLONE_PARENT re-uses the old parent */
	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		p->parent_exec_id = current->parent_exec_id;
	} else {
		p->real_parent = current;
		p->parent_exec_id = current->self_exec_id;
	}

	spin_lock(&current->sighand->siglock);

	/*
	 * Process group and session signals need to be delivered to just the
	 * parent before the fork or both the parent and the child after the
	 * fork. Restart if a signal comes in before we add the new process to
	 * its process group.
	 * A fatal signal pending means that current will exit, so the new
	 * thread can't slip out of an OOM kill (or normal SIGKILL).
 	 */
	recalc_sigpending();
	if (signal_pending(current)) {
		spin_unlock(&current->sighand->siglock);
		write_unlock_irq(&tasklist_lock);
		retval = -ERESTARTNOINTR;
		goto bad_fork_free_pid;
	}

	if (clone_flags & CLONE_THREAD) {
		atomic_inc(&current->signal->count);
		atomic_inc(&current->signal->live);
		p->group_leader = current->group_leader;
		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);

		if (!cputime_eq(current->signal->it_virt_expires,
				cputime_zero) ||
		    !cputime_eq(current->signal->it_prof_expires,
				cputime_zero) ||
		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
		    !list_empty(&current->signal->cpu_timers[0]) ||
		    !list_empty(&current->signal->cpu_timers[1]) ||
		    !list_empty(&current->signal->cpu_timers[2])) {
			/*
			 * Have child wake up on its first tick to check
			 * for process CPU timers.
			 */
			p->it_prof_expires = jiffies_to_cputime(1);
		}
	}

	if (likely(p->pid)) {
		list_add_tail(&p->sibling, &p->real_parent->children);
		tracehook_finish_clone(p, clone_flags, trace);

		if (thread_group_leader(p)) {
			if (clone_flags & CLONE_NEWPID)
				p->nsproxy->pid_ns->child_reaper = p;

			p->signal->leader_pid = pid;
			p->signal->tty = current->signal->tty;
			set_task_pgrp(p, task_pgrp_nr(current));
			set_task_session(p, task_session_nr(current));
			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
			attach_pid(p, PIDTYPE_SID, task_session(current));
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			__get_cpu_var(process_counts)++;
		}
		attach_pid(p, PIDTYPE_PID, pid);
		nr_threads++;
	}

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	write_unlock_irq(&tasklist_lock);
	proc_fork_connector(p);
	cgroup_post_fork(p);
	return p;

bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_io:
	put_io_context(p->io_context);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_keys:
	exit_keys(p);
bad_fork_cleanup_mm:
	if (p->mm)
		mmput(p->mm);
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		__cleanup_signal(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p); /* blocking */
bad_fork_cleanup_files:
	exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
	exit_sem(p);
bad_fork_cleanup_audit:
	audit_free(p);
bad_fork_cleanup_security:
	security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
	mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
	cgroup_exit(p, cgroup_callbacks_done);
	delayacct_tsk_free(p);
	if (p->binfmt)
		module_put(p->binfmt->module);
bad_fork_cleanup_put_domain:
	module_put(task_thread_info(p)->exec_domain->module);
bad_fork_cleanup_count:
	put_group_info(p->group_info);
	atomic_dec(&p->user->processes);
	free_uid(p->user);
bad_fork_free:
	free_task(p);
fork_out:
	return ERR_PTR(retval);
}
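
The "actual kick-off" the header comment leaves to the caller happens in the
fork path. A condensed sketch of such a caller (roughly in the spirit of
do_fork(); the helper name is invented and the ptrace, vfork and error-reporting
details are omitted):

static long do_fork_sketch(unsigned long clone_flags, unsigned long stack_start,
			   struct pt_regs *regs, unsigned long stack_size,
			   int __user *child_tidptr)
{
	struct task_struct *p;

	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, 0);
	if (IS_ERR(p))
		return PTR_ERR(p);

	/* The new task was only built above; start it running here. */
	wake_up_new_task(p, clone_flags);
	return task_pid_vnr(p);
}
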
Example #19
int __init bridge_probe(nasid_t nasid, int widget_id, int masterwid)
{
    unsigned long offset = NODE_OFFSET(nasid);
    struct bridge_controller *bc;
    static int num_bridges = 0;
    bridge_t *bridge;
    int slot;

    printk("a bridge\n");

    /* XXX: kludge alert.. */
    if (!num_bridges)
        ioport_resource.end = ~0UL;

    bc = &bridges[num_bridges];

    bc->pc.pci_ops		= &bridge_pci_ops;
    bc->pc.mem_resource	= &bc->mem;
    bc->pc.io_resource	= &bc->io;

    bc->pc.index		= num_bridges;

    bc->mem.name		= "Bridge PCI MEM";
    bc->pc.mem_offset	= offset;
    bc->mem.start		= 0;
    bc->mem.end		= ~0UL;
    bc->mem.flags		= IORESOURCE_MEM;

    bc->io.name		= "Bridge IO MEM";
    bc->pc.io_offset	= offset;
    bc->io.start		= 0UL;
    bc->io.end		= ~0UL;
    bc->io.flags		= IORESOURCE_IO;

    bc->irq_cpu = smp_processor_id();
    bc->widget_id = widget_id;
    bc->nasid = nasid;

    bc->baddr = (u64)masterwid << 60 | PCI64_ATTR_BAR;

    /*
     * point to this bridge
     */
    bridge = (bridge_t *) RAW_NODE_SWIN_BASE(nasid, widget_id);

    /*
     * Clear all pending interrupts.
     */
    bridge->b_int_rst_stat = BRIDGE_IRR_ALL_CLR;

    /*
     * Until otherwise set up, assume all interrupts are from slot 0
     */
    bridge->b_int_device = 0x0;

    /*
     * swap pio's to pci mem and io space (big windows)
     */
    bridge->b_wid_control |= BRIDGE_CTRL_IO_SWAP |
                             BRIDGE_CTRL_MEM_SWAP;

    /*
     * Hmm...  IRIX sets additional bits in the address which
     * are documented as reserved in the bridge docs.
     */
    bridge->b_wid_int_upper = 0x8000 | (masterwid << 16);
    bridge->b_wid_int_lower = 0x01800090;	/* PI_INT_PEND_MOD off*/
    bridge->b_dir_map = (masterwid << 20);	/* DMA */
    bridge->b_int_enable = 0;

    for (slot = 0; slot < 8; slot ++) {
        bridge->b_device[slot].reg |= BRIDGE_DEV_SWAP_DIR;
        bc->pci_int[slot] = -1;
    }
    bridge->b_wid_tflush;     /* wait until Bridge PIO complete */

    bc->base = bridge;

    register_pci_controller(&bc->pc);

    num_bridges++;

    return 0;
}
void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	unsigned int i, id, cpu = smp_processor_id();
	unsigned long *map;

	/* No lockless fast path .. yet */
	raw_spin_lock(&context_lock);

	pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
		cpu, next, next->context.active, next->context.id);

#ifdef CONFIG_SMP
	/* Mark us active and the previous one not anymore */
	next->context.active++;
	if (prev) {
		pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
		WARN_ON(prev->context.active < 1);
		prev->context.active--;
	}

 again:
#endif /* CONFIG_SMP */

	/* If we already have a valid assigned context, skip all that */
	id = next->context.id;
	if (likely(id != MMU_NO_CONTEXT)) {
#ifdef DEBUG_MAP_CONSISTENCY
		if (context_mm[id] != next)
			pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
			       next, id, id, context_mm[id]);
#endif
		goto ctxt_ok;
	}

	/* We really don't have a context, let's try to acquire one */
	id = next_context;
	if (id > last_context)
		id = first_context;
	map = context_map;

	/* No more free contexts, let's try to steal one */
	if (nr_free_contexts == 0) {
#ifdef CONFIG_SMP
		if (num_online_cpus() > 1) {
			id = steal_context_smp(id);
			if (id == MMU_NO_CONTEXT)
				goto again;
			goto stolen;
		}
#endif /* CONFIG_SMP */
		id = steal_context_up(id);
		goto stolen;
	}
	nr_free_contexts--;

	/* We know there's at least one free context, try to find it */
	while (__test_and_set_bit(id, map)) {
		id = find_next_zero_bit(map, last_context+1, id);
		if (id > last_context)
			id = first_context;
	}
 stolen:
	next_context = id + 1;
	context_mm[id] = next;
	next->context.id = id;
	pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);

	context_check_map();
 ctxt_ok:

	/* If that context got marked stale on this CPU, then flush the
	 * local TLB for it and unmark it before we use it
	 */
	if (test_bit(id, stale_map[cpu])) {
		pr_hardcont(" | stale flush %d [%d..%d]",
			    id, cpu_first_thread_sibling(cpu),
			    cpu_last_thread_sibling(cpu));

		local_flush_tlb_mm(next);

		/* XXX This clear should ultimately be part of local_flush_tlb_mm */
		for (i = cpu_first_thread_sibling(cpu);
		     i <= cpu_last_thread_sibling(cpu); i++) {
			__clear_bit(id, stale_map[i]);
		}
	}

	/* Flick the MMU and release lock */
	pr_hardcont(" -> %d\n", id);
	set_context(id, next->pgd);
	raw_spin_unlock(&context_lock);
}
Example #21
bool osq_lock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
	struct optimistic_spin_node *prev, *next;
	int curr = encode_cpu(smp_processor_id());
	int old;

	node->locked = 0;
	node->next = NULL;
	node->cpu = curr;

	old = atomic_xchg(&lock->tail, curr);
	if (old == OSQ_UNLOCKED_VAL)
		return true;

	prev = decode_cpu(old);
	node->prev = prev;
	WRITE_ONCE(prev->next, node);

	/*
	 * Normally @prev is untouchable after the above store; because at that
	 * moment unlock can proceed and wipe the node element from stack.
	 *
	 * However, since our nodes are static per-cpu storage, we're
	 * guaranteed their existence -- this allows us to apply
	 * cmpxchg in an attempt to undo our queueing.
	 */

	while (!READ_ONCE(node->locked)) {
		/*
		 * If we need to reschedule bail... so we can block.
		 */
		if (need_resched())
			goto unqueue;

		cpu_relax_lowlatency();
	}
	return true;

unqueue:
	/*
	 * Step - A  -- stabilize @prev
	 *
	 * Undo our @prev->next assignment; this will make @prev's
	 * unlock()/unqueue() wait for a next pointer since @lock points to us
	 * (or later).
	 */

	for (;;) {
		if (prev->next == node &&
		    cmpxchg(&prev->next, node, NULL) == node)
			break;

		/*
		 * We can only fail the cmpxchg() racing against an unlock(),
		 * in which case we should observe @node->locked becoming
		 * true.
		 */
		if (smp_load_acquire(&node->locked))
			return true;

		cpu_relax_lowlatency();

		/*
		 * Or we race against a concurrent unqueue()'s step-B, in which
		 * case its step-C will write us a new @node->prev pointer.
		 */
		prev = READ_ONCE(node->prev);
	}

	/*
	 * Step - B -- stabilize @next
	 *
	 * Similar to unlock(), wait for @node->next or move @lock from @node
	 * back to @prev.
	 */

	next = osq_wait_next(lock, node, prev);
	if (!next)
		return false;

	/*
	 * Step - C -- unlink
	 *
	 * @prev is stable because it's still waiting for a new @prev->next
	 * pointer, @next is stable because our @node->next pointer is NULL and
	 * it will wait in Step-A.
	 */

	WRITE_ONCE(next->prev, prev);
	WRITE_ONCE(prev->next, next);

	return false;
}
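
Usage sketch (surrounding code assumed, not taken from the kernel): osq_lock()
is the queueing part of an optimistic-spinning slow path; a false return means
the caller should stop spinning and fall back to blocking, while a true return
must eventually be balanced by osq_unlock().

static bool optimistic_spin_sketch(struct optimistic_spin_queue *osq)
{
	if (!osq_lock(osq))
		return false;	/* need_resched() hit: take the sleeping path */

	/*
	 * ... spin here while the lock owner is running, giving up when it
	 * schedules out or the lock is acquired ...
	 */

	osq_unlock(osq);
	return true;
}
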
Example #22
static void
xen_register_percpu_irq(ia64_vector vec, struct irqaction *action)
{
	__xen_register_percpu_irq(smp_processor_id(), vec, action, 1);
}
Example #23
void cpuinfo_store_cpu(void)
{
	struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
	__cpuinfo_store_cpu(info);
	update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
}
Example #24
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled and timer->base->cpu_base->lock held.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
    struct tick_sched *ts =
        container_of(timer, struct tick_sched, sched_timer);
    struct pt_regs *regs = get_irq_regs();
    ktime_t now = ktime_get();
    int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ
    /*
     * Check if the do_timer duty was dropped. We don't care about
     * concurrency: This happens only when the cpu in charge went
     * into a long sleep. If two cpus happen to assign themselves to
     * this duty, then the jiffies update is still serialized by
     * xtime_lock.
     */
    if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
        tick_do_timer_cpu = cpu;
#endif

    /* Check, if the jiffies need an update */
    if (tick_do_timer_cpu == cpu)
        tick_do_update_jiffies64(now);

    /*
     * Do not call, when we are not in irq context and have
     * no valid regs pointer
     */
    if (regs) {
        /*
         * When we are idle and the tick is stopped, we have to touch
         * the watchdog as we might not schedule for a really long
         * time. This happens on complete idle SMP systems while
         * waiting on the login prompt. We also increment the "start of
         * idle" jiffy stamp so the idle accounting adjustment we do
         * when we go busy again does not account for too many ticks.
         */
        if (ts->tick_stopped) {
            touch_softlockup_watchdog();
            ts->idle_jiffies++;
        }
        update_process_times(user_mode(regs));
        profile_tick(CPU_PROFILING);

        if ((rq_info.init == 1) && (tick_do_timer_cpu == cpu)) {

            /*
             * update run queue statistics
             */
            update_rq_stats();

            /*
             * wakeup user if needed
             */
            wakeup_user();
        }
    }

    hrtimer_forward(timer, now, tick_period);

    return HRTIMER_RESTART;
}
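
For context (assumed setup, not the kernel's exact initialization code): a
handler like this is registered on the hrtimer embedded in struct tick_sched,
and the HRTIMER_RESTART return value together with hrtimer_forward() keeps it
firing every tick_period until the idle code stops it.

static struct tick_sched ts_sketch;

static void setup_tick_sketch(void)
{
	struct hrtimer *t = &ts_sketch.sched_timer;

	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	t->function = tick_sched_timer;
	hrtimer_start(t, ktime_get(), HRTIMER_MODE_ABS);
}
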
Example #25
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
}
Example #26
//#ifdef PX_SOC_ARMADAXP
static void get_cpu_ids_on_each_core(void* arg) {
	int coreIndex;
	unsigned long* info = arg;
	coreIndex = smp_processor_id();
	info[coreIndex] = get_arm_cpu_id();
}
Example #27
unsigned long co_os_current_processor(void)
{
	return smp_processor_id();
}
Example #28
/**
 * acpi_save_state_mem - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 *
 * Note that this is too late to change acpi_wakeup_address.
 */
int acpi_save_state_mem(void)
{
	struct wakeup_header *header;

	if (!acpi_realmode) {
		printk(KERN_ERR "Could not allocate memory during boot, "
		       "S3 disabled\n");
		return -ENOMEM;
	}
	memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);

	header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
	if (header->signature != 0x51ee1111) {
		printk(KERN_ERR "wakeup header does not match\n");
		return -EINVAL;
	}

	header->video_mode = saved_video_mode;

	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;

	/*
	 * Set up the wakeup GDT.  We set these up as Big Real Mode,
	 * that is, with limits set to 4 GB.  At least the Lenovo
	 * Thinkpad X61 is known to need this for the video BIOS
	 * initialization quirk to work; this is likely to also
	 * be the case for other laptops or integrated video devices.
	 */

	/* GDT[0]: GDT self-pointer */
	header->wakeup_gdt[0] =
		(u64)(sizeof(header->wakeup_gdt) - 1) +
		((u64)(acpi_wakeup_address +
			((char *)&header->wakeup_gdt - (char *)acpi_realmode))
				<< 16);
	/* GDT[1]: big real mode-like code segment */
	header->wakeup_gdt[1] =
		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
	/* GDT[2]: big real mode-like data segment */
	header->wakeup_gdt[2] =
		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);

#ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

	header->pmode_efer_low = nx_enabled;
	if (header->pmode_efer_low & 1) {
		/* This is strange, why not save efer, always? */
		rdmsr(MSR_EFER, header->pmode_efer_low,
			header->pmode_efer_high);
	}
#endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
	header->pmode_cr4 = read_cr4_safe();
	header->realmode_flags = acpi_realmode_flags;
	header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
	header->pmode_entry = (u32)&wakeup_pmode_return;
	header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
	saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
	header->trampoline_segment = setup_trampoline() >> 4;
#ifdef CONFIG_SMP
	stack_start.sp = temp_stack + sizeof(temp_stack);

	pax_open_kernel();
	early_gdt_descr.address =
			(unsigned long)get_cpu_gdt_table(smp_processor_id());
	pax_close_kernel();

	initial_gs = per_cpu_offset(smp_processor_id());
#endif
	initial_code = (unsigned long)wakeup_long64;
	saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

	return 0;
}
Example #29
void __init smp_prepare_boot_cpu(void)
{
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
Example #30
void spinning_cpu(void* addr)
{
  raw_printk("CPU %d spinning on %X\n", smp_processor_id(), addr);
  dump_stack();
}