asmlinkage int vprintk(const char *fmt, va_list args) { unsigned long flags; int printed_len; char *p; static char printk_buf[1024]; static int log_level_unknown = 1; preempt_disable(); if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id()) /* If a crash is occurring during printk() on this CPU, * make sure we can't deadlock */ zap_locks(); /* This stops the holder of console_sem just where we want him */ raw_local_irq_save(flags); lockdep_off(); spin_lock(&logbuf_lock); printk_cpu = smp_processor_id(); /* Emit the output into the temporary buffer */ printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args); if (printed_len > 0) { unsigned int loglevel; int mark_len; char *mark_buf; char saved_char; if (printk_buf[0] == '<' && printk_buf[1] >= '0' && printk_buf[1] <= '7' && printk_buf[2] == '>') { loglevel = printk_buf[1] - '0'; mark_buf = &printk_buf[3]; mark_len = printed_len - 3; } else { loglevel = default_message_loglevel; mark_buf = printk_buf; mark_len = printed_len; } if (mark_buf[mark_len - 1] == '\n') mark_len--; saved_char = mark_buf[mark_len]; mark_buf[mark_len] = '\0'; _trace_mark(kernel_vprintk, "loglevel %c string %s ip %p", loglevel, mark_buf, __builtin_return_address(0)); mark_buf[mark_len] = saved_char; } /* * Copy the output into log_buf. If the caller didn't provide * appropriate log level tags, we insert them here */ for (p = printk_buf; *p; p++) { if (log_level_unknown) { /* log_level_unknown signals the start of a new line */ if (printk_time) { int loglev_char; char tbuf[50], *tp; unsigned tlen; unsigned long long t; unsigned long nanosec_rem; /* * force the log level token to be * before the time output. */ if (p[0] == '<' && p[1] >='0' && p[1] <= '7' && p[2] == '>') { loglev_char = p[1]; p += 3; printed_len -= 3; } else { loglev_char = default_message_loglevel + '0'; } t = printk_clock(); nanosec_rem = do_div(t, 1000000000); tlen = sprintf(tbuf, "<%c>[%5lu.%06lu] ", loglev_char, (unsigned long)t, nanosec_rem/1000); for (tp = tbuf; tp < tbuf + tlen; tp++) emit_log_char(*tp); printed_len += tlen; } else { if (p[0] != '<' || p[1] < '0' || p[1] > '7' || p[2] != '>') { emit_log_char('<'); emit_log_char(default_message_loglevel + '0'); emit_log_char('>'); printed_len += 3; } } log_level_unknown = 0; if (!*p) break; } emit_log_char(*p); if (*p == '\n') log_level_unknown = 1; } if (!down_trylock(&console_sem)) { /* * We own the drivers. We can drop the spinlock and * let release_console_sem() print the text, maybe ... */ console_locked = 1; printk_cpu = UINT_MAX; spin_unlock(&logbuf_lock); /* * Console drivers may assume that per-cpu resources have * been allocated. So unless they're explicitly marked as * being able to cope (CON_ANYTIME) don't call them until * this CPU is officially up. */ if (cpu_online(smp_processor_id()) || have_callable_console()) { console_may_schedule = 0; release_console_sem(); } else { /* Release by hand to avoid flushing the buffer. */ console_locked = 0; up(&console_sem); } lockdep_on(); raw_local_irq_restore(flags); } else { /* * Someone else owns the drivers. We drop the spinlock, which * allows the semaphore holder to proceed and to call the * console drivers with the output which we just produced. */ printk_cpu = UINT_MAX; spin_unlock(&logbuf_lock); lockdep_on(); raw_local_irq_restore(flags); } preempt_enable(); return printed_len; }
static int do_vperfctr_control(struct vperfctr *perfctr, const struct vperfctr_control __user *argp, unsigned int argbytes, struct task_struct *tsk) { struct vperfctr_control *control; int err; unsigned int next_cstatus; unsigned int nrctrs, i; if (!tsk) { return -ESRCH; /* attempt to update unlinked perfctr */ } /* The control object can be large (over 300 bytes on i386), so kmalloc() it instead of storing it on the stack. We must use task-private storage to prevent racing with a monitor process attaching to us before the non-preemptible perfctr update step. Therefore we cannot store the copy in the perfctr object itself. */ control = kmalloc(sizeof(*control), GFP_USER); if (!control) { return -ENOMEM; } err = -EINVAL; if (argbytes > sizeof *control) { goto out_kfree; } err = -EFAULT; if (copy_from_user(control, argp, argbytes)) { goto out_kfree; } if (argbytes < sizeof *control) memset((char*)control + argbytes, 0, sizeof *control - argbytes); // figure out what is happening in the following 'if' loop if (control->cpu_control.nractrs || control->cpu_control.nrictrs) { cpumask_t old_mask, new_mask; old_mask = tsk->cpus_allowed; cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask); err = -EINVAL; if (cpus_empty(new_mask)) { goto out_kfree; } if (!cpus_equal(new_mask, old_mask)) set_cpus_allowed(tsk, new_mask); } /* PREEMPT note: preemption is disabled over the entire region since we're updating an active perfctr. */ preempt_disable(); // the task whose control register I am changing might actually be // in suspended state. That can happen when the other is executing // under the control of another task as in the case of debugging // or ptrace. However, if the write_control is done for the current // executing process, first suspend them and then do the update // why are we resetting 'perfctr->cpu_state.cstatus' ? if (IS_RUNNING(perfctr)) { if (tsk == current) vperfctr_suspend(perfctr); // not sure why we are zeroing out the following explicitly perfctr->cpu_state.cstatus = 0; vperfctr_clear_iresume_cstatus(perfctr); } // coying the user-specified control values to 'state' perfctr->cpu_state.control = control->cpu_control; /* remote access note: perfctr_cpu_update_control() is ok */ err = perfctr_cpu_update_control(&perfctr->cpu_state, 0); if (err < 0) { goto out; } next_cstatus = perfctr->cpu_state.cstatus; if (!perfctr_cstatus_enabled(next_cstatus)) goto out; /* XXX: validate si_signo? */ perfctr->si_signo = control->si_signo; if (!perfctr_cstatus_has_tsc(next_cstatus)) perfctr->cpu_state.tsc_sum = 0; nrctrs = perfctr_cstatus_nrctrs(next_cstatus); for(i = 0; i < nrctrs; ++i) if (!(control->preserve & (1<<i))) perfctr->cpu_state.pmc[i].sum = 0; // I am not sure why we are removing the inheritance just because // we updated the control information. True, because the children might // be performing something else. So, the control will have to be set // before spawning any children spin_lock(&perfctr->children_lock); perfctr->inheritance_id = new_inheritance_id(); memset(&perfctr->children, 0, sizeof perfctr->children); spin_unlock(&perfctr->children_lock); if (tsk == current) { vperfctr_resume(perfctr); } out: preempt_enable(); out_kfree: kfree(control); return err; }
/* * Package up a bounce condition. */ void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) { u32 fpscr, orig_fpscr, fpsid, exceptions; pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc); /* * At this point, FPEXC can have the following configuration: * * EX DEX IXE * 0 1 x - synchronous exception * 1 x 0 - asynchronous exception * 1 x 1 - sychronous on VFP subarch 1 and asynchronous on later * 0 0 1 - synchronous on VFP9 (non-standard subarch 1 * implementation), undefined otherwise * * Clear various bits and enable access to the VFP so we can * handle the bounce. */ fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK)); fpsid = fmrx(FPSID); orig_fpscr = fpscr = fmrx(FPSCR); /* * Check for the special VFP subarch 1 and FPSCR.IXE bit case */ if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT) && (fpscr & FPSCR_IXE)) { /* * Synchronous exception, emulate the trigger instruction */ goto emulate; } if (fpexc & FPEXC_EX) { #ifndef CONFIG_CPU_FEROCEON /* * Asynchronous exception. The instruction is read from FPINST * and the interrupted instruction has to be restarted. */ trigger = fmrx(FPINST); regs->ARM_pc -= 4; #endif } else if (!(fpexc & FPEXC_DEX)) { /* * Illegal combination of bits. It can be caused by an * unallocated VFP instruction but with FPSCR.IXE set and not * on VFP subarch 1. */ vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs); goto exit; } /* * Modify fpscr to indicate the number of iterations remaining. * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates * whether FPEXC.VECITR or FPSCR.LEN is used. */ if (fpexc & (FPEXC_EX | FPEXC_VV)) { u32 len; len = fpexc + (1 << FPEXC_LENGTH_BIT); fpscr &= ~FPSCR_LENGTH_MASK; fpscr |= (len & FPEXC_LENGTH_MASK) << (FPSCR_LENGTH_BIT - FPEXC_LENGTH_BIT); } /* * Handle the first FP instruction. We used to take note of the * FPEXC bounce reason, but this appears to be unreliable. * Emulate the bounced instruction instead. */ exceptions = vfp_emulate_instruction(trigger, fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); /* * If there isn't a second FP instruction, exit now. Note that * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1. */ if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V)) goto exit; /* * The barrier() here prevents fpinst2 being read * before the condition above. */ barrier(); trigger = fmrx(FPINST2); emulate: exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs); if (exceptions) vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs); exit: preempt_enable(); }
int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistant_swap_storage) { struct address_space *swap_space; struct file *swap_storage; struct page *from_page; struct page *to_page; void *from_virtual; void *to_virtual; int i; int ret = -ENOMEM; BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated); BUG_ON(ttm->caching_state != tt_cached); /* * For user buffers, just unpin the pages, as there should be * vma references. */ if (ttm->page_flags & TTM_PAGE_FLAG_USER) { ttm_tt_free_user_pages(ttm); ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; ttm->swap_storage = NULL; return 0; } if (!persistant_swap_storage) { swap_storage = shmem_file_setup("ttm swap", ttm->num_pages << PAGE_SHIFT, 0); if (unlikely(IS_ERR(swap_storage))) { printk(KERN_ERR "Failed allocating swap storage.\n"); return PTR_ERR(swap_storage); } } else swap_storage = persistant_swap_storage; swap_space = swap_storage->f_path.dentry->d_inode->i_mapping; for (i = 0; i < ttm->num_pages; ++i) { from_page = ttm->pages[i]; if (unlikely(from_page == NULL)) continue; to_page = read_mapping_page(swap_space, i, NULL); if (unlikely(IS_ERR(to_page))) { ret = PTR_ERR(to_page); goto out_err; } preempt_disable(); from_virtual = kmap_atomic(from_page, KM_USER0); to_virtual = kmap_atomic(to_page, KM_USER1); memcpy(to_virtual, from_virtual, PAGE_SIZE); kunmap_atomic(to_virtual, KM_USER1); kunmap_atomic(from_virtual, KM_USER0); preempt_enable(); set_page_dirty(to_page); mark_page_accessed(to_page); page_cache_release(to_page); } ttm_tt_free_alloced_pages(ttm); ttm->swap_storage = swap_storage; ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED; if (persistant_swap_storage) ttm->page_flags |= TTM_PAGE_FLAG_PERSISTANT_SWAP; return 0; out_err: if (!persistant_swap_storage) fput(swap_storage); return ret; }
int trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr) { unsigned long save_max = tracing_max_latency; unsigned long count; int ret; /* * Now that the big kernel lock is no longer preemptable, * and this is called with the BKL held, it will always * fail. If preemption is already disabled, simply * pass the test. When the BKL is removed, or becomes * preemptible again, we will once again test this, * so keep it in. */ if (preempt_count()) { printk(KERN_CONT "can not test ... force "); return 0; } /* start the tracing */ ret = trace->init(tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; } /* reset the max latency */ tracing_max_latency = 0; /* disable preemption and interrupts for a bit */ preempt_disable(); local_irq_disable(); udelay(100); preempt_enable(); /* reverse the order of preempt vs irqs */ local_irq_enable(); /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (ret) { tracing_start(); goto out; } ret = trace_test_buffer(&max_tr, &count); if (ret) { tracing_start(); goto out; } if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; tracing_start(); goto out; } /* do the test by disabling interrupts first this time */ tracing_max_latency = 0; tracing_start(); preempt_disable(); local_irq_disable(); udelay(100); preempt_enable(); /* reverse the order of preempt vs irqs */ local_irq_enable(); /* stop the tracing. */ tracing_stop(); /* check both trace buffers */ ret = trace_test_buffer(tr, NULL); if (ret) goto out; ret = trace_test_buffer(&max_tr, &count); if (!ret && !count) { printk(KERN_CONT ".. no entries found .."); ret = -1; goto out; } out: trace->reset(tr); tracing_start(); tracing_max_latency = save_max; return ret; }
static void gru_free_cpu_resources(void *cb, void *dsr) { preempt_enable(); }
static int mcsdl_download(const UINT8 *pBianry, const UINT16 unLength, INT8 IdxNum) { int nRet; //--------------------------------- // Check Binary Size //--------------------------------- if (unLength >= MELFAS_FIRMWARE_MAX_SIZE) { nRet = MCSDL_RET_PROGRAM_SIZE_IS_WRONG; goto MCSDL_DOWNLOAD_FINISH; } #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" - Starting download...\n"); #endif //--------------------------------- // Make it ready //--------------------------------- #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" > Ready\n"); #endif mcsdl_set_ready(); //--------------------------------- // Erase Flash //--------------------------------- #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" > Erase\n"); #endif printk(" > Erase - Start \n"); preempt_disable(); nRet = mcsdl_erase_flash(IdxNum); preempt_enable(); printk(" > Erase- End \n"); if (nRet != MCSDL_RET_SUCCESS) goto MCSDL_DOWNLOAD_FINISH; //goto MCSDL_DOWNLOAD_FINISH; //lcs_test //--------------------------------- // Program Flash //--------------------------------- #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" > Program "); #endif preempt_disable(); nRet = mcsdl_program_flash((UINT8*) pBianry, (UINT16) unLength, IdxNum); preempt_enable(); if (nRet != MCSDL_RET_SUCCESS) goto MCSDL_DOWNLOAD_FINISH; //--------------------------------- // Verify flash //--------------------------------- #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" > Verify "); #endif preempt_disable(); nRet = mcsdl_verify_flash((UINT8*) pBianry, (UINT16) unLength, IdxNum); preempt_enable(); if (nRet != MCSDL_RET_SUCCESS) goto MCSDL_DOWNLOAD_FINISH; nRet = MCSDL_RET_SUCCESS; MCSDL_DOWNLOAD_FINISH: #if MELFAS_ENABLE_DBG_PRINT mcsdl_print_result( nRet ); // Show result #endif #if MELFAS_ENABLE_DBG_PROGRESS_PRINT printk(" > Rebooting\n"); printk(" - Fin.\n\n"); #endif mcsdl_reboot_mcs(); return nRet; }
static inline void __unlock_kernel(void) { _raw_spin_unlock(&kernel_flag); preempt_enable(); }
/** * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU * @timer: the timer to be added * @tim: expiry time * @delta_ns: "slack" range for the timer * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) * * Returns: * 0 on success * 1 when the timer was active */ int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; int ret, raise; base = lock_hrtimer_base(timer, &flags); /* Remove an active timer from the queue: */ ret = remove_hrtimer(timer, base); /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base); if (mode == HRTIMER_MODE_REL) { tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in * do_gettimeoffset(). In this case we want to round up by * resolution when starting a relative timer, to avoid short * timeouts. This will go away with the GTOD framework. */ #ifdef CONFIG_TIME_LOW_RES tim = ktime_add_safe(tim, base->resolution); #endif } hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); /* * Only allow reprogramming if the new base is on this CPU. * (it might still be on another CPU if the timer was pending) */ enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); /* * The timer may be expired and moved to the cb_pending * list. We can not raise the softirq with base lock held due * to a possible deadlock with runqueue lock. */ raise = timer->state == HRTIMER_STATE_PENDING; /* * We use preempt_disable to prevent this task from migrating after * setting up the softirq and raising it. Otherwise, if me migrate * we will raise the softirq on the wrong CPU. */ preempt_disable(); unlock_hrtimer_base(timer, &flags); if (raise) hrtimer_raise_softirq(); preempt_enable(); return ret; }
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t temp; enum hqd_dequeue_request_type type; unsigned long flags, end_jiffies; int retry; struct vi_mqd *m = get_mqd(mqd); if (adev->in_gpu_reset) return -EIO; acquire_queue(kgd, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0); switch (reset_type) { case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: type = DRAIN_PIPE; break; case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: type = RESET_WAVES; break; default: type = DRAIN_PIPE; break; } /* Workaround: If IQ timer is active and the wait time is close to or * equal to 0, dequeueing is not safe. Wait until either the wait time * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is * cleared before continuing. Also, ensure wait times are set to at * least 0x3. */ local_irq_save(flags); preempt_disable(); retry = 5000; /* wait for 500 usecs at maximum */ while (true) { temp = RREG32(mmCP_HQD_IQ_TIMER); if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { pr_debug("HW is processing IQ\n"); goto loop; } if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) == 3) /* SEM-rearm is safe */ break; /* Wait time 3 is safe for CP, but our MMIO read/write * time is close to 1 microsecond, so check for 10 to * leave more buffer room */ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) >= 10) break; pr_debug("IQ timer is active\n"); } else break; loop: if (!retry) { pr_err("CP HQD IQ timer status time out\n"); break; } ndelay(100); --retry; } retry = 1000; while (true) { temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) break; pr_debug("Dequeue request is pending\n"); if (!retry) { pr_err("CP HQD dequeue request time out\n"); break; } ndelay(100); --retry; } local_irq_restore(flags); preempt_enable(); WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { temp = RREG32(mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); release_queue(kgd); return -ETIME; } usleep_range(500, 1000); } release_queue(kgd); return 0; }
/* * Fill the partition reserved page with the information needed by * other partitions to discover we are alive and establish initial * communications. */ struct xpc_rsvd_page * xpc_rsvd_page_init(void) { struct xpc_rsvd_page *rp; AMO_t *amos_page; u64 rp_pa, nasid_array = 0; int i, ret; /* get the local reserved page's address */ preempt_disable(); rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id())); preempt_enable(); if (rp_pa == 0) { dev_err(xpc_part, "SAL failed to locate the reserved page\n"); return NULL; } rp = (struct xpc_rsvd_page *)__va(rp_pa); if (rp->partid != sn_partition_id) { dev_err(xpc_part, "the reserved page's partid of %d should be " "%d\n", rp->partid, sn_partition_id); return NULL; } rp->version = XPC_RP_VERSION; /* establish the actual sizes of the nasid masks */ if (rp->SAL_version == 1) { /* SAL_version 1 didn't set the nasids_size field */ rp->nasids_size = 128; } xp_nasid_mask_bytes = rp->nasids_size; xp_nasid_mask_words = xp_nasid_mask_bytes / 8; /* setup the pointers to the various items in the reserved page */ xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); xpc_vars = XPC_RP_VARS(rp); xpc_vars_part = XPC_RP_VARS_PART(rp); /* * Before clearing xpc_vars, see if a page of AMOs had been previously * allocated. If not we'll need to allocate one and set permissions * so that cross-partition AMOs are allowed. * * The allocated AMO page needs MCA reporting to remain disabled after * XPC has unloaded. To make this work, we keep a copy of the pointer * to this page (i.e., amos_page) in the struct xpc_vars structure, * which is pointed to by the reserved page, and re-use that saved copy * on subsequent loads of XPC. This AMO page is never freed, and its * memory protections are never restricted. */ amos_page = xpc_vars->amos_page; if (amos_page == NULL) { amos_page = (AMO_t *)TO_AMO(uncached_alloc_page(0, 1)); if (amos_page == NULL) { dev_err(xpc_part, "can't allocate page of AMOs\n"); return NULL; } /* * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems * when xpc_allow_IPI_ops() is called via xpc_hb_init(). */ if (!enable_shub_wars_1_1()) { ret = sn_change_memprotect(ia64_tpa((u64)amos_page), PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, &nasid_array); if (ret != 0) { dev_err(xpc_part, "can't change memory " "protections\n"); uncached_free_page(__IA64_UNCACHED_OFFSET | TO_PHYS((u64)amos_page), 1); return NULL; } } } else if (!IS_AMO_ADDRESS((u64)amos_page)) { /* * EFI's XPBOOT can also set amos_page in the reserved page, * but it happens to leave it as an uncached physical address * and we need it to be an uncached virtual, so we'll have to * convert it. 
*/ if (!IS_AMO_PHYS_ADDRESS((u64)amos_page)) { dev_err(xpc_part, "previously used amos_page address " "is bad = 0x%p\n", (void *)amos_page); return NULL; } amos_page = (AMO_t *)TO_AMO((u64)amos_page); } /* clear xpc_vars */ memset(xpc_vars, 0, sizeof(struct xpc_vars)); xpc_vars->version = XPC_V_VERSION; xpc_vars->act_nasid = cpuid_to_nasid(0); xpc_vars->act_phys_cpuid = cpu_physical_id(0); xpc_vars->vars_part_pa = __pa(xpc_vars_part); xpc_vars->amos_page_pa = ia64_tpa((u64)amos_page); xpc_vars->amos_page = amos_page; /* save for next load of XPC */ /* clear xpc_vars_part */ memset((u64 *)xpc_vars_part, 0, sizeof(struct xpc_vars_part) * XP_MAX_PARTITIONS); /* initialize the activate IRQ related AMO variables */ for (i = 0; i < xp_nasid_mask_words; i++) (void)xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i); /* initialize the engaged remote partitions related AMO variables */ (void)xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO); (void)xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO); /* timestamp of when reserved page was setup by XPC */ rp->stamp = CURRENT_TIME; /* * This signifies to the remote partition that our reserved * page is initialized. */ rp->vars_pa = __pa(xpc_vars); return rp; }
static int clamp_thread(void *arg) { int cpunr = (unsigned long)arg; DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); static const struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; unsigned int count = 0; unsigned int target_ratio; set_bit(cpunr, cpu_clamping_mask); set_freezable(); init_timer_on_stack(&wakeup_timer); sched_setscheduler(current, SCHED_FIFO, ¶m); while (true == clamping && !kthread_should_stop() && cpu_online(cpunr)) { int sleeptime; unsigned long target_jiffies; unsigned int guard; unsigned int compensation = 0; int interval; /* jiffies to sleep for each attempt */ unsigned int duration_jiffies = msecs_to_jiffies(duration); unsigned int window_size_now; try_to_freeze(); /* * make sure user selected ratio does not take effect until * the next round. adjust target_ratio if user has changed * target such that we can converge quickly. */ target_ratio = set_target_ratio; guard = 1 + target_ratio/20; window_size_now = window_size; count++; /* * systems may have different ability to enter package level * c-states, thus we need to compensate the injected idle ratio * to achieve the actual target reported by the HW. */ compensation = get_compensation(target_ratio); interval = duration_jiffies*100/(target_ratio+compensation); /* align idle time */ target_jiffies = roundup(jiffies, interval); sleeptime = target_jiffies - jiffies; if (sleeptime <= 0) sleeptime = 1; schedule_timeout_interruptible(sleeptime); /* * only elected controlling cpu can collect stats and update * control parameters. */ if (cpunr == control_cpu && !(count%window_size_now)) { should_skip = powerclamp_adjust_controls(target_ratio, guard, window_size_now); smp_mb(); } if (should_skip) continue; target_jiffies = jiffies + duration_jiffies; mod_timer(&wakeup_timer, target_jiffies); if (unlikely(local_softirq_pending())) continue; /* * stop tick sched during idle time, interrupts are still * allowed. thus jiffies are updated properly. */ preempt_disable(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; unsigned long eax = target_mwait; /* * REVISIT: may call enter_idle() to notify drivers who * can save power during cpu idle. same for exit_idle() */ local_touch_nmi(); stop_critical_timings(); mwait_idle_with_hints(eax, ecx); start_critical_timings(); atomic_inc(&idle_wakeup_counter); } preempt_enable(); } del_timer_sync(&wakeup_timer); clear_bit(cpunr, cpu_clamping_mask); return 0; } /* * 1 HZ polling while clamping is active, useful for userspace * to monitor actual idle ratio. 
*/ static void poll_pkg_cstate(struct work_struct *dummy); static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); static void poll_pkg_cstate(struct work_struct *dummy) { static u64 msr_last; static u64 tsc_last; static unsigned long jiffies_last; u64 msr_now; unsigned long jiffies_now; u64 tsc_now; u64 val64; msr_now = pkg_state_counter(); rdtscll(tsc_now); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) pkg_cstate_ratio_cur = 1; else { if (tsc_now - tsc_last) { val64 = 100 * (msr_now - msr_last); do_div(val64, (tsc_now - tsc_last)); pkg_cstate_ratio_cur = val64; } } /* update record */ msr_last = msr_now; jiffies_last = jiffies_now; tsc_last = tsc_now; if (true == clamping) schedule_delayed_work(&poll_pkg_cstate_work, HZ); } static int start_power_clamp(void) { unsigned long cpu; struct task_struct *thread; /* check if pkg cstate counter is completely 0, abort in this case */ if (!has_pkg_state_counter()) { pr_err("pkg cstate counter not functional, abort\n"); return -EINVAL; } set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); /* prevent cpu hotplug */ get_online_cpus(); /* prefer BSP */ control_cpu = 0; if (!cpu_online(control_cpu)) control_cpu = smp_processor_id(); clamping = true; schedule_delayed_work(&poll_pkg_cstate_work, 0); /* start one thread per online cpu */ for_each_online_cpu(cpu) { struct task_struct **p = per_cpu_ptr(powerclamp_thread, cpu); thread = kthread_create_on_node(clamp_thread, (void *) cpu, cpu_to_node(cpu), "kidle_inject/%ld", cpu); /* bind to cpu here */ if (likely(!IS_ERR(thread))) { kthread_bind(thread, cpu); wake_up_process(thread); *p = thread; } } put_online_cpus(); return 0; }
void unlock_vm_area(struct vm_struct *area) { preempt_enable(); }
/* * Fill the partition reserved page with the information needed by * other partitions to discover we are alive and establish initial * communications. */ int xpc_setup_rsvd_page(void) { int ret; struct xpc_rsvd_page *rp; unsigned long rp_pa; unsigned long new_ts_jiffies; /* get the local reserved page's address */ preempt_disable(); rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id())); preempt_enable(); if (rp_pa == 0) { dev_err(xpc_part, "SAL failed to locate the reserved page\n"); return -ESRCH; } rp = (struct xpc_rsvd_page *)__va(rp_pa); if (rp->SAL_version < 3) { /* SAL_versions < 3 had a SAL_partid defined as a u8 */ rp->SAL_partid &= 0xff; } BUG_ON(rp->SAL_partid != xp_partition_id); if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) { dev_err(xpc_part, "the reserved page's partid of %d is outside " "supported range (< 0 || >= %d)\n", rp->SAL_partid, xp_max_npartitions); return -EINVAL; } rp->version = XPC_RP_VERSION; rp->max_npartitions = xp_max_npartitions; /* establish the actual sizes of the nasid masks */ if (rp->SAL_version == 1) { /* SAL_version 1 didn't set the nasids_size field */ rp->SAL_nasids_size = 128; } xpc_nasid_mask_nbytes = rp->SAL_nasids_size; xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size * BITS_PER_BYTE); /* setup the pointers to the various items in the reserved page */ xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); ret = xpc_setup_rsvd_page_sn(rp); if (ret != 0) return ret; /* * Set timestamp of when reserved page was setup by XPC. * This signifies to the remote partition that our reserved * page is initialized. */ new_ts_jiffies = jiffies; if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies) new_ts_jiffies++; rp->ts_jiffies = new_ts_jiffies; xpc_rsvd_page = rp; return 0; }
/* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. * If we're over `background_thresh' then pdflush is woken to perform some * writeout. */ static void balance_dirty_pages(struct address_space *mapping) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; long background_thresh; long dirty_thresh; long bdi_thresh; unsigned long pages_written = 0; unsigned long write_chunk = sync_writeback_pages(); struct backing_dev_info *bdi = mapping->backing_dev_info; for (;;) { struct writeback_control wbc = { .bdi = bdi, .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, .range_cyclic = 1, }; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); nr_reclaimable = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); nr_writeback = global_page_state(NR_WRITEBACK); bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the bdi limits are ramping up. */ if (nr_reclaimable + nr_writeback < (background_thresh + dirty_thresh) / 2) break; if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. */ if (bdi_nr_reclaimable) { writeback_inodes(&wbc); pages_written += write_chunk - wbc.nr_to_write; get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); } /* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ if (bdi_thresh < 2*bdi_stat_error(bdi)) { bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); } else if (bdi_nr_reclaimable) { bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); } if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh) break; if (pages_written >= write_chunk) break; /* We've done our duty */ congestion_wait(WRITE, HZ/10); } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && bdi->dirty_exceeded) bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* pdflush is already working this queue */ /* * In laptop mode, we wait until hitting the higher threshold before * starting background writeout, and then write out all the way down * to the lower threshold. So slow writers cause minimal disk activity. * * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. 
*/ if ((laptop_mode && pages_written) || (!laptop_mode && (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) > background_thresh))) pdflush_operation(background_writeout, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) { if (set_page_dirty(page) || page_mkwrite) { struct address_space *mapping = page_mapping(page); if (mapping) balance_dirty_pages_ratelimited(mapping); } } /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * On really big machines, get_writeback_state is expensive, so try to avoid * calling it too often (ratelimiting). But once we're over the dirty memory * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { static DEFINE_PER_CPU(unsigned long, ratelimits) = 0; unsigned long ratelimit; unsigned long *p; ratelimit = ratelimit_pages; if (mapping->backing_dev_info->dirty_exceeded) ratelimit = 8; /* * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */ preempt_disable(); p = &__get_cpu_var(ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { *p = 0; preempt_enable(); balance_dirty_pages(mapping); return; } preempt_enable(); } EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr); void throttle_vm_writeout(gfp_t gfp_mask) { long background_thresh; long dirty_thresh; for ( ; ; ) { get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); /* * Boost the allowable dirty threshold a bit for page * allocators so they don't get DoS'ed by heavy writers */ dirty_thresh += dirty_thresh / 10; /* wheeee... */ if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; congestion_wait(WRITE, HZ/10); /* * The caller might hold locks which can prevent IO completion * or progress in the filesystem. So we cannot just sit here * waiting for IO to complete. */ if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) break; } }
/* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force * the caller to perform writeback if the system is over `vm_dirty_ratio'. * If we're over `background_thresh' then the writeback threads are woken to * perform some writeout. */ static void balance_dirty_pages(struct address_space *mapping, unsigned long write_chunk) { long nr_reclaimable, bdi_nr_reclaimable; long nr_writeback, bdi_nr_writeback; unsigned long background_thresh; unsigned long dirty_thresh; unsigned long bdi_thresh; unsigned long pages_written = 0; unsigned long pause = 1; bool dirty_exceeded = false; struct backing_dev_info *bdi = mapping->backing_dev_info; for (;;) { struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, .older_than_this = NULL, .nr_to_write = write_chunk, .range_cyclic = 1, }; nr_reclaimable = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS); nr_writeback = global_page_state(NR_WRITEBACK); global_dirty_limits(&background_thresh, &dirty_thresh); /* * Throttle it only when the background writeback cannot * catch-up. This avoids (excessively) small writeouts * when the bdi limits are ramping up. */ if (nr_reclaimable + nr_writeback < (background_thresh + dirty_thresh) / 2) break; bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); bdi_thresh = task_dirty_limit(current, bdi_thresh); /* * In order to avoid the stacked BDI deadlock we need * to ensure we accurately count the 'dirty' pages when * the threshold is low. * * Otherwise it would be possible to get thresh+n pages * reported dirty, even though there are thresh-m pages * actually dirty; with m+n sitting in the percpu * deltas. */ if (bdi_thresh < 2*bdi_stat_error(bdi)) { bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK); } else { bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE); bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK); } /* * The bdi thresh is somehow "soft" limit derived from the * global "hard" limit. The former helps to prevent heavy IO * bdi or process from holding back light ones; The latter is * the last resort safeguard. */ dirty_exceeded = (bdi_nr_reclaimable + bdi_nr_writeback >= bdi_thresh) || (nr_reclaimable + nr_writeback >= dirty_thresh); if (!dirty_exceeded) break; if (!bdi->dirty_exceeded) bdi->dirty_exceeded = 1; /* Note: nr_reclaimable denotes nr_dirty + nr_unstable. * Unstable writes are a feature of certain networked * filesystems (i.e. NFS) in which data may have been * written to the server's write cache, but has not yet * been flushed to permanent storage. * Only move pages to writeback if this bdi is over its * threshold otherwise wait until the disk writes catch * up. */ if (bdi_nr_reclaimable > bdi_thresh) { writeback_inodes_wb(&bdi->wb, &wbc); pages_written += write_chunk - wbc.nr_to_write; if (pages_written >= write_chunk) break; /* We've done our duty */ } __set_current_state(TASK_INTERRUPTIBLE); io_schedule_timeout(pause); /* * Increase the delay for each loop, up to our previous * default of taking a 100ms nap. */ pause <<= 1; if (pause > HZ / 10) pause = HZ / 10; } if (!dirty_exceeded && bdi->dirty_exceeded) bdi->dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* * In laptop mode, we wait until hitting the higher threshold before * starting background writeout, and then write out all the way down * to the lower threshold. So slow writers cause minimal disk activity. 
* * In normal mode, we start background writeout at the lower * background_thresh, to keep the amount of dirty memory low. */ if ((laptop_mode && pages_written) || (!laptop_mode && (nr_reclaimable > background_thresh))) bdi_start_background_writeback(bdi); } void set_page_dirty_balance(struct page *page, int page_mkwrite) { if (set_page_dirty(page) || page_mkwrite) { struct address_space *mapping = page_mapping(page); if (mapping) balance_dirty_pages_ratelimited(mapping); } } static DEFINE_PER_CPU(unsigned long, bdp_ratelimits) = 0; /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied * @nr_pages_dirtied: number of pages which the caller has just dirtied * * Processes which are dirtying memory should call in here once for each page * which was newly dirtied. The function will periodically check the system's * dirty state and will initiate writeback if needed. * * On really big machines, get_writeback_state is expensive, so try to avoid * calling it too often (ratelimiting). But once we're over the dirty memory * limit we decrease the ratelimiting by a lot, to prevent individual processes * from overshooting the limit by (ratelimit_pages) each. */ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied) { unsigned long ratelimit; unsigned long *p; ratelimit = ratelimit_pages; if (mapping->backing_dev_info->dirty_exceeded) ratelimit = 8; /* * Check the rate limiting. Also, we do not want to throttle real-time * tasks in balance_dirty_pages(). Period. */ preempt_disable(); p = &__get_cpu_var(bdp_ratelimits); *p += nr_pages_dirtied; if (unlikely(*p >= ratelimit)) { ratelimit = sync_writeback_pages(*p); *p = 0; preempt_enable(); balance_dirty_pages(mapping, ratelimit); return; } preempt_enable(); }
static void spe_end(void) { disable_kernel_spe(); /* reenable preemption */ preempt_enable(); }
/* * All calls to this function will be optimised into tail calls. We are * passed a pointer to the destination which we return as required by a * memcpy implementation. */ void *exit_vmx_copy(void *dest) { preempt_enable(); return dest; }
void jrcu_read_unlock(void) { if (preempt_count() == 1) rcu_eob(rcu_cpu()); preempt_enable(); }
static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; int err = 0; int signal; unsigned long address = 0; #ifdef CONFIG_MMU pmd_t *pmdp; pte_t *ptep; #endif frame = get_sigframe(ka, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; signal = current_thread_info()->exec_domain && current_thread_info()->exec_domain->signal_invmap && sig < 32 ? current_thread_info()->exec_domain->signal_invmap[sig] : sig; if (info) err |= copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); err |= __put_user((void *)current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(regs->r1), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. If provided, use a stub already in userspace. */ /* minus 8 is offset to cater for "rtsd r15,8" */ if (ka->sa.sa_flags & SA_RESTORER) { regs->r15 = ((unsigned long)ka->sa.sa_restorer)-8; } else { /* addi r12, r0, __NR_sigreturn */ err |= __put_user(0x31800000 | __NR_rt_sigreturn , frame->tramp + 0); /* brki r14, 0x8 */ err |= __put_user(0xb9cc0008, frame->tramp + 1); /* Return from sighandler will jump to the tramp. Negative 8 offset because return is rtsd r15, 8 */ regs->r15 = ((unsigned long)frame->tramp)-8; address = ((unsigned long)frame->tramp); #ifdef CONFIG_MMU pmdp = pmd_offset(pud_offset( pgd_offset(current->mm, address), address), address); preempt_disable(); ptep = pte_offset_map(pmdp, address); if (pte_present(*ptep)) { address = (unsigned long) page_address(pte_page(*ptep)); /* MS: I need add offset in page */ address += ((unsigned long)frame->tramp) & ~PAGE_MASK; /* MS address is virtual */ address = virt_to_phys(address); invalidate_icache_range(address, address + 8); flush_dcache_range(address, address + 8); } pte_unmap(ptep); preempt_enable(); #else invalidate_icache_range(address, address + 8); flush_dcache_range(address, address + 8); #endif } if (err) goto give_sigsegv; /* Set up registers for signal handler */ regs->r1 = (unsigned long) frame - STATE_SAVE_ARG_SPACE; /* Signal handler args: */ regs->r5 = signal; /* arg 0: signum */ regs->r6 = (unsigned long) &frame->info; /* arg 1: siginfo */ regs->r7 = (unsigned long) &frame->uc; /* arg2: ucontext */ /* Offset to handle microblaze rtid r14, 0 */ regs->pc = (unsigned long)ka->sa.sa_handler; set_fs(USER_DS); /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); #ifdef DEBUG_SIG printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n", current->comm, current->pid, frame, regs->pc); #endif return; give_sigsegv: if (sig == SIGSEGV) ka->sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); }
inline bool skb_recycler_consume(struct sk_buff *skb) { unsigned long flags; struct sk_buff_head *h; /* Can we recycle this skb? If not, simply return that we cannot */ if (unlikely(!consume_skb_can_recycle(skb, SKB_RECYCLE_MIN_SIZE, SKB_RECYCLE_MAX_SIZE))) return false; /* * If we can, then it will be much faster for us to recycle this one * later than to allocate a new one from scratch. */ preempt_disable(); h = &__get_cpu_var(recycle_list); local_irq_save(flags); /* Attempt to enqueue the CPU hot recycle list first */ if (likely(skb_queue_len(h) < SKB_RECYCLE_MAX_SKBS)) { __skb_queue_head(h, skb); local_irq_restore(flags); preempt_enable(); return true; } #ifdef CONFIG_SKB_RECYCLER_MULTI_CPU h = &__get_cpu_var(recycle_spare_list); /* The CPU hot recycle list was full; if the spare list is also full, * attempt to move the spare list to the global list for other CPUs to * use. */ if (unlikely(skb_queue_len(h) >= SKB_RECYCLE_SPARE_MAX_SKBS)) { uint8_t cur_tail, next_tail; spin_lock(&glob_recycler.lock); cur_tail = glob_recycler.tail; next_tail = (cur_tail + 1) & SKB_RECYCLE_MAX_SHARED_POOLS_MASK; if (next_tail != glob_recycler.head) { struct sk_buff_head *p = &glob_recycler.pool[cur_tail]; /* Optimized, inlined SKB List splice */ p->next = h->next; h->next->prev = (struct sk_buff *)p; p->prev = h->prev; h->prev->next = (struct sk_buff *)p; p->qlen = SKB_RECYCLE_SPARE_MAX_SKBS; /* Done with global list init */ glob_recycler.tail = next_tail; spin_unlock(&glob_recycler.lock); /* Optimized, inlined spare SKB list init */ h->next = (struct sk_buff *)h; h->prev = (struct sk_buff *)h; h->qlen = 0; /* We have now cleared room in the spare; enqueue */ __skb_queue_head(h, skb); local_irq_restore(flags); preempt_enable(); return true; } /* We still have a full spare because the global is also full */ spin_unlock(&glob_recycler.lock); } else { /* We have room in the spare list; enqueue to spare list */ __skb_queue_head(h, skb); local_irq_restore(flags); preempt_enable(); return true; } #endif local_irq_restore(flags); preempt_enable(); return false; }
int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs; unsigned long childksp; p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; preempt_disable(); if (is_fpu_owner()) save_fp(p); if (cpu_has_dsp) save_dsp(p); preempt_enable(); /* set up new TSS. */ childregs = (struct pt_regs *) childksp - 1; /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; *childregs = *regs; childregs->regs[7] = 0; /* Clear error flag */ childregs->regs[2] = 0; /* Child gets zero as return value */ if (childregs->cp0_status & ST0_CU0) { childregs->regs[28] = (unsigned long) ti; childregs->regs[29] = childksp; ti->addr_limit = KERNEL_DS; } else { childregs->regs[29] = usp; ti->addr_limit = USER_DS; } p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; /* * New tasks lose permission to use the fpu. This accelerates context * switching for most programs since they don't use the fpu. */ p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); #ifdef CONFIG_MIPS_MT_SMTC /* * SMTC restores TCStatus after Status, and the CU bits * are aliased there. */ childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1); #endif clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF clear_tsk_thread_flag(p, TIF_FPUBOUND); #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) ti->tp_value = regs->regs[7]; return 0; }
/* * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; unsigned long flags; preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Optimistic spinning. * * We try to spin for acquisition when we find that there are no * pending waiters and the lock owner is currently running on a * (different) CPU. * * The rationale is that if the lock owner is running, it is likely to * release the lock soon. * * Since this needs the lock owner, and this mutex implementation * doesn't track the owner atomically in the lock field, we need to * track it non-atomically. * * We can't do this for DEBUG_MUTEXES because that relies on wait_lock * to serialize everything. */ for (;;) { struct task_struct *owner; /* * If there's an owner, wait for it to either * release the lock or go to sleep. */ owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) break; if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); preempt_enable(); return 0; } /* * When there's no owner, we might have preempted between the * owner acquiring the lock and setting the owner field. If * we're an RT task that will live-lock because we won't let * the owner complete. */ if (!owner && (need_resched() || rt_task(task))) break; /* * The cpu_relax() call is a compiler barrier which forces * everything in this loop to be re-loaded. We don't need * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ arch_mutex_cpu_relax(); } #endif spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; if (atomic_xchg(&lock->count, -1) == 1) goto done; lock_contended(&lock->dep_map, ip); for (;;) { /* * Lets try to take the lock again - this is needed even if * we get here for the first time (shortly after failing to * acquire the lock), to make sure that we get a wakeup once * it's unlocked. Later on, if we sleep, this is the * operation that gives us the lock. We xchg it to -1, so * that when we release the lock, we properly wake up the * other waiters: */ if (atomic_xchg(&lock->count, -1) == 1) break; /* * got a signal? (This code gets eliminated in the * TASK_UNINTERRUPTIBLE case.) */ if (unlikely(signal_pending_state(state, task))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); mutex_release(&lock->dep_map, 1, ip); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); preempt_enable(); return -EINTR; } __set_task_state(task, state); /* didn't get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); preempt_enable_no_resched(); schedule(); preempt_disable(); spin_lock_mutex(&lock->wait_lock, flags); } done: lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, current_thread_info()); mutex_set_owner(lock); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); preempt_enable(); return 0; }
/* * Add a new chunk of uncached memory pages to the specified pool. * * @pool: pool to add new chunk of uncached memory to * @nid: node id of node to allocate memory from, or -1 * * This is accomplished by first allocating a granule of cached memory pages * and then converting them to uncached memory pages. */ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) { struct page *page; int status, i, nchunks_added = uc_pool->nchunks_added; unsigned long c_addr, uc_addr; if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0) return -1; /* interrupted by a signal */ if (uc_pool->nchunks_added > nchunks_added) { /* someone added a new chunk while we were waiting */ mutex_unlock(&uc_pool->add_chunk_mutex); return 0; } if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) { mutex_unlock(&uc_pool->add_chunk_mutex); return -1; } /* attempt to allocate a granule's worth of cached memory pages */ page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); return -1; } /* convert the memory pages from cached to uncached */ c_addr = (unsigned long)page_address(page); uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; /* * There's a small race here where it's possible for someone to * access the page through /dev/mem halfway through the conversion * to uncached - not sure it's really worth bothering about */ for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) SetPageUncached(&page[i]); flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { atomic_set(&uc_pool->status, 0); status = smp_call_function(uncached_ipi_visibility, uc_pool, 1); if (status || atomic_read(&uc_pool->status)) goto failed; } else if (status != PAL_VISIBILITY_OK) goto failed; preempt_disable(); if (ia64_platform_is("sn2")) sn_flush_all_caches(uc_addr, IA64_GRANULE_SIZE); else flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); /* flush the just introduced uncached translation from the TLB */ local_flush_tlb_all(); preempt_enable(); status = ia64_pal_mc_drain(); if (status != PAL_STATUS_SUCCESS) goto failed; atomic_set(&uc_pool->status, 0); status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); if (status || atomic_read(&uc_pool->status)) goto failed; /* * The chunk of memory pages has been converted to uncached so now we * can add it to the pool. */ status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid); if (status) goto failed; uc_pool->nchunks_added++; mutex_unlock(&uc_pool->add_chunk_mutex); return 0; /* failed to convert or add the chunk so give it back to the kernel */ failed: for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) ClearPageUncached(&page[i]); free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); mutex_unlock(&uc_pool->add_chunk_mutex); return -1; }
/* * Free the current cpus reserved DSR/CBR resources. */ static void gru_free_cpu_resources(void *cb, void *dsr) { gru_unlock_kernel_context(uv_numa_blade_id()); preempt_enable(); }
static long restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) { unsigned long ip, flags, nat, um, cfm, rsc; long err; /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; /* restore scratch that always needs gets updated during signal delivery: */ err = __get_user(flags, &sc->sc_flags); err |= __get_user(nat, &sc->sc_nat); err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ err |= __get_user(cfm, &sc->sc_cfm); err |= __get_user(um, &sc->sc_um); /* user mask */ err |= __get_user(rsc, &sc->sc_ar_rsc); err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ scr->pt.cr_ifs = cfm | (1UL << 63); scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ /* establish new instruction pointer: */ scr->pt.cr_iip = ip & ~0x3UL; ia64_psr(&scr->pt)->ri = ip & 0x3; scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { /* Restore most scratch-state only when not in syscall. */ err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ } if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { struct ia64_psr *psr = ia64_psr(&scr->pt); err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); psr->mfh = 0; /* drop signal handler's fph contents... */ preempt_disable(); if (psr->dfh) ia64_drop_fpu(current); else { /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ __ia64_load_fpu(current->thread.fph); ia64_set_local_fpu_owner(current); } preempt_enable(); } return err; }
// sys_vperfctr_read() -> this() static int do_vperfctr_read(struct vperfctr *perfctr, unsigned int cmd, void __user *argp, unsigned int argbytes, struct task_struct *tsk) { union { struct perfctr_sum_ctrs sum; struct vperfctr_control control; struct perfctr_sum_ctrs children; } *tmp; unsigned int tmpbytes; int ret; /* The state snapshot can be large, so kmalloc() it instead of storing it on the stack. We must use task-private storage to prevent racing with a monitor process attaching to us during the preemptible copy_to_user() step. Therefore we cannot store the snapshot in the perfctr object itself. */ tmp = kmalloc(sizeof(*tmp), GFP_USER); if (!tmp) return -ENOMEM; /* PREEMPT note: While we're reading our own control, another process may ptrace ATTACH to us and update our control. Disable preemption to ensure we get a consistent copy. Not needed for other cases since the perfctr is either unlinked or its owner is ptrace ATTACH suspended by us. */ if (tsk == current) preempt_disable(); switch (cmd) { case VPERFCTR_READ_SUM: { int j; vperfctr_sample(perfctr); tmp->sum.tsc = perfctr->cpu_state.tsc_sum; for(j = 0; j < ARRAY_SIZE(tmp->sum.pmc); ++j) tmp->sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; tmpbytes = sizeof(tmp->sum); } break; case VPERFCTR_READ_CONTROL: tmp->control.si_signo = perfctr->si_signo; tmp->control.cpu_control = perfctr->cpu_state.control; tmp->control.preserve = 0; tmpbytes = sizeof(tmp->control); break; case VPERFCTR_READ_CHILDREN: if (tsk) spin_lock(&perfctr->children_lock); tmp->children = perfctr->children; if (tsk) spin_unlock(&perfctr->children_lock); tmpbytes = sizeof(tmp->children); break; default: tmpbytes = 0; } if (tsk == current) preempt_enable(); ret = -EINVAL; if (tmpbytes > argbytes) tmpbytes = argbytes; if (tmpbytes > 0) { ret = tmpbytes; if (copy_to_user(argp, tmp, tmpbytes)) ret = -EFAULT; } kfree(tmp); return ret; }
NORET_TYPE void panic(const char *fmt, ...)
{
	long i;
	static char buf[1024];
	va_list args;
#if defined(CONFIG_S390)
	unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif
	int count, chr_count;

	/*
	 * It's possible to come here directly from a panic-assertion and
	 * not have preempt disabled.  Some functions called from here want
	 * preempt to be disabled.  No point enabling it later though...
	 */
	preempt_disable();

	bust_spinlocks(1);
	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	printk(KERN_EMERG "Kernel panic - not syncing: %s\n", buf);
	bust_spinlocks(0);

#ifdef CONFIG_KERNEL_PANIC_DUMP
	PANIC_Current += sprintf(PANIC_Current,
				 "Kernel panic - not syncing: %s\n", buf);

	if (!multiple_panic) {
		multiple_panic++;

		printk(KERN_EMERG "PANIC_DUMP Test :\n");
		count = PANIC_Current - PANIC_Base;
		printk("count : %d\n", count);
		chr_count = 0;

		/* Replay the accumulated panic buffer to the console in
		 * 1024-byte chunks, reusing buf as the bounce buffer. */
		while (count) {
			memset(buf, 0x0, 1024);
			if (count > 1024) {
				memcpy(buf, PANIC_Base + chr_count, 1024);
				printk("%s", buf);
				chr_count += 1024;
				count -= 1024;
			} else {
				memcpy(buf, PANIC_Base + chr_count, count);
				printk("%s", buf);
				chr_count += count;
				count -= count;
			}
		}

		{
			mm_segment_t old_fs;
			struct file *filp;
			int writelen;
			loff_t pos = 0;
			fl_owner_t id = current->files;
			struct timeval val;
			struct tm *ptm, ptmTemp;
			char dt[35];

			preempt_enable();
			old_fs = get_fs();
			set_fs(KERNEL_DS);

			do_gettimeofday(&val);
			ptm = localtime_r(&val.tv_sec, &ptmTemp);
			memset(dt, 0x00, sizeof(dt));

			/* file name format: YYYYMMDDhhmm */
			sprintf(dt, "/data/KERNEL_PANIC%04d%02d%02d%02d%02d.txt",
				ptm->tm_year + 1900, ptm->tm_mon + 1,
				ptm->tm_mday, ptm->tm_hour, ptm->tm_min);
			printk("Panic log file is %s\n", dt);

			count = PANIC_Current - PANIC_Base;
			chr_count = 0;
			filp = filp_open(dt, O_CREAT | O_WRONLY, 0666);
			if (IS_ERR(filp))
				printk("Sorry. Can't create panic file\n");
			else {
				/* vfs_write(filp, PANIC_Base, strlen(PANIC_Base), */
				while (count) {
					memset(buf, 0x0, 1024);
					if (count > 1024) {
						memcpy(buf, PANIC_Base + chr_count, 1024);
						writelen = filp->f_op->write(filp, buf, 1024,
									     &filp->f_pos);
						if (writelen == 0)
							printk("Write Error!!\n");
						else
							filp->f_op->flush(filp, id);
						chr_count += 1024;
						count -= 1024;
					} else {
						memcpy(buf, PANIC_Base + chr_count, count);
						writelen = filp->f_op->write(filp, buf, count,
									     &filp->f_pos);
						if (writelen == 0)
							printk("Write Error\n");
						else
							filp->f_op->flush(filp, id);
						chr_count += count;
						count -= count;
					}
				}
			}
			set_fs(old_fs);
			preempt_disable();
		}

		count = PANIC_Current - PANIC_Base;
		printk("\nPanic Dump END, panic message size is : %d\n", count);
	} else {
#if 0
		/* Reset Target */
#else
		while (1)
			;
#endif
	}
#endif

	/*
	 * If we have crashed and we have a crash kernel loaded let it handle
	 * everything else.
	 * Do we want to call this before we try to display a message?
	 */
	crash_kexec(NULL);

#ifdef CONFIG_SMP
	/*
	 * Note smp_send_stop is the usual smp shutdown function, which
	 * unfortunately means it may not be hardened to work in a panic
	 * situation.
	 */
	smp_send_stop();
#endif

	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

	if (!panic_blink)
		panic_blink = no_blink;

	if (panic_timeout > 0) {
		/*
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked..
		 */
		printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
		for (i = 0; i < panic_timeout * 1000; ) {
			touch_nmi_watchdog();
			i += panic_blink(i);
			mdelay(1);
			i++;
		}
		/*
		 * This will not be a clean reboot, with everything
		 * shutting down.  But if there is a chance of
		 * rebooting the system it will be rebooted.
		 */
		emergency_restart();
	}
#ifdef __sparc__
	{
		extern int stop_a_enabled;
		/* Make sure the user can actually press Stop-A (L1-A) */
		stop_a_enabled = 1;
		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
	}
#endif
#if defined(CONFIG_S390)
	disabled_wait(caller);
#endif
	local_irq_enable();
	for (i = 0; ; ) {
		touch_softlockup_watchdog();
		i += panic_blink(i);
		mdelay(1);
		i++;
	}
}
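Before the reboot/blink loop, panic() runs atomic_notifier_call_chain(&panic_notifier_list, 0, buf), which is the hook point for code that wants a last word before the machine dies. The sketch below shows a minimal module registering such a callback; it assumes a kernel of roughly this vintage where panic_notifier_list is an exported atomic notifier head, and the module/function names are made up for illustration. The callback runs in panic context (interrupts off), so it must not sleep.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Called by panic() via atomic_notifier_call_chain(); msg is the
 * formatted panic string (buf above). */
static int my_panic_event(struct notifier_block *nb,
			  unsigned long event, void *msg)
{
	printk(KERN_EMERG "panic hook: %s\n", (char *)msg);
	return NOTIFY_DONE;
}

static struct notifier_block my_panic_nb = {
	.notifier_call = my_panic_event,
};

static int __init my_panic_hook_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &my_panic_nb);
	return 0;
}

static void __exit my_panic_hook_exit(void)
{
	atomic_notifier_chain_unregister(&panic_notifier_list, &my_panic_nb);
}

module_init(my_panic_hook_init);
module_exit(my_panic_hook_exit);
MODULE_LICENSE("GPL");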
static void unlock_rtas(unsigned long flags)
{
	arch_spin_unlock(&rtas.lock);
	local_irq_restore(flags);
	preempt_enable();
}
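unlock_rtas() releases the arch spinlock, restores interrupts, then re-enables preemption. The matching acquire path is not shown in this excerpt; the following is only a plausible sketch of what it has to do (take the same lock with interrupts and preemption disabled and hand the saved flags back to the caller), and the exact ordering of the irq/preempt steps in the real lock_rtas() may differ.

/* Hedged sketch of the counterpart to unlock_rtas() above; the flags
 * value returned here is what unlock_rtas() later passes to
 * local_irq_restore(). */
static unsigned long lock_rtas(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* RTAS calls must not be interrupted */
	preempt_disable();		/* ...nor migrated to another CPU    */
	arch_spin_lock(&rtas.lock);	/* serialize access to the RTAS args */
	return flags;
}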
asmlinkage int vprintk(const char *fmt, va_list args)
{
	unsigned long flags;
	int printed_len;
	char *p;
	static char printk_buf[1024];
	static int log_level_unknown = 1;

	preempt_disable();
	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
		/* If a crash is occurring during printk() on this CPU,
		 * make sure we can't deadlock */
		zap_locks();

	/* This stops the holder of console_sem just where we want him */
	spin_lock_irqsave(&logbuf_lock, flags);
	printk_cpu = smp_processor_id();

	/* Emit the output into the temporary buffer */
	printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);

#if defined(CONFIG_FEROCEON) && defined(CONFIG_DEBUG_LL)
	if (ll_debug)
		putstr(printk_buf);
#endif

	/*
	 * Copy the output into log_buf. If the caller didn't provide
	 * appropriate log level tags, we insert them here
	 */
	for (p = printk_buf; *p; p++) {
		if (log_level_unknown) {
			/* log_level_unknown signals the start of a new line */
			if (printk_time) {
				int loglev_char;
				char tbuf[50], *tp;
				unsigned tlen;
				unsigned long long t;
				unsigned long nanosec_rem;

				/*
				 * force the log level token to be
				 * before the time output.
				 */
				if (p[0] == '<' && p[1] >= '0' &&
				    p[1] <= '7' && p[2] == '>') {
					loglev_char = p[1];
					p += 3;
					printed_len -= 3;
				} else {
					loglev_char = default_message_loglevel
						+ '0';
				}
				t = printk_clock();
				nanosec_rem = do_div(t, 1000000000);
				tlen = sprintf(tbuf,
						"<%c>[%5lu.%06lu] ",
						loglev_char,
						(unsigned long)t,
						nanosec_rem/1000);

				for (tp = tbuf; tp < tbuf + tlen; tp++)
					emit_log_char(*tp);
				printed_len += tlen;
			} else {
				if (p[0] != '<' || p[1] < '0' ||
				    p[1] > '7' || p[2] != '>') {
					emit_log_char('<');
					emit_log_char(default_message_loglevel
						+ '0');
					emit_log_char('>');
					printed_len += 3;
				}
			}
			log_level_unknown = 0;
			if (!*p)
				break;
		}
		emit_log_char(*p);
		if (*p == '\n')
			log_level_unknown = 1;
	}

	if (!cpu_online(smp_processor_id())) {
		/*
		 * Some console drivers may assume that per-cpu resources have
		 * been allocated.  So don't allow them to be called by this
		 * CPU until it is officially up.  We shouldn't be calling into
		 * random console drivers on a CPU which doesn't exist yet..
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
		goto out;
	}
	if (!down_trylock(&console_sem)) {
		console_locked = 1;
		/*
		 * We own the drivers.  We can drop the spinlock and let
		 * release_console_sem() print the text
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
		console_may_schedule = 0;
#if defined(CONFIG_FEROCEON) && defined(CONFIG_MV_PRINTK_SLICE_SUPPORT)
		local_irq_restore(flags);
		release_console_sem();
		local_irq_save(flags);
#else
		release_console_sem();
#endif
	} else {
		/*
		 * Someone else owns the drivers.  We drop the spinlock, which
		 * allows the semaphore holder to proceed and to call the
		 * console drivers with the output which we just produced.
		 */
		printk_cpu = UINT_MAX;
		spin_unlock_irqrestore(&logbuf_lock, flags);
	}
out:
	preempt_enable();
	return printed_len;
}
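The copy loop in vprintk() looks for a three-character "<N>" tag (N in 0-7) at the start of every line; if present it is taken as the log level, otherwise default_message_loglevel is substituted. The standalone sketch below reproduces just that prefix check in user space; parse_loglevel() is a hypothetical helper and the hard-coded fallback level 4 merely stands in for default_message_loglevel.

#include <stdio.h>

/* Mirror of the '<' digit '>' test in the loop above: return the log
 * level and point *text at the message body after any tag. */
static int parse_loglevel(const char *line, const char **text)
{
	if (line[0] == '<' && line[1] >= '0' && line[1] <= '7' &&
	    line[2] == '>') {
		*text = line + 3;
		return line[1] - '0';
	}
	*text = line;
	return 4;	/* stand-in for default_message_loglevel */
}

int main(void)
{
	const char *text;
	int level = parse_loglevel("<3>disk error\n", &text);

	printf("level=%d text=%s", level, text);
	return 0;
}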