static uint_t gcpu_xpv_virq_intr(void) { int types[] = { XEN_MC_URGENT, XEN_MC_NONURGENT }; uint64_t fetch_id; int count = 0; int i; if (gcpu_xpv_virq_vect == -1 || gcpu_xpv_poll_bankregs_sz == 0) { gcpu_xpv_intr_unclaimed++; return (DDI_INTR_UNCLAIMED); } if (!mutex_tryenter(&gcpu_xpv_polldata_lock)) { gcpu_xpv_mca_hcall_busy++; return (DDI_INTR_CLAIMED); } for (i = 0; i < sizeof (types) / sizeof (types[0]); i++) { while (gcpu_xpv_telem_read(&gcpu_xpv_polldata, types[i], &fetch_id)) { gcpu_poll_trace(&gcpu_xpv_poll_trace_ctl, GCPU_MPT_WHAT_XPV_VIRQ, x86_mcinfo_nentries(&gcpu_xpv_polldata)); gcpu_xpv_mci_process(&gcpu_xpv_polldata, types[i], gcpu_xpv_poll_bankregs, gcpu_xpv_poll_bankregs_sz); gcpu_xpv_telem_ack(types[i], fetch_id); count++; } } mutex_exit(&gcpu_xpv_polldata_lock); return (DDI_INTR_CLAIMED); }
/*
 * Leave the memory power saving state, if it is currently active.
 *
 * The global control lock is acquired by spinning on mutex_tryenter().
 * While spinning, the routine gives up as soon as it observes that power
 * saving is inactive; any remaining house-keeping is then left to
 * fipe_enable().
 */
static void
fipe_disable(void)
{
	/*
	 * Acquiring the lock also implicitly has the same effect as
	 * calling membar_sync().
	 */
	while (!mutex_tryenter(&fipe_gbl_ctrl.lock)) {
		/* Nothing to undo: let fipe_enable() tidy up later. */
		if (fipe_gbl_ctrl.pm_active == B_FALSE)
			return;
		(void) SMT_PAUSE();
	}

	/* We own the lock; power saving may have been turned off meanwhile. */
	if (!fipe_gbl_ctrl.pm_active) {
		mutex_exit(&fipe_gbl_ctrl.lock);
		return;
	}

	/*
	 * Clear pm_active before the restore work so that other CPUs
	 * stop waiting on the flag as early as possible.
	 */
	fipe_gbl_ctrl.pm_active = B_FALSE;
	membar_producer();
	fipe_mc_restore();
	fipe_ioat_cancel();

	mutex_exit(&fipe_gbl_ctrl.lock);
}
/* * cleanvnode: grab a vnode from freelist, clean and free it. * * => Releases vnode_free_list_lock. */ static int cleanvnode(void) { vnode_t *vp; vnodelst_t *listhd; struct mount *mp; KASSERT(mutex_owned(&vnode_free_list_lock)); listhd = &vnode_free_list; try_nextlist: TAILQ_FOREACH(vp, listhd, v_freelist) { /* * It's safe to test v_usecount and v_iflag * without holding the interlock here, since * these vnodes should never appear on the * lists. */ KASSERT(vp->v_usecount == 0); KASSERT(vp->v_freelisthd == listhd); if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) continue; if (!mutex_tryenter(vp->v_interlock)) { VOP_UNLOCK(vp); continue; } mp = vp->v_mount; if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) { mutex_exit(vp->v_interlock); VOP_UNLOCK(vp); continue; } break; }
/*
 * Service every receive ring of the device once.
 *
 * A ring is skipped when its producer index equals rx_next (no packets),
 * when its poll_flag is set, or when its rx_lock is already held by someone
 * else.  Packets harvested from a ring are passed to the MAC layer only
 * after the ring lock has been dropped.
 *
 * bsp is the status block; per the ASSERT below, each ring's prod_index_p
 * must point at that ring's slot inside it.
 */
void
bge_receive(bge_t *bgep, bge_status_t *bsp)
{
	uint64_t ring;

	for (ring = 0; ring < bgep->chipid.rx_rings; ring++) {
		recv_ring_t *rrp = &bgep->recv[ring];
		mblk_t *chain;

		ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, ring));

		/* Nothing new, ring is being polled, or already in process. */
		if (*rrp->prod_index_p == rrp->rx_next || rrp->poll_flag ||
		    !mutex_tryenter(rrp->rx_lock))
			continue;

		chain = bge_receive_ring(bgep, rrp);
		mutex_exit(rrp->rx_lock);

		if (chain == NULL)
			continue;

		mac_rx_ring(bgep->mh, rrp->ring_handle, chain,
		    rrp->ring_gen_num);
	}
}
int cpr_init(int fcn) { /* * Allow only one suspend/resume process. */ if (mutex_tryenter(&cpr_slock) == 0) return (EBUSY); CPR->c_flags = 0; CPR->c_substate = 0; CPR->c_cprboot_magic = 0; CPR->c_alloc_cnt = 0; CPR->c_fcn = fcn; if (fcn == AD_CPR_REUSABLE) CPR->c_flags |= C_REUSABLE; else CPR->c_flags |= C_SUSPENDING; if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ) CPR->c_flags |= C_COMPRESSING; /* * reserve CPR_MAXCONTIG virtual pages for cpr_dump() */ CPR->c_mapping_area = i_cpr_map_setup(); if (CPR->c_mapping_area == 0) { /* no space in kernelmap */ cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n"); mutex_exit(&cpr_slock); return (EAGAIN); } DEBUG3(cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing " "kas\n", (void *)CPR->c_mapping_area)); return (0); }
/* * Invalidate the attributes on all rnodes forcing the next getattr * to go over the wire. Used to flush stale uid and gid mappings. * Maybe done on a per vfsp, or all rnodes (vfsp == NULL) */ void nfs4_rnode_invalidate(struct vfs *vfsp) { int index; rnode4_t *rp; vnode_t *vp; /* * Walk the hash queues looking for rnodes. */ for (index = 0; index < rtable4size; index++) { rw_enter(&rtable4[index].r_lock, RW_READER); for (rp = rtable4[index].r_hashf; rp != (rnode4_t *)(&rtable4[index]); rp = rp->r_hashf) { vp = RTOV4(rp); if (vfsp != NULL && vp->v_vfsp != vfsp) continue; if (!mutex_tryenter(&rp->r_statelock)) continue; /* * Expire the attributes by resetting the change * and attr timeout. */ rp->r_change = 0; PURGE_ATTRCACHE4_LOCKED(rp); mutex_exit(&rp->r_statelock); } rw_exit(&rtable4[index].r_lock); } }
/* * cleanvnode: grab a vnode from freelist, clean and free it. * * => Releases vnode_free_list_lock. */ static int cleanvnode(void) { vnode_t *vp; vnodelst_t *listhd; KASSERT(mutex_owned(&vnode_free_list_lock)); retry: listhd = &vnode_free_list; try_nextlist: TAILQ_FOREACH(vp, listhd, v_freelist) { /* * It's safe to test v_usecount and v_iflag * without holding the interlock here, since * these vnodes should never appear on the * lists. */ KASSERT(vp->v_usecount == 0); KASSERT((vp->v_iflag & VI_CLEAN) == 0); KASSERT(vp->v_freelisthd == listhd); if (!mutex_tryenter(vp->v_interlock)) continue; if ((vp->v_iflag & VI_XLOCK) == 0) break; mutex_exit(vp->v_interlock); }
/*
 * Release the caches hanging off an rnode: the symlink contents, the
 * cached ACL, the readdir cache, the xattr directory hold and the access
 * cache entries.
 *
 * The cached pointers are detached under r_statelock and freed after the
 * lock has been dropped.
 *
 * Returns 0 if r_statelock could not be acquired without blocking, the
 * result of nfs4_access_purge_rp() if nothing else was cached, and 1
 * otherwise.
 */
static int
nfs4_active_data_reclaim(rnode4_t *rp)
{
	char *linkbuf;
	int linklen;
	vsecattr_t *acl;
	vnode_t *xdir;
	bool_t has_rddir;
	int purged;

	if (!mutex_tryenter(&rp->r_statelock))
		return (0);

	/* Detach everything while the rnode state is locked. */
	linkbuf = rp->r_symlink.contents;
	linklen = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;

	acl = rp->r_secattr;
	rp->r_secattr = NULL;

	has_rddir = (rp->r_dir != NULL) ? TRUE : FALSE;

	xdir = rp->r_xattr_dir;
	rp->r_xattr_dir = NULL;

	mutex_exit(&rp->r_statelock);

	/* Access cache entries are purged regardless of what else exists. */
	purged = nfs4_access_purge_rp(rp);

	if (linkbuf == NULL && acl == NULL && has_rddir == FALSE &&
	    xdir == NULL)
		return (purged);

	/* Symbolic link cache. */
	if (linkbuf != NULL) {
		kmem_free((void *)linkbuf, linklen);
	}

	/* Cached ACL. */
	if (acl != NULL)
		nfs4_acl_free_cache(acl);

	nfs4_purge_rddir_cache(RTOV4(rp));

	/* Hold on the xattr directory vnode. */
	if (xdir != NULL)
		VN_RELE(xdir);

	return (1);
}
/*
 * Debugger command: send a signal to a process.
 *
 * The command line is parsed as "pid[,sig]"; when no signal is given, 15
 * is used.  The signal is delivered through kpsignal2() with a ksiginfo
 * marked SI_USER and pid/uid 0.  The addr, haddr, count and modif
 * arguments are not used.
 *
 * Outside of _KERNEL builds the command only prints that it is not
 * supported.
 */
void
db_kill_proc(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
#ifdef _KERNEL	/* XXX CRASH(8) */
	struct proc *p;
	ksiginfo_t ksi;
	db_expr_t pid, sig;
	int tok;
	int locked;

	/* What pid? */
	if (!db_expression(&pid)) {
		db_error("pid?\n");
		/*NOTREACHED*/
	}

	/* What sig? */
	tok = db_read_token();
	if (tok != tCOMMA) {
		db_unread_token(tok);
		sig = 15;
	} else if (!db_expression(&sig)) {
		db_error("sig?\n");
		/*NOTREACHED*/
	}

	if (db_read_token() != tEOL) {
		db_error("?\n");
		/*NOTREACHED*/
	}

	/* We might stop when the mutex is held or when not. */
	locked = mutex_tryenter(proc_lock);
#ifdef DIAGNOSTIC
	if (!locked) {
		db_error("could not acquire proc_lock mutex\n");
		/*NOTREACHED*/
	}
#endif

	p = proc_find((pid_t)pid);
	if (p == NULL) {
		/* Only drop proc_lock if we were the ones who took it. */
		if (locked)
			mutex_exit(proc_lock);
		db_error("no such proc\n");
		/*NOTREACHED*/
	}

	KSI_INIT(&ksi);
	ksi.ksi_signo = sig;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = 0;
	ksi.ksi_uid = 0;

	mutex_enter(p->p_lock);
	kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);

	if (locked)
		mutex_exit(proc_lock);
#else
	db_printf("This command is not currently supported.\n");
#endif
}
/*ARGSUSED*/ int spec_sync(struct vfs *vfsp, short flag, struct cred *cr) { struct snode *sync_list; register struct snode **spp, *sp, *spnext; register struct vnode *vp; if (mutex_tryenter(&spec_syncbusy) == 0) return (0); if (flag & SYNC_ATTR) { mutex_exit(&spec_syncbusy); return (0); } mutex_enter(&stable_lock); sync_list = NULL; /* * Find all the snodes that are dirty and add them to the sync_list */ for (spp = stable; spp < &stable[STABLESIZE]; spp++) { for (sp = *spp; sp != NULL; sp = sp->s_next) { vp = STOV(sp); /* * Don't bother sync'ing a vp if it's * part of a virtual swap device. */ if (IS_SWAPVP(vp)) continue; if (vp->v_type == VBLK && vn_has_cached_data(vp)) { /* * Prevent vp from going away before we * we get a chance to do a VOP_PUTPAGE * via sync_list processing */ VN_HOLD(vp); sp->s_list = sync_list; sync_list = sp; } } } mutex_exit(&stable_lock); /* * Now write out all the snodes we marked asynchronously. */ for (sp = sync_list; sp != NULL; sp = spnext) { spnext = sp->s_list; vp = STOV(sp); (void) VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_ASYNC, cr); VN_RELE(vp); /* Release our hold on vnode */ } mutex_exit(&spec_syncbusy); return (0); }
/*
 * Try to put the system into the memory power saving state.
 *
 * The condition (fipe_gbl_ctrl.cpu_count == ncpus) is re-checked after
 * every step to make sure that no other CPU is trying to wake the system up
 * from the memory power saving state.  If a CPU is waking up the system,
 * fipe_disable() will set fipe_gbl_ctrl.pm_active to false as soon as
 * possible and allow other CPUs to continue, and it will take the
 * responsibility to recover the system from the memory power saving state.
 *
 * throttle	value handed to fipe_mc_change().
 * check_func	optional callback (may be NULL) invoked once with check_arg
 *		before anything else is attempted.
 * check_arg	opaque argument for check_func.
 *
 * Each failed step undoes the steps that preceded it: pm_active is cleared,
 * then fipe_ioat_cancel() and/or fipe_mc_restore() are called depending on
 * how far the sequence got.
 */
static void
fipe_enable(int throttle, cpu_idle_check_wakeup_t check_func, void* check_arg)
{
	extern void membar_sync(void);

	FIPE_KSTAT_DETAIL_INC(pm_tryenter_cnt);

	/*
	 * Check CPU wakeup events.
	 */
	if (check_func != NULL) {
		(*check_func)(check_arg);
	}

	/*
	 * Try to acquire mutex, which also implicitly has the same effect
	 * of calling membar_sync().
	 * If mutex_tryenter fails, that means other CPU is waking up.
	 */
	if (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
	/*
	 * Handle a special race condition for the case that a CPU wakes
	 * and then enters into idle state within a short period.
	 * This case can't be reliably detected by cpu_count mechanism.
	 */
	} else if (fipe_gbl_ctrl.pm_active) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		mutex_exit(&fipe_gbl_ctrl.lock);
	} else {
		/* Publish pm_active before sampling cpu_count. */
		fipe_gbl_ctrl.pm_active = B_TRUE;
		membar_sync();
		if (fipe_gbl_ctrl.cpu_count != ncpus) {
			/* Raced before anything was started: just back out. */
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_ioat_trigger() != 0) {
			/* IOAT trigger failed; nothing else to undo. */
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_gbl_ctrl.cpu_count != ncpus ||
		    fipe_mc_change(throttle) != 0) {
			/*
			 * Either raced after the IOAT trigger or the MC
			 * change failed: cancel the IOAT operation.  Only
			 * the former is counted as a race.
			 */
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_ioat_cancel();
			if (fipe_gbl_ctrl.cpu_count != ncpus) {
				FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			}
		} else if (fipe_gbl_ctrl.cpu_count != ncpus) {
			/* Raced after the MC change: undo both steps. */
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_mc_restore();
			fipe_ioat_cancel();
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		} else {
			FIPE_KSTAT_DETAIL_INC(pm_success_cnt);
		}
		mutex_exit(&fipe_gbl_ctrl.lock);
	}
}
/*
 * Acquire mtx, waiting if necessary.
 *
 * An uncontended mutex is taken with a single mutex_tryenter().  Otherwise
 * the caller registers itself in upm_wanted and alternates between
 * retrying and waiting on the mutex's condition variable until the
 * try succeeds.
 */
void
mutex_enter(kmutex_t *mtx)
{
	UPMTX(mtx);

	/* Fast path: nobody holds it. */
	if (mutex_tryenter(mtx))
		return;

	/* Slow path: announce interest, then retry/wait until we win. */
	upm->upm_wanted++;
	for (;;) {
		if (mutex_tryenter(mtx))
			break;
		rump_schedlock_cv_wait(upm->upm_rucv);
	}
	upm->upm_wanted--;

	KASSERT(upm->upm_wanted >= 0);
}
/*
 * Worker for mutex test 1: attempt a non-blocking acquire of the test
 * mutex and record the outcome in mp_rc (0 on success, -EBUSY if the
 * mutex was already held).  The mutex is released again right away when
 * the attempt succeeds.
 */
static void
splat_mutex_test1_func(void *arg)
{
	mutex_priv_t *priv = (mutex_priv_t *)arg;

	ASSERT(priv->mp_magic == SPLAT_MUTEX_TEST_MAGIC);

	if (!mutex_tryenter(&priv->mp_mtx)) {
		priv->mp_rc = -EBUSY;
		return;
	}

	priv->mp_rc = 0;
	mutex_exit(&priv->mp_mtx);
}
/*
 * Acquire the I2C bus lock of the HDMI controller.
 *
 * With I2C_F_POLL set the lock is only tried and EBUSY is returned if it
 * is not immediately available; without it the call waits for the lock.
 * Returns 0 once the lock is held.
 */
static int
awin_hdmi_i2c_acquire_bus(void *priv, int flags)
{
	struct awin_hdmi_softc *sc = priv;

	if ((flags & I2C_F_POLL) == 0) {
		mutex_enter(&sc->sc_ic_lock);
		return 0;
	}

	return mutex_tryenter(&sc->sc_ic_lock) ? 0 : EBUSY;
}
/*ARGSUSED*/
/*
 * VFS sync entry point for smbfs.
 *
 * Flushes via smbfs_rflush() unless this is a SYNC_ATTR request or another
 * sync already holds smbfs_syncbusy.  Always returns 0.
 */
static int
smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
{
	if (flag & SYNC_ATTR)
		return (0);

	/* A sync is already running; do not pile up behind it. */
	if (mutex_tryenter(&smbfs_syncbusy) == 0)
		return (0);

	/*
	 * Cross-zone calls are OK here, since this translates to a
	 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
	 */
	smbfs_rflush(vfsp, cr);
	mutex_exit(&smbfs_syncbusy);

	return (0);
}
/*
 * Acquire the kernel mutex behind an ACPI lock handle.
 *
 * When running on the CPU's idle thread the lock is obtained by spinning
 * on mutex_tryenter(); any other thread uses a plain mutex_enter().
 *
 * Returns AE_BAD_PARAMETER for a NULL handle, AE_OK otherwise.
 * NOTE(review): the declared return type is ACPI_CPU_FLAGS although the
 * values returned are status codes -- confirm what callers expect.
 */
ACPI_CPU_FLAGS
AcpiOsAcquireLock(ACPI_HANDLE Handle)
{
	kmutex_t *lock = (kmutex_t *)Handle;

	if (Handle == NULL)
		return (AE_BAD_PARAMETER);

	if (curthread != CPU->cpu_idle_thread) {
		mutex_enter(lock);
		return (AE_OK);
	}

	/* Idle thread: spin until the lock becomes available. */
	while (!mutex_tryenter(lock))
		continue;

	return (AE_OK);
}
/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.
 *
 *	c		callout to schedule; c->c_func must be non-NULL.
 *	lock		spin mutex that is released with mutex_spin_exit()
 *			on every return path (presumably the lock covering
 *			c's current CPU, held by the caller -- confirm at
 *			the call sites).
 *	to_ticks	non-negative delay; added to the chosen CPU's
 *			cc_ticks to form the expiry time c->c_time.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		/* Difference is tested by sign: negative means "earlier". */
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	/*
	 * Not pending.  Prefer the current CPU's queue, but stay on the
	 * old one if the callout is bound, if we are already on that CPU,
	 * or if the current CPU's callout lock cannot be taken without
	 * waiting.
	 */
	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		/* Drop the lock obtained by the tryenter above. */
		mutex_spin_exit(cc->cc_lock);
	}
	mutex_spin_exit(lock);
}
/*
 * Acquire the I2C bus mutex.
 *
 * In polled mode (I2C_F_POLL) the mutex is tried repeatedly with a
 * delay(50) between attempts; otherwise the call waits in mutex_enter().
 * Always returns 0, with the mutex held.
 */
static int
coram_iic_acquire_bus(void *cookie, int flags)
{
	struct coram_iic_softc *cic = cookie;

	if ((flags & I2C_F_POLL) == 0) {
		mutex_enter(&cic->cic_busmutex);
		return 0;
	}

	while (!mutex_tryenter(&cic->cic_busmutex))
		delay(50);

	return 0;
}
/*
 * Hand the accumulated kprintf hash over to the random subsystem.
 *
 * Does nothing unless data has been added since the last hand-over
 * (kprnd_added) and kprintf_mtx can be taken without waiting.  The digest
 * is finalised into kprnd_accum, submitted with rnd_add_data(), and the
 * hash context is restarted and seeded with the digest just produced.
 *
 * The bytes and priv arguments are not used.
 */
static void
kprintf_rnd_get(size_t bytes, void *priv)
{
	if (!kprnd_added)
		return;

	KASSERT(kprintf_inited);

	if (!mutex_tryenter(&kprintf_mtx))
		return;

	SHA512_Final(kprnd_accum, &kprnd_sha);
	rnd_add_data(&rnd_printf_source, kprnd_accum, sizeof(kprnd_accum), 0);
	kprnd_added = 0;

	/* This, we must do, since we called _Final. */
	SHA512_Init(&kprnd_sha);

	/* This is optional but seems useful. */
	SHA512_Update(&kprnd_sha, kprnd_accum, sizeof(kprnd_accum));

	mutex_exit(&kprintf_mtx);
}
/*
 * Return a buffer w/o sleeping.
 *
 * Looks up the first non-stale buffer for (dev, blkno) on its hash chain.
 * The buffer is returned (removed from the available list via notavail(),
 * with its semaphore held) only if the hash lock and the buffer semaphore
 * can both be taken immediately and the buffer is marked B_DONE.  In every
 * other case NULL is returned.
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct hbuf *hp = &hbuf[bio_bhash(dev, blkno)];
	kmutex_t *hmp = &hp->b_lock;
	struct buf *head = (struct buf *)hp;
	struct buf *bp;

	if (!mutex_tryenter(hmp))
		return (NULL);

	/* Only the first matching buffer on the chain is ever considered. */
	for (bp = head->b_forw; bp != head; bp = bp->b_forw) {
		if (bp->b_blkno == blkno && bp->b_edev == dev &&
		    !(bp->b_flags & B_STALE))
			break;
	}

	/* Get access to a valid buffer without sleeping. */
	if (bp != head && sema_tryp(&bp->b_sem)) {
		if (bp->b_flags & B_DONE) {
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			return (bp);
		}
		sema_v(&bp->b_sem);
	}

	mutex_exit(hmp);
	return (NULL);
}
/*
 * System filesystem synchronizer daemon.
 *
 * Runs forever.  Each round processes one slot of
 * syncer_workitem_pending[] (selected by syncer_delayno, which then
 * advances and wraps at syncer_last): every vnode at the head of the slot
 * is lazily fsync'ed if it can be locked without waiting, and is then
 * re-queued through vn_syncer_add1().  A round that finishes within the
 * same second it started in is followed by a kpause() of hz ticks.
 *
 * The arg parameter is not used.
 */
void
sched_sync(void *arg)
{
	synclist_t *slp;
	struct vnode *vp;
	time_t starttime;
	bool synced;

	for (;;) {
		mutex_enter(&syncer_mutex);
		mutex_enter(&syncer_data_lock);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno >= syncer_last)
			syncer_delayno = 0;

		while ((vp = TAILQ_FIRST(slp)) != NULL) {
			/*
			 * We are locking in the wrong direction, hence
			 * only a try on the vnode interlock.
			 */
			synced = false;
			if (mutex_tryenter(vp->v_interlock)) {
				/* Drop the list lock around the vnode work. */
				mutex_exit(&syncer_data_lock);
				if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
					synced = true;
					(void) VOP_FSYNC(vp, curlwp->l_cred,
					    FSYNC_LAZY, 0, 0);
					vput(vp);
				}
				mutex_enter(&syncer_data_lock);
			}

			/*
			 * XXX The vnode may have been recycled, in which
			 * case it may have a new identity.
			 */
			if (TAILQ_FIRST(slp) == vp) {
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 *
				 * Try again sooner rather than later if
				 * we were unable to lock the vnode.  Lock
				 * failure should not prevent us from doing
				 * the sync "soon".
				 *
				 * If we locked it yet arrive here, it's
				 * likely that lazy sync is in progress and
				 * so the vnode still has dirty metadata.
				 * syncdelay is mainly to get this vnode out
				 * of the way so we do not consider it again
				 * "soon" in this loop, so the delay time is
				 * not critical as long as it is not "soon".
				 * While write-back strategy is the file
				 * system's domain, we expect write-back to
				 * occur no later than syncdelay seconds
				 * into the future.
				 */
				vn_syncer_add1(vp,
				    synced ? syncdelay : lockdelay);
			}
		}
		mutex_exit(&syncer_mutex);

		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime) {
			kpause("syncer", false, hz, &syncer_data_lock);
		}
		mutex_exit(&syncer_data_lock);
	}
}
/*
 * Queue a request on its device wait queue and try to get it started.
 *
 * cccp		HBA instance state.
 * gcmdp	request being submitted.
 * gtgtp	target instance; its gt_gdevp selects the device queue.
 * timeout	handed to ghd_timer_start() for the request's abort timer.
 * polled	non-zero: wait for completion via ghd_poll() before
 *		returning.  Zero: return right after queueing (and, if the
 *		HBA mutex is free, after processing the wait queue).
 * intr_status	passed through to ghd_poll() in polled mode.
 *
 * Neither ccc_hba_mutex nor ccc_waitq_mutex may be held by the caller.
 * Every path returns TRAN_ACCEPT.
 */
int
ghd_transport(ccc_t *cccp, gcmd_t *gcmdp, gtgt_t *gtgtp, ulong_t timeout,
    int polled, void *intr_status)
{
	gdev_t *gdevp = gtgtp->gt_gdevp;

	ASSERT(!mutex_owned(&cccp->ccc_hba_mutex));
	ASSERT(!mutex_owned(&cccp->ccc_waitq_mutex));

	if (polled) {
		/*
		 * Grab the HBA mutex so no other requests are started
		 * until after this one completes.
		 */
		mutex_enter(&cccp->ccc_hba_mutex);

		GDBG_START(("ghd_transport: polled"
		    " cccp 0x%p gdevp 0x%p gtgtp 0x%p gcmdp 0x%p\n",
		    (void *)cccp, (void *)gdevp, (void *)gtgtp, (void *)gcmdp));

		/*
		 * Lock the doneq so no other thread flushes the Q.
		 */
		ghd_doneq_pollmode_enter(cccp);
	}
#if defined(GHD_DEBUG) || defined(__lint)
	else {
		GDBG_START(("ghd_transport: non-polled"
		    " cccp 0x%p gdevp 0x%p gtgtp 0x%p gcmdp 0x%p\n",
		    (void *)cccp, (void *)gdevp, (void *)gtgtp, (void *)gcmdp));
	}
#endif
	/*
	 * add this request to the tail of the waitq
	 */
	gcmdp->cmd_waitq_level = 1;
	mutex_enter(&cccp->ccc_waitq_mutex);
	L2_add(&GDEV_QHEAD(gdevp), &gcmdp->cmd_q, gcmdp);

	/*
	 * Add this request to the packet timer active list and start its
	 * abort timer.
	 */
	gcmdp->cmd_state = GCMD_STATE_WAITQ;
	ghd_timer_start(cccp, gcmdp, timeout);

	/*
	 * Check the device wait queue throttle and perhaps move
	 * some requests to the end of the HBA wait queue.
	 */
	ghd_waitq_shuffle_up(cccp, gdevp);

	if (!polled) {
		/*
		 * See if the HBA mutex is available but use the
		 * tryenter so I don't deadlock.
		 */
		if (!mutex_tryenter(&cccp->ccc_hba_mutex)) {
			/* The HBA mutex isn't available */
			GDBG_START(("ghd_transport: !mutex cccp 0x%p\n",
			    (void *)cccp));
			mutex_exit(&cccp->ccc_waitq_mutex);
			return (TRAN_ACCEPT);
		}
		GDBG_START(("ghd_transport: got mutex cccp 0x%p\n",
		    (void *)cccp));

		/*
		 * start as many requests as possible from the head
		 * of the HBA wait queue
		 */
		ghd_waitq_process_and_mutex_exit(cccp);

		/* Per the asserts, the call above released both mutexes. */
		ASSERT(!mutex_owned(&cccp->ccc_hba_mutex));
		ASSERT(!mutex_owned(&cccp->ccc_waitq_mutex));

		return (TRAN_ACCEPT);
	}

	/*
	 * If polled mode (FLAG_NOINTR specified in scsi_pkt flags),
	 * then ghd_poll() waits until the request completes or times out
	 * before returning.
	 */
	mutex_exit(&cccp->ccc_waitq_mutex);
	(void) ghd_poll(cccp, GHD_POLL_REQUEST, 0, gcmdp, gtgtp, intr_status);
	ghd_doneq_pollmode_exit(cccp);

	/* Re-take the waitq mutex; the HBA mutex is still held from above. */
	mutex_enter(&cccp->ccc_waitq_mutex);
	ghd_waitq_process_and_mutex_exit(cccp);

	/* call HBA's completion function but don't do callback to target */
	(*cccp->ccc_hba_complete)(cccp->ccc_hba_handle, gcmdp, FALSE);

	GDBG_START(("ghd_transport: polled done cccp 0x%p\n", (void *)cccp));
	return (TRAN_ACCEPT);
}
/*
 * Convert a znode from the old bonus-buffer layout to system attributes.
 *
 * hdl	SA handle of the znode; its user data is the znode_t.
 * tx	transaction in which the bonus type change and attribute rewrite
 *	are performed.
 *
 * The upgrade is silently skipped when the ACL is not cached, when the
 * vnode is a symlink, when z_lock is held by another thread, or when the
 * bulk lookup of the existing attributes fails.  On success z_is_sa is set.
 */
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
	dmu_buf_t *db = sa_get_db(hdl);
	znode_t *zp = sa_get_userdata(hdl);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	sa_bulk_attr_t bulk[20];
	int count = 0;
	sa_bulk_attr_t sa_attrs[20] = { { 0 } };
	zfs_acl_locator_cb_t locate = { 0 };
	uint64_t uid, gid, mode, rdev, xattr, parent;
	uint64_t crtime[2], mtime[2], ctime[2];
	zfs_acl_phys_t znode_acl;
	char scanstamp[AV_SCANSTAMP_SZ];
	boolean_t drop_lock = B_FALSE;

	/*
	 * No upgrade if ACL isn't cached
	 * since we won't know which locks are held
	 * and reading the ACL would require special "locked"
	 * interfaces that would be messy
	 */
	if (zp->z_acl_cached == NULL || vnode_islnk(ZTOV(zp)))
		return;

	/*
	 * If the z_lock is held and we aren't the owner
	 * then just return since we don't want to deadlock
	 * trying to update the status of z_is_sa.  This
	 * file can then be upgraded at a later time.
	 *
	 * Otherwise, we know we are doing the
	 * sa_update() that caused us to enter this function.
	 */
	if (mutex_owner(&zp->z_lock) != curthread) {
		if (mutex_tryenter(&zp->z_lock) == 0)
			return;
		else
			drop_lock = B_TRUE;
	}

	/* First do a bulk query of the attributes that aren't cached */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
	    &znode_acl, 88);

	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
		goto done;

	/*
	 * While the order here doesn't matter it's best to try and organize
	 * it in such a way to pick up an already existing layout number
	 */
	count = 0;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
	    NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
	    NULL, &parent, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    zp->z_atime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    &mtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
	    &crtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);

	/*
	 * Only device nodes carry an rdev.  Symlinks never get this far
	 * (see the early return above), so the second test must be for
	 * character devices; otherwise their rdev is dropped by the upgrade.
	 */
	if (vnode_isblk(zp->z_vnode) || vnode_ischr(zp->z_vnode))
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);

	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
	    &zp->z_acl_cached->z_acl_count, 8);

	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
		zfs_acl_xform(zp, zp->z_acl_cached, CRED());

	locate.cb_aclp = zp->z_acl_cached;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

	if (xattr)
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
		    NULL, &xattr, 8);

	/* if scanstamp then add scanstamp */
	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
		bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
		    scanstamp, AV_SCANSTAMP_SZ);
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
		    NULL, scanstamp, AV_SCANSTAMP_SZ);
		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
	}

	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
	    count, tx) == 0);

	/* The old external ACL object is no longer referenced. */
	if (znode_acl.z_acl_extern_obj)
		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
		    znode_acl.z_acl_extern_obj, tx));

	zp->z_is_sa = B_TRUE;
done:
	/* Only release z_lock if this function acquired it. */
	if (drop_lock)
		mutex_exit(&zp->z_lock);
}
/*
 * Non-blocking acquire attempt on mtx.  Forwards to mutex_tryenter() and
 * returns its result unchanged.
 */
int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{

	return mutex_tryenter(mtx);
}
/* ARGSUSED */
/*
 * Timer callback that reaps expired TIME_WAIT connections of one squeue.
 *
 * arg is the squeue_t whose SQPRIVATE_TCP private data holds the time-wait
 * list, the tcp free list and the lock protecting both.
 *
 * The routine first discards a free list that was marked tcp_in_free_list
 * by a previous run, then walks the time-wait list from its head until it
 * reaches an entry that has not expired yet.  Each expired connection is
 * either torn down right here (fast path) or handed to its squeue through
 * tcp_timewait_close (slow path).  Finally the timer is re-armed if the
 * list is still non-empty.
 *
 * tcp_time_wait_lock is held on entry to every loop iteration; each branch
 * of the loop body drops it and it is re-taken before the next iteration.
 */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	/* This invocation is the timer firing: no timer is pending now. */
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	/*
	 * A free list whose head is still flagged tcp_in_free_list (set at
	 * the end of a previous run) is released entry by entry.
	 */
	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, its better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list. So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					/*
					 * The list lock is already held
					 * again, so skip the re-acquire at
					 * the bottom of the loop.
					 */
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				/*
				 * Slow path with the fanout lock held: take
				 * a reference for the squeue hand-off, then
				 * drop all locks.
				 */
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			/*
			 * Fanout lock is busy: same slow path as above, but
			 * without ever having held the fanout lock.
			 */
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	/* Flag the current free list so the next run releases it. */
	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
/*
 * Set up a zvol on its first open: own the objset, read the volume size,
 * hold the bonus buffer, open the ZIL and set the disk's capacity and
 * read-only state.
 *
 * Returns 0 on success, -ERESTARTSYS when spa_namespace_lock is contended
 * (the caller is expected to retry), or the negated error from the failing
 * DMU/ZAP call.  On failure the objset is disowned again and
 * zv->zv_objset is left untouched.
 */
static int
zvol_first_open(zvol_state_t *zv)
{
	objset_t *os;
	uint64_t volsize;
	int locked = 0;
	int error;
	uint64_t ro;

	/*
	 * In all other cases the spa_namespace_lock is taken before the
	 * bdev->bd_mutex lock.  But in this case the Linux __blkdev_get()
	 * function calls fops->open() with the bdev->bd_mutex lock held.
	 *
	 * To avoid a potential lock inversion deadlock we preemptively
	 * try to take the spa_namespace_lock().  Normally it will not
	 * be contended and this is safe because spa_open_common() handles
	 * the case where the caller already holds the spa_namespace_lock.
	 *
	 * When it is contended we risk a lock inversion if we were to
	 * block waiting for the lock.  Luckily, the __blkdev_get()
	 * function allows us to return -ERESTARTSYS which will result in
	 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
	 * called again.  This process can be repeated safely until both
	 * locks are acquired.
	 */
	if (!mutex_owned(&spa_namespace_lock)) {
		locked = mutex_tryenter(&spa_namespace_lock);
		if (!locked)
			return (-ERESTARTSYS);
	}

	/* lie and say we're read-only */
	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
	if (error)
		goto out_mutex;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		goto out_mutex;
	}

	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		goto out_mutex;
	}

	/*
	 * Publish the objset only once nothing can fail any more, so that
	 * zv->zv_objset never refers to an objset that has been disowned.
	 */
	zv->zv_objset = os;

	set_capacity(zv->zv_disk, volsize >> 9);
	zv->zv_volsize = volsize;
	zv->zv_zilog = zil_open(os, zvol_get_data);

	VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0);
	if (ro || dmu_objset_is_snapshot(os) ||
	    !spa_writeable(dmu_objset_spa(os))) {
		set_disk_ro(zv->zv_disk, 1);
		zv->zv_flags |= ZVOL_RDONLY;
	} else {
		set_disk_ro(zv->zv_disk, 0);
		zv->zv_flags &= ~ZVOL_RDONLY;
	}

out_mutex:
	/* Only drop the namespace lock if this function took it. */
	if (locked)
		mutex_exit(&spa_namespace_lock);

	return (-error);
}
/*
 * cpr: entry point that dispatches a checkpoint/resume request.
 *
 * fcn selects the operation (AD_* / DEV_* command codes).  mdep is only
 * read for AD_DEVICE_SUSPEND_TO_RAM, where it is treated as a string and
 * converted with atoi() into the major number stored in cpr_device.
 *
 * The function works in stages:
 *   1. A first switch rejects commands not supported on this architecture
 *	(ENOTSUP) and picks the sleep type: CPR_TODISK for the sparc cases,
 *	CPR_TORAM for the x86 cases.
 *   2. A second switch validates the request and records test settings
 *	(cpr_test_mode, cpr_test_point, cpr_device).  Several commands are
 *	fully handled here and return immediately: the AD_CPR_DEBUG* codes,
 *	AD_CPR_SUSP_DEVICES, and on sparc AD_CPR_CHECK and AD_CPR_PRINT.
 *   3. The *_CHECK_SUSPEND_TO_RAM queries return 0 once the sleep type is
 *	known to be supported; on sparc, REUSEINIT/REUSEFINI run under
 *	cpr_slock (taken with mutex_tryenter, EBUSY if unavailable).
 *   4. Everything else goes through cpr_init(), cpr_main() and cpr_done().
 *
 * Returns 0 on success, otherwise an errno value (ENOTSUP, EINVAL, EBUSY
 * or whatever a called helper returned).
 */
int
cpr(int fcn, void *mdep)
{

#if defined(__sparc)
	/* Messages used only by the sparc reusable-statefile paths. */
	static const char noswapstr[] = "reusable statefile requires "
	    "that no swap area be configured.\n";

	static const char blockstr[] = "reusable statefile must be "
	    "a block device. See power.conf(4) and pmconfig(1M).\n";

	static const char normalfmt[] = "cannot run normal "
	    "checkpoint/resume when in reusable statefile mode. "
	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
	    "to exit reusable statefile mode.\n";

	static const char modefmt[] = "%s in reusable mode.\n";
#endif

	register int rc = 0;
	/*
	 * Assigned only by the first switch below; commands that do not
	 * match a case there must return before it is read.
	 */
	int cpr_sleeptype;

	/*
	 * First, reject commands that we don't (yet) support on this arch.
	 * This is easier to understand broken out like this than grotting
	 * through the second switch below.
	 */

	switch (fcn) {
#if defined(__sparc)
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		return (ENOTSUP);
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_SUSP_DEVICES:
		cpr_sleeptype = CPR_TODISK;
		break;
#endif
#if defined(__x86)
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_PRINT:
		return (ENOTSUP);
	/* The DEV_* values need to be removed after sys-syspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CPR_SUSP_DEVICES:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
	case AD_FORCE_SUSPEND_TO_RAM:
	case AD_DEVICE_SUSPEND_TO_RAM:
		cpr_sleeptype = CPR_TORAM;
		break;
#endif
	}
#if defined(__sparc)
	/*
	 * Need to know if we're in reusable mode, but we will likely have
	 * rebooted since REUSEINIT, so we have to get the info from the
	 * file system
	 */
	if (!cpr_reusable_mode)
		cpr_reusable_mode = cpr_get_reusable_mode();

	cpr_forget_cprconfig();
#endif

	/*
	 * Second pass: per-command validation and test-mode setup.  Cases
	 * that "break" continue on to the common suspend path below; cases
	 * that "return" are complete.
	 */
	switch (fcn) {

#if defined(__sparc)
	case AD_CPR_REUSEINIT:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_NOCOMPRESS:
	case AD_CPR_COMPRESS:
	case AD_CPR_FORCE:
		/* A normal checkpoint is refused while in reusable mode. */
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSABLE:
		/* Same checks as REUSEINIT, plus the mount check. */
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		if ((rc = cpr_reusable_mount_check()) != 0)
			return (rc);
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSEFINI:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		cpr_test_mode = 0;
		break;

	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 1;
		break;

	case AD_CPR_CHECK:
		/* Pure query: report whether a checkpoint could be taken. */
		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
			return (ENOTSUP);
		return (0);

	case AD_CPR_PRINT:
		CPR_STAT_EVENT_END("POST CPR DELAY");
		cpr_stat_event_print();
		return (0);
#endif

	case AD_CPR_DEBUG0:
		/* DEBUG0 clears every debug bit. */
		cpr_debug = 0;
		return (0);

	case AD_CPR_DEBUG1:
	case AD_CPR_DEBUG2:
	case AD_CPR_DEBUG3:
	case AD_CPR_DEBUG4:
	case AD_CPR_DEBUG5:
	case AD_CPR_DEBUG7:
	case AD_CPR_DEBUG8:
		cpr_debug |= CPR_DEBUG_BIT(fcn);
		return (0);

	case AD_CPR_DEBUG9:
		/* NOTE(review): DEBUG9 sets CPR_DEBUG6, not a "9" bit. */
		cpr_debug |= CPR_DEBUG6;
		return (0);

	/* The DEV_* values need to be removed after sys-syspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		cpr_test_point = LOOP_BACK_NONE;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
		cpr_test_point = LOOP_BACK_PASS;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
		cpr_test_point = LOOP_BACK_FAIL;
		break;

	case AD_FORCE_SUSPEND_TO_RAM:
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		break;

	case AD_DEVICE_SUSPEND_TO_RAM:
		if (mdep == NULL) {
			/* Didn't pass enough arguments */
			return (EINVAL);
		}
		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
		/* mdep is read as a decimal string naming the major number. */
		cpr_device = (major_t)atoi((char *)mdep);
		break;

	case AD_CPR_SUSP_DEVICES:
		/*
		 * Suspend the devices under the root node and then resume
		 * them; a suspend failure is only warned about and the
		 * command still returns 0.
		 */
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
			cmn_err(CE_WARN,
			    "Some devices did not suspend "
			    "and may be unusable");
		(void) cpr_resume_devices(ddi_root_node(), 0);
		return (0);

	default:
		return (ENOTSUP);
	}

	if (!i_cpr_is_supported(cpr_sleeptype))
		return (ENOTSUP);

#if defined(__sparc)
	/* Suspend to disk is only allowed with a ufs or zfs root. */
	if ((cpr_sleeptype == CPR_TODISK &&
	    !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
		return (ENOTSUP);
#endif

	/* The CHECK variants only ask whether suspend is possible. */
	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
		ASSERT(i_cpr_is_supported(cpr_sleeptype));
		return (0);
	}

#if defined(__sparc)
	/*
	 * REUSEINIT and REUSEFINI are handled here under cpr_slock and
	 * never reach cpr_init()/cpr_main().  The lock is only tried, so
	 * a concurrent holder yields EBUSY instead of blocking.
	 */
	if (fcn == AD_CPR_REUSEINIT) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "already");
			mutex_exit(&cpr_slock);
			return (EBUSY);
		}
		rc = i_cpr_reuseinit();
		mutex_exit(&cpr_slock);
		return (rc);
	}

	if (fcn == AD_CPR_REUSEFINI) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (!cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "not");
			mutex_exit(&cpr_slock);
			return (EINVAL);
		}
		rc = i_cpr_reusefini();
		mutex_exit(&cpr_slock);
		return (rc);
	}
#endif

	/*
	 * acquire cpr serial lock and init cpr state structure.
	 */
	if (rc = cpr_init(fcn))
		return (rc);

#if defined(__sparc)
	if (fcn == AD_CPR_REUSABLE) {
		if ((rc = i_cpr_check_cprinfo()) != 0) {
			/* Drop the serial lock taken by cpr_init() above. */
			mutex_exit(&cpr_slock);
			return (rc);
		}
	}
#endif

	/*
	 * Call the main cpr routine. If we are successful, we will be coming
	 * down from the resume side, otherwise we are still in suspend.
	 */
	cpr_err(CE_CONT, "System is being suspended");
	if (rc = cpr_main(cpr_sleeptype)) {
		CPR->c_flags |= C_ERROR;
		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
		cpr_err(CE_NOTE, "Suspend operation failed.");
	} else if (CPR->c_flags & C_SUSPENDING) {

		/*
		 * In the suspend to RAM case, by the time we get
		 * control back we're already resumed
		 */
		if (cpr_sleeptype == CPR_TORAM) {
			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
			cpr_done();
			return (rc);
		}

#if defined(__sparc)

		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
		/*
		 * Back from a successful checkpoint
		 */
		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
			mdboot(0, AD_BOOT, "", B_FALSE);
			/* NOTREACHED */
		}

		/* make sure there are no more changes to the device tree */
		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
		devtree_freeze();

		/*
		 * stop other cpus and raise our priority.  since there is only
		 * one active cpu after this, and our priority will be too high
		 * for us to be preempted, we're essentially single threaded
		 * from here on out.
		 */
		PMD(PMD_SX, ("cpr: stop other cpus\n"))
		i_cpr_stop_other_cpus();

		PMD(PMD_SX, ("cpr: spl6\n"))
		(void) spl6();

		/*
		 * try and reset leaf devices.  reset_leaves() should only
		 * be called when there are no other threads that could be
		 * accessing devices
		 */
		PMD(PMD_SX, ("cpr: reset leaves\n"))
		reset_leaves();

		/*
		 * If i_cpr_power_down() succeeds, it'll not return
		 *
		 * Drives with write-cache enabled need to flush
		 * their cache.
		 */
		if (fcn != AD_CPR_TESTHALT) {
			PMD(PMD_SX, ("cpr: power down\n"))
			(void) i_cpr_power_down(cpr_sleeptype);
		}
		ASSERT(cpr_sleeptype == CPR_TODISK);
		/* currently CPR_TODISK comes back via a boot path */
		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
		halt(NULL);
		/* NOTREACHED */
#endif
	}
	/*
	 * Reached when cpr_main() failed or C_SUSPENDING is not set.
	 * NOTE(review): cpr_done() presumably releases what cpr_init()
	 * acquired -- confirm against its definition.
	 */
	PMD(PMD_SX, ("cpr: cpr done\n"))
	cpr_done();
	return (rc);
}
static void uvm_unloanpage(struct vm_page **ploans, int npages) { struct vm_page *pg; kmutex_t *slock; mutex_enter(&uvm_pageqlock); while (npages-- > 0) { pg = *ploans++; /* * do a little dance to acquire the object or anon lock * as appropriate. we are locking in the wrong order, * so we have to do a try-lock here. */ slock = NULL; while (pg->uobject != NULL || pg->uanon != NULL) { if (pg->uobject != NULL) { slock = &pg->uobject->vmobjlock; } else { slock = &pg->uanon->an_lock; } if (mutex_tryenter(slock)) { break; } mutex_exit(&uvm_pageqlock); /* XXX Better than yielding but inadequate. */ kpause("livelock", false, 1, NULL); mutex_enter(&uvm_pageqlock); slock = NULL; } /* * drop our loan. if page is owned by an anon but * PQ_ANON is not set, the page was loaned to the anon * from an object which dropped ownership, so resolve * this by turning the anon's loan into real ownership * (ie. decrement loan_count again and set PQ_ANON). * after all this, if there are no loans left, put the * page back a paging queue (if the page is owned by * an anon) or free it (if the page is now unowned). */ KASSERT(pg->loan_count > 0); pg->loan_count--; if (pg->uobject == NULL && pg->uanon != NULL && (pg->pqflags & PQ_ANON) == 0) { KASSERT(pg->loan_count > 0); pg->loan_count--; pg->pqflags |= PQ_ANON; } if (pg->loan_count == 0 && pg->uobject == NULL && pg->uanon == NULL) { KASSERT((pg->flags & PG_BUSY) == 0); uvm_pagefree(pg); } if (slock != NULL) { mutex_exit(slock); } } mutex_exit(&uvm_pageqlock); }
static int tap_dev_read(int unit, struct uio *uio, int flags) { struct tap_softc *sc = device_lookup_private(&tap_cd, unit); struct ifnet *ifp; struct mbuf *m, *n; int error = 0, s; if (sc == NULL) return (ENXIO); getnanotime(&sc->sc_atime); ifp = &sc->sc_ec.ec_if; if ((ifp->if_flags & IFF_UP) == 0) return (EHOSTDOWN); /* * In the TAP_NBIO case, we have to make sure we won't be sleeping */ if ((sc->sc_flags & TAP_NBIO) != 0) { if (!mutex_tryenter(&sc->sc_rdlock)) return (EWOULDBLOCK); } else { mutex_enter(&sc->sc_rdlock); } s = splnet(); if (IFQ_IS_EMPTY(&ifp->if_snd)) { ifp->if_flags &= ~IFF_OACTIVE; /* * We must release the lock before sleeping, and re-acquire it * after. */ mutex_exit(&sc->sc_rdlock); if (sc->sc_flags & TAP_NBIO) error = EWOULDBLOCK; else error = tsleep(sc, PSOCK|PCATCH, "tap", 0); splx(s); if (error != 0) return (error); /* The device might have been downed */ if ((ifp->if_flags & IFF_UP) == 0) return (EHOSTDOWN); if ((sc->sc_flags & TAP_NBIO)) { if (!mutex_tryenter(&sc->sc_rdlock)) return (EWOULDBLOCK); } else { mutex_enter(&sc->sc_rdlock); } s = splnet(); } IFQ_DEQUEUE(&ifp->if_snd, m); ifp->if_flags &= ~IFF_OACTIVE; splx(s); if (m == NULL) { error = 0; goto out; } ifp->if_opackets++; bpf_mtap(ifp, m); /* * One read is one packet. */ do { error = uiomove(mtod(m, void *), min(m->m_len, uio->uio_resid), uio); m = n = m_free(m); } while (m != NULL && uio->uio_resid > 0 && error == 0); if (m != NULL) m_freem(m); out: mutex_exit(&sc->sc_rdlock); return (error); }
/*
 * zfs_znode_move: kmem move callback for znodes.
 *
 * buf is the existing znode and newbuf the destination; size and arg are
 * unused.  Returns KMEM_CBRC_YES once the znode has been moved,
 * KMEM_CBRC_LATER when it is valid but a lock is busy or it is referenced
 * by more than the DNLC, and KMEM_CBRC_DONT_KNOW when it cannot be
 * established that the buffer is a live znode of a mounted filesystem.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	/* Why the callback finished; selects the statistic to bump. */
	enum {
		ZMV_MOVED,
		ZMV_RECHECK_INVALID,
		ZMV_OBJ_HELD,
		ZMV_VNODE_LOCKED,
		ZMV_NOT_ONLY_DNLC
	} outcome;
	znode_t *ozp = buf;
	znode_t *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;
	kmem_cbrc_t verdict;

	/*
	 * A znode is on its file system's list of known znodes only while
	 * its vfs pointer is valid.  Freeing a znode sets the low bit of
	 * that pointer, and the kmem fill patterns (baddcafe, deadbeef) set
	 * at least one of the two low bits.  A newly created znode stores
	 * the vfs pointer last, so a valid pointer also means the znode is
	 * fully set up.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Ensure that the filesystem is not unmounted during the move.
	 */
	if (zfs_enter(zfsvfs) != 0) {		/* ZFS_ENTER */
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}

	mutex_enter(&zfsvfs->z_znodes_lock);

	/*
	 * The znode may have been removed just before the lock was taken,
	 * so look at the vfs pointer again.
	 */
	if (ozp->z_zfsvfs != zfsvfs) {
		outcome = ZMV_RECHECK_INVALID;
		verdict = KMEM_CBRC_DONT_KNOW;
		goto drop_znodes_lock;
	}

	/*
	 * While z_znodes_lock is held the znode cannot be freed and its
	 * fields are safe to read.  Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		outcome = ZMV_OBJ_HELD;
		verdict = KMEM_CBRC_LATER;
		goto drop_znodes_lock;
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		outcome = ZMV_VNODE_LOCKED;
		verdict = KMEM_CBRC_LATER;
		goto drop_obj_hold;
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count == 1 && vn_in_dnlc(vp)) {
		/*
		 * The znode is known and in a valid state to move, and
		 * every lock the critical section needs is held.
		 */
		zfs_znode_move_impl(ozp, nzp);
		outcome = ZMV_MOVED;
		verdict = KMEM_CBRC_YES;
	} else {
		outcome = ZMV_NOT_ONLY_DNLC;
		verdict = KMEM_CBRC_LATER;
	}
	mutex_exit(&vp->v_lock);

drop_obj_hold:
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

drop_znodes_lock:
	/* After a move, the new znode takes the old one's list position. */
	if (outcome == ZMV_MOVED)
		list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	/* Statistics are recorded only after every lock has been dropped. */
	switch (outcome) {
	case ZMV_RECHECK_INVALID:
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid);
		break;
	case ZMV_OBJ_HELD:
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		break;
	case ZMV_VNODE_LOCKED:
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		break;
	case ZMV_NOT_ONLY_DNLC:
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		break;
	case ZMV_MOVED:
		break;
	}

	return (verdict);
}