static uint_t
gcpu_xpv_virq_intr(void)
{
	int types[] = { XEN_MC_URGENT, XEN_MC_NONURGENT };
	uint64_t fetch_id;
	int count = 0;
	int i;

	if (gcpu_xpv_virq_vect == -1 || gcpu_xpv_poll_bankregs_sz == 0) {
		gcpu_xpv_intr_unclaimed++;
		return (DDI_INTR_UNCLAIMED);
	}

	if (!mutex_tryenter(&gcpu_xpv_polldata_lock)) {
		gcpu_xpv_mca_hcall_busy++;
		return (DDI_INTR_CLAIMED);
	}

	for (i = 0; i < sizeof (types) / sizeof (types[0]); i++) {
		while (gcpu_xpv_telem_read(&gcpu_xpv_polldata, types[i],
		    &fetch_id)) {
			gcpu_poll_trace(&gcpu_xpv_poll_trace_ctl,
			    GCPU_MPT_WHAT_XPV_VIRQ,
			    x86_mcinfo_nentries(&gcpu_xpv_polldata));
			gcpu_xpv_mci_process(&gcpu_xpv_polldata, types[i],
			    gcpu_xpv_poll_bankregs, gcpu_xpv_poll_bankregs_sz);
			gcpu_xpv_telem_ack(types[i], fetch_id);
			count++;
		}
	}

	mutex_exit(&gcpu_xpv_polldata_lock);

	return (DDI_INTR_CLAIMED);
}
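
What this handler illustrates: code running in interrupt context must not block, so the shared poll state is only try-entered, and a failed attempt is still reported as claimed because the current lock holder will drain the pending work. Below is a minimal user-space sketch of that shape using POSIX threads; handle_event, poll_lock, and busy_count are names invented for the sketch, not part of the illumos code.

#include <pthread.h>

static pthread_mutex_t poll_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long busy_count;	/* analog of gcpu_xpv_mca_hcall_busy */

/*
 * If another thread already holds the poll lock, count the collision
 * and return "claimed" without blocking; the holder drains the work.
 */
static int
handle_event(void)
{
	if (pthread_mutex_trylock(&poll_lock) != 0) {
		busy_count++;		/* racy counter; fine for a sketch */
		return (1);		/* claimed, but work skipped */
	}
	/* ... drain pending telemetry here ... */
	pthread_mutex_unlock(&poll_lock);
	return (1);
}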
Example #2
static void
fipe_disable(void)
{
	/*
	 * Try to acquire the lock, which also implicitly has the same effect
	 * as calling membar_sync().
	 */
	while (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		/*
		 * If power saving is inactive, just return and all dirty
		 * house-keeping work will be handled in fipe_enable().
		 */
		if (fipe_gbl_ctrl.pm_active == B_FALSE) {
			return;
		} else {
			(void) SMT_PAUSE();
		}
	}

	/* Disable power saving if it's active. */
	if (fipe_gbl_ctrl.pm_active) {
		/*
		 * Set pm_active to FALSE as soon as possible to prevent
		 * other CPUs from waiting on pm_active flag.
		 */
		fipe_gbl_ctrl.pm_active = B_FALSE;
		membar_producer();
		fipe_mc_restore();
		fipe_ioat_cancel();
	}

	mutex_exit(&fipe_gbl_ctrl.lock);
}
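
The disable path above reduces to: spin on a try-lock, but give up early when the state the lock protects is already what we want. A hedged pthread sketch, with pm_disable and ctrl_lock as invented names and sched_yield() standing in for SMT_PAUSE():

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t ctrl_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool pm_active;

static void
pm_disable(void)
{
	/* Spin for the lock, but give up early if there is nothing to undo. */
	while (pthread_mutex_trylock(&ctrl_lock) != 0) {
		if (!pm_active)
			return;		/* the enable path will clean up */
		sched_yield();		/* stand-in for SMT_PAUSE() */
	}
	if (pm_active)
		pm_active = false;	/* then undo the hardware state */
	pthread_mutex_unlock(&ctrl_lock);
}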
Example #3
/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
			continue;
		if (!mutex_tryenter(vp->v_interlock)) {
			VOP_UNLOCK(vp);
			continue;
		}
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			VOP_UNLOCK(vp);
			continue;
		}
		break;
	}
Example #4
void
bge_receive(bge_t *bgep, bge_status_t *bsp)
{
    recv_ring_t *rrp;
    uint64_t index;
    mblk_t *mp;

    for (index = 0; index < bgep->chipid.rx_rings; index++) {
        /*
         * Start from the first ring.
         */
        rrp = &bgep->recv[index];

        /*
         * For each ring, (rrp->prod_index_p) points to the
         * proper index within the status block (which has
         * already been sync'd by the caller)
         */
        ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, index));

        if (*rrp->prod_index_p == rrp->rx_next || rrp->poll_flag)
            continue;		/* no packets		*/
        if (mutex_tryenter(rrp->rx_lock) == 0)
            continue;		/* already in process	*/
        mp = bge_receive_ring(bgep, rrp);
        mutex_exit(rrp->rx_lock);

        if (mp != NULL)
            mac_rx_ring(bgep->mh, rrp->ring_handle, mp,
                        rrp->ring_gen_num);
    }
}
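
The per-ring mutex_tryenter(rrp->rx_lock) partitions work among concurrent callers: whoever wins the try-lock drains that ring, everyone else skips it as already in process. A small sketch of that idea (service_rings, ring_lock, and drain_ring are invented):

#include <pthread.h>

#define	NRINGS	4

static pthread_mutex_t ring_lock[NRINGS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

static void
drain_ring(int i)
{
	(void)i;	/* receive and hand up packets for ring i */
}

static void
service_rings(void)
{
	for (int i = 0; i < NRINGS; i++) {
		if (pthread_mutex_trylock(&ring_lock[i]) != 0)
			continue;	/* already in process elsewhere */
		drain_ring(i);
		pthread_mutex_unlock(&ring_lock[i]);
	}
}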
Example #5
int
cpr_init(int fcn)
{
	/*
	 * Allow only one suspend/resume process.
	 */
	if (mutex_tryenter(&cpr_slock) == 0)
		return (EBUSY);

	CPR->c_flags = 0;
	CPR->c_substate = 0;
	CPR->c_cprboot_magic = 0;
	CPR->c_alloc_cnt = 0;

	CPR->c_fcn = fcn;
	if (fcn == AD_CPR_REUSABLE)
		CPR->c_flags |= C_REUSABLE;
	else
		CPR->c_flags |= C_SUSPENDING;
	if (fcn != AD_CPR_NOCOMPRESS && fcn != AD_CPR_TESTNOZ)
		CPR->c_flags |= C_COMPRESSING;
	/*
	 * reserve CPR_MAXCONTIG virtual pages for cpr_dump()
	 */
	CPR->c_mapping_area = i_cpr_map_setup();
	if (CPR->c_mapping_area == 0) {		/* no space in kernelmap */
		cpr_err(CE_CONT, "Unable to alloc from kernelmap.\n");
		mutex_exit(&cpr_slock);
		return (EAGAIN);
	}
	DEBUG3(cpr_err(CE_CONT, "Reserved virtual range from 0x%p for writing "
	    "kas\n", (void *)CPR->c_mapping_area));

	return (0);
}
Example #6
/*
 * Invalidate the attributes on all rnodes forcing the next getattr
 * to go over the wire.  Used to flush stale uid and gid mappings.
 * Maybe done on a per vfsp, or all rnodes (vfsp == NULL)
 */
void
nfs4_rnode_invalidate(struct vfs *vfsp)
{
	int index;
	rnode4_t *rp;
	vnode_t *vp;

	/*
	 * Walk the hash queues looking for rnodes.
	 */
	for (index = 0; index < rtable4size; index++) {
		rw_enter(&rtable4[index].r_lock, RW_READER);
		for (rp = rtable4[index].r_hashf;
		    rp != (rnode4_t *)(&rtable4[index]);
		    rp = rp->r_hashf) {
			vp = RTOV4(rp);
			if (vfsp != NULL && vp->v_vfsp != vfsp)
				continue;

			if (!mutex_tryenter(&rp->r_statelock))
				continue;

			/*
			 * Expire the attributes by resetting the change
			 * and attr timeout.
			 */
			rp->r_change = 0;
			PURGE_ATTRCACHE4_LOCKED(rp);
			mutex_exit(&rp->r_statelock);
		}
		rw_exit(&rtable4[index].r_lock);
	}
}
Example #7
/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;

	KASSERT(mutex_owned(&vnode_free_list_lock));
retry:
	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (!mutex_tryenter(vp->v_interlock))
			continue;
		if ((vp->v_iflag & VI_XLOCK) == 0)
			break;
		mutex_exit(vp->v_interlock);
	}
Example #8
static int
nfs4_active_data_reclaim(rnode4_t *rp)
{
	char *contents;
	vnode_t *xattr;
	int size;
	vsecattr_t *vsp;
	int freed;
	bool_t rdc = FALSE;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.
	 */
	if (!mutex_tryenter(&rp->r_statelock))
		return (0);
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	if (rp->r_dir != NULL)
		rdc = TRUE;
	xattr = rp->r_xattr_dir;
	rp->r_xattr_dir = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs4_access_purge_rp(rp);

	if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
		return (freed);

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs4_acl_free_cache(vsp);

	nfs4_purge_rddir_cache(RTOV4(rp));

	/*
	 * Release the xattr directory vnode
	 */
	if (xattr != NULL)
		VN_RELE(xattr);

	return (1);
}
Example #9
void
db_kill_proc(db_expr_t addr, bool haddr,
    db_expr_t count, const char *modif)
{
#ifdef _KERNEL	/* XXX CRASH(8) */
	struct proc *p;
	ksiginfo_t	ksi;
	db_expr_t pid, sig;
	int t;

	/* What pid? */
	if (!db_expression(&pid)) {
	       db_error("pid?\n");
	       /*NOTREACHED*/
	}
	/* What sig? */
	t = db_read_token();
	if (t == tCOMMA) {
	       if (!db_expression(&sig)) {
		       db_error("sig?\n");
		       /*NOTREACHED*/
	       }
	} else {
	       db_unread_token(t);
	       sig = 15;
	}
	if (db_read_token() != tEOL) {
	       db_error("?\n");
	       /*NOTREACHED*/
	}
	/* We might stop when the mutex is held or when not */
	t = mutex_tryenter(proc_lock);
#ifdef DIAGNOSTIC
	if (!t) {
	       db_error("could not acquire proc_lock mutex\n");
	       /*NOTREACHED*/
	}
#endif
	p = proc_find((pid_t)pid);
	if (p == NULL) {
		if (t)
			mutex_exit(proc_lock);
		db_error("no such proc\n");
		/*NOTREACHED*/
	}
	KSI_INIT(&ksi);
	ksi.ksi_signo = sig;
	ksi.ksi_code = SI_USER;
	ksi.ksi_pid = 0;
	ksi.ksi_uid = 0;
	mutex_enter(p->p_lock);
	kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);
	if (t)
		mutex_exit(proc_lock);
#else
	db_printf("This command is not currently supported.\n");
#endif
}
Example #10
/*ARGSUSED*/
int
spec_sync(struct vfs *vfsp,
	short	flag,
	struct cred *cr)
{
	struct snode *sync_list;
	register struct snode **spp, *sp, *spnext;
	register struct vnode *vp;

	if (mutex_tryenter(&spec_syncbusy) == 0)
		return (0);

	if (flag & SYNC_ATTR) {
		mutex_exit(&spec_syncbusy);
		return (0);
	}
	mutex_enter(&stable_lock);
	sync_list = NULL;
	/*
	 * Find all the snodes that are dirty and add them to the sync_list
	 */
	for (spp = stable; spp < &stable[STABLESIZE]; spp++) {
		for (sp = *spp; sp != NULL; sp = sp->s_next) {
			vp = STOV(sp);
			/*
			 * Don't bother sync'ing a vp if it's
			 * part of a virtual swap device.
			 */
			if (IS_SWAPVP(vp))
				continue;

			if (vp->v_type == VBLK && vn_has_cached_data(vp)) {
				/*
				 * Prevent vp from going away before we
				 * get a chance to do a VOP_PUTPAGE
				 * via sync_list processing
				 */
				VN_HOLD(vp);
				sp->s_list = sync_list;
				sync_list = sp;
			}
		}
	}
	mutex_exit(&stable_lock);
	/*
	 * Now write out all the snodes we marked asynchronously.
	 */
	for (sp = sync_list; sp != NULL; sp = spnext) {
		spnext = sp->s_list;
		vp = STOV(sp);
		(void) VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0, B_ASYNC, cr);
		VN_RELE(vp);		/* Release our hold on vnode */
	}
	mutex_exit(&spec_syncbusy);
	return (0);
}
Example #11
/*
 * Check the condition (fipe_gbl_ctrl.cpu_count == ncpus) to make sure that
 * no other CPU is trying to wake the system up from the memory power saving
 * state. If a CPU is waking the system up, fipe_disable() will set
 * fipe_gbl_ctrl.pm_active to false as soon as possible, allow the other CPUs
 * to continue, and take the responsibility of recovering the system from the
 * memory power saving state.
 */
static void
fipe_enable(int throttle, cpu_idle_check_wakeup_t check_func, void* check_arg)
{
	extern void membar_sync(void);

	FIPE_KSTAT_DETAIL_INC(pm_tryenter_cnt);

	/*
	 * Check CPU wakeup events.
	 */
	if (check_func != NULL) {
		(*check_func)(check_arg);
	}

	/*
	 * Try to acquire the mutex, which also implicitly has the same effect
	 * as calling membar_sync().
	 * If mutex_tryenter fails, that means another CPU is waking the system up.
	 */
	if (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
	/*
	 * Handle a special race condition for the case that a CPU wakes
	 * and then enters into idle state within a short period.
	 * This case can't be reliably detected by cpu_count mechanism.
	 */
	} else if (fipe_gbl_ctrl.pm_active) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		mutex_exit(&fipe_gbl_ctrl.lock);
	} else {
		fipe_gbl_ctrl.pm_active = B_TRUE;
		membar_sync();
		if (fipe_gbl_ctrl.cpu_count != ncpus) {
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_ioat_trigger() != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_gbl_ctrl.cpu_count != ncpus ||
		    fipe_mc_change(throttle) != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_ioat_cancel();
			if (fipe_gbl_ctrl.cpu_count != ncpus) {
				FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			}
		} else if (fipe_gbl_ctrl.cpu_count != ncpus) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_mc_restore();
			fipe_ioat_cancel();
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		} else {
			FIPE_KSTAT_DETAIL_INC(pm_success_cnt);
		}
		mutex_exit(&fipe_gbl_ctrl.lock);
	}
}
Example #12
void
mutex_enter(kmutex_t *mtx)
{
	UPMTX(mtx);

	/* fastpath? */
	if (mutex_tryenter(mtx))
		return;

	/*
	 * No?  bummer, do it the slow and painful way then.
	 */
	upm->upm_wanted++;
	while (!mutex_tryenter(mtx)) {
		rump_schedlock_cv_wait(upm->upm_rucv);
	}
	upm->upm_wanted--;

	KASSERT(upm->upm_wanted >= 0);
}
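
This rump routine synthesizes a blocking mutex_enter() out of mutex_tryenter() plus a scheduler condition variable. A purely illustrative pthread rendering of the same construction follows (a real pthread mutex blocks natively, so this is only a model); slow_enter, slow_exit, meta, unlocked, and wanted are names invented for the sketch, and the exit side must signal the waiters for it to work:

#include <pthread.h>

static pthread_mutex_t meta = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t unlocked = PTHREAD_COND_INITIALIZER;
static int wanted;			/* analog of upm_wanted */

static void
slow_enter(pthread_mutex_t *mtx)
{
	if (pthread_mutex_trylock(mtx) == 0)	/* fastpath */
		return;

	pthread_mutex_lock(&meta);
	wanted++;
	while (pthread_mutex_trylock(mtx) != 0)
		pthread_cond_wait(&unlocked, &meta);	/* kicked by slow_exit */
	wanted--;
	pthread_mutex_unlock(&meta);
}

static void
slow_exit(pthread_mutex_t *mtx)
{
	pthread_mutex_unlock(mtx);
	pthread_mutex_lock(&meta);
	if (wanted > 0)
		pthread_cond_broadcast(&unlocked);	/* wake the waiters */
	pthread_mutex_unlock(&meta);
}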
Example #13
static void
splat_mutex_test1_func(void *arg)
{
        mutex_priv_t *mp = (mutex_priv_t *)arg;
        ASSERT(mp->mp_magic == SPLAT_MUTEX_TEST_MAGIC);

        if (mutex_tryenter(&mp->mp_mtx)) {
                mp->mp_rc = 0;
                mutex_exit(&mp->mp_mtx);
        } else {
                mp->mp_rc = -EBUSY;
        }
}
Example #14
static int
awin_hdmi_i2c_acquire_bus(void *priv, int flags)
{
	struct awin_hdmi_softc *sc = priv;

	if (flags & I2C_F_POLL) {
		if (!mutex_tryenter(&sc->sc_ic_lock))
			return EBUSY;
	} else {
		mutex_enter(&sc->sc_ic_lock);
	}

	return 0;
}
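
The acquire-bus shape above, try-lock with EBUSY when the caller cannot sleep and a blocking acquisition otherwise, recurs across i2c drivers (compare coram_iic_acquire_bus further down, which spins with a delay instead). A user-space sketch with I2C_F_POLL replaced by a plain flag:

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;

static int
acquire_bus(int polling)
{
	if (polling) {
		/* May not sleep: fail fast instead of blocking. */
		if (pthread_mutex_trylock(&bus_lock) != 0)
			return (EBUSY);
		return (0);
	}
	pthread_mutex_lock(&bus_lock);
	return (0);
}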
Example #15
/*ARGSUSED*/
static int
smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
{
	/*
	 * Cross-zone calls are OK here, since this translates to a
	 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
	 */
	if (!(flag & SYNC_ATTR) && mutex_tryenter(&smbfs_syncbusy) != 0) {
		smbfs_rflush(vfsp, cr);
		mutex_exit(&smbfs_syncbusy);
	}

	return (0);
}
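
smbfs_syncbusy acts as a single-flight gate: if a flush is already running, a new sync request returns immediately instead of queuing a second flush. Sketched with pthreads (sync_fs and flush_all are hypothetical names):

#include <pthread.h>

static pthread_mutex_t syncbusy = PTHREAD_MUTEX_INITIALIZER;

static void
flush_all(void)
{
	/* write back all dirty state */
}

static int
sync_fs(void)
{
	/* At most one flusher; concurrent callers return immediately. */
	if (pthread_mutex_trylock(&syncbusy) != 0)
		return (0);
	flush_all();
	pthread_mutex_unlock(&syncbusy);
	return (0);
}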
Example #16
ACPI_CPU_FLAGS
AcpiOsAcquireLock(ACPI_HANDLE Handle)
{


	if (Handle == NULL)
		return (AE_BAD_PARAMETER);

	if (curthread == CPU->cpu_idle_thread) {
		while (!mutex_tryenter((kmutex_t *)Handle))
			/* spin */;
	} else
		mutex_enter((kmutex_t *)Handle);
	return (AE_OK);
}
Example #17
/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
	}
	mutex_spin_exit(lock);
}
Example #18
static int
coram_iic_acquire_bus(void *cookie, int flags)
{
	struct coram_iic_softc *cic;

	cic = cookie;

	if (flags & I2C_F_POLL) {
		while (mutex_tryenter(&cic->cic_busmutex) == 0)
			delay(50);
		return 0;
	}

	mutex_enter(&cic->cic_busmutex);

	return 0;
}
Example #19
static void kprintf_rnd_get(size_t bytes, void *priv)
{
	if (kprnd_added)  {
		KASSERT(kprintf_inited);
		if (mutex_tryenter(&kprintf_mtx)) {
			SHA512_Final(kprnd_accum, &kprnd_sha);
			rnd_add_data(&rnd_printf_source,
				     kprnd_accum, sizeof(kprnd_accum), 0);
			kprnd_added = 0;
			/* This, we must do, since we called _Final. */
			SHA512_Init(&kprnd_sha);
			/* This is optional but seems useful. */
			SHA512_Update(&kprnd_sha, kprnd_accum,
				      sizeof(kprnd_accum));
			mutex_exit(&kprintf_mtx);
		}
	}
}
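
kprintf_rnd_get treats the entropy pool strictly opportunistically: when the try-lock fails the sample is dropped, never waited for. The distilled pattern (record_sample and the counters are invented):

#include <pthread.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long samples, skipped;

static void
record_sample(void)
{
	/* Best-effort: losing a sample is cheaper than blocking here. */
	if (pthread_mutex_trylock(&pool_lock) != 0) {
		skipped++;	/* racy counter; fine for a sketch */
		return;
	}
	samples++;		/* fold data into the pool here */
	pthread_mutex_unlock(&pool_lock);
}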
Example #20
/*
 * Return a buffer w/o sleeping
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				sema_v(&bp->b_sem);
				break;
			}
		}
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}
Example #21
/*
 * System filesystem synchronizer daemon.
 */
void
sched_sync(void *arg)
{
	synclist_t *slp;
	struct vnode *vp;
	time_t starttime;
	bool synced;

	for (;;) {
		mutex_enter(&syncer_mutex);
		mutex_enter(&syncer_data_lock);

		starttime = time_second;

		/*
		 * Push files whose dirty time has expired.
		 */
		slp = &syncer_workitem_pending[syncer_delayno];
		syncer_delayno += 1;
		if (syncer_delayno >= syncer_last)
			syncer_delayno = 0;

		while ((vp = TAILQ_FIRST(slp)) != NULL) {
			/* We are locking in the wrong direction. */
			synced = false;
			if (mutex_tryenter(vp->v_interlock)) {
				mutex_exit(&syncer_data_lock);
				if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
					synced = true;
					(void) VOP_FSYNC(vp, curlwp->l_cred,
					    FSYNC_LAZY, 0, 0);
					vput(vp);
				}
				mutex_enter(&syncer_data_lock);
			}

			/*
			 * XXX The vnode may have been recycled, in which
			 * case it may have a new identity.
			 */
			if (TAILQ_FIRST(slp) == vp) {
				/*
				 * Put us back on the worklist.  The worklist
				 * routine will remove us from our current
				 * position and then add us back in at a later
				 * position.
				 *
				 * Try again sooner rather than later if
				 * we were unable to lock the vnode.  Lock
				 * failure should not prevent us from doing
				 * the sync "soon".
				 *
				 * If we locked it yet arrive here, it's
				 * likely that lazy sync is in progress and
				 * so the vnode still has dirty metadata. 
				 * syncdelay is mainly to get this vnode out
				 * of the way so we do not consider it again
				 * "soon" in this loop, so the delay time is
				 * not critical as long as it is not "soon". 
				 * While write-back strategy is the file
				 * system's domain, we expect write-back to
				 * occur no later than syncdelay seconds
				 * into the future.
				 */
				vn_syncer_add1(vp,
				    synced ? syncdelay : lockdelay);
			}
		}
		mutex_exit(&syncer_mutex);

		/*
		 * If it has taken us less than a second to process the
		 * current work, then wait.  Otherwise start right over
		 * again.  We can still lose time if any single round
		 * takes more than two seconds, but it does not really
		 * matter as we are just trying to generally pace the
		 * filesystem activity.
		 */
		if (time_second == starttime) {
			kpause("syncer", false, hz, &syncer_data_lock);
		}
		mutex_exit(&syncer_data_lock);
	}
}
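
The comment "We are locking in the wrong direction" names the key trick: the walk holds syncer_data_lock, but v_interlock is ordered before it, so the out-of-order lock is only try-entered and a failure just reschedules the vnode sooner. A two-lock sketch of the same maneuver (names invented):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;	/* held first here */
static pthread_mutex_t item_lock = PTHREAD_MUTEX_INITIALIZER;	/* ordered first */

static bool
sync_one(void)
{
	bool synced = false;

	pthread_mutex_lock(&list_lock);
	/* item_lock is ordered before list_lock, so only try it. */
	if (pthread_mutex_trylock(&item_lock) == 0) {
		pthread_mutex_unlock(&list_lock);	/* restore the order */
		/* ... sync the item ... */
		synced = true;
		pthread_mutex_unlock(&item_lock);
		pthread_mutex_lock(&list_lock);
	}
	pthread_mutex_unlock(&list_lock);
	return (synced);	/* caller retries sooner on false */
}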
Example #22
int
ghd_transport(ccc_t	*cccp,
		gcmd_t	*gcmdp,
		gtgt_t	*gtgtp,
		ulong_t	 timeout,
		int	 polled,
		void	*intr_status)
{
	gdev_t	*gdevp = gtgtp->gt_gdevp;

	ASSERT(!mutex_owned(&cccp->ccc_hba_mutex));
	ASSERT(!mutex_owned(&cccp->ccc_waitq_mutex));

	if (polled) {
		/*
		 * Grab the HBA mutex so no other requests are started
		 * until after this one completes.
		 */
		mutex_enter(&cccp->ccc_hba_mutex);

		GDBG_START(("ghd_transport: polled"
		    " cccp 0x%p gdevp 0x%p gtgtp 0x%p gcmdp 0x%p\n",
		    (void *)cccp, (void *)gdevp, (void *)gtgtp, (void *)gcmdp));

		/*
		 * Lock the doneq so no other thread flushes the Q.
		 */
		ghd_doneq_pollmode_enter(cccp);
	}
#if defined(GHD_DEBUG) || defined(__lint)
	else {
		GDBG_START(("ghd_transport: non-polled"
		    " cccp 0x%p gdevp 0x%p gtgtp 0x%p gcmdp 0x%p\n",
		    (void *)cccp, (void *)gdevp, (void *)gtgtp, (void *)gcmdp));
	}
#endif
	/*
	 * add this request to the tail of the waitq
	 */
	gcmdp->cmd_waitq_level = 1;
	mutex_enter(&cccp->ccc_waitq_mutex);
	L2_add(&GDEV_QHEAD(gdevp), &gcmdp->cmd_q, gcmdp);

	/*
	 * Add this request to the packet timer active list and start its
	 * abort timer.
	 */
	gcmdp->cmd_state = GCMD_STATE_WAITQ;
	ghd_timer_start(cccp, gcmdp, timeout);


	/*
	 * Check the device wait queue throttle and perhaps move
	 * some requests to the end of the HBA wait queue.
	 */
	ghd_waitq_shuffle_up(cccp, gdevp);

	if (!polled) {
		/*
		 * See if the HBA mutex is available but use the
		 * tryenter so I don't deadlock.
		 */
		if (!mutex_tryenter(&cccp->ccc_hba_mutex)) {
			/* The HBA mutex isn't available */
			GDBG_START(("ghd_transport: !mutex cccp 0x%p\n",
			    (void *)cccp));
			mutex_exit(&cccp->ccc_waitq_mutex);
			return (TRAN_ACCEPT);
		}
		GDBG_START(("ghd_transport: got mutex cccp 0x%p\n",
		    (void *)cccp));

		/*
		 * start as many requests as possible from the head
		 * of the HBA wait queue
		 */

		ghd_waitq_process_and_mutex_exit(cccp);

		ASSERT(!mutex_owned(&cccp->ccc_hba_mutex));
		ASSERT(!mutex_owned(&cccp->ccc_waitq_mutex));

		return (TRAN_ACCEPT);
	}


	/*
	 * If polled mode (FLAG_NOINTR specified in scsi_pkt flags),
	 * then ghd_poll() waits until the request completes or times out
	 * before returning.
	 */

	mutex_exit(&cccp->ccc_waitq_mutex);
	(void) ghd_poll(cccp, GHD_POLL_REQUEST, 0, gcmdp, gtgtp, intr_status);
	ghd_doneq_pollmode_exit(cccp);

	mutex_enter(&cccp->ccc_waitq_mutex);
	ghd_waitq_process_and_mutex_exit(cccp);

	/* call HBA's completion function but don't do callback to target */
	(*cccp->ccc_hba_complete)(cccp->ccc_hba_handle, gcmdp, FALSE);

	GDBG_START(("ghd_transport: polled done cccp 0x%p\n", (void *)cccp));
	return (TRAN_ACCEPT);
}
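
The non-polled path here is a queue-then-try handoff: the request is made visible under the waitq mutex first, so if the try-enter of the HBA mutex fails, the current holder is guaranteed to see and start it, and returning without the lock is safe. Reduced to a sketch (submit, enqueue, process_queue are hypothetical):

#include <pthread.h>

static pthread_mutex_t waitq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t hba_lock = PTHREAD_MUTEX_INITIALIZER;

static void
enqueue(int req)
{
	(void)req;	/* append to the wait queue */
}

static void
process_queue(void)
{
	/* start as many queued requests as possible */
}

static int
submit(int req)
{
	pthread_mutex_lock(&waitq_lock);
	enqueue(req);		/* now visible to the current holder */

	if (pthread_mutex_trylock(&hba_lock) != 0) {
		/* The holder will drain the queue; don't deadlock waiting. */
		pthread_mutex_unlock(&waitq_lock);
		return (0);
	}
	pthread_mutex_unlock(&waitq_lock);
	process_queue();
	pthread_mutex_unlock(&hba_lock);
	return (0);
}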
Example #23
void
zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
	dmu_buf_t *db = sa_get_db(hdl);
	znode_t *zp = sa_get_userdata(hdl);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	sa_bulk_attr_t bulk[20];
	int count = 0;
	sa_bulk_attr_t sa_attrs[20] = { { 0 } };
	zfs_acl_locator_cb_t locate = { 0 };
	uint64_t uid, gid, mode, rdev, xattr, parent;
	uint64_t crtime[2], mtime[2], ctime[2];
	zfs_acl_phys_t znode_acl;
	char scanstamp[AV_SCANSTAMP_SZ];
	boolean_t drop_lock = B_FALSE;

	/*
	 * No upgrade if ACL isn't cached
	 * since we won't know which locks are held
	 * and reading the ACL would require special "locked"
	 * interfaces that would be messy
	 */
	if (zp->z_acl_cached == NULL || vnode_islnk(ZTOV(zp)))
		return;

	/*
	 * If the z_lock is held and we aren't the owner
	 * then just return, since we don't want to deadlock
	 * trying to update the status of z_is_sa.  This
	 * file can then be upgraded at a later time.
	 *
	 * Otherwise, we know we are doing the
	 * sa_update() that caused us to enter this function.
	 */
	if (mutex_owner(&zp->z_lock) != curthread) {
		if (mutex_tryenter(&zp->z_lock) == 0)
			return;
		else
			drop_lock = B_TRUE;
	}

	/* First do a bulk query of the attributes that aren't cached */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
	    &znode_acl, 88);

	if (sa_bulk_lookup_locked(hdl, bulk, count) != 0)
		goto done;


	/*
	 * While the order here doesn't matter, it's best to try to organize
	 * it in such a way as to pick up an already existing layout number
	 */
	count = 0;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
	    NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
	    NULL, &parent, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    zp->z_atime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
	    &mtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
	    &crtime, 16);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	if (vnode_isblk(zp->z_vnode) || vnode_islnk(zp->z_vnode))
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
		    &rdev, 8);
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
	    &zp->z_acl_cached->z_acl_count, 8);

	if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
		zfs_acl_xform(zp, zp->z_acl_cached, CRED());

	locate.cb_aclp = zp->z_acl_cached;
	SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
	    zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);

	if (xattr)
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
		    NULL, &xattr, 8);

	/* if scanstamp then add scanstamp */

	if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
		bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
		    scanstamp, AV_SCANSTAMP_SZ);
		SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
		    NULL, scanstamp, AV_SCANSTAMP_SZ);
		zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
	}

	VERIFY(dmu_set_bonustype(db, DMU_OT_SA, tx) == 0);
	VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
	    count, tx) == 0);
	if (znode_acl.z_acl_extern_obj)
		VERIFY(0 == dmu_object_free(zfsvfs->z_os,
		    znode_acl.z_acl_extern_obj, tx));

	zp->z_is_sa = B_TRUE;
done:
	if (drop_lock)
		mutex_exit(&zp->z_lock);
}
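
The owner test before mutex_tryenter lets zfs_sa_upgrade be called both from a path that already holds z_lock and from one that does not. POSIX mutexes do not expose their owner, so a user-space sketch has to track ownership by hand; the thread-local flag below is an artifact of the sketch (and assumes every acquisition site maintains it), not a pthread feature:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t z_lock = PTHREAD_MUTEX_INITIALIZER;
static _Thread_local bool i_hold_z_lock;	/* sketch-only ownership note */

static void
upgrade(void)
{
	bool drop_lock = false;

	if (!i_hold_z_lock) {
		if (pthread_mutex_trylock(&z_lock) != 0)
			return;		/* contended: upgrade another time */
		i_hold_z_lock = true;
		drop_lock = true;
	}
	/* ... perform the upgrade under z_lock ... */
	if (drop_lock) {
		i_hold_z_lock = false;
		pthread_mutex_unlock(&z_lock);
	}
}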
Example #24
int
rumpuser_mutex_tryenter(struct rumpuser_mtx *mtx)
{

	return mutex_tryenter(mtx);
}
Example #25
/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
	tcp_t *tcp;
	int64_t now;
	mblk_t *mp;
	conn_t *connp;
	kmutex_t *lock;
	boolean_t removed;
	extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
	    uint8_t *, in_port_t, uint8_t *, in_port_t, void *);

	squeue_t *sqp = (squeue_t *)arg;
	tcp_squeue_priv_t *tcp_time_wait =
	    *((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));

	mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	tcp_time_wait->tcp_time_wait_tid = 0;
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_TRUE;
#endif

	if (tcp_time_wait->tcp_free_list != NULL &&
	    tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
		TCP_G_STAT(tcp_freelist_cleanup);
		while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
			tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
			tcp->tcp_time_wait_next = NULL;
			tcp_time_wait->tcp_free_list_cnt--;
			ASSERT(tcp->tcp_tcps == NULL);
			CONN_DEC_REF(tcp->tcp_connp);
		}
		ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
	}

	/*
	 * In order to reap time waits reliably, we should use a
	 * source of time that is not adjustable by the user -- hence
	 * the call to ddi_get_lbolt64().
	 */
	now = ddi_get_lbolt64();
	while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
		/*
		 * lbolt64 should not wrap around in practice...  So we can
		 * do a direct comparison.
		 */
		if (now < tcp->tcp_time_wait_expire)
			break;

		removed = tcp_time_wait_remove(tcp, tcp_time_wait);
		ASSERT(removed);

		connp = tcp->tcp_connp;
		ASSERT(connp->conn_fanout != NULL);
		lock = &connp->conn_fanout->connf_lock;
		/*
		 * This is essentially a TW reclaim fast path optimization for
		 * performance where the timewait collector checks under the
		 * fanout lock (so that no one else can get access to the
		 * conn_t) that the refcnt is 2 i.e. one for TCP and one for
		 * the classifier hash list. If ref count is indeed 2, we can
		 * just remove the conn under the fanout lock and avoid
		 * cleaning up the conn under the squeue, provided that
		 * clustering callbacks are not enabled. If clustering is
		 * enabled, we need to make the clustering callback before
		 * setting the CONDEMNED flag and after dropping all locks and
		 * so we forego this optimization and fall back to the slow
		 * path. Also please see the comments in tcp_closei_local
		 * regarding the refcnt logic.
		 *
		 * Since we are holding the tcp_time_wait_lock, it's better
		 * not to block on the fanout_lock because other connections
		 * can't add themselves to time_wait list. So we do a
		 * tryenter instead of mutex_enter.
		 */
		if (mutex_tryenter(lock)) {
			mutex_enter(&connp->conn_lock);
			if ((connp->conn_ref == 2) &&
			    (cl_inet_disconnect == NULL)) {
				ipcl_hash_remove_locked(connp,
				    connp->conn_fanout);
				/*
				 * Set the CONDEMNED flag now itself so that
				 * the refcnt cannot increase due to any
				 * walker.
				 */
				connp->conn_state_flags |= CONN_CONDEMNED;
				mutex_exit(lock);
				mutex_exit(&connp->conn_lock);
				if (tcp_time_wait->tcp_free_list_cnt <
				    tcp_free_list_max_cnt) {
					/* Add to head of tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_cleanup(tcp);
					ASSERT(connp->conn_latch == NULL);
					ASSERT(connp->conn_policy == NULL);
					ASSERT(tcp->tcp_tcps == NULL);
					ASSERT(connp->conn_netstack == NULL);

					mutex_enter(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp->tcp_time_wait_next =
					    tcp_time_wait->tcp_free_list;
					tcp_time_wait->tcp_free_list = tcp;
					tcp_time_wait->tcp_free_list_cnt++;
					continue;
				} else {
					/* Do not add to tcp_free_list */
					mutex_exit(
					    &tcp_time_wait->tcp_time_wait_lock);
					tcp_bind_hash_remove(tcp);
					ixa_cleanup(tcp->tcp_connp->conn_ixa);
					tcp_ipsec_cleanup(tcp);
					CONN_DEC_REF(tcp->tcp_connp);
				}
			} else {
				CONN_INC_REF_LOCKED(connp);
				mutex_exit(lock);
				mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
				mutex_exit(&connp->conn_lock);
				/*
				 * We can reuse the closemp here since conn has
				 * detached (otherwise we wouldn't even be in
				 * time_wait list). tcp_closemp_used can safely
				 * be changed without taking a lock as no other
				 * thread can concurrently access it at this
				 * point in the connection lifecycle.
				 */

				if (tcp->tcp_closemp.b_prev == NULL)
					tcp->tcp_closemp_used = B_TRUE;
				else
					cmn_err(CE_PANIC,
					    "tcp_timewait_collector: "
					    "concurrent use of tcp_closemp: "
					    "connp %p tcp %p\n", (void *)connp,
					    (void *)tcp);

				TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
				mp = &tcp->tcp_closemp;
				SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
				    tcp_timewait_close, connp, NULL,
				    SQ_FILL, SQTAG_TCP_TIMEWAIT);
			}
		} else {
			mutex_enter(&connp->conn_lock);
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
			mutex_exit(&connp->conn_lock);
			/*
			 * We can reuse the closemp here since conn has
			 * detached (otherwise we wouldn't even be in
			 * time_wait list). tcp_closemp_used can safely
			 * be changed without taking a lock as no other
			 * thread can concurrently access it at this
			 * point in the connection lifecycle.
			 */

			if (tcp->tcp_closemp.b_prev == NULL)
				tcp->tcp_closemp_used = B_TRUE;
			else
				cmn_err(CE_PANIC, "tcp_timewait_collector: "
				    "concurrent use of tcp_closemp: "
				    "connp %p tcp %p\n", (void *)connp,
				    (void *)tcp);

			TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
			mp = &tcp->tcp_closemp;
			SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
			    tcp_timewait_close, connp, NULL,
			    SQ_FILL, SQTAG_TCP_TIMEWAIT);
		}
		mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
	}

	if (tcp_time_wait->tcp_free_list != NULL)
		tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;

	/*
	 * If the time wait list is not empty and there is no timer running,
	 * restart it.
	 */
	if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
	    tcp_time_wait->tcp_time_wait_tid == 0) {
		hrtime_t firetime;

		firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
		/* This ensures that we won't wake up too often. */
		firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
		tcp_time_wait->tcp_time_wait_tid =
		    timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
		    sqp, firetime, CALLOUT_TCP_RESOLUTION,
		    CALLOUT_FLAG_ROUNDUP);
	}
#ifdef DEBUG
	tcp_time_wait->tcp_time_wait_running = B_FALSE;
#endif
	mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
}
Example #26
static int
zvol_first_open(zvol_state_t *zv)
{
	objset_t *os;
	uint64_t volsize;
	int locked = 0;
	int error;
	uint64_t ro;

	/*
	 * In all other cases the spa_namespace_lock is taken before the
	 * bdev->bd_mutex lock.  But in this case the Linux __blkdev_get()
	 * function calls fops->open() with the bdev->bd_mutex lock held.
	 *
	 * To avoid a potential lock inversion deadlock we preemptively
	 * try to take the spa_namespace_lock.  Normally it will not
	 * be contended and this is safe because spa_open_common() handles
	 * the case where the caller already holds the spa_namespace_lock.
	 *
	 * When it is contended we risk a lock inversion if we were to
	 * block waiting for the lock.  Luckily, the __blkdev_get()
	 * function allows us to return -ERESTARTSYS which will result in
	 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
	 * called again.  This process can be repeated safely until both
	 * locks are acquired.
	 */
	if (!mutex_owned(&spa_namespace_lock)) {
		locked = mutex_tryenter(&spa_namespace_lock);
		if (!locked)
			return (-ERESTARTSYS);
	}

	/* lie and say we're read-only */
	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
	if (error)
		goto out_mutex;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		goto out_mutex;
	}

	zv->zv_objset = os;
	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		goto out_mutex;
	}

	set_capacity(zv->zv_disk, volsize >> 9);
	zv->zv_volsize = volsize;
	zv->zv_zilog = zil_open(os, zvol_get_data);

	VERIFY(dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL) == 0);
	if (ro || dmu_objset_is_snapshot(os) ||
	    !spa_writeable(dmu_objset_spa(os))) {
		set_disk_ro(zv->zv_disk, 1);
		zv->zv_flags |= ZVOL_RDONLY;
	} else {
		set_disk_ro(zv->zv_disk, 0);
		zv->zv_flags &= ~ZVOL_RDONLY;
	}

out_mutex:
	if (locked)
		mutex_exit(&spa_namespace_lock);

	return (-error);
}
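
The long comment above describes inversion avoidance by try-lock plus caller-driven replay: rather than block on spa_namespace_lock while bd_mutex is held, the open fails with -ERESTARTSYS so the outer lock is dropped and the whole open is retried. A compact sketch of that shape, with EAGAIN standing in for -ERESTARTSYS and both lock names invented:

#include <errno.h>
#include <pthread.h>

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;	/* bd_mutex analog */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;	/* namespace analog */

/* Called with `outer` held; the correct order is inner before outer. */
static int
first_open(void)
{
	if (pthread_mutex_trylock(&inner) != 0)
		return (-EAGAIN);	/* caller drops `outer` and replays */
	/* ... set up under both locks ... */
	pthread_mutex_unlock(&inner);
	return (0);
}

static int
open_with_retry(void)
{
	int err;

	do {
		pthread_mutex_lock(&outer);
		err = first_open();
		pthread_mutex_unlock(&outer);	/* the replay point */
	} while (err == -EAGAIN);
	return (err);
}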
Example #27
int
cpr(int fcn, void *mdep)
{

#if defined(__sparc)
	static const char noswapstr[] = "reusable statefile requires "
	    "that no swap area be configured.\n";
	static const char blockstr[] = "reusable statefile must be "
	    "a block device.  See power.conf(4) and pmconfig(1M).\n";
	static const char normalfmt[] = "cannot run normal "
	    "checkpoint/resume when in reusable statefile mode. "
	    "use uadmin A_FREEZE AD_REUSEFINI (uadmin %d %d) "
	    "to exit reusable statefile mode.\n";
	static const char modefmt[] = "%s in reusable mode.\n";
#endif
	register int rc = 0;
	int cpr_sleeptype;

	/*
	 * First, reject commands that we don't (yet) support on this arch.
	 * This is easier to understand broken out like this than grotting
	 * through the second switch below.
	 */

	switch (fcn) {
#if defined(__sparc)
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		return (ENOTSUP);
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_SUSP_DEVICES:
		cpr_sleeptype = CPR_TODISK;
		break;
#endif
#if defined(__x86)
	case AD_CHECK_SUSPEND_TO_DISK:
	case AD_SUSPEND_TO_DISK:
	case AD_CPR_REUSEINIT:
	case AD_CPR_NOCOMPRESS:
	case AD_CPR_FORCE:
	case AD_CPR_REUSABLE:
	case AD_CPR_REUSEFINI:
	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
	case AD_CPR_PRINT:
		return (ENOTSUP);
	/* The DEV_* values need to be removed after sys-syspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CPR_SUSP_DEVICES:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
	case AD_FORCE_SUSPEND_TO_RAM:
	case AD_DEVICE_SUSPEND_TO_RAM:
		cpr_sleeptype = CPR_TORAM;
		break;
#endif
	}
#if defined(__sparc)
	/*
	 * Need to know if we're in reusable mode, but we will likely have
	 * rebooted since REUSEINIT, so we have to get the info from the
	 * file system
	 */
	if (!cpr_reusable_mode)
		cpr_reusable_mode = cpr_get_reusable_mode();

	cpr_forget_cprconfig();
#endif

	switch (fcn) {

#if defined(__sparc)
	case AD_CPR_REUSEINIT:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_NOCOMPRESS:
	case AD_CPR_COMPRESS:
	case AD_CPR_FORCE:
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSABLE:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		if (!cpr_statefile_is_spec()) {
			cpr_err(CE_CONT, blockstr);
			return (EINVAL);
		}
		if ((rc = cpr_check_spec_statefile()) != 0)
			return (rc);
		if (swapinfo) {
			cpr_err(CE_CONT, noswapstr);
			return (EINVAL);
		}
		if ((rc = cpr_reusable_mount_check()) != 0)
			return (rc);
		cpr_test_mode = 0;
		break;

	case AD_CPR_REUSEFINI:
		if (!i_cpr_reusable_supported())
			return (ENOTSUP);
		cpr_test_mode = 0;
		break;

	case AD_CPR_TESTZ:
	case AD_CPR_TESTNOZ:
	case AD_CPR_TESTHALT:
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, normalfmt, A_FREEZE, AD_REUSEFINI);
			return (ENOTSUP);
		}
		cpr_test_mode = 1;
		break;

	case AD_CPR_CHECK:
		if (!i_cpr_is_supported(cpr_sleeptype) || cpr_reusable_mode)
			return (ENOTSUP);
		return (0);

	case AD_CPR_PRINT:
		CPR_STAT_EVENT_END("POST CPR DELAY");
		cpr_stat_event_print();
		return (0);
#endif

	case AD_CPR_DEBUG0:
		cpr_debug = 0;
		return (0);

	case AD_CPR_DEBUG1:
	case AD_CPR_DEBUG2:
	case AD_CPR_DEBUG3:
	case AD_CPR_DEBUG4:
	case AD_CPR_DEBUG5:
	case AD_CPR_DEBUG7:
	case AD_CPR_DEBUG8:
		cpr_debug |= CPR_DEBUG_BIT(fcn);
		return (0);

	case AD_CPR_DEBUG9:
		cpr_debug |= CPR_DEBUG6;
		return (0);

	/* The DEV_* values need to be removed after sys-syspend is fixed */
	case DEV_CHECK_SUSPEND_TO_RAM:
	case DEV_SUSPEND_TO_RAM:
	case AD_CHECK_SUSPEND_TO_RAM:
	case AD_SUSPEND_TO_RAM:
		cpr_test_point = LOOP_BACK_NONE;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_PASS:
		cpr_test_point = LOOP_BACK_PASS;
		break;

	case AD_LOOPBACK_SUSPEND_TO_RAM_FAIL:
		cpr_test_point = LOOP_BACK_FAIL;
		break;

	case AD_FORCE_SUSPEND_TO_RAM:
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		break;

	case AD_DEVICE_SUSPEND_TO_RAM:
		if (mdep == NULL) {
			/* Didn't pass enough arguments */
			return (EINVAL);
		}
		cpr_test_point = DEVICE_SUSPEND_TO_RAM;
		cpr_device = (major_t)atoi((char *)mdep);
		break;

	case AD_CPR_SUSP_DEVICES:
		cpr_test_point = FORCE_SUSPEND_TO_RAM;
		if (cpr_suspend_devices(ddi_root_node()) != DDI_SUCCESS)
			cmn_err(CE_WARN,
			    "Some devices did not suspend "
			    "and may be unusable");
		(void) cpr_resume_devices(ddi_root_node(), 0);
		return (0);

	default:
		return (ENOTSUP);
	}

	if (!i_cpr_is_supported(cpr_sleeptype))
		return (ENOTSUP);

#if defined(__sparc)
	if ((cpr_sleeptype == CPR_TODISK &&
	    !cpr_is_ufs(rootvfs) && !cpr_is_zfs(rootvfs)))
		return (ENOTSUP);
#endif

	if (fcn == AD_CHECK_SUSPEND_TO_RAM ||
	    fcn == DEV_CHECK_SUSPEND_TO_RAM) {
		ASSERT(i_cpr_is_supported(cpr_sleeptype));
		return (0);
	}

#if defined(__sparc)
	if (fcn == AD_CPR_REUSEINIT) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "already");
			mutex_exit(&cpr_slock);
			return (EBUSY);
		}
		rc = i_cpr_reuseinit();
		mutex_exit(&cpr_slock);
		return (rc);
	}

	if (fcn == AD_CPR_REUSEFINI) {
		if (mutex_tryenter(&cpr_slock) == 0)
			return (EBUSY);
		if (!cpr_reusable_mode) {
			cpr_err(CE_CONT, modefmt, "not");
			mutex_exit(&cpr_slock);
			return (EINVAL);
		}
		rc = i_cpr_reusefini();
		mutex_exit(&cpr_slock);
		return (rc);
	}
#endif

	/*
	 * acquire cpr serial lock and init cpr state structure.
	 */
	if (rc = cpr_init(fcn))
		return (rc);

#if defined(__sparc)
	if (fcn == AD_CPR_REUSABLE) {
		if ((rc = i_cpr_check_cprinfo()) != 0)  {
			mutex_exit(&cpr_slock);
			return (rc);
		}
	}
#endif

	/*
	 * Call the main cpr routine. If we are successful, we will be coming
	 * down from the resume side, otherwise we are still in suspend.
	 */
	cpr_err(CE_CONT, "System is being suspended");
	if (rc = cpr_main(cpr_sleeptype)) {
		CPR->c_flags |= C_ERROR;
		PMD(PMD_SX, ("cpr: Suspend operation failed.\n"))
		cpr_err(CE_NOTE, "Suspend operation failed.");
	} else if (CPR->c_flags & C_SUSPENDING) {

		/*
		 * In the suspend to RAM case, by the time we get
		 * control back we're already resumed
		 */
		if (cpr_sleeptype == CPR_TORAM) {
			PMD(PMD_SX, ("cpr: cpr CPR_TORAM done\n"))
			cpr_done();
			return (rc);
		}

#if defined(__sparc)

		PMD(PMD_SX, ("cpr: Suspend operation succeeded.\n"))
		/*
		 * Back from a successful checkpoint
		 */
		if (fcn == AD_CPR_TESTZ || fcn == AD_CPR_TESTNOZ) {
			mdboot(0, AD_BOOT, "", B_FALSE);
			/* NOTREACHED */
		}

		/* make sure there are no more changes to the device tree */
		PMD(PMD_SX, ("cpr: dev tree freeze\n"))
		devtree_freeze();

		/*
		 * stop other cpus and raise our priority.  since there is only
		 * one active cpu after this, and our priority will be too high
		 * for us to be preempted, we're essentially single threaded
		 * from here on out.
		 */
		PMD(PMD_SX, ("cpr: stop other cpus\n"))
		i_cpr_stop_other_cpus();
		PMD(PMD_SX, ("cpr: spl6\n"))
		(void) spl6();

		/*
		 * try and reset leaf devices.  reset_leaves() should only
		 * be called when there are no other threads that could be
		 * accessing devices
		 */
		PMD(PMD_SX, ("cpr: reset leaves\n"))
		reset_leaves();

		/*
		 * If i_cpr_power_down() succeeds, it'll not return
		 *
		 * Drives with write-cache enabled need to flush
		 * their cache.
		 */
		if (fcn != AD_CPR_TESTHALT) {
			PMD(PMD_SX, ("cpr: power down\n"))
			(void) i_cpr_power_down(cpr_sleeptype);
		}
		ASSERT(cpr_sleeptype == CPR_TODISK);
		/* currently CPR_TODISK comes back via a boot path */
		CPR_DEBUG(CPR_DEBUG1, "(Done. Please Switch Off)\n");
		halt(NULL);
		/* NOTREACHED */
#endif
	}
	PMD(PMD_SX, ("cpr: cpr done\n"))
	cpr_done();
	return (rc);
}
Example #28
static void
uvm_unloanpage(struct vm_page **ploans, int npages)
{
	struct vm_page *pg;
	kmutex_t *slock;

	mutex_enter(&uvm_pageqlock);
	while (npages-- > 0) {
		pg = *ploans++;

		/*
		 * do a little dance to acquire the object or anon lock
		 * as appropriate.  we are locking in the wrong order,
		 * so we have to do a try-lock here.
		 */

		slock = NULL;
		while (pg->uobject != NULL || pg->uanon != NULL) {
			if (pg->uobject != NULL) {
				slock = &pg->uobject->vmobjlock;
			} else {
				slock = &pg->uanon->an_lock;
			}
			if (mutex_tryenter(slock)) {
				break;
			}
			mutex_exit(&uvm_pageqlock);
			/* XXX Better than yielding but inadequate. */
			kpause("livelock", false, 1, NULL);
			mutex_enter(&uvm_pageqlock);
			slock = NULL;
		}

		/*
		 * drop our loan.  if page is owned by an anon but
		 * PQ_ANON is not set, the page was loaned to the anon
		 * from an object which dropped ownership, so resolve
		 * this by turning the anon's loan into real ownership
		 * (ie. decrement loan_count again and set PQ_ANON).
		 * after all this, if there are no loans left, put the
		 * page back a paging queue (if the page is owned by
		 * an anon) or free it (if the page is now unowned).
		 */

		KASSERT(pg->loan_count > 0);
		pg->loan_count--;
		if (pg->uobject == NULL && pg->uanon != NULL &&
		    (pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
		}
		if (pg->loan_count == 0 && pg->uobject == NULL &&
		    pg->uanon == NULL) {
			KASSERT((pg->flags & PG_BUSY) == 0);
			uvm_pagefree(pg);
		}
		if (slock != NULL) {
			mutex_exit(slock);
		}
	}
	mutex_exit(&uvm_pageqlock);
}
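
uvm_unloanpage spells the dance out fully: try the owner's lock while holding uvm_pageqlock; on failure drop the page-queue lock, pause so the other thread can make progress, and retry. The skeleton (lock_owner is an invented name, and unlike the real code this sketch assumes the owner lock itself does not change across the retry):

#include <pthread.h>
#include <sched.h>

static pthread_mutex_t pageq_lock = PTHREAD_MUTEX_INITIALIZER;

/* Acquire `slock` while pageq_lock is held, despite the wrong order. */
static void
lock_owner(pthread_mutex_t *slock)
{
	for (;;) {
		if (pthread_mutex_trylock(slock) == 0)
			return;		/* got it without deadlocking */
		pthread_mutex_unlock(&pageq_lock);
		sched_yield();		/* analog of kpause("livelock", ...) */
		pthread_mutex_lock(&pageq_lock);
	}
}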
Example #29
static int
tap_dev_read(int unit, struct uio *uio, int flags)
{
	struct tap_softc *sc = device_lookup_private(&tap_cd, unit);
	struct ifnet *ifp;
	struct mbuf *m, *n;
	int error = 0, s;

	if (sc == NULL)
		return (ENXIO);

	getnanotime(&sc->sc_atime);

	ifp = &sc->sc_ec.ec_if;
	if ((ifp->if_flags & IFF_UP) == 0)
		return (EHOSTDOWN);

	/*
	 * In the TAP_NBIO case, we have to make sure we won't be sleeping
	 */
	if ((sc->sc_flags & TAP_NBIO) != 0) {
		if (!mutex_tryenter(&sc->sc_rdlock))
			return (EWOULDBLOCK);
	} else {
		mutex_enter(&sc->sc_rdlock);
	}

	s = splnet();
	if (IFQ_IS_EMPTY(&ifp->if_snd)) {
		ifp->if_flags &= ~IFF_OACTIVE;
		/*
		 * We must release the lock before sleeping, and re-acquire it
		 * after.
		 */
		mutex_exit(&sc->sc_rdlock);
		if (sc->sc_flags & TAP_NBIO)
			error = EWOULDBLOCK;
		else
			error = tsleep(sc, PSOCK|PCATCH, "tap", 0);
		splx(s);

		if (error != 0)
			return (error);
		/* The device might have been downed */
		if ((ifp->if_flags & IFF_UP) == 0)
			return (EHOSTDOWN);
		if ((sc->sc_flags & TAP_NBIO)) {
			if (!mutex_tryenter(&sc->sc_rdlock))
				return (EWOULDBLOCK);
		} else {
			mutex_enter(&sc->sc_rdlock);
		}
		s = splnet();
	}

	IFQ_DEQUEUE(&ifp->if_snd, m);
	ifp->if_flags &= ~IFF_OACTIVE;
	splx(s);
	if (m == NULL) {
		error = 0;
		goto out;
	}

	ifp->if_opackets++;
	bpf_mtap(ifp, m);

	/*
	 * One read is one packet.
	 */
	do {
		error = uiomove(mtod(m, void *),
		    min(m->m_len, uio->uio_resid), uio);
		m = n = m_free(m);
	} while (m != NULL && uio->uio_resid > 0 && error == 0);

	if (m != NULL)
		m_freem(m);

out:
	mutex_exit(&sc->sc_rdlock);
	return (error);
}
Example #30
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Ensure that the filesystem is not unmounted during the move.
	 */
	if (zfs_enter(zfsvfs) != 0) {		/* ZFS_ENTER */
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
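
zfs_znode_move layers its try-acquires and maps every failure to KMEM_CBRC_LATER, telling the kmem move callback to retry later rather than block a cache reap. The control shape, reduced to two pthread locks with invented names:

#include <pthread.h>

enum cbrc { CBRC_YES, CBRC_LATER };

static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;

static enum cbrc
move_object(void)
{
	if (pthread_mutex_trylock(&obj_lock) != 0)
		return (CBRC_LATER);		/* ask to be called again */
	if (pthread_mutex_trylock(&vnode_lock) != 0) {
		pthread_mutex_unlock(&obj_lock);
		return (CBRC_LATER);		/* back out in reverse order */
	}
	/* ... relocate while both locks are held ... */
	pthread_mutex_unlock(&vnode_lock);
	pthread_mutex_unlock(&obj_lock);
	return (CBRC_YES);
}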